def swapCodeForTokens(response):
    # takes a dict of response parameters as input, e.g. {'error': '...'}
    # or {'code': '...', 'state': '...'}; called at the server redirect
    # when imgur returns the code
    db = connect_db()
    c = db.cursor()
    state = json.loads(response['state'])
    user_identifier = state['id']
    redir = state['redir']
    if 'error' in response:
        c.execute('UPDATE users SET imgur_json=NULL WHERE imgur_id=' + app.sqlesc,
                  (user_identifier,))
        db.commit()
        db.close()
        return {'success': False}
    client = ImgurClient(app.config['IMGUR_CLIENTID'], app.config['IMGUR_SECRET'])
    credentials = client.authorize(response['code'], 'authorization_code')
    if 'access_token' in credentials and 'refresh_token' in credentials:
        c.execute('UPDATE users SET imgur_json=' + app.sqlesc +
                  ' WHERE imgur_id=' + app.sqlesc,
                  (json.dumps(credentials), user_identifier))
        db.commit()
        db.close()
        return {'success': True, 'redir': redir}
    else:
        c.execute('UPDATE users SET imgur_json=NULL WHERE imgur_id=' + app.sqlesc,
                  (user_identifier,))
        db.commit()
        db.close()
        return {'success': False}
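# A minimal sketch of the Flask route that would hand the imgur OAuth
# redirect off to swapCodeForTokens. The route path and the use of
# flask's request/redirect/url_for here are assumptions, not part of
# the original code.
@app.route('/imgur_callback')
def imgur_callback():
    result = swapCodeForTokens(request.args.to_dict())
    if result['success']:
        return redirect(result.get('redir') or url_for('home'))
    return redirect(url_for('home'))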
def setUpClass(cls) -> None:
    print('setUpClass')
    app = create_app()
    app.config.update(
        TESTING=True,
        SQLALCHEMY_DATABASE_URI='mysql+cymysql://root:159951@localhost:3306/test?charset=utf8',
        SQLALCHEMY_ENCODING='utf-8',
        # suppress SQLAlchemy's FSADeprecationWarning
        SQLALCHEMY_TRACK_MODIFICATIONS=False,
    )
    cls.app = app
    cls.client = app.test_client()
    with cls.app.app_context():
        connect_db(app)
def checkApiAccess(userid):
    # checks whether we have api keys for this user and whether they work;
    # returns True if usable, None if rate-limited, False otherwise
    db = connect_db()
    c = db.cursor()
    c.execute('SELECT imgur_json FROM users WHERE id=' + app.sqlesc, (userid,))
    r = c.fetchone()
    if r is not None:  # fetchone() returns None when no row matches
        try:
            r = json.loads(r[0])
            access_token = r['access_token']
            refresh_token = r['refresh_token']
        except TypeError:
            return False
    else:
        return False
    client = ImgurClient(app.config['IMGUR_CLIENTID'], app.config['IMGUR_SECRET'])
    client.set_user_auth(access_token, refresh_token)
    try:
        client.get_account('me').url  # cheap call to verify the tokens work
        credits = client.credits
        if credits['ClientRemaining'] > 10 and credits['UserRemaining'] > 10:
            return True
        else:
            return None  # tokens valid, but too close to the rate limit
    except ImgurClientError:
        return False
def uploadToImgur(userid, url):
    db = connect_db()
    c = db.cursor()
    c.execute('SELECT map_url,name,farmname,date,imgur_json FROM playerinfo '
              'WHERE url=' + app.sqlesc, (url,))
    result = c.fetchone()
    if result[4] is not None:
        previous_upload_properties = json.loads(result[4])
        # refuse re-uploads within two hours of the last one
        if time.time() < previous_upload_properties['upload_time'] + (2 * 3600):
            return {'error': 'too_soon',
                    'link': previous_upload_properties['imgur_url']}
    map_url = result[0]
    titlestring = u"{} Farm, {} by {}".format(result[2], result[3], result[1])
    descriptionstring = (u"Stardew Valley game progress, full summary at "
                         u"http://upload.farm/{}".format(url))
    c.execute('SELECT imgur_json FROM users WHERE id=' + app.sqlesc, (userid,))
    r = json.loads(c.fetchone()[0])
    access_token = r['access_token']
    refresh_token = r['refresh_token']
    client = ImgurClient(app.config['IMGUR_CLIENTID'], app.config['IMGUR_SECRET'])
    client.set_user_auth(access_token, refresh_token)
    if app.config['IMGUR_DIRECT_UPLOAD']:
        result = client.upload_from_path(
            map_url,
            config={'title': titlestring, 'description': descriptionstring},
            anon=False)
    else:
        map_url = u"http://upload.farm/{}".format(map_url)
        result = client.upload_from_url(
            map_url,
            config={'title': titlestring, 'description': descriptionstring},
            anon=False)
    print(result)
    imgur_json = json.dumps({'imgur_url': 'http://imgur.com/' + result['id'],
                             'upload_time': time.time()})
    c.execute('UPDATE playerinfo SET imgur_json=' + app.sqlesc +
              ' WHERE url=' + app.sqlesc, (imgur_json, url))
    db.commit()
    try:
        return {'success': None, 'link': result['link']}
    except (KeyError, TypeError):
        return {'error': 'upload_issue', 'link': None}
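# A minimal sketch of how these pieces might fit together in a view:
# verify API access, upload if possible, otherwise signal that the user
# needs to (re)authorize. The function name shareMap and its return
# shapes are assumptions for illustration.
def shareMap(userid, url):
    access = checkApiAccess(userid)
    if access is True:
        return uploadToImgur(userid, url)
    elif access is None:
        return {'error': 'rate_limited', 'link': None}
    else:
        # no working tokens: caller should redirect to getAuthUrl(userid, url)
        return {'error': 'needs_auth', 'link': None}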
def getEntries(where=None):
    connection = connect_db()
    c = connection.cursor()
    if where is None:
        where = ''
    c.execute('SELECT id,md5,url,savefileLocation FROM playerinfo ' + where)
    entries = c.fetchall()
    connection.close()
    return entries
def getAuthUrl(userid, target=None):
    db = connect_db()
    c = db.cursor()
    iuid = unicode(uuid.uuid4())  # Python 2 codebase: unicode built-in
    imgur_id = json.dumps({'id': iuid, 'redir': target})
    c.execute('UPDATE users SET imgur_id=' + app.sqlesc +
              ' WHERE id=' + app.sqlesc, (iuid, userid))
    db.commit()
    db.close()
    client = ImgurClient(app.config['IMGUR_CLIENTID'], app.config['IMGUR_SECRET'])
    # the JSON state round-trips through imgur and comes back to
    # swapCodeForTokens, which matches state['id'] against users.imgur_id
    authorization_url = client.get_auth_url('code') + '&state=' + unicode(imgur_id)
    return authorization_url
def fetch_tags():
    """Grab all tags from Math.StackExchange, and add to our database."""
    con = connect_db()
    cur = con.cursor()
    func = "/tags"
    params = {
        'pagesize': 100,
        'order': 'asc',
        'sort': 'name'
    }
    process_each_page(func, params, insert_tags)
def setUp(self):
    """Set up a blank test database before each test"""
    self.db_name = name_from_uri(os.environ['TEST_DATABASE_URL'])
    app.app.config.update(
        TESTING=True,
        SQLALCHEMY_DATABASE_URI=os.environ['TEST_DATABASE_URL']
    )
    upgrade(directory='migrations')
    self.app = app.app.test_client()
    app.init_db(self.db_name)
    self.cursor = app.connect_db()
def processFile(filename, old_md5, rowid, url):
    with open(filename, 'rb') as f:
        md5_info = md5(f)
    player_info = playerInfo(filename)
    farm_info = getFarmInfo(filename)
    try:
        assert md5_info == old_md5
    except AssertionError:
        # original printed after the return, which was unreachable
        print(filename + ' failed md5')
        return False
    # flatten the nested player_info structure into column/value lists
    columns = []
    values = []
    for key in player_info.keys():
        if type(player_info[key]) == list:
            for i, item in enumerate(player_info[key]):
                columns.append(key.replace(' ', '_') + str(i))
                values.append(str(item))
        elif type(player_info[key]) == dict:
            for subkey in player_info[key]:
                if type(player_info[key][subkey]) == dict:
                    for subsubkey in player_info[key][subkey]:
                        columns.append((key + subkey + subsubkey).replace(' ', '_'))
                        values.append(player_info[key][subkey][subsubkey])
                else:
                    columns.append((key + subkey).replace(' ', '_'))
                    values.append(str(player_info[key][subkey]))
        else:
            columns.append(key)
            values.append(str(player_info[key]))
    columns.append('farm_info')
    values.append(json.dumps(farm_info))
    columns.append('failed_processing')
    values.append(None)
    colstring = ', '.join(columns)
    questionmarks = ','.join([sqlesc] * len(values))
    try:
        connection = connect_db()
        cur = connection.cursor()
        cur.execute('UPDATE playerinfo SET (' + colstring + ') = (' +
                    questionmarks + ') WHERE id=' + sqlesc,
                    tuple(values + [rowid]))
        cur.execute('INSERT INTO todo (task, playerid) VALUES (' +
                    sqlesc + ',' + sqlesc + ')', ('process_image', rowid))
        connection.commit()
        connection.close()
        return True
    except (sqlite3.OperationalError, psycopg2.ProgrammingError) as e:
        cur.execute('INSERT INTO errors (ip, time, notes) VALUES (' +
                    sqlesc + ',' + sqlesc + ',' + sqlesc + ')',
                    ('reprocessEntry.py', time.time(),
                     str(e) + ' ' + str([columns, values])))
        connection.commit()
        return False
def set_last_updated():
    """Mark the database as having last been updated now."""
    con = connect_db()
    cur = con.cursor()
    date = dt.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    query = "select * from last_updated where description='questions';"
    # MySQLdb's cursor.execute returns the number of rows matched
    if cur.execute(query) == 0:
        query = "insert into last_updated (description, date) values(%s, %s);"
        desc = 'questions'
        cur.execute(query, [desc, date])
    else:
        query = "update last_updated set date=%s where description='questions';"
        cur.execute(query, [date])
    con.commit()
    con.close()
def launched_by_known_user(context):
    with app.app_context():
        g.db = connect_db()
        user = AlexaUser(TEST_USER_ID)
        user.set_location(
            TEST_USER_DATA['latitude'],
            TEST_USER_DATA['longitude'],
            TEST_USER_DATA['city'],
        )
        data = base_alexa_request('LaunchRequest')
        # with patch('app.alexa.AlexaUser.get_data_by_id') as mock_gdbi:
        #     mock_gdbi.return_value = TEST_USER_DATA
        context.response = context.client.post(
            '/alexa/',
            data=json.dumps(data),
            content_type='application/json',
        )
        assert context.response
def get_last_updated():
    """Get the MySQL datetime of the last update to the question list.

    Returns the epoch if no time is listed in the database.
    """
    con = connect_db()
    cur = con.cursor()
    query = "SELECT * FROM last_updated WHERE description='questions'"
    if cur.execute(query) > 0:
        ts = cur.fetchone()['date']
    else:
        # consistent with set_last_updated, dt is the datetime module
        ts = dt.datetime.fromtimestamp(0)
        cur.execute("""INSERT INTO last_updated (description, date)
                    VALUES ('questions', %s);""", [ts])
    con.commit()
    con.close()
    return ts
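# A minimal sketch of the update cycle these two helpers bracket: read the
# last-updated timestamp, fetch everything newer, then stamp the database.
# fetch_recent_questions is named in the process_api_questions docstring
# below, but its exact signature is an assumption here.
def update_question_list():
    since = get_last_updated()
    items = fetch_recent_questions(since)  # assumed signature
    process_api_questions(items)
    set_last_updated()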
def create_user(username='******', password='******',
                email='*****@*****.**', active=False):
    app.init_db()
    password = get_password_for_database(password)
    if active:
        sql = CREATE_ACTIVE_USER
    else:
        sql = CREATE_USER
    sql = sql % {
        'username': username,
        'password': password,
        'email': email,
    }
    db = app.connect_db()
    db.executescript(sql)
    db.commit()
def create_tag_categories():
    """Cluster MSE tags into categories using sklearn AffinityPropagation.

    Any existing category system in the database will be overwritten.
    """
    con = connect_db()
    cur = con.cursor()
    query = """
        SELECT T.id, T.name, COUNT(Q.question_id) AS count
        FROM (
            SELECT tags.id, tags.name, COUNT(qt.question_id) AS count
            FROM tags
            JOIN question_tags AS qt ON qt.tag_id=tags.id
            WHERE tags.name NOT IN ('advice', 'applications', 'big-list',
                'education', 'intuition', 'learning', 'math-history',
                'math-software', 'reference-request', 'self-learning',
                'soft-question', 'teaching', 'alternative-proof-strategy',
                'proof-writing', 'visualization', 'alternative-proof',
                'proof-strategy', 'proof-verification', 'solution-verification',
                'definition', 'examples-counterexamples', 'mathematica',
                'wolfram-alpha', 'maple', 'matlab', 'sage', 'octave',
                'floor-function', 'ceiling-function', 'article-writing',
                'publishing', 'combinatorial-species',
                'gromov-hyperbolic-spaces', 'chemistry', 'book-recommendation')
            GROUP BY tags.name
        ) AS T
        JOIN question_tags AS Q ON T.id=Q.tag_id
        GROUP BY T.id"""
    cur.execute(query)
    tag_ids = []
    tag_names = []
    tag_indices = dict()
    tag_name_indices = dict()
    counts = []
    for q in cur:
        tag_ids.append(q['id'])
        tag_names.append(q['name'])
        tag_indices[q['id']] = len(tag_ids) - 1
        tag_name_indices[q['name']] = len(tag_ids) - 1
        counts.append(q['count'])
    tag_ids = np.array(tag_ids)
    tag_names = np.array(tag_names)
    # count tag co-occurrences to build a similarity matrix
    query = """
        SELECT t1.id AS tag1, t2.id AS tag2, COUNT(qt1.question_id) AS count
        FROM question_tags AS qt1
        JOIN question_tags AS qt2 ON qt1.question_id=qt2.question_id
        JOIN tags AS t1 ON t1.id=qt1.tag_id
        JOIN tags AS t2 ON t2.id=qt2.tag_id
        WHERE t1.id IN ({taglist}) AND t2.id IN ({taglist})
        GROUP BY t1.name, t2.name""".format(
        taglist=','.join(str(i) for i in tag_ids))
    cur.execute(query)
    paircounts = [[0 for i in range(len(tag_ids))] for j in range(len(tag_ids))]
    for q in cur:
        i1 = tag_indices[q['tag1']]
        i2 = tag_indices[q['tag2']]
        c = q['count']
        if i1 == i2:
            paircounts[i1][i1] = int(c / 2)  # the self-join counts each pair twice
        else:
            paircounts[i1][i2] = c
    sim = np.array(paircounts, dtype=np.float_)
    cluster = AffinityPropagation(affinity='precomputed', damping=0.5)
    labels = cluster.fit_predict(sim)
    classes = sorted(set(labels))
    cur.execute("DELETE FROM categories WHERE 1;")
    cur.execute("DELETE FROM tag_categories WHERE 1;")
    # name each category after its cluster's exemplar tag
    catnames = [tag_names[cluster.cluster_centers_indices_[c]] for c in classes]
    query = "INSERT INTO categories (id,name) VALUES "
    query += ','.join("({},'{}')".format(c, catnames[c]) for c in classes)
    cur.execute(query)
    query = "INSERT INTO tag_categories (tag_id, category_id) VALUES "
    query += ','.join("({},{})".format(tag_ids[i], labels[i])
                      for i in range(len(labels)))
    cur.execute(query)
    con.commit()
def process_api_questions(items, check_quality=True):
    """Add question/answer list to the database.

    items -- 'items' object returned from Math.SE query against questions
        with specific filters set (as in fetch_recent_questions)
    check_quality -- if True, questions are run through the effort predictor
    """
    if len(items) == 0:
        return
    con = connect_db()
    cur = con.cursor()
    qids = [item['question_id'] for item in items]
    qids.sort()
    query = "SELECT id FROM questions WHERE id IN ("
    query += ','.join([str(i) for i in qids])
    query += ');'
    cur.execute(query)
    existing = [q['id'] for q in cur]
    for item in items:
        question = {
            'id': item['question_id'],
            'body_html': item['body'],
            'body_markdown': item['body_markdown'],
            'creation_date': from_timestamp(item['creation_date']),
            'last_activity_date': from_timestamp(item['last_activity_date']),
            'link': item['link'],
            'score': item['score'],
            'title': item['title'],
            'historic': True
        }
        if 'owner' in item and len(item['owner']) > 0:
            question['author_id'] = item['owner']['user_id']
        else:
            question['author_id'] = None
        if 'accepted_answer_id' in item:
            question['accepted_answer_id'] = item['accepted_answer_id']
        else:
            question['accepted_answer_id'] = None
        if 'closed_date' in item:
            question['closed_date'] = from_timestamp(item['closed_date'])
            question['closed_desc'] = item['closed_details']['description']
            question['closed_reason'] = item['closed_reason']
        else:
            question['closed_date'] = None
            question['closed_desc'] = None
            question['closed_reason'] = None
        # insert new questions, update existing ones
        if question['id'] not in existing:
            query = 'INSERT INTO questions ('
            query += ','.join(sorted(question.keys()))
            query += ') VALUES ('
            query += ','.join('%s' for i in range(len(question.keys())))
            query += ');'
        else:
            query = "UPDATE questions SET "
            query += ','.join(k + '=%s' for k in sorted(question.keys()))
            query += ' WHERE id=' + str(question['id']) + ';'
        cur.execute(query, [question[k] for k in sorted(question.keys())])
        # rebuild the tag links for this question
        query = "DELETE FROM question_tags WHERE question_id=%s"
        cur.execute(query, [question['id']])
        query = "SELECT id FROM tags WHERE tags.name IN ("
        query += ','.join("'{}'".format(t) for t in item['tags'])
        query += ");"
        cur.execute(query)
        tagids = [t['id'] for t in cur]
        query = "INSERT INTO question_tags (question_id, tag_id) VALUES (%s,%s)"
        for tagid in tagids:
            cur.execute(query, [question['id'], tagid])
        if 'answers' in item:
            answers = []
            for answer in item['answers']:
                a = {
                    'id': answer['answer_id'],
                    'body_html': answer['body'],
                    'body_markdown': answer['body_markdown'],
                    'creation_date': from_timestamp(answer['creation_date']),
                    'is_accepted': answer['is_accepted'],
                    'last_activity_date':
                        from_timestamp(answer['last_activity_date']),
                    'link': answer['link'],
                    'question_id': answer['question_id'],
                    'score': answer['score']
                }
                if 'owner' in answer and len(answer['owner']) > 0:
                    a['author_id'] = answer['owner']['user_id']
                else:
                    a['author_id'] = None
                answers.append(a)
            query = "SELECT id FROM answers WHERE id IN ("
            query += ','.join(str(a['id']) for a in answers)
            query += ') ORDER BY id ASC;'
            cur.execute(query)
            existinga = [a['id'] for a in cur]
            for a in answers:
                if a['id'] not in existinga:
                    query = 'INSERT INTO answers ('
                    query += ','.join(sorted(a.keys()))
                    query += ') VALUES ('
                    query += ','.join('%s' for i in range(len(a.keys())))
                    query += ');'
                else:
                    query = 'UPDATE answers SET '
                    query += ','.join(k + '=%s' for k in sorted(a.keys()))
                    query += ' WHERE id=' + str(a['id']) + ';'
                cur.execute(query, [a[k] for k in sorted(a.keys())])
    con.commit()
    if check_quality:
        query = "SELECT id, body_html FROM questions WHERE id IN ("
        query += ','.join([str(i) for i in qids])
        query += ') ORDER BY id ASC;'
        cur.execute(query)
        bodies = [item['body_html'] for item in cur]
        with open('psq.pickle', 'rb') as f:
            clf = pickle.load(f)
        probs = clf.predict_proba(bodies)[:, 0]
        query = "UPDATE questions SET quality_score=%s WHERE id=%s"
        for i in range(len(qids)):
            cur.execute(query, [int(100 * probs[i]), qids[i]])
        con.commit()
    con.close()
def before_request():
    g.db = connect_db()
def test_database(self):
    # conn = psycopg2.connect(database='flaskr_tdd')
    cursor = connect_db()
    # TODO: figure out a better string of sql commands
    cursor.execute("select relname from pg_class "
                   "where relkind='r' and relname !~ '^(pg_|sql_)';")
    self.assertIsInstance(cursor.fetchall(), list)
def reset_db():
    """Drop all tables and create all tables"""
    connect_db(app)
    db.drop_all()
    db.create_all()
def init_db():
    with closing(connect_db()) as client:
        for item in DUMMY_DATA:
            client.todo.items.insert(item)
def build_psq_classifier(end_date_str=None):
    """Build a predictor of whether or not a question will be closed as
    homework / for 'lack of context'.

    This is accomplished by training a TF-IDF + logistic regression
    pipeline on old post data. If end_date_str isn't specified, it is
    initialized to two weeks prior. Pickles the classifier, an instance
    of sklearn.pipeline.Pipeline. Also stores some basic data metrics.

    Note that we only use posts written after 2013-06-25, the date on
    which the first such closure reason was instituted.
    """
    if end_date_str is None:
        ts = time() - 60 * 60 * 24 * 14
        end_date_str = from_timestamp(ts)
    con = connect_db()
    cur = con.cursor()
    trf = TfidfVectorizer(
        ngram_range=(2, 6),
        stop_words='english',
        analyzer='char',
        preprocessor=preprocess_post
    )
    reg = LogisticRegression()
    clf = Pipeline([('vectorizer', trf), ('reg', reg)])
    X_raw = []
    Y_raw = []
    # Fetch closed questions from database
    query = """SELECT * FROM questions
        WHERE creation_date < '{}' AND closed_reason='off-topic'
        AND (closed_desc LIKE '%context%'
             OR closed_desc LIKE '%homework%');""".format(end_date_str)
    cur.execute(query)
    for q in cur:
        X_raw.append(q['body_html'])
        Y_raw.append(1)
    num_closed = len(X_raw)
    # Fetch an equal number of un-closed questions
    query = """SELECT * FROM questions
        WHERE creation_date < %s AND closed_reason IS NULL
        ORDER BY creation_date LIMIT %s"""
    cur.execute(query, [end_date_str, num_closed])
    for q in cur:
        X_raw.append(q['body_html'])
        Y_raw.append(0)
    # shuffle before splitting (shuff was undefined in the original)
    shuff = np.random.permutation(len(X_raw))
    X_raw = [X_raw[i] for i in shuff]
    Y_raw = [Y_raw[i] for i in shuff]
    # Hold back 20% of examples as test set
    X_train, X_test, Y_train, Y_test = train_test_split(
        X_raw, Y_raw, test_size=0.2)
    test_size = len(X_test)
    train_size = len(X_train)
    # Perform grid search to tune parameters for F1-score
    params = [{
        'vectorizer__ngram_range': [(2, 2), (2, 4), (2, 6), (2, 8)],
        'reg__penalty': ['l1', 'l2'],
        'reg__C': [.01, .03, .1, .3, 1, 3, 10, 30, 100],
        'reg__intercept_scaling': [.1, 1, 10, 100]
    }]
    gridsearch = GridSearchCV(clf, params, scoring='f1', n_jobs=4,
                              pre_dispatch=8)
    gridsearch.fit(X_train, Y_train)
    clf = gridsearch.best_estimator_
    print("Done training classifier!")
    print("Parameters from CV:")
    for k, v in gridsearch.best_params_.items():
        print("{}: {}".format(k, v))
    preds = clf.predict(X_test)
    print("Done making predictions for test set.")
    print("Results:")
    clf.stats = dict()
    clf.stats['train_size'] = train_size
    clf.stats['train_pos'] = np.sum(Y_train)
    clf.stats['train_neg'] = train_size - np.sum(Y_train)
    clf.stats['test_size'] = test_size
    clf.stats['test_pos'] = np.sum(Y_test)
    clf.stats['test_neg'] = test_size - np.sum(Y_test)
    clf.stats['accuracy'] = clf.score(X_test, Y_test)
    clf.stats['precision'] = precision_score(Y_test, preds)
    clf.stats['recall'] = recall_score(Y_test, preds)
    for k in clf.stats:
        print("  {}: {}".format(k, clf.stats[k]))
    with open('psq.pickle', 'wb') as f:
        pickle.dump(clf, f)
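# A minimal sketch of consuming the pickled classifier, mirroring the
# check_quality branch in process_api_questions. The sample post body
# is made up for illustration.
import pickle

with open('psq.pickle', 'rb') as f:
    clf = pickle.load(f)
# predict_proba returns one column per class; given how the training
# labels were built (0 = kept open, 1 = closed), column 0 appears to be
# the probability the question stays open, which process_api_questions
# scales to a 0-100 quality_score
prob_open = clf.predict_proba(['<p>Solve my homework for me.</p>'])[:, 0]
print(int(100 * prob_open[0]))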
def setUp(self):
    self.db_client = connect_db(db="test-temp")
from unittest import TestCase

from app import db, connect_db, app
from models import User, Favorite, Song, Playlist
from info.api_samples import SONG_JSON

app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql:///phonia_music_test'
app.config['SQLALCHEMY_ECHO'] = False
app.config['WTF_CSRF_ENABLED'] = False

connect_db(app)
db.drop_all()
db.create_all()


class UserViewsTestCase(TestCase):
    def setUp(self):
        User.query.delete()
        Favorite.query.delete()
        Song.query.delete()
        db.session.commit()
        self.username = "******"
        self.user_password = "******"
        self.user_full_name = "TEST MAN"
        self.user = User.signup(username=self.username,
                                password=self.user_password,
                                full_name=self.user_full_name)
        db.session.add(self.user)
        db.session.commit()
        self.user_id = self.user.id