def init():
    """Publish labelling-progress counters as template globals on ``app``.

    Runs two COUNT queries against ``rawcontents`` over a single connection
    and registers each result through ``app.add_template_global``:

    * ``progress_total`` -- number of rows in ``rawcontents``.
    * ``progress_done``  -- number of rows whose ``tag`` is not NULL.

    The values are read once, at the time this function is called.
    """
    # (template global name, query producing a single COUNT(*) value)
    counters = (
        ('progress_total',
         "SELECT COUNT(*) FROM rawcontents"),
        ('progress_done',
         "SELECT COUNT(*) FROM rawcontents WHERE tag IS NOT NULL"),
    )
    with DB_ENGINE.connect() as conn:
        for global_name, query in counters:
            first_row = conn.execute(query).fetchone()
            app.add_template_global(first_row[0], global_name)
def post_list(blockid):
    """Render ``list.html`` with the posts of one board.

    Args:
        blockid: Board identifier; must be one of the whitelisted values
            below.

    Returns:
        The rendered ``list.html`` template, given a ``post_list`` result
        whose rows carry ``pid``, ``title``, ``replycount``, ``posttime``
        and ``tagcount``; or the tuple ``("Boom", 400)`` when ``blockid`` is
        not whitelisted.
    """
    # Explicit check instead of `assert`: asserts are removed under
    # `python -O`, which would let arbitrary input reach the SQL below.
    if blockid not in ('1179', 'develop', 'free', 'funinfo', 'worldlook'):
        return "Boom", 400

    # Imported locally so this block does not depend on the file's import
    # header already exposing `text`.
    from sqlalchemy import text

    # The board id is passed as a bound parameter rather than formatted into
    # the statement text.
    query = text('''
        SELECT p.pid, p.title, COUNT(rc.content) replycount, p.posttime,
               COUNT(rc.tag) tagcount
        FROM posts p, replys r, rawcontents rc
        WHERE p.blockid=:blockid AND p.pid=r.pid AND rc.rid=r.rid
        GROUP BY r.pid
    ''')
    with DB_ENGINE.connect() as conn:
        res = conn.execute(query, {'blockid': blockid})
        # Render while the connection is still open: `res` is a live result
        # that the template iterates over.
        return render_template('list.html', post_list=res)
def reply_tag(pid):
    """Store the tags submitted in the current request form.

    Every ``(rid, tag)`` pair in ``request.form`` updates the matching
    ``rawcontents`` row, setting ``tag`` to ``float(tag)`` and ``assure`` to
    True. All updates run in one transaction: either every pair is applied
    or none is.

    Args:
        pid: Not used by the body; presumably supplied by the URL route
            (TODO confirm against the route declaration).

    Returns:
        ``'success'`` when the transaction commits, ``'failed'`` when any
        update (or the ``float`` conversion of a tag) raises.
    """
    with DB_ENGINE.connect() as conn:
        transaction = conn.begin()
        try:
            for rid, tag in request.form.items():
                statement = (
                    update(rawcontents)
                    .where(rawcontents.c.rid == rid)
                    .values(tag=float(tag), assure=True)
                )
                conn.execute(statement)
            transaction.commit()
        except Exception:
            # `Exception` rather than a bare `except:` so that
            # KeyboardInterrupt / SystemExit are not converted into a
            # 'failed' response; the connection context manager still
            # releases the connection in that case.
            transaction.rollback()
            return 'failed'
        return 'success'
def completeTrainData(current): stmt = 'SELECT rid, content, vector FROM rawcontents WHERE rid IN {}' with DB_ENGINE.connect() as conn: traindata = current.merge(pd.DataFrame( conn.execute(stmt.format(tuple(current['rid'].values))).fetchall(), columns=['rid', 'content', 'vector']), on='rid') traindata['ss'] = list( map(lambda x: -1 if x[0] < 0.5 else x[1], zip(traindata['assure'], traindata['tag']))) traindata['vector'] = traindata['vector'].apply(pickle.loads) return traindata.set_index(np.arange(len(current)))
def post_detail(pid):
    """Render ``detail.html`` for one post and its replies.

    Args:
        pid: Post id; anything ``int()`` accepts.

    Returns:
        The rendered ``detail.html`` template, given ``post`` (one row from
        ``posts``, or None when no row matches), ``replys`` (rows joined
        from ``replys`` and ``rawcontents``), ``lastpid`` (``pid - 1``) and
        ``nextpid`` (``pid + 1``); or the tuple ``("Pa", 400)`` when ``pid``
        cannot be converted to an integer.
    """
    try:
        pid = int(pid)
    except (TypeError, ValueError):
        # int() signals bad input with these exceptions, not AssertionError,
        # so the original handler never produced the intended 400 response.
        return "Pa", 400

    # `pid` is a real int from here on, so formatting it into the statements
    # cannot inject SQL.
    with DB_ENGINE.connect() as conn:
        post = conn.execute('''
            SELECT pid, blockid, title, pageurl, clickcount, replycount,
                   activityuserid
            FROM posts
            WHERE pid={}
        '''.format(pid)).fetchone()
        replyres = conn.execute('''
            SELECT r.rid, hostid, posttime, r.content, upCount, shang,
                   totalScore, tag
            FROM replys r, rawcontents rc
            WHERE pid={} AND r.rid=rc.rid
        '''.format(pid)).fetchall()
        return render_template('detail.html',
                               post=post,
                               replys=replyres,
                               lastpid=pid - 1,
                               nextpid=pid + 1)
def label(self, rid, choice):
    """Persist a binary label for one ``rawcontents`` row.

    ``choice`` maps to tag 1 when it is in ``self.T`` and to tag 0 when it
    is in ``self.F``; any other value is treated as a failure and nothing is
    written.

    Args:
        rid: Id of the ``rawcontents`` row to update.
        choice: Value tested for membership in ``self.T`` / ``self.F``.

    Returns:
        ``(i, len(predicted), 'success')`` after the row was updated with
        the tag and ``assure=True``, otherwise
        ``(i, len(predicted), 'fail')``.
    """
    # NOTE(review): `i` and `predicted` are not defined in this method; they
    # are presumably module-level state maintained elsewhere -- confirm.
    tag = None
    try:
        if choice in self.T:
            tag = 1
        elif choice in self.F:
            tag = 0
        if tag is None:
            # Explicit check instead of `assert`, which is stripped under
            # `python -O` and would let "tag=None" reach the SQL statement.
            raise ValueError('unrecognised choice: {!r}'.format(choice))
        with DB_ENGINE.connect() as conn:
            conn.execute(
                'UPDATE rawcontents SET tag={}, assure=True WHERE rid={}'.
                format(tag, rid))
    except Exception:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit are not reported as an ordinary labelling failure.
        logger.info('Failed to label {} to {}'.format(rid, tag))
        return i, len(predicted), 'fail'
    else:
        logger.info('Success in labeling {} to {}'.format(rid, tag))
        return i, len(predicted), 'success'
def dumpinfo(post, replys, rewards, upUsers, shangUsers, GlobalList, name):
    """Write ``GlobalList`` to disk, then store the crawl results in the DB.

    Step 1 serialises ``GlobalList`` as JSON (non-ASCII characters kept
    as-is) into ``DATA_ROOT/<name>.json``.

    Step 2 opens one transaction and, in order, inserts ``post`` and
    ``replys``, applies the non-empty ``rewards['tyf']``,
    ``rewards['shang']`` and ``rewards['reward']`` updates, and inserts the
    non-empty ``upUsers`` / ``shangUsers``.

    Raises:
        Whatever the persistence step raises: the failure is logged at
        critical level, the transaction is rolled back and the exception is
        re-raised.
    """
    # dump GlobalList
    json_path = DATA_ROOT / '{}.json'.format(name)
    with open(str(json_path), "w", encoding="utf-8") as out:
        payload = json.dumps(GlobalList, ensure_ascii=False)
        out.write(payload)
    logger.info('Dump GlobalList {} success'.format(name))

    # Reward kinds paired with the function that persists them, listed in
    # the order they are applied.
    reward_writers = (
        ('tyf', updateTyf),
        ('shang', updateShang),
        ('reward', updateReward),
    )

    # persist everything inside a single transaction
    with DB_ENGINE.connect() as connection:
        transaction = connection.begin()
        try:
            insertPosts(connection, [post])
            insertReplys(connection, replys)
            for kind, write in reward_writers:
                if len(rewards[kind]) > 0:
                    write(connection, rewards[kind])
            if len(upUsers) > 0:
                insertUpusers(connection, upUsers)
            if len(shangUsers) > 0:
                insertShangusers(connection, shangUsers)
            transaction.commit()
        except BaseException:
            # Same scope as a bare `except:`; the exception is always
            # re-raised after the rollback.
            logger.critical('Persistant {} failed'.format(name))
            transaction.rollback()
            raise
        else:
            logger.info('Persistant {} success'.format(name))
from gensim.models.doc2vec import Doc2Vec, TaggedDocument from gensim.test.utils import common_texts from sqlalchemy import select from model.db import DB_ENGINE, posts, rawcontents from utils.log import getLogger logger = getLogger('doc2vec') # init with DB_ENGINE.connect() as conn: s = select([posts.c.title]) documents = [ TaggedDocument(doc, [pid]) for pid, doc in enumerate(conn.execute(s)) ] model = Doc2Vec(documents, vector_size=2048, window=5, min_count=1, workers=4) model.save('RuntimeTY/d2v_2048_5_1216') model = Doc2Vec.load('RuntimeTY/d2v_2048_5_1216') logger.critical('Loaded') with DB_ENGINE.connect() as conn: s = select([rawcontents]) buffer = [] for row in conn.execute(s): rid = row[rawcontents.c.rid]