Beispiel #1
0
def init():
    """Register labelling-progress counters as template globals on ``app``.

    Two ``COUNT(*)`` queries are run against ``rawcontents``: the number of
    all rows is published as ``progress_total`` and the number of rows whose
    ``tag`` is not NULL as ``progress_done``.
    """
    total_sql = "SELECT COUNT(*) FROM rawcontents"
    done_sql = "SELECT COUNT(*) FROM rawcontents WHERE tag IS NOT NULL"

    with DB_ENGINE.connect() as conn:
        total_rows = conn.execute(total_sql).fetchone()[0]
        app.add_template_global(total_rows, 'progress_total')

        tagged_rows = conn.execute(done_sql).fetchone()[0]
        app.add_template_global(tagged_rows, 'progress_done')
Beispiel #2
0
def post_list(blockid):
    """Render the list of posts that belong to one board.

    Args:
        blockid: Board identifier supplied by the caller; only the ids in
            the whitelist below are accepted.

    Returns:
        The rendered ``list.html`` page (with the query result passed as
        ``post_list``), or the tuple ``("Boom", 400)`` when ``blockid`` is
        not one of the accepted ids.
    """
    # Validate explicitly instead of with ``assert``: assertions are removed
    # when Python runs with ``-O``, which would let unchecked input reach SQL.
    if blockid not in ('1179', 'develop', 'free', 'funinfo', 'worldlook'):
        return "Boom", 400

    # Imported here so the block does not depend on a module-level import.
    from sqlalchemy import text

    # The board id is passed as a bound parameter rather than formatted into
    # the statement text.
    stmt = text('''
        SELECT p.pid, p.title, COUNT(rc.content) replycount, p.posttime, COUNT(rc.tag) tagcount
        FROM posts p, replys r, rawcontents rc
        WHERE p.blockid=:blockid AND p.pid=r.pid AND rc.rid=r.rid
        GROUP BY r.pid
    ''')

    with DB_ENGINE.connect() as conn:
        res = conn.execute(stmt, {'blockid': blockid})
        # Render while the connection is still open: ``res`` is consumed
        # lazily by the template.
        return render_template('list.html', post_list=res)
Beispiel #3
0
def reply_tag(pid):
    """Store the tags submitted in the request form, all in one transaction.

    Every form field is read as ``rid -> tag``: the ``rawcontents`` row with
    that ``rid`` gets ``tag`` set to ``float(tag)`` and ``assure`` set to
    ``True``.

    Args:
        pid: Accepted for the caller's sake; the body does not use it.

    Returns:
        ``'success'`` when every update ran and the transaction was
        committed, ``'failed'`` when any step raised and the transaction was
        rolled back.
    """
    with DB_ENGINE.connect() as conn:
        transaction = conn.begin()
        try:
            for rid, tag in request.form.items():
                conn.execute(
                    update(rawcontents)
                    .where(rawcontents.c.rid == rid)
                    .values(tag=float(tag), assure=True))

            transaction.commit()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit propagate instead of being
            # reported to the client as an ordinary failure.
            transaction.rollback()
            return 'failed'
        else:
            return 'success'
Beispiel #4
0
def completeTrainData(current):
    """Join ``current`` with the stored content and vector of each reply.

    Args:
        current: DataFrame with at least the columns ``rid``, ``assure`` and
            ``tag``.

    Returns:
        The inner merge of ``current`` with the matching ``rawcontents`` rows
        (columns ``rid``, ``content``, ``vector``), plus a column ``ss`` that
        is ``-1`` where ``assure < 0.5`` and the row's ``tag`` otherwise. The
        ``vector`` column is unpickled and the index is ``0..n-1``.
    """
    # Native Python values, de-duplicated so that batching below cannot
    # return the same database row twice.
    rids = current['rid'].drop_duplicates().tolist()

    rows = []
    if rids:
        # Imported here so the block does not depend on a module-level import.
        from sqlalchemy import bindparam, text

        # An expanding bound parameter renders a correct IN list for any
        # number of values; formatting ``tuple(...)`` into the SQL produced
        # ``(1,)`` for a single rid and depended on the repr of NumPy scalars.
        stmt = text(
            'SELECT rid, content, vector FROM rawcontents WHERE rid IN :rids'
        ).bindparams(bindparam('rids', expanding=True))

        # Query in batches to stay below per-statement bind-variable limits.
        batch_size = 500
        with DB_ENGINE.connect() as conn:
            for start in range(0, len(rids), batch_size):
                batch = rids[start:start + batch_size]
                rows.extend(conn.execute(stmt, {'rids': batch}).fetchall())

    stored = pd.DataFrame(rows, columns=['rid', 'content', 'vector'])
    traindata = current.merge(stored, on='rid')

    traindata['ss'] = [
        -1 if assure < 0.5 else tag
        for assure, tag in zip(traindata['assure'], traindata['tag'])
    ]
    # NOTE(review): pickle.loads runs arbitrary code from its input; this is
    # only safe while the ``vector`` column is written by trusted code.
    traindata['vector'] = traindata['vector'].apply(pickle.loads)

    # Index by the merged length: the inner merge can drop rows whose rid is
    # missing from the database, and len(current) would then not match.
    return traindata.set_index(np.arange(len(traindata)))
Beispiel #5
0
def post_detail(pid):
    """Render the detail page of one post together with its replies.

    Args:
        pid: Post id; anything ``int()`` accepts.

    Returns:
        The rendered ``detail.html`` page, or the tuple ``("Pa", 400)`` when
        ``pid`` cannot be converted to an integer.
    """
    # ``int()`` signals bad input with ValueError/TypeError (OverflowError for
    # infinities); the previous ``except AssertionError`` never matched, so a
    # malformed id surfaced as an unhandled exception instead of a 400.
    try:
        pid = int(pid)
    except (TypeError, ValueError, OverflowError):
        return "Pa", 400

    # Imported here so the block does not depend on a module-level import.
    from sqlalchemy import text

    post_stmt = text('''
        SELECT pid, blockid, title, pageurl, clickcount, replycount, activityuserid
        FROM posts
        WHERE pid=:pid
    ''')
    reply_stmt = text('''
        SELECT r.rid, hostid, posttime, r.content, upCount, shang, totalScore, tag
        FROM replys r, rawcontents rc WHERE pid=:pid AND r.rid=rc.rid
    ''')

    with DB_ENGINE.connect() as conn:
        # ``post`` is None when no row matches; the template receives it as is.
        post = conn.execute(post_stmt, {'pid': pid}).fetchone()
        replyres = conn.execute(reply_stmt, {'pid': pid}).fetchall()
        return render_template('detail.html',
                               post=post,
                               replys=replyres,
                               lastpid=pid - 1,
                               nextpid=pid + 1)
Beispiel #6
0
    def label(self, rid, choice):
        """Persist a yes/no label for one ``rawcontents`` row.

        ``choice`` is mapped to tag ``1`` when it is contained in ``self.T``
        and to tag ``0`` when it is contained in ``self.F``; any other value
        is rejected. On success the row with the given ``rid`` gets that tag
        and ``assure`` set to True.

        NOTE(review): ``i`` and ``predicted`` are not defined in this method;
        they are presumably module-level names -- confirm against the rest of
        the file.

        Args:
            rid: Id of the ``rawcontents`` row to update.
            choice: The raw answer to classify.

        Returns:
            ``(i, len(predicted), 'success')`` when the update ran, otherwise
            ``(i, len(predicted), 'fail')``.
        """
        tag = None
        try:
            if choice in self.T:
                tag = 1
            elif choice in self.F:
                tag = 0

            # Explicit check instead of ``assert``: assertions are removed
            # under ``python -O``, which would let ``None`` reach the UPDATE.
            if tag is None:
                raise ValueError('unrecognised choice: {!r}'.format(choice))

            # Imported here so the block does not depend on a module-level
            # import.
            from sqlalchemy import text

            with DB_ENGINE.connect() as conn:
                # Values are bound, not formatted into the statement text.
                conn.execute(
                    text('UPDATE rawcontents SET tag=:tag, assure=True '
                         'WHERE rid=:rid'),
                    {'tag': tag, 'rid': rid})

        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            logger.info('Failed to label {} to {}'.format(rid, tag))
            return i, len(predicted), 'fail'
        else:
            logger.info('Success in labeling {} to {}'.format(rid, tag))
            return i, len(predicted), 'success'
Beispiel #7
0
def dumpinfo(post, replys, rewards, upUsers, shangUsers, GlobalList, name):
    """Dump ``GlobalList`` to a JSON file, then persist the scraped records.

    The JSON file is written to ``DATA_ROOT / '<name>.json'``. All database
    writes happen in one transaction that is rolled back (and the exception
    re-raised) if any step fails.

    Args:
        post: Single post record; inserted as a one-element list.
        replys: Reply records passed to ``insertReplys``.
        rewards: Mapping with the keys ``'tyf'``, ``'shang'`` and
            ``'reward'``; each non-empty value is handed to its updater.
        upUsers: Records for ``insertUpusers``; skipped when empty.
        shangUsers: Records for ``insertShangusers``; skipped when empty.
        GlobalList: JSON-serialisable object written to the dump file.
        name: Base name of the dump file, also used in the log messages.
    """
    # Write the JSON dump first.
    dump_path = DATA_ROOT / '{}.json'.format(name)
    with open(str(dump_path), "w", encoding="utf-8") as out:
        serialized = json.dumps(GlobalList, ensure_ascii=False)
        out.write(serialized)

    logger.info('Dump GlobalList {} success'.format(name))

    # Persist everything inside a single transaction.
    with DB_ENGINE.connect() as connection:
        txn = connection.begin()
        try:
            insertPosts(connection, [post])
            insertReplys(connection, replys)

            # Reward updates, in fixed order, each only when non-empty.
            for key, apply_update in (('tyf', updateTyf),
                                      ('shang', updateShang),
                                      ('reward', updateReward)):
                if len(rewards[key]) > 0:
                    apply_update(connection, rewards[key])

            # User inserts, likewise skipped for empty inputs.
            for users, insert_users in ((upUsers, insertUpusers),
                                        (shangUsers, insertShangusers)):
                if len(users) > 0:
                    insert_users(connection, users)

            txn.commit()
        except BaseException:
            # Catches everything, logs, rolls back and re-raises unchanged.
            logger.critical('Persistant {} failed'.format(name))
            txn.rollback()
            raise
        else:
            logger.info('Persistant {} success'.format(name))
Beispiel #8
0
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.test.utils import common_texts
from sqlalchemy import select

from model.db import DB_ENGINE, posts, rawcontents
from utils.log import getLogger

logger = getLogger('doc2vec')

# Build the training corpus: one TaggedDocument per row selected from
# posts.title.

with DB_ENGINE.connect() as conn:
    s = select([posts.c.title])
    # NOTE(review): ``pid`` is the enumerate() position in the result set, not
    # the posts.pid column, and ``doc`` is the whole result row rather than a
    # list of tokens -- confirm this is the intended Doc2Vec input.
    documents = [
        TaggedDocument(doc, [pid]) for pid, doc in enumerate(conn.execute(s))
    ]

# Train a Doc2Vec model on the corpus with the hyper-parameters given here.
model = Doc2Vec(documents, vector_size=2048, window=5, min_count=1, workers=4)

# Persist the trained model to disk.
model.save('RuntimeTY/d2v_2048_5_1216')

# Rebind ``model`` to the copy loaded back from the file that was just saved.
model = Doc2Vec.load('RuntimeTY/d2v_2048_5_1216')

# Progress message, emitted at CRITICAL level.
logger.critical('Loaded')

with DB_ENGINE.connect() as conn:
    s = select([rawcontents])
    buffer = []

    for row in conn.execute(s):
        rid = row[rawcontents.c.rid]