Exemple #1
0
def main(argv):
    """Start the worker pool, load all papers and hand them to the dispatcher.

    Spawns ``multiprocessing.cpu_count() * 2`` processes, each running
    ``worker`` with its index and the shared command/job queues. It then
    opens the database, creates the ``paper_keywords_noun`` table when
    ``mysql_util.does_table_exist`` reports it missing, selects ``id``,
    ``title`` and ``abstract`` from ``papers`` and passes the fetched rows
    to ``dispatch_work``. The database handle is always closed on the way
    out.

    Args:
        argv: Command-line arguments; not read by this function.
    """
    num_processes = multiprocessing.cpu_count() * 2

    cmd_queue = multiprocessing.Queue()
    job_queue = multiprocessing.Queue()

    # Single-argument print(...) produces the same output on Python 2 and
    # is also valid syntax on Python 3.
    print("Create %d workers..." % num_processes)
    for i in range(num_processes):
        multiprocessing.Process(target=worker,
                                args=(i, cmd_queue, job_queue)).start()

    # fetch data from database
    db, cursor = mysql_util.init_db()
    try:
        if not mysql_util.does_table_exist(db, cursor, 'paper_keywords_noun'):
            cursor.execute('CREATE TABLE paper_keywords_noun ('
                           'id INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY, '
                           'paper_id varchar(100), '
                           'ngram varchar(255), '
                           'count int(11), '
                           'INDEX (paper_id))')

        print("Fetching papers from database...")
        cursor.execute("""SELECT id, title, abstract FROM papers""")
        papers = cursor.fetchall()

        dispatch_work(cmd_queue, job_queue, papers)
    except KeyboardInterrupt:
        # Deliberately swallowed: an interrupt should fall through to the
        # cleanup below instead of ending the main process with a traceback.
        pass
    finally:
        mysql_util.close_db(db, cursor)
        print("Main process leaves")
def main(argv):
    """Start the worker pool, load all papers and hand them to the dispatcher.

    Spawns ``multiprocessing.cpu_count() * 2`` processes, each running
    ``worker`` with its index and the shared command/job queues. It then
    opens the database, creates the ``paper_keywords_noun`` table when
    ``mysql_util.does_table_exist`` reports it missing, selects ``id``,
    ``title`` and ``abstract`` from ``papers`` and passes the fetched rows
    to ``dispatch_work``. The database handle is always closed on the way
    out.

    Args:
        argv: Command-line arguments; not read by this function.
    """
    num_processes = multiprocessing.cpu_count() * 2

    cmd_queue = multiprocessing.Queue()
    job_queue = multiprocessing.Queue()

    # Single-argument print(...) produces the same output on Python 2 and
    # is also valid syntax on Python 3.
    print("Create %d workers..." % num_processes)
    for i in range(num_processes):
        multiprocessing.Process(target=worker, args=(i, cmd_queue, job_queue)).start()

    # fetch data from database
    db, cursor = mysql_util.init_db()
    try:
        if not mysql_util.does_table_exist(db, cursor, "paper_keywords_noun"):
            cursor.execute(
                "CREATE TABLE paper_keywords_noun ("
                "id INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY, "
                "paper_id varchar(100), "
                "ngram varchar(255), "
                "count int(11), "
                "INDEX (paper_id))"
            )

        print("Fetching papers from database...")
        cursor.execute("""SELECT id, title, abstract FROM papers""")
        papers = cursor.fetchall()

        dispatch_work(cmd_queue, job_queue, papers)
    except KeyboardInterrupt:
        # Deliberately swallowed: an interrupt should fall through to the
        # cleanup below instead of ending the main process with a traceback.
        pass
    finally:
        mysql_util.close_db(db, cursor)
        print("Main process leaves")
 def test_does_table_exist(self):
   # Each pair is (table name, result expected from does_table_exist).
   # Checked in this order; the first mismatch fails the test.
   expectations = (
     ('foo', True),
     ('bar', True),
     ('not_exist_tb', False),
   )
   for table_name, expected in expectations:
     assert_equal(mysql_util.does_table_exist(table_name), expected)
 def test_drop_table(self):
   # Drop the table, then confirm the existence check no longer finds it.
   dropped = 'bar'
   mysql_util.drop_table(dropped)
   still_exists = mysql_util.does_table_exist(dropped)
   assert_equal(still_exists, False)
    # Read csvfile (opened outside this view) as tab-separated rows.
    csvreader = csv.reader(csvfile, delimiter='\t')

    for row in csvreader:
        # First column is the key looked up in hash2doi; every remaining
        # column is kept as one keyphrase.
        # NOTE(review): `id` shadows the builtin of the same name.
        id = row[0]
        keyphrases = row[1:]

        # Rows whose key has no entry in hash2doi are skipped.
        if id not in hash2doi:
            continue
        doi = hash2doi[id]
        # A later row that maps to the same doi replaces the earlier list.
        doi2keyphrases[doi] = keyphrases

db, cursor = mysql_util.init_db()

total = len(doi2keyphrases)
try:
    if not mysql_util.does_table_exist(db, cursor, 'paper_keywords_allenai'):
        cursor.execute('CREATE TABLE paper_keywords_allenai ('
            'id INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY, '
            'doi varchar(100), '
            'keyphrase varchar(255), '
            'INDEX (doi))')

    count = 1
    for doi, keyphrases in doi2keyphrases.iteritems():
        print "\r%d / %d: %s: %s" % (count, total, doi, str(keyphrases)),
        sys.stdout.flush()

        try:
            cursor.execute("""DELETE FROM paper_keywords_allenai WHERE doi=%s""", (doi,))

            for keyphrase in keyphrases: