def main(argv):
    """Start the worker processes, load every paper, and dispatch the work.

    Spawns two worker processes per CPU reported by
    ``multiprocessing.cpu_count()``; each runs ``worker`` with its index and
    the shared command/job queues. Afterwards the ``paper_keywords_noun``
    table is created if it does not exist yet, ``id, title, abstract`` is read
    for every row of ``papers``, and the rows are passed to ``dispatch_work``
    together with both queues. The database handle is closed on every exit
    path.

    Args:
        argv: Command-line arguments; not used inside this function.
    """
    num_processes = multiprocessing.cpu_count() * 2
    cmd_queue = multiprocessing.Queue()
    job_queue = multiprocessing.Queue()

    # A parenthesised single-argument print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Create %d workers..." % num_processes)
    for worker_id in range(num_processes):
        process = multiprocessing.Process(
            target=worker, args=(worker_id, cmd_queue, job_queue))
        process.start()

    # fetch data from database
    db, cursor = mysql_util.init_db()
    try:
        if not mysql_util.does_table_exist(db, cursor, 'paper_keywords_noun'):
            cursor.execute('CREATE TABLE paper_keywords_noun ('
                           'id INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY, '
                           'paper_id varchar(100), '
                           'ngram varchar(255), '
                           'count int(11), '
                           'INDEX (paper_id))')

        print("Fetching papers from database...")
        cursor.execute("""SELECT id, title, abstract FROM papers""")
        papers = cursor.fetchall()
        dispatch_work(cmd_queue, job_queue, papers)
    except KeyboardInterrupt:
        # Ctrl-C is deliberately swallowed so that the cleanup below runs and
        # the function returns normally instead of printing a traceback.
        pass
    finally:
        mysql_util.close_db(db, cursor)

    print("Main process leaves")
def main(argv):
    """Start the worker processes, load every paper, and dispatch the work.

    Spawns two worker processes per CPU reported by
    ``multiprocessing.cpu_count()``; each runs ``worker`` with its index and
    the shared command/job queues. Afterwards the ``paper_keywords_noun``
    table is created if it does not exist yet, ``id, title, abstract`` is read
    for every row of ``papers``, and the rows are passed to ``dispatch_work``
    together with both queues. The database handle is closed on every exit
    path.

    Args:
        argv: Command-line arguments; not used inside this function.
    """
    num_processes = multiprocessing.cpu_count() * 2
    cmd_queue = multiprocessing.Queue()
    job_queue = multiprocessing.Queue()

    # A parenthesised single-argument print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Create %d workers..." % num_processes)
    for worker_id in range(num_processes):
        process = multiprocessing.Process(
            target=worker, args=(worker_id, cmd_queue, job_queue)
        )
        process.start()

    # fetch data from database
    db, cursor = mysql_util.init_db()
    try:
        if not mysql_util.does_table_exist(db, cursor, "paper_keywords_noun"):
            cursor.execute(
                "CREATE TABLE paper_keywords_noun ("
                "id INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY, "
                "paper_id varchar(100), "
                "ngram varchar(255), "
                "count int(11), "
                "INDEX (paper_id))"
            )

        print("Fetching papers from database...")
        cursor.execute("""SELECT id, title, abstract FROM papers""")
        papers = cursor.fetchall()
        dispatch_work(cmd_queue, job_queue, papers)
    except KeyboardInterrupt:
        # Ctrl-C is deliberately swallowed so that the cleanup below runs and
        # the function returns normally instead of printing a traceback.
        pass
    finally:
        mysql_util.close_db(db, cursor)

    print("Main process leaves")
def test_does_table_exist(self):
    """Check ``does_table_exist`` for two names expected to exist and one not."""
    # (table name, expected result) pairs, checked in this order.
    expectations = (
        ('foo', True),
        ('bar', True),
        ('not_exist_tb', False),
    )
    for table_name, expected in expectations:
        assert_equal(mysql_util.does_table_exist(table_name), expected)
def test_drop_table(self):
    """After ``drop_table('bar')``, ``does_table_exist('bar')`` must be False."""
    table_name = 'bar'
    mysql_util.drop_table(table_name)
    still_exists = mysql_util.does_table_exist(table_name)
    assert_equal(still_exists, False)
csvreader = csv.reader(csvfile, delimiter='\t') for row in csvreader: id = row[0] keyphrases = row[1:] if id not in hash2doi: continue doi = hash2doi[id] doi2keyphrases[doi] = keyphrases db, cursor = mysql_util.init_db() total = len(doi2keyphrases) try: if not mysql_util.does_table_exist(db, cursor, 'paper_keywords_allenai'): cursor.execute('CREATE TABLE paper_keywords_allenai (' 'id INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY, ' 'doi varchar(100), ' 'keyphrase varchar(255), ' 'INDEX (doi))') count = 1 for doi, keyphrases in doi2keyphrases.iteritems(): print "\r%d / %d: %s: %s" % (count, total, doi, str(keyphrases)), sys.stdout.flush() try: cursor.execute("""DELETE FROM paper_keywords_allenai WHERE doi=%s""", (doi,)) for keyphrase in keyphrases: