def main():
    """Index rows of the tweet table and save the result to ``./index.pkl``.

    Reads ``args.max_id``, selects ``item_id`` and ``text`` from the table
    named by ``sqlconfig.tweet_table_name`` for rows with ``id <= max_id``
    and ``success = 1``, feeds every row into a fresh ``Indexer`` and saves
    it. The duration of the SQL read and of the indexing loop is logged.
    """
    upper_bound = str(args.max_id)
    logger.info("MAX:{}".format(upper_bound))

    # The statement is assembled by plain string concatenation.
    # NOTE(review): this presumably relies on max_id being a trusted numeric
    # CLI value -- confirm against the argument parser.
    sql = "".join((
        "select item_id, text from ",
        sqlconfig.tweet_table_name,
        " where id <= ",
        upper_bound,
        " and success = 1",
    ))

    logger.info("SQL running...")
    sql_started = time.time()
    records = read_table(sql)
    sql_seconds = time.time() - sql_started
    logger.info("sql_time:{0}[sec]".format(sql_seconds))

    logger.info("Indexing...")
    index_started = time.time()
    widgets = [Percentage(), Bar()]
    progress = ProgressBar(widgets=widgets, maxval=len(records))
    progress = progress.start()
    index = Indexer()
    # Count from 1 so the progress bar reports the number of rows completed.
    for completed, record in enumerate(records, start=1):
        # record[0] is the selected item_id, record[1] the selected text.
        # NOTE(review): noun_list presumably extracts the nouns of the
        # text -- confirm against its definition.
        index.add(record[0], noun_list(record[1]))
        progress.update(completed)
    progress.finish()
    index_seconds = time.time() - index_started
    logger.info("indexing_time:{0}[sec]".format(index_seconds))

    index.save("./index.pkl")
def test_save_and_load(self):
    """Check that an index gives the expected search hits after save + load.

    Two entries are added, the index is saved to ``./tests/index.pkl`` and
    loaded back into the same ``Indexer`` instance, then two searches are
    verified.
    """
    pickle_path = "./tests/index.pkl"

    index = Indexer()
    index.add("1", ["今日", "天気", "晴れ", "今日"])
    index.add("2", ["今日", "天気", "雨"])
    index.save(pickle_path)
    index.load(pickle_path)

    # "今日" was added twice for entry "1" and once for entry "2"; each hit
    # is indexed as (id, number).
    # NOTE(review): the number is presumably the term's occurrence count and
    # hits are presumably ordered by it -- confirm against Indexer.search.
    hits = index.search("今日")
    expected_hits = [("1", 2), ("2", 1)]
    for position, (expected_id, expected_number) in enumerate(expected_hits):
        eq_(hits[position][0], expected_id)
        eq_(hits[position][1], expected_number)

    # "雨" was only added for entry "2".
    hits = index.search("雨")
    eq_(len(hits), 1)
    eq_(hits[0][0], "2")
def save_index(file_path):
    """Index the rows returned by the configured query and save the index.

    The query comes from ``get_query(args.only_reply)``. Every row read via
    ``read_table`` is added to a fresh ``Indexer``, which is then saved to
    ``file_path``. The duration of the SQL read and of the indexing loop is
    logged.

    Args:
        file_path: Destination passed unchanged to ``Indexer.save``.
    """
    sql = get_query(args.only_reply)
    logger.info('query: {}'.format(sql))

    logger.info("SQL running...")
    sql_started = time.time()
    records = read_table(sql)
    sql_seconds = time.time() - sql_started
    logger.info("sql_time:{0}[sec]".format(sql_seconds))

    logger.info("Indexing...")
    index_started = time.time()
    widgets = [Percentage(), Bar()]
    progress = ProgressBar(widgets=widgets, maxval=len(records))
    progress = progress.start()
    index = Indexer()
    # Count from 1 so the progress bar reports the number of rows completed.
    for completed, record in enumerate(records, start=1):
        # NOTE(review): rows are presumably (id, text) pairs and noun_list
        # presumably extracts the nouns of the text -- confirm against
        # get_query and noun_list.
        index.add(record[0], noun_list(record[1]))
        progress.update(completed)
    progress.finish()
    index_seconds = time.time() - index_started
    logger.info("indexing_time:{0}[sec]".format(index_seconds))

    logger.info("Saving...")
    index.save(file_path)
    logger.info('Done')