Exemple #1
0
def main():
    """Index tweets up to ``args.max_id`` and save the index to ``./index.pkl``.

    Selects ``item_id`` and ``text`` from the table named by
    ``sqlconfig.tweet_table_name`` for rows with ``id <= args.max_id`` and
    ``success = 1``, adds every row to a fresh ``Indexer`` (first column as
    the key, ``noun_list`` of the second column as the terms), and logs how
    long the query and the indexing each took.
    """
    id_limit = str(args.max_id)
    logger.info("MAX:{}".format(id_limit))

    # NOTE(review): the id bound is concatenated straight into the SQL text;
    # this presumably relies on args.max_id being numeric -- confirm where
    # the arguments are parsed.
    condition = " where id <= " + id_limit + " and success = 1"
    sql = "select item_id, text from " + sqlconfig.tweet_table_name + condition

    logger.info("SQL running...")
    query_began = time.time()
    records = read_table(sql)
    logger.info("sql_time:{0}[sec]".format(time.time() - query_began))

    logger.info("Indexing...")
    index_began = time.time()
    progress = ProgressBar(
        widgets=[Percentage(), Bar()],
        maxval=len(records),
    ).start()
    index = Indexer()
    # Count from 1 so the progress bar receives the number of rows done.
    for done, record in enumerate(records, start=1):
        index.add(record[0], noun_list(record[1]))
        progress.update(done)
    progress.finish()
    logger.info("indexing_time:{0}[sec]".format(time.time() - index_began))

    index.save("./index.pkl")
Exemple #2
0
    def test_save_and_load(self):
        # Round-trip check: an index that is saved and then loaded again
        # must still answer searches for the terms that were added.
        index_path = "./tests/index.pkl"

        index = Indexer()
        index.add("1", ["今日", "天気", "晴れ", "今日"])
        index.add("2", ["今日", "天気", "雨"])

        # Save and load go through the same Indexer instance.
        index.save(index_path)
        index.load(index_path)

        # "今日" was added twice for "1" and once for "2"; each hit is
        # checked as an (id, count) pair at its expected position.
        hits = index.search("今日")
        for position, (tweet_id, count) in enumerate([("1", 2), ("2", 1)]):
            eq_(hits[position][0], tweet_id)
            eq_(hits[position][1], count)

        # "雨" was only added for "2".
        hits = index.search("雨")
        eq_(len(hits), 1)
        eq_(hits[0][0], "2")
Exemple #3
0
def save_index(file_path):
    """Build an index from the rows of the configured query and save it.

    The query text comes from ``get_query(args.only_reply)``. Every row
    returned by ``read_table`` is added to a fresh ``Indexer`` using its
    first column as the key and ``noun_list`` of its second column as the
    terms. Query time and indexing time are logged separately.

    Args:
        file_path: Destination passed unchanged to ``Indexer.save``.
    """
    sql = get_query(args.only_reply)
    logger.info('query: {}'.format(sql))

    logger.info("SQL running...")
    fetch_started = time.time()
    records = read_table(sql)
    fetch_seconds = time.time() - fetch_started
    logger.info("sql_time:{0}[sec]".format(fetch_seconds))

    logger.info("Indexing...")
    build_started = time.time()
    bar_widgets = [Percentage(), Bar()]
    progress = ProgressBar(widgets=bar_widgets, maxval=len(records)).start()
    index = Indexer()
    # Count from 1 so the bar is updated with the number of rows processed.
    for processed, record in enumerate(records, start=1):
        key, terms = record[0], noun_list(record[1])
        index.add(key, terms)
        progress.update(processed)
    progress.finish()
    build_seconds = time.time() - build_started
    logger.info("indexing_time:{0}[sec]".format(build_seconds))

    logger.info("Saving...")
    index.save(file_path)
    logger.info('Done')