# Beispiel #1 (Example #1)
# 0
# NOTE(review): the two lines above were bare paste-artifact text (a gist
# header); left as comments so this fragment is not a NameError at import.
    # NOTE(review): the enclosing `def` header for this helper sits above this
    # chunk and is not visible here; this tail returns the total row count of
    # the annotations table.
    cursor = db_conn.cursor()
    # Table names cannot be bound as SQL parameters, hence str.format here;
    # annotations_table is built from trusted, code-derived values only.
    cursor.execute("SELECT COUNT(*) FROM {}".format(annotations_table))
    # fetchone() yields a 1-tuple like (count,); return the bare integer.
    return cursor.fetchone()[0]


if __name__ == "__main__":
    # Command-line entry point: read tweets from a JSON file, prepare the
    # per-keyword tables, optionally skip past already-seen tweets, then
    # iterate tweets for annotation.
    parser = argparse.ArgumentParser(description='Tweet annotator')
    parser.add_argument('tweet_file', help='JSON tweets file for annotation')
    parser.add_argument('keyword', help='Keyword we wish to disambiguate (determines table name and used to filter tweets)')
    parser.add_argument('--skipto', default=None, type=int, help="Skip forwards to this tweet id, continue from the next tweet")
    args = parser.parse_args()
    print("These are our args:")
    print(args)
    print(args.tweet_file, args.keyword)

    # create_all_tables returns the (annotations, spotlight) table names for
    # this keyword, creating them if absent.
    annotations_table, spotlight_table = sql_convenience.create_all_tables(args.keyword)
    # NOTE(review): the file handle from open() is never closed; acceptable
    # for a short-lived script, but a `with` block would be tidier.
    tweets = tweet_generators.get_tweets(open(args.tweet_file))

    # we can skip through Tweets we've already seen in the same file by
    # specifying a tweet id to jump to
    if args.skipto is not None:
        # Consume the generator up to and including the tweet with the given
        # id; the for-loop below then resumes from the next tweet.
        for tweet in tweets:
            if tweet['id'] == args.skipto:
                break  # continue after this tweet

    for tweet in tweets:
        # NOTE(review): `unicode` is a Python 2 builtin; under Python 3 this
        # raises NameError — confirm the target interpreter version.
        tweet_text = unicode(tweet['text'])
        annotate = True
        # determine if this is an English tweet or not
        # cld.detect expects UTF-8 bytes and (per this call) returns a
        # 5-tuple of (name, code, is_reliable, bytes_found, details).
        tweet_text_bytesutf8 = tweet_text.encode('utf-8')
        # NOTE(review): loop body appears truncated here — `annotate` and the
        # language-detection results are never used in the visible code.
        language_name, language_code, is_reliable, text_bytes_found, details = cld.detect(tweet_text_bytesutf8)
    # Script fragment: configure the NER engine from CLI args, optionally drop
    # the destination table, (re)create the per-keyword tables, then run the
    # annotator over all stored messages.
    parser.add_argument('nerengine',
                        help='NER engine type (only "opencalais" at present)')
    parser.add_argument(
        '--drop',
        default=False,
        action="store_true",
        help=
        'Drops the keyword destination table so we do all annotations again')

    args = parser.parse_args()
    print(args)

    # Select the NER backend; fail loudly with a clear message on an
    # unsupported engine name (previously this was a bare `1 / 0` placeholder
    # that crashed with an unhelpful ZeroDivisionError).
    if args.nerengine == "opencalais":
        ner = opencalais_ner.OpenCalaisNER
    else:
        raise ValueError(
            "Unsupported NER engine {!r}; only 'opencalais' is supported".format(
                args.nerengine))

    annotations_table = "annotations_{}".format(args.keyword)
    destination_table = "{}_{}".format(args.nerengine, args.keyword)
    cursor = config.db_conn.cursor()

    if args.drop:
        # Table names cannot be bound as SQL parameters; destination_table is
        # derived from CLI input, so this is only safe for trusted operators.
        sql = "DROP TABLE IF EXISTS {}".format(destination_table)
        print("Dropping table: {}".format(sql))
        cursor.execute(sql)
    # create_all_tables recreates both tables (if needed) and returns their
    # canonical names, overwriting the hand-built names above.
    annotations_table, destination_table = sql_convenience.create_all_tables(
        args.keyword)

    engine = ner(annotations_table, destination_table)
    engine.annotate_all_messages()
        "keyword", help="Keyword we wish to disambiguate (determines table name and used to filter tweets)"
    )
    parser.add_argument("nerengine", help='NER engine type (only "opencalais" at present)')
    parser.add_argument(
        "--drop",
        default=False,
        action="store_true",
        help="Drops the keyword destination table so we do all annotations again",
    )

    args = parser.parse_args()
    print(args)

    if args.nerengine == "opencalais":
        ner = opencalais_ner.OpenCalaisNER
    else:
        1 / 0

    annotations_table = "annotations_{}".format(args.keyword)
    destination_table = "{}_{}".format(args.nerengine, args.keyword)
    cursor = config.db_conn.cursor()

    if args.drop:
        sql = "DROP TABLE IF EXISTS {}".format(destination_table)
        print("Dropping table: {}".format(sql))
        cursor.execute(sql)
    annotations_table, destination_table = sql_convenience.create_all_tables(args.keyword)

    engine = ner(annotations_table, destination_table)
    engine.annotate_all_messages()