Beispiel #1
0
def format_test(src, target):
    """Rewrite a raw test file into the labeled, cleaned format.

    Every non-blank line of `src` is split on tab characters; field 0 is
    used as the tweet id and field 1 as the tweet text. All tweet texts are
    handed to ``tweet_tagger.runtagger_parse`` in a single batch, and one
    line per tweet is written to `target`: the id, a tab, the literal
    label ``0``, a tab, the result of ``clean_tweet(token, tag)`` and a
    newline.

    Args:
        src: Path of the tab-separated input file.
        target: Path of the output file; it is opened in ``'w'`` mode and
            therefore overwritten.

    Raises:
        IndexError: If a non-blank line has fewer than two tab-separated
            fields.
    """
    tids = []
    tweets = []
    # `open` inside `with` replaces the Python 2-only `file()` builtin and
    # guarantees the input handle is closed, even on a malformed line.
    with open(src) as infile:
        for line in infile:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no record; skip them instead of crashing.
                continue
            fields = stripped.split('\t')
            tids.append(fields[0])
            tweets.append(fields[1])

    # NOTE(review): assumes runtagger_parse returns two sequences aligned
    # one-to-one with `tweets`; zip() below silently truncates otherwise.
    tokens, tags = tweet_tagger.runtagger_parse(tweets)

    # The output is opened only after tagging, so a tagger failure does not
    # truncate an existing target file.
    with open(target, 'w') as out:
        for tid, token, tag in zip(tids, tokens, tags):
            # assume each test tweet is labeled the non-ADR class (0)
            out.write(tid + '\t0\t' + clean_tweet(token, tag) + '\n')
Beispiel #2
0
def format_test(src, target):
    """Rewrite a raw test file into the labeled, cleaned format.

    Every non-blank line of `src` is split on tab characters; field 0 is
    used as the tweet id and field 1 as the tweet text. All tweet texts are
    handed to ``tweet_tagger.runtagger_parse`` in a single batch, and one
    line per tweet is written to `target`: the id, a tab, the literal
    label ``0``, a tab, the result of ``clean_tweet(token, tag)`` and a
    newline.

    Args:
        src: Path of the tab-separated input file.
        target: Path of the output file; it is opened in ``'w'`` mode and
            therefore overwritten.

    Raises:
        IndexError: If a non-blank line has fewer than two tab-separated
            fields.
    """
    tids = []
    tweets = []
    # `open` inside `with` replaces the Python 2-only `file()` builtin and
    # guarantees the input handle is closed, even on a malformed line.
    with open(src) as infile:
        for line in infile:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no record; skip them instead of crashing.
                continue
            fields = stripped.split('\t')
            tids.append(fields[0])
            tweets.append(fields[1])

    # NOTE(review): assumes runtagger_parse returns two sequences aligned
    # one-to-one with `tweets`; zip() below silently truncates otherwise.
    tokens, tags = tweet_tagger.runtagger_parse(tweets)

    # The output is opened only after tagging, so a tagger failure does not
    # truncate an existing target file.
    with open(target, 'w') as out:
        for tid, token, tag in zip(tids, tokens, tags):
            # assume each test tweet is labeled the non-ADR class (0)
            out.write(tid + '\t0\t' + clean_tweet(token, tag) + '\n')
Beispiel #3
0
def format_train_dev(src, target):
    """Rewrite a raw train/dev file into the labeled, cleaned format.

    Every non-blank line of `src` is split on tab characters; field 0 is
    used as the tweet id, field 2 as the label and field 3 as the tweet
    text (field 1 is not used). All tweet texts are handed to
    ``tweet_tagger.runtagger_parse`` in a single batch, and one line per
    tweet is written to `target`: the id, a tab, the label, a tab, the
    result of ``clean_tweet(token, tag)`` and a newline.

    Args:
        src: Path of the tab-separated input file.
        target: Path of the output file; it is opened in ``'w'`` mode and
            therefore overwritten.

    Raises:
        IndexError: If a non-blank line has fewer than four tab-separated
            fields.
    """
    tids = []
    labels = []
    tweets = []
    # `open` inside `with` replaces the Python 2-only `file()` builtin and
    # guarantees the input handle is closed, even on a malformed line.
    with open(src) as infile:
        for line in infile:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no record; skip them instead of crashing.
                continue
            fields = stripped.split('\t')
            tids.append(fields[0])
            labels.append(fields[2])
            tweets.append(fields[3])

    # NOTE(review): assumes runtagger_parse returns two sequences aligned
    # one-to-one with `tweets`; zip() below silently truncates otherwise.
    tokens, tags = tweet_tagger.runtagger_parse(tweets)

    # The output is opened only after tagging, so a tagger failure does not
    # truncate an existing target file.
    with open(target, 'w') as out:
        for tid, label, token, tag in zip(tids, labels, tokens, tags):
            out.write(tid + '\t' + label + '\t' + clean_tweet(token, tag) + '\n')
Beispiel #4
0
def format_train_dev(src, target):
    """Rewrite a raw train/dev file into the labeled, cleaned format.

    Every non-blank line of `src` is split on tab characters; field 0 is
    used as the tweet id, field 2 as the label and field 3 as the tweet
    text (field 1 is not used). All tweet texts are handed to
    ``tweet_tagger.runtagger_parse`` in a single batch, and one line per
    tweet is written to `target`: the id, a tab, the label, a tab, the
    result of ``clean_tweet(token, tag)`` and a newline.

    Args:
        src: Path of the tab-separated input file.
        target: Path of the output file; it is opened in ``'w'`` mode and
            therefore overwritten.

    Raises:
        IndexError: If a non-blank line has fewer than four tab-separated
            fields.
    """
    tids = []
    labels = []
    tweets = []
    # `open` inside `with` replaces the Python 2-only `file()` builtin and
    # guarantees the input handle is closed, even on a malformed line.
    with open(src) as infile:
        for line in infile:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no record; skip them instead of crashing.
                continue
            fields = stripped.split('\t')
            tids.append(fields[0])
            labels.append(fields[2])
            tweets.append(fields[3])

    # NOTE(review): assumes runtagger_parse returns two sequences aligned
    # one-to-one with `tweets`; zip() below silently truncates otherwise.
    tokens, tags = tweet_tagger.runtagger_parse(tweets)

    # The output is opened only after tagging, so a tagger failure does not
    # truncate an existing target file.
    with open(target, 'w') as out:
        for tid, label, token, tag in zip(tids, labels, tokens, tags):
            out.write(tid + '\t' + label + '\t' + clean_tweet(token, tag) + '\n')