ret.append(target_in_tweet)
    return ret





if __name__ == '__main__':
    # Driver script: read the tweet files, extract autoencoder features,
    # train the 3-way classifier and run the evaluation on its output file.
    #
    # useDev selects which files are used:
    #   False -> train on FILETRAIN, use FILEDEV as the held-out set
    #   True  -> train on FILETRAIN + FILEDEV, use FILETEST as the held-out set
    useDev = True

    # The training file is read identically in both settings, so read it once.
    tweets_train, targets_train, labels_train = readTweetsOfficial(tokenize_tweets.FILETRAIN, 'windows-1252', 2)

    if not useDev:
        tweets_dev, targets_dev, labels_dev = readTweetsOfficial(tokenize_tweets.FILEDEV, 'windows-1252', 2)
    else:
        # Fold the original dev file into the training data.
        tweets_origdev, targets_origdev, labels_origdev = readTweetsOfficial(tokenize_tweets.FILEDEV, 'windows-1252', 2)
        tweets_train.extend(tweets_origdev)
        targets_train.extend(targets_origdev)
        labels_train.extend(labels_origdev)
        tweets_dev, targets_dev, labels_dev = readTweetsOfficial(tokenize_tweets.FILETEST, 'windows-1252', 2)

    # Previously used checkpoint name (kept for reference):
    # "model_phrase_100_samp500_it2000.ckpt"
    features_train, labels_train, features_dev, labels_dev = extractFeaturesAutoencoder(
        "model_trump_phrase_100_samp500_it2600.ckpt",
        tweets_train, targets_train, labels_train,
        tweets_dev, targets_dev, labels_dev,
        "false", True)

    # Alternative training routines, currently disabled:
    #train_classifiers(features_train, labels_train, features_dev, labels_dev, "out_auto_added.txt") # train and predict two 2-way models
    train_classifier_3way(features_train, labels_train, features_dev, labels_dev,
                          "out_trump_postprocess.txt", [], "false", "false", useDev=useDev)
    #train_classifiers_PosVNeg(features_train, labels_train, features_dev, labels_dev, "out_auto.txt")

    # NOTE(review): the gold file passed here is always FILETEST, even when
    # useDev is False and the held-out data came from FILEDEV -- confirm that
    # this is intended.
    eval(tokenize_tweets.FILETEST, "out_trump_postprocess.txt")
Beispiel #2
0
        if line.startswith('ID\t'):
            outfIn.write(line)
            outfOut.write(line)
        else:
            if cntr in inlist:
                outfIn.write(line)
            else:
                outfOut.write(line)
            cntr += 1

    outfIn.close()
    outfOut.close()


if __name__ == '__main__':
    # Driver script: split both the system output file and the dev file into
    # an "in" part and an "out" part according to the selection returned by
    # selectTrainData, then run the evaluation on each part separately.
    RESULT_FILE = "out_hillary_auto_false_targetInTweet.txt"

    tweets_gold, targets_gold, labels_gold = readTweetsOfficial(tokenize_tweets.FILEDEV, 'windows-1252', 2)
    tweets_res, targets_res, labels_res = readTweetsOfficial(RESULT_FILE, 'windows-1252', 2)

    inlist = selectTrainData(tweets_gold, targets_gold)

    # Data lines whose running index is in inlist go to the first output
    # file, all other data lines go to the second one.
    printInOutFiles(inlist, RESULT_FILE, "out_hillary_inTwe.txt", "out_hillary_outTwe.txt")
    printInOutFiles(inlist, tokenize_tweets.FILEDEV, "_gold_hillary_inTwe.txt", "_gold_hillary_outTwe.txt")

    # Evaluate the two partitions one after the other, each under its heading.
    for heading, gold_file, out_file in (
            ("Inlist", "_gold_hillary_inTwe.txt", "out_hillary_inTwe.txt"),
            ("Outlist", "_gold_hillary_outTwe.txt", "out_hillary_outTwe.txt")):
        print(heading)
        eval(gold_file, out_file)
Beispiel #3
0
            tokenize_tweets.FILEDEV, 'windows-1252', 2)
    else:
        tweets_train, targets_train, labels_train = readTweetsOfficial(
            tokenize_tweets.FILETRAIN, 'windows-1252', 2)
        tweets_origdev, targets_origdev, labels_origdev = readTweetsOfficial(
            tokenize_tweets.FILEDEV, 'windows-1252', 2)
        tweets_train.extend(tweets_origdev)
        targets_train.extend(targets_origdev)
        labels_train.extend(labels_origdev)
        tweets_dev, targets_dev, labels_dev = readTweetsOfficial(
            tokenize_tweets.FILETEST, 'windows-1252', 2)

    # "model_phrase_100_samp500_it2000.ckpt"
    features_train, labels_train, features_dev, labels_dev = extractFeaturesAutoencoder(
        "model_trump_phrase_100_samp500_it2600.ckpt", tweets_train,
        targets_train, labels_train, tweets_dev, targets_dev, labels_dev,
        "false", True)

    #train_classifiers(features_train, labels_train, features_dev, labels_dev, "out_auto_added.txt") # train and predict two 2-way models
    train_classifier_3way(features_train,
                          labels_train,
                          features_dev,
                          labels_dev,
                          "out_trump_postprocess.txt", [],
                          "false",
                          "false",
                          useDev=useDev)
    #train_classifiers_PosVNeg(features_train, labels_train, features_dev, labels_dev, "out_auto.txt")

    eval(tokenize_tweets.FILETEST, "out_trump_postprocess.txt")
            outfIn.write(line)
            outfOut.write(line)
        else:
            if cntr in inlist:
                outfIn.write(line)
            else:
                outfOut.write(line)
            cntr += 1

    outfIn.close()
    outfOut.close()


if __name__ == "__main__":
    # Script entry point: partition the system output and the dev file with
    # the selection from selectTrainData, then evaluate each partition.
    SYSTEM_FILE = "out_hillary_auto_false_targetInTweet.txt"
    READ_ARGS = ("windows-1252", 2)  # trailing arguments shared by both reads

    tweets_gold, targets_gold, labels_gold = readTweetsOfficial(tokenize_tweets.FILEDEV, *READ_ARGS)
    tweets_res, targets_res, labels_res = readTweetsOfficial(SYSTEM_FILE, *READ_ARGS)

    inlist = selectTrainData(tweets_gold, targets_gold)

    # Write the "in"/"out" split of the system output, then of the dev file.
    printInOutFiles(
        inlist,
        SYSTEM_FILE,
        "out_hillary_inTwe.txt",
        "out_hillary_outTwe.txt",
    )
    printInOutFiles(
        inlist,
        tokenize_tweets.FILEDEV,
        "_gold_hillary_inTwe.txt",
        "_gold_hillary_outTwe.txt",
    )

    # (heading, first eval argument, second eval argument), in original order.
    evaluations = [
        ("Inlist", "_gold_hillary_inTwe.txt", "out_hillary_inTwe.txt"),
        ("Outlist", "_gold_hillary_outTwe.txt", "out_hillary_outTwe.txt"),
    ]
    for title, gold_path, result_path in evaluations:
        print(title)
        eval(gold_path, result_path)