ret.append(target_in_tweet) return ret if __name__ == '__main__': useDev = True if useDev == False: tweets_train, targets_train, labels_train = readTweetsOfficial(tokenize_tweets.FILETRAIN, 'windows-1252', 2) tweets_dev, targets_dev, labels_dev = readTweetsOfficial(tokenize_tweets.FILEDEV, 'windows-1252', 2) else: tweets_train, targets_train, labels_train = readTweetsOfficial(tokenize_tweets.FILETRAIN, 'windows-1252', 2) tweets_origdev, targets_origdev, labels_origdev = readTweetsOfficial(tokenize_tweets.FILEDEV, 'windows-1252', 2) tweets_train.extend(tweets_origdev) targets_train.extend(targets_origdev) labels_train.extend(labels_origdev) tweets_dev, targets_dev, labels_dev = readTweetsOfficial(tokenize_tweets.FILETEST, 'windows-1252', 2) # "model_phrase_100_samp500_it2000.ckpt" features_train, labels_train, features_dev, labels_dev = extractFeaturesAutoencoder("model_trump_phrase_100_samp500_it2600.ckpt", tweets_train, targets_train, labels_train, tweets_dev, targets_dev, labels_dev, "false", True) #train_classifiers(features_train, labels_train, features_dev, labels_dev, "out_auto_added.txt") # train and predict two 2-way models train_classifier_3way(features_train, labels_train, features_dev, labels_dev, "out_trump_postprocess.txt", [], "false", "false", useDev=useDev) #train_classifiers_PosVNeg(features_train, labels_train, features_dev, labels_dev, "out_auto.txt") eval(tokenize_tweets.FILETEST, "out_trump_postprocess.txt")
if line.startswith('ID\t'): outfIn.write(line) outfOut.write(line) else: if cntr in inlist: outfIn.write(line) else: outfOut.write(line) cntr += 1 outfIn.close() outfOut.close() if __name__ == '__main__': tweets_gold, targets_gold, labels_gold = readTweetsOfficial( tokenize_tweets.FILEDEV, 'windows-1252', 2) tweets_res, targets_res, labels_res = readTweetsOfficial( "out_hillary_auto_false_targetInTweet.txt", 'windows-1252', 2) inlist = selectTrainData(tweets_gold, targets_gold) printInOutFiles(inlist, "out_hillary_auto_false_targetInTweet.txt", "out_hillary_inTwe.txt", "out_hillary_outTwe.txt") printInOutFiles(inlist, tokenize_tweets.FILEDEV, "_gold_hillary_inTwe.txt", "_gold_hillary_outTwe.txt") print("Inlist") eval("_gold_hillary_inTwe.txt", "out_hillary_inTwe.txt") print("Outlist") eval("_gold_hillary_outTwe.txt", "out_hillary_outTwe.txt")
# NOTE(review): the text below is a whitespace-collapsed fragment. It opens
# inside the argument list of a call whose start is not visible here, and the
# original line breaks are lost, so everything after the first inline `#` is
# currently parsed as a comment. Restore the line structure before running.
# Visible statements, in order: an `else` branch that loads FILETRAIN and
# FILEDEV via readTweetsOfficial, extends the three training lists with the
# FILEDEV data and loads FILETEST into the *_dev names; a call to
# extractFeaturesAutoencoder with the "model_trump_phrase_100_samp500_it2600.ckpt"
# checkpoint; a call to train_classifier_3way with "out_trump_postprocess.txt";
# and a call to eval() on FILETEST and that same file name.
# Which of these statements sit inside the `else` cannot be confirmed from
# this view - presumably only the loads and extends; verify against the
# original layout.
tokenize_tweets.FILEDEV, 'windows-1252', 2) else: tweets_train, targets_train, labels_train = readTweetsOfficial( tokenize_tweets.FILETRAIN, 'windows-1252', 2) tweets_origdev, targets_origdev, labels_origdev = readTweetsOfficial( tokenize_tweets.FILEDEV, 'windows-1252', 2) tweets_train.extend(tweets_origdev) targets_train.extend(targets_origdev) labels_train.extend(labels_origdev) tweets_dev, targets_dev, labels_dev = readTweetsOfficial( tokenize_tweets.FILETEST, 'windows-1252', 2) # "model_phrase_100_samp500_it2000.ckpt" features_train, labels_train, features_dev, labels_dev = extractFeaturesAutoencoder( "model_trump_phrase_100_samp500_it2600.ckpt", tweets_train, targets_train, labels_train, tweets_dev, targets_dev, labels_dev, "false", True) #train_classifiers(features_train, labels_train, features_dev, labels_dev, "out_auto_added.txt") # train and predict two 2-way models train_classifier_3way(features_train, labels_train, features_dev, labels_dev, "out_trump_postprocess.txt", [], "false", "false", useDev=useDev) #train_classifiers_PosVNeg(features_train, labels_train, features_dev, labels_dev, "out_auto.txt") eval(tokenize_tweets.FILETEST, "out_trump_postprocess.txt")
outfIn.write(line) outfOut.write(line) else: if cntr in inlist: outfIn.write(line) else: outfOut.write(line) cntr += 1 outfIn.close() outfOut.close() if __name__ == "__main__": tweets_gold, targets_gold, labels_gold = readTweetsOfficial(tokenize_tweets.FILEDEV, "windows-1252", 2) tweets_res, targets_res, labels_res = readTweetsOfficial( "out_hillary_auto_false_targetInTweet.txt", "windows-1252", 2 ) inlist = selectTrainData(tweets_gold, targets_gold) printInOutFiles( inlist, "out_hillary_auto_false_targetInTweet.txt", "out_hillary_inTwe.txt", "out_hillary_outTwe.txt" ) printInOutFiles(inlist, tokenize_tweets.FILEDEV, "_gold_hillary_inTwe.txt", "_gold_hillary_outTwe.txt") print("Inlist") eval("_gold_hillary_inTwe.txt", "out_hillary_inTwe.txt") print("Outlist") eval("_gold_hillary_outTwe.txt", "out_hillary_outTwe.txt")