def train_and_trial(trn_file, test_file, clf, posit_lex_file='positive-words.txt', nega_lex_file='negative-words.txt', pickled=False):
    """Train on the training file and test on the testing file, given a
    classifier, for the aspect extraction task.

    Args:
        trn_file: Path to the training data. Read with ``cPickle`` when
            ``pickled`` is true, otherwise handed to ``XMLParser.create_exs``.
        test_file: Path to the test data, loaded the same way as ``trn_file``.
        clf: Object whose ``train(iob_sequences, senti_dictionary)`` method
            returns a chunker exposing ``evaluate(iob_sequences)``.
        posit_lex_file: Path passed to ``semeval_util.get_liu_lexicon`` for the
            positive lexicon.
        nega_lex_file: Path passed to ``semeval_util.get_liu_lexicon`` for the
            negative lexicon.
        pickled: When true, both input files are treated as pickles.

    Returns:
        None. The guessed IOB tags are handed to ``XMLParser.create_xml`` with
        the output name ``'trial_answers.xml'`` and to ``compute_pr``.
    """
    if pickled:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        # `with` guarantees the handles are closed even if loading fails.
        with open(trn_file, 'rb') as train_handle:
            traind = cPickle.load(train_handle)
        with open(test_file, 'rb') as test_handle:
            testd = cPickle.load(test_handle)
    else:
        traind = XMLParser.create_exs(trn_file)
        testd = XMLParser.create_exs(test_file)

    # NOTE(review): the two Liu lexicons are loaded but never used below.
    # The calls are kept so a missing lexicon file still surfaces here;
    # confirm whether they can be dropped.
    posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()

    chunker = clf.train(traind['iob'], senti_dictionary)
    # Parenthesised single-string form prints identically on Python 2 and 3.
    print("done training")

    guessed_iobs = chunker.evaluate(testd['iob'])
    XMLParser.create_xml(testd['orig'], guessed_iobs, testd['id'], testd['idx'], 'trial_answers.xml')
    compute_pr(testd['iob'], guessed_iobs)
# Per-dataset file names. get_data() indexes these with the position of the
# dataset name in `names`, so the order must match that list.
parses_tests = ['laptops_test_phaseA-parse.txt', 'rest_test_phaseA-parse.txt', 'lap-trial-parse.txt']
results_files = ['lap_phaseA.xml', 'rest_phaseA.xml', 'lap-trial_phaseA.xml']


def get_data(dataset_name):
    """Return the file names registered for ``dataset_name``.

    Args:
        dataset_name: An entry of the module-level ``names`` list.

    Returns:
        A 5-tuple ``(pickled train file, pickled test file, train parse file,
        test parse file, results XML file)`` taken from the parallel
        module-level lists at the index of ``dataset_name`` in ``names``.

    Raises:
        ValueError: If ``dataset_name`` is not in ``names``.
    """
    idx = names.index(dataset_name)
    return pickle_trains[idx], pickle_tests[idx], parses_trains[idx], parses_tests[idx], results_files[idx]


def str_to_bool(value):
    """Convert a command-line string into a real boolean.

    ``type=bool`` is a trap with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy. This converter reads the usual
    textual spellings instead.

    Args:
        value: The raw argument string.

    Returns:
        True or False. The empty string maps to False, as ``bool("")`` did.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    text = value.strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("task_name", help="must be either lap or rest or dummy", type=str)
    # later if time: -p is parsed but not used yet; pickled=True is hard-coded below.
    parser.add_argument("-p", help="Specify that train_file is an already learned clf", type=str_to_bool, default=False)
    parser.add_argument("-dep", help="If true, use dependency parse features", type=str_to_bool, default=False)
    args = parser.parse_args()

    train_file, test_file, parse_train_file, parse_test_file, out_xml_file = get_data(args.task_name)
    results = semevalTask4.train_and_trial(train_file, test_file, parse_train_file, parse_test_file,
                                           use_dep=args.dep, pickled=True)

    # Create the results file from the pickled test set and the predictions.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(test_file, 'rb') as test_handle:
        testd = cPickle.load(test_handle)
    XMLParser.create_xml(testd['orig'], results, testd['id'], testd['idx'], out_xml_file)
for iob in traind['iob']: polarities = semeval_util.create_sentiment_sequence( iob, senti_dictionary, negate_wds) translated = [] for p, n in polarities: if p > n: translated.append('positive') elif n > p: translated.append('negative') else: translated.append('neutral') results.append(translated) semeval_util.compute_sent_acc(traind['polarity'], results) XMLParser.create_xml(traind['orig'], traind['iob'], traind['id'], traind['idx'], sentiments=results, outfile='baseline.xml') sys.exit() else: results = task4_stask2.train_and_trial(train_file, test_file) #create results file f = open(test_file, 'rb') testd = cPickle.load(f) f.close() XMLParser.create_xml(testd['orig'], testd['iob'], testd['id'], testd['idx'], sentiments=results,
senti_dictionary = semeval_util.get_mpqa_lexicon() negate_wds = semeval_util.negateWords results = [] for iob in traind['iob']: polarities = semeval_util.create_sentiment_sequence(iob, senti_dictionary, negate_wds) translated = [] for p, n in polarities: if p > n: translated.append('positive') elif n > p: translated.append('negative') else: translated.append('neutral') results.append(translated) semeval_util.compute_sent_acc(traind['polarity'], results) XMLParser.create_xml(traind['orig'], traind['iob'], traind['id'], traind['idx'], sentiments=results, outfile='baseline.xml') sys.exit() else: results = task4_stask2.train_and_trial(train_file, test_file) #create results file f = open(test_file, 'rb') testd = cPickle.load(f) f.close() XMLParser.create_xml(testd['orig'], testd['iob'], testd['id'], testd['idx'], sentiments=results, outfile=out_xml_file)