예제 #1
0
def train_and_trial(trn_file,
                    test_file,
                    clf,
                    posit_lex_file='positive-words.txt',
                    nega_lex_file='negative-words.txt',
                    pickled=False):
    """Train on the training file and test on the testing file,
    given a classifier, for the aspect extraction task.

    Args:
        trn_file: path to the training data; a pickle when ``pickled`` is
            true, otherwise a file handed to ``XMLParser.create_exs``.
        test_file: path to the test data, in the same format as ``trn_file``.
        clf: object whose ``train(iob_data, lexicon)`` returns a chunker
            exposing ``evaluate(iob_data)``.
        posit_lex_file: path passed to ``semeval_util.get_liu_lexicon`` for
            the positive word list.
        nega_lex_file: path passed to ``semeval_util.get_liu_lexicon`` for
            the negative word list.
        pickled: when true, both data files are loaded with ``cPickle``.

    Returns:
        None. The guessed labels are written to ``trial_answers.xml`` and
        handed to ``compute_pr`` together with the gold labels.
    """
    if pickled:
        # NOTE(review): unpickling can execute arbitrary code -- only feed
        # this function pickles produced by a trusted pipeline.
        # ``with`` guarantees the handles are closed even if loading fails.
        with open(trn_file, 'rb') as train_handle:
            traind = cPickle.load(train_handle)
        with open(test_file, 'rb') as test_handle:
            testd = cPickle.load(test_handle)
    else:
        traind = XMLParser.create_exs(trn_file)
        testd = XMLParser.create_exs(test_file)

    # NOTE(review): the two Liu lexicons are loaded but not used further in
    # this function; the calls are kept so existing behaviour is unchanged.
    # TODO confirm whether they can be dropped.
    posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()

    # An earlier revision constructed ConsecutiveChunker(...) directly here;
    # the classifier is now supplied by the caller through ``clf``.
    chunker = clf.train(traind['iob'], senti_dictionary)
    # Parenthesised form prints the same text on Python 2 and also parses
    # on Python 3.
    print("done training")

    guessed_iobs = chunker.evaluate(testd['iob'])
    XMLParser.create_xml(testd['orig'], guessed_iobs, testd['id'],
                         testd['idx'], 'trial_answers.xml')
    compute_pr(testd['iob'], guessed_iobs)
예제 #2
0
# Per-dataset lookup tables; get_data() picks the entry at the position of
# the requested dataset name, so the order of entries matters.
parses_tests = [
    'laptops_test_phaseA-parse.txt',
    'rest_test_phaseA-parse.txt',
    'lap-trial-parse.txt',
]
results_files = [
    'lap_phaseA.xml',
    'rest_phaseA.xml',
    'lap-trial_phaseA.xml',
]

def get_data(dataset_name):
    """Look up the file names registered for ``dataset_name``.

    The dataset's position in the module-level ``names`` sequence selects
    the matching entry from each per-dataset table.

    Returns:
        A 5-tuple ``(pickled train, pickled test, train parse, test parse,
        results file)``.

    Raises:
        Whatever ``names.index`` raises for an unknown name (``ValueError``
        if ``names`` is a list or tuple).
    """
    position = names.index(dataset_name)
    train_pickle = pickle_trains[position]
    test_pickle = pickle_tests[position]
    train_parse = parses_trains[position]
    test_parse = parses_tests[position]
    xml_out = results_files[position]
    return train_pickle, test_pickle, train_parse, test_parse, xml_out


def str2bool(text):
    """Parse a command-line token into a real boolean.

    ``type=bool`` does not work with argparse: ``bool()`` of any non-empty
    string -- including ``"False"`` -- is True. This converter understands
    the usual spellings instead and rejects anything else.

    Args:
        text: the raw string argparse received for the option.

    Returns:
        True or False.

    Raises:
        argparse.ArgumentTypeError: if ``text`` is not a recognised spelling.
    """
    token = text.strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if token in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got %r" % (text,))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("task_name", help="must be either lap or rest or dummy", type=str)
    # later if time
    # NOTE(review): ``-p`` is parsed but not consulted below; ``pickled`` is
    # always passed as True.
    parser.add_argument("-p", help="Specify that train_file is an already learned clf",
                        type=str2bool, default=False)
    parser.add_argument("-dep", help="If true, use dependency parse features",
                        type=str2bool, default=False)
    args = parser.parse_args()

    train_file, test_file, parse_train_file, parse_test_file, out_xml_file = get_data(args.task_name)

    results = semevalTask4.train_and_trial(train_file, test_file, parse_train_file, parse_test_file,
                                           use_dep=args.dep, pickled=True)
    # Create the results file: reload the pickled test data so the answers
    # can be written next to the original sentences. ``with`` closes the
    # handle even if unpickling fails.
    with open(test_file, 'rb') as f:
        testd = cPickle.load(f)
    XMLParser.create_xml(testd['orig'], results, testd['id'], testd['idx'], out_xml_file)





예제 #3
0
        # Baseline labelling: each (p, n) pair returned by
        # create_sentiment_sequence becomes 'positive', 'negative' or
        # 'neutral' depending on which of the two values is larger.
        for iob_seq in traind['iob']:
            scored = semeval_util.create_sentiment_sequence(
                iob_seq, senti_dictionary, negate_wds)
            results.append([
                'positive' if pos > neg else
                'negative' if neg > pos else
                'neutral'
                for pos, neg in scored
            ])
        # Score the baseline against the stored polarities, dump it to
        # baseline.xml, then stop the program.
        semeval_util.compute_sent_acc(traind['polarity'], results)
        XMLParser.create_xml(traind['orig'], traind['iob'], traind['id'],
                             traind['idx'], sentiments=results,
                             outfile='baseline.xml')
        sys.exit()
    else:
        results = task4_stask2.train_and_trial(train_file, test_file)

    # Create the results file: reload the pickled test data. ``with``
    # closes the handle even if unpickling raises.
    with open(test_file, 'rb') as f:
        testd = cPickle.load(f)
    XMLParser.create_xml(testd['orig'],
                         testd['iob'],
                         testd['id'],
                         testd['idx'],
                         sentiments=results,
예제 #4
0
        # Baseline labelling driven by the MPQA lexicon and the negation
        # word list from semeval_util.
        senti_dictionary = semeval_util.get_mpqa_lexicon()
        negate_wds = semeval_util.negateWords
        results = []
        for iob_seq in traind['iob']:
            scored = semeval_util.create_sentiment_sequence(iob_seq, senti_dictionary, negate_wds)
            # Whichever value of the (p, n) pair is larger decides the
            # label; ties are 'neutral'.
            labels = [
                'positive' if pos > neg else ('negative' if neg > pos else 'neutral')
                for pos, neg in scored
            ]
            results.append(labels)
        # Score the baseline against the stored polarities, dump it to
        # baseline.xml, then stop the program.
        semeval_util.compute_sent_acc(traind['polarity'], results)
        XMLParser.create_xml(traind['orig'], traind['iob'], traind['id'], traind['idx'],
                             sentiments=results, outfile='baseline.xml')
        sys.exit()
    else:
        results = task4_stask2.train_and_trial(train_file, test_file)

    # Create the results file: reload the pickled test data and write it
    # out together with the predicted sentiments. ``with`` closes the
    # handle even if unpickling raises.
    with open(test_file, 'rb') as f:
        testd = cPickle.load(f)
    XMLParser.create_xml(testd['orig'], testd['iob'], testd['id'], testd['idx'],
                         sentiments=results, outfile=out_xml_file)