# Parameter-sweep script fragment. The original line breaks and indentation
# were lost, so the whole fragment sits on one physical line, and it is cut
# off at the header of the loop over the validation examples.
#
# What the visible text does:
#   * opens 'data/output/searchresult.txt' for writing as `f`;
#   * iterates nested loops over ngram_size, use_syntactic_features,
#     pos_window_size, window_size, use_lesk and use_lesk_words
#     (use_syntactic_features, use_lesk and use_lesk_words each have a single
#     candidate value, so only three of the six loops actually vary);
#   * for each combination builds a scikit_classifier.scikit_classifier with
#     those settings, loads two example sets with Parser.load_examples and
#     calls classifier.train on the first one;
#   * resets the float counters tp, fp, tn, fn to 0.0 before the loop over
#     test_egs (presumably true/false positive/negative tallies -- the code
#     that updates them is not visible here).
#
# NOTE(review): both data files are re-read for every parameter combination
#   although their paths never change; hoisting the loads out of the loops
#   looks possible if train()/predict() do not mutate the examples -- confirm.
# NOTE(review): 'valiation_split.data' may be a misspelling of "validation";
#   check the file name on disk before touching the path.
import Parser, scikit_classifier if __name__ == '__main__': # takes forever to run with open('data/output/searchresult.txt', 'w') as f: for ngram_size in [0, 1, 2]: for use_syntactic_features in [0]: for pos_window_size in [0, 1, 5, 10]: for window_size in [0, 5, 10, 50, 100, 500]: for use_lesk in [False]: for use_lesk_words in [False]: classifier = scikit_classifier.scikit_classifier( pos_window_size=pos_window_size, ngram_size=ngram_size, window_size=window_size, use_syntactic_features= use_syntactic_features, use_lesk=use_lesk, use_lesk_words=use_lesk_words) egs = Parser.load_examples( 'data/wsd-data/train_split.data') test_egs = Parser.load_examples( 'data/wsd-data/valiation_split.data') classifier.train(egs) tp = 0.0 fp = 0.0 tn = 0.0 fn = 0.0 for eg in test_egs:
# Parameter-sweep script fragment. The original line breaks and indentation
# were lost, so the whole fragment sits on one physical line, and it is cut
# off right after the first condition of the per-prediction comparison.
#
# What the visible text does:
#   * opens 'data/output/searchresult.txt' for writing as `f`;
#   * iterates nested loops over ngram_size, use_syntactic_features,
#     pos_window_size, window_size, use_lesk and use_lesk_words
#     (use_syntactic_features, use_lesk and use_lesk_words each have a single
#     candidate value, so only three of the six loops actually vary);
#   * for each combination builds a scikit_classifier.scikit_classifier with
#     those settings, loads two example sets with Parser.load_examples and
#     calls classifier.train on the first one;
#   * resets the float counters tp, fp, tn, fn to 0.0, then for every example
#     in test_egs calls classifier.predict on a one-element list and walks
#     eg.senses and the prediction in lockstep, testing `s == 1 and p == 1`
#     (presumably the true-positive case -- the branch body is not visible).
#
# NOTE(review): the enumerate index `k` is not used anywhere in the visible
#   text; it may be used in the part that is cut off.
# NOTE(review): both data files are re-read for every parameter combination
#   although their paths never change; hoisting the loads out of the loops
#   looks possible if train()/predict() do not mutate the examples -- confirm.
# NOTE(review): 'valiation_split.data' may be a misspelling of "validation";
#   check the file name on disk before touching the path.
import Parser, scikit_classifier if __name__ == '__main__': # takes forever to run with open('data/output/searchresult.txt', 'w') as f: for ngram_size in [0,1,2]: for use_syntactic_features in [0]: for pos_window_size in [0,1,5,10]: for window_size in [0, 5, 10, 50, 100, 500]: for use_lesk in [False]: for use_lesk_words in [False]: classifier = scikit_classifier.scikit_classifier( pos_window_size = pos_window_size, ngram_size = ngram_size, window_size = window_size, use_syntactic_features = use_syntactic_features, use_lesk = use_lesk, use_lesk_words = use_lesk_words) egs = Parser.load_examples('data/wsd-data/train_split.data') test_egs = Parser.load_examples('data/wsd-data/valiation_split.data') classifier.train(egs) tp = 0.0 fp = 0.0 tn = 0.0 fn = 0.0 for eg in test_egs: pred = classifier.predict([eg]) for (k,(s,p)) in enumerate(zip(eg.senses,pred)): if s == 1 and p == 1:
# Prediction-writing script fragment. The original line breaks and
# indentation were lost, so the whole fragment sits on one physical line; it
# starts inside a branch of an if/elif chain on `task` whose `if` is not
# visible here.
#
# First part (tail of the preceding branch): loads the test set with
# Parser.load_test_data, then writes one '%d'-formatted prediction per line
# to 'data/output/<name>.txt', predicting one example at a time, and prints
# "done". `name` and `classifier` are bound before the visible text.
#
# Second part (`elif task == 5`): builds a scikit_classifier with
# window_size=500, pos_window_size=1, ngram_size=0, use_syntactic_features=0
# and both Lesk options enabled, trains it on 'data/wsd-data/train.data' and
# writes its per-example predictions for 'data/wsd-data/test.data' to
# 'data/output/<name>.txt', one integer per line.
#
# NOTE(review): the explicit f.close() is redundant -- the file is opened
#   with `with`, which already closes it on exit; calling close() on an
#   already-closed file is a no-op, so it is harmless but can be dropped.
# NOTE(review): the constructor is also given training_file/test_file paths
#   that duplicate the explicit Parser.load_examples calls; what the
#   classifier does with them is not visible here -- confirm.
#testdata = Parser.load_data('data/wsd-data/test.data') testdata = Parser.load_test_data('data/wsd-data/test.data') with open('data/output/%s.txt'%name, 'w') as f: for testexample in testdata: prediction = classifier.predict([testexample]) for element in prediction: f.write('%d\n' % element) f.close() print("done") elif task == 5: name = "scikit_test_ws500_pos1_ngram0_synfeat0_uselesk_useleskwords" classifier = scikit_classifier.scikit_classifier( window_size=500, use_syntactic_features=0, pos_window_size=1, ngram_size=0, use_lesk=True, use_lesk_words=True, training_file='data/wsd-data/train.data', test_file='data/wsd-data/test.data') egs = Parser.load_examples('data/wsd-data/train.data') test_egs = Parser.load_examples('data/wsd-data/test.data') # Train the classifier(s) classifier.train(egs) with open('data/output/%s.txt'%name, 'w') as f: for eg in test_egs: pred = classifier.predict([eg]) for p in pred: f.write("%d\n"%p)
# Prediction-writing script fragment. The original line breaks and
# indentation were lost, so the whole fragment sits on one physical line; it
# starts inside a branch of an if/elif chain on `task` whose `if` is not
# visible here.
#
# First part (tail of the preceding branch): loads the test set with
# Parser.load_test_data, then writes one '%d'-formatted prediction per line
# to 'data/output/<name>.txt', predicting one example at a time, and prints
# "done". `name` and `classifier` are bound before the visible text.
#
# Second part (`elif task == 5`): builds a scikit_classifier with
# window_size=500, pos_window_size=1, ngram_size=0, use_syntactic_features=0
# and both Lesk options enabled, trains it on 'data/wsd-data/train.data' and
# writes its per-example predictions for 'data/wsd-data/test.data' to
# 'data/output/<name>.txt', one integer per line.
#
# NOTE(review): the explicit f.close() is redundant -- the file is opened
#   with `with`, which already closes it on exit; calling close() on an
#   already-closed file is a no-op, so it is harmless but can be dropped.
# NOTE(review): the constructor is also given training_file/test_file paths
#   that duplicate the explicit Parser.load_examples calls; what the
#   classifier does with them is not visible here -- confirm.
#testdata = Parser.load_data('data/wsd-data/test.data') testdata = Parser.load_test_data('data/wsd-data/test.data') with open('data/output/%s.txt' % name, 'w') as f: for testexample in testdata: prediction = classifier.predict([testexample]) for element in prediction: f.write('%d\n' % element) f.close() print("done") elif task == 5: name = "scikit_test_ws500_pos1_ngram0_synfeat0_uselesk_useleskwords" classifier = scikit_classifier.scikit_classifier( window_size=500, use_syntactic_features=0, pos_window_size=1, ngram_size=0, use_lesk=True, use_lesk_words=True, training_file='data/wsd-data/train.data', test_file='data/wsd-data/test.data') egs = Parser.load_examples('data/wsd-data/train.data') test_egs = Parser.load_examples('data/wsd-data/test.data') # Train the classifier(s) classifier.train(egs) with open('data/output/%s.txt' % name, 'w') as f: for eg in test_egs: pred = classifier.predict([eg]) for p in pred: f.write("%d\n" % p)