예제 #1
0
# Evaluate the trained classifier on the held-out split, then classify the
# sentences of the final test files into a tab-separated output file.
print " heldout results"
train_set = feature_sets  # NOTE(review): assigned but never used in this snippet
test_set = [(get_features(n), v) for (n, v) in held_data.items()]
print nltk.classify.accuracy(classifier, test_set)
#classifier.show_most_informative_features()

final_test_files = ['product1.txt', 'product2.txt', 'product3.txt']
final_test_path = "data/testdata/"
output_file_path = "data/testdata/classified_output.txt"
# NOTE(review): `f` aliases `output` and is never used; the file is never
# closed anywhere in the visible code.
output = f = open(output_file_path, 'w+')

test_file_dict = {}  #dict of dicts
for final_test_file in final_test_files:
    #file_dict = parse.read_test_data(os.path.join(final_test_path, final_test_file))
    # read_test_data returns two parallel lists: line keys and sentences
    # (presumably line-number strings and their text — TODO confirm in parse).
    key_list, val_list = parse.read_test_data(
        os.path.join(final_test_path, final_test_file))
    test_file_dict[final_test_file] = (key_list, val_list)

print len(
    test_file_dict)  #dict of dict. {"product1.txt":{"sent":1...}, "product2":}

# Emit "<file>\t<line>\t<label>" rows; lines tagged "[t]" are titles.
for file_name in final_test_files:
    key_list = test_file_dict[file_name][0]
    val_list = test_file_dict[file_name][1]
    keys = [int(k) for k in key_list]  # NOTE(review): computed but unused below
    for i in xrange(0, len(key_list)):
        line_num = key_list[i]
        sentence = val_list[i]
        if "[t]" not in sentence:
            # NOTE(review): this snippet is truncated here — the write call
            # is cut off mid-expression (see the complete variant below).
            output.write("%s\t%s\t%s\n" %
                         (file_name, line_num,
예제 #2
0
#classifier.show_most_informative_features()

print " heldout results"
train_set = feature_sets
test_set = [(get_features(n), v) for (n,v) in held_data.items()]
print nltk.classify.accuracy(classifier, test_set)
#classifier.show_most_informative_features()

final_test_files = ['product1.txt', 'product2.txt', 'product3.txt',
                     'product4.txt']
final_test_path = "sampleOutput/"
output_file_path = "sampleOutput/classified_output.txt"
output = f = open(output_file_path, 'w+')

test_file_dict = {} #dict of dicts
for final_test_file in final_test_files:
    file_dict = parse.read_test_data(os.path.join(final_test_path, final_test_file))
    test_file_dict[final_test_file] = file_dict


for file_name, text_dict in test_file_dict.items():
    keys = [int(k) for k in text_dict.keys()]
    keys.sort()
    for key in keys:
        line_num = key
        sentence = text_dict[str(key)]
        #TODO CHECK FOR TITLE
        output.write("%s\t%s\t%s\n" % (file_name, line_num, classifier.classify(get_features(sentence))))
output.close()

예제 #3
0
print " heldout results"
train_set = feature_sets
test_set = [(get_features(n), v) for (n, v) in held_data.items()]
print nltk.classify.accuracy(classifier, test_set)
#classifier.show_most_informative_features()

final_test_files = [
    'product1.txt', 'product2.txt', 'product3.txt', 'product4.txt'
]
final_test_path = "sampleOutput/"
output_file_path = "sampleOutput/classified_output.txt"
output = f = open(output_file_path, 'w+')

test_file_dict = {}  #dict of dicts
for final_test_file in final_test_files:
    file_dict = parse.read_test_data(
        os.path.join(final_test_path, final_test_file))
    test_file_dict[final_test_file] = file_dict

for file_name, text_dict in test_file_dict.items():
    keys = [int(k) for k in text_dict.keys()]
    keys.sort()
    for key in keys:
        line_num = key
        sentence = text_dict[str(key)]
        #TODO CHECK FOR TITLE
        output.write(
            "%s\t%s\t%s\n" %
            (file_name, line_num, classifier.classify(get_features(sentence))))
output.close()
예제 #4
0
print " heldout results"
train_set = feature_sets
test_set = [(get_features(n), v) for (n, v) in held_data.items()]
print nltk.classify.accuracy(classifier, test_set)
# classifier.show_most_informative_features()

final_test_files = ["product1.txt", "product2.txt", "product3.txt"]
final_test_path = "data/testdata/"
output_file_path = "data/testdata/classified_output.txt"
output = f = open(output_file_path, "w+")

test_file_dict = {}  # dict of dicts
for final_test_file in final_test_files:
    # file_dict = parse.read_test_data(os.path.join(final_test_path, final_test_file))
    key_list, val_list = parse.read_test_data(os.path.join(final_test_path, final_test_file))
    test_file_dict[final_test_file] = (key_list, val_list)

print len(test_file_dict)  # dict of dict. {"product1.txt":{"sent":1...}, "product2":}

for file_name in final_test_files:
    key_list = test_file_dict[file_name][0]
    val_list = test_file_dict[file_name][1]
    keys = [int(k) for k in key_list]
    for i in xrange(0, len(key_list)):
        line_num = key_list[i]
        sentence = val_list[i]
        if "[t]" not in sentence:
            output.write("%s\t%s\t%s\n" % (file_name, line_num, classifier.classify(get_features(sentence))))
        else:
            output.write("%s\t%s\t%s\n" % (file_name, line_num, 0))