Example #1
0
import sys
from libraries import stats, files

# Print the document and token counts for the file named by the first
# command-line argument.
#
# The file is loaded with files.read_from_xml and the two counts come from
# stats.corpora_stats.
if len(sys.argv) < 2:
    # Passing a string to sys.exit writes it to stderr and terminates with
    # exit status 1, so shells and calling scripts can detect the misuse
    # (a bare sys.exit() would report success).
    sys.exit('Invalid command.Please use the format:\n python counter.py <filename>')

FILE = sys.argv[1]
reviews = files.read_from_xml(FILE)
doc_count, token_count = stats.corpora_stats(reviews)

# A single parenthesised argument prints identically under Python 2 and 3.
print('Total Documents: %s, Total Tokens: %s' % (doc_count, token_count))
import sys
from libraries import files, tags

# Apply the default tagger to an input file and write the result out.
#
# Arguments: sys.argv[1] is the input file, sys.argv[2] is the output file.
# The input is loaded with files.read_from_xml, passed through
# tags.default_tag, and saved with files.export_to_xml.
if len(sys.argv) < 3:
    # Passing a string to sys.exit writes it to stderr and terminates with
    # exit status 1, so shells and calling scripts can detect the misuse
    # (a bare sys.exit() would report success).
    sys.exit('Invalid command.Please use the format:\n python default-tag-trainer.py <input_file> <output_file>')

TRAIN_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

reviews = files.read_from_xml(TRAIN_FILE)
trained_reviews = tags.default_tag(reviews)
files.export_to_xml(trained_reviews, OUTPUT_FILE)
Example #3
0
import sys
from libraries import stats, files

# Compare a tagged file against its corrected version and print the
# precision, recall and F1 values returned by stats.tag_analysis.
#
# Arguments: sys.argv[1] is the trained (tagged) file, sys.argv[2] is the
# corrected file. Both are loaded with files.read_from_xml.
if len(sys.argv) < 3:
    # Passing a string to sys.exit writes it to stderr and terminates with
    # exit status 1, so shells and calling scripts can detect the misuse
    # (a bare sys.exit() would report success).
    sys.exit('Invalid command.Please use the format:\n python analyze-tags.py <trained_file> <corrected_file>')

FILE_1 = sys.argv[1]
FILE_2 = sys.argv[2]
train_reviews = files.read_from_xml(FILE_1)
corrected_reviews = files.read_from_xml(FILE_2)
precision, recall, f1 = stats.tag_analysis(train_reviews, corrected_reviews)

# A single parenthesised argument prints identically under Python 2 and 3.
print('Precision: %s\nRecall: %s\nF1: %s' % (precision, recall, f1))
        int(s)
        return True
    except ValueError:
        return False

# Tag a test file using reviews gathered from N training files.
#
# Command-line layout (N = number of training files):
#   sys.argv[1]          N, as an integer
#   sys.argv[2:N + 2]    the N training files
#   sys.argv[N + 2]      test file
#   sys.argv[N + 3]      output file
#
# Every error path goes through sys.exit(<string>), which writes the message
# to stderr and terminates with exit status 1 so callers can detect failure.
USAGE = ('Invalid command.Please use the format:\n python trained-tag-trainer.py '
         '<num_train_files> <train_file_1> ... <train_file_N> <test_file> <output_file>')

# Same minimum argument count the script has always required.
if len(sys.argv) < 5:
    sys.exit(USAGE)

NUM_OF_TRAIN = sys.argv[1]
# check_int is expected to be defined earlier in this file.
if not check_int(NUM_OF_TRAIN):
    sys.exit('Argument[1] must be an integer.')

NUM_OF_TRAIN = int(NUM_OF_TRAIN)
if NUM_OF_TRAIN < 0:
    # A negative count would make the indices below point at the wrong
    # arguments (or wrap around to the end of sys.argv).
    sys.exit('Argument[1] must be a non-negative integer.')

# The declared count must be backed by real arguments; otherwise indexing
# sys.argv below would raise IndexError.
if len(sys.argv) < NUM_OF_TRAIN + 4:
    sys.exit(USAGE)

TRAIN_FILES = sys.argv[2:NUM_OF_TRAIN + 2]
TEST_FILE = sys.argv[NUM_OF_TRAIN + 2]
OUTPUT_FILE = sys.argv[NUM_OF_TRAIN + 3]

# Pool the contents of every training file into one list.
train_reviews = []
for TRAIN_FILE in TRAIN_FILES:
    train_reviews.extend(files.read_from_xml(TRAIN_FILE))

test_reviews = files.read_from_xml(TEST_FILE)
output_reviews = tags.tag_by_training(train_reviews, test_reviews)
files.export_to_xml(output_reviews, OUTPUT_FILE)