"""Print the document and token totals for a reviews file.

Usage:
    python counter.py <filename>
"""
import sys

USAGE = 'Invalid command.Please use the format:\n python counter.py <filename>'


def main(argv=None):
    """Run the counter command line and return its exit status.

    Args:
        argv: Full argument vector with the program name at index 0.
            Defaults to ``sys.argv``. Arguments after the file name are
            ignored.

    Returns:
        0 once the totals have been printed, 2 when the file name is
        missing.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        # A usage error goes to stderr with a non-zero status so callers
        # and shell pipelines can tell the run failed and stdout stays
        # free of error text.
        sys.stderr.write(USAGE + '\n')
        return 2

    # Project modules are imported only after the arguments are validated,
    # so a usage error is reported without loading them.
    from libraries import files, stats

    reviews = files.read_from_xml(argv[1])
    doc_count, token_count = stats.corpora_stats(reviews)
    print('Total Documents: ' + str(doc_count)
          + ', Total Tokens: ' + str(token_count))
    return 0


if __name__ == '__main__':
    sys.exit(main())
"""Apply the default tagger to a reviews file and export the result.

Usage:
    python default-tag-trainer.py <input_file> <output_file>
"""
import sys

USAGE = ('Invalid command.Please use the format:\n'
         ' python default-tag-trainer.py <input_file> <output_file>')


def main(argv=None):
    """Run the default-tag trainer command line and return its exit status.

    The reviews read from the input file are passed through
    ``tags.default_tag`` and the result is written with
    ``files.export_to_xml``.

    Args:
        argv: Full argument vector with the program name at index 0.
            Defaults to ``sys.argv``. Arguments after the output file are
            ignored.

    Returns:
        0 once the output file has been exported, 2 when fewer than two
        file arguments were given.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        # A usage error goes to stderr with a non-zero status so callers
        # and shell pipelines can tell the run failed.
        sys.stderr.write(USAGE + '\n')
        return 2

    # Project modules are imported only after the arguments are validated,
    # so a usage error is reported without loading them.
    from libraries import files, tags

    input_path, output_path = argv[1], argv[2]
    reviews = files.read_from_xml(input_path)
    trained_reviews = tags.default_tag(reviews)
    files.export_to_xml(trained_reviews, output_path)
    return 0


if __name__ == '__main__':
    sys.exit(main())
"""Print precision, recall and F1 for a trained file against a corrected one.

Usage:
    python analyze-tags.py <trained_file> <corrected_file>
"""
import sys

USAGE = ('Invalid command.Please use the format:\n'
         ' python analyze-tags.py <trained_file> <corrected_file>')


def main(argv=None):
    """Run the tag analysis command line and return its exit status.

    Both files are read with ``files.read_from_xml`` and handed to
    ``stats.tag_analysis``; the three values it returns are printed as
    precision, recall and F1, one per line.

    Args:
        argv: Full argument vector with the program name at index 0.
            Defaults to ``sys.argv``. Arguments after the corrected file
            are ignored.

    Returns:
        0 once the scores have been printed, 2 when fewer than two file
        arguments were given.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        # A usage error goes to stderr with a non-zero status so callers
        # and shell pipelines can tell the run failed and stdout stays
        # free of error text.
        sys.stderr.write(USAGE + '\n')
        return 2

    # Project modules are imported only after the arguments are validated,
    # so a usage error is reported without loading them.
    from libraries import files, stats

    train_reviews = files.read_from_xml(argv[1])
    corrected_reviews = files.read_from_xml(argv[2])
    precision, recall, f1 = stats.tag_analysis(train_reviews,
                                               corrected_reviews)
    print('Precision: ' + str(precision)
          + '\nRecall: ' + str(recall)
          + '\nF1: ' + str(f1))
    return 0


if __name__ == '__main__':
    sys.exit(main())
int(s) return True except ValueError: return False if len(sys.argv) >= 5: NUM_OF_TRAIN = sys.argv[1] if check_int(NUM_OF_TRAIN): NUM_OF_TRAIN = int(NUM_OF_TRAIN) next_index = 0 TRAIN_FILES = [] while next_index < NUM_OF_TRAIN: TRAIN_FILES.append(sys.argv[(next_index+2)]) next_index += 1 TEST_FILE = sys.argv[(NUM_OF_TRAIN + 2)] OUTPUT_FILE = sys.argv[(NUM_OF_TRAIN + 3)] train_reviews = [] for TRAIN_FILE in TRAIN_FILES: train_reviews.extend(files.read_from_xml(TRAIN_FILE)) test_reviews = files.read_from_xml(TEST_FILE) output_reviews = tags.tag_by_training(train_reviews, test_reviews) files.export_to_xml(output_reviews, OUTPUT_FILE) else: print 'Argument[1] must be an integer.' else: print 'Invalid command.Please use the format:\n python trained-tag-trainer.py <train_file> <test_file> <output_file>' sys.exit()