def main():
    """Run the tagging pipeline end to end and save the predictions to CSV.

    Loads the training and test data with ``HMM=True``, asks ``getTestPreds``
    for a predicted tag sequence over the test words (with ``smooth`` and
    ``similarity`` both enabled), formats the result against the test index
    list, and hands it to ``savePredictionsToCSV``.
    """
    # The POS lists are loaded but unused by the active configuration below;
    # they are only needed by the first alternative noted further down.
    words_train, pos_train, ne_train = getTrainingData(HMM=True)
    words_test, pos_test, idx_test = getTestData(HMM=True)

    # Alternative configurations kept for reference:
    #   getTestPreds(pos_train, ne_train, pos_test, smooth=True)
    #   getTestPreds(words_train, ne_train, words_test, smooth=True)
    predicted_tags = getTestPreds(
        words_train, ne_train, words_test, smooth=True, similarity=True
    )

    savePredictionsToCSV(formatTestPreds(predicted_tags, idx_test))
def main():
    """Predict tags for the test set using similarity-based lookup and save them.

    Steps:
      1. Load training words / labels and the test words, POS tags and
         indices (all with ``HMM=True``).
      2. Read the JSON file ``Training_Test_Data/text_features`` (relative to
         ``dir_path``) into ``low_frequency_probabilities``.
      3. Call ``getTestPreds`` with ``smooth=None`` and
         ``similarity_based=True``, passing the test POS list through.
      4. Format the predictions against the test indices and write them out
         with ``savePredictionsToCSV``.
    """
    train_word_list, _, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    feature_type = 'text_features'
    feature_path = dir_path + 'Training_Test_Data/{0}'.format(feature_type)
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(feature_path) as feature_file:
        low_frequency_probabilities = json.load(feature_file)

    tag_seq_preds = getTestPreds(
        train_word_list,
        train_ne_list,
        test_word_list,
        low_frequency_probabilities,
        smooth=None,
        similarity_based=True,
        test_pos_list=test_pos_list,
    )
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
def main():
    """Predict tags for the test set using similarity-based lookup and save them.

    Loads the training and test data (``HMM=True``), reads the JSON file
    ``Training_Test_Data/text_features`` under ``dir_path``, passes it to
    ``getTestPreds`` together with the word lists and the test POS list
    (``smooth=None``, ``similarity_based=True``), then formats the predicted
    tag sequences against the test indices and saves them to CSV.
    """
    train_word_list, _, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    feature_type = 'text_features'
    feature_path = dir_path + 'Training_Test_Data/{0}'.format(feature_type)
    # Open the file in a ``with`` block so it is always closed, even if
    # JSON decoding raises.
    with open(feature_path) as feature_file:
        low_frequency_probabilities = json.load(feature_file)

    tag_seq_preds = getTestPreds(
        train_word_list,
        train_ne_list,
        test_word_list,
        low_frequency_probabilities,
        smooth=None,
        similarity_based=True,
        test_pos_list=test_pos_list,
    )
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
def main():
    """Load the data, make predictions for the test set, and save them to CSV.

    ``getTrainingData`` returns a pair here: the training data and a
    ``largest_key_size`` value, both of which are forwarded to
    ``makePredictions`` along with the three test-data components.
    """
    model, max_key_size = getTrainingData()
    test_context, test_pos, test_index = getTestData()

    results = makePredictions(
        model, test_context, test_pos, test_index, max_key_size
    )
    savePredictionsToCSV(results)
def main():
    """Load the data, make predictions for the test set, and save them to CSV.

    The training data and the three test-data components returned by
    ``getTestData`` are passed straight to ``makePredictions``; its result is
    written out with ``savePredictionsToCSV``.
    """
    model = getTrainingData()
    test_context, test_pos, test_index = getTestData()
    savePredictionsToCSV(
        makePredictions(model, test_context, test_pos, test_index)
    )
''' Repurposed plotting code found here: http://matplotlib.org/examples/pylab_examples/barchart_demo.html ''' __author__ = 'Jonathan Simon' from DataProcessing.LoadData import getTrainingData, getTestData import matplotlib.pyplot as plt from collections import Counter import numpy as np train_word_list, train_pos_list, train_ne_list = getTrainingData(HMM=True) test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True) # Get POS counts in training set all_train_pos = [] for sentence in train_pos_list: for pos in sentence: all_train_pos.append(pos) train_pos_counter = Counter(all_train_pos) train_pos_names, train_pos_counts = zip(*train_pos_counter.most_common()) # Get POS counts in test set all_test_pos = [] for sentence in test_pos_list: for pos in sentence: all_test_pos.append(pos) test_pos_counter = Counter(