def main():
    """Predict tag sequences for the test words and save the result.

    Loads the training and test data with ``HMM=True``, calls
    ``getTestPreds`` on the word lists with ``smooth=True`` and
    ``similarity=True``, formats the predictions against the test indices
    and hands them to ``savePredictionsToCSV``.
    """
    # Both loaders return three values; the middle one is not needed by the
    # call that is currently active.
    words_train, _pos_train, ne_train = getTrainingData(HMM=True)
    words_test, _pos_test, idx_test = getTestData(HMM=True)

    # Variants tried previously (kept as a note, not executed):
    #   * the middle (POS) lists in place of the word lists, smooth=True only
    #   * the word lists, smooth=True only
    predicted_tags = getTestPreds(
        words_train, ne_train, words_test, smooth=True, similarity=True
    )
    savePredictionsToCSV(formatTestPreds(predicted_tags, idx_test))
def main():
    """Predict tag sequences using a JSON table loaded from disk, then save them.

    Steps:
      1. Load training and test data with ``HMM=True``.
      2. Read the JSON file ``Training_Test_Data/text_features`` located
         under ``dir_path`` (the path is built by plain concatenation).
      3. Call ``getTestPreds`` with the loaded table, ``smooth=None``,
         ``similarity_based=True`` and the test POS list.
      4. Format the predictions against the test indices and pass them to
         ``savePredictionsToCSV``.

    Raises:
        OSError: if the feature file cannot be opened.
        json.JSONDecodeError: if the feature file is not valid JSON.
    """
    train_word_list, _, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    feature_type = 'text_features'
    features_path = dir_path + 'Training_Test_Data/{0}'.format(feature_type)
    # Open through a context manager so the file handle is closed as soon as
    # parsing finishes, rather than being left for the garbage collector.
    with open(features_path) as features_file:
        low_frequency_probabilities = json.load(features_file)

    tag_seq_preds = getTestPreds(train_word_list, train_ne_list, test_word_list, low_frequency_probabilities, smooth=None, similarity_based=True, test_pos_list=test_pos_list)
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
def main():
    """Run the prediction pipeline on the word lists and save the output.

    Training and test data are loaded with ``HMM=True``; ``getTestPreds`` is
    called with ``smooth=True`` and ``similarity=True``; the predictions are
    formatted with the test indices and passed to ``savePredictionsToCSV``.
    """
    training = getTrainingData(HMM=True)
    testing = getTestData(HMM=True)

    # Each loader yields exactly three values; only the first and last of
    # each triple are used below.
    word_seqs_train, _unused_train, tag_seqs_train = training
    word_seqs_test, _unused_test, index_seqs_test = testing

    # Other configurations that were tried (not executed):
    #   - middle (POS) lists instead of word lists, with smooth=True only
    #   - word lists with smooth=True only
    raw_predictions = getTestPreds(word_seqs_train, tag_seqs_train, word_seqs_test,
                                   smooth=True, similarity=True)
    output_rows = formatTestPreds(raw_predictions, index_seqs_test)
    savePredictionsToCSV(output_rows)
Ejemplo n.º 4
0
def main():
    """Predict tag sequences with a JSON table read from disk and save them.

    Loads training and test data with ``HMM=True``, reads the JSON file
    ``Training_Test_Data/text_features`` under ``dir_path``, passes its
    contents to ``getTestPreds`` (with ``smooth=None``,
    ``similarity_based=True`` and the test POS list), formats the
    predictions with the test indices and hands them to
    ``savePredictionsToCSV``.

    Raises:
        OSError: if the feature file cannot be opened.
        json.JSONDecodeError: if the feature file is not valid JSON.
    """
    train_word_list, _, train_ne_list = getTrainingData(HMM=True)
    test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

    feature_type = 'text_features'
    features_path = dir_path + 'Training_Test_Data/{0}'.format(feature_type)
    # A ``with`` block guarantees the handle is closed even if JSON parsing
    # raises; the previous bare ``open`` call left it open.
    with open(features_path) as features_file:
        low_frequency_probabilities = json.load(features_file)

    tag_seq_preds = getTestPreds(train_word_list,
                                 train_ne_list,
                                 test_word_list,
                                 low_frequency_probabilities,
                                 smooth=None,
                                 similarity_based=True,
                                 test_pos_list=test_pos_list)
    formatted_preds = formatTestPreds(tag_seq_preds, test_idx_list)
    savePredictionsToCSV(formatted_preds)
def main():
    """Load data, make predictions and save them.

    ``getTrainingData`` supplies the training data together with a
    ``largest_key_size`` value; both are forwarded to ``makePredictions``
    along with the three values returned by ``getTestData``. The result is
    handed to ``savePredictionsToCSV``.
    """
    train_data, max_key_size = getTrainingData()
    test_context, test_pos, test_index = getTestData()
    savePredictionsToCSV(
        makePredictions(train_data, test_context, test_pos, test_index, max_key_size)
    )
def main():
    """Load data, make predictions and save them.

    The value returned by ``getTrainingData`` and the three values returned
    by ``getTestData`` are forwarded to ``makePredictions``; its result is
    handed to ``savePredictionsToCSV``.
    """
    train_data = getTrainingData()
    test_context, test_pos, test_index = getTestData()
    savePredictionsToCSV(
        makePredictions(train_data, test_context, test_pos, test_index)
    )
Ejemplo n.º 7
0
'''
Repurposed plotting code found here:
http://matplotlib.org/examples/pylab_examples/barchart_demo.html
'''

__author__ = 'Jonathan Simon'

from DataProcessing.LoadData import getTrainingData, getTestData
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# Load both data sets with HMM=True; each loader returns three lists.
train_word_list, train_pos_list, train_ne_list = getTrainingData(HMM=True)
test_word_list, test_pos_list, test_idx_list = getTestData(HMM=True)

# Get POS counts in training set: flatten the per-sentence tag lists into one
# flat list, then count occurrences of each tag.
all_train_pos = [pos for sentence in train_pos_list for pos in sentence]

train_pos_counter = Counter(all_train_pos)
# most_common() yields (tag, count) pairs ordered by descending count;
# zip(*...) splits them into a tuple of names and a parallel tuple of counts.
# NOTE: this unpacking raises ValueError if the training set has no tags.
train_pos_names, train_pos_counts = zip(*train_pos_counter.most_common())

# Get POS counts in test set: same flattening as for the training set.
all_test_pos = [pos for sentence in test_pos_list for pos in sentence]

test_pos_counter = Counter(
Ejemplo n.º 8
0
def main():
    """Run the pipeline end to end: load, predict, save.

    Passes the output of ``getTrainingData`` and the three values from
    ``getTestData`` to ``makePredictions``, then gives the predictions to
    ``savePredictionsToCSV``.
    """
    model_data = getTrainingData()
    ctx, pos_tags, idx = getTestData()
    results = makePredictions(model_data, ctx, pos_tags, idx)
    savePredictionsToCSV(results)
def main():
    """Run the pipeline end to end: load, predict, save.

    ``getTrainingData`` returns the training data and a ``largest_key_size``
    value. Both are passed to ``makePredictions`` together with the three
    values from ``getTestData``; the predictions then go to
    ``savePredictionsToCSV``.
    """
    model_data, key_size_limit = getTrainingData()
    ctx, pos_tags, idx = getTestData()
    results = makePredictions(model_data, ctx, pos_tags, idx, key_size_limit)
    savePredictionsToCSV(results)