예제 #1
0
def getTrainingData(text_features=True):
    """
        Build, persist and return feature probabilities from the training file.

        The file at dir_path + training_file is loaded and parsed, features
        are extracted (text features when text_features is truthy, POS
        features otherwise), converted to probabilities, saved to disk under
        the matching feature type name, pretty-printed and returned.
    """
    # Read and parse the raw training file
    handle = loadFile(dir_path + training_file)
    raw_training = getDataFromFile(handle)
    ner_by_token, pos_by_token = parseTrainingData(raw_training)

    if not text_features:
        # POS based classifier: every word is considered
        features = findFeaturesForPOS(pos_by_token, ner_by_token)
        kind = "pos_features"
    else:
        # Similarity based classifier: only low-frequency words are considered
        # (the original note gives the cut-off as a count of less than 3)
        rare_token_ner = findLowFrequencyWord(ner_by_token)
        features = findFeaturesForText(rare_token_ner)
        kind = "text_features"

    # Turn the collected features into probabilities, then store and show them
    probabilities = findProbabilityForFeatures(features)
    saveFeaturesToDisk(probabilities, kind)
    pprint(probabilities)
    return probabilities
def getTrainingData(text_features=True):
    """
        Compute feature probabilities from the training file.

        Loads dir_path + training_file, parses it, extracts text features
        when text_features is truthy and POS features otherwise, derives the
        feature probabilities, saves them to disk tagged with the feature
        type, pretty-prints them and returns them.
    """
    # Pull the training records off disk and split them into the two mappings
    source = loadFile(dir_path + training_file)
    records = getDataFromFile(source)
    ner_tokens, pos_tokens = parseTrainingData(records)

    if not text_features:
        # All words feed the POS based classifier
        extracted = findFeaturesForPOS(pos_tokens, ner_tokens)
        label = "pos_features"
    else:
        # Only low-frequency words feed the similarity based classifier
        # (the original note gives the cut-off as a count of less than 3)
        infrequent = findLowFrequencyWord(ner_tokens)
        extracted = findFeaturesForText(infrequent)
        label = "text_features"

    # Probabilities are saved and printed before being handed back
    result = findProbabilityForFeatures(extracted)
    saveFeaturesToDisk(result, label)
    pprint(result)
    return result
def getTrainingData():
    """
        Load and process the training file.

        Returns a pair: the result of processTrainingData on the parsed
        context / POS / NER values, and the value getMaxLengthKey reports
        for that processed result.
    """
    # Read the raw contents of the training file
    handle = loadFile(dir_path + training_file)
    raw_training = getDataFromFile(handle)

    # Parse into the three components, then combine them
    words, tags, entities = parseTrainingData(raw_training)
    processed = processTrainingData(words, tags, entities)

    return processed, getMaxLengthKey(processed)
def getTrainingData():
    """
        Read the training file and return its processed form.

        The return value is a tuple of the processed training data (from
        processTrainingData) and the result of getMaxLengthKey on it.
    """
    # Fetch the raw records from dir_path + training_file
    source = loadFile(dir_path + training_file)
    records = getDataFromFile(source)

    # Split into context, POS and NER parts before processing
    ctx, pos_tags, ner_tags = parseTrainingData(records)
    prepared = processTrainingData(ctx, pos_tags, ner_tags)

    longest_key = getMaxLengthKey(prepared)
    return prepared, longest_key
def getTestData(HMM=False):
    """
        Load the test file and parse it into (context, pos, index).

        parseTestDataHMM is used when HMM is truthy, parseTestData
        otherwise; the three parsed values are returned as a tuple.
    """
    # Read the raw contents of the test file
    handle = loadFile(dir_path + test_file)
    raw_test = getDataFromFile(handle)

    # Both modes return the same triple, only the parser differs
    parser = parseTestDataHMM if HMM else parseTestData
    words, tags, positions = parser(raw_test)
    return words, tags, positions
예제 #6
0
def getTestData(HMM=False):
    """
        Read the test file and return its parsed context, POS and index.

        A truthy HMM selects parseTestDataHMM; otherwise parseTestData is
        applied. The result is always a three-element tuple.
    """
    # Fetch the raw records from dir_path + test_file
    source = loadFile(dir_path + test_file)
    records = getDataFromFile(source)

    # Choose the parser for the requested mode, then unpack its triple
    parse = parseTestDataHMM if HMM else parseTestData
    ctx, pos_tags, idx = parse(records)
    return ctx, pos_tags, idx
def getTrainingData(HMM=False):
    """
        Load the training file and parse it for the requested mode.

        With a truthy HMM the (context, pos, ner) tuple produced from
        parseTrainingDataHMM is returned directly. Otherwise the values
        from parseTrainingData are passed through processTrainingData and
        that single result is returned.
    """
    # Read the raw contents of the training file
    handle = loadFile(dir_path + training_file)
    raw_training = getDataFromFile(handle)

    if HMM:
        # HMM mode hands back the parsed components unprocessed
        words, tags, entities = parseTrainingDataHMM(raw_training)
        return words, tags, entities

    # Default mode combines the parsed components into one structure
    words, tags, entities = parseTrainingData(raw_training)
    return processTrainingData(words, tags, entities)
예제 #8
0
def getTrainingData(HMM=False):
    """
        Read the training file and return data shaped for the chosen mode.

        HMM truthy: returns the (context, pos, ner) tuple unpacked from
        parseTrainingDataHMM. HMM falsy: returns the output of
        processTrainingData applied to the parseTrainingData values.
    """
    # Fetch the raw records from dir_path + training_file
    source = loadFile(dir_path + training_file)
    records = getDataFromFile(source)

    if HMM:
        # The HMM path returns the three parsed parts as they are
        ctx, pos_tags, ner_tags = parseTrainingDataHMM(records)
        return ctx, pos_tags, ner_tags

    # The non-HMM path post-processes the parsed parts before returning
    ctx, pos_tags, ner_tags = parseTrainingData(records)
    prepared = processTrainingData(ctx, pos_tags, ner_tags)
    return prepared