Esempio n. 1
0
def train_and_validate_viterbi2(_inputFile, _outputFile, _devFile,
                                _devOutputFile, _validateFile):
    """Train on ``_inputFile``, label ``_devFile`` with Viterbi, then evaluate.

    Steps, in order:
      1. Build a ``Preprocessor`` from the training file and pull its
         representer, vocabulary and states.
      2. Collect the tokens of the dev file with ``getAllTokens`` and feed
         everything into an ``Emission`` object.
      3. Build a ``Transition2`` object and let it compute its parameters
         from the preprocessor.
      4. Run ``label_viterbi`` on the dev file, targeting ``_devOutputFile``.
      5. Call ``evaluate`` with the validation file and the produced output.

    Args:
        _inputFile: training data handed to ``Preprocessor``.
        _outputFile: accepted for signature compatibility; not used here.
        _devFile: dev data to be tokenised and labelled.
        _devOutputFile: destination passed to ``label_viterbi`` and then
            compared by ``evaluate``.
        _validateFile: reference file passed to ``evaluate``.

    Returns:
        None. The result of ``evaluate`` is not propagated.
    """
    # --- Training: preprocess the input corpus --------------------------
    prep = Preprocessor(_inputFile)

    # --- Emission model -------------------------------------------------
    # Arguments are evaluated left to right, so the preprocessor getters run
    # before the dev file is tokenised.
    emission_model = Emission(
        prep.get_representer(),
        prep.get_vocabulary(),
        prep.get_states(),
        getAllTokens(_devFile),
    )

    # --- Transition model -----------------------------------------------
    transition_model = Transition2()
    transition_model.compute_params(prep)

    # --- Decode the dev set and score it --------------------------------
    label_viterbi(_devFile, _devOutputFile, emission_model, transition_model)
    evaluate(_validateFile, _devOutputFile)
Esempio n. 2
0
def train_and_validate_emission(_inputFile, _outputFile, _devFile,
                                _devOutputFile, _validateFile):
    """Train an emission-only model on ``_inputFile`` and evaluate it.

    Steps, in order:
      1. Build a ``Preprocessor`` from the training file and pull its
         representer, vocabulary and states.
      2. Construct an ``Emission`` object from those three pieces.
      3. Call ``Emission.labelSequence`` with the dev file and
         ``_devOutputFile``.
      4. Call ``evaluate`` with the validation file and the produced output.

    Args:
        _inputFile: training data handed to ``Preprocessor``.
        _outputFile: accepted for signature compatibility; not used here.
        _devFile: dev data passed to ``labelSequence``.
        _devOutputFile: destination passed to ``labelSequence`` and then
            compared by ``evaluate``.
        _validateFile: reference file passed to ``evaluate``.

    Returns:
        None. The result of ``evaluate`` is not propagated.
    """
    # --- Training: preprocess the input corpus --------------------------
    prep = Preprocessor(_inputFile)

    # --- Emission model (getters run left to right) ---------------------
    emission_model = Emission(
        prep.get_representer(),
        prep.get_vocabulary(),
        prep.get_states(),
    )

    # --- Label the dev set and score it ---------------------------------
    emission_model.labelSequence(_devFile, _devOutputFile)
    evaluate(_validateFile, _devOutputFile)