Esempio n. 1
0
def build_LM(in_file):
    """
    Build one language model per label found in in_file.

    Each non-blank line of in_file holds a label and a training string,
    separated by the first space on the line. The first time a label is
    seen an NgramLM is created for it, configured from gram_size,
    token_based, start_end, case_sensitive, strip_out and
    add_one_smoothing (names resolved from outside this function). Every
    line's string is then passed to the train() method of its label's model.

    Blank lines are skipped.

    Args:
        in_file: path of the training file to read.

    Returns:
        dict mapping each label to the NgramLM trained on that label's lines.

    Raises:
        ValueError: if a non-blank line has no space separating the label
            from the text.
    """
    print('building language models...')

    # LMs is a dict to store the LM for each language
    LMs = {}
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm this matches how the test data is read elsewhere.
    with open(in_file, 'r') as file:
        for line_no, raw_line in enumerate(file, start=1):
            # Only the line terminator is removed; leading/trailing spaces
            # are part of the data and are kept.
            line = raw_line.strip("\r\n")
            if not line:
                # A blank line carries neither a label nor text.
                continue

            label, separator, text = line.partition(" ")
            if not separator:
                raise ValueError(
                    "{}:{}: expected '<label> <text>', got {!r}".format(
                        in_file, line_no, line))

            if label not in LMs:
                LMs[label] = NgramLM(label,
                                     gram_size=gram_size,
                                     token_based=token_based,
                                     start_end=start_end,
                                     case_sensitive=case_sensitive,
                                     strip_out=strip_out,
                                     add_one_smoothing=add_one_smoothing)
            LMs[label].train(text)

    return LMs