예제 #1
0
def read_conll(path, label_alphabet=None):
    """Read a CoNLL-style file into parallel word and POS-tag sentences.

    Sentences are separated by blank lines. On every non-blank line the
    whitespace-separated field at index 1 is taken as the word and the field
    at index 4 as its tag (these look like the FORM and POSTAG columns of the
    CoNLL-X layout -- confirm against the data actually being read).

    Runs of blank lines, and blank lines before the first token, count as a
    single separator, so no empty sentence is ever returned.

    :param path: Path of the file to read.
    :param label_alphabet: Optional existing label alphabet; every tag read
        is added to it. When None, a new ``Alphabet('label', ...)`` is
        constructed with ``(padding_symbol,)``.
    :return: Tuple ``(word_sentences, pos_sentences, word_alphabet,
        label_alphabet)``; the first two are lists of token lists, aligned
        sentence by sentence and token by token.
    :raises IndexError: If a non-blank line has fewer than five fields.
    """
    word_sentences = []
    pos_sentences = []
    words = []
    poses = []

    word_alphabet = Alphabet('word', (padding_symbol, ))

    if label_alphabet is None:
        label_alphabet = Alphabet('label', (padding_symbol, ))

    with open(path) as f:
        for line in f:
            parts = line.split()

            if not parts:
                # Blank (or whitespace-only) line: sentence boundary. Only
                # flush when tokens were collected, otherwise repeated blank
                # lines would emit empty sentences.
                if words:
                    word_sentences.append(words)
                    pos_sentences.append(poses)
                    # Rebind to fresh lists; the stored ones are not reused.
                    words = []
                    poses = []
                continue

            word = parts[1]
            pos = parts[4]
            words.append(word)
            poses.append(pos)
            word_alphabet.add(word)
            label_alphabet.add(pos)

    # The file may end without a trailing blank line; keep the last sentence.
    if words:
        word_sentences.append(words)
        pos_sentences.append(poses)

    return word_sentences, pos_sentences, word_alphabet, label_alphabet
예제 #2
0
    def __init__(self, config):
        """Set up the word-vector model and the full lookup alphabet.

        When ``config.word_vector`` equals ``'word2vec'`` the binary vectors
        at ``config.word_vector_path`` are loaded into ``self.model``. For any
        other value ``self.model`` is not assigned here.

        :param config: Object providing ``word_vector`` and, for word2vec,
            ``word_vector_path``.
        """
        use_word2vec = config.word_vector == 'word2vec'
        if use_word2vec:
            logger.info("Loading word2vec from disk ...")
            vector_path = config.word_vector_path
            # NOTE(review): presumably gensim's Word2Vec; newer gensim
            # releases expose this loader on KeyedVectors -- confirm the
            # version this project pins.
            self.model = Word2Vec.load_word2vec_format(vector_path,
                                                       binary=True)

        # Printed unconditionally, even when nothing was loaded above.
        print("Loading done...")

        self.full_alphabet = Alphabet("full_lookup")
예제 #3
0
def read_models(model_base, data_name, model):
    """Load one or more trained learners together with their alphabets.

    Each model is read from the directory
    ``os.path.join(model_base, data_name, <model name>)``, which is also
    passed to the ``load`` call of the 'pos' and 'word' alphabets.

    :param model_base: Base directory under which models are stored.
    :param data_name: Sub-directory name for the data set.
    :param model: Name of the model to load, or ``'all'`` to load both
        ``'auto'`` and ``'vanilla'``.
    :return: Dict mapping each loaded model name to a tuple
        ``(learner, pos_alphabet, word_alphabet)``.
    """
    logger.info("Loading models from disk.")

    if model == 'all':
        names = ['auto', 'vanilla']
    else:
        names = [model]

    loaded = {}
    for name in names:
        # A distinct local name keeps the learner from shadowing ``model``.
        learner = BaseLearner()
        directory = os.path.join(model_base, data_name, name)
        learner.load(directory)

        pos_alphabet = Alphabet('pos')
        word_alphabet = Alphabet('word')
        pos_alphabet.load(directory)
        word_alphabet.load(directory)

        loaded[name] = (learner, pos_alphabet, word_alphabet)

    logger.info("Loading done.")
    return loaded