def read_conll(path, label_alphabet=None):
    """Read a CoNLL-format file into sentences and symbol alphabets.

    Each non-blank line is split on whitespace; column 1 is taken as the
    word form and column 4 as the POS tag (CoNLL-X column layout —
    TODO confirm against the data files actually used). Blank lines
    delimit sentences.

    :param path: Path to the CoNLL file.
    :param label_alphabet: Optional existing label Alphabet to extend;
        a fresh one is created when None.
    :return: Tuple of (word_sentences, pos_sentences, word_alphabet,
        label_alphabet), where the first two are lists of per-sentence
        token/tag lists.
    """
    word_sentences = []
    pos_sentences = []
    words = []
    poses = []
    word_alphabet = Alphabet('word', (padding_symbol, ))
    if label_alphabet is None:
        label_alphabet = Alphabet('label', (padding_symbol, ))

    def _flush():
        # Commit the current sentence buffers; skip empties so that
        # leading or consecutive blank lines do not create empty sentences.
        if words:
            word_sentences.append(words[:])
            pos_sentences.append(poses[:])
            del words[:]
            del poses[:]

    with open(path) as f:
        for l in f:
            if l.strip() == "":
                _flush()
            else:
                parts = l.split()
                word = parts[1]
                pos = parts[4]
                words.append(word)
                poses.append(pos)
                word_alphabet.add(word)
                label_alphabet.add(pos)
    # Flush the last sentence when the file does not end with a blank line.
    _flush()
    return word_sentences, pos_sentences, word_alphabet, label_alphabet
def __init__(self, config):
    """Initialize the embedding lookup from configuration.

    When ``config.word_vector == 'word2vec'``, loads a binary word2vec
    model from ``config.word_vector_path`` (disk I/O; may be slow for
    large models). For any other value of ``config.word_vector`` no
    model is loaded and ``self.model`` is left unset — presumably other
    vector types are handled elsewhere; verify against callers.

    :param config: Configuration object providing ``word_vector`` and
        ``word_vector_path`` attributes.
    """
    if config.word_vector == 'word2vec':
        logger.info("Loading word2vec from disk ...")
        self.model = Word2Vec.load_word2vec_format(config.word_vector_path, binary=True)
        # Use the module logger consistently instead of a bare print().
        logger.info("Loading done...")
    self.full_alphabet = Alphabet("full_lookup")
def read_models(model_base, data_name, model):
    """Load one or more trained models (with their alphabets) from disk.

    :param model_base: Root directory containing per-dataset model folders.
    :param data_name: Dataset subdirectory name under ``model_base``.
    :param model: Model variant to load ('auto' or 'vanilla'), or 'all'
        to load both.
    :return: Dict mapping variant name -> (learner, pos_alphabet,
        word_alphabet).
    """
    logger.info("Loading models from disk.")
    models = {}
    models_to_load = ['auto', 'vanilla'] if model == 'all' else [model]
    for variant in models_to_load:
        # Use a distinct local name: the original rebound the `model`
        # parameter inside the loop, clobbering the caller's argument.
        learner = BaseLearner()
        model_dir = os.path.join(model_base, data_name, variant)
        learner.load(model_dir)
        pos_alphabet = Alphabet('pos')
        word_alphabet = Alphabet('word')
        pos_alphabet.load(model_dir)
        word_alphabet.load(model_dir)
        models[variant] = (learner, pos_alphabet, word_alphabet)
    logger.info("Loading done.")
    return models