def __init__(self, device):
    """Load the pickled lookup tables and build the model.

    Both pickles are read from the ``model`` directory under the current
    working directory, so the process must be started from the project
    root for the files to be found.

    Args:
        device: Stored as ``self.device`` (presumably the torch device the
            model runs on -- confirm against ``_build_model``).
    """
    model_dir = os.path.join(os.getcwd(), 'model')
    word2idx_path = os.path.join(
        model_dir, 'Stacked_BiLSTM_CRF_Fasttext_2315_word2idx.pkl')
    idx2label_path = os.path.join(
        model_dir, 'Stacked_BiLSTM_CRF_Fasttext_2315_idx2label.pkl')

    self.device = device
    self.word2idx = load_pickle(word2idx_path)
    self.idx2label = load_pickle(idx2label_path)
    # The lookup tables are assigned before the model is built.
    self._build_model()
def __init__(self, device_):
    """Load every saved artefact from ``<cwd>/model`` and build the model.

    Artefacts read: the word, label, POS and predicate lookup tables
    (pickles) and the pretrained embedding matrix (``.npy``), which is
    wrapped in a tensor with ``torch.from_numpy``.

    Args:
        device_: Stored as ``self.device`` (presumably the torch device the
            model runs on -- confirm against ``_build_model``).
    """
    self.device = device_
    model_dir = os.path.join(os.getcwd(), 'model')

    def _artefact(filename):
        # Absolute location of one file inside the model directory.
        return os.path.join(model_dir, filename)

    self.word2idx = load_pickle(_artefact('word_stoi.pkl'))
    self.idx2label = load_pickle(_artefact('idx2label.pkl'))
    self.label2idx = load_pickle(_artefact('label2idx.pkl'))
    self.pos2idx = load_pickle(_artefact('pos_stoi.pkl'))
    self.predicate2idx = load_pickle(_artefact('predicate_stoi.pkl'))

    embedding_matrix = np.load(_artefact('vocab_embeddings_vector.npy'))
    self.pretrained_embeddings = torch.from_numpy(embedding_matrix)

    # All artefacts are assigned before the model is built.
    self._build_model()
def build_vocabulary(data_x, load_from=None):
    """Build, or load from disk, the token <-> index vocabulary.

    Args:
        data_x: Iterable of token sequences (e.g. a list of lists of str).
        load_from: Optional path of the pickled ``stoi`` mapping. If the
            file exists it is loaded and ``data_x`` is ignored. If it is
            given but does not exist, the freshly built ``stoi`` is saved
            there and ``itos`` is saved to the same path with ``'stoi'``
            replaced by ``'itos'``. If ``None`` (or empty), nothing is read
            from or written to disk.

    Returns:
        A ``(stoi, itos)`` tuple: ``stoi`` maps token -> index with
        ``'<PAD>'`` at 0 and ``'<UNK>'`` at 1 followed by the sorted unique
        tokens; ``itos`` is the exact inverse mapping.
    """
    if load_from and Path(load_from).is_file():
        # NOTE(review): load_pickle presumably unpickles the file; only
        # point load_from at trusted files.
        stoi = load_pickle(load_from)
        # Invert using the stored indices rather than relying on dict order.
        return stoi, {index: token for token, index in stoi.items()}

    reserved = ('<PAD>', '<UNK>')
    stoi = {token: index for index, token in enumerate(reserved)}
    # Drop reserved tokens from the data so they keep their fixed indices
    # and the index range stays contiguous.
    tokens = sorted(
        {token for sequence in data_x for token in sequence} - set(reserved))
    stoi.update(
        {token: index for index, token in enumerate(tokens, start=len(stoi))})
    itos = {index: token for token, index in stoi.items()}

    # Persist only when a destination was supplied; the default
    # load_from=None previously crashed here.
    if load_from:
        save_pickle(load_from, stoi)
        stoi_path = str(load_from)
        itos_path = stoi_path.replace('stoi', 'itos')
        # If the path does not contain 'stoi' both names coincide; skip the
        # second write so the stoi pickle is not overwritten with itos.
        if itos_path != stoi_path:
            save_pickle(itos_path, itos)
    return stoi, itos
def build_labels_vocabulary(data_y, load_from=None):
    """Build, or load from disk, the label <-> index vocabulary.

    Args:
        data_y: Iterable of label sequences (e.g. a list of lists of str).
        load_from: Optional path of the pickled ``label2idx`` mapping. If
            the file exists it is loaded and ``data_y`` is ignored. If it
            is given but does not exist, the freshly built ``label2idx`` is
            saved there and ``idx2label`` is saved to the same path with
            ``'label2idx'`` replaced by ``'idx2label'``. If ``None`` (or
            empty), nothing is read from or written to disk.

    Returns:
        A ``(label2idx, idx2label)`` tuple: ``label2idx`` maps label ->
        index with ``'<PAD>'`` at 0 followed by the sorted unique labels;
        ``idx2label`` is the exact inverse mapping.
    """
    if load_from and Path(load_from).is_file():
        # NOTE(review): load_pickle presumably unpickles the file; only
        # point load_from at trusted files.
        label2idx = load_pickle(load_from)
        # Invert using the stored indices rather than relying on dict order.
        return label2idx, {index: label for label, index in label2idx.items()}

    pad_label = '<PAD>'
    label2idx = {pad_label: 0}
    # Drop the padding label from the data so it keeps index 0 and the
    # index range stays contiguous.
    labels = sorted(
        {label for sequence in data_y for label in sequence} - {pad_label})
    label2idx.update(
        {label: index
         for index, label in enumerate(labels, start=len(label2idx))})
    idx2label = {index: label for label, index in label2idx.items()}

    # Persist only when a destination was supplied; the default
    # load_from=None previously crashed here.
    if load_from:
        save_pickle(load_from, label2idx)
        label2idx_path = str(load_from)
        idx2label_path = label2idx_path.replace('label2idx', 'idx2label')
        # If the path does not contain 'label2idx' both names coincide;
        # skip the second write so the first pickle is not overwritten.
        if idx2label_path != label2idx_path:
            save_pickle(idx2label_path, idx2label)
    return label2idx, idx2label