Exemple #1
0
 def __init__(self, device):
     """Remember the device, load both lookup pickles and build the model.

     The pickles are read from the ``model`` directory located under the
     current working directory; ``self._build_model()`` is called last.
     """
     def _model_file(filename):
         # Resolved against the working directory at the time of the call.
         return os.path.join(os.getcwd(), 'model', filename)

     self.device = device

     word2idx_path = _model_file(
         'Stacked_BiLSTM_CRF_Fasttext_2315_word2idx.pkl')
     self.word2idx = load_pickle(word2idx_path)

     idx2label_path = _model_file(
         'Stacked_BiLSTM_CRF_Fasttext_2315_idx2label.pkl')
     self.idx2label = load_pickle(idx2label_path)

     self._build_model()
    def __init__(self, device_):
        """Remember the device, load lookups and embeddings, build the model.

        Every artefact is read from the ``model`` directory located under
        the current working directory; ``self._build_model()`` runs last.
        """
        self.device = device_

        model_dir = os.path.join(os.getcwd(), 'model')

        # (attribute name, pickle file) pairs, loaded in this exact order.
        pickled_attributes = (
            ('word2idx', 'word_stoi.pkl'),
            ('idx2label', 'idx2label.pkl'),
            ('label2idx', 'label2idx.pkl'),
            ('pos2idx', 'pos_stoi.pkl'),
            ('predicate2idx', 'predicate_stoi.pkl'),
        )
        for attribute, file_name in pickled_attributes:
            setattr(self, attribute,
                    load_pickle(os.path.join(model_dir, file_name)))

        embeddings_path = os.path.join(model_dir,
                                       'vocab_embeddings_vector.npy')
        self.pretrained_embeddings = torch.from_numpy(
            np.load(embeddings_path))

        self._build_model()
Exemple #3
0
 def build_vocabulary(data_x, load_from=None):
     """Load or build the token vocabulary.

     Args:
         data_x: Iterable of token sequences; every token must be hashable
             and mutually sortable.
         load_from: Optional path of the pickled ``stoi`` mapping. If it
             names an existing file, the mapping is loaded from it and
             ``data_x`` is ignored. Otherwise a fresh vocabulary is built
             and, when a path was given, pickled to that path. The inverse
             mapping is pickled next to it under the same path with
             ``'stoi'`` replaced by ``'itos'``; it is skipped when the path
             contains no ``'stoi'``, so it can never overwrite ``stoi``.

     Returns:
         A ``(stoi, itos)`` tuple: token -> index and index -> token.
         ``'<PAD>'`` is always 0 and ``'<UNK>'`` is always 1.
     """
     if load_from and Path(load_from).is_file():
         # NOTE(review): assumes the pickle holds the dict written below.
         stoi = load_pickle(load_from)
         # Invert by stored index, not by position inside the dict.
         itos = {index: token for token, index in stoi.items()}
         return stoi, itos

     stoi = {'<PAD>': 0, '<UNK>': 1}
     # Drop reserved tokens found in the data so they keep their fixed ids
     # and the index range stays contiguous.
     seen_tokens = {token for sequence in data_x for token in sequence}
     unigrams = sorted(seen_tokens.difference(stoi))
     stoi.update(
         {token: index
          for index, token in enumerate(unigrams, start=len(stoi))})
     itos = {index: token for token, index in stoi.items()}

     # Persist only when a destination was actually supplied.
     if load_from:
         save_pickle(load_from, stoi)
         stoi_path = str(load_from)
         itos_path = stoi_path.replace('stoi', 'itos')
         if itos_path != stoi_path:
             save_pickle(itos_path, itos)
     return stoi, itos
Exemple #4
0
    def build_labels_vocabulary(data_y, load_from=None):
        """Load or build the label vocabulary.

        Args:
            data_y: Iterable of label sequences; every label must be
                hashable and mutually sortable.
            load_from: Optional path of the pickled ``label2idx`` mapping.
                If it names an existing file, the mapping is loaded from it
                and ``data_y`` is ignored. Otherwise a fresh mapping is
                built and, when a path was given, pickled to that path. The
                inverse mapping is pickled under the same path with
                ``'label2idx'`` replaced by ``'idx2label'``; it is skipped
                when the path contains no ``'label2idx'``, so it can never
                overwrite the ``label2idx`` file.

        Returns:
            A ``(label2idx, idx2label)`` tuple: label -> index and
            index -> label. ``'<PAD>'`` is always index 0.
        """
        if load_from and Path(load_from).is_file():
            # NOTE(review): assumes the pickle holds the dict written below.
            label2idx = load_pickle(load_from)
            # Invert by stored index, not by position inside the dict.
            idx2label = {index: label for label, index in label2idx.items()}
            return label2idx, idx2label

        label2idx = {'<PAD>': 0}
        # Drop the reserved label if it occurs in the data so it keeps id 0
        # and the index range stays contiguous.
        seen_labels = {label for sequence in data_y for label in sequence}
        unique_labels = sorted(seen_labels.difference(label2idx))
        label2idx.update(
            {label: index
             for index, label in enumerate(unique_labels,
                                           start=len(label2idx))})
        idx2label = {index: label for label, index in label2idx.items()}

        # Persist only when a destination was actually supplied.
        if load_from:
            save_pickle(load_from, label2idx)
            label2idx_path = str(load_from)
            idx2label_path = label2idx_path.replace('label2idx', 'idx2label')
            if idx2label_path != label2idx_path:
                save_pickle(idx2label_path, idx2label)
        return label2idx, idx2label