def _initialize_from_file(self, lm_file):
    """Restore a previously saved model from the pickle stream in *lm_file*.

    The file is read as a sequence of consecutive pickled objects, in this
    exact order: the three layer sizes, the sparse embeddings, the word2vec
    embeddings, and then the 17 LSTM weight/bias entries listed in
    ``weight_names`` below.  The loaded weights are handed to ``_LSTM``.

    NOTE: unpickling can execute arbitrary code; only load files from a
    trusted source.
    """
    # Attributes stored at the head of the file, in on-disk order.
    header_attrs = (
        '_input_layer_size',
        '_hidden_layer_size',
        '_output_layer_size',
        'sparse_embeddings',
        'w2v_embeddings',
    )
    # Weight keys in on-disk order; the order must match the stream layout.
    weight_names = (
        'W_xi', 'W_hi', 'W_ci', 'b_i',
        'W_xf', 'W_hf', 'W_cf', 'b_f',
        'W_xc', 'W_hc', 'b_c',
        'W_xo', 'W_ho', 'W_co', 'b_o',
        'W_hy', 'b_y',
    )

    with open(lm_file, 'rb') as stream:
        for attr in header_attrs:
            setattr(self, attr, pickle.load(stream))
        # Dict comprehensions evaluate in iteration order, so each key
        # receives the next object in the stream.
        weights = {name: pickle.load(stream) for name in weight_names}

    self._lstm = _LSTM(
        self._input_layer_size,
        self._hidden_layer_size,
        self._output_layer_size,
        weights=weights,
    )
    self.print('initialized model from file: %s' % lm_file)
def _initialize(self, tokenized_sentences):
    """Build a fresh model from *tokenized_sentences*.

    Trains word2vec embeddings on the sentences, builds the token -> index
    mapping stored in ``self.sparse_embeddings``, sets
    ``self._output_layer_size`` to the vocabulary size, and creates a new
    ``_LSTM``.  Reads ``self._input_layer_size`` and
    ``self._hidden_layer_size``, which must already be set.

    *tokenized_sentences* is iterated more than once (by ``Word2Vec`` and
    by the vocabulary pass below), so it must be re-iterable.
    """
    # NOTE(review): `size=` is the keyword used by gensim releases before
    # 4.0 (later renamed `vector_size`) - confirm against the pinned version.
    self.w2v_embeddings = Word2Vec(
        tokenized_sentences, size=self._input_layer_size, min_count=1
    )

    # Assign indices in first-occurrence order.  Enumerating a set would make
    # the token -> index mapping depend on string hash randomization and so
    # differ between runs on identical input.
    token_index = {}
    for sentence in tokenized_sentences:
        for token in sentence:
            token_index.setdefault(token, len(token_index))
    self.sparse_embeddings = token_index

    self._output_layer_size = len(self.sparse_embeddings)
    self._lstm = _LSTM(
        self._input_layer_size,
        self._hidden_layer_size,
        self._output_layer_size,
    )
    self.print('initialized new model')