def activations2():
    """
    Receive a text and return an HNATT activation map along with
    the model's prediction for the given text.
    """
    if request.method == 'GET':
        text = request.args.get('text', '')
        if len(text.strip()) == 0:
            return Response(status=400)
        ntext = text_util.normalize(text)
        global graph
        with graph.as_default():
            activation_maps = h.activation_maps(text, websafe=True)
            preds = h.predict([ntext])[0]
            # cast to a plain float so jsonify can serialize it
            prediction = np.argmax(preds).astype(float)
        data = {
            'activations': activation_maps,
            'normalizedText': ntext,
            'prediction': prediction,
        }
        return jsonify(data)
    return Response(status=501)
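A minimal client-side sketch of calling this handler, assuming it is registered at a `/activations2` route on a local Flask dev server (the route decorator is not shown above, so the URL and port are assumptions):

    # Hypothetical usage; route path and port are assumptions.
    import requests

    resp = requests.get(
        'http://localhost:5000/activations2',
        params={'text': 'The food was great but the service was slow.'})
    if resp.ok:
        payload = resp.json()
        print(payload['prediction'])      # predicted class index
        print(payload['activations'][0])  # [word-weight pairs, sentence weight]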
def load_data(path, size=1e4, train_ratio=0.8, binary=False):
    print('loading Yelp reviews...')
    df = pd.read_csv(path, nrows=int(size), usecols=['stars', 'text'])
    df['text_tokens'] = df['text'].progress_apply(lambda x: normalize(x))

    dim = 2 if binary else 5
    if binary:
        # collapse 1-5 star ratings into two sentiment classes
        df['polarized_stars'] = df['stars'].apply(lambda x: polarize(x))
        x, y = chunk_to_arrays(df, binary=binary)
        return balance_classes(x, y, dim, train_ratio)

    train_size = int(round(size * train_ratio))

    # training + validation set
    train_set = df[0:train_size].copy()
    train_set['len'] = train_set['text_tokens'].apply(lambda x: len(x))
    # train_set.sort_values('len', inplace=True, ascending=True)
    train_x, train_y = chunk_to_arrays(train_set, binary=binary)
    train_y = to_one_hot(train_y, dim=dim)

    test_set = df[train_size:]
    test_x, test_y = chunk_to_arrays(test_set, binary=binary)
    test_y = to_one_hot(test_y, dim=dim)

    print('finished loading Yelp reviews')
    return (train_x, train_y), (test_x, test_y)
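`load_data` leans on a few label helpers defined elsewhere in the repo. The sketches below are assumptions about their behavior, inferred only from how they are called above (`polarize` collapsing 1-5 star ratings to two classes, `to_one_hot` encoding 1-indexed labels); the real implementations may differ:

    import numpy as np

    def polarize(stars):
        # Assumed mapping: low ratings -> 0 (negative), high -> 1 (positive);
        # the actual threshold and handling of 3-star reviews may differ.
        return 1 if stars > 3 else 0

    def to_one_hot(labels, dim=5):
        # Assumes labels are 1-indexed class ids (e.g. star ratings 1..dim).
        labels = np.asarray(labels, dtype=int)
        one_hot = np.zeros((len(labels), dim))
        one_hot[np.arange(len(labels)), labels - 1] = 1
        return one_hot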
def activation_maps(self, text, websafe=False):
    normalized_text = normalize(text)
    encoded_text = self._encode_input(text)[0]

    # get word activations
    hidden_word_encoding_out = Model(
        inputs=self.word_attention_model.input,
        outputs=self.word_attention_model.get_layer(
            'dense_transform_w').output)
    hidden_word_encodings = hidden_word_encoding_out.predict(encoded_text)
    word_context = self.word_attention_model.get_layer(
        'word_attention').get_weights()[0]
    # unnormalized word attention, masked by the (zero-padded) input ids
    u_wattention = encoded_text * np.exp(
        np.squeeze(np.dot(hidden_word_encodings, word_context)))
    if websafe:
        u_wattention = u_wattention.astype(float)

    # generate (word, activation) pairs
    nopad_encoded_text = encoded_text[-len(normalized_text):]
    nopad_encoded_text = [
        list(filter(lambda x: x > 0, sentence))
        for sentence in nopad_encoded_text
    ]
    reconstructed_texts = [
        [self.reverse_word_index[int(i)] for i in sentence]
        for sentence in nopad_encoded_text
    ]
    nopad_wattention = u_wattention[-len(normalized_text):]
    # normalize word attention within each sentence
    nopad_wattention = nopad_wattention / np.expand_dims(
        np.sum(nopad_wattention, -1), -1)
    nopad_wattention = np.array([
        attention_seq[-len(sentence):]
        for attention_seq, sentence in zip(nopad_wattention, nopad_encoded_text)
    ])
    word_activation_maps = []
    for i, words in enumerate(reconstructed_texts):
        word_activation_maps.append(list(zip(words, nopad_wattention[i])))

    # get sentence activations
    hidden_sentence_encoding_out = Model(
        inputs=self.model.input,
        outputs=self.model.get_layer('dense_transform_s').output)
    hidden_sentence_encodings = np.squeeze(
        hidden_sentence_encoding_out.predict(
            np.expand_dims(encoded_text, 0)), 0)
    sentence_context = self.model.get_layer(
        'sentence_attention').get_weights()[0]
    u_sattention = np.exp(
        np.squeeze(np.dot(hidden_sentence_encodings, sentence_context), -1))
    if websafe:
        u_sattention = u_sattention.astype(float)
    nopad_sattention = u_sattention[-len(normalized_text):]
    # normalize sentence attention across the document
    nopad_sattention = nopad_sattention / np.expand_dims(
        np.sum(nopad_sattention, -1), -1)

    activation_map = list(zip(word_activation_maps, nopad_sattention))
    return activation_map
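The returned structure pairs each sentence's (word, weight) list with that sentence's own attention weight, so it can be printed or rendered directly. A minimal usage sketch, assuming a trained `HNATT` instance `h`:

    maps = h.activation_maps('Great tacos. Terrible service.')
    for word_weights, sentence_weight in maps:
        print('sentence weight: {:.3f}'.format(sentence_weight))
        for word, weight in word_weights:
            print('  {:15s} {:.3f}'.format(word, weight))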
def _encode_input(self, x, log=False):
    x = np.array(x)
    if not x.shape:
        # a single string becomes a 0-d array; wrap it so it is iterable
        x = np.expand_dims(x, 0)
    texts = np.array([normalize(text) for text in x])
    return self._encode_texts(texts)
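Why the shape check matters: `np.array` on a single string yields a zero-dimensional array, which cannot be iterated. A quick illustration of the numpy behavior the guard handles:

    import numpy as np

    x = np.array('a single review')
    print(x.shape)            # () -- zero-dimensional, not iterable
    x = np.expand_dims(x, 0)
    print(x.shape, x[0])      # (1,) 'a single review'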
def predict_single(text):
    ntext = normalize(text)
    preds = h.predict([ntext])[0]
    prediction = np.argmax(preds).astype(float)
    return prediction
# In[ ]:

# load pretrained model
try:
    print('loading pretrained model ..')
    h = HNATT()
    h.load_weights(SAVED_MODEL_DIR, SAVED_MODEL_FILENAME)
except Exception as e:
    print('unable to load pretrained model: {}'.format(e))


# In[ ]:

if mode == '0':
    print(df_test['x1'][:5])
    df_test['text_tokens'] = df_test['x1'].apply(lambda x: normalize(x))
    # df['text_tokens'] = df['x1'].progress_apply(lambda x: normalize(x))
    # train_set['len'] = train_set['text_tokens'].apply(lambda x: len(x))

    test_x = df_test['text_tokens']
    test_y = to_one_hot(y_test, dim=3)

    print(h.model.metrics_names)

    # test on test set
    loss_and_metrics = h.test(test_x, test_y, batch_size=64)
    print(loss_and_metrics)
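For readable output, the metric values can be paired with their names. A small sketch, assuming `h.test` returns values in the same order as `h.model.metrics_names` (as Keras's `model.evaluate` does):

    for name, value in zip(h.model.metrics_names, loss_and_metrics):
        print('{}: {:.4f}'.format(name, value))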