def activations2():
    """
    Receive a text and return an HNATT activation map along with
    the model's prediction for the given text.
    """
    if request.method == 'GET':
        text = request.args.get('text', '')
        if len(text.strip()) == 0:
            return Response(status=400)
        ntext = text_util.normalize(text)
        global graph
        with graph.as_default():
            activation_maps = h.activation_maps(text, websafe=True)
            preds = h.predict([ntext])[0]
            # cast to a plain float so jsonify can serialize it
            prediction = np.argmax(preds).astype(float)
        data = {
            'activations': activation_maps,
            'normalizedText': ntext,
            'prediction': prediction,
        }
        return jsonify(data)
    return Response(status=501)
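A minimal client-side sketch of calling this handler, assuming it is registered at a `/activations2` route on a local Flask dev server (the route decorator is not shown above, so the URL and port are assumptions):

    # Hypothetical usage; route path and port are assumptions.
    import requests

    resp = requests.get(
        'http://localhost:5000/activations2',
        params={'text': 'The food was great but the service was slow.'})
    if resp.ok:
        payload = resp.json()
        print(payload['prediction'])      # predicted class index
        print(payload['activations'][0])  # [word-weight pairs, sentence weight]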
def load_data(path, size=1e4, train_ratio=0.8, binary=False):
    print('loading Yelp reviews...')
    df = pd.read_csv(path, nrows=int(size), usecols=['stars', 'text'])
    df['text_tokens'] = df['text'].progress_apply(lambda x: normalize(x))

    dim = 2 if binary else 5
    if binary:
        # collapse 1-5 star ratings into two sentiment classes
        df['polarized_stars'] = df['stars'].apply(lambda x: polarize(x))
        x, y = chunk_to_arrays(df, binary=binary)
        return balance_classes(x, y, dim, train_ratio)

    train_size = int(round(size * train_ratio))

    # training + validation set
    train_set = df[0:train_size].copy()
    train_set['len'] = train_set['text_tokens'].apply(lambda x: len(x))
    # train_set.sort_values('len', inplace=True, ascending=True)
    train_x, train_y = chunk_to_arrays(train_set, binary=binary)
    train_y = to_one_hot(train_y, dim=dim)

    test_set = df[train_size:]
    test_x, test_y = chunk_to_arrays(test_set, binary=binary)
    test_y = to_one_hot(test_y, dim=dim)

    print('finished loading Yelp reviews')
    return (train_x, train_y), (test_x, test_y)
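`load_data` leans on a few label helpers defined elsewhere in the repo. The sketches below are assumptions about their behavior, inferred only from how they are called above (`polarize` collapsing 1-5 star ratings to two classes, `to_one_hot` encoding 1-indexed labels); the real implementations may differ:

    import numpy as np

    def polarize(stars):
        # Assumed mapping: low ratings -> 0 (negative), high -> 1 (positive);
        # the actual threshold and handling of 3-star reviews may differ.
        return 1 if stars > 3 else 0

    def to_one_hot(labels, dim=5):
        # Assumes labels are 1-indexed class ids (e.g. star ratings 1..dim).
        labels = np.asarray(labels, dtype=int)
        one_hot = np.zeros((len(labels), dim))
        one_hot[np.arange(len(labels)), labels - 1] = 1
        return one_hot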
def activation_maps(self, text, websafe=False):
    normalized_text = normalize(text)
    encoded_text = self._encode_input(text)[0]

    # get word activations
    hidden_word_encoding_out = Model(
        inputs=self.word_attention_model.input,
        outputs=self.word_attention_model.get_layer(
            'dense_transform_w').output)
    hidden_word_encodings = hidden_word_encoding_out.predict(encoded_text)
    word_context = self.word_attention_model.get_layer(
        'word_attention').get_weights()[0]
    # unnormalized word attention, masked by the (zero-padded) input ids
    u_wattention = encoded_text * np.exp(
        np.squeeze(np.dot(hidden_word_encodings, word_context)))
    if websafe:
        u_wattention = u_wattention.astype(float)

    # generate (word, activation) pairs
    nopad_encoded_text = encoded_text[-len(normalized_text):]
    nopad_encoded_text = [
        list(filter(lambda x: x > 0, sentence))
        for sentence in nopad_encoded_text
    ]
    reconstructed_texts = [
        [self.reverse_word_index[int(i)] for i in sentence]
        for sentence in nopad_encoded_text
    ]
    nopad_wattention = u_wattention[-len(normalized_text):]
    # normalize word attention within each sentence
    nopad_wattention = nopad_wattention / np.expand_dims(
        np.sum(nopad_wattention, -1), -1)
    nopad_wattention = np.array([
        attention_seq[-len(sentence):]
        for attention_seq, sentence in zip(nopad_wattention, nopad_encoded_text)
    ])
    word_activation_maps = []
    for i, words in enumerate(reconstructed_texts):
        word_activation_maps.append(list(zip(words, nopad_wattention[i])))

    # get sentence activations
    hidden_sentence_encoding_out = Model(
        inputs=self.model.input,
        outputs=self.model.get_layer('dense_transform_s').output)
    hidden_sentence_encodings = np.squeeze(
        hidden_sentence_encoding_out.predict(
            np.expand_dims(encoded_text, 0)), 0)
    sentence_context = self.model.get_layer(
        'sentence_attention').get_weights()[0]
    u_sattention = np.exp(
        np.squeeze(np.dot(hidden_sentence_encodings, sentence_context), -1))
    if websafe:
        u_sattention = u_sattention.astype(float)
    nopad_sattention = u_sattention[-len(normalized_text):]
    # normalize sentence attention across the document
    nopad_sattention = nopad_sattention / np.expand_dims(
        np.sum(nopad_sattention, -1), -1)

    activation_map = list(zip(word_activation_maps, nopad_sattention))
    return activation_map
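The returned structure pairs each sentence's (word, weight) list with that sentence's own attention weight, so it can be printed or rendered directly. A minimal usage sketch, assuming a trained `HNATT` instance `h`:

    maps = h.activation_maps('Great tacos. Terrible service.')
    for word_weights, sentence_weight in maps:
        print('sentence weight: {:.3f}'.format(sentence_weight))
        for word, weight in word_weights:
            print('  {:15s} {:.3f}'.format(word, weight))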
def _encode_input(self, x, log=False):
    x = np.array(x)
    if not x.shape:
        # a single string becomes a 0-d array; wrap it so it is iterable
        x = np.expand_dims(x, 0)
    texts = np.array([normalize(text) for text in x])
    return self._encode_texts(texts)
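Why the shape check matters: `np.array` on a single string yields a zero-dimensional array, which cannot be iterated. A quick illustration of the numpy behavior the guard handles:

    import numpy as np

    x = np.array('a single review')
    print(x.shape)            # () -- zero-dimensional, not iterable
    x = np.expand_dims(x, 0)
    print(x.shape, x[0])      # (1,) 'a single review'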
def predict_single(text):
    ntext = normalize(text)
    preds = h.predict([ntext])[0]
    prediction = np.argmax(preds).astype(float)
    return prediction
# In[ ]:

# load pretrained model
try:
    print('loading pretrained model ..')
    h = HNATT()
    h.load_weights(SAVED_MODEL_DIR, SAVED_MODEL_FILENAME)
except Exception as e:
    print('unable to load pretrained model: {}'.format(e))


# In[ ]:

if mode == '0':
    print(df_test['x1'][:5])
    df_test['text_tokens'] = df_test['x1'].apply(lambda x: normalize(x))
    # df['text_tokens'] = df['x1'].progress_apply(lambda x: normalize(x))
    # train_set['len'] = train_set['text_tokens'].apply(lambda x: len(x))

    test_x = df_test['text_tokens']
    test_y = to_one_hot(y_test, dim=3)

    print(h.model.metrics_names)

    # test on test set
    loss_and_metrics = h.test(test_x, test_y, batch_size=64)
    print(loss_and_metrics)
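For readable output, the metric values can be paired with their names. A small sketch, assuming `h.test` returns values in the same order as `h.model.metrics_names` (as Keras's `model.evaluate` does):

    for name, value in zip(h.model.metrics_names, loss_and_metrics):
        print('{}: {:.4f}'.format(name, value))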