def load_and_normalize(filename):
    """Load a 2-D dataset and return its normalized features and targets.

    The last column of the loaded array is used as the target; all
    preceding columns are used as features. Features and targets are each
    passed through ``normalize.normalize``, which is expected to return
    three values (unpacked here as normalized data, mean, std).

    Args:
        filename: Passed unchanged to ``load``.

    Returns:
        A 7-tuple ``(design, Y_normalize, X_normalize, Y_mean, Y_std,
        X_mean, X_std)``, where ``design`` is ``X_normalize`` with a column
        of ones prepended.
    """
    dataset = load(filename)
    target_col = dataset.shape[1] - 1

    # Split the table: everything but the last column is a feature.
    features = dataset[:, 0:target_col]
    targets = dataset[:, target_col].reshape(-1, 1)

    X_normalize, X_mean, X_std = normalize.normalize(features)
    Y_normalize, Y_mean, Y_std = normalize.normalize(targets)

    # One bias entry per sample, stacked in front of the normalized features.
    bias = np.ones(targets.size)
    design = np.column_stack((bias, X_normalize))
    return design, Y_normalize, X_normalize, Y_mean, Y_std, X_mean, X_std
def activations():
    """Return the HNATT activation map and prediction for a query text.

    Reads the ``text`` query argument of a GET request. Responds with
    status 400 when the text is empty or whitespace-only, and with status
    501 for any method other than GET. Otherwise responds with a JSON
    object holding the activation maps, the normalized text, the
    prediction as a float, and ``binary`` set to ``True``.
    """
    if request.method != 'GET':
        return Response(status=501)

    text = request.args.get('text', '')
    if not text.strip():
        return Response(status=400)

    ntext = normalize(text)

    # The model calls run with the module-level graph set as default.
    with graph.as_default():
        word_and_sentence_maps = h.activation_maps(
            text, doc_topics, websafe=True)
        raw_prediction = h.predict(text, doc_topics)
        payload = {
            'activations': word_and_sentence_maps,
            'normalizedText': ntext,
            'prediction': float(raw_prediction),
            'binary': True
        }
        return jsonify(payload)
def main():
    """Print the first two columns of ``data2.txt`` and their statistics.

    Loads the file through ``load_data.load``, then prints the selected
    columns, their per-column mean and standard deviation, and the result
    of ``normalize.normalize`` on them.
    """
    features = load_data.load('data2.txt')[:, 0:2]
    print('X is \n{0}\n'.format(features))

    column_means = np.mean(features, axis=0)
    print('mean(X) is {0}'.format(column_means))

    column_stds = np.std(features, axis=0)
    print('std(X) is {0}'.format(column_stds))

    normalized = normalize.normalize(features)
    print('normalization of X is\n {0}'.format(normalized))
def activation_maps(self, text, doc_topics, websafe=False):
    """Build word- and sentence-level attention maps for one text.

    Args:
        text: Input text; it is normalized with ``normalize`` and encoded
            with ``self._encode_input``.
        doc_topics: Extra model input, converted with ``np.array`` and fed
            to the sentence-level model alongside the encoded text.
        websafe: When true, attention arrays are cast with
            ``astype(float)`` before further processing.

    Returns:
        A list of ``(word_activations, sentence_weight)`` pairs, where
        ``word_activations`` is a list of ``(word, weight)`` pairs for one
        sentence.
    """
    n_sentences = len(normalize(text))
    encoded = self._encode_input(text)[0]

    # --- word-level attention -------------------------------------------
    word_layer_output = self.word_attention_model.get_layer(
        'dense_transform_w').output
    word_encoder = Model(inputs=self.word_attention_model.input,
                         outputs=word_layer_output)
    word_hidden = word_encoder.predict(encoded)
    word_context = self.word_attention_model.get_layer(
        'word_attention').get_weights()[0]
    word_scores = np.squeeze(np.dot(word_hidden, word_context))
    # Scores are multiplied by the encoded ids, so entries whose id is 0
    # end up with zero weight.
    word_attention = encoded * np.exp(word_scores)
    if websafe:
        word_attention = word_attention.astype(float)

    # --- pair every kept word with its normalized weight -----------------
    # Only the trailing n_sentences rows are used; within each row only
    # ids greater than zero are kept.
    kept_ids = [
        [token for token in row if token > 0]
        for row in encoded[-n_sentences:]
    ]
    words_per_sentence = [
        [self.reverse_word_index[int(token)] for token in row]
        for row in kept_ids
    ]
    trailing_word_attention = word_attention[-n_sentences:]
    row_totals = np.expand_dims(np.sum(trailing_word_attention, -1), -1)
    trailing_word_attention = trailing_word_attention / row_totals
    # Keep only the trailing weights that line up with the kept ids.
    trailing_word_attention = np.array([
        weights[-len(row):]
        for weights, row in zip(trailing_word_attention, kept_ids)
    ])
    word_activation_maps = [
        list(zip(words, trailing_word_attention[idx]))
        for idx, words in enumerate(words_per_sentence)
    ]

    # --- sentence-level attention -----------------------------------------
    sentence_encoder = Model(
        inputs=self.model.input,
        outputs=self.model.get_layer('dense_transform_s').output)
    sentence_inputs = [np.expand_dims(encoded, 0), np.array(doc_topics)]
    sentence_hidden = np.squeeze(sentence_encoder.predict(sentence_inputs), 0)
    sentence_context = self.model.get_layer(
        'sentence_attention').get_weights()[0]
    sentence_scores = np.squeeze(np.dot(sentence_hidden, sentence_context), -1)
    sentence_attention = np.exp(sentence_scores)
    if websafe:
        sentence_attention = sentence_attention.astype(float)

    trailing_sentence_attention = sentence_attention[-n_sentences:]
    sentence_total = np.expand_dims(np.sum(trailing_sentence_attention, -1), -1)
    trailing_sentence_attention = trailing_sentence_attention / sentence_total

    return list(zip(word_activation_maps, trailing_sentence_attention))
def plot_pca_activations(u_h_history, num_timesteps, save_filename,
                         num_original_dims, num_classes):
    """Plot 2-component PCA trajectories of the recorded activations.

    The history is moved to the CPU, reshaped to rows of
    ``num_original_dims`` values, standardized, projected onto two
    principal components and passed through ``normalize``. Trajectory
    ``i`` is taken to occupy rows ``i * (num_timesteps + 1)`` up to
    ``(i + 1) * (num_timesteps + 1)`` of the projected data.

    Figures are written to ``save_filename + "-a"`` and, when the history
    holds more trajectories than fit in the first figure, to
    ``save_filename + "-b"``.

    Args:
        u_h_history: Recorded activations; its length is the number of
            trajectories.
        num_timesteps: Each trajectory spans ``num_timesteps + 1`` rows.
        save_filename: Path prefix for the saved figures.
        num_original_dims: Width of one activation row before PCA.
        num_classes: When greater than 4, the first figure shows only the
            first ``ceil(num_classes / 2)`` trajectories.
    """
    flat_activations = np.reshape(cuda.to_cpu(u_h_history),
                                  (-1, num_original_dims))
    pca = PCA(n_components=2)
    projected = pca.fit_transform(
        StandardScaler().fit_transform(flat_activations))
    Y, _offset, _data_range, _minmax = normalize(projected)

    x_axis, y_axis = 0, 1
    stride = num_timesteps + 1

    # Number of trajectories drawn in the first figure.
    split = (int(np.ceil(num_classes / 2)) if num_classes > 4
             else len(u_h_history))
    colors = matplotlib.cm.rainbow(np.linspace(0, 1, split))

    def render(trajectory_ids, suffix):
        # Draw the start marker and the full path of each trajectory, then
        # save the figure under the given filename suffix.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for k in trajectory_ids:
            first = k * stride
            last = first + stride
            tint = colors[k % split]
            ax.plot(Y[first, x_axis], Y[first, y_axis],
                    color=tint, label=str(k), marker='o', markersize=5)
            ax.plot(Y[first:last, x_axis], Y[first:last, y_axis],
                    color=tint, label=str(k))
        plt.legend()
        fig.savefig(save_filename + suffix)
        plt.close()

    render(range(split), "-a")
    if len(u_h_history) > split:
        render(range(split, len(u_h_history)), "-b")
def __getitem__(self, index):
    """Return one training sample: color target, hint, and line sketch.

    The color image at ``index`` is opened as RGB. The line image is
    taken at the same index from a randomly chosen entry of
    ``self.category`` and opened as single-channel ('L'). Both go through
    ``self.sync_transform`` and ``normalize``.

    Returns:
        A dict with keys ``'label'`` (color tensor), ``'instance'``
        (``mask_B`` and ``hint_B`` concatenated along dim 1),
        ``'image'`` (line tensor) and ``'path'`` (line image path).

    Raises:
        AssertionError: If ``self.paths_match`` rejects the path pair.
    """
    color_path = self.color_paths[index]
    color_img = Image.open(color_path).convert('RGB')

    # Pick which line-art variant to pair with the color image.
    chosen_category = random.choice(self.category)
    line_path = self.line_paths[chosen_category][index]
    line_img = Image.open(line_path).convert('L')

    assert self.paths_match(color_path, line_path), \
        "The label_path %s and image_path %s don't match." % \
        (color_path, line_path)

    # Apply the same transform to both images, then normalize each.
    color_img, line_img = self.sync_transform(color_img, line_img)
    color_img = normalize(color_img)
    line_img = normalize(line_img)

    real_image = color_img.squeeze(0)
    hints = util.get_colorization_data(color_img, self.opt)
    hint_tensor = torch.cat((hints['mask_B'], hints['hint_B']),
                            dim=1).squeeze(0)
    sketch_tensor = line_img.squeeze(0)

    # Keys below follow the naming the consumer expects (original comment:
    # "Fit to SPADE").
    return {
        'label': real_image,
        'instance': hint_tensor,
        'image': sketch_tensor,
        'path': line_path,
    }
def new_document(text):
    """Print the most probable LDA topic(s) for a new document.

    The text is normalized with ``normalize``, split into whitespace
    tokens, converted to a bag-of-words with the saved dictionary and
    scored by the saved LDA model. Every topic tied for the highest
    probability is printed via ``lda.print_topic``.

    Args:
        text: Raw document text.

    Returns:
        None. Nothing is printed when the model returns no topics for the
        document.
    """
    dictionary = gensim.corpora.Dictionary.load(
        "./lda_stuff/lda_model_T300.dictionary")

    tokens = [word for sent in normalize(text) for word in sent.split()]
    bow = dictionary.doc2bow(tokens)

    lda = gensim.models.LdaMulticore.load("./lda_stuff/lda_model_T300.model")

    # The model yields (topic_id, probability) pairs and may omit topics
    # below its minimum probability, so use the ids it reports instead of
    # assuming a dense list of exactly 300 entries indexed by topic id.
    topic_probs = lda[bow]
    if not topic_probs:
        return

    best_prob = max(prob for _, prob in topic_probs)
    for topic_id, prob in topic_probs:
        if prob == best_prob:
            print(lda.print_topic(topic_id))
data_for_pca_transform = all_neuron_activations # data_for_pca_transform = all_initial_states # scaling data to achieve mean=0 and var=1 scaler = StandardScaler().fit(data_for_pca_transform) data_for_pca_transform_scaled = scaler.transform(data_for_pca_transform) # create PCA mapping pca = PCA(n_components=num_neurons) pca.fit(data_for_pca_transform_scaled) all_pca_data = pca.transform(data_for_pca_transform_scaled) # compute factors for normalizing everything to [-1, 1] if normalize_for_statistics: from utils.normalize import normalize, range2norm, norm2range all_pca_data_normalized, norm_offset, norm_range, minmax = normalize(all_pca_data) # apply PCA mapping to map initial states ... pca_trained_is = pca.transform(scaler.transform(trained_is)) if normalize_for_statistics: pca_trained_is = range2norm(pca_trained_is, norm_offset, norm_range, minmax) pca_inferred_is = np.empty((len(test_hyp_all), 1), dtype=object) for i in range(len(test_hyp_all)): # per test_hyp pca_inferred_is[i,0] = np.empty((num_inferences, 1), dtype=object) for j in range(num_inferences): # per inference pca_inferred_is[i,0][j,0] = pca.transform(scaler.transform(inferred_is[i,0][j,0])) if normalize_for_statistics: pca_inferred_is[i,0][j,0] = range2norm(pca_inferred_is[i,0][j,0], norm_offset, norm_range, minmax) # ... and neuron activations
def _encode_input(self, x):
    """Normalize one or more texts and encode them.

    Args:
        x: A single text or a sequence of texts. A scalar input is
            promoted to a one-element array first.

    Returns:
        Whatever ``self._encode_texts`` returns for the array of
        normalized texts.
    """
    raw_texts = np.array(x)
    if raw_texts.shape == ():
        # A lone text becomes a zero-dimensional array; add a leading axis
        # so it can be iterated like a batch.
        raw_texts = np.expand_dims(raw_texts, 0)

    normalized_texts = [normalize(entry) for entry in raw_texts]
    return self._encode_texts(np.array(normalized_texts))