def load_intermediate_outputs(input_filename, embedding_json, fc_json, lstm_hidden_json, lstm_cell_json, layer_name=None):
    """Load the stored intermediate network outputs for one input file.

    ``layer_name`` is currently unused; it is kept for forward compatibility
    with deeper networks, where the JSON key structure will change.

    Returns ``(fc_out, lstm_hidden, lstm_cell, embedding_output_data, d, T)``
    where ``d`` is the LSTM cell dimensionality (``lstm_cell.shape[1]``).
    """
    _, hidden_by_input = data_format.get_data(lstm_hidden_json)
    _, cell_by_input = data_format.get_data(lstm_cell_json)
    _, fc_by_input = data_format.get_data(fc_json)
    _, embed_by_input = data_format.get_data(embedding_json)

    lstm_hidden = hidden_by_input[input_filename]
    lstm_cell = cell_by_input[input_filename]
    fc_out = fc_by_input[input_filename]
    embedding_output_data = embed_by_input[input_filename]

    # NOTE(review): T is the *whole* shape tuple of the embedding output, not a
    # single sequence-length int — confirm that callers expect a tuple here.
    T = embedding_output_data.shape
    d = lstm_cell.shape[1]
    return fc_out, lstm_hidden, lstm_cell, embedding_output_data, d, T
def get_DstMatrix_singleReview(review_MaxAct_json, final_embed_mat, embedding_size, dictionary_w):
    """Similarity matrix between neurons via the custom neuron distance,
    computed from a single review's max-activation data.

    NOTE(review): only the upper triangle (j >= i) is populated; the lower
    triangle stays zero. Confirm downstream code reads it accordingly.
    """
    keys, data = data_format.get_data(review_MaxAct_json)
    neuron_keys = list(keys)
    n = len(neuron_keys)
    dstMat = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            dstMat[i, j] = neuron_distance(
                final_embed_mat=final_embed_mat,
                embedding_size=embedding_size,
                neuron1=list(data[neuron_keys[i]]),
                neuron2=list(data[neuron_keys[j]]),
                dictionary_w=dictionary_w,
            )
    return dstMat
def get_NeuronExcitingWords_dict(lstm_hidden_json, kkeys, k, save_dir, topN=5):
    """Get the N words that excite each LSTM cell the most for review ``k``
    (i.e. the cell's hidden-state output is maximal during the forward pass).

    Writes the inverted neuron->words mapping to ``save_dir/<k>_ActCells.json``
    and returns ``(json filename, mapping)``.
    """
    d = collections.OrderedDict()
    keys_hidden, data_hidden = data_format.get_data(lstm_hidden_json)
    # kdata: per-word hidden activations for review k; rows index words,
    # columns index LSTM cells (row i is indexed by kkeys[i]).
    kdata = data_hidden[k]
    for i in range(min(len(kkeys), kdata.shape[0])):
        ord_cells = np.argsort(kdata[i, :], axis=0, kind='quicksort')
        # BUG FIX: the original slice [-(topN + 1):-1] dropped the single most
        # strongly activated cell; [-topN:] keeps the true top-N (ascending).
        d[kkeys[i]] = ord_cells[-topN:].tolist()
    NtoW = invert_dict_nonunique(d, topN)
    # BUG FIX: map() is a one-shot iterator in Python 3 — repeated `in` tests
    # against it are wrong after the first pass. Materialize a set instead.
    NtoW_keys = set(map(int, NtoW.keys()))
    # Ensure every cell index appears in the output, even with no words.
    for i in range(kdata.shape[1]):
        if i not in NtoW_keys:
            NtoW[str(i)] = []
    with open(save_dir + str(k) + "_ActCells.json", 'w') as f:
        json.dump(NtoW, f)
    return str(k) + "_ActCells.json", NtoW
def get_MostExcitingWords_allReviews(save_dir, neuronWords_jsons, topN=5):
    """Get the list of top-N exciting words for each neuron, aggregated over
    the whole dataset of per-review jsons.

    A neuron is frozen (no further accumulation) once it has gathered at
    least ``topN`` words. Lists may exceed ``topN`` by the final merge.
    """
    nw_data = dict()  # neuron -> accumulated word list
    # IDIOM FIX: a set gives O(1) membership tests (the original used a list).
    done = set()
    for fname in neuronWords_jsons:
        keys, data = data_format.get_data(save_dir + fname)
        for j in list(keys):
            if j in done:
                continue
            if j in nw_data:
                # Order-preserving union: new words first, then previously
                # seen words that the new list does not already contain.
                nw_data[j] = list(data[j]) + list(set(nw_data[j]) - set(data[j]))
            else:
                nw_data[j] = data[j]
            if len(list(nw_data[j])) >= topN:
                done.add(j)
    return nw_data
x=0, y=0, anchor="bottom_left"))  # tail of a call that begins before this chunk
""" ------------------------------------------------------------------------------------------------------------------------ MAIN APP CODE ------------------------------------------------------------------------------------------------------------------------ """
# Module-level setup: load the serialized model, raw test input, predictions,
# and the precomputed exploratory-analysis artifacts used by the app below.

# Provide data paths and files
load_dir = "./bokeh_vis/static/"
lstm_layer_name = "lstm"

# Get trained model parameters: weights and gate values
keys, data = data_format.get_data(load_dir + "model.json")

# Get raw input
keys_raw, data_raw = data_format.get_data(load_dir + "test_data_text.pickle")

# Load auxiliary data.
# NOTE(review): pickle.load is only safe on trusted local artifacts — do not
# point load_dir at untrusted input.
with open(load_dir + "lstm_predictions.pickle", "rb") as handle:
    predicted_tgs = pickle.load(handle)
with open(load_dir + "exploratoryDataFull.pickle", 'rb') as f:
    (testX, embed_mat, excitingWords_fullSet, similarityMatrix_AllReviews,
     similarityMatrix_PerReview, neuron_types, totalLRP, LRP) = pickle.load(f)
# Per-review LSTM hidden states (keys from get_data are unused here).
_, lstm_hidden = data_format.get_data(
    load_dir + "test_model_internals_lstm_hidden.pickle")

# Get preset buttons' selections