def reload_corpus(all_path=Path + '/data-repository/', train_perc=0.7, shuffle=False):
    """Load the feature/result matrices and split them into train/test sets.

    Parameters
    ----------
    all_path : str
        Directory containing 'feature_matrix.csv' and 'result_matrix.csv'
        (and 'random_idx.csv' when ``shuffle`` is False).
    train_perc : float
        Fraction of rows assigned to the training split (default 0.7).
    shuffle : bool
        When True, draw a fresh random row permutation and persist it to
        'random_idx.csv' so later calls can reproduce the same split;
        when False, reuse the previously saved permutation.

    Returns
    -------
    tuple
        (x_train, y_train, x_test, y_test, idx) where idx is the row
        permutation applied before splitting.
    """
    x = CsvUtility.read_array_from_csv(all_path, 'feature_matrix.csv')
    y = CsvUtility.read_array_from_csv(all_path, 'result_matrix.csv')
    train_size = int(x.shape[0] * train_perc)
    if shuffle:
        idx = np.random.permutation(x.shape[0])
        # BUG FIX: persist the permutation next to the data it indexes.
        # The original hard-coded Path+'/data-repository/' here, so a caller
        # passing a non-default all_path would save the index in one directory
        # while the shuffle=False branch reads it from another.
        CsvUtility.write_array2csv(idx, all_path, 'random_idx.csv')
    else:
        # NOTE(review): assumes 'random_idx.csv' exists from a prior
        # shuffle=True run, and that CsvUtility yields an integer-typed
        # array — confirm; float indices would fail on recent numpy.
        idx = CsvUtility.read_array_from_csv(all_path, 'random_idx.csv')
    x_perm = x[idx]
    y_perm = y[idx]
    return (x_perm[:train_size], y_perm[:train_size],
            x_perm[train_size:], y_perm[train_size:], idx)
def get_simple_inference_penalty(net):
    """Compute a topic-concentration penalty for the network's first layer.

    Loads a precomputed word-topic distribution (LDA gamma) from CSV,
    projects the absolute weights of the network's first parameter tensor
    onto the topics, row-normalizes so each neuron has a distribution over
    topics, and penalizes diffuse distributions: the penalty is the
    reciprocal of the summed per-neuron maximum topic probability, so it
    shrinks as neurons concentrate on single topics.

    Parameters
    ----------
    net : torch.nn.Module
        Network whose first parameter (assumed to be the first layer's
        weight matrix, shape compatible with ``mm`` against gamma) is
        penalized. Must have at least one parameter.

    Returns
    -------
    tuple
        (penalty, latent_neuron_topics) — a scalar Variable and the
        normalized neuron-topic matrix as a numpy array.
    """
    # Precomputed LDA topic distribution; see selected_docs4LDA preprocessing.
    gamma = CsvUtility.read_array_from_csv(Path + '/data-repository', 'topicdist_result.csv')
    gammas = Variable(torch.from_numpy(gamma)).float()
    # Only the first parameter tensor is used. The original looped over
    # every parameter with `if para_iter == 0:` and never broke out,
    # needlessly iterating the rest; it also dead-initialized `penalty`
    # and `latent_neuron_topics` with values that were always overwritten.
    first_weight = next(net.parameters())
    latent_neuron_topics = first_weight.abs().mm(gammas)
    # Row-normalize: each neuron's topic scores become a distribution.
    latent_neuron_topics = latent_neuron_topics / latent_neuron_topics.sum(dim=1).view(-1, 1)
    # Higher per-neuron max topic probability -> lower penalty.
    penalty = Variable(torch.FloatTensor([1.0])) / latent_neuron_topics.max(dim=1)[0].sum()
    return penalty, latent_neuron_topics.data.numpy()