def train(epoch):
    """Run one optimisation pass over the paired query/neighbor batches.

    Works entirely on module-level state (``h_phi``, ``optimizer``, ``gpu``,
    ``dknn_loss``, ``k``, ``logfile`` and the batch-size constants).  The
    ``epoch`` argument is accepted but not read by the body.  The average of
    ``-loss / k`` over all batches is printed to stdout and to ``logfile``.
    """
    h_phi.train()
    per_batch_scores = []

    for query_batch, neighbor_batch in zip(batched_query_train,
                                           batched_neighbor_train):
        optimizer.zero_grad()

        neighbor_inputs, neighbor_labels = neighbor_batch
        query_inputs, query_labels = query_batch
        neighbor_inputs = neighbor_inputs.to(device=gpu)
        neighbor_labels = neighbor_labels.to(device=gpu)
        query_inputs = query_inputs.to(device=gpu)
        query_labels = query_labels.to(device=gpu)

        # Embeddings and one-hot targets, reshaped to the fixed batch layout.
        neighbor_emb = h_phi(neighbor_inputs).reshape(
            NUM_TRAIN_NEIGHBORS, EMBEDDING_SIZE)
        query_emb = h_phi(query_inputs).reshape(
            NUM_TRAIN_QUERIES, EMBEDDING_SIZE)
        neighbor_targets = one_hot(neighbor_labels).reshape(
            NUM_TRAIN_NEIGHBORS, 10)
        query_targets = one_hot(query_labels).reshape(NUM_TRAIN_QUERIES, 10)

        batch_loss = dknn_loss(
            query_emb, neighbor_emb, query_targets, neighbor_targets).mean()
        batch_loss.backward()
        optimizer.step()

        # The reported score is the negated mean loss scaled by 1 / k.
        per_batch_scores.append((-batch_loss).item() / k)

    mean_score = sum(per_batch_scores) / len(per_batch_scores)
    print('Avg. train correctness of top k:', mean_score)
    print('Avg. train correctness of top k:', mean_score, file=logfile)
    logfile.flush()
def continuous_kappa(y, t, y_pow=1, eps=1e-15):
    """Quadratic-weighted kappa between (soft) predictions and targets.

    Args:
        y: predictions; a 1-D input is first passed through
            ``one_hot(y, m=5)``, otherwise it is used as a 2-D
            (items x ratings) array.
        t: targets, handled the same way as ``y``.
        y_pow: when different from 1, ``y`` is raised element-wise to this
            power and every row is re-normalised to sum to (almost) one.
        eps: added to the row sums in that re-normalisation.

    Returns:
        Tuple ``(kappa, conf_mat, hist_rater_a, hist_rater_b, nom, denom)``.
    """
    if y.ndim == 1:
        y = one_hot(y, m=5)
    if t.ndim == 1:
        t = one_hot(t, m=5)

    n_items, n_ratings = y.shape

    # weights[i, j] = (i - j)^2 / (n_ratings - 1)^2
    rating_ids = np.arange(0, n_ratings)
    squared_gap = (rating_ids[:, None] - rating_ids[None, :]) ** 2
    weights = squared_gap / float(n_ratings - 1) ** 2

    if y_pow != 1:
        powered = y ** y_pow
        y = powered / (eps + powered.sum(axis=1)[:, None])

    hist_rater_a = np.sum(y, axis=0)
    hist_rater_b = np.sum(t, axis=0)
    conf_mat = np.dot(y.T, t)

    nom = weights * conf_mat
    expected = np.dot(hist_rater_a[:, None], hist_rater_b[None, :])
    denom = weights * expected / n_items

    kappa = 1 - nom.sum() / denom.sum()
    return kappa, conf_mat, hist_rater_a, hist_rater_b, nom, denom
def continuous_kappa(y, t, y_pow=1, eps=1e-15):
    """Compute the quadratic-weighted kappa of ``y`` against ``t``.

    1-D inputs are expanded with ``one_hot(..., m=5)``; 2-D inputs are used
    as (items x ratings) matrices.  With ``y_pow != 1`` the predictions are
    sharpened (``y ** y_pow``) and each row re-normalised, using ``eps`` to
    guard the row sum.

    Returns:
        ``(kappa, conf_mat, hist_rater_a, hist_rater_b, nom, denom)``.
    """
    if y.ndim == 1:
        y = one_hot(y, m=5)
    if t.ndim == 1:
        t = one_hot(t, m=5)

    item_count, rating_count = y.shape

    # Disagreement weights: squared rating distance scaled into [0, 1].
    levels = np.arange(0, rating_count)
    distance_sq = (levels[:, None] - levels[None, :]) ** 2
    weights = distance_sq / float(rating_count - 1) ** 2

    if y_pow != 1:
        sharpened = y ** y_pow
        row_totals = sharpened.sum(axis=1)[:, None]
        y = sharpened / (eps + row_totals)

    hist_rater_a = np.sum(y, axis=0)
    hist_rater_b = np.sum(t, axis=0)
    conf_mat = np.dot(y.T, t)

    nom = weights * conf_mat
    denom = (weights
             * np.dot(hist_rater_a[:, None], hist_rater_b[None, :])
             / item_count)

    score = 1 - nom.sum() / denom.sum()
    return score, conf_mat, hist_rater_a, hist_rater_b, nom, denom
def __generateData(self, sample_size):
    """Build the digit-addition dataset from MNIST images and symbol arrays.

    For every ordered digit pair ``(a, b)`` this stores ``sample_size``
    randomly drawn training-image pairs under ``self.data["a+b"]`` together
    with the per-digit symbol arrays and the target
    ``one_hot((a + b) // 10) + one_hot((a + b) % 10)``.  The symbol pairs and
    targets are also collected into ``self.symbolsIn`` / ``self.symbolsOut``.
    """
    self.data = {}
    symbol_inputs = []
    symbol_targets = []

    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    trainSamples = [[] for _ in range(10)]
    testSamples = [[] for _ in range(10)]

    symbol_files = (
        "MNIST_data/0.npy", "MNIST_data/1.npy", "MNIST_data/2.npy",
        "MNIST_data/3.npy", "MNIST_data/4.npy", "MNIST_data/5.npy",
        "MNIST_data/6.npy", "MNIST_data/7.npy", "MNIST_data/8.npy",
        "MNIST_data/9.npy",
    )
    symbols = [np.load(symbol_file) for symbol_file in symbol_files]

    # Bucket the images by digit (labels are one-hot, hence the argmax).
    for image, label in zip(mnist.train.images, mnist.train.labels):
        trainSamples[argmax(label)].append(image)
    for image, label in zip(mnist.test.images, mnist.test.labels):
        testSamples[argmax(label)].append(image)

    for a in range(0, 10):
        for b in range(0, 10):
            key = str(a) + "+" + str(b)
            entries = []
            self.data[key] = entries

            leftSymbol = symbols[a]
            rightSymbol = symbols[b]
            tens, ones = divmod(a + b, 10)
            target = one_hot(tens) + one_hot(ones)
            symbol_inputs.append([leftSymbol, rightSymbol])
            symbol_targets.append(target)

            left_pool = trainSamples[a]
            right_pool = trainSamples[b]
            for _ in range(sample_size):
                left = left_pool[randint(0, len(left_pool) - 1)]
                right = right_pool[randint(0, len(right_pool) - 1)]
                entries.append({
                    "left-noisy": left,
                    "left-symbol": leftSymbol,
                    "right-noisy": right,
                    "right-symbol": rightSymbol,
                    "target": target
                })

    self.symbolsIn = np.array(symbol_inputs)
    self.symbolsOut = np.array(symbol_targets)
def forward(self, center_word, context_words):
    """Forward pass.

    Args:
        center_word: the center word; encoded with ``one_hot``.
        context_words: iterable of context words.

    Returns:
        Tuple ``(categorical_distribution, mu, sigma, p_mean, p_sigma)``.
    """
    center_1hot = one_hot(center_word)
    center_emb = self.fc1(center_1hot)

    # Sum of ReLU(fc2([context ; center])) over all context words.
    context_sum = torch.zeros(self.embedding_dimension * 2).cuda()
    for word in context_words:
        pair = torch.cat([self.fc1(one_hot(word)), center_emb], dim=0)
        context_sum += F.relu(self.fc2(pair))

    mu = self.fc3(context_sum)
    sigma = F.softplus(self.fc4(context_sum))

    # Kingma-Welling reparameterization trick
    noise = self.epsilon.sample().cuda()
    latent = mu + (noise * sigma)
    categorical_distribution = F.softmax(self.re1(latent), dim=0)

    p_mean = self.p_mean(center_1hot)
    p_sigma = F.softplus(self.p_sigma(center_1hot))
    return categorical_distribution, mu, sigma, p_mean, p_sigma
def sufficient_statistics(self, x):
    """Return the one-hot encoding of ``x`` over ``self.K`` classes.

    A 2-dimensional ``x`` yields ``one_hot(x.long(), self.K)`` as is; a
    3-dimensional ``x`` is additionally reshaped to
    ``(-1, self.num_dims * self.K)``.

    Raises:
        AssertionError: if ``x`` is neither 2- nor 3-dimensional.
    """
    rank = len(x.shape)
    if rank not in (2, 3):
        raise AssertionError("Input must be 2 or 3 dimensional tensor.")

    stats = one_hot(x.long(), self.K)
    if rank == 3:
        stats = stats.reshape(-1, self.num_dims * self.K)
    return stats
def train():
    """Load the '_t' train/test sets, preprocess them and fit a 'vgg' DeepCNN.

    Inputs are normalised with ``norm4d_per_sample`` and labels are one-hot
    encoded over 2 classes before being handed to ``DeepCNN.train``.
    """
    train_inputs, train_labels = load_train(size='_t')
    train_inputs = norm4d_per_sample(train_inputs)
    train_targets = one_hot(train_labels, 2)

    test_inputs, test_labels = load_test(size='_t')
    test_targets = one_hot(test_labels, 2)
    test_inputs = norm4d_per_sample(test_inputs)

    DeepCNN('vgg').train(train_inputs, train_targets,
                         test_inputs, test_targets)
def train():
    """Load the '_t' train/test sets, preprocess them and fit the 'rl_noun' RL model.

    Inputs are normalised with ``norm4d_per_sample`` and labels are one-hot
    encoded over 2 classes before being handed to ``RL.train``.
    """
    train_inputs, train_labels = load_train(size='_t')
    train_inputs = norm4d_per_sample(train_inputs)
    train_targets = one_hot(train_labels, 2)

    test_inputs, test_labels = load_test(size='_t')
    test_targets = one_hot(test_labels, 2)
    test_inputs = norm4d_per_sample(test_inputs)

    learner = RL(istrained=False, name='rl_noun')
    learner.train(train_inputs, train_targets, test_inputs, test_targets)
def train(G, D, data_iter, n_epochs, lr):
    """Train a label-conditioned generator ``G`` against discriminator ``D``.

    Args:
        G: generator; called on ``cat([z, one_hot_label], 1)``.
        D: discriminator; called on ``cat([image, one_hot_label], 1)``.
        data_iter: iterable of ``(X, L)`` batches (images, integer labels).
        n_epochs: number of passes over ``data_iter``.
        lr: learning rate for both Adam optimizers.

    Side effects:
        Periodically prints losses, writes sample grids and checkpoints under
        the module-level ``save_dir`` and writes a 10-class preview image
        after every epoch.
    """
    opt_g = optim.Adam(G.parameters(), lr, betas=[.5, .999])
    opt_d = optim.Adam(D.parameters(), lr, betas=[.5, .999])
    G.train()
    D.train()
    for epoch in range(n_epochs):
        # The end-of-epoch preview puts G into eval mode; switch it back so
        # that every epoch (not only the first) trains in training mode.
        G.train()
        for i, (X, L) in enumerate(data_iter):
            x_real = X.view(-1, dim_im).to(DEVICE)
            l = one_hot(L, 10).to(DEVICE)
            z = torch.randn(l.size(0), dim_z, device=DEVICE)
            fake_x = G(torch.cat([z, l], 1))

            # --- discriminator step (generator output detached) ---
            fake_score = D(torch.cat([fake_x.detach(), l], 1))
            real_score = D(torch.cat([x_real, l], 1))
            loss_d = -torch.mean(
                torch.log(real_score + 1e-10)
                + torch.log(1 - fake_score + 1e-10))
            D.zero_grad()
            loss_d.backward()
            opt_d.step()

            # --- generator step against the updated discriminator ---
            fake_score = D(torch.cat([fake_x, l], 1))
            real_score = D(torch.cat([x_real, l], 1))
            loss_g = torch.mean(torch.log(1 - fake_score + 1e-10))
            G.zero_grad()
            loss_g.backward()
            opt_g.step()

            if (i + 1) % print_every == 0:
                print('Epoch %d Batch %d ' % (epoch + 1, i + 1)
                      + 'Loss D: %0.3f ' % loss_d.item()
                      + 'Loss G: %0.3f ' % loss_g.item()
                      + 'fake_score: %0.3f ' % torch.mean(fake_score).item()
                      + 'real_score: %0.3f ' % torch.mean(real_score).item())
                # Use -1 rather than the global batch size so a short final
                # batch can still be reshaped.
                _imags = fake_x.view(-1, 1, 28, 28).data
                tv.utils.save_image(
                    _imags, save_dir + '{}_{}.png'.format(epoch + 1, i + 1))

        if (epoch + 1) % save_epoch_freq == 0:
            torch.save(G.state_dict(), save_dir + 'net_g.pt')
            # The discriminator checkpoint must hold D's weights, not G's.
            torch.save(D.state_dict(), save_dir + 'net_d.pt')

        # Preview: one shared noise vector rendered for each of the 10 labels.
        with torch.no_grad():
            G.eval()
            z = torch.randn(1, dim_z, device=DEVICE).repeat(10, 1)
            c = one_hot(torch.arange(0, 10, dtype=torch.long), 10).to(DEVICE)
            fake_x = G(torch.cat([z, c], 1))
            _imags = fake_x.view(-1, 1, 28, 28)
            tv.utils.save_image(_imags, save_dir + '{}.png'.format(epoch + 1))
def data_generator(self):
    """Read the gesture dataset and populate the train/test attributes.

    Sets ``x_train``, ``x_test``, ``gestures``, one-hot ``y_train`` /
    ``y_test`` and ``n_classes`` on ``self``, then prints the array shapes.
    """
    reader = data_reader(
        '../processed_dataset/frame_step_1_seperate_no_empty',
        self.time_window)
    (self.x_train, self.x_test,
     train_labels, test_labels, self.gestures) = reader.run()

    gesture_count = len(self.gestures)
    self.y_train = utils.one_hot(train_labels, gesture_count)
    self.y_test = utils.one_hot(test_labels, gesture_count)
    self.n_classes = gesture_count

    for caption, array in (('x_train:', self.x_train),
                           ('x_test:', self.x_test),
                           ('y_train:', self.y_train),
                           ('y_test:', self.y_test)):
        print(caption, np.shape(array))
def sample():
    """Print 100 characters sampled from the module-level RNN parameters.

    Starts from a randomly chosen character, then repeatedly updates the
    hidden state with tanh, converts the output to a softmax distribution
    and draws the next character from it.
    """
    current_x = one_hot(char_to_i[np.random.choice(chars)], vocab_size)
    hidden = np.zeros((1, hidden_size))

    print('\n\n')
    for _ in range(100):
        hidden = np.tanh(current_x.dot(wxh) + hidden.dot(whh) + bh)
        logits = hidden.dot(why) + by
        probs = np.exp(logits) / np.exp(logits).sum()
        next_i = np.random.choice(vocab_size, p=probs[0])
        current_x = one_hot(next_i, vocab_size)
        print(i_to_char[next_i], end='')
    print('\n\n')
def get_batch(self, batch_size, shuffle=True):
    """Yield ``(X_batch, one_hot(y_batch))`` pairs covering every sample once.

    Batches starting at ``range(0, num_samples - batch_size, batch_size)``
    are yielded first; whatever has not been consumed afterwards is yielded
    as one final batch.

    Args:
        batch_size: number of samples per batch.
        shuffle: shuffle the sample order (via ``np.random``) first.
    """
    order = list(range(self.num_samples))
    if shuffle:
        np.random.shuffle(order)

    consumed = 0
    leading_starts = range(0, self.num_samples - batch_size, batch_size)
    for batch_no, start in enumerate(leading_starts, 1):
        picked = order[start:start + batch_size]
        yield self.X[picked, :], one_hot(self.y[picked], self.class_num)
        consumed = batch_no * batch_size

    if consumed < self.num_samples:
        remainder = order[consumed:]
        yield self.X[remainder, :], one_hot(self.y[remainder], self.class_num)
def state2tensor(state):
    """Encode a game state tuple as a single row tensor.

    ``state`` is unpacked as ``(player, leader, trick, hand, played_cards)``.
    The result concatenates the one-hot player and leader (4 entries each),
    the ``hand2tensor`` encodings of trick and hand, and a 32-entry indicator
    vector of played cards, with a leading axis added.
    """
    player, leader, trick, hand, played_cards = state

    played_mask = torch.zeros(32)
    for card in played_cards:
        played_mask[card2ix[card]] = 1.

    pieces = (
        one_hot(player, 4),
        one_hot(leader, 4),
        hand2tensor(trick),
        hand2tensor(hand),
        played_mask,
    )
    return torch.cat(pieces).unsqueeze(0)
def interpolate(G, z, shifts_r, shifts_count, dim, deformator=None, with_central_border=False):
    """Generate images while shifting ``z`` along latent direction ``dim``.

    Shift magnitudes run from ``-shifts_r`` to ``shifts_r`` in steps of
    ``shifts_r / shifts_count``.  When a ``deformator`` is given, the one-hot
    shift is passed through it before being added to ``z``.  With
    ``with_central_border`` the image whose shift equals exactly 0.0 is
    wrapped by ``add_border``.

    Returns:
        List with the first generated image (moved to CPU) for each shift.
    """
    frames = []
    step = shifts_r / shifts_count
    for shift in np.arange(-shifts_r, shifts_r + 1e-9, step):
        offset = one_hot(z.shape[1:], shift, dim).cuda()
        if deformator is not None:
            offset = deformator(offset)

        frame = G(z + offset).cpu()[0]
        if shift == 0.0 and with_central_border:
            frame = add_border(frame)
        frames.append(frame)
    return frames
def standardize_targets(Y, cost):
    """Bring targets ``Y`` into the layout expected by the given cost.

    * 1-D targets become a single column.
    * Single-column targets are one-hot encoded (negative class 0) when the
      cost is named ``'CategoricalCrossEntropy'``.
    * Single-column targets for a cost whose name contains ``'Hinge'`` are
      one-hot encoded with negative class -1 when more than two distinct
      labels are present; otherwise the label 0 is mapped to -1.

    Args:
        Y: array-like of targets.
        cost: object whose ``__name__`` identifies the cost function.

    Returns:
        The standardized target array.  The caller's input is never modified.
    """
    Y = np.asarray(Y)
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)

    if Y.shape[1] == 1 and cost.__name__ == 'CategoricalCrossEntropy':
        Y = one_hot(Y, negative_class=0.)

    if Y.shape[1] == 1 and 'Hinge' in cost.__name__:
        if len(np.unique(Y)) > 2:
            Y = one_hot(Y, negative_class=-1.)
        else:
            # asarray/reshape may return views of the caller's array, so
            # relabel 0 -> -1 on a private copy instead of in place.
            Y = Y.copy()
            Y[Y == 0] -= 1
    return Y
def standardize_targets(Y, cost):
    """Convert targets ``Y`` to the representation required by ``cost``.

    1-D input is reshaped to one column.  A single column is then one-hot
    encoded for ``'CategoricalCrossEntropy'`` (negative class 0) or, for any
    cost whose name contains ``'Hinge'``, either one-hot encoded with
    negative class -1 (more than two distinct labels) or relabelled so that
    0 becomes -1 (binary case).

    Args:
        Y: array-like of targets.
        cost: object exposing ``__name__``.

    Returns:
        The standardized target array; the argument passed in is left
        untouched.
    """
    Y = np.asarray(Y)
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)

    if Y.shape[1] == 1 and cost.__name__ == 'CategoricalCrossEntropy':
        Y = one_hot(Y, negative_class=0.)

    if Y.shape[1] == 1 and 'Hinge' in cost.__name__:
        if len(np.unique(Y)) > 2:
            Y = one_hot(Y, negative_class=-1.)
        else:
            # Y can still alias the caller's ndarray at this point; copy
            # before the in-place relabelling of 0 to -1.
            Y = Y.copy()
            Y[Y == 0] -= 1
    return Y
def gen_images(paths, labels=None, shuffle=False, repeat=False, name="NOT NAMED!!!(check data.gen_images)"):
    """Generate greyscale images (optionally paired with one-hot labels).

    Args:
        paths: sequence of image file paths.
        labels: optional labels aligned with ``paths``; one-hot encoded over
            the module-level ``num_classes`` and cast to float32.
        shuffle: reshuffle paths (and labels, in lockstep) before each pass.
        repeat: keep cycling over the data instead of stopping after one pass.
        name: label used only in the start-up message.

    Yields:
        ``img`` or ``(img, label)`` depending on whether labels were given.
    """
    paths_shuffled = np.array(paths)

    # print(...) with a single pre-formatted string behaves the same on
    # Python 2 and Python 3.
    print("Data loader INITIALIZED: %s" % name)
    print(" shuffle: %r" % shuffle)
    print(" repeat: %r" % repeat)
    print(" no. paths: %d" % len(paths))

    if labels is not None:
        labels = utils.one_hot(labels, m=num_classes).astype('float32')
    labels_shuffled = np.array(labels)

    while True:
        if shuffle:
            # Replaying the saved RNG state makes the label shuffle use the
            # exact permutation that was just applied to the paths.
            state = np.random.get_state()
            np.random.shuffle(paths_shuffled)
            if labels is not None:
                np.random.set_state(state)
                np.random.shuffle(labels_shuffled)

        for k in range(len(paths_shuffled)):
            path = paths_shuffled[k]
            img = skimage.io.imread(path, as_grey=True)
            if labels is not None:
                yield img, labels_shuffled[k]
            else:
                yield img

        if not repeat:
            break
def predict(model, test_iter, cuda=CUDA_DEFAULT):
    """Evaluate ``model`` on ``test_iter`` and return the average loss.

    Each batch's images are flattened, labels are one-hot encoded, and the
    model's first output is reshaped to ``(batch, -1)`` before the criterion
    (looked up from ``model.model_str``) is applied to ``(img, output)``.
    The summed loss divided by the number of samples is printed and returned.
    """
    # Monitoring loss
    running_loss = 0
    seen = 0
    criterion = get_criterion(model.model_str)

    for img, label in test_iter:
        # Get data
        label = one_hot(label)
        img = img.view(img.size(0), -1)
        img = variable(img, cuda=cuda)
        label = variable(label, to_float=False, cuda=cuda)
        n_in_batch = img.size(0)
        if cuda:
            img = img.cuda()
            label = label.cuda()

        # predict
        outputs = model.forward(**_get_kwargs(model.model_str, img, label))
        outputs = (outputs[0].view(n_in_batch, -1), outputs[1], outputs[2])
        loss = criterion(img, outputs)

        # monitoring
        seen += n_in_batch
        running_loss += t.sum(loss.data)  # cut graph with .data

    avg_loss = running_loss / seen
    print("Validation loss is %.4f" % avg_loss)
    return avg_loss
def gen_samples(self, z=None, classes=None, batch_size=None, device='cpu'):
    """Generate images, their shifted versions and a synthesized mask.

    Exactly one of ``z`` and ``batch_size`` must be provided; when ``z`` is
    missing it is drawn with ``self.make_noise``.  Missing ``classes`` are
    taken from ``self.G.mixed_classes``.

    Returns:
        Tuple ``(img, img_shifted_pos, mask)``.
    """
    assert (z is None) ^ (batch_size is None), 'one of: z, batch_size should be provided'

    if z is None:
        z = self.make_noise(batch_size, device)
    if classes is None:
        classes = self.G.mixed_classes(z.shape[0]).to(device)

    direction = one_hot(
        self.G.dim_z, self.p.latent_shift_r, self.background_dim).to(device)
    shift = self.deformator(direction)

    img = self.G(z, classes)
    img_shifted_pos = self.G(z + shift, classes)

    mode = self.p.synthezing
    if mode == MaskSynthesizing.DIFF:
        # Mask from the difference between the two opposite shifts
        # (images rescaled by 0.5 * x + 0.5 first).
        img_shifted_neg = self.G(z - shift, classes)
        diff = normalize_per_sample(
            get_diff(0.5 * img_shifted_neg + 0.5, 0.5 * img_shifted_pos + 0.5))
        mask = diff_to_mask(diff, self.p.mask_thr)
    elif mode == MaskSynthesizing.INTENSITY:
        # Mask from thresholding the channel-mean intensity.
        intensity = 0.5 * torch.mean(img_shifted_pos, dim=1) + 0.5
        mask = (intensity < self.p.mask_thr).to(torch.long)

    return img, img_shifted_pos, mask
def initialize(self, x_shared, y_shared, m_shared, start=True):
    """Set up shared buffers, locks and on-disk slots for the loader.

    Stores the three shared objects on ``self``, converts the labels loaded
    from ``self.labels_path`` to one-hot form, allocates shared-memory
    buffers, creates the synchronisation primitives, copies one randomly
    chosen training video per memory slot below ``/dev/shm/loader/`` and
    installs ``self.terminate`` as handler for SIGINT and SIGTERM.

    Args:
        x_shared, y_shared, m_shared: stored unchanged on ``self``.
        start: when true, ``self.start()`` is called once set-up is done.
    """
    self.x_shared, self.y_shared, self.m_shared = x_shared, y_shared, m_shared

    # Replace every label with its one-hot encoding and share the result.
    # NOTE(review): assigning a bare ``map`` object only works where ``map``
    # returns a list (Python 2) - confirm the target interpreter.
    labels = np.load(self.labels_path)
    labels[:] = map(lambda y: utils.one_hot(y,self.n_classes), labels)
    self.labels_shm = shm.memmap(data=labels)

    # Three shared-memory buffers each for inputs and targets.
    self.x_shm, self.y_shm = [], []
    for _ in range(3):
        self.x_shm.append(shm.memmap(shape=self.x_shape, dtype=floatX))
        self.y_shm.append(shm.memmap(shape=self.y_shape, dtype=floatX))

    self.is_terminated = mp.Value(ctypes.c_bool, False)

    # All read/write locks start out acquired.
    self.write_locks = [mp.Lock(), mp.Lock()]
    self.read_locks = [mp.Lock(), mp.Lock()]
    for lock in self.write_locks: lock.acquire()
    for lock in self.read_locks: lock.acquire()
    self.validating = mp.Lock()

    # Per-instance file prefix inside /dev/shm/loader/ (current time appended).
    self.loader_path = "/dev/shm/loader/"
    if not os.path.exists(self.loader_path): os.mkdir(self.loader_path)
    self.loader_path += "%f"%time()

    # One lock and one randomly picked training video per memory slot.
    self.mem_locks = []
    for i in range(self.n_mem_slots):
        self.mem_locks.append(mp.Lock())
        video_number = split.train_idxs[npr.randint(len(split.train_idxs))]
        path = self.data_path+str(video_number).zfill(4)+".npy"
        shutil.copyfile(path, self.loader_path+"%i_%i"%(i, video_number))

    if start: self.start()

    # Route interrupt/termination signals to the loader's terminate handler.
    signal.signal(signal.SIGINT, self.terminate)
    signal.signal(signal.SIGTERM, self.terminate)
def sample(self, t=1.):
    """Draw a sample from the mixture of logistics at temperature ``t``.

    A mixture component is picked per position with the Gumbel-max trick on
    ``self.logit_probs / t``; the selected component's parameters are then
    used to sample three channels, where the second and third channel are
    conditioned linearly on the previous ones through ``coeffs``.

    Returns:
        The sampled tensor, clamped to [-1, 1] and rescaled by ``x / 2 + 0.5``.
    """
    uniform_mix = torch.Tensor(self.logit_probs.size()).uniform_(
        1e-5, 1. - 1e-5).cuda()
    gumbel = -torch.log(-torch.log(uniform_mix))  # B, M, H, W
    chosen = torch.argmax(self.logit_probs / t + gumbel, 1)
    sel = one_hot(chosen, self.num_mix, dim=1)  # B, M, H, W
    sel = sel.unsqueeze(1)  # B, 1, M, H, W

    # select logistic parameters
    means = (self.means * sel).sum(dim=2)  # B, 3, H, W
    log_scales = (self.log_scales * sel).sum(dim=2)  # B, 3, H, W
    coeffs = (self.coeffs * sel).sum(dim=2)  # B, 3, H, W

    # cells from logistic & clip to interval
    # we don't actually round to the nearest 8bit value when sampling
    u = torch.Tensor(means.size()).uniform_(1e-5, 1. - 1e-5).cuda()  # B, 3, H, W
    logistic_noise = torch.log(u) - torch.log(1. - u)
    x = means + torch.exp(log_scales) / t * logistic_noise  # B, 3, H, W

    first = torch.clamp(x[:, 0, :, :], -1, 1.)  # B, H, W
    second = torch.clamp(
        x[:, 1, :, :] + coeffs[:, 0, :, :] * first, -1, 1)  # B, H, W
    third = torch.clamp(
        x[:, 2, :, :] + coeffs[:, 1, :, :] * first
        + coeffs[:, 2, :, :] * second, -1, 1)  # B, H, W

    stacked = torch.stack([first, second, third], 1)
    return stacked / 2. + 0.5
def load_train(self):
    """Load labels, cached predictions and per-image shape/moment features.

    Populates ``info_train`` / ``info_valid`` (centroid distance, image
    shape, angle, minor and major axis per image), ``y_train`` / ``y_valid``
    (predictions loaded from ``train_pred_file`` / ``valid_pred_file``) and
    ``labels_train`` / ``labels_valid`` (one-hot over 121 classes), using the
    default validation split.
    """
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

    # The split only needs to be read once.
    split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    image_shapes = np.asarray(
        [img.shape for img in data.load('train')]).astype(np.float32)
    moments = np.load("data/image_moment_stats_v1_train.pkl")

    # Distance of the centroid (columns swapped to match the shape order)
    # from the image centre.
    centroid_distance = np.abs(
        moments["centroids"][:, [1, 0]] - image_shapes / 2)
    info = np.concatenate(
        (centroid_distance, image_shapes,
         moments["angles"][:, None],
         moments["minor_axes"][:, None],
         moments["major_axes"][:, None]), 1).astype(np.float32)

    self.info_train = info[indices_train]
    self.info_valid = info[indices_valid]
    self.y_train = np.load(self.train_pred_file).astype(np.float32)
    self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
    self.labels_train = labels[indices_train]
    self.labels_valid = labels[indices_valid]
def __getitem__(self, index: int) -> List[Any]:
    """Load, transform and validate the sample at ``index``.

    One image per entry of ``self.files`` is loaded (PNG as greyscale, NPY
    via ``np.load``), transformed with the matching entry of
    ``self.transforms`` and sanity-checked.

    Returns:
        ``[filename] + transformed tensors + bounds``.

    Raises:
        ValueError: if the file suffix is neither ``.png`` nor ``.npy``.
    """
    filename: str = self.filenames[index]
    suffix = Path(filename).suffix

    images: List[D]
    if suffix == ".png":
        images = [Image.open(group[index]).convert('L') for group in self.files]
    elif suffix == ".npy":
        images = [np.load(group[index]) for group in self.files]
    else:
        raise ValueError(filename)

    # Final transforms and assertions
    t_tensors: List[Tensor] = [
        transform(image) for transform, image in zip(self.transforms, images)
    ]
    main = t_tensors[0]
    assert 0 <= main.min() and main.max() <= 1  # main image is between 0 and 1
    b, w, h = main.shape
    assert b == 1

    # All masks (ground truths) are class encoded
    for mask in t_tensors[1:]:
        assert one_hot(mask, axis=0)
        assert mask.shape == (self.C, w, h)

    bounds = [generate(*t_tensors, filename) for generate in self.bounds_generators]
    return [filename] + t_tensors + bounds
def run_evaluation(agents, game, boards, name=''):
    """Play ``args.trials`` evaluation games and report win statistics.

    After every trial the agents are reset and the one-hot encoding of
    ``game.winners() + 1`` is accumulated.  The per-board win frequencies
    are printed (mean and std over boards) and histograms for columns 1 and
    2 are saved to ``'<name lower-cased>.png'``.
    """
    winners = 0.
    for t in range(args.trials):
        states, any_valid = game.reset(boards)
        print(f'\r[{t + 1:2d}/{args.trials:2d}] {name} boards eval...')

        step = 0
        while any_valid and not game.finished():
            print(f'\r[{step:4d}]', end='')
            step += 1
            chosen = [agent.act(state) for agent, state in zip(agents, states)]
            states, rewards = game.step(torch.stack(chosen, dim=0))
            for agent, reward in zip(agents, rewards):
                agent.observe(reward)
        print()

        for agent in agents:
            agent.reset()
        winners += one_hot(game.winners() + 1,
                           num_classes=game.num_players + 1).float()

    winners /= args.trials
    print(winners.float().mean(0))
    print(winners.float().std(0))

    # One histogram panel per player column.
    for panel, label in ((1, 'player 1'), (2, 'player 2')):
        plt.subplot(1, 2, panel)
        plt.hist(winners[:, panel].float().cpu().numpy(),
                 bins=args.trials + 1, range=(0, 1), label=label)
        plt.legend()
        plt.xlim(0., 1.)
    plt.savefig(f'{name.lower()}.png')
    plt.close()
def create_decoder_input(self, final_caps, labels=None):
    """Build the decoder input from the final capsules.

    Every capsule except the one belonging to the target class is zeroed
    out, then the capsules are flattened per sample.  The target class is
    ``labels`` when given; otherwise it is the prediction computed from the
    capsules' own logits.

    Args:
        final_caps (FloatTensor): final capsules, documented as
            [batch_size, num final caps, length final caps].
        labels (LongTensor, optional): labels, documented as [batch_size].

    Returns:
        FloatTensor: flattened and masked version of the final capsules.
    """
    if labels is not None:
        targets = labels
    else:
        # No labels given: fall back to the model's own predictions.
        targets = self.compute_predictions(self.compute_logits(final_caps))

    # One-hot mask over the capsule axis, broadcast over capsule length.
    capsule_mask = one_hot(targets, final_caps.shape[1])[:, :, None]
    kept = final_caps * capsule_mask
    return kept.view(final_caps.shape[0], -1)
def log_losses(y, t, eps=1e-15):
    """Per-sample cross-entropy of predictions ``y`` against targets ``t``.

    Args:
        y: predicted probabilities, one row per sample.
        t: targets; a 1-D input is first expanded with ``one_hot``.
        eps: predictions are clipped to ``[eps, 1 - eps]`` before the log.

    Returns:
        1-D array with ``-sum(t * log(y))`` for every row.
    """
    if t.ndim == 1:
        t = one_hot(t)

    clipped = np.clip(y, eps, 1 - eps)
    return -np.sum(t * np.log(clipped), axis=1)
def edge_tensors(self, edge_types, device, type_onehot=True):
    """Return edge-type and edge-index tensors for a symmetric edge list.

    Every edge in ``self.edges`` is emitted in both directions: first all
    ``(part_a, part_b)`` pairs, then all ``(part_b, part_a)`` pairs.

    Args:
        edge_types: list of type names; an edge's type is its index here.
        device: device for the created tensors.
        type_onehot: return one-hot float types instead of integer indices.

    Returns:
        Tuple ``(edge_type, edge_indices)``.
    """
    num_edges = len(self.edges)

    # get directed edge indices in both directions as tensor
    forward_pairs = [[e['part_a'], e['part_b']] for e in self.edges]
    backward_pairs = [[e['part_b'], e['part_a']] for e in self.edges]
    edge_indices = torch.tensor(
        forward_pairs + backward_pairs,
        device=device, dtype=torch.long).view(1, num_edges * 2, 2)

    # get edge type as tensor
    type_ids = torch.tensor(
        [edge_types.index(e['type']) for e in self.edges],
        device=device, dtype=torch.long)
    if type_onehot:
        n_types = len(edge_types)
        encoded = one_hot(inp=type_ids, label_count=n_types)
        edge_type = encoded.transpose(0, 1).view(
            1, num_edges, n_types).to(dtype=torch.float32)
    else:
        edge_type = type_ids.view(1, num_edges)

    # add edges in other direction (symmetric adjacency)
    edge_type = torch.cat([edge_type, edge_type], dim=1)
    return edge_type, edge_indices
def _get_minibatch_feed_dict(self, target_q_values, non_terminal_minibatch, terminal_minibatch):
    """Construct the feed_dict for train_op.

    The non-terminal transitions are paired, in order, with the max q-values
    from the target network; terminal transitions follow and get a target
    q-value of 0.  The expected q-value of each transition is
    ``reward + reward_discount * target_q``.

    @return: feed_dict to be used for train_op
    """
    assert len(target_q_values) == len(non_terminal_minibatch)

    states, expected_q, actions = [], [], []
    transitions = itertools.chain(non_terminal_minibatch, terminal_minibatch)
    discount = self.config.reward_discount
    n_actions = self.env.action_space.n

    # zip_longest pads the shorter target_q_values with 0, which is what
    # gives terminal states a zero future value.
    for transition, target_q in zip_longest(transitions, target_q_values,
                                            fillvalue=0):
        state, action, reward, _, _ = transition
        states.append(state)
        expected_q.append(reward + discount * target_q)
        actions.append(utils.one_hot(action, n_actions))

    return {
        self.network.x_placeholder: states,
        self.network.q_placeholder: expected_q,
        self.network.action_placeholder: actions,
    }
def generate_gradients(self, input_image, target_class):
    """Gradient of the target-class output with respect to the input.

    The model is put into eval mode for the computation and switched to
    train mode afterwards.  ``input_image`` itself is not modified; a clone
    is used as the differentiable input.

    Returns:
        Gradient tensor for the cloned input.
    """
    # Put model in evaluation mode
    self.model.eval()

    probe = input_image.clone()
    probe.requires_grad = True

    with torch.enable_grad():
        # Forward
        scores = self.model(probe)
        # Zero grads
        self.model.zero_grad()

        # One-hot selector picks the target class among the outputs.
        selector = tensor2cuda(one_hot(target_class, scores.shape[1]))
        grad = torch.autograd.grad(
            scores, probe, grad_outputs=selector, only_inputs=True)[0]

    self.model.train()
    return grad
def __init__(self, filename, neurons=None, activations=None, learning_rate=0.1, epoch=1000, mini_batch_size=256):
    """Load a CSV dataset and set up the network hyper-parameters.

    The last CSV column is the target, all other columns are features.
    With more than two distinct target values the problem is treated as
    multi-class: the output layer is resized to the number of classes and
    the targets are one-hot encoded.

    Args:
        filename: path of the CSV file (read with ``pandas.read_csv``).
        neurons: layer sizes without the input layer; defaults to
            ``[2, 3, 1]``.  The sequence is copied, never modified.
        activations: stored as given.
        learning_rate: stored as given.
        epoch: stored as given.
        mini_batch_size: stored as given, but replaced by the number of
            examples when the dataset has fewer than 2000 rows.
    """
    # Work on a private list: the layer sizes are edited below, which would
    # otherwise leak into the caller's list or into a shared default.
    neurons = [2, 3, 1] if neurons is None else list(neurons)

    data = pd.read_csv(filename).to_numpy()
    self.X = data[:, :len(data[0]) - 1]
    self.Y = data[:, len(data[0]) - 1]
    self.Y = self.Y.reshape((self.Y.shape[0], 1))
    self.number_of_examples, self.number_of_features = self.X.shape

    # number_of_classes holds the array of distinct target values; Y becomes
    # the index of each target within that array.
    self.number_of_classes, self.Y = np.unique(self.Y, return_inverse=True)
    self.multi_class = False
    if len(self.number_of_classes) > 2:
        self.multi_class = True
        neurons[-1] = len(self.number_of_classes)
        self.Y = utils.one_hot(self.Y)

    # layers is counted before the input layer is prepended.
    self.layers = len(neurons)
    self.neurons = neurons
    self.neurons.insert(0, self.number_of_features)

    self.activations = activations
    self.learning_rate = learning_rate
    self.epoch = epoch
    self.mini_batch_size = mini_batch_size
    if self.number_of_examples < 2000:
        self.mini_batch_size = self.number_of_examples

    self.predicted_weights = []
    self.predicted_b_values = []
def predict(self, case_ids, catchphrase_repr_norm, case_sentences, case_catchphrases):
    """Score every requested case against all catchphrase representations.

    Args:
        case_ids: list of case_id
        catchphrase_repr_norm: catchphrase representations; its length is
            the number of catchphrases.
        case_sentences: mapping case_id -> sentence(s) of that case.
        case_catchphrases: mapping case_id -> true catchphrases of that case.

    Returns:
        Tuple ``(y_true, y_pred)``: one-hot ground truth per case and the
        vertically stacked predictions.
    """
    LOGGER.info("start to predict {}case".format(len(case_ids)))
    queries = [case_sentences[case_id] for case_id in case_ids]

    LOGGER.info("retrieve relevant documents for queries in test set")
    per_case_scores = [
        self.match_catch_repr(query, catchphrase_repr_norm).detach().numpy()
        for query in tqdm(queries)
    ]
    y_pred = np.vstack(per_case_scores)

    catchphrase_count = len(catchphrase_repr_norm)
    LOGGER.info("The prediction for The first 3 sentence:")
    LOGGER.info(y_pred[:3])

    # one hot form
    y_true = np.array([
        one_hot(case_catchphrases[case_id], catchphrase_count)
        for case_id in case_ids
    ])
    return y_true, y_pred
def learn(self):
    """Run one actor-critic update on the stored episode and reset the buffer.

    The loss is the sum of a value term (half the mean squared advantage),
    a policy term (advantage-weighted log-probability of the taken actions)
    and ``beta`` times the mean of ``sum(policy * log(policy))``.

    Returns:
        Tuple ``(loss value, mean predicted value)`` as Python floats.
    """
    num_steps = len(self.rewards)

    # discount reward over whole episode
    returns = torch.zeros((num_steps, self.states[0].shape[0]), device=device)
    running = 0.
    for step in range(num_steps - 1, -1, -1):
        running = self.rewards[step] + self.discount * running
        returns[step, :] = running
    returns = returns.view(-1)

    actions = one_hot(torch.cat(self.actions), num_classes=self.num_actions)
    policy = torch.cat(self.policies).view(-1, self.num_actions)
    value = torch.cat(self.values).view(-1)
    advantage = returns - value

    # MSE on rewards and values
    value_term = 0.5 * torch.mean(torch.pow(advantage, 2.))
    # CE on policy and actions
    taken_log_prob = torch.log(torch.sum(actions.float() * policy, dim=1) + 1e-8)
    policy_term = torch.mean(advantage.detach() * taken_log_prob)
    # entropy penalty
    entropy_term = torch.mean(torch.sum(policy * torch.log(policy + 1e-8), dim=-1))

    loss = value_term - policy_term + self.beta * entropy_term
    loss.backward()
    self.optimizer.step()
    self.optimizer.zero_grad()
    self.reset()
    return loss.item(), torch.mean(value).item()
def multinomial_train(self, X, y, C, w0=None, b0=None, eta=0.5, max_iterations=1000):
    """Fit multinomial logistic regression with mini-batch gradient steps.

    Inputs:
    - X: training features, a N-by-D numpy array
    - y: multiclass training labels, a N dimensional numpy array
    - C: number of classes in the data
    - w0: optional initial C-by-D weight matrix (zeros when omitted)
    - b0: optional initial bias vector of length C (zeros when omitted)
    - eta: learning rate
    - max_iterations: maximum number of iterations to perform

    Stores on ``self``:
    - w: C-by-D weight matrix
    - b: bias vector of length C
    """
    N, D = X.shape

    w = np.zeros((C, D)) if w0 is None else w0
    assert w.shape == (
        C, D), f"check your w0, its dimension should be: {(C, D)}"
    b = np.zeros(C) if b0 is None else b0

    # Fold the bias into the weights: params has shape (D + 1, C) with the
    # bias in row 0, and X gets a matching leading column of ones.
    params = np.hstack((b.reshape(-1, 1), w)).T
    X = np.insert(X, 0, 1, axis=1)
    Y = one_hot(y, nb_class=C)

    residual = softmax(X @ params) - Y
    tol = 1e-5
    for it in range(max_iterations):
        idx = minibatch(X)
        previous = params
        params = params - eta / N * X[idx, :].T @ residual[idx, :]
        # Only the rows of the current mini-batch are refreshed.
        residual[idx, :] = softmax(X[idx, :] @ params) - Y[idx, :]
        if np.max(np.abs(previous - params)) < tol:
            print(f"Converged in {it} iters.")
            break

    fitted = params.T
    w = fitted[:, 1:]
    b = fitted[:, 0]
    assert w.shape == (C, D)
    assert b.shape == (C, )
    self.w = w
    self.b = b
def compute_roc_data(dataloader, model, device):
    """Collect one-hot labels and model outputs separately per gender column.

    Column 0 of the ``gender`` batch tensor selects the samples reported as
    "female", column 1 those reported as "male".

    Returns:
        Tuple ``(female_ys, male_ys, female_ps, male_ps)`` of concatenated
        numpy arrays.
    """
    labels_by_group = ([], [])
    preds_by_group = ([], [])

    model.eval()
    for X, y, gender in dataloader:
        pred = model(X.to(device))
        for group in range(2):
            member = (gender[:, group] == 1)
            # Skip groups without any sample in this batch.
            if y[member].shape[0] > 0:
                labels_by_group[group].append(
                    utils.one_hot(y[member]).cpu().numpy())
                preds_by_group[group].append(pred[member].data.cpu().numpy())

    female_ys = np.concatenate(labels_by_group[0])
    male_ys = np.concatenate(labels_by_group[1])
    female_ps = np.concatenate(preds_by_group[0])
    male_ps = np.concatenate(preds_by_group[1])
    return female_ys, male_ys, female_ps, male_ps
def iterate(self, loader, model, criterion, optimizer, training=True):
    """Run one pass over ``loader`` and return the averaged statistics.

    Args:
        loader: iterable of ``(x, targets)`` batches; must support ``len``.
        model: network to run; put in train or eval mode per ``training``.
        criterion: loss applied to ``(preds, targets)``.
        optimizer: used only when ``training`` is true.
        training: whether to back-propagate and update the parameters.

    Returns:
        Dict mapping every name in ``status_properties`` to its accumulated
        value divided by the number of batches.
    """
    if training:
        model.train()
    else:
        model.eval()

    props = {k: 0 for k in status_properties}
    for i, data in enumerate(loader):
        x, targets = data
        x = x.to(device)
        targets = targets.view(-1).to(device)
        preds = model(x)

        # NOTE(review): this inspects self.criterion rather than the
        # ``criterion`` argument - confirm they are always the same object.
        if isinstance(self.criterion, BCEDiceLoss):
            targets = one_hot(targets, 3)
        loss = criterion(preds, targets)

        props['loss'] += loss.item()
        a, a1, a2, a3 = accuracy_from_logits(preds.clone(), targets.clone())
        props['accuracy'] += a.item()
        props['accuracy_1'] += a1
        props['accuracy_2'] += a2
        props['accuracy_3'] += a3

        if training:
            optimizer.zero_grad()
            loss.backward()
            # Clip before stepping: clipping after optimizer.step() cannot
            # influence the update that was just applied.
            clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()

    L = len(loader)
    props = {k: v / L for k, v in props.items()}
    return props
def build_set(corpus, vocab_len, size=2000, one_hot_enc=True):
    """Take ``size`` examples from ``corpus`` and return them as arrays.

    Args:
        corpus: object whose ``generate(indexer=..., fitted=True)`` yields
            ``(x, y)`` pairs.
        vocab_len: vocabulary size used for both encodings.
        size: number of examples to take.
        one_hot_enc: additionally one-hot encode the inputs.

    Returns:
        Tuple ``(X, y)`` with ``y`` converted by ``to_categorical``.
    """
    examples = itertools.islice(
        corpus.generate(indexer=idxr, fitted=True), size)
    inputs, targets = list(zip(*examples))

    X = np.asarray(inputs)
    if one_hot_enc:
        X = one_hot(X, vocab_len)
    y = to_categorical(targets, nb_classes=vocab_len)
    return X, y
def load_train(self):
    """Load training images and labels and split them into train/valid.

    Labels are one-hot encoded over 121 classes as float32; the index arrays
    come from the file at ``self.validation_split_path``.
    """
    all_images = data.load('train')
    all_labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

    split = np.load(self.validation_split_path)
    train_idx = split['indices_train']
    valid_idx = split['indices_valid']

    self.images_train, self.labels_train = (
        all_images[train_idx], all_labels[train_idx])
    self.images_valid, self.labels_valid = (
        all_images[valid_idx], all_labels[valid_idx])
def load_train(self):
    """Load training images and labels and split them into train/valid.

    Labels are one-hot encoded as float32; the index arrays come from the
    pickle file at ``self.validation_split_path``.
    """
    images = data.load('train')
    labels = utils.one_hot(data.labels_train).astype(np.float32)

    # Use a context manager so the split file is closed deterministically.
    # NOTE: pickle must only be used on trusted files.
    with open(self.validation_split_path, 'rb') as split_file:
        split = pickle.load(split_file)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    self.images_train = images[indices_train]
    self.labels_train = labels[indices_train]
    self.images_valid = images[indices_valid]
    self.labels_valid = labels[indices_valid]
def update_agent(agent, replay_memory, gamma, optim, batch_size):
    """Perform one optimisation step on a batch sampled from replay memory.

    Args:
        agent: provides ``num_actions``, ``compute_targets`` and ``loss``.
        replay_memory: provides ``sample(batch_size)``.
        gamma: discount factor forwarded to ``agent.compute_targets``.
        optim: optimizer to step and then zero.
        batch_size: number of transitions to sample.

    Returns:
        First element of the loss data (``loss.data[0]``).
    """
    batch = replay_memory.sample(batch_size)
    states, actions, rewards, next_states, non_ends = samples_to_tensors(batch)

    action_mask = utils.one_hot(actions.unsqueeze(1), agent.num_actions)
    targets = agent.compute_targets(rewards, next_states, non_ends, gamma)

    loss = agent.loss(
        Variable(states), Variable(action_mask), Variable(targets))
    loss.backward()
    optim.step()
    optim.zero_grad()
    return loss.data[0]
def compute_targets(self, rewards, next_states, non_ends, gamma):
    """Compute batch of targets for distributional dqn

    The greedy action's return distribution in the next state is shifted by
    the reward, scaled by ``gamma`` (zeroed for terminal transitions via
    ``non_ends``), clamped to ``[vmin, vmax]`` and projected back onto the
    fixed support by splitting each atom's probability between its two
    neighbouring support points.

    params:
        rewards: Tensor [batch, 1]
        next_states: Tensor [batch, channel, w, h]
        non_ends: Tensor [batch, 1]
        gamma: float

    returns:
        CUDA float tensor built from a [batch, num_atoms] array of
        projected probability mass.

    NOTE(review): rewards and non_ends receive an extra axis through
    ``unsqueeze(1)`` below - confirm the shapes listed above against the
    callers.
    """
    assert not self.double_dqn, 'not supported yet'

    # get next distribution
    next_states = Variable(next_states, volatile=True)
    # [batch, num_actions], [batch, num_actions, num_atoms]
    next_q_vals, next_probs = self._q_values(self.target_q_net, next_states)
    next_actions = next_q_vals.data.max(1, True)[1] # [batch, 1]
    # one-hot mask over actions, broadcast over atoms, keeps only the
    # distribution of the greedy action after summing over axis 1
    next_actions = utils.one_hot(next_actions, self.num_actions).unsqueeze(2)
    next_greedy_probs = (next_actions * next_probs.data).sum(1)

    # transform the distribution
    rewards = rewards.unsqueeze(1)
    non_ends = non_ends.unsqueeze(1)
    proj_zpoints = rewards + gamma * non_ends * self.zpoints.data
    proj_zpoints.clamp_(self.vmin, self.vmax)

    # project onto shared support
    # b is the (fractional) position of every projected atom on the support
    b = (proj_zpoints - self.vmin) / self.delta_z
    lower = b.floor()
    upper = b.ceil()
    # handle corner case where b is integer
    # (lower == upper would make both interpolation weights zero, so lower
    # is moved one step down; if that leaves the support, both bounds are
    # moved one step up instead)
    eq = (upper == lower).float()
    lower -= eq
    lt0 = (lower < 0).float()
    lower += lt0
    upper += lt0

    # note: it's faster to do the following on cpu
    # ml / mu: share of each atom's probability assigned to its lower /
    # upper neighbour
    ml = (next_greedy_probs * (upper - b)).cpu().numpy()
    mu = (next_greedy_probs * (b - lower)).cpu().numpy()

    lower = lower.cpu().numpy().astype(np.int32)
    upper = upper.cpu().numpy().astype(np.int32)

    batch_size = rewards.size(0)
    mass = np.zeros((batch_size, self.num_atoms), dtype=np.float32)
    brange = range(batch_size)
    # accumulate atom by atom so that, within one statement, every batch row
    # is written at most once
    for i in range(self.num_atoms):
        mass[brange, lower[brange, i]] += ml[brange, i]
        mass[brange, upper[brange, i]] += mu[brange, i]

    return torch.from_numpy(mass).cuda()
def sample_evolution(start, cls, ns=100):
    """Endlessly evolve a sample from ``start`` and plot it every ``ns`` rounds.

    Args:
        start: start data; plotted once before sampling begins.
        cls: class label(s), one-hot encoded with ``dim=10`` and fed to the
            sampler under ``rbm.s``.
        ns: number of sampling rounds between two plots.

    This function never returns; it loops until interrupted.
    """
    sampler = t.compile_function(initial_vmap, mb_size=1, monitors=[m_model],
                                 name='evaluate', train=False, mode=mode)

    current = start
    plot_data(current)

    label = one_hot(np.atleast_2d(cls), dim=10)
    label = label.reshape((label.shape[0], 1, label.shape[1]))

    while True:
        for _ in range(ns):
            # draw a new sample
            for outputs in sampler({rbm.v: current, rbm.s: label}):
                current = outputs[0]
        plot_data(current)
def load_train(self):
    """Load labels, cached predictions and image shapes for train/valid.

    Populates ``image_shapes_train`` / ``image_shapes_valid``, ``y_train`` /
    ``y_valid`` (predictions loaded from ``train_pred_file`` /
    ``valid_pred_file``) and ``labels_train`` / ``labels_valid`` (one-hot
    over 121 classes), using the default validation split.
    """
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

    # The split only needs to be read once.
    split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    image_shapes = np.asarray(
        [img.shape for img in data.load('train')]).astype(np.float32)
    self.image_shapes_train = image_shapes[indices_train]
    self.image_shapes_valid = image_shapes[indices_valid]

    self.y_train = np.load(self.train_pred_file).astype(np.float32)
    self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
    self.labels_train = labels[indices_train]
    self.labels_valid = labels[indices_valid]
def load_train(self):
    """Load training data plus pseudo-labelled test (and validation) data.

    Two thirds of the shuffled test set, selected by ``self.shard`` (0, 1 or
    2), are used as pseudo-labelled data with the predictions from
    ``self.test_pred_file``.  When the instance has a ``valid_pred_file``
    attribute, the validation images with those predictions are prepended
    to the pseudo-labelled set.

    Raises:
        ValueError: if ``shard`` is missing or not one of 0, 1, 2.
    """
    train_images = data.load('train')
    train_labels = utils.one_hot(data.labels_train).astype(np.float32)

    # Only attributes set on the instance itself count here.
    has_valid_predictions = "valid_pred_file" in self.__dict__
    if has_valid_predictions:
        valid_pseudo_labels = np.load(self.valid_pred_file).astype(np.float32)
    else:
        print("No valid_pred_file set. Only using test-set for pseudolabeling!!")

    shuffle = np.load("test_shuffle_seed0.npy")

    if "shard" not in self.__dict__:
        raise ValueError("Missing argument: shard: (should be value in {0, 1, 2})")
    if self.shard not in [0, 1, 2]:
        raise ValueError("Wrong argument: shard: (should be value in {0, 1, 2})")

    # Floor division keeps the slice bounds integral on Python 2 and 3.
    N = len(shuffle)
    if self.shard == 0:
        train_shard = shuffle[N // 3:]
    if self.shard == 1:
        train_shard = np.concatenate((shuffle[:N // 3], shuffle[2 * N // 3:]))
    if self.shard == 2:
        train_shard = shuffle[:2 * N // 3]

    test_images = data.load('test')[train_shard]
    test_pseudo_labels = np.load(self.test_pred_file)[train_shard].astype(np.float32)
    print(test_pseudo_labels.shape)

    if not hasattr(self, 'validation_split_path'):
        self.validation_split_path = DEFAULT_VALIDATION_SPLIT_PATH
    split = np.load(self.validation_split_path)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    self.images_train = train_images[indices_train]
    self.labels_train = train_labels[indices_train]

    if has_valid_predictions:
        self.images_pseudo = np.concatenate((train_images[indices_valid], test_images), 0)
        self.labels_pseudo = np.concatenate((valid_pseudo_labels, test_pseudo_labels), 0)
    else:
        self.images_pseudo = test_images
        self.labels_pseudo = test_pseudo_labels

    self.images_valid = train_images[indices_valid]
    self.labels_valid = train_labels[indices_valid]
def compute_targets(self, rewards, next_states, non_ends, gamma):
    """Build the batch of bootstrap targets used to train the DQN.

    params:
        rewards: Tensor [batch]
        next_states: Tensor [batch, channel, w, h]
        non_ends: Tensor [batch]
        gamma: float

    Returns ``rewards + gamma * next_qs * non_ends`` where ``next_qs`` is
    the target network's value of the next state: the value of the online
    network's greedy action when ``self.double_dqn`` is set, otherwise the
    maximum over actions.
    """
    target_q = self.target_q_values(next_states)

    if not self.double_dqn:
        # max over dim 1 returns (values, indices); keep the values
        bootstrap = target_q.max(1)[0]
    else:
        # pick the action with the online network, score it with the target one
        greedy_actions = self.online_q_values(next_states).max(1, True)[1]
        action_mask = utils.one_hot(greedy_actions, self.num_actions)
        bootstrap = (target_q * action_mask).sum(1)

    return rewards + gamma * bootstrap * non_ends
def load_train(self):
    """Load labels, per-image moment features and stored predictions.

    The per-image ``info`` matrix (float32) is the column-wise
    concatenation of:

    * the absolute distance between the stored centroid (columns taken in
      ``[1, 0]`` order) and half of the image shape,
    * the image shape,
    * the ``angles``, ``minor_axes`` and ``major_axes`` entries of
      ``data/image_moment_stats_v1_train.pkl``.

    Labels (one-hot, 121 columns) and ``info`` are split with the index
    arrays from ``DEFAULT_VALIDATION_SPLIT_PATH`` into ``self.labels_*``
    and ``self.info_*``; ``self.y_train`` / ``self.y_valid`` are loaded
    from ``self.train_pred_file`` / ``self.valid_pred_file``.
    """
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

    # The split file is read exactly once.
    split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    image_shapes = np.asarray(
        [img.shape for img in data.load('train')]).astype(np.float32)

    moments = np.load("data/image_moment_stats_v1_train.pkl")
    centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
    info = np.concatenate(
        (centroid_distance,
         image_shapes,
         moments["angles"][:, None],
         moments["minor_axes"][:, None],
         moments["major_axes"][:, None]),
        1).astype(np.float32)

    self.info_train = info[indices_train]
    self.info_valid = info[indices_valid]

    self.y_train = np.load(self.train_pred_file).astype(np.float32)
    self.y_valid = np.load(self.valid_pred_file).astype(np.float32)

    self.labels_train = labels[indices_train]
    self.labels_valid = labels[indices_valid]
def load_train(self):
    """Load labels, the selected feature columns and stored predictions.

    Features come from ``data/features_train.pkl``. When ``"aaronmoments"``
    is listed in ``self.features`` that entry is built here from the image
    shapes and ``data/image_moment_stats_v1_train.pkl`` (centroid distance,
    angles, minor axes, major axes). The moment values are used exactly as
    stored; no normalisation is applied.

    The features named in ``self.features`` are concatenated column-wise
    into ``info`` (float32). Labels (one-hot, 121 columns) and ``info`` are
    split with the index arrays from ``DEFAULT_VALIDATION_SPLIT_PATH`` into
    ``self.labels_*`` and ``self.info_*``; ``self.y_train`` /
    ``self.y_valid`` are loaded from ``self.train_pred_file`` /
    ``self.valid_pred_file``.
    """
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

    # The split file is read exactly once.
    split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    features = np.load("data/features_train.pkl").item()

    if "aaronmoments" in self.features:
        print("aaronmoments")
        image_shapes = np.asarray(
            [img.shape for img in data.load('train')]).astype(np.float32)
        moments = np.load("data/image_moment_stats_v1_train.pkl")
        centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
        angles = moments["angles"][:, None]
        minor_axes = moments["minor_axes"][:, None]
        major_axes = moments["major_axes"][:, None]
        features["aaronmoments"] = np.concatenate(
            [centroid_distance, angles, minor_axes, major_axes], 1).astype(np.float32)

    info = np.concatenate([features[feat] for feat in self.features], 1).astype(np.float32)
    print(info.shape)

    self.info_train = info[indices_train]
    self.info_valid = info[indices_valid]

    self.y_train = np.load(self.train_pred_file).astype(np.float32)
    self.y_valid = np.load(self.valid_pred_file).astype(np.float32)

    self.labels_train = labels[indices_train]
    self.labels_valid = labels[indices_valid]
if prev != 0: s+= "%i,%i,%i\n"%(prev, start+1, j) start = j prev = p file = open(csv_path+"Sample"+str(split.test_idxs[i]).zfill(4)+"_prediction.csv", "w") file.write(s[:-1]) file.close() print "%i%%"%int(np.round((i+1)/float(len(preds))*100.)) from sklearn import metrics labels = np.load("data/labels_raw_test.npy") # labelsn = labels.copy() for i, lbl in enumerate(labels): labels[i] = lbl[:-1] # labels[i] = lbl[1:] labels[:] = map(lambda y: utils.one_hot(y,21), labels) # print labels[0].shape, preds[0].shape roc, prec, rec, acc = [], [],[], [] l, p = np.vstack(labels), np.vstack(preds) # ln = np.vstack(labelsn) # print np.mean(l==p) y_pred = np.argmax(p,1) y_true = np.argmax(l,1) # print l.shape # print y.shape acc = np.mean(y_true==y_pred) rec = metrics.recall_score(y_true, y_pred)
def run():
    """Train a small convolutional network on a subsample of MNIST.

    Loads the pickled dataset from ``../data/mldata/mnist_data.pkl``, uses
    the first 60000 samples as the training pool (3000 of them drawn at
    random, with replacement) and the remainder as the test set, trains
    for 3 iterations and prints the training duration and test error rate.
    """
    # Fetch data; the context manager closes the file even if loading fails.
    with open('../data/mldata/mnist_data.pkl', 'rb') as f1:
        mnist = pickle.load(f1)

    split = 60000
    X_train = np.reshape(mnist.data[:split], (-1, 1, 28, 28)) / 255.0
    Y_train = mnist.target[:split]
    X_test = np.reshape(mnist.data[split:], (-1, 1, 28, 28)) / 255.0
    Y_test = mnist.target[split:]
    n_classes = np.unique(Y_train).size

    # Downsample training data.
    n_train_samples = 3000
    # randint's upper bound is exclusive, so this draws from [0, split - 1]
    # exactly like the deprecated random_integers(0, split - 1, ...).
    train_idxs = np.random.randint(0, split, n_train_samples)
    X_train = X_train[train_idxs, ...]
    Y_train = Y_train[train_idxs, ...]
    Y_train_one_hot = one_hot(Y_train)

    print('number of train samples: %d' % n_train_samples)
    print('number of test samples: %d' % X_test.shape[0])

    # Setup network.
    nn = NeuralNetwork(
        layers=[
            Layers.Convolution(
                n_feats=12,
                filter_shape=(5, 5),
                strides=(1, 1),
                weight_scale=0.1,
                weight_decay=0.001),
            Layers.Activation('relu'),
            Layers.Pool(
                pool_shape=(2, 2),
                strides=(2, 2),
                mode='max'),
            Layers.Convolution(
                n_feats=16,
                filter_shape=(5, 5),
                strides=(1, 1),
                weight_scale=0.1,
                weight_decay=0.001),
            Layers.Activation('relu'),
            Layers.Flatten(),
            Layers.Linear(
                n_out=n_classes,
                weight_scale=0.1,
                weight_decay=0.02),
            Layers.Softmax()
        ]
    )

    # Train neural network.
    t0 = time.time()
    nn.train(X_train, Y_train_one_hot, learning_rate=0.05, max_iter=3, batch_size=32)
    t1 = time.time()
    print('Duration: %.1fs' % (t1 - t0))

    # Evaluate on test data (nn.error is given the raw, non one-hot targets).
    error = nn.error(X_test, Y_test)
    print('Test error rate: %.4f' % error)
model.compile(OPTIMIZER, loss='categorical_crossentropy', metrics=['accuracy']) model.summary() print("Starting training") db = E.use(path, exp_id="char-fill").model(args.model) with db.session(vars(args), ensure_unique=False) as session: try: from time import time start = time() for e in range(EPOCHS): losses = [] batches = train.generate_batches( indexer=idxr, batch_size=BATCH_SIZE), batches = itertools.islice(batches, NUM_BATCHES) for b, (X, y) in enumerate(batches): X = np.asarray(X) if has_emb else one_hot(X, n_chars) y = to_categorical(y, nb_classes=n_chars) loss, _ = model.train_on_batch(X, y) losses.append(loss) if b % args.loss == 0: dev_loss, dev_acc = model.test_on_batch(X_dev, y_dev) avg_loss, last_loss = np.mean(losses), losses[-1] log_batch(e, b, avg_loss, last_loss, dev_loss, dev_acc) print() session.add_epoch( e, {'training_loss': str(np.mean(losses)), 'dev_loss': str(dev_loss), 'dev_acc': str(dev_acc)}) except KeyboardInterrupt: print("Interrupted") finally:
mode = None

# load data
print(">> Loading dataset...")
f = gzip.open('datasets/mnist.pkl.gz', 'rb')
try:
    train_set, valid_set, test_set = cPickle.load(f)
finally:
    # Close the archive even when unpickling fails.
    f.close()

train_set_x, train_set_y = train_set
valid_set_x, valid_set_y = valid_set
test_set_x, test_set_y = test_set

# convert labels to one hot representation
# (labels are passed to one_hot as a column: atleast_2d(...).T)
train_set_y_oh = one_hot(np.atleast_2d(train_set_y).T)
valid_set_y_oh = one_hot(np.atleast_2d(valid_set_y).T)
test_set_y_oh = one_hot(np.atleast_2d(test_set_y).T)

# insert a singleton middle axis:
# dim 0 = minibatches, dim 1 = units, dim 2 = states
train_set_y_oh = train_set_y_oh.reshape(
    (train_set_y_oh.shape[0], 1, train_set_y_oh.shape[1]))
valid_set_y_oh = valid_set_y_oh.reshape(
    (valid_set_y_oh.shape[0], 1, valid_set_y_oh.shape[1]))
test_set_y_oh = test_set_y_oh.reshape(
    (test_set_y_oh.shape[0], 1, test_set_y_oh.shape[1]))

# make the sets a bit smaller for testing purposes
# (only the train and validation sets are truncated; the test set is not)
train_set_x = train_set_x[:10000]
train_set_y_oh = train_set_y_oh[:10000]
valid_set_x = valid_set_x[:1000]
valid_set_y_oh = valid_set_y_oh[:1000]
def train(self, learning_schedule = {0: 0.015, 500: 0.0015, 800: 0.00015, 1000: 0.000015}, momentum = 0.9, max_epochs=3000, save_every = 20, save_path = os.getcwd()):
    """Train the network with Nesterov momentum, checkpointing as it goes.

    Args:
        learning_schedule: mapping from batch index (within an epoch) to
            the learning rate that takes effect at that batch; key 0
            supplies the initial rate.
        momentum: momentum passed to ``nn.updates.nesterov_momentum``.
        max_epochs: upper bound on the number of epochs.
        save_every: write a checkpoint every ``save_every`` batches.
        save_path: directory in which ``model_params.pkl`` is written.

    Side effects:
        Sets ``save_every``, ``metadata_tmp_path``,
        ``learning_rate_schedule``, ``learning_rate``, ``momentum`` and
        ``updates`` on ``self``; appends one entry per epoch to
        ``self._costs``, ``self._train_errors`` and ``self._dev_errors``;
        writes checkpoints to ``self.metadata_tmp_path``.

    Raises:
        RuntimeError: if the training function returns a NaN cost.

    NOTE(review): the defaults for ``learning_schedule`` (a shared dict)
    and ``save_path`` (``os.getcwd()``) are evaluated once, when the
    function is defined. They are left untouched to keep the signature
    unchanged.
    """
    self.save_every = save_every
    self.metadata_tmp_path = save_path+"/model_params.pkl"
    self.learning_rate_schedule = learning_schedule
    self.learning_rate = theano.shared(np.float32(self.learning_rate_schedule[0]))
    self.momentum = momentum

    # for trainer
    self.updates = nn.updates.nesterov_momentum(self.loss, self.all_params, self.learning_rate, self.momentum)
    train_fn = self.nesterov_trainer()  # nesterov with momentum.

    train_set_iterator = DataLoader(os.getcwd(), train_test_valid='train')
    best_dev_loss = numpy.inf
    dev_set_iterator = DataLoader(os.getcwd(), train_test_valid='valid')
    dev_set_iterator.build_unequal_samples_map()

    patience = 1000
    patience_increase = 2.
    improvement_threshold = 0.995
    done_looping = False

    def _save_checkpoint(costs):
        # Write the running costs and current parameter values to disk.
        # Binary mode is required for pickle.HIGHEST_PROTOCOL output.
        with open(self.metadata_tmp_path, 'wb') as f:
            pickle.dump({'losses_train': costs,
                         'param_values': nn.layers.get_all_param_values(self.output_layer)},
                        f, pickle.HIGHEST_PROTOCOL)

    print('... training the model')
    epoch = 0

    # for plotting
    self._costs = []
    self._train_errors = []
    self._dev_errors = []

    while (epoch < max_epochs) and (not done_looping):
        losses = []
        avg_costs = []
        timer = time.time()

        for iteration, (x, y) in enumerate(train_set_iterator):
            if iteration in self.learning_rate_schedule:
                lr = np.float32(self.learning_rate_schedule[iteration])
                print(" setting learning rate to %.7f" % lr)
                self.learning_rate.set_value(lr)

            print(" load training data onto GPU")
            avg_cost = train_fn(x, y)
            # train_fn may return the cost directly or as the first element
            # of a list; unwrap it before the NaN check.
            if isinstance(avg_cost, list):
                avg_cost = avg_cost[0]
            if np.isnan(avg_cost):
                raise RuntimeError("NaN DETECTED.")
            avg_costs.append(avg_cost)

            # for saving the batch
            if ((iteration + 1) % save_every) == 0:
                print("")
                print("Saving metadata, parameters")
                _save_checkpoint(avg_costs)

        # NOTE(review): the evaluation below is assumed to run once per
        # epoch, on the last training batch (x, y) of the epoch - confirm
        # against the intended nesting.
        mean_train_loss = numpy.mean(avg_costs)

        # accuracy assessment
        output = utils.one_hot(self.predict_(x)(), m=20)
        train_loss = utils.log_loss(output, y)
        acc = 1 - utils.accuracy(output, y)
        # NOTE(review): `losses` is collected but never read in this method.
        losses.append(train_loss)
        del output
        del x
        del y

        print(' epoch %i took %f seconds' % (epoch, time.time() - timer))
        print(' epoch %i, avg costs %f' % (epoch, mean_train_loss))
        print(' epoch %i, training error %f' % (epoch, acc))

        # for plotting
        self._costs.append(mean_train_loss)
        self._train_errors.append(acc)

        # valid accuracy (despite the name, valid_acc is an error rate:
        # 1 - accuracy on one random validation batch)
        xd, yd = dev_set_iterator.random_batch()
        valid_output = utils.one_hot(self.predict_(xd)(), m=20)
        valid_acc = 1 - utils.accuracy(valid_output, yd)
        self._dev_errors.append(valid_acc)
        del valid_output
        del xd
        del yd

        if valid_acc < best_dev_loss:
            # Extend the patience only for a significant improvement. This
            # must be tested against the PREVIOUS best, i.e. before
            # best_dev_loss is overwritten, otherwise it can never be true.
            if valid_acc < best_dev_loss * improvement_threshold:
                patience = max(patience, iteration * patience_increase)
            best_dev_loss = valid_acc
            # NOTE(review): best_params is never read in this method.
            best_params = copy.deepcopy(self.all_params)
            print('!!! epoch %i, validation error of best model %f' % (epoch, valid_acc))
            print("")
            print("Saving best performance parameters")
            _save_checkpoint(avg_costs)

        # `iteration` is the index of the last batch of this epoch.
        if patience <= iteration:
            done_looping = True
            break
        epoch += 1
# Load the validation targets and every model's validation predictions.
s = np.load("validation_split_v1.pkl")
t_valid = data.labels_train[s['indices_valid']]

predictions_list = [np.load(path) for path in valid_predictions_paths]
predictions_stack = np.array(predictions_list).astype(theano.config.floatX)  # num_sources x num_datapoints x 121

print("Individual prediction errors")
individual_prediction_errors = [utils.log_loss(p, t_valid) for p in predictions_list]
del predictions_list

for i in range(n_models):
    print("%s %s" % (individual_prediction_errors[i],
                     os.path.basename(valid_predictions_paths[i])))

print("")

# optimizing weights: find softmax-normalised blending weights that
# minimise the log loss of the weighted average prediction.
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(utils.one_hot(t_valid))  # targets

W = T.vector('W')

# NOTE: `s` is rebound here from the split data to the symbolic softmax
# weights, reshaped so they broadcast over the last two axes of X.
s = T.nnet.softmax(W).reshape((W.shape[0], 1, 1))

weighted_avg_predictions = T.sum(X * s, axis=0)  # T.tensordot(X, s, [[0], [0]])

error = nn_plankton.log_loss(weighted_avg_predictions, t)
grad = T.grad(error, W)

f = theano.function([W], error)
g = theano.function([W], grad)

w_init = np.zeros(n_models, dtype=theano.config.floatX)
out, loss, _ = scipy.optimize.fmin_l_bfgs_b(f, w_init, fprime=g, pgtol=1e-09, epsilon=1e-08, maxfun=10000)

# Softmax of the optimised logits. Subtracting the maximum first leaves the
# normalised result unchanged but keeps exp() from overflowing.
weights = np.exp(out - out.max())
weights /= weights.sum()