def predict(self, x, attn_type = "hard"): #predict with greedy decoding
    """Greedy decoding with hard or soft attention.

    x: LongTensor [batch, src_len] of source token ids.
    attn_type: "hard" builds the context from the single argmax encoder
        state (one-hot attention); any other value uses the full soft
        attention distribution.
    Returns LongTensor [batch, src_len + 1] of predicted ids; column 0 is
    the all-zeros start token. Attention history is stored in self.attn
    with shape [batch, src_len, src_len].
    """
    emb = self.embedding(x)
    # zero-initialized (h, c) for the encoder LSTM
    h = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    c = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    enc_h, _ = self.encoder(emb, (h, c))
    # y[0] is token id 0 for every batch element (start symbol)
    y = [Variable(torch.zeros(x.size(0)).long())]
    self.attn = []
    for t in range(x.size(1)):
        # feed back the previously predicted token
        emb_t = self.embedding(y[-1])
        # decoder state is seeded with the encoder's final (h, c) at t=0
        dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))
        # dot-product attention scores over all encoder states
        scores = torch.bmm(enc_h, dec_h.transpose(1,2)).squeeze(2)
        attn_dist = F.softmax(scores, dim = 1)
        self.attn.append(attn_dist.data)
        if attn_type == "hard":
            # one-hot at the argmax position: context is one encoder state
            _, argmax = attn_dist.max(1)
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1))
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
        else:
            # soft attention: expected encoder state under attn_dist
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
        pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))
        _, next_token = pred.max(1)
        y.append(next_token)
    # stack per-step distributions -> [batch, tgt_steps, src_len]
    self.attn = torch.stack(self.attn, 0).transpose(0, 1)
    return torch.stack(y, 0).transpose(0, 1)
def evaluate(attention_model,x_test,y_test):
    """Compute accuracy of `attention_model` on a held-out split.

    Args:
        attention_model : {object} model; `type` truthy selects the
            multiclass path, falsy the binary path
        x_test : {nplist} x_test
        y_test : {nplist} y_test
    Returns:
        cv-accuracy (fraction of correct predictions)
    """
    attention_model.batch_size = x_test.shape[0]
    attention_model.hidden_state = attention_model.init_hidden()
    inputs = Variable(torch.from_numpy(x_test).type(torch.LongTensor))
    logits, _ = attention_model(inputs)
    if bool(attention_model.type):
        # multiclass: argmax over the class dimension, compare as longs
        preds = torch.max(logits, 1)[1]
        labels = Variable(torch.from_numpy(y_test).type(torch.LongTensor))
    else:
        # binary: round the sigmoid output, compare as doubles
        preds = torch.round(logits.type(torch.DoubleTensor).squeeze(1))
        labels = Variable(torch.from_numpy(y_test).type(torch.DoubleTensor))
    return torch.eq(preds, labels).data.sum() / inputs.size(0)
def predict_proba(self, dataset):
    """Predict predict probability for dataset.

    This method will only work with method logistic/multiclass

    Parameters:
    ----------
    dataset (dict): dictionary with the testing dataset -
    X_wide_test, X_deep_test, target

    Returns:
    --------
    array-like with the probability for dataset.
    """
    wide_in = Variable(torch.from_numpy(dataset.wide)).float()
    deep_in = Variable(torch.from_numpy(dataset.deep))
    if use_cuda:
        wide_in, deep_in = wide_in.cuda(), deep_in.cuda()
    # set the model in evaluation mode so dropout is not applied
    net = self.eval()
    pred = net(wide_in, deep_in).cpu()
    if self.method == "logistic":
        # single sigmoid output -> stack [1-p, p] columns
        pos = pred.squeeze(1).data.numpy()
        probs = np.zeros([pos.shape[0], 2])
        probs[:, 0] = 1 - pos
        probs[:, 1] = pos
        return probs
    if self.method == "multiclass":
        return pred.data.numpy()
def train(ep):
    """Train `model` for one epoch `ep` over X_train in random order.

    Logs the average loss every args.log_interval lines. Relies on the
    module-level model, optimizer, args, X_train and lr.
    """
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        # next-step prediction: input is all but last, target all but first
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()
        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        # element-wise binary cross-entropy expressed via the trace trick
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)
        loss.backward()
        # BUG FIX: clip AFTER backward() so gradients exist when clipped.
        # Previously clipping ran before backward and had no effect.
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
def forward(self, k, x, logposterior):
    '''
    k: number of samples
    x: [B,X]
    logposterior(z) -> [P,B]
    '''
    # Amortized inference: MLP encoder produces (mean, logvar) of q(z|x),
    # then normalizing flows transform the reparameterized samples.
    self.B = x.size()[0]
    self.P = k
    #Encode
    out = x
    for i in range(len(self.encoder_weights)-1):
        out = self.act_func(self.encoder_weights[i](out))
    out = self.encoder_weights[-1](out)
    # final layer packs [mean | logvar] side by side along the feature dim
    mean = out[:,:self.z_size]
    logvar = out[:,self.z_size:]
    #Sample (reparameterization trick)
    eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_().type(self.dtype)) #[P,B,Z]
    z = eps.mul(torch.exp(.5*logvar)) + mean #[P,B,Z]
    logqz = lognormal(z, mean, logvar) #[P,B]
    # push samples through the flows, accumulating log-determinants so the
    # returned density is log q(z0) - sum log|det|
    logdetsum = 0.
    for i in range(self.n_flows):
        z, logdet = self.norm_flow(self.params[i],z)
        logdetsum += logdet
    # NOTE(review): the `logposterior` argument is accepted but never used
    # here — confirm whether callers rely on this signature only.
    return z, logqz-logdetsum
def test_broadcast_subspace(self):
    """Advanced-index assignment broadcasts a column vector across rows."""
    a = zeros((100, 100))
    col = Variable(torch.arange(0, 100))[:, None]
    rev_rows = Variable(torch.arange(99, -1, -1).long())
    a[rev_rows] = col
    expected = rev_rows.double().unsqueeze(1).expand(100, 100)
    self.assertEqual(a, expected)
def predict(self, dataset):
    """Predict target for dataset.

    Parameters:
    ----------
    dataset (dict): dictionary with the testing dataset -
    X_wide_test, X_deep_test, target

    Returns:
    --------
    array-like with the target for dataset
    """
    wide_in = Variable(torch.from_numpy(dataset.wide)).float()
    deep_in = Variable(torch.from_numpy(dataset.deep))
    if use_cuda:
        wide_in, deep_in = wide_in.cuda(), deep_in.cuda()
    # set the model in evaluation mode so dropout is not applied
    net = self.eval()
    pred = net(wide_in, deep_in).cpu()
    if self.method == "regression":
        return pred.squeeze(1).data.numpy()
    if self.method == "logistic":
        # threshold the sigmoid output at 0.5
        return (pred > 0.5).squeeze(1).data.numpy()
    if self.method == "multiclass":
        _, pred_cat = torch.max(pred, 1)
        return pred_cat.data.numpy()
def sample(self, mu, logvar, k):
    """Draw k reparameterized latent samples and score them.

    Returns (z, logpz, logqz): samples [P,B,Z], log-density under the
    standard-normal prior [P,B], and under q(z|mu,logvar) [P,B].
    """
    noise = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_())  # [P,B,Z]
    z = noise.mul(torch.exp(.5*logvar)) + mu  # [P,B,Z]
    logpz = lognormal(z,
                      Variable(torch.zeros(self.B, self.z_size)),
                      Variable(torch.zeros(self.B, self.z_size)))  # [P,B]
    logqz = lognormal(z, mu, logvar)
    return z, logpz, logqz
def generate(model, start_words, ix2word, word2ix, prefix_words=None):
    """Given a few opening words, generate a complete poem that begins
    with them.

    start_words: e.g. u'春江潮水连海平' — the opening characters the poem
        must start with (they are teacher-forced into the output).
    prefix_words: optional words fed through the model first to condition
        the hidden state (style/context); not included in the output.
    Returns the generated poem as a list of characters.
    """
    results = list(start_words)
    start_word_len = len(start_words)
    # Manually set the first input token to <START>
    input = Variable(t.Tensor([word2ix['<START>']]).view(1, 1).long())
    if opt.use_gpu:
        input = input.cuda()
    hidden = None
    # Warm up the hidden state on the prefix words; outputs are discarded
    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input, hidden)
            input = Variable(input.data.new([word2ix[word]])).view(1, 1)
    for i in range(opt.max_gen_len):
        output, hidden = model(input, hidden)
        if i < start_word_len:
            # Teacher-force the given opening words
            w = results[i]
            input = Variable(input.data.new([word2ix[w]])).view(1, 1)
        else:
            # Greedy decoding: take the top-1 token and feed it back in
            top_index = output.data[0].topk(1)[1][0]
            w = ix2word[top_index]
            results.append(w)
            input = Variable(input.data.new([top_index])).view(1, 1)
        if w == '<EOP>':
            # drop the end-of-poem marker from the returned text
            del results[-1]
            break
    return results
def l2l_validate(model, cluster_center, n_epoch=100):
    """Validate the lifelong/new-class model over freshly generated data;
    returns the mean accuracy across n_epoch validation rounds."""
    val_accuracy = []
    for epoch in range(n_epoch):
        data_l = generate_data_l(cluster_center)
        data_n = generate_data_n(cluster_center, model.n_class_n)
        x_l, y_l = Variable(torch.from_numpy(data_l[0])).float(), Variable(
            torch.from_numpy(data_l[1]))
        x_n, y_n = Variable(torch.from_numpy(data_n[0])).float(), Variable(
            torch.from_numpy(data_n[1]))
        pred_ll, pred_nl, w, b = model(x_l, x_n)
        # Build a linear classifier for the new classes: weight row M[k] and
        # bias B[k] are the mean of the generated (w, b) over the samples
        # belonging to new class k (labels offset by n_class_l).
        M = Variable(torch.zeros(model.n_class_n, model.n_dim))
        B = Variable(torch.zeros(model.n_class_n))
        for k in range(model.n_class_n):
            M[k] = torch.cat((w[:, 0][y_n == model.n_class_l + k].view(-1, 1),
                              w[:, 1][y_n == model.n_class_l + k].view(-1, 1)), 1).mean(0)
            B[k] = b[y_n == model.n_class_l + k].mean()
        # Scores of lifelong (x_l) and new (x_n) inputs under that classifier
        pred_ln = torch.mm(x_l, M.t()) + B.view(1, -1).expand_as(torch.mm(x_l, M.t()))
        pred_nn = torch.mm(x_n, M.t()) + B.view(1, -1).expand_as(torch.mm(x_n, M.t()))
        # Full score matrix: rows = [lifelong; new] samples,
        # cols = [lifelong classes | new classes]
        pred = torch.cat((torch.cat((pred_ll, pred_nl)), torch.cat((pred_ln, pred_nn))), 1)
        pred = pred.data.max(1)[1]
        y = torch.cat((y_l, y_n))
        accuracy = pred.eq(y.data).cpu().sum() * 1.0 / y.size()[0]
        val_accuracy.append(accuracy)
        # NOTE(review): the 0:100 / 100:150 slices assume 100 lifelong and
        # 50 new samples per round — confirm against generate_data_l/_n.
        acc_l = pred.eq(y.data).cpu()[0:100].sum() * 1.0 / 100
        acc_n = pred.eq(y.data).cpu()[100:150].sum() * 1.0 / 50
        print('accuracy: %.2f, lifelong accuracy: %.2f, new accuracy: %.2f' %
              (accuracy, acc_l, acc_n))
    return numpy.mean(numpy.asarray(val_accuracy))
def forward_single_image_tensor(self, img_tensor):
    """Simple forward pass on the network for a single image.

    Normalize the image if we are in TEST mode; in TRAIN mode the dataset
    object is assumed to have normalized it already.

    :param img_tensor: torch.FloatTensor with shape [3,H,W]
    :type img_tensor:
    :return: torch.FloatTensor with shape [H, W, D]
    :rtype:
    """
    assert len(img_tensor.shape) == 3
    batched = img_tensor.unsqueeze(0)                       # [1,3,H,W]
    # The fcn throws an error without a Variable input — possibly because
    # it is in train mode.
    batched = Variable(batched.cuda(), requires_grad=False)
    res = self.forward(batched)                             # [1,D,H,W]
    res = res.squeeze(0)                                    # [D,H,W]
    return res.permute(1, 2, 0)                             # [H,W,D]
def train(epoch):
    """One training epoch over random crops of the loader's batches.

    Logs the batch loss every 10 iterations, saves a sample image per log,
    and prints the epoch's average loss. Uses module-level unet, optimizer,
    criterion, opt, cuda, target_gap, target_size, training_data_loader.
    """
    epoch_loss = 0
    for iteration, batch in enumerate(training_data_loader, 1):
        # random crop offsets shared by input and (gap-shifted) target
        randH = random.randint(0, opt.remsize)
        randW = random.randint(0, opt.remsize)
        input = Variable(batch[0][:, :, randH:randH + opt.size, randW:randW + opt.size])
        target = Variable(batch[1][:, :, randH + target_gap:randH + target_gap + target_size,
                                   randW + target_gap:randW + target_gap + target_size])
        if cuda:
            input = input.cuda()
            target = target.cuda()
        # BUG FIX: zero gradients each iteration — previously they were
        # never cleared and accumulated across batches.
        optimizer.zero_grad()
        output = unet(input)
        loss = criterion(output, target)
        epoch_loss += loss.data[0]
        loss.backward()
        optimizer.step()
        # BUG FIX: compare ints with ==, not `is` (identity).
        if iteration % 10 == 0:
            print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, len(training_data_loader), loss.data[0]))
            # map output from [-1,1] back to [0,1] for saving
            imgout = output.data/2 +1
            torchvision.utils.save_image(imgout,"/home/wcd/PytorchProject/Unet/unetdata/checkpoint/epch_"+str(epoch)+'.jpg')
    print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader)))
def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches, context_attention, question_attention, context_indices, question_indices, oov_to_limited_idx):
    """Pointer-generator output distribution.

    Mixes the generator's vocabulary softmax with copy distributions over
    context and question tokens, gated by vocab_pointer_switches
    (generate vs copy) and context_question_switches (context vs question).
    Returns probabilities over the limited vocab extended with batch OOVs.
    """
    size = list(outputs.size())
    size[-1] = self.generative_vocab_size
    scores = generator(outputs.view(-1, outputs.size(-1))).view(size)
    p_vocab = F.softmax(scores, dim=scores.dim()-1)
    # generation probability mass
    scaled_p_vocab = vocab_pointer_switches.expand_as(p_vocab) * p_vocab
    # extend the vocab axis with EPSILON-filled slots for batch-local OOVs
    effective_vocab_size = self.generative_vocab_size + len(oov_to_limited_idx)
    if self.generative_vocab_size < effective_vocab_size:
        size[-1] = effective_vocab_size - self.generative_vocab_size
        buff = Variable(scaled_p_vocab.data.new(*size).fill_(EPSILON))
        scaled_p_vocab = torch.cat([scaled_p_vocab, buff], dim=buff.dim()-1)
    # scatter context attention mass onto each context token's vocab index
    p_context_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
    p_context_ptr.scatter_add_(p_context_ptr.dim()-1, context_indices.unsqueeze(1).expand_as(context_attention), context_attention)
    scaled_p_context_ptr = (context_question_switches * (1 - vocab_pointer_switches)).expand_as(p_context_ptr) * p_context_ptr
    # same construction for the question tokens
    p_question_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
    p_question_ptr.scatter_add_(p_question_ptr.dim()-1, question_indices.unsqueeze(1).expand_as(question_attention), question_attention)
    scaled_p_question_ptr = ((1 - context_question_switches) * (1 - vocab_pointer_switches)).expand_as(p_question_ptr) * p_question_ptr
    # the three gates sum to 1, so this is a valid mixture
    probs = scaled_p_vocab + scaled_p_context_ptr + scaled_p_question_ptr
    return probs
def visualizeModel(model, numImages=6):
    """Plot up to numImages validation images with their predicted labels,
    restoring the model's original train/eval mode before returning."""
    wasTraining = model.training
    model.eval()
    shown = 0
    fig = plt.figure()
    for inputs, labels in dataloaders['val']:
        if use_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()
        inputs, labels = Variable(inputs), Variable(labels)
        _, preds = torch.max(model(inputs).data, 1)
        for j in range(inputs.size(0)):
            shown += 1
            nCols = 2
            ax = plt.subplot(numImages // nCols, nCols, shown)
            ax.axis('off')
            ax.set_title('predicted: {}'.format(class_names[preds[j]]))
            imshow(inputs.cpu().data[j])
            if shown == numImages:
                model.train(mode=wasTraining)
                return
    model.train(mode=wasTraining)
def _pad_packed_sequence(sequence, batch_first=False, padding_value=0):
    """Inverse of pack_padded_sequence: unpack into a padded tensor.

    sequence: a PackedSequence-like pair (var_data, batch_sizes).
    Returns (output, lengths): output is [T, B, *] (or [B, T, *] when
    batch_first) padded with padding_value; lengths is a LongTensor of
    per-sequence lengths in batch order.
    """
    var_data, batch_sizes = sequence
    max_batch_size = int(batch_sizes[0])
    # allocate the padded output up front, pre-filled with padding_value
    output = var_data.data.new(len(batch_sizes), max_batch_size, *var_data.size()[1:]).fill_(padding_value)
    output = Variable(output)
    lengths = []
    data_offset = 0
    prev_batch_size = int(batch_sizes[0])
    prev_i = 0
    # walk the batch-size "staircase"; a trailing 0 flushes the last chunk
    for i, batch_size in enumerate(batch_sizes.tolist() + [0]):
        if batch_size != prev_batch_size:
            # copy the flat chunk covering timesteps [prev_i, i) in one view
            l = prev_batch_size * (i - prev_i)
            tmp = var_data[data_offset:data_offset + l]
            output[prev_i:i, :prev_batch_size] = tmp.view(i - prev_i, prev_batch_size, *tmp.size()[1:])
            data_offset += l
            prev_i = i
        # each drop in batch size means `dec` sequences ended at step i
        dec = prev_batch_size - batch_size
        if dec > 0:
            lengths.extend((i,) * dec)
        prev_batch_size = batch_size
    # lengths were collected shortest-first; reverse to match batch order
    lengths.reverse()
    if batch_first:
        output = output.transpose(0, 1)
    # This Variable doesn't actually have any history (well,
    # technically it does; it's just untracked), it is purely here to
    # make ONNX export easier. That is to say, from an autodiff
    # standpoint this doesn't make any sense.
    return output, Variable(torch.LongTensor(lengths))
def main():
    """Two-layer network trained with a custom ReLU autograd function and
    manual SGD on raw weight tensors."""
    dtype = torch.FloatTensor
    # N: batch size; d_in: input dim; H: hidden dim; d_out: output dim
    N, d_in, H, d_out = 64, 1000, 100, 10
    x = Variable(torch.randn(N, d_in).type(dtype), requires_grad=False)
    y = Variable(torch.randn(N, d_out).type(dtype), requires_grad=False)
    w1 = Variable(torch.randn(d_in, H).type(dtype), requires_grad=True)
    w2 = Variable(torch.randn(H, d_out).type(dtype), requires_grad=True)
    learning_rate = 1e-6
    for step in range(500):
        relu = MyRelu()
        y_pred = relu(x.mm(w1)).mm(w2)
        loss = (y_pred - y).pow(2).sum()
        loss.backward()
        # manual SGD step, then reset the accumulated gradients
        w1.data -= learning_rate * w1.grad.data
        w2.data -= learning_rate * w2.grad.data
        w1.grad.data.zero_()
        w2.grad.data.zero_()
        print(loss.data[0])
def show_result(num_epoch, show = False, save = False, path = 'result.png', isFix=False):
    """Render a 5x5 grid of generator samples and save it to `path`.

    isFix=True reuses the module-level fixed_z_ noise; otherwise fresh
    noise is drawn. Shows the figure when `show` is True, else closes it.
    """
    noise = torch.randn((5*5, 100)).view(-1, 100, 1, 1)
    noise = Variable(noise.cuda(), volatile=True)
    G.eval()
    test_images = G(fixed_z_) if isFix else G(noise)
    G.train()
    grid = 5
    fig, ax = plt.subplots(grid, grid, figsize=(5, 5))
    for i, j in itertools.product(range(grid), range(grid)):
        ax[i, j].get_xaxis().set_visible(False)
        ax[i, j].get_yaxis().set_visible(False)
    for k in range(grid * grid):
        r, c = divmod(k, grid)
        ax[r, c].cla()
        # map generator output from [-1,1] to [0,1] for display
        ax[r, c].imshow((test_images[k].cpu().data.numpy().transpose(1, 2, 0) + 1) / 2)
    label = 'Epoch {0}'.format(num_epoch)
    fig.text(0.5, 0.04, label, ha='center')
    plt.savefig(path)
    if show:
        plt.show()
    else:
        plt.close()
def forward_pass(self):
    """One forward pass over the current sample.

    Returns (loss, probs_dist, probs_seg); probs_dist is None unless
    dist_net == 'v2', in which case an auxiliary distance-map head is
    trained with cross-entropy and its loss added to the segmentation loss.
    """
    ##Variables output transformed for cuda
    X=self.initialize_input()
    self.batch_y=self.sample['groundtruth']
    Y = Variable(self.batch_y.float())
    Y=Y.cuda()
    ## fwd
    if self.dist_net=='v2':
        # build distance-map targets from the segmentation ground truth
        self.batch_y_dist=distance_map_batch_v2(self.batch_y,self.threshold,self.bins)
        Y_dist = Variable(self.batch_y_dist.float())
        Y_dist=Y_dist.cuda()
        probs_dist,probs_seg=self.predict(X)
        loss_seg=self.criterion(Y,probs_seg,self.loss_fn)
        loss_dist=self.criterion(Y_dist,probs_dist,'cross-entropy')
        # joint objective: segmentation + distance-map auxiliary loss
        loss=loss_seg+loss_dist
    else:
        self.batch_y_dist=None
        probs_seg=self.predict(X)
        probs_dist=None
        loss=self.criterion(Y,probs_seg,self.loss_fn)
    return loss,probs_dist,probs_seg
def validate(val_loader, model, criterion, location,num_epochs, print_freq):
    """Run one evaluation pass over val_loader, printing running loss.

    NOTE(review): the summary line prints losses.avg but the function
    returns only the LAST batch's loss — confirm callers expect that
    (returning losses.avg may have been intended). Also raises
    NameError if val_loader is empty.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (image, target, _) in enumerate(val_loader):
        # volatile=True: legacy no-grad inference mode
        image_var = Variable(image, volatile=True)
        target_var = Variable(target, volatile=True)
        if 'cuda' in location:
            image_var = image_var.cuda()
            target_var = target_var.cuda()
        # compute output
        output = model(image_var)
        loss = criterion(output, target_var)
        losses.update(loss.data[0], image.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                i, len(val_loader), batch_time=batch_time, loss=losses))
    print(' * Val Loss {loss.avg:.3f}' .format(loss=losses))
    return loss
def to_variable(numpy_data, volatile=False, is_cuda=True):
    """Wrap a numpy array as a float32 torch Variable.

    numpy_data: array-like; converted to float32.
    volatile: forwarded to Variable (legacy inference-mode flag).
    is_cuda: move the result to the GPU when True.
    """
    as_float = numpy_data.astype(np.float32)
    tensor = torch.from_numpy(as_float).float()
    wrapped = Variable(tensor, volatile=volatile)
    return wrapped.cuda() if is_cuda else wrapped
def stylize(**kwargs):
    """Apply the trained style-transfer model to opt.content_path and save
    the stylized image to opt.result_path. kwargs override Config fields."""
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    # Image preprocessing: to tensor, then scale pixel values to [0, 255]
    content_image = tv.datasets.folder.default_loader(opt.content_path)
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(content_image, volatile=True)

    # Model (map_location keeps the weights on their stored device)
    style_model = TransformerNet().eval()
    style_model.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))

    if opt.use_gpu:
        content_image = content_image.cuda()
        style_model.cuda()

    # Style transfer and save (rescale output from [0,255] back to [0,1])
    output = style_model(content_image)
    output_data = output.cpu().data[0]
    tv.utils.save_image(((output_data / 255)).clamp(min=0, max=1), opt.result_path)
def train(dataloader):
    """One training epoch for the module-level model `uf`; logs loss and
    perplexity every log_interval batches."""
    uf.train()
    total_loss = 0
    total_items = 0
    start_time = time.time()
    for i_batch, batch in enumerate(dataloader):
        output_seq = Variable(batch['output_seq'])
        # after removing the target, the remaining entries of `batch` are
        # the model's keyword inputs
        del (batch['output_seq'])
        for k in batch:
            batch[k] = Variable(batch[k])
        if DEVICE_NO != -1:
            output_seq = output_seq.cuda(DEVICE_NO)
            for k in batch:
                batch[k] = batch[k].cuda(DEVICE_NO)
        uf.zero_grad()
        pred = uf.forward(**batch)
        # flatten to (tokens, vocab) vs (tokens,) for the criterion
        pred = pred.view(-1, pred.size(-1))
        output_seq = output_seq.view(-1)
        loss = criteria(pred, output_seq)
        loss.backward()
        # count only real (non-ignore-index) tokens to weight the running loss
        num_items = len([x for x in output_seq if int(x) != criteria.ignore_index])
        total_loss += num_items * loss.data
        total_items += num_items
        optimizer.step()
        if i_batch % log_interval == 0 and i_batch > 0:
            cur_loss = total_loss[0] / total_items
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:04.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, i_batch, len(dataloader.dataset) // dataloader.batch_size,
                optimizer.param_groups[0]['lr'],
                elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            total_items = 0
            start_time = time.time()
def sample(self, mu, logvar, k):
    """Reparameterized sampling of k latents per datapoint.

    Returns (z, logpz, logqz) with shapes ([P,B,Z], [P,B], [P,B]).
    """
    if torch.cuda.is_available():
        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_()).cuda() #[P,B,Z]
        z = eps.mul(torch.exp(.5*logvar)) + mu #[P,B,Z]
        logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size).cuda()),
                          Variable(torch.zeros(self.B, self.z_size)).cuda()) #[P,B]
        # NOTE(review): wrapping mu.data / logvar.data detaches logqz from
        # the graph on the GPU path, unlike the CPU branch below — the
        # direct call is left commented out, so this looks deliberate;
        # confirm before unifying the two branches.
        # logqz = lognormal(z, mu, logvar)
        logqz = lognormal(z, Variable(mu.data), Variable(logvar.data))
    else:
        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_())#[P,B,Z]
        z = eps.mul(torch.exp(.5*logvar)) + mu #[P,B,Z]
        logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size)),
                          Variable(torch.zeros(self.B, self.z_size))) #[P,B]
        logqz = lognormal(z, mu, logvar)
    return z, logpz, logqz
def random_batch(batch_size=3):
    """Sample a random batch of sentence pairs, pad them, and return
    (input_var, input_lengths, target_var, target_lengths) as
    (seq x batch) LongTensor Variables plus per-sequence lengths."""
    # Choose random pairs and index both sides
    chosen = [random.choice(pairs) for _ in range(batch_size)]
    input_seqs = [indexes_from_sentence(input_lang, p[0]) for p in chosen]
    target_seqs = [indexes_from_sentence(output_lang, p[1]) for p in chosen]

    # Sort jointly by input length, descending (as pack_padded_sequence needs)
    seq_pairs = sorted(zip(input_seqs, target_seqs), key=lambda p: len(p[0]), reverse=True)
    input_seqs, target_seqs = zip(*seq_pairs)

    # Pad each side with 0s to its own max length
    input_lengths = [len(s) for s in input_seqs]
    input_padded = [pad_seq(s, max(input_lengths)) for s in input_seqs]
    target_lengths = [len(s) for s in target_seqs]
    target_padded = [pad_seq(s, max(target_lengths)) for s in target_seqs]

    # (batch x seq) tensors, transposed into (seq x batch)
    input_var = Variable(torch.LongTensor(input_padded)).transpose(0, 1)
    target_var = Variable(torch.LongTensor(target_padded)).transpose(0, 1)

    if USE_CUDA:
        input_var = input_var.cuda()
        target_var = target_var.cuda()

    return input_var, input_lengths, target_var, target_lengths
def update(self):
    """One A2C update over the stored rollout; clears the cached
    per-step value/log-prob/entropy lists afterwards."""
    # Bootstrap value for the final state (volatile: no grad tracking)
    next_value = self.actor_critic(Variable(self.rollouts.states[-1], volatile=True))[0].data
    self.rollouts.compute_returns(next_value, self.use_gae, self.gamma, self.tau)
    # Per-step predictions were cached during rollout collection rather
    # than re-evaluated here (the re-evaluation call is left commented out).
    # values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
    #     Variable(self.rollouts.states[:-1].view(-1, *self.obs_shape)),
    #     Variable(self.rollouts.actions.view(-1, self.action_shape)))
    values = torch.cat(self.rollouts.value_preds, 0).view(self.num_steps, self.num_processes, 1)
    action_log_probs = torch.cat(self.rollouts.action_log_probs).view(self.num_steps, self.num_processes, 1)
    dist_entropy = torch.cat(self.rollouts.dist_entropy).view(self.num_steps, self.num_processes, 1)
    self.rollouts.value_preds = []
    self.rollouts.action_log_probs = []
    self.rollouts.dist_entropy = []
    advantages = Variable(self.rollouts.returns[:-1]) - values
    value_loss = advantages.pow(2).mean()
    # advantages are detached (.data) for the policy term so its gradient
    # flows only through the action log-probs
    action_loss = -(Variable(advantages.data) * action_log_probs).mean()
    self.optimizer.zero_grad()
    # combined objective: policy + scaled value loss - entropy bonus
    cost = action_loss + value_loss*self.value_loss_coef - dist_entropy.mean()*self.entropy_coef
    cost.backward()
    nn.utils.clip_grad_norm(self.actor_critic.parameters(), self.grad_clip)
    self.optimizer.step()
def forward(self, inputs):
    """Text CNN over two embedding tables.

    inputs: (bs, words/sentence) LongTensor of token ids.
    Returns the final linear layer's logits.
    """
    bsz = inputs.size(0)  # batch size might change
    # Pad very short sentences to at least 3 tokens (padding issues on
    # really short sentences)
    if inputs.size(1) < 3:
        pads = Variable(torch.zeros(bsz,3-inputs.size(1))).type(torch.LongTensor)
        inputs = torch.cat([inputs,pads.cuda()],dim=1)
    # Two embedding tables, stacked along a trailing channel axis
    embeds = self.embeddings(inputs).unsqueeze(3).permute(0, 2, 1, 3)
    s_embeds = self.s_embeddings(inputs).unsqueeze(3).permute(0, 2, 1, 3)
    out = torch.cat([embeds, s_embeds], dim=3)
    # Three parallel conv branches concatenated along the channel dim
    out = torch.cat([self.conv3(out), self.conv5(out), self.conv7(out)], dim=1)
    out = F.relu(out)
    out = self.maxpool(out)
    out = out.view(bsz, -1)
    out = self.dropout(out)
    return self.linear(out)
def F_affine2d(x, matrix, center=True):
    """
    2D Affine image transform on torch.autograd.Variable

    x: image Variable indexed as [batch?, H, W] (coords use x.size(1),
    x.size(2)). matrix: affine params, (2,3) or (N,2,3); the left 2x2 is
    the linear part A, the last column the translation b. center=True
    applies the transform about the image center rather than the corner.
    """
    if matrix.dim() == 2:
        matrix = matrix.view(-1,2,3)
    # split the matrix into linear part A and translation b
    A_batch = matrix[:,:,:2]
    if A_batch.size(0) != x.size(0):
        # broadcast a single matrix over the whole batch
        A_batch = A_batch.repeat(x.size(0),1,1)
    b_batch = matrix[:,:,2].unsqueeze(1)
    # make a meshgrid of normal coordinates
    _coords = th_iterproduct(x.size(1),x.size(2))
    coords = Variable(_coords.unsqueeze(0).repeat(x.size(0),1,1).float(),
                      requires_grad=False)
    if center:
        # shift the coordinates so center is the origin
        coords[:,:,0] = coords[:,:,0] - (x.size(1) / 2. + 0.5)
        coords[:,:,1] = coords[:,:,1] - (x.size(2) / 2. + 0.5)
    # apply the coordinate transformation
    new_coords = coords.bmm(A_batch.transpose(1,2)) + b_batch.expand_as(coords)
    if center:
        # shift the coordinates back so origin is origin
        new_coords[:,:,0] = new_coords[:,:,0] + (x.size(1) / 2. + 0.5)
        new_coords[:,:,1] = new_coords[:,:,1] + (x.size(2) / 2. + 0.5)
    # map new coordinates using bilinear interpolation
    x_transformed = F_bilinear_interp2d(x, new_coords)
    return x_transformed
def __val(self):
    """
    Validation function during the train phase.
    """
    self.seg_net.eval()
    start_time = time.time()

    for j, data_tuple in enumerate(self.val_loader):
        # Change the data type.
        # BUG FIX: `async` became a reserved keyword in Python 3.7, so
        # .cuda(async=True) is a SyntaxError; the parameter was renamed
        # to non_blocking in PyTorch 0.4.
        inputs = Variable(data_tuple[0].cuda(non_blocking=True), volatile=True)
        targets = Variable(data_tuple[1].cuda(non_blocking=True), volatile=True)

        # Forward pass.
        outputs = self.seg_net(inputs)

        # Compute the loss of the val batch.
        loss_pixel = self.pixel_loss(outputs, targets)
        loss = loss_pixel
        self.val_losses.update(loss.data[0], inputs.size(0))

        # Update the vars of the val phase.
        self.batch_time.update(time.time() - start_time)
        start_time = time.time()

    self.module_utilizer.save_net(self.seg_net, self.iters)

    # Print the log info & reset the states.
    Log.info(
        'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
        'Loss {loss.avg:.8f}\n'.format(
            batch_time=self.batch_time, loss=self.val_losses))
    self.batch_time.reset()
    self.val_losses.reset()
    self.seg_net.train()
def update_parameters(self, batch):
    """One DDPG update from a batch of transitions.

    Returns (value_loss, policy_loss) as Python floats.
    """
    states = Variable(torch.cat(batch.state))
    actions = Variable(torch.cat(batch.action))
    rewards = Variable(torch.cat(batch.reward))
    masks = Variable(torch.cat(batch.mask))
    next_states = Variable(torch.cat(batch.next_state))

    # Bootstrapped Q target from the target networks
    next_actions = self.actor_target(next_states)
    next_q = self.critic_target(next_states, next_actions)
    rewards = rewards.unsqueeze(1)
    masks = masks.unsqueeze(1)
    target_q = rewards + (self.gamma * masks * next_q)

    # Critic step: regress Q(s, a) toward the target
    self.critic_optim.zero_grad()
    current_q = self.critic(states, actions)
    value_loss = F.mse_loss(current_q, target_q)
    value_loss.backward()
    self.critic_optim.step()

    # Actor step: ascend Q(s, pi(s))
    self.actor_optim.zero_grad()
    policy_loss = -self.critic(states, self.actor(states))
    policy_loss = policy_loss.mean()
    policy_loss.backward()
    self.actor_optim.step()

    # Polyak-average the target networks toward the online networks
    soft_update(self.actor_target, self.actor, self.tau)
    soft_update(self.critic_target, self.critic, self.tau)

    return value_loss.item(), policy_loss.item()
def vector_grad():
    """Backprop through a simple vector expression and print the gradients."""
    x = Variable(torch.ones(2) * 3, requires_grad=True)
    y = Variable(torch.ones(2) * 4, requires_grad=True)
    z = x.pow(2) + 3 * y.pow(2)
    # seed the vector-Jacobian product with ones
    z.backward(torch.ones(2))
    print(x.grad)  # dz/dx = 2x
    print(y.grad)  # dz/dy = 6y
def forward(self, sent_tuple):
    """Encode a padded batch with a BiLSTM and pool with four learned
    attention queries; returns the concatenation of the four head
    embeddings.

    sent_tuple: (sent, sent_len) where sent is
    Variable(seqlen x batch x worddim) and sent_len is
    [max_len, ..., min_len] per batch element.
    """
    sent, sent_len = sent_tuple
    bsize = sent.size(1)

    # Re-create the zero initial state whenever the batch size changes
    self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
        Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

    # Sort by length (keep idx) -- required by pack_padded_sequence
    sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
    sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))

    # Handling padding in Recurrent Networks
    sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
    sent_output = self.enc_lstm(sent_packed, (self.init_lstm, self.init_lstm))[0]  # seqlen x batch x 2*nhid
    sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]

    # Un-sort by length
    idx_unsort = np.argsort(idx_sort)
    sent_output = sent_output.index_select(1, Variable(torch.cuda.LongTensor(idx_unsort)))
    sent_output = sent_output.transpose(0,1).contiguous()

    sent_output_proj = self.proj_lstm(sent_output.view(-1, 2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)
    sent_key_proj = self.proj_key(sent_output.view(-1, 2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)
    # NAACL : u_it=tanh(W_w.h_it + b_w) like in NAACL paper
    sent_key_proj = torch.tanh(sent_key_proj)

    # Temperature
    Temp = 3

    # BUG FIX: heads 3 and 4 previously reused query-embedding index 1
    # (copy-paste); each head now attends with its own query vector 0..3.
    # (The occasional debug print of alphas was dropped: it concatenated
    # 1-D tensors along dim=1, which raises.)
    head_embs = []
    for head in range(4):
        sent_w = self.query_embedding(Variable(torch.LongTensor(bsize*[head]).cuda())).unsqueeze(2)  # (bsize, nhid, 1)
        keys = sent_key_proj.bmm(sent_w).squeeze(2) / Temp
        # push padded (zero) positions to -inf-ish before the softmax
        keys = keys + ((keys == 0).float()*-1000)
        alphas = self.softmax(keys).unsqueeze(2).expand_as(sent_key_proj)
        head_embs.append(torch.sum(alphas * sent_output_proj, 1).squeeze(1))

    emb = torch.cat(head_embs, 1)
    return emb
def forward(self, predictions, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)
        targets (tensor): Ground truth boxes and labels for a batch,
        shape: [batch_size,num_objs,5] (last idx is the label).
    """
    loc_data, conf_data, priors = predictions
    # num is the batch size
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes
    # match priors (default boxes) and ground truth boxes
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        # fills loc_t / conf_t in place for image idx
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # 0/1 mask of positive priors, same size as conf_t
    num_pos = pos.sum(dim=1, keepdim=True)  # size: (batch, 1)

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)  # expanded values are copies
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # only the positive (conf_t > 0) boxes contribute to loss_l
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    '''
    batch_conf.size:(15*8732,10)
    conf_t.size:(15,8732),conf_t对应的是真实的label,包括背景.
    eg:15张图片,每个图片生成8732个框,一共有10个类别,batch_conf是预测值,conf_t是真实的label,batch_conf.gather所做的就是返回这些框真实类别的得分,
    比如有一个框的真实的label为7,则返回batch_conf中对应预测框的第7个类别的得分,第二项的size为(15*8732,1)
    '''
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c = loss_c.view(num, -1)  # size: (batch, num_priors)
    loss_c[
        pos] = 0  # filter out pos boxes for now (positives get loss 0 so they never rank as hard negatives)
    # double argsort gives each prior's rank by descending loss
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    # negatives per image = negpos_ratio * positives, capped below the
    # total number of priors
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(
        idx_rank
    )  # keep the num_neg highest-loss priors per image as negatives

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    # gather positives (conf_t > 0) plus the mined hard negatives
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = num_pos.data.sum()
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def trainNet(net, batch_size, n_epochs, learning_rate):
    """Train `net` on the module-level train_data_loader.

    Prints running statistics roughly ten times per epoch, runs a validation
    pass (computeAccuracy) after every epoch and a final test() at the end.

    Args:
        net: the model to train (updated in place).
        batch_size: batch size (informational; the loader is prebuilt).
        n_epochs: number of full passes over the training data.
        learning_rate: learning rate forwarded to createLossAndOptimizer.
    """
    # Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    print("Number of train samples: ", len(train_data))
    print("Number of test samples: ", len(validation_data))
    print("Detected Classes are: ", train_data.class_to_idx)

    # Get training data
    train_loader = train_data_loader
    n_batches = len(train_loader)

    # Create our loss and optimizer functions
    loss, optimizer = createLossAndOptimizer(net, learning_rate)

    # Time for printing
    training_start_time = time.time()

    # Loop for n_epochs
    for epoch in range(n_epochs):
        running_loss = 0.0
        # BUGFIX: n_batches // 10 is 0 when the loader has fewer than 10
        # batches, which made `running_loss / print_every` below divide by
        # zero; clamp to at least 1.
        print_every = max(1, n_batches // 10)
        start_time = time.time()
        total_train_loss = 0

        for i, data in enumerate(train_loader, 0):
            # Get inputs
            inputs, labels = data
            # Wrap them in a Variable object
            inputs, labels = Variable(inputs), Variable(labels)

            # Set the parameter gradients to zero
            optimizer.zero_grad()

            # Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()

            # Accumulate statistics (.item() replaces the deprecated .data[0])
            running_loss += loss_size.item()
            total_train_loss += loss_size.item()

            # Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                    epoch + 1, int(100 * (i + 1) / n_batches),
                    running_loss / print_every, time.time() - start_time))
                # Reset running loss and time
                print("this is the {} th running".format(i))
                running_loss = 0.0
                start_time = time.time()

        print("epoch finished, took {:.2f}s".format(time.time() - training_start_time))
        # At the end of the epoch, do a pass on the validation set
        computeAccuracy(net, loss, validation_data_loader, 'validate epoch')

    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))
    test(net)
def computeAccuracy(net, loss, accuracy_data_loader, title):
    """Evaluate `net` on `accuracy_data_loader` and print the average loss,
    overall accuracy, and per-class accuracy / confusion counts for the four
    classes a-d.

    Args:
        net: callable model, returns class scores of shape (batch, 4).
        loss: criterion called as loss(outputs, labels), returns a 0-dim tensor.
        accuracy_data_loader: iterable of (inputs, labels) batches.
        title (str): tag interpolated into the printed summary lines.

    Refactor note: the original kept 20 hand-written counters (total_a,
    total_a_b, ...). They are replaced by one 4x4 confusion matrix plus a
    per-label total; the printed output is unchanged.
    """
    class_names = ['a', 'b', 'c', 'd']
    n_classes = len(class_names)
    # confusion[l][p]: samples with true label l predicted as p (both in 0..3)
    confusion = [[0] * n_classes for _ in range(n_classes)]
    # label_total[l]: samples with true label l, regardless of prediction
    label_total = [0] * n_classes

    total_val_loss = 0
    total = 0
    correct = 0
    for inputs, labels in accuracy_data_loader:
        # Wrap tensors in Variables
        inputs, labels = Variable(inputs), Variable(labels)

        # Forward pass
        val_outputs = net(inputs)
        val_loss_size = loss(val_outputs, labels)
        _, predicted = torch.max(val_outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        for l, p in zip(labels, predicted):
            li, pi = l.item(), p.item()
            # Labels outside 0..3 were ignored by the original elif chain
            # as well; keep that behavior.
            if 0 <= li < n_classes:
                label_total[li] += 1
                if li == pi:
                    confusion[li][li] += 1
                elif 0 <= pi < n_classes:
                    confusion[li][pi] += 1

        total_val_loss += val_loss_size.item()

    print("Validation loss = {:.2f}".format(total_val_loss / len(accuracy_data_loader)))
    print("{} total images {}".format(title, total))
    print("{} correct images {}".format(title, correct))
    print('Accuracy of the network on the {} images: {} %%'.format(title, 100 * correct / total))

    # Per-class breakdown, in the same order and wording as before.
    for ci, cname in enumerate(class_names):
        row = confusion[ci]
        row_total = label_total[ci]
        print("total {} images {}".format(cname, row_total))
        print("correct {} images {}".format(cname, row[ci]))
        print('Accuracy of the {} images: {} %%'.format(cname, 100 * row[ci] / row_total))
        for cj, oname in enumerate(class_names):
            if cj != ci:
                print("incorrect {} images predicted {} {}".format(cname, oname, row[cj]))
import torch
from torch.autograd import Variable

# Scalar linear regression y ≈ t1 * x + t0 trained with manually derived
# gradients of the mean-squared-error loss (no autograd on purpose).
LR = 1e-3

x = torch.tensor([1., 2., 3., 4., 5., 6., 7., 8., 9., 11.])
y = torch.tensor([3., 5., 7., 9., 11., 14., 15., 18., 20., 23.])

# Parameters: scalar intercept t0 and scalar slope t1.
# BUGFIX: t1 was created with torch.FloatTensor(10), i.e. *uninitialized*
# memory, and t0 had shape (1, 10) although the model is a single line;
# both are now proper random scalars.  x/y no longer carry
# requires_grad=True: the update is manual, so building autograd graphs
# was pure waste (and `x.t()` on a 1-D tensor was needed only to paper
# over the shape mismatch).
t0 = torch.rand(1)
t1 = torch.rand(1)


def hypothesis(x):
    """Model prediction t0 + t1 * x (elementwise over the sample vector)."""
    y_pred = t0 + t1 * x
    return y_pred


def costfunc(y_pred, y):
    """Mean squared error over the 10 samples."""
    loss = (y_pred - y).pow(2).sum() / 10
    return loss


for i in range(200):
    y_pred = hypothesis(x)
    loss = costfunc(y_pred, y)
    # d(loss)/d(t0) = 2/10 * sum(err)      = sum(err) / 5
    # d(loss)/d(t1) = 2/10 * sum(err * x)  = sum(err * x) / 5
    t0_grad = (y_pred - y).sum() / 5
    t1_grad = ((y_pred - y) * x).sum() / 5
    t0 -= LR * t0_grad
    t1 -= LR * t1_grad
def train_step_2(trainloader, net_s, net_z, net_d, optimizer_zc, optimizer_d,
                 criterion_rec, criterion_zc, criterion_d, epoch, use_cuda,
                 _sigma1, _sigma2, _lambda):
    """One epoch of alternating decoder / adversarial-clustering training.

    Every third batch (i % 3 == 0) trains the decoder net_d together with
    the z-encoder: the decoder minimizes reconstruction while *subtracting*
    beta times the clustering objective (adversarial term).  The remaining
    batches train the z-encoder clustering objective alone.  net_s provides
    a second, frozen representation concatenated into the decoder input.

    BUGFIX: every `tensor.data[0]` read was replaced with `tensor.item()`.
    Indexing a 0-dim tensor with [0] raises on PyTorch >= 0.5, and the rest
    of this file already uses `.item()` for scalar losses.
    """
    losses = AverageMeter()
    losses1 = AverageMeter()
    losses2 = AverageMeter()
    losses_d_rec = AverageMeter()
    losses_d = AverageMeter()

    print('\n Epoch: %d' % epoch)

    net_z.train()
    net_d.train()

    decoder_loss = 0.0
    adversarial_loss = 0.0
    for i, (inputs, pairweights, sampweights, pairs,
            index) in enumerate(trainloader):
        inputs = torch.squeeze(inputs, 0)
        pairweights = torch.squeeze(pairweights)
        sampweights = torch.squeeze(sampweights)
        index = torch.squeeze(index)
        pairs = pairs.view(-1, 2)

        if use_cuda:
            inputs = inputs.cuda()
            pairweights = pairweights.cuda()
            sampweights = sampweights.cuda()
            index = index.cuda()
            pairs = pairs.cuda()

        inputs, sampweights, pairweights = Variable(inputs), \
            Variable(sampweights, requires_grad=False), \
            Variable(pairweights, requires_grad=False)

        # train z encoder and decoder
        if i % 3 == 0:
            # zero the parameter gradients
            optimizer_d.zero_grad()
            optimizer_zc.zero_grad()

            # forward + backward + optimize
            outputs_s, _ = net_s(inputs)
            outputs_z, dec_z = net_z(inputs)
            loss1 = criterion_rec(inputs, dec_z, sampweights)
            loss2 = criterion_zc(outputs_z, sampweights, pairweights, pairs,
                                 index, _sigma1, _sigma2, _lambda)
            loss_zc = loss1 + loss2

            # record loss
            losses1.update(loss1.item(), inputs.size(0))
            losses2.update(loss2.item(), inputs.size(0))
            losses.update(loss_zc.item(), inputs.size(0))

            decoder_input = torch.cat((outputs_s, outputs_z), 1)
            outputs_d = net_d(decoder_input)

            beta = 1.99  # adversarial weight, hand-tuned (was 1.985)
            loss_d_rec = criterion_d(outputs_d, inputs)
            # Reconstruct well while hurting the clustering objective.
            loss_d = loss_d_rec - beta * loss_zc

            # record loss
            losses_d_rec.update(loss_d_rec.item(), inputs.size(0))
            losses_d.update(loss_d.item(), inputs.size(0))

            loss_d.backward()
            optimizer_d.step()
            optimizer_zc.step()

            decoder_loss += loss_d.item()

            print('dcc_reconstruction_loss', losses1.avg, epoch)
            print('dcc_clustering_loss', losses2.avg, epoch)
            print('dcc_loss', losses.avg, epoch)
            print('total_reconstruction_loss', losses_d_rec.avg, epoch)
            print('total_loss', losses_d.avg, epoch)

            # log to TensorBoard
            if args.tensorboard:
                log_value('dcc_reconstruction_loss', losses1.avg, epoch)
                log_value('dcc_clustering_loss', losses2.avg, epoch)
                log_value('dcc_loss', losses.avg, epoch)
                log_value('total_reconstruction_loss', losses_d_rec.avg,
                          epoch)
                log_value('total_loss', losses_d.avg, epoch)

        # train adversarial clustering
        else:
            # zero the parameter gradients
            optimizer_zc.zero_grad()

            # forward + backward + optimize
            outputs_z, dec_z = net_z(inputs)
            loss1 = criterion_rec(inputs, dec_z, sampweights)
            loss2 = criterion_zc(outputs_z, sampweights, pairweights, pairs,
                                 index, _sigma1, _sigma2, _lambda)
            loss_zc = loss1 + loss2

            # record loss
            losses1.update(loss1.item(), inputs.size(0))
            losses2.update(loss2.item(), inputs.size(0))
            losses.update(loss_zc.item(), inputs.size(0))

            loss_zc.backward()
            optimizer_zc.step()

            adversarial_loss += loss_zc.item()

        # print statistics
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] decoder loss: %.3f, adversarial loss: %.3f'
                  % (epoch + 1, i + 1, decoder_loss / 500,
                     adversarial_loss / 1500))
            decoder_loss = 0.0
            adversarial_loss = 0.0
class InnerAttentionYANGEncoder(nn.Module):
    """BiLSTM sentence encoder with inner self-attention in the style of
    Yang et al.'s hierarchical attention: M = tanh(W h_i + W h_summary),
    alpha = softmax(M . w), emb = sum_i alpha_i * proj(h_i), where the
    summary is a max-pool over time.

    NOTE(review): .cuda() is hard-coded for the LSTM init state and the
    index tensors, so this module is GPU-only as written.
    """

    def __init__(self, config):
        super(InnerAttentionYANGEncoder, self).__init__()
        self.bsize = config['bsize']
        self.word_emb_dim = config['word_emb_dim']
        self.enc_lstm_dim = config['enc_lstm_dim']
        self.pool_type = config['pool_type']

        # Single-layer bidirectional LSTM -> per-token states of size 2*nhid.
        self.enc_lstm = nn.LSTM(self.word_emb_dim, self.enc_lstm_dim, 1,
                                bidirectional=True)
        # Zero initial (h, c) state; rebuilt in forward() whenever the
        # actual batch size differs from config['bsize'].
        self.init_lstm = Variable(torch.FloatTensor(
            2, self.bsize, self.enc_lstm_dim).zero_()).cuda()

        # Projections for the LSTM outputs (values), the summary (query
        # side) and the encoder states (keys).
        self.proj_lstm = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                   bias=True)
        self.proj_query = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                    bias=True)
        self.proj_enc = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                  bias=True)

        # A single learned query vector, stored as a 1-entry embedding table.
        self.query_embedding = nn.Embedding(1, 2*self.enc_lstm_dim)
        # NOTE(review): nn.Softmax() without dim= relies on the legacy
        # implicit-dim behavior and warns/errors on newer PyTorch.
        self.softmax = nn.Softmax()

    def forward(self, sent_tuple):
        # sent_len: [max_len, ..., min_len] (batch)
        # sent: Variable(seqlen x batch x worddim)
        sent, sent_len = sent_tuple
        bsize = sent.size(1)

        # Rebuild the zero init state when the batch size changes
        # (e.g. the last, smaller batch of an epoch).
        self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
            Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

        # Sort by length (keep idx) -- required by pack_padded_sequence.
        sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
        sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))

        # Handling padding in Recurrent Networks
        sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
        sent_output = self.enc_lstm(
            sent_packed, (self.init_lstm, self.init_lstm))[0]
        # seqlen x batch x 2*nhid
        sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]

        # Un-sort by length to restore the original batch order.
        idx_unsort = np.argsort(idx_sort)
        sent_output = sent_output.index_select(
            1, Variable(torch.cuda.LongTensor(idx_unsort)))

        sent_output = sent_output.transpose(0, 1).contiguous()  # (bsize, seqlen, 2*nhid)
        sent_output_proj = self.proj_lstm(sent_output.view(
            -1, 2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)

        sent_keys = self.proj_enc(sent_output.view(
            -1, 2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)

        # Max-pool over time as the sentence summary.
        # NOTE(review): on old PyTorch torch.max(.., 1) kept the reduced
        # dim, so squeeze(1) was needed; on modern versions it is a no-op
        # unless 2*nhid == 1 -- verify against the targeted torch version.
        sent_max = torch.max(sent_output, 1)[0].squeeze(1)  # (bsize, 2*nhid)

        sent_summary = self.proj_query(
            sent_max).unsqueeze(1).expand_as(sent_keys)
        # (bsize, seqlen, 2*nhid)

        sent_M = torch.tanh(sent_keys + sent_summary)
        # (bsize, seqlen, 2*nhid) YANG : M = tanh(Wh_i + Wh_avg
        sent_w = self.query_embedding(Variable(torch.LongTensor(
            bsize*[0]).cuda())).unsqueeze(2)  # (bsize, 2*nhid, 1)

        sent_alphas = self.softmax(sent_M.bmm(sent_w).squeeze(2)).unsqueeze(1)
        # (bsize, 1, seqlen)

        # Occasional debug dump (fires when the wall clock hits a multiple
        # of 200 seconds).
        if int(time.time()) % 200 == 0:
            print('w', torch.max(sent_w[0]), torch.min(sent_w[0]))
            print('alphas', sent_alphas[0][0][0:sent_len[0]])
        # Get attention vector: weighted sum of the projected LSTM outputs.
        emb = sent_alphas.bmm(sent_output_proj).squeeze(1)

        return emb
def train(self, X_train, x_train_2, y_train, windwos_size, predict_move, ex_data):
    """Train an LSTM regressor on sliding windows built from two input
    series (X_train, x_train_2), predicting y_train `predict_move` steps
    ahead of each window position.

    NOTE(review): `windwos_size` (sic) is kept to preserve the public
    signature; it is the sliding-window length.  CUDA is hard-coded
    (.cuda() on both the model and each batch).  `ex_data` is accepted but
    unused (its append is commented out in the original).
    """
    # Locally-defined RNN model.  The input is the concatenation of the
    # two series, hence input_size = i_size * 2.
    class RNN(nn.Module):
        def __init__(self, i_size, h_size, n_layers, o_size):
            super(RNN, self).__init__()
            self.rnn = nn.LSTM(
                # need to change this value to get more input
                input_size=i_size * 2,
                hidden_size=h_size,
                num_layers=n_layers)
            self.out = nn.Linear(h_size, o_size)

        def forward(self, x, h_state):
            r_out, hidden_state = self.rnn(x, h_state)

            # Flatten the (time, batch) dims before the linear head.
            hidden_size = hidden_state[-1].size(-1)
            r_out = r_out.view(-1, hidden_size)
            outs = self.out(r_out)
            return outs, hidden_state

    print(torch.cuda.is_available())

    self.rnn = RNN(self.INPUT_SIZE, self.HIDDEN_SIZE, self.NUM_LAYERS,
                   self.OUTPUT_SIZE)
    self.rnn.cuda()
    optimiser = torch.optim.Adam(self.rnn.parameters(),
                                 lr=self.learning_rate)
    criterion = nn.MSELoss()

    for epoch in range(self.num_epochs):
        hidden_state = None
        # Slide a window of length windwos_size over the series with a
        # stride of (windwos_size - predict_move).
        for stage in range(0,
                           len(X_train) - windwos_size - self.INPUT_SIZE,
                           windwos_size - predict_move):
            X_train_data = []
            Y_train_Data = []
            X_train_data_r = None
            Y_train_data_r = None
            for i in range(self.INPUT_SIZE + stage,
                           self.INPUT_SIZE + stage + windwos_size):
                tempdata = []
                # Concatenate the last INPUT_SIZE points of both series.
                tempdata = np.append(X_train[i - self.INPUT_SIZE:i, 0],
                                     x_train_2[i - self.INPUT_SIZE:i, 0])
                X_train_data.append(tempdata)
                # Target: the value predict_move steps after position i.
                Y_train_Data.append(y_train[i + predict_move, 0])

            X_train_data_r, Y_train_data_r = np.array(
                X_train_data), np.array(Y_train_Data)
            # (window, features) -> (window, batch=1, features) for the LSTM.
            X_train_data_r = np.reshape(
                X_train_data_r,
                (X_train_data_r.shape[0], 1, X_train_data_r.shape[1]))

            inputs = Variable(
                torch.from_numpy(X_train_data_r).float()).cuda()
            labels = Variable(
                torch.from_numpy(Y_train_data_r).float()).cuda()

            # Hidden state is carried across windows within an epoch.
            # NOTE(review): it is never detached, so the autograd graph
            # keeps growing across windows -- that is why
            # retain_graph=True is needed below.  Detaching hidden_state
            # per window (truncated BPTT) would be the usual fix, but it
            # changes training behavior, so it is only flagged here.
            output, hidden_state = self.rnn(inputs, hidden_state)
            loss = criterion(output.view(-1), labels)
            optimiser.zero_grad()
            # back propagation
            loss.backward(retain_graph=True)
            # update
            optimiser.step()

        print('epoch {}, loss {}'.format(epoch, loss.item()))

    return self.rnn
def forward(self, predictions, wrapper, wrapper_mask):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        mask preds, and prior boxes from SSD net.
            loc shape: torch.size(batch_size,num_priors,4)
            conf shape: torch.size(batch_size,num_priors,num_classes)
            masks shape: torch.size(batch_size,num_priors,mask_dim)
            priors shape: torch.size(num_priors,4)
            proto* shape: torch.size(batch_size,mask_h,mask_w,mask_dim)

        targets (list<tensor>): Ground truth boxes and labels for a batch,
            shape: [batch_size][num_objs,5] (last idx is the label).

        masks (list<tensor>): Ground truth masks for each object in each image,
            shape: [batch_size][num_objs,im_height,im_width]

        num_crowds (list<int>): Number of crowd annotations per batch. The crowd
            annotations should be the last num_crowds elements of targets and masks.

        * Only if mask_type == lincomb

    Returns:
        dict of named losses (see the Loss Key at the bottom), already
        normalized by the number of positives (or batch size for P/E/S).
    """
    loc_data = predictions['loc']
    conf_data = predictions['conf']
    mask_data = predictions['mask']
    priors = predictions['priors']
    if cfg.mask_type == mask_type.lincomb:
        proto_data = predictions['proto']

    if cfg.use_instance_coeff:
        inst_data = predictions['inst']
    else:
        inst_data = None

    # Unpack the ground truth carried through the DataParallel-safe wrapper.
    targets, masks, num_crowds = wrapper.get_args(wrapper_mask)
    labels = [None] * len(targets)  # Used in sem segm loss

    batch_size = loc_data.size(0)
    # This is necessary for training on multiple GPUs because
    # DataParallel will cat the priors from each GPU together
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes
    # These tensors will be created with the same device as loc_data
    loc_t = loc_data.new(batch_size, num_priors, 4)
    gt_box_t = loc_data.new(batch_size, num_priors, 4)
    conf_t = loc_data.new(batch_size, num_priors).long()
    idx_t = loc_data.new(batch_size, num_priors).long()

    defaults = priors.data

    if cfg.use_class_existence_loss:
        class_existence_t = loc_data.new(batch_size, num_classes - 1)

    for idx in range(batch_size):
        truths = targets[idx][:, :-1].data
        labels[idx] = targets[idx][:, -1].data.long()

        if cfg.use_class_existence_loss:
            # Construct a one-hot vector for each object and collapse it into an existence vector with max
            # Also it's fine to include the crowd annotations here
            class_existence_t[idx, :] = torch.eye(
                num_classes - 1,
                device=conf_t.get_device())[labels[idx]].max(dim=0)[0]

        # Split the crowd annotations because they come bundled in
        cur_crowds = num_crowds[idx]
        if cur_crowds > 0:
            # Crowd annotations are the *last* cur_crowds entries.
            split = lambda x: (x[-cur_crowds:], x[:-cur_crowds])
            crowd_boxes, truths = split(truths)

            # We don't use the crowd labels or masks
            _, labels[idx] = split(labels[idx])
            _, masks[idx] = split(masks[idx])
        else:
            crowd_boxes = None

        # Fills loc_t / conf_t / idx_t in place for image idx.
        match(self.pos_threshold, self.neg_threshold, truths, defaults,
              labels[idx], crowd_boxes, loc_t, conf_t, idx_t, idx,
              loc_data[idx])

        gt_box_t[idx, :, :] = truths[idx_t[idx]]

    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)
    idx_t = Variable(idx_t, requires_grad=False)

    # Positive priors: matched to a non-background ground-truth object.
    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)

    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)

    losses = {}

    # Localization Loss (Smooth L1)
    if cfg.train_boxes:
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        losses['B'] = F.smooth_l1_loss(loc_p, loc_t,
                                       reduction='sum') * cfg.bbox_alpha

    if cfg.train_masks:
        if cfg.mask_type == mask_type.direct:
            if cfg.use_gt_bboxes:
                # Gather the GT mask matched to each positive prior.
                pos_masks = []
                for idx in range(batch_size):
                    pos_masks.append(masks[idx][idx_t[idx, pos[idx]]])
                masks_t = torch.cat(pos_masks, 0)
                masks_p = mask_data[pos, :].view(-1, cfg.mask_dim)
                losses['M'] = F.binary_cross_entropy(
                    torch.clamp(masks_p, 0, 1), masks_t,
                    reduction='sum') * cfg.mask_alpha
            else:
                losses['M'] = self.direct_mask_loss(
                    pos_idx, idx_t, loc_data, mask_data, priors, masks)
        elif cfg.mask_type == mask_type.lincomb:
            losses.update(
                self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data,
                                       priors, proto_data, masks, gt_box_t,
                                       inst_data))

            # Optional regularizer on the prototype activations.
            if cfg.mask_proto_loss is not None:
                if cfg.mask_proto_loss == 'l1':
                    losses['P'] = torch.mean(
                        torch.abs(proto_data)
                    ) / self.l1_expected_area * self.l1_alpha
                elif cfg.mask_proto_loss == 'disj':
                    losses['P'] = -torch.mean(
                        torch.max(F.log_softmax(proto_data, dim=-1),
                                  dim=-1)[0])

    # Confidence loss
    if cfg.use_focal_loss:
        if cfg.use_sigmoid_focal_loss:
            losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t)
        elif cfg.use_objectness_score:
            losses['C'] = self.focal_conf_objectness_loss(
                conf_data, conf_t)
        else:
            losses['C'] = self.focal_conf_loss(conf_data, conf_t)
    else:
        losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos,
                                          batch_size)

    # These losses also don't depend on anchors
    if cfg.use_class_existence_loss:
        losses['E'] = self.class_existence_loss(predictions['classes'],
                                                class_existence_t)
    if cfg.use_semantic_segmentation_loss:
        losses['S'] = self.semantic_segmentation_loss(
            predictions['segm'], masks, labels)

    # Divide all losses by the number of positives.
    # Don't do it for loss[P] because that doesn't depend on the anchors.
    total_num_pos = num_pos.data.sum().float()
    for k in losses:
        if k not in ('P', 'E', 'S'):
            losses[k] /= total_num_pos
        else:
            losses[k] /= batch_size

    # Loss Key:
    #  - B: Box Localization Loss
    #  - C: Class Confidence Loss
    #  - M: Mask Loss
    #  - P: Prototype Loss
    #  - D: Coefficient Diversity Loss
    #  - E: Class Existence Loss
    #  - S: Semantic Segmentation Loss
    return losses
class InnerAttentionNAACLEncoder(nn.Module):
    """BiLSTM sentence encoder with inner attention following the NAACL
    hierarchical-attention formulation: u_i = tanh(W h_i),
    scores_i = u_i . w, emb = sum_i softmax(scores)_i * proj(h_i).

    NOTE(review): .cuda() is hard-coded for the LSTM init state and the
    index tensors, so this module is GPU-only as written.
    """

    def __init__(self, config):
        super(InnerAttentionNAACLEncoder, self).__init__()
        self.bsize = config['bsize']
        self.word_emb_dim = config['word_emb_dim']
        self.enc_lstm_dim = config['enc_lstm_dim']
        self.pool_type = config['pool_type']

        # Single-layer bidirectional LSTM -> per-token states of size 2*nhid.
        self.enc_lstm = nn.LSTM(self.word_emb_dim, self.enc_lstm_dim, 1,
                                bidirectional=True)
        # Zero initial (h, c); rebuilt in forward() when batch size changes.
        self.init_lstm = Variable(torch.FloatTensor(
            2, self.bsize, self.enc_lstm_dim).zero_()).cuda()

        # Key and value projections over the LSTM outputs.
        self.proj_key = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                  bias=False)
        self.proj_lstm = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                   bias=False)
        # A single learned query vector, stored as a 1-entry embedding table.
        self.query_embedding = nn.Embedding(1, 2*self.enc_lstm_dim)
        # NOTE(review): nn.Softmax() without dim= relies on the legacy
        # implicit-dim behavior and warns/errors on newer PyTorch.
        self.softmax = nn.Softmax()

    def forward(self, sent_tuple):
        # sent_len: [max_len, ..., min_len] (batch)
        # sent: Variable(seqlen x batch x worddim)
        sent, sent_len = sent_tuple
        bsize = sent.size(1)

        # Rebuild the zero init state when the batch size changes.
        self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
            Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

        # Sort by length (keep idx) -- required by pack_padded_sequence.
        sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
        sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))
        # Handling padding in Recurrent Networks
        sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
        sent_output = self.enc_lstm(
            sent_packed, (self.init_lstm, self.init_lstm))[0]
        # seqlen x batch x 2*nhid
        sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]
        # Un-sort by length
        idx_unsort = np.argsort(idx_sort)
        sent_output = sent_output.index_select(
            1, Variable(torch.cuda.LongTensor(idx_unsort)))

        sent_output = sent_output.transpose(0, 1).contiguous()  # (bsize, seqlen, 2*nhid)

        sent_output_proj = self.proj_lstm(sent_output.view(
            -1, 2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)

        sent_key_proj = self.proj_key(sent_output.view(
            -1, 2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)
        sent_key_proj = torch.tanh(sent_key_proj)
        # NAACL paper: u_it=tanh(W_w.h_it + b_w) (bsize, seqlen, 2nhid)

        sent_w = self.query_embedding(Variable(torch.LongTensor(
            bsize*[0]).cuda())).unsqueeze(2)  # (bsize, 2*nhid, 1)

        Temp = 2
        keys = sent_key_proj.bmm(sent_w).squeeze(2) / Temp

        # Set probas of padding to zero in softmax
        # NOTE(review): padding is detected as exact-zero scores, which can
        # in principle also mask a real position whose score happens to be
        # exactly 0; and since keys were already divided by Temp above, the
        # softmax below effectively uses Temp^2. Confirm both are intended.
        keys = keys + ((keys == 0).float()*-10000)

        alphas = self.softmax(keys/Temp).unsqueeze(2).expand_as(sent_output)

        # Occasional debug dump (fires when the wall clock hits a multiple
        # of 100 seconds).
        if int(time.time()) % 100 == 0:
            print('w', torch.max(sent_w), torch.min(sent_w))
            print('alphas', alphas[0, :, 0])
        # Weighted sum of the projected LSTM outputs.
        emb = torch.sum(alphas * sent_output_proj, 1).squeeze(1)

        return emb
def train_optimizer_attack(args):
    """Train a learned (zeroth-order) optimizer on an adversarial-attack
    optimizee via truncated BPTT (learning-to-learn scheme).

    Requires "Attack" in args.train_task; task configuration (optimizee,
    nn_optimizer, batch sizes, step counts, lr) comes from
    train_task_list.tasks.  The meta optimizer is checkpointed every
    args.epochs_per_ckpt epochs, and additionally whenever the summed test
    loss improves (saved with best=True).
    """
    assert "Attack" in args.train_task
    task = train_task_list.tasks[args.train_task]
    print("Training ZO optimizer...\nOptimizer: {}. Optimizee: {}".format(
        task["nn_optimizer"].__name__, task["optimizee"].__name__))

    attack_model = task["attack_model"]()  # targeted model to attack
    if args.cuda:
        attack_model.cuda(args.gpu_num)
    # Load the pretrained target model and freeze it in eval mode.
    ckpt_dict = torch.load(task["attack_model_ckpt"], map_location='cpu')
    attack_model.load_state_dict(ckpt_dict)
    attack_model.eval()
    attack_model.reset()

    # not include parameters
    meta_model = task["optimizee"](optimizee.AttackModel(attack_model),
                                   task['batch_size'])

    # meta optimizer
    if args.cuda:
        meta_model.cuda(args.gpu_num)

    train_loader, test_loader = meta_model.dataset_loader(
        args.data_dir, task['batch_size'], task['test_batch_size'])
    # Endless stream of training batches.
    train_loader = iter(cycle(train_loader))

    if args.warm_start_ckpt != "None":
        meta_optimizer = task["nn_optimizer"](
            optimizee.MetaModel(meta_model), args,
            ckpt_path=args.warm_start_ckpt)
    else:
        meta_optimizer = task["nn_optimizer"](
            optimizee.MetaModel(meta_model), args)
    if args.cuda:
        meta_optimizer.cuda(args.gpu_num)

    # Adam over the meta optimizer's own parameters.
    optimizer = optim.Adam(meta_optimizer.parameters(), lr=task['lr'])

    min_test_loss = float("inf")
    for epoch in range(1, task["max_epoch"] + 1):
        decrease_in_loss = 0.0
        final_loss = 0.0
        meta_optimizer.train()
        for i in range(args.updates_per_epoch):
            # The `optimizee` for attack task
            model = task["optimizee"](optimizee.AttackModel(attack_model),
                                      task['batch_size'])
            if args.cuda:
                model.cuda(args.gpu_num)

            # In the attack task, each attacked image corresponds to a
            # particular optmizee model
            data, target = next(train_loader)
            data, target = Variable(data.double()), Variable(target)
            if args.cuda:
                data, target = data.cuda(args.gpu_num), target.cuda(
                    args.gpu_num)

            # Compute initial loss of the model
            f_x = model(data.double())
            initial_loss = model.loss(f_x, target)

            for k in range(task['optimizer_steps'] //
                           args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_state(keep_states=k > 0, model=model,
                                           use_cuda=args.cuda,
                                           gpu_num=args.gpu_num)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda(args.gpu_num)
                for j in range(args.truncated_bptt_step):
                    # Perfom a meta update using gradients from model
                    # and return the current meta model saved in the
                    # nn_optimizer
                    meta_model, *_ = meta_optimizer.meta_update(
                        model, data, target)

                    # Compute a loss for a step the meta nn_optimizer
                    if not args.use_finite_diff:
                        # Use first-order method to train the zeroth-order
                        # optimizer (assume the gradient is available in
                        # training time)
                        f_x = meta_model(data)
                        loss = meta_model.loss(f_x, target)
                    else:
                        # Use zeroth-order method to train the zeroth-order
                        # optimizer: approximate the gradient
                        loss = optimizee.custom_loss(
                            meta_model.weight, data, target,
                            meta_model.nondiff_loss)

                    # Weight the per-step improvement by the global step
                    # index; on the very first step (k == 0, j == 0) the
                    # weight is 0.
                    loss_sum += (k * args.truncated_bptt_step + j) * (
                        loss - Variable(prev_loss))
                    prev_loss = loss.data

                # Optional regularization terms exposed by the optimizer.
                if hasattr(meta_optimizer, "reg_loss"):
                    loss_sum += meta_optimizer.reg_loss
                if hasattr(meta_optimizer, "grad_reg_loss"):
                    loss_sum += meta_optimizer.grad_reg_loss

                # Update the parameters of the meta nn_optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                # Gradient clipping to [-1, 1] per parameter element.
                for name, param in meta_optimizer.named_parameters():
                    if param.requires_grad:
                        param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.item() / initial_loss.item()
            final_loss += loss.item()

        # test
        meta_optimizer.eval()
        test_loss_sum = 0.0
        test_loss_ratio = 0.0
        num = 0
        for (test_data, test_target) in test_loader:
            test_data, test_target = Variable(
                test_data.double()), Variable(test_target)
            if args.cuda:
                test_data, test_target = test_data.cuda(
                    args.gpu_num), test_target.cuda(args.gpu_num)
            model = task["optimizee"](optimizee.AttackModel(attack_model),
                                      task['test_batch_size'])
            if args.cuda:
                model.cuda(args.gpu_num)
            # Compute initial loss of the model
            f_x = model(test_data.double())
            test_initial_loss = model.loss(f_x, test_target)

            test_loss = 0.0
            meta_optimizer.reset_state(keep_states=False, model=model,
                                       use_cuda=args.cuda,
                                       gpu_num=args.gpu_num)
            # Roll the learned optimizer forward and keep the final loss.
            for _ in range(task["test_optimizer_steps"]):
                _, test_loss, _ = meta_optimizer.meta_update(
                    model, test_data, test_target)
            test_loss_sum += test_loss
            test_loss_ratio += test_loss / test_initial_loss
            num += 1

        msg = "Epoch: {}, final loss {}, average final/initial loss ratio: {}, test loss {}, test loss ratio {}".format(
            epoch, final_loss / args.updates_per_epoch,
            decrease_in_loss / args.updates_per_epoch, test_loss_sum / num,
            test_loss_ratio / num)
        print(msg)
        with open(os.path.join(args.output_dir, "train_log.txt"),
                  'a+') as f:
            f.write(msg + '\n')

        if epoch % args.epochs_per_ckpt == 0:
            meta_optimizer.save(epoch, args.output_dir)
        if test_loss_sum < min_test_loss:
            min_test_loss = test_loss_sum
            meta_optimizer.save(epoch, args.output_dir, best=True)
def train(args, Xgmodel, AEmodel):
    """Stream impression logs, train the autoencoder AEmodel online on each
    accepted sample, then fit an XGBoost classifier on the encoded samples.

    Returns:
        (bst, train_seen_bidids): the trained booster and the set of bid
        ids consumed for training.
    """
    pos_count, neg_count = 0, 0
    training_samples, training_labels = [], []
    training_samples_encoded = []
    # NOTE(review): `iter` shadows the builtin; kept to avoid any drift.
    iter = 1
    train_seen_bidids = set()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(AEmodel.parameters(), lr=args.ae_lr,
                                 weight_decay=args.weight_decay)
    while iter < args.iterations:
        print('iteration number:', iter)
        for date in dates:
            filepath = '../../Data/training3rd/imp.' + date + '.txt.bz2'
            with bz2.BZ2File(filepath) as f:
                for line in f:
                    # NOTE(review): under Python 3 BZ2File yields bytes,
                    # so these str splits assume Python 2 -- verify.
                    line = line.split('\n')[0].split('\t')
                    # Skip bid ids held out in the other two splits.
                    if line[dicts[1]['bidid']] in dicts[0][1]\
                            or line[dicts[1]['bidid']] in dicts[0][2]:
                        continue
                    true_label = 1 if line[dicts[1]
                                           ['bidid']] in dicts[0][0] else 0
                    # Down-sample negatives so that the negative:positive
                    # ratio stays within args.imbalance_factor.
                    if (pos_count == 0 \
                            or float(neg_count) / pos_count > args.imbalance_factor) \
                            and true_label == 0:
                        continue
                    elif true_label == 0:
                        neg_count += 1
                    else:
                        pos_count += 1
                    train_seen_bidids.add(line[dicts[1]['bidid']])
                    # Featurize the raw log line, shape it as a 1-row batch.
                    training_sample = Xgmodel(line, dicts)
                    training_sample = Variable(
                        torch.FloatTensor(training_sample)).view(1, -1)
                    training_samples.append(training_sample)
                    # One online autoencoder step per accepted sample.
                    encoded_output = AEmodel.encode(training_sample)
                    output = AEmodel.decode(encoded_output)
                    loss = criterion(output, training_sample)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    training_labels.append(true_label)
            # NOTE(review): `iter` only changes after the date loop, and the
            # while condition guarantees iter < args.iterations here, so this
            # break can never fire -- likely dead code; confirm the intended
            # early-exit semantics.
            if iter == args.iterations:
                break
        iter += 1
    # Encode all collected samples with the (now trained) autoencoder and
    # fit the XGBoost model on the encodings.
    for training_sample in training_samples:
        training_sample = AEmodel.encode(training_sample).data[0].numpy()
        training_samples_encoded.append(training_sample)
    dtrain = xgb.DMatrix(training_samples_encoded, training_labels)
    param = {
        'max_depth': args.max_depth,
        'eta': args.lr,
        'silent': 1,
        'objective': 'binary:logistic'
    }
    bst = xgb.train(param, dtrain, args.num_rounds)
    print('pos_count:', pos_count, 'neg_count:', neg_count)
    if not args.cv:
        if not os.path.isdir(args.save_dir):
            os.makedirs(args.save_dir)
        save_path = os.path.join(args.save_dir, 'xgboost.model')
        bst.save_model(save_path)
    return bst, train_seen_bidids
        # (fragment: tail of a forward() method whose `def` line begins
        # before this chunk -- presumably SingleLinearRegression.forward)
        out = self.regression(x)
        return out

## 3. Create the model instance
model = SingleLinearRegression()
## 4. Define the loss criterion
criterion = nn.MSELoss()
## 5. Choose the optimization method
optimizer = optim.SGD(model.parameters(), lr=1e-3)

epoch = 2000
for i in range(epoch):
    x_train = Variable(x_train)
    y_train = Variable(y_train)
    ## 6. Get the model output
    out = model(x_train)
    ## 7. Compute the loss value
    loss = criterion(y_train, out)
    ## 8. Clear all parameter gradients
    optimizer.zero_grad()
    ## 9. Compute the gradients
    loss.backward()
    ## 10. Update the parameters
    # NOTE(review): the chunk is cut off here; an optimizer.step() call
    # presumably follows in the original file.
def train(args):
    """Train the PCCoder model on the dataset at args.input_path.

    Splits the data 90/10 into train/test, jointly optimizes the three
    heads (statement, drop, operator) with equal weights, reports train and
    test losses per epoch, and saves a checkpoint per epoch to
    args.output_path.<epoch>.  Relies on module-level hyperparameters
    (learn_rate, batch_size, num_epochs, test_iterator_size, use_cuda and
    the LongTensor/FloatTensor aliases).
    """
    with open(args.input_path, 'r') as f:
        data, statement_target, drop_target, operator_target = load_data(
            f, args.max_len)

    model = PCCoder()
    if use_cuda:
        model.cuda()
    model = nn.DataParallel(model)

    # The cuda types are not used here on purpose - most GPUs can't handle
    # so much memory
    data, statement_target, drop_target, operator_target = \
        torch.LongTensor(data), torch.LongTensor(statement_target), \
        torch.FloatTensor(drop_target), torch.LongTensor(operator_target)

    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
    statement_criterion = nn.CrossEntropyLoss()
    drop_criterion = nn.BCELoss()
    operator_criterion = nn.CrossEntropyLoss()
    lr_sched = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4)

    # Random 90/10 train/test split.
    dataset_size = data.shape[0]
    indices = list(range(dataset_size))
    random.shuffle(indices)
    train_size = int(0.9 * dataset_size)

    train_data = data[indices[:train_size]]
    train_statement_target = statement_target[indices[:train_size]]
    train_drop_target = drop_target[indices[:train_size]]
    train_operator_target = operator_target[indices[:train_size]]

    # Test split is moved to the device types once, up front.
    test_data = Variable(data[indices[train_size:]].type(LongTensor))
    test_statement_target = Variable(
        statement_target[indices[train_size:]].type(LongTensor))
    test_drop_target = Variable(
        drop_target[indices[train_size:]].type(FloatTensor))
    test_operator_target = Variable(
        operator_target[indices[train_size:]].type(LongTensor))

    train_dataset = TensorDataset(train_data, train_statement_target,
                                  train_drop_target, train_operator_target)
    data_loader = DataLoader(train_dataset, batch_size=batch_size,
                             shuffle=True)

    for epoch in range(num_epochs):
        model.train()
        print("Epoch %d" % epoch)
        # NOTE(review): modern PyTorch expects scheduler.step() *after* the
        # optimizer steps of the epoch; calling it at epoch start is the
        # legacy order and shifts the decay schedule by one epoch.
        lr_sched.step()

        statement_losses = []
        drop_losses = []
        operator_losses = []

        for batch in tqdm(data_loader):
            x = Variable(batch[0].type(LongTensor))
            y = Variable(batch[1].type(LongTensor))
            z = Variable(batch[2].type(FloatTensor))
            w = Variable(batch[3].type(LongTensor))

            optimizer.zero_grad()

            pred_act, pred_drop, pred_operator = model(x)
            statement_loss = statement_criterion(pred_act, y)
            drop_loss = drop_criterion(pred_drop, z)
            operator_loss = operator_criterion(pred_operator, w)
            # Joint objective: the three heads are weighted equally.
            loss = statement_loss + operator_loss + drop_loss

            statement_losses.append(statement_loss.item())
            drop_losses.append(drop_loss.item())
            operator_losses.append(operator_loss.item())

            loss.backward()
            optimizer.step()

        avg_statement_train_loss = np.array(statement_losses).mean()
        avg_drop_train_loss = np.array(drop_losses).mean()
        avg_operator_train_loss = np.array(operator_losses).mean()

        model.eval()
        with torch.no_grad():
            # Iterate through test set to avoid out of memory issues
            statement_pred, drop_pred, operator_pred = [], [], []
            for i in range(0, len(test_data), test_iterator_size):
                output = model(test_data[i: i + test_iterator_size])
                statement_pred.append(output[0])
                drop_pred.append(output[1])
                operator_pred.append(output[2])

            statement_pred = torch.cat(statement_pred, dim=0)
            drop_pred = torch.cat(drop_pred, dim=0)
            operator_pred = torch.cat(operator_pred, dim=0)

            test_statement_loss = statement_criterion(
                statement_pred, test_statement_target)
            test_drop_loss = drop_criterion(drop_pred, test_drop_target)
            test_operator_loss = operator_criterion(operator_pred,
                                                    test_operator_target)

        print("Train loss: S %f" % avg_statement_train_loss,
              "D %f" % avg_drop_train_loss,
              "F %f" % avg_operator_train_loss)
        print("Test loss: S %f" % test_statement_loss.item(),
              "D %f" % test_drop_loss.item(),
              "F %f" % test_operator_loss.item())

        # Classification error of the statement head on the test split.
        predict = statement_pred.data.max(1)[1]
        test_error = (predict != test_statement_target.data).sum().item() / \
            float(test_data.shape[0])
        print("Test classification error: %f" % test_error)

        # Save through .module since the model is wrapped in DataParallel.
        model.module.save(args.output_path + ".%d" % epoch)
def __call__(self, model, inputs, targets, to_numpy=True):
    """
    Produce adversarial examples for ``inputs``.

    :param model: the model to attack
    :type model: nn.Module
    :param inputs: the original images tensor, of dimension [B x C x H x W].
           ``inputs`` can be on either CPU or GPU, but it will eventually be
           moved to the same device as the one the parameters of ``model``
           reside
    :type inputs: torch.FloatTensor
    :param targets: the original image labels, or the attack targets, of
           dimension [B]. If ``self.targeted`` is ``True``, then ``targets``
           is treated as the attack targets, otherwise the labels.
           ``targets`` can be on either CPU or GPU, but it will eventually
           be moved to the same device as the one the parameters of
           ``model`` reside
    :type targets: torch.LongTensor
    :param to_numpy: True to return an `np.ndarray`, otherwise,
           `torch.FloatTensor`
    :type to_numpy: bool
    :return: the adversarial examples on CPU, of dimension [B x C x H x W]
    """
    # sanity check
    assert isinstance(model, nn.Module)
    assert len(inputs.size()) == 4
    assert len(targets.size()) == 1

    # get a copy of targets in numpy before moving to GPU, used when doing
    # the binary search on `scale_const`
    targets_np = targets.clone().cpu().numpy()  # type: np.ndarray

    # The `# type:` annotations below are hints only and do not indicate the
    # actual (cuda or cpu) type; same applies to all code below.
    inputs = runutils.make_cuda_consistent(model, inputs)[0]  # type: torch.FloatTensor
    targets = runutils.make_cuda_consistent(model, targets)[0]  # type: torch.FloatTensor

    # run the model a little bit to get the `num_classes`
    num_classes = model(Variable(inputs[0][None, :], requires_grad=False)).size(1)  # type: int
    batch_size = inputs.size(0)  # type: int

    # `lower_bounds_np`, `upper_bounds_np` and `scale_consts_np` are used for
    # binary search of each `scale_const` in the batch. The element-wise
    # inequality holds: lower_bounds_np < scale_consts_np <= upper_bounds_np
    lower_bounds_np = np.zeros(batch_size)
    upper_bounds_np = np.ones(batch_size) * self.c_range[1]
    scale_consts_np = np.ones(batch_size) * self.c_range[0]

    # Optimal attack found so far, across all binary-search steps:
    # - `o_best_l2`: the least L2 norms
    # - `o_best_l2_ppred`: the perturbed predictions made by the adversarial
    #   perturbations with the least L2 norms (-1 means "none found yet")
    # - `o_best_advx`: the underlying adversarial example of `o_best_l2_ppred`
    o_best_l2 = np.ones(batch_size) * np.inf
    o_best_l2_ppred = -np.ones(batch_size)
    o_best_advx = inputs.clone().cpu().numpy()  # type: np.ndarray

    # convert `inputs` to tanh-space
    inputs_tanh = self._to_tanh_space(inputs)  # type: torch.FloatTensor
    inputs_tanh_var = Variable(inputs_tanh, requires_grad=False)

    # the one-hot encoding of `targets`
    targets_oh = torch.zeros(targets.size() + (num_classes,))  # type: torch.FloatTensor
    targets_oh = runutils.make_cuda_consistent(model, targets_oh)[0]
    targets_oh.scatter_(1, targets.unsqueeze(1), 1.0)
    targets_oh_var = Variable(targets_oh, requires_grad=False)

    # The perturbation variable to optimize. `pert_tanh` is essentially the
    # adversarial perturbation in tanh-space (denoted `modifier` in
    # Carlini's reference code).
    pert_tanh = torch.zeros(inputs.size())  # type: torch.FloatTensor
    if self.init_rand:
        nn.init.normal(pert_tanh, mean=0, std=1e-3)
    pert_tanh = runutils.make_cuda_consistent(model, pert_tanh)[0]
    pert_tanh_var = Variable(pert_tanh, requires_grad=True)

    optimizer = optim.Adam([pert_tanh_var], lr=self.optimizer_lr)
    for sstep in range(self.binary_search_steps):
        # On the last binary-search step, optionally retry with the largest
        # constants seen so far (Carlini's "repeat" heuristic).
        if self.repeat and sstep == self.binary_search_steps - 1:
            scale_consts_np = upper_bounds_np
        scale_consts = torch.from_numpy(
            np.copy(scale_consts_np)).float()  # type: torch.FloatTensor
        scale_consts = runutils.make_cuda_consistent(model, scale_consts)[0]
        scale_consts_var = Variable(scale_consts, requires_grad=False)

        # the minimum L2 norms of perturbations found during this search step
        best_l2 = np.ones(batch_size) * np.inf
        # the perturbed predictions corresponding to `best_l2`, to be used
        # in binary search of `scale_const`
        best_l2_ppred = -np.ones(batch_size)
        # previous (summed) batch loss, to be used in early stopping policy
        prev_batch_loss = np.inf  # type: float
        for optim_step in range(self.max_steps):
            batch_loss, pert_norms_np, pert_outputs_np, advxs_np = \
                self._optimize(model, optimizer, inputs_tanh_var,
                               pert_tanh_var, targets_oh_var,
                               scale_consts_var)

            # Early abort: every max_steps//10 iterations, stop if the loss
            # is no longer improving by at least `ae_tol` relatively.
            if self.abort_early and not optim_step % (self.max_steps // 10):
                if batch_loss > prev_batch_loss * (1 - self.ae_tol):
                    break
                prev_batch_loss = batch_loss

            # update best attack found during optimization
            pert_predictions_np = np.argmax(pert_outputs_np, axis=1)
            comp_pert_predictions_np = np.argmax(
                self._compensate_confidence(pert_outputs_np, targets_np),
                axis=1)
            for i in range(batch_size):
                l2 = pert_norms_np[i]
                cppred = comp_pert_predictions_np[i]
                ppred = pert_predictions_np[i]
                tlabel = targets_np[i]
                ax = advxs_np[i]
                if self._attack_successful(cppred, tlabel):
                    assert cppred == ppred
                    if l2 < best_l2[i]:
                        best_l2[i] = l2
                        best_l2_ppred[i] = ppred
                    if l2 < o_best_l2[i]:
                        o_best_l2[i] = l2
                        o_best_l2_ppred[i] = ppred
                        o_best_advx[i] = ax

        # binary search of `scale_const`
        for i in range(batch_size):
            tlabel = targets_np[i]
            # a recorded ppred of -1 means "no success"; anything else must
            # have been a successful attack
            assert best_l2_ppred[i] == -1 or \
                self._attack_successful(best_l2_ppred[i], tlabel)
            assert o_best_l2_ppred[i] == -1 or \
                self._attack_successful(o_best_l2_ppred[i], tlabel)
            if best_l2_ppred[i] != -1:
                # successful; attempt to lower `scale_const` by halving it
                if scale_consts_np[i] < upper_bounds_np[i]:
                    upper_bounds_np[i] = scale_consts_np[i]
                # `upper_bounds_np[i] == c_range[1]` implies no solution
                # found yet, i.e. upper_bounds_np[i] has never been updated
                # by scale_consts_np[i]; only bisect once an upper bound has
                # been established (`< 0.1 * c_range[1]`)
                if upper_bounds_np[i] < self.c_range[1] * 0.1:
                    scale_consts_np[i] = (lower_bounds_np[i] + upper_bounds_np[i]) / 2
            else:
                # failure; multiply `scale_const` by ten if no solution
                # found; otherwise do binary search
                if scale_consts_np[i] > lower_bounds_np[i]:
                    lower_bounds_np[i] = scale_consts_np[i]
                if upper_bounds_np[i] < self.c_range[1] * 0.1:
                    scale_consts_np[i] = (lower_bounds_np[i] + upper_bounds_np[i]) / 2
                else:
                    scale_consts_np[i] *= 10

    if not to_numpy:
        o_best_advx = torch.from_numpy(o_best_advx).float().to(device)
    return o_best_advx
def optimizer_train_optimizee_attack(args):
    """Evaluate zeroth-order optimizers on a black-box attack task.

    For each test index, runs up to ``test_num`` attack trials of ``n_steps``
    each, comparing the learned ZO-LSTM optimizer against ZO-SGD, ZO-signSGD,
    ZO-ADAM and several ZO-LSTM ablations. Each optimizer keeps its own copy
    of the optimizee state so they all attack the same image independently.
    Optionally saves loss arrays (``args.save_loss``) and a comparison plot
    (``args.save_fig``) to ``args.output_dir``.
    """
    assert "Attack" in args.train_task
    task = train_task_list.tasks[args.train_task]
    attack_model = task["attack_model"]()
    if args.cuda:
        attack_model.cuda(args.gpu_num)
    ckpt_dict = torch.load(task["attack_model_ckpt"], map_location='cpu')
    attack_model.load_state_dict(ckpt_dict)
    attack_model.eval()
    attack_model.reset()  # not include parameters

    for test_idx in task['tests']['test_indexes']:
        _, test_loader = task["tests"]["optimizee"].dataset_loader(
            args.data_dir, task['batch_size'],
            task['tests']['test_batch_size'])
        test_loader = iter(test_loader)
        # Advance the loader so that (data, target) is the test_idx-th batch:
        # this is the image to be attacked.
        for _ in range(test_idx):
            data, target = next(test_loader)
        data, target = Variable(data.double()), Variable(target)
        if args.cuda:
            data, target = data.cuda(args.gpu_num), target.cuda(args.gpu_num)

        meta_model = task["tests"]["optimizee"](
            optimizee.AttackModel(attack_model),
            task['tests']['test_batch_size'])
        if args.cuda:
            meta_model.cuda(args.gpu_num)
        ckpt_path = os.path.join(args.output_dir, args.ckpt_path)

        # ZO-LSTM (learned ZO optimizer)
        if "nn_opt" in task["tests"]:
            meta_optimizer = task["nn_optimizer"](
                optimizee.MetaModel(meta_model), args)
            if args.cuda:
                meta_optimizer.cuda(args.gpu_num)
            meta_optimizer.load(ckpt_path)
            meta_optimizer.eval()
            nn_opt_loss_array = []
        # ZO-SGD
        if "base_opt" in task["tests"]:
            base_optimizer = task["tests"]["base_opt"](
                None, args, task["tests"]["base_lr"])
            base_optimizer.eval()
            base_opt_loss_array = []
        # ZO-signSGD
        if "sign_opt" in task["tests"]:
            sign_optimizer = task["tests"]["sign_opt"](
                None, args, task["tests"]["sign_lr"])
            sign_optimizer.eval()
            sign_opt_loss_array = []
        # ZO-ADAM
        if "adam_opt" in task["tests"]:
            adam_optimizer = task["tests"]["adam_opt"](
                None, args, task["tests"]["adam_lr"],
                task["tests"]["adam_beta_1"], task["tests"]["adam_beta_2"])
            adam_optimizer.eval()
            adam_opt_loss_array = []
        # ZO-LSTM-no-query (without QueryRNN)
        if "nn_opt_no_query" in task["tests"]:
            meta_model_2 = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                meta_model_2.cuda(args.gpu_num)
            nn_optimizer_no_query = task["tests"]["nn_opt_no_query"](
                optimizee.MetaModel(meta_model_2), args)
            if args.cuda:
                nn_optimizer_no_query.cuda(args.gpu_num)
            nn_optimizer_no_query.load(ckpt_path)
            nn_optimizer_no_query.eval()
            nn_opt_no_query_loss_array = []
        # ZO-LSTM-no-update (without UpdateRNN)
        if "nn_opt_no_update" in task["tests"]:
            meta_model_3 = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                meta_model_3.cuda(args.gpu_num)
            nn_optimizer_no_update = task["tests"]["nn_opt_no_update"](
                optimizee.MetaModel(meta_model_3), args)
            if args.cuda:
                nn_optimizer_no_update.cuda(args.gpu_num)
            nn_optimizer_no_update.load(ckpt_path)
            nn_optimizer_no_update.eval()
            nn_opt_no_update_loss_array = []
        # ZO-LSTM-guided (use Guided-ES to modify search distribution)
        if "nn_opt_guided" in task["tests"]:
            meta_model_4 = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                meta_model_4.cuda(args.gpu_num)
            nn_optimizer_guided = task["tests"]["nn_opt_guided"](
                optimizee.MetaModel(meta_model_4), args)
            if args.cuda:
                nn_optimizer_guided.cuda(args.gpu_num)
            nn_optimizer_guided.load(ckpt_path)
            nn_optimizer_guided.eval()
            nn_opt_guided_loss_array = []

        for num in range(1, task["tests"]["test_num"] + 1):
            # Fresh optimizee for each trial; every optimizer then evolves its
            # own snapshot of this model's state dict.
            model = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                model.cuda(args.gpu_num)
            if "nn_opt" in task["tests"]:
                meta_optimizer.reset_state(keep_states=False, model=model,
                                           use_cuda=args.cuda,
                                           gpu_num=args.gpu_num)
                nn_opt_state = copy.deepcopy(model.state_dict())
            if "base_opt" in task["tests"]:
                base_opt_state = copy.deepcopy(model.state_dict())
            if "sign_opt" in task["tests"]:
                sign_opt_state = copy.deepcopy(model.state_dict())
            if "adam_opt" in task["tests"]:
                adam_optimizer.reset_state(keep_states=False, model=model,
                                           use_cuda=args.cuda,
                                           gpu_num=args.gpu_num)
                adam_opt_state = copy.deepcopy(model.state_dict())
            if "nn_opt_no_query" in task["tests"]:
                nn_optimizer_no_query.reset_state(keep_states=False,
                                                  model=model,
                                                  use_cuda=args.cuda,
                                                  gpu_num=args.gpu_num)
                nn_opt_no_query_state = copy.deepcopy(model.state_dict())
            if "nn_opt_no_update" in task["tests"]:
                nn_optimizer_no_update.reset_state(keep_states=False,
                                                   model=model,
                                                   use_cuda=args.cuda,
                                                   gpu_num=args.gpu_num)
                nn_opt_no_update_state = copy.deepcopy(model.state_dict())
            if "nn_opt_guided" in task["tests"]:
                nn_optimizer_guided.reset_state(keep_states=False,
                                                model=model,
                                                use_cuda=args.cuda,
                                                gpu_num=args.gpu_num)
                nn_opt_guided_state = copy.deepcopy(model.state_dict())

            for step in range(1, task["tests"]["n_steps"] + 1):
                msg = "iteration {}".format(step)
                # For each optimizer: restore its snapshot, take one ZO
                # update, re-snapshot, and record the loss.
                # nn_opt
                if "nn_opt" in task["tests"]:
                    model.load_state_dict(nn_opt_state)
                    with torch.no_grad():
                        _, nn_opt_loss, nn_f_x = meta_optimizer.meta_update(
                            model, data, target)
                    nn_opt_state = copy.deepcopy(model.state_dict())
                    msg += ", nn_opt_loss {:.6f}".format(
                        nn_opt_loss.data.item())
                    nn_opt_loss_array.append(nn_opt_loss.data.item())
                # base_opt
                if "base_opt" in task["tests"]:
                    model.load_state_dict(base_opt_state)
                    with torch.no_grad():
                        _, base_opt_loss, base_f_x = base_optimizer.meta_update(
                            model, data, target)
                    base_opt_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", base_opt_loss {:.6f}".format(
                        base_opt_loss.data.item())
                    base_opt_loss_array.append(base_opt_loss.data.item())
                # sign_opt
                if "sign_opt" in task["tests"]:
                    model.load_state_dict(sign_opt_state)
                    with torch.no_grad():
                        _, sign_opt_loss, sign_f_x = sign_optimizer.meta_update(
                            model, data, target)
                    sign_opt_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", sign_opt_loss {:.6f}".format(
                        sign_opt_loss.data.item())
                    sign_opt_loss_array.append(sign_opt_loss.data.item())
                if "adam_opt" in task["tests"]:
                    model.load_state_dict(adam_opt_state)
                    with torch.no_grad():
                        _, adam_opt_loss, adam_f_x = adam_optimizer.meta_update(
                            model, data, target)
                    adam_opt_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", adam_opt_loss {:.6f}".format(
                        adam_opt_loss.data.item())
                    adam_opt_loss_array.append(adam_opt_loss.data.item())
                if "nn_opt_no_query" in task["tests"]:
                    model.load_state_dict(nn_opt_no_query_state)
                    with torch.no_grad():
                        _, nn_opt_no_query_loss, nn_no_query_f_x = nn_optimizer_no_query.meta_update(
                            model, data, target, pred_query=False)
                    nn_opt_no_query_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", nn_opt_no_query_loss {:.6f}".format(
                        nn_opt_no_query_loss.data.item())
                    nn_opt_no_query_loss_array.append(
                        nn_opt_no_query_loss.data.item())
                if "nn_opt_no_update" in task["tests"]:
                    model.load_state_dict(nn_opt_no_update_state)
                    with torch.no_grad():
                        _, nn_opt_no_update_loss, nn_no_update_f_x = nn_optimizer_no_update.meta_update(
                            model, data, target, pred_update=False,
                            base_lr=task["tests"]["base_lr"])
                    nn_opt_no_update_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", nn_opt_no_update_loss {:.6f}".format(
                        nn_opt_no_update_loss.data.item())
                    nn_opt_no_update_loss_array.append(
                        nn_opt_no_update_loss.data.item())
                if "nn_opt_guided" in task["tests"]:
                    model.load_state_dict(nn_opt_guided_state)
                    with torch.no_grad():
                        _, nn_opt_guided_loss, nn_guided_f_x = nn_optimizer_guided.meta_update(
                            model, data, target, guided=True,
                            base_lr=task["tests"]["base_lr"])
                    nn_opt_guided_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", nn_opt_guided_loss {:.6f}".format(
                        nn_opt_guided_loss.data.item())
                    nn_opt_guided_loss_array.append(
                        nn_opt_guided_loss.data.item())
                print(msg)

            # NOTE(review): arrays accumulate across trials; each save
            # overwrites the previous file for this test_idx with the data
            # of all trials so far.
            if args.save_loss:
                if "nn_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_loss_array))
                if "base_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "base_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(base_opt_loss_array))
                if "sign_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "sign_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(sign_opt_loss_array))
                if "adam_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "adam_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(adam_opt_loss_array))
                if "nn_opt_no_query" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_no_query_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_no_query_loss_array))
                if "nn_opt_no_update" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_no_update_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_no_update_loss_array))
                if "nn_opt_guided" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_guided_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_guided_loss_array))
            print("Test num {}, test idx {}, done!".format(num, test_idx))

        # Plot mean +/- std loss curves over all trials for this test index.
        if args.save_fig:
            assert args.save_loss
            fig = plt.figure(figsize=(8, 6))
            iteration = np.arange(1, task["tests"]["n_steps"] + 1)
            if "base_opt" in task["tests"]:
                base_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "base_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                    (task["tests"]["test_num"], task["tests"]["n_steps"]))
                base_opt_mean = np.mean(base_opt_loss_array, axis=0)
                base_opt_std = np.std(base_opt_loss_array, axis=0)
                plt.plot(iteration, base_opt_mean, 'c', label='ZO-SGD')
                plt.fill_between(iteration, base_opt_mean - base_opt_std,
                                 base_opt_mean + base_opt_std, color='c',
                                 alpha=0.2)
            if "sign_opt" in task["tests"]:
                sign_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "sign_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                    (task["tests"]["test_num"], task["tests"]["n_steps"]))
                sign_opt_mean = np.mean(sign_opt_loss_array, axis=0)
                sign_opt_std = np.std(sign_opt_loss_array, axis=0)
                plt.plot(iteration, sign_opt_mean, 'g', label='ZO-signSGD')
                plt.fill_between(iteration, sign_opt_mean - sign_opt_std,
                                 sign_opt_mean + sign_opt_std, color='g',
                                 alpha=0.2)
            if "adam_opt" in task["tests"]:
                adam_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "adam_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                    (task["tests"]["test_num"], task["tests"]["n_steps"]))
                adam_opt_mean = np.mean(adam_opt_loss_array, axis=0)
                adam_opt_std = np.std(adam_opt_loss_array, axis=0)
                plt.plot(iteration, adam_opt_mean, 'darkorange',
                         label='ZO-ADAM')
                plt.fill_between(iteration, adam_opt_mean - adam_opt_std,
                                 adam_opt_mean + adam_opt_std,
                                 color='darkorange', alpha=0.2)
            if "nn_opt" in task["tests"]:
                nn_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                    (task["tests"]["test_num"], task["tests"]["n_steps"]))
                nn_opt_mean = np.mean(nn_opt_loss_array, axis=0)
                nn_opt_std = np.std(nn_opt_loss_array, axis=0)
                plt.plot(iteration, nn_opt_mean, 'b', label='ZO-LSTM')
                plt.fill_between(iteration, nn_opt_mean - nn_opt_std,
                                 nn_opt_mean + nn_opt_std, color='b',
                                 alpha=0.2)
            if "nn_opt_no_query" in task["tests"]:
                nn_opt_no_query_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_no_query_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                    (task["tests"]["test_num"], task["tests"]["n_steps"]))
                nn_opt_no_query_mean = np.mean(nn_opt_no_query_loss_array,
                                               axis=0)
                nn_opt_no_query_std = np.std(nn_opt_no_query_loss_array,
                                             axis=0)
                plt.plot(iteration, nn_opt_no_query_mean, 'r',
                         label='ZO-LSTM-no-query')
                plt.fill_between(iteration,
                                 nn_opt_no_query_mean - nn_opt_no_query_std,
                                 nn_opt_no_query_mean + nn_opt_no_query_std,
                                 color='r', alpha=0.2)
            if "nn_opt_no_update" in task["tests"]:
                nn_opt_no_update_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_no_update_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                    (task["tests"]["test_num"], task["tests"]["n_steps"]))
                nn_opt_no_update_mean = np.mean(nn_opt_no_update_loss_array,
                                                axis=0)
                nn_opt_no_update_std = np.std(nn_opt_no_update_loss_array,
                                              axis=0)
                plt.plot(iteration, nn_opt_no_update_mean, 'm',
                         label='ZO-LSTM-no-update')
                plt.fill_between(iteration,
                                 nn_opt_no_update_mean - nn_opt_no_update_std,
                                 nn_opt_no_update_mean + nn_opt_no_update_std,
                                 color='m', alpha=0.2)
            if "nn_opt_guided" in task["tests"]:
                nn_opt_guided_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_guided_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                    (task["tests"]["test_num"], task["tests"]["n_steps"]))
                nn_opt_guided_mean = np.mean(nn_opt_guided_loss_array, axis=0)
                nn_opt_guided_std = np.std(nn_opt_guided_loss_array, axis=0)
                plt.plot(iteration, nn_opt_guided_mean, 'saddlebrown',
                         label='ZO-LSTM-GuidedES')
                plt.fill_between(iteration,
                                 nn_opt_guided_mean - nn_opt_guided_std,
                                 nn_opt_guided_mean + nn_opt_guided_std,
                                 color='saddlebrown', alpha=0.2)
            plt.xlabel('iteration', fontsize=15)
            plt.ylabel('loss', fontsize=15)
            plt.legend(prop={'size': 15})
            fig.savefig(
                os.path.join(
                    args.output_dir,
                    args.fig_preffix + '_{}_q_{}.png'.format(
                        test_idx, args.grad_est_q)))
def train(self):
    """Run the progressive-growing GAN training loop.

    Iterates over resolution phases (driven by ``resl_scheduler``), doing one
    WGAN-style discriminator update and one generator update per batch, then
    logging, snapshotting, and periodically saving sample grids from a fixed
    test noise vector.
    """
    # fixed noise used for periodic sample images
    self.z_test = torch.Tensor(self.loader.batch_size, self.nz).normal_(0.0, 1.0)
    if self.use_cuda:
        self.z_test = self.z_test.cuda()
    self.z_test = Variable(self.z_test)

    for step in range(2, self.max_resl + 1 + 5):
        for iter in tqdm(range(0, (self.trns_tick * 2 + self.stab_tick * 2) * self.TICK,
                               self.loader.batch_size)):
            self.globalIter = self.globalIter + 1
            self.stack = self.stack + self.loader.batch_size
            if self.stack > ceil(len(self.loader.dataset)):
                self.epoch = self.epoch + 1
                self.stack = int(self.stack % (ceil(len(self.loader.dataset))))

            # resolution scheduler.
            self.resl_scheduler()

            # update discriminator.
            for i in range(1):
                self.D.zero_grad()  # zero gradients.
                self.require_grad(self.D, True)
                self.x.data = self.feed_interpolated_input(self.loader.get_batch())
                if self.flag_add_noise:
                    self.x = self.add_noise(self.x)
                self.z = torch.randn(self.loader.batch_size, self.nz, 1, 1)
                if self.use_cuda:
                    self.z = self.z.cuda()
                self.x_tilde = self.G(self.z)
                self.fx = self.D(self.x)
                # detach so the generator graph is not traversed here
                self.fx_tilde = self.D(self.x_tilde.detach())
                loss_d = self.fx.squeeze().mean() - self.fx_tilde.squeeze().mean() \
                    + self.calc_gradient_penalty()
                loss_d.backward(retain_graph=False)
                self.opt_d.step()

            # update generator.
            for i in range(1):
                self.G.zero_grad()  # zero gradients.
                # freeze D so only G receives gradients
                self.require_grad(self.D, False)
                fx_tilde = self.D(self.x_tilde)
                loss_g = fx_tilde.squeeze().mean()
                loss_g.backward(retain_graph=False)
                self.opt_g.step()

            # logging.
            log_msg = ' [E:{0}][T:{1}][{2:6}/{3:6}] errD: {4:.4f} | errG: {5:.4f} | ' \
                      '[lr:{11:.5f}][cur:{6:.3f}][resl:{7:4}][{8}][{9:.1f}%][{10:.1f}%]'.format(
                          self.epoch, self.globalTick, self.stack, len(self.loader.dataset),
                          loss_d.item(), loss_g.item(), self.resl,
                          int(pow(2, floor(self.resl))), self.phase,
                          self.complete['gen'], self.complete['dis'], self.lr)
            tqdm.write(log_msg)

            # save model.
            self.snapshot('repo/model')

            # save image grid.
            if self.globalIter % self.config.save_img_every == 0:
                with torch.no_grad():
                    x_test = self.G(self.z_test)
                utils.mkdir('repo/save/grid')
                utils.save_image_grid(x_test.data, 'repo/save/grid/{}_{}_G{}_D{}.jpg'.format(
                    int(self.globalIter / self.config.save_img_every), self.phase,
                    self.complete['gen'], self.complete['dis']))
                utils.mkdir('repo/save/resl_{}'.format(int(floor(self.resl))))
                utils.save_image_single(x_test.data, 'repo/save/resl_{}/{}_{}_G{}_D{}.jpg'.format(
                    int(floor(self.resl)), int(self.globalIter / self.config.save_img_every),
                    self.phase, self.complete['gen'], self.complete['dis']))

            # tensorboard visualization.
            if self.use_tb:
                # FIX: the original ran `x_test = self.D(self.z_test)` here —
                # the discriminator applied to a noise vector — and discarded
                # the result. That forward pass is dropped.
                # FIX: loss_g/loss_d are 0-dim tensors; `loss_g[0]` raises on
                # PyTorch >= 0.5, so use .item() directly.
                self.tb.add_scalar('data/loss_g', loss_g.item(), self.globalIter)
                self.tb.add_scalar('data/loss_d', loss_d.item(), self.globalIter)
                self.tb.add_scalar('tick/lr', self.lr, self.globalIter)
                self.tb.add_scalar('tick/cur_resl', int(pow(2, floor(self.resl))),
                                   self.globalIter)
else: print("Validing...") # 设置为False,不会进行Dropout并使用running mean和running var model.train(False) running_loss = 0.0 running_corrects = 0 # enuerate(),返回的是索引和元素值,数字1表明设置start=1,即索引值从1开始 for batch, data in enumerate(dataloader[phase], 1): # X: 图片,16*3*64*64; y: 标签,16 X, y = data # 封装成Variable类 if Use_gpu: X, y = Variable(X.cuda()), Variable(y.cuda()) else: X, y = Variable(X), Variable(y) # y_pred: 预测概率矩阵,16*2 y_pred = model(X) # pred,概率较大值对应的索引值,可看做预测结果 _, pred = torch.max(y_pred.data, 1) # 梯度归零 optimizer.zero_grad() # 计算损失 loss = loss_f(y_pred, y)
def forward(self, x): x = self.encoder(x) x = self.decoder(x) return x model = autoencoder().cuda() criterion = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) for epoch in range(num_epochs): for data in dataloader: img, _ = data img = img.view(img.size(0), -1) img = Variable(img).cuda() output = model(img) loss = criterion(output, img) optimizer.zero_grad() loss.backward() optimizer.step() print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, loss.item())) if epoch % 10 == 0: pic = to_img(output.cpu().data) save_image(pic, 'image_{}.png'.format(epoch)) torch.save(model.state_dict(), './sim_autoencoder.pth')
def active_learning_taylor(func_name, start_rand_idxs=None, bud=None, valid=True, fac_loc_idx=None):
    """Active-learning loop with Taylor-approximation subset selection.

    Repeatedly (``no_select`` rounds): trains a TwoLayerNet on the currently
    selected indices, evaluates on validation/test/unlabelled pools, then
    grows the selected set with ``no_points`` new indices chosen by the
    strategy named in ``func_name`` (greedy Taylor, FASS, facility location,
    random, ...). Relies on module-level globals (x_trn, y_trn, x_val, y_val,
    x_tst, y_tst, M, N, num_cls, no_select, no_points, num_epochs,
    learning_rate, data_name, ... — assumed defined elsewhere in this file).

    Returns (val_accies, test_accies, unlab_accies, idxs)
    plus the facility-location warm start when func_name == 'Facility Location'.
    """
    # fixed seeds for reproducibility
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    torch.backends.cudnn.deterministic = True
    #model = ThreeLayerNet(M, num_cls, 5, 5)
    #model = LogisticRegNet(M, num_cls)
    model = TwoLayerNet(M, num_cls, 100)
    # if data_name == 'mnist':
    #     model = MnistNet()
    if torch.cuda.device_count() > 1:
        print("Using:", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
        cudnn.benchmark = True
    model = model.to(device)
    idxs = start_rand_idxs

    # Facloc-regularized variant augments the validation set with the
    # facility-location anchor points.
    if func_name == 'Facloc Regularized':
        x_val1 = torch.cat([x_val, x_trn[fac_loc_idx]], dim=0)
        y_val1 = torch.cat([y_val, y_trn[fac_loc_idx]], dim=0)

    criterion = nn.CrossEntropyLoss()
    criterion_nored = nn.CrossEntropyLoss(reduction='none')
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # Build the set-function object (and possibly the initial index set)
    # for the requested selection strategy.
    if func_name == 'Full OneStep':
        setf_model = SetFunctionBatch(x_val, y_val, model, criterion,
                                      criterion_nored, learning_rate, device)
    elif func_name == 'Facility Location':
        if data_name != 'covertype':
            setf_model = SetFunctionFacLoc(device, train_batch_size_for_greedy)
            idxs = setf_model.lazy_greedy_max(bud, x_trn, model)
        else:
            idxs = run_stochastic_Facloc(x_trn, y_trn, bud)
        facility_loaction_warm_start = copy.deepcopy(idxs)
    elif func_name == 'Facloc Regularized':
        setf_model = SetFunctionTaylor(x_val1, y_val1, model, criterion,
                                       criterion_nored, learning_rate, device, num_cls)
    else:
        #setf_model = SetFunctionTaylorDeep(train_loader_greedy, valid_loader, valid, model,
        #                                   criterion, criterion_nored, learning_rate, device, N)
        setf_model = SetFunctionTaylor(x_val, y_val, model, criterion,
                                       criterion_nored, learning_rate, device, num_cls)
        #setf_model = SetFunctionTaylorDeep_ReLoss_Mean(x_trn, y_trn, train_batch_size_for_greedy,
        #                                               x_val, y_val, valid, model,
        #                                               criterion, criterion_nored, learning_rate, device, N)

    # `remainList` is the unlabelled pool: everything not yet selected.
    remainList = set(list(range(N)))
    idxs = list(idxs)
    remainList = remainList.difference(idxs)

    if func_name == 'Taylor Online':
        print("Starting Online OneStep Run with taylor on loss!")
    elif func_name == 'Full OneStep':
        print("Starting Online OneStep Run without taylor!")
    elif func_name == 'Facloc Regularized':
        print("Starting Facility Location Regularized Online OneStep Run with taylor!")
    elif func_name == 'Random Greedy':
        print("Starting Randomized Greedy Online OneStep Run with taylor!")
    elif func_name == 'Facility Location':
        print("Starting Facility Location!")
    elif func_name == 'Random':
        print("Starting Random Run!")
    elif func_name == 'Random Perturbation':
        print("Starting Online OneStep Run with taylor with random perturbation!")
    elif func_name == "FASS":
        print("Filtered Active Submodular Selection(FASS)!")

    # per-round accuracy traces
    val_accies = np.zeros(no_select)
    test_accies = np.zeros(no_select)
    unlab_accies = np.zeros(no_select)

    def weight_reset(m):
        # Re-seed and re-initialize Linear layers; applied via model.apply().
        torch.manual_seed(42)
        torch.cuda.manual_seed(42)
        np.random.seed(42)
        random.seed(42)
        torch.backends.cudnn.deterministic = True
        if isinstance(m, nn.Linear):
            #m.reset_parameters()
            m.weight.data.normal_(0.0, 0.02)
            m.bias.data.fill_(0)

    model = model.apply(weight_reset).cuda()
    #print(model.linear2.weight)
    for n in range(no_select):
        # train on the current selection
        loader_tr = DataLoader(CustomDataset_act(x_trn[idxs], y_trn[idxs], transform=None),
                               batch_size=no_points)
        model.train()
        for i in range(num_epochs):
            '''inputs, targets = x_trn[idxs], y_trn[idxs]
            optimizer.zero_grad()
            scores = model(inputs)
            loss = criterion(scores, targets)
            loss.backward()
            optimizer.step()'''
            #model = model.apply(weight_reset).cuda()
            accFinal = 0.
            # NOTE: this rebinds `idxs` to the per-batch dataset indices
            # returned by CustomDataset_act.
            for batch_idx in list(loader_tr.batch_sampler):
                x, y, idxs = loader_tr.dataset[batch_idx]
                x, y = Variable(x.cuda()), Variable(y.cuda())
                optimizer.zero_grad()
                out = model(x)
                loss = F.cross_entropy(out, y)
                accFinal += torch.sum((torch.max(out, 1)[1] == y).float()).data.item()
                loss.backward()
                if (i % 50 == 0) and (accFinal < 0.2):  # reset if not converging
                    model = model.apply(weight_reset).cuda()
                    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
                # clamp gradients, just in case
                for p in filter(lambda p: p.grad is not None, model.parameters()):
                    p.grad.data.clamp_(min=-.1, max=.1)
                optimizer.step()
            #if accFinal/len(loader_tr.dataset.X) >= 0.99:
            #    break
        '''with torch.no_grad():
            val_outputs = model(x_val)
            val_loss = criterion(val_outputs, y_val)
            full_trn_outputs = model(x_trn)
            full_trn_loss = criterion(full_trn_outputs, y_trn)'''
        #accFinal = torch.sum((torch.max(scores,1)[1] == targets).float()).data.item()
        #print(accFinal / len(loader_tr.dataset.X))
        print(n + 1, 'Time', 'SubsetTrn', loss.item())

        # evaluate on validation / test / remaining-unlabelled pools
        curr_X_trn = x_trn[list(remainList)]
        curr_Y_trn = y_trn[list(remainList)]
        model.eval()
        with torch.no_grad():
            '''full_trn_out = model(x_trn)
            full_trn_loss = criterion(full_trn_out, y_trn).mean()
            sub_trn_out = model(x_trn[idxs])
            sub_trn_loss = criterion(sub_trn_out, y_trn[idxs]).mean()'''
            val_out = model(x_val)
            val_loss = criterion(val_out, y_val)
            _, val_predict = val_out.max(1)
            val_correct = val_predict.eq(y_val).sum().item()
            val_total = y_val.size(0)
            val_acc = 100 * val_correct / val_total

            correct = 0
            total = 0
            inputs, targets = x_tst.to(device), y_tst.to(device)
            outputs = model(inputs)
            test_loss = criterion(outputs, targets)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            tst_acc = 100.0 * correct / total

            rem_out = model(curr_X_trn)
            rem_loss = criterion(rem_out, curr_Y_trn)
            _, rem_predict = rem_out.max(1)
            rem_correct = rem_predict.eq(curr_Y_trn).sum().item()
            rem_total = curr_Y_trn.size(0)
            rem_acc = 100 * rem_correct / rem_total

        val_accies[n] = val_acc
        test_accies[n] = tst_acc
        unlab_accies[n] = rem_acc

        # --- selection step: pick the next batch of indices ---
        cached_state_dict = copy.deepcopy(model.state_dict())
        clone_dict = copy.deepcopy(model.state_dict())
        if func_name == 'Random Greedy':
            # 90% greedy-Taylor picks + 10% random picks
            new_idxs = setf_model.naive_greedy_max(curr_X_trn, rem_predict,
                                                   int(0.9 * no_points), clone_dict)
            new_idxs = list(np.array(list(remainList))[new_idxs])
            remainList = remainList.difference(new_idxs)
            new_idxs.extend(list(np.random.choice(list(remainList),
                                                  size=int(0.1 * no_points), replace=False)))
            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)
        elif func_name == "FASS":
            # filter to the 5*no_points highest-entropy points, then knn-sb
            fn = nn.Softmax(dim=1)
            soft = fn(rem_out)
            entropy2 = Categorical(probs=soft).entropy()
            if 5 * no_points < entropy2.shape[0]:
                values, indices = entropy2.topk(5 * no_points)
            else:
                indices = [i for i in range(entropy2.shape[0])]
            knn_idxs_flag_val = perform_knnsb_selection(datadir, data_name,
                                                        curr_X_trn[indices],
                                                        rem_predict[indices],
                                                        fraction, selUsing='val')
            knn_idxs_flag_val = list(np.array(list(remainList))[indices.cpu()][knn_idxs_flag_val])
            remainList = remainList.difference(knn_idxs_flag_val)
            idxs.extend(knn_idxs_flag_val)
        elif func_name == 'Random':
            # seed locally, then restore the global RNG state
            state = np.random.get_state()
            np.random.seed(n * n)
            new_idxs = np.random.choice(list(remainList), size=no_points, replace=False)
            np.random.set_state(state)
            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)
        elif func_name == 'Random Perturbation':
            new_idxs = setf_model.naive_greedy_max(curr_X_trn, rem_predict,
                                                   no_points, clone_dict, None, True)
            new_idxs = np.array(list(remainList))[new_idxs]
            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)
        elif func_name == 'Facility Location':
            if data_name == 'covertype':
                new_idxs = run_stochastic_Facloc(curr_X_trn, rem_predict, bud)
            else:
                new_idxs = setf_model.lazy_greedy_max(bud, curr_X_trn, model)
            new_idxs = np.array(list(remainList))[new_idxs]
            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)
        else:
            new_idxs = setf_model.naive_greedy_max(curr_X_trn, rem_predict,
                                                   no_points, clone_dict)
            new_idxs = np.array(list(remainList))[new_idxs]
            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)
        '''elif func_name == 'Proximal':
            previous = torch.zeros(N, device=device)
            previous[idxs] = 1.0
            new_idxs = setf_model.naive_greedy_max(bud, clone_dict, None, previous)
            idxs = new_idxs'''
        # restore pre-selection weights so selection does not affect training
        model.load_state_dict(cached_state_dict)

    if func_name == 'Facility Location':
        return val_accies, test_accies, unlab_accies, idxs, facility_loaction_warm_start
    else:
        return val_accies, test_accies, unlab_accies, idxs
def train(**kwargs):
    """Train a cat/dog classifier.

    Keyword arguments are merged into the global ``opt`` configuration via
    ``opt.parse``.  Runs ``opt.max_epoch`` epochs, visualizes the running loss
    and validation accuracy through ``Visualizer``, and decays the learning
    rate whenever an epoch's mean loss fails to improve.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step 1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step 2: data (both splits are carved out of train_data_root)
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step 3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step 4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize.
            # FIX: `loss.data[0]` is the pre-0.4 scalar-indexing API and raises
            # on modern PyTorch; `.item()` is the supported replacement.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode when the debug-flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))

        # update learning rate: decay when the epoch's mean loss got worse.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # Lower the lr by mutating the param groups in place, so the
            # optimizer's state (momentum etc.) is preserved.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
class trainer:
    """Progressive-growing GAN (PGGAN) trainer.

    Schedules the image resolution from 4x4 up to 2**max_resl, alternating
    generator/discriminator fade-in ("trns") and stabilization ("stab")
    phases, and trains with a WGAN-style critic loss plus a gradient penalty.
    """

    def __init__(self, config):
        self.config = config
        # Select device and make it the default tensor type for all
        # subsequently created tensors.
        if torch.cuda.is_available():
            self.use_cuda = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            self.use_cuda = False
            torch.set_default_tensor_type('torch.FloatTensor')

        # Hyper-parameters copied from config.
        self.nz = config.nz                      # latent (noise) dimensionality
        self.optimizer = config.optimizer        # 'adam' | 'rmsprop' | else SGD
        self.resl = 2                            # we start from 2^2 = 4
        self.lr = config.lr
        self.eps_drift = config.eps_drift
        self.smoothing = config.smoothing
        self.max_resl = config.max_resl          # final resolution exponent
        self.trns_tick = config.trns_tick        # ticks spent in transition
        self.stab_tick = config.stab_tick        # ticks spent stabilizing
        self.TICK = config.TICK                  # images per tick

        # Progress counters.
        self.globalIter = 0
        self.globalTick = 0
        self.kimgs = 0                           # images seen so far
        self.stack = 0
        self.epoch = 0

        # Fade-in bookkeeping: per-network fade-in block and % complete.
        self.fadein = {'gen': None, 'dis': None}
        self.complete = {'gen': 0, 'dis': 0}
        self.phase = 'init'
        self.flag_flush_gen = False
        self.flag_flush_dis = False
        self.flag_add_noise = self.config.flag_add_noise
        self.flag_add_drift = self.config.flag_add_drift
        self.loader = DL.dataloader(config)
        self.LAMBDA = 2                          # gradient-penalty weight

        # network
        self.G = network.Generator(config)
        self.D = network.Discriminator(config)
        print('Generator structure: ')
        print(self.G.model)
        print('Discriminator structure: ')
        print(self.D.model)
        if self.use_cuda:
            torch.cuda.manual_seed(config.random_seed)
            #self.G = self.G.cuda()
            #self.D = self.D.cuda()
            # Always wrap in DataParallel (even for a single GPU) so the rest
            # of the code can uniformly access `.module`.
            if config.n_gpu == 1:
                self.G = torch.nn.DataParallel(self.G).cuda(device=0)
                self.D = torch.nn.DataParallel(self.D).cuda(device=0)
            else:
                gpus = []
                for i in range(config.n_gpu):
                    gpus.append(i)
                self.G = torch.nn.DataParallel(self.G, device_ids=gpus).cuda()
                self.D = torch.nn.DataParallel(self.D, device_ids=gpus).cuda()

        self.renew_everything()
        self.use_tb = config.use_tb
        if self.use_tb:
            self.tb = tensorboard.tf_recorder()

    def resl_scheduler(self):
        """Progressively schedule the image resolution (``self.resl``).

        Must be called every iteration so ``resl`` stays up to date.
        One growth cycle is:
          step 1. (trns_tick) --> transition in generator.
          step 2. (stab_tick) --> stabilize.
          step 3. (trns_tick) --> transition in discriminator.
          step 4. (stab_tick) --> stabilize.
        """
        self.batchsize = self.loader.batch_size
        # delta: resl increment per tick; d_alpha: fade-in alpha increment
        # per batch (chained division).
        delta = 1.0 / (2 * self.trns_tick + 2 * self.stab_tick)
        d_alpha = 1.0 * self.batchsize / self.trns_tick / self.TICK

        # Advance generator fade-in while in the first quarter of the cycle.
        if self.fadein['gen'] is not None:
            if self.resl % 1.0 < (self.trns_tick) * delta:
                self.fadein['gen'].update_alpha(d_alpha)
                self.complete['gen'] = self.fadein['gen'].alpha * 100
                self.phase = 'gtrns'
            elif self.resl % 1.0 >= (self.trns_tick) * delta and self.resl % 1.0 < (self.trns_tick + self.stab_tick) * delta:
                self.phase = 'gstab'
        # Advance discriminator fade-in during the third quarter.
        if self.fadein['dis'] is not None:
            if self.resl % 1.0 >= (self.trns_tick + self.stab_tick) * delta and self.resl % 1.0 < (self.stab_tick + self.trns_tick * 2) * delta:
                self.fadein['dis'].update_alpha(d_alpha)
                self.complete['dis'] = self.fadein['dis'].alpha * 100
                self.phase = 'dtrns'
            elif self.resl % 1.0 >= (self.stab_tick + self.trns_tick * 2) * delta and self.phase != 'final':
                self.phase = 'dstab'

        prev_kimgs = self.kimgs
        self.kimgs = self.kimgs + self.batchsize
        # Tick boundary crossed, i.e. another TICK images have been trained.
        if (self.kimgs % self.TICK) < (prev_kimgs % self.TICK):
            self.globalTick = self.globalTick + 1
            # increase linearly every tick, and grow network structure.
            prev_resl = floor(self.resl)
            self.resl = self.resl + delta
            self.resl = max(2, min(10.5, self.resl))  # clamping, range: 4 ~ 1024

            # flush network: merge the finished fade-in block into the model.
            if self.flag_flush_gen and self.resl % 1.0 >= (self.trns_tick + self.stab_tick) * delta and prev_resl != 2:
                if self.fadein['gen'] is not None:
                    self.fadein['gen'].update_alpha(d_alpha)
                    self.complete['gen'] = self.fadein['gen'].alpha * 100
                self.flag_flush_gen = False
                self.G.module.flush_network()  # flush G
                print(self.G.module.model)
                self.fadein['gen'] = None
                self.complete['gen'] = 0.0
                self.phase = 'dtrns'
            # Otherwise: the region after the branch above, excluding the
            # special first-resolution node.
            elif self.flag_flush_dis and floor(self.resl) != prev_resl and prev_resl != 2:
                if self.fadein['dis'] is not None:
                    self.fadein['dis'].update_alpha(d_alpha)
                    self.complete['dis'] = self.fadein['dis'].alpha * 100
                self.flag_flush_dis = False
                self.D.module.flush_network()
                print(self.D.module.model)
                self.fadein['dis'] = None
                self.complete['dis'] = 0.0
                if floor(self.resl) < self.max_resl and self.phase != 'final':
                    self.phase = 'gtrns'

            # grow network: integer resolution boundary crossed.
            if floor(self.resl) != prev_resl and floor(self.resl) < self.max_resl + 1:
                self.lr = self.lr * float(self.config.lr_decay)
                self.G.module.grow_network(floor(self.resl))
                self.D.module.grow_network(floor(self.resl))
                self.renew_everything()
                self.fadein['gen'] = dict(self.G.module.model.named_children())['fadein_block']
                self.fadein['dis'] = dict(self.D.module.model.named_children())['fadein_block']
                # Do not omit these flags, otherwise flushing never happens.
                self.flag_flush_gen = True
                self.flag_flush_dis = True
                print('renew_everything: ')

        # Terminal phase: max resolution reached and the cycle finished.
        if floor(self.resl) >= self.max_resl and self.resl % 1.0 >= (self.stab_tick + self.trns_tick * 2) * delta:
            self.phase = 'final'
            self.resl = self.max_resl + (self.stab_tick + self.trns_tick * 2) * delta

    def renew_everything(self):
        """Rebuild loader, tensors and optimizers after a resolution change."""
        # renew dataloader.
        self.loader.renew(min(floor(self.resl), self.max_resl))

        # define tensors
        self.z = torch.Tensor(self.loader.batch_size, self.nz)
        self.x = torch.Tensor(self.loader.batch_size, 3, self.loader.imsize, self.loader.imsize)
        # NOTE(review): `x_tile` looks like a typo for `x_tilde`; on the CPU
        # path `self.x_tilde` is never assigned before being wrapped below.
        self.x_tile = torch.Tensor(self.loader.batch_size, 3, self.loader.imsize, self.loader.imsize)
        self.real_label = torch.Tensor(self.loader.batch_size).fill_(1)
        self.fake_label = torch.Tensor(self.loader.batch_size).fill_(0)

        # enable cuda
        if self.use_cuda:
            self.z = self.z.cuda()
            self.x = self.x.cuda()
            # NOTE(review): assigns a cuda copy of `self.x`, not of x_tilde —
            # likely should read `self.x_tile.cuda()`; confirm intent.
            self.x_tilde = self.x.cuda()
            self.real_label = self.real_label.cuda()
            self.fake_label = self.fake_label.cuda()
            # NOTE(review): references the module-level `config`, not
            # `self.config` — NameError unless a global `config` exists.
            torch.cuda.manual_seed(config.random_seed)

        # wrapping autograd Variable
        self.x = Variable(self.x)
        self.x_tilde = Variable(self.x_tilde)
        self.z = Variable(self.z)
        self.real_label = Variable(self.real_label)
        self.fake_label = Variable(self.fake_label)

        # ship new model to cuda.
        if self.use_cuda:
            self.G = self.G.cuda()
            self.D = self.D.cuda()

        # optimizer: recreated so only currently-trainable params are tracked.
        betas = (self.config.beta1, self.config.beta2)
        #print(list(filter(lambda p: p.requires_grad, self.G.parameters())))
        #print(list(filter(lambda p: p.requires_grad, self.D.parameters())))
        if self.optimizer == 'adam':
            self.opt_g = Adam(filter(lambda p: p.requires_grad, self.G.module.parameters()), lr=self.lr, betas=betas, weight_decay=0.0)
            self.opt_d = Adam(filter(lambda p: p.requires_grad, self.D.module.parameters()), lr=self.lr, betas=betas, weight_decay=0.0)
        elif self.optimizer == 'rmsprop':
            self.opt_g = torch.optim.RMSprop(filter(lambda p: p.requires_grad, self.G.module.parameters()), lr=self.lr, alpha=0.9, weight_decay=0.0)
            self.opt_d = torch.optim.RMSprop(filter(lambda p: p.requires_grad, self.D.module.parameters()), lr=self.lr, alpha=0.9, weight_decay=0.0)
        else:
            self.opt_g = torch.optim.SGD(filter(lambda p: p.requires_grad, self.G.module.parameters()), lr=self.lr, weight_decay=0.0)
            self.opt_d = torch.optim.SGD(filter(lambda p: p.requires_grad, self.D.module.parameters()), lr=self.lr, weight_decay=0.0)

    def feed_interpolated_input(self, x):
        """During generator transition, blend the batch with its low-res
        (down/up-sampled) version according to the fade-in completion."""
        if self.phase == 'gtrns' and floor(self.resl) > 2 and floor(self.resl) <= self.max_resl:
            alpha = self.complete['gen'] / 100.0
            # Down-sample then up-sample with nearest-neighbor (interpolation=0)
            # to fabricate the previous-resolution image.
            transform = transforms.Compose([transforms.ToPILImage(),
                                            transforms.Resize(size=int(pow(2, floor(self.resl) - 1)), interpolation=0),
                                            transforms.Resize(size=int(pow(2, floor(self.resl))), interpolation=0),
                                            transforms.ToTensor(),
                                            ])
            # Map [-1, 1] -> [0, 1] for PIL, transform, then back to [-1, 1].
            x_low = x.clone().add(1).mul(0.5)
            for i in range(x_low.size(0)):
                x_low[i] = transform(x_low[i]).mul(2).add(-1)
            x = torch.add(x.mul(alpha), x_low.mul(1 - alpha))  # interpolated_x
        if self.use_cuda:
            return x.cuda()
        else:
            return x

    def add_noise(self, x):
        """Optionally add Gaussian noise to x, with strength driven by an
        exponential moving average of the critic's fake-batch output."""
        if self.flag_add_noise == False:
            return x

        if hasattr(self, '_d_'):
            # EMA of mean critic output on fakes.
            self._d_ = self._d_ * 0.9 + torch.mean(self.fx_tilde).item() * 0.1
        else:
            self._d_ = 0.0
        strength = 0.2 * max(0, self._d_ - 0.5) ** 2
        z = np.random.randn(*x.size()).astype(np.float32) * strength
        z = Variable(torch.from_numpy(z)).cuda() if self.use_cuda else Variable(torch.from_numpy(z))
        return x + z

    def require_grad(self, model, feature_extracting):
        """Enable gradients on all params when `feature_extracting` is truthy,
        disable them otherwise.  NOTE(review): the flag name suggests the
        opposite convention; callers here pass True to *unfreeze* D."""
        if feature_extracting:
            for param in model.parameters():
                param.requires_grad = True
        else:
            for param in model.parameters():
                param.requires_grad = False

    def calc_gradient_penalty(self):
        """Gradient penalty on random interpolates between real x and fake
        x_tilde (WGAN-GP style; note the 6th power instead of the usual
        squared penalty — presumably intentional, TODO confirm)."""
        # One scalar alpha broadcast over the whole batch.
        alpha = torch.rand(1) * torch.ones_like(self.x)
        interpolates = alpha * self.x.detach() + ((torch.ones_like(self.x.detach()) - alpha) * self.x_tilde.detach())
        if self.use_cuda:
            interpolates = interpolates.cuda()
        interpolates.requires_grad_(True)
        disc_interpolates = self.D(interpolates)
        if self.use_cuda:
            grad_outputs = torch.ones(disc_interpolates.size()).cuda()
        else:
            grad_outputs = torch.ones(disc_interpolates.size())
        gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                  grad_outputs=grad_outputs,
                                  create_graph=True, retain_graph=True, only_inputs=True)[0]
        gradients = gradients.view(gradients.size(0), -1)
        gradient_penalty = ((gradients.norm(2, dim=1)) ** 6).mean() * self.LAMBDA
        return gradient_penalty

    def train(self):
        """Main training loop: alternates one D update and one G update per
        batch while the resolution scheduler advances the growth phases."""
        # noise for test.
        self.z_test = torch.Tensor(self.loader.batch_size, self.nz).normal_(0.0, 1.0)
        # self.z_test.data.resize_(self.loader.batch_size, self.nz).normal_(0.0, 1.0)
        if self.use_cuda:
            self.z_test = self.z_test.cuda()
        self.z_test = Variable(self.z_test)  # ,volatile=True

        for step in range(2, self.max_resl + 1 + 5):
            for iter in tqdm(range(0, (self.trns_tick * 2 + self.stab_tick * 2) * self.TICK, self.loader.batch_size)):
                self.globalIter = self.globalIter + 1
                self.stack = self.stack + self.loader.batch_size
                if self.stack > ceil(len(self.loader.dataset)):
                    self.epoch = self.epoch + 1
                    self.stack = int(self.stack % (ceil(len(self.loader.dataset))))

                # reslolution scheduler.
                self.resl_scheduler()

                # update discriminator.
                for i in range(1):
                    self.D.zero_grad()  # zero gradients.
                    self.require_grad(self.D, True)
                    self.x.data = self.feed_interpolated_input(self.loader.get_batch())
                    if self.flag_add_noise:
                        self.x = self.add_noise(self.x)
                    self.z = torch.randn(self.loader.batch_size, self.nz, 1, 1)
                    if self.use_cuda:
                        self.z = self.z.cuda()
                    self.x_tilde = self.G(self.z)
                    self.fx = self.D(self.x)
                    self.fx_tilde = self.D(self.x_tilde.detach())
                    # loss_d=F.mse_loss(self.fx.squeeze(), torch.ones_like(self.fx.squeeze()))+F.mse_loss(self.fx_tilde.squeeze(), torch.zeros_like(self.fx_tilde.squeeze()))+self.calc_gradient_penalty()
                    # Critic loss + gradient penalty.  NOTE(review): the sign
                    # convention is flipped relative to standard WGAN-GP, but
                    # loss_g below is flipped consistently with it.
                    loss_d = self.fx.squeeze().mean() - self.fx_tilde.squeeze().mean() + self.calc_gradient_penalty()
                    loss_d.backward(retain_graph=False)
                    self.opt_d.step()

                # update generator.
                for i in range(1):
                    self.G.zero_grad()  # zero gradients.
                    self.require_grad(self.D, False)  # freeze D for the G step
                    fx_tilde = self.D(self.x_tilde)
                    # loss_g = F.mse_loss(fx_tilde.squeeze(), torch.ones_like(self.fx_tilde.squeeze()))
                    loss_g = fx_tilde.squeeze().mean()
                    loss_g.backward(retain_graph=False)
                    self.opt_g.step()

                # logging.
                log_msg = ' [E:{0}][T:{1}][{2:6}/{3:6}] errD: {4:.4f} | errG: {5:.4f} | ' \
                          '[lr:{11:.5f}][cur:{6:.3f}][resl:{7:4}][{8}][{9:.1f}%][{10:.1f}%]'.format(
                    self.epoch, self.globalTick, self.stack, len(self.loader.dataset),
                    loss_d.item(), loss_g.item(), self.resl, int(pow(2, floor(self.resl))),
                    self.phase, self.complete['gen'], self.complete['dis'], self.lr)
                tqdm.write(log_msg)

                # save model.
                self.snapshot('repo/model')

                # save image grid.
                if self.globalIter % self.config.save_img_every == 0:
                    with torch.no_grad():
                        x_test = self.G(self.z_test)
                    utils.mkdir('repo/save/grid')
                    utils.save_image_grid(x_test.data, 'repo/save/grid/{}_{}_G{}_D{}.jpg'.format(
                        int(self.globalIter / self.config.save_img_every), self.phase,
                        self.complete['gen'], self.complete['dis']))
                    utils.mkdir('repo/save/resl_{}'.format(int(floor(self.resl))))
                    utils.save_image_single(x_test.data, 'repo/save/resl_{}/{}_{}_G{}_D{}.jpg'.format(
                        int(floor(self.resl)), int(self.globalIter / self.config.save_img_every),
                        self.phase, self.complete['gen'], self.complete['dis']))

                # tensorboard visualization.
                if self.use_tb:
                    with torch.no_grad():
                        # NOTE(review): feeds noise to the *discriminator*;
                        # likely should be self.G(self.z_test) — confirm.
                        x_test = self.D(self.z_test)
                    self.tb.add_scalar('data/loss_g', loss_g[0].item(), self.globalIter)
                    self.tb.add_scalar('data/loss_d', loss_d[0].item(), self.globalIter)
                    self.tb.add_scalar('tick/lr', self.lr, self.globalIter)
                    self.tb.add_scalar('tick/cur_resl', int(pow(2, floor(self.resl))), self.globalIter)

    def get_state(self, target):
        """Return a checkpoint dict (resl + weights + optimizer state) for
        either the generator ('gen') or the discriminator ('dis')."""
        if target == 'gen':
            state = {'resl': self.resl,
                     'state_dict': self.G.module.state_dict(),
                     'optimizer': self.opt_g.state_dict()}
            return state
        elif target == 'dis':
            state = {'resl': self.resl,
                     'state_dict': self.D.module.state_dict(),
                     'optimizer': self.opt_d.state_dict()}
            return state

    def snapshot(self, path):
        """Save G/D checkpoints under `path` every 50 ticks while the network
        is in a stabilization (or final) phase."""
        if not os.path.exists(path):
            if os.name == 'nt':
                os.system('mkdir {}'.format(path.replace('/', '\\')))
            else:
                # Do not drop the space after mkdir, or path creation fails.
                os.system('mkdir -p {}'.format(path))
        # save every 100 tick if the network is in stab phase.
        ndis = 'dis_R{}_T{}.pth.tar'.format(int(floor(self.resl)), self.globalTick)
        ngen = 'gen_R{}_T{}.pth.tar'.format(int(floor(self.resl)), self.globalTick)
        if self.globalTick % 50 == 0:
            if self.phase == 'gstab' or self.phase == 'dstab' or self.phase == 'final':
                save_path = os.path.join(path, ndis)
                if not os.path.exists(save_path):
                    torch.save(self.get_state('dis'), save_path)
                    save_path = os.path.join(path, ngen)
                    torch.save(self.get_state('gen'), save_path)
                    print('[snapshot] model saved @ {}'.format(path))
class Decoder(nn.Module):
    """Tacotron2-style autoregressive mel-spectrogram decoder.

    Each step consumes the previous mel frame (through a Prenet), runs an
    attention LSTM cell, computes location-sensitive attention over the
    encoder memory, runs a decoder LSTM cell, and projects to the next mel
    frame(s) plus a stop-token ("gate") energy.
    """

    def __init__(
        self,
        n_mel_channels,
        n_frames_per_step,
        encoder_embedding_dim,
        attention_dim,
        attention_rnn_dim,
        attention_location_n_filters,
        attention_location_kernel_size,
        decoder_rnn_dim,
        prenet_dim,
        max_decoder_steps,
        gate_threshold,
        p_attention_dropout,
        p_decoder_dropout,
    ):
        super(Decoder, self).__init__()
        self.n_mel_channels = n_mel_channels
        self.n_frames_per_step = n_frames_per_step
        self.encoder_embedding_dim = encoder_embedding_dim
        self.attention_rnn_dim = attention_rnn_dim
        self.decoder_rnn_dim = decoder_rnn_dim
        self.prenet_dim = prenet_dim
        self.max_decoder_steps = max_decoder_steps      # inference step cap
        self.gate_threshold = gate_threshold            # stop-token threshold
        self.p_attention_dropout = p_attention_dropout
        self.p_decoder_dropout = p_decoder_dropout

        self.prenet = Prenet(n_mel_channels * n_frames_per_step, [prenet_dim, prenet_dim])
        self.attention_rnn = nn.LSTMCell(prenet_dim + encoder_embedding_dim, attention_rnn_dim)
        self.attention_layer = Attention(
            attention_rnn_dim,
            encoder_embedding_dim,
            attention_dim,
            attention_location_n_filters,
            attention_location_kernel_size,
        )
        # NOTE(review): nn.LSTMCell's third positional argument is `bias`;
        # `1` is truthy so this enables bias — presumably the intent.
        self.decoder_rnn = nn.LSTMCell(attention_rnn_dim + encoder_embedding_dim, decoder_rnn_dim, 1)
        self.linear_projection = LinearNorm(decoder_rnn_dim + encoder_embedding_dim, n_mel_channels * n_frames_per_step)
        self.gate_layer = LinearNorm(decoder_rnn_dim + encoder_embedding_dim, 1, bias=True, w_init_gain="sigmoid")

    def get_go_frame(self, memory):
        """Gets all zeros frames to use as first decoder input

        PARAMS
        ------
        memory: decoder outputs

        RETURNS
        -------
        decoder_input: all zeros frames
        """
        B = memory.size(0)
        # Created on memory's device/dtype via .data.new(...).
        decoder_input = Variable(memory.data.new(B, self.n_mel_channels * self.n_frames_per_step).zero_())
        return decoder_input

    def initialize_decoder_states(self, memory, mask):
        """Initializes attention rnn states, decoder rnn states, attention
        weights, attention cumulative weights, attention context, stores memory
        and stores processed memory

        PARAMS
        ------
        memory: Encoder outputs
        mask: Mask for padded data if training, expects None for inference
        """
        B = memory.size(0)
        MAX_TIME = memory.size(1)

        self.attention_hidden = Variable(memory.data.new(B, self.attention_rnn_dim).zero_())
        self.attention_cell = Variable(memory.data.new(B, self.attention_rnn_dim).zero_())

        self.decoder_hidden = Variable(memory.data.new(B, self.decoder_rnn_dim).zero_())
        self.decoder_cell = Variable(memory.data.new(B, self.decoder_rnn_dim).zero_())

        self.attention_weights = Variable(memory.data.new(B, MAX_TIME).zero_())
        self.attention_weights_cum = Variable(memory.data.new(B, MAX_TIME).zero_())
        self.attention_context = Variable(memory.data.new(B, self.encoder_embedding_dim).zero_())

        self.memory = memory
        self.processed_memory = self.attention_layer.memory_layer(memory)
        self.mask = mask

    def parse_decoder_inputs(self, decoder_inputs):
        """Prepares decoder inputs, i.e. mel outputs

        PARAMS
        ------
        decoder_inputs: inputs used for teacher-forced training, i.e. mel-specs

        RETURNS
        -------
        inputs: processed decoder inputs
        """
        # (B, n_mel_channels, T_out) -> (B, T_out, n_mel_channels)
        decoder_inputs = decoder_inputs.transpose(1, 2)
        # Group frames per decoder step.
        decoder_inputs = decoder_inputs.view(
            decoder_inputs.size(0), int(decoder_inputs.size(1) / self.n_frames_per_step), -1
        )
        # (B, T_out, n_mel_channels) -> (T_out, B, n_mel_channels)
        decoder_inputs = decoder_inputs.transpose(0, 1)
        return decoder_inputs

    def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments):
        """Prepares decoder outputs for output

        PARAMS
        ------
        mel_outputs: list of per-step mel frames
        gate_outputs: gate output energies
        alignments: list of per-step attention weights

        RETURNS
        -------
        mel_outputs: (B, n_mel_channels, T_out)
        gate_outputs: gate output energies, (B, T_out)
        alignments: (B, T_out, MAX_TIME)
        """
        # (T_out, B) -> (B, T_out)
        alignments = torch.stack(alignments).transpose(0, 1)
        # (T_out, B) -> (B, T_out)
        gate_outputs = torch.stack(gate_outputs).transpose(0, 1)
        gate_outputs = gate_outputs.contiguous()
        # (T_out, B, n_mel_channels) -> (B, T_out, n_mel_channels)
        mel_outputs = torch.stack(mel_outputs).transpose(0, 1).contiguous()
        # decouple frames per step
        mel_outputs = mel_outputs.view(mel_outputs.size(0), -1, self.n_mel_channels)
        # (B, T_out, n_mel_channels) -> (B, n_mel_channels, T_out)
        mel_outputs = mel_outputs.transpose(1, 2)
        return mel_outputs, gate_outputs, alignments

    def decode(self, decoder_input):
        """Decoder step using stored states, attention and memory

        PARAMS
        ------
        decoder_input: previous mel output

        RETURNS
        -------
        mel_output:
        gate_output: gate output energies
        attention_weights:
        """
        # Attention RNN consumes previous frame + previous attention context.
        cell_input = torch.cat((decoder_input, self.attention_context), -1)
        self.attention_hidden, self.attention_cell = self.attention_rnn(
            cell_input, (self.attention_hidden, self.attention_cell)
        )
        self.attention_hidden = F.dropout(self.attention_hidden, self.p_attention_dropout, self.training)

        # Location-sensitive attention: current + cumulative weights.
        attention_weights_cat = torch.cat(
            (self.attention_weights.unsqueeze(1), self.attention_weights_cum.unsqueeze(1)), dim=1
        )
        self.attention_context, self.attention_weights = self.attention_layer(
            self.attention_hidden, self.memory, self.processed_memory, attention_weights_cat, self.mask
        )
        self.attention_weights_cum += self.attention_weights

        decoder_input = torch.cat((self.attention_hidden, self.attention_context), -1)
        self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
            decoder_input, (self.decoder_hidden, self.decoder_cell)
        )
        self.decoder_hidden = F.dropout(self.decoder_hidden, self.p_decoder_dropout, self.training)

        decoder_hidden_attention_context = torch.cat((self.decoder_hidden, self.attention_context), dim=1)
        decoder_output = self.linear_projection(decoder_hidden_attention_context)
        gate_prediction = self.gate_layer(decoder_hidden_attention_context)
        return decoder_output, gate_prediction, self.attention_weights

    def forward(self, memory, decoder_inputs, memory_lengths, device):
        """Decoder forward pass for training

        PARAMS
        ------
        memory: Encoder outputs
        decoder_inputs: Decoder inputs for teacher forcing. i.e. mel-specs
        memory_lengths: Encoder output lengths for attention masking.
        device: device used to build the attention mask

        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        # Prepend the all-zero "go" frame to the teacher-forced inputs.
        decoder_input = self.get_go_frame(memory).unsqueeze(0)
        decoder_inputs = self.parse_decoder_inputs(decoder_inputs)
        decoder_inputs = torch.cat((decoder_input, decoder_inputs), dim=0)
        decoder_inputs = self.prenet(decoder_inputs)

        self.initialize_decoder_states(memory, mask=~get_mask_from_lengths(memory_lengths, device))

        mel_outputs, gate_outputs, alignments = [], [], []
        while len(mel_outputs) < decoder_inputs.size(0) - 1:
            decoder_input = decoder_inputs[len(mel_outputs)]
            mel_output, gate_output, attention_weights = self.decode(decoder_input)
            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output.squeeze(1)]
            alignments += [attention_weights]

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(mel_outputs, gate_outputs, alignments)
        return mel_outputs, gate_outputs, alignments

    def inference(self, memory, max_decoder_steps=None):
        """Decoder inference

        PARAMS
        ------
        memory: Encoder outputs
        max_decoder_steps: optional step cap overriding self.max_decoder_steps

        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        if not max_decoder_steps:
            # Use default max decoder steps if not given
            max_decoder_steps = self.max_decoder_steps

        decoder_input = self.get_go_frame(memory)
        self.initialize_decoder_states(memory, mask=None)

        mel_outputs, gate_outputs, alignments = [], [], []
        while True:
            decoder_input = self.prenet(decoder_input)
            mel_output, gate_output, alignment = self.decode(decoder_input)

            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output]
            alignments += [alignment]

            # Stop when the gate fires, or fail loudly at the step cap.
            if torch.sigmoid(gate_output.data) > self.gate_threshold:
                break
            elif len(mel_outputs) == max_decoder_steps:
                raise Exception(
                    "Warning! Reached max decoder steps. Either the model is low quality or the given sentence is too short/long"
                )

            decoder_input = mel_output

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(mel_outputs, gate_outputs, alignments)
        return mel_outputs, gate_outputs, alignments
def train_model(model, criterion, optimizer, lr_scheduler, lr, dset_loaders, dset_sizes, use_gpu, num_epochs, exp_dir='./', resume=''):
    """Train/validate `model`, resuming from `resume` if it is a checkpoint.

    Per epoch runs a 'train' phase (with lr_scheduler + optimizer steps) and a
    'val' phase; on a new best validation accuracy, saves the whole model and
    a checkpoint dict into `exp_dir`.  Returns the (in-place trained) model.
    """
    print('dictoinary length' + str(len(dset_loaders)))  # (sic)
    #reg_params=model.reg_params
    since = time.time()

    best_model = model
    best_acc = 0.0

    # Optional resume: restore weights, optimizer state and starting epoch.
    if os.path.isfile(resume):
        print("=> loading checkpoint '{}'".format(resume))
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']
        #best_prec1 = checkpoint['best_prec1']
        #model = checkpoint['model']
        model.load_state_dict(checkpoint['state_dict'])
        #modelx = checkpoint['model']
        #model.reg_params=modelx.reg_params
        print('load')
        optimizer.load_state_dict(checkpoint['optimizer'])
        #pdb.
        #model.reg_params=reg_params
        #del model.reg_params
        print("=> loaded checkpoint '{}' (epoch {})".format(
            resume, checkpoint['epoch']))
    else:
        start_epoch = 0
        print("=> no checkpoint found at '{}'".format(resume))

    print(str(start_epoch))
    #pdb.set_trace()
    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                optimizer = lr_scheduler(optimizer, epoch, lr)
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dset_loaders[phase]:
                # get the inputs
                inputs, labels = data
                # NOTE(review): squeeze() drops ALL size-1 dims — breaks on a
                # final batch of size 1; presumably removes a dataset-specific
                # dummy dim, confirm against the loader.
                inputs = inputs.squeeze()

                # wrap them in Variable
                if use_gpu:
                    inputs, labels = Variable(inputs.cuda()), \
                        Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()
                model.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    #print('step')
                    optimizer.step()

                # statistics
                # NOTE(review): loss.data[0] is the pre-0.4 scalar API and
                # raises on modern PyTorch (use loss.item() there).
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dset_sizes[phase]
            epoch_acc = running_corrects / dset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                # Free the last batch's tensors before serializing the model.
                del outputs
                del labels
                del inputs
                del loss
                del preds
                best_acc = epoch_acc
                #best_model = copy.deepcopy(model)
                torch.save(model, os.path.join(exp_dir, 'best_model.pth.tar'))
                #epoch_file_name=exp_dir+'/'+'epoch-'+str(epoch)+'.pth.tar'
                epoch_file_name = exp_dir + '/' + 'epoch' + '.pth.tar'
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'epoch_acc': epoch_acc,
                        'arch': 'alexnet',
                        'model': model,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }, epoch_file_name)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    return model
num_workers=0) # Training loop for epoch in range(epochTimes): #test the result on test set model.eval() test_loss = 0 correct = 0 for i, data in enumerate(test_loader, 0): # get the inputs inputs, labels = data labels = labels.type(torch.FloatTensor) # wrap them in Variable inputs, labels = Variable(inputs), Variable(labels) # Forward pass: Compute predicted y by passing x to the model y_pred = model(inputs) if(i<10): print(labels.data[0]) print(y_pred.data[0]) # Compute and print loss loss = criterion(y_pred, labels) test_loss += loss test_loss /= len(test_loader.dataset) print('Average loss after ' + str(epoch) + ': ' + str(test_loss.data[0]))
train_start_time = time.time() early_counter = 0 decay_counter = 0 best_per = 0 for e_ in range(config.epoch): print("Epoch: ", e_ + 1) batch_counter = 0 for ie, example in enumerate(train_examples): token_ids, char_ids, entities, class_samples = example # skip for initial experiments # if len(token_ids) > 20: # continue token_var = Variable(torch.LongTensor(np.array(token_ids))) sample_vars = sample2tensor(class_samples, config.if_gpu) if config.if_gpu: token_var = token_var.cuda() char_vars = [] for char_id_l in char_ids: char_var = Variable(torch.LongTensor(np.array(char_id_l))) if config.if_gpu: char_var = char_var.cuda() char_vars.append(char_var) ner_model.train() optimizer.zero_grad() loss = ner_model.forward(token_var, char_vars, entities, sample_vars) loss.backward() clip_model_grad(ner_model, config.clip_norm) print("{2}: sentece length {0} : loss {1}".format(
def training_loop(args, model, criterion, optimizer, dataset, f, device, experiment):
    """Train `model` with gradient accumulation and return it with the best
    validation weights loaded.

    Parameters
    ----------
    args : namespace with `num_epochs`, `accumulation_steps`, `dataset`.
    model, criterion, optimizer : the usual PyTorch training triple.
    dataset : dict with 'train_dataloader', 'val_dataloader',
        'train_data', 'val_data'.
    f : path of the experiment report file (appended to).
    device : torch device for inputs/targets.
    experiment : name used for the saved weights file.

    Returns
    -------
    model with the best-validation-accuracy weights loaded.
    """
    start = time.time()
    best_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(args.num_epochs):
        print(f'Epoch {epoch} began')
        running_loss = 0.0
        running_corrects = 0

        # training phase
        for idx, data in enumerate(Bar(dataset['train_dataloader'])):
            inputs = Variable(data.get('image')).to(device)
            target = Variable(data.get('target')).to(device)

            # forward pass
            output = model(inputs)
            _, preds = torch.max(output, 1)
            loss = criterion(output, target)
            loss = loss / args.accumulation_steps  # normalize accumulated loss (averaged)
            loss = loss.mean()  # mean of per-device losses under DataParallel

            # backward pass; gradients accumulate across iterations
            loss.backward()
            if (idx + 1) % args.accumulation_steps == 0:  # wait for several backward steps
                optimizer.step()       # now we can do an optimizer step
                model.zero_grad()      # reset gradient tensors

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == target.data)

        # log training stats
        train_epoch_loss = running_loss / len(dataset['train_data'])
        train_epoch_acc = running_corrects.double() / len(dataset['train_data'])
        print('Epoch [{}/{}], training loss:{:.4f}'.format(epoch+1, args.num_epochs, train_epoch_loss))
        print('Epoch [{}/{}], training accuracy:{:.4f}'.format(epoch+1, args.num_epochs, train_epoch_acc))

        # validation phase
        running_loss = 0.0
        running_corrects = 0
        with torch.no_grad():
            for idx, data in enumerate(Bar(dataset['val_dataloader'])):
                inputs = Variable(data.get('image')).to(device)
                target = Variable(data.get('target')).to(device)
                output = model(inputs)
                _, preds = torch.max(output, 1)
                loss = criterion(output, target).mean()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == target.data)

        # log validation stats
        valid_epoch_loss = running_loss / len(dataset['val_data'])
        valid_epoch_acc = running_corrects.double() / len(dataset['val_data'])
        print('Epoch [{}/{}], validation loss:{:.4f}'.format(epoch+1, args.num_epochs, valid_epoch_loss))
        print('Epoch [{}/{}], validation accuracy:{:.4f}'.format(epoch+1, args.num_epochs, valid_epoch_acc))

        # append to experiment report.
        # FIX: use a context manager instead of `file=open(f, "a")`, which
        # leaked one file handle per call.
        with open(f, "a") as report:
            print(f'{epoch+1}\t{train_epoch_loss}\t{train_epoch_acc}\t{valid_epoch_loss}\t{valid_epoch_acc}', file=report)

        # save best weights
        if valid_epoch_acc > best_acc:
            best_acc = valid_epoch_acc
            best_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), f'models/{args.dataset}/{experiment}.pth')

    time_elapsed = time.time() - start
    with open(f, "a") as report:
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60), file=report)
        print('Best val Acc: {:4f}'.format(best_acc), file=report)

    # load best weights.
    # FIX: the original passed a *file path* to load_state_dict, which expects
    # a state-dict mapping and would raise; restore the tracked best weights.
    model.load_state_dict(best_weights)
    return model
def routing(self, x):
    """
    Dynamic routing-by-agreement for a capsule layer.

    :input: tensor x of shape [128, 8, 1152]
    :return: vector output of capsule j
    """
    batch_size = x.size(0)

    x = x.transpose(
        1, 2)  # dim 1 and dim 2 are swapped. out tensor shape: [128, 1152, 8]

    # Stacking and adding a dimension to a tensor.
    # stack ops output shape: [128, 1152, 10, 8]
    # unsqueeze ops output shape: [128, 1152, 10, 8, 1]
    x = torch.stack([x] * self.num_unit, dim=2).unsqueeze(4)

    # Convert single weight to batch weight.
    # [1 x 1152 x 10 x 16 x 8] to: [128, 1152, 10, 16, 8]
    batch_weight = torch.cat([self.weight] * batch_size, dim=0)

    # u_hat is "prediction vectors" from the capsules in the layer below.
    # Transform inputs by weight matrix.
    # Matrix product of 2 tensors with shape: [128, 1152, 10, 16, 8] x [128, 1152, 10, 8, 1]
    # u_hat shape: [128, 1152, 10, 16, 1]
    u_hat = torch.matmul(batch_weight, x)

    # All the routing logits (b_ij in the paper) are initialized to zero.
    # self.in_channel = primary_unit_size = 32 * 6 * 6 = 1152
    # self.num_unit = num_classes = 10
    # b_ij shape: [1, 1152, 10, 1]
    b_ij = Variable(torch.zeros(1, self.in_channel, self.num_unit, 1))
    if self.cuda_enabled:
        b_ij = b_ij.cuda()

    # From the paper in the "Capsules on MNIST" section,
    # the sample MNIST test reconstructions of a CapsNet with 3 routing iterations.
    num_iterations = self.num_routing

    for iteration in range(num_iterations):
        # Routing algorithm

        # Calculate routing or also known as coupling coefficients (c_ij):
        # softmax over dim=2 distributes each lower capsule's output across
        # the output capsules. c_ij shape: [1, 1152, 10, 1]
        c_ij = F.softmax(
            b_ij, dim=2)  # Convert routing logits (b_ij) to softmax.
        # Broadcast over the batch and add a trailing dim:
        # [1, 1152, 10, 1] -> [128, 1152, 10, 1, 1]
        c_ij = torch.cat([c_ij] * batch_size, dim=0).unsqueeze(4)

        # Implement equation 2 in the paper.
        # s_j is total input to a capsule, is a weigthed sum over all "prediction vectors".
        # u_hat is weighted inputs, prediction ˆuj|i made by capsule i.
        # c_ij * u_hat shape: [128, 1152, 10, 16, 1]
        # s_j output shape: [batch_size=128, 1, 10, 16, 1]
        # Sum of Primary Capsules outputs, 1152D becomes 1D.
        s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)

        # Squash the vector output of capsule j.
        # v_j shape: [batch_size, weighted sum of PrimaryCaps output,
        #             num_classes, output_unit_size from u_hat, 1]
        #          == [128, 1, 10, 16, 1]
        # So, the length of the output vector of a capsule is 16, which is in dim 3.
        v_j = utils.squash(s_j, dim=3)

        # in_channel is 1152.
        # v_j1 shape: [128, 1152, 10, 16, 1]
        v_j1 = torch.cat([v_j] * self.in_channel, dim=1)

        # The agreement.
        # Transpose u_hat with shape [128, 1152, 10, 16, 1] to [128, 1152, 10, 1, 16],
        # so we can do matrix product of u_hat and v_j1.
        # Mean over the batch gives u_vj1 shape: [1, 1152, 10, 1]
        u_vj1 = torch.matmul(u_hat.transpose(3, 4), v_j1).squeeze(4).mean(dim=0, keepdim=True)

        # Update routing (b_ij) by adding the agreement to the initial logit.
        b_ij = b_ij + u_vj1

    return v_j.squeeze(1)  # shape: [128, 10, 16, 1]
def calc_sl_loss(probs, update=True):
    """Compute the supervised loss of `probs` against the current batch labels.

    Parameters
    ----------
    probs : torch.Tensor
        Model outputs for the current batch (first argument to `criterion`).
    update : bool
        Unused in this function; presumably controls whether the caller
        applies the gradient — TODO confirm against call sites.

    Returns
    -------
    torch.Tensor
        Loss produced by the module-level `criterion`.
    """
    # `conf.batch_label` and `criterion` are module-level globals (defined
    # outside this block); labels arrive as a NumPy array.
    y_true = conf.batch_label
    # NOTE(review): `.cuda()` hard-requires a GPU here.
    y_true = Variable(torch.from_numpy(y_true)).cuda().long()
    loss = criterion(probs, y_true)
    return loss
#提取无流边界数据2 N_noflow2=10000 X_noflow2_col=x[0]+(x[50]-x[0])*lhs(1, N_noflow2) Y_noflow2_col=y[50]*np.ones((N_noflow2,1)) T_noflow2_col=0+(t[49]-0)*lhs(1, N_noflow2) TXY_noflow2 = np.hstack((T_noflow2_col,X_noflow2_col,Y_noflow2_col)) kesi_noflow_col=np.random.randn(N_noflow1+N_noflow2,n_eigen) TXY_noflow=np.vstack((TXY_noflow1,TXY_noflow2)) TXY_kesi_noflow=np.hstack((TXY_noflow,kesi_noflow_col)) TXY_kesi_noflow = torch.from_numpy(TXY_kesi_noflow) TXY_kesi_noflow = TXY_kesi_noflow.type(torch.FloatTensor) TXY_kesi_noflow = TXY_kesi_noflow.to(device) TXY_kesi_noflow= Variable(TXY_kesi_noflow, requires_grad=True) TXY_kesi_f = torch.from_numpy(TXY_kesi_f) TXY_kesi_f = TXY_kesi_f.type(torch.FloatTensor) TXY_kesi_train = torch.from_numpy(TXY_kesi_train) TXY_kesi_train=TXY_kesi_train.type(torch.FloatTensor) H_train = torch.from_numpy(H_train) H_train=H_train.type(torch.FloatTensor) w_x_tf = torch.from_numpy(w_x) w_x_tf = w_x_tf.type(torch.FloatTensor) w_y_tf = torch.from_numpy(w_y)