def get_indices(s):
    ''' function to obtain indices of non-zero values that do not consider
    interactions between a particle and itself
    e.g. q_1x, q_1x or q_1y, q_1y or q_2x, q_2x ....

    parameters
    ----------
    s : torch.Size
        shape of the dq list; shape is [nsamples, nparticle, nparticle, DIM]
        DIM = 2 for 2D LJ models
    n : nparticle
    m : torch.tensor
        filled with ones in the dq-list shape, then set to 0 where a
        particle would interact with itself

    Returns
    ----------
    indices of non-zero values in m
    '''
    n = s[1]
    m = torch.ones(s)

    # check - run "python3.7 -O" to switch off
    #if __debug__:
    #    m2 = torch.clone(m)
    #    for i in range(n): m2[:,i,i,:] = 0

    dm = torch.diagonal(m, 0, 1, 2)  # args: (offset, dim1, dim2) over the particle dims
    torch.fill_(dm, 0.0)

    #if __debug__:
    #    err = (m-m2)**2
    #    assert (torch.sum(err)<1e-6), 'error in diagonal computations'

    return m.nonzero(as_tuple=True)
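# A minimal standalone sketch of the diagonal trick above (my addition,
# illustrative shapes only): torch.diagonal over two matching dims returns a
# *view*, so an in-place fill_ on that view zeroes the self-interaction
# entries of the original tensor.
import torch

m = torch.ones(4, 3, 3, 2)                  # [nsamples, nparticle, nparticle, DIM]
diag = torch.diagonal(m, offset=0, dim1=1, dim2=2)
diag.fill_(0.0)                             # writes through the view into m
assert m[:, 0, 0, :].abs().sum() == 0       # self-interactions are now zero
print(m.nonzero(as_tuple=True)[0].shape)    # indices of the remaining entries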
def get(self):
    """Call this at the end of an epoch to get all of the data from the
    buffer, with advantages appropriately normalized (shifted to have
    mean zero and std one). Also resets some pointers in the buffer.
    """
    mbuf = self.mask_buf.clone()
    torch.fill_(self.mask_buf, 1.0)
    for b in range(self.nagents):
        self.ptr[b], self.path_start_idx[b] = 0, 0
        # the next two lines implement the advantage normalization trick
        adv = self.adv_buf[b]
        # This is supposed to be computed across all processes but it
        # becomes a big bottleneck
        # adv_mean, adv_std = (
        #     adv.mean(),
        #     adv.std(),
        # )
        adv_mean, adv_std = hvd_scalar_statistics(self.adv_buf[b])
        self.adv_buf[b] = (adv - adv_mean) / (adv_std + 1e-7)
    # The entire buffer will most likely not be filled
    return dict(
        obs=self.state_buf,
        lidar=self.lidar_buf,
        act=self.act_buf,
        ret=self.ret_buf,
        adv=self.adv_buf,
        logp=self.logp_buf,
        vest=self.val_buf,
        mask=mbuf,
    )
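# Hedged sketch (my addition, not from the code above): one way to compute the
# global mean/std that hvd_scalar_statistics presumably returns, using
# torch.distributed instead of Horovod. Assumes a process group is already
# initialized; global_mean_std is a hypothetical helper name.
import torch
import torch.distributed as dist

def global_mean_std(x: torch.Tensor):
    # Sum, sum of squares, and element count, reduced across all ranks.
    stats = torch.stack([x.sum(), (x ** 2).sum(), x.new_tensor(x.numel())])
    dist.all_reduce(stats, op=dist.ReduceOp.SUM)
    total, total_sq, count = stats
    mean = total / count
    var = total_sq / count - mean ** 2
    return mean, torch.sqrt(var.clamp_min(0.0))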
def lava_neuron_forward(lava_neuron: nn.Module, x_seq: torch.Tensor,
                        v: Union[torch.Tensor, float]):
    # x_seq.shape = [T, N, *]
    # lava uses shape = [*, T], while SpikingJelly (SJ) uses shape = [T, *]
    unsqueeze_flag = False
    if x_seq.dim() == 2:
        x_seq = x_seq.unsqueeze(1)
        # lava needs input with shape [N, ..., T]
        unsqueeze_flag = True

    if isinstance(v, float):
        v_init = v
        v = torch.zeros_like(x_seq[0])
        if v_init != 0.:
            torch.fill_(v, v_init)

    x_seq_shape = x_seq.shape
    x_seq = x_seq.flatten(2).permute(1, 2, 0)  # [T, N, *] -> [N, *, T]
    lava_neuron.voltage_state = v
    spike = lava_neuron(x_seq).permute(2, 0, 1)

    v = lava_neuron.voltage_state.reshape(x_seq_shape[1:])
    spike = spike.reshape(x_seq_shape)
    if unsqueeze_flag:
        v = v.squeeze(1)
        spike = spike.squeeze(1)
    return spike, v
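# Quick sketch of the layout round trip above (my addition, illustrative
# shapes): lava expects time last, SpikingJelly expects time first;
# flatten+permute maps [T, N, *] -> [N, prod(*), T] and the inverse
# permute/reshape restores the original layout exactly.
import torch

T, N = 4, 2
x = torch.randn(T, N, 3, 5)                # [T, N, *]
y = x.flatten(2).permute(1, 2, 0)          # [N, 15, T], time last for lava
x_back = y.permute(2, 0, 1).reshape(T, N, 3, 5)
assert torch.equal(x, x_back)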
def train(self, inputImages, targetImages, num_updaters=100, lr=3e-4):
    genOpt = torch.optim.Adam(self.gen.parameters(), lr=lr)   # was .paramers()
    disOpt = torch.optim.Adam(self.dis.parameters(), lr=lr)
    for updater in range(num_updaters):
        # discriminator on real images
        disOpt.zero_grad()
        realDis = self.dis(inputImages)
        label = torch.full_like(realDis, 1.0, dtype=torch.float)
        disLossReal = self.criterion(realDis, label)
        disLossReal.backward()

        # discriminator on fake images; the original fed the noise straight to
        # the discriminator and never called the generator, so the generator
        # update below was a no-op -- passing the noise through self.gen is
        # the presumed intent
        noise = self.sampleX(inputImages.size(), -2.5, 0.5)
        fakeImages = self.gen(noise)
        fakeDis = self.dis(fakeImages.detach())  # detach: no grads into the generator here
        torch.fill_(label, 0.0)
        disLossFake = self.criterion(fakeDis, label)
        disLossFake.backward()
        discriminatorLoss = disLossReal.item() + disLossFake.item()
        disOpt.step()

        # generator
        genOpt.zero_grad()
        torch.fill_(label, 1.0)       # was torch.fill_(1.0): missing tensor argument
        fakeDis = self.dis(fakeImages)  # keep the graph so grads reach the generator
        generatorLoss = self.criterion(fakeDis, label)
        generatorLoss.backward()
        genOpt.step()

        if updater % (num_updaters // 20) == 0:
            print('%d / %d Loss_D %.4f' % (updater, num_updaters, discriminatorLoss))
            print('Loss_Generator %.4f' % generatorLoss.item())  # was .ipem()
def unpack_dqdp_tau(self, y, qlist_shape):
    ''' function to unpack the network output

    parameter
    _____________
    y : prediction
        y.shape = [nsamples * nparticle * nparticle, 2]

    return
    _____________
    y2 : shape is [nsamples, nparticle, 2]
    '''
    nsamples, nparticle, DIM = qlist_shape
    y1 = torch.reshape(y, (nsamples, nparticle, nparticle, DIM))

    # check - run "python3.7 -O" to switch off
    #if __debug__:
    #    y2 = torch.clone(y)
    #    for i in range(nparticle): y2[:,i,i,:] = 0

    dy = torch.diagonal(y1, 0, 1, 2)  # args: (offset, dim1, dim2) over the particle dims
    torch.fill_(dy, 0.0)

    #if __debug__:
    #    err = (y-y2)**2
    #    assert (torch.sum(err)<1e-6), 'error in diagonal computations'

    y2 = torch.sum(y1, dim=2)  # sum over dim=2, i.e. over all neighbors
    # y2.shape = [nsamples, nparticle, 2]
    return y2
def unpack_dqdp_tau(self, y, qlist_shape):
    nsamples, nparticle, DIM = qlist_shape
    y1 = torch.reshape(y, (nsamples, nparticle, nparticle, DIM))
    dy = torch.diagonal(y1, 0, 1, 2)  # args: (offset, dim1, dim2)
    torch.fill_(dy, 0.0)
    y2 = torch.sum(y1, dim=2)
    # y2.shape = [nsamples, nparticle, 2]
    return y2
def compute_delj(dx, MInt, VInt, delj, use_delj_trick):
    if not use_delj_trick:
        torch.fill_(delj, 0.5)
        return delj
    wj = 2 * MInt * dx
    epsj = torch.exp(wj / VInt)
    # Guard against the degenerate case epsj == 1 / wj == 0, where the
    # expression below is 0/0.
    if not torch.equal(epsj, torch.full(epsj.size(), 1.0)) and not torch.equal(
            wj, torch.zeros(wj.size())):
        # Write the result in place: the original rebound the local name
        # (delj = ...), which never reached the caller's tensor.
        delj.copy_((-epsj * wj + epsj * VInt - VInt) / (wj - epsj * wj))
    return delj
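# Hedged side note (my addition, illustrative only): rebinding a parameter
# inside a function does not affect the caller's tensor, which is why the
# in-place copy_ above matters.
import torch

def rebind(t):
    t = t * 2          # new tensor; caller unaffected

def in_place(t):
    t.copy_(t * 2)     # writes into the caller's storage

a = torch.ones(3)
rebind(a);   print(a)  # tensor([1., 1., 1.])
in_place(a); print(a)  # tensor([2., 2., 2.])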
def zero_ufields(self, s13s7):
    nsamples, npar, npar_ngrids, DIM = s13s7.shape
    make_zero_ufields = s13s7.view(nsamples, npar, npar, npar_ngrids // npar, DIM)
    # make_zero_ufields.shape is [nsamples, nparticle, nparticle, ngrids, DIM]
    dy = torch.diagonal(make_zero_ufields, 0, 1, 2)  # args: (offset, dim1, dim2)
    torch.fill_(dy, 0.0)
    s13s7_reshape = make_zero_ufields.view(nsamples, npar, npar_ngrids, DIM)
    # s13s7_reshape.shape is [nsamples, nparticle, nparticle*ngrids, DIM]
    return s13s7_reshape
def generate_jit(self, x: torch.Tensor, alpha: float = 1.0,
                 beta: float = 1.0) -> Dict[str, torch.Tensor]:
    with torch.no_grad():
        dur_hat = self.dur_pred(x, alpha=alpha)
        dur_hat = dur_hat.squeeze(2)
        if torch.sum(dur_hat.long()) <= 0:
            torch.fill_(dur_hat, value=2.)
        pitch_hat = self.pitch_pred(x).transpose(1, 2) * beta
        energy_hat = self.energy_pred(x).transpose(1, 2)
        return self._generate_mel(x=x, dur_hat=dur_hat,
                                  pitch_hat=pitch_hat, energy_hat=energy_hat)
def forward(self, doc, rating, meta):
    output = {}
    # extra padding: pad the document up to the largest sentence-CNN filter
    batch_size, num_sents, num_words = doc['tokens'].shape
    largest_filter = max(i.kernel_size[0]
                         for i in self._sent_cnn._convolution_layers)
    if num_sents < largest_filter:
        z = torch.fill_(
            torch.zeros(batch_size, largest_filter - num_sents, num_words,
                        dtype=torch.long),
            self.vocab.get_token_index('@@PADDING@@'))
        doc['tokens'] = torch.cat((doc['tokens'], z), dim=1)

    mask = get_text_field_mask(doc, num_wrapping_dims=1)
    doc = self._embed(doc['tokens'])

    batch_size, num_sents, num_words = mask.size()
    word_reps = doc.view(batch_size * num_sents, num_words, -1)
    word_mask = mask.view(batch_size * num_sents, num_words)
    sent_mask = mask[:, :, 0]
    sent_reps = self._word_cnn(word_reps, word_mask).view(batch_size, num_sents, -1)
    passage_reps = self._sent_cnn(sent_reps, sent_mask)
    clf = self._doc_project(passage_reps)

    output['prediction'] = [
        self.vocab.get_token_from_index(i.item(), 'labels')
        for i in clf.argmax(dim=-1)
    ]
    output['loss'] = self._crit(clf, rating)
    self._acc(clf, rating)
    return output
def zero_phi_fields(self, s12s6):
    nsamples, npar, npar_ngrids = s12s6.shape
    make_zero_phi = torch.reshape(s12s6, (nsamples, npar, npar, npar_ngrids // npar))
    # make_zero_phi.shape is [nsamples, nparticle, nparticle, ngrids]
    dy = torch.diagonal(make_zero_phi, 0, 1, 2)  # args: (offset, dim1, dim2)
    torch.fill_(dy, 0.0)
    s12s6_reshape = torch.reshape(make_zero_phi, (nsamples, npar, npar_ngrids))
    return s12s6_reshape
def pad_labels(labels, pad_token, max_len):
    input_len = len(labels)
    if input_len < max_len:
        pad_len = max_len - input_len
        pad_seq = torch.fill_(torch.zeros(pad_len), pad_token).long()
        labels = torch.cat((labels, pad_seq))
    return labels
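# Side note (a public-API fact, not a change to the snippet above): torch.full
# builds the constant padding in one step, avoiding the zeros-then-fill_
# round trip.
import torch

pad_len, pad_token = 4, -100
pad_seq = torch.full((pad_len,), pad_token, dtype=torch.long)
assert torch.equal(pad_seq, torch.fill_(torch.zeros(pad_len), pad_token).long())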
def zero_derivate_phi_fields(self, s12s6):
    nsamples, npar, npar_ngrids, DIM = s12s6.shape
    make_zero_dphi = torch.reshape(
        s12s6, (nsamples, npar, npar, npar_ngrids // npar, DIM))
    # make_zero_dphi.shape is [nsamples, nparticle, nparticle, ngrids, DIM]
    dy = torch.diagonal(make_zero_dphi, 0, 1, 2)  # args: (offset, dim1, dim2)
    torch.fill_(dy, 0.0)
    s12s6_reshape = torch.reshape(make_zero_dphi, (nsamples, npar, npar_ngrids, DIM))
    # s12s6_reshape.shape is [nsamples, nparticle, nparticle*ngrids, DIM]
    return s12s6_reshape
def pad_sequence_phonemes(phonemes, phn_lengths, PAD_TOKEN=6):
    lengths = phn_lengths
    max_length = max(lengths)
    padded_phonemes_batch = []
    for phoneme in phonemes:
        pad_mat = torch.fill_(
            torch.zeros(max_length - phoneme.shape[0]), PAD_TOKEN).long()
        padded_phonemes = torch.cat((phoneme, pad_mat), 0)
        padded_phonemes_batch.append(padded_phonemes)
    return padded_phonemes_batch
def generate(
        self,
        x: torch.Tensor,
        alpha=1.0,
        pitch_function: Callable[[torch.Tensor], torch.Tensor] = lambda x: x,
        energy_function: Callable[[torch.Tensor], torch.Tensor] = lambda x: x
) -> Dict[str, torch.Tensor]:
    self.eval()
    with torch.no_grad():
        dur_hat = self.dur_pred(x, alpha=alpha)
        dur_hat = dur_hat.squeeze(2)
        if torch.sum(dur_hat.long()) <= 0:
            torch.fill_(dur_hat, value=2.)
        pitch_hat = self.pitch_pred(x).transpose(1, 2)
        pitch_hat = pitch_function(pitch_hat)
        energy_hat = self.energy_pred(x).transpose(1, 2)
        energy_hat = energy_function(energy_hat)
        return self._generate_mel(x=x, dur_hat=dur_hat,
                                  pitch_hat=pitch_hat, energy_hat=energy_hat)
def __init__(self, input_size, batch_size, output_size, n_training,
             loss_function=F.nll_loss):
    super().__init__()
    self.l1 = BLinear(input_size, 10)
    self.l2 = BLinear(10, 10)
    self.l3 = BLinear(10, output_size)
    self.batch_size = batch_size
    self.input_size = input_size
    self.output_size = output_size
    self.n_training = n_training
    self.loss_function = loss_function
    self.noise = torch.fill_(torch.zeros(batch_size), 1e-3)
def train(epochs=2, device=device, test=False, pretrained=False):
    E = nn.BCELoss()
    model_G_path = './models/modelG.pth'
    model_D_path = './models/modelD.pth'
    if pretrained is True:
        modelG = Generator()
        modelD = Discriminator()
        modelG.load_state_dict(torch.load(model_G_path))
        modelD.load_state_dict(torch.load(model_D_path))
        modelG = modelG.to(device)
        modelD = modelD.to(device)
    else:
        modelG = Generator().to(device)
        modelD = Discriminator().to(device)
    fix_data = torch.randn(64, 100, 1, 1, device=device)
    optG = optim.Adam(modelG.parameters(), lr=0.0002, betas=(0.05, 0.999))
    optD = optim.Adam(modelD.parameters(), lr=0.0002, betas=(0.05, 0.999))
    for e in range(epochs):
        for i, (input, target) in enumerate(dataloader):
            ## 1. train D
            ## 1.1 real data
            optD.zero_grad()
            input = input.to(device)
            label = input.new_ones((input.shape[0],))  # labels all 1
            out = modelD(input)  # predicted probability on real data
            loss_real = E(out, label)
            loss_real.backward()
            D_predreal_mean = out.mean().item()
            ## 1.2 fake data
            torch.fill_(label, 0)  # labels all 0
            # input for modelG:
            noise = torch.randn(input.shape[0], 100, 1, 1, device=device)
            # generate fake images with modelG
            fake_img = modelG(noise)
            # predicted probability on fake data.
            # fake_img must be detached here: it is the output of modelG, and
            # without detach the backward pass of loss_fake below would also
            # propagate into modelG and compute gradients for its parameters,
            # which is unnecessary.
            out = modelD(fake_img.detach())
            loss_fake = E(out, label)
            loss_fake.backward()
            loss_D = loss_real + loss_fake
            D_predfake_mean1 = out.mean().item()
            optD.step()
            ## 2. train G
            optG.zero_grad()
            torch.fill_(label, 1)
            # Here fake_img must NOT be detached: the gradients w.r.t.
            # modelG's parameters flow through the intermediate fake_img.
            # Once detached, backpropagation stops at fake_img and can no
            # longer reach modelG.
            out = modelD(fake_img)
            loss_G = E(out, label)
            loss_G.backward()
            D_predfake_mean2 = out.mean().item()
            optG.step()
            if test is True:
                with torch.no_grad():
                    imgs = modelG(fix_data).detach().cpu()
                img_list.append(tv.utils.make_grid(imgs, padding=2, normalize=True))
                return 'testOK'
            if i % 100 == 0:
                # print statistics
                print('e:[{}/{}] i:[{}/{}] loss_D:{:.4f} loss_G:{:.4f} pred_D:{:.4f} pred_G1:{:.4f} pred_G2:{:.4f}'.format(
                    e, epochs, i, len(dataloader), loss_D.item(), loss_G.item(),
                    D_predreal_mean, D_predfake_mean1, D_predfake_mean2))
            if i % 500 == 0:
                with torch.no_grad():
                    imgs = modelG(fix_data).detach().cpu()
                # normalize=True rescales the generated images from [-1, 1] to [0, 1]
                img_list.append(tv.utils.make_grid(imgs, padding=2, normalize=True))
            # full training takes too long; stop once i exceeds 1500:
            #if i > 1500:
            #    break
    torch.save(modelG.state_dict(), model_G_path)
    torch.save(modelD.state_dict(), model_D_path)
def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for stidx in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(
            torch.LongTensor(target[stidx:stidx + params.batch_size])).cuda()

        s1_batch = s1_batch.transpose(0, 1)
        s2_batch = s2_batch.transpose(0, 1)

        # positions run 1..len within each sentence; past the sentence end the
        # row keeps the sentence length, and the mask stays 0
        position_a = torch.zeros(s1_batch.size(0), s1_batch.size(1))  # 16 x 21
        mask_a = torch.zeros(s1_batch.size(0), s1_batch.size(1))
        for i in range(s1_batch.size(0)):
            j = 1
            torch.fill_(position_a[i], int(s1_len[i]))
            for k in range(s1_len[i]):
                position_a[i][k] = j
                j += 1
                mask_a[i][k] = 1
        position_a = position_a.long()

        position_b = torch.zeros(s2_batch.size(0), s2_batch.size(1))  # 16 x 21
        mask_b = torch.zeros(s2_batch.size(0), s2_batch.size(1))
        for i in range(s2_batch.size(0)):
            j = 1
            torch.fill_(position_b[i], int(s2_len[i]))
            for k in range(s2_len[i]):
                position_b[i][k] = j
                j += 1
                mask_b[i][k] = 1
        position_b = position_b.long()

        position_a, position_b = Variable(position_a.cuda()), Variable(position_b.cuda())

        # model forward
        output = nli_net((s1_batch, s1_len, position_a, mask_a.cuda()),
                         (s2_batch, s2_len, position_b, mask_b.cuda()))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = round(100 * int(correct.data) / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(
            epoch, eval_type, eval_acc))
    with open(filename, "a") as f:
        f.write(str(epoch) + " " + str(eval_acc) + "\n")

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(),
                       os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = \
                    optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(
                    params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
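# Hedged sketch (my rewrite, not the authors'): the per-row Python loops above
# can be vectorized with arange plus a length comparison, producing the same
# position and mask tensors. positions_and_mask is a hypothetical helper.
import torch

def positions_and_mask(batch_size, max_len, lengths):
    idx = torch.arange(1, max_len + 1).expand(batch_size, max_len)
    lens = torch.as_tensor(lengths).unsqueeze(1)            # [B, 1]
    mask = (idx <= lens).float()
    position = torch.where(idx <= lens, idx, lens).long()   # past the end: length
    return position, mask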
def trainepoch(epoch):
    print('\nTRAINING : Epoch ' + str(epoch))
    nli_net.train()
    all_costs = []
    logs = []
    words_count = 0
    last_time = time.time()
    correct = 0.

    # shuffle the data
    permutation = np.random.permutation(len(train['s1']))
    s1 = train['s1'][permutation]
    s2 = train['s2'][permutation]
    target = train['label'][permutation]

    optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * params.decay \
        if epoch > 1 and 'sgd' in params.optimizer \
        else optimizer.param_groups[0]['lr']
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

    for stidx in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(
            torch.LongTensor(target[stidx:stidx + params.batch_size])).cuda()
        k = s1_batch.size(1)  # actual batch size

        s1_batch = s1_batch.transpose(0, 1)
        s2_batch = s2_batch.transpose(0, 1)

        # positions and masks; note the inner loops reuse the name k, so the
        # actual batch size is recomputed before the gradient clipping below
        position_a = torch.zeros(s1_batch.size(0), s1_batch.size(1))  # 16 x 21
        mask_a = torch.zeros(s1_batch.size(0), s1_batch.size(1))
        for i in range(s1_batch.size(0)):
            j = 1
            torch.fill_(position_a[i], int(s1_len[i]))
            for k in range(s1_len[i]):
                position_a[i][k] = j
                j += 1
                mask_a[i][k] = 1
        position_a = position_a.long()

        position_b = torch.zeros(s2_batch.size(0), s2_batch.size(1))  # 16 x 21
        mask_b = torch.zeros(s2_batch.size(0), s2_batch.size(1))
        for i in range(s2_batch.size(0)):
            j = 1
            torch.fill_(position_b[i], int(s2_len[i]))
            for k in range(s2_len[i]):
                position_b[i][k] = j
                j += 1
                mask_b[i][k] = 1
        position_b = position_b.long()

        position_a, position_b = Variable(position_a.cuda()), Variable(position_b.cuda())

        # model forward
        output = nli_net((s1_batch, s1_len, position_a, mask_a.cuda()),
                         (s2_batch, s2_len, position_b, mask_b.cuda()))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
        assert len(pred) == len(s1[stidx:stidx + params.batch_size])

        # loss
        loss = loss_fn(output, tgt_batch)
        all_costs.append(float(loss))
        words_count += (s1_batch.nelement() + s2_batch.nelement()) / params.word_emb_dim

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping (off by default)
        shrink_factor = 1
        total_norm = 0
        k = s1_batch.size(0)  # restore the actual batch size (k was clobbered above)
        count = 0
        for name, p in nli_net.named_parameters():
            if p.requires_grad:
                count += 1
                p.grad.data.div_(k)  # divide by the actual batch size
                total_norm += p.grad.data.norm() ** 2
        total_norm = np.sqrt(total_norm)
        if total_norm > params.max_norm:
            print("shrinking applied...................")
            shrink_factor = params.max_norm / total_norm
        current_lr = optimizer.param_groups[0]['lr']  # current lr (no external "lr", for adam)
        optimizer.param_groups[0]['lr'] = current_lr * shrink_factor  # just for update

        # optimizer step
        optimizer.step()
        optimizer.param_groups[0]['lr'] = current_lr

        if len(all_costs) == 100:
            logs.append(
                '{0} ; loss {1} ; sentence/s {2} ; words/s {3} ; accuracy train : {4}'.format(
                    stidx,
                    round(np.mean(all_costs), 2),
                    int(len(all_costs) * params.batch_size / (time.time() - last_time)),
                    int(words_count * 1.0 / (time.time() - last_time)),
                    round(100. * int(correct.data) / (stidx + k), 2)))
            print(logs[-1])
            last_time = time.time()
            words_count = 0
            all_costs = []

    train_acc = round(100 * int(correct.data) / len(s1), 2)
    print('results : epoch {0} ; mean accuracy train : {1}'.format(epoch, train_acc))
    return train_acc
def get_gt_single(self, im, gt_instances, featmap_sizes=None):
    gt_bboxes_raw = gt_instances[im].gt_boxes.tensor
    gt_labels_raw = gt_instances[im].gt_classes
    gt_masks_raw = gt_instances[im].gt_masks.tensor
    device = gt_labels_raw[0].device
    gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                          (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))

    ins_label_list = []
    cate_label_list = []
    ins_ind_label_list = []
    for (lower_bound, upper_bound), stride, featmap_size, num_grid \
            in zip(self.scale_ranges, self.strides, featmap_sizes, self.seg_num_grids):

        ins_label = torch.zeros([num_grid ** 2, featmap_size[0], featmap_size[1]],
                                dtype=torch.uint8, device=device)
        # NOTE: gt_labels_raw between 0~79.
        cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
        cate_label = torch.fill_(cate_label, -1.)
        ins_ind_label = torch.zeros([num_grid ** 2], dtype=torch.bool, device=device)

        hit_indices = ((gt_areas >= lower_bound) &
                       (gt_areas <= upper_bound)).nonzero().flatten()
        if len(hit_indices) == 0:
            ins_label_list.append(ins_label)
            cate_label_list.append(cate_label)
            ins_ind_label_list.append(ins_ind_label)
            continue
        gt_bboxes = gt_bboxes_raw[hit_indices]
        gt_labels = gt_labels_raw[hit_indices]
        gt_masks = gt_masks_raw[hit_indices.cpu().numpy(), ...].cpu().numpy().astype(np.uint8)

        half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
        half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma

        output_stride = stride / 2

        for seg_mask, gt_label, half_h, half_w in zip(gt_masks, gt_labels,
                                                      half_hs, half_ws):
            if seg_mask.sum() < 10:
                continue
            # mass center
            upsampled_size = (featmap_sizes[0][0] * 4, featmap_sizes[0][1] * 4)
            center_h, center_w = ndimage.measurements.center_of_mass(seg_mask)
            coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
            coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))

            # left, top, right, down
            top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
            down_box = min(num_grid - 1,
                           int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
            left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
            right_box = min(num_grid - 1,
                            int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))

            top = max(top_box, coord_h - 1)
            down = min(down_box, coord_h + 1)
            left = max(coord_w - 1, left_box)
            right = min(right_box, coord_w + 1)

            cate_label[top:(down + 1), left:(right + 1)] = gt_label

            # ins
            seg_mask = imrescale(seg_mask, scale=1. / output_stride)
            seg_mask = torch.Tensor(seg_mask)
            for i in range(top, down + 1):
                for j in range(left, right + 1):
                    label = int(i * num_grid + j)
                    ins_label[label, :seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
                    ins_ind_label[label] = True

        ins_label_list.append(ins_label)
        cate_label_list.append(cate_label)
        ins_ind_label_list.append(ins_ind_label)
    return ins_label_list, cate_label_list, ins_ind_label_list
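# Hedged sketch of the grid assignment above (my addition, illustrative
# numbers only): mapping a mass center in image coordinates to an S x S grid
# cell is a normalized coordinate times the grid size, floored.
num_grid, img_h, img_w = 40, 800, 1216     # assumed sizes, not from the code
center_h, center_w = 123.4, 567.8
coord_h = int((center_h / img_h) // (1. / num_grid))  # == floor(center_h / img_h * num_grid)
coord_w = int((center_w / img_w) // (1. / num_grid))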
def get_ground_truth_single(self, img_idx, gt_instances, mask_feat_size):
    gt_bboxes_raw = gt_instances[img_idx].gt_boxes.tensor
    gt_labels_raw = gt_instances[img_idx].gt_classes
    gt_masks_raw = gt_instances[img_idx].gt_masks.tensor
    device = gt_labels_raw[0].device

    # ins
    gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                          (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))

    ins_label_list = []
    cate_label_list = []
    ins_ind_label_list = []
    grid_order_list = []
    for (lower_bound, upper_bound), stride, num_grid \
            in zip(self.scale_ranges, self.strides, self.num_grids):

        hit_indices = ((gt_areas >= lower_bound) &
                       (gt_areas <= upper_bound)).nonzero().flatten()
        num_ins = len(hit_indices)

        ins_label = []
        grid_order = []
        cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
        cate_label = torch.fill_(cate_label, self.num_classes)
        ins_ind_label = torch.zeros([num_grid ** 2], dtype=torch.bool, device=device)

        if num_ins == 0:
            ins_label = torch.zeros([0, mask_feat_size[0], mask_feat_size[1]],
                                    dtype=torch.uint8, device=device)
            ins_label_list.append(ins_label)
            cate_label_list.append(cate_label)
            ins_ind_label_list.append(ins_ind_label)
            grid_order_list.append([])
            continue
        gt_bboxes = gt_bboxes_raw[hit_indices]
        gt_labels = gt_labels_raw[hit_indices]
        gt_masks = gt_masks_raw[hit_indices, ...]

        half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
        half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma

        # mass center
        center_ws, center_hs = center_of_mass(gt_masks)
        valid_mask_flags = gt_masks.sum(dim=-1).sum(dim=-1) > 0

        output_stride = 4
        gt_masks = gt_masks.permute(1, 2, 0).to(dtype=torch.uint8).cpu().numpy()
        gt_masks = imrescale(gt_masks, scale=1. / output_stride)
        if len(gt_masks.shape) == 2:
            gt_masks = gt_masks[..., None]
        gt_masks = torch.from_numpy(gt_masks).to(dtype=torch.uint8,
                                                 device=device).permute(2, 0, 1)

        for seg_mask, gt_label, half_h, half_w, center_h, center_w, valid_mask_flag in zip(
                gt_masks, gt_labels, half_hs, half_ws, center_hs, center_ws,
                valid_mask_flags):
            if not valid_mask_flag:
                continue
            upsampled_size = (mask_feat_size[0] * 4, mask_feat_size[1] * 4)
            coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
            coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))

            # left, top, right, down
            top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
            down_box = min(num_grid - 1,
                           int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
            left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
            right_box = min(num_grid - 1,
                            int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))

            top = max(top_box, coord_h - 1)
            down = min(down_box, coord_h + 1)
            left = max(coord_w - 1, left_box)
            right = min(right_box, coord_w + 1)

            cate_label[top:(down + 1), left:(right + 1)] = gt_label
            for i in range(top, down + 1):
                for j in range(left, right + 1):
                    label = int(i * num_grid + j)

                    cur_ins_label = torch.zeros([mask_feat_size[0], mask_feat_size[1]],
                                                dtype=torch.uint8, device=device)
                    cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
                    ins_label.append(cur_ins_label)
                    ins_ind_label[label] = True
                    grid_order.append(label)
        if len(ins_label) == 0:
            ins_label = torch.zeros([0, mask_feat_size[0], mask_feat_size[1]],
                                    dtype=torch.uint8, device=device)
        else:
            ins_label = torch.stack(ins_label, 0)
        ins_label_list.append(ins_label)
        cate_label_list.append(cate_label)
        ins_ind_label_list.append(ins_ind_label)
        grid_order_list.append(grid_order)
    return ins_label_list, cate_label_list, ins_ind_label_list, grid_order_list
def trainepoch(epoch, RL_train=True, LSTM_train=True):
    print('\nTRAINING : Epoch ' + str(epoch))
    actorModel.train(False)
    criticModel.train(False)
    if RL_train:
        actorModel.train()
    if LSTM_train:
        criticModel.train()

    all_costs = []
    logs = []
    words_count = 0
    last_time = time.time()
    correct = 0.

    # shuffle the data
    permutation = np.random.permutation(len(train['s1']))
    s1 = train['s1'][permutation]
    s2 = train['s2'][permutation]
    target = train['label'][permutation]

    critic_active_optimizer.param_groups[0]['lr'] = \
        critic_active_optimizer.param_groups[0]['lr'] * params.decay \
        if epoch > 1 and 'sgd' in params.optimizer \
        else critic_active_optimizer.param_groups[0]['lr']
    print('Learning rate : {0}'.format(critic_active_optimizer.param_groups[0]['lr']))

    critic_target_optimizer.param_groups[0]['lr'] = \
        critic_target_optimizer.param_groups[0]['lr'] * params.decay \
        if epoch > 1 and 'sgd' in params.optimizer \
        else critic_target_optimizer.param_groups[0]['lr']
    print('Learning rate : {0}'.format(critic_target_optimizer.param_groups[0]['lr']))

    criticModel.assign_active_network()
    actorModel.assign_active_network()

    for stidx in range(0, len(s1), params.batch_size):
        # prepare batch
        totloss = 0.
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(
            torch.LongTensor(target[stidx:stidx + params.batch_size])).cuda()
        predict = torch.zeros(params.batch_size, params.n_classes).cuda()
        k = s1_batch.size(1)  # actual batch size

        s1_batch = s1_batch.transpose(0, 1)
        s2_batch = s2_batch.transpose(0, 1)

        position_a = torch.zeros(s1_batch.size(0), s1_batch.size(1))  # 16 x 21
        mask_a = torch.zeros(s1_batch.size(0), s1_batch.size(1))
        for i in range(s1_batch.size(0)):
            j = 1
            torch.fill_(position_a[i], int(s1_len[i]))
            for k in range(s1_len[i]):
                position_a[i][k] = j
                j += 1
                mask_a[i][k] = 1
        position_a = position_a.long()

        position_b = torch.zeros(s2_batch.size(0), s2_batch.size(1))  # 16 x 21
        mask_b = torch.zeros(s2_batch.size(0), s2_batch.size(1))
        for i in range(s2_batch.size(0)):
            j = 1
            torch.fill_(position_b[i], int(s2_len[i]))
            for k in range(s2_len[i]):
                position_b[i][k] = j
                j += 1
                mask_b[i][k] = 1
        position_b = position_b.long()

        position_a, position_b = Variable(position_a.cuda()), Variable(position_b.cuda())

        avgloss = 0
        aveloss = 0.
        total_norm = 0
        for kk in range(params.batch_size):
            left = s1_batch[kk].view(1, -1, 300)
            right = s2_batch[kk].view(1, -1, 300)
            left_len = s1_len[kk]
            right_len = s2_len[kk]
            left_position = position_a[kk].view(1, -1)
            right_position = position_b[kk].view(1, -1)
            left_mask = mask_a[kk].view(1, -1)
            right_mask = mask_b[kk].view(1, -1)
            tgt = tgt_batch[kk].view(-1)
            if RL_train:
                leftSummary = criticModel.summary(left)[-1]
                rightSummary = criticModel.summary(right)[-1]
                actionlist_left, actionlist_right = [], []
                statelist_left, statelist_right = [], []
                losslist = []
                aveloss = 0.  # reset per example (the original set aveLoss, a case typo)
                for i in range(samplecnt):
                    actions_left, states_left, Rinput_left, Rlength_left = Sampling_RL(
                        left, rightSummary, epsilon, Random=True)
                    actions_right, states_right, Rinput_right, Rlength_right = Sampling_RL(
                        right, leftSummary, epsilon, Random=True)
                    actionlist_left.append(actions_left)
                    statelist_left.append(states_left)
                    actionlist_right.append(actions_right)
                    statelist_right.append(states_right)
                    L = (Rinput_left, int(Rinput_left.size(1)), None, None)
                    R = (Rinput_right, int(Rinput_right.size(1)), None, None)
                    out = criticModel(L, R, scope="target")
                    loss_ = loss_fn(out, tgt)
                    loss_ += (float(Rlength_left) / int(left.size(1))) ** 2 * 0.15
                    loss_ += (float(Rlength_right) / int(right.size(1))) ** 2 * 0.15
                    aveloss += loss_
                    losslist.append(loss_)

                aveloss /= samplecnt
                totloss += aveloss
                grad1 = None
                grad2 = None
                grad3 = None
                grad4 = None
                flag = 0
                for i in range(samplecnt):  # 5
                    for pos in range(len(actionlist_left[i])):  # 19 --> 13
                        rr = [0, 0]
                        rr[actionlist_left[i][pos]] = (
                            (losslist[i] - aveloss) * alpha).cpu().item()
                        g = actorModel.get_gradient(statelist_left[i][pos][0],
                                                    statelist_left[i][pos][1],
                                                    statelist_left[i][pos][2],
                                                    rr, scope="target")
                        if flag == 0:
                            grad1 = g[0]
                            grad2 = g[1]
                            grad3 = g[2]
                            grad4 = g[3]
                            flag = 1
                        else:
                            grad1 += g[0]
                            grad2 += g[1]
                            grad3 += g[2]
                            grad4 += g[3]  # was grad3 += g[3]: accumulate into grad4
                    for pos in range(len(actionlist_right[i])):  # 25 --> 5
                        rr = [0, 0]
                        rr[actionlist_right[i][pos]] = (
                            (losslist[i] - aveloss) * alpha).cpu().item()
                        g = actorModel.get_gradient(statelist_right[i][pos][0],
                                                    statelist_right[i][pos][1],
                                                    statelist_right[i][pos][2],
                                                    rr, scope="target")
                        grad1 += g[0]
                        grad2 += g[1]
                        grad3 += g[2]
                        grad4 += g[3]  # was grad3 += g[3]: accumulate into grad4
                actor_target_optimizer.zero_grad()
                actor_active_optimizer.zero_grad()
                actorModel.assign_active_network_gradients(grad1, grad2, grad3, grad4)
                actor_active_optimizer.step()
            else:
                critic_active_optimizer.zero_grad()
                critic_target_optimizer.zero_grad()
                output = criticModel(
                    (left, left_len, left_position, left_mask.cuda()),
                    (right, right_len, right_position, right_mask.cuda()),
                    "target")
                predict[kk] = output
                loss = loss_fn(output, tgt)
                avgloss += loss.item()
                loss.backward()
                criticModel.assign_active_network_gradients()
                critic_active_optimizer.step()

        if RL_train:
            actorModel.update_target_network()
        else:
            for name, p in criticModel.active_pred.named_parameters():
                if p.requires_grad:
                    p.grad.data.div_(params.batch_size)  # divide by the actual batch size
                    total_norm += p.grad.data.norm().item() ** 2
            for name, p in criticModel.active_classifier.named_parameters():
                if p.requires_grad:
                    p.grad.data.div_(params.batch_size)  # divide by the actual batch size
                    total_norm += p.grad.data.norm().item() ** 2
            total_norm = np.sqrt(total_norm)
            shrink_factor = 1
            if total_norm > params.max_norm:
                print("shrinking.............................")
                shrink_factor = params.max_norm / total_norm
            current_lr = critic_active_optimizer.param_groups[0]['lr']  # current lr (no external "lr", for adam)
            critic_active_optimizer.param_groups[0]['lr'] = current_lr * shrink_factor  # just for update

        pred = predict.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
        assert len(pred) == len(s1[stidx:stidx + params.batch_size])
        all_costs.append(float(avgloss / params.batch_size))
        words_count += (s1_batch.nelement() + s2_batch.nelement()) / params.word_emb_dim

        # optimizer step
        criticModel.assign_target_network()
        if len(all_costs) == 100:
            logs.append(
                '{0} ; loss {1} ; sentence/s {2} ; words/s {3} ; accuracy train : {4}'.format(
                    stidx,
                    round(np.mean(all_costs), 2),
                    int(len(all_costs) * params.batch_size / (time.time() - last_time)),
                    int(words_count * 1.0 / (time.time() - last_time)),
                    round(100. * int(correct.data) / (stidx + k), 2)))
            print(logs[-1])
            last_time = time.time()
            words_count = 0
            all_costs = []

    if LSTM_train:
        train_acc = round(100 * int(correct.data) / len(s1), 2)
        print('results : epoch {0} ; mean accuracy train : {1}'.format(epoch, train_acc))
        return train_acc
    else:
        return None
# python -m spacy download en
nlp = spacy.load('en')

def describe(x):
    print("Type: {}".format(x.type()))
    print("Shape: {}".format(x.shape))
    print("Values: \n{}".format(x))

#%%
describe(torch.Tensor(2, 3))
describe(torch.rand(2, 3))
describe(torch.randn(2, 3))

x = torch.ones(2, 3)
x = torch.fill_(x, 5)  # in PyTorch, a trailing underscore (x_) marks an in-place operation

x = torch.Tensor([[1, 2, 3], [4, 5, 6]])  # torch defaults to floats, not doubles
x = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
x = x.long()

# you can use normal operators, or torch.add() etc.
x = torch.arange(6)
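# A short follow-up on the in-place convention (my addition): fill_, add_ and
# friends mutate the tensor and return it; the underscore-free versions return
# a new tensor and leave the input untouched.
y = torch.zeros(2, 3)
z = y.add(1)       # out-of-place: y is still all zeros
y.add_(1)          # in-place: y is now all ones
assert torch.equal(y, z)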
def read_snap_dataset_as_list(dir, name):
    list_dir = os.listdir(dir)
    if len(list_dir) == 1 and osp.isdir(osp.join(dir, list_dir[0])):
        dir = osp.join(dir, list_dir[0])

    if 'ego-' in name:
        files = [file for file in os.listdir(dir)
                 if osp.isfile(osp.join(dir, file))]
        files.sort()
        data_list = []
        for i in range(5, len(files), 5):
            circles_file = files[i]
            edges_file = files[i + 1]
            egofeat_file = files[i + 2]
            feat_file = files[i + 3]
            # featnames_file = files[i+4]

            x = torch.from_numpy(np.loadtxt(osp.join(dir, feat_file))).long()
            indices = x[:, 0]
            indices_assoc = to_assoc(indices)
            x = x[:, 1:]

            circles = []
            f = open(osp.join(dir, circles_file), "r")
            c_line = f.readline()
            while not c_line == '':
                circles.append([from_assoc(indices_assoc, int(i))
                                for i in c_line.split()[1:]])
                c_line = f.readline()
            f.close()

            edge_index = np.loadtxt(osp.join(dir, edges_file))
            edge_index = torch.from_numpy(edge_index).transpose(0, 1).long()
            # TODO find more efficient way to do this
            for i in range(edge_index.shape[0]):
                for j in range(edge_index.shape[1]):
                    edge_index[i][j] = from_assoc(indices_assoc,
                                                  edge_index[i][j].item())

            x_ego = np.loadtxt(osp.join(dir, egofeat_file))
            x_ego = torch.from_numpy(x_ego).long()
            x = torch.cat((x, x_ego.unsqueeze(0)))

            # ego node is connected to every other node
            edge_index_ego = torch.fill_(torch.zeros((2, x.shape[0] - 1)),
                                         x.shape[0] - 1)
            edge_index_ego[0] = torch.arange(x.shape[0] - 1)

            # edges are undirected in ego-Facebook, so add both directions
            # (the original appended edge_index_ego again, unconditionally,
            # after this if-block, duplicating it for ego-Facebook; an if/else
            # is assumed here)
            if name == 'ego-Facebook':
                edge_index_ego2 = torch.fill_(torch.zeros((2, x.shape[0] - 1)),
                                              x.shape[0] - 1)
                edge_index_ego2[1] = torch.arange(x.shape[0] - 1)
                edge_index = torch.cat((edge_index, edge_index_ego.long(),
                                        edge_index_ego2.long()), dim=1)
            else:
                edge_index = torch.cat((edge_index, edge_index_ego.long()), dim=1)

            data = Data(x=x, edge_index=edge_index, circles=circles)
            data_list.append(data)
        return data_list
    elif 'soc-' in name:
        if name == 'soc-Pokec':
            # TODO? read out features from 'soc-pokec-profiles.txt'
            edge_index = np.loadtxt(osp.join(dir, 'soc-pokec-relationships.txt'))
            edge_index = torch.from_numpy(edge_index).transpose(0, 1).long()
            data = Data(edge_index=edge_index)
            return [data]
        else:
            list_dir = os.listdir(dir)
            if len(list_dir) == 1 and osp.isfile(osp.join(dir, list_dir[0])):
                edge_index = np.loadtxt(osp.join(dir, list_dir[0]))
                ids = np.unique(edge_index)
                for i, j in zip(ids, range(len(ids))):
                    edge_index[edge_index == i] = j
                assert np.sum(np.not_equal(np.unique(edge_index),
                                           np.arange(len(ids)))) == 0
                edge_index = torch.from_numpy(edge_index).transpose(0, 1).long()
                data = Data(edge_index=edge_index)
                return [data]
    elif 'wiki-' in name:
        if name == 'wiki-Vote':
            list_dir = os.listdir(dir)
            if len(list_dir) == 1 and osp.isfile(osp.join(dir, list_dir[0])):
                edge_index = np.loadtxt(osp.join(dir, list_dir[0]))
                ids = np.unique(edge_index)
                for i, j in zip(ids, range(len(ids))):
                    edge_index[edge_index == i] = j
                assert np.sum(np.not_equal(np.unique(edge_index),
                                           np.arange(len(ids)))) == 0
                edge_index = torch.from_numpy(edge_index).transpose(0, 1).long()
                data = Data(edge_index=edge_index)
                return [data]
        elif name == 'wiki-RfA':
            list_dir = os.listdir(dir)
            if len(list_dir) == 1 and osp.isfile(osp.join(dir, list_dir[0])):
                i = 0
                with open(osp.join(dir, list_dir[0])) as f:
                    line = f.readline()
                    while not line == '':
                        print(i, line)
                        if i == 10:
                            raise  # debug stop: parsing for wiki-RfA is unfinished
                        i += 1
                        line = f.readline()
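# Hedged sketch (an alternative, not the authors' code): the id-compaction
# loop above can be done in one vectorized call with
# np.unique(return_inverse=True), which maps raw node ids to 0..n-1 directly.
import numpy as np

raw = np.array([[10, 42], [42, 7], [7, 10]], dtype=np.int64)
_, inv = np.unique(raw, return_inverse=True)
compact = inv.reshape(raw.shape)   # same shape, ids remapped to 0..2
print(compact)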
def get_ground_truth_single(self, img_idx, gt_instances, mask_feat_size):
    gt_bboxes_raw = gt_instances[img_idx].gt_boxes.tensor
    gt_labels_raw = gt_instances[img_idx].gt_classes
    gt_masks_raw = gt_instances[img_idx].gt_masks.tensor
    device = gt_labels_raw[0].device

    # ins
    gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                          (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))

    ins_label_list = []
    cate_label_list = []
    ins_ind_label_list = []
    grid_order_list = []

    # appended by rufeng zhang.
    num_gt_ins = len(gt_areas)
    gt_ins_ids = gt_labels_raw.new_tensor([_ for _ in range(1, num_gt_ins + 1)])
    ins_id_list = []

    for (lower_bound, upper_bound), stride, num_grid \
            in zip(self.scale_ranges, self.strides, self.num_grids):

        hit_indices = ((gt_areas >= lower_bound) &
                       (gt_areas <= upper_bound)).nonzero().flatten()
        num_ins = len(hit_indices)

        ins_label = []
        grid_order = []
        cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
        cate_label = torch.fill_(cate_label, self.num_classes)
        ins_ind_label = torch.zeros([num_grid ** 2], dtype=torch.bool, device=device)
        # append.
        ins_id_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)

        if num_ins == 0:
            ins_label = torch.zeros([0, mask_feat_size[0], mask_feat_size[1]],
                                    dtype=torch.uint8, device=device)
            ins_label_list.append(ins_label)
            cate_label_list.append(cate_label)
            ins_ind_label_list.append(ins_ind_label)
            grid_order_list.append([])
            ins_id_list.append(ins_id_label)
            continue
        gt_bboxes = gt_bboxes_raw[hit_indices]
        gt_labels = gt_labels_raw[hit_indices]
        gt_masks = gt_masks_raw[hit_indices.cpu().numpy(), ...].cpu().numpy().astype('uint8')
        gt_ids = gt_ins_ids[hit_indices]

        half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
        half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma

        output_stride = 4
        label_code = []
        for seg_mask, gt_label, half_h, half_w, gt_id in zip(
                gt_masks, gt_labels, half_hs, half_ws, gt_ids):
            if seg_mask.sum() == 0:
                continue
            # mass center
            upsampled_size = (mask_feat_size[0] * 4, mask_feat_size[1] * 4)
            center_h, center_w = ndimage.measurements.center_of_mass(seg_mask)
            coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
            coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))

            # left, top, right, down
            top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
            down_box = min(num_grid - 1,
                           int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
            left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
            right_box = min(num_grid - 1,
                            int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))

            top = max(top_box, coord_h - 1)
            down = min(down_box, coord_h + 1)
            left = max(coord_w - 1, left_box)
            right = min(right_box, coord_w + 1)

            cate_label[top:(down + 1), left:(right + 1)] = gt_label

            seg_mask = imrescale(seg_mask, scale=1. / output_stride)
            seg_mask = torch.Tensor(seg_mask)
            # append.
            ins_id_label[top:(down + 1), left:(right + 1)] = gt_id
            for i in range(top, down + 1):
                for j in range(left, right + 1):
                    label = int(i * num_grid + j)
                    # several grid cells may correspond to the same mask.
                    # if label in label_code:
                    #     continue
                    # label_code.append(label)
                    cur_ins_label = torch.zeros([mask_feat_size[0], mask_feat_size[1]],
                                                dtype=torch.uint8, device=device)
                    cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
                    ins_label.append(cur_ins_label)
                    ins_ind_label[label] = True
                    grid_order.append(label)

        num_1 = (cate_label != 80).sum()
        num_2 = ins_ind_label.sum()
        ins_label = torch.stack(ins_label, 0).to(device=device)
        num_3 = ins_label.shape[0]
        # assert num_2.item() == num_3
        if num_3 > num_2.item():
            print('{}/{}'.format(num_2, num_3))

        ins_label_list.append(ins_label)
        cate_label_list.append(cate_label)
        ins_ind_label_list.append(ins_ind_label)
        grid_order_list.append(grid_order)
        ins_id_list.append(ins_id_label)
    return (ins_label_list, cate_label_list, ins_ind_label_list,
            grid_order_list, ins_id_list)