def train(train_loader, model, criterion, optimizer, epoch, loss_weight, use_cuda):
    print("Training... Epoch = %d" % epoch)
    ip1_loader = []
    idx_loader = []
    for i, (data, target) in enumerate(train_loader):
        if use_cuda:
            data = data.cuda()
            target = target.cuda()
        data, target = Variable(data), Variable(target)

        ip1, pred = model(data)
        loss = criterion[0](pred, target) + loss_weight * criterion[1](target, ip1)

        optimizer[0].zero_grad()
        optimizer[1].zero_grad()

        loss.backward()

        optimizer[0].step()
        optimizer[1].step()

        ip1_loader.append(ip1)
        idx_loader.append(target)

    feat = torch.cat(ip1_loader, 0)
    labels = torch.cat(idx_loader, 0)
    visualize(feat.data.cpu().numpy(), labels.data.cpu().numpy(), epoch)
def forward(self, x1, x1_mask, x2, x2_mask):
    # Encode all layers
    for i in range(self.num_layers):
        # Forward
        x1 = self.doc_rnns[i](x1, x1_mask)
        x2 = self.question_rnns[i](x2, x2_mask)

        # q_merge_weights = self.question_self_attns[i](x2, x2_mask)
        # question_hidden = layers.weighted_avg(x2, q_merge_weights)

        matched_x2_hiddens = self.doc_attns[i](x1, x2, x2_mask)
        matched_x2_hiddens = self.doc_convs[i](matched_x2_hiddens)

        matched_x1_hiddens = self.doc_attns[i](x2, x1, x1_mask)
        matched_x1_hiddens = self.doc_convs[i](matched_x1_hiddens)

        x1 = torch.cat([x1, matched_x2_hiddens], dim=2)
        x2 = torch.cat([x2, matched_x1_hiddens], dim=2)

    return x1.contiguous(), x2.contiguous()
def forward(self, x, coord):
    # x = (x - 128.) / 128.
    out = self.preBlock(x)  # 16
    out_pool, indices0 = self.maxpool1(out)
    out1 = self.forw1(out_pool)  # 32
    out1_pool, indices1 = self.maxpool2(out1)
    out2 = self.forw2(out1_pool)  # 64
    # out2 = self.drop(out2)
    out2_pool, indices2 = self.maxpool3(out2)
    out3 = self.forw3(out2_pool)  # 96
    out3_pool, indices3 = self.maxpool4(out3)
    out4 = self.forw4(out3_pool)  # 96
    # out4 = self.drop(out4)

    rev3 = self.path1(out4)
    comb3 = self.back3(torch.cat((rev3, out3), 1))  # 96 + 96
    # comb3 = self.drop(comb3)
    rev2 = self.path2(comb3)

    feat = self.back2(torch.cat((rev2, out2, coord), 1))  # 64 + 64
    comb2 = self.drop(feat)
    out = self.output(comb2)
    size = out.size()
    out = out.view(out.size(0), out.size(1), -1)
    # out = out.transpose(1, 4).transpose(1, 2).transpose(2, 3).contiguous()
    out = out.transpose(1, 2).contiguous().view(size[0], size[2], size[3], size[4], len(config['anchors']), 5)
    # out = out.view(-1, 5)
    return feat, out
def update(self):
    next_value = self.actor_critic(Variable(self.rollouts.states[-1], volatile=True))[0].data
    self.rollouts.compute_returns(next_value, self.use_gae, self.gamma, self.tau)

    # values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
    #     Variable(self.rollouts.states[:-1].view(-1, *self.obs_shape)),
    #     Variable(self.rollouts.actions.view(-1, self.action_shape)))
    values = torch.cat(self.rollouts.value_preds, 0).view(self.num_steps, self.num_processes, 1)
    action_log_probs = torch.cat(self.rollouts.action_log_probs).view(self.num_steps, self.num_processes, 1)
    dist_entropy = torch.cat(self.rollouts.dist_entropy).view(self.num_steps, self.num_processes, 1)

    self.rollouts.value_preds = []
    self.rollouts.action_log_probs = []
    self.rollouts.dist_entropy = []

    advantages = Variable(self.rollouts.returns[:-1]) - values
    value_loss = advantages.pow(2).mean()
    action_loss = -(Variable(advantages.data) * action_log_probs).mean()

    self.optimizer.zero_grad()
    cost = action_loss + value_loss * self.value_loss_coef - dist_entropy.mean() * self.entropy_coef
    cost.backward()
    nn.utils.clip_grad_norm(self.actor_critic.parameters(), self.grad_clip)
    self.optimizer.step()
def forward(self, input, context, question, output=None, hidden=None,
            context_alignment=None, question_alignment=None):
    context_output = output.squeeze(1) if output is not None else self.make_init_output(context)
    context_alignment = context_alignment if context_alignment is not None else self.make_init_output(context)
    question_alignment = question_alignment if question_alignment is not None else self.make_init_output(question)

    context_outputs, vocab_pointer_switches, context_question_switches, \
        context_attentions, question_attentions, \
        context_alignments, question_alignments = [], [], [], [], [], [], []
    for emb_t in input.split(1, dim=1):
        emb_t = emb_t.squeeze(1)
        context_output = self.dropout(context_output)
        if self.input_feed:
            emb_t = torch.cat([emb_t, context_output], 1)
        dec_state, hidden = self.rnn(emb_t, hidden)
        context_output, context_attention, context_alignment = self.context_attn(dec_state, context)
        question_output, question_attention, question_alignment = self.question_attn(dec_state, question)
        vocab_pointer_switch = self.vocab_pointer_switch(torch.cat([dec_state, context_output, emb_t], -1))
        context_question_switch = self.context_question_switch(torch.cat([dec_state, question_output, emb_t], -1))
        context_output = self.dropout(context_output)
        context_outputs.append(context_output)
        vocab_pointer_switches.append(vocab_pointer_switch)
        context_question_switches.append(context_question_switch)
        context_attentions.append(context_attention)
        context_alignments.append(context_alignment)
        question_attentions.append(question_attention)
        question_alignments.append(question_alignment)

    context_outputs, vocab_pointer_switches, context_question_switches, context_attention, question_attention = [
        self.package_outputs(x) for x in [context_outputs, vocab_pointer_switches, context_question_switches,
                                          context_attentions, question_attentions]]
    return (context_outputs, context_attention, question_attention,
            context_alignment, question_alignment,
            vocab_pointer_switches, context_question_switches, hidden)
def forward(self, prev_emb, dec_state, attn_state):
    input_tensor = torch.cat((prev_emb, dec_state, attn_state), dim=1)
    z = self.sig(self.gate(input_tensor))
    proj_source = self.source_proj(attn_state)
    proj_target = self.target_proj(torch.cat((prev_emb, dec_state), dim=1))
    return z, proj_source, proj_target
def test_forward(self):
    batch = 16
    len1, len2 = 21, 24
    seq_len1 = torch.randint(low=len1 - 10, high=len1 + 1, size=(batch,)).long()
    seq_len2 = torch.randint(low=len2 - 10, high=len2 + 1, size=(batch,)).long()

    mask1 = []
    for w in seq_len1:
        mask1.append([1] * w.item() + [0] * (len1 - w.item()))
    mask1 = torch.FloatTensor(mask1)
    mask2 = []
    for w in seq_len2:
        mask2.append([1] * w.item() + [0] * (len2 - w.item()))
    mask2 = torch.FloatTensor(mask2)

    d = 200  # hidden dimension
    l = 20   # number of perspectives
    test1 = torch.randn(batch, len1, d)
    test2 = torch.randn(batch, len2, d)
    test1 = test1 * mask1.view(-1, len1, 1).expand(-1, len1, d)
    test2 = test2 * mask2.view(-1, len2, 1).expand(-1, len2, d)

    test1_fw, test1_bw = torch.split(test1, d // 2, dim=-1)
    test2_fw, test2_bw = torch.split(test2, d // 2, dim=-1)

    ml_fw = BiMpmMatching.from_params(Params({"is_forward": True, "num_perspectives": l}))
    ml_bw = BiMpmMatching.from_params(Params({"is_forward": False, "num_perspectives": l}))

    vecs_p_fw, vecs_h_fw = ml_fw(test1_fw, mask1, test2_fw, mask2)
    vecs_p_bw, vecs_h_bw = ml_bw(test1_bw, mask1, test2_bw, mask2)
    vecs_p, vecs_h = torch.cat(vecs_p_fw + vecs_p_bw, dim=2), torch.cat(vecs_h_fw + vecs_h_bw, dim=2)

    assert vecs_p.size() == torch.Size([batch, len1, 10 + 10 * l])
    assert vecs_h.size() == torch.Size([batch, len2, 10 + 10 * l])
    assert ml_fw.get_output_dim() == ml_bw.get_output_dim() == vecs_p.size(2) // 2 == vecs_h.size(2) // 2
def forward(input, hidden, weight):
    assert len(weight) == total_layers
    next_hidden = []

    if lstm:
        hidden = list(zip(*hidden))

    for i in range(num_layers):
        all_output = []
        for j, inner in enumerate(inners):
            l = i * num_directions + j
            hy, output = inner(input, hidden[l], weight[l])
            next_hidden.append(hy)
            all_output.append(output)

        input = torch.cat(all_output, input.dim() - 1)

        if dropout != 0 and i < num_layers - 1:
            input = F.dropout(input, p=dropout, training=train, inplace=False)

    if lstm:
        next_h, next_c = zip(*next_hidden)
        next_hidden = (
            torch.cat(next_h, 0).view(total_layers, *next_h[0].size()),
            torch.cat(next_c, 0).view(total_layers, *next_c[0].size())
        )
    else:
        next_hidden = torch.cat(next_hidden, 0).view(
            total_layers, *next_hidden[0].size())

    return next_hidden, input
def forward(self, inputs, targets):
    """
    Args:
    - inputs: feature matrix with shape (batch_size, feat_dim)
    - targets: ground truth labels with shape (batch_size)
    """
    n = inputs.size(0)

    # Compute pairwise distance, replace by the official when merged
    dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
    dist = dist + dist.t()
    dist.addmm_(1, -2, inputs, inputs.t())
    dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability

    # For each anchor, find the hardest positive and negative
    mask = targets.expand(n, n).eq(targets.expand(n, n).t())
    dist_ap, dist_an = [], []
    for i in range(n):
        dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
        dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
    dist_ap = torch.cat(dist_ap)
    dist_an = torch.cat(dist_an)

    # Compute ranking hinge loss
    y = torch.ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)
    return loss
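# A standalone sketch of the batch-hard triplet mining implemented above, for reference only:
# torch.cdist stands in for the manual distance expansion, and the sizes, identities and
# margin are illustrative, not taken from the original model.
import torch
import torch.nn as nn
import torch.nn.functional as F

feats = F.normalize(torch.randn(8, 128), dim=1)      # 8 embeddings
labels = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3])      # 4 identities, 2 samples each

n = feats.size(0)
dist = torch.cdist(feats, feats)                                    # pairwise Euclidean distances
mask = labels.expand(n, n).eq(labels.expand(n, n).t())              # same-identity mask
dist_ap = torch.stack([dist[i][mask[i]].max() for i in range(n)])   # hardest positive per anchor
dist_an = torch.stack([dist[i][~mask[i]].min() for i in range(n)])  # hardest negative per anchor

ranking_loss = nn.MarginRankingLoss(margin=0.3)
loss = ranking_loss(dist_an, dist_ap, torch.ones_like(dist_an))     # push dist_an above dist_ap + margin
print(loss.item())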
def forward(input, hidden, weight):
    output = []
    input_offset = 0
    last_batch_size = batch_sizes[0]
    hiddens = []
    flat_hidden = not isinstance(hidden, tuple)
    if flat_hidden:
        hidden = (hidden,)
    for batch_size in batch_sizes:
        step_input = input[input_offset:input_offset + batch_size]
        input_offset += batch_size

        dec = last_batch_size - batch_size
        if dec > 0:
            hiddens.append(tuple(h[-dec:] for h in hidden))
            hidden = tuple(h[:-dec] for h in hidden)
        last_batch_size = batch_size

        if flat_hidden:
            hidden = (inner(step_input, hidden[0], *weight),)
        else:
            hidden = inner(step_input, hidden, *weight)

        output.append(hidden[0])
    hiddens.append(hidden)
    hiddens.reverse()

    hidden = tuple(torch.cat(h, 0) for h in zip(*hiddens))
    assert hidden[0].size(0) == batch_sizes[0]
    if flat_hidden:
        hidden = hidden[0]
    output = torch.cat(output, 0)

    return hidden, output
def forward(input, hidden, weight):
    output = []
    input_offset = input.size(0)
    last_batch_size = batch_sizes[-1]
    initial_hidden = hidden
    flat_hidden = not isinstance(hidden, tuple)
    if flat_hidden:
        hidden = (hidden,)
        initial_hidden = (initial_hidden,)
    hidden = tuple(h[:batch_sizes[-1]] for h in hidden)
    for batch_size in reversed(batch_sizes):
        inc = batch_size - last_batch_size
        if inc > 0:
            hidden = tuple(torch.cat((h, ih[last_batch_size:batch_size]), 0)
                           for h, ih in zip(hidden, initial_hidden))
        last_batch_size = batch_size
        step_input = input[input_offset - batch_size:input_offset]
        input_offset -= batch_size

        if flat_hidden:
            hidden = (inner(step_input, hidden[0], *weight),)
        else:
            hidden = inner(step_input, hidden, *weight)
        output.append(hidden[0])

    output.reverse()
    output = torch.cat(output, 0)
    if flat_hidden:
        hidden = hidden[0]
    return hidden, output
def l2l_validate(model, cluster_center, n_epoch=100):
    val_accuracy = []
    for epoch in range(n_epoch):
        data_l = generate_data_l(cluster_center)
        data_n = generate_data_n(cluster_center, model.n_class_n)
        x_l, y_l = Variable(torch.from_numpy(data_l[0])).float(), Variable(torch.from_numpy(data_l[1]))
        x_n, y_n = Variable(torch.from_numpy(data_n[0])).float(), Variable(torch.from_numpy(data_n[1]))
        pred_ll, pred_nl, w, b = model(x_l, x_n)
        M = Variable(torch.zeros(model.n_class_n, model.n_dim))
        B = Variable(torch.zeros(model.n_class_n))
        for k in range(model.n_class_n):
            M[k] = torch.cat((w[:, 0][y_n == model.n_class_l + k].view(-1, 1),
                              w[:, 1][y_n == model.n_class_l + k].view(-1, 1)), 1).mean(0)
            B[k] = b[y_n == model.n_class_l + k].mean()
        pred_ln = torch.mm(x_l, M.t()) + B.view(1, -1).expand_as(torch.mm(x_l, M.t()))
        pred_nn = torch.mm(x_n, M.t()) + B.view(1, -1).expand_as(torch.mm(x_n, M.t()))
        pred = torch.cat((torch.cat((pred_ll, pred_nl)), torch.cat((pred_ln, pred_nn))), 1)
        pred = pred.data.max(1)[1]
        y = torch.cat((y_l, y_n))
        accuracy = pred.eq(y.data).cpu().sum() * 1.0 / y.size()[0]
        # print('accuracy: %.2f' % accuracy)
        val_accuracy.append(accuracy)
        acc_l = pred.eq(y.data).cpu()[0:100].sum() * 1.0 / 100
        acc_n = pred.eq(y.data).cpu()[100:150].sum() * 1.0 / 50
        print('accuracy: %.2f, lifelong accuracy: %.2f, new accuracy: %.2f' % (accuracy, acc_l, acc_n))

    return numpy.mean(numpy.asarray(val_accuracy))
def forward(self, input):
    x, low_level_features = self.resnet_features(input)
    x1 = self.aspp1(x)
    x2 = self.aspp2(x)
    x3 = self.aspp3(x)
    x4 = self.aspp4(x)
    x5 = self.global_avg_pool(x)
    x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
    x = torch.cat((x1, x2, x3, x4, x5), dim=1)

    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = F.upsample(x, size=(int(math.ceil(input.size()[-2] / 4)),
                            int(math.ceil(input.size()[-1] / 4))),
                   mode='bilinear', align_corners=True)

    low_level_features = self.conv2(low_level_features)
    low_level_features = self.bn2(low_level_features)
    low_level_features = self.relu(low_level_features)

    x = torch.cat((x, low_level_features), dim=1)
    x = self.last_conv(x)
    x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)

    return x
def forward(self, x, x_prev):
    x_relu = self.relu(x_prev)
    # path 1
    x_path1 = self.path_1(x_relu)
    # path 2
    x_path2 = self.path_2.pad(x_relu)
    x_path2 = x_path2[:, :, 1:, 1:]
    x_path2 = self.path_2.avgpool(x_path2)
    x_path2 = self.path_2.conv(x_path2)
    # final path
    x_left = self.final_path_bn(torch.cat([x_path1, x_path2], 1))

    x_right = self.conv_1x1(x)

    x_comb_iter_0_left = self.comb_iter_0_left(x_right)
    x_comb_iter_0_right = self.comb_iter_0_right(x_left)
    x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right

    x_comb_iter_1_left = self.comb_iter_1_left(x_left)
    x_comb_iter_1_right = self.comb_iter_1_right(x_left)
    x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right

    x_comb_iter_2_left = self.comb_iter_2_left(x_right)
    x_comb_iter_2 = x_comb_iter_2_left + x_left

    x_comb_iter_3_left = self.comb_iter_3_left(x_left)
    x_comb_iter_3_right = self.comb_iter_3_right(x_left)
    x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right

    x_comb_iter_4_left = self.comb_iter_4_left(x_right)
    x_comb_iter_4 = x_comb_iter_4_left + x_right

    x_out = torch.cat([x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2,
                       x_comb_iter_3, x_comb_iter_4], 1)
    return x_out
def forward(self, context, question, context_char=None, question_char=None, context_f=None, question_f=None): assert context_char is not None and question_char is not None and context_f is not None \ and question_f is not None vis_param = {} # (seq_len, batch, additional_feature_size) context_f = context_f.transpose(0, 1) question_f = question_f.transpose(0, 1) # word-level embedding: (seq_len, batch, word_embedding_size) context_vec, context_mask = self.embedding.forward(context) question_vec, question_mask = self.embedding.forward(question) # char-level embedding: (seq_len, batch, char_embedding_size) context_emb_char, context_char_mask = self.char_embedding.forward(context_char) question_emb_char, question_char_mask = self.char_embedding.forward(question_char) context_vec_char = self.char_encoder.forward(context_emb_char, context_char_mask, context_mask) question_vec_char = self.char_encoder.forward(question_emb_char, question_char_mask, question_mask) # mix embedding: (seq_len, batch, embedding_size) context_vec = torch.cat((context_vec, context_vec_char, context_f), dim=-1) question_vec = torch.cat((question_vec, question_vec_char, question_f), dim=-1) # encode: (seq_len, batch, hidden_size*2) context_encode, _ = self.encoder.forward(context_vec, context_mask) question_encode, zs = self.encoder.forward(question_vec, question_mask) align_ct = context_encode for i in range(self.num_align_hops): # align: (seq_len, batch, hidden_size*2) qt_align_ct, alpha = self.aligner[i](align_ct, question_encode, question_mask) bar_ct = self.aligner_sfu[i](align_ct, torch.cat([qt_align_ct, align_ct * qt_align_ct, align_ct - qt_align_ct], dim=-1)) vis_param['match'] = alpha # self-align: (seq_len, batch, hidden_size*2) ct_align_ct, self_alpha = self.self_aligner[i](bar_ct, context_mask) hat_ct = self.self_aligner_sfu[i](bar_ct, torch.cat([ct_align_ct, bar_ct * ct_align_ct, bar_ct - ct_align_ct], dim=-1)) vis_param['self-match'] = self_alpha # aggregation: (seq_len, batch, hidden_size*2) align_ct, _ = self.aggregation[i](hat_ct, context_mask) # pointer net: (answer_len, batch, context_len) for i in range(self.num_ptr_hops): ans_range_prop, zs = self.ptr_net[i](align_ct, context_mask, zs) # answer range ans_range_prop = ans_range_prop.transpose(0, 1) if not self.training and self.enable_search: ans_range = answer_search(ans_range_prop, context_mask) else: _, ans_range = torch.max(ans_range_prop, dim=2) return ans_range_prop, ans_range, vis_param
def CreateFeature(model, phase, outputPath='.'):
    """
    Create an h5py dataset for feature extraction.
    ARGS:
        model      : name of the backbone model used for extraction
        phase      : dataset split to process (e.g. 'train' or 'val')
        outputPath : h5py output path
    """
    featurenet = feature_net(model)
    if use_gpu:
        featurenet.cuda()

    feature_map = torch.FloatTensor()
    label_map = torch.LongTensor()

    for data in tqdm(dataloader[phase]):
        img, label = data
        if use_gpu:
            img = Variable(img, volatile=True).cuda()
        else:
            img = Variable(img, volatile=True)
        out = featurenet(img)
        feature_map = torch.cat((feature_map, out.cpu().data), 0)
        label_map = torch.cat((label_map, label), 0)

    feature_map = feature_map.numpy()
    label_map = label_map.numpy()
    file_name = '_feature_{}.hd5f'.format(model)
    h5_path = os.path.join(outputPath, phase) + file_name
    with h5py.File(h5_path, 'w') as h:
        h.create_dataset('data', data=feature_map)
        h.create_dataset('label', data=label_map)
def test(model):
    game_state = GameState()

    # initial action is do nothing
    action = torch.zeros([model.number_of_actions], dtype=torch.float32)
    action[0] = 1
    image_data, reward, terminal = game_state.frame_step(action)
    image_data = resize_and_bgr2gray(image_data)
    image_data = image_to_tensor(image_data)
    state = torch.cat((image_data, image_data, image_data, image_data)).unsqueeze(0)

    while True:
        # get output from the neural network
        output = model(state)[0]

        action = torch.zeros([model.number_of_actions], dtype=torch.float32)
        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action = action.cuda()

        # get action
        action_index = torch.argmax(output)
        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action_index = action_index.cuda()
        action[action_index] = 1

        # get next state
        image_data_1, reward, terminal = game_state.frame_step(action)
        image_data_1 = resize_and_bgr2gray(image_data_1)
        image_data_1 = image_to_tensor(image_data_1)
        state_1 = torch.cat((state.squeeze(0)[1:, :, :], image_data_1)).unsqueeze(0)

        # set state to be state_1
        state = state_1
def _call(self, x):
    shape = x.shape[:-1] + (1 + x.shape[-1],)
    one = x.new([1]).expand(x.shape[:-1] + (1,))
    numer = sigmoid(x)
    denom = (1 - numer).cumprod(-1)
    probs = torch.cat([numer, one], -1) * torch.cat([one, denom], -1)
    return probs
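# Quick numeric check of the stick-breaking construction used in _call above, written with
# plain tensor ops (no transform state assumed): K-1 unconstrained inputs map to a
# length-K probability vector that sums to 1.
import torch

x = torch.tensor([0.2, -1.0, 0.5])
numer = torch.sigmoid(x)
denom = (1 - numer).cumprod(-1)
one = torch.ones(1)
probs = torch.cat([numer, one], -1) * torch.cat([one, denom], -1)
print(probs, probs.sum())  # probs.sum() == 1.0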
def forward(self, x):
    embed = self.embed(x)

    # CNN
    cnn_x = embed
    cnn_x = torch.transpose(cnn_x, 0, 1)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
    cnn_x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in cnn_x]  # [(N, Co), ...] * len(Ks)
    cnn_x = torch.cat(cnn_x, 1)
    cnn_x = self.dropout(cnn_x)

    # LSTM
    lstm_x = embed.view(len(x), embed.size(1), -1)
    lstm_out, self.hidden = self.lstm(lstm_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    # lstm_out = F.tanh(lstm_out)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)

    # CNN and LSTM cat
    cnn_x = torch.transpose(cnn_x, 0, 1)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    cnn_lstm_out = torch.cat((cnn_x, lstm_out), 0)
    cnn_lstm_out = torch.transpose(cnn_lstm_out, 0, 1)

    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(cnn_lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))

    # output
    logit = cnn_lstm_out
    return logit
def safe_zeros_backward(inp, dim):
    # note that the gradient is equivalent to:
    # cumprod(exclusive, normal) * cumprod(exclusive, reverse), e.g.:
    # input:                        [    a,     b,     c]
    # cumprod(exclusive, normal):   [    1,     a, a * b]
    # cumprod(exclusive, reverse):  [b * c,     c,     1]
    # product:                      [b * c, a * c, a * b]
    # and this is safe under input with 0s.
    if inp.size(dim) == 1:
        return grad_output

    ones_size = torch.Size((inp.size()[:dim] + (1,) + inp.size()[dim + 1:]))
    ones = Variable(grad_output.data.new(ones_size).fill_(1))
    exclusive_normal_nocp = torch.cat((ones, inp.narrow(dim, 0, inp.size(dim) - 1)), dim)
    exclusive_normal = exclusive_normal_nocp.cumprod(dim)

    def reverse_dim(var, dim):
        index = Variable(torch.arange(var.size(dim) - 1, -1, -1, out=var.data.new().long()))
        return var.index_select(dim, index)

    narrow_reverse = reverse_dim(inp.narrow(dim, 1, inp.size(dim) - 1), dim)
    exclusive_reverse_nocp = torch.cat((ones, narrow_reverse), dim)
    exclusive_reverse = reverse_dim(exclusive_reverse_nocp.cumprod(dim), dim)

    grad_input = grad_output.expand_as(exclusive_normal).mul(exclusive_normal.mul(exclusive_reverse))
    return grad_input
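# The identity described in the comment block above can be checked directly with autograd's
# built-in prod backward: for x = [a, b, c] the gradient of a*b*c is [b*c, a*c, a*b], and it
# stays finite even when some entries are exactly zero.
import torch

x = torch.tensor([2.0, 0.0, 3.0], requires_grad=True)
x.prod().backward()
print(x.grad)  # tensor([0., 6., 0.]) == [b*c, a*c, a*b]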
def reply(self):
    if len(self.memory) < BATCH_SIZE:
        return

    transitions = self.memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))

    non_final_mask = torch.ByteTensor(tuple(map(lambda s: s is not None, batch.next_state)))

    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)
    non_final_next_state = torch.cat([s for s in batch.next_state if s is not None])

    self.model.eval()

    state_action_values = torch.squeeze(self.model(state_batch).gather(1, action_batch))

    next_state_values = torch.zeros(BATCH_SIZE).type(torch.FloatTensor)
    next_state_values[non_final_mask] = self.model(non_final_next_state).data.max(1)[0]

    expected_state_action_values = reward_batch + GAMMA * next_state_values

    self.model.train()

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)

    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
def sample_relax(logits, surrogate):
    cat = Categorical(logits=logits)
    u = torch.rand(B, C).clamp(1e-10, 1. - 1e-10).cuda()
    gumbels = -torch.log(-torch.log(u))
    z = logits + gumbels
    b = torch.argmax(z, dim=1)  # .view(B,1)
    logprob = cat.log_prob(b).view(B, 1)

    # czs = []
    # for j in range(1):
    #     z = sample_relax_z(logits)
    #     surr_input = torch.cat([z, x, logits.detach()], dim=1)
    #     cz = surrogate.net(surr_input)
    #     czs.append(cz)
    # czs = torch.stack(czs)
    # cz = torch.mean(czs, dim=0)  # .view(1,1)
    surr_input = torch.cat([z, x, logits.detach()], dim=1)
    cz = surrogate.net(surr_input)

    cz_tildes = []
    for j in range(1):
        z_tilde = sample_relax_given_b(logits, b)
        surr_input = torch.cat([z_tilde, x, logits.detach()], dim=1)
        cz_tilde = surrogate.net(surr_input)
        cz_tildes.append(cz_tilde)
    cz_tildes = torch.stack(cz_tildes)
    cz_tilde = torch.mean(cz_tildes, dim=0)  # .view(B,1)

    return b, logprob, cz, cz_tilde
def forward(self, x):
    # feedforward x to the first layer and add the result to the list
    x_first_out = self.layers_list[0].forward(x)

    # initialize the list
    forwarded_output_list = []
    forwarded_output_list.append(x_first_out)
    prev_x = x

    # feedforward process from the second to the last layer
    for i in range(1, self.num_layers):
        # concatenate filters
        concatenated_filters = torch.cat((forwarded_output_list[i - 1], prev_x), 1)
        # forward
        x_next_out = self.layers_list[i].forward(concatenated_filters)
        # add to the list
        forwarded_output_list.append(x_next_out)
        # prepare the temporary variable for the next loop
        prev_x = concatenated_filters

    # prepare the output (this will have (k_feature_maps * layers) feature maps)
    output_x = torch.cat(forwarded_output_list, 1)
    return output_x
def circular_convolution_conv(keys, values, cuda=False):
    '''
    For the circular convolution of x and y to be equivalent,
    you must pad the vectors with zeros to length at least N + L - 1
    before you take the DFT. After you invert the product of the
    DFTs, retain only the first N + L - 1 elements.
    '''
    assert values.dim() == keys.dim() == 2, "only 2 dims supported"

    batch_size = keys.size(0)
    keys_feature_size = keys.size(1)
    values_feature_size = values.size(1)
    required_size = keys_feature_size + values_feature_size - 1

    # zero pad up to N + L - 1
    zero_for_keys = Variable(float_type(cuda)(
        batch_size, required_size - keys_feature_size).zero_())
    zero_for_values = Variable(float_type(cuda)(
        batch_size, required_size - values_feature_size).zero_())
    keys = torch.cat([keys, zero_for_keys], -1)
    values = torch.cat([values, zero_for_values], -1)

    # do the conv and reshape and return
    print('values = ', values.view(batch_size, 1, -1).size(),
          ' keys = ', keys.view(batch_size, 1, -1).size())
    print('conv = ', F.conv1d(values.view(batch_size, 1, -1),
                              keys.view(batch_size, 1, -1)).size())
    return F.conv1d(values.view(batch_size, 1, -1),
                    keys.view(batch_size, 1, -1)).squeeze()[:, 0:required_size]
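# For reference, circular convolution can be verified against the DFT identity using the
# modern torch.fft module; this sketch is independent of the helper above and uses two
# equal-length toy vectors.
import torch

a = torch.tensor([1.0, 2.0, 3.0, 4.0])
b = torch.tensor([0.5, -1.0, 0.0, 2.0])
N = a.numel()

# direct definition: c[n] = sum_k a[k] * b[(n - k) mod N]
c_direct = torch.stack([sum(a[k] * b[(n - k) % N] for k in range(N)) for n in range(N)])

# DFT identity: circular convolution equals ifft(fft(a) * fft(b))
c_fft = torch.fft.ifft(torch.fft.fft(a) * torch.fft.fft(b)).real

print(torch.allclose(c_direct, c_fft, atol=1e-5))  # True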
def forward(self, category, input, hidden):
    input_combined = torch.cat((category, input, hidden), 1)
    hidden = self.i2h(input_combined)
    output = self.i2o(input_combined)
    output_combined = torch.cat((hidden, output), 1)
    output = self.o2o(output_combined)
    return output, hidden
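# Minimal sketch of the module this cell forward() could belong to, assuming i2h, i2o and o2o
# are plain Linear layers (as in the char-rnn generation tutorial this resembles); all sizes
# are illustrative.
import torch
import torch.nn as nn

class CharRNNCell(nn.Module):
    def __init__(self, n_categories=18, input_size=59, hidden_size=128, output_size=59):
        super().__init__()
        self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        return self.o2o(output_combined), hidden

cell = CharRNNCell()
out, h = cell(torch.zeros(1, 18), torch.zeros(1, 59), torch.zeros(1, 128))
print(out.shape, h.shape)  # torch.Size([1, 59]) torch.Size([1, 128])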
def __call__(self, grid):
    batch_size, _, grid_dimX, grid_dimY, grid_dimZ = grid.size()

    k = 1.0
    x_coords = 2.0 * k * torch.arange(grid_dimX, dtype=torch.float32).unsqueeze(1).unsqueeze(1).expand(
        grid_dimX, grid_dimY, grid_dimZ) / (grid_dimX - 1.0) - 1.0
    y_coords = 2.0 * k * torch.arange(grid_dimY, dtype=torch.float32).unsqueeze(1).unsqueeze(0).expand(
        grid_dimX, grid_dimY, grid_dimZ) / (grid_dimY - 1.0) - 1.0
    z_coords = 2.0 * k * torch.arange(grid_dimZ, dtype=torch.float32).unsqueeze(0).unsqueeze(0).expand(
        grid_dimX, grid_dimY, grid_dimZ) / (grid_dimZ - 1.0) - 1.0
    coords = torch.stack((x_coords, y_coords, z_coords), dim=0)

    if self.with_r:
        rs = ((x_coords ** 2) + (y_coords ** 2) + (z_coords ** 2)) ** 0.5
        rs = k * rs / torch.max(rs)
        rs = torch.unsqueeze(rs, dim=0)
        coords = torch.cat((coords, rs), dim=0)

    coords = torch.unsqueeze(coords, dim=0).repeat(batch_size, 1, 1, 1, 1)

    grid = torch.cat((coords.to(grid.device), grid), dim=1)

    return grid
def circular_convolution_fft(keys, values, normalized=True, conj=False, cuda=False):
    '''
    For the circular convolution of x and y to be equivalent,
    you must pad the vectors with zeros to length at least N + L - 1
    before you take the DFT. After you invert the product of the
    DFTs, retain only the first N + L - 1 elements.
    '''
    assert values.dim() == keys.dim() == 2, "only 2 dims supported"
    assert values.size(-1) % 2 == keys.size(-1) % 2 == 0, "need last dim to be divisible by 2"

    batch_size, keys_feature_size = keys.size(0), keys.size(1)
    values_feature_size = values.size(1)
    required_size = keys_feature_size + values_feature_size - 1
    required_size = required_size + 1 if required_size % 2 != 0 else required_size

    # conj transpose
    keys = Complex(keys).conj().unstack() if conj else keys

    # reshape to [batch, [real, imag]]
    half = keys.size(-1) // 2
    keys = torch.cat([keys[:, 0:half].unsqueeze(2), keys[:, half:].unsqueeze(2)], -1)
    values = torch.cat([values[:, 0:half].unsqueeze(2), values[:, half:].unsqueeze(2)], -1)

    # do the fft, ifft and return num_required
    kf = torch.fft(keys, signal_ndim=1, normalized=normalized)
    vf = torch.fft(values, signal_ndim=1, normalized=normalized)
    kvif = torch.ifft(kf * vf, signal_ndim=1, normalized=normalized)  # [:, 0:required_size]
    # if conj:
    #     return Complex(kvif[:, :, 1], kvif[:, :, 0]).unstack()
    # return Complex(kvif[:, :, 0], kvif[:, :, 1]).unstack() if not conj \
    #     else Complex(kvif[:, :, 1], kvif[:, :, 0]).abs()

    return Complex(kvif[:, :, 0], kvif[:, :, 1]).unstack().view(batch_size, -1)
def forward(self, x, geneexpr):
    out1 = F.relu(self.conv1(x))  # 3 of these
    out1a = F.relu(self.conv1a(x))
    out1b = F.relu(self.conv1b(x))
    out = self.maxpool_3(torch.cat([out1, out1a, out1b], dim=1))  # (?, 300, 600)
    out = F.pad(out, (5, 5))
    out = F.relu(self.conv2(out))  # (?, 300, 140)
    out = self.maxpool_4(out)  # (?, 300, 35)
    out = F.pad(out, (3, 3))
    out = F.relu(self.conv3(out))  # (?, 500, 32)
    out = F.pad(out, (1, 1))
    out = self.maxpool_4(out)  # (?, 500, 8)
    out = out.view(-1, 200 * 13)  # (?, 500*8)

    if self.gdl == 0:
        geneexpr = self.dropout(geneexpr)
        geneexpr = F.relu(self.genelinear(geneexpr))
    elif self.gdl == 1:
        geneexpr = F.relu(self.genelinear(geneexpr))  # (?, 500)
        geneexpr = self.dropout(geneexpr)

    out = torch.cat([out, geneexpr], dim=1)  # (?, 200*13+500)
    out = F.relu(self.linear1(out))  # (?, 800)
    out = self.dropout(out)
    out = F.relu(self.linear2(out))  # (?, 800)
    out = self.dropout(out)
    return self.output(out)  # (?, 1)
def encode(self, src_sents_var, src_sents_len):
    """Encode the input natural language utterance

    Args:
        src_sents_var: a variable of shape (src_sent_len, batch_size), representing word ids of the input
        src_sents_len: a list of lengths of input source sentences, sorted by descending order

    Returns:
        src_encodings: source encodings of shape (batch_size, src_sent_len, hidden_size * 2)
        last_state, last_cell: the last hidden state and cell state of the encoder,
                               of shape (batch_size, hidden_size)
    """
    # (tgt_query_len, batch_size, embed_size)
    # apply word dropout
    if self.training and self.args.word_dropout:
        mask = Variable(self.new_tensor(src_sents_var.size()).fill_(
            1. - self.args.word_dropout).bernoulli().long())
        src_sents_var = src_sents_var * mask + (1 - mask) * self.vocab.source.unk_id

    src_token_embed = self.src_embed(src_sents_var)
    packed_src_token_embed = pack_padded_sequence(src_token_embed, src_sents_len)

    # src_encodings: (tgt_query_len, batch_size, hidden_size)
    src_encodings, (last_state, last_cell) = self.encoder_lstm(packed_src_token_embed)
    src_encodings, _ = pad_packed_sequence(src_encodings)
    # src_encodings: (batch_size, tgt_query_len, hidden_size)
    src_encodings = src_encodings.permute(1, 0, 2)

    # (batch_size, hidden_size * 2)
    last_state = torch.cat([last_state[0], last_state[1]], 1)
    last_cell = torch.cat([last_cell[0], last_cell[1]], 1)

    return src_encodings, (last_state, last_cell)
def knn(Mxx, Mxy, Myy, k, sqrt):
    n0 = Mxx.size(0)
    n1 = Myy.size(0)
    label = torch.cat((torch.ones(n0), torch.zeros(n1)))
    M = torch.cat((torch.cat((Mxx, Mxy), 1),
                   torch.cat((Mxy.transpose(0, 1), Myy), 1)), 0)
    if sqrt:
        M = M.abs().sqrt()

    INFINITY = float('inf')
    val, idx = (M + torch.diag(INFINITY * torch.ones(n0 + n1))).topk(k, 0, False)

    count = torch.zeros(n0 + n1)
    for i in range(0, k):
        count = count + label.index_select(0, idx[i])
    pred = torch.ge(count, (float(k) / 2) * torch.ones(n0 + n1)).float()

    s = Score_knn()
    s.tp = (pred * label).sum()
    s.fp = (pred * (1 - label)).sum()
    s.fn = ((1 - pred) * label).sum()
    s.tn = ((1 - pred) * (1 - label)).sum()
    s.precision = s.tp / (s.tp + s.fp)
    s.recall = s.tp / (s.tp + s.fn)
    s.acc_t = s.tp / (s.tp + s.fn)
    s.acc_f = s.tn / (s.tn + s.fp)
    s.acc = torch.eq(label, pred).float().mean()
    s.k = k

    return s
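# Usage sketch for the two-sample kNN test above: the three blocks are ordinary pairwise
# distance matrices between a "real" set x and a "generated" set y (names and sizes are
# illustrative; Score_knn is the result container defined elsewhere in the original code).
import torch

x = torch.randn(100, 64)   # real features
y = torch.randn(120, 64)   # generated features

Mxx = torch.cdist(x, x)
Mxy = torch.cdist(x, y)
Myy = torch.cdist(y, y)
# result = knn(Mxx, Mxy, Myy, k=1, sqrt=False)
# for indistinguishable samples, result.acc is expected to sit near 0.5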
# Beam Search
beam_lls, beam_toks, beam_seqs = None, None, None
lm_probs = F.log_softmax(lm_model(XMB.unsqueeze(1), sequence_mask=MMB), dim=-1)
dist = lm_probs[:, -1, :].squeeze()
beam_lls, beam_toks = dist.topk(args.beam)
beam_losses.append(beam_lls)

ended = (beam_toks == end_token).float()
counts = (2 - ended)
beam_toks = beam_toks.unsqueeze(1)
beam_seqs = beam_toks.clone()
XMB = XMB.repeat(args.beam, 1, 1)
MMB = MMB.repeat(args.beam, 1)
next_pos = XMB[:, -1:, 1] + 1
next_x = torch.cat((beam_toks, next_pos), -1).unsqueeze(1)
XMB = torch.cat((XMB, next_x), 1)
MMB = torch.cat([MMB, torch.ones(XMB.size(0), 1, device=MMB.device)], 1)

for _ in range(args.gen_len):
    # Compute distribution for current beam
    lm_probs = F.log_softmax(lm_model(XMB.unsqueeze(1), sequence_mask=MMB), dim=-1)
    dist = lm_probs[:, -1, :].squeeze()

    # get hypothesis tokens for distribution
    hyp_beam_lls, hyp_beam_toks = dist.topk(args.beam)

    # Compute masks and expand beam
    expanded_ended = ended.unsqueeze(1).repeat(1, args.beam)
def extract_features(self, inputs): """ Returns output of the final convolution layer """ skipconnection={} ret={} # Stem x = self._swish(self._bn0(self._conv_stem(inputs))) skipconnection[0] = x # Blocks index = 0 for idx, block in enumerate(self._blocks): drop_connect_rate = self._global_params.drop_connect_rate if drop_connect_rate: drop_connect_rate *= float(idx) / len(self._blocks) x = block(x, drop_connect_rate=drop_connect_rate) skipconnection[idx+1] = x index+= 1 # Head x = self._swish(self._bn1(self._conv_head(x))) #------------------------------------------------------------------------------------ # decoder EDGE MAPS & CORNERS MAPS Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size, conv_type=self._conv_type) conv1a = Conv2d(x.shape[1], skipconnection[index].shape[1], kernel_size=3, bias=True, stride=1) d_2x_ec = self._swish(conv1a(x)) d_2x = F.interpolate(d_2x_ec, scale_factor=2, mode="bilinear", align_corners=True) #for i in range(index): # print("index: ",index-i, "shape: ", skipconnection[index-i].shape[1]) d_concat_2x = torch.cat((d_2x,skipconnection[index-5]),dim=1) conv1b = Conv2d(d_concat_2x.shape[1], skipconnection[index-5].shape[1], kernel_size=3, bias=True, stride=1) d_4x_ec = self._swish(conv1b(d_concat_2x)) d_4x = F.interpolate(d_4x_ec, scale_factor=2, mode="bilinear", align_corners=True) conv1c = Conv2d(d_4x.shape[1], 2, kernel_size=3, bias=True, stride=1) output4x_likelihood = conv1c(d_4x) ret['output4x'] = output4x_likelihood d_concat_4x = torch.cat((d_4x,skipconnection[index-11],output4x_likelihood),dim=1) conv2a = Conv2d(d_concat_4x.shape[1], skipconnection[index-11].shape[1], kernel_size=3, bias=True, stride=1) d_8x_ec = self._swish(conv2a(d_concat_4x)) d_8x = F.interpolate(d_8x_ec, scale_factor=2, mode="bilinear", align_corners=True) conv2b = Conv2d(d_8x.shape[1], 2, kernel_size=3, bias=True, stride=1) output8x_likelihood = conv2b(d_8x) ret['output8x'] = output8x_likelihood d_concat_8x = torch.cat((d_8x,skipconnection[index-13],output8x_likelihood),dim=1) conv3a = Conv2d(d_concat_8x.shape[1], skipconnection[index-13].shape[1], kernel_size=5, bias=True, stride=1) d_16x_ec = self._swish(conv3a(d_concat_8x)) d_16x = F.interpolate(d_16x_ec, scale_factor=2, mode="bilinear", align_corners=True) conv3b = Conv2d(d_16x.shape[1], 2, kernel_size=3, bias=True, stride=1) output16x_likelihood = conv3b(d_16x) ret['output16x'] = output16x_likelihood d_concat_16x = torch.cat((d_16x,skipconnection[index-15],output16x_likelihood),dim=1) conv4a = Conv2d(d_concat_16x.shape[1], skipconnection[index-15].shape[1], kernel_size=5, bias=True, stride=1) d_16x_conv1 = self._swish(conv4a(d_concat_16x)) conv4b = Conv2d(d_16x_conv1.shape[1], 2, kernel_size=3, bias=True, stride=1) output_likelihood = conv4b(d_16x_conv1) ret['output'] = output_likelihood return ret
def forward(self, x):
    x_ = x
    x = self.conv_1(x)
    x = self.conv_2(x)
    return torch.cat([x_, x], 1)
def update(self, sample, agent_i, parallel=False, logger=None): """ Update parameters of agent model based on sample from replay buffer Inputs: sample: tuple of (observations, actions, rewards, next observations, and episode end masks) sampled randomly from the replay buffer. Each is a list with entries corresponding to each agent agent_i (int): index of agent to update parallel (bool): If true, will average gradients across threads logger (SummaryWriter from Tensorboard-Pytorch): If passed in, important quantities will be logged """ obs, acs, rews, next_obs, dones = sample curr_agent = self.agents[agent_i] curr_agent.critic_optimizer.zero_grad() if self.alg_types[agent_i] == 'MADDPG': if self.discrete_action: # one-hot encode action all_trgt_acs = [onehot_from_logits(pi(nobs)) for pi, nobs in zip(self.target_policies, next_obs)] else: all_trgt_acs = [pi(nobs) for pi, nobs in zip(self.target_policies, next_obs)] trgt_vf_in = torch.cat((*next_obs, *all_trgt_acs), dim=1) else: # DDPG if self.discrete_action: trgt_vf_in = torch.cat((next_obs[agent_i], onehot_from_logits( curr_agent.target_policy( next_obs[agent_i]))), dim=1) else: trgt_vf_in = torch.cat((next_obs[agent_i], curr_agent.target_policy(next_obs[agent_i])), dim=1) target_value = (rews[agent_i].view(-1, 1) + self.gamma * curr_agent.target_critic(trgt_vf_in) * (1 - dones[agent_i].view(-1, 1))) if self.alg_types[agent_i] == 'MADDPG': vf_in = torch.cat((*obs, *acs), dim=1) else: # DDPG vf_in = torch.cat((obs[agent_i], acs[agent_i]), dim=1) actual_value = curr_agent.critic(vf_in) vf_loss = MSELoss(actual_value, target_value.detach()) vf_loss.backward() if parallel: average_gradients(curr_agent.critic) torch.nn.utils.clip_grad_norm(curr_agent.critic.parameters(), 0.5) curr_agent.critic_optimizer.step() curr_agent.policy_optimizer.zero_grad() if self.discrete_action: # Forward pass as if onehot (hard=True) but backprop through a differentiable # Gumbel-Softmax sample. The MADDPG paper uses the Gumbel-Softmax trick to backprop # through discrete categorical samples, but I'm not sure if that is # correct since it removes the assumption of a deterministic policy for # DDPG. Regardless, discrete policies don't seem to learn properly without it. curr_pol_out = curr_agent.policy(obs[agent_i]) curr_pol_vf_in = gumbel_softmax(curr_pol_out, hard=True) # softmax + argmax else: curr_pol_out = curr_agent.policy(obs[agent_i]) curr_pol_vf_in = curr_pol_out if self.alg_types[agent_i] == 'MADDPG': all_pol_acs = [] for i, pi, ob in zip(range(self.nagents), self.policies, obs): if i == agent_i: all_pol_acs.append(curr_pol_vf_in) elif self.discrete_action: all_pol_acs.append(onehot_from_logits(pi(ob))) else: all_pol_acs.append(pi(ob)) vf_in = torch.cat((*obs, *all_pol_acs), dim=1) else: # DDPG vf_in = torch.cat((obs[agent_i], curr_pol_vf_in), dim=1) pol_loss = -curr_agent.critic(vf_in).mean() pol_loss += (curr_pol_out**2).mean() * 1e-3 pol_loss.backward() if parallel: average_gradients(curr_agent.policy) torch.nn.utils.clip_grad_norm(curr_agent.policy.parameters(), 0.5) curr_agent.policy_optimizer.step() if logger is not None: logger.add_scalars('agent%i/losses' % agent_i, {'vf_loss': vf_loss, 'pol_loss': pol_loss}, self.niter)
def forward(self, encoded_question, question_length, encoded_support, support_length, correct_start, answer2question, is_eval): # casting long_tensor = torch.cuda.LongTensor if encoded_question.is_cuda else torch.LongTensor answer2question = answer2question.type(long_tensor) # computing single time attention over question attention_scores = self._linear_question_attention(encoded_question) q_mask = misc.mask_for_lengths(question_length) attention_scores = attention_scores.squeeze(2) + q_mask question_attention_weights = F.softmax(attention_scores, dim=1) question_state = torch.matmul(question_attention_weights.unsqueeze(1), encoded_question).squeeze(1) # Prediction # start start_input = torch.cat( [question_state.unsqueeze(1) * encoded_support, encoded_support], 2) q_start_state = self._linear_q_start( start_input) + self._linear_q_start_q(question_state).unsqueeze(1) start_scores = self._linear_start_scores( F.relu(q_start_state)).squeeze(2) support_mask = misc.mask_for_lengths(support_length) start_scores = start_scores + support_mask _, predicted_start_pointer = start_scores.max(1) def align(t): return torch.index_select(t, 0, answer2question) if is_eval: start_pointer = predicted_start_pointer else: # use correct start during training, because p(end|start) should be optimized start_pointer = correct_start.type(long_tensor) predicted_start_pointer = align(predicted_start_pointer) start_scores = align(start_scores) start_input = align(start_input) encoded_support = align(encoded_support) question_state = align(question_state) support_mask = align(support_mask) # end u_s = [] for b, p in enumerate(start_pointer): u_s.append(encoded_support[b, p.data[0]]) u_s = torch.stack(u_s) end_input = torch.cat( [encoded_support * u_s.unsqueeze(1), start_input], 2) q_end_state = self._linear_q_end(end_input) + self._linear_q_end_q( question_state).unsqueeze(1) end_scores = self._linear_end_scores(F.relu(q_end_state)).squeeze(2) end_scores = end_scores + support_mask max_support = support_length.max().data[0] if is_eval: end_scores += misc.mask_for_lengths(start_pointer, max_support, mask_right=False) _, predicted_end_pointer = end_scores.max(1) return start_scores, end_scores, predicted_start_pointer, predicted_end_pointer
def forward(self, emb_question, question_length, emb_support, support_length, unique_word_chars, unique_word_char_length, question_words2unique, support_words2unique, word_in_question, correct_start, answer2support, is_eval): """fast_qa model Args: emb_question: [Q, L_q, N] question_length: [Q] emb_support: [Q, L_s, N] support_length: [Q] unique_word_chars unique_word_char_length question_words2unique support_words2unique word_in_question: [Q, L_s] correct_start: [A], only during training, i.e., is_eval=False answer2question: [A], only during training, i.e., is_eval=False is_eval: [] Returns: start_scores [B, L_s, N], end_scores [B, L_s, N], span_prediction [B, 2] """ # Some helpers float_tensor = torch.cuda.FloatTensor if emb_question.is_cuda else torch.FloatTensor long_tensor = torch.cuda.LongTensor if emb_question.is_cuda else torch.LongTensor batch_size = question_length.data.shape[0] max_question_length = question_length.max().data[0] support_mask = misc.mask_for_lengths(support_length) question_binary_mask = misc.mask_for_lengths(question_length, mask_right=False, value=1.0) if self._with_char_embeddings: # compute combined embeddings [char_emb_question, char_emb_support] = self._conv_char_embedding( unique_word_chars, unique_word_char_length, [question_words2unique, support_words2unique]) emb_question = torch.cat([emb_question, char_emb_question], 2) emb_support = torch.cat([emb_support, char_emb_support], 2) # compute encoder features question_features = torch.autograd.Variable( torch.ones(batch_size, max_question_length, 2, out=float_tensor())) question_features = question_features.type_as(emb_question) v_wiqw = self._v_wiq_w # [B, L_q, L_s] wiq_w = torch.matmul(emb_question * v_wiqw, emb_support.transpose(1, 2)) # [B, L_q, L_s] wiq_w = wiq_w + support_mask.unsqueeze(1) wiq_w = F.softmax(wiq_w.view(batch_size * max_question_length, -1), dim=1).view(batch_size, max_question_length, -1) # [B, L_s] wiq_w = torch.matmul(question_binary_mask.unsqueeze(1), wiq_w).squeeze(1) # [B, L , 2] support_features = torch.stack([word_in_question, wiq_w], dim=2) if self._with_char_embeddings: # highway layer to allow for interaction between concatenated embeddings emb_question = self._embedding_projection(emb_question) emb_support = self._embedding_projection(emb_support) emb_question = self._embedding_highway(emb_question) emb_support = self._embedding_highway(emb_support) # dropout dropout = self._shared_resources.config.get("dropout", 0.0) emb_question = F.dropout(emb_question, dropout, training=not is_eval) emb_support = F.dropout(emb_support, dropout, training=not is_eval) # subjectivity support_subjectivity = F.softmax(self.subj1(emb_support)) # extend embeddings with features emb_question_ext = torch.cat([emb_question, question_features], 2) emb_support_ext = torch.cat([emb_support, support_features], 2) # encode question and support # [B, L, 2 * size] encoded_question = self._bilstm(emb_question_ext)[0] encoded_support = self._bilstm(emb_support_ext)[0] # [B, L, size] encoded_support = F.tanh( F.linear(encoded_support, self._support_projection)) encoded_question = F.tanh( F.linear(encoded_question, self._question_projection)) start_scores, end_scores, predicted_start_pointer, predicted_end_pointer = \ self._answer_layer(encoded_question, question_length, encoded_support, support_length, correct_start, answer2support, is_eval) # no multi paragraph support yet doc_idx = torch.autograd.Variable( torch.zeros(predicted_start_pointer.data.shape[0], out=long_tensor())) span = torch.stack( 
[doc_idx, predicted_start_pointer, predicted_end_pointer], 1) return start_scores, end_scores, span, support_subjectivity
def append_batch(X, beam_toks, mask):
    next_pos = X[:, -1:, 1] + 1
    next_x = torch.cat((beam_toks.unsqueeze(1), next_pos), -1).unsqueeze(1)
    next_mask = torch.cat([mask, torch.ones(X.size(0), 1, device=mask.device)], 1)
    return torch.cat((X, next_x), 1), next_mask
epoch_cost3 = []
num_minibatches = int(n_sampE / mb_size)

for i, (dataE, dataM, dataC, target) in enumerate(trainLoader):
    flag = 0
    AutoencoderE.train()
    AutoencoderM.train()
    AutoencoderC.train()
    Clas.train()

    if torch.mean(target) != 0. and torch.mean(target) != 1.:
        ZEX = AutoencoderE(dataE)
        ZMX = AutoencoderM(dataM)
        ZCX = AutoencoderC(dataC)

        ZT = torch.cat((ZEX, ZMX, ZCX), 1)
        ZT = F.normalize(ZT, p=2, dim=0)
        Pred = Clas(ZT)

        Triplets = TripSel2(ZT, target)
        loss = lam * trip_criterion(ZT[Triplets[:, 0], :], ZT[Triplets[:, 1], :],
                                    ZT[Triplets[:, 2], :]) + C_loss(Pred, target.view(-1, 1))

        y_true = target.view(-1, 1)
        y_pred = Pred
        AUC = roc_auc_score(y_true.detach().numpy(), y_pred.detach().numpy())

        solverE.zero_grad()
        solverM.zero_grad()
        solverC.zero_grad()
        SolverClass.zero_grad()
def batch_norm(self, bn, x1, x2):
    # h = bn(torch.cat((x1, x2), dim=1))
    # std = np.sqrt(bn.running_var.clone().cpu().numpy())
    # return h, std
    return bn(torch.cat((x1, x2), dim=1))
def forward(self, state, action):
    xs = F.relu(self.fcs1(state))
    x = torch.cat((xs, action), dim=1)
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc3(x))
    return self.fc4(x)
def train(model, bs, max_epoch, optimizer1, optimizer2, optimizer3, reg, qonly, observe, command, filename, uf, use_useremb): model.train() lsigmoid = nn.LogSigmoid() reg_float = float(reg.data.cpu().numpy()[0]) for epoch in range(max_epoch): # _______ Do the evaluation _______ if epoch % observe == 0 and epoch > 0: print('Evaluating on item prediction') evaluate_7(model, epoch, filename) print('Evaluating on feature similarity') evaluate_8(model, epoch, filename) tt = time.time() pickle_file_path = '../../data/{}/v1-speed-train-{}.pickle'.format(dir1, epoch) if uf == 1: pickle_file_path = '../../data/{}/v1-speed-train-{}.pickle'.format(dir1, epoch + 30) with open(pickle_file_path, 'rb') as f: pickle_file = pickle.load(f) print('Open pickle file: {} takes {} seconds'.format(pickle_file_path, time.time() - tt)) pickle_file_length = len(pickle_file[0]) model.train() mix = list(zip(pickle_file[0], pickle_file[1], pickle_file[2], pickle_file[3], pickle_file[4])) random.shuffle(mix) I, II, III, IV, V = zip(*mix) new_pk_file = [I, II, III, IV, V] start = time.time() print('Starting {} epoch'.format(epoch)) epoch_loss = 0 epoch_loss_2 = 0 max_iter = int(pickle_file_length / float(bs)) for iter_ in range(max_iter): if iter_ > 1 and iter_ % 100 == 0: print('--') print('Takes {} seconds to finish {}% of this epoch'.format(str(time.time() - start), float(iter_) * 100 / max_iter)) print('loss is: {}'.format(float(epoch_loss) / (bs * iter_))) print('iter_:{} Bias grad norm: {}, Static grad norm: {}, Preference grad norm: {}'.format(iter_,torch.norm(model.Bias.grad),torch.norm(model.ui_emb.weight.grad),torch.norm(model.feature_emb.weight.grad))) pos_list, pos_list2, neg_list, neg_list2, new_neg_list, new_neg_list2, preference_list_1, preference_list_new, index_none, residual_feature, neg_feature \ = translate_pickle_to_data(new_pk_file, iter_, bs, pickle_file_length, uf) optimizer1.zero_grad() optimizer2.zero_grad() result_pos, feature_bias_matrix_pos, nonzero_matrix_pos = model(pos_list, pos_list2, preference_list_1) # (bs, 1), (bs, 2, 1), (bs, 2, emb_size) result_neg, feature_bias_matrix_neg, nonzero_matrix_neg = model(neg_list, neg_list2, preference_list_1) diff = (result_pos - result_neg) loss = - lsigmoid(diff).sum(dim=0) # The Minus is crucial is if command in [8]: # The second type of negative sample new_result_neg, new_feature_bias_matrix_neg, new_nonzero_matrix_neg = model(new_neg_list, new_neg_list2, preference_list_new) # Reason for this is that, sometimes the sample is missing, so we have to also omit that in result_pos T = cuda_(torch.tensor([])) for i in range(bs): if i in index_none: continue T = torch.cat([T, result_pos[i]], dim=0) T = T.view(T.shape[0], -1) assert T.shape[0] == new_result_neg.shape[0] diff = T - new_result_neg if loss is not None: loss += - lsigmoid(diff).sum(dim=0) else: loss = - lsigmoid(diff).sum(dim=0) # regularization if reg_float != 0: if qonly != 1: feature_bias_matrix_pos_ = (feature_bias_matrix_pos ** 2).sum(dim=1) # (bs, 1) feature_bias_matrix_neg_ = (feature_bias_matrix_neg ** 2).sum(dim=1) # (bs, 1) nonzero_matrix_pos_ = (nonzero_matrix_pos ** 2).sum(dim=2).sum(dim=1, keepdim=True) # (bs, 1) nonzero_matrix_neg_ = (nonzero_matrix_neg ** 2).sum(dim=2).sum(dim=1, keepdim=True) # (bs, 1) new_nonzero_matrix_neg_ = (new_nonzero_matrix_neg_ ** 2).sum(dim=2).sum(dim=1, keepdim=True) regular_norm = (feature_bias_matrix_pos_ + feature_bias_matrix_neg_ + nonzero_matrix_pos_ + nonzero_matrix_neg_ + new_nonzero_matrix_neg_) loss += (reg * regular_norm).sum(dim=0) else: 
nonzero_matrix_pos_ = (nonzero_matrix_pos ** 2).sum(dim=2).sum(dim=1, keepdim=True) nonzero_matrix_neg_ = (nonzero_matrix_neg ** 2).sum(dim=2).sum(dim=1, keepdim=True) loss += (reg * nonzero_matrix_pos_).sum(dim=0) loss += (reg * nonzero_matrix_neg_).sum(dim=0) epoch_loss += loss.data loss.backward() optimizer1.step() optimizer2.step() if uf == 1: # updating feature embedding # we try to optimize A = model.feature_emb(preference_list_1)[..., :-1] user_emb = model.ui_emb(pos_list[:, 0])[..., :-1].unsqueeze(dim=1).detach() if use_useremb == 1: A = torch.cat([A, user_emb], dim=1) B = model.feature_emb(residual_feature)[..., :-1] C = model.feature_emb(neg_feature)[..., :-1] D = torch.matmul(A, B.transpose(2, 1)) E = torch.matmul(A, C.transpose(2, 1)) p_vs_residual = D.view(D.shape[0], -1, 1) p_vs_neg = E.view(E.shape[0], -1, 1) p_vs_residual = p_vs_residual.sum(dim=1) p_vs_neg = p_vs_neg.sum(dim=1) diff = (p_vs_residual - p_vs_neg) temp = - lsigmoid(diff).sum(dim=0) loss = temp epoch_loss_2 += temp.data if iter_ % 1000 == 0 and iter_ > 0: print('2ND iter_:{} preference grad norm: {}'.format(iter_, torch.norm(model.feature_emb.weight.grad))) print('2ND loss is: {}'.format(float(epoch_loss_2) / (bs * iter_))) optimizer3.zero_grad() loss.backward() optimizer3.step() # These line is to make an alert on console when we meet gradient explosion. if iter_ > 0 and iter_ % 1 == 0: if torch.norm(model.ui_emb.weight.grad) > 100 or torch.norm(model.feature_emb.weight.grad) > 500: print('iter_:{} Bias grad norm: {}, F-bias grad norm: {}, F-embedding grad norm: {}'.format(iter_,torch.norm(model.Bias.grad),torch.norm(model.ui_emb.weight.grad),torch.norm(model.feature_emb.weight.grad))) # Uncomment this to use clip gradient norm (but currently we don't need) # clip_grad_norm_(model.ui_emb.weight, 5000) # clip_grad_norm_(model.feature_emb.weight, 5000) print('epoch loss: {}'.format(epoch_loss / pickle_file_length)) print('epoch loss 2: {}'.format(epoch_loss_2 / pickle_file_length)) if epoch % 1 == 0: PATH = '../../data/FM-model-merge/' + filename + 'epoch-{}.pt'.format(epoch) torch.save(model.state_dict(), PATH) print('Model saved at {}'.format(PATH)) PATH = '../../data/FM-log-merge/' + filename + '.txt' with open(PATH, 'a') as f: f.write('Starting {} epoch\n'.format(epoch)) f.write('training loss 1: {}\n'.format(epoch_loss / len(train_list))) f.write('training loss 2: {}\n'.format(epoch_loss_2 / len(train_list)))
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()): """Performs Non-Maximum Suppression (NMS) on inference results Returns: detections with shape: nx6 (x1, y1, x2, y2, conf, cls) """ nc = prediction.shape[2] - 5 # number of classes xc = prediction[..., 4] > conf_thres # candidates # Settings min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height max_det = 300 # maximum number of detections per image max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() time_limit = 10.0 # seconds to quit after redundant = True # require redundant detections multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS t = time.time() output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height x = x[xc[xi]] # confidence # Cat apriori labels if autolabelling if labels and len(labels[xi]): l = labels[xi] v = torch.zeros((len(l), nc + 5), device=x.device) v[:, :4] = l[:, 1:5] # box v[:, 4] = 1.0 # conf v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls x = torch.cat((x, v), 0) # If none remain process next image if not x.shape[0]: continue # Compute conf x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf # Box (center x, center y, width, height) to (x1, y1, x2, y2) box = xywh2xyxy(x[:, :4]) # Detections matrix nx6 (xyxy, conf, cls) if multi_label: i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) else: # best class only conf, j = x[:, 5:].max(1, keepdim=True) x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] # Apply finite constraint # if not torch.isfinite(x).all(): # x = x[torch.isfinite(x).all(1)] # Check shape n = x.shape[0] # number of boxes if not n: # no boxes continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS if i.shape[0] > max_det: # limit detections i = i[:max_det] if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes if redundant: i = i[iou.sum(1) > 1] # require redundancy output[xi] = x[i] if (time.time() - t) > time_limit: print(f'WARNING: NMS time limit {time_limit}s exceeded') break # time limit exceeded return output
def forward(self, noise, labels, code):
    gen_input = torch.cat((noise, labels, code), -1)
    out = self.l1(gen_input)
    out = out.view(out.shape[0], 128, self.init_size, self.init_size)
    img = self.conv_blocks(out)
    return img
def forward(self, x):
    x = self.avg(x)
    return torch.cat((x, x.mul(0)), 1)
def forward(self, x):
    for b in self.block:
        y = b(x)
        # concat in channels
        x = torch.cat((x, y), dim=1)
    return x
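# Toy check of the dense-style channel growth implied by the loop above: each block sees the
# concatenation of all previous feature maps, so its input width grows by the growth rate at
# every step (channel counts here are illustrative).
import torch
import torch.nn as nn

in_ch, growth = 8, 16
blocks = nn.ModuleList(
    [nn.Conv2d(in_ch + i * growth, growth, kernel_size=3, padding=1) for i in range(3)])

x = torch.randn(2, in_ch, 32, 32)
for b in blocks:
    x = torch.cat((x, b(x)), dim=1)
print(x.shape)  # torch.Size([2, 56, 32, 32]) -> 8 + 3 * 16 channels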
# Predict
y_predicted = esn(inputs)

# Plot
plt.plot(labels[0].numpy(), 'r')
plt.plot(y_predicted[0].numpy(), 'g')
plt.title(sfgram_dataset.last_text)
plt.show()

# Add to y and ^y
if i == 0:
    total_predicted = y_predicted
    total_labels = labels
else:
    total_predicted = torch.cat((total_predicted, y_predicted), dim=1)
    total_labels = torch.cat((total_labels, labels), dim=1)
# end if

# Total
total += 1.0
# end for

# Output stats
print(u"Min : {}".format(torch.min(total_predicted)))
print(u"Max : {}".format(torch.max(total_predicted)))
print(u"Mean : {}".format(torch.mean(total_predicted)))
print(u"Std : {}".format(torch.std(total_predicted)))

# Save result
xp.add_result(0)
def forward(self, input):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    '''
    input is a tuple:
        input[0]: rpn_cls_prob.data
        input[1]: rpn_bbox_pred.data
        input[2]: im_info
        input[3]: cfg_key (TRAIN/TEST)
    '''
    scores = input[0][:, self._num_anchors:, :, :]  # scores for fg.
    bbox_deltas = input[1]  # offsets BCHW, where C=9*4
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                         shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(scores)
    # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()  # BCHW -> BHWC
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)           # B(HW9)4

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()  # BCHW -> BHWC
    scores = scores.view(batch_size, -1)              # B(HW9)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)
    # proposals = clip_boxes_batch(proposals, im_info, batch_size)

    # assign the score to 0 if it's non keep.
    # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

    # trim keep index to make it equal over batch
    # keep_idx = torch.cat(tuple(keep_idx), 0)

    # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
    # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

    # _, order = torch.sort(scores_keep, 1, True)

    scores_keep = scores
    proposals_keep = proposals
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]

        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                         nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
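# A self-contained sketch of the anchor/shift broadcasting used in the forward pass above:
# the K per-cell shifts, viewed as (K, 1, 4), are added to the A base anchors, viewed as
# (1, A, 4), producing all K*A anchors in one broadcast. The sizes here (A=3 anchors,
# feat_stride=16, a 2x2 feature map, the base anchor boxes) are made up for illustration.
import numpy as np
import torch

feat_h, feat_w, feat_stride = 2, 2, 16
base_anchors = torch.tensor([[-8., -8., 8., 8.],
                             [-16., -8., 16., 8.],
                             [-8., -16., 8., 16.]])  # (A, 4), illustrative sizes

shift_x = np.arange(0, feat_w) * feat_stride
shift_y = np.arange(0, feat_h) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                     shift_x.ravel(), shift_y.ravel())).transpose()).float()  # (K, 4)

A, K = base_anchors.size(0), shifts.size(0)
anchors = base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)  # (K, A, 4) via broadcasting
anchors = anchors.view(1, K * A, 4)                          # flattened in the same layout as the scores
print(anchors.shape)                                          # torch.Size([1, 12, 4])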
def predict(self, each_cont):
    each_trend, each_ori_trend, metadata = each_cont
    input_seq, output_seq = each_trend

    if self.conf["dataset"] == "FIT":
        seq_id, each_grp, each_ele, each_norm, each_city, each_gender, each_age = metadata
        city_id = each_city.squeeze(1)      # [batch_size]
        gender_id = each_gender.squeeze(1)  # [batch_size]
        age_id = each_age.squeeze(1)        # [batch_size]
        city_embed = self.city_embeds(city_id)        # [batch_size, feat_size]
        gender_embed = self.gender_embeds(gender_id)  # [batch_size, feat_size]
        age_embed = self.age_embeds(age_id)           # [batch_size, feat_size]
        grp_embed = self.city_gender_age_agg(
            torch.cat([city_embed, gender_embed, age_embed], dim=1))
    else:
        seq_id, each_grp, each_ele, each_norm = metadata
        grp_id = each_grp.squeeze(1)         # [batch_size]
        grp_embed = self.grp_embeds(grp_id)  # [batch_size, feat_size]

    ele_id = each_ele.squeeze(1)             # [batch_size]
    ori_ele_embed = self.ele_embeds(ele_id)  # [batch_size, feat_size]

    # affiliation relation
    if self.conf["ext_kg"] is True:
        #ele_embed_prop_1 = torch.matmul(self.adj, self.ele_embeds.weight)  # [ele_num, feat_size]
        #ele_embed_prop_2 = torch.matmul(self.adj_2, self.ele_embeds.weight)  # [ele_num, feat_size]
        #ele_embed_prop_1 = ele_embed_prop_1[ele_id, :]  # [batch_size, feat_size]
        #ele_embed_prop_2 = ele_embed_prop_2[ele_id, :]  # [batch_size, feat_size]
        #ele_embed = self.agg_prop(torch.cat([ori_ele_embed, ele_embed_prop_1, ele_embed_prop_2], dim=1))
        ele_embed_prop_1 = torch.matmul(self.adj, self.ele_embeds.weight)  # [ele_num, feat_size]
        ele_embed_prop_1 = ele_embed_prop_1[ele_id, :]                      # [batch_size, feat_size]
        ele_embed = ori_ele_embed + ele_embed_prop_1
    else:
        ele_embed = ori_ele_embed

    enc_time_embed = self.time_embeds(input_seq[:, :, 0].long())   # [batch_size, enc_seq_len, feat_size]
    dec_time_embed = self.time_embeds(output_seq[:, :, 0].long())  # [batch_size, dec_seq_len, feat_size]

    # encode part:
    # input_seq: [batch_size, enc_seq_len, 2]
    enc_seq_len = input_seq.shape[1]
    enc_grp_embed = grp_embed.unsqueeze(1).expand(-1, enc_seq_len, -1)  # [batch_size, enc_seq_len, feat_size]
    enc_ele_embed = ele_embed.unsqueeze(1).expand(-1, enc_seq_len, -1)  # [batch_size, enc_seq_len, feat_size]
    if self.conf["use_grp_embed"] is True:
        enc_input_seq = torch.cat(
            [enc_grp_embed, enc_ele_embed, enc_time_embed, input_seq[:, :, 1].unsqueeze(-1)],
            dim=-1)  # [batch_size, enc_seq_len, enc_input_size]
    else:
        enc_input_seq = torch.cat(
            [enc_ele_embed, enc_time_embed, input_seq[:, :, 1].unsqueeze(-1)],
            dim=-1)  # [batch_size, enc_seq_len, enc_input_size]
    enc_input_seq = enc_input_seq.permute(1, 0, 2)  # [enc_seq_len, batch_size, enc_input_size]
    enc_outputs, (enc_hidden, enc_c) = self.encoder(enc_input_seq)
    # outputs: [seq_len, batch_size, hidden_size], hidden: [1, batch_size, hidden_size]

    enc_grd = input_seq[:, 1:, 1]                              # [batch_size, enc_seq_len-1]
    enc_output_feat = enc_outputs.permute(1, 0, 2)[:, 1:, :]   # [batch_size, enc_seq_len-1, hidden_size]
    enc_pred = self.enc_linear(enc_output_feat).squeeze(-1)    # [batch_size, enc_seq_len-1]

    # decode part:
    # output_seq: [batch_size, dec_seq_len, 2]
    dec_seq_len = output_seq.shape[1]
    dec_grp_embed = grp_embed.unsqueeze(1).expand(-1, dec_seq_len, -1)  # [batch_size, dec_seq_len, feat_size]
    dec_ele_embed = ele_embed.unsqueeze(1).expand(-1, dec_seq_len, -1)  # [batch_size, dec_seq_len, feat_size]
    if self.conf["use_grp_embed"] is True:
        dec_input_seq = torch.cat(
            [dec_grp_embed, dec_ele_embed, dec_time_embed],
            dim=-1)  # [batch_size, dec_seq_len, dec_input_size]
    else:
        dec_input_seq = torch.cat(
            [dec_ele_embed, dec_time_embed],
            dim=-1)  # [batch_size, dec_seq_len, dec_input_size]
    dec_input_seq = dec_input_seq.permute(1, 0, 2)  # [dec_seq_len, batch_size, dec_input_size]

    dec_init_hidden = enc_hidden.expand(2, -1, -1)  # [2, batch_size, hidden_size]
    dec_init_c = enc_c.expand(2, -1, -1)            # [2, batch_size, hidden_size]
    dec_output_feat, _ = self.decoder(
        dec_input_seq,
        (dec_init_hidden.contiguous(), dec_init_c.contiguous()))
    # outputs: [seq_len, batch_size, hidden_size*2]
    dec_output_feat = dec_output_feat.permute(1, 0, 2)  # [batch_size, seq_len, hidden_size*2]

    dec_grd = output_seq[:, :, 1]                             # [batch_size, dec_seq_len]
    dec_pred = self.dec_linear(dec_output_feat).squeeze(-1)   # [batch_size, dec_seq_len]

    enc_loss = self.loss_function(enc_pred, enc_grd)
    dec_loss = self.loss_function(dec_pred, dec_grd)

    return enc_loss, dec_loss, dec_pred, enc_hidden.squeeze(0), enc_output_feat, dec_output_feat
    # [batch_size, hidden_size], [batch_size, enc_seq_len-1, hidden_size], [batch_size, seq_len, hidden_size*2]
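# A minimal standalone sketch of the decoder initialisation used above: a 1-layer encoder
# hidden state of shape [1, batch, hidden] is expanded to [2, batch, hidden] so it can seed a
# decoder whose state has a leading dimension of 2; expand() returns a non-contiguous view,
# hence the .contiguous() calls before handing it to nn.LSTM. The actual configuration of
# self.decoder is not shown above; a single-layer bidirectional LSTM is assumed here because
# it matches both the [2, batch, hidden] initial state and the hidden_size*2 output comments.
import torch
import torch.nn as nn

batch, in_size, hidden = 4, 8, 16
encoder = nn.LSTM(in_size, hidden, num_layers=1)
decoder = nn.LSTM(in_size, hidden, num_layers=1, bidirectional=True)

enc_in = torch.randn(10, batch, in_size)          # [enc_seq_len, batch, in_size]
_, (enc_h, enc_c) = encoder(enc_in)               # each [1, batch, hidden]

dec_h0 = enc_h.expand(2, -1, -1).contiguous()     # [2, batch, hidden]
dec_c0 = enc_c.expand(2, -1, -1).contiguous()
dec_in = torch.randn(6, batch, in_size)           # [dec_seq_len, batch, in_size]
dec_out, _ = decoder(dec_in, (dec_h0, dec_c0))
print(dec_out.shape)                              # torch.Size([6, 4, 32]) -> hidden_size*2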
def forward(self, input_p_q, label_p):
    batch_size = input_p_q.shape[0]
    # get inputs without target series
    inputs = input_p_q[:, :self.T_enco,
                       list(range(0, self.output_column)) + list(range(self.output_column + 1, 18))]
    labels_p = label_p[:, :self.T_enco]

    # Spatial attention phase 1
    h1 = torch.zeros(batch_size, self.n_hid, device=device)
    c1 = torch.zeros(batch_size, self.n_hid, device=device)
    mid_output = torch.zeros(batch_size, self.T_enco, self.n_hid, device=device)
    for i in range(0, self.T_enco):
        atte_score_i = self.Ve1(
            self.Tanh(
                self.We1(torch.cat([h1, c1], dim=1)).repeat(self.n_inp - 1, 1, 1).permute(1, 0, 2)
                + self.Ue1(inputs.transpose(1, 2)))).squeeze()
        inputs_i = torch.mul(inputs[:, i, :], atte_score_i.softmax(dim=1))
        h1, c1 = self.encoder1(inputs_i, (h1, c1))
        mid_output[:, i, :] = h1

    # Spatial attention phase 2
    mid_output = torch.cat([mid_output, labels_p.unsqueeze(2)], dim=2)
    h = torch.zeros(batch_size, self.n_hid, device=device)
    c = torch.zeros(batch_size, self.n_hid, device=device)
    final_output = torch.zeros(batch_size, self.T_enco, self.n_hid, device=device)
    for i in range(0, self.T_enco):
        atte_score_i = self.Ve2(
            self.Tanh(
                self.We2(torch.cat([h, c], dim=1)).repeat(self.n_hid + 1, 1, 1).permute(1, 0, 2)
                + self.Ue2(mid_output.transpose(1, 2)))).squeeze()
        inputs_i2 = torch.mul(mid_output[:, i, :], atte_score_i.softmax(dim=1))
        h, c = self.encoder2(inputs_i2, (h, c))
        final_output[:, i, :] = h

    # Temporal attention
    hi = torch.zeros(batch_size, self.h_hid, device=device)
    ci = torch.zeros(batch_size, self.h_hid, device=device)
    decode_output = []
    for i_decoder in range(0, self.T_deco + 6):
        atte_score_2_x = self.Wd(torch.cat([hi, ci], dim=1)).repeat(self.T_enco, 1, 1).transpose(0, 1) + \
            self.Ud(final_output)
        atte_score_2 = self.Vd(self.Tanh(atte_score_2_x))  # torch.Size([B, 40, 1])
        decoder_in = torch.bmm(atte_score_2.transpose(1, 2).softmax(dim=2),
                               final_output)  # [B * 1 * 64] = [B * 1 * 40] * [B * 40 * 64]
        hi, ci = self.decoder(decoder_in.squeeze(), (hi, ci))
        decode_output += [hi.unsqueeze(1)]

    # regressor
    out = torch.cat(decode_output, dim=1)
    out = self.regressor(out)
    out = out[:, -self.T_deco:, :]
    return out.squeeze()
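# A minimal standalone sketch (hypothetical dimensions) of the additive-attention scoring
# pattern reused in all three stages above: a projection of the LSTM state [h; c] is combined
# with a projection of each candidate series over the whole window, passed through tanh,
# reduced to one score per candidate, and softmax-normalised. Broadcasting with unsqueeze(1)
# is used here instead of the repeat/permute in the code above; the result is equivalent.
import torch
import torch.nn as nn

B, T, n_series, n_hid, n_att = 4, 10, 6, 16, 32
We = nn.Linear(2 * n_hid, n_att)   # plays the role of We1/We2/Wd
Ue = nn.Linear(T, n_att)           # projects each driving series over the whole window
Ve = nn.Linear(n_att, 1)           # reduces to one unnormalised score per series

x = torch.randn(B, T, n_series)    # driving series, like `inputs` above
h = torch.zeros(B, n_hid)
c = torch.zeros(B, n_hid)

state_proj = We(torch.cat([h, c], dim=1)).unsqueeze(1)           # [B, 1, n_att]
series_proj = Ue(x.transpose(1, 2))                              # [B, n_series, n_att]
scores = Ve(torch.tanh(state_proj + series_proj)).squeeze(-1)    # [B, n_series]
weights = scores.softmax(dim=1)                                  # attention over the input series
x_t_weighted = x[:, 0, :] * weights                              # re-weighted inputs at one time step
print(weights.shape, x_t_weighted.shape)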
def forward(self, h_states, seq_start_end, end_pos):
    """
    Inputs:
    - h_states: Tensor of shape (num_layers, batch, h_dim)
    - seq_start_end: A list of tuples which delimit sequences within batch.
    - end_pos: Absolute end position of obs_traj (batch, 2)
    Output:
    - pool_h: Tensor of shape (batch, h_dim)
    """
    pool_h = []
    for _, (start, end) in enumerate(seq_start_end):
        start = start.item()
        end = end.item()
        num_ped = end - start
        grid_size = self.grid_size * self.grid_size
        curr_hidden = h_states.view(-1, self.h_dim)[start:end]
        curr_hidden_repeat = curr_hidden.repeat(num_ped, 1)
        curr_end_pos = end_pos[start:end]
        curr_pool_h_size = (num_ped * grid_size) + 1
        curr_pool_h = curr_hidden.new_zeros((curr_pool_h_size, self.h_dim))
        # curr_end_pos = curr_end_pos.data
        top_left, bottom_right = self.get_bounds(curr_end_pos)

        # Repeat position -> P1, P2, P1, P2
        curr_end_pos = curr_end_pos.repeat(num_ped, 1)
        # Repeat bounds -> B1, B1, B2, B2
        top_left = self.repeat(top_left, num_ped)
        bottom_right = self.repeat(bottom_right, num_ped)

        grid_pos = self.get_grid_locations(top_left, curr_end_pos).type_as(seq_start_end)
        # Make all positions to exclude as non-zero
        # Find which peds to exclude
        x_bound = ((curr_end_pos[:, 0] >= bottom_right[:, 0]) +
                   (curr_end_pos[:, 0] <= top_left[:, 0]))
        y_bound = ((curr_end_pos[:, 1] >= top_left[:, 1]) +
                   (curr_end_pos[:, 1] <= bottom_right[:, 1]))

        within_bound = x_bound + y_bound
        within_bound[0::num_ped + 1] = 1  # Don't include the ped itself
        within_bound = within_bound.view(-1)

        # This is a tricky way to get scatter add to work. Helps me avoid a
        # for loop. Offset everything by 1. Use the initial 0 position to
        # dump all unnecessary adds.
        grid_pos += 1
        total_grid_size = self.grid_size * self.grid_size
        offset = torch.arange(0, total_grid_size * num_ped, total_grid_size).type_as(seq_start_end)

        offset = self.repeat(offset.view(-1, 1), num_ped).view(-1)
        grid_pos += offset
        grid_pos[within_bound != 0] = 0
        grid_pos = grid_pos.view(-1, 1).expand_as(curr_hidden_repeat)

        curr_pool_h = curr_pool_h.scatter_add(0, grid_pos, curr_hidden_repeat)
        curr_pool_h = curr_pool_h[1:]
        pool_h.append(curr_pool_h.view(num_ped, -1))

    pool_h = torch.cat(pool_h, dim=0)
    pool_h = self.mlp_pool(pool_h)
    return pool_h
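# A self-contained toy example (made-up sizes and indices) of the scatter_add offset trick
# described in the comment above: each (ped, neighbour) pair gets a flat grid-cell index,
# every index is shifted by +1, pairs to be excluded are routed to slot 0, and a single
# scatter_add accumulates all neighbour hidden states per grid cell; row 0 is then dropped.
import torch

num_ped, grid_cells, h_dim = 3, 4, 2
hidden = torch.arange(num_ped * num_ped * h_dim, dtype=torch.float).view(num_ped * num_ped, h_dim)
grid_pos = torch.tensor([0, 3, 1, 2, 0, 1, 3, 2, 0])   # flat cell index per (ped, neighbour) pair
exclude = torch.tensor([1, 0, 0, 0, 1, 0, 0, 0, 1])    # e.g. the ped itself / out of bounds

idx = grid_pos + 1                                      # shift everything by 1
offset = torch.arange(num_ped) * grid_cells             # per-ped block offset
idx = idx + offset.repeat_interleave(num_ped)           # place each ped's cells in its own block
idx[exclude != 0] = 0                                   # dump excluded pairs into slot 0

pool = torch.zeros(num_ped * grid_cells + 1, h_dim)
pool = pool.scatter_add(0, idx.view(-1, 1).expand(-1, h_dim), hidden)
pool = pool[1:]                                         # drop the dump slot
print(pool.view(num_ped, -1).shape)                     # torch.Size([3, 8])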
def dqn_learing(d, output_path=None, save_or_plot="plot"):
    ###############
    # BUILD MODEL #
    ###############
    # Read the parameters explicitly rather than via locals(), so every param is traceable
    batch_size = d["batch_size"]
    learning_starts = d["learning_starts"]
    learning_freq = d["learning_freq"]
    target_update_freq = d["target_update_freq"]
    # num_actor = d["num_actor"]
    action_enum = d["action_enum"]
    num_layer = d["num_layer"]
    layer_size = d["layer_size"]
    gamma = d["gamma"]
    log_every_n_steps = d["log_every_n_steps"]
    ub = d["use_batchnorm"]
    dropout = d["dropout"]
    total_step = d["total_step"]

    env = Env(d, "train")
    num_actor = env.num_actor

    print("Start Training")
    print(f"grid bus size: {env.wrapper.net['bus'].shape[0]}")
    print(f"grid line size: {env.wrapper.net['line'].shape[0]}")
    print("")
    print(f"device : {device}")
    print(f"num_sample: {env.num_total_network}")
    print(f"total_step: {total_step}")
    print(f"learning_starts: {learning_starts}")
    print(f"log_every_n_steps: {log_every_n_steps}")
    print(f"target_update_freq: {target_update_freq}")
    print("")
    print(f"actor: {d['actor']}")
    print(f"action attribute: {d['action_attribute']}")
    print(f"observer: {d['observer']}")
    print(f"observe attribute: {d['observe_attribute']}")
    print(f"target: {d['target']}")
    print(f"target attribute: {d['target_attribute']}")
    print("\n")

    # Initialize target q function and q function
    Q = DQN(env.num_observation, len(action_enum), num_actor,
            num_layer, layer_size, ub, dropout).to(device)
    target_Q = DQN(env.num_observation, len(action_enum), num_actor,
                   num_layer, layer_size, ub, dropout).to(device)

    optimizer = env.create_optim(Q.parameters())
    exploration = env.create_explor_schedule()
    optim_scheduler = env.create_optim_scheduler(optimizer)
    df_res = env.create_df_res()
    replay_buffer = env.create_replay_buffer()

    ###############
    #   RUN ENV   #
    ###############
    explor_value = 'None'
    mean_episode_reward = -float('nan')
    best_mean_episode_reward = -float('inf')
    last_obs = env.initial_run()
    time_point = time.time()

    for t in count():
        if optim_scheduler:
            optim_scheduler.step()
        env.num_step = t
        if env.stopping_criterion():
            break

        # Initialize batch norm.
        # At the very beginning batch norm does not know the running mean and var,
        # but action inference needs a mean and var to feed into eval().
        if t == learning_starts and ub:
            input_ = torch.FloatTensor(replay_buffer.obs[:learning_starts + 1]).to(device)
            input_ = Q.bn(input_)

        ### Step the env and store the transition
        # Store latest observation in replay memory; last_idx can be used to store action, reward, done
        last_idx = replay_buffer.store_obs(last_obs)

        # Choose random action if not yet started learning
        if t > learning_starts:
            explor_value = exploration.value(t)
            action = env.select_epilson_greedy_action(Q, last_obs, explor_value, device)
        else:
            action = env.rand_action(num_actor, len(action_enum))

        # Advance one step
        last_obs, reward, done = env.step(action)
        replay_buffer.store_result(last_idx, action.squeeze(), reward, done)
        if done:
            last_obs = env.reset()

        ### Perform experience replay and train the network.
        if (t > learning_starts and
                t % learning_freq == 0 and
                replay_buffer.can_sample(batch_size)):
            # Use the replay buffer to sample a batch of transitions
            # Note: done_mask[i] is 1 if the next state corresponds to the end of an episode,
            # in which case there is no Q-value at the next state; at the end of an
            # episode, only the current state reward contributes to the target
            obs_batch, act_batch, rew_batch, next_obs_batch, done_mask = replay_buffer.sample(batch_size)

            # Move the batch onto the GPU if one is in use
            obs_batch = torch.from_numpy(obs_batch).to(device)
            act_batch = torch.from_numpy(act_batch).to(device)
            rew_batch = torch.from_numpy(rew_batch).to(device)
            next_obs_batch = torch.from_numpy(next_obs_batch).to(device)
            not_done_mask = torch.from_numpy(1 - done_mask).to(device)

            # Compute current Q value; q_func takes only the state and outputs a value for every state-action pair.
            # Pick out the Q values of the actions actually taken and reshape.
            current_Q_values = Q(obs_batch).gather(-1, env.onehot_decode(act_batch).unsqueeze(-1))
            # Take the max next Q value, masked onto the dispatch sequences that have not terminated.
            # Detach, since gradients should not propagate through the next Q.
            next_max_q = target_Q(next_obs_batch).detach().max(2)[0]
            next_Q_values = not_done_mask.unsqueeze(1) * next_max_q
            # Compute the target of the current Q values
            rew_batch = rew_batch.unsqueeze(1)
            # Tile the reward so it matches the 2-D output
            rew_batch_resize = torch.cat(tuple(rew_batch for _ in range(num_actor)), 1)
            # rew_batch_resize = torch.cat((rew_batch, rew_batch, rew_batch, rew_batch), 1)
            target_Q_values = rew_batch_resize + (gamma * next_Q_values)
            # Compute Bellman error
            bellman_error = target_Q_values - current_Q_values.squeeze()
            # Clip the bellman error to [-1, 1]
            # (not strictly needed here, but kept anyway)
            clipped_bellman_error = bellman_error.clamp(-1, 1)
            d_error = clipped_bellman_error * -1.0

            optimizer.zero_grad()
            current_Q_values.backward(d_error.unsqueeze(2).data)
            optimizer.step()

            if t % target_update_freq == 0:
                target_Q.load_state_dict(Q.state_dict())

        ### 4. Log progress and keep track of statistics
        # Initialize log info
        if "best_mean_reward" not in locals():
            best_mean_reward = -1
        df_res.loc[t, :] = [t, env.num_episode, reward, explor_value]

        if (t + 1) % log_every_n_steps == 0:
            time_used = time.time() - time_point
            time_point = time.time()
            lr = optimizer.param_groups[0]['lr']

            # Print progress
            print(f"Timestep: {t}")
            print(f"Episodes: {env.num_episode}")
            print(f"Time Consumption: {time_used:.2f} s")
            mean_steps_episode, mean_reward_episode = env.cal_epi_reward(df_res, env.num_episode)
            best_mean_reward = max(best_mean_reward, mean_reward_episode)
            print(f"Mean Reward ({log_every_n_steps} episodes): {mean_reward_episode:.2f}")
            print(f"Best Mean Reward: {best_mean_reward:.2f}")
            if t > learning_starts:
                print(f"Exploration: {explor_value:.2f}")
            print(f"Learning Rate: {lr}")
            print("")

            # Run through the test set to check accuracy
            if d["do_test"]:
                env_test = Env(d, "test")
                Q.eval()
                buf_reward, buf_step = 0, 0
                last_obs = env_test.initial_run()
                for t_test in count():
                    env_test.num_step = t_test
                    if env_test.stopping_criterion():
                        break
                    action = env_test.select_epilson_greedy_action(Q, last_obs, 0, device)
                    last_obs, reward, done = env_test.step(action)
                    if done == True:
                        last_obs = env_test.reset()
                    buf_reward += reward
                Q.train()
                print(f"Test Score: {buf_reward/d['num_test_case']}")
                print("\n")

    # Wrap up
    print("Learning Finished")
    if not output_path:
        output_path = "res"
    csv_name = output_path + ".csv"
    png_name = output_path + ".png"
    df_res.to_csv(csv_name, index=False)
    epi_x = range(100, df_res.iloc[-1]["Episode"], 100)
    epi_reward = [env.cal_epi_reward(df_res, ele)[1] for ele in epi_x]
    plt.plot(epi_x, epi_reward)
    if save_or_plot == "plot":
        plt.show()
    elif save_or_plot == "save":
        plt.savefig(png_name)
    plt.clf()

    print([env.wrapper.net[ele][attr] for ele, attr in zip(d['actor'], d['action_attribute'])])
    print([env.wrapper.net[ele][attr] for ele, attr in zip(d['target'], d['target_attribute'])])
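# A toy, self-contained sketch (made-up shapes, num_actor=2, 3 actions) of the Q-target and
# Bellman-error computation in the replay step above: chosen-action Q values are gathered from
# the online network, max next-state Q values come from the detached target network, terminal
# transitions are masked out, and the clipped error is fed back through
# current_q.backward(gradient), the same pattern as current_Q_values.backward(d_error) above.
import torch

batch, num_actor, n_act, gamma = 5, 2, 3, 0.99
q = torch.randn(batch, num_actor, n_act, requires_grad=True)       # online Q(s, .)
q_next = torch.randn(batch, num_actor, n_act)                       # target Q(s', .)
act = torch.randint(0, n_act, (batch, num_actor))                   # actions actually taken
rew = torch.randn(batch)
done = torch.randint(0, 2, (batch,)).float()

current_q = q.gather(-1, act.unsqueeze(-1))                          # [batch, num_actor, 1]
next_max_q = q_next.detach().max(2)[0]                               # [batch, num_actor]
next_q = (1 - done).unsqueeze(1) * next_max_q                        # zero for terminal states
target_q = rew.unsqueeze(1).expand(-1, num_actor) + gamma * next_q   # [batch, num_actor]

bellman_error = target_q - current_q.squeeze(-1)
d_error = bellman_error.clamp(-1, 1) * -1.0
current_q.backward(d_error.unsqueeze(-1))                            # pushes the clipped TD error back
print(q.grad.shape)                                                  # torch.Size([5, 2, 3])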
def _forward_loss(
    self, state: Dict[str, torch.Tensor], target_tokens: Dict[str, torch.LongTensor]
) -> Dict[str, torch.Tensor]:
    """
    Make forward pass during training or do greedy search during prediction.

    Notes
    -----
    We really only use the predictions from the method to test that beam search
    with a beam size of 1 gives the same results.
    """
    # shape: (batch_size, max_input_sequence_length, encoder_output_dim)
    encoder_outputs = state["encoder_outputs"]

    # shape: (batch_size, max_input_sequence_length)
    source_mask = state["source_mask"]

    # shape: (batch_size, max_target_sequence_length)
    targets = target_tokens["tokens"]

    # Prepare embeddings for targets. They will be used as gold embeddings during decoder training
    # shape: (batch_size, max_target_sequence_length, embedding_dim)
    target_embedding = self.target_embedder(targets)

    # shape: (batch_size, max_target_batch_sequence_length)
    target_mask = util.get_text_field_mask(target_tokens)

    if self._scheduled_sampling_ratio == 0 and self._decoder_net.decodes_parallel:
        _, decoder_output = self._decoder_net(
            previous_state=state,
            previous_steps_predictions=target_embedding[:, :-1, :],
            encoder_outputs=encoder_outputs,
            source_mask=source_mask,
            previous_steps_mask=target_mask[:, :-1],
        )

        # shape: (group_size, max_target_sequence_length, num_classes)
        logits = self._output_projection_layer(decoder_output)
    else:
        batch_size = source_mask.size()[0]
        _, target_sequence_length = targets.size()

        # The last input from the target is either padding or the end symbol.
        # Either way, we don't have to process it.
        num_decoding_steps = target_sequence_length - 1

        # Initialize target predictions with the start index.
        # shape: (batch_size,)
        last_predictions = source_mask.new_full(
            (batch_size,), fill_value=self._start_index
        )

        # shape: (steps, batch_size, target_embedding_dim)
        steps_embeddings = torch.Tensor([])

        step_logits: List[torch.Tensor] = []

        for timestep in range(num_decoding_steps):
            if (
                self.training
                and torch.rand(1).item() < self._scheduled_sampling_ratio
            ):
                # Use gold tokens at test time and at a rate of 1 - _scheduled_sampling_ratio
                # during training.
                # shape: (batch_size, steps, target_embedding_dim)
                state["previous_steps_predictions"] = steps_embeddings

                # shape: (batch_size, )
                effective_last_prediction = last_predictions
            else:
                # shape: (batch_size, )
                effective_last_prediction = targets[:, timestep]

                if timestep == 0:
                    state["previous_steps_predictions"] = torch.Tensor([])
                else:
                    # shape: (batch_size, steps, target_embedding_dim)
                    state["previous_steps_predictions"] = target_embedding[
                        :, :timestep
                    ]

            # shape: (batch_size, num_classes)
            output_projections, state = self._prepare_output_projections(
                effective_last_prediction, state
            )

            # list of tensors, shape: (batch_size, 1, num_classes)
            step_logits.append(output_projections.unsqueeze(1))

            # shape (predicted_classes): (batch_size,)
            _, predicted_classes = torch.max(output_projections, 1)

            # shape (predicted_classes): (batch_size,)
            last_predictions = predicted_classes

            # shape: (batch_size, 1, target_embedding_dim)
            last_predictions_embeddings = self.target_embedder(
                last_predictions
            ).unsqueeze(1)

            # This step is required, since we want to keep up two different prediction histories: gold and real
            if steps_embeddings.shape[-1] == 0:
                # There are no previous steps, except for start vectors in ``last_predictions``
                # shape: (group_size, 1, target_embedding_dim)
                steps_embeddings = last_predictions_embeddings
            else:
                # shape: (group_size, steps_count, target_embedding_dim)
                steps_embeddings = torch.cat(
                    [steps_embeddings, last_predictions_embeddings], 1
                )

        # shape: (batch_size, num_decoding_steps, num_classes)
        logits = torch.cat(step_logits, 1)

    # Compute loss.
    target_mask = util.get_text_field_mask(target_tokens)
    loss = self._get_loss(logits, targets, target_mask)

    # TODO: We will be using beam search to get predictions for validation, but if beam size is 1
    # we could consider taking the last_predictions here and building step_predictions
    # and use that instead of running beam search again, if performance in validation is taking a hit
    output_dict = {"loss": loss}

    return output_dict
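# A tiny standalone sketch (made-up tensors) of the scheduled-sampling decision taken at each
# decoding step above: with probability `scheduled_sampling_ratio` the decoder is fed its own
# previous prediction, otherwise it is fed the gold target token (plain teacher forcing when
# the ratio is 0).
import torch

scheduled_sampling_ratio = 0.25
targets = torch.tensor([[5, 7, 2], [4, 9, 2]])   # gold token ids, (batch, seq_len)
last_predictions = torch.tensor([6, 4])          # model's previous-step argmax, (batch,)
timestep, training = 1, True

if training and torch.rand(1).item() < scheduled_sampling_ratio:
    decoder_input = last_predictions             # feed back the model's own prediction
else:
    decoder_input = targets[:, timestep]         # teacher forcing with the gold token
print(decoder_input)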
def forward(self, xf, xh_list):
    decomp_att_list, maps = self.decomp_att(xf, xh_list)
    decomp_fh_list = [
        self.conv_fh(torch.cat([xf * decomp_att_list[i + 1], xh_list[i]], dim=1))
        for i in range(len(xh_list))
    ]
    return decomp_fh_list, decomp_att_list, maps
def forward(self, x):
    x = self.relu(x)
    out = torch.cat([self.conv_1(x), self.conv_2(x[:, :, 1:, 1:])], dim=1)
    out = self.bn(out)
    return out
def forward(self, F_dep_hu, hv):
    huv = self.R_dep(torch.cat([F_dep_hu, hv], dim=1))
    return huv
def forward(self, xp, xh, xf, xl):
    # _, _, th, tw = xp.size()
    # _, _, h, w = xh.size()
    #
    # xh = F.interpolate(xh, (th, tw), mode='bilinear', align_corners=True)
    # xf = F.interpolate(xf, (th, tw), mode='bilinear', align_corners=True)

    # feature transform
    f_node = self.f_conv(xf)
    p_conv = self.p_conv(xp)
    p_node_list = list(torch.split(p_conv, self.hidden_dim, dim=1))
    h_conv = self.h_conv(xh)
    h_node_list = list(torch.split(h_conv, self.hidden_dim, dim=1))
    bg_node = self.bg_conv(torch.cat([xp, xh, xf], dim=1))

    # node supervision
    bg_cls = self.bg_cls(bg_node)
    p_cls = self.p_cls(p_conv)
    h_cls = self.h_cls(h_conv)
    f_cls = self.f_cls(f_node)
    f_seg = torch.cat([bg_cls, f_cls], dim=1)
    h_seg = torch.cat([bg_cls, h_cls], dim=1)
    p_seg = torch.cat([bg_cls, p_cls], dim=1)
    f_att_list = list(torch.split(self.softmax(f_seg), 1, dim=1))
    h_att_list = list(torch.split(self.softmax(h_seg), 1, dim=1))
    p_att_list = list(torch.split(self.softmax(p_seg), 1, dim=1))

    # output
    p_seg = [p_seg]
    h_seg = [h_seg]
    f_seg = [f_seg]
    decomp_fh_att_map = []
    decomp_up_att_map = []
    decomp_lp_att_map = []
    Fdep_att_list = []

    # input
    p_node_list = [p_node_list]
    h_node_list = [h_node_list]
    f_node = [f_node]
    f_att_list = [f_att_list]
    h_att_list = [h_att_list]
    p_att_list = [p_att_list]

    for iter in range(3):
        p_fea_list_new, h_fea_list_new, f_fea_new, decomp_fh_att_map_new, decomp_up_att_map_new, \
            decomp_lp_att_map_new, Fdep_att_list_new = \
            self.gnn(p_node_list[iter], h_node_list[iter], f_node[iter], xp,
                     f_att_list[iter], h_att_list[iter], p_att_list[iter])

        # node supervision
        p_cls_new = self.p_cls(torch.cat(p_fea_list_new, dim=1))
        h_cls_new = self.h_cls(torch.cat(h_fea_list_new, dim=1))
        f_cls_new = self.f_cls(f_fea_new)
        f_seg_new = torch.cat([bg_cls, f_cls_new], dim=1)
        h_seg_new = torch.cat([bg_cls, h_cls_new], dim=1)
        p_seg_new = torch.cat([bg_cls, p_cls_new], dim=1)

        p_node_list.append(p_fea_list_new)
        h_node_list.append(h_fea_list_new)
        f_node.append(f_fea_new)

        f_att_list_new = list(torch.split(self.softmax(f_seg_new), 1, dim=1))
        h_att_list_new = list(torch.split(self.softmax(h_seg_new), 1, dim=1))
        p_att_list_new = list(torch.split(self.softmax(p_seg_new), 1, dim=1))
        f_att_list.append(f_att_list_new)
        h_att_list.append(h_att_list_new)
        p_att_list.append(p_att_list_new)

        p_seg.append(p_seg_new)
        h_seg.append(h_seg_new)
        f_seg.append(f_seg_new)
        decomp_fh_att_map.append(decomp_fh_att_map_new)
        decomp_up_att_map.append(decomp_up_att_map_new)
        decomp_lp_att_map.append(decomp_lp_att_map_new)
        Fdep_att_list.append(Fdep_att_list_new)

    xphf_infer = torch.cat([bg_node] + p_fea_list_new, dim=1)
    p_seg_final = self.final_cls(xphf_infer, xp, xh, xf, xl)
    p_seg.append(p_seg_final)

    return p_seg, h_seg, f_seg, decomp_fh_att_map, decomp_up_att_map, decomp_lp_att_map, Fdep_att_list
def collate_fn(batch):
    img, label, path, shapes = zip(*batch)  # transposed
    for i, l in enumerate(label):
        l[:, 0] = i  # add target image index for build_targets()
    return torch.stack(img, 0), torch.cat(label, 0), path, shapes
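# A short, self-contained example (made-up label tensors) of why collate_fn writes the batch
# index into column 0: after torch.cat the per-image targets are stacked into one flat tensor,
# and build_targets() can still tell which image each row belongs to through that column. The
# [img_idx, cls, x, y, w, h] layout shown here is only illustrative.
import torch

label_img0 = torch.tensor([[0., 1, 0.5, 0.5, 0.2, 0.2]])
label_img1 = torch.tensor([[0., 0, 0.3, 0.4, 0.1, 0.1],
                           [0., 2, 0.7, 0.6, 0.3, 0.2]])
labels = [label_img0, label_img1]
for i, l in enumerate(labels):
    l[:, 0] = i                       # tag each row with its image index
merged = torch.cat(labels, 0)
print(merged[:, 0])                   # tensor([0., 1., 1.])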
def forward(self, xh, xp_list, xp_att_list):
    com_att = sum(xp_att_list)
    xph_message = sum([
        self.conv_ch(torch.cat([xh, xp * com_att], dim=1)) for xp in xp_list
    ])
    return xph_message
def detect_image(self, image_id, image):
    self.confidence = 0.01
    f = open("./input/detection-results/" + image_id + ".txt", "w")
    image_shape = np.array(np.shape(image)[0:2])

    crop_img = np.array(
        letterbox_image(image, (self.model_image_size[0], self.model_image_size[1])))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1))
    photo = photo.astype(np.float32)
    images = []
    images.append(photo)
    images = np.asarray(images)

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)

    output_list = []
    for i in range(2):
        output_list.append(self.yolo_decodes[i](outputs[i]))
    output = torch.cat(output_list, 1)
    batch_detections = non_max_suppression(output, len(self.class_names),
                                           conf_thres=self.confidence,
                                           nms_thres=self.iou)
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except:
        return image

    top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
    top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, 0], -1),
        np.expand_dims(top_bboxes[:, 1], -1),
        np.expand_dims(top_bboxes[:, 2], -1),
        np.expand_dims(top_bboxes[:, 3], -1))

    # Remove the gray letterbox padding
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array([self.model_image_size[0], self.model_image_size[1]]),
                               image_shape)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = str(top_conf[i])
        top, left, bottom, right = boxes[i]
        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6],
            str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))

    f.close()
    return
def make_data():
    a_range = torch.arange(1, 3, dtype=torch.float, requires_grad=True).view((1, -1))
    a_range = torch.cat([a_range, a_range + 2], dim=0).unsqueeze(0)
    a_range = a_range ** 2
    return a_range