def bootstrapped_cross_entropy2d(input, target, K, weight=None, size_average=True):
    """Bootstrapped cross entropy: average the K largest per-pixel losses of each image.

    Each image in the batch is processed on its own (top-K is taken per
    image, not over the whole batch) and the per-image results are averaged.

    Args:
        input: Logits, unpacked per image as ``(n, c, h, w)``.
        target: Integer class map per image. Pixels with a negative label are
            dropped before the loss; pixels labelled 250 are passed to
            ``F.nll_loss`` as ``ignore_index`` and therefore contribute 0.
        K: Number of highest-loss pixels kept per image.
        weight: Optional per-class weights forwarded to ``F.nll_loss``.
        size_average: Accepted for interface compatibility; it does not
            influence the result.

    Returns:
        The mean over the batch of ``sum(top-K losses) / K``.
    """
    batch_size = input.size()[0]

    def _bootstrap_xentropy_single(input, target, K, weight=None, size_average=True):
        n, c, h, w = input.size()
        log_p = F.log_softmax(input, dim=1)
        # (n, c, h, w) -> (n*h*w, c): one row of class log-probabilities per pixel.
        log_p = log_p.permute(0, 2, 3, 1).contiguous().view(-1, c)
        # Keep only the rows whose label is non-negative.
        log_p = log_p[target.reshape(-1) >= 0]
        target = target[target >= 0]
        # reduction='none' replaces the deprecated reduce=False/size_average=False
        # pair and keeps one loss value per pixel for the top-K selection.
        loss = F.nll_loss(log_p, target, weight=weight, ignore_index=250,
                          reduction='none')
        topk_loss, _ = loss.topk(K)
        return topk_loss.sum() / K

    loss = 0.0
    # Bootstrap from each image not entire batch
    for i in range(batch_size):
        loss += _bootstrap_xentropy_single(input=torch.unsqueeze(input[i], 0),
                                           target=torch.unsqueeze(target[i], 0),
                                           K=K,
                                           weight=weight,
                                           size_average=size_average)
    return loss / float(batch_size)
def outer(vec1, vec2=None):
    '''Outer product of vectors, with optional batching on either argument.

    Args:
        vec1: A vector of size (Size1,) or a batch of size (Batch, Size1).
        vec2: A vector of size (Size2,) or a batch of size (Batch, Size2).
            When None, vec1 is used for both operands.

    Returns:
        (Size1, Size2) when both inputs are 1-D, otherwise the batched outer
        product (Batch, Size1, Size2); a 1-D operand is broadcast over the
        batch of the other one.
    '''
    if vec2 is None:
        vec2 = vec1

    if vec1.dim() == 1 and vec2.dim() == 1:
        return torch.ger(vec1, vec2)

    # batch outer product: promote 1-D operands to a batch of one
    lhs = vec1.unsqueeze(0) if vec1.dim() == 1 else vec1
    rhs = vec2.unsqueeze(0) if vec2.dim() == 1 else vec2
    lhs = lhs.unsqueeze(-1)   # (..., Size1, 1)
    rhs = rhs.unsqueeze(-2)   # (..., 1, Size2)

    if lhs.size(0) != rhs.size(0):
        # differing batch sizes: rely on matmul broadcasting
        return lhs.matmul(rhs)
    return torch.bmm(lhs, rhs)
def ycrcb_to_rgb_torch(input_tensor, delta = 0.5):
    """Convert a 4-D tensor whose dim-1 channels are (Y, Cr, Cb) to (R, G, B).

    Args:
        input_tensor: Tensor indexed as ``[:, channel, :, :]`` with channel
            0 = Y, 1 = Cr, 2 = Cb.
        delta: Offset subtracted from both chroma channels before mixing.

    Returns:
        Tensor of the same layout with channels (R, G, B) along dim 1.
    """
    luma = input_tensor[:, 0, :, :]
    cr_centered = input_tensor[:, 1, :, :] - delta
    cb_centered = input_tensor[:, 2, :, :] - delta

    red = luma + 1.403 * cr_centered
    green = luma - 0.714 * cr_centered - 0.344 * cb_centered
    blue = luma + 1.773 * cb_centered

    # Stacking on dim 1 re-creates the channel axis that indexing removed.
    return torch.stack((red, green, blue), dim=1)
def _morph_face(self, face, expresion):
    """Run the model on a single face with the given expression vector.

    Args:
        face: Array accepted by ``Image.fromarray``; it is passed through
            ``self._transform`` and given a leading batch dimension.
        expresion: NumPy array; divided by 5.0 and used both as the real and
            the desired condition.

    Returns:
        The ``'concat'`` entry of the images returned by the model.
    """
    face_batch = torch.unsqueeze(self._transform(Image.fromarray(face)), 0)
    cond_batch = torch.unsqueeze(torch.from_numpy(expresion / 5.0), 0)

    test_batch = {
        'real_img': face_batch,
        'real_cond': cond_batch,
        'desired_cond': cond_batch,
        'sample_id': torch.FloatTensor(),
        'real_img_path': [],
    }
    self._model.set_input(test_batch)

    imgs, _ = self._model.forward(keep_data_for_visuals=False,
                                  return_estimates=True)
    return imgs['concat']
def forward(self, image_feat_variable, input_question_variable, input_answers=None, **kwargs):
    """Compute classifier logits from image features and a question.

    Args:
        image_feat_variable: Either one batched feature tensor, or a list
            with one feature tensor per sample (attended one at a time).
        input_question_variable: Input handed to every question embedding model.
        input_answers: Unused.
        **kwargs: Unused.

    Returns:
        Output of ``self.classifier`` on the joint question/image embedding.
    """
    # Concatenate the outputs of all question encoders along dim 1.
    question_embedding = torch.cat(
        [embed(input_question_variable) for embed in self.question_embedding_models],
        dim=1)

    if isinstance(image_feat_variable, list):
        per_sample = []
        for row, feat in enumerate(image_feat_variable):
            ques_row = torch.unsqueeze(question_embedding[row, :], 0)
            feat_row = torch.unsqueeze(feat, dim=0)
            weights = self.image_attention_model(feat_row, ques_row)
            # The un-batched feature broadcasts against the batched weights.
            per_sample.append(torch.sum(weights * feat, dim=1))
        image_embedding = torch.cat(per_sample, dim=0)
    else:
        weights = self.image_attention_model(image_feat_variable, question_embedding)
        image_embedding = torch.sum(weights * image_feat_variable, dim=1)

    question_part = self.nonLinear_question(question_embedding)
    image_part = self.nonLinear_image(image_embedding)
    return self.classifier(question_part * image_part)
def __call__(self, grid):
    """Prepend normalised x/y/z coordinate channels (and optionally radius) to ``grid``.

    Args:
        grid: 5-D tensor unpacked as (batch, channels, dimX, dimY, dimZ).

    Returns:
        ``grid`` with 3 coordinate channels in [-1, 1] (plus one radius channel
        scaled to a maximum of 1 when ``self.with_r`` is set) concatenated in
        front of the existing channels along dim 1.
    """
    batch_size, _, grid_dimX, grid_dimY, grid_dimZ = grid.size()
    k = 1.0
    volume = (grid_dimX, grid_dimY, grid_dimZ)

    def _axis_coords(length, view_shape):
        # Index ramp laid out along one axis, broadcast to the full volume,
        # then mapped linearly onto [-k, k].
        ramp = torch.arange(length, dtype=torch.float32).view(*view_shape)
        return 2.0 * k * ramp.expand(*volume) / (length - 1.0) - 1.0

    x_coords = _axis_coords(grid_dimX, (grid_dimX, 1, 1))
    y_coords = _axis_coords(grid_dimY, (1, grid_dimY, 1))
    z_coords = _axis_coords(grid_dimZ, (1, 1, grid_dimZ))

    channels = [x_coords, y_coords, z_coords]
    if self.with_r:
        radius = ((x_coords ** 2) + (y_coords ** 2) + (z_coords ** 2)) ** 0.5
        channels.append(k * radius / torch.max(radius))

    coords = torch.stack(channels, dim=0)
    coords = coords.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
    return torch.cat((coords.to(grid.device), grid), dim=1)
def predict(self, wm, s, a, ls):
    """Rebuild the embedding layer from ``wm`` and predict for sentences ``s`` with aspects ``a``.

    Args:
        wm: Argument handed to ``create_emb_layer`` to build ``self.embedding``.
        s: Sentence token indices (first dimension is the batch).
        a: Aspect token indices.
        ls: Per-sentence lengths; positions ``j < ls[i] - 1`` get the aspect
            vector appended, the remaining positions get zeros.

    Returns:
        ``self.softmax`` applied to the fully connected layer's output.
    """
    # NOTE(review): the extent of the no_grad block is reconstructed from a
    # collapsed source line - confirm it should end before the LSTM call.
    with torch.no_grad():
        self.embedding, _ = create_emb_layer(wm)
        s_embedded = self.embedding(s)
        a_embedded = self.embedding(a)

        # Average the aspect embedding
        aspect_mean = torch.zeros(len(s), 1, 100)
        for idx, aspect in enumerate(a_embedded):
            nonzero_count = len(torch.nonzero(aspect))
            if nonzero_count:
                aspect_mean[idx] = torch.unsqueeze(torch.sum(aspect, 0) / nonzero_count, 0)
        a_embedded = aspect_mean

        # Concatenate each word in sentence with aspect vector
        embedded = torch.zeros(len(s), 40, 200)
        zero_tag = torch.zeros(100).cuda()
        for i, sentence in enumerate(s_embedded):
            for j in range(40):
                if j < (ls[i] - 1):
                    tail = torch.squeeze(a_embedded[i].cuda(), 0)
                else:
                    tail = zero_tag
                embedded[i][j] = torch.unsqueeze(
                    torch.cat((sentence[j].cuda(), tail), 0), 0)

    out, (h, c) = self.lstm(embedded.cuda())
    # Join the last two hidden-state slices along the feature dimension.
    hidden = self.dropout(torch.cat((h[-2, :, :], h[-1, :, :]), dim=1))
    return self.softmax(self.fc(hidden))
def forward(self, x):
    """Forward pass through the stem, the Mixed blocks and the classifier.

    Args:
        x: Image batch with channels on dim 1 (size comments assume 299 x 299 x 3).

    Returns:
        The classifier output; while training with ``self.aux_logits`` set,
        a tuple ``(output, aux_output)``.
    """
    if self.transform_input:
        # Per-channel affine re-scaling, applied as scale * x + shift.
        channel_stats = ((0.229, 0.485), (0.224, 0.456), (0.225, 0.406))
        x = torch.cat(
            [torch.unsqueeze(x[:, ch], 1) * (std / 0.5) + (mean - 0.5) / 0.5
             for ch, (std, mean) in enumerate(channel_stats)],
            1)

    # 299 x 299 x 3 -> 147 x 147 x 64
    for layer in (self.Conv2d_1a_3x3, self.Conv2d_2a_3x3, self.Conv2d_2b_3x3):
        x = layer(x)
    x = F.max_pool2d(x, kernel_size=3, stride=2)      # 73 x 73 x 64

    # 73 x 73 x 64 -> 71 x 71 x 192
    for layer in (self.Conv2d_3b_1x1, self.Conv2d_4a_3x3):
        x = layer(x)
    x = F.max_pool2d(x, kernel_size=3, stride=2)      # 35 x 35 x 192

    # 35 x 35 x 192 -> 17 x 17 x 768
    for layer in (self.Mixed_5b, self.Mixed_5c, self.Mixed_5d,
                  self.Mixed_6a, self.Mixed_6b, self.Mixed_6c,
                  self.Mixed_6d, self.Mixed_6e):
        x = layer(x)

    use_aux = self.training and self.aux_logits
    if use_aux:
        aux = self.AuxLogits(x)

    # 17 x 17 x 768 -> 8 x 8 x 2048
    for layer in (self.Mixed_7a, self.Mixed_7b, self.Mixed_7c):
        x = layer(x)

    x = F.avg_pool2d(x, kernel_size=8)                # 1 x 1 x 2048
    x = F.dropout(x, training=self.training)
    x = self.fc(x.view(x.size(0), -1))                # 1000 (num_classes)

    if use_aux:
        return x, aux
    return x
def forward(self, output, target):
    """Focal loss: cross entropy down-weighted by ``(1 - p_target) ** self.y``.

    Args:
        output: Logits with the class scores on dim 1.
        target: Integer class indices; ``output`` without its dim 1.

    Returns:
        Mean of ``0.25 * (1 - p_t) ** self.y * (-log p_t)`` over all elements.
    """
    target_idx = torch.unsqueeze(target, 1)
    # dim=1 is given explicitly: the implicit-dim form is deprecated and would
    # normalise over dim 0 for 3-D inputs, disagreeing with gather(1, ...).
    log_pt = F.log_softmax(output, dim=1).gather(1, target_idx)
    # Recover p_t from its log instead of running a second softmax.
    pt = log_pt.exp()

    alpha = 0.25
    focus_p = torch.pow(1 - pt, self.y)
    weight_nll = alpha * focus_p * (-log_pt)
    return weight_nll.mean()
def _mask_attentions(attention, image_locs):
    """Zero, in place, the attention entries at locations >= ``image_locs``.

    Args:
        attention: Tensor unpacked as (batch_size, num_loc, n_att); its data
            is modified in place.
        image_locs: Per-sample location count; entries whose location index
            is greater than or equal to this count are set to 0.

    Returns:
        The same ``attention`` object after masking.
    """
    batch_size, num_loc, n_att = attention.data.shape

    # Build the index ramp on the device of image_locs instead of consulting
    # a global CUDA flag, so the comparison works wherever the inputs live.
    positions = torch.arange(num_loc, dtype=torch.long, device=image_locs.device)
    positions = positions.unsqueeze(0).expand(batch_size, num_loc)
    limits = torch.unsqueeze(image_locs.data, 1).expand(batch_size, num_loc)

    mask = torch.ge(positions, limits)
    mask = torch.unsqueeze(mask, 2).expand_as(attention).to(attention.device)
    attention.data.masked_fill_(mask, 0)
    return attention
def run(self):
    """Train the agent for up to NUM_EPISODE episodes, logging each episode to result.log.

    An episode that ends before step 199 earns reward +1 and extends the
    success streak; one that ends at step >= 199 earns -1 and resets it.
    Every finished episode is printed and appended to ``result.log``.
    """
    complete_episodes = 0
    episode_final = False

    # The context manager guarantees the log is flushed and closed even when
    # the environment or the agent raises in the middle of training.
    with open('result.log', 'w') as output:
        print(self.num_states, self.num_actions)

        for episode in range(NUM_EPISODE):
            observation = self.env.reset()
            state = torch.from_numpy(observation).type(torch.FloatTensor)
            state = torch.unsqueeze(state, 0)

            for step in range(MAX_STEPS):
                if episode_final:
                    self.env.render(mode='rgb_array')

                action = self.agent.get_action(state, episode)
                observation_next, _, done, _ = self.env.step(action.item())

                state_next = torch.from_numpy(observation_next).type(torch.FloatTensor)
                state_next = torch.unsqueeze(state_next, 0)
                reward = torch.FloatTensor([0.0])

                if done:
                    # No successor state is stored for a finished episode.
                    state_next = None
                    if 199 <= step:
                        reward = torch.FloatTensor([-1.0])
                        complete_episodes = 0
                    else:
                        reward = torch.FloatTensor([1.0])
                        complete_episodes = complete_episodes + 1

                self.agent.memory(state, action, state_next, reward)
                self.agent.update_q_function()
                state = state_next

                if done:
                    message = 'episode: {0}, step: {1}'.format(episode, step)
                    print(message)
                    output.write(message + '\n')
                    break

            if episode_final:
                break

            if 10 <= complete_episodes:
                print('success 10 times in sequence')
                # episode_final = True

        self.env.close()
def forward(self, img, qst):
    """Relation-network forward pass over all pairs of feature-map cells.

    Args:
        img: Image batch fed to ``self.conv``; the resulting feature map is
            treated as square (its dim-2 size ``d`` is used for both axes).
        qst: Question vectors, one row per sample.

    Returns:
        ``self.fcout`` applied to the summed pairwise relation features.
    """
    x = self.conv(img)   # e.g. (64 x 24 x 5 x 5)

    """g"""
    mb = x.size()[0]
    n_channels = x.size()[1]
    d = x.size()[2]
    # Number of "objects" (cells); derived from the feature map instead of
    # being hard-coded to 25.
    n_obj = d * d

    # (mb x n_obj x n_channels)
    x_flat = x.view(mb, n_channels, n_obj).permute(0, 2, 1)

    # add coordinates
    x_flat = torch.cat([x_flat, self.coord_tensor], 2)

    # add question everywhere
    qst = torch.unsqueeze(qst, 1)
    qst = qst.repeat(1, n_obj, 1)
    qst = torch.unsqueeze(qst, 2)

    # cast all pairs against each other
    x_i = torch.unsqueeze(x_flat, 1)            # (mb x 1 x n_obj x feat)
    x_i = x_i.repeat(1, n_obj, 1, 1)            # (mb x n_obj x n_obj x feat)
    x_j = torch.unsqueeze(x_flat, 2)            # (mb x n_obj x 1 x feat)
    x_j = torch.cat([x_j, qst], 3)
    x_j = x_j.repeat(1, 1, n_obj, 1)            # (mb x n_obj x n_obj x feat+q)

    # concatenate all together
    x_full = torch.cat([x_i, x_j], 3)

    # reshape for passing through network; the pair-feature width is read
    # from the tensor rather than assumed to be 63
    x_ = x_full.view(mb * n_obj * n_obj, x_full.size(3))
    x_ = F.relu(self.g_fc1(x_))
    x_ = F.relu(self.g_fc2(x_))
    x_ = F.relu(self.g_fc3(x_))
    x_ = F.relu(self.g_fc4(x_))

    # reshape again and sum over all pairs
    x_g = x_.view(mb, n_obj * n_obj, x_.size(1))
    # NOTE(review): squeeze() also drops the batch dimension when mb == 1;
    # kept as-is so the returned shape does not change for existing callers.
    x_g = x_g.sum(1).squeeze()

    """f"""
    x_f = F.relu(self.f_fc1(x_g))
    return self.fcout(x_f)
def plot_means(ax, model, data, xlimits=[-6, 6], ylimits=[-6, 6], numticks=101, cmap=None, alpha=1., legend=False, n_samps=10, cs_to_use=None):
    """Scatter-plot the first two components of the encoder mean for up to ``n_samps`` samples.

    Args:
        ax: Axes whose ticks are cleared.
        model: Object whose ``encode`` returns ``(mean, logvar)``.
        data: Indexable collection of samples; each is encoded with a
            leading batch dimension added.
        alpha: Marker transparency passed to ``plt.scatter``.
        n_samps: Maximum number of samples plotted (capped at ``len(data)``).
        xlimits, ylimits, numticks, cmap, legend, cs_to_use: Not used by this
            function; kept so the signature stays unchanged.
    """
    if len(data) < n_samps:
        n_samps = len(data)

    means = []
    for samp_i in range(n_samps):
        if samp_i % 1000 == 0:
            # Function-call form: valid on Python 3, same output on Python 2.
            print(samp_i)
        mean, logvar = model.encode(Variable(torch.unsqueeze(data[samp_i], 0)))
        means.append(np.array([mean.data[0][0], mean.data[0][1]]))

    means = np.array(means)
    plt.scatter(means.T[0], means.T[1], s=.1, alpha=alpha)

    ax.set_yticks([])
    ax.set_xticks([])
    plt.gca().set_aspect('equal', adjustable='box')
def update_parameters(self, batch):
    """Run one critic update, one actor update and a soft update of both target networks.

    Args:
        batch: Object with ``state``, ``next_state``, ``action``, ``reward``
            and ``mask`` attributes, each a sequence of tensors that are
            concatenated into batch tensors.
    """
    state_batch = Variable(torch.cat(batch.state))
    next_state_batch = Variable(torch.cat(batch.next_state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))
    # NOTE(review): mask_batch is built but never enters the target below, so
    # the target bootstraps from next_state for every transition - confirm
    # whether it was meant to multiply next_state_action_values.
    mask_batch = Variable(torch.cat(batch.mask))

    # `volatile=True` no longer has any effect; no_grad() is what keeps the
    # target networks out of the autograd graph while building the target.
    with torch.no_grad():
        next_action_batch = self.actor_target(next_state_batch)
        next_state_action_values = self.critic_target(next_state_batch, next_action_batch)
        reward_batch = torch.unsqueeze(reward_batch, 1)
        expected_state_action_batch = reward_batch + (self.gamma * next_state_action_values)

    # Critic step: regress Q(s, a) onto the bootstrapped target.
    self.critic_optim.zero_grad()
    state_action_batch = self.critic((state_batch), (action_batch))
    value_loss = MSELoss(state_action_batch, expected_state_action_batch)
    value_loss.backward()
    self.critic_optim.step()

    # Actor step: maximise the critic's value of the actor's own actions.
    self.actor_optim.zero_grad()
    policy_loss = -self.critic((state_batch), self.actor((state_batch)))
    policy_loss = policy_loss.mean()
    policy_loss.backward()
    self.actor_optim.step()

    soft_update(self.actor_target, self.actor, self.tau)
    soft_update(self.critic_target, self.critic, self.tau)
def loss(anchors, data, pred, threshold):
    """Build the dictionary of detection loss terms for one batch.

    Args:
        anchors: Anchor tensor; moved with ``utils.ensure_device`` before use.
        data: Ground-truth dict; the keys ``'yx_min'`` and ``'yx_max'`` are read here.
        pred: Prediction dict; reads ``'iou'``, ``'feature'``, ``'yx_min'``,
            ``'yx_max'``, ``'center_offset'``, ``'size_norm'`` and, when
            present, ``'logits'``.
        threshold: Cells that are not positive and whose matched IoU is below
            this value are treated as negatives.

    Returns:
        ``(loss, debug)`` where ``loss`` maps term names (``'foreground'``,
        ``'background'``, ``'center'``, ``'size'`` and optionally ``'cls'``)
        to values divided by the number of elements of ``positive``, and
        ``debug`` holds the intermediate ``iou``, ``data``, ``positive`` and
        ``negative`` tensors.
    """
    iou = pred['iou']
    # get_device() is only queried when CUDA is available at all.
    device_id = iou.get_device() if torch.cuda.is_available() else None
    rows, cols = pred['feature'].size()[-2:]
    # .data is passed so the matching step works on raw tensors.
    iou_matrix, _iou, _, _data = iou_match(pred['yx_min'].data, pred['yx_max'].data, data)
    anchors = utils.ensure_device(anchors, device_id)
    positive = fit_positive(rows, cols, *(data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    negative = ~positive & (_iou < threshold)
    _center_offset, _size_norm = fill_norm(*(_data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    positive, negative, _iou, _center_offset, _size_norm, _cls = (torch.autograd.Variable(t) for t in (positive, negative, _iou, _center_offset, _size_norm, _data['cls']))
    # Extra trailing axis so the mask can index tensors with one more dimension.
    _positive = torch.unsqueeze(positive, -1)
    loss = {}
    # iou
    loss['foreground'] = F.mse_loss(iou[positive], _iou[positive], size_average=False)
    loss['background'] = torch.sum(square(iou[negative]))
    # bbox
    loss['center'] = F.mse_loss(pred['center_offset'][_positive], _center_offset[_positive], size_average=False)
    loss['size'] = F.mse_loss(pred['size_norm'][_positive], _size_norm[_positive], size_average=False)
    # cls
    if 'logits' in pred:
        logits = pred['logits']
        # More than 3 dims on the class target selects the MSE-on-softmax
        # branch; otherwise the target is used as class indices.
        if len(_cls.size()) > 3:
            loss['cls'] = F.mse_loss(F.softmax(logits, -1)[_positive], _cls[_positive], size_average=False)
        else:
            loss['cls'] = F.cross_entropy(logits[_positive].view(-1, logits.size(-1)), _cls[positive].view(-1))
    # normalize
    # cnt is the total number of elements in `positive` (product of its
    # sizes), not the number of True entries.
    cnt = float(np.multiply.reduce(positive.size()))
    for key in loss:
        loss[key] /= cnt
    return loss, dict(iou=_iou, data=_data, positive=positive, negative=negative)
def main():
    """Fit a small network to noisy y = x^2 samples and plot progress every 100 steps."""
    x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
    y = x.pow(2) + 0.2 * torch.rand(x.size())
    x = Variable(x)
    y = Variable(y)

    net = RegreNN(1, 1)
    optm = torch.optim.SGD(net.parameters(), lr=0.5e-1)
    loss_func = torch.nn.MSELoss()

    plt.ion()
    for i in range(600):
        v = net(x)
        loss = loss_func(v, y)
        optm.zero_grad()
        loss.backward()
        optm.step()

        if i % 100 == 0:
            print(loss)
            plt.cla()
            plt.scatter(x.data.numpy(), y.data.numpy())
            plt.plot(x.data.numpy(), v.data.numpy(), 'r-', lw=5)
            # The loss is a 0-dim tensor; indexing it with [0] raises, so the
            # Python float is read with .item().
            plt.text(0.5, 0, 'Loss=%.4f' % loss.item(),
                     fontdict={'size': 20, 'color': 'red'})
            plt.pause(0.1)

    plt.ioff()
    plt.show()
def forward(self, x):
    """Apply ``conv1 -> ReLU -> conv2`` to 2-D or 3-D input via a 4-D view.

    Args:
        x: Either ``N x k x dim`` or ``N x dim``.

    Returns:
        ``N x k x out_dim`` for 3-D input, ``N x out_dim`` for 2-D input.

    Raises:
        ValueError: If ``x`` is neither 2-D nor 3-D.
    """
    rank = len(x.size())
    if rank == 3:
        # N x k x dim -> N x dim x k x 1
        x_reshape = torch.unsqueeze(x.permute(0, 2, 1), 3)
    elif rank == 2:
        # N x dim -> N x dim x 1 x 1
        x_reshape = torch.unsqueeze(torch.unsqueeze(x, 2), 3)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            'expected a 2-D or 3-D input, got %d dimensions' % rank)

    iatt_conv1 = self.conv1(x_reshape)   # N x hidden_dim x * x 1
    iatt_relu = F.relu(iatt_conv1)
    iatt_conv2 = self.conv2(iatt_relu)   # N x out_dim x * x 1

    if rank == 3:
        return torch.squeeze(iatt_conv2, 3).permute(0, 2, 1)
    return torch.squeeze(torch.squeeze(iatt_conv2, 3), 2)
def plot_isocontours_expected(ax, model, data, xlimits=[-6, 6], ylimits=[-6, 6], numticks=101, cmap=None, alpha=1., legend=False):
    """Contour-plot the average encoder density over (at most) the first 10 samples.

    For every sample the encoder's ``(mean, logvar)`` are passed to
    ``lognormal4`` on a regular 2-D grid; the exponentiated results are
    averaged and drawn as contours.

    Args:
        ax: Axes whose ticks are cleared.
        model: Object whose ``encode`` returns ``(mean, logvar)``.
        data: Indexable collection of samples.
        xlimits, ylimits: ``(low, high)`` extents of the grid.
        numticks: Number of grid points per axis.
        cmap, alpha: Forwarded to ``plt.contour``.
        legend: When true, add a legend built from the contour set.

    Returns:
        The averaged density as a NumPy array shaped like the grid.
    """
    x = np.linspace(*xlimits, num=numticks)
    y = np.linspace(*ylimits, num=numticks)
    X, Y = np.meshgrid(x, y)
    # One (x, y) row per grid point.
    aaa = torch.from_numpy(np.concatenate([np.atleast_2d(X.ravel()), np.atleast_2d(Y.ravel())]).T).type(torch.FloatTensor)

    n_samps = 10
    if len(data) < n_samps:
        n_samps = len(data)

    for samp_i in range(n_samps):
        if samp_i % 1000 == 0:
            # Function-call form: valid on Python 3, same output on Python 2.
            print(samp_i)
        mean, logvar = model.encode(Variable(torch.unsqueeze(data[samp_i], 0)))
        bbb = lognormal4(torch.Tensor(aaa), torch.squeeze(mean.data), torch.squeeze(logvar.data))
        ddd = torch.exp(bbb)
        if samp_i == 0:
            sum_of_all = ddd
        else:
            sum_of_all = sum_of_all + ddd

    avg_of_all = sum_of_all / n_samps
    Z = avg_of_all.view(X.shape)
    Z = Z.numpy()

    cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha)
    if legend:
        nm, lbl = cs.legend_elements()
        plt.legend(nm, lbl, fontsize=4)

    ax.set_yticks([])
    ax.set_xticks([])
    plt.gca().set_aspect('equal', adjustable='box')
    return Z
def _forward_rnn(cell, input_, length, hx):
    """Unroll ``cell`` over the leading (time) dimension of ``input_``.

    Args:
        cell: Callable invoked as ``cell(input_=step_input, hx=hx)`` and
            returning ``(h_next, c_next)``.
        input_: Sequence tensor; ``input_[i]`` is the input at step ``i``.
        length: Unused (the length-masking code is disabled).
        hx: Initial state handed to the first cell call.

    Returns:
        ``(output, hx)`` where ``output`` stacks every step's ``h_next``
        along a new leading dimension and ``hx`` is the final
        ``(h_next, c_next)`` pair.
    """
    seq_len = input_.size(0)
    # Hidden states are collected in a list and stacked once at the end; the
    # previous code passed a tuple to torch.unsqueeze, which raises.
    step_outputs = []
    for i in range(seq_len):
        h_next, c_next = cell(input_=input_[i], hx=hx)
        step_outputs.append(h_next)
        hx = (h_next, c_next)
    output = torch.stack(step_outputs, 0)
    return output, hx
def __getitem__(self, index):
    """Load one sample: the image at several scales plus its segmentation label.

    Args:
        index: Position in ``self.list_sample``.

    Returns:
        dict with
        ``'img_ori'``: copy of the channel-reversed image array,
        ``'img_data'``: list of ``1 x C x H x W`` tensors, one per entry of ``self.imgSize``,
        ``'seg_label'``: ``1 x H x W`` long tensor with every label shifted by -1,
        ``'info'``: the record's ``'fpath_img'`` value.
    """
    this_record = self.list_sample[index]

    # load image and label
    image_path = os.path.join(self.root_dataset, this_record['fpath_img'])
    segm_path = os.path.join(self.root_dataset, this_record['fpath_segm'])
    img = imread(image_path, mode='RGB')
    # Reverse the channel order.
    # NOTE(review): the image is requested as RGB, so this presumably yields
    # BGR although the original comment said "BGR to RGB" - confirm.
    img = img[:, :, ::-1]
    segm = imread(segm_path)

    ori_height, ori_width, _ = img.shape

    img_resized_list = []
    for this_short_size in self.imgSize:
        # calculate target height and width: scale the short side to the
        # requested size without letting the long side exceed imgMaxSize
        scale = min(this_short_size / float(min(ori_height, ori_width)),
                    self.imgMaxSize / float(max(ori_height, ori_width)))
        target_height, target_width = int(ori_height * scale), int(ori_width * scale)

        # to avoid rounding in network
        target_height = round2nearest_multiple(target_height, self.padding_constant)
        target_width = round2nearest_multiple(target_width, self.padding_constant)

        # resize
        img_resized = cv2.resize(img.copy(), (target_width, target_height))

        # image to float, HWC -> CHW
        img_resized = img_resized.astype(np.float32)
        img_resized = img_resized.transpose((2, 0, 1))
        img_resized = self.img_transform(torch.from_numpy(img_resized))

        img_resized = torch.unsqueeze(img_resized, 0)
        img_resized_list.append(img_resized)

    # np.int was removed from NumPy; np.int64 matches the .long() that follows.
    segm = torch.from_numpy(segm.astype(np.int64)).long()

    batch_segms = torch.unsqueeze(segm, 0)
    batch_segms = batch_segms - 1  # label from -1 to 149

    output = dict()
    output['img_ori'] = img.copy()
    output['img_data'] = [x.contiguous() for x in img_resized_list]
    output['seg_label'] = batch_segms.contiguous()
    output['info'] = this_record['fpath_img']
    return output
def plot_isocontours_expected_norm_ind(ax, model, data, xlimits=[-6, 6], ylimits=[-6, 6], numticks=101, cmap=None, alpha=1., legend=False, n_samps=10, cs_to_use=None):
    """Draw one normalised density contour set per sample (up to ``n_samps``).

    For each sample the ``lognormal4`` values on the grid are normalised so
    that their exponentials sum to 1 over the grid, then contoured
    individually.

    Args:
        ax: Axes whose ticks are cleared.
        model: Object whose ``encode`` returns ``(mean, logvar)``.
        data: Indexable collection of samples.
        xlimits, ylimits: ``(low, high)`` extents of the grid.
        numticks: Number of grid points per axis.
        cmap, alpha: Forwarded to ``plt.contour``.
        legend: Unused.
        n_samps: Maximum number of samples drawn (capped at ``len(data)``).
        cs_to_use: Optional contour set whose ``levels`` are reused.

    Returns:
        ``(Z, cs)`` for the last sample drawn.
    """
    x = np.linspace(*xlimits, num=numticks)
    y = np.linspace(*ylimits, num=numticks)
    X, Y = np.meshgrid(x, y)
    # One (x, y) row per grid point.
    aaa = torch.from_numpy(np.concatenate([np.atleast_2d(X.ravel()), np.atleast_2d(Y.ravel())]).T).type(torch.FloatTensor)

    if len(data) < n_samps:
        n_samps = len(data)

    for samp_i in range(n_samps):
        if samp_i % 1000 == 0:
            # Function-call form: valid on Python 3, same output on Python 2.
            print(samp_i)
        mean, logvar = model.encode(Variable(torch.unsqueeze(data[samp_i], 0)))
        bbb = lognormal4(torch.Tensor(aaa), torch.squeeze(mean.data), torch.squeeze(logvar.data))
        zs = bbb.numpy()

        # Log-sum-exp with the maximum factored out for numerical stability.
        max_ = np.max(zs)
        zs_sum = np.log(np.sum(np.exp(zs - max_))) + max_
        zs = zs - zs_sum

        Z = np.exp(zs)
        Z = Z.reshape(X.shape)

        if cs_to_use is not None:
            cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha, levels=cs_to_use.levels)
        else:
            cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha)

    ax.set_yticks([])
    ax.set_xticks([])
    plt.gca().set_aspect('equal', adjustable='box')
    return Z, cs
def forward(self, image_feat, question_embedding):
    """Attention over image locations from concatenated image/question features.

    Args:
        image_feat: 3-D tensor; its second dimension indexes locations.
        question_embedding: One embedding row per sample.

    Returns:
        Softmax (over locations) attention, expanded to the shape of
        ``image_feat``.
    """
    _, n_loc, _ = image_feat.shape

    # Repeat the question embedding at every location.
    tiled_question = question_embedding.unsqueeze(1).expand(-1, n_loc, -1)
    fused = torch.cat((image_feat, tiled_question), dim=2)

    scores = self.lc(self.Fa(fused))
    # softmax across locations
    weights = F.softmax(scores, dim=1)
    return weights.expand_as(image_feat)
def compute_raw_att(self, image_feat, question_embedding):
    """Un-normalised attention scores from projected image and question features.

    Args:
        image_feat: 3-D tensor; its second dimension indexes locations.
        question_embedding: One embedding row per sample.

    Returns:
        Output of ``self.lc`` on the (dropout-regularised) element-wise
        product of the two projections.
    """
    _, n_loc, _ = image_feat.shape

    projected_image = self.Fa_image(image_feat)
    projected_question = self.Fa_txt(question_embedding)

    # Repeat the projected question at every location before multiplying.
    tiled_question = projected_question.unsqueeze(1).expand(-1, n_loc, -1)
    fused = self.dropout(projected_image * tiled_question)
    return self.lc(fused)
def run(self):
    """Train the agent for up to NUM_EPISODE episodes of at most MAX_STEPS steps.

    An episode that ends before step 195 earns reward -1 and resets the
    success streak; one that ends later earns +1 and extends it. Steps that
    do not end the episode earn 0. ``self.total_step`` keeps a sliding window
    of recent episode lengths.
    """
    streak = 0

    for episode in range(NUM_EPISODE):
        first_observation = self.env.reset()
        state = torch.unsqueeze(
            torch.from_numpy(first_observation).type(torch.FloatTensor), 0)

        for step in range(MAX_STEPS):
            action = self.agent.get_action(state, episode)
            observation_next, _, done, _ = self.env.step(action.item())

            if not done:
                reward = torch.FloatTensor([0.0])
                state_next = torch.unsqueeze(
                    torch.from_numpy(observation_next).type(torch.FloatTensor), 0)
            else:
                # No successor state is stored for a finished episode.
                state_next = None
                # Drop the oldest episode length and append the newest.
                self.total_step = np.hstack((self.total_step[1:], step + 1))
                if step >= 195:
                    reward = torch.FloatTensor([1.0])
                    streak = streak + 1
                else:
                    reward = torch.FloatTensor([-1.0])
                    streak = 0

            self.agent.memory(state, action, state_next, reward)
            self.agent.update_q_function()
            state = state_next

            if done:
                print('episode: {0}, steps: {1}, mean steps {2}'.format(episode, step, self.total_step.mean()))
                break

        if streak >= 10:
            print('success 10 times in sequence')

    self.env.close()
def forward(self, s, a, ls):
    """Two-stage LSTM forward pass with the aspect vector appended between stages.

    Args:
        s: Sentence token indices (moved to CUDA before embedding).
        a: Aspect token indices (moved to CUDA before embedding).
        ls: Per-sentence lengths; positions ``j < ls[i] - 1`` get the averaged
            aspect vector appended, the remaining positions get zeros.

    Returns:
        ``self.softmax`` applied to the fully connected layer's output.
    """
    # NOTE(review): the extents of both no_grad blocks are reconstructed from
    # a collapsed source line - confirm against the original layout.
    with torch.no_grad():
        embedded = self.embedding(s.cuda())
        a_embedded = self.embedding(a.cuda())
        # Average the aspect embedding
        a_new_embedded = torch.zeros(len(s),1,100)
        for i in range(len(a_embedded)):
            # Rows with no non-zero entry keep the zero vector.
            if len(torch.nonzero(a_embedded[i])):
                # The divisor is the number of non-zero entries reported by
                # torch.nonzero for this aspect's embedding.
                a_new_embedded[i] = torch.unsqueeze(torch.sum(a_embedded[i].cuda(), 0)/len(torch.nonzero(a_embedded[i].cuda())),0)
        a_embedded = a_new_embedded
        # The string literal below is disabled code kept verbatim; it is never executed.
        """ embedded = torch.zeros(len(s),20,200) # Concatenate each word in sentence with aspect vector zero_tag = torch.zeros(100) for i in range(len(s_embedded)): for j in range(20): if j<(ls[i]-1): embedded[i][j] = torch.unsqueeze(torch.cat((s_embedded[i][j],torch.squeeze(a_embedded[i],0)),0),0) else: embedded[i][j] = torch.unsqueeze(torch.cat((s_embedded[i][j],zero_tag),0),0) """
    out, (h, c) = self.lstm1(embedded)
    with torch.no_grad():
        # Second-stage input: each first-stage output joined with either the
        # aspect vector or 100 zeros (612 features per position).
        new_embedded = torch.zeros(len(s), 20, 612)
        zero_tag = torch.zeros(100).cuda()
        for i in range(len(out)):
            for j in range(20):
                if j<(ls[i]-1):
                    new_embedded[i][j] = torch.unsqueeze(torch.cat((out[i][j].cuda(),torch.squeeze(a_embedded[i].cuda(),0)),0),0)
                else:
                    new_embedded[i][j] = torch.unsqueeze(torch.cat((out[i][j].cuda(),zero_tag),0),0)
    out2, (h2, c2) = self.lstm2(new_embedded.cuda())
    # Join the last two hidden-state slices along the feature dimension.
    hidden = self.dropout(torch.cat((h2[-2,:,:], h2[-1,:,:]), dim=1))
    hidden2pred = self.fc(hidden)
    pred = self.softmax(hidden2pred)
    return pred
def choose_action(self, x):
    """Pick an action for a single observation with an epsilon-greedy rule.

    With probability ``EPSILON`` the action with the highest value from
    ``self.eval_net`` is taken; otherwise an action is drawn uniformly from
    ``range(N_ACTIONS)``.

    Args:
        x: A single observation; a batch dimension is added here.

    Returns:
        A scalar action when ``ENV_A_SHAPE == 0``, otherwise the action
        reshaped to ``ENV_A_SHAPE``.
    """
    x = Variable(torch.unsqueeze(torch.FloatTensor(x), 0))  # input only one sample
    if np.random.uniform() < EPSILON:  # greedy
        actions_value = self.eval_net.forward(x)
        action = torch.max(actions_value, 1)[1].data.numpy()
        # ravel()[0] reads the single index whether the argmax comes back as
        # shape (1,) or (1, 1); `action[0, 0]` fails on the 1-D form.
        action = action.ravel()[0] if ENV_A_SHAPE == 0 else action.reshape(ENV_A_SHAPE)
    else:  # random
        action = np.random.randint(0, N_ACTIONS)
        # randint returns a plain int, which has no reshape(); wrap it first.
        action = action if ENV_A_SHAPE == 0 else np.asarray(action).reshape(ENV_A_SHAPE)
    return action
def mul_diag(A, vec):
    '''Batched product of a matrix with diag(vector).

    Scales column j of A by vec[..., j], which is A @ diag(vec) for each
    vector in the batch.

    Args:
        A: General matrix of size (M, Size).
        vec: Vector of size (Batch, Size).
    Returns:
        The broadcast product A * vec[..., None, :]; for the sizes above
        this has size (Batch, M, Size).
    '''
    # Insert a row axis so vec broadcasts across the rows of A.
    column_scale = vec.unsqueeze(-2)
    return A * column_scale
def choose_action(self, s):
    '''Estimate the value of every available action for the given state and pick one.

    With probability ``epsilon`` the highest-valued action from
    ``self.eval_net`` is chosen; otherwise an action is drawn uniformly from
    ``range(n_actions)``.

    Args:
        s: A single state; a batch dimension is added here.

    Returns:
        The chosen action index as an integer.
    '''
    s = Variable(torch.unsqueeze(torch.FloatTensor(s), 0))  # input only one sample
    if np.random.uniform() < epsilon:  # greedy choice
        actions_value = self.eval_net(s)
        # int() turns the selected index into a plain integer so both
        # branches return the same kind of value (indexing alone can yield a
        # 0-dim tensor).
        action = int(torch.max(actions_value, 1)[1].data[0])
    else:  # random choice
        action = np.random.randint(0, n_actions)
    return action
def forward(self, image_feat, question_embed):
    """Element-wise fusion of projected image and question features.

    Args:
        image_feat: Image features, either 3-D (with a location dimension at
            index 1) or without a location dimension.
        question_embed: Question features, one row per sample.

    Returns:
        ``self.dropout`` applied to the product of the two projections.
    """
    image_proj = self.lc_image(image_feat)
    question_proj = self.lc_ques(question_embed)

    if len(image_feat.data.shape) == 3:
        # Repeat the question projection at every image location.
        n_loc = image_feat.data.size(1)
        question_proj = question_proj.unsqueeze(1).expand(-1, n_loc, -1)

    return self.dropout(image_proj * question_proj)
def plot_isocontours_expected_W(ax, model, samp, xlimits=[-6, 6], ylimits=[-6, 6], numticks=101, cmap=None, alpha=1., legend=False):
    """Contour-plot a grid-normalised density for ``samp``, averaged over 10 draws of W.

    For every draw from ``model.sample_W`` the sum of ``log_bernoulli`` of
    the decoded grid points and ``lognormal4`` with zero parameters is
    evaluated on a 2-D grid and normalised so its exponentials sum to 1 over
    the grid; the normalised densities are then averaged.

    Args:
        ax: Axes whose ticks are cleared.
        model: Object providing ``sample_W`` and ``decode``.
        samp: The sample to score; a leading batch dimension is added.
        xlimits, ylimits: ``(low, high)`` extents of the grid.
        numticks: Number of grid points per axis.
        cmap, alpha: Forwarded to ``plt.contour``.
        legend: When true, add a legend built from the contour set.
    """
    x = np.linspace(*xlimits, num=numticks)
    y = np.linspace(*ylimits, num=numticks)
    X, Y = np.meshgrid(x, y)
    # One (x, y) row per grid point.
    aaa = torch.from_numpy(np.concatenate([np.atleast_2d(X.ravel()), np.atleast_2d(Y.ravel())]).T).type(torch.FloatTensor)

    n_Ws = 10
    for i in range(n_Ws):
        if i % 10 == 0:
            # Function-call form: valid on Python 3, same output on Python 2.
            print(i)

        Ws, logpW, logqW = model.sample_W()  # _ , [1], [1]
        bbb = (log_bernoulli(model.decode(Ws, Variable(torch.unsqueeze(aaa, 1))),
                             Variable(torch.unsqueeze(samp, 0)))
               + Variable(torch.unsqueeze(
                   lognormal4(torch.Tensor(aaa), torch.zeros(2), torch.zeros(2)), 1)))
        zs = bbb.data.numpy()

        # Log-sum-exp with the maximum factored out for numerical stability.
        max_ = np.max(zs)
        zs_sum = np.log(np.sum(np.exp(zs - max_))) + max_
        zs = zs - zs_sum
        zs = np.exp(zs)

        if i == 0:
            sum_of_all = zs
        else:
            sum_of_all = sum_of_all + zs

    avg_of_all = sum_of_all / n_Ws
    Z = avg_of_all.reshape(X.shape)

    cs = plt.contour(X, Y, Z, cmap=cmap, alpha=alpha)
    if legend:
        nm, lbl = cs.legend_elements()
        plt.legend(nm, lbl, fontsize=4)

    ax.set_yticks([])
    ax.set_xticks([])
    plt.gca().set_aspect('equal', adjustable='box')
def forward(self, root_node, inputs):
    """Embed ``inputs`` and run the recursive encoder starting at ``root_node``.

    Args:
        root_node: Root handed to ``self.recursive_forward``.
        inputs: Input passed through ``self.emb``.

    Returns:
        ``(outputs, final_state)``: everything ``recursive_forward`` appended
        to its output list, concatenated along dim 0, and the value it
        returned.
    """
    # Insert a singleton dimension at index 1 of the embeddings.
    embedded = self.emb(inputs).unsqueeze(1)
    collected = []
    final_state = self.recursive_forward(root_node, embedded, collected)
    return torch.cat(collected, 0), final_state
# Standardise both input features in place (each entry of df2['input'] is a
# mutable two-element sequence).
for i in df2['input']:
    i[0] = (i[0] - x1_mean) / x1_div
    i[1] = (i[1] - x2_mean) / x2_div
x = df2['input']

# Standardise the targets. A previous loop over df3['quantity'] only rebound
# its loop variable and changed nothing, so it has been removed.
y = torch.FloatTensor([(q - y_mean) / y_div for q in df2['quantity']])

x = torch.FloatTensor([x]).squeeze(2)
x = torch.squeeze(x, dim=1)
y = torch.unsqueeze(y, dim=1)  # add a trailing dimension of size 1
x, y = Variable(x), Variable(y)

print(x)
print(y)
print('成功导入数据!')
def a2c_train_step(agent, abstractor, loader, opt, grad_fn,
                   gamma=0.99, reward_fn=compute_rouge_l,
                   stop_reward_fn=compute_rouge_n(n=1), stop_coeff=1.0):
    """Run one actor-critic update on the next batch from ``loader``.

    Args:
        agent: Callable returning ``((indices, distributions), baselines)``
            for one article's sentences.
        abstractor: Callable mapping the extracted sentences to summaries;
            invoked under ``torch.no_grad()``.
        loader: Iterator yielding ``(art_batch, abs_batch)``.
        opt: Optimizer; zeroed at the start and stepped at the end.
        grad_fn: Callable returning a dict of gradient statistics; it must
            contain ``'grad_norm'``.
        gamma: Discount factor for the per-article rewards.
        reward_fn: Reward for an extracted sentence's summary against the
            reference sentence at the same position.
        stop_reward_fn: Reward for the final action, computed on the
            concatenation of the article's summaries and references.
        stop_coeff: Multiplier applied to the final-action reward.

    Returns:
        dict with the entries of ``grad_fn()`` plus ``'reward'``,
        ``'advantage'`` and ``'mse'``.
    """
    opt.zero_grad()
    indices = []
    probs = []
    baselines = []
    ext_sents = []
    art_batch, abs_batch = next(loader)
    for raw_arts in art_batch:
        (inds, ms), bs = agent(raw_arts)
        baselines.append(bs)
        indices.append(inds)
        probs.append(ms)
        # Indices that do not address a sentence of the article are skipped.
        ext_sents += [raw_arts[idx.item()]
                      for idx in inds if idx.item() < len(raw_arts)]
    with torch.no_grad():
        summaries = abstractor(ext_sents)
    # i is the offset of the current article's first summary in `summaries`.
    i = 0
    rewards = []
    avg_reward = 0
    for inds, abss in zip(indices, abs_batch):
        # One reward per action: sentence-level rewards while reference
        # sentences remain, zeros for extractions beyond the reference
        # length, and the scaled stop reward for the last action.
        rs = ([reward_fn(summaries[i+j], abss[j])
               for j in range(min(len(inds)-1, len(abss)))]
              + [0 for _ in range(max(0, len(inds)-1-len(abss)))]
              + [stop_coeff*stop_reward_fn(
                  list(concat(summaries[i:i+len(inds)-1])),
                  list(concat(abss)))])
        assert len(rs) == len(inds)
        # Undo the scaling so the logged reward is the raw stop reward.
        avg_reward += rs[-1]/stop_coeff
        # The last action produced no summary, hence len(inds)-1.
        i += len(inds)-1
        # compute discounted rewards
        R = 0
        disc_rs = []
        for r in rs[::-1]:
            R = r + gamma * R
            disc_rs.insert(0, R)
        rewards += disc_rs
    # Flatten the per-article lists so they align with `rewards`.
    indices = list(concat(indices))
    probs = list(concat(probs))
    baselines = list(concat(baselines))
    # standardize rewards
    reward = torch.Tensor(rewards).to(baselines[0].device)
    # The float32 epsilon guards against division by a zero std.
    reward = (reward - reward.mean()) / (
        reward.std() + float(np.finfo(np.float32).eps))
    baseline = torch.cat(baselines).squeeze()
    avg_advantage = 0
    losses = []
    for action, p, r, b in zip(indices, probs, reward, baseline):
        advantage = r - b
        avg_advantage += advantage
        # Each term is scaled by 1/len(indices), the total number of actions
        # in the batch.
        losses.append(-p.log_prob(action)
                      * (advantage/len(indices))) # divide by T*B
    critic_loss = F.mse_loss(baseline, reward)
    # Give the scalar loss shape (1,) to match the ones(1) gradient below.
    critic_loss = torch.unsqueeze(critic_loss, 0)
    # backprop and update
    autograd.backward(
        [critic_loss] + losses,
        [torch.ones(1).to(critic_loss.device)]*(1+len(losses))
    )
    grad_log = grad_fn()
    opt.step()
    log_dict = {}
    log_dict.update(grad_log)
    log_dict['reward'] = avg_reward/len(art_batch)
    log_dict['advantage'] = avg_advantage.item()/len(indices)
    log_dict['mse'] = critic_loss.item()
    assert not math.isnan(log_dict['grad_norm'])
    return log_dict
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    """Run the feature-pyramid detector on a batch of images.

    Builds the bottom-up / top-down feature pyramid, obtains proposals from
    ``self.RCNN_rpn``, pools per-RoI features, and predicts box offsets and
    class scores. In training mode the classification and box-regression
    losses are computed as well.

    Returns:
        Tuple ``(rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
        RCNN_loss_cls, RCNN_loss_bbox, rois_label)``. Outside training mode
        the four losses are the integer 0 and ``rois_label`` is None.
    """
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    # Bottom-up
    c1 = self.RCNN_layer0(im_data)
    c2 = self.RCNN_layer1(c1)
    c3 = self.RCNN_layer2(c2)
    c4 = self.RCNN_layer3(c3)
    c5 = self.RCNN_layer4(c4)
    # Top-down
    p5 = self.RCNN_toplayer(c5)
    p4 = self._upsample_add(p5, self.RCNN_latlayer1(c4))
    p4 = self.RCNN_smooth1(p4)
    p3 = self._upsample_add(p4, self.RCNN_latlayer2(c3))
    p3 = self.RCNN_smooth2(p3)
    p2 = self._upsample_add(p3, self.RCNN_latlayer3(c2))
    p2 = self.RCNN_smooth3(p2)

    p6 = self.maxpool2d(p5)

    # The RPN sees every level including p6; RoI feature pooling only uses p2-p5.
    rpn_feature_maps = [p2, p3, p4, p5, p6]
    mrcnn_feature_maps = [p2, p3, p4, p5]

    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        rpn_feature_maps, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        ## NOTE: additionally, normalize proposals to range [0, 1],
        #        this is necessary so that the following roi pooling
        #        is correct on different feature maps
        # rois[:, :, 1::2] /= im_info[0][1]
        # rois[:, :, 2::2] /= im_info[0][0]

        # Flatten the batch dimension so every RoI is one row.
        rois = rois.view(-1, 5)
        rois_label = rois_label.view(-1).long()
        gt_assign = gt_assign.view(-1).long()
        # Indices of RoIs with a non-zero label.
        pos_id = rois_label.nonzero().squeeze()
        gt_assign_pos = gt_assign[pos_id]
        rois_label_pos = rois_label[pos_id]
        rois_label_pos_ids = pos_id

        rois_pos = Variable(rois[pos_id])
        rois = Variable(rois)
        rois_label = Variable(rois_label)

        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        ## NOTE: additionally, normalize proposals to range [0, 1],
        #        this is necessary so that the following roi pooling
        #        is correct on different feature maps
        # rois[:, :, 1::2] /= im_info[0][1]
        # rois[:, :, 2::2] /= im_info[0][0]

        # No targets and no RPN losses outside training.
        rois_label = None
        gt_assign = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        rois = rois.view(-1, 5)
        # Every RoI is kept at inference time.
        pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
        rois_label_pos_ids = pos_id
        rois_pos = Variable(rois[pos_id])
        rois = Variable(rois)

    # pooling features based on rois, output 14x14 map (128,64,7,7)
    roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois, im_info)

    # NOTE(review): Use_emsemble is hard-coded to False, so the whole ensemble
    # section below is currently unreachable. It also instantiates and loads
    # auxiliary networks inside forward(), i.e. on every call if enabled.
    Use_emsemble = False
    emsemble_vgg, emsemble_detnet = [False, True]
    if Use_emsemble:
        if emsemble_vgg:
            model_vgg = Cnn()
            model_vgg = model_vgg.cuda()
            ## vgg net
            pretrained_model_vgg = '/home/lab30202/lq/ai_future/single_classsification_vgg/model_save/galxay_star_classification_vgg.pth'  # path where the pretrained model parameters are saved
            pretrained_dict = torch.load(pretrained_model_vgg)
            model_dict = model_vgg.state_dict()
            # Keep only the checkpoint entries whose names exist in the model.
            pretrained_dict = {
                k: v
                for k, v in pretrained_dict.items() if k in model_dict
            }
            model_dict.update(pretrained_dict)
            model_vgg.load_state_dict(model_dict)
            feature_map_vgg = model_vgg.convnet(im_data)
            # Hard-coded RoI counts: 128 in training, 300 otherwise.
            if self.training:
                idx_l = [x for x in range(0, 128, 1)]
            else:
                idx_l = [x for x in range(0, 300, 1)]
            idx_l = torch.LongTensor(idx_l)
            feat = self.RCNN_roi_align(feature_map_vgg, rois[idx_l], 0.5)
            roi_pool_vgg = feat.view(feat.shape[0], -1)
            cls_score_vgg = model_vgg.fc(roi_pool_vgg)
            # cls_prob_vgg = F.softmax(cls_score_vgg,dim=1)

        if emsemble_detnet:
            ## detnet
            detnet = Detnet()
            detnet = detnet.cuda()
            # Bottom-up
            c1_det = detnet.RCNN_layer0_det(im_data)
            c2_det = detnet.RCNN_layer1_det(c1_det)
            c3_det = detnet.RCNN_layer2_det(c2_det)
            c4_det = detnet.RCNN_layer3_det(c3_det)
            c5_det = detnet.RCNN_layer4_det(c4_det)
            c6_det = detnet.RCNN_layer5_det(c5_det)

            # Top-down
            p6_det = detnet.RCNN_toplayer_det(c6_det)
            p5_det = detnet.RCNN_latlayer1_det(c5_det) + p6_det
            p4_det = detnet.RCNN_latlayer2_det(c4_det) + p5_det
            p3_det = detnet._upsample_add(
                p4_det, detnet.RCNN_latlayer3_det(c3_det))
            p3_det = detnet.RCNN_smooth1_det(p3_det)
            p2_det = detnet._upsample_add(
                p3_det, detnet.RCNN_latlayer4_det(c2_det))
            p2_det = detnet.RCNN_smooth2_det(p2_det)

            rpn_feature_maps_det = [p2_det, p3_det, p4_det, p5_det, p6_det]
            mrcnn_feature_maps_det = [p2_det, p3_det, p4_det, p5_det]
            rois_det, rpn_loss_cls_det, rpn_loss_bbox_det = self.RCNN_rpn(
                rpn_feature_maps_det, im_info, gt_boxes, num_boxes)
            # Same target preparation as for the main branch above.
            if self.training:
                roi_data_det = self.RCNN_proposal_target(
                    rois_det, gt_boxes, num_boxes)
                rois_det, rois_label_det, gt_assign_det, rois_target_det, rois_inside_ws_det, rois_outside_ws_det = roi_data_det

                rois_det = rois_det.view(-1, 5)
                rois_label_det = rois_label_det.view(-1).long()
                gt_assign_det = gt_assign_det.view(-1).long()
                pos_id_det = rois_label_det.nonzero().squeeze()
                gt_assign_pos_det = gt_assign_det[pos_id_det]
                rois_label_pos_det = rois_label_det[pos_id_det]
                rois_label_pos_ids_det = pos_id_det

                rois_pos_det = Variable(rois_det[pos_id_det])
                rois_det = Variable(rois_det)
                rois_label_det = Variable(rois_label_det)

                rois_target_det = Variable(
                    rois_target_det.view(-1, rois_target_det.size(2)))
                rois_inside_ws_det = Variable(
                    rois_inside_ws_det.view(-1, rois_inside_ws_det.size(2)))
                rois_outside_ws_det = Variable(
                    rois_outside_ws_det.view(-1, rois_outside_ws_det.size(2)))
            else:
                rois_label_det = None
                gt_assign_det = None
                rois_target_det = None
                rois_inside_ws_det = None
                rois_outside_ws_det = None
                rpn_loss_cls_det = 0
                rpn_loss_bbox_det = 0
                rois_det = rois_det.view(-1, 5)
                pos_id_det = torch.arange(
                    0, rois_det.size(0)).long().type_as(rois_det).long()
                rois_label_pos_ids_det = pos_id_det
                rois_pos_det = Variable(rois_det[pos_id_det])
                rois_det = Variable(rois_det)

        # NOTE(review): pools with the main-branch `rois`, not `rois_det`, and
        # needs `mrcnn_feature_maps_det`, which only exists when
        # emsemble_detnet is true - confirm this is intended.
        feat_det = self._PyramidRoI_Feat(mrcnn_feature_maps_det, rois,
                                         im_info)
        if emsemble_detnet:
            pooled_feat_det = detnet._head_to_tail(feat_det)
            cls_score_det = self.RCNN_cls_score(pooled_feat_det)
        else:
            roi_pool_det = feat_det.view(feat_det.shape[0], -1)
            cls_score_det = model_vgg.fc(roi_pool_det)

    pooled_feat = self._head_to_tail(roi_pool_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                        int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.long().view(rois_label.size(0), 1,
                                   1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    # cls_prob = F.softmax(cls_score,dim=1)

    if Use_emsemble:
        # Combine the main scores with the auxiliary networks' scores.
        if emsemble_detnet and emsemble_vgg:
            cls_score_liner = 0.5 * cls_score + 0.3 * cls_score_vgg + 0.2 * cls_score_det
            cls_score = model_vgg.fc_new(cls_score_liner)
            cls_prob = F.softmax(cls_score, dim=1)
        elif emsemble_vgg and not emsemble_detnet:
            cls_score_liner = cls_score + cls_score_vgg
            cls_score = model_vgg.fc_new(cls_score_liner)
            cls_prob = F.softmax(cls_score, dim=1)
        elif emsemble_detnet and not emsemble_vgg:
            cls_score_liner = cls_score + cls_score_det
            cls_score = detnet.fc_add(cls_score_liner)
            cls_prob = F.softmax(cls_score, dim=1)
    else:
        # Recomputes the same scores as a few lines above before the softmax.
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, dim=1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # loss (cross entropy) for object classification
        # Hard-coded switches selecting the loss variants used below.
        Use_focal_loss = True
        Use_label_smoothing = False
        Use_Giou_loss = False

        if not Use_focal_loss:
            if Use_label_smoothing:
                # criteria = LabelSmoothSoftmaxCE(label_smoothing=0.1)
                criteria = LabelSmoothSoftmaxCE(lb_pos=0.9, lb_neg=5e-3)
                RCNN_loss_cls = criteria(cls_score, rois_label)
            else:
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        else:
            FL = FocalLoss(class_num=self.n_classes, alpha=1, gamma=2)
            RCNN_loss_cls = FL(cls_score, rois_label)
            RCNN_loss_cls = RCNN_loss_cls.type(torch.FloatTensor).cuda()

        # loss (l1-norm) for bounding box regression
        if Use_Giou_loss:
            rois1 = rois.view(batch_size, -1, rois.size(1))
            # Columns 1..4 of each RoI row are used as the box coordinates.
            boxes = rois1.data[:, :, 1:5]
            bbox_pred1 = bbox_pred.view(batch_size, -1, bbox_pred.size(1))
            box_deltas = bbox_pred1.data
            # if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            #     # Optionally normalize targets by a precomputed mean and stdev
            #     box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
            #                  + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            #     box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            pred_boxes /= im_info[0][2].cuda()
            # RCNN_loss_bbox = generalized_iou_loss(rois_target,bbox_pred)
            _, _, RCNN_loss_bbox = Giou_np(pred_boxes, boxes)
        else:
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws,
                                             rois_outside_ws)

    # Restore the leading batch dimension on the outputs.
    rois = rois.view(batch_size, -1, rois.size(1))
    cls_prob = cls_prob.view(batch_size, -1, cls_prob.size(1))
    bbox_pred = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

    if self.training:
        rois_label = rois_label.view(batch_size, -1)
        # Give each loss a leading dimension of size 1.
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
def run_training_epoch(self, total_train_batches, epoch=-1):
    """
    Train the matching network for one epoch.

    A fresh optimizer is created at the start of the epoch; after every
    batch the optimizer's learning rate is re-adjusted, and every 2000
    global iterations ``self.lr`` is halved.

    :param total_train_batches: Number of batches to train on
    :param epoch: Epoch index forwarded to the network (default -1)
    :return: mean training loss and mean training accuracy over the epoch
    """
    loss_sum = 0.
    accuracy_sum = 0.
    optimizer = self.__create_optimizer(self.matchingNet, self.lr)

    with tqdm.tqdm(total=total_train_batches) as pbar:
        for _ in range(total_train_batches):
            batch = self.data.get_batch_training(str_type='train', rotate_flag=True)
            (x_support_set, y_support_set, x_target, y_target,
             support_set_y_actuals, target_y_actuals) = batch

            # numpy -> torch; images as float, labels as long
            x_support_set = Variable(torch.from_numpy(x_support_set)).float()
            y_support_set = Variable(torch.from_numpy(y_support_set), requires_grad=False).long()
            x_target = Variable(torch.from_numpy(x_target)).float()
            y_target = Variable(torch.from_numpy(y_target), requires_grad=False).long()

            # One-hot encode the support labels along a new trailing axis.
            y_support_set = torch.unsqueeze(y_support_set, 2)
            batch_size, sequence_length = y_support_set.size()[0], y_support_set.size()[1]
            one_hot = torch.FloatTensor(batch_size, sequence_length, self.classes_per_set).zero_()
            one_hot.scatter_(2, y_support_set.data, 1)
            y_support_set_one_hot = Variable(one_hot)

            # Reshape channels
            s = x_support_set.size()
            x_support_set = x_support_set.view(s[0], s[1], s[4], s[2], s[3])
            s = x_target.size()
            x_target = x_target.view(s[0], s[1], s[4], s[2], s[3])

            # Move the four tensors to the GPU when one is in use.
            net_inputs = (x_support_set, y_support_set_one_hot, x_target, y_target)
            if self.isCudaAvailable:
                net_inputs = tuple(t.cuda() for t in net_inputs)
            acc, c_loss_value, _ = self.matchingNet(
                *net_inputs,
                epoch=epoch,
                target_y_actuals=target_y_actuals,
                support_set_y_actuals=support_set_y_actuals)

            # Standard update: clear old gradients, backprop, step.
            optimizer.zero_grad()
            c_loss_value.backward()
            optimizer.step()

            # update the optimizer learning rate
            self.__adjust_learning_rate(optimizer)

            iter_out = "tr_loss: {}, tr_accuracy: {}".format(c_loss_value.data, acc.data)
            pbar.set_description(iter_out)
            pbar.update(1)

            loss_sum += c_loss_value.data
            accuracy_sum += acc.data

            self.total_train_iter += 1
            if self.total_train_iter % 2000 == 0:
                self.lr /= 2
                print("change learning rate", self.lr)

    mean_loss = loss_sum / total_train_batches
    mean_accuracy = accuracy_sum / total_train_batches
    return mean_loss, mean_accuracy
def forward(self, seq, msk=None):
    """Reduce `seq` along dim 0.

    Without a mask this is the plain mean over dim 0. With a mask, `msk`
    gets a trailing singleton axis, weights `seq` element-wise, and the
    weighted sum over dim 0 is divided by the sum of all mask entries.
    """
    if msk is not None:
        weights = msk.unsqueeze(-1)
        weighted_total = (seq * weights).sum(0)
        return weighted_total / weights.sum()
    return seq.mean(0)
if intHeight != ((intHeight >> 7) << 7): intHeight_pad = ( ((intHeight >> 7) + 1) << 7) # more than necessary intPaddingTop = int((intHeight_pad - intHeight) / 2) intPaddingBottom = intHeight_pad - intHeight - intPaddingTop else: intHeight_pad = intHeight intPaddingTop = 32 intPaddingBottom = 32 pader = torch.nn.ReplicationPad2d( [intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom]) torch.set_grad_enabled(False) X0 = Variable(torch.unsqueeze(X0, 0)) X1 = Variable(torch.unsqueeze(X1, 0)) X0 = pader(X0) X1 = pader(X1) if use_cuda: X0 = X0.cuda() X1 = X1.cuda() proc_end = time.time() y_s, offset, filter = model(torch.stack((X0, X1), dim=0)) y_ = y_s[save_which] proc_timer.update(time.time() - proc_end) tot_timer.update(time.time() - end) end = time.time() print("*****************current image process time \t " +
def kernel_matrix(x):
    """Return the matrix K with K[i, j] = exp(-0.5 * sum((x[j] - x[i]) ** 2)).

    The squared differences are summed over dim 2 of the broadcast pairwise
    difference, so for a 2-D `x` of N rows the result is N x N.
    """
    pairwise_diff = x.unsqueeze(0) - x.unsqueeze(1)
    squared_dist = (pairwise_diff ** 2).sum(2)
    return torch.exp(-0.5 * squared_dist)
def projection_from_nested_spd_to_spd(x_spd_low_dimension, projection_matrix, projection_complement_matrix,
                                      bottom_spd_matrix, contraction_matrix):
    r"""
    This function is an approximation of the inverse of the function projection_from_spd_to_nested_spd.
    It maps low-dimensional SPD matrices to the original SPD space.
    To do so, we consider that the nested SPD matrix Y = W'XW is the d x d upper-left part of the rotated matrix
    Xr = R'XR, where R = [W, V] and Xr = [Y B; B' C].
    In order to recover X, we assume a constant SPD matrix C, and B = Y^0.5*K*C^0.5 to ensure the PDness of Xr, with
    K a contraction matrix (norm(K) <=1). We first reconstruct Xr, and then X as X = RXrR'.
    Note that W and V belong to Grassmann manifolds, W \in G(D,d) and V \in G(D,D-d), and must have orthonormal
    columns, so that W'V = 0.

    Parameters
    ----------
    :param x_spd_low_dimension: low dimensional SPD matrix or set of low dimensional SPD matrices (d x d or N x d x d)
    :param projection_matrix: element of the Grassmann manifold (D x d)
    :param projection_complement_matrix: element of the Grassmann manifold (D x D-d)
        Note that we must have torch.mm(projection_complement_matrix.T, projection_matrix) = 0.
    :param bottom_spd_matrix: bottom-right part of the rotated SPD matrix (D-d, D-d)
    :param contraction_matrix: matrix whose norm is <=1 (d x D-d)

    Returns
    -------
    :return: SPD matrix or set of SPD matrices (D x D or N x D x D)
    """
    # A single matrix is handled as a batch of one and unwrapped on return.
    one_data_output = x_spd_low_dimension.ndim == 2
    if one_data_output:
        x_spd_low_dimension = torch.unsqueeze(x_spd_low_dimension, 0)
    nb_data = x_spd_low_dimension.shape[0]

    # SPD matrices array initialization.
    # Allocate on the input's device as well as with its dtype; otherwise
    # writing the per-sample results below fails for inputs that are not on
    # the default device.
    dimension = projection_matrix.shape[0]
    x_spd = torch.zeros((nb_data, dimension, dimension),
                        dtype=x_spd_low_dimension.dtype,
                        device=x_spd_low_dimension.device)

    # Compute rotation matrix R = [W, V]
    rotation_matrix = torch.cat((projection_matrix, projection_complement_matrix), dim=1)

    # Compute sqrtm of the bottom block (identical for every sample)
    sqrt_bottom_spd_matrix = sqrtm_torch(bottom_spd_matrix)

    # Solve the equation for each data
    for n in range(nb_data):
        top_block = x_spd_low_dimension[n]

        # Compute sqrtm of the top block
        sqrt_top_spd_matrix = sqrtm_torch(top_block)

        # Side block B = Y^0.5 * K * C^0.5
        side_block = torch.mm(torch.mm(sqrt_top_spd_matrix, contraction_matrix), sqrt_bottom_spd_matrix)

        # Reconstruct full SPD matrix Xr = [Y B; B' C]
        x_spd_reconstructed = torch.cat((torch.cat((top_block, side_block), dim=1),
                                         torch.cat((side_block.T, bottom_spd_matrix), dim=1)), dim=0)

        # Rotate the matrix back to finalize the reconstruction: X = R Xr R'
        x_spd[n] = torch.mm(rotation_matrix, torch.mm(x_spd_reconstructed, rotation_matrix.T))

    if one_data_output:
        x_spd = x_spd[0]

    return x_spd
def applay_elastic_tensor_transform(self, grid):
    """Resample ``self.image`` and ``self.mask`` in place with `grid`.

    Each tensor gets a temporary leading batch axis for ``grid_sample`` and
    loses it again afterwards; the resampled mask is additionally rounded.
    """
    batched_image = self.image.unsqueeze(dim=0)
    warped_image = grid_sample(batched_image, grid)
    self.image = warped_image.data[0, ...]

    batched_mask = self.mask.unsqueeze(dim=0)
    warped_mask = grid_sample(batched_mask, grid).round()
    self.mask = warped_mask.data[0, ...]
def __getitem__(self, idx):
    """Return ``[data, label]`` for item `idx`.

    The data entry is converted to a tensor with a new leading axis; when
    ``self.transform`` is truthy its result on the pair is returned instead.
    """
    data = torch.tensor(self.datas[idx]).unsqueeze(dim=0)
    sample = [data, self.labels[idx]]
    if not self.transform:
        return sample
    return self.transform(sample)
import json

# Preprocessing pipeline: resize, centre-crop to 224, convert to a tensor and
# normalize each channel with the given mean / std.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# load image
img = Image.open("../tulip.jpg")
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

# read class_indict
try:
    # `with` guarantees the file handle is closed even if parsing fails.
    with open('./class_indices.json', 'r') as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model
model = MobileNetV2(num_classes=5)
# load model weights
model_weight_path = "./MobileNetV2.pth"
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
# host; load_state_dict copies into the model's own parameters either way.
model.load_state_dict(torch.load(model_weight_path, map_location="cpu"))
model.eval()
def train(verbose=True):
    """Train the classifier on the 'adult' dataset and report test metrics.

    The loss is the cross-entropy plus ``config.fairness_reg`` times the
    conditional fairness loss. After every epoch the model is evaluated on a
    held-out 20% validation split; the best weights (lowest validation loss)
    are saved to ``config.save_model_path`` and training stops once
    ``config.patience`` epochs have passed without improvement. The best
    weights are then reloaded and evaluated on the test set.

    :param verbose: when true, print training statistics every 20 batches
    """
    train_df, train_prot, test_df, test_prot = read_dataset(name='adult')
    features = train_df.drop(['Target'], axis=1)
    x_train, x_val, y_train, y_val, prot_train, prot_val = train_test_split(
        features, train_df['Target'], train_prot, test_size=0.2, random_state=SEED)

    # every column except 'Target' is an input feature
    input_size = train_df.shape[1] - 1
    num_classes = 2

    model = NetRegression(input_size, num_classes, arch=[120, 60]).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimiser = torch.optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_reg)

    train_tensor = data_utils.TensorDataset(
        torch.tensor(x_train.values).float(),
        torch.tensor(y_train.values).long(),
        torch.tensor(prot_train).float())
    train_loader = data_utils.DataLoader(dataset=train_tensor, batch_size=config.batch_size, shuffle=True)

    best_loss = np.inf
    training_patience = 0
    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(config.num_epochs):
            model.train()
            for i, (x, y, a) in enumerate(train_loader):
                x = x.to(DEVICE)
                y = y.to(DEVICE)
                a = a.to(DEVICE)

                optimiser.zero_grad()
                outputs = model(x)
                pred_loss = criterion(outputs, y)

                # the fairness loss takes the labels / protected attribute as columns
                y = torch.unsqueeze(y, 1).double()
                a = torch.unsqueeze(a, 1)
                fair_loss = cond_fair_loss(outputs, a, y)

                loss = pred_loss + config.fairness_reg * fair_loss
                loss.backward()
                optimiser.step()

                if verbose and i % 20 == 0:
                    acc = calc_accuracy(outputs, y)
                    stats = (epoch + 1, config.num_epochs, i, len(x_train) // config.batch_size,
                             loss.item(), pred_loss.item(), fair_loss.item(), acc)
                    print('Epoch: [%d/%d], Batch: [%d/%d], Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f, Accuracy: %.4f'
                          % stats)

            val_results = evaluate_model(model, criterion, x_val, y_val, prot_val,
                                         type='classification', fairness='eo')
            print('\t Validation Performance: Loss: %.4f, Accuracy: %.4f, DEO: %.4f, DI: %.4f, Fair-COCCO: %.4f'
                  % (val_results['loss'], val_results['accuracy'], val_results['deo'],
                     val_results['di'], val_results['cocco']))

            # early stopping on the validation loss
            improved = val_results['loss'] < best_loss
            if improved:
                best_loss = val_results['loss']
                training_patience = 0
                torch.save(model.state_dict(), config.save_model_path)
            else:
                training_patience += 1
            if training_patience == config.patience:
                break

    print('\nTraining Complete, loading best model')
    best_state = torch.load(config.save_model_path, map_location=torch.device(DEVICE))
    model.load_state_dict(best_state)
    test_results = evaluate_model(model, criterion, test_df.drop(['Target'], axis=1), test_df['Target'], test_prot,
                                  type='classification', fairness='eo')
    print('\t Test Performance: Loss: %.4f, Accuracy: %.4f, DEO: %.4f, DI: %.4f, Fair-COCCO: %.4f'
          % (test_results['loss'], test_results['accuracy'], test_results['deo'],
             test_results['di'], test_results['cocco']))
def predict_volumes(model, rimg_in=None, cimg_in=None, bmsk_in=None, suffix="unet_pre_mask",
                    save_dice=False, save_nii=True, nii_outdir=None, verbose=False,
                    rescale_dim=256, num_slice=3):
    """Predict a binary mask for each input volume with a slice-wise model.

    Every volume is cut into slice blocks along each of its three axes, the
    model is run on each block, the three per-axis predictions are resized
    back to the raw volume shape and averaged, and the result is thresholded
    at 0.5 and passed through ``extract_large_comp``.

    Args:
        model: network applied to each slice block; moved to GPU when CUDA is
            available, otherwise to CPU.
        rimg_in, cimg_in, bmsk_in: inputs forwarded to ``VolumeDataset``; at
            least one of ``rimg_in`` / ``cimg_in`` must be given.
        suffix: appended to the input file name to build the output name.
        save_dice: when true, return a dict of Dice scores instead of a path.
        save_nii: when true, write each predicted mask as ``.nii.gz``.
        nii_outdir: output directory; defaults to the current directory.
        verbose: print the Dice score when a reference mask is available.
        rescale_dim: in-plane size of the prediction buffer.
        num_slice: number of slices per block, forwarded to ``BlockDataset``.

    Returns:
        ``dice_dict`` (volume name -> Dice score, or None when no reference
        mask was available for that volume) if ``save_dice`` is true;
        otherwise the path of the last mask written, or None when
        ``save_nii`` is false.
    """
    import torch
    import torch.nn as nn
    import numpy as np
    from torch.autograd import Variable
    from CPAC.unet.function import extract_large_comp, estimate_dice, write_nifti
    from CPAC.unet.model import UNet2d
    from CPAC.unet.dataset import VolumeDataset, BlockDataset
    from torch.utils.data import DataLoader
    import os, sys
    import nibabel as nib
    import pickle

    # Keep the model on the same device the data will be moved to.
    use_gpu = torch.cuda.is_available()
    model_on_gpu = next(model.parameters()).is_cuda
    if use_gpu:
        if not model_on_gpu:
            model.cuda()
    else:
        if model_on_gpu:
            model.cpu()

    NoneType = type(None)
    if isinstance(rimg_in, NoneType) and isinstance(cimg_in, NoneType):
        print("Input rimg_in or cimg_in")
        sys.exit(1)

    if save_dice:
        dice_dict = dict()

    # Stays None when save_nii is false so the final return is well defined.
    out_path = None

    volume_dataset = VolumeDataset(rimg_in=rimg_in, cimg_in=cimg_in, bmsk_in=bmsk_in)
    volume_loader = DataLoader(dataset=volume_dataset, batch_size=1)

    for idx, vol in enumerate(volume_loader):
        if len(vol) == 1:  # just img
            ptype = 1  # Predict
            cimg = vol
            bmsk = None
            block_dataset = BlockDataset(rimg=cimg, bfld=None, bmsk=None,
                                         num_slice=num_slice, rescale_dim=rescale_dim)
        elif len(vol) == 2:  # img & msk
            ptype = 2  # image test
            cimg = vol[0]
            bmsk = vol[1]
            block_dataset = BlockDataset(rimg=cimg, bfld=None, bmsk=bmsk,
                                         num_slice=num_slice, rescale_dim=rescale_dim)
        elif len(vol) == 3:  # img bias_field & msk
            # (the original tested `len(vol == 3)`, i.e. the length of a
            # comparison result rather than the number of items)
            ptype = 3  # image bias correction test
            cimg = vol[0]
            bfld = vol[1]
            bmsk = vol[2]
            block_dataset = BlockDataset(rimg=cimg, bfld=bfld, bmsk=bmsk,
                                         num_slice=num_slice, rescale_dim=rescale_dim)
        else:
            print("Invalid Volume Dataset!")
            sys.exit(2)

        rescale_shape = block_dataset.get_rescale_shape()
        raw_shape = block_dataset.get_raw_shape()

        for od in range(3):
            # Permutation that moves the slicing axis back to position `od`.
            backard_ind = np.arange(3)
            backard_ind = np.insert(np.delete(backard_ind, 0), od, 0)

            block_data, slice_list, slice_weight = block_dataset.get_one_directory(axis=od)
            pr_bmsk = torch.zeros([len(slice_weight), rescale_dim, rescale_dim])
            if use_gpu:
                pr_bmsk = pr_bmsk.cuda()
            for (i, ind) in enumerate(slice_list):
                if ptype == 1:
                    rimg_blk = block_data[i]
                    if use_gpu:
                        rimg_blk = rimg_blk.cuda()
                elif ptype == 2:
                    rimg_blk, bmsk_blk = block_data[i]
                    if use_gpu:
                        rimg_blk = rimg_blk.cuda()
                        bmsk_blk = bmsk_blk.cuda()
                else:
                    rimg_blk, bfld_blk, bmsk_blk = block_data[i]
                    if use_gpu:
                        rimg_blk = rimg_blk.cuda()
                        bfld_blk = bfld_blk.cuda()
                        bmsk_blk = bmsk_blk.cuda()
                pr_bmsk_blk = model(torch.unsqueeze(Variable(rimg_blk), 0))
                # Channel 1 of the output is stored at slice index ind[1].
                pr_bmsk[ind[1], :, :] = pr_bmsk_blk.data[0][1, :, :]

            if use_gpu:
                pr_bmsk = pr_bmsk.cpu()

            pr_bmsk = pr_bmsk.permute(backard_ind[0], backard_ind[1], backard_ind[2])
            pr_bmsk = pr_bmsk[:rescale_shape[0], :rescale_shape[1], :rescale_shape[2]]
            # interpolate() needs batch and channel axes in front.
            uns_pr_bmsk = torch.unsqueeze(pr_bmsk, 0)
            uns_pr_bmsk = torch.unsqueeze(uns_pr_bmsk, 0)
            uns_pr_bmsk = nn.functional.interpolate(uns_pr_bmsk, size=raw_shape,
                                                    mode="trilinear", align_corners=False)
            pr_bmsk = torch.squeeze(uns_pr_bmsk)

            # Stack the three per-axis predictions along a new last axis.
            if od == 0:
                pr_3_bmsk = torch.unsqueeze(pr_bmsk, 3)
            else:
                pr_3_bmsk = torch.cat((pr_3_bmsk, torch.unsqueeze(pr_bmsk, 3)), dim=3)

        pr_bmsk = pr_3_bmsk.mean(dim=3)

        pr_bmsk = pr_bmsk.numpy()
        pr_bmsk_final = extract_large_comp(pr_bmsk > 0.5)

        # Reset per volume so a score from a previous volume is never reused.
        dice = None
        if isinstance(bmsk, torch.Tensor):
            bmsk = bmsk.data[0].numpy()
            dice = estimate_dice(bmsk, pr_bmsk_final)
            if verbose:
                print(dice)

        t1w_nii = volume_dataset.getCurCimgNii()
        t1w_path = t1w_nii.get_filename()
        t1w_dir, t1w_file = os.path.split(t1w_path)
        # Strip two extensions (e.g. ".nii.gz").
        t1w_name = os.path.splitext(t1w_file)[0]
        t1w_name = os.path.splitext(t1w_name)[0]

        if save_nii:
            t1w_aff = t1w_nii.affine
            t1w_shape = t1w_nii.shape

            if isinstance(nii_outdir, NoneType):
                nii_outdir = os.getcwd()

            out_path = os.path.join(nii_outdir, t1w_name + "_" + suffix + ".nii.gz")
            write_nifti(np.array(pr_bmsk_final, dtype=np.float32), t1w_aff, t1w_shape, out_path)

        if save_dice:
            dice_dict[t1w_name] = dice

    if save_dice:
        return dice_dict

    # return output mask
    return out_path
matplotlib """ import torch import torch.utils.data as Data import torch.nn.functional as F from torch.autograd import Variable import matplotlib.pyplot as plt # torch.manual_seed(1) # reproducible LR = 0.01 BATCH_SIZE = 32 EPOCH = 12 # fake dataset x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1) y = x.pow(2) + 0.1 * torch.normal(torch.zeros(*x.size())) # plot dataset plt.scatter(x.numpy(), y.numpy()) plt.show() # put dateset into torch dataset torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y) loader = Data.DataLoader( dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, )
def get_similarity(self, CT_feature, CR_features):
    """Return the product of the two feature tensors, summed over dims 3 and 4
    and divided by the L2 norm of `CR_features` over the same dims.

    `CT_feature` gets a new axis at dim 1 so it broadcasts against
    `CR_features`; the reduced dims are kept with size 1.
    """
    reduce_dims = [3, 4]
    query = CT_feature.unsqueeze(dim=1)
    dot = (query * CR_features).sum(dim=reduce_dims, keepdim=True)
    ref_norm = (CR_features ** 2).sum(dim=reduce_dims, keepdim=True).sqrt()
    return dot / ref_norm
def main(args=None):
    """Train a Bayesian regressor on latent vectors and export its predictions.

    Steps: parse the command line, load the latent/target tables, min-max
    normalize the inputs, train ``BayesianRegressor`` with the ELBO loss
    while tracking per-epoch train/validation statistics, save the network
    to ``args.network`` and the training history to a CSV file, then sample
    the trained network ``n_samples`` times for every input row and export
    the mean prediction and its standard deviation to a second CSV file.

    Args:
        args: optional list of command-line arguments; when None, the
            process arguments are parsed.
    """
    parser = argparse.ArgumentParser()
    add_arguments(parser)
    if len(sys.argv) == 1 and args is None:
        # No argument passed at all: some parameters were expected, show help.
        parser.print_help(sys.stderr)
        sys.exit(2)
    args = parser.parse_args(args)  # retrieve parsed arguments

    Console.info("Bayesian Neural Network for hi-res inference from low res acoustic priors (LGA-Bathymetry)")

    # let's check if input files exist
    if os.path.isfile(args.target):
        Console.info("Target input file: ", args.target)
    else:
        Console.error("Target input file [" + args.target + "] not found. Please check the provided input path (-t, --target)")

    if os.path.isfile(args.latent):
        Console.info("Latent input file: ", args.latent)
    else:
        Console.error("Latent input file [" + args.latent + "] not found. Please check the provided input path (-l, --latent)")

    # check for pre-trained network
    # if output file exists, warn user
    if os.path.isfile(args.network):
        Console.warn("Destination trained network file [", args.network, "] already exists. It will be overwritten (default action)")
    else:
        Console.info("Destination trained network: ", args.network)

    if os.path.isfile(args.output):
        Console.warn("Output file [", args.output, "] already exists. It will be overwritten (default action)")
    else:
        Console.info("Output file: ", args.output)

    # Optional settings fall back to defaults when not provided.
    num_epochs = args.epochs if args.epochs else 150
    # The original fallback assigned `num_epochs = 20` here, which left
    # n_samples undefined and silently overrode the epoch count.
    n_samples = args.samples if args.samples else 20
    col_key = args.key if args.key else 'mean_slope'
    # The original read args.key here; the input prefix comes from --xinput.
    # Currently only needed by the alternative toy-dataset loader.
    input_key = args.xinput if args.xinput else 'latent_'

    # // TODO : add arg parser, admit input file (dataset), config file, validation dataset file, mode (train, validate, predict)
    Console.info("Geotech landability/measurability predictor from low-res acoustics. Uses Bayesian Neural Networks as predictive engine")

    dataset_filename = args.latent  # dataset containing the predictive input. e.g. the latent vector
    target_filename = args.target   # output variable to be predicted, e.g. mean_slope
    Console.info("Loading dataset: " + dataset_filename)

    # relative_path is the common key in both tables
    X, y, index_df = CustomDataloader.load_dataset(dataset_filename, target_filename,
                                                   matching_key='relative_path', target_key=col_key)
    Console.info("Data loaded...")

    n_latents = X.shape[1]

    # Only the inputs are min-max normalized; the target is used as-is.
    y_norm = y
    norm = MinMaxScaler().fit(X)
    X_norm = norm.transform(X)

    print("X [min,max]", np.amin(X), "/", np.amax(X))
    print("X_norm [min,max]", np.amin(X_norm), "/", np.amax(X_norm))
    print("Y [min,max]", np.amin(y), "/", np.amax(y))

    X_train, X_test, y_train, y_test = train_test_split(X_norm,
                                                        y_norm,
                                                        test_size=.25,  # 3:1 ratio
                                                        shuffle=True)

    X_train, y_train = torch.tensor(X_train).float(), torch.tensor(y_train).float()
    X_test, y_test = torch.tensor(X_test).float(), torch.tensor(y_test).float()

    # PyTorch will complain if we feed the (N) tensor rather than a (Nx1)
    # tensor, so add a dummy trailing dimension.
    y_train = torch.unsqueeze(y_train, -1)
    y_test = torch.unsqueeze(y_test, -1)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    regressor = BayesianRegressor(n_latents, 1).to(device)  # single output being predicted
    optimizer = optim.Adam(regressor.parameters(), lr=0.002)  # learning rate
    criterion = torch.nn.MSELoss()

    ds_train = torch.utils.data.TensorDataset(X_train, y_train)
    dataloader_train = torch.utils.data.DataLoader(ds_train, batch_size=16, shuffle=True)

    ds_test = torch.utils.data.TensorDataset(X_test, y_test)
    dataloader_test = torch.utils.data.DataLoader(ds_test, batch_size=16, shuffle=True)

    # Training time
    test_hist = []
    uncert_hist = []
    train_hist = []
    fit_hist = []
    ufit_hist = []

    elbo_kld = 1.0

    print("ELBO KLD factor: ", elbo_kld / X_train.shape[0])
    for epoch in range(num_epochs):
        train_loss = []
        for datapoints, labels in dataloader_train:
            optimizer.zero_grad()

            # The returned loss combines the fit loss (MSELoss) and the
            # complexity cost, which is normalized by the number of inputs.
            loss = regressor.sample_elbo(inputs=datapoints.to(device),
                                         labels=labels.to(device),
                                         criterion=criterion,
                                         sample_nbr=n_samples,
                                         complexity_cost_weight=elbo_kld / X_train.shape[0])
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        test_loss = []
        fit_loss = []

        for test_datapoints, test_labels in dataloader_test:
            sample_loss = regressor.sample_elbo(inputs=test_datapoints.to(device),
                                                labels=test_labels.to(device),
                                                criterion=criterion,
                                                sample_nbr=n_samples,
                                                complexity_cost_weight=elbo_kld / X_test.shape[0])

            # we are interested in the reconstruction/prediction loss only (no KL cost)
            fit_loss_sample = regressor.sample_elbo(inputs=test_datapoints.to(device),
                                                    labels=test_labels.to(device),
                                                    criterion=criterion,
                                                    sample_nbr=n_samples,
                                                    complexity_cost_weight=0)

            test_loss.append(sample_loss.item())
            fit_loss.append(fit_loss_sample.item())

        mean_test_loss = statistics.mean(test_loss)
        stdv_test_loss = statistics.stdev(test_loss)

        mean_train_loss = statistics.mean(train_loss)

        mean_fit_loss = statistics.mean(fit_loss)
        stdv_fit_loss = statistics.stdev(fit_loss)

        Console.info("Epoch [" + str(epoch) + "] Train loss: {:.4f}".format(mean_train_loss) + " Valid. loss: {:.4f}".format(mean_test_loss) + " Fit loss: {:.4f}  ***".format(mean_fit_loss))
        Console.progress(epoch, num_epochs)

        test_hist.append(mean_test_loss)
        uncert_hist.append(stdv_test_loss)
        train_hist.append(mean_train_loss)

        fit_hist.append(mean_fit_loss)
        ufit_hist.append(stdv_fit_loss)

    Console.info("Training completed!")
    torch.save(regressor.state_dict(), args.network)

    export_df = pd.DataFrame([train_hist, test_hist, uncert_hist, fit_hist, ufit_hist]).transpose()
    export_df.columns = ['train_error', 'test_error', 'test_error_stdev', 'test_loss', 'test_loss_stdev']

    print("head", export_df.head())
    output_name = "bnn_training_S" + str(n_samples) + "_E" + str(num_epochs) + "_H" + str(n_latents) + ".csv"
    export_df.to_csv(output_name)

    # Once trained, we start inferring
    uncertainty = []
    predicted = []  # == y

    Console.info("testing predictions...")
    idx = 0
    # Predictions are produced for the whole (normalized) dataset, not only
    # the held-out split.
    Xp_ = torch.tensor(X_norm).float()

    for x in Xp_:
        x_dev = x.to(device)  # moved once, reused for every sample below
        # 1D output, retrieve a single item per stochastic forward pass
        predictions = [regressor(x_dev).item() for _ in range(n_samples)]

        p_mean = statistics.mean(predictions)
        p_stdv = statistics.stdev(predictions)
        idx = idx + 1

        predicted.append(p_mean)
        uncertainty.append(p_stdv)

        Console.progress(idx, len(Xp_))

    y_list = y_norm.squeeze().tolist()

    pred_df = pd.DataFrame([y_list, predicted, uncertainty, index_df]).transpose()
    pred_df.columns = ['y', 'predicted', 'uncertainty', 'index']

    output_name = "bnn_predictions_S" + str(n_samples) + "_E" + str(num_epochs) + "_H" + str(n_latents) + ".csv"
    # output_name = args.output
    pred_df.to_csv(output_name)
def warper_img(self, img):
    """Wrap an image array as a batched float tensor ready for a network.

    Args:
        img: Array-like image data accepted by ``torch.Tensor`` (for example
            a NumPy array or a nested list of numbers).

    Returns:
        The data as a ``Variable``-wrapped float tensor with a new leading
        dimension of size 1. It lives on the GPU when CUDA is available and
        on the CPU otherwise.
    """
    img_tensor = torch.Tensor(img)
    # The original called ``.cuda()`` unconditionally, which raises on
    # CPU-only hosts. Only move the data when a GPU is actually present.
    if torch.cuda.is_available():
        img_tensor = img_tensor.cuda()
    img_var = Variable(img_tensor)
    # Add the leading dimension of size 1.
    return torch.unsqueeze(img_var, dim=0)
num_workers=3)  # closes a call that starts outside this excerpt

# Script entry point: train `model` for EPOCHS passes over `data_loader`.
if __name__ == '__main__':
    for epoch in range(EPOCHS):
        print('Number of epoch: ', epoch)
        # Loss accumulated since the last progress report.
        running_loss = 0.0
        for i, data in enumerate(data_loader, 0):
            #print(i)
            # Each batch is indexed as (inputs, labels); move both to `device`.
            inputs = data[0].to(device)
            #inputs = torch.cuda.FloatTensor(inputs)
            labels = data[1].to(device)
            #labels = torch.cuda.FloatTensor(labels)
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            #set_trace()
            # Insert a size-1 dimension at index 1
            # (presumably the channel axis expected by the model -- TODO confirm).
            inputs = torch.unsqueeze(inputs, 1)
            #inputs.to(device)
            #labels.to(device)
            outputs = model(inputs)
            # torch.max(labels, 1)[1] is the arg-max index along dim 1
            # (presumably converting one-hot labels to class indices -- verify
            # against the dataset).
            loss = criterion(outputs, torch.max(labels, 1)[1])
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                # Report the mean loss over the last 2000 batches, then reset.
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
def node_init(self, batch_size):
    """Reset the node buffer for a new batch size.

    Stores ``batch_size``, resizes ``self.node`` in place to
    ``(batch_size, self.dim, self.node_num)`` and fills every batch entry
    with a copy of ``self.node_init_value``.
    """
    self.batch_size = batch_size
    target_shape = (self.batch_size, self.dim, self.node_num)
    self.node.resize_(*target_shape)
    # Broadcast the single initial value across the batch dimension.
    template = self.node_init_value.unsqueeze(0)
    self.node.copy_(template.expand_as(self.node))
def __init__(self, args):
    """Build the segmentation model from a dilated ResNet backbone.

    Reads ``layers``, ``classes``, ``sync_bn``, ``zoom_factor``, ``shot``,
    ``train_iter``, ``eval_iter`` and ``pyramid`` from ``args`` (plus
    ``ppm_scales`` when ``args.pyramid`` is truthy), constructs the backbone
    stages, the classification heads and one of two context modules, and
    loads a table of prototypes from disk into ``self.dic_tensor``.

    NOTE(review): the nesting of the ``if self.pyramid`` / ``else`` bodies
    was reconstructed from whitespace-collapsed source -- confirm against
    the original file.
    """
    super(Model, self).__init__()
    layers = args.layers
    classes = args.classes
    sync_bn = args.sync_bn  # read here but not used anywhere in this method
    pretrained = True
    assert layers in [50, 101, 152]
    assert classes > 1
    from torch.nn import BatchNorm2d as BatchNorm
    self.zoom_factor = args.zoom_factor
    self.criterion = nn.CrossEntropyLoss(ignore_index=255)
    self.shot = args.shot
    self.train_iter = args.train_iter
    self.eval_iter = args.eval_iter
    self.pyramid = args.pyramid

    # Set the BatchNorm attribute on the backbone module before building it.
    models.BatchNorm = BatchNorm

    print('INFO: Using ResNet {}'.format(layers))
    if layers == 50:
        resnet = models.resnet50(pretrained=pretrained)
    elif layers == 101:
        resnet = models.resnet101(pretrained=pretrained)
    else:
        resnet = models.resnet152(pretrained=pretrained)
    # Stem: three conv/bn/relu triples followed by max-pooling.
    self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu1,
                                resnet.conv2, resnet.bn2, resnet.relu2,
                                resnet.conv3, resnet.bn3, resnet.relu3,
                                resnet.maxpool)
    self.layer1, self.layer2, self.layer3, self.layer4 = resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4

    # Replace striding with dilation in layer3 (rate 2) and layer4 (rate 4):
    # every 'conv2' gets stride 1 with matching dilation/padding, and the
    # first downsample module gets stride 1 as well.
    for n, m in self.layer3.named_modules():
        if 'conv2' in n:
            m.dilation, m.padding, m.stride = (2, 2), (2, 2), (1, 1)
        elif 'downsample.0' in n:
            m.stride = (1, 1)
    for n, m in self.layer4.named_modules():
        if 'conv2' in n:
            m.dilation, m.padding, m.stride = (4, 4), (4, 4), (1, 1)
        elif 'downsample.0' in n:
            m.stride = (1, 1)

    reduce_dim = 256
    # presumably the concatenated channel count of two backbone stages --
    # TODO confirm against the forward pass
    fea_dim = 1024 + 512

    # Final classifier: 3x3 conv -> ReLU -> dropout -> 1x1 conv to `classes`.
    self.cls = nn.Sequential(
        nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.1),
        nn.Conv2d(reduce_dim, classes, kernel_size=1))

    # 1x1 projection from fea_dim down to reduce_dim channels.
    self.down_conv = nn.Sequential(
        nn.Conv2d(fea_dim, reduce_dim, kernel_size=1, padding=0, bias=False),
        nn.Dropout2d(p=0.5))

    # Using Feature Enrichment Module from PFENet as context module
    if self.pyramid:
        self.pyramid_bins = args.ppm_scales
        # Plain Python list (not a ModuleList); only bins > 1 get a pooling
        # layer.
        self.avgpool_list = []
        for bin in self.pyramid_bins:
            if bin > 1:
                self.avgpool_list.append(nn.AdaptiveAvgPool2d(bin))

        # One correlation conv, refinement block and classifier per bin.
        self.corr_conv = []
        self.beta_conv = []
        self.inner_cls = []
        for bin in self.pyramid_bins:
            self.corr_conv.append(
                nn.Sequential(
                    nn.Conv2d(reduce_dim * 2 + 1, reduce_dim, kernel_size=1, padding=0, bias=False),
                    nn.ReLU(inplace=True),
                ))
            self.beta_conv.append(
                nn.Sequential(
                    nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
                    nn.ReLU(inplace=True)))
            self.inner_cls.append(
                nn.Sequential(
                    nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
                    nn.ReLU(inplace=True),
                    nn.Dropout2d(p=0.1),
                    nn.Conv2d(reduce_dim, classes, kernel_size=1)))
        # Convert to ModuleLists so the sub-modules are registered.
        self.corr_conv = nn.ModuleList(self.corr_conv)
        self.beta_conv = nn.ModuleList(self.beta_conv)
        self.inner_cls = nn.ModuleList(self.inner_cls)

        # One merge conv per pair of adjacent bins (len(bins) - 1 in total).
        self.alpha_conv = []
        for idx in range(len(self.pyramid_bins) - 1):
            self.alpha_conv.append(
                nn.Sequential(
                    nn.Conv2d(2 * reduce_dim, reduce_dim, kernel_size=1, stride=1, padding=0, bias=False),
                    nn.ReLU(inplace=True)))
        self.alpha_conv = nn.ModuleList(self.alpha_conv)

        # Fuse the per-bin features back to reduce_dim channels, then refine.
        self.res1 = nn.Sequential(
            nn.Conv2d(reduce_dim * len(self.pyramid_bins), reduce_dim, kernel_size=1, padding=0, bias=False),
            nn.ReLU(inplace=True),
        )
        self.res2 = nn.Sequential(
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )
    # Using ASPP as context module
    else:
        self.ASPP = ASPP(out_channels=reduce_dim)
        self.corr_conv = nn.Sequential(
            nn.Conv2d(reduce_dim * 2 + 1, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5))
        # Three identical ReLU/conv/ReLU/conv blocks.
        self.skip1 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, stride=1, padding=1, bias=False))
        self.skip2 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, stride=1, padding=1, bias=False))
        self.skip3 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, stride=1, padding=1, bias=False))
        self.decoder = build_decoder(256)
        # Auxiliary classifier with the same layout as self.cls.
        self.cls_aux = nn.Sequential(
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.1),
            nn.Conv2d(reduce_dim, classes, kernel_size=1))

    # NOTE(review): hard-coded absolute path; allow_pickle=True unpickles
    # arbitrary objects, so only trusted files should be loaded here.
    dic = np.load(
        "/data/mdw/few_shot_segmentation/model/asgNet/dic_base_class_prototype.npy",
        allow_pickle=True)
    dic_list = dic.tolist()
    # Assumes the loaded list holds at least 15 tensors -- TODO confirm.
    # Each gets a leading dimension so they can be concatenated below.
    for i in range(15):
        dic_list[i] = torch.unsqueeze(dic_list[i], dim=0)
    # Plain attribute (not a registered buffer), so it does not follow
    # .to(device) / state_dict.
    self.dic_tensor = torch.cat(dic_list, dim=0)  # 15 * 256 * 1 * 1
root='./mnist/',
train=True,
transform=torchvision.transforms.ToTensor(),
download=DOWNLOAD_MNIST,
)  # closes a dataset constructor call that starts outside this excerpt
#torch_dataset = Data.TensorDataset(x, y), build your own dataset
# Print the sizes of the training images and labels.
# NOTE(review): `train_data` / `train_labels` / `test_data` / `test_labels`
# look like the legacy torchvision attribute names -- confirm whether the
# installed version still provides them.
print(train_data.train_data.size())
print(train_data.train_labels.size())
# Shuffled mini-batch loader over the training split.
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
# Add a dimension at index 1, cast to float, keep the first 2000 samples
# and divide by 255.
test_x = torch.unsqueeze(test_data.test_data, dim=1).type(
    torch.FloatTensor)[:2000] / 255.
test_y = test_data.test_labels[:2000]


class Attention(nn.Module):
    # Attention module with hard-coded sizes; only the constructor is
    # visible in this excerpt.
    def __init__(self):
        #config.hidden_size=768
        #config.transformer.num_heads = 12
        super(Attention, self).__init__()
        # 8 heads of size 4 give a total width of 32.
        self.num_attention_heads = 8
        self.attention_head_size = 4
        self.all_head_size = 32
        # Linear layers named query/key/value, each mapping 32 -> 32 features.
        self.query = nn.Linear(32, 32)
        self.key = nn.Linear(32, 32)
        self.value = nn.Linear(32, 32)
def train():
    """Train the character-level RNN on a text corpus and sample after each epoch.

    Reads the corpus at ``data_path``, maps every distinct character to an
    index, and trains ``RNN`` with cross-entropy to predict the next
    character over consecutive windows of ``seq_len`` characters. After
    each epoch the mean loss is printed, the weights are saved to
    ``save_path`` and a sample of generated characters is printed.

    Changes relative to the previous version:
      * the corpus file is closed deterministically via ``with``;
      * the sampling seed is cloned, because it used to be a *view* of the
        training data and writing sampled characters into it silently
        corrupted one corpus position per generated character;
      * sampling runs under ``torch.no_grad()`` since no gradients are used.
    """
    ########### Hyperparameters ###########
    hidden_size = 512   # size of hidden state
    seq_len = 100       # length of LSTM sequence
    num_layers = 3      # num of layers in LSTM layer stack
    lr = 0.002          # learning rate
    epochs = 100        # max number of epochs
    op_seq_len = 200    # total num of characters in output test sequence
    load_chk = False    # load weights from save_path directory to continue training
    save_path = "./preTrained/CharRNN_shakespeare.pth"
    data_path = "./data/shakespeare.txt"
    #######################################

    # load the text file
    with open(data_path, 'r') as text_file:
        data = text_file.read()
    chars = sorted(list(set(data)))
    data_size, vocab_size = len(data), len(chars)
    print("----------------------------------------")
    print("Data has {} characters, {} unique".format(data_size, vocab_size))
    print("----------------------------------------")

    # char to index and index to char maps
    char_to_ix = {ch: i for i, ch in enumerate(chars)}
    ix_to_char = {i: ch for i, ch in enumerate(chars)}

    # convert data from chars to indices
    data = [char_to_ix[ch] for ch in data]

    # data tensor on device, shaped (data_size, 1)
    data = torch.tensor(data).to(device)
    data = torch.unsqueeze(data, dim=1)

    # model instance
    rnn = RNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)

    # load checkpoint if True
    if load_chk:
        rnn.load_state_dict(torch.load(save_path))
        print("Model loaded successfully !!")
        print("----------------------------------------")

    # loss function and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)

    # training loop
    for i_epoch in range(1, epochs + 1):
        # random starting point (1st 100 chars) from data to begin
        data_ptr = np.random.randint(100)
        n = 0
        running_loss = 0
        hidden_state = None

        while True:
            # the target is the input shifted one character to the right
            input_seq = data[data_ptr:data_ptr + seq_len]
            target_seq = data[data_ptr + 1:data_ptr + seq_len + 1]

            # forward pass
            # NOTE(review): hidden_state is carried across windows; this
            # relies on RNN.forward returning a detached state -- confirm.
            output, hidden_state = rnn(input_seq, hidden_state)

            # compute loss
            loss = loss_fn(torch.squeeze(output), torch.squeeze(target_seq))
            running_loss += loss.item()

            # compute gradients and take optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update the data pointer
            data_ptr += seq_len
            n += 1

            # if at end of data : break
            if data_ptr + seq_len + 1 > data_size:
                break

        # print loss and save weights after every epoch
        print("Epoch: {0} \t Loss: {1:.8f}".format(i_epoch, running_loss / n))
        torch.save(rnn.state_dict(), save_path)

        # sample / generate a text sequence after every epoch
        data_ptr = 0
        hidden_state = None

        # random character from data to begin; clone so the in-place update
        # below cannot write into the training corpus
        rand_index = np.random.randint(data_size - 1)
        input_seq = data[rand_index:rand_index + 1].clone()

        print("----------------------------------------")
        with torch.no_grad():
            while True:
                # forward pass
                output, hidden_state = rnn(input_seq, hidden_state)

                # construct categorical distribution and sample a character
                output = F.softmax(torch.squeeze(output), dim=0)
                dist = Categorical(output)
                index = dist.sample()

                # print the sampled character
                print(ix_to_char[index.item()], end='')

                # next input is current output
                input_seq[0][0] = index.item()
                data_ptr += 1

                if data_ptr > op_seq_len:
                    break

        print("\n----------------------------------------")
def forward(self, content_feature, style_feature):
    """Mix content and style features and restore two trailing unit dims.

    Both inputs have their last two dimensions squeezed (each is removed
    only if it has size 1), are passed to ``self.mixer``, and the result
    gets two trailing dimensions of size 1 appended.
    """
    flat_content = content_feature.squeeze(-1).squeeze(-1)
    flat_style = style_feature.squeeze(-1).squeeze(-1)
    blended = self.mixer(flat_content, flat_style)
    return blended.unsqueeze(-1).unsqueeze(-1)
def _gather_voxel_corners(values, x, y, z, stride):
    """Gather ``values`` at the 8 corners of each pixel's intersecting voxel.

    ``values`` is read as a flat array addressed by
    ``z + stride * y + stride * stride * x``. For every pixel the corners
    ``(x + dx, y + dy, z + dz)`` with ``dx, dy, dz`` in ``{0, 1}`` are looked
    up, with ``dz`` varying fastest and ``dx`` slowest.

    Returns a tensor of shape ``x.shape + (8,)``.
    """
    flat_values = values.view(-1)
    corners = []
    for dx in (0, 1):
        for dy in (0, 1):
            for dz in (0, 1):
                flat_index = ((z + dz).view(-1)
                              + stride * (y + dy).view(-1)
                              + stride * stride * (x + dx).view(-1))
                corners.append(
                    torch.index_select(flat_values, 0, flat_index)
                    .view(x.shape).unsqueeze_(2))
    return torch.cat(corners, 2)


def generate_image(bounding_box_min_x, bounding_box_min_y, bounding_box_min_z,
                   bounding_box_max_x, bounding_box_max_y, bounding_box_max_z,
                   voxel_size, grid_res_x, grid_res_y, grid_res_z, width, height,
                   grid, camera, back, camera_list):
    """Render a shaded image of ``grid`` as seen from ``camera``.

    Rays are traced by ``renderer.ray_matching``; per-voxel normals from
    ``get_grid_normal`` are gathered at the 8 corners of every intersected
    voxel, reduced to one normal per pixel with ``get_intersection_normal``,
    and shaded with a point light placed at the camera position.

    Args:
        bounding_box_min_*/bounding_box_max_*: bounds passed to the ray tracer.
        voxel_size: scale from grid indices to world coordinates.
        grid_res_x, grid_res_y, grid_res_z: grid resolution per axis.
        width, height: image size in pixels.
        grid: the grid handed to the ray tracer and intersection refinement.
        camera: camera position; its first three entries are the eye x/y/z.
        back: unused, kept for interface compatibility.
        camera_list: unused, kept for interface compatibility.

    Returns:
        ``(image, mask)``: the shaded image, with pixels that hit nothing set
        to 1, and ``clamp(image * 10000, 0, 1)``.

    NOTE(review): the flat corner index uses ``grid_res_x`` as the stride
    for every axis, so it is only correct for cubic grids. This is kept
    unchanged from the previous version -- confirm before using
    non-cubic grids.
    """
    # Get normal vectors for points on the grid
    [grid_normal_x, grid_normal_y, grid_normal_z] = get_grid_normal(
        grid, voxel_size, grid_res_x, grid_res_y, grid_res_z)

    # Generate rays
    w_h_3 = torch.zeros(width, height, 3).cuda()
    w_h = torch.zeros(width, height).cuda()
    eye_x, eye_y, eye_z = camera[0], camera[1], camera[2]

    # Do ray tracing in cpp
    outputs = renderer.ray_matching(
        w_h_3, w_h, grid, width, height,
        bounding_box_min_x, bounding_box_min_y, bounding_box_min_z,
        bounding_box_max_x, bounding_box_max_y, bounding_box_max_z,
        grid_res_x, grid_res_y, grid_res_z,
        eye_x, eye_y, eye_z)

    # {intersection_pos, voxel_position, directions}
    intersection_pos_rough = outputs[0]
    voxel_min_point_index = outputs[1]
    ray_direction = outputs[2]

    # Make the pixels with no intersections with rays be 0
    mask = (voxel_min_point_index[:, :, 0] != -1).type(Tensor)

    # Get the indices of the minimum point of the intersecting voxels;
    # missed pixels (-1) are redirected to index 0 so the gathers stay valid.
    x = voxel_min_point_index[:, :, 0].type(torch.cuda.LongTensor)
    y = voxel_min_point_index[:, :, 1].type(torch.cuda.LongTensor)
    z = voxel_min_point_index[:, :, 2].type(torch.cuda.LongTensor)
    x[x == -1] = 0
    y[y == -1] = 0
    z[z == -1] = 0

    # 1 is added to all 8 corner values of pixels that hit nothing.
    miss_fill = 1 - mask.view(width, height, 1).repeat(1, 1, 8)

    # Normal components for the 8 points of each intersecting voxel
    intersection_grid_normal_x = _gather_voxel_corners(
        grid_normal_x, x, y, z, grid_res_x) + miss_fill
    intersection_grid_normal_y = _gather_voxel_corners(
        grid_normal_y, x, y, z, grid_res_x) + miss_fill
    intersection_grid_normal_z = _gather_voxel_corners(
        grid_normal_z, x, y, z, grid_res_x) + miss_fill

    # Change from grid coordinates to world coordinates
    voxel_min_point = Tensor([
        bounding_box_min_x, bounding_box_min_y, bounding_box_min_z
    ]) + voxel_min_point_index * voxel_size

    intersection_pos = compute_intersection_pos(
        grid, intersection_pos_rough, voxel_min_point, voxel_min_point_index,
        ray_direction, voxel_size, mask)
    intersection_pos = intersection_pos * mask.repeat(3, 1, 1).permute(1, 2, 0)

    # Compute the normal vectors for the intersecting points
    intersection_normal_x = get_intersection_normal(
        intersection_grid_normal_x, intersection_pos, voxel_min_point, voxel_size)
    intersection_normal_y = get_intersection_normal(
        intersection_grid_normal_y, intersection_pos, voxel_min_point, voxel_size)
    intersection_normal_z = get_intersection_normal(
        intersection_grid_normal_z, intersection_pos, voxel_min_point, voxel_size)

    # Put all the xyz-axis of the normal vectors into a single matrix
    intersection_normal = torch.cat(
        (intersection_normal_x.unsqueeze_(2),
         intersection_normal_y.unsqueeze_(2),
         intersection_normal_z.unsqueeze_(2)), 2)
    # Normalise each per-pixel normal to unit length.
    intersection_normal = intersection_normal / torch.unsqueeze(
        torch.norm(intersection_normal, p=2, dim=2), 2).repeat(1, 1, 3)

    # Create the point light (placed at the camera position)
    light_position = camera.repeat(width, height, 1)
    light_norm = torch.unsqueeze(
        torch.norm(light_position - intersection_pos, p=2, dim=2),
        2).repeat(1, 1, 3)
    light_direction_point = (light_position - intersection_pos) / light_norm
    light_direction = camera.repeat(width, height, 1)
    l_dot_n = torch.sum(light_direction * intersection_normal, 2).unsqueeze_(2)
    # 2 * max(l.n, 0) divided by the squared sum of
    # (light_position - intersection_pos) * light_direction_point.
    shading = 2 * torch.max(
        l_dot_n, Tensor(width, height, 1).fill_(0))[:, :, 0] / torch.pow(
            torch.sum(
                (light_position - intersection_pos) * light_direction_point,
                dim=2), 2)

    # Get the final image
    image = shading * mask
    image[mask == 0] = 1
    mask = torch.clamp(image * 10000, 0, 1)
    return image, mask
def applay_elastic_tensor_transform(self, grid):
    """Resample ``self.image`` with ``grid`` and store the result in place.

    A leading dimension is added for ``grid_sample`` and dropped again from
    the result; the stored tensor is taken from ``.data``.
    """
    batched = self.image.unsqueeze(0)
    warped = grid_sample(batched, grid)
    self.image = warped.data[0]
def forward(self, x1, x2, __, seq_lens, ___, ____):
    """Run the model over a padded batch of sequences.

    Args:
        x1: padded input tensor, indexed as (batch, max_seq_len, embed).
        x2: input tensor for global temporal preferences, documented in the
            source as [batch_size, seq_len, num_of_temporal, embed_dim].
        seq_lens: list containing the length of each sequence.
        __, ___, ____: unused placeholders kept for the caller's signature.

    Returns:
        Tensor of shape (batch, max_seq_len, embed); positions beyond a
        sequence's length are left at zero.

    The output buffer is allocated on ``x1.device``. It was previously
    placed on CUDA whenever a GPU existed, even if the inputs were on the
    CPU.
    """
    batch_size = x1.size(0)
    max_seq_length = x1.size(1)
    embed_size = x1.size(2)

    outputs = torch.zeros([max_seq_length, batch_size, embed_size],
                          device=x1.device)

    # x2: [batch_size, seq_len, num_of_temporal, embed_dim]
    # Packing drops the padded steps; .data is (total_steps, temporal, embed).
    x2 = pack(x2, seq_lens, batch_first=True).data
    mha_input = torch.transpose(x2, 0, 1)
    _, x2_score = self._mha(mha_input, mha_input, mha_input)
    # Average the attention weights over dim 2, renormalise over dim 1, and
    # use them for a weighted sum over the temporal axis.
    x2_score = torch.softmax(torch.mean(x2_score, 2, keepdim=False), dim=1)
    x2_score = torch.unsqueeze(x2_score, dim=1)
    x2 = torch.squeeze(torch.bmm(x2_score, x2), dim=1)
    x2 = self._mlp_mha(x2)

    # sequence embedding
    x1 = pack(x1, seq_lens, batch_first=True)
    x1, _ = self.rnn(x1)

    # batch_sizes[t] = number of sequences still active at time step t.
    step_batch_sizes = x1.batch_sizes.cpu().numpy()
    cursor = 0
    prev_x1s = []
    for step in range(step_batch_sizes.shape[0]):
        active = step_batch_sizes[step]
        x1_step = x1.data[cursor:cursor + active]
        x2_step = x2[cursor:cursor + active]

        # Keep the history of hidden states, truncated to the sequences that
        # are still active, and average over all steps seen so far.
        prev_x1s.append(x1_step)
        prev_x1s = [prev_x1[:active] for prev_x1 in prev_x1s]
        prev_hs = torch.stack(prev_x1s, dim=1)
        x1_step = torch.mean(prev_hs, dim=1, keepdim=False)

        x_step = torch.cat((x1_step, x2_step), dim=1)
        x_step = self.mlp(x_step)
        outputs[step][:active] = x_step
        cursor += active

    # Back to batch-first layout.
    outputs = torch.transpose(outputs, 0, 1)
    return outputs
def step(
        self,
        Ybar_t: torch.Tensor,
        dec_state: Tuple[torch.Tensor, torch.Tensor],
        enc_hiddens: torch.Tensor,
        enc_hiddens_proj: torch.Tensor,
        enc_masks: torch.Tensor
) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """ Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
                            where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size, h = hidden size.
            First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
                                src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape (b, src_len, h),
                                where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len),
                                where b = batch size, src_len is maximum source length. May be None.

    @returns dec_state (tuple (Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size, h = hidden size.
            First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size, h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
                            Note: You will not use this outside of this function.
                                  We are simply returning this value so that we can sanity check your implementation.
    """
    # Advance the decoder and split its state into hidden and cell parts.
    dec_state = self.decoder(Ybar_t, dec_state)
    dec_hidden, dec_cell = dec_state

    # Attention scores: (b, src_len, h) @ (b, h, 1) -> (b, src_len, 1).
    # Squeeze with an explicit dim so a batch of size 1 is not collapsed.
    e_t = torch.squeeze(
        enc_hiddens_proj @ torch.unsqueeze(dec_hidden, dim=2), dim=2)

    # Set e_t to -inf where enc_masks has 1.
    # A boolean mask is required: uint8 masks are deprecated for
    # masked_fill_ and rejected by current PyTorch releases.
    if enc_masks is not None:
        e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))

    # Attention distribution over source positions, then the attention
    # output: (b, 1, src_len) @ (b, src_len, 2h) -> (b, 1, 2h) -> (b, 2h).
    alpha_t = F.softmax(e_t, dim=1)
    a_t = torch.squeeze(torch.unsqueeze(alpha_t, dim=1) @ enc_hiddens, dim=1)

    # Combine the attention output with the decoder state: (b, 3h) -> (b, h).
    U_t = torch.cat([a_t, dec_hidden], dim=1)
    V_t = self.combined_output_projection(U_t)
    O_t = self.dropout(torch.tanh(V_t))

    combined_output = O_t
    return dec_state, combined_output, e_t
# Manual smoke test: push either a real image or random data through TvarLayer.
if __name__ == '__main__':

    def _save_and_show(picture, filename):
        """Write ``picture`` to ``filename`` and open it in the image viewer."""
        picture.save(filename)
        picture.show()

    display = False
    use_img = False
    img_path = r"C:\Users\ROYA2\Documents\Capture.PNG"

    if exists(img_path) and use_img:
        # Real image: convert to a tensor and add a leading dimension.
        image = Image.open(img_path)
        if display:
            _save_and_show(image, 'input_img.png')
        to_tensor = imt.ToTensor()
        img = to_tensor(image)
        input(img.shape)
        samp_data = torch.unsqueeze(img, 0)
    else:
        # Synthetic input: random values of shape (3, 5, 45, 45).
        img = np.random.rand(3, 5, 45, 45)
        samp_data = torch.from_numpy(img)
        if display:
            to_pil = imt.ToPILImage()
            image = to_pil(img)
            _save_and_show(image, 'input_img.png')

    layer = TvarLayer(14, [3, 3], samp_data.shape)
    out = layer.forward(samp_data)
    # input(out.shape)

    if display:
        to_pil = imt.ToPILImage()
        image = to_pil(out[0])
        _save_and_show(image, 'output_img.png')
import torch
import torchvision.transforms as transforms
from PIL import Image

from model import LeNet

# Preprocessing: resize to 32x32, convert to a tensor, then normalise each of
# the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.Resize((32, 32)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Class names indexed by the network's output position.
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

net = LeNet()
net.load_state_dict(torch.load('Lenet.pth'))

# Force three channels: the Normalize step above is configured for RGB and
# fails on grayscale / RGBA / CMYK inputs.
im = Image.open('1.jpg').convert('RGB')
im = transform(im)  # [C, H, W]
im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

with torch.no_grad():
    outputs = net(im)
    # Index of the highest score per sample, as a NumPy array of shape (1,).
    predict = torch.max(outputs, dim=1)[1].data.numpy()
# Index explicitly: int() on a non-scalar array is deprecated in NumPy.
print(classes[int(predict[0])])