def get_flow(t, theta, map_size):
    """
    Rotates the map by theta and translates the rotated map by t.

    Assume that the robot rotates by an angle theta and then moves forward by
    translation t. This function returns the flow field: for every pixel in
    the new image it tells us which pixel in the original image it came from:
    NewI(x, y) = OldI(flow_x(x, y), flow_y(x, y)).

    Assume there is a point p in the original image. The robot rotates by R
    and moves forward by t, so p1 = R^T * p and p2 = p1 - t (the world moves
    in the opposite direction). Thus p2 = R^T * p - t, and p2 came from
    R * (p2 + t), which is what this function calculates.

      t:     ... x 2 (translation for B batches of N motions each).
      theta: ... x 1 (rotation for B batches of N motions each).

    Output: ... x map_size x map_size x 2
    """
    B = t.view(-1, 2).size(0)
    tx, ty = torch.unbind(t.view(-1, 1, 1, 1, 2), dim=4)  # Bx1x1x1 each
    theta = theta.view(-1, 1, 1, 1)

    c = Variable(torch.Tensor([(map_size - 1.) / 2.]).double())

    x, y = np.meshgrid(np.arange(map_size), np.arange(map_size))
    x = Variable(torch.from_numpy(x).double()).view(1, map_size, map_size, 1)
    y = Variable(torch.from_numpy(y).double()).view(1, map_size, map_size, 1)

    # Shift the grid to map-centered coordinates and apply the translation.
    tx = tx - c.expand(tx.size())
    x = x.expand([B] + list(x.size()[1:]))
    x = x + tx.expand(x.size())
    ty = ty - c.expand(ty.size())
    y = y.expand([B] + list(y.size()[1:]))
    y = y + ty.expand(y.size())  # BxHxWx1

    # Rotate the translated grid by theta.
    sin_theta = torch.sin(theta)  # Bx1x1x1
    cos_theta = torch.cos(theta)
    xr = x * cos_theta.expand(x.size()) - y * sin_theta.expand(y.size())
    yr = x * sin_theta.expand(x.size()) + y * cos_theta.expand(y.size())  # BxHxWx1

    # Shift back to pixel coordinates.
    xr = xr + c.expand(xr.size())
    yr = yr + c.expand(yr.size())

    flow = torch.stack([xr, yr], dim=-1)
    sh = list(t.size()[:-1]) + [map_size, map_size, 2]
    flow = flow.view(sh)
    return flow
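# Usage sketch (illustrative only, not part of the original module): with zero
# motion the flow reduces to the identity grid, so resampling with it (see
# dense_resample below) returns the input map unchanged.
if __name__ == '__main__':
    t = Variable(torch.zeros(4, 2).double())      # batch of 4 translations
    theta = Variable(torch.zeros(4, 1).double())  # batch of 4 rotations
    flow = get_flow(t, theta, 32)
    assert tuple(flow.size()) == (4, 32, 32, 2)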
def get_representation(self, encoded):
    if 1 - self.opt.dropout_rate_probs < 1e-6:
        encoded = self.dropout_embedding(encoded)
    representation = []
    for one_type in self.opt.pooling_type.split(','):
        one_type = one_type.strip()
        if one_type == 'max':
            probs = torch.max(encoded, dim=1)[0]
        elif one_type == 'average':
            probs = torch.mean(encoded, dim=1)
        elif one_type == 'none':
            probs = encoded.contiguous().view(encoded.size(0), -1)
        elif one_type == 'max_col':
            probs = torch.max(torch.transpose(encoded, 1, 2), dim=1)[0]
        elif one_type == 'average_col':
            probs = torch.mean(torch.transpose(encoded, 1, 2), dim=1)
        else:
            print('Wrong pooling type -- falling back to the default flatten layer.')
            probs = encoded.contiguous().view(encoded.size(0), -1)
        representation.append(probs)
    if len(representation) > 1:
        representation = torch.cat(representation, dim=-1)
    else:
        representation = representation[0]
    return representation
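# Note on the pooling above: torch.max over a dim returns a (values, indices)
# pair, hence the [0]. Doctest-style sketch:
#
#   >>> enc = torch.tensor([[[1., 4.], [3., 2.]]])   # (batch=1, seq=2, dim=2)
#   >>> torch.max(enc, dim=1)[0]
#   tensor([[3., 4.]])
#   >>> torch.mean(enc, dim=1)
#   tensor([[2., 3.]])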
def forward(self, x):
    """
    Input is the image activations following a convolutional layer,
    with dimensions N x C x H x W.
    The co-occurrence layer computes a vector of length C ** 2.
    """
    x = F.relu(self.conv1(x))
    x = F.pad(x, (2, 2, 2, 2), mode='reflect')
    x = self.gaussian(x)
    N, C, H, W = x.size()
    # List of length H*W of (N, C, H, W) tensors, one per spatial offset.
    x_offsets = [
        self.roll(self.roll(x, i, 2), j, 3) for i in range(H)
        for j in range(W)
    ]
    x_offsets = torch.cat(x_offsets, 1).to(DEVICE)  # (N, C*H*W, H, W)
    x_offsets = x_offsets.view(N, C * H * W, H * W).permute(0, 2, 1)  # (N, H*W, C*H*W)
    x_base = x.view(N, C, H * W)  # (N, C, H*W)
    corrs = torch.bmm(x_base, x_offsets)  # (N, C, C*H*W)
    corrs = corrs.view(N, C * C, H * W).permute(0, 2, 1)  # (N, H*W, C*C)
    c_ij, best_offset = torch.max(corrs, 1)  # (N, C*C)
    return c_ij
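# Sketch of the offset shift used above (assuming self.roll behaves like
# torch.roll, i.e. a circular shift along one dimension):
#
#   >>> t = torch.arange(4).view(1, 1, 2, 2)
#   >>> torch.roll(t, 1, 2)          # rows wrap around by one
#   tensor([[[[2, 3],
#             [0, 1]]]])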
def forward(self, text, z):
    """
    Given a caption embedding and a latent variable z (noise), generate an image.

    Arguments
    ---------
    text : torch.FloatTensor
        Output of the skip-thought embedding model for the caption.
        text.size() = (batch_size, text_embed_dim)

    z : torch.FloatTensor
        Latent variable or noise.
        z.size() = (batch_size, z_dim)

    Returns
    -------
    output : An image of shape (64, 64, 3)
    """
    reduced_text = self.reduced_text_dim(text)  # (batch_size, reduced_text_dim)
    concat = torch.cat((reduced_text, z), 1)  # (batch_size, reduced_text_dim + z_dim)
    concat = self.concat(concat)  # (batch_size, 64*8*4*4)
    concat = concat.view(-1, 4, 4, 64 * 8)  # (batch_size, 4, 4, 64*8)
    d_net_out = self.d_net(concat)  # (batch_size, 64, 64, 3)
    output = d_net_out / 2. + 0.5  # rescale from [-1, 1] to [0, 1]
    return output
def forward(self, x):
    if not self.resnet:
        conv1 = self.lrelu(self.conv1(x))
        conv2 = self.lrelu(self.conv2(conv1))
        conv3 = self.lrelu(self.conv3(conv2))
        conv4 = self.lrelu(self.conv4(conv3))
        conv4 = conv4.view(conv4.size(0) * self.num_rotation, -1)
        gan_logits = self.fully_connect_gan1(conv4)
        if self.ssup:
            rot_logits = self.fully_connect_rot1(conv4)
            rot_prob = self.softmax(rot_logits)
    else:
        re1 = self.re1(x)
        re2 = self.re2(re1)
        re3 = self.re3(re2)
        re4 = self.re4(re3)
        re4 = self.relu(re4)
        re4 = torch.sum(re4, dim=(2, 3))
        gan_logits = self.fully_connect_gan2(re4)
        if self.ssup:
            rot_logits = self.fully_connect_rot2(re4)
            rot_prob = self.softmax(rot_logits)
    if self.ssup:
        return self.sigmoid(gan_logits), gan_logits, rot_logits, rot_prob
    else:
        return self.sigmoid(gan_logits), gan_logits
def sent2vec(sent=''):
    """
    Parameters
    ----------
    sent : string
        sentence

    Returns
    -------
    sentence_vector : torch.FloatTensor
        Sentence matrix built from per-word vectors, formatted as a torch.Tensor.

    example
    -------
    I love you.
    => tensor([
         [ 1.0765e-01, -3.6939e+00,  1.2139e+00, -1.0561e+00, -2.0084e+00,  # "I" vector
          -1.4055e+00, -9.0298e-01, -2.3618e-01,  1.5151e+00, -1.2158e-01,
           2.3321e+00, -5.7944e-01, -2.2252e-01, ...],
         [-6.3879e-01, -1.7294e+00,  1.1637e-01, -1.0025e+00, -6.6298e-01,  # "love" vector
          -1.6146e+00, -1.1563e+00, -1.4284e+00,  1.1772e+00, -1.4051e+00,
          -5.2077e-01, -4.0171e-01, -1.9743e-01, ...],
         [ 4.7850e-01, -1.4013e+00, -7.7003e-01, -9.6428e-01, -6.0314e-01,  # "you" vector
           1.7834e-01,  6.1909e-02, -2.0041e-01,  4.4003e-01,  5.2138e-01,
          -2.2191e-01, -2.6324e-02, -1.1932e+00, ...]
       ])
    => torch.Size([3, 250])  # [num keywords, word2vec dim]
    """
    # Placeholder implementation: stands in one random 250-d vector per word
    # until the real word2vec lookup is wired in.
    words = sent.split() or ['<unk>']
    inputs = [torch.randn(250) for _ in words]  # one vector per word
    inputs = torch.stack(inputs)                # torch.stack merges the vectors
    inputs = inputs.view(len(inputs), 250)      # torch.view reshapes to the documented size
    return inputs
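# Usage sketch (placeholder vectors, so the values are random but the shape
# matches the docstring):
if __name__ == '__main__':
    vec = sent2vec('I love you.')
    assert tuple(vec.size()) == (3, 250)  # [num keywords, word2vec dim]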
def backward(ctx, grad_output):
    gate = ctx.saved_tensors[0]
    if gate.item() == 0:
        # When the gate is off, rescale the gradient by a random
        # per-sample factor beta ~ U(0, 1).
        beta = torch.cuda.FloatTensor(grad_output.size(0)).uniform_(0, 1)
        beta = beta.view(beta.size(0), 1, 1, 1).expand_as(grad_output)
        beta = Variable(beta)
        return beta * grad_output, None, None, None
    else:
        return grad_output, None, None, None
def imageModel(self, img):
    img = torch.FloatTensor(img)
    # The backbone should return feature maps of shape (batch_size, 14, 14, 512).
    f_I = self.model(img)
    f_I = f_I.view(self.batch_size, 14 * 14, 512)
    v_I = torch.tanh(self.W_I(f_I))  # (batch_size, 196, hidden_size)
    return v_I
def forward(self, x):
    out = self.upsample(x)
    out = self.relu(self.conv1(out))
    out = self.relu(self.conv2(out))
    out = self.relu(self.conv3(out))
    out = self.relu(self.conv4(out))
    out = self.relu(self.conv5(out))
    out = self.relu(self.conv6(out))
    out = self.conv7(out)
    out = out.view(-1, self.n_actions)  # (batch_size, n_actions)
    acc = self.sigmoid(out[:, 0])   # acceleration in [0, 1]
    steer = self.tanh(out[:, 1])    # steering in [-1, 1]
    bools = out[:, 2:]              # remaining binary action logits
    return acc, steer, bools
def forward(self, features, captions, concepts, lengths):
    """
    :param features: encoded picture features, batch_size * 196 * 152
    :param captions: batch_size * time_step
    :param concepts: concepts of picture [sparse matrix], batch_size * concepts_size
    :param lengths: valid lengths for each padded caption.
    :return: predictions for each time step.
    """
    batch_size, time_step = captions.data.shape
    predicts = torch.zeros(batch_size, time_step, self.vocab_size)
    # We can initialize from the mean of the features, or view them as a
    # flattened 196 * 152 feature vector.
    h0, c0 = self.get_start_states(batch_size)

    word_embeddings = self.E_voc(captions)  # batch_size * time_steps * embed_size
    concepts_embeddings = self.E_concept(concepts)  # batch_size * num_concepts * con_embed_size

    for t in range(time_step):
        batch_size = sum(i >= t for i in lengths)
        words_input = word_embeddings[:batch_size, t, :]
        if t == 0:
            xt = self.feature_ly(features[:batch_size].view(batch_size, -1))  # batch * input_size
        else:
            alpha, _ = self.att_in(concepts_embeddings, words_input)
            alpha = alpha.unsqueeze(2).expand(-1, -1, self.embed_size_concept)
            weighted_sum = torch.sum(alpha * concepts_embeddings, dim=1)
            weighted_sum = self.concept_dim_ly(weighted_sum)
            xt = self.att_in_out_ly(weighted_sum + words_input)
        h0, c0 = self.lstm_cell(xt, (h0[:batch_size, :], c0[:batch_size, :]))
        beta = self.att_out(h0, concepts_embeddings)  # batch_size, hidden_size, num_concepts
        weighted_sum_out = torch.sum(beta * F.relu(concepts_embeddings), dim=1)
        weighted_sum_out = self.linear_w(weighted_sum_out)
        outputs = self.att_out_out(weighted_sum_out)
        predicts[:batch_size, t, :] = outputs
    return predicts
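# Note on the shrinking batch above: captions are assumed sorted by length,
# so at step t only the captions with length >= t are still active. Sketch:
#
#   >>> lengths = [3, 2, 1]
#   >>> [sum(i >= t for i in lengths) for t in range(3)]
#   [3, 3, 2]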
def pca(self, X, k):  # k is the number of components you want
    # mean of each feature
    mean = torch.mean(X)
    # normalization
    norm_X = X - mean
    norm_X = norm_X.view(1, len(X))
    # scatter matrix
    scatter_matrix = torch.mm(torch.transpose(norm_X, 0, 1), norm_X)
    # calculate the eigenvectors and eigenvalues
    eig_val, eig_vec = np.linalg.eig(scatter_matrix.numpy())
    eig_pairs = [(np.abs(eig_val[i]), eig_vec[:, i]) for i in range(len(X))]
    # sort eig_vec based on eig_val from highest to lowest
    eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
    # select the top k eig_vec
    feature = np.array([ele[1] for ele in eig_pairs[:k]])
    # project the normalized data onto the new basis
    data = np.dot(norm_X.numpy(), np.transpose(feature))
    data = torch.tensor(data)
    return data
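# Sketch (illustrative): projecting a 4-feature vector onto the top-2
# eigendirections of its (rank-1) scatter matrix.
#
#   >>> X = torch.tensor([1.0, 2.0, 3.0, 4.0])
#   >>> model.pca(X, 2).size()       # `model` is the owning module (assumed)
#   torch.Size([1, 2])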
def loss(self, y_true, y_pred, from_logits=False, label_smoothing=0):
    """
    Calculate the loss (the test process will use this function).
    TODO: you must provide this function whether or not you use it in
    training, because the test process calls it.
    :return: loss (float)
    """
    y_true = y_true.view(-1)
    # calculate the padding mask (1 at real tokens, 0 at padding)
    mask = (y_true != 0).to(y_pred.dtype)
    # calculate the loss
    loss_ = self.compile_params['loss'](y_true, y_pred)
    # remove the padding positions' loss by multiplying with the mask
    loss_ = loss_ * mask
    # calculate the mean loss
    return torch.mean(loss_)
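# Worked sketch of the padding mask (hypothetical targets, id 0 = padding):
#
#   >>> y_true = torch.tensor([3, 7, 0, 0])
#   >>> (y_true != 0).to(torch.float32)
#   tensor([1., 1., 0., 0.])
#
# so padded positions contribute zero to the loss before averaging.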
def forward(self, image, text):
    """
    Given the image and its caption embedding, predict whether the image
    is real or fake.

    Arguments
    ---------
    image : torch.FloatTensor
        image.size() = (batch_size, 64, 64, 3)

    text : torch.FloatTensor
        Output of the skip-thought embedding model for the caption.
        text.size() = (batch_size, text_embed_dim)

    Returns
    -------
    output : Probability for the image being real/fake
    logit : Final score of the discriminator
    """
    d_net_out = self.d_net(image)  # (batch_size, 4, 4, 512)
    text_reduced = self.text_reduced_dim(text)  # (batch_size, text_reduced_dim)
    text_reduced = text_reduced.unsqueeze(1)  # (batch_size, 1, text_reduced_dim)
    text_reduced = text_reduced.unsqueeze(2)  # (batch_size, 1, 1, text_reduced_dim)
    text_reduced = text_reduced.expand(-1, 4, 4, self.text_reduced_dim)  # (batch_size, 4, 4, text_reduced_dim)
    concat_out = torch.cat((d_net_out, text_reduced), 3)  # (batch_size, 4, 4, 512 + text_reduced_dim)
    concat_out = self.cat_net(concat_out)
    concat_out = concat_out.view(-1, concat_out.size(1) * concat_out.size(2) * concat_out.size(3))
    logit = self.linear(concat_out)
    output = torch.sigmoid(logit)
    return output, logit
def forward(self, sound, target):
    # print(sound.shape, target.shape)
    enc_mask = get_enc_padding_mask(sound).to(self.device)
    sound, enc_mask = self.sound_embed(sound, enc_mask)
    new_feat_len = torch.tensor([
        len(enc_mask[0]) - enc_mask[i].sum() for i in range(enc_mask.shape[0])
    ]).to(self.device)
    sound[enc_mask] = 0
    target = self.text_embed(target)
    target = self.pos_encoder(target)
    trg_mask = generate_square_subsequent_mask(target.size(1)).to(self.device)  # for asr
    # out = self.transformer(sound.permute(1, 0, 2), target.permute(1, 0, 2),
    #                        tgt_mask=trg_mask, src_key_padding_mask=enc_mask)
    enc = self.transformer.encoder(sound.permute(1, 0, 2),
                                   src_key_padding_mask=enc_mask)
    # out_ctc = self.lin_ctc(enc).permute(1, 0, 2)  # for asr
    # out = self.transformer.decoder(target.permute(1, 0, 2), enc, tgt_mask=trg_mask)  # for asr
    out = self.transformer.decoder(target.permute(1, 0, 2), enc)  # for classifier
    # out = out.max(dim=0, keepdim=True)[0]  # alternative pooling for classifier
    # Classification head: use the decoder state at position 1 of each sequence.
    out = self.out_lin_class(out.permute(1, 0, 2)[:, 1, :])  # for classifier
    # out = self.out_lin(out.permute(1, 0, 2))  # for asr
    # return out, out_ctc, new_feat_len  # for asr
    return out
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)

params = list(net.parameters())
print(len(params))
print(params[0].size())

input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

net.zero_grad()
out.backward(torch.randn(1, 10))

output = net(input)
target = torch.randn(10)
target = target.view(1, -1)  # make target the same shape as the output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)
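# Continuing the sketch: backpropagate the loss and inspect a gradient
# (a standard next step; the layer name conv1 is assumed from the tutorial Net).
net.zero_grad()
loss.backward()
print(net.conv1.bias.grad)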
def forward(self, roi_feat, position_embedding, nongt_dim, fc_dim, feat_dim,
            dim=(1024, 1024, 1024), group=16, index=1):
    """ Attention module, vectorized version

    Args:
        roi_feat: [num_rois, feat_dim]
        position_embedding: [num_rois, nongt_dim, emb_dim]
        nongt_dim:
        fc_dim: should be same as group
        feat_dim: dimension of roi_feat, should be same as dim[2]
        dim: a 3-tuple of (query, key, output)
        group:
        index:

    Returns:
        output: [num_rois, dim[2]]
    """
    # Since dim defaults to (1024, 1024, 1024) and group defaults to 16,
    # dim_group is (64, 64, 64).
    dim_group = (dim[0] // group, dim[1] // group, dim[2] // group)
    # Take the first nongt_dim entries of roi_feat along dim 0;
    # nongt_roi_feat is [nongt_dim, feat_dim].
    nongt_roi_feat = roi_feat[:nongt_dim, :]
    # Reshape the [num_rois, nongt_dim, emb_dim] position_embedding.
    emb_shape = position_embedding.size()
    # [num_rois * nongt_dim, emb_dim]
    position_embedding_reshape = position_embedding.view(
        emb_shape[0] * emb_shape[1], emb_shape[2])
    # position_feat_1, [num_rois * nongt_dim, fc_dim]
    position_feat_1 = F.relu(self.pos_fc(position_embedding_reshape))
    # aff_weight, [num_rois, nongt_dim, fc_dim]
    aff_weight = position_feat_1.view(-1, nongt_dim, fc_dim)
    # geometric weights, [num_rois, fc_dim, nongt_dim]
    aff_weight = aff_weight.permute(0, 2, 1)

    # multi head
    assert dim[0] == dim[1], 'Matrix multiply requires same dimensions!'
    # A fully connected layer produces q_data; its weights correspond to W_Q in
    # Eq. 4 of the paper and roi_feat to f_A ([num_rois, feat_dim]).
    # q_data: [num_rois, 1024]
    q_data = self.query(roi_feat)
    # [num_rois, group, dim_group[0]], by default [num_rois, 16, 64]
    q_data_batch = q_data.view(-1, group, dim_group[0])
    # [group, num_rois, dim_group[0]], by default [16, num_rois, 64]
    q_data_batch = q_data_batch.permute(1, 0, 2)
    # A fully connected layer produces k_data; its weights correspond to W_K in
    # Eq. 4 and nongt_roi_feat to f_A ([nongt_dim, feat_dim]).
    # k_data: [nongt_dim, 1024]
    k_data = self.key(nongt_roi_feat)
    # [nongt_dim, group, dim_group[1]], by default [nongt_dim, 16, 64]
    k_data_batch = k_data.view(-1, group, dim_group[1])
    # [group, nongt_dim, dim_group[1]], by default [16, nongt_dim, 64]
    k_data_batch = k_data_batch.permute(1, 0, 2)
    v_data = nongt_roi_feat
    # The matrix multiplication from Eq. 4 of the paper.
    # aff is [group, num_rois, nongt_dim], by default [16, num_rois, nongt_dim].
    aff = torch.bmm(q_data_batch, k_data_batch.permute(0, 2, 1))
    # aff_scale, [group, num_rois, nongt_dim]; the division in Eq. 4.
    aff_scale = (1.0 / (dim_group[1] ** 0.5)) * aff
    # [num_rois, group, nongt_dim]
    # aff_scale is the result of Eq. 4: w_A.
    aff_scale = aff_scale.permute(1, 0, 2)

    assert fc_dim == group, 'fc_dim != group'
    # weighted_aff, [num_rois, fc_dim, nongt_dim]
    # The clamp corresponds to Eq. 5 and the softmax implements Eq. 3.
    # Softmax exponentiates its input, so to reach the form of Eq. 3 (where
    # only w_A appears in the exponent, not w_G) we take the log of w_G first;
    # exponentiation then recovers w_G. In short,
    # e^(log(w_G) + w_A) = w_G * e^(w_A).
    weighted_aff = torch.log(torch.clamp(aff_weight, min=1e-6)) + aff_scale
    # Softmax implements Eq. 3; normalization runs over the last dim (nongt_dim).
    # [num_rois, fc_dim, nongt_dim]
    aff_softmax = self.weighted_affinity(weighted_aff)
    # [num_rois * fc_dim, nongt_dim]
    aff_softmax_reshape = aff_softmax.view(-1, nongt_dim)
    # Eq. 2: multiply the attention weights w with f_A.
    # output_t, [num_rois * fc_dim, feat_dim]
    output_t = torch.mm(aff_softmax_reshape, v_data)
    # output_t, [num_rois, fc_dim * feat_dim, 1, 1]
    output_t = output_t.view(-1, fc_dim * feat_dim, 1, 1)
    # Eq. 2 is computed with a 1x1 convolution with dim[2] (1024 by default)
    # output channels; the conv weights correspond to W_V in Eq. 2.
    # linear_out, [num_rois, dim[2], 1, 1]
    linear_out = self.linear_out(output_t)
    # [num_rois, dim[2]]; with groups set to fc_dim (16 by default, the N_r
    # parameter in the paper) this concatenates all the f_R heads.
    output = linear_out.squeeze()
    return output
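# Shape walk-through with the defaults (hypothetical sizes: num_rois = 300,
# nongt_dim = 300, feat_dim = 1024, group = fc_dim = 16):
#
#   q_data_batch: [16, 300, 64]
#   k_data_batch: [16, 300, 64]
#   aff:          [16, 300, 300]     (bmm over the 64-d head dimension)
#   weighted_aff: [300, 16, 300]
#   output_t:     [300, 16 * 1024, 1, 1]
#   output:       [300, 1024]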
def forward(self, inputs, state, inputs_mask, hidden_state_mask, output_mask):
    # State
    if self.architecture.dual_state():
        hidden_state, cell_state = state
    else:
        hidden_state = cell_state = state

    # RNN Dropout
    dropped_inputs = inputs_mask * inputs
    dropped_hidden_state = hidden_state_mask * hidden_state

    # Content
    if self.architecture.content.has_transformation:
        if self.architecture.content.has_state:
            content_args = torch.cat([dropped_inputs, dropped_hidden_state], -1)
        else:
            content_args = dropped_inputs
        content = torch.mm(content_args, self.w_content)
    else:
        content = inputs  # TODO Should this be dropped out? Technically, the dropout is for the matrices.
    if self.architecture.content.has_bias:
        content = content + self.b_content
    if self.architecture.content.has_tanh:
        content = torch.tanh(content)

    # Gates - Computation
    if self.architecture.gates.has_transformation:
        args = []
        if self.architecture.gates.is_state_arg:
            args.append(dropped_hidden_state)
        if self.architecture.gates.is_content_arg:
            args.append(content)  # TODO Should this be dropped out?
        if self.architecture.gates.is_input_arg:
            args.append(dropped_inputs)
        gates = torch.mm(torch.cat(args, -1), self.w_gates) + self.b_gates
    else:
        gates = self.b_gates

    # Gates - Aggregation
    num_gates = self.architecture.gates.num_gates()
    # Softmax
    if self.architecture.gates.is_softmax:
        # Normalize across the gates for each hidden unit, then split into
        # per-gate (batch, hidden_size) tensors.
        gates = gates.view(-1, self.hidden_size, num_gates)
        gates = F.softmax(gates, dim=-1)
        gates = [
            torch.squeeze(gate, -1) for gate in torch.split(gates, 1, -1)
        ]
        new_cell_state = gates[0] * cell_state + gates[1] * content
        if self.architecture.gates.has_highway and self.input_size == self.hidden_size:
            new_cell_state += gates[2] * inputs
        output = new_hidden_state = new_cell_state
        output = output_mask * output  # TODO This is different because it includes the highway
    # Sigmoid
    else:
        gates = torch.split(torch.sigmoid(gates), self.hidden_size, -1)
        if self.architecture.gates.is_coupled:
            new_cell_state = gates[0] * cell_state + (1 - gates[0]) * content
            gates = gates[1:]
        else:
            new_cell_state = gates[0] * cell_state + gates[1] * content
            gates = gates[2:]
        new_hidden_state = new_cell_state
        if self.architecture.gates.has_tanh:
            new_hidden_state = torch.tanh(new_hidden_state)
        if self.architecture.gates.has_zero_gate:
            new_hidden_state = gates[0] * new_hidden_state
        output = new_hidden_state
        output = output_mask * output  # TODO This is different because it includes the highway
        if self.architecture.gates.has_highway and self.input_size == self.hidden_size:
            output = gates[-1] * output + (1 - gates[-1]) * inputs

    return output, (new_hidden_state, new_cell_state)
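# Sketch of the softmax-gate reshaping above (hypothetical sizes:
# batch = 2, hidden_size = 3, num_gates = 2):
#
#   >>> g = torch.randn(2, 6).view(-1, 3, 2)          # (batch, hidden, gates)
#   >>> g = F.softmax(g, dim=-1)                      # gates compete per unit
#   >>> [t.squeeze(-1).size() for t in torch.split(g, 1, -1)]
#   [torch.Size([2, 3]), torch.Size([2, 3])]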
def dense_resample(im, flow_im, output_valid_mask=False):
    """
    Resample reward at particular locations.

    Args:
        im:      ...xHxW matrix to sample from.
        flow_im: ...xHxWx2 matrix; samples the image using absolute offsets
                 as given by flow_im.
    """
    valid_mask = None

    x, y = torch.unbind(flow_im, dim=-1)
    x = x.view(-1)
    y = y.view(-1)

    # constants
    shape = im.size()
    width = shape[-1]
    height = shape[-2]
    num_batch = 1
    for dim in shape[:-2]:
        num_batch *= dim

    # Round up and down.
    x0 = torch.floor(x)
    x1 = x0 + 1
    y0 = torch.floor(y)
    y1 = y0 + 1

    x0 = x0.clamp(0, width - 1)
    x1 = x1.clamp(0, width - 1)
    y0 = y0.clamp(0, height - 1)
    y1 = y1.clamp(0, height - 1)

    dim2 = width
    dim1 = width * height

    # Create the base index into the flattened image.
    base = torch.arange(num_batch) * dim1
    base = base.view(-1, 1)
    base = base.expand(base.size(0), height * width).contiguous().view(-1)  # num_batch * H * W
    base = base.type_as(x)

    base_y0 = base + y0 * dim2
    base_y1 = base + y1 * dim2
    idx_a = base_y0 + x0
    idx_b = base_y1 + x0
    idx_c = base_y0 + x1
    idx_d = base_y1 + x1

    # Use the indices to look up the four neighboring pixels in the flat image.
    im_flat = im.view(-1)
    pixel_a = torch.gather(im_flat, 0, idx_a.long())
    pixel_b = torch.gather(im_flat, 0, idx_b.long())
    pixel_c = torch.gather(im_flat, 0, idx_c.long())
    pixel_d = torch.gather(im_flat, 0, idx_d.long())

    # And finally calculate the bilinearly interpolated values.
    x1_f = x1.float()
    y1_f = y1.float()
    wa = torch.unsqueeze((x1_f - x) * (y1_f - y), 1)
    wb = torch.unsqueeze((x1_f - x) * (1.0 - (y1_f - y)), 1)
    wc = torch.unsqueeze((1.0 - (x1_f - x)) * (y1_f - y), 1)
    wd = torch.unsqueeze((1.0 - (x1_f - x)) * (1.0 - (y1_f - y)), 1)

    output = (wa * pixel_a.unsqueeze(1) + wb * pixel_b.unsqueeze(1) +
              wc * pixel_c.unsqueeze(1) + wd * pixel_d.unsqueeze(1))
    output = output.view(im.size())
    return output, valid_mask
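# Usage sketch (illustrative only): an identity flow field reproduces the input.
if __name__ == '__main__':
    H = W = 4
    im = torch.arange(H * W).double().view(1, H, W)
    ys, xs = np.meshgrid(np.arange(H), np.arange(W), indexing='ij')
    flow_im = torch.from_numpy(np.stack([xs, ys], axis=-1)[np.newaxis]).double()
    out, _ = dense_resample(im, flow_im)
    assert torch.equal(out, im)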