def convert_padding_direction(src_tokens, padding_idx, right_to_left=False, left_to_right=False):
    """Rotate each row so its padding moves to the opposite end.

    Exactly one of ``right_to_left`` / ``left_to_right`` must be set.
    ``src_tokens`` is indexed as a 2-D tensor (rows x positions); entries
    equal to ``padding_idx`` are treated as padding.

    Returns ``src_tokens`` itself (no copy) when there is no padding or when
    the padding already sits on the requested side; otherwise a new tensor
    with every row cyclically shifted by that row's padding count.
    """
    assert right_to_left ^ left_to_right

    is_pad = src_tokens.eq(padding_idx)
    if not is_pad.any():
        # Nothing to move.
        return src_tokens

    # First column free of padding -> rows are already right padded.
    if left_to_right and not is_pad[:, 0].any():
        return src_tokens
    # Last column free of padding -> rows are already left padded.
    if right_to_left and not is_pad[:, -1].any():
        return src_tokens

    seq_len = src_tokens.size(1)
    positions = buffered_arange(seq_len).type_as(src_tokens).expand_as(src_tokens)
    pad_counts = is_pad.long().sum(dim=1, keepdim=True)

    # Shift the gather positions by the per-row pad count, wrapping around.
    if right_to_left:
        shifted = positions - pad_counts
    else:
        shifted = positions + pad_counts
    gather_index = torch.remainder(shifted, seq_len)
    return src_tokens.gather(1, gather_index)
def _unwrap_t(self, angles): ''' unwrap angles, adapted from https://github.com/numpy/numpy/blob/v1.13.0/numpy/lib/function_base.py#L2118-L2170 ''' d_angles = angles[:, 1:] - angles[:, :-1] ddmod = torch.remainder(d_angles + np.pi, 2*np.pi) - np.pi good = (ddmod == -np.pi) * (d_angles > 0) #instead of & ddmod[good] = np.pi ph_correct = ddmod - d_angles ph_correct[d_angles.abs() < np.pi] = 0 ph_correct = ph_correct.cumsum(1) ph_correct += angles[:, 1:] angles[:, 1:] = ph_correct
def torch_mod(x):
    """Return ``x`` reduced modulo the module-level ``TWO_PI`` constant.

    Uses ``torch.remainder``, whose result takes the sign of the divisor.
    """
    wrapped = torch.remainder(x, TWO_PI)
    return wrapped
def beam_decode(self, encoder_outputs, decoder_input, decoder_hidden, context):
    """Decode one sequence of pointer positions with beam search.

    Adapted from
    https://github.com/budzianowski/PyTorch-Beam-Search-Decoding/blob/master/decode_beam.py

    :param encoder_outputs: tensor whose dim 1 is the input length; it is
        repeated once per candidate and masked to pick the embedding of the
        chosen position (presumably shaped [1, T, embedding_dim] - confirm
        against the caller).
    :param decoder_input: first decoder input, stored on the root node.
    :param decoder_hidden: initial decoder state; indexed as an ``(h, c)``
        pair when the beams are batched.
    :param context: repeated once per live beam and passed to ``self.step``.
    :return: 1-D tensor on ``self.config[DEVICE]`` holding the word ids of the
        best-scoring hypothesis, without the root node's placeholder id.

    Only a batch of one sentence is supported (``batch_size`` is fixed to 1).
    """
    beam_width = 64
    decoded_batch = []
    batch_size = 1
    input_length = encoder_outputs.size(1)

    # (batch, seq_len)
    mask = self.mask.repeat(input_length).unsqueeze(0).repeat(batch_size, 1)

    # arange(input_length), broadcast across batch_size
    runner = torch.arange(input_length, device=self.config[DEVICE])
    runner = runner.unsqueeze(0).expand(batch_size, -1).long()

    # Decoding goes sentence by sentence.
    for _ in range(batch_size):
        root = BeamSearchNode(decoder_hidden, None, decoder_input,
                              torch.zeros(1, device=self.config[DEVICE]),
                              0, mask.clone(), -1)
        nodes = [(-root.eval(), root)]
        qsize = 1

        # One pointer is emitted per step, so there are input_length steps.
        for _tstep in range(input_length):
            new_nodes = []
            inputs, hiddens_h, hiddens_c = [], [], []
            masks, old_nodes, old_logprobs = [], [], []

            # Drain the live beams and batch their states together.
            while nodes:
                _, n = nodes.pop()
                inputs.append(n.dec_input)
                hiddens_h.append(n.h[0])
                hiddens_c.append(n.h[1])
                masks.append(n.mask)
                old_nodes.append(n)
                old_logprobs.append(n.logp)

            inputs = torch.cat(inputs, dim=0)
            hiddens = (torch.cat(hiddens_h, dim=0), torch.cat(hiddens_c, dim=0))
            masks = torch.cat(masks, dim=0)
            old_logprobs = torch.cat(old_logprobs).unsqueeze(1).expand(
                -1, input_length)

            # Decode one step for every live beam at once.
            h_t, c_t, outs, raw_att = self.step(
                inputs, hiddens, masks, context.repeat(inputs.shape[0], 1, 1))

            # Parent-beam id of every (beam, position) pair in the flattened
            # score matrix. Created on the same device as the scores so it
            # can be indexed with the top-k result.
            beam_indexes = torch.arange(
                inputs.shape[0],
                device=inputs.device).repeat_interleave(input_length)
            num_candidates = min(beam_width, input_length * inputs.shape[0])

            att_logprobs = self.log_softmax(raw_att)
            att_logprobs += old_logprobs
            log_prob, indexes = torch.topk(att_logprobs.view(-1), num_candidates)
            beam_indexes = beam_indexes[indexes]
            decoded_t = torch.remainder(indexes, input_length)

            # One row per candidate: 1 at the position that candidate picked.
            one_hot_pointers = (runner == decoded_t.unsqueeze(1).expand(
                -1, outs.shape[1])).float()
            new_masks = masks[beam_indexes] * (1 - one_hot_pointers)
            embedding_mask = one_hot_pointers.unsqueeze(2).expand(
                -1, -1, self.embedding_dim).bool()
            # Row k is the encoder output at the position chosen by candidate k.
            decoder_input = encoder_outputs.repeat(
                num_candidates, 1, 1)[embedding_mask.data].view(
                    num_candidates, self.embedding_dim)

            for new_k in range(num_candidates):
                # Scores are sorted, so once the sentinel is hit no later
                # candidate is usable either.
                if log_prob[new_k] == self.att.inf:
                    break
                beam_idx = beam_indexes[new_k].item()
                parent = old_nodes[beam_idx]
                # h_t / c_t are per parent beam; decoder_input, log_prob,
                # new_masks and decoded_t are per candidate. The candidate's
                # own embedding (row new_k) must be used here, not the row
                # that happens to share the parent-beam index.
                node = BeamSearchNode(
                    (h_t[beam_idx].unsqueeze(0), c_t[beam_idx].unsqueeze(0)),
                    parent,
                    decoder_input[new_k].unsqueeze(0),
                    log_prob[new_k].unsqueeze(0),
                    parent.length + 1,
                    new_masks[new_k].unsqueeze(0),
                    decoded_t[new_k].item())
                new_nodes.append((-node.eval(), node))
                qsize += 1

            # Prune the queue if necessary.
            if qsize > beam_width:
                nodes = sorted(new_nodes,
                               key=operator.itemgetter(0))[:beam_width]
            else:
                nodes = new_nodes

        utterances = []
        for _, n in sorted(nodes, key=operator.itemgetter(0)):
            utterance = [n.word_id]
            # Back-trace to the root.
            while n.prevNode is not None:
                n = n.prevNode
                utterance.append(n.word_id)
            utterances.append(utterance[::-1])
        decoded_batch.append(utterances)

    # Best hypothesis of the only sentence, minus the root's placeholder id.
    return torch.tensor(decoded_batch[0][0][1:], device=self.config[DEVICE])
def _get_angle_loss(self, angle: torch.Tensor, target_angle: torch.Tensor) -> torch.Tensor: scaled_angle = torch.remainder(angle, torch.Tensor([np.pi]).to(self._device)) return F.mse_loss(scaled_angle, target_angle)
def evaluate(self):
    """Evaluate the model under a targeted attack on ``self.val_loader[1]``.

    For every batch a random target class different from the true label is
    drawn, each image is un-normalised, passed through ``applyTransforms``,
    re-normalised, attacked with ``self.attack`` towards the random target,
    and the model's loss / accuracy on the attacked batch is accumulated
    against the true labels. Running results are printed per batch and a
    final summary line is printed at the end. Nothing is returned.

    The standard-accuracy pass over ``self.val_loader[0]`` is currently
    disabled, so the ``std_*`` accumulators are created but never updated.
    """
    self.model.eval()

    std_loss = Accumulator('std_loss')
    adv_loss = Accumulator('adv_loss')
    std_corr = Accumulator('std_corr')
    adv_corr = Accumulator('adv_corr')
    # Created for parity with the original code; not updated in this method.
    std_logits = Accumulator('std_logits')
    adv_logits = Accumulator('adv_logits')
    adv_images = Accumulator('adv_images')
    first_batch_images = Accumulator('first_batch_images')

    for batch_idx, (data, target) in enumerate(self.val_loader[1]):
        # data is normalized at this point
        if self.cuda:
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # Draw an offset in [1, nb_classes - 1] so the attack target never
        # equals the true label. Allocate it on target's device instead of a
        # hard-coded 'cuda' so CPU evaluation works too.
        rand_target = torch.randint(0,
                                    self.nb_classes - 1,
                                    target.size(),
                                    dtype=target.dtype,
                                    device=target.device)
        rand_target = torch.remainder(target + rand_target + 1,
                                      self.nb_classes)

        data_cpy = data.clone().detach()
        for idx in range(len(data_cpy)):
            unnormalized = reverse_normalization(data[idx])
            # applyTransforms receives the image with axes 0 and 2 swapped
            # and values scaled by 255.
            transformed = applyTransforms(
                np.swapaxes(
                    np.array(unnormalized.cpu().clone().detach()) * 255.0,
                    0, 2))
            data_cpy[idx] = transforms.functional.normalize(
                transformed.clone().cpu(), IMAGENET_MEAN,
                IMAGENET_STD).to(data_cpy.device)

        data_adv = self.attack(self.model,
                               data_cpy,
                               rand_target,
                               avoid_target=False,
                               scale_eps=False)

        with torch.no_grad():
            output_adv = self.model(data_adv)
            adv_logits.update(output_adv.cpu())
            loss = F.cross_entropy(output_adv, target,
                                   reduction='none').cpu()
            adv_loss.update(loss)
            corr = correct(output_adv, target)
            corr = corr.view(corr.size()[0]).cpu()
            adv_corr.update(corr)

        run_output = {'adv_loss': adv_loss.avg, 'adv_acc': adv_corr.avg}
        print('Adv Batch', batch_idx)
        print(run_output)

    # NOTE(review): summary_dict is built but neither returned nor logged in
    # the visible code; kept so any error raised by .avg.item() still surfaces.
    summary_dict = {
        'std_acc': std_corr.avg.item(),
        'adv_acc': adv_corr.avg.item()
    }
    print(std_loss.avg, std_corr.avg, adv_loss.avg, adv_corr.avg)
def pmod(torch_tensor, modulus):
    """Return ``torch_tensor`` modulo ``modulus`` via ``torch.remainder``.

    The result carries the sign of ``modulus``, so it is non-negative for a
    positive modulus.
    """
    wrapped = torch.remainder(torch_tensor, modulus)
    return wrapped
def _handle_row_wise_sharding(input, world_size, weight, rank, local_shard, pg):
    """Embedding lookup for a weight sharded by rows across ranks.

    The ids in ``input`` are flattened and sorted, split by the row range
    owned by each rank, exchanged with ``all_to_all_single``, looked up in
    ``local_shard``, and the looked-up rows are exchanged back and restored
    to the original order.

    Args:
        input: tensor of ids to look up.
        world_size: number of ranks; one split is produced per rank.
        weight: sharded weight; ``size(0)``/``size(1)`` and
            ``_sharding_spec.placements`` are read from it.
        rank: rank of the current process; the split-size tensor is moved to
            CUDA device ``rank``.
        local_shard: local slice of the weight used for the lookup.
        pg: process group passed to the collectives.

    Returns:
        Tensor of shape ``(*input.size(), weight.size(1))``.
    """
    # Flatten the ids across all input and sort them.
    input_size = input.size()
    input_1d = torch.reshape(input, (-1, )).contiguous()
    input_sorted, indices_1d = torch.sort(input_1d)
    # Inverse permutation of the sort, used to restore the original order.
    rearrange_indices_1d = torch.argsort(indices_1d)

    # Decide which rank each id goes to by checking the sharding range.
    split_size = get_split_size(weight.size(0), world_size)
    rearrange_rows = False
    input_split_sizes: List[int] = [0] * world_size
    input_split_start_indices: List[int] = [0] * world_size
    # When we do the chunk split, we always ensure the first N - 1 chunks get
    # maxed out and the Nth chunk gets the rest. So input_split_sizes like
    # [3, 3, 3, 4] are not possible; the expected split is [4, 4, 4, 1].
    sharded_dim_size_max = get_chunked_dim_size(weight.size(0), split_size, 0)
    for idx, placement in enumerate(weight._sharding_spec.placements):
        sharded_dim_size = get_chunked_dim_size(weight.size(0), split_size, idx)
        start_row_idx = idx * sharded_dim_size_max
        end_row_idx = start_row_idx + sharded_dim_size
        start_idx = torch.searchsorted(input_sorted, start_row_idx).item()
        end_idx = torch.searchsorted(input_sorted, end_row_idx).item()
        input_split_sizes[placement.rank()] = int(end_idx - start_idx)
        input_split_start_indices[placement.rank()] = int(start_idx)
        if placement.rank() != idx:
            rearrange_rows = True

    rearrange_indices_1d_second_order = None
    if rearrange_rows:
        # Placements are not in rank order, so the sorted ids must be
        # re-arranged into rank order before being sent via all2all.
        indices: List[List[int]] = [[0]] * world_size
        for placement in weight._sharding_spec.placements:
            split_length = input_split_sizes[placement.rank()]
            offset_idx = input_split_start_indices[placement.rank()]
            indices[placement.rank()] = list(
                range(offset_idx, offset_idx + split_length))
        indices_flatten = [idx for indice in indices for idx in indice]
        # Keep the indices as int64: a float32 round trip cannot represent
        # every integer above 2**24, and an empty list would otherwise become
        # a float tensor that index_select rejects.
        flat_indices = torch.tensor(indices_flatten, dtype=torch.long)
        input_sorted = input_sorted.index_select(
            0, flat_indices.to(input.device))
        rearrange_indices_1d_second_order = torch.argsort(flat_indices)

    # Get the input split size to be sent from each rank to the current rank.
    # We can then infer the output split size.
    input_split_sizes_tensor = torch.tensor(input_split_sizes,
                                            dtype=torch.int32).cuda(rank)
    output_split_sizes_tensor = torch.empty(world_size,
                                            dtype=torch.int32,
                                            device=input.device)
    dist.all_to_all_single(
        output_split_sizes_tensor,
        input_split_sizes_tensor,
        group=pg,
    )
    output_split_sizes = output_split_sizes_tensor.tolist()

    # Input sent from each rank to the current rank may have different sizes.
    gathered_input = torch.empty(sum(output_split_sizes),
                                 dtype=torch.int64,
                                 device=input.device)

    # Convert global row ids into offsets within the owning shard.
    input_sorted = torch.remainder(input_sorted, sharded_dim_size_max)

    # Perform alltoall.
    dist.all_to_all_single(
        gathered_input,
        input_sorted,
        input_split_sizes=input_split_sizes,
        output_split_sizes=output_split_sizes,
        group=pg,
    )

    # Perform local embedding look up.
    gathered_input_embeddings = torch.nn.functional.embedding(
        gathered_input, local_shard)

    # Send every lookup result back to the rank that asked for it.
    gathered_output = torch.empty(input_sorted.size(0),
                                  weight.size(1),
                                  device=input.device)
    dist.all_to_all_single(
        gathered_output,
        gathered_input_embeddings,
        input_split_sizes=output_split_sizes,
        output_split_sizes=input_split_sizes,
        group=pg,
    )

    # Undo the rank-order re-arrangement (if any), then the initial sort.
    if rearrange_indices_1d_second_order is not None:
        gathered_output = gathered_output[rearrange_indices_1d_second_order]
    gathered_output = gathered_output[rearrange_indices_1d]

    # Return the appropriate local result.
    return torch.reshape(gathered_output, (*input_size, weight.size(1)))
def compute_projection(self, depth, camera_to_world, world_to_grid):
    """Compute matching voxel / pixel index lists for one depth image.

    Every voxel of the volume is projected (voxels -> image) into the camera
    frame; voxels that fall inside the frustum bounds, land on a valid pixel
    and agree with the depth map within ``self.voxel_size`` are kept.

    Args:
        depth: depth image; flattened with ``view(-1)`` for lookup. Its
            ``is_cuda`` flag decides where the index tensors live.
        camera_to_world: square matrix, inverted to get world -> camera.
        world_to_grid: square matrix, inverted to get grid -> world.

    Returns:
        ``(lin_indices_3d, lin_indices_2d)``: two long tensors of length
        ``prod(self.volume_dims) + 1``. Element 0 holds the number of valid
        correspondences ``n``; elements ``1..n`` hold the linear voxel
        indices and the linear pixel indices respectively. The remaining
        entries are uninitialised. Returns ``None`` (after printing a
        message) when no voxel survives one of the filters.
    """
    dim_x = self.volume_dims[0]
    dim_y = self.volume_dims[1]
    dim_z = self.volume_dims[2]

    def _fill_coords(coords, lin_ind):
        # Decompose linear voxel indices into homogeneous (x, y, z, 1)
        # coordinates. The arithmetic stays in the integer domain and uses
        # floor division: "/" on integer tensors is true division in current
        # PyTorch and would yield fractional voxel coordinates.
        z = lin_ind // (dim_x * dim_y)
        rest = lin_ind - z * (dim_x * dim_y)
        coords[2] = z
        coords[1] = rest // dim_x
        coords[0] = torch.remainder(rest, dim_x)
        coords[3].fill_(1)

    world_to_camera = torch.inverse(camera_to_world)
    grid_to_world = torch.inverse(world_to_grid)

    # Clamp the frustum bounds to the volume and move them next to depth.
    voxel_bounds_min, voxel_bounds_max = self.compute_frustum_bounds(
        world_to_grid, camera_to_world)
    clipped_min = np.maximum(voxel_bounds_min, 0)
    clipped_max = np.minimum(voxel_bounds_max, self.volume_dims)
    if depth.is_cuda:
        voxel_bounds_min = clipped_min.cuda().float()
        voxel_bounds_max = clipped_max.cuda().float()
    else:
        voxel_bounds_min = clipped_min.cpu().float()
        voxel_bounds_max = clipped_max.cpu().float()

    # Coordinates within frustum bounds.
    # TODO python opt for this part instead of lua/torch opt?
    lin_ind_volume = torch.arange(0,
                                  dim_x * dim_y * dim_z,
                                  out=torch.LongTensor())
    lin_ind_volume = lin_ind_volume.cuda(
    ) if depth.is_cuda else lin_ind_volume.cpu()
    coords = camera_to_world.new(4, lin_ind_volume.size(0))
    _fill_coords(coords, lin_ind_volume)

    mask_frustum_bounds = torch.ge(
        coords[0], voxel_bounds_min[0]) * torch.ge(
            coords[1], voxel_bounds_min[1]) * torch.ge(
                coords[2], voxel_bounds_min[2])
    mask_frustum_bounds = mask_frustum_bounds * torch.lt(
        coords[0], voxel_bounds_max[0]) * torch.lt(
            coords[1], voxel_bounds_max[1]) * torch.lt(
                coords[2], voxel_bounds_max[2])
    if not mask_frustum_bounds.any():
        print('error: nothing in frustum bounds')
        return None

    # Keep only the voxels inside the bounds and rebuild their coordinates.
    lin_ind_volume = lin_ind_volume[mask_frustum_bounds]
    coords = coords.resize_(4, lin_ind_volume.size(0))
    _fill_coords(coords, lin_ind_volume)

    # Transform to current frame.
    p = torch.mm(world_to_camera, torch.mm(grid_to_world, coords))

    # Project into image.
    p[0] = (p[0] * self.intrinsic[0][0]) / p[2] + self.intrinsic[0][2]
    p[1] = (p[1] * self.intrinsic[1][1]) / p[2] + self.intrinsic[1][2]
    pi = torch.round(p).long()

    valid_ind_mask = torch.ge(pi[0], 0) * torch.ge(pi[1], 0) * torch.lt(
        pi[0], self.image_dims[0]) * torch.lt(pi[1], self.image_dims[1])
    if not valid_ind_mask.any():
        print('error: no valid image indices')
        return None

    valid_image_ind_x = pi[0][valid_ind_mask]
    valid_image_ind_y = pi[1][valid_ind_mask]
    valid_image_ind_lin = valid_image_ind_y * self.image_dims[
        0] + valid_image_ind_x

    # Keep projections whose depth is in range and close to the voxel depth.
    depth_vals = torch.index_select(depth.view(-1), 0, valid_image_ind_lin)
    depth_mask = depth_vals.ge(self.depth_min) * depth_vals.le(
        self.depth_max) * torch.abs(depth_vals - p[2][valid_ind_mask]).le(
            self.voxel_size)
    if not depth_mask.any():
        print('error: no valid depths')
        return None

    lin_ind_update = lin_ind_volume[valid_ind_mask]
    lin_ind_update = lin_ind_update[depth_mask]

    # Needs to be same size for all in batch... (first element has size)
    buffer_len = dim_x * dim_y * dim_z + 1
    lin_indices_3d = lin_ind_update.new(buffer_len)
    lin_indices_2d = lin_ind_update.new(buffer_len)
    num_ind = lin_ind_update.shape[0]
    lin_indices_3d[0] = num_ind
    lin_indices_2d[0] = num_ind
    lin_indices_3d[1:1 + num_ind] = lin_ind_update
    lin_indices_2d[1:1 + num_ind] = torch.index_select(
        valid_image_ind_lin, 0,
        torch.nonzero(depth_mask)[:, 0])
    return lin_indices_3d, lin_indices_2d
}], 0.0005) bar = tqdm(range(config.N_iters)) for i in bar: img_idx = np.random.choice(i_train) target = imgs[img_idx] # pose = poses[img_idx, :3, :4] # rays_o, rays_d = get_rays(H, W, focal, pose) # can be moved to pre-computed rays_o, rays_d = rays[img_idx] rays_o = torch.from_numpy(rays_o).cuda() rays_d = torch.from_numpy(rays_d).cuda() target = torch.from_numpy(target).cuda() rand_idx = torch.from_numpy( np.int64(random.sample(range(H * W), k=config.N_rand))) rand_idx_x = torch.remainder(rand_idx, W) rand_idx_y = torch.div(rand_idx, W, rounding_mode='floor') rays_o = rays_o[rand_idx_y, rand_idx_x] rays_d = rays_d[rand_idx_y, rand_idx_x] target_s = target[rand_idx_y, rand_idx_x] optim.zero_grad() rgb0, rgb = render(rays_o, rays_d, 2, 6, embed_fn, embeddirs_fn, net, net_fine) err0 = torch.pow(rgb0 - target_s, 2).mean() err1 = torch.pow(rgb - target_s, 2).mean() err = err0 + err1 err.backward() optim.step() outstr = 'LsCoarse: %.4f LsFine: %.4f' % (err0.cpu().detach().numpy(),
def phase_vocoder(complex_specgrams, rate, phase_advance):
    """
    Phase vocoder. Given a STFT tensor, speed up in time without modifying
    pitch by a factor of `rate`.

    Args:
        complex_specgrams (Tensor):
            (*, channel, num_freqs, time, complex=2)
        rate (float): Speed-up factor.
        phase_advance (Tensor): Expected phase advance in
            each bin. (num_freqs, 1).

    Returns:
        complex_specgrams_stretch (Tensor):
            (*, channel, num_freqs, ceil(time/rate), complex=2).

    Example:
        >>> num_freqs, hop_length = 1025, 512
        >>> # (batch, channel, num_freqs, time, complex=2)
        >>> complex_specgrams = torch.randn(16, 1, num_freqs, 300, 2)
        >>> rate = 1.3 # Speed up by 30%
        >>> phase_advance = torch.linspace(
        >>>    0, math.pi * hop_length, num_freqs)[..., None]
        >>> x = phase_vocoder(complex_specgrams, rate, phase_advance)
        >>> x.shape # with 231 == ceil(300 / 1.3)
        torch.Size([16, 1, 1025, 231, 2])
    """
    ndim = complex_specgrams.dim()
    # Full slices for every dim before `time`. Index expressions are built as
    # tuples: indexing with a list of slices/tensors relies on deprecated
    # "non-tuple sequence" handling.
    lead = (slice(None),) * (ndim - 2)

    # Fractional positions in the original time axis to sample at.
    time_steps = torch.arange(0,
                              complex_specgrams.size(-2),
                              rate,
                              device=complex_specgrams.device)

    # Interpolation weight between the two neighbouring frames.
    alphas = torch.remainder(time_steps,
                             torch.tensor(1., device=complex_specgrams.device))
    phase_0 = angle(complex_specgrams[lead + (slice(1),)])

    # Time padding, so that frame index + 1 is always valid.
    complex_specgrams = torch.nn.functional.pad(complex_specgrams, [0, 0, 0, 2])

    # Frames at floor(time_steps) and at floor(time_steps + 1).
    complex_specgrams_0 = complex_specgrams[lead + (time_steps.long(),)]
    complex_specgrams_1 = complex_specgrams[lead + ((time_steps + 1).long(),)]

    angle_0 = angle(complex_specgrams_0)
    angle_1 = angle(complex_specgrams_1)

    norm_0 = torch.norm(complex_specgrams_0, dim=-1)
    norm_1 = torch.norm(complex_specgrams_1, dim=-1)

    # Deviation from the expected phase advance, wrapped by whole turns.
    phase = angle_1 - angle_0 - phase_advance
    phase = phase - 2 * math.pi * torch.round(phase / (2 * math.pi))

    # Compute phase accumulation: start from the first frame's phase and
    # drop the last increment.
    phase = phase + phase_advance
    phase = torch.cat([phase_0, phase[lead + (slice(-1),)]], dim=-1)
    phase_acc = torch.cumsum(phase, -1)

    # Linearly interpolated magnitude.
    mag = alphas * norm_1 + (1 - alphas) * norm_0

    real_stretch = mag * torch.cos(phase_acc)
    imag_stretch = mag * torch.sin(phase_acc)

    complex_specgrams_stretch = torch.stack([real_stretch, imag_stretch],
                                            dim=-1)

    return complex_specgrams_stretch
def decode_one_sentence_adaptive_rl(machine, seq_len, init_dec_hidden,
                                    init_dec_cell, enc_hidden_seq,
                                    initial_beam_size, max_beam_size, model,
                                    shared_model, reward_coef_fscore,
                                    reward_coef_beam_size, label_true_seq,
                                    f_score_index_begin, counter, lock,
                                    optimizer, args,
                                    ):
    """Decode one sentence with an adaptive beam and train the beam policy.

    At every output step the policy network ``model`` picks an action that
    shrinks (0), keeps, or grows (2) the beam size within
    ``[1, max_beam_size]``. Rewards are derived from the change in F-score of
    the partial prediction and from the beam sizes, and one actor-critic
    update (with generalized advantage estimation) is applied at the end of
    the sentence.

    Args:
        machine: the tagger; supplies the decoder cell, attention, scoring,
            state construction, backtracking, F-score and reward helpers.
        seq_len: number of output steps.
        init_dec_hidden, init_dec_cell: initial decoder state.
        enc_hidden_seq: encoder states, used by attention and beam steps.
        initial_beam_size: beam size used at step 0.
        max_beam_size: upper bound on the beam size.
        model: local policy/value network; synced from ``shared_model``.
        shared_model: shared network that receives the gradients.
        reward_coef_fscore, reward_coef_beam_size: reward weights.
        label_true_seq: gold labels used for the F-score.
        f_score_index_begin: passed through to ``machine.get_fscore``.
        counter, lock: shared step counter and the lock guarding it.
        optimizer: optimizer stepping the shared parameters.
        args: hyper-parameters (``gamma``, ``tau``, ``entropy_coef``,
            ``value_loss_coef``, ``max_grad_norm``).

    Returns:
        ``(label_pred_seq, accum_logP_pred_seq, logP_pred_seq,
        attention_pred_seq, episode, beam_size_seq)``.
    """
    # Currently, batch size can only be 1
    batch_size = 1

    # Each beta is a (batch size, beam size) matrix, and there will be T_y of
    # them in the sequence; y => same.
    beta_seq = []
    y_seq = []
    logP_seq = []
    accum_logP_seq = []

    # Attention alpha_{ij} coefficients, in the shape of
    # (output seq len, batch size, beam size, input seq len).
    attention_seq = [] if machine.attention else None

    # For RL episode
    episode = []

    # init_label's shape => (batch size, 1), all elements machine.BEG_INDEX
    init_label = Variable(torch.LongTensor(batch_size, 1).zero_())
    if machine.gpu:
        init_label = init_label.cuda()
    init_label_emb = machine.label_embedding(
        init_label + machine.BEG_INDEX).view(batch_size,
                                             machine.label_embedding_dim)

    # t = 0, only one input beam from init (t = -1).
    # Only one dec_hidden_out, dec_cell_out
    # => dec_hidden_out has shape (batch size, hidden dim)
    dec_hidden_out, dec_cell_out = machine.decoder_cell(
        init_label_emb, (init_dec_hidden, init_dec_cell))

    # Attention
    if machine.attention:
        dec_hidden_out = dec_hidden_out[None, :, :]  # add 1 nominal dim
        dec_hidden_out, attention = machine.attention(
            dec_hidden_out, enc_hidden_seq, 0, machine.enc2dec_hidden)
        # remove the added dim
        dec_hidden_out = dec_hidden_out.view(batch_size, machine.hidden_dim)
        attention = attention.view(batch_size, seq_len)

    # dec_hidden_beam shape => (1, batch size, hidden dim),
    # 1 because there is only 1 input beam
    dec_hidden_beam = torch.stack([dec_hidden_out], dim=0)
    dec_cell_beam = torch.stack([dec_cell_out], dim=0)

    # This one is for backtracking (need permute)
    if machine.attention:
        # attention has shape (batch size, input seq len). At t = 0 there is
        # only 1 beam, so formally it is (1, batch size, input seq len);
        # permute into (batch size, 1, input seq len).
        attention_beam = torch.stack([attention], dim=0)
        attention_beam = attention_beam.permute(1, 0, 2)

    # score_out.shape => (batch size, |V^y|)
    score_out = machine.hidden2score(dec_hidden_out).view(
        batch_size, machine.label_size)
    logP_out = machine.score2logP(score_out).view(batch_size,
                                                  machine.label_size)

    # Initial step, accumulated logP is the same as logP
    accum_logP_out = logP_out
    logP_out_list = [logP_out]
    accum_logP_out_list = [accum_logP_out]

    # These are for backtracking (need permute)
    logP_output_beam = torch.stack(logP_out_list, dim=0).permute(1, 0, 2)
    accum_logP_output_beam = torch.stack(accum_logP_out_list,
                                         dim=0).permute(1, 0, 2)

    # score_matrix.shape => (batch size, |V^y| * 1)
    # * 1 because there is only 1 input beam
    logP_matrix = torch.cat(logP_out_list, dim=1)
    accum_logP_matrix = torch.cat(accum_logP_out_list, dim=1)

    # Just for code consistency (about reward calculation)
    cur_beam_size_in = 1

    # Just for code consistency (about experience tuple)
    cur_state = machine.make_state(accum_logP_matrix, logP_matrix, 1,
                                   max_beam_size)
    action = None

    # All beta^{t=0, b} are actually 0
    # beta_beam.shape => (batch size, beam size),
    # each row is [y^{t, b=0}, y^{t, b=1}, ..., y^{t, b=B-1}]
    # y_beam, score_beam => same
    action_seq = []
    beam_size_seq = []
    beam_size = initial_beam_size
    beam_size_seq.append(beam_size)

    accum_logP_beam, index_beam = torch.topk(accum_logP_matrix, beam_size,
                                             dim=1)
    # A flat index encodes (source beam, label): beam = idx // label_size,
    # label = idx % label_size.
    beta_beam = torch.floor(index_beam.float() / machine.label_size).long()
    y_beam = torch.remainder(index_beam, machine.label_size)

    # These are for backtracking
    beta_seq.append(beta_beam)
    y_seq.append(y_beam)
    if machine.attention:
        attention_seq.append(attention_beam)
    logP_seq.append(logP_output_beam)
    accum_logP_seq.append(accum_logP_output_beam)

    # Just for sentence with length = 1
    (label_pred_seq, accum_logP_pred_seq, logP_pred_seq,
     attention_pred_seq) = machine.backtracking(1, batch_size, y_seq,
                                                beta_seq, attention_seq,
                                                logP_seq, accum_logP_seq)

    # -----------------
    # Sync params with the shared model
    model.load_state_dict(shared_model.state_dict())

    values = []
    log_probs = []
    rewards = []
    entropies = []
    # -----------------

    # t = 1, 2, ..., (T_y - 1 == seq_len - 1)
    for t in range(1, seq_len):
        # DESIGN: beams are looped over inside decode_beam_step_rl on the
        # assumption that batch size > beam size, which may no longer hold
        # here since batch size = 1. Are the beam operations vectorizable?
        (accum_logP_matrix, logP_matrix, dec_hidden_beam, dec_cell_beam,
         attention_beam, accum_logP_output_beam,
         logP_output_beam) = machine.decode_beam_step_rl(
             beam_size, y_beam, beta_beam, dec_hidden_beam, dec_cell_beam,
             accum_logP_beam, enc_hidden_seq, seq_len, t)

        # At t = T_y - 1 no action is strictly needed (only the single best
        # result would be picked before backtracking), but one more beam
        # selection is kept for compatibility with the backtracking function
        # and the rest of the code.
        #
        # Note that this state is actually the output state at t
        state = machine.make_state(accum_logP_matrix, logP_matrix, beam_size,
                                   max_beam_size)

        # For experience tuple
        prev_state = cur_state
        cur_state = state
        prev_action = action

        # For reward calculation
        prev_beam_size_in = cur_beam_size_in
        cur_beam_size_in = beam_size

        # policy network showtime
        value, logit = model(state)
        prob = F.softmax(logit, dim=-1)
        log_prob = F.log_softmax(logit, dim=-1)

        # TODO: for naive MLP policy network only
        prob = prob.view(1, -1)
        log_prob = log_prob.view(1, -1)

        entropy = -(log_prob * prob).sum(1, keepdim=True)
        entropies.append(entropy)

        # Sample exactly one action. num_samples is passed explicitly because
        # it is a required argument of Tensor.multinomial.
        action = prob.multinomial(1).data
        log_prob = log_prob.gather(1, Variable(action))

        with lock:
            counter.value += 1

        # populate data
        values.append(value)
        log_probs.append(log_prob)
        action_seq.append(action)

        # TODO: review this
        action = action.numpy()[0]

        # update beam size w.r.t. the action chosen
        if action == 0 and beam_size > 1:
            beam_size -= 1
        elif action == 2 and beam_size < max_beam_size:
            beam_size += 1
        beam_size_seq.append(beam_size)

        accum_logP_beam, index_beam = torch.topk(accum_logP_matrix,
                                                 beam_size, dim=1)
        beta_beam = torch.floor(
            index_beam.float() / machine.label_size).long()
        y_beam = torch.remainder(index_beam, machine.label_size)

        beta_seq.append(beta_beam)
        y_seq.append(y_beam)
        if machine.attention:
            attention_seq.append(attention_beam)
        logP_seq.append(logP_output_beam)
        accum_logP_seq.append(accum_logP_output_beam)

        # Compute the F-score for the sequence [0, 1, ..., t] (length t+1)
        # using the y_seq, beta_seq collected so far: the "partial" F-score
        # at this t.
        (label_pred_seq, accum_logP_pred_seq, logP_pred_seq,
         attention_pred_seq) = machine.backtracking(t + 1, batch_size, y_seq,
                                                    beta_seq, attention_seq,
                                                    logP_seq, accum_logP_seq)
        cur_fscore = machine.get_fscore(label_pred_seq, label_true_seq,
                                        f_score_index_begin)

        # If t >= 2, compute the reward and generate the experience tuple
        # ( s_{t-1}, a_{t-1}, r_{t-1}, s_t ). `fscore` holds the previous
        # step's value, set at the end of the previous iteration.
        if t >= 2:
            reward = machine.get_reward(cur_fscore, fscore, cur_beam_size_in,
                                        prev_beam_size_in, reward_coef_fscore,
                                        reward_coef_beam_size)
            experience_tuple = (prev_state, prev_action, reward, cur_state)
            episode.append(experience_tuple)
            rewards.append(reward)
        fscore = cur_fscore
    # End for t

    # Backprop with actor-critic. With no rewards (seq_len <= 2) both losses
    # would stay plain Python numbers without .backward(), so the update is
    # skipped in that case.
    if rewards:
        R = torch.zeros(1, 1)
        values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        R = Variable(R)
        gae = torch.zeros(1, 1)
        for i in reversed(range(len(rewards))):
            R = args.gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            # Generalized Advantage Estimation
            delta_t = rewards[i] + args.gamma * \
                values[i + 1].data - values[i].data
            gae = gae * args.gamma * args.tau + delta_t

            policy_loss = policy_loss - \
                log_probs[i] * Variable(gae) - args.entropy_coef * \
                entropies[i]

        optimizer.zero_grad()
        (policy_loss + args.value_loss_coef * value_loss).backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), args.max_grad_norm)
        ensure_shared_grads(model, shared_model)
        optimizer.step()

    return label_pred_seq, accum_logP_pred_seq, logP_pred_seq, \
        attention_pred_seq, episode, beam_size_seq
# Keep only the couplings whose sequence separation falls in the requested
# range (band of the upper triangle), then pick the strongest ones.
L = Kp.size()[0]
if args.range == 'short':
    mask = torch.triu(torch.ones(L, L), diagonal=6) - torch.triu(
        torch.ones(L, L), diagonal=12)
if args.range == 'medium':
    mask = torch.triu(torch.ones(L, L), diagonal=12) - torch.triu(
        torch.ones(L, L), diagonal=24)
if args.range == "large":
    mask = torch.triu(torch.ones(L, L), diagonal=24)
mask = Variable(mask)
Kp = Kp * mask

# Indices of the int(a * L) largest entries of the flattened matrix,
# converted back to (row, column). Floor division keeps the row index
# integral; "/" on integer tensors is true division in current PyTorch.
top_couplings = torch.topk(Kp.view(-1), int(args.a * L))[1]
top_couplings = ((top_couplings // L), torch.remainder(top_couplings, L))
k = top_couplings[0].size()[0]
torch.save(top_couplings, "../results/1BDO_A_top_coupl.out")

# Calculating the distances
structure = PDBParser().get_structure('1BDO_A', '../database/1BDO_A.pdb')
model = structure[0]
L = len(list(structure.get_residues()))
distances = np.zeros((L, L))
for chain in model:
    for i in range(L):
        for j in range(L):
            # Residue numbering in this chain is offset by 77; subtracting
            # two atoms yields their distance.
            distances[i][j] = chain[i + 77]['CA'] - chain[j + 77]['CA']
#Renormalizing to plot the contact map
def sample_lines(self, meta, jmap, joff, mode):
    """Sample candidate line segments between detected junctions.

    The top-K junction candidates per type are read from ``jmap`` (refined by
    the offsets in ``joff``), matched to the ground-truth junctions in
    ``meta["junc"]``, and every ordered pair of candidates is labelled with
    ``meta["Lpos"]``.  In ``"training"`` mode a random subset of positive,
    negative and arbitrary pairs is kept; otherwise all pairs with ``u < v``
    are kept.

    Args:
        meta: dict providing ``"junc"`` ([N, 2]), ``"jtyp"`` ([N]), ``"Lpos"``
            and ``"Lneg"``.
        jmap: junction heat map, one map per junction type (leading dim).
        joff: junction offsets, reshaped here to [n_type, 2, -1].
        mode: ``"training"`` or anything else for evaluation behaviour.

    Returns:
        ``(line, label, jcs)``: sampled line end points ([*, 2, 2]), their
        labels as floats, and per-type junction coordinates whose score
        exceeds 0.03.
    """
    with torch.no_grad():
        junc = meta["junc"]  # [N, 2]
        jtyp = meta["jtyp"]  # [N]
        Lpos = meta["Lpos"]
        Lneg = meta["Lneg"]

        n_type = jmap.shape[0]
        jmap = non_maximum_suppression(jmap).reshape(n_type, -1)
        joff = joff.reshape(n_type, 2, -1)
        max_K = M.n_dyn_junc // n_type
        N = len(junc)
        if mode != "training":
            K = min(int((jmap > M.eval_junc_thres).float().sum().item()), max_K)
        else:
            K = min(int(N * 2 + 2), max_K)
        if K < 2:
            K = 2
        device = jmap.device

        # index: [N_TYPE, K]
        score, index = torch.topk(jmap, k=K)
        # Decode the flat index into (row, col) on a 128-wide map.  The row
        # must be the *integer* quotient; true division would add col / 128
        # to every y coordinate.
        row = torch.div(index, 128, rounding_mode="floor")
        col = torch.remainder(index, 128)
        y = row + torch.gather(joff[:, 0], 1, index) + 0.5
        x = col + torch.gather(joff[:, 1], 1, index) + 0.5

        # xy: [N_TYPE, K, 2]
        xy = torch.cat([y[..., None], x[..., None]], dim=-1)
        xy_ = xy[..., None, :]
        del x, y, row, col, index

        # dist: [N_TYPE, K, N]
        dist = torch.sum((xy_ - junc)**2, -1)
        cost, match = torch.min(dist, -1)

        # xy: [N_TYPE * K, 2]
        # match: [N_TYPE, K]
        # Index N marks "unmatched": wrong junction type or farther than 1.5.
        for t in range(n_type):
            match[t, jtyp[match[t]] != t] = N
        match[cost > 1.5 * 1.5] = N
        match = match.flatten()

        # All ordered pairs (u, v) of candidate junctions.
        _ = torch.arange(n_type * K, device=device)
        u, v = torch.meshgrid(_, _)
        u, v = u.flatten(), v.flatten()
        up, vp = match[u], match[v]
        label = Lpos[up, vp]

        if mode == "training":
            c = torch.zeros_like(label, dtype=torch.bool)

            # sample positive lines
            cdx = label.nonzero().flatten()
            if len(cdx) > M.n_dyn_posl:
                # print("too many positive lines")
                perm = torch.randperm(len(cdx), device=device)[:M.n_dyn_posl]
                cdx = cdx[perm]
            c[cdx] = 1

            # sample negative lines
            cdx = Lneg[up, vp].nonzero().flatten()
            if len(cdx) > M.n_dyn_negl:
                # print("too many negative lines")
                perm = torch.randperm(len(cdx), device=device)[:M.n_dyn_negl]
                cdx = cdx[perm]
            c[cdx] = 1

            # sample other (unmatched) lines
            cdx = torch.randint(len(c), (M.n_dyn_othr, ), device=device)
            c[cdx] = 1
        else:
            c = (u < v).flatten()

        # sample lines
        u, v, label = u[c], v[c], label[c]
        xy = xy.reshape(n_type * K, 2)
        xyu, xyv = xy[u], xy[v]
        line = torch.cat([xyu[:, None], xyv[:, None]], 1)

        xy = xy.reshape(n_type, K, 2)
        jcs = [xy[i, score[i] > 0.03] for i in range(n_type)]
        return line, label.float(), jcs
def test_remainder(x, y): c = torch.remainder(torch.add(x, y), 3.0) return c
def __call__(self, p, scale, padding):
    """Map points to their local coordinate inside a grid cell.

    The points are first normalised with ``normalize_3d_coordinate``; the
    remainder modulo the cell size ``1 / self.res`` is then rescaled by
    ``self.res``.
    """
    normalized = normalize_3d_coordinate(p, scale, padding)
    cell_size = 1 / self.res
    # torch.remainder takes the sign of the divisor, so the result is
    # non-negative for a positive cell size.
    # p = coordinate2index(p_nor, self.res, coord_type='3d')
    return torch.remainder(normalized, cell_size) * self.res
def _shared_step(self):
    """Exchange sparse-embedding gradients through shared memory and apply them.

    For every embedding in ``self._params`` the traced (index, gradient) pairs
    are concatenated.  With more than one process, each gradient row is routed
    to the process ``index % world_size``: the local share is kept in memory,
    the other shares are written to named shared-memory buffers and their
    lengths recorded in ``self._opt_meta``.  After a barrier, every process
    reads the buffers addressed to it, concatenates all shares and calls
    ``self.update``.  A final barrier keeps the update synchronized.
    """
    with th.no_grad():
        # Frequently alloc and free shared memory to hold intermediate tensor is expensive
        # We cache shared memory buffers in shared_emb.
        shared_emb = {emb.name: ([], []) for emb in self._params}

        # Go through all sparse embeddings
        for emb in self._params:  # pylint: disable=too-many-nested-blocks
            emb_name = emb.name

            # we need to combine gradients from multiple forward paths
            idx = []
            grad = []
            for i, data in emb._trace:
                idx.append(i)
                grad.append(data.grad.data)
            # If the sparse embedding is not used in the previous forward step
            # The idx and grad will be empty, initialize them as empty tensors to
            # avoid crashing the optimizer step logic.
            #
            # Note: we cannot skip the gradient exchange and update steps as other
            # working processes may send gradient update requests corresponding
            # to certain embedding to this process.
            idx = th.cat(idx, dim=0) if len(idx) != 0 else \
                th.zeros((0,), dtype=th.long, device=th.device('cpu'))
            grad = th.cat(grad, dim=0) if len(grad) != 0 else \
                th.zeros((0, emb.embedding_dim), dtype=th.float32, device=th.device('cpu'))

            device = grad.device
            idx_dtype = idx.dtype
            grad_dtype = grad.dtype
            grad_dim = grad.shape[1]
            if self._world_size > 1:
                if emb_name not in self._shared_cache:
                    self._shared_cache[emb_name] = {}

                # Each training process takes the responsibility of updating a range
                # of node embeddings, thus we can parallel the gradient update.
                # The overall progress includes:
                #   1. In each training process:
                #     1.a Deciding which process a node embedding belongs to according
                #         to the formula: process_id = node_idx mod num_of_process(N)
                #     1.b Split the node index tensor and gradient tensor into N parts
                #         according to step 1.
                #     1.c Write each node index sub-tensor and gradient sub-tensor into
                #         different DGL shared memory buffers.
                #   2. Cross training process synchronization
                #   3. In each training process:
                #     3.a Collect node index sub-tensors and gradient sub-tensors
                #     3.b Do gradient update
                #   4. Done
                idx_split = th.remainder(idx, self._world_size).long()
                for i in range(self._world_size):
                    mask = idx_split == i
                    idx_i = idx[mask]
                    grad_i = grad[mask]

                    if i == self._rank:
                        # Local share: no shared memory needed.
                        shared_emb[emb_name][0].append(idx_i)
                        shared_emb[emb_name][1].append(grad_i)
                    else:
                        # currently nccl does not support Alltoallv operation
                        # we need to use CPU shared memory to share gradient
                        # across processes
                        idx_i = idx_i.to(th.device('cpu'))
                        grad_i = grad_i.to(th.device('cpu'))
                        # Buffer names encode (embedding, sender rank, receiver rank).
                        idx_shmem_name = 'idx_{}_{}_{}'.format(
                            emb_name, self._rank, i)
                        grad_shmem_name = 'grad_{}_{}_{}'.format(
                            emb_name, self._rank, i)

                        # Create shared memory to hold temporary index and gradient tensor for
                        # cross-process send and recv.
                        if idx_shmem_name not in self._shared_cache[emb_name] or \
                            self._shared_cache[emb_name][idx_shmem_name].shape[0] \
                                < idx_i.shape[0]:

                            # An existing but too-small buffer is parked in
                            # shmem_buffer_holder before being replaced.
                            if idx_shmem_name in self._shared_cache[
                                    emb_name]:
                                self.shmem_buffer_holder.append(
                                    self._shared_cache[emb_name]
                                    [idx_shmem_name])
                                self.shmem_buffer_holder.append(
                                    self._shared_cache[emb_name]
                                    [grad_shmem_name])

                            # The total number of buffers is the number of NodeEmbeddings *
                            # world_size * (world_size - 1). The minimum buffer size is 128.
                            #
                            # We extend the buffer by idx_i.shape[0] * 2 to avoid
                            # frequent shared memory allocation.
                            # The overall buffer cost will be smaller than three times
                            # the maximum memory requirement for sharing gradients.
                            buffer_size = 128 if idx_i.shape[
                                0] < 128 else idx_i.shape[0] * 2
                            idx_shmem = create_shared_mem_array(idx_shmem_name, \
                                (buffer_size,), idx_dtype)
                            grad_shmem = create_shared_mem_array(grad_shmem_name, \
                                (buffer_size, grad_dim), grad_dtype)
                            self._shared_cache[emb_name][
                                idx_shmem_name] = idx_shmem
                            self._shared_cache[emb_name][
                                grad_shmem_name] = grad_shmem

                        # Fill shared memory with temporal index tensor and gradient tensor
                        self._shared_cache[emb_name][idx_shmem_name][:idx_i.shape[0]] \
                            = idx_i
                        self._shared_cache[emb_name][grad_shmem_name][:idx_i.shape[0]] \
                            = grad_i
                        # Publish how many rows the receiver should read.
                        self._opt_meta[emb_name][
                            self._rank][i] = idx_i.shape[0]
            else:
                shared_emb[emb_name][0].append(idx)
                shared_emb[emb_name][1].append(grad)

        # make sure the idx shape is passed to each process through opt_meta
        if self._world_size > 1:
            th.distributed.barrier()
        for emb in self._params:  # pylint: disable=too-many-nested-blocks
            emb_name = emb.name
            if self._world_size > 1:
                # The first element in shared_emb[emb_name][0] is the local idx
                device = shared_emb[emb_name][0][0].device
                # gather gradients from all other processes
                for i in range(self._world_size):
                    if i != self._rank:
                        idx_shmem_name = 'idx_{}_{}_{}'.format(
                            emb_name, i, self._rank)
                        grad_shmem_name = 'grad_{}_{}_{}'.format(
                            emb_name, i, self._rank)
                        size = self._opt_meta[emb_name][i][self._rank]

                        # Retrieve shared memory holding the temporal index and gradient
                        # tensor that is sent to current training process
                        # NOTE(review): idx_dtype, grad_dtype and grad_dim below still
                        # hold the values from the last embedding of the first loop;
                        # this looks correct only if all embeddings share dtype and
                        # dimension - confirm.
                        if idx_shmem_name not in self._shared_cache[emb_name] or \
                            self._shared_cache[emb_name][idx_shmem_name].shape[0] < size:
                            buffer_size = 128 if size < 128 else size * 2
                            idx_shmem = get_shared_mem_array(idx_shmem_name, \
                                (buffer_size,), idx_dtype)
                            grad_shmem = get_shared_mem_array(grad_shmem_name, \
                                (buffer_size, grad_dim), grad_dtype)
                            self._shared_cache[emb_name][
                                idx_shmem_name] = idx_shmem
                            self._shared_cache[emb_name][
                                grad_shmem_name] = grad_shmem

                        idx_i = self._shared_cache[emb_name][
                            idx_shmem_name][:size]
                        grad_i = self._shared_cache[emb_name][
                            grad_shmem_name][:size]
                        shared_emb[emb_name][0].append(
                            idx_i.to(device, non_blocking=True))
                        shared_emb[emb_name][1].append(
                            grad_i.to(device, non_blocking=True))

        if self._clean_grad:
            # clean gradient track
            for emb in self._params:
                emb.reset_trace()
            self._clean_grad = False

        for emb in self._params:
            emb_name = emb.name

            # Local share plus every share received from the other processes.
            idx = th.cat(shared_emb[emb_name][0], dim=0)
            grad = th.cat(shared_emb[emb_name][1], dim=0)
            self.update(idx, grad, emb)

        # synchronized gradient update
        if self._world_size > 1:
            th.distributed.barrier()
def _handle_row_wise_sharding(
    input, world_size, weight, local_shard, max_norm, norm_type, padding_idx, rank, pg
):
    """
    Entry-point function to handle the logic of row-wise sharding of weight
    for embedding. (Detailed explanations of the logic can be found in
    the comment for sharded_embedding.)

    Args:
        input: list of ID used for lookup and aggregation.
        world_size: number of ranks.
        weight: sharded weight tensor.
        local_shard: row-wise sharded local weight used for lookup.
        max_norm: If given, each embedding vector with norm larger
            than max_norm is renormalized to have norm max_norm.
            Note: this will modify weight in-place.
        norm_type: The p in the p-norm to compute for the max_norm option.
        padding_idx: If specified, the entries at padding_idx do
            not contribute to the gradient; therefore, the embedding
            vector at padding_idx is not updated during training,
            i.e. it remains as a fixed "pad".
        rank: # of cuda process.
        pg: process group.

    Returns:
        final result of lookup, shaped ``(*input.size(), weight.size(1))``.
    """
    # flatten the ids across all input and sort
    input_size = input.size()
    input_1d = torch.reshape(input, (-1,)).contiguous()
    input_sorted, indices_1d = torch.sort(input_1d)
    # argsort of the sort permutation is its inverse: it restores input order.
    rearrange_indices_1d = torch.argsort(indices_1d)
    # Tensor.contiguous() is not in-place; the result has to be kept.
    input_sorted = input_sorted.contiguous()
    (
        input_sorted,
        input_split_sizes,
        sharded_dim_size_max,
        _,
        rearrange_indices_1d_second_order,
        padding_idx,
    ) = _handle_row_wise_lookup_distribute(
        input_sorted, input, world_size, weight, rank, padding_idx
    )

    # Get the input split size to be sent from each rank to the current rank.
    # We can then infer the output split size.
    output_split_sizes = _communicate_size_to_each_rank(
        input_split_sizes, world_size, input, pg
    )

    # Input sent from each rank to the current rank may have different sizes.
    gathered_input = torch.empty(
        sum(output_split_sizes), dtype=torch.int64, device=input.device
    )

    # Perform the modular operation of the 1D tensor to be sent to each rank.
    input_sorted = torch.remainder(input_sorted, sharded_dim_size_max)

    # Perform alltoall
    dist.all_to_all_single(
        gathered_input,
        input_sorted,
        input_split_sizes=input_split_sizes,
        output_split_sizes=output_split_sizes,
        group=pg,
    )

    # If input is None, passing in max_norm causes
    # errors in CUDA.
    if max_norm is not None and gathered_input.size(0) == 0:
        max_norm = None

    # Perform local embedding look up.
    gathered_input_embeddings = torch.nn.functional.embedding(
        gathered_input,
        local_shard,
        padding_idx=padding_idx,
        max_norm=max_norm,
        norm_type=norm_type,
    )

    # Gather all lookup result appropriately by performing alltoall again.
    # The receive buffer must have the same dtype as the tensor being sent;
    # relying on the default dtype breaks for non-default weight dtypes.
    gathered_output = torch.empty(
        input_sorted.size(0),
        weight.size(1),
        dtype=gathered_input_embeddings.dtype,
        device=input.device,
    )
    dist.all_to_all_single(
        gathered_output,
        gathered_input_embeddings,
        input_split_sizes=output_split_sizes,
        output_split_sizes=input_split_sizes,
        group=pg,
    )

    # Rearrange the results to its original shape.
    if rearrange_indices_1d_second_order is not None:
        gathered_output = gathered_output[rearrange_indices_1d_second_order]
    gathered_output = gathered_output[rearrange_indices_1d]

    # Return the appropriate local result.
    return torch.reshape(gathered_output, (*input_size, weight.size(1)))
def forward(self, x, y):
    """Return the element-wise remainder of ``x`` divided by ``y``."""
    return torch.remainder(x, y)
if torch.cuda.is_available():
    alpha_0 = alpha_0.cuda()

# Multiply transition_matrix and alpha_0
# tmp is a SparseTensor of size ND x B with K non-zero rows.
# i.e. tmp._indices() is a 1 x K Tensor
#      tmp._values() is a K x B Tensor
tmp = torch.hspmm(sp_trans.transpose(0, 1), alpha_0)

# Roll indices of tmp from ND to N x D.
flat_indices = tmp._indices()
tmp_rolled_indices = torch.zeros([2, flat_indices.size(1)],
                                 dtype=torch.long)
if torch.cuda.is_available():
    tmp_rolled_indices = tmp_rolled_indices.cuda()
# Floor division keeps the quotient an exact integer; `/` would go through
# floating point before being cast back into the long tensor.
tmp_rolled_indices[0] = flat_indices // 3237  # destination-states
tmp_rolled_indices[1] = torch.remainder(flat_indices, 3237)  # pdf-ids
tmp_rolled = torch.sparse_coo_tensor(tmp_rolled_indices, tmp._values())

# nnet_outputs at time t. We can do exp beforehand. size D x B
nnet_outputs = torch.randn([3237, 128])
if torch.cuda.is_available():
    nnet_outputs = nnet_outputs.cuda()
nnet_outputs.exp_()

# Lookup indices of nnet_outputs based on pdf-ids. size K x B
nnet_outputs_lookup = nnet_outputs.index_select(0, tmp_rolled_indices[1])

# Element-wise product with the nnet_outputs for the K rows
# Output is K x B.
tmp2 = torch.mul(tmp._values(), nnet_outputs_lookup)
def evaluate(self):
    """Report standard and targeted-adversarial metrics on two validation loaders.

    ``self.val_loader[0]`` is used for the standard pass.  For every batch of
    ``self.val_loader[1]`` each image is de-normalised, passed through
    ``applyTransforms``, re-normalised and attacked towards a random wrong
    class with ``self.attack``.  Running loss/accuracy are printed per batch
    and once more at the end.
    """
    self.model.eval()

    std_loss = Accumulator('std_loss')
    adv_loss = Accumulator('adv_loss')
    std_corr = Accumulator('std_corr')
    adv_corr = Accumulator('adv_corr')
    std_logits = Accumulator('std_logits')
    adv_logits = Accumulator('adv_logits')

    adv_images = Accumulator('adv_images')
    first_batch_images = Accumulator('first_batch_images')

    for batch_idx, (data, target) in enumerate(self.val_loader[0]):
        if self.cuda:
            data, target = data.cuda(non_blocking=True), target.cuda(
                non_blocking=True)
        with torch.no_grad():
            # std_cpy is used for finding the standard accuracy and has
            # transforms applied as normal
            std_cpy = data.clone().detach()
            output = self.model(std_cpy)
            std_logits.update(output.cpu())
            loss = F.cross_entropy(output, target, reduction='none').cpu()
            std_loss.update(loss)
            corr = correct(output, target)
            corr = corr.view(corr.size()[0]).cpu()
            std_corr.update(corr)

        run_output = {'std_loss': std_loss.avg, 'std_acc': std_corr.avg}
        print('Standard Batch', batch_idx)
        print(run_output)

    for batch_idx, (data, target) in enumerate(self.val_loader[1]):
        # data is normalized at this point
        if self.cuda:
            data, target = data.cuda(non_blocking=True), target.cuda(
                non_blocking=True)

        # Offset in [1, nb_classes - 1] added modulo nb_classes: the attack
        # target is never the true label.  The offset is created on the
        # device of `target` so the method also works when self.cuda is False.
        rand_target = torch.randint(0,
                                    self.nb_classes - 1,
                                    target.size(),
                                    dtype=target.dtype,
                                    device=target.device)
        rand_target = torch.remainder(target + rand_target + 1,
                                      self.nb_classes)

        data_cpy = data.clone().detach()
        for idx in range(len(data_cpy)):
            unnormalized = reverse_normalization(data[idx])
            transformed = applyTransforms(
                np.swapaxes(
                    np.array(unnormalized.cpu().clone().detach()) * 255.0,
                    0, 2))
            data_cpy[idx] = transforms.functional.normalize(
                transformed.clone().cpu(), IMAGENET_MEAN,
                IMAGENET_STD).to(data_cpy.device)

        data_adv = self.attack(self.model,
                               data_cpy,
                               rand_target,
                               avoid_target=False,
                               scale_eps=False)

        with torch.no_grad():
            output_adv = self.model(data_adv)
            adv_logits.update(output_adv.cpu())
            loss = F.cross_entropy(output_adv, target,
                                   reduction='none').cpu()
            adv_loss.update(loss)
            corr = correct(output_adv, target)
            corr = corr.view(corr.size()[0]).cpu()
            adv_corr.update(corr)

        run_output = {'adv_loss': adv_loss.avg, 'adv_acc': adv_corr.avg}
        print('Adv Batch', batch_idx)
        print(run_output)

    summary_dict = {
        'std_acc': std_corr.avg.item(),
        'adv_acc': adv_corr.avg.item()
    }
    print(std_loss.avg, std_corr.avg, adv_loss.avg, adv_corr.avg)
def __rmod__(self, other): if has_torch_function_variadic(self, other): return handle_torch_function(Tensor.__rmod__, (self, other), self, other) return torch.remainder(other, self)
def online_test(epoch, lbd):
    """Pick the best flag configuration from an ensemble pass and test it.

    1. Run ``net`` ``args.ensemble_size`` times per batch; for every member,
       count how often it agrees with the summed-ensemble prediction, bucketed
       by the base-3 code of its ``(flags, flags2)`` pair.
    2. Score each code as ``agreement + lbd * (1 - size / max_size)`` and
       decode the best code back into ``best_flag`` / ``best_flag2``.
    3. Evaluate ``full_net`` as is, then again with the best flags installed.

    Args:
        epoch: epoch number, used only in the printed report.
        lbd: weight of the size term in the score.
    """
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    correct_count = torch.zeros(3**NUM_FLAG)
    entire_count = torch.ones(3**NUM_FLAG)
    szs = 2 * NUM_FLAG * torch.ones(3**NUM_FLAG)
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs_list = []
            flags_list = []
            flags2_list = []
            for i in range(args.ensemble_size):
                outputs_temp, flags_temp, flags2_temp = net(inputs)
                outputs_list.append(outputs_temp)
                flags_list.append(flags_temp)
                flags2_list.append(flags2_temp)
                if i < 1:
                    # Clone: the in-place `+=` below must not overwrite
                    # outputs_list[0], otherwise member 0 would be compared
                    # against the ensemble sum itself.
                    outputs = outputs_temp.clone()
                else:
                    outputs += outputs_temp
            _, predicted = outputs.max(1)

            for i in range(args.ensemble_size):
                _, predicted_temp = outputs_list[i].max(1)
                correct_temp = predicted_temp.eq(predicted).sum().item()

                # flag encoding: base-3 digit j is 2 if both flags are set,
                # 1 if only the first is set, 0 otherwise
                idx = 0
                sz = 0
                for j in range(NUM_FLAG):
                    if flags_list[i][j] and flags2_list[i][j]:
                        idx += ((3**j) * 2)
                        sz += 2
                    elif (flags_list[i][j]) and (not flags2_list[i][j]):
                        idx += ((3**j))
                        sz += 1

                correct_count[idx] += correct_temp
                entire_count[idx] += predicted.size(0)
                szs[idx] = sz

            loss = criterion(outputs, targets)
            test_loss += loss.item()
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    #print((correct_count / entire_count).unique(sorted=False))

    # best flag decoding
    acc = correct_count / entire_count
    score = acc + lbd * (1 - szs / szs.max())
    sort_val, sort_idx = score.sort(0, descending=True)
    # Work on a plain int so the digits are extracted with exact integer
    # arithmetic; tensor `/` is true division and breaks the base-3 decoding.
    code = int(sort_idx[0])
    best_flag = []
    best_flag2 = []
    for j in range(NUM_FLAG):
        code, digit = divmod(code, 3)
        best_flag.append(digit >= 1)
        best_flag2.append(digit == 2)

    #print(sort_val.tolist())
    print(best_flag)
    print(best_flag2)

    # Print Test Result
    print('Ensemble Test: Epoch#%02d : Loss: %.3f | Acc: %.3f%% (%d/%d)' %
          (epoch, test_loss / (batch_idx + 1), 100. * correct / total,
           correct, total))

    def _timed_full_net_pass():
        """Run full_net over testloader; return loss sum, hits, count, seconds, batches."""
        loss_sum = 0
        n_correct = 0
        n_total = 0
        elapsed = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader):
                inputs, targets = inputs.to(device), targets.to(device)
                start_time = time.time()
                outputs, _, _ = full_net(inputs)
                elapsed += (time.time() - start_time)
                _, predicted = outputs.max(1)
                loss = criterion(outputs, targets)
                loss_sum += loss.item()
                n_total += targets.size(0)
                n_correct += predicted.eq(targets).sum().item()
        return loss_sum, n_correct, n_total, elapsed, batch_idx + 1

    # full model testing
    full_net.eval()
    test_loss, correct, total, entire_time, n_batches = _timed_full_net_pass()
    print('Full Model Test: Epoch#%02d : Loss: %.3f | Acc: %.3f%% (%d/%d)' %
          (epoch, test_loss / n_batches, 100. * correct / total, correct,
           total))
    print('running time for each batch : %.8f' % (entire_time / n_batches))

    # flag setting
    full_net.module.flags = best_flag
    full_net.module.flags2 = best_flag2
    full_net.module.cum_num_blocks = [2, 4, 6, 8]
    full_net.eval()

    # flagged model testing
    test_loss, correct, total, entire_time, n_batches = _timed_full_net_pass()
    print('Adapted Model Test: Epoch#%02d : Loss: %.3f | Acc: %.8f%% (%d/%d)' %
          (epoch, test_loss / n_batches, 100. * correct / total, correct,
           total))
    print('running time for each batch : %.8f' % (entire_time / n_batches))
def cuda_rem(x, y):
    """Return ``1 + torch.remainder(x, y) - 1``.

    The surrounding ``+ 1`` / ``- 1`` are kept on purpose: they are part of the
    computed expression, not a simplifiable no-op for floating-point inputs.
    """
    rem = torch.remainder(x, y)
    return 1 + rem - 1
def forward(self, state, time_step, args, reset_flag=False):
    """Return the prior action mean and (time-decayed) variance for ``state``.

    ``args.demo_type`` selects the prior:

    * ``'uav'``: steer towards the target bearing, or turn away from the
      nearest obstacle reading when ``100 * min(state[0:9]) < 15``.
    * ``'uav_wrong'``: same controller, but the target bearing is snapped to
      an axis (90/270 or 0/180) chosen by the 'north'/'east' state kept on
      ``self``; ``reset_flag`` re-initialises that state.
    * anything else: the mean comes from ``self.agent_ddpg.select_action`` and
      the variance is ``0.1`` times the decay.

    Returns:
        ``(average, covariance)``.
    """
    demo_type = args.demo_type

    if demo_type != 'uav' and demo_type != 'uav_wrong':
        average = self.agent_ddpg.select_action(state)
        time_step = torch.Tensor([time_step])[0]
        decay = args.prior_decay * (time_step - 1) + 1
        covariance = torch.ones(average.shape) * 0.1 * decay
        return average, covariance

    def bearing_degrees(numer, denom):
        # atan(numer / denom) unfolded into degrees, picking the branch from
        # the signs of the two components.
        angle = torch.atan(numer / denom)
        return torch.where(
            denom > 0,
            torch.where(numer > 0, angle / np.pi * 180.0,
                        (angle + 2 * np.pi) / np.pi * 180.0),
            (angle + np.pi) / np.pi * 180.0)

    wrong_demo = demo_type == 'uav_wrong'
    if wrong_demo and reset_flag:
        self.current_direct_wrong = 'north'
        self.min_distance_x = 50.0
        self.min_distance_y = 50.0

    if args:
        coefs = args.variance * 2
        prior_decay = args.prior_decay
    else:
        coefs = [0.09, 0.09]
        prior_decay = 0.005
    time_step = torch.Tensor([time_step])[0]

    first_perspective = bearing_degrees(state[12], state[13])
    position_target = bearing_degrees(state[10], state[11])

    if wrong_demo:
        distance = (state[9] / 2 + 0.5) * (
            torch.sqrt(torch.Tensor([2])[0]) * 3000)
        bearing_rad = 2 * position_target / 360 * torch.Tensor([np.pi])[0]
        distance_y = torch.abs(distance * torch.sin(bearing_rad))
        distance_x = torch.abs(distance * torch.cos(bearing_rad))

        # Alternate between the two axes; each switch shrinks the threshold
        # of the axis being entered by 5.
        if distance_y > self.min_distance_y:
            self.current_direct_wrong = 'north'
        elif distance_x > self.min_distance_x:
            if self.current_direct_wrong == 'north':
                self.min_distance_x -= 5
            self.current_direct_wrong = 'east'
        else:
            if self.current_direct_wrong == 'east':
                self.min_distance_y -= 5
            self.current_direct_wrong = 'north'

        # Snap the target bearing onto the active axis.
        if self.current_direct_wrong == 'north':
            position_target = (90 if position_target > 0
                               and position_target < 180 else 270)
        else:
            position_target = (0 if position_target < 90
                               or position_target > 270 else 180)

    first_target = torch.remainder(first_perspective - position_target,
                                   360.0)
    average_direction = torch.where(
        torch.sign(180.0 - first_target) + 1.0 > 0, -first_target / 180.0,
        (360.0 - first_target) / 180.0)
    variance_direction = 0.0 * average_direction + coefs[0]  # 0.1

    turning_free = torch.where(
        torch.sign(4 - torch.argmin(state[0:9]).float()) + 1.0 > 0,
        45.0 + 0 * average_direction, -45.0 + 0 * average_direction)
    average_free = turning_free / 180.0
    variance_free = 0.0 * average_free + coefs[0]  # 0.1

    # True when the smallest of the first nine readings is at least 0.15.
    path_clear = torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0
    average_steer = torch.where(path_clear, average_direction, average_free)
    variance_steer = torch.where(path_clear, variance_direction,
                                 variance_free)

    speed = state[14]
    average_throttle = torch.clamp(2.5 - 50 * (speed / 2 + 0.5), -0.5, 0.5)
    variance_throttle = 0.0 * average_throttle + coefs[1]

    decay = prior_decay * (time_step - 1) + 1
    covariance = torch.cat(
        (variance_steer.unsqueeze_(0), variance_throttle.unsqueeze_(0)),
        0) * decay
    average = torch.cat(
        (average_steer.unsqueeze_(0), average_throttle.unsqueeze_(0)), 0)
    return average, covariance
def log_uniform_sample(N, size):
    """Draw integer samples from ``[0, N)`` with a log-uniform distribution.

    ``exp(u * log N) - 1`` with ``u ~ U[0, 1)`` is truncated to an integer, so
    small values are drawn far more often than large ones.

    Args:
        N: exclusive upper bound of the samples (positive).
        size: output shape; an int, a tuple/list of ints or a ``torch.Size``.

    Returns:
        A ``torch.int64`` tensor of shape ``size`` with values in ``[0, N)``.
    """
    log_N = math.log(N)
    # torch.empty always interprets its argument as a shape.  torch.Tensor(size)
    # treats a tuple or list as *data*, which silently returned len(size)
    # samples instead of a tensor of that shape.
    x = torch.empty(size).uniform_(0, 1)
    value = torch.exp(x * log_N).long() - 1
    # Guards the upper bound against floating-point round-up in exp().
    return torch.remainder(value, N)
def evaluate(self):
    """Report standard and targeted-adversarial metrics on ``self.val_loader``.

    Every batch is scored as is, then attacked towards a random wrong class
    with ``self.attack`` and scored again.  One original/adversarial image
    pair is kept for each class the first time it is seen, plus all pairs of
    the first batch.  Running metrics are printed per batch and logged every
    20 batches; the summary and the collected images go to ``self.logger``.
    """
    self.model.eval()

    std_loss = Accumulator('std_loss')
    adv_loss = Accumulator('adv_loss')
    std_corr = Accumulator('std_corr')
    adv_corr = Accumulator('adv_corr')
    std_logits = Accumulator('std_logits')
    adv_logits = Accumulator('adv_logits')

    seen_classes = []
    adv_images = Accumulator('adv_images')
    first_batch_images = Accumulator('first_batch_images')

    for batch_idx, (data, target) in enumerate(self.val_loader):
        if self.cuda:
            data, target = data.cuda(non_blocking=True), target.cuda(
                non_blocking=True)
        with torch.no_grad():
            output = self.model(data)
            std_logits.update(output.cpu())
            loss = F.cross_entropy(output, target, reduction='none').cpu()
            std_loss.update(loss)
            corr = correct(output, target)
            corr = corr.view(corr.size()[0]).cpu()
            std_corr.update(corr)

        # Offset in [1, nb_classes - 1] added modulo nb_classes: the attack
        # target is never the true label.  The offset is created on the
        # device of `target` so the method also works when self.cuda is False.
        rand_target = torch.randint(0,
                                    self.nb_classes - 1,
                                    target.size(),
                                    dtype=target.dtype,
                                    device=target.device)
        rand_target = torch.remainder(target + rand_target + 1,
                                      self.nb_classes)
        data_adv = self.attack(self.model,
                               data,
                               rand_target,
                               avoid_target=False,
                               scale_eps=False)

        # Keep one example pair for every class not seen before.
        for idx in range(target.size()[0]):
            if target[idx].cpu() not in seen_classes:
                seen_classes.append(target[idx].cpu())
                orig_image = norm_to_pil_image(data[idx].detach().cpu())
                adv_image = norm_to_pil_image(data_adv[idx].detach().cpu())
                adv_images.update(
                    (orig_image, adv_image, target[idx].cpu()))

        if batch_idx == 0:
            for idx in range(target.size()[0]):
                orig_image = norm_to_pil_image(data[idx].detach().cpu())
                adv_image = norm_to_pil_image(data_adv[idx].detach().cpu())
                first_batch_images.update((orig_image, adv_image))

        with torch.no_grad():
            output_adv = self.model(data_adv)
            adv_logits.update(output_adv.cpu())
            loss = F.cross_entropy(output_adv, target,
                                   reduction='none').cpu()
            adv_loss.update(loss)
            corr = correct(output_adv, target)
            corr = corr.view(corr.size()[0]).cpu()
            adv_corr.update(corr)

        run_output = {
            'std_loss': std_loss.avg,
            'std_acc': std_corr.avg,
            'adv_loss': adv_loss.avg,
            'adv_acc': adv_corr.avg
        }
        print('Batch', batch_idx)
        print(run_output)
        if batch_idx % 20 == 0:
            self.logger.log(run_output, batch_idx)

    summary_dict = {
        'std_acc': std_corr.avg.item(),
        'adv_acc': adv_corr.avg.item()
    }
    self.logger.log_summary(summary_dict)
    for orig_img, adv_img, target in adv_images.vals:
        self.logger.log_image(orig_img, 'orig_{}.png'.format(target))
        self.logger.log_image(adv_img, 'adv_{}.png'.format(target))
    for idx, imgs in enumerate(first_batch_images.vals):
        orig_img, adv_img = imgs
        self.logger.log_image(orig_img, 'init_orig_{}.png'.format(idx))
        self.logger.log_image(adv_img, 'init_adv_{}.png'.format(idx))

    self.logger.end()
    print(std_loss.avg, std_corr.avg, adv_loss.avg, adv_corr.avg)
def pointwise_ops(self):
    """Call a broad set of ``torch`` pointwise operators and return all results.

    Returns:
        A tuple with one entry per call, in the order written below.

    Note:
        Order matters: several entries draw random numbers, and the
        ``a.uniform_(...)`` calls refill ``a`` in place, so every later entry
        that reads ``a`` sees the refilled values.
    """
    a = torch.randn(4)
    b = torch.randn(4)
    t = torch.tensor([-1, -2, 3], dtype=torch.int8)
    r = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
    # Same value as the first assignment to `t` above.
    t = torch.tensor([-1, -2, 3], dtype=torch.int8)
    s = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
    f = torch.zeros(3)
    g = torch.tensor([-1, 0, 1])
    w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
    return (
        torch.abs(torch.tensor([-1, -2, 3])),
        torch.absolute(torch.tensor([-1, -2, 3])),
        torch.acos(a),
        torch.arccos(a),
        # In-place refill of `a` with values in [1, 2).
        torch.acosh(a.uniform_(1.0, 2.0)),
        torch.add(a, 20),
        torch.add(a, torch.randn(4, 1), alpha=10),
        torch.addcdiv(torch.randn(1, 3),
                      torch.randn(3, 1),
                      torch.randn(1, 3),
                      value=0.1),
        torch.addcmul(torch.randn(1, 3),
                      torch.randn(3, 1),
                      torch.randn(1, 3),
                      value=0.1),
        torch.angle(a),
        torch.asin(a),
        torch.arcsin(a),
        torch.asinh(a),
        torch.arcsinh(a),
        torch.atan(a),
        torch.arctan(a),
        # In-place refills of `a` with values in [-1, 1).
        torch.atanh(a.uniform_(-1.0, 1.0)),
        torch.arctanh(a.uniform_(-1.0, 1.0)),
        torch.atan2(a, a),
        torch.bitwise_not(t),
        torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.ceil(a),
        torch.clamp(a, min=-0.5, max=0.5),
        torch.clamp(a, min=0.5),
        torch.clamp(a, max=0.5),
        torch.clip(a, min=-0.5, max=0.5),
        torch.conj(a),
        torch.copysign(a, 1),
        torch.copysign(a, b),
        torch.cos(a),
        torch.cosh(a),
        torch.deg2rad(
            torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0, -90.0]])),
        torch.div(a, b),
        torch.divide(a, b, rounding_mode="trunc"),
        torch.divide(a, b, rounding_mode="floor"),
        torch.digamma(torch.tensor([1.0, 0.5])),
        torch.erf(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfc(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfinv(torch.tensor([0.0, 0.5, -1.0])),
        torch.exp(torch.tensor([0.0, math.log(2.0)])),
        torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])),
        torch.expm1(torch.tensor([0.0, math.log(2.0)])),
        torch.fake_quantize_per_channel_affine(
            torch.randn(2, 2, 2),
            (torch.randn(2) + 1) * 0.05,
            torch.zeros(2),
            1,
            0,
            255,
        ),
        torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255),
        torch.float_power(torch.randint(10, (4, )), 2),
        torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4, -5])),
        torch.floor(a),
        # torch.floor_divide(torch.tensor([4.0, 3.0]), torch.tensor([2.0, 2.0])),
        # torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4),
        torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2),
        torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.frac(torch.tensor([1.0, 2.5, -3.2])),
        torch.randn(4, dtype=torch.cfloat).imag,
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1])),
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])),
        torch.lerp(torch.arange(1.0, 5.0), torch.empty(4).fill_(10), 0.5),
        torch.lerp(
            torch.arange(1.0, 5.0),
            torch.empty(4).fill_(10),
            torch.full_like(torch.arange(1.0, 5.0), 0.5),
        ),
        torch.lgamma(torch.arange(0.5, 2, 0.5)),
        torch.log(torch.arange(5) + 10),
        torch.log10(torch.rand(5)),
        torch.log1p(torch.randn(5)),
        torch.log2(torch.rand(5)),
        torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]),
                        torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]),
                        torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]),
                         torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]),
                         torch.tensor([-1, -2, -3])),
        torch.logical_and(r, s),
        torch.logical_and(r.double(), s.double()),
        torch.logical_and(r.double(), s),
        torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)),
        torch.logical_not(
            torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)),
        torch.logical_not(
            torch.tensor([0.0, 1.0, -10.0], dtype=torch.double),
            out=torch.empty(3, dtype=torch.int16),
        ),
        torch.logical_or(r, s),
        torch.logical_or(r.double(), s.double()),
        torch.logical_or(r.double(), s),
        torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_xor(r, s),
        torch.logical_xor(r.double(), s.double()),
        torch.logical_xor(r.double(), s),
        torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logit(torch.rand(5), eps=1e-6),
        torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])),
        torch.i0(torch.arange(5, dtype=torch.float32)),
        torch.igamma(a, b),
        torch.igammac(a, b),
        torch.mul(torch.randn(3), 100),
        torch.multiply(torch.randn(4, 1), torch.randn(1, 4)),
        torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2),
        # A plain tensor literal (no operator call); the nan_to_num entries
        # below operate on `w`, not on this tensor.
        torch.tensor([float("nan"),
                      float("inf"), -float("inf"), 3.14]),
        torch.nan_to_num(w),
        torch.nan_to_num(w, nan=2.0),
        torch.nan_to_num(w, nan=2.0, posinf=1.0),
        torch.neg(torch.randn(5)),
        # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]),
        torch.polygamma(1, torch.tensor([1.0, 0.5])),
        torch.polygamma(2, torch.tensor([1.0, 0.5])),
        torch.polygamma(3, torch.tensor([1.0, 0.5])),
        torch.polygamma(4, torch.tensor([1.0, 0.5])),
        torch.pow(a, 2),
        torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)),
        torch.rad2deg(
            torch.tensor([[3.142, -3.142], [6.283, -6.283], [1.570,
                                                             -1.570]])),
        torch.randn(4, dtype=torch.cfloat).real,
        torch.reciprocal(a),
        torch.remainder(torch.tensor([-3.0, -2.0]), 2),
        torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.round(a),
        torch.rsqrt(a),
        torch.sigmoid(a),
        torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sgn(a),
        torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sin(a),
        torch.sinc(a),
        torch.sinh(a),
        torch.sqrt(a),
        torch.square(a),
        torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2),
        torch.tan(a),
        torch.tanh(a),
        torch.trunc(a),
        # NOTE(review): the next two entries are identical - confirm whether
        # the duplicate is intentional.
        torch.xlogy(f, g),
        torch.xlogy(f, g),
        torch.xlogy(f, 4),
        torch.xlogy(2, g),
    )
def __call__(self, p):
    """Wrap ``p`` by the period ``self.s``, normalise it, and encode it.

    ``p`` is reduced modulo ``self.s`` and divided by ``self.s`` before being
    handed to ``self.pe`` (presumably a positional encoder -- confirm against
    the class definition).

    Returns:
        Whatever ``self.pe`` returns for the normalised input.
    """
    # torch.remainder takes the sign of the divisor, so for a positive
    # ``self.s`` the phase is never negative.  torch.fmod would keep the sign
    # of ``p`` instead, which is not what is wanted here.
    phase = torch.remainder(p, self.s)
    return self.pe(phase / self.s)
def test_remainder(self):
    """Run ``torch.remainder`` with a broadcast divisor through ``assertONNX``."""
    dividend = torch.randn(2, 3, 4)
    # Size-1 middle dimension, so the divisor broadcasts against the dividend.
    divisor = torch.randn(2, 1, 4)
    example_inputs = (dividend, divisor)
    self.assertONNX(lambda x, y: torch.remainder(x, y), example_inputs)
def forward(self, input, pos, key_padding_mask=None, attn_mask=None, indices=None, mems=None,
            incremental=False, incremental_cache=None, double_precision=False):
    """Select the per-sample ensemble factors and delegate to ``self.attn_func``.

    The batch size is read from ``input.size(1)`` and the ensemble size from
    ``self.r_i.size(0)``.

    * Training: each batch entry is paired with one ensemble member.
      ``indices`` names the member per entry and defaults to
      ``arange(bsz) % ensemble``.
    * Evaluation: ``input`` and ``pos`` are repeated ``ensemble`` times along
      dim 1 and the factors are laid out member-major (all batch rows for
      member 0, then member 1, ...).

    Args:
        input: Tensor whose dim 1 is the batch (presumably
            (time, batch, hidden) -- confirm against callers).
        pos: Positional input; repeated along dim 1 like ``input`` in
            evaluation mode.
        key_padding_mask: Optional mask, mutually exclusive with
            ``attn_mask``.  A 3-D mask is squeezed on dim 0 and transposed.
        attn_mask: Optional mask.  A 3-D mask is squeezed on its last dim.
        indices: Optional index tensor of ensemble-member ids, used only in
            training mode.
        mems: Not used by this method.
        incremental: Forwarded unchanged to ``self.attn_func``.
        incremental_cache: Forwarded unchanged to ``self.attn_func``.
        double_precision: Forwarded unchanged to ``self.attn_func``.

    Returns:
        The ``(outputs, coverage)`` pair produced by ``self.attn_func``.

    Raises:
        AssertionError: If both ``key_padding_mask`` and ``attn_mask`` are
            given.
    """
    batch = input.size(1)
    n_members = self.r_i.size(0)
    has_attn_mask = attn_mask is not None

    # Collapse the two mutually exclusive mask arguments into one tensor.
    mask = None
    if key_padding_mask is not None:
        assert (
            attn_mask is None
        ), "ERROR attn_mask and key_padding_mask should not be both defined!"
        mask = key_padding_mask
        if mask.dim() == 3:
            mask = mask.squeeze(0).transpose(0, 1)
    elif has_attn_mask:
        mask = attn_mask
        if mask.dim() == 3:
            mask = mask.squeeze(-1)

    factors = (self.r_i, self.s_i, self.r_p, self.s_p)

    if self.training:
        if indices is None:
            # Round-robin assignment of batch entries to ensemble members.
            with torch.no_grad():
                indices = torch.remainder(
                    torch.arange(0, batch, device=input.device, dtype=torch.long),
                    n_members,
                )
        # Gathered outside no_grad so gradients still reach the factors.
        r_i, s_i, r_p, s_p = (torch.index_select(f, 0, indices) for f in factors)
    else:
        input = input.repeat(1, n_members, 1)
        pos = pos.repeat(1, n_members, 1)

        def member_major(factor):
            # (ensemble, width) -> (ensemble * batch, width), member-major.
            width = factor.size(-1)
            tiled = factor.repeat(batch, 1).view(batch, n_members, width)
            return tiled.transpose(0, 1).contiguous().view(-1, width)

        r_i, s_i, r_p, s_p = (member_major(f) for f in factors)

    outputs, coverage = self.attn_func(
        input, pos, has_attn_mask, self.training, self.num_heads, n_members,
        self.in_proj_weight, self.out_proj_weight, self.pos_proj_weight,
        self.in_proj_bias, self.out_proj_bias, self.pos_proj_bias,
        r_i, s_i, r_p, s_p,
        self.r_w_bias, self.r_r_bias,
        mask, self.dropout,
        incremental, incremental_cache, double_precision,
    )
    return outputs, coverage
def run_remainder(x, y):
    """Return ``(x + y) mod x`` computed with ``torch.add`` and ``torch.remainder``."""
    total = torch.add(x, y)
    return torch.remainder(total, x)
def compute_projection(self, depth, camera_to_world, world_to_grid):
    """Pair voxels of the volume with the depth-image pixels they project onto.

    Every voxel inside the frustum bounds is transformed grid -> world ->
    camera, projected with ``self.intrinsic`` and kept when it lands inside
    the image and the depth stored at that pixel lies in
    ``[self.depth_min, self.depth_max]`` and within ``self.voxel_size`` of the
    voxel's camera-space depth.

    Args:
        depth: Depth image, read through ``depth.view(-1)`` with the linear
            index ``y * self.image_dims[0] + x``.
        camera_to_world: 4x4 camera pose matrix.
        world_to_grid: 4x4 world-to-voxel-grid matrix.

    Returns:
        ``None`` (after printing a message) when no voxel survives one of the
        three filters.  Otherwise ``(lin_indices_3d, lin_indices_2d)``, two
        index tensors of length ``prod(self.volume_dims) + 1``: element 0 is
        the number of matches ``k``, elements ``1..k`` hold the linear voxel
        indices and the matching linear pixel indices respectively, and the
        remaining padding is zero.
    """
    dim_x = self.volume_dims[0]
    dim_y = self.volume_dims[1]
    dim_z = self.volume_dims[2]
    slice_size = dim_x * dim_y
    num_voxels = slice_size * dim_z

    def grid_coords(lin_ind):
        # Linear voxel index -> homogeneous (x, y, z, 1) columns, x fastest.
        # Floor division is spelled ``//`` because ``/`` on integer tensors is
        # true division in current PyTorch and would yield fractional
        # coordinates.
        out = camera_to_world.new(4, lin_ind.size(0))
        z = lin_ind // slice_size
        in_slice = lin_ind - z * slice_size
        out[2] = z
        out[1] = in_slice // dim_x
        out[0] = torch.remainder(in_slice, dim_x)
        out[3].fill_(1)
        return out

    world_to_camera = torch.inverse(camera_to_world)
    grid_to_world = torch.inverse(world_to_grid)

    # Clamp the frustum's bounding box to the volume.
    voxel_bounds_min, voxel_bounds_max = self.compute_frustum_bounds(world_to_grid, camera_to_world)
    voxel_bounds_min = np.maximum(voxel_bounds_min, 0)
    voxel_bounds_max = np.minimum(voxel_bounds_max, self.volume_dims)
    if depth.is_cuda:
        voxel_bounds_min = voxel_bounds_min.cuda().float()
        voxel_bounds_max = voxel_bounds_max.cuda().float()
    else:
        voxel_bounds_min = voxel_bounds_min.cpu().float()
        voxel_bounds_max = voxel_bounds_max.cpu().float()

    # Coordinates within frustum bounds.
    lin_ind_volume = torch.arange(0, num_voxels, out=torch.LongTensor())
    lin_ind_volume = lin_ind_volume.cuda() if depth.is_cuda else lin_ind_volume.cpu()
    coords = grid_coords(lin_ind_volume)
    mask_frustum_bounds = (
        torch.ge(coords[0], voxel_bounds_min[0])
        * torch.ge(coords[1], voxel_bounds_min[1])
        * torch.ge(coords[2], voxel_bounds_min[2])
        * torch.lt(coords[0], voxel_bounds_max[0])
        * torch.lt(coords[1], voxel_bounds_max[1])
        * torch.lt(coords[2], voxel_bounds_max[2])
    )
    if not mask_frustum_bounds.any():
        print('error: nothing in frustum bounds')
        return None
    lin_ind_volume = lin_ind_volume[mask_frustum_bounds]
    coords = grid_coords(lin_ind_volume)

    # Transform to the current camera frame.
    p = torch.mm(world_to_camera, torch.mm(grid_to_world, coords))

    # Project into the image; p[2] keeps the camera-space depth.
    p[0] = (p[0] * self.intrinsic[0][0]) / p[2] + self.intrinsic[0][2]
    p[1] = (p[1] * self.intrinsic[1][1]) / p[2] + self.intrinsic[1][2]
    pi = torch.round(p).long()

    valid_ind_mask = (
        torch.ge(pi[0], 0)
        * torch.ge(pi[1], 0)
        * torch.lt(pi[0], self.image_dims[0])
        * torch.lt(pi[1], self.image_dims[1])
    )
    if not valid_ind_mask.any():
        print('error: no valid image indices')
        return None

    valid_image_ind_x = pi[0][valid_ind_mask]
    valid_image_ind_y = pi[1][valid_ind_mask]
    valid_image_ind_lin = valid_image_ind_y * self.image_dims[0] + valid_image_ind_x

    # Keep voxels whose projected depth agrees with the observed depth.
    depth_vals = torch.index_select(depth.view(-1), 0, valid_image_ind_lin)
    depth_mask = (
        depth_vals.ge(self.depth_min)
        * depth_vals.le(self.depth_max)
        * torch.abs(depth_vals - p[2][valid_ind_mask]).le(self.voxel_size)
    )
    if not depth_mask.any():
        print('error: no valid depths')
        return None

    lin_ind_update = lin_ind_volume[valid_ind_mask][depth_mask]
    count = lin_ind_update.shape[0]

    # Fixed-size buffers so results can be batched: element 0 holds the count.
    # Zero-filled so the unused tail is deterministic instead of uninitialised.
    lin_indices_3d = lin_ind_update.new(num_voxels + 1).zero_()
    lin_indices_2d = lin_ind_update.new(num_voxels + 1).zero_()
    lin_indices_3d[0] = count
    lin_indices_2d[0] = count
    lin_indices_3d[1:1 + count] = lin_ind_update
    lin_indices_2d[1:1 + count] = valid_image_ind_lin[depth_mask]
    return lin_indices_3d, lin_indices_2d