Example #1
def convert_padding_direction(src_tokens, padding_idx, right_to_left=False, left_to_right=False):
    assert right_to_left ^ left_to_right
    pad_mask = src_tokens.eq(padding_idx)
    if not pad_mask.any():
        # no padding, return early
        return src_tokens
    if left_to_right and not pad_mask[:, 0].any():
        # already right padded
        return src_tokens
    if right_to_left and not pad_mask[:, -1].any():
        # already left padded
        return src_tokens
    max_len = src_tokens.size(1)
    range = buffered_arange(max_len).type_as(src_tokens).expand_as(src_tokens)
    num_pads = pad_mask.long().sum(dim=1, keepdim=True)
    if right_to_left:
        index = torch.remainder(range - num_pads, max_len)
    else:
        index = torch.remainder(range + num_pads, max_len)
    return src_tokens.gather(1, index)
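A minimal usage sketch for the function above. It assumes fairseq's buffered_arange behaves like torch.arange; the stand-in below is hypothetical and only there to make the snippet self-contained.

import torch

def buffered_arange(n):
    # hypothetical stand-in for fairseq's cached arange helper
    return torch.arange(n)

pad = 1
left_padded = torch.tensor([[pad, pad, 5, 6],
                            [pad, 7, 8, 9]])
# expected output: tensor([[5, 6, 1, 1],
#                          [7, 8, 9, 1]])
print(convert_padding_direction(left_padded, pad, left_to_right=True))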
Example #2
 def _unwrap_t(self, angles):
     '''
     unwrap angles, adapted from https://github.com/numpy/numpy/blob/v1.13.0/numpy/lib/function_base.py#L2118-L2170
     '''
     
     d_angles = angles[:, 1:] - angles[:, :-1]
     
     ddmod = torch.remainder(d_angles + np.pi, 2*np.pi) - np.pi
     
     good = (ddmod == -np.pi) * (d_angles > 0) #instead of &
     ddmod[good] = np.pi
     
     ph_correct = ddmod - d_angles
     ph_correct[d_angles.abs() < np.pi] = 0
     ph_correct = ph_correct.cumsum(1)
     ph_correct += angles[:, 1:]
     
     angles[:, 1:] = ph_correct
Example #3
def torch_mod(x):
    return torch.remainder(x, TWO_PI)
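A quick check of the wrapping behaviour, assuming the module-level TWO_PI constant equals 2 * math.pi: torch.remainder follows the sign of the divisor, so every result lands in [0, 2*pi).

import math
import torch

TWO_PI = 2 * math.pi  # assumed definition of the module-level constant

angles = torch.tensor([-0.5 * math.pi, 3.0 * math.pi])
print(torch_mod(angles))  # tensor([4.7124, 3.1416]), i.e. 1.5*pi and pi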
Example #4
    def beam_decode(self, encoder_outputs, decoder_input, decoder_hidden,
                    context):
        # From https://github.com/budzianowski/PyTorch-Beam-Search-Decoding/blob/master/decode_beam.py
        '''
        :param decoder_hidden: input tensor of shape [B, H] for start of the decoding
        :param encoder_outputs: if you are using attention mechanism you can pass encoder outputs, [T, B, H] where T is the maximum length of input sentence
        :return: decoded_batch
        '''
        beam_width = 64
        decoded_batch = []
        batch_size = 1
        input_length = encoder_outputs.size(1)

        # (batch, seq_len)
        mask = self.mask.repeat(input_length).unsqueeze(0).repeat(
            batch_size, 1)

        # Generating arange(input_length), broadcast across batch_size
        runner = torch.arange(input_length, device=self.config[DEVICE])
        runner = runner.unsqueeze(0).expand(batch_size, -1).long()
        # decoding goes sentence by sentence
        for idx in range(batch_size):
            # Number of sentences to generate
            node = BeamSearchNode(decoder_hidden, None, decoder_input,
                                  torch.zeros(1, device=self.config[DEVICE]),
                                  0, mask.clone(), -1)
            nodes = []

            # start the queue
            nodes.append((-node.eval(), node))
            qsize = 1

            # start beam search
            for tstep in range(input_length):
                # give up when decoding takes too long
                new_nodes = []
                inputs, hiddens_h, hiddens_c, masks, old_nodes, old_logprobs = [], [], [], [], [], []
                while len(nodes) > 0:
                    # fetch the best nodes
                    score, n = nodes.pop()
                    decoder_input = n.dec_input
                    inputs.append(decoder_input)
                    decoder_hidden = n.h
                    hiddens_h.append(decoder_hidden[0])
                    hiddens_c.append(decoder_hidden[1])
                    mask = n.mask
                    masks.append(mask)
                    old_nodes.append(n)
                    old_logprobs.append(n.logp)
                inputs = torch.cat(inputs, dim=0)
                hiddens_h = torch.cat(hiddens_h, dim=0)
                hiddens_c = torch.cat(hiddens_c, dim=0)
                hiddens = (hiddens_h, hiddens_c)
                masks = torch.cat(masks, dim=0)
                old_logprobs = torch.cat(old_logprobs).unsqueeze(1).expand(
                    -1, input_length)
                # decode for one step using decoder
                h_t, c_t, outs, raw_att = self.step(
                    inputs, hiddens, masks,
                    context.repeat(inputs.shape[0], 1, 1))
                beam_indexes = torch.arange(
                    inputs.shape[0]).repeat_interleave(input_length)
                num_candidates = min(beam_width,
                                     input_length * inputs.shape[0])
                att_logprobs = self.log_softmax(raw_att)
                att_logprobs += old_logprobs
                log_prob, indexes = torch.topk(att_logprobs.view(-1),
                                               num_candidates)
                beam_indexes = beam_indexes[indexes]

                decoded_t = torch.remainder(indexes, input_length)
                one_hot_pointers = (runner == decoded_t.unsqueeze(1).expand(
                    -1, outs.shape[1])).float()
                new_masks = masks[beam_indexes] * (1 - one_hot_pointers)
                embedding_mask = one_hot_pointers.unsqueeze(2).expand(
                    -1, -1, self.embedding_dim).bool()
                decoder_input = encoder_outputs.repeat(
                    num_candidates, 1,
                    1)[embedding_mask.data].view(num_candidates,
                                                 self.embedding_dim)
                for new_k in range(num_candidates):
                    if log_prob[new_k] == self.att.inf:
                        break
                    beam_idx = beam_indexes[new_k]
                    node = BeamSearchNode(
                        (h_t[beam_idx].unsqueeze(0),
                         c_t[beam_idx].unsqueeze(0)), old_nodes[beam_idx],
                        decoder_input[beam_idx].unsqueeze(0),
                        log_prob[new_k].unsqueeze(0),
                        old_nodes[beam_idx].length + 1,
                        new_masks[new_k].unsqueeze(0), decoded_t[new_k].item())
                    score = -node.eval()
                    new_nodes.append((score, node))
                    qsize += 1

                # Prune the queue if necessary
                if qsize > beam_width:
                    nodes = sorted(new_nodes,
                                   key=operator.itemgetter(0))[:beam_width]
                else:
                    nodes = new_nodes
            endnodes = nodes
            utterances = []
            for score, n in sorted(endnodes, key=operator.itemgetter(0)):
                utterance = []
                utterance.append(n.word_id)
                # back trace
                while n.prevNode != None:
                    n = n.prevNode
                    utterance.append(n.word_id)
                utterance = utterance[::-1]
                utterances.append(utterance)
            decoded_batch.append(utterances)
        return torch.tensor(decoded_batch[0][0][1:],
                            device=self.config[DEVICE])
Example #5
 def _get_angle_loss(self, angle: torch.Tensor,
                     target_angle: torch.Tensor) -> torch.Tensor:
     scaled_angle = torch.remainder(angle,
                                    torch.Tensor([np.pi]).to(self._device))
     return F.mse_loss(scaled_angle, target_angle)
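A small sanity check of the wrapping step above, shown without the class wrapper (self._device is ignored in this sketch): an angle of 1.5*pi is folded into [0, pi) before the MSE, so it matches a target of 0.5*pi up to float rounding.

import numpy as np
import torch
import torch.nn.functional as F

angle = torch.tensor([1.5 * np.pi])
target = torch.tensor([0.5 * np.pi])
scaled = torch.remainder(angle, torch.tensor([np.pi]))
print(F.mse_loss(scaled, target))  # ~0, up to float rounding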
Example #6
    def evaluate(self):
        self.model.eval()

        std_loss = Accumulator('std_loss')
        adv_loss = Accumulator('adv_loss')
        std_corr = Accumulator('std_corr')
        adv_corr = Accumulator('adv_corr')
        std_logits = Accumulator('std_logits')
        adv_logits = Accumulator('adv_logits')

        seen_classes = []
        adv_images = Accumulator('adv_images')
        first_batch_images = Accumulator('first_batch_images')

        from PIL import Image

        # for batch_idx, (data, target) in enumerate(self.val_loader[0]):
        #     if self.cuda:
        #         data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True)
        #     with torch.no_grad():
        #         #output = self.model(data)
        #         data_cpy = data.clone().detach()
        #         std_cpy = data.clone().detach() # std_cpy is used for finding the standard accuracy and has transforms applied as normal
        #         # data_cpy = torch.tensor([])
        #         # std_cpy = torch.tensor([])
        #         # for idx in range(len(data_cpy)):
        #         #     #print("Tensor is cuda?", data_cpy.is_cuda)
        #
        #         #     data_cpy = torch.cat((data_cpy, torch.tensor(transforms.functional.normalize(transforms.functional.to_tensor(data[idx, :]), IMAGENET_MEAN, IMAGENET_STD)      )))
        #         #     #std_cpy[idx] = transforms.functional.normalize(data[idx].clone().cpu(), IMAGENET_MEAN, IMAGENET_STD).cuda() # DELETE
        #         #     transformedTensor = applyTransforms(np.copy(data[idx, :]))
        #         #     std_cpy = torch.cat((std_cpy, torch.tensor(transforms.functional.normalize(transformedTensor.clone().cpu(), IMAGENET_MEAN, IMAGENET_STD))))
        #         #     #std_cpy[idx, :] = transforms.functional.normalize(transformedTensor.cpu(), IMAGENET_MEAN, IMAGENET_STD).cuda()
        #         #     transformedImage = norm_to_pil_image(np.array(std_cpy[idx, :].cpu()))
        #         #     transformedImage.save('sample_data/standard' + str(idx) + '.png')
        #         #     untransformedImage = norm_to_pil_image(np.array(data_cpy[idx, :].cpu()))
        #         #     untransformedImage.save('sample_data/data' + str(idx) + '.png')
        #         #     # print(np.array(data_cpy[idx].cpu()) - np.array(std_cpy[idx].cpu()))
        #         output = self.model(std_cpy)
        #         std_logits.update(output.cpu())
        #         loss = F.cross_entropy(output, target, reduction='none').cpu()
        #         std_loss.update(loss)
        #         corr = correct(output, target)
        #         corr = corr.view(corr.size()[0]).cpu()
        #         std_corr.update(corr)
        #
        #     run_output = {'std_loss':std_loss.avg,
        #                   'std_acc':std_corr.avg}
        #     print('Standard Batch', batch_idx)
        #     print(run_output)

        for batch_idx, (data, target) in enumerate(self.val_loader[1]):

            # data is normalized at this point

            if self.cuda:
                data, target = data.cuda(non_blocking=True), target.cuda(
                    non_blocking=True)

            # for idx in range(len(data)):
            #     savedImage = norm_to_pil_image(data[idx])
            #     savedImage.save("sample_data/eric" + str(idx) + '.png')

            # with torch.no_grad():
            #     #output = self.model(data)
            #     data_cpy = data.clone().detach()
            #     std_cpy = data.clone().detach() # std_cpy is used for finding the standard accuracy and has transforms applied as normal
            #     # data_cpy = torch.tensor([])
            #     # std_cpy = torch.tensor([])
            #     # for idx in range(len(data_cpy)):
            #     #     #print("Tensor is cuda?", data_cpy.is_cuda)

            #     #     data_cpy = torch.cat((data_cpy, torch.tensor(transforms.functional.normalize(transforms.functional.to_tensor(data[idx, :]), IMAGENET_MEAN, IMAGENET_STD)      )))
            #     #     #std_cpy[idx] = transforms.functional.normalize(data[idx].clone().cpu(), IMAGENET_MEAN, IMAGENET_STD).cuda() # DELETE
            #     #     transformedTensor = applyTransforms(np.copy(data[idx, :]))
            #     #     std_cpy = torch.cat((std_cpy, torch.tensor(transforms.functional.normalize(transformedTensor.clone().cpu(), IMAGENET_MEAN, IMAGENET_STD))))
            #     #     #std_cpy[idx, :] = transforms.functional.normalize(transformedTensor.cpu(), IMAGENET_MEAN, IMAGENET_STD).cuda()
            #     #     transformedImage = norm_to_pil_image(np.array(std_cpy[idx, :].cpu()))
            #     #     transformedImage.save('sample_data/standard' + str(idx) + '.png')
            #     #     untransformedImage = norm_to_pil_image(np.array(data_cpy[idx, :].cpu()))
            #     #     untransformedImage.save('sample_data/data' + str(idx) + '.png')
            #     #     # print(np.array(data_cpy[idx].cpu()) - np.array(std_cpy[idx].cpu()))
            #     output_adv = self.model(data)
            #     adv_logits.update(output_adv.cpu())
            #     loss = F.cross_entropy(output_adv, target, reduction='none').cpu()
            #     adv_loss.update(loss)
            #     corr = correct(output_adv, target)
            #     corr = corr.view(corr.size()[0]).cpu()
            #     adv_corr.update(corr)

            rand_target = torch.randint(0,
                                        self.nb_classes - 1,
                                        target.size(),
                                        dtype=target.dtype,
                                        device='cuda')
            rand_target = torch.remainder(target + rand_target + 1,
                                          self.nb_classes)

            data_cpy = data.clone().detach()

            for idx in range(len(data_cpy)):
                # savedImage = norm_to_pil_image(data_adv[idx])
                # savedImage.save("sample_data/before_transforms" + str(idx) + '.png')
                unnormalized = reverse_normalization(data[idx])
                changed = np.swapaxes(
                    np.array(unnormalized.cpu().detach()) * 255.0, 0, 2)

                transformed = applyTransforms(
                    np.swapaxes(
                        np.array(unnormalized.cpu().clone().detach()) * 255.0,
                        0, 2))
                data_cpy[idx] = transforms.functional.normalize(
                    transformed.clone().cpu(), IMAGENET_MEAN,
                    IMAGENET_STD).cuda()

            #from PIL import Image
            data_adv = self.attack(self.model,
                                   data_cpy,
                                   rand_target,
                                   avoid_target=False,
                                   scale_eps=False)

            # for idx in range(len(data)):
            #     savedImage = norm_to_pil_image(data_adv[idx])
            #     savedImage.save("sample_data/eric" + str(idx) + '.png')

            with torch.no_grad():
                output_adv = self.model(data_adv)
                adv_logits.update(output_adv.cpu())
                loss = F.cross_entropy(output_adv, target,
                                       reduction='none').cpu()
                adv_loss.update(loss)
                corr = correct(output_adv, target)
                corr = corr.view(corr.size()[0]).cpu()
                adv_corr.update(corr)

            run_output = {'adv_loss': adv_loss.avg, 'adv_acc': adv_corr.avg}
            print('Adv Batch', batch_idx)
            print(run_output)

        summary_dict = {
            'std_acc': std_corr.avg.item(),
            'adv_acc': adv_corr.avg.item()
        }
        print(std_loss.avg, std_corr.avg, adv_loss.avg, adv_corr.avg)
Example #7
def pmod(torch_tensor, modulus):
    return torch.remainder(torch_tensor, modulus)
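For negative inputs the behaviour matters: torch.remainder (used by pmod above) returns values with the sign of the modulus, like Python's % operator, whereas torch.fmod keeps the sign of the dividend.

import torch

x = torch.tensor([-7., -1., 3.])
print(pmod(x, 5))        # tensor([3., 4., 3.])
print(torch.fmod(x, 5))  # tensor([-2., -1., 3.])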
Example #8
def _handle_row_wise_sharding(input, world_size, weight, rank, local_shard,
                              pg):
    # flatten the ids across all input and sort
    input_size = input.size()
    input_1d = torch.reshape(input, (-1, )).contiguous()
    input_sorted, indices_1d = torch.sort(input_1d)
    rearrange_indices_1d = torch.argsort(indices_1d)
    input_sorted.contiguous()

    # Decide which rank the input goes to by checking the sharding range.
    split_size = get_split_size(weight.size(0), world_size)
    rearrange_rows = False
    input_split_sizes: List[int] = [0] * world_size
    input_split_start_indices: List[int] = [0] * world_size
    # When we do the chunk split, we always ensure the first N - 1 chunks get maxed out
    # and then the Nth chunk gets the rest. So input_split_sizes like [3, 3, 3, 4]
    # are not possible. The expected split size will be [4, 4, 4, 1].
    sharded_dim_size_max = get_chunked_dim_size(weight.size(0), split_size, 0)
    for idx, placement in enumerate(weight._sharding_spec.placements):
        sharded_dim_size = get_chunked_dim_size(weight.size(0), split_size,
                                                idx)
        start_row_idx = idx * sharded_dim_size_max
        end_row_idx = start_row_idx + sharded_dim_size
        start_idx = torch.searchsorted(input_sorted, start_row_idx).item()
        end_idx = torch.searchsorted(input_sorted, end_row_idx).item()
        input_split_sizes[placement.rank()] = int(end_idx - start_idx)
        input_split_start_indices[placement.rank()] = int(start_idx)
        if placement.rank() != idx:
            rearrange_rows = True

    rearrange_indices_1d_second_order = None
    if rearrange_rows:
        # Need to re-arrange the 1D tensor to be sent via all2all.
        indices: List[List[int]] = [[0]] * world_size
        for placement in weight._sharding_spec.placements:
            split_length = input_split_sizes[placement.rank()]
            offset_idx = input_split_start_indices[placement.rank()]
            indices[placement.rank()] = list(
                range(offset_idx, offset_idx + split_length))
        indices_flatten = list(idx for indice in indices for idx in indice)

        input_sorted = input_sorted.index_select(
            0, torch.tensor(indices_flatten, device=input.device))
        rearrange_indices_1d_second_order = torch.argsort(
            torch.Tensor(indices_flatten))

    # Get the input split size to be sent from each rank to the current rank.
    # We can then infer the output split size.
    input_split_sizes_tensor = (
        torch.Tensor(input_split_sizes).type("torch.IntTensor").cuda(rank))
    output_split_sizes_tensor = torch.empty(world_size,
                                            dtype=torch.int32,
                                            device=input.device)
    dist.all_to_all_single(
        output_split_sizes_tensor,
        input_split_sizes_tensor,
        group=pg,
    )
    output_split_sizes = output_split_sizes_tensor.tolist()

    # Input sent from each rank to the current rank may have different sizes.
    gathered_input = torch.empty(sum(output_split_sizes),
                                 dtype=torch.int64,
                                 device=input.device)

    # Perform the modulo operation on the 1D tensor to be sent to each rank.
    input_sorted = torch.remainder(input_sorted, sharded_dim_size_max)

    # Perform alltoall
    dist.all_to_all_single(
        gathered_input,
        input_sorted,
        input_split_sizes=input_split_sizes,
        output_split_sizes=output_split_sizes,
        group=pg,
    )

    # Perform local embedding look up.
    gathered_input_embeddings = torch.nn.functional.embedding(
        gathered_input, local_shard)

    # Gather all lookup result appropriately by performing alltoall again
    gathered_output = torch.empty(input_sorted.size(0),
                                  weight.size(1),
                                  device=input.device)
    dist.all_to_all_single(
        gathered_output,
        gathered_input_embeddings,
        input_split_sizes=output_split_sizes,
        output_split_sizes=input_split_sizes,
        group=pg,
    )

    # Rearrange the results to its original shape.
    if rearrange_indices_1d_second_order is not None:
        gathered_output = gathered_output[rearrange_indices_1d_second_order]
    gathered_output = gathered_output[rearrange_indices_1d]

    # Return the appropriate local result.
    return torch.reshape(gathered_output, (*input_size, weight.size(1)))
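A toy illustration (not the sharded-tensor API) of the routing logic used above: with the first chunks maxed out, the owning rank of a row id is id // sharded_dim_size_max, and the local row inside that shard is torch.remainder(id, sharded_dim_size_max). The sizes below are made up for the sketch.

import torch

sharded_dim_size_max = 4   # assumed chunk size for 10 rows over 3 ranks ([4, 4, 2])
ids = torch.tensor([0, 3, 4, 7, 9])
dest_rank = torch.div(ids, sharded_dim_size_max, rounding_mode='floor')  # tensor([0, 0, 1, 1, 2])
local_row = torch.remainder(ids, sharded_dim_size_max)                   # tensor([0, 3, 0, 3, 1])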
Example #9
    def compute_projection(self, depth, camera_to_world, world_to_grid):
        # compute projection by voxels -> image
        #print 'camera_to_world', camera_to_world
        #print 'intrinsic', self.intrinsic
        #print(world_to_grid)
        world_to_camera = torch.inverse(camera_to_world)
        grid_to_world = torch.inverse(world_to_grid)
        voxel_bounds_min, voxel_bounds_max = self.compute_frustum_bounds(
            world_to_grid, camera_to_world)
        voxel_bounds_min = np.maximum(
            voxel_bounds_min,
            0).cuda().float() if depth.is_cuda else np.maximum(
                voxel_bounds_min, 0).cpu().float()
        voxel_bounds_max = np.minimum(
            voxel_bounds_max,
            self.volume_dims).cuda().float() if depth.is_cuda else np.minimum(
                voxel_bounds_max, self.volume_dims).cpu().float()

        # coordinates within frustum bounds
        # TODO python opt for this part instead of lua/torch opt?
        lin_ind_volume = torch.arange(
            0,
            self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2],
            out=torch.LongTensor())
        lin_ind_volume = lin_ind_volume.cuda(
        ) if depth.is_cuda else lin_ind_volume.cpu()
        coords = camera_to_world.new(4, lin_ind_volume.size(0))
        coords[2] = lin_ind_volume / (self.volume_dims[0] *
                                      self.volume_dims[1])
        tmp = lin_ind_volume - (coords[2] * self.volume_dims[0] *
                                self.volume_dims[1]).long()
        coords[1] = tmp / self.volume_dims[0]
        coords[0] = torch.remainder(tmp, self.volume_dims[0])
        coords[3].fill_(1)
        mask_frustum_bounds = torch.ge(
            coords[0], voxel_bounds_min[0]) * torch.ge(
                coords[1], voxel_bounds_min[1]) * torch.ge(
                    coords[2], voxel_bounds_min[2])
        mask_frustum_bounds = mask_frustum_bounds * torch.lt(
            coords[0], voxel_bounds_max[0]) * torch.lt(
                coords[1], voxel_bounds_max[1]) * torch.lt(
                    coords[2], voxel_bounds_max[2])
        if not mask_frustum_bounds.any():
            print('error: nothing in frustum bounds')
            return None
        lin_ind_volume = lin_ind_volume[mask_frustum_bounds]
        coords = coords.resize_(4, lin_ind_volume.size(0))
        coords[2] = lin_ind_volume / (self.volume_dims[0] *
                                      self.volume_dims[1])
        tmp = lin_ind_volume - (coords[2] * self.volume_dims[0] *
                                self.volume_dims[1]).long()
        coords[1] = tmp / self.volume_dims[0]
        coords[0] = torch.remainder(tmp, self.volume_dims[0])
        coords[3].fill_(1)

        # transform to current frame
        p = torch.mm(world_to_camera, torch.mm(grid_to_world, coords))

        # project into image
        p[0] = (p[0] * self.intrinsic[0][0]) / p[2] + self.intrinsic[0][2]
        p[1] = (p[1] * self.intrinsic[1][1]) / p[2] + self.intrinsic[1][2]
        pi = torch.round(p).long()

        valid_ind_mask = torch.ge(pi[0], 0) * torch.ge(pi[1], 0) * torch.lt(
            pi[0], self.image_dims[0]) * torch.lt(pi[1], self.image_dims[1])
        if not valid_ind_mask.any():
            print('error: no valid image indices')
            return None

        valid_image_ind_x = pi[0][valid_ind_mask]
        valid_image_ind_y = pi[1][valid_ind_mask]
        valid_image_ind_lin = valid_image_ind_y * self.image_dims[
            0] + valid_image_ind_x
        depth_vals = torch.index_select(depth.view(-1), 0, valid_image_ind_lin)
        depth_mask = depth_vals.ge(self.depth_min) * depth_vals.le(
            self.depth_max) * torch.abs(depth_vals - p[2][valid_ind_mask]).le(
                self.voxel_size)

        if not depth_mask.any():
            print('error: no valid depths')
            return None

        lin_ind_update = lin_ind_volume[valid_ind_mask]
        lin_ind_update = lin_ind_update[depth_mask]
        lin_indices_3d = lin_ind_update.new(
            self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2] + 1
        )  #needs to be same size for all in batch... (first element has size)
        lin_indices_2d = lin_ind_update.new(
            self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2] + 1
        )  #needs to be same size for all in batch... (first element has size)
        lin_indices_3d[0] = lin_ind_update.shape[0]
        lin_indices_2d[0] = lin_ind_update.shape[0]
        lin_indices_3d[1:1 + lin_indices_3d[0]] = lin_ind_update
        lin_indices_2d[1:1 + lin_indices_2d[0]] = torch.index_select(
            valid_image_ind_lin, 0,
            torch.nonzero(depth_mask)[:, 0])
        num_ind = lin_indices_3d[0]
        #print '[proj] #ind = ', lin_indices_3d[0]
        #print '2d', torch.min(lin_indices_2d[1:1+num_ind]), torch.max(lin_indices_2d[1:1+num_ind])
        #print '3d', torch.min(lin_indices_3d[1:1+num_ind]), torch.max(lin_indices_3d[1:1+num_ind])
        return lin_indices_3d, lin_indices_2d
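A toy check of the linear-index to (x, y, z) decomposition used twice above, with a made-up 4 x 3 x 2 volume; torch.div with rounding_mode='floor' stands in for the older integer '/' on tensors.

import torch

dims = [4, 3, 2]                                   # hypothetical volume_dims
lin = torch.arange(dims[0] * dims[1] * dims[2])
z = torch.div(lin, dims[0] * dims[1], rounding_mode='floor')
tmp = lin - z * dims[0] * dims[1]
y = torch.div(tmp, dims[0], rounding_mode='floor')
x = torch.remainder(tmp, dims[0])                  # x varies fastest, matching the layout above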
Example #10
}], 0.0005)

bar = tqdm(range(config.N_iters))
for i in bar:
    img_idx = np.random.choice(i_train)
    target = imgs[img_idx]
    # pose = poses[img_idx, :3, :4]
    # rays_o, rays_d = get_rays(H, W, focal, pose) # can be moved to pre-computed
    rays_o, rays_d = rays[img_idx]
    rays_o = torch.from_numpy(rays_o).cuda()
    rays_d = torch.from_numpy(rays_d).cuda()
    target = torch.from_numpy(target).cuda()

    rand_idx = torch.from_numpy(
        np.int64(random.sample(range(H * W), k=config.N_rand)))
    rand_idx_x = torch.remainder(rand_idx, W)
    rand_idx_y = torch.div(rand_idx, W, rounding_mode='floor')
    rays_o = rays_o[rand_idx_y, rand_idx_x]
    rays_d = rays_d[rand_idx_y, rand_idx_x]
    target_s = target[rand_idx_y, rand_idx_x]

    optim.zero_grad()
    rgb0, rgb = render(rays_o, rays_d, 2, 6, embed_fn, embeddirs_fn, net,
                       net_fine)
    err0 = torch.pow(rgb0 - target_s, 2).mean()
    err1 = torch.pow(rgb - target_s, 2).mean()
    err = err0 + err1
    err.backward()
    optim.step()

    outstr = 'LsCoarse: %.4f  LsFine: %.4f' % (err0.cpu().detach().numpy(),
Example #11
def phase_vocoder(complex_specgrams, rate, phase_advance):
    """
    Phase vocoder. Given an STFT tensor, speed up in time
    without modifying pitch by a factor of `rate`.

    Args:
        complex_specgrams (Tensor):
            (*, channel, num_freqs, time, complex=2)
        rate (float): Speed-up factor.
        phase_advance (Tensor): Expected phase advance in
            each bin. (num_freqs, 1).

    Returns:
        complex_specgrams_stretch (Tensor):
            (*, channel, num_freqs, ceil(time/rate), complex=2).

    Example:
        >>> num_freqs, hop_length = 1025, 512
        >>> # (batch, channel, num_freqs, time, complex=2)
        >>> complex_specgrams = torch.randn(16, 1, num_freqs, 300, 2)
        >>> rate = 1.3 # Speed up by 30%
        >>> phase_advance = torch.linspace(
        >>>    0, math.pi * hop_length, num_freqs)[..., None]
        >>> x = phase_vocoder(complex_specgrams, rate, phase_advance)
        >>> x.shape # with 231 == ceil(300 / 1.3)
        torch.Size([16, 1, 1025, 231, 2])
    """
    ndim = complex_specgrams.dim()
    time_slice = [slice(None)] * (ndim - 2)

    time_steps = torch.arange(0,
                              complex_specgrams.size(-2),
                              rate,
                              device=complex_specgrams.device)

    alphas = torch.remainder(time_steps,
                             torch.tensor(1., device=complex_specgrams.device))
    phase_0 = angle(complex_specgrams[time_slice + [slice(1)]])

    # Time Padding
    complex_specgrams = torch.nn.functional.pad(complex_specgrams,
                                                [0, 0, 0, 2])

    complex_specgrams_0 = complex_specgrams[time_slice + [time_steps.long()]]
    # (new_bins, num_freqs, 2)
    complex_specgrams_1 = complex_specgrams[time_slice +
                                            [(time_steps + 1).long()]]

    angle_0 = angle(complex_specgrams_0)
    angle_1 = angle(complex_specgrams_1)

    norm_0 = torch.norm(complex_specgrams_0, dim=-1)
    norm_1 = torch.norm(complex_specgrams_1, dim=-1)

    phase = angle_1 - angle_0 - phase_advance
    phase = phase - 2 * math.pi * torch.round(phase / (2 * math.pi))

    # Compute Phase Accum
    phase = phase + phase_advance
    phase = torch.cat([phase_0, phase[time_slice + [slice(-1)]]], dim=-1)
    phase_acc = torch.cumsum(phase, -1)

    mag = alphas * norm_1 + (1 - alphas) * norm_0

    real_stretch = mag * torch.cos(phase_acc)
    imag_stretch = mag * torch.sin(phase_acc)

    complex_specgrams_stretch = torch.stack([real_stretch, imag_stretch],
                                            dim=-1)

    return complex_specgrams_stretch
Example #12
def decode_one_sentence_adaptive_rl(machine, seq_len, init_dec_hidden,
                                    init_dec_cell,
                                    enc_hidden_seq, initial_beam_size, max_beam_size,
                                    model, shared_model,
                                    reward_coef_fscore, reward_coef_beam_size,
                                    label_true_seq, f_score_index_begin,
                                    counter, lock, optimizer,
                                    args,
                                    ):
  # Currently, batch size can only be 1
  batch_size = 1

  # Each beta is (batch size, beam size) matrix,
  # and there will be T_y of them in the sequence
  # y => same
  beta_seq = []
  y_seq = []

  logP_seq = []
  accum_logP_seq = []

  if machine.attention:
    # This would be the attention alpha_{ij} coefficients
    # in the shape of (output seq len, batch size, beam size, input seq len)
    attention_seq = []
  else:
    attention_seq = None

  # For RL episode
  episode = []

  # init_label's shape => (batch size, 1),
  # with all elements machine.BEG_INDEX
  if machine.gpu:
    init_label_emb = \
      machine.label_embedding(
        Variable(torch.LongTensor(batch_size, 1).zero_()).cuda() \
        + machine.BEG_INDEX) \
        .view(batch_size, machine.label_embedding_dim)
  else:
    init_label_emb = \
      machine.label_embedding(
        Variable(torch.LongTensor(batch_size, 1).zero_()) \
        + machine.BEG_INDEX) \
        .view(batch_size, machine.label_embedding_dim)

  # t = 0, only one input beam from init (t = -1)
  # Only one dec_hidden_out, dec_cell_out
  # => dec_hidden_out has shape (batch size, hidden dim)
  dec_hidden_out, dec_cell_out = \
    machine.decoder_cell(init_label_emb,
                         (init_dec_hidden, init_dec_cell))

  # Attention
  if machine.attention:
    dec_hidden_out = dec_hidden_out[None, :, :]  # add 1 nominal dim
    dec_hidden_out, attention = \
      machine.attention(dec_hidden_out, enc_hidden_seq, 0, machine.enc2dec_hidden)

    # remove the added dim
    dec_hidden_out = dec_hidden_out.view(batch_size, machine.hidden_dim)
    attention = attention.view(batch_size, seq_len)

  # dec_hidden_beam shape => (1, batch size, hidden dim),
  # 1 because there is only 1 input beam
  dec_hidden_beam = torch.stack([dec_hidden_out], dim=0)
  dec_cell_beam = torch.stack([dec_cell_out], dim=0)

  # This one is for backtracking (need permute)
  if machine.attention:
    # For better explanation, see in the "for t" loop below
    #
    # Originally attention has shape (batch size, input seq len)
    #
    # At t = 0, there is only 1 beam, so formally attention is actually
    # in shape (1, batch size, input seq len), where 1 is beam size.
    attention_beam = torch.stack([attention], dim=0)

    # We need to permute (swap) the dimensions into
    # the shape (batch size, 1, input seq len)
    attention_beam = attention_beam.permute(1, 0, 2)

  # score_out.shape => (batch size, |V^y|)
  score_out = machine.hidden2score(dec_hidden_out) \
    .view(batch_size, machine.label_size)
  logP_out = machine.score2logP(score_out).view(batch_size, machine.label_size)

  # Initial step, accumulated logP is the same as logP
  accum_logP_out = logP_out

  logP_out_list = [logP_out]
  accum_logP_out_list = [accum_logP_out]

  # This one is for backtracking (need permute)
  logP_output_beam = torch.stack(logP_out_list, dim=0).permute(1, 0, 2)
  accum_logP_output_beam = torch.stack(accum_logP_out_list, dim=0).permute(1,
                                                                           0,
                                                                           2)

  # score_matrix.shape => (batch size, |V^y| * 1)
  # * 1 because there is only 1 input beam
  logP_matrix = torch.cat(logP_out_list, dim=1)
  accum_logP_matrix = torch.cat(accum_logP_out_list, dim=1)

  # Just for code consistency (about reward calculation)
  cur_beam_size_in = 1

  # Just for code consistency (about experience tuple)
  cur_state = machine.make_state(accum_logP_matrix, logP_matrix, 1,
                                 max_beam_size)
  action = None

  # All beta^{t=0, b} are actually 0
  # beta_beam.shape => (batch size, beam size),
  # each row is [y^{t, b=0}, y^{t, b=1}, ..., y^{t, b=B-1}]
  # y_beam, score_beam => same

  action_seq = []
  beam_size_seq = []
  beam_size = initial_beam_size
  beam_size_seq.append(beam_size)
  accum_logP_beam, index_beam = torch.topk(accum_logP_matrix, beam_size,
                                           dim=1)

  beta_beam = torch.floor(index_beam.float() / machine.label_size).long()
  y_beam = torch.remainder(index_beam, machine.label_size)

  # This one is for backtracking
  beta_seq.append(beta_beam)
  y_seq.append(y_beam)
  if machine.attention:
    attention_seq.append(attention_beam)
  logP_seq.append(logP_output_beam)
  accum_logP_seq.append(accum_logP_output_beam)

  # Just for sentence with length = 1
  label_pred_seq, accum_logP_pred_seq, logP_pred_seq, attention_pred_seq = machine.backtracking(
    1, batch_size, y_seq, beta_seq, attention_seq, logP_seq, accum_logP_seq)

  # -----------------
  # Sync params with the shared model
  model.load_state_dict(shared_model.state_dict())

  values = []
  log_probs = []
  rewards = []
  entropies = []
  # -----------------

  # t = 1, 2, ..., (T_y - 1 == seq_len - 1)
  for t in range(1, seq_len):
    # print("At time step {} seq_len={}".format(t, seq_len))


    # We loop through beam because we expect that
    # usually batch size > beam size
    #
    # DESIGN: This may not be true anymore in adaptive beam search,
    # since we expect batch size = 1 in this case.
    # So is beam operations vectorizable?

    accum_logP_matrix, logP_matrix, dec_hidden_beam, dec_cell_beam, attention_beam, accum_logP_output_beam, logP_output_beam = \
      machine.decode_beam_step_rl(beam_size, y_beam, beta_beam,
                                  dec_hidden_beam, dec_cell_beam, accum_logP_beam,
                                  enc_hidden_seq, seq_len, t)

    # Actually, at t = T_y - 1 == seq_len - 1,
    # you don't have to take an action (pick a beam of predictions) anymore:
    # at this last output step you would pick only the highest-scoring result
    # and backtrack from it to determine the best sequence.
    # However, in the current version of this code we temporarily keep doing
    # one more beam picking, just to stay compatible with the backtracking
    # function and the rest of the code. We leave this improvement to future work.
    #
    # Note that this state is actually the output state at t
    state = machine.make_state(accum_logP_matrix, logP_matrix,
                               beam_size, max_beam_size)

    # For experience tuple
    prev_state = cur_state
    cur_state = state
    prev_action = action

    # For reward calculation
    prev_beam_size_in = cur_beam_size_in
    cur_beam_size_in = beam_size

    # policy network showtime
    value, logit = model(state)
    prob = F.softmax(logit, dim=-1)
    log_prob = F.log_softmax(logit, dim=-1)


    # TODO: for naive MLP policy network only
    prob = prob.view(1, -1)
    log_prob = log_prob.view(1, -1)


    entropy = -(log_prob * prob).sum(1, keepdim=True)
    entropies.append(entropy)

    action = prob.multinomial().data
    log_prob = log_prob.gather(1, Variable(action))

    # state, reward, done, _ = env.step(action.numpy())
    # done = done or episode_length >= args.max_episode_length
    # reward = max(min(reward, 1), -1)

    with lock:
      counter.value += 1

    # populate data
    values.append(value)
    log_probs.append(log_prob)
    action_seq.append(action)



    # print(type(action))
    # TODO: review this
    action = action.numpy()[0]



    # update beam size w.r.t. the action chosen
    if action == 0 and beam_size > 1:
      beam_size -= 1
    elif action == 2 and beam_size < max_beam_size:
      beam_size += 1

    beam_size_seq.append(beam_size)

    accum_logP_beam, index_beam = \
      torch.topk(accum_logP_matrix, beam_size, dim=1)

    beta_beam = torch.floor(
      index_beam.float() / machine.label_size).long()
    y_beam = torch.remainder(index_beam, machine.label_size)
    beta_seq.append(beta_beam)
    y_seq.append(y_beam)
    if machine.attention:
      attention_seq.append(attention_beam)
    logP_seq.append(logP_output_beam)
    accum_logP_seq.append(accum_logP_output_beam)

    # Compute the F-score for the sequence [0, 1, ..., t] (length t+1) using the y_seq and
    # beta_seq collected so far. This is the ("partial", so to speak) F-score at this t.
    label_pred_seq, accum_logP_pred_seq, logP_pred_seq, attention_pred_seq = \
      machine.backtracking(
      t + 1, batch_size, y_seq, beta_seq, attention_seq, logP_seq,
      accum_logP_seq)

    cur_fscore = machine.get_fscore(label_pred_seq, label_true_seq,
                                    f_score_index_begin)

    # If t >= 2, compute the reward,
    # and generate the experience tuple ( s_{t-1}, a_{t-1}, r_{t-1}, s_t )
    # reward = None
    if t >= 2:
      reward = machine.get_reward(cur_fscore, fscore, cur_beam_size_in,
                                  prev_beam_size_in, reward_coef_fscore,
                                  reward_coef_beam_size)
      experience_tuple = (prev_state, prev_action, reward, cur_state)
      episode.append(experience_tuple)


      rewards.append(reward)

    fscore = cur_fscore
  # End for t

  # print("rewards: {}".format(rewards))
  # print("actions: {}".format(action_seq))

  # backprop now with actor-critic
  R = torch.zeros(1, 1)
  values.append(Variable(R))
  policy_loss = 0
  value_loss = 0
  R = Variable(R)
  gae = torch.zeros(1, 1)
  for i in reversed(range(len(rewards))):
    R = args.gamma * R + rewards[i]
    advantage = R - values[i]
    value_loss = value_loss + 0.5 * advantage.pow(2)

    # Generalized Advantage Estimation
    delta_t = rewards[i] + args.gamma * \
                           values[i + 1].data - values[i].data
    gae = gae * args.gamma * args.tau + delta_t

    policy_loss = policy_loss - \
                  log_probs[i] * Variable(gae) - args.entropy_coef * \
                                                 entropies[i]
  # print(policy_loss)

  optimizer.zero_grad()

  (policy_loss + args.value_loss_coef * value_loss).backward()
  torch.nn.utils.clip_grad_norm(model.parameters(), args.max_grad_norm)

  ensure_shared_grads(model, shared_model)
  optimizer.step()


  return label_pred_seq, accum_logP_pred_seq, logP_pred_seq, \
         attention_pred_seq, episode, beam_size_seq
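A toy sketch of the index split used above when picking a beam: take topk over the flattened (beam x label) score matrix, then recover the source beam with an integer division and the label with torch.remainder. The sizes are made up for the sketch.

import torch

label_size = 5
accum_logP_matrix = torch.randn(1, 3 * label_size)     # batch of 1, 3 input beams
scores, flat_idx = torch.topk(accum_logP_matrix, 3, dim=1)
beta_beam = torch.div(flat_idx, label_size, rounding_mode='floor')  # which beam each pick came from
y_beam = torch.remainder(flat_idx, label_size)                      # which label within that beam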
Example #13
    L = Kp.size()[0]

    if args.range == 'short':
        mask = torch.triu(torch.ones(L, L), diagonal=6) - torch.triu(
            torch.ones(L, L), diagonal=12)
    if args.range == 'medium':
        mask = torch.triu(torch.ones(L, L), diagonal=12) - torch.triu(
            torch.ones(L, L), diagonal=24)
    if args.range == "large":
        mask = torch.triu(torch.ones(L, L), diagonal=24)
    mask = Variable(mask)
    Kp = Kp * mask

    top_couplings = torch.topk(Kp.view(-1), int(args.a * L))[1]

    top_couplings = ((top_couplings / L), torch.remainder(top_couplings, L))
    k = top_couplings[0].size()[0]
    torch.save(top_couplings, "../results/1BDO_A_top_coupl.out")

    # Calculating the distances
    structure = PDBParser().get_structure('1BDO_A', '../database/1BDO_A.pdb')
    model = structure[0]
    L = len(list(structure.get_residues()))
    distances = np.zeros((L, L))

    for chain in model:
        for i in range(L):
            for j in range(L):
                distances[i][j] = chain[i + 77]['CA'] - chain[j + 77]['CA']

    #Renormalizing to plot the contact map
Example #14
    def sample_lines(self, meta, jmap, joff, mode):
        with torch.no_grad():
            junc = meta["junc"]  # [N, 2]
            jtyp = meta["jtyp"]  # [N]
            Lpos = meta["Lpos"]
            Lneg = meta["Lneg"]

            n_type = jmap.shape[0]
            jmap = non_maximum_suppression(jmap).reshape(n_type, -1)
            joff = joff.reshape(n_type, 2, -1)
            max_K = M.n_dyn_junc // n_type
            N = len(junc)
            if mode != "training":
                K = min(int((jmap > M.eval_junc_thres).float().sum().item()),
                        max_K)
            else:
                K = min(int(N * 2 + 2), max_K)
            if K < 2:
                K = 2
            device = jmap.device

            # index: [N_TYPE, K]
            score, index = torch.topk(jmap, k=K)
            y = torch.true_divide(index, 128) + torch.gather(
                joff[:, 0], 1, index) + 0.5
            x = torch.remainder(index, 128) + torch.gather(
                joff[:, 1], 1, index) + 0.5

            # xy: [N_TYPE, K, 2]
            xy = torch.cat([y[..., None], x[..., None]], dim=-1)
            xy_ = xy[..., None, :]
            del x, y, index

            # dist: [N_TYPE, K, N]
            dist = torch.sum((xy_ - junc)**2, -1)
            cost, match = torch.min(dist, -1)

            # xy: [N_TYPE * K, 2]
            # match: [N_TYPE, K]
            for t in range(n_type):
                match[t, jtyp[match[t]] != t] = N
            match[cost > 1.5 * 1.5] = N
            match = match.flatten()

            _ = torch.arange(n_type * K, device=device)
            u, v = torch.meshgrid(_, _)
            u, v = u.flatten(), v.flatten()
            up, vp = match[u], match[v]
            label = Lpos[up, vp]

            if mode == "training":
                c = torch.zeros_like(label, dtype=torch.bool)

                # sample positive lines
                cdx = label.nonzero().flatten()
                if len(cdx) > M.n_dyn_posl:
                    # print("too many positive lines")
                    perm = torch.randperm(len(cdx),
                                          device=device)[:M.n_dyn_posl]
                    cdx = cdx[perm]
                c[cdx] = 1

                # sample negative lines
                cdx = Lneg[up, vp].nonzero().flatten()
                if len(cdx) > M.n_dyn_negl:
                    # print("too many negative lines")
                    perm = torch.randperm(len(cdx),
                                          device=device)[:M.n_dyn_negl]
                    cdx = cdx[perm]
                c[cdx] = 1

                # sample other (unmatched) lines
                cdx = torch.randint(len(c), (M.n_dyn_othr, ), device=device)
                c[cdx] = 1
            else:
                c = (u < v).flatten()

            # sample lines
            u, v, label = u[c], v[c], label[c]
            xy = xy.reshape(n_type * K, 2)
            xyu, xyv = xy[u], xy[v]

            line = torch.cat([xyu[:, None], xyv[:, None]], 1)

            xy = xy.reshape(n_type, K, 2)
            jcs = [xy[i, score[i] > 0.03] for i in range(n_type)]
            return line, label.float(), jcs
Example #15
 def test_remainder(x, y):
     c = torch.remainder(torch.add(x, y), 3.0)
     return c
Example #16
 def __call__(self, p, scale, padding):
     p_nor = normalize_3d_coordinate(p, scale, padding)
     p = torch.remainder(p_nor, 1 / self.res) * self.res  # always positive
     # p = coordinate2index(p_nor, self.res, coord_type='3d')
     return p
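A toy view of the remainder step only (normalize_3d_coordinate is skipped; p_nor is assumed to already lie in [0, 1)): each coordinate is reduced to its offset inside a 1/res grid cell and rescaled back to [0, 1).

import torch

res = 4                                   # hypothetical grid resolution
p_nor = torch.tensor([0.10, 0.30, 0.95])  # stand-in for normalize_3d_coordinate output
print(torch.remainder(p_nor, 1 / res) * res)  # tensor([0.4000, 0.2000, 0.8000])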
Example #17
    def _shared_step(self):
        with th.no_grad():
            # Frequently allocating and freeing shared memory to hold intermediate tensors is expensive.
            # We cache shared memory buffers in shared_emb.
            shared_emb = {emb.name: ([], []) for emb in self._params}

            # Go through all sparse embeddings
            for emb in self._params:  # pylint: disable=too-many-nested-blocks
                emb_name = emb.name

                # we need to combine gradients from multiple forward paths
                idx = []
                grad = []
                for i, data in emb._trace:
                    idx.append(i)
                    grad.append(data.grad.data)
                # If the sparse embedding was not used in the previous forward step,
                # idx and grad will be empty; initialize them as empty tensors to
                # avoid crashing the optimizer step logic.
                #
                # Note: we cannot skip the gradient exchange and update steps as other
                # working processes may send gradient update requests corresponding
                # to certain embedding to this process.
                idx = th.cat(idx, dim=0) if len(idx) != 0 else \
                    th.zeros((0,), dtype=th.long, device=th.device('cpu'))
                grad = th.cat(grad, dim=0) if len(grad) != 0 else \
                    th.zeros((0, emb.embedding_dim), dtype=th.float32, device=th.device('cpu'))

                device = grad.device
                idx_dtype = idx.dtype
                grad_dtype = grad.dtype
                grad_dim = grad.shape[1]
                if self._world_size > 1:
                    if emb_name not in self._shared_cache:
                        self._shared_cache[emb_name] = {}

                    # Each training process takes responsibility for updating a range
                    # of node embeddings, so the gradient updates can be done in parallel.
                    # The overall progress includes:
                    #   1. In each training process:
                    #     1.a Deciding which process a node embedding belongs to according
                    #         to the formula: process_id = node_idx mod num_of_process(N)
                    #     1.b Split the node index tensor and gradient tensor into N parts
                    #         according to step 1.
                    #     1.c Write each node index sub-tensor and gradient sub-tensor into
                    #         different DGL shared memory buffers.
                    #   2. Cross training process synchronization
                    #   3. In each training process:
                    #     3.a Collect node index sub-tensors and gradient sub-tensors
                    #     3.b Do gradient update
                    #   4. Done
                    idx_split = th.remainder(idx, self._world_size).long()
                    for i in range(self._world_size):
                        mask = idx_split == i
                        idx_i = idx[mask]
                        grad_i = grad[mask]

                        if i == self._rank:
                            shared_emb[emb_name][0].append(idx_i)
                            shared_emb[emb_name][1].append(grad_i)
                        else:
                            # currently nccl does not support Alltoallv operation
                            # we need to use CPU shared memory to share gradient
                            # across processes
                            idx_i = idx_i.to(th.device('cpu'))
                            grad_i = grad_i.to(th.device('cpu'))
                            idx_shmem_name = 'idx_{}_{}_{}'.format(
                                emb_name, self._rank, i)
                            grad_shmem_name = 'grad_{}_{}_{}'.format(
                                emb_name, self._rank, i)

                            # Create shared memory to hold temporary index and gradient tensor for
                            # cross-process send and recv.
                            if idx_shmem_name not in self._shared_cache[emb_name] or \
                                self._shared_cache[emb_name][idx_shmem_name].shape[0] \
                                    < idx_i.shape[0]:

                                if idx_shmem_name in self._shared_cache[
                                        emb_name]:
                                    self.shmem_buffer_holder.append(
                                        self._shared_cache[emb_name]
                                        [idx_shmem_name])
                                    self.shmem_buffer_holder.append(
                                        self._shared_cache[emb_name]
                                        [grad_shmem_name])

                                # The total number of buffers is the number of NodeEmbeddings *
                                # world_size * (world_size - 1). The minimum buffer size is 128.
                                #
                                # We extend the buffer by idx_i.shape[0] * 2 to avoid
                                # frequent shared memory allocation.
                                # The overall buffer cost will be smaller than three times
                                # the maximum memory requirement for sharing gradients.
                                buffer_size = 128 if idx_i.shape[
                                    0] < 128 else idx_i.shape[0] * 2
                                idx_shmem = create_shared_mem_array(idx_shmem_name, \
                                    (buffer_size,), idx_dtype)
                                grad_shmem = create_shared_mem_array(grad_shmem_name, \
                                    (buffer_size, grad_dim), grad_dtype)
                                self._shared_cache[emb_name][
                                    idx_shmem_name] = idx_shmem
                                self._shared_cache[emb_name][
                                    grad_shmem_name] = grad_shmem

                            # Fill shared memory with the temporary index and gradient tensors
                            self._shared_cache[emb_name][idx_shmem_name][:idx_i.shape[0]] \
                                = idx_i
                            self._shared_cache[emb_name][grad_shmem_name][:idx_i.shape[0]] \
                                = grad_i
                            self._opt_meta[emb_name][
                                self._rank][i] = idx_i.shape[0]
                else:
                    shared_emb[emb_name][0].append(idx)
                    shared_emb[emb_name][1].append(grad)

            # make sure the idx shape is passed to each process through opt_meta
            if self._world_size > 1:
                th.distributed.barrier()
            for emb in self._params:  # pylint: disable=too-many-nested-blocks
                emb_name = emb.name
                if self._world_size > 1:
                    # The first element in shared_emb[emb_name][0] is the local idx
                    device = shared_emb[emb_name][0][0].device
                    # gather gradients from all other processes
                    for i in range(self._world_size):
                        if i != self._rank:
                            idx_shmem_name = 'idx_{}_{}_{}'.format(
                                emb_name, i, self._rank)
                            grad_shmem_name = 'grad_{}_{}_{}'.format(
                                emb_name, i, self._rank)
                            size = self._opt_meta[emb_name][i][self._rank]

                            # Retrieve shared memory holding the temporary index and gradient
                            # tensor that is sent to the current training process
                            if idx_shmem_name not in self._shared_cache[emb_name] or \
                                self._shared_cache[emb_name][idx_shmem_name].shape[0] < size:
                                buffer_size = 128 if size < 128 else size * 2
                                idx_shmem = get_shared_mem_array(idx_shmem_name, \
                                    (buffer_size,), idx_dtype)
                                grad_shmem = get_shared_mem_array(grad_shmem_name, \
                                    (buffer_size, grad_dim), grad_dtype)
                                self._shared_cache[emb_name][
                                    idx_shmem_name] = idx_shmem
                                self._shared_cache[emb_name][
                                    grad_shmem_name] = grad_shmem

                            idx_i = self._shared_cache[emb_name][
                                idx_shmem_name][:size]
                            grad_i = self._shared_cache[emb_name][
                                grad_shmem_name][:size]
                            shared_emb[emb_name][0].append(
                                idx_i.to(device, non_blocking=True))
                            shared_emb[emb_name][1].append(
                                grad_i.to(device, non_blocking=True))

            if self._clean_grad:
                # clean gradient track
                for emb in self._params:
                    emb.reset_trace()
                self._clean_grad = False

            for emb in self._params:
                emb_name = emb.name

                idx = th.cat(shared_emb[emb_name][0], dim=0)
                grad = th.cat(shared_emb[emb_name][1], dim=0)
                self.update(idx, grad, emb)

            # synchronized gradient update
            if self._world_size > 1:
                th.distributed.barrier()
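A toy sketch of step 1.a from the comments in the example above: every node index is routed to the process that owns it via node_idx mod world_size. The values here are made up for the sketch.

import torch as th

world_size = 4
idx = th.tensor([0, 5, 6, 11, 13])
owner = th.remainder(idx, world_size)                        # tensor([0, 1, 2, 3, 1])
parts = [idx[owner == rank] for rank in range(world_size)]   # per-rank index splits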
Example #18
def _handle_row_wise_sharding(
    input, world_size, weight, local_shard, max_norm, norm_type, padding_idx, rank, pg
):
    """
    Entry-point function to handle the logic of row-wise sharding of weight
    for embedding. (Detailed explanations of the logic can be found in
    the comment for sharded_embedding.)

    Args:
        input: list of ID used for lookup and aggregation.
        world_size: number of ranks.
        weight: sharded weight tensor.
        local_shard: row-wise shared local weight used for lookup.
        max_norm: If given, each embedding vector with norm larger
            than max_norm is renormalized to have norm max_norm.
            Note: this will modify weight in-place.
        norm_type: The p in the p-norm to compute for the max_norm option.
        padding_idx: If specified, the entries at padding_idx do
            not contribute to the gradient; therefore, the embedding
            vector at padding_idx is not updated during training,
            i.e. it remains as a fixed “pad”.
        rank: rank of the current CUDA process.
        pg: process group.

    Returns: final result of lookup.
    """
    # flatten the ids across all input and sort
    input_size = input.size()
    input_1d = torch.reshape(input, (-1,)).contiguous()
    input_sorted, indices_1d = torch.sort(input_1d)
    rearrange_indices_1d = torch.argsort(indices_1d)
    input_sorted.contiguous()

    (
        input_sorted,
        input_split_sizes,
        sharded_dim_size_max,
        _,
        rearrange_indices_1d_second_order,
        padding_idx,
    ) = _handle_row_wise_lookup_distribute(
        input_sorted, input, world_size, weight, rank, padding_idx
    )

    # Get the input split size to be sent from each rank to the current rank.
    # We can then infer the output split size.
    output_split_sizes = _communicate_size_to_each_rank(
        input_split_sizes, world_size, input, pg
    )

    # Input sent from each rank to the current rank may have different sizes.
    gathered_input = torch.empty(
        sum(output_split_sizes), dtype=torch.int64, device=input.device
    )

    # Perform the modulo operation on the 1D tensor to be sent to each rank.
    input_sorted = torch.remainder(input_sorted, sharded_dim_size_max)

    # Perform alltoall
    dist.all_to_all_single(
        gathered_input,
        input_sorted,
        input_split_sizes=input_split_sizes,
        output_split_sizes=output_split_sizes,
        group=pg,
    )

    # If input is None, passing in max_norm causes
    # errors in CUDA.
    if max_norm is not None and gathered_input.size(0) == 0:
        max_norm = None

    # Perform local embedding look up.
    gathered_input_embeddings = torch.nn.functional.embedding(
        gathered_input,
        local_shard,
        padding_idx=padding_idx,
        max_norm=max_norm,
        norm_type=norm_type,
    )

    # Gather all lookup result appropriately by performing alltoall again
    gathered_output = torch.empty(
        input_sorted.size(0), weight.size(1), device=input.device
    )
    dist.all_to_all_single(
        gathered_output,
        gathered_input_embeddings,
        input_split_sizes=output_split_sizes,
        output_split_sizes=input_split_sizes,
        group=pg,
    )

    # Rearrange the results back to their original order.
    if rearrange_indices_1d_second_order is not None:
        gathered_output = gathered_output[rearrange_indices_1d_second_order]
    gathered_output = gathered_output[rearrange_indices_1d]

    # Return the appropriate local result.
    return torch.reshape(gathered_output, (*input_size, weight.size(1)))
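
The modular step above is what makes the row-wise layout work: once an ID has been routed to the rank that owns its rows, only the offset inside that rank's local shard matters. Below is a minimal sketch of that mapping, assuming equally sized shards of sharded_dim_size_max rows; the names global_ids, owner_rank and local_row are illustrative and not part of the function above.

import torch

# Hypothetical setup: 10 embedding rows split across 2 ranks, 5 rows per shard.
sharded_dim_size_max = 5
global_ids = torch.tensor([0, 3, 5, 9, 7])

# Which rank owns each ID (integer division) ...
owner_rank = torch.div(global_ids, sharded_dim_size_max, rounding_mode='floor')
# ... and the row offset inside that rank's local shard (remainder).
local_row = torch.remainder(global_ids, sharded_dim_size_max)

print(owner_rank)  # tensor([0, 0, 1, 1, 1])
print(local_row)   # tensor([0, 3, 0, 4, 2])
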
 def forward(self, x, y):
     out = torch.remainder(x, y)
     return out
if torch.cuda.is_available():
    alpha_0 = alpha_0.cuda()

# Multiply transition_matrix and alpha_0
# tmp is a SparseTensor of size ND x B with K non-zero rows.
# i.e. tmp._indices() is a 1 x K Tensor
# tmp._values() is a K x B Tensor
tmp = torch.hspmm(sp_trans.transpose(0, 1), alpha_0)

# Roll indices of tmp from ND to N x D.
tmp_rolled_indices = torch.zeros([2, tmp._indices().size(1)], dtype=torch.long)
if torch.cuda.is_available():
    tmp_rolled_indices = tmp_rolled_indices.cuda()

tmp_rolled_indices[0] = tmp._indices()[0] // 3237   # destination-states
tmp_rolled_indices[1] = torch.remainder(tmp._indices()[0], 3237)  # pdf-ids
tmp_rolled = torch.sparse_coo_tensor(tmp_rolled_indices, tmp._values())

# nnet_outputs at time t. We can do exp beforehand. size D x B
nnet_outputs = torch.randn([3237, 128])
if torch.cuda.is_available():
    nnet_outputs = nnet_outputs.cuda()
nnet_outputs.exp_()

# Lookup indices of nnet_outputs based on pdf-ids. size K x B
nnet_outputs_lookup = nnet_outputs.index_select(0, tmp_rolled_indices[1])

# Element-wise product with the nnet_outputs for the K rows
# Output is K x B.
tmp2 = torch.mul(tmp._values(), nnet_outputs_lookup)
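
The division/remainder pair above is simply an unravel of the flat N*D index into its (destination-state, pdf-id) components. Here is the same roll in isolation, on a hypothetical D = 4 instead of the 3237 pdf-ids used above; all values are illustrative.

import torch

D = 4                                  # hypothetical number of pdf-ids
flat = torch.tensor([0, 5, 6, 11])     # flat indices into an N*D space

state = flat // D                      # destination-state
pdf_id = torch.remainder(flat, D)      # pdf-id within that state

print(state)   # tensor([0, 1, 1, 2])
print(pdf_id)  # tensor([0, 1, 2, 3])

# Round trip: the flat index is recovered exactly.
assert torch.equal(state * D + pdf_id, flat)
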
    def evaluate(self):
        self.model.eval()

        std_loss = Accumulator('std_loss')
        adv_loss = Accumulator('adv_loss')
        std_corr = Accumulator('std_corr')
        adv_corr = Accumulator('adv_corr')
        std_logits = Accumulator('std_logits')
        adv_logits = Accumulator('adv_logits')

        seen_classes = []
        adv_images = Accumulator('adv_images')
        first_batch_images = Accumulator('first_batch_images')

        from PIL import Image

        for batch_idx, (data, target) in enumerate(self.val_loader[0]):
            if self.cuda:
                data, target = data.cuda(non_blocking=True), target.cuda(
                    non_blocking=True)
            with torch.no_grad():
                # std_cpy is used for the standard accuracy and has the usual transforms applied
                std_cpy = data.clone().detach()
                output = self.model(std_cpy)
                std_logits.update(output.cpu())
                loss = F.cross_entropy(output, target, reduction='none').cpu()
                std_loss.update(loss)
                corr = correct(output, target)
                corr = corr.view(corr.size()[0]).cpu()
                std_corr.update(corr)

            run_output = {'std_loss': std_loss.avg, 'std_acc': std_corr.avg}
            print('Standard Batch', batch_idx)
            print(run_output)

        for batch_idx, (data, target) in enumerate(self.val_loader[1]):

            # data is normalized at this point

            if self.cuda:
                data, target = data.cuda(non_blocking=True), target.cuda(
                    non_blocking=True)

            rand_target = torch.randint(0,
                                        self.nb_classes - 1,
                                        target.size(),
                                        dtype=target.dtype,
                                        device='cuda')
            rand_target = torch.remainder(target + rand_target + 1,
                                          self.nb_classes)

            data_cpy = data.clone().detach()

            for idx in range(len(data_cpy)):
                unnormalized = reverse_normalization(data[idx])
                changed = np.swapaxes(
                    np.array(unnormalized.cpu().detach()) * 255.0, 0, 2)

                transformed = applyTransforms(
                    np.swapaxes(
                        np.array(unnormalized.cpu().clone().detach()) * 255.0,
                        0, 2))
                data_cpy[idx] = transforms.functional.normalize(
                    transformed.clone().cpu(), IMAGENET_MEAN,
                    IMAGENET_STD).cuda()

            data_adv = self.attack(self.model,
                                   data_cpy,
                                   rand_target,
                                   avoid_target=False,
                                   scale_eps=False)

            with torch.no_grad():
                output_adv = self.model(data_adv)
                adv_logits.update(output_adv.cpu())
                loss = F.cross_entropy(output_adv, target,
                                       reduction='none').cpu()
                adv_loss.update(loss)
                corr = correct(output_adv, target)
                corr = corr.view(corr.size()[0]).cpu()
                adv_corr.update(corr)

            run_output = {'adv_loss': adv_loss.avg, 'adv_acc': adv_corr.avg}
            print('Adv Batch', batch_idx)
            print(run_output)

        summary_dict = {
            'std_acc': std_corr.avg.item(),
            'adv_acc': adv_corr.avg.item()
        }
        print(std_loss.avg, std_corr.avg, adv_loss.avg, adv_corr.avg)
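
The rand_target construction above relies on torch.remainder to draw a label that is guaranteed to differ from the true one: the random offset plus one lies in [1, nb_classes - 1], so wrapping modulo nb_classes can never land back on the original class. A standalone sketch of the trick, with an illustrative class count:

import torch

nb_classes = 10
target = torch.tensor([0, 3, 9, 9])

# offset + 1 lies in [1, nb_classes - 1], so the wrapped label always differs from target.
offset = torch.randint(0, nb_classes - 1, target.size(), dtype=target.dtype)
rand_target = torch.remainder(target + offset + 1, nb_classes)

assert not (rand_target == target).any()
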
Example #22
0
 def __rmod__(self, other):
     if has_torch_function_variadic(self, other):
         return handle_torch_function(Tensor.__rmod__, (self, other), self,
                                      other)
     return torch.remainder(other, self)
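
A quick usage note: __rmod__ is what makes the % operator work when the left operand is a plain Python number and the right operand is a tensor.

import torch

y = torch.tensor([2, 3, 4])
print(7 % y)                   # tensor([1, 1, 3]), dispatches to Tensor.__rmod__
print(torch.remainder(7, y))   # the equivalent explicit call
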
def online_test(epoch, lbd):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    correct_count = torch.zeros(3**NUM_FLAG)
    entire_count = torch.ones(3**NUM_FLAG)
    szs = 2 * NUM_FLAG * torch.ones(3**NUM_FLAG)
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)

            outputs_list = []
            flags_list = []
            flags2_list = []

            for i in range(args.ensemble_size):
                outputs_temp, flags_temp, flags2_temp = net(inputs)
                outputs_list.append(outputs_temp)
                flags_list.append(flags_temp)
                flags2_list.append(flags2_temp)

                if i < 1:
                    outputs = outputs_temp
                else:
                    outputs += outputs_temp

            _, predicted = outputs.max(1)

            for i in range(args.ensemble_size):
                _, predicted_temp = outputs_list[i].max(1)
                correct_temp = predicted_temp.eq(predicted).sum().item()

                # flag encoding
                idx = 0
                sz = 0
                for j in range(NUM_FLAG):
                    if flags_list[i][j] and flags2_list[i][j]:
                        idx += ((3**j) * 2)
                        sz += 2
                    elif (flags_list[i][j]) and (not flags2_list[i][j]):
                        idx += ((3**j))
                        sz += 1
                correct_count[idx] += correct_temp
                entire_count[idx] += predicted.size(0)
                szs[idx] = sz

            loss = criterion(outputs, targets)

            test_loss += loss.item()

            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            #print((correct_count / entire_count).unique(sorted=False))

    # best flag decoding
    acc = correct_count / entire_count
    score = acc + lbd * (1 - szs / szs.max())
    sort_val, sort_idx = score.sort(0, descending=True)
    idx = sort_idx[0]
    best_flag = []
    best_flag2 = []
    for j in range(NUM_FLAG):
        if (torch.remainder(idx, 3) >= 2) and (torch.remainder(idx, 3) < 3):
            best_flag.append(True)
            best_flag2.append(True)
        elif torch.remainder(idx, 3) == 1:
            best_flag.append(True)
            best_flag2.append(False)
        else:
            best_flag.append(False)
            best_flag2.append(False)
        idx = torch.div(idx, 3, rounding_mode='floor')  # move to the next base-3 digit

    #print(sort_val.tolist())
    print(best_flag)
    print(best_flag2)

    # Print Test Result
    print('Ensemble Test: Epoch#%02d : Loss: %.3f | Acc: %.3f%% (%d/%d)' %
          (epoch, test_loss /
           (batch_idx + 1), 100. * correct / total, correct, total))

    # full model testing
    full_net.eval()
    test_loss = 0
    correct = 0
    total = 0
    entire_time = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)

            outputs_list = []
            flags_list = []
            start_time = time.time()
            outputs, _, _ = full_net(inputs)
            entire_time += (time.time() - start_time)

            _, predicted = outputs.max(1)
            loss = criterion(outputs, targets)

            test_loss += loss.item()

            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print('Full Model Test: Epoch#%02d : Loss: %.3f | Acc: %.3f%% (%d/%d)' %
          (epoch, test_loss /
           (batch_idx + 1), 100. * correct / total, correct, total))
    print('running time for each batch : %.8f' % (entire_time /
                                                  (batch_idx + 1)))

    # flag setting
    full_net.module.flags = best_flag
    full_net.module.flags2 = best_flag2
    full_net.module.cum_num_blocks = [2, 4, 6, 8]
    full_net.eval()
    test_loss = 0
    correct = 0
    total = 0
    entire_time = 0

    # flagged model testing
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)

            outputs_list = []
            flags_list = []

            start_time = time.time()
            outputs, _, _ = full_net(inputs)
            entire_time += (time.time() - start_time)

            _, predicted = outputs.max(1)
            loss = criterion(outputs, targets)

            test_loss += loss.item()

            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print('Adapted Model Test: Epoch#%02d : Loss: %.3f | Acc: %.8f%% (%d/%d)' %
          (epoch, test_loss /
           (batch_idx + 1), 100. * correct / total, correct, total))
    print('running time for each batch : %.8f' % (entire_time /
                                                  (batch_idx + 1)))
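
The flag decoding above reads the base-3 digits of the best configuration index using torch.remainder and repeated division by 3. A compact sketch of the same decode, assuming a hypothetical NUM_FLAG = 4 and an illustrative index value:

import torch

NUM_FLAG = 4
idx = torch.tensor(50)   # 50 = 1*27 + 2*9 + 1*3 + 2  ->  digits [2, 1, 2, 1] (least-significant first)

digits = []
for _ in range(NUM_FLAG):
    digits.append(int(torch.remainder(idx, 3)))
    idx = torch.div(idx, 3, rounding_mode='floor')

print(digits)   # [2, 1, 2, 1]
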
Example #24
0
 def cuda_rem(x, y):
     return 1 + torch.remainder(x, y) - 1
Example #25
0
    def forward(self, state, time_step, args, reset_flag=False):
        if args.demo_type == 'uav':
            if args:
                coefs = args.variance * 2
                prior_decay = args.prior_decay
            else:
                coefs = [0.09, 0.09]
                prior_decay = 0.005
            time_step = torch.Tensor([time_step])[0]
            perspective = torch.atan(state[12] / state[13])
            first_perspective = torch.where(
                state[13] > 0,
                torch.where(state[12] > 0, perspective / np.pi * 180.0,
                            (perspective + 2 * np.pi) / np.pi * 180.0),
                (perspective + np.pi) / np.pi * 180.0)

            target = torch.atan(state[10] / state[11])
            position_target = torch.where(
                state[11] > 0,
                torch.where(state[10] > 0, target / np.pi * 180.0,
                            (target + 2 * np.pi) / np.pi * 180.0),
                (target + np.pi) / np.pi * 180.0)

            first_target = torch.remainder(first_perspective - position_target,
                                           360.0)

            average_direction = torch.where(
                torch.sign(180.0 - first_target) + 1.0 > 0,
                -first_target / 180.0, (360.0 - first_target) / 180.0)
            variance_direction = 0.0 * average_direction + coefs[0]  # 0.1

            turning_free = torch.where(
                torch.sign(4 - torch.argmin(state[0:9]).float()) + 1.0 > 0,
                45.0 + 0 * average_direction, -45.0 + 0 * average_direction)
            average_free = turning_free / 180.0
            variance_free = 0.0 * average_free + coefs[0]  # 0.1

            average_steer = torch.where(
                torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
                average_direction, average_free)
            variance_steer = torch.where(
                torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
                variance_direction, variance_free)

            speed = state[14]
            average_throttle = torch.clamp(2.5 - 50 * (speed / 2 + 0.5), -0.5,
                                           0.5)

            variance_throttle = 0.0 * average_throttle + coefs[1]
            decay = prior_decay * (time_step - 1) + 1

            covariance = torch.cat(
                (variance_steer.unsqueeze_(0),
                 variance_throttle.unsqueeze_(0)), 0) * decay

            average = torch.cat(
                (average_steer.unsqueeze_(0), average_throttle.unsqueeze_(0)),
                0)

        elif args.demo_type == 'uav_wrong':
            if reset_flag:
                self.current_direct_wrong = 'north'
                self.min_distance_x = 50.0
                self.min_distance_y = 50.0

            if args:
                coefs = args.variance * 2
                prior_decay = args.prior_decay
            else:
                coefs = [0.09, 0.09]
                prior_decay = 0.005
            time_step = torch.Tensor([time_step])[0]
            perspective = torch.atan(state[12] / state[13])
            first_perspective = torch.where(
                state[13] > 0,
                torch.where(state[12] > 0, perspective / np.pi * 180.0,
                            (perspective + 2 * np.pi) / np.pi * 180.0),
                (perspective + np.pi) / np.pi * 180.0)

            target = torch.atan(state[10] / state[11])
            position_target = torch.where(
                state[11] > 0,
                torch.where(state[10] > 0, target / np.pi * 180.0,
                            (target + 2 * np.pi) / np.pi * 180.0),
                (target + np.pi) / np.pi * 180.0)

            distance = (state[9] / 2 +
                        0.5) * (torch.sqrt(torch.Tensor([2])[0]) * 3000)

            distance_y = torch.abs(distance * torch.sin(
                2 * position_target / 360 * torch.Tensor([np.pi])[0]))
            distance_x = torch.abs(distance * torch.cos(
                2 * position_target / 360 * torch.Tensor([np.pi])[0]))

            if distance_y > self.min_distance_y:
                self.current_direct_wrong = 'north'
            elif distance_x > self.min_distance_x:
                if self.current_direct_wrong == 'north':
                    self.min_distance_x -= 5
                self.current_direct_wrong = 'east'
            else:
                if self.current_direct_wrong == 'east':
                    self.min_distance_y -= 5
                self.current_direct_wrong = 'north'

            if self.current_direct_wrong == 'north':
                if position_target > 0 and position_target < 180:
                    position_target = 90
                else:
                    position_target = 270

            else:
                if position_target < 90 or position_target > 270:
                    position_target = 0
                else:
                    position_target = 180

            first_target = torch.remainder(first_perspective - position_target,
                                           360.0)

            average_direction = torch.where(
                torch.sign(180.0 - first_target) + 1.0 > 0,
                -first_target / 180.0, (360.0 - first_target) / 180.0)
            variance_direction = 0.0 * average_direction + coefs[0]  # 0.1

            turning_free = torch.where(
                torch.sign(4 - torch.argmin(state[0:9]).float()) + 1.0 > 0,
                45.0 + 0 * average_direction, -45.0 + 0 * average_direction)

            average_free = turning_free / 180.0
            variance_free = 0.0 * average_free + coefs[0]  # 0.1
            average_steer = torch.where(
                torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
                average_direction, average_free)
            variance_steer = torch.where(
                torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
                variance_direction, variance_free)

            speed = state[14]
            average_throttle = torch.clamp(2.5 - 50 * (speed / 2 + 0.5), -0.5,
                                           0.5)
            variance_throttle = 0.0 * average_throttle + coefs[1]

            decay = prior_decay * (time_step - 1) + 1

            covariance = torch.cat(
                (variance_steer.unsqueeze_(0),
                 variance_throttle.unsqueeze_(0)), 0) * decay
            average = torch.cat(
                (average_steer.unsqueeze_(0), average_throttle.unsqueeze_(0)),
                0)
        else:
            average = self.agent_ddpg.select_action(state)
            time_step = torch.Tensor([time_step])[0]
            decay = args.prior_decay * (time_step - 1) + 1
            covariance = torch.ones(average.shape) * 0.1 * decay

        return average, covariance
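
Both UAV branches use torch.remainder(..., 360.0) to wrap the heading error into [0, 360) before choosing a turn direction. A tiny sketch of that wrap, with illustrative headings:

import torch

perspective = torch.tensor([10.0, 350.0])   # current heading in degrees
target = torch.tensor([350.0, 10.0])        # bearing to the target

# Wrapped difference in [0, 360): below 180 means turn one way, above 180 the other.
diff = torch.remainder(perspective - target, 360.0)
print(diff)   # tensor([ 20., 340.])
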
Example #26
0
def log_uniform_sample(N, size):
    log_N = math.log(N)
    x = torch.Tensor(size).uniform_(0, 1)
    value = torch.exp(x * log_N).long() - 1
    return torch.remainder(value, N)
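
In log_uniform_sample, exp(x * log N) spreads the draws log-uniformly over [1, N), and the final remainder is only a guard that keeps the result inside [0, N). A quick usage check under an illustrative N = 1000, reusing the function defined above:

import math
import torch

N = 1000
samples = log_uniform_sample(N, 10000)

assert samples.min() >= 0 and samples.max() < N
# Small ids are drawn far more often than large ones (log-uniform).
print((samples < 10).float().mean(), (samples >= 990).float().mean())
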
    def evaluate(self):
        self.model.eval()

        std_loss = Accumulator('std_loss')
        adv_loss = Accumulator('adv_loss')
        std_corr = Accumulator('std_corr')
        adv_corr = Accumulator('adv_corr')
        std_logits = Accumulator('std_logits')
        adv_logits = Accumulator('adv_logits')

        seen_classes = []
        adv_images = Accumulator('adv_images')
        first_batch_images = Accumulator('first_batch_images')

        for batch_idx, (data, target) in enumerate(self.val_loader):
            if self.cuda:
                data, target = data.cuda(non_blocking=True), target.cuda(
                    non_blocking=True)
            with torch.no_grad():
                output = self.model(data)
                std_logits.update(output.cpu())
                loss = F.cross_entropy(output, target, reduction='none').cpu()
                std_loss.update(loss)
                corr = correct(output, target)
                corr = corr.view(corr.size()[0]).cpu()
                std_corr.update(corr)

            rand_target = torch.randint(0,
                                        self.nb_classes - 1,
                                        target.size(),
                                        dtype=target.dtype,
                                        device='cuda')
            rand_target = torch.remainder(target + rand_target + 1,
                                          self.nb_classes)
            data_adv = self.attack(self.model,
                                   data,
                                   rand_target,
                                   avoid_target=False,
                                   scale_eps=False)

            for idx in range(target.size()[0]):
                if target[idx].cpu() not in seen_classes:
                    seen_classes.append(target[idx].cpu())
                    orig_image = norm_to_pil_image(data[idx].detach().cpu())
                    adv_image = norm_to_pil_image(data_adv[idx].detach().cpu())
                    adv_images.update(
                        (orig_image, adv_image, target[idx].cpu()))

            if batch_idx == 0:
                for idx in range(target.size()[0]):
                    orig_image = norm_to_pil_image(data[idx].detach().cpu())
                    adv_image = norm_to_pil_image(data_adv[idx].detach().cpu())
                    first_batch_images.update((orig_image, adv_image))

            with torch.no_grad():
                output_adv = self.model(data_adv)
                adv_logits.update(output_adv.cpu())
                loss = F.cross_entropy(output_adv, target,
                                       reduction='none').cpu()
                adv_loss.update(loss)
                corr = correct(output_adv, target)
                corr = corr.view(corr.size()[0]).cpu()
                adv_corr.update(corr)

            run_output = {
                'std_loss': std_loss.avg,
                'std_acc': std_corr.avg,
                'adv_loss': adv_loss.avg,
                'adv_acc': adv_corr.avg
            }
            print('Batch', batch_idx)
            print(run_output)
            if batch_idx % 20 == 0:
                self.logger.log(run_output, batch_idx)

        summary_dict = {
            'std_acc': std_corr.avg.item(),
            'adv_acc': adv_corr.avg.item()
        }
        self.logger.log_summary(summary_dict)
        for orig_img, adv_img, target in adv_images.vals:
            self.logger.log_image(orig_img, 'orig_{}.png'.format(target))
            self.logger.log_image(adv_img, 'adv_{}.png'.format(target))
        for idx, imgs in enumerate(first_batch_images.vals):
            orig_img, adv_img = imgs
            self.logger.log_image(orig_img, 'init_orig_{}.png'.format(idx))
            self.logger.log_image(adv_img, 'init_adv_{}.png'.format(idx))

        self.logger.end()
        print(std_loss.avg, std_corr.avg, adv_loss.avg, adv_corr.avg)
Example #28
0
 def pointwise_ops(self):
     a = torch.randn(4)
     b = torch.randn(4)
     t = torch.tensor([-1, -2, 3], dtype=torch.int8)
     r = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
     t = torch.tensor([-1, -2, 3], dtype=torch.int8)
     s = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
     f = torch.zeros(3)
     g = torch.tensor([-1, 0, 1])
     w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
     return (
         torch.abs(torch.tensor([-1, -2, 3])),
         torch.absolute(torch.tensor([-1, -2, 3])),
         torch.acos(a),
         torch.arccos(a),
         torch.acosh(a.uniform_(1.0, 2.0)),
         torch.add(a, 20),
         torch.add(a, torch.randn(4, 1), alpha=10),
         torch.addcdiv(torch.randn(1, 3),
                       torch.randn(3, 1),
                       torch.randn(1, 3),
                       value=0.1),
         torch.addcmul(torch.randn(1, 3),
                       torch.randn(3, 1),
                       torch.randn(1, 3),
                       value=0.1),
         torch.angle(a),
         torch.asin(a),
         torch.arcsin(a),
         torch.asinh(a),
         torch.arcsinh(a),
         torch.atan(a),
         torch.arctan(a),
         torch.atanh(a.uniform_(-1.0, 1.0)),
         torch.arctanh(a.uniform_(-1.0, 1.0)),
         torch.atan2(a, a),
         torch.bitwise_not(t),
         torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.ceil(a),
         torch.clamp(a, min=-0.5, max=0.5),
         torch.clamp(a, min=0.5),
         torch.clamp(a, max=0.5),
         torch.clip(a, min=-0.5, max=0.5),
         torch.conj(a),
         torch.copysign(a, 1),
         torch.copysign(a, b),
         torch.cos(a),
         torch.cosh(a),
         torch.deg2rad(
             torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0,
                                                              -90.0]])),
         torch.div(a, b),
         torch.divide(a, b, rounding_mode="trunc"),
         torch.divide(a, b, rounding_mode="floor"),
         torch.digamma(torch.tensor([1.0, 0.5])),
         torch.erf(torch.tensor([0.0, -1.0, 10.0])),
         torch.erfc(torch.tensor([0.0, -1.0, 10.0])),
         torch.erfinv(torch.tensor([0.0, 0.5, -1.0])),
         torch.exp(torch.tensor([0.0, math.log(2.0)])),
         torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])),
         torch.expm1(torch.tensor([0.0, math.log(2.0)])),
         torch.fake_quantize_per_channel_affine(
             torch.randn(2, 2, 2),
             (torch.randn(2) + 1) * 0.05,
             torch.zeros(2),
             1,
             0,
             255,
         ),
         torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255),
         torch.float_power(torch.randint(10, (4, )), 2),
         torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4,
                                                             -5])),
         torch.floor(a),
         # torch.floor_divide(torch.tensor([4.0, 3.0]), torch.tensor([2.0, 2.0])),
         # torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4),
         torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2),
         torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5),
         torch.frac(torch.tensor([1.0, 2.5, -3.2])),
         torch.randn(4, dtype=torch.cfloat).imag,
         torch.ldexp(torch.tensor([1.0]), torch.tensor([1])),
         torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])),
         torch.lerp(torch.arange(1.0, 5.0),
                    torch.empty(4).fill_(10), 0.5),
         torch.lerp(
             torch.arange(1.0, 5.0),
             torch.empty(4).fill_(10),
             torch.full_like(torch.arange(1.0, 5.0), 0.5),
         ),
         torch.lgamma(torch.arange(0.5, 2, 0.5)),
         torch.log(torch.arange(5) + 10),
         torch.log10(torch.rand(5)),
         torch.log1p(torch.randn(5)),
         torch.log2(torch.rand(5)),
         torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
         torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]),
                         torch.tensor([-1, -2, -3])),
         torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]),
                         torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]),
                          torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]),
                          torch.tensor([-1, -2, -3])),
         torch.logical_and(r, s),
         torch.logical_and(r.double(), s.double()),
         torch.logical_and(r.double(), s),
         torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)),
         torch.logical_not(
             torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)),
         torch.logical_not(
             torch.tensor([0.0, 1.0, -10.0], dtype=torch.double),
             out=torch.empty(3, dtype=torch.int16),
         ),
         torch.logical_or(r, s),
         torch.logical_or(r.double(), s.double()),
         torch.logical_or(r.double(), s),
         torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logical_xor(r, s),
         torch.logical_xor(r.double(), s.double()),
         torch.logical_xor(r.double(), s),
         torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logit(torch.rand(5), eps=1e-6),
         torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])),
         torch.i0(torch.arange(5, dtype=torch.float32)),
         torch.igamma(a, b),
         torch.igammac(a, b),
         torch.mul(torch.randn(3), 100),
         torch.multiply(torch.randn(4, 1), torch.randn(1, 4)),
         torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2),
         torch.tensor([float("nan"),
                       float("inf"), -float("inf"), 3.14]),
         torch.nan_to_num(w),
         torch.nan_to_num(w, nan=2.0),
         torch.nan_to_num(w, nan=2.0, posinf=1.0),
         torch.neg(torch.randn(5)),
         # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]),
         torch.polygamma(1, torch.tensor([1.0, 0.5])),
         torch.polygamma(2, torch.tensor([1.0, 0.5])),
         torch.polygamma(3, torch.tensor([1.0, 0.5])),
         torch.polygamma(4, torch.tensor([1.0, 0.5])),
         torch.pow(a, 2),
         torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)),
         torch.rad2deg(
             torch.tensor([[3.142, -3.142], [6.283, -6.283],
                           [1.570, -1.570]])),
         torch.randn(4, dtype=torch.cfloat).real,
         torch.reciprocal(a),
         torch.remainder(torch.tensor([-3.0, -2.0]), 2),
         torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5),
         torch.round(a),
         torch.rsqrt(a),
         torch.sigmoid(a),
         torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])),
         torch.sgn(a),
         torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])),
         torch.sin(a),
         torch.sinc(a),
         torch.sinh(a),
         torch.sqrt(a),
         torch.square(a),
         torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2),
         torch.tan(a),
         torch.tanh(a),
         torch.trunc(a),
         torch.xlogy(f, g),
         torch.xlogy(f, g),
         torch.xlogy(f, 4),
         torch.xlogy(2, g),
     )
Example #29
0
 def __call__(self, p):
     p = torch.remainder(p, self.s) / self.s  # always positive
     # p = torch.fmod(p, self.s) / self.s # same sign as input p!
     p = self.pe(p)
     return p
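
The commented-out torch.fmod alternative above differs from torch.remainder exactly when p is negative: remainder follows the sign of the divisor (so the result is always non-negative for a positive period s), while fmod keeps the sign of the input. A short comparison:

import torch

p = torch.tensor([-1.5, -0.25, 0.75])
s = 1.0

print(torch.remainder(p, s))   # tensor([0.5000, 0.7500, 0.7500])   -- always in [0, s)
print(torch.fmod(p, s))        # tensor([-0.5000, -0.2500,  0.7500]) -- keeps the input's sign
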
Example #30
0
 def test_remainder(self):
     x = torch.randn(2, 3, 4)
     y = torch.randn(2, 1, 4)
     self.assertONNX(lambda x, y: torch.remainder(x, y), (x, y))
    def forward(self,
                input,
                pos,
                key_padding_mask=None,
                attn_mask=None,
                indices=None,
                mems=None,
                incremental=False,
                incremental_cache=None,
                double_precision=False):

        bsz = input.size(1)
        ensemble = self.r_i.size(0)

        if key_padding_mask is not None:
            assert (
                attn_mask is None
            ), "ERROR attn_mask and key_padding_mask should not be both defined!"
            mask = key_padding_mask
            if len(mask.shape) == 3:
                mask = mask.squeeze(0).transpose(0, 1)
        elif attn_mask is not None:
            mask = attn_mask
            if len(mask.shape) == 3:
                mask = mask.squeeze(-1)
        else:
            mask = None

        if self.training:
            if indices is None:
                with torch.no_grad():
                    indices = torch.arange(0,
                                           bsz,
                                           device=input.device,
                                           dtype=torch.long)
                    indices = torch.remainder(indices, ensemble)

            r_i = torch.index_select(self.r_i, 0, indices)
            s_i = torch.index_select(self.s_i, 0, indices)
            # r_o = torch.index_select(self.r_o, 0, indices)
            # s_o = torch.index_select(self.s_o, 0, indices)
            r_p = torch.index_select(self.r_p, 0, indices)
            s_p = torch.index_select(self.s_p, 0, indices)
        else:
            input = input.repeat(1, ensemble, 1)
            pos = pos.repeat(1, ensemble, 1)
            # if key_padding_mask is not None:
            #     mask = mask.repeat(ensemble, 1)
            r_i = self.r_i.repeat(bsz, 1).view(bsz, ensemble, self.r_i.size(-1)). \
                transpose(0, 1).contiguous().view(-1, self.r_i.size(-1))
            s_i = self.s_i.repeat(bsz, 1).view(bsz, ensemble, self.s_i.size(-1)). \
                transpose(0, 1).contiguous().view(-1, self.s_i.size(-1))
            r_p = self.r_p.repeat(bsz, 1).view(bsz, ensemble, self.r_p.size(-1)). \
                transpose(0, 1).contiguous().view(-1, self.r_p.size(-1))
            s_p = self.s_p.repeat(bsz, 1).view(bsz, ensemble, self.s_p.size(-1)). \
                transpose(0, 1).contiguous().view(-1, self.s_p.size(-1))
            # r_o = self.r_o.repeat(bsz, 1).view(bsz, ensemble, self.r_o.size(-1)). \
            #     transpose(0, 1).contiguous().view(-1, self.r_o.size(-1))
            # s_o = self.s_o.repeat(bsz, 1).view(bsz, ensemble, self.s_o.size(-1)). \
            #     transpose(0, 1).contiguous().view(-1, self.r_o.size(-1))

        is_training = self.training

        outputs, coverage = self.attn_func(
            input, pos, attn_mask is not None, is_training, self.num_heads,
            ensemble, self.in_proj_weight, self.out_proj_weight,
            self.pos_proj_weight, self.in_proj_bias, self.out_proj_bias,
            self.pos_proj_bias, r_i, s_i, r_p, s_p, self.r_w_bias,
            self.r_r_bias, mask, self.dropout, incremental, incremental_cache,
            double_precision)
        # last False is double precision

        return outputs, coverage
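
During training, the layer above assigns batch elements to ensemble members round-robin: torch.remainder(arange(bsz), ensemble) cycles 0, 1, ..., ensemble - 1 across the batch, and the result indexes the member-specific parameters. A minimal illustration with made-up sizes:

import torch

bsz, ensemble = 7, 3
indices = torch.remainder(torch.arange(bsz), ensemble)
print(indices)   # tensor([0, 1, 2, 0, 1, 2, 0])

r_i = torch.randn(ensemble, 8)                         # hypothetical per-member vectors
per_sample_r_i = torch.index_select(r_i, 0, indices)   # shape (bsz, 8)
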
Example #32
0
 def run_remainder(x, y):
     c = torch.remainder(torch.add(x, y), x)
     return c
Example #33
0
    def compute_projection(self, depth, camera_to_world, world_to_grid):
        # compute projection by voxels -> image
        #print 'camera_to_world', camera_to_world
        #print 'intrinsic', self.intrinsic
        #print(world_to_grid)
        world_to_camera = torch.inverse(camera_to_world)
        grid_to_world = torch.inverse(world_to_grid)
        voxel_bounds_min, voxel_bounds_max = self.compute_frustum_bounds(world_to_grid, camera_to_world)
        voxel_bounds_min = np.maximum(voxel_bounds_min, 0).cuda().float() if depth.is_cuda else np.maximum(voxel_bounds_min, 0).cpu().float()
        voxel_bounds_max = np.minimum(voxel_bounds_max, self.volume_dims).cuda().float() if depth.is_cuda else np.minimum(voxel_bounds_max, self.volume_dims).cpu().float()

        # coordinates within frustum bounds
        # TODO python opt for this part instead of lua/torch opt?
        lin_ind_volume = torch.arange(0, self.volume_dims[0]*self.volume_dims[1]*self.volume_dims[2], out=torch.LongTensor())
        lin_ind_volume = lin_ind_volume.cuda() if depth.is_cuda else lin_ind_volume.cpu()
        coords = camera_to_world.new(4, lin_ind_volume.size(0))
        coords[2] = torch.div(lin_ind_volume, self.volume_dims[0]*self.volume_dims[1], rounding_mode='floor')
        tmp = lin_ind_volume - (coords[2]*self.volume_dims[0]*self.volume_dims[1]).long()
        coords[1] = torch.div(tmp, self.volume_dims[0], rounding_mode='floor')
        coords[0] = torch.remainder(tmp, self.volume_dims[0])
        coords[3].fill_(1)
        mask_frustum_bounds = torch.ge(coords[0], voxel_bounds_min[0]) * torch.ge(coords[1], voxel_bounds_min[1]) * torch.ge(coords[2], voxel_bounds_min[2])
        mask_frustum_bounds = mask_frustum_bounds * torch.lt(coords[0], voxel_bounds_max[0]) * torch.lt(coords[1], voxel_bounds_max[1]) * torch.lt(coords[2], voxel_bounds_max[2])
        if not mask_frustum_bounds.any():
            print('error: nothing in frustum bounds')
            return None
        lin_ind_volume = lin_ind_volume[mask_frustum_bounds]
        coords = coords.resize_(4, lin_ind_volume.size(0))
        coords[2] = torch.div(lin_ind_volume, self.volume_dims[0]*self.volume_dims[1], rounding_mode='floor')
        tmp = lin_ind_volume - (coords[2]*self.volume_dims[0]*self.volume_dims[1]).long()
        coords[1] = torch.div(tmp, self.volume_dims[0], rounding_mode='floor')
        coords[0] = torch.remainder(tmp, self.volume_dims[0])
        coords[3].fill_(1)

        # transform to current frame
        p = torch.mm(world_to_camera, torch.mm(grid_to_world, coords))

        # project into image
        p[0] = (p[0] * self.intrinsic[0][0]) / p[2] + self.intrinsic[0][2]
        p[1] = (p[1] * self.intrinsic[1][1]) / p[2] + self.intrinsic[1][2]
        pi = torch.round(p).long()

        valid_ind_mask = torch.ge(pi[0], 0) * torch.ge(pi[1], 0) * torch.lt(pi[0], self.image_dims[0]) * torch.lt(pi[1], self.image_dims[1])
        if not valid_ind_mask.any():
            print('error: no valid image indices')
            return None

        valid_image_ind_x = pi[0][valid_ind_mask]
        valid_image_ind_y = pi[1][valid_ind_mask]
        valid_image_ind_lin = valid_image_ind_y * self.image_dims[0] + valid_image_ind_x
        depth_vals = torch.index_select(depth.view(-1), 0, valid_image_ind_lin)
        depth_mask = depth_vals.ge(self.depth_min) * depth_vals.le(self.depth_max) * torch.abs(depth_vals - p[2][valid_ind_mask]).le(self.voxel_size)

        if not depth_mask.any():
            print('error: no valid depths')
            return None

        lin_ind_update = lin_ind_volume[valid_ind_mask]
        lin_ind_update = lin_ind_update[depth_mask]
        lin_indices_3d = lin_ind_update.new(self.volume_dims[0]*self.volume_dims[1]*self.volume_dims[2] + 1) #needs to be same size for all in batch... (first element has size)
        lin_indices_2d = lin_ind_update.new(self.volume_dims[0]*self.volume_dims[1]*self.volume_dims[2] + 1) #needs to be same size for all in batch... (first element has size)
        lin_indices_3d[0] = lin_ind_update.shape[0]
        lin_indices_2d[0] = lin_ind_update.shape[0]
        lin_indices_3d[1:1+lin_indices_3d[0]] = lin_ind_update
        lin_indices_2d[1:1+lin_indices_2d[0]] = torch.index_select(valid_image_ind_lin, 0, torch.nonzero(depth_mask)[:,0])
        num_ind = lin_indices_3d[0]
        #print '[proj] #ind = ', lin_indices_3d[0]
        #print '2d', torch.min(lin_indices_2d[1:1+num_ind]), torch.max(lin_indices_2d[1:1+num_ind])
        #print '3d', torch.min(lin_indices_3d[1:1+num_ind]), torch.max(lin_indices_3d[1:1+num_ind])
        return lin_indices_3d, lin_indices_2d
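
compute_projection recovers (x, y, z) voxel coordinates from a flat index with two integer divisions and a remainder. The same unravel in isolation, on hypothetical 2 x 3 x 4 volume dims:

import torch

dims = (2, 3, 4)   # (x, y, z) extents, illustrative only
lin = torch.arange(dims[0] * dims[1] * dims[2])

z = torch.div(lin, dims[0] * dims[1], rounding_mode='floor')
tmp = lin - z * dims[0] * dims[1]
y = torch.div(tmp, dims[0], rounding_mode='floor')
x = torch.remainder(tmp, dims[0])

# Round trip back to the flat index.
assert torch.equal(z * dims[0] * dims[1] + y * dims[0] + x, lin)
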