Example no. 1
    def forward(self, x):
        # Duplicate each input eot_samples times along the batch dimension,
        # run the noisy model, then average the logits over the duplicates.
        bs = x.shape[0]
        x = torch.repeat_interleave(x, repeats=self.eot_samples, dim=0)
        y = self.model(self.noise(x))
        y = y.view(bs, self.eot_samples, self.num_classes)
        return torch.mean(y, dim=1)
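A minimal, self-contained sketch of the repeat-and-average pattern used above; the toy shapes and the random tensor standing in for model(noise(x)) are assumptions, not part of the original model.

import torch

bs, num_classes, eot_samples = 4, 10, 8
x = torch.randn(bs, 3, 32, 32)
x_rep = torch.repeat_interleave(x, repeats=eot_samples, dim=0)  # (bs*eot_samples, 3, 32, 32)
y = torch.randn(bs * eot_samples, num_classes)                  # stand-in for model(noise(x_rep))
y = y.view(bs, eot_samples, num_classes)                        # group the duplicates per input
print(torch.mean(y, dim=1).shape)                               # torch.Size([4, 10])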
Example no. 2
path_save_synthesis = f'{path_parent}_grow={GROW_ON_K_ITER}_bg={BACKGROUND_INTENSITY:.02f}_step={STEP_SIZE}_scale_mask={SCALE_MASK}_seed_value={SEED_VALUE}/'
Path(path_save_synthesis).mkdir(parents=True, exist_ok=True)#OMM

#%%
for idx_lesion, (target, coord, mask, this_seed) in enumerate(zip(targets, coords, masks, seeds)):
  # if idx_lesion==3:break #OMM
  # prepare seed
  seed, seed_tensor, seed_pool = prepare_seed(target, this_seed, device, num_channels = num_channels, pool_size = 1024)

  # initialize model
  model = CeA_00(device = device, grow_on_k_iter=GROW_ON_K_ITER, background_intensity=BACKGROUND_INTENSITY, step_size=STEP_SIZE, scale_mask=SCALE_MASK)
  optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
  scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1500,2500], gamma=0.1) ## keep 1e-4 longer
  model_str = correct_label_in_plot(model)
  target = torch.tensor(target.transpose(-1,0,1)).unsqueeze(0).to(device)
  target_batch = torch.repeat_interleave(target, repeats = sample_size, dim = 0)

  losses = []
  alive_masks = []
  others=[]
  # train automata
  
  start = time()
  
  inner_iter_aux = 0
  inner_iter = 100
  inner_iters=[]
  for i in range(epochs):
    inner_iter, inner_iter_aux = epochs_in_inner_loop(i, inner_iter_aux, inner_iter)
    inner_iters.append(inner_iter)
Example no. 3
    def recognize_batch_beam_autoreg_LM_multi_hyp(self, encoder_outputs, beam,
                                                  Am_weight, gamma, LM_model,
                                                  len_pen, args):
        """Beam search, decode one utterence now. 
        Args: encoder_outputs: T x H, 
        char_list: list of character, args: args.beam, 
        Returns: nbest_hyps: """

        enc_out_len = encoder_outputs.size(1)
        #----------------------------
        maxlen = int(enc_out_len * len_pen)

        ### The number of hypotheses equals the beam width; it can be increased, at the cost of memory.
        hyps = beam
        #----------------------------
        print("beam,hyps,len_pen,maxlen,enc_out_len,Am_weight", beam, hyps,
              len_pen, maxlen, enc_out_len, Am_weight)

        batch_size = encoder_outputs.size(0)
        ys = torch.ones(batch_size * hyps,
                        1).fill_(self.sos_id).type_as(encoder_outputs).long()
        score_1 = torch.zeros_like(ys).float()
        rep_encoder_outputs = torch.repeat_interleave(encoder_outputs, hyps, 0)

        store_ended_hyps = []
        store_ended_LLR = []
        #============================
        scores_list = []
        start_collecting = False
        for i in range(maxlen):

            #----------------------------------------------------
            ## Use the LM only when Am_weight != 1, and always skip it on the
            ## first decoding step.

            if Am_weight == 1 or (i < 1):
                #print("not using a LM")
                COMB_AM_MT_local_scores, scores_list, present_label, dec_output_Bneck = self.prediction_from_trained_model(
                    ys, rep_encoder_outputs, scores_list)
            else:
                AM_local_scores, scores_list, present_label, dec_output_Bneck = self.prediction_from_trained_model(
                    ys, rep_encoder_outputs, scores_list)
                LM_local_scores, scores_list, present_label, scores = LM_model.decoder.prediction_from_trained_model(
                    ys, encoder_outputs, scores_list)

                #### Am_weight (0.5 to 1.5) weights the AM scores against the LM scores
                COMB_AM_MT_local_scores = Am_weight * AM_local_scores + (
                    1 - Am_weight) * LM_local_scores
            #-------------------------------------------------------------------------------------------------------------------------

            ys, score_1 = self.prediction_from_trained_model_beam_Search(
                i, ys, score_1, COMB_AM_MT_local_scores, beam, hyps, gamma,
                batch_size)
            ##---------------------------------------------------

            score_1, store_ended_hyps, store_ended_LLR = self.get_multiple_hypothesis(
                store_ended_hyps, store_ended_LLR, ys, score_1, i, maxlen)
            #----------------------------------------------------

            #### Remove blank predictions, i.e. hypotheses that predict eos right after sos.
            ## Note: list.pop(index) returns the removed value -- be careful.

            remove_blank_predictions_index = [
                index for index, element in enumerate(store_ended_hyps)
                if len(element) == 2 and element[0] == self.sos_id
                and element[1] == self.eos_id
            ]
            # Pop in reverse order so earlier indices stay valid after each pop.
            for index in sorted(remove_blank_predictions_index, reverse=True):
                store_ended_hyps.pop(index)
                store_ended_LLR.pop(index)

            #----------------------------------------------------
            if len(store_ended_hyps) >= hyps:
                break
        #----------------------------------------------------
        ys = nn.utils.rnn.pad_sequence(store_ended_hyps,
                                       batch_first=True,
                                       padding_value=self.eos_id)
        score_1 = nn.utils.rnn.pad_sequence(store_ended_LLR,
                                            batch_first=True,
                                            padding_value=0)

        # Produce the correct order: sort hypotheses by total log-likelihood, descending.
        #----------------------------------------------------
        XS = [torch.sum(i) for i in store_ended_LLR]
        XS1 = sorted(((e, i) for i, e in enumerate(XS)), reverse=True)
        correct_sorted_order = [i[1] for i in XS1]
        #----------------------------------------------------
        ys = ys[correct_sorted_order]
        score_1 = score_1[correct_sorted_order]
        ##------------------------------
        print(ys, torch.sum(score_1, dim=1))
        #breakpoint()
        #--------------------------------
        return ys, score_1
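A minimal sketch of the hypothesis expansion at the start of the loop above: the encoder output of each utterance is duplicated once per live hypothesis so the decoder can score every hypothesis in a single batch. The toy sizes below are assumptions.

import torch

batch_size, hyps, T, H, sos_id = 2, 3, 5, 8, 1
encoder_outputs = torch.randn(batch_size, T, H)
ys = torch.ones(batch_size * hyps, 1).long() * sos_id                   # one <sos> row per hypothesis
rep_encoder_outputs = torch.repeat_interleave(encoder_outputs, hyps, 0)
print(ys.shape, rep_encoder_outputs.shape)  # rows 0..2 of rep_encoder_outputs share utterance 0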
Example no. 4
def expand_mask(mask, args):
    mask_block_rows = args.mask_block_rows
    mask_block_cols = args.mask_block_cols
    mask = torch.repeat_interleave(mask, mask_block_rows, dim=0)
    mask = torch.repeat_interleave(mask, mask_block_cols, dim=1)
    return mask
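A small sanity check of the block expansion, with hypothetical 2x2 block sizes standing in for args.mask_block_rows and args.mask_block_cols: every mask entry ends up repeated over its block.

import torch

mask = torch.tensor([[1, 0, 1],
                     [0, 1, 0]])
expanded = torch.repeat_interleave(mask, 2, dim=0)      # repeat rows
expanded = torch.repeat_interleave(expanded, 2, dim=1)  # repeat columns
print(expanded.shape)  # torch.Size([4, 6]); each original entry now covers a 2x2 block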
Example no. 5
    def _evaluate_image(self, idx: int, class_id: int, area_range: Tuple[int,
                                                                         int],
                        max_det: int, ious: dict) -> Optional[dict]:
        """Perform evaluation for single class and image.

        Args:
            idx:
                Image Id, equivalent to the index of supplied samples.
            class_id:
                Class Id of the supplied ground truth and detection labels.
            area_range:
                List of lower and upper bounding box area threshold.
            max_det:
                Maximum number of evaluated detection bounding boxes.
            ious:
                IoU results for image and class.
        """
        gt = self.groundtruth_boxes[idx]
        det = self.detection_boxes[idx]
        gt_label_mask = self.groundtruth_labels[idx] == class_id
        det_label_mask = self.detection_labels[idx] == class_id

        # No Gt and No predictions --> ignore image
        if len(gt_label_mask) == 0 and len(det_label_mask) == 0:
            return None

        nb_iou_thrs = len(self.iou_thresholds)

        # Some GT but no predictions
        if len(gt_label_mask) > 0 and len(det_label_mask) == 0:
            return self.__evaluate_image_gt_no_preds(gt, gt_label_mask,
                                                     area_range, nb_iou_thrs)

        # Some predictions but no GT
        if len(gt_label_mask) == 0 and len(det_label_mask) >= 0:
            return self.__evaluate_image_preds_no_gt(det, idx, det_label_mask,
                                                     max_det, area_range,
                                                     nb_iou_thrs)

        gt = gt[gt_label_mask]
        det = det[det_label_mask]
        if gt.numel() == 0 and det.numel() == 0:
            return None

        areas = box_area(gt)
        ignore_area = (areas < area_range[0]) | (areas > area_range[1])

        # sort dt highest score first, sort gt ignore last
        ignore_area_sorted, gtind = torch.sort(ignore_area.to(torch.uint8))
        # Convert to uint8 temporarily and back to bool, because "Sort currently does not support bool dtype on CUDA"
        ignore_area_sorted = ignore_area_sorted.to(torch.bool)
        gt = gt[gtind]
        scores = self.detection_scores[idx]
        scores_filtered = scores[det_label_mask]
        scores_sorted, dtind = torch.sort(scores_filtered, descending=True)
        det = det[dtind]
        if len(det) > max_det:
            det = det[:max_det]
        # load computed ious
        ious = ious[idx, class_id][:, gtind] if len(
            ious[idx, class_id]) > 0 else ious[idx, class_id]

        nb_iou_thrs = len(self.iou_thresholds)
        nb_gt = len(gt)
        nb_det = len(det)
        gt_matches = torch.zeros((nb_iou_thrs, nb_gt),
                                 dtype=torch.bool,
                                 device=gt.device)
        det_matches = torch.zeros((nb_iou_thrs, nb_det),
                                  dtype=torch.bool,
                                  device=gt.device)
        gt_ignore = ignore_area_sorted
        det_ignore = torch.zeros((nb_iou_thrs, nb_det),
                                 dtype=torch.bool,
                                 device=gt.device)

        if torch.numel(ious) > 0:
            for idx_iou, t in enumerate(self.iou_thresholds):
                for idx_det, _ in enumerate(det):
                    m = MeanAveragePrecision._find_best_gt_match(
                        t, gt_matches, idx_iou, gt_ignore, ious, idx_det)
                    if m == -1:
                        continue
                    det_ignore[idx_iou, idx_det] = gt_ignore[m]
                    det_matches[idx_iou, idx_det] = 1
                    gt_matches[idx_iou, m] = 1

        # set unmatched detections outside of area range to ignore
        det_areas = box_area(det)
        det_ignore_area = (det_areas < area_range[0]) | (det_areas >
                                                         area_range[1])
        ar = det_ignore_area.reshape((1, nb_det))
        det_ignore = torch.logical_or(
            det_ignore,
            torch.logical_and(det_matches == 0,
                              torch.repeat_interleave(ar, nb_iou_thrs, 0)))
        return {
            "dtMatches": det_matches.to(self.device),
            "gtMatches": gt_matches.to(self.device),
            "dtScores": scores_sorted.to(self.device),
            "gtIgnore": gt_ignore.to(self.device),
            "dtIgnore": det_ignore.to(self.device),
        }
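A minimal sketch (toy sizes assumed) of the final ignore-mask broadcast above: a (1, nb_det) area mask is repeated across the IoU thresholds so it can be OR-ed into the (nb_iou_thrs, nb_det) ignore mask for unmatched detections.

import torch

nb_iou_thrs, nb_det = 10, 4
det_ignore_area = torch.tensor([[False, True, False, False]])       # (1, nb_det)
det_matches = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool)
det_ignore = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool)
det_ignore = torch.logical_or(
    det_ignore,
    torch.logical_and(det_matches == 0,
                      torch.repeat_interleave(det_ignore_area, nb_iou_thrs, 0)))
print(det_ignore.shape)  # torch.Size([10, 4])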
Example no. 6
    def losses(self, predictions, proposals, current_step=0):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features
                that were used to compute predictions.
            current_step: current optimizer step. Used for losses with an annealing component.
        """
        global device

        pred_class_logits, pred_proposal_deltas, pred_class_logits_var, pred_proposal_covs = predictions

        if len(proposals):
            box_type = type(proposals[0].proposal_boxes)
            # cat(..., dim=0) concatenates over all images in the batch
            proposals_boxes = box_type.cat(
                [p.proposal_boxes for p in proposals])
            assert (
                not proposals_boxes.tensor.requires_grad), "Proposals should not require gradients!"

            # The following fields should exist only when training.
            if proposals[0].has("gt_boxes"):
                gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
                assert proposals[0].has("gt_classes")
                gt_classes = cat([p.gt_classes for p in proposals], dim=0)
        else:
            proposals_boxes = Boxes(
                torch.zeros(
                    0, 4, device=pred_proposal_deltas.device))

        no_instances = len(proposals) == 0  # no instances found

        # Compute Classification Loss
        if no_instances:
            # TODO 0.0 * pred.sum() is enough since PT1.6
            loss_cls = 0.0 * F.cross_entropy(
                pred_class_logits,
                torch.zeros(
                    0,
                    dtype=torch.long,
                    device=pred_class_logits.device),
                reduction="sum",)
        else:
            if self.compute_cls_var:
                # Compute classification variance according to:
                # "What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?", NIPS 2017
                if self.cls_var_loss == 'loss_attenuation':
                    num_samples = self.cls_var_num_samples

                    # Compute standard deviation
                    pred_class_logits_var = torch.sqrt(
                        torch.exp(pred_class_logits_var))

                    # Produce normal samples using logits as the mean and the standard deviation computed above
                    # Scales with GPU memory. 12 GB ---> 3 Samples per anchor for
                    # COCO dataset.
                    univariate_normal_dists = distributions.normal.Normal(
                        pred_class_logits, scale=pred_class_logits_var)

                    pred_class_stochastic_logits = univariate_normal_dists.rsample(
                        (num_samples,))
                    pred_class_stochastic_logits = pred_class_stochastic_logits.view(
                        (pred_class_stochastic_logits.shape[1] * num_samples, pred_class_stochastic_logits.shape[2], -1))
                    pred_class_logits = pred_class_stochastic_logits.squeeze(
                        2)

                    # Produce copies of the target classes to match the number of
                    # stochastic samples.
                    gt_classes_target = torch.unsqueeze(gt_classes, 0)
                    gt_classes_target = torch.repeat_interleave(
                        gt_classes_target, num_samples, dim=0).view(
                        (gt_classes_target.shape[1] * num_samples, -1))
                    gt_classes_target = gt_classes_target.squeeze(1)

                    loss_cls = F.cross_entropy(
                        pred_class_logits, gt_classes_target, reduction="mean")

            elif self.cls_var_loss == 'evidential':
                # ToDo: Currently does not provide any reasonable mAP Results
                # (15% mAP)

                # Assume dirichlet parameters are output.
                alphas = get_dir_alphas(pred_class_logits)

                # Get sum of all alphas
                dirichlet_s = alphas.sum(1).unsqueeze(1)

                # Generate one hot vectors for ground truth
                one_hot_vectors = torch.nn.functional.one_hot(
                    gt_classes, alphas.shape[1])

                # Compute loss. This loss attempts to put all evidence on the
                # correct location.
                per_instance_loss = (
                    one_hot_vectors * (torch.digamma(dirichlet_s) - torch.digamma(alphas)))

                # Compute KL divergence regularizer loss
                estimated_dirichlet = torch.distributions.dirichlet.Dirichlet(
                    (alphas - 1.0) * (1.0 - one_hot_vectors) + 1.0)
                uniform_dirichlet = torch.distributions.dirichlet.Dirichlet(
                    torch.ones_like(one_hot_vectors).type(torch.FloatTensor).to(device))
                kl_regularization_loss = torch.distributions.kl.kl_divergence(
                    estimated_dirichlet, uniform_dirichlet)

                # Compute final loss
                annealing_multiplier = torch.min(
                    torch.as_tensor(
                        current_step /
                        self.annealing_step).to(device),
                    torch.as_tensor(1.0).to(device))

                per_proposal_loss = per_instance_loss.sum(
                    1) + annealing_multiplier * kl_regularization_loss

                # Compute evidence auxiliary loss
                evidence_maximization_loss = smooth_l1_loss(
                    dirichlet_s,
                    100.0 *
                    torch.ones_like(dirichlet_s).to(device),
                    beta=self.smooth_l1_beta,
                    reduction='mean')

                evidence_maximization_loss *= annealing_multiplier

                # Compute final loss
                foreground_loss = per_proposal_loss[(gt_classes >= 0) & (
                    gt_classes < pred_class_logits.shape[1] - 1)]
                background_loss = per_proposal_loss[gt_classes ==
                                                    pred_class_logits.shape[1] - 1]

                loss_cls = (torch.mean(foreground_loss) + torch.mean(background_loss)
                            ) / 2 + 0.01 * evidence_maximization_loss
            else:
                loss_cls = F.cross_entropy(
                    pred_class_logits, gt_classes, reduction="mean")

        # Compute regression loss:
        if no_instances:
            # TODO 0.0 * pred.sum() is enough since PT1.6
            loss_box_reg = 0.0 * smooth_l1_loss(
                pred_proposal_deltas,
                torch.zeros_like(pred_proposal_deltas),
                0.0,
                reduction="sum",
            )
        else:
            gt_proposal_deltas = self.box2box_transform.get_deltas(
                proposals_boxes.tensor, gt_boxes.tensor
            )
            box_dim = gt_proposal_deltas.size(1)  # 4 or 5
            cls_agnostic_bbox_reg = pred_proposal_deltas.size(1) == box_dim
            device = pred_proposal_deltas.device

            bg_class_ind = pred_class_logits.shape[1] - 1

            # Box delta loss is only computed between the prediction for the gt class k
            # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions
            # for non-gt classes and background.
            # Empty fg_inds produces a valid loss of zero as long as the size_average
            # arg to smooth_l1_loss is False (otherwise it uses torch.mean internally
            # and would produce a nan loss).
            fg_inds = torch.nonzero(
                (gt_classes >= 0) & (gt_classes < bg_class_ind), as_tuple=True
            )[0]
            if cls_agnostic_bbox_reg:
                # pred_proposal_deltas only corresponds to foreground class for
                # agnostic
                gt_class_cols = torch.arange(box_dim, device=device)
            else:
                fg_gt_classes = gt_classes[fg_inds]
                # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
                # where b is the dimension of box representation (4 or 5)
                # Note that compared to Detectron1,
                # we do not perform bounding box regression for background
                # classes.
                gt_class_cols = box_dim * \
                    fg_gt_classes[:, None] + torch.arange(box_dim, device=device)
                gt_covar_class_cols = self.bbox_cov_dims * \
                    fg_gt_classes[:, None] + torch.arange(self.bbox_cov_dims, device=device)

            loss_reg_normalizer = gt_classes.numel()

            pred_proposal_deltas = pred_proposal_deltas[fg_inds[:,
                                                                None], gt_class_cols]
            gt_proposals_delta = gt_proposal_deltas[fg_inds]

            if self.compute_bbox_cov:
                pred_proposal_covs = pred_proposal_covs[fg_inds[:,
                                                                None], gt_covar_class_cols]
                pred_proposal_covs = clamp_log_variance(pred_proposal_covs)

                if self.bbox_cov_loss == 'negative_log_likelihood':
                    if self.bbox_cov_type == 'diagonal':
                        # Get foreground proposals.
                        _proposals_boxes = proposals_boxes.tensor[fg_inds]

                        # Compute regression negative log likelihood loss according to:
                        # "What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?", NIPS 2017
                        loss_box_reg = 0.5 * torch.exp(-pred_proposal_covs) * smooth_l1_loss(
                            pred_proposal_deltas, gt_proposals_delta, beta=self.smooth_l1_beta)
                        loss_covariance_regularize = 0.5 * pred_proposal_covs
                        loss_box_reg += loss_covariance_regularize

                        loss_box_reg = torch.sum(
                            loss_box_reg) / loss_reg_normalizer
                    else:
                        # Multivariate Gaussian Negative Log Likelihood loss using pytorch
                        # distributions.multivariate_normal.log_prob()
                        forecaster_cholesky = covariance_output_to_cholesky(
                            pred_proposal_covs)

                        multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
                            pred_proposal_deltas, scale_tril=forecaster_cholesky)

                        loss_box_reg = - \
                            multivariate_normal_dists.log_prob(gt_proposals_delta)
                        loss_box_reg = torch.sum(
                            loss_box_reg) / loss_reg_normalizer

                elif self.bbox_cov_loss == 'second_moment_matching':
                    # Compute regression covariance using second moment
                    # matching.
                    loss_box_reg = smooth_l1_loss(pred_proposal_deltas,
                                                  gt_proposals_delta,
                                                  self.smooth_l1_beta)
                    errors = (pred_proposal_deltas - gt_proposals_delta)
                    if self.bbox_cov_type == 'diagonal':
                        # Handle diagonal case
                        second_moment_matching_term = smooth_l1_loss(
                            torch.exp(pred_proposal_covs), errors ** 2, beta=self.smooth_l1_beta)
                        loss_box_reg += second_moment_matching_term
                        loss_box_reg = torch.sum(
                            loss_box_reg) / loss_reg_normalizer
                    else:
                        # Handle full covariance case
                        errors = torch.unsqueeze(errors, 2)
                        gt_error_covar = torch.matmul(
                            errors, torch.transpose(errors, 2, 1))

                        # This is the cholesky decomposition of the covariance matrix.
                        # We reconstruct it from 10 estimated parameters as a
                        # lower triangular matrix.
                        forecaster_cholesky = covariance_output_to_cholesky(
                            pred_proposal_covs)

                        predicted_covar = torch.matmul(
                            forecaster_cholesky, torch.transpose(
                                forecaster_cholesky, 2, 1))

                        second_moment_matching_term = smooth_l1_loss(
                            predicted_covar, gt_error_covar, beta=self.smooth_l1_beta, reduction='sum')
                        loss_box_reg = (
                            torch.sum(loss_box_reg) + second_moment_matching_term) / loss_reg_normalizer

                elif self.bbox_cov_loss == 'energy_loss':
                    forecaster_cholesky = covariance_output_to_cholesky(
                        pred_proposal_covs)

                    # Define per-anchor Distributions
                    multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
                        pred_proposal_deltas, scale_tril=forecaster_cholesky)
                    # Define Monte-Carlo Samples
                    distributions_samples = multivariate_normal_dists.rsample(
                        (self.bbox_cov_num_samples + 1,))

                    distributions_samples_1 = distributions_samples[0:self.bbox_cov_num_samples, :, :]
                    distributions_samples_2 = distributions_samples[1:
                                                                    self.bbox_cov_num_samples + 1, :, :]

                    # Compute energy score
                    loss_covariance_regularize = - smooth_l1_loss(
                        distributions_samples_1,
                        distributions_samples_2,
                        beta=self.smooth_l1_beta,
                        reduction="sum") / self.bbox_cov_num_samples   # Second term

                    gt_proposals_delta_samples = torch.repeat_interleave(
                        gt_proposals_delta.unsqueeze(0), self.bbox_cov_num_samples, dim=0)

                    loss_first_moment_match = 2.0 * smooth_l1_loss(
                        distributions_samples_1,
                        gt_proposals_delta_samples,
                        beta=self.smooth_l1_beta,
                        reduction="sum") / self.bbox_cov_num_samples  # First term

                    # Final Loss
                    loss_box_reg = (
                        loss_first_moment_match + loss_covariance_regularize) / loss_reg_normalizer
                else:
                    raise ValueError(
                        'Invalid regression loss name {}.'.format(
                            self.bbox_cov_loss))

                # Perform loss annealing. Not really essential in Generalized-RCNN case, but good practice for more
                # elaborate regression variance losses.
                standard_regression_loss = smooth_l1_loss(pred_proposal_deltas,
                                                          gt_proposals_delta,
                                                          self.smooth_l1_beta,
                                                          reduction="sum",)
                standard_regression_loss = standard_regression_loss / loss_reg_normalizer

                probabilistic_loss_weight = get_probabilistic_loss_weight(
                    current_step, self.annealing_step)

                loss_box_reg = (1.0 - probabilistic_loss_weight) * \
                    standard_regression_loss + probabilistic_loss_weight * loss_box_reg
            else:
                loss_box_reg = smooth_l1_loss(pred_proposal_deltas,
                                              gt_proposals_delta,
                                              self.smooth_l1_beta,
                                              reduction="sum",)
                loss_box_reg = loss_box_reg / loss_reg_normalizer

        return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}
Example no. 7
    def forward(self, part, part_seg):

        # part_seg -> one hot coding
        part_seg = part_seg[:, :, 0]
        part_seg = torch.nn.functional.one_hot(part_seg.to(torch.int64),
                                               12).transpose(1, 2)

        sp_feat, sp_cabins, sp_idx, trans = self.spcoder(
            torch.cat((part_seg.float(), part), 1))
        loss_trans = feature_transform_regularizer(trans)
        pn_feat = self.pncoder(torch.cat((part_seg.float(), part), 1).float())
        pn_feat = pn_feat.unsqueeze(2).expand(part.size(0), self.dim_pn,
                                              self.num_points).contiguous()
        part_regions = []
        sp_feat_conv = self.ptmapper(sp_feat)
        out_sp_local = []
        out_seg = []
        out_sp_global = []
        out_pcn = []
        for i in range(0, self.n_primitives):
            """
            part_regions.append(
                torch.gather(part, dim=2, index=sp_idx[:, :, i, :].long()))
            """
            # stn3d
            part_regions.append(sp_feat[:, -3:, i, :])

            rand_grid = Variable(
                torch.cuda.FloatTensor(part.size(0), 2, self.num_points // 16))
            rand_grid.data.uniform_(0, 1)
            # here self.num_points // self.n_primitives = 8*4

            mesh_grid = torch.meshgrid([
                torch.linspace(0.0, 1.0, 64),
                torch.linspace(0.0, 1.0, self.num_points // 64)
            ])
            mesh_grid = torch.cat((torch.reshape(
                mesh_grid[0],
                (self.num_points // self.n_primitives * self.n_primitives, 1)),
                                   torch.reshape(
                                       mesh_grid[1],
                                       (self.num_points // self.n_primitives *
                                        self.n_primitives, 1))),
                                  dim=1)
            mesh_grid = torch.transpose(mesh_grid, 0, 1).unsqueeze(0).repeat(
                sp_feat_conv.shape[0], 1, 1)
            mesh_grid = torch.cat(
                (mesh_grid, torch.zeros(part.size(0), 1, mesh_grid.shape[2])),
                dim=1)
            # y = SoftPool(sp_feat_conv[:, :, i, :])[0][:,:,i,:]
            y = sp_feat_conv[:, :, i, :]
            out_seg.append(y)
            # y = torch.cat((y, pn_feat), 1).contiguous()
            out_sp_local.append(self.decoder1[i](y))
            # pn_feat = torch.max(sp_feat[:,:,:,0], dim=1)[0].unsqueeze(2).expand(part.size(0),sp_feat_conv.size(1), mesh_grid.size(2)).contiguous()

            y = torch.cat(
                (rand_grid.repeat(1, 1, 16),
                 torch.repeat_interleave(sp_cabins[:, :, i, :],
                                         repeats=self.num_points // 16,
                                         dim=2), pn_feat), 1).contiguous()
            out_sp_global.append(self.decoder2[i](y))
        # y = torch.cat((mesh_grid.cuda(), pn_feat), 1).contiguous()
        y = torch.cat((mesh_grid.cuda(), pn_feat), 1).contiguous()
        out_pcn = self.decoder3(y)

        # part_regions = torch.cat(part_regions, 2).contiguous()
        out1 = []
        out3 = []
        for i in range(len(part_regions)):
            part_regions[i] = part_regions[i].transpose(1, 2).contiguous()
            out1.append(out_sp_local[i].transpose(1, 2).contiguous())
            out_seg[i] = out_seg[i].transpose(1, 2).contiguous()
            sm = nn.Softmax(dim=2)
            out_seg[i] = sm(out_seg[i])
            out3.append(out_sp_global[i].transpose(1, 2).contiguous())

        out4 = out_pcn.transpose(1, 2).contiguous()
        # out_sp_local = torch.cat(out_sp_local, 2).contiguous()
        # out_sp_global = torch.cat(out_sp_global, 2).contiguous()
        # out_pcn = torch.cat(out_pcn, 2).contiguous()
        # out_seg = torch.cat(out_seg, 2).contiguous()

        dist, _, mean_mst_dis = self.expansion(
            out1[0], self.num_points // self.n_primitives, 1.5)
        loss_mst = torch.mean(dist)

        id0 = torch.zeros(out_sp_local[0].shape[0], 1,
                          out_sp_local[0].shape[2]).cuda().contiguous()
        out_sp_local[0] = torch.cat((out_sp_local[0], id0), 1)
        id1 = torch.ones(part.shape[0], 1, part.shape[2]).cuda().contiguous()
        part = torch.cat((part, id1), 1)
        """
        id2 = torch.zeros(out_sp_global.shape[0], 1,
                          out_sp_global.shape[2]).cuda().contiguous()
        out_sp_global = torch.cat((out_sp_global, id2), 1)
        id3 = torch.zeros(out_pcn.shape[0], 1,
                          out_pcn.shape[2]).cuda().contiguous()
        out_pcn = torch.cat((out_pcn, id3), 1)
        """
        fusion = torch.cat((out_sp_local[0], part), 2)
        # fusion = torch.cat((out_sp_global, out_pcn, part), 2)

        resampled_idx = MDS_module.minimum_density_sample(
            fusion[:, 0:3, :].transpose(1, 2).contiguous(), out1[0].shape[1],
            mean_mst_dis)
        fusion = MDS_module.gather_operation(fusion, resampled_idx)
        delta = self.res(fusion)
        fusion = fusion[:, 0:3, :]
        out2 = (fusion + delta).transpose(2, 1).contiguous()
        return out1, out2, out3, out4, loss_mst, out_seg, part_regions, loss_trans
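A minimal sketch of the per-primitive feature tiling in the decoder loop above: a per-primitive "cabin" feature is repeated along the point dimension with repeat_interleave and concatenated with a random grid. The sizes below are assumptions; in the model they depend on num_points and n_primitives.

import torch

batch, feat_dim, num_points = 2, 16, 64
rand_grid = torch.rand(batch, 2, num_points // 16).repeat(1, 1, 16)             # (batch, 2, num_points)
cabin = torch.randn(batch, feat_dim, 16)                                        # per-primitive feature columns
cabin_tiled = torch.repeat_interleave(cabin, repeats=num_points // 16, dim=2)   # (batch, feat_dim, num_points)
y = torch.cat((rand_grid, cabin_tiled), 1)
print(y.shape)  # torch.Size([2, 18, 64])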
Example no. 8
    def forward(self, S0, V0, rate, BS_vol, indices, z, z1, MC_samples):
        S_old = torch.repeat_interleave(S0, MC_samples, dim=0)
        # Uncomment when using BS Control Variate:
        # BS_old = torch.repeat_interleave(S0, MC_samples, dim=0)
        V_old = torch.repeat_interleave(V0, MC_samples, dim=0)
        K_call = self.strikes_call
        # K_put = self.strikes_put
        zeros = torch.repeat_interleave(torch.zeros(1, 1), MC_samples, dim=0)
        average_SS = torch.Tensor()
        average_SS1 = torch.Tensor()
        average_SS_OTM = torch.Tensor()
        average_SS1_ITM = torch.Tensor()
        # use fixed step size
        h = self.timegrid[1] - self.timegrid[0]
        n_steps = len(self.timegrid) - 1
        # set maturity counter
        countmat = -1

        # Control Variate
        cv = 0

        # Solve for S_t, V_t (Euler)
        irand = [randrange(0, n_steps + 1, 1) for k in range(300)]
        for i in range(1, len(self.timegrid)):

            dW = (torch.sqrt(h) * z[:, i - 1]).reshape(MC_samples, 1)
            dW1 = (torch.sqrt(h) * z1[:, i - 1]).reshape(MC_samples, 1)
            current_time = torch.ones(1, 1) * self.timegrid[i - 1]
            input_time = torch.repeat_interleave(current_time,
                                                 MC_samples,
                                                 dim=0)
            inputNN = torch.cat(
                [input_time.reshape(MC_samples, 1), S_old, V_old], 1)
            inputNNvol = torch.cat([input_time.reshape(MC_samples, 1), V_old],
                                   1)

            # NOTE: the concatenated input is immediately overridden below; the
            # control variate network currently only sees S_old.
            input_CV = torch.cat([input_time.reshape(MC_samples, 1), S_old], 1)
            input_CV = S_old
            cv += self.control_variate(input_CV.detach()) * dW

            if int(i) in irand:
                S_new = S_old + S_old * rate * h + self.diffusion(inputNN) * dW
                V_new = V_old + self.driftV(inputNNvol) * h + self.diffusionV(
                    inputNNvol) * dW + self.diffusionV1(inputNNvol) * dW1
            else:
                S_new = S_old + S_old * rate * h + self.diffusion(
                    inputNN).detach() * dW
                V_new = V_old + self.driftV(inputNNvol).detach(
                ) * h + self.diffusionV(inputNNvol).detach(
                ) * dW + self.diffusionV1(inputNNvol).detach() * dW1
            S_new = torch.cat([S_new, zeros], 1)
            S_new = torch.max(S_new, 1, keepdim=True)[0]
            S_old = S_new
            V_old = V_new

            # If particular timestep is a maturity for Vanilla option

            if int(i) in indices:
                countmat += 1
                Z_new = torch.Tensor()
                Z_newP_ITM = torch.Tensor()
                Z_newP_OTM = torch.Tensor()
                Z_new2 = torch.Tensor()
                # countstrikecall=-1

                # Evaluate put (OTM) and call (OTM) option prices

                for strike in K_call:
                    # countstrikecall+=1
                    # strike_put = torch.ones(1,1)*K_put[countstrikecall]
                    #  K_extended_put = torch.repeat_interleave(strike_put, MC_samples, dim=0).float()
                    # Since we use the same number of maturities for vanilla calls and puts:
                    price = torch.clamp(S_old - strike, 0) - cv
                    var_price_no_cv = torch.var(torch.clamp(S_old - strike, 0))

                    # price_OTM = torch.cat([K_extended_put-S_old,zeros],1) #put OTM
                    # Discounting assumes we use 2-year time horizon
                    var_price = torch.var(price)
                    # price_OTM = torch.max(price_OTM, 1, keepdim=True)[0]*torch.exp(-rate*1*i/n_steps)
                    Z_new = torch.cat([Z_new, price], 1)

                avg_S = Z_new.mean(dim=0, keepdim=True).T
                average_SS = torch.cat([average_SS, avg_S.T], 0)  # call OTM

        return average_SS, var_price, var_price_no_cv
Example no. 9
def train(args, train_loader, val_loader, generator, encoder, g_ema, g_optim,
          d_optim, device):
    max_cat = len(train_loader.dataset.classes)
    train_loader = sample_data(train_loader)
    max_vps = args.max_vps
    # num_nbrs = reproj_consist.num_nbrs
    if args.bins > 1:
        criterion = kldiv_loss
    elif args.soft_l1:
        criterion = nn.SmoothL1Loss()
    else:
        criterion = nn.L1Loss()
    colors = [torch.tensor(c, device=device) for c in gen_colors(max_vps)]
    to_discrete = ToDiscrete(args.bins, smoothing=args.smoothing)
    to_continuous = ToContinuous()
    wc2cc, cc2wc = read_camera_positions('camPosListDodecAzEl.txt', device,
                                         args.loader_type)

    pbar = range(args.iter)

    if get_rank() == 0:
        pbar = tqdm(pbar,
                    initial=args.start_iter,
                    dynamic_ncols=True,
                    smoothing=0.01)

    loss_dict = {}
    r1_loss = torch.tensor(0.0, device=device)
    path_loss = torch.tensor(0.0, device=device)
    path_lengths = torch.tensor(0.0, device=device)
    mean_path_length_avg = 0

    if args.distributed:
        g_module = generator.module
        e_module = encoder.module

    else:
        g_module = generator
        e_module = encoder

    accum = args.decay  # 0.5 ** (32 / (10 * 1000))

    n_fixed_samples = min(
        8 * max_cat, args.batch if args.val_batch is None else args.val_batch)
    fixed_object, fixed_cat = next(iter(val_loader))
    fixed_object, fixed_cat = fixed_object[:n_fixed_samples], fixed_cat[:n_fixed_samples]

    fixed_vp_in = torch.tensor(np.random.choice(max_vps, n_fixed_samples))
    fixed_cat_in = fixed_cat
    fixed_cat_out = torch.repeat_interleave(fixed_cat, max_vps)
    fixed_input = fixed_object[np.arange(n_fixed_samples), fixed_vp_in]
    if args.bins > 1 and args.loader_type != 'merged':
        fixed_dm_in = to_discrete(fixed_input)
    elif args.bins > 1 and args.loader_type == 'merged' and args.input_quant:
        fixed_dm_in = fixed_input[:, [0]]
        fixed_sil = (fixed_dm_in > -1).float()
        fixed_dm_in = to_discrete(torch.cat((fixed_dm_in, fixed_sil), dim=1))
    elif args.loader_type == 'merged':
        fixed_dm_in = fixed_input[:, [0]]
    else:
        fixed_dm_in = fixed_input
    fixed_dm_in, fixed_vp_in, fixed_cat = fixed_dm_in.to(
        device), fixed_vp_in.to(device), fixed_cat.to(device)

    if args.loader_type == 'merged':
        # Since first 20 are the inputs, we can directly index them
        fixed_object = fixed_object[:, :, [1, 2]]

    if args.wandb:
        # Render fixed output point clouds
        fixed_pc = [
            depth2cloud(fixed_object[b, v, 0, :, :],
                        cc2wc[v].cpu(),
                        args.size,
                        fixed_object[b, v, 1, :, :],
                        data_type='ortho') for b in range(n_fixed_samples)
            for v in range(max_vps)
        ] if args.load_sil else [
            depth2cloud(fixed_object[b, v, 0, :, :],
                        cc2wc[v].cpu(),
                        args.size,
                        data_type='ortho') for b in range(n_fixed_samples)
            for v in range(max_vps)
        ]
        # Add colors
        fixed_pc = [
            torch.cat([
                fixed_pc[ii],
                torch.zeros_like(fixed_pc[ii]) + colors[ii % max_vps].cpu()
            ],
                      dim=1) for ii in range(len(fixed_pc))
        ]
        fixed_pc = [
            torch.cat(fixed_pc[b * max_vps:(b + 1) * max_vps])
            for b in range(n_fixed_samples)
        ]
        fixed_pc = [
            wandb.Object3D(fixed_pc[b][:, [2, 0, 1, 3, 4, 5]].data.numpy(),
                           caption="Object_%02d" % b)
            for b in range(n_fixed_samples)
        ]

        # Render inputs
        fixed_im = [
            wandb.Image(fixed_input[ii, 0].data.numpy(),
                        caption="Object_%02d_%02d" % (ii, vp.item()))
            for ii, vp in enumerate(fixed_vp_in)
        ]

        wandb.log({
            "Fixed Input": fixed_im,
            "Fixed PC": fixed_pc,
        }, step=0)

    requires_grad(generator, True)
    requires_grad(encoder, True)

    for idx in pbar:
        i = idx + args.start_iter

        if i > args.iter:
            print('Done!')
            break

        ## Training
        objects, cats = next(train_loader)
        # objects, cats = objects.to(device), cats.to(device)

        B = objects.shape[0]
        vp_in_out = torch.tensor(
            np.random.choice(max_vps, B * args.num_vps * 2).reshape(-1, 2))
        vp_in = vp_in_out[:, 0]
        vp_out = vp_in_out[:, 1]
        batch_ids = np.repeat(np.arange(B), args.num_vps)

        if args.loader_type == 'merged':
            objects_in = objects[:, :, [0]]
            objects_out = objects[:, :, [1, 2]]
        else:
            objects_in = objects_out = objects

        dm_in = objects_in[batch_ids, vp_in]
        dm_out = objects_out[batch_ids, vp_out]
        cats = torch.repeat_interleave(cats, repeats=args.num_vps)
        dm_in, dm_out, vp_in, vp_out, cats = \
            dm_in.to(device), dm_out.to(device), vp_in.to(device), vp_out.to(device), cats.to(device)

        if args.bins > 1 and args.loader_type != 'merged':
            dm_in = to_discrete(dm_in)
            dm_out = to_discrete(dm_out)
        elif args.bins > 1 and args.input_quant:
            # dm_in = to_discrete(dm_in)
            dm_sil = (dm_in > -1).float()
            dm_in = to_discrete(torch.cat((dm_in, dm_sil), dim=1))
            dm_out = to_discrete(dm_out)
        elif args.bins > 1:
            dm_out = to_discrete(dm_out)

        styles = encoder(dm_in, viewpoints=vp_in, categories=cats)

        # Take average of latents of viewpoints to retain only style info
        styles = styles.reshape(B, args.num_vps, -1)
        styles = torch.mean(styles, dim=1).unsqueeze(1)
        styles = styles.expand(-1, args.num_vps,
                               -1).reshape(B * args.num_vps, -1)

        # Reproject this style in new viewpoints
        reprojections, _ = generator([styles],
                                     viewpoints=vp_out,
                                     categories=cats)

        loss = criterion(reprojections, dm_out)
        loss_dict['reproj_loss_train'] = loss

        d_regularize = args.d_reg_every > 0 and i % args.d_reg_every == 0
        if d_regularize:
            dm_in.requires_grad = True
            r1_loss = d_r1_loss(styles, dm_in)

            loss += args.r1 / 2 * r1_loss * args.d_reg_every + 0 * styles
        loss_dict['r1'] = r1_loss

        g_regularize = args.g_reg_every > 0 and i % args.g_reg_every == 0
        if g_regularize:
            path_batch_size = max(1, B // args.path_batch_shrink)
            path_loss, mean_path_length, path_lengths = g_path_regularize(
                reprojections[:path_batch_size], styles[:path_batch_size],
                mean_path_length)
            weighted_path_loss = args.path_regularize * args.g_reg_every * path_loss

            if args.path_batch_shrink:
                weighted_path_loss += 0 * reprojections[0, 0, 0, 0]

            loss += weighted_path_loss
            mean_path_length_avg = (reduce_sum(mean_path_length).item() /
                                    get_world_size())
        loss_dict['path'] = path_loss
        loss_dict['path_length'] = path_lengths.mean()

        accumulate(g_ema, g_module, accum)
        loss_reduced = reduce_loss_dict(loss_dict)

        reproj_loss_train = loss_reduced['reproj_loss_train'].mean().item()
        r1_val = loss_reduced['r1'].mean().item()
        path_loss_val = loss_reduced['path'].mean().item()
        path_length_val = loss_reduced['path_length'].mean().item()

        encoder.zero_grad()
        generator.zero_grad()
        loss.backward()
        d_optim.step()
        g_optim.step()

        if get_rank() == 0:
            pbar.set_description((
                f'reproj_loss_train: {reproj_loss_train:.4f}; '
                # f'r1: {r1_val:.4f}; '
                # f'path: {path_loss_val:.4f}; '
                # f'mean path: {mean_path_length_avg:.4f}, '
            ))

            if wandb and args.wandb:
                wandb.log({
                    'Train reprojection': reproj_loss_train,
                }, step=i)

            if i % 1000 == 0:
                with torch.no_grad():
                    g_ema.eval()
                    generator.eval()
                    encoder.eval()

                    reproj_loss_val = torch.tensor(0.0)
                    val_count = 0
                    for objects, cats in val_loader:
                        # Reshape appropriately
                        # B, V, C, H, W = objects.shape
                        B = objects.shape[0]
                        objects, cats = objects.to(device), cats.to(device)

                        if args.loader_type == 'merged':
                            objects_in = objects[:, :, [0]]
                            objects_out = objects[:, :, [1, 2]]
                        else:
                            objects_in = objects_out = objects

                        batch_ids = np.repeat(np.arange(B), max_vps)
                        vp_in_out = torch.tensor(
                            B * list(range(max_vps))).to(device)
                        dm_in = objects_in[batch_ids, vp_in_out]
                        dm_out = objects_out[batch_ids, vp_in_out]
                        dm_in, dm_out, vp_in, vp_out, cats = \
                            dm_in.to(device), dm_out.to(device), vp_in.to(device), vp_out.to(device), cats.to(device)

                        # if args.bins > 1:
                        # 	dm_out = to_discrete(dm_out)

                        if args.bins > 1 and args.loader_type != 'merged':
                            dm_in = to_discrete(dm_in)
                            dm_out = to_discrete(dm_out)
                        elif args.bins > 1 and args.input_quant:
                            dm_sil = (dm_in > -1).float()
                            dm_in = to_discrete(
                                torch.cat((dm_in, dm_sil), dim=1))
                            # dm_in = to_discrete(dm_in)
                            dm_out = to_discrete(dm_out)
                        elif args.bins > 1:
                            dm_out = to_discrete(dm_out)

                        cats = torch.repeat_interleave(cats, repeats=max_vps)
                        styles = encoder(dm_in,
                                         viewpoints=vp_in_out,
                                         categories=cats)

                        # Take average of latents of viewpoints to retain only style info
                        styles = styles.reshape(B, max_vps, -1)
                        styles = torch.mean(styles, dim=1).unsqueeze(1)
                        styles = styles.expand(-1, max_vps,
                                               -1).reshape(B * max_vps, -1)

                        reprojections, _ = g_ema([styles],
                                                 viewpoints=vp_in_out,
                                                 categories=cats)

                        reproj_loss_val += criterion(reprojections, dm_out)
                        val_count += 1

                        # Do full validation every 5000 steps, else do partial
                        if i % 5000 == 0 and i > 1:
                            continue

                        if val_count > 5:
                            break

                    reproj_loss_val /= val_count
                    reproj_loss_val = reproj_loss_val.item()
                    # loss_dict['reproj_loss_val'] = reproj_loss_val
                    print(f'reproj_loss_valid: {reproj_loss_val:.4f}; ')

                    if args.wandb:
                        wandb.log({
                            "Valid reprojection": reproj_loss_val,
                        },
                                  step=i)
                        styles = encoder(fixed_dm_in,
                                         viewpoints=fixed_vp_in,
                                         categories=fixed_cat_in)
                        styles = styles.unsqueeze(1)
                        styles = styles.expand(-1, max_vps, -1).reshape(
                            n_fixed_samples * max_vps, -1)
                        fixed_vp_out = torch.tensor(
                            n_fixed_samples * list(range(max_vps))).to(device)

                        reprojections, _ = g_ema([styles],
                                                 viewpoints=fixed_vp_out,
                                                 categories=fixed_cat_out)
                        if args.bins > 1:
                            reprojections = to_continuous(reprojections)

                        # PC reconstructions for val data
                        recon_pc = [
                            depth2cloud(reprojections[ii, 0, :, :],
                                        cc2wc[vp],
                                        args.size,
                                        reprojections[ii, 1, :, :],
                                        data_type=args.loader_type)
                            for ii, vp in enumerate(fixed_vp_out)
                        ] if args.load_sil else [
                            depth2cloud(reprojections[ii, 0, :, :],
                                        cc2wc[vp],
                                        args.size,
                                        data_type=args.loader_type)
                            for ii, vp in enumerate(fixed_vp_out)
                        ]
                        # Add colors
                        recon_pc = [
                            torch.cat([
                                recon_pc[ii],
                                torch.zeros_like(recon_pc[ii]) + colors[vp]
                            ],
                                      dim=1)
                            for ii, vp in enumerate(fixed_vp_out)
                        ]
                        # print(len(recon_pc), B, max_vps, num_nbrs)
                        recon_pc = [
                            torch.cat(recon_pc[b * max_vps:(b + 1) * max_vps])
                            for b in range(n_fixed_samples)
                        ]
                        recon_pc = [
                            wandb.Object3D(
                                recon_pc[b]
                                [:, [2, 0, 1, 3, 4, 5]].data.cpu().numpy(),
                                caption="Object_%02d" % b)
                            for b in range(n_fixed_samples)
                        ]

                        wandb.log({
                            "Recon PC": recon_pc,
                        }, step=i)

                torch.save(
                    {
                        'g': g_module.state_dict(),
                        'd': e_module.state_dict(),
                        'g_ema': g_ema.state_dict(),
                        'g_optim': g_optim.state_dict(),
                        'd_optim': d_optim.state_dict(),
                    },
                    args.ckpt_save_directory + f'/{str(i).zfill(6)}.pt',
                )
                requires_grad(generator, True)
                requires_grad(encoder, True)
                encoder.train()
                generator.train()
Example no. 10
    def run_one_epoch(self,
                      model,
                      optimizer_info={},
                      cur_epoch=1,
                      mode='',
                      freeze=False):
        model = model.to(self.device)
        if mode == 'train':
            model.train()
            optimizer = torch.optim.AdamW(
                # [
                #     {'params': model.encoder.parameters(), 'lr': optimizer_info['lr']},
                #     {'params': model.LSTM.parameters(), 'lr': optimizer_info['lr']},
                #     {'params': model.decoder.parameters(), 'lr': optimizer_info['lr']},
                #     {'params': model.MF.parameters(), 'lr': optimizer_info['lr']},
                # ],
                model.parameters(),
                lr=optimizer_info['lr'],
                weight_decay=optimizer_info['weight_decay'])
            if freeze:
                model.embedding_layer.eval()
                for param in model.embedding_layer.parameters():
                    param.requires_grad = False
        else:
            model.eval()

        total_loss = 0.
        start_time = time.time()
        batch_id = 0
        log_interval = 256
        total_prediction = []
        total_label = []
        total_output = []

        for data, masked_data in self.data_loader[mode]:

            input_name = model.embedding_layer.input_name

            mlm_loss = 0.
            if mode == 'train':
                masked_inputs = {}
                for name in input_name:
                    masked_inputs[name] = masked_data[name].to(self.device)

                _, _, mlm_outputs, _, _ = model(
                    src=masked_inputs,
                    src_mask=data['src_mask'].to(self.device),
                    segment_info=data['segment_info'].to(self.device))

                for name in input_name:
                    mask_label = torch.masked_select(input=data[name],
                                                     mask=masked_data['mask'])
                    mask_label = mask_label - 1  # ids start from 0
                    mlm_output = mlm_outputs[name].cpu()

                    # mlm_output:(batch_size, seq_len, output_dim)--> (batch_size,mask_num, output_dim)
                    mlm_output = torch.masked_select(
                        input=mlm_output,
                        mask=torch.repeat_interleave(
                            masked_data['mask'].unsqueeze(-1),
                            repeats=mlm_output.size(-1),
                            dim=-1)).view(-1, mlm_output.size(-1))

                    mlm_loss += torch.nn.CrossEntropyLoss()(input=mlm_output,
                                                            target=mask_label)

            inputs = {}
            for name in input_name:
                inputs[name] = data[name].to(self.device)

            task_label = data['label'].view(-1)
            query = data['query']

            cur_batch_size = task_label.size()[0]

            _, _, _, task_output, attention_weights = model(
                src=inputs,
                segment_info=data['segment_info'].to(self.device),
                src_mask=data['src_mask'].to(self.device))
            task_output = task_output.cpu()
            task_output = torch.gather(task_output, dim=1,
                                       index=query - 1).view(-1)

            task_loss = torch.nn.BCELoss()(input=task_output,
                                           target=task_label)

            loss = mlm_loss / len(input_name) + task_loss

            total_loss += loss.item() * cur_batch_size

            prediction = torch.where(task_output > 0.5, 1, 0)
            total_prediction.extend(prediction.view(-1).detach().numpy())
            total_label.extend(task_label.view(-1).detach().numpy())
            total_output.extend(task_output.view(-1).detach().numpy())

            # Gradient clipping against exploding gradients: clip when the norm exceeds 0.5.
            if mode == 'train':
                # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if batch_id % log_interval == 0 and batch_id > 0:
                    elapsed = time.time() - start_time
                    print('| epoch {:3d} | {:5d}/{:5d} batches | s {:5.2f} | '
                          'loss {:5.3f} | ppl {:8.3f}'.format(
                              cur_epoch, batch_id,
                              len(self.data_loader[mode].dataset) //
                              self.batch_size, elapsed, loss.item(),
                              math.exp(loss.item())))

                    start_time = time.time()

            batch_id += 1

        if mode == 'test':
            attention_weight = attention_weights[0][0].unsqueeze(
                0).cpu().detach().numpy()  # (BHCW)
            attention_weight = (1 - attention_weight) * 255
            self.writer.add_image(tag='attention_weights',
                                  img_tensor=attention_weight,
                                  global_step=cur_epoch)

        auc = sklearn.metrics.roc_auc_score(total_label, total_output)
        acc = sklearn.metrics.accuracy_score(total_label, total_prediction)

        return model, total_loss / (len(self.data_loader[mode].dataset) -
                                    1), auc, acc
Example n. 11
0
    def train(self, replay_buffer, iterations, batch_size=64):

        for it in range(iterations):
            # Sample replay buffer / batch
            state, action, next_state, reward, not_done = replay_buffer.sample(
                batch_size)

            # Variational Auto-Encoder Training
            recon, mean, std = self.vae(state, action)
            recon_loss = F.mse_loss(recon, action)
            # KL divergence between a diagonal (component-wise independent) Gaussian and the standard normal
            KL_loss = -0.5 * (1 + torch.log(std.pow(2)) - mean.pow(2) -
                              std.pow(2)).mean()
            vae_loss = recon_loss + 0.5 * KL_loss
            self.vae_loss.append(vae_loss)
            # Clears the gradients
            self.vae_optimizer.zero_grad()
            vae_loss.backward()
            self.vae_optimizer.step()

            # Critic Training
            with torch.no_grad():
                # Duplicate next state 10 times
                next_state = torch.repeat_interleave(next_state, 10, 0)

                # Compute value of perturbed actions sampled from the VAE
                target_Q1, target_Q2 = self.critic_target(
                    next_state,
                    self.actor_target(next_state, self.vae.decode(next_state)))

                # Soft Clipped Double Q-learning
                target_Q = self.lmbda * torch.min(target_Q1, target_Q2) + (
                    1. - self.lmbda) * torch.max(target_Q1, target_Q2)
                # Take max over each action sampled from the VAE
                target_Q = target_Q.reshape(batch_size,
                                            -1).max(1)[0].reshape(-1, 1)

                target_Q = reward + not_done * self.discount * target_Q

            current_Q1, current_Q2 = self.critic(state, action)
            critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
                current_Q2, target_Q)
            self.critic_loss.append(critic_loss)
            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            self.critic_optimizer.step()

            # Perturbation Model / Action Training
            sampled_actions = self.vae.decode(state)
            perturbed_actions = self.actor(state, sampled_actions)

            # Update through DPG
            actor_loss = -self.critic.q1(state, perturbed_actions).mean()
            self.actor_loss.append(actor_loss)
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

            # Update Target Networks
            # soft update
            for param, target_param in zip(self.critic.parameters(),
                                           self.critic_target.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)

            for param, target_param in zip(self.actor.parameters(),
                                           self.actor_target.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)
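The critic target above repeats each next state 10 times, evaluates the candidate actions, and then folds the candidates back to take a per-state max; a minimal sketch with toy shapes (the critic is replaced by random Q-values here):
import torch

batch_size, state_dim, n_candidates = 4, 3, 10
next_state = torch.randn(batch_size, state_dim)

# Each state is repeated n_candidates times so that n_candidates sampled
# actions can be evaluated for it in a single critic forward pass.
repeated = torch.repeat_interleave(next_state, n_candidates, 0)        # (40, 3)

# Pretend critic output: one Q-value per (state, candidate action) pair.
q_values = torch.randn(batch_size * n_candidates, 1)

# Fold the candidates back into a per-state axis and take the max.
target_q = q_values.reshape(batch_size, -1).max(1)[0].reshape(-1, 1)   # (4, 1)
print(repeated.shape, target_q.shape)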
Example n. 12
0
def repeat_frames(input, n_frames, dim_t=1):
    """ repeat a input of shape [b x ... x H x W] n_frames times to generate a tensor of [b x n_frames x ... x H x W]
    """
    input = input.unsqueeze(dim_t)
    input = torch.repeat_interleave(input, n_frames, dim=dim_t)
    return input
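For example, assuming the repeat_frames definition above:
import torch

x = torch.randn(2, 3, 8, 8)            # [b x C x H x W]
y = repeat_frames(x, n_frames=5)        # uses the function defined above
print(y.shape)                          # torch.Size([2, 5, 3, 8, 8])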
Example n. 13
0
    def forward(
        self,
        *,
        observations: types.ObservationsTorch,
        controls: types.ControlsTorch,
    ) -> types.StatesTorch:
        """Particle filter forward pass, single timestep.

        Args:
            observations (dict or torch.Tensor): observation inputs. should be
                either a dict of tensors or tensor of shape `(N, ...)`.
            controls (dict or torch.Tensor): control inputs. should be either a
                dict of tensors or tensor of shape `(N, ...)`.

        Returns:
            torch.Tensor: Predicted state for each batch element. Shape should
            be `(N, state_dim).`
        """

        # Make sure our particle filter's been initialized
        assert self._initialized, "Particle filter not initialized!"

        # Get our batch size (N), current particle count (M), & state dimension
        N, M, state_dim = self.particle_states.shape
        assert state_dim == self.state_dim
        assert len(fannypack.utils.SliceWrapper(controls)) == N

        # Decide whether or not we're resampling
        resample = self.resample
        if resample is None:
            # If not explicitly set, we disable resampling in train mode (to allow
            # gradients to propagate through time) and enable in eval mode (to prevent
            # particle deprivation)
            resample = not self.training

        # If we're not resampling and our current particle count doesn't match
        # our desired particle count, we need to either expand or contract our
        # particle set
        if not resample and self.num_particles != M:
            indices = self.particle_states.new_zeros(
                (N, self.num_particles), dtype=torch.long
            )

            # If output particles > our input particles, for the beginning part we copy
            # particles directly to reduce variance
            copy_count = (self.num_particles // M) * M
            if copy_count > 0:
                indices[:, :copy_count] = torch.arange(M).repeat(copy_count // M)[
                    None, :
                ]

            # For remaining particles, we sample w/o replacement (also lowers variance)
            remaining_count = self.num_particles - copy_count
            assert remaining_count >= 0
            if remaining_count > 0:
                indices[:, copy_count:] = torch.randperm(M, device=indices.device)[
                    None, :remaining_count
                ]

            # Gather new particles, weights
            M = self.num_particles
            self.particle_states = self.particle_states.gather(
                1, indices[:, :, None].expand((N, M, state_dim))
            )
            self.particle_log_weights = self.particle_log_weights.gather(1, indices)
            assert self.particle_states.shape == (N, self.num_particles, state_dim)
            assert self.particle_log_weights.shape == (N, self.num_particles)

            # Normalize particle weights to sum to 1.0
            self.particle_log_weights = self.particle_log_weights - torch.logsumexp(
                self.particle_log_weights, dim=1, keepdim=True
            )

        # Propagate particles through our dynamics model
        # A bit of extra effort is required for the extra particle dimension
        # > For our states, we flatten along the N/M axes
        # > For our controls, we repeat each one `M` times, if M=3:
        #       [u0 u1 u2] should become [u0 u0 u0 u1 u1 u1 u2 u2 u2]
        #
        # Currently each of the M particles within a "sample" get the same action, but
        # we could also add noise in the action space (a la Jonschkowski et al. 2018)
        reshaped_states = self.particle_states.reshape(-1, self.state_dim)
        reshaped_controls = fannypack.utils.SliceWrapper(controls).map(
            lambda tensor: torch.repeat_interleave(tensor, repeats=M, dim=0)
        )
        predicted_states, scale_trils = self.dynamics_model(
            initial_states=reshaped_states, controls=reshaped_controls
        )
        self.particle_states = (
            torch.distributions.MultivariateNormal(
                loc=predicted_states, scale_tril=scale_trils
            )
            .rsample()  # Note that we use `rsample` to make sampling differentiable
            .view(N, M, self.state_dim)
        )
        assert self.particle_states.shape == (N, M, self.state_dim)

        # Re-weight particles using observations
        self.particle_log_weights = self.particle_log_weights + self.measurement_model(
            states=self.particle_states,
            observations=observations,
        )

        # Normalize particle weights to sum to 1.0
        self.particle_log_weights = self.particle_log_weights - torch.logsumexp(
            self.particle_log_weights, dim=1, keepdim=True
        )

        # Compute output
        state_estimates: types.StatesTorch
        if self.estimation_method == "weighted_average":
            state_estimates = torch.sum(
                torch.exp(self.particle_log_weights[:, :, np.newaxis])
                * self.particle_states,
                dim=1,
            )
        elif self.estimation_method == "argmax":
            best_indices = torch.argmax(self.particle_log_weights, dim=1)
            state_estimates = torch.gather(
                self.particle_states, dim=1, index=best_indices
            )
        else:
            assert False, "Unsupported estimation method!"

        # Resampling
        if resample:
            self._resample()

        # Post-condition :)
        assert state_estimates.shape == (N, state_dim)
        assert self.particle_states.shape == (N, self.num_particles, state_dim)
        assert self.particle_log_weights.shape == (N, self.num_particles)

        return state_estimates
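A toy illustration of the state/control reshaping described in the comments above (flatten the N/M axes for states, repeat each control M times); the dimensions are made up:
import torch

N, M, state_dim, control_dim = 3, 3, 2, 1
particle_states = torch.randn(N, M, state_dim)
controls = torch.arange(N, dtype=torch.float32).view(N, control_dim)   # [u0, u1, u2]

# States: flatten the batch and particle axes together.
reshaped_states = particle_states.reshape(-1, state_dim)               # (N*M, state_dim)

# Controls: repeat each control M times so it lines up with its particles,
# i.e. [u0 u1 u2] -> [u0 u0 u0 u1 u1 u1 u2 u2 u2].
reshaped_controls = torch.repeat_interleave(controls, repeats=M, dim=0)
print(reshaped_controls.view(-1).tolist())  # [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0]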
Example n. 14
0
 def other_ops(self):
     a = torch.randn(4)
     b = torch.randn(4)
     c = torch.randint(0, 8, (5, ), dtype=torch.int64)
     e = torch.randn(4, 3)
     f = torch.randn(4, 4, 4)
     size = [0, 1]
     dims = [0, 1]
     return (
         torch.atleast_1d(a),
         torch.atleast_2d(a),
         torch.atleast_3d(a),
         torch.bincount(c),
         torch.block_diag(a),
         torch.broadcast_tensors(a),
         torch.broadcast_to(a, (4)),
         # torch.broadcast_shapes(a),
         torch.bucketize(a, b),
         torch.cartesian_prod(a),
         torch.cdist(e, e),
         torch.clone(a),
         torch.combinations(a),
         torch.corrcoef(a),
         # torch.cov(a),
         torch.cross(e, e),
         torch.cummax(a, 0),
         torch.cummin(a, 0),
         torch.cumprod(a, 0),
         torch.cumsum(a, 0),
         torch.diag(a),
         torch.diag_embed(a),
         torch.diagflat(a),
         torch.diagonal(e),
         torch.diff(a),
         torch.einsum("iii", f),
         torch.flatten(a),
         torch.flip(e, dims),
         torch.fliplr(e),
         torch.flipud(e),
         torch.kron(a, b),
         torch.rot90(e),
         torch.gcd(c, c),
         torch.histc(a),
         torch.histogram(a),
         torch.meshgrid(a),
         torch.meshgrid(a, indexing="xy"),
         torch.lcm(c, c),
         torch.logcumsumexp(a, 0),
         torch.ravel(a),
         torch.renorm(e, 1, 0, 5),
         torch.repeat_interleave(c),
         torch.roll(a, 1, 0),
         torch.searchsorted(a, b),
         torch.tensordot(e, e),
         torch.trace(e),
         torch.tril(e),
         torch.tril_indices(3, 3),
         torch.triu(e),
         torch.triu_indices(3, 3),
         torch.vander(a),
         torch.view_as_real(torch.randn(4, dtype=torch.cfloat)),
         torch.view_as_complex(torch.randn(4, 2)),
         torch.resolve_conj(a),
         torch.resolve_neg(a),
     )
Example n. 15
0
    def attribute(
        self,
        inputs,
        baselines=None,
        target=None,
        n_steps=500,
        method="riemann_trapezoid",
    ):
        r"""
            Computes conductance using gradients along the path, applying
            Riemann's method or Gauss-Legendre.
            The details of the approach can be found here:
            https://arxiv.org/abs/1805.12233

            Args

                inputs:     A single high dimensional input tensor, in which
                            dimension 0 corresponds to number of examples.
                baselines:   A single high dimensional baseline tensor,
                            which has the same shape as the input
                target:     Predicted class index. This is necessary only for
                            classification use cases
                n_steps:    The number of steps used by the approximation method
                method:     Method for integral approximation, one of `riemann_right`,
                            `riemann_middle`, `riemann_trapezoid` or `gausslegendre`

            Return

                attributions: Total conductance with respect to each neuron in
                              output of given layer
        """
        if baselines is None:
            baselines = 0
        gradient_mask = apply_gradient_requirements((inputs,))
        # retrieve step size and scaling factor for specified approximation method
        step_sizes_func, alphas_func = approximation_parameters(method)
        step_sizes, alphas = step_sizes_func(n_steps), alphas_func(n_steps)

        # compute scaled inputs from baseline to final input.
        scaled_features = torch.cat(
            [baselines + alpha * (inputs - baselines) for alpha in alphas], dim=0
        )

        # Conductance Gradients - Returns gradient of output with respect to
        # hidden layer, gradient of hidden layer with respect to input,
        # and number of hidden units.
        input_gradients, mid_layer_gradients, hidden_units = self._conductance_grads(
            self.forward_func, scaled_features, target
        )
        # Multiply gradient of hidden layer with respect to input by input - baseline
        scaled_input_gradients = torch.repeat_interleave(
            inputs - baselines, hidden_units, dim=0
        )
        scaled_input_gradients = input_gradients * scaled_input_gradients.repeat(
            *([len(alphas)] + [1] * (len(scaled_input_gradients.shape) - 1))
        )

        # Sum gradients for each input neuron in order to have total
        # for each hidden unit and reshape to match hidden layer shape
        summed_input_grads = torch.sum(
            scaled_input_gradients, tuple(range(1, len(scaled_input_gradients.shape)))
        ).view_as(mid_layer_gradients)

        # Rescale gradients of hidden layer by step size.
        scaled_grads = mid_layer_gradients.contiguous().view(
            n_steps, -1
        ) * torch.tensor(step_sizes).view(n_steps, 1).to(mid_layer_gradients.device)

        undo_gradient_requirements((inputs,), gradient_mask)

        # Element-wise multiply gradient of output with respect to hidden layer
        # and summed gradients with respect to input (chain rule) and sum across
        # stepped inputs.
        return _reshape_and_sum(
            scaled_grads.view(mid_layer_gradients.shape) * summed_input_grads,
            n_steps,
            inputs.shape[0],
            mid_layer_gradients.shape[1:],
        )
Example n. 16
0
 def forward(self, x, n):
     batch_dim = x.shape[0]
     mu = self.mu_head(x)
     mu = torch.repeat_interleave(mu, n, 0)
     return mu
Example n. 17
0
    def _conductance_grads(self, forward_fn, input, target_ind=None):
        with torch.autograd.set_grad_enabled(True):
            # Set a forward hook on specified module and run forward pass to
            # get output tensor size.
            saved_tensor = None

            def forward_hook(module, inp, out):
                nonlocal saved_tensor
                saved_tensor = out

            hook = self.layer.register_forward_hook(forward_hook)
            output = forward_fn(input)

            # Compute layer output tensor dimensions and total number of units.
            # The hidden layer tensor is assumed to have dimension (num_hidden, ...)
            # where the product of the dimensions >= 1 correspond to the total
            # number of hidden neurons in the layer.
            layer_size = tuple(saved_tensor.size())[1:]
            layer_units = int(np.prod(layer_size))

            # Remove unnecessary forward hook.
            hook.remove()

            # Backward hook function to override gradients in order to obtain
            # just the gradient of each hidden unit with respect to input.
            saved_grads = None

            def backward_hook(grads):
                nonlocal saved_grads
                saved_grads = grads
                zero_mat = torch.zeros((1,) + layer_size)
                scatter_indices = torch.arange(0, layer_units).view_as(zero_mat)
                # Creates matrix with each layer containing a single unit with
                # value 1 and remaining zeros, which will provide gradients
                # with respect to each unit independently.
                to_return = torch.zeros((layer_units,) + layer_size).scatter(
                    0, scatter_indices, 1
                )
                to_repeat = [1] * len(to_return.shape)
                to_repeat[0] = grads.shape[0] // to_return.shape[0]
                expanded = to_return.repeat(to_repeat)
                return expanded

            # Create a forward hook in order to attach backward hook to appropriate
            # tensor. Save backward hook in order to remove hook appropriately.
            back_hook = None

            def forward_hook_register_back(module, inp, out):
                nonlocal back_hook
                back_hook = out.register_hook(backward_hook)

            hook = self.layer.register_forward_hook(forward_hook_register_back)

            # Expand input to include layer_units copies of each input.
            # This allows obtaining gradient with respect to each hidden unit
            # in one pass.
            expanded_input = torch.repeat_interleave(input, layer_units, dim=0)
            output = forward_fn(expanded_input)
            hook.remove()
            output = output[:, target_ind] if target_ind is not None else output
            input_grads = torch.autograd.grad(torch.unbind(output), expanded_input)

            # Remove backwards hook
            back_hook.remove()

            # Remove duplicates in gradient with respect to hidden layer,
            # choose one for each layer_units indices.
            output_mid_grads = torch.index_select(
                saved_grads,
                0,
                torch.tensor(range(0, input_grads[0].shape[0], layer_units)),
            )
        return input_grads[0], output_mid_grads, layer_units
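The core trick above, repeating each input layer_units times so that a single backward pass yields the gradient of every hidden unit with respect to its own copy of the input, can be sketched in isolation; the toy linear "layer" and one-hot selector below stand in for the scatter-based hook and are not Captum's actual implementation:
import torch

# Hypothetical toy setup: a linear "hidden layer" with 3 units.
n_examples, in_dim, layer_units = 2, 4, 3
weight = torch.randn(layer_units, in_dim)

inputs = torch.randn(n_examples, in_dim, requires_grad=True)

# Repeat each example layer_units times so that one backward pass can
# return d(hidden_unit_k)/d(input) for every unit k independently.
expanded = torch.repeat_interleave(inputs, layer_units, dim=0)    # (6, 4)
hidden = expanded @ weight.t()                                     # (6, 3)

# Select unit k for the k-th copy of each example (a one-hot per row),
# mirroring the scatter-based backward hook above.
selector = torch.eye(layer_units).repeat(n_examples, 1)            # (6, 3)
per_unit = (hidden * selector).sum()
grads = torch.autograd.grad(per_unit, expanded)[0]                  # (6, 4)
print(grads.shape)  # row i holds d(unit i % 3)/d(input i // 3)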
Example n. 18
0
    def _gconv(self, inputs, adj_mx, global_embs, state, option='r'):
        r"""Graph attention network based convolution computation.

    Args:
      inputs: input vector, with shape (batch_size, self.num_nodes,
        self.rnn_units).
      adj_mx: adjacency matrix, with shape (self.num_nodes, self.num_nodes).
      global_embs: global embedding matrix, with shape (self.num_nodes,
        self.rnn_units).
      state: hidden vectors from the last unit, with shape (batch_size,
        self.num_nodes, self.rnn_units). If this is the first unit, usually hx
        is supposed to be a zero vector.
      option: indicate whether the output is reset gate vector ('r'), update
        gate vector ('u'), or candidate hidden vector ('c').

    Returns:
      out: output, can be reset gate vector (option is 'r'), update gate
      vector (option is 'u'), or
        candidate hidden vector (option is 'c').
    """
        batch_size = inputs.shape[0]
        num_nodes = self.num_nodes

        x = torch.cat([inputs, state], dim=-1)  # input_dim
        out = torch.zeros(size=(batch_size, num_nodes, self.rnn_units),
                          device=device)

        for relation_id in range(self.num_relation_types - 1):
            if option == 'r':
                r_weights_left = self.r_weights[:2 * self.rnn_units, :,
                                                relation_id]
                r_biases_left = self.r_biases[:self.rnn_units, relation_id]
                r_weights_right = r_weights_left if self.share_attn_weights else self.r_weights[
                    2 * self.rnn_units:, :, relation_id]
                r_biases_right = r_biases_left if self.share_attn_weights else self.r_biases[
                    self.rnn_units:, relation_id]
                x_left = torch.matmul(x, r_weights_left) + r_biases_left
                x_right = torch.matmul(x, r_weights_right) + r_biases_right
            elif option == 'u':
                u_weights_left = self.u_weights[:2 * self.rnn_units, :,
                                                relation_id]
                u_biases_left = self.u_biases[:self.rnn_units, relation_id]
                u_weights_right = u_weights_left if self.share_attn_weights else self.u_weights[
                    2 * self.rnn_units:, :, relation_id]
                u_biases_right = u_biases_left if self.share_attn_weights else self.u_biases[
                    self.rnn_units:, relation_id]
                x_left = torch.matmul(x, u_weights_left) + u_biases_left
                x_right = torch.matmul(x, u_weights_right) + u_biases_right
            elif option == 'c':
                c_weights_left = self.c_weights[:2 * self.rnn_units, :,
                                                relation_id]
                c_biases_left = self.c_biases[:self.rnn_units, relation_id]
                c_weights_right = c_weights_left if self.share_attn_weights else self.c_weights[
                    2 * self.rnn_units:, :, relation_id]
                c_biases_right = c_biases_left if self.share_attn_weights else self.c_biases[
                    self.rnn_units:, relation_id]
                x_left = torch.matmul(x, c_weights_left) + c_biases_left
                x_right = torch.matmul(x, c_weights_right) + c_biases_right

            i, j = torch.nonzero(adj_mx[:, :, relation_id], as_tuple=True)
            i, j = i.to(device), j.to(device)
            x_left_per_edge = x_left.index_select(1, i)
            x_right_per_edge = x_right.index_select(1, j)
            x_per_edge = x_left_per_edge + x_right_per_edge
            x_per_edge = nn.functional.leaky_relu(x_per_edge,
                                                  self.negative_slope)

            alpha = (x_per_edge * global_embs[i]).sum(dim=2)
            alpha = softmax(alpha, index=i, num_nodes=num_nodes, dim=1)

            attns = torch.zeros([batch_size, num_nodes, num_nodes],
                                device=device)
            batch_idxs = torch.arange(batch_size, device=device)
            batch_expand = torch.repeat_interleave(batch_idxs, len(i), dim=0)
            i_expand = torch.repeat_interleave(i.view(1, -1),
                                               batch_size,
                                               dim=0).view(-1)
            j_expand = torch.repeat_interleave(j.view(1, -1),
                                               batch_size,
                                               dim=0).view(-1)
            indices = (batch_expand, i_expand, j_expand)
            attns.index_put_(indices, alpha.view(-1))

            zero_mask = (
                adj_mx[:, :, relation_id] == 0).unsqueeze(0).repeat_interleave(
                    batch_size, dim=0)
            zero_coeffs = torch.ones([batch_size, num_nodes, num_nodes],
                                     device=device) / zero_mask.float().sum(
                                         dim=-1, keepdim=True)
            attns[zero_mask] = zero_coeffs[zero_mask]

            out += torch.bmm(adj_mx[:, :, relation_id] * attns,
                             x_right) + x_left

        return out
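A toy version of the batched attention scatter above, where repeat_interleave builds the (batch, i, j) index triples for index_put_; the edge lists and sizes are invented for illustration:
import torch

batch_size, num_nodes = 2, 3
i = torch.tensor([0, 1, 2])              # edge source nodes
j = torch.tensor([1, 2, 0])              # edge target nodes
alpha = torch.rand(batch_size, len(i))   # one attention coefficient per (batch, edge)

attns = torch.zeros(batch_size, num_nodes, num_nodes)

# Build one (batch, i, j) index triple per coefficient.
batch_idxs = torch.arange(batch_size)
batch_expand = torch.repeat_interleave(batch_idxs, len(i), dim=0)               # [0 0 0 1 1 1]
i_expand = torch.repeat_interleave(i.view(1, -1), batch_size, dim=0).view(-1)   # [0 1 2 0 1 2]
j_expand = torch.repeat_interleave(j.view(1, -1), batch_size, dim=0).view(-1)   # [1 2 0 1 2 0]

attns.index_put_((batch_expand, i_expand, j_expand), alpha.view(-1))
print(attns[0, 0, 1] == alpha[0, 0], attns[1, 2, 0] == alpha[1, 2])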
Example n. 19
0
    def policy(
        self,
        predictor: Union[RLPredictor, OnPolicyPredictor, None],
        state,
        test,
        state_preprocessor=None,
    ) -> Tuple[torch.Tensor, float]:
        """
        Selects the next action.

        :param predictor: RLPredictor/OnPolicyPredictor object whose policy to
            follow. If set to None, use a random policy.
        :param state: State to evaluate predictor's policy on.
        :param test: Whether or not to bypass exploration (if predictor is not None).
            For discrete action problems, the exploration policy is epsilon-greedy.
            For continuous action problems, the exploration is achieved by adding
            noise to action outputs.
        :param state_preprocessor: State preprocessor to use to preprocess states
        """
        assert len(state.size()) == 1

        # Convert state to batch of size 1
        state = state.unsqueeze(0)

        if predictor is None or (not test and self.action_type
                                 == EnvType.DISCRETE_ACTION
                                 and float(torch.rand(1)) < self.epsilon):
            raw_action, _, action_probability = self.sample_policy(
                state=None, use_continuous_action=False)
            if self.action_type == EnvType.DISCRETE_ACTION:
                action = torch.zeros([self.action_dim])
                action[raw_action] = 1.0
                return action, action_probability
            return raw_action, action_probability

        action = torch.zeros([self.action_dim])

        if state_preprocessor:
            state = state_preprocessor.forward(state)

        if isinstance(predictor, DQNPredictor):
            action_probability = 1.0 if test else 1.0 - self.epsilon
            # Use DQNPredictor directly - useful to test caffe2 predictor
            # assumes state preprocessor already part of predictor net.
            sparse_states = predictor.in_order_dense_to_sparse(state)
            q_values = predictor.predict(sparse_states)
            action_idx = int(max(q_values[0],
                                 key=q_values[0].get)) - self.state_dim
            action[action_idx] = 1.0
            return action, action_probability
        elif isinstance(predictor, ParametricDQNPredictor):
            # Needs to get a list of candidate actions if actions are continuous
            if self.action_type == EnvType.CONTINUOUS_ACTION:
                raise NotImplementedError()
            action_probability = 1.0 if test else 1.0 - self.epsilon
            state = np.repeat(state, repeats=self.action_dim, axis=0)
            sparse_states = predictor.in_order_dense_to_sparse(state)
            sparse_actions = [{
                str(i + self.state_dim): 1
            } for i in range(self.action_dim)]
            q_values = predictor.predict(sparse_states, sparse_actions)
            q_values = np.fromiter(
                map(lambda x: x["Q"], q_values),
                np.float  # type: ignore
            ).reshape(self.action_dim)
            action_idx = np.argmax(q_values)
            action[action_idx] = 1.0
            return action, action_probability
        elif predictor.policy_net():  # type: ignore
            action_set = predictor.policy(state)  # type: ignore
            action, action_probability = action_set.greedy, action_set.greedy_propensity
            action = action[0, :]
            return action, action_probability
        else:
            action_probability = 1.0 if test else 1.0 - self.epsilon
            if predictor.discrete_action():  # type: ignore
                policy_action_set = predictor.policy(  # type: ignore
                    state, torch.ones([1, self.action_dim]))
            else:
                states_tiled = torch.repeat_interleave(state,
                                                       repeats=self.action_dim,
                                                       dim=0)
                policy_action_set = predictor.policy(  # type: ignore
                    states_tiled,
                    (torch.eye(
                        self.action_dim), torch.ones((self.action_dim, 1))),
                )

            if self.softmax_policy:
                action[policy_action_set.softmax] = 1.0
            else:
                action[policy_action_set.greedy] = 1.0
        return action, action_probability
Example n. 20
0
def beam_search(decoder: nn.Module,
                att_net: nn.Module,
                enc_out: th.Tensor,
                lm: Optional[LmType] = None,
                ctc_prob: Optional[th.Tensor] = None,
                lm_weight: float = 0,
                beam_size: int = 8,
                nbest: int = 1,
                max_len: int = -1,
                max_len_ratio: float = 1,
                min_len: int = 0,
                min_len_ratio: float = 0,
                sos: int = -1,
                eos: int = -1,
                unk: int = -1,
                len_norm: bool = True,
                end_detect: bool = False,
                ctc_weight: float = 0,
                len_penalty: float = 0,
                cov_penalty: float = 0,
                temperature: float = 1,
                allow_partial: bool = False,
                cov_threshold: float = 0.5,
                eos_threshold: float = 1) -> List[Dict]:
    """
    Vectorized beam search algorithm (see batch version beam_search_batch)
    Args
        att_net (nn.Module): attention network
        enc_out (Tensor): 1 x T x F, encoder output
    """
    if sos < 0 or eos < 0:
        raise RuntimeError(f"Invalid SOS/EOS ID: {sos:d}/{eos:d}")
    N, T, D_enc = enc_out.shape
    if N != 1:
        raise RuntimeError(
            f"Got batch size {N:d}, now only support one utterance")
    if not hasattr(decoder, "step"):
        raise RuntimeError("Function step should defined in decoder network")
    if beam_size > decoder.vocab_size:
        raise RuntimeError(f"Beam size({beam_size}) > vocabulary size")

    min_len = max(min_len, int(min_len_ratio * T))
    max_len = min(max_len, int(max_len_ratio * T)) if max_len_ratio > 0 else T
    logger.info(f"--- shape of the encoder output: {T} x {D_enc}")
    logger.info("--- length constraint of the decoding " +
                f"sequence: ({min_len}, {max_len})")
    nbest = min(beam_size, nbest)
    device = enc_out.device
    att_ali = None
    dec_hid = None
    # N x T x F => N*beam x T x F
    enc_out = th.repeat_interleave(enc_out, beam_size, 0)
    att_ctx = th.zeros([N * beam_size, D_enc], device=device)
    proj = th.zeros([N * beam_size, D_enc], device=device)

    beam_param = BeamSearchParam(beam_size=beam_size,
                                 sos=sos,
                                 eos=eos,
                                 unk=unk,
                                 device=device,
                                 min_len=min_len,
                                 max_len=max_len,
                                 len_norm=len_norm,
                                 lm_weight=lm_weight,
                                 end_detect=end_detect,
                                 ctc_weight=ctc_weight,
                                 len_penalty=len_penalty,
                                 cov_penalty=cov_penalty,
                                 allow_partial=allow_partial,
                                 cov_threshold=cov_threshold,
                                 eos_threshold=eos_threshold,
                                 ctc_beam_size=int(beam_size * 1.5))
    beam_tracker = BeamTracker(beam_param, ctc_prob=ctc_prob)

    lm_state = None
    # clear states
    att_net.clear()
    # step by step
    stop = False
    while not stop:
        # beam
        pre_tok, point = beam_tracker[-1]

        # step forward
        dec_hid = adjust_hidden(point, dec_hid)
        att_ali = None if att_ali is None else att_ali[point]
        dec_out, att_ctx, dec_hid, att_ali, proj = decoder.step(
            att_net,
            pre_tok,
            enc_out,
            att_ctx[point],
            dec_hid=dec_hid,
            att_ali=att_ali,
            proj=proj[point])
        # compute prob: beam x V, negative
        am_prob = tf.log_softmax(dec_out / temperature, dim=-1)
        if lm and beam_param.lm_weight > 0:
            # beam x V
            lm_prob, lm_state = lm_score_impl(lm, point, pre_tok, lm_state)
        else:
            lm_prob = 0
        # one beam search step
        stop = beam_tracker.step(am_prob, lm_prob, att_ali=att_ali)
    # return nbest
    return beam_tracker.nbest_hypos(nbest)
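The only repeat_interleave in this routine expands the encoder memory so that every beam hypothesis attends over its own copy; a minimal sketch with arbitrary sizes:
import torch

N, T, D_enc, beam_size = 1, 6, 4, 8
enc_out = torch.randn(N, T, D_enc)

# Every beam hypothesis attends over the same encoder output, so the
# encoder memory is simply duplicated beam_size times along the batch axis.
enc_out_beam = torch.repeat_interleave(enc_out, beam_size, 0)
print(enc_out_beam.shape)  # torch.Size([8, 6, 4])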
Example n. 21
0
    def forward(self, x, globel_step):
        """
        Forward pass

        :param x: (B, 3, H, W)
        :param globel_step: global step (training)
        :return:
            fg_likelihood: (B, 3, H, W)
            y_nobg: (B, 3, H, W), foreground reconstruction
            alpha_map: (B, 1, H, W)
            kl: (B,) total foreground kl
            boundary_loss: (B,)
            log: a dictionary containing anything we need for visualization
        """
        B = x.size(0)
        # if globel_step:
        self.anneal(globel_step)

        # Everything is (B, G*G, D), where D varies
        z_pres, z_depth, z_scale, z_shift, z_where, \
        z_pres_logits, z_depth_post, z_scale_post, z_shift_post = self.img_encoder(x, self.tau)

        # (B, 3, H, W) -> (B*G*G, 3, H, W). Note we must use repeat_interleave instead of repeat
        x_repeat = torch.repeat_interleave(x, arch.G**2, dim=0)

        # (B*G*G, 3, H, W), where G is the grid size
        # Extract glimpse
        x_att = spatial_transform(
            x_repeat,
            z_where.view(B * arch.G**2, 4),
            (B * arch.G**2, 3, arch.glimpse_size, arch.glimpse_size),
            inverse=False)

        # (B*G*G, D)
        z_what, z_what_post = self.z_what_net(x_att)

        # Decode z_what into small reconstructed glimpses
        # All (B*G*G, 3, H, W)
        o_att, alpha_att = self.glimpse_dec(z_what)
        # z_pres: (B, G*G, 1) -> (B*G*G, 1, 1, 1)
        alpha_att_hat = alpha_att * z_pres.view(-1, 1, 1, 1)
        # (B*G*G, 3, H, W)
        y_att = alpha_att_hat * o_att

        # Compute pixel-wise object weights
        # (B*G*G, 1, H, W). These are glimpse size
        importance_map = alpha_att_hat * 100.0 * torch.sigmoid(
            -z_depth.view(B * arch.G**2, 1, 1, 1))
        # (B*G*G, 1, H, W). These are of full resolution
        importance_map_full_res = spatial_transform(
            importance_map,
            z_where.view(B * arch.G**2,
                         4), (B * arch.G**2, 1, *arch.img_shape),
            inverse=True)

        # (B*G*G, 1, H, W) -> (B, G*G, 1, H, W)
        importance_map_full_res = importance_map_full_res.view(
            B, arch.G**2, 1, *arch.img_shape)
        # Normalize (B, >G*G<, 1, H, W)
        importance_map_full_res_norm = torch.softmax(importance_map_full_res,
                                                     dim=1)

        # To full resolution
        # (B*G*G, 3, H, W) -> (B, G*G, 3, H, W)
        y_each_cell = spatial_transform(y_att,
                                        z_where.view(B * arch.G**2, 4),
                                        (B * arch.G**2, 3, *arch.img_shape),
                                        inverse=True).view(
                                            B, arch.G**2, 3, *arch.img_shape)
        # Weighted sum, (B, 3, H, W)
        y_nobg = (y_each_cell * importance_map_full_res_norm).sum(dim=1)

        # To full resolution
        # (B*G*G, 1, H, W) -> (B, G*G, 1, H, W)
        alpha_map = spatial_transform(alpha_att_hat,
                                      z_where.view(B * arch.G**2, 4),
                                      (B * arch.G**2, 1, *arch.img_shape),
                                      inverse=True).view(
                                          B, arch.G**2, 1, *arch.img_shape)

        # Weighted sum, (B, 1, H, W)
        alpha_map = (alpha_map * importance_map_full_res_norm).sum(dim=1)

        # Everything is computed. Now let's compute loss
        # Compute KL divergences
        # (B, G*G, 1)
        kl_z_pres = kl_divergence_bern_bern(z_pres_logits,
                                            self.prior_z_pres_prob)

        # (B, G*G, 1)
        kl_z_depth = kl_divergence(z_depth_post, self.z_depth_prior)

        # (B, G*G, 2)
        kl_z_scale = kl_divergence(z_scale_post, self.z_scale_prior)
        kl_z_shift = kl_divergence(z_shift_post, self.z_shift_prior)

        # Reshape z_what and z_what_post
        # (B*G*G, D) -> (B, G*G, D)
        z_what = z_what.view(B, arch.G**2, arch.z_what_dim)
        z_what_post = Normal(*[
            x.view(B, arch.G**2, arch.z_what_dim)
            for x in [z_what_post.mean, z_what_post.stddev]
        ])
        # (B, G*G, D)
        kl_z_what = kl_divergence(z_what_post, self.z_what_prior)

        # dimensionality check
        assert ((kl_z_pres.size() == (B, arch.G**2, 1))
                and (kl_z_depth.size() == (B, arch.G**2, 1))
                and (kl_z_scale.size() == (B, arch.G**2, 2))
                and (kl_z_shift.size() == (B, arch.G**2, 2))
                and (kl_z_what.size() == (B, arch.G**2, arch.z_what_dim)))

        # Reduce (B, G*G, D) -> (B,)
        kl_z_pres, kl_z_depth, kl_z_scale, kl_z_shift, kl_z_what = [
            x.flatten(start_dim=1).sum(1) for x in
            [kl_z_pres, kl_z_depth, kl_z_scale, kl_z_shift, kl_z_what]
        ]
        # (B,)
        kl_z_where = kl_z_scale + kl_z_shift

        # Compute boundary loss
        # (1, 1, K, K)
        boundary_kernel = self.boundary_kernel[None, None].to(x.device)
        # (1, 1, K, K) * (B*G*G, 1, 1) -> (B*G*G, 1, K, K)
        boundary_kernel = boundary_kernel * z_pres.view(B * arch.G**2, 1, 1, 1)
        # (B, G*G, 1, H, W), to full resolution
        boundary_map = spatial_transform(boundary_kernel,
                                         z_where.view(B * arch.G**2, 4),
                                         (B * arch.G**2, 1, *arch.img_shape),
                                         inverse=True).view(
                                             B, arch.G**2, 1, *arch.img_shape)
        # (B, 1, H, W)
        boundary_map = boundary_map.sum(dim=1)
        # TODO: some magic number. For reproducibility I will keep it
        boundary_map = boundary_map * 1000
        # (B, 1, H, W) * (B, 1, H, W)
        overlap = boundary_map * alpha_map
        # TODO: another magic number. For reproducibility I will keep it
        p_boundary = Normal(0, 0.7)
        # (B, 1, H, W)
        boundary_loss = p_boundary.log_prob(overlap)
        # (B,)
        boundary_loss = boundary_loss.flatten(start_dim=1).sum(1)

        # NOTE: we want to minimize this
        boundary_loss = -boundary_loss

        # Compute foreground likelihood
        fg_dist = Normal(y_nobg, self.fg_sigma)
        fg_likelihood = fg_dist.log_prob(x)

        kl = kl_z_what + kl_z_where + kl_z_pres + kl_z_depth

        if not arch.boundary_loss or globel_step > arch.bl_off_step:
            boundary_loss = boundary_loss * 0.0

        # For visualization
        # Dimensionality check
        assert ((z_pres.size() == (B, arch.G**2, 1))
                and (z_depth.size() == (B, arch.G**2, 1))
                and (z_scale.size() == (B, arch.G**2, 2))
                and (z_shift.size() == (B, arch.G**2, 2))
                and (z_where.size() == (B, arch.G**2, 4))
                and (z_what.size() == (B, arch.G**2, arch.z_what_dim)))
        log = {
            'fg': y_nobg,
            'z_what': z_what,
            'z_where': z_where,
            'z_pres': z_pres,
            'z_scale': z_scale,
            'z_shift': z_shift,
            'z_depth': z_depth,
            'z_pres_prob': torch.sigmoid(z_pres_logits),
            'prior_z_pres_prob': self.prior_z_pres_prob.unsqueeze(0),
            'o_att': o_att,
            'alpha_att_hat': alpha_att_hat,
            'alpha_att': alpha_att,
            'alpha_map': alpha_map,
            'boundary_loss': boundary_loss,
            'boundary_map': boundary_map,
            'importance_map_full_res_norm': importance_map_full_res_norm,
            'kl_z_what': kl_z_what,
            'kl_z_pres': kl_z_pres,
            'kl_z_scale': kl_z_scale,
            'kl_z_shift': kl_z_shift,
            'kl_z_depth': kl_z_depth,
            'kl_z_where': kl_z_where,
        }
        return fg_likelihood, y_nobg, alpha_map, kl, boundary_loss, log
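The note near the top of this forward pass (repeat_interleave rather than repeat) matters because the G*G copies of each image must stay contiguous so they line up with that image's own grid cells; a tiny sketch of the difference, using made-up 1x1 "images":
import torch

B, G = 2, 2
x = torch.arange(B).float().view(B, 1, 1, 1)   # image 0 and image 1 (toy 1x1x1 "images")

# repeat_interleave keeps all G*G copies of image 0 together, then image 1:
a = torch.repeat_interleave(x, G**2, dim=0).view(-1).tolist()   # [0, 0, 0, 0, 1, 1, 1, 1]
# .repeat would instead interleave whole batches: [0, 1, 0, 1, 0, 1, 0, 1]
b = x.repeat(G**2, 1, 1, 1).view(-1).tolist()
print(a, b)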
Example n. 22
0
def radius_graph_pbc(data, radius, max_num_neighbors_threshold, device):
    batch_size = len(data.natoms)

    # position of the atoms
    atom_pos = data.pos

    # Before computing the pairwise distances between atoms, first create a list of atom indices to compare for the entire batch
    num_atoms_per_image = data.natoms
    num_atoms_per_image_sqr = (num_atoms_per_image ** 2).long()

    # index offset between images
    index_offset = (
        torch.cumsum(num_atoms_per_image, dim=0) - num_atoms_per_image
    )

    index_offset_expand = torch.repeat_interleave(
        index_offset, num_atoms_per_image_sqr
    )
    num_atoms_per_image_expand = torch.repeat_interleave(
        num_atoms_per_image, num_atoms_per_image_sqr
    )

    # Compute a tensor containing sequences of numbers that range from 0 to num_atoms_per_image_sqr for each image
    # that is used to compute indices for the pairs of atoms. This is a very convoluted way to implement
    # the following (but 10x faster since it removes the for loop)
    # for batch_idx in range(batch_size):
    #    batch_count = torch.cat([batch_count, torch.arange(num_atoms_per_image_sqr[batch_idx], device=device)], dim=0)
    num_atom_pairs = torch.sum(num_atoms_per_image_sqr)
    index_sqr_offset = (
        torch.cumsum(num_atoms_per_image_sqr, dim=0) - num_atoms_per_image_sqr
    )
    index_sqr_offset = torch.repeat_interleave(
        index_sqr_offset, num_atoms_per_image_sqr
    )
    atom_count_sqr = (
        torch.arange(num_atom_pairs, device=device) - index_sqr_offset
    )

    # Compute the indices for the pairs of atoms (using division and mod)
    # If the systems get too large this approach could run into numerical precision issues
    index1 = (
        (atom_count_sqr // num_atoms_per_image_expand)
    ).long() + index_offset_expand
    index2 = (
        atom_count_sqr % num_atoms_per_image_expand
    ).long() + index_offset_expand
    # Get the positions for each atom
    pos1 = torch.index_select(atom_pos, 0, index1)
    pos2 = torch.index_select(atom_pos, 0, index2)

    # Tensor of unit cells. Assumes 9 cells in -1, 0, 1 offsets in the x and y dimensions
    unit_cell = torch.tensor(
        [
            [-1, -1, 0],
            [-1, 0, 0],
            [-1, 1, 0],
            [0, -1, 0],
            [0, 0, 0],
            [0, 1, 0],
            [1, -1, 0],
            [1, 0, 0],
            [1, 1, 0],
        ],
        device=device,
    ).float()
    num_cells = len(unit_cell)
    unit_cell_per_atom = unit_cell.view(1, num_cells, 3).repeat(
        len(index2), 1, 1
    )
    unit_cell = torch.transpose(unit_cell, 0, 1)
    unit_cell_batch = unit_cell.view(1, 3, num_cells).expand(
        batch_size, -1, -1
    )

    # Compute the x, y, z positional offsets for each cell in each image
    data_cell = torch.transpose(data.cell, 1, 2)
    pbc_offsets = torch.bmm(data_cell, unit_cell_batch)
    pbc_offsets_per_atom = torch.repeat_interleave(
        pbc_offsets, num_atoms_per_image_sqr, dim=0
    )

    # Expand the positions and indices for the 9 cells
    pos1 = pos1.view(-1, 3, 1).expand(-1, -1, num_cells)
    pos2 = pos2.view(-1, 3, 1).expand(-1, -1, num_cells)
    index1 = index1.view(-1, 1).repeat(1, num_cells).view(-1)
    index2 = index2.view(-1, 1).repeat(1, num_cells).view(-1)
    # Add the PBC offsets for the second atom
    pos2 = pos2 + pbc_offsets_per_atom

    # Compute the squared distance between atoms
    atom_distance_sqr = torch.sum((pos1 - pos2) ** 2, dim=1)
    atom_distance_sqr = atom_distance_sqr.view(-1)

    # Remove pairs that are too far apart
    mask_within_radius = torch.le(atom_distance_sqr, radius * radius)
    # Remove pairs with the same atoms (distance = 0.0)
    mask_not_same = torch.gt(atom_distance_sqr, 0.0001)
    mask = torch.logical_and(mask_within_radius, mask_not_same)
    index1 = torch.masked_select(index1, mask)
    index2 = torch.masked_select(index2, mask)
    unit_cell = torch.masked_select(
        unit_cell_per_atom.view(-1, 3), mask.view(-1, 1).expand(-1, 3)
    )
    unit_cell = unit_cell.view(-1, 3)

    num_atoms = len(data.pos)
    num_neighbors = torch.zeros(num_atoms, device=device)
    num_neighbors.index_add_(0, index1, torch.ones(len(index1), device=device))
    num_neighbors = num_neighbors.long()
    max_num_neighbors = torch.max(num_neighbors).long()

    # Compute neighbors per image
    _max_neighbors = copy.deepcopy(num_neighbors)
    _max_neighbors[
        _max_neighbors > max_num_neighbors_threshold
    ] = max_num_neighbors_threshold
    _num_neighbors = torch.zeros(num_atoms + 1, device=device).long()
    _natoms = torch.zeros(data.natoms.shape[0] + 1, device=device).long()
    _num_neighbors[1:] = torch.cumsum(_max_neighbors, dim=0)
    _natoms[1:] = torch.cumsum(data.natoms, dim=0)
    num_neighbors_image = (
        _num_neighbors[_natoms[1:]] - _num_neighbors[_natoms[:-1]]
    )

    # If max_num_neighbors is below the threshold, return early
    if (
        max_num_neighbors <= max_num_neighbors_threshold
        or max_num_neighbors_threshold <= 0
    ):
        return torch.stack((index2, index1)), unit_cell, num_neighbors_image

    atom_distance_sqr = torch.masked_select(atom_distance_sqr, mask)

    # Create a tensor of size [num_atoms, max_num_neighbors] to sort the distances of the neighbors.
    # Fill with values greater than radius*radius so we can easily remove unused distances later.
    distance_sort = torch.zeros(
        num_atoms * max_num_neighbors, device=device
    ).fill_(radius * radius + 1.0)

    # Create an index map to map distances from atom_distance_sqr to distance_sort
    index_neighbor_offset = torch.cumsum(num_neighbors, dim=0) - num_neighbors
    index_neighbor_offset_expand = torch.repeat_interleave(
        index_neighbor_offset, num_neighbors
    )
    index_sort_map = (
        index1 * max_num_neighbors
        + torch.arange(len(index1), device=device)
        - index_neighbor_offset_expand
    )
    distance_sort.index_copy_(0, index_sort_map, atom_distance_sqr)
    distance_sort = distance_sort.view(num_atoms, max_num_neighbors)

    # Sort neighboring atoms based on distance
    distance_sort, index_sort = torch.sort(distance_sort, dim=1)
    # Select the max_num_neighbors_threshold neighbors that are closest
    distance_sort = distance_sort[:, :max_num_neighbors_threshold]
    index_sort = index_sort[:, :max_num_neighbors_threshold]

    # Offset index_sort so that it indexes into index1
    index_sort = index_sort + index_neighbor_offset.view(-1, 1).expand(
        -1, max_num_neighbors_threshold
    )
    # Remove "unused pairs" with distances greater than the radius
    mask_within_radius = torch.le(distance_sort, radius * radius)
    index_sort = torch.masked_select(index_sort, mask_within_radius)

    # At this point index_sort contains the index into index1 of the closest max_num_neighbors_threshold neighbors per atom
    # Create a mask to remove all pairs not in index_sort
    mask_num_neighbors = torch.zeros(len(index1), device=device).bool()
    mask_num_neighbors.index_fill_(0, index_sort, True)

    # Finally mask out the atoms to ensure each atom has at most max_num_neighbors_threshold neighbors
    index1 = torch.masked_select(index1, mask_num_neighbors)
    index2 = torch.masked_select(index2, mask_num_neighbors)
    unit_cell = torch.masked_select(
        unit_cell.view(-1, 3), mask_num_neighbors.view(-1, 1).expand(-1, 3)
    )
    unit_cell = unit_cell.view(-1, 3)

    edge_index = torch.stack((index2, index1))

    return edge_index, unit_cell, num_neighbors_image
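The pair-index construction at the start of this function (cumsum offsets plus repeat_interleave, instead of a Python loop) can be traced on a toy batch; the two "images" below with 2 and 3 atoms are invented for illustration:
import torch

# Toy batch: two "images" with 2 and 3 atoms respectively.
natoms = torch.tensor([2, 3])
natoms_sqr = (natoms ** 2).long()                                   # [4, 9]

index_offset = torch.cumsum(natoms, dim=0) - natoms                  # [0, 2]
index_offset_expand = torch.repeat_interleave(index_offset, natoms_sqr)
natoms_expand = torch.repeat_interleave(natoms, natoms_sqr)

num_pairs = natoms_sqr.sum()
sqr_offset = torch.cumsum(natoms_sqr, dim=0) - natoms_sqr            # [0, 4]
sqr_offset = torch.repeat_interleave(sqr_offset, natoms_sqr)
count = torch.arange(num_pairs) - sqr_offset                         # 0..3, then 0..8

# Every (atom, atom) pair within each image, as global atom indices.
index1 = (count // natoms_expand).long() + index_offset_expand
index2 = (count % natoms_expand).long() + index_offset_expand
print(index1.tolist())  # [0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]
print(index2.tolist())  # [0, 1, 0, 1, 2, 3, 4, 2, 3, 4, 2, 3, 4]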
Example n. 23
0
 def expand_mask(mask, block_rows, block_cols):
     mask = torch.repeat_interleave(mask, block_rows, dim=0)
     mask = torch.repeat_interleave(mask, block_cols, dim=1)
     return mask
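For example, assuming the expand_mask definition above, each mask entry is expanded into a block_rows x block_cols block:
import torch

mask = torch.tensor([[1, 0],
                     [0, 1]])
# With 2x3 blocks, every entry becomes a 2-row by 3-column block.
expanded = expand_mask(mask, block_rows=2, block_cols=3)   # uses the function above
print(expanded.shape)  # torch.Size([4, 6])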
Example n. 24
0
    def cut_lonely_connections(self):
        govs = []
        gov_in = None
        gov_out = None
        do_avg_pool = 0
        for layer, (is_conv,
                    next_is_conv) in lookahead_type(self.model.modules()):
            is_conv = isinstance(layer, nn.Conv2d)
            is_fc = isinstance(layer, nn.Linear)
            is_avgpool = isinstance(layer, nn.AdaptiveAvgPool2d)
            if is_avgpool:
                do_avg_pool = int(np.prod(layer.output_size))
            elif is_conv or is_fc:

                out_dim, in_dim = layer.weight.shape[:2]

                if gov_in is None:

                    gov_in = nn.Parameter(torch.ones(in_dim).to(self.device),
                                          requires_grad=True)
                    govs.append(gov_in)

                else:
                    gov_in = gov_out

                gov_out = nn.Parameter(torch.ones(out_dim).to(self.device),
                                       requires_grad=True)
                govs.append(gov_out)
            # substitute activation function
            if is_fc:
                if do_avg_pool > 0:
                    layer.do_avg_pool = do_avg_pool
                    do_avg_pool = 0
                # layer.forward = types.MethodType(group_snip_forward_linear, layer)
            # if is_conv:
            #     layer.forward = types.MethodType(group_snip_conv2d_forward, layer)
        indices = {}
        idx = 0
        for id, layer in self.model.mask.items():
            if 'conv' in id:
                # input
                input = []
                for i in range(layer.shape[1]):
                    if len(torch.nonzero(layer[:, i, :, :])) == 0:
                        input.append(0)
                    else:
                        input.append(1)
                # output
                output = []
                for i in range(layer.shape[0]):
                    if len(torch.nonzero(layer[i, :, :, :])) == 0:
                        output.append(0)
                    else:
                        output.append(1)
            else:
                # input
                input = []
                for i in range(layer.shape[1]):
                    if len(torch.nonzero(layer[:, i])) == 0:
                        input.append(0)
                    else:
                        input.append(1)
                # output
                output = []
                for i in range(layer.shape[0]):
                    if len(torch.nonzero(layer[i, :])) == 0:
                        output.append(0)
                    else:
                        output.append(1)
            # indices
            indices[(idx, id)] = torch.tensor(input)
            idx += 1
            indices[(idx, id)] = torch.tensor(output)
            idx += 1
        old_key = ()
        old_length = 0
        input = True
        for key, value in indices.items():
            length = len(value)
            # TODO: Handle early in training by resetting the optimizer
            if input == True:
                # breakpoint()
                if length == old_length:
                    indices[old_key] = value.__or__(indices[old_key])
                    indices[key] = value.__or__(indices[old_key])
                elif old_length != 0 and length % old_length == 0 and (
                        'fc' in key[1] or 'classifier' in key[1]):
                    ratio = length // old_length
                    new_indices = torch.repeat_interleave(
                        indices[old_key], ratio)
                    for i in range(old_length):
                        if sum(new_indices[i * ratio:ratio * (i + 1)].__or__(
                                value[i * ratio:ratio * (i + 1)])) == ratio:
                            indices[old_key][i] = 1
                        else:
                            indices[old_key][i] = 0
                    indices[key] = torch.repeat_interleave(
                        indices[old_key], ratio)
            old_length = length
            old_key = key
            input = not input
        self.structured_prune(indices)
        return indices
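The repeat_interleave inside the pruning loop stretches a conv layer's per-channel keep mask to the flattened input width of the following linear layer; a minimal sketch with invented sizes:
import torch

channels, ratio = 3, 4                      # e.g. 3 conv channels, a 2x2 spatial map
conv_out_mask = torch.tensor([1, 0, 1])     # keep / prune flag per conv channel

# Each channel contributes `ratio` flattened features to the next linear layer,
# so the channel mask is stretched to the linear layer's input width.
fc_in_mask = torch.repeat_interleave(conv_out_mask, ratio)
print(fc_in_mask.tolist())  # [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1]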
Example n. 25
0
    def forward(self, x, y=None, x_raw=None):
        """
        Add loss for identifying signal in a frame?
        """
        batch_size = x.shape[0]
        n_timesteps = x.shape[1]

        prev_h = torch.zeros(batch_size, self.rnn_dim).to(self.device)
        prev_c = torch.zeros(batch_size, self.rnn_dim).to(self.device)

        if self.use_frame_loss:
            frame_logits = []

        for t in range(n_timesteps):
            x_t = x[:, t, :, :].unsqueeze(1)
            conv_t = self.conv_block(x_t).view(batch_size, -1)
            # conv_t = self.dropout(conv_t)
            proj_t = self.linear_projection(conv_t)
            # proj_t = self.dropout(proj_t)     INCLUDED IN RNN BLOCK
            prev_h, prev_c = self.rnn_block(proj_t, prev_h, prev_c)
            if self.use_frame_loss:
                frame_logit = self.frame_linear(prev_h)
                frame_logits.append(frame_logit)

        logits = self.clf(prev_h)
        # if self.use_frame_loss:
        #     frame_logits = torch.cat(frame_logits, dim=0)

        outputs = (logits, )
        if y is not None:
            loss_fct = CrossEntropyLoss(weight=self.loss_weight)
            loss = loss_fct(logits, y)

            if self.use_frame_loss:
                # print('y.shape: {}'.format(y.shape))
                y_frame = torch.repeat_interleave(y.view(-1, 1),
                                                  n_timesteps,
                                                  dim=1)
                # print('y_frame.shape: {}'.format(y_frame.shape))
                x_frame = x_raw.view(batch_size, n_timesteps, -1)
                # print('x_frame.shape: {}'.format(x_frame.shape))
                x_max = torch.max(x_frame, dim=-1)[0]
                # print('x_max: {}'.format(x_max))
                # print('x_max.shape: {}'.format(x_max.shape))
                y_frame[(y_frame == 1) & (x_max < 10)] = 0
                # print('y_frame: {}'.format(y_frame))
                # print('y_frame.shape: {} mean: {}'.format(y_frame.shape, y_frame.double().mean()))

                frame_logits = torch.cat(frame_logits, dim=0)
                # print('frame_logits.shape: {}'.format(frame_logits.shape))
                frame_logits = frame_logits.view(-1, 2)
                # print('frame_logits.shape: {}'.format(frame_logits.shape))
                y_frame = y_frame.view(-1)
                # print('y_frame.shape: {}'.format(y_frame.shape))
                frame_loss = loss_fct(frame_logits, y_frame)

                loss += self.frame_loss_weight * frame_loss

                # input('okty')

            outputs = (loss, ) + outputs

        return outputs
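The frame-loss branch above broadcasts one clip-level label to every timestep with repeat_interleave before flattening it against the per-frame logits; a small sketch with arbitrary sizes:
import torch

batch_size, n_timesteps = 4, 6
y = torch.randint(0, 2, (batch_size,))

# One label per clip becomes one label per frame:
# (B,) -> (B, 1) -> (B, T), then flattened to align with per-frame logits.
y_frame = torch.repeat_interleave(y.view(-1, 1), n_timesteps, dim=1)
print(y_frame.shape)             # torch.Size([4, 6])
print(y_frame.view(-1).shape)    # torch.Size([24])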
Example n. 26
0
def test_graph_size_norm():
    batch = torch.repeat_interleave(torch.full((10, ), 10, dtype=torch.long))
    norm = GraphSizeNorm()
    out = norm(torch.randn(100, 16), batch)
    assert out.size() == (100, 16)
Example n. 27
0
    def prediction_from_trained_model_beam_Search(self, i, ys, score_1,
                                                  AM_local_scores, beam, hyps,
                                                  gamma, batch_size):
        """
            ####vecotorized beam-search ===>beam search that happens parllelly i.e., 
            1.Each prefix is treated as a invidual sequence when given to the model and the predictions for each prefixes are obtained;
            2.Each prefix has a beam of new possible labels, so each prefix is repeated beam number of times and the new label is concatented so does the likeli-hood score;
            3.the new prefixes are hyps_no*beam are pruned to settle with hyps_no prefixes
            4.Eos threshold is used if any of the predicted labels in the beam is eos
            5. If any of the hypotheis has ended the duplication avoided to increase diverse batches

            #folded accordingly and the beam of new
            """

        if i == 0:

            ### for the first step just repeat the hyps and add the beam to the hypothesis
            local_best_scores, local_best_ids = torch.topk(AM_local_scores,
                                                           hyps,
                                                           dim=1,
                                                           largest=True,
                                                           sorted=True)
            #---------------------
            present_ids = (local_best_ids[::hyps]).contiguous().view(-1, 1)
            present_scores = (local_best_scores[::hyps]).contiguous().view(
                -1, 1)

            ## do not allow EOS as the first token:
            ## the first label cannot be EOS
            #-----------------------------------------------------------------------
            ys = torch.cat((ys, present_ids), dim=1)
            score_1 = torch.cat((score_1, present_scores), dim=1)
            #-----------------------------------------------------------------------
            mask = torch.eq(present_ids, self.eos_id)
            score_1 = score_1 - mask * 1000

            ### Not fully correct: ys should be expanded and selected with a selection index here,
            ### but the model regenerates these labels at step i >= 1
        #----------------------------------------------------------------------------
        else:

            #---------------------
            local_best_scores, local_best_ids = torch.topk(AM_local_scores,
                                                           beam,
                                                           dim=1,
                                                           largest=True,
                                                           sorted=True)
            #---------------------
            ###################################################

            #### EOS filtering: if EOS is predicted with a score below the threshold, it is dropped
            # --------- EOS threshold --------------------------------
            is_eos = (local_best_ids == self.eos_id)
            ### split the beam scores into EOS and non-EOS parts:
            ##  max of the non-EOS scores along dim=1 (EOS positions masked to -1000)
            ##  max of the EOS scores along dim=1 (non-EOS positions masked to -1000)
            ##  keep an EOS only if [ EOS > gamma * NON_EOS ]
            ##  then filter out the rejected EOS entries with an outer product

            NON_EOS_max, _ = torch.max(
                local_best_scores * ~is_eos + is_eos * -1000,
                dim=1)
            EOS_max, _ = torch.max(
                local_best_scores * is_eos + ~is_eos * -1000,
                dim=1)

            EOS_out = EOS_max > gamma * NON_EOS_max

            EOS_SCORE_MASK = (is_eos.transpose(0, 1) *
                              EOS_out).transpose(0, 1)
            local_best_scores = local_best_scores - (is_eos * 1 *
                                                     ~EOS_SCORE_MASK * 1000.0)
            #--------------------------------------------------------
            #repeat the prefixes beam times

            ys_1 = torch.repeat_interleave(ys, beam, 0)
            score_2 = torch.repeat_interleave(score_1, beam, 0)
            #----------------------------------------------------
            present_ids = (local_best_ids).contiguous().view(-1, 1)
            present_scores = (local_best_scores).contiguous().view(-1, 1)
            #----------------------------------------------------
            #concatenate labels and scores to the prefixes
            ys = torch.cat((ys_1, present_ids), dim=1)
            score_1 = torch.cat((score_2, present_scores), dim=1)
            #----------------------------------------------------

            ### fold per utterance to get hyps * beam candidates and prune the worst, keeping hyps prefixes
            pres_acuml_score = torch.cumsum(score_1, dim=1)[:, -1]
            al1, al2 = torch.topk(pres_acuml_score.view(
                batch_size, hyps * beam, 1),
                                  hyps,
                                  dim=1,
                                  largest=True,
                                  sorted=True)
            selecting_index = torch.cat([al2] * ys.size(1), dim=2)
            #----------------------------------------------------
            #----------------------------------------------------
            ### regroup per utterance after selecting the top K; this layout is needed to gather as per topk
            ys = ys.view(batch_size, hyps * beam, -1)
            score_1 = score_1.view(batch_size, hyps * beam, -1)
            #----------------------------------------------------
            ### prune with gather: select the top prefixes and their scores
            ys = torch.gather(ys, 1, selecting_index)
            score_1 = torch.gather(score_1, 1, selecting_index)

            ### flatten back to (batch_size * hyps, ...) so the selected hypotheses
            ### can be processed in parallel in the next iteration
            ys = ys.view(batch_size * hyps, -1)
            score_1 = score_1.view(batch_size * hyps, -1)
            ####################################################
            # If an EOS survived the previous iteration it is an accepted EOS:
            # no new label may follow it and its new score is set to zero,
            # otherwise we end up with bad hypotheses
            if i > 1:
                selected_EOS = torch.eq(ys[:, -2], self.eos_id)
                score_1[:, -1] = score_1[:, -1] * (~selected_EOS)
                ys[:, -1][selected_EOS] = self.eos_id
            #------------------------------
        return ys, score_1
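A self-contained sketch of the repeat-and-prune step above (the i > 0 branch), using one utterance, hyps = 2 live prefixes and beam = 2 candidates. The tensors are made up and the names only mirror the code above:

import torch

batch_size, hyps, beam = 1, 2, 2
ys = torch.tensor([[1, 5, 7],
                   [1, 5, 9]])                       # (batch*hyps, L) prefixes
score_1 = torch.tensor([[0.0, -0.3, -0.1],
                        [0.0, -0.4, -0.2]])          # per-step log-probs

local_best_ids = torch.tensor([[4, 8],
                               [3, 6]])              # top-beam labels per prefix
local_best_scores = torch.tensor([[-0.1, -0.5],
                                  [-0.2, -0.6]])     # and their scores

# 1. repeat every prefix beam times, then append the candidate labels / scores
ys = torch.cat((torch.repeat_interleave(ys, beam, 0),
                local_best_ids.reshape(-1, 1)), dim=1)
score_1 = torch.cat((torch.repeat_interleave(score_1, beam, 0),
                     local_best_scores.reshape(-1, 1)), dim=1)

# 2. rank the hyps*beam expanded prefixes by accumulated score, keep the best hyps
acc = score_1.sum(dim=1).view(batch_size, hyps * beam, 1)
_, top_idx = torch.topk(acc, hyps, dim=1)
sel = torch.cat([top_idx] * ys.size(1), dim=2)
ys = torch.gather(ys.view(batch_size, hyps * beam, -1), 1, sel)
score_1 = torch.gather(score_1.view(batch_size, hyps * beam, -1), 1, sel)

ys = ys.view(batch_size * hyps, -1)                  # back to the per-prefix layout
score_1 = score_1.view(batch_size * hyps, -1)
print(ys)                                            # the two best length-4 prefixes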
Example no. 28
    def _repeat_one_sequence(self, x, d):
        """Repeat each frame according to duration for torch 1.1+."""
        return torch.repeat_interleave(x, d, dim=0)
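An illustrative use of this helper, assuming x holds encoder frames and d holds per-frame durations (the usual duration-based upsampling in TTS models):

import torch

x = torch.arange(6.0).view(3, 2)          # (T_in=3, D=2) frames
d = torch.tensor([1, 3, 2])               # frame i is repeated d[i] times
y = torch.repeat_interleave(x, d, dim=0)  # (sum(d)=6, D=2)

print(y)
# tensor([[0., 1.],
#         [2., 3.],
#         [2., 3.],
#         [2., 3.],
#         [4., 5.],
#         [4., 5.]])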
Example no. 29
    def _apply_texture(
        self,
        videos: "torch.Tensor",
        patch: "torch.Tensor",
        foreground: Optional["torch.Tensor"],
        patch_points: Optional[np.ndarray],
    ) -> "torch.Tensor":
        """
        Apply texture over background and overlay foreground.

        :param videos: Video samples.
        :param patch: Patch to apply.
        :param foreground: Foreground mask.
        :param patch_points: Array of shape (nb_frames, 4, 2) containing four pairs of integers (height, width)
                             corresponding to the coordinates of the four corners top-left, top-right, bottom-right,
                             bottom-left of the transformed image in the coordinate-system of the original image.
        :return: Patched videos.
        """
        import torch  # lgtm [py/repeated-import]
        import torchvision

        nb_samples = videos.shape[0]
        nb_frames = videos.shape[1]
        frame_height = videos.shape[2]
        frame_width = videos.shape[3]

        image_mask = self._get_patch_mask(nb_samples=nb_samples)
        image_mask = image_mask.float()

        patch = patch.float()
        padded_patch = torch.stack([patch] * nb_samples)

        if patch_points is None:
            pad_h_before = self.x_min
            pad_h_after = int(videos.shape[self.i_h + 1] - pad_h_before -
                              image_mask.shape[self.i_h_patch + 1])

            pad_w_before = self.y_min
            pad_w_after = int(videos.shape[self.i_w + 1] - pad_w_before -
                              image_mask.shape[self.i_w_patch + 1])

            image_mask = image_mask.permute(0, 3, 1, 2)

            image_mask = torchvision.transforms.functional.pad(
                img=image_mask,
                padding=[pad_w_before, pad_h_before, pad_w_after, pad_h_after],
                fill=0,
                padding_mode="constant",
            )

            image_mask = image_mask.permute(0, 2, 3, 1)

            image_mask = torch.unsqueeze(image_mask, dim=1)
            image_mask = torch.repeat_interleave(image_mask,
                                                 dim=1,
                                                 repeats=nb_frames)
            image_mask = image_mask.float()

            padded_patch = padded_patch.permute(0, 3, 1, 2)

            padded_patch = torchvision.transforms.functional.pad(
                img=padded_patch,
                padding=[pad_w_before, pad_h_before, pad_w_after, pad_h_after],
                fill=0,
                padding_mode="constant",
            )

            padded_patch = padded_patch.permute(0, 2, 3, 1)

            padded_patch = torch.unsqueeze(padded_patch, dim=1)
            padded_patch = torch.repeat_interleave(padded_patch,
                                                   dim=1,
                                                   repeats=nb_frames)

            padded_patch = padded_patch.float()

        else:

            startpoints = [[0, 0], [frame_width, 0],
                           [frame_width, frame_height], [0, frame_height]]
            endpoints = np.zeros_like(patch_points)
            endpoints[:, :, 0] = patch_points[:, :, 1]
            endpoints[:, :, 1] = patch_points[:, :, 0]

            image_mask = image_mask.permute(0, 3, 1, 2)

            image_mask = torchvision.transforms.functional.resize(
                img=image_mask,
                size=[int(videos.shape[2]),
                      int(videos.shape[3])],
                interpolation=torchvision.transforms.InterpolationMode.
                BILINEAR,
            )

            image_mask_list = []

            for i_frame in range(nb_frames):

                image_mask_i = torchvision.transforms.functional.perspective(
                    img=image_mask,
                    startpoints=startpoints,
                    endpoints=endpoints[i_frame],
                    interpolation=torchvision.transforms.InterpolationMode.
                    BILINEAR,
                    fill=0,
                )

                image_mask_i = image_mask_i.permute(0, 2, 3, 1)

                image_mask_list.append(image_mask_i)

            image_mask = torch.stack(image_mask_list, dim=1)
            image_mask = image_mask.float()

            padded_patch = padded_patch.permute(0, 3, 1, 2)

            padded_patch = torchvision.transforms.functional.resize(
                img=padded_patch,
                size=[int(videos.shape[2]),
                      int(videos.shape[3])],
                interpolation=torchvision.transforms.InterpolationMode.
                BILINEAR,
            )

            padded_patch_list = []

            for i_frame in range(nb_frames):
                padded_patch_i = torchvision.transforms.functional.perspective(
                    img=padded_patch,
                    startpoints=startpoints,
                    endpoints=endpoints[i_frame],
                    interpolation=torchvision.transforms.InterpolationMode.
                    BILINEAR,
                    fill=0,
                )

                padded_patch_i = padded_patch_i.permute(0, 2, 3, 1)

                padded_patch_list.append(padded_patch_i)

            padded_patch = torch.stack(padded_patch_list, dim=1)
            padded_patch = padded_patch.float()

        inverted_mask = (torch.from_numpy(
            np.ones(shape=image_mask.shape, dtype=np.float32)).to(
                self.estimator.device) - image_mask)

        if foreground is not None:
            combined = (videos * inverted_mask + padded_patch * image_mask -
                        padded_patch * ~foreground.bool() +
                        videos * ~foreground.bool() * image_mask)
        else:
            combined = videos * inverted_mask + padded_patch * image_mask

        return combined
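The heart of the patch_points is None branch is broadcasting the per-sample mask and patch to every frame and then blending with the videos. A minimal sketch with illustrative channels-last shapes (nothing here comes from the original class beyond the blending formula):

import torch

nb_samples, nb_frames, H, W, C = 2, 4, 8, 8, 3
videos = torch.rand(nb_samples, nb_frames, H, W, C)
image_mask = torch.rand(nb_samples, H, W, C).round()    # 0/1 per-sample patch mask
padded_patch = torch.rand(nb_samples, H, W, C)

# insert a frame axis and repeat the mask / patch for every frame
image_mask = torch.repeat_interleave(image_mask.unsqueeze(1), repeats=nb_frames, dim=1)
padded_patch = torch.repeat_interleave(padded_patch.unsqueeze(1), repeats=nb_frames, dim=1)

# keep the background where the mask is 0, the patch where it is 1
combined = videos * (1.0 - image_mask) + padded_patch * image_mask
print(combined.shape)   # torch.Size([2, 4, 8, 8, 3])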
Example no. 30
def update_first_layer_conv(net, layers, layer_index, data_, dtype, scd_args, criterion,
                            target, device):
    layer = layers[layer_index]
    data = data_
    batch_size = scd_args.batch_size
    for idx in np.random.permutation(
            net._modules[layer].weight.shape[0])[:scd_args.updated_nodes]:
        net._modules[layer].bias[idx].zero_()
        # Get the global bias for this batch
        train_loss, global_bias = init_conv(net, data, layer, criterion,
                                          target, dtype, idx, scd_args)

        weights = net._modules[layer].weight
        weight_size = weights.size()[1:]
        n_nodes = weight_size[0] * weight_size[1] * weight_size[2]
        updated_features = min(n_nodes, scd_args.updated_conv_features)
        cords_index = np.random.choice(n_nodes, updated_features, False)
        cords = []
        for i in range(weight_size[0]):
            for j in range(weight_size[1]):
                for k in range(weight_size[2]):
                    cords.append([i, j, k])
        cords = torch.tensor(cords)[cords_index]

        best_w = weights[idx:idx+1].clone()
        w_incs1 = torch.tensor([-1, 1]).type_as(best_w) * scd_args.w_inc1
        if 'si' in layer:
            inc = []
            for i in range(w_incs1.shape[0]):
                w_inc = w_incs1[i]
                w_ = torch.repeat_interleave(
                    best_w, updated_features, dim=0)
                for j in range(updated_features):
                    w_[j, cords[j][0], cords[j][1], cords[j][2]] += w_inc
                inc.append(w_)
            w_ = torch.cat(inc, dim=0)
            del inc
            if scd_args.normalize:
                w_ /= w_.view((updated_features* w_incs1.shape[0], -1)).norm(dim=1).view((-1, 1, 1, 1))
            # w_ = torch.cat([w_, -1.0 * w_], dim=1)
        else:
            w_incs2 = -1

            w_ = torch.repeat_interleave(
                best_w, updated_features, dim=0)
            for i in range(updated_features):
                w_[i, cords[i][0], cords[i][1], cords[i][2]] *= w_incs2

        ic = updated_features * w_incs1.shape[0] if 'si' in layer else \
            updated_features

        temp_module = torch.nn.Conv2d(in_channels=data.size(1), out_channels=ic,
                kernel_size=list(weights.size()[2:]),
            padding=net._modules[layer].padding).to(dtype=dtype, device=device)
        temp_module.weight = nn.Parameter(w_)
        temp_module.bias.zero_()
        temp_module.requires_grad_(False)

        # projection's shape  nrows(1500) * ic(96) * H * W
        projection = temp_module(data)
        del temp_module
        new_projection, bias = update_conv_weight(projection, global_bias,
                                                  scd_args)
        del projection
        n_batch = data_.size(0) // batch_size
        yps = []
        for i in range(n_batch):
            new_projection_batch = new_projection[
                    batch_size * i: batch_size * (i + 1)]
            n_r = new_projection_batch.size(0)  # 1500
            n_w = new_projection_batch.size(1)  # 16
            n_b = new_projection_batch.size(2)  # 20
            height = new_projection_batch.size(3)  # 32
            width = new_projection_batch.size(4)
            new_projection_batch = new_projection_batch.reshape((n_r, n_w * n_b, height, width))
            # new_projection 1500*16*20  bias 16*20
            # original projection feed into next layer
            projection_batch = net(data[batch_size * i: batch_size * (i + 1)], input_=layer, layer=layer+'_projection')

            # replace channel idx of the original projection with each candidate
            # variation, after flattening the (weight, bias) variations into one axis
            projection_batch = torch.repeat_interleave(projection_batch.unsqueeze_(dim=1), n_w*n_b, dim=1)
            projection_batch[:, :, idx] = new_projection_batch
            del new_projection_batch
            projection_batch = projection_batch.transpose_(0, 1).reshape((-1, projection_batch.size(2), height, width))
            yp = net(projection_batch, input_=layer + '_ap').reshape((n_w * n_b, n_r, -1))
            del projection_batch
            yp = yp.transpose_(0, 1).reshape((n_r, n_w, n_b))
            yps.append(yp)
        yps = torch.cat(yps, dim=0)
        loss_group = criterion(yps, target[:n_batch * batch_size].unsqueeze(dim=1))
        loss_group = loss_group.cpu().numpy()
        new_loss = loss_group.min()
        if new_loss <= train_loss:
            row, col = np.unravel_index(loss_group.argmin(), loss_group.shape)
            net._modules[layer].weight[idx] = nn.Parameter(w_[row], requires_grad=False)
            net._modules[layer].bias[idx].fill_(bias[row, col])

        del w_, loss_group, bias
        return min(new_loss, train_loss)
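The 'si' branch above builds one copy of the kernel per updated coordinate and nudges a single (channel, h, w) entry in each copy. A small stand-alone sketch of that pattern with made-up sizes (names are illustrative, not the original API):

import torch

best_w = torch.zeros(1, 2, 3, 3)                     # one conv kernel (out=1, in=2, 3x3)
cords = torch.tensor([[0, 1, 1],
                      [1, 2, 0]])                    # coordinates to perturb
w_inc = 0.5

w_ = torch.repeat_interleave(best_w, cords.size(0), dim=0)   # one kernel copy per coordinate
for n in range(cords.size(0)):
    w_[n, cords[n][0], cords[n][1], cords[n][2]] += w_inc

print(w_[0, 0, 1, 1].item(), w_[1, 1, 2, 0].item())  # 0.5 0.5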