Example #1
 def transform(self, skels):
     # displacement between consecutive skeleton points
     d_skels = skels[:, 1:, :] - skels[:, :-1, :]
     # mean segment length per skeleton
     segment_len = (d_skels**2).sum(2).sqrt().mean(1)
     # tangent angle of each segment
     angles = torch.atan2(d_skels[:, :, 0], d_skels[:, :, 1])
     # unwrap along the body to remove 2*pi discontinuities
     self._unwrap_t(angles)
     # subtract the mean orientation
     mean_angle = angles.mean(1)
     angles -= angles.mean(1).view(-1, 1)
     # project the angle profile onto the eigenworm basis
     eigenworms = torch.matmul(angles, self.eigen_components.t())

     head_coords = skels[:, 0, :].contiguous()

     return head_coords, segment_len, mean_angle, eigenworms
Example #2
 def decode(sin_t, cos_t):
     theta = torch.atan2(sin_t.float(), cos_t.float())
     return theta
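A minimal round-trip sketch for this trigonometric angle encoding (assuming decode is available as a standalone function): an angle encoded as (sin, cos) is recovered by atan2.

import torch

theta = torch.tensor([0.3, -2.5])
recovered = decode(torch.sin(theta), torch.cos(theta))
# atan2(sin(theta), cos(theta)) reproduces theta for angles in (-pi, pi]
assert torch.allclose(recovered, theta, atol=1e-6)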
Example #3
def points_xyz_to_cylinder(points_xyz):
    points_x, points_y, points_z = torch.unbind(points_xyz, dim=-1)
    points_rho = torch.sqrt(points_x**2 + points_y**2)
    points_phi = torch.atan2(points_y, points_x)
    points_cylinder = torch.stack([points_phi, points_z, points_rho], dim=-1)
    return points_cylinder
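A minimal usage sketch, assuming points_xyz_to_cylinder is in scope; it maps (x, y, z) points to (phi, z, rho) cylinder coordinates.

import torch

pts = torch.tensor([[1.0, 1.0, 2.0],
                    [0.0, -3.0, 0.5]])
cyl = points_xyz_to_cylinder(pts)
# cyl[..., 0] is the azimuth atan2(y, x), cyl[..., 1] the original z,
# cyl[..., 2] the radius sqrt(x**2 + y**2)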
Example #4
    def extract_descriptor(self,
                           images):
        """ Main function of this class, which extracts the descriptors from
        a batch of images.

        Args:
            images : list of 2D arrays of int or float.
                List of images forming the batch. All images must be
                grayscale (two-dimensional), their values are assumed to
                be in the interval [0, 255], and all images must have the
                same size.

        Returns:
            (The explanation below is adapted from scikit-image.)
            descs : 4D array of floats
                Grid of DAISY descriptors for the given images as an array
                of dimensionality (N, R, P, Q) where
                ``N = len(images)``
                ``R = (rings * histograms + 1) * orientations``
                ``P = ceil((H - radius*2) / step)``
                ``Q = ceil((W - radius*2) / step)``
                with ``H`` and ``W`` the image height and width.
        """
        images = np.stack(images, axis=0)[:, None]
        images = torch.from_numpy(images.astype(np.float32)) / 255.0
        if self.fp16:
            images = images.half()
        else:
            images = images.float()
        if self.cuda:
            images = images.cuda()

        self.batch_size = images.shape[0]
        self.max_batch_size = max(self.max_batch_size, self.batch_size)

        if (self.dx is None or self.dx.shape[0] < self.max_batch_size or
                self.dx.shape[2] != images.shape[2] or
                self.dx.shape[3] != images.shape[3]):
            shape = (self.max_batch_size,) + images.shape[1:]
            self.dx = torch.zeros(shape)
            if self.fp16:
                self.dx = self.dx.half()
            else:
                self.dx = self.dx.float()
            if self.cuda:
                self.dx = self.dx.cuda()
            self.dy = torch.zeros(shape)
            if self.fp16:
                self.dy = self.dy.half()
            else:
                self.dy = self.dy.float()
            if self.cuda:
                self.dy = self.dy.cuda()
        dx = self.dx[:self.batch_size]
        dx[:, :, :, :-1] = (images[:, :, :, 1:] - images[:, :, :, :-1])
        dy = self.dy[:self.batch_size]
        dy[:, :, :-1, :] = (images[:, :, 1:, :] - images[:, :, :-1, :])

        # Compute gradient orientation and magnitude and their contribution
        # to the histograms.
        grad_mag = torch.sqrt(dx ** 2 + dy ** 2)
        grad_ori = torch.atan2(dy, dx)
        hist = torch.exp(self.orientation_kappa * torch.cos(
            grad_ori - self.orientation_angles))
        hist *= grad_mag

        # Smooth orientation histograms for the center and all rings.
        hist_smooth = self._compute_ring_histograms(hist)

        # Assemble descriptor grid.
        theta = np.array([2 * np.pi * j / self.histograms
                          for j in range(self.histograms)])
        desc_dims = (self.rings * self.histograms + 1) * self.orientations
        desc_shape = (self.max_batch_size, desc_dims,
                      images.shape[2] - 2 * self.radius,
                      images.shape[3] - 2 * self.radius)
        if self.descs is None or self.descs.shape != desc_shape:
            self.descs = torch.empty(desc_shape)
            if self.fp16:
                self.descs = self.descs.half()
            else:
                self.descs = self.descs.float()
            if self.cuda:
                self.descs = self.descs.cuda()
        descs = self.descs[:self.batch_size]
        descs[:, :self.orientations, :, :] = hist_smooth[
            :, 0, :, self.radius:-self.radius, self.radius:-self.radius]
        idx = self.orientations
        cos_theta = np.cos(theta)
        sin_theta = np.sin(theta)
        for i in range(self.rings):
            for j in range(self.histograms):
                y_min = self.radius + int(round(
                    self.ring_radii[i] * sin_theta[j]))
                y_max = descs.shape[2] + y_min
                x_min = self.radius + int(round(
                    self.ring_radii[i] * cos_theta[j]))
                x_max = descs.shape[3] + x_min
                # print(i, j, y_min, y_max, x_min, x_max)
                descs[:, idx:idx + self.orientations, :, :] = hist_smooth[
                    :, i + 1, :, y_min:y_max, x_min:x_max]
                idx += self.orientations
        descs = descs[:, :, ::self.step, ::self.step]

        # Normalize descriptors.
        if self.normalization != 'off':
            if self.fp16:
                descs += 1e-3
            else:
                descs += 1e-10
            if self.normalization == 'l1':
                descs /= torch.sum(descs, dim=1, keepdim=True)
            elif self.normalization == 'l2':
                descs /= torch.sqrt(torch.sum(
                    torch.pow(descs, 2), dim=1, keepdim=True))
            elif self.normalization == 'daisy':
                for i in range(0, desc_dims, self.orientations):
                    norms = torch.sqrt(torch.sum(
                        torch.pow(descs[:, i:i + self.orientations], 2),
                        dim=1, keepdim=True))
                    descs[:, i:i + self.orientations] /= norms

        if self.return_numpy:
            descs = descs.detach().cpu().numpy()

        return descs
Example #5
def main():
    args = parse_args()

    with open('models/pose/%s/config.yml' % args.pose_name, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    print('-' * 20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-' * 20)

    cudnn.benchmark = True

    df = pd.read_csv('inputs/train.csv')
    img_ids = df['ImageId'].values
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values +
                         '.jpg')
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values +
                          '.jpg')
    labels = np.array(
        [convert_str_to_labels(s) for s in df['PredictionString']])
    with open('outputs/decoded/val/%s.json' % args.det_name, 'r') as f:
        dets = json.load(f)

    if config['rot'] == 'eular':
        num_outputs = 3
    elif config['rot'] == 'trig':
        num_outputs = 6
    elif config['rot'] == 'quat':
        num_outputs = 4
    else:
        raise NotImplementedError

    test_transform = Compose([
        transforms.Resize(config['input_w'], config['input_h']),
        transforms.Normalize(),
        ToTensor(),
    ])

    det_df = {
        'ImageId': [],
        'img_path': [],
        'det': [],
        'mask': [],
    }

    name = '%s_%.2f' % (args.det_name, args.score_th)
    if args.nms:
        name += '_nms%.2f' % args.nms_th

    output_dir = 'processed/pose_images/val/%s' % name
    os.makedirs(output_dir, exist_ok=True)

    df = []
    kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        print('Fold [%d/%d]' % (fold + 1, config['n_splits']))

        # create model
        model = get_pose_model(config['arch'],
                               num_outputs=num_outputs,
                               freeze_bn=config['freeze_bn'])
        model = model.cuda()

        model_path = 'models/pose/%s/model_%d.pth' % (config['name'], fold + 1)
        if not os.path.exists(model_path):
            print('%s does not exist.' % model_path)
            continue
        model.load_state_dict(torch.load(model_path))

        model.eval()

        val_img_ids = img_ids[val_idx]
        val_img_paths = img_paths[val_idx]

        fold_det_df = {
            'ImageId': [],
            'img_path': [],
            'det': [],
            'mask': [],
        }

        for img_id, img_path in tqdm(zip(val_img_ids, val_img_paths),
                                     total=len(val_img_ids)):
            img = cv2.imread(img_path)
            height, width = img.shape[:2]

            det = np.array(dets[img_id])
            det = det[det[:, 6] > args.score_th]
            if args.nms:
                det = nms(det, dist_th=args.nms_th)

            for k in range(len(det)):
                pitch, yaw, roll, x, y, z, score, w, h = det[k]

                fold_det_df['ImageId'].append(img_id)
                fold_det_df['det'].append(det[k])
                output_path = '%s_%d.jpg' % (img_id, k)
                fold_det_df['img_path'].append(output_path)

                x, y = convert_3d_to_2d(x, y, z)
                w *= 1.1
                h *= 1.1
                xmin = int(round(x - w / 2))
                xmax = int(round(x + w / 2))
                ymin = int(round(y - h / 2))
                ymax = int(round(y + h / 2))

                cropped_img = img[ymin:ymax, xmin:xmax]
                if cropped_img.shape[0] > 0 and cropped_img.shape[1] > 0:
                    cv2.imwrite(os.path.join(output_dir, output_path),
                                cropped_img)
                    fold_det_df['mask'].append(1)
                else:
                    fold_det_df['mask'].append(0)

        fold_det_df = pd.DataFrame(fold_det_df)

        test_set = PoseDataset(output_dir + '/' +
                               fold_det_df['img_path'].values,
                               fold_det_df['det'].values,
                               transform=test_transform,
                               masks=fold_det_df['mask'].values)
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=config['batch_size'],
            shuffle=False,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        fold_dets = []
        with torch.no_grad():
            for input, batch_det, mask in tqdm(test_loader,
                                               total=len(test_loader)):
                input = input.cuda()
                batch_det = batch_det.numpy()
                mask = mask.numpy()

                output = model(input)
                output = output.cpu()

                if config['rot'] == 'trig':
                    yaw = torch.atan2(output[..., 1:2], output[..., 0:1])
                    pitch = torch.atan2(output[..., 3:4], output[..., 2:3])
                    roll = torch.atan2(output[..., 5:6], output[..., 4:5])
                    roll = rotate(roll, -np.pi)

                pitch = pitch.cpu().numpy()[:, 0]
                yaw = yaw.cpu().numpy()[:, 0]
                roll = roll.cpu().numpy()[:, 0]

                batch_det[mask, 0] = pitch[mask]
                batch_det[mask, 1] = yaw[mask]
                batch_det[mask, 2] = roll[mask]

                fold_dets.append(batch_det)

        fold_dets = np.vstack(fold_dets)

        fold_det_df['det'] = fold_dets.tolist()
        fold_det_df = fold_det_df.groupby('ImageId')['det'].apply(list)
        fold_det_df = pd.DataFrame({
            'ImageId': fold_det_df.index.values,
            'PredictionString': fold_det_df.values,
        })

        df.append(fold_det_df)
        break
    df = pd.concat(df).reset_index(drop=True)

    for i in tqdm(range(len(df))):
        img_id = df.loc[i, 'ImageId']
        det = np.array(df.loc[i, 'PredictionString'])

        if args.show:
            img = cv2.imread('inputs/train_images/%s.jpg' % img_id)
            img_pred = visualize(img, det)
            plt.imshow(img_pred[..., ::-1])
            plt.show()

        df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7])

    name += '_%s' % args.pose_name

    df.to_csv('outputs/submissions/val/%s.csv' % name, index=False)
Example #6
 def trans_no_rotation(self, state):
     """
     Transform the coordinate to agent-centric.
     Input tuple include robot state tensor and human state tensor.
     robot state tensor is of size (batch_size, number, state_length)(for example 100*1*9)
     human state tensor is of size (batch_size, number, state_length)(for example 100*5*5)
     """
     # for robot
     # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta'
     #  0     1      2     3      4        5     6      7         8
     # for human
     #  'px', 'py', 'vx', 'vy', 'radius'
     #  0     1      2     3      4
     assert len(state[0].shape) == 3
     if state[1] is None:
         robot_state = state[0]
         dx = robot_state[:, :, 5] - robot_state[:, :, 0]
         dy = robot_state[:, :, 6] - robot_state[:, :, 1]
         dx = dx.unsqueeze(1)
         dy = dy.unsqueeze(1)
         radius_r = robot_state[:, :, 4].unsqueeze(1)
         dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
         rot = torch.atan2(dy, dx)
         vx = robot_state[:, :, 2].unsqueeze(1)
         vy = robot_state[:, :, 3].unsqueeze(1)
         v_pref = robot_state[:, :, 7].unsqueeze(1)
         px_r = torch.zeros_like(v_pref)
         py_r = torch.zeros_like(v_pref)
         theta = robot_state[:, :, 8].unsqueeze(1)
         new_robot_state = torch.cat((px_r, py_r, vx, vy, radius_r, dg, rot, v_pref, theta), dim=2)
         new_state = (new_robot_state, None)
         return new_state
     else:
         batch = state[0].shape[0]
         robot_state = state[0]
         human_state = state[1]
         human_num = state[1].shape[1]
         dx = robot_state[:, :, 5] - robot_state[:, :, 0]
         dy = robot_state[:, :, 6] - robot_state[:, :, 1]
         dx = dx.unsqueeze(1)
         dy = dy.unsqueeze(1)
         radius_r = robot_state[:, :, 4].unsqueeze(1)
         dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
         rot = torch.atan2(dy, dx)
         vx = robot_state[:, :, 2].unsqueeze(1)
         vy = robot_state[:, :, 3].unsqueeze(1)
         v_pref = robot_state[:, :, 7].unsqueeze(1)
         px_r = torch.zeros_like(v_pref)
         py_r = torch.zeros_like(v_pref)
         theta = robot_state[:, :, 8].unsqueeze(1)
         new_robot_state = torch.cat((px_r, py_r, vx, vy, radius_r, dg, rot, v_pref, theta), dim=2)
         new_human_state = None
         for i in range(human_num):
             dx1 = human_state[:, i, 0].unsqueeze(1) - robot_state[:, :, 0]
             dy1 = human_state[:, i, 1].unsqueeze(1) - robot_state[:, :, 1]
             dx1 = dx1.unsqueeze(1).reshape((batch, 1, -1))
             dy1 = dy1.unsqueeze(1).reshape((batch, 1, -1))
             vx1 = (human_state[:, i, 2].unsqueeze(1).unsqueeze(2)).reshape((batch, 1, -1))
             vy1 = (human_state[:, i, 3].unsqueeze(1).unsqueeze(2)).reshape((batch, 1, -1))
             radius_h = human_state[:, i, 4].unsqueeze(1).unsqueeze(2)
             cur_human_state = torch.cat((dx1, dy1, vx1, vy1, radius_h), dim=2)
             if new_human_state is None:
                 new_human_state = cur_human_state
             else:
                 new_human_state = torch.cat((new_human_state, cur_human_state), dim=1)
         new_state = (new_robot_state, new_human_state)
         return new_state
Example #7
    def forward(self, inputs, lens=None):
        specs = self.stft(inputs)
        real = specs[:, :self.fft_len // 2 + 1]
        imag = specs[:, self.fft_len // 2 + 1:]
        spec_mags = torch.sqrt(real**2 + imag**2 + 1e-8)
        spec_phase = torch.atan2(imag, real)
        cspecs = torch.stack([real, imag], 1)
        cspecs = cspecs[:, :, 1:]
        '''
        means = torch.mean(cspecs, [1,2,3], keepdim=True)
        std = torch.std(cspecs, [1,2,3], keepdim=True )
        normed_cspecs = (cspecs-means)/(std+1e-8)
        out = normed_cspecs
        '''

        out = cspecs
        encoder_out = []

        for idx, layer in enumerate(self.encoder):
            out = layer(out)
            #    print('encoder', out.size())
            encoder_out.append(out)

        batch_size, channels, dims, lengths = out.size()
        out = out.permute(3, 0, 1, 2)
        if self.use_clstm:
            r_rnn_in = out[:, :, :channels // 2]
            i_rnn_in = out[:, :, channels // 2:]
            r_rnn_in = torch.reshape(
                r_rnn_in, [lengths, batch_size, channels // 2 * dims])
            i_rnn_in = torch.reshape(
                i_rnn_in, [lengths, batch_size, channels // 2 * dims])

            r_rnn_in, i_rnn_in = self.enhance([r_rnn_in, i_rnn_in])

            r_rnn_in = torch.reshape(
                r_rnn_in, [lengths, batch_size, channels // 2, dims])
            i_rnn_in = torch.reshape(
                i_rnn_in, [lengths, batch_size, channels // 2, dims])
            out = torch.cat([r_rnn_in, i_rnn_in], 2)

        else:
            # to [L, B, C, D]
            out = torch.reshape(out, [lengths, batch_size, channels * dims])
            out, _ = self.enhance(out)
            out = self.tranform(out)
            out = torch.reshape(out, [lengths, batch_size, channels, dims])

        out = out.permute(1, 2, 3, 0)

        for idx in range(len(self.decoder)):
            out = complex_cat([out, encoder_out[-1 - idx]], 1)
            out = self.decoder[idx](out)
            out = out[..., 1:]
        #    print('decoder', out.size())
        mask_real = out[:, 0]
        mask_imag = out[:, 1]
        mask_real = F.pad(mask_real, [0, 0, 1, 0])
        mask_imag = F.pad(mask_imag, [0, 0, 1, 0])

        if self.masking_mode == 'E':
            mask_mags = (mask_real**2 + mask_imag**2)**0.5
            real_phase = mask_real / (mask_mags + 1e-8)
            imag_phase = mask_imag / (mask_mags + 1e-8)
            mask_phase = torch.atan2(imag_phase, real_phase)

            # mask_mags = torch.clamp_(mask_mags,0,100)
            mask_mags = torch.tanh(mask_mags)
            est_mags = mask_mags * spec_mags
            est_phase = spec_phase + mask_phase
            real = est_mags * torch.cos(est_phase)
            imag = est_mags * torch.sin(est_phase)
        elif self.masking_mode == 'C':
            real, imag = real * mask_real - imag * mask_imag, real * mask_imag + imag * mask_real
        elif self.masking_mode == 'R':
            real, imag = real * mask_real, imag * mask_imag

        out_spec = torch.cat([real, imag], 1)
        out_wav = self.istft(out_spec)

        out_wav = torch.squeeze(out_wav, 1)
        # out_wav = torch.tanh(out_wav)
        # add _ to be a in-place operation
        out_wav = torch.clamp_(out_wav, -1, 1)
        return out_spec, out_wav
Example #8
    def _get_target_single(self, gt_bboxes, gt_labels, gt_bboxes_3d,
                           gt_labels_3d, centers2d, depths, attr_labels,
                           points, regress_ranges, num_points_per_lvl):
        """Compute regression and classification targets for a single image."""
        num_points = points.size(0)
        num_gts = gt_labels.size(0)
        if not isinstance(gt_bboxes_3d, torch.Tensor):
            gt_bboxes_3d = gt_bboxes_3d.tensor.to(gt_bboxes.device)
        if num_gts == 0:
            return gt_labels.new_full((num_points,), self.background_label), \
                   gt_bboxes.new_zeros((num_points, 4)), \
                   gt_labels_3d.new_full(
                       (num_points,), self.background_label), \
                   gt_bboxes_3d.new_zeros((num_points, self.bbox_code_size)), \
                   gt_bboxes_3d.new_zeros((num_points,)), \
                   attr_labels.new_full(
                       (num_points,), self.attr_background_label)

        # change orientation to local yaw
        gt_bboxes_3d[..., 6] = -torch.atan2(
            gt_bboxes_3d[..., 0], gt_bboxes_3d[..., 2]) + gt_bboxes_3d[..., 6]

        areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (gt_bboxes[:, 3] -
                                                       gt_bboxes[:, 1])
        areas = areas[None].repeat(num_points, 1)
        regress_ranges = regress_ranges[:, None, :].expand(
            num_points, num_gts, 2)
        gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)
        centers2d = centers2d[None].expand(num_points, num_gts, 2)
        gt_bboxes_3d = gt_bboxes_3d[None].expand(num_points, num_gts,
                                                 self.bbox_code_size)
        depths = depths[None, :, None].expand(num_points, num_gts, 1)
        xs, ys = points[:, 0], points[:, 1]
        xs = xs[:, None].expand(num_points, num_gts)
        ys = ys[:, None].expand(num_points, num_gts)

        delta_xs = (xs - centers2d[..., 0])[..., None]
        delta_ys = (ys - centers2d[..., 1])[..., None]
        bbox_targets_3d = torch.cat(
            (delta_xs, delta_ys, depths, gt_bboxes_3d[..., 3:]), dim=-1)

        left = xs - gt_bboxes[..., 0]
        right = gt_bboxes[..., 2] - xs
        top = ys - gt_bboxes[..., 1]
        bottom = gt_bboxes[..., 3] - ys
        bbox_targets = torch.stack((left, top, right, bottom), -1)

        assert self.center_sampling is True, 'Setting center_sampling to '\
            'False has not been implemented for FCOS3D.'
        # condition1: inside a `center bbox`
        radius = self.center_sample_radius
        center_xs = centers2d[..., 0]
        center_ys = centers2d[..., 1]
        center_gts = torch.zeros_like(gt_bboxes)
        stride = center_xs.new_zeros(center_xs.shape)

        # project the points on current lvl back to the `original` sizes
        lvl_begin = 0
        for lvl_idx, num_points_lvl in enumerate(num_points_per_lvl):
            lvl_end = lvl_begin + num_points_lvl
            stride[lvl_begin:lvl_end] = self.strides[lvl_idx] * radius
            lvl_begin = lvl_end

        center_gts[..., 0] = center_xs - stride
        center_gts[..., 1] = center_ys - stride
        center_gts[..., 2] = center_xs + stride
        center_gts[..., 3] = center_ys + stride

        cb_dist_left = xs - center_gts[..., 0]
        cb_dist_right = center_gts[..., 2] - xs
        cb_dist_top = ys - center_gts[..., 1]
        cb_dist_bottom = center_gts[..., 3] - ys
        center_bbox = torch.stack(
            (cb_dist_left, cb_dist_top, cb_dist_right, cb_dist_bottom), -1)
        inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0

        # condition2: limit the regression range for each location
        max_regress_distance = bbox_targets.max(-1)[0]
        inside_regress_range = (
            (max_regress_distance >= regress_ranges[..., 0])
            & (max_regress_distance <= regress_ranges[..., 1]))

        # center-based criterion to deal with ambiguity
        dists = torch.sqrt(torch.sum(bbox_targets_3d[..., :2]**2, dim=-1))
        dists[inside_gt_bbox_mask == 0] = INF
        dists[inside_regress_range == 0] = INF
        min_dist, min_dist_inds = dists.min(dim=1)

        labels = gt_labels[min_dist_inds]
        labels_3d = gt_labels_3d[min_dist_inds]
        attr_labels = attr_labels[min_dist_inds]
        labels[min_dist == INF] = self.background_label  # set as BG
        labels_3d[min_dist == INF] = self.background_label  # set as BG
        attr_labels[min_dist == INF] = self.attr_background_label

        bbox_targets = bbox_targets[range(num_points), min_dist_inds]
        bbox_targets_3d = bbox_targets_3d[range(num_points), min_dist_inds]
        relative_dists = torch.sqrt(
            torch.sum(bbox_targets_3d[..., :2]**2,
                      dim=-1)) / (1.414 * stride[:, 0])
        # [N, 1] / [N, 1]
        centerness_targets = torch.exp(-self.centerness_alpha * relative_dists)

        return labels, bbox_targets, labels_3d, bbox_targets_3d, \
            centerness_targets, attr_labels
Example #9
 def phase(self):
     return torch.atan2(self.imag, self.real)
Example #10
    def backward(ctx, grad_output):
        w_gt, h_gt, w, h = ctx.w_gt, ctx.h_gt, ctx.w, ctx.h

        arc = 8 * (torch.atan2(w_gt, h_gt) - torch.atan2(w, h)) / (np.pi**2)

        return -h_gt * arc, w_gt * arc, h * arc, w * arc
Example #11
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           dir_cls_preds,
                           attr_preds,
                           centernesses,
                           mlvl_points,
                           input_meta,
                           cfg,
                           rescale=False):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for a single scale level
                Has shape (num_points * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for a single scale
                level with shape (num_points * bbox_code_size, H, W).
            dir_cls_preds (list[Tensor]): Box scores for direction class
                predictions on a single scale level with shape \
                (num_points * 2, H, W)
            attr_preds (list[Tensor]): Attribute scores for each scale level
                Has shape (N, num_points * num_attrs, H, W)
            centernesses (list[Tensor]): Centerness for a single scale level
                with shape (num_points, H, W).
            mlvl_points (list[Tensor]): Box reference for a single scale level
                with shape (num_total_points, 2).
            input_meta (dict): Metadata of input image.
            cfg (mmcv.Config): Test / postprocessing configuration;
                if None, self.test_cfg is used.
            rescale (bool): If True, return boxes in original image space.

        Returns:
            tuples[Tensor]: Predicted 3D boxes, scores, labels and attributes.
        """
        view = np.array(input_meta['cam2img'])
        scale_factor = input_meta['scale_factor']
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        mlvl_centers2d = []
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_dir_scores = []
        mlvl_attr_scores = []
        mlvl_centerness = []

        for cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, \
                points in zip(cls_scores, bbox_preds, dir_cls_preds,
                              attr_preds, centernesses, mlvl_points):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
            attr_pred = attr_pred.permute(1, 2, 0).reshape(-1, self.num_attrs)
            attr_score = torch.max(attr_pred, dim=-1)[1]
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            bbox_pred = bbox_pred.permute(1, 2,
                                          0).reshape(-1,
                                                     sum(self.group_reg_dims))
            bbox_pred = bbox_pred[:, :self.bbox_code_size]
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * centerness[:, None]).max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                points = points[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                dir_cls_pred = dir_cls_pred[topk_inds, :]
                centerness = centerness[topk_inds]
                dir_cls_score = dir_cls_score[topk_inds]
                attr_score = attr_score[topk_inds]
            # change the offset to actual center predictions
            bbox_pred[:, :2] = points - bbox_pred[:, :2]
            if rescale:
                bbox_pred[:, :2] /= bbox_pred[:, :2].new_tensor(scale_factor)
            pred_center2d = bbox_pred[:, :3].clone()
            bbox_pred[:, :3] = self.pts2Dto3D(bbox_pred[:, :3], view)
            mlvl_centers2d.append(pred_center2d)
            mlvl_bboxes.append(bbox_pred)
            mlvl_scores.append(scores)
            mlvl_dir_scores.append(dir_cls_score)
            mlvl_attr_scores.append(attr_score)
            mlvl_centerness.append(centerness)

        mlvl_centers2d = torch.cat(mlvl_centers2d)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)

        # change local yaw to global yaw for 3D nms
        if mlvl_bboxes.shape[0] > 0:
            dir_rot = limit_period(mlvl_bboxes[..., 6] - self.dir_offset, 0,
                                   np.pi)
            mlvl_bboxes[...,
                        6] = (dir_rot + self.dir_offset +
                              np.pi * mlvl_dir_scores.to(mlvl_bboxes.dtype))

        cam_intrinsic = mlvl_centers2d.new_zeros((4, 4))
        cam_intrinsic[:view.shape[0], :view.shape[1]] = \
            mlvl_centers2d.new_tensor(view)
        mlvl_bboxes[:, 6] = torch.atan2(
            mlvl_centers2d[:, 0] - cam_intrinsic[0, 2],
            cam_intrinsic[0, 0]) + mlvl_bboxes[:, 6]
        mlvl_bboxes_for_nms = xywhr2xyxyr(
            input_meta['box_type_3d'](mlvl_bboxes,
                                      box_dim=self.bbox_code_size,
                                      origin=(0.5, 0.5, 0.5)).bev)

        mlvl_scores = torch.cat(mlvl_scores)
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        # note that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        mlvl_attr_scores = torch.cat(mlvl_attr_scores)
        mlvl_centerness = torch.cat(mlvl_centerness)
        # no scale_factors in box3d_multiclass_nms
        # Then we multiply it from outside
        mlvl_nms_scores = mlvl_scores * mlvl_centerness[:, None]
        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                       mlvl_nms_scores, cfg.score_thr,
                                       cfg.max_per_img, cfg, mlvl_dir_scores,
                                       mlvl_attr_scores)
        bboxes, scores, labels, dir_scores, attrs = results
        attrs = attrs.to(labels.dtype)  # change data type to int
        bboxes = input_meta['box_type_3d'](bboxes,
                                           box_dim=self.bbox_code_size,
                                           origin=(0.5, 0.5, 0.5))
        # Note that the predictions use origin (0.5, 0.5, 0.5)
        # Due to the ground truth centers2d are the gravity center of objects
        # v0.10.0 fix inplace operation to the input tensor of cam_box3d
        # So here we also need to add origin=(0.5, 0.5, 0.5)
        if not self.pred_attrs:
            attrs = None

        return bboxes, scores, labels, attrs
Example #12
 def forward(self, output, label):
     return torch.mean(
         torch.atan2((torch.norm(torch.cross(output, label), dim=1)),
                     (torch.sum(output * label, dim=1))) * 180 / np.pi)
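For reference, a self-contained sketch of the same angular-error idea as a plain function (an assumption that the module above wraps exactly this formula); the angle between corresponding row vectors is computed robustly via atan2(|a x b|, a . b) and averaged in degrees.

import numpy as np
import torch

def mean_angular_error_deg(output, label):
    # unsigned angle between corresponding rows, in degrees
    return torch.mean(
        torch.atan2(torch.norm(torch.cross(output, label, dim=1), dim=1),
                    torch.sum(output * label, dim=1)) * 180 / np.pi)

pred = torch.nn.functional.normalize(torch.randn(8, 3), dim=1)
print(mean_angular_error_deg(pred, pred))  # ~0 degrees for identical vectors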
Example #13
    def scale(self, scale_x: float, scale_y: float) -> None:
        """
        Scale the rotated box with horizontal and vertical scaling factors
        Note: when scale_factor_x != scale_factor_y,
        the rotated box does not preserve the rectangular shape when the angle
        is not a multiple of 90 degrees under resize transformation.
        Instead, the shape is a parallelogram (that has skew)
        Here we make an approximation by fitting a rotated rectangle to the parallelogram.
        """
        self.tensor[:, 0] *= scale_x
        self.tensor[:, 1] *= scale_y
        theta = self.tensor[:, 4] * math.pi / 180.0
        c = torch.cos(theta)
        s = torch.sin(theta)

        # In image space, y is top->down and x is left->right
        # Consider the local coordinate system for the rotated box,
        # where the box center is located at (0, 0), and the four vertices ABCD are
        # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2)
        # the midpoint of the left edge AD of the rotated box E is:
        # E = (A+D)/2 = (-w / 2, 0)
        # the midpoint of the top edge AB of the rotated box F is:
        # F(0, -h / 2)
        # To get the old coordinates in the global system, apply the rotation transformation
        # (Note: the right-handed coordinate system for image space is yOx):
        # (old_x, old_y) = (s * y + c * x, c * y - s * x)
        # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2)
        # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2)
        # After applying the scaling factor (sfx, sfy):
        # E(new) = (-sfx * c * w / 2, sfy * s * w / 2)
        # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2)
        # The new width after the scaling transformation becomes:

        # w(new) = |E(new) - O| * 2
        #        = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2
        #        = sqrt[(sfx * c)^2 + (sfy * s)^2] * w
        # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2]
        #
        # For example,
        # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x;
        # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y
        self.tensor[:, 2] *= torch.sqrt((scale_x * c)**2 + (scale_y * s)**2)

        # h(new) = |F(new) - O| * 2
        #        = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2
        #        = sqrt[(sfx * s)^2 + (sfy * c)^2] * h
        # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2]
        #
        # For example,
        # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y;
        # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x
        self.tensor[:, 3] *= torch.sqrt((scale_x * s)**2 + (scale_y * c)**2)

        # The angle is the rotation angle from y-axis in image space to the height
        # vector (top->down in the box's local coordinate system) of the box in CCW.
        #
        # angle(new) = angle_yOx(O - F(new))
        #            = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) )
        #            = atan2(sfx * s * h / 2, sfy * c * h / 2)
        #            = atan2(sfx * s, sfy * c)
        #
        # For example,
        # when sfx == sfy, angle(new) == atan2(s, c) == angle(old)
        self.tensor[:,
                    4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi
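A quick numeric sanity check of the scale factors derived in the comments above (a standalone sketch, independent of the box class): at 0 degrees the width scales by scale_x and the height by scale_y, at 90 degrees the factors swap, and the angle itself is unchanged at those two orientations.

import math
import torch

angle = torch.tensor([0.0, 90.0]) * math.pi / 180.0
c, s = torch.cos(angle), torch.sin(angle)
sx, sy = 2.0, 0.5
scale_w = torch.sqrt((sx * c)**2 + (sy * s)**2)           # ~tensor([2.0, 0.5])
scale_h = torch.sqrt((sx * s)**2 + (sy * c)**2)           # ~tensor([0.5, 2.0])
new_angle = torch.atan2(sx * s, sy * c) * 180 / math.pi   # ~tensor([0., 90.])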
Example #14
def solve_3d_bbox_single(bbox2D, corners, theta_l, calib):
    """
    Input:
        bbox2D: Tensor(4), [x1, y1, x2, y2]
        corners: Tensor(8, 3), aligned corners without rotation
        theta_l: camera direction [-pi, pi]
        calib: calibration matrices in KITTI
    """

    x1, y1, x2, y2 = bbox2D

    # useful calibrations
    P2 = calib['P2']
    R0_rect = torch.eye(4)
    R0_rect[:3, :3] = calib['R0_rect']
    K = torch.matmul(P2, R0_rect)

    # use 2D bbox to estimate global rotation
    theta_ray = torch.atan2(P2[0, 0], (x1 + x2) * 0.5 - P2[0, 2])
    ry = np.pi - theta_ray - theta_l

    Ry_T = torch.tensor([[torch.cos(ry), 0.0, -torch.sin(ry)], [0.0, 1.0, 0.0],
                         [torch.sin(ry), 0.0,
                          torch.cos(ry)]])

    corners = torch.matmul(corners, Ry_T)  # rotated corners

    # adjust front side
    if theta_l >= np.pi / 2.0 and theta_l < np.pi:
        corners = corners[[3, 0, 1, 2, 7, 4, 5, 6]].contiguous()
    elif theta_l >= -np.pi and theta_l < -np.pi / 2.0:
        corners = corners[[2, 3, 0, 1, 6, 7, 4, 5]].contiguous()
    elif theta_l >= -np.pi / 2.0 and theta_l < 0.0:
        corners = corners[[1, 2, 3, 0, 5, 6, 7, 4]].contiguous()

    # start solving the constraints
    X = torch.eye(4)
    A = torch.zeros(4, 3)
    b = torch.zeros(4)

    # prepare the constraints
    constrains = {}

    # x1 -> 7, 6
    constrains['x1'] = {}

    for i in [7, 6]:
        constrains['x1'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constrains['x1'][i]['A'] = K_X[0, :3] - x1 * K_X[2, :3]
        constrains['x1'][i]['b'] = x1 * K_X[2, 3] - K_X[0, 3]

    # x2 -> 4, 7
    constrains['x2'] = {}

    for i in [4, 7]:
        constrains['x2'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constrains['x2'][i]['A'] = K_X[0, :3] - x2 * K_X[2, :3]
        constrains['x2'][i]['b'] = x2 * K_X[2, 3] - K_X[0, 3]

    # y1 -> 4, 5, 6, 7
    constrains['y1'] = {}

    for i in [4, 5, 6, 7]:
        constrains['y1'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constrains['y1'][i]['A'] = K_X[1, :3] - y1 * K_X[2, :3]
        constrains['y1'][i]['b'] = y1 * K_X[2, 3] - K_X[1, 3]

    # y2 -> 2, 3, 0
    constrains['y2'] = {}

    for i in [2, 3, 0]:
        constrains['y2'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constrains['y2'][i]['A'] = K_X[1, :3] - y2 * K_X[2, :3]
        constrains['y2'][i]['b'] = y2 * K_X[2, 3] - K_X[1, 3]

    # solving linear functions
    error = float('inf')

    # case 1: only see front side
    A[0] = constrains['x1'][7]['A']
    b[0] = constrains['x1'][7]['b']
    A[1] = constrains['x2'][4]['A']
    b[1] = constrains['x2'][4]['b']

    for i in [3, 0]:
        A[2] = constrains['y2'][i]['A']
        b[2] = constrains['y2'][i]['b']
        for j in [4, 5, 6, 7]:
            A[3] = constrains['y1'][j]['A']
            b[3] = constrains['y1'][j]['b']

            trans_t = torch.matmul(torch.pinverse(A), b)
            error_t = torch.norm(torch.matmul(A, trans_t) - b)

            if error_t < error:
                trans = trans_t
                error = error_t

    # case 2: see both front side and lateral side
    A[0] = constrains['x1'][6]['A']
    b[0] = constrains['x1'][6]['b']

    for i in [2, 3, 0]:
        A[2] = constrains['y2'][i]['A']
        b[2] = constrains['y2'][i]['b']
        for j in [4, 5, 6, 7]:
            A[3] = constrains['y1'][j]['A']
            b[3] = constrains['y1'][j]['b']

            trans_t = torch.matmul(torch.pinverse(A), b)
            error_t = torch.norm(torch.matmul(A, trans_t) - b)

            if error_t < error:
                trans = trans_t
                error = error_t

    # case 3: only see lateral side
    A[1] = constrains['x2'][7]['A']
    b[1] = constrains['x2'][7]['b']

    for i in [2, 3]:
        A[2] = constrains['y2'][i]['A']
        b[2] = constrains['y2'][i]['b']
        for j in [4, 5, 6, 7]:
            A[3] = constrains['y1'][j]['A']
            b[3] = constrains['y1'][j]['b']

            trans_t = torch.matmul(torch.pinverse(A), b)
            error_t = torch.norm(torch.matmul(A, trans_t) - b)

            if error_t < error:
                trans = trans_t
                error = error_t

    return trans
Example #15
def R2euler(R):
    return stackify((torch.atan2(R[2, 1], R[2, 2]),
                     torch.atan2(-R[2, 0],
                                 torch.sqrt(R[0, 0]**2 + R[1, 0]**2)),
                     torch.atan2(R[1, 0], R[0, 0])))
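A self-contained sketch of the same Euler-angle extraction, with torch.stack standing in for the undefined stackify helper (an assumption about what stackify does); the identity rotation maps to zero angles.

import torch

def rotation_to_euler(R):
    roll = torch.atan2(R[2, 1], R[2, 2])
    pitch = torch.atan2(-R[2, 0], torch.sqrt(R[0, 0]**2 + R[1, 0]**2))
    yaw = torch.atan2(R[1, 0], R[0, 0])
    return torch.stack((roll, pitch, yaw))

print(rotation_to_euler(torch.eye(3)))  # tensor([0., 0., 0.])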
Example #16
def cubic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnorm_derivatives_left,
    unnorm_derivatives_right,
    inverse=False,
    left=0.0,
    right=1.0,
    bottom=0.0,
    top=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    eps=DEFAULT_EPS,
    quadratic_threshold=DEFAULT_QUADRATIC_THRESHOLD,
):
    """
    References:
    > Blinn, J. F. (2007). How to solve a cubic equation, part 5: Back to numerics. IEEE Computer
    Graphics and Applications, 27(3):78–89.
    """
    if torch.min(inputs) < left or torch.max(inputs) > right:
        raise InputOutsideDomain()

    num_bins = unnormalized_widths.shape[-1]

    if min_bin_width * num_bins > 1.0:
        raise ValueError("Minimal bin width too large for the number of bins")
    if min_bin_height * num_bins > 1.0:
        raise ValueError("Minimal bin height too large for the number of bins")

    if inverse:
        inputs = (inputs - bottom) / (top - bottom)
    else:
        inputs = (inputs - left) / (right - left)

    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths

    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths[..., -1] = 1
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)

    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights

    cumheights = torch.cumsum(heights, dim=-1)
    cumheights[..., -1] = 1
    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)

    slopes = heights / widths
    min_something_1 = torch.min(torch.abs(slopes[..., :-1]),
                                torch.abs(slopes[..., 1:]))
    min_something_2 = (0.5 * (widths[..., 1:] * slopes[..., :-1] +
                              widths[..., :-1] * slopes[..., 1:]) /
                       (widths[..., :-1] + widths[..., 1:]))
    min_something = torch.min(min_something_1, min_something_2)

    derivatives_left = (torch.sigmoid(unnorm_derivatives_left) * 3 *
                        slopes[..., 0][..., None])
    derivatives_right = (torch.sigmoid(unnorm_derivatives_right) * 3 *
                         slopes[..., -1][..., None])

    derivatives = min_something * (torch.sign(slopes[..., :-1]) +
                                   torch.sign(slopes[..., 1:]))
    derivatives = torch.cat([derivatives_left, derivatives, derivatives_right],
                            dim=-1)

    a = (derivatives[..., :-1] + derivatives[..., 1:] -
         2 * slopes) / widths.pow(2)
    b = (3 * slopes - 2 * derivatives[..., :-1] -
         derivatives[..., 1:]) / widths
    c = derivatives[..., :-1]
    d = cumheights[..., :-1]

    if inverse:
        bin_idx = torchutils.searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = torchutils.searchsorted(cumwidths, inputs)[..., None]

    inputs_a = a.gather(-1, bin_idx)[..., 0]
    inputs_b = b.gather(-1, bin_idx)[..., 0]
    inputs_c = c.gather(-1, bin_idx)[..., 0]
    inputs_d = d.gather(-1, bin_idx)[..., 0]

    input_left_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_right_cumwidths = cumwidths.gather(-1, bin_idx + 1)[..., 0]

    if inverse:
        # Modified coefficients for solving the cubic.
        inputs_b_ = (inputs_b / inputs_a) / 3.0
        inputs_c_ = (inputs_c / inputs_a) / 3.0
        inputs_d_ = (inputs_d - inputs) / inputs_a

        delta_1 = -inputs_b_.pow(2) + inputs_c_
        delta_2 = -inputs_c_ * inputs_b_ + inputs_d_
        delta_3 = inputs_b_ * inputs_d_ - inputs_c_.pow(2)

        discriminant = 4.0 * delta_1 * delta_3 - delta_2.pow(2)

        depressed_1 = -2.0 * inputs_b_ * delta_1 + delta_2
        depressed_2 = delta_1

        three_roots_mask = (
            discriminant >= 0
        )  # Discriminant == 0 might be a problem in practice.
        one_root_mask = discriminant < 0

        outputs = torch.zeros_like(inputs)

        # Deal with one root cases.

        p = torchutils.cbrt((-depressed_1[one_root_mask] +
                             torch.sqrt(-discriminant[one_root_mask])) / 2.0)
        q = torchutils.cbrt((-depressed_1[one_root_mask] -
                             torch.sqrt(-discriminant[one_root_mask])) / 2.0)

        outputs[one_root_mask] = ((p + q) - inputs_b_[one_root_mask] +
                                  input_left_cumwidths[one_root_mask])

        # Deal with three root cases.

        theta = torch.atan2(torch.sqrt(discriminant[three_roots_mask]),
                            -depressed_1[three_roots_mask])
        theta /= 3.0

        cubic_root_1 = torch.cos(theta)
        cubic_root_2 = torch.sin(theta)

        root_1 = cubic_root_1
        root_2 = -0.5 * cubic_root_1 - 0.5 * math.sqrt(3) * cubic_root_2
        root_3 = -0.5 * cubic_root_1 + 0.5 * math.sqrt(3) * cubic_root_2

        root_scale = 2 * torch.sqrt(-depressed_2[three_roots_mask])
        root_shift = (-inputs_b_[three_roots_mask] +
                      input_left_cumwidths[three_roots_mask])

        root_1 = root_1 * root_scale + root_shift
        root_2 = root_2 * root_scale + root_shift
        root_3 = root_3 * root_scale + root_shift

        root1_mask = ((input_left_cumwidths[three_roots_mask] - eps) <
                      root_1).float()
        root1_mask *= (
            root_1 < (input_right_cumwidths[three_roots_mask] + eps)).float()

        root2_mask = ((input_left_cumwidths[three_roots_mask] - eps) <
                      root_2).float()
        root2_mask *= (
            root_2 < (input_right_cumwidths[three_roots_mask] + eps)).float()

        root3_mask = ((input_left_cumwidths[three_roots_mask] - eps) <
                      root_3).float()
        root3_mask *= (
            root_3 < (input_right_cumwidths[three_roots_mask] + eps)).float()

        roots = torch.stack([root_1, root_2, root_3], dim=-1)
        masks = torch.stack([root1_mask, root2_mask, root3_mask], dim=-1)
        mask_index = torch.argsort(masks, dim=-1,
                                   descending=True)[..., 0][..., None]
        outputs[three_roots_mask] = torch.gather(roots,
                                                 dim=-1,
                                                 index=mask_index).view(-1)

        # Deal with a -> 0 (almost quadratic) cases.

        quadratic_mask = inputs_a.abs() < quadratic_threshold
        a = inputs_b[quadratic_mask]
        b = inputs_c[quadratic_mask]
        c = inputs_d[quadratic_mask] - inputs[quadratic_mask]
        alpha = (-b + torch.sqrt(b.pow(2) - 4 * a * c)) / (2 * a)
        outputs[quadratic_mask] = alpha + input_left_cumwidths[quadratic_mask]

        shifted_outputs = outputs - input_left_cumwidths
        logabsdet = -torch.log((3 * inputs_a * shifted_outputs.pow(2) +
                                2 * inputs_b * shifted_outputs + inputs_c))
    else:
        shifted_inputs = inputs - input_left_cumwidths
        outputs = (inputs_a * shifted_inputs.pow(3) +
                   inputs_b * shifted_inputs.pow(2) +
                   inputs_c * shifted_inputs + inputs_d)

        logabsdet = torch.log((3 * inputs_a * shifted_inputs.pow(2) +
                               2 * inputs_b * shifted_inputs + inputs_c))

    if inverse:
        outputs = outputs * (right - left) + left
    else:
        outputs = outputs * (top - bottom) + bottom

    return outputs, logabsdet
Example #17
File: rsan.py Project: cwghnu/asteroid
def mag_phase(complex_tensor):
    mag = (complex_tensor.pow(2.0).sum(-1) + 1e-8).pow(0.5 * 1.0)
    phase = torch.atan2(complex_tensor[..., 1], complex_tensor[..., 0])
    return mag, phase
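A minimal usage sketch: the last dimension of complex_tensor holds (real, imag) pairs, e.g. as produced by torch.view_as_real on a complex spectrogram (an assumption about how the input is laid out upstream).

import torch

spec = torch.view_as_real(torch.randn(2, 257, 100, dtype=torch.complex64))  # (2, 257, 100, 2)
mag, phase = mag_phase(spec)  # both have shape (2, 257, 100)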
Example #18
 def phase(x):
     phase = torch.atan2(x[..., 0], x[..., 1])
     return phase
Example #19
 def rotate(self, state):
     """
     Transform the coordinate to agent-centric.
     Input tuple include robot state tensor and human state tensor.
     robot state tensor is of size (batch_size, number, state_length)(for example 100*1*9)
     human state tensor is of size (batch_size, number, state_length)(for example 100*5*5)
     """
     # for robot
     # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta'
     #  0     1      2     3      4        5     6      7         8
     # for human
     #  'px', 'py', 'vx', 'vy', 'radius'
     #  0     1      2     3      4
     assert len(state[0].shape) == 3
     if len(state[1].shape) == 3:
         batch = state[0].shape[0]
         robot_state = state[0]
         human_state = state[1]
         human_num = state[1].shape[1]
         dx = robot_state[:, :, 5] - robot_state[:, :, 0]
         dy = robot_state[:, :, 6] - robot_state[:, :, 1]
         dx = dx.unsqueeze(1)
         dy = dy.unsqueeze(1)
         dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
         rot = torch.atan2(dy, dx)
         cos_rot = torch.cos(rot)
         sin_rot = torch.sin(rot)
         transform_matrix = torch.cat((cos_rot, -sin_rot, sin_rot, cos_rot), dim=1).reshape(batch, 2, 2)
         robot_velocities = torch.bmm(robot_state[:, :, 2:4], transform_matrix)
         radius_r = robot_state[:, :, 4].unsqueeze(1)
         v_pref = robot_state[:, :, 7].unsqueeze(1)
         target_heading = torch.zeros_like(radius_r)
         pos_r = torch.zeros_like(robot_velocities)
         cur_heading = (robot_state[:, :, 8].unsqueeze(1) - rot + np.pi) % (2 * np.pi) - np.pi
         new_robot_state = torch.cat((pos_r, robot_velocities, radius_r, dg, target_heading, v_pref, cur_heading), dim=2)
         human_positions = human_state[:, :, 0:2] - robot_state[:, :, 0:2]
         human_positions = torch.bmm(human_positions, transform_matrix)
         human_velocities = human_state[:, :, 2:4]
         human_velocities = torch.bmm(human_velocities, transform_matrix)
         human_radius = human_state[:, :, 4].unsqueeze(2) + 0.3
         new_human_state = torch.cat((human_positions, human_velocities, human_radius), dim=2)
         new_state = (new_robot_state, new_human_state)
         return new_state
     else:
         batch = state[0].shape[0]
         robot_state = state[0]
         dx = robot_state[:, :, 5] - robot_state[:, :, 0]
         dy = robot_state[:, :, 6] - robot_state[:, :, 1]
         dx = dx.unsqueeze(1)
         dy = dy.unsqueeze(1)
         radius_r = robot_state[:, :, 4].unsqueeze(1)
         dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
         rot = torch.atan2(dy, dx)
         cos_rot = torch.cos(rot)
         sin_rot = torch.sin(rot)
         vx = (robot_state[:, :, 2].unsqueeze(1) * cos_rot +
               robot_state[:, :, 3].unsqueeze(1) * sin_rot).reshape((batch, 1, -1))
         vy = (robot_state[:, :, 3].unsqueeze(1) * cos_rot -
               robot_state[:, :, 2].unsqueeze(1) * sin_rot).reshape((batch, 1, -1))
         v_pref = robot_state[:, :, 7].unsqueeze(1)
         theta = robot_state[:, :, 8].unsqueeze(1)
         px_r = torch.zeros_like(v_pref)
         py_r = torch.zeros_like(v_pref)
         new_robot_state = torch.cat((px_r, py_r, vx, vy, radius_r, dg, rot, v_pref, theta), dim=2)
         new_state = (new_robot_state, None)
         return new_state
Example #20
 def get_angle(v1, v2):
     return torch.atan2(
         torch.cross(v1, v2, dim=1).norm(p=2, dim=1), (v1 * v2).sum(dim=1))
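A minimal usage sketch, assuming get_angle is available as a standalone function: v1 and v2 are (N, 3) batches of vectors and the result is the unsigned angle between corresponding rows, in radians.

import torch

v1 = torch.tensor([[1.0, 0.0, 0.0]])
v2 = torch.tensor([[0.0, 1.0, 0.0]])
print(get_angle(v1, v2))  # tensor([1.5708]), i.e. pi/2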
Example #21
 def _homography_joint_svd(
     self,
     top_corners: torch.Tensor,  # in [-1, 1]
     bottom_corners: torch.Tensor,  # in [-1, 1]
     floor_z: float = -1.6,
     ceil_z: float = 1.6,
 ):
     b, N, _ = top_corners.size()
     floor_u = bottom_corners[:, :, 0] * np.pi
     floor_v = bottom_corners[:, :, 1] * (-0.5 * np.pi)
     floor_c = floor_z / torch.tan(floor_v)
     floor_x = floor_c * torch.sin(floor_u)
     floor_y = -floor_c * torch.cos(floor_u)
     floor_xy = torch.stack([floor_x, floor_y], dim=-1)
     floor_scale = self._get_scale_all(floor_xy)
     floor_scale = floor_scale / 2.0
     floor_ceil_c = torch.linalg.norm(floor_xy, ord=2, dim=-1)
     floor_ceil_v = top_corners[:, :, 1] * (-0.5 * np.pi)
     floor_ceil_z = (floor_ceil_c * torch.tan(floor_ceil_v)).mean(
         dim=1, keepdim=True)
     floor_ceil_z = floor_ceil_z.unsqueeze(1).expand(b, 4, 1).contiguous()
     ceil_u_t = top_corners[:, :, 0] * np.pi
     ceil_v_t = top_corners[:, :, 1] * (-0.5 * np.pi)
     ceil_c = ceil_z / torch.tan(ceil_v_t)
     ceil_x = ceil_c * torch.sin(ceil_u_t)
     ceil_y = -ceil_c * torch.cos(ceil_u_t)
     ceil_xy = torch.stack([ceil_x, ceil_y], dim=-1)
     ceil_floor_c = torch.linalg.norm(ceil_xy, ord=2, dim=-1)
     ceil_v_b = bottom_corners[:, :, 1] * (-0.5 * np.pi)
     ceil_floor_z = (ceil_floor_c * torch.tan(ceil_v_b)).mean(dim=1,
                                                              keepdim=True)
     fix_ceil = -ceil_z / ceil_floor_z
     ceil_z_fix = ceil_z * fix_ceil
     ceil_z_fix = ceil_z_fix.unsqueeze(1).expand(b, 4, 1).contiguous()
     ceil_floor_fixed_c = ceil_z_fix.squeeze(-1) / torch.tan(ceil_v_t)
     ceil_x = ceil_floor_fixed_c * torch.sin(ceil_u_t)
     ceil_y = -ceil_floor_fixed_c * torch.cos(ceil_u_t)
     ceil_xy = torch.stack([ceil_x, ceil_y], dim=-1)
     ceil_scale = self._get_scale_all(ceil_xy)
     ceil_scale = ceil_scale / 2.0
     joint_xy = 0.5 * (floor_xy + ceil_xy)
     joint_scale = 0.5 * (floor_scale + ceil_scale)
     joint_centroid = joint_xy.mean(dim=1)
     joint_xy = joint_xy - joint_centroid.unsqueeze(1)
     inds = torch.sort(
         torch.atan2(joint_xy[..., 0], joint_xy[..., 1] + 1e-12))[1]
     axes = self.cuboid_axes[:, inds.squeeze(), :]
     homography = kornia.get_perspective_transform(joint_xy, axes)
     homogeneous = torch.cat(
         [joint_xy, torch.ones_like(joint_xy[..., -1:])], dim=2)
     xformed = (homography @ homogeneous.transpose(1, 2)).transpose(1, 2)
     xformed = xformed[:, :, :2] / xformed[:, :, 2].unsqueeze(-1)
     rect_joint_xy = xformed * joint_scale.unsqueeze(
         1) + joint_centroid.unsqueeze(1)
     original_xy = joint_xy + joint_centroid.unsqueeze(1)
     R, t, s = self._svd(rect_joint_xy, original_xy[:, inds.squeeze(), :])
     rect_joint_xy = self._transform_points(rect_joint_xy, R, t, s)
     bottom_points = torch.cat(
         [rect_joint_xy, floor_z * torch.ones_like(floor_c.unsqueeze(-1))],
         dim=-1)
     top_points = torch.cat([rect_joint_xy, ceil_z_fix], dim=-1)
     return top_points, bottom_points
Example #22
def extract_ampl_phase(fft_im):
    # fft_im: size should be bx3xhxwx2
    fft_amp = fft_im[:, :, :, :, 0]**2 + fft_im[:, :, :, :, 1]**2
    fft_amp = torch.sqrt(fft_amp)
    fft_pha = torch.atan2(fft_im[:, :, :, :, 1], fft_im[:, :, :, :, 0])
    return fft_amp, fft_pha
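A minimal usage sketch. The function expects the old (B, 3, H, W, 2) real/imag layout; with the modern torch.fft API that layout can be recreated via torch.view_as_real (an assumption about how fft_im was produced upstream).

import torch

img = torch.randn(4, 3, 64, 64)
fft_im = torch.view_as_real(torch.fft.fft2(img))  # (4, 3, 64, 64, 2)
amp, pha = extract_ampl_phase(fft_im)             # both (4, 3, 64, 64)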
Example #23
def xyz2uv(xy, z=-1):
    c = torch.sqrt((xy**2).sum(1))
    u = torch.atan2(xy[:, 1], xy[:, 0]).view(-1, 1)
    v = torch.atan2(torch.zeros_like(c) + z, c).view(-1, 1)
    return torch.cat([u, v], dim=1)
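A minimal usage sketch: xy is an (N, 2) tensor of floor-plane coordinates and the output is the corresponding (N, 2) panorama angles, with u the azimuth and v the elevation towards height z.

import torch

xy = torch.tensor([[1.0, 0.0], [0.0, 2.0]])
uv = xyz2uv(xy, z=-1)  # u = atan2(y, x), v = atan2(z, sqrt(x**2 + y**2))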
Example #24
def getCouzinModelDir_vector_torch(diff, angles, r_r, r_s, viewingzone):
    # This is a partially optimized way to calculate the desired direction. It is
    # "partially optimized" because some of the steps below could probably be done
    # better - the baseline performance is about the same as the naive CPU
    # implementation. However, the scaling with a larger number of neighbors is much
    # better: adding a larger social zone (and therefore more neighbors) does not
    # change the running time, whereas it does significantly for the regular version.
    ntorch = torch.from_numpy(diff)
    viewtest = torch.from_numpy(np.cos([viewingzone]))
    anglestorch = torch.from_numpy(angles[:, None])

    if viewingzone < np.pi:
        viewneighbors = torch.cos(
            anglestorch -
            torch.atan2(ntorch[:, :, 1], ntorch[:, :, 0])) > viewtest
    else:
        viewneighbors = torch.ones(diff.shape[0:2])
        viewneighbors = viewneighbors.type(torch.uint8)

    # this wasn't any faster.
#     calc = torch.cos(anglestorch-torch.atan2(ntorch[:,:,1],ntorch[:,:,0]))
#     viewneighbors = calc.ge(viewtest)

    repzone = viewneighbors & (ntorch[:, :, 2] <= r_r) & (ntorch[:, :, 2] > 0)
    socialzone = viewneighbors & (np.logical_not(repzone)) & (
        ntorch[:, :, 2] <= r_s) & (ntorch[:, :, 2] > 0)

    userep = torch.sum(repzone, 1) > 0
    usesocial = np.logical_not(userep) & (torch.sum(socialzone, 1) > 0)

    repzonedouble = repzone.type(torch.DoubleTensor)
    socialzonedouble = socialzone.type(torch.DoubleTensor)

    xrep = ntorch[:, :, 0] * repzonedouble
    yrep = ntorch[:, :, 1] * repzonedouble
    distrep = ntorch[:, :, 2][repzone]
    xrep[repzone] = xrep[repzone] / distrep
    yrep[repzone] = yrep[repzone] / distrep
    rx = -torch.sum(xrep, 1)
    ry = -torch.sum(yrep, 1)
    rx = rx[userep]
    ry = ry[userep]
    rnorm = torch.sqrt(rx**2 + ry**2)
    rx, ry = rx / rnorm, ry / rnorm

    xsoc = ntorch[:, :, 0] * socialzonedouble
    ysoc = ntorch[:, :, 1] * socialzonedouble
    distsoc = ntorch[:, :, 2][socialzone]
    xsoc[socialzone] = xsoc[socialzone] / distsoc
    ysoc[socialzone] = ysoc[socialzone] / distsoc
    ax, ay = torch.sum(xsoc, 1), torch.sum(ysoc, 1)
    ax = ax[usesocial]
    ay = ay[usesocial]

    vxsoc = ntorch[:, :, 3] * socialzonedouble
    vysoc = ntorch[:, :, 4] * socialzonedouble
    vnorm = torch.sqrt(vxsoc[socialzone]**2 + vysoc[socialzone]**2)
    vxsoc[socialzone] = vxsoc[socialzone] / vnorm
    vysoc[socialzone] = vysoc[socialzone] / vnorm
    ox, oy = torch.sum(vxsoc, 1), torch.sum(vysoc, 1)
    ox = ox[usesocial]
    oy = oy[usesocial]
    sx, sy = ax + ox, ay + oy
    snorm = torch.sqrt(sx**2 + sy**2)
    sx = sx / snorm
    sy = sy / snorm

    newdirs = torch.zeros((len(diff), 2))
    newdirs = newdirs.type(torch.DoubleTensor)
    newdirs[userep, 0] = rx
    newdirs[userep, 1] = ry
    newdirs[usesocial, 0] = sx
    newdirs[usesocial, 1] = sy

    return newdirs.data.numpy()
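
# --- Usage sketch (added for illustration, not part of the original snippet) ---
# The shapes below are assumptions read off the indexing above: diff[i, j] holds
# (dx, dy, distance, neighbor_vx, neighbor_vy) for focal agent i and neighbor j,
# and angles[i] is the current heading of agent i in radians.
import numpy as np

n_agents, n_neighbors = 8, 20
dxy = np.random.randn(n_agents, n_neighbors, 2)
dist = np.linalg.norm(dxy, axis=-1, keepdims=True)
vel = np.random.randn(n_agents, n_neighbors, 2)
diff = np.concatenate([dxy, dist, vel], axis=-1)     # (8, 20, 5)
headings = np.random.uniform(-np.pi, np.pi, n_agents)

new_dirs = getCouzinModelDir_vector_torch(
    diff, headings, r_r=0.5, r_s=3.0, viewingzone=3 * np.pi / 4)
print(new_dirs.shape)   # (8, 2) desired direction per agent (zero rows if no neighbors in view)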
Example #25
0
    def _predict(self):
        # This is just _build_network in tf-faster-rcnn
        torch.backends.cudnn.benchmark = False
        net_conv = self._image_to_head()

        # build the anchors for the image
        self._anchor_component(net_conv.size(2), net_conv.size(3))

        rois = self._region_proposal(net_conv)
        if cfg.POOLING_MODE == 'align':
            pool5 = self._roi_align_layer(net_conv, rois)
        else:
            pool5 = self._roi_pool_layer(net_conv, rois)

        if self._mode == 'TRAIN':
            torch.backends.cudnn.benchmark = True  # benchmark because the input sizes are now fixed
        fc7 = self._head_to_tail(pool5)

        cls_prob, bbox_pred = self._region_classification(fc7)

        # print("pool5 = {}".format(pool5.shape))
        # print("fc7 = {}".format(fc7.shape))
        # print("rois = {}".format(rois.shape))
        # print("bbox_pred = {}".format(bbox_pred.shape))
        # print("bbox_pred_net = {}".format(self.bbox_pred_net.weight))

        # return rois, cls_prob, bbox_pred

        num_rois = rois.shape[0]
        z = self.relation_fc_1(fc7)
        z = F.relu(self.relation_fc_2(z))
        eps = torch.mm(z, z.t())
        _, indices = torch.topk(eps, k=32, dim=0)

        cls_w = self.cls_score_net.weight
        represent = torch.mm(cls_prob, cls_w)
        # print("cls_w = {}, cls_prob = {}, represent = {}".format(cls_w.shape, cls_prob.shape, represent.shape))

        cls_pred = torch.max(cls_prob, 1)[1]
        bbox_pred_reshape = bbox_pred.view(-1, 1001, 4)
        bbox_pred_cls = torch.zeros(num_rois, 4)
        for i, cls in enumerate(cls_pred):
            bbox_pred_cls[i] = bbox_pred_reshape[i][cls]
        bbox_pred_ctr = bbox_pred_cls[:, 0:2] + bbox_pred_cls[:, 2:4]

        relation = torch.empty(2, 32 * num_rois,
                               dtype=torch.long).to(self._device)
        # U = torch.empty(32*128, 2).to(self._device)

        relation[0] = torch.arange(num_rois).repeat(32)
        relation[1] = indices.view(-1)

        # print("relation[0] = {}".format(relation[0]))
        # print("bbox_pred_ctr ={}".format(bbox_pred_ctr[relation[0]]))
        coord_i = bbox_pred_ctr[relation[0]]
        coord_j = bbox_pred_ctr[relation[1]]
        # print("coord_i = {}, coord_j= {}".format(coord_i.shape, coord_j.shape))
        d = torch.sqrt((coord_i[:, 0] - coord_j[:, 0])**2 +
                       (coord_i[:, 1] - coord_j[:, 1])**2)
        theta = torch.atan2((coord_j[:, 1] - coord_i[:, 1]),
                            (coord_j[:, 0] - coord_i[:, 0]))
        U = torch.stack([d, theta], dim=1).to(self._device)

        # print("represent = {} ".format(represent.data))
        # print("relation = {} ".format(relation.data))
        # print("U = {} ".format(U.data))

        f = self.gaussian(represent, relation, U)
        f2 = F.relu(self.sg_conv_1(f))
        h = F.relu(self.sg_conv_2(f2))

        # print("fc7 = {}, h = {}".format(fc7.shape, h.shape))
        new_f = torch.cat([fc7, h], dim=1)
        # print("fc7 = {}, h = {}, new_f = {}".format(fc7.shape, h.shape, new_f.shape))
        new_cls_prob, new_bbox_pred = self._new_region_classification(new_f)

        for k in self._predictions.keys():
            self._score_summaries[k] = self._predictions[k]

        # print("rois = {}, new_cls_prob = {}, new_bbox_pred = {}".format(rois.shape, new_cls_prob.shape, new_bbox_pred.shape))

        return rois, new_cls_prob, new_bbox_pred
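
# --- Standalone sketch (added for illustration, not from the original class) of the
# pairwise relation feature built above: for each (i, j) pair of box centres it
# stacks the Euclidean distance and the torch.atan2 bearing from box i to box j.
import torch

def relation_features(centres, pairs):
    """centres: (N, 2) box centres; pairs: (2, K) long tensor of (i, j) indices."""
    ci, cj = centres[pairs[0]], centres[pairs[1]]
    d = torch.sqrt(((cj - ci) ** 2).sum(dim=1))
    theta = torch.atan2(cj[:, 1] - ci[:, 1], cj[:, 0] - ci[:, 0])
    return torch.stack([d, theta], dim=1)            # (K, 2), like U above

centres = torch.rand(5, 2) * 100
pairs = torch.combinations(torch.arange(5), r=2).t()
print(relation_features(centres, pairs).shape)       # torch.Size([10, 2])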
Example #26
0
def single_step_euler(ode_params, x_curr, y_curr, z_curr, t_curr, input_params,
                      device_name):

    h = ode_params.h
    A = ode_params.A
    f2 = ode_params.f2
    rrpc = ode_params.rrpc.float()

    a_p = input_params[0]
    a_q = input_params[3]
    a_r = input_params[6]
    a_s = input_params[9]
    a_t = input_params[12]

    b_p = input_params[1]
    b_q = input_params[4]
    b_r = input_params[7]
    b_s = input_params[10]
    b_t = input_params[13]

    theta_p = input_params[2]
    theta_q = input_params[5]
    theta_r = input_params[8]
    theta_s = input_params[11]
    theta_t = input_params[14]

    alpha = 1 - (x_curr * x_curr + y_curr * y_curr)**0.5
    cast = (t_curr / h).type(torch.IntTensor)
    tensor_temp = 1 + cast
    tensor_temp = tensor_temp % len(rrpc)
    # print(tensor_temp.numpy())
    # print(len(rrpc))
    #tensor_temp = tf.reshape(tensor_temp, [])
    if rrpc[tensor_temp] == 0:
        print("***inside zero***")
        omega = (2.0 * math.pi / 1e-3)
        # omega = torch.tensor(math.inf).to(device_name)
    else:
        omega = (2.0 * math.pi / rrpc[tensor_temp]).to(device_name)

    d_x_d_t_next = alpha * x_curr - omega * y_curr

    d_y_d_t_next = alpha * y_curr + omega * x_curr

    theta = torch.atan2(y_curr, x_curr)
    delta_theta_p = torch.fmod(theta - theta_p, 2 * math.pi)
    delta_theta_q = torch.fmod(theta - theta_q, 2 * math.pi)
    delta_theta_r = torch.fmod(theta - theta_r, 2 * math.pi)
    delta_theta_s = torch.fmod(theta - theta_s, 2 * math.pi)
    delta_theta_t = torch.fmod(theta - theta_t, 2 * math.pi)

    z_p = a_p * delta_theta_p * \
           torch.exp((- delta_theta_p * delta_theta_p / (2 * b_p * b_p)))

    z_q = a_q * delta_theta_q * \
           torch.exp((- delta_theta_q * delta_theta_q / (2 * b_q * b_q)))

    z_r = a_r * delta_theta_r * \
           torch.exp((- delta_theta_r * delta_theta_r / (2 * b_r * b_r)))

    z_s = a_s * delta_theta_s * \
           torch.exp((- delta_theta_s * delta_theta_s / (2 * b_s * b_s)))

    z_t = a_t * delta_theta_t * \
          torch.exp((- delta_theta_t * delta_theta_t / (2 * b_t * b_t)))

    z_0_t = (
        A * torch.sin(torch.tensor(2 * math.pi).to(device_name) * f2 *
                      t_curr).to(device_name)).to(device_name)

    d_z_d_t_next = -1 * (z_p + z_q + z_r + z_s + z_t) - (z_curr - z_0_t)

    k1_x = h * d_x_d_t_next

    k1_y = h * d_y_d_t_next

    k1_z = h * d_z_d_t_next
    # Calculate next stage:
    x_next = x_curr + k1_x
    y_next = y_curr + k1_y
    z_next = z_curr + k1_z

    return x_next, y_next, z_next
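
# --- Usage sketch (added for illustration): one explicit-Euler roll-out of the step
# above, which integrates a limit-cycle ECG model with Gaussian P/Q/R/S/T bumps.
# All parameter values and the SimpleNamespace container are placeholders, not the
# configuration used by the original project.
import math
import torch
from types import SimpleNamespace

ode_params = SimpleNamespace(
    h=torch.tensor(1.0 / 512),          # integration step [s]
    A=torch.tensor(0.005),              # baseline-wander amplitude
    f2=torch.tensor(0.25),              # baseline-wander frequency [Hz]
    rrpc=torch.full((512,), 1.0))       # per-step RR-interval proxy
# five events, each parameterised as (a_i, b_i, theta_i)
input_params = torch.tensor([
    1.2, 0.25, -math.pi / 3,            # P
    -5.0, 0.1, -math.pi / 12,           # Q
    30.0, 0.1, 0.0,                     # R
    -7.5, 0.1, math.pi / 12,            # S
    0.75, 0.4, math.pi / 2])            # T
x, y, z = torch.tensor([-1.0]), torch.tensor([0.0]), torch.tensor([0.0])
trace = []
for step in range(1024):
    t = torch.tensor([step]) * ode_params.h
    x, y, z = single_step_euler(ode_params, x, y, z, t, input_params, 'cpu')
    trace.append(z.item())              # trace ends up holding a synthetic ECG-like signal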
Example #27
0
    def predict(self, example, preds_dicts, test_cfg, **kwargs):
        """decode, nms, then return the detection result. Additionaly support double flip testing 
        """
        # get loss info
        rets = []
        metas = []

        double_flip = test_cfg.get('double_flip', False)

        post_center_range = test_cfg.post_center_limit_range
        if len(post_center_range) > 0:
            post_center_range = torch.tensor(
                post_center_range,
                dtype=preds_dicts[0]['hm'].dtype,
                device=preds_dicts[0]['hm'].device,
            )

        for task_id, preds_dict in enumerate(preds_dicts):
            # convert N C H W to N H W C
            for key, val in preds_dict.items():
                preds_dict[key] = val.permute(0, 2, 3, 1).contiguous()

            batch_size = preds_dict['hm'].shape[0]

            if double_flip:
            assert batch_size % 4 == 0, batch_size
                batch_size = int(batch_size / 4)
                for k in preds_dict.keys():
                    # Transform the prediction maps back to their original coordinates before flipping.
                    # The flipped predictions are ordered in groups of 4: the first is the original
                    # point cloud, the second is the X-flip point cloud (y=-y), the third is the
                    # Y-flip point cloud (x=-x), and the last is the X-and-Y-flip point cloud
                    # (x=-x, y=-y).
                    # Also note that PyTorch's flip operates on tensor dimensions, so in the
                    # (batch, H, W, C) slices below dims=[1] flips the axis of length H (normally
                    # the image's Y axis), which in the usual geometric convention is a flip across
                    # the X axis. The flips below follow PyTorch's definition: yflip (y=-y),
                    # xflip (x=-x).
                    _, H, W, C = preds_dict[k].shape
                    preds_dict[k] = preds_dict[k].reshape(
                        int(batch_size), 4, H, W, C)
                    preds_dict[k][:, 1] = torch.flip(preds_dict[k][:, 1],
                                                     dims=[1])
                    preds_dict[k][:, 2] = torch.flip(preds_dict[k][:, 2],
                                                     dims=[2])
                    preds_dict[k][:, 3] = torch.flip(preds_dict[k][:, 3],
                                                     dims=[1, 2])

            if "metadata" not in example or len(example["metadata"]) == 0:
                meta_list = [None] * batch_size
            else:
                meta_list = example["metadata"]
                if double_flip:
                    meta_list = meta_list[:4 * int(batch_size):4]

            batch_hm = torch.sigmoid(preds_dict['hm'])

            batch_dim = torch.exp(preds_dict['dim'])

            batch_rots = preds_dict['rot'][..., 0:1]
            batch_rotc = preds_dict['rot'][..., 1:2]
            batch_reg = preds_dict['reg']
            batch_hei = preds_dict['height']

            if double_flip:
                batch_hm = batch_hm.mean(dim=1)
                batch_hei = batch_hei.mean(dim=1)
                batch_dim = batch_dim.mean(dim=1)

                # y = -y reg_y = 1-reg_y
                batch_reg[:, 1, ..., 1] = 1 - batch_reg[:, 1, ..., 1]
                batch_reg[:, 2, ..., 0] = 1 - batch_reg[:, 2, ..., 0]

                batch_reg[:, 3, ..., 0] = 1 - batch_reg[:, 3, ..., 0]
                batch_reg[:, 3, ..., 1] = 1 - batch_reg[:, 3, ..., 1]
                batch_reg = batch_reg.mean(dim=1)

                # first yflip
                # y = -y theta = pi -theta
                # sin(pi-theta) = sin(theta) cos(pi-theta) = -cos(theta)
                # batch_rots[:, 1] the same
                batch_rotc[:, 1] *= -1

                # then xflip x = -x theta = 2pi - theta
                # sin(2pi - theta) = -sin(theta) cos(2pi - theta) = cos(theta)
                # batch_rots[:, 2] the same
                batch_rots[:, 2] *= -1

                # double flip
                batch_rots[:, 3] *= -1
                batch_rotc[:, 3] *= -1

                batch_rotc = batch_rotc.mean(dim=1)
                batch_rots = batch_rots.mean(dim=1)

            batch_rot = torch.atan2(batch_rots, batch_rotc)

            batch, H, W, num_cls = batch_hm.size()

            batch_reg = batch_reg.reshape(batch, H * W, 2)
            batch_hei = batch_hei.reshape(batch, H * W, 1)

            batch_rot = batch_rot.reshape(batch, H * W, 1)
            batch_dim = batch_dim.reshape(batch, H * W, 3)
            batch_hm = batch_hm.reshape(batch, H * W, num_cls)

            ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)])
            ys = ys.view(1, H, W).repeat(batch, 1, 1).to(batch_hm)
            xs = xs.view(1, H, W).repeat(batch, 1, 1).to(batch_hm)

            xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1]
            ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2]

            xs = xs * test_cfg.out_size_factor * test_cfg.voxel_size[
                0] + test_cfg.pc_range[0]
            ys = ys * test_cfg.out_size_factor * test_cfg.voxel_size[
                1] + test_cfg.pc_range[1]

            if 'vel' in preds_dict:
                batch_vel = preds_dict['vel']

                if double_flip:
                    # flip vy
                    batch_vel[:, 1, ..., 1] *= -1
                    # flip vx
                    batch_vel[:, 2, ..., 0] *= -1

                    batch_vel[:, 3] *= -1

                    batch_vel = batch_vel.mean(dim=1)

                batch_vel = batch_vel.reshape(batch, H * W, 2)
                batch_box_preds = torch.cat(
                    [xs, ys, batch_hei, batch_dim, batch_vel, batch_rot],
                    dim=2)
            else:
                batch_box_preds = torch.cat(
                    [xs, ys, batch_hei, batch_dim, batch_rot], dim=2)

            metas.append(meta_list)

            if test_cfg.get('per_class_nms', False):
                pass
            else:
                rets.append(
                    self.post_processing(batch_box_preds, batch_hm, test_cfg,
                                         post_center_range, task_id))

        # Merge branches results
        ret_list = []
        num_samples = len(rets[0])

        for i in range(num_samples):
            ret = {}
            for k in rets[0][i].keys():
                if k in ["box3d_lidar", "scores"]:
                    ret[k] = torch.cat([r[i][k] for r in rets])
                elif k in ["label_preds"]:
                    flag = 0
                    for j, num_class in enumerate(self.num_classes):
                        rets[j][i][k] += flag
                        flag += num_class
                    ret[k] = torch.cat([r[i][k] for r in rets])

            ret['metadata'] = metas[0][i]
            ret_list.append(ret)

        return ret_list
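
# --- Standalone sketch (added for illustration, not from the original class) of the
# decode step above: heat-map cell indices plus regressed offsets become metric x/y,
# and the (sin, cos) pair becomes a yaw angle via torch.atan2. The config values are
# placeholders.
import torch

H, W = 4, 6
out_size_factor, voxel_size, pc_range = 8, (0.2, 0.2), (-51.2, -51.2)

ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)])
reg = torch.rand(1, H * W, 2)                        # sub-cell offsets
rot_sin, rot_cos = torch.rand(1, H * W, 1), torch.rand(1, H * W, 1)

xs = xs.reshape(1, H * W, 1).float() + reg[:, :, 0:1]
ys = ys.reshape(1, H * W, 1).float() + reg[:, :, 1:2]
x_metric = xs * out_size_factor * voxel_size[0] + pc_range[0]
y_metric = ys * out_size_factor * voxel_size[1] + pc_range[1]
yaw = torch.atan2(rot_sin, rot_cos)
print(x_metric.shape, y_metric.shape, yaw.shape)     # each torch.Size([1, 24, 1])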
Example #28
0
        if not (configs.mosaic and configs.show_train_data):
            img_file = img_files[0]
            img_rgb = cv2.imread(img_file)
            calib = kitti_data_utils.Calibration(
                img_file.replace(".png", ".txt").replace("image_2", "calib"))
            objects_pred = invert_target(targets[:, 1:],
                                         calib,
                                         img_rgb.shape,
                                         RGB_Map=None)
            img_rgb = show_image_with_boxes(img_rgb, objects_pred, calib,
                                            False)

        # Rescale target
        targets[:, 2:6] *= configs.img_size
        # Get yaw angle
        targets[:, 6] = torch.atan2(targets[:, 6], targets[:, 7])

        img_bev = imgs.squeeze() * 255
        img_bev = img_bev.permute(1, 2, 0).numpy().astype(np.uint8)
        img_bev = cv2.resize(img_bev, (configs.img_size, configs.img_size))

        for c, x, y, w, l, yaw in targets[:, 1:7].numpy():
            # Draw rotated box
            bev_utils.drawRotatedBox(img_bev, x, y, w, l, yaw,
                                     cnf.colors[int(c)])

        img_bev = cv2.flip(cv2.flip(img_bev, 0), 1)

        if configs.mosaic and configs.show_train_data:
            cv2.imshow('mosaic_sample', img_bev)
        else:
Example #29
0
def main():
    #model select
    print('Model initializing\n')
    net = torch.nn.DataParallel(AttentionModel(257, hidden_size = args.hidden_size, dropout_p = args.dropout_p, use_attn = args.attn_use, stacked_encoder = args.stacked_encoder, attn_len = args.attn_len))

    #Check point load
    print('Trying Checkpoint Load\n')
    best_PESQ = 0.
    best_STOI = 0.
    ckpt_path = args.ckpt_path

    if os.path.exists(ckpt_path):
        ckpt = torch.load(ckpt_path)
        try:
            net.load_state_dict(ckpt['model'])
            net = net.module  # unwrap DataParallel
            best_STOI = ckpt['best_STOI']

            print('checkpoint is loaded !')
            print('current best STOI : %.4f' % best_STOI)
        except RuntimeError as e:
            print('wrong checkpoint\n')
    else:
        print('checkpoint does not exist!')
        print('current best STOI : %.4f' % best_STOI)

    #test phase
    net.eval()
    with torch.no_grad():
        inputData, sr = librosa.load(args.noisy_wav, sr=None)
        outputData, sr = librosa.load(args.clean_wav, sr=None)
        inputData = np.float32(inputData)
        outputData = np.float32(outputData)
        mixed_audio = torch.from_numpy(inputData).type(torch.FloatTensor)
        clean_audio = torch.from_numpy(outputData).type(torch.FloatTensor)

        mixed = stft(mixed_audio)
        mixed = mixed.unsqueeze(0)
        mixed = mixed.transpose(1,2)
        cleaned = stft(clean_audio)
        cleaned = cleaned.unsqueeze(0)
        cleaned = cleaned.transpose(1,2)
        real, imag = mixed[..., 0], mixed[..., 1]
        clean_real, clean_imag = cleaned[..., 0], cleaned[..., 1]
        mag = torch.sqrt(real**2 + imag**2)
        clean_mag = torch.sqrt(clean_real**2 + clean_imag**2)
        phase = torch.atan2(imag, real)

        logits_mag, logits_attn_weight = net(mag)
        logits_real = logits_mag * torch.cos(phase)
        logits_imag = logits_mag * torch.sin(phase)
        logits_real, logits_imag = torch.squeeze(logits_real, 1), torch.squeeze(logits_imag, 1)
        logits_real = logits_real.transpose(1,2)
        logits_imag = logits_imag.transpose(1,2)

        logits_audio = istft(logits_real, logits_imag, inputData.shape[0])
        logits_audio = torch.squeeze(logits_audio, dim=1)

        print(logits_audio[0])
        librosa.output.write_wav('./out.wav', logits_audio[0].cpu().data.numpy(), 16000)
        test_loss = F.mse_loss(logits_mag, clean_mag, True)
        test_PESQ = pesq(outputData, logits_audio[0].detach().cpu().numpy(), 16000)
        test_STOI = stoi(outputData, logits_audio[0].detach().cpu().numpy(), 16000, extended=False)

        print("Saved attention weight visualization to attention_viz.png")
        utils.plot_head_map(logits_attn_weight[0])

        # FIXME - Issue with pcm_f32le. Require pcm_s16le
        print("Saved clean spectrogram visualization to spec_clean.png")
        clean_spect = utils.make_spectrogram_array(args.clean_wav)
        utils.save_spectrogram(clean_spect, 'clean')

        print("Saved noisy spectrogram visualization to spec_noisy.png")
        noisy_spect = utils.make_spectrogram_array(args.noisy_wav)
        utils.save_spectrogram(noisy_spect, 'noisy')

        print("Saved enhanced spectrogram visualization to spec_enhanced.png")
        enhanced_spect = utils.make_spectrogram_array('./out.wav')
        utils.save_spectrogram(enhanced_spect, 'enhanced')

        #test accuracy
        print('test loss : {:.4f} PESQ : {:.4f} STOI : {:.4f}'.format(test_loss, test_PESQ, test_STOI))
Example #30
0
 def forward(self, input1, input2):
     self.save_for_backward(input1, input2)
     return torch.atan2(input1, input2)
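
 # --- Companion sketch (added for illustration, not from the original snippet): a
 # plausible backward for the Function above, using d/dy atan2(y, x) = x / (x^2 + y^2)
 # and d/dx atan2(y, x) = -y / (x^2 + y^2), with input1 = y and input2 = x.
 def backward(self, grad_output):
     input1, input2 = self.saved_tensors
     denom = input1 ** 2 + input2 ** 2
     grad_input1 = grad_output * input2 / denom
     grad_input2 = -grad_output * input1 / denom
     return grad_input1, grad_input2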
Example #31
0
def angle(complex_tensor):
    return torch.atan2(complex_tensor[..., 1], complex_tensor[..., 0])
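
# --- Usage note (added for illustration): on the interleaved real/imag layout this
# matches torch.angle applied to the corresponding complex tensor.
import torch

x = torch.randn(4, 7, 2)
print(torch.allclose(angle(x), torch.angle(torch.view_as_complex(x))))  # True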
Example #32
0
 def forward(self, input, return_rot_matrix = True):
     xy = self.features(self.input_norm(input)).view(-1, 2)
     angle = torch.atan2(xy[:, 0] + 1e-8, xy[:, 1] + 1e-8)
     if return_rot_matrix:
         return get_rotation_matrix(angle)
     return angle
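
# --- Hedged sketch (added for illustration) of a get_rotation_matrix helper that is
# compatible with the forward above; the original implementation is not shown here.
import torch

def get_rotation_matrix(angle):
    """angle: (B,) tensor of radians -> (B, 2, 2) batch of 2D rotation matrices."""
    cos, sin = torch.cos(angle), torch.sin(angle)
    return torch.stack([torch.stack([cos, -sin], dim=1),
                        torch.stack([sin, cos], dim=1)], dim=1)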