Esempio n. 1
0
    def __call__(self, x):
        """Run the PSPNet forward pass.

        Args:
            x: Input array or Variable.

        Returns:
            Training time: a tuple ``(aux, h)`` holding the output of the
                auxiliary branch followed by the output of the main branch.
            Inference time: only the main-branch output, a single Variable
                of shape ``(N, n_class, H, W)`` where ``N`` is the batch
                size, ``n_class`` is the number of classes specified in the
                constructor and ``H, W`` is the input image size.

        """
        training = chainer.settings.train
        out_size = x.shape[2:]

        if training:
            # While training, the trunk also yields the feature map that
            # feeds the auxiliary head.
            aux_feat, feat = self.trunk(x)
            aux = self.out_aux(F.dropout(self.cbr_aux(aux_feat), ratio=0.1))
            aux = F.resize_images(aux, out_size)
        else:
            feat = self.trunk(x)

        # Main branch: pyramid pooling -> conv block -> classifier -> upsample.
        main = F.dropout(self.cbr_main(self.ppm(feat)), ratio=0.1)
        main = F.resize_images(self.out_main(main), out_size)

        return (aux, main) if training else main
Esempio n. 2
0
    def __call__(self, x, img_size):
        """Compute the two score maps, upsampled to ``img_size``.

        The backbone extractor call is disabled in this variant, so ``x`` is
        fed directly to ``res5``; the output of ``res5`` is asserted to be
        1/16 of ``img_size`` along both spatial axes.

        Args:
            x: Input passed to ``self.res5``.
            img_size: Target ``(height, width)``; both entries must be
                multiples of 16.

        Returns:
            Tuple ``(score, score_oc)``, each resized to ``img_size``.
        """
        assert img_size[0] % 16 == 0
        assert img_size[1] % 16 == 0

        # res5 (extractor is skipped: x is used as-is)
        feat = self.res5(x)  # 1/16

        assert feat.shape[2] == (img_size[0] / 16)
        assert feat.shape[3] == (img_size[1] / 16)

        conv6 = self.conv6(feat)  # 1/16

        # Both heads read conv6 and are resized from 1/16 to 1/1.
        score = F.resize_images(self.score_fr(conv6), img_size)
        score_oc = F.resize_images(self.score_oc(conv6), img_size)

        return score, score_oc
    def __call__(self, orig_img):
        """Detect poses in a single image.

        Args:
            orig_img: Image array whose shape unpacks as
                ``(height, width, channels)``. It is copied first, so the
                caller's array is not modified here.

        Returns:
            ``(poses, scores)``. When ``self.precise`` is set, whatever
            ``self.detect_precise`` returns. When no peak is found, an empty
            ``(0, len(JointType), 3)`` array and an empty score array.
        """
        image = orig_img.copy()
        if self.precise:
            return self.detect_precise(image)
        img_h, img_w, _ = image.shape

        in_w, in_h = self.compute_optimal_size(image, params['inference_img_size'])
        map_w, map_h = self.compute_optimal_size(image, params['heatmap_size'])

        x_data = self.preprocess(cv2.resize(image, (in_w, in_h)))

        use_gpu = self.device >= 0
        if use_gpu:
            x_data = cuda.to_gpu(x_data)

        h1s, h2s = self.model(x_data)

        # Only the last stage's outputs are used, resized to the heatmap size.
        map_size = (map_h, map_w)
        pafs = F.resize_images(h1s[-1], map_size).data[0]
        heatmaps = F.resize_images(h2s[-1], map_size).data[0]

        if use_gpu:
            # Only pafs is converted with .get(); heatmaps are passed on as-is.
            pafs = pafs.get()
            cuda.get_device_from_id(self.device).synchronize()

        all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
        if len(all_peaks) == 0:
            return np.empty((0, len(JointType), 3)), np.empty(0)

        all_connections = self.compute_connections(pafs, all_peaks, map_w, params)
        subsets = self.grouping_key_points(all_connections, all_peaks, params)

        # Rescale peak columns 1 and 2 from heatmap size to image size.
        all_peaks[:, 1] *= img_w / map_w
        all_peaks[:, 2] *= img_h / map_h

        return self.subsets_to_pose_array(subsets, all_peaks), subsets[:, -2]
Esempio n. 4
0
    def extract(self, images):
        """Run inference at every configured scale and blend the outputs.

        Each scale's three outputs are multiplied by
        ``self.weights[i] / sum(self.weights)`` and summed over scales.
        Outputs from scales other than 1 are first resized to
        ``512 // downscale`` on both axes.

        Args:
            images: Input images; converted with ``self.xp.asarray`` and
                passed through ``self.models[0].prepare``.

        Returns:
            Tuple ``(h_seg, h_hor, h_ver)`` moved to host memory with
            ``cuda.to_cpu``.
        """
        base_model = self.models[0]
        prepared = base_model.prepare(self.xp.asarray(images), 1)
        weight_total = sum(self.weights)

        seg_parts, hor_parts, ver_parts = [], [], []
        for idx, scale in enumerate(self.scales):
            height, width = prepared.shape[2:]
            scaled_size = (int((height // base_model.downscale) * scale),
                           int((width // base_model.downscale) * scale))
            scaled_input = F.resize_images(prepared, scaled_size)

            # Inference only: train mode off, no graph construction.
            with chainer.using_config('train', False), \
                    chainer.no_backprop_mode():
                h_seg, h_hor, h_ver = self.__call__(scaled_input)

            if scale != 1:
                # Bring non-native scales back to the common output size.
                side = 512 // base_model.downscale
                h_seg = F.resize_images(h_seg, (side, side))
                h_hor = F.resize_images(h_hor, (side, side))
                h_ver = F.resize_images(h_ver, (side, side))

            weight = self.weights[idx] / weight_total
            seg_parts.append(h_seg.array * weight)
            hor_parts.append(h_hor.array * weight)
            ver_parts.append(h_ver.array * weight)

        return (cuda.to_cpu(sum(seg_parts)),
                cuda.to_cpu(sum(hor_parts)),
                cuda.to_cpu(sum(ver_parts)))
Esempio n. 5
0
    def __call__(self, orig_img):
        """Estimate poses for one image.

        Args:
            orig_img: Image array whose shape unpacks as
                ``(height, width, channels)``; a copy is processed.

        Returns:
            ``(poses, scores)``. With ``self.precise`` set, the result of
            ``self.detect_precise``. If no peak is found, an empty
            ``(0, len(JointType), 3)`` array and an empty score array.
        """
        frame = orig_img.copy()
        if self.precise:
            return self.detect_precise(frame)
        frame_h, frame_w, _ = frame.shape

        net_w, net_h = self.compute_optimal_size(frame, params['inference_img_size'])
        map_w, map_h = self.compute_optimal_size(frame, params['heatmap_size'])

        x_data = self.preprocess(cv2.resize(frame, (net_w, net_h)))

        gpu_enabled = self.device >= 0
        if gpu_enabled:
            x_data = cuda.to_gpu(x_data)

        h1s, h2s = self.model(x_data)

        # Take the final stage only and resize it to the heatmap size.
        target = (map_h, map_w)
        pafs = F.resize_images(h1s[-1], target).data[0]
        heatmaps = F.resize_images(h2s[-1], target).data[0]

        if gpu_enabled:
            # Only pafs goes through .get(); heatmaps are left unchanged.
            pafs = pafs.get()
            cuda.get_device_from_id(self.device).synchronize()

        all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
        if len(all_peaks) == 0:
            return np.empty((0, len(JointType), 3)), np.empty(0)

        all_connections = self.compute_connections(pafs, all_peaks, map_w, params)
        subsets = self.grouping_key_points(all_connections, all_peaks, params)

        # Scale peak columns 1 and 2 back to the original image size.
        all_peaks[:, 1] *= frame_w / map_w
        all_peaks[:, 2] *= frame_h / map_h

        poses = self.subsets_to_pose_array(subsets, all_peaks)
        return poses, subsets[:, -2]
Esempio n. 6
0
    def shared_middle(self, batch_size, width_rgb, width_flow, rpn_scores_rgb, rpn_locs_rgb, rpn_scores_flow, rpn_locs_flow,
                      anchor_rgb, gt_segments_rgb, labels, seg_info):
        """Average the RGB and flow RPN outputs, then sample training RoIs.

        The flow outputs are resampled to the RGB width before being averaged
        with the RGB outputs. The merged scores and locs go through
        ``nms_process`` and ``proposal_target_creator`` of
        ``self.time_seg_train_chain_rgb``.

        Shapes noted in the original code: ``rpn_scores_rgb`` is
        ``(N, W_rgb * A, 2)`` and ``rpn_scores_flow`` is ``(N, W_flow * A, 2)``.

        Returns:
            Tuple ``(sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label)``.
        """
        n_anchor = anchor_rgb.shape[1]

        def flow_to_rgb_width(flow_out):
            # (B, W_flow * A, 2) -> (B, 2, W_flow, A)
            grid = F.transpose(flow_out.reshape(batch_size, width_flow, n_anchor, 2), axes=(0, 3, 1, 2))
            # Resample the width axis: (B, 2, W_rgb, A)
            grid = F.resize_images(grid, (width_rgb, n_anchor))
            # (B, W_rgb, A, 2) -> (B, W_rgb * A, 2)
            return F.reshape(F.transpose(grid, axes=(0, 2, 3, 1)),
                             shape=(batch_size, width_rgb * n_anchor, 2))

        rpn_locs = F.average(F.stack([rpn_locs_rgb, flow_to_rgb_width(rpn_locs_flow)]), axis=0)
        rpn_scores = F.average(F.stack([rpn_scores_rgb, flow_to_rgb_width(rpn_scores_flow)]), axis=0)

        rgb_chain = self.time_seg_train_chain_rgb
        rois, roi_indices = rgb_chain.nms_process(
            batch_size, width_rgb, n_anchor, rpn_scores, rpn_locs, anchor_rgb)

        return rgb_chain.proposal_target_creator(
            rois, roi_indices, gt_segments_rgb, labels, seg_info,
            rgb_chain.loc_normalize_mean, rgb_chain.loc_normalize_std)
def compute_loss(imgs, pafs_ys, heatmaps_ys, pafs_t, heatmaps_t, ignore_mask):
    """Sum the per-stage PAF and heatmap mean-squared errors.

    Where the ignore mask is set, the target is overwritten with the
    prediction, so those positions contribute zero error.

    Args:
        imgs: Not used by this function.
        pafs_ys: Per-stage PAF predictions.
        heatmaps_ys: Per-stage heatmap predictions.
        pafs_t: Target PAF array.
        heatmaps_t: Target heatmap array.
        ignore_mask: Mask array; repeated along a new axis 1 to match the
            second dimension of each target.

    Returns:
        Tuple ``(total_loss, paf_loss_log, heatmap_loss_log)``: the summed
        loss and the per-stage PAF and heatmap losses as Python floats.
    """
    paf_masks = ignore_mask[:, None].repeat(pafs_t.shape[1], axis=1)
    heatmap_masks = ignore_mask[:, None].repeat(heatmaps_t.shape[1], axis=1)

    total_loss = 0
    paf_loss_log, heatmap_loss_log = [], []

    # compute loss on each stage
    for pafs_y, heatmaps_y in zip(pafs_ys, heatmaps_ys):
        if pafs_y.shape != pafs_t.shape:
            # Stage output has a different size: resize targets and masks
            # to the spatial size of the PAF prediction.
            stage_size = pafs_y.shape[2:]
            stage_pafs_t = F.resize_images(pafs_t, stage_size).data
            stage_heatmaps_t = F.resize_images(heatmaps_t, stage_size).data
            stage_paf_masks = F.resize_images(paf_masks.astype('f'), stage_size).data > 0
            stage_heatmap_masks = F.resize_images(heatmap_masks.astype('f'), stage_size).data > 0
        else:
            # Work on copies so the in-place masking below leaves the
            # shared targets untouched.
            stage_pafs_t = pafs_t.copy()
            stage_heatmaps_t = heatmaps_t.copy()
            stage_paf_masks = paf_masks.copy()
            stage_heatmap_masks = heatmap_masks.copy()

        paf_ignored = stage_paf_masks == True  # noqa: E712 (elementwise compare)
        heatmap_ignored = stage_heatmap_masks == True  # noqa: E712
        stage_pafs_t[paf_ignored] = pafs_y.data[paf_ignored]
        stage_heatmaps_t[heatmap_ignored] = heatmaps_y.data[heatmap_ignored]

        pafs_loss = F.mean_squared_error(pafs_y, stage_pafs_t)
        heatmaps_loss = F.mean_squared_error(heatmaps_y, stage_heatmaps_t)

        total_loss += pafs_loss + heatmaps_loss

        paf_loss_log.append(float(cuda.to_cpu(pafs_loss.data)))
        heatmap_loss_log.append(float(cuda.to_cpu(heatmaps_loss.data)))

    return total_loss, paf_loss_log, heatmap_loss_log
    def predict(self, x, no_of_predictions=1, seq_len=4):
        """Predict frames one at a time, feeding each prediction back in.

        :param x: input array whose shape unpacks as (n, c, h, w)
        :param no_of_predictions: number of prediction rounds to run
        :param seq_len: after each round the input keeps its last
            ``seq_len * 3`` channels (previous input plus new prediction)
        :return: list holding the ``.data`` of each round's final output
        """
        # x shape = [n, 12, h, w]
        xp = cp.get_array_module(x)
        _, _, h, w = x.shape
        coarse_size = (int(h / 2**3), int(w / 2**3))
        outputs = []

        for round_idx in range(no_of_predictions):
            print("Predicting frame no : ", round_idx + 1)
            seq = resize_images(x, coarse_size)
            print(coarse_size)

            output = None
            for level in range(1, 5):
                output = self.singleforward(level, seq, output)
                if level != 4:
                    # Input for the next level: x at 1/4, 1/2, then 1/1 size.
                    shrink = 2**(3 - level)
                    seq = resize_images(x, (int(h / shrink), int(w / shrink)))

            outputs.append(output.data)
            # Append the prediction and keep only the newest channels.
            x = xp.concatenate([x, output.data], 1)[:, -seq_len * 3:, :, :]
            print("Predictions done for : ", round_idx + 1)
        return outputs
Esempio n. 9
0
    def __call__(self, x):
        """Compute the two score maps at the input resolution.

        Args:
            x: Input whose axes 2 and 3 must both be multiples of 16.

        Returns:
            Tuple ``(score, score_oc)``, each resized to ``x.shape[2:4]``.
        """
        in_h, in_w = x.shape[2], x.shape[3]
        assert in_h % 16 == 0
        assert in_w % 16 == 0

        # conv1 -> bn1 -> res2 -> res3 -> res4, then res5 (both at 1/16)
        feat = self.res5(self.extractor(x))

        assert feat.shape[2] == (in_h / 16)
        assert feat.shape[3] == (in_w / 16)

        conv6 = self.conv6(feat)  # 1/16

        # Both heads read conv6 and are resized from 1/16 to 1/1.
        out_size = x.shape[2:4]
        score = F.resize_images(self.score_fr(conv6), out_size)
        score_oc = F.resize_images(self.score_oc(conv6), out_size)

        return score, score_oc
Esempio n. 10
0
    def __call__(self, x):
        """Apply ``self.conv`` along one spatial axis at a fixed base size.

        The chosen axis (height when ``self.is_height``, else width) is
        resized to ``self.base_sp_size`` if it differs, swapped with axis 1
        around the convolution, and finally resized back to the input size if
        the convolution changed it. Both enlarging and shrinking use bilinear
        ``F.resize_images`` with ``align_corners=True``.

        Args:
            x: Input whose trailing two axes unpack as ``(height, width)``.

        Returns:
            The processed array.
        """
        height, width = x.shape[2:]
        real_in_size = (height, width)
        if self.is_height:
            real_sp_size = height
            base_in_size = (self.base_sp_size, width)
        else:
            real_sp_size = width
            base_in_size = (height, self.base_sp_size)

        if real_sp_size != self.base_sp_size:
            x = F.resize_images(x, output_shape=base_in_size, mode="bilinear", align_corners=True)

        # Move the chosen spatial axis into the channel position for the conv.
        x = F.swapaxes(x, axis1=1, axis2=self.index)
        x = self.conv(x)
        x = F.swapaxes(x, axis1=1, axis2=self.index)

        if x.shape[self.index] != real_sp_size:
            x = F.resize_images(x, output_shape=real_in_size, mode="bilinear", align_corners=True)

        return x
    def __call__(self, img):
        """Detect poses in a single image.

        Args:
            img: Image array whose shape unpacks as
                ``(height, width, channels)``; a copy is processed.

        Returns:
            The pose array from ``self.subsets_to_pose_array``. When no peak
            is found, an empty array of shape ``(0, len(JointType), 3)`` is
            returned, so callers always receive a single array (the no-peak
            path previously returned a 2-tuple, unlike the normal path).
        """
        edit_img = img.copy()
        img_h, img_w, _ = edit_img.shape
        # Optimize the image and heatmap sizes (make them multiples of stride=8).
        input_w, input_h = self.compute_optimal_size(edit_img, constants['img_size'])
        map_w, map_h = self.compute_optimal_size(edit_img, constants['heatmap_size'])
        # Resize the image and prepare the data for the model.
        resized_image = cv2.resize(edit_img, (input_w, input_h))
        x_data = self.preprocess(resized_image)
        # Move the input to the GPU when one is configured.
        if self.device >= 0:
            x_data = cuda.to_gpu(x_data)
        # Model outputs (from all stages).
        Ss, Ls = self.model(x_data)
        # Keep only the final stage, resized to the heatmap size.
        heatmaps = F.resize_images(Ss[-1], (map_h, map_w)).data[0]
        pafs = F.resize_images(Ls[-1], (map_h, map_w)).data[0]

        if self.device >= 0:
            pafs = pafs.get()
            cuda.get_device_from_id(self.device).synchronize()
        # Compute peaks from the heatmaps.
        all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
        if len(all_peaks) == 0:
            # Same return type as the normal path: a single pose array.
            return np.empty((0, len(JointType), 3))
        # Compute connections from the peaks and the PAFs.
        all_connections = self.compute_connections(pafs, all_peaks, map_w, constants)
        # Build the subsets.
        subsets = self.grouping_key_points(all_connections, all_peaks, constants)
        # Rescale peak columns 1 and 2 from heatmap size to image size.
        all_peaks[:, 1] *= img_w / map_w
        all_peaks[:, 2] *= img_h / map_h
        # Compute the poses.
        poses = self.subsets_to_pose_array(subsets, all_peaks)
        return poses
Esempio n. 12
0
    def __call__(self, x):
        """Encode a frame sequence, reconstruct it and optionally predict.

        Args:
            x: Batch of frame sequences. The first three axes unpack as
                ``(batch_size, nframes, nchannels)`` and the remaining axes
                are used as the spatial size to resize outputs back to.

        Returns:
            Tuple ``(reconst_imgs, pred_imgs)``. ``reconst_imgs`` concatenates
            one reconstructed frame per input frame along axis 1.
            ``pred_imgs`` is built the same way from ``self.pred`` over
            ``self.out_episodes`` steps, or is ``None`` when ``self.predict``
            is falsy.
        """
        xp = self.xp
        batch_size, nframes, nchannels = x.shape[:3]
        in_size = x.shape[3:]

        # The episode count is fixed by the first call; later calls must match.
        if self.in_episodes is None:
            self.in_episodes = self.out_episodes = nframes
        else:
            assert self.in_episodes == nframes

        self.reset_state()

        # BNCHW -> NBCHW
        x = x.transpose((1, 0, 2, 3, 4))

        # encode
        for i in range(self.in_episodes):
            # Each frame is resized to the patch size and flattened per sample.
            xi = F.resize_images(x[i], self.patch_size)
            xi = xi.reshape((batch_size, -1))
            for e in self.encoder:
                hi = e(xi)
                xi = hi

        self.copy_state()

        # decode (reconstruct)
        reconst_imgs = []
        # The decoder starts from zeros shaped like the last encoder output.
        # NOTE(review): `xi` is only bound if the encode loop ran at least
        # once (in_episodes > 0) -- confirm that is guaranteed.
        with chainer.cuda.get_device_from_id(self._device_id):
            xi = chainer.Variable(xp.zeros_like(xi, dtype=xi.dtype))
        for i in range(self.in_episodes):
            # Each step's output is fed back as the next step's input.
            for r in self.reconst:
                ri = r(xi)
                xi = ri
            ri = ri.reshape(
                (batch_size, self.n_channels) + self.patch_size)  # BCHW
            ri = F.resize_images(ri, in_size)
            reconst_imgs.append(ri[:, xp.newaxis])  # B, 1, C, H, W

        reconst_imgs = F.concat(reconst_imgs, axis=1)  # BFCHW

        # decode (prediction)
        pred_imgs = None
        if self.predict:
            pred_imgs = []
            # Same zero start as above; here `xi` is the last reconstruction
            # step's output.
            with chainer.cuda.get_device_from_id(self._device_id):
                xi = chainer.Variable(xp.zeros_like(xi, dtype=xi.dtype))
            for i in range(self.out_episodes):
                for p in self.pred:
                    pi = p(xi)
                    xi = pi
                pi = pi.reshape((batch_size, self.n_channels) +
                                self.patch_size)
                pi = F.resize_images(pi, in_size)
                pred_imgs.append(pi[:, xp.newaxis])

            pred_imgs = F.concat(pred_imgs, axis=1)  # BFCHW

        return reconst_imgs, pred_imgs
Esempio n. 13
0
    def __call__(self, orig_img):
        """Detect poses and print an IE-vs-Chainer output comparison.

        Alongside the regular Chainer forward pass, the same input is run
        through ``IEresult`` with hard-coded model paths, and statistics of
        both outputs are printed. Only the Chainer outputs feed the pose
        computation.

        Args:
            orig_img: Image array whose shape unpacks as
                ``(height, width, channels)``; a copy is processed.

        Returns:
            ``(poses, scores)``. With ``self.precise`` set, the result of
            ``self.detect_precise``. If no peak is found, an empty
            ``(0, len(JointType), 3)`` array and an empty score array.
        """
        orig_img = orig_img.copy()
        if self.precise:
            return self.detect_precise(orig_img)
        orig_img_h, orig_img_w, _ = orig_img.shape

        input_w, input_h = self.compute_optimal_size(
            orig_img, params['inference_img_size'])
        map_w, map_h = self.compute_optimal_size(orig_img,
                                                 params['heatmap_size'])

        resized_image = cv2.resize(orig_img, (input_w, input_h))
        x_data = self.preprocess(resized_image)

        if self.device >= 0:
            x_data = cuda.to_gpu(x_data)

        # Reference run through IEresult on "CPU" with fixed model files.
        print("x_data.shape", x_data.shape, type(x_data))
        resS = IEresult("models/FP32/pose_iter_440000.xml",
                        "models/FP32/pose_iter_440000.bin", "CPU", x_data)
        print("IEresult done", resS.keys())
        # Outputs are told apart by their size along axis 1 (38 vs 19).
        # NOTE(review): H1S / H2S stay unbound if no output matches, and the
        # prints below would then fail -- confirm both always exist.
        for k in resS.keys():
            if resS[k].shape[1] == 38: H1S = resS[k]
            if resS[k].shape[1] == 19: H2S = resS[k]
        print("            stddiv/mean/max/min")
        h1s, h2s = self.model(x_data)
        # Print statistics of the IE outputs next to the Chainer outputs.
        print("IEbase: H1S %11.7f %11.7f %11.7f %11.7f" % self.statistics(H1S))
        print("chainer:h1s %11.7f %11.7f %11.7f %11.7f" %
              self.statistics(h1s[-1].data[0]))
        print("IEbase: H2S %11.7f %11.7f %11.7f %11.7f" % self.statistics(H2S))
        print("chainer:h2s %11.7f %11.7f %11.7f %11.7f" %
              self.statistics(h2s[-1].data[0]))
        print("len(h1s)", len(h1s), type(h1s))
        print("len(h2s)", len(h2s), type(h2s))
        print("h1s[-1].shape", h1s[-1].shape, type(h1s[-1]))
        print("h2s[-1].shape", h2s[-1].shape, type(h2s[-1]))

        # Only the last stage's Chainer outputs are used from here on.
        pafs = F.resize_images(h1s[-1], (map_h, map_w)).data[0]
        heatmaps = F.resize_images(h2s[-1], (map_h, map_w)).data[0]
        print("pafs.shape", pafs.shape, type(pafs.shape))
        print("heatmaps.shape", heatmaps.shape, type(heatmaps.shape))

        if self.device >= 0:
            pafs = pafs.get()
            cuda.get_device_from_id(self.device).synchronize()

        all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
        if len(all_peaks) == 0:
            return np.empty((0, len(JointType), 3)), np.empty(0)
        all_connections = self.compute_connections(pafs, all_peaks, map_w,
                                                   params)
        subsets = self.grouping_key_points(all_connections, all_peaks, params)
        # Rescale peak columns 1 and 2 from heatmap size to image size.
        all_peaks[:, 1] *= orig_img_w / map_w
        all_peaks[:, 2] *= orig_img_h / map_h
        poses = self.subsets_to_pose_array(subsets, all_peaks)
        scores = subsets[:, -2]
        return poses, scores
	def update_core(self):
		"""Run one update of the generator and the discriminator.

		The batch is split into an input sequence and a ground-truth part
		(the last 3 channels). Four scales are processed in turn; each
		generator call receives the previous scale's output. Adversarial
		losses are summed over the scales, while the L2 and gradient losses
		use only the final output.
		"""
		# Fetch the next batch and wrap it as a Variable on the target device.
		batch = self.get_iterator('main').next()
		data = Variable(self.converter(batch, self.device))

		n, c, h, w = data.shape
		# Split into the input sequence and the ground truth (last 3 channels).
		seq, gt = split_axis(data, [c-3], 1)
		# Drop the reference to the full batch.
		del data

		output = None
		total_loss_dis_adv = 0
		total_loss_gen_adv = 0
		for i in range(1, 5):
			if i == 4:
				# The last scale works at full resolution.
				downscaled_gt, downscaled_seq = gt, seq
			else:
				# Scales 1..3 work at 1/8, 1/4 and 1/2 resolution.
				factor = 2 ** (4 - i)
				size = (int(h / factor), int(w / factor))
				downscaled_gt = resize_images(gt, size)
				downscaled_seq = resize_images(seq, size)

			output = self.GenNetwork.singleforward(i, downscaled_seq, output)
			dis_fake = self.DisNetwork.singleforward(i, output)
			dis_real = self.DisNetwork.singleforward(i, downscaled_gt)

			total_loss_dis_adv += (loss_target1(dis_real) + loss_target0(dis_fake)) / 2
			total_loss_gen_adv += loss_target1(dis_fake)

		loss_l2 = l2_loss(output, gt)
		loss_gdl = gradient_loss(output, gt)
		composite_gen_loss = (self.LAM_LP * loss_l2
		                      + self.LAM_GDL * loss_gdl
		                      + self.LAM_ADV * total_loss_gen_adv)

		report({'L2Loss': loss_l2}, self.GenNetwork)
		report({'GDL': loss_gdl}, self.GenNetwork)
		report({'AdvLoss': total_loss_gen_adv}, self.GenNetwork)
		report({'DisLoss': total_loss_dis_adv}, self.DisNetwork)
		report({'CompositeGenLoss': composite_gen_loss}, self.GenNetwork)

		# TODO: Come up with a more elegant way
		# Generator step: clear both networks' grads, backprop, update.
		self.DisNetwork.cleargrads()
		self.GenNetwork.cleargrads()
		composite_gen_loss.backward()
		self._optimizers["GeneratorNetwork"].update()

		# Discriminator step: same sequence with the discriminator loss.
		self.DisNetwork.cleargrads()
		self.GenNetwork.cleargrads()
		total_loss_dis_adv.backward()
		self._optimizers["DiscriminatorNetwork"].update()
Esempio n. 15
0
    def __call__(self, orig_img, fast_mode=False):
        """Detect poses by averaging model outputs over several scales.

        Args:
            orig_img: Image array whose shape unpacks as
                ``(height, width, channels)``.
            fast_mode: When true, only the first entry of
                ``params['inference_scales']`` is used.

        Returns:
            The array from ``self.subsets_to_person_pose_array``, or an empty
            ``(0, len(JointType), 3)`` array when no peak is found.
        """
        orig_img_h, orig_img_w, _ = orig_img.shape

        out_w, out_h = self.compute_optimal_size(orig_img, params['heatmap_size'])
        out_size = (out_h, out_w)

        # use only the first scale on fast mode
        if fast_mode:
            scales = [params['inference_scales'][0]]
        else:
            scales = params['inference_scales']

        pafs_sum = 0
        heatmaps_sum = 0
        for scale in scales:
            print("Inference scale: %.1f..." % (scale))
            img_size = int(params['inference_img_size'] * scale)
            in_w, in_h = self.compute_optimal_size(orig_img, img_size)

            resized = cv2.resize(orig_img, (in_w, in_h))
            # Add a batch axis, reorder to channels-first, map to [-0.5, 0.5).
            x_data = np.array(resized[np.newaxis], dtype=np.float32)
            x_data = x_data.transpose(0, 3, 1, 2) / 256 - 0.5

            if self.device >= 0:
                x_data = cuda.to_gpu(x_data)

            h1s, h2s = self.model(x_data)

            # Accumulate the last stage's outputs at the common output size.
            pafs_sum += F.resize_images(h1s[-1], out_size).data[0]
            heatmaps_sum += F.resize_images(h2s[-1], out_size).data[0]

        n_scales = len(scales)
        pafs = pafs_sum / n_scales
        heatmaps = heatmaps_sum / n_scales

        if self.device >= 0:
            pafs = cuda.to_cpu(pafs)

        all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
        # Flatten the per-category peak lists into one array.
        all_peaks_flatten = np.array(
            [peak for category_peaks in all_peaks for peak in category_peaks])
        if len(all_peaks_flatten) == 0:
            return np.empty((0, len(JointType), 3))

        all_connections = self.compute_connections(
            pafs, all_peaks, all_peaks_flatten, out_w, params)
        subsets = self.grouping_key_points(
            all_connections, all_peaks_flatten, params)

        # Rescale peak columns 0 and 1 from output size to image size.
        all_peaks_flatten[:, 0] *= orig_img_w / out_w
        all_peaks_flatten[:, 1] *= orig_img_h / out_h
        return self.subsets_to_person_pose_array(subsets, all_peaks_flatten)
Esempio n. 16
0
def viz_input(args, config):
    """Visualize input for network.

    For every test batch, a 2x3 grid of images is drawn that overlays the
    voxelized input with the ground-truth probability and regression maps,
    and saved as ``images/vis/batch_<index>.png``.

    Args:
        args: Not used by this function.
        config: Configuration dict; the keys ``model``, ``gpus``,
            ``updater``, ``dataset`` and ``iterator`` are read.
    """
    # Runs the local setup script before anything else.
    subprocess.call(['sh', "setup.sh"])
    # NOTE(review): `model` and `devices` are not used below; the calls may
    # be kept for their side effects -- confirm before removing.
    model = get_model(config["model"])
    devices = parse_devices(config['gpus'], config['updater']['name'])
    test_data = load_dataset_test(config["dataset"])
    test_iter = create_iterator_test(test_data,
                                     config['iterator'])
    dataset_config = config['dataset']['test']['args']
    for i_b,batch in enumerate(test_iter):
        #gt_prob, gt_reg are ground truth
        x_list, counter, indexes_list, gt_prob, gt_reg, batch, n_no_empty = batch[0]
        #print(gt_prob.shape)
        #print(gt_reg[0,:,:].shape)
        # Resize the ground-truth maps to the fixed 400x352 grid; only index
        # 7 along the first axis of gt_reg is shown.
        gt_prob = F.resize_images(gt_prob.astype("f")[np.newaxis, np.newaxis],
                                  (400, 352))[0, 0].data
        gt_reg = F.resize_images(gt_reg[7,:,:].astype("f")[np.newaxis, np.newaxis],
                                  (400, 352))[0, 0].data
        len_image = len(x_list)
        fig, axes = plt.subplots(2, 3, figsize=(20, 7))
        thres_list = dataset_config['thres_t']
        for index, (x, indexes) in enumerate(zip(x_list, indexes_list)):
            x = x[:, :, 0]
            x = feature_to_voxel(x, indexes, 3, 10, 400, 352, batch)
            input_x = chainer.cuda.to_cpu(x.data.astype("f")[0])
            # Collapse the first two axes with a max.
            input_x = input_x.max(axis=(0, 1))
            # Light background; each slice becomes one colour channel.
            image = np.ones(input_x.shape, dtype='f') * 0.95
            slice1 = image.copy()
            slice2 = image.copy()
            slice3 = image.copy()
            # lidar data
            slice1[input_x != 0] = 0.3
            slice2[input_x != 0] = .8
            slice3[input_x != 0] = 0.2
            # probability of each box
            slice1[gt_prob != 0] = 1
            slice2[gt_prob != 0] = 0
            slice3[gt_prob != 0] = 0
            # regression for ground truth
            slice1[gt_reg != 0] = 0
            slice2[gt_reg != 0] = 0
            slice3[gt_reg != 0] = 1

            image = np.ones((400, 352, 3))
            image[:, :, 0] = slice1
            image[:, :, 1] = slice2
            image[:, :, 2] = slice3
            # Row and column of this image in the 2x3 grid.
            i = int(index / 3)
            j = int(index % 3)
            # Flip vertically and show rows 100..299 only.
            axes[i, j].imshow(image[::-1][100:300], cmap="hot")
            axes[i, j].axis('off')
            axes[i, j].set_title("Thres: {}".format(thres_list[index]))
        plt.tight_layout()
        plt.savefig("images/vis/"+"batch_"+str(i_b)+".png")
        plt.close()
Esempio n. 17
0
 def __call__(self, x):
     """Run backbone and head, resizing the outputs to the input size.

     The target size is ``self.in_size`` when ``self.fixed_size`` is set,
     otherwise the trailing axes of ``x``.

     Returns:
         ``(x, y, z)`` when ``self.aux`` is set, otherwise only ``x``.
     """
     if self.fixed_size:
         in_size = self.in_size
     else:
         in_size = x.shape[2:]

     feat, _ = self.backbone(x)
     main, aux_y, aux_z = self.head(feat)
     main = F.resize_images(main, output_shape=in_size)

     if not self.aux:
         return main

     # The auxiliary outputs are resized only when they are returned.
     aux_y = F.resize_images(aux_y, output_shape=in_size)
     aux_z = F.resize_images(aux_z, output_shape=in_size)
     return main, aux_y, aux_z
 def __call__(self, top, middle, bottom):
     """Refine three feature maps and concatenate them at ``top``'s size.

     ``bottom`` and ``middle`` pass through their refine layers and are
     resized to the spatial size of ``top``; ``top`` passes through
     ``refine_3_1`` only.

     Returns:
         The three results concatenated along axis 1, in the order
         bottom, middle, top.
     """
     top_size = (top.shape[2], top.shape[3])

     bottom_feat = self.refine_1_3(self.refine_1_2(self.refine_1_1(bottom)))
     bottom_up = F.resize_images(bottom_feat, top_size)

     middle_feat = self.refine_2_2(self.refine_2_1(middle))
     middle_up = F.resize_images(middle_feat, top_size)

     top_feat = self.refine_3_1(top)
     return F.concat((bottom_up, middle_up, top_feat), axis=1)
Esempio n. 19
0
    def compute_loss(self, images, pafs_ys, heatmaps_ys, ground_truth_pafs,
                     ground_truth_heatmaps, ignore_mask):
        """Sum the per-stage PAF and heatmap mean-squared errors.

        Where the ignore mask is set, the target is overwritten with the
        prediction, so those positions contribute zero error.

        Args:
            images: Not used by this method.
            pafs_ys: Per-stage PAF predictions.
            heatmaps_ys: Per-stage heatmap predictions.
            ground_truth_pafs: Ground-truth PAF array.
            ground_truth_heatmaps: Ground-truth heatmap array.
            ignore_mask: Mask array; repeated along a new axis 1 to match
                the second dimension of each ground-truth array.

        Returns:
            Tuple ``(loss, paf_losses, heatmap_losses)``: the summed loss and
            the per-stage PAF and heatmap losses as Python floats.
        """
        paf_masks = ignore_mask[:, None].repeat(
            ground_truth_pafs.shape[1], axis=1)
        heatmap_masks = ignore_mask[:, None].repeat(
            ground_truth_heatmaps.shape[1], axis=1)

        loss = 0.0
        paf_losses, heatmap_losses = [], []

        # compute loss on each stage
        for pafs_y, heatmaps_y in zip(pafs_ys, heatmaps_ys):
            if pafs_y.shape != ground_truth_pafs.shape:
                # Stage output has a different size: resize targets and
                # masks to the spatial size of the PAF prediction.
                stage_size = pafs_y.shape[2:]
                stage_pafs = F.resize_images(
                    ground_truth_pafs, stage_size).data
                stage_heatmaps = F.resize_images(
                    ground_truth_heatmaps, stage_size).data
                stage_paf_masks = F.resize_images(
                    paf_masks.astype('f'), stage_size).data > 0
                stage_heatmap_masks = F.resize_images(
                    heatmap_masks.astype('f'), stage_size).data > 0
            else:
                # Work on copies so the in-place masking below leaves the
                # shared ground truth untouched.
                stage_pafs = ground_truth_pafs.copy()
                stage_heatmaps = ground_truth_heatmaps.copy()
                stage_paf_masks = paf_masks.copy()
                stage_heatmap_masks = heatmap_masks.copy()

            paf_ignored = stage_paf_masks == True  # noqa: E712
            heatmap_ignored = stage_heatmap_masks == True  # noqa: E712
            stage_pafs[paf_ignored] = pafs_y.data[paf_ignored]
            stage_heatmaps[heatmap_ignored] = heatmaps_y.data[heatmap_ignored]

            pafs_loss = F.mean_squared_error(pafs_y, stage_pafs)
            heatmaps_loss = F.mean_squared_error(heatmaps_y, stage_heatmaps)

            loss += pafs_loss + heatmaps_loss

            paf_losses.append(float(chainer.cuda.to_cpu(pafs_loss.data)))
            heatmap_losses.append(
                float(chainer.cuda.to_cpu(heatmaps_loss.data)))

        return loss, paf_losses, heatmap_losses
    def __call__(self, refine_concat):
        """Compute the vector and heat outputs from the refined features.

        Both branches apply four convolutions; when ``self.upsample`` is set,
        the feature map is doubled in both spatial axes after the second
        convolution. The heat branch ends with a sigmoid.

        Returns:
            Tuple ``(vect_out, heat_out)``.
        """
        def double_size(feat):
            return F.resize_images(feat, (2 * feat.shape[2], 2 * feat.shape[3]))

        vect = self.vect_conv2(self.vect_conv1(refine_concat))
        if self.upsample:
            vect = double_size(vect)
        vect_out = self.vect_conv4(self.vect_conv3(vect))

        heat = self.heat_conv2(self.heat_conv1(refine_concat))
        if self.upsample:
            heat = double_size(heat)
        heat_out = F.sigmoid(self.heat_conv4(self.heat_conv3(heat)))

        return vect_out, heat_out
 def get_example(self, i):
     """Load and resize the ``i``-th target image and its source images.

     Progress is printed every 100 examples (except for ``i == 0``).

     Returns:
         Tuple ``(tgt_img, src_imgs, [], gt_pose)``; the images are
         resized to ``(self.height, self.width)``.
     """
     if i != 0 and i % 100 == 0:
         percentage = i * 100 / len(self.imgs_file_list)
         print("Progress: {0:d}%".format(int(percentage)))

     # Looked up but not used further in this method.
     calib_dir = self.calib_dir_list[i]

     paths = self.imgs_file_list[i]
     tgt_img_path, src_imgs_path = paths[0], paths[1]

     tgt_img = load_as_float_norm(tgt_img_path)
     src_imgs = [load_as_float_norm(src_path) for src_path in src_imgs_path]
     gt_pose = read_file_list(self.gt_files[i])

     # Original size is read but not used further in this method.
     orig_shape = tgt_img.shape[:2]

     out_size = (self.height, self.width)
     # The target image gets a leading batch axis for the resize only.
     tgt_img = F.resize_images(tgt_img[None], out_size).data[0]
     src_imgs = F.resize_images(np.array(src_imgs, dtype='f'), out_size).data
     return tgt_img, src_imgs, [], gt_pose
Esempio n. 22
0
    def __call__(self, x):
        """Compute the main, ``cp`` and ``ocp`` outputs at the input size.

        All three heads read the trunk features through a conv block with
        dropout (ratio 0.1); the ``cp`` and ``ocp`` heads additionally apply
        ``tanh``. Every output is resized to ``x.shape[2:]``.

        Returns:
            Tuple ``(h, h_cp, h_ocp)``.
        """
        out_size = x.shape[2:]
        feat = self.trunk(x)

        cp_feat = F.dropout(self.cbr_cp(feat), ratio=0.1)
        h_cp = F.resize_images(F.tanh(self.out_cp(cp_feat)), out_size)

        ocp_feat = F.dropout(self.cbr_ocp(feat), ratio=0.1)
        h_ocp = F.resize_images(F.tanh(self.out_ocp(ocp_feat)), out_size)

        main_feat = F.dropout(self.cbr_main(feat), ratio=0.1)
        h_main = F.resize_images(self.out_main(main_feat), out_size)

        return h_main, h_cp, h_ocp
Esempio n. 23
0
    def __call__(self, orig_img):
        """Detect poses using outputs obtained from ``IEresult``.

        The Chainer model is not called here: the network outputs come from
        ``IEresult(self.IE_xml, self.IE_bin, self.device, x_data)`` and are
        wrapped as Variables so the rest of the pipeline stays unchanged.

        Args:
            orig_img: Image array whose shape unpacks as
                ``(height, width, channels)``; a copy is processed.

        Returns:
            ``(poses, scores)``. With ``self.precise`` set, the result of
            ``self.detect_precise``. If no peak is found, an empty
            ``(0, len(JointType), 3)`` array and an empty score array.
        """
        orig_img = orig_img.copy()
        if self.precise:
            return self.detect_precise(orig_img)
        orig_img_h, orig_img_w, _ = orig_img.shape

        input_w, input_h = self.compute_optimal_size(orig_img, params['inference_img_size'])
        map_w, map_h = self.compute_optimal_size(orig_img, params['heatmap_size'])

        resized_image = cv2.resize(orig_img, (input_w, input_h))
        x_data = self.preprocess(resized_image)

     #   if self.device >= 0:
     #       x_data = cuda.to_gpu(x_data)

        print("x_data.shape",x_data.shape,type(x_data))
     #   IE_bin = "models/FP32/pose_iter_440000.bin"
     #   IE_xml = "models/FP32/pose_iter_440000.xml"
        # Here self.device is compared against device-name strings.
        # NOTE(review): data_type is assigned but never read in this method.
        if self.device == 'CPU'   : data_type='FP32'
        if self.device == 'MYRIAD': data_type='FP16'
        resS = IEresult(self.IE_xml, self.IE_bin, self.device, x_data)
        print("IEresult done",resS.keys())

        # Outputs are told apart by their size along axis 1 (38 vs 19).
        # NOTE(review): H1S / H2S stay unbound if no output matches --
        # confirm both are always present.
        for k in resS.keys():
            if resS[k].shape[1]==38: H1S=resS[k]
            if resS[k].shape[1]==19: H2S=resS[k]
        # Wrapped in single-element lists so the [-1] indexing below works.
        h1s = [ Variable(H1S) ]
        h2s = [ Variable(H2S) ]

        pafs = F.resize_images(h1s[-1], (map_h, map_w)).data[0]
        heatmaps = F.resize_images(h2s[-1], (map_h, map_w)).data[0]
        print("pafs.shape",pafs.shape,type(pafs.shape))
        print("heatmaps.shape",heatmaps.shape,type(heatmaps.shape))

     #   if self.device >= 0:
     #       pafs = pafs.get()
     #       cuda.get_device_from_id(self.device).synchronize()

        all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
        if len(all_peaks) == 0:
            return np.empty((0, len(JointType), 3)), np.empty(0)
        all_connections = self.compute_connections(pafs, all_peaks, map_w, params)
        subsets = self.grouping_key_points(all_connections, all_peaks, params)
        # Rescale peak columns 1 and 2 from heatmap size to image size.
        all_peaks[:, 1] *= orig_img_w / map_w
        all_peaks[:, 2] *= orig_img_h / map_h
        poses = self.subsets_to_pose_array(subsets, all_peaks)
        scores = subsets[:, -2]
        return poses, scores
Esempio n. 24
0
    def encode(self, image, obj, desc, num):
        """Encode an image batch after attention masking.

        ``obj`` and ``desc`` are repeated once per image and passed with the
        images to ``self.enc_models[num % 2]`` to obtain an attention map.
        The map is reshaped to ``(-1, 1, att_size, att_size)``, resized to
        the image size and multiplied into the images, which are then
        encoded by ``self.att_enc_models[num % 2]``.

        Args:
            image: Image batch; ``image.shape[0]`` is the batch size.
            obj: Object vector, converted to float32.
            desc: Description vector, converted to float32.
            num: Index used to select the GPU (``num % GPU.num_gpus``) and
                the model pair (``num % 2``).

        Returns:
            Tuple of the first output of the attention encoder and the
            squeezed concatenation of the first ``obj`` and ``desc`` rows.
        """
        # Resolve the target device once instead of once per transfer.
        gpu_id = GPU.gpus_to_use[num % GPU.num_gpus]
        cuda.get_device(gpu_id).use()

        batch = image.shape[0]
        obj = np.asarray(obj, dtype=np.float32)
        obj = np.repeat(obj[np.newaxis], batch, axis=0)
        desc = np.asarray(desc, dtype=np.float32)
        desc = np.repeat(desc[np.newaxis], batch, axis=0)

        o_in = cuda.to_gpu(obj, gpu_id)
        d_in = cuda.to_gpu(desc, gpu_id)
        x_in = cuda.to_gpu(image, gpu_id)

        model_idx = num % 2
        att, _, _ = self.enc_models[model_idx](Variable(x_in),
                                               Variable(o_in),
                                               Variable(d_in),
                                               train=False)

        # Turn the attention output into one single-channel, image-sized map
        # per sample.
        att = F.reshape(att, (-1, 1, self.att_size, self.att_size))
        att = F.resize_images(att, (self.image_size, self.image_size))

        cir_z, _, _, _ = self.att_enc_models[model_idx](Variable(x_in) * att,
                                                        train=False)

        return cir_z, F.squeeze(F.concat((o_in[0], d_in[0]), axis=-1))
Esempio n. 25
0
def inception_forward(model, ims, batch_size):
    """Collect the inception softmax predictions for ``ims`` batch by batch.

    Args:
        model: Callable model exposing ``xp``; invoked as ``model(batch)``
            with ``train`` and ``enable_backprop`` disabled.
        ims: Image array with a four-element shape. Batches whose last two
            dimensions are not ``(299, 299)`` are resized to that size.
        batch_size: Maximum number of images per forward pass.

    Returns:
        ``xp`` array holding columns 1..1000 of the 1008-wide model output,
        one row per image.
    """
    n_images, _, width, height = ims.shape
    n_batches = int(math.ceil(float(n_images) / float(batch_size)))

    xp = model.xp
    needs_resize = (width, height) != (299, 299)

    # Softmax container, filled one batch at a time to bound memory use.
    softmax_out = xp.empty((n_images, 1008), dtype=xp.float32)

    for batch_idx in range(n_batches):
        start = batch_idx * batch_size
        stop = min(start + batch_size, n_images)

        # xp.asarray moves the slice to the GPU when xp is CuPy.
        batch = Variable(xp.asarray(ims[start:stop]))

        if needs_resize:
            batch = F.resize_images(batch, (299, 299))  # bilinear

        with chainer.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            predictions = model(batch)
        softmax_out[start:stop] = predictions.data

    # Column 0 and columns 1001-1008 are dummies.
    return softmax_out[:, 1:1001]
def rasterize_silhouettes(
        faces,
        image_size=DEFAULT_IMAGE_SIZE,
        anti_aliasing=DEFAULT_ANTI_ALIASING,
        near=DEFAULT_NEAR,
        far=DEFAULT_FAR,
        eps=DEFAULT_EPS,
        background_color=DEFAULT_BACKGROUND_COLOR,
):
    """Rasterize ``faces`` into alpha (silhouette) images.

    With ``anti_aliasing`` enabled the faces are rescaled, rendered at twice
    ``image_size`` and resized back to ``image_size`` afterwards.

    Args:
        faces: Face data passed to ``neural_renderer.Rasterize``.
        image_size: Side length of the returned images.
        anti_aliasing: Whether to render with 2x super-sampling.
        near, far, eps, background_color: Forwarded unchanged to
            ``neural_renderer.Rasterize``.

    Returns:
        The alpha output of the rasterizer with axis 1 reversed.
    """
    if anti_aliasing:
        # 2x super-sampling
        faces = faces * (2 * image_size - 1) / (2 * image_size - 2)
        render_size = image_size * 2
    else:
        render_size = image_size

    rasterizer = neural_renderer.Rasterize(
        render_size, near, far, eps, background_color,
        return_rgb=False, return_alpha=True, return_depth=False)
    silhouettes = rasterizer(faces)[1]

    # Flip along axis 1.
    silhouettes = silhouettes[:, ::-1, :]

    if not anti_aliasing:
        return silhouettes

    # 0.5x down-sampling: add a channel axis for resize_images, then drop it.
    downsampled = cf.resize_images(
        silhouettes[:, None, :, :], (image_size, image_size))
    return downsampled[:, 0]
Esempio n. 27
0
 def forward(self, inputs, device):
     """Resize the single input to ``self.output_shape[2:]``.

     ``self.align_corners`` is forwarded to ``functions.resize_images``.
     The result is returned as a one-element tuple.
     """
     (x,) = inputs
     target_size = self.output_shape[2:]
     resized = functions.resize_images(
         x, target_size, align_corners=self.align_corners)
     return (resized,)
Esempio n. 28
0
    def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand):
        """Estimate the keypoints of one cropped hand image.

        The crop is resized to 368x368, normalised with ``x / 256 - 0.5`` and
        fed to ``self.hand_net``. The last network output is resized back to
        the crop size, smoothed with a Gaussian (all channels but the last
        one) and the maximum of every smoothed channel becomes a keypoint.

        Args:
            hand_bgr: Hand crop as an ``(H, W, C)`` image array.
            cx: Value added to the peak column offset from the crop centre.
            cy: Value added to the peak row offset from the crop centre.
            left_hand: If true, the crop is mirrored before inference and
                the heatmaps are mirrored back afterwards; limb names are
                looked up with an offset of ``len(hmaps)``.

        Returns:
            List of dicts with keys ``'x'``, ``'y'``, ``'score'`` and
            ``'limb'``, one per smoothed heatmap channel.
        """
        xp = self.hand_net.xp

        if left_hand:
            # flipCode=1 mirrors the image horizontally (around the y-axis).
            hand_bgr = cv2.flip(hand_bgr, 1)

        resized = cv2.resize(hand_bgr, (368, 368),
                             interpolation=cv2.INTER_CUBIC)
        x = np.array(resized[np.newaxis], dtype=np.float32)
        x = x.transpose(0, 3, 1, 2)
        x = x / 256 - 0.5

        if self.gpu >= 0:
            x = chainer.cuda.to_gpu(x)
        x = chainer.Variable(x)

        heatmaps = self.hand_net(x)
        heatmaps = F.resize_images(heatmaps[-1], hand_bgr.shape[:2])[0]
        if self.gpu >= 0:
            heatmaps.to_cpu()
        heatmaps = heatmaps.array

        if left_hand:
            # Undo the mirroring; cv2.flip expects a channel-last layout.
            heatmaps = heatmaps.transpose(1, 2, 0)
            heatmaps = cv2.flip(heatmaps, 1)
            heatmaps = heatmaps.transpose(2, 0, 1)

        # get peak on heatmap
        hmaps = []
        if xp is np:
            # cpu
            for i in range(heatmaps.shape[0] - 1):
                heatmap = gaussian_filter(heatmaps[i],
                                          sigma=self.hand_gaussian_sigma)
                hmaps.append(heatmap)
        else:
            # gpu: smooth every channel at once with a convolution
            heatmaps = chainer.cuda.to_gpu(heatmaps)
            heatmaps = F.convolution_2d(heatmaps[:, xp.newaxis],
                                        self.hand_gaussian_kernel,
                                        stride=1,
                                        pad=int(self.hand_gaussian_ksize / 2))
            heatmaps = chainer.cuda.to_cpu(xp.squeeze(heatmaps.array))
            for heatmap in heatmaps[:-1]:
                hmaps.append(heatmap)
        keypoints = []
        idx_offset = 0
        if left_hand:
            idx_offset += len(hmaps)
        for i, heatmap in enumerate(hmaps):
            conf = heatmap.max()
            # argmax + unravel_index always yields exactly one (row, col)
            # pair. Flattening np.where(heatmap == conf) gives
            # [rows..., cols...], so with a tied maximum the second entry
            # would be a row index, not the column.
            row, col = np.unravel_index(heatmap.argmax(), heatmap.shape)
            py = cy + int(row) - hand_bgr.shape[0] / 2
            px = cx + int(col) - hand_bgr.shape[1] / 2
            keypoints.append({
                'x': px,
                'y': py,
                'score': conf,
                'limb': self.index2handname[idx_offset + i]
            })
        return keypoints
    def __call__(self, last_fm_middle_top):
        """Compute the vector and heatmap outputs from a shared feature map.

        Both branches apply four convolution links in sequence. When
        ``self.upsample`` is true, the feature map is resized to twice its
        height and width between the second and third link.

        Args:
            last_fm_middle_top: Input feature map fed to both branches.

        Returns:
            Tuple ``(vect_out, heat_out)``.
        """
        # Vector branch.
        vect = self.vect_conv2(self.vect_conv1(last_fm_middle_top))
        if self.upsample:
            doubled = (2 * vect.shape[2], 2 * vect.shape[3])
            vect = F.resize_images(vect, doubled)
        vect_out = self.vect_conv4(self.vect_conv3(vect))

        # Heatmap branch.
        heat = self.heat_conv2(self.heat_conv1(last_fm_middle_top))
        if self.upsample:
            doubled = (2 * heat.shape[2], 2 * heat.shape[3])
            heat = F.resize_images(heat, doubled)
        heat_out = self.heat_conv4(self.heat_conv3(heat))

        return vect_out, heat_out
Esempio n. 30
0
    def __call__(self, hand_img, fast_mode=False, hand_type="right"):
        """Detect hand keypoints in a hand crop.

        Args:
            hand_img: ``(H, W, C)`` image array.
            fast_mode: Accepted for interface compatibility; not read here.
            hand_type: ``"left"`` mirrors the image before inference and
                mirrors the heatmaps back afterwards.

        Returns:
            Whatever ``self.compute_peaks_from_heatmaps`` returns for the
            heatmaps resized to the (possibly mirrored) input size.
        """
        is_left = hand_type == "left"
        if is_left:
            hand_img = cv2.flip(hand_img, 1)

        img_h, img_w, _ = hand_img.shape

        side = params["hand_inference_img_size"]
        net_input = cv2.resize(hand_img, (side, side))
        # Add a batch axis, go channel-first and normalise.
        x_data = np.array(net_input[np.newaxis], dtype=np.float32)
        x_data = x_data.transpose(0, 3, 1, 2) / 256 - 0.5

        on_gpu = self.device >= 0
        if on_gpu:
            x_data = cuda.to_gpu(x_data)

        outputs = self.model(x_data)
        heatmaps = F.resize_images(outputs[-1], (img_h, img_w)).data[0]

        if on_gpu:
            heatmaps = heatmaps.get()

        if is_left:
            # cv2.flip works on channel-last arrays.
            mirrored = cv2.flip(heatmaps.transpose(1, 2, 0), 1)
            heatmaps = mirrored.transpose(2, 0, 1)

        return self.compute_peaks_from_heatmaps(heatmaps)
Esempio n. 31
0
def get_mean_cov(model, ims, batch_size=100):
    """Compute mean and covariance of the inception features of ``ims``.

    Args:
        model: Callable model exposing ``xp``; invoked as
            ``model(batch, get_feature=True)`` with ``train`` and
            ``enable_backprop`` disabled.
        ims: Image array with a four-element shape. Batches whose last two
            dimensions are not ``(299, 299)`` are resized to that size.
        batch_size: Maximum number of images per forward pass.

    Returns:
        Tuple ``(mean, cov)`` of CPU arrays computed from the 2048-wide
        feature rows.
    """
    n_images, _, width, height = ims.shape
    n_batches = int(math.ceil(float(n_images) / float(batch_size)))

    xp = model.xp

    print('Batch size:', batch_size)
    print('Total number of images:', n_images)
    print('Total number of batches:', n_batches)

    needs_resize = (width, height) != (299, 299)
    features = xp.empty((n_images, 2048), dtype=xp.float32)

    for batch_idx in range(n_batches):
        print('Running batch', batch_idx + 1, '/', n_batches, '...')
        start = batch_idx * batch_size
        stop = min(start + batch_size, n_images)

        # xp.asarray moves the slice to the GPU when xp is CuPy.
        batch = Variable(xp.asarray(ims[start:stop]))

        if needs_resize:
            batch = F.resize_images(batch, (299, 299))  # bilinear

        with chainer.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            feats = model(batch, get_feature=True)
        features[start:stop] = feats.data

    mean = chainer.cuda.to_cpu(xp.mean(features, axis=0))
    # np.cov expects variables in rows, hence the transpose.
    cov = np.cov(chainer.cuda.to_cpu(features).T)

    return mean, cov
Esempio n. 32
0
    def forward(self, x1, x2):
        """Fuse two feature maps along the channel axis.

        Each input goes through its own conv -> batch-norm -> ReLU stack.
        The second result is resized to the spatial size of the first and
        both are concatenated on axis 1.
        """
        first = F.relu(self.bn1(self.conv1(x1)))
        second = F.relu(self.bn2(self.conv2(x2)))
        height, width = first.shape[2], first.shape[3]
        second = F.resize_images(second, (height, width))
        return F.concat((first, second), axis=1)
Esempio n. 33
0
    def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand):
        """Estimate the keypoints of one cropped hand image.

        The crop is resized to 368x368, normalised with ``x / 256 - 0.5`` and
        fed to ``self.hand_net``. The last network output is resized back to
        the crop size, smoothed with a Gaussian (all channels but the last
        one) and the maximum of every smoothed channel becomes a keypoint.
        GPU transfers run inside the device context of
        ``self.hand_net._device_id``.

        Args:
            hand_bgr: Hand crop as an ``(H, W, C)`` image array.
            cx: Value added to the peak column offset from the crop centre.
            cy: Value added to the peak row offset from the crop centre.
            left_hand: If true, the crop is mirrored before inference and
                the heatmaps are mirrored back afterwards; limb names are
                looked up with an offset of ``len(hmaps)``.

        Returns:
            List of dicts with keys ``'x'``, ``'y'``, ``'score'`` and
            ``'limb'``, one per smoothed heatmap channel.
        """
        xp = self.hand_net.xp
        device_id = self.hand_net._device_id

        if left_hand:
            # flipCode=1 mirrors the image horizontally (around the y-axis).
            hand_bgr = cv2.flip(hand_bgr, 1)

        resized = cv2.resize(hand_bgr, (368, 368), interpolation=cv2.INTER_CUBIC)
        x = np.array(resized[np.newaxis], dtype=np.float32)
        x = x.transpose(0, 3, 1, 2)
        x = x / 256 - 0.5

        if self.gpu >= 0:
            with chainer.cuda.get_device_from_id(device_id):
                x = chainer.cuda.to_gpu(x)
        x = chainer.Variable(x)

        heatmaps = self.hand_net(x)
        heatmaps = F.resize_images(heatmaps[-1], hand_bgr.shape[:2])[0]
        if self.gpu >= 0:
            heatmaps.to_cpu()
        heatmaps = heatmaps.array

        if left_hand:
            # Undo the mirroring; cv2.flip expects a channel-last layout.
            heatmaps = heatmaps.transpose(1, 2, 0)
            heatmaps = cv2.flip(heatmaps, 1)
            heatmaps = heatmaps.transpose(2, 0, 1)

        # get peak on heatmap
        hmaps = []
        if xp is np:
            # cpu
            for i in range(heatmaps.shape[0] - 1):
                heatmap = gaussian_filter(heatmaps[i], sigma=self.hand_gaussian_sigma)
                hmaps.append(heatmap)
        else:
            # gpu: smooth every channel at once with a convolution
            with chainer.cuda.get_device_from_id(device_id):
                heatmaps = chainer.cuda.to_gpu(heatmaps)
            heatmaps = F.convolution_2d(
                heatmaps[:, xp.newaxis], self.hand_gaussian_kernel,
                stride=1, pad=int(self.hand_gaussian_ksize / 2))
            heatmaps = chainer.cuda.to_cpu(xp.squeeze(heatmaps.array))
            for heatmap in heatmaps[:-1]:
                hmaps.append(heatmap)
        keypoints = []
        idx_offset = 0
        if left_hand:
            idx_offset += len(hmaps)
        for i, heatmap in enumerate(hmaps):
            conf = heatmap.max()
            # argmax + unravel_index always yields exactly one (row, col)
            # pair. Flattening np.where(heatmap == conf) gives
            # [rows..., cols...], so with a tied maximum the second entry
            # would be a row index, not the column.
            row, col = np.unravel_index(heatmap.argmax(), heatmap.shape)
            py = cy + int(row) - hand_bgr.shape[0] / 2
            px = cx + int(col) - hand_bgr.shape[1] / 2
            keypoints.append({'x': px, 'y': py, 'score': conf,
                              'limb': self.index2handname[idx_offset+i]})
        return keypoints
    def __call__(self, orig_img, fast_mode=False):
        """Estimate person poses in an image.

        The network is run once per inference scale; the resized PAF and
        heatmap outputs are averaged over the scales before peaks are
        extracted, connected and grouped.

        Args:
            orig_img: ``(H, W, C)`` image array.
            fast_mode: If true, only the first entry of
                ``params['inference_scales']`` is used.

        Returns:
            The result of ``self.subsets_to_person_pose_array``, or an empty
            ``(0, len(JointType), 3)`` array when no peak was found.
        """
        orig_img_h, orig_img_w, _ = orig_img.shape

        out_w, out_h = self.compute_optimal_size(
            orig_img, params['heatmap_size'])

        pafs_sum = 0
        heatmaps_sum = 0
        # use only the first scale on fast mode
        all_scales = params['inference_scales']
        scales = [all_scales[0]] if fast_mode else all_scales

        for scale in scales:
            print("Inference scale: %.1f..." % (scale))
            img_size = int(params['inference_img_size'] * scale)
            in_w, in_h = self.compute_optimal_size(orig_img, img_size)

            net_input = cv2.resize(orig_img, (in_w, in_h))
            # Add a batch axis, go channel-first and normalise.
            x_data = np.array(net_input[np.newaxis], dtype=np.float32)
            x_data = x_data.transpose(0, 3, 1, 2) / 256 - 0.5

            if self.device >= 0:
                x_data = cuda.to_gpu(x_data)

            h1s, h2s = self.model(x_data)

            pafs_sum += F.resize_images(h1s[-1], (out_h, out_w)).data[0]
            heatmaps_sum += F.resize_images(h2s[-1], (out_h, out_w)).data[0]

        n_scales = len(scales)
        pafs = pafs_sum / n_scales
        heatmaps = heatmaps_sum / n_scales

        if self.device >= 0:
            pafs = cuda.to_cpu(pafs)

        all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
        all_peaks_flatten = np.array(
            [peak for category_peaks in all_peaks for peak in category_peaks])
        if len(all_peaks_flatten) == 0:
            return np.empty((0, len(JointType), 3))

        all_connections = self.compute_connections(
            pafs, all_peaks, all_peaks_flatten, out_w, params)
        subsets = self.grouping_key_points(
            all_connections, all_peaks_flatten, params)

        # Rescale the first two peak columns from heatmap to image size.
        all_peaks_flatten[:, 0] *= orig_img_w / out_w
        all_peaks_flatten[:, 1] *= orig_img_h / out_h
        return self.subsets_to_person_pose_array(subsets, all_peaks_flatten)
    def __call__(self, face_img, fast_mode=False):
        """Detect face keypoints in a face crop.

        Args:
            face_img: ``(H, W, C)`` image array.
            fast_mode: Accepted for interface compatibility; not read here.

        Returns:
            Whatever ``self.compute_peaks_from_heatmaps`` returns for the
            last model output resized to the input size.
        """
        src_h, src_w, _ = face_img.shape

        side = params["face_inference_img_size"]
        net_input = cv2.resize(face_img, (side, side))
        # Add a batch axis, go channel-first and normalise.
        x_data = np.array(net_input[np.newaxis], dtype=np.float32)
        x_data = x_data.transpose(0, 3, 1, 2) / 256 - 0.5

        if self.device >= 0:
            x_data = cuda.to_gpu(x_data)

        outputs = self.model(x_data)
        heatmaps = F.resize_images(outputs[-1], (src_h, src_w)).data[0]
        return self.compute_peaks_from_heatmaps(heatmaps)
    def __call__(self, hand_img, fast_mode=False, hand_type="right"):
        """Detect hand keypoints in a hand crop.

        Args:
            hand_img: ``(H, W, C)`` image array.
            fast_mode: Accepted for interface compatibility; not read here.
            hand_type: ``"left"`` mirrors the image before inference and
                mirrors the heatmaps back afterwards.

        Returns:
            Whatever ``self.compute_peaks_from_heatmaps`` returns for the
            heatmaps resized to the (possibly mirrored) input size.
        """
        mirror = hand_type == "left"
        if mirror:
            hand_img = cv2.flip(hand_img, 1)

        src_h, src_w, _ = hand_img.shape

        net_side = params["hand_inference_img_size"]
        resized = cv2.resize(hand_img, (net_side, net_side))
        # Add a batch axis, go channel-first and normalise.
        batch = np.array(resized[np.newaxis], dtype=np.float32)
        x_data = batch.transpose(0, 3, 1, 2) / 256 - 0.5

        use_gpu = self.device >= 0
        if use_gpu:
            x_data = cuda.to_gpu(x_data)

        net_out = self.model(x_data)
        heatmaps = F.resize_images(net_out[-1], (src_h, src_w)).data[0]

        if use_gpu:
            heatmaps = heatmaps.get()

        if mirror:
            # cv2.flip works on channel-last arrays.
            channel_last = heatmaps.transpose(1, 2, 0)
            heatmaps = cv2.flip(channel_last, 1).transpose(2, 0, 1)

        return self.compute_peaks_from_heatmaps(heatmaps)
    def _pose_estimate_chainer_backend(self, bgr_img):
        """Estimate multi-person poses from a BGR image.

        Steps:
          1. Run ``self.pose_net`` once per entry of ``self.scales`` and
             average the resized heatmap (19 channels) and PAF (38 channels)
             outputs.
          2. Find heatmap peaks: pixels above ``self.thre1`` that are not
             smaller than their four shifted neighbours.
          3. Score every candidate limb between peak pairs by sampling the
             PAF at ``mid_num`` points along the segment.
          4. Greedily keep the best connections per limb and assemble them
             into per-person rows of joint indices.
          5. Drop rows with fewer than 4 parts or a mean score below 0.4.

        Args:
            bgr_img: ``(H, W, C)`` image array. It is resized to
                ``(self.width, self.height)`` first when both are set.

        Returns:
            ``([], [])`` when no limb has candidates at both ends; otherwise
            ``(self._extract_joint_position(...), all_peaks)`` where
            ``all_peaks`` is the list of per-part peak arrays.
        """
        if self.gpu >= 0:
            chainer.cuda.get_device_from_id(self.gpu).use()
        xp = self.pose_net.xp

        org_h, org_w, _ = bgr_img.shape
        if not (self.width is None or self.height is None):
            bgr_img = cv2.resize(bgr_img, (self.width, self.height))

        heatmap_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 19),
                               dtype=np.float32)
        paf_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 38),
                           dtype=np.float32)

        # Multi-scale inference: accumulate the per-scale outputs weighted by
        # 1 / len(self.scales).
        for scale in self.scales:
            img = cv2.resize(bgr_img, (0, 0), fx=scale,
                             fy=scale, interpolation=cv2.INTER_CUBIC)
            padded_img, pad = padRightDownCorner(
                img, self.stride, self.pad_value)
            x = np.transpose(np.float32(
                padded_img[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            if self.gpu >= 0:
                x = chainer.cuda.to_gpu(x)
            x = chainer.Variable(x)
            pafs, heatmaps = self.pose_net(x)
            paf = pafs[-1]
            heatmap = heatmaps[-1]

            # extract outputs, resize, and remove padding
            heatmap = F.resize_images(
                heatmap, (heatmap.data.shape[2] * self.stride,
                          heatmap.data.shape[3] * self.stride))
            heatmap = heatmap[:, :, :padded_img.shape[0] -
                              pad[2], :padded_img.shape[1] - pad[3]]
            heatmap = F.resize_images(
                heatmap, (bgr_img.shape[0], bgr_img.shape[1]))
            heatmap = xp.transpose(xp.squeeze(heatmap.data), (1, 2, 0))
            paf = F.resize_images(
                paf, (paf.data.shape[2] * self.stride,
                      paf.data.shape[3] * self.stride))
            paf = paf[:, :, :padded_img.shape[0] -
                      pad[2], :padded_img.shape[1] - pad[3]]
            paf = F.resize_images(paf, (bgr_img.shape[0], bgr_img.shape[1]))
            paf = xp.transpose(xp.squeeze(paf.data), (1, 2, 0))

            coeff = 1.0 / len(self.scales)
            paf_avg += paf * coeff
            heatmap_avg += heatmap * coeff

        # Peak detection: compare the averaged heatmap with copies shifted by
        # one pixel in each of the four directions.
        heatmav_left = xp.zeros_like(heatmap_avg)
        heatmav_left[1:, :] = heatmap_avg[:-1, :]
        heatmav_right = xp.zeros_like(heatmap_avg)
        heatmav_right[:-1, :] = heatmap_avg[1:, :]
        heatmav_up = xp.zeros_like(heatmap_avg)
        heatmav_up[:, 1:] = heatmap_avg[:, :-1]
        heatmav_down = xp.zeros_like(heatmap_avg)
        heatmav_down[:, :-1] = heatmap_avg[:, 1:]
        peaks_binary = (heatmap_avg >= heatmav_left) & \
                       (heatmap_avg >= heatmav_right) & \
                       (heatmap_avg >= heatmav_up) & \
                       (heatmap_avg >= heatmav_down) & \
                       (heatmap_avg > self.thre1)

        # all_peaks columns: x, y, score, running peak id.
        peaks = xp.array(xp.nonzero(peaks_binary[..., :len(self.index2limbname)-1]), dtype=np.int32).T
        peak_counter = peaks.shape[0]
        all_peaks = xp.zeros((peak_counter, 4), dtype=np.float32)
        all_peaks[:, 0] = peaks[:, 1]
        all_peaks[:, 1] = peaks[:, 0]
        # NOTE(review): indexing with a list of lists relies on NumPy
        # treating it like a tuple of index arrays -- confirm against the
        # NumPy/CuPy versions in use.
        all_peaks[:, 2] = heatmap_avg[peaks.T.tolist()]
        peaks_order = peaks[..., 2]
        try:
            all_peaks = all_peaks[xp.argsort(peaks_order)]
        except AttributeError:
            # cupy.argsort is not available at cupy==1.0.1
            peaks_order = chainer.cuda.to_cpu(peaks_order)
            all_peaks = all_peaks[np.argsort(peaks_order)]
        all_peaks[:, 3] = xp.arange(peak_counter, dtype=np.float32)
        if self.gpu >= 0:
            all_peaks = chainer.cuda.to_cpu(all_peaks)
            peaks_order = chainer.cuda.to_cpu(peaks_order)
        # Split the sorted peaks into one array per body part.
        all_peaks = np.split(all_peaks, np.cumsum(
            np.bincount(peaks_order, minlength=len(self.index2limbname)-1)))
        connection_all = []
        mid_num = 10
        eps = 1e-8
        score_mid = paf_avg[:, :, [[x - 19 for x in self.map_idx[k]]
                                   for k in range(len(self.map_idx))]]

        # Candidate end points (A, B) for every limb in self.limb_sequence.
        cands = np.array(all_peaks, dtype=object)[
            np.array(self.limb_sequence, dtype=np.int32) - 1]
        candAs = cands[:, 0]
        candBs = cands[:, 1]
        nAs = np.array([len(candA) for candA in candAs])
        nBs = np.array([len(candB) for candB in candBs])
        target_indices = np.nonzero(np.logical_and(nAs != 0, nBs != 0))[0]
        if len(target_indices) == 0:
            return [], []

        # Enumerate every (A, B) pairing per limb: A repeated, B tiled.
        all_candidates_A = [np.repeat(np.array(tmp_candA, dtype=np.float32), nB, axis=0)
                            for tmp_candA, nB in zip(candAs, nBs)]
        all_candidates_B = [np.tile(np.array(tmp_candB, dtype=np.float32), (nA, 1))
                            for tmp_candB, nA in zip(candBs, nAs)]

        target_candidates_B = [all_candidates_B[index]
                               for index in target_indices]
        target_candidates_A = [all_candidates_A[index]
                               for index in target_indices]

        # Unit direction vector of every candidate limb.
        vec = np.vstack(target_candidates_B)[
            :, :2] - np.vstack(target_candidates_A)[:, :2]
        if self.gpu >= 0:
            vec = chainer.cuda.to_gpu(vec)
        norm = xp.sqrt(xp.sum(vec ** 2, axis=1)) + eps
        vec = vec / norm[:, None]
        # Sample indices (row, col, limb) of mid_num points along each
        # candidate segment.
        start_end = zip(np.round(np.mgrid[np.vstack(target_candidates_A)[:, 1].reshape(-1, 1):np.vstack(target_candidates_B)[:, 1].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
                        np.round(np.mgrid[np.vstack(target_candidates_A)[:, 0].reshape(-1, 1):np.vstack(
                            target_candidates_B)[:, 0].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
                        np.concatenate([[[index] * mid_num for i in range(len(c))] for index, c in zip(target_indices, target_candidates_B)]),)

        v = score_mid[np.concatenate(
            start_end, axis=1).tolist()].reshape(-1, mid_num, 2)
        score_midpts = xp.sum(v * xp.repeat(vec, (mid_num),
                                            axis=0).reshape(-1, mid_num, 2), axis=2)
        score_with_dist_prior = xp.sum(score_midpts, axis=1) / mid_num + \
            xp.minimum(0.5 * bgr_img.shape[0] / norm - 1,
                       xp.zeros_like(norm, dtype=np.float32))
        # Keep candidates with more than 80% of the samples above thre2 and
        # a positive overall score.
        c1 = xp.sum(score_midpts > self.thre2, axis=1) > 0.8 * mid_num
        c2 = score_with_dist_prior > 0.0
        criterion = xp.logical_and(c1, c2)

        indices_bins = np.cumsum(nAs * nBs)
        indices_bins = np.concatenate(
            [np.zeros(1), indices_bins]).astype(np.int32)
        target_candidate_indices = xp.nonzero(criterion)[0]
        if self.gpu >= 0:
            target_candidate_indices = chainer.cuda.to_cpu(
                target_candidate_indices)
            score_with_dist_prior = chainer.cuda.to_cpu(score_with_dist_prior)

        # Recover limb index k and the (i, j) candidate pair from the flat
        # candidate index.
        k_s = np.digitize(target_candidate_indices, indices_bins) - 1
        i_s = (target_candidate_indices - (indices_bins[k_s])) // nBs[k_s]
        j_s = (target_candidate_indices - (indices_bins[k_s])) % nBs[k_s]

        connection_candidate = np.concatenate([k_s.reshape(-1, 1),
                                               i_s.reshape(-1, 1),
                                               j_s.reshape(-1, 1),
                                               score_with_dist_prior[
                                                   target_candidate_indices][None, ].T,
                                               (score_with_dist_prior[target_candidate_indices][None, ] +
                                                np.concatenate(target_candidates_A)[target_candidate_indices, 2] + np.concatenate(target_candidates_B)[target_candidate_indices, 2]).T], axis=1)

        # Order by limb index first, then by descending score.
        sorted_indices = np.argsort(
            connection_candidate[:, 0] * 100 - connection_candidate[:, 3])

        connection_all = []
        for _ in range(0, 19):
            connection = np.zeros((0, 5), dtype=np.float32)
            connection_all.append(connection)

        # Greedy selection: each A and B candidate is used at most once per
        # limb.
        for c_candidate in connection_candidate[sorted_indices]:
            k, i, j = c_candidate[0:3].astype(np.int32)
            score = c_candidate[3]
            if(len(connection_all[k]) >= min(nAs[k], nBs[k])):
                continue
            i *= nBs[k]
            if(i not in connection_all[k][:, 3] and j not in connection_all[k][:, 4]):
                connection_all[k] = np.vstack([connection_all[k], np.array(
                    [all_candidates_A[k][i][3], all_candidates_B[k][j][3], score, i, j], dtype=np.float32)])

        # Each row: 18 part slots (-1 = missing), total score, part count.
        joint_cands_indices = -1 * np.ones((0, 20))
        candidate = np.array(
            [item for sublist in all_peaks for item in sublist])
        for k in range(len(self.map_idx)):
            partAs = connection_all[k][:, 0]
            partBs = connection_all[k][:, 1]
            indexA, indexB = np.array(self.limb_sequence[k]) - 1
            for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                found = 0
                joint_cands_indices_idx = [-1, -1]
                # 1:size(joint_cands_indices,1):
                for j in range(len(joint_cands_indices)):
                    if joint_cands_indices[j][indexA] == float(partAs[i]) or joint_cands_indices[j][indexB] == float(partBs[i]):
                        joint_cands_indices_idx[found] = j
                        found += 1

                if found == 1:
                    j = joint_cands_indices_idx[0]
                    if(joint_cands_indices[j][indexB] != float(partBs[i])):
                        joint_cands_indices[j][indexB] = partBs[i]
                        joint_cands_indices[j][-1] += 1
                        # Add the part score and the connection score exactly
                        # once, as the equivalent fallback in the found == 2
                        # branch does.
                        joint_cands_indices[
                            j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                elif found == 2:  # if found 2 and disjoint, merge them
                    j1, j2 = joint_cands_indices_idx
                    membership = ((joint_cands_indices[j1] >= 0).astype(
                        int) + (joint_cands_indices[j2] >= 0).astype(int))[:-2]
                    if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                        joint_cands_indices[j1][
                            :-2] += (joint_cands_indices[j2][:-2] + 1)
                        joint_cands_indices[
                            j1][-2:] += joint_cands_indices[j2][-2:]
                        joint_cands_indices[j1][-2] += connection_all[k][i][2]
                        joint_cands_indices = np.delete(
                            joint_cands_indices, j2, 0)
                    else:  # as like found == 1
                        joint_cands_indices[j1][indexB] = partBs[i]
                        joint_cands_indices[j1][-1] += 1
                        joint_cands_indices[
                            j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                # if find no partA in the joint_cands_indices, create a new
                # joint_cands_indices
                elif not found and k < len(self.index2limbname) - 2:
                    row = -1 * np.ones(20)
                    row[indexA] = partAs[i]
                    row[indexB] = partBs[i]
                    row[-1] = 2
                    row[-2] = sum(candidate[connection_all[k]
                                            [i, :2].astype(int), 2]) + connection_all[k][i][2]
                    joint_cands_indices = np.vstack([joint_cands_indices, row])

        # delete some rows of joint_cands_indices which has few parts occur
        deleteIdx = []
        for i in range(len(joint_cands_indices)):
            if joint_cands_indices[i][-1] < 4 or joint_cands_indices[i][-2] / joint_cands_indices[i][-1] < 0.4:
                deleteIdx.append(i)
        joint_cands_indices = np.delete(joint_cands_indices, deleteIdx, axis=0)

        return self._extract_joint_position(joint_cands_indices, candidate), all_peaks
Esempio n. 38
0
 def forward(self, inputs, device):
     """Resize the single input to ``self.output_shape[2:]``.

     The result is returned as a one-element tuple.
     """
     (x,) = inputs
     target_size = self.output_shape[2:]
     resized = functions.resize_images(x, target_size)
     return (resized,)
Esempio n. 39
0
 def f(x):
     """Resize ``x`` to ``output_shape``, which is not a parameter here."""
     resized = functions.resize_images(x, output_shape)
     return resized
Esempio n. 40
0
    def predict_depth(self, rgb, mask_score, depth_viz, rgb_pool5):
        """Predict a depth score map cropped to the size of ``rgb``.

        ``depth_viz`` goes through five conv/ReLU/max-pool stages (1/2 down
        to 1/32). The 1/32 features are optionally masked and optionally
        concatenated with ``rgb_pool5``, pass two fully-convolutional layers
        with dropout, and are fused with the scaled 1/16 and 1/8 features
        while being upsampled.

        Args:
            rgb: Only ``rgb.shape[2]`` and ``rgb.shape[3]`` are read, to
                crop the final score map.
            mask_score: Not read by this method; the masking branch uses
                ``self.mask_score`` instead.
            depth_viz: Input of the first depth convolution.
            rgb_pool5: Concatenated with the depth features on axis 1 when
                ``self.concat`` is ``True``.

        Returns:
            The cropped output of ``self.depth_upscore8``.
        """
        # conv_depth_1
        feat = F.relu(self.conv_depth_1_1(depth_viz))
        feat = F.relu(self.conv_depth_1_2(feat))
        depth_pool1 = F.max_pooling_2d(feat, 2, stride=2, pad=0)  # 1/2

        # conv_depth_2
        feat = F.relu(self.conv_depth_2_1(depth_pool1))
        feat = F.relu(self.conv_depth_2_2(feat))
        depth_pool2 = F.max_pooling_2d(feat, 2, stride=2, pad=0)  # 1/4

        # conv_depth_3
        feat = F.relu(self.conv_depth_3_1(depth_pool2))
        feat = F.relu(self.conv_depth_3_2(feat))
        feat = F.relu(self.conv_depth_3_3(feat))
        depth_pool3 = F.max_pooling_2d(feat, 2, stride=2, pad=0)  # 1/8

        # conv_depth_4
        feat = F.relu(self.conv_depth_4_1(depth_pool3))
        feat = F.relu(self.conv_depth_4_2(feat))
        feat = F.relu(self.conv_depth_4_3(feat))
        depth_pool4 = F.max_pooling_2d(feat, 2, stride=2, pad=0)  # 1/16

        # conv_depth_5
        feat = F.relu(self.conv_depth_5_1(depth_pool4))
        feat = F.relu(self.conv_depth_5_2(feat))
        feat = F.relu(self.conv_depth_5_3(feat))
        depth_pool5 = F.max_pooling_2d(feat, 2, stride=2, pad=0)  # 1/32

        if self.masking is True:
            # Zero out depth_pool5 wherever the resized mask is non-zero.
            # (N, C, H, W) -> (N, H, W)
            mask_pred = F.argmax(self.mask_score, axis=1)
            # (N, H, W) -> (N, 1, H, W), float required for resizing
            mask_pred = mask_pred[:, None, :, :].data.astype(
                self.xp.float32)  # 1/1
            pool5_size = (depth_pool5.shape[2], depth_pool5.shape[3])
            resized_mask = F.resize_images(mask_pred, pool5_size)  # 1/32
            keep = (resized_mask.data == 0.0).astype(self.xp.float32)
            masked_depth_pool5 = depth_pool5 * keep
        else:
            masked_depth_pool5 = depth_pool5

        # concat_fc6
        if self.concat is True:
            # concatenate rgb_pool5 and depth_pool5
            concat_pool5 = F.concat((rgb_pool5, masked_depth_pool5), axis=1)
            fc6_out = F.relu(self.concat_fc6(concat_pool5))
        else:
            fc6_out = F.relu(self.depth_fc6(masked_depth_pool5))
        concat_fc6 = F.dropout(fc6_out, ratio=.5)  # 1/32

        # concat_fc7
        concat_fc7 = F.dropout(
            F.relu(self.concat_fc7(concat_fc6)), ratio=.5)  # 1/32

        # depth_score_fr
        depth_score_fr = self.depth_score_fr(concat_fc7)  # 1/32

        # depth_score_pool3
        depth_score_pool3 = self.depth_score_pool3(
            0.0001 * depth_pool3)  # 1/8

        # depth_score_pool4
        depth_score_pool4 = self.depth_score_pool4(
            0.01 * depth_pool4)  # 1/16

        # depth upscore2
        depth_upscore2 = self.depth_upscore2(depth_score_fr)  # 1/16

        # depth_score_pool4c: crop to the size of depth_upscore2
        up2_h = depth_upscore2.data.shape[2]
        up2_w = depth_upscore2.data.shape[3]
        depth_score_pool4c = depth_score_pool4[
            :, :, 5:5 + up2_h, 5:5 + up2_w]  # 1/16

        # depth_fuse_pool4
        depth_fuse_pool4 = depth_upscore2 + depth_score_pool4c  # 1/16

        # depth_upscore_pool4
        depth_upscore_pool4 = self.depth_upscore_pool4(
            depth_fuse_pool4)  # 1/8

        # depth_score_pool3c: crop to the size of depth_upscore_pool4
        up4_h = depth_upscore_pool4.data.shape[2]
        up4_w = depth_upscore_pool4.data.shape[3]
        depth_score_pool3c = depth_score_pool3[
            :, :, 9:9 + up4_h, 9:9 + up4_w]  # 1/8

        # depth_fuse_pool3
        depth_fuse_pool3 = depth_upscore_pool4 + depth_score_pool3c  # 1/8

        # depth_upscore8
        depth_upscore8 = self.depth_upscore8(depth_fuse_pool3)  # 1/1

        # depth_score: crop to the spatial size of rgb
        depth_score = depth_upscore8[
            :, :, 31:31 + rgb.shape[2], 31:31 + rgb.shape[3]]  # 1/1

        return depth_score
Esempio n. 41
0
 def f(x):
     """Return the element-wise square of ``x`` resized to ``output_shape``.

     ``output_shape`` is not a parameter; it is resolved from the
     surrounding scope.
     """
     resized = functions.resize_images(x, output_shape)
     return resized * resized
Esempio n. 42
0
 def check_forward(self, x, output_shape):
     """Resize ``x`` to ``output_shape`` and compare with ``self.out``."""
     y = functions.resize_images(x, output_shape)
     testing.assert_allclose(y.data, self.out)