def darknet_preprocess(self, images):
    # resize so that the image fits within the network resolution (neth x netw)
    # while preserving the aspect ratio
    neth = netw = self.resolution
    im_h = images.shape[1]
    im_w = images.shape[2]
    if (1.0 * netw / im_w) < (1.0 * neth / im_h):
        new_w = netw
        new_h = (im_h * netw) // im_w
    else:
        new_h = neth
        new_w = (im_w * neth) // im_h
    images = resize_images(images, [new_h, new_w])
    resized = images
    # to BCHW format
    images = np.transpose(images, (0, 3, 1, 2))
    # RGB to BGR
    images = images[:, ::-1, :, :]
    # to float in [0, 1]
    images = images / 255.0
    # as contiguous memory
    images = np.ascontiguousarray(images, dtype=np.float32)
    dark_frames = Image(images)
    return dark_frames, resized, images
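# Every function in this module relies on a `resize_images` helper that is not
# defined here. The sketch below is only an assumption about its behavior: a
# batched B*H*W*C NumPy resize built on OpenCV (cv2.resize takes its size
# argument as (width, height), hence the swap).
import cv2
import numpy as np


def resize_images(images, size, interpolation=cv2.INTER_LINEAR):
    new_h, new_w = size
    resized = [cv2.resize(img, (new_w, new_h), interpolation=interpolation)
               for img in images]
    resized = np.stack(resized, axis=0)
    # cv2.resize drops a trailing channel axis of size 1; restore it
    if resized.ndim == 3:
        resized = resized[..., np.newaxis]
    return resized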
def compute_logits(self, image):
    image = resize_images(image, [self.height, self.width])
    image_bak = image
    # image shape B*H*W*C
    image = np.transpose(image, (0, 3, 1, 2))
    # has shape B*C*H*W
    image = image.astype(np.float32)
    image = image / 255.0
    image = torch.from_numpy(image)
    out = self.segment_func(image, self.output_downsample_factor)
    # back to NumPy; segment_func is assumed to return a torch tensor
    out = out.cpu().numpy()
    # out has shape batch*H*W*#classes
    if self.attach_lane_color:
        # compute the lane mask
        assert (out.shape[3] == 6)
        argmax = np.argmax(out, axis=3)
        # now shape B*H*W
        not_lane = (argmax != 5)
        downsampled_images = image_bak[:, ::self.output_downsample_factor,
                                       ::self.output_downsample_factor, :]
        # zero out every pixel that is not predicted as lane (class 5)
        downsampled_images[not_lane, :] = 0
        downsampled_images = downsampled_images / 255.0
        out = np.concatenate((out, downsampled_images), axis=3)
    return out
def split_camera_middle_batch(sensor_data, sensor_names):
    id = sensor_names.index('CameraMiddle')
    rest_data = sensor_data[0:id] + sensor_data[(id + 1):]
    middle = sensor_data[id]
    # now splitting the image into two smaller ones
    middle_shape = middle.shape
    # now shape is B H W C
    middle_shape = middle_shape[1:]
    # keep the central half of the rows, then split into left and right halves
    middle = middle[:, middle_shape[0] // 4:middle_shape[0] * 3 // 4, :, :]
    left = middle[:, :, 0:middle_shape[1] // 2, :]
    left = resize_images(left, (middle_shape[0], middle_shape[1]))
    right = middle[:, :, middle_shape[1] // 2:, :]
    right = resize_images(right, (middle_shape[0], middle_shape[1]))
    rest_data += [left, right]
    return rest_data
def compute_logits(self, images):
    # might be zoomed images
    #images = resize_images(images, [512*3//4, 512])
    images = resize_images(images, [400 * 3 // 4, 400])
    [N, H, W, C] = images.shape
    assert H >= self._resolution[0]
    assert W >= self._resolution[1]
    # get central crop of given resolution
    h0 = int(0.5 * (H - self._resolution[0]))
    hN = int(0.5 * (H + self._resolution[0]))
    w0 = int(0.5 * (W - self._resolution[1]))
    wN = int(0.5 * (W + self._resolution[1]))
    center_crop = images[:, h0:hN, w0:wN, :]
    self.last_images = center_crop
    center_crop = center_crop.transpose(0, 3, 1, 2)
    center_crop = (center_crop - self._means_marvin_tensor) / 127.0
    center_crop = torch.from_numpy(center_crop).type(torch.FloatTensor)
    input = center_crop.cuda(self._opts['gpu'], non_blocking=True)
    logits = self._model(input)
    logits = logits.squeeze()
    #print("logits shape", logits.size())
    # size is batch*1
    #logits = torch.nn.functional.sigmoid(logits)
    return logits
def compute_logits(self, image):
    # TODO pin_memory=True,
    rgb_mean = [0.41738699, 0.45732192, 0.46886091]
    rgb_std = [0.25685097, 0.26509955, 0.29067996]
    image = resize_images(image, [self.height, self.width])
    # image shape B*H*W*C
    with torch.no_grad():
        img = torch.from_numpy(image)
        img = img.cuda(non_blocking=True)
        # from B H W C to B C H W
        img = img.permute(0, 3, 1, 2)
        # from an int tensor to a float tensor
        img = img.float()
        img = img / 255.0
        # normalize it
        img.sub_(img.new(rgb_mean).view(1, -1, 1, 1))
        img.div_(img.new(rgb_std).view(1, -1, 1, 1))
        sem_logits = self.model(img)
        # sem_logits has size batch*nclass*H*W
        # compute the visualization within cuda
        max_value, argmax = sem_logits.max(1)
        # the argmax has size batch*H*W
        self.argmax = argmax.cpu().numpy()
        sem_logits = sem_logits.permute(0, 2, 3, 1)
        sem_logits = sem_logits.cpu().numpy()
    # the output now has shape batch*H*W*nclass, e.g. [1, 72, 96, 65]
    return sem_logits
def compute_disparity_average(self, images):
    # assume an RGB input image
    input_image = resize_images(images, [self.height, self.width])
    # append horizontally flipped copies for the post-processing step
    input_image = np.concatenate((input_image, input_image[:, :, ::-1, :]),
                                 axis=0)
    input_image = input_image.astype(np.float32) / 255
    disp = self.sess.run(self.model.disp_left_est[0],
                         feed_dict={self.left: input_image})
    disp_pp = self.post_process_disparity(disp.squeeze()).astype(np.float32)
    # convert to shape batch*H*W*1
    disp_pp = np.expand_dims(disp_pp, axis=3)
    return disp_pp
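# `self.post_process_disparity` is not shown in this file. The sketch below is
# an assumption based on the standard monodepth post-processing (Godard et
# al.): the batch is doubled with horizontally flipped copies above, so the
# two predictions per image are blended, with ramp masks suppressing the
# left/right occlusion artifacts. The real method may differ, e.g. in how it
# handles batching.
import numpy as np


def post_process_disparity(disp):
    # disp: (2*B, H, W); the first B maps belong to the original images,
    # the last B to their horizontally flipped copies
    b = disp.shape[0] // 2
    h, w = disp.shape[1], disp.shape[2]
    l_disp = disp[:b]
    r_disp = disp[b:, :, ::-1]  # un-flip the flipped predictions
    m_disp = 0.5 * (l_disp + r_disp)
    grid, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    l_mask = 1.0 - np.clip(20 * (grid - 0.05), 0, 1)
    r_mask = l_mask[:, ::-1]
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp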
def compute(self, images, intermediate_size=(576, 768)):
    # depth 256*512, seg: 576*768, yolo 312*416
    if intermediate_size is not None:
        images = resize_images(images, intermediate_size)
    self.images = images
    for mode in self.instances.keys():
        assert (self.num_replicates[mode] == 1)
        conn = self.instances[mode]
        conn.send(("compute", images))
    out_logits = {}
    for mode in self.instances.keys():
        conn = self.instances[mode]
        out_logits[mode] = conn.recv()
    return out_logits
def compute_logits(self, image):
    image = resize_images(image, [self.height, self.width])
    image = image[:, ::2, ::2, :]
    # normalize with mean and std
    image = (image / 255.0 - self.mean) / self.std
    image = image.astype(np.float32)
    image = np.transpose(image, [0, 3, 1, 2])
    image = torch.from_numpy(image)
    # inference-only forward pass (pre-0.4 PyTorch idiom; newer versions
    # would use `with torch.no_grad():` instead of a volatile Variable)
    image_var = Variable(image, requires_grad=False, volatile=True)
    pred = self.model(image_var)[0]
    pred = pred[:, :, ::self.output_downsample_factor,
                ::self.output_downsample_factor]
    # _, pred = torch.max(final, 1)
    pred = pred.permute([0, 2, 3, 1])
    pred = pred.cpu().data.numpy()
    return pred
def compute(self, images, intermediate_size=(576, 768)):
    # depth 256*512, seg: 576*768, yolo 312*416
    if intermediate_size is not None:
        if (intermediate_size[0] * images.shape[2] !=
                images.shape[1] * intermediate_size[1]):
            print("warning: the images aspect ratio is changed "
                  "in all_perceptions")
        images = resize_images(images, intermediate_size)
    self.images = images
    for mode in self.instances.keys():
        assert (self.num_replicates[mode] == 1)
        conn = self.instances[mode]
        conn.send(("compute", images))
    out_logits = {}
    for mode in self.instances.keys():
        conn = self.instances[mode]
        out_logits[mode] = conn.recv()
    return out_logits
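# Both `compute` variants above talk to per-model worker processes through
# `self.instances[mode]`, which appears to be one end of a multiprocessing
# Pipe. A minimal sketch of the matching worker loop, under that assumption
# (the names `run_perception_worker` and `make_model` are hypothetical):
from multiprocessing import Pipe, Process


def run_perception_worker(conn, make_model):
    model = make_model()  # builds one of the perception models above
    while True:
        msg, payload = conn.recv()
        if msg == "compute":
            # or the model's equivalent compute method, e.g.
            # compute_disparity_average for the depth worker
            conn.send(model.compute_logits(payload))
        elif msg == "stop":
            break


# Usage sketch:
# parent_conn, child_conn = Pipe()
# Process(target=run_perception_worker, args=(child_conn, make_model)).start()
# parent_conn.send(("compute", images))
# logits = parent_conn.recv()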
def camera_middle_zoom_batch(sensor_data, sensor_names, zoom_dict):
    out = {}
    for key in zoom_dict:
        id = sensor_names.index(key)
        # rest_data = sensor_data[0:id] + sensor_data[(id+1):]
        middle = sensor_data[id]
        if zoom_dict[key]:
            # zoom in by cropping the central region and resizing it back up
            middle_shape = middle.shape
            # now shape is B H W C
            middle_shape = middle_shape[1:]
            middle = middle[:, middle_shape[0] // 4:middle_shape[0] * 3 // 4,
                            middle_shape[1] // 4:middle_shape[1] * 3 // 4, :]
            middle = resize_images(middle,
                                   (middle_shape[0], middle_shape[1]))
        out[key] = middle

    ans = []
    for key in sensor_names:
        if key in out:
            ans.append(out[key])
        else:
            raise ValueError("zoom dict not complete")
    return ans
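# A hypothetical usage sketch for camera_middle_zoom_batch, assuming numpy as
# np and the function above. The side-camera names are made up; the point is
# that every sensor must appear in zoom_dict, and only entries flagged True
# are replaced by a resized central crop.
def _camera_middle_zoom_example():
    sensor_names = ['CameraLeft', 'CameraMiddle', 'CameraRight']
    sensor_data = [np.zeros((2, 300, 400, 3), dtype=np.uint8)
                   for _ in sensor_names]
    zoom_dict = {'CameraLeft': False,
                 'CameraMiddle': True,
                 'CameraRight': False}
    zoomed = camera_middle_zoom_batch(sensor_data, sensor_names, zoom_dict)
    # each output keeps the original (B, H, W, C) shape; only 'CameraMiddle'
    # now holds a zoomed-in view of the central region of the frame
    return zoomed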
def _merge_logits_all_perception(self, logits_dict):
    res = []
    det_sz = (39, 52)
    #print(sorted(logits_dict.keys()))
    for key in sorted(logits_dict.keys()):
        if key == "seg":
            factor = 3
            size = (det_sz[0] * factor, det_sz[1] * factor)
            resized = resize_images(logits_dict[key], size,
                                    interpolation=cv2.INTER_NEAREST)
            resized *= 0.1
            resized = self._space2depth(resized, factor)
            res.append(resized)
        elif key == "depth":
            factor = 5
            size = (det_sz[0] * factor, det_sz[1] * factor)
            resized = resize_images(logits_dict[key], size,
                                    interpolation=cv2.INTER_LINEAR)
            resized *= 50
            resized = self._space2depth(resized, factor)
            res.append(resized)
        elif "det" in key:
            dB, dH, dW, dC = logits_dict[key].shape
            # compute the effective height
            eH = int(1.0 * det_sz[0] / det_sz[1] * dW)
            assert (eH == det_sz[0] and dW == det_sz[1])
            # compute the upper margin
            H_start = (dH - eH) // 2
            # crop the useful part
            cropped = logits_dict[key][:, H_start:(H_start + eH), :, :]
            # multiply by the amplification factor
            num_classes = dC // 9 - 5
            # we amplify the objectness score of each of the 9 anchors by 10
            factor = [1.0] * 4 + [10.0] + [1.0] * num_classes
            factor = np.array(factor * 9)
            factor = np.reshape(factor, newshape=(1, 1, 1, -1))
            cropped = cropped * factor
            res.append(cropped)
        elif key == "seg_abn":
            factor = 2
            size = (det_sz[0] * factor, det_sz[1] * factor)
            resized = resize_images(logits_dict[key], size,
                                    interpolation=cv2.INTER_NEAREST)
            resized *= 0.1
            resized = self._space2depth(resized, factor)
            res.append(resized)
        elif key == "0intersection":
            factor = 1
            size = (det_sz[0] * factor, det_sz[1] * factor)
            # replicate the per-image value over the full spatial grid
            expanded = np.reshape(logits_dict[key], (-1, 1, 1, 1))
            resized = np.tile(expanded, (1, size[0], size[1], 1))
            res.append(resized)
        elif key == "drivable_area":
            factor = 3
            size = (det_sz[0] * factor, det_sz[1] * factor)
            resized = resize_images(logits_dict[key], size,
                                    interpolation=cv2.INTER_NEAREST)
            resized *= 0.1
            resized = self._space2depth(resized, factor)
            res.append(resized)
    concat = np.concatenate(res, axis=3)
    return concat
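# `self._space2depth` is not defined in this file. The sketch below is an
# assumption about what it does, written as a plain function for illustration:
# each factor*factor spatial block is folded into the channel dimension, so
# every branch above lands on the common (39, 52) detection grid before the
# final concatenation along the channel axis.
import numpy as np


def space2depth(x, factor):
    # x: B*H*W*C with H and W divisible by factor
    B, H, W, C = x.shape
    x = x.reshape(B, H // factor, factor, W // factor, factor, C)
    x = x.transpose(0, 1, 3, 2, 4, 5)
    return x.reshape(B, H // factor, W // factor, factor * factor * C)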