def current_screen(self):
    # Max of two consecutive frames
    assert self.last_raw_screen is not None
    rgb_img = np.maximum(self.ale.getScreenRGB(), self.last_raw_screen)
    # Make sure the last raw screen is used only once
    self.last_raw_screen = None
    assert rgb_img.shape == (210, 160, 3)
    # RGB -> Luminance (Rec. 709: 0.2126 R + 0.7152 G + 0.0722 B)
    img = rgb_img[:, :, 0] * 0.2126 + rgb_img[:, :, 1] * 0.7152 \
        + rgb_img[:, :, 2] * 0.0722
    img = img.astype(np.uint8)
    if img.shape == (250, 160):
        raise RuntimeError("This ROM is for PAL. Please use ROMs for NTSC")
    assert img.shape == (210, 160)
    if self.crop_or_scale == 'crop':
        # Shrink (210, 160) -> (110, 84); imresize takes (height, width)
        img = imresize(img, (110, 84))
        assert img.shape == (110, 84)
        # Crop (110, 84) -> (84, 84)
        unused_height = 110 - 84
        bottom_crop = 8
        top_crop = unused_height - bottom_crop
        img = img[top_crop:110 - bottom_crop, :]
    elif self.crop_or_scale == 'scale':
        img = imresize(img, (84, 84))
    else:
        raise RuntimeError('crop_or_scale must be either crop or scale')
    assert img.shape == (84, 84)
    return img

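# A minimal sketch (an assumption, not from the source) of how current_screen()
# is typically driven: the raw screen is saved one frame before the observation
# frame, so np.maximum() blends two consecutive frames and removes Atari's
# sprite flicker. The frame skip of 4 and the method name are illustrative.
def receive_action(self, action):
    rewards = []
    for i in range(4):
        # Remember the second-to-last raw frame of the skip window
        if i == 2:
            self.last_raw_screen = self.ale.getScreenRGB()
        rewards.append(self.ale.act(action))
    self.state = self.current_screen()
    return sum(rewards)
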
def upscale_alg(self, im_l_y, s):
    h_gt, w_gt = im_l_y.shape[0] * s, im_l_y.shape[1] * s
    # hpsz = self.params['patch_size']/2
    itr_all = int(np.ceil(np.log(s) / np.log(self.params['mdl_scale'])))

    # batch buffers for the convnet: flattened input/output patches and the
    # integer top-left offset of each patch
    indata = np.zeros((self.params['input_size']**2, self.params['batch']), dtype=np.float32)
    outdata = np.zeros((self.params['output_size']**2, self.params['batch']), dtype=np.float32)
    idxdata = np.zeros((2, self.params['batch']), dtype=np.int32)
    ftrs = np.zeros((self.params['batch'], self.params['output_size']**2), dtype=np.float32)

    for itr in range(itr_all):
        print('itr:', itr)
        if self.params['matlab_bic'] == 1:
            im_y = utils.imresize_bic2(im_l_y, self.params['mdl_scale'])
        else:
            im_y = utils.imresize(im_l_y, self.params['mdl_scale'])
        im_y = utils.ExtendBorder(im_y, self.params['border_size'])
        h, w = im_y.shape

        # tile the image with input_size patches at output_size stride,
        # plus one final row/column flush with the border
        Height_idx = list(range(0, h - self.params['input_size'], self.params['output_size']))
        Width_idx = list(range(0, w - self.params['input_size'], self.params['output_size']))
        Height_idx += [h - self.params['input_size']]
        Width_idx += [w - self.params['input_size']]

        bcnt = 0
        im_h_y = np.zeros((h, w), dtype=np.float32)
        t0 = time.time()
        for i in Height_idx:
            for j in Width_idx:
                idxdata[0, bcnt] = i
                idxdata[1, bcnt] = j
                tmp = im_y[i:i + self.params['input_size'], j:j + self.params['input_size']]
                indata[:, bcnt] = np.reshape(tmp, (indata.shape[0], ))
                bcnt += 1
                if bcnt == self.params['batch'] or (i == Height_idx[-1] and j == Width_idx[-1]):
                    # run the convnet on the batch and paste the predicted
                    # output patches back into the upscaled image
                    self.model.do_write_one_feature([indata, outdata], ftrs, self.params['layer_idx'])
                    for b in range(bcnt):
                        si = idxdata[0, b] + self.params['border_size']
                        sj = idxdata[1, b] + self.params['border_size']
                        im_h_y[si:si + self.params['output_size'], sj:sj + self.params['output_size']] = \
                            np.reshape(ftrs[b, :], (self.params['output_size'], self.params['output_size']))
                    bcnt = 0
        t1 = time.time()
        print('convnet time: {}'.format(t1 - t0))

        im_h_y = im_h_y[self.params['border_size']:-self.params['border_size'],
                        self.params['border_size']:-self.params['border_size']]
        im_l_y = im_h_y

    # shrink size to gt
    if im_h_y.shape[0] > h_gt:
        print('downscale from {} to {}'.format(im_h_y.shape, (h_gt, w_gt)))
        if self.params['matlab_bic'] == 1:
            im_h_y = utils.imresize_bic2(im_h_y, 1.0 * h_gt / im_h_y.shape[0])
        else:
            im_h_y = utils.imresize(im_h_y, 1.0 * h_gt / im_h_y.shape[0])
        assert im_h_y.shape[1] == w_gt
    return im_h_y

def render(self, size=None):
    image = np.zeros((self.size, self.size, 3), np.uint8)
    flow = np.zeros((self.size, self.size, 2), np.float32)
    image[..., :] = self.color
    for obj in self.objs:
        obj.render(image, flow)
    if size is not None:
        image = imresize(image, size=size)
        # rescale flow vectors along with the spatial resize
        flow = imresize(flow, size=size) / self.size * size
    return image, flow

def upscale_alg(self, im_l_y, s):
    h_gt, w_gt = im_l_y.shape[0] * s, im_l_y.shape[1] * s
    hpsz = self.PATCH_SIZE // 2  # half patch size (unused below)
    itr_all = int(np.ceil(np.log(s) / np.log(self.MDL_SCALE)))

    for itr in range(itr_all):
        print('itr:', itr)
        im_y = utils.imresize(im_l_y, self.MDL_SCALE)
        im_y = utils.ExtendBorder(im_y, self.BORDER_SIZE)
        mdl = self.mdls[itr]

        # extract gradient features
        convfea = utils.ExtrConvFea(im_y, mdl['conv'])
        im_mean = utils.ExtrConvFea(im_y, mdl['mean2'])
        diffms = utils.ExtrConvFea(im_y, mdl['diffms'])

        # matrix operation
        h, w, c = convfea.shape
        convfea = convfea.reshape([h * w, c])
        convfea_norm = np.linalg.norm(convfea, axis=1)
        convfea = (convfea.T / convfea_norm).T
        wd = np.dot(convfea, mdl['wd'])
        z0 = utils.ShLU(wd, 1)
        z = utils.ShLU(np.dot(z0, mdl['usd1']) + wd, 1)  # sparse code

        hPatch = np.dot(z, mdl['ud'])
        hNorm = np.linalg.norm(hPatch, axis=1)
        diffms = diffms.reshape([h * w, diffms.shape[2]])
        mNorm = np.linalg.norm(diffms, axis=1)
        hPatch = (hPatch.T / hNorm * mNorm).T * self.SCALE_Y
        hPatch = hPatch * mdl['addp'].flatten()
        hPatch = hPatch.reshape([h, w, hPatch.shape[1]])

        # accumulate the overlapping predicted patches onto the mean image
        im_h_y = im_mean[:, :, 0]
        h, w = im_h_y.shape
        cnt = 0
        for ii in range(self.PATCH_SIZE - 1, -1, -1):
            for jj in range(self.PATCH_SIZE - 1, -1, -1):
                im_h_y = im_h_y + hPatch[jj:(jj + h), ii:(ii + w), cnt]
                cnt = cnt + 1

        im_l_y = im_h_y

    # shrink size to gt
    if im_h_y.shape[0] > h_gt:
        print('downscale from {} to {}'.format(im_h_y.shape, (h_gt, w_gt)))
        im_h_y = utils.imresize(im_h_y, 1.0 * h_gt / im_h_y.shape[0])
        assert im_h_y.shape[1] == w_gt
    return im_h_y

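# Reading note (an interpretation, not stated in the code): ShLU appears to be
# the soft-threshold nonlinearity, so the wd -> z0 -> z chain above is one
# unrolled LISTA-style sparse-coding iteration, and mdl['ud'] maps the sparse
# code z back to high-resolution patch space.
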
def main():
    model = ModelPipeline()
    webcam = WebcamCapture()
    base_fps = webcam.get(cv2.CAP_PROP_FPS)
    print('Base FPS:', base_fps)

    info_frame = crop(webcam.read())
    mp.Process(target=pull, args=(info_frame, base_fps)).start()

    context = zmq.Context()
    socket = context.socket(zmq.PUSH)
    socket.bind('tcp://*:5555')

    height, width, _ = info_frame.shape
    hand_data_keys = ['origin', 'joints', 'distX', 'distY', 'vert']
    fps_send = FPS('Send:')

    while True:
        frame_large = webcam.read_rgb()
        frame_large = crop(frame_large)
        # split the frame into left/right halves, one per hand
        frame_large_l = frame_large[:, :width // 2]
        frame_large_r = frame_large[:, width // 2:]
        frame_l = imresize(frame_large_l, (128, 128))
        frame_r = imresize(frame_large_r, (128, 128))

        # iv - intermediate values
        ivl, _ = model.process(np.flip(frame_l, axis=1))
        ivr, _ = model.process(frame_r)
        hand_data_l = calc_hand_data(ivl)
        hand_data_r = calc_hand_data(ivr)

        if hand_data_l is not None and hand_data_r is not None:
            socket.send_json(
                {
                    'dataL': dict(zip(hand_data_keys, hand_data_l)),
                    'dataR': dict(zip(hand_data_keys, hand_data_r)),
                    'frameWidth': frame_large.shape[1],
                    'frameHeight': frame_large.shape[0],
                }, zmq.SNDMORE)
            socket.send(np.flip(frame_large, axis=0).tobytes())
            fps_send()

def upscale(self, im_l, s):
    """
    im_l: LR image, float np array in [0, 255]
    im_h: HR image, float np array in [0, 255]
    """
    im_l = im_l / 255.0
    if len(im_l.shape) == 3 and im_l.shape[2] == 3:
        im_l_ycbcr = utils.rgb2ycbcr(im_l)
    else:
        # grayscale input: replicate the single channel into YCbCr
        im_l_ycbcr = np.zeros([im_l.shape[0], im_l.shape[1], 3])
        im_l_ycbcr[:, :, 0] = im_l
        im_l_ycbcr[:, :, 1] = im_l
        im_l_ycbcr[:, :, 2] = im_l

    im_l_y = im_l_ycbcr[:, :, 0] * 255  # [16, 235]
    im_h_y = self.upscale_alg(im_l_y, s)

    # recover color
    if len(im_l.shape) == 3:
        im_ycbcr = utils.imresize(im_l_ycbcr, s)
        im_ycbcr[:, :, 0] = im_h_y / 255.0  # [16/255, 235/255]
        im_h = utils.ycbcr2rgb(im_ycbcr) * 255.0
    else:
        im_h = im_h_y

    im_h = np.clip(im_h, 0, 255)
    im_h_y = np.clip(im_h_y, 0, 255)
    return im_h, im_h_y

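# Minimal usage sketch; `sr` stands for any object exposing the upscale()
# method above (an illustrative name, not from the source), and the input is
# a float array in [0, 255] as the docstring requires.
import numpy as np
from PIL import Image

im_l = np.asarray(Image.open('lr.png'), dtype=np.float64)
im_h, im_h_y = sr.upscale(im_l, s=2)
Image.fromarray(im_h.astype(np.uint8)).save('hr.png')
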
def __getitem__(self, index):
    # get downscaled and cropped image (if necessary)
    index_noisy, index_clean = index, np.random.randint(0, len(self.cleandir_files))
    noisy_image = self.input_transform(Image.open(self.noisy_dir_files[index_noisy]))
    clean_image = self.input_transform(Image.open(self.cleandir_files[index_clean]))
    if self.rotations:
        angle = random.choice([0, 90, 180, 270])
        noisy_image = TF.rotate(noisy_image, angle)
        angle = random.choice([0, 90, 180, 270])
        clean_image = TF.rotate(clean_image, angle)
    if self.cropped:
        cropped_image_noisy = self.crop_transform(noisy_image)
    clean_image = TF.to_tensor(clean_image)
    resized_image = utils.imresize(clean_image, 1.0 / self.upscale_factor, True)
    if self.cropped:
        return clean_image, resized_image, TF.to_tensor(cropped_image_noisy)
    else:
        return resized_image

def load_mask(mask_path, shape, return_mask_img=False):
    if K.image_data_format() == "channels_first":
        _, channels, width, height = shape
    else:
        _, width, height, channels = shape

    mask = imread(mask_path, mode="L")  # Grayscale mask load
    mask = imresize(mask, (width, height)).astype('float32')

    # Perform binarization of mask
    mask[mask <= 127] = 0
    mask[mask > 128] = 255

    mask_max = np.amax(mask)
    mask /= mask_max

    if return_mask_img:
        return mask

    # broadcast the 2D mask across all channels
    mask_shape = shape[1:]
    mask_tensor = np.empty(mask_shape)
    for i in range(channels):
        if K.image_data_format() == "channels_first":
            mask_tensor[i, :, :] = mask
        else:
            mask_tensor[:, :, i] = mask

    return mask_tensor

def load_training_batch(dataset_dir, TRAIN_SIZE, PATCH_WIDTH, PATCH_HEIGHT, DSLR_SCALE):
    train_directory_dslr = dataset_dir + 'train/canon/'
    train_directory_phone = dataset_dir + 'train/huawei_raw/'

    # NUM_TRAINING_IMAGES = 46839
    NUM_TRAINING_IMAGES = len([name for name in os.listdir(train_directory_phone)
                               if os.path.isfile(os.path.join(train_directory_phone, name))])

    TRAIN_IMAGES = np.random.choice(np.arange(0, NUM_TRAINING_IMAGES), TRAIN_SIZE, replace=False)

    train_data = np.zeros((TRAIN_SIZE, PATCH_WIDTH, PATCH_HEIGHT, 4))
    train_answ = np.zeros((TRAIN_SIZE, int(PATCH_WIDTH * DSLR_SCALE), int(PATCH_HEIGHT * DSLR_SCALE), 3))

    i = 0
    for img in TRAIN_IMAGES:
        # phone RAW input: split into the four Bayer channels
        I = np.asarray(imageio.imread(train_directory_phone + str(img) + '.png'))
        I = extract_bayer_channels(I)
        train_data[i, :] = I

        # DSLR target: resize and normalize to [0, 1]
        I = np.asarray(Image.open(train_directory_dslr + str(img) + '.jpg'))
        I = utils.imresize(I, DSLR_SCALE / 2, interp='bicubic')
        I = np.float16(np.reshape(I, [1, int(PATCH_WIDTH * DSLR_SCALE), int(PATCH_HEIGHT * DSLR_SCALE), 3])) / 255
        train_answ[i, :] = I

        i += 1

    return train_data, train_answ

def load_imgs(file_paths, resize=0.5):
    slice_ = (slice(0, 112), slice(0, 92))
    h_slice, w_slice = slice_
    h = (h_slice.stop - h_slice.start) // (h_slice.step or 1)
    w = (w_slice.stop - w_slice.start) // (w_slice.step or 1)

    if resize is not None:
        resize = float(resize)
        h = int(resize * h)
        w = int(resize * w)

    n_faces = len(file_paths)
    faces = np.zeros((n_faces, h, w), dtype=np.float32)

    # iterate over the collected file paths to load the jpeg files as numpy
    # arrays
    for i, file_path in enumerate(file_paths):
        img = imread(file_path)
        face = np.asarray(img[slice_], dtype=np.float32)
        face /= 255.0  # scale uint8 coded colors to the [0.0, 1.0] floats
        if resize is not None:
            face = imresize(face, resize)
        faces[i, ...] = face

    return faces

def d_s(pan, ms, fused, q=1, r=4, ws=7):
    """Calculates Spatial Distortion Index (D_S).

    :param pan: high resolution panchromatic image.
    :param ms: low resolution multispectral image.
    :param fused: high resolution fused image.
    :param q: parameter to emphasize large spatial differences (default = 1).
    :param r: ratio of high resolution to low resolution (default = 4).
    :param ws: sliding window size (default = 7).

    :returns: float -- D_S.
    """
    pan = pan.astype(np.float64)
    fused = fused.astype(np.float64)

    pan_degraded = uniform_filter(pan.astype(np.float64), size=ws) / (ws**2)
    pan_degraded = imresize(pan_degraded, (pan.shape[0] // r, pan.shape[1] // r))

    L = ms.shape[2]

    M1 = np.zeros(L)
    M2 = np.zeros(L)
    for l in range(L):
        M2[l] = uqi(ms[:, :, l], pan_degraded[:, :, l])
        M1[l] = uqi(fused[:, :, l], pan[:, :, l])

    diff = np.abs(M1 - M2)**q
    return ((1. / L) * (np.sum(diff)))**(1. / q)

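# Hedged note (not in the source): the return value is the q-norm of the
# per-band UQI gaps,
#     D_S = ((1/L) * sum_l |M1[l] - M2[l]|**q) ** (1/q),
# so with the default q=1 it reduces to a mean absolute difference:
import numpy as np

M1 = np.array([0.95, 0.90, 0.88])  # illustrative UQI(fused_l, pan) values
M2 = np.array([0.93, 0.91, 0.80])  # illustrative UQI(ms_l, pan_degraded) values
q, L = 1, len(M1)
print(((1. / L) * np.sum(np.abs(M1 - M2) ** q)) ** (1. / q))  # ~0.0367
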
def renderframe(modeltest, outname, sess, upsample_method):
    # TODO finish this
    print("Model: " + modeltest + ' saved test file ' + outname)

    # load test image
    input_img_path = '/home/kth/deepstuff/frames/bk01.jpg'
    testimg = utils.imread2(input_img_path)
    testimg = utils.imresize(testimg, 1)
    # testimg = utils.imresize_xy(testimg, 256, 256)
    testimg_4d = testimg[np.newaxis, :]  # .astype(np.float32)

    # tf.reset_default_graph()
    with tf.variable_scope('img_t_net_test', reuse=tf.AUTO_REUSE):
        Xtest = tf.placeholder(tf.float32, shape=testimg_4d.shape, name='input')
        Ytest = create_net(Xtest, upsample_method)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    print("Evaluating test image...")
    with tf.Session() as sesstest:
        sesstest.run(init_op)
        img_out = sesstest.run(Ytest, feed_dict={Xtest: testimg_4d})

    img_out = np.squeeze(img_out)
    utils.imwrite(outname, img_out)

def generate_from_coarsest(self, scale, reals, mode='rand'):
    """ Use random/fixed noise to generate from the coarsest scale """
    fake = tf.zeros_like(reals[0])
    if scale > 0:
        if mode == 'rand':
            for i in range(scale):
                z_rand = tf.random.normal(reals[i].shape)
                z_rand = self.NoiseAmp[i] * z_rand
                fake = self.generators[i](fake, z_rand)
                fake = imresize(fake, new_shapes=reals[i + 1].shape)
        elif mode == 'rec':
            for i in range(scale):
                z_fixed = self.NoiseAmp[i] * self.Z_fixed[i]
                fake = self.generators[i](fake, z_fixed)
                fake = imresize(fake, new_shapes=reals[i + 1].shape)
    return fake

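# Hedged usage sketch (`singan` and `reals` are assumed to exist, with at
# least scale + 1 pyramid levels in `reals`): 'rand' draws fresh noise at
# every scale and yields a new sample, while 'rec' replays the stored fixed
# noise maps and reconstructs the training image.
sample = singan.generate_from_coarsest(scale=5, reals=reals, mode='rand')
recon = singan.generate_from_coarsest(scale=5, reals=reals, mode='rec')
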
def __call__(self, images):
    in_h, in_w, _ = images[0].shape
    scaled_h, scaled_w = self.h, self.w

    scaled_images = [imresize(im, (scaled_h, scaled_w)) for im in images]
    return scaled_images

def save_image_callback(model, info_dict=None):
    global prev_min_val, start_time, content

    info_dict = info_dict or {}
    loss_value = info_dict.get('loss', None)
    i = info_dict.get('iter', -1)

    print("Model params", len(model.trainable_variables))

    if loss_value is not None:
        loss_val = loss_value.numpy()
        if prev_min_val == -1:
            prev_min_val = loss_val

        improvement = (prev_min_val - loss_val) / prev_min_val * 100
        print("Current loss value:", loss_val, " Improvement : %0.3f" % improvement, "%")
        prev_min_val = loss_val

    if (i + 1) % 100 == 0:
        img = model.x.numpy()

        # save current generated image
        img = deprocess_image(img)

        if preserve_color and content is not None:
            img = original_color_transform(content, img, mask=color_mask)

        if not rescale_image:
            img_ht = int(img_width * aspect_ratio)
            print("Rescaling Image to (%d, %d)" % (img_width, img_ht))
            img = imresize(img, (img_width, img_ht), interp=args.rescale_method)

        if rescale_image:
            print("Rescaling Image to (%d, %d)" % (img_WIDTH, img_HEIGHT))
            img = imresize(img, (img_WIDTH, img_HEIGHT), interp=args.rescale_method)

        fname = result_prefix + "_at_iteration_%d.png" % (i + 1)
        imsave(fname, img)
        end_time = time.time()
        print("Image saved as", fname)
        print("Iteration %d completed in %ds" % (i + 1, end_time - start_time))

def process(self, img, side):
    frame = imresize(img, (128, 128))
    if side == self.flip_side:
        frame = np.flip(frame, axis=1)
    _, theta_mpii = self.model.process(frame)
    theta_mano = mpii_to_mano(theta_mpii)
    return theta_mano, frame

def preprocess(self):
    ''' Preprocess frame for agent '''
    img = None
    if self.blend_method == "max":
        img = np.amax(self.buffer, axis=0)
    return imresize(img, self.screen_dims)

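# Illustrative check (not from the source): a "max" blend over the frame
# buffer keeps a sprite that is visible in either frame, which is why it is
# used to undo Atari's hardware sprite flicker.
import numpy as np

buffer = np.stack([np.array([[0, 255], [0, 0]], dtype=np.uint8),
                   np.array([[0, 0], [255, 0]], dtype=np.uint8)])
print(np.amax(buffer, axis=0))  # [[  0 255] [255   0]]
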
def get_feature(self, cat, img, feature):
    """ Load a feature from disk. """
    filename = self.path(cat, img, feature)
    data = loadmat(filename)
    # pick the one variable that is not a MATLAB metadata key
    name = [k for k in data.keys() if not k.startswith('__')]
    if self.size is not None:
        return imresize(data[name.pop()], self.size)
    return data[name.pop()]

def create_real_pyramid(self, real_image, num_scales):
    """ Create the pyramid of scales """
    reals = [real_image]
    for i in range(1, num_scales):
        reals.append(imresize(real_image, scale_factor=pow(self.scale_factor, i)))

    # Reverse it to coarse-to-fine order
    reals.reverse()
    for real in reals:
        print(real.shape)
    return reals

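# Illustrative size check (assumed values, not from the source): the side
# lengths of the pyramid follow size * scale_factor**i; exact pixel counts
# depend on the rounding inside imresize.
scale_factor, num_scales, size = 0.75, 4, 250
sides = [round(size * scale_factor ** i) for i in range(num_scales)]
print(list(reversed(sides)))  # -> [105, 141, 188, 250], coarse to fine
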
def SinGAN_inject(self, reals, inject_scale=1):
    """ Inject the reference image at a given scale (inject_scale must be > 0) """
    fake = reals[inject_scale]
    for scale in range(inject_scale, len(reals)):
        fake = imresize(fake, new_shapes=reals[scale].shape)
        z = tf.random.normal(fake.shape)
        z = z * self.NoiseAmp[scale]
        fake = self.model[scale](fake, z)
    return fake

def get_image(self, cat, img):
    """ Loads an image from disk. """
    filename = self.path(cat, img)
    if filename.endswith('mat'):
        data = loadmat(filename)['output']
    else:
        data = imread(filename)
    if self.size is not None:
        return imresize(data, self.size)
    else:
        return data

def flow(self, mode='train'):
    while True:
        if mode == 'train':
            shuffle(self.train_keys)
            keys = self.train_keys
        elif mode == 'val' or mode == 'demo':
            shuffle(self.validation_keys)
            keys = self.validation_keys
        else:
            raise Exception('invalid mode: %s' % mode)

        inputs = []
        targets = []
        for key in keys:
            image_path = self.path_prefix + key
            image_array = imread(image_path)
            image_array = imresize(image_array, self.image_size)

            num_image_channels = len(image_array.shape)
            if num_image_channels != 3:
                continue

            ground_truth = self.ground_truth_data[key]

            if self.do_random_crop:
                image_array = self._do_random_crop(image_array)

            image_array = image_array.astype('float32')
            if mode == 'train' or mode == 'demo':
                if self.ground_truth_transformer is not None:
                    image_array, ground_truth = self.transform(image_array, ground_truth)
                    ground_truth = self.ground_truth_transformer.assign_boxes(ground_truth)
                else:
                    image_array = self.transform(image_array)[0]

            inputs.append(image_array)
            targets.append(ground_truth)
            if len(targets) == self.batch_size:
                inputs = np.asarray(inputs)
                targets = np.asarray(targets)
                # this will not work for boxes
                targets = to_categorical(targets)
                if mode == 'train' or mode == 'val':
                    inputs = self.preprocess_images(inputs)
                    yield self._wrap_in_dictionary(inputs, targets)
                if mode == 'demo':
                    yield self._wrap_in_dictionary(inputs, targets)
                inputs = []
                targets = []

def __call__(self, images):
    in_h, in_w, _ = images[0].shape
    # random upscale by up to 15%, then a random crop back to (h, w)
    x_scaling, y_scaling = np.random.uniform(1, 1.15, 2)
    scaled_h, scaled_w = int(in_h * y_scaling), int(in_w * x_scaling)

    scaled_images = [imresize(im, (scaled_h, scaled_w)) for im in images]

    offset_y = np.random.randint(scaled_h - self.h + 1)
    offset_x = np.random.randint(scaled_w - self.w + 1)
    cropped_images = [im[offset_y:offset_y + self.h, offset_x:offset_x + self.w]
                      for im in scaled_images]
    return cropped_images

def load_mask(mask_path, shape):
    mask = imread(mask_path, mode="L")  # Grayscale mask load
    width, height, _ = shape
    mask = imresize(mask, (width, height), interp='bicubic').astype('float32')

    # Perform binarization of mask
    mask[mask <= 127] = 0
    mask[mask > 128] = 255

    mask /= 255
    mask = mask.astype(np.int32)

    return mask

def __getitem__(self, index):
    # get downscaled and cropped image (if necessary)
    noisy_image = self.input_transform(Image.open(self.files[index]))
    if self.rotations:
        angle = random.choice([0, 90, 180, 270])
        noisy_image = TF.rotate(noisy_image, angle)
    if self.cropped:
        cropped_image = self.crop_transform(noisy_image)
    noisy_image = TF.to_tensor(noisy_image)
    resized_image = utils.imresize(noisy_image, 1.0 / self.upscale_factor, True)
    if self.cropped:
        return resized_image, TF.to_tensor(cropped_image)
    else:
        return resized_image

def process_one_category(data_path):
    bird_category = int(data_path.split('/')[-1].split('.')[0])
    filenames = os.listdir(data_path)
    out_dir = 'output/bird_{0:03d}'.format(bird_category)
    os.mkdir(out_dir)

    # load images
    raw_images = [plt.imread(os.path.join(data_path, filename)) for filename in filenames]
    for i in range(len(raw_images)):
        img = raw_images[i]
        if np.array(img).shape[-1] > 3:
            raw_images[i] = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        cv2.imwrite(os.path.join(out_dir, 'raw_{0:03d}_{1}.png'.format(bird_category, i)), img)
    raw_images = [imresize(img, 224, 224) for img in raw_images]  # resize
    raw_images = np.stack(raw_images)

    # preprocess
    images = raw_images.transpose((0, 3, 1, 2)).astype('float32')  # to numpy, NxCxHxW, float32
    images -= np.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))  # zero mean
    images /= np.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))  # unit variance
    images = torch.from_numpy(images)  # convert to pytorch tensor
    if cuda:
        images = images.cuda()

    net = models.vgg19(pretrained=True)  # load pre-trained VGG-19
    if cuda:
        net = net.cuda()
    del net.features._modules['36']  # remove max-pooling after final conv layer

    with torch.no_grad():
        features = net.features(images)
    flat_features = features.permute(0, 2, 3, 1).contiguous().view((-1, features.size(1)))  # NxCxHxW -> (N*H*W)xC
    print('Reshaped features from {0}x{1}x{2}x{3} to ({0}*{2}*{3})x{1} = {4}x{1}'.format(
        *features.shape, flat_features.size(0)))

    for K in [15]:
        with torch.no_grad():
            W, _ = NMF(flat_features, K, random_seed=0, cuda=cuda, max_iter=50)

        heatmaps = W.cpu().view(features.size(0), features.size(2), features.size(3), K).permute(0, 3, 1, 2)  # (N*H*W)xK -> NxKxHxW
        heatmaps = torch.nn.functional.interpolate(heatmaps, size=(224, 224), mode='bilinear', align_corners=False)  # 14x14 -> 224x224
        heatmaps /= heatmaps.max(dim=3, keepdim=True)[0].max(dim=2, keepdim=True)[0]  # normalize by factor (i.e., 1 of K)
        heatmaps = heatmaps.cpu().numpy()
        # print(heatmaps.shape)  # (60, K, 224, 224)
        save_mask2d(heatmaps, K, out_dir)

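# Hedged note on the factorization shapes (an explanation, not source code):
# NMF approximates flat_features ((N*H*W) x C) as W @ H with W of shape
# (N*H*W) x K, so each of the K columns of W, reshaped back to N x H x W,
# acts as a spatial heatmap for one discovered part.
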
def __getitem__(self, index):
    images = []
    for k in range(2):
        image = imread(os.path.join(self.data_path,
                                    '{0}_im{1}.png'.format(self.data[index], k + 1)))
        image = imresize(image, size=self.size)
        images.append(image)

    tmp = np.load(os.path.join(self.data_path,
                               '{0}_f.npy'.format(self.data[index]))).astype(np.float32)

    flow_inputs = []
    for axis in range(2):
        flow_input = imresize(tmp[axis], size=self.size)
        flow_inputs.append(flow_input)

    flow_targets = []
    for axis in range(2):
        flow_target = imresize(tmp[axis], size=self.size) * self.scale
        flow_targets.append(flow_target)

    image_inputs = images[0]
    flow_inputs = np.stack(flow_inputs, axis=0)
    image_targets = images[1]
    flow_targets = np.stack(flow_targets, axis=0)

    returns = {
        'image_inputs': image_inputs.astype(np.float32),
        'flow_inputs': flow_inputs.astype(np.float32),
        'image_targets': image_targets.astype(np.float32),
        'flow_targets': flow_targets.astype(np.float32),
    }
    return returns

def live_application(capture, output_dirpath):
    model = ModelPipeline()

    frame_index = 0
    mano_params = []
    measure_time = True
    while True:
        frame_large = capture.read()
        if frame_large is None:
            print(f'none frame {frame_index}')
            # if frame_index == 0:
            #     continue
            break

        # if frame_large.shape[0] > frame_large.shape[1]:
        #     margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
        #     frame_large = frame_large[margin:-margin]
        # else:
        #     margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
        #     frame_large = frame_large[:, margin:-margin]

        frame = imresize(frame_large, (128, 128))

        if measure_time:
            # benchmark: time the network and the MANO conversion separately
            ends1 = []
            ends2 = []
            for i in range(1000):
                start = time.time()
                _, theta_mpii = model.process(frame)
                end1 = time.time()
                theta_mano = mpii_to_mano(theta_mpii)
                end2 = time.time()
                ends1.append(end1 - start)
                ends2.append(end2 - start)
            t1 = np.mean(ends1[10:])  # drop warm-up iterations
            t2 = np.mean(ends2[10:])
            print(f't1: {t1 * 1000:.2f}ms, {1 / t1:.2f}hz')
            print(f't2: {t2 * 1000:.2f}ms, {1 / t2:.2f}hz')
            return
        else:
            _, theta_mpii = model.process(frame)
            theta_mano = mpii_to_mano(theta_mpii)

        mano_params.append(deepcopy(theta_mano.tolist()))
        osp.join(output_dirpath, "%06d.jpg" % frame_index)  # path built but unused here
        frame_index += 1

    with open(osp.join(output_dirpath, f'{capture.side}.pickle'), 'w') as f:
        json.dump(mano_params, f)

def Downsample(self, x, type=None):
    assert len(x.shape) == 4, '4D input: NCHW'
    if type == 'matlab':
        print(x.shape)
        # MATLAB-style bicubic, applied sample by sample
        y = torch.cat([
            imresize(x[i], scale=self.downsample_scale).view(
                1, x.shape[1],
                int(self.downsample_scale * x.shape[2]),
                int(self.downsample_scale * x.shape[3]))
            for i in range(x.shape[0])
        ], dim=0)
    else:
        # use torch bicubic as default, no anti-aliasing (bicubic, bilinear)
        y = F.interpolate(x, scale_factor=self.downsample_scale, mode="bicubic")
    return y.type(self.dtype)

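# Hedged usage sketch of the default branch above, assuming NCHW input and
# a downsample_scale of 0.25:
import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 64, 64)
y = F.interpolate(x, scale_factor=0.25, mode="bicubic")
print(y.shape)  # torch.Size([2, 3, 16, 16])
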
def __getitem__(self, index):
    # get downscaled, cropped and gt (if available) image
    hr_image = Image.open(self.hr_files[index])
    w, h = hr_image.size
    cs = utils.calculate_valid_crop_size(min(w, h), self.upscale_factor)
    if self.crop_size is not None:
        cs = min(cs, self.crop_size)
    cropped_image = TF.to_tensor(T.CenterCrop(cs // self.upscale_factor)(hr_image))
    hr_image = T.CenterCrop(cs)(hr_image)
    hr_image = TF.to_tensor(hr_image)
    resized_image = utils.imresize(hr_image, 1.0 / self.upscale_factor, True)
    if self.lr_files is None:
        return resized_image, cropped_image, resized_image
    else:
        lr_image = Image.open(self.lr_files[index])
        lr_image = TF.to_tensor(T.CenterCrop(cs // self.upscale_factor)(lr_image))
        return resized_image, cropped_image, lr_image

def upscale_alg(self, im_l_y, s):
    im_h_y = utils.imresize(im_l_y, s)
    return im_h_y

def phi(obs):
    resized = imresize(obs.image_buffer, (84, 84))
    # HWC -> CHW, scaled to [0, 1]
    return resized.transpose(2, 0, 1).astype(np.float32) / 255