def h_autoencoder_grad(self, h, encoder, decoder, gen_out_layer, topleft, inpainting):
    '''
    Compute the gradient of the energy of P(input) wrt input, which is given by
    decode(encode(input)) - input {see Alain & Bengio, 2014}. Specifically, we
    compute E(G(h)) - h.
    Note: this is an "upside down" auto-encoder for h that goes h -> x -> h, with
    G modeling h -> x and E modeling x -> h.
    '''
    generated = encoder.forward(feat=h)
    x = encoder.blobs[gen_out_layer].data.copy()    # 256x256

    # Crop from 256x256 to 227x227
    image_size = decoder.blobs['data'].shape        # (1, 3, 227, 227)
    cropped_x = x[:, :, topleft[0]:topleft[0] + image_size[2],
                  topleft[1]:topleft[1] + image_size[3]]

    # Mask the image when inpainting
    if inpainting is not None:
        cropped_x = util.apply_mask(img=cropped_x, mask=inpainting['mask'],
                                    context=inpainting['image'])

    # Push this 227x227 image through the net
    decoder.forward(data=cropped_x)
    code = decoder.blobs['fc6'].data

    g = code - h
    return g
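
# --- Illustrative sketch (not part of the original code) ---
# The prior gradient above is the denoising-autoencoder identity
# grad log p(h) ~ R(h) - h with R = encode(generate(.)) (Alain & Bengio, 2014).
# `generate` and `encode` below are hypothetical callables standing in for the
# Caffe generator/encoder forward passes used by h_autoencoder_grad.
def prior_grad(h, generate, encode):
    """Approximate d log p(h) / dh as reconstruction minus input."""
    x = generate(h)      # h -> image
    h_rec = encode(x)    # image -> code
    return h_rec - h
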
def sampling(self, condition_net, image_encoder, image_generator,
             gen_in_layer, gen_out_layer, start_code,
             n_iters, lr, lr_end, threshold,
             layer, conditions,  # units=None,
             xy=0, epsilon1=1, epsilon2=1, epsilon3=1e-10,
             inpainting=None,  # in-painting args
             output_dir=None, reset_every=0, save_every=1):

    # Get the input and output sizes
    image_shape = condition_net.blobs['data'].data.shape
    generator_output_shape = image_generator.blobs[gen_out_layer].data.shape
    encoder_input_shape = image_encoder.blobs['data'].data.shape

    # Calculate the difference between the input image of the condition net
    # and the output image from the generator
    image_size = util.get_image_size(image_shape)
    generator_output_size = util.get_image_size(generator_output_shape)
    encoder_input_size = util.get_image_size(encoder_input_shape)

    # The top left offset to crop the output image to get a 227x227 image
    topleft = util.compute_topleft(image_size, generator_output_size)
    topleft_DAE = util.compute_topleft(encoder_input_size, generator_output_size)

    src = image_generator.blobs[gen_in_layer]  # the input feature layer of the generator

    # Make sure the layer size and initial vector size match
    assert src.data.shape == start_code.shape

    # Variables to store the best sample
    last_xx = np.zeros(image_shape)  # best image
    last_prob = -sys.maxint          # highest probability

    h = start_code.copy()

    condition_idx = 1
    list_samples = []
    i = 0

    print('Captions to be conditioned:')
    for c in conditions:  # use a separate loop variable so the chain still starts at i = 0
        print(c['readable'])

    while True:
        step_size = lr + ((lr_end - lr) * i) / n_iters
        # condition = conditions[condition_idx]  # Select a class

        # 1. Compute the epsilon1 term ---
        d_prior = self.h_autoencoder_grad(h=h, encoder=image_generator,
                                          decoder=image_encoder,
                                          gen_out_layer=gen_out_layer,
                                          topleft=topleft_DAE,
                                          inpainting=inpainting)

        # 2. Compute the epsilon2 term ---
        # Push the code through the generator to get an image x
        image_generator.blobs["feat"].data[:] = h
        generated = image_generator.forward()
        x = generated[gen_out_layer].copy()  # 256x256

        # Crop from 256x256 to 227x227
        cropped_x = x[:, :, topleft[0]:topleft[0] + image_size[0],
                      topleft[1]:topleft[1] + image_size[1]]
        cropped_x_copy = cropped_x.copy()

        if inpainting is not None:
            cropped_x = util.apply_mask(img=cropped_x, mask=inpainting['mask'],
                                        context=inpainting['image'])

        # Forward pass the image x through the condition net up to a unit k at the given layer
        # Backprop the gradient through the condition net to the image layer to get a gradient image
        grad_caption = []
        for length in xrange(len(conditions)):
            condition_ids = conditions[length]['sentence']
            d_condition_x, prob, info = self.forward_backward_from_x_to_condition(
                net=condition_net, end=layer, image=cropped_x, condition=condition_ids)
            grad_caption.append(d_condition_x)

        # Average all the gradients of the captions
        d_condition_x = np.mean(grad_caption, axis=0)

        if inpainting is not None:
            # Mask out the class gradient image
            d_condition_x[:] *= inpainting["mask"]

            # An additional objective for matching the context image
            d_context_x256 = np.zeros_like(x.copy())
            d_context_x256[:, :, topleft[0]:topleft[0] + image_size[0],
                           topleft[1]:topleft[1] + image_size[1]] = (
                inpainting["image"] - cropped_x_copy) * inpainting["mask_neg"]
            d_context_h = self.backward_from_x_to_h(generator=image_generator,
                                                    diff=d_context_x256,
                                                    start=gen_in_layer,
                                                    end=gen_out_layer)

        # Put the gradient back in the 256x256 format
        d_condition_x256 = np.zeros_like(x)
        d_condition_x256[:, :, topleft[0]:topleft[0] + image_size[0],
                         topleft[1]:topleft[1] + image_size[1]] = d_condition_x.copy()

        # Backpropagate the above gradient all the way to h (through the generator)
        # This gradient 'd_condition' is d log(p(y|h)) / dh (the epsilon2 term in Eq. 11 in the paper)
        d_condition = self.backward_from_x_to_h(generator=image_generator,
                                                diff=d_condition_x256,
                                                start=gen_in_layer,
                                                end=gen_out_layer)

        self.print_progress(i, info, prob, d_condition)

        # 3. Compute the epsilon3 term ---
        noise = np.zeros_like(h)
        if epsilon3 > 0:
            noise = np.random.normal(0, epsilon3, h.shape)  # Gaussian noise

        # Update h according to Eq. 11 in the paper
        d_h = epsilon1 * d_prior + epsilon2 * d_condition + noise

        # Plus the optional epsilon4 for matching the context region when in-painting
        if inpainting is not None:
            d_h += inpainting["epsilon4"] * d_context_h

        h += step_size / np.abs(d_h).mean() * d_h
        h = np.clip(h, a_min=0, a_max=30)  # Keep the code within a realistic range

        # Reset the code every N iters (for diversity when running a long sampling chain)
        if reset_every > 0 and i % reset_every == 0 and i > 0:
            h = np.random.normal(0, 1, h.shape)

            # Experimental: for sample diversity, it's a good idea to randomly pick epsilon1 as well
            epsilon1 = np.random.uniform(low=1e-6, high=1e-2)

        # Save every sample
        last_xx = cropped_x.copy()
        last_prob = prob

        # Filter samples based on threshold or every N iterations
        if save_every > 0 and i % save_every == 0 and prob > threshold:
            name = "%s/samples/%05d.jpg" % (output_dir, i)
            # Gradients are averaged over all captions, so label the sample with the first one
            label = self.get_label(conditions[0])
            list_samples.append((last_xx.copy(), name, label))

        # Stop if grad is 0
        if norm(d_h) == 0:
            print(" d_h is 0")
            break

        # Randomly sample a class every N iterations
        if i > 0 and i % n_iters == 0:
            condition_idx += 1
            break

        i += 1  # Next iter

    # returning the last sample
    print("-------------------------")
    print("Last sample: prob [%s] " % last_prob)

    return last_xx, list_samples
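
# --- Illustrative sketch (not part of the original code) ---
# The per-step update in the loop above (Eq. 11 style):
#   h <- clip(h + step * (eps1 * d_prior + eps2 * d_condition + N(0, eps3)))
# with the step normalized by the mean absolute gradient. `d_prior` and
# `d_condition` are assumed to be precomputed gradients of the same shape as h.
import numpy as np

def update_code(h, d_prior, d_condition, step_size,
                epsilon1=1.0, epsilon2=1.0, epsilon3=1e-10,
                clip_min=0.0, clip_max=30.0):
    noise = np.random.normal(0, epsilon3, h.shape) if epsilon3 > 0 else 0.0
    d_h = epsilon1 * d_prior + epsilon2 * d_condition + noise
    h = h + step_size / (np.abs(d_h).mean() + 1e-12) * d_h
    return np.clip(h, clip_min, clip_max)
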
def RelativePoseEstimationViaCompletion(net, data_s, data_t, args):
    """
    The main algorithm: given two sets of scans, alternate between scan completion
    and pairwise matching.
    args need to contain:
        snumclass:  number of semantic classes
        featureDim: feature dimension
        outputType: ['rgb': color, 'd': depth, 'n': normal, 's': semantic, 'f': feature]
        maskMethod: ['second']
        alterStep:
        dataset:
        para:
    """
    EPS = 1e-12
    args.idx_f_start = 0
    if 'rgb' in args.outputType:
        args.idx_f_start += 3
    if 'n' in args.outputType:
        args.idx_f_start += 3
    if 'd' in args.outputType:
        args.idx_f_start += 1
    if 's' in args.outputType:
        args.idx_f_start += args.snumclass
    if 'f' in args.outputType:
        args.idx_f_end = args.idx_f_start + args.featureDim

    with torch.set_grad_enabled(False):
        R_hat = np.eye(4)

        # get the complete scans
        complete_s = torch.cat((torch_op.v(data_s['rgb']), torch_op.v(data_s['norm']),
                                torch_op.v(data_s['depth']).unsqueeze(2)), 2).permute(2, 0, 1).unsqueeze(0)
        complete_t = torch.cat((torch_op.v(data_t['rgb']), torch_op.v(data_t['norm']),
                                torch_op.v(data_t['depth']).unsqueeze(2)), 2).permute(2, 0, 1).unsqueeze(0)

        # apply the observation mask
        view_s, mask_s, _ = util.apply_mask(complete_s.clone(), args.maskMethod)
        view_t, mask_t, _ = util.apply_mask(complete_t.clone(), args.maskMethod)
        mask_s = torch_op.npy(mask_s[0, :, :, :]).transpose(1, 2, 0)
        mask_t = torch_op.npy(mask_t[0, :, :, :]).transpose(1, 2, 0)

        # append mask for valid data
        tpmask = (view_s[:, 6:7, :, :] != 0).float().cuda()
        view_s = torch.cat((view_s, tpmask), 1)
        tpmask = (view_t[:, 6:7, :, :] != 0).float().cuda()
        view_t = torch.cat((view_t, tpmask), 1)

        for alter_ in range(args.alterStep):
            # warp the second scan using the current transformation estimate
            view_t2s = torch_op.v(util.warping(torch_op.npy(view_t), np.linalg.inv(R_hat), args.dataset))
            view_s2t = torch_op.v(util.warping(torch_op.npy(view_s), R_hat, args.dataset))

            # append the warped scans
            view0 = torch.cat((view_s, view_t2s), 1)
            view1 = torch.cat((view_t, view_s2t), 1)

            # generate complete scans
            f = net(torch.cat((view0, view1)))
            f0 = f[0:1, :, :, :]
            f1 = f[1:2, :, :, :]

            data_sc, data_tc = {}, {}

            # replace the observed region with gt depth/normal
            data_sc['normal'] = (1 - mask_s) * torch_op.npy(f0[0, 3:6, :, :]).transpose(1, 2, 0) + mask_s * data_s['norm']
            data_tc['normal'] = (1 - mask_t) * torch_op.npy(f1[0, 3:6, :, :]).transpose(1, 2, 0) + mask_t * data_t['norm']
            data_sc['normal'] /= (np.linalg.norm(data_sc['normal'], axis=2, keepdims=True) + EPS)
            data_tc['normal'] /= (np.linalg.norm(data_tc['normal'], axis=2, keepdims=True) + EPS)
            data_sc['depth'] = (1 - mask_s[:, :, 0]) * torch_op.npy(f0[0, 6, :, :]) + mask_s[:, :, 0] * data_s['depth']
            data_tc['depth'] = (1 - mask_t[:, :, 0]) * torch_op.npy(f1[0, 6, :, :]) + mask_t[:, :, 0] * data_t['depth']
            data_sc['obs_mask'] = mask_s.copy()
            data_tc['obs_mask'] = mask_t.copy()
            data_sc['rgb'] = (mask_s * data_s['rgb'] * 255).astype('uint8')
            data_tc['rgb'] = (mask_t * data_t['rgb'] * 255).astype('uint8')

            # for scannet, we use the original size rgb image (480x640) to extract sift keypoints
            if 'scannet' in args.dataset:
                data_sc['rgb_full'] = (data_s['rgb_full'] * 255).astype('uint8')
                data_tc['rgb_full'] = (data_t['rgb_full'] * 255).astype('uint8')
                data_sc['depth_full'] = data_s['depth_full']
                data_tc['depth_full'] = data_t['depth_full']

            # extract feature maps
            f0_feat = f0[:, args.idx_f_start:args.idx_f_end, :, :]
            f1_feat = f1[:, args.idx_f_start:args.idx_f_end, :, :]
            data_sc['feat'] = f0_feat.squeeze(0)
            data_tc['feat'] = f1_feat.squeeze(0)

            para_this = copy.copy(args.para)
            para_this.sigmaAngle1 = para_this.sigmaAngle1[alter_]
            para_this.sigmaAngle2 = para_this.sigmaAngle2[alter_]
            para_this.sigmaDist = para_this.sigmaDist[alter_]
            para_this.sigmaFeat = para_this.sigmaFeat[alter_]

            # run the relative pose module to get the next estimate
            R_hat = RelativePoseEstimation(data_sc, data_tc, para_this, args.dataset,
                                           args.representation, doCompletion=args.completion,
                                           maskMethod=args.maskMethod, index=None)

    return R_hat
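
# --- Illustrative sketch (not part of the original code) ---
# Skeleton of the alternation performed by RelativePoseEstimationViaCompletion:
# warp -> complete -> estimate pose, refining R_hat each round. `warp`, `complete`
# and `estimate_pose` are hypothetical callables standing in for util.warping,
# the completion network, and RelativePoseEstimation.
import numpy as np

def alternate_completion_and_matching(warp, complete, estimate_pose, n_steps):
    R_hat = np.eye(4)
    for _ in range(n_steps):
        warped = warp(R_hat)              # re-render each scan into the other's frame
        completed = complete(warped)      # fill in the unobserved regions
        R_hat = estimate_pose(completed)  # re-estimate the relative pose
    return R_hat
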
def bmRequestTypeType(self, val):
    self.bmRequestType = apply_mask(REQUEST_TYPE_MASK['type_'],
                                    self.bmRequestType,
                                    REQUEST_TYPE_TYPE[val])

def bmRequestTypeRecipient(self, val):
    self.bmRequestType = apply_mask(REQUEST_TYPE_MASK['recipient'],
                                    self.bmRequestType,
                                    REQUEST_TYPE_RECIPIENT[val])

def test_bmrequest_type_direction_host_to_device(self):
    self.setup.bmRequestType = apply_mask(
        REQUEST_TYPE_MASK['direction'],
        self.setup.bmRequestType,
        REQUEST_TYPE_DIRECTION['host_to_device'])
    self.assertEqual(self.setup.bmRequestTypeDirection, 'host_to_device')

def bmRequestTypeDirection(self, val):
    self.bmRequestType = apply_mask(REQUEST_TYPE_MASK['direction'],
                                    self.bmRequestType,
                                    REQUEST_TYPE_DIRECTION[val])
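
# --- Illustrative sketch (not part of the original code) ---
# A minimal version of the bit-field update these setters rely on, assuming the
# apply_mask(mask, current, value) convention used above: clear the masked bits
# of `current`, then OR in the new value. The constants below are illustrative,
# not the library's actual tables (bmRequestType layout per the USB spec:
# direction = bit 7, type = bits 6-5, recipient = bits 4-0).
REQUEST_TYPE_MASK = {'direction': 0x80, 'type_': 0x60, 'recipient': 0x1F}
REQUEST_TYPE_DIRECTION = {'host_to_device': 0x00, 'device_to_host': 0x80}

def apply_mask(mask, current, value):
    return (current & ~mask) | (value & mask)

# e.g. flip only the direction bit, leaving type and recipient untouched:
bm_request_type = apply_mask(REQUEST_TYPE_MASK['direction'], 0x21,
                             REQUEST_TYPE_DIRECTION['device_to_host'])  # -> 0xA1
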
def training():
    print('setting up...')
    if pc.TRAIN:
        num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TRAIN)
        # num_features = util.get_number_of_features_from_train(pp.CELEB_FACES_FC6_TRAIN)  # for server
        all_names = np.array(util.get_names_h5_file(pp.FC6_TRAIN_H5))
        path_images = pp.CELEB_FACES_FC6_TRAIN
    else:
        num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TEST)
        all_names = np.array(util.get_names_h5_file(pp.FC6_TEST_H5))
        path_images = pp.CELEB_FACES_FC6_TEST

    total_steps = num_features / pc.BATCH_SIZE
    mask_L_sti = util.get_L_sti_mask()

    # ----------------------------------------------------------------
    # GENERATOR
    generator = Generator()
    # generator = GeneratorPaper()
    generator_train_loss = np.zeros(pc.EPOCHS)
    generator_optimizer = chainer.optimizers.Adam(alpha=0.0002, beta1=0.9, beta2=0.999, eps=10**-8)
    generator_optimizer.setup(generator)

    # ----------------------------------------------------------------
    # DISCRIMINATOR
    discriminator = Discriminator()
    # discriminator = DiscriminatorPaper()
    discriminator_train_loss = np.zeros(pc.EPOCHS)
    discriminator_optimizer = chainer.optimizers.Adam(alpha=0.0002, beta1=0.9, beta2=0.999, eps=10**-8)
    discriminator_optimizer.setup(discriminator)

    # ----------------------------------------------------------------
    # VGG16 FOR FEATURE LOSS
    vgg16 = VGG16Layers()
    # ----------------------------------------------------------------

    save_list = random.sample(xrange(num_features), 20)
    save_list_names = [''] * 20
    cnt = 0
    for i in save_list:
        save_list_names[cnt] = util.sed_line(path_images, i).strip().split(',')[0]
        cnt += 1

    ones1 = util.make_ones(generator)
    zeros = util.make_zeros(generator)

    print('training...')
    for epoch in range(pc.EPOCHS):
        # shuffle training instances
        order = range(num_features)
        random.shuffle(order)
        names_order = all_names[order]

        train_gen = True
        train_dis = True

        print('epoch %d' % epoch)
        for step in range(total_steps):
            names = names_order[step * pc.BATCH_SIZE:(step + 1) * pc.BATCH_SIZE]
            features = util.get_features_h5_in_batches(names, train=pc.TRAIN)
            features = util.to_correct_input(features)
            labels_32, labels_224 = util.get_labels(names)
            # labels_32 = util.get_labels(names)
            # vgg16_features = util.get_features_h5_in_batches(names, train=pc.TRAIN, which_features='vgg16')
            # vgg16_features = util.to_correct_input(vgg16_features)
            # labels_32 = np.asarray(labels_32, dtype=np.float32)

            with chainer.using_config('train', train_gen):
                generator.cleargrads()
                prediction = generator(chainer.Variable(features))

            with chainer.using_config('train', train_dis):
                discriminator.cleargrads()
                print('prediction shape', np.shape(prediction.data))
                data = np.reshape(generator(chainer.Variable(features)).data,
                                  (pc.BATCH_SIZE, 32, 32, 3))
                data = np.transpose(data, (0, 3, 1, 2))
                fake_prob = discriminator(chainer.Variable(data))

                other_data = np.reshape(labels_32, (pc.BATCH_SIZE, 32, 32, 3))
                other_data = np.transpose(other_data, (0, 3, 1, 2))
                real_prob = discriminator(chainer.Variable(other_data))

            feature_truth = vgg16(labels_224, layers=['conv3_3'])['conv3_3']
            feature_reconstruction = vgg16(util.fix_prediction_for_vgg16(prediction, vgg16),
                                           layers=['conv3_3'])['conv3_3']
            # feature_reconstruction = None

            # ----------------------------------------------------------------
            # CALCULATE LOSS
            lambda_adv = 10**2
            lambda_sti = 2 * (10**-6)
            lambda_fea = 10**-2

            l_adv = lambda_adv * F.sigmoid_cross_entropy(fake_prob, ones1.data)
            # TODO: mask is probably breaking the graph, fix this
            thing_1 = util.apply_mask(labels_32, mask_L_sti)
            thing_2 = util.apply_mask(prediction.data, mask_L_sti)
            l_sti = lambda_sti * F.mean_squared_error(thing_1, thing_2)
            l_fea = lambda_fea * F.mean_squared_error(feature_truth, feature_reconstruction)

            generator_loss = l_adv + l_sti + l_fea
            generator_loss.backward()
            generator_optimizer.update()
            generator_train_loss[epoch] += generator_loss.data

            lambda_dis = 10**2
            discriminator_loss = lambda_dis * (
                F.sigmoid_cross_entropy(real_prob, ones1.data) +
                F.sigmoid_cross_entropy(fake_prob, zeros.data))
            discriminator_loss.backward()
            discriminator_optimizer.update()
            discriminator_train_loss[epoch] += discriminator_loss.data

            # ----------------------------------------------------------------
            # when to suspend / resume training
            dis_adv_ratio = discriminator_loss.data / l_adv.data
            if dis_adv_ratio < 0.1:
                train_dis = False
            if dis_adv_ratio > 0.5:
                train_dis = True
            if dis_adv_ratio > 10:
                train_gen = False
            if dis_adv_ratio < 2:
                train_gen = True

            # print('%d/%d %d/%d generator: %f l_adv: %f l_sti: %f discriminator: %f l3: %f l4: %f' % (
            #     epoch, pc.EPOCHS, step, total_steps, generator_loss.data, l_adv.data, l_sti.data,
            #     discriminator_loss.data, l3.data, l4.data))
            print('%d/%d %d/%d generator: %f l_adv: %f l_sti: %f l_fea: %f discriminator: %f dis/adv: %f' %
                  (epoch, pc.EPOCHS, step, total_steps, generator_loss.data, l_adv.data,
                   l_sti.data, l_fea.data, discriminator_loss.data, dis_adv_ratio))

            # information = util.update_information(information1, step, generator_loss.data, l_adv.data, l_sti.data)
            # information = util.update_information(information2, step, discriminator_loss.data, l3.data, l4.data)

            # visualizing loss
            # prev_max_ax1 = util.plot_everything(information1, fig1, lines1, ax1, prev_max_ax1, step)
            # prev_max_ax2 = util.plot_everything(information2, fig2, lines2, ax2, prev_max_ax2, step)

            with chainer.using_config('train', False):
                for i in range(len(names)):
                    if names[i] in save_list_names:
                        f = np.expand_dims(features[i], 0)
                        prediction = generator(f)
                        util.save_image(prediction, names[i], epoch, pp.RECONSTRUCTION_FOLDER)
                        print("image '%s' saved" % names[i])

        # if (epoch + 1) % pc.SAVE_EVERY_N_STEPS == 0:
        #     util.save_model(generator, epoch)

        generator_train_loss[epoch] /= total_steps
        print(generator_train_loss[epoch])

        discriminator_train_loss[epoch] /= total_steps
        print(discriminator_train_loss[epoch])
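
# --- Illustrative sketch (not part of the original code) ---
# The TODO above notes that masking through prediction.data detaches l_sti from
# the computation graph. One possible way to keep the stimulus loss
# differentiable is to multiply the Variable itself by the mask, assuming
# mask_L_sti broadcasts against the prediction. This is only a sketch of the
# idea, not the repo's fix.
import chainer.functions as F

def masked_mse(prediction, labels_32, mask_L_sti):
    masked_pred = prediction * mask_L_sti   # operate on the Variable, not .data
    masked_true = labels_32 * mask_L_sti
    return F.mean_squared_error(masked_true, masked_pred)
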
args.idx_f_end = args.idx_f_start + args.featureDim

with torch.set_grad_enabled(False):
    R_hat = np.eye(4)

    # get the complete scans
    complete_s = torch.cat((torch_op.v(data['rgb'][:, 0, :, :, :]),
                            torch_op.v(data['norm'][:, 0, :, :, :]),
                            torch_op.v(data['depth'][:, 0:1, :, :])), 1)
    complete_t = torch.cat((torch_op.v(data['rgb'][:, 1, :, :, :]),
                            torch_op.v(data['norm'][:, 1, :, :, :]),
                            torch_op.v(data['depth'][:, 1:2, :, :])), 1)

    # apply the observation mask
    view_s, mask_s, _ = util.apply_mask(complete_s.clone(), args.maskMethod)
    view_t, mask_t, _ = util.apply_mask(complete_t.clone(), args.maskMethod)
    mask_s = torch_op.npy(mask_s[0, :, :, :]).transpose(1, 2, 0)
    mask_t = torch_op.npy(mask_t[0, :, :, :]).transpose(1, 2, 0)

    # append mask for valid data
    tpmask = (view_s[:, 6:7, :, :] != 0).float().cuda()
    view_s = torch.cat((view_s, tpmask), 1)
    tpmask = (view_t[:, 6:7, :, :] != 0).float().cuda()
    view_t = torch.cat((view_t, tpmask), 1)

    # warp the second scan using the current transformation estimate
    view_t2s = torch_op.v(util.warping(torch_op.npy(view_t), np.linalg.inv(R_hat), args.dataset))
    view_s2t = torch_op.v(util.warping(torch_op.npy(view_s), R_hat, args.dataset))
    # append the warped scans
def reconstruct_soundfield(model, sf_sample, mask, factor, frequencies, filename, num_file, com_num, results_dict):
    """ Reconstruct and evaluate sound field

    Args:
        model: keras model
        sf_sample: np.ndarray
        mask: np.ndarray
        factor: int
        frequencies: list
        filename: string
        num_file: int
        com_num: int
        results_dict: dict
    Returns:
        dict
    """

    # Create one sample batch. Expand dims
    sf_sample = np.expand_dims(sf_sample, axis=0)
    sf_gt = copy.deepcopy(sf_sample)
    mask = np.expand_dims(mask, axis=0)
    mask_gt = copy.deepcopy(mask)

    # preprocessing
    irregular_sf, mask = util.preprocessing(factor, sf_sample, mask)

    # predict sound field
    pred_sf = model.predict([irregular_sf, mask])

    # measured observations, to use in postprocessing
    measured_sf = util.downsampling(factor, copy.deepcopy(sf_gt))
    measured_sf = util.apply_mask(measured_sf, mask_gt)

    # compute csv fields
    split_filename = filename[:-4].split('_')
    pattern = np.where(mask_gt[0, :, :, 0].flatten() == 1)[0]
    num_mic = len(pattern)

    for freq_num, freq in enumerate(frequencies):
        # Postprocessing
        reconstructed_sf_slice = util.postprocessing(pred_sf, measured_sf, freq_num, pattern, factor)

        # Compute metrics
        nmse = util.compute_NMSE(sf_gt[0, :, :, freq_num], reconstructed_sf_slice)

        data_range = sf_gt[0, :, :, freq_num].max() - sf_gt[0, :, :, freq_num].min()
        ssim = util.compute_SSIM(sf_gt[0, :, :, freq_num].astype('float32'),
                                 reconstructed_sf_slice, data_range)

        average_pressure_real = util.compute_average_pressure(sf_gt[0, :, :, freq_num])
        average_pressure_predicted = util.compute_average_pressure(reconstructed_sf_slice)
        average_pressure_previous = util.compute_average_pressure(measured_sf[0, :, :, freq_num])

        # store results
        results_dict['freq'].append(freq)
        results_dict['name'].append(filename[:-4])
        results_dict['xDim'].append(split_filename[2])
        results_dict['yDim'].append(split_filename[3])
        results_dict['m2'].append(split_filename[4])
        results_dict['num_mics'].append(num_mic)
        results_dict['num_comb'].append(com_num)
        results_dict['num_file'].append(num_file)
        results_dict['pattern'].append(pattern)
        results_dict['NMSE'].append(nmse)
        results_dict['SSIM'].append(ssim)
        results_dict['p_real'].append(average_pressure_real)
        results_dict['p_predicted'].append(average_pressure_predicted)
        results_dict['p_previous'].append(average_pressure_previous)

    return results_dict
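
# --- Illustrative sketch (not part of the original code) ---
# For reference, a common normalized-MSE definition along the lines of what
# util.compute_NMSE presumably computes; the repo's exact normalization may
# differ, so treat this only as an illustration of the metric.
import numpy as np

def nmse(ground_truth, reconstruction, eps=1e-12):
    """Squared error normalized by the ground-truth energy."""
    num = np.mean(np.abs(ground_truth - reconstruction) ** 2)
    den = np.mean(np.abs(ground_truth) ** 2) + eps
    return num / den
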