def eval_loss(net, criterion, loader, use_cuda=False):
    """
    Evaluate the average loss and accuracy of `net` over a dataloader.

    Args:
        net: the neural net model (an ``nn.Module``)
        criterion: loss function; ``nn.CrossEntropyLoss`` and ``nn.MSELoss``
            are supported (for MSE, targets are one-hot encoded over 10
            classes and compared against softmaxed outputs)
        loader: dataloader yielding ``(inputs, targets)`` batches
        use_cuda: move the model and batches to the GPU when True

    Returns:
        (average loss per sample, accuracy in percent)
    """
    correct = 0
    total_loss = 0
    total = 0  # number of samples seen

    if use_cuda:
        net.cuda()
    net.eval()

    with torch.no_grad():
        if isinstance(criterion, nn.CrossEntropyLoss):
            for inputs, targets in loader:
                batch_size = inputs.size(0)
                total += batch_size
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                # criterion averages over the batch; re-weight by batch size
                # so the final division yields a true per-sample mean even
                # with a smaller last batch.
                total_loss += loss.item() * batch_size
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets).sum().item()
        elif isinstance(criterion, nn.MSELoss):
            for inputs, targets in loader:
                batch_size = inputs.size(0)
                total += batch_size
                # One-hot encode the integer class targets (10 classes).
                one_hot_targets = torch.FloatTensor(batch_size, 10).zero_()
                one_hot_targets = one_hot_targets.scatter_(
                    1, targets.view(batch_size, 1), 1.0)
                one_hot_targets = one_hot_targets.float()
                if use_cuda:
                    inputs = inputs.cuda()
                    one_hot_targets = one_hot_targets.cuda()
                # BUG FIX: softmax requires an explicit `dim`; the implicit
                # form is deprecated and ambiguous for non-2D outputs.
                outputs = F.softmax(net(inputs), dim=1)
                loss = criterion(outputs, one_hot_targets)
                total_loss += loss.item() * batch_size
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.cpu().eq(targets).sum().item()

    return total_loss / total, 100. * correct / total
def calc_flops(model, input_size):
    """
    Estimate and print the FLOPs of `model` for a square RGB input.

    Forward hooks are registered on every leaf Conv2d / Linear /
    BatchNorm2d / ReLU / pooling module, a dummy batch of 2 images is
    pushed through the model, and the per-layer op counts are summed and
    printed in MFLOPs.

    Args:
        model: network to measure. NOTE: hooks are added and never
            removed, so pass a throwaway copy if that matters.
        input_size: spatial height/width of the dummy input.
    """
    global USE_GPU  # only consulted on legacy torch 0.4.x (see below)

    def conv_hook(self, input, output):
        # input[0]: (N, C_in, H_in, W_in); output[0]: (C_out, H_out, W_out)
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()
        kernel_ops = self.kernel_size[0] * self.kernel_size[1] * (
            self.in_channels / self.groups) * (2 if multiply_adds else 1)
        bias_ops = 1 if self.bias is not None else 0
        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width
        list_conv.append(flops)

    def linear_hook(self, input, output):
        batch_size = input[0].size(0) if input[0].dim() == 2 else 1
        weight_ops = self.weight.nelement() * (2 if multiply_adds else 1)
        bias_ops = self.bias.nelement()
        flops = batch_size * (weight_ops + bias_ops)
        list_linear.append(flops)

    def bn_hook(self, input, output):
        list_bn.append(input[0].nelement())

    def relu_hook(self, input, output):
        list_relu.append(input[0].nelement())

    def pooling_hook(self, input, output):
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()
        # BUG FIX: pooling kernel_size may be an int or an (h, w) tuple;
        # the old `k * k` form crashed (tuple * tuple) for tuple kernels.
        if isinstance(self.kernel_size, (tuple, list)):
            kernel_ops = self.kernel_size[0] * self.kernel_size[1]
        else:
            kernel_ops = self.kernel_size * self.kernel_size
        bias_ops = 0
        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width
        list_pooling.append(flops)

    def foo(net):
        # Recursively attach the matching hook to every leaf module.
        childrens = list(net.children())
        if not childrens:
            if isinstance(net, torch.nn.Conv2d):
                net.register_forward_hook(conv_hook)
            if isinstance(net, torch.nn.Linear):
                net.register_forward_hook(linear_hook)
            if isinstance(net, torch.nn.BatchNorm2d):
                net.register_forward_hook(bn_hook)
            if isinstance(net, torch.nn.ReLU):
                net.register_forward_hook(relu_hook)
            if isinstance(net, torch.nn.MaxPool2d) or isinstance(
                    net, torch.nn.AvgPool2d):
                net.register_forward_hook(pooling_hook)
            return
        for c in childrens:
            foo(c)

    multiply_adds = False  # count a fused multiply-add as 1 op, not 2
    list_conv, list_bn, list_relu, list_linear, list_pooling = [], [], [], [], []
    foo(model)

    # Legacy torch 0.4.x needed explicitly-typed cuda tensors; newer
    # versions take a Variable with requires_grad.
    if '0.4.' in torch.__version__:
        if USE_GPU:
            input = torch.cuda.FloatTensor(
                torch.rand(2, 3, input_size, input_size).cuda())
        else:
            input = torch.FloatTensor(torch.rand(2, 3, input_size, input_size))
    else:
        input = Variable(torch.rand(2, 3, input_size, input_size),
                         requires_grad=True)
    _ = model(input)

    total_flops = (sum(list_conv) + sum(list_linear) + sum(list_bn) +
                   sum(list_relu) + sum(list_pooling))
    # Divide by 2: the dummy batch holds 2 images, so report per-image FLOPs.
    print(' + Number of FLOPs: %.2fM' % (total_flops / 1e6 / 2))
Auto_Encoder = Autoencoder() # we are going to use BCE loss and Adam Adam_optimizer to get the result loss = nn.BCELoss() loss_sum = nn.BCELoss(reduction='sum') Adam_optimizer = optim.Adam(Auto_Encoder.parameters(), lr=2e-4) loss_epoch = [0] * num_epochs for epoch in range(num_epochs): Final_loss_sum = 0 for n_batch, (images, _) in enumerate(data_holder): N = images.size(0) images = Variable(images.view(images.size(0), -1)) output_value = Auto_Encoder(images) Final_Loss = loss(output_value, images) # Total loss Final_loss_sum += loss_sum(output_value, images) Adam_optimizer.zero_grad() Final_Loss.backward() Adam_optimizer.step() if (n_batch + 1) % 100 == 0: print('Epoch [%d/%d], Iter [%d/%d] Loss: %.5f' % (epoch + 1, num_epochs, n_batch, len(data) // batch_size, Final_Loss.item())) # Average loss loss_epoch[epoch] = Final_loss_sum / len(data) / 784
def impute(vae,
           features,
           mask,
           output_loss_path,
           max_iterations=1000,
           tolerance=1e-3,
           variable_sizes=None,
           noise_learning_rate=None
           ):
    """
    Fill in the masked-out entries of `features` using a trained VAE.

    Two strategies, selected by `noise_learning_rate`:
      * None: plug-in iteration — the VAE's reconstruction of the missing
        entries is fed back in as the next guess.
      * a float: the missing entries are a learnable noise tensor optimised
        with Adam to minimise the loss on the *observed* entries.

    Args:
        vae: trained VAE, called as ``vae(x, training=True)``; expected to
            return a 4-tuple whose second element is the reconstruction.
        features: data tensor containing the ground-truth values.
        mask: binary tensor — 1 where a value is observed, 0 where missing.
        output_loss_path: file path handed to the loss Logger.
        max_iterations: hard cap on imputation iterations.
        tolerance: stop early once observed_loss drops below this value.
        variable_sizes: forwarded to masked_reconstruction_loss_function.
        noise_learning_rate: Adam learning rate for the noise tensor
            (enables the gradient-based strategy when not None).
    """
    start_time = time.time()
    vae = to_cuda_if_available(vae)
    logger = Logger(output_loss_path, append=False)
    loss_function = MSELoss()
    inverted_mask = 1 - mask       # 1 where missing, 0 where observed
    observed = features * mask
    missing = torch.randn_like(features)  # initial random guess for missing entries
    if noise_learning_rate is not None:
        # Optimise the noise tensor itself; the VAE weights are NOT updated
        # (only `missing` is handed to the optimiser).
        missing = Variable(missing, requires_grad=True)
        optim = Adam([missing], weight_decay=0, lr=noise_learning_rate)
    # NOTE(review): train(mode=True) keeps stochastic layers active during
    # imputation — confirm this is intended rather than vae.eval().
    vae.train(mode=True)
    for iteration in range(max_iterations):
        logger.start_timer()
        if noise_learning_rate is not None:
            optim.zero_grad()
        # Current guess: observed values where known, noise/reconstruction
        # elsewhere.
        noisy_features = observed + missing * inverted_mask
        _, reconstructed, _, _ = vae(noisy_features, training=True)
        observed_loss = masked_reconstruction_loss_function(reconstructed, features, mask, variable_sizes)
        missing_loss = masked_reconstruction_loss_function(reconstructed, features, inverted_mask, variable_sizes)
        # RMSE of the composed (observed + imputed) data against the full
        # ground truth — logged only, never back-propagated.
        loss = torch.sqrt(loss_function(compose_with_mask(features, reconstructed, mask), features))
        if noise_learning_rate is None:
            # Plug-in update: reconstruction becomes the next guess.
            missing = reconstructed * inverted_mask
        else:
            # Gradient update of the noise tensor w.r.t. the observed loss.
            observed_loss.backward()
            optim.step()
        observed_loss, missing_loss, loss = to_cpu_if_available(observed_loss, missing_loss, loss)
        observed_loss = observed_loss.data.numpy()
        missing_loss = missing_loss.data.numpy()
        loss = loss.data.numpy()
        logger.log(iteration, max_iterations, "vae", "observed_loss", observed_loss)
        logger.log(iteration, max_iterations, "vae", "missing_loss", missing_loss)
        logger.log(iteration, max_iterations, "vae", "loss", loss)
        # Early exit once the fit on observed entries is good enough.
        if observed_loss < tolerance:
            break
    logger.close()
    print("Total time: {:02f}s".format(time.time() - start_time))
# Exponential fit coefficients (a, b) for a * exp(b * x).
# NOTE(review): presumably fitted offline (e.g. scipy.curve_fit) — confirm
# the provenance of these constants.
popt = [1.00620913e+03, -9.31529767e-02]


def exp_funk(x, a, b):
    """Exponential model a * exp(b * x) used to derive per-bin probabilities."""
    return a * np.exp(b*x)


# Normalised probabilities for bins 1..48.
# NOTE(review): 10188.568276854 appears to be a normalisation constant — verify.
probs = exp_funk(np.arange(1, 49), *popt)/10188.568276854

print('Start training loop')
for epoch in tqdm(range(num_epochs)):
    # `eph` presumably holds the epoch count of a resumed checkpoint — confirm.
    epoch += eph + 1
    epoch_time = time.time()
    calculation_time = 0
    for batch in tqdm(dataloader):
        calc_time = time.time()
        netG.train()
        btch_sz = len(batch['shower'])
        real_showers = Variable(batch['shower']).float().to(device)
        # real_showers[real_showers<0.3] = 0
        # real_energys = Variable(batch['energy']*0).float().to(device)
        # Reshape to broadcast against 5-D (N, C, D, H, W) shower volumes.
        real_free_path = batch['free_path'].float().to(device).reshape(btch_sz, 1, 1, 1, 1)
        # Adversarial ground truths
        valid_label = Variable(FloatTensor(btch_sz, 1).fill_(1.0), requires_grad=False)
        fake_label = Variable(FloatTensor(btch_sz, 1).fill_(0.0), requires_grad=False)
        ######################################################
        # Train Discriminator
        ######################################################
        netD.zero_grad()
        # Forward pass real batch through Discriminator
def beam_search(self, data: List, w: int, max_time: int):
    """
    Implements beam search for different models.

    :param data: [data, input_op] pair: the stack of input canvases and the
        one-hot start symbol.
    :param w: beam width
    :param max_time: Maximum length till the program has to be generated
    :return: (all_beams, next_beams_prob, all_inputs) — per-timestep
        parent/index records for every beam, the final beam probabilities,
        and the one-hot inputs fed at each step.
    """
    data, input_op = data
    # Beam, dictionary, with elements as list. Each element of list
    # containing index of the selected output and the corresponding
    # probability.
    batch_size = data.size()[1]
    h = Variable(torch.zeros(1, batch_size, self.hd_sz)).cuda()
    # Last beams' data
    B = {0: {"input": input_op, "h": h}, 1: None}
    next_B = {}
    x_f = self.encoder.encode(data[-1, :, 0:1, :, :])
    x_f = x_f.view(1, batch_size, self.in_sz)
    # List to store the probs of last time step
    prev_output_prob = [
        Variable(torch.ones(batch_size, self.num_draws)).cuda()
    ]
    all_beams = []
    all_inputs = []
    for timestep in range(0, max_time):
        outputs = []
        for b in range(w):
            if not B[b]:
                # Only one live beam at the first timestep; the rest are None.
                break
            input_op = B[b]["input"]
            h = B[b]["h"]
            input_op_rnn = self.relu(self.dense_input_op(input_op[:, 0, :]))
            input_op_rnn = input_op_rnn.view(1, batch_size, self.input_op_sz)
            input = torch.cat((x_f, input_op_rnn), 2)
            h, _ = self.rnn(input, h)
            hd = self.relu(self.dense_fc_1(self.drop(h[0])))
            dense_output = self.dense_output(self.drop(hd))
            output = self.logsoftmax(dense_output)
            # Element wise multiply by previous probabs
            output = torch.nn.Softmax()(output)
            output = output * prev_output_prob[b]
            outputs.append(output)
            next_B[b] = {}
            next_B[b]["h"] = h
        if len(outputs) == 1:
            outputs = outputs[0]
        else:
            # Concatenate along classes so a flat top-k index encodes
            # (parent beam, draw index).
            outputs = torch.cat(outputs, 1)
        next_beams_index = torch.topk(outputs, w, 1, sorted=True)[1]
        next_beams_prob = torch.topk(outputs, w, 1, sorted=True)[0]
        # print (next_beams_prob)
        # Decode the flat index: integer-divide for the parent beam,
        # modulo for the draw within that beam.
        current_beams = {
            "parent": next_beams_index.data.cpu().numpy() // (self.num_draws),
            "index": next_beams_index % (self.num_draws)
        }
        # print (next_beams_index % (self.num_draws))
        next_beams_index %= (self.num_draws)
        all_beams.append(current_beams)

        # Update previous output probabilities
        temp = Variable(torch.zeros(batch_size, 1)).cuda()
        prev_output_prob = []
        for i in range(w):
            for index in range(batch_size):
                temp[index, 0] = next_beams_prob[index, i]
            # repeat() copies, so each beam gets its own probability tensor.
            prev_output_prob.append(temp.repeat(1, self.num_draws))
        # hidden state for next step
        B = {}
        for i in range(w):
            B[i] = {}
            temp = Variable(torch.zeros(h.size())).cuda()
            for j in range(batch_size):
                # Each new beam inherits the hidden state of its parent beam.
                temp[0, j, :] = next_B[current_beams["parent"][j, i]]["h"][0, j, :]
            B[i]["h"] = temp
        # one_hot for input to the next step
        for i in range(w):
            arr = Variable(
                torch.zeros(batch_size, self.num_draws + 1).scatter_(
                    1, next_beams_index[:, i:i + 1].data.cpu(), 1.0)).cuda()
            B[i]["input"] = arr.unsqueeze(1)
        all_inputs.append(B)
    return all_beams, next_beams_prob, all_inputs
EPOCH = 70
# k = 1: maximize log(D(G(z))) instead of minimizing log(1 - D(G(z)))
UPDATE_FREQ = 1

gen = Generator(LATENT_SPACE_DIM, [256, 512, 1024], INPUT_DIM)
disc = Discriminator(INPUT_DIM, [1024, 512, 256])
loss_fucntion = nn.BCELoss()  # NOTE(review): name is typo'd — check other uses before renaming
loss_gene_memory = []
loss_disc_memory = []
optimizer_d = optim.Adam(disc.parameters(), lr=0.0002)
optimizer_g = optim.Adam(gen.parameters(), lr=0.0002)

# Fixed latent vector, reused every 3rd epoch to visualise generator progress.
test_input_z = Variable(
    torch.Tensor(np.random.normal(0, 1, (1, LATENT_SPACE_DIM))))

for e in range(EPOCH):
    loss_gene_memory = []
    loss_disc_memory = []
    if e % 3 == 0:
        with torch.no_grad():
            fake_image_test = gen(test_input_z)
            fake_image_test = vectors_to_images(fake_image_test, 1, 28)
            plot_image(rescale_image(fake_image_test.data[0][0], 0.5))
    for idx, (images, _) in enumerate(train_loader):
        images = torch.Tensor(images)
        size_batch = images.shape[0]
        # Training Discriminator
if_primitives=True, if_jitter=False) prev_test_loss = 1e20 prev_test_reward = 0 test_size = config.test_size batch_size = config.batch_size for epoch in range(0, config.epochs): train_loss = 0 Accuracies = [] imitate_net.train() # Number of times to accumulate gradients num_accums = config.num_traj for batch_idx in range(config.train_size // (config.batch_size * config.num_traj)): optimizer.zero_grad() loss_sum = Variable(torch.zeros(1)).cuda().data for _ in range(num_accums): for k in data_labels_paths.keys(): data, labels = next(train_gen_objs[k]) data = data[:, :, 0:config.top_k + 1, :, :, :] one_hot_labels = prepare_input_op(labels, len(generator.unique_draw)) one_hot_labels = Variable(torch.from_numpy(one_hot_labels)).cuda() data = Variable(torch.from_numpy(data)).cuda() labels = Variable(torch.from_numpy(labels)).cuda() data = data.permute(1, 0, 2, 3, 4, 5) # forward pass outputs = imitate_net([data, one_hot_labels, k]) loss = losses_joint(outputs, labels, time_steps=k + 1) / types_prog / \
def train_simple_trans():
    """
    Train the NNF predictor on single-frame dynamic-texture pairs.

    For each batch: predict a nearest-neighbour field (NNF) from source to
    target at time t, synthesise the target at t and t+1 with that NNF, and
    optimise the predictor on the MSE of both syntheses. Frames are dumped
    to disk and collected into an HTML table for visual inspection.
    """
    opt = TrainOptions().parse()
    data_root = 'data/processed'
    train_params = {'lr': 0.01, 'epoch_milestones': (100, 500)}
    # dataset = DynTexNNFTrainDataset(data_root, 'flame')
    dataset = DynTexFigureTrainDataset(data_root, 'flame')
    dataloader = DataLoader(dataset=dataset,
                            batch_size=opt.batchsize,
                            num_workers=opt.num_workers,
                            shuffle=True)
    nnf_conf = 3  # NNF channels: 2 displacement + 1 confidence
    syner = Synthesiser()
    nnfer = NNFPredictor(out_channel=nnf_conf)
    if torch.cuda.is_available():
        syner = syner.cuda()
        nnfer = nnfer.cuda()
    optimizer_nnfer = Adam(nnfer.parameters(), lr=train_params['lr'])
    table = Table()
    for epoch in range(opt.epoch):
        pbar = tqdm(total=len(dataloader), desc='epoch#{}'.format(epoch))
        pbar.set_postfix({'loss': 'N/A'})
        loss_tot = 0.0
        gamma = epoch / opt.epoch  # NOTE(review): computed but never used — confirm
        for i, (source_t, target_t, source_t1, target_t1) in enumerate(dataloader):
            if torch.cuda.is_available():
                source_t = Variable(source_t, requires_grad=True).cuda()
                target_t = Variable(target_t, requires_grad=True).cuda()
                source_t1 = Variable(source_t1, requires_grad=True).cuda()
                target_t1 = Variable(target_t1, requires_grad=True).cuda()
            nnf = nnfer(source_t, target_t)
            if nnf_conf == 3:
                nnf = nnf[:, : 2, :, :] * nnf[:, 2:, :, :]  # mask via the confidence
            # --- synthesis ---
            target_predict = syner(source_t, nnf)
            target_t1_predict = syner(source_t1, nnf)
            loss_t = tnf.mse_loss(target_predict, target_t)
            loss_t1 = tnf.mse_loss(target_t1_predict, target_t1)
            loss = loss_t + loss_t1
            optimizer_nnfer.zero_grad()
            loss.backward()
            optimizer_nnfer.step()
            # NOTE(review): only loss_t1 is accumulated for display while the
            # optimiser minimises loss_t + loss_t1 — confirm intended.
            loss_tot += float(loss_t1)
            # --- vis ---
            name = os.path.join(data_root, '../result/', str(epoch),
                                '{}.png'.format(str(i)))
            index = str(epoch) + '({})'.format(i)
            if not os.path.exists('/'.join(name.split('/')[:-1])):
                os.makedirs('/'.join(name.split('/')[:-1]))
            # Dump the first sample of the batch: CHW float -> HWC scaled image.
            cv2.imwrite(
                name.replace('.png', '_s.png'),
                (source_t.detach().cpu().numpy()[0].transpose(1, 2, 0)
                 * 255).astype('int'))
            cv2.imwrite(
                name.replace('.png', '_s1.png'),
                (source_t1.detach().cpu().numpy()[0].transpose(1, 2, 0)
                 * 255).astype('int'))
            cv2.imwrite(
                name.replace('.png', '_t.png'),
                (target_t.detach().cpu().numpy()[0].transpose(1, 2, 0)
                 * 255).astype('int'))
            cv2.imwrite(
                name.replace('.png', '_p.png'),
                (target_predict.detach().cpu().numpy()[0].transpose(1, 2, 0)
                 * 255).astype('int'))
            cv2.imwrite(
                name.replace('.png', '_t1.png'),
                (target_t1.detach().cpu().numpy()[0].transpose(1, 2, 0)
                 * 255).astype('int'))
            cv2.imwrite(name.replace('.png', '_p1.png'),
                        (target_t1_predict.detach().cpu().numpy()[0].transpose(
                            1, 2, 0) * 255).astype('int'))
            # vis in table (paths rewritten relative to the cluster mount)
            table.add(
                index,
                os.path.abspath(name.replace('.png', '_s.png')).replace(
                    '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
            table.add(
                index,
                os.path.abspath(name.replace('.png', '_s1.png')).replace(
                    '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
            table.add(
                index,
                os.path.abspath(name.replace('.png', '_t.png')).replace(
                    '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
            table.add(
                index,
                os.path.abspath(name.replace('.png', '_t1.png')).replace(
                    '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
            table.add(
                index,
                os.path.abspath(name.replace('.png', '_p.png')).replace(
                    '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
            table.add(
                index,
                os.path.abspath(name.replace('.png', '_p1.png')).replace(
                    '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
            pbar.set_postfix({'loss': str(loss_tot / (i + 1))})
            pbar.update(1)
        table.build_html('data/')
        pbar.close()
def train_complex_trans():
    """
    Jointly train the NNF predictor and a flow predictor for dynamic textures.

    Per batch: predict the source->target NNF and the source_t->source_t1
    flow, synthesise source_t1 with the flow (flow penalty) and target_t1 by
    warping the flow into target space with the NNF. Both networks are
    optimised on the combined MSE. Periodically dumps frames, builds an HTML
    table, and logs scalars to TensorBoard.
    """
    opt = TrainOptions().parse()
    data_root = 'data/processed'
    train_params = {'lr': 0.001, 'epoch_milestones': (100, 500)}
    dataset = DynTexFigureTransTrainDataset(data_root, 'flame')
    dataloader = DataLoader(dataset=dataset,
                            batch_size=opt.batchsize,
                            num_workers=opt.num_workers,
                            shuffle=True)
    nnf_conf = 3  # NNF channels: 2 displacement + 1 confidence
    syner = Synthesiser()
    nnfer = NNFPredictor(out_channel=nnf_conf)
    flownet = NNFPredictor(out_channel=nnf_conf)
    if torch.cuda.is_available():
        syner = syner.cuda()
        nnfer = nnfer.cuda()
        flownet = flownet.cuda()
    optimizer_nnfer = Adam(nnfer.parameters(), lr=train_params['lr'])
    # Flow net learns 10x slower than the NNF net.
    optimizer_flow = Adam(flownet.parameters(), lr=train_params['lr'] * 0.1)
    scheduler_nnfer = lr_scheduler.MultiStepLR(
        optimizer_nnfer,
        gamma=0.1,
        last_epoch=-1,
        milestones=train_params['epoch_milestones'])
    scheduler_flow = lr_scheduler.MultiStepLR(
        optimizer_flow,
        gamma=0.1,
        last_epoch=-1,
        milestones=train_params['epoch_milestones'])
    table = Table()
    writer = SummaryWriter(log_dir=opt.log_dir)
    for epoch in range(opt.epoch):
        # NOTE(review): scheduler.step() before the optimiser steps is the
        # pre-PyTorch-1.1 ordering — confirm intended on newer versions.
        scheduler_flow.step()
        scheduler_nnfer.step()
        pbar = tqdm(total=len(dataloader), desc='epoch#{}'.format(epoch))
        pbar.set_postfix({'loss': 'N/A'})
        loss_tot = 0.0
        for i, (source_t, target_t, source_t1, target_t1) in enumerate(dataloader):
            if torch.cuda.is_available():
                source_t = Variable(source_t, requires_grad=True).cuda()
                target_t = Variable(target_t, requires_grad=True).cuda()
                source_t1 = Variable(source_t1, requires_grad=True).cuda()
                target_t1 = Variable(target_t1, requires_grad=True).cuda()
            nnf = nnfer(source_t, target_t)
            flow = flownet(source_t, source_t1)
            # mask...
            if nnf_conf == 3:
                nnf = nnf[:, : 2, :, :] * nnf[:, 2:, :, :]  # mask via the confidence
                flow = flow[:, :2, :, :] * flow[:, 2:, :, :]
            # --- synthesis ---
            source_t1_predict = syner(source_t, flow)  # flow penalty
            target_flow = syner(flow, nnf)  # predict flow
            target_t1_predict = syner(target_t, target_flow)
            # target_t1_predict = syner(source_t1, nnf)
            loss_t1_f = tnf.mse_loss(source_t1, source_t1_predict)  # flow penalty
            loss_t1 = tnf.mse_loss(target_t1_predict, target_t1)
            # total penalty: target reconstruction weighted 2x vs flow term
            loss = loss_t1_f + loss_t1 * 2
            optimizer_flow.zero_grad()
            optimizer_nnfer.zero_grad()
            loss.backward()
            optimizer_nnfer.step()
            optimizer_flow.step()
            loss_tot += float(loss)
            # --- vis --- (every 10th epoch, every 2nd batch)
            if epoch % 10 == 0 and i % 2 == 0:
                name = os.path.join(data_root, '../result/', str(epoch),
                                    '{}.png'.format(str(i)))
                index = str(epoch) + '({})'.format(i)
                if not os.path.exists('/'.join(name.split('/')[:-1])):
                    os.makedirs('/'.join(name.split('/')[:-1]))
                # Dump the first sample of the batch: CHW float -> HWC image.
                cv2.imwrite(
                    name.replace('.png', '_s.png'),
                    (source_t.detach().cpu().numpy()[0].transpose(1, 2, 0)
                     * 255).astype('int'))
                cv2.imwrite(
                    name.replace('.png', '_s1.png'),
                    (source_t1.detach().cpu().numpy()[0].transpose(1, 2, 0)
                     * 255).astype('int'))
                cv2.imwrite(
                    name.replace('.png', '_t.png'),
                    (target_t.detach().cpu().numpy()[0].transpose(1, 2, 0)
                     * 255).astype('int'))
                cv2.imwrite(
                    name.replace('.png', '_p.png'),
                    (source_t1_predict.detach().cpu().numpy()[0].transpose(
                        1, 2, 0) * 255).astype('int'))
                cv2.imwrite(
                    name.replace('.png', '_t1.png'),
                    (target_t1.detach().cpu().numpy()[0].transpose(1, 2, 0)
                     * 255).astype('int'))
                cv2.imwrite(
                    name.replace('.png', '_p1.png'),
                    (target_t1_predict.detach().cpu().numpy()[0].transpose(
                        1, 2, 0) * 255).astype('int'))
                # vis in table (paths rewritten relative to the cluster mount)
                table.add(
                    index,
                    os.path.abspath(name.replace('.png', '_s.png')).replace(
                        '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
                table.add(
                    index,
                    os.path.abspath(name.replace('.png', '_s1.png')).replace(
                        '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
                table.add(
                    index,
                    os.path.abspath(name.replace('.png', '_t.png')).replace(
                        '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
                table.add(
                    index,
                    os.path.abspath(name.replace('.png', '_t1.png')).replace(
                        '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
                table.add(
                    index,
                    os.path.abspath(name.replace('.png', '_p.png')).replace(
                        '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
                table.add(
                    index,
                    os.path.abspath(name.replace('.png', '_p1.png')).replace(
                        '/mnt/cephfs_hl/lab_ad_idea/maoyiming', ''))
            pbar.set_postfix({'loss': str(loss_tot / (i + 1))})
            writer.add_scalar('scalars/{}/loss_train'.format(opt.time),
                              float(loss), i + int(epoch * len(dataloader)))
            writer.add_scalar('scalars/{}/lr'.format(opt.time),
                              float(scheduler_nnfer.get_lr()[0]),
                              i + int(epoch * len(dataloader)))
            pbar.update(1)
        table.build_html('data/')
        pbar.close()
    writer.close()
path=data_set_path, if_augment=False) test_gen = generator.test_gen(batch_size=config.batch_size, path=data_set_path, if_augment=False) imitate_net.epsilon = 0 RS_iou = 0 RS_chamfer = 0 distances = 0 pred_expressions = [] for i in range(test_size // config.batch_size): data_ = next(test_gen) labels = np.zeros((config.batch_size, max_len), dtype=np.int32) one_hot_labels = prepare_input_op(labels, len(unique_draw)) one_hot_labels = Variable(torch.from_numpy(one_hot_labels)).cuda() data = Variable(torch.from_numpy(data_), volatile=True).cuda() outputs, samples, baselines = imitate_net( [data, one_hot_labels, max_len]) R, _, pred_images, expressions = reinforce.generate_rewards( samples, labels, data_, stack_size=max_len // 2 + 1, power=1, reward="iou") RS_iou += np.mean(R) / (test_size // config.batch_size) R, _, _, expressions, distance = reinforce.generate_rewards( samples, data_,
def zeros_target(size):
    ''' Tensor containing zeros, with shape = size '''
    return Variable(torch.zeros(size))
def ones_target(size):
    ''' Tensor containing ones, with shape = size '''
    return Variable(torch.ones(size))
def detect_onet(self, im, dets):
    """
    Refine candidate boxes with ONet and predict facial landmarks.

    Args:
        im: input image as an HWC numpy array.
        dets: (N, 5+) candidate boxes [x1, y1, x2, y2, score, ...] from
            the previous stage, or None.

    Returns:
        (boxes_align, landmark_align): calibrated (M, 5) boxes and (M, 10)
        landmark coordinates, or (None, None) when nothing survives.
    """
    h, w, c = im.shape
    if dets is None:
        return None, None
    # Square the boxes so the 48x48 crops keep their aspect ratio.
    dets = get_square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    # pad() clips boxes to the image; returns destination (dy..) and
    # source (y..) crop coordinates plus the padded sizes.
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)
    num_boxes = dets.shape[0]
    cropped_ims_tensors = []
    for i in range(num_boxes):
        # Zero-padded canvas the size of the (clipped) box.
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (48, 48))  # ONet input size
        crop_im_tensor = convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))
    if torch.cuda.is_available():
        feed_imgs = feed_imgs.to(self.device)
    cls_map, reg, landmark = self.onet_detector(feed_imgs)
    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()
    # Keep boxes whose face score clears the ONet threshold.
    keep_inds = np.where(cls_map > self.threshold[2])[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None
    keep = nms(boxes, 0.7, mode="Minimum")
    if len(keep) == 0:
        return None, None
    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    keep_landmark = landmark[keep]
    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1
    # Apply bounding-box regression offsets, scaled by box size.
    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh
    # Landmarks are regressed relative to the UN-calibrated box corner.
    align_landmark_topx = keep_boxes[:, 0]
    align_landmark_topy = keep_boxes[:, 1]
    boxes_align = np.vstack([align_topx,
                             align_topy,
                             align_bottomx,
                             align_bottomy,
                             keep_cls[:, 0]
                             ])
    boxes_align = boxes_align.T
    landmark = np.vstack([
        align_landmark_topx + keep_landmark[:, 0] * bw,
        align_landmark_topy + keep_landmark[:, 1] * bh,
        align_landmark_topx + keep_landmark[:, 2] * bw,
        align_landmark_topy + keep_landmark[:, 3] * bh,
        align_landmark_topx + keep_landmark[:, 4] * bw,
        align_landmark_topy + keep_landmark[:, 5] * bh,
        align_landmark_topx + keep_landmark[:, 6] * bw,
        align_landmark_topy + keep_landmark[:, 7] * bh,
        align_landmark_topx + keep_landmark[:, 8] * bw,
        align_landmark_topy + keep_landmark[:, 9] * bh,
    ])
    landmark_align = landmark.T
    return boxes_align, landmark_align
def detect_pnet(self, im): """Get face candidates through pnet Parameters: ---------- im: numpy array input image array one batch Returns: ------- boxes: numpy array detected boxes before calibration boxes_align: numpy array boxes after calibration """ # im = self.unique_image_format(im) # original wider face data h, w, c = im.shape net_size = 12 current_scale = float( net_size) / self.min_face_size # find initial scale # print('imgshape:{0}, current_scale:{1}'.format(im.shape, current_scale)) im_resized = self.resize_image(im, current_scale) # scale = 1.0 current_height, current_width, _ = im_resized.shape # fcn all_boxes = list() i = 0 while min(current_height, current_width) > net_size: # print(i) feed_imgs = [] image_tensor = image_tools.convert_image_to_tensor(im_resized) feed_imgs.append(image_tensor) feed_imgs = torch.stack(feed_imgs) feed_imgs = Variable(feed_imgs) if self.pnet_detector.use_cuda: feed_imgs = feed_imgs.cuda() # self.pnet_detector is a trained pnet torch model # receptive field is 12×12 # 12×12 --> score # 12×12 --> bounding box cls_map, reg = self.pnet_detector(feed_imgs) cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy( cls_map.cpu()) reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu()) # print(cls_map_np.shape, reg_np.shape) # cls_map_np = (1, n, m, 1) reg_np.shape = (1, n, m 4) # time.sleep(5) # landmark_np = image_tools.convert_chwTensor_to_hwcNumpy(landmark.cpu()) # self.threshold[0] = 0.6 # print(cls_map_np[0,:,:].shape) # time.sleep(4) # boxes = [x1, y1, x2, y2, score, reg] boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np, current_scale, self.thresh[0]) # generate pyramid images current_scale *= self.scale_factor # self.scale_factor = 0.709 im_resized = self.resize_image(im, current_scale) current_height, current_width, _ = im_resized.shape if boxes.size == 0: continue # non-maximum suppresion # keep = utils.nms(boxes[:, :5], 0.5, 'Union') # boxes = boxes[keep] # print(boxes.shape) all_boxes.append(boxes) 
# i+=1 if len(all_boxes) == 0: return None, None all_boxes = np.vstack(all_boxes) # print("shape of all boxes {0}".format(all_boxes.shape)) # time.sleep(5) # merge the detection from first stage keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union') all_boxes = all_boxes[keep] # boxes = all_boxes[:, :5] # x2 - x1 # y2 - y1 bw = all_boxes[:, 2] - all_boxes[:, 0] + 1 bh = all_boxes[:, 3] - all_boxes[:, 1] + 1 # landmark_keep = all_boxes[:, 9:].reshape((5,2)) boxes = np.vstack([ all_boxes[:, 0], all_boxes[:, 1], all_boxes[:, 2], all_boxes[:, 3], all_boxes[:, 4], # all_boxes[:, 0] + all_boxes[:, 9] * bw, # all_boxes[:, 1] + all_boxes[:,10] * bh, # all_boxes[:, 0] + all_boxes[:, 11] * bw, # all_boxes[:, 1] + all_boxes[:, 12] * bh, # all_boxes[:, 0] + all_boxes[:, 13] * bw, # all_boxes[:, 1] + all_boxes[:, 14] * bh, # all_boxes[:, 0] + all_boxes[:, 15] * bw, # all_boxes[:, 1] + all_boxes[:, 16] * bh, # all_boxes[:, 0] + all_boxes[:, 17] * bw, # all_boxes[:, 1] + all_boxes[:, 18] * bh ]) boxes = boxes.T # boxes = boxes = [x1, y1, x2, y2, score, reg] reg= [px1, py1, px2, py2] (in prediction) align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh # refine the boxes boxes_align = np.vstack([ align_topx, align_topy, align_bottomx, align_bottomy, all_boxes[:, 4], # align_topx + all_boxes[:,9] * bw, # align_topy + all_boxes[:,10] * bh, # align_topx + all_boxes[:,11] * bw, # align_topy + all_boxes[:,12] * bh, # align_topx + all_boxes[:,13] * bw, # align_topy + all_boxes[:,14] * bh, # align_topx + all_boxes[:,15] * bw, # align_topy + all_boxes[:,16] * bh, # align_topx + all_boxes[:,17] * bw, # align_topy + all_boxes[:,18] * bh, ]) boxes_align = boxes_align.T return boxes, boxes_align
def detect_pnet(self, im):
    """
    Run PNet over an image pyramid to propose face candidate boxes.

    Args:
        im: input image as an HWC numpy array.

    Returns:
        (boxes, boxes_align): (N, 5) raw and regression-calibrated boxes
        [x1, y1, x2, y2, score], or (None, None) if nothing is found.
    """
    h, w, c = im.shape
    net_size = 12  # PNet input / receptive-field size
    # Initial scale so the smallest detectable face maps to 12px.
    current_scale = float(net_size) / self.min_face_size
    im_resized = resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape
    all_boxes = list()
    # Image pyramid: shrink by scale_factor until smaller than the net input.
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = torch.stack(feed_imgs)
        feed_imgs = Variable(feed_imgs)
        if torch.cuda.is_available():
            feed_imgs = feed_imgs.cuda()
        cls_map, reg = self.pnet_detector(feed_imgs)
        cls_map_np = convert_chw_tensor_to_hwc_numpy(cls_map.cpu())
        reg_np = convert_chw_tensor_to_hwc_numpy(reg.cpu())
        boxes = generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                      current_scale, self.threshold[0])
        # Advance the pyramid BEFORE the emptiness check so `continue`
        # below cannot loop forever.
        current_scale *= self.scale_factor
        im_resized = resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape
        if boxes.size == 0:
            continue
        # Per-scale non-maximum suppression.
        keep = nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)
    if len(all_boxes) == 0:
        return None, None
    all_boxes = np.vstack(all_boxes)
    # Cross-scale NMS over all surviving candidates.
    keep = nms(all_boxes[:, 0:5], 0.7, 'Union')
    all_boxes = all_boxes[keep]
    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1
    boxes = np.vstack([all_boxes[:, 0],
                       all_boxes[:, 1],
                       all_boxes[:, 2],
                       all_boxes[:, 3],
                       all_boxes[:, 4]
                       ])
    boxes = boxes.T
    # Apply the regression offsets (columns 5..8), scaled by box size.
    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh
    boxes_align = np.vstack([align_topx,
                             align_topy,
                             align_bottomx,
                             align_bottomy,
                             all_boxes[:, 4]
                             ])
    boxes_align = boxes_align.T
    return boxes, boxes_align
# Pre-allocated latent noise buffer; re-sampled every batch for the
# reparameterisation trick. NOTE(review): assumes `input`, `bsize`, `nz`,
# `encoder`, `decoder`, `loader`, `loss_function` and `img_size` are
# defined earlier in the file — confirm.
noise = torch.FloatTensor(bsize, nz)
input = input.cuda()
noise = noise.cuda()

# setup optimizer
optimizer_en = optim.Adam(encoder.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_de = optim.Adam(decoder.parameters(), lr=0.0002, betas=(0.5, 0.999))

# train
for epoch in range(25):
    for i, (data, _) in enumerate(loader, 0):
        bsize_now = data.size(0)
        data = data.cuda()
        # Copy the batch into the pre-allocated input buffer.
        input.resize_as_(data).copy_(data)
        mu, logvar = encoder(Variable(input))
        # re-parameterize: z = mu + std * eps, eps ~ N(0, 1)
        std = logvar.mul(0.5).exp_()
        noise.resize_(bsize_now, nz).normal_(0, 1)
        output = decoder(
            (Variable(noise).mul(std).add_(mu)).view(bsize_now, nz, 1, 1))
        # NOTE(review): the loss receives the full `bsize`, not `bsize_now`;
        # a smaller final batch may be scaled differently — confirm.
        loss = loss_function(output, Variable(input), mu, logvar, bsize,
                             img_size)
        encoder.zero_grad()
        decoder.zero_grad()
        loss.backward()
        optimizer_de.step()
        optimizer_en.step()
def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of rnet
    Returns:
    -------
    boxes_align: numpy array
        boxes after calibration
    landmarks_align: numpy array
        landmarks after calibration
    """
    h, w, c = im.shape
    if dets is None:
        return None, None
    # Square the boxes so the fixed-size crops keep their aspect ratio.
    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    # pad() clips boxes to the image; returns destination (dy..) and
    # source (y..) crop coordinates plus the padded sizes.
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
    cropped_ims_tensors = []
    for i in range(num_boxes):
        # Zero-padded canvas the size of the (clipped) box.
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        # crop input image
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (48, 48))  # ONet input size
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg, landmark = self.onet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()

    # Keep boxes whose face score clears the ONet threshold.
    keep_inds = np.where(cls_map > self.thresh[2])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7, mode="Minimum")

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    # Apply bounding-box regression offsets, scaled by box size.
    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    # Landmarks are regressed relative to the UN-calibrated box corner.
    align_landmark_topx = keep_boxes[:, 0]
    align_landmark_topy = keep_boxes[:, 1]

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
        # align_topx + keep_landmark[:, 0] * bw,
        # align_topy + keep_landmark[:, 1] * bh,
        # align_topx + keep_landmark[:, 2] * bw,
        # align_topy + keep_landmark[:, 3] * bh,
        # align_topx + keep_landmark[:, 4] * bw,
        # align_topy + keep_landmark[:, 5] * bh,
        # align_topx + keep_landmark[:, 6] * bw,
        # align_topy + keep_landmark[:, 7] * bh,
        # align_topx + keep_landmark[:, 8] * bw,
        # align_topy + keep_landmark[:, 9] * bh,
    ])

    boxes_align = boxes_align.T

    landmark = np.vstack([
        align_landmark_topx + keep_landmark[:, 0] * bw,
        align_landmark_topy + keep_landmark[:, 1] * bh,
        align_landmark_topx + keep_landmark[:, 2] * bw,
        align_landmark_topy + keep_landmark[:, 3] * bh,
        align_landmark_topx + keep_landmark[:, 4] * bw,
        align_landmark_topy + keep_landmark[:, 5] * bh,
        align_landmark_topx + keep_landmark[:, 6] * bw,
        align_landmark_topy + keep_landmark[:, 7] * bh,
        align_landmark_topx + keep_landmark[:, 8] * bw,
        align_landmark_topy + keep_landmark[:, 9] * bh,
    ])

    landmark_align = landmark.T

    return boxes_align, landmark_align
def repackage_hidden(h):
    """Detach hidden state(s) from the graph they were produced in.

    Wrapping in a fresh Variable cuts the autograd history, so truncated
    BPTT does not backpropagate through the entire sequence history.

    Args:
        h: a Variable/Tensor, or an arbitrarily nested tuple of them.

    Returns:
        The same structure with every tensor detached from its graph.
    """
    # BUG FIX: was `type(h) == Variable`, which is False for subclasses and,
    # since the Variable/Tensor merge, for plain tensors too (their type is
    # torch.Tensor) — sending tensors into the tuple branch and recursing
    # row-by-row.  isinstance handles both.
    if isinstance(h, Variable):
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)
def forward(self, x: List):
    """
    Forward pass for all architecture
    :param x: Has different meaning with different mode of training
    :return: mode 1: list of per-timestep log-prob outputs;
             mode 2: [outputs, samples] for RL training
    """
    if self.mode == 1:
        '''
        Variable length training. This mode runs for one more than the
        length of program for producing stop symbol. Note that there is
        no padding as is done in traditional RNN for variable length
        programs. This is done mainly because of computational efficiency
        of forward pass, that is, each batch contains only programs of
        same length and losses from all batches of different time-lengths
        are combined to compute gradient and update in the network. This
        ensures that every update of the network has equal contribution
        coming from programs of different lengths. Training is done using
        the script train_synthetic.py
        '''
        data, input_op, program_len = x

        assert data.size()[0] == program_len + 1, "Incorrect stack size!!"
        batch_size = data.size()[1]
        # Zero initial hidden state for the RNN.
        h = Variable(torch.zeros(1, batch_size, self.hd_sz)).cuda()
        # Encode only the final stack entry (channel 0) as the image feature.
        x_f = self.encoder.encode(data[-1, :, 0:1, :, :])
        x_f = x_f.view(1, batch_size, self.in_sz)
        outputs = []
        for timestep in range(0, program_len + 1):
            # X_f is always input to the RNN at every time step
            # along with previous predicted label
            input_op_rnn = self.relu(
                self.dense_input_op(input_op[:, timestep, :]))
            input_op_rnn = input_op_rnn.view(1, batch_size,
                                             self.input_op_sz)
            input = torch.cat((self.drop(x_f), input_op_rnn), 2)
            h, _ = self.rnn(input, h)
            hd = self.relu(self.dense_fc_1(self.drop(h[0])))
            output = self.logsoftmax(self.dense_output(self.drop(hd)))
            outputs.append(output)
        return outputs

    elif self.mode == 2:
        '''Train variable length RL'''
        # program length in this case is the maximum time step that RNN runs
        data, input_op, program_len = x
        batch_size = data.size()[1]
        h = Variable(torch.zeros(1, batch_size, self.hd_sz)).cuda()
        x_f = self.encoder.encode(data[-1, :, 0:1, :, :])
        x_f = x_f.view(1, batch_size, self.in_sz)
        outputs = []
        samples = []
        # First instruction token seeds the sampling loop.
        temp_input_op = input_op[:, 0, :]
        for timestep in range(0, program_len):
            # X_f is the input to the RNN at every time step along with
            # previous predicted label
            input_op_rnn = self.relu(self.dense_input_op(temp_input_op))
            input_op_rnn = input_op_rnn.view(1, batch_size,
                                             self.input_op_sz)
            input = torch.cat((x_f, input_op_rnn), 2)
            h, _ = self.rnn(input, h)
            hd = self.relu(self.dense_fc_1(self.drop(h[0])))
            dense_output = self.dense_output(self.drop(hd))
            # output for loss, these are log-probabs
            output = self.logsoftmax(dense_output)
            outputs.append(output)

            output_probs = self.softmax(dense_output)
            # Get samples from output probabs based on epsilon greedy way
            # Epsilon will be reduced to 0 gradually following some schedule
            if np.random.rand() < self.epsilon:
                # This is during training: sample stochastically
                sample = torch.multinomial(output_probs, 1)
            else:
                # This is during testing: greedy argmax
                if self.pytorch_version == "1":
                    sample = torch.max(output_probs, 1)[1]
                elif self.pytorch_version == "3":
                    sample = torch.max(output_probs, 1)[1].view(
                        batch_size, 1)

            # Stopping the gradient to flow backward from samples
            sample = sample.detach()
            samples.append(sample)

            # Create next input to the RNN from the sampled instructions
            # (one-hot over num_draws + 1 tokens).
            arr = Variable(
                torch.zeros(batch_size, self.num_draws + 1).scatter_(
                    1, sample.data.cpu(), 1.0)).cuda()
            arr = arr.detach()
            temp_input_op = arr
        return [outputs, samples]
    else:
        assert False, "Incorrect mode!!"
# NOTE(review): the first two statements appear to close out a loop over k
# that starts before this excerpt — indentation reconstructed from syntax.
test_batch_size = config.batch_size
test_gen_objs[k] = generator.get_test_data(
    test_batch_size,
    k,
    num_train_images=dataset_sizes[k][0],
    num_test_images=dataset_sizes[k][1],
    jitter_program=jit)

# Evaluate the imitation network on every program length k.
for k in dataset_sizes.keys():
    test_batch_size = config.batch_size
    for _ in range(dataset_sizes[k][1] // test_batch_size):
        with torch.no_grad():
            data_, labels = next(test_gen_objs[k])
            one_hot_labels = prepare_input_op(labels,
                                              len(generator.unique_draw))
            one_hot_labels = Variable(
                torch.from_numpy(one_hot_labels)).cuda()
            data = Variable(torch.from_numpy(data_)).cuda()
            labels = Variable(torch.from_numpy(labels)).cuda()
            test_output = imitate_net.test([data, one_hot_labels, max_len])
            # Token-level accuracy, averaged over batches, lengths and the
            # number of batches per length.
            acc += float((torch.argmax(torch.stack(test_output), dim=2)[:k].permute(1, 0) == labels[:, :-1]).float().sum()) \
                / (len(labels) * (k)) / len(dataset_sizes) / (dataset_sizes[k][1] // test_batch_size)
            pred_images, correct_prog, pred_prog = parser.get_final_canvas(
                test_output, if_just_expressions=False, if_pred_images=True)
            # Last stack entry, channel 0, is the final target canvas.
            target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
            targ_prog = parser.labels2exps(labels, k)
            programs_tar[jit] += targ_prog
            programs_pred[jit] += pred_prog
            # Per-sample chamfer distance between target and prediction.
            distance = chamfer(target_images, pred_images)
def train(self):
    """Run the adversarial training loop (WGAN-style updates).

    Alternates one discriminator step (plus an optional gradient-penalty
    step for 'wgan-gp') and one generator step per iteration, periodically
    logging losses, sampling images with a fixed latent, and checkpointing
    both networks.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Data iterator
    data_iter = iter(self.data_loader)
    step_per_epoch = len(self.data_loader)
    model_save_step = int(self.model_save_step * step_per_epoch)

    # Fixed input for debugging
    fixed_z = tensor2var(torch.randn(self.batch_size, 90))  # self.z_dim

    # Start with trained model
    if self.pretrained_model:
        start = self.pretrained_model + 1
    else:
        start = 0

    # Start time
    start_time = time.time()
    for step in range(start, self.total_step):

        # ================== Train D ================== #
        self.D.train()
        self.G.train()

        try:
            real_images, labels = next(data_iter)
        except StopIteration:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt and genuine loader errors.  Restart the
            # iterator only when the epoch is exhausted.
            data_iter = iter(self.data_loader)
            real_images, labels = next(data_iter)

        # Compute loss with real images
        real_images = tensor2var(real_images)
        labels = tensor2var(encode(labels))
        d_out_real, dr1, dr2 = self.D(
            real_images, labels
        )  # labels not added in generator, generator still not sorted.
        d_loss_real = -torch.mean(d_out_real)

        z = tensor2var(torch.randn(real_images.size(0), 90))
        fake_images, gf1, gf2 = self.G(z, labels)
        d_out_fake, df1, df2 = self.D(fake_images, labels)
        d_loss_fake = d_out_fake.mean()

        # Backward + Optimize
        d_loss = d_loss_real + d_loss_fake
        self.reset_grad()
        d_loss.backward()
        self.d_optimizer.step()

        if self.adv_loss == 'wgan-gp':
            # Gradient penalty on random interpolates of real/fake pairs.
            alpha = torch.rand(real_images.size(0), 1, 1,
                               1).to(device).expand_as(real_images)
            interpolated = Variable(alpha * real_images.data +
                                    (1 - alpha) * fake_images.data,
                                    requires_grad=True)
            out, _, _ = self.D(interpolated, labels)

            grad = torch.autograd.grad(outputs=out,
                                       inputs=interpolated,
                                       grad_outputs=torch.ones(
                                           out.size()).to(device),
                                       retain_graph=True,
                                       create_graph=True,
                                       only_inputs=True)[0]

            grad = grad.view(grad.size(0), -1)
            grad_l2norm = torch.sqrt(torch.sum(grad**2, dim=1))
            d_loss_gp = torch.mean((grad_l2norm - 1)**2)

            # Backward + Optimize
            d_loss = self.lambda_gp * d_loss_gp
            self.reset_grad()
            d_loss.backward()
            self.d_optimizer.step()

        # ================== Train G and gumbel ================== #
        # Create random noise
        z = tensor2var(torch.randn(real_images.size(0), 90))  # self.z_dim
        fake_images, _, _ = self.G(z, labels)

        # Compute loss with fake images
        g_out_fake, _, _ = self.D(fake_images, labels)  # batch x n
        if self.adv_loss == 'wgan-gp':
            g_loss_fake = -g_out_fake.mean()
        elif self.adv_loss == 'hinge':
            g_loss_fake = -g_out_fake.mean()

        self.reset_grad()
        g_loss_fake.backward()
        self.g_optimizer.step()

        # Print out log info
        if (step + 1) % self.log_step == 0:
            elapsed = time.time() - start_time
            elapsed = str(datetime.timedelta(seconds=elapsed))
            log_info = {
                'G_loss': g_loss_fake.item(),
                # BUG FIX: previously logged g_loss_fake under 'D_loss'.
                'D_loss': d_loss.item()
            }
            self.writer.add_scalar('G_loss', g_loss_fake.item(), step)
            self.writer.add_scalar('D_loss', d_loss.item(), step)
            print("Elapsed [{}], G_step [{}/{}], D_step[{}/{}], "
                  " G_Loss: {:.4f}, D Loss: {:.4f}".format(
                      elapsed, step + 1, self.total_step, (step + 1),
                      self.total_step, g_loss_fake.item(), d_loss.item()))

        # Sample images
        if (step + 1) % self.sample_step == 0:
            fake_images, _, _ = self.G(fixed_z, labels)
            fid_score = self.fid_model.compute_fid(real_images, fake_images)
            self.writer.add_scalar('FID_score', fid_score.item(), step)
            save_image(
                denorm(fake_images.data),
                os.path.join(self.sample_path,
                             '{}_fake.png'.format(step + 1)))
            # NOTE(review): `elapsed` is only assigned in the log branch
            # above; if sample_step fires before log_step this raises
            # NameError — verify the configured step intervals.
            print("Elapsed [{}], G_step [{}/{}], D_step[{}/{}], "
                  " FID score: {:.4f}".format(elapsed, step + 1,
                                              self.total_step, (step + 1),
                                              self.total_step,
                                              fid_score.item()))

        if (step + 1) % model_save_step == 0:
            torch.save(
                self.G.state_dict(),
                os.path.join(self.model_save_path,
                             '{}_G.pth'.format(step + 1)))
            torch.save(
                self.D.state_dict(),
                os.path.join(self.model_save_path,
                             '{}_D.pth'.format(step + 1)))

    self.writer.close()
model.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) # change for i in range(2, 5): getattr(model, 'layer%d' % i)[0].conv1.stride = (2, 2) getattr(model, 'layer%d' % i)[0].conv2.stride = (1, 1) if pretrained: model.load_state_dict(torch.load('models/resnet152-caffe.pth')) return model def resnet152(pretrained=False, **kwargs): """Constructs a ResNet-152 model. Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) if pretrained: # model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) model.load_state_dict(torch.load('models/resnet152.pth')) return model if __name__ == "__main__": net = resnet101(pretrained=True).cuda() x = torch.Tensor(2, 3, 256, 256).cuda() sb = net(Variable(x)) pdb.set_trace()
# NOTE(review): this excerpt opens inside PropertyLoss.__init__ — the `def`
# line lies before this chunk; indentation reconstructed.
super(PropertyLoss, self).__init__()
# NOTE(review): size_average is deprecated in modern PyTorch (use
# reduction='mean'); left unchanged here.
self.glass_criterian = nn.CrossEntropyLoss(size_average=True)
self.mask_criterian = nn.CrossEntropyLoss(size_average=True)


def forward(self, pred, label):
    """
    pred: [glass_pred, mask_pred]
    label: [glass_label, mask_label]
    """
    glass_pred = pred[0]
    mask_pred = pred[1]
    # Column 0 holds the glass label, column 1 the mask label.
    glass_label = label[:, 0].view(-1)
    mask_label = label[:, 1].view(-1)

    glass_loss = self.glass_criterian(glass_pred, glass_label)
    mask_loss = self.mask_criterian(mask_pred, mask_label)
    # Total loss is the unweighted sum of both heads.
    loss = glass_loss + mask_loss
    return loss


if __name__ == '__main__':
    x = Variable(torch.ones(100, 3, 32, 32))
    # NOTE(review): forward indexes label as a 2-D tensor (label[:, 0]);
    # passing this list of two 1-D tensors likely fails — verify.
    l = [Variable(torch.ones(100).long()), Variable(torch.ones(100).long())]
    # x = Variable(x)
    # l = Variable(l)
    loss = PropertyLoss()
    net = Property_Net()
    L = loss(net(x), l)
    print(L)
# Joint fine-tuning of a ResNet-101 feature extractor and a segmentation
# head with separate optimizers (smaller LR for the pretrained backbone).
res101 = resnet101(pretrained=True).cuda()
seg = Seg().cuda()
weight = torch.ones(22)
weight[21] = 0  # zero weight: ignore class 21 in the loss
criterion = CrossEntropyLoss2d(weight.cuda())
optimizer_seg = torch.optim.Adam(seg.parameters(), lr=1e-3)
optimizer_feat = torch.optim.Adam(res101.parameters(), lr=1e-4)

for t in range(10):
    for i, (img, label) in enumerate(loader):
        img = img.cuda()
        label = label[0].cuda()
        label = Variable(label)
        input = Variable(img)
        feats = res101(input)
        output = seg(feats)
        seg.zero_grad()
        res101.zero_grad()
        loss = criterion(output, label)
        loss.backward()
        optimizer_feat.step()
        optimizer_seg.step()

        ## see
        # Build visualization grids (chunk ends here; the grids are
        # presumably consumed after this excerpt).
        input = make_image_grid(img, mean, std)
        label = make_label_grid(label.data)
# NOTE(review): this excerpt opens inside an __init__ whose `def` line lies
# before this chunk; indentation reconstructed.
if 'vgg' in base:
    self.output_convs = nn.Conv2d(dims[-1], c_output, 1, 1)
else:
    # Non-VGG backbones additionally upsample the prediction 2x.
    self.output_convs = nn.Sequential(
        nn.Conv2d(dims[-1], c_output, 1, 1),
        nn.ConvTranspose2d(c_output, c_output, 4, 2, 1))
self.apply(weight_init)
self.feature = getattr(thismodule, base)(pretrained=pretrained)
self.feature.feats = {}
self.feature = procs[base](self.feature)


def forward(self, x, boxes=None, ids=None):
    """Fuse backbone features top-down and predict the output map."""
    # Per-device list of intermediate features collected by the backbone.
    self.feature.feats[x.device.index] = []
    x = self.feature(x)
    feats = self.feature.feats[x.device.index]
    feats = feats[::-1]  # deepest feature first
    for i, feat in enumerate(feats):
        x = self.upscales[i](x)
        x = torch.cat((feats[i], x), 1)
        x = self.reduce_convs[i](x)
    pred = self.output_convs(x)
    return pred


if __name__ == "__main__":
    # Smoke test: run a dummy batch through the network.
    fcn = WSFCN2(base='densenet169').cuda()
    x = torch.Tensor(2, 3, 256, 256).cuda()
    sb = fcn(Variable(x))
# NOTE(review): the first two statements appear to close out a loop over k
# started before this excerpt; indentation reconstructed.
test_batch_size = config.batch_size
test_gen_objs[k] = generator.get_test_data(
    test_batch_size,
    k,
    num_train_images=dataset_sizes[k][0],
    num_test_images=dataset_sizes[k][1],
    jitter_program=jit)

# Evaluate every program length k and accumulate chamfer distance.
for k in dataset_sizes.keys():
    test_batch_size = config.batch_size
    for i in range(dataset_sizes[k][1] // test_batch_size):
        print(k, i, dataset_sizes[k][1] // test_batch_size)
        data_, labels = next(test_gen_objs[k])
        pred_images, pred_prog = evaluator.test(data_, parser, max_len)
        # Last stack entry, channel 0, is the final target canvas.
        target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
        labels = Variable(torch.from_numpy(labels)).cuda()
        targ_prog = parser.labels2exps(labels, k)
        programs_tar[jit] += targ_prog
        programs_pred[jit] += pred_prog
        distance = chamfer(target_images, pred_images)
        total_CD += np.sum(distance)

over_all_CD[jit] = total_CD / total_size

metrics["chamfer"] = over_all_CD
print(metrics, model_name)
print(over_all_CD)
results_path = "trained_models/results/{}/".format(model_name)
os.makedirs(os.path.dirname(results_path), exist_ok=True)
# NOTE(review): the opening arguments belong to a data-generator call whose
# first line lies before this chunk; indentation reconstructed.
    k,
    num_train_images=dataset_sizes[k][0],
    num_test_images=dataset_sizes[k][1],
    jitter_program=True)

prev_test_loss = 1e20
prev_test_cd = 1e20
prev_test_iou = 0
for epoch in range(config.epochs):
    train_loss = 0
    Accuracies = []
    imitate_net.train()
    for batch_idx in range(config.train_size //
                           (config.batch_size * config.num_traj)):
        optimizer.zero_grad()
        # Detached accumulator (`.data`), used for logging only.
        loss = Variable(torch.zeros(1)).cuda().data
        for _ in range(config.num_traj):
            for k in data_labels_paths.keys():
                data, labels = next(train_gen_objs[k])
                data = data[:, :, 0:1, :, :]  # keep channel 0 only
                one_hot_labels = prepare_input_op(labels,
                                                  len(generator.unique_draw))
                one_hot_labels = Variable(
                    torch.from_numpy(one_hot_labels)).cuda()
                data = Variable(torch.from_numpy(data)).cuda()
                labels = Variable(torch.from_numpy(labels)).cuda()
                outputs = imitate_net([data, one_hot_labels, k])
                # Normalize so each program length and trajectory
                # contributes equally to the accumulated gradient.
                loss_k = (losses_joint(outputs, labels, time_steps=k + 1) /
                          (k + 1)) / len(data_labels_paths.keys()) / config.num_traj
                loss_k.backward()
# NOTE(review): this loop is the tail of a sequence-padding helper whose
# `def` line lies before this chunk; indentation reconstructed.
for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
return seq_tensor


def make_variable(names):
    """Convert a list of name strings into a padded LongTensor batch.

    Each name is mapped to its ASCII codes via str2ascii_arr, then the
    batch is padded to the longest sequence by pad_sequence.
    """
    sequence_and_length = [str2ascii_arr(name) for name in names]
    vectorized_seq = [sl[0] for sl in sequence_and_length]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length])
    return pad_sequence(vectorized_seq, seq_lengths)


if __name__ == '__main__':
    classifier = Model(N_CHARS, HIDDEN_SIZE, N_CLASSES)
    arr, _ = str2ascii_arr("adylov")
    inp = Variable(torch.LongTensor([arr]))

    indx2char = ['h', 'i', 'e', 'l', 'o']
    x_data = [0, 1, 0, 2, 3, 3]
    # One-hot lookup table for the 5-character vocabulary above.
    x_one_hot_look = [[1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]]
    y_data = [1, 0, 2, 3, 3, 4]  # ihello
    x_one_hot = [x_one_hot_look[x] for x in x_data]

    inputs = autograd.Variable(torch.Tensor(x_one_hot))
    print("input size", inputs.size())
    labels = Variable(torch.LongTensor(y_data))
    num_classes = 5
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch.

    Args:
        train_loader: iterable of (input, target) batches.
        model: network to train; switched to train mode here.
        criterion: loss function applied to (output, target).
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index, used only for logging.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top2 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # pytorch 0.4.0 compatible
        if '0.4.' in torch.__version__:
            if USE_GPU:
                input_var = torch.cuda.FloatTensor(input.cuda())
                target_var = torch.cuda.LongTensor(target.cuda())
            else:
                input_var = torch.FloatTensor(input)
                target_var = torch.LongTensor(target)
        else:
            # pytorch 0.3.1 or less compatible
            if USE_GPU:
                input = input.cuda()
                # BUG FIX: was `target.cuda(async=True)` — a SyntaxError on
                # Python 3.7+ where `async` is a reserved keyword;
                # `non_blocking` is the replacement kwarg.
                target = target.cuda(non_blocking=True)
            input_var = Variable(input)
            target_var = Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        prec1, prec2 = accuracy(output.data, target_var, topk=(1, 2))

        # measure accuracy and record loss
        reduced_prec1 = prec1.clone()
        reduced_prec2 = prec2.clone()
        top1.update(reduced_prec1[0])
        top2.update(reduced_prec2[0])
        reduced_loss = loss.data.clone()
        losses.update(reduced_loss)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()  # check whether the network is well connected
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            with open('logs/{}_{}.log'.format(time_stp, args.arch),
                      'a+') as flog:
                line = 'Epoch: [{0}][{1}/{2}]\t ' \
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' \
                       'Prec@5 {top2.val:.3f} ({top2.avg:.3f})'.format(
                           epoch, i, len(train_loader),
                           batch_time=batch_time, loss=losses,
                           top1=top1, top2=top2)
                print(line)
                flog.write('{}\n'.format(line))
def init_hidden(self):
    """Create the zeroed initial hidden state for the RNN.

    Relies on the module-level ``num_layers``, ``batch_size`` and
    ``hidden_size`` globals for its shape.
    """
    zeros = torch.zeros(num_layers, batch_size, hidden_size)
    return Variable(zeros)
def detect_rnet(self, im, dets):
    """Get face candidates using rnet
    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet
    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    # im: an input image
    h, w, c = im.shape

    if dets is None:
        return None, None

    # (705, 5) = [x1, y1, x2, y2, score, reg]
    # print("pnet detection {0}".format(dets.shape))
    # time.sleep(5)

    # return square boxes
    dets = self.square_bbox(dets)
    # rounds to integer pixel coordinates
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    '''
    # helper for setting RNet batch size
    batch_size = self.rnet_detector.batch_size
    ratio = float(num_boxes) / batch_size
    if ratio > 3 or ratio < 0.3:
        print "You may need to reset RNet batch size if this info appears frequently, \
face candidates:%d, current batch_size:%d"%(num_boxes, batch_size)
    '''

    # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
    cropped_ims_tensors = []
    for i in range(num_boxes):
        # Zero canvas so boxes clipped at the image border get zero padding.
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (24, 24))  # RNet input size is 24x24
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    # landmark = landmark.cpu().data.numpy()

    # Keep candidates whose face score clears the RNet threshold.
    keep_inds = np.where(cls_map > self.thresh[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        # landmark = landmark[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7)

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    # keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    # Uncalibrated boxes, with the classification score appended.
    boxes = np.vstack([
        keep_boxes[:, 0],
        keep_boxes[:, 1],
        keep_boxes[:, 2],
        keep_boxes[:, 3],
        keep_cls[:, 0],
        # keep_boxes[:,0] + keep_landmark[:, 0] * bw,
        # keep_boxes[:,1] + keep_landmark[:, 1] * bh,
        # keep_boxes[:,0] + keep_landmark[:, 2] * bw,
        # keep_boxes[:,1] + keep_landmark[:, 3] * bh,
        # keep_boxes[:,0] + keep_landmark[:, 4] * bw,
        # keep_boxes[:,1] + keep_landmark[:, 5] * bh,
        # keep_boxes[:,0] + keep_landmark[:, 6] * bw,
        # keep_boxes[:,1] + keep_landmark[:, 7] * bh,
        # keep_boxes[:,0] + keep_landmark[:, 8] * bw,
        # keep_boxes[:,1] + keep_landmark[:, 9] * bh,
    ])

    # Apply the bounding-box regression offsets, scaled by box size.
    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
        # align_topx + keep_landmark[:, 0] * bw,
        # align_topy + keep_landmark[:, 1] * bh,
        # align_topx + keep_landmark[:, 2] * bw,
        # align_topy + keep_landmark[:, 3] * bh,
        # align_topx + keep_landmark[:, 4] * bw,
        # align_topy + keep_landmark[:, 5] * bh,
        # align_topx + keep_landmark[:, 6] * bw,
        # align_topy + keep_landmark[:, 7] * bh,
        # align_topx + keep_landmark[:, 8] * bw,
        # align_topy + keep_landmark[:, 9] * bh,
    ])

    boxes = boxes.T
    boxes_align = boxes_align.T

    return boxes, boxes_align
def noise(size):
    """Return a (size, 100) batch of standard-normal latent vectors."""
    latent = torch.randn(size, 100)
    return Variable(latent)