def forward(self, x: List):
    """
    Forward pass for all architectures.
    :param x: has a different meaning depending on the training mode
    :return:
    """
    if self.mode == 1:
        '''
        Variable length training. This mode runs for one more step than the
        length of the program, in order to produce the stop symbol. Note
        that there is no padding as is done in traditional RNNs for
        variable length programs. This is done mainly for the computational
        efficiency of the forward pass: each batch contains only programs
        of the same length, and losses from batches of different
        time-lengths are combined to compute the gradient and update the
        network. This ensures that every update of the network has an
        equal contribution from programs of different lengths. Training is
        done using the script train_synthetic.py.
        '''
        data, input_op, program_len = x

        # assert data.size()[0] == program_len + 1, "Incorrect stack size!!"
        # batch_size = data.size()[1]
        batch_size = data.size()[0]
        h = Variable(torch.zeros(1, batch_size, self.hd_sz)).cuda()
        # x_f = self.encoder.encode(data[-1, :, 0:1, :, :])
        x_f = self.encoder.encode(data.unsqueeze(1))
        x_f = x_f.view(1, batch_size, self.in_sz)
        # remove the stop token from the input to the decoder
        input_op_rnn = self.relu(
            self.dense_input_op(input_op))[:, :-1, :].permute(1, 0, 2)
        # input_op_rnn = torch.zeros((program_len + 1, batch_size, self.input_op_sz)).cuda()
        x_f = x_f.repeat(program_len + 1, 1, 1)
        input = torch.cat((self.drop(x_f), input_op_rnn), 2)
        output, h = self.rnn(input, h)
        output = self.relu(self.dense_fc_1(self.drop(output)))
        output = self.tf_logsoftmax(self.dense_output(self.drop(output)))
        return output

    elif self.mode == 2:
        '''Train variable length RL'''
        # program length in this case is the maximum time step the RNN runs for
        data, input_op, program_len = x
        batch_size = data.size()[1]
        h = Variable(torch.zeros(1, batch_size, self.hd_sz)).cuda()
        x_f = self.encoder.encode(data[-1, :, 0:1, :, :])
        x_f = x_f.view(1, batch_size, self.in_sz)
        outputs = []
        samples = []
        temp_input_op = input_op[:, 0, :]
        for timestep in range(0, program_len):
            # x_f is the input to the RNN at every time step, along with the
            # previously predicted label
            input_op_rnn = self.relu(self.dense_input_op(temp_input_op))
            input_op_rnn = input_op_rnn.view(1, batch_size, self.input_op_sz)
            input = torch.cat((x_f, input_op_rnn), 2)
            h, _ = self.rnn(input, h)
            hd = self.relu(self.dense_fc_1(self.drop(h[0])))
            dense_output = self.dense_output(self.drop(hd))
            output = self.logsoftmax(dense_output)
            # output for the loss; these are log-probabilities
            outputs.append(output)
            output_probs = self.softmax(dense_output)
            # Sample from the output probabilities in an epsilon-greedy way.
            # Epsilon is reduced to 0 gradually, following some schedule.
            if np.random.rand() < self.epsilon:
                # This is during training
                sample = torch.multinomial(output_probs, 1)
            else:
                # This is during testing
                sample = torch.max(output_probs, 1)[1].view(batch_size, 1)

            # Stop the gradient from flowing backward through the samples
            sample = sample.detach()
            samples.append(sample)

            # Create the next input to the RNN from the sampled instructions
            arr = Variable(
                torch.zeros(batch_size, self.num_draws + 1).scatter_(
                    1, sample.data.cpu(), 1.0)).cuda()
            arr = arr.detach()
            temp_input_op = arr
        return [outputs, samples]
    else:
        assert False, "Incorrect mode!!"
def eval_loss(net, criterion, loader, args):
    """
    Evaluate the loss value for a given 'net' on the dataset provided by the loader.

    Args:
        net: the neural net model
        criterion: loss function
        loader: dataloader
        args: namespace with at least `cuda` (bool) and `dataset` (str)
    Returns:
        loss value and accuracy
    """
    correct = 0
    total_loss = 0
    total = 0  # number of samples
    num_batch = len(loader)

    if args.cuda:
        net.cuda()
    net.eval()

    with torch.no_grad():
        if isinstance(criterion, nn.CrossEntropyLoss):
            for batch_idx, (inputs, targets) in enumerate(loader):
                batch_size = inputs.size(0)
                total += batch_size
                inputs = Variable(inputs)
                targets = Variable(targets)
                if args.cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                if args.dataset == 'minist':  # (sic) dataset key used by the calling script
                    inputs, targets = Variable(inputs.view(-1, 28 * 28)), Variable(targets)
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                total_loss += loss.item() * batch_size
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).cpu().sum().item()

        elif isinstance(criterion, nn.MSELoss):
            for batch_idx, (inputs, targets) in enumerate(loader):
                batch_size = inputs.size(0)
                total += batch_size
                inputs = Variable(inputs)

                one_hot_targets = torch.FloatTensor(batch_size, 10).zero_()
                one_hot_targets = one_hot_targets.scatter_(
                    1, targets.view(batch_size, 1), 1.0)
                one_hot_targets = one_hot_targets.float()
                one_hot_targets = Variable(one_hot_targets)
                if args.cuda:
                    inputs, one_hot_targets = inputs.cuda(), one_hot_targets.cuda()

                outputs = F.softmax(net(inputs), dim=1)
                loss = criterion(outputs, one_hot_targets)
                total_loss += loss.item() * batch_size
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.cpu().eq(targets).sum().item()

    return total_loss / total, 100. * correct / total
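# Illustrative sanity check for eval_loss, not part of the original script:
# a tiny stand-in classifier evaluated on random CIFAR-shaped tensors. The
# argparse-style Namespace only needs the `cuda` and `dataset` fields that
# eval_loss reads; `_TinyNet` is a hypothetical helper, not a repo model.
import torch
import torch.nn as nn
from argparse import Namespace
from torch.utils.data import DataLoader, TensorDataset


class _TinyNet(nn.Module):
    def __init__(self):
        super(_TinyNet, self).__init__()
        self.fc = nn.Linear(3 * 32 * 32, 10)

    def forward(self, x):
        # flatten the image and project to 10 logits
        return self.fc(x.view(x.size(0), -1))


_loader = DataLoader(
    TensorDataset(torch.randn(64, 3, 32, 32), torch.randint(0, 10, (64,))),
    batch_size=16)
_args = Namespace(cuda=False, dataset='cifar10')
_loss, _acc = eval_loss(_TinyNet(), nn.CrossEntropyLoss(), _loader, _args)
print('sanity eval: loss {:.4f}, acc {:.2f}%'.format(_loss, _acc))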
def real_data_labels(size):
    ''' Tensor of ones: discriminator labels for real data, shape (size, 1) '''
    data = Variable(torch.ones(size, 1))
    return data
def load_data_3d(path_to_dataset, subjects, actions, sample_rate, seq_len):
    """
    adapted from
    https://github.com/una-dinosauria/human-motion-prediction/src/data_utils.py#L216

    :param path_to_dataset:
    :param subjects:
    :param actions:
    :param sample_rate:
    :param seq_len:
    :return:
    """
    sampled_seq = []
    complete_seq = []
    for subj in subjects:
        for action_idx in np.arange(len(actions)):
            action = actions[action_idx]
            if not (subj == 5):
                for subact in [1, 2]:  # subactions
                    print("Reading subject {0}, action {1}, subaction {2}".format(
                        subj, action, subact))
                    filename = '{0}/S{1}/{2}_{3}.txt'.format(
                        path_to_dataset, subj, action, subact)
                    action_sequence = readCSVasFloat(filename)
                    n, d = action_sequence.shape
                    even_list = range(0, n, sample_rate)
                    num_frames = len(even_list)
                    the_sequence = np.array(action_sequence[even_list, :])
                    the_seq = Variable(torch.from_numpy(the_sequence)).float().cuda()
                    # remove global rotation and translation
                    the_seq[:, 0:6] = 0
                    p3d = expmap2xyz_torch(the_seq)
                    the_sequence = p3d.view(num_frames, -1).cpu().data.numpy()

                    fs = np.arange(0, num_frames - seq_len + 1)
                    fs_sel = fs
                    for i in np.arange(seq_len - 1):
                        fs_sel = np.vstack((fs_sel, fs + i + 1))
                    fs_sel = fs_sel.transpose()
                    seq_sel = the_sequence[fs_sel, :]
                    if len(sampled_seq) == 0:
                        sampled_seq = seq_sel
                        complete_seq = the_sequence
                    else:
                        sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
                        complete_seq = np.append(complete_seq, the_sequence, axis=0)
            else:
                print("Reading subject {0}, action {1}, subaction {2}".format(
                    subj, action, 1))
                filename = '{0}/S{1}/{2}_{3}.txt'.format(
                    path_to_dataset, subj, action, 1)
                action_sequence = readCSVasFloat(filename)
                n, d = action_sequence.shape
                even_list = range(0, n, sample_rate)
                num_frames1 = len(even_list)
                the_sequence1 = np.array(action_sequence[even_list, :])
                the_seq1 = Variable(torch.from_numpy(the_sequence1)).float().cuda()
                the_seq1[:, 0:6] = 0
                p3d1 = expmap2xyz_torch(the_seq1)
                the_sequence1 = p3d1.view(num_frames1, -1).cpu().data.numpy()

                print("Reading subject {0}, action {1}, subaction {2}".format(
                    subj, action, 2))
                filename = '{0}/S{1}/{2}_{3}.txt'.format(
                    path_to_dataset, subj, action, 2)
                action_sequence = readCSVasFloat(filename)
                n, d = action_sequence.shape
                even_list = range(0, n, sample_rate)
                num_frames2 = len(even_list)
                the_sequence2 = np.array(action_sequence[even_list, :])
                the_seq2 = Variable(torch.from_numpy(the_sequence2)).float().cuda()
                the_seq2[:, 0:6] = 0
                p3d2 = expmap2xyz_torch(the_seq2)
                the_sequence2 = p3d2.view(num_frames2, -1).cpu().data.numpy()

                # print("action:{}".format(action))
                # print("subact1:{}".format(num_frames1))
                # print("subact2:{}".format(num_frames2))
                fs_sel1, fs_sel2 = find_indices_srnn(num_frames1, num_frames2,
                                                     seq_len)
                seq_sel1 = the_sequence1[fs_sel1, :]
                seq_sel2 = the_sequence2[fs_sel2, :]
                if len(sampled_seq) == 0:
                    sampled_seq = seq_sel1
                    sampled_seq = np.concatenate((sampled_seq, seq_sel2), axis=0)
                    complete_seq = the_sequence1
                    complete_seq = np.append(complete_seq, the_sequence2, axis=0)
                else:
                    sampled_seq = np.concatenate((sampled_seq, seq_sel1), axis=0)
                    sampled_seq = np.concatenate((sampled_seq, seq_sel2), axis=0)
                    complete_seq = np.append(complete_seq, the_sequence1, axis=0)
                    complete_seq = np.append(complete_seq, the_sequence2, axis=0)

    # ignore constant joints and joints at the same position as other joints
    joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31])
    dimensions_to_ignore = np.concatenate(
        (joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
    dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]),
                                     dimensions_to_ignore)

    return sampled_seq, dimensions_to_ignore, dimensions_to_use
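# Hedged usage sketch for load_data_3d (kept as comments because it needs the
# H3.6M files on disk and a CUDA device). The path and the subject/action
# lists below are placeholders inferred from the '{0}/S{1}/{2}_{3}.txt'
# filename template above, not values confirmed by this snippet.
# sampled, dim_ignore, dim_use = load_data_3d(
#     path_to_dataset='./h3.6m/dataset',
#     subjects=[1, 6, 7, 8, 9],
#     actions=['walking', 'eating'],
#     sample_rate=2,
#     seq_len=35)
# print(sampled.shape, len(dim_use))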
right_freq_map = np.zeros(shape=(512, clip_size))
for (start, end) in windows(whole_clip, window_size=window_size,
                            stride=window_size):
    frame_end = end
    whole = whole_clip[:, start:end]
    if whole.shape[1] != window_size:
        frame_end = clip_size
        whole = whole_clip[:, clip_size - window_size:clip_size]
    whole_in = np.reshape(
        whole, (1, 1, config.feature_size[0], config.feature_size[1]))
    if config.use_gpu:
        whole_in = Variable(torch.from_numpy(whole_in)).cuda()
    else:
        whole_in = Variable(torch.from_numpy(whole_in))
    masks = net.module.predict(whole_in)  # 1 * 2 * 512 * 64
    masks = masks.data.cpu().numpy()
    # the masks may be optimized here
    audio_transfer.fix_mask(whole, masks[0, 0, :, :], 60)
    audio_transfer.fix_mask(whole, masks[0, 1, :, :], 60)
    left = masks[0, 0, :, :] * whole
    right = masks[0, 1, :, :] * whole
    print(start, frame_end)
def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of rnet

    Returns:
    -------
    boxes_align: numpy array
        boxes after calibration (landmarks are not computed in this variant)
    """
    h, w, c = im.shape
    if dets is None:
        return None, None
    if dets.shape[0] == 0:
        return None, None

    detss = dets
    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
    cropped_ims_tensors = []
    for i in range(num_boxes):
        try:
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            # crop the input image
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        except Exception:
            print(dy[i], edy[i], dx[i], edx[i], y[i], ey[i], x[i], ex[i],
                  tmpw[i], tmph[i])
            print(dets[i])
            print(detss[i])
            print(h, w)
        crop_im = cv2.resize(tmp, (48, 48))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.onet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[2])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7, mode="Minimum")

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
    ])
    boxes_align = boxes_align.T

    return boxes_align
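# Hedged sketch of how the three cascade stages fit together (comments only:
# it assumes an enclosing MTCNN-style detector object exposing the
# detect_pnet / detect_rnet methods shown elsewhere in this file, and an
# image loaded via cv2.imread; none of these names are confirmed above).
# img = cv2.imread('face.jpg')                 # H x W x 3, uint8
# _, boxes = detector.detect_pnet(img)         # P-Net proposals
# _, boxes = detector.detect_rnet(img, boxes)  # R-Net refinement
# boxes = detector.detect_onet(img, boxes)     # O-Net calibration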
if __name__ == "__main__":
    r = np.random.rand(2, 3) * 10
    # r = np.array([[0.4, 1.5, -0.0], [0, 0, 1.4]])
    r1 = r[0]
    R1 = expmap2rotmat(r1)
    q1 = rotmat2quat(R1)
    # R1 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]])
    e1 = rotmat2euler(R1)

    r2 = r[1]
    R2 = expmap2rotmat(r2)
    q2 = rotmat2quat(R2)
    # R2 = np.array([[0, 0, -1], [0, 1, 0], [1, 0, 0]])
    e2 = rotmat2euler(R2)

    r = Variable(torch.from_numpy(r)).cuda().float()
    # q = expmap2quat_torch(r)
    R = expmap2rotmat_torch(r)
    q = rotmat2quat_torch(R)
    # R = Variable(torch.from_numpy(
    #     np.array([[[0, 0, 1], [0, 1, 0], [1, 0, 0]],
    #               [[0, 0, -1], [0, 1, 0], [1, 0, 0]]]))).cuda().float()
    eul = rotmat2euler_torch(R)
    eul = eul.cpu().data.numpy()
    R = R.cpu().data.numpy()
    q = q.cpu().data.numpy()

    if np.max(np.abs(eul[0] - e1)) < 0.000001:
        print('e1 clear')
    else:
        print('e1 error {}'.format(np.max(np.abs(eul[0] - e1))))
    # The e2 branch below mirrors the e1 check; it was truncated in the
    # original snippet and is completed here by symmetry.
    if np.max(np.abs(eul[1] - e2)) < 0.000001:
        print('e2 clear')
    else:
        print('e2 error {}'.format(np.max(np.abs(eul[1] - e2))))
def noise(size):
    '''
    Generates a 1-d vector of gaussian sampled random values
    '''
    n = Variable(torch.randn(size, 100))
    return n
def train(train_loader, model, optimizer, epoch, args, chamfer, visualizer,
          train_writer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    lossess = AverageMeter()

    model.train()
    end = time.time()
    epoch_iter = 0
    for i, (input) in enumerate(train_loader):
        # measure data loading time
        epoch_iter += args.batch_size
        data_time.update(time.time() - end)
        # target = target.cuda(async=True)
        # input = input.cuda(async=True)
        # input = [j.cuda() for j in input]  # mainly for list inputs
        # target = [j.cuda() for j in target]
        input_var = Variable(input, requires_grad=True)
        # target_var = torch.autograd.Variable(target)
        trans_input = torch.squeeze(input_var)
        trans_input = torch.transpose(trans_input, 1, 2)

        # pc_1, pc_2, pc_3 = model(input_var)
        pc_1 = model(input_var)

        trans_input_temp = trans_input[1, :, :]
        pc_1_temp = pc_1[1, :, :]
        visuals = OrderedDict([
            ('Train_input_pc', trans_input_temp.detach().cpu().numpy()),
            ('Train_predicted_pc', pc_1_temp.detach().cpu().numpy())
        ])

        loss_1 = chamfer(trans_input, pc_1)
        # loss_2 = chamfer(trans_input, pc_2)
        # loss_3 = chamfer(trans_input, pc_3)
        loss = loss_1  # + loss_2 + loss_3

        if not (epoch == 0 and i <= 20):
            lossess.update(loss.item(), input.size(0))
            # errors = OrderedDict([('loss', loss.item()), ('loss_1', loss_1.item()),
            #                       ('loss_2', loss_2.item()), ('loss_3', loss_3.item())])
            errors = OrderedDict([('loss', loss.item()),
                                  ('loss_1', loss_1.item())])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        epoch_size = len(train_loader)
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t Batch Time: {3} sec\t '
                  'Data Load Time: {4} sec\t Loss: {5}'.format(
                      epoch, i, epoch_size, batch_time, data_time, loss))
            visualizer.display_current_results(visuals, epoch, i)
            # output_writer.add_embedding(torch.transpose(trans_input_temp, 0, 1),
            #                             global_step=epoch)
        if not (epoch == 0 and i <= 20):
            visualizer.plot_current_errors(epoch, float(i) / epoch_size, args,
                                           errors)

    train_writer.add_scalar('train_loss', loss.item(), epoch)
    return lossess.avg, input_var, pc_1
def ones_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    return data
def zeros_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    return data
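# Quick self-contained sanity check (illustrative, not from the original
# code): discriminator targets for real samples are ones and for fake
# samples zeros, both with shape (size, 1).
_t_real = ones_target(4)
_t_fake = zeros_target(4)
assert tuple(_t_real.shape) == (4, 1) and tuple(_t_fake.shape) == (4, 1)
assert _t_real.sum().item() == 4 and _t_fake.sum().item() == 0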
    error.backward()
    optimizer.step()
    return error


num_test_samples = 25
test_noise = noise(num_test_samples)

# Create logger instance
logger = Logger(model_name='VGAN', data_name='faces')
num_epochs = 500
for epoch in range(num_epochs):
    for n_batch, (real_batch, _) in enumerate(data_loader):

        # Train the discriminator
        real_data = Variable(images_to_vectors(real_batch))
        fake_data = generator(noise(real_batch.size(0))).detach()
        d_error, d_pred_real, d_pred_fake = train_discriminator(
            d_optimizer, real_data, fake_data)

        # Train the generator
        fake_data = generator(noise(real_batch.size(0)))
        g_error = train_generator(g_optimizer, fake_data)

        # print(d_error, g_error, epoch)
        logger.log(d_error, g_error, epoch, n_batch, num_batches)

        if n_batch % 100 == 0:
            test_images = vectors_to_images(generator(test_noise))
            test_images = test_images.data
            # the call was truncated in the original; `num_batches` completes
            # it to match the logger.log call above
            logger.log_images(test_images, num_test_samples, epoch, n_batch,
                              num_batches)
# c = b.float()
# d = c.long()
# e = d.cpu()[1].numpy()
# import skimage.io as io
# import numpy as np
# io.imsave('qwe.jpg', e.astype(np.float))
import torch
import torch.nn as nn
from torch.autograd.variable import Variable
import os
import time

os.environ['CUDA_VISIBLE_DEVICES'] = '2'
time.sleep(0.5)

n1 = nn.Linear(1000, 1000)
n2 = nn.Linear(1000, 1000)
n1.cuda()
n2.cuda()

for step in range(100000):
    i = Variable(torch.randn(32, 1000).cuda())
    o1 = n1(i)
    o11 = n1(o1)
    o1.detach_()
    o2 = n2(o11)
    loss = o2.sum()
    loss.backward()
    # GPU memory is not freed here; even without calling backward,
    # memory usage does not keep growing.
    print(loss, step, '/', 100000)
def random_input(size):
    ''' Gaussian noise tensor of shape (size, 100), moved to `device` '''
    r = Variable(torch.randn(size, 100)).to(device)
    return r
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array, one batch

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    # im = self.unique_image_format(im)

    # original wider face data
    h, w, c = im.shape

    net_size = 12
    current_scale = float(net_size) / self.min_face_size  # find initial scale
    # print('imgshape:{0}, current_scale:{1}'.format(im.shape, current_scale))
    im_resized = self.resize_image(im, current_scale)  # scale = 1.0
    current_height, current_width, _ = im_resized.shape

    # fcn
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        # print('current:', current_height, current_width)
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = torch.stack(feed_imgs)
        feed_imgs = Variable(feed_imgs)

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        # self.pnet_detector is a trained pnet torch model
        # receptive field is 12x12
        # 12x12 --> score
        # 12x12 --> bounding box
        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
        # print(cls_map_np.shape, reg_np.shape)
        # cls_map_np.shape = (1, n, m, 1), reg_np.shape = (1, n, m, 4)
        # time.sleep(5)
        # landmark_np = image_tools.convert_chwTensor_to_hwcNumpy(landmark.cpu())

        # self.threshold[0] = 0.6
        # print(cls_map_np[0, :, :].shape)
        # time.sleep(4)

        # boxes = [x1, y1, x2, y2, score, reg]
        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])
        # cv2.rectangle(im, (300, 100), (400, 200), color=(0, 0, 0))
        # cv2.rectangle(im, (400, 200), (500, 300), color=(0, 0, 0))

        # generate pyramid images
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppression
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)
    # print("shape of all boxes {0}".format(all_boxes.shape))
    # time.sleep(5)

    # merge the detections from the first stage
    keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
    all_boxes = all_boxes[keep]
    # boxes = all_boxes[:, :5]

    # x2 - x1, y2 - y1
    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # boxes = [x1, y1, x2, y2, score], reg = [px1, py1, px2, py2] (in prediction)
    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

    # refine the boxes
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    # remove invalid boxes
    valindex = [True for _ in range(boxes_align.shape[0])]
    for i in range(boxes_align.shape[0]):
        if boxes_align[i][2] - boxes_align[i][0] <= 3 or \
                boxes_align[i][3] - boxes_align[i][1] <= 3:
            valindex[i] = False
            print('pnet has one smaller than 3')
        else:
            if boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 or \
                    boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
                valindex[i] = False
                print('pnet has one out')
    boxes_align = boxes_align[valindex, :]
    boxes = boxes[valindex, :]
    return boxes, boxes_align
criterion = nn.NLLLoss2d()
optimizer = optim.Adam(pcnn.parameters(), lr=0.0002, betas=(0.5, 0.999))

# train
for epoch in range(100):
    for i, (data, _) in enumerate(loader, 0):
        data = data.mean(1, keepdim=True)
        bsize_now, _, h, w = data.size()
        ids = (255 * data).long()
        label = torch.FloatTensor(bsize_now, 256, h, w).scatter_(
            1, ids, torch.ones(ids.size())).cuda()
        input = Variable(data).cuda()
        output = pcnn(input)
        loss = criterion(F.log_softmax(output, dim=1),
                         Variable(ids[:, 0]).cuda())
        pcnn.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            # ##########################
            # #      Visualization     #
            # ##########################
            _, temp = torch.max(output, 1)
            images = make_label_grid(temp.data.float().unsqueeze(1)[:8] / 255)
            writer.add_image('output', images, i)
            images = make_label_grid(data[:8])
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    # im: an input image
    h, w, c = im.shape

    if dets is None:
        return None, None
    if dets.shape[0] == 0:
        return None, None

    # (705, 5) = [x1, y1, x2, y2, score, reg]
    # print("pnet detection {0}".format(dets.shape))
    # time.sleep(5)
    detss = dets
    # return square boxes
    dets = self.square_bbox(dets)
    detsss = dets
    # round
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
    cropped_ims_tensors = []
    for i in range(num_boxes):
        try:
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        except Exception:
            print(dy[i], edy[i], dx[i], edx[i], y[i], ey[i], x[i], ex[i],
                  tmpw[i], tmph[i])
            print(dets[i])
            print(detss[i])
            print(detsss[i])
            print(h, w)
            exit()
        crop_im = cv2.resize(tmp, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7)

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    boxes = np.vstack([
        keep_boxes[:, 0],
        keep_boxes[:, 1],
        keep_boxes[:, 2],
        keep_boxes[:, 3],
        keep_cls[:, 0],
    ])

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
    ])

    boxes = boxes.T
    boxes_align = boxes_align.T

    # remove invalid boxes
    valindex = [True for _ in range(boxes_align.shape[0])]
    for i in range(boxes_align.shape[0]):
        if boxes_align[i][2] - boxes_align[i][0] <= 3 or \
                boxes_align[i][3] - boxes_align[i][1] <= 3:
            valindex[i] = False
            print('rnet has one smaller than 3')
        else:
            if boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 or \
                    boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
                valindex[i] = False
                print('rnet has one out')
    boxes_align = boxes_align[valindex, :]
    boxes = boxes[valindex, :]
    return boxes, boxes_align
def trainNet(net, n_epochs, learning_rate):
    # Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    # print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    # load the epoch-32 checkpoint
    net.load_state_dict(torch.load('model_progr/simple_regre100_32'))

    # loss = torch.nn.MarginRankingLoss(margin=0.001)
    loss = torch.nn.L1Loss()
    # Create our loss and optimizer functions
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Time for printing
    training_start_time = time.time()
    num_batch = 1

    # Loop for n_epochs
    for epoch in range(n_epochs):
        print("epoch " + str(epoch + 32))
        running_loss = 0.0
        start_time = time.time()
        total_train_loss = 0

        # for i in range(num_batch):
        # for files in glob.glob('batch/T0913*input*'):
        for train in train_list:
            # Get inputs
            # print("loading batch " + str(i))
            for files in sorted(glob.glob('smaller_batch/' + train + '*input*')):
                print(files)
                # input1 = np.transpose(np.load('batch/T0913_input_batch' + str(num_batch + 1) + '.npy'), (0, 4, 1, 2, 3))
                # labels = np.load('batch/T0913_score_batch' + str(num_batch + 1) + '.npy')
                input1 = np.transpose(np.load(files), (0, 4, 1, 2, 3))
                labels = np.load(files.split('_input_')[0] + '_score_' +
                                 files.split('_input_')[1])

                # Wrap them in Variable objects
                input1, labels = Variable(
                    torch.FloatTensor(input1).cuda()), Variable(
                        torch.FloatTensor(labels).cuda())

                # Set the parameter gradients to zero
                optimizer.zero_grad()

                # Forward pass, backward pass, optimize
                output1 = net(input1)
                # output2 = net(input2)
                # pdb.set_trace()
                loss_size = loss(output1, labels)
                loss_size.backward()
                optimizer.step()

                # Print statistics
                # pdb.set_trace()
                running_loss += loss_size.item()
                total_train_loss += loss_size.item()
                del input1
                del labels
                gc.collect()

        # callbacks = [EarlyStopping(patience=10), ReduceLROnPlateau(factor=0.5, patience=5)]
        torch.save(
            net.state_dict(),
            '/net/kihara/scratch/ding48/CASP12/model_progr/simple_regre100_' +
            str(epoch + 32))
        print("total train loss: " + str(total_train_loss))
        print("finished training")

        # At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        num_val = 1
        # for i in range(num_val):
        # for vals in glob.glob('batch/T0887*input*'):
        for test in test_list:
            for vals in sorted(glob.glob('smaller_batch/' + test + '*input*')):
                print(vals)
                # val_input1 = np.transpose(np.load('batch/T0913_input_batch2.npy'), (0, 4, 1, 2, 3))
                # val_labels = np.load('batch/T0913_score_batch2.npy')
                val_input1 = np.transpose(np.load(vals), (0, 4, 1, 2, 3))
                val_labels = np.load(vals.split('_input_')[0] + '_score_' +
                                     vals.split('_input_')[1])

                # Wrap tensors in Variables
                val_input1, val_labels = Variable(
                    torch.FloatTensor(val_input1).cuda()), Variable(
                        torch.FloatTensor(val_labels).cuda())

                # Forward pass
                val_output1 = net(val_input1)
                print(val_output1)
                # Pearson correlation between predictions and labels
                output1_mean = val_output1 - torch.mean(val_output1)
                label_mean = val_labels - torch.mean(val_labels)
                cost = torch.sum(output1_mean * label_mean) / (
                    torch.sqrt(torch.sum(output1_mean ** 2)) *
                    torch.sqrt(torch.sum(label_mean ** 2)))
                # print(cost)
                val_loss_size = loss(val_output1, val_labels)
                # print(val_loss_size)
                total_val_loss += val_loss_size.item()
                del val_input1
                del val_labels
                gc.collect()

        print("Validation loss = {:.2f}".format(total_val_loss))
        # callbacks = [ModelCheckpoint(file='/net/kihara/scratch/ding48/CASP12/model_progr/model_{epoch}_{total_train_loss}.pt', monitor='total_val_loss', save_best_only=False, max_checkpoints=3)]

    print("Training finished, took {:.2f}s".format(time.time() -
                                                   training_start_time))
def calc_flops(model, input_size):
    global USE_GPU

    def conv_hook(self, input, output):
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()

        kernel_ops = self.kernel_size[0] * self.kernel_size[1] * \
            (self.in_channels / self.groups) * (2 if multiply_adds else 1)
        bias_ops = 1 if self.bias is not None else 0

        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width

        list_conv.append(flops)

    def linear_hook(self, input, output):
        batch_size = input[0].size(0) if input[0].dim() == 2 else 1

        weight_ops = self.weight.nelement() * (2 if multiply_adds else 1)
        bias_ops = self.bias.nelement()

        flops = batch_size * (weight_ops + bias_ops)
        list_linear.append(flops)

    def bn_hook(self, input, output):
        list_bn.append(input[0].nelement())

    def relu_hook(self, input, output):
        list_relu.append(input[0].nelement())

    def pooling_hook(self, input, output):
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()

        kernel_ops = self.kernel_size * self.kernel_size
        bias_ops = 0
        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width

        list_pooling.append(flops)

    def foo(net):
        childrens = list(net.children())
        if not childrens:
            if isinstance(net, torch.nn.Conv2d):
                net.register_forward_hook(conv_hook)
            if isinstance(net, torch.nn.Linear):
                net.register_forward_hook(linear_hook)
            if isinstance(net, torch.nn.BatchNorm2d):
                net.register_forward_hook(bn_hook)
            if isinstance(net, torch.nn.ReLU):
                net.register_forward_hook(relu_hook)
            if isinstance(net, torch.nn.MaxPool2d) or \
                    isinstance(net, torch.nn.AvgPool2d):
                net.register_forward_hook(pooling_hook)
            return
        for c in childrens:
            foo(c)

    multiply_adds = False
    list_conv, list_bn, list_relu, list_linear, list_pooling = [], [], [], [], []
    foo(model)
    if '0.4.' in torch.__version__:
        if USE_GPU:
            input = torch.cuda.FloatTensor(
                torch.rand(2, 3, input_size, input_size).cuda())
        else:
            input = torch.FloatTensor(torch.rand(2, 3, input_size, input_size))
    else:
        input = Variable(torch.rand(2, 3, input_size, input_size),
                         requires_grad=True)
    _ = model(input)

    total_flops = (sum(list_conv) + sum(list_linear) + sum(list_bn) +
                   sum(list_relu) + sum(list_pooling))

    print(' + Number of FLOPs: %.2fG' % (total_flops / 1e9 / 2))
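# Illustrative call of calc_flops on a toy conv stack (the toy model and the
# USE_GPU flag below are assumptions made for this example; the real script
# defines USE_GPU at module level and passes in its own model).
USE_GPU = False
_toy = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, kernel_size=3, padding=1),
    torch.nn.BatchNorm2d(8),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2),
)
calc_flops(_toy, input_size=32)  # prints ' + Number of FLOPs: ...G'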
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    # im = self.unique_image_format(im)

    h, w, c = im.shape
    net_size = 12

    current_scale = float(net_size) / self.min_face_size  # find initial scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    # fcn
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor.float())
        feed_imgs = torch.stack(feed_imgs)
        feed_imgs = Variable(feed_imgs)

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
        # landmark_np = image_tools.convert_chwTensor_to_hwcNumpy(landmark.cpu())

        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])

        current_scale *= self.scale_factor
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)

    # merge the detections from the first stage
    keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
    all_boxes = all_boxes[keep]
    # boxes = all_boxes[:, :5]

    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    # landmark_keep = all_boxes[:, 9:].reshape((5, 2))

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
        # all_boxes[:, 0] + all_boxes[:, 9] * bw,
        # all_boxes[:, 1] + all_boxes[:, 10] * bh,
        # all_boxes[:, 0] + all_boxes[:, 11] * bw,
        # all_boxes[:, 1] + all_boxes[:, 12] * bh,
        # all_boxes[:, 0] + all_boxes[:, 13] * bw,
        # all_boxes[:, 1] + all_boxes[:, 14] * bh,
        # all_boxes[:, 0] + all_boxes[:, 15] * bw,
        # all_boxes[:, 1] + all_boxes[:, 16] * bh,
        # all_boxes[:, 0] + all_boxes[:, 17] * bw,
        # all_boxes[:, 1] + all_boxes[:, 18] * bh
    ])
    boxes = boxes.T

    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

    # refine the boxes
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
        # align_topx + all_boxes[:, 9] * bw,
        # align_topy + all_boxes[:, 10] * bh,
        # align_topx + all_boxes[:, 11] * bw,
        # align_topy + all_boxes[:, 12] * bh,
        # align_topx + all_boxes[:, 13] * bw,
        # align_topy + all_boxes[:, 14] * bh,
        # align_topx + all_boxes[:, 15] * bw,
        # align_topy + all_boxes[:, 16] * bh,
        # align_topx + all_boxes[:, 17] * bw,
        # align_topy + all_boxes[:, 18] * bh,
    ])
    boxes_align = boxes_align.T

    return boxes, boxes_align
def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, data_std=0,
                     data_mean=0, is_test=False):
    seq_len = input_n + output_n
    nactions = len(actions)
    sampled_seq = []
    complete_seq = []
    for action_idx in np.arange(nactions):
        action = actions[action_idx]
        path = '{}/{}'.format(path_to_dataset, action)
        count = 0
        for _ in os.listdir(path):
            count = count + 1
        for examp_index in np.arange(count):
            filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action,
                                                action, examp_index + 1)
            action_sequence = readCSVasFloat(filename)
            n, d = action_sequence.shape
            exptmps = Variable(torch.from_numpy(action_sequence)).float().cuda()
            xyz = expmap2xyz_torch_cmu(exptmps)
            xyz = xyz.view(-1, 38 * 3)
            xyz = xyz.cpu().data.numpy()
            action_sequence = xyz

            even_list = range(0, n, 2)
            the_sequence = np.array(action_sequence[even_list, :])
            num_frames = len(the_sequence)
            if not is_test:
                fs = np.arange(0, num_frames - seq_len + 1)
                fs_sel = fs
                for i in np.arange(seq_len - 1):
                    fs_sel = np.vstack((fs_sel, fs + i + 1))
                fs_sel = fs_sel.transpose()
                seq_sel = the_sequence[fs_sel, :]
                if len(sampled_seq) == 0:
                    sampled_seq = seq_sel
                    complete_seq = the_sequence
                else:
                    sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
                    complete_seq = np.append(complete_seq, the_sequence, axis=0)
            else:
                source_seq_len = 50
                target_seq_len = 25
                total_frames = source_seq_len + target_seq_len
                batch_size = 8
                SEED = 1234567890
                rng = np.random.RandomState(SEED)
                for _ in range(batch_size):
                    idx = rng.randint(0, num_frames - total_frames)
                    seq_sel = the_sequence[
                        idx + (source_seq_len - input_n):
                        (idx + source_seq_len + output_n), :]
                    seq_sel = np.expand_dims(seq_sel, axis=0)
                    if len(sampled_seq) == 0:
                        sampled_seq = seq_sel
                        complete_seq = the_sequence
                    else:
                        sampled_seq = np.concatenate((sampled_seq, seq_sel),
                                                     axis=0)
                        complete_seq = np.append(complete_seq, the_sequence,
                                                 axis=0)

    if not is_test:
        data_std = np.std(complete_seq, axis=0)
        data_mean = np.mean(complete_seq, axis=0)

    joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36])
    dimensions_to_ignore = np.concatenate(
        (joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
    dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]),
                                     dimensions_to_ignore)
    data_std[dimensions_to_ignore] = 1.0
    data_mean[dimensions_to_ignore] = 0.0

    return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std
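# Hedged usage sketch for load_data_cmu_3d, mirroring the H3.6M helper above
# (comments only: it needs the CMU mocap files on disk and a CUDA device;
# the path and action list below are placeholders, not repo values).
# sampled, dim_ignore, dim_use, mean, std = load_data_cmu_3d(
#     path_to_dataset='./cmu_mocap/train', actions=['basketball'],
#     input_n=10, output_n=25)
# print(sampled.shape, len(dim_use))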
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    '''
    # helper for setting RNet batch size
    batch_size = self.rnet_detector.batch_size
    ratio = float(num_boxes) / batch_size
    if ratio > 3 or ratio < 0.3:
        print "You may need to reset RNet batch size if this info appears frequently, \
    face candidates:%d, current batch_size:%d"%(num_boxes, batch_size)
    '''

    # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs.float())

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    # landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        # landmark = landmark[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7)

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    # keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    boxes = np.vstack([
        keep_boxes[:, 0],
        keep_boxes[:, 1],
        keep_boxes[:, 2],
        keep_boxes[:, 3],
        keep_cls[:, 0],
        # keep_boxes[:, 0] + keep_landmark[:, 0] * bw,
        # keep_boxes[:, 1] + keep_landmark[:, 1] * bh,
        # keep_boxes[:, 0] + keep_landmark[:, 2] * bw,
        # keep_boxes[:, 1] + keep_landmark[:, 3] * bh,
        # keep_boxes[:, 0] + keep_landmark[:, 4] * bw,
        # keep_boxes[:, 1] + keep_landmark[:, 5] * bh,
        # keep_boxes[:, 0] + keep_landmark[:, 6] * bw,
        # keep_boxes[:, 1] + keep_landmark[:, 7] * bh,
        # keep_boxes[:, 0] + keep_landmark[:, 8] * bw,
        # keep_boxes[:, 1] + keep_landmark[:, 9] * bh,
    ])

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
        # align_topx + keep_landmark[:, 0] * bw,
        # align_topy + keep_landmark[:, 1] * bh,
        # align_topx + keep_landmark[:, 2] * bw,
        # align_topy + keep_landmark[:, 3] * bh,
        # align_topx + keep_landmark[:, 4] * bw,
        # align_topy + keep_landmark[:, 5] * bh,
        # align_topx + keep_landmark[:, 6] * bw,
        # align_topy + keep_landmark[:, 7] * bh,
        # align_topx + keep_landmark[:, 8] * bw,
        # align_topy + keep_landmark[:, 9] * bh,
    ])

    boxes = boxes.T
    boxes_align = boxes_align.T

    return boxes, boxes_align
        k,
        num_train_images=dataset_sizes[k][0],
        num_test_images=dataset_sizes[k][1],
        jitter_program=True)

prev_test_loss = 1e20
prev_test_cd = 1e20
prev_test_iou = 0
for epoch in range(config.epochs):
    train_loss = 0
    Accuracies = []
    imitate_net.train()
    for batch_idx in range(config.train_size //
                           (config.batch_size * config.num_traj)):
        optimizer.zero_grad()
        loss = Variable(torch.zeros(1)).cuda().data
        for _ in range(config.num_traj):
            for k in dataset_sizes.keys():
                data, labels = next(train_gen_objs[k])
                labels_cont = torch.from_numpy(
                    labels_to_cont(labels,
                                   generator.unique_draw)).to(device).float()
                data = data[:, :, 0:1, :, :]
                one_hot_labels = prepare_input_op(labels,
                                                  len(generator.unique_draw))
                one_hot_labels = Variable(
                    torch.from_numpy(one_hot_labels)).cuda()
                data = Variable(torch.from_numpy(data)).cuda()
                labels = Variable(torch.from_numpy(labels)).cuda()
                outputs = imitate_net([data, one_hot_labels, k])
                loss_k = imitate_net.loss_function(
def main():
    # Load dataset
    trainset, testset = iispt_dataset.load_dataset(config.testset, 0.0)
    selected_set = testset
    selected_set_len = testset.__len__()

    # Load model
    net = iispt_net.IISPTNet()
    net.load_state_dict(torch.load(config.model_path))
    # Put in eval mode
    net.eval()
    print("Model loaded")

    # Statistics accumulators
    statLowL1 = []
    statLowSs = []
    statGaussianL1 = []
    statGaussianSs = []
    statResultL1 = []
    statResultSs = []

    # Loop over each test example
    print("Processing {} items".format(selected_set_len))
    for i in range(selected_set_len):
        if i % 100 == 0:
            print("Processing index {}".format(i))

        item = selected_set.__getitem__(i)
        aug = item["aug"]
        if aug != 0:
            # Only process un-augmented samples
            continue
        item_input = item["t"]
        item_input = item_input.unsqueeze(0)

        # Run the network on the data
        input_variable = Variable(item_input)
        result = net(input_variable)
        resultImg = pfm.loadFromConvOutNpArray(result.data.numpy()[0])
        resultImg.normalize_intensity_upstream(item["mean"])

        expectedImg = pfm.load(item["p_name"])
        lowImg = pfm.load(item["d_name"])

        # Normalize the maps according to their mean, for better statistics
        resultImg.divideMean()
        expectedImg.divideMean()
        lowImg.divideMean()

        gaussianImg = lowImg.makeCopy()
        gaussianImg.gaussianBlur(1.0)

        # Compute metrics on 1 SPP
        lowL1 = lowImg.computeL1Loss(expectedImg)
        lowSs = lowImg.computeStructuralSimilarity(expectedImg)

        # Compute metrics on the blurred image
        gaussianL1 = gaussianImg.computeL1Loss(expectedImg)
        gaussianSs = gaussianImg.computeStructuralSimilarity(expectedImg)

        # Compute metrics on the NN prediction
        resultL1 = resultImg.computeL1Loss(expectedImg)
        resultSs = resultImg.computeStructuralSimilarity(expectedImg)

        # Record statistics
        statLowL1.append(lowL1)
        statLowSs.append(lowSs)
        statGaussianL1.append(gaussianL1)
        statGaussianSs.append(gaussianSs)
        statResultL1.append(resultL1)
        statResultSs.append(resultSs)

    print("Statistics collection completed")

    # To numpy
    statLowL1 = numpy.array(statLowL1)
    statLowSs = numpy.array(statLowSs)
    statGaussianL1 = numpy.array(statGaussianL1)
    statGaussianSs = numpy.array(statGaussianSs)
    statResultL1 = numpy.array(statResultL1)
    statResultSs = numpy.array(statResultSs)

    plot(statLowL1, statGaussianL1, statResultL1, "L1", -0.1, 1.6)
    plot(statLowSs, statGaussianSs, statResultSs, "Structural Similarity",
         -0.1, 1.0)

    # Compute p-values for L1, gaussian vs. predicted
    t, p = scipy.stats.kruskal(statGaussianL1, statResultL1)
    print("P value L1 gaussian-predicted {}".format(p))

    # Compute p-values for Ss, gaussian vs. predicted
    t, p = scipy.stats.kruskal(statGaussianSs, statResultSs)
    print("P value Ss gaussian-predicted {}".format(p))

    # Compute p-values for L1, low vs. predicted
    t, p = scipy.stats.kruskal(statLowL1, statResultL1)
    print("P value L1 low-predicted {}".format(p))

    # Compute p-values for Ss, low vs. predicted
    t, p = scipy.stats.kruskal(statLowSs, statResultSs)
    print("P value Ss low-predicted {}".format(p))
    if_augment=False)

prev_test_reward = 0
imitate_net.epsilon = config.eps
# Number of batches to accumulate before doing the gradient update.
num_traj = config.num_traj
training_reward_save = 0
for epoch in range(config.epochs):
    train_loss = 0
    total_reward = 0
    imitate_net.epsilon = 1
    imitate_net.train()
    for batch_idx in range(config.train_size // (config.batch_size)):
        optimizer.zero_grad()
        loss_sum = Variable(torch.zeros(1)).cuda().data
        Rs = np.zeros((config.batch_size, 1))
        for _ in range(num_traj):
            labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
            data_ = next(train_gen)
            one_hot_labels = prepare_input_op(labels, len(unique_draw))
            one_hot_labels = Variable(torch.from_numpy(one_hot_labels)).cuda()
            data = Variable(torch.from_numpy(data_), volatile=False).cuda()
            outputs, samples = imitate_net([data, one_hot_labels, max_len])

            R = reinforce.generate_rewards(samples,
                                           data_,
                                           time_steps=max_len,
                                           stack_size=max_len // 2 + 1,
                                           reward=reward,
                                           power=power)
            R = R[0]
def validate(val_loader, model, criterion, epoch):
    global time_stp
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    result_list = []
    label_list = []
    predicted_list = []

    # switch to evaluate mode
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            rgb_img, depth_img, ir_img, hsv_img, YCbCr_img, label, dirs = (
                data[0], data[1], data[2], data[3], data[4], data[5], data[6])
            rgb_var = Variable(rgb_img).float().to(device)
            depth_var = Variable(depth_img).float().to(device)
            ir_var = Variable(ir_img).float().to(device)
            hsv_img_var = Variable(hsv_img).float().to(device)
            YCbCr_img_var = Variable(YCbCr_img).float().to(device)
            target_var = Variable(label).long().to(device)

            # compute output
            output = model(rgb_var, depth_var, ir_var, hsv_img_var,
                           YCbCr_img_var, args.weight_list)
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec2 = accuracy(output.data, target_var, topk=(1, 2))
            losses.update(loss.data, rgb_img.size(0))
            top1.update(prec1[0], rgb_img.size(0))

            soft_output = torch.softmax(output, dim=-1)
            preds = soft_output.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            _, predicted = torch.max(soft_output.data, 1)
            predicted = predicted.to('cpu').detach().numpy()

            for i_batch in range(preds.shape[0]):
                result_list.append(preds[i_batch, 1])
                label_list.append(label[i_batch])
                predicted_list.append(predicted[i_batch])
                if args.val_save:
                    f = open(
                        'submission/{}_{}_{}_submission.txt'.format(
                            time_stp, args.arch, epoch), 'a+')
                    rgb_dir = dirs[i_batch].replace(os.getcwd() + '/data/', '')
                    depth_dir = rgb_dir.replace('profile', 'depth')
                    ir_dir = rgb_dir.replace('profile', 'ir')
                    f.write(rgb_dir + ' ' + depth_dir + ' ' + ir_dir + ' ' +
                            str(preds[i_batch, 1]) + '\n')

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                line = 'Test: [{0}/{1}]\t' \
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                           i, len(val_loader), batch_time=batch_time,
                           loss=losses, top1=top1)
                logger.Print(line)

    tn, fp, fn, tp = confusion_matrix(label_list, predicted_list).ravel()
    apcer = fp / (tn + fp)
    npcer = fn / (fn + tp)
    acer = (apcer + npcer) / 2
    metric = roc.cal_metric(label_list, result_list)
    eer = metric[0]
    tprs = metric[1]
    auc = metric[2]
    xy_dic = metric[3]
    with open(log_path + '/val_result_{}_{}.txt'.format(time_stp, args.arch),
              'a+') as f_result:
        result_line = ('epoch: {} EER: {:.6f} TPR@FPR=10E-2: {:.6f} '
                       'TPR@FPR=10E-3: {:.6f} APCER: {:.6f} NPCER: {:.6f} '
                       'AUC: {:.8f} Acc: {:.3f} TN: {} FP: {} FN: {} TP: {} '
                       'ACER: {:.8f}'.format(
                           epoch, eer, tprs["TPR@FPR=10E-2"],
                           tprs["TPR@FPR=10E-3"], apcer, npcer, auc, top1.avg,
                           tn, fp, fn, tp, acer))
        f_result.write('{}\n'.format(result_line))
    logger.Print(result_line)
    return top1.avg
num_epochs = 100
num_test_samples = 16
test_noise = networks.noise(num_test_samples)
logger = utils.Logger(model_name='VGAN', data_name='Cats')
discriminator_loss = []
generator_loss = []

for epoch in range(num_epochs):
    for n_batch, real_batch in enumerate(data_loader):

        # 1. Train the discriminator
        real_data = Variable(networks.images_to_vectors(real_batch))
        # Generate fake data
        fake_data = generator(networks.noise(real_data.size(0))).detach()
        # Train D
        d_error, d_pred_real, d_pred_fake = train_discriminator(
            d_optimizer, real_data, fake_data, discriminator, generator, loss)
        discriminator_loss.append(d_error)

        # 2. Train the generator
        # Generate fake data
        fake_data = generator(networks.noise(real_batch.size(0)))
        # Train G
def train(self, data, weights, penalty=True, quantum_instance=None, shots=None):
    """
    Perform one training step w.r.t. the discriminator's parameters.

    Args:
        data (tuple): real_batch: torch.Tensor, training data batch.
            generated_batch: numpy array, generated data batch.
        weights (tuple): real problem, generated problem
        penalty (bool): indicate whether or not the penalty function is
            applied to the loss function
        quantum_instance (QuantumInstance): Quantum Instance (deprecated)
        shots (int): Number of shots for hardware or qasm execution.
            Not used for the classical network (only quantum ones)

    Returns:
        dict: with Discriminator loss (torch.Tensor) and updated parameters
            (array).
    """
    # pylint: disable=E1101
    # pylint: disable=E1102
    # Reset gradients
    self._optimizer.zero_grad()
    real_batch = data[0]
    real_prob = weights[0]
    generated_batch = data[1]
    generated_prob = weights[1]

    real_batch = np.reshape(real_batch, (len(real_batch), 1))
    real_batch = torch.tensor(real_batch, dtype=torch.float32)
    real_batch = Variable(real_batch)
    real_prob = np.reshape(real_prob, (len(real_prob), 1))
    real_prob = torch.tensor(real_prob, dtype=torch.float32)

    # Train on real data
    prediction_real = self.get_label(real_batch)

    # Calculate error and backpropagate
    error_real = self.loss(prediction_real,
                           torch.ones(len(prediction_real), 1), real_prob)
    error_real.backward()

    # Train on generated data
    generated_batch = np.reshape(generated_batch,
                                 (len(generated_batch), self._n_features))
    generated_prob = np.reshape(generated_prob, (len(generated_prob), 1))
    generated_prob = torch.tensor(generated_prob, dtype=torch.float32)
    prediction_fake = self.get_label(generated_batch)

    # Calculate error and backpropagate
    error_fake = self.loss(prediction_fake,
                           torch.zeros(len(prediction_fake), 1),
                           generated_prob)
    error_fake.backward()

    if penalty:
        self.gradient_penalty(real_batch).backward()
    # pylint: enable=E1101
    # pylint: enable=E1102

    # Update weights with gradients
    self._optimizer.step()

    # Return error and predictions for real and fake inputs
    loss_ret = 0.5 * (error_real + error_fake)
    self._ret['loss'] = loss_ret.detach().numpy()
    params = []
    for param in self._discriminator.parameters():
        params.append(param.data.detach().numpy())
    self._ret['params'] = params

    return self._ret
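# Hedged usage sketch for the discriminator step above (comments only: it
# assumes `discriminator` is an instance of the enclosing class and that the
# real/generated batches and their probabilities are 1-d numpy arrays, as
# implied by the reshapes in the method).
# ret = discriminator.train(
#     data=(real_batch, generated_batch),
#     weights=(real_prob, generated_prob),
#     penalty=True)
# print(ret['loss'])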
def fake_data_labels(size):
    ''' Tensor of zeros: discriminator labels for fake data, shape (size, 1) '''
    data = Variable(torch.zeros(size, 1))
    return data
def beam_search(self, data: List, w: int, max_time: int):
    """
    Implements beam search for different models.
    :param data: input data
    :param w: beam width
    :param max_time: maximum length till which the program has to be generated
    :return all_beams: all beams, used to recover the indices of the selected
        outputs at every time step
    """
    data, input_op = data

    # Beam: a dictionary whose elements are lists, each list entry holding
    # the index of the selected output and the corresponding probability.
    # batch_size = data.size()[1]
    batch_size = data.size()[0]
    h = Variable(torch.zeros(1, batch_size, self.hd_sz)).cuda()
    # Last beams' data
    B = {0: {"input": input_op, "h": h}, 1: None}
    next_B = {}
    # x_f = self.encoder.encode(data[-1, :, 0:1, :, :])
    x_f = self.encoder.encode(data.unsqueeze(1))
    x_f = x_f.view(1, batch_size, self.in_sz)
    # List to store the probabilities of the last time step
    prev_output_prob = [
        Variable(torch.ones(batch_size, self.num_draws)).cuda()
    ]
    all_beams = []
    all_inputs = []
    for timestep in range(0, max_time):
        outputs = []
        for b in range(w):
            if not B[b]:
                break
            input_op = B[b]["input"]
            h = B[b]["h"]
            input_op_rnn = self.relu(self.dense_input_op(input_op[:, 0, :]))
            input_op_rnn = input_op_rnn.view(1, batch_size, self.input_op_sz)
            input = torch.cat((x_f, input_op_rnn), 2)
            h, _ = self.rnn(input, h)
            hd = self.relu(self.dense_fc_1(self.drop(h[0])))
            dense_output = self.dense_output(self.drop(hd))
            output = self.logsoftmax(dense_output)
            # Element-wise multiply by the previous probabilities
            output = torch.nn.Softmax(1)(output)
            output = output * prev_output_prob[b]
            outputs.append(output)
            next_B[b] = {}
            next_B[b]["h"] = h
        if len(outputs) == 1:
            outputs = outputs[0]
        else:
            outputs = torch.cat(outputs, 1)

        next_beams_index = torch.topk(outputs, w, 1, sorted=True)[1]
        next_beams_prob = torch.topk(outputs, w, 1, sorted=True)[0]
        # print(next_beams_prob)
        current_beams = {
            "parent": next_beams_index.data.cpu().numpy() // (self.num_draws),
            "index": next_beams_index % (self.num_draws)
        }
        # print(next_beams_index % (self.num_draws))
        next_beams_index %= (self.num_draws)
        all_beams.append(current_beams)

        # Update previous output probabilities
        temp = Variable(torch.zeros(batch_size, 1)).cuda()
        prev_output_prob = []
        for i in range(w):
            for index in range(batch_size):
                temp[index, 0] = next_beams_prob[index, i]
            prev_output_prob.append(temp.repeat(1, self.num_draws))

        # hidden state for the next step
        B = {}
        for i in range(w):
            B[i] = {}
            temp = Variable(torch.zeros(h.size())).cuda()
            for j in range(batch_size):
                temp[0, j, :] = next_B[current_beams["parent"][j, i]]["h"][0, j, :]
            B[i]["h"] = temp

        # one-hot input for the next step
        for i in range(w):
            arr = Variable(
                torch.zeros(batch_size, self.num_draws + 1).scatter_(
                    1, next_beams_index[:, i:i + 1].data.cpu(), 1.0)).cuda()
            B[i]["input"] = arr.unsqueeze(1)
        all_inputs.append(B)

    return all_beams, next_beams_prob, all_inputs
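# Hedged usage sketch for beam_search (comments only: it needs the trained
# CSG-style model on a GPU; shapes are inferred from the code above, where
# `data` is a batch of target shapes and `input_op` is a one-hot start token
# of shape (batch, 1, num_draws + 1); `imitate_net` and `max_len` are
# assumed names from the surrounding training scripts).
# all_beams, beam_probs, all_inputs = imitate_net.beam_search(
#     [data, input_op], w=10, max_time=max_len)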