def predict(self, images):
    assert len(images.size()) == 4  # b, c, h, w

    for param in self.model.parameters():
        param.requires_grad = False
    self.model.eval()

    images = images.contiguous()
    if self.usegpu:
        # `async` is a reserved keyword on Python >= 3.7; non_blocking is the replacement.
        images = images.cuda(non_blocking=True)
    images = self.__define_variable(images, volatile=True)

    sem_seg_predictions, ins_seg_predictions, n_objects_predictions = \
        self.model(images)

    sem_seg_predictions = torch.nn.functional.softmax(
        sem_seg_predictions, dim=1)

    n_objects_predictions = n_objects_predictions * self.max_n_objects
    n_objects_predictions = torch.round(n_objects_predictions).int()

    sem_seg_predictions = sem_seg_predictions.data.cpu()
    ins_seg_predictions = ins_seg_predictions.data.cpu()
    n_objects_predictions = n_objects_predictions.data.cpu()

    return sem_seg_predictions, ins_seg_predictions, n_objects_predictions
def evaluate(attention_model, x_test, y_test):
    """
    cv results

    Args:
        attention_model : {object} model
        x_test          : {nplist} x_test
        y_test          : {nplist} y_test

    Returns:
        cv-accuracy
    """
    attention_model.batch_size = x_test.shape[0]
    attention_model.hidden_state = attention_model.init_hidden()
    x_test_var = Variable(torch.from_numpy(x_test).type(torch.LongTensor))
    y_test_pred, _ = attention_model(x_test_var)
    if bool(attention_model.type):
        y_preds = torch.max(y_test_pred, 1)[1]
        y_test_var = Variable(torch.from_numpy(y_test).type(torch.LongTensor))
    else:
        y_preds = torch.round(y_test_pred.type(torch.DoubleTensor).squeeze(1))
        y_test_var = Variable(torch.from_numpy(y_test).type(torch.DoubleTensor))
    return torch.eq(y_preds, y_test_var).data.sum() / x_test_var.size(0)
def train(self, kb, epochs, learning_rate):
    batcher = BatchNegSampler(
        kb=kb,
        arity=1,
        batch_size=self.params.mb,
        neg_per_pos=self.params.neg_ratio,
    )
    opt = optim.Adam(params=self.nn.parameters(), lr=learning_rate)
    criterion = torch.nn.BCELoss()
    for epoch in range(epochs):
        epoch_loss = []
        epoch_acc = []
        epoch_f1 = []
        for batch in batcher:
            emoj_idxs = batch[0]
            phr_idxs = batch[1]
            labels = autograd.Variable(torch.FloatTensor(batch[2]))
            input = autograd.Variable(
                torch.tensor(self.embeddings_array[phr_idxs])
            )
            out = self.nn(input, emoj_idxs)
            loss = criterion(out, labels)
            batch_acc = torch.div(
                (labels == torch.round(out)).sum().double(),
                batcher.batch_size,
            )
            batch_f1 = metrics.f1_score(
                labels.detach().numpy(), torch.round(out).detach().numpy()
            )
            self.nn.zero_grad()
            epoch_loss.append(np.float32(loss.data))
            epoch_acc.append(batch_acc)
            epoch_f1.append(batch_f1)
            loss.backward()
            opt.step()
        epoch_loss = np.round(np.mean(epoch_loss), 2)
        epoch_acc = np.round(np.mean(epoch_acc), 2)
        epoch_f1 = np.round(np.mean(epoch_f1), 2)
        print(
            str.format(
                "Epoch: {} \n Training loss: {} \n Training acc: {} \n Training f1: {} \n ===================",
                epoch + 1,
                str(epoch_loss),
                epoch_acc,
                epoch_f1,
            )
        )
def accuracy(pred, label):
    correct = []
    for i in range(len(pred)):
        correct.append(int(torch.round(pred[i][int(label[i])]).item()) == 1)
    correct = torch.tensor(correct)
    return torch.sum(correct)
def compute_frustum_bounds(self, world_to_grid, camera_to_world):
    corner_points = camera_to_world.new(8, 4, 1).fill_(1)
    # depth min
    corner_points[0, :3, 0] = self.depth_to_skeleton(0, 0, self.depth_min)
    corner_points[1, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, 0, self.depth_min)
    corner_points[2, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, self.image_dims[1] - 1, self.depth_min)
    corner_points[3, :3, 0] = self.depth_to_skeleton(0, self.image_dims[1] - 1, self.depth_min)
    # depth max
    corner_points[4, :3, 0] = self.depth_to_skeleton(0, 0, self.depth_max)
    corner_points[5, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, 0, self.depth_max)
    corner_points[6, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, self.image_dims[1] - 1, self.depth_max)
    corner_points[7, :3, 0] = self.depth_to_skeleton(0, self.image_dims[1] - 1, self.depth_max)

    p = torch.bmm(camera_to_world.repeat(8, 1, 1), corner_points)
    pl = torch.round(torch.bmm(world_to_grid.repeat(8, 1, 1), torch.floor(p)))
    pu = torch.round(torch.bmm(world_to_grid.repeat(8, 1, 1), torch.ceil(p)))
    bbox_min0, _ = torch.min(pl[:, :3, 0], 0)
    bbox_min1, _ = torch.min(pu[:, :3, 0], 0)
    # element-wise min/max on the tensor side (avoids a round-trip through numpy)
    bbox_min = torch.min(bbox_min0, bbox_min1)
    bbox_max0, _ = torch.max(pl[:, :3, 0], 0)
    bbox_max1, _ = torch.max(pu[:, :3, 0], 0)
    bbox_max = torch.max(bbox_max0, bbox_max1)
    return bbox_min, bbox_max
def forward(ctx, input):
    """Forward pass

    Parameters
    ==========
    :param input: input tensor

    Returns
    =======
    :return: a tensor which is round(input)
    """
    # We can cache arbitrary Tensors for use in the backward pass using the
    # save_for_backward method.
    # ctx.save_for_backward(input)
    return torch.round(input)
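# torch.round has zero gradient almost everywhere, so a custom
# autograd.Function with the forward above is typically paired with a
# straight-through estimator in backward. A minimal self-contained sketch
# under that assumption (the class name RoundSTE is illustrative, not taken
# from the original code):
import torch

class RoundSTE(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        # Nothing needs to be saved: round() is piecewise constant
        return torch.round(input)

    @staticmethod
    def backward(ctx, grad_output):
        # Straight-through estimator: treat round() as the identity
        return grad_output

# Usage:
# x = torch.randn(4, requires_grad=True)
# RoundSTE.apply(x).sum().backward()   # x.grad is all ones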
def main():
    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='CBN_10epch',
                        help="Classifier model path")
    parser.add_argument("--classifier", default='CBN',
                        help="Choose classifier architecture, C, CBN")
    args = parser.parse_args()

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load specified Classifier
    if args.classifier == 'CBN':
        net = ClassConvBN()
    elif args.classifier == 'C':
        net = ClassConv()
    else:
        net = ClassConv()
        print('Bad Classifier option, running classifier C')
    net.to(device)

    # Load parameters from trained model
    net.load_state_dict(
        torch.load('../../STEAD_CNN/models/' + args.model_name + '.pth'))
    net.eval()

    # File name
    file = '../../Data_Tides/CSULB_T13_EarthTide_earthtide_mean_360_519.mat'

    # Read file data
    with h5py.File(file, 'r') as f:
        data = f['clipdata'][()]

    # Cut data to build traces matrix
    data_cut = data[:(data.size // 6000) * 6000]

    # Matrix of traces
    data_reshape = data_cut.reshape((data.size // 6000, -1))

    fs = 1000
    total = 0
    tr_seismic, tr_noise = 0, 0
    fil_seismic, fil_noise = 0, 0

    seis_traces = []
    seis_fil_traces = []
    noise_traces = []
    noise_fil_traces = []

    # For every trace in the file
    for idx, trace in enumerate(data_reshape):
        # Filter
        fil_trace = butter_bandpass_filter(trace, 0.5, 1, fs, order=3)

        # Normalize
        resamp_trace = trace / np.max(np.abs(trace))
        resamp_fil_trace = fil_trace / np.max(np.abs(fil_trace))

        # Numpy to Torch
        resamp_trace = torch.from_numpy(resamp_trace).to(device).unsqueeze(0)
        resamp_fil_trace = torch.from_numpy(resamp_fil_trace).to(
            device).unsqueeze(0)

        # Prediction
        out_trace = net(resamp_trace.float())
        out_fil_trace = net(resamp_fil_trace.float())

        pred_trace = torch.round(out_trace.data).item()
        pred_fil_trace = torch.round(out_fil_trace.data).item()

        # Count traces
        total += 1

        if pred_trace:
            tr_seismic += 1
            seis_traces.append(idx)
        else:
            tr_noise += 1
            noise_traces.append(idx)

        if pred_fil_trace:
            fil_seismic += 1
            seis_fil_traces.append(idx)
        else:
            fil_noise += 1
            noise_fil_traces.append(idx)

    # Pick one random trace of each kind to plot
    seis_tr_id = np.random.choice(seis_traces, 1)
    seis_fil_tr_id = np.random.choice(seis_fil_traces, 1)
    noise_tr_id = np.random.choice(noise_traces, 1)
    noise_fil_tr_id = np.random.choice(noise_fil_traces, 1)

    plt.figure()
    plt.plot(data_reshape[seis_tr_id[0]])
    plt.savefig('seis_trace1.png')
    plt.clf()
    plt.plot(data_reshape[seis_fil_tr_id[0]])
    plt.savefig('seis_fil_trace1.png')
    plt.clf()
    plt.plot(data_reshape[noise_tr_id[0]])
    plt.savefig('noise_trace1.png')
    plt.clf()
    plt.plot(data_reshape[noise_fil_tr_id[0]])
    plt.savefig('noise_fil_trace1.png')

    # Results
    print(
        f'Inferencia Tides:\n\n'
        f'File: CSULB_T13_EarthTide_earthtide_mean_360_519.mat\n'
        f'Total traces: {total}\n'
        f'Predicted seismic: {tr_seismic}, predicted noise: {tr_noise}\n'
        f'Predicted fil_seismic: {fil_seismic}, predicted fil_noise: {fil_noise}\n'
    )
def loss(threshold):
    scale = 127. / threshold
    q = torch.clamp(data * scale, -128, 127)
    q = torch.round(q) / scale
    return torch.mean((data - q)**2)
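# The loss above measures the int8 quantization error for a given clipping
# threshold, so calibration can simply sweep candidate thresholds and keep
# the MSE-minimizing one. A minimal sketch under that assumption (the
# candidate grid and the stand-in `data` tensor are illustrative):
import torch

data = torch.randn(10000)  # stand-in for the tensor being calibrated

def quant_mse(threshold):
    scale = 127. / threshold
    q = torch.round(torch.clamp(data * scale, -128, 127)) / scale
    return torch.mean((data - q) ** 2)

candidates = torch.linspace(0.1, 1.0, 50) * data.abs().max()
best = min(candidates, key=lambda t: quant_mse(t).item())
print('best clipping threshold:', float(best))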
def get_k_lowest_eig(adj, k):
    r"""
    Compute the k-lowest eigenvectors of the Laplacian matrix for each
    connected component of the graph. If there are disconnected graphs,
    then the first k eigenvectors are computed for each sub-graph separately.

    Parameters
    --------------
        adj: tensor(..., N, N)
            Batches of symmetric adjacency matrices
        k: int
            Compute the k-th smallest eigenvectors and eigenvalues.

    Returns
    -------------
        eigvec: tensor(..., N, k)
            Resulting k-lowest eigenvectors of the Laplacian matrix of each
            sub-graph, with the same batching as the `adj` tensor.
            The dim==-1 represents the k-th vectors.
            The dim==-2 represents the N elements of each vector.
            If a given graph is disconnected, it will give the first ``k``
            eigenvectors of each sub-graph, and will force the first
            eigenvector to be 0-vectors. If there are ``m`` eigenvectors for
            a given sub-graph, with ``m < k``, it will return 0-vectors for
            all eigenvectors ``> m``
    """

    # Reshape as a 3D tensor for easier looping along batches
    device = adj.device
    shape = list(adj.shape)
    if adj.ndim == 2:
        adj = adj.unsqueeze(0)
    elif adj.ndim > 3:
        adj = adj.view(-1, shape[-2], shape[-1])

    L = get_laplacian_matrix(adj, normalize_L=False)

    # Compute and sort the eigenvectors
    # (torch.symeig is deprecated in recent PyTorch; torch.linalg.eigh is the replacement)
    eigval_all, eigvec_all = torch.symeig(L.cpu(), eigenvectors=True)
    eigval_all = eigval_all.to(device)
    eigvec_all = eigvec_all.to(device)
    sort_idx = torch.argsort(eigval_all.abs(), dim=-1, descending=False)
    sort_idx_vec = sort_idx.unsqueeze(-2).expand(eigvec_all.shape)
    eigval_sort = torch.gather(eigval_all, dim=-1, index=sort_idx)
    eigvec_sort = torch.gather(eigvec_all, dim=-1, index=sort_idx_vec)

    k_lowest_eigvec = []

    # Loop each graph to detect if some of them are disconnected. If they are
    # disconnected, then modify the eigenvectors such that the lowest k
    # eigenvectors are returned for each sub-graph
    for ii in range(adj.shape[0]):
        this_eigval = eigval_sort[ii]
        num_connected = torch.sum(this_eigval.abs() < EPS)

        # If there is a single connected graph, then return the k lowest eigen functions
        if num_connected <= 1:
            this_eigvec = eigvec_sort[ii, :, :k]
            if k > this_eigvec.shape[-1]:
                # Pad with 0-vectors when the graph has fewer than k eigenvectors
                temp_eigvec = torch.zeros(this_eigvec.shape[0], k)
                temp_eigvec[:, :this_eigvec.shape[-1]] = this_eigvec
                this_eigvec = temp_eigvec
            k_lowest_eigvec.append(this_eigvec)

        # Otherwise, return the k lowest eigen functions for each sub-graph
        elif num_connected > 1:
            eigvec0 = eigvec_sort[ii, :, :num_connected]
            unique_idx = torch.zeros(1)
            factor = 100

            # Use the eigenvectors with 0 eigenvalues to find the unique sub-graphs,
            # and loop to make sure the number of detected sub-graphs is consistent
            # with the number of connected sub-graphs
            while (max(unique_idx) + 1) != num_connected:
                eigvec0_round = torch.round(eigvec0 / (factor * EPS))
                _, unique_idx = torch.unique(eigvec0_round, return_inverse=True, dim=0)

                if (max(unique_idx) + 1) < num_connected:
                    factor = (factor / 2)
                elif (max(unique_idx) + 1) > num_connected:
                    factor = (factor * 3)

            # Find the eigenvectors associated to each sub-graph
            sub_graph_factors = torch.zeros(num_connected, len(this_eigval))
            for sub_ii in range(num_connected):
                sub_idx = torch.where(unique_idx == sub_ii)[0]
                sub_graph_factors[sub_ii, :] = torch.mean(
                    torch.abs(eigvec_sort[ii, sub_idx, :]), dim=-2)
            max_idx = torch.argmax(sub_graph_factors, dim=0)[num_connected:]

            # Concatenate the k lowest eigenvectors of each sub-graph
            this_k_lowest_eigvec = torch.zeros(len(this_eigval), k)
            for sub_ii in range(num_connected):
                sub_idx = torch.where(unique_idx == sub_ii)[0]
                k_lowest_idx = torch.where(max_idx == sub_ii)[0][:k - 1] + num_connected
                for kk_enum, kk in enumerate(k_lowest_idx):
                    this_k_lowest_eigvec[sub_idx, kk_enum + 1] = eigvec_sort[ii, sub_idx, kk]

            k_lowest_eigvec.append(this_k_lowest_eigvec)

    # Stack and reshape to match the input batch shape
    k_lowest_eigvec = torch.stack(k_lowest_eigvec, dim=0).view(*(shape[:-2] + [-1, k]))

    return k_lowest_eigvec
def dice_round(preds, trues, is_average=True):
    preds = torch.round(preds)
    return dice(preds, trues, is_average=is_average)
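# `dice` is an external helper here; a minimal sketch of a Dice coefficient
# consistent with this call signature (the smoothing constant `eps` and the
# per-sample flattening are assumptions, not taken from the original code):
import torch

def dice(preds, trues, is_average=True, eps=1e-7):
    # Per-sample Dice: 2|P ∩ T| / (|P| + |T|), computed on flattened masks
    preds = preds.contiguous().view(preds.size(0), -1).float()
    trues = trues.contiguous().view(trues.size(0), -1).float()
    intersection = (preds * trues).sum(dim=1)
    scores = (2. * intersection + eps) / (preds.sum(dim=1) + trues.sum(dim=1) + eps)
    return scores.mean() if is_average else scores.sum()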
def test_round(self):
    x = torch.tensor([0.9920, -1.0362, -1.5000, 2.5000], requires_grad=True)
    self.assertONNX(lambda x: torch.round(x), x, opset_version=11)
def quanz_data(x, pos, scale, offset, n):
    x_q = torch.clamp(
        torch.round(x / (2**pos * scale)) + offset,
        -2**(n - 1), 2**(n - 1) - 1)
    return x_q
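# quanz_data maps x onto an n-bit integer grid with step 2**pos * scale; the
# inverse is (x_q - offset) * (2**pos * scale), matching the dequantization
# snippet further below. A tiny round-trip check (the pos/scale/offset values
# are illustrative):
import torch

x = torch.linspace(-0.9, 0.9, 5)
pos, scale, offset, n = -7, 1.0, 0, 8     # step = 2**-7
x_q = torch.clamp(torch.round(x / (2**pos * scale)) + offset,
                  -2**(n - 1), 2**(n - 1) - 1)
x_hat = (x_q - offset) * (2**pos * scale)
print((x - x_hat).abs().max())            # stays below half the step size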
def _PyramidRoI_Feat(self, feat_maps, rois, im_info):
    '''roi pool on pyramid feature maps'''
    # do roi pooling based on predicted rois
    img_area = im_info[0][0] * im_info[0][1]
    h = rois.data[:, 4] - rois.data[:, 2] + 1
    w = rois.data[:, 3] - rois.data[:, 1] + 1
    roi_level = torch.log(torch.sqrt(h * w) / 50.0)
    roi_level = torch.round(roi_level + 4)
    roi_level[roi_level < 2] = 2
    roi_level[roi_level > 5] = 5
    # roi_level.fill_(5)

    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        # NOTE: need to add pyramid
        grid_xy = _affine_grid_gen(rois, feat_maps.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        roi_pool_feat = self.RCNN_roi_crop(feat_maps, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2)

    elif cfg.POOLING_MODE == 'align':
        roi_pool_feats = []
        box_to_levels = []
        for i, l in enumerate(range(2, 6)):
            if (roi_level == l).sum() == 0:
                continue
            # idx_l = (roi_level == l).nonzero().squeeze()
            idx_l = (roi_level == l).nonzero()
            if idx_l.shape[0] > 1:
                idx_l = idx_l.squeeze()
            else:
                idx_l = idx_l.view(-1)
            box_to_levels.append(idx_l)
            scale = feat_maps[i].size(2) / im_info[0][0]
            feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale)
            roi_pool_feats.append(feat)
        roi_pool_feat = torch.cat(roi_pool_feats, 0)
        box_to_level = torch.cat(box_to_levels, 0)
        idx_sorted, order = torch.sort(box_to_level)
        roi_pool_feat = roi_pool_feat[order]

    elif cfg.POOLING_MODE == 'pool':
        roi_pool_feats = []
        box_to_levels = []
        for i, l in enumerate(range(2, 6)):
            if (roi_level == l).sum() == 0:
                continue
            idx_l = (roi_level == l).nonzero().squeeze()
            box_to_levels.append(idx_l)
            scale = feat_maps[i].size(2) / im_info[0][0]
            feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale)
            roi_pool_feats.append(feat)
        roi_pool_feat = torch.cat(roi_pool_feats, 0)
        box_to_level = torch.cat(box_to_levels, 0)
        idx_sorted, order = torch.sort(box_to_level)
        roi_pool_feat = roi_pool_feat[order]

    return roi_pool_feat
# Training
for i, data in enumerate(train_generator):
    image, truth = data
    truth = truth / 255
    predictions = model(image)
    loss = loss_fn(predictions, truth)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    running_loss += loss.item()

# Validation
with torch.no_grad():
    for i, data in enumerate(val_generator):
        image, truth = data
        truth = truth / 255
        predictions = model(image)
        predictions = torch.round(predictions)  # Rounding the predictions for classification
        loss = loss_fn(predictions, truth)
        val_loss += loss.item()
        val_acc += dice_sim(predictions, truth) * 100

epoch_train_loss = running_loss / (train_size // batch_size + 1)
epoch_val_loss = val_loss / (val_size // batch_size + 1)
epoch_val_acc = val_acc / (val_size // batch_size + 1)
scheduler.step(epoch_val_loss)  # LR Scheduler
print(
    f"==>train_loss: {epoch_train_loss} ==>val_loss: {epoch_val_loss} ==>val_accuracy: {epoch_val_acc}"
)
def train_segmentation(model, num_epochs, dataloader_dict, criterion,
                       optimizer, verbose=False, detailed_time=False):
    print("Starting training loop...\n")
    print("Model's device = {}".format(model.my_device))
    device = model.my_device

    # Track best model
    best_model_wts = copy.deepcopy(model.state_dict())
    current_best_loss = 1000

    # Initialize loss dict to record training, figures are per epoch
    epoch_loss_dict = {
        'train': {'acc': [], 'loss': [], 'IoU': [], 'time': []},
        'val': {'acc': [], 'loss': [], 'IoU': [], 'time': []}
    }
    batch_loss_dict = {
        'train': {'acc': [], 'loss': [], 'IoU': []},
        'val': {'acc': [], 'loss': [], 'IoU': []}
    }
    if detailed_time:
        epoch_loss_dict['train']['backward_pass_time'] = []
        epoch_loss_dict['train']['data_fetch_time'] = []
        epoch_loss_dict['val']['backward_pass_time'] = []
        epoch_loss_dict['val']['data_fetch_time'] = []

    # For each epoch
    for epoch in range(num_epochs):
        # Each epoch has training and validation phase
        for phase in ['train', 'val']:
            begin_epoch_time = time.time()
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            running_IoU = 0.0
            total_obs = 0

            # For each mini-batch in the dataloader
            total_data_fetch = 0
            total_pass = 0
            b_data_fetch = time.time()
            for i, data in enumerate(dataloader_dict[phase]):
                if detailed_time:
                    total_data_fetch += (time.time() - b_data_fetch)
                    b_pass = time.time()
                images, target = data
                img_size = target.shape[-1]
                # assert img_size == 128
                target = torch.tensor(target, dtype=torch.float32, device=device)
                images = images.to(device)
                batch_size = target.shape[0]
                total_obs += batch_size

                # Zero the gradients
                model.zero_grad()

                # Forward pass -- reshape output to be like the target (has extra 1D dimension)
                output = model(images)
                output = output.view(target.shape)
                assert output.min().item() > 0
                assert output.max().item() < 1

                # Just round since we have binary classification
                preds = torch.round(output)
                correct_count = (preds == target).sum()

                # Calculate loss
                error = criterion(output, target)

                # Make detailed output if verbose
                verbose_str = ""

                # Training steps
                if phase == 'train':
                    # Backpropagate the error
                    error.backward()
                    # Take optimizer step
                    optimizer.step()

                if detailed_time:
                    total_pass += (time.time() - b_pass)

                # The error is divided by the batch size, so reverse this
                batch_loss_dict[phase]['acc'].append(correct_count.item() / batch_size)
                batch_loss_dict[phase]['loss'].append(error.item())
                batch_loss_dict[phase]['IoU'].append(
                    test_eval.inter_over_union(preds, target))
                running_loss += error.item() * batch_size
                running_corrects += correct_count.item()
                running_IoU += test_eval.inter_over_union(preds, target) * batch_size

                if detailed_time:
                    b_data_fetch = time.time()

            epoch_loss = running_loss / total_obs
            epoch_acc = running_corrects / (total_obs * img_size * img_size)
            epoch_IoU = running_IoU / total_obs

            if epoch_loss < current_best_loss:
                current_best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

            t = time.time() - begin_epoch_time

            # Add to our epoch_loss_dict
            epoch_loss_dict[phase]['acc'].append(epoch_acc)
            epoch_loss_dict[phase]['loss'].append(epoch_loss)
            epoch_loss_dict[phase]['IoU'].append(epoch_IoU)
            epoch_loss_dict[phase]['time'].append(t)
            if detailed_time:
                epoch_loss_dict[phase]['backward_pass_time'].append(total_pass)
                epoch_loss_dict[phase]['data_fetch_time'].append(total_data_fetch)

            print("PHASE={} EPOCH={} TIME={} LOSS={} ACC={}".format(
                phase, epoch, t, epoch_loss, epoch_acc))

    return model, best_model_wts, epoch_loss_dict, batch_loss_dict
y_data_val = []
for i in range(batch_begin, batch_end):
    # x_data_val.append(util.convert_to_one_hot(words[i], words2int, vocab_size))
    x_data_val.append(util.get_index(words[i], words2int))  # case: one index per word
    y_data_val.append(labels[i])

# x_tensor_val = Variable(torch.FloatTensor(x_data))
# y_tensor_val = Variable(torch.FloatTensor(y_data))
x_tensor_val = util.prepare_sequence(x_data_val, seq_len)
y_tensor_val = util.prepare_sequence(y_data_val, seq_len)

y_pred_val, _ = model(x_tensor_val, (hn, cn))
# print(y_pred_val.shape)
y_pred_val = torch.squeeze(y_pred_val, 2)
# output = torch.max(y_pred_val, dim=1)  # not needed: each LSTM cell outputs its predicted label
new_y_pred_val = torch.round(y_pred_val)

# Build the two lists for the accuracy computation
prediction = []
true = []
new_y_pred_val_np = new_y_pred_val.detach().numpy()
new_y_true_val_np = y_tensor_val.detach().numpy()
for row in new_y_pred_val_np:
    for value in row:
        if (value == 0):
            prediction.append(0)
        else:
            prediction.append(1)
for row in new_y_true_val_np:
    for value in row:
        if (value == 0):
            true.append(0)
        else:
            true.append(1)
def trainer(textField, trainSet, devSet):
    # Creating the logging.
    logging.basicConfig(filename=Cfg.logDir + f'/logging-{currentTime}.txt',
                        filemode='a', level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S %p')
    # Logging the information.
    logging.info(f'''
        Vocabulary Size:    {Cfg.vs}
        Embedding Size:     {Cfg.es}
        Hidden Size:        {Cfg.hs}
        Class Size:         {Cfg.cs}
        Learning Rate:      {Cfg.lr}
        Adam Beta One:      {Cfg.beta1}
        Adam Beta Two:      {Cfg.beta2}
        Weight Decay:       {Cfg.wd}
        Batch Size:         {Cfg.bs}
        Epoches:            {Cfg.epoches}
        Random Seed:        {Cfg.seed}
        GPU ID:             {Cfg.GPUID}
        Model Directory:    {Cfg.modelDir}
        Log Directory:      {Cfg.logDir}
        Dataset Directory:  {Cfg.dataDir}
    ''')
    # Creating the visdom.
    vis = Visdom(env='WordAverageModel')
    # Creating the graphs.
    lossGraph = vis.line(
        X=[0], Y=[0],
        opts=dict(legend=['TrainingLoss', 'EvaluatingLoss'],
                  xlabel='Epoches', ylabel='Loss',
                  title=f'Training and Evaluating Loss - {currentTime}'),
        name='TrainingLoss')
    vis.line(X=[0], Y=[0], win=lossGraph, update='append',
             name='EvaluatingLoss')
    accGraph = vis.line(
        X=[0], Y=[0],
        opts=dict(legend=['TrainingAcc', 'EvaluatingAcc'],
                  xlabel='Epoches', ylabel='Acc',
                  title=f'Training and Evaluating Acc - {currentTime}'),
        name='TrainingAcc')
    vis.line(X=[0], Y=[0], win=accGraph, update='append', name='EvaluatingAcc')
    # Creating the word-average model.
    model = WordAverageModelNN(
        Cfg.vs + 2, Cfg.es, Cfg.hs, Cfg.cs,
        textField.vocab.stoi[textField.pad_token]).to(device)
    # Customizing the initialized parameters of the embedding layer.
    # Getting the vocabulary as the vectors.
    gloveVector = textField.vocab.vectors
    # Reinitializing the parameters of the embedding layer.
    model.embedding.weight.data.copy_(gloveVector)
    # Zeroing the '<pad>' and '<unk>' rows of the embedding layer so they
    # start without pretrained signal.
    model.embedding.weight.data[textField.vocab.stoi[textField.pad_token]] = torch.zeros(Cfg.es)
    model.embedding.weight.data[textField.vocab.stoi[textField.unk_token]] = torch.zeros(Cfg.es)
    # Setting the optimizer.
    optimizer = optim.Adam(model.parameters(), lr=Cfg.lr,
                           weight_decay=Cfg.wd, betas=[Cfg.beta1, Cfg.beta2])
    # Setting the loss function.
    loss = nn.BCEWithLogitsLoss()
    # Setting the lists for storing the training loss and accuracy.
    trainLosses = []
    trainAccs = []
    # Setting the lists for storing the evaluating loss and accuracy.
    evalLosses = []
    evalAccs = []
    # Training the model.
    for epoch in range(Cfg.epoches):
        # Setting the list for storing the training loss and accuracy.
        trainLoss = []
        trainAcc = []
        # Setting the loading bar.
        with tqdm(total=len(trainSet),
                  desc=f'Epoch {epoch + 1}/{Cfg.epoches}',
                  unit='batches', dynamic_ncols=True) as pbars:
            for i, trainData in enumerate(trainSet):
                # Feeding the data into the model.
                prediction = model(trainData.text)
                # Computing the loss.
                cost = loss(prediction, trainData.label)
                # Storing the loss.
                trainLoss.append(cost.item())
                # Clearing the previous gradient.
                optimizer.zero_grad()
                # Applying the backward propagation.
                cost.backward()
                # Updating the parameters.
                optimizer.step()
                # Computing the accuracy.
                accuracy = (torch.round(
                    torch.sigmoid(prediction)) == trainData.label)
                accuracy = accuracy.sum().float() / len(accuracy)
                # Storing the accuracy.
                trainAcc.append(accuracy.item())
                # Updating the loading bar.
                pbars.update(1)
                # Updating the training information.
                pbars.set_postfix_str(' - Train Loss %.4f - Train Acc %.4f' %
                                      (np.mean(trainLoss), np.mean(trainAcc)))
        # Closing the loading bar.
        pbars.close()
        # Printing the hint for evaluating.
        print('Evaluating...', end=' ')
        # Evaluating the model.
        evalLoss, evalAcc = evaluator(model.eval(), loss, devSet)
        # Printing the evaluating information.
        print(' - Eval Loss %.4f - Eval Acc %.4f' % (evalLoss, evalAcc))
        # Storing the training and evaluating information.
        trainLosses.append(np.mean(trainLoss))
        trainAccs.append(np.mean(trainAcc))
        evalLosses.append(evalLoss)
        evalAccs.append(evalAcc)
        # Logging the information.
        logging.info(
            'Epoch [%d/%d] -> Training: Loss [%.4f] - Acc [%.4f] || Evaluating: Loss [%.4f] - Acc [%.4f]'
            % (epoch + 1, Cfg.epoches, np.mean(trainLoss), np.mean(trainAcc),
               evalLoss, evalAcc))
        # Drawing the graphs.
        vis.line(X=[k for k in range(1, len(trainLosses) + 1)],
                 Y=trainLosses, win=lossGraph, update='new',
                 name='TrainingLoss')
        vis.line(X=[k for k in range(1, len(evalLosses) + 1)],
                 Y=evalLosses, win=lossGraph, update='new',
                 name='EvaluatingLoss')
        vis.line(X=[k for k in range(1, len(trainAccs) + 1)],
                 Y=trainAccs, win=accGraph, update='new', name='TrainingAcc')
        vis.line(X=[k for k in range(1, len(evalAccs) + 1)],
                 Y=evalAccs, win=accGraph, update='new', name='EvaluatingAcc')
        # Giving the hint for saving the model.
        logging.info("Model Saved")
        # Saving the model.
        torch.save(
            model.train().state_dict(),
            Cfg.modelDir + f'/{currentTime}/WordAverageModel-Epoch{epoch + 1}.pt')
        # Converting the model state.
        model = model.train()
    # Saving the graph.
    vis.save(envs=['WordAverageModel'])
def sample_adj_random(self, adj_logits):
    adj_rand = torch.rand(adj_logits.size())
    adj_rand = adj_rand.triu(1)
    adj_rand = torch.round(adj_rand)
    adj_rand = adj_rand + adj_rand.T
    return adj_rand
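# Rounding uniform noise in the strict upper triangle and mirroring it yields
# a symmetric 0/1 adjacency matrix with an empty diagonal. A quick check of
# those invariants (the 5x5 shape is illustrative):
import torch

adj_logits = torch.zeros(5, 5)  # only the shape is used by the sampler
adj = torch.round(torch.rand(adj_logits.size()).triu(1))
adj = adj + adj.T
assert torch.equal(adj, adj.T)                 # symmetric
assert adj.diagonal().sum() == 0               # no self-loops
assert bool(((adj == 0) | (adj == 1)).all())   # binary entries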
def quant(input):
    input = torch.round(input / (2**(-args.aprec))) * (2**(-args.aprec))
    return input
def plot_sample_img(img, name):  # header reconstructed; the helper's name is assumed
    img = img.view(1, 28, 28)
    save_image(img, './sample_{}.png'.format(name))


def min_max_normalization(tensor, min_value, max_value):
    min_tensor = tensor.min()
    tensor = (tensor - min_tensor)
    max_tensor = tensor.max()
    tensor = tensor / max_tensor
    tensor = tensor * (max_value - min_value) + min_value
    return tensor


img_transform = transforms.Compose([
    transforms.ToTensor(),
    lambda x: min_max_normalization(x, 0, 1),
    lambda x: torch.round(x)  # binarize pixels to {0, 1}
])

dataset = MNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(nn.Linear(28 * 28, 256), nn.ReLU(True),
                                     nn.Linear(256, 64), nn.ReLU(True))
        self.decoder = nn.Sequential(nn.Linear(64, 256), nn.ReLU(True),
                                     nn.Linear(256, 28 * 28), nn.Sigmoid())
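# With inputs binarized to {0, 1} and a Sigmoid at the end of the decoder,
# binary cross-entropy is the natural reconstruction loss. A minimal
# training-step sketch under that assumption (the excerpt defines no forward
# method, so encoder and decoder are chained directly; the optimizer settings
# are illustrative):
import torch
import torch.nn as nn

model = autoencoder()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for img, _ in dataloader:
    img = img.view(img.size(0), -1)           # flatten to (batch, 784)
    output = model.decoder(model.encoder(img))
    loss = criterion(output, img)             # reconstruct the binarized image
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()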
def pow_2_round(dims):
    return 2 ** torch.round(torch.log2(dims.type(torch.float)))
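# pow_2_round snaps each entry to the nearest power of two in log-space,
# e.g. (values are illustrative):
import torch

dims = torch.tensor([3, 5, 12, 100])
print(pow_2_round(dims))  # tensor([  4.,   4.,  16., 128.])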
def ROIAlign(feature_maps, rois, config, pool_size, mode='bilinear'):
    """Implements ROI Align on the features.

    Params:
    - pool_shape: [height, width] of the output pooled regions. Usually [7, 7]
    - image_shape: [height, width, channels]. Shape of input image in pixels

    Inputs:
    - boxes: [batch, num_boxes, (x1, y1, x2, y2)] in normalized
             coordinates. Possibly padded with zeros if not enough
             boxes to fill the array.
    - Feature maps: List of feature maps from different levels of the
                    pyramid. Each is [batch, channels, height, width]

    Output:
    Pooled regions in the shape: [batch, num_boxes, height, width, channels].
    The width and height are those specified in the pool_shape in the layer
    constructor.
    """
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1       ]
    """
    # feature_maps = [P2, P3, P4, P5]
    rois = rois.detach()
    crop_resize = CropAndResize(pool_size, pool_size, 0)

    roi_number = rois.size()[1]

    pooled = rois.data.new(
        config.IMAGES_PER_GPU * rois.size(1), 256,
        pool_size, pool_size).zero_()

    rois = rois.view(config.IMAGES_PER_GPU * rois.size(1), 4)

    # Loop through levels and apply ROI pooling to each. P2 to P5.
    x_1 = rois[:, 0]
    y_1 = rois[:, 1]
    x_2 = rois[:, 2]
    y_2 = rois[:, 3]

    roi_level = log2_graph(
        torch.div(torch.sqrt((y_2 - y_1) * (x_2 - x_1)), 224.0))

    roi_level = torch.clamp(torch.clamp(
        torch.add(torch.round(roi_level), 4), min=2), max=5)

    # P2 is 256x256, P3 is 128x128, P4 is 64x64, P5 is 32x32
    # P2 is 4, P3 is 8, P4 is 16, P5 is 32
    for i, level in enumerate(range(2, 6)):
        scaling_ratio = 2**level

        height = float(config.IMAGE_MAX_DIM) / scaling_ratio
        width = float(config.IMAGE_MAX_DIM) / scaling_ratio

        ixx = torch.eq(roi_level, level)

        box_indices = ixx.view(-1).int() * 0
        ix = torch.unsqueeze(ixx, 1)
        level_boxes = torch.masked_select(rois, ix)
        if level_boxes.size()[0] == 0:
            continue
        level_boxes = level_boxes.view(-1, 4)

        crops = crop_resize(
            feature_maps[i],
            torch.div(level_boxes, float(config.IMAGE_MAX_DIM))[:, [1, 0, 3, 2]],
            box_indices)

        indices_pooled = ixx.nonzero()[:, 0]
        pooled[indices_pooled.data, :, :, :] = crops.data

    pooled = pooled.view(config.IMAGES_PER_GPU, roi_number,
                         256, pool_size, pool_size)
    pooled = Variable(pooled).cuda()
    return pooled
def sparse_perturb_multiple(data_idx, pf_minus, pf_plus, n, m, undirected,
                            nsamples, offset_both_idx):
    """
    Randomly flip bits.

    Parameters
    ----------
    data_idx: torch.Tensor [2, ?]
        The indices of the non-zero elements
    pf_minus: float, 0 <= pf_minus <= 1
        The probability to flip a one to a zero
    pf_plus : float, 0 <= pf_plus <= 1
        The probability to flip a zero to a one
    n : int
        The shape of the tensor
    m : int
        The shape of the tensor
    undirected : bool
        If true for every (i, j) also perturb (j, i)
    nsamples : int
        Number of perturbed samples
    offset_both_idx : bool
        Whether to offset both matrix indices (for adjacency matrix)

    Returns
    -------
    perturbed_data_idx: torch.Tensor [2, ?]
        The indices of the non-zero elements of multiple concatenated
        matrices after perturbation
    """
    if undirected:
        # select only one direction of the edges, ignore self loops
        data_idx = data_idx[:, data_idx[0] < data_idx[1]]

    idx_copies = copy_idx(data_idx, n, nsamples, offset_both_idx)
    w_existing = torch.ones_like(idx_copies[0])
    to_del = torch.cuda.BoolTensor(idx_copies.shape[1]).bernoulli_(pf_minus)
    w_existing[to_del] = 0

    if offset_both_idx:
        assert n == m
        nadd_persample_np = np.random.binomial(
            n * m, pf_plus, size=nsamples)  # 6x faster than PyTorch
        nadd_persample = torch.cuda.FloatTensor(nadd_persample_np)
        # Convert the target number of distinct additions into the number of
        # draws with replacement that yields it in expectation
        nadd_persample_with_repl = torch.round(
            torch.log(1 - nadd_persample / (n * m))
            / np.log(1 - 1 / (n * m))).long()
        nadd_with_repl = nadd_persample_with_repl.sum()
        to_add = data_idx.new_empty([2, nadd_with_repl])
        to_add[0].random_(n * m)
        to_add[1] = to_add[0] % m
        to_add[0] = to_add[0] // m
        to_add = offset_idx(to_add, nadd_persample_with_repl, m, [0, 1])
        if undirected:
            # select only one direction of the edges, ignore self loops
            to_add = to_add[:, to_add[0] < to_add[1]]
    else:
        nadd = np.random.binomial(nsamples * n * m, pf_plus)  # 6x faster than PyTorch
        nadd_with_repl = int(
            np.round(
                np.log(1 - nadd / (nsamples * n * m))
                / np.log(1 - 1 / (nsamples * n * m))))
        to_add = data_idx.new_empty([2, nadd_with_repl])
        to_add[0].random_(nsamples * n * m)
        to_add[1] = to_add[0] % m
        to_add[0] = to_add[0] // m

    w_added = torch.ones_like(to_add[0])

    if offset_both_idx:
        mb = nsamples * m
    else:
        mb = m

    # if an edge already exists but has been removed do not add it back
    # hence we coalesce with the min value
    joined, weights = coalesce(torch.cat((idx_copies, to_add), 1),
                               torch.cat((w_existing, w_added), 0),
                               nsamples * n, mb, 'min')

    per_data_idx = joined[:, weights > 0]

    if undirected:
        per_data_idx = torch.cat((per_data_idx, per_data_idx[[1, 0]]), 1)

    # Check that there are no off-diagonal edges
    # if offset_both_idx:
    #     batch0 = to_add[0] // n
    #     batch1 = to_add[1] // n
    #     assert torch.all(batch0 == batch1)

    return per_data_idx
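# The log-ratio above converts a desired number of *distinct* new entries
# into the number of draws with replacement that yields that many distinct
# cells in expectation: after k uniform draws over N cells,
# E[#distinct] = N * (1 - (1 - 1/N)**k), so k = log(1 - distinct/N) / log(1 - 1/N).
# A quick numeric check (the values are illustrative):
import numpy as np

N, distinct = 10_000, 500
k = np.log(1 - distinct / N) / np.log(1 - 1 / N)
draws = np.random.randint(0, N, size=int(round(k)))
print(int(round(k)), len(np.unique(draws)))  # ~513 draws -> about 500 distinct cells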
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print(
        f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}'
    )


###################### OUTPUT #######
y_pred_list = []
model.eval()
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

confusion_matrix(y_test, y_pred_list)
print(classification_report(y_test, y_pred_list))
def extract():
    # extracting features with G and D trained on the standard l2 loss
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    split = args.split
    isSewa = args.sewa
    isSemaine = args.semaine

    toLoadModel = True
    resume_iters = args.resume_iters

    use_skip = args.useSkip
    useLatent = args.useLatent
    tryDenoise = args.tryDenoise
    addLoss = args.addLoss
    useWeight = args.useWeightNormalization
    singleTask = args.singleTask
    trainQuadrant = args.trainQuadrant
    alterQuadrant = args.alterQuadrant
    nSel = args.nSel

    # curDir = "/home/deckyal/eclipse-workspace/FaceTracking/"

    c_dim = 2
    image_size = 128
    g_conv_dim = 16
    d_conv_dim = 16
    lambda_cls = 1
    lambda_rec = 10
    lambda_gp = 10
    inputC = 3  # input channel for discriminator

    batch_size = args.batch_size  # 200 # 50 # 40 # 70 # 20, mini-batch size

    isVideo = False
    seq_length = 2

    # Test configuration.
    test_iters = 200000  # test model from this step

    # Miscellaneous.
    num_workers = 1
    log_dir = 'stargan/logs'
    model_save_dir = 'stargan/models'
    sample_dir = 'stargan/samples-g_adl'
    result_dir = 'stargan/results'

    # Step size.
    log_step = 20
    sample_step = 5  # 1000
    model_save_step = 10
    lr_update_step = 100  # 1000
    # model_save_step = 10000
    # lr_update_step = 1000

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)

    multi_gpu = False
    testSplit = split
    print("Test split ", testSplit)

    nSplit = 5
    listSplit = []
    for i in range(nSplit):
        if i != testSplit:
            listSplit.append(i)
    print(listSplit)

    if not isSewa:
        if not isSemaine:
            d_name = 'AFEW-VA-Fixed'
            additionName = "AF" + str(split) + "-"
        else:
            d_name = 'Sem-Short'
            additionName = "SEM" + str(split) + "-"
        dbType = 0
    else:
        d_name = 'SEWA'
        dbType = 1
        additionName = "SW" + str(split) + "-"

    additionName += (str(nSel) + '-')
    additionName += (str(g_conv_dim) + '-')
    additionName += (str(d_conv_dim) + '-')

    if trainQuadrant:
        if alterQuadrant:
            additionName += "QDAL-"
            c_dim = 1
        else:
            additionName += "QD-"
            c_dim = 4

    if tryDenoise:
        additionName += "Den-"

    save_name = additionName + str(testSplit)

    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    toDelete = False

    VD = SEWAFEWReduced([d_name], None, True, image_size, transform, False,
                        False, 1, split=False, nSplit=nSplit,
                        listSplit=[testSplit], isVideo=isVideo,
                        seqLength=seq_length, returnQuadrant=trainQuadrant,
                        returnNoisy=tryDenoise, dbType=dbType,
                        isSemaine=isSemaine)
    dataloaderV = torch.utils.data.DataLoader(dataset=VD,
                                              batch_size=batch_size,
                                              shuffle=False)

    if nSel:
        G = GeneratorMZ(g_conv_dim, 0, 1, use_skip, useLatent)
        D = DiscriminatorMZR(image_size, d_conv_dim, c_dim, 4, inputC=inputC)
    else:
        G = GeneratorM(g_conv_dim, 0, 1, use_skip, useLatent)
        D = DiscriminatorM(image_size, d_conv_dim, c_dim, 6)

    print_network(G, 'G')
    print_network(D, 'D')

    if toLoadModel:
        print('Loading models from iterations : ', resume_iters)
        G_path = os.path.join(curDir + model_save_dir,
                              '{}G-{}.ckpt'.format(additionName, resume_iters))
        D_path = os.path.join(curDir + model_save_dir,
                              '{}D-{}.ckpt'.format(additionName, resume_iters))
        G.load_state_dict(torch.load(G_path, map_location=lambda storage, loc: storage), strict=False)
        D.load_state_dict(torch.load(D_path, map_location=lambda storage, loc: storage), strict=False)

    G.to(device)
    D.to(device)

    listValO = []
    listAroO = []
    listValL = []
    listAroL = []

    a = 0
    b = 1
    iterator = 0

    tvo = []; tao = []; tvl = []; tal = []
    anyDiffer = False

    print('length : ', len(dataloaderV))

    for x, (data) in enumerate(dataloaderV, 0):
        if trainQuadrant:
            rinputs, rlabels, rldmrk = data[0], data[5], data[2]
        else:
            rinputs, rlabels, rldmrk = data[0], data[1], data[2]
        # for real_batch, va, gt, M, ln, q, noisy_batch, weight in (dataloader):
        fNames = data[4]

        G.train()
        D.train()

        inputs = rinputs.cuda()  # to(device)
        labels = rlabels.cuda()  # to(device)

        with torch.set_grad_enabled(False):
            inputsM, z = G(inputs, returnInter=True)
            _, outputs = D(inputsM)

            if trainQuadrant:
                if alterQuadrant:
                    outputs = torch.round(outputs)
                else:
                    _, outputs = torch.max(outputs, 1)

        print('inside ')
        if trainQuadrant:
            print(x, ',', int(truediv(len(VD), batch_size)), outputs[:2],
                  labels[:2], outputs.shape)
        else:
            print(x, ',', int(truediv(len(VD), batch_size)), outputs[:2],
                  labels[:2], outputs[:, 0].shape[0], outputs.shape)
        # print(outputs.shape)
        print(z.shape)

        zSave = z.cpu().numpy()
        qSave = outputs.cpu().numpy()

        combine = True

        # now saving the results individually
        for fname, features, va in zip(fNames, zSave, qSave):
            iterator += 1

            # first inspect the dir
            dirName, fName = os.path.split(fname)
            fName = fName.split('.')[0] + '.npz'

            listDir = dirName.split('/')
            listDir[-1] = 'FT-' + additionName + 'z'
            dirTgt = '/'.join(listDir)

            if not toDelete:
                checkDirMake(dirTgt)

            # va = np.array([5, -5])
            # print(va)
            if not isSewa:
                q = toQuadrant(va, -10, 10, False)
            else:
                q = toQuadrant(va, 0, 1, False)
            # print(q)

            if combine:
                tmp = np.zeros((1, features.shape[1], features.shape[2]), np.float32) + q
                features = np.concatenate((features, tmp), 0)
                print(tmp[0, 0, :2])

            print(fname, features.shape)

            if os.path.isdir(dirTgt) and toDelete:  # and isSewa or False:
                print('removing : ', dirTgt)
                # os.remove(os.path.join(dirTgt, fNameOri))
                # exit(0)
                shutil.rmtree(dirTgt)

            # print(dirTgt, fName)
            vaq = np.array([va[0], va[1], q])
            # print('vaq : ', vaq)

            if not toDelete:  # not os.path.isfile(os.path.join(dirTgt, fName)):
                # np.save(os.path.join(dirTgt, fName), features)
                np.savez(os.path.join(dirTgt, fName), z=features, vaq=vaq)
            # exit(0)

        # np.save('testing.npy', zSave)
        # exit(0)

        if not trainQuadrant:
            shape = outputs[:, 0].shape[0]
        else:
            shape = outputs.shape[0]

        if shape != batch_size:  # in case the batch size differs, usually at end of iter
            anyDiffer = True
            print('differ')
            if trainQuadrant:
                tvo.append(outputs.detach().cpu())
                tao.append(outputs.detach().cpu())
                tvl.append(labels.detach().cpu())
                tal.append(labels.detach().cpu())
            else:
                tvo.append(outputs[:, a].detach().cpu())
                tao.append(outputs[:, b].detach().cpu())
                tvl.append(labels[:, a].detach().cpu())
                tal.append(labels[:, b].detach().cpu())
        else:
            print('equal')
            if trainQuadrant:
                listValO.append(outputs.detach().cpu())
                listAroO.append(outputs.detach().cpu())
                listValL.append(labels.detach().cpu())
                listAroL.append(labels.detach().cpu())
            else:
                listValO.append(outputs[:, a].detach().cpu())
                listAroO.append(outputs[:, b].detach().cpu())
                listValL.append(labels[:, a].detach().cpu())
                listAroL.append(labels[:, b].detach().cpu())

    if len(listValO) > 0:
        est_V = np.asarray(torch.stack(listValO)).flatten()
        est_A = np.asarray(torch.stack(listAroO)).flatten()
        gt_V = np.asarray(torch.stack(listValL)).flatten()
        gt_A = np.asarray(torch.stack(listAroL)).flatten()

    if anyDiffer:
        est_Vt = np.asarray(torch.stack(tvo)).flatten()
        est_At = np.asarray(torch.stack(tao)).flatten()
        gt_Vt = np.asarray(torch.stack(tvl)).flatten()
        gt_At = np.asarray(torch.stack(tal)).flatten()

        # now concatenate
        if len(listValO) > 0:
            est_V = np.concatenate((est_V, est_Vt))
            est_A = np.concatenate((est_A, est_At))
            gt_V = np.concatenate((gt_V, gt_Vt))
            gt_A = np.concatenate((gt_A, gt_At))
        else:
            est_V, est_A, gt_V, gt_A = est_Vt, est_At, gt_Vt, gt_At

    print(est_V.shape, gt_V.shape)

    mseV = calcMSE(est_V, gt_V)
    mseA = calcMSE(est_A, gt_A)
    corV = calcCOR(est_V, gt_V)
    corA = calcCOR(est_A, gt_A)
    iccV = calcICC(est_V, gt_V)
    iccA = calcICC(est_A, gt_A)
    cccV = calcCCC(est_V, gt_V)
    cccA = calcCCC(est_A, gt_A)
    iccV2 = calcCCC(gt_V, gt_V)
    iccA2 = calcCCC(gt_A, gt_A)

    print('MSEV : ', mseV, ', CORV : ', corV, ', CCCV : ', cccV, ', ICCV : ', iccV)
    print('MSEA : ', mseA, ', CORA : ', corA, ', CCCA : ', cccA, ', ICCA : ', iccA)
def dequanz_data(x_q, pos, scale, offset):
    x_deq = (x_q - offset) * (2**pos * scale)
    return x_deq


for i in range(1):
    data = torch.rand((3, 4))

    pos, scale = cnnl_quant_param(data, 8)
    print(pos, scale)
    pos2, scale2, o = quant_param_offset(data, 8)
    print(pos2, scale2, o)

    (pos, scale, offset) = get_quanz_param(data, 8)
    print("pos:", pos)
    print("scale:", scale)
    print("offset:", offset)

    t = data * scale
    d_q = torch.round(t / torch.pow(torch.tensor([2.0]), pos))

    print("==================")
    print(data)
    # print(d_q)
    print("==================")

    x_q = quanz_data(data, pos, scale, offset, 8)
    print("x_q:", x_q)
    x_deq = dequanz_data(x_q, pos, scale, offset)
    print("x_deq:", x_deq)
    print(scale * scale2)
def train_w_gdc_adl(): #training g and d on standard l2 loss split = args.split isSewa = args.sewa isSemaine = args.semaine modelExist = True toLoadModel = True resume_iters=args.resume_iters#89 GName = None;#"AF0-0-16-16-Den-UA-G-429.ckpt" DName = None;#"AF0-0-16-16-Den-UA-D-429.ckpt" use_skip = args.useSkip useLatent = args.useLatent tryDenoise = args.tryDenoise addLoss = args.addLoss useWeight = args.useWeightNormalization singleTask = args.singleTask trainQuadrant = args.trainQuadrant alterQuadrant = args.alterQuadrant nSel = args.nSel #curDir = "/home/deckyal/eclipse-workspace/FaceTracking/" c_dim=2 image_size=128 g_conv_dim=args.gConv d_conv_dim=args.dConv lambda_cls=1 lambda_rec=10 lambda_gp=10 inputC = 3#input channel for discriminator visEvery = 5 saveEvery = 10 # Training configuration. dataset='CelebA' #, choices=['CelebA', 'RaFD', 'Both']) batch_size=args.batch_size#50#40#70#20 #, help='mini-batch size') num_iters=200000 #, help='number of total iterations for training D') num_iters_decay=100000 #, help='number of iterations for decaying lr') g_lr=0.0001 #, help='learning rate for G') d_lr=0.0001 #, help='learning rate for D') n_critic=5 #, help='number of D updates per each G update') beta1=0.5 #, help='beta1 for Adam optimizer') beta2=0.999 #, help='beta2 for Adam optimizer') #selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'] #', '--list', nargs='+', help='selected attributes for the CelebA dataset',default=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young']) isVideo = False seq_length = 2 # Test configuration. test_iters=200000 #, help='test model from this step') # Miscellaneous. num_workers=1 log_dir='stargan/logs' model_save_dir='stargan/models' sample_dir='stargan/samples-g_adl' result_dir='stargan/results' # Step size. 
log_step=20 sample_step=5#1000 model_save_step=10 lr_update_step=100#1000 #model_save_step=10000 #lr_update_step=1000 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') if not os.path.exists(model_save_dir): os.makedirs(model_save_dir) multi_gpu = args.multi_gpu testSplit = split print("Test split " , testSplit) nSplit = 5 listSplit = [] for i in range(nSplit): if i!=testSplit: listSplit.append(i) print(listSplit) if isSemaine: isSewa = 0 if not isSewa : if not isSemaine : d_name = 'AFEW-VA-Fixed' additionName = "AF"+str(split)+"-" else : d_name = 'Sem-Short' additionName = "SEM"+str(split)+"-" dbType = 0 else : d_name = 'SEWA' dbType = 1 additionName = "SW"+str(split)+"-" additionName+=(str(nSel)+'-') additionName+=(str(g_conv_dim)+'-') additionName+=(str(d_conv_dim)+'-') if trainQuadrant : if alterQuadrant : additionName+="QDAL-" c_dim = 1 else : additionName+="QD-" c_dim = 4 if tryDenoise : additionName+="Den-" transform =transforms.Compose([ #transforms.Resize((image_size,image_size)), #transforms.CenterCrop(image_size), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) ]) #AFEW-VA-Small ID = SEWAFEWReduced([d_name], None, True, image_size, transform, False, True, 1,split=True, nSplit = nSplit ,listSplit=listSplit ,isVideo=isVideo, seqLength = seq_length, returnQuadrant=trainQuadrant, returnNoisy = tryDenoise,dbType = dbType, returnWeight = useWeight,isSemaine = isSemaine) #ID = AFEWVA([d_name], None, True, image_size, transform, False, True, 1,split=True, nSplit = nSplit ,listSplit=listSplit # ,isVideo=isVideo, seqLength = seq_length, returnQuadrant=trainQuadrant, returnNoisy = tryDenoise,dbType = dbType,returnWeight = useWeight) dataloader = torch.utils.data.DataLoader(dataset = ID, batch_size = batch_size, shuffle = True,worker_init_fn=worker_init_fn) VD = SEWAFEWReduced([d_name], None, True, image_size, transform, False, False, 1,split=True, nSplit = nSplit,listSplit=[testSplit] ,isVideo=isVideo, seqLength = seq_length, returnQuadrant=trainQuadrant, returnNoisy = tryDenoise,dbType = dbType, isSemaine = isSemaine) #VD = AFEWVA([d_name], None, True, image_size, transform, False, False, 1,split=True, nSplit = nSplit,listSplit=[testSplit] # ,isVideo=isVideo, seqLength = seq_length, returnNoisy = tryDenoise,dbType = dbType) dataloaderV = torch.utils.data.DataLoader(dataset = VD, batch_size = batch_size, shuffle = False) #Build model """Create a generator and a discriminator.""" if nSel : G = GeneratorMZ(g_conv_dim, 0, 1,use_skip,useLatent) D = DiscriminatorMZR(image_size, d_conv_dim, c_dim, 4,inputC=inputC) C = CombinerSeqAtt(image_size, d_conv_dim, c_dim, 4,64,512,1,batch_size,useCH=True) else : G = GeneratorM(g_conv_dim, 0, 1,use_skip,useLatent) D = DiscriminatorM(image_size, d_conv_dim, c_dim, 6) C = CombinerSeqAtt(image_size, d_conv_dim, c_dim, 4,64,512,1,batch_size,useCH=True) print_network(G, 'G') print_network(D, 'D') if toLoadModel : print('Loading models from iterations : ',resume_iters) if modelExist : additionName+='UA-' if GName is None : G_path = os.path.join(curDir+model_save_dir, '{}G-{}.ckpt'.format(additionName,resume_iters)) D_path = os.path.join(curDir+model_save_dir, '{}D-{}.ckpt'.format(additionName,resume_iters)) C_path = os.path.join(curDir+model_save_dir, '{}C-{}.ckpt'.format(additionName,resume_iters)) else : G_path = os.path.join(curDir+model_save_dir, GName) D_path = os.path.join(curDir+model_save_dir, DName) C_path = os.path.join(curDir+model_save_dir, DName) print('loading ',G_path) print('loading ',D_path) 
G.load_state_dict(torch.load(G_path, map_location=lambda storage, loc: storage)) D.load_state_dict(torch.load(D_path, map_location=lambda storage, loc: storage)) C.load_state_dict(torch.load(D_path, map_location=lambda storage, loc: storage)) if not modelExist: additionName+='UA-' else : print('Initiating models') G.apply(weights_init_uniform_rule) D.apply(weights_init_uniform_rule) save_name = additionName+str(testSplit) err_file = curDir+save_name+".txt" print('err file : ',err_file) g_optimizer = torch.optim.Adam(G.parameters(), g_lr, [beta1, beta2]) d_optimizer = torch.optim.Adam(D.parameters(), d_lr, [beta1, beta2]) c_optimizer = torch.optim.Adam(C.parameters(), d_lr, [beta1, beta2]) G.to(device) D.to(device) C.to(device) if multi_gpu: G = nn.DataParallel(G) D = nn.DataParallel(D) # Set data loader. data_loader = dataloader if not trainQuadrant or (alterQuadrant): criterion = nn.MSELoss() else : criterion = nn.CrossEntropyLoss() #F.cross_entropy(logit, target) # Fetch fixed inputs for debugging. data = next(iter(dataloader)) x_fixed, rlabels,rldmrk,_ = data[0],data[1],data[2],data[3]# x_fixed, c_org if trainQuadrant : if tryDenoise : x_fixed = data[6].cuda() x_target = data[0].cuda() else : if tryDenoise : x_fixed = data[5].cuda() x_target = data[0].cuda() x_fixed = x_fixed.to(device) # Learning rate cache for decaying. d_lr = d_lr start_iters = 0 # Start training. print('Start training...') start_time = time.time() if trainQuadrant : q1 = data[4] f = open(err_file,'w+') f.write("err : ") f.close() lowest_loss = 99999 lMSA,lMSV,lCCV,lCCA,lICA,lICV,lCRA, lCRV, total = 9999,9999,-9999, -9999, -9999, -9999, -9999, -9999, -9999 w,wv,wa = None,None,None for i in range(start_iters, num_iters): random.seed() manualSeed = random.randint(1, 10000) # use if you want new results random.seed(manualSeed) torch.manual_seed(manualSeed) print('Epoch {}/{}'.format(i, num_iters - 1)) print('-'*10) running_loss = 0 G.train() D.train() for x,(data) in enumerate(dataloader,0) : rinputs, rlabels,rldmrk,_ =data[0],data[1],data[2],data[3] if trainQuadrant : if alterQuadrant : quadrant = data[5].float().cuda() else : quadrant = data[5].cuda() if tryDenoise : noisy = data[6].cuda() else : if tryDenoise : noisy = data[5].cuda() if useWeight : w = data[6].cuda() #print(w) wv = w[:,1] wa = w[:,0] else : if useWeight : w = data[5].cuda() #print(w) wv = w[:,1] wa = w[:,0] inputs = rinputs.cuda()#to(device) labels = rlabels.cuda()#to(device) # Compute loss with real images. 
out_src, out_cls = D(inputs) d_loss_real = - torch.mean(out_src) if not trainQuadrant: if useWeight : d_loss_cls = calcMSET(out_cls,labels,w) #criterion(out_cls, labels) else : d_loss_cls = criterion(out_cls, labels) #classification_loss(out_cls, label_org, dataset) if addLoss : ov,oa,lv,la = out_cls[:,0],out_cls[:,1], labels[:,0], labels[:,1] corV = -calcCORT(ov, lv, wv) corA = -calcCORT(oa, la, wa) cccV = -calcCCCT(ov, lv, wv) cccA = -calcCCCT(oa, la, wa) iccV = -calcICCT(ov, lv, wv) iccA = -calcICCT(oa, la, wa) d_loss_cls = d_loss_cls + corV+corA +cccV+cccA+iccV+iccA else : #print('q ',quadrant) #print(out_cls.shape, quadrant.shape ) if alterQuadrant : d_loss_cls = criterion(torch.squeeze(out_cls), quadrant) else : d_loss_cls = criterion(out_cls, quadrant) if x%10 == 0 : if not trainQuadrant: print(x,'-',len(dataloader)," Res - label-G : ", out_cls[:3],labels[:3]) else : if alterQuadrant : print(x,'-',len(dataloader)," Res - label-G : ", torch.round(out_cls[:3]),quadrant[:3]) else : print(x,'-',len(dataloader)," Res - label-G : ", torch.max(out_cls[:3],1)[1],quadrant[:3]) # Compute loss with fake images. if tryDenoise : theInput = noisy else : theInput = inputs x_fake = G(theInput) out_src, out_cls = D(x_fake.detach()) d_loss_fake = torch.mean(out_src) # Compute loss for gradient penalty. alpha = torch.rand(theInput.size(0), 1, 1, 1).to(device) x_hat = (alpha * theInput.data + (1 - alpha) * x_fake.data).requires_grad_(True) out_src, _ = D(x_hat) d_loss_gp = gradient_penalty(out_src, x_hat) # Backward and optimize. d_loss = d_loss_real + d_loss_fake + lambda_cls * d_loss_cls + lambda_gp * d_loss_gp #reset_grad() g_optimizer.zero_grad() d_optimizer.zero_grad() d_loss.backward() d_optimizer.step() # Logging. loss = {} loss['D/loss_real'] = d_loss_real.item() loss['D/loss_fake'] = d_loss_fake.item() loss['D/loss_cls'] = d_loss_cls.item() loss['D/loss_gp'] = d_loss_gp.item() ###! Actual training of the generator if (i+1) % n_critic == 0: # Original-to-target domain. 
if tryDenoise : z,x_fake = G(noisy,returnInter = True) else : z,x_fake = G(inputs) out_src, out_cls = D(x_fake) if x%10 == 0 : print("Res - label-D : ", out_cls[:3],labels[:3]) g_loss_fake = - torch.mean(out_src) if not trainQuadrant: #g_loss_cls = criterion(out_cls, labels) #classification_loss(out_cls, label_org, dataset) if useWeight : g_loss_cls = calcMSET(out_cls,labels,w) #criterion(out_cls, labels) else : g_loss_cls = criterion(out_cls, labels) #classification_loss(out_cls, label_org, dataset) if addLoss : ov,oa,lv,la = out_cls[:,0],out_cls[:,1], labels[:,0], labels[:,1] corV = -calcCORT(ov, lv, wv) corA = -calcCORT(oa, la, wa) cccV = -calcCCCT(lv, lv, wv) cccA = -calcCCCT(oa, la, wa) iccV = -calcICCT(ov, lv, wv) iccA = -calcICCT(oa, la, wa) g_loss_cls = g_loss_cls + corV+corA +cccV+cccA+iccV+iccA else : if alterQuadrant : g_loss_cls = criterion(torch.squeeze(out_cls), quadrant) else : g_loss_cls = criterion(out_cls, quadrant) if not isSewa: q = toQuadrant(out_cls, -10, 10, False) else : q = toQuadrant(out_cls, 0, 1, False) out_c = C(torch.cat((z,q),1)) if useWeight : c_loss = calcMSET(out_cls,labels,w) #criterion(out_cls, labels) else : c_loss = criterion(out_cls, labels) #classification_loss(out_cls, label_org, dataset) if addLoss : ov,oa,lv,la = out_c[:,0],out_c[:,1], labels[:,0], labels[:,1] corV = -calcCORT(ov, lv, wv) corA = -calcCORT(oa, la, wa) cccV = -calcCCCT(lv, lv, wv) cccA = -calcCCCT(oa, la, wa) iccV = -calcICCT(ov, lv, wv) iccA = -calcICCT(oa, la, wa) c_loss = c_loss + corV+corA +cccV+cccA+iccV+iccA # Target-to-original domain. x_reconst = G(x_fake) g_loss_rec = torch.mean(torch.abs(inputs - x_reconst)) # Backward and optimize. g_loss = g_loss_fake + lambda_rec * g_loss_rec + lambda_cls * g_loss_cls #reset_grad() g_optimizer.zero_grad() d_optimizer.zero_grad() c_optimizer.zero_grad() c_loss.backward() g_loss.backward() g_optimizer.step() c_optimizer.step() # Logging. loss['G/loss_fake'] = g_loss_fake.item() loss['G/loss_rec'] = g_loss_rec.item() loss['G/loss_cls'] = g_loss_cls.item() loss['C'] = c_loss.item() ###! Getting the training metrics and samples #running_loss += loss.item() * inputs.size(0) #print("{}/{} loss : {}/{}".format(x,int(len(dataloader.dataset)/batch_size),lossC.item(),lossR.item())) if (i+1) % 10 == 0: et = time.time() - start_time et = str(datetime.timedelta(seconds=et))[:-7] log = "Elapsed [{}], Iteration [{}/{}], Inner {}/{} \n".format(et, i+1, num_iters,x,int(len(dataloader.dataset)/batch_size)) for tag, value in loss.items(): log += ", {}: {:.4f}".format(tag, value) print(log) f = open(err_file,'a') f.write("Elapsed [{}], Iteration [{}/{}], Inner {}/{} \n".format(et, i+1, num_iters,x,int(len(dataloader.dataset)/batch_size))) f.write(log) f.close() # Translate fixed images for debugging. 
if (i + 1) % visEvery == 0:
    with torch.no_grad():
        x_fake_list = [x_fixed]
        x_concat = G(x_fixed)
        sample_path = os.path.join(curDir + sample_dir, '{}{}-images-denoised.jpg'.format(i + 1, additionName))
        save_image(denorm(x_concat.data.cpu()), sample_path, nrow=int(round(batch_size / 4)), padding=0)
        print('Saved real and fake denoised images into {}...'.format(sample_path))
        if tryDenoise:
            x_concat = x_fixed
            sample_path = os.path.join(curDir + sample_dir, '{}{}-images-original.jpg'.format(i + 1, additionName))
            save_image(denorm(x_concat.data.cpu()), sample_path, nrow=int(round(batch_size / 4)), padding=0)
            print('Saved original input images into {}...'.format(sample_path))
            x_concat = x_target
            sample_path = os.path.join(curDir + sample_dir, '{}{}-images-groundtruth.jpg'.format(i + 1, additionName))
            save_image(denorm(x_concat.data.cpu()), sample_path, nrow=int(round(batch_size / 4)), padding=0)
            print('Saved ground-truth images into {}...'.format(sample_path))

# Save model checkpoints.
if (i + 1) % saveEvery == 0:
    G_path = os.path.join(curDir + model_save_dir, '{}G-{}.ckpt'.format(additionName, i))
    D_path = os.path.join(curDir + model_save_dir, '{}D-{}.ckpt'.format(additionName, i))
    C_path = os.path.join(curDir + model_save_dir, '{}C-{}.ckpt'.format(additionName, i))
    if multi_gpu:
        torch.save(G.module.state_dict(), G_path)
        torch.save(D.module.state_dict(), D_path)
        torch.save(C.module.state_dict(), C_path)
    else:
        torch.save(G.state_dict(), G_path)
        torch.save(D.state_dict(), D_path)
        torch.save(C.state_dict(), C_path)
    print('Saved model checkpoints into {}...'.format(model_save_dir))
    print(G_path)

# Decay learning rates.
if (i + 1) % lr_update_step == 0 and (i + 1) > 50:
    g_lr -= (g_lr / float(num_iters_decay))
    d_lr -= (d_lr / float(num_iters_decay))
    update_lr_ind(d_optimizer, d_lr)
    update_lr_ind(g_optimizer, g_lr)
    print('Decayed learning rates, g_lr: {}, d_lr: {}.'.format(g_lr, d_lr))

epoch_loss = running_loss / len(dataloader.dataset)
print('Loss : {:.4f}'.format(epoch_loss))

if i % 2 == 0:
    if multi_gpu:
        torch.save(D.module.state_dict(), curDir + 't-models/' + '-D' + save_name)
        torch.save(G.module.state_dict(), curDir + 't-models/' + '-G' + save_name)
        torch.save(C.module.state_dict(), curDir + 't-models/' + '-C' + save_name)
    else:
        torch.save(D.state_dict(), curDir + 't-models/' + '-D' + save_name)
        torch.save(G.state_dict(), curDir + 't-models/' + '-G' + save_name)
        torch.save(C.state_dict(), curDir + 't-models/' + '-C' + save_name)  # was saving G's weights under the C checkpoint name

# Deep copy the model_ft
if i % 5 == 0:  # epoch_loss < lowest_loss:
    if trainQuadrant:
        a = 0
        b = 0
    else:
        a = 0
        b = 1
    print("output : ", out_cls[0])
    print("labels : ", labels[0])
    if True:
        listValO = []
        listAroO = []
        listValL = []
        listAroL = []
        tvo = []
        tao = []
        tvl = []
        tal = []
        anyDiffer = False
        for x, data in enumerate(dataloaderV, 0):
            if trainQuadrant:
                rinputs, rlabels, rldmrk = data[0], data[5], data[2]
            else:
                rinputs, rlabels, rldmrk = data[0], data[1], data[2]
            G.eval()
            D.eval()
            C.eval()
            inputs = rinputs.cuda()  # to(device)
            labels = rlabels.cuda()  # to(device)
            with torch.set_grad_enabled(False):
                z, inputsM = G(inputs, returnInter=True)
                _, outD = D(inputsM)
                if not isSewa:
                    q = toQuadrant(outD, -10, 10, False)
                else:
                    q = toQuadrant(outD, 0, 1, False)
                outputs = C(torch.cat((z, q), 1))
                if trainQuadrant:
                    if alterQuadrant:
                        outputs = torch.round(outputs)
                    else:
                        _, outputs = torch.max(outputs, 1)
            if trainQuadrant:
                print(x, ',', int(truediv(len(VD), batch_size)), outputs[:2], labels[:2], outputs.shape)
            else:
                print(x, ',', int(truediv(len(VD), batch_size)), outputs[:2], labels[:2], outputs[:, 0].shape[0], outputs.shape)
            # print(outputs.shape)
            if not trainQuadrant:
                shape = outputs[:, 0].shape[0]
            else:
                shape = outputs.shape[0]
            if shape != batch_size:  # in case the batch size differs, usually at the end of the iterator
                anyDiffer = True
                print('differ')
                if trainQuadrant:
                    tvo.append(outputs.detach().cpu())
                    tao.append(outputs.detach().cpu())
                    tvl.append(labels.detach().cpu())
                    tal.append(labels.detach().cpu())
                else:
                    tvo.append(outputs[:, a].detach().cpu())
                    tao.append(outputs[:, b].detach().cpu())
                    tvl.append(labels[:, a].detach().cpu())
                    tal.append(labels[:, b].detach().cpu())
            else:
                print('equal')
                if trainQuadrant:
                    listValO.append(outputs.detach().cpu())
                    listAroO.append(outputs.detach().cpu())
                    listValL.append(labels.detach().cpu())
                    listAroL.append(labels.detach().cpu())
                else:
                    listValO.append(outputs[:, a].detach().cpu())
                    listAroO.append(outputs[:, b].detach().cpu())
                    listValL.append(labels[:, a].detach().cpu())
                    listAroL.append(labels[:, b].detach().cpu())

        est_V = np.asarray(torch.stack(listValO)).flatten()
        est_A = np.asarray(torch.stack(listAroO)).flatten()
        gt_V = np.asarray(torch.stack(listValL)).flatten()
        gt_A = np.asarray(torch.stack(listAroL)).flatten()
        if anyDiffer:  # append the leftover, differently sized batches
            est_Vt = np.asarray(torch.stack(tvo)).flatten()
            est_At = np.asarray(torch.stack(tao)).flatten()
            gt_Vt = np.asarray(torch.stack(tvl)).flatten()
            gt_At = np.asarray(torch.stack(tal)).flatten()
            # now concatenate
            est_V = np.concatenate((est_V, est_Vt))
            est_A = np.concatenate((est_A, est_At))
            gt_V = np.concatenate((gt_V, gt_Vt))
            gt_A = np.concatenate((gt_A, gt_At))
        print(est_V.shape, gt_V.shape)

        mseV = calcMSE(est_V, gt_V)
        mseA = calcMSE(est_A, gt_A)
        corV = calcCOR(est_V, gt_V)
        corA = calcCOR(est_A, gt_A)
        iccV = calcICC(est_V, gt_V)
        iccA = calcICC(est_A, gt_A)
        cccV = calcCCC(est_V, gt_V)
        cccA = calcCCC(est_A, gt_A)
        iccV2 = calcCCC(gt_V, gt_V)  # self-agreement sanity checks, unused below
        iccA2 = calcCCC(gt_A, gt_A)

        if lMSA > mseA:
            lMSA = mseA
        if lMSV > mseV:
            lMSV = mseV
        if corA > lCRA:
            lCRA = corA
        if corV > lCRV:
            lCRV = corV
        if cccA > lCCA:
            lCCA = cccA
        if cccV > lCCV:
            lCCV = cccV
        if iccA > lICA:
            lICA = iccA
        if iccV > lICV:
            lICV = iccV

        if (corA + corV + cccA + cccV + iccA + iccV) > total:
            total = corA + corV + cccA + cccV + iccA + iccV
            G_path = os.path.join(curDir + model_save_dir, '{}G-best-{}.ckpt'.format(additionName, i))
            D_path = os.path.join(curDir + model_save_dir, '{}D-best-{}.ckpt'.format(additionName, i))
            # G_path = os.path.join(curDir+model_save_dir, '{}{}-G-adl-best.ckpt'.format(i+1, additionName))
            # D_path = os.path.join(curDir+model_save_dir, '{}{}-D-adl-best.ckpt'.format(i+1, additionName))
            if multi_gpu:
                torch.save(G.module.state_dict(), G_path)
                torch.save(D.module.state_dict(), D_path)
            else:
                torch.save(G.state_dict(), G_path)
                torch.save(D.state_dict(), D_path)

        print('Best, MSEA : ' + str(lMSA) + ', CORA : ' + str(lCRA) + ', CCCA : ' + str(lCCA) + ', ICCA : ' + str(lICA)
              + ', MSEV : ' + str(lMSV) + ', CORV : ' + str(lCRV) + ', CCCV : ' + str(lCCV) + ', ICCV : ' + str(lICV)
              + ', Total : ' + str(total))
        print('MSEV : ', mseV, ', CORV : ', corV, ', CCCV : ', cccV, ', ICCV : ', iccV)
        print('MSEA : ', mseA, ', CORA : ', corA, ', CCCA : ', cccA, ', ICCA : ', iccA)

        f = open(err_file, 'a')
        res = 'MSEV : ' + str(mseV) + ', CORV : ' + str(corV) + ', CCCV : ' + str(cccV) + ', ICCV : ' + str(iccV) + ' \n '
        f.write(res)
        res = 'MSEA : ' + str(mseA) + ', CORA : ' + str(corA) + ', CCCA : ' + str(cccA) + ', ICCA : ' + str(iccA) + ' \n '
        f.write(res)
        res = ('Best, MSEA : ' + str(lMSA) + ', CORA : ' + str(lCRA) + ', CCCA : ' + str(lCCA) + ', ICCA : ' + str(lICA)
               + ', MSEV : ' + str(lMSV) + ', CORV : ' + str(lCRV) + ', CCCV : ' + str(lCCV) + ', ICCV : ' + str(lICV)
               + ', Total : ' + str(total) + ' \n ')
        f.write(res)
        f.close()

print('Best val Acc: {:.4f}'.format(lowest_loss))
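The validation block above leans on metric helpers (calcMSE, calcCOR, calcICC, calcCCC) that are defined elsewhere in the source. For reference, a minimal NumPy sketch of what calcCCC likely computes (Lin's concordance correlation coefficient) is shown below; the helper name and this exact formula variant are assumptions, not the original implementation.

import numpy as np

def calcCCC(est, gt):
    # Lin's concordance correlation coefficient between two 1-D arrays.
    # Assumed implementation; the original helper lives elsewhere in the repo.
    est_mean, gt_mean = np.mean(est), np.mean(gt)
    est_var, gt_var = np.var(est), np.var(gt)
    cov = np.mean((est - est_mean) * (gt - gt_mean))
    return 2 * cov / (est_var + gt_var + (est_mean - gt_mean) ** 2)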
def Run_video(model, Fs, seg_results, instance_num):
    instances = {}
    for result in seg_results:
        if len(result[1]):
            for id in result[1]:
                if id == 0:
                    continue
                instances.setdefault(id, 0)
                instances[id] += 1
    instance_num = min(instance_num, len(instances))
    instances_ = np.array(list(instances))  # was np.array(ins); `ins` was undefined
    seg_result_idx = [i[3] for i in seg_results]
    instance_idx = 1
    b, c, T, h, w = Fs.shape
    results = []
    while True:
        # Loop exit reconstructed from the Run_video variant below; the
        # flattened original had no break and could not terminate.
        if np.all([len(i[0]) == 0 for i in seg_results]) or instance_idx > instance_num:
            break
        start_frame_idx = np.argmax([max(i[2]) if i[2] != [] else 0 for i in seg_results])
        start_frame = seg_result_idx[start_frame_idx]
        start_mask = seg_results[start_frame_idx][0][0].astype(np.uint8)
        # start_mask = cv2.resize(start_mask, (w, h))
        start_mask = torch.from_numpy(start_mask)
        Es = torch.zeros((b, 1, T, h, w)).float()
        Es[:, :, start_frame] = start_mask
        # to_memorize = [int(i) for i in np.arange(start_frame, num_frames, step=Mem_every)]
        to_memorize = [start_frame]
        for t in range(start_frame + 1, num_frames):  # frames after (num_frames is assumed to come from the enclosing scope)
            # memorize
            pre_key, pre_value = model([Fs[:, :, t - 1], Es[:, :, t - 1]])
            pre_key = pre_key.unsqueeze(2)
            pre_value = pre_value.unsqueeze(2)
            if t - 1 == start_frame:  # the first frame
                this_keys_m, this_values_m = pre_key, pre_value
            else:  # other frames
                this_keys_m = torch.cat([keys, pre_key], dim=2)
                this_values_m = torch.cat([values, pre_value], dim=2)
            # segment
            logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m])
            em = F.softmax(logits, dim=1)[:, 1]  # B h w
            Es[:, 0, t] = em
            # check solo result
            pred = torch.round(em.float())
            if t in seg_result_idx:
                idx = seg_result_idx.index(t)
                this_frame_results = seg_results[idx]
                masks = this_frame_results[0]
                ious = []
                for mask in masks:
                    mask = mask.astype(np.uint8)
                    mask = torch.from_numpy(mask)
                    iou = get_video_mIoU(pred, mask)
                    ious.append(iou)
                if ious != []:
                    ious = np.array(ious)
                    reserve = list(range(len(ious)))
                    if sum(ious >= IOU1) >= 1:
                        same_idx = np.argmax(ious)
                        mask = torch.from_numpy(masks[same_idx])
                        Es[:, 0, t] = mask
                        reserve.remove(same_idx)
                        # if abs(to_memorize[-1] - t) >= TO_MEMORY_MIN_INTERVAL:
                        to_memorize.append(t)
                    reserve_result = []
                    for n in range(3):
                        reserve_result.append([this_frame_results[n][i] for i in reserve])
                    reserve_result.append(this_frame_results[3])
                    seg_results[idx] = reserve_result
            # update key and value
            if t - 1 in to_memorize:
                keys, values = this_keys_m, this_values_m
        # to_memorize = [start_frame - int(i) for i in np.arange(0, start_frame + 1, step=Mem_every)]
        to_memorize = [start_frame]
        for t in list(range(0, start_frame))[::-1]:  # frames before
            # memorize
            pre_key, pre_value = model([Fs[:, :, t + 1], Es[:, :, t + 1]])
            pre_key = pre_key.unsqueeze(2)
            pre_value = pre_value.unsqueeze(2)
            if t + 1 == start_frame:  # the first frame
                this_keys_m, this_values_m = pre_key, pre_value
            else:  # other frames
                this_keys_m = torch.cat([keys, pre_key], dim=2)
                this_values_m = torch.cat([values, pre_value], dim=2)
            # segment
            logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m])
            em = F.softmax(logits, dim=1)[:, 1]  # B h w
            Es[:, 0, t] = em
            # check solo result
            pred = torch.round(em.float())
            if t in seg_result_idx:
                idx = seg_result_idx.index(t)
                this_frame_results = seg_results[idx]
                masks = this_frame_results[0]
                ious = []
                for mask in masks:
                    mask = mask.astype(np.uint8)
                    mask = torch.from_numpy(mask)
                    iou = get_video_mIoU(pred, mask)
                    ious.append(iou)
                if ious != []:
                    ious = np.array(ious)
                    reserve = list(range(len(ious)))
                    if sum(ious >= IOU1) >= 1:
                        same_idx = np.argmax(ious)
                        mask = torch.from_numpy(masks[same_idx])
                        Es[:, 0, t] = mask
                        reserve.remove(same_idx)
                        # if abs(to_memorize[-1] - t) >= TO_MEMORY_MIN_INTERVAL:
                        to_memorize.append(t)
                    reserve_result = []
                    for n in range(3):
                        reserve_result.append([this_frame_results[n][i] for i in reserve])
                    reserve_result.append(this_frame_results[3])
                    seg_results[idx] = reserve_result
            # update key and value
            if t + 1 in to_memorize:
                keys, values = this_keys_m, this_values_m
        for j in range(3):
            seg_results[start_frame_idx][j].pop(0)
        # pred = torch.round(Es.float())
        results.append((Es, instance_idx))
        instance_idx += 1
    return results
def Run_video(model, Fs, seg_results, num_frames, Mem_every=None, model_name='standard'):
    seg_result_idx = [i[3] for i in seg_results]
    instance_idx = 1
    b, c, T, h, w = Fs.shape
    results = []
    if np.all([len(i[0]) == 0 for i in seg_results]):
        print('No segmentation result of solo!')
        pred = torch.zeros((b, 1, T, h, w)).float().cuda()
        return [(pred, 1)]
    while True:
        if np.all([len(i[0]) == 0 for i in seg_results]):
            print('Run video over!')
            break
        if instance_idx > MAX_NUM:
            print('Max instance number!')
            break
        start_frame_idx = np.argmax([max(i[2]) if i[2] != [] else 0 for i in seg_results])
        start_frame = seg_result_idx[start_frame_idx]
        start_mask = seg_results[start_frame_idx][0][0].astype(np.uint8)
        # start_mask = cv2.resize(start_mask, (w, h))
        start_mask = torch.from_numpy(start_mask).cuda()

        if model_name in ('enhanced', 'enhanced_motion'):
            Os = torch.zeros((b, c, int(h / 4), int(w / 4)))
            first_frame = Fs[:, :, start_frame]
            first_mask = start_mask.cpu()
            if len(first_mask.shape) == 2:
                first_mask = first_mask.unsqueeze(0).unsqueeze(0)
            elif len(first_mask.shape) == 3:
                first_mask = first_mask.unsqueeze(0)
            first_frame = first_frame * first_mask.repeat(1, 3, 1, 1).type(torch.float)
            for i in range(b):
                mask_ = first_mask[i]
                mask_ = mask_.squeeze(0).cpu().numpy().astype(np.uint8)
                assert np.any(mask_)
                x, y, w_, h_ = cv2.boundingRect(mask_)
                patch = first_frame[i, :, y:(y + h_), x:(x + w_)].cpu().numpy()
                patch = patch.transpose(1, 2, 0)
                patch = cv2.resize(patch, (int(w / 4), int(h / 4)))
                patch = patch.transpose(2, 0, 1)
                patch = torch.from_numpy(patch)
                Os[i, :, :, :] = patch

        if model_name == 'varysize':
            os = []  # NOTE: shadows the `os` module inside this function
            first_frame = Fs[:, :, start_frame]
            first_mask = start_mask.cpu()
            if len(first_mask.shape) == 2:
                first_mask = first_mask.unsqueeze(0).unsqueeze(0)
            elif len(first_mask.shape) == 3:
                first_mask = first_mask.unsqueeze(0)
            first_frame = first_frame * first_mask.repeat(1, 3, 1, 1).type(torch.float)
            for i in range(b):
                mask_ = first_mask[i]
                mask_ = mask_.squeeze(0).cpu().numpy().astype(np.uint8)
                assert np.any(mask_)
                x, y, w_, h_ = cv2.boundingRect(mask_)
                patch = first_frame[i, :, y:(y + h_), x:(x + w_)].cpu().numpy()
                Os = torch.zeros((1, c, h_, w_))
                patch = patch.transpose(1, 2, 0)
                patch = patch.transpose(2, 0, 1)
                patch = torch.from_numpy(patch)
                Os[0, :, :, :] = patch
                os.append(Os)

        Es = torch.zeros((b, 1, T, h, w)).float().cuda()
        Es[:, :, start_frame] = start_mask
        # to_memorize = [int(i) for i in np.arange(start_frame, num_frames, step=Mem_every)]
        to_memorize = [start_frame]
        for t in range(start_frame + 1, num_frames):  # frames after
            # memorize
            pre_key, pre_value = model([Fs[:, :, t - 1], Es[:, :, t - 1]])
            pre_key = pre_key.unsqueeze(2)
            pre_value = pre_value.unsqueeze(2)
            if t - 1 == start_frame:  # the first frame
                this_keys_m, this_values_m = pre_key, pre_value
            else:  # other frames
                this_keys_m = torch.cat([keys, pre_key], dim=2)
                this_values_m = torch.cat([values, pre_value], dim=2)
            # segment
            if model_name == 'enhanced':
                logits, _, _ = model([Fs[:, :, t], Os, this_keys_m, this_values_m])
            elif model_name == 'motion':
                logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m, Es[:, :, t - 1]])
            elif model_name in ('aspp', 'sp'):  # the two branches were identical
                logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m, torch.round(Es[:, :, t - 1])])
            elif model_name == 'standard':
                logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m])
            elif model_name == 'enhanced_motion':
                logits, _, _ = model([Fs[:, :, t], Os, this_keys_m, this_values_m, torch.round(Es[:, :, t - 1])])
            elif model_name == 'varysize':
                logits, _, _ = model([Fs[:, :, t], os, this_keys_m, this_values_m])
            else:
                raise NotImplementedError
            em = F.softmax(logits, dim=1)[:, 1]  # B h w
            Es[:, 0, t] = em
            # check solo result
            pred = torch.round(em.float())
            if t in seg_result_idx:
                idx = seg_result_idx.index(t)
                this_frame_results = seg_results[idx]
                masks = this_frame_results[0]
                ious = []
                for mask in masks:
                    mask = mask.astype(np.uint8)
                    mask = torch.from_numpy(mask)
                    iou = get_video_mIoU(pred, mask)
                    ious.append(iou)
                if ious != []:
                    ious = np.array(ious)
                    reserve = list(range(len(ious)))
                    if sum(ious >= IOU1) >= 1:
                        same_idx = np.argmax(ious)
                        mask = torch.from_numpy(masks[same_idx]).cuda()
                        # if get_video_mIoU(mask, torch.round(Es[:, 0, t - 1])) \
                        #         > get_video_mIoU(pred, torch.round(Es[:, 0, t - 1])):
                        Es[:, 0, t] = mask
                        reserve.remove(same_idx)
                        # if abs(to_memorize[-1] - t) >= TO_MEMORY_MIN_INTERVAL:
                        to_memorize.append(t)
                    # for i, iou in enumerate(ious):
                    #     if iou >= IOU2:
                    #         if i in reserve:
                    #             reserve.remove(i)
                    reserve_result = []
                    for n in range(3):
                        reserve_result.append([this_frame_results[n][i] for i in reserve])
                    reserve_result.append(this_frame_results[3])
                    seg_results[idx] = reserve_result
            # update key and value
            if t - 1 in to_memorize:
                keys, values = this_keys_m, this_values_m
        # to_memorize = [start_frame - int(i) for i in np.arange(0, start_frame + 1, step=Mem_every)]
        to_memorize = [start_frame]
        for t in list(range(0, start_frame))[::-1]:  # frames before
            # memorize
            pre_key, pre_value = model([Fs[:, :, t + 1], Es[:, :, t + 1]])
            pre_key = pre_key.unsqueeze(2)
            pre_value = pre_value.unsqueeze(2)
            if t + 1 == start_frame:  # the first frame
                this_keys_m, this_values_m = pre_key, pre_value
            else:  # other frames
                this_keys_m = torch.cat([keys, pre_key], dim=2)
                this_values_m = torch.cat([values, pre_value], dim=2)
            # segment
            if model_name == 'enhanced':
                logits, _, _ = model([Fs[:, :, t], Os, this_keys_m, this_values_m])
            elif model_name == 'motion':
                logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m, Es[:, :, t + 1]])
            elif model_name in ('aspp', 'sp'):
                logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m, torch.round(Es[:, :, t + 1])])
            elif model_name == 'standard':
                logits, _, _ = model([Fs[:, :, t], this_keys_m, this_values_m])
            elif model_name == 'enhanced_motion':
                logits, _, _ = model([Fs[:, :, t], Os, this_keys_m, this_values_m, torch.round(Es[:, :, t + 1])])
            elif model_name == 'varysize':
                logits, _, _ = model([Fs[:, :, t], os, this_keys_m, this_values_m])
            else:
                raise NotImplementedError
            em = F.softmax(logits, dim=1)[:, 1]  # B h w
            Es[:, 0, t] = em
            # check solo result
            pred = torch.round(em.float())
            if t in seg_result_idx:
                idx = seg_result_idx.index(t)
                this_frame_results = seg_results[idx]
                masks = this_frame_results[0]
                ious = []
                for mask in masks:
                    mask = mask.astype(np.uint8)
                    mask = torch.from_numpy(mask)
                    iou = get_video_mIoU(pred, mask)
                    ious.append(iou)
                if ious != []:
                    ious = np.array(ious)
                    reserve = list(range(len(ious)))
                    if sum(ious >= IOU1) >= 1:
                        same_idx = np.argmax(ious)
                        mask = torch.from_numpy(masks[same_idx]).cuda()
                        # if get_video_mIoU(mask, torch.round(Es[:, 0, t + 1])) \
                        #         > get_video_mIoU(pred, torch.round(Es[:, 0, t + 1])):
                        Es[:, 0, t] = mask
                        reserve.remove(same_idx)
                        # if abs(to_memorize[-1] - t) >= TO_MEMORY_MIN_INTERVAL:
                        to_memorize.append(t)
                    # for i, iou in enumerate(ious):
                    #     if iou >= IOU2:
                    #         if i in reserve:
                    #             reserve.remove(i)
                    reserve_result = []
                    for n in range(3):
                        reserve_result.append([this_frame_results[n][i] for i in reserve])
                    reserve_result.append(this_frame_results[3])
                    seg_results[idx] = reserve_result
            # update key and value
            if t + 1 in to_memorize:
                keys, values = this_keys_m, this_values_m
        for j in range(3):
            seg_results[start_frame_idx][j].pop(0)
        # pred = torch.round(Es.float())
        results.append((Es, instance_idx))
        instance_idx += 1
    return results
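Both Run_video variants match the propagated soft mask against SOLO proposals through get_video_mIoU, which is defined elsewhere in the repo. A minimal sketch of a binary-mask IoU with that interface might look like this; the thresholding and the batch-size-1 assumption mirror the calling code, but this is an assumed stand-in, not the original helper.

import torch

def get_video_mIoU(pred, mask):
    # Assumed helper: IoU between a predicted binary mask and a proposal mask,
    # batch size 1 as in the calling code above.
    pred = (pred.squeeze() > 0.5).float()
    mask = (mask.squeeze() > 0.5).float()
    inter = (pred * mask).sum()
    union = pred.sum() + mask.sum() - inter
    return (inter / union.clamp(min=1e-6)).item()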
def detect(self, image):
    height = image.shape[0]
    width = image.shape[1]
    min_length = min(height, width)
    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)
    scales = []
    m = min_detection_size / self.min_face_size
    min_length *= m
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor
        factor_count += 1

    # convert cv2 image to torch
    image = self.cv2Image2Torch(image, self.device)

    # STAGE 1
    with torch.no_grad():
        bounding_boxes = []
        for scale in scales:  # run P-Net on different scales
            img = torch.nn.functional.interpolate(image, scale_factor=scale, mode='bilinear')
            output = self.pnet(img)
            probs = output[1][0, 1, :, :]
            offsets = output[0]
            # Generate bounding boxes at places where there is probably a face.
            stride = 2
            cell_size = 12
            inds = torch.nonzero(probs > self.thresholds[0])
            if inds.numel() == 0:
                continue
            offsets = offsets[:, :, inds[:, 0], inds[:, 1]].squeeze(0)
            score = probs[inds[:, 0], inds[:, 1]]
            inds = inds.to(dtype=torch.float)
            # P-Net is applied to scaled images, so we need to rescale bounding boxes back
            boxes = torch.cat([
                torch.round((stride * inds[:, 1] + 1.0) / scale).unsqueeze(0),
                torch.round((stride * inds[:, 0] + 1.0) / scale).unsqueeze(0),
                torch.round((stride * inds[:, 1] + 1.0 + cell_size) / scale).unsqueeze(0),
                torch.round((stride * inds[:, 0] + 1.0 + cell_size) / scale).unsqueeze(0),
                score.unsqueeze(0),
                offsets
            ])
            boxes = boxes.transpose(1, 0)
            if boxes is None or boxes.numel() == 0:
                continue
            keep = nms(boxes[:, 0:5], 0.5)
            bounding_boxes.append(boxes[keep])

        if not bounding_boxes:
            return self.empty_float, self.empty_float
        bounding_boxes = torch.cat(bounding_boxes, dim=0)

        keep = nms(bounding_boxes[:, 0:5], self.nms_thresholds[0])
        if len(keep) == 0:
            return self.empty_float, self.empty_float
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = torch.round(bounding_boxes[:, 0:4])

        # STAGE 2
        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        if img_boxes.numel() == 0:
            return self.empty_float, self.empty_float
        output = self.rnet(img_boxes)
        offsets = output[0]  # shape [n_boxes, 4]
        probs = output[1]    # shape [n_boxes, 2]
        keep = torch.nonzero(probs[:, -1] > self.thresholds[1]).view(-1)
        if len(keep) == 0:
            return self.empty_float, self.empty_float
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1]
        offsets = offsets[keep]

        keep = nms(bounding_boxes, self.nms_thresholds[1])
        if len(keep) == 0:
            return self.empty_float, self.empty_float
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = torch.round(bounding_boxes[:, 0:4])

        # STAGE 3
        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if img_boxes.numel() == 0:
            return self.empty_float, self.empty_float
        output = self.onet(img_boxes)
        landmarks = output[0]  # shape [n_boxes, 10]
        offsets = output[1]    # shape [n_boxes, 4]
        probs = output[2]      # shape [n_boxes, 2]
        keep = torch.nonzero(probs[:, 1] > self.thresholds[2]).view(-1)
        if len(keep) == 0:
            return self.empty_float, self.empty_float
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1]
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points
        width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = xmin.unsqueeze(1) + width.unsqueeze(1) * landmarks[:, 0:5]
        landmarks[:, 5:10] = ymin.unsqueeze(1) + height.unsqueeze(1) * landmarks[:, 5:10]

        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, self.nms_thresholds[2], min_mode=True)
        if len(keep) == 0:
            return self.empty_float, self.empty_float
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

    return bounding_boxes, landmarks
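For intuition on the stage-1 arithmetic above: each P-Net output cell (row, col) corresponds to a 12x12 window with stride 2 in the scaled input, and dividing by the scale maps the window back to original-image coordinates. A standalone sketch with hypothetical values:

import torch

# One detection at output cell (row=3, col=7) on an image scaled by 0.5.
stride, cell_size, scale = 2, 12, 0.5
inds = torch.tensor([[3.0, 7.0]])  # (row, col), hypothetical
x1 = torch.round((stride * inds[:, 1] + 1.0) / scale)              # tensor([30.])
y1 = torch.round((stride * inds[:, 0] + 1.0) / scale)              # tensor([14.])
x2 = torch.round((stride * inds[:, 1] + 1.0 + cell_size) / scale)  # tensor([54.])
y2 = torch.round((stride * inds[:, 0] + 1.0 + cell_size) / scale)  # tensor([38.])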
def vos_inference():
    # Model and version
    if torch.cuda.is_available():
        print('using Cuda devices, num:', torch.cuda.device_count())

    Testset = TIANCHI_FUSAI(DATA_ROOT, imset='test.txt', target_size=TARGET_SHAPE, with_flip=WITH_FLIP, test_scale=TEST_SCALE)
    print('Total test videos: {}'.format(len(Testset)))
    Testloader = data.DataLoader(Testset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True)

    if not OL:
        model = nn.DataParallel(MODEL)
        if torch.cuda.is_available():
            model.cuda()
        model.eval()  # turn off BN
        print('Loading weights:', MODEL_PATH)
        model_ = torch.load(MODEL_PATH)
        if 'state_dict' in model_.keys():
            state_dict = model_['state_dict']
        else:
            state_dict = model_
        model.load_state_dict(state_dict)
    else:
        model = online_learning()
        model.eval()

    torch.set_grad_enabled(False)

    code_name = 'Tianchi fusai'
    # date = datetime.datetime.strftime(datetime.datetime.now(), '%y%m%d%H%M')
    print('Start Testing:', code_name)
    progressbar = tqdm.tqdm(Testloader)

    for V in progressbar:
        Fs, info = V
        if isinstance(Fs, list):
            b, c, t, h, w = Fs[0].shape
        else:
            b, c, t, h, w = Fs.shape
        seq_name = info['name'][0]
        ori_shape = info['ori_shape']
        target_shape = info['target_shape']
        target_shape = (target_shape[0].cpu().numpy()[0], target_shape[1].cpu().numpy()[0])
        num_frames = info['num_frames'][0].item()
        if '_' in seq_name:
            video_name = seq_name.split('_')[0]
        else:
            video_name = seq_name
        frame_list = VIDEO_FRAMES[video_name]
        print('[{}]: num_frames: {}'.format(seq_name, num_frames))

        if isinstance(Fs, list):
            result = []
            for idx, f in enumerate(Fs):
                if idx == 1:
                    seg_results = mask_inference(video_name, target_shape, SOLO_INTERVAL, SCORE_THR, hflip=True)
                elif idx == 2:
                    seg_results = mask_inference(video_name, TEST_SCALE, SOLO_INTERVAL, SCORE_THR, hflip=False)
                else:
                    seg_results = mask_inference(video_name, target_shape, SOLO_INTERVAL, SCORE_THR, hflip=False)
                results = Run_video(model, f, seg_results, num_frames, Mem_every=5, model_name=MODEL_NAME)
                if idx == 1:  # un-flip the predictions from the flipped pass
                    for i, (es, ins) in enumerate(results):
                        es = es.cpu().detach().numpy()
                        es = es[:, :, :, :, ::-1]
                        es = np.ascontiguousarray(es)
                        es = torch.from_numpy(es).cuda()
                        results[i] = (es, ins)
                if idx == 2:  # rescale predictions from the test scale back to (h, w)
                    for i, (es, ins) in enumerate(results):
                        e = torch.empty(b, 1, t, h, w)
                        for f in range(t):  # NOTE: reuses (and shadows) the loop variable `f`
                            e[:, :, f, :, :] = F.interpolate(es[:, :, f, :, :], (h, w))
                        e = e.cuda()
                        results[i] = (e, ins)
                result.append(results)
            results = merge_result(result)
        else:
            seg_results = mask_inference(video_name, target_shape, SOLO_INTERVAL, SCORE_THR)
            results = Run_video(model, Fs, seg_results, num_frames, Mem_every=5, model_name=MODEL_NAME)
            results = [(torch.round(a), b) for a, b in results]

        for result in results:
            pred, instance = result
            test_path = os.path.join(TMP_PATH, seq_name + '_{}'.format(instance))
            if not os.path.exists(test_path):
                os.makedirs(test_path)
            for f in range(num_frames):
                img_E = Image.fromarray(pred[0, 0, f].cpu().numpy().astype(np.uint8))
                img_E.putpalette(PALETTE)
                img_E = img_E.resize(ori_shape[::-1])
                img_E.save(os.path.join(test_path, '{}.png'.format(frame_list[f])))
def prepare(feat):
    B, n_ch, n_voxels = feat.size()
    n_ch_1 = n_ch + 1

    # Embed features into the hyperplane used by the permutohedral lattice.
    conv_tensor = np.tril(np.ones((n_ch + 1, n_ch + 1)), -1).T
    conv_tensor += np.diag([-i for i in range(n_ch + 1)])
    conv_tensor = conv_tensor[:, 1:]
    conv_tensor = np.matmul(conv_tensor, np.sqrt(np.diag([1 / (d * (d + 1)) for d in range(1, n_ch + 1)])))
    inv_std_dev = np.sqrt(2 / 3.) * (n_ch + 1)
    conv_tensor *= inv_std_dev
    conv_tensor = conv_tensor[:, :, np.newaxis]
    feat = F.conv1d(feat, torch.FloatTensor(conv_tensor).cuda())

    # Find the enclosing simplex: nearest remainder-0 lattice point and ranks.
    feat_v = torch.round(feat / (n_ch + 1))
    rem0 = feat_v * (n_ch + 1)
    index = torch.argsort(feat - rem0, dim=1, descending=True)
    rank = torch.argsort(index, dim=1, descending=False)
    rank = rank + torch.sum(feat_v, 1).unsqueeze(1).type(torch.cuda.LongTensor)
    add_minus = (rank < 0).type(torch.cuda.LongTensor) - (rank > n_ch).type(torch.cuda.LongTensor)
    add_minus *= (n_ch + 1)
    rank = rank + add_minus
    rem0 = rem0 + add_minus.type(torch.cuda.FloatTensor)

    # Barycentric coordinates within the simplex.
    y = (feat - rem0) / (n_ch + 1)
    v_sorted = torch.sort(y, dim=1, descending=False)[0]
    barycentric = v_sorted - torch.cat([v_sorted[:, -1:] - 1., v_sorted[:, :-1]], 1)

    canonical = torch.cuda.FloatTensor(
        [[i] * ((n_ch + 1) - i) + [i - (n_ch + 1)] * i for i in range(n_ch + 1)])

    def _simple_hash(key):
        key = key.type(torch.cuda.DoubleTensor)
        hash_vector = np.floor(np.power(np.iinfo(np.int64).max, 1. / (n_ch + 2)))
        hash_vector = torch.pow(hash_vector, torch.arange(1, n_ch + 1))
        hash_vector = hash_vector.type(torch.DoubleTensor).unsqueeze(0)
        hash_vector = hash_vector.cuda()
        if len(key.size()) == 3:
            hash_vector = hash_vector.unsqueeze(2)
            return torch.sum(key * hash_vector.repeat(key.size(0), 1, key.size(-1)), 1)
        if len(key.size()) == 2:
            return torch.sum(key * hash_vector.repeat(key.size(0), 1), 1)

    # Hash every simplex vertex into the lattice.
    dic_hash_lattice = HashTable(n_ch, torch.cuda.DoubleTensor, 2**30)
    loc = [None] * (n_ch + 1)
    loc_hash = [None] * (n_ch + 1)
    for scit in range(n_ch + 1):
        loc[scit] = torch.gather(
            canonical[scit:scit + 1].unsqueeze(2).repeat(rank.size(0), 1, rank.size(2)), 1, rank[:, :-1])
        loc[scit] += rem0[:, :-1]
        loc_hash[scit] = _simple_hash(loc[scit])
        loc[scit] = torch.reshape(loc[scit].permute(0, 2, 1), (-1, n_ch))
        dic_hash_lattice.add_values(loc_hash[scit].view(-1), loc[scit])

    dic_hash_lattice.filter_values()
    fused_loc = dic_hash_lattice.export_values()
    dic_hash_lattice.update_rank()

    # Precompute blur neighbours along each lattice direction.
    indices = [None] * n_ch_1
    blur_neighbours1 = [None] * n_ch_1
    blur_neighbours2 = [None] * n_ch_1
    default = torch.tensor(0).type(torch.LongTensor).cuda()
    for dit in range(n_ch_1):
        offset = [n_ch if i == dit else -1 for i in range(n_ch)]
        offset = torch.cuda.FloatTensor(offset)
        blur_neighbours1[dit] = dic_hash_lattice.get_rank(_simple_hash(fused_loc + offset).view(-1))[:, 0]
        blur_neighbours2[dit] = dic_hash_lattice.get_rank(_simple_hash(fused_loc - offset).view(-1))[:, 0]
        indices[dit] = dic_hash_lattice.get_rank(loc_hash[dit].view(-1)).view(B, n_voxels)

    return rank, barycentric, blur_neighbours1, blur_neighbours2, indices
def train(attention_model, train_loader, criterion, optimizer, epochs=5, use_regularization=False, C=0, clip=False):
    """
    Training code

    Args:
        attention_model : {object} model
        train_loader : {DataLoader} training data loaded into a dataloader
        optimizer : optimizer
        criterion : loss function. Must be BCELoss for binary classification and NLLLoss for multiclass
        epochs : {int} number of epochs
        use_regularization : {bool} use penalization or not
        C : {int} penalization coefficient
        clip : {bool} use gradient clipping or not

    Returns:
        accuracy and losses of the model
    """
    losses = []
    accuracy = []
    for i in range(epochs):
        print("Running EPOCH", i + 1)
        total_loss = 0
        n_batches = 0
        correct = 0
        for batch_idx, train in enumerate(train_loader):
            attention_model.hidden_state = attention_model.init_hidden()
            x, y = Variable(train[0]), Variable(train[1])
            y_pred, att = attention_model(x)

            # penalization of AAT - I
            if use_regularization:
                attT = att.transpose(1, 2)
                identity = torch.eye(att.size(1))
                identity = Variable(identity.unsqueeze(0).expand(train_loader.batch_size, att.size(1), att.size(1)))
                penal = attention_model.l2_matrix_norm(att @ attT - identity)

            if not bool(attention_model.type):  # binary classification
                # Adding a very small value to prevent BCELoss from outputting NaNs
                correct += torch.eq(torch.round(y_pred.type(torch.DoubleTensor).squeeze(1)), y).data.sum()
                if use_regularization:
                    try:
                        loss = criterion(y_pred.type(torch.DoubleTensor).squeeze(1) + 1e-8, y) + C * penal / train_loader.batch_size
                    except RuntimeError:
                        raise Exception("BCELoss gets nan values on regularization. Either remove regularization or add very small values")
                else:
                    loss = criterion(y_pred.type(torch.DoubleTensor).squeeze(1), y)
            else:
                correct += torch.eq(torch.max(y_pred, 1)[1], y.type(torch.LongTensor)).data.sum()
                if use_regularization:
                    loss = criterion(y_pred, y) + (C * penal / train_loader.batch_size).type(torch.FloatTensor)
                else:
                    loss = criterion(y_pred, y)

            total_loss += loss.data
            optimizer.zero_grad()
            loss.backward()

            # gradient clipping
            if clip:
                torch.nn.utils.clip_grad_norm(attention_model.parameters(), 0.5)  # clip_grad_norm_ in newer PyTorch
            optimizer.step()
            n_batches += 1

        print("avg_loss is", total_loss / n_batches)
        print("Accuracy of the model", correct / (n_batches * train_loader.batch_size))
        losses.append(total_loss / n_batches)
        accuracy.append(correct / (n_batches * train_loader.batch_size))
    return losses, accuracy
def sample(self, sample_shape=torch.Size()):
    shape = self._extended_shape(sample_shape)
    return torch.round(self.probs.expand(shape))
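Note that this sample simply rounds the success probabilities (a deterministic 0.5 threshold, up to the round-half-to-even tie rule) rather than comparing them against uniform noise, so every "draw" is identical. A quick illustration with hypothetical values:

import torch

# Deterministic "sampling": probabilities are rounded, not sampled.
probs = torch.tensor([0.2, 0.7])
print(torch.round(probs.expand(torch.Size([3, 2]))))
# tensor([[0., 1.],
#         [0., 1.],
#         [0., 1.]])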
def multiclass_nms(multi_tubes,       # (n, 15)
                   multi_scores,      # (n, 1 + n_cls)
                   score_thr,
                   iou_thre,
                   max_num=-1,
                   score_factors=None,  # (n,)
                   frame_num=16):
    """NMS for multi-class tubes.

    Args:
        multi_tubes (Tensor): shape (n, 15), tube boxes.
        multi_scores (Tensor): shape (n, 1 + #class).
        score_thr (float): score threshold; tubes with scores lower than it
            will not be considered.
        iou_thre (float): NMS IoU threshold.
        max_num (int): if there are more than max_num tubes after NMS,
            only the top max_num will be kept.
        score_factors (Tensor): factors multiplied to scores before applying NMS.
        frame_num (int): number of frames in the input.

    Returns:
        tuple: (tubes, labels), tensors of shape (k, 16) and (k,).
            Labels are 0-based.
    """
    num_classes = multi_scores.shape[1]
    tubes, labels = [], []
    nms_op = nms
    for i in range(1, num_classes):
        cls_inds = multi_scores[:, i] > score_thr
        # print('before: ' + str(len(cls_inds)))
        if not cls_inds.any():
            continue
        # get tubes and scores of this class
        _tubes = multi_tubes[cls_inds, :]
        _scores = multi_scores[cls_inds, i]
        if score_factors is not None:
            _scores *= score_factors[cls_inds]
        # do nms in each frame
        for n_f in range(frame_num):
            frame_inds = torch.round(_tubes[:, 0]) == n_f
            if torch.sum(frame_inds) == 0:
                continue
            _tubes_single_frame = _tubes[frame_inds]
            # mid_frame = _bboxes_single_frame[:, 1:5]
            # cls_dets = torch.cat([mid_frame, _scores[frame_inds, None]], dim=1)  # (n, 4 + 1)
            cls_dets = torch.cat([_tubes_single_frame, _scores[frame_inds, None]], dim=1)  # (n, 15 + 1)
            _, inds = nms_op(cls_dets, iou_thre)
            # cls_dets = _bboxes_single_frame[inds]
            cls_dets = cls_dets[inds]
            cls_labels = multi_tubes.new_full((cls_dets.shape[0], ), i - 1, dtype=torch.long)
            tubes.append(cls_dets)
            labels.append(cls_labels)

    if tubes:
        tubes = torch.cat(tubes)
        labels = torch.cat(labels)
        # print('middle: ' + str(len(bboxes)))
        # =====================================
        # bboxes = bboxes[bboxes[:, -1] > score_thr]
        # =====================================
        if tubes.shape[0] > max_num:
            _, inds = tubes[:, -1].sort(descending=True)
            inds = inds[:max_num]
            tubes = tubes[inds]
            labels = labels[inds]
    else:
        tubes = multi_tubes.new_zeros((0, multi_tubes.shape[1] + 1))
        labels = multi_tubes.new_zeros((0, ), dtype=torch.long)
    # print('after: ' + str(len(bboxes)))
    return tubes, labels
for epoch in range(epochs):
    start_time = time.time()
    correct = 0
    running_loss = 0  # loss in each epoch
    model.train()
    count = 0
    for tl, train_batch in enumerate(train_loader):
        optimizer.zero_grad()
        train_img, train_csv, train_targ = train_batch
        img, csv, y = train_img.float().to(device), train_csv.to(device), train_targ.to(device)
        yhat = model(img, csv)
        loss = criterion(yhat, y.reshape(-1, 1).float())
        loss.backward()
        optimizer.step()
        pred = torch.round(torch.sigmoid(yhat))  # round off sigmoid to obtain predictions
        correct += (y.squeeze().cpu() == pred.squeeze().cpu()).sum().item()
        running_loss += loss.item()
    train_accuracy = correct / len(train_index)
    running_loss /= len(train_loader)
    end_time = time.time()

    # validating on our validation dataset
    model.eval()
    # matrix to store evaluation predictions
    val_predicts = torch.zeros((len(valid_index), 1), dtype=torch.float32, device=device)
    # disable gradients, no optimization required for evaluation
def forward(ctx, x, B):
    ctx.constant = B
    step = 2 ** B
    out = torch.round(x * step - 0.5)
    out = Num2Bit(out, B)
    return out
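As a sanity check on the forward arithmetic (Num2Bit is defined elsewhere): for x in [0, 1), torch.round(x * 2**B - 0.5) behaves like floor(x * 2**B) up to the tie-breaking rule, mapping x onto integer levels 0 .. 2**B - 1. A hedged sketch of that step alone, without the bit-packing:

import torch

# B-bit uniform quantization of x in [0, 1).
B = 2
x = torch.tensor([0.0, 0.3, 0.6, 0.99])
levels = torch.round(x * 2 ** B - 0.5)
print(levels)  # tensor([0., 1., 2., 3.])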
def compute_projection(self, depth, camera_to_world, world_to_grid):
    # compute projection by voxels -> image
    # print 'camera_to_world', camera_to_world
    # print 'intrinsic', self.intrinsic
    # print(world_to_grid)
    world_to_camera = torch.inverse(camera_to_world)
    grid_to_world = torch.inverse(world_to_grid)
    voxel_bounds_min, voxel_bounds_max = self.compute_frustum_bounds(world_to_grid, camera_to_world)
    voxel_bounds_min = np.maximum(voxel_bounds_min, 0).cuda().float() if depth.is_cuda else np.maximum(voxel_bounds_min, 0).cpu().float()
    voxel_bounds_max = np.minimum(voxel_bounds_max, self.volume_dims).cuda().float() if depth.is_cuda else np.minimum(voxel_bounds_max, self.volume_dims).cpu().float()

    # coordinates within frustum bounds
    # TODO python opt for this part instead of lua/torch opt?
    lin_ind_volume = torch.arange(0, self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2], out=torch.LongTensor())
    lin_ind_volume = lin_ind_volume.cuda() if depth.is_cuda else lin_ind_volume.cpu()
    coords = camera_to_world.new(4, lin_ind_volume.size(0))
    coords[2] = lin_ind_volume / (self.volume_dims[0] * self.volume_dims[1])
    tmp = lin_ind_volume - (coords[2] * self.volume_dims[0] * self.volume_dims[1]).long()
    coords[1] = tmp / self.volume_dims[0]
    coords[0] = torch.remainder(tmp, self.volume_dims[0])
    coords[3].fill_(1)
    mask_frustum_bounds = torch.ge(coords[0], voxel_bounds_min[0]) * torch.ge(coords[1], voxel_bounds_min[1]) * torch.ge(coords[2], voxel_bounds_min[2])
    mask_frustum_bounds = mask_frustum_bounds * torch.lt(coords[0], voxel_bounds_max[0]) * torch.lt(coords[1], voxel_bounds_max[1]) * torch.lt(coords[2], voxel_bounds_max[2])
    if not mask_frustum_bounds.any():
        print('error: nothing in frustum bounds')
        return None
    lin_ind_volume = lin_ind_volume[mask_frustum_bounds]
    coords = coords.resize_(4, lin_ind_volume.size(0))
    coords[2] = lin_ind_volume / (self.volume_dims[0] * self.volume_dims[1])
    tmp = lin_ind_volume - (coords[2] * self.volume_dims[0] * self.volume_dims[1]).long()
    coords[1] = tmp / self.volume_dims[0]
    coords[0] = torch.remainder(tmp, self.volume_dims[0])
    coords[3].fill_(1)

    # transform to current frame
    p = torch.mm(world_to_camera, torch.mm(grid_to_world, coords))

    # project into image
    p[0] = (p[0] * self.intrinsic[0][0]) / p[2] + self.intrinsic[0][2]
    p[1] = (p[1] * self.intrinsic[1][1]) / p[2] + self.intrinsic[1][2]
    pi = torch.round(p).long()

    valid_ind_mask = torch.ge(pi[0], 0) * torch.ge(pi[1], 0) * torch.lt(pi[0], self.image_dims[0]) * torch.lt(pi[1], self.image_dims[1])
    if not valid_ind_mask.any():
        print('error: no valid image indices')
        return None
    valid_image_ind_x = pi[0][valid_ind_mask]
    valid_image_ind_y = pi[1][valid_ind_mask]
    valid_image_ind_lin = valid_image_ind_y * self.image_dims[0] + valid_image_ind_x
    depth_vals = torch.index_select(depth.view(-1), 0, valid_image_ind_lin)
    depth_mask = depth_vals.ge(self.depth_min) * depth_vals.le(self.depth_max) * torch.abs(depth_vals - p[2][valid_ind_mask]).le(self.voxel_size)
    if not depth_mask.any():
        print('error: no valid depths')
        return None

    lin_ind_update = lin_ind_volume[valid_ind_mask]
    lin_ind_update = lin_ind_update[depth_mask]
    # needs to be same size for all in batch... (first element holds the count)
    lin_indices_3d = lin_ind_update.new(self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2] + 1)
    lin_indices_2d = lin_ind_update.new(self.volume_dims[0] * self.volume_dims[1] * self.volume_dims[2] + 1)
    lin_indices_3d[0] = lin_ind_update.shape[0]
    lin_indices_2d[0] = lin_ind_update.shape[0]
    lin_indices_3d[1:1 + lin_indices_3d[0]] = lin_ind_update
    lin_indices_2d[1:1 + lin_indices_2d[0]] = torch.index_select(valid_image_ind_lin, 0, torch.nonzero(depth_mask)[:, 0])
    num_ind = lin_indices_3d[0]
    # print '[proj] #ind = ', lin_indices_3d[0]
    # print '2d', torch.min(lin_indices_2d[1:1+num_ind]), torch.max(lin_indices_2d[1:1+num_ind])
    # print '3d', torch.min(lin_indices_3d[1:1+num_ind]), torch.max(lin_indices_3d[1:1+num_ind])
    return lin_indices_3d, lin_indices_2d
def shift(x, ceil=True):
    # TODO: edge case when x contains 0 (log2(0) = -inf)
    if ceil:
        return 2. ** torch.ceil(torch.log2(x))
    else:
        return 2. ** torch.round(torch.log2(x))
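shift snaps each value to a power of two (the next one up with ceil, the nearest one otherwise), which is what makes shift-based multiplication possible in quantized networks. A quick illustration with hypothetical inputs:

import torch

x = torch.tensor([0.3, 1.5, 6.0])
print(2. ** torch.ceil(torch.log2(x)))   # tensor([0.5000, 2.0000, 8.0000])
print(2. ** torch.round(torch.log2(x)))  # tensor([0.2500, 2.0000, 8.0000])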
inputs, Survived = Variable(inputs), Variable(Survived, requires_grad=False)
optimizer.zero_grad()
outputs = m(inputs)

# compute loss and gradients
loss = criterion(outputs, Survived)
# losses.append(loss)
if phase == 'train':
    loss.backward()
    # update weights
    optimizer.step()

# statistics
running_loss += loss.data[0] * inputs.size(0)
running_corrects += torch.sum(torch.round(F.sigmoid(outputs.data)) == Survived.data)

epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects / dataset_sizes[phase]
if epoch and np.mod(epoch + 1, 100) == 0:
    # print(f'epoch {epoch}')
    print('epoch:{} {} Loss: {:.4f} Acc: {:.4f}'.format(epoch, phase, epoch_loss, epoch_acc))

# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
    best_acc = epoch_acc
    best_model_wts = copy.deepcopy(m.state_dict())

time_elapsed = time.time() - since
def quantize(self, input_ri, q_bits):
    scale = pow(2, q_bits) - 1
    output = torch.round(input_ri * scale) / scale
    return output
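Here scale = 2**q_bits - 1, so inputs in [0, 1] snap to the nearest multiple of 1/scale; e.g. q_bits = 3 gives multiples of 1/7. A quick check with hypothetical inputs:

import torch

# q_bits = 3 -> scale = 2**3 - 1 = 7: values snap to multiples of 1/7.
input_ri = torch.tensor([0.00, 0.10, 0.50, 1.00])
scale = 2 ** 3 - 1
print(torch.round(input_ri * scale) / scale)
# tensor([0.0000, 0.1429, 0.5714, 1.0000])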
def forward(self, x):
    out = torch.round(x)
    return out
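A caveat on modules like this: torch.round has zero gradient almost everywhere, so placing it directly in a trained network blocks backpropagation. When rounding must sit inside a trainable path, a common workaround (not part of the code above) is a straight-through estimator; a minimal sketch:

import torch

class RoundSTE(torch.autograd.Function):
    # Straight-through estimator: round in the forward pass, pass the
    # incoming gradient through unchanged in the backward pass.
    # Illustrative alternative, not the original module's behavior.
    @staticmethod
    def forward(ctx, x):
        return torch.round(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output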