value_scale = 1 input_type = 'rgb' sample_t_stride = 1 sample_duration = 16 inference_stride = 16 #normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm) normalize = Normalize(mean, std) spatial_transform = [Resize(sample_size)] if inference_crop == 'center': spatial_transform.append(CenterCrop(sample_size)) if input_type == 'flow': spatial_transform.append(PickFirstChannels(n=2)) spatial_transform.append(ToTensor()) spatial_transform.extend([ScaleValue(value_scale), normalize]) spatial_transform = Compose(spatial_transform) temporal_transform = [] if sample_t_stride > 1: temporal_transform.append(TemporalSubsampling(sample_t_stride)) temporal_transform.append(SlidingWindow(sample_duration, inference_stride)) temporal_transform = TemporalCompose(temporal_transform) # Load the model #print('load model begin!') model = generate_model_resnet(1) # build the ResNet model #model = torch.load('./save_200.pth') checkpoint = torch.load('./save_200.pth', map_location='cpu') model.load_state_dict(checkpoint['state_dict']) #print(model) model.eval() # freeze batchnorm, dropout, etc.; this call is required for inference
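# A minimal sketch of the sliding-window behaviour assumed above: given a
# list of frame indices, emit one clip of `size` indices every `stride`
# frames. Names and semantics mirror the SlidingWindow transform as used
# here; the real implementation may pad or keep the final partial window.
class SlidingWindowSketch:
    def __init__(self, size, stride):
        self.size = size
        self.stride = stride

    def __call__(self, frame_indices):
        clips = []
        for start in range(0, len(frame_indices), self.stride):
            clip = frame_indices[start:start + self.size]
            if len(clip) == self.size:
                clips.append(clip)
        return clips

# e.g. SlidingWindowSketch(16, 16)(list(range(40))) yields two non-overlapping 16-frame clips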
def main_run(dataset, stage, trainDatasetDir, valDatasetDir, stage1_dict, stackSize, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, alphaX, alphaY): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 else: print('Dataset not found') sys.exit() model_folder = os.path.join( './', out_dir, 'attConvLSTM', str(seqLen), 'stage' + str(stage)) # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224) ]) spatial_transform2 = Compose([Scale((7, 7)), ToTensor()]) vid_seq_train = makeDataset(trainDatasetDir, spatial_transform2, spatial_transform=spatial_transform, sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', seqLen=seqLen) trainInstances = vid_seq_train.__len__() train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) if valDatasetDir is not None: vid_seq_val = makeDataset(valDatasetDir, spatial_transform2, spatial_transform=Compose( [Scale(256), CenterCrop(224)]), sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test', seqLen=seqLen) valInstances = vid_seq_val.__len__() val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) train_params = [] if stage == 1: model = attentionModel(num_classes=num_classes, mem_size=memSize) model.train(False) for params in model.parameters(): params.requires_grad = False else: # stage == 2 model = attentionModel(num_classes=num_classes, mem_size=memSize) model.load_state_dict(torch.load(stage1_dict), strict=False) model.train(False) for params in model.parameters(): params.requires_grad = False # for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.fc.parameters(): params.requires_grad = True train_params += [params] model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for params in model.lstm_cell.parameters(): params.requires_grad = True train_params += [params] 
for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] model.lstm_cell.train(True) model.classifier.train(True) model.cuda() loss_fn = nn.CrossEntropyLoss() loss_fn_regression = nn.MSELoss() # Loss function for the regression model optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 min_accuracy = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 x_loss = 0 y_loss = 0 trainSamples = 0 iterPerEpoch = 0 model.lstm_cell.train(True) model.classifier.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1) if stage == 2: model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) #for i, (inputs, targets) in enumerate(train_loader): for flowX, flowY, inputs, targets in train_loader: train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() flowX = flowX.cuda() flowY = flowY.cuda() inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) labelVariable = Variable(targets.cuda()) trainSamples += inputs.size(0) output_label, _, flowXprediction, flowYprediction = model( inputVariable) #Reshaping predictions and inputs in order #to correctly regress on the inputs flowXprediction = flowXprediction.view(-1) flowX = torch.reshape(flowX, (-1, )).float() flowYprediction = flowYprediction.view(-1) flowY = torch.reshape(flowY, (-1, )).float() #print(f'Prediction: {flowXprediction.size()}') #print(f'Input : {flowX.size()}') #sys.exit() lossX = alphaX * loss_fn_regression(flowXprediction, flowX) lossY = alphaY * loss_fn_regression(flowYprediction, flowY) loss = loss_fn(output_label, labelVariable) #Weighting the loss of the ss task #by multiplying it by alpha total_loss = loss + lossX + lossY total_loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.cuda()).sum() x_loss += lossX.item() y_loss += lossY.item() epoch_loss += loss.item() optim_scheduler.step() avg_x_loss = x_loss / iterPerEpoch avg_y_loss = y_loss / iterPerEpoch avg_loss = epoch_loss / iterPerEpoch trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format( epoch + 1, avg_loss, trainAccuracy)) print('X loss after {} epoch = {}% '.format(epoch + 1, avg_x_loss)) print('Y loss after {} epoch = {}% '.format(epoch + 1, avg_y_loss)) writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1) writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1) writer.add_scalar('x_train_loss', avg_x_loss, epoch + 1) writer.add_scalar('y_train_loss', avg_y_loss, epoch + 1) train_log_loss.write('Training X loss after {} epoch= {}'.format( epoch + 1, avg_x_loss)) train_log_loss.write('Training Y loss after {} epoch= {}'.format( epoch + 1, avg_y_loss)) train_log_loss.write('Training loss after {} epoch = {}\n'.format( epoch + 1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format( epoch + 1, trainAccuracy)) if valDatasetDir is not None: model.train(False) val_loss_epoch = 0 val_iter = 0 val_x_loss = 0 val_y_loss = 0 val_samples = 0 numCorr = 0 mmap_loss = 0 with torch.no_grad(): #for j, (inputs, targets) in enumerate(val_loader): for flowX, 
flowY, inputs, targets in val_loader: val_iter += 1 val_samples += inputs.size(0) flowX = flowX.cuda() flowY = flowY.cuda() inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) labelVariable = Variable(targets.cuda(non_blocking=True)) #labelVariable = Variable(targets.cuda()) output_label, _, flowXprediction, flowYprediction = model(inputVariable) #Reshaping predictions and inputs in order #to correctly regress on the inputs flowXprediction = flowXprediction.view(-1) flowX = torch.reshape(flowX, (-1, )).float() flowYprediction = flowYprediction.view(-1) flowY = torch.reshape(flowY, (-1, )).float() lossX = alphaX * loss_fn_regression(flowXprediction, flowX) lossY = alphaY * loss_fn_regression(flowYprediction, flowY) val_loss = loss_fn(output_label, labelVariable) val_loss_epoch += val_loss.item() val_x_loss += lossX.item() val_y_loss += lossY.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.cuda()).sum() avg_x_val_loss = val_x_loss / val_iter avg_y_val_loss = val_y_loss / val_iter val_accuracy = torch.true_divide(numCorr, val_samples) * 100 avg_val_loss = val_loss_epoch / val_iter print('Val X Loss after {} epochs, loss = {}'.format(epoch + 1, avg_x_val_loss)) print('Val Y Loss after {} epochs, loss = {}'.format(epoch + 1, avg_y_val_loss)) print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy)) writer.add_scalar('val x/epoch_loss', avg_x_val_loss, epoch + 1) writer.add_scalar('val y/epoch_loss', avg_y_val_loss, epoch + 1) writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1) writer.add_scalar('val/accuracy', val_accuracy, epoch + 1) val_log_loss.write('Val X Loss after {} epochs = {}\n'.format(epoch + 1, avg_x_val_loss)) val_log_loss.write('Val Y Loss after {} epochs = {}\n'.format(epoch + 1, avg_y_val_loss)) val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_rgb_state_dict.pth') torch.save(model.state_dict(), save_path_model) min_accuracy = val_accuracy train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close() writer.export_scalars_to_json(model_folder + "/all_scalars.json") writer.close()
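# Self-contained sketch of the self-supervised loss pattern used above:
# predicted and ground-truth flow maps are flattened to 1-D so one MSELoss
# call covers every pixel, then weighted by alphaX/alphaY. Shapes below are
# illustrative, not taken from the repo.
import torch
import torch.nn as nn

mse = nn.MSELoss()
alphaX = 1.0                              # illustrative task weight
flowXprediction = torch.randn(4, 7, 7)    # batch of predicted x-flow maps
flowX = torch.randn(4, 7, 7)              # matching ground-truth maps
lossX = alphaX * mse(flowXprediction.view(-1), flowX.reshape(-1).float())
print(lossX.item())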
def get_train_utils(opt, model_parameters): assert opt.train_crop in ['random', 'corner', 'center'] spatial_transform = [] if opt.train_crop == 'random': spatial_transform.append( RandomResizedCrop( opt.sample_size, (opt.train_crop_min_scale, 1.0), (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio))) elif opt.train_crop == 'corner': scales = [1.0] scale_step = 1 / (2**(1 / 4)) for _ in range(1, 5): scales.append(scales[-1] * scale_step) spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales)) elif opt.train_crop == 'center': spatial_transform.append(Resize(opt.sample_size)) spatial_transform.append(CenterCrop(opt.sample_size)) normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm, opt.no_std_norm) if not opt.no_hflip: spatial_transform.append(RandomHorizontalFlip()) if opt.colorjitter: spatial_transform.append(ColorJitter()) spatial_transform.append(ToTensor()) if opt.input_type == 'flow': spatial_transform.append(PickFirstChannels(n=2)) spatial_transform.append(ScaleValue(opt.value_scale)) spatial_transform.append(normalize) spatial_transform = Compose(spatial_transform) assert opt.train_t_crop in ['random', 'center'] temporal_transform = [] if opt.sample_t_stride > 1: temporal_transform.append(TemporalSubsampling(opt.sample_t_stride)) if opt.train_t_crop == 'random': temporal_transform.append(TemporalRandomCrop(opt.sample_duration)) elif opt.train_t_crop == 'center': temporal_transform.append(TemporalCenterCrop(opt.sample_duration)) temporal_transform = TemporalCompose(temporal_transform) train_data = get_training_data(opt.video_path, opt.annotation_path, opt.dataset, opt.input_type, opt.file_type, spatial_transform, temporal_transform) if opt.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( train_data) else: train_sampler = None train_loader = torch.utils.data.DataLoader(train_data, batch_size=opt.batch_size, shuffle=(train_sampler is None), num_workers=opt.n_threads, pin_memory=True, sampler=train_sampler, worker_init_fn=worker_init_fn) if opt.is_master_node: train_logger = Logger(opt.result_path / 'train.log', ['epoch', 'loss', 'acc', 'lr']) train_batch_logger = Logger( opt.result_path / 'train_batch.log', ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr']) else: train_logger = None train_batch_logger = None if opt.nesterov: dampening = 0 else: dampening = opt.dampening optimizer = SGD(model_parameters, lr=opt.learning_rate, momentum=opt.momentum, dampening=dampening, weight_decay=opt.weight_decay, nesterov=opt.nesterov) assert opt.lr_scheduler in ['plateau', 'multistep'] assert not (opt.lr_scheduler == 'plateau' and opt.no_val) if opt.lr_scheduler == 'plateau': scheduler = lr_scheduler.ReduceLROnPlateau( optimizer, 'min', patience=opt.plateau_patience) else: scheduler = lr_scheduler.MultiStepLR(optimizer, opt.multistep_milestones) return (train_loader, train_sampler, train_logger, train_batch_logger, optimizer, scheduler)
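# Quick check of the five crop scales the 'corner' branch above builds,
# spaced by a factor of 2**(-1/4):
scale_step = 1 / (2 ** (1 / 4))
scales = [1.0]
for _ in range(1, 5):
    scales.append(scales[-1] * scale_step)
print([round(s, 4) for s in scales])  # [1.0, 0.8409, 0.7071, 0.5946, 0.5]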
def main_run(model_state_dict, dataset_dir, seqLen, memSize, out_dir): model_folder = os.path.join('./', out_dir, 'attConvLSTMDoubleResnet', str(seqLen)) #dataset = 'gtea61' num_classes = 61 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) vid_seq_test = makeDataset(dataset_dir, seqLen=seqLen, fmt='.png', train=False, spatial_transform=spatial_transform, users=['S2']) test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1, shuffle=False, num_workers=2, pin_memory=True) model = attentionDoubleResnet(num_classes=num_classes, mem_size=memSize) model.load_state_dict(torch.load(model_state_dict)) for params in model.parameters(): params.requires_grad = False model.train(False) model.cuda() test_samples = vid_seq_test.__len__() print('Number of samples = {}'.format(test_samples)) print('Evaluating...') numCorr = 0 true_labels = [] predicted_labels = [] with torch.no_grad(): #for j, (inputs, targets) in enumerate(test_loader): for inputs, inputsSN, targets in test_loader: inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda()) output_label, _ = model(inputVariable, inputSNVariable) _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.cuda()).sum() true_labels.append(targets) predicted_labels.append(predicted.cpu()) test_accuracy = torch.true_divide(numCorr, test_samples) * 100 test_accuracy = 'Test Accuracy = {}%'.format(test_accuracy) print(test_accuracy) fil = open(model_folder + "/test_log_acc.txt", "w") fil.write(test_accuracy) fil.close() cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float) cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis] ticks = np.linspace(0, 60, num=61) plt.figure(1, figsize=(12, 12), dpi=100.0) plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary') plt.colorbar() plt.xticks(ticks, fontsize=6) plt.yticks(ticks, fontsize=6) plt.grid(True) plt.clim(0, 1) xy = np.arange(start=0, stop=61) plt.plot(xy, xy) plt.savefig(model_folder + '/cnf_matrix_normalized.png', bbox_inches='tight') plt.show()
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, regressor): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 else: print('Dataset not found') sys.exit() model_folder = os.path.join('./', out_dir, dataset, 'MS',str(stage)) # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') train_log_loss_ms= open((model_folder + '/train_log_loss_ms.txt'), 'w') val_log_loss_ms = open((model_folder + '/val_log_loss_ms.txt'), 'w') train_log_acc_ms= open((model_folder + '/train_log_acc_ms.txt'), 'w') val_log_acc_ms = open((model_folder + '/val_log_acc_ms.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)]) vid_seq_train = makeDataset(train_data_dir, spatial_transform=spatial_transform, seqLen=seqLen, fmt='.png',phase='train', regressor=regressor) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) if val_data_dir is not None: vid_seq_val = makeDataset(val_data_dir, spatial_transform=Compose([Scale(256), CenterCrop(224)]), seqLen=seqLen, fmt='.png',phase='test', regressor=regressor) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valInstances = vid_seq_val.__len__() trainInstances = vid_seq_train.__len__() train_params = [] if stage == 1: model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor) model.train(False) for params in model.parameters(): params.requires_grad = False else: model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor) model.load_state_dict(torch.load(stage1_dict),strict=False) model.train(False) for params in model.parameters(): params.requires_grad = False # for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.resNet.fc.parameters(): params.requires_grad = True train_params += [params] for params in model.conv.parameters(): params.requires_grad = True train_params += [params] for params in model.clas.parameters(): params.requires_grad = True train_params += [params] model.conv.train(True) model.clas.train(True) 
model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for params in model.lstm_cell.parameters(): params.requires_grad = True train_params += [params] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] model.lstm_cell.train(True) model.classifier.train(True) model.cuda() loss_fn = nn.CrossEntropyLoss() loss_fms = nn.NLLLoss() loss_reg = nn.MSELoss() optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 min_accuracy = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 numCorrTrain_ms = 0 trainSamples = 0 iterPerEpoch = 0 epoch_loss_ms = 0 model.lstm_cell.train(True) model.classifier.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1) if stage == 2: model.conv.train(True) model.clas.train(True) model.resNet.layer4[0].conv1.train(True) model.resNet.layer4[0].conv2.train(True) model.resNet.layer4[1].conv1.train(True) model.resNet.layer4[1].conv2.train(True) model.resNet.layer4[2].conv1.train(True) model.resNet.layer4[2].conv2.train(True) model.resNet.fc.train(True) for i, (inputs ,binary_map, targets) in enumerate(train_loader): train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda()) labelVariable = Variable(targets.cuda()) trainSamples += inputs.size(0) output_label, output_ms = model(inputVariable) loss = loss_fn(output_label, labelVariable) if stage==2 : loss.backward(retain_graph=True) else: loss.backward() if regressor == 0: binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda()) output_ms = output_ms.view(-1,2) elif regressor == 1: binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).cuda()) output_ms = output_ms.view(-1) binary_map =binary_map.contiguous().view(-1) if stage==2: if regressor == 1: loss_ms=loss_reg(output_ms, binary_map) loss_ms.backward() epoch_loss_ms+=loss_ms.item() elif regressor == 0: loss_ms=loss_fn(output_ms, binary_map) loss_ms.backward() _, predicted = torch.max(output_ms.data, 1) numCorrTrain_ms += torch.sum(predicted == binary_map.data).data.item() epoch_loss_ms+=loss_ms.item() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += torch.sum(predicted == labelVariable.data).data.item() epoch_loss += loss.item() avg_loss = epoch_loss/iterPerEpoch if stage ==2: trainAccuracy = (numCorrTrain_ms / trainSamples) * 100 avg_loss_ms= epoch_loss_ms/iterPerEpoch #avg_loss = avg_loss + avg_loss_ms train_log_loss_ms.write('Train Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms)) if regressor == 0:train_log_acc_ms.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy)) trainAccuracy = (numCorrTrain / trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy)) writer.add_scalar('train/epoch_loss', avg_loss, epoch+1) writer.add_scalar('train/accuracy', trainAccuracy, epoch+1) train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss)) train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy)) if val_data_dir is not None: if (epoch+1) % 1 == 
0: model.train(False) val_loss_epoch = 0 val_iter = 0 val_samples = 0 numCorr = 0 numCorr_ms = 0 epoch_loss_ms_val = 0 with torch.no_grad(): for j, (inputs, binary_map, targets) in enumerate(val_loader): val_iter += 1 val_samples += inputs.size(0) inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda() labelVariable = targets.cuda(non_blocking=True) output_label, output_ms = model(inputVariable) val_loss = loss_fn(output_label, labelVariable) val_loss_epoch += val_loss.item() if regressor == 0: binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda()) output_ms = output_ms.view(-1, 2) elif regressor == 1: binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).cuda()) output_ms = output_ms.view(-1) binary_map = binary_map.contiguous().view(-1) if stage == 2: if regressor == 1: loss_ms = loss_reg(output_ms, binary_map) epoch_loss_ms_val += loss_ms.item() elif regressor == 0: loss_ms = loss_fn(output_ms, binary_map) _, predicted = torch.max(output_ms.data, 1) numCorr_ms += torch.sum(predicted == binary_map.data).data.item() epoch_loss_ms_val += loss_ms.item() _, predicted = torch.max(output_label.data, 1) numCorr += torch.sum(predicted == labelVariable.data).data.item() avg_val_loss = val_loss_epoch / val_iter if stage == 2: avg_loss_ms = epoch_loss_ms_val / val_iter val_accuracy = (numCorr_ms / val_samples) * 100 #avg_loss = avg_loss + avg_loss_ms val_log_loss_ms.write('Val Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms)) if regressor == 0: val_log_acc_ms.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy)) val_accuracy = (numCorr / val_samples) * 100 print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy)) writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1) writer.add_scalar('val/accuracy', val_accuracy, epoch + 1) val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_ms_state_dict.pth') torch.save(model.state_dict(), save_path_model) min_accuracy = val_accuracy else: if (epoch+1) % 10 == 0: save_path_model = (model_folder + '/model_ms_state_dict_epoch' + str(epoch+1) + '.pth') torch.save(model.state_dict(), save_path_model) optim_scheduler.step() train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close() train_log_loss_ms.close() val_log_loss_ms.close() writer.export_scalars_to_json(model_folder + "/all_scalars.json") writer.close()
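# The stage-2 setup above repeats the same unfreeze boilerplate for every
# fine-tuned submodule. A hypothetical helper (not part of the repo) that
# captures the pattern:
def unfreeze(modules, train_params):
    for m in modules:
        m.train(True)
        for p in m.parameters():
            p.requires_grad = True
            train_params.append(p)

# e.g. unfreeze([model.resNet.layer4[1].conv1, model.resNet.fc,
#                model.lstm_cell, model.classifier], train_params)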
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 else: print('Dataset not found') sys.exit() min_accuracy = 0 model_folder = os.path.join('./', outDir, dataset, 'flow') # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Dir {} exists!'.format(model_folder)) os.system('rm -rf ./experiments') # remove stale results from a previous run #sys.exit() os.makedirs(model_folder) # Log files writer = SummaryWriter(model_folder) train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize]) vid_seq_train = makeDataset(trainDir, spatial_transform=spatial_transform, sequence=False, stackSize=stackSize, fmt='.png') train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, sampler=None, num_workers=4, pin_memory=True) valInstances = 0 if valDir is not None: vid_seq_val = makeDataset(valDir, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]), sequence=False, stackSize=stackSize, fmt='.png', phase='Test') val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valInstances = vid_seq_val.__len__() trainInstances = vid_seq_train.__len__() print('Number of samples in the dataset: training = {} | validation = {}'.format(trainInstances, valInstances)) model = flow_resnet34(True, channels=2*stackSize, num_classes=num_classes) model.train(True) train_params = list(model.parameters()) model.cuda() loss_fn = nn.CrossEntropyLoss() optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4) optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step, gamma=decay_factor) train_iter = 0 for epoch in range(numEpochs): epoch_loss = 0 numCorrTrain = 0 trainSamples = 0 iterPerEpoch = 0 model.train(True) writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1) for i, (inputs, targets) in enumerate(train_loader): train_iter += 1 iterPerEpoch += 1 optimizer_fn.zero_grad() inputVariable = Variable(inputs.cuda()) labelVariable = Variable(targets.cuda()) trainSamples += inputs.size(0) output_label, _ = model(inputVariable) loss = loss_fn(output_label, labelVariable) loss.backward() optimizer_fn.step() _, predicted = torch.max(output_label.data, 1) numCorrTrain += (predicted == targets.cuda()).sum() epoch_loss += loss.item() avg_loss = epoch_loss/iterPerEpoch trainAccuracy = (numCorrTrain / trainSamples) * 100 print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy)) writer.add_scalar('train/epoch_loss', avg_loss, epoch+1) writer.add_scalar('train/accuracy', trainAccuracy, epoch+1) train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch+1, avg_loss)) train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch+1, trainAccuracy)) if valDir is not None: if (epoch+1) % 1 == 0: model.train(False) val_loss_epoch = 0 val_iter = 0 val_samples = 0 numCorr = 0 with torch.no_grad(): for j, (inputs, targets) in enumerate(val_loader): val_iter += 1 val_samples += inputs.size(0) inputVariable = inputs.cuda() labelVariable = targets.cuda(non_blocking=True) output_label, _ = model(inputVariable) val_loss = loss_fn(output_label, labelVariable) val_loss_epoch += val_loss.item() _, predicted = torch.max(output_label.data, 1) numCorr += (predicted == targets.cuda()).sum() val_accuracy = (numCorr / val_samples) * 100 avg_val_loss = val_loss_epoch / val_iter print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy)) writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1) writer.add_scalar('val/accuracy', val_accuracy, epoch + 1) val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss)) val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy)) if val_accuracy > min_accuracy: save_path_model = (model_folder + '/model_flow_state_dict.pth') torch.save(model.state_dict(), save_path_model) min_accuracy = val_accuracy else: if (epoch+1) % 10 == 0: save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch+1) + '.pth') torch.save(model.state_dict(), save_path_model) optim_scheduler.step() train_log_loss.close() train_log_acc.close() val_log_acc.close() val_log_loss.close() writer.export_scalars_to_json(model_folder + "/all_scalars.json") writer.close()
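# flow_resnet34 above takes channels=2*stackSize because each of the
# stackSize flow frames contributes an x and a y component. A toy
# illustration of such an input tensor (the repo's actual channel
# interleaving may differ):
import torch

stackSize = 5
flow_x = torch.randn(stackSize, 224, 224)     # horizontal flow, 5 frames
flow_y = torch.randn(stackSize, 224, 224)     # vertical flow, 5 frames
stacked = torch.cat([flow_x, flow_y], dim=0)  # (2*stackSize, 224, 224)
batch = stacked.unsqueeze(0)                  # (1, 10, 224, 224) network input
print(batch.shape)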
print('##########################################') print('####### train') print('##########################################') assert cfg.train_crop in ['random', 'corner', 'center'] if cfg.train_crop == 'random': crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size) elif cfg.train_crop == 'corner': crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size) elif cfg.train_crop == 'center': crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size, crop_positions=['c']) spatial_transform = Compose([ crop_method, RandomHorizontalFlip(), ToTensor(cfg.norm_value), norm_method ]) temporal_transform = TemporalRandomCrop(cfg.sample_duration) target_transform = ClassLabel() training_data = get_training_set(cfg, spatial_transform, temporal_transform, target_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.n_threads, drop_last=True, pin_memory=True) train_logger = Logger(os.path.join(cfg.custom_logdir, 'train.log'), ['epoch', 'loss', 'acc', 'lr']) train_batch_logger = Logger(
def main_run(dataset, flowModel_state_dict, RGBModel_state_dict, dataset_dir, stackSize, seqLen, memSize, numSeg): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) flow_wt = 0.5 testBatchSize = 1 sequence = True spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=sequence, numSeg=numSeg, stackSize=stackSize, fmt='.jpg', phase='Test', seqLen=seqLen) test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize, shuffle=False, num_workers=2, pin_memory=True) modelFlow = flow_resnet34(False, channels=2 * stackSize, num_classes=num_classes) modelFlow.load_state_dict(torch.load(flowModel_state_dict)) modelRGBSN = attentionMDoubleResnet(num_classes=num_classes, mem_size=memSize) modelRGBSN.load_state_dict(torch.load(RGBModel_state_dict)) for params in modelFlow.parameters(): params.requires_grad = False for params in modelRGBSN.parameters(): params.requires_grad = False modelFlow.train(False) modelRGBSN.train(False) modelFlow.cuda() modelRGBSN.cuda() test_samples = vid_seq_test.__len__() print('Number of samples = {}'.format(test_samples)) print('Evaluating...') numCorrTwoStream = 0 true_labels = [] predicted_labels = [] with torch.no_grad(): for j, (inputFlow, inputFrame, inputSN, targets) in enumerate(test_loader): inputVariableFlow = inputFlow[0].cuda() inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda() inputVariableSN = inputSN.permute(1, 0, 2, 3, 4).cuda() output_labelFlow, _ = modelFlow(inputVariableFlow) output_labelFrameSN, _ = modelRGBSN(inputVariableFrame, inputVariableSN) output_label_meanFlow = torch.mean(output_labelFlow.data, 0, True) output_label_meanTwoStream = (flow_wt * output_label_meanFlow) + ((1 - flow_wt) * output_labelFrameSN.data) _, predictedTwoStream = torch.max(output_label_meanTwoStream, 1) numCorrTwoStream += (predictedTwoStream.cpu() == targets[0]).sum() true_labels.append(targets) predicted_labels.append(predictedTwoStream.cpu()) test_accuracyTwoStream = (numCorrTwoStream / test_samples) * 100 print('Test Accuracy = {}'.format(test_accuracyTwoStream)) cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float) cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis] ticks = np.linspace(0, 60, num=61) plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary') plt.colorbar() plt.xticks(ticks, fontsize=6) plt.yticks(ticks, fontsize=6) plt.grid(True) plt.clim(0, 1) plt.savefig(dataset + '-twoStreamDoubleResnet.jpg', bbox_inches='tight') plt.show()
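# The two-stream fusion above is a weighted average of class scores. The
# same late-fusion rule in isolation (61 GTEA classes; scores are dummies):
import torch

flow_wt = 0.5
flow_scores = torch.randn(1, 61)   # stands in for the flow stream output
rgb_scores = torch.randn(1, 61)    # stands in for the RGB/attention stream output
fused = flow_wt * flow_scores + (1 - flow_wt) * rgb_scores
predicted = fused.argmax(dim=1)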
else: norm_method = Normalize(opt.mean, opt.std) if not opt.no_train: assert opt.train_crop in ['random', 'corner', 'center'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop( opt.scales, opt.sample_size, crop_positions=['c']) if opt.dataset == 'gtea': spatial_transform = Compose([ crop_method, RandomHorizontalFlip(), ToTensor(opt.norm_value), norm_method, ]) else: spatial_transform = Compose([ crop_method, RandomHorizontalFlip(), RGB2Gray(), ToTensor(opt.norm_value), norm_method, ]) temporal_transform = TemporalRandomCrop(opt.sample_duration) if opt.compress == 'mask': spatio_temporal_transform = Coded(opt.mask_path) elif opt.compress == 'avg': spatio_temporal_transform = Averaged() elif opt.compress == 'one':
def __init__(self, root_path, annotation_path, subset, n_samples_for_each_video=1, spatial_transform=None, temporal_transform=None, target_transform=None, sample_duration=16, modality='rgb', get_loader=get_default_video_loader): if subset == 'training': self.data, self.class_names = make_dataset( root_path, annotation_path, subset, n_samples_for_each_video, sample_duration) # self.val_data, _ = make_dataset( # root_path, annotation_path, 'validation', n_samples_for_each_video, # sample_duration) # self.data += self.val_data else: self.data, self.class_names = make_dataset( root_path, annotation_path, 'testing', n_samples_for_each_video, sample_duration) print('loaded', len(self.data)) self.spatial_transform = spatial_transform self.temporal_transform = temporal_transform self.target_transform = target_transform self.subset = subset self.modality = modality if self.modality == 'flow': self.loader = get_default_video_loader_flow() elif self.modality == 'depth': self.loader = get_default_video_loader_depth() else: self.loader = get_loader() sometimes = lambda aug: iaa.Sometimes(0.3, aug) self.aug_seq = iaa.Sequential([ # iaa.Fliplr(0.5), # sometimes(iaa.MotionBlur(k=2)), # sometimes(iaa.ChangeColorTemperature((1100, 10000))), sometimes( iaa.MultiplyAndAddToBrightness(mul=(0.8, 1.2), add=(-30, 30))), # sometimes(iaa.Affine(scale={'x': (0.8, 1.2), 'y': (0.8, 1.2)}, # translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # rotate=(-20, 20), # shear=(-10, 10), # cval=(0, 255), # mode=ia.ALL, )), # sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.15))), # sometimes(iaa.AdditiveGaussianNoise(scale=0.05 * 255)), ]) self.aug_seq = self.aug_seq.to_deterministic() # added by alexhu; to_deterministic() returns a copy, so the result must be assigned self.root_path = root_path if self.modality != 'pose': self.to_tensor = Compose(self.spatial_transform.transforms[-2:]) self.spatial_transform.transforms = self.spatial_transform.transforms[: -2]
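# The transform split at the end of __init__ above lets the imgaug pipeline
# run on numpy images after the geometric PIL transforms but before
# ToTensor()/Normalize(). A hedged sketch of how a loader might chain the
# two halves (illustrative only; this repo's __getitem__ is not shown here):
import numpy as np
import torch
from PIL import Image

def apply_transforms(dataset, pil_frames):
    frames = [dataset.spatial_transform(img) for img in pil_frames]              # geometric PIL ops
    frames = [dataset.aug_seq.augment_image(np.asarray(img)) for img in frames]  # imgaug on numpy arrays
    frames = [dataset.to_tensor(Image.fromarray(img)) for img in frames]         # ToTensor + Normalize
    return torch.stack(frames)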
# if opt.train_crop == 'random': # crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) # elif opt.train_crop == 'corner': # crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) # elif opt.train_crop == 'center': # crop_method = MultiScaleCornerCrop( # opt.scales, opt.sample_size, crop_positions=['c']) # spatial_transform = Compose([ # crop_method, # RandomHorizontalFlip(), # ToTensor(opt.norm_value), norm_method # ]) spatial_transform = Compose([ ScaleQC(opt.sample_size), CenterCrop(opt.sample_size), RandomHorizontalFlip(), ToTensor(opt.norm_value), norm_method ]) temporal_transform = TemporalRandomCrop(opt.sample_duration) target_transform = ClassLabel() # training_data = get_training_set(opt, spatial_transform, # temporal_transform, target_transform) # train_loader = torch.utils.data.DataLoader( # training_data, # batch_size=opt.batch_size, # shuffle=True, # num_workers=opt.n_threads, # pin_memory=True) train_logger = Logger(os.path.join(opt.result_path, 'train.log'), ['epoch', 'loss', 'acc', 'lr'])
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen, memSize): if dataset == 'gtea61': num_classes = 61 elif dataset == 'gtea71': num_classes = 71 elif dataset == 'gtea_gaze': num_classes = 44 elif dataset == 'egtea': num_classes = 106 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) testBatchSize = 1 spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test', seqLen=seqLen) test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize, shuffle=False, num_workers=2, pin_memory=True) model = twoStreamAttentionModel(stackSize=5, memSize=512, num_classes=num_classes) model.load_state_dict(torch.load(model_state_dict)) for params in model.parameters(): params.requires_grad = False classes = sorted( os.listdir( "/content/drive/My Drive/testingGithub/FPAR_project/GTEA61/processed_frames2/train/S1" ))[1:] print(classes) print(len(classes)) model.train(False) model.cuda() test_samples = vid_seq_test.__len__() print('Number of samples = {}'.format(test_samples)) print('Evaluating...') numCorrTwoStream = 0 predicted_labels = [] true_labels = [] with torch.no_grad(): test_preds = get_all_preds(model, test_loader) labels = vid_seq_test.labels predictions = test_preds.argmax(dim=1) cm = confusion_matrix(labels, predictions) plt.figure(figsize=(25, 25)) plot_confusion_matrix(cm, classes)
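# get_all_preds is called above but defined elsewhere. A minimal sketch
# consistent with its use here (two-stream model; the loader is assumed to
# yield (inputFlow, inputFrame, targets) as in the other two-stream scripts;
# the real helper may differ):
import torch

def get_all_preds(model, loader):
    all_preds = torch.tensor([])
    for inputFlow, inputFrame, targets in loader:
        output = model(inputFlow.cuda(),
                       inputFrame.permute(1, 0, 2, 3, 4).cuda())
        all_preds = torch.cat((all_preds, output.cpu()), dim=0)
    return all_preds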
def main(config): # load model if config.model == 'c3d': model, params = VioNet_C3D(config) elif config.model == 'convlstm': model, params = VioNet_ConvLSTM(config) elif config.model == 'densenet': model, params = VioNet_densenet(config) elif config.model == 'densenet_lean': model, params = VioNet_densenet_lean(config) # default densenet else: model, params = VioNet_densenet_lean(config) # dataset dataset = config.dataset sample_size = config.sample_size stride = config.stride sample_duration = config.sample_duration # cross validation phase cv = config.num_cv # train set crop_method = GroupRandomScaleCenterCrop(size=sample_size) norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) spatial_transform = Compose( [crop_method, GroupRandomHorizontalFlip(), ToTensor(), norm]) temporal_transform = RandomCrop(size=sample_duration, stride=stride) target_transform = Label() train_batch = config.train_batch train_data = VioDB('../VioDB/{}_jpg/'.format(dataset), '../VioDB/{}_jpg{}.json'.format(dataset, cv), 'training', spatial_transform, temporal_transform, target_transform) train_loader = DataLoader(train_data, batch_size=train_batch, shuffle=True, num_workers=4, pin_memory=True) # val set crop_method = GroupScaleCenterCrop(size=sample_size) norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) spatial_transform = Compose([crop_method, ToTensor(), norm]) temporal_transform = CenterCrop(size=sample_duration, stride=stride) target_transform = Label() val_batch = config.val_batch val_data = VioDB('../VioDB/{}_jpg/'.format(dataset), '../VioDB/{}_jpg{}.json'.format(dataset, cv), 'validation', spatial_transform, temporal_transform, target_transform) val_loader = DataLoader(val_data, batch_size=val_batch, shuffle=False, num_workers=4, pin_memory=True) # make dir if not os.path.exists('./pth'): os.mkdir('./pth') if not os.path.exists('./log'): os.mkdir('./log') # log batch_log = Log( './log/{}_fps{}_{}_batch{}.log'.format( config.model, sample_duration, dataset, cv, ), ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr']) epoch_log = Log( './log/{}_fps{}_{}_epoch{}.log'.format(config.model, sample_duration, dataset, cv), ['epoch', 'loss', 'acc', 'lr']) val_log = Log( './log/{}_fps{}_{}_val{}.log'.format(config.model, sample_duration, dataset, cv), ['epoch', 'loss', 'acc']) # prepare criterion = nn.CrossEntropyLoss().to(device) learning_rate = config.learning_rate momentum = config.momentum weight_decay = config.weight_decay optimizer = torch.optim.SGD(params=params, lr=learning_rate, momentum=momentum, weight_decay=weight_decay) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, verbose=True, factor=config.factor, min_lr=config.min_lr) acc_baseline = config.acc_baseline loss_baseline = 1 for i in range(config.num_epoch): train(i, train_loader, model, criterion, optimizer, device, batch_log, epoch_log) val_loss, val_acc = val(i, val_loader, model, criterion, device, val_log) scheduler.step(val_loss) if val_acc > acc_baseline or (val_acc >= acc_baseline and val_loss < loss_baseline): torch.save( model.state_dict(), './pth/{}_fps{}_{}{}_{}_{:.4f}_{:.6f}.pth'.format( config.model, sample_duration, dataset, cv, i, val_acc, val_loss)) acc_baseline = val_acc loss_baseline = val_loss
import torch from torch.utils.data import Dataset from PIL import Image import numpy as np import random import glob import os import sys from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop, RandomHorizontalFlip) mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform2 = Compose([Scale((7,7)), ToTensor(), normalize]) def gen_split(root_dir, stackSize): DatasetX = [] DatasetY = [] DatasetF = [] Labels = [] NumFrames = [] #The root directory should be flow_x_processed/train or test for dir_user in sorted(os.listdir(root_dir)): class_id = 0 dir = os.path.join(root_dir, dir_user) for target in sorted(os.listdir(dir)): dir1 = os.path.join(dir, target) insts = sorted(os.listdir(dir1)) if insts != []:
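# gen_split above walks a layout like flow_x_processed/<split>/<user>/<class>/<instance>/.
# A hedged sketch of the collection pattern it appears to follow (class ids
# assigned per sorted class folder; illustrative, not the repo's full code):
import os

def list_instances(root_dir):
    samples = []  # (instance_dir, class_id) pairs
    for dir_user in sorted(os.listdir(root_dir)):
        user_dir = os.path.join(root_dir, dir_user)
        class_id = 0
        for target in sorted(os.listdir(user_dir)):
            class_dir = os.path.join(user_dir, target)
            for inst in sorted(os.listdir(class_dir)):
                samples.append((os.path.join(class_dir, inst), class_id))
            class_id += 1
    return samples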
if not opt.no_train: assert opt.train_crop in ['random', 'corner', 'center', 'custom'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) elif opt.train_crop == 'custom': crop_method = RandomSampleCrop(opt.sample_size) clip_transform = None spatial_transform = Compose( [ToTensor(opt.norm_value), ColorJitter(0.05, 0.05), norm_method]) temporal_transform = TemporalRandomCrop( int(opt.sample_duration * opt.t_stride)) training_data = get_training_set(opt, spatial_transform, temporal_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.n_threads, pin_memory=True) train_logger = Logger(os.path.join(opt.result_path, 'train.log'), ['epoch', 'loss', 'acc', 'lr']) train_batch_logger = Logger(
if not opt.no_train: ##-------------------------------------------------------------------------------------------- if opt.model == 'I3D': assert opt.train_crop in ['random', 'corner', 'center'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop([0.875], opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) spatial_transform = Compose([ Scale((256, 256)), crop_method, RandomHorizontalFlip(), ToTensor(opt.norm_value), norm_method ]) temporal_transform = TemporalRandomCrop(opt.sample_duration, 1) elif opt.model == 'resnet_50': assert opt.train_crop in ['random', 'corner', 'center'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop( opt.scales, opt.sample_size) # [1, 0.9, 0.875] elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) spatial_transform = Compose([
def main(): global epochs # Config parser = argparse.ArgumentParser( description="To read EgoGesture Dataset and run through SSAR network") parser.add_argument('--path', default='', help='full path to EgoGesture Dataset') args = parser.parse_args() path = args.path # Setup multiscale random crop scales = [initial_scale] for _ in range(1, n_scales): scales.append(scales[-1] * scale_step) # Setup datasets / dataloaders if do_data_augmentation: train_spatial_transforms = Compose([ MultiScaleRandomCrop(scales, (126, 224)), SpatialElasticDisplacement() ]) else: train_spatial_transforms = transforms.Resize((126, 224)) image_transform_train = Compose([ train_spatial_transforms, transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) image_transform_val = Compose([ transforms.Resize((126, 224)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) image_transform_test = Compose([ transforms.Resize((126, 224)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) mask_transform = Compose([train_spatial_transforms, transforms.ToTensor()]) if not do_data_augmentation: image_transform_train = image_transform_val subject_ids_train = [ 3, 4, 5, 6, 8, 10, 15, 16, 17, 20, 21, 22, 23, 25, 26, 27, 30, 32, 36, 38, 39, 40, 42, 43, 44, 45, 46, 48, 49, 50 ] subject_ids_val = [1, 7, 12, 13, 24, 29, 33, 34, 35, 37] subject_ids_test = [2, 9, 11, 14, 18, 19, 28, 31, 41, 47] if mode == 'training': train_dataset = EgoGestDataSequence(path, 'train_dataset', image_transform_train, mask_transform, get_mask=use_mask_loss, subject_ids=subject_ids_train) val_dataset = EgoGestDataSequence(path, 'val_dataset', image_transform_val, mask_transform, get_mask=use_mask_loss, subject_ids=subject_ids_val) # If we're not in training mode then switch the training dataset out with test or validation elif mode == 'validation': train_dataset = EgoGestDataSequence(path, 'val_dataset', image_transform_val, mask_transform, get_mask=use_mask_loss, subject_ids=subject_ids_val) else: train_dataset = EgoGestDataSequence(path, 'val_dataset', image_transform_test, mask_transform, get_mask=use_mask_loss, subject_ids=subject_ids_test) # train_indices, val_indices, test_indices = check_and_split_data(host_name=hostname, # data_folder=path, # dataset_len=len(dataset), # train_fraction=0.6, # validation_fraction=0.2) torch.manual_seed(42) torch.backends.cudnn.deterministic = True train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=True, shuffle=True, collate_fn=collate_fn_padd) if mode == 'training': val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=True, shuffle=True, collate_fn=collate_fn_padd) # Init model and load pre-trained weights rnet = resnet.resnet18(False) model = SSAR(ResNet=rnet, input_size=83, number_of_classes=83, batch_size=batch_size, dropout=dropout).cuda() model_weights = './weights/final_weights.pth' state = model.state_dict() loaded_weights = torch.load(model_weights) state.update(loaded_weights) model.load_state_dict(state) # Setup optimizer and loss criterion = torch.nn.CrossEntropyLoss(ignore_index=label_mask_value) criterion = criterion.cuda() if mode == 'training': set_train_mode( model, train=True ) # Need this here so the optimizer has the correct parameters to be trained optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), 
lr=learning_rate) else: optimizer = None # Continue from previous training checkpoint epoch_resume, step_resume, best_val_loss = load_latest( model, results_path, training_mode, optimizer) if not restore_training_variables: epoch_resume = 0 step_resume = 0 best_val_loss = np.inf # Train / test / val setup if mode != 'training': epoch_resume = 0 step_resume = 0 epochs = 1 # old_tensor_set = set() # Accuracy bar plot plt.ion() fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(10, 7), dpi=100) train_acc_bars = ax[0].bar(rel_poses, 0, 1 / accuracy_bins) val_acc_bars = ax[1].bar(rel_poses, 0, 1 / accuracy_bins) train_loss_line, = ax[2].plot([], [], label='Train Loss') val_loss_line, = ax[2].plot([], [], label='Val Loss') train_acc_texts = [ ax[0].text(x, y, "", horizontalalignment='center', verticalalignment='bottom') for x, y in zip(rel_poses, np.ones_like(rel_poses)) ] val_acc_texts = [ ax[1].text(x, y, "", horizontalalignment='center', verticalalignment='bottom') for x, y in zip(rel_poses, np.ones_like(rel_poses)) ] ax[0].set_ylim(0., 1.1) ax[0].set_title('Relative Gesture Position vs Training Accuracy') ax[1].set_ylim(0., 1.1) ax[1].set_title('Relative Gesture Position vs Validation Accuracy') ax[2].legend(loc='best') # loss_text = ax[2].text(0, -0.2, "Loss: ") plt.show() # Set up the movie writer for writing the accuracy plot over time # moviewriter = FFMpegWriter(fps=1) # moviewriter.setup(fig, os.path.join(results_path, 'accuracy_over_time.mp4'), dpi=100) # Main training loop if mode == 'training': optimizer.zero_grad() train_history = {} val_history = {} patience_counter = 0 for epoch in range(epoch_resume, epochs): # Display info print(f"Epoch: {epoch}") if epoch == epoch_resume and step_resume > 0: print(f"Fast forwarding to train step {step_resume}") # Reset epoch stats train_metrics = {} # Switch to train mode while freezing parts of the model we don't want to train set_train_mode(model, train=True) # Train if mode == 'training': print('Training:') for train_step, batch in enumerate(train_loader): # Advance train_loader to resume training from last checkpointed position (Note: Assumes same batch size) if epoch == epoch_resume and train_step < step_resume: del batch continue # Save model if mode == 'training' and train_step % 100 == 0 and ( train_step != step_resume or epoch != epoch_resume): save_model(model, optimizer, training_mode, epoch, train_step, best_val_loss, results_path) # Do one training step (may not actually step the optimizer if doing gradient accumulation) loss, batch_correct_count_samples = process_batch(model, train_step, batch, criterion, optimizer, mode=mode) del batch # Update metrics update_metrics(train_metrics, epoch, loss, batch_correct_count_samples) if (train_step + 1) % 10 == 0: # Display metrics print_metrics(train_metrics, train_step) update_accuracy_plot(train_acc_bars, train_acc_texts, train_metrics['accuracy_hist']) # Update train metric history and plots for this epoch update_epoch_history(train_history, train_metrics) update_loss_plot(train_loss_line, train_history) # Validation if mode == 'training': print('Validation:') val_metrics = {} # Switch to evaluation mode for validation set_train_mode(model, train=False) for val_step, batch in enumerate(val_loader): loss, batch_correct_count_samples = process_batch( model, val_step, batch, criterion, optimizer, mode='validation') # Update metrics update_metrics(val_metrics, epoch, loss, batch_correct_count_samples) if (val_step + 1) % 10 == 0: # Display metrics print_metrics(val_metrics, val_step)
update_accuracy_plot(val_acc_bars, val_acc_texts, val_metrics['accuracy_hist']) # Update validation metric history and plots update_epoch_history(val_history, val_metrics) update_loss_plot(val_loss_line, val_history) # Early stopping if val_metrics['loss_epoch'] < best_val_loss: best_val_loss = val_metrics['loss_epoch'] patience_counter = 0 save_model(model, optimizer, training_mode, epoch, val_step, best_val_loss, results_path, filename_override='model_best.pth') else: patience_counter += 1 if patience_counter >= early_stoppping_patience: print( f'Validation loss did not improve for {patience_counter} epochs, stopping' ) break # Save final model if mode == 'training' and (train_step != step_resume or epoch != epoch_resume): save_model(model, optimizer, training_mode, epoch, train_step, best_val_loss, results_path) print('Done!') plt.ioff() plt.show()
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize, trainDatasetDir, valDatasetDir, outDir, trainBatchSize, valBatchSize, lr1, numEpochs, decay_step, decay_factor, uniformSampling): # GTEA 61 num_classes = 61 # Train/Validation/Test split train_splits = ["S1", "S3", "S4"] val_splits = ["S2"] directory = trainDatasetDir model_folder = os.path.join( './', outDir, dataset, 'twoStream') # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Dir {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize ]) vid_seq_train = makeDataset(directory, train_splits, spatial_transform=spatial_transform, sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', seqLen=seqLen, uniform_sampling=uniformSampling) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True) vid_seq_val = makeDataset(directory, val_splits, spatial_transform=Compose([ Scale(256), CenterCrop(224), ToTensor(), normalize ]), sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test', seqLen=seqLen, uniform_sampling=uniformSampling) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valSamples = vid_seq_val.__len__() model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=stackSize, memSize=memSize, num_classes=num_classes) for params in model.parameters(): params.requires_grad = False model.train(False) train_params = [] for params in model.classifier.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.lstm_cell.parameters(): train_params += [params] params.requires_grad = True for params in model.frameModel.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.frameModel.resNet.layer4[2].conv1.parameters(): params.requires_grad = True train_params += [params] # for params in model.frameModel.resNet.layer4[2].conv2.parameters(): params.requires_grad = True train_params += [params] # for params in model.frameModel.resNet.fc.parameters(): params.requires_grad = True train_params += [params] base_params = [] for params in model.flowModel.layer4.parameters(): base_params += [params] params.requires_grad = True model.cuda() trainSamples = vid_seq_train.__len__() min_accuracy = 0 loss_fn = nn.CrossEntropyLoss() optimizer_fn = torch.optim.SGD([ { 'params': train_params }, { 'params': base_params, 'lr': 1e-4 }, ], lr=lr1, momentum=0.9, weight_decay=5e-4) optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, 
                                                    step_size=decay_step,
                                                    gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariableFlow = inputFlow.to(DEVICE)
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
            labelVariable = targets.to(DEVICE)
            output_label = model(inputVariableFlow, inputVariableFrame)
            # CrossEntropyLoss expects raw logits; it applies log_softmax
            # internally, so the outputs must not be log-softmaxed first
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum()
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Validation phase (every epoch)
        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            with torch.no_grad():
                for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                    val_iter += 1
                    inputVariableFlow = inputFlow.to(DEVICE)
                    inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
                    labelVariable = targets.to(DEVICE)
                    output_label = model(inputVariableFlow, inputVariableFrame)
                    loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += loss.data.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == labelVariable.data).sum()
            val_accuracy = (numCorr.item() / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(
                epoch + 1, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
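# As fixed above, nn.CrossEntropyLoss already applies log_softmax internally,
# so it must be fed raw logits; feeding it log-probabilities applies the
# normalization twice. A quick standalone sanity check (plain PyTorch, no
# project code):

import torch
import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(0)
logits = torch.randn(4, 61)             # batch of 4, 61 classes (GTEA 61)
targets = torch.randint(0, 61, (4,))

ce = nn.CrossEntropyLoss()(logits, targets)
# CrossEntropyLoss == NLLLoss applied to log_softmax outputs
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), targets)
assert torch.allclose(ce, nll)

# The buggy pattern: log_softmax applied twice (once by us, once inside the loss)
buggy = nn.CrossEntropyLoss()(F.log_softmax(logits, dim=1), targets)
print(ce.item(), buggy.item())          # the two values differ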
def main(config): if config.model == 'c3d': model, params = C3D(config) elif config.model == 'convlstm': model, params = ConvLSTM(config) elif config.model == 'densenet': model, params = densenet(config) elif config.model == 'densenet_lean': model, params = densenet_lean(config) elif config.model == 'resnext': model, params = resnext(config) else: model, params = densenet_lean(config) dataset = config.dataset sample_size = config.sample_size stride = config.stride sample_duration = config.sample_duration cv = config.num_cv # crop_method = GroupRandomScaleCenterCrop(size=sample_size) crop_method = MultiScaleRandomCrop(config.scales, config.sample_size[0]) # norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) norm = Normalize([114.7748, 107.7354, 99.475], [1, 1, 1]) # spatial_transform = Compose( # [crop_method, # GroupRandomHorizontalFlip(), # ToTensor(1), norm]) spatial_transform = Compose([ RandomHorizontalFlip(), crop_method, ToTensor(config.norm_value), norm ]) # temporal_transform = RandomCrop(size=sample_duration, stride=stride) temporal_transform = TemporalRandomCrop(config.sample_duration, config.downsample) target_transform = Label() train_batch = config.train_batch train_data = RWF2000('/content/RWF_2000/frames/', g_path + '/RWF-2000.json', 'training', spatial_transform, temporal_transform, target_transform, dataset) train_loader = DataLoader(train_data, batch_size=train_batch, shuffle=True, num_workers=4, pin_memory=True) crop_method = GroupScaleCenterCrop(size=sample_size) norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) spatial_transform = Compose([crop_method, ToTensor(), norm]) temporal_transform = CenterCrop(size=sample_duration, stride=stride) target_transform = Label() val_batch = config.val_batch val_data = RWF2000('/content/RWF_2000/frames/', g_path + '/RWF-2000.json', 'validation', spatial_transform, temporal_transform, target_transform, dataset) val_loader = DataLoader(val_data, batch_size=val_batch, shuffle=False, num_workers=4, pin_memory=True) if not os.path.exists('{}/pth'.format(config.output)): os.mkdir('{}/pth'.format(config.output)) if not os.path.exists('{}/log'.format(config.output)): os.mkdir('{}/log'.format(config.output)) batch_log = Log( '{}/log/{}_fps{}_{}_batch{}.log'.format( config.output, config.model, sample_duration, dataset, cv, ), ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr']) epoch_log = Log( '{}/log/{}_fps{}_{}_epoch{}.log'.format(config.output, config.model, sample_duration, dataset, cv), ['epoch', 'loss', 'acc', 'lr']) val_log = Log( '{}/log/{}_fps{}_{}_val{}.log'.format(config.output, config.model, sample_duration, dataset, cv), ['epoch', 'loss', 'acc']) criterion = nn.CrossEntropyLoss().to(device) # criterion = nn.BCELoss().to(device) learning_rate = config.learning_rate momentum = config.momentum weight_decay = config.weight_decay optimizer = torch.optim.SGD(params=params, lr=learning_rate, momentum=momentum, weight_decay=weight_decay, dampening=False, nesterov=False) # optimizer = torch.optim.Adam(params=params, lr = learning_rate, weight_decay= weight_decay) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, verbose=True, factor=config.factor, min_lr=config.min_lr) acc_baseline = config.acc_baseline loss_baseline = 1 for p in range(1, config.num_prune): if p > 0: model = torch.load('{}/pth/prune_{}.pth'.format( config.output, p - 1)) print(f"Prune {p}/{config.num_prune}") params = sum([np.prod(p.size()) for p in model.parameters()]) print("Number of Parameters: %.1fM" % (params / 1e6)) model = 
prune_model(model)
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        model.to(config.device)
        acc_baseline = 0
        for i in range(5):
            train(i, train_loader, model, criterion, optimizer, device,
                  batch_log, epoch_log)
            val_loss, val_acc = val(i, val_loader, model, criterion, device,
                                    val_log)
            scheduler.step(val_loss)
            if val_acc > acc_baseline or (val_acc >= acc_baseline
                                          and val_loss < loss_baseline):
                # torch.save(
                #     model.state_dict(),
                #     '{}/pth/prune_{}_{}_fps{}_{}{}_{}_{:.4f}_{:.6f}.pth'.format(
                #         config.output, p, config.model, sample_duration,
                #         dataset, cv, i, val_acc, val_loss))
                torch.save(model, '{}/pth/prune_{}.pth'.format(config.output, p))
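# prune_model is defined elsewhere in this repo. Purely as an illustration of
# the general technique, here is a minimal magnitude-pruning pass built on
# torch.nn.utils.prune, which zeroes the smallest-magnitude weights in every
# Conv3d layer. This is an assumption about the standard approach, not a
# reconstruction of the project's prune_model:

import torch.nn as nn
import torch.nn.utils.prune as prune


def magnitude_prune(model, amount=0.3):
    """Zero out the `amount` fraction of smallest-magnitude weights per Conv3d."""
    for module in model.modules():
        if isinstance(module, nn.Conv3d):
            prune.l1_unstructured(module, name='weight', amount=amount)
            prune.remove(module, 'weight')  # make the pruning permanent
    return model

# Note that unstructured pruning only zeroes weights; tensor shapes are
# unchanged, so the parameter count printed above would stay the same.
# Shrinking the count, as the log output suggests, requires structured
# (channel-level) pruning.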
        cf = confusion_matrix(labels, pred).astype(float)
        cls_cnt = cf.sum(axis=1)
        cls_hit = np.diag(cf)
        cls_acc = cls_hit / cls_cnt
        print(cls_acc)
        print('Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100))
    with open(opt.result_path, 'w') as f:
        json.dump(test_results, f)


if __name__ == '__main__':
    opt = parse_opts()
    opt.mean = get_mean()
    opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth)
    opt.sample_duration = 16
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(1),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(opt.val_list,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration,
                 n_samples_for_each_video=0)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    model, _ = generate_model(opt)
    model = nn.DataParallel(model, device_ids=opt.gpus).cuda()
    print('loading model {}'.format(opt.model))
    model_data = torch.load(opt.model)
    assert opt.arch == model_data['arch']
    model.load_state_dict(model_data['state_dict'])
    model.eval()
    test(data_loader, model, opt)
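# The per-class accuracy computed above is just the confusion-matrix diagonal
# divided by the row sums. A tiny worked example with made-up labels:

import numpy as np
from sklearn.metrics import confusion_matrix

labels = [0, 0, 1, 1, 2, 2]           # ground truth
pred = [0, 1, 1, 1, 2, 0]             # predictions
cf = confusion_matrix(labels, pred).astype(float)

cls_acc = np.diag(cf) / cf.sum(axis=1)
print(cls_acc)                        # [0.5 1.  0.5]
print('Mean class accuracy: {:.02f}%'.format(np.mean(cls_acc) * 100))  # 66.67%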
norm_method = Normalize(opt.mean, [1, 1, 1]) else: norm_method = Normalize(opt.mean, opt.std) if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) if not opt.no_val: spatial_transform = Compose([ ToTensor(opt.norm_value), norm_method, ]) temporal_transform = LoopPadding(opt.sample_duration) target_transform = ClassLabel() if opt.compress == 'mask': spatio_temporal_transform = None temporal_transform = None elif opt.compress == 'avg': spatio_temporal_transform = Averaged() elif opt.compress == 'one': spatio_temporal_transform = OneFrame() elif opt.compress == 'spatial': spatial_transform = Compose([ LowResolution(opt.spatial_compress_size, use_cv2=opt.use_cv2), ToTensor(opt.norm_value), norm_method,
elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) # spatial_transform = Compose([ # crop_method, # RandomHorizontalFlip(), # ToTensor(opt.norm_value), norm_method # ]) spatial_transform = Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), #transforms.Resize(224), #transforms.RandomCrop(224), transforms.ToTensor(), #Threshold(200), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) target_transform = ClassLabel() training_data = get_training_set(opt, spatial_transform, target_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.n_threads, pin_memory=True) train_logger = Logger(os.path.join(opt.result_path, 'train.log'), ['epoch', 'loss', 'acc', 'lr']) train_batch_logger = Logger(
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen, memSize, evalInterval, evalMode, numWorkers, outDir, fightsDir_train, noFightsDir_train, fightsDir_test, noFightsDir_test): train_dataset_dir_fights = fightsDir_train train_dataset_dir_noFights = noFightsDir_train test_dataset_dir_fights = fightsDir_test test_dataset_dir_noFights = noFightsDir_test trainDataset, trainLabels, trainNumFrames = make_split( train_dataset_dir_fights, train_dataset_dir_noFights) testDataset, testLabels, testNumFrames = make_split( test_dataset_dir_fights, test_dataset_dir_noFights) mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = Normalize(mean=mean, std=std) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), ToTensor(), normalize ]) vidSeqTrain = makeDataset(trainDataset, trainLabels, trainNumFrames, spatial_transform=spatial_transform, seqLen=seqLen) # trainLoader = torch.utils.data.DataLoader(vidSeqTrain, batch_size=trainBatchSize, # shuffle=True, num_workers=numWorkers, pin_memory=True, drop_last=True) trainLoader = torch.utils.data.DataLoader(vidSeqTrain, batch_size=trainBatchSize, shuffle=True, pin_memory=True, drop_last=True) if evalMode == 'centerCrop': test_spatial_transform = Compose( [Scale(256), CenterCrop(224), ToTensor(), normalize]) testBatchSize = 1 elif evalMode == 'tenCrops': test_spatial_transform = Compose( [Scale(256), TenCrops(size=224, mean=mean, std=std)]) testBatchSize = 1 elif evalMode == 'fiveCrops': test_spatial_transform = Compose( [Scale(256), FiveCrops(size=224, mean=mean, std=std)]) testBatchSize = 1 elif evalMode == 'horFlip': test_spatial_transform = Compose([ Scale(256), CenterCrop(224), FlippedImagesTest(mean=mean, std=std) ]) testBatchSize = 1 vidSeqTest = makeDataset(testDataset, testLabels, testNumFrames, seqLen=seqLen, spatial_transform=test_spatial_transform) # testLoader = torch.utils.data.DataLoader(vidSeqTest, batch_size=testBatchSize, # shuffle=False, num_workers=int(numWorkers/2), pin_memory=True) testLoader = torch.utils.data.DataLoader(vidSeqTest, batch_size=testBatchSize, shuffle=False, pin_memory=True) numTrainInstances = vidSeqTrain.__len__() numTestInstances = vidSeqTest.__len__() print('Number of training samples = {}'.format(numTrainInstances)) print('Number of testing samples = {}'.format(numTestInstances)) modelFolder = './experiments_' + outDir # Dir for saving models and log files # Create the dir if os.path.exists(modelFolder): print(modelFolder + ' exists!!!') sys.exit() else: os.makedirs(modelFolder) # Log files writer = SummaryWriter(modelFolder) trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w') trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w') testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w') testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w') model = ViolenceModel(mem_size=memSize) trainParams = [] for params in model.parameters(): params.requires_grad = True trainParams += [params] model.train(True) # model.cuda() lossFn = nn.CrossEntropyLoss() optimizerFn = torch.optim.RMSprop(trainParams, lr=lr) optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize, decayRate) minAccuracy = 50 for epoch in range(numEpochs): if epoch != 0: optimScheduler.step() epochLoss = 0 numCorrTrain = 0 iterPerEpoch = 0 model.train(True) print('Epoch = {}'.format(epoch + 1)) writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1) for i, (inputs, targets) in enumerate(trainLoader): iterPerEpoch += 1 
            optimizerFn.zero_grad()
            # model runs on CPU here; add .cuda() calls to train on GPU
            inputVariable1 = inputs.permute(1, 0, 2, 3, 4)
            labelVariable = targets
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            numCorrTrain += (predicted == targets).sum()
            epochLoss += loss.data.item()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            with torch.no_grad():  # replaces the deprecated volatile=True Variables
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4)
                    else:
                        inputVariable1 = inputs[0]
                    labelVariable = targets
                    outputLabel = model(inputVariable1)
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labelVariable)
                    testLossEpoch += testLoss.data.item()
                    _, predicted = torch.max(outputLabel_mean.data, 1)
                    numCorrTest += (predicted == targets[0]).sum()
            testAccuracy = (numCorrTest.item() / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > minAccuracy:
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
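# For the tenCrops/fiveCrops eval modes above, the per-crop outputs are
# averaged before the argmax. The project's TenCrops/FiveCrops transforms are
# its own implementations; the sketch below shows the same standard pattern
# with torchvision's TenCrop:

import torch
from torchvision import transforms

ten_crop = transforms.Compose([
    transforms.Resize(256),
    transforms.TenCrop(224),  # returns a tuple of 10 PIL crops
    transforms.Lambda(lambda crops: torch.stack(
        [transforms.ToTensor()(c) for c in crops])),  # -> (10, C, H, W)
])

# At inference time, run all crops through the model and average the logits:
#   crops = ten_crop(pil_image)            # (10, 3, 224, 224)
#   logits = model(crops)                  # (10, num_classes)
#   mean_logits = logits.mean(dim=0, keepdim=True)
#   pred = mean_logits.argmax(dim=1)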
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen, memSize):
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False,
                               numSeg=1,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = twoStreamAttentionModel(stackSize=5,
                                    memSize=512,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.cuda()

    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    predicted_labels = []
    true_labels = []
    with torch.no_grad():  # replaces the deprecated volatile=True Variables
        for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            inputVariableFlow = inputFlow.cuda()
            output_label = model(inputVariableFlow, inputVariableFrame)
            _, predictedTwoStream = torch.max(output_label.data, 1)
            numCorrTwoStream += (predictedTwoStream == targets.cuda()).sum()
            # Move predictions to CPU ints so sklearn's confusion_matrix accepts them
            predicted_labels.extend(predictedTwoStream.cpu().tolist())
            true_labels.extend(targets.tolist())
    test_accuracyTwoStream = (numCorrTwoStream.item() / float(test_samples)) * 100
    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
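# Row-normalizing the confusion matrix as above divides by each class's
# support, which breaks for classes absent from the test split (division by
# zero). A guarded version of the same computation, using only numpy:

import numpy as np


def normalize_confusion(cnf):
    """Row-normalize a confusion matrix, leaving all-zero rows at zero."""
    cnf = cnf.astype(float)
    row_sums = cnf.sum(axis=1, keepdims=True)
    return np.divide(cnf, row_sums, out=np.zeros_like(cnf), where=row_sums > 0)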
img_prefix = '' whole_model, parameters = generate_model(args) print(whole_model) # input('...') if args.no_mean_norm and not args.std_norm: norm_method = Normalize([0, 0, 0], [1, 1, 1]) elif not args.std_norm: norm_method = Normalize(args.mean, [1, 1, 1]) else: norm_method = Normalize(args.mean, args.std) spatial_transform = Compose([ Scale(args.sample_size), CenterCrop(args.sample_size), ToTensor(args.norm_value), norm_method ]) # if not args.test_temp_crop == 'sparse': if args.compared_temp_transform == 'shuffle': temp_transform = ShuffleFrames(args.sample_duration) else: temp_transform = ReverseFrames(args.sample_duration) temp_crop_method = TemporalRandomCrop(args.sample_duration) # if args.compared_temp_transform == 'reverse': # temp_transform = Compose([ # ReverseFrames(args.sample_duration), # temp_crop_method # ]) # elif args.compared_temp_transform == 'shuffle':
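# ReverseFrames and ShuffleFrames are project transforms. Assuming they follow
# the usual convention of operating on a list of frame indices (as
# TemporalRandomCrop does in the 3D-ResNet codebases), minimal versions look
# like this; these are sketches under that assumption, not the project's code:

import random


class ReverseFrames:
    """Reverse the temporal order of a clip's frame indices."""

    def __init__(self, size):
        self.size = size  # kept for interface parity; unused here

    def __call__(self, frame_indices):
        return list(reversed(frame_indices))


class ShuffleFrames:
    """Randomly permute the frame indices, destroying temporal order."""

    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = list(frame_indices)
        random.shuffle(out)
        return out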
def main(): parser = argparse.ArgumentParser(description="Run model against images") parser.add_argument( '--input-glob', default= 'data/kinetics_videos/jpg/yoga/0wHOYxjRmlw_000041_000051/image_000{41,42,43,44,45,46,47,48,49,50,41,42,43,44,45,46}.jpg', help="inputs") parser.add_argument("--depth", default="50", help="which model depth") args = parser.parse_args() model_file = model_files[args.depth] model_depth = int(args.depth) model = resnet.generate_model(model_depth=model_depth, n_classes=700, n_input_channels=3, shortcut_type="B", conv1_t_size=7, conv1_t_stride=1, no_max_pool=False, widen_factor=1.0) # model = load_pretrained_model(model, args.model, "resnet", 700) checkpoint = torch.load(model_file, map_location='cpu') arch = '{}-{}'.format("resnet", model_depth) print(arch, checkpoint['arch']) assert arch == checkpoint['arch'] if hasattr(model, 'module'): # I think this only for legacy models model.module.load_state_dict(checkpoint['state_dict']) else: model.load_state_dict(checkpoint['state_dict']) model.eval() image_clips = [] files = real_glob(args.input_glob) files = extend_to_length(files, 16) print(files) for f in files: img = Image.open(f).convert("RGB") image_clips.append(img) # print("EARLY", image_clips[0][0:4,0:4,0]) mean = [0.4345, 0.4051, 0.3775] std = [0.2768, 0.2713, 0.2737] normalize = Normalize(mean, std) sample_size = 112 spatial_transform = [Resize(sample_size)] spatial_transform.append(CenterCrop(sample_size)) spatial_transform.append(ToTensor()) spatial_transform.extend([ScaleValue(1), normalize]) spatial_transform = Compose(spatial_transform) # c = spatial_transform(image_clips[0]) # c.save("raw.png") model_clips = [] clip = [spatial_transform(img) for img in image_clips] model_clips.append(torch.stack(clip, 0).permute(1, 0, 2, 3)) model_clips = torch.stack(model_clips, 0) print("Final", model_clips.shape) print("PEEK", model_clips[0, 0, 0, 0:4, 0:4]) with torch.no_grad(): outputs = model(model_clips) print(outputs[0][0:10]) outputs = F.softmax(outputs, dim=1).cpu() sorted_scores, locs = torch.topk(outputs[0], k=3) print(locs[0]) video_results = [] for i in range(sorted_scores.size(0)): video_results.append({ 'label': magic_labels_700[locs[i].item()], 'score': sorted_scores[i].item() }) print(video_results)
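# The top-k readout at the end of main() generalizes into a small reusable
# helper; a sketch (the label list and logits below are placeholders):

import torch
import torch.nn.functional as F


def topk_predictions(logits, class_names, k=3):
    """Return the k highest-scoring (label, probability) pairs for one clip."""
    probs = F.softmax(logits, dim=0)
    scores, idx = torch.topk(probs, k=k)
    return [{'label': class_names[i.item()], 'score': s.item()}
            for s, i in zip(scores, idx)]

# Example with dummy data:
#   logits = torch.randn(700)            # one clip, 700 Kinetics classes
#   print(topk_predictions(logits, magic_labels_700, k=3))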
print(opt) with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: json.dump(vars(opt), opt_file) torch.manual_seed(opt.manual_seed) model = generate_model(opt) print(model) criterion = nn.CrossEntropyLoss() if not opt.no_cuda: criterion = criterion.cuda() if not opt.no_train: spatial_transform = Compose([ MultiScaleCornerCrop(opt.scales, opt.sample_size), RandomHorizontalFlip(), ToTensor(opt.norm_value), Normalize(opt.mean, [1, 1, 1]) ]) temporal_transform = TemporalRandomCrop(opt.sample_duration) target_transform = ClassLabel() if opt.dataset == 'kinetics': training_data = Kinetics(opt.video_path, opt.annotation_path, 'training', spatial_transform=spatial_transform, temporal_transform=temporal_transform, target_transform=target_transform) else: training_data = ActivityNet(opt.video_path, opt.annotation_path, 'training',
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, regression, rloss, debug, verbose, CAM): # GTEA 61 num_classes = 61 # Train/Validation/Test split train_splits = ["S1", "S3", "S4"] val_splits = ["S2"] if debug: n_workers = 0 device = 'cpu' else: n_workers = 4 device = 'cuda' model_folder = os.path.join( './', out_dir, dataset, 'rgb', 'stage' + str(stage)) # Dir for saving models and log files # Create the dir if os.path.exists(model_folder): print('Directory {} exists!'.format(model_folder)) sys.exit() os.makedirs(model_folder) # Log files train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w') train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w') val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w') val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w') # Data loader normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224), # ToTensor(), # normalize ]) transform_rgb = Compose([ToTensor(), normalize]) transform_MS = Compose([Resize((7, 7)), ToTensor()]) vid_seq_train = makeDataset(train_data_dir, splits=train_splits, spatial_transform=spatial_transform, transform_rgb=transform_rgb, transform_MS=transform_MS, seqLen=seqLen, fmt='.png', regression=regression) train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=n_workers, pin_memory=True) vid_seq_val = makeDataset(train_data_dir, splits=val_splits, spatial_transform=Compose( [Scale(256), CenterCrop(224)]), transform_rgb=transform_rgb, transform_MS=transform_MS, seqLen=seqLen, fmt='.png', regression=regression, verbose=False) val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=n_workers, pin_memory=True) valInstances = vid_seq_val.__len__() ''' if val_data_dir is not None: vid_seq_val = makeDataset(val_data_dir, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]), seqLen=seqLen, fmt='.jpg') val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True) valInstances = vid_seq_val.__len__() ''' trainInstances = vid_seq_train.__len__() train_params = [] if stage == 1: if regression: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize, n_channels=1) else: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize) model.train(False) for params in model.parameters(): params.requires_grad = False else: if regression: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize, n_channels=1) else: model = SelfSupervisedAttentionModel(num_classes=num_classes, mem_size=memSize) model.load_state_dict(torch.load(stage1_dict), strict=False) model.train(False) for params in model.parameters(): params.requires_grad = False # for params in model.resNet.layer4[0].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[0].conv2.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv1.parameters(): params.requires_grad = True train_params += [params] for params in model.resNet.layer4[1].conv2.parameters(): params.requires_grad = True train_params += [params] for params in 
model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)
        # Add params from ms_module
        for params in model.ms_module.parameters():
            params.requires_grad = True
            train_params += [params]

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
    model.lstm_cell.train(True)
    model.classifier.train(True)
    model.ms_module.train(True)
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    if regression:
        if rloss == 'MSE':          # Mean Squared Error loss
            loss_ms_fn = nn.MSELoss()
        elif rloss == 'L1':         # L1 loss
            loss_ms_fn = nn.L1Loss()
        elif rloss == 'SmoothL1':   # Huber / Smooth L1 loss
            loss_ms_fn = nn.SmoothL1Loss()
        elif rloss == 'KLdiv':      # Kullback-Leibler divergence loss
            loss_ms_fn = nn.KLDivLoss()
    else:  # classification
        loss_ms_fn = nn.CrossEntropyLoss()  # TODO: check the Planamente paper

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
            model.ms_module.train(True)
        for i, (inputsRGB, inputsMS, targets) in enumerate(train_loader):
            # Inputs:
            #   - inputsRGB: the RGB frame input
            # Labels:
            #   - inputsMS: the motion task label
            #   - targets: the output (action class) label
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(device)
            labelVariable = targets.to(device)
            msVariable = inputsMS.to(device)
            trainSamples += inputsRGB.size(0)
            output_label, _, output_ms = model(inputVariable, device)
            loss_c = loss_fn(output_label, labelVariable)
            if regression:
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                output_ms = torch.sigmoid(output_ms)
                output_ms = torch.reshape(
                    output_ms, (seqLen * 7 * 7, output_ms.size(0)))
            else:  # classification task
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0))).long()
                output_ms = torch.reshape(
                    output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
            loss_ms = loss_ms_fn(output_ms, msVariable)
            loss = loss_c + loss_ms
            if verbose:
                print(loss_c)
                print(loss_ms)
                print(loss)
                print()
            # TODO (maybe): swap dims 0 and 1 for inputsMS
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))

        # VALIDATION PHASE (every epoch)
        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():
                for j, (inputsRGB, inputsMS, targets) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputsRGB.size(0)
                    # the permutation is only for computational convenience
                    inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(device)
                    labelVariable = targets.to(device)
                    msVariable = inputsMS.to(device)
                    output_label, _, output_ms = model(inputVariable, device)
                    loss_c = loss_fn(output_label, labelVariable)
                    if regression:
                        msVariable = torch.reshape(
                            msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                        output_ms = torch.sigmoid(output_ms)
                        output_ms = torch.reshape(
                            output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                    else:  # classification task
                        msVariable = torch.reshape(
                            msVariable, (seqLen * 7 * 7, msVariable.size(0))).long()
                        output_ms = torch.reshape(
                            output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
                    loss_ms = loss_ms_fn(output_ms, msVariable)
                    val_loss = loss_c + loss_ms
                    val_loss_epoch += val_loss.data.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = (numCorr.data.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Valid: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
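# Both the training and validation phases above combine the classification and
# motion-task losses with an unweighted sum. A weighted combination is a common
# variant worth exposing as a knob; `ms_weight` below is a hypothetical
# hyperparameter, not one this script defines:

import torch


def combined_loss(loss_c, loss_ms, ms_weight=1.0):
    """Weighted sum of the classification and auxiliary motion-task losses."""
    return loss_c + ms_weight * loss_ms

# Example:
#   loss = combined_loss(loss_c, loss_ms, ms_weight=0.5)
print(combined_loss(torch.tensor(1.2), torch.tensor(0.4), ms_weight=0.5))  # tensor(1.4000)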
else: norm_method = Normalize(opt.mean, opt.std) if not opt.no_train: assert opt.train_crop in ['random', 'corner', 'center'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) spatial_transform = Compose([ crop_method, RandomHorizontalFlip(), ToTensor(opt.norm_value), norm_method ]) temporal_transform = TemporalRandomCrop(opt.sample_duration) target_transform = ClassLabel() training_data = get_training_set(opt, spatial_transform, temporal_transform, target_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_threads, pin_memory=True) train_logger = Logger( os.path.join(opt.result_path, 'train.log'), ['epoch', 'loss', 'acc', 'acc_img', 'lr', 'epoch_time']) train_batch_logger = Logger(
else: norm_method = Normalize(opt.mean, opt.std) if not opt.no_train: assert opt.train_crop in ['random', 'corner', 'center'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) spatial_transform = Compose([ crop_method, RandomHorizontalFlip(), ToTensor(opt.norm_value), norm_method ]) temporal_transform = Compose([TemporalRandomCrop(opt.sample_duration)]) target_transform = ClassLabel() training_data = get_training_set(opt, spatial_transform, temporal_transform, target_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.n_threads, pin_memory=True) train_logger = Logger(os.path.join(opt.result_path, 'train.log'), ['epoch', 'loss', 'acc', 'lr']) train_batch_logger = Logger( os.path.join(opt.result_path, 'train_batch.log'),