def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest,
                test_interval=nTestInterval):
    """Train a video classifier on the volleyball dataset, with optional periodic testing.

    Args:
        dataset: Dataset identifier forwarded to VolleyballDataset.
        save_dir (str): Root directory for checkpoints and TensorBoard logs.
        num_classes (int): Number of classes in the data.
        lr (float): Base learning rate.
        num_epochs (int, optional): Number of epochs to train for.
        save_epoch (int): Save a checkpoint every `save_epoch` epochs.
        useTest (bool): Whether to evaluate on the test split during training.
        test_interval (int): Evaluate on the test split every `test_interval` epochs.
    """
    # Build the model and its parameter groups (backbone at 1x lr, head at 10x
    # where the architecture provides that split).
    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(3, 4, 6, 3))
        train_params = model.parameters()
    elif modelName == 'R2D':
        model = R2Dnet.R2DClassifier(group_num_classes=num_classes, pretrained=True)
        train_params = model.parameters()
    else:
        print('We only implemented C3D and R2Plus1D models.')
        raise NotImplementedError

    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # The scheduler divides the lr by 10 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    model.to(device)  # move before resuming so resumed tensors land on the right device

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        # Load all tensors onto the CPU first; the model is already on `device`.
        checkpoint = torch.load(
            os.path.join(save_dir, 'models',
                         saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
            map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models',
                         saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models',
                           datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='train', clip_len=16),
                                  batch_size=4, shuffle=True, num_workers=0)
    val_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='val', clip_len=16),
                                batch_size=4, num_workers=0)
    test_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='test', clip_len=16),
                                 batch_size=4, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # Reset the running loss and corrects.
            running_loss = 0.0
            running_corrects = 0.0

            # train() / eval() primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # NOTE: scheduler.step() before optimizer steps is the legacy
                # (pre-1.1) ordering; kept to preserve the existing lr schedule.
                scheduler.step()
                model.train()
            else:
                model.eval()
            torch.backends.cudnn.benchmark = False

            for inputs, labels, dists in tqdm(trainval_loaders[phase]):
                # Move inputs and labels to the device the training is taking place on.
                inputs = Variable(inputs, requires_grad=True).to(device)
                labels = Variable(labels).to(device)
                dists = Variable(dists, requires_grad=True).to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs, dists)
                else:
                    with torch.no_grad():
                        outputs = model(inputs, dists)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    # BUGFIX: dropped retain_graph=True — a fresh graph is built
                    # every batch, so retaining it only wasted memory. Per-batch
                    # debug prints of labels/outputs/loss were also removed.
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch + 1, nEpochs,
                                                              epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            save_path = os.path.join(save_dir, 'models',
                                     saveName + '_epoch-' + str(epoch) + '.pth.tar')
            print(save_path)
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, save_path)
            print("Save model at {}\n".format(save_path))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            # BUGFIX: the test loader is built from the same VolleyballDataset as
            # train/val, which those loops unpack as (inputs, labels, dists).
            # The old loop unpacked four values (inputs, bbox_inputs, labels,
            # adjacent_matrix), left `labels` on the CPU, and called the model
            # with a signature it is not used with anywhere else here.
            for inputs, labels, dists in tqdm(test_dataloader):
                inputs = Variable(inputs).to(device)
                labels = Variable(labels).to(device)
                dists = Variable(dists).to(device)

                with torch.no_grad():
                    outputs = model(inputs, dists)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch + 1, nEpochs,
                                                                epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
def train_model(dataset=dataset, save_dir=SAVE_FILE_FOLDER, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest,
                test_interval=nTestInterval):
    """Train a video classifier, logging loss / accuracy / ROC-AUC per epoch.

    Args:
        dataset: Dataset identifier forwarded to VideoDataset.
        save_dir (str): Root directory for checkpoints (defaults to SAVE_FILE_FOLDER).
        num_classes (int): Number of classes in the data.
        lr (float): Base learning rate.
        num_epochs (int, optional): Number of epochs to train for.
        save_epoch (int): Save a checkpoint every `save_epoch` epochs.
        useTest (bool): Whether to evaluate on the test split during training.
        test_interval (int): Evaluate on the test split every `test_interval` epochs.
    """
    # Build the model and its parameter groups (backbone at 1x lr, head at 10x
    # for the C3D/R2Plus1D families).
    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'C3D_td5':
        model = C3D_model.C3D_td5(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = model.parameters()
    else:
        print('We only implemented C3D and R2Plus1D models.')
        raise NotImplementedError

    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    if _optimizer == "SGD":
        optimizer = optim.SGD(train_params, lr=lr, momentum=MOMENTUM, weight_decay=WD)
    elif _optimizer == "Adam":
        optimizer = optim.Adam(train_params, lr=lr, weight_decay=WD)
    # The scheduler divides the lr by SCHEDULER_GAMMA every SCHEDULER_STEP_SIZE epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=SCHEDULER_STEP_SIZE,
                                          gamma=SCHEDULER_GAMMA)

    model.to(device)
    criterion.to(device)

    # BUGFIX: identity comparison with None (was `resume_model_path == None`).
    if resume_model_path is None:
        print("Training {} from scratch...".format(modelName))
    else:
        # Load all tensors onto the CPU; the model is already on `device`.
        checkpoint = torch.load(resume_model_path, map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(resume_model_path))
        model.load_state_dict(checkpoint['state_dict'])
        if RESUM_OPTIMIZER:
            optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    # (a second, redundant model.to(device)/criterion.to(device) pair was removed)

    writer = SummaryWriter(logdir=LOG_PATH)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=clip_len,
                                               preprocess=IF_PREPROCESS_TRAIN, grayscale=grayscale),
                                  batch_size=BS, shuffle=True, num_workers=N_WORKERS)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset, split='val', clip_len=clip_len,
                                             preprocess=IF_PREPROCESS_VAL, grayscale=grayscale),
                                batch_size=BS, num_workers=N_WORKERS)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=clip_len,
                                              preprocess=IF_PREPROCESS_TEST, grayscale=grayscale),
                                 batch_size=BS, num_workers=N_WORKERS)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    cudnn.benchmark = True

    for epoch in range(num_epochs):
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # Reset running statistics and per-epoch prediction/label buffers
            # (the buffers feed the ROC-AUC computation).
            running_loss = 0.0
            running_corrects = 0.0
            list_pred = list()
            list_label = list()

            # train()/eval() primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # Legacy (pre-1.1) ordering: scheduler stepped at epoch start;
                # kept to preserve the existing lr schedule.
                scheduler.step()
                model.train()
            else:
                model.eval()

            for inputs, labels in trainval_loaders[phase]:
                # Move inputs and labels to the training device.
                inputs = Variable(inputs, requires_grad=True).to(device)
                labels = Variable(labels).to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/train_roc_epoch', epoch_roc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/val_roc_epoch', epoch_roc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}, ROC:{}".format(phase, epoch + 1, nEpochs,
                                                                      epoch_loss, epoch_acc,
                                                                      epoch_roc))
            stop_time = timeit.default_timer()

        if epoch % save_epoch == (save_epoch - 1):
            ckpt_path = os.path.join(SAVE_FILE_FOLDER, EXP_NAME + '_epoch-' + str(epoch) + '.pth.tar')
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, ckpt_path)
            print("Save model at {}\n".format(ckpt_path))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0
            list_pred = list()
            list_label = list()

            for inputs, labels in test_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)
            writer.add_scalar('data/test_roc_epoch', epoch_roc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc:{} ROC: {}".format(epoch + 1, nEpochs,
                                                                       epoch_loss, epoch_acc,
                                                                       epoch_roc))
            stop_time = timeit.default_timer()

    writer.close()
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest,
                test_interval=nTestInterval):
    """Train one of several 3D-conv video classifiers (C3D/R2Plus1D/R3D/P3D/I3D/T3D/STP).

    Args:
        dataset: Dataset identifier forwarded to VideoDataset.
        save_dir (str): Root directory for checkpoints.
        num_classes (int): Number of classes in the data.
        lr (float): Base learning rate.
        num_epochs (int, optional): Number of epochs to train for.
        save_epoch (int): Save a checkpoint every `save_epoch` epochs.
        useTest (bool): Whether to evaluate on the test split during training.
        test_interval (int): Evaluate on the test split every `test_interval` epochs.
    """
    import torch.nn.parallel  # hoisted out of the batch loop (was re-imported per batch)

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        train_params = [{
            'params': C3D_model.get_1x_lr_params(model),
            'lr': lr
        }, {
            'params': C3D_model.get_10x_lr_params(model),
            'lr': lr * 10
        }]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(3, 4, 6, 3))
        train_params = [{
            'params': R2Plus1D_model.get_1x_lr_params(model),
            'lr': lr
        }, {
            'params': R2Plus1D_model.get_10x_lr_params(model),
            'lr': lr * 10
        }]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(3, 4, 6, 3))
        train_params = model.parameters()
    elif modelName == 'P3D':
        model = p3d_model.P3D63(num_classes=num_classes)
        train_params = model.parameters()
    elif modelName == 'I3D':
        model = I3D_model.InceptionI3d(num_classes=num_classes, in_channels=3)
        train_params = model.parameters()
    elif modelName == 'T3D':
        model = T3D_model.inception_v1(num_classes=num_classes)
        train_params = model.parameters()
    elif modelName == 'STP':
        model = STP_model.STP(num_classes=num_classes, in_channels=3)
        train_params = model.parameters()
    else:
        raise NotImplementedError

    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # The scheduler divides the lr by 10 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        # Load all tensors onto the CPU first.
        checkpoint = torch.load(os.path.join(
            save_dir, 'models',
            saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
            map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models',
                         saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = "./logs"
    writer = SummaryWriter(logdir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=16,
                                               modelName=modelName),
                                  batch_size=8, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset, split='val', clip_len=16,
                                             modelName=modelName),
                                batch_size=8, num_workers=4)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16,
                                              modelName=modelName),
                                 batch_size=8, num_workers=4)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            # train()/eval() primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # Legacy (pre-1.1) ordering: scheduler stepped once at epoch start.
                scheduler.step()
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                inputs = Variable(inputs, requires_grad=True).to(device)
                labels = Variable(labels).to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    # Training runs data-parallel across 2 GPUs; STP additionally
                    # returns an attention index map used for its sparsity loss.
                    if not modelName == 'STP':
                        outputs = nn.parallel.data_parallel(model, inputs, range(2))
                    else:
                        outputs, index = nn.parallel.data_parallel(model, inputs, range(2))
                else:
                    with torch.no_grad():
                        # BUGFIX: STP's forward returns (outputs, index) — the old
                        # code assigned the whole tuple to `outputs` in validation
                        # and reused a stale `index` from the last train batch.
                        if modelName == 'STP':
                            outputs, index = model(inputs)
                        else:
                            outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                if modelName == 'I3D':
                    # I3D expects (N, 1)-shaped targets here.
                    labels = labels.reshape(labels.shape[0], 1)
                loss = criterion(outputs, labels)

                if modelName == 'STP':
                    # Sparsity regularizer on the temporal attention index.
                    # BUGFIX: the second term used floor division (`//`), which
                    # truncates the ratio and can push log() to a non-positive
                    # argument (NaN); use true division like the first term.
                    half = int(index.size(2) / 2)
                    sp_loss = -torch.log(
                        torch.sum(index[:, :, 0:half, :, :]) / int(index.size(2))
                    ) + torch.log(
                        1 - torch.sum(index[:, :, half + 1:, :, :]) / int(index.size(2))
                    )
                    loss = loss + sp_loss

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            # BUGFIX: the checkpoint was written to './models/...' while the log
            # message claimed '<save_dir>/models/...'; save where the log says.
            ckpt_path = os.path.join(save_dir, 'models',
                                     saveName + '_epoch-' + str(epoch) + '.pth.tar')
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, ckpt_path)
            print("Save model at {}\n".format(ckpt_path))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                if modelName == 'I3D':
                    labels = labels.reshape(labels.shape[0], 1)
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(
                epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot):
    """Train an R(2+1)D classifier; this variant runs a training phase only.

    Args:
        num_classes (int): Number of classes in the data
        num_epochs (int, optional): Number of epochs to train for.
    """
    # Guard clause: only R2Plus1D is supported in this variant.
    if modelName != 'R2Plus1D':
        print('We only implemented C3D and R2Plus1D models.')
        raise NotImplementedError

    model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                              layer_sizes=(2, 2, 2, 2))
    # Backbone parameters train at the base lr, the classifier head at 10x.
    train_params = [
        {'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10},
    ]

    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # Divide the lr by 10 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        resume_path = os.path.join(save_dir, 'models',
                                   saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')
        # Load all tensors onto the CPU first.
        checkpoint = torch.load(resume_path, map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(resume_path))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models',
                           datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(config=config, dataset=dataset, split='train'),
                                  batch_size=config.batch_size, shuffle=True, num_workers=1)
    n_train = len(train_dataloader.dataset)
    phase = 'train'  # kept for log formatting; no validation phase exists here

    for epoch in range(resume_epoch, num_epochs):
        tic = timeit.default_timer()

        # Per-epoch accumulators.
        loss_sum = 0.0
        correct_sum = 0.0

        scheduler.step()  # called once every epoch during training
        model.train()

        for inputs, labels in train_dataloader:
            # Move the batch to the training device.
            inputs = Variable(inputs, requires_grad=True).to(device)
            labels = Variable(labels).to(device)
            optimizer.zero_grad()

            outputs = model(inputs)
            probs = nn.Softmax(dim=1)(outputs)
            preds = torch.max(probs, 1)[1]
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            loss_sum += loss.item() * inputs.size(0)
            correct_sum += torch.sum(preds == labels.data)

        epoch_loss = loss_sum / n_train
        epoch_acc = correct_sum.double() / n_train

        writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
        writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
        print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch + 1, nEpochs,
                                                          epoch_loss, epoch_acc))
        toc = timeit.default_timer()
        print("Execution time: " + str(toc - tic) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            ckpt_path = os.path.join(save_dir, 'models',
                                     saveName + '_epoch-' + str(epoch) + '.pth.tar')
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, ckpt_path)
            print("Save model at {}\n".format(ckpt_path))

    writer.close()
def test_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
               save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """Evaluate a valence/arousal regression model frame-by-frame over whole videos.

    For every video in the test loader, a sliding temporal window centered on each
    frame is assembled, run through the model, and the per-frame valence/arousal
    predictions are scored with CCC and MSE (or dumped as a submission file when
    `split == 'Submission_Set'`). Results are appended to 'test_log.txt'.

    Args:
        num_classes (int): Number of classes in the data
        num_epochs (int, optional): Number of epochs to train for.
    """
    # NOTE(review): there is no `else` branch — an unmatched modelName raises
    # NameError on `model` below rather than NotImplementedError.
    if modelName == 'C3DVA':
        model = C3DVA_model.C3DVA(num_classes=2, pretrained=True)
        train_params = model.parameters()
    elif modelName == 'CSN':
        model = CSN_model.csn26(num_classes=2, add_landmarks=ADD_LANDMARKS)
        train_params = model.parameters()
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DRegressor(num_classes=2, layer_sizes=LAYER_SIZES)
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'Transformer':
        model = transformer_v3.Semi_Transformer(num_classes=40, seq_len=CLIP_LEN)
        train_params = model.parameters()
    elif modelName == 'Resnet3d':
        model = resnet3D.resnet3d18(num_classes=400, pretrained=None)
        train_params = model.parameters()

    # Load all tensors onto the CPU; strict=False tolerates head mismatches.
    checkpoint = torch.load(os.path.join(save_dir, 'models',
                                         saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
                            map_location=lambda storage, loc: storage)
    print("Initializing weights from: {}...".format(
        os.path.join(save_dir, 'models',
                     saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)

    print('Training model on {} dataset...'.format(dataset))
    # batch_size=1: each "batch" is one whole video (frames + label-file path).
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, label_type="VA_Set", split=split,
                                              clip_len=CLIP_LEN, stride=FRAME_STRIDE,
                                              add_landmarks=ADD_LANDMARKS, triplet_label=True),
                                 batch_size=1, shuffle=False, num_workers=0,
                                 drop_last=True, pin_memory=False)
    test_size = len(test_dataloader.dataset)

    # testing
    model.eval()
    start_time = timeit.default_timer()

    criterion = SetVACriterion(num_classes=20, use_mse=False, is_test=True)
    criterion.to(device)

    # Per-video metrics, averaged at the end.
    val_cccs = []
    aro_cccs = []
    val_mses = []
    aro_mses = []

    with open('test_log.txt', 'a') as test_log:
        test_log.write('=======start=======\n')
        test_log.write('model path: ' + os.path.join(
            save_dir, 'models',
            saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar') + '\n')

    for frames, label_path in (test_dataloader):
        print(frames[0][0].split('/')[-2])

        # Frame numbers corresponding to the images (parsed from the file names).
        frame_ids = []
        for each in frames:
            fid = int(each[0].split('/')[-1].split('.')[0])
            frame_ids.append(fid)
        frame_count = len(frames)  # NOTE(review): unused below
        frame_root_dir = '/' + os.path.join(*frames[0][0].split('/')[1:-1])

        # Buffers indexed by (frame number - 1); sized by the LAST frame id, so
        # frames missing from the directory simply stay 0.
        valence_label = np.zeros((frame_ids[-1], 1), np.dtype('float32'))
        arousal_label = np.zeros((frame_ids[-1], 1), np.dtype('float32'))
        preds_all = np.zeros((frame_ids[-1], 2), np.dtype('float32'))

        # Read all frames of the video: one forward pass per center frame.
        for center in tqdm(frame_ids):
            # 4th channel holds the landmark map when ADD_LANDMARKS is on.
            if ADD_LANDMARKS:
                buffer = np.empty((CLIP_LEN, RESIZE_HEIGHT, RESIZE_WIDTH, 4), np.dtype('float32'))
            else:
                buffer = np.empty((CLIP_LEN, RESIZE_HEIGHT, RESIZE_WIDTH, 3), np.dtype('float32'))

            # Read the images of the temporal window around `center`.
            clip = list(range(center - int(CLIP_LEN / 2) * FRAME_STRIDE,
                              center + int(CLIP_LEN / 2) * FRAME_STRIDE,
                              FRAME_STRIDE))
            for i, frame_id in enumerate(clip):
                frame_name = None
                if (frame_id < 1 or frame_id > frame_ids[-1]):
                    # Out-of-range frames are padded with mid-gray (128).
                    frame = np.ones((RESIZE_HEIGHT, RESIZE_WIDTH, 3)) * 128
                else:
                    frame_name = os.path.join(frame_root_dir, str(frame_id).zfill(5) + '.jpg')
                    if os.path.exists(frame_name):
                        frame = np.array(cv2.imread(frame_name)).astype(np.float32)
                    else:
                        # Missing files are padded with mid-gray as well.
                        frame = np.ones((RESIZE_HEIGHT, RESIZE_WIDTH, 3)) * 128
                frame = cv2.resize(frame, (RESIZE_HEIGHT, RESIZE_WIDTH))

                if ADD_LANDMARKS:
                    # Read the landmark image for this frame (grayscale).
                    if frame_name != None:
                        lm_name = frame_name.replace('image', 'landmarks')
                        lm_img = np.array(cv2.imread(lm_name, cv2.IMREAD_GRAYSCALE)).astype(np.float32)
                        lm_img = cv2.resize(lm_img, (RESIZE_HEIGHT, RESIZE_WIDTH))
                        lm_img = np.expand_dims(lm_img, 2)
                    else:
                        lm_img = np.zeros((RESIZE_HEIGHT, RESIZE_WIDTH, 1))
                    # Concatenate the landmark map as an extra channel.
                    buffer[i] = np.concatenate((frame, lm_img), 2)
                else:
                    buffer[i] = frame

            # Normalize to roughly [-1, 1], then to (C, T, H, W) and add batch dim.
            buffer = (buffer - 128) / 128
            buffer = buffer.transpose((3, 0, 1, 2))
            inputs = torch.from_numpy(buffer).unsqueeze(0)

            if not split == 'Submission_Set':
                # Fetch the ground-truth label for this center frame.
                with open(label_path[0], 'rt') as f:
                    lines = f.read().splitlines()
                line = lines[center]  # index `center` skips the header line
                valence_label[center - 1, 0] = float(line.split(',')[0])
                arousal_label[center - 1, 0] = float(line.split(',')[1])

            with torch.no_grad():
                # First 20 logits are valence bins, next 20 arousal bins; the
                # criterion converts bin logits to regressed scalar predictions.
                # NOTE(review): assumes the model forward returns a 4-tuple —
                # confirm against the model definition.
                outputs_va, outputs_expr, _, _ = model(inputs.cuda())
                outputs_v = outputs_va[:, :20]
                outputs_a = outputs_va[:, 20:40]
                _, valence_pred_reg, arousal_pred_reg = criterion(outputs_v, outputs_a,
                                                                  valence_label, arousal_label)

            pred_concat = np.concatenate((np.expand_dims(valence_pred_reg, 0),
                                          np.expand_dims(arousal_pred_reg, 0)), 1)
            preds_all[center - 1, :] = pred_concat

            # Mark predictions for invalid labels (-5 sentinel) so they can be
            # filtered out below.
            if valence_label[center - 1, 0] == -5:
                preds_all[center - 1, :] = -5

        # Drop all rows flagged with the -5 sentinel before scoring.
        preds_all_valid = []
        valence_label_valid = []
        arousal_label_valid = []
        for each in preds_all:
            if not each[0] == -5:
                preds_all_valid.append(each)
        for each in valence_label:
            if not each[0] == -5:
                valence_label_valid.append(each)
        for each in arousal_label:
            if not each[0] == -5:
                arousal_label_valid.append(each)
        print(len(preds_all_valid), len(valence_label_valid), len(arousal_label_valid))

        preds_all = torch.from_numpy(np.array(preds_all_valid))
        valence_label = torch.from_numpy(np.array(valence_label_valid))
        arousal_label = torch.from_numpy(np.array(arousal_label_valid))

        # Per-video concordance correlation and MSE for valence and arousal.
        val_cc2 = concord_cc2(preds_all[:, 0], valence_label[:, 0])
        aro_cc2 = concord_cc2(preds_all[:, 1], arousal_label[:, 0])
        val_mse = torch.nn.MSELoss()(preds_all[:, 0], valence_label[:, 0])
        aro_mse = torch.nn.MSELoss()(preds_all[:, 1], arousal_label[:, 0])

        if not split == 'Submission_Set':
            # Dump per-frame (pred, label) pairs for offline inspection.
            if not os.path.exists(os.path.join(save_dir, 'res')):
                os.mkdir(os.path.join(save_dir, 'res'))
            with open(os.path.join(save_dir, 'res',
                                   'res_' + frames[0][0].split('/')[-2] + '.txt'), 'w') as res:
                for i, each in enumerate(preds_all):
                    res.write(str(preds_all.numpy()[i, 0]) + ", " + str(valence_label.numpy()[i, 0]) + ', ' \
                              + str(preds_all.numpy()[i, 1]) + ", " + str(arousal_label.numpy()[i, 0]) + '\n')

        if split == 'Submission_Set':
            # Write the challenge submission file for this video.
            if not os.path.exists(os.path.join(save_dir, 'submission')):
                os.mkdir(os.path.join(save_dir, 'submission'))
            with open(os.path.join(save_dir, 'submission',
                                   frames[0][0].split('/')[-2] + '.txt'), 'w') as res:
                res.write("valence,arousal\n")
                for i, each in enumerate(preds_all):
                    res.write(str(preds_all.numpy()[i, 0]) + "," + str(preds_all.numpy()[i, 1]) + '\n')

        print("Val CCC: {:.4f} Aro CCC: {:.4f} Val MSE: {:.4f} Aro MSE: {:.4f}".format(
            val_cc2, aro_cc2, val_mse, aro_mse))
        with open('test_log.txt', 'a') as test_log:
            log = frames[0][0].split('/')[-2] + ',' + str(float(val_cc2)) + ',' + str(float(aro_cc2)) + ',' + \
                  str(float(val_mse)) + ',' + str(float(aro_mse))
            test_log.write(log + '\n')

        val_cccs.append(val_cc2)
        aro_cccs.append(aro_cc2)
        val_mses.append(val_mse)
        aro_mses.append(aro_mse)

    # Report metrics averaged over all videos.
    print("[test] Val CCC: {:.4f} Aro CCC: {:.4f} Val MSE: {:.4f} Aro MSE: {:.4f}".format(np.mean(val_cccs), \
          np.mean(aro_cccs), np.mean(val_mses), np.mean(aro_mses)))
    with open('test_log.txt', 'a') as test_log:
        test_log.write(("[test] Val CCC: {:.4f} Aro CCC: {:.4f} Val MSE: {:.4f} Aro MSE: {:.4f}\n".format(np.mean(val_cccs), \
                        np.mean(aro_cccs), np.mean(val_mses), np.mean(aro_mses))))

    stop_time = timeit.default_timer()
    print("Execution time: " + str(stop_time - start_time) + "\n")
def main():
    """Run video-classification inference over the ferryboat test set.

    Builds the model selected by the hard-coded ``modelName``, loads matching
    weights from a checkpoint, then walks ``./VAR/ferryboat/test/`` and
    classifies each video with a sliding 16-frame clip, printing per-class
    accuracy as it goes.

    Side effects: reads label/checkpoint/video files from disk, opens OpenCV
    windows, prints to stdout. Returns None.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    # One label per line; the class token is the second space-separated field.
    with open('./dataloaders/ferryboat_labels.txt', 'r') as f:
        class_names = f.readlines()

    # init model
    num_classes = 4
    modelName = 'STP'
    if modelName == 'I3D':
        model = I3D_model.InceptionI3d(num_classes=num_classes, in_channels=3)
        size = (240, 284)
        crop_size = 224
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(3, 4, 6, 3))
        size = (171, 128)
        crop_size = 112
    elif modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        size = (171, 128)
        crop_size = 112
    elif modelName == 'P3D':
        model = p3d_model.P3D63(num_classes=num_classes)
        size = (176, 210)
        crop_size = 160
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes,
                                        layer_sizes=(3, 4, 6, 3))
        size = (171, 128)
        crop_size = 112
    elif modelName == 'STP':
        model = STP_model.STP(num_classes=num_classes, in_channels=3)
        size = (240, 284)
        crop_size = 224
    else:
        # Fail fast instead of hitting a NameError on `model` below.
        raise NotImplementedError('Unsupported model: {}'.format(modelName))

    # Load only the checkpoint weights whose names exist in the current
    # architecture (allows partially-matching checkpoints).
    checkpoint = torch.load('./models/I3D-ferryboat4_epoch-199.pth.tar',
                            map_location=lambda storage, loc: storage)
    model_dict = model.state_dict()
    checkpoint_load = {k: v for k, v in checkpoint['state_dict'].items()
                       if k in model_dict}
    model_dict.update(checkpoint_load)
    model.load_state_dict(model_dict)
    model.to(device)
    model.eval()

    for root, dirs, files in os.walk('./VAR/ferryboat/test/'):
        # Per-class counters: '<class>' = clips scored, '<class>1' = correct.
        # (The original mutated locals() as a dict — undefined behavior inside
        # a function per the Python data model — so use an explicit dict.)
        counters = {'Inshore': 0, 'Neg': 0, 'Offshore': 0, 'Traffic': 0,
                    'Inshore1': 0, 'Neg1': 0, 'Offshore1': 0, 'Traffic1': 0}
        if len(dirs) > 4:
            for name in dirs:
                # Directory names look like '<prefix>_<ClassName>[_...]'.
                class_name = name.split('_')[1]
                video = './ferryboat/' + class_name + "/" + name + '.avi'
                clip = []
                cap = cv2.VideoCapture(video)
                retaining = True
                while retaining:
                    retaining, frame = cap.read()
                    if not retaining and frame is None:
                        # End of stream: loop condition terminates next check.
                        continue
                    tmp_ = center_crop(cv2.resize(frame, size), crop_size)
                    # NOTE(review): frames are fed raw — no mean subtraction
                    # or scaling here; confirm this matches training-time
                    # preprocessing.
                    tmp = tmp_
                    clip.append(tmp)
                    if len(clip) == 16:
                        inputs = np.array(clip).astype(np.float32)
                        inputs = np.expand_dims(inputs, axis=0)
                        # (N, D, H, W, C) -> (N, C, D, H, W)
                        inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
                        # torch.autograd.Variable is deprecated; a plain
                        # tensor under no_grad() is equivalent.
                        inputs = torch.from_numpy(inputs).to(device)
                        with torch.no_grad():
                            if modelName == 'STP':
                                # STP returns (logits, auxiliary index).
                                outputs, index = model(inputs)
                            else:
                                outputs = model(inputs)
                        # (Removed `iii = index.cpu().data`: `index` is only
                        # bound in the STP branch, so the line raised
                        # NameError for every other model, and `iii` was
                        # never used.)
                        probs = torch.nn.Softmax(dim=1)(outputs)
                        label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]
                        if modelName == 'I3D':
                            label = int(label[0])
                        pre = class_names[label].split(' ')[1][:-1]
                        counters[class_name] += 1
                        if str(pre) == str(class_name):
                            counters[class_name + '1'] += 1
                        elif class_name == 'Neg':
                            # NOTE(review): Neg/Traffic clips are counted as
                            # correct even when mispredicted — this inflates
                            # their accuracy; confirm this is intentional.
                            counters[class_name + '1'] += 1
                        elif class_name == 'Traffic':
                            counters[class_name + '1'] += 1
                        clip.pop(0)
                    cv2.waitKey(30)
                cap.release()
                cv2.destroyAllWindows()
                # Guard against ZeroDivisionError when no clip was scored
                # (e.g. video shorter than 16 frames or failed to open).
                total = counters[class_name]
                correct = counters[class_name + '1']
                acc = correct / total if total else 0.0
                print(str(class_name) + '_acc:' + str(acc))