def __init__(self, train=True, classes=range(10), num_classes=10, samples=10):
    """Build a small, class-filtered subset of UCF101 video clips.

    Args:
        train (bool): select the 'train' split when True, otherwise 'test'.
        classes (iterable[int]): class indices to keep.
        num_classes (int): size of the per-class sampling-budget array.
        samples (int): maximum clips kept per class (train split only).
    """
    self.train = train
    self.classes = classes
    split = 'train' if self.train else 'test'
    # NOTE(review): assumes VideoDataset[i] yields (clip, label) with a
    # numeric label -- confirm against the VideoDataset implementation.
    self.dataset = VideoDataset(dataset='ucf101', split=split, clip_len=16)
    if self.train:
        train_data = []
        train_labels = []
        # Remaining per-class budget: `samples` clips for each class.
        num_samples = samples * np.ones(num_classes)
        for i in range(len(self.dataset)):
            images, labels = self.dataset[i]
            label = int(labels)
            # Keep only every 5th clip so samples spread across the split,
            # and stop once a class's budget is exhausted.
            if label in self.classes and i % 5 == 0 and num_samples[label] > 0:
                num_samples[label] -= 1
                train_data.append(images)
                train_labels.append(label)
        self.train_data = train_data
        self.train_labels = train_labels
    else:
        # Test split: keep every clip whose label is in the requested classes.
        test_data = []
        test_labels = []
        for i in range(len(self.dataset)):
            images, labels = self.dataset[i]
            if int(labels) in self.classes:
                test_data.append(images)
                test_labels.append(int(labels))
        self.test_data = test_data
        self.test_labels = test_labels
def __init__(self, train=True, classes=range(10)):
    """Load every UCF101 clip whose label falls in `classes`.

    Args:
        train (bool): select the 'train' split when True, otherwise 'test'.
        classes (iterable[int]): class indices to keep.
    """
    self.train = train
    self.classes = classes
    split = 'train' if self.train else 'test'
    self.dataset = VideoDataset(dataset='ucf101', split=split,
                                clip_len=16, preprocess=False)
    print('Dataset Loaded')
    # Collect the filtered clips once; the train/test branches only differ
    # in which attributes receive the result.
    data = []
    labels_out = []
    for i in range(len(self.dataset)):
        images, labels = self.dataset[i]
        if int(labels) in self.classes:
            data.append(images)
            labels_out.append(int(labels))
    if self.train:
        self.train_data = data
        self.train_labels = labels_out
    else:
        self.test_data = data
        self.test_labels = labels_out
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest,
                test_interval=nTestInterval):
    """Train a C3D model on a video dataset with periodic val/test passes.

    Args:
        dataset (str): dataset name passed to VideoDataset.
        save_dir (str): directory for checkpoints, confusion matrices and logs.
        num_classes (int): number of classes in the data.
        lr (float): base learning rate for SGD.
        num_epochs (int, optional): number of epochs to train for.
        save_epoch (int): checkpoint every `save_epoch` epochs.
        useTest (bool): evaluate on the test split every `test_interval` epochs.
        test_interval (int): epochs between test evaluations.
    """
    model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    # The scheduler divides the lr by 10 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        ckpt_path = os.path.join(
            save_dir, 'models',
            saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')
        # Load all tensors onto the CPU first; the model is moved to `device` below.
        checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(ckpt_path))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir)
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=16),
                                  batch_size=20, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset, split='val', clip_len=16),
                                batch_size=20, num_workers=4)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16),
                                 batch_size=20, num_workers=4)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    # NOTE(review): these accumulate across ALL epochs and phases (never reset),
    # so each saved confusion matrix is cumulative -- confirm this is intended.
    lab_list = []
    pred_list = []
    for epoch in range(resume_epoch, num_epochs):
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # Reset the running loss and corrects.
            running_loss = 0.0
            running_corrects = 0.0

            # train()/eval() mode primarily affects layers such as
            # BatchNorm or Dropout.
            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, labels in trainval_loaders[phase]:
                # Move inputs and labels to the device the training is
                # taking place on.
                inputs = inputs.to(device).requires_grad_()
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    # Temporally reversed copy of the 16-frame clip
                    # (frame t -> frame 15 - t) fed alongside the forward clip.
                    inputs_rev = torch.stack(
                        [inputs[:, :, 15 - i, :, :] for i in range(16)])
                    inputs_rev = inputs_rev.permute(1, 2, 0, 3, 4)
                    outputs = model(inputs_rev, inputs)
                else:
                    with torch.no_grad():
                        # NOTE(review): validation feeds the clip twice instead
                        # of (reversed, forward) as in training -- confirm
                        # intended before changing.
                        outputs = model(inputs, inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)
                lab_list += labels.cpu().numpy().tolist()
                pred_list += preds.cpu().numpy().tolist()

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                # Step the lr scheduler once per epoch, AFTER the epoch's
                # optimizer updates (required ordering since PyTorch 1.1).
                scheduler.step()

            conf_mat = confusion_matrix(lab_list, pred_list)
            np.save(
                "{}.npy".format(
                    os.path.join(save_dir, saveName + '_epoch-' + str(epoch))
                    + '_' + phase),
                conf_mat)
            fig = plt.figure()
            plt.imshow(conf_mat)
            writer.add_figure('conf_mat_' + phase, fig, epoch)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                },
                os.path.join(save_dir, saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(
                os.path.join(save_dir, 'models',
                             saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in test_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs, inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)
            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(
                epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
) def forward(self, x): batch_size, seq_length, c, h, w = x.shape # pdb.set_trace() x = x.view(batch_size * seq_length, c, h, w) x = self.feature_extractor(x) x = x.view(batch_size * seq_length, -1) x = self.final(x) x = x.view(batch_size, seq_length, -1) return x if __name__ == '__main__': train_dataloader = DataLoader(VideoDataset(dataset='ucf101', split='train', clip_len=16), batch_size=20, shuffle=True, num_workers=4) model = ConvLSTM( num_classes=101, latent_dim=512, lstm_layers=1, hidden_dim=1024, bidirectional=True, attention=True, ) model.to('cuda') for inputs, labels in (train_dataloader): inputs = inputs.permute(0, 2, 1, 3, 4)
super(Modified_Classifier, self).__init__() self.extractor = nn.Sequential(nn.Linear(8192, 1024), nn.ReLU(), nn.BatchNorm1d(1024, momentum=0.01), nn.Linear(1024, 512), nn.ReLU(), nn.BatchNorm1d(512, momentum = 0.01)) self.classifier_out = nn.Linear(512, num_classes, bias = bias) def forward(self, x): x = x.view(x.size(0), -1) x = self.extractor(x) return (x) ################################################################ if __name__ == '__main__': os.environ['CUDA_VISIBLE_DEVICES'] = "3" train_dataloader = DataLoader(VideoDataset(dataset='ucf101', split='train',clip_len=16), batch_size=20, shuffle=True, num_workers=4) model = ConvLSTM( num_classes=101, latent_dim=512, lstm_layers=1, hidden_dim=1024, bidirectional=True, attention=True, ) model.to('cuda') classifier_model = Classifier(10) classifier_model.to('cuda') for inputs, labels in (train_dataloader): inputs = inputs.permute(0,2,1,3,4) image_sequences = Variable(inputs.to("cuda"), requires_grad=True)