def run(init_lr=0.01, root='', split_file='data/annotations/charades.json', batch_size=8, save_dir='', stride=4, num_span_frames=32, num_epochs=200):
    """Train I3D on Charades (157 classes) and log loss/mAP to TensorBoard.

    Args:
        init_lr: learning rate for the Adam optimizer.
        root: root directory of the video frames (passed to Dataset).
        split_file: Charades annotation json.
        batch_size: batch size for both train and val loaders.
        save_dir: directory handed to save_checkpoint after each train phase.
        stride, num_span_frames: frame-sampling parameters forwarded to Dataset.
        num_epochs: upper bound on training epochs.

    Relies on module-level `args.checkpoint_path` to optionally resume from a
    checkpoint dict with keys 'model_state_dict', 'steps', 'epoch'.
    """
    writer = SummaryWriter()  # tensorboard logging
    # setup dataset
    train_transforms = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor()])
    test_transforms = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor()])
    print('Getting train dataset...')
    train_dataset = Dataset(split_file, 'training', root, train_transforms,
                            stride, num_span_frames, is_sife=False)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   pin_memory=True)
    print('Getting validation dataset...')
    val_dataset = Dataset(split_file, 'testing', root, test_transforms,
                          stride, num_span_frames, is_sife=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=0,
                                                 pin_memory=True)
    dataloaders = {'train': train_dataloader, 'val': val_dataloader}

    print('Loading model...')
    # setup the model
    i3d = InceptionI3d(400, in_channels=3)
    if args.checkpoint_path:
        # Resume: checkpoint was saved from a DataParallel model, so strip the
        # 'module.' prefix from every key before loading.
        i3d.replace_logits(157)
        state_dict = torch.load(args.checkpoint_path)['model_state_dict']
        checkpoint = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove 'module'
            checkpoint[name] = v
        i3d.load_state_dict(checkpoint)
    else:
        # Fresh start from the Kinetics/ImageNet-pretrained weights, then swap
        # the logits layer for the 157 Charades classes.
        i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        i3d.replace_logits(157)
    i3d.cuda()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.device_count() > 1:
        print('Using {} GPUs'.format(torch.cuda.device_count()))
        i3d = nn.DataParallel(i3d)
    i3d.to(device)
    print('Loaded model.')

    optimizer = optim.Adam(i3d.parameters(), lr=init_lr)
    #lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [30], gamma=0.1)

    # NOTE(review): the checkpoint file is re-read from disk here (twice) after
    # already being loaded above — consider loading it once into a variable.
    steps = 0 if not args.checkpoint_path else torch.load(
        args.checkpoint_path)['steps']
    start_epoch = 0 if not args.checkpoint_path else torch.load(
        args.checkpoint_path)['epoch']

    # TRAIN
    for epoch in range(start_epoch, num_epochs):
        print('-' * 50)
        print('EPOCH {}/{}'.format(epoch, num_epochs))
        print('-' * 50)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
                print('-' * 10, 'TRAINING', '-' * 10)
            else:
                i3d.train(False)  # Set model to evaluate mode
                print('-' * 10, 'VALIDATION', '-' * 10)

            # Iterate over data.
            all_preds = []
            all_labels = []
            print('Entering data loading...')
            for i, data in enumerate(dataloaders[phase]):
                # get the inputs; `vid` (video id) is unused in this loop
                inputs, labels, vid = data
                t = inputs.shape[2]  # temporal length of the clip
                inputs = inputs.cuda()
                labels = labels.cuda()

                if phase == 'train':
                    per_frame_logits = i3d(inputs)
                else:
                    with torch.no_grad():
                        per_frame_logits = i3d(inputs)

                # upsample to input size
                per_frame_logits = F.interpolate(
                    per_frame_logits, t, mode='linear')  # B x Classes x T
                # max-pool over time to get one score per class per clip
                max_frame_logits = torch.max(per_frame_logits, dim=2)[0]  # B x Classes
                labels = torch.max(labels, dim=2)[0]  # B x Classes

                if phase == 'train':
                    loss = F.binary_cross_entropy_with_logits(
                        max_frame_logits, labels)
                    writer.add_scalar('loss/train', loss, steps)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if steps % 10 == 0:
                        print('Step {} {} loss: {:.4f}'.format(
                            steps, phase, loss))
                    steps += 1

                # metrics for validation
                # NOTE(review): `pred` is thresholded at 0.5 before being fed
                # to average_precision_score as y_score — AP over a binarized
                # score is degenerate; consider passing the sigmoid scores.
                pred = (torch.sigmoid(max_frame_logits) >= 0.5).float()  # predicted labels for this batch (B x C)
                if i == 0:
                    all_preds = np.array(pred.tolist())
                    all_labels = np.array(labels.tolist())
                else:
                    all_preds = np.append(all_preds, pred.tolist(), axis=0)
                    all_labels = np.append(all_labels, labels.tolist(), axis=0)

            # Eval: per-class AP over the whole epoch, NaN-mean across classes
            all_APs = [
                metrics.average_precision_score(y_true=all_labels[:, j],
                                                y_score=all_preds[:, j])
                for j in range(157)
            ]
            mAP = np.nanmean(all_APs)
            if phase == 'train':
                writer.add_scalar('mAP/train', mAP, epoch)
                print('-' * 50)
                print('{} mAP: {:.4f}'.format(phase, mAP))
                print('-' * 50)
                save_checkpoint(i3d, optimizer, loss, save_dir, epoch, steps)  # save checkpoint after epoch!
            else:
                writer.add_scalar('mAP/val', mAP, epoch)
                print('{} mAP: {:.4f}'.format(phase, mAP))
        #lr_sched.step() # step after epoch
    writer.close()
def run(init_lr=0.001, max_steps=20, mode='rgb', root='/proxy/', train_split='./scott.txt', test_split="./scottt.txt", batch_size=8 * 5, save_model='nope'):
    """Train the ProxyNetwork with a distance-weighted cross-entropy objective.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: number of optimizer steps before training stops.
        mode: 'rgb' or 'flow'; 'flow' swaps in a 2-channel I3D instead.
        root: dataset root (immediately overwritten with "./proxy/" below).
        train_split / test_split: split files handed to Dataset.
        batch_size: loader batch size.
        save_model: unused here; checkpoints are written as "customloss<N>.pt".
    """
    # This table contains the distance between two possible ordering sequences
    # It is therefore a 120*120 table
    distance_dict = np.load("distance_dict.npy")
    distance_dict = torch.from_numpy(distance_dict).float().cuda()

    # NOTE(review): this silently overrides the `root` parameter.
    root = "./proxy/"
    dataset = Dataset(
        train_split,
        root,
        mode,
    )
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             pin_memory=True)

    val_dataset = Dataset(test_split, root, mode)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=8,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        # Imagenet Pretraining
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        # You can modify the number of outputs in the file Siamese_I3D.py
        i3d = ProxyNetwork()
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 1  # accum gradient
    steps = 0
    # train it
    while steps < max_steps:
        #for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        t1 = time.time()
        processed_elements = 0
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0
            # NOTE(review): tot_cls_loss is printed below but never accumulated
            # anywhere in this function — it always reads 0.
            tot_cls_loss = 0.0
            optimizer.zero_grad()

            # Iterate over data.
            for data in dataloaders[phase]:
                # progress counter; 40 presumably matches the effective clip
                # count per batch — TODO confirm against the Dataset
                processed_elements += 40
                # get the inputs
                inputs, labels = data
                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)
                labels = Variable(labels.cuda())

                # Custom loss implementation
                # Depending on the "real" labels: scale each sample's logits by
                # the distance row of its ground-truth ordering index.
                per_frame_logits = i3d(inputs)
                for i in range(labels.shape[0]):
                    #print(i)
                    per_frame_logits[i] *= distance_dict[labels[i][0][0]]

                # upsample to input size
                #per_frame_logits = F.upsample(per_frame_logits, t, mode='linear')
                per_frame_logits = per_frame_logits.squeeze()
                labels = labels.squeeze()
                labels = labels.type(torch.LongTensor)
                labels = labels.cuda()

                # compute localization loss
                loc_loss = F.cross_entropy(per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()

                # Class loss
                loss = loc_loss / num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                # 10800 is the number of elements in the training set
                len_training_set = 10800
                print("processed elements : " + str(processed_elements) +
                      " / " + str(len_training_set))
                print(time.time() - t1)

                if phase == 'train':
                    steps += 1
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    # NOTE(review): `steps % 1 == 0` is always true, so this
                    # prints and saves the FULL model object every step.
                    if steps % 1 == 0:
                        print(
                            '{} Train Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'
                            .format(phase,
                                    tot_loc_loss / (10 * num_steps_per_update),
                                    tot_cls_loss / (10 * num_steps_per_update),
                                    tot_loss / 10))
                        # save model
                        torch.save(i3d, "customloss" + str(steps) + '.pt')
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print(
                    '{} Val Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'
                    .format(phase, tot_loc_loss, tot_cls_loss,
                            (tot_loss * num_steps_per_update)))
def run(init_lr=0.1, max_step=64e3, mode='rgb', root='/ssd/Charades_v1_rgb', train_split='charades/charades.json', batch_size=8*5, save_model=''):
    """Train I3D on Charades (157 classes) with gradient accumulation.

    Fixes over the original: the loop body referenced the undefined name
    `max_steps` (the parameter is `max_step`) — a NameError on first entry;
    `RandomHorisontalFlip` was a typo for `RandomHorizontalFlip`; Python 2
    `print` statements, deprecated `F.upsample`, and `.data[0]` (which raises
    on 0-dim tensors in PyTorch >= 0.4) are modernized.

    Args:
        init_lr: initial SGD learning rate.
        max_step: total number of optimizer steps to run.
        mode: 'rgb' (3-channel) or 'flow' (2-channel) input stream.
        root: root directory of the frame data.
        train_split: path to the Charades split json.
        batch_size: loader batch size.
        save_model: filename prefix for checkpoints saved every 10 steps.
    """
    # setup dataset
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224),
                                           videotransforms.RandomHorizontalFlip()])
    test_transforms = transforms.Compose([videotransforms.RandomCrop(224)])

    dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=True, num_workers=36,
                                             pin_memory=True)

    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True, num_workers=36,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model: start from ImageNet/Kinetics weights, then swap the
    # logits layer for the 157 Charades classes
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(400, in_channels=3)
        i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
    i3d.replace_logits(157)
    #i3d.load_state_dict(torch.load('/ssd/models/000920.pt'))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 4  # accum gradient
    steps = 0
    # train it
    while steps < max_step:  # BUG FIX: was `max_steps` (undefined name)
        print('Step {}/{}'.format(steps, max_step))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            optimizer.zero_grad()

            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs
                inputs, labels = data

                # wrap them in Variable (kept for compatibility with the rest
                # of this file; a no-op on modern PyTorch)
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)  # temporal length
                labels = Variable(labels.cuda())

                per_frame_logits = i3d(inputs)
                # upsample to input size (F.upsample is deprecated)
                per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()

                # compute classification loss (with max-pooling along time B x C x T)
                cls_loss = F.binary_cross_entropy_with_logits(
                    torch.max(per_frame_logits, dim=2)[0],
                    torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()

                loss = (0.5*loc_loss + 0.5*cls_loss)/num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
                            phase,
                            tot_loc_loss/(10*num_steps_per_update),
                            tot_cls_loss/(10*num_steps_per_update),
                            tot_loss/10))
                        # save model
                        torch.save(i3d.module.state_dict(),
                                   save_model+str(steps).zfill(6)+'.pt')
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
                    phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter,
                    (tot_loss*num_steps_per_update)/num_iter))
def run(init_lr=0.1, max_steps=64e3, mode='rgb', root='../../SSBD/ssbd_clip_segment/data/', train_split='../../SSBD/Annotations/annotations_charades.json', batch_size=1, save_model=''):
    """Fine-tune an XDC R(2+1)D-18 video encoder on SSBD (3 classes).

    Only the final fc and the '4.1' residual stage are trained; everything
    else is frozen. Keeps the best-accuracy model at `save_model + '.pt'`.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: total optimizer steps to run.
        mode: kept for interface parity; the I3D branch is commented out.
        root / train_split: dataset location and annotation json.
        batch_size: loader batch size (also used as the per-batch sample count
            for the running accuracy denominator).
        save_model: path prefix for the best-model checkpoint.
    """
    # setup dataset
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=4,
                                             pin_memory=True)

    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}
    # dataloaders = {'train': dataloader}
    # datasets = {'train': dataset}

    # setup the model: pretrained XDC encoder from torch.hub, 3 output classes
    xdc = torch.hub.load('HumamAlwassel/XDC',
                         'xdc_video_encoder',
                         pretraining='r2plus1d_18_xdc_ig65m_kinetics',
                         num_classes=3)
    # if mode == 'flow':
    #     i3d = InceptionI3d(400, in_channels=2)
    #     i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    # else:
    #     i3d = InceptionI3d(400, in_channels=3)
    #     i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
    # i3d.replace_logits(8)
    # #i3d.load_state_dict(torch.load('/ssd/models/000920.pt'))
    # i3d.cuda()
    # i3d = nn.DataParallel(i3d)
    xdc.cuda()
    xdc = nn.DataParallel(xdc).cuda()

    # freeze everything except the classifier head and the last ('4.1') block
    for name, param in xdc.named_parameters():
        if 'fc' not in name and '4.1' not in name:
            param.requires_grad = False

    lr = init_lr
    optimizer = optim.SGD(xdc.parameters(), lr=lr, momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 4  # accum gradient
    steps = 0
    best_val = 0  # best validation accuracy seen so far
    # new_flag = 0
    # train it
    while steps < max_steps:
        #for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)
        #
        # NOTE(review): vestigial — the checkpoint-reload logic that used this
        # is commented out below, so this OrderedDict is created and discarded.
        new_state_dict = OrderedDict()
        # state_dict = torch.load(save_model+'.pt')
        # for k, v in state_dict.items():
        #     name = "module."+k # add module.
        #     new_state_dict[name] = v
        # xdc.load_state_dict(new_state_dict)
        # new_flag = 0

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                xdc.train(True)
            else:
                xdc.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            # tot_loc_loss = 0.0
            # tot_cls_loss = 0.0
            num_iter = 0
            total = 0  # number of correct predictions since last reset
            n = 0      # number of samples since last reset
            optimizer.zero_grad()

            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs
                inputs, labels = data
                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)
                labels = Variable(labels.cuda())

                per_frame_logits = xdc(inputs)
                # print(per_frame_logits.shape)
                # print(labels.shape)
                # upsample to input size
                # per_frame_logits = F.upsample(per_frame_logits, t, mode='linear')

                # compute localization loss
                # loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
                # tot_loc_loss += loc_loss.data.item()

                # compute classification loss (with max-pooling along time B x C x T)
                # cls_loss = F.binary_cross_entropy_with_logits(torch.max(per_frame_logits, dim=2)[0], torch.max(labels, dim=2)[0])
                # print(torch.max(per_frame_logits, dim=2)[0])
                # print(torch.max(labels, dim=2)[0])

                # accuracy bookkeeping: argmax over classes vs one-hot labels
                correct = per_frame_logits.argmax(1).eq(labels.argmax(1))
                total += correct.float().sum().item()
                n += batch_size
                # tot_cls_loss += cls_loss.data.item()
                loss = F.binary_cross_entropy_with_logits(
                    per_frame_logits, labels) / num_steps_per_update
                tot_loss += loss.data.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        print('{} Tot Loss: {:.4f} Accuracy: {:.4f}'.format(
                            phase, tot_loss / 10, total / n))
                        # save model
                        # if(steps % 10000 == 0):
                        #     torch.save(xdc.module.state_dict(), save_model+str(steps).zfill(6)+'.pt')
                        # tot_loss = tot_loc_loss = tot_cls_loss = 0.
                        tot_loss = 0
                        total = 0
                        n = 0
            if phase == 'val':
                # NOTE(review): divides by `n` — raises ZeroDivisionError if
                # the validation loader is empty; guard before release.
                print('{} Tot Loss: {:.4f} Accuracy: {:.4f}'.format(
                    phase, (tot_loss * num_steps_per_update) / num_iter,
                    total / n))
                if (total / n > best_val):
                    best_val = total / n
                    torch.save(xdc.module.state_dict(), save_model + '.pt')
def run(mode='rgb', root='/home/dataset/Charades_v1_rgb', train_split='./Charades/charades.json', batch_size=8):
    """Evaluate a pretrained I3D on the Charades test split and dump per-video
    max-pooled logits to a timestamped text file.

    Fixes over the original: the final summary divided by ``num_iter % 10``,
    which is 0 for many dataset sizes (ZeroDivisionError) and was off by one;
    the forward pass now runs under ``torch.no_grad()`` (pure evaluation), and
    tensor losses are accumulated via ``.item()``.

    Args:
        mode: 'rgb' (157-class charades weights) or 'flow' (kinetics weights).
        root: directory of the frame data.
        train_split: annotation json (the 'testing' subset is used).
        batch_size: loader batch size.
    """
    # create a txt file to save results
    import time
    cur_time = time.strftime('%Y-%m-%d_%H%M%S', time.localtime(time.time()))
    res_file = cur_time + '_charades_scores.txt'
    os.mknod(res_file)

    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])
    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 pin_memory=True)

    print("Loading model......")
    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(157, in_channels=3)
        i3d.load_state_dict(torch.load('models/rgb_charades.pt'))
        # i3d.replace_logits(157)
    # i3d.load_state_dict(torch.load('/ssd/models/000920.pt'))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    num_iter = 1
    i3d.eval()
    tot_loss = 0.0
    tot_loc_loss = 0.0
    tot_cls_loss = 0.0
    since_reset = 0  # batches accumulated since the running losses were zeroed
    print("Start testing......")
    print('-' * 20)
    # Iterate over data.
    for data in val_dataloader:
        # get the inputs
        vid, inputs, labels = data
        inputs = Variable(inputs.cuda())
        t = inputs.size(2)  # temporal length
        labels = Variable(labels.cuda())

        with torch.no_grad():  # evaluation only — skip autograd bookkeeping
            per_frame_logits = i3d(inputs)
            # upsample to input size
            per_frame_logits = F.interpolate(per_frame_logits, t,
                                             mode='linear', align_corners=True)

            # compute localization loss
            loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
            tot_loc_loss += loc_loss.item()

            # per-video scores: max-pool the logits along time (B x C x T -> B x C)
            per_video_logits = torch.max(per_frame_logits, dim=2)[0]
            # NOTE(review): helper name is misspelled upstream ('wirte2txt');
            # kept as-is since it is defined elsewhere in the project.
            wirte2txt(res_file, vid, per_video_logits)

            # compute classification loss (with max-pooling along time B x C x T)
            cls_loss = F.binary_cross_entropy_with_logits(
                torch.max(per_frame_logits, dim=2)[0],
                torch.max(labels, dim=2)[0])
            tot_cls_loss += cls_loss.item()

            loss = (0.5 * loc_loss + 0.5 * cls_loss)
            tot_loss += loss.item()
        since_reset += 1

        if num_iter % 10 == 0:
            print('Test {}: Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.
                  format(num_iter, tot_loc_loss / 10, tot_cls_loss / 10,
                         tot_loss / 10))
            tot_loss = tot_loc_loss = tot_cls_loss = 0.
            since_reset = 0
        num_iter += 1

    # BUG FIX: the original divided by (num_iter % 10), which is 0 whenever the
    # remaining batch count hits a multiple of 10. Average only over the batches
    # actually accumulated since the last reset, and skip if there are none.
    if since_reset:
        print('Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
            tot_loc_loss / since_reset, tot_cls_loss / since_reset,
            tot_loss / since_reset))
def train(init_lr, max_steps, mode, root_folder, train_split, batch_size, load_model, save_model):
    """Train I3D on Charades (157 classes) with 4-step gradient accumulation.

    Fixes over the original: ``loss.data[0]`` raises on 0-dim tensors in
    PyTorch >= 0.4 and is replaced with ``.item()``; deprecated ``F.upsample``
    is replaced with ``F.interpolate``.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: total optimizer steps to run.
        mode: 'rgb' (3-channel) or 'flow' (2-channel) input stream.
        root_folder: root directory of the frame data.
        train_split: path to the Charades split json.
        batch_size: loader batch size.
        load_model: optional checkpoint to resume from (after replace_logits).
        save_model: filename prefix for checkpoints saved every 10 steps.
    """
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224),
                                           videotransforms.RandomHorizontalFlip()])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split, 'training', root_folder, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=True, num_workers=36,
                                             pin_memory=True)
    val_dataset = Dataset(train_split, 'testing', root_folder, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True, num_workers=36,
                                                 pin_memory=True)
    dataloaders = {'train': dataloader, 'val': val_dataloader}

    i3d = InceptionI3d(400, in_channels=2 if mode == 'flow' else 3)  # setup the model
    i3d.load_state_dict(torch.load('models/{}_imagenet.pt'.format(mode)))
    i3d.replace_logits(157)
    if load_model:
        i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    steps = 0
    num_steps_per_update = 4  # accum gradient
    while steps < max_steps:  # train it
        print('Step {:6d} / {}'.format(steps, max_steps))
        print('-' * 10)
        for phase in ['train', 'val']:  # each epoch has a training and validation phase
            i3d.train(phase == 'train')  # eval only during validation phase
            num_iter, tot_loss, tot_loc_loss, tot_cls_loss = 0, 0.0, 0.0, 0.0
            optimizer.zero_grad()
            for data in dataloaders[phase]:  # iterate over data
                num_iter += 1
                inputs, labels = data  # get the inputs
                inputs = Variable(inputs.cuda())  # wrap them in Variable
                labels = Variable(labels.cuda())
                t = inputs.size(2)  # temporal length

                per_frame_logits = i3d(inputs)
                # upsample to input size (F.upsample is deprecated)
                per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()  # was .data[0] — fails on 0-dim tensors

                # compute classification loss (with max-pooling along time B x C x T)
                cls_loss = F.binary_cross_entropy_with_logits(
                    torch.max(per_frame_logits, dim=2)[0],
                    torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()

                loss = (0.5 * loc_loss + 0.5 * cls_loss) / num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
                            phase, tot_loc_loss / (10 * num_steps_per_update),
                            tot_cls_loss / (10 * num_steps_per_update),
                            tot_loss / 10))
                        torch.save(i3d.module.state_dict(),
                                   save_model + str(steps).zfill(6) + '.pt')  # save model
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
                    phase, tot_loc_loss / num_iter, tot_cls_loss / num_iter,
                    (tot_loss * num_steps_per_update) / num_iter))
def run(init_lr=0.1, max_steps=64e3, mode='rgb', root='/storage/truppr/CHARADES/Charades_v1_rgb', train_split='charades/charades.json', batch_size=16, save_model=''):
    """Train I3D on Charades; supports 'rgb', 'flow', and an (incomplete)
    'both' two-stream mode.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: total optimizer steps to run.
        mode: 'rgb', 'flow', or 'both' (see NOTE(review) markers — the 'both'
            path is not functional as written).
        root: root directory of the frame data.
        train_split: path to the Charades split json.
        batch_size: loader batch size.
        save_model: filename prefix for checkpoints.
    """
    # setup dataset
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])
    # print(root)
    print("creating training set...")
    dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=18,
                                             pin_memory=True)
    print("creating validation set...")
    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=18,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    print("setting up the model...")
    if mode == 'flow' or mode == 'rgb':
        if mode == 'flow':
            i3d = InceptionI3d(400, in_channels=2)
            i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
        elif mode == 'rgb':
            i3d = InceptionI3d(400, in_channels=3)
            i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        i3d.replace_logits(157)  # number of classes... originally 157
        i3d.cuda(0)
        i3d = nn.DataParallel(i3d)
    elif mode == 'both':
        # two-stream variant: separate RGB and flow networks
        i3d_rgb = InceptionI3d(400, in_channels=3)
        i3d_rgb.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        i3d_flow = InceptionI3d(400, in_channels=2)
        i3d_flow.load_state_dict(torch.load('models/flow_imagenet.pt'))
        i3d_rgb.replace_logits(157)  # number of classes... originally 157
        i3d_flow.replace_logits(157)
        i3d_rgb.cuda(0)
        i3d_flow.cuda(0)
        i3d_rgb = nn.DataParallel(i3d_rgb)
        i3d_flow = nn.DataParallel(i3d_flow)

    lr = init_lr
    if mode == 'both':
        optimizer_rgb = optim.SGD(i3d_rgb.parameters(), lr=lr, momentum=0.9,
                                  weight_decay=0.0000001)
        optimizer_flow = optim.SGD(i3d_flow.parameters(), lr=lr, momentum=0.9,
                                   weight_decay=0.0000001)
        lr_sched_rgb = optim.lr_scheduler.MultiStepLR(optimizer_rgb, [300, 1000])
        lr_sched_flow = optim.lr_scheduler.MultiStepLR(optimizer_flow, [300, 1000])
    else:
        optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9,
                              weight_decay=0.0000001)
        lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 4  # accum gradient
    steps = 0
    # train it
    while steps < max_steps:
        #for epoch in range(num_epochs):
        # print 'Step {}/{}'.format(steps, max_steps)
        # print '-' * 10
        print('Step ' + str(steps) + '/' + str(max_steps))
        print('-' * 25)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                print("training model...")
                if mode == 'both':
                    i3d_rgb.train(True)
                    i3d_flow.train(True)
                    optimizer_rgb.zero_grad()
                    optimizer_flow.zero_grad()
                else:
                    i3d.train(True)
                    optimizer.zero_grad()
            else:
                print("validating model...")
                if mode == 'both':
                    i3d_rgb.train(False)
                    i3d_flow.train(False)
                    optimizer_rgb.zero_grad()
                    optimizer_flow.zero_grad()
                else:
                    i3d.train(False)  # Set model to evaluate mode
                    optimizer.zero_grad()

            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            # optimizer.zero_grad()
            print("zeroed...")
            # print(len(dataloaders["train"]))
            # print(dataloaders["train"])
            # Iterate over data.
            for data in dataloaders[phase]:
                # print("starting iter...")
                num_iter += 1
                # get the inputs
                inputs, labels = data
                print("data size: ", inputs.shape, " label: ", labels)
                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)
                labels = Variable(labels.cuda())

                # debug dump of the full label tensor
                torch.set_printoptions(profile="full")
                print("labels:\n", labels)
                print("labels:\n", labels.shape)
                print("Inputs: \n", inputs.shape)
                torch.set_printoptions(profile="default")

                if mode == 'both':
                    per_frame_logits = i3d_rgb(inputs)
                    # NOTE(review): `flow_inputs` is never defined — this line
                    # raises NameError; the loader would need to yield a flow
                    # stream for 'both' mode to work.
                    per_flows_logits = i3d_flow(flow_inputs)
                else:
                    per_frame_logits = i3d(inputs)

                # upsample to input size
                per_frame_logits = F.upsample(per_frame_logits, t, mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(
                    per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()

                # compute classification loss (with max-pooling along time B x C x T)
                cls_loss = F.binary_cross_entropy_with_logits(
                    torch.max(per_frame_logits, dim=2)[0],
                    torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()

                loss = (0.5 * loc_loss + 0.5 * cls_loss) / num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    # NOTE(review): in 'both' mode `optimizer`, `lr_sched` and
                    # `i3d` are undefined here — only the single-stream path
                    # can reach this code without a NameError.
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        # print '{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/(10*num_steps_per_update), tot_cls_loss/(10*num_steps_per_update), tot_loss/10)
                        print(
                            str(phase) + ' Loc Loss: ' +
                            str(tot_loc_loss / (10 * num_steps_per_update)) +
                            ' Cls Loss: ' +
                            str(tot_cls_loss / (10 * num_steps_per_update)) +
                            ' Tot Loss: ' + str(tot_loss / 10))
                        # save model
                        torch.save(
                            i3d.module.state_dict(),
                            save_model + str(steps).zfill(6) + '-' +
                            str(tot_loss / 10) + '.pt')
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            #else:
            #    print(str(phase) + ' Loc Loss: ' + str(tot_loc_loss/(10*num_steps_per_update)) + ' Cls Loss: ' + str(tot_cls_loss/(10*num_steps_per_update)) + ' Tot Loss: ' + str(tot_loss/10))
            if phase == 'val':
                print(
                    str(phase) + ' Loc Loss: ' +
                    str(tot_loc_loss / num_iter).zfill(4) + ' Cls Loss: ' +
                    str(tot_cls_loss / num_iter).zfill(4) + ' Tot Loss: ' +
                    str((tot_loss * num_steps_per_update) / num_iter).zfill(4))
                # print '{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter)
                print("whoops...")
def run(mode='rgb', root='', split_file='data/annotations/charades.json', batch_size=8, stride=4, num_span_frames=125):
    """Evaluate I3D on the Charades test split and report per-class AP / mAP.

    Caches the constructed validation Dataset as a pickle keyed on
    (stride, num_span_frames) so repeat runs skip dataset construction.
    Fix over the original: the pickle file handles were never closed; both
    reads and writes now use context managers.

    Args:
        mode: 'rgb' or 'flow' input stream.
        root: root directory of the video frames.
        split_file: Charades annotation json.
        batch_size: loader batch size.
        stride, num_span_frames: frame-sampling parameters forwarded to Dataset.

    Relies on module-level `args.checkpoint_path` to optionally load a
    fine-tuned checkpoint instead of the ImageNet weights.
    """
    # setup dataset
    test_transforms = transforms.Compose([transforms.Resize((224, 224)),
                                          transforms.ToTensor()])

    print('Getting validation dataset...')
    val_path = './data/val_dataset_{}_{}.pickle'.format(stride, num_span_frames)
    if os.path.exists(val_path):
        # BUG FIX: use context managers so the pickle handles are closed.
        # NOTE(review): unpickling executes arbitrary code — only load caches
        # this process wrote itself.
        with open(val_path, 'rb') as pickle_in:
            val_dataset = pickle.load(pickle_in)
    else:
        val_dataset = Dataset(split_file, 'testing', root, mode,
                              test_transforms, stride, num_span_frames,
                              is_sife=False)
        with open(val_path, 'wb') as pickle_out:
            pickle.dump(val_dataset, pickle_out)
    print('Got val dataset.')
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=0,
                                                 pin_memory=True)

    print('Loading model...')
    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(400, in_channels=3)
        if args.checkpoint_path:
            # Checkpoint was saved from a DataParallel model: strip 'module.'
            i3d.replace_logits(157)
            state_dict = torch.load(args.checkpoint_path)['model_state_dict']
            checkpoint = OrderedDict()
            for k, v in state_dict.items():
                name = k[7:]  # remove 'module'
                checkpoint[name] = v
            i3d.load_state_dict(checkpoint)
        else:
            i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
            i3d.replace_logits(157)
    i3d.cuda()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.device_count() > 1:
        print('Using {} GPUs'.format(torch.cuda.device_count()))
        i3d = nn.DataParallel(i3d)
    i3d.to(device)
    print('Loaded model.')

    all_preds = []  #torch.zeros((, 157)).cuda()
    all_labels = []  #torch.zeros((, 157)).cuda()
    print('Entering data loading...')
    for i, data in enumerate(val_dataloader):
        # get the inputs; `vid` (video id) is unused here
        inputs, labels, vid = data
        t = inputs.shape[2]  # temporal length
        inputs = inputs.cuda()
        labels = labels.cuda()

        with torch.no_grad():
            per_frame_logits = i3d(inputs)

        # upsample to input size
        per_frame_logits = F.interpolate(per_frame_logits, t,
                                         mode='linear')  # B x Classes x T
        max_frame_logits = torch.max(per_frame_logits, dim=2)[0]  # B x Classes
        labels = torch.max(labels, dim=2)[0]  # B x Classes

        # metrics for validation
        pred = (torch.sigmoid(max_frame_logits) >= 0.5).float()  # predicted labels for this batch (B x C)
        if i == 0:
            all_preds = np.array(pred.tolist())
            all_labels = np.array(labels.tolist())
        else:
            all_preds = np.append(all_preds, pred.tolist(), axis=0)
            all_labels = np.append(all_labels, labels.tolist(), axis=0)
        #print('Step {}: all_preds.shape={}, all_labels.shape={}'.format(i, all_preds.shape, all_labels.shape))
        #print('Step {}: all_preds={}, all_labels={}'.format(i, all_preds, all_labels))

        if i % 10 == 0:
            # periodic progress report on the accumulated predictions
            all_APs = [metrics.average_precision_score(y_true=all_labels[:, j],
                                                       y_score=all_preds[:, j])
                       for j in range(157)]
            mAP = np.nanmean(all_APs)
            print('Step {}'.format(i))
            print('all_APs:')
            print(all_APs)
            print('mAP = {}'.format(mAP))

    # Eval: final per-class AP over the whole split
    all_APs = [metrics.average_precision_score(y_true=all_labels[:, j],
                                               y_score=all_preds[:, j])
               for j in range(157)]
    mAP = np.nanmean(all_APs)
    print('-' * 50)
    print('Final mAP: {:.4f}'.format(mAP))
    print('-' * 50)