def train(train_dataset, val_dataset, configs):
    """Train an AlexNet classifier and save its final weights.

    Args:
        train_dataset: torch Dataset yielding (image, label) pairs for training.
        val_dataset: torch Dataset evaluated after every epoch.
        configs: dict with keys "batch_size", "lr", and "epochs".

    Side effects:
        Prints per-epoch train loss/accuracy and validation accuracy, and
        writes the trained state dict to /opt/output/model.pt.
    """
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=configs["batch_size"], shuffle=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=configs["batch_size"], shuffle=False)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = AlexNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=model.parameters(), lr=configs["lr"])

    for epoch in range(configs["epochs"]):
        model.train()
        running_loss = 0.0
        correct = 0
        for inputs, labels in tqdm(train_loader):
            # Labels may arrive as (N, 1); CrossEntropyLoss needs shape (N,).
            inputs, labels = inputs.to(device), labels.squeeze().to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
            running_loss += loss.item()
        # BUG FIX: loss.item() is already the mean over its batch, so the epoch
        # loss must be averaged over the number of batches, not the dataset
        # size (the old code under-reported loss by ~batch_size x).
        # Also report the train accuracy that was computed but discarded.
        print("[%d] loss: %.4f acc: %.4f" % (
            epoch + 1,
            running_loss / len(train_loader),
            correct / len(train_dataset)))

        model.eval()
        correct = 0
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader):
                inputs, labels = inputs.to(device), labels.squeeze().to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                correct += (predicted == labels).sum().item()
        print("Accuracy of the network on the %d test images: %.4f %%" % (
            len(val_dataset), 100. * correct / len(val_dataset)))

    # NOTE(review): hard-coded output path; assumes /opt/output exists.
    torch.save(model.state_dict(), "/opt/output/model.pt")
def extract_feature(args):
    """Extract and save features for train and test splits, several clips per video.

    Loads a frozen AlexNet backbone (optionally from args.ckpt) and writes
    `<split>_feature.npy` / `<split>_class.npy` into args.feature_dir for both
    the train and test retrieval splits.
    """
    torch.backends.cudnn.benchmark = True
    # Force pytorch to create its context on the specific device.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    ########### model ##############
    model = AlexNet(with_classifier=False, return_conv=True).to(device)
    if args.ckpt:
        pretrained_weights = load_pretrained_weights(args.ckpt)
        model.load_state_dict(pretrained_weights, strict=True)
    model.eval()
    torch.set_grad_enabled(False)

    # The two splits used an identical, copy-pasted pipeline (same transforms,
    # same loader settings); factored into one helper parameterized by split.
    eval_transforms = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor()
    ])
    for is_train, prefix in ((True, 'train'), (False, 'test')):
        _extract_split(args, model, device, eval_transforms, is_train, prefix)


def _extract_split(args, model, device, split_transforms, is_train, prefix):
    """Run the frozen model over one retrieval split and save features/classes.

    Saves arrays of shape (num_videos, 10, feat_dim) and (num_videos, 10).
    """
    dataset = UCF101FrameRetrievalDataset('data/ucf101', 10, is_train,
                                          split_transforms)
    # NOTE(review): drop_last=True silently discards the final partial batch,
    # so some videos never get features — confirm this is intended.
    dataloader = DataLoader(dataset, batch_size=args.bs, shuffle=False,
                            num_workers=args.workers, pin_memory=True,
                            drop_last=True)
    features = []
    classes = []
    outputs = None
    for sampled_clips, idxs in tqdm(dataloader):
        # Flatten (batch, clips, 3, 224, 224) into one stack of frames.
        inputs = sampled_clips.reshape((-1, 3, 224, 224)).to(device)
        outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())
    # 10 clips per video; outputs.shape[1] is the feature dimension of the
    # last batch (all batches are equal-sized because drop_last=True).
    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, '{}_feature.npy'.format(prefix)),
            features)
    np.save(os.path.join(args.feature_dir, '{}_class.npy'.format(prefix)),
            classes)
def main():
    """Linear-evaluation entry point.

    Builds dataloaders for the configured dataset, loads a frozen pre-trained
    backbone (optionally converting MoCo encoder_q weights), then trains a
    linear classifier on top, logging to tensorboard and checkpointing the
    best and periodic models under args.save_folder.
    """
    global best_acc1
    best_acc1 = 0
    args = parse_option()

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # set the data loader
    train_folder = os.path.join(args.data_folder, 'train')
    val_folder = os.path.join(args.data_folder, 'val')

    logger = getLogger(args.save_folder)

    if args.dataset.startswith('imagenet') or args.dataset.startswith('places'):
        image_size = 224
        crop_padding = 32
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        normalize = transforms.Normalize(mean=mean, std=std)
        if args.aug == 'NULL':
            train_transform = transforms.Compose([
                transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        elif args.aug == 'CJ':
            train_transform = transforms.Compose([
                transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
                transforms.RandomGrayscale(p=0.2),
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            # BUG FIX: `NotImplemented` is a constant, not an exception type;
            # calling it raised a confusing TypeError instead of this error.
            raise NotImplementedError(
                'augmentation not supported: {}'.format(args.aug))

        val_transform = transforms.Compose([
            transforms.Resize(image_size + crop_padding),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ])
        if args.dataset.startswith('imagenet'):
            train_dataset = datasets.ImageFolder(train_folder, train_transform)
            val_dataset = datasets.ImageFolder(val_folder, val_transform)
        if args.dataset.startswith('places'):
            train_dataset = ImageList(
                '/data/trainvalsplit_places205/train_places205.csv',
                '/data/data/vision/torralba/deeplearning/images256',
                transform=train_transform, symbol_split=' ')
            val_dataset = ImageList(
                '/data/trainvalsplit_places205/val_places205.csv',
                '/data/data/vision/torralba/deeplearning/images256',
                transform=val_transform, symbol_split=' ')
        print(len(train_dataset))
        train_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size,
            shuffle=(train_sampler is None), num_workers=args.n_workers,
            pin_memory=False, sampler=train_sampler)
        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=args.batch_size, shuffle=False,
            num_workers=args.n_workers, pin_memory=False)
    elif args.dataset.startswith('cifar'):
        train_loader, val_loader = cifar.get_linear_dataloader(args)
    elif args.dataset.startswith('svhn'):
        train_loader, val_loader = svhn.get_linear_dataloader(args)

    # create model and optimizer
    if args.model == 'alexnet':
        if args.layer == 6:
            args.layer = 5
        model = AlexNet(128)
        model = nn.DataParallel(model)
        classifier = LinearClassifierAlexNet(args.layer, args.n_label, 'avg')
    elif args.model == 'alexnet_cifar':
        if args.layer == 6:
            args.layer = 5
        model = AlexNet_cifar(128)
        model = nn.DataParallel(model)
        classifier = LinearClassifierAlexNet(args.layer, args.n_label, 'avg',
                                             cifar=True)
    elif args.model == 'resnet50':
        model = resnet50(non_linear_head=False)
        model = nn.DataParallel(model)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 1)
    elif args.model == 'resnet18':
        model = resnet18()
        model = nn.DataParallel(model)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 1,
                                            bottleneck=False)
    elif args.model == 'resnet18_cifar':
        model = resnet18_cifar()
        model = nn.DataParallel(model)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 1,
                                            bottleneck=False)
    elif args.model == 'resnet50_cifar':
        model = resnet50_cifar()
        model = nn.DataParallel(model)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 1)
    elif args.model == 'resnet50x2':
        model = InsResNet50(width=2)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 2)
    elif args.model == 'resnet50x4':
        model = InsResNet50(width=4)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 4)
    elif args.model == 'shufflenet':
        model = shufflenet_v2_x1_0(num_classes=128, non_linear_head=False)
        model = nn.DataParallel(model)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 0.5)
    else:
        raise NotImplementedError('model not supported {}'.format(args.model))

    print('==> loading pre-trained model')
    ckpt = torch.load(args.model_path)
    if not args.moco:
        model.load_state_dict(ckpt['state_dict'])
    else:
        # BUG FIX: the previous bare `except: pass` silently swallowed any
        # failure here, then printed "loaded checkpoint" while the encoder
        # still had random weights. Surface the failure instead.
        try:
            state_dict = ckpt['state_dict']
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if k.startswith('module.encoder_q'
                                ) and not k.startswith('module.encoder_q.fc'):
                    # remove prefix
                    state_dict['module.' + k[len("module.encoder_q."):]] = \
                        state_dict[k]
                # delete renamed or unused k
                del state_dict[k]
            model.load_state_dict(state_dict)
        except (KeyError, RuntimeError) as e:
            print('==> WARNING: failed to load moco weights: {}'.format(e))
    print("==> loaded checkpoint '{}' (epoch {})".format(
        args.model_path, ckpt['epoch']))
    print('==> done')

    model = model.cuda()
    classifier = classifier.cuda()

    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)

    if not args.adam:
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=args.learning_rate,
                                     betas=(args.beta1, args.beta2),
                                     weight_decay=args.weight_decay,
                                     eps=1e-8)

    # Backbone stays frozen in eval mode; only the classifier is optimized.
    model.eval()
    cudnn.benchmark = True

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.start_epoch = checkpoint['epoch'] + 1
            classifier.load_state_dict(checkpoint['classifier'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # best_acc1 was saved as a tensor; move it back onto the GPU.
            best_acc1 = checkpoint['best_acc1']
            print(best_acc1.item())
            best_acc1 = best_acc1.cuda()
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            if 'opt' in checkpoint.keys():
                # resume optimization hyper-parameters
                print('=> resume hyper parameters')
                if 'bn' in vars(checkpoint['opt']):
                    print('using bn: ', checkpoint['opt'].bn)
                if 'adam' in vars(checkpoint['opt']):
                    print('using adam: ', checkpoint['opt'].adam)
                # args.learning_rate = checkpoint['opt'].learning_rate
                # args.lr_decay_epochs = checkpoint['opt'].lr_decay_epochs
                args.lr_decay_rate = checkpoint['opt'].lr_decay_rate
                args.momentum = checkpoint['opt'].momentum
                args.weight_decay = checkpoint['opt'].weight_decay
                args.beta1 = checkpoint['opt'].beta1
                args.beta2 = checkpoint['opt'].beta2
            del checkpoint
            torch.cuda.empty_cache()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # tensorboard
    tblogger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2)

    # routine
    best_acc = 0.0
    for epoch in range(args.start_epoch, args.epochs + 1):
        adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        train_acc, train_acc5, train_loss = train(epoch, train_loader, model,
                                                  classifier, criterion,
                                                  optimizer, args)
        time2 = time.time()
        logging.info('train epoch {}, total time {:.2f}'.format(
            epoch, time2 - time1))
        logging.info(
            'Epoch: {}, lr:{} , train_loss: {:.4f}, train_acc: {:.4f}/{:.4f}'.
            format(epoch, optimizer.param_groups[0]['lr'], train_loss,
                   train_acc, train_acc5))
        tblogger.log_value('train_acc', train_acc, epoch)
        tblogger.log_value('train_acc5', train_acc5, epoch)
        tblogger.log_value('train_loss', train_loss, epoch)
        tblogger.log_value('learning_rate', optimizer.param_groups[0]['lr'],
                           epoch)

        test_acc, test_acc5, test_loss = validate(val_loader, model,
                                                  classifier, criterion, args)
        if test_acc >= best_acc:
            best_acc = test_acc
        logging.info(
            colorful(
                'Epoch: {}, val_loss: {:.4f}, val_acc: {:.4f}/{:.4f}, best_acc: {:.4f}'
                .format(epoch, test_loss, test_acc, test_acc5, best_acc)))
        tblogger.log_value('test_acc', test_acc, epoch)
        tblogger.log_value('test_acc5', test_acc5, epoch)
        tblogger.log_value('test_loss', test_loss, epoch)

        # save the best model
        if test_acc > best_acc1:
            best_acc1 = test_acc
            state = {
                'opt': args,
                'epoch': epoch,
                'classifier': classifier.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }
            save_name = '{}_layer{}.pth'.format(args.model, args.layer)
            save_name = os.path.join(args.save_folder, save_name)
            print('saving best model!')
            torch.save(state, save_name)

        # save model periodically
        if epoch % args.save_freq == 0:
            print('==> Saving...')
            state = {
                'opt': args,
                'epoch': epoch,
                'classifier': classifier.state_dict(),
                'best_acc1': test_acc,
                'optimizer': optimizer.state_dict(),
            }
            save_name = 'ckpt_epoch_{epoch}.pth'.format(epoch=epoch)
            save_name = os.path.join(args.save_folder, save_name)
            print('saving regular model!')
            torch.save(state, save_name)
def train(data_train, data_val, num_classes, num_epoch, milestones):
    """Train AlexNet from scratch with Adam + MultiStepLR.

    Runs one train and one validation pass per epoch, printing loss and
    accuracy (accuracy is the mean of per-batch accuracies), and returns the
    network from the final epoch. The best validation accuracy and its epoch
    are printed at the end.
    """
    net = AlexNet(num_classes, pretrain=False)
    dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = net.to(dev)
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(net.parameters(), lr=0.0001)
    scheduler = MultiStepLR(opt, milestones=milestones, gamma=0.1)

    start_time = time.time()
    best_acc = 0
    best = 0
    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        print('-' * 10)

        # ----- training pass -----
        epoch_loss, epoch_corrects = 0.0, 0
        net.train()
        with torch.set_grad_enabled(True):
            for batch_idx, (inputs, labels) in enumerate(data_train):
                inputs, labels = inputs.to(dev), labels.to(dev)
                outputs = net(inputs)
                _, preds = torch.max(outputs, 1)
                batch_loss = loss_fn(outputs, labels)
                opt.zero_grad()
                batch_loss.backward()
                opt.step()
                epoch_loss += batch_loss.item()
                # Accumulate per-batch accuracy (fraction correct this batch).
                epoch_corrects += torch.sum(preds == labels.data) * 1. / inputs.size(0)
                print("\rIteration: {}/{}, Loss: {}.".format(batch_idx + 1, len(data_train), batch_loss.item()), end="")
                sys.stdout.flush()
        avg_loss = epoch_loss / len(data_train)
        t_acc = epoch_corrects.double() / len(data_train)

        # ----- validation pass -----
        epoch_loss, epoch_corrects = 0.0, 0
        net.eval()
        with torch.set_grad_enabled(False):
            for batch_idx, (inputs, labels) in enumerate(data_val):
                inputs, labels = inputs.to(dev), labels.to(dev)
                outputs = net(inputs)
                _, preds = torch.max(outputs, 1)
                batch_loss = loss_fn(outputs, labels)
                epoch_loss += batch_loss.item()
                epoch_corrects += torch.sum(preds == labels.data) * 1. / inputs.size(0)
        val_loss = epoch_loss / len(data_val)
        val_acc = epoch_corrects.double() / len(data_val)

        print()
        print('Train Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, t_acc))
        print('Val Loss: {:.4f} Acc: {:.4f}'.format(val_loss, val_acc))
        print('lr rate: {:.6f}'.format(opt.param_groups[0]['lr']))
        print()

        if val_acc > best_acc:
            best_acc = val_acc
            best = epoch + 1
        scheduler.step()

    time_elapsed = time.time() - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best Validation Accuracy: {}, Epoch: {}'.format(best_acc, best))
    return net
def evaluate():
    """Run a (possibly checkpoint-loaded) AlexNet over one audio file.

    Reads the wav named by the module-level `args.file_name`, pads/trims it to
    a fixed length, feeds it through the network and prints the raw output
    tensor. Per the TODO below, the output is meant to identify the speaker.
    Relies on module-level `args` and `print_log` defined elsewhere in the file.
    """
    num_classes = 4

    # Init logger: one log file per random seed under args.save_path.
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    # NOTE(review): `log` is never closed here — relies on process exit.
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Any other preprocessings? http://pytorch.org/audio/transforms.html
    # NOTE(review): transforms.Scale / transforms.PadTrim are legacy
    # torchaudio APIs — confirm the pinned torchaudio version provides them.
    sample_length = 10000
    scale = transforms.Scale()
    padtrim = transforms.PadTrim(sample_length)
    transforms_audio = transforms.Compose([
        scale, padtrim
    ])

    # Data loading: wav samples -> float tensor shaped (1, 1, sample_length).
    fs, data = wavfile.read(args.file_name)
    data = torch.from_numpy(data).float()
    data = data.unsqueeze(1)
    audio = transforms_audio(data)
    audio = Variable(audio)  # Variable is a no-op wrapper on modern torch
    audio = audio.view(1, -1)
    audio = audio.unsqueeze(0)

    # Feed in respective model file to pass into model (alexnet.py)
    print_log("=> creating model '{}'".format(args.arch), log)
    # Init model, criterion, and optimizer
    # net = models.__dict__[args.arch](num_classes)
    net = AlexNet(num_classes)
    print_log("=> network :\n {}".format(net), log)

    # Sets use for GPU if available
    if args.use_cuda:
        net.cuda()

    # optionally resume from a checkpoint
    # Need same python version that the resume was in
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            if args.ngpu == 0:
                # CPU-only run: remap GPU-saved tensors onto CPU storage.
                checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage)
            else:
                checkpoint = torch.load(args.resume)
            recorder = checkpoint['recorder']
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'])
            print_log("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for {} model".format(args.arch), log)

    # Inference only: no gradient bookkeeping is disabled here, but the net
    # is switched to eval mode before the forward pass.
    net.eval()
    if args.use_cuda:
        audio = audio.cuda()
    output = net(audio)
    print(output)
    # TODO postprocess output to a string representing the person speaking
    # output = val_dataset.postprocess_target(output)
    return