def run_single_experiment(cfg, diffinit, seed, replace_index): t0 = time() # how we convert the cfg into a path and such is defined in ExperimentIdentifier exp = ExperimentIdentifier(seed=seed, replace_index=replace_index, diffinit=diffinit) exp.init_from_cfg(cfg) exp.ensure_directory_exists(verbose=True) path_stub = exp.path_stub() print('Running experiment with path', path_stub) # load data x_train, y_train, x_vali, y_vali, x_test, y_test = load_data( options=cfg['data'], replace_index=replace_index) # define model init_path = get_model_init_path(cfg, diffinit) model = build_model(**cfg['model'], init_path=init_path) # prep model for training prep_for_training( model, seed=seed, optimizer_settings=cfg['training']['optimization_algorithm'], task_type=cfg['model']['task_type']) # now train train_model(model, cfg['training'], cfg['logging'], x_train, y_train, x_vali, y_vali, path_stub=path_stub) # clean up del model clear_session() print('Finished after', time() - t0, 'seconds')
def main():
    """CLI entry point: parse args, train a classifier, save a checkpoint,
    and report test-set loss/accuracy.

    Fixes over the previous version:
    - checkpoint path is built with os.path.join instead of '+'-concatenating
      a '/' (portable, no double-slash issues);
    - corrected the misspelled "availaible" message.
    """
    # Get Command Line Arguments
    args = get_command_line_args()
    use_gpu = torch.cuda.is_available() and args.gpu
    print("Data directory: {}".format(args.data_dir))
    if use_gpu:
        print("Training on GPU.")
    else:
        print("Training on CPU.")
    print("Architecture: {}".format(args.arch))
    if args.save_dir:
        print("Checkpoint save directory: {}".format(args.save_dir))
    print("Learning rate: {}".format(args.learning_rate))
    print("Hidden units: {}".format(args.hidden_units))
    print("Epochs: {}".format(args.epochs))

    # Get data loaders
    dataloaders, class_to_idx = model_utils.get_loaders(args.data_dir)
    for key in dataloaders:
        print("{} data loader retrieved".format(key))

    # Build the model
    model, optimizer, criterion = model_utils.build_model(
        args.arch, args.hidden_units, args.learning_rate)
    model.class_to_idx = class_to_idx

    # Move model and loss to GPU when requested and available.
    if use_gpu:
        print("GPU is available. Moving Tensors.")
        model.cuda()
        criterion.cuda()

    # Train the model
    model_utils.train_model(model, args.epochs, criterion, optimizer,
                            dataloaders['training'],
                            dataloaders['validation'], use_gpu)

    # Save the checkpoint
    if args.save_dir:
        # exist_ok avoids a race between the check and the mkdir
        os.makedirs(args.save_dir, exist_ok=True)
        save_path = os.path.join(args.save_dir, args.arch + '_checkpoint.pth')
    else:
        save_path = args.arch + '_checkpoint.pth'
    print("Will save checkpoint to {}".format(save_path))
    save(args.arch, args.learning_rate, args.hidden_units, args.epochs,
         save_path, model, optimizer)
    print("Checkpoint saved")

    # Validate the accuracy
    test_loss, accuracy = model_utils.validate(model, criterion,
                                               dataloaders['testing'],
                                               use_gpu)
    print("Test Loss: {:.3f}".format(test_loss))
    print("Test Acc.: {:.3f}".format(accuracy))
def train_component_network(pos_class, template_file=None, binary_linear_file=None):
    """Train a binary component network for one positive ImageNet32 class.

    Builds train/val binary dataloaders for `pos_class`, instantiates a
    component network (kernels frozen) from the template and binary-linear
    weight files, trains it with SGD + MultiStepLR, and returns the model.
    """
    loaders, sizes = {}, {}
    # Same binary dataset helper for both splits; only the train flag differs.
    for split, is_train in (('train', True), ('val', False)):
        _, loaders[split], sizes[split] = get_binary_imagenet32(
            pos_class, train=is_train)

    net = get_component_network(template_file, binary_linear_file,
                                freeze_kernels=True).to(device)
    sgd = torch.optim.SGD(mutil.get_model_trainable_parameters(net),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    lr_sched = torch.optim.lr_scheduler.MultiStepLR(
        sgd, milestones=args.decay_milestones, gamma=args.decay_factor)

    trained, _ = mutil.train_model(net,
                                   torch.nn.CrossEntropyLoss().to(device),
                                   sgd,
                                   loaders,
                                   sizes,
                                   scheduler=lr_sched,
                                   num_epochs=args.epoch,
                                   device=device,
                                   verbose=False)
    return trained
def train_model(model, ds_config, weights_path):
    """Train `model` on the datasets described by `ds_config`.

    Materialises the train/val datasets from the config and delegates to
    model_utils.train_model for 100 epochs, saving weights to `weights_path`.
    Returns whatever model_utils.train_model returns.
    """
    splits = ds_config_to_datasets(ds_config)
    train_split = splits["train"]
    val_split = splits["val"]
    return model_utils.train_model(model,
                                   train_split,
                                   val_split,
                                   weights_path,
                                   num_epochs=100)
def run_train_all(self):
    """Train a final model on ALL available data (no validation split)
    and save it to `<save_dir>/all_model.pth`.

    Fixes over the previous version:
    - dropped the pointless f-prefix on a placeholder-free log string;
    - corrected the misspelled "pararell" log message.
    """
    self.logger.info("training on all data...")
    train_ds = datasets.SpectrogramDataset(
        self.df,
        self.data_dir,
        sample_rate=self.config.sample_rate,
        composer=self.train_composer,
        secondary_label=self.config.secondary_label
    )
    train_dl = torch.utils.data.DataLoader(
        train_ds, shuffle=True, **self.config.dataloader
    )
    model = model_utils.build_model(
        self.config.model.name,
        n_class=self.n_class,
        in_chans=self.config.model.in_chans,
        pretrained=self.config.model.pretrained,
    )
    # Wrap in DataParallel only when multi-GPU is both requested and enabled.
    if self.config.multi and self.config.gpu:
        self.logger.info("Using parallel gpu")
        model = nn.DataParallel(model)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), float(self.config.learning_rate))
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)
    if self.config.mixup:
        self.logger.info("use mixup")

    # val_loader/best_model_path are None: train on everything, keep the
    # final-epoch weights rather than an early-stopped best checkpoint.
    model_utils.train_model(
        epoch=self.epoch,
        model=model,
        train_loader=train_dl,
        val_loader=None,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        device=self.device,
        threshold=self.config.threshold,
        best_model_path=None,
        logger=self.logger,
        mixup=self.config.mixup,
    )
    model_utils.save_pytorch_model(model, self.save_dir / "all_model.pth")
    self.logger.info(f'save model to {self.save_dir / "all_model.pth"}')
def main():
    """Parse CLI arguments, train a 2-class classifier on a pretrained
    backbone, and write a checkpoint named `<arch>_<epochs>.pth` under the
    requested save directory.
    """
    # Command-line arguments
    input_arguments = process_arguments()

    # Pick cuda only when it is both requested and actually available.
    use_cuda = torch.cuda.is_available() and input_arguments.gpu
    default_device = torch.device("cuda" if use_cuda else "cpu")

    # Classifier input size depends on the backbone family
    # (9216 = alexnet default, 25088 = vgg*, 1024 = densenet*).
    choosen_architecture = input_arguments.choosen_archi
    input_size = 9216
    if choosen_architecture.startswith("vgg"):
        input_size = 25088
    if choosen_architecture.startswith("densenet"):
        input_size = 1024

    # Load data
    (train_data, test_data, valid_data,
     trainloader, testloader, validloader) = du.loading_data(
        input_arguments.data_directory)

    # Pretrained backbone + fresh classifier head (binary output).
    model = mu.set_pretrained_model(choosen_architecture)
    model = mu.set_model_classifier(model,
                                    input_arguments.hidden_units,
                                    input_size,
                                    output_size=2,
                                    dropout=0.05)

    # Train
    model, epochs, optimizer = mu.train_model(model,
                                              trainloader,
                                              input_arguments.epochs,
                                              validloader,
                                              input_arguments.learning_rate,
                                              default_device,
                                              choosen_architecture)

    # Ensure the save directory exists, then checkpoint the trained model.
    if not os.path.exists(input_arguments.save_directory):
        os.makedirs(input_arguments.save_directory)
    checkpoint_file_path = os.path.join(
        input_arguments.save_directory,
        choosen_architecture + "_" + str(input_arguments.epochs) + ".pth")
    mu.create_checkpoint(model,
                         input_arguments.choosen_archi,
                         train_data,
                         epochs,
                         optimizer,
                         checkpoint_file_path,
                         input_size,
                         output_size=2)
def main():
    """Parse CLI arguments, train a 102-class classifier on vgg16 or
    densenet121, and save a checkpoint under the requested directory.

    Fixes over the previous version:
    - the checkpoint now records the architecture actually used
      (`choosen_architecture`) instead of the raw CLI value, so a coerced
      fallback to vgg16 no longer produces a mislabelled checkpoint;
    - corrected the misspelled warning message ("arhitecture"/"defaut");
    - unsupported architectures are rejected before input_size is chosen.
    """
    # Get the input arguments
    input_arguments = process_arguments()

    # Set the device to cuda if specified and available
    default_device = torch.device(
        "cuda" if torch.cuda.is_available() and input_arguments.gpu else "cpu")

    # Only vgg16 and densenet121 are supported; fall back to vgg16 otherwise.
    choosen_architecture = input_arguments.choosen_archi
    if choosen_architecture not in ("vgg16", "densenet121"):
        print("Pretrained chosen architecture is densenet121 or vgg16, "
              "using default: vgg16")
        choosen_architecture = "vgg16"

    # Classifier input size: 25088 for vgg16, 1024 for densenet121.
    input_size = 1024 if choosen_architecture == "densenet121" else 25088

    # Load data
    (train_data, test_data, valid_data,
     trainloader, testloader, validloader) = du.loading_data(
        input_arguments.data_directory)

    # Pretrained backbone + fresh classifier head (102 flower classes).
    model = mu.set_pretrained_model(choosen_architecture)
    model = mu.set_model_classifier(model,
                                    input_arguments.hidden_units,
                                    input_size,
                                    output_size=102,
                                    dropout=0.05)

    # Train the model
    model, epochs, optimizer = mu.train_model(model,
                                              trainloader,
                                              input_arguments.epochs,
                                              validloader,
                                              input_arguments.learning_rate,
                                              default_device)

    # Ensure the save directory exists, then store the trained model.
    if not os.path.exists(input_arguments.save_directory):
        os.makedirs(input_arguments.save_directory)
    checkpoint_file_path = os.path.join(input_arguments.save_directory,
                                        "checkpoint.pth")
    mu.create_checkpoint(model,
                         choosen_architecture,
                         train_data,
                         epochs,
                         optimizer,
                         checkpoint_file_path,
                         input_size,
                         output_size=102)
def main():
    """Load, train and save a model according to the CLI arguments."""
    cli = utils.get_model_args()

    # Build the network from the --arch and --hidden_units parameters.
    print("Loading the model...")
    model = model_utils.load_model(cli.arch, cli.hidden_units)

    # Train on GPU when --gpu was given, otherwise CPU.
    print("Now training the model...")
    target_device = 'cuda' if cli.gpu else 'cpu'
    model = model_utils.train_model(cli.arch,
                                    model,
                                    cli.learn_rate,
                                    cli.epochs,
                                    device=target_device)

    # Persist the trained model.
    print("Saving the model to {}".format(cli.save_dir))
    model_utils.save_model(model,
                           cli.arch,
                           cli.epochs,
                           cli.gpu,
                           cli.learn_rate,
                           cli.save_dir,
                           cli.output_size)
def train(): conf = Config() # 打印模型配置信息 conf.dump() parser = argparse.ArgumentParser(description='图片分类模型训练') parser.add_argument( '--resume_checkpoint', action='store', type=str, default='model/checkpoint.pth', help='从模型的checkpoint恢复模型,并继续训练,如果resume_checkpoint这个参数提供' '这些参数将忽略--arch, --learning_rate, --hidden_units, and --drop_p') args = parser.parse_args() #加载数据 dataloaders, class_to_idx = load_data(conf.data_directory) #创建模型,如果模型文件存在 if args.resume_checkpoint and os.path.exists(args.resume_checkpoint): #加载checkpoint print('resume_checkpoint已存在,开始加载模型') model, optimizer, epoch, history = load_checkpoint( checkpoint_path=args.resume_checkpoint, load_optimizer=True, gpu=conf.cuda) start_epoch = epoch + 1 else: #创建新模型和优化器 print('resume_checkpoint未设置或模型文件不存在,创建新的模型') model = create_model( arch=conf.arch, class_to_idx=class_to_idx, hidden_units=conf.hidden_units, drop_p=conf.dropout) optimizer = create_optimizer(model=model, lr=conf.learning_rate) start_epoch = 1 history = None #训练模型 history, best_epoch = train_model( dataloaders=dataloaders, model=model, optimizer=optimizer, gpu=conf.cuda, start_epoch=start_epoch, epochs=conf.epochs, train_history=history) #测试集上测试模型 test_acc = test_model(dataloader=dataloaders['test'], model=model, gpu=conf.cuda) print(f'模型在测试集上的准确率是 {(test_acc * 100):.2f}%') #保存模型 save_checkpoint( save_path=conf.save_path+conf.save_name, epoch=best_epoch, model=model, optimizer=optimizer, history=history) #绘制历史记录 plot_history(history)
def main():
    """Driver for the ImageNet32 binary-transfer experiments.

    Pipeline: (1) prepare template / SGM / L2 / binary-linear weights,
    (2) load component and target class indices, (3) optionally train and
    rank component networks, (4) for each experiment either summarise
    existing results (--train absent) or train per-target-class models.
    NOTE(review): several string literals were split across lines in the
    extracted source and have been rejoined here — confirm against the
    original file.
    """
    global args, device
    args = parse_arguments()
    device = torch.device(args.device)
    imagenet32_labels = get_imagenet32_labels(args.imagenet32_dir)

    # Generate template network weights and last layer initialization weights
    template_file = os.path.join(args.model_dir, '{}.pth'.format(args.arch))
    if args.overwrite or not os.path.isfile(template_file):
        print('Preparing {} template weights...'.format(args.arch))
        model = train_template_network()
        pathlib.Path(os.path.dirname(template_file)).mkdir(parents=True,
                                                           exist_ok=True)
        torch.save(model.state_dict(), template_file)
    else:
        print('{} template weights exist.'.format(args.arch))

    # Auxiliary-loss template variants are only prepared when the
    # corresponding experiment was requested.
    if 'sgm' in args.experiments:
        sgm_file = os.path.join(args.model_dir, '{}_sgm.pth'.format(args.arch))
        if args.overwrite or not os.path.isfile(sgm_file):
            print('Preparing {} SGM template weights...'.format(args.arch))
            model = train_template_network(loss='sgm')
            pathlib.Path(os.path.dirname(sgm_file)).mkdir(parents=True,
                                                          exist_ok=True)
            torch.save(model.state_dict(), sgm_file)
        else:
            print('{} SGM template weights exist.'.format(args.arch))
    if 'l2' in args.experiments:
        l2_file = os.path.join(args.model_dir, '{}_l2.pth'.format(args.arch))
        if args.overwrite or not os.path.isfile(l2_file):
            print('Preparing {} L2 template weights...'.format(args.arch))
            model = train_template_network(loss='l2')
            pathlib.Path(os.path.dirname(l2_file)).mkdir(parents=True,
                                                         exist_ok=True)
            torch.save(model.state_dict(), l2_file)
        else:
            print('{} L2 template weights exist.'.format(args.arch))

    # Shared binary (2-way) final-layer initialisation, so every experiment
    # starts from the same head weights.
    binary_linear_file = os.path.join(args.model_dir,
                                      '{}_binary_linear.pth'.format(args.arch))
    if args.overwrite or not os.path.isfile(binary_linear_file):
        print('Preparing binary {} fully-connected weights...'.format(
            args.arch))
        model = mutil.get_model(args.arch)
        linear = torch.nn.Linear(model.linear.in_features, 2)
        pathlib.Path(os.path.dirname(binary_linear_file)).mkdir(parents=True,
                                                                exist_ok=True)
        torch.save(linear.state_dict(), binary_linear_file)
    else:
        print('Binary {} fully-connected weights exist.'.format(args.arch))

    # Load component network class indices and experiment target indices.
    with open(
            os.path.join(args.indices_dir, 'imagenet_component_classes.json'),
            'r') as f:
        component_classes = json.load(f)
    print('Index of {} component classes loaded.'.format(
        len(component_classes)))
    with open(os.path.join(args.indices_dir, 'imagenet_target_classes.json'),
              'r') as f:
        target_classes = json.load(f)
    print('Index of {} target classes loaded.'.format(len(target_classes)))

    if any('combn' in x for x in args.experiments) or any(
            'pcbn' in x for x in args.experiments):
        # Generate component networks (if haven't)
        for pos_class in component_classes:
            component_file = os.path.join(
                args.model_dir, COMPONENT_DIRNAME,
                '{}_{}.pth'.format(args.arch, pos_class))
            if args.overwrite or not os.path.isfile(component_file):
                print('Training component network ({} {})...'.format(
                    pos_class, imagenet32_labels[pos_class]))
                model = train_component_network(
                    pos_class,
                    template_file=template_file,
                    binary_linear_file=binary_linear_file)
                pathlib.Path(os.path.dirname(component_file)).mkdir(
                    parents=True, exist_ok=True)
                torch.save(model.state_dict(), component_file)

    # Evaluate component networks to rank them for selection, or load the
    # evaluations if they already exist
    if (any('accuracy' in x for x in args.experiments)
            or any('threshold' in x for x in args.experiments)):
        max_shot_eval_file = os.path.join(args.indices_dir,
                                          'max_shot_accuracies.json')
        if args.overwrite or not os.path.isfile(max_shot_eval_file):
            print('Generating max-shot component accuracies...')
            all_accuracies = rank_component_networks(component_classes,
                                                     target_classes,
                                                     pos_size=0,
                                                     method='accuracy')
            with open(max_shot_eval_file, 'w') as f:
                json.dump(all_accuracies, f)
        else:
            with open(max_shot_eval_file, 'r') as f:
                all_accuracies = json.load(f)
            print('Max-shot accuracy component evaluations loaded.')
    if any('loss' in x for x in args.experiments):
        few_shot_eval_file = os.path.join(
            args.indices_dir, '{}-shot_losses.json'.format(args.shot))
        if args.overwrite or not os.path.isfile(few_shot_eval_file):
            print('Generating {}-shot component losses...'.format(args.shot))
            all_losses = rank_component_networks(component_classes,
                                                 target_classes,
                                                 pos_size=args.shot,
                                                 method='loss')
            with open(few_shot_eval_file, 'w') as f:
                json.dump(all_losses, f)
        else:
            with open(few_shot_eval_file, 'r') as f:
                all_losses = json.load(f)
            print('{}-shot loss component evaluations loaded.'.format(
                args.shot))

    # Main experiment loop
    for experiment in args.experiments:
        # shot == 0 means "use all positives" (max-shot regime).
        shot_dir = 'max-shot' if args.shot == 0 else '{}-shot'.format(
            args.shot)
        if not args.train:
            # Perform evaluation by reading off the training summaries
            accuracies = []
            for pos_class in target_classes:
                summary_file = os.path.join(
                    args.model_dir, shot_dir, experiment,
                    '{}_{}.summary'.format(args.arch, pos_class))
                if os.path.isfile(summary_file):
                    entry = torch.load(summary_file)
                    # Best validation accuracy across epochs for this class.
                    accuracies.append(np.amax(entry['val_acc']))
            if accuracies:
                print('Mean validation accuracy of {} ({} classes): {:.1f}%'.
                      format(experiment, len(accuracies),
                             np.mean(accuracies) * 100))
            else:
                print(
                    'Mean validation accuracy of {} ({} classes): N/A'.format(
                        experiment, len(accuracies)))
        else:
            for pos_class in target_classes:
                weights_file = os.path.join(
                    args.model_dir, shot_dir, experiment,
                    '{}_{}.pth'.format(args.arch, pos_class))
                summary_file = os.path.join(
                    args.model_dir, shot_dir, experiment,
                    '{}_{}.summary'.format(args.arch, pos_class))
                # Skip classes whose weights already exist unless overwriting.
                if not args.overwrite and os.path.isfile(weights_file):
                    print('Weights found for {} ({} {}). Skipping...'.format(
                        experiment, pos_class, imagenet32_labels[pos_class]))
                    continue
                print('Preparing {} ({} {})...'.format(
                    experiment, pos_class, imagenet32_labels[pos_class]))

                # Define model for this experiment
                if any(x in experiment for x in ('combn', 'pcbn')):
                    # Parse experiment text to set up the proper BN
                    # combination configuration, e.g. "combn_loss_5" ->
                    # method 'combn', selection by 'loss', 5 components.
                    exp_params = experiment.split('_')
                    comb_method = exp_params[0]
                    selection_params = {'method': exp_params[1]}
                    if selection_params['method'] == 'loss':
                        metrics = all_losses
                        selection_params['num_components'] = int(exp_params[2])
                    elif selection_params['method'] == 'accuracy':
                        metrics = all_accuracies
                        selection_params['num_components'] = int(exp_params[2])
                    elif selection_params['method'] == 'threshold':
                        metrics = all_accuracies
                        selection_params['threshold'] = float(exp_params[2])
                    print('Selecting components...')
                    comp_paths = select_components(pos_class, metrics,
                                                   target_classes,
                                                   component_classes,
                                                   **selection_params)
                    if comp_paths is None:
                        print('No valid components. Skipping...')
                        continue
                    model = get_bn_combination_network(
                        comp_paths,
                        method=comb_method,
                        template_file=template_file,
                        binary_linear_file=binary_linear_file)
                elif experiment == 'last':
                    # Fine-tune only the (new) final linear layer.
                    model = mutil.get_model(args.arch)
                    model.load_state_dict(
                        torch.load(template_file, map_location='cpu'))
                    mutil.freeze_model_parameters_(model)
                    model.linear = torch.nn.Linear(model.linear.in_features,
                                                   2)
                    model.linear.load_state_dict(
                        torch.load(binary_linear_file, map_location='cpu'))
                elif experiment == 'full':
                    # Fine-tune the whole network (nothing frozen).
                    model = mutil.get_model(args.arch)
                    model.load_state_dict(
                        torch.load(template_file, map_location='cpu'))
                    model.linear = torch.nn.Linear(model.linear.in_features,
                                                   2)
                    model.linear.load_state_dict(
                        torch.load(binary_linear_file, map_location='cpu'))
                elif experiment == 'bn':
                    # Fine-tune BatchNorm parameters + final linear layer.
                    model = mutil.get_model(args.arch)
                    model.load_state_dict(
                        torch.load(template_file, map_location='cpu'))
                    mutil.freeze_model_parameters_(model)
                    mutil.set_module_trainable_(model, torch.nn.BatchNorm2d)
                    model.linear = torch.nn.Linear(model.linear.in_features,
                                                   2)
                    model.linear.load_state_dict(
                        torch.load(binary_linear_file, map_location='cpu'))
                elif experiment == 'sgm':
                    # Last-layer fine-tune from the SGM-trained template.
                    model = mutil.get_model(args.arch)
                    model.load_state_dict(
                        torch.load(sgm_file, map_location='cpu'))
                    mutil.freeze_model_parameters_(model)
                    model.linear = torch.nn.Linear(model.linear.in_features,
                                                   2)
                    model.linear.load_state_dict(
                        torch.load(binary_linear_file, map_location='cpu'))
                elif experiment == 'l2':
                    # Last-layer fine-tune from the L2-trained template.
                    model = mutil.get_model(args.arch)
                    model.load_state_dict(
                        torch.load(l2_file, map_location='cpu'))
                    mutil.freeze_model_parameters_(model)
                    model.linear = torch.nn.Linear(model.linear.in_features,
                                                   2)
                    model.linear.load_state_dict(
                        torch.load(binary_linear_file, map_location='cpu'))
                else:
                    raise NameError(
                        '{} is not recognized.'.format(experiment))
                model.to(device)

                # Prepare dataset: few-shot positives for training, the
                # full positive set (pos_size=0) for validation.
                dataloaders = {}
                dataset_sizes = {}
                _, dataloaders['train'], dataset_sizes[
                    'train'] = get_binary_imagenet32(pos_class,
                                                     pos_size=args.shot,
                                                     train=True)
                _, dataloaders['val'], dataset_sizes[
                    'val'] = get_binary_imagenet32(pos_class,
                                                   pos_size=0,
                                                   train=False)

                # Train model and save weights
                optimizer = torch.optim.SGD(
                    mutil.get_model_trainable_parameters(model),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
                scheduler = torch.optim.lr_scheduler.MultiStepLR(
                    optimizer,
                    milestones=args.decay_milestones,
                    gamma=args.decay_factor)
                print('Training...')
                model, summary = mutil.train_model(
                    model,
                    torch.nn.CrossEntropyLoss().to(device),
                    optimizer,
                    dataloaders,
                    dataset_sizes,
                    scheduler=scheduler,
                    num_epochs=args.epoch,
                    device=device,
                    verbose=False)
                pathlib.Path(os.path.dirname(weights_file)).mkdir(
                    parents=True, exist_ok=True)
                torch.save(model.state_dict(), weights_file)
                torch.save(summary, summary_file)
    print('Script complete.')
def train_template_network(loss='default'):
    """Obtain CIFAR10-trained template network.

    Training parameters follow original ResNet paper.

    Args:
        loss: Choose from 'default'/'sgm'/'l2'. 'default' is plain
            cross-entropy; 'sgm'/'l2' use GenericLoss with an auxiliary
            loss term.

    Returns:
        The trained model (after evaluating it on the test split).
    """
    # Use training parameters of original ResNet paper
    split_index = 45000       # first 45k images train, remaining 5k validate
    batch_size = 128
    lr = 1e-1
    momentum = 0.9
    weight_decay = 1e-4
    epoch = 180
    decay_milestones = [90, 120]
    decay_factor = 0.1
    # SGM/L2 specific parameters
    aux_loss_wt = 0.02

    # NOTE(review): these are ImageNet normalisation stats applied to
    # CIFAR10 — presumably intentional for transfer; confirm.
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Dataset 0 = CIFAR10 train set, dataset 1 = CIFAR10 test set.
    image_datasets = {
        x: datasets.CIFAR10(root=args.cifar10_dir,
                            train=y,
                            download=True,
                            transform=z)
        for x, y, z in zip([0, 1], [True, False],
                           [train_transform, test_transform])
    }
    # train/val both draw from dataset 0 via disjoint subset samplers;
    # test iterates dataset 1 sequentially.
    dataloaders = {
        x: DataLoader(image_datasets[y],
                      batch_size=batch_size,
                      sampler=z,
                      num_workers=args.num_workers,
                      pin_memory=('cpu' not in args.device))
        for x, y, z in zip(['train', 'val', 'test'], [0, 0, 1], [
            sampler.SubsetRandomSampler(range(split_index)),
            sampler.SubsetRandomSampler(
                range(split_index, len(image_datasets[0]))),
            sampler.SequentialSampler(image_datasets[1])
        ])
    }
    dataset_sizes = {
        'train': split_index,
        'val': len(image_datasets[0]) - split_index,
        'test': len(image_datasets[1])
    }

    model = mutil.get_model(args.arch).to(device)
    if loss == 'default':
        criterion = torch.nn.CrossEntropyLoss().to(device)
    elif loss in ('sgm', 'l2'):
        criterion = GenericLoss(loss, aux_loss_wt, model.linear.out_features)
    else:
        raise NameError('{} is not recognized.'.format(loss))
    optimizer = torch.optim.SGD(mutil.get_model_trainable_parameters(model),
                                lr=lr,
                                momentum=momentum,
                                weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=decay_milestones, gamma=decay_factor)
    model, _ = mutil.train_model(model,
                                 criterion,
                                 optimizer,
                                 dataloaders,
                                 dataset_sizes,
                                 scheduler=scheduler,
                                 num_epochs=epoch,
                                 device=device)
    # Report final test-set performance before returning the model.
    mutil.eval_model(model,
                     dataloaders['test'],
                     dataset_sizes['test'],
                     device=device)
    return model
def train():
    """CLI entry point: parse arguments, train a classifier, and optionally
    save a checkpoint.

    Fixes over the previous version:
    - corrected the misspelled argparse description ("netwotk");
    - `checkpoint_name`'s help text no longer duplicates --save_dir's
      (it previously claimed the model would not be saved).
    """
    parser = argparse.ArgumentParser(description='Train network.')
    parser.add_argument('data_dir', help='train and test datas directory.')
    parser.add_argument(
        'checkpoint_name',
        help='file name under which to save the trained model checkpoint')
    parser.add_argument('--architecture',
                        default='densenet121',
                        help='architecture to be used')
    parser.add_argument(
        '--save_dir',
        help='name to save the model, if none provided the model is not saved',
        default='')
    parser.add_argument('--hidden_units',
                        type=int,
                        help='hidden units for the model, default is 512',
                        default=512)
    parser.add_argument(
        '--learningRate',
        type=float,
        help='Learning rate to train the model.0.001 is default',
        default=0.001)
    parser.add_argument('--epochs',
                        type=int,
                        help='epochs when the model is training',
                        default=2)
    args = parser.parse_args()

    data_dir = args.data_dir
    checkpoint_name = args.checkpoint_name
    architecture = args.architecture
    save_dir = args.save_dir
    hidden_units = args.hidden_units
    learning_rate = args.learningRate
    epochs = args.epochs

    print('start training. data_dir is: ' + data_dir)
    train_dir = data_dir + '/train'
    test_dir = data_dir + '/test'
    traindata, trainloader = utils.load_data(True, train_dir)
    testdata, testloader = utils.load_data(False, test_dir)

    # Use GPU if it's available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define the model; only the classifier's parameters are optimised.
    model = model_utils.create_model(hidden_units, architecture)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

    # Model training execution
    model_utils.train_model(model, trainloader, testloader, criterion,
                            optimizer, device, epochs)

    # Save the model (prefixing with save_dir when one was given)
    if save_dir != '':
        checkpoint_name = os.path.join(save_dir, checkpoint_name)
    model_utils.save_model(model, traindata, optimizer, checkpoint_name,
                           epochs, architecture)
n_hidden_layers=params["n_hidden_layers"], img_feat_dim=params["img_feat_dim"], obj_feat_dim=params["obj_feat_dim"], lstm_hidden_dim=params["hidden_feat_dim"], lstm_dropout=params["lstm_dropout"], device=device) model = model.to(device) optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min") n_frames = params["n_frames"] loss_fn = AccidentLoss(n_frames, device) # optionally load from some previous checkpoint if args.model_path != "": print("Loading Existing Model: %s" % args.model_path) checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint["model"]) optimizer.load_state_dict(checkpoint["optimizer"]) scheduler.load_state_dict(checkpoint["scheduler"]) else: model.apply(model_utils.init_weights) if args.train: model_utils.train_model(model, optimizer, scheduler, loss_fn, progress_dir, train_files[:15], eval_files, args.num_epochs, device) else: # run demo pass
val_losses = [] train_recalls = [] val_recalls = [] test_losses = [] test_recalls = [] writer = SummaryWriter() print('-------------------Start Training Model---------------------') ############################ Train Model ############################# # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8) for ep in range(epoch): rec_sys_model, optimizer, avg_train_loss, avg_train_recall = model_utils.train_model( rec_sys_model, loss_func, optimizer, train_loader, ep, top_k, train_display_step) # train_losses.append(avg_train_loss) # train_recalls.append(avg_train_recall) print("Train loss: ", avg_train_loss) print("Train recall: ", avg_train_recall) writer.add_scalar("Loss/train", avg_train_loss, ep) writer.add_scalar("Recall/train", avg_train_recall, ep) avg_val_loss, avg_val_recall = model_utils.validate_model( rec_sys_model, loss_func, valid_loader, ep, top_k, val_display_step) # val_losses.append(avg_val_loss) # val_recalls.append(avg_val_recall) print("Val loss: ", avg_val_loss) print("Val recall: ", avg_val_recall) writer.add_scalar("Loss/val", avg_val_loss, ep)
weights = torch.load(cnn_file, map_location=DEVICE) model.load_state_dict(weights['state_dict'], strict=False) model.to(DEVICE) filename = 'model_{}_IHCweights'.format(time) print('IHC pretrained weights loaded') else: print( 'No model file! IHC pretrained weights not loaded --> Random initialization' ) filename = 'model_{}_randomweights'.format(time) else: filename = 'model_{}_randomweights'.format(time) # --- Training/validation loop --- criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) train_hist, val_hist, val_acc, val_F1 = train_model(DEVICE, filename, model, AGGREGATION, criterion, optimizer, N_EPOCHS, IMGS_STEP, train_path, val_path, BATCH_TILE=BATCH_TILE)
if pre_tr_model == 'vgg16': input_units = model.classifier[0].in_features model.name = 'vgg16' elif pre_tr_model == 'vgg19': input_units = model.classifier[0].in_features model.name = 'vgg19' elif pre_tr_model == 'densenet': input_units = model.classifier.in_features model.name = 'densenet' elif pre_tr_model == 'alexnet': input_units = model.classifier[1].in_features model.name = 'alexnet' #building classifier of model model = build_classifier(model, input_units, hidden_units, dropout) print(model) #Set criterion and optimizer criterion = nn.NLLLoss() optimizer = optim.Adam(model.classifier.parameters(), learning_rate) model.to(device) # Training model model = train_model(model, epochs, trainloader, validloader, criterion, optimizer, device) # Testing model test_model(model, testloader, device) # Saving model save_model(model, train_data, save_dir)
def run_train_cv(self):
    """Run cross-validated training over `self.fold_indices`.

    For each fold: build train/val spectrogram datasets, train a model
    (checkpointing the best weights per fold), reload the best checkpoint
    and fill the out-of-fold prediction matrix.

    Returns:
        (oof_preds, best_val_loss): out-of-fold predictions of shape
        (len(self.df), n_class) and the mean best validation loss across
        folds. NOTE(review): `best_val_loss` starts at 0 and accumulates
        whatever model_utils.train_model returns — presumably the best
        validation loss per fold; confirm against that helper.
    """
    oof_preds = np.zeros((len(self.df), self.n_class))
    best_val_loss = 0
    for i_fold, (trn_idx, val_idx) in enumerate(self.fold_indices):
        self.logger.info("-" * 10)
        self.logger.info(f"fold: {i_fold}")
        train_df = self.df.iloc[trn_idx].reset_index(drop=True)
        val_df = self.df.iloc[val_idx].reset_index(drop=True)
        # concat nocall df
        # val_df = pd.concat([val_df, self.nocall_df]).reset_index()
        train_ds = datasets.SpectrogramDataset(
            train_df,
            self.data_dir,
            sample_rate=self.config.sample_rate,
            composer=self.train_composer,
            secondary_label=self.secondary_label,
        )
        valid_ds = datasets.SpectrogramDataset(
            val_df,
            self.data_dir,
            sample_rate=self.config.sample_rate,
            composer=self.val_composer,
            secondary_label=self.secondary_label
        )
        train_dl = torch.utils.data.DataLoader(
            train_ds, shuffle=True, **self.config.dataloader
        )
        # reduce batchsize for avoiding cudnn error
        valid_dl = torch.utils.data.DataLoader(
            valid_ds,
            shuffle=False,
            num_workers=self.config.dataloader.num_workers,
            batch_size=int(self.config.dataloader.batch_size / 2),
            pin_memory=self.config.dataloader.pin_memory,
        )
        model = model_utils.build_model(
            self.config.model.name,
            n_class=self.n_class,
            in_chans=self.config.model.in_chans,
            pretrained=self.config.model.pretrained,
        )
        # Wrap in DataParallel only when multi-GPU is requested and enabled.
        if self.config.multi and self.config.gpu:
            self.logger.info("Using pararell gpu")
            model = nn.DataParallel(model)
        # criterion = nn.BCELoss()
        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(model.parameters(),
                               float(self.config.learning_rate))
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)
        # Best weights for this fold are checkpointed to this path and
        # reloaded below before predicting on the validation fold.
        best_model_path = self.save_dir / f"best_model_fold{i_fold}.pth"
        if self.config.mixup:
            self.logger.info("use mixup")
        best_val_loss += model_utils.train_model(
            epoch=self.epoch,
            model=model,
            train_loader=train_dl,
            val_loader=valid_dl,
            optimizer=optimizer,
            scheduler=scheduler,
            criterion=criterion,
            device=self.device,
            threshold=self.config.threshold,
            best_model_path=best_model_path,
            logger=self.logger,
            mixup=self.config.mixup,
        )
        model = model_utils.load_pytorch_model(
            model_name=self.config.model.name,
            path=best_model_path,
            n_class=self.n_class,
            in_chans=self.config.model.in_chans,
        )
        # Out-of-fold predictions from the best checkpoint of this fold.
        preds = model_utils.predict(
            model, valid_dl, self.n_class, self.device, sigmoid=True
        )
        oof_preds[val_idx, :] = preds
    # oof_score = self.metrics(self.y, oof_preds)
    best_val_loss /= len(self.fold_indices)
    return oof_preds, best_val_loss
# Train and evaluate the output-conv CNN variant.
from model_utils import train_model, test_model
from cnn_model_with_output_conv import CNNModel

model = CNNModel()
# "cnn_model_with_output_conv" is the run/checkpoint identifier; 20000 is
# presumably the number of training steps — confirm against train_model.
train_model(model, "cnn_model_with_output_conv", 20000)
test_model(model, "cnn_model_with_output_conv")
def main():
    """Train/evaluate binary CIFAR-10 classifiers under several batch-norm
    transfer experiments ('last', 'full', 'bn', 'combn', 'pcbn' and their
    random-init variants).

    For each experiment and each positive class, a binary dataset is built,
    a model is configured per the experiment's recipe, and it is either
    trained and saved (``--train``) or loaded and evaluated.
    """
    args = parse_arguments()
    device = torch.device(args.device)
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # Key 0 -> CIFAR-10 train split, key 1 -> test split.
    image_datasets = {
        x: datasets.CIFAR10(root=args.cifar10_dir,
                            train=y,
                            download=True,
                            transform=transform)
        for x, y in zip([0, 1], [True, False])
    }
    class_names = ('airplane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog',
                   'horse', 'ship', 'truck')

    # Prepare shared template weights so every experiment starts from the
    # same random backbone / binary head.
    random_file = os.path.join(args.model_dir,
                               '{}_random.pth'.format(args.arch))
    if args.overwrite or not os.path.isfile(random_file):
        print('Preparing {} random weights...'.format(args.arch))
        model = mutil.get_model(args.arch, pretrained=False)
        pathlib.Path(os.path.dirname(random_file)).mkdir(parents=True,
                                                         exist_ok=True)
        torch.save(model.state_dict(), random_file)
    binary_fc_file = os.path.join(args.model_dir,
                                  '{}_binary_fc.pth'.format(args.arch))
    if args.overwrite or not os.path.isfile(binary_fc_file):
        print('Preparing binary {} fully-connected weights...'.format(
            args.arch))
        model = mutil.get_model(args.arch, pretrained=False)
        fc = torch.nn.Linear(model.fc.in_features, 2)
        pathlib.Path(os.path.dirname(binary_fc_file)).mkdir(parents=True,
                                                            exist_ok=True)
        torch.save(fc.state_dict(), binary_fc_file)

    def _attach_binary_fc(model):
        # Swap in a 2-way head initialized from the shared template so all
        # experiments start with identical classifier weights.
        model.fc = torch.nn.Linear(model.fc.in_features, 2)
        model.fc.load_state_dict(
            torch.load(binary_fc_file, map_location='cpu'))

    def _load_random_backbone(model):
        # Reset the backbone to the shared random template weights.
        model.load_state_dict(torch.load(random_file, map_location='cpu'))

    # Run experiments
    for experiment in args.experiments:
        save_dir = os.path.join(args.model_dir, experiment)
        for pos_class in range(10):
            weights_file = os.path.join(
                save_dir, '{}_{}.pth'.format(args.arch, pos_class))
            if args.train and not args.overwrite and os.path.isfile(
                    weights_file):
                print('Weights found for {} ({} {}). Skipping...'.format(
                    experiment, pos_class, class_names[pos_class]))
                continue
            if not args.train and not os.path.isfile(weights_file):
                print('Accuracy of {} ({} {}): N/A'.format(
                    experiment, pos_class, class_names[pos_class]))
                continue

            # Setup binary dataset: train/val carved from the CIFAR train
            # split (first 40k / last 10k), test from the CIFAR test split.
            binary_datasets = {
                a: BinaryCIFAR10Subset(image_datasets[b],
                                       pos_class,
                                       start_index=c,
                                       end_index=d,
                                       sample_size=e,
                                       balanced=True,
                                       random=False)
                for a, b, c, d, e in zip(['train', 'val', 'test'],
                                         [0, 0, 1],
                                         [0, 40000, 0],
                                         [40000, None, None],
                                         [1000, 0, 0])
            }
            dataloaders = {
                x: DataLoader(binary_datasets[x],
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers,
                              pin_memory=('cpu' not in args.device))
                for x in ['train', 'val', 'test']
            }
            dataset_sizes = {
                x: len(binary_datasets[x])
                for x in ['train', 'val', 'test']
            }

            # Setup model per experiment recipe. component_classes are the
            # nine non-positive classes whose BN stats are composed in the
            # combn/pcbn variants.
            component_classes = [x for x in range(10) if x != pos_class]
            if experiment == 'last':
                model = mutil.get_model(args.arch, pretrained=True)
                mutil.freeze_model_parameters_(model)
                _attach_binary_fc(model)
            elif experiment == 'full':
                model = mutil.get_model(args.arch, pretrained=True)
                _attach_binary_fc(model)
            elif experiment == 'bn':
                model = mutil.get_model(args.arch, pretrained=True)
                mutil.freeze_model_parameters_(model)
                _attach_binary_fc(model)
                mutil.set_module_trainable_(model, torch.nn.BatchNorm2d)
            elif experiment == 'combn':
                model = mutil.get_model(args.arch, pretrained=True)
                mutil.freeze_model_parameters_(model)
                _attach_binary_fc(model)
                mutil.replace_bn_with_combn_(model, [
                    os.path.join(args.model_dir, 'bn', '{}_{}.pth'.format(
                        args.arch, x)) for x in component_classes
                ])
            elif experiment == 'pcbn':
                model = mutil.get_model(args.arch, pretrained=True)
                mutil.freeze_model_parameters_(model)
                _attach_binary_fc(model)
                mutil.replace_bn_with_pcbn_(model, [
                    os.path.join(args.model_dir, 'bn', '{}_{}.pth'.format(
                        args.arch, x)) for x in component_classes
                ])
            elif experiment == 'bn_random':
                model = mutil.get_model(args.arch, pretrained=False)
                _load_random_backbone(model)
                mutil.freeze_model_parameters_(model)
                _attach_binary_fc(model)
                mutil.set_module_trainable_(model, torch.nn.BatchNorm2d)
            elif experiment == 'combn_random':
                model = mutil.get_model(args.arch, pretrained=False)
                _load_random_backbone(model)
                mutil.freeze_model_parameters_(model)
                _attach_binary_fc(model)
                mutil.replace_bn_with_combn_(model, [
                    os.path.join(args.model_dir, 'bn_random',
                                 '{}_{}.pth'.format(args.arch, x))
                    for x in component_classes
                ])
            elif experiment == 'pcbn_random':
                model = mutil.get_model(args.arch, pretrained=False)
                _load_random_backbone(model)
                mutil.freeze_model_parameters_(model)
                # BUGFIX: previously this branch created the 2-way head but
                # never loaded the shared binary-fc weights, unlike every
                # sibling experiment; _attach_binary_fc fixes that.
                _attach_binary_fc(model)
                mutil.replace_bn_with_pcbn_(model, [
                    os.path.join(args.model_dir, 'bn_random',
                                 '{}_{}.pth'.format(args.arch, x))
                    for x in component_classes
                ])
            # NOTE: several commented-out experiment variants
            # (*_semi_random, *_full_random, bn_imagenet_random) were removed
            # here; recover them from version control if needed.
            else:
                raise NameError('{} is not recognized.'.format(experiment))
            model.to(device)

            # Train and save model
            if args.train:
                print('Training {} ({} {})...'.format(
                    experiment, pos_class, class_names[pos_class]))
                # Only the parameters left trainable by the recipe above are
                # handed to the optimizer.
                optimizer = torch.optim.SGD(
                    mutil.get_model_trainable_parameters(model),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
                scheduler = torch.optim.lr_scheduler.StepLR(
                    optimizer,
                    step_size=args.decay_step,
                    gamma=args.decay_factor)
                model, _ = mutil.train_model(
                    model,
                    torch.nn.CrossEntropyLoss().to(device),
                    optimizer,
                    dataloaders,
                    dataset_sizes,
                    scheduler=scheduler,
                    num_epochs=args.epoch,
                    device=device)
                mutil.eval_model(model,
                                 dataloaders['test'],
                                 dataset_sizes['test'],
                                 device=device)
                pathlib.Path(os.path.dirname(weights_file)).mkdir(
                    parents=True, exist_ok=True)
                torch.save(model.state_dict(), weights_file)
            # Evaluate model
            else:
                model.load_state_dict(
                    torch.load(weights_file, map_location='cpu'))
                accuracy = mutil.eval_model(model,
                                            dataloaders['test'],
                                            dataset_sizes['test'],
                                            device=device,
                                            verbose=False)
                print('Accuracy of {} ({} {}): {:.1f}%'.format(
                    experiment, pos_class, class_names[pos_class],
                    accuracy * 100))
# NOTE(review): tail of read_arguments() — the def line and earlier argument
# handling are outside this chunk, so only the final unpack/return is visible.
    val_images = args.val_images
    model = args.model          # forwarded to create_model / used as-is below
    summarize = args.summarize  # if set, only print the model summary
    normalize = args.n          # whether to normalize images during training
    return model_path, out_path, create_new, epochs, batches, \
        input_shape, train_images, val_images, model, summarize, normalize


if __name__ == '__main__':
    model_path, out_path, create_new, epochs, batches, \
        input_shape, train_images, val_images, model, summarize, normalize = read_arguments()
    # Append the channel dimension (RGB) to the (height, width) shape.
    input_shape = (input_shape[0], input_shape[1], 3)
    if create_new:
        # As opencv2 considers the number of rows to be the second element of a shape tuple,
        # this is unfortunately necessary.
        model = model_utils.create_model(
            model, (input_shape[1], input_shape[0], input_shape[2]))
    else:
        model = model_utils.load_model(model_path)
    if summarize:
        # Summary-only mode: print the architecture and stop.
        model.summary()
        exit(0)
    model_utils.train_model(model, batches, epochs, input_shape[:2],
                            train_images_per_batch=train_images,
                            val_images_per_batch=val_images,
                            normalize_images=normalize)
    model_utils.save_model(model, out_path)
import data_utils as du
import model_utils as mu

# Build vocabulary from the test file, train a one-step LSTM on the train
# pair, then predict on the encoded test input.
test_lines = du.get_file_lines('../data/test/test.txt')
variables = du.get_variables(test_lines)
code_to_int, int_to_code = du.get_keyword_dict(variables)

train_x, train_y, test_x, test_y = du.get_train_data_one_step(
    '../data/train/input.txt', '../data/train/output.txt', 6, 1, 0.9)

model = mu.build_LSTM_model(train_x.shape[1], train_x.shape[2], False)
model, predict, test_y = mu.train_model(model, train_x, train_y,
                                        test_x, test_y)

encoded_input = du.get_input_set(test_lines, code_to_int, 6)
predict = model.predict(encoded_input)
print(predict)
import time

from pymongo import MongoClient

from config import DATABASE_NAME, MODEL_COLLECTION_NAME, DATASET_COLLECTION_NAME
from model_utils import train_model

# Seconds to wait between queue polls when no work is available.
POLL_INTERVAL_SECONDS = 5

db = MongoClient()
conn = db[DATABASE_NAME]

# Simple polling worker: claim a queued model document, train it, then mark
# it completed with the returned metadata.
while True:
    model = conn[MODEL_COLLECTION_NAME].find_one({"status": "queued"})
    if model:
        # Mark as in-progress so other workers skip this document.
        conn[MODEL_COLLECTION_NAME].update_one({"name": model["name"]},
                                               {"$set": {
                                                   "status": "working"
                                               }})
        # NOTE(review): the fetched dataset document is never used below —
        # confirm whether this lookup is intentional (e.g. a sanity check).
        dataset = conn[DATASET_COLLECTION_NAME].find_one(
            {'name': model['dataset_name']})
        meta = train_model(model['name'], model['dataset_name'],
                           model['arch'], model['img_size'], model['epochs'])
        conn[MODEL_COLLECTION_NAME].update_one(
            {"name": model["name"]},
            {"$set": {
                "status": "completed",
                "meta": meta
            }})
    else:
        # BUGFIX: the original loop spun at 100% CPU (and hammered MongoDB)
        # whenever the queue was empty; back off between polls instead.
        time.sleep(POLL_INTERVAL_SECONDS)
# NOTE(review): the matching `if` branch (presumably checkpoint resume) is
# outside this chunk; this `else` starts a fresh model from scratch.
else:
    # create new model and optimizer
    model = create_model(arch=args.arch,
                         class_to_idx=class_to_idx,
                         hidden_units=args.hidden_units,
                         drop_p=args.drop_p)
    optimizer = create_optimizer(model=model, lr=args.learning_rate)
    start_epoch = 1  # fresh run: begin at epoch 1 with no prior history
    history = None

# Train model ###########################
history, best_epoch = train_model(dataloaders=dataloaders,
                                  model=model,
                                  optimizer=optimizer,
                                  gpu=gpu,
                                  start_epoch=start_epoch,
                                  epochs=args.epochs,
                                  train_history=history)

# Check performance on test data set
# test_acc = test_model(
#     dataloader=dataloaders['test'], model=model, gpu=gpu)
# print(f'\nModel achieved accuracy of {(test_acc * 100):.2f}% on Test data set.')

# Plot training history
plot_history(history)
# NOTE: plot_history() is currently not working on Udacity workspace because
# display device is not available

# Save checkpoint
# BUGFIX: two DataLoader objects (over train_set/test_set) used to be
# constructed here and immediately discarded — never bound to any name.
# Training below consumes the raw tensors directly, so the dead
# constructions were removed.

# Initialize Recurrent Neural Network (LSTM)
# NOTE(review): hidden_dim is set to the batch size (batch_s), which looks
# accidental — confirm the hidden size is really meant to track batch size.
RNN = NN(input_dim=input_d, hidden_dim=batch_s, num_layers=layers_n,
         output_dim=output_d)

# Train Model
print("Training Model...")
model_train, train_loss = train_model(RNN, x_train_tens, y_train_tens,
                                      epochs, 0.01)

print("Making Predictions...")
# Back-test Model
model_test = RNN(x_test_tens)

# Invert data for visualization: undo the min-max scaling applied upstream
# so predictions and targets are back in price units.
model_train = min_max_scale.inverse_transform(model_train.detach().numpy())
model_test = min_max_scale.inverse_transform(model_test.detach().numpy())
y_train = min_max_scale.inverse_transform(y_train_tens.detach().numpy())
y_test = min_max_scale.inverse_transform(y_test_tens.detach().numpy())

# Graph training loss, Actual Stock Price, Predicted Stock Price
visualize_results(stock, reference_frame, train_loss, model_train,
                  model_test, y_train, y_test)
# constants
# output_cats = 102  # number of flower classifications (can make this a command line input for other training)

args = get_args_train()
if (args.device == 'gpu' and torch.cuda.is_available()):
    device = torch.device('cuda')
else:
    print(
        "Model should be trained on GPU, enable and select --gpu gpu for training"
    )
    # BUGFIX: `device` was previously left undefined on this path, so the
    # later train_model(..., device, ...) call crashed with a NameError.
    # Fall back to CPU so training can still proceed.
    device = torch.device('cpu')

train_data, test_data, validation_data, trainloader, testloader, validationloader = load_data(
    args.data_directory)
pretrain_model, arch_inFeatures = pretrained_model(args.arch)
model, criterion = create_classifier(pretrain_model, arch_inFeatures,
                                     args.hidden_units, args.output_cats)
# Only the classifier head is optimized; the pretrained backbone is reused.
optimizer = optim.Adam(model.classifier.parameters(), lr=args.lr)
trained_model = train_model(model, args.epochs, trainloader,
                            validationloader, device, optimizer, criterion)
tested_model = test_model(trained_model, testloader, device, optimizer,
                          criterion)
save_checkpoint(trained_model, args.save_directory, args.arch, train_data,
                optimizer, args.epochs, args.hidden_units)