def new_model(is_generator):
    '''
    Creates a new model instance and initializes the respective fields
    in params.

    Keyword arguments:
    > is_generator (bool) -- whether to create a generator or a classifier

    Returns:
    > params (dict) -- the newly initialized state variable
    '''
    params = param_factory(is_generator=is_generator)
    # Parenthesize the conditional so the trailing period is appended in
    # both branches (previously it only applied to the classifier branch).
    print('You are initializing a new',
          (bolded('generator') if is_generator else bolded('classifier')) + '.')
    model_list = constants.GENERATORS if is_generator else constants.CLASSIFIERS

    # Name
    params['run_name'] = input('Please type the current model run name -> ')

    # Architecture. Slightly hacky - allows constants.py to enforce
    # which models are generators vs. classifiers.
    model_string = train_utils.input_from_list(model_list, 'model')
    if model_string == 'Classifier_A':
        params['model'] = models.Classifier_A()
    elif model_string == 'Classifier_B':
        params['model'] = models.Classifier_B()
    elif model_string == 'Classifier_C':
        params['model'] = models.Classifier_C()
    elif model_string == 'Classifier_D':
        params['model'] = models.Classifier_D()
    elif model_string == 'Classifier_E':
        params['model'] = models.Classifier_E()
    elif model_string == 'VANILLA_VAE':
        params['model'] = models.VAE()
    elif model_string == 'DEFENSE_VAE':
        params['model'] = models.Defense_VAE()
    else:
        raise ValueError(f'{model_string} does not exist as a model (yet)!')

    # Kaiming initialization for weights
    models.initialize_model(params['model'])

    # Set up other state variables
    for state_var in constants.SETUP_STATE_VARS:
        train_utils.store_user_choice(params, state_var)
        print()

    # Grab dataloaders. TODO: Prompt for val split/randomize val indices
    params['train_dataloader'], params['val_dataloader'], params['test_dataloader'] = get_dataloader(
        dataset_name=params['dataset'],
        batch_sz=params['batch_size'],
        num_threads=params['num_threads'])

    # Save an initial copy
    save_dir = os.path.join('models', model_type(params), params['run_name'])
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    train_utils.save_checkpoint(params, 0)
    return params
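A hedged usage sketch for the function above; it only exercises `new_model` itself and assumes the surrounding module's imports (`constants`, `models`, `train_utils`) are in place:

# Hypothetical interactive session: create and checkpoint a new classifier.
params = new_model(is_generator=False)
print('Initialized run:', params['run_name'])
print('Architecture:', type(params['model']).__name__)
print('Train batches:', len(params['train_dataloader']))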
def define_model_and_optimizer(self):
    """
    Defines the model (`self.model`) and the optimizer (`self.optimizer`).
    """
    print("* Defining model and optimizer.", flush=True)
    job_dir = self.C.job_dir

    if self.C.restart:
        print("-- Loading model from previous saved state.", flush=True)
        self.restart_epoch = util.get_restart_epoch()
        self.model = torch.load(
            f"{job_dir}model_restart_{self.restart_epoch}.pth")
        print(
            f"-- Backing up as "
            f"{job_dir}model_restart_{self.restart_epoch}_restarted.pth.",
            flush=True,
        )
        shutil.copyfile(
            f"{job_dir}model_restart_{self.restart_epoch}.pth",
            f"{job_dir}model_restart_{self.restart_epoch}_restarted.pth",
        )
    else:
        print("-- Initializing model from scratch.", flush=True)
        self.model = models.initialize_model()
        self.restart_epoch = 0

    start_epoch = self.restart_epoch + 1
    end_epoch = start_epoch + self.C.epochs

    print("-- Defining optimizer.", flush=True)
    self.optimizer = torch.optim.Adam(
        params=self.model.parameters(),
        lr=self.C.init_lr,
        weight_decay=self.C.weight_decay,
    )

    return start_epoch, end_epoch
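A hedged sketch of how a caller might consume the returned epoch range; `workflow` and `train_epoch` are hypothetical names, and the save path simply mirrors the `model_restart_<epoch>.pth` convention the restart branch above expects:

# Hypothetical driver: train over the epoch range computed above.
start_epoch, end_epoch = workflow.define_model_and_optimizer()
for epoch in range(start_epoch, end_epoch):
    train_epoch(workflow.model, workflow.optimizer, epoch)  # hypothetical helper
    # Save under the naming convention the restart branch looks for
    torch.save(workflow.model, f"{workflow.C.job_dir}model_restart_{epoch}.pth")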
def load_artifacts(
    run_id: str,
    device: torch.device = torch.device("cpu"),
) -> Dict:
    """Load artifacts for a particular `run_id`.

    Args:
        run_id (str): ID of the run to load model artifacts from.
        device (torch.device): Device to run model on. Defaults to CPU.

    Returns:
        Artifacts needed for inference.
    """
    # Download artifacts from the tracking server.
    # (Note: a previous version reassigned `device` to CPU here, silently
    # ignoring the argument; that override has been removed.)
    client = mlflow.tracking.MlflowClient()
    with tempfile.TemporaryDirectory() as fp:
        client.download_artifacts(run_id=run_id, path="", dst_path=fp)
        label_encoder = data.LabelEncoder.load(
            fp=Path(fp, "label_encoder.json"))
        tokenizer = data.Tokenizer.load(fp=Path(fp, "tokenizer.json"))
        model_state = torch.load(Path(fp, "model.pt"), map_location=device)
        performance = utils.load_dict(filepath=Path(fp, "performance.json"))

    # Load model
    run = mlflow.get_run(run_id=run_id)
    args = Namespace(**run.data.params)
    model = models.initialize_model(
        args=args, vocab_size=len(tokenizer), num_classes=len(label_encoder)
    )
    model.load_state_dict(model_state)

    return {
        "args": args,
        "label_encoder": label_encoder,
        "tokenizer": tokenizer,
        "model": model,
        "performance": performance,
    }
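A hedged usage sketch for `load_artifacts`; the run ID placeholder is illustrative:

# Hypothetical usage: restore everything needed for inference from one run.
artifacts = load_artifacts(run_id="<mlflow-run-id>")
model = artifacts["model"]
model.eval()  # inference mode: disable dropout, use running batchnorm stats
print(artifacts["performance"])  # metrics logged at training time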
name_file = args.name

# Resume checkpoint
model_resume = args.resume

# Labels
if problem == 'atmospheric':
    num_classes = 4
else:
    labels = pd.read_csv('../data/train_v2.csv')
    num_classes = len(set(labels['tags']))

# Flag for feature extracting. When False, we finetune the whole model;
# when True we only update the reshaped layer params.
feature_extract = True

# Select the model
model_ft, input_size = initialize_model(model_name, num_classes,
                                        feature_extract, use_pretrained=True)

# Print the model we just instantiated
print(model_ft)

# TODO: split data in code, not in directories
# Create training and validation datasets
image_datasets = {
    x: PlanetDataset(csv_file='../data/train_v2.csv',
                     root_dir='../data/fast_dehazed_' + x,
                     extension='.jpg',
                     problem=problem,
                     transform=transforms.Compose([
                         transforms.Resize(input_size),
                         transforms.ToTensor()]))
    for x in ['train', 'val']
}

# Create training and validation dataloaders
dataloaders_dict = {
    x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                   shuffle=False, num_workers=1)
    for x in ['train', 'val']
}

# Detect if we have a GPU available
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
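One caveat worth flagging: if this `train_v2.csv` follows the Planet Amazon layout, the `tags` column holds space-separated label strings, so `set(labels['tags'])` counts unique tag *combinations* rather than individual tags. A hedged sketch of counting distinct tags instead, assuming that CSV layout:

# Assumes each row's 'tags' column is a space-separated string,
# e.g. "haze primary water" (Planet Amazon format).
labels = pd.read_csv('../data/train_v2.csv')
unique_tags = {tag for tag_string in labels['tags'] for tag in tag_string.split()}
num_classes = len(unique_tags)  # distinct tags, not distinct combinations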
def visualize_model(model_name, model_path, show_only_fails, device,
                    weighted_train_sampler, num_images=10 * 5):
    seed = np.random.randint(100)
    train_data_dir = '/home/nvme/data/openimg/train/train/'
    test_data_dir = '/home/nvme/data/openimg/test/testset/'

    # Hyperparameters
    valid_size = 0.25
    batch_size = 32
    num_workers = 4
    pin_memory = True
    num_classes = 17

    # Initialize the model for this run
    model, input_size = initialize_model(model_name, num_classes,
                                         feature_extract=True,
                                         use_pretrained=True)
    model.load_state_dict(
        torch.load(model_path, map_location=device)['model_state_dict'])
    model.to(device)
    was_training = model.training
    model.eval()

    # Data augmentation and normalization for training;
    # just normalization for validation
    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(means, stds)
        ]),
        'valid': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(means, stds)
        ]),
    }

    train_loader, valid_loader = get_train_valid_loader(
        train_data_dir, batch_size, data_transforms, seed,
        weighted_train_sampler, valid_size=valid_size, shuffle=True,
        show_sample=False, num_workers=num_workers, pin_memory=pin_memory)
    test_loader = get_test_loader(test_data_dir, batch_size, data_transforms,
                                  num_workers=num_workers,
                                  pin_memory=pin_memory)
    dataloaders_dict = {
        'train': train_loader,
        'valid': valid_loader,
        'test': test_loader
    }

    # class_to_idx maps class name -> index; invert it for display.
    # (The original comprehension produced the same dict but with the
    # loop variables named backwards, which read as the inverse mapping.)
    class_to_idx = train_loader.dataset.class_to_idx
    idx_to_class = {idx: cls for cls, idx in class_to_idx.items()}

    while True:
        images_so_far = 0
        fig = plt.figure(figsize=(20, 10))
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(dataloaders_dict['valid']):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size()[0]):
                    predicted = idx_to_class[preds[j].item()]
                    gt = idx_to_class[labels[j].item()]
                    if show_only_fails and predicted == gt:
                        continue
                    title_color = 'green' if predicted == gt else 'red'
                    images_so_far += 1
                    ax = plt.subplot(num_images // 10, 10, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'p: {predicted}; gt: {gt}',
                                 fontsize=7, color=title_color)
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        model.train(mode=was_training)
                        break
                if images_so_far == num_images:
                    model.train(mode=was_training)
                    break
        model.train(mode=was_training)
        plt.show()
        print('Use Ctrl-C in the Terminal to stop the infinite loop')
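`imshow` isn't defined in this snippet; a minimal sketch of the usual un-normalizing display helper, assuming the same ImageNet means/stds used above:

import numpy as np
import matplotlib.pyplot as plt

def imshow(tensor):
    """Hypothetical helper: un-normalize a CHW image tensor and display it."""
    means = np.array([0.485, 0.456, 0.406])
    stds = np.array([0.229, 0.224, 0.225])
    img = tensor.numpy().transpose((1, 2, 0))  # CHW -> HWC
    img = stds * img + means                   # undo transforms.Normalize
    plt.imshow(np.clip(img, 0, 1))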
def main():
    '''
    Run as:
    (python ./part2/main.py 2>&1) | tee /home/hdd/logs/openimg/$(date +'%y%m%d%H%M%S').txt
    '''
    # Save the experiment start time
    start_time = strftime("%y%m%d%H%M%S", localtime())

    # Checks and logs
    pwd = os.getcwd()
    assert os.getcwd().endswith('VehicleRecognition')
    assert os.path.exists('./part2/experiments/')
    print(f'Working dir: {pwd}')

    # Fix the random seed
    seed = 13
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Paths to the dataset
    train_data_dir = '/home/nvme/data/openimg/train/train/'
    test_data_dir = '/home/nvme/data/openimg/test/testset/'

    # Number of classes in the dataset
    num_classes = len(os.listdir(train_data_dir))

    # Define the output paths (set save_pred_path = None to skip test inference)
    save_pred_path = f'./part2/experiments/{start_time}.csv'
    save_best_model_path = f'/home/hdd/logs/openimg/{start_time}/best_model.pt'

    # Back up the working directory
    workdir_copy(pwd, os.path.split(save_best_model_path)[0])

    # Options: resnet, alexnet, vgg, squeezenet, densenet, inception,
    # 'resnext50_32x4d', 'resnext101_32x8d', 'resnext101_32x48d_wsl',
    # 'resnext101_32x32d_wsl', 'resnext101_32x16d_wsl'
    model_name = "resnext101_32x16d_wsl"

    # Flag for feature extracting. When False, we finetune the whole model;
    # when True we only update the reshaped layer params.
    feature_extract = False

    # Hyperparameters
    device = torch.device("cuda:0")
    valid_size = 0.10
    if model_name.startswith('resnext'):
        lr = 5e-7
        batch_size = 8  # previously 32
    elif model_name.startswith('densenet'):
        lr = 1e-5
        batch_size = 32
    else:
        lr = 1e-4
        batch_size = 64
    num_workers = 16
    pin_memory = True
    weighted_train_sampler = False
    weighted_loss = False
    num_epochs = 20

    # Prevent PyTorch from allocating some memory on the default GPU (0)
    torch.cuda.set_device(device)

    # Initialize the model for this run
    model_ft, input_size = initialize_model(
        model_name, num_classes, feature_extract, use_pretrained=True)

    # Data augmentation and normalization for training;
    # just normalization for validation
    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(input_size),
            transforms.RandomCrop(input_size),
            transforms.RandomHorizontalFlip(),
            ImgAugTransform(input_size, 0.25),
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize(means, stds),
        ]),
        'valid': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(means, stds)
        ]),
    }

    train_loader, valid_loader = get_train_valid_loader(
        train_data_dir, batch_size, data_transforms, seed,
        weighted_train_sampler, valid_size=valid_size, shuffle=True,
        show_sample=True, num_workers=num_workers, pin_memory=pin_memory)
    test_loader = get_test_loader(
        test_data_dir, batch_size, data_transforms,
        num_workers=num_workers, pin_memory=pin_memory)
    dataloaders_dict = {
        'train': train_loader,
        'valid': valid_loader,
        'test': test_loader
    }

    # Send the model to GPU
    model_ft = model_ft.to(device)

    # Gather the parameters to be optimized/updated in this run. If we are
    # finetuning we will be updating all parameters. However, if we are
    # doing the feature-extract method, we will only update the parameters
    # that we have just initialized, i.e. the parameters whose requires_grad
    # is True.
    params_to_update = model_ft.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in model_ft.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in model_ft.named_parameters():
            if param.requires_grad:
                print("\t", name)

    # Observe that all parameters are being optimized
    optimizer_ft = optim.Adam(params_to_update, lr=lr)

    # Set up the loss function
    if weighted_loss:
        print('Weighted Loss')
        # {0: 0.010101, 1: 0.006622, 2: 0.0008244, 3: 0.00015335, 4: 0.0006253,
        #  5: 0.00019665, 6: 0.02631, 7: 0.00403, 8: 0.001996, 9: 0.01818,
        #  10: 0.0004466, 11: 0.008771, 12: 0.01087, 13: 0.006493, 14: 0.0017,
        #  15: 0.000656, 16: 0.001200}
        cls_to_weight = train_loader.dataset.cls_to_weight
        weights = torch.FloatTensor(
            [cls_to_weight[c] for c in range(num_classes)]).to(device)
    else:
        weights = torch.FloatTensor([1.0] * num_classes).to(device)
    criterion = nn.CrossEntropyLoss(weights)

    # Print some things here so they stay visible in the terminal for longer
    print(f'Timestep: {start_time}')
    print(f'Using model: {model_name}')
    print(f'Using optimizer: {optimizer_ft}')
    print(f'Device: {device}')
    print(f'Batch size: {batch_size}')
    print(f'Transforms: {data_transforms}')

    # Train and evaluate
    model_ft, hist = train_model(
        model_ft, dataloaders_dict, criterion, optimizer_ft, device,
        save_best_model_path, num_epochs=num_epochs,
        is_inception=(model_name == "inception"))

    # Run test inference
    if save_pred_path is not None:
        test_model(model_ft, dataloaders_dict, device, save_pred_path,
                   is_inception=(model_name == "inception"))
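For context, the `feature_extract` flag above follows the torchvision finetuning pattern, where `initialize_model` is expected to freeze the backbone before replacing the classifier head. A minimal sketch of that convention; the resnet18 branch is illustrative, not this project's actual `initialize_model`:

import torch.nn as nn
from torchvision import models as tv_models

def set_parameter_requires_grad(model, feature_extracting):
    # When feature extracting, freeze every pre-trained parameter so only
    # the newly created head has requires_grad=True.
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

def initialize_model_sketch(num_classes, feature_extract, use_pretrained=True):
    model = tv_models.resnet18(pretrained=use_pretrained)
    set_parameter_requires_grad(model, feature_extract)
    # Replacing the head creates fresh parameters with requires_grad=True,
    # which is exactly what the gathering loop above picks up.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model, 224  # model and its expected input size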
EPOCHS = 30
# Options: raw_cnn, resnet, alexnet, vgg, squeezenet, densenet, inception
MODEL_NAME = "raw_cnn"
NUM_CLASSES = 2
FEATURE_EXTRACT = True
USE_PRETRAINED = True
USE_GRAYSCALE = MODEL_NAME == "raw_cnn"
DOWNLOAD_DATA = False

Net, IMG_SIZE = models.initialize_model(MODEL_NAME, NUM_CLASSES,
                                        FEATURE_EXTRACT, USE_PRETRAINED)

if DOWNLOAD_DATA:
    download_and_split_data()

if torch.cuda.is_available():
    print("Running on GPU")
else:
    print("Running on CPU")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("Initializing Network")
net = Net.to(device)

print("Params to learn:")
import os

# This should be configured in a configuration file or environment variable
DEFAULT_DB_URI = 'mongodb://*****:*****@gmail.com'

# Configure models
initialize_model(db_uri=os.environ.get('DB_URI', DEFAULT_DB_URI), app=app)

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=8000, debug=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

root = os.path.join('../', 'input', 'gtsrb-german-traffic-sign')
Cells, labels = read_data(root)

if opt.samples:
    view_samples(Cells, labels)

if opt.train:
    X_train, X_val, y_train, y_val = train_test_split(
        Cells, labels, test_size=0.2, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(
        X_train, y_train, test_size=0.2, random_state=1)

    model_ft, in_size = initialize_model(opt.model, num_classes=43,
                                         feature_extract=True)
    model_ft.to(device)

    # Use the Adam optimizer and cross-entropy as the loss function
    optimizer = optim.Adam(model_ft.parameters(), lr=1e-3, betas=(0.9, 0.999))
    criterion = nn.CrossEntropyLoss().to(device)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=10, verbose=False,
        threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0,
        eps=1e-08)

    train_transform = transforms.Compose([
        transforms.Resize(in_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
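ReduceLROnPlateau only lowers the learning rate if it is stepped with a monitored metric each epoch; a minimal sketch of that call pattern, with `train_one_epoch` and `validate` as hypothetical helpers returning the epoch's validation loss:

# Hypothetical epoch loop showing the scheduler's expected call pattern.
for epoch in range(num_epochs):  # num_epochs assumed defined elsewhere
    train_one_epoch(model_ft, optimizer, criterion)  # hypothetical helper
    val_loss = validate(model_ft, criterion)         # hypothetical helper
    scheduler.step(val_loss)  # ReduceLROnPlateau needs the monitored metric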
def main():
    # Set up data transforms and device
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    }
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    image_datasets = {
        phase: SampleDataset(root=os.path.join(data_dir, phase),
                             transform=data_transforms[phase])
        for phase in PHASES
    }
    dataloaders = {
        phase: torch.utils.data.DataLoader(dataset=image_datasets[phase],
                                           batch_size=BATCH_SIZE,
                                           shuffle=SHUFFLE,
                                           num_workers=NUM_WORKERS)
        for phase in PHASES
    }

    if not os.path.exists(model_path):
        os.mkdir(model_path)

    # Set up the model
    model, params_to_optimize, _ = initialize_model(model_name=MODEL,
                                                    num_classes=NUM_CLASSES,
                                                    feature_extract=EXTRACTOR,
                                                    use_pretrained=PRETRAINED)
    model = model.to(device)

    # Classification task: cross-entropy loss
    criterion = nn.CrossEntropyLoss()
    # Stochastic gradient descent optimizer
    optimizer = optim.SGD(params_to_optimize, lr=LEARNING_RATE,
                          momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE,
                                           gamma=GAMMA)

    print('model training starts...')
    trained_model, _ = train_model(device, model, dataloaders, criterion,
                                   optimizer, exp_lr_scheduler,
                                   num_epochs=NUM_EPOCHS)
    torch.save(trained_model.state_dict(),
               os.path.join(model_path, model_name))
    print('model training completed.')
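A hedged sketch of reloading the checkpoint saved above for inference; it assumes the same `initialize_model` signature and the `MODEL`/`NUM_CLASSES`/`EXTRACTOR`/`PRETRAINED` constants used in `main()`:

# Hypothetical inference-time reload of the state_dict saved in main().
model, _, _ = initialize_model(model_name=MODEL, num_classes=NUM_CLASSES,
                               feature_extract=EXTRACTOR,
                               use_pretrained=PRETRAINED)
state_dict = torch.load(os.path.join(model_path, model_name),
                        map_location='cpu')
model.load_state_dict(state_dict)
model.eval()  # disable dropout/batchnorm updates for inference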