def trainval(exp_dict, num_epochs=500):
    """Train a model to reproduce a target scene and save periodic visualizations.

    Args:
        exp_dict: experiment hyperparameter dict; must provide the keys
            'source_scene', 'target_scene' and 'model'.
        num_epochs: number of training iterations (default 500, matching the
            previously hard-coded loop bound; kept as a default for
            backward compatibility).

    Side effects: creates ``tmp/<hash>/`` and writes ``output_*.png`` frames
    plus ``animation.gif`` there every 50 epochs.
    """
    pprint.pprint(exp_dict)
    savedir_base = os.path.join('tmp', hu.hash_dict(exp_dict))
    os.makedirs(savedir_base, exist_ok=True)

    # -- get scenes
    source_scene = scenes.get_scene(exp_dict['source_scene'])
    target_scene = scenes.get_scene(exp_dict['target_scene'])

    # -- get model (initialized from the source scene)
    model = models.get_model(exp_dict['model'], source_scene, exp_dict)

    # -- train for num_epochs iterations
    score_list = []
    for e in range(num_epochs):
        # update parameters and get new score_dict
        score_dict = model.train_on_batch(target_scene)
        score_dict["epoch"] = e
        score_dict["step_size"] = model.opt.state['step_size']

        # Add to score_list
        score_list += [score_dict]

        # Print running metrics
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")

        # Visualize every 50 epochs
        if e % 50 == 0:
            model.vis_on_batch(target_scene,
                               fname=os.path.join(savedir_base,
                                                  'output_%d.png' % e))
            # BUG FIX: original called `s.path.join`, which raises
            # NameError -- it must be `os.path.join`.
            save_gif(src_path=os.path.join(savedir_base, '*.png'),
                     tgt_fname=os.path.join(savedir_base, 'animation.gif'))
def get_hydra_model(self):
    """Reconstruct the hydra model recorded for this MLflow run.

    Instantiates the model class named in the run's params with
    ``self.configs`` and attaches the serialized processor stored in the
    pyfunc model's artifacts.
    """
    # The model class name is recorded in the run's logged parameters.
    registered_name = self.run.data.params["model"]
    wrapper = get_model(registered_name)(self.configs)

    # Locate the serialized model artifact inside the pyfunc wrapper and
    # load it as this model's processor.
    pyfunc = self.get_pyfunc_model()
    artifact_path = pyfunc._model_impl.context.artifacts["model"]
    wrapper.model = load_processor(artifact_path)
    return wrapper
def main(
    log_dir,
    model_type,
    vocab_size,
    emb_size,
    batch_size,
    epochs,
    maxlen,
    min_acc,
    num_samples,
    oov_token,
):
    """Train a binary text classifier end-to-end and export serving artifacts.

    Fits a tokenizing preprocessor on all splits, trains the requested model,
    verifies that test accuracy clears ``min_acc``, then writes sample
    requests, the preprocessor, and the trained model under ``log_dir``.
    """
    _create_dir_if_not_exist(log_dir)

    # Raw (string) splits and a random sample used for example requests.
    (str_X_train, y_train), (str_X_val, y_val), (str_X_test, y_test) = get_data()
    test_samples = np.random.choice(str_X_test, num_samples)

    # Fit the preprocessor on every split, then vectorize each one.
    preprocessor = Preprocessor(maxlen=maxlen, vocab_size=vocab_size,
                                oov_token=oov_token)
    preprocessor.fit_on_texts(str_X_train + str_X_val + str_X_test)
    X_train, X_val, X_test = (
        preprocessor.transform(split)
        for split in (str_X_train, str_X_val, str_X_test)
    )

    # Build and fit the model.
    model = get_model(model_type, maxlen, vocab_size, emb_size)
    model.compile(loss="binary_crossentropy", optimizer="adam",
                  metrics=["accuracy"])
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        batch_size=batch_size,
        epochs=epochs,
    )

    # Gate on test accuracy before exporting anything.
    score = model.evaluate(X_test, y_test, verbose=0)
    print("Test accuracy:", score[1])
    assert score[1] > min_acc, f"score doesnt meet the minimum threshold {min_acc}"

    # Export serving artifacts.
    test_requests = _generate_test_requests(model, preprocessor, test_samples)
    _save_json(join(log_dir, "test_requests.json"), test_requests)
    preprocessor.save(join(log_dir, "preprocessor.pkl"))

    # HACK
    # For some reason savedModel format is not working with `lstm` model
    if model_type == "lstm":
        model.save(join(log_dir, "model.h5"))
    else:
        model.save(join(log_dir, "saved_model/model"))
def tune_hyperparams(cfg):
    """Run cross-validated hyperparameter tuning for the configured model.

    Loads (computing if necessary) lagged training features, builds a CV
    split, scores the model, optionally logs to MLflow, and returns the
    CV score.
    """
    # NOTE(review): imported for their side effects (presumably registering
    # pandas/xarray accessors) — the names are not used directly; confirm.
    from src.storm_utils import StormIndexAccessor, StormAccessor

    # Resolve the data/feature configs and where the feature files live.
    data_cfg = utils.get_data_cfg(cfg)
    features_cfg = utils.get_features_cfg(cfg)
    inputs_dir = Path(to_absolute_path(features_cfg.hydra.run.dir))
    paths = features_cfg.outputs

    # MLflow is optional; only import/initialize it when configured.
    use_mlflow = OmegaConf.select(cfg, "mlflow", default=False)
    if use_mlflow:
        import mlflow
        run = setup_mlflow(cfg, features_cfg=features_cfg, data_cfg=data_cfg)

    cv_method = cfg.cv.method
    cv_init_params = cfg.cv.params

    # Model specific parameters
    model_name = cfg.model
    metrics = cfg.metrics
    # seed = cfg.seed

    # Compute lagged features if they haven't been computed yet.
    # The computation writes into the current working directory, so we
    # chdir into inputs_dir for the call and restore cwd afterwards.
    if any(not (inputs_dir / path).exists() for path in paths.values()):
        inputs_dir.mkdir(parents=True, exist_ok=True)
        cwd = os.getcwd()
        os.chdir(inputs_dir)
        lagged_features_cfg = features_cfg.copy()
        with open_dict(lagged_features_cfg):
            # The nested hydra config must be dropped before re-running
            # the feature computation with this config.
            _ = lagged_features_cfg.pop("hydra")
        compute_lagged_features(lagged_features_cfg)
        os.chdir(cwd)

    X_train = load_processed_data("X_train", inputs_dir=inputs_dir, paths=paths)
    y_train = load_processed_data("y_train", inputs_dir=inputs_dir, paths=paths)

    logger.info(f"Getting CV split for '{cv_method}' method...")
    cv = get_cv_split(y_train, cv_method, **cv_init_params)

    # get_model returns a class; it is immediately instantiated with cfg.
    model = get_model(model_name)(cfg, cv=cv, metrics=metrics, mlflow=use_mlflow)
    cv_score = model.cv_score(X_train, y_train)
    logger.info(f"CV score: {cv_score}")

    if use_mlflow:
        mlflow.log_metric(model.cv_metric, cv_score)
        mlflow.log_params(model.params)

    model.save_output()
    # utils.save_output(model, cfg.outputs.hydra_model)

    if use_mlflow:
        mlflow.end_run()

    return cv_score
def train(args):
    """Build, fit, and evaluate the model, then persist it.

    Writes the saved model to GCS when ``args.job_dir`` is a ``gs://`` URI,
    otherwise to the local job directory. Returns the trained model.
    """
    (X_train, y_train), (X_test, y_test) = _get_datasets()

    # Append a trailing channel axis so inputs are rank-4 (N, H, W, 1).
    X_train = X_train[:, :, :, None]
    X_test = X_test[:, :, :, None]

    net = get_model(args)
    net.compile(optimizer=_get_optimizer(args),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    net.fit(X_train, y_train,
            batch_size=args.batch_size,
            epochs=args.epochs,
            verbose=1)
    net.evaluate(X_test, y_test,
                 batch_size=args.test_batch_size,
                 verbose=1)

    # Persist: save locally then copy when the job dir targets GCS.
    if args.job_dir.startswith('gs://'):
        net.save(CENSUS_MODEL)
        copy_file_to_gcs(args.job_dir, CENSUS_MODEL)
    else:
        net.save(os.path.join(args.job_dir, CENSUS_MODEL))
    return net
def main(config_path: Path, dataset_path: Path) -> SimpleNamespace:
    """
    Main function responsible for training classification model.

    Arguments:
        Path config_path: Path to main config (of :class:`DefaultConfig` class)
        Path dataset_path: Path to dataset

    Returns:
        SimpleNamespace with the experiment root directory (``root``) and,
        when ``config.training.save`` is enabled, also the best-checkpoint
        path (``model_path``) and the validation/test dataloaders.
    """
    config = load_variable("config", config_path)
    model = models.get_model(config)
    train = config.training
    setup(train_config=train, logger=logger)

    # Build the three dataloaders and give the model an example batch
    # (presumably for shape inference / graph tracing — TODO confirm).
    train_loader, val_loader, test_loader = DataLoader.get_loaders(
        path_to_data=dataset_path, config=config)
    model.set_example(train_loader)

    learner = trainer.Trainer(config=config)
    learner.summary(model)
    learner.fit(model, train_loader, val_loader)

    # Test only makes sense when a checkpoint is being saved.
    if train.test and train.save:
        learner.test(test_dataloaders=test_loader)

    if train.save:
        results = SimpleNamespace(
            root=learner.root_dir,
            model_path=Path(learner.checkpoint_callback.best_model_path),
            val_loader=val_loader,
            test_loader=test_loader,
        )
        return results
    else:
        # Nothing was saved: only the experiment root is meaningful.
        return SimpleNamespace(root=learner.root_dir)
def train(self, x_train, y_train, config, model_path):
    """Fit the number classifier, checkpointing after every epoch.

    :param x_train (np.array) - images
    :param y_train (np.array) - labels as hot encodings
    :param config (Configuration) - configuration
    :param model_path (str) - path of the model folder where to save the models
    """
    self._model = get_model(config)

    # Two checkpoint callbacks: a snapshot per epoch, plus a rolling
    # "best so far" file keyed on validation loss.
    per_epoch_ckpt = ModelCheckpoint(
        os.path.join(model_path, "model_{epoch:05d}.h5"),
        monitor="val_loss",
        verbose=0,
        save_best_only=False,
        mode='min')
    best_ckpt = ModelCheckpoint(
        os.path.join(model_path, "best_model.h5"),
        monitor="val_loss",
        verbose=0,
        save_best_only=True,
        mode='min')

    self._model_history = self._model.fit(
        x_train,
        y_train,
        batch_size=config.batch_size,
        epochs=config.nb_epochs,
        validation_split=config.validation_split,
        shuffle=config.shuffle,
        callbacks=[per_epoch_ckpt, best_ckpt])

    # Persist the training curves next to the checkpoints.
    self._plot_history(model_path, config.model_name)
def __init__(self, config, datasets):
    """Build the trainer from a nested config dict and dataset handles.

    Sets up the model, cosine-annealed Adam optimizer, BCE+Dice losses
    and their weights, train metrics, and (in train mode) validation
    data plus test metrics; finally initializes checkpointing.
    """
    self.config = config
    self.datasets = datasets
    self.mode = config["mode"]

    training_config = config["training"]
    self.epochs = training_config["epochs"]
    self.print_iter = training_config["print_iter"]
    self.save_epoch = training_config["save_epoch"]

    # The model type selects both the factory and its own kwargs section
    # in the config (config[<type>]).
    model_func = get_model(config["model"]["type"])
    self.model = model_func(
        **config[config["model"]["type"]]
    )

    # Learning rate decays over the full training run.
    lr = scheduler.CosineAnnealingScheduler(
        training_config["init_learning_rate"], training_config["epochs"]
    )
    self.optimizer = optimizers.Adam(lr, **config["optimizer"])

    # Combined segmentation loss: weighted BCE + Dice.
    self.bce_w = training_config["bce_loss_weight"]
    self.dice_w = training_config["dice_loss_weight"]
    self.bce = losses.BinaryCrossEntropyLoss()
    self.dice = losses.DiceLoss()

    # Running training metrics (binary segmentation => 2 classes).
    self.train_bce = metrics.Mean()
    self.train_dice = metrics.Mean()
    self.train_iou = metrics.MeanIoU(num_classes=2)

    if config["mode"] == "train":
        # Validation data and evaluation metrics are only needed in
        # training mode.
        self.valid_datasets = DataLoader(
            "valid", **config["dataset"]
        )
        self.test_bce = metrics.Mean()
        self.test_dice = metrics.Mean()
        self.test_iou = metrics.MeanIoU(num_classes=2)

    self.get_ckpt_manager(config["save_path"])
for i, (trn_idx, val_idx) in enumerate(splitter.split(df, y=y)): if i not in global_params["folds"]: continue logger.info("=" * 20) logger.info(f"Fold {i}") logger.info("=" * 20) trn_df = df.loc[trn_idx, :].reset_index(drop=True) val_df = df.loc[val_idx, :].reset_index(drop=True) loaders = { phase: C.get_loader(df_, datadir, config, phase, event_level_labels) for df_, phase in zip([trn_df, val_df], ["train", "valid"]) } model = models.get_model(config).to(device) criterion = C.get_criterion(config).to(device) optimizer = C.get_optimizer(model, config) scheduler = C.get_scheduler(optimizer, config) ema_model = AveragedModel( model, avg_fn=lambda averaged_model_parameter, model_parameter, num_averaged: 0.1 * averaged_model_parameter + 0.9 * model_parameter) (output_dir / f"fold{i}").mkdir(exist_ok=True, parents=True) train(model=model, ema_model=ema_model, dataloaders=loaders,
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
    """Train/validate a segmentation model with checkpoint resume, then test.

    Args:
        exp_dict: experiment hyperparameters (model, n_classes, batch_size,
            max_epoch, obj, bkg, patch_size, ...).
        savedir_base: base directory for experiment checkpoints.
        datadir: dataset root directory.
        reset: if True, delete and back up any previous run of this experiment.
        num_workers: dataloader worker count.

    Fixes vs. original: ``e`` is now defined even when the experiment has
    already reached ``max_epoch`` (the loop body never runs), and the final
    message typo ("et epoch") is corrected.
    """
    # bookkeeping stuff
    # ==================
    savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))
    os.makedirs(savedir, exist_ok=True)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================
    # train set: geometric + color augmentations applied jointly to image,
    # masks and keypoints.
    data_transform = A.Compose(
        [
            A.Flip(p=0.3),
            A.IAAAffine(p=0.3),
            A.Rotate(p=0.3),
            A.HueSaturationValue(hue_shift_limit=10,
                                 sat_shift_limit=15,
                                 val_shift_limit=10,
                                 p=0.3),
            A.GaussianBlur(3, p=0.3),
            A.GaussNoise(30, p=0.3)
        ],
        keypoint_params=A.KeypointParams(format='xy'),
        additional_targets={
            'mask0': 'mask',
            'mask1': 'mask',
            'mask2': 'mask',
            'keypoints0': 'keypoints',
            'keypoints1': 'keypoints',
            'keypoints2': 'keypoints',
            'keypoints3': 'keypoints',
            'keypoints4': 'keypoints',
            'keypoints5': 'keypoints'
        })

    # random.seed(20201009)
    random_seed = random.randint(0, 20201009)
    train_set = HEDataset_Fast(data_dir=datadir,
                               n_classes=exp_dict["n_classes"],
                               transform=data_transform,
                               option="Train",
                               random_seed=random_seed,
                               obj_option=exp_dict["obj"],
                               patch_size=exp_dict["patch_size"],
                               bkg_option=exp_dict["bkg"])

    test_transform = A.Compose([A.Resize(1024, 1024)],
                               keypoint_params=A.KeypointParams(format='xy'),
                               additional_targets={
                                   'mask0': 'mask',
                                   'mask1': 'mask'
                               })
    # val set
    val_set = HEDataset(data_dir=datadir,
                        transform=test_transform,
                        option="Validation")
    val_loader = DataLoader(val_set, batch_size=1, num_workers=num_workers)

    # test set
    test_set = HEDataset(data_dir=datadir,
                         transform=test_transform,
                         option="Test")
    test_loader = DataLoader(test_set, batch_size=1, num_workers=num_workers)

    # Model
    # ==================
    # torch.manual_seed(20201009)
    model = models.get_model(exp_dict['model'],
                             exp_dict=exp_dict,
                             train_set=train_set).cuda()

    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment from last checkpoint
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d" % (s_epoch))

    # train_sampler = torch.utils.data.RandomSampler(
    #     train_set, replacement=True, num_samples=2*len(val_set))
    train_loader = DataLoader(train_set,
                              batch_size=exp_dict["batch_size"],
                              shuffle=True,
                              num_workers=num_workers)

    # BUG FIX: if the experiment already reached max_epoch, the loop body
    # never runs and `e` would be undefined at the final print below.
    e = s_epoch
    for e in range(s_epoch, exp_dict['max_epoch']):
        score_dict = {}

        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate and visualize the model
        val_dict = model.val_on_loader(val_loader,
                                       savedir_images=os.path.join(
                                           savedir, "images"),
                                       n_images=7)
        score_dict.update(val_dict)

        # Get new score_dict
        score_dict.update(train_dict)
        score_dict["epoch"] = len(score_list)

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Save best checkpoint when val_score beats every previous epoch.
        if e == 0 or (score_dict.get("val_score", 0) >
                      score_df["val_score"][:-1].fillna(0).max()):
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())
            print("Saved Best: %s" % savedir)

    # Evaluate the best checkpoint on the test set.
    model.load_state_dict(
        hu.torch_load(os.path.join(savedir, "model_best.pth")))
    test_dict = model.test_on_loader(test_loader)
    hu.save_pkl(os.path.join(savedir, 'test_iou.pkl'), test_dict)
    print('Test IoU:{}'.format(test_dict["test_iou"]))
    print('Experiment completed at epoch %d' % e)
def trainval(exp_dict, savedir, args):
    """
    exp_dict: dictionary defining the hyperparameters of the experiment
    savedir: the directory where the experiment will be saved
    args: arguments passed through the command line

    Fixes vs. original: the model and wrapper are moved with ``.to(device)``
    instead of an unconditional ``.cuda()``, so the function actually runs
    on CPU when ``args.use_cuda`` is false; the CUDA assert message is also
    un-garbled.
    """
    # set seed
    # ==================
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    if args.use_cuda:
        device = 'cuda'
        torch.cuda.manual_seed_all(seed)
        assert torch.cuda.is_available(
        ), 'cuda is not available, please run with "-c 0"'
    else:
        device = 'cpu'

    print('Running on device: %s' % device)

    # Dataset
    # Load val set and train set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   split="val",
                                   transform=exp_dict.get("transform"),
                                   datadir=args.datadir)
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     transform=exp_dict.get("transform"),
                                     datadir=args.datadir)

    # Load train loader, val loader, and vis loader
    train_loader = DataLoader(train_set,
                              sampler=RandomSampler(
                                  train_set,
                                  replacement=True,
                                  num_samples=max(min(500, len(train_set)),
                                                  len(val_set))),
                              batch_size=exp_dict["batch_size"])

    val_loader = DataLoader(val_set,
                            shuffle=False,
                            batch_size=exp_dict["batch_size"])
    # NOTE(review): the sampler is built from train_set while the loader
    # iterates val_set — looks suspicious, but preserved as-is; confirm.
    vis_loader = DataLoader(val_set,
                            sampler=ut.SubsetSampler(train_set,
                                                     indices=[0, 1, 2]),
                            batch_size=1)

    # Create model, opt, wrapper
    # BUG FIX: honor the selected device instead of calling .cuda()
    # unconditionally (which crashed when use_cuda was false).
    model_original = models.get_model(exp_dict["model"],
                                      exp_dict=exp_dict).to(device)
    opt = torch.optim.Adam(model_original.parameters(),
                           lr=1e-5,
                           weight_decay=0.0005)

    model = wrappers.get_wrapper(exp_dict["wrapper"],
                                 model=model_original,
                                 opt=opt).to(device)

    score_list = []

    # Checkpointing
    # =============
    score_list_path = os.path.join(savedir, "score_list.pkl")
    model_path = os.path.join(savedir, "model_state_dict.pth")
    opt_path = os.path.join(savedir, "opt_state_dict.pth")

    if os.path.exists(score_list_path):
        # resume experiment
        score_list = hu.load_pkl(score_list_path)
        model.load_state_dict(torch.load(model_path))
        opt.load_state_dict(torch.load(opt_path))
        s_epoch = score_list[-1]["epoch"] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Run training and validation
    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}

        # visualize
        model.vis_on_loader(vis_loader,
                            savedir=os.path.join(savedir, "images"))

        # validate
        score_dict.update(model.val_on_loader(val_loader))

        # train
        score_dict.update(model.train_on_loader(train_loader))

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())

    print("Saved in %s" % savedir)
def train(cfg):
    """Fit the configured model, save predictions on the test set, and
    (optionally) log metrics, artifacts and plots to MLflow.

    Relies on lagged-feature files under the features config's hydra run
    dir, computing them first if they are missing.
    """
    # NOTE(review): imported for side effects (presumably accessor
    # registration); the names themselves are unused — confirm.
    from src.storm_utils import StormIndexAccessor, StormAccessor

    # # Get data configs/overrides
    # data_overrides = utils.parse_data_overrides(cfg)
    # data_cfg = compose(
    #     config_name="process_data", return_hydra_config=True, overrides=data_overrides,
    # )
    # # Get features configs/overrides
    # features_overrides = utils.parse_processed_data_overrides(cfg)
    # features_overrides.extend(utils.parse_override(cfg.lagged_features))
    # features_cfg = compose(
    #     config_name="compute_lagged_features",
    #     return_hydra_config=True,
    #     overrides=features_overrides,
    # )

    # Model specific parameters
    model_name = cfg.model
    metrics = cfg.metrics
    pred_path = OmegaConf.select(cfg.outputs, "predictions", default="ypred.pkl")
    # seed = cfg.seed

    # Resolve the data/feature configs and the directories they point at.
    data_cfg = utils.get_data_cfg(cfg)
    features_cfg = utils.get_features_cfg(cfg)
    processed_data_dir = Path(to_absolute_path(data_cfg.hydra.run.dir))
    inputs_dir = Path(to_absolute_path(features_cfg.hydra.run.dir))
    paths = features_cfg.outputs

    # Setup mlflow (optional; only imported when enabled).
    use_mlflow = OmegaConf.select(cfg, "mlflow", default=False)
    if use_mlflow:
        import mlflow
        run = setup_mlflow(cfg, features_cfg=features_cfg, data_cfg=data_cfg)

    update_tuned_hyperparams(cfg, features_cfg)

    # Compute lagged features if they haven't been computed yet.
    # The computation writes to cwd, so chdir in and restore afterwards.
    if any(not (inputs_dir / path).exists() for path in paths.values()):
        inputs_dir.mkdir(parents=True, exist_ok=True)
        cwd = os.getcwd()
        os.chdir(inputs_dir)
        lagged_features_cfg = features_cfg.copy()
        with open_dict(lagged_features_cfg):
            # Drop the nested hydra section before re-running the feature job.
            _ = lagged_features_cfg.pop("hydra")
        compute_lagged_features(lagged_features_cfg)
        os.chdir(cwd)

    # General parameters
    # load_kwargs = cfg.load
    # processed_data_dir = Path(to_absolute_path(cfg.processed_data_dir))
    target_pipeline_path = cfg.target_pipeline
    inverse_transform = cfg.inverse_transform
    cv_method = cfg.cv.method
    cv_init_params = cfg.cv.params
    # lag = cfg.lag
    # exog_lag = cfg.exog_lag
    # lead = cfg.lead

    logger.info("Loading training data and computing lagged features...")

    # # HACK: Compute lagged features if they haven't been computed yet.
    # inputs_dir = Path(to_absolute_path(load_kwargs.inputs_dir))
    # paths = [inputs_dir / path for path in load_kwargs.paths.values()]
    # if any(not path.exists() for path in paths):
    #     # if not inputs_dir.exists():
    #     compute_lagged_features(lag, exog_lag, lead, inputs_dir)

    X_train = load_processed_data("X_train", inputs_dir=inputs_dir, paths=paths)
    y_train = load_processed_data("y_train", inputs_dir=inputs_dir, paths=paths)
    X_test = load_processed_data("X_test", inputs_dir=inputs_dir, paths=paths)
    y_test = load_processed_data("y_test", inputs_dir=inputs_dir, paths=paths)
    feature_names = load_processed_data("features_names",
                                        inputs_dir=inputs_dir,
                                        paths=paths)

    # QUESTION: Log everything at end cleaner?
    if use_mlflow:
        n_train_obs, n_features = X_train.shape
        n_test_obs, _ = y_test.shape
        mlflow.log_params({
            "n_train_obs": n_train_obs,
            "n_test_obs": n_test_obs,
            "n_features": n_features,
        })

    logger.info(f"Getting CV split for '{cv_method}' method...")
    cv = get_cv_split(y_train, cv_method, **cv_init_params)
    # QUESTION: Do we still need CV here?

    ###########################################################################
    # Fit and evaluate model
    ###########################################################################
    logger.info(f"Fitting model {model_name}...")
    # get_model returns a class; instantiate it with the config.
    model = get_model(model_name)(cfg, cv=cv, metrics=metrics, mlflow=use_mlflow)
    model.fit(X_train, y_train, feature_names=feature_names)
    # TODO: Make this more general. It currently only applies to xgboost
    # QUESTION: compute CV score in score method?
    # score = model.cv_score(X_train, y_train)
    model.save_output()

    ###########################################################################
    # Compute/save predictions on test set
    ###########################################################################
    logger.info("Computing predictions...")
    ypred = model.predict(X_test)
    ypred = convert_pred_to_pd(ypred, y_test)
    if inverse_transform:
        # Map targets/predictions back to their original scale.
        y_test, ypred = inv_transform_targets(
            y_test,
            ypred,
            path=target_pipeline_path,
            processor_dir=processed_data_dir,
        )

    logger.info("Saving predictions...")
    utils.save_output(ypred, pred_path)
    if use_mlflow:
        mlflow.log_artifact(pred_path)

    # XXX: TEMPORARY
    if hasattr(model, "compute_shap_values"):
        shap_values = model.compute_shap_values(X_test)
        shap_values.to_pickle("shap_values.pkl")
        if use_mlflow:
            mlflow.log_artifact("shap_values.pkl")

    ###########################################################################
    # Compute and log test metrics
    ###########################################################################
    if use_mlflow:
        test_score = compute_metrics(y_test, ypred, metrics=metrics)
        # NOTE(review): when metrics is a one-element list/tuple nothing is
        # logged here — looks like an oversight; preserved as-is, confirm.
        if isinstance(metrics, (list, tuple)):
            if len(metrics) > 1:
                for metric in metrics:
                    mlflow.log_metrics({metric: test_score[metric]})
        else:
            mlflow.log_metrics({metrics: test_score})

    ##########################################################################
    # Plot predictions on test set
    ##########################################################################
    # Plot predictions
    plot_kwargs = OmegaConf.to_container(cfg.plot, resolve=True)
    fig, ax = model.plot(
        X_test,
        y_test,
        lead=features_cfg.lead,
        unit=features_cfg.lag_processor.unit,
        **plot_kwargs,
    )
    plt.close()
    # if isinstance(fig, list):
    #     for f in fig:
    #         f.close()
    # elif isinstance(fig, dict):
    #     for f in fig.values():
    #         f.close()
    # else:
    #     fig.close()

    if use_mlflow:
        mlflow.end_run()
def newminimum(exp_id,
               savedir_base,
               datadir,
               name,
               exp_dict,
               metrics_flag=True):
    """Continue training from a previously saved experiment under a new name.

    Loads the model/optimizer state of experiment ``exp_id``, snapshots its
    parameters as the reference "minimum", and keeps training with a loss
    that penalizes distance from that snapshot.

    Fixes vs. original: ``torch.nograd()`` (AttributeError) is corrected to
    ``torch.no_grad()``, and the distance print now actually %-formats the
    value instead of passing it as a second argument to ``print``.
    """
    # bookkeeping
    # ---------------

    # get experiment directory
    old_modeldir = os.path.join(savedir_base, exp_id)
    savedir = os.path.join(savedir_base, exp_id, name)

    old_exp_dict = hu.load_json(os.path.join(old_modeldir, 'exp_dict.json'))

    # TODO: compare exp dict for possible errors:
    # optimizer have to be the same
    # same network, dataset

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # set seed
    # ---------------
    seed = 42 + exp_dict['runs']
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # -----------

    # Load Train Dataset
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=datadir,
                                     exp_dict=exp_dict)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        drop_last=True,
        shuffle=True,
        batch_size=exp_dict["batch_size"])

    # Load Val Dataset
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=datadir,
                                   exp_dict=exp_dict)

    # Model
    # -----------
    model = models.get_model(exp_dict["model"], train_set=train_set)

    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # Load Optimizer
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch)

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    opt_path = os.path.join(savedir, 'opt_state_dict.pth')

    old_model_path = os.path.join(old_modeldir, 'model.pth')
    old_score_list_path = os.path.join(old_modeldir, 'score_list.pkl')
    old_opt_path = os.path.join(old_modeldir, 'opt_state_dict.pth')

    # Resume from the OLD experiment's checkpoint.
    score_list = hu.load_pkl(old_score_list_path)
    model.load_state_dict(torch.load(old_model_path))
    opt.load_state_dict(torch.load(old_opt_path))
    s_epoch = score_list[-1]['epoch'] + 1

    # save current model state for comparison
    minimum = []
    for param in model.parameters():
        minimum.append(param.clone())

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d/%d' %
          (s_epoch, exp_dict['max_epoch']))

    for epoch in range(s_epoch, exp_dict['max_epoch']):
        # Set seed per epoch for reproducible shuffling/augmentation.
        np.random.seed(exp_dict['runs'] + epoch)
        torch.manual_seed(exp_dict['runs'] + epoch)
        # torch.cuda.manual_seed_all(exp_dict['runs']+epoch) not needed since no cuda available

        score_dict = {"epoch": epoch}

        if metrics_flag:
            # 1. Compute train loss over train set
            score_dict["train_loss"] = metrics.compute_metric_on_dataset(
                model, train_set, metric_name='softmax_loss')
            # metric_name=exp_dict["loss_func"])
            # TODO: which loss should be used? (normal or with reguralizer?)

            # 2. Compute val acc over val set
            score_dict["val_acc"] = metrics.compute_metric_on_dataset(
                model, val_set, metric_name=exp_dict["acc_func"])

        # 3. Train over train loader
        model.train()
        print("%d - Training model with %s..." % (epoch, exp_dict["loss_func"]))

        s_time = time.time()
        for images, labels in tqdm.tqdm(train_loader):
            # images, labels = images.cuda(), labels.cuda() no cuda available
            opt.zero_grad()
            # just works for custom loss function
            loss = loss_function(model, images, labels, minimum, 0.1)
            loss.backward()
            opt.step()
        e_time = time.time()

        # Record metrics
        score_dict["step_size"] = opt.state["step_size"]
        score_dict["n_forwards"] = opt.state["n_forwards"]
        score_dict["n_backwards"] = opt.state["n_backwards"]
        score_dict["batch_size"] = train_loader.batch_size
        score_dict["train_epoch_time"] = e_time - s_time

        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

        # BUG FIX: was `torch.nograd()` (no such attribute) and the print
        # passed the distance as a second positional arg instead of
        # formatting it into the string.
        with torch.no_grad():
            print('Current distance: %f' %
                  metrics.computedistance(minimum, model))

    print('Experiment completed')
# NOTE(review): this chunk is the interior of an evaluation routine;
# `exp_dict`, `datadir`, `savedir_base`, `hash_id` and `train_set` are
# defined earlier, outside this excerpt — confirm against the caller.
test_set = datasets.get_dataset(
    dataset_dict=exp_dict["dataset"],
    split='test',
    datadir=datadir,
    exp_dict=exp_dict,
    dataset_size=exp_dict['dataset_size'])

# batch_size=1 so each batch carries exactly one sample's metadata.
test_loader = DataLoader(test_set,
                         batch_size=1,
                         collate_fn=ut.collate_fn,
                         num_workers=0)
pprint.pprint(exp_dict)

# Model
# ==================
model = models.get_model(model_dict=exp_dict['model'],
                         exp_dict=exp_dict,
                         train_set=train_set).cuda()

model_path = os.path.join(savedir_base, hash_id, 'model_best.pth')

# load best model
model.load_state_dict(hu.torch_load(model_path))

# loop over the val_loader and saves image
# get counts
habitats = []
for i, batch in enumerate(test_loader):
    # assumes batch['meta'] is a one-element list of dicts (batch_size=1)
    # — TODO confirm against the collate function
    habitat = batch['meta'][0]['habitat']
    habitats += [habitat]

habitats = np.array(habitats)

val_dict = {}
# NOTE(review): interior of an adversarial-example generation script;
# `args`, `model_name` and `device` are defined before this excerpt, and
# the final per-file loop continues past it — confirm against the caller.
val_root = args.val_root
attack_name = args.attack_name
print(f"Generate {attack_name} for {model_name}")

# Count all files in the validation tree (one sub-folder per class).
total_file_num = 0
for folder in sorted(os.listdir(val_root)):
    for file in os.listdir(os.path.join(val_root, folder)):
        total_file_num += 1

# Output root for this (attack, model) pair.
if not os.path.exists(f"./adv_example/{attack_name}/{model_name}"):
    os.makedirs(f"./adv_example/{attack_name}/{model_name}")

# Folder-name <-> class-name <-> label lookup tables.
mapping_folder_to_name, mapping_folder_to_label, mapping_name_to_label, mapping_label_to_name = get_mapping_dict(
)

# Load the victim model and move it to the target device.
model = get_model(model_name)
model.load_state_dict(torch.load(f"./models/{model_name}.pth"))
model.to(device)
criterion = nn.CrossEntropyLoss()

# Running counters for the generation loop below.
file_num = 0
success_num = 0
error_num = 0
for folder in sorted(os.listdir(val_root)):
    # Mirror the class sub-folder layout in the output tree.
    if not os.path.exists(
            f"./adv_example/{attack_name}/{model_name}/{folder}"):
        os.makedirs(f"./adv_example/{attack_name}/{model_name}/{folder}")
    for file in os.listdir(os.path.join(val_root, folder)):
        file_num += 1
def trainval(exp_dict, savedir_base, n_workers, test_only, reset=False):
    """Run (or resume) one experiment: train and evaluate for 10 epochs.

    Checkpoints the model and the per-epoch score list under a directory
    derived from the experiment hash, so an interrupted run picks up where
    it left off.
    """
    # bookkeeping
    # ---------------
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # Data loaders (the "test" split doubles as validation here).
    train_loader = datasets.get_loader("train", exp_dict, n_workers,
                                       test_only=test_only)
    val_loader = datasets.get_loader("test", exp_dict, n_workers,
                                     test_only=test_only)

    # Model
    # -----------
    model = models.get_model(exp_dict)

    # Resume from checkpoint when one exists; otherwise start fresh.
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    if os.path.exists(score_list_path):
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        score_list = []
        s_epoch = 0

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d' % (s_epoch))
    for epoch in range(s_epoch, 10):
        # One epoch of training followed by evaluation; both helpers
        # return metric dicts that are merged into a single row.
        metrics_row = {}
        metrics_row.update(model.train_on_loader(train_loader))
        metrics_row.update(model.test_on_loader(val_loader))
        metrics_row['epoch'] = epoch

        score_list.append(metrics_row)

        # Report & save checkpoint
        print(pd.DataFrame(score_list).tail())
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print('Checkpoint Saved: %s' % savedir)

    print('experiment completed')
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
    """Train with early-stopping-style patience, tracking the best val score.

    Args:
        exp_dict: experiment hyperparameters (dataset, model, batch_size,
            max_epoch, dataset_size, ...).
        savedir_base: base directory for experiment outputs.
        datadir: dataset root directory.
        reset: if True, delete and back up any previous run first.
        num_workers: dataloader worker count.

    Fixes vs. original: ``e`` is now defined even when the run is already
    complete (the epoch loop never executes), and the final message typo
    ("et epoch") is corrected.
    """
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # set seed
    # ==================
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # ==================
    # train set
    train_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                     split="train",
                                     datadir=datadir,
                                     exp_dict=exp_dict,
                                     dataset_size=exp_dict['dataset_size'])
    # val set
    val_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                   split="val",
                                   datadir=datadir,
                                   exp_dict=exp_dict,
                                   dataset_size=exp_dict['dataset_size'])
    # test set
    test_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                    split="test",
                                    datadir=datadir,
                                    exp_dict=exp_dict,
                                    dataset_size=exp_dict['dataset_size'])

    # val_sampler = torch.utils.data.SequentialSampler(val_set)
    val_loader = DataLoader(
        val_set,
        # sampler=val_sampler,
        batch_size=1,
        collate_fn=ut.collate_fn,
        num_workers=num_workers)
    test_loader = DataLoader(
        test_set,
        # sampler=val_sampler,
        batch_size=1,
        collate_fn=ut.collate_fn,
        num_workers=num_workers)

    # Model
    # ==================
    model = models.get_model(model_dict=exp_dict['model'],
                             exp_dict=exp_dict,
                             train_set=train_set).cuda()

    # model.opt = optimizers.get_optim(exp_dict['opt'], model)
    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d" % (s_epoch))
    model.waiting = 0
    model.val_score_best = -np.inf

    train_sampler = torch.utils.data.RandomSampler(train_set,
                                                   replacement=True,
                                                   num_samples=2 *
                                                   len(test_set))
    train_loader = DataLoader(train_set,
                              sampler=train_sampler,
                              collate_fn=ut.collate_fn,
                              batch_size=exp_dict["batch_size"],
                              drop_last=True,
                              num_workers=num_workers)

    # BUG FIX: if the run is already at max_epoch the loop never executes
    # and `e` would be undefined at the final print below.
    e = s_epoch
    for e in range(s_epoch, exp_dict['max_epoch']):
        # Validate only at the start of each cycle
        score_dict = {}
        # NOTE(review): this result is discarded unless the epoch turns out
        # best; presumably kept for its image-saving side effect — confirm.
        test_dict = model.val_on_loader(test_loader,
                                        savedir_images=os.path.join(
                                            savedir, "images"),
                                        n_images=3)
        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate the model
        val_dict = model.val_on_loader(val_loader)
        score_dict["val_score"] = val_dict["val_score"]

        # Get new score_dict
        score_dict.update(train_dict)
        score_dict["epoch"] = e
        score_dict["waiting"] = model.waiting

        model.waiting += 1

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Save Best Checkpoint
        score_df = pd.DataFrame(score_list)
        if score_dict["val_score"] >= model.val_score_best:
            test_dict = model.val_on_loader(test_loader,
                                            savedir_images=os.path.join(
                                                savedir, "images"),
                                            n_images=3)
            score_dict.update(test_dict)
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            # score_df.to_csv(os.path.join(savedir, "score_best_df.csv"))
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())
            model.waiting = 0
            model.val_score_best = score_dict["val_score"]
            print("Saved Best: %s" % savedir)

        # Report & Save
        score_df = pd.DataFrame(score_list)
        # score_df.to_csv(os.path.join(savedir, "score_df.csv"))
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Patience-based early stopping: stop after 100 epochs without
        # a new best validation score.
        if model.waiting > 100:
            break

    print('Experiment completed at epoch %d' % e)
class IndividualDrum(Individual):
    """A drum-pattern individual for a genetic algorithm.

    Holds a `sequence` of GeneDrum objects (each wrapping a Note with
    pitch/timestamp/duration), can render itself to a MIDI file under
    "output/", and scores itself with a shared pretrained RNN classifier.
    """

    _count = 0  # global counter used to assign a unique id per individual
    model = get_model('src/rnn_10_classes.h5')  # ("src/weights_rnn.h5")

    def __init__(self, parameters, empty=False):
        super().__init__(parameters)
        IndividualDrum._count += 1
        self.ind = IndividualDrum._count  # unique id, also used as file name
        if not empty:
            self.generate_seq()

    def crossover(self, other):
        """Return two children carrying deep copies of the parents' sequences."""
        fc = IndividualDrum(self.parameters, empty=True)
        sc = IndividualDrum(self.parameters, empty=True)
        fc.sequence = deepcopy(self.sequence)
        sc.sequence = deepcopy(other.sequence)
        return fc, sc

    def mutate(self):
        """Randomly nudge note timestamps by +/- 0.1 within [0.5, 7.5]."""
        # self.generate_note()
        for key in self.sequence:
            # self.generate_note()
            if random.random() > 1 / len(self.sequence):
                if random.random() > 0.5:
                    if key.bit.timestamp > 0.5:
                        key.bit.timestamp -= 0.1
                else:
                    if key.bit.timestamp < 7.5:
                        key.bit.timestamp += 0.1
        # if random.random() > 0.5:
        #     self.sequence.remove(key)
        # else:
        #     self.generate_note()

    def create_midi_file(self, file_name=None):
        """Write the sequence as a one-track drum MIDI file under "output/"."""
        track = 0
        channel = 9  # channel 10 (0-indexed 9) is the GM percussion channel
        tempo = 120  # In BPM
        volume = 100  # 0-127, as per the MIDI standard
        my_midi = MIDIFile(
            1
        )  # One track, defaults to format 1 (tempo track is created automatically)
        my_midi.addTempo(track, 0, tempo)
        my_midi.addProgramChange(0, 10, 0, 0)
        my_midi.tracks[0].addChannelPressure(0, 4, 0)
        repertory = "output/"
        if file_name is not None:
            file = file_name + ".mid"
        else:
            file = str(self.ind) + ".mid"
        for note in self.sequence:
            my_midi.addNote(track, channel, note.bit.pitch, note.bit.timestamp,
                            note.bit.duration, volume)
        with open(repertory + file, "wb") as output_file:
            my_midi.writeFile(output_file)

    def generate_note(self):
        """Append one random note (deduplicated) from the allowed drum pitches."""
        allowed_pitch = [36, 38, 42, 46, 41, 45, 48, 51, 49]
        new_note = Note(
            sample(allowed_pitch, 1)[0],
            round_down(round(uniform(0, 7.75), 2), 0.25), 0.25)
        if new_note not in self.sequence:
            self.sequence.append(GeneDrum(new_note))

    def generate_seq(self):
        """Fill the sequence with 20-100 random notes and render the MIDI file."""
        max_number_of_notes = 100
        number_of_notes = randint(20, max_number_of_notes)
        for _ in range(number_of_notes):  # loop index unused
            self.generate_note()
        self.create_midi_file()

    def fitness(self):  # class
        """Classify the rendered MIDI with the RNN; return the class index (0-10)."""
        repertory = "output/"
        file = repertory + str(self.ind) + ".mid"
        self.create_midi_file(str(self.ind))
        data = get_drum(file)
        # BUGFIX: was `type(data) == type(None)`; identity test is the idiom
        if data is None:
            return 0
        prediction = self.model.predict(np.stack([data.astype(dtype=float)]))
        index_max = np.argmax(prediction)
        # pred = [0, 25, 50, 75, 100][index_max]
        pred = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10][index_max]
        return pred

    def fitness_reg(self):
        """Regression-style fitness: scaled scalar output of the RNN."""
        self.create_midi_file()
        repertory = "output/"
        file = repertory + str(self.ind) + ".mid"
        data = get_drum(file)
        # BUGFIX: consistent with fitness() — guard against unparseable files
        # instead of crashing on data.astype below.
        if data is None:
            return 0
        prediction = self.model.predict(np.stack([data.astype(dtype=float)]))
        if prediction:
            prediction = prediction[0][0]
        return prediction * 100

    def overlapped_keys(self, key_to_check, bars):
        """Return notes of a different pitch that start inside key_to_check's span."""
        overlapped = []
        for key in bars:
            if key_to_check.pitch != key.pitch:
                if key_to_check.timestamp <= key.timestamp <= (
                        key_to_check.timestamp + key_to_check.duration):
                    overlapped.append(key)
                    # print("key ", key_to_check, " overlapped by ", key )
        return overlapped

    def check_collision(self, key_to_check, changed_pitch, bars):
        """Return False when moving key_to_check by changed_pitch hits another note."""
        for key in bars:
            if (key_to_check.bit.pitch + changed_pitch) == key.bit.pitch:
                if key_to_check.bit.timestamp <= key.bit.timestamp <= (
                        key_to_check.bit.timestamp + key_to_check.bit.duration):
                    return False
        return True

    def __eq__(self, other):
        if type(other) is not type(self):
            return False
        # BUGFIX: zip() alone compares only the common prefix, so sequences of
        # different lengths could compare equal.
        if len(self.sequence) != len(other.sequence):
            return False
        for a, b in zip(self.sequence, other.sequence):
            if a.bit != b.bit:
                return False
        return True

    def __repr__(self):
        # r = f"I: {self.fitness()}"
        # for g in self.sequence:
        #     r += f'\n\t{g.bit}'
        r = str(self.ind)
        return r

    def __hash__(self):
        # BUGFIX: the original summed randint(1, 100) per gene — a
        # non-deterministic hash that violates the invariant "equal objects
        # have equal hashes" and breaks dict/set membership. Sequence length
        # is deterministic and agrees with __eq__ (equal individuals always
        # have equal-length sequences).
        return len(self.sequence)
def trainval_svrg(exp_dict, savedir, datadir, metrics_flag=True):
    '''
    SVRG-specific training and validation loop.

    Loads the dataset/model named in exp_dict, builds the SVRG optimizer,
    resumes from savedir if a previous score_list.pkl exists, then trains
    for exp_dict["max_epoch"] epochs, checkpointing model/optimizer state
    and the per-epoch score list after every epoch.  Returns score_list.
    '''
    pprint.pprint(exp_dict)

    # Load Train Dataset
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=datadir,
                                     exp_dict=exp_dict)
    train_loader = DataLoader(train_set,
                              drop_last=False,
                              shuffle=True,
                              batch_size=exp_dict["batch_size"])

    # Load Val Dataset
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=datadir,
                                   exp_dict=exp_dict)

    # Load model
    model = models.get_model(exp_dict["model"], train_set=train_set).cuda()

    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # lookup the learning rate
    lr = get_svrg_step_size(exp_dict)

    # Load Optimizer (SVRG needs the loader to compute full-gradient snapshots)
    opt = get_svrg_optimizer(model,
                             loss_function,
                             train_loader=train_loader,
                             lr=lr)

    # Resume from last saved state_dict; run_dict.pkl doubles as a marker
    # that this run has started at least once.
    if (not os.path.exists(savedir + "/run_dict.pkl")
            or not os.path.exists(savedir + "/score_list.pkl")):
        ut.save_pkl(savedir + "/run_dict.pkl", {"running": 1})
        score_list = []
        s_epoch = 0
    else:
        score_list = ut.load_pkl(savedir + "/score_list.pkl")
        model.load_state_dict(torch.load(savedir + "/model_state_dict.pth"))
        opt.load_state_dict(torch.load(savedir + "/opt_state_dict.pth"))
        s_epoch = score_list[-1]["epoch"] + 1

    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}

        if metrics_flag:
            # 1. Compute train loss over train set
            score_dict["train_loss"] = metrics.compute_metric_on_dataset(
                model, train_set, metric_name=exp_dict["loss_func"])

            # 2. Compute val acc over val set
            score_dict["val_acc"] = metrics.compute_metric_on_dataset(
                model, val_set, metric_name=exp_dict["acc_func"])

        # 3. Train over train loader
        model.train()
        print("%d - Training model with %s..."
              % (epoch, exp_dict["loss_func"]))

        s_time = time.time()
        for images, labels in tqdm.tqdm(train_loader):
            images, labels = images.cuda(), labels.cuda()

            opt.zero_grad()
            # the SVRG optimizer re-evaluates the loss through this closure,
            # possibly on a snapshot model (its argument), hence the lambda
            closure = lambda svrg_model: loss_function(
                svrg_model, images, labels, backwards=True)
            opt.step(closure)
        e_time = time.time()

        # Record step size and batch size
        score_dict["step_size"] = opt.state["step_size"]
        score_dict["batch_size"] = train_loader.batch_size
        score_dict["train_epoch_time"] = e_time - s_time

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report and save a full checkpoint every epoch
        print(pd.DataFrame(score_list).tail())
        ut.save_pkl(savedir + "/score_list.pkl", score_list)
        ut.torch_save(savedir + "/model_state_dict.pth", model.state_dict())
        ut.torch_save(savedir + "/opt_state_dict.pth", opt.state_dict())
        print("Saved: %s" % savedir)

    return score_list
def trainval(exp_dict, savedir_base, datadir_base, reset=False):
    """Active-learning loop: in each cycle, label a new batch of the pool,
    then train/validate for max_epoch inner epochs, checkpointing the model,
    the labelled-set state, and the score list under a hash of exp_dict.
    """
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================
    # load train and acrtive set
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     datadir_base=datadir_base,
                                     exp_dict=exp_dict)

    # wraps the train set with a labelled/unlabelled pool abstraction
    active_set = ActiveLearningDataset(train_set, random_state=42)

    # val set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   split="val",
                                   datadir_base=datadir_base,
                                   exp_dict=exp_dict)
    val_loader = DataLoader(val_set, batch_size=exp_dict["batch_size"])

    # Model
    # ==================
    model = models.get_model(model_name=exp_dict['model']['name'],
                             exp_dict=exp_dict).cuda()

    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment: restore model weights, labelled-pool state,
        # and the (cycle, inner_epoch) position from the last checkpoint
        model.set_state_dict(hu.torch_load(model_path))
        active_set.load_state_dict(
            hu.load_pkl(os.path.join(savedir, "active_set.pkl")))
        score_list = hu.load_pkl(score_list_path)
        inner_s_epoch = score_list[-1]['inner_epoch'] + 1
        s_cycle = score_list[-1]['cycle']
    else:
        # restart experiment
        score_list = []
        inner_s_epoch = 0
        s_cycle = 0

    # Train & Val
    # ==================
    print("Starting experiment at cycle %d epoch %d" %
          (s_cycle, inner_s_epoch))

    for c in range(s_cycle, exp_dict['max_cycle']):
        # Set seed (per-cycle, so labelling/sampling is reproducible on resume)
        np.random.seed(c)
        torch.manual_seed(c)
        torch.cuda.manual_seed_all(c)

        # only label a new batch when starting the cycle fresh (not on resume
        # into the middle of a cycle)
        if inner_s_epoch == 0:
            active_set.label_next_batch(model)
            hu.save_pkl(os.path.join(savedir, "active_set.pkl"),
                        active_set.state_dict())

        train_loader = DataLoader(active_set,
                                  sampler=samplers.get_sampler(
                                      exp_dict['sampler']['train'],
                                      active_set),
                                  batch_size=exp_dict["batch_size"])

        # Visualize the model
        # NOTE(review): `vis_loader` is not defined anywhere in this function;
        # unless it exists as a module-level global, this line raises
        # NameError at runtime — verify the intended loader (val_loader or
        # train_loader?) against the rest of the file.
        model.vis_on_loader(vis_loader,
                            savedir=os.path.join(savedir, "images"))

        for e in range(inner_s_epoch, exp_dict['max_epoch']):
            # Validate only at the start of each cycle
            score_dict = {}
            if e == 0:
                score_dict.update(model.val_on_loader(val_loader))

            # Train the model
            score_dict.update(model.train_on_loader(train_loader))

            # Validate the model
            score_dict["epoch"] = len(score_list)  # global step across cycles
            score_dict["inner_epoch"] = e
            score_dict["cycle"] = c
            score_dict['n_ratio'] = active_set.n_labelled_ratio
            score_dict["n_train"] = len(train_loader.dataset)
            score_dict["n_pool"] = len(train_loader.dataset.pool)

            # Add to score_list and save checkpoint
            score_list += [score_dict]

            # Report & Save
            score_df = pd.DataFrame(score_list)
            print("\n", score_df.tail(), "\n")
            hu.torch_save(model_path, model.get_state_dict())
            hu.save_pkl(score_list_path, score_list)
            print("Checkpoint Saved: %s" % savedir)

        # after finishing (or resuming past) a cycle, later cycles start at 0
        inner_s_epoch = 0
def trainval(exp_dict,
             savedir_base,
             reset=False,
             num_workers=0,
             run_ssl=False):
    """Episodic (support/query) train/val/test loop with checkpointing.

    When run_ssl is True, only runs a one-off test pass (SSL evaluation) and
    returns early; otherwise trains for exp_dict["max_epoch"] epochs, saving
    a regular and a best checkpoint (selected by exp_dict["target_loss"]).
    """
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # load datasets
    # ==========================
    train_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_train"],
        data_root=exp_dict["dataset_train_root"],
        split="train",
        transform=exp_dict["transform_train"],
        classes=exp_dict["classes_train"],
        support_size=exp_dict["support_size_train"],
        query_size=exp_dict["query_size_train"],
        n_iters=exp_dict["train_iters"],
        unlabeled_size=exp_dict["unlabeled_size_train"])

    val_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_val"],
        data_root=exp_dict["dataset_val_root"],
        split="val",
        # val transform is reused for both val and test below
        transform=exp_dict["transform_val"],
        classes=exp_dict["classes_val"],
        support_size=exp_dict["support_size_val"],
        query_size=exp_dict["query_size_val"],
        n_iters=exp_dict["val_iters"],
        unlabeled_size=exp_dict["unlabeled_size_val"])

    test_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_test"],
        data_root=exp_dict["dataset_test_root"],
        split="test",
        transform=exp_dict["transform_val"],
        classes=exp_dict["classes_test"],
        support_size=exp_dict["support_size_test"],
        query_size=exp_dict["query_size_test"],
        n_iters=exp_dict["test_iters"],
        unlabeled_size=exp_dict["unlabeled_size_test"])

    # get dataloaders
    # ==========================
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=exp_dict["batch_size"],
        shuffle=True,
        num_workers=num_workers,
        collate_fn=ut.get_collate(exp_dict["collate_fn"]),
        drop_last=True)
    # val/test: one episode per batch; identity collate keeps episodes as-is
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=lambda x: x,
                                             drop_last=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=num_workers,
                                              collate_fn=lambda x: x,
                                              drop_last=True)

    # create model and trainer
    # ==========================

    # Create model, opt, wrapper
    backbone = backbones.get_backbone(
        backbone_name=exp_dict['model']["backbone"], exp_dict=exp_dict)
    model = models.get_model(model_name=exp_dict["model"]['name'],
                             backbone=backbone,
                             n_classes=exp_dict["n_classes"],
                             exp_dict=exp_dict)

    if run_ssl:
        # runs the SSL experiments: a single test pass, saved once, no training
        score_list_path = os.path.join(savedir, 'score_list.pkl')
        if not os.path.exists(score_list_path):
            test_dict = model.test_on_loader(test_loader, max_iter=None)
            hu.save_pkl(score_list_path, [test_dict])
        return

    # Checkpoint
    # -----------
    checkpoint_path = os.path.join(savedir, 'checkpoint.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(checkpoint_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Run training and validation
    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}
        score_dict.update(model.get_lr())

        # train
        score_dict.update(model.train_on_loader(train_loader))

        # validate
        score_dict.update(model.val_on_loader(val_loader))
        score_dict.update(model.test_on_loader(test_loader))

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report
        score_df = pd.DataFrame(score_list)
        print(score_df.tail())

        # Save checkpoint
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(checkpoint_path, model.get_state_dict())
        print("Saved: %s" % savedir)

        # best = current score beats all PREVIOUS epochs ([:-1] excludes the
        # row just appended).  NOTE(review): on the first epoch the slice is
        # empty, so the comparison is against NaN and is_best is False —
        # confirm the first epoch is deliberately never saved as "best".
        if "accuracy" in exp_dict["target_loss"]:
            is_best = score_dict[exp_dict["target_loss"]] >= score_df[
                exp_dict["target_loss"]][:-1].max()
        else:
            is_best = score_dict[exp_dict["target_loss"]] <= score_df[
                exp_dict["target_loss"]][:-1].min()

        # Save best checkpoint
        if is_best:
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "checkpoint_best.pth"),
                          model.get_state_dict())
            print("Saved Best: %s" % savedir)

        # Check for end of training conditions
        if model.is_end_of_training():
            break
def trainval(exp_dict,
             savedir_base,
             reset,
             metrics_flag=True,
             datadir=None,
             cuda=False):
    """Optimizer-benchmark train/val loop with per-epoch checkpointing.

    Args:
        exp_dict: experiment config ('dataset', 'model', 'opt', 'batch_size',
            'max_epoch', 'runs', 'loss_func', 'acc_func', ...).
        savedir_base: root folder; the run is saved under a hash of exp_dict.
        reset: if True, delete (and back up) any previous run.
        metrics_flag: if True, evaluate train loss and val accuracy each epoch.
        datadir: directory the datasets are loaded from.
        cuda: run on GPU when True, CPU otherwise.

    Returns:
        score_list: list of per-epoch score dictionaries.
    """
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    # BUGFIX: the original wrapped this in print(); pprint.pprint() returns
    # None, so a stray "None" line was printed after the dict.
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # set seed
    # ==================
    seed = 42 + exp_dict['runs']
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        device = 'cuda'
        torch.cuda.manual_seed_all(seed)
    else:
        device = 'cpu'

    print('Running on device: %s' % device)

    # Dataset
    # ==================
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=datadir,
                                     exp_dict=exp_dict)

    train_loader = DataLoader(train_set,
                              drop_last=True,
                              shuffle=True,
                              sampler=None,
                              batch_size=exp_dict["batch_size"])

    # Load Val Dataset
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=datadir,
                                   exp_dict=exp_dict)

    # Model
    # ==================
    use_backpack = exp_dict['opt'].get("backpack", False)
    model = models.get_model(exp_dict["model"],
                             train_set=train_set,
                             backpack=use_backpack).to(device=device)
    if use_backpack:
        # BackPACK only supports the second-order wrappers below
        assert exp_dict['opt']['name'] in ['nus_wrapper', 'adaptive_second']
        from backpack import extend
        model = extend(model)

    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # Load Optimizer
    # ==============
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch,
                                   n_train=len(train_set),
                                   train_loader=train_loader,
                                   model=model,
                                   loss_function=loss_function,
                                   exp_dict=exp_dict,
                                   batch_size=exp_dict["batch_size"])

    # Checkpointing
    # =============
    score_list_path = os.path.join(savedir, "score_list.pkl")
    model_path = os.path.join(savedir, "model_state_dict.pth")
    opt_path = os.path.join(savedir, "opt_state_dict.pth")

    if os.path.exists(score_list_path):
        # resume experiment
        score_list = ut.load_pkl(score_list_path)
        if use_backpack:
            # extended models carry extra BackPACK buffers; allow mismatches
            model.load_state_dict(torch.load(model_path), strict=False)
        else:
            model.load_state_dict(torch.load(model_path))
        opt.load_state_dict(torch.load(opt_path))
        s_epoch = score_list[-1]["epoch"] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Start Training
    # ==============
    # (unused locals n_train / n_batches from the original were removed)
    batch_size = train_loader.batch_size

    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        # Set seed per epoch so shuffling is reproducible across resumes
        seed = epoch + exp_dict['runs']
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        score_dict = {"epoch": epoch}

        # Validate
        # --------
        if metrics_flag:
            # 1. Compute train loss over train set
            score_dict["train_loss"] = metrics.compute_metric_on_dataset(
                model, train_set,
                metric_name=exp_dict["loss_func"],
                batch_size=exp_dict['batch_size'])

            # 2. Compute val acc over val set
            score_dict["val_acc"] = metrics.compute_metric_on_dataset(
                model, val_set,
                metric_name=exp_dict["acc_func"],
                batch_size=exp_dict['batch_size'])

        # Train
        # -----
        model.train()
        print("%d - Training model with %s..."
              % (epoch, exp_dict["loss_func"]))

        s_time = time.time()
        train_on_loader(model, train_set, train_loader, opt, loss_function,
                        epoch, use_backpack)
        e_time = time.time()

        # Record optimizer statistics; .get with defaults because not every
        # optimizer populates every key in opt.state
        score_dict["step"] = opt.state.get("step", 0) / int(n_batches_per_epoch)
        score_dict["step_size"] = opt.state.get("step_size", {})
        score_dict["step_size_avg"] = opt.state.get("step_size_avg", {})
        score_dict["n_forwards"] = opt.state.get("n_forwards", {})
        score_dict["n_backwards"] = opt.state.get("n_backwards", {})
        score_dict["grad_norm"] = opt.state.get("grad_norm", {})
        score_dict["batch_size"] = batch_size
        score_dict["train_epoch_time"] = e_time - s_time
        score_dict.update(opt.state["gv_stats"])

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        ut.save_pkl(score_list_path, score_list)
        ut.torch_save(model_path, model.state_dict())
        ut.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

    return score_list
type=int, metavar='T', default=None, help="Enter the target class ID") args = parser.parse_args() yaml_path = args.yaml with open(yaml_path, 'r') as f: vis_args = DictAsMember(yaml.safe_load(f)) if args.img: vis_args.DATASET.path = args.img vis_args.DATASET.target_class = args.target # Load model & pretrained params pretrained_model = get_model(vis_args.MODEL) state = torch.load(vis_args.MODEL.path) try: pretrained_model.load_state_dict(state["model"]) except KeyError as e: pretrained_model.load_state_dict(state) alpha = vis_args.RESULTS.alpha h = w = vis_args.DATASET.size # Initialize GBP GBP = GuidedBackprop(pretrained_model, vis_args.MODEL.name) # Get filenames and create absolute paths if os.path.isdir(vis_args.DATASET.path): files = os.listdir(vis_args.DATASET.path)
def eval_mtl_single(args):
    """Evaluate every saved "best_*" checkpoint of a single task on its test set.

    Loads the training-time args from the save path, rebuilds the dataset and
    model, then runs a Tester over each checkpoint (sorted by step number),
    logging the chosen metric per checkpoint.
    """
    global logger
    # import ipdb; ipdb.set_trace()
    # NOTE(review): the caller's `args` is immediately replaced by the
    # training-time args stored on disk — only args.save_path from the
    # original argument is actually used.  Also note train_mlt_single saves
    # "args.th", not "args"; verify these two paths agree.
    args = torch.load(os.path.join(args.save_path, "args"))
    print(args)
    logger.info(args)
    task_lst, vocabs = utils.get_data(args.data_path)
    task_db = task_lst[args.task_id]
    train_data = task_db.train_set
    dev_data = task_db.dev_set
    test_data = task_db.test_set
    task_name = task_db.task_name

    # text classification: expose inputs/targets under the names the model expects
    for ds in [train_data, dev_data, test_data]:
        ds.rename_field("words_idx", "x")
        ds.rename_field("label", "y")
        ds.set_input("x", "y", "task_id")
        ds.set_target("y")
    # seq label: sequence-labelling tasks additionally need seq_len
    if task_name in SEQ_LABEL_TASK:
        for ds in [train_data, dev_data, test_data]:
            ds.set_input("seq_len")
            ds.set_target("seq_len")

    logger = utils.get_logger(__name__)
    logger.info("task name: {}, task id: {}".format(task_db.task_name,
                                                    task_db.task_id))
    logger.info(
        "train len {}, dev len {}, test len {}".format(
            len(train_data), len(dev_data), len(test_data)
        )
    )

    # init model
    model = get_model(args, task_lst, vocabs)
    # logger.info('model: \n{}'.format(model))

    # accuracy for classification and POS; span-F1 (+accuracy) otherwise
    if task_name not in SEQ_LABEL_TASK or task_name == "pos":
        metrics = [
            AccuracyMetric(target="y"),
            # MetricInForward(val_name='loss')
        ]
    else:
        metrics = [
            SpanFPreRecMetric(
                tag_vocab=vocabs[task_name],
                pred="pred",
                target="y",
                seq_len="seq_len",
                encoding_type="bioes" if task_name == "ner" else "chunk",
            ),
            AccuracyMetric(target="y")
            # MetricInForward(val_name='loss')
        ]

    cur_best = 0.0  # NOTE(review): never updated or read below
    init_best = None
    eval_time = 0
    # checkpoints named "best_<step>_<...>", sorted by their step number
    paths = [path for path in os.listdir(args.save_path) if path.startswith("best")]
    paths = sorted(paths, key=lambda x: int(x.split("_")[1]))
    for path in paths:
        path = os.path.join(args.save_path, path)
        state = torch.load(path, map_location="cpu")
        model.load_state_dict(state)
        tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = tester.test()
        # pick the headline number: "acc" for POS, otherwise the first "f" found
        val = 0.0
        for metric_name, metric_dict in res.items():
            if task_name == "pos" and "acc" in metric_dict:
                val = metric_dict["acc"]
                break
            elif "f" in metric_dict:
                val = metric_dict["f"]
                break
        if init_best is None:
            init_best = val
        # NOTE(review): "is better" compares against the FIRST checkpoint's
        # score, and for that first checkpoint `val > init_best` is always
        # False by construction.
        logger.info(
            "No #%d: best %f, %s, path: %s, is better: %s",
            eval_time,
            val,
            tester._format_eval_results(res),
            path,
            val > init_best,
        )
        eval_time += 1
def trainval(exp_dict, savedir_base, reset=False):
    """Train/validate with a closure-based optimizer (e.g. line-search SGD),
    checkpointing model, optimizer state and the score list every epoch.
    """
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # set seed
    # ---------------
    seed = 42 + exp_dict['runs']
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # -----------

    # train loader
    # NOTE(review): datadir is set to savedir_base here, i.e. datasets live
    # under the results root — confirm this is intentional.
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=savedir_base,
                                     exp_dict=exp_dict)
    train_loader = torch.utils.data.DataLoader(
        train_set,
        drop_last=True,
        shuffle=True,
        batch_size=exp_dict["batch_size"])

    # val set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=savedir_base,
                                   exp_dict=exp_dict)

    # Model
    # -----------
    model = models.get_model(exp_dict["model"], train_set=train_set).cuda()

    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # Compute fstar (optimal loss value, used by some optimizers)
    # -------------
    if exp_dict['opt'].get('fstar_flag'):
        ut.compute_fstar(train_set, loss_function, savedir_base, exp_dict)

    # Load Optimizer
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt_dict=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch)

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    opt_path = os.path.join(savedir, 'opt_state_dict.pth')

    if os.path.exists(score_list_path):
        # resume experiment
        score_list = hu.load_pkl(score_list_path)
        model.load_state_dict(torch.load(model_path))
        opt.load_state_dict(torch.load(opt_path))
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d/%d' %
          (s_epoch, exp_dict['max_epoch']))

    for e in range(s_epoch, exp_dict['max_epoch']):
        # Set seed per epoch so shuffling is reproducible across resumes
        seed = e + exp_dict['runs']
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        score_dict = {}

        # Compute train loss over train set
        score_dict["train_loss"] = metrics.compute_metric_on_dataset(
            model, train_set, metric_name=exp_dict["loss_func"])

        # Compute val acc over val set
        score_dict["val_acc"] = metrics.compute_metric_on_dataset(
            model, val_set, metric_name=exp_dict["acc_func"])

        # Train over train loader
        model.train()
        print("%d - Training model with %s..." % (e, exp_dict["loss_func"]))

        # train and validate
        s_time = time.time()
        for batch in tqdm.tqdm(train_loader):
            images, labels = batch["images"].cuda(), batch["labels"].cuda()

            opt.zero_grad()

            # closure: the optimizer may re-evaluate the loss multiple times
            # per step (line search), so the loss is wrapped in a callable
            def closure():
                return loss_function(model, images, labels, backwards=True)

            opt.step(closure)

        e_time = time.time()

        # Record metrics
        score_dict["epoch"] = e
        score_dict["step_size"] = opt.state["step_size"]
        score_dict["step_size_avg"] = opt.state["step_size_avg"]
        score_dict["n_forwards"] = opt.state["n_forwards"]
        score_dict["n_backwards"] = opt.state["n_backwards"]
        score_dict["grad_norm"] = opt.state["grad_norm"]
        score_dict["batch_size"] = train_loader.batch_size
        score_dict["train_epoch_time"] = e_time - s_time

        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

    print('Experiment completed')
def train_mlt_single(args):
    """Train a single task with iterative magnitude pruning.

    Builds the model, selects the parameters to prune (args.need_cut), then
    alternates fastNLP training and pruning for pruning_iter + 1 rounds,
    logging remain-rate/cutoff/accuracy to TensorBoard and saving weights
    and masks after each round.
    """
    global logger

    logger.info(args)
    task_lst, vocabs = utils.get_data(args.data_path)
    task_db = task_lst[args.task_id]
    train_data = task_db.train_set
    dev_data = task_db.dev_set
    test_data = task_db.test_set
    task_name = task_db.task_name

    if args.debug:
        # tiny slices and few epochs for quick smoke runs
        train_data = train_data[:200]
        dev_data = dev_data[:200]
        test_data = test_data[:200]
        args.epochs = 3
        args.pruning_iter = 3

    summary_writer = SummaryWriter(
        log_dir=os.path.join(args.tb_path, "global/%s" % task_name)
    )

    logger.info("task name: {}, task id: {}".format(task_db.task_name,
                                                    task_db.task_id))
    logger.info(
        "train len {}, dev len {}, test len {}".format(
            len(train_data), len(dev_data), len(test_data)
        )
    )

    # init model
    model = get_model(args, task_lst, vocabs)
    logger.info("model: \n{}".format(model))
    if args.init_weights is not None:
        utils.load_model(model, args.init_weights)

    # metric choice: plain accuracy vs span-F1 for sequence labelling
    if utils.need_acc(task_name):
        metrics = [AccuracyMetric(target="y"), MetricInForward(val_name="loss")]
        metric_key = "acc"
    else:
        metrics = [
            YangJieSpanMetric(
                tag_vocab=vocabs[task_name],
                pred="pred",
                target="y",
                seq_len="seq_len",
                encoding_type="bioes" if task_name == "ner" else "bio",
            ),
            MetricInForward(val_name="loss"),
        ]
        metric_key = "f"
    logger.info(metrics)

    # collect the parameter names to prune: trainable, non-bias parameters
    # whose name contains one of the comma-separated args.need_cut patterns
    need_cut_names = list(set([s.strip() for s in args.need_cut.split(",")]))
    prune_names = []
    for name, p in model.named_parameters():
        if not p.requires_grad or "bias" in name:
            continue
        for n in need_cut_names:
            if n in name:
                prune_names.append(name)
                break

    # get Pruning class
    pruner = Pruning(
        model, prune_names, final_rate=args.final_rate,
        pruning_iter=args.pruning_iter
    )
    if args.init_masks is not None:
        pruner.load(args.init_masks)
        pruner.apply_mask(pruner.remain_mask, pruner._model)

    # save checkpoint
    os.makedirs(args.save_path, exist_ok=True)

    logger.info('Saving init-weights to {}'.format(args.save_path))
    # NOTE(review): model.cpu() moves the model to CPU in place before
    # saving; training below runs with device="cuda" — confirm the Trainer
    # moves it back.
    torch.save(
        model.cpu().state_dict(), os.path.join(args.save_path, "init_weights.th")
    )
    torch.save(args, os.path.join(args.save_path, "args.th"))

    # start training and pruning
    summary_writer.add_scalar("remain_rate", 100.0, 0)
    summary_writer.add_scalar("cutoff", 0.0, 0)

    if args.init_weights is not None:
        # baseline test score of the provided initial weights
        init_tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = init_tester.test()
        logger.info("No init testing, Result: {}".format(res))
        del res, init_tester

    for prune_step in range(pruner.pruning_iter + 1):
        # reset optimizer every time
        optim_params = [p for p in model.parameters() if p.requires_grad]
        # utils.get_logger(__name__).debug(optim_params)
        utils.get_logger(__name__).debug(len(optim_params))
        optimizer = get_optim(args.optim, optim_params)
        # optimizer = TriOptim(optimizer, args.n_filters, args.warmup, args.decay)
        # NOTE(review): the rate-based factor is computed and then immediately
        # overwritten with 1.0, so the lr rescale below is a no-op — dead code
        # left in deliberately?
        factor = pruner.cur_rate / 100.0
        factor = 1.0
        # print(factor, pruner.cur_rate)
        for pg in optimizer.param_groups:
            pg["lr"] = factor * pg["lr"]
        utils.get_logger(__name__).info(optimizer)

        trainer = Trainer(
            train_data,
            model,
            loss=LossInForward(),
            optimizer=optimizer,
            metric_key=metric_key,
            metrics=metrics,
            print_every=200,
            batch_size=args.batch_size,
            num_workers=4,
            n_epochs=args.epochs,
            dev_data=dev_data,
            save_path=None,
            sampler=fastNLP.BucketSampler(batch_size=args.batch_size),
            callbacks=[
                pruner,  # the pruner re-applies its masks during training
                # LRStep(lstm.WarmupLinearSchedule(optimizer, args.warmup, int(len(train_data)/args.batch_size*args.epochs)))
                GradientClipCallback(clip_type="norm", clip_value=5),
                LRScheduler(
                    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep))
                ),
                LogCallback(path=os.path.join(args.tb_path, "No", str(prune_step))),
            ],
            use_tqdm=False,
            device="cuda",
            check_code_level=-1,
        )
        res = trainer.train()
        logger.info("No #{} training, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        # NOTE(review): tag is spelled "prunning_dev_acc" (double n) while the
        # test tag below is "pruning_test_acc" — a typo, but changing it would
        # break existing TensorBoard dashboards.
        summary_writer.add_scalar("prunning_dev_acc", val, prune_step)

        tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = tester.test()
        logger.info("No #{} testing, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        summary_writer.add_scalar("pruning_test_acc", val, prune_step)

        # prune and save: weights first, then cut the next fraction of weights
        torch.save(
            model.state_dict(),
            os.path.join(
                args.save_path,
                "best_{}_{}.th".format(pruner.prune_times, pruner.cur_rate),
            ),
        )
        pruner.pruning_model()
        summary_writer.add_scalar("remain_rate", pruner.cur_rate, prune_step + 1)
        summary_writer.add_scalar("cutoff", pruner.last_cutoff, prune_step + 1)
        pruner.save(
            os.path.join(
                args.save_path, "{}_{}.th".format(pruner.prune_times, pruner.cur_rate)
            )
        )
by="value", ascending=False).to_dict()["value"]) # =============================== # === Train model # =============================== logging.info("Train model") # get folds with timer("Train model"): with timer("get validation"): x_train["target"] = np.log1p(y_train) > 7.0 splits = get_validation(x_train, config) del x_train["target"] gc.collect() model = get_model(config) ( models, oof_preds, test_preds, valid_preds, feature_importance, evals_results, ) = model.cv( y_train=y_train, train_features=x_train[cols], test_features=x_test[cols], y_valid=None, valid_features=None, feature_name=cols, folds_ids=splits,
def trainval(exp_dict, savedir_base, datadir_base, reset=False, num_workers=0,
             pin_memory=False, ngpu=1, cuda_deterministic=False):
    """Run one training/validation experiment described by ``exp_dict``.

    Creates (or resumes) a checkpointed experiment directory under
    ``savedir_base``, builds train/val/test datasets and the model, then
    trains for ``exp_dict['niter']`` epochs, saving a score list and model
    checkpoints (latest + best-by-test-accuracy) every epoch.

    Args:
        exp_dict: experiment configuration (dataset, model, batch, niter,
            fixedSeed, mixTrainVal, dataset_size, ...).
        savedir_base: root directory for experiment outputs.
        datadir_base: root directory the datasets are loaded from.
        reset: if True, delete (with backup) any previous run of this exp_id.
        num_workers / pin_memory: forwarded to the DataLoaders.
        ngpu: unused here; kept for caller compatibility.
        cuda_deterministic: if True and running on CUDA, trade speed for
            reproducible cuDNN kernels.

    Raises:
        AssertionError: if ``exp_dict`` has no 'fixedSeed' entry
            (NOTE: stripped under ``python -O``).
    """
    # bookkeeping
    # ==================
    # get experiment directory — the exp_id is a hash of the config, so the
    # same config always resumes the same directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    if DEVICE.type == "cuda":
        if cuda_deterministic:
            # reproducible (but slower) cuDNN kernels
            cudnn.benchmark = False
            cudnn.deterministic = True
        else:
            # let cuDNN pick the fastest kernels for the observed shapes
            cudnn.benchmark = True

    # Dataset
    # ==================
    trainset = get_dataset(exp_dict['dataset'], 'train',
                           exp_dict=exp_dict,
                           datadir_base=datadir_base,
                           n_samples=exp_dict['dataset_size']['train'],
                           transform_lvl=exp_dict['dataset']['transform_lvl'],
                           colorjitter=exp_dict['dataset'].get('colorjitter')
                           )

    # NOTE(review): valset uses dataset_size['train'] as n_samples — this
    # looks intentional (train/val are re-split per epoch below via
    # get_train_val_dataloader), but confirm there is no 'validation' size key.
    valset = get_dataset(exp_dict['dataset'], 'validation',
                         exp_dict=exp_dict,
                         datadir_base=datadir_base,
                         n_samples=exp_dict['dataset_size']['train'],
                         transform_lvl=0,
                         val_transform=exp_dict['dataset']['val_transform'])

    testset = get_dataset(exp_dict['dataset'], 'test',
                          exp_dict=exp_dict,
                          datadir_base=datadir_base,
                          n_samples=exp_dict['dataset_size']['test'],
                          transform_lvl=0,
                          val_transform=exp_dict['dataset']['val_transform'])
    print("Dataset defined.")

    # define dataloaders — 'bach' images are too large to batch, so test one
    # at a time for that dataset
    if exp_dict['dataset']['name'] == 'bach':
        testloader = torch.utils.data.DataLoader(testset, batch_size=1,
                                                 shuffle=False,
                                                 num_workers=num_workers,
                                                 pin_memory=pin_memory)
    else:
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=exp_dict['batch']['size'],
                                                 shuffle=False,
                                                 num_workers=num_workers,
                                                 pin_memory=pin_memory)
    print("Testloader defined.")

    # Model
    # ==================
    model = get_model(exp_dict, trainset, device=DEVICE)
    print("Model loaded")

    model_path = os.path.join(savedir, 'model.pth')
    model_best_path = os.path.join(savedir, 'model_best.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    # checkpoint management
    if os.path.exists(score_list_path):
        # resume experiment from the latest checkpoint
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = len(score_list)
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # define and log random seed for reproducibility
    assert('fixedSeed' in exp_dict)
    seed = exp_dict['fixedSeed']
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    print("Seed defined.")

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d/%d" % (s_epoch, exp_dict['niter']))

    for epoch in range(s_epoch, exp_dict['niter']):
        s_time = time.time()

        # Sample new train val — fresh split (optionally mixing train+val)
        # every epoch
        trainloader, valloader = get_train_val_dataloader(exp_dict,
                                                          trainset, valset,
                                                          mixtrainval=exp_dict['mixTrainVal'],
                                                          num_workers=num_workers,
                                                          pin_memory=pin_memory)
        # Train & validate
        train_dict = model.train_on_loader(trainloader, valloader,
                                           epoch=epoch, exp_dict=exp_dict)

        # Test phase — evaluate on all three splits with the epoch's weights
        train_dict_2 = model.test_on_loader(trainloader)
        val_dict = model.test_on_loader(valloader)
        test_dict = model.test_on_loader(testloader)

        # Vis phase
        model.vis_on_loader('train', trainset,
                            savedir_images=os.path.join(savedir, 'images'),
                            epoch=epoch)

        # Collect this epoch's metrics
        score_dict = {}
        score_dict["epoch"] = epoch
        score_dict["test_acc"] = test_dict['acc']
        score_dict["val_acc"] = val_dict['acc']
        score_dict["train_acc"] = train_dict_2['acc']
        score_dict["train_loss"] = train_dict['loss']
        score_dict["time_taken"] = time.time() - s_time
        score_dict["netC_lr"] = train_dict['netC_lr']

        # Log per-transformation statistics when an augmentation net is used
        if exp_dict['model']['netA'] is not None:
            if 'transformations_mean' in train_dict:
                for i in range(len(train_dict['transformations_mean'])):
                    score_dict[str(i) + "_mean"] = train_dict['transformations_mean'][i].item()
            if 'transformations_std' in train_dict:
                for i in range(len(train_dict['transformations_std'])):
                    score_dict[str(i) + "_std"] = train_dict['transformations_std'][i].item()

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Update best score — compare against the max over all PREVIOUS
        # epochs ([:-1] excludes the row just appended).
        # FIX: save through model_best_path (was re-joining the same literal,
        # leaving the variable defined above unused).
        if epoch == 0 or (score_dict["test_acc"] >= score_df["test_acc"][:-1].max()):
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"), score_list)
            hu.torch_save(model_best_path, model.get_state_dict())
            print("Saved Best: %s" % savedir)

    print('experiment completed')
data_root="./data/imagenette2/train", mapping_folder_to_label=mapping_folder_to_label, train=True) train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True) val_dataset = ImageNetteDataset( data_root="./data/imagenette2/val", mapping_folder_to_label=mapping_folder_to_label, train=True) val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False) model = get_model(args.model_name) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.00001) model.to(args.device) best_val_acc = 0 for epoch in range(args.epochs): train_correct = 0 train_loss = 0 train_data_num = 0 val_correct = 0 val_loss = 0 val_data_num = 0
type=str, default='/mnt/public/datasets/DeepFish') parser.add_argument("-e", "--exp_config", default='loc') parser.add_argument("-uc", "--use_cuda", type=int, default=0) args = parser.parse_args() device = torch.device('cuda' if args.use_cuda else 'cpu') exp_dict = exp_configs.EXP_GROUPS[args.exp_config][0] train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"], split="train", transform=exp_dict.get("transform"), datadir=args.datadir) # Create model, opt, wrapper model_original = models.get_model(exp_dict["model"], exp_dict=exp_dict).to('cpu') #.cuda() opt = torch.optim.Adam(model_original.parameters(), lr=1e-5, weight_decay=0.0005) model = wrappers.get_wrapper(exp_dict["wrapper"], model=model_original, opt=opt).to('cpu') #.cuda() if args.exp_config == 'loc': batch = torch.utils.data.dataloader.default_collate([train_set[3]]) else: batch = torch.utils.data.dataloader.default_collate([train_set[0]]) #*************** helen added this code im = Image.open("/Users/helenpropson/Documents/git/marepesca/tank.jpg")