Ejemplo n.º 1
0
def trainval(exp_dict):
    """Optimize a model from a source scene toward a target scene.

    Runs 500 training iterations, printing the tail of the accumulated score
    table after each one, writes a visualization PNG every 50 iterations, and
    finally assembles the PNGs into ``animation.gif``. All outputs go to
    ``tmp/<hash of exp_dict>``.

    Args:
        exp_dict: experiment configuration. This function reads the keys
            ``source_scene``, ``target_scene`` and ``model``; the whole dict
            is also forwarded to ``models.get_model``.
    """
    pprint.pprint(exp_dict)

    savedir_base = os.path.join('tmp', hu.hash_dict(exp_dict))
    os.makedirs(savedir_base, exist_ok=True)

    # -- get scenes
    source_scene = scenes.get_scene(exp_dict['source_scene'])
    target_scene = scenes.get_scene(exp_dict['target_scene'])

    # -- get model
    model = models.get_model(exp_dict['model'], source_scene, exp_dict)

    # -- train for E iterations
    score_list = []
    for e in range(500):
        # update parameters and get new score_dict
        score_dict = model.train_on_batch(target_scene)
        score_dict["epoch"] = e
        score_dict["step_size"] = model.opt.state['step_size']

        score_list.append(score_dict)

        # Print
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")

        # Visualize
        if e % 50 == 0:
            model.vis_on_batch(target_scene,
                               fname=os.path.join(savedir_base,
                                                  'output_%d.png' % e))

    # The original called ``s.path.join`` here, which raised NameError only
    # after all training had finished; ``os.path.join`` is what was intended.
    save_gif(src_path=os.path.join(savedir_base, '*.png'),
             tgt_fname=os.path.join(savedir_base, 'animation.gif'))
Ejemplo n.º 2
0
    def get_hydra_model(self):
        """Rebuild the project model wrapper for this run.

        The wrapper class is looked up from the run's ``"model"`` parameter
        and instantiated with ``self.configs``; its ``model`` attribute is
        then replaced with whatever ``load_processor`` returns for the
        ``"model"`` artifact of the pyfunc model.

        Returns:
            The instantiated wrapper with its ``model`` attribute set.
        """
        # Path of the "model" artifact held in the pyfunc model's context.
        artifact_path = (
            self.get_pyfunc_model()._model_impl.context.artifacts["model"])

        wrapper_cls = get_model(self.run.data.params["model"])
        restored = wrapper_cls(self.configs)
        restored.model = load_processor(artifact_path)
        return restored
def main(
    log_dir,
    model_type,
    vocab_size,
    emb_size,
    batch_size,
    epochs,
    maxlen,
    min_acc,
    num_samples,
    oov_token,
):
    """Train a binary text classifier and write its artifacts to ``log_dir``.

    Steps: load the three data splits, fit a ``Preprocessor`` on the texts of
    all splits, train the model, assert that test accuracy exceeds
    ``min_acc``, then save sample test requests, the preprocessor and the
    model.

    Args:
        log_dir: output directory (created if missing).
        model_type: model selector passed to ``get_model``; ``"lstm"`` is
            saved as ``model.h5``, anything else under ``saved_model/model``.
        vocab_size: passed to the preprocessor and to ``get_model``.
        emb_size: passed to ``get_model``.
        batch_size: batch size for ``model.fit``.
        epochs: number of epochs for ``model.fit``.
        maxlen: passed to the preprocessor and to ``get_model``.
        min_acc: threshold the test accuracy must strictly exceed.
        num_samples: number of test texts drawn (with ``np.random.choice``)
            to build the sample requests.
        oov_token: passed to the preprocessor.

    Raises:
        AssertionError: if the test accuracy is not above ``min_acc``.
    """
    _create_dir_if_not_exist(log_dir)

    train_split, val_split, test_split = get_data()
    str_X_train, y_train = train_split
    str_X_val, y_val = val_split
    str_X_test, y_test = test_split
    test_samples = np.random.choice(str_X_test, num_samples)

    # The preprocessor is fitted on the texts of every split.
    preprocessor = Preprocessor(
        maxlen=maxlen, vocab_size=vocab_size, oov_token=oov_token)
    preprocessor.fit_on_texts(str_X_train + str_X_val + str_X_test)

    X_train = preprocessor.transform(str_X_train)
    X_val = preprocessor.transform(str_X_val)
    X_test = preprocessor.transform(str_X_test)

    # Build, compile and train the model.
    model = get_model(model_type, maxlen, vocab_size, emb_size)
    model.compile(
        loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    model.fit(X_train,
              y_train,
              validation_data=(X_val, y_val),
              batch_size=batch_size,
              epochs=epochs)

    # Index 1 of the evaluation result is the accuracy metric compiled above.
    score = model.evaluate(X_test, y_test, verbose=0)
    test_acc = score[1]
    print("Test accuracy:", test_acc)
    assert test_acc > min_acc, f"score doesnt meet the minimum threshold {min_acc}"

    test_requests = _generate_test_requests(model, preprocessor, test_samples)
    _save_json(join(log_dir, "test_requests.json"), test_requests)
    preprocessor.save(join(log_dir, "preprocessor.pkl"))

    # HACK
    # For some reason savedModel format is not working with `lstm` model
    model_target = "model.h5" if model_type == "lstm" else "saved_model/model"
    model.save(join(log_dir, model_target))
Ejemplo n.º 4
0
def tune_hyperparams(cfg):
    """Compute the cross-validation score of the configured model.

    Lagged features are computed first if any of the expected input files is
    missing. When ``cfg.mlflow`` is truthy, the CV metric and the model
    parameters are logged to MLflow and the run is ended afterwards.

    Args:
        cfg: configuration object. This function reads ``cv.method``,
            ``cv.params``, ``model``, ``metrics`` and the optional ``mlflow``
            flag; the data/feature sub-configs are derived through ``utils``.

    Returns:
        The value returned by ``model.cv_score(X_train, y_train)``.
    """
    # NOTE(review): these names are unused here; presumably the import is
    # needed for its side effects (accessor registration) — confirm.
    from src.storm_utils import StormIndexAccessor, StormAccessor

    data_cfg = utils.get_data_cfg(cfg)
    features_cfg = utils.get_features_cfg(cfg)
    inputs_dir = Path(to_absolute_path(features_cfg.hydra.run.dir))
    paths = features_cfg.outputs

    use_mlflow = OmegaConf.select(cfg, "mlflow", default=False)
    if use_mlflow:
        import mlflow

        run = setup_mlflow(cfg, features_cfg=features_cfg, data_cfg=data_cfg)

    cv_method = cfg.cv.method
    cv_init_params = cfg.cv.params

    # Model specific parameters
    model_name = cfg.model
    metrics = cfg.metrics

    # Compute lagged features if they haven't been computed yet
    if any(not (inputs_dir / path).exists() for path in paths.values()):
        inputs_dir.mkdir(parents=True, exist_ok=True)
        lagged_features_cfg = features_cfg.copy()
        with open_dict(lagged_features_cfg):
            _ = lagged_features_cfg.pop("hydra")
        cwd = os.getcwd()
        os.chdir(inputs_dir)
        try:
            compute_lagged_features(lagged_features_cfg)
        finally:
            # Always restore the working directory, even if the feature
            # computation fails, so later relative paths are not misresolved.
            os.chdir(cwd)

    X_train = load_processed_data("X_train", inputs_dir=inputs_dir, paths=paths)
    y_train = load_processed_data("y_train", inputs_dir=inputs_dir, paths=paths)

    logger.info(f"Getting CV split for '{cv_method}' method...")
    cv = get_cv_split(y_train, cv_method, **cv_init_params)

    model = get_model(model_name)(cfg, cv=cv, metrics=metrics, mlflow=use_mlflow)

    cv_score = model.cv_score(X_train, y_train)
    logger.info(f"CV score: {cv_score}")
    if use_mlflow:
        mlflow.log_metric(model.cv_metric, cv_score)
        # ``mlflow`` is only imported when MLflow is enabled, so parameter
        # logging must live under the same guard (it used to raise
        # UnboundLocalError when MLflow was disabled).
        mlflow.log_params(model.params)

    model.save_output()

    if use_mlflow:
        mlflow.end_run()

    return cv_score
Ejemplo n.º 5
0
def train(args):
    """Compile, fit, evaluate and save the model, then return it.

    The model is saved under the name ``CENSUS_MODEL``: directly inside
    ``args.job_dir`` for local paths, or saved locally and then handed to
    ``copy_file_to_gcs`` when ``args.job_dir`` starts with ``gs://``.
    """
    train_data, test_data = _get_datasets()
    X_train, y_train = train_data
    X_test, y_test = test_data

    model = get_model(args)
    optimizer = _get_optimizer(args)

    # Append a trailing axis of size 1 to both image arrays.
    X_train = X_train[:, :, :, None]
    X_test = X_test[:, :, :, None]

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train,
              y_train,
              batch_size=args.batch_size,
              epochs=args.epochs,
              verbose=1)
    model.evaluate(X_test,
                   y_test,
                   batch_size=args.test_batch_size,
                   verbose=1)

    if not args.job_dir.startswith('gs://'):
        model.save(os.path.join(args.job_dir, CENSUS_MODEL))
    else:
        model.save(CENSUS_MODEL)
        copy_file_to_gcs(args.job_dir, CENSUS_MODEL)

    return model
Ejemplo n.º 6
0
def main(config_path: Path, dataset_path: Path) -> SimpleNamespace:
    """
    Main function responsible for training classification model.

    Arguments:
        Path config_path: Path to main config (of :class:`DefaultConfig` class)
        Path dataset_path: Path to dataset

    Returns:
        A ``SimpleNamespace`` (not a tuple). It always carries ``root``, the
        trainer's root directory. When ``config.training.save`` is truthy it
        additionally carries ``model_path`` (path of the best checkpoint),
        ``val_loader`` and ``test_loader``.
    """

    config = load_variable("config", config_path)
    model = models.get_model(config)

    train = config.training
    setup(train_config=train, logger=logger)

    train_loader, val_loader, test_loader = DataLoader.get_loaders(
        path_to_data=dataset_path, config=config)

    model.set_example(train_loader)
    learner = trainer.Trainer(config=config)
    learner.summary(model)
    learner.fit(model, train_loader, val_loader)

    # Testing only runs when saving is enabled as well.
    if train.test and train.save:
        learner.test(test_dataloaders=test_loader)

    if not train.save:
        return SimpleNamespace(root=learner.root_dir)

    return SimpleNamespace(
        root=learner.root_dir,
        model_path=Path(learner.checkpoint_callback.best_model_path),
        val_loader=val_loader,
        test_loader=test_loader,
    )
Ejemplo n.º 7
0
    def train(self, x_train, y_train, config, model_path):
        """Fit the number classifier and plot its training history.

        Two checkpoint callbacks are registered, both monitoring
        ``val_loss``: one writes a file per epoch, the other keeps only the
        best model.

        :param x_train (np.array) - images
        :param y_train (np.array) - labels as hot encodings
        :param config (Configuration) - configuration; ``batch_size``,
            ``nb_epochs``, ``validation_split``, ``shuffle`` and
            ``model_name`` are read here
        :param model_path (str) - folder where the checkpoints are written
        """
        self._model = get_model(config)

        def _val_loss_checkpoint(filename, best_only):
            # Shared settings for both checkpoint callbacks.
            return ModelCheckpoint(os.path.join(model_path, filename),
                                   monitor="val_loss",
                                   verbose=0,
                                   save_best_only=best_only,
                                   mode='min')

        callbacks = [
            # One file per epoch.
            _val_loss_checkpoint("model_{epoch:05d}.h5", False),
            # Single file holding the best model so far.
            _val_loss_checkpoint("best_model.h5", True),
        ]

        self._model_history = self._model.fit(
            x_train,
            y_train,
            batch_size=config.batch_size,
            epochs=config.nb_epochs,
            validation_split=config.validation_split,
            shuffle=config.shuffle,
            callbacks=callbacks)

        self._plot_history(model_path, config.model_name)
Ejemplo n.º 8
0
    def __init__(self, config, datasets):
        """Set up model, optimizer, losses, metrics and checkpointing.

        ``config`` is indexed like a nested mapping. The keys read here are
        ``mode``, ``training``, ``model`` (``type``), the section named after
        the model type, ``optimizer``, ``dataset`` (train mode only) and
        ``save_path``.
        """
        self.config = config
        self.datasets = datasets
        self.mode = config["mode"]

        train_cfg = config["training"]
        self.epochs = train_cfg["epochs"]
        self.print_iter = train_cfg["print_iter"]
        self.save_epoch = train_cfg["save_epoch"]

        # The model's keyword arguments live in the config section that is
        # named after the model type.
        model_type = config["model"]["type"]
        self.model = get_model(model_type)(**config[model_type])

        # Cosine-annealing schedule built from the initial rate and the
        # epoch count.
        schedule = scheduler.CosineAnnealingScheduler(
            train_cfg["init_learning_rate"], self.epochs)
        self.optimizer = optimizers.Adam(schedule, **config["optimizer"])

        # Loss weights and loss objects.
        self.bce_w = train_cfg["bce_loss_weight"]
        self.dice_w = train_cfg["dice_loss_weight"]
        self.bce = losses.BinaryCrossEntropyLoss()
        self.dice = losses.DiceLoss()

        # Training metrics.
        self.train_bce = metrics.Mean()
        self.train_dice = metrics.Mean()
        self.train_iou = metrics.MeanIoU(num_classes=2)

        # The validation loader is only created in train mode.
        if self.mode == "train":
            self.valid_datasets = DataLoader("valid", **config["dataset"])

        # Test metrics.
        self.test_bce = metrics.Mean()
        self.test_dice = metrics.Mean()
        self.test_iou = metrics.MeanIoU(num_classes=2)

        self.get_ckpt_manager(config["save_path"])
Ejemplo n.º 9
0
    for i, (trn_idx, val_idx) in enumerate(splitter.split(df, y=y)):
        if i not in global_params["folds"]:
            continue
        logger.info("=" * 20)
        logger.info(f"Fold {i}")
        logger.info("=" * 20)

        trn_df = df.loc[trn_idx, :].reset_index(drop=True)
        val_df = df.loc[val_idx, :].reset_index(drop=True)

        loaders = {
            phase: C.get_loader(df_, datadir, config, phase,
                                event_level_labels)
            for df_, phase in zip([trn_df, val_df], ["train", "valid"])
        }
        model = models.get_model(config).to(device)
        criterion = C.get_criterion(config).to(device)
        optimizer = C.get_optimizer(model, config)
        scheduler = C.get_scheduler(optimizer, config)

        ema_model = AveragedModel(
            model,
            avg_fn=lambda averaged_model_parameter, model_parameter,
            num_averaged: 0.1 * averaged_model_parameter + 0.9 *
            model_parameter)

        (output_dir / f"fold{i}").mkdir(exist_ok=True, parents=True)

        train(model=model,
              ema_model=ema_model,
              dataloaders=loaders,
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
    """Train, validate, checkpoint and finally test a model for one experiment.

    The experiment directory is ``savedir_base/<hash of exp_dict>``. If a
    ``score_list.pkl`` already exists there, training resumes from the epoch
    after the last recorded one; otherwise it starts at epoch 0. After every
    epoch the model and score list are saved, and a separate "best" copy is
    kept whenever ``val_score`` improves. At the end, the best checkpoint is
    reloaded and evaluated on the test set.

    Args:
        exp_dict: experiment configuration. This function reads the keys
            ``n_classes``, ``obj``, ``patch_size``, ``bkg``, ``model``,
            ``batch_size`` and ``max_epoch``.
        savedir_base: parent directory of the experiment directory.
        datadir: dataset root, passed as ``data_dir`` to the datasets.
        reset: when true, the experiment directory is handed to
            ``hc.delete_and_backup_experiment`` before training.
        num_workers: worker count for every ``DataLoader``.
    """
    # bookkeeping stuff
    # ==================

    savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))
    os.makedirs(savedir, exist_ok=True)

    if reset:
        hc.delete_and_backup_experiment(savedir)
        # The directory may have been removed by the reset; make sure it
        # exists again before anything is written into it.
        os.makedirs(savedir, exist_ok=True)

    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================
    # train set

    data_transform = A.Compose(
        [
            A.Flip(p=0.3),
            A.IAAAffine(p=0.3),
            A.Rotate(p=0.3),
            A.HueSaturationValue(hue_shift_limit=10,
                                 sat_shift_limit=15,
                                 val_shift_limit=10,
                                 p=0.3),
            A.GaussianBlur(3, p=0.3),
            A.GaussNoise(30, p=0.3)
        ],
        keypoint_params=A.KeypointParams(format='xy'),
        additional_targets={
            'mask0': 'mask',
            'mask1': 'mask',
            'mask2': 'mask',
            'keypoints0': 'keypoints',
            'keypoints1': 'keypoints',
            'keypoints2': 'keypoints',
            'keypoints3': 'keypoints',
            'keypoints4': 'keypoints',
            'keypoints5': 'keypoints'
        })

    # The dataset seed is itself drawn at random, so runs are not
    # reproducible unless ``random`` is seeded by the caller.
    random_seed = random.randint(0, 20201009)
    train_set = HEDataset_Fast(data_dir=datadir,
                               n_classes=exp_dict["n_classes"],
                               transform=data_transform,
                               option="Train",
                               random_seed=random_seed,
                               obj_option=exp_dict["obj"],
                               patch_size=exp_dict["patch_size"],
                               bkg_option=exp_dict["bkg"])

    test_transform = A.Compose([A.Resize(1024, 1024)],
                               keypoint_params=A.KeypointParams(format='xy'),
                               additional_targets={
                                   'mask0': 'mask',
                                   'mask1': 'mask'
                               })
    # val set
    val_set = HEDataset(data_dir=datadir,
                        transform=test_transform,
                        option="Validation")

    val_loader = DataLoader(val_set, batch_size=1, num_workers=num_workers)

    # test set
    test_set = HEDataset(data_dir=datadir,
                         transform=test_transform,
                         option="Test")

    test_loader = DataLoader(test_set, batch_size=1, num_workers=num_workers)

    # Model
    # ==================
    model = models.get_model(exp_dict['model'],
                             exp_dict=exp_dict,
                             train_set=train_set).cuda()

    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d" % (s_epoch))

    train_loader = DataLoader(train_set,
                              batch_size=exp_dict["batch_size"],
                              shuffle=True,
                              num_workers=num_workers)

    # Pre-bind the epoch counter: when a resumed experiment has already
    # reached ``max_epoch`` the loop body never runs, and the final report
    # below would otherwise fail with NameError.
    e = s_epoch
    for e in range(s_epoch, exp_dict['max_epoch']):
        score_dict = {}

        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate and Visualize the model
        val_dict = model.val_on_loader(val_loader,
                                       savedir_images=os.path.join(
                                           savedir, "images"),
                                       n_images=7)
        score_dict.update(val_dict)

        # Get new score_dict (train entries override validation ones on
        # key collisions)
        score_dict.update(train_dict)
        score_dict["epoch"] = len(score_list)

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Save Best Checkpoint: always on epoch 0, afterwards only when the
        # new val_score beats every earlier one (missing scores count as 0).
        if e == 0 or (score_dict.get("val_score", 0) >
                      score_df["val_score"][:-1].fillna(0).max()):
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())
            print("Saved Best: %s" % savedir)

    # Evaluate the best checkpoint on the test set.
    model.load_state_dict(
        hu.torch_load(os.path.join(savedir, "model_best.pth")))
    test_dict = model.test_on_loader(test_loader)
    hu.save_pkl(os.path.join(savedir, 'test_iou.pkl'), test_dict)
    print('Test IoU:{}'.format(test_dict["test_iou"]))
    print('Experiment completed et epoch %d' % e)
Ejemplo n.º 11
0
def trainval(exp_dict, savedir, args):
    """Run the train/validate loop for one experiment, with checkpointing.

    exp_dict: dictionary defining the hyperparameters of the experiment;
        the keys ``dataset``, ``transform`` (optional), ``batch_size``,
        ``model``, ``wrapper`` and ``max_epoch`` are read here
    savedir: the directory where the experiment will be saved
    args: arguments passed through the command line; ``use_cuda`` and
        ``datadir`` are read here

    If ``score_list.pkl`` exists in ``savedir`` the experiment resumes from
    the epoch after the last recorded one. Each epoch visualizes, validates,
    then trains, and saves the score list plus model and optimizer state.
    """

    # set seed
    # ==================
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    if args.use_cuda:
        device = 'cuda'
        torch.cuda.manual_seed_all(seed)
        assert torch.cuda.is_available(
        ), 'cuda is not, available please run with "-c 0"'
    else:
        device = 'cpu'

    print('Running on device: %s' % device)

    # Dataset
    # Load val set and train set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   split="val",
                                   transform=exp_dict.get("transform"),
                                   datadir=args.datadir)
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     transform=exp_dict.get("transform"),
                                     datadir=args.datadir)

    # Load train loader, val loader, and vis loader.
    # Each "epoch" draws with replacement: at most 500 training samples, but
    # never fewer than the size of the validation set.
    train_loader = DataLoader(train_set,
                              sampler=RandomSampler(
                                  train_set,
                                  replacement=True,
                                  num_samples=max(min(500, len(train_set)),
                                                  len(val_set))),
                              batch_size=exp_dict["batch_size"])

    val_loader = DataLoader(val_set,
                            shuffle=False,
                            batch_size=exp_dict["batch_size"])
    # NOTE(review): the sampler is built from train_set while the loader
    # iterates val_set; presumably only the fixed indices matter — confirm.
    vis_loader = DataLoader(val_set,
                            sampler=ut.SubsetSampler(train_set,
                                                     indices=[0, 1, 2]),
                            batch_size=1)

    # Create model, opt, wrapper.
    # Use the device selected above instead of an unconditional ``.cuda()``,
    # which made the CPU path crash.
    model_original = models.get_model(exp_dict["model"],
                                      exp_dict=exp_dict).to(device)
    opt = torch.optim.Adam(model_original.parameters(),
                           lr=1e-5,
                           weight_decay=0.0005)

    model = wrappers.get_wrapper(exp_dict["wrapper"],
                                 model=model_original,
                                 opt=opt).to(device)

    # Checkpointing
    # =============
    score_list_path = os.path.join(savedir, "score_list.pkl")
    model_path = os.path.join(savedir, "model_state_dict.pth")
    opt_path = os.path.join(savedir, "opt_state_dict.pth")

    if os.path.exists(score_list_path):
        # resume experiment; map_location lets a checkpoint written on GPU
        # be restored on a CPU-only run
        score_list = hu.load_pkl(score_list_path)
        model.load_state_dict(torch.load(model_path, map_location=device))
        opt.load_state_dict(torch.load(opt_path, map_location=device))
        s_epoch = score_list[-1]["epoch"] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Run training and validation
    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}

        # visualize
        model.vis_on_loader(vis_loader,
                            savedir=os.path.join(savedir, "images"))
        # validate
        score_dict.update(model.val_on_loader(val_loader))

        # train
        score_dict.update(model.train_on_loader(train_loader))

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved in %s" % savedir)
Ejemplo n.º 12
0
def train(cfg):
    """Fit the configured model, save predictions, and plot test results.

    Lagged features are computed first if any expected input file is
    missing. The model is then fitted on the training split, its outputs are
    saved, predictions on the test split are (optionally) inverse-transformed
    and saved, and the predictions are plotted. When ``cfg.mlflow`` is
    truthy, dataset sizes, artifacts and test metrics are logged to MLflow
    and the run is ended at the end.

    Args:
        cfg: configuration object. This function reads ``model``,
            ``metrics``, ``outputs`` (optional ``predictions`` entry,
            default ``"ypred.pkl"``), ``target_pipeline``,
            ``inverse_transform``, ``cv.method``, ``cv.params``, ``plot`` and
            the optional ``mlflow`` flag; data/feature sub-configs are
            derived through ``utils``.
    """
    # NOTE(review): these names are unused here; presumably the import is
    # needed for its side effects (accessor registration) — confirm.
    from src.storm_utils import StormIndexAccessor, StormAccessor

    # Model specific parameters
    model_name = cfg.model
    metrics = cfg.metrics
    pred_path = OmegaConf.select(cfg.outputs,
                                 "predictions",
                                 default="ypred.pkl")

    data_cfg = utils.get_data_cfg(cfg)
    features_cfg = utils.get_features_cfg(cfg)
    processed_data_dir = Path(to_absolute_path(data_cfg.hydra.run.dir))
    inputs_dir = Path(to_absolute_path(features_cfg.hydra.run.dir))
    paths = features_cfg.outputs

    # Setup mlflow
    use_mlflow = OmegaConf.select(cfg, "mlflow", default=False)
    if use_mlflow:
        import mlflow

        run = setup_mlflow(cfg, features_cfg=features_cfg, data_cfg=data_cfg)

    update_tuned_hyperparams(cfg, features_cfg)

    # Compute lagged features if they haven't been computed yet
    if any(not (inputs_dir / path).exists() for path in paths.values()):
        inputs_dir.mkdir(parents=True, exist_ok=True)
        lagged_features_cfg = features_cfg.copy()
        with open_dict(lagged_features_cfg):
            _ = lagged_features_cfg.pop("hydra")
        cwd = os.getcwd()
        os.chdir(inputs_dir)
        try:
            compute_lagged_features(lagged_features_cfg)
        finally:
            # Always restore the working directory, even if the feature
            # computation fails, so later relative paths are not misresolved.
            os.chdir(cwd)

    # General parameters
    target_pipeline_path = cfg.target_pipeline
    inverse_transform = cfg.inverse_transform
    cv_method = cfg.cv.method
    cv_init_params = cfg.cv.params

    logger.info("Loading training data and computing lagged features...")

    X_train = load_processed_data("X_train",
                                  inputs_dir=inputs_dir,
                                  paths=paths)
    y_train = load_processed_data("y_train",
                                  inputs_dir=inputs_dir,
                                  paths=paths)
    X_test = load_processed_data("X_test", inputs_dir=inputs_dir, paths=paths)
    y_test = load_processed_data("y_test", inputs_dir=inputs_dir, paths=paths)
    feature_names = load_processed_data("features_names",
                                        inputs_dir=inputs_dir,
                                        paths=paths)

    # QUESTION: Log everything at end cleaner?
    if use_mlflow:
        n_train_obs, n_features = X_train.shape
        n_test_obs, _ = y_test.shape
        mlflow.log_params({
            "n_train_obs": n_train_obs,
            "n_test_obs": n_test_obs,
            "n_features": n_features,
        })

    logger.info(f"Getting CV split for '{cv_method}' method...")
    cv = get_cv_split(y_train, cv_method, **cv_init_params)
    # QUESTION: Do we still need CV here?

    ###########################################################################
    # Fit and evaluate model
    ###########################################################################

    logger.info(f"Fitting model {model_name}...")
    model = get_model(model_name)(cfg,
                                  cv=cv,
                                  metrics=metrics,
                                  mlflow=use_mlflow)
    model.fit(X_train, y_train, feature_names=feature_names)

    # TODO: Make this more general. It currently only applies to xgboost
    # QUESTION: compute CV score in score method?

    model.save_output()

    ###########################################################################
    # Compute/save predictions on test set
    ###########################################################################

    logger.info("Computing predictions...")
    ypred = model.predict(X_test)
    ypred = convert_pred_to_pd(ypred, y_test)
    if inverse_transform:
        y_test, ypred = inv_transform_targets(
            y_test,
            ypred,
            path=target_pipeline_path,
            processor_dir=processed_data_dir,
        )

    logger.info("Saving predictions...")

    utils.save_output(ypred, pred_path)
    if use_mlflow:
        mlflow.log_artifact(pred_path)

    # XXX: TEMPORARY
    if hasattr(model, "compute_shap_values"):
        shap_values = model.compute_shap_values(X_test)
        shap_values.to_pickle("shap_values.pkl")
        if use_mlflow:
            mlflow.log_artifact("shap_values.pkl")

    ###########################################################################
    # Compute and log test metrics
    ###########################################################################

    if use_mlflow:
        test_score = compute_metrics(y_test, ypred, metrics=metrics)
        if isinstance(metrics, (list, tuple)):
            # NOTE(review): a one-element list/tuple logs nothing here —
            # confirm whether that is intended.
            if len(metrics) > 1:
                for metric in metrics:
                    mlflow.log_metrics({metric: test_score[metric]})
        else:
            mlflow.log_metrics({metrics: test_score})

    ##########################################################################
    # Plot predictions on test set
    ##########################################################################

    # Plot predictions
    plot_kwargs = OmegaConf.to_container(cfg.plot, resolve=True)
    fig, ax = model.plot(
        X_test,
        y_test,
        lead=features_cfg.lead,
        unit=features_cfg.lag_processor.unit,
        **plot_kwargs,
    )
    plt.close()

    if use_mlflow:
        mlflow.end_run()
Ejemplo n.º 13
0
def newminimum(exp_id,
               savedir_base,
               datadir,
               name,
               exp_dict,
               metrics_flag=True):
    """Continue training a saved experiment while tracking the old minimum.

    The model, optimizer state and score list of the experiment stored in
    ``savedir_base/exp_id`` are loaded, a snapshot of the loaded parameters
    is kept as ``minimum``, and training continues with a loss function that
    receives that snapshot. New checkpoints are written to
    ``savedir_base/exp_id/name``.

    Args:
        exp_id: identifier (sub-directory) of the experiment to continue.
        savedir_base: parent directory of all experiments.
        datadir: dataset root passed to ``datasets.get_dataset``.
        name: sub-directory of the old experiment where results are saved.
        exp_dict: experiment configuration. This function reads the keys
            ``runs``, ``dataset``, ``batch_size``, ``model``, ``loss_func``,
            ``opt``, ``max_epoch`` and ``acc_func``.
        metrics_flag: when true, train loss and validation accuracy are
            computed at the start of every epoch.
    """
    # bookkeeping
    # ---------------

    # get experiment directory
    old_modeldir = os.path.join(savedir_base, exp_id)
    savedir = os.path.join(savedir_base, exp_id, name)

    old_exp_dict = hu.load_json(os.path.join(old_modeldir, 'exp_dict.json'))

    # TODO: compare exp dict for possible errors:
    # optimizer have to be the same
    # same network, dataset

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # set seed
    # ---------------
    seed = 42 + exp_dict['runs']
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # -----------

    # Load Train Dataset
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=datadir,
                                     exp_dict=exp_dict)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        drop_last=True,
        shuffle=True,
        batch_size=exp_dict["batch_size"])

    # Load Val Dataset
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=datadir,
                                   exp_dict=exp_dict)

    # Model
    # -----------
    model = models.get_model(exp_dict["model"], train_set=train_set)

    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # Load Optimizer
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch)

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    opt_path = os.path.join(savedir, 'opt_state_dict.pth')

    old_model_path = os.path.join(old_modeldir, 'model.pth')
    old_score_list_path = os.path.join(old_modeldir, 'score_list.pkl')
    old_opt_path = os.path.join(old_modeldir, 'opt_state_dict.pth')

    score_list = hu.load_pkl(old_score_list_path)
    model.load_state_dict(torch.load(old_model_path))
    opt.load_state_dict(torch.load(old_opt_path))
    s_epoch = score_list[-1]['epoch'] + 1

    # Save current model state for comparison. The copies are detached so
    # the snapshot is a constant reference point: a bare ``clone()`` stays
    # connected to the live parameters in the autograd graph.
    minimum = [param.detach().clone() for param in model.parameters()]

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d/%d' %
          (s_epoch, exp_dict['max_epoch']))

    for epoch in range(s_epoch, exp_dict['max_epoch']):
        # Set seed (CUDA seeding is skipped: no cuda available)
        np.random.seed(exp_dict['runs'] + epoch)
        torch.manual_seed(exp_dict['runs'] + epoch)

        score_dict = {"epoch": epoch}

        if metrics_flag:
            # 1. Compute train loss over train set
            # TODO: which loss should be used? (normal or with regularizer?)
            score_dict["train_loss"] = metrics.compute_metric_on_dataset(
                model, train_set, metric_name='softmax_loss')

            # 2. Compute val acc over val set
            score_dict["val_acc"] = metrics.compute_metric_on_dataset(
                model, val_set, metric_name=exp_dict["acc_func"])

        # 3. Train over train loader
        model.train()
        print("%d - Training model with %s..." %
              (epoch, exp_dict["loss_func"]))

        s_time = time.time()
        for images, labels in tqdm.tqdm(train_loader):
            # inputs stay on CPU: no cuda available

            opt.zero_grad()
            loss = loss_function(model, images, labels, minimum,
                                 0.1)  # just works for custom loss function
            loss.backward()
            opt.step()

        e_time = time.time()

        # Record metrics
        score_dict["step_size"] = opt.state["step_size"]
        score_dict["n_forwards"] = opt.state["n_forwards"]
        score_dict["n_backwards"] = opt.state["n_backwards"]
        score_dict["batch_size"] = train_loader.batch_size
        score_dict["train_epoch_time"] = e_time - s_time

        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

        # ``torch.no_grad`` is the actual context manager name
        # (``torch.nograd`` does not exist), and the distance must be
        # interpolated with ``%`` instead of being passed as a second
        # argument to ``print``.
        with torch.no_grad():
            distance = metrics.computedistance(minimum, model)
            print('Current distance: %f' % float(distance))

    print('Experiment completed')
Ejemplo n.º 14
0
            test_set = datasets.get_dataset(
                dataset_dict=exp_dict["dataset"],
                split='test',
                datadir=datadir,
                exp_dict=exp_dict,
                dataset_size=exp_dict['dataset_size'])

            test_loader = DataLoader(test_set,
                                     batch_size=1,
                                     collate_fn=ut.collate_fn,
                                     num_workers=0)
            pprint.pprint(exp_dict)
            # Model
            # ==================
            model = models.get_model(model_dict=exp_dict['model'],
                                     exp_dict=exp_dict,
                                     train_set=train_set).cuda()

            model_path = os.path.join(savedir_base, hash_id, 'model_best.pth')

            # load best model
            model.load_state_dict(hu.torch_load(model_path))
            # loop over the val_loader and saves image
            # get counts
            habitats = []
            for i, batch in enumerate(test_loader):
                habitat = batch['meta'][0]['habitat']
                habitats += [habitat]
            habitats = np.array(habitats)

            val_dict = {}
    val_root = args.val_root
    attack_name = args.attack_name
    print(f"Generate {attack_name} for {model_name}")

    total_file_num = 0
    for folder in sorted(os.listdir(val_root)):
        for file in os.listdir(os.path.join(val_root, folder)):
            total_file_num += 1

    if not os.path.exists(f"./adv_example/{attack_name}/{model_name}"):
        os.makedirs(f"./adv_example/{attack_name}/{model_name}")

    mapping_folder_to_name, mapping_folder_to_label, mapping_name_to_label, mapping_label_to_name = get_mapping_dict(
    )

    model = get_model(model_name)
    model.load_state_dict(torch.load(f"./models/{model_name}.pth"))

    model.to(device)

    criterion = nn.CrossEntropyLoss()

    file_num = 0
    success_num = 0
    error_num = 0
    for folder in sorted(os.listdir(val_root)):
        if not os.path.exists(
                f"./adv_example/{attack_name}/{model_name}/{folder}"):
            os.makedirs(f"./adv_example/{attack_name}/{model_name}/{folder}")
        for file in os.listdir(os.path.join(val_root, folder)):
            file_num += 1
Ejemplo n.º 16
0
def trainval(exp_dict, savedir_base, n_workers, test_only, reset=False):
    """Train and evaluate one experiment, checkpointing after every epoch.

    The experiment folder is ``savedir_base/<hash of exp_dict>``.  If a saved
    score list already exists there, the model state and score history are
    reloaded and training resumes at the epoch following the last recorded
    one; otherwise training starts at epoch 0.  The loop stops before
    epoch 10.

    Args:
        exp_dict: Experiment configuration. It is hashed to name the folder
            and handed to the loader and model factories.
        savedir_base: Directory under which the experiment folder is created.
        n_workers: Forwarded positionally to ``datasets.get_loader``.
        test_only: Forwarded to ``datasets.get_loader`` as a keyword.
        reset: When true, the existing experiment folder is deleted (with
            ``backup_flag=True``) before anything else is written.
    """
    # --- experiment folder -------------------------------------------------
    savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))
    if reset:
        # wipe the previous run, keeping a backup
        hc.delete_experiment(savedir, backup_flag=True)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # --- data --------------------------------------------------------------
    # The "test" split plays the role of the validation loader here.
    train_loader = datasets.get_loader("train", exp_dict, n_workers,
                                       test_only=test_only)
    val_loader = datasets.get_loader("test", exp_dict, n_workers,
                                     test_only=test_only)

    # --- model -------------------------------------------------------------
    model = models.get_model(exp_dict)

    # --- checkpoint --------------------------------------------------------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    # The score list is the marker that a previous run left a checkpoint.
    resuming = os.path.exists(score_list_path)
    if not resuming:
        score_list = []
        first_epoch = 0
    else:
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        first_epoch = score_list[-1]['epoch'] + 1

    # --- train & validate --------------------------------------------------
    print('Starting experiment at epoch %d' % first_epoch)

    for epoch in range(first_epoch, 10):
        # training metrics first, then validation metrics on top of them
        epoch_scores = dict(model.train_on_loader(train_loader))
        epoch_scores.update(model.test_on_loader(val_loader))
        epoch_scores['epoch'] = epoch

        score_list.append(epoch_scores)

        # report the most recent rows and persist model + history
        print(pd.DataFrame(score_list).tail())
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print('Checkpoint Saved: %s' % savedir)

    print('experiment completed')
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
    """Train a model, tracking the best validation score and stopping early.

    The experiment folder is ``savedir_base/<hash of exp_dict>``.  If a score
    list is already stored there the run is resumed from the epoch after the
    last recorded one.  Each epoch evaluates on the test loader (saving
    images), trains, validates, and writes ``model.pth`` and
    ``score_list.pkl``; when the validation score ties or beats the best seen
    so far, ``model_best.pth`` and ``score_list_best.pkl`` are written too.
    Training stops once ``model.waiting`` exceeds 100 epochs without an
    improvement, or at ``exp_dict['max_epoch']``.

    Args:
        exp_dict: Experiment configuration. Keys read here: ``dataset``,
            ``dataset_size``, ``model``, ``batch_size`` and ``max_epoch``.
        savedir_base: Directory under which the experiment folder is created.
        datadir: Forwarded to ``datasets.get_dataset``.
        reset: When true, the existing experiment folder is deleted and
            backed up before starting.
        num_workers: Worker count for every ``DataLoader``.
    """
    # bookkeeping
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # set seed
    # ==================
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # ==================
    train_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                     split="train",
                                     datadir=datadir,
                                     exp_dict=exp_dict,
                                     dataset_size=exp_dict['dataset_size'])
    val_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                   split="val",
                                   datadir=datadir,
                                   exp_dict=exp_dict,
                                   dataset_size=exp_dict['dataset_size'])
    test_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                    split="test",
                                    datadir=datadir,
                                    exp_dict=exp_dict,
                                    dataset_size=exp_dict['dataset_size'])

    # Evaluation loaders yield one sample per batch, in dataset order.
    val_loader = DataLoader(val_set,
                            batch_size=1,
                            collate_fn=ut.collate_fn,
                            num_workers=num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=1,
                             collate_fn=ut.collate_fn,
                             num_workers=num_workers)

    # Model
    # ==================
    model = models.get_model(model_dict=exp_dict['model'],
                             exp_dict=exp_dict,
                             train_set=train_set).cuda()

    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d" % (s_epoch))
    model.waiting = 0
    # Seed the best score from the recorded history so that a resumed run
    # does not overwrite model_best.pth with a worse model on its first epoch.
    model.val_score_best = max(
        (past.get("val_score", -np.inf) for past in score_list),
        default=-np.inf)

    # Each epoch draws 2 * len(test_set) training samples with replacement.
    train_sampler = torch.utils.data.RandomSampler(train_set,
                                                   replacement=True,
                                                   num_samples=2 *
                                                   len(test_set))

    train_loader = DataLoader(train_set,
                              sampler=train_sampler,
                              collate_fn=ut.collate_fn,
                              batch_size=exp_dict["batch_size"],
                              drop_last=True,
                              num_workers=num_workers)

    images_dir = os.path.join(savedir, "images")

    # Pre-set the epoch counter so the final report still works when the loop
    # body never runs (e.g. resuming an already finished experiment).
    e = s_epoch - 1
    for e in range(s_epoch, exp_dict['max_epoch']):
        score_dict = {}

        # Evaluate on the test loader before training; the returned metrics
        # are not recorded, the call is kept for the images it saves.
        model.val_on_loader(test_loader,
                            savedir_images=images_dir,
                            n_images=3)

        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate the model
        val_dict = model.val_on_loader(val_loader)
        score_dict["val_score"] = val_dict["val_score"]

        # Get new score_dict
        score_dict.update(train_dict)
        score_dict["epoch"] = e
        score_dict["waiting"] = model.waiting

        model.waiting += 1

        # Add to score_list (score_dict is shared, so later updates show up)
        score_list += [score_dict]

        # Save Best Checkpoint
        if score_dict["val_score"] >= model.val_score_best:
            test_dict = model.val_on_loader(test_loader,
                                            savedir_images=images_dir,
                                            n_images=3)
            score_dict.update(test_dict)
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())
            model.waiting = 0
            model.val_score_best = score_dict["val_score"]
            print("Saved Best: %s" % savedir)

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # early stopping: too many epochs without a new best validation score
        if model.waiting > 100:
            break

    print('Experiment completed et epoch %d' % e)
Ejemplo n.º 18
0
class IndividualDrum(Individual):
    """Genetic-algorithm individual whose genome is a list of drum notes.

    ``sequence`` holds ``GeneDrum`` genes; each gene exposes its note as
    ``gene.bit`` with ``pitch``, ``timestamp`` and ``duration`` attributes.
    ``sequence`` itself is not created in this class -- presumably the
    ``Individual`` base initialises it; confirm against the base class.
    """

    # Number of instances created so far; also used to number each instance.
    _count = 0
    # Scoring model shared by all instances, loaded once when the class body
    # is executed.  (Alternative weights file: "src/weights_rnn.h5".)
    model = get_model('src/rnn_10_classes.h5')

    def __init__(self, parameters, empty=False):
        """Create an individual; unless ``empty``, fill it with random notes."""
        super().__init__(parameters)
        IndividualDrum._count += 1
        self.ind = IndividualDrum._count
        if not empty:
            self.generate_seq()

    def crossover(self, other):
        """Return two new individuals holding deep copies of both parents.

        The first child copies ``self.sequence`` and the second copies
        ``other.sequence``; no genes are exchanged between them.
        """
        fc = IndividualDrum(self.parameters, empty=True)
        sc = IndividualDrum(self.parameters, empty=True)
        fc.sequence = deepcopy(self.sequence)
        sc.sequence = deepcopy(other.sequence)
        return fc, sc

    def mutate(self):
        """Randomly shift note timestamps by 0.1 in place.

        Each gene is considered with probability ``1 - 1/len(sequence)``; a
        coin flip then picks the direction.  Timestamps are only decreased
        while above 0.5 and only increased while below 7.5.
        """
        # NOTE(review): the `> 1 / len` test mutates most genes; if a low
        # mutation rate was intended the comparison may be inverted -- confirm.
        for key in self.sequence:
            if random.random() > 1 / len(self.sequence):
                if random.random() > 0.5:
                    if key.bit.timestamp > 0.5:
                        key.bit.timestamp -= 0.1
                else:
                    if key.bit.timestamp < 7.5:
                        key.bit.timestamp += 0.1

    def create_midi_file(self, file_name=None):
        """Write the sequence to ``output/<file_name or self.ind>.mid``.

        Args:
            file_name: Base name without extension; defaults to the
                individual's number.
        """
        track = 0
        channel = 9
        tempo = 120  # In BPM
        volume = 100  # 0-127, as per the MIDI standard
        # One track, defaults to format 1 (tempo track is created
        # automatically)
        my_midi = MIDIFile(1)
        my_midi.addTempo(track, 0, tempo)
        my_midi.addProgramChange(0, 10, 0, 0)
        my_midi.tracks[0].addChannelPressure(0, 4, 0)

        repertory = "output/"
        if file_name is not None:
            file = file_name + ".mid"
        else:
            file = str(self.ind) + ".mid"
        for note in self.sequence:
            my_midi.addNote(track, channel, note.bit.pitch, note.bit.timestamp,
                            note.bit.duration, volume)

        with open(repertory + file, "wb") as output_file:
            my_midi.writeFile(output_file)

    def generate_note(self):
        """Append one random note (duration 0.25) unless already present."""
        allowed_pitch = [36, 38, 42, 46, 41, 45, 48, 51, 49]
        # presumably round_down snaps the start time to a 0.25 grid -- confirm
        new_note = Note(
            sample(allowed_pitch, 1)[0],
            round_down(round(uniform(0, 7.75), 2), 0.25), 0.25)
        # NOTE(review): this compares a Note against GeneDrum items, so the
        # duplicate check depends on how those classes define equality.
        if new_note not in self.sequence:
            self.sequence.append(GeneDrum(new_note))

    def generate_seq(self):
        """Attempt to add between 20 and 100 random notes, then write the MIDI file."""
        max_number_of_notes = 100
        number_of_notes = randint(20, max_number_of_notes)
        for _ in range(number_of_notes):
            self.generate_note()

        self.create_midi_file()

    def fitness(self):
        """Return the index of the class the model scores highest.

        The sequence is written to its MIDI file and read back with
        ``get_drum``; 0 is returned when that yields no data.
        """
        repertory = "output/"
        file = repertory + str(self.ind) + ".mid"
        self.create_midi_file(str(self.ind))
        data = get_drum(file)
        if data is None:
            return 0
        prediction = self.model.predict(np.stack([data.astype(dtype=float)]))
        index_max = np.argmax(prediction)
        # identity lookup: the winning class index is the fitness value
        pred = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10][index_max]
        return pred

    def fitness_reg(self):
        """Return the model's first output value scaled by 100.

        Returns 0 when ``get_drum`` yields no data, matching ``fitness``.
        """
        self.create_midi_file()
        repertory = "output/"
        file = repertory + str(self.ind) + ".mid"
        data = get_drum(file)
        if data is None:
            # same guard as fitness(); previously this raised AttributeError
            return 0
        prediction = self.model.predict(np.stack([data.astype(dtype=float)]))
        # Test for emptiness explicitly: the truth value of a multi-element
        # array is ambiguous and raises ValueError.
        if np.size(prediction):
            prediction = prediction[0][0]
        return prediction * 100

    def overlapped_keys(self, key_to_check, bars):
        """Return keys of a different pitch starting within ``key_to_check``.

        A key counts when its timestamp lies in the closed interval from
        ``key_to_check.timestamp`` to that timestamp plus its duration.
        """
        overlapped = []
        for key in bars:
            if key_to_check.pitch != key.pitch:
                if key_to_check.timestamp <= key.timestamp <= (
                        key_to_check.timestamp + key_to_check.duration):
                    overlapped.append(key)
        return overlapped

    def check_collision(self, key_to_check, changed_pitch, bars):
        """Return True when shifting the pitch would not collide with ``bars``.

        A collision is a key in ``bars`` that already has the shifted pitch
        and starts within the time span of ``key_to_check``.
        """
        for key in bars:
            if (key_to_check.bit.pitch + changed_pitch) == key.bit.pitch:
                if key_to_check.bit.timestamp <= key.bit.timestamp <= (
                        key_to_check.bit.timestamp +
                        key_to_check.bit.duration):
                    return False
        return True

    def __eq__(self, other):
        """Two individuals are equal when their sequences match note by note."""
        if type(other) is not type(self):
            return False
        # zip() stops at the shorter sequence, so compare the lengths first;
        # otherwise a sequence would equal any longer one sharing its prefix.
        if len(self.sequence) != len(other.sequence):
            return False
        for a, b in zip(self.sequence, other.sequence):
            if a.bit != b.bit:
                return False
        return True

    def __repr__(self):
        """Return the individual's number as a string."""
        return str(self.ind)

    def __hash__(self):
        """Return a hash that is stable and consistent with ``__eq__``.

        Equal individuals always have sequences of equal length, so hashing
        the length keeps equal objects in the same bucket.  The previous
        random value changed on every call, which broke set/dict lookups.
        """
        return hash(len(self.sequence))
Ejemplo n.º 19
0
def trainval_svrg(exp_dict, savedir, datadir, metrics_flag=True):
    """Run the SVRG training/validation loop, checkpointing every epoch.

    When both ``run_dict.pkl`` and ``score_list.pkl`` exist in ``savedir``
    the score history, model state and optimizer state are reloaded and the
    run continues after the last recorded epoch; otherwise a fresh
    ``run_dict.pkl`` is written and training starts at epoch 0.

    Args:
        exp_dict: Experiment configuration. Keys read here: ``dataset``,
            ``batch_size``, ``model``, ``loss_func``, ``acc_func`` and
            ``max_epoch``.
        savedir: Existing directory that holds the checkpoint files.
        datadir: Forwarded to ``datasets.get_dataset``.
        metrics_flag: When true, the train loss and validation accuracy are
            computed at the start of every epoch.

    Returns:
        The list of per-epoch score dictionaries.
    """
    pprint.pprint(exp_dict)

    # --- data: shuffled training loader plus a validation set --------------
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=datadir,
                                     exp_dict=exp_dict)
    train_loader = DataLoader(train_set,
                              drop_last=False,
                              shuffle=True,
                              batch_size=exp_dict["batch_size"])
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=datadir,
                                   exp_dict=exp_dict)

    # --- model, loss and SVRG optimizer ------------------------------------
    model = models.get_model(exp_dict["model"], train_set=train_set).cuda()
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])
    opt = get_svrg_optimizer(model,
                             loss_function,
                             train_loader=train_loader,
                             lr=get_svrg_step_size(exp_dict))

    # --- checkpoint files --------------------------------------------------
    run_dict_path = savedir + "/run_dict.pkl"
    history_path = savedir + "/score_list.pkl"
    model_state_path = savedir + "/model_state_dict.pth"
    opt_state_path = savedir + "/opt_state_dict.pth"

    if os.path.exists(run_dict_path) and os.path.exists(history_path):
        # pick up where the previous run stopped
        score_list = ut.load_pkl(history_path)
        model.load_state_dict(torch.load(model_state_path))
        opt.load_state_dict(torch.load(opt_state_path))
        first_epoch = score_list[-1]["epoch"] + 1
    else:
        # fresh run: leave a marker file behind
        ut.save_pkl(run_dict_path, {"running": 1})
        score_list = []
        first_epoch = 0

    for epoch in range(first_epoch, exp_dict["max_epoch"]):
        epoch_scores = {"epoch": epoch}

        if metrics_flag:
            # metrics are measured before this epoch's parameter updates
            epoch_scores["train_loss"] = metrics.compute_metric_on_dataset(
                model, train_set, metric_name=exp_dict["loss_func"])
            epoch_scores["val_acc"] = metrics.compute_metric_on_dataset(
                model, val_set, metric_name=exp_dict["acc_func"])

        # one pass over the training loader
        model.train()
        print("%d - Training model with %s..." %
              (epoch, exp_dict["loss_func"]))

        started = time.time()
        for images, labels in tqdm.tqdm(train_loader):
            images, labels = images.cuda(), labels.cuda()

            opt.zero_grad()

            def closure(svrg_model):
                # the optimizer chooses which model to evaluate the loss on
                return loss_function(svrg_model,
                                     images,
                                     labels,
                                     backwards=True)

            opt.step(closure)
        finished = time.time()

        # optimizer statistics and timing for this epoch
        epoch_scores.update({
            "step_size": opt.state["step_size"],
            "batch_size": train_loader.batch_size,
            "train_epoch_time": finished - started,
        })
        score_list.append(epoch_scores)

        # report and persist history, model and optimizer
        print(pd.DataFrame(score_list).tail())
        ut.save_pkl(history_path, score_list)
        ut.torch_save(model_state_path, model.state_dict())
        ut.torch_save(opt_state_path, opt.state_dict())
        print("Saved: %s" % savedir)

    return score_list
Ejemplo n.º 20
0
def trainval(exp_dict, savedir_base, datadir_base, reset=False):
    """Run an active-learning experiment: label, train, checkpoint, repeat.

    The experiment folder is ``savedir_base/<hash of exp_dict>``.  Every
    cycle labels a new batch of the active set (only when the cycle starts at
    its first inner epoch), saves visualizations, and then trains for up to
    ``exp_dict['max_epoch']`` inner epochs, checkpointing after each one.  If
    a score list already exists, the model, the active set and the cycle /
    inner-epoch position are restored from disk.

    Args:
        exp_dict: Experiment configuration. Keys read here: ``dataset``,
            ``batch_size``, ``model`` (with ``name``), ``sampler`` (with
            ``train``), ``max_cycle`` and ``max_epoch``.
        savedir_base: Directory under which the experiment folder is created.
        datadir_base: Forwarded to ``datasets.get_dataset``.
        reset: When true, the existing experiment folder is deleted and
            backed up before starting.
    """
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================

    # load train and active set
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     datadir_base=datadir_base,
                                     exp_dict=exp_dict)

    active_set = ActiveLearningDataset(train_set, random_state=42)

    # val set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   split="val",
                                   datadir_base=datadir_base,
                                   exp_dict=exp_dict)
    val_loader = DataLoader(val_set, batch_size=exp_dict["batch_size"])

    # `vis_loader` was referenced below without ever being defined here,
    # which raised NameError on the first cycle.
    # NOTE(review): the validation loader is used for visualization; confirm
    # whether a dedicated loader (e.g. a different batch size) was intended.
    vis_loader = val_loader

    # Model
    # ==================
    model = models.get_model(model_name=exp_dict['model']['name'],
                             exp_dict=exp_dict).cuda()

    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.set_state_dict(hu.torch_load(model_path))
        active_set.load_state_dict(
            hu.load_pkl(os.path.join(savedir, "active_set.pkl")))
        score_list = hu.load_pkl(score_list_path)
        inner_s_epoch = score_list[-1]['inner_epoch'] + 1
        s_cycle = score_list[-1]['cycle']
    else:
        # restart experiment
        score_list = []
        inner_s_epoch = 0
        s_cycle = 0

    # Train & Val
    # ==================
    print("Starting experiment at cycle %d epoch %d" %
          (s_cycle, inner_s_epoch))

    for c in range(s_cycle, exp_dict['max_cycle']):
        # Set seed (one seed per cycle)
        np.random.seed(c)
        torch.manual_seed(c)
        torch.cuda.manual_seed_all(c)

        # Only label new data when the cycle starts from scratch; a cycle
        # resumed mid-way reuses the active set restored from disk.
        if inner_s_epoch == 0:
            active_set.label_next_batch(model)
            hu.save_pkl(os.path.join(savedir, "active_set.pkl"),
                        active_set.state_dict())

        train_loader = DataLoader(active_set,
                                  sampler=samplers.get_sampler(
                                      exp_dict['sampler']['train'],
                                      active_set),
                                  batch_size=exp_dict["batch_size"])
        # Visualize the model
        model.vis_on_loader(vis_loader,
                            savedir=os.path.join(savedir, "images"))

        for e in range(inner_s_epoch, exp_dict['max_epoch']):
            # Validate only at the start of each cycle
            score_dict = {}
            if e == 0:
                score_dict.update(model.val_on_loader(val_loader))

            # Train the model
            score_dict.update(model.train_on_loader(train_loader))

            # Record position and active-set statistics
            score_dict["epoch"] = len(score_list)
            score_dict["inner_epoch"] = e
            score_dict["cycle"] = c
            score_dict['n_ratio'] = active_set.n_labelled_ratio
            score_dict["n_train"] = len(train_loader.dataset)
            score_dict["n_pool"] = len(train_loader.dataset.pool)

            # Add to score_list and save checkpoint
            score_list += [score_dict]

            # Report & Save
            score_df = pd.DataFrame(score_list)
            print("\n", score_df.tail(), "\n")
            hu.torch_save(model_path, model.get_state_dict())
            hu.save_pkl(score_list_path, score_list)
            print("Checkpoint Saved: %s" % savedir)

        # every cycle after the (possibly resumed) first starts at epoch 0
        inner_s_epoch = 0
Ejemplo n.º 21
0
def trainval(exp_dict,
             savedir_base,
             reset=False,
             num_workers=0,
             run_ssl=False):
    """Train, validate and test a model, keeping the best checkpoint.

    The experiment folder is ``savedir_base/<hash of exp_dict>``.  With
    ``run_ssl`` the model is only evaluated once on the test loader (if no
    score list exists yet) and the function returns.  Otherwise each epoch
    trains, validates, tests and writes ``checkpoint.pth`` and
    ``score_list.pkl``; an epoch whose ``exp_dict['target_loss']`` metric
    ties or beats every earlier epoch also writes ``checkpoint_best.pth``
    and ``score_list_best.pkl``.

    Args:
        exp_dict: Experiment configuration (dataset, loader, model and
            ``max_epoch`` / ``target_loss`` settings are read from it).
        savedir_base: Directory under which the experiment folder is created.
        reset: When true, the existing experiment folder is deleted (with
            ``backup_flag=True``) before starting.
        num_workers: Worker count for every ``DataLoader``.
        run_ssl: When true, run the test-only SSL evaluation and return.
    """
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # load datasets
    # ==========================
    train_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_train"],
        data_root=exp_dict["dataset_train_root"],
        split="train",
        transform=exp_dict["transform_train"],
        classes=exp_dict["classes_train"],
        support_size=exp_dict["support_size_train"],
        query_size=exp_dict["query_size_train"],
        n_iters=exp_dict["train_iters"],
        unlabeled_size=exp_dict["unlabeled_size_train"])

    val_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_val"],
        data_root=exp_dict["dataset_val_root"],
        split="val",
        transform=exp_dict["transform_val"],
        classes=exp_dict["classes_val"],
        support_size=exp_dict["support_size_val"],
        query_size=exp_dict["query_size_val"],
        n_iters=exp_dict["val_iters"],
        unlabeled_size=exp_dict["unlabeled_size_val"])

    # the test split reuses the validation transform
    test_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_test"],
        data_root=exp_dict["dataset_test_root"],
        split="test",
        transform=exp_dict["transform_val"],
        classes=exp_dict["classes_test"],
        support_size=exp_dict["support_size_test"],
        query_size=exp_dict["query_size_test"],
        n_iters=exp_dict["test_iters"],
        unlabeled_size=exp_dict["unlabeled_size_test"])

    # get dataloaders
    # ==========================
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=exp_dict["batch_size"],
        shuffle=True,
        num_workers=num_workers,
        collate_fn=ut.get_collate(exp_dict["collate_fn"]),
        drop_last=True)
    # The identity collate hands each batch over as a plain list of samples.
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=lambda x: x,
                                             drop_last=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=num_workers,
                                              collate_fn=lambda x: x,
                                              drop_last=True)

    # create model and trainer
    # ==========================

    # Create model, opt, wrapper
    backbone = backbones.get_backbone(
        backbone_name=exp_dict['model']["backbone"], exp_dict=exp_dict)
    model = models.get_model(model_name=exp_dict["model"]['name'],
                             backbone=backbone,
                             n_classes=exp_dict["n_classes"],
                             exp_dict=exp_dict)

    if run_ssl:
        # runs the SSL experiments: a single test pass, stored once
        score_list_path = os.path.join(savedir, 'score_list.pkl')
        if not os.path.exists(score_list_path):
            test_dict = model.test_on_loader(test_loader, max_iter=None)
            hu.save_pkl(score_list_path, [test_dict])
        return

    # Checkpoint
    # -----------
    checkpoint_path = os.path.join(savedir, 'checkpoint.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(checkpoint_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    target = exp_dict["target_loss"]

    # Run training and validation
    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}
        score_dict.update(model.get_lr())

        # train
        score_dict.update(model.train_on_loader(train_loader))

        # validate
        score_dict.update(model.val_on_loader(val_loader))
        score_dict.update(model.test_on_loader(test_loader))

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report
        score_df = pd.DataFrame(score_list)
        print(score_df.tail())

        # Save checkpoint
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(checkpoint_path, model.get_state_dict())
        print("Saved: %s" % savedir)

        # Compare against every earlier epoch.  With no history the first
        # epoch is the best by definition; comparing it with the max/min of
        # an empty Series (NaN) would always be False and skip the save.
        previous = score_df[target][:-1]
        if previous.empty:
            is_best = True
        elif "accuracy" in target:
            # accuracy-like targets: higher is better
            is_best = score_dict[target] >= previous.max()
        else:
            # loss-like targets: lower is better
            is_best = score_dict[target] <= previous.min()

        # Save best checkpoint
        if is_best:
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "checkpoint_best.pth"),
                          model.get_state_dict())
            print("Saved Best: %s" % savedir)

        # Check for end of training conditions
        if model.is_end_of_training():
            break
Ejemplo n.º 22
0
def trainval(exp_dict,
             savedir_base,
             reset,
             metrics_flag=True,
             datadir=None,
             cuda=False):
    """Train a model with a configurable optimizer, checkpointing every epoch.

    The experiment folder is ``savedir_base/<hash of exp_dict>``.  If a score
    list already exists there, the score history, model state and optimizer
    state are reloaded and training continues after the last recorded epoch.

    Args:
        exp_dict: Experiment configuration. Keys read here: ``runs``,
            ``dataset``, ``batch_size``, ``opt``, ``model``, ``loss_func``,
            ``acc_func`` and ``max_epoch``.
        savedir_base: Directory under which the experiment folder is created.
        reset: When true, the existing experiment folder is deleted (with
            ``backup_flag=True``) before starting.
        metrics_flag: When true, the train loss and validation accuracy are
            computed at the start of every epoch.
        datadir: Forwarded to ``datasets.get_dataset``.
        cuda: Run on ``'cuda'`` when true, otherwise on ``'cpu'``.

    Returns:
        The list of per-epoch score dictionaries.
    """
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    # pprint.pprint already writes to stdout and returns None; wrapping it in
    # print() emitted a stray "None" line.
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # set seed
    # ==================
    seed = 42 + exp_dict['runs']
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        device = 'cuda'
        torch.cuda.manual_seed_all(seed)
    else:
        device = 'cpu'

    print('Running on device: %s' % device)

    # Dataset
    # ==================
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=datadir,
                                     exp_dict=exp_dict)

    train_loader = DataLoader(train_set,
                              drop_last=True,
                              shuffle=True,
                              sampler=None,
                              batch_size=exp_dict["batch_size"])

    # Load Val Dataset
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=datadir,
                                   exp_dict=exp_dict)

    # Model
    # ==================
    use_backpack = exp_dict['opt'].get("backpack", False)

    model = models.get_model(exp_dict["model"],
                             train_set=train_set,
                             backpack=use_backpack).to(device=device)
    if use_backpack:
        assert exp_dict['opt']['name'] in ['nus_wrapper', 'adaptive_second']
        # imported lazily so backpack is only required when it is requested
        from backpack import extend
        model = extend(model)

    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # Load Optimizer
    # ==============
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch,
                                   n_train=len(train_set),
                                   train_loader=train_loader,
                                   model=model,
                                   loss_function=loss_function,
                                   exp_dict=exp_dict,
                                   batch_size=exp_dict["batch_size"])

    # Checkpointing
    # =============
    score_list_path = os.path.join(savedir, "score_list.pkl")
    model_path = os.path.join(savedir, "model_state_dict.pth")
    opt_path = os.path.join(savedir, "opt_state_dict.pth")

    if os.path.exists(score_list_path):
        # resume experiment
        score_list = ut.load_pkl(score_list_path)
        # map_location lets a checkpoint written on a GPU machine be resumed
        # on CPU (and vice versa) instead of failing while deserializing.
        model_state = torch.load(model_path, map_location=device)
        if use_backpack:
            # non-strict: tolerate key mismatches for the extended model
            model.load_state_dict(model_state, strict=False)
        else:
            model.load_state_dict(model_state)
        opt.load_state_dict(torch.load(opt_path, map_location=device))
        s_epoch = score_list[-1]["epoch"] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Start Training
    # ==============
    batch_size = train_loader.batch_size
    # Whole batches per epoch, floored at 1 so the "step" ratio below cannot
    # divide by zero when the dataset is smaller than one batch.
    whole_batches_per_epoch = max(int(n_batches_per_epoch), 1)

    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        # Set seed (per epoch, so a resumed run matches an uninterrupted one)
        seed = epoch + exp_dict['runs']
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        score_dict = {"epoch": epoch}

        # Validate
        # --------
        if metrics_flag:
            # 1. Compute train loss over train set
            score_dict["train_loss"] = metrics.compute_metric_on_dataset(
                model,
                train_set,
                metric_name=exp_dict["loss_func"],
                batch_size=exp_dict['batch_size'])

            # 2. Compute val acc over val set
            score_dict["val_acc"] = metrics.compute_metric_on_dataset(
                model,
                val_set,
                metric_name=exp_dict["acc_func"],
                batch_size=exp_dict['batch_size'])

        # Train
        # -----
        model.train()
        print("%d - Training model with %s..." %
              (epoch, exp_dict["loss_func"]))

        s_time = time.time()

        train_on_loader(model, train_set, train_loader, opt, loss_function,
                        epoch, use_backpack)

        e_time = time.time()

        # Record step size and batch size
        score_dict["step"] = opt.state.get("step",
                                           0) / whole_batches_per_epoch
        score_dict["step_size"] = opt.state.get("step_size", {})
        score_dict["step_size_avg"] = opt.state.get("step_size_avg", {})
        score_dict["n_forwards"] = opt.state.get("n_forwards", {})
        score_dict["n_backwards"] = opt.state.get("n_backwards", {})
        score_dict["grad_norm"] = opt.state.get("grad_norm", {})
        score_dict["batch_size"] = batch_size
        score_dict["train_epoch_time"] = e_time - s_time
        # NOTE(review): unlike the .get() lookups above this indexes
        # opt.state directly -- confirm every optimizer provides "gv_stats".
        score_dict.update(opt.state["gv_stats"])

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        ut.save_pkl(score_list_path, score_list)
        ut.torch_save(model_path, model.state_dict())
        ut.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

    return score_list
Ejemplo n.º 23
0
                        type=int,
                        metavar='T',
                        default=None,
                        help="Enter the target class ID")
    args = parser.parse_args()

    yaml_path = args.yaml
    with open(yaml_path, 'r') as f:
        vis_args = DictAsMember(yaml.safe_load(f))

    if args.img:
        vis_args.DATASET.path = args.img
        vis_args.DATASET.target_class = args.target

    # Load model & pretrained params
    pretrained_model = get_model(vis_args.MODEL)
    state = torch.load(vis_args.MODEL.path)
    try:
        pretrained_model.load_state_dict(state["model"])
    except KeyError as e:
        pretrained_model.load_state_dict(state)

    alpha = vis_args.RESULTS.alpha
    h = w = vis_args.DATASET.size

    # Initialize GBP
    GBP = GuidedBackprop(pretrained_model, vis_args.MODEL.name)

    # Get filenames and create absolute paths
    if os.path.isdir(vis_args.DATASET.path):
        files = os.listdir(vis_args.DATASET.path)
Ejemplo n.º 24
0
def eval_mtl_single(args):
    """Evaluate each saved ``best*`` checkpoint of a single task on its test set.

    The incoming ``args`` is only read for ``args.save_path``; it is then
    replaced by the object loaded from ``<save_path>/args``. Every file in
    ``save_path`` whose name starts with ``best`` is loaded into the model (in
    ascending order of the integer that follows the first underscore), run
    through a ``Tester`` on the test set, and the outcome is logged.

    The score reported per checkpoint is ``acc`` for tasks evaluated with
    accuracy only (tasks outside ``SEQ_LABEL_TASK`` and ``"pos"``) and ``f``
    otherwise.

    Args:
        args: Object with a ``save_path`` attribute. The reloaded arguments
            must provide ``data_path``, ``task_id``, ``save_path`` and
            ``batch_size`` and are also handed to ``get_model``.

    Returns:
        None. Results are reported through ``logger`` only.
    """
    global logger
    # NOTE(review): train_mlt_single in this file stores the arguments as
    # "args.th" -- confirm that "args" is the file name expected here.
    args = torch.load(os.path.join(args.save_path, "args"))
    print(args)
    logger.info(args)
    task_lst, vocabs = utils.get_data(args.data_path)
    task_db = task_lst[args.task_id]
    train_data = task_db.train_set
    dev_data = task_db.dev_set
    test_data = task_db.test_set
    task_name = task_db.task_name

    # text classification
    for ds in [train_data, dev_data, test_data]:
        ds.rename_field("words_idx", "x")
        ds.rename_field("label", "y")
        ds.set_input("x", "y", "task_id")
        ds.set_target("y")
    # seq label
    if task_name in SEQ_LABEL_TASK:
        for ds in [train_data, dev_data, test_data]:
            ds.set_input("seq_len")
            ds.set_target("seq_len")

    logger = utils.get_logger(__name__)
    logger.info("task name: {}, task id: {}".format(task_db.task_name, task_db.task_id))
    logger.info(
        "train len {}, dev len {}, test len {}".format(
            len(train_data), len(dev_data), len(test_data)
        )
    )

    # init model
    model = get_model(args, task_lst, vocabs)

    # One flag drives both the metric list and the score that is read back,
    # so accuracy-only tasks no longer report a constant 0.0.
    use_acc = task_name not in SEQ_LABEL_TASK or task_name == "pos"
    if use_acc:
        metrics = [
            AccuracyMetric(target="y"),
        ]
    else:
        metrics = [
            SpanFPreRecMetric(
                tag_vocab=vocabs[task_name],
                pred="pred",
                target="y",
                seq_len="seq_len",
                encoding_type="bioes" if task_name == "ner" else "chunk",
            ),
            AccuracyMetric(target="y"),
        ]

    init_best = None  # score of the first checkpoint, used as the baseline
    paths = [path for path in os.listdir(args.save_path) if path.startswith("best")]
    paths = sorted(paths, key=lambda x: int(x.split("_")[1]))
    for eval_time, fname in enumerate(paths):
        path = os.path.join(args.save_path, fname)
        state = torch.load(path, map_location="cpu")
        model.load_state_dict(state)
        tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = tester.test()

        # Take the score from the first metric that exposes the wanted key.
        val = 0.0
        for metric_dict in res.values():
            if use_acc and "acc" in metric_dict:
                val = metric_dict["acc"]
                break
            elif "f" in metric_dict:
                val = metric_dict["f"]
                break

        if init_best is None:
            init_best = val
        logger.info(
            "No #%d: best %f, %s, path: %s, is better: %s",
            eval_time,
            val,
            tester._format_eval_results(res),
            path,
            val > init_best,
        )
Ejemplo n.º 25
0
def trainval(exp_dict, savedir_base, reset=False):
    """Run (or resume) the training/validation loop for one experiment.

    The experiment lives in ``<savedir_base>/<hash of exp_dict>``. If a score
    list is already stored there, the model, optimizer and score list are
    restored and training continues from the epoch after the last recorded
    one. After every epoch the score list, model weights and optimizer state
    are written back to that folder.

    Args:
        exp_dict: Experiment configuration. Keys read here: ``runs``,
            ``dataset``, ``batch_size``, ``model``, ``loss_func``,
            ``acc_func``, ``opt`` and ``max_epoch``.
        savedir_base: Base directory; also passed as ``datadir`` to the
            dataset factory.
        reset: When true, the existing experiment folder is deleted (with a
            backup) before starting.

    Returns:
        None.
    """

    def _seed_all(value):
        # Seed numpy and torch (CPU + all CUDA devices) with the same value.
        np.random.seed(value)
        torch.manual_seed(value)
        torch.cuda.manual_seed_all(value)

    def _load_split(is_train):
        # Both splits come from the same factory and differ only in the flag.
        return datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                    train_flag=is_train,
                                    datadir=savedir_base,
                                    exp_dict=exp_dict)

    # ---- bookkeeping -------------------------------------------------
    savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # ---- initial seed ------------------------------------------------
    _seed_all(42 + exp_dict['runs'])

    # ---- data --------------------------------------------------------
    train_set = _load_split(True)
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=exp_dict["batch_size"],
        shuffle=True,
        drop_last=True)
    val_set = _load_split(False)

    # ---- model and loss ----------------------------------------------
    net = models.get_model(exp_dict["model"], train_set=train_set)
    model = net.cuda()
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # ---- optional fstar computation ----------------------------------
    if exp_dict['opt'].get('fstar_flag'):
        ut.compute_fstar(train_set, loss_function, savedir_base, exp_dict)

    # ---- optimizer ---------------------------------------------------
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt_dict=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch)

    # ---- checkpoint files --------------------------------------------
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    model_path = os.path.join(savedir, 'model.pth')
    opt_path = os.path.join(savedir, 'opt_state_dict.pth')

    if not os.path.exists(score_list_path):
        # nothing stored yet: start from scratch
        score_list = []
        first_epoch = 0
    else:
        # resume from the stored state
        score_list = hu.load_pkl(score_list_path)
        model.load_state_dict(torch.load(model_path))
        opt.load_state_dict(torch.load(opt_path))
        first_epoch = score_list[-1]['epoch'] + 1

    # ---- train & val -------------------------------------------------
    max_epoch = exp_dict['max_epoch']
    print('Starting experiment at epoch %d/%d' % (first_epoch, max_epoch))

    for epoch in range(first_epoch, max_epoch):
        # re-seed at the start of every epoch
        _seed_all(epoch + exp_dict['runs'])

        # metrics are measured before this epoch's parameter updates
        train_loss = metrics.compute_metric_on_dataset(
            model, train_set, metric_name=exp_dict["loss_func"])
        val_acc = metrics.compute_metric_on_dataset(
            model, val_set, metric_name=exp_dict["acc_func"])

        model.train()
        print("%d - Training model with %s..." % (epoch, exp_dict["loss_func"]))

        t_start = time.time()
        for batch in tqdm.tqdm(train_loader):
            images = batch["images"].cuda()
            labels = batch["labels"].cuda()

            opt.zero_grad()
            # the optimizer evaluates the loss itself through this closure
            opt.step(lambda: loss_function(model, images, labels,
                                           backwards=True))
        t_end = time.time()

        # assemble this epoch's record (key order kept stable)
        opt_state = opt.state
        score_dict = {
            "train_loss": train_loss,
            "val_acc": val_acc,
            "epoch": epoch,
            "step_size": opt_state["step_size"],
            "step_size_avg": opt_state["step_size_avg"],
            "n_forwards": opt_state["n_forwards"],
            "n_backwards": opt_state["n_backwards"],
            "grad_norm": opt_state["grad_norm"],
            "batch_size": train_loader.batch_size,
            "train_epoch_time": t_end - t_start,
        }
        score_list.append(score_dict)

        # report and persist
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

    print('Experiment completed')
Ejemplo n.º 26
0
def train_mlt_single(args):
    """Train one task with repeated train -> test -> prune rounds.

    The task selected by ``args.task_id`` is trained for
    ``pruner.pruning_iter + 1`` rounds. Each round builds a new optimizer,
    trains with ``Trainer``, evaluates on the test set with ``Tester``, saves
    the model weights as ``best_<prune_times>_<cur_rate>.th``, prunes the
    model and saves the pruner state. Scalars are written to a
    ``SummaryWriter`` that is closed when the function exits, whether it
    returns normally or raises.

    Args:
        args: Run configuration. Attributes read here: ``data_path``,
            ``task_id``, ``debug``, ``tb_path``, ``init_weights``,
            ``need_cut``, ``final_rate``, ``pruning_iter``, ``init_masks``,
            ``save_path``, ``batch_size``, ``optim`` and ``epochs``. In debug
            mode ``epochs`` and ``pruning_iter`` are overwritten with 3.

    Returns:
        None.
    """
    global logger
    logger.info(args)
    task_lst, vocabs = utils.get_data(args.data_path)
    task_db = task_lst[args.task_id]
    train_data = task_db.train_set
    dev_data = task_db.dev_set
    test_data = task_db.test_set
    task_name = task_db.task_name

    if args.debug:
        # shrink everything so a full pass finishes quickly
        train_data = train_data[:200]
        dev_data = dev_data[:200]
        test_data = test_data[:200]
        args.epochs = 3
        args.pruning_iter = 3

    summary_writer = SummaryWriter(
        log_dir=os.path.join(args.tb_path, "global/%s" % task_name)
    )

    # Everything below runs under try/finally so the writer is always closed.
    try:
        logger.info(
            "task name: {}, task id: {}".format(task_db.task_name, task_db.task_id)
        )
        logger.info(
            "train len {}, dev len {}, test len {}".format(
                len(train_data), len(dev_data), len(test_data)
            )
        )

        # init model
        model = get_model(args, task_lst, vocabs)

        logger.info("model: \n{}".format(model))
        if args.init_weights is not None:
            utils.load_model(model, args.init_weights)

        if utils.need_acc(task_name):
            metrics = [AccuracyMetric(target="y"), MetricInForward(val_name="loss")]
            metric_key = "acc"

        else:
            metrics = [
                YangJieSpanMetric(
                    tag_vocab=vocabs[task_name],
                    pred="pred",
                    target="y",
                    seq_len="seq_len",
                    encoding_type="bioes" if task_name == "ner" else "bio",
                ),
                MetricInForward(val_name="loss"),
            ]
            metric_key = "f"
        logger.info(metrics)

        # A parameter is prunable when it is trainable, is not a bias and its
        # name contains at least one of the comma-separated ``need_cut`` keys.
        need_cut_names = list({s.strip() for s in args.need_cut.split(",")})
        prune_names = [
            name
            for name, p in model.named_parameters()
            if p.requires_grad
            and "bias" not in name
            and any(n in name for n in need_cut_names)
        ]

        # get Pruning class
        pruner = Pruning(
            model,
            prune_names,
            final_rate=args.final_rate,
            pruning_iter=args.pruning_iter,
        )
        if args.init_masks is not None:
            pruner.load(args.init_masks)
            pruner.apply_mask(pruner.remain_mask, pruner._model)
        # save checkpoint
        os.makedirs(args.save_path, exist_ok=True)

        logger.info('Saving init-weights to {}'.format(args.save_path))
        torch.save(
            model.cpu().state_dict(), os.path.join(args.save_path, "init_weights.th")
        )
        torch.save(args, os.path.join(args.save_path, "args.th"))
        # start training and pruning
        summary_writer.add_scalar("remain_rate", 100.0, 0)
        summary_writer.add_scalar("cutoff", 0.0, 0)

        if args.init_weights is not None:
            # evaluate the provided initial weights before any training
            init_tester = Tester(
                test_data,
                model,
                metrics=metrics,
                batch_size=args.batch_size,
                num_workers=4,
                device="cuda",
                use_tqdm=False,
            )
            res = init_tester.test()
            logger.info("No init testing, Result: {}".format(res))
            del res, init_tester

        for prune_step in range(pruner.pruning_iter + 1):
            # reset optimizer every time
            optim_params = [p for p in model.parameters() if p.requires_grad]
            utils.get_logger(__name__).debug(len(optim_params))
            optimizer = get_optim(args.optim, optim_params)
            # Learning-rate scaling is pinned to 1.0; the previous value
            # derived from pruner.cur_rate was overwritten before use.
            factor = 1.0
            for pg in optimizer.param_groups:
                pg["lr"] = factor * pg["lr"]
            utils.get_logger(__name__).info(optimizer)

            trainer = Trainer(
                train_data,
                model,
                loss=LossInForward(),
                optimizer=optimizer,
                metric_key=metric_key,
                metrics=metrics,
                print_every=200,
                batch_size=args.batch_size,
                num_workers=4,
                n_epochs=args.epochs,
                dev_data=dev_data,
                save_path=None,
                sampler=fastNLP.BucketSampler(batch_size=args.batch_size),
                callbacks=[
                    pruner,
                    GradientClipCallback(clip_type="norm", clip_value=5),
                    LRScheduler(
                        lr_scheduler=LambdaLR(
                            optimizer, lambda ep: 1 / (1 + 0.05 * ep)
                        )
                    ),
                    LogCallback(
                        path=os.path.join(args.tb_path, "No", str(prune_step))
                    ),
                ],
                use_tqdm=False,
                device="cuda",
                check_code_level=-1,
            )
            res = trainer.train()
            logger.info(
                "No #{} training, Result: {}".format(pruner.prune_times, res)
            )
            name, val = get_metric(res)
            # tag spelling kept as-is so existing event files stay comparable
            summary_writer.add_scalar("prunning_dev_acc", val, prune_step)
            tester = Tester(
                test_data,
                model,
                metrics=metrics,
                batch_size=args.batch_size,
                num_workers=4,
                device="cuda",
                use_tqdm=False,
            )
            res = tester.test()
            logger.info(
                "No #{} testing, Result: {}".format(pruner.prune_times, res)
            )
            name, val = get_metric(res)
            summary_writer.add_scalar("pruning_test_acc", val, prune_step)

            # prune and save
            torch.save(
                model.state_dict(),
                os.path.join(
                    args.save_path,
                    "best_{}_{}.th".format(pruner.prune_times, pruner.cur_rate),
                ),
            )
            pruner.pruning_model()
            summary_writer.add_scalar("remain_rate", pruner.cur_rate, prune_step + 1)
            summary_writer.add_scalar("cutoff", pruner.last_cutoff, prune_step + 1)

            pruner.save(
                os.path.join(
                    args.save_path,
                    "{}_{}.th".format(pruner.prune_times, pruner.cur_rate),
                )
            )
    finally:
        # release the event-file handle on success and on error
        summary_writer.close()
Ejemplo n.º 27
0
                by="value", ascending=False).to_dict()["value"])

    # ===============================
    # === Train model
    # ===============================
    logging.info("Train model")

    # get folds
    with timer("Train model"):
        with timer("get validation"):
            x_train["target"] = np.log1p(y_train) > 7.0
            splits = get_validation(x_train, config)
            del x_train["target"]
            gc.collect()

        model = get_model(config)
        (
            models,
            oof_preds,
            test_preds,
            valid_preds,
            feature_importance,
            evals_results,
        ) = model.cv(
            y_train=y_train,
            train_features=x_train[cols],
            test_features=x_test[cols],
            y_valid=None,
            valid_features=None,
            feature_name=cols,
            folds_ids=splits,
Ejemplo n.º 28
0
def trainval(exp_dict, savedir_base, datadir_base, reset=False, 
            num_workers=0, pin_memory=False, ngpu=1, cuda_deterministic=False):
    """Run (or resume) training, validation and testing for one experiment.

    The experiment lives in ``<savedir_base>/<hash of exp_dict>``. If a score
    list already exists there, the model weights and the score list are
    restored and training continues at epoch ``len(score_list)``. After every
    epoch the model and score list are saved; when the epoch's test accuracy
    is at least the maximum of all earlier epochs (or on epoch 0), a "best"
    copy of both is saved as well.

    Args:
        exp_dict: Experiment configuration. Keys read here: ``dataset``
            (with ``name``, ``transform_lvl``, ``val_transform`` and optional
            ``colorjitter``), ``dataset_size``, ``batch``, ``fixedSeed``,
            ``niter``, ``mixTrainVal`` and ``model``.
        savedir_base: Base directory for experiment outputs.
        datadir_base: Base directory handed to the dataset factory.
        reset: When true, the existing experiment folder is deleted (with a
            backup) before starting.
        num_workers: Forwarded to the data loaders.
        pin_memory: Forwarded to the data loaders.
        ngpu: Not used inside this function.
        cuda_deterministic: On a CUDA device, selects deterministic cuDNN
            (benchmark off) instead of cuDNN benchmark mode.

    Returns:
        None.
    """
    # bookkeeping
    # ==================

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    if DEVICE.type == "cuda":
        if cuda_deterministic:
            cudnn.benchmark = False
            cudnn.deterministic = True
        else:
            cudnn.benchmark = True

    # Dataset
    # ==================
    trainset = get_dataset(exp_dict['dataset'], 'train',
                           exp_dict=exp_dict, datadir_base=datadir_base,
                           n_samples=exp_dict['dataset_size']['train'],
                           transform_lvl=exp_dict['dataset']['transform_lvl'],
                           colorjitter=exp_dict['dataset'].get('colorjitter')
                           )

    # NOTE(review): the validation split is sized with
    # dataset_size['train'] -- confirm this is intended.
    valset = get_dataset(exp_dict['dataset'], 'validation',
                         exp_dict=exp_dict, datadir_base=datadir_base,
                         n_samples=exp_dict['dataset_size']['train'],
                         transform_lvl=0,
                         val_transform=exp_dict['dataset']['val_transform'])

    testset = get_dataset(exp_dict['dataset'], 'test',
                          exp_dict=exp_dict, datadir_base=datadir_base,
                          n_samples=exp_dict['dataset_size']['test'],
                          transform_lvl=0,
                          val_transform=exp_dict['dataset']['val_transform'])
    print("Dataset defined.")

    # define dataloaders: the 'bach' dataset is tested one sample at a time,
    # every other dataset uses the configured batch size
    if exp_dict['dataset']['name'] == 'bach':
        test_batch_size = 1
    else:
        test_batch_size = exp_dict['batch']['size']
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=test_batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             pin_memory=pin_memory)

    print("Testloader  defined.")

    # Model
    # ==================
    model = get_model(exp_dict, trainset, device=DEVICE)

    print("Model loaded")

    model_path = os.path.join(savedir, 'model.pth')
    model_best_path = os.path.join(savedir, 'model_best.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    score_list_best_path = os.path.join(savedir, "score_list_best.pkl")

    # checkpoint management
    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = len(score_list)
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # define and log random seed for reproducibility
    assert('fixedSeed' in exp_dict)
    seed = exp_dict['fixedSeed']

    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    print("Seed defined.")

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d/%d" % (s_epoch, exp_dict['niter']))

    for epoch in range(s_epoch, exp_dict['niter']):
        s_time = time.time()
        # Sample new train val
        trainloader, valloader = get_train_val_dataloader(exp_dict,
                                                          trainset, valset,
                                                          mixtrainval=exp_dict['mixTrainVal'],
                                                          num_workers=num_workers,
                                                          pin_memory=pin_memory)
        # Train & validate
        train_dict = model.train_on_loader(trainloader, valloader, epoch=epoch,
                                           exp_dict=exp_dict)

        # Test phase
        train_dict_2 = model.test_on_loader(trainloader)
        val_dict = model.test_on_loader(valloader)
        test_dict = model.test_on_loader(testloader)

        # Vis phase
        model.vis_on_loader('train', trainset, savedir_images=os.path.join(
            savedir, 'images'), epoch=epoch)

        score_dict = {}
        score_dict["epoch"] = epoch
        score_dict["test_acc"] = test_dict['acc']
        score_dict["val_acc"] = val_dict['acc']
        score_dict["train_acc"] = train_dict_2['acc']
        score_dict["train_loss"] = train_dict['loss']
        score_dict["time_taken"] = time.time() - s_time
        score_dict["netC_lr"] = train_dict['netC_lr']

        # per-transformation statistics are only recorded when netA is set
        if exp_dict['model']['netA'] is not None:
            if 'transformations_mean' in train_dict:
                for i in range(len(train_dict['transformations_mean'])):
                    score_dict[str(
                        i) + "_mean"] = train_dict['transformations_mean'][i].item()
            if 'transformations_std' in train_dict:
                for i in range(len(train_dict['transformations_std'])):
                    score_dict[str(
                        i) + "_std"] = train_dict['transformations_std'][i].item()

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Update best score. Epoch 0 is handled explicitly because there are
        # no earlier rows to compare against.
        if epoch == 0 or (score_dict["test_acc"] >= score_df["test_acc"][:-1].max()):
            hu.save_pkl(score_list_best_path, score_list)
            hu.torch_save(model_best_path, model.get_state_dict())

            print("Saved Best: %s" % savedir)

    print('experiment completed')
        data_root="./data/imagenette2/train",
        mapping_folder_to_label=mapping_folder_to_label,
        train=True)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)

    val_dataset = ImageNetteDataset(
        data_root="./data/imagenette2/val",
        mapping_folder_to_label=mapping_folder_to_label,
        train=True)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)

    model = get_model(args.model_name)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.01,
                                momentum=0.9,
                                weight_decay=0.00001)

    model.to(args.device)
    best_val_acc = 0
    for epoch in range(args.epochs):
        train_correct = 0
        train_loss = 0
        train_data_num = 0
        val_correct = 0
        val_loss = 0
        val_data_num = 0
Ejemplo n.º 30
0
                        type=str,
                        default='/mnt/public/datasets/DeepFish')
    parser.add_argument("-e", "--exp_config", default='loc')
    parser.add_argument("-uc", "--use_cuda", type=int, default=0)
    args = parser.parse_args()

    device = torch.device('cuda' if args.use_cuda else 'cpu')

    exp_dict = exp_configs.EXP_GROUPS[args.exp_config][0]
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     transform=exp_dict.get("transform"),
                                     datadir=args.datadir)

    # Create model, opt, wrapper
    model_original = models.get_model(exp_dict["model"],
                                      exp_dict=exp_dict).to('cpu')  #.cuda()
    opt = torch.optim.Adam(model_original.parameters(),
                           lr=1e-5,
                           weight_decay=0.0005)

    model = wrappers.get_wrapper(exp_dict["wrapper"],
                                 model=model_original,
                                 opt=opt).to('cpu')  #.cuda()

    if args.exp_config == 'loc':
        batch = torch.utils.data.dataloader.default_collate([train_set[3]])
    else:
        batch = torch.utils.data.dataloader.default_collate([train_set[0]])

    #***************            helen added this code
    im = Image.open("/Users/helenpropson/Documents/git/marepesca/tank.jpg")