Example #1
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    """
    set_seed(cfg.training.seed)
    run_name = os.path.basename(os.getcwd())
    hparams = flatten_omegaconf(cfg)

    cfg.callbacks.model_checkpoint.params.filepath = os.getcwd() + cfg.callbacks.model_checkpoint.params.filepath
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            if 'experiment_name' in logger.params.keys():
                logger.params['experiment_name'] = run_name
            loggers.append(load_obj(logger.class_name)(**logger.params))

    callbacks.append(EarlyStopping(**cfg.callbacks.early_stopping.params))

    trainer = pl.Trainer(
        logger=loggers,
        # early_stop_callback=EarlyStopping(**cfg.callbacks.early_stopping.params),
        checkpoint_callback=ModelCheckpoint(**cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )

    model = load_obj(cfg.training.lightning_module_name)(hparams=hparams, cfg=cfg)
    dm = load_obj(cfg.datamodule.data_module_name)(hparams=hparams, cfg=cfg)
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model and cfg.general.save_best:
        if os.path.exists(trainer.checkpoint_callback.best_model_path):  # type: ignore
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            # extract file name without folder
            save_name = os.path.basename(os.path.normpath(best_path))
            model = model.load_from_checkpoint(best_path, hparams=hparams, cfg=cfg, strict=False)
            model_name = f'saved_models/best_{save_name}'.replace('.ckpt', '.pth')
            torch.save(model.model.state_dict(), model_name)
        else:
            os.makedirs('saved_models', exist_ok=True)
            model_name = 'saved_models/last.pth'
            torch.save(model.model.state_dict(), model_name)

    if cfg.general.convert_to_jit and os.path.exists(trainer.checkpoint_callback.best_model_path):  # type: ignore
        convert_to_jit(model, save_name, cfg)
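Every example on this page calls a project-level set_seed helper that is not shown. A minimal sketch of such a helper, assuming the usual Python/NumPy/PyTorch seeding rather than the exact implementation from any of these repositories, looks like this:

import os
import random

import numpy as np
import torch


def set_seed(seed: int = 42) -> None:
    """Seed Python, NumPy and PyTorch RNGs for reproducible runs (sketch)."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Optional: trade speed for determinism in cuDNN-backed ops.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False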
Example #2
def test_f1score_metric(average: str) -> None:
    set_seed(42)
    labels = torch.randint(1, 10, (4096, 100)).flatten()
    predictions = torch.randint(1, 10, (4096, 100)).flatten()
    labels_numpy = labels.numpy()
    predictions_numpy = predictions.numpy()
    f1_metric = F1Score(average)
    my_pred = f1_metric(predictions, labels)

    f1_pred = f1_score(labels_numpy, predictions_numpy, average=average)

    assert np.isclose(my_pred.item(), f1_pred.item())
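The average argument in Example #2 is typically supplied by a pytest parametrization; the decorator is not part of the snippet, so the one below is only a hypothetical illustration:

import pytest


@pytest.mark.parametrize('average', ['micro', 'macro', 'weighted'])
def test_f1score_metric(average: str) -> None:
    ...  # body as in Example #2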
Example #3
    def get_test_data(self):
        """
        Get the Testing Data Loader
        :return: The Wrapped Data Loader
        """
        set_seed(self.seed)
        data = CustomDataset(self.test_files)
        dl = DataLoader(data,
                        batch_size=self.batch_size,
                        shuffle=True,
                        num_workers=self.workers,
                        worker_init_fn=seed_worker)
        return WrappedDataLoader(dl, self.wrapped_function)
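Example #3 passes seed_worker as the DataLoader's worker_init_fn. The helper is not shown; a minimal sketch, assuming it follows the standard PyTorch reproducibility recipe, is:

import random

import numpy as np
import torch


def seed_worker(worker_id: int) -> None:
    # Each worker derives its seed from the DataLoader's base seed,
    # so NumPy and random are reproducible inside worker processes too.
    worker_seed = torch.initial_seed() % 2 ** 32
    np.random.seed(worker_seed)
    random.seed(worker_seed)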
Example #4
def make_prediction(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model inference

    Args:
        cfg: hydra config

    Returns:
        None
    """
    set_seed(cfg.training.seed)
    model_name = glob.glob(
        f'outputs/{cfg.inference.run_name}/saved_models/*')[0]

    lit_model = LitM5NBeats.load_from_checkpoint(checkpoint_path=model_name,
                                                 cfg=cfg)

    net = lit_model.net

    datasets = get_datasets(cfg)

    loader = torch.utils.data.DataLoader(datasets[cfg.inference.mode],
                                         batch_size=cfg.data.batch_size,
                                         num_workers=cfg.data.num_workers,
                                         shuffle=False)

    y_pred: List[np.ndarray] = []
    device = cfg.data.device

    net.to(device)
    net.eval()

    for _, (x, y, scales, weights) in enumerate(loader):
        forecast, loss = net(x.float().to(device),
                             y.float().to(device), scales.to(device),
                             weights.to(device))
        y_pred.extend(forecast.cpu().detach().numpy())

    y_pred = np.array(y_pred)

    sub = pd.read_csv(f'{cfg.data.folder_path}/data/sample_submission.csv')

    sub.iloc[:30490, 1:] = y_pred
    sub.iloc[30490:, 1:] = y_pred
    sub.to_csv(f'subs/{cfg.inference.run_name}_{cfg.inference.mode}.csv',
               index=False)
Example #5
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    cfg.callbacks.model_checkpoint.params.filepath = os.getcwd() + cfg.callbacks.model_checkpoint.params.filepath
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))

    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=EarlyStopping(**cfg.callbacks.early_stopping.params),
        checkpoint_callback=ModelCheckpoint(**cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )

    model = load_obj(cfg.training.lightning_module_name)(hparams=hparams, cfg=cfg)
    dm = load_obj(cfg.datamodule.data_module_name)(hparams=hparams, cfg=cfg)
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        # save as a simple torch model
        # TODO save not last, but best - for this load the checkpoint and save pytorch model from it
        os.makedirs('saved_models', exist_ok=True)
        model_name = 'saved_models/best.pth'
        print(model_name)
        torch.save(model.model.state_dict(), model_name)
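Examples #1, #5, #21 and #24 build callbacks, loggers and modules through a load_obj(class_name)(**params) pattern. The helper itself is not shown; a sketch of a typical dynamic-import implementation (an assumption, not the repositories' exact code) is:

import importlib
from typing import Any


def load_obj(obj_path: str) -> Any:
    """Load a class or function from a dotted path such as 'torch.optim.Adam' (sketch)."""
    module_path, _, obj_name = obj_path.rpartition('.')
    module = importlib.import_module(module_path)
    return getattr(module, obj_name)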
Example #6
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)
    model = LitM5NBeats(hparams=hparams, cfg=cfg)

    early_stopping = pl.callbacks.EarlyStopping(
        **cfg.callbacks.early_stopping.params)
    model_checkpoint = pl.callbacks.ModelCheckpoint(
        **cfg.callbacks.model_checkpoint.params)
    lr_logger = pl.callbacks.LearningRateLogger()
    logger = []

    if cfg.logging.log:

        tb_logger = TensorBoardLogger(save_dir=cfg.general.save_dir)
        comet_logger = CometLogger(
            save_dir=cfg.general.save_dir,
            workspace=cfg.general.workspace,
            project_name=cfg.general.project_name,
            api_key=cfg.private.comet_api,
            experiment_name=os.getcwd().split('\\')[-1],
        )
        # wandb_logger = WandbLogger(name=os.getcwd().split('\\')[-1],
        #                            save_dir=cfg.general.save_dir,
        #                            project=cfg.general.project_name
        #                            )
        logger = [tb_logger, comet_logger]

    trainer = pl.Trainer(
        logger=logger,
        early_stop_callback=early_stopping,
        checkpoint_callback=model_checkpoint,
        callbacks=[lr_logger],
        gradient_clip_val=0.5,
        **cfg.trainer,
    )
    trainer.fit(model)
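Several of these run functions turn the hydra config into flat hparams with flatten_omegaconf before handing them to the LightningModule. A sketch of what such a helper usually does (assumed behaviour, not copied from these projects):

from typing import Any, Dict

from omegaconf import DictConfig, OmegaConf


def flatten_omegaconf(cfg: DictConfig, sep: str = '.') -> Dict[str, Any]:
    """Flatten a nested config into {'a.b.c': value} pairs for logging (sketch)."""
    def _flatten(node: Any, prefix: str) -> Dict[str, Any]:
        if isinstance(node, dict):
            items: Dict[str, Any] = {}
            for key, value in node.items():
                items.update(_flatten(value, f'{prefix}{key}{sep}'))
            return items
        return {prefix[:-len(sep)]: node}

    return _flatten(OmegaConf.to_container(cfg, resolve=True), '')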
Example #7
def predict(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    """
    set_seed(cfg.training.seed)

    test_dataset = get_test_dataset(cfg)
    path = r'wheat\outputs\2020_05_06_09_32_36\saved_models\_ckpt_epoch_0.ckpt'

    model = LitWheat.load_from_checkpoint(checkpoint_path=path)
    model.eval()
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=cfg.data.batch_size,
        num_workers=cfg.data.num_workers,
        shuffle=False,
        collate_fn=collate_fn,
    )
    detection_threshold = 0.5
    results = []

    for images, _, image_ids in test_loader:

        # Materialise as a list (not a generator) so the batch can be iterated
        # again in the loop below; cfg.general.device is the likely intended target.
        images = [image.to(cfg.general.device) for image in images]
        outputs = model(images)

        for i, _ in enumerate(images):
            boxes = outputs[i]['boxes'].data.cpu().numpy()
            scores = outputs[i]['scores'].data.cpu().numpy()

            boxes = boxes[scores >= detection_threshold].astype(np.int32)
            scores = scores[scores >= detection_threshold]
            image_id = image_ids[i]

            result = {
                'image_id': image_id,
                'PredictionString': format_prediction_string(boxes, scores)
            }

            results.append(result)
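Example #7 finishes by calling format_prediction_string(boxes, scores), which is not shown. In the Kaggle wheat-detection kernels this usually joins one "score x y w h" group per box; a hypothetical version along those lines:

import numpy as np


def format_prediction_string(boxes: np.ndarray, scores: np.ndarray) -> str:
    # One "confidence x_min y_min width height" group per detected box,
    # assuming the boxes have already been converted to [x, y, w, h].
    parts = []
    for score, box in zip(scores, boxes):
        parts.append(f'{score:.4f} {box[0]} {box[1]} {box[2]} {box[3]}')
    return ' '.join(parts)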
Example #8
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    model = LitWheat(hparams=hparams, cfg=cfg)

    early_stopping = pl.callbacks.EarlyStopping(
        **cfg.callbacks.early_stopping.params)
    model_checkpoint = pl.callbacks.ModelCheckpoint(
        **cfg.callbacks.model_checkpoint.params)
    lr_logger = pl.callbacks.LearningRateLogger()

    tb_logger = TensorBoardLogger(save_dir=cfg.general.save_dir)
    # comet_logger = CometLogger(save_dir=cfg.general.save_dir,
    #                            workspace=cfg.general.workspace,
    #                            project_name=cfg.general.project_name,
    #                            api_key=cfg.private.comet_api,
    #                            experiment_name=os.getcwd().split('\\')[-1])
    json_logger = JsonLogger()

    trainer = pl.Trainer(
        logger=[
            tb_logger,  # comet_logger,
            json_logger
        ],
        early_stop_callback=early_stopping,
        checkpoint_callback=model_checkpoint,
        callbacks=[lr_logger],
        **cfg.trainer,
    )
    trainer.fit(model)

    # save as a simple torch model
    model_name = os.getcwd().split('\\')[-1] + '.pth'
    print(model_name)
    torch.save(model.model.state_dict(), model_name)
Example #9
def bootstrap_config(config_id: str,
                     should_make_config_immutable: bool = True) -> ConfigType:
    """Prepare the config object

    Args:
        config_id (str): config_id to load
        should_make_config_immutable (bool, optional): Should the config object
            be immutable. Defaults to True.

    Returns:
        ConfigType: Config Object
    """
    config = get_config(
        config_id, should_make_config_immutable=should_make_config_immutable)
    write_debug_message(
        f"Starting Experiment at {time.asctime(time.localtime(time.time()))}")
    write_debug_message(f"torch version = {torch.__version__}")  # type: ignore
    set_seed(seed=config.general.seed)
    return config
Example #10
    def __init__(self,
                 model,
                 data,
                 opt,
                 loss_func,
                 epochs,
                 dev,
                 logger,
                 checkpointer,
                 verbose=False,
                 seed=42):
        """
        The main class used to actually train models
        :param model: the model to train
        :param data: data to use (Of type Data from this package)
        :param opt: optimizer to use
        :param loss_func: loss function to use
        :param epochs: number of epochs to train for
        :param dev: what device to use
        :param logger: a logger to record training
        :param checkpointer: a checkpointer to save the trained (and mid training) model(s)
        :param verbose: the verbosity of training
        :param seed: the seed to use for reproducibility
        """
        set_seed(seed)
        self.train_dl = data.get_train_data()
        self.val_dl = data.get_val_data()
        self.loss_func = loss_func

        model = model.to(dev)

        self.logger = logger
        self.verbose = verbose

        for epoch in range(epochs):
            self.train_model(epoch, model, opt)
            self.evaluate_model(epoch, model)
            self.logger.print_epoch(epoch)
            checkpointer.save(epoch, model, opt)
            if logger.check_early_stopping():
                break
        checkpointer.save_override(-1, model, add_tag="FINAL")
Example #11
def run(cfg: DictConfig):
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    Returns:

    """
    set_seed(cfg.training.seed)

    model = LitBCDI(hparams=cfg)

    early_stopping = pl.callbacks.EarlyStopping(
        **cfg.callbacks.early_stopping.params)
    model_checkpoint = pl.callbacks.ModelCheckpoint(
        **cfg.callbacks.model_checkpoint.params)

    tb_logger = TensorBoardLogger(save_dir=cfg.general.save_dir)
    comet_logger = CometLogger(
        save_dir=cfg.general.save_dir,
        workspace=cfg.general.workspace,
        project_name=cfg.general.project_name,
        # api_key=cfg.private.comet_api,
        experiment_name=os.getcwd().split('\\')[-1])

    print(cfg.trainer)
    trainer = pl.Trainer(
        logger=[tb_logger, comet_logger],
        # early_stop_callback=early_stopping,
        checkpoint_callback=model_checkpoint,
        # nb_sanity_val_steps=0,
        gradient_clip_val=0.5,
        **cfg.trainer)
    trainer.fit(model)

    # save as a simple torch model
    model_name = "{os.getcwd().split('\\')[-1]}.pth"
    torch.save(model.model.state_dict(), model_name)
Example #12
def run_multilabel_mnist(args, exp_dir):
    """
    Run the multilabel mnist experiment with the given arguments.
    Args:
        args: Command line args.
        exp_dir: Directory in which the experiment will be stored.

    """
    # Set seed globally
    set_seed(args.seed)
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device(
        "cuda:{}".format(args.cuda_device_id) if use_cuda else "cpu")

    logger.info("Main device: %s", device)

    # Get the mnist loader
    train_loader, test_loader = load_multi_mnist(n_labels=args.n_labels,
                                                 canvas_size=args.canvas_size,
                                                 seed=args.seed,
                                                 args=args)

    # Retrieve model
    model = get_model_by_tag(args.net, device, args, args.canvas_size**2,
                             args.n_labels)

    # Disable track_running_stats in batchnorm according to
    # https://discuss.pytorch.org/t/performance-highly-degraded-when-eval-is-activated-in-the-test-phase/3323/12
    for child in model.modules():
        if type(child) == nn.BatchNorm2d or type(child) == nn.BatchNorm1d:
            child.track_running_stats = False

    logger.info("Number of paramters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.l2)

    # Scheduler for learning rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.5)

    writer = SummaryWriter(log_dir=os.path.join(exp_dir, "tb-log"))

    data = []
    # Run epochs
    for epoch in range(1, args.epochs + 1):
        # Start counting after 20 epochs, that is, first lr reduction is at epoch 30
        if epoch > 20:
            scheduler.step()

        # Run train
        train_multilabel(model, device, train_loader, optimizer, epoch,
                         args.log_interval)

        # Evaluate model on train/test data
        train_loss, train_acc = evaluate_model_multilabel(
            model, device, train_loader, "Train", args.n_labels)
        test_loss, test_acc = evaluate_model_multilabel(
            model, device, test_loader, "Test", args.n_labels)
        data.append([epoch, train_acc, test_acc, train_loss, test_loss])

        # Collect data
        collect_tensorboard_info(writer, model, epoch, train_acc, test_acc,
                                 train_loss, test_loss)

    column_names = [
        "epoch", "train_acc", "test_acc", "train_loss", "test_loss"
    ]
    store_results(result_dir=exp_dir,
                  dataset_name="mnist",
                  column_names=column_names,
                  data=data)
Example #13
    def __init__(self):
        # parameters
        set_seed(2020)
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            # extremely slow if use cpu to train
            self.device = torch.device("cpu")

        # dataset
        self.train_dataset = UnalignedDataset(
            osp.join("dataset", FLAGS.dataset, "train"),
            image_size=FLAGS.image_size)
        self.train_loader = DataLoader(self.train_dataset,
                                       batch_size=FLAGS.batch_size,
                                       shuffle=True,
                                       num_workers=2)
        self.test_dataset = UnalignedDataset(
            osp.join("dataset", FLAGS.dataset, "test"), is_train=True,
            image_size=FLAGS.image_size)
        self.test_loader = DataLoader(self.test_dataset,
                                      batch_size=1,  # use 1 for evaluation
                                      shuffle=False)

        # replay buffer
        self.fake_A_pool = ImageClassPool(50)
        self.fake_B_pool = ImageClassPool(50)

        # model
        if FLAGS.model == "cyclegan":
            self.model = CycleGAN().to(self.device)
            init_weights(self.model, init_type="kaiming")
        elif FLAGS.model == "accyclegan":
            self.model = ACCycleGAN().to(self.device)
            init_weights(self.model, init_type="kaiming")

        # loss
        self.criterionGAN = GANLoss("lsgan").to(self.device)
        if FLAGS.loss == "L1":
            self.criterionCycle = torch.nn.L1Loss()
            self.criterionIdt = torch.nn.L1Loss()
        elif FLAGS.loss == "patch":
            self.criterionCycle = PatchLoss().to(self.device)
            self.criterionIdt = PatchLoss().to(self.device)

        # opt
        self.optimizer_G = torch.optim.Adam(
            itertools.chain(
                self.model.netG_A.parameters(),
                self.model.netG_B.parameters()
            ),
            lr=FLAGS.lr,
            betas=(0.5, 0.999))
        self.optimizer_D_A = torch.optim.Adam(self.model.netD_A.parameters(),
                                              lr=FLAGS.lr,
                                              betas=(0.5, 0.999))
        self.optimizer_D_B = torch.optim.Adam(self.model.netD_B.parameters(),
                                              lr=FLAGS.lr,
                                              betas=(0.5, 0.999))
        self.schedulers = [
            self.get_scheduler(opt) for opt in
            [self.optimizer_G, self.optimizer_D_A, self.optimizer_D_B]
        ]

        # logs
        self.run_name = (datetime.now().strftime("%Y-%m-%d-%H:%M:%S") + "-" +
                         FLAGS.run_name)
        self.log_dir = osp.join(FLAGS.logdir, self.run_name)
        self.writer = SummaryWriter(log_dir=self.log_dir)
        self.save_dir = osp.join(self.log_dir, "model_states")
        os.makedirs(self.save_dir, exist_ok=True)

        # write params to summary
        self.writer.add_text('Text', dict2table(FLAGS.flag_values_dict()), 0)
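Example #13 builds its schedulers through self.get_scheduler(opt), which is not included in the snippet. Below is a sketch of the usual CycleGAN schedule (constant learning rate, then linear decay), written as a standalone function; in the class above it would be a method and the total epoch count would come from a FLAGS value (an assumption):

import torch


def get_scheduler(optimizer: torch.optim.Optimizer, num_epochs: int) -> torch.optim.lr_scheduler.LambdaLR:
    # Keep the learning rate constant for the first half of training,
    # then decay it linearly towards zero (the common CycleGAN schedule).
    decay_start = num_epochs // 2

    def lambda_rule(epoch: int) -> float:
        return 1.0 - max(0, epoch + 1 - decay_start) / float(num_epochs - decay_start + 1)

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)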
Example #14
from src.envs import *
import src.agent.model as model
from src.utils.utils import set_seed
from src.utils.params import ParamScheduler

seed = 735249652
set_seed(seed)

# Environment
env = Snake(num_par_inst=500)
env.set_seed(seed)

hyperparams = {
    # General
    "name": "noisy_25",
    "num_parallel_steps": 1000000,
    "seed": seed,
    "env": env,

    # Training and synchronization
    "learning_rate": ParamScheduler(init_value=0.0005,
                                    decay_mode="step",
                                    milestones=[5000000, 50000000],
                                    milestone_factor=0.4),
    "replay_period": 64,
Example #15
def run_cifar(args, exp_dir):
    """
    Run the experiment with a given percentage.

    Args:
        percentage (float): Percentage of training data available.
        args: Command line args.

    Returns:
        Tuple[float, float, float, float]: Train acc, Test acc, Train loss, Test loss.
    """
    # Set seed globally
    set_seed(args.seed)
    cuda_device = "cuda:{}".format(args.cuda_device_id)
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device(cuda_device if use_cuda else "cpu")

    logger.info("Main device: %s", device)
    bs = args.batch_size

    # Get the cifar loader
    train_loader, test_loader = get_cifar_loader(n_labels=args.cifar,
                                                 use_cuda=use_cuda,
                                                 args=args)

    # Retrieve model
    model = get_model_by_tag(
        in_features=32 * 32,
        tag=args.net,
        device=device,
        args=args,
        n_labels=args.cifar,
        in_channels=3,
    )

    # Disable track_running_stats in batchnorm according to
    # https://discuss.pytorch.org/t/performance-highly-degraded-when-eval-is-activated-in-the-test-phase/3323/12
    for child in model.modules():
        if type(child) == nn.BatchNorm2d or type(child) == nn.BatchNorm1d:
            child.track_running_stats = False

    logger.info("Number of paramters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.l2)

    # Scheduler for learning rate
    gamma = 0.5
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=gamma)

    writer = SummaryWriter(log_dir=os.path.join(exp_dir, "tb-log"))

    data = []
    # Run epochs
    for epoch in range(1, args.epochs + 1):
        # Start counting after 20 epochs, that is, first lr reduction is at epoch 30
        if epoch > 20:
            scheduler.step()

        # Run train
        train(model, device, train_loader, optimizer, epoch, args.log_interval)

        # Evaluate model on train/test data
        train_loss, train_acc = evaluate(model, device, train_loader, "Train")
        test_loss, test_acc = evaluate(model, device, test_loader, "Test")
        data.append([epoch, train_acc, test_acc, train_loss, test_loss])

        # Collect data
        collect_tensorboard_info(writer, model, epoch, train_acc, test_acc,
                                 train_loss, test_loss)

    column_names = [
        "epoch", "train_acc", "test_acc", "train_loss", "test_loss"
    ]
    store_results(
        result_dir=os.path.join(args.result_dir, args.experiment_name),
        dataset_name="cifar%s" % args.cifar,
        column_names=column_names,
        data=data,
    )
Example #16
def run_for_percentage(percentage: float,
                       args) -> Tuple[float, float, float, float, float]:
    """
    Run the experiment with a given percentage.

    Args:
        percentage (float): Percentage of training data available.
        args: Command line args.

    Returns:
        Tuple[float, float, float, float, float]: Percentage, Train acc, Test acc, Train loss, Test loss.
    """
    use_cuda = args.cuda and torch.cuda.is_available()
    # Set seed globally
    set_seed(args.seed)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    bs = int(60000 * percentage / 100 * 1 / 10)
    logger.info("Current percentage: %.2f, Batch size: %s", percentage, bs)

    # Get the mnist loader
    train_loader, test_loader = get_mnist_subset(use_cuda=use_cuda,
                                                 train_bs=bs,
                                                 test_bs=args.test_batch_size,
                                                 p=percentage)

    # Retrieve model
    model = get_model_by_tag(args.net, device)

    # logger.info("Number of samples: {} ({}%)".format(n_samples, p))
    logger.info("Number of paramters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Scheduler for learning rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=25,
                                                gamma=0.5)

    data = []
    # Run epochs
    for epoch in range(1, args.epochs + 1):
        scheduler.step()

        # Run train
        train(model, device, train_loader, optimizer, epoch, args.log_interval)
        # Evaluate model on train/test data
        # train_loss, train_acc = evaluate_model(model, device, train_loader, "Train")
        # test_loss, test_acc = evaluate_model(model, device, test_loader, "Test")
        # data.append([epoch, train_acc, test_acc, train_loss, test_loss])

    # column_names = ["epoch", "train_acc", "test_acc", "train_loss", "test_loss"]
    # store_results(
    #     result_dir=os.path.join(args.result_dir, args.experiment_name),
    #     dataset_name="mnist-p={0:.2f}".format(percentage),
    #     column_names=column_names,
    #     data=data,
    # )

    # Evaluate model on train/test data
    train_loss, train_acc = evaluate_model(model, device, train_loader,
                                           "Train")
    test_loss, test_acc = evaluate_model(model, device, test_loader, "Test")
    return percentage, train_acc, test_acc, train_loss, test_loss
Example #17
                        help="automata path")

    parser.add_argument('--model_type',
                        type=str,
                        default='FSARNN',
                        help='baseline MarryUp or FSARNN')

    args = parser.parse_args()
    args_bak = deepcopy(args)

    assert args.farnn in [0, 1]

    results = {}
    loggers = {}
    seed = args.seed
    set_seed(args.seed)
    if args.model_type == 'FSARNN':
        automata_path_forward, automata_path_backward = get_automata_from_seed(
            args_bak, seed)
        paths = (automata_path_forward, automata_path_backward)
        args.automata_path_forward = automata_path_forward
        args.automata_path_backward = automata_path_backward
        train_fsa_rnn(args, paths)

    elif args.model_type == 'Onehot':
        automata_path_forward, automata_path_backward = get_automata_from_seed(
            args_bak, seed)
        paths = (automata_path_forward, automata_path_backward)
        args.automata_path_forward = automata_path_forward
        args.automata_path_backward = automata_path_backward
        train_onehot(args, paths)
Example #18
def run_multilabel_mnist(args):
    """
    Run the experiment with a given percentage.

    Args:
        args: Command line args.

    Returns:
        Tuple[float, float, float, float]: Train acc, Test acc, Train loss, Test loss.
    """
    use_cuda = args.cuda and torch.cuda.is_available()
    # Set seed globally
    set_seed(args.seed)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    bs = ARGS.batch_size

    # Get the mnist loader
    train_loader, test_loader = get_multilabel_mnist_loader(
        n_labels=ARGS.n_labels, use_cuda=use_cuda, args=args)
    # Retrieve model
    model = get_model_by_tag(args.net, device)

    logger.info("Number of paramters: %s", count_params(model))

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Scheduler for learning rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.5)

    data = []
    # Run epochs
    for epoch in tqdm(range(1, args.epochs + 1)):
        # Start counting after 10 epochs, that is, first lr reduction is at epoch 20
        if epoch > 10:
            scheduler.step()

        # Run train
        train_multilabel(model, device, train_loader, optimizer, epoch,
                         args.log_interval)

        # Evaluate model on train/test data
        train_loss, train_acc = evaluate_model_multilabel(
            model, device, train_loader, "Train", ARGS.n_labels)
        test_loss, test_acc = evaluate_model_multilabel(
            model, device, test_loader, "Test", ARGS.n_labels)
        data.append([epoch, train_acc, test_acc, train_loss, test_loss])

    column_names = [
        "epoch", "train_acc", "test_acc", "train_loss", "test_loss"
    ]
    store_results(
        result_dir=os.path.join(args.result_dir, args.experiment_name),
        dataset_name="mnist",
        column_names=column_names,
        data=data,
    )

    # Evaluate model on train/test data
    train_loss, train_acc = evaluate_model_multilabel(model, device,
                                                      train_loader, "Train",
                                                      ARGS.n_labels)
    test_loss, test_acc = evaluate_model_multilabel(model, device, test_loader,
                                                    "Test", ARGS.n_labels)
Example #19
    def __init__(self,
                 path,
                 augmented,
                 total_amt=16384,
                 val_percent=0.25,
                 test_amt=768,
                 wrapped_function=None,
                 workers=0,
                 device=torch.device('cpu'),
                 batch_size=64,
                 verbose=False,
                 seed=42):
        """
        An object to take data from a path and convert it to tensors
        :param path: Path to Data
        :param augmented: whether the data is augmented or not
        :param total_amt: the total amount of data to use for Training and Validation
        :param val_percent: the percentage of data to use for Validation
        :param test_amt: the amount of Data to set aside for Testing
        :param wrapped_function: a function to apply to the data if required
        :param workers: the number of workers to use
        :param device: the device to use
        :param batch_size: the batch size to use
        :param verbose: the verbosity to use
        :param seed: the seed to use for reproducibility
        """
        self.device = device
        self.batch_size = batch_size
        self.workers = workers
        self.verbose = verbose
        self.seed = seed
        set_seed(self.seed)

        self.wrapped_function = lambda x, y: mount_to_device(x, y, self.device)
        if wrapped_function is not None:
            self.wrapped_function = lambda x, y: mount_to_device(*wrapped_function(x, y), self.device)

        self.train_files, self.val_files, self.test_files = [], [], []
        if augmented:
            self.get_augmented_sets(path, total_amt, val_percent, test_amt)
        else:
            self.get_non_augmented_sets(path, total_amt, val_percent, test_amt)

        total = self.train_files + self.val_files + self.test_files
        if verbose:
            pt, nt = self.calc_distribution(self.train_files)
            pv, nv = self.calc_distribution(self.val_files)
            pte, nte = self.calc_distribution(self.test_files)
            print(f"Total Size = {len(total)}")
            print(f"Total size of Train = {len(self.train_files)} (pos = {pt}, neg = {nt})")
            print(f"Total size of Validation = {len(self.val_files)} (pos = {pv}, neg = {nv})")
            print(f"Total size of Test = {len(self.test_files)} (pos = {pte}, neg = {nte})")
            if augmented:
                for type in ["autocontrast", "equalize", "invert", "resized", "rotated"]:
                    ltr = len([i for i in self.train_files if type in i])
                    print(f"# of {type} in Train = {ltr}")
                    lv = len([i for i in self.val_files if type in i])
                    print(f"# of {type} in Validation = {lv}")
                    lte = len([i for i in self.test_files if type in i])
                    print(f"# of {type} in Test = {lte}")
            print("Checking for duplicates...")
        if len(total) != len(set(total)):
            raise RuntimeError("Something has gone wrong! There are duplicates in the data")
        else:
            if verbose:
                print("There are no duplicates in data!")
Example #20
import os
from copy import deepcopy

# os.system('pip3 install -U torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html')
# os.system('pip install numpy cython==0.29.24')
# os.system('pip install POT==0.7.0')
# os.system('pip install dill==0.3.4')
# os.system('pip install tqdm==4.62.2 lightgbm==3.2.1')
# os.system('pip install timm')

import tensorflow as tf

from src.utils import get_logger
from src.utils.utils import set_seed

set_seed(1234)

LOGGER = get_logger('GLOBAL')

gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

current_path = os.path.abspath(__file__)
config_file_path = os.path.abspath(
    os.path.join(os.path.dirname(current_path), 'config.json'))
Example #21
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    """
    set_seed(cfg.training.seed)
    run_name = os.path.basename(os.getcwd())

    cfg.callbacks.model_checkpoint.params.dirpath = Path(
        os.getcwd(), cfg.callbacks.model_checkpoint.params.dirpath).as_posix()
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(
                callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            if 'experiment_name' in logger.params.keys():
                logger.params['experiment_name'] = run_name
            loggers.append(load_obj(logger.class_name)(**logger.params))

    callbacks.append(EarlyStopping(**cfg.callbacks.early_stopping.params))
    callbacks.append(ModelCheckpoint(**cfg.callbacks.model_checkpoint.params))

    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        **cfg.trainer,
    )

    dm = load_obj(cfg.datamodule.data_module_name)(cfg=cfg)
    dm.setup()
    model = load_obj(cfg.training.lightning_module_name)(
        cfg=cfg, tag_to_idx=dm.tag_to_idx)
    model._vectorizer = dm._vectorizer
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        if cfg.general.save_best:
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            # extract file name without folder
            save_name = os.path.basename(os.path.normpath(best_path))
            model = model.load_from_checkpoint(best_path,
                                               cfg=cfg,
                                               tag_to_idx=dm.tag_to_idx,
                                               strict=False)
            model_name = Path(cfg.callbacks.model_checkpoint.params.dirpath,
                              f'best_{save_name}'.replace('.ckpt',
                                                          '.pth')).as_posix()
            torch.save(model.model.state_dict(), model_name)
        else:
            os.makedirs('saved_models', exist_ok=True)
            model_name = 'saved_models/last.pth'
            torch.save(model.model.state_dict(), model_name)
Example #22
    def run_trial(self, LR, BATCH_SIZE, OPTIM, LOSS):
        """
        Initialise everything again for training with the given learning rate, batch size, optimizer and loss function.
        This function simply sets everything up and passes it to FitModel.
        :param LR: Learning Rate
        :param BATCH_SIZE: Batch Size
        :param OPTIM: Optimiser
        :param LOSS: Loss Function
        :return: The final value of the metric to optimise from this training run
        """
        set_seed(self.seed)

        NAME = f"{self.name}_{str(LR).replace('.', '_')}"
        NAME += f"_{BATCH_SIZE}"

        model = self.model_class(**self.model_kwargs)

        opt = OPTIM(model.parameters(), lr=LR)
        opt_str = str(type(opt)).split("'")[-2].split(".")[-1]
        NAME += f"_{opt_str}"

        loss_func = LOSS()
        loss_func_str = str(type(loss_func)).split("'")[-2].split(".")[-1]
        NAME += f"_{loss_func_str}"

        data = Data(self.DATA_DIR,
                    self.augmented,
                    batch_size=BATCH_SIZE,
                    total_amt=self.total_amt,
                    val_percent=self.val_percent,
                    test_amt=self.test_amt,
                    wrapped_function=self.wrapped_function,
                    workers=self.workers,
                    device=self.device,
                    verbose=self.verbose,
                    seed=self.seed)

        logger = Logger(NAME,
                        self.LOG_DIR,
                        self.metrics_to_use,
                        train_early_stopping=self.tres,
                        test_early_stopping=self.tes,
                        stopping_attention=self.es_attn,
                        overwrite=self.overwrite,
                        verbose=self.verbose)

        checkpointer = Checkpoint(NAME,
                                  self.CKP_DIR,
                                  self.save_every,
                                  overwrite=self.overwrite)

        FitModel(model,
                 data,
                 opt,
                 loss_func,
                 self.epochs,
                 self.device,
                 logger,
                 checkpointer,
                 verbose=self.verbose,
                 seed=self.seed)

        met_final = logger.test_history[self.metric_to_optimise][-1]

        return NAME, met_final
Example #23
        :param batch_sizes: a list of batch sizes to try length >= 1
        :param optimisers: a list of optimizers to try length >= 1
        :param losses: a list of loss functions to try length >= 1
        :param SAVE_EVERY: how often to save the models and optimizers
        :param EPOCHS: the number of epochs to train for
        :param DEVICE: the device to use
        :param wrapped_function: a wrapped function for data loading if needed
        :param WORKERS: the number of workers for dataloading to use
        :param verbose: whether to print the status of what's happening
        :param overwrite: whether to overwrite previous experiments with the same name
        :param seed: a seed for reproducibility
        """
        self.augmented = augmented

        self.seed = seed
        set_seed(self.seed)

        self.name = Name
        self.model_class = model_class
        self.epochs = EPOCHS
        self.device = DEVICE
        self.workers = WORKERS
        self.wrapped_function = wrapped_function
        self.verbose = verbose

        self.overwrite = overwrite
        self.save_every = SAVE_EVERY

        self.metrics_to_use = metrics_to_use
        self.metric_to_optimise = metric_to_optimise
Example #24
def run(cfg: DictConfig) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config

    """
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    cfg.callbacks.model_checkpoint.params.filepath = os.getcwd(
    ) + cfg.callbacks.model_checkpoint.params.filepath
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(
                callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))

    callbacks.append(EarlyStopping(**cfg.callbacks.early_stopping.params))

    trainer = pl.Trainer(
        logger=loggers,
        checkpoint_callback=ModelCheckpoint(
            **cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )

    dm = load_obj(cfg.datamodule.data_module_name)(hparams=hparams, cfg=cfg)
    dm.setup()
    model = load_obj(cfg.training.lightning_module_name)(
        hparams=hparams, cfg=cfg, tag_to_idx=dm.tag_to_idx)
    model._vectorizer = dm._vectorizer
    # dm = load_obj(cfg.datamodule.data_module_name)(hparams=hparams, cfg=cfg)
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        if cfg.general.save_best:
            best_path = trainer.checkpoint_callback.best_model_path  # type: ignore
            # extract file name without folder and extension
            save_name = best_path.split('/')[-1][:-5]
            model = model.load_from_checkpoint(best_path,
                                               hparams=hparams,
                                               cfg=cfg,
                                               tag_to_idx=dm.tag_to_idx,
                                               strict=False)
            model_name = f'saved_models/{save_name}.pth'
            torch.save(model.model.state_dict(), model_name)
        else:
            os.makedirs('saved_models', exist_ok=True)
            model_name = 'saved_models/last.pth'
            torch.save(model.model.state_dict(), model_name)
Example #25
from sklearn.metrics import classification_report
import pickle

# customized modules
from src.utils.utils import set_seed
from src.config.config import set_arguments
from src.models.models import *
from src.training.training import *
from src.data.get_dataloaders import *
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if __name__ == "__main__":

    # 1. setting
    config = set_arguments()
    set_seed(config.seeds)
    print("Use CUDA: {}".format(torch.cuda.is_available()))

    # 2. read data
    loader_dict = create_dataloaders(data_name=config.data_name,
                                     data_dir=os.path.join(
                                         config.root, "data"),
                                     batch_size=config.batch_size,
                                     class_a_size=config.class_a_size,
                                     class_a_index=config.class_a_index,
                                     class_b_size=config.class_b_size,
                                     class_b_index=config.class_b_index,
                                     seeds=config.seeds,
                                     download_cifar10=config.download_cifar10)
    train_loader = loader_dict["train_loader"]
    test_ab_loader = loader_dict["test_ab_loader"]