def main(params):
    # setup random seeds
    set_seed(params.seed)
    params.ar = True

    if not os.path.isdir(params.output_dir): os.makedirs(params.output_dir)
    print("Loading the model from {0}".format(params.model_path))
    # load everything from checkpoint
    model_params, dico, model = reload_ar_checkpoint(params.model_path)
    if not params.local_cpu:
        model = model.cuda()
    # evaluate distributional results
    generator = SmilesTransformerGenerator(params, dico, model,
                                           params.sample_temperature)
    json_file_path = os.path.join(params.output_dir,
                                  'distribution_learning_results.json')
    smiles_output_path = os.path.join(params.output_dir,
                                      'generated_smiles.txt')
    print("Starting distributional evaluation")
    t1 = time.time()
    if params.evaluate:
        assess_distribution_learning(generator,
                                     chembl_training_file=params.dist_file,
                                     json_output_file=json_file_path,
                                     benchmark_version=params.suite)
    else:
        smiles_list = generator.generate(params.num_samples)
        with open(smiles_output_path, 'w') as f:
            for smiles in smiles_list:
                f.write(smiles + '\n')
    t2 = time.time()
    print("Total time taken {}".format(t2 - t1))
Example #2
    def cross_val(self, seed: int) -> None:
        r"""
        When we have a very small test set, we will
        train the model multiple times using
        different train / test splits. This allows
        us to estimate the "true" performance of the
        model, reducing the noise that comes from having a tiny test set.
        """
        set_seed(seed)

        pos_train, pos_test = self.split_train_test(self.positive_paths,
                                                    self.num_test)
        neg_train, neg_test = self.split_train_test(self.negative_paths,
                                                    self.num_test)

        self.subset_to_tasks = {
            "testing":
            KenyaCropTypeFromPaths(
                negative_paths=neg_test,
                positive_paths=pos_test,
                cache=True,
            ),
            "training":
            KenyaCropTypeFromPaths(
                negative_paths=neg_train,
                positive_paths=pos_train,
                cache=self.cache,
            ),
        }
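The split_train_test helper used above is not shown in this snippet. A minimal sketch of a seeded split of that kind (signature and behaviour assumed, for illustration only):

import random
from typing import List, Sequence, Tuple

def split_train_test(paths: Sequence, num_test: int) -> Tuple[List, List]:
    # relies on set_seed(seed) having been called, so the shuffle is reproducible
    shuffled = list(paths)
    random.shuffle(shuffled)
    return shuffled[num_test:], shuffled[:num_test]  # (train, test)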
Example #3
def main():
    config = utils.load_config(args.config)
    config["weighted"] = "weighted" in config.keys()

    # copy args to config
    config["mode"] = args.mode
    config["fold"] = args.fold
    config["apex"] = args.apex
    config["output"] = args.output
    config["snapshot"] = args.snapshot
    # config["resume_from"] = args.resume_from if args.resume_from

    utils.set_seed(SEED)
    device = torch.device(DEVICE)

    log(f"Fold {args.fold}")

    model = factory.get_model(config).to(device)

    log(f"Model type: {model.__class__.__name__}")
    if config["mode"] == 'train':
        train(config, model)
        valid(config, model)
    elif config["mode"] == 'valid':
        valid(config, model)
    elif config["mode"] == 'test':
        valid(config, model, all_exam=True)
Example #4
def main():
    parser = ArgumentParser()
    parser.add_argument('--config_file', type=str, required=True)
    args = parser.parse_args()

    # settings
    config_file = Path(args.config_file)
    config = Config.load(config_file)

    output_dir = Path(f'./output/{config_file.stem}/')
    output_dir.mkdir(parents=True, exist_ok=True)

    set_seed(config.seed)
    logger = getLogger(__name__)
    setup_logger(logger, output_dir / 'log.txt')

    # load data
    data_bundle = build_data_module(config)

    # train
    runner = build_runner(config)
    runner.run(data_bundle)

    # save file
    runner.save(output_dir)
Example #5
    def __init__(self, hparams: Namespace) -> None:
        super().__init__()
        set_seed()
        self.hparams = hparams

        self.data_folder = Path(hparams.data_folder)

        dataset = self.get_dataset(subset="training", cache=False)
        self.num_outputs = dataset.num_output_classes
        self.num_timesteps = dataset.num_timesteps
        self.input_size = dataset.num_input_features

        # we save the normalizing dict because we calculate weighted
        # normalization values based on the datasets we combine.
        # The number of instances per dataset (and therefore the weights) can
        # vary between the train / test / val sets - this ensures the normalizing
        # dict stays constant between them
        self.normalizing_dict = dataset.normalizing_dict

        if self.hparams.forecast:
            num_output_timesteps = self.num_timesteps - self.hparams.input_months
            print(f"Predicting {num_output_timesteps} timesteps in the forecaster")
            self.forecaster = Forecaster(
                num_bands=self.input_size, output_timesteps=num_output_timesteps, hparams=hparams,
            )

            self.forecaster_loss = F.smooth_l1_loss

        self.classifier = Classifier(input_size=self.input_size, hparams=hparams)
        self.global_loss_function: Callable = F.binary_cross_entropy
        self.local_loss_function: Callable = F.binary_cross_entropy
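A rough sketch of how a weighted normalizing dict could be combined from several datasets, so that the same statistics are reused across the train / val / test subsets. The dict layout ("mean", "std") and the helper name are assumptions, not the project's actual format:

import numpy as np
from typing import Dict, List

def combine_normalizing_dicts(dicts: List[Dict[str, np.ndarray]],
                              counts: List[int]) -> Dict[str, np.ndarray]:
    # weight each dataset's statistics by its number of instances
    weights = np.array(counts, dtype=np.float64)
    weights /= weights.sum()
    mean = sum(w * d["mean"] for w, d in zip(weights, dicts))
    # weighted average of the variances around the combined mean
    var = sum(w * (d["std"] ** 2 + (d["mean"] - mean) ** 2)
              for w, d in zip(weights, dicts))
    return {"mean": mean, "std": np.sqrt(var)}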
Example #6
    def __init__(self, hparams: Namespace) -> None:
        super().__init__()

        set_seed()

        self.hparams = hparams
        self.data_folder = Path(hparams.data_folder)
        self.version_folder = (self.data_folder / "maml_models" /
                               f"version_{self.hparams.model_version}")

        # load the model info
        with (self.version_folder / "model_info.json").open("r") as f:
            self.model_info = json.load(f)

        self.classifier = Classifier(
            input_size=self.model_info["input_size"],
            classifier_vector_size=self.model_info["classifier_vector_size"],
            classifier_dropout=self.model_info["classifier_dropout"],
            classifier_base_layers=self.model_info["classifier_base_layers"],
            num_classification_layers=self.model_info["num_classification_layers"],
        )

        self.best_val_loss: float = np.inf

        self.loss = nn.BCELoss()

        self.normalizing_dict = self.get_dataset(
            subset="training").normalizing_dict
Example #7
    def cross_val(self, seed: int) -> None:
        set_seed(seed)

        # we shuffle the training indices, so that when sample_train is called
        # (which is deterministic) it will return different samples
        random.shuffle(self.subset_to_tasks["training"].positive_indices)
        random.shuffle(self.subset_to_tasks["training"].negative_indices)
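To see why the shuffle matters: if sample_train deterministically takes the first k indices (a hypothetical stand-in here, since its implementation is not shown), then reshuffling the index lists between cross-validation runs is what changes which examples get sampled:

import random

def sample_train(indices, k):
    # deterministic: always returns the first k indices
    return indices[:k]

indices = list(range(10))
print(sample_train(indices, 3))  # [0, 1, 2]
random.shuffle(indices)
print(sample_train(indices, 3))  # almost certainly a different subset now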
Example #8
    def create_training_env(self):
        if self.parallel_size == 1:
            self.env = SingleEnv(self.get_new_env())
        else:
            self.env = SubprocVecEnv([self.get_new_env for _ in range(self.parallel_size)])

        if self.seed:
            set_seed(self.seed)
Example #9
    def __init__(self, data_folder: Path) -> None:

        set_seed()
        self.data_folder = data_folder
        self.raw_folder = self.data_folder / "raw" / self.dataset
        assert self.raw_folder.exists(), f"{self.raw_folder} does not exist!"

        self.output_folder = self.data_folder / "processed" / self.dataset
        self.output_folder.mkdir(exist_ok=True, parents=True)
Example #10
def main():
    """主函数"""
    paddle.set_device("gpu:1")
    set_seed(2021)  # 设置随机数种子
    logger.info("构建数据集")
    data_file = os.path.join(work_root, "data/NewsTrain.txt")
    datasets, labels = load_data(data_file)
    save_dict_obj(labels, os.path.join(work_root,
                                       "data/news_labels_info.json"))
    for i in range(3):
        random.shuffle(datasets)
    train_data_num = int(len(datasets) * 0.8)
    train_dataset, valid_dataset = datasets[:train_data_num], datasets[
        train_data_num:]
    train_dataset, valid_dataset = MapDataset(train_dataset), MapDataset(
        valid_dataset)
    logger.info("数据转换word2id")
    tokenizer = paddlenlp.transformers.ErnieTinyTokenizer.from_pretrained(
        'ernie-tiny')
    trans_func = partial(convert_example,
                         tokenizer=tokenizer,
                         max_seq_length=64)
    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment
        Stack(dtype="int64")  # label
    ): fn(samples)  # a bit hard to follow and less intuitive than Torch, but pad_val is very handy: it pads to each batch's max sequence length dynamically

    train_loader = create_dataloader(train_dataset,
                                     mode="train",
                                     batch_size=512,
                                     batchify_fn=batchify_fn,
                                     trans_fn=trans_func)
    valid_loader = create_dataloader(valid_dataset,
                                     mode="valid",
                                     batch_size=256,
                                     batchify_fn=batchify_fn,
                                     trans_fn=trans_func)
    epochs = 5  # number of training epochs
    num_training_steps = len(train_loader) * epochs
    num_classes = len(labels)
    model, optimizer, criterion, lr_scheduler = create_classification_model(
        num_classes, num_training_steps)

    logger.info("训练模型")
    metric = paddle.metric.Accuracy()
    train(model,
          optimizer,
          criterion,
          lr_scheduler,
          metric,
          tokenizer,
          train_loader,
          valid_loader,
          epochs=epochs)
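A sketch (without paddlenlp) of what the Pad(pad_val=...) collation above buys us: each batch is padded only to the length of its own longest sequence rather than to a global maximum length:

def pad_batch(batch_ids, pad_val=0):
    # pad every sequence in the batch to the batch's own max length
    max_len = max(len(seq) for seq in batch_ids)
    return [seq + [pad_val] * (max_len - len(seq)) for seq in batch_ids]

print(pad_batch([[5, 6], [7, 8, 9, 10]], pad_val=0))
# [[5, 6, 0, 0], [7, 8, 9, 10]]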
Example #11
    def __init__(self, hparams: Namespace) -> None:
        super().__init__()

        set_seed()
        self.hparams = hparams

        self.data_folder = Path(hparams.data_folder)

        dataset = self.get_dataset(subset="training")
        self.input_size = dataset.num_input_features
        self.num_outputs = dataset.num_output_classes

        # we save the normalizing dict because we calculate weighted
        # normalization values based on the datasets we combine.
        # The number of instances per dataset (and therefore the weights) can
        # vary between the train / test / val sets - this ensures the normalizing
        # dict stays constant between them
        self.normalizing_dict = dataset.normalizing_dict

        self.model_base_name = hparams.model_base

        self.base = STR2BASE[hparams.model_base](input_size=self.input_size,
                                                 hparams=self.hparams)

        global_classification_layers: List[nn.Module] = []
        for i in range(hparams.num_classification_layers):
            global_classification_layers.append(
                nn.Linear(
                    in_features=hparams.hidden_vector_size,
                    out_features=self.num_outputs
                    if i == (hparams.num_classification_layers -
                             1) else hparams.hidden_vector_size,
                ))
            if i < (hparams.num_classification_layers - 1):
                global_classification_layers.append(nn.ReLU())

        self.global_classifier = nn.Sequential(*global_classification_layers)

        if self.hparams.multi_headed:

            local_classification_layers: List[nn.Module] = []
            for i in range(hparams.num_classification_layers):
                local_classification_layers.append(
                    nn.Linear(
                        in_features=hparams.hidden_vector_size,
                        out_features=self.num_outputs
                        if i == (hparams.num_classification_layers -
                                 1) else hparams.hidden_vector_size,
                    ))
                if i < (hparams.num_classification_layers - 1):
                    local_classification_layers.append(nn.ReLU())

            self.local_classifier = nn.Sequential(*local_classification_layers)

        self.loss_function: Callable = F.binary_cross_entropy
Example #12
    def __init__(self, data_folder: Path) -> None:
        set_seed()
        self.data_folder = data_folder
        self.geospatial_files = self.get_geospatial_files(data_folder)
        self.labels = self.read_labels(data_folder)

        self.savedir = self.data_folder / "features" / self.dataset
        self.savedir.mkdir(exist_ok=True, parents=True)

        self.normalizing_dict_interim: Dict[str, Union[np.ndarray, int]] = {
            "n": 0
        }
Example #13
def load_cfg() -> Tuple[Config, str]:
    from src.dict2obj import Config
    from src.base import Adversary
    from src.utils import gpu, load, set_seed

    cfg = Config()
    set_seed(opts.seed)

    # load the model
    model = load_model(opts.model)(num_classes=get_num_classes(opts.dataset))
    device = gpu(model)
    load(model=model, path=opts.info_path, device=device)

    # load the testset
    testset = load_dataset(dataset_type=opts.dataset,
                           transform=opts.transform,
                           train=False)
    cfg['testloader'] = load_dataloader(dataset=testset,
                                        batch_size=opts.batch_size,
                                        train=False,
                                        show_progress=opts.progress)
    normalizer = load_normalizer(dataset_type=opts.dataset)

    # generate the log path
    _, log_path = generate_path(method=METHOD,
                                dataset_type=opts.dataset,
                                model=opts.model,
                                description=opts.description)

    # set the attacker
    attack, bounds, preprocessing = load_attacks(attack_type=opts.attack,
                                                 dataset_type=opts.dataset,
                                                 stepsize=opts.stepsize,
                                                 steps=opts.steps)

    epsilons = torch.linspace(opts.epsilon_min, opts.epsilon_max,
                              opts.epsilon_times).tolist()
    cfg['attacker'] = Adversary(model=model,
                                attacker=attack,
                                device=device,
                                bounds=bounds,
                                preprocessing=preprocessing,
                                epsilon=epsilons)

    return cfg, log_path
Example #14
def run(cfg: DictConfig) -> Dict:
    """
    Run the model's inference on the hold-out dataset.
    ----------
    Example:
        python predict.py
    :param cfg: general hydra config
    :return: dict of test metrics
    """
    hparams = flatten_omegaconf(cfg)

    exp = Experiment(
        api_key=cfg.logger.comet_api,
        project_name=cfg.general.project_name,
        workspace=cfg.general.workspace,
    )
    exp.log_parameters(hparams)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    bert_model = load_obj(cfg.model.class_name).from_pretrained(cfg)
    bert_model.model.load_state_dict(
        torch.load(f"../../{cfg.inference.save_dir}/pytorch_model.bin"))
    # print(bert_model)
    bert_model.to(device)
    bert_model.eval()

    collator = load_obj(cfg.dataset.collator)(
        percentile=cfg.data_params.percentile,
        pad_value=cfg.data_params.pad_value,
    )
    criterion = nn.CrossEntropyLoss()

    test = get_test_dataset(cfg)

    evaluator = Evaluator(val_dataset=test,
                          collator=collator,
                          criterion=criterion,
                          cfg=cfg,
                          device=device)

    set_seed(cfg.trainer.seed)
    test_metrics = evaluator(model=bert_model, experiment=exp, epoch=0, step=0)

    return test_metrics
Example #15
def load_cfg() -> Tuple[Config, str]:
    from src.dict2obj import Config
    from src.utils import gpu, load, set_seed

    cfg = Config()
    set_seed(opts.seed)

    # load the model
    model = load_model(opts.model)(num_classes=get_num_classes(opts.dataset))
    device = gpu(model)
    load(model=model, path=opts.info_path, device=device)
    model.eval()

    # load the testset
    testset = load_dataset(dataset_type=opts.dataset,
                           transform='None',
                           train=False)
    data = []
    targets = []
    for i in range(len(testset)):
        img, label = testset[i]
        data.append(img)
        targets.append(label)

    cfg['data'] = torch.stack(data)
    cfg['targets'] = torch.tensor(targets, dtype=torch.long)
    normalizer = load_normalizer(opts.dataset)

    # generate the log path
    _, log_path = generate_path(METHOD, opts.dataset, opts.model,
                                opts.description)

    cfg['attacker'] = AutoAttack(Defense(model, normalizer),
                                 norm=opts.norm,
                                 eps=opts.epsilon,
                                 version=opts.version,
                                 device=device)

    return cfg, log_path
Example #16
def main(params):
    # setup random seeds
    set_seed(params.seed)
    params.ar = True

    print("Loading the model from {0}".format(params.model_path))
    # load everything from checkpoint
    model_params, dico, model = reload_ar_checkpoint(params.model_path)
    if not params.local_cpu:
        model = model.cuda()
    # evaluate distributional results
    generator = SmilesTransformerGenerator(params, dico, model)
    json_file_path = os.path.join(os.path.dirname(params.model_path),
                                  'distribution_learning_results.json')
    print("Starting distributional evaluation")
    t1 = time.time()
    assess_distribution_learning(generator,
                                 chembl_training_file=params.dist_file,
                                 json_output_file=json_file_path,
                                 benchmark_version=params.suite)
    t2 = time.time()
    print("Total time taken {}".format(t2 - t1))
Example #17
def run(cfg: DictConfig) -> None:
    """
    Initialize model training.
    Used to start the training cycle.
    :param cfg: general hydra config
    :return: None
    ----------
    Example:
        python train.py
    """
    hparams = flatten_omegaconf(cfg)

    exp = Experiment(
        api_key=cfg.logger.comet_api,
        project_name=cfg.general.project_name,
        workspace=cfg.general.workspace,
    )
    exp.log_parameters(hparams)

    trainer = Trainer(cfg)

    set_seed(cfg.trainer.seed)
    trainer.train(exp)
    return None
Example #18
def main():
    """主函数"""
    paddle.set_device("gpu:1")
    set_seed(2021)
    logger.info("构建数据集")
    dataset = MovieDataset(use_poster=False)
    vocab_sizes = {
        "max_usr_id": int(dataset.max_usr_id),
        "max_usr_age": int(dataset.max_usr_age),
        "max_usr_job": int(dataset.max_usr_job),
        "max_mov_id": int(dataset.max_mov_id),
        "movie_category_size": len(dataset.movie_cat),
        "movie_title_size": len(dataset.movie_title)
    }
    save_dict_obj(vocab_sizes,
                  os.path.join(work_root, "data/movie_vocab_size.json"))
    train_data_loader = dataset.load_data(dataset.train_dataset,
                                          mode="train",
                                          use_poster=False)
    eval_data_loader = dataset.load_data(dataset.valid_dataset,
                                         mode="eval",
                                         use_poster=False)

    logger.info("构建模型")
    model, opt = create_model_opt(vocab_sizes)

    logger.info("训练模型")
    EPOCHS = 10
    train(model,
          opt,
          data_loader=train_data_loader,
          eval_data_loader=eval_data_loader,
          epochs=EPOCHS)

    logger.info("构建特征信息")
    get_usr_mov_features(model, dataset.dataset)
Example #19
def main():
    parser = ArgumentParser()
    parser.add_argument('--config_file', type=str, required=True)
    args = parser.parse_args()

    # settings
    config_path = Path(args.config_file)
    config = Config.load(config_path)

    warnings.filterwarnings('ignore')
    set_seed(config.seed)
    start_time = time.time()

    with timer('load data'):
        DATA_DIR = './input/riiid-test-answer-prediction/'
        usecols = [
            'row_id',
            'timestamp',
            'user_id',
            'content_id',
            'content_type_id',
            'answered_correctly',
            'prior_question_elapsed_time',
        ]
        dtype = {
            'row_id': 'int64',
            'timestamp': 'int64',
            'user_id': 'int32',
            'content_id': 'int16',
            'content_type_id': 'int8',
            'answered_correctly': 'int8',
            'prior_question_elapsed_time': 'float32'
        }

        train_df = pd.read_csv(DATA_DIR + 'train.csv',
                               usecols=usecols,
                               dtype=dtype)
        question_df = pd.read_csv(DATA_DIR + 'questions.csv',
                                  usecols=['question_id', 'part'])

    train_df = train_df[train_df['content_type_id'] == 0].reset_index(
        drop=True)

    question_df['part'] += 1  # 0: padding id, 1: start id
    train_df['content_id'] += 2  # 0: padding id, 1: start id
    question_df['question_id'] += 2
    train_df = train_df.merge(question_df,
                              how='left',
                              left_on='content_id',
                              right_on='question_id')

    with timer('validation split'):
        train_idx, valid_idx, epoch_valid_idx = virtual_time_split(
            train_df,
            valid_size=config.valid_size,
            epoch_valid_size=config.epoch_valid_size)
        valid_y = train_df.iloc[valid_idx]['answered_correctly'].values
        epoch_valid_y = train_df.iloc[epoch_valid_idx][
            'answered_correctly'].values

    print('-' * 20)
    print(f'train size: {len(train_idx)}')
    print(f'valid size: {len(valid_idx)}')

    with timer('prepare data loader'):
        train_user_seqs = get_user_sequences(train_df.iloc[train_idx])
        valid_user_seqs = get_user_sequences(train_df.iloc[valid_idx])

        train_dataset = TrainDataset(train_user_seqs,
                                     window_size=config.window_size,
                                     stride_size=config.stride_size)
        valid_dataset = ValidDataset(train_df,
                                     train_user_seqs,
                                     valid_user_seqs,
                                     valid_idx,
                                     window_size=config.window_size)

        train_loader = DataLoader(train_dataset, **config.train_loader_params)
        valid_loader = DataLoader(valid_dataset, **config.valid_loader_params)

        # valid loader for epoch validation
        epoch_valid_user_seqs = get_user_sequences(
            train_df.iloc[epoch_valid_idx])
        epoch_valid_dataset = ValidDataset(train_df,
                                           train_user_seqs,
                                           epoch_valid_user_seqs,
                                           epoch_valid_idx,
                                           window_size=config.window_size)
        epoch_valid_loader = DataLoader(epoch_valid_dataset,
                                        **config.valid_loader_params)

    with timer('train'):
        if config.model == 'akt':
            content_encoder_config = BertConfig(
                **config.content_encoder_config)
            knowledge_encoder_config = BertConfig(
                **config.knowledge_encoder_config)
            decoder_config = BertConfig(**config.decoder_config)

            content_encoder_config.max_position_embeddings = config.window_size + 1
            knowledge_encoder_config.max_position_embeddings = config.window_size
            decoder_config.max_position_embeddings = config.window_size + 1

            model = AktEncoderDecoderModel(content_encoder_config,
                                           knowledge_encoder_config,
                                           decoder_config)

        elif config.model == 'saint':
            encoder_config = BertConfig(**config.encoder_config)
            decoder_config = BertConfig(**config.decoder_config)

            encoder_config.max_position_embeddings = config.window_size
            decoder_config.max_position_embeddings = config.window_size

            model = SaintEncoderDecoderModel(encoder_config, decoder_config)

        else:
            raise ValueError(f'Unknown model: {config.model}')

        model.to(config.device)
        model.zero_grad()

        optimizer = optim.Adam(model.parameters(), **config.optimizer_params)
        scheduler = NoamLR(optimizer, warmup_steps=config.warmup_steps)
        loss_ema = None

        for epoch in range(config.n_epochs):
            epoch_start_time = time.time()
            model.train()

            progress = tqdm(train_loader,
                            desc=f'epoch {epoch + 1}',
                            leave=False)
            for i, (x_batch, w_batch, y_batch) in enumerate(progress):
                y_pred = model(**x_batch.to(config.device).to_dict())
                loss = nn.BCEWithLogitsLoss(weight=w_batch.to(config.device))(
                    y_pred, y_batch.to(config.device))
                loss.backward()

                if (config.gradient_accumulation_steps is None
                        or (i + 1) % config.gradient_accumulation_steps == 0):
                    optimizer.step()
                    optimizer.zero_grad()
                    scheduler.step()

                loss_ema = (loss_ema * 0.9 + loss.item() * 0.1
                            if loss_ema is not None else loss.item())
                progress.set_postfix(loss=loss_ema)

            valid_preds = predict(model,
                                  epoch_valid_loader,
                                  device=config.device)
            valid_score = roc_auc_score(epoch_valid_y, valid_preds)

            elapsed_time = time.time() - epoch_start_time
            print(
                f'Epoch {epoch + 1}/{config.n_epochs} \t valid score: {valid_score:.5f} \t time: {elapsed_time / 60:.1f} min'
            )

    with timer('predict'):
        valid_preds = predict(model, valid_loader, device=config.device)
        valid_score = roc_auc_score(valid_y, valid_preds)

    print(f'valid score: {valid_score:.5f}')

    output_dir = Path(f'./output/{config_path.stem}/')
    output_dir.mkdir(parents=True, exist_ok=True)

    torch.save(model.state_dict(), output_dir / 'model.pt')
    torch.save(optimizer.state_dict(), output_dir / 'optimizer.pt')

    elapsed_time = time.time() - start_time
    print(f'all processes done in {elapsed_time / 60:.1f} min.')
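The training loop above steps the optimizer only every gradient_accumulation_steps batches and tracks an exponential moving average of the loss. A stripped-down, standalone sketch of that pattern, using a dummy linear model and random data:

import torch
import torch.nn as nn

model = nn.Linear(4, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
accumulation_steps = 4
loss_ema = None

for i in range(16):
    x, y = torch.randn(8, 4), torch.randn(8, 1)
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()                      # gradients accumulate across batches
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()                 # update once every N batches
        optimizer.zero_grad()
    loss_ema = loss.item() if loss_ema is None else 0.9 * loss_ema + 0.1 * loss.item()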
Example #20
def main(args: DictConfig):
    # Distributed training
    torch.multiprocessing.set_sharing_strategy('file_system')
    if str(args.exp.gpus) == '-1':
        args.exp.gpus = torch.cuda.device_count()

    # Secondary data args
    args.data.setting = 'in-topic' if args.data.test_id is None else 'cross-topic'
    dataset_name = args.data.path.split('/')[1]
    args.data.path = f'{ROOT_PATH}/{args.data.path}'

    # MlFlow Logging
    if args.exp.logging:
        experiment_name = f'{dataset_name}/{args.setting}-{args.data.setting}/{args.exp.task_name}'
        mlf_logger = MLFlowLogger(experiment_name=experiment_name,
                                  tracking_uri=MLFLOW_URI)
        experiment = mlf_logger._mlflow_client.get_experiment_by_name(
            experiment_name)
        if experiment is not None:
            experiment_id = experiment.experiment_id

            if args.exp.check_exisisting_hash:
                args.hash = calculate_hash(args)
                existing_runs = mlf_logger._mlflow_client.search_runs(
                    filter_string=f"params.hash = '{args.hash}'",
                    run_view_type=mlflow.tracking.client.ViewType.ACTIVE_ONLY,
                    experiment_ids=[experiment_id])
                if len(existing_runs) > 0:
                    logger.info('Skipping existing run.')
                    return
                else:
                    logger.info('No runs found - performing one.')

    #     cpnt_path = f'{ROOT_PATH}/mlruns/{experiment_id}/{run_id}/artifacts'
    # else:
    #     cpnt_path = None

    # Load pretrained model and tokenizer
    set_seed(args)
    model = instantiate(args.lightning_module, args=args)
    logger.info(f'Run arguments: \n{args.pretty()}')

    # Early stopping & Checkpointing
    early_stop_callback = EarlyStopping(
        min_delta=0.00,
        patience=args.exp.early_stopping_patience,
        verbose=False,
        mode='min')
    checkpoint_callback = CustomModelCheckpoint(
        model=model,
        verbose=True,
        mode='min',
        save_top_k=1,
        period=0 if args.exp.val_check_interval < 1.0 else 1)
    lr_logging_callback = LearningRateLogger(logging_interval='epoch')

    # Training
    trainer = Trainer(
        gpus=eval(str(args.exp.gpus)),
        logger=mlf_logger if args.exp.logging else None,
        max_epochs=args.exp.max_epochs,
        gradient_clip_val=args.optimizer.max_grad_norm,
        early_stop_callback=early_stop_callback,
        val_check_interval=args.exp.val_check_interval,
        checkpoint_callback=checkpoint_callback
        if args.exp.checkpoint else None,
        accumulate_grad_batches=args.exp.gradient_accumulation_steps,
        auto_lr_find=args.optimizer.auto_lr_find,
        precision=args.exp.precision,
        distributed_backend='dp',
        callbacks=[lr_logging_callback])
    trainer.fit(model)
    trainer.test(model)

    # Cleaning cache
    torch.cuda.empty_cache()

    # Ending the run
    if args.exp.logging:
        mlf_logger.finalize()
Example #21
    def __init__(self, yaml_path):
        config_file = yaml_path
        config = yaml.load(open(config_file), Loader=yaml.FullLoader)
        args = config["training"]
        SEED = args["seed"]
        DATASET = args["dataset"]  # Multi30k or ISWLT
        MODEL = args["model"]  # gru**2, gru_attn**2, transformer, gcn_gru, gcngru_gru, gcngruattn_gru, gcnattn_gru
        REVERSE = args["reverse"]
        BATCH_SIZE = args["batch_size"]
        ENC_EMB_DIM = args["encoder_embed_dim"]
        DEC_EMB_DIM = args["decoder_embed_dim"]
        ENC_HID_DIM = args["encoder_hidden_dim"]
        DEC_HID_DIM = args["decoder_hidden_dim"]
        ENC_DROPOUT = args["encoder_dropout"]
        DEC_DROPOUT = args["decoder_dropout"]
        NLAYERS = args["num_layers"]
        N_EPOCHS = args["num_epochs"]
        CLIP = args["grad_clip"]
        LR = args["lr"]
        LR_DECAY_RATIO = args["lr_decay_ratio"]
        ID = args["id"]
        PATIENCE = args["patience"]
        DIR = 'checkpoints/{}-{}-{}/'.format(DATASET, MODEL, ID)
        MODEL_PATH = DIR
        LOG_PATH = '{}test-log.log'.format(DIR)
        set_seed(SEED)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.config = args
        self.device = device

        if 'transformer' in MODEL:
            ENC_HEADS = args["encoder_heads"]
            DEC_HEADS = args["decoder_heads"]
            ENC_PF_DIM = args["encoder_pf_dim"]
            DEC_PF_DIM = args["decoder_pf_dim"]
            MAX_LEN = args["max_len"]
            
        SRC = Field(tokenize = lambda text: tokenize_de(text, REVERSE), 
                    init_token = '<sos>', 
                    eos_token = '<eos>', 
                    lower = True)
        TGT = Field(tokenize = tokenize_en, 
                    init_token = '<sos>', 
                    eos_token = '<eos>', 
                    lower = True)
        GRH = RawField(postprocessing=batch_graph)
        data_fields = [('src', SRC), ('trg', TGT), ('grh', GRH)]
        
        train_data = Dataset(torch.load("data/Multi30k/train_data.pt"), data_fields)
        valid_data = Dataset(torch.load("data/Multi30k/valid_data.pt"), data_fields)
        test_data = Dataset(torch.load("data/Multi30k/test_data.pt"), data_fields)
        self.train_data, self.valid_data, self.test_data = train_data, valid_data, test_data
        
        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, valid_data, test_data), 
            batch_size = BATCH_SIZE, 
            sort_key = lambda x: len(x.src),
            sort_within_batch=False,
            device = device)
        self.train_iterator, self.valid_iterator, self.test_iterator = train_iterator, valid_iterator, test_iterator
        
        SRC.build_vocab(train_data, min_freq = 2)
        TGT.build_vocab(train_data, min_freq = 2)
        self.SRC, self.TGT, self.GRH = SRC, TGT, GRH

        print(f"Number of training examples: {len(train_data.examples)}")
        print(f"Number of validation examples: {len(valid_data.examples)}")
        print(f"Number of testing examples: {len(test_data.examples)}")
        print(f"Unique tokens in source (de) vocabulary: {len(SRC.vocab)}")
        print(f"Unique tokens in target (en) vocabulary: {len(TGT.vocab)}")

        src_c, tgt_c = get_sentence_lengths(train_data)
        src_lengths = counter2array(src_c)
        tgt_lengths = counter2array(tgt_c)

        print("maximum src, tgt sent lengths: ")
        np.quantile(src_lengths, 1), np.quantile(tgt_lengths, 1)

        # Get models and corresponding training scripts

        INPUT_DIM = len(SRC.vocab)
        OUTPUT_DIM = len(TGT.vocab)
        SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
        TGT_PAD_IDX = TGT.vocab.stoi[TGT.pad_token]
        self.SRC_PAD_IDX = SRC_PAD_IDX
        self.TGT_PAD_IDX = TGT_PAD_IDX

        if MODEL == "gru**2":  # gru**2, gru_attn**2, transformer, gcn_gru
            from models.gru_seq2seq import GRUEncoder, GRUDecoder, Seq2Seq
            enc = GRUEncoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, ENC_DROPOUT)
            dec = GRUDecoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, DEC_DROPOUT)
            model = Seq2Seq(enc, dec, device).to(device)

            from src.train import train_epoch_gru, evaluate_gru, epoch_time
            train_epoch = train_epoch_gru
            evaluate = evaluate_gru
            
            self.enc, self.dec, self.model, self.train_epoch, self.evaluate = enc, dec, model, train_epoch, evaluate
            
        elif MODEL == "gru_attn**2":
            from models.gru_attn import GRUEncoder, GRUDecoder, Seq2Seq, Attention
            attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
            enc = GRUEncoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, ENC_DROPOUT)
            dec = GRUDecoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, DEC_DROPOUT, attn)
            model = Seq2Seq(enc, dec, device).to(device)

            from src.train import train_epoch_gru_attn, evaluate_gru_attn, epoch_time
            train_epoch = train_epoch_gru_attn
            evaluate = evaluate_gru_attn
            
            self.enc, self.dec, self.model, self.train_epoch, self.evaluate, self.attn = enc, dec, model, train_epoch, evaluate, attn

        elif MODEL == "transformer":
            from models.transformer import Encoder, Decoder, Seq2Seq
            enc = Encoder(INPUT_DIM, ENC_HID_DIM, NLAYERS, ENC_HEADS, 
                          ENC_PF_DIM, ENC_DROPOUT, device, MAX_LEN)
            dec = Decoder(OUTPUT_DIM, DEC_HID_DIM, NLAYERS, DEC_HEADS, 
                          DEC_PF_DIM, DEC_DROPOUT, device, MAX_LEN)
            model = Seq2Seq(enc, dec, SRC_PAD_IDX, TGT_PAD_IDX, device).to(device)

            from src.train import train_epoch_tfmr, evaluate_tfmr, epoch_time
            train_epoch = train_epoch_tfmr
            evaluate = evaluate_tfmr

            self.enc, self.dec, self.model, self.train_epoch, self.evaluate = enc, dec, model, train_epoch, evaluate
            
        elif MODEL == "gcn_gru":
            from models.gru_seq2seq import GCNEncoder, GRUDecoder, GCN2Seq
            enc = GCNEncoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, NLAYERS, ENC_DROPOUT)
            dec = GRUDecoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, DEC_DROPOUT)
            model = GCN2Seq(enc, dec, device).to(device)

            from src.train import train_epoch_gcn_gru, evaluate_gcn_gru, epoch_time
            train_epoch = train_epoch_gcn_gru
            evaluate = evaluate_gcn_gru

            self.enc, self.dec, self.model, self.train_epoch, self.evaluate = enc, dec, model, train_epoch, evaluate
            
        elif MODEL == "gcngru_gru":
            from models.gru_seq2seq import GCNGRUEncoder, GRUDecoder, GCN2Seq
            enc = GCNGRUEncoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, ENC_DROPOUT, device)
            dec = GRUDecoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, DEC_DROPOUT)
            model = GCN2Seq(enc, dec, device).to(device)

            from src.train import train_epoch_gcn_gru, evaluate_gcn_gru, epoch_time
            train_epoch = train_epoch_gcn_gru
            evaluate = evaluate_gcn_gru

            self.enc, self.dec, self.model, self.train_epoch, self.evaluate = enc, dec, model, train_epoch, evaluate
            
        elif MODEL == "gcnattn_gru":
            from models.gru_attn import GCNEncoder, GRUDecoder, GCN2Seq, Attention
            attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
            enc = GCNEncoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, ENC_DROPOUT)
            dec = GRUDecoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, DEC_DROPOUT, attn)
            model = GCN2Seq(enc, dec, device).to(device)

            from src.train import train_epoch_gcnattn_gru, evaluate_gcnattn_gru, epoch_time
            train_epoch = train_epoch_gcnattn_gru
            evaluate = evaluate_gcnattn_gru
            
            self.enc, self.dec, self.model, self.train_epoch, self.evaluate, self.attn = enc, dec, model, train_epoch, evaluate, attn

        elif MODEL == "gcngruattn_gru":
            from models.gru_attn import GCNGRUEncoder, GRUDecoder, GCN2Seq, Attention
            attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
            enc = GCNGRUEncoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, ENC_DROPOUT, device)
            dec = GRUDecoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, NLAYERS, DEC_DROPOUT, attn)
            model = GCN2Seq(enc, dec, device).to(device)

            from src.train import train_epoch_gcnattn_gru, evaluate_gcnattn_gru, epoch_time
            train_epoch = train_epoch_gcnattn_gru
            evaluate = evaluate_gcnattn_gru
            
            self.enc, self.dec, self.model, self.train_epoch, self.evaluate, self.attn = enc, dec, model, train_epoch, evaluate, attn

        else:
            raise ValueError("Wrong model choice")

        if 'gcn' in MODEL:
            from src.utils import init_weights_uniform as init_weights
        else: 
            from src.utils import init_weights_xavier as init_weights

        model.apply(init_weights)
        n_params = count_parameters(model)
        print("Model initialized...{} params".format(n_params))
        
        self.criterion = nn.CrossEntropyLoss(ignore_index=TGT_PAD_IDX)
        
        print(os.path.join(MODEL_PATH, "checkpoint.pt"))
#         try:
#             state_dict = torch.load(os.path.join(MODEL_PATH, "checkpoint.pt"), map_location=device)['model_state_dict']
#         except:
#             state_dict = torch.load(os.path.join(MODEL_PATH, "checkpoint.pt"), map_location=device)
        state_dict = torch.load(os.path.join(MODEL_PATH, "checkpoint.pt"), map_location=device)
        if 'model_state_dict' in state_dict:
            state_dict = state_dict['model_state_dict']
        model.load_state_dict(state_dict)
        self.model = model
Example #22
            f"Elapsed time: {elapsed_min}min {elapsed_sec:.4f}seconds.")


if __name__ == "__main__":
    warnings.filterwarnings("ignore")

    args = utils.get_parser().parse_args()
    config = utils.load_config(args.config)

    global_params = config["globals"]

    output_dir = Path(global_params["output_dir"])
    output_dir.mkdir(exist_ok=True, parents=True)
    logger = utils.get_logger(output_dir / "output.log")

    utils.set_seed(global_params["seed"])
    device = C.get_device(global_params["device"])

    df, datadir = C.get_metadata(config)
    splitter = C.get_split(config)

    calltype_labels = C.get_calltype_labels(df)

    if config["data"].get("event_level_labels") is not None:
        event_level_labels = C.get_event_level_labels(config)
    else:
        event_level_labels = None

    if "Multilabel" in config["split"]["name"]:
        y = calltype_labels
    else:
Example #23
def train_KFolds(train,
                 test,
                 target,
                 molecules,
                 coupling_types,
                 batch_size=1024,
                 n_folds=5,
                 seed=42,
                 debug=False):
    oof = np.zeros(len(train))
    preds = np.zeros(len(test))
    gkf = GroupKFold(n_splits=n_folds)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    criterion = nn.L1Loss()
    utils.set_seed(seed)

    for fold, (train_idx, val_idx) in enumerate(
            gkf.split(train, target, groups=molecules), 1):
        if debug:
            if fold != 1:
                continue
        xtrain, ytrain = torch.from_numpy(train[train_idx]), torch.from_numpy(
            target[train_idx])
        xval, yval = torch.from_numpy(train[val_idx]), torch.from_numpy(
            target[val_idx])
        eval_types = coupling_types[val_idx]

        train_set = TensorDataset(xtrain, ytrain)
        val_set = TensorDataset(xval, yval)

        train_loader = DataLoader(train_set,
                                  batch_size=batch_size,
                                  shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size)

        model = PointCNN().to(device).float()
        optimizer = optim.Adam(model.parameters(), lr=.0003)

        summary_path = 'runs/' + time.strftime("%Y%m%d-%H%M%S")
        model_path = configs.models_folder / time.strftime("%Y%m%d-%H%M%S")
        writer = SummaryWriter(summary_path)
        best_val_score = 2.0

        for epoch in range(1, 70):
            trn_loss = 0.0
            val_loss = 0.0

            for i, (features, targets) in enumerate(train_loader):
                writer.add_graph(model, features.float())
                features, targets = features.float().to(device), targets.to(
                    device)

                model.train()
                outputs = model(features)
                loss = criterion(outputs, targets)
                trn_loss += loss.item()

                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            with torch.no_grad():
                print('[%d] loss: %.5f' %
                      (epoch, trn_loss / len(train_loader)))
                writer.add_scalar('training loss',
                                  trn_loss / len(train_loader), epoch)

                for i, (features, targets) in enumerate(val_loader):
                    features, targets = features.float().to(
                        device), targets.to(device)

                    model.eval()
                    outputs = model(features)
                    loss = criterion(outputs, targets)
                    val_loss += loss.item()

                    if i == 0:
                        eval_set = outputs.cpu().numpy()
                    else:
                        eval_set = np.append(eval_set, outputs.cpu().numpy())

                print('[%d] validation loss: %.5f' %
                      (epoch, val_loss / len(val_loader)))
                writer.add_scalar('validation loss',
                                  val_loss / len(val_loader), epoch)

                val_score = utils.mean_log_mae(yval, eval_set, eval_types)
                print('[%d] validation score: %.5f' % (epoch, val_score))
                writer.add_scalar('validation score', val_score, epoch)

                if val_score < best_val_score:
                    torch.save(model.state_dict(), model_path)
                    best_val_score = val_score

        writer.close()
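utils.mean_log_mae is not shown in this snippet; a common definition (the mean over coupling types of the log of the per-type MAE) would look roughly like this:

import numpy as np

def mean_log_mae(y_true, y_pred, types, eps=1e-9):
    # log-MAE computed within each coupling type, then averaged over types
    y_true, y_pred, types = map(np.asarray, (y_true, y_pred, types))
    scores = []
    for t in np.unique(types):
        mask = types == t
        mae = np.mean(np.abs(y_true[mask] - y_pred[mask]))
        scores.append(np.log(mae + eps))
    return float(np.mean(scores))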
Example #24
ENC_HID_DIM = args["encoder_hidden_dim"]
DEC_HID_DIM = args["decoder_hidden_dim"]
ENC_DROPOUT = args["encoder_dropout"]
DEC_DROPOUT = args["decoder_dropout"]
NLAYERS = args["num_layers"]
N_EPOCHS = args["num_epochs"]
CLIP = args["grad_clip"]
LR = args["lr"]
LR_DECAY_RATIO = args["lr_decay_ratio"]
ID = args["id"]
PATIENCE = args["patience"]
DIR = 'checkpoints/{}-{}-{}/'.format(DATASET, MODEL, ID)
MODEL_PATH = DIR
LOG_PATH = '{}log.log'.format(DIR)
CONFIG_PATH = '{}config.yaml'.format(DIR)
set_seed(SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ensure_path_exist(DIR)
yaml.dump(config, open(CONFIG_PATH, 'w'))

if 'transformer' in MODEL:
    ENC_HEADS = args["encoder_heads"]
    DEC_HEADS = args["decoder_heads"]
    ENC_PF_DIM = args["encoder_pf_dim"]
    DEC_PF_DIM = args["decoder_pf_dim"]
    MAX_LEN = args["max_len"]

# dataset

SRC = Field(tokenize=lambda text: tokenize_de(text, REVERSE),
            init_token='<sos>',
Example #25
def train_KFolds(meta_df,
                 feature_df,
                 batch_size=1024,
                 seed=42,
                 n_folds=5,
                 debug=False):
    oof = np.zeros(len(meta_df))
    criterion = nn.BCELoss()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    set_seed(seed)

    # split 2017 and 2018 data temporarily for CV setup
    idx_2017 = meta_df.index[meta_df.Season == 2017]
    idx_2018 = meta_df.index[meta_df.Season == 2018]
    meta_df_2017, feature_df_2017 = meta_df.loc[idx_2017], feature_df[idx_2017]
    target_2017 = meta_df_2017.pop('Yards').values
    meta_df_2018, feature_df_2018 = meta_df.loc[idx_2018], feature_df[idx_2018]
    target_2018 = meta_df_2018.pop('Yards').values

    gkf = GroupKFold(n_splits=n_folds)
    games = meta_df_2018.GameId

    for fold, (train_idx, val_idx) in enumerate(
            gkf.split(feature_df_2018, target_2018, groups=games)):
        if debug:
            if fold != 1:
                continue

        # split 2018 data into training and evaluation set
        xtrain_2018, ytrain_2018 = feature_df_2018[train_idx], target_2018[
            train_idx]
        xval, yval = feature_df_2018[val_idx], target_2018[val_idx]
        yval = prepare_targets(yval)

        # append 2017 data and 2018 training data --> full train set
        xtrain = np.vstack((feature_df_2017, xtrain_2018))
        ytrain = np.hstack((target_2017, ytrain_2018))
        ytrain = prepare_targets(ytrain)

        # create torch tensors
        xtrain, ytrain = torch.from_numpy(xtrain), torch.from_numpy(ytrain)
        xval, yval = torch.from_numpy(xval), torch.from_numpy(yval)

        train_set = TensorDataset(xtrain, ytrain)
        train_loader = DataLoader(train_set,
                                  batch_size=batch_size,
                                  shuffle=True)
        val_set = TensorDataset(xval, yval)
        val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

        model = CNNTransformer().to(device)
        optimizer = optim.Adam(model.parameters(), lr=.001)

        summary_path = 'runs/' + time.strftime("%Y%m%d-%H%M%S")
        writer = SummaryWriter(summary_path)

        for epoch in range(1, 50):
            trn_loss = 0.0
            val_loss = 0.0

            for i, (features, targets) in enumerate(train_loader):
                features, targets = features.float().to(
                    device), targets.float().to(device)
                writer.add_graph(model, features)

                model.train()
                outputs = model(features)
                loss = criterion(outputs, targets)
                trn_loss += loss.item()

                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            with torch.no_grad():
                print('[%d] loss: %.5f' %
                      (epoch, trn_loss / len(train_loader)))
                writer.add_scalar('training loss',
                                  trn_loss / len(train_loader), epoch)

                for i, (features, targets) in enumerate(val_loader):
                    features, targets = features.float().to(
                        device), targets.float().to(device)

                    model.eval()
                    outputs = model(features)
                    loss = criterion(outputs, targets)
                    val_loss += loss.item()

                    if i == 0:
                        eval_set = outputs.cpu().numpy()
                    else:
                        eval_set = np.vstack((eval_set, outputs.cpu().numpy()))

                print('[%d] validation loss: %.5f' %
                      (epoch, val_loss / len(val_loader)))
                writer.add_scalar('validation loss',
                                  val_loss / len(val_loader), epoch)

                yval_cum = np.clip(np.cumsum(yval.numpy(), axis=1), 0, 1)
                eval_set = np.clip(np.cumsum(eval_set, axis=1), 0, 1)
                validation_score = ((eval_set - yval_cum)**2).sum(axis=1).sum(
                    axis=0) / (199 * yval_cum.shape[0])
                print('[%d] validation score: %.5f' %
                      (epoch, validation_score))
                writer.add_scalar('validation score', validation_score, epoch)
        writer.close()
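The validation score above is a CRPS-style metric: the squared error between the cumulative predicted and true distributions, averaged over the 199 bins used above and over samples. A standalone sketch of the same computation:

import numpy as np

def crps_score(y_true_onehot, y_pred_probs):
    # cumulative distributions, clipped to [0, 1] as in the loop above
    y_true_cum = np.clip(np.cumsum(y_true_onehot, axis=1), 0, 1)
    y_pred_cum = np.clip(np.cumsum(y_pred_probs, axis=1), 0, 1)
    return ((y_pred_cum - y_true_cum) ** 2).sum(axis=1).sum(axis=0) / (
        199 * y_true_cum.shape[0])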
Example #26
def run(config: DictConfig, logger=None):
    # ----------- setup experiment ------------------- #
    # modify the datamodule
    def train_dataloader(self):
        ds = CutMixDatasetWrapper(self.train, **config.cutmix)
        return DataLoader(ds, shuffle=True, **self.config)

    LitDataModule.train_dataloader = train_dataloader

    # setup logging
    if logger is None:
        logger = logging.getLogger(__name__)

    set_seed(config.training.seed)
    logger.info(f"using seed {config.training.seed}")
    wandb.login(key=config.logger.api)

    # init wandb logger
    wb = load_obj(config.logger.class_name)(**config.logger.params)

    # log the training config to wandb
    # create a new hparam dictionary with the relevant hparams and
    # log the hparams to wandb
    wb_hparam = OrderedDict({
        "training_fold": config.fold_num,
        "input_dims": config.training.image_dim,
        "batch_size": config.training.dataloaders.batch_size,
        "optimizer": config.optimizer.class_name,
        "scheduler": config.scheduler.class_name,
        "learning_rate": config.optimizer.params.lr,
        "weight_decay": config.optimizer.params.weight_decay,
        "num_epochs": config.training.num_epochs,
    })
    wb.log_hyperparams(wb_hparam)

    # ----------- prepare datasets ------------------- #
    logger.info("Prepare Training/Validation Datasets.")

    processor = Preprocessor(config.csv_dir, config.json_dir, config.image_dir,
                             5)
    df = pd.read_csv(config.fold_csv_dir)
    imsdir = config.image_dir
    df["filePath"] = [
        os.path.join(imsdir, df.image_id[i]) for i in range(len(df))
    ]

    processor.dataframe = df
    fold_num = config.fold_num
    trainFold, valFold = processor.get_fold(fold_num)
    # testFold, valFold = train_test_split(valFold, stratify=valFold.label, test_size=0.5)

    trainFold.reset_index(drop=True, inplace=True)
    # testFold.reset_index(drop=True, inplace=True)
    valFold.reset_index(drop=True, inplace=True)

    # init weights for loss function
    weights = None  # no weights for cutmix

    tfms_config = config.augmentation
    trn_augs = A.Compose(
        [
            load_obj(augs.class_name)(**augs.params)
            for augs in tfms_config.train_augs
        ],
        p=1.0,
    )
    valid_augs = A.Compose(
        [
            load_obj(augs.class_name)(**augs.params)
            for augs in tfms_config.valid_augs
        ],
        p=1.0,
    )
    test_augs = A.Compose(
        [
            load_obj(augs.class_name)(**augs.params)
            for augs in tfms_config.test_augs
        ],
        p=1.0,
    )

    tfms = {
        "train": trn_augs,
        "valid": valid_augs,
        "test": test_augs,
    }
    # init datamodule
    dl_config = config.training.dataloaders
    dm = LitDataModule(trainFold, valFold, valFold, tfms, dl_config)
    dm.setup()

    # set training total steps
    config.training.total_steps = (len(dm.train_dataloader()) *
                                   config.training.num_epochs)

    logger.info(f"Train dataset size: {len(dm.train_dataloader())}")
    logger.info(f"Validation dataset size: {len(dm.val_dataloader())}")

    # ----------- load lightning trainer ------------------- #

    trainer_cfg = config.lightning
    # init lightning callbacks
    chkpt = pl.callbacks.ModelCheckpoint(**trainer_cfg.model_checkpoint)

    cb_config = config.lightning.callbacks
    cbs = [
        load_obj(module.class_name)(**module.params) for module in cb_config
    ]

    if config.log_to_stdout:
        cbs.append(PrintCallback(log=logger))

    # init trainer
    args = trainer_cfg.init_args
    trainer = pl.Trainer(callbacks=cbs,
                         checkpoint_callback=chkpt,
                         logger=wb,
                         **args)

    # ----------- init lightning module ------------------- #
    logger.info("Build network.")
    model = LitModel(config, weights=weights)
    # update model loss function to soft cross entropy loss
    model.loss_fn = SoftTargetCrossEntropy(weight=weights)
    model.unfreeze_classifier()

    wb.watch(model.net)

    model_name = config.model.params.model_name or config.model.class_name

    logger.info(f"Init from base net: {model_name}")
    logger.info(
        f"Uses {str(config.optimizer.class_name).split('.')[-1]} optimizer.")
    logger.info(
        f"Learning Rate: {config.optimizer.params.lr}, Weight Decay: {config.optimizer.params.weight_decay}"
    )
    logger.info(
        f"Uses {str(config.scheduler.class_name).split('.')[-1]} scheduler.")

    tr_config = config.training

    logger.info(
        f"Training over {tr_config.num_epochs} epochs ~ {tr_config.total_steps} steps."
    )

    # ----------- start train/validation/test ------------------- #
    # Pass the datamodule as arg to trainer.fit to override model hooks :)
    trainer.fit(model, datamodule=dm)
    # Compute metrics on test dataset
    _ = trainer.test(model, datamodule=dm, ckpt_path=chkpt.best_model_path)

    # ----------- finish experiment/cleanup/save weights ------------------- #
    PATH = chkpt.best_model_path  # path to the best performing model
    WEIGHTS_PATH = config.training.model_save_dir

    # init best model
    logger.info(f"Restored best model weights from {PATH}.")
    params = {"config": config, "weights": weights}

    loaded_model = model.load_from_checkpoint(PATH, **params)
    torchmodel = loaded_model.net

    torch.save(torchmodel.state_dict(), WEIGHTS_PATH)
    # upload the weights file to wandb
    wandb.save(WEIGHTS_PATH)

    # upload the full config file to wandb
    conf_pth = f"{config.run_name}.yaml"
    OmegaConf.save(config, f=conf_pth)
    logger.info(f"Saved config file {conf_pth}.")

    wandb.save(conf_pth)

    logger.info(f"Saved model {WEIGHTS_PATH}.")

    wandb.finish()
Example #27
def load_cfg() -> Tuple[Config, str]:
    from src.dict2obj import Config
    from src.base import Coach
    from src.utils import gpu, set_seed, load_checkpoint

    cfg = Config()
    set_seed(opts.seed)

    # the model and other settings for training
    model = load_model(opts.model)(num_classes=get_num_classes(opts.dataset),
                                   scale=opts.scale)
    device = gpu(model)

    # load the dataset
    trainset = load_dataset(dataset_type=opts.dataset,
                            transform=opts.transform,
                            train=True)
    cfg['trainloader'] = load_dataloader(dataset=trainset,
                                         batch_size=opts.batch_size,
                                         train=True,
                                         show_progress=opts.progress)
    testset = load_dataset(dataset_type=opts.dataset,
                           transform=opts.transform,
                           train=False)
    cfg['testloader'] = load_dataloader(dataset=testset,
                                        batch_size=opts.batch_size,
                                        train=False,
                                        show_progress=opts.progress)
    normalizer = load_normalizer(dataset_type=opts.dataset)

    # load the optimizer and learning_policy
    optimizer = load_optimizer(model=model,
                               optim_type=opts.optimizer,
                               lr=opts.lr,
                               momentum=opts.momentum,
                               betas=(opts.beta1, opts.beta2),
                               weight_decay=opts.weight_decay)
    learning_policy = load_learning_policy(
        optimizer=optimizer,
        learning_policy_type=opts.learning_policy,
        T_max=opts.epochs)

    # generate the path for logging information and saving parameters
    cfg['info_path'], cfg['log_path'] = generate_path(
        method=METHOD,
        dataset_type=opts.dataset,
        model=opts.model,
        description=opts.description)
    if opts.resume:
        cfg['start_epoch'] = load_checkpoint(path=cfg.info_path,
                                             model=model,
                                             optimizer=optimizer,
                                             lr_scheduler=learning_policy)
    else:
        cfg['start_epoch'] = 0

    cfg['coach'] = Coach(model=model,
                         device=device,
                         loss_func=load_loss_func(opts.loss)(model=model),
                         normalizer=normalizer,
                         optimizer=optimizer,
                         learning_policy=learning_policy)

    # for validation
    cfg['valider'] = load_valider(model=model,
                                  device=device,
                                  dataset_type=opts.dataset)
    return cfg
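# Illustrative usage sketch (an assumption, not from the original source): the
# returned Config bundles everything the training loop needs, roughly
#
#     cfg = load_cfg()
#     for epoch in range(cfg['start_epoch'], opts.epochs):
#         ...  # train one epoch via cfg['coach'], then evaluate via cfg['valider']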
Example #28
def main(params):
    # setup random seeds
    set_seed(params.seed)
    params.ar = True

    exp_path = os.path.join(params.dump_path, params.exp_name)
    # create exp path if it doesn't exist
    if not os.path.exists(exp_path):
        os.makedirs(exp_path)
    # create logger
    logger = create_logger(os.path.join(exp_path, 'train.log'), 0)
    logger.info("============ Initialized logger ============")
    logger.info("Random seed is {}".format(params.seed))
    logger.info("\n".join("%s: %s" % (k, str(v))
                          for k, v in sorted(dict(vars(params)).items())))
    logger.info("The experiment will be stored in %s\n" % exp_path)
    logger.info("Running command: %s" % 'python ' + ' '.join(sys.argv))
    logger.info("")
    # load data
    data, loader = load_smiles_data(params)
    if params.data_type == 'ChEMBL':
        smiles_path = os.path.join(params.data_path, 'guacamol_v1_all.smiles')
    else:
        smiles_path = os.path.join(params.data_path, 'QM9_all.smiles')
    with open(smiles_path, 'r') as f:
        all_smiles_mols = f.readlines()
    train_data, val_data = data['train'], data['valid']
    dico = data['dico']
    logger.info('train_data len is {}'.format(len(train_data)))
    logger.info('val_data len is {}'.format(len(val_data)))

    # keep cycling through train_loader forever
    # stop when max iters is reached
    def rcycle(iterable):
        saved = []                 # in-memory cache of batches seen so far
        for element in iterable:
            yield element
            saved.append(element)
        while saved:
            random.shuffle(saved)  # reshuffle the cached batches on every pass
            for element in saved:
                yield element
    train_loader = rcycle(train_data.get_iterator(shuffle=True, group_by_size=True, n_sentences=-1))
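    # Illustrative note (added): rcycle yields the underlying iterator once in
    # its natural order while caching it, then loops forever over the cache,
    # reshuffling between passes, e.g.
    #     import itertools
    #     list(itertools.islice(rcycle(range(3)), 6))  # -> [0, 1, 2] + a shuffled [0, 1, 2]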

    # extra param names for transformermodel
    params.n_langs = 1
    # build Transformer model
    model = TransformerModel(params, is_encoder=False, with_output=True)

    if params.local_cpu is False:
        model = model.cuda()
    opt = get_optimizer(model.parameters(), params.optimizer)
    scores = {'ppl': float('inf'), 'acc': 0}

    if params.load_path:
        reloaded_iter, scores = load_model(params, model, opt, logger)

    for total_iter, train_batch in enumerate(train_loader):
        if params.load_path:
            total_iter += reloaded_iter + 1

        epoch = total_iter // params.epoch_size
        if total_iter == params.max_steps:
            logger.info("============ Done training ... ============")
            break
        elif total_iter % params.epoch_size == 0:
            logger.info("============ Starting epoch %i ... ============" % epoch)
        model.train()
        opt.zero_grad()
        train_loss = calculate_loss(model, train_batch, params)
        train_loss.backward()
        if params.clip_grad_norm > 0:
            clip_grad_norm_(model.parameters(), params.clip_grad_norm)
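            # Note (added): clip_grad_norm_ rescales all gradients in place so
            # that their combined L2 norm is at most params.clip_grad_norm.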
        opt.step()
        if total_iter % params.print_after == 0:
            logger.info("Step {} ; Loss = {}".format(total_iter, train_loss))

        if total_iter > 0 and total_iter % params.epoch_size == (params.epoch_size - 1):
            # run eval step (calculate validation loss)
            model.eval()
            n_chars = 0
            xe_loss = 0
            n_valid = 0
            logger.info("============ Evaluating ... ============")
            val_loader = val_data.get_iterator(shuffle=True)
            for val_iter, val_batch in enumerate(val_loader):
                with torch.no_grad():
                    val_scores, val_loss, val_y = calculate_loss(model, val_batch, params, get_scores=True)
                # update stats
                n_chars += val_y.size(0)
                xe_loss += val_loss.item() * len(val_y)
                n_valid += (val_scores.max(1)[1] == val_y).sum().item()

            ppl = np.exp(xe_loss / n_chars)
            acc = 100. * n_valid / n_chars
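            # Note (added): xe_loss accumulates the summed token-level cross
            # entropy, so ppl is the per-token perplexity and acc the percentage
            # of correctly predicted tokens on the validation set.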
            logger.info("Acc={}, PPL={}".format(acc, ppl))
            if acc > scores['acc']:
                scores['acc'] = acc
                scores['ppl'] = ppl
                save_model(params, data, model, opt, dico, logger, 'best_model', epoch, total_iter, scores)
                logger.info('Saving new best_model {}'.format(epoch))
                logger.info("Best Acc={}, PPL={}".format(scores['acc'], scores['ppl']))

            logger.info("============ Generating ... ============")
            number_samples = 100
            gen_smiles = generate_smiles(params, model, dico, number_samples)
            generator = ARMockGenerator(gen_smiles)

            # a score of -1 marks a benchmark that failed to evaluate
            try:
                benchmark = ValidityBenchmark(number_samples=number_samples)
                validity_score = benchmark.assess_model(generator).score
            except Exception:
                validity_score = -1
            try:
                benchmark = UniquenessBenchmark(number_samples=number_samples)
                uniqueness_score = benchmark.assess_model(generator).score
            except Exception:
                uniqueness_score = -1

            try:
                benchmark = KLDivBenchmark(number_samples=number_samples, training_set=all_smiles_mols)
                kldiv_score = benchmark.assess_model(generator).score
            except Exception:
                kldiv_score = -1
            logger.info('Validity Score={}, Uniqueness Score={}, KlDiv Score={}'.format(validity_score, uniqueness_score, kldiv_score))
            save_model(params, data, model, opt, dico, logger, 'model', epoch, total_iter, {'ppl': ppl, 'acc': acc})
Example #29
def main():
    # Set seed for reproducibility
    set_seed(seed_value=12345, use_cuda=False)

    print("Hello world!")
                        help='Choose the device for testing')
    parser.add_argument('--dim',
                        default=100,
                        type=int,
                        help='dim for feature size (default: 100)')
    parser.add_argument('--distance_type',
                        default='euclidean',
                        type=str,
                        help='Distance type for testing')

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    set_seed(123)
    test_dataset = MiniDataset(args.test_csv, args.test_data_dir)

    test_loader = DataLoader(test_dataset,
                             batch_size=args.n_way *
                             (args.n_query + args.n_shot),
                             num_workers=3,
                             pin_memory=False,
                             worker_init_fn=worker_init_fn,
                             sampler=TestSampler(args.testcase_csv))

    # TODO: load your model
    model = Convnet4(out_channels=args.dim).to(args.device)
    model.load_state_dict(torch.load(args.model))
    distance = Distance(args)
    if args.distance_type == 'param':