예제 #1
0
파일: run.py 프로젝트: imoken1122/signate
def main():
    """Entry point: read a JSON experiment config, run cross-validated
    training, log the CV results, and write a submission file.

    Relies on module-level names defined elsewhere in this file:
    CONFIG_FILE, VERSION, RS, K, imp, TASK, load_data, get_logger, Trainer.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="./configs/" + CONFIG_FILE)
    option = parser.parse_args()

    # Use a context manager so the config file handle is closed promptly;
    # the original `json.load(open(...))` leaked the file descriptor.
    with open(option.config) as f:
        config = json.load(f)
    model_name = config["model"]
    features = config["features"]
    target_name = config["target_name"]
    params = config["model_params"]

    X, y, test = load_data(features, target_name)

    # Log the run setup so each experiment is reproducible from the log.
    # Fetch the logger once instead of calling get_logger() per line.
    logger = get_logger(VERSION)
    logger.info(test.shape)
    logger.info(option.config)
    logger.info(params)
    logger.info("====== CV score ======")

    trainer = Trainer(model_name, params, features)
    val_score, val_log = trainer.cross_validation(X,
                                                  y,
                                                  test,
                                                  random_state=RS,
                                                  n_split=K,
                                                  importance_f=imp,
                                                  task=TASK)
    logger.info(np.mean(val_score["lgb"]))
    logger.info(val_log)

    # prediction
    preds = trainer.predict()

    # submit
    trainer.create_submit(preds, val_score)
예제 #2
0
def main(cfg: DictConfig):
    """Train and predict for the Nishika second-hand apartment price task.

    Loads (or unpickles) the dataset, preprocesses it, builds the model
    selected by ``cfg.exp.model`` ('lgb' or 'cat'), then runs CV training,
    prediction, and feature-importance extraction via ``Trainer``.
    Experiment tracking goes to Comet ML on a best-effort basis.
    """
    print('Nishika Second-hand Apartment Price Training')
    # Hydra changes the CWD per run; switch back so relative paths work.
    cur_dir = hydra.utils.get_original_cwd()
    os.chdir(cur_dir)
    data_dir = './input'

    seed_everything(cfg.data.seed)

    experiment = Experiment(api_key=cfg.exp.api_key,
                            project_name=cfg.exp.project_name,
                            auto_output_logging='simple',
                            auto_metric_logging=False)

    experiment.log_parameters(dict(cfg.data))

    # Config  ####################################################################################
    del_tar_col = ['取引時点']
    id_col = 'ID'
    tar_col = '取引価格(総額)_log'
    g_col = 'year'
    criterion = MAE
    cv = KFold(n_splits=cfg.data.n_splits,
               shuffle=True,
               random_state=cfg.data.seed)
    # cv = GroupKFold(n_splits=5)

    # Load Data  ####################################################################################
    if cfg.exp.use_pickle:
        # Load the already-preprocessed DataFrame from pickle.
        df = unpickle('./input/data.pkl')

    else:
        df = load_data(data_dir,
                       sampling=cfg.data.sampling,
                       seed=cfg.data.seed,
                       id_col=id_col,
                       target_col=tar_col)
        # Preprocessing
        print('Preprocessing')
        df = preprocessing(df, cfg)

        # Cache the preprocessed DataFrame as a pickle for later runs.
        to_pickle('./input/data.pkl', df)
        try:
            experiment.log_asset(file_data='./input/data.pkl',
                                 file_name='data.pkl')
        except Exception as e:
            # Best-effort upload: keep training even if Comet logging fails,
            # but surface the reason instead of silently swallowing it
            # (a bare `except:` would also hide KeyboardInterrupt/SystemExit).
            print(f'Failed to log asset: {e}')

    features = [c for c in df.columns if c not in del_tar_col]

    # Model  ####################################################################################
    if cfg.exp.model == 'lgb':
        model = LGBMModel(dict(cfg.lgb))
    elif cfg.exp.model == 'cat':
        model = CatBoostModel(dict(cfg.cat))
    else:
        # Fail fast with a clear message rather than passing model=None
        # into Trainer and crashing later with an opaque error.
        raise ValueError(f'Unknown model: {cfg.exp.model}')

    # Train & Predict  ##############################################################################
    trainer = Trainer(model, id_col, tar_col, g_col, features, cv, criterion,
                      experiment)
    trainer.fit(df)
    trainer.predict(df)
    trainer.get_feature_importance()
예제 #3
0
class Emulator(tune.Trainable):
    """Ray Tune trainable that wraps data loading, model construction, and
    the per-epoch train/eval loop for an emulator model.

    ``config`` carries the searched hyperparameters plus a nested
    ``hc_config`` dict with fixed (non-searched) settings: data options,
    optimizer/loss selection, and early-stopping grace period.
    """

    def _setup(self, config):
        # Ray Tune lifecycle hook: build dataloaders, model, optimizer,
        # loss function, and the project Trainer before the first _train().

        self.config = config

        # Fixed, non-searched configuration lives under 'hc_config'.
        self.hc_config = config['hc_config']
        self.is_tune = self.hc_config['is_tune']

        activation = torch.nn.ReLU()

        # Training loader drops the last partial batch; eval keeps it so
        # every sample is scored.
        train_loader = get_dataloader(
            self.hc_config,
            partition_set='train',
            is_tune=self.is_tune,
            small_aoi=self.hc_config['small_aoi'],
            fold=-1,
            batch_size=self.hc_config['batch_size'],
            shuffle=True,
            drop_last=True,
            num_workers=self.hc_config['num_workers'],
            pin_memory=self.hc_config['pin_memory'])
        eval_loader = get_dataloader(self.hc_config,
                                     partition_set='eval',
                                     is_tune=self.is_tune,
                                     small_aoi=self.hc_config['small_aoi'],
                                     fold=-1,
                                     batch_size=self.hc_config['batch_size'],
                                     shuffle=True,
                                     drop_last=False,
                                     num_workers=self.hc_config['num_workers'],
                                     pin_memory=self.hc_config['pin_memory'])

        # Choose the architecture: a plain dense net for non-temporal data,
        # an LSTM head otherwise. Input width comes from the dataset itself.
        if not self.hc_config['is_temporal']:
            model = DENSE(input_size=train_loader.dataset.num_dynamic +
                          train_loader.dataset.num_static,
                          hidden_size=config['dense_hidden_size'],
                          num_layers=config['dense_num_layers'],
                          activation=activation,
                          dropout_in=config['dropout_in'],
                          dropout_linear=config['dropout_linear'])
        else:
            model = LSTM(num_dynamic=train_loader.dataset.num_dynamic,
                         num_static=train_loader.dataset.num_static,
                         lstm_hidden_size=config['lstm_hidden_size'],
                         lstm_num_layers=config['lstm_num_layers'],
                         dense_hidden_size=config['dense_hidden_size'],
                         dense_num_layers=config['dense_num_layers'],
                         output_size=1,
                         dropout_in=config['dropout_in'],
                         dropout_lstm=config['dropout_lstm'],
                         dropout_linear=config['dropout_linear'],
                         dense_activation=activation)

        # Guard: downstream Trainer/save/restore assume the BaseModule API.
        if not isinstance(model, BaseModule):
            raise ValueError(
                'The model is not a subclass of models.modules:BaseModule')

        # NOTE(review): the config key is 'Adam' but AdamW (decoupled
        # weight decay) is instantiated — confirm this is intentional.
        if self.hc_config['optimizer'] == 'Adam':
            optimizer = torch.optim.AdamW(model.parameters(),
                                          config['learning_rate'],
                                          weight_decay=config['weight_decay'])
        else:
            raise ValueError(
                f'Optimizer {self.hc_config["optimizer"]} not defined.')

        if self.hc_config['loss_fn'] == 'MSE':
            loss_fn = torch.nn.MSELoss()
        else:
            raise ValueError(
                f'Loss function {self.hc_config["loss_fn"]} not defined.')

        self.trainer = Trainer(
            train_loader=train_loader,
            eval_loader=eval_loader,
            model=model,
            optimizer=optimizer,
            loss_fn=loss_fn,
            train_seq_length=self.hc_config['time']['train_seq_length'],
            train_sample_size=self.hc_config['train_sample_size'])

    def _train(self):
        # One Tune iteration = one training epoch + one evaluation epoch.
        # Returns a merged stats dict for Tune's scheduler/reporting.
        train_stats = self.trainer.train_epoch()
        eval_stats = self.trainer.eval_epoch()

        stats = {**train_stats, **eval_stats}

        # Disable early stopping until the 'grace period' is reached.
        if stats['epoch'] < self.hc_config['grace_period']:
            stats['patience_counter'] = -1

        return stats

    def _stop(self):
        # On shutdown outside a tuning run, persist the model two levels
        # above the Tune log dir; during tuning, Tune manages checkpoints.
        if not self.is_tune:
            self._save(os.path.dirname(os.path.dirname(self.logdir)))

    def _predict(self, prediction_dir, predict_training_set=False):
        # Delegate inference to the project Trainer; writes to prediction_dir.

        self.trainer.predict(prediction_dir, predict_training_set)

    def _save(self, path):
        # Tune checkpoint hook: save model state under <path>/model.pth and
        # return the checkpoint path (whatever Trainer.save returns).
        path = os.path.join(path, 'model.pth')
        return self.trainer.save(path)

    def _restore(self, path):
        # Tune checkpoint hook: reload model/trainer state from `path`.
        self.trainer.restore(path)