コード例 #1
0
    def _generate_eval_video(self):
        """Record an evaluation match and upload it to Neptune as an HTML page.

        The environments returned by ``self._env_func()`` are handed to
        ``PygameInteractiveEnvRecorder`` (player 1 is ``self.model``, player 2
        is ``"human"``), which writes ``movie.mp4`` into a temporary
        directory. The recording is then base64-embedded into a ``<video>``
        tag and sent as an artifact named after the current iteration and
        timestep count.
        """
        env_main, env1, env2 = self._env_func()
        with tempfile.TemporaryDirectory(dir="/tmp") as temp:
            path_to_video = os.path.join(temp, "movie.mp4")
            PygameInteractiveEnvRecorder(env=env_main,
                                         fps=60,
                                         win_size=(640 * 1.5, 480 * 1.5),
                                         p1={
                                             "policy": self.model,
                                             "env": env1,
                                             "frameskip": self._params["frameskip"],
                                         },
                                         p2={
                                             "policy": "human",
                                             "env": env2,
                                             "frameskip": 1,
                                         },
                                         record_output_path=path_to_video,
                                         render_n_frames_after_done=250,
                                         render=False).run()

            # Context managers guarantee both handles are closed (and the
            # HTML is flushed to disk) before the artifact is uploaded.
            with open(path_to_video, "rb") as video_file:
                encoded = base64.b64encode(video_file.read())
            html = f'<video controls><source type="video/mp4" ' \
                   f'src="data:video/mp4;base64,{encoded.decode("utf-8")}"></video>'
            # The same temp path is reused for the HTML wrapper; only the
            # artifact destination below carries the ".html" extension.
            with open(path_to_video, "w+") as html_file:
                html_file.write(html)

            neptune.send_artifact(path_to_video, f"movies/movie_{self._iteration}_{self._context.num_timesteps}.html")
コード例 #2
0
def main():
    """Create the train/validation index split and log it to Neptune.

    Loads the ``SK_ID_CURR``/``TARGET`` columns from the raw application
    table, splits them with ``train_test_split`` and, inside a Neptune
    experiment, writes each part to a CSV file, uploads it as an artifact
    and records its md5 hash as an experiment property.
    """
    neptune.init(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                 project_qualified_name=os.getenv('NEPTUNE_PROJECT'))

    raw_table_path = os.path.join(RAW_DATA_DIRPATH,
                                  'application_train.csv.zip')
    raw_table = pd.read_csv(raw_table_path, nrows=NROWS)
    index_table = raw_table[['SK_ID_CURR', 'TARGET']]

    experiment = neptune.create_experiment(name='validation schema',
                                           tags=['processed', 'validation'],
                                           upload_source_files=get_filepaths())
    with experiment:
        train_idx, valid_idx = train_test_split(index_table,
                                                test_size=TEST_SIZE,
                                                random_state=SEED)

        # (csv file name, property key, frame) -- train is handled first.
        splits = (
            ('train_idx.csv', 'train_split_version', train_idx),
            ('valid_idx.csv', 'valid_split_version', valid_idx),
        )
        for file_name, property_key, frame in splits:
            split_path = os.path.join(INTERIM_FEATURES_DIRPATH, file_name)
            frame.to_csv(split_path, index=None)
            neptune.send_artifact(split_path)
            neptune.set_property(property_key, md5_hash(split_path))
コード例 #3
0
    def handle_directories(self):
        """Exercise directory-style artifact upload, download and deletion.

        The expected outcome of most steps depends on ``self._api_version``:
        with version 1 the operations are performed (or fail with the
        listed error), otherwise the matching "unsupported" exception is
        expected.
        """
        experiment = neptune.get_experiment()

        # Upload a whole directory, then try to download all artifacts.
        neptune.send_artifact(self.data_dir)
        legacy_api = self._api_version == 1
        if legacy_api:
            download_all_check = self.with_check_if_file_appears("output.zip")
        else:
            download_all_check = self.with_assert_raises(
                DownloadArtifactsUnsupportedException)
        with download_all_check:
            experiment.download_artifacts()

        # Create some nested artifacts.
        nested_destinations = (
            "main dir/sub dir/art1",
            "main dir/sub dir/art2",
            "main dir/sub dir/art3",
        )
        for nested_destination in nested_destinations:
            neptune.log_artifact(self.img_path,
                                 destination=nested_destination)

        # Downloading a single artifact that does not exist.
        missing_error = (FileNotFound if legacy_api
                         else DownloadArtifactUnsupportedException)
        with self.with_assert_raises(missing_error):
            experiment.download_artifact("main dir/sub dir/art100")

        # Downloading an artifact directory.
        directory_error = (HTTPError if legacy_api
                           else DownloadArtifactUnsupportedException)
        with self.with_assert_raises(directory_error):
            experiment.download_artifact("main dir/sub dir")

        # Deleting an existing artifact.
        neptune.delete_artifacts("main dir/sub dir/art1")

        # Deleting a non-existing artifact, then the whole directory.
        for target in ("main dir/sub dir/art100", "main dir/sub dir"):
            if legacy_api:
                neptune.delete_artifacts(target)
                continue
            with self.with_assert_raises(
                    DeleteArtifactUnsupportedInAlphaException):
                neptune.delete_artifacts(target)
コード例 #4
0
 def print_table_input_to_message(self):
     """Write the input-pair -> message table to a text file and upload it.

     The table has a header row and a label column; the cell for an input
     pair holds the distinct messages recorded in ``self.input_to_message``
     (symbols joined by one space, messages by two). Cells listed in
     ``self.test_targets`` get a leading ``*``.
     """
     header_row = ['x'] + list(range(self.opts.n_features))
     grid = [header_row]
     for row_label in range(self.opts.n_features):
         grid.append([row_label] + [None] * self.opts.n_features)

     for (input1, input2), messages in self.input_to_message.items():
         rendered_messages = []
         for message in set(messages):
             rendered_messages.append(' '.join(str(s) for s in message))
         # +1 skips the header row / label column.
         grid[input1 + 1][input2 + 1] = '  '.join(rendered_messages)

     for a, b in self.test_targets:
         grid[a + 1][b + 1] = '*' + grid[a + 1][b + 1]

     filename = f'{self.prefix}input_to_message_{self.epoch_counter}.txt'
     with open(file=filename, mode='w', encoding='utf-8') as file:
         file.write(tabulate(grid, tablefmt='fancy_grid'))
     neptune.send_artifact(filename)
コード例 #5
0
ファイル: nereid.py プロジェクト: ufwt/berbalang
def build_experiment(conf, logpath, resultname, csvfile):
    """Replay a statistics CSV into a Neptune experiment.

    The first line of ``csvfile`` provides the column names; columns 0, 4
    and 5 of every following line are printed and later logged as metrics.
    The experiment is named after the champion read from ``logpath``.
    Two images and two CSV artifacts are uploaded from fixed paths.
    ``resultname`` is not used by this function.
    """
    with open(csvfile) as stats_f:
        header_line = stats_f.readline()
        params = header_line.strip().split(",")
        stats = stats_f.readlines()

    # First pass: echo the values that will be logged.
    for row in stats:
        fields = row.strip().split(",")
        counter, scalar_fit, priority_fit = fields[0], fields[4], fields[5]
        print(counter, scalar_fit, priority_fit)

    # The values below are read/built but not used further in this function.
    exp_config = read_toml(conf)
    exp_number = 1

    champ = read_berb_log(logpath)
    exp_name = champ['chromosome']['name']
    exp_desc = str(champ['tag'])

    exp_params = {
        "some_param": 0.1,
        "other_param": 128,
        "yet_another_param": 31337
    }

    exp_log_artifact = ["data/champion_statistics.csv", "mean_statistics.csv"]

    # Neptune init
    neptune.init('special-circumstances/sandbox', api_token=None)
    neptune.create_experiment(name=exp_name, params=exp_params)

    # Second pass: log the same three columns under their header names.
    for row in stats:
        fields = row.strip().split(",")
        counter, scalar_fit, priority_fit = fields[0], fields[4], fields[5]
        neptune.log_metric(params[0], int(counter))
        neptune.log_metric(params[4], float(scalar_fit))
        neptune.log_metric(params[5], float(priority_fit))

    images = (
        ('pleasures_1',
         "/home/armadilo/projects/neptune/data/clamp-liked-zeros-count-pleasures.png"),
        ('pleasures_2',
         "/home/armadilo/projects/neptune/data/lamas-koala-zero-count-pleasures.png"),
    )
    for channel, image_path in images:
        neptune.log_image(channel, image_path)

    artifacts = (
        '/home/armadilo/projects/neptune/data/champion_statistics.csv',
        '/home/armadilo/projects/neptune/data/mean_statistics.csv',
    )
    for artifact_path in artifacts:
        neptune.send_artifact(artifact_path)
 def visualize_embeddings(self):
     """Project the receiver embeddings to 2D with PCA and log the result.

     The projected coordinates are saved to ``embs.txt`` and uploaded as an
     artifact; a scatter plot annotated with the indices 0-9 is sent to the
     ``embeddings`` channel and also saved as ``figx.png``.
     """
     weight = self.game.receiver.embedding.weight
     embeddings = weight.detach().transpose(1, 0)

     projector = PCA(n_components=2)
     projected = projector.fit_transform(embeddings)

     np.savetxt('embs.txt', projected)
     neptune.send_artifact('embs.txt')

     first_component = projected[:, 0]
     second_component = projected[:, 1]
     ax = sns.scatterplot(x=first_component, y=second_component)
     # Label the first ten projected points with their row index.
     for index in range(10):
         ax.annotate(str(index), projected[index], size=20)

     sns.despine(left=True, bottom=True)
     plt.xlabel('First principal component')
     plt.ylabel('Second principal component')

     figure = ax.get_figure()
     send_figure(figure, channel_name='embeddings')
     figure.savefig('figx.png')
     plt.close(figure)
コード例 #7
0
    def handle_files_and_images(self):
        """Exercise image upload plus artifact upload, download and deletion.

        Covers: an image with name/description, an artifact uploaded with
        the default and with a custom destination (each downloaded to the
        default location and to ``custom_dest``), artifacts placed under a
        destination directory, deletion of one of them, and upload from an
        open file stream.
        """
        # image
        # `image_name` and `description` will be lost (`send_image` the same as `log_image`)
        neptune.send_image("image",
                           self.img_path,
                           name="name",
                           description="desc")

        # (extra upload kwargs, artifact name, expected path in custom dir):
        # first the default destination, then a custom one.
        upload_cases = (
            ({}, "text.txt", "custom_dest/text.txt"),
            ({"destination": "something.txt"},
             "something.txt",
             "custom_dest/something.txt"),
        )
        for upload_kwargs, artifact_name, custom_path in upload_cases:
            neptune.send_artifact(self.text_file_path, **upload_kwargs)
            experiment = neptune.get_experiment()
            with self.with_check_if_file_appears(artifact_name):
                experiment.download_artifact(artifact_name)
            with self.with_check_if_file_appears(custom_path):
                experiment.download_artifact(artifact_name, "custom_dest")

        # destination dirs
        for nested_destination in ("dir/text file artifact",
                                   "dir/artifact_to_delete"):
            neptune.log_artifact(self.text_file_path,
                                 destination=nested_destination)

        # deleting
        neptune.delete_artifacts("dir/artifact_to_delete")

        # streams
        with open(self.text_file_path, mode="r") as stream:
            neptune.send_artifact(stream, destination="file stream.txt")
                                   'search_hyperopt_tpe.py',
                                   'search_hyperopt_basic.py', 'utils.py'
                               ]):
    trials = Trials()
    _ = fmin(objective, SPACE, trials=trials, algo=tpe.suggest, **HPO_PARAMS)

    results = hpo_utils.hyperopt2skopt(trials, SPACE)

    best_auc = -1.0 * results.fun
    best_params = results.x

    # log metrics
    print('Best Validation AUC: {}'.format(best_auc))
    print('Best Params: {}'.format(best_params))

    neptune.send_metric('validation auc', best_auc)

    # log results
    joblib.dump(trials, 'artifacts/hyperopt_trials.pkl')
    joblib.dump(results, 'artifacts/hyperopt_results.pkl')
    joblib.dump(SPACE, 'artifacts/hyperopt_space.pkl')

    neptune.send_artifact('artifacts/hyperopt_trials.pkl')
    neptune.send_artifact('artifacts/hyperopt_results.pkl')
    neptune.send_artifact('artifacts/hyperopt_space.pkl')

    # log runs
    sk_utils.send_runs(results)
    sk_utils.send_best_parameters(results)
    sk_utils.send_plot_convergence(results, channel_name='diagnostics')
    sk_utils.send_plot_evaluations(results, channel_name='diagnostics')
コード例 #9
0
def main():
    """Train the classifier described by the JSON config and log it to Neptune.

    The config path and optional initial weights are taken from the
    module-level ``args`` object. The function builds the train/validation
    batch generators, creates and compiles the model (Adam, categorical
    cross-entropy), opens a Neptune experiment, runs ``fit_generator`` with
    early stopping, LR reduction, two checkpoint callbacks and the Neptune
    monitor, and finally uploads the static checkpoint and ``config.json``
    as artifacts.
    """
    config_path = args.conf
    initial_weights = args.weights

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    train_set, valid_set, classes = data.create_training_instances(config['train']['train_folder'],
                                                                   None,
                                                                   config['train']['cache_name'],
                                                                   config['model']['labels'])

    num_classes = len(classes)
    print('Readed {} classes: {}'.format(num_classes, classes))

    train_generator = gen.BatchGenerator(
        instances=train_set,
        labels=classes,
        batch_size=config['train']['batch_size'],
        input_sz=config['model']['infer_shape'],
        shuffle=True,
        norm=data.normalize
    )

    valid_generator = gen.BatchGenerator(
        instances=valid_set,
        labels=classes,
        batch_size=config['train']['batch_size'],
        input_sz=config['model']['infer_shape'],
        norm=data.normalize,
        infer=True
    )

    early_stop = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=20,
        mode='min',
        verbose=1
    )

    reduce_on_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        verbose=1,
        mode='min',
        min_delta=0.01,
        cooldown=0,
        min_lr=0
    )

    # The network input is the first two entries of infer_shape plus 3 channels.
    net_input_shape = (config['model']['infer_shape'][0],
                       config['model']['infer_shape'][1],
                       3)

    train_model = models.create(
        base_name=config['model']['base'],
        num_classes=num_classes,
        input_shape=net_input_shape)

    if initial_weights:
        train_model.load_weights(initial_weights)

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would only add a stray "None" line.
    train_model.summary()
    # plot_model(train_model, to_file='images/MobileNetv2.png', show_shapes=True)

    optimizer = Adam(lr=config['train']['learning_rate'], clipnorm=0.001)

    train_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    checkpoint_name = utils.get_checkpoint_name(config)
    utils.makedirs_4_file(checkpoint_name)

    static_chk_name = utils.get_static_checkpoint_name(config)
    utils.makedirs_4_file(static_chk_name)

    checkpoint_vloss = cbs.CustomModelCheckpoint(
        model_to_save=train_model,
        filepath=checkpoint_name,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1
    )

    neptune_mon = cbs.NeptuneMonitor(
        monitoring=['loss', 'val_loss', 'accuracy', 'val_accuracy'],
        neptune=neptune
    )

    chk_static = ModelCheckpoint(
        filepath=static_chk_name,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1
    )

    callbacks = [early_stop, reduce_on_plateau, checkpoint_vloss, neptune_mon, chk_static]

    ### NEPTUNE ###
    sources_to_upload = [
        'models.py',
        'config.json'
    ]

    params = {
        'infer_size': "H{}xW{}".format(*config['model']['infer_shape']),
        'classes': config['model']['labels'],
    }

    neptune.create_experiment(
        name=utils.get_neptune_name(config),
        upload_stdout=False,
        upload_source_files=sources_to_upload,
        params=params
    )
    ### NEPTUNE ###

    # The returned History object is not needed here; metrics are reported
    # through the callbacks.
    train_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=len(train_generator) * config['train']['train_times'],

        validation_data=valid_generator,
        validation_steps=len(valid_generator) * config['valid']['valid_times'],

        epochs=config['train']['nb_epochs'],
        verbose=2 if config['train']['debug'] else 1,
        callbacks=callbacks,
        workers=multiprocessing.cpu_count(),
        max_queue_size=100
    )

    # NOTE(review): the uploaded file is the literal 'config.json', not
    # necessarily the file at config_path -- confirm this is intended.
    neptune.send_artifact(static_chk_name)
    neptune.send_artifact('config.json')
コード例 #10
0
                                **HPO_PARAMS)
    best_auc = -1.0 * results.fun
    best_params = results.x

    # log metrics
    print('Best Validation AUC: {}'.format(best_auc))
    print('Best Params: {}'.format(best_params))

    neptune.send_metric('validation auc', best_auc)
    neptune.set_property('best_params', str(to_named_params(best_params)))

    # log results
    skopt.dump(results, 'artifacts/gp_results.pkl')
    joblib.dump(SPACE, 'artifacts/gp_space.pkl')

    neptune.send_artifact('artifacts/gp_results.pkl')
    neptune.send_artifact('artifacts/gp_space.pkl')

    # log diagnostic plots
    fig, ax = plt.subplots(figsize=(16, 12))
    skopt.plots.plot_convergence(results, ax=ax)
    fig.savefig('plots/gp_convergence.png')

    neptune.send_image('diagnostics', 'plots/gp_convergence.png')

    axes = skopt.plots.plot_evaluations(results)
    fig = axes2fig(axes, figsize=(16, 12))
    fig.savefig('plots/gp_evaluations.png')

    neptune.send_image('diagnostics', 'plots/gp_evaluations.png')
コード例 #11
0
                                    **HPO_PARAMS)
    best_auc = -1.0 * results.fun
    best_params = results.x

    # log metrics
    print('Best Validation AUC: {}'.format(best_auc))
    print('Best Params: {}'.format(best_params))

    neptune.send_metric('validation auc', best_auc)
    neptune.set_property('best_params', str(to_named_params(best_params)))

    # log results
    skopt.dump(results, 'artifacts/forest_results.pkl')
    joblib.dump(SPACE, 'artifacts/forest_space.pkl')

    neptune.send_artifact('artifacts/forest_results.pkl')
    neptune.send_artifact('artifacts/forest_space.pkl')

    # log diagnostic plots
    fig, ax = plt.subplots(figsize=(16, 12))
    skopt.plots.plot_convergence(results, ax=ax)
    fig.savefig('plots/forest_convergence.png')

    neptune.send_image('diagnostics', 'plots/forest_convergence.png')

    axes = skopt.plots.plot_evaluations(results)
    fig = axes2fig(axes, figsize=(16, 12))
    fig.savefig('plots/forest_evaluations.png')

    neptune.send_image('diagnostics', 'plots/forest_evaluations.png')
コード例 #12
0
ファイル: train.py プロジェクト: KaiL4eK/neural_networks
def start_train(
    config,
    config_path,
    yolo_model: yolo.YOLO_Model,
    train_generator,
    valid_generator,
    dry_mode: bool
):
    """Run the full YOLO training loop and, unless in dry mode, log it to Neptune.

    Args:
        config: Nested configuration dict; the ``'train'``, ``'valid'`` and
            ``'model'`` sections are read here.
        config_path: Not used by this function.
        yolo_model: Wrapper exposing ``train_model`` (compiled and fitted)
            and ``infer_model`` (saved by the checkpoint callback).
        train_generator: Training batch generator; must support ``len()``.
        valid_generator: Validation batch generator; must support ``len()``.
        dry_mode: When true, no Neptune experiment is created, nothing is
            uploaded and ``config['train']['nb_epochs']`` is forced to 10.

    Raises:
        KeyError: If ``config['train']['optimizer']`` names an unknown
            optimizer.
    """
    print('Full training')

    ###############################
    #   Optimizers
    ###############################

    # Map names to classes so that only the requested optimizer is built.
    optimizer_classes = {
        'sgd': opt.SGD,
        'adam': opt.Adam,
        'adamax': opt.Adamax,
        'nadam': opt.Nadam,
        'rmsprop': opt.RMSprop,
        # 'Radam': RAdam(lr=config['train']['learning_rate'], warmup_proportion=0.1, min_lr=1e-5)
    }

    learning_rate = config['train']['learning_rate']
    optimizer_name = config['train']['optimizer'].lower()
    optimizer = optimizer_classes[optimizer_name](lr=learning_rate)

    if config['train']['clipnorm'] > 0:
        optimizer.clipnorm = config['train']['clipnorm']

    if config['train'].get('lr_decay', 0) > 0:
        optimizer.decay = config['train']['lr_decay']

    # Compare the normalized name: the lookup above is case-insensitive, so
    # this special case must be as well (previously only 'Nadam' matched).
    if optimizer_name == 'nadam':
        # Just to set field
        optimizer.decay = 0.0

    ###############################
    #   Callbacks
    ###############################

    checkpoint_name = utils.get_checkpoint_name(config)
    utils.makedirs_4_file(checkpoint_name)

    checkpoint_vloss = cbs.CustomModelCheckpoint(
        model_to_save=yolo_model.infer_model,
        filepath=checkpoint_name,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1
    )

    # tensorboard_logdir = utils.get_tensorboard_name(config)
    # utils.makedirs(tensorboard_logdir)
    # print('Tensorboard dir: {}'.format(tensorboard_logdir))

    # tensorboard_cb = TensorBoard(
    #     log_dir=tensorboard_logdir,
    #     histogram_freq=0,
    #     write_graph=False
    # )

    mAP_checkpoint_name = utils.get_mAP_checkpoint_name(config)
    mAP_checkpoint_static_name = utils.get_mAP_checkpoint_static_name(config)
    utils.makedirs_4_file(mAP_checkpoint_name)
    map_evaluator_cb = cbs.MAP_evaluation(
        model=yolo_model,
        generator=valid_generator,
        save_best=True,
        save_name=mAP_checkpoint_name,
        save_static_name=mAP_checkpoint_static_name,
        # tensorboard=tensorboard_cb,
        neptune=neptune if not dry_mode else None
    )

    reduce_on_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.4,
        patience=20,
        verbose=1,
        mode='min',
        min_delta=0,
        cooldown=10,
        min_lr=1e-8
    )

    # NOTE: early_stop and checkpoint_vloss are constructed but currently
    # not part of the callback list below.
    early_stop = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=80,
        mode='min',
        verbose=1
    )

    neptune_mon = cbs.NeptuneMonitor(
        monitoring=['loss', 'val_loss'],
        neptune=neptune
    )

    # logger_cb = cbs.CustomLogger(
    #     config=config,
    #     tensorboard=tensorboard_cb
    # )

    # fps_logger = cbs.FPSLogger(
    #     infer_model=yolo_model.infer_model,
    #     generator=valid_generator,
    #     infer_sz=config['model']['infer_shape'],
    #     tensorboard=tensorboard_cb
    # )

    callbacks = [
        # tensorboard_cb,
        map_evaluator_cb,
        # early_stop,
        reduce_on_plateau,
    ]

    ###############################
    #   Prepare fit
    ###############################

    if not dry_mode:
        callbacks.append(neptune_mon)

        # Dump the effective config so it can be uploaded with the sources
        # and as an artifact after training.
        with open('config.json', 'w') as f:
            json.dump(config, f, indent=4)

        sources_to_upload = [
            'yolo.py',
            '_common/backend.py',
            'config.json'
        ]

        params = {
            'base_params': str(config['model']['base_params']),
            'infer_size': "H{}xW{}".format(*config['model']['infer_shape']),
            'anchors_per_output': config['model']['anchors_per_output'],
            'anchors': str(config['model']['anchors'])
        }

        tags = [
            config['model']['base']
        ]

        logger.info('Tags: {}'.format(tags))

        neptune.create_experiment(
            name=utils.get_neptune_name(config),
            upload_stdout=False,
            upload_source_files=sources_to_upload,
            params=params,
            tags=tags
        )
    else:
        # Dry runs are capped at a fixed, small number of epochs.
        config['train']['nb_epochs'] = 10

    yolo_model.train_model.compile(loss=yolo.dummy_loss, optimizer=optimizer)
    yolo_model.train_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=len(train_generator) * config['train']['train_times'],

        validation_data=valid_generator,
        validation_steps=len(valid_generator) * config['valid']['valid_times'],

        epochs=config['train']['nb_epochs'],
        verbose=1,
        callbacks=callbacks,
        workers=mp.cpu_count(),
        max_queue_size=100,
        use_multiprocessing=False
    )

    if not dry_mode:
        neptune.send_artifact(mAP_checkpoint_static_name)
        neptune.send_artifact('config.json')
コード例 #13
0
                                   }):

        results = skopt.forest_minimize(objective,
                                        SPACE,
                                        callback=[monitor],
                                        **HPO_PARAMS)

        best_auc = -1.0 * results.fun
        best_params = results.x

        neptune.send_metric('valid_auc', best_auc)
        neptune.set_property('best_params', str(to_named_params(best_params)))

        # log results
        skopt.dump(results, os.path.join(REPORTS_DIRPATH, 'skopt_results.pkl'))
        neptune.send_artifact(
            os.path.join(REPORTS_DIRPATH, 'skopt_results.pkl'))

        # log diagnostic plots
        fig, ax = plt.subplots(figsize=(16, 12))
        skopt.plots.plot_convergence(results, ax=ax)
        fig.savefig(os.path.join(REPORTS_DIRPATH, 'convergence.png'))
        neptune.send_image('diagnostics',
                           os.path.join(REPORTS_DIRPATH, 'convergence.png'))

        axes = skopt.plots.plot_evaluations(results)
        fig = plt.figure(figsize=(16, 12))
        fig = axes2fig(axes, fig)
        fig.savefig(os.path.join(REPORTS_DIRPATH, 'evaluations.png'))
        neptune.send_image('diagnostics',
                           os.path.join(REPORTS_DIRPATH, 'evaluations.png'))
コード例 #14
0
                                   **HPO_PARAMS)
    best_auc = -1.0 * results.fun
    best_params = results.x

    # log metrics
    print('Best Validation AUC: {}'.format(best_auc))
    print('Best Params: {}'.format(best_params))

    neptune.send_metric('validation auc', best_auc)
    neptune.set_property('best_params', str(to_named_params(best_params)))

    # log results
    skopt.dump(results, 'artifacts/random_results.pkl')
    joblib.dump(SPACE, 'artifacts/random_space.pkl')

    neptune.send_artifact('artifacts/random_results.pkl')
    neptune.send_artifact('artifacts/random_space.pkl')

    # log diagnostic plots
    fig, ax = plt.subplots(figsize=(16, 12))
    skopt.plots.plot_convergence(results, ax=ax)
    fig.savefig('plots/random_convergence.png')

    neptune.send_image('diagnostics', 'plots/random_convergence.png')

    axes = skopt.plots.plot_evaluations(results)
    fig = axes2fig(axes, figsize=(16, 12))
    fig.savefig('plots/random_evaluations.png')

    neptune.send_image('diagnostics', 'plots/random_evaluations.png')
コード例 #15
0
def main():
    """Train the fraud model with K-fold validation and log everything to Neptune.

    Loads the train/test feature tables, orders them by ``TransactionDT``,
    runs ``fit_predict`` inside a Neptune experiment, logs train/validation
    AUC and a classification report, and writes/uploads the train
    predictions, test predictions and the submission file.
    """
    print('loading data')
    train_features_path = os.path.join(
        FEATURES_DATA_PATH, 'train_features_' + FEATURE_NAME + '.csv')
    test_features_path = os.path.join(FEATURES_DATA_PATH,
                                      'test_features_' + FEATURE_NAME + '.csv')

    print('... train')
    train = pd.read_csv(train_features_path, nrows=TRAINING_PARAMS['nrows'])
    train_sorted = train.sort_values('TransactionDT')
    X = train_sorted.drop(['isFraud', 'TransactionDT', 'TransactionID'],
                          axis=1)
    y = train_sorted['isFraud']
    # Keep the ID frame in the same (sorted) row order as X and give it a
    # fresh 0..n-1 index; the predictions are wrapped in a DataFrame with a
    # default index below, and pd.concat(axis=1) aligns on index labels.
    # Assumes fit_predict returns predictions in the row order of X.
    train = train_sorted[["TransactionDT", 'TransactionID']].reset_index(
        drop=True)

    print('... test')
    test = pd.read_csv(test_features_path, nrows=TRAINING_PARAMS['nrows'])
    test_sorted = test.sort_values('TransactionDT')
    X_test = test_sorted.drop(['TransactionDT', 'TransactionID'], axis=1)
    test = test_sorted[["TransactionDT", 'TransactionID']].reset_index(
        drop=True)

    # NOTE(review): random_state is passed without shuffle=True; recent
    # scikit-learn releases reject this combination -- confirm the target
    # version before changing it.
    folds = KFold(n_splits=VALIDATION_PARAMS['n_splits'],
                  random_state=VALIDATION_PARAMS['validation_seed'])

    hyperparams = {**MODEL_PARAMS, **TRAINING_PARAMS, **VALIDATION_PARAMS}

    print('starting experiment')
    with neptune.create_experiment(
            name='model training',
            params=hyperparams,
            upload_source_files=get_filepaths(),
            # The placeholder was missing, so the tag never contained the
            # feature name.
            tags=[MODEL_NAME, 'features_{}'.format(FEATURE_NAME),
                  'training']):
        print('logging data version')
        log_data_version(train_features_path, prefix='train_features_')
        log_data_version(test_features_path, prefix='test_features_')

        print('training')
        in_fold, out_of_fold, test_preds = fit_predict(X, y, X_test, folds,
                                                       MODEL_PARAMS,
                                                       TRAINING_PARAMS)

        print('logging metrics')
        train_auc, valid_auc = roc_auc_score(y, in_fold), roc_auc_score(
            y, out_of_fold)
        neptune.send_metric('train_auc', train_auc)
        neptune.send_metric('valid_auc', valid_auc)
        send_binary_classification_report(
            y,
            fmt_preds(out_of_fold),
            channel_name='valid_classification_report')

        print('postprocessing predictions')
        train_predictions_path = os.path.join(
            PREDICTION_DATA_PATH,
            'train_prediction_{}_{}.csv'.format(FEATURE_NAME, MODEL_NAME))
        test_predictions_path = os.path.join(
            PREDICTION_DATA_PATH,
            'test_prediction_{}_{}.csv'.format(FEATURE_NAME, MODEL_NAME))
        submission_path = os.path.join(
            PREDICTION_DATA_PATH,
            'submission_{}_{}.csv'.format(FEATURE_NAME, MODEL_NAME))
        submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)

        train = pd.concat(
            [train, pd.DataFrame(out_of_fold, columns=['prediction'])], axis=1)
        test = pd.concat(
            [test, pd.DataFrame(test_preds, columns=['prediction'])], axis=1)
        submission['isFraud'] = pd.merge(submission, test,
                                         on='TransactionID')['prediction']
        train.to_csv(train_predictions_path, index=False)
        test.to_csv(test_predictions_path, index=False)
        submission.to_csv(submission_path, index=False)
        neptune.send_artifact(train_predictions_path)
        neptune.send_artifact(test_predictions_path)
        neptune.send_artifact(submission_path)
        print('experiment finished')
コード例 #16
0
                                dev_tensor_attention_masks,
                                dev_tensor_token_type_ids, dev_list_span_idx)
        metrics = validation.do_validation()
        model.train()
        record_eval_metric(neptune, metrics)

        curr_f1 = metrics['joint_f1']
        if curr_f1 > best_eval_f1:
            best_eval_f1 = curr_f1
            model.save_pretrained(model_path)

    # Calculate the average loss over all of the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)

    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))

    print("")
    print("Training complete!")

    print("Total training took {:} (h:mm:ss)".format(
        format_time(time.time() - total_t0)))
    # create a zip file for the folder of the model
    zipdir(model_path, os.path.join(model_path, 'checkpoint.zip'))
    # upload the model to neptune
    neptune.send_artifact(os.path.join(model_path, 'checkpoint.zip'))
コード例 #17
0
ファイル: saving_utils.py プロジェクト: mikipacman/retro-rl
def save_exp_params(params):
    """Pickle ``params`` to a temporary file and upload it to Neptune.

    The artifact is stored under the name ``params_pickle_name``; the
    temporary directory is removed once the upload call returns.
    """
    with tempfile.TemporaryDirectory(dir="/tmp") as temp:
        path_to_pickle = os.path.join(temp, params_pickle_name)
        # Close (and thereby flush) the file before uploading it.
        with open(path_to_pickle, "wb") as pickle_file:
            pickle.dump(params, pickle_file)
        neptune.send_artifact(path_to_pickle, params_pickle_name)
コード例 #18
0
def main():
    """Train the LightGBM model and log metrics, artifacts and plots to Neptune.

    Joins the train/validation index files with the feature table, runs
    ``train_evaluate`` inside a Neptune experiment, then uploads the AUC
    scores, prediction CSVs, the pickled model (optionally a production
    package) and four diagnostic plots.
    """
    neptune.init(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                 project_qualified_name=os.getenv('NEPTUNE_PROJECT'))

    train_idx = pd.read_csv(TRAIN_IDX_PATH, nrows=NROWS)
    valid_idx = pd.read_csv(VALID_IDX_PATH, nrows=NROWS)
    features = pd.read_csv(FEATURES_PATH, nrows=NROWS)

    train = pd.merge(train_idx, features, on='SK_ID_CURR')
    valid = pd.merge(valid_idx, features, on='SK_ID_CURR')

    all_params = {
        'num_boost_round': NUM_BOOST_ROUND,
        'early_stopping_rounds': EARLY_STOPPING_ROUNDS,
        **LGBM_PARAMS
    }

    source_files = get_filepaths()
    data_versions = {
        'features_path': FEATURES_PATH,
        'features_version': md5_hash(FEATURES_PATH),
        'train_split_version': md5_hash(TRAIN_IDX_PATH),
        'valid_split_version': md5_hash(VALID_IDX_PATH),
    }

    with neptune.create_experiment(name='model training',
                                   params=all_params,
                                   tags=['lgbm'],
                                   upload_source_files=source_files,
                                   properties=data_versions):
        results = train_evaluate(train,
                                 valid,
                                 LGBM_PARAMS,
                                 callbacks=[neptune_monitor()])
        train_score = results['train_score']
        valid_score = results['valid_score']
        train_preds = results['train_preds']
        valid_preds = results['valid_preds']

        neptune.send_metric('train_auc', train_score)
        neptune.send_metric('valid_auc', valid_score)

        # Persist and upload both prediction tables, train first.
        for file_name, preds in (('train_preds.csv', train_preds),
                                 ('valid_preds.csv', valid_preds)):
            preds_path = os.path.join(PREDICTION_DIRPATH, file_name)
            preds.to_csv(preds_path, index=None)
            neptune.send_artifact(preds_path)

        model_path = os.path.join(MODEL_DIRPATH, 'model.pkl')
        joblib.dump(results['model'], model_path)
        neptune.set_property('model_path', model_path)
        neptune.set_property('model_version', md5_hash(model_path))
        neptune.send_artifact(model_path)

        if PACKAGE_TO_PROD:
            packed = CreditDefaultClassifier.pack(model=results['model'])
            saved_path = packed.save(PRODUCTION_DIRPATH)
            neptune.set_property('production_model_path', saved_path)

        def draw_confusion_matrix(ax):
            sk_metrics.plot_confusion_matrix(valid_preds['TARGET'],
                                             valid_preds['preds_pos'] > 0.5,
                                             ax=ax)

        def draw_roc(ax):
            sk_metrics.plot_roc(valid_preds['TARGET'],
                                valid_preds[['preds_neg', 'preds_pos']],
                                ax=ax)

        def draw_precision_recall(ax):
            sk_metrics.plot_precision_recall(
                valid_preds['TARGET'],
                valid_preds[['preds_neg', 'preds_pos']],
                ax=ax)

        def draw_prediction_distribution(ax):
            plot_prediction_distribution(valid_preds['TARGET'],
                                         valid_preds['preds_pos'],
                                         ax=ax)

        # Each diagnostic is drawn on a fresh figure, saved under
        # REPORTS_DIRPATH and sent to the 'diagnostics' channel.
        diagnostics = (
            ('conf_matrix.png', draw_confusion_matrix),
            ('roc_auc.png', draw_roc),
            ('prec_recall.png', draw_precision_recall),
            ('preds_dist.png', draw_prediction_distribution),
        )
        for plot_name, draw in diagnostics:
            fig, ax = plt.subplots(figsize=(16, 12))
            draw(ax)
            plot_path = os.path.join(REPORTS_DIRPATH, plot_name)
            fig.savefig(plot_path)
            neptune.send_image('diagnostics', plot_path)