Example #1
                # Resolve via global, by value, by id.
                ('log', {'key': None}),
                ('log', {'key_id': None}),
                # Resolve via conf, by value, by id.
                ('log2', {'x_id': None}),
                ('log2', {'x': None}),
            ],
            'priority': 2,
        }
    },
}


if __name__ == '__main__':
    # Execute configuration(s).
    objects = pycnfg.run(CNFG, resolve_none=True)
    # => 42
    # => 42
    # => 42
    # => 42
    # => 42
    # => 42

    # Storage for produced object(s).
    print(objects)
    # => {'logger__default': <Logger default (INFO)>,
    #     'path__default': 'pycnfg/examples/complex',
    #     'x__1': 'a',
    #     'x__2': 'c',
    #     'y__conf': {'b': 2, 'c': 42, 'print': 252}}
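
The fragment above shows only the tail of a larger configuration. As a rough orientation, the sketch below illustrates the overall shape that pycnfg.run consumes. Section and configuration ids are chosen for illustration; the keys 'init', 'producer', 'steps' and 'priority', and the use of pycnfg.Producer as the default producer, are assumptions based on the other fragments in this listing, not the original example code.

import pycnfg

# Sketch: {section_id: {configuration_id: {...}}}. Each executed
# configuration ends up in the storage dict under the key
# 'section_id__configuration_id'.
CNFG_SKETCH = {
    'x': {
        '1': {'init': 'a'},   # object 'x__1' is just the init value
        '2': {'init': 'c'},   # object 'x__2'
    },
    'y': {
        'conf': {
            'init': {},                   # starting object for the steps
            'producer': pycnfg.Producer,  # class whose methods run as steps
            'steps': [],                  # [(method_name, kwargs), ...]
            'priority': 2,                # dependent sections use a higher value
        },
    },
}

if __name__ == '__main__':
    objects = pycnfg.run(CNFG_SKETCH)
    print(objects)  # storage keyed by 'section_id__configuration_id'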
Example #2
        }
    },
    'key': {
        'conf': {
            'init': 'b',
        },
    },
    'val': {
        'conf': {
            'init': 24,
        },
        'conf2': {
            'init': '42',
        }
    },
}

if __name__ == '__main__':
    for cnfg in [CNFG_1, CNFG_2, CNFG_3, CNFG_4]:
        # Execute configuration(s).
        objects = pycnfg.run(cnfg,
                             dcnfg={},
                             update_expl=True,
                             resolve_none=True)
        # => 42

        # Storage for produced object(s).
        print(objects['section_id__configuration_id'])
        # => {'a': 7, 'b': 42}
        print('=' * 79)
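
CNFG_1 … CNFG_4 are not shown in this fragment. The dcnfg argument carries a default configuration that the passed one is applied on top of; the minimal illustration below only shows the call shape, and the assumption that the user configuration takes precedence over dcnfg is an inference from the examples in this listing, not verified against the library.

import pycnfg

# Placeholder configurations, purely to illustrate passing dcnfg.
DCNFG = {'val': {'conf': {'init': 0}}}
CNFG = {'val': {'conf': {'init': 42}}}

if __name__ == '__main__':
    objects = pycnfg.run(CNFG, dcnfg=DCNFG)
    print(objects)  # storage keyed 'val__conf'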
Example #3
                        'return_train_score': True,
                    },
                }),
                # Validate 'sgd' pipeline on 'train' and 'test' subsets of the
                # 'train' dataset with 'r2' and 'mse' scorers (after
                # optimization).
                ('validate', {
                    'pipeline_id': 'pipeline__sgd',
                    'dataset_id': 'dataset__train',
                    'subset_id': ['train', 'test'],
                    'metric_id': ['metric__r2', 'metric__mse'],
                }),
                # Predict with 'sgd' pipeline on whole 'test' dataset.
                ('predict', {
                    'pipeline_id': 'pipeline__sgd',
                    'dataset_id': 'dataset__test',
                    'subset_id': '',
                }),
                # Dump 'sgd' pipeline to disk.
                ('dump', {
                    'pipeline_id': 'pipeline__sgd',
                    'dirpath': None
                }),
            ],
        },
    },
}

if __name__ == '__main__':
    objects = pycnfg.run(CNFG, dcnfg={})
Example #4
        },
    },
    # Separate section for 'gs_params' kwarg.
    'gs_params': {
        'conf': {
            'priority': 3,
            'init': {
                'n_iter': None,
                'n_jobs': 1,
                'refit': 'metric__r2',
                'cv': sklearn.model_selection.KFold(
                    n_splits=3, shuffle=True, random_state=42),
                'verbose': 1,
                'pre_dispatch': 'n_jobs',
                'return_train_score': True,
            },
        },
    },
}

if __name__ == '__main__':
    objects = pycnfg.run(CNFG, dcnfg=mlshell.CNFG)
Example #5
    'y': {
        'conf': {
            'init': {'b': 2, 'c': 42},
            'producer': CustomProducer,
            'steps': [
                ('__init__', {'path_id': 'path__default',
                              'logger_id': 'logger__default'}),
                ('set', {'key': 'x__1', 'val': 7}),
                ('log', {'key': 'x__2'}),
                ('log', {'key_id': 'x__2'}),
            ],
            'priority': 2,
        }
    },
}


if __name__ == '__main__':
    # Execute configuration(s).
    objects = pycnfg.run(CNFG)
    # => 42
    # => 42

    # Storage for produced object(s).
    print(objects)
    # => {'logger__default': <Logger default (INFO)>,
    #     'path__default': 'pycnfg/examples/complex',
    #     'x__1': 'a',
    #     'x__2': 'c',
    #     'y__conf': {'b': 2, 'c': 42, 'a': 7}}
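
The CustomProducer referenced above is defined outside this fragment. The class below is a hypothetical reconstruction consistent with the steps and printed storage shown (a 'set' method that stores a value under an already-resolved key, and a 'log' method that accepts either a plain key or an object id); its signatures are assumptions, not the original code.

import pycnfg


class CustomProducer(pycnfg.Producer):
    """Hypothetical producer driven by the 'steps' list above."""

    def __init__(self, objects, oid, path_id, logger_id):
        # 'path__default' and 'logger__default' are looked up in the storage
        # of already produced objects.
        super().__init__(objects, oid)
        self.objects = objects
        self.logger = objects[logger_id]
        self.path = objects[path_id]

    def set(self, obj, key, val):
        # Per the printed storage above, 'key': 'x__1' ends up as obj['a'],
        # i.e. the kwarg is assumed to arrive already resolved to the value
        # of the referenced object.
        obj[key] = val
        return obj

    def log(self, obj, key=None, key_id=None):
        # Accept either a plain key or an object id; resolve the id against
        # storage if it has not been resolved already.
        if key_id is not None:
            key = self.objects.get(key_id, key_id)
        self.logger.info(obj[key])
        return obj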
Example #6
def test_run(id_, args, kwargs, expected):
    """
    - Delete previous test output if it exists.
    - Run mlshell (via pycnfg.run).
    - Compare the current output with the reference output.

    """
    # Remove results for test.
    results_path = expected['results_path']
    shutil.rmtree(results_path, ignore_errors=True)
    # Start code.
    # [future] attempts to run classification with n_jobs>1
    # global cnfg_default
    # sys.modules['cnfg_default'] = get_params(args[0], 'cnfg_default')
    # import cnfg_default
    # #from cnfg_default import custom_score_metric
    objects = pycnfg.run(oid='default', *args, **kwargs)
    tmp = {k: type(v).__name__ for k, v in objects.items()}
    print('OBJECTS:')
    print(tmp)
    # Compare results:
    # * Compare objects (keys and value type names).
    objects_ = expected['objects']
    objects = {k: type(v).__name__ for k, v in objects.items()}
    if objects != objects_:
        print(set(objects.items()) ^ set(objects_.items()))
    assert objects == objects_
    # for k, v in objects.items():
    #     assert k in objects_
    #     assert type(v).__name__ == objects_[k]
    # * Compare predictions csv(all available).
    pred_path = glob.glob(f"{results_path}/models/*_pred.csv")
    pred_path_ = glob.glob(expected['pred_path'])
    assert len(pred_path) == len(pred_path_)
    for act, exp in zip(sorted(pred_path), sorted(pred_path_)):
        file_diff(act, exp)
        assert filecmp.cmp(act, exp)
    # * Compare test logs.
    logs_path = glob.glob(f"{results_path}/logs*/*_test.log")[0]
    logs_path_ = expected['logs_path']
    file_diff(logs_path, logs_path_)
    assert filecmp.cmp(logs_path, logs_path_)
    # * Compare runs dataframe: find columns that differ from the reference.
    runs_path = f"{results_path}/runs"
    runs_path_ = expected['runs_path']
    df = runs_loader(runs_path)
    df_ = runs_loader(runs_path_)
    # Element-wise equality first, then reduce per column with all(), e.g.:
    # col1     True
    # col2    False
    # dtype: bool
    df_diff = df.eq(df_).all()
    # Column names that are not equal.
    columns = sorted(list(df_diff[df_diff == False].dropna().index))
    # columns_eq = sorted(list(df_diff[df_diff==True].dropna().index))
    columns_ = expected['columns_diff']
    print('DIFF:\n', columns)
    time.sleep(1)
    # assert columns == columns_
    # * Compare model.
    model_path = glob.glob(f"{results_path}/models/*.model")
    model_path_ = glob.glob(expected['model_path'])
    assert len(model_path) == len(model_path_)
    for act, exp in zip(sorted(model_path), sorted(model_path_)):
        assert filecmp.cmp(act, exp)
    return


# Otherwise can't pickle when n_jobs > 1; the import is needed to make the
# object addressable at module scope.
# custom_score_metric = get_params('classification/conf.py',
#                                  'custom_score_metric')
# sys.modules[f'custom_score_metric'] = custom_score_metric
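
The test above calls file_diff and runs_loader, which are defined elsewhere in the test module. The versions below are minimal hypothetical stand-ins (the real helpers may differ, and the csv layout of the runs directory is an assumption), included only so the fragment can be read on its own.

import difflib
import glob

import pandas as pd


def file_diff(actual_path, expected_path):
    """Print a unified diff of two text files to help debug filecmp asserts."""
    with open(actual_path) as f_act, open(expected_path) as f_exp:
        diff = difflib.unified_diff(f_act.readlines(), f_exp.readlines(),
                                    fromfile=actual_path, tofile=expected_path)
    print(''.join(diff))


def runs_loader(runs_dirpath):
    """Load all run records from a directory into a single DataFrame."""
    paths = sorted(glob.glob(f"{runs_dirpath}/*.csv"))
    return pd.concat([pd.read_csv(p) for p in paths], ignore_index=True)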
Example #7
def test_run(id_, args, kwargs, expected):
    objects = pycnfg.run(*args, **kwargs)
    assert objects == expected, f"test={id_}"
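
The fragment omits the pytest parametrization that supplies id_, args, kwargs and expected. A hypothetical setup is sketched below; the configuration and the expected storage are placeholders modelled on the outputs shown earlier in this listing, not real test data.

import pytest
import pycnfg

params = [
    # (id_, args, kwargs, expected)
    ('minimal', ({'x': {'conf': {'init': 42}}},), {'dcnfg': {}},
     {'x__conf': 42}),
]


@pytest.mark.parametrize('id_, args, kwargs, expected', params)
def test_run(id_, args, kwargs, expected):
    objects = pycnfg.run(*args, **kwargs)
    assert objects == expected, f"test={id_}"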