Example 1
def metrics(model=None,
            params=False,
            sort=None,
            descending=False,
            output=None):
    """Show metrics from the metrics database

    Args:
        model (list of str): Model ids
        params (bool): Show model parameters if True
        sort (str): Column name on which to sort
        descending (bool): Sort in descending order
        output (str): Path to the output csv file
    """
    config = dg.Config()
    model = model or config.models.keys()
    if params and len(model) > 1:
        print('Params can be shown only for one model')
        return
    db = Database()
    data = OrderedDict()  # column name -> list of column values
    for model_id, timestamp, model_params, model_metrics in db.metrics(model):
        data.setdefault('model', []).append(model_id)
        data.setdefault('timestamp',
                        []).append(timestamp.strftime('%Y.%m.%d %H:%M:%S'))
        if params:
            for param, value in model_params.items():
                data.setdefault(param, []).append(value)
        for key, metrics_data in model_metrics.items():
            if metrics_data is not None:
                for m, value in metrics_data.items():
                    data.setdefault(f'{key}-{m}', []).append(value)
    df = pd.DataFrame(data)
    if sort:
        df.sort_values(sort, ascending=not descending, inplace=True)
    print_and_save_df(df, output=output)
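
A possible invocation, sketched from the signature above; the model ids and the output path are hypothetical:

# Show metrics for two models, newest first, and save them to a csv file.
metrics(model=['linear', 'xgb'], sort='timestamp', descending=True,
        output='metrics.csv')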
Example 2
def train_and_evaluate(models, datasets, silent=False):
    """Train end evaluate models and print out the metrics for evaluation

    Args:
        models (list of str): Model names. Pass if you want to train/evaluate
            just a set of particular models
        datasets (list of dg.enums.Dataset): List of datasets used for
            evaluation.
        silent (bool): Don't print details to standard out.

    """
    config = dg.Config()
    all_metrics = []
    bar(silent=silent)
    for model_id in models:
        model = config.models[model_id].set_params(
            **config.get_params(model_id))
        dss = config.get_datasets(model.id)
        train_model(model,
                    train_set=dss[Dataset.TRAIN.value],
                    eval_set=dss[Dataset.EVAL.value],
                    save=False,
                    silent=silent)
        all_metrics.append(evaluate_model(model, datasets, silent=silent))
        bar(silent=silent)

    df = pd.DataFrame(all_metrics, columns=columns())
    return df
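
A usage sketch; Dataset is assumed to come from dg.enums as the docstring states, and the model ids are hypothetical:

from dg.enums import Dataset

# Train both models, then evaluate them on the train and eval splits.
df = train_and_evaluate(['linear', 'xgb'],
                        datasets=[Dataset.TRAIN, Dataset.EVAL])
print(df)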
Example 3
def models(params=False):
    """Lists all models with some additional info"""
    config = dg.Config()
    if len(config.models) == 0:
        return

    longest = max(map(len, config.models.keys()))

    for model_id, model in config.models.items():
        spaces = ' ' * (longest - len(model_id) + 15)
        if model.__doc__ is not None:
            doc = model.__doc__.splitlines()[0]
        else:
            doc = model.__class__.__name__
        cprint(f'{model_id}:{spaces}[blue]{doc}[normal]\n', parse=True)
        if params:
            indent = len(model_id) + len(spaces) + 1
            width = 50 + indent
            wrapper = TextWrapper(width=width,
                                  initial_indent=' ' * indent,
                                  subsequent_indent=' ' * indent,
                                  break_long_words=False,
                                  replace_whitespace=True,
                                  break_on_hyphens=False)

            text = wrapper.fill(', '.join(model.get_params().keys()))
            cprint(f'[cyan]{text}[normal]\n', parse=True)
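
A usage sketch; the listing below is illustrative output, not real:

# Print every configured model with the first line of its docstring,
# followed by its parameter names.
models(params=True)
# linear:                Ridge regression baseline
#                        alpha, fit_intercept, solver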
Example 4
def serve():
    """Serve models"""
    config = dg.Config()

    server_klass = config.get('server.class', None)
    if server_klass:
        server = get_object(server_klass)()
    else:
        server = dg.Server()
    server.run()
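
The only interface this snippet requires of a custom server is a no-argument constructor and a run() method, so a drop-in replacement can be sketched like this (the class and the config value are hypothetical):

class MyServer(dg.Server):
    """Custom server selected through the `server.class` config key."""

    def run(self):
        print('Starting custom server')
        super().run()

# and in the configuration: server.class = 'myproject.server.MyServer'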
Example 5
def grid(model, test_only=False, output=None, silent=False):
    """Implement grid search for model

    Args:
        model (str): Model name for which we want to do a grid search.
        test_only (bool): Evaluate only on test data
        output (str): Path to the output csv file
        silent (bool): Don't print details to standard out.
    """
    import pandas as pd

    config = dg.Config()
    model = config.models[model]
    grid_params = config[f'grid.{model.id}']
    datasets = config.get_datasets(model.id)
    if grid_params is None:
        print('Grid is not defined for this model')
        return

    grid = create_grid(config[f'models.{model.id}'], grid_params)
    if len(grid) == 0:
        print('Grid is empty for this model')
        return

    metrics = []
    param_cols = set()
    bar(silent=silent)
    for i, params in enumerate(grid, 1):
        start = datetime.now()
        if not silent:
            print(f'{i} out of {len(grid)}')
            print(f'Params: {params}')
        param_cols.update(params.keys())
        model.set_params(**params)
        train_model(model,
                    train_set=datasets[Dataset.TRAIN.value],
                    eval_set=datasets[Dataset.EVAL.value])
        m = evaluate_model(
            model,
            datasets=[Dataset.TEST] if test_only else Dataset.for_eval(),
            silent=silent)
        m.update(params)
        metrics.append(m)
        cols = ['model'] + sorted(param_cols) + columns()[1:]
        df = pd.DataFrame([m], columns=cols)
        print_and_save_df(df)
        diff = datetime.now() - start
        total_seconds = diff.total_seconds()
        print('Duration: {:.0f}:{:.0f}:{:.0f}'.format(
            total_seconds // 3600, (total_seconds % 3600) // 60,
            total_seconds % 60))
        bar(silent=silent)

    df = pd.DataFrame(metrics, columns=cols)
    print_and_save_df(df, output)
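
create_grid itself is not shown; a minimal reimplementation sketch, assuming the grid config maps parameter names to lists of candidate values (in the spirit of sklearn's ParameterGrid):

from itertools import product

def create_grid_sketch(model_config, grid_params):
    # model_config is accepted only to mirror the original call signature.
    # Expand e.g. {'alpha': [0.1, 1.0], 'l1_ratio': [0.2, 0.5]} into a
    # list with one dict per parameter combination.
    keys = sorted(grid_params)
    return [dict(zip(keys, combo))
            for combo in product(*(grid_params[key] for key in keys))]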
Example 6
def shell():
    """Run IPython shell with loaded configuration and model classes
    """
    from IPython import embed

    config = dg.Config()
    user_ns = {'config': config}
    models = {
        model.__class__.__name__: model
        for model in config.models.values()
    }
    user_ns.update(models)
    embed(user_ns=user_ns)
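
Inside the embedded session the configuration is bound to `config` and every model instance to its class name; a sketch of such a session (identifiers are hypothetical):

# In [1]: config.models.keys()
# In [2]: LinearModel.get_params()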
Example 7
    def __init__(self):
        self.config = dg.Config()

        # Load models
        self.models = {
            model: persistence.load(self.config.models[model])
            for model in self.config['server.models']
        }

        # Create server and setup routes
        self.server = Sanic()
        self.server.add_route(self.reload, '/reload/',
                              methods=['POST'])
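
The reload handler registered above is not part of the snippet; a minimal sketch, assuming it simply re-reads every configured model from disk the same way the constructor does:

from sanic import response

async def reload(self, request):
    # Re-load the persisted models without restarting the server.
    self.models = {
        model: persistence.load(self.config.models[model])
        for model in self.config['server.models']
    }
    return response.json({'status': 'ok'})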
Example 8
def train(models=None, production=False, silent=True):
    """Train all model for production and save them

    Args:
        models (list of str): Model names. Pass if you want to train just a
            set of particular models.
        production (bool): Train for production or for evaluation.
        silent (bool): Don't print details to standard out.
    """
    config = dg.Config()
    models = models or config.models.keys()
    train_eval.train(
        models,
        train_set=dg.Dataset.FULL if production else dg.Dataset.TRAIN,
        eval_set=None if production else dg.Dataset.EVAL,
        silent=silent)
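
Usage sketch (model ids are hypothetical):

# Train every configured model on the full dataset for production.
train(production=True)

# Train just two models on the train split and evaluate on the eval split.
train(models=['linear', 'xgb'], production=False, silent=False)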
Example 9
File: model.py Project: alefnula/dg
def load(model, model_dir=None):
    """Load the model

    Args:
        model (dg.models.Model): Model class or instance of the model
        model_dir (str): If `model_dir` is provided, loads the model from
            the model dir, else loads the production model.
    Returns:
        Estimator: Returns the estimator loaded from the save point
    """
    model_dir = model_dir or dg.Config().get_model_dir(production=True)
    model_dir = os.path.join(model_dir, model.id)
    if hasattr(model, 'load'):
        return model.load(model_dir)
    else:
        model_file = os.path.join(model_dir, f'{model.id}.pickle')
        with io.open(model_file, 'rb') as f:
            return joblib.load(f)
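
A usage sketch; the model id and the batch directory are hypothetical:

# Load the production estimator of a configured model.
model = dg.Config().models['my_model']
estimator = load(model)

# Or load it from a specific training batch instead of production.
estimator = load(model, model_dir='models/2024.01.05T10:00:00')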
Example 10
def evaluate(models=None, test_only=False, output=None, silent=False):
    """Evaluate all models and print out the metrics for evaluation.

    Evaluation uses the production model.

    Args:
        models (list of str): Model names. Pass if you want to evaluate just a
            set of particular models.
        test_only (bool): Evaluate only on test data
        output (str): Path to the output csv file
        silent (bool): Don't print details to standard out.
    """
    config = dg.Config()
    models = models or config.models.keys()
    df = train_eval.evaluate(
        models,
        datasets=[dg.Dataset.TEST] if test_only else dg.Dataset.for_eval(),
        silent=silent)
    print_and_save_df(df, output=output)
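
Usage sketch (model ids and output path are hypothetical):

# Evaluate two models on the test set only and save the metrics to a csv.
evaluate(models=['linear', 'xgb'], test_only=True, output='evaluation.csv')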
Example 11
def deploy(models=None, silent=False):
    """Deploy the latest model to production

    Args:
        models (list of str): Names of the models we want to deploy
        silent (bool): Don't print details to standard out.
    """
    config = dg.Config()
    production_dir = config.get_model_dir(production=True)
    models_dir = os.path.dirname(production_dir)

    models = models or config.models.keys()

    files = [
        os.path.basename(x) for x in
        glob.glob(os.path.join(models_dir, '*'))
        # Skip production, tensorflow and metrics.db
        if os.path.basename(x) not in (
            'production', 'tensorflow', 'metrics.db'
        )
    ]

    latest = os.path.join(models_dir, sorted(
        files, key=lambda x: datetime.strptime(x[:19], '%Y.%m.%dT%H:%M:%S')
    )[-1])

    ensure_dir(production_dir, directory=True)

    bar(silent=silent)
    for model in models:
        if not silent:
            print('Deploying model:', model)
        source = os.path.join(latest, model)
        # If the model is trained in the latest training batch
        if os.path.isdir(source):
            destination = os.path.join(production_dir, model)
            if os.path.isdir(destination):
                shutil.rmtree(destination)
            shutil.copytree(source, destination)
        bar(silent=silent)
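
The function assumes a models directory of timestamped training batches next to the production directory; a sketch of the expected layout (names are hypothetical):

# models/
#     2024.01.05T10:00:00/    <- training batches, sorted by timestamp
#         my_model/
#     2024.01.06T09:30:00/    <- the latest batch gets deployed
#         my_model/
#     production/             <- skipped, with tensorflow/ and metrics.db
#     metrics.db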
Example 12
    def __init__(self):
        self.config = dg.Config()
        self._create()