def metrics(model=None, params=False, sort=None, descending=False, output=None):
    """Show metrics from the metrics database

    Args:
        model (list of str): Model ids
        params (bool): Show model parameters if True
        sort (str): Column name on which to sort
        descending (bool): Sort in descending order
        output (str): Path to the output csv file
    """
    config = dg.Config()
    model = model or config.models.keys()
    if params and len(model) > 1:
        print('Params can be shown only for one model')
        return
    db = Database()
    all_metrics = OrderedDict()
    for model_id, timestamp, model_params, model_metrics in db.metrics(model):
        all_metrics.setdefault('model', []).append(model_id)
        all_metrics.setdefault('timestamp', []).append(
            timestamp.strftime('%Y.%m.%d %H:%M:%S'))
        if params:
            for param, value in model_params.items():
                all_metrics.setdefault(param, []).append(value)
        for key, metrics_data in model_metrics.items():
            if metrics_data is not None:
                for m, value in metrics_data.items():
                    all_metrics.setdefault(f'{key}-{m}', []).append(value)
    df = pd.DataFrame(all_metrics)
    if sort:
        df.sort_values(sort, ascending=not descending, inplace=True)
    print_and_save_df(df, output=output)
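# Usage sketch (illustrative only): the model ids and the output path below are
# hypothetical and depend on your project configuration; 'timestamp' is one of
# the columns built above.
#
#     metrics(model=['forest', 'linear'], sort='timestamp', descending=True)
#     metrics(model=['forest'], params=True, output='forest_metrics.csv')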
def train_and_evaluate(models, datasets, silent=False):
    """Train and evaluate models and print out the metrics for evaluation

    Args:
        models (list of str): Model names. Pass if you want to train/evaluate
            just a set of particular models
        datasets (list of dg.enums.Dataset): List of datasets used for
            evaluation.
        silent (bool): Don't print details to standard out.
    """
    config = dg.Config()
    all_metrics = []
    bar(silent=silent)
    for model_id in models:
        model = config.models[model_id].set_params(
            **config.get_params(model_id))
        dss = config.get_datasets(model.id)
        train_model(model, train_set=dss[Dataset.TRAIN.value],
                    eval_set=dss[Dataset.EVAL.value], save=False,
                    silent=silent)
        all_metrics.append(evaluate_model(model, datasets, silent=silent))
    bar(silent=silent)
    df = pd.DataFrame(all_metrics, columns=columns())
    return df
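# Usage sketch (illustrative only): the model names are hypothetical; the
# datasets argument takes dg.enums.Dataset values as used above.
#
#     df = train_and_evaluate(['forest', 'linear'], datasets=Dataset.for_eval())
#     print(df)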
def models(params=False):
    """List all models with some additional info

    Args:
        params (bool): Show model parameters if True
    """
    config = dg.Config()
    if len(config.models) == 0:
        return
    longest = max(map(len, config.models.keys()))
    for model_id, model in config.models.items():
        spaces = ' ' * (longest - len(model_id) + 15)
        if model.__doc__ is not None:
            doc = model.__doc__.splitlines()[0]
        else:
            doc = model.__class__.__name__
        cprint(f'{model_id}:{spaces}[blue]{doc}[normal]\n', parse=True)
        if params:
            indent = len(model_id) + len(spaces) + 1
            width = 50 + indent
            wrapper = TextWrapper(width=width,
                                  initial_indent=' ' * indent,
                                  subsequent_indent=' ' * indent,
                                  break_long_words=False,
                                  replace_whitespace=True,
                                  break_on_hyphens=False)
            text = wrapper.fill(', '.join(model.get_params().keys()))
            cprint(f'[cyan]{text}[normal]\n', parse=True)
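# Usage sketch (illustrative only):
#
#     models()             # one line per model id with the first docstring line
#     models(params=True)  # additionally prints each model's get_params() keys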
def serve():
    """Serve models"""
    config = dg.Config()
    server_klass = config.get('server.class', None)
    if server_klass:
        server = get_object(server_klass)()
    else:
        server = dg.Server()
    server.run()
def grid(model, test_only=False, output=None, silent=False):
    """Run a grid search for the model

    Args:
        model (str): Model name for which we want to do a grid search.
        test_only (bool): Evaluate only on test data
        output (str): Path to the output csv file
        silent (bool): Don't print details to standard out.
    """
    import pandas as pd

    config = dg.Config()
    model = config.models[model]
    grid_params = config[f'grid.{model.id}']
    datasets = config.get_datasets(model.id)
    if grid_params is None:
        print('Grid is not defined for this model')
        return
    grid = create_grid(config[f'models.{model.id}'], grid_params)
    if len(grid) == 0:
        print('Grid is empty for this model')
        return
    metrics = []
    param_cols = set()
    bar(silent=silent)
    for i, params in enumerate(grid, 1):
        start = datetime.now()
        if not silent:
            print(f'{i} out of {len(grid)}')
            print(f'Params: {params}')
        param_cols.update(params.keys())
        model.set_params(**params)
        train_model(model, train_set=datasets[Dataset.TRAIN.value],
                    eval_set=datasets[Dataset.EVAL.value])
        m = evaluate_model(
            model,
            datasets=[Dataset.TEST] if test_only else Dataset.for_eval(),
            silent=silent)
        m.update(params)
        metrics.append(m)
        cols = ['model'] + sorted(param_cols) + columns()[1:]
        df = pd.DataFrame([m], columns=cols)
        print_and_save_df(df)
        diff = datetime.now() - start
        total_seconds = diff.total_seconds()
        print('Duration: {:.0f}:{:.0f}:{:.0f}'.format(
            total_seconds // 3600, (total_seconds % 3600) // 60,
            total_seconds % 60))
    bar(silent=silent)
    df = pd.DataFrame(metrics, columns=cols)
    print_and_save_df(df, output)
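# Usage sketch (illustrative only): 'forest' is a hypothetical model id; the
# parameter grid itself is read from the 'grid.<model id>' section of the
# project configuration.
#
#     grid('forest', test_only=True, output='forest_grid.csv')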
def shell():
    """Run IPython shell with loaded configuration and model classes"""
    from IPython import embed

    config = dg.Config()
    user_ns = {'config': config}
    models = {
        model.__class__.__name__: model
        for model in config.models.values()
    }
    user_ns.update(models)
    embed(user_ns=user_ns)
def __init__(self):
    self.config = dg.Config()
    # Load models
    self.models = {
        model: persistence.load(self.config.models[model])
        for model in self.config['server.models']
    }
    # Create server and set up routes
    self.server = Sanic()
    self.server.add_route(self.reload, '/reload/', methods=['POST'])
def train(models=None, production=False, silent=True):
    """Train all models for production and save them

    Args:
        models (list of str): Model names. Pass if you want to train just a
            set of particular models.
        production (bool): Train for production or for evaluation.
        silent (bool): Don't print details to standard out.
    """
    config = dg.Config()
    models = models or config.models.keys()
    train_eval.train(
        models,
        train_set=dg.Dataset.FULL if production else dg.Dataset.TRAIN,
        eval_set=None if production else dg.Dataset.EVAL,
        silent=silent)
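# Usage sketch (illustrative only): the model names are hypothetical and must
# match ids from the project configuration.
#
#     train(['forest', 'linear'])         # train on TRAIN, evaluate on EVAL
#     train(['forest'], production=True)  # train on the FULL dataset for production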
def load(model, model_dir=None):
    """Load the model

    Args:
        model (dg.models.Model): Model class or instance of the model
        model_dir (str): If `model_dir` is provided, loads the model from the
            model dir, else loads the production model.

    Returns:
        Estimator: Returns the estimator loaded from the save point
    """
    model_dir = model_dir or dg.Config().get_model_dir(production=True)
    model_dir = os.path.join(model_dir, model.id)
    if hasattr(model, 'load'):
        return model.load(model_dir)
    else:
        model_file = os.path.join(model_dir, f'{model.id}.pickle')
        with io.open(model_file, 'rb') as f:
            return joblib.load(f)
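# Usage sketch (illustrative only): 'forest' is a hypothetical model id and
# '/path/to/run' a hypothetical model dir; without model_dir the production
# save point is used.
#
#     estimator = load(dg.Config().models['forest'])
#     estimator = load(dg.Config().models['forest'], model_dir='/path/to/run')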
def evaluate(models=None, test_only=False, output=None, silent=False):
    """Evaluate all models and print out the metrics for evaluation.

    Evaluation uses the production models.

    Args:
        models (list of str): Model names. Pass if you want to evaluate just a
            set of particular models.
        test_only (bool): Evaluate only on test data
        output (str): Path to the output csv file
        silent (bool): Don't print details to standard out.
    """
    config = dg.Config()
    models = models or config.models.keys()
    df = train_eval.evaluate(
        models,
        datasets=[dg.Dataset.TEST] if test_only else dg.Dataset.for_eval(),
        silent=silent)
    print_and_save_df(df, output=output)
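# Usage sketch (illustrative only): model names and the output path are
# hypothetical.
#
#     evaluate(['forest', 'linear'], output='eval_metrics.csv')
#     evaluate(test_only=True)  # all configured models, TEST split only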
def deploy(models=None, silent=False):
    """Deploy the latest model to production

    Args:
        models (list of str): Names of the models we want to deploy
        silent (bool): Don't print details to standard out.
    """
    config = dg.Config()
    production_dir = config.get_model_dir(production=True)
    models_dir = os.path.dirname(production_dir)
    models = models or config.models.keys()
    files = [
        os.path.basename(x)
        for x in glob.glob(os.path.join(models_dir, '*'))
        # Remove production, tensorflow and the metrics db from the list
        if os.path.basename(x) not in ('production', 'tensorflow', 'metrics.db')
    ]
    latest = os.path.join(models_dir, sorted(
        files, key=lambda x: datetime.strptime(x[:19], '%Y.%m.%dT%H:%M:%S')
    )[-1])
    ensure_dir(production_dir, directory=True)
    bar(silent=silent)
    for model in models:
        if not silent:
            print('Deploying model:', model)
        source = os.path.join(latest, model)
        # If the model was trained in the latest training batch
        if os.path.isdir(source):
            destination = os.path.join(production_dir, model)
            if os.path.isdir(destination):
                shutil.rmtree(destination)
            shutil.copytree(source, destination)
    bar(silent=silent)
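# Usage sketch (illustrative only): copies the listed models from the most
# recent timestamped training directory into the production directory; the
# model names are hypothetical.
#
#     deploy(['forest', 'linear'])
#     deploy()  # deploy every configured model found in the latest batch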
def __init__(self):
    self.config = dg.Config()
    self._create()