def lab_show(experiment_id=None):
    """Show a Lab Experiment.

    Renders either a single experiment's graph, or — when no id is
    given — one combined graph of every experiment found under the
    ``experiments`` directory. Output is written as a PNG via graphviz.

    Parameters
    ----------
    experiment_id : str, optional
        Experiment to visualise. When None, all experiments are shown.

    Raises
    ------
    click.Abort
        If ``experiment_id`` is given but no such experiment directory
        exists under the current Lab Project root.
    """
    is_lab_project()
    is_empty_project()

    models_directory = 'experiments'
    experiments = next(os.walk(models_directory))[1]

    if experiment_id is None:
        # Combine every experiment into a single left-to-right graph
        experiments = next(os.walk('experiments'))[1]
        p = graphviz.Digraph(name='lab_project', format='png')
        p.graph_attr['rankdir'] = 'LR'
        for e in experiments:
            p.subgraph(show_experiment(e))
    else:
        experiment_dir = os.path.join('experiments', experiment_id)
        if not os.path.exists(experiment_dir):
            click.secho("Can't find experiment [" + experiment_id +
                        '] in the current '
                        'directory.\nEnsure that you are in Lab Project root',
                        fg='red')
            # BUG FIX: click.Abort() was created but never raised, so
            # execution fell through to p.render() with `p` unbound
            # (NameError). Raise it to actually abort.
            raise click.Abort()
        else:
            p = show_experiment(experiment_id)
    p.render()
def lab_rm(experiment_id):
    """Remove a Lab Experiment.

    Deletes the experiment directory and its matching logs directory
    (when present). Prints an error instead of raising when the
    experiment does not exist.

    Parameters
    ----------
    experiment_id : str
        Name of the experiment directory under ``experiments``.
    """
    is_lab_project()
    experiment_dir = os.path.join('experiments', experiment_id)
    logs_dir = os.path.join('logs', experiment_id)

    if not os.path.exists(experiment_dir):
        click.secho("Can't find experiment [" + experiment_id +
                    '] in the current '
                    'directory.\nEnsure that you are in Lab Project root',
                    fg='red')
    else:
        shutil.rmtree(experiment_dir)
        # ROBUSTNESS: the logs directory can be missing even when the
        # experiment exists (e.g. partially cleaned project) — an
        # unconditional rmtree raised FileNotFoundError here.
        if os.path.exists(logs_dir):
            shutil.rmtree(logs_dir)
        click.secho('[' + experiment_id + '] removed', fg='blue')
def lab_notebook():
    """Publish Lab project as a jupyter kernel.

    Reads the project name and timestamp from ``config/runtime.yaml``,
    derives a unique kernel name (name + alphanumeric-only timestamp)
    and installs it via ``_install_jupyter_kernel``. Failures are
    reported but do not crash the CLI.
    """
    is_lab_project()

    with open(os.path.join(os.getcwd(), 'config', 'runtime.yaml'),
              'r') as file:
        # BUG FIX: yaml.load without an explicit Loader is unsafe on
        # untrusted input and a TypeError on PyYAML >= 6 — use safe_load.
        config = yaml.safe_load(file)

    # Kernel name: project name + timestamp stripped to alphanumerics
    project_name = config['name'] + '_' + \
        ''.join(e for e in config['timestamp'] if e.isalnum())

    click.secho('Generating jupyter kernel for ' + config['name'] + '...',
                fg='cyan')
    try:
        _install_jupyter_kernel(project_name)
        click.secho('Kernel generated: ' + project_name)
    except Exception as e:
        # Best effort: surface the error, then report failure in red
        print(e)
        click.secho('Failed to generate kernel.', fg='red')
def lab_run(script):
    """Run a training script with the project's virtualenv interpreter.

    Keeps the ``path`` entry of ``config/runtime.yaml`` in sync with
    the current working directory, then executes the script using the
    project's ``.venv/bin/python``.

    Parameters
    ----------
    script : sequence of str
        The script (and its arguments) to execute.

    Raises
    ------
    click.Abort
        If ``config/runtime.yaml`` lacks a ``path`` key (project created
        with an older version of Lab).
    """
    home_dir = os.getcwd()
    is_lab_project()
    is_venv(home_dir)

    try:
        with open(os.path.join(os.getcwd(), 'config',
                               'runtime.yaml'), 'r') as file:
            # BUG FIX: yaml.load without an explicit Loader is unsafe
            # and a TypeError on PyYAML >= 6 — use safe_load.
            config = yaml.safe_load(file)
        home_dir = config['path']

        # Update project directory if it hasn't been updated
        if home_dir != os.getcwd():
            config['path'] = os.getcwd()
            home_dir = config['path']
            with open(os.path.join(os.getcwd(), 'config',
                                   'runtime.yaml'), 'w') as file:
                yaml.dump(config, file, default_flow_style=False)
    except KeyError:
        click.secho(
            'Looks like this Project was configured with an earlier '
            'version of Lab. Check that config/runtime.yaml file '
            'has a valid path key and value.', fg='red')
        raise click.Abort()

    # Extract lab version from virtual environment
    click.secho('Initializing', fg='cyan')  # typo fix: was 'Intializing'
    python_bin = os.path.join(home_dir, '.venv', 'bin/python')

    click.secho('Running ' + str(script), fg='green')
    subprocess.call([python_bin] + list(script))
    click.secho('Finished!', fg='green')
def lab_ls(sort_by=None):
    """Compare multiple Lab Experiments as a terminal table.

    For every experiment under ``experiments``, reads its metrics and
    metadata, keeps only the metric names common to all experiments,
    renders each metric as a text sparkline and prints a tabulated
    comparison. Also reports how long ago the project was last pushed
    to remote (minio) storage and whether a push is recommended.

    Parameters
    ----------
    sort_by : str, optional
        Column name to sort the table by (descending). Defaults to
        sorting by date.
    """
    models_directory = 'experiments'
    logs_directory = 'logs'
    TICK = '█'

    is_lab_project()
    is_empty_project()

    experiments = next(os.walk(models_directory))[1]

    comparisons = []

    # Get unique metric names shared by every experiment
    metrics_names = []
    for e in experiments:
        metrics_file = os.path.join(models_directory, e, 'metrics.yaml')
        with open(metrics_file, 'r') as file:
            # BUG FIX: yaml.load without an explicit Loader is unsafe
            # and a TypeError on PyYAML >= 6 — use safe_load (here and
            # everywhere below).
            metrics = yaml.safe_load(file)
        metrics_names.append(list(metrics.keys()))
    metrics_names = list(set(metrics_names[0]).intersection(*metrics_names))

    # Build one comparison record per experiment
    for e in experiments:
        metrics_file = os.path.join(models_directory, e, 'metrics.yaml')
        try:
            with open(metrics_file, 'r') as file:
                metrics = yaml.safe_load(file)
            for k, v in metrics.items():
                metrics[k] = round(v, 2)

            # Keep only metrics common to all experiments
            metrics = {k: metrics[k] for k in metrics_names}
            metrics_list = list(metrics.values())

            meta_file = os.path.join(logs_directory, e, 'meta.yaml')
            with open(meta_file, 'r') as file:
                meta = yaml.safe_load(file)

            # Truncate source name if too long
            source_name = meta['source']
            meta['source'] = (source_name[:20] + '..') \
                if len(source_name) > 20 else source_name

            record = [meta['experiment_uuid'], meta['source'],
                      meta['start_time'].strftime("%m/%d/%Y, %H:%M:%S")] + \
                metrics_list
            comparisons.append(record)
        except FileNotFoundError:
            # Skip experiments with missing metrics/meta files
            pass

    # Create visualisation of numeric metrics
    A = pd.DataFrame(comparisons)
    meta_data = A[[0, 1, 2]]
    metrics_data = A.drop([0, 1, 2], axis=1)

    # Scale each metric column by its absolute maximum to size sparklines
    row_max = metrics_data.abs().max(axis=0)
    scaled_metrics_data = metrics_data.abs().divide(row_max, axis=1)
    scaled_metrics_data = scaled_metrics_data.fillna(value=0)

    sparklines = np.empty(shape=metrics_data.shape, dtype=object)
    for row in range(metrics_data.shape[0]):
        for column in range(metrics_data.shape[1]):
            value = metrics_data.iloc[row, column]
            scaled_value = scaled_metrics_data.iloc[row, column]
            spark = (format(value, '.2f') + ': ' +
                     TICK * int(round(scaled_value * 10)))
            sparklines[row, column] = spark

    result = pd.concat([meta_data, pd.DataFrame(sparklines)], axis=1)
    # ROBUSTNESS: build column labels from metrics_names instead of the
    # leftover loop variable `metrics` — identical content/order (the
    # dict comprehension above iterates metrics_names), but well-defined
    # even when the last experiment's files were missing.
    result.columns = ['Experiment', 'Source', 'Date'] + metrics_names

    result.sort_values(by=['Date'], axis=0,
                       ascending=False, inplace=True)
    if sort_by is not None:
        result.sort_values(by=[sort_by], axis=0,
                           ascending=False, inplace=True)

    header = ['Experiment', 'Source', 'Date'] + metrics_names

    click.echo('')
    click.echo(tabulate.tabulate(result.values, headers=header))

    # Check the last time lab project was synced with minio
    with open(os.path.join('config', 'runtime.yaml'), 'r') as file:
        minio_config = yaml.safe_load(file)

    push_time = datetime.datetime.fromtimestamp(0)
    try:
        push_time = datetime.datetime.strptime(
            minio_config['last_push'], '%Y-%m-%d %H:%M:%S.%f')
        now_time = datetime.datetime.now()
        td = now_time - push_time
        (days, hours) = (td.days, td.seconds // 3600)
    except Exception:
        # No recorded push (or unparseable timestamp): report zero age
        (days, hours) = (0, 0)

    click.secho('\nLast push: ' + str(days) + 'd, ' +
                str(hours) + 'h ago', fg='yellow')

    # Find the latest file and print its timestamp
    list_of_files = glob.glob(os.path.join(os.getcwd(), '*'))
    latest_file = max(list_of_files, key=os.path.getctime)
    latest_file_timestamp = \
        datetime.datetime.fromtimestamp(os.path.getmtime(latest_file))

    recommend = '| Project is in sync with remote'
    if latest_file_timestamp > push_time:
        recommend = ' | Recommend to run <lab push>'

    click.secho('Last modified: ' + str(latest_file_timestamp) + recommend,
                fg='yellow')