def run_template(validate_config, check_early_exit=None, gen_trial_params=None):
    """
    Common template for the "run" step of an experiment.

    Reads a config directory and an output directory from the command
    line, parses the experiment config, and uses it to generate the
    arguments to trial_util.run_trials. Exits with a return code of 1 if
    there is any problem or exception, otherwise exits with 0.

    Parameters
    ==========
    validate_config : Function from a config directory to (config, str).
    Run on the object parsed from the experiment's config.json; returns a
    processed config if successful, or None and an error message if there
    is a problem with the config.

    check_early_exit : Function from config -> (bool, str), optional. If
    given, it is run on the experiment config and decides whether the
    script should exit without running the experiment; the string explains
    why it exited early (when the bool is true).

    gen_trial_params : Function from a config to an array of arguments to
    trial_util.run_trial, optional. If omitted, no experiment will run.
    """
    def main(config_dir, output_dir):
        try:
            # An invalid config is reported as a failure right away.
            config, msg = validate_config(config_dir)
            if config is None:
                write_status(output_dir, False, msg)
                return 1

            # An early exit is a *successful* no-op, not a failure.
            if check_early_exit is not None:
                early_exit, msg = check_early_exit(config)
                if early_exit:
                    write_status(output_dir, True, msg)
                    return 0

            configure_seed(config)

            # Without a parameter generator there is nothing to run.
            if gen_trial_params is None:
                write_status(output_dir, True, 'No trial to run')
                return 0

            trial_params = gen_trial_params(config)
            success, msg = run_trials(*trial_params, path_prefix=output_dir)
            write_status(output_dir, success, msg)
            if success:
                return 0
            return 1
        except Exception as e:
            # Any unexpected exception is recorded as a failed run.
            write_status(output_dir, False, render_exception(e))
            return 1

    invoke_main(main, 'config_dir', 'output_dir')
def visualize_template(validate_config, generate_individual_comparisons):
    """
    Common template for the "visualize" step of an experiment. Reads data,
    config, output directories from the command line, reads in the
    experiment config, and all the data in the data directory. Runs
    generate_individual_comparisons on the most recent data file with the
    config. Also generates longitudinal comparisons (using the basic
    function) over all time and over the last two weeks.

    Exits with a ret code of 1 if there is any problem or exception,
    otherwise exits with 0

    Parameters
    ==========
    validate_config : A function from string-keyed dictionary to
    (dictionary, str). If this function is specified, it will be run on
    the object parsed from the experiment's config.json. Returns a
    processed config if successful, or None and an error message if there
    is a problem with the config

    generate_individual_comparisons : A function that, given a valid
    parsed config, the most recent data object, and an output directory,
    produces graphs of the given data
    """
    def main(data_dir, config_dir, output_dir):
        try:
            config, msg = validate_config(config_dir)
            if config is None:
                write_status(output_dir, False, msg)
                return 1
            all_data = sort_data(data_dir)
            # Fix: an empty data directory previously surfaced as a raw
            # IndexError from all_data[-1]; report it clearly instead.
            if not all_data:
                write_status(output_dir, False, 'No data to visualize')
                return 1
            most_recent = all_data[-1]
            last_two_weeks = [
                entry for entry in all_data
                if time_difference(most_recent, entry).days < 14
            ]
            generate_longitudinal_comparisons(all_data, output_dir,
                                              'all_time')
            generate_longitudinal_comparisons(last_two_weeks, output_dir,
                                              'two_weeks')
            generate_individual_comparisons(config, most_recent, output_dir)
        except Exception as e:
            write_status(output_dir, False,
                         'Exception encountered:\n' + render_exception(e))
            return 1
        write_status(output_dir, True, 'success')
        # Explicit success code (was previously an implicit None).
        return 0
    invoke_main(main, 'data_dir', 'config_dir', 'output_dir')
def summarize_template(validate_config, use_networks=True):
    """
    Common template for the "summarize" step of an experiment. Reads data,
    config, output directories from the command line, reads in the
    experiment config. Uses write_generic_summary to produce a summary
    based on the most recent data file based on the devices and title
    specified in the config.

    Exits with a ret code of 1 if there is any problem or exception,
    otherwise exits with 0

    Parameters
    ==========
    validate_config : A function from string-keyed dictionary to
    (dictionary, str). If this function is specified, it will be run on
    the object parsed from the experiment's config.json. Returns a
    processed config if successful, or None and an error message if there
    is a problem with the config

    use_networks : Whether to summarize by networks (true by default)
    """
    def main(data_dir, config_dir, output_dir):
        # Wrap in try/except so exceptions are reported via write_status,
        # consistent with the run and visualize templates (previously an
        # exception here escaped without writing a failure status).
        try:
            config, msg = validate_config(config_dir)
            if config is None:
                write_status(output_dir, False, msg)
                return 1
            devs = config['devices']
            networks = []
            if use_networks:
                networks = config['networks']
            write_generic_summary(data_dir, output_dir, config['title'],
                                  devs, networks, use_networks=use_networks)
            return 0
        except Exception as e:
            write_status(output_dir, False, render_exception(e))
            return 1
    invoke_main(main, 'data_dir', 'config_dir', 'output_dir')
    # NOTE(review): this chunk is the tail of a `main` whose definition is
    # not visible here; indentation reconstructed accordingly.
    # Allow the DTR memory budget to be overridden from the environment —
    # presumably so an external driver script can sweep budgets. TODO confirm.
    if 'DTR_MEMORY_BUDGET' in os.environ:
        specific_params['memory_budget'] = float(
            os.environ['DTR_MEMORY_BUDGET'])
    assert 'batch_size' in specific_params
    if use_dtr:
        assert 'memory_budget' in specific_params
        # A nonpositive budget appears to mean "no budget installed":
        # torch.set_memory_budget is only called when the budget is > 0.
        if specific_params['memory_budget'] > 0:
            print(f'Setting budget to {int(specific_params["memory_budget"])}')
            torch.set_memory_budget(int(specific_params['memory_budget']))
    # Trial ("dry") runs log to trial_run_outfile and skip the CSV output.
    if is_trial:
        timing_loop(model_name, i, config, use_dtr, specific_params, None,
                    True, trial_run_outfile)
        return
    # Real runs append timing rows to the CSV file via a shared writer.
    with open(out_file, 'a', newline='') as csvfile:
        writer = create_csv_writer(csvfile, specific_params)
        timing_loop(model_name, i, config, use_dtr, specific_params, writer,
                    memory_budget=specific_params.get('memory_budget', -1))


if __name__ == '__main__':
    invoke_main(main, 'config_dir', 'experiment_mode', 'model_name',
                'input_idx', 'params_file', 'out_file', 'trial_run',
                'trial_run_outfile')
    # NOTE(review): this chunk is the tail of a `main` whose definition is
    # not visible here; indentation reconstructed accordingly.
    # Score each requested metric; each metric gets its own data and graph
    # subdirectory. Abort with a failure status on the first problem.
    for metric in metrics:
        score_metric = SCORE_METRICS[metric](score_confs[metric])
        # Verify the metric's prerequisite experiment data is available.
        valid, msg = check_prerequisites(info, score_metric.prereq())
        if not valid:
            write_status(output_dir, False, msg)
            return 1
        score_data_dir = os.path.join(data_dir, metric)
        score_graph_dir = os.path.join(graph_dir, metric)
        idemp_mkdir(score_data_dir)
        idemp_mkdir(score_graph_dir)
        try:
            report = process_score(info, score_metric, score_data_dir,
                                   score_graph_dir, timestamp)
            score_reports[metric] = report
        except Exception as e:
            write_status(
                output_dir, False,
                'Encountered exception while scoring {}:\n{}'.format(
                    metric, render_exception(e)))
            return 1
    # All metrics scored: emit a combined report and a success status.
    report = {'title': 'Metric Scores', 'value': format_scores(score_reports)}
    write_json(output_dir, 'report.json', report)
    write_status(output_dir, True, 'success')


if __name__ == '__main__':
    invoke_main(main, 'config_dir', 'home_dir', 'output_dir')
    # NOTE(review): the lines above `def main` are the tail of a plotting
    # helper whose definition is not visible here; the `}` closes a
    # plot-data dict built further up.
    }
    filename = 'arm-vta-{}.png'.format(target)
    # Render a multi-bar plot for this target; the y-axis label comes from
    # the third element of the plot data's 'meta' entry.
    PlotBuilder().set_y_label(target_plot_data['meta'][2]) \
                 .set_y_scale(PlotScale.LINEAR) \
                 .make(PlotType.MULTI_BAR, target_plot_data) \
                 .save(comparison_dir, filename)


def main(data_dir, config_dir, output_dir):
    # Validate the experiment config before touching any data.
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1
    # read in data, output graphs of most recent data, and output longitudinal graphs
    all_data = sort_data(data_dir)
    most_recent = all_data[-1]
    try:
        generate_longitudinal_comparisons(all_data, output_dir)
        generate_arm_vta_comparisons(most_recent, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1
    write_status(output_dir, True, 'success')


if __name__ == '__main__':
    invoke_main(main, 'data_dir', 'config_dir', 'output_dir')
def analysis_template(validate_config, generate_listing_settings,
                      generate_data_query, use_networks=True):
    """
    Common template for the "analyze" step of an experiment. Reads data,
    config, output directories from the command line, reads in the
    experiment config. Uses trials_stat_summary and the user-specified
    functions to query the raw data and produce a data.json file.

    Exits with a ret code of 1 if there is any problem or exception,
    otherwise exits with 0

    Parameters
    ==========
    validate_config : A function from string-keyed dictionary to
    (dictionary, str). If this function is specified, it will be run on
    the object parsed from the experiment's config.json. Returns a
    processed config if successful, or None and an error message if there
    is a problem with the config

    generate_listing_settings: A function that takes the exp config and
    generates a mapping of category names to all information that will be
    needed to generate a data query (args to trials_stat_summary) for each
    category's corresponding data

    generate_data_query: A function that takes the config, a device,
    network (if use_networks is True), and a listing setting and returns a
    set of query arguments for trials_stat_summary

    use_networks: Assumes the config has multiple networks and the
    analysis should analyze each network separately. True by default
    """
    def main(data_dir, config_dir, output_dir):
        config, msg = validate_config(config_dir)
        if config is None:
            write_status(output_dir, False, msg)
            return 1
        devs = config['devices']
        listing_settings = generate_listing_settings(config)
        # ret maps device -> listing -> mean (or device -> listing ->
        # network -> mean when use_networks is set); add_detailed_summary
        # presumably attaches the full stats alongside — TODO confirm.
        ret = {}
        for dev in devs:
            ret[dev] = {}
            for listing, settings in listing_settings.items():
                if not use_networks:
                    # Per-device query: no network argument in this case.
                    query = generate_data_query(config, dev, settings)
                    summary, success, msg = trials_stat_summary(
                        data_dir, *query)
                    if not success:
                        write_status(output_dir, False, msg)
                        return 1
                    ret[dev][listing] = summary['mean']
                    add_detailed_summary(ret, summary, dev, listing)
                    continue
                ret[dev][listing] = {}
                for network in config['networks']:
                    # Per-network query: the network is threaded through.
                    query = generate_data_query(config, dev, network,
                                                settings)
                    summary, success, msg = trials_stat_summary(
                        data_dir, *query)
                    if not success:
                        write_status(output_dir, False, msg)
                        return 1
                    ret[dev][listing][network] = summary['mean']
                    add_detailed_summary(ret, summary, dev, listing,
                                         network)
        write_json(output_dir, 'data.json', ret)
        write_status(output_dir, True, 'success')
    invoke_main(main, 'data_dir', 'config_dir', 'output_dir')
from common import invoke_main, render_exception, write_status
from mxnet_util import export_mxnet_model


def main(config_dir, setup_dir):
    """Export the MxNet RNN-family models into the setup directory.

    Writes a success status on completion, or a failure status with the
    rendered exception if any export fails.
    """
    try:
        for cell_type in ('rnn', 'gru', 'lstm'):
            export_mxnet_model(cell_type, setup_dir)
        write_status(setup_dir, True, 'success')
    except Exception as e:
        write_status(setup_dir, False, render_exception(e))


if __name__ == '__main__':
    invoke_main(main, 'config_dir', 'setup_dir')
""" Just prints the location of the configured setup directory so it can be used by bash commands. This is a total nasty hack to deal with the fact that there is no global dependency management in the dashboard. """ import os from common import invoke_main, read_config def sanitize_path(path): return os.path.abspath(os.path.expanduser(path)) def main(home_dir): global_conf = read_config(sanitize_path(home_dir)) print(sanitize_path(global_conf['setup_dir'])) if __name__ == '__main__': invoke_main(main, 'home_dir')
        # NOTE(review): this chunk is the tail of a `main` whose definition
        # (and the enclosing for loop over config pairs) is not visible;
        # indentation reconstructed accordingly.
        # Find the configured max index for this dataset, if present.
        if pair[0] == dataset:
            max_idx = pair[1]
            break
    # dataset is not included in the config, so skip
    if max_idx == -1:
        write_status(output_dir, True, 'Dataset {} not run'.format(dataset))
        return 0
    # Run the treelstm trials for every configured device on this
    # method/dataset pair, appending to any existing CSV output.
    success, msg = run_trials(
        'relay', 'treelstm', config['dry_run'],
        config['n_times_per_input'], config['n_inputs'],
        treelstm_trial, treelstm_setup, treelstm_teardown,
        ['device', 'method', 'dataset', 'idx'],
        [config['devices'], [method], [dataset],
         [i for i in range(max_idx)]],
        path_prefix=output_dir, append_to_csv=True)
    if not success:
        write_status(output_dir, success, msg)
        return 1
    write_status(output_dir, True, 'success')


if __name__ == '__main__':
    # does not follow common template
    invoke_main(main, 'config_dir', 'output_dir', 'method', 'dataset')
# make a backup of the previous dashboard files if they exist if os.path.exists(home_dir): subprocess.call(['tar', '-zcf', backup_archive, home_dir]) # directories whose contents should not change between runs of the dashboard persistent_dirs = {info.exp_data, info.exp_configs, info.subsys_configs, info.subsys_output} all_dashboard_dirs = info.all_experiment_dirs() + info.all_subsystem_dirs() # instantiate necessary dashboard dirs and clean any that should be empty for dashboard_dir in all_dashboard_dirs: if dashboard_dir not in persistent_dirs: subprocess.call(['rm', '-rf', dashboard_dir]) idemp_mkdir(dashboard_dir) randomize_exps = True if 'randomize' in dash_config: randomize_exps = dash_config['randomize'] run_all_experiments(info, experiments_dir, setup_dir, tmp_data_dir, data_archive, time_str, randomize=randomize_exps) run_all_subsystems(info, subsystem_dir, time_str) if __name__ == '__main__': invoke_main(main, 'home_dir', 'experiments_dir', 'subsystem_dir')
        # NOTE(review): this chunk is the tail of a `main` whose definition
        # (and the guard condition above this line) is not visible;
        # indentation reconstructed accordingly. Skipping a device the
        # config excludes counts as success.
        write_status(output_dir, True, 'TF not run on {}'.format(device))
        return 0
    configure_seed(config)
    # Always run without XLA; additionally run with XLA when configured.
    enable_xla = [False]
    if config['use_xla']:
        enable_xla.append(True)
    success, msg = run_trials(
        'tf', 'cnn_comp', config['dry_run'],
        config['n_times_per_input'], config['n_inputs'],
        cnn_trial, cnn_setup, cnn_teardown,
        ['network', 'device', 'batch_size', 'enable_xla'],
        [config['networks'], [device], config['batch_sizes'], enable_xla],
        path_prefix=output_dir, append_to_csv=True)
    write_status(output_dir, success, msg)
    if not success:
        return 1


if __name__ == '__main__':
    # can't use the template, as we take an extra argument
    invoke_main(main, 'config_dir', 'output_dir', 'device')
    # NOTE(review): the lines above `def main` are the tail of a telemetry
    # logging helper whose definition is not visible here. Appends any
    # labeled readings after the first entry to the per-experiment CPU log.
    if entries[1:]:
        with open(os.path.join(fp_dir, 'cpu', fname), 'a+') as fp:
            for (label, data) in entries[1:]:
                fp.write(f'{time_after} {label} {data}\n')


def main(interval, output_dir, exp_name, run_cpu_telemetry, run_gpu_telemetry):
    '''
    Telemetry collection loop. Directory structure:
      ./output_dir
        -> telemetry
          -> char_rnn
          -> treelstm
          ...
    '''
    out_dir = os.path.join(output_dir, 'telemetry')
    log_dir = os.path.join(out_dir, exp_name)
    idemp_mkdir(os.path.join(log_dir, 'cpu'))
    idemp_mkdir(os.path.join(log_dir, 'gpu'))
    # Fields requested for the GPU log (nvidia-smi-style query names).
    nvidia_fields = 'timestamp,clocks.gr,clocks.current.memory,utilization.gpu,utilization.memory,memory.used,pstate,power.limit,temperature.gpu,fan.speed'.split(',')
    # The telemetry flags arrive as command-line strings, hence the
    # comparison against the literal 'True'.
    start_job(log_dir, nvidia_fields, int(interval), 0,
              run_cpu_telemetry == 'True', run_gpu_telemetry == 'True')
    # NOTE(review): start_job is invoked once above and again immediately
    # in the loop below, both with time_run effectively 0 — possibly a
    # duplicated first sample; confirm intent.
    time_run = 0
    interval = float(interval)
    while True:
        start_job(log_dir, nvidia_fields, interval, time_run,
                  run_cpu_telemetry == 'True', run_gpu_telemetry == 'True')
        time_run += 1
        time.sleep(interval)


if __name__ == '__main__':
    invoke_main(main, 'interval', 'output_dir', 'exp_name',
                'run_cpu_telemetry', 'run_gpu_telemetry')
# directories whose contents should not change between runs of the dashboard persistent_dirs = {info.exp_data, info.exp_configs, info.subsys_configs, info.subsys_output} all_dashboard_dirs = info.all_experiment_dirs() + info.all_subsystem_dirs() # instantiate necessary dashboard dirs and clean any that should be empty for dashboard_dir in all_dashboard_dirs: if dashboard_dir not in persistent_dirs: subprocess.call(['rm', '-rf', dashboard_dir]) idemp_mkdir(dashboard_dir) randomize_exps = True if 'randomize' in dash_config: randomize_exps = dash_config['randomize'] telemetry_rate = dash_config.get('telemetry_rate', 15) run_cpu_telemetry = dash_config.get('run_cpu_telemetry', False) run_gpu_telemetry = dash_config.get('run_gpu_telemetry', False) run_all_experiments(info, experiments_dir, setup_dir, tmp_data_dir, data_archive, time_str, telemetry_script_dir, run_cpu_telemetry=run_cpu_telemetry, run_gpu_telemetry=run_gpu_telemetry, telemetry_interval=telemetry_rate, randomize=randomize_exps) run_all_subsystems(info, subsystem_dir, time_str) if __name__ == '__main__': invoke_main(main, 'home_dir', 'experiments_dir', 'subsystem_dir', 'telemetry_script_dir')