Ejemplo n.º 1
0
def analyze_experiment(info, experiments_dir, tmp_data_dir,
                       date_str, exp_name):
    """Run an experiment's analyze.sh and persist the timestamped results.

    Invokes the experiment's analyze.sh against its temporary data dir,
    validates the status it produces, and on success copies the analyzed
    data (plus a timestamp and timing info) into the permanent data dir.
    Reports the analysis status to `info` and returns the success flag.
    """
    exp_dir = os.path.join(experiments_dir, exp_name)

    raw_data_dir = os.path.join(tmp_data_dir, exp_name)
    scratch_analysis_dir = os.path.join(raw_data_dir, 'analysis')
    idemp_mkdir(scratch_analysis_dir)

    permanent_data_dir = info.exp_data_dir(exp_name)
    if not os.path.exists(permanent_data_dir):
        idemp_mkdir(permanent_data_dir)

    subprocess.call([os.path.join(exp_dir, 'analyze.sh'),
                     info.exp_config_dir(exp_name), raw_data_dir, scratch_analysis_dir],
                    cwd=exp_dir)

    status = validate_status(scratch_analysis_dir)

    # read the analyzed data, append a timestamp field, and copy over to the permanent data dir
    if status['success']:
        if not check_file_exists(scratch_analysis_dir, 'data.json'):
            status = {'success': False,
                      'message': 'No data.json file produced by {}'.format(exp_name)}
        else:
            # collect data to dump to data_*.json, led by the timestamp
            dump_data = {'timestamp': date_str}
            dump_data.update(read_json(scratch_analysis_dir, 'data.json'))
            # fetch time spent on the experiment
            dump_data.update(get_timing_info(info, exp_name))
            write_json(permanent_data_dir, 'data_{}.json'.format(date_str), dump_data)

    info.report_exp_status(exp_name, 'analysis', status)
    return status['success']
Ejemplo n.º 2
0
def run_experiment(info, experiments_dir, tmp_data_dir, exp_name):
    """Execute an experiment's run.sh, logging and recording its timing.

    Sets up a per-experiment temporary data dir, runs the experiment's
    run.sh, validates the status file it leaves behind, annotates the
    status with start/end/duration, reports it, and returns success.
    """
    def fmt_local(sec):
        # human-readable local time used in both logs and the status record
        return time.asctime(time.localtime(sec))

    exp_dir = os.path.join(experiments_dir, exp_name)
    exp_conf = info.exp_config_dir(exp_name)

    # set up a temporary data directory for that experiment
    exp_data_dir = os.path.join(tmp_data_dir, exp_name)
    idemp_mkdir(exp_data_dir)

    # Mark the start and the end of an experiment
    start_time = time.time()
    print_log(f'Experiment {exp_name} starts @ {fmt_local(start_time)}')

    # run the run.sh file on the configs directory and the destination directory
    subprocess.call([os.path.join(exp_dir, 'run.sh'), exp_conf, exp_data_dir],
                    cwd=exp_dir)

    end_time = time.time()
    delta = datetime.timedelta(seconds=end_time - start_time)

    # collect the status file from the destination directory, copy to status dir
    status = validate_status(exp_data_dir)

    # show experiment status to terminal
    if status['success']:
        print_log(f'Experiment {exp_name} ends @ {fmt_local(end_time)}\nTime Delta: {delta}')
    else:
        print_log(f'*** {exp_name} FAILED ***\n*** Reason: {status["message"]} ***')

    # record start & end & duration of an experiment
    status['start_time'] = fmt_local(start_time)
    status['end_time'] = fmt_local(end_time)
    status['time_delta'] = str(delta)
    # not literally copying because validate may have produced a status that generated an error
    info.report_exp_status(exp_name, 'run', status)
    return status['success']
Ejemplo n.º 3
0
def prepare_exp_data_pages(info, out_dir):
    """Write one JSON page per successfully analyzed experiment.

    Skips experiments whose analysis stage is missing or failed. Data is
    emitted newest-first with light indentation to stay human-readable.
    """
    idemp_mkdir(out_dir)
    for exp in info.all_present_experiments():
        statuses = info.exp_stage_statuses(exp)
        analysis_status = statuses.get('analysis')
        if analysis_status is None or not analysis_status['success']:
            continue

        all_exp_data = sort_data(info.exp_data_dir(exp))

        # customize the formatting here so that it's at
        # least somewhat human-readable
        page_path = os.path.join(out_dir, '{}.json'.format(exp))
        with open(page_path, 'w') as f:
            json.dump(all_exp_data[::-1], f, indent=1)
Ejemplo n.º 4
0
def set_up_out_dir(info, out_dir):
    """Populate the website output dir with graphs and per-experiment data.

    Wipes and repopulates the 'graph' and 'data' subtrees: experiment
    graphs are always copied; score-metric graph subdirectories are added
    only when the score subsystem succeeded.
    """
    idemp_mkdir(out_dir)

    web_graph_dir = os.path.join(out_dir, 'graph')
    web_data_dir = os.path.join(out_dir, 'data')

    # start both generated subtrees from a clean slate
    for stale_dir in (web_graph_dir, web_data_dir):
        shutil.rmtree(stale_dir, ignore_errors=True)

    shutil.copytree(info.exp_graphs, web_graph_dir)

    if score_successful(info):
        score_graphs = os.path.join(info.subsys_output_dir('score'), 'graphs')
        # copy each per-metric graph directory produced by the score subsystem
        for subdir in os.listdir(score_graphs):
            full_path = os.path.join(score_graphs, subdir)
            if os.path.isdir(full_path):
                shutil.copytree(full_path, os.path.join(web_graph_dir, subdir))

    prepare_exp_data_pages(info, web_data_dir)
Ejemplo n.º 5
0
def main(home_dir, experiments_dir, subsystem_dir, telemetry_script_dir):
    """Top-level dashboard driver: back up, clean, and run everything.

    Home directory: Where config info for experiments, etc., is
    Experiments directory: Where experiment implementations are
    Both should be given as absolute directories

    Returns 1 if the dashboard config is missing; otherwise returns None
    after running all experiments and subsystems.
    """
    time_str = get_timestamp()

    if not check_file_exists(home_dir, 'config.json'):
        print('Dashboard config (config.json) is missing in {}'.format(home_dir))
        return 1
    dash_config = read_json(home_dir, 'config.json')

    # must expand all tildes in the config to avoid future errors
    for path_field in ['tmp_data_dir', 'setup_dir', 'backup_dir']:
        dash_config[path_field] = os.path.expanduser(dash_config[path_field])

    tmp_data_dir = os.path.join(dash_config['tmp_data_dir'],
                                'benchmarks_' + time_str)
    data_archive = os.path.join(dash_config['tmp_data_dir'],
                                'benchmarks_' + time_str + '_data.tar.gz')
    setup_dir = dash_config['setup_dir']
    backup_archive = os.path.join(dash_config['backup_dir'],
                                  'dashboard_' + time_str + '.tar.gz')
    idemp_mkdir(tmp_data_dir)
    idemp_mkdir(os.path.dirname(backup_archive))
    idemp_mkdir(setup_dir)

    info = DashboardInfo(home_dir)

    # make a backup of the previous dashboard files if they exist
    if os.path.exists(home_dir):
        subprocess.call(['tar', '-zcf', backup_archive, home_dir])

    # directories whose contents should not change between runs of the dashboard
    persistent_dirs = {info.exp_data,
                       info.exp_configs,
                       info.subsys_configs,
                       info.subsys_output}
    all_dashboard_dirs = info.all_experiment_dirs() + info.all_subsystem_dirs()

    # instantiate necessary dashboard dirs and clean any that should be empty
    for dashboard_dir in all_dashboard_dirs:
        if dashboard_dir not in persistent_dirs:
            subprocess.call(['rm', '-rf', dashboard_dir])
        idemp_mkdir(dashboard_dir)

    # config knobs; defaults match prior behavior when the key is absent
    randomize_exps = dash_config.get('randomize', True)
    telemetry_rate = dash_config.get('telemetry_rate', 15)
    run_cpu_telemetry = dash_config.get('run_cpu_telemetry', False)
    run_gpu_telemetry = dash_config.get('run_gpu_telemetry', False)

    run_all_experiments(info, experiments_dir, setup_dir,
                        tmp_data_dir, data_archive,
                        time_str, telemetry_script_dir,
                        run_cpu_telemetry=run_cpu_telemetry,
                        run_gpu_telemetry=run_gpu_telemetry,
                        telemetry_interval=telemetry_rate,
                        randomize=randomize_exps)

    run_all_subsystems(info, subsystem_dir, time_str)
Ejemplo n.º 6
0
def main(config_dir, home_dir, output_dir):
    """Generate CPU/GPU telemetry graphs for each experiment that ran.

    For every present experiment whose prerequisites are met and whose run
    succeeded, visualizes the most recent CPU and GPU telemetry samples
    (when the corresponding run_*_telemetry flags are set in run.json) and
    writes a success/failure status to output_dir.
    """
    info = DashboardInfo(home_dir)
    idemp_mkdir(output_dir)
    for exp_name in info.all_present_experiments():
        exp_status = info.exp_status_dir(exp_name)
        # run.json records whether CPU/GPU telemetry was collected for this run
        run_status = validate_json(exp_status,
                                   'run_cpu_telemetry',
                                   'run_gpu_telemetry',
                                   filename='run.json')
        # only graph experiments whose prerequisites hold and whose run succeeded;
        # others are silently skipped (no status written for them)
        if check_prerequisites(
                info, {exp_name: {}}) == (True, 'success') and run_status.get(
                    'success', False):
            telemetry_folder = info.subsys_telemetry_dir(exp_name)
            if os.path.exists(telemetry_folder):
                exp_graph_folder = os.path.join(telemetry_folder, 'graph')
                cpu_stat = info.exp_cpu_telemetry(exp_name)
                gpu_stat = info.exp_gpu_telemetry(exp_name)
                # sort_data presumably orders readings chronologically, so
                # [-1] below is the latest sample — TODO confirm
                cpu_data = sort_data(cpu_stat)
                gpu_data = sort_data(gpu_stat)
                graph_folder = info.exp_graph_dir(exp_name)
                website_include_dir = os.path.join(graph_folder)
                try:
                    if cpu_data and run_status.get('run_cpu_telemetry', False):
                        visualize(
                            'cpu', process_cpu_telemetry(cpu_data[-1]),
                            exp_graph_folder,
                            os.path.join(website_include_dir, 'cpu_telemetry'),
                            f'Visualizing CPU telemetry for {exp_name}',
                            # CPU graph titles include the adapter name
                            lambda adapter, title, *rest: f'{adapter}-{title}')

                    if gpu_data and run_status.get('run_gpu_telemetry', False):
                        visualize(
                            'gpu', process_gpu_telemetry(gpu_data[-1]),
                            exp_graph_folder,
                            os.path.join(website_include_dir, 'gpu_telemetry'),
                            f'Visualizing GPU telemetry for {exp_name}',
                            # GPU graph titles use the title alone
                            lambda _, title, *rest: title)
                except Exception as e:
                    write_status(
                        output_dir, False,
                        f'Encountered err while generating graphs: {e}')
                    return
                # NOTE(review): success is written once per experiment inside the
                # loop, so each iteration overwrites the previous status — confirm
                # this is intentional (a final write after the loop may be meant)
                write_status(output_dir, True, 'success')
            else:
                write_status(output_dir, False, 'No telemetry data found')
                return
Ejemplo n.º 7
0
def setup_experiment(info, experiments_dir, setup_dir, exp_name):
    """Run an experiment's setup.sh in a fresh setup dir and report status.

    The experiment's setup directory is recreated from scratch before the
    script runs. On success, a `.last_setup` marker file is touched so
    later runs can detect whether anything changed since setup.
    Returns the status success flag.
    """
    exp_dir = os.path.join(experiments_dir, exp_name)
    exp_setup_dir = os.path.join(setup_dir, exp_name)

    # remove the existing setup dir before running the script again
    subprocess.call(['rm', '-rf', exp_setup_dir])
    idemp_mkdir(exp_setup_dir)

    setup_script = os.path.join(exp_dir, 'setup.sh')
    subprocess.call([setup_script, info.exp_config_dir(exp_name), exp_setup_dir],
                    cwd=exp_dir)

    status = validate_status(exp_setup_dir)
    info.report_exp_status(exp_name, 'setup', status)

    # if setup succeeded, touch a marker file so we know what time to check for changes
    if status['success']:
        subprocess.call(['touch', '.last_setup'], cwd=exp_setup_dir)

    return status['success']
Ejemplo n.º 8
0
def visualize(device,
              data,
              exp_graph_dir,
              website_copy_dir,
              msg='',
              get_title=lambda *arg: '-'.join(arg)):
    """Render one graph per telemetry entry and copy it to the website dir.

    `data` is a timestamp followed by (adapter, title, unit, series)
    entries. Graphs land under exp_graph_dir/<ts>/<device> and are also
    copied into website_copy_dir. `get_title` maps an entry to its graph
    title.
    """
    ts, *entries = data
    device_graph_dir = os.path.join(exp_graph_dir, ts, device)
    idemp_mkdir(device_graph_dir)
    idemp_mkdir(website_copy_dir)
    print(msg)
    for adapter, title, unit, series in entries:
        generate_graph(ts,
                       get_title(adapter, title, unit, series),
                       title,
                       series,
                       device_graph_dir,
                       y_label=unit if unit else '',
                       copy_to=[website_copy_dir])
Ejemplo n.º 9
0
def run_subsystem(info, subsystem_dir, subsys_name):
    """Run a subsystem's run.sh and report the status it produces.

    Clears any stale status.json from the subsystem's (persistent) output
    directory, executes run.sh, validates the new status, reports it via
    `info`, and returns the success flag.
    """
    subsys_dir = os.path.join(subsystem_dir, subsys_name)
    subsys_output_dir = info.subsys_output_dir(subsys_name)
    idemp_mkdir(subsys_output_dir)

    # remove the old status if one is hanging around
    # (subsystem output dirs remain around between runs)
    stale_status = os.path.join(subsys_output_dir, 'status.json')
    if check_file_exists(subsys_output_dir, 'status.json'):
        subprocess.call(['rm', '-f', stale_status])

    # run the run.sh file on the configs directory and the output directory
    run_script = os.path.join(subsys_dir, 'run.sh')
    subprocess.call([run_script,
                     info.subsys_config_dir(subsys_name),
                     info.home_dir, subsys_output_dir],
                    cwd=subsys_dir)

    # collect the status file from the destination directory, copy to status dir
    status = validate_status(subsys_output_dir)
    # not literally copying because validate may have produced a status that generated an error
    info.report_subsys_status(subsys_name, 'run', status)
    return status['success']
Ejemplo n.º 10
0
def main(interval, output_dir, exp_name, run_cpu_telemetry, run_gpu_telemetry):
    '''
        Collect CPU/GPU telemetry for one experiment at a fixed interval.

        # directory structure:
        # ./output_dir
        #       -> telemetry
        #           -> char_rnn
        #           -> treelstm ...

        run_cpu_telemetry / run_gpu_telemetry are compared against the
        string 'True' — presumably they arrive as command-line strings,
        not booleans; verify against the caller.
    '''
    out_dir = os.path.join(output_dir, 'telemetry')
    log_dir = os.path.join(out_dir, exp_name)
    # separate subdirectories for CPU and GPU logs
    idemp_mkdir(os.path.join(log_dir, 'cpu'))
    idemp_mkdir(os.path.join(log_dir, 'gpu'))
    # fields queried from nvidia-smi for GPU telemetry
    nvidia_fields = 'timestamp,clocks.gr,clocks.current.memory,utilization.gpu,utilization.memory,memory.used,pstate,power.limit,temperature.gpu,fan.speed'.split(',')
    # NOTE(review): start_job is invoked here with time_run=0 and again with
    # time_run=0 on the loop's first iteration (with a float interval the
    # second time) — confirm the duplicate initial collection is intentional
    start_job(log_dir, nvidia_fields, int(interval), 0,
                run_cpu_telemetry == 'True', run_gpu_telemetry == 'True')
    time_run = 0
    interval = float(interval)
    # loop forever, sampling telemetry every `interval` seconds; the process
    # is presumably killed externally when the experiment ends — TODO confirm
    while True:
        start_job(log_dir, nvidia_fields, interval, time_run,
            run_cpu_telemetry == 'True', run_gpu_telemetry == 'True')
        time_run += 1
        time.sleep(interval)
Ejemplo n.º 11
0
def process_telemetry_statistics(info,
                                 exp_name,
                                 output_dir,
                                 time_str,
                                 cpu_stat_parser=parse_cpu_stat,
                                 gpu_stat_parser=parse_gpu_stat):
    '''
    Parse raw CPU/GPU telemetry logs and write the statistics as JSON
    into the experiment's telemetry results directory.
    Note: The "parsing" logic procedure written in this file is specialized to deal with
          telemetry collected at pipsqueak. They are not guaranteed to work on other platforms.
    '''
    results_dir = info.subsys_telemetry_dir(exp_name)
    if not os.path.exists(results_dir):
        idemp_mkdir(results_dir)

    raw_dir = os.path.join(output_dir, f'telemetry/{exp_name}')

    # one parsed JSON file per device type, GPU first then CPU
    for device, parser in (('gpu', gpu_stat_parser), ('cpu', cpu_stat_parser)):
        device_raw_dir = os.path.join(raw_dir, device)
        write_json(os.path.join(results_dir, device),
                   f'{device}-{time_str}.json',
                   parser(device_raw_dir, time_str))
Ejemplo n.º 12
0
def main(config_dir, home_dir, output_dir):
    """Score the dashboard's experiments against the configured metrics.

    Reads score configurations, intersects them with the known SCORE_METRICS,
    verifies each metric's prerequisites, processes its score into per-metric
    data/graph dirs, and writes an aggregate report plus a status file.

    Returns 0 on success (including when there are no scores to report) and
    1 on a prerequisite or scoring failure.
    """
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)

    data_dir = os.path.join(output_dir, 'data')
    graph_dir = os.path.join(output_dir, 'graphs')
    idemp_mkdir(data_dir)
    idemp_mkdir(graph_dir)

    timestamp = get_timestamp()

    # only metrics that are both configured and implemented
    score_confs = conf['score_confs']
    metrics = set(score_confs.keys())
    metrics = metrics.intersection(set(SCORE_METRICS.keys()))

    if not metrics:
        write_status(output_dir, True, 'No scores to report')
        return 0

    score_data = {}
    score_reports = {}
    for metric in metrics:
        score_metric = SCORE_METRICS[metric](score_confs[metric])
        valid, msg = check_prerequisites(info, score_metric.prereq())
        if not valid:
            write_status(output_dir, False, msg)
            return 1

        # per-metric subdirectories for data and graphs
        score_data_dir = os.path.join(data_dir, metric)
        score_graph_dir = os.path.join(graph_dir, metric)
        idemp_mkdir(score_data_dir)
        idemp_mkdir(score_graph_dir)

        try:
            report = process_score(info, score_metric, score_data_dir,
                                   score_graph_dir, timestamp)
            score_reports[metric] = report
        except Exception as e:
            write_status(
                output_dir, False,
                'Encountered exception while scoring {}:\n{}'.format(
                    metric, render_exception(e)))
            return 1

    report = {'title': 'Metric Scores', 'value': format_scores(score_reports)}
    write_json(output_dir, 'report.json', report)
    write_status(output_dir, True, 'success')
    # explicit success code for consistency with the other return paths
    # (previously fell off the end, implicitly returning None)
    return 0
Ejemplo n.º 13
0
def export_mxnet_model(cell_type, setup_dir):
    """Build, hybridize, and export an RNN model of the given cell type.

    Runs one forward pass on placeholder inputs so the hybridized graph
    can be exported under setup_dir/mxnet, then returns the export path.
    cell_type is one of 'rnn', 'gru', or 'lstm' (lstm needs two states).
    """
    # batch and seq_len are placeholder, and don't affect the exported model
    ctx = mx.context.cpu()
    dtype = 'float32'
    model = RNNModel(cell_type, INPUT_SIZE, HIDDEN_SIZE)

    def zero_state():
        # a single zero-initialized hidden state tensor
        return mx.nd.zeros((BATCH, HIDDEN_SIZE), dtype=dtype, ctx=ctx)

    if cell_type in ('rnn', 'gru'):
        states = [zero_state()]
    elif cell_type == 'lstm':
        # LSTM cells carry both a hidden state and a cell state
        states = [zero_state(), zero_state()]

    xs = mx.nd.random.uniform(shape=(SEQ_LEN, BATCH, INPUT_SIZE),
                              dtype=dtype,
                              ctx=ctx)

    model.collect_params().initialize(ctx=ctx)
    model.hybridize()
    # one forward pass so the hybridized graph is traced and exportable
    model(xs, states)

    idemp_mkdir(os.path.join(setup_dir, 'mxnet'))
    fname = os.path.join(setup_dir, 'mxnet', model_filename(cell_type))
    model.export(fname, epoch=1)
    print('Export MXNet model to %s' % fname)
    return fname