コード例 #1
0
ファイル: fix_query_keys.py プロジェクト: oxhead/elastic-hpcc
def run(run_output_dir, workload_file):
    """Run a grid of Roxie benchmark experiments.

    Iterates over the module-level ``arrival_types``, ``num_queries_list``,
    ``period_list``, ``applications`` and ``distribution_types``, builds one
    workload timeline per (arrival, queries, period) combination, derives a
    per-application/per-distribution timeline from it, and executes a
    RoxieBenchmark for each combination. Combinations whose output
    directory already exists are skipped, so interrupted runs can resume.

    Args:
        run_output_dir: subdirectory under ``results/`` for this run's output.
        workload_file: workload template file name, relative to this script.
    """
    # Cluster topology and driver configuration are fixed paths on the
    # benchmark host (4 nodes, 4 replicas, 6 drivers).
    hpcc_cluster = HPCCCluster.parse_config(
        "/etc/HPCCSystems/source/cyclic/elastic_4node_4replica.xml")
    benchmark_config = BenchmarkConfig.parse_file(
        "/home/chsu6/elastic-hpcc/conf/6driver.yaml")

    script_dir = os.path.dirname(os.path.realpath(__file__))

    workload_config_template = WorkloadConfig.parse_file(
        os.path.join(script_dir, workload_file))
    application_db = workload_config_template.lookup_config(
        'workload.applications')

    for arrival_type in arrival_types:
        for num_queries in num_queries_list:
            for period in period_list:
                workload_config = copy.deepcopy(workload_config_template)
                workload_config.set_config('workload.type', arrival_type)
                workload_config.set_config('workload.num_queries', num_queries)
                workload_config.set_config('workload.period', period)
                workload = Workload.from_config(workload_config)
                # One shared timeline per (arrival, queries, period) so every
                # application/distribution variant sees the same arrival pattern.
                workload_timeline = WorkloadExecutionTimeline.from_workload(
                    workload)
                for app_name in applications:
                    for distribution_type in distribution_types:
                        per_workload_config = copy.deepcopy(workload_config)
                        per_workload_config.set_config('workload.distribution',
                                                       distribution_type)
                        # Restrict the workload to a single application.
                        per_workload_config.set_config(
                            'workload.applications',
                            {app_name: application_db[app_name]})
                        per_workload = Workload.from_config(
                            per_workload_config)
                        per_workload_timeline = WorkloadExecutionTimeline.from_timeline(
                            workload_timeline, per_workload)

                        output_dir = os.path.join(
                            "results", run_output_dir,
                            "5roxie_{}_{}_{}_{}queries_{}sec".format(
                                arrival_type, distribution_type['type'],
                                app_name, num_queries, period))
                        # Resume support: skip combinations already completed.
                        if os.path.exists(output_dir):
                            continue

                        bm = RoxieBenchmark(hpcc_cluster,
                                            benchmark_config,
                                            per_workload_timeline,
                                            output_dir=output_dir)
                        # Let the cluster settle between consecutive runs.
                        time.sleep(60)
                        bm.run()
コード例 #2
0
ファイル: benchmark.py プロジェクト: oxhead/elastic-hpcc
def run(ctx, config, output_dir):
    """Execute one Roxie benchmark run against the fixed t5/r5 cyclic cluster.

    Builds an execution timeline from the workload config, then runs a
    RoxieBenchmark with the driver settings from the CLI context.
    """
    workload = Workload.parse_config(config)
    timeline = WorkloadExecutionTimeline.from_workload(workload)
    cluster = HPCCCluster.parse_config(
        "/etc/HPCCSystems/source/hpcc_t5_r5_cyclic.xml")
    driver_config = BenchmarkConfig.parse_file(ctx.obj['config'])
    benchmark = RoxieBenchmark(
        cluster, driver_config, timeline, output_dir=output_dir)
    benchmark.run()
コード例 #3
0
ファイル: benchmark.py プロジェクト: oxhead/RoxieBench
def run(ctx, config, hpcc_config, output_dir):
    """Run a Roxie benchmark for the given workload on the given HPCC cluster.

    The workload config is turned into an execution timeline, the cluster
    and driver settings are loaded, and the benchmark is executed.
    """
    # Workload definition -> execution timeline.
    workload = Workload.parse_config(config)
    timeline = WorkloadExecutionTimeline.from_workload(workload)
    # Cluster topology and benchmark driver settings.
    cluster = HPCCCluster.parse_config(hpcc_config)
    driver_config = BenchmarkConfig.parse_file(ctx.obj['config'])
    benchmark = RoxieBenchmark(
        cluster, driver_config, timeline, output_dir=output_dir)
    benchmark.run()
コード例 #4
0
ファイル: benchmark.py プロジェクト: oxhead/elastic-hpcc
def submit(ctx, config, output_dir):
    """Submit a workload timeline to the benchmark service for remote execution.

    NOTE(review): ``output_dir`` is accepted for CLI symmetry but is not
    used here — the service presumably manages its own output location.
    """
    timeline = WorkloadExecutionTimeline.from_workload(
        Workload.parse_config(config))
    service = BenchmarkService.new(ctx.obj['config'])
    service.submit_workload(timeline)
コード例 #5
0
def generate_experiments(default_setting, variable_setting_list, experiment_dir=None, timeline_reuse=False, wait_time=60, check_success=True, overwrite=False, restart_hpcc=False, timeout=300):
    """Yield one configured Experiment per entry in ``variable_setting_list``.

    For each variable setting, a deep copy of ``default_setting`` is overlaid
    with the overrides, a workload timeline is built (and cached on disk),
    the benchmark driver config is resized, an optional data placement and
    routing table are computed, and a ready-to-run Experiment is yielded.

    Args:
        default_setting: base settings object (supports set_config/has_key/
            lookup_config and dict-style indexing) shared by all experiments.
        variable_setting_list: iterable of {setting_name: value} overrides,
            one dict per experiment.
        experiment_dir: optional base directory for the timeline cache
            (``<experiment_dir>/.workload_timeline``).
        timeline_reuse: when True, reuse a previously cached timeline
            instead of regenerating it.
        wait_time: seconds passed through to Experiment (settle time —
            presumably between runs; see Experiment).
        check_success: passed through to Experiment.
        overwrite: when False, settings whose output directory already
            exists are skipped (resume support).
        restart_hpcc: passed through to Experiment.
        timeout: seconds, passed through to Experiment.

    Yields:
        Experiment instances, each with a ``workload_config`` attribute
        attached as an extra (the code marks this as a hack).
    """
    for variable_setting in variable_setting_list:
        # Overlay this experiment's overrides on a private copy of the defaults.
        per_setting = copy.deepcopy(default_setting)
        #print(json.dumps(per_setting.config, indent=4))
        for setting_name, setting_value in variable_setting.items():
            per_setting.set_config(setting_name, setting_value)
        #print(json.dumps(per_setting.config, indent=4))
        # create workload timeline
        workload_config = WorkloadConfig.parse_file(per_setting['experiment.workload_template'])
        # this feature now can reduce redundancy workload configs
        if per_setting.has_key('experiment.workload_endpoints'):
            workload_config.select_endpoints(per_setting.lookup_config('experiment.workload_endpoints'))
        workload_config.merge(per_setting)  # should be able to merge

        # Optionally restrict the workload to a named subset of applications.
        if per_setting.has_key('experiment.applications'):
            application_db = workload_config.lookup_config('workload.applications')
            app_names = per_setting['experiment.applications']
            app_config = {}
            for app_name in app_names:
                app_config[app_name] = application_db[app_name]
            workload_config.set_config('workload.applications', app_config)

        #print(json.dumps(workload_config.config, indent=4))

        # Build the timeline via a disk cache so identical workload configs
        # are not regenerated unless timeline_reuse is disabled.
        workload = Workload.from_config(workload_config)
        workload_timeline_dir = os.path.join(experiment_dir, '.workload_timeline') if experiment_dir else '.workload_timeline'
        workload_timeline_manager = WorkloadTimelineManager(store_dir=workload_timeline_dir)
        workload_name = per_setting['workload.name'] if per_setting.has_key('workload.name') else None
        workload_timeline = workload_timeline_manager.cache(workload_config, workload, update=not timeline_reuse, name=workload_name)
        #for k, vs in workload_timeline.timeline.items():
        #    for v in vs:
        #        print(v.wid, v.query_name, v.key)
        #print(workload.application_selection.distribution, workload.application_selection.probability_list)
        #analyze_timeline(workload_timeline.timeline)
        #import sys
        #sys.exit(0)

        experiment_id = per_setting['experiment.id']
        #print(per_setting['cluster.target'])
        hpcc_cluster = HPCCCluster.parse_config(per_setting['cluster.target'])
        benchmark_config = BenchmarkConfig.parse_file(per_setting['cluster.benchmark'])
        #print("before", benchmark_config.config)
        # Resize the benchmark driver fleet per this experiment's settings:
        # instance/processor/worker counts, and truncate the host list to
        # the requested number of client machines.
        num_benchmark_instances = int(per_setting['experiment.benchmark_instances'])
        num_benchmark_processors_per_client = int(per_setting['experiment.benchmark_processors'])
        num_benchmark_clients = int(per_setting['experiment.benchmark_clients'])
        num_benchmark_concurrency = int(per_setting['experiment.benchmark_concurrency'])
        benchmark_config.set_config("driver.num_instances", num_benchmark_instances)
        benchmark_config.set_config("driver.num_processors", num_benchmark_processors_per_client)
        benchmark_config.set_config("driver.hosts", benchmark_config.lookup_config("driver.hosts")[:num_benchmark_clients])
        benchmark_config.set_config("driver.num_workers", num_benchmark_concurrency)
        #print("after", benchmark_config.config)

        # Derive a default output directory when none was given explicitly.
        if not per_setting.has_key('experiment.output_dir'):
            per_setting.set_config('experiment.output_dir', generate_default_output_dir(per_setting, hpcc_cluster, workload_config))
        output_dir = per_setting['experiment.output_dir']

        # Resume support: skip experiments that already produced output.
        if (not overwrite) and os.path.exists(output_dir):
            print("skip experiment:", output_dir)
            continue

        # Optional data-placement computation. ``complete`` replicates to all
        # roxie nodes; otherwise a model-driven (possibly coarse-grained
        # partial) placement is computed from the access profile statistics.
        dp_new = None
        routing_table = {}
        access_profile = None
        if per_setting.has_key('experiment.data_placement'):
            data_placement_type, old_locations, access_profile = per_setting['experiment.data_placement']

            if data_placement_type == placement.DataPlacementType.complete:
                all_nodes = sorted(list(set([n.get_ip() for n in hpcc_cluster.get_roxie_cluster().nodes])))
                dp_new = generate_complete_data_placement(all_nodes, old_locations)
            else:
                dp_model = per_setting['experiment.dp_model']

                # New nodes = roxie nodes not already holding data.
                dp_old = placement.DataPlacement.new(old_locations)
                old_nodes = sorted(dp_old.nodes)
                new_nodes = sorted(list(set([n.get_ip() for n in hpcc_cluster.get_roxie_cluster().nodes]) - set(dp_old.nodes)))
                access_statistics = placement.PlacementTool.compute_partition_statistics(placement.PlacementTool.load_statistics(access_profile))
                coarse_grained = True if data_placement_type == placement.DataPlacementType.coarse_partial else False
                dp_name = per_setting['experiment.dp_name']
                dp_new = generate_data_placement(old_nodes, new_nodes, old_locations, access_statistics, coarse_grained=coarse_grained, dp_model=dp_model, dp_name=dp_name)
            #print(json.dumps(dp_new.locations, indent=4, sort_keys=True))
            #import sys
            #sys.exit(0)
            # Optional static routing table matching the new placement.
            if per_setting.has_key('experiment.benchmark_manual_routing_table'):
                routing_table = generate_routing_table(dp_new, workload_config.lookup_config('workload.endpoints'))
                #print(json.dumps(routing_table, indent=4, sort_keys=True))
                #sys.exit(0)

        # Storage defaults when not overridden by the experiment settings.
        data_dir = per_setting['experiment.dataset_dir'] if per_setting.has_key('experiment.dataset_dir') else '/dataset'
        storage_type = per_setting['experiment.storage_type'] if per_setting.has_key('experiment.storage_type') else 'nfs'

        experiment = Experiment(
            experiment_id,
            benchmark_config,
            hpcc_cluster,
            workload_timeline,
            output_dir,
            wp=access_profile,
            dp=dp_new,
            wait_time=wait_time,
            check_success=check_success,
            data_dir=data_dir,
            storage_type=storage_type,
            restart_hpcc=restart_hpcc,
            routing_table=routing_table,
            timeout=timeout,
            cluster_config=per_setting['cluster.target'],
            deploy_config=per_setting['cluster.deploy_config']
        )
        experiment.workload_config = workload_config  # hack
        yield experiment