Example #1
def run_time_analysis(input_dir):
    # Fall back to the configured output folder when no directory is given.
    if not input_dir:
        cfg = utils.get_cfg()
        input_dir = cfg['main'].get('output_folder') if 'main' in cfg else None
    run_ta.main(input_dir)
Example #2
def run_log_profiling(local):
    # When not running locally, read the output folder from the config file.
    out_folder = None
    if not local:
        cfg = utils.get_cfg()
        out_folder = cfg['main'].get('output_folder') if 'main' in cfg else None
    profiling.main(out_folder)
def run_time_analysis(input_dir):
    # With no explicit directory, analyse every configured output folder.
    if not input_dir:
        cfg = utils.get_cfg()
        if 'out_folders' in cfg:
            for x in cfg['out_folders']:
                input_dir = cfg['out_folders'][x]
                run_ta.main(input_dir)
    else:
        run_ta.main(input_dir)
def run_log_profiling(local):
    out_dir = None
    in_dir = None
    cfg = utils.get_cfg()
    if not local:
        # Profile every configured output folder.
        if 'out_folders' in cfg:
            for x in cfg['out_folders']:
                in_dir = out_dir = cfg['out_folders'][x]
                profiling.main(input_dir=in_dir, json_out_dir=out_dir)
    else:
        # Local run: pass None for both directories.
        profiling.main(input_dir=in_dir, json_out_dir=out_dir)
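The snippets above all read settings through utils.get_cfg() and persist them with utils.write_cfg(). Those helpers are not part of this listing; a minimal sketch of what they might look like, assuming a configparser-backed config file (the file name and the function bodies here are assumptions, not the project's actual code):

import configparser

CFG_FILENAME = 'cfg_clusters.ini'  # assumed config file name, not taken from the original project

def get_cfg(filename=CFG_FILENAME):
    """Load the configuration into a ConfigParser (assumed helper)."""
    cfg = configparser.ConfigParser()
    cfg.read(filename)
    return cfg

def write_cfg(cfg, filename=CFG_FILENAME):
    """Write the ConfigParser back to disk (assumed helper)."""
    with open(filename, 'w') as f:
        cfg.write(f)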
Example #5
def upload_profile(self):
    cfg = utils.get_cfg()
    benchmark = cfg['experiment']['benchmarkname']
    profile_fname = cfg[benchmark]['profile_name'] + '.json'
    filedir = OUTPUT_DIR
    filepath = filedir + '/' + profile_fname
    try:
        self.retrieve_nodes()
        x_run.upload_profile_to_master(self.nodes, profile_fname, filepath)
    except (OSError, IOError) as exc:
        print('ERROR: {}\n\nCould not upload profile'.format(exc))
Example #6
def kill_cluster(cluster):
    cluster_id = cluster_map[cluster]
    print(bold('Terminate {}...'.format(cluster_id)))
    run_xspark(current_cluster=cluster,
               num_instance=0,
               cluster_id=cluster_id,
               run=0,
               terminate=1,
               reboot=0)
    # Clear the terminated cluster's section in the config file.
    cfg = utils.get_cfg()
    cfg[cluster] = {}
    utils.write_cfg(cfg)
def profile(args):
    cluster_id = c.CLUSTER_MAP['spark']
    num_run = args.num_runs
    reuse_dataset = args.reuse_dataset
    #exp_filepath = args.exp_file_path if args.exp_file_path else "experiment.json"
    exp_filepaths = args.exp_file_paths if args.exp_file_paths else ["experiment.json"]
    num_experiments = len(exp_filepaths)
    spark_seq = args.spark_seq if args.spark_seq else False
    index = 0
    for exp_filepath in exp_filepaths:
        exp_file = Path(exp_filepath)
        index += 1
        if exp_file.exists():
            experiment = json.load(open(exp_filepath))
            try:
                benchmark = experiment["BenchmarkName"]
                #benchmark = experiment["BenchmarkBench"][0]
            except KeyError as error:
                print("ERROR: {} in experiment file: {}".format(error, exp_filepath))
                exit(1)
        else:
            # Abort early: 'benchmark' would be undefined in the code below.
            print("ERROR: experiment file not found: {}".format(exp_filepath))
            exit(1)
        with utils.open_cfg(mode='w') as cfg:
            # Reset the config file, keeping only the 'hdfs' section.
            for s in cfg.sections():
                if s != 'hdfs':
                    cfg.remove_section(s)
            cfg['main'] = {}
            cfg['main']['tool_on_master'] = 'false'
            cfg['main']['experiment_file'] = exp_filepath
            cfg['main']['process_on_server'] = str(c.PROCESS_ON_SERVER)
            cfg['main']['iter_num'] = str(1) #vboxvm
            cfg['main']['num_experiments'] = str(num_experiments)
            cfg['main']['experiment_num'] = str(index)
            #cfg['main']['cluster_id'] = cluster_id
            cfg['profile'] = {}
            cfg['profile']['spark_seq'] = str(spark_seq)
            cfg[benchmark] = {}
            cfg[benchmark]['profile_name'] = '{}'.format(c.VAR_PAR_MAP[benchmark]['profile_name'])
            if reuse_dataset:
                cfg['main']['delete_hdfs'] = str(not reuse_dataset)
            
        print(bold('Profile experiment {} performing {} runs for benchmark {} on cluster {}'.format(exp_filepath, 
                                                                                                   num_run, benchmark,
                                                                                                   cluster_id,)))
        run_xspark(current_cluster='spark', num_instance=0, num_run=num_run,
                   cluster_id=cluster_id, run=1, terminate=0, reboot=0)
        if not c.PROCESS_ON_SERVER:
            average_runs.main(profile_name=utils.get_cfg()[benchmark]['profile_name'])
            deploy_profile(benchmark, cluster_id)
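profile() above edits the configuration through `with utils.open_cfg(mode='w') as cfg:`. That context manager is also not shown in this listing; a minimal sketch of how it could behave, reusing the assumed get_cfg/write_cfg helpers sketched earlier (hypothetical, inferred only from how the examples call it):

from contextlib import contextmanager

@contextmanager
def open_cfg(mode='r'):
    """Yield the parsed config; write it back on exit when opened for writing
    (assumed behaviour, inferred from the call sites above)."""
    cfg = get_cfg()
    try:
        yield cfg
    finally:
        if mode == 'w':
            write_cfg(cfg)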
Example #8
def launch_exp(args):
    cluster_id = cluster_map['spark']
    num_v = args.num_v
    for v in num_v:
        cfg = utils.get_cfg()
        cfg['pagerank'] = {}
        cfg['pagerank']['num_v'] = str(v)  # config values are written as strings
        utils.write_cfg(cfg)
        print(
            bold('Launch Experiments on {} with {} vertices...'.format(
                cluster_id, v)))
        run_xspark(current_cluster='spark',
                   num_instance=0,
                   cluster_id=cluster_id,
                   run=1,
                   terminate=0,
                   reboot=0)
        if args.profile:
            run_log_profiling(None)
def profile_disabled(args):
    cluster_id = c.CLUSTER_MAP['spark']
    var_par = args.var_par
    exp_profile_name = args.exp_profile_name if args.exp_profile_name else ""
    benchmark = args.benchmark
    num_run = args.num_runs
    max_executors = args.max_executors
    num_partitions = args.num_partitions
    for v in var_par:
        with utils.open_cfg(mode='w') as cfg:
            cfg['main'] = {}
            cfg['main']['profile'] = 'true'
            cfg['main']['tool_on_master'] = 'false'
            cfg['main']['benchmark'] = benchmark
            cfg['main']['iter_num'] = str(1)  #vboxvm
            cfg[benchmark] = {}
            cfg[benchmark][c.VAR_PAR_MAP[benchmark]['var_name']] = \
                '({}, {})'.format(c.VAR_PAR_MAP[benchmark]['default'][0], v)
            cfg[benchmark]['profile_name'] = \
                args.exp_profile_name if args.exp_profile_name \
                else '{}'.format(c.VAR_PAR_MAP[benchmark]['profile_name'])
            cfg[benchmark]['num_partitions'] = str(num_partitions)
            if max_executors:
                cfg['main']['max_executors'] = max_executors
        print(
            bold(
                'Profile {} performing {} runs for benchmark {} on cluster {} with {}={}...'
                .format(exp_profile_name, num_run, benchmark, cluster_id,
                        c.VAR_PAR_MAP[benchmark]['var_name'], v)))
        run_xspark(current_cluster='spark',
                   num_instance=0,
                   num_run=num_run,
                   cluster_id=cluster_id,
                   run=1,
                   terminate=0,
                   reboot=0)
        #profiling.main()
        average_runs.main(
            profile_name=utils.get_cfg()[benchmark]['profile_name'])
        #run_log_profiling(args.local)
        deploy_profile(benchmark, cluster_id)
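profile() and profile_disabled() both index c.VAR_PAR_MAP by benchmark name and read the keys 'var_name', 'default', and 'profile_name'. A hypothetical entry that is consistent with those accesses (the benchmark name and the values below are illustrative assumptions):

# Hypothetical shape of c.VAR_PAR_MAP; only the keys read by the examples above are shown.
VAR_PAR_MAP = {
    'pagerank': {
        'var_name': 'num_v',                 # the parameter varied across runs
        'default': (1000000, 1000000),       # default[0] is used in '({}, {})'.format(default[0], v)
        'profile_name': 'pagerank_profile',  # placeholder profile name
    },
}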
Example #10
def run_xspark(current_cluster,
               num_instance=NUM_INSTANCE,
               num_run=NUM_RUN,
               cluster_id=CLUSTER_ID,
               terminate=TERMINATE,
               run=RUN,
               reboot=REBOOT,
               assume_yes=False):
    """ Main function;
    * Launch spot request of NUMINSTANCE
    * Run Benchmark
    * Download Log
    * Plot data from log
    """
    print(
        header(
            'run_xspark(num_instance={}, num_run={}, cluster_id={},terminate={}, run={}, reboot={})'
            .format(num_instance, num_run, cluster_id, terminate, run,
                    reboot)))
    cfg = utils.get_cfg()
    cfg['main'] = {}
    cfg.set('main', 'current_cluster', current_cluster)
    utils.write_cfg(cfg)

    if PROVIDER == "AWS_SPOT":
        set_spot_drivers()
        cls = get_driver("ec2_spot_" + REGION.replace('-', '_'))
        driver = cls(AWS_ACCESS_ID, AWS_SECRET_KEY)
    elif PROVIDER == "AZURE":
        set_azurearm_driver()
        cls = get_driver("CustomAzureArm")
        driver = cls(tenant_id=AZ_TENANT_ID,
                     subscription_id=AZ_SUBSCRIPTION_ID,
                     key=AZ_APPLICATION_ID,
                     secret=AZ_SECRET,
                     region=CONFIG_DICT["Azure"]["Location"])

    else:
        print("Unsupported provider", PROVIDER)
        return

    if num_instance > 0:

        # Create nodes
        if PROVIDER == "AWS_SPOT":
            nodes, spot_requests = launch.launch_libcloud(
                driver, num_instance, CONFIG_DICT, cluster_id, assume_yes)

        if PROVIDER == "AZURE":
            nodes = launch.launch_libcloud(driver, num_instance, CONFIG_DICT,
                                           cluster_id, assume_yes)

        # nodes is a list of "libcloud.compute.base.Node"

        print("CHECK SECURITY GROUP ALLOWED IP SETTINGS!!!")

        # Tag nodes
        if PROVIDER == "AWS_SPOT":
            for node in nodes:
                driver.ex_create_tags(node, TAG[0])
        elif PROVIDER == "AZURE":
            for node in nodes:
                driver.ex_create_tags(
                    node, {"ClusterId": cluster_id
                           })  # was CONFIG_DICT["Azure"]["ClusterId"]

        instance_ids = [n.id for n in nodes]

        # Wait for all the nodes to become RUNNING
        print("Waiting for nodes to run")
        launch.wait_for_running_libcloud(driver, instance_ids,
                                         copy.deepcopy(instance_ids))

        time.sleep(15)

        # Wait for all the nodes to be pingable
        print("Waiting for nodes to be pingable")
        launch.wait_ping_libcloud(driver, instance_ids,
                                  copy.deepcopy(instance_ids))

    if reboot:
        print("Rebooting instances...")

        # Retrieve running nodes
        if PROVIDER == "AWS_SPOT":
            nodes = driver.list_nodes(
                ex_filters={'instance-state-name': ['running']})
            nodes = [
                n for n in nodes
                if driver.ex_describe_tags(node)['Value'] == cluster_id
            ]
        elif PROVIDER == "AZURE":
            nodes = driver.list_nodes(
                ex_resource_group=CONFIG_DICT["Azure"]["ResourceGroup"])
            nodes = [
                n for n in nodes if n.extra["tags"]["ClusterId"] == cluster_id
            ]

        # Reboot nodes
        for node in nodes:
            driver.reboot_node(node)

        # Wait for all the nodes to be pingable
        instance_ids = [n.id for n in nodes]
        launch.wait_ping_libcloud(driver, instance_ids,
                                  copy.deepcopy(instance_ids))

    if run:
        for i in range(num_run):
            if PROVIDER == "AWS_SPOT":
                nodes = driver.list_nodes(
                    ex_filters={'instance-state-name': ['running']})
                nodes = [
                    n for n in nodes
                    if driver.ex_describe_tags(n)['Value'] == cluster_id
                ]
            elif PROVIDER == "AZURE":
                nodes = driver.list_nodes(
                    ex_resource_group=CONFIG_DICT["Azure"]["ResourceGroup"])
                nodes = [
                    n for n in nodes
                    if n.extra["tags"]["ClusterId"] == cluster_id
                ]

            # nodes is a list of "libcloud.compute.base.Node"
            print("Found {} nodes".format(len(nodes)))

            x_run.run_benchmark(nodes)

    if terminate:
        print("Begin termination of instances and cleaning")

        # Cancel Spot Request
        if PROVIDER == "AWS_SPOT" and num_instance > 0:
            for s in spot_requests:
                driver.ex_cancel_spot_instance_request(s)
            print("Spot requests cancelled")

        ###################################################

        # Retrieve running nodes
        if PROVIDER == "AWS_SPOT":
            nodes = driver.list_nodes(
                ex_filters={'instance-state-name': ['running']})
            nodes = [
                n for n in nodes
                if driver.ex_describe_tags(n)['Value'] == cluster_id
            ]
        elif PROVIDER == "AZURE":
            nodes = driver.list_nodes(
                ex_resource_group=CONFIG_DICT["Azure"]["ResourceGroup"])
            nodes = [
                n for n in nodes if n.extra["tags"]["ClusterId"] == cluster_id
            ]
        print("Found {} nodes".format(len(nodes)))

        # nodes is a list of "libcloud.compute.base.Node"

        # Destroy all nodes
        print("Destroying nodes")
        for node in nodes:
            driver.destroy_node(node)

        print(okgreen("All nodes destroyed"))
def profile_symex(args):
    cluster_id = c.CLUSTER_MAP['spark']
    num_run = args.num_runs
    reuse_dataset = args.reuse_dataset
    #exp_filepath = args.exp_file_path if args.exp_file_path else "experiment.json"
    exp_filepaths = args.exp_file_paths if args.exp_file_paths else ["experiment.json"]
    num_experiments = len(exp_filepaths)
    spark_seq = args.spark_seq if args.spark_seq else False
    index = 0
    app_name = ''
    app_jar = ''
    app_class = ''
    guard_evaluator_class = ''
    num_partitions = ''
    app_args = {}
    meta_profile_name = ''
    for exp_filepath in exp_filepaths:
        exp_file = Path(exp_filepath)
        index += 1
        if exp_file.exists():
            experiment = json.load(open(exp_filepath))
            try:
                app_name = experiment["AppName"]
                app_jar = experiment["AppJar"]
                app_class = experiment["AppClass"]
                guard_evaluator_class = experiment["GuardEvaluatorClass"]
                num_partitions = experiment["NumPartitions"]
                app_args = experiment["AppConf"]
                # Use .get() so a missing key falls back instead of raising KeyError.
                data_multiplier = experiment.get("DataMultiplier") or 1
                meta_profile_name = experiment.get("MetaProfileName") or meta_profile_name
            except KeyError as error:
                print("ERROR: {} in experiment file: {}".format(error, exp_filepath))
                exit(1)
        else:
            # Abort early: the names read above would be undefined in the code below.
            print("ERROR: experiment file not found: {}".format(exp_filepath))
            exit(1)
        with utils.open_cfg(mode='w') as cfg:
            for s in cfg.sections():
                if s != 'hdfs':
                    cfg.remove_section(s)
            cfg['main'] = {}
            cfg['main']['app_name'] = app_name
            cfg['main']['app_jar'] = app_jar
            cfg['main']['app_class'] = app_class
            cfg['main']['guard_evaluator_class'] = guard_evaluator_class
            cfg['main']['tool_on_master'] = 'false'
            cfg['main']['experiment_file'] = exp_filepath
            cfg['main']['process_on_server'] = str(c.PROCESS_ON_SERVER)
            cfg['experiment'] = {}
            cfg['experiment']['app_name'] = app_name
            cfg['experiment']['profile_name'] = app_name
            cfg['experiment']['meta_profile_name'] = meta_profile_name
            cfg['app_args'] = {}
            arg_string = ''
            not_to_scale_args = ["pastMonths", "inputFile", "outputFile", "delimiter", "parallelism", "minimumCompressionProgress", "progressCounter"]
            for key_app_arg in sorted(app_args.keys(), key=lambda k: int(k)):
                app_arg_name = '{}'.format(app_args[key_app_arg]["Name"])
                app_arg_val = '{}'.format(app_args[key_app_arg]["Value"]) 
                app_arg_value = app_arg_val if app_arg_name in not_to_scale_args else '{}'.format(int(app_arg_val) * int(data_multiplier)) 
                cfg['app_args']['arg'+key_app_arg+': ' + app_arg_name] = app_arg_value 
                arg_string += ' {}'.format(app_arg_value)
            #arg_string += ' {}'.format(str(num_partitions))
            cfg['main']['child_args_string'] = '{}'.format(arg_string)
            cfg['main']['num_partitions'] = str(num_partitions)
            cfg['main']['iter_num'] = str(1) #vboxvm
            cfg['main']['num_experiments'] = str(num_experiments)
            cfg['main']['experiment_num'] = str(index)
            #cfg['main']['cluster_id'] = cluster_id
            cfg['profile'] = {}
            cfg['profile']['spark_seq'] = str(spark_seq)
            cfg['profile']['profile_name'] = app_name
            cfg['profile']['metaprofile_name'] = meta_profile_name
            
            if reuse_dataset:
                cfg['main']['delete_hdfs'] = str(not reuse_dataset)
                
        print(bold('Profile experiment {} performing {} runs for application {} on cluster {}'.format(exp_filepath, 
                                                                                                   num_run, app_name,
                                                                                                   cluster_id,)))
        run_xspark(current_cluster='spark', num_instance=0, num_run=num_run,
                   cluster_id=cluster_id, run=1, terminate=0, reboot=0)
        if not c.PROCESS_ON_SERVER:
            average_runs.main(profile_name=utils.get_cfg()['experiment']['profile_name'])
            
    join_jsons.join_dags(OUTPUT_DIR)
    #join_jsons.join_dags("spark_log_profiling"+os.sep+"avg_json")
    
    deploy_meta_profile(meta_profile_name, cluster_id, True)
    
    # Upload all the normal (non-meta) profiles.
    for filename in os.listdir(OUTPUT_DIR):
        profilename = filename.split(os.sep)[-1].split(".")[0]
        profile_fname = filename.split(os.sep)[-1]
        if profilename != meta_profile_name and "collection" not in profilename and profile_fname.split(".")[-1] == "json":
            deploy_meta_profile(profilename, cluster_id)
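profile_symex() expects each experiment file to define the keys it reads above (AppName, AppJar, AppClass, GuardEvaluatorClass, NumPartitions, AppConf, and optionally DataMultiplier and MetaProfileName). A hypothetical experiment.json written from Python, with placeholder values chosen only to satisfy those accesses:

import json

# Placeholder values; only the key names are taken from the code above.
experiment = {
    "AppName": "my_app",
    "AppJar": "my_app.jar",
    "AppClass": "com.example.MyApp",
    "GuardEvaluatorClass": "com.example.GuardEvaluator",
    "NumPartitions": 128,
    "DataMultiplier": 1,
    "MetaProfileName": "my_meta_profile",
    "AppConf": {
        # Keys are sorted numerically; 'inputFile' is in not_to_scale_args, so its
        # value is passed through unscaled, while numeric args get multiplied.
        "1": {"Name": "inputFile", "Value": "hdfs:///data/input"},
        "2": {"Name": "numIterations", "Value": "10"},
    },
}

with open("experiment.json", "w") as f:
    json.dump(experiment, f, indent=2)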