def run_time_analysis(input_dir):
    if not input_dir:
        cfg = utils.get_cfg()
        input_dir = cfg['main']['output_folder'] if 'main' in cfg and 'output_folder' in cfg['main'] else None
    run_ta.main(input_dir)

def run_log_profiling(local):
    out_folder = None
    if not local:
        cfg = utils.get_cfg()
        out_folder = cfg['main']['output_folder'] if 'main' in cfg and 'output_folder' in cfg['main'] else None
    profiling.main(out_folder)

def run_time_analysis(input_dir):
    if not input_dir:
        cfg = utils.get_cfg()
        if 'out_folders' in cfg:
            for x in cfg['out_folders']:
                input_dir = cfg['out_folders'][x]
                run_ta.main(input_dir)
    else:
        run_ta.main(input_dir)

def run_log_profiling(local):
    out_dir = None
    in_dir = None
    cfg = utils.get_cfg()
    if not local:
        if 'out_folders' in cfg:
            for x in cfg['out_folders']:
                in_dir = out_dir = cfg['out_folders'][x]
                profiling.main(input_dir=in_dir, json_out_dir=out_dir)
    else:
        profiling.main(input_dir=in_dir, json_out_dir=out_dir)

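# Usage sketch (illustrative, not part of the original module): both helpers fall
# back to the folders recorded in cfg['out_folders'] when no explicit location is
# given, so a typical post-run invocation would look like:
#
#   run_time_analysis(None)        # analyse every folder listed in cfg['out_folders']
#   run_log_profiling(local=False) # profile the same folders and write JSON next to them
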
def upload_profile(self):
    cfg = utils.get_cfg()
    benchmark = cfg['experiment']['benchmarkname']
    profile_fname = cfg[benchmark]['profile_name'] + '.json'
    filedir = OUTPUT_DIR
    filepath = filedir + '/' + profile_fname
    try:
        self.retrieve_nodes()
        x_run.upload_profile_to_master(self.nodes, profile_fname, filepath)
    except (OSError, IOError) as exc:
        print('ERROR: {}\n\nCould not upload profile'.format(exc))

def kill_cluster(cluster):
    cluster_id = cluster_map[cluster]
    print(bold('Terminate {}...'.format(cluster_id)))
    run_xspark(current_cluster=cluster, num_instance=0, cluster_id=cluster_id,
               run=0, terminate=1, reboot=0)
    cfg = utils.get_cfg()
    cfg[cluster] = {}
    utils.write_cfg(cfg)

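# Example (hedged sketch): tearing down the Spark cluster tracked in cluster_map.
# 'spark' is assumed to be a valid key, as it is in launch_exp() and profile() below.
#
#   kill_cluster('spark')
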
def profile(args):
    cluster_id = c.CLUSTER_MAP['spark']
    num_run = args.num_runs
    reuse_dataset = args.reuse_dataset
    # exp_filepath = args.exp_file_path if args.exp_file_path else "experiment.json"
    exp_filepaths = args.exp_file_paths if args.exp_file_paths else ["experiment.json"]
    num_experiments = len(exp_filepaths)
    spark_seq = args.spark_seq if args.spark_seq else False
    index = 0
    for exp_filepath in exp_filepaths:
        exp_file = Path(exp_filepath)
        index += 1
        if exp_file.exists():
            experiment = json.load(open(exp_filepath))
            try:
                benchmark = experiment["BenchmarkName"]
                # benchmark = experiment["BenchmarkBench"][0]
            except KeyError as error:
                print("ERROR: {} in experiment file: {}".format(error, exp_filepath))
                exit(1)
            with utils.open_cfg(mode='w') as cfg:
                for s in cfg.sections():
                    if s != 'hdfs':
                        cfg.remove_section(s)
                cfg['main'] = {}
                cfg['main']['tool_on_master'] = 'false'
                cfg['main']['experiment_file'] = exp_filepath
                cfg['main']['process_on_server'] = str(c.PROCESS_ON_SERVER)
                cfg['main']['iter_num'] = str(1)  # vboxvm
                cfg['main']['num_experiments'] = str(num_experiments)
                cfg['main']['experiment_num'] = str(index)
                # cfg['main']['cluster_id'] = cluster_id
                cfg['profile'] = {}
                cfg['profile']['spark_seq'] = str(spark_seq)
                cfg[benchmark] = {}
                cfg[benchmark]['profile_name'] = '{}'.format(c.VAR_PAR_MAP[benchmark]['profile_name'])
                if reuse_dataset:
                    cfg['main']['delete_hdfs'] = str(not reuse_dataset)
            print(bold('Profile experiment {} performing {} runs for benchmark {} on cluster {}'.format(
                exp_filepath, num_run, benchmark, cluster_id)))
            run_xspark(current_cluster='spark', num_instance=0, num_run=num_run,
                       cluster_id=cluster_id, run=1, terminate=0, reboot=0)
            if not c.PROCESS_ON_SERVER:
                average_runs.main(profile_name=utils.get_cfg()[benchmark]['profile_name'])
                deploy_profile(benchmark, cluster_id)

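# Minimal sketch of an experiment file accepted by profile() above. Only the
# "BenchmarkName" key is read here; the value shown ("pagerank") is an assumption
# based on the benchmarks referenced elsewhere in this module.
#
#   experiment.json:
#   {
#       "BenchmarkName": "pagerank"
#   }
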
def launch_exp(args):
    cluster_id = cluster_map['spark']
    num_v = args.num_v
    for v in num_v:
        cfg = utils.get_cfg()
        cfg['pagerank'] = {}
        cfg['pagerank']['num_v'] = v
        utils.write_cfg(cfg)
        print(bold('Launch Experiments on {} with {} vertices...'.format(cluster_id, v)))
        run_xspark(current_cluster='spark', num_instance=0, cluster_id=cluster_id,
                   run=1, terminate=0, reboot=0)
    if args.profile:
        run_log_profiling(None)

def profile_disabled(args):
    cluster_id = c.CLUSTER_MAP['spark']
    var_par = args.var_par
    exp_profile_name = args.exp_profile_name if args.exp_profile_name else ""
    benchmark = args.benchmark
    num_run = args.num_runs
    max_executors = args.max_executors
    num_partitions = args.num_partitions
    for v in var_par:
        with utils.open_cfg(mode='w') as cfg:
            cfg['main'] = {}
            cfg['main']['profile'] = 'true'
            cfg['main']['tool_on_master'] = 'false'
            cfg['main']['benchmark'] = benchmark
            cfg['main']['iter_num'] = str(1)  # vboxvm
            cfg[benchmark] = {}
            cfg[benchmark][c.VAR_PAR_MAP[benchmark]['var_name']] = '({}, {})'.format(
                c.VAR_PAR_MAP[benchmark]['default'][0], v)
            cfg[benchmark]['profile_name'] = \
                '{}'.format(c.VAR_PAR_MAP[benchmark]['profile_name']) if not args.exp_profile_name else args.exp_profile_name
            cfg[benchmark]['num_partitions'] = str(num_partitions)
            if max_executors:
                cfg['main']['max_executors'] = max_executors
        print(bold('Profile {} performing {} runs for benchmark {} on cluster {} with {}={}...'.format(
            exp_profile_name, num_run, benchmark, cluster_id,
            c.VAR_PAR_MAP[benchmark]['var_name'], v)))
        run_xspark(current_cluster='spark', num_instance=0, num_run=num_run,
                   cluster_id=cluster_id, run=1, terminate=0, reboot=0)
        # profiling.main()
        average_runs.main(profile_name=utils.get_cfg()[benchmark]['profile_name'])
        # run_log_profiling(args.local)
        deploy_profile(benchmark, cluster_id)

def run_xspark(current_cluster, num_instance=NUM_INSTANCE, num_run=NUM_RUN,
               cluster_id=CLUSTER_ID, terminate=TERMINATE, run=RUN, reboot=REBOOT,
               assume_yes=False):
    """Main function:
    * launch a spot request for num_instance instances
    * run the benchmark
    * download the logs
    * plot data from the logs
    """
    print(header('run_xspark(num_instance={}, num_run={}, cluster_id={}, terminate={}, run={}, reboot={})'.format(
        num_instance, num_run, cluster_id, terminate, run, reboot)))
    cfg = utils.get_cfg()
    cfg['main'] = {}
    cfg.set('main', 'current_cluster', current_cluster)
    utils.write_cfg(cfg)

    if PROVIDER == "AWS_SPOT":
        set_spot_drivers()
        cls = get_driver("ec2_spot_" + REGION.replace('-', '_'))
        driver = cls(AWS_ACCESS_ID, AWS_SECRET_KEY)
    elif PROVIDER == "AZURE":
        set_azurearm_driver()
        cls = get_driver("CustomAzureArm")
        driver = cls(tenant_id=AZ_TENANT_ID, subscription_id=AZ_SUBSCRIPTION_ID,
                     key=AZ_APPLICATION_ID, secret=AZ_SECRET,
                     region=CONFIG_DICT["Azure"]["Location"])
    else:
        print("Unsupported provider", PROVIDER)
        return

    if num_instance > 0:
        # Create nodes
        if PROVIDER == "AWS_SPOT":
            nodes, spot_requests = launch.launch_libcloud(driver, num_instance, CONFIG_DICT,
                                                          cluster_id, assume_yes)
        if PROVIDER == "AZURE":
            nodes = launch.launch_libcloud(driver, num_instance, CONFIG_DICT,
                                           cluster_id, assume_yes)
        # nodes is a list of "libcloud.compute.base.Node"
        print("CHECK SECURITY GROUP ALLOWED IP SETTINGS!!!")

        # Tag nodes
        if PROVIDER == "AWS_SPOT":
            for node in nodes:
                driver.ex_create_tags(node, TAG[0])
        elif PROVIDER == "AZURE":
            for node in nodes:
                driver.ex_create_tags(node, {"ClusterId": cluster_id})  # was CONFIG_DICT["Azure"]["ClusterId"]

        instance_ids = [n.id for n in nodes]

        # Wait for all the nodes to become RUNNING
        print("Waiting for nodes to run")
        launch.wait_for_running_libcloud(driver, instance_ids, copy.deepcopy(instance_ids))

        time.sleep(15)

        # Wait for all the nodes to be pingable
        print("Waiting for nodes to be pingable")
        launch.wait_ping_libcloud(driver, instance_ids, copy.deepcopy(instance_ids))

    if reboot:
        print("Rebooting instances...")

        # Retrieve running nodes
        if PROVIDER == "AWS_SPOT":
            nodes = driver.list_nodes(ex_filters={'instance-state-name': ['running']})
            nodes = [n for n in nodes if driver.ex_describe_tags(n)['Value'] == cluster_id]
        elif PROVIDER == "AZURE":
            nodes = driver.list_nodes(ex_resource_group=CONFIG_DICT["Azure"]["ResourceGroup"])
            nodes = [n for n in nodes if n.extra["tags"]["ClusterId"] == cluster_id]

        # Reboot nodes
        for node in nodes:
            driver.reboot_node(node)

        # Wait for all the nodes to be pingable
        instance_ids = [n.id for n in nodes]
        launch.wait_ping_libcloud(driver, instance_ids, copy.deepcopy(instance_ids))

    if run:
        for i in range(num_run):
            # Retrieve running nodes
            if PROVIDER == "AWS_SPOT":
                nodes = driver.list_nodes(ex_filters={'instance-state-name': ['running']})
                nodes = [n for n in nodes if driver.ex_describe_tags(n)['Value'] == cluster_id]
            elif PROVIDER == "AZURE":
                nodes = driver.list_nodes(ex_resource_group=CONFIG_DICT["Azure"]["ResourceGroup"])
                nodes = [n for n in nodes if n.extra["tags"]["ClusterId"] == cluster_id]

            # nodes is a list of "libcloud.compute.base.Node"
            print("Found {} nodes".format(len(nodes)))

            x_run.run_benchmark(nodes)

    if terminate:
        print("Begin termination of instances and cleaning")

        # Cancel Spot Request
        if PROVIDER == "AWS_SPOT" and num_instance > 0:
            for s in spot_requests:
                driver.ex_cancel_spot_instance_request(s)
            print("Spot requests cancelled")

        ###################################################
        # Retrieve running nodes
        if PROVIDER == "AWS_SPOT":
            nodes = driver.list_nodes(ex_filters={'instance-state-name': ['running']})
            nodes = [n for n in nodes if driver.ex_describe_tags(n)['Value'] == cluster_id]
        elif PROVIDER == "AZURE":
            nodes = driver.list_nodes(ex_resource_group=CONFIG_DICT["Azure"]["ResourceGroup"])
            nodes = [n for n in nodes if n.extra["tags"]["ClusterId"] == cluster_id]
        print("Found {} nodes".format(len(nodes)))
        # nodes is a list of "libcloud.compute.base.Node"

        # Destroy all nodes
        print("Destroying nodes")
        for node in nodes:
            driver.destroy_node(node)
        print(okgreen("All nodes destroyed"))

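# Example invocation (a sketch; NUM_INSTANCE, CLUSTER_ID and the other module-level
# defaults are assumed to be configured elsewhere): launch 5 instances, run the
# benchmark once, and leave the cluster up for a later kill_cluster() call.
#
#   run_xspark(current_cluster='spark', num_instance=5, num_run=1,
#              cluster_id=CLUSTER_ID, run=1, terminate=0, reboot=0)
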
def profile_symex(args):
    cluster_id = c.CLUSTER_MAP['spark']
    num_run = args.num_runs
    reuse_dataset = args.reuse_dataset
    # exp_filepath = args.exp_file_path if args.exp_file_path else "experiment.json"
    exp_filepaths = args.exp_file_paths if args.exp_file_paths else ["experiment.json"]
    num_experiments = len(exp_filepaths)
    spark_seq = args.spark_seq if args.spark_seq else False
    index = 0
    app_name = ''
    app_jar = ''
    app_class = ''
    guard_evaluator_class = ''
    num_partitions = ''
    app_args = {}
    meta_profile_name = ''
    for exp_filepath in exp_filepaths:
        exp_file = Path(exp_filepath)
        index += 1
        if exp_file.exists():
            experiment = json.load(open(exp_filepath))
            try:
                app_name = experiment["AppName"]
                app_jar = experiment["AppJar"]
                app_class = experiment["AppClass"]
                guard_evaluator_class = experiment["GuardEvaluatorClass"]
                num_partitions = experiment["NumPartitions"]
                app_args = experiment["AppConf"]
                data_multiplier = experiment["DataMultiplier"] if experiment["DataMultiplier"] else 1
                meta_profile_name = experiment["MetaProfileName"] if experiment["MetaProfileName"] else meta_profile_name
            except KeyError as error:
                print("ERROR: {} in experiment file: {}".format(error, exp_filepath))
                exit(1)
            with utils.open_cfg(mode='w') as cfg:
                for s in cfg.sections():
                    if s != 'hdfs':
                        cfg.remove_section(s)
                cfg['main'] = {}
                cfg['main']['app_name'] = app_name
                cfg['main']['app_jar'] = app_jar
                cfg['main']['app_class'] = app_class
                cfg['main']['guard_evaluator_class'] = guard_evaluator_class
                cfg['main']['tool_on_master'] = 'false'
                cfg['main']['experiment_file'] = exp_filepath
                cfg['main']['process_on_server'] = str(c.PROCESS_ON_SERVER)
                cfg['experiment'] = {}
                cfg['experiment']['app_name'] = app_name
                cfg['experiment']['profile_name'] = app_name
                cfg['experiment']['meta_profile_name'] = meta_profile_name
                cfg['app_args'] = {}
                arg_string = ''
                not_to_scale_args = ["pastMonths", "inputFile", "outputFile", "delimiter",
                                     "parallelism", "minimumCompressionProgress", "progressCounter"]
                for key_app_arg in sorted(app_args.keys(), key=lambda k: int(k)):
                    app_arg_name = '{}'.format(app_args[key_app_arg]["Name"])
                    app_arg_val = '{}'.format(app_args[key_app_arg]["Value"])
                    app_arg_value = app_arg_val if app_arg_name in not_to_scale_args \
                        else '{}'.format(int(app_arg_val) * int(data_multiplier))
                    cfg['app_args']['arg' + key_app_arg + ': ' + app_arg_name] = app_arg_value
                    arg_string += ' {}'.format(app_arg_value)
                # arg_string += ' {}'.format(str(num_partitions))
                cfg['main']['child_args_string'] = '{}'.format(arg_string)
                cfg['main']['num_partitions'] = str(num_partitions)
                cfg['main']['iter_num'] = str(1)  # vboxvm
                cfg['main']['num_experiments'] = str(num_experiments)
                cfg['main']['experiment_num'] = str(index)
                # cfg['main']['cluster_id'] = cluster_id
                cfg['profile'] = {}
                cfg['profile']['spark_seq'] = str(spark_seq)
                cfg['profile']['profile_name'] = app_name
                cfg['profile']['metaprofile_name'] = meta_profile_name
                if reuse_dataset:
                    cfg['main']['delete_hdfs'] = str(not reuse_dataset)
            print(bold('Profile experiment {} performing {} runs for application {} on cluster {}'.format(
                exp_filepath, num_run, app_name, cluster_id)))
            run_xspark(current_cluster='spark', num_instance=0, num_run=num_run,
                       cluster_id=cluster_id, run=1, terminate=0, reboot=0)
            if not c.PROCESS_ON_SERVER:
                average_runs.main(profile_name=utils.get_cfg()['experiment']['profile_name'])
    join_jsons.join_dags(OUTPUT_DIR)
    # join_jsons.join_dags("spark_log_profiling" + os.sep + "avg_json")
    deploy_meta_profile(meta_profile_name, cluster_id, True)
    # upload all the normal (non-meta) profiles
    for filename in os.listdir(OUTPUT_DIR):
        profilename = filename.split(os.sep)[-1].split(".")[0]
        profile_fname = filename.split(os.sep)[-1]
        if profilename != meta_profile_name and "collection" not in profilename \
                and profile_fname.split(".")[-1] == "json":
            deploy_meta_profile(profilename, cluster_id)

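# Minimal sketch of the experiment file consumed by profile_symex(): every key below
# is read in the try-block above; the concrete values are illustrative assumptions
# only. AppConf is keyed by argument position, and arguments whose Name is not in
# not_to_scale_args are multiplied by DataMultiplier.
#
#   {
#       "AppName": "myapp",
#       "AppJar": "myapp.jar",
#       "AppClass": "MyAppMain",
#       "GuardEvaluatorClass": "MyAppGuardEvaluator",
#       "NumPartitions": 48,
#       "DataMultiplier": 1,
#       "MetaProfileName": "myapp_meta",
#       "AppConf": {
#           "1": {"Name": "inputFile", "Value": "hdfs:///data/input.csv"},
#           "2": {"Name": "parallelism", "Value": "48"}
#       }
#   }
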