def measure(model, num_storages, output_dir):
    """Single-node all-pair run: nocomputation at 3GB, submitted 10 times."""
    env.init()
    scheduler = "Fifo"
    num_nodes = 1
    cluster_config_path = env.get_cluter_config_path(model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path, node_config_path,
                            scheduler=scheduler, model=model, num_nodes=num_nodes,
                            num_storages=num_storages, parameters={})
    upload = True
    format = True
    job_list = ["nocomputation"]
    job_size_list = ["3GB"]
    #job_list = ["wordcount", "nocomputation"]
    #job_size_list = ["512MB", "1GB"]
    map_size = 1024
    submit_times = 10
    job_timeline = jobfactory.create_all_pair_jobs(job_list=job_list,
                                                   job_size_list=job_size_list,
                                                   map_size=map_size,
                                                   submit_times=submit_times)
    experiment = ExperimentRun(job_timeline, output_dir, setting=setting,
                               upload=upload, format=format, sync=True)
    experiment.init()
    experiment.run()
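# Usage sketch for the variant above. The model name and output path are
# illustrative assumptions ("decoupled" is the model name used by the other
# variants in these scripts), not values fixed by the experiments.
def example_run_all_pair():
    # one job type x one size x submit_times=10 -> 10 submissions total
    measure(model="decoupled", num_storages=1,
            output_dir="results/allpair_nocomp_3gb")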
def measure(model, scheduler, num_nodes, num_storages, output_dir):
    """Fixed-job run: nocomputation at 4GB, submitted 4 times, under the given scheduler."""
    cluster_config_path = env.get_cluter_config_path(model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path, node_config_path,
                            scheduler=scheduler, model=model, num_nodes=num_nodes,
                            num_storages=num_storages, parameters={})
    upload = True
    format = True
    job_list = ["nocomputation"]
    job_size_list = ["4GB"]
    map_size = 1024
    submit_times = 4
    job_timeline = jobfactory.create_fixed_jobs(job_list=job_list,
                                                job_size_list=job_size_list,
                                                map_size=map_size,
                                                submit_times=submit_times)
    experiment = ExperimentRun(job_timeline, output_dir, setting=setting,
                               upload=upload, format=format)
    experiment.init()
    experiment.run()
def measure(model, schedulers, num_nodes, num_storages, num_jobs, period, output_dir):
    """Scheduler comparison on a random job mix submitted at a fixed period."""
    parameters = {
        'mapreduce.job.reduce.slowstart.completedmaps': '1.0',
        'dfs.replication': '1',
    }
    cluster_config_path = env.get_cluter_config_path(model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    # The setting starts from Fifo; the schedulers under test come from
    # `schedulers` via SchedulerExperiment.
    setting = HadoopSetting(cluster_config_path, node_config_path,
                            scheduler="Fifo", model=model, num_nodes=num_nodes,
                            num_storages=num_storages, parameters=parameters)
    job_list = ["grep", "nocomputation", "histogrammovies", "histogramratings"]
    job_size_list = ["1GB", "2GB", "4GB"]
    job_timeline = jobfactory.create_jobs(job_list=job_list,
                                          job_size_list=job_size_list,
                                          num_jobs=num_jobs, period=period)
    experiment = SchedulerExperiment(setting, schedulers, job_timeline, output_dir)
    experiment.init()
    experiment.run()
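# Usage sketch for the scheduler comparison above. The scheduler names, node
# counts, job count, period value, and output path are assumptions chosen for
# illustration; the period unit is whatever jobfactory.create_jobs expects.
def example_compare_schedulers():
    measure(model="decoupled", schedulers=["Fifo", "Fair"],
            num_nodes=4, num_storages=1,
            num_jobs=20, period=60,  # 20 random jobs, one every `period` units
            output_dir="results/scheduler_mix")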
def measure(model, schedulers, num_nodes, num_storages, job_list, num_jobs,
            output_dir, debug=False):
    """Scheduler comparison on a caller-supplied job list, all jobs at 1GB."""
    env.init(debug=debug)
    # Imports deferred until after env.init(), presumably so the (debug)
    # environment is configured before the experiment modules load.
    from my.experiment.base import HadoopSetting
    from my.experiment.base import SchedulerExperiment
    from my.experiment import jobfactory
    parameters = {
        'mapreduce.job.reduce.slowstart.completedmaps': '0.8',
    }
    cluster_config_path = env.get_cluter_config_path(model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path, node_config_path,
                            scheduler="Fifo", model=model, num_nodes=num_nodes,
                            num_storages=num_storages, parameters=parameters)
    #job_list = ["grep", "terasort", "wordcount", "nocomputation", "histogrammovies", "histogramratings", "custommap1"]
    #job_list = ["grep"]
    job_size_list = ["1GB"]
    job_timeline = jobfactory.create_jobs(job_list=job_list,
                                          job_size_list=job_size_list,
                                          num_jobs=num_jobs, period=10)
    experiment = SchedulerExperiment(setting, schedulers, job_timeline, output_dir)
    experiment.init()
    experiment.run()
def measure(model, schedulers, num_nodes, num_storages, submit_times,
            submit_ratio, output_dir, debug=False):
    """Scheduler comparison on an A/B mix of grep and histogramratings at 4GB."""
    if debug:
        env.enable_debug()
    parameters = {
        'mapreduce.job.reduce.slowstart.completedmaps': '1.0',
        'dfs.replication': '1',
    }
    cluster_config_path = env.get_cluter_config_path(model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path, node_config_path,
                            scheduler="Fifo", model=model, num_nodes=num_nodes,
                            num_storages=num_storages, parameters=parameters)
    job_list = ["grep", "histogramratings"]
    job_size = "4GB"
    job_timeline = jobfactory.create_ab_jobs(job_list=job_list, job_size=job_size,
                                             submit_times=submit_times,
                                             submit_ratio=submit_ratio)
    experiment = SchedulerExperiment(setting, schedulers, job_timeline, output_dir)
    experiment.init()
    experiment.run()
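# Usage sketch for the A/B variant above. create_ab_jobs presumably interleaves
# the two job types according to submit_ratio; the exact semantics of the ratio
# value are not shown here, so the 3 below is an illustrative assumption, as
# are the scheduler names, node counts, and output path.
def example_ab_ratio():
    measure(model="decoupled", schedulers=["Fifo", "Fair"],
            num_nodes=4, num_storages=1,
            submit_times=8, submit_ratio=3,  # assumed: 3 A-jobs per B-job
            output_dir="results/ab_grep_vs_histratings")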
def measure(output_dir):
    """Single-node Fifo smoke test: every benchmark once at 64MB."""
    scheduler = "Fifo"
    model = "decoupled"
    num_nodes = 1
    num_storages = 1
    parameters = {}
    upload = True
    format = True
    cluster_config_path = env.get_cluter_config_path(model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path, node_config_path,
                            scheduler=scheduler, model=model, num_nodes=num_nodes,
                            num_storages=num_storages, parameters=parameters)
    job_list = [
        "grep", "terasort", "wordcount", "nocomputation",
        "histogrammovies", "histogramratings", "custommap1",
    ]
    #job_list = ["nocomputation", "histogrammovies", "histogramratings", "custommap1"]
    job_size_list = ["64MB"]
    submit_times = 1
    job_timeline = jobfactory.create_all_pair_jobs(job_list=job_list,
                                                   job_size_list=job_size_list,
                                                   submit_times=submit_times)
    experiment = ExperimentRun(job_timeline, output_dir, setting=setting,
                               upload=upload, format=format, sync=True)
    experiment.init()
    experiment.run()
def measure(model, scheduler, num_nodes, num_storages, output_dir):
    """All-pair sweep: every benchmark at sizes from 64MB to 4GB."""
    env.init()
    cluster_config_path = env.get_cluter_config_path(model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path, node_config_path,
                            scheduler=scheduler, model=model, num_nodes=num_nodes,
                            num_storages=num_storages, parameters={})
    upload = True
    format = True
    job_list = [
        "grep", "terasort", "wordcount", "nocomputation",
        "histogrammovies", "histogramratings", "custommap1",
    ]
    job_size_list = ["64MB", "128MB", "256MB", "512MB", "1GB", "2GB", "4GB"]
    #job_list = ["wordcount", "nocomputation"]
    #job_size_list = ["512MB", "1GB"]
    map_size = 512
    submit_times = 1
    job_timeline = jobfactory.create_all_pair_jobs(job_list=job_list,
                                                   job_size_list=job_size_list,
                                                   map_size=map_size,
                                                   submit_times=submit_times)
    experiment = ExperimentRun(job_timeline, output_dir, setting=setting,
                               upload=upload, format=format, sync=True)
    experiment.init()
    experiment.run()
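# Driver sketch batching the sweep above over several cluster shapes. All of
# it is a hypothetical illustration: only "decoupled" appears as a model name
# in these scripts, so the second model name, the storage counts, and the
# output path layout are assumptions.
def example_sweep_models():
    for model in ("decoupled", "coupled"):   # assumed model names
        for num_storages in (1, 2):          # assumed storage counts
            out = "results/sweep/%s_s%d" % (model, num_storages)
            measure(model=model, scheduler="Fifo", num_nodes=1,
                    num_storages=num_storages, output_dir=out)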