def measure(model, num_storages, output_dir):
    env.init()

    scheduler = "Fifo"
    num_nodes = 1

    cluster_config_path = env.get_cluter_config_path(
        model, num_nodes, num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(
        cluster_config_path, node_config_path, scheduler=scheduler,
        model=model, num_nodes=num_nodes, num_storages=num_storages, parameters={})
    upload = True
    format = True

    job_list = ["nocomputation"]
    job_size_list = ["3GB"]

    #job_list = ["wordcount", "nocomputation"]
    #job_size_list = ["512MB", "1GB"]
    map_size = 1024
    submit_times = 10
    job_timeline = jobfactory.create_all_pair_jobs(
        job_list=job_list, job_size_list=job_size_list, map_size=map_size, submit_times=submit_times)
    experiment = ExperimentRun(
        job_timeline, output_dir, setting=setting, upload=upload,
        format=format, sync=True)
    experiment.init()
    experiment.run()
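A minimal driver for the variant above might look like the following sketch; the model name "decoupled" is borrowed from Example #6, while the storage sweep and output directories are placeholder values.

# Hypothetical driver; adjust the storage sweep and paths to your setup.
if __name__ == "__main__":
    for n in (1, 2, 4):
        measure(model="decoupled",
                num_storages=n,
                output_dir="results/nocomputation-%dstorages" % n)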
def measure(model, scheduler, num_nodes, num_storages, output_dir):

    cluster_config_path = env.get_cluter_config_path(model, num_nodes,
                                                     num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path,
                            node_config_path,
                            scheduler="Fifo",
                            model=model,
                            num_nodes=num_nodes,
                            num_storages=num_storages,
                            parameters={})
    upload = True
    format = True

    job_list = ["nocomputation"]
    job_size_list = ["4GB"]
    map_size = 1024
    submit_times = 4
    job_timeline = jobfactory.create_fixed_jobs(job_list=job_list,
                                                job_size_list=job_size_list,
                                                map_size=map_size,
                                                submit_times=submit_times)
    experiment = ExperimentRun(job_timeline,
                               output_dir,
                               setting=setting,
                               upload=upload,
                               format=format)
    experiment.init()
    experiment.run()
Example #3
def measure(model, schedulers, num_nodes, num_storages, num_jobs, period,
            output_dir):

    parameters = {
        # Start the reduce phase only after every map task has completed.
        'mapreduce.job.reduce.slowstart.completedmaps': '1.0',
        # Keep a single HDFS replica of each block.
        'dfs.replication': '1',
    }

    cluster_config_path = env.get_cluter_config_path(model, num_nodes,
                                                     num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path,
                            node_config_path,
                            scheduler="Fifo",
                            model=model,
                            num_nodes=num_nodes,
                            num_storages=num_storages,
                            parameters=parameters)

    job_list = ["grep", "nocomputation", "histogrammovies", "histogramratings"]
    job_size_list = ["1GB", "2GB", "4GB"]
    job_timeline = jobfactory.create_jobs(job_list=job_list,
                                          job_size_list=job_size_list,
                                          num_jobs=num_jobs,
                                          period=period)

    experiment = SchedulerExperiment(setting, schedulers, job_timeline,
                                     output_dir)

    experiment.init()
    experiment.run()
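The scheduler-comparison variant above could be driven as in the sketch below; only "Fifo" is named in these examples, so the "Fair" identifier and the remaining values are assumptions.

# Hypothetical invocation; "Fair" is an assumed scheduler name,
# all other values are placeholders.
if __name__ == "__main__":
    measure(model="decoupled",
            schedulers=["Fifo", "Fair"],
            num_nodes=4,
            num_storages=2,
            num_jobs=20,
            period=60,
            output_dir="results/scheduler-comparison")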
Example #4
def measure(model,
            schedulers,
            num_nodes,
            num_storages,
            job_list,
            num_jobs,
            output_dir,
            debug=False):

    env.init(debug=debug)

    from my.experiment.base import HadoopSetting
    from my.experiment.base import SchedulerExperiment
    from my.experiment import jobfactory

    parameters = {
        # Start the reduce phase once 80% of the map tasks have completed.
        'mapreduce.job.reduce.slowstart.completedmaps': '0.8',
    }

    cluster_config_path = env.get_cluter_config_path(model, num_nodes,
                                                     num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path,
                            node_config_path,
                            scheduler="Fifo",
                            model=model,
                            num_nodes=num_nodes,
                            num_storages=num_storages,
                            parameters=parameters)

    #job_list = ["grep", "terasort", "wordcount", "nocomputation", "histogrammovies", "histogramratings", "custommap1"]
    #job_list = ["grep"]
    job_size_list = ["1GB"]
    job_timeline = jobfactory.create_jobs(job_list=job_list,
                                          job_size_list=job_size_list,
                                          num_jobs=num_jobs,
                                          period=10)

    experiment = SchedulerExperiment(setting, schedulers, job_timeline,
                                     output_dir)

    experiment.init()
    experiment.run()
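Since this variant takes the job mix as an argument, a caller might pick jobs from the commented-out list above, as in this sketch; the job names come from that list and the remaining values are placeholders.

# Hypothetical call with an explicit job mix.
measure(model="decoupled",
        schedulers=["Fifo"],
        num_nodes=2,
        num_storages=1,
        job_list=["grep", "wordcount"],
        num_jobs=10,
        output_dir="results/grep-wordcount",
        debug=True)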
Example #5
def measure(model,
            schedulers,
            num_nodes,
            num_storages,
            submit_times,
            submit_ratio,
            output_dir,
            debug=False):

    if debug:
        env.enable_debug()

    parameters = {
        # Start the reduce phase only after every map task has completed.
        'mapreduce.job.reduce.slowstart.completedmaps': '1.0',
        # Keep a single HDFS replica of each block.
        'dfs.replication': '1',
    }

    cluster_config_path = env.get_cluter_config_path(model, num_nodes,
                                                     num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path,
                            node_config_path,
                            scheduler="Fifo",
                            model=model,
                            num_nodes=num_nodes,
                            num_storages=num_storages,
                            parameters=parameters)

    job_list = ["grep", "histogramratings"]
    job_size = "4GB"
    job_timeline = jobfactory.create_ab_jobs(job_list=job_list,
                                             job_size=job_size,
                                             submit_times=submit_times,
                                             submit_ratio=submit_ratio)

    experiment = SchedulerExperiment(setting, schedulers, job_timeline,
                                     output_dir)

    experiment.init()
    experiment.run()
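The two-job variant above presumably mixes "grep" and "histogramratings" submissions according to submit_ratio; a sweep over that ratio might look like the sketch below, where every value is a placeholder.

# Hypothetical sweep over the A/B submission ratio.
if __name__ == "__main__":
    for ratio in (1, 2, 4):
        measure(model="decoupled",
                schedulers=["Fifo"],
                num_nodes=4,
                num_storages=2,
                submit_times=8,
                submit_ratio=ratio,
                output_dir="results/ab-ratio-%d" % ratio)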
Example #6
def measure(output_dir):

    scheduler = "Fifo"
    model = "decoupled"
    num_nodes = 1
    num_storages = 1
    parameters = {}
    upload = True
    format = True

    cluster_config_path = env.get_cluter_config_path(model, num_nodes,
                                                     num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path,
                            node_config_path,
                            scheduler=scheduler,
                            model=model,
                            num_nodes=num_nodes,
                            num_storages=num_storages,
                            parameters=parameters)

    job_list = [
        "grep", "terasort", "wordcount", "nocomputation", "histogrammovies",
        "histogramratings", "custommap1"
    ]
    #job_list = ["nocomputation", "histogrammovies", "histogramratings", "custommap1"]
    job_size_list = ["64MB"]
    submit_times = 1
    job_timeline = jobfactory.create_all_pair_jobs(job_list=job_list,
                                                   job_size_list=job_size_list,
                                                   submit_times=submit_times)
    experiment = ExperimentRun(job_timeline,
                               output_dir,
                               setting=setting,
                               upload=upload,
                               format=format,
                               sync=True)
    experiment.init()
    experiment.run()
def measure(model, scheduler, num_nodes, num_storages, output_dir):
    env.init()

    cluster_config_path = env.get_cluter_config_path(model, num_nodes,
                                                     num_storages)
    node_config_path = env.get_node_config_path()
    setting = HadoopSetting(cluster_config_path,
                            node_config_path,
                            scheduler="Fifo",
                            model=model,
                            num_nodes=num_nodes,
                            num_storages=num_storages,
                            parameters={})
    upload = True
    format = True

    job_list = [
        "grep", "terasort", "wordcount", "nocomputation", "histogrammovies",
        "histogramratings", "custommap1"
    ]
    job_size_list = ["64MB", "128MB", "256MB", "512MB", "1GB", "2GB", "4GB"]

    #job_list = ["wordcount", "nocomputation"]
    #job_size_list = ["512MB", "1GB"]
    map_size = 512
    submit_times = 1
    job_timeline = jobfactory.create_all_pair_jobs(job_list=job_list,
                                                   job_size_list=job_size_list,
                                                   map_size=map_size,
                                                   submit_times=submit_times)
    experiment = ExperimentRun(job_timeline,
                               output_dir,
                               setting=setting,
                               upload=upload,
                               format=format,
                               sync=True)
    experiment.init()
    experiment.run()