Example #1
def configure():
    """Render the Flink configuration templates into the conf directory and update PATH."""
    context = conf.copy()
    context['java_home'] = find_java_home()
    context['master'] = env.master
    # read HADOOP_CONF_DIR from the remote environment
    context['hadoop_conf_path'] = run("echo $HADOOP_CONF_DIR")
    destination = get_flink_dist_path() + "/conf"
    process_template("flink", "flink-conf.yaml.mustache", context, destination)
    slaves = '\n'.join(env.slaves)
    context2 = {'slaves': slaves}
    process_template("flink", "slaves.mustache", context2, destination)
    # append the installation's bin directory to PATH in ~/.profile
    run("echo export PATH=$PATH:'%s'/bin >> %s" % (PATH, "~/.profile"))
Example #2
compute_engine_config['machine_type'] = "n1-standard-2"
# num cores to use
compute_engine_config['num_cores'] = 2
compute_engine_config['size_mem'] = 7500
# 10 workers + 1 master
compute_engine_config['num_workers'] = 10
compute_engine_config['disk_space_gb'] = 200

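# total degree of parallelism: 10 workers x 2 cores = 20 parallel task slots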
dop = compute_engine_config['num_workers'] * compute_engine_config['num_cores']

flink_config['num_task_slots'] = compute_engine_config['num_cores']
flink_config['taskmanager_heap'] = 5120  # 5 GB
flink_config['jobmanager_heap'] = 5120
flink_config['parallelization'] = dop

flink_als_config = flink_config.copy()
flink_als_config['extra_config_entries'] = [
    {'entry': "taskmanager.memory.fraction: 0.3"},
]
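# taskmanager.memory.fraction sets the share of memory Flink reserves as managed memory
# (default 0.7); lowering it to 0.3 presumably leaves more free heap for the ALS job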

cluster = ComputeEngine(compute_engine_config)
hadoop = Hadoop(hadoop_config)
flink = Flink(flink_config)
flink_als = Flink(flink_als_config)

systems = [hadoop, flink]

generators = {

    'text':
        Generator(
Example #3
# import the experiments' main classes
from experiments.wordcount import WordCountFromJar
from experiments.wordcount import WordCount
from experiments.grep import Grep

# import data generators for benchmarks
from experiments import generators

cluster = ComputeEngine(compute_engine_config)
hadoop = Hadoop(hadoop_config)
flink = Flink(flink_config)

systems = [hadoop, flink]

custom_flink_config = flink_config.copy()
custom_flink_config['git_commit'] = "858d1bccf957bf36c04ab011ec9a26933109086c"
custom_flink_config['taskmanager_num_buffers'] = 1024
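# presumably maps to taskmanager.network.numberOfBuffers in flink-conf.yaml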

custom_flink = Flink(custom_flink_config)

benchmarks = [
    # Normal benchmark
    Benchmark(
        id = "WordCount1000",
        systems = [flink],
        experiment = WordCountFromJar({
            'num_lines' : 1000
        }),
        times = 5
    ),
Example #4
# 2 cores, 7.5 GB RAM
compute_engine_config['machine_type'] = "n1-standard-2"
# num cores to use
compute_engine_config['num_cores'] = 2
# 10 workers + 1 master
compute_engine_config['num_workers'] = 10
compute_engine_config['disk_space_gb'] = 100

cluster = ComputeEngine(compute_engine_config)
hadoop = Hadoop(hadoop_config)

flink_config['git_repository'] = "https://github.com/mxm/flink.git"
flink_config['git_commit'] = "aba76171fef41e2c987913c32fefafc55ef635f6"
flink = Flink(flink_config)

flink_config_custom = flink_config.copy()
flink_config_custom['git_commit'] = "off_heap_rebased"
flink_config_custom['extra_config_entries'] = [
    {'entry': "taskmanager.memory.directAllocation: true"}
]
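# each 'entry' string is presumably appended verbatim to the generated flink-conf.yaml;
# directAllocation: true switches the task managers to off-heap (direct) memory,
# matching the off_heap_rebased branch checked out above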
flink_custom = Flink(flink_config_custom)

systems = [hadoop, flink]

benchmarks = [
    Benchmark(
        id = "WordCount-heap",
        systems = [flink],
        experiment = WordCount(),
        times = 1
    ),
Example #5
# import the experiments' main classes
from experiments.wordcount import WordCountFromJar
from experiments.wordcount import WordCount
from experiments.grep import Grep

# import data generators for benchmarks
from experiments import generators

cluster = ComputeEngine(compute_engine_config)
hadoop = Hadoop(hadoop_config)
flink = Flink(flink_config)

systems = [hadoop, flink]

custom_flink_config = flink_config.copy()
custom_flink_config['git_commit'] = "858d1bccf957bf36c04ab011ec9a26933109086c"
custom_flink_config['taskmanager_num_buffers'] = 1024

custom_flink = Flink(custom_flink_config)

benchmarks = [
    # Normal benchmark
    Benchmark(id="WordCount1000",
              systems=[flink],
              experiment=WordCountFromJar({'num_lines': 1000}),
              times=5),
    # Custom Flink version benchmark
    Benchmark(id="WordCount1000-custom",
              systems=[custom_flink],
              experiment=WordCountFromJar({'num_lines': 1000}),
Example #6
# 2 cores, 7.5 GB RAM
compute_engine_config['machine_type'] = "n1-standard-2"
# num cores to use
compute_engine_config['num_cores'] = 2
# 10 workers + 1 master
compute_engine_config['num_workers'] = 10
compute_engine_config['disk_space_gb'] = 100

cluster = ComputeEngine(compute_engine_config)
hadoop = Hadoop(hadoop_config)

flink_config['git_repository'] = "https://github.com/mxm/flink.git"
flink_config['git_commit'] = "aba76171fef41e2c987913c32fefafc55ef635f6"
flink = Flink(flink_config)

flink_config_custom = flink_config.copy()
flink_config_custom['git_commit'] = "off_heap_rebased"
flink_config_custom['extra_config_entries'] = [
    {'entry': "taskmanager.memory.directAllocation: true"}
]
flink_custom = Flink(flink_config_custom)

systems = [hadoop, flink]

benchmarks = [
    Benchmark(id="WordCount-heap",
              systems=[flink],
              experiment=WordCount(),
              times=1),
    Benchmark(id="WordCount-offheap",