def configure():
    """Render the Flink config templates and add Flink's bin dir to the PATH.

    Builds a template context from a copy of the module-level ``conf`` plus
    the Java home, the master host and the value of ``$HADOOP_CONF_DIR`` as
    reported by ``run``, then renders ``flink-conf.yaml.mustache`` and
    ``slaves.mustache`` into ``<flink dist path>/conf``. Finally it makes
    sure ``~/.profile`` appends ``PATH + "/bin"`` to the shell's PATH.

    Relies on names defined outside this function: ``conf``, ``env``,
    ``run``, ``find_java_home``, ``get_flink_dist_path``,
    ``process_template`` and ``PATH``.
    NOTE(review): ``run``/``env`` look like Fabric's remote API -- confirm
    against the module imports.
    """
    context = conf.copy()
    context['java_home'] = find_java_home()
    context['master'] = env.master
    # get hadoop conf environment variable
    context['hadoop_conf_path'] = run("echo $HADOOP_CONF_DIR")

    destination = get_flink_dist_path() + "/conf"
    process_template("flink", "flink-conf.yaml.mustache", context, destination)

    # one slave host per line
    slaves = '\n'.join(env.slaves)
    process_template("flink", "slaves.mustache", {'slaves': slaves}, destination)

    # update the PATH variable
    # The export line is single-quoted for echo so that "$PATH" is written
    # literally and expanded when ~/.profile is sourced, rather than freezing
    # the PATH of whichever shell happens to execute this command. The grep
    # guard keeps repeated configure() calls from appending the same line
    # over and over.
    export_line = "export PATH=$PATH:%s/bin" % PATH
    run("grep -qxF '%s' ~/.profile 2>/dev/null || echo '%s' >> ~/.profile"
        % (export_line, export_line))
# Cluster sizing and derived Flink settings: dop = num_workers * num_cores is used as the Flink parallelization, and flink_als is built from a copy of flink_config with an extra taskmanager.memory.fraction entry.
compute_engine_config['machine_type'] = "n1-standard-2" # num cores to use compute_engine_config['num_cores'] = 2 compute_engine_config['size_mem'] = 7500 # 10 workers + 1 master compute_engine_config['num_workers'] = 10 compute_engine_config['disk_space_gb'] = 200 dop = compute_engine_config['num_workers'] * compute_engine_config['num_cores'] flink_config['num_task_slots'] = compute_engine_config['num_cores'] flink_config['taskmanager_heap'] = 5120 #5gb flink_config['jobmanager_heap'] = 5120 flink_config['parallelization'] = dop flink_als_config = flink_config.copy() flink_als_config['extra_config_entries'] = [ {'entry' : "taskmanager.memory.fraction: 0.3"}, ] cluster = ComputeEngine(compute_engine_config) hadoop = Hadoop(hadoop_config) flink = Flink(flink_config) flink_als = Flink(flink_als_config) systems = [hadoop, flink] generators = { 'text': Generator(
# Benchmark setup: besides the default `flink` system, `custom_flink` is built from a copy of flink_config pinned to a specific git commit with taskmanager_num_buffers set to 1024.
# import experiment's main class from experiments.wordcount import WordCountFromJar from experiments.wordcount import WordCount from experiments.grep import Grep # import data generators for benchmarks from experiments import generators cluster = ComputeEngine(compute_engine_config) hadoop = Hadoop(hadoop_config) flink = Flink(flink_config) systems = [hadoop, flink] custom_flink_config = flink_config.copy() custom_flink_config['git_commit'] = "858d1bccf957bf36c04ab011ec9a26933109086c" custom_flink_config['taskmanager_num_buffers'] = 1024 custom_flink = Flink(custom_flink_config) benchmarks = [ # Normal benchmark Benchmark( id = "WordCount1000", systems = [flink], experiment = WordCountFromJar({ 'num_lines' : 1000 }), times = 5 ),
# 2 cores 7.5GB RAM compute_engine_config['machine_type'] = "n1-standard-2" # num cores to use compute_engine_config['num_cores'] = 2 # 10 workers + 1 master compute_engine_config['num_workers'] = 10 compute_engine_config['disk_space_gb'] = 100 cluster = ComputeEngine(compute_engine_config) hadoop = Hadoop(hadoop_config) flink_config['git_repository'] = "https://github.com/mxm/flink.git" flink_config['git_commit'] = "aba76171fef41e2c987913c32fefafc55ef635f6" flink = Flink(flink_config) flink_config_custom = flink_config.copy() flink_config_custom['git_commit'] = "off_heap_rebased" flink_config_custom['extra_config_entries'] = [ { 'entry' : "taskmanager.memory.directAllocation: true" } ] flink_custom = Flink(flink_config_custom) systems = [hadoop, flink] benchmarks = [ Benchmark( id = "WordCount-heap", systems = [flink], experiment = WordCount(), times = 1 ),
# Benchmark setup: the same WordCountFromJar experiment is run against the default `flink` system and against `custom_flink`, which is built from a copy of flink_config pinned to a specific git commit with taskmanager_num_buffers set to 1024.
# import experiment's main class from experiments.wordcount import WordCountFromJar from experiments.wordcount import WordCount from experiments.grep import Grep # import data generators for benchmarks from experiments import generators cluster = ComputeEngine(compute_engine_config) hadoop = Hadoop(hadoop_config) flink = Flink(flink_config) systems = [hadoop, flink] custom_flink_config = flink_config.copy() custom_flink_config['git_commit'] = "858d1bccf957bf36c04ab011ec9a26933109086c" custom_flink_config['taskmanager_num_buffers'] = 1024 custom_flink = Flink(custom_flink_config) benchmarks = [ # Normal benchmark Benchmark(id="WordCount1000", systems=[flink], experiment=WordCountFromJar({'num_lines': 1000}), times=5), # Custom Flink version benchmark Benchmark(id="WordCount1000-custom", systems=[custom_flink], experiment=WordCountFromJar({'num_lines': 1000}),
# 2 cores 7.5GB RAM compute_engine_config['machine_type'] = "n1-standard-2" # num cores to use compute_engine_config['num_cores'] = 2 # 10 workers + 1 master compute_engine_config['num_workers'] = 10 compute_engine_config['disk_space_gb'] = 100 cluster = ComputeEngine(compute_engine_config) hadoop = Hadoop(hadoop_config) flink_config['git_repository'] = "https://github.com/mxm/flink.git" flink_config['git_commit'] = "aba76171fef41e2c987913c32fefafc55ef635f6" flink = Flink(flink_config) flink_config_custom = flink_config.copy() flink_config_custom['git_commit'] = "off_heap_rebased" flink_config_custom['extra_config_entries'] = [{ 'entry': "taskmanager.memory.directAllocation: true" }] flink_custom = Flink(flink_config_custom) systems = [hadoop, flink] benchmarks = [ Benchmark(id="WordCount-heap", systems=[flink], experiment=WordCount(), times=1), Benchmark(id="WordCount-offheap",