# generate data
T, M_c, M_r, X_L, X_D = generate_clean_state(gen_seed, num_clusters,
                                              num_cols, num_rows, num_splits,
                                              max_mean=10, max_std=1)
# write table_data
table_data = dict(M_c=M_c, M_r=M_r, T=T)
fu.pickle(table_data, table_data_filename)
# write hadoop input
n_tasks = write_hadoop_input(input_filename, X_L, X_D, n_steps, SEED=gen_seed)

# actually run
if do_local:
    xu.run_script_local(input_filename, script_filename, output_filename,
                        table_data_filename)
elif do_remote:
    hadoop_engine = HE.HadoopEngine(output_path=output_path,
                                    input_filename=input_filename,
                                    table_data_filename=table_data_filename)
    hadoop_engine.send_hadoop_command(n_tasks)
    was_successful = hadoop_engine.get_hadoop_results()
    if was_successful:
        hu.copy_hadoop_output(output_path, output_filename)
    else:
        print('remote hadoop job NOT successful')
else:
    hadoop_engine = HE.HadoopEngine()
    # print what the command would be
    print(HE.create_hadoop_cmd_str(hadoop_engine, n_tasks=n_tasks))
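The fu.pickle call above writes table_data to disk, but the file-utility module itself is not shown in this snippet. A minimal sketch of what such a wrapper could look like, assuming it is a thin layer over the standard-library pickle module; the names pickle_to_file and unpickle_from_file are placeholders, not the real fu API:

import pickle

def pickle_to_file(obj, filename):
    # serialize obj to filename with the highest protocol available
    with open(filename, 'wb') as fh:
        pickle.dump(obj, fh, protocol=pickle.HIGHEST_PROTOCOL)

def unpickle_from_file(filename):
    # read back an object written by pickle_to_file
    with open(filename, 'rb') as fh:
        return pickle.load(fh)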
table_data_filename = hs.default_table_data_filename
initialize_input_filename = 'initialize_input'
initialize_output_filename = 'initialize_output'
initialize_args_dict = hs.default_initialize_args_dict
analyze_input_filename = 'analyze_input'
analyze_output_filename = 'analyze_output'
analyze_args_dict = hs.default_analyze_args_dict

# set up
table_data = xu.read_and_pickle_table_data(filename, table_data_filename)

# create initialize input
xu.write_initialization_files(initialize_input_filename,
                              initialize_args_dict=initialize_args_dict,
                              n_chains=n_chains)

# initialize
xu.run_script_local(initialize_input_filename, script_name,
                    initialize_output_filename)

# read initialization output, write analyze input
analyze_args_dict['n_steps'] = n_steps
analyze_args_dict['max_time'] = 20
xu.link_initialize_to_analyze(initialize_output_filename,
                              analyze_input_filename, analyze_args_dict)

# analyze
xu.run_script_local(analyze_input_filename, script_name,
                    analyze_output_filename)
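Both the initialize and analyze steps above are driven by xu.run_script_local, which runs the worker script over an input file and captures its output. A minimal sketch of such a helper, assuming the worker behaves like a Hadoop-streaming mapper that reads task lines on stdin and writes results on stdout; run_script_local_sketch is a hypothetical stand-in, and the real xu.run_script_local can also take a table_data filename as a fourth argument:

import subprocess
import sys

def run_script_local_sketch(input_filename, script_name, output_filename):
    # emulate a Hadoop-streaming mapper locally: feed the task file to the
    # worker script on stdin and capture its stdout as the output file
    with open(input_filename, 'rb') as infile, \
            open(output_filename, 'wb') as outfile:
        subprocess.check_call([sys.executable, script_name],
                              stdin=infile, stdout=outfile)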
timing_run_parameters = dict(num_rows=num_rows, num_cols=num_cols,
                             num_views=num_splits, num_clusters=num_clusters)
write_hadoop_input(input_filename, timing_run_parameters, n_steps,
                   SEED=gen_seed)

n_tasks = len(num_rows_list) * len(num_cols_list) * \
    len(num_clusters_list) * len(num_splits_list) * 5

# Create a dummy table data file
table_data = dict(T=[], M_c=[], X_L=[], X_D=[])
fu.pickle(table_data, table_data_filename)

if do_local:
    xu.run_script_local(input_filename, script_filename, output_filename,
                        table_data_filename)
    print('Local Engine for automated timing runs has not been '
          'completely implemented/tested')
elif do_remote:
    hadoop_engine = HE.HadoopEngine(which_engine_binary=which_engine_binary,
                                    output_path=output_path,
                                    input_filename=input_filename,
                                    table_data_filename=table_data_filename)
    xu.write_support_files(table_data, hadoop_engine.table_data_filename,
                           dict(command='time_analyze'),
                           hadoop_engine.command_dict_filename)
    hadoop_engine.send_hadoop_command(n_tasks=n_tasks)
    was_successful = hadoop_engine.get_hadoop_results()
    if was_successful:
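The n_tasks expression above implies one timing task per combination of the four parameter lists, repeated five times. A hedged sketch of how that grid could be enumerated when building the Hadoop input; the example list values and the n_repeats constant are assumptions for illustration, only the task-count relationship is taken from the snippet:

import itertools

# example grids; the real num_*_list values come from the script's arguments
num_rows_list = [100, 400, 1000]
num_cols_list = [4, 8, 16]
num_clusters_list = [1, 2, 4]
num_splits_list = [1, 2]
n_repeats = 5  # assumed: five timing runs per parameter combination

task_specs = []
for num_rows, num_cols, num_clusters, num_splits in itertools.product(
        num_rows_list, num_cols_list, num_clusters_list, num_splits_list):
    for _ in range(n_repeats):
        task_specs.append(dict(num_rows=num_rows, num_cols=num_cols,
                               num_views=num_splits,
                               num_clusters=num_clusters))

n_tasks = len(task_specs)
assert n_tasks == (len(num_rows_list) * len(num_cols_list) *
                   len(num_clusters_list) * len(num_splits_list) * n_repeats)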