def write_hadoop_input(input_filename, timing_run_parameters, n_steps, SEED):
    # prep settings dictionary; copy so the module-level defaults are not mutated
    time_analyze_args_dict = dict(xu.default_analyze_args_dict)
    time_analyze_args_dict['command'] = 'time_analyze'
    time_analyze_args_dict['SEED'] = SEED
    time_analyze_args_dict['n_steps'] = n_steps
    # one kernel per line
    all_kernels = State.transition_name_to_method_name_and_args.keys()
    with open(input_filename, 'a') as out_fh:
        dict_generator = generate_hadoop_dicts(all_kernels,
                                               timing_run_parameters,
                                               time_analyze_args_dict)
        for dict_to_write in dict_generator:
            xu.write_hadoop_line(out_fh, key=dict_to_write['SEED'],
                                 dict_to_write=dict_to_write)
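# generate_hadoop_dicts is not defined in these snippets. Below is a minimal
# sketch of the behavior its call sites suggest: cross every kernel with every
# point in the run-parameter grid and yield one task dict per combination,
# with the shared settings merged in. The itertools cross product and the
# 'kernel_list' key are assumptions, not the real implementation; note that
# the convergence variant further down calls a two-argument version of the
# same name.
import itertools

def generate_hadoop_dicts(all_kernels, run_parameters, args_dict):
    param_names = sorted(run_parameters.keys())
    param_grids = [run_parameters[name] for name in param_names]
    for kernel in all_kernels:
        for values in itertools.product(*param_grids):
            dict_to_write = dict(zip(param_names, values))
            dict_to_write['kernel_list'] = (kernel,)
            # settings shared by every task: command, SEED, n_steps, ...
            dict_to_write.update(args_dict)
            yield dict_to_write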
def write_hadoop_input(input_filename, convergence_run_parameters, n_steps,
                       block_size, SEED):
    # prep settings dictionary; copy so the module-level defaults are not mutated
    convergence_analyze_args_dict = dict(xu.default_analyze_args_dict)
    convergence_analyze_args_dict['command'] = 'convergence_analyze'
    convergence_analyze_args_dict['SEED'] = SEED
    convergence_analyze_args_dict['n_steps'] = n_steps
    convergence_analyze_args_dict['block_size'] = block_size
    #
    n_tasks = 0
    with open(input_filename, 'a') as out_fh:
        dict_generator = generate_hadoop_dicts(convergence_run_parameters,
                                               convergence_analyze_args_dict)
        for dict_to_write in dict_generator:
            xu.write_hadoop_line(out_fh, key=dict_to_write['SEED'],
                                 dict_to_write=dict_to_write)
            n_tasks += 1
    return n_tasks
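# A hypothetical invocation of the convergence writer above; the grid values
# and the filename are made up, shown only to illustrate the calling
# convention and the use of the returned task count.
convergence_run_parameters = dict(num_rows=[100, 1000],
                                  num_cols=[8, 16],
                                  num_clusters=[1, 2],
                                  num_views=[1, 2])
n_tasks = write_hadoop_input('hadoop_input', convergence_run_parameters,
                             n_steps=100, block_size=10, SEED=0)
print 'wrote %s tasks to hadoop_input' % n_tasks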
def write_hadoop_input(input_filename, X_L, X_D, n_steps, SEED):
    # prep settings dictionary; copy so the module-level defaults are not mutated
    time_analyze_args_dict = dict(xu.default_analyze_args_dict)
    time_analyze_args_dict['command'] = 'time_analyze'
    time_analyze_args_dict['SEED'] = SEED
    time_analyze_args_dict['n_steps'] = n_steps
    # one kernel per line
    all_kernels = State.transition_name_to_method_name_and_args.keys()
    n_tasks = 0
    with open(input_filename, 'w') as out_fh:
        dict_generator = generate_hadoop_dicts(all_kernels, X_L, X_D,
                                               time_analyze_args_dict)
        for dict_to_write in dict_generator:
            xu.write_hadoop_line(out_fh, key=dict_to_write['SEED'],
                                 dict_to_write=dict_to_write)
            n_tasks += 1
    return n_tasks
def write_hadoop_input(input_filename, impute_run_parameters, SEED):
    # prep settings dictionary; copy so the module-level defaults are not mutated
    impute_analyze_args_dict = dict(xu.default_analyze_args_dict)
    impute_analyze_args_dict['command'] = 'impute_analyze'
    # unlike the other variants, this one writes the run parameters as a
    # single line keyed by SEED
    with open(input_filename, 'a') as out_fh:
        xu.write_hadoop_line(out_fh, key=SEED,
                             dict_to_write=impute_run_parameters)
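# All of the writers above, and the mapper below, round-trip records through
# xu.write_hadoop_line / xu.parse_hadoop_line. A hypothetical sketch of that
# contract, assuming one record per line with a key and a JSON payload; the
# real xnet_utils implementation may use a different separator or encoding.
import json

def write_hadoop_line(out_fh, key, dict_to_write):
    out_fh.write('%s\t%s\n' % (key, json.dumps(dict_to_write)))

def parse_hadoop_line(line):
    key, json_str = line.strip().split('\t', 1)
    return key, json.loads(json_str)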
print "Done." # table data is empty because we generate it in the mapper table_data=dict(T=[],M_c=[],X_L=[],X_D=[]) fu.pickle(table_data, table_data_filename) ##################### if do_local: output_filename = os.path.join(directory_path, "output_local") output_file_object = open(output_filename, 'ab') with open(input_filename,'rb') as infile: for line in infile: key, test_dict = xu.parse_hadoop_line(line) ret_dict = run_mi_test_local.run_mi_test_local(test_dict) xu.write_hadoop_line(output_file_object, key, ret_dict) print "%s\n\t%s" % (str(test_dict), str(ret_dict)) output_file_object.close() # generate the csv parse_mi.parse_data_to_csv(input_filename, params_dict, test_idx, output_filename) print "Done." elif do_remote: # generate the massive hadoop files hadoop_engine = HE.HadoopEngine(output_path=output_path, input_filename=input_filename, table_data_filename=table_data_filename, which_engine_binary=which_engine_binary, hdfs_uri=hdfs_uri, jobtracker_uri=jobtracker_uri, )
method_lookup = dict(
    analyze=analyze_helper,
    time_analyze=time_analyze_helper,
    convergence_analyze=convergence_analyze_helper,
    chunk_analyze=chunk_analyze_helper,
    mi_analyze=mi_analyze_helper,
    )

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--table_data_filename', type=str,
                        default=hs.default_table_data_filename)
    parser.add_argument('--command_dict_filename', type=str,
                        default=hs.default_command_dict_filename)
    args = parser.parse_args()
    table_data_filename = args.table_data_filename
    command_dict_filename = args.command_dict_filename
    #
    table_data = fu.unpickle(table_data_filename)
    command_dict = fu.unpickle(command_dict_filename)
    command = command_dict['command']
    method = method_lookup[command]
    # restore default SIGPIPE behavior so a closed pipe terminates the
    # process instead of raising IOError under hadoop streaming
    from signal import signal, SIGPIPE, SIG_DFL
    signal(SIGPIPE, SIG_DFL)
    for line in sys.stdin:
        key, data_dict = xu.parse_hadoop_line(line)
        ret_dict = method(table_data, data_dict, command_dict)
        xu.write_hadoop_line(sys.stdout, key, ret_dict)
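# A sketch of the shape each *_helper dispatched through method_lookup must
# have, inferred from the call in the stdin loop above: it receives the
# unpickled table data, one parsed input line, and the shared command dict,
# and returns a dict to emit. The body here is a placeholder, not the real
# helper.
def time_analyze_helper(table_data, data_dict, command_dict):
    ret_dict = dict(SEED=data_dict.get('SEED'),
                    n_steps=command_dict.get('n_steps'))
    # ... run the timed analyze and record elapsed seconds here ...
    return ret_dict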