def get_CrossCatClient(client_type, **kwargs):
    """Helper which instantiates the appropriate Engine and returns a Client"""
    client = None
    if client_type == 'local':
        import crosscat.LocalEngine as LocalEngine
        le = LocalEngine.LocalEngine(**kwargs)
        client = CrossCatClient(le)
    elif client_type == 'hadoop':
        import crosscat.HadoopEngine as HadoopEngine
        he = HadoopEngine.HadoopEngine(**kwargs)
        client = CrossCatClient(he)
    elif client_type == 'jsonrpc':
        import crosscat.JSONRPCEngine as JSONRPCEngine
        je = JSONRPCEngine.JSONRPCEngine(**kwargs)
        client = CrossCatClient(je)
    elif client_type == 'multiprocessing':
        import crosscat.MultiprocessingEngine as MultiprocessingEngine
        me = MultiprocessingEngine.MultiprocessingEngine(**kwargs)
        client = CrossCatClient(me)
    else:
        raise Exception('unknown client_type: %s' % client_type)
    return client
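
# Usage sketch (not from the source above): how this factory might be called.
# The 'seed' keyword is an assumption about LocalEngine's constructor and is
# purely illustrative; the error path follows directly from the else branch.
local_client = get_CrossCatClient('local', seed=0)

try:
    get_CrossCatClient('spark')  # an unsupported backend name
except Exception as e:
    print(e)  # unknown client_type: spark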
T, M_c, M_r, X_L, X_D = generate_clean_state(gen_seed, num_clusters,
                                              num_cols, num_rows, num_splits,
                                              max_mean=10, max_std=1)
# write table_data
table_data = dict(M_c=M_c, M_r=M_r, T=T)
fu.pickle(table_data, table_data_filename)
# write hadoop input
n_tasks = write_hadoop_input(input_filename, X_L, X_D, n_steps, SEED=gen_seed)
# actually run
if do_local:
    xu.run_script_local(input_filename, script_filename, output_filename,
                        table_data_filename)
elif do_remote:
    hadoop_engine = HE.HadoopEngine(output_path=output_path,
                                    input_filename=input_filename,
                                    table_data_filename=table_data_filename,
                                    )
    hadoop_engine.send_hadoop_command(n_tasks)
    was_successful = hadoop_engine.get_hadoop_results()
    if was_successful:
        hu.copy_hadoop_output(output_path, output_filename)
    else:
        print('remote hadoop job NOT successful')
else:
    hadoop_engine = HE.HadoopEngine()
    # print what the command would be
    print(HE.create_hadoop_cmd_str(hadoop_engine, n_tasks=n_tasks))
    for line in infile:
        key, test_dict = xu.parse_hadoop_line(line)
        ret_dict = run_mi_test_local.run_mi_test_local(test_dict)
        xu.write_hadoop_line(output_file_object, key, ret_dict)
        print("%s\n\t%s" % (str(test_dict), str(ret_dict)))
    output_file_object.close()
    # generate the csv
    parse_mi.parse_data_to_csv(input_filename, params_dict, test_idx,
                               output_filename)
    print("Done.")
elif do_remote:
    # generate the massive hadoop files
    hadoop_engine = HE.HadoopEngine(output_path=output_path,
                                    input_filename=input_filename,
                                    table_data_filename=table_data_filename,
                                    which_engine_binary=which_engine_binary,
                                    hdfs_uri=hdfs_uri,
                                    jobtracker_uri=jobtracker_uri,
                                    )
    xu.write_support_files(table_data, hadoop_engine.table_data_filename,
                           dict(command='mi_analyze'),
                           hadoop_engine.command_dict_filename)
    t_start = time.time()
    hadoop_engine.send_hadoop_command(n_tasks=len(testlist))
    was_successful = hadoop_engine.get_hadoop_results()
    if was_successful:
        t_end = time.time()
        t_total = t_end - t_start
        print("That took %i seconds." % t_total)
        hu.copy_hadoop_output(hadoop_engine.output_path, output_filename)
        parse_mi.parse_data_to_csv(input_filename, params_dict, test_idx,
                                   output_filename)
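
# Illustration only: the local loop above round-trips Hadoop-streaming style
# key/value lines through xu.parse_hadoop_line / xu.write_hadoop_line. Their
# actual wire format is not shown in this snippet; the hypothetical helpers
# below sketch a generic tab-separated key + JSON payload convention, just to
# make the read/modify/write pattern concrete.
import json

def parse_line(line):
    key, payload = line.rstrip('\n').split('\t', 1)
    return key, json.loads(payload)

def write_line(fileobj, key, value_dict):
    fileobj.write('%s\t%s\n' % (key, json.dumps(value_dict)))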
n_tasks = len(num_rows_list) * len(num_cols_list) * \
    len(num_clusters_list) * len(num_splits_list) * 5
# Create a dummy table data file
table_data = dict(T=[], M_c=[], X_L=[], X_D=[])
fu.pickle(table_data, table_data_filename)

if do_local:
    xu.run_script_local(input_filename, script_filename, output_filename,
                        table_data_filename)
    print('Local Engine for automated timing runs has not been '
          'completely implemented/tested')
elif do_remote:
    hadoop_engine = HE.HadoopEngine(
        which_engine_binary=which_engine_binary,
        output_path=output_path,
        input_filename=input_filename,
        table_data_filename=table_data_filename)
    xu.write_support_files(table_data, hadoop_engine.table_data_filename,
                           dict(command='time_analyze'),
                           hadoop_engine.command_dict_filename)
    hadoop_engine.send_hadoop_command(n_tasks=n_tasks)
    was_successful = hadoop_engine.get_hadoop_results()
    if was_successful:
        hu.copy_hadoop_output(hadoop_engine.output_path, output_filename)
        parse_timing.parse_timing_to_csv(output_filename,
                                         outfile=parsed_out_file)
        coeff_list = find_regression_coeff(parsed_out_file, parameter_list)
    else:
        print('remote hadoop job NOT successful')
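
# Sanity-check sketch (not from the source above): n_tasks is just the size of
# the parameter grid times five repetitions per combination. The list values
# below are made up; only the arithmetic is being illustrated.
from itertools import product

num_rows_list = [100, 1000]
num_cols_list = [4, 8]
num_clusters_list = [1, 2]
num_splits_list = [1, 2]
n_repeats = 5  # the literal 5 in the n_tasks expression above

grid = list(product(num_rows_list, num_cols_list,
                    num_clusters_list, num_splits_list))
assert len(grid) * n_repeats == \
    len(num_rows_list) * len(num_cols_list) * \
    len(num_clusters_list) * len(num_splits_list) * n_repeats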