def parse_timing_to_csv(filename, outfile='parsed_timing.csv'):
    #drive, path = os.path.splitdrive(filename)
    #outpath, file_nameonly = os.path.split(path)
    with open(filename) as fh:
        lines = []
        for line in fh:
            lines.append(xu.parse_hadoop_line(line))
    header = [
        'num_rows', 'num_cols', 'num_clusters', 'num_views',
        'time_per_step', 'which_kernel'
        ]
    reduced_lines = map(lambda x: x[1], lines)
    with open(outfile, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        csvwriter.writerow(header)
        for reduced_line in reduced_lines:
            try:
                parsed_line = parse_reduced_line(reduced_line)
                csvwriter.writerow(parsed_line)
            except Exception as e:
                pass
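# These snippets all assume xu.parse_hadoop_line turns one Hadoop streaming line
# into a (key, value_dict) pair. The stand-in below is a minimal sketch for
# illustration only: it assumes tab-separated "key<TAB>json" lines, which may not
# match the real xu.parse_hadoop_line.
import json

def parse_hadoop_line_sketch(line):
    # split the streaming line into its key and serialized payload
    key, _, payload = line.rstrip('\n').partition('\t')
    try:
        # assume the payload is JSON; the real serialization may differ
        return key, json.loads(payload)
    except ValueError:
        # unparseable payload: mirror the (key, None) convention seen in process_line
        return key, None

# e.g. parse_hadoop_line_sketch('42\t{"num_rows": 100, "num_cols": 8}')
# -> ('42', {'num_rows': 100, 'num_cols': 8})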
def process_line(line, table_data):
    key, dict_in = xu.parse_hadoop_line(line)
    if dict_in is None:
        return None, None
    command = dict_in['command']
    method = method_lookup[command]
    ret_dict = method(table_data, dict_in)
    return key, ret_dict
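# process_line dispatches on dict_in['command'] through method_lookup. Below is a
# hypothetical sketch of that table; the real handlers live elsewhere, and
# noop_handler / method_lookup_sketch exist only for illustration.
def noop_handler(table_data, dict_in):
    # placeholder handler: echo the command back in a result dict
    return dict(command=dict_in.get('command'), status='ok')

method_lookup_sketch = dict(
    initialize=noop_handler,
    analyze=noop_handler,
    )

# With method_lookup_sketch standing in for method_lookup (and the tab-separated
# line format assumed above), process_line('0\t{"command": "analyze"}', {})
# would return ('0', {'command': 'analyze', 'status': 'ok'}).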
def parse_to_csv(in_filename, out_filename='parsed_convergence.csv'):
    variable_names_to_extract = [
        'num_rows', 'num_cols', 'num_clusters', 'num_views', 'max_mean',
        'n_steps', 'block_size', 'column_ari_list',
        'generative_mean_test_log_likelihood', 'mean_test_ll_list',
        'elapsed_seconds_list',
        ]
    header = ['experiment'] + variable_names_to_extract
    with open(in_filename) as in_fh:
        with open(out_filename, 'w') as out_fh:
            csvwriter = csv.writer(out_fh)
            csvwriter.writerow(header)
            for line in in_fh:
                try:
                    parsed_line = xu.parse_hadoop_line(line)
                    output_row = parsed_line_to_output_row(
                        parsed_line,
                        variable_names_to_extract=variable_names_to_extract)
                    csvwriter.writerow(output_row)
                except Exception as e:
                    sys.stderr.write(line + '\n' + str(e) + '\n')
def read_hadoop_output_file(hadoop_output_filename):
    with open(hadoop_output_filename) as fh:
        ret_dict = dict([xu.parse_hadoop_line(line) for line in fh])
    return ret_dict
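# Hedged usage sketch for read_hadoop_output_file: it collapses a whole output
# file into one {key: value_dict} mapping, so if a key repeats, only the last
# value is kept. The filename and field name below are hypothetical.
# ret_dict = read_hadoop_output_file('myrun/output')
# for key, value_dict in ret_dict.items():
#     print(key, value_dict.get('num_rows'))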
def hadoop_to_dict_generator(test_key_file_object):
    # rewind to the start; otherwise a repeat call on the same file object
    # would begin wherever the previous iteration left off (usually EOF)
    test_key_file_object.seek(0)
    for line in test_key_file_object:
        dict_line = xu.parse_hadoop_line(line)
        yield dict_line
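# Hedged usage sketch: because of the seek(0) above, the same file object can be
# consumed repeatedly. io.StringIO stands in for a real output file here, using
# the tab-separated line format assumed earlier.
import io

fake_output = io.StringIO('0\t{"num_rows": 100}\n1\t{"num_rows": 200}\n')
first_pass = list(hadoop_to_dict_generator(fake_output))
second_pass = list(hadoop_to_dict_generator(fake_output))  # starts over thanks to seek(0)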
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    args = parser.parse_args()
    filename = args.filename

    with open(filename) as fh:
        lines = []
        for line in fh:
            lines.append(xu.parse_hadoop_line(line))
    header = 'num_rows,num_cols,num_clusters,num_views,time_per_step,which_kernel'
    print(header)
    reduced_lines = map(lambda x: x[1], lines)
    for reduced_line in reduced_lines:
        try:
            parsed_line = parse_reduced_line(reduced_line)
            print(','.join(map(str, parsed_line)))
        except Exception as e:
            pass
    testlist.append(impute_run_parameters)
    test_idx += 1

print("Done.")

# table data is empty because we generate it in the mapper
table_data = dict(T=[], M_c=[], X_L=[], X_D=[])
fu.pickle(table_data, table_data_filename)

#####################
if do_local:
    output_filename = os.path.join(directory_path, "output_local")
    output_file_object = open(output_filename, 'ab')
    with open(input_filename, 'rb') as infile:
        for line in infile:
            key, test_dict = xu.parse_hadoop_line(line)
            ret_dict = run_mi_test_local.run_mi_test_local(test_dict)
            xu.write_hadoop_line(output_file_object, key, ret_dict)
            print("%s\n\t%s" % (str(test_dict), str(ret_dict)))
    output_file_object.close()
    # generate the csv
    parse_mi.parse_data_to_csv(input_filename, params_dict, test_idx,
                               output_filename)
    print("Done.")
elif do_remote:
    # generate the massive hadoop files
    hadoop_engine = HE.HadoopEngine(output_path=output_path,
                                    input_filename=input_filename,
                                    table_data_filename=table_data_filename,
                                    which_engine_binary=which_engine_binary,
                                    hdfs_uri=hdfs_uri,
    # (fragment) remaining entries of the command-handler table used as method_lookup below
    analyze=analyze_helper,
    time_analyze=time_analyze_helper,
    convergence_analyze=convergence_analyze_helper,
    chunk_analyze=chunk_analyze_helper,
    mi_analyze=mi_analyze_helper)

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--table_data_filename', type=str,
                        default=hs.default_table_data_filename)
    parser.add_argument('--command_dict_filename', type=str,
                        default=hs.default_command_dict_filename)
    args = parser.parse_args()
    table_data_filename = args.table_data_filename
    command_dict_filename = args.command_dict_filename
    table_data = fu.unpickle(table_data_filename)
    command_dict = fu.unpickle(command_dict_filename)
    command = command_dict['command']
    method = method_lookup[command]
    #
    from signal import signal, SIGPIPE, SIG_DFL
    signal(SIGPIPE, SIG_DFL)
    for line in sys.stdin:
        key, data_dict = xu.parse_hadoop_line(line)
        ret_dict = method(table_data, data_dict, command_dict)
        xu.write_hadoop_line(sys.stdout, key, ret_dict)
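# Hedged usage note: the __main__ block above is a Hadoop-streaming mapper, so it
# can also be exercised locally by piping pre-built input lines through the script,
# for example (script and file names here are hypothetical):
#
#   python hadoop_line_processor.py \
#       --table_data_filename table_data.pkl.gz \
#       --command_dict_filename command_dict.pkl.gz \
#       < hadoop_input > hadoop_output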