Example #1
def parse_timing_to_csv(filename, outfile='parsed_timing.csv'):
    #drive, path = os.path.splitdrive(filename)
    #outpath, file_nameonly = os.path.split(path)

    with open(filename) as fh:
        lines = []
        for line in fh:
            lines.append(xu.parse_hadoop_line(line))

    header = [
        'num_rows', 'num_cols', 'num_clusters', 'num_views', 'time_per_step',
        'which_kernel'
    ]

    reduced_lines = map(lambda x: x[1], lines)

    with open(outfile, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        csvwriter.writerow(header)
        for reduced_line in reduced_lines:
            try:
                parsed_line = parse_reduced_line(reduced_line)
                csvwriter.writerow(parsed_line)
            except Exception:
                # skip reduced lines that fail to parse
                pass
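A minimal usage sketch for the function above; 'myjob_output' is a placeholder filename (not from the project), and the column names come from the header it writes.

import csv

# hypothetical invocation of parse_timing_to_csv above
parse_timing_to_csv('myjob_output', outfile='parsed_timing.csv')

# read a couple of the written columns back out
with open('parsed_timing.csv') as fh:
    for row in csv.DictReader(fh):
        print(row['which_kernel'], row['time_per_step'])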
def process_line(line, table_data):
    key, dict_in = xu.parse_hadoop_line(line)
    if dict_in is None:
        return None, None
    command = dict_in['command']
    method = method_lookup[command]
    ret_dict = method(table_data, dict_in)
    return key, ret_dict
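process_line dispatches through a module-level method_lookup dict (the tail of a real one appears in Example #11 below). A minimal sketch of that pattern, with a placeholder command name and handler; only the two-argument call shape is taken from the code above.

def echo_helper(table_data, dict_in):
    # placeholder handler: echo the parsed dict back, tagged as handled
    return dict(handled=True, **dict_in)

# hypothetical dispatch table; the real command names live in the project
method_lookup = dict(echo=echo_helper)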
Example #4
def parse_to_csv(in_filename, out_filename='parsed_convergence.csv'):
    variable_names_to_extract = [
        'num_rows', 'num_cols', 'num_clusters', 'num_views',
        'max_mean', 'n_steps', 'block_size', 'column_ari_list',
        'generative_mean_test_log_likelihood', 'mean_test_ll_list',
        'elapsed_seconds_list',
    ]
    header = ['experiment'] + variable_names_to_extract
    with open(in_filename) as in_fh:
        with open(out_filename, 'w') as out_fh:
            csvwriter = csv.writer(out_fh)
            csvwriter.writerow(header)
            for line in in_fh:
                try:
                    parsed_line = xu.parse_hadoop_line(line)
                    output_row = parsed_line_to_output_row(
                        parsed_line,
                        variable_names_to_extract=variable_names_to_extract)
                    csvwriter.writerow(output_row)
                except Exception as e:
                    sys.stderr.write(line + '\n' + str(e) + '\n')
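A sketch of reading the convergence CSV back, assuming parse_to_csv ran as above; the column names come from the header it writes.

import csv

# hypothetical follow-up: inspect a few columns of the parsed results
with open('parsed_convergence.csv') as fh:
    for row in csv.DictReader(fh):
        print(row['experiment'], row['num_clusters'], row['n_steps'])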
Example #6
def read_hadoop_output_file(hadoop_output_filename):
    with open(hadoop_output_filename) as fh:
        ret_dict = dict([xu.parse_hadoop_line(line) for line in fh])
    return ret_dict
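A usage sketch; 'myjob_output' is a placeholder path to a file whose lines xu.parse_hadoop_line can split into (key, value_dict) pairs.

# hypothetical call: the result maps each Hadoop key to its parsed value dict
results = read_hadoop_output_file('myjob_output')
for key, value_dict in results.items():
    print(key, sorted(value_dict))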
Example #7
def hadoop_to_dict_generator(test_key_file_object):
    # return the read cursor to the start, otherwise a second generator
    # built from the same file object would see nothing
    test_key_file_object.seek(0)
    for line in test_key_file_object:
        dict_line = xu.parse_hadoop_line(line)
        yield dict_line
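Because the generator rewinds the file object before iterating, the same open file can be handed to it more than once; a sketch with a placeholder filename.

with open('myjob_input') as fh:
    first_pass = list(hadoop_to_dict_generator(fh))
    second_pass = list(hadoop_to_dict_generator(fh))
    # both passes see every line, since each generator seeks back to 0
    assert len(first_pass) == len(second_pass)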

Example #9
        csvwriter.writerow(header)
        for reduced_line in reduced_lines:
            try:
                parsed_line = parse_reduced_line(reduced_line)
                csvwriter.writerow(parsed_line)
            except Exception:
                # skip reduced lines that fail to parse
                pass


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    args = parser.parse_args()
    filename = args.filename

    with open(filename) as fh:
        lines = []
        for line in fh:
            lines.append(xu.parse_hadoop_line(line))

    header = 'num_rows,num_cols,num_clusters,num_views,time_per_step,which_kernel'
    print(header)
    reduced_lines = map(lambda x: x[1], lines)
    for reduced_line in reduced_lines:
        try:
            parsed_line = parse_reduced_line(reduced_line)
            print(','.join(map(str, parsed_line)))
        except Exception:
            # skip reduced lines that fail to parse
            pass
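The same logic as the script body above, wrapped as a reusable generator instead of printing; a sketch that keeps the header's column order and assumes xu and parse_reduced_line are in scope.

def timing_rows(filename):
    # yield one CSV-ready row (num_rows, ..., which_kernel) per Hadoop line
    # whose reduced part parses cleanly; silently skip the rest
    with open(filename) as fh:
        for line in fh:
            _key, reduced_line = xu.parse_hadoop_line(line)
            try:
                yield parse_reduced_line(reduced_line)
            except Exception:
                continue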
Example #10
                        testlist.append(impute_run_parameters)
            test_idx += 1

    print("Done.")

    # table data is empty because we generate it in the mapper
    table_data = dict(T=[], M_c=[], X_L=[], X_D=[])
    fu.pickle(table_data, table_data_filename)

    #####################
    if do_local:
        output_filename = os.path.join(directory_path, "output_local")
        output_file_object = open(output_filename, 'ab')
        with open(input_filename, 'rb') as infile:
            for line in infile:
                key, test_dict = xu.parse_hadoop_line(line)
                ret_dict = run_mi_test_local.run_mi_test_local(test_dict)
                xu.write_hadoop_line(output_file_object, key, ret_dict)
                print("%s\n\t%s" % (str(test_dict), str(ret_dict)))

        output_file_object.close()
        # generate the csv
        parse_mi.parse_data_to_csv(input_filename, params_dict, test_idx, output_filename)
        print("Done.")
    elif do_remote:
        # generate the massive hadoop files
        hadoop_engine = HE.HadoopEngine(output_path=output_path,
                                        input_filename=input_filename,
                                        table_data_filename=table_data_filename,
                                        which_engine_binary=which_engine_binary,
                                        hdfs_uri=hdfs_uri,
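The snippet above is truncated, but after the do_local branch finishes, the output file it wrote can be loaded with read_hadoop_output_file from Example #6; a sketch, assuming the file uses the key/value line format xu.parse_hadoop_line expects.

# hypothetical follow-up to the do_local branch
results = read_hadoop_output_file(output_filename)  # {key: ret_dict}
for key, ret_dict in results.items():
    print(key, ret_dict)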
Example #11
# tail of the method_lookup dispatch table used by the __main__ block below
                     analyze=analyze_helper,
                     time_analyze=time_analyze_helper,
                     convergence_analyze=convergence_analyze_helper,
                     chunk_analyze=chunk_analyze_helper,
                     mi_analyze=mi_analyze_helper)

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--table_data_filename',
                        type=str,
                        default=hs.default_table_data_filename)
    parser.add_argument('--command_dict_filename',
                        type=str,
                        default=hs.default_command_dict_filename)
    args = parser.parse_args()
    table_data_filename = args.table_data_filename
    command_dict_filename = args.command_dict_filename

    table_data = fu.unpickle(table_data_filename)
    command_dict = fu.unpickle(command_dict_filename)
    command = command_dict['command']
    method = method_lookup[command]
    #
    from signal import signal, SIGPIPE, SIG_DFL
    signal(SIGPIPE, SIG_DFL)
    for line in sys.stdin:
        key, data_dict = xu.parse_hadoop_line(line)
        ret_dict = method(table_data, data_dict, command_dict)
        xu.write_hadoop_line(sys.stdout, key, ret_dict)
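Every entry in method_lookup is called the same way inside the stdin loop above; a minimal sketch of the expected helper shape, with a hypothetical name and placeholder body (the project's real helpers do the actual analysis work).

def example_analyze_helper(table_data, data_dict, command_dict):
    # placeholder: a real helper runs its analysis on data_dict using
    # table_data, then returns a dict that xu.write_hadoop_line can serialize
    return dict(command=command_dict['command'], handled_keys=sorted(data_dict))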