def parse_line(line: str, tag_lst: list):
    data_len = len(tag_lst)
    if line is None:
        time_lst = [None for _ in range(data_len)]
    else:
        lst = line.split()[get_log_header_col():]
        # eval all between the ':' and 's'
        my_str = ''.join(lst)
        colon_idx_lst = []
        unit_idx_lst = []
        for idx, ch in enumerate(my_str):
            if ch == ':':
                colon_idx_lst.append(idx)
            if ch == 's':
                # only an 's' preceded by a digit is a time unit
                if idx >= 1 and my_str[idx - 1].isdigit():
                    unit_idx_lst.append(idx)
        if len(colon_idx_lst) == len(unit_idx_lst) == data_len:
            time_lst = [
                float(my_str[beg + 1:end])
                for beg, end in zip(colon_idx_lst, unit_idx_lst)
            ]
        else:
            logger = exec_utils.get_logger('/home/yche/logs/error.log', __name__)
            logger.info('err:' + my_str)
            time_lst = [None for _ in range(data_len)]
    return dict(
        zip(tag_lst,
            map(lambda ele: None if ele is None else float(ele), time_lst)))
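
# A minimal standalone sketch (not part of the original script) of the
# colon-to-'s' scanning idea in parse_line above; the header width of 3 and
# the sample log line in the comment below are assumptions for illustration.
def _demo_scan_times(line, tag_lst, header_cols=3):
    # join the payload tokens, then pair each ':' with the next digit-'s'
    payload = ''.join(line.split()[header_cols:])
    colon_idx_lst = [i for i, ch in enumerate(payload) if ch == ':']
    unit_idx_lst = [i for i, ch in enumerate(payload)
                    if ch == 's' and i >= 1 and payload[i - 1].isdigit()]
    return dict(zip(tag_lst, (float(payload[b + 1:e])
                              for b, e in zip(colon_idx_lst, unit_idx_lst))))

# Example:
#   _demo_scan_times('2019-10-06 12:00:00 INFO phase-a: 1.5s, phase-b: 2.25s',
#                    ['phase-a', 'phase-b'])
#   -> {'phase-a': 1.5, 'phase-b': 2.25}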
if __name__ == '__main__':
    base_dir = '/home/yche/'
    # base_dir = '/Users/cheyulin/'
    os.system('mkdir -p {}logs/'.format(base_dir))
    my_res_log_file_folder = config_lst[0]
    my_gpu_lst = config_lst[2]
    for hostname in my_gpu_lst:
        app_md_path = init_folder_md_json_file('..', hostname, user_output_md_file)
        for my_md_algorithm_name in config_lst[1]:
            json_file_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.json'
            json_file_path = os.sep.join(
                ['../data-json/', hostname, json_file_path])
            log_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.log'
            logger = exec_utils.get_logger(
                '{}logs/'.format(base_dir) + log_path, __name__)
            with open(app_md_path, 'a+') as output_md_file:
                # Dataset -> Thread Num -> Detailed Time Info
                config_dict = get_config_dict_via_hostname(hostname)
                root_dir = os.sep.join([
                    config_dict[exp_res_root_mount_path_tag],
                    my_res_log_file_folder,
                    hostname,
                ])
                dataset_lst = load_data_sets()
                reorder_tag = 'org'
                t_lst = list(map(str, config_dict[thread_num_lst_tag]))
                # Fetch data and parse it as a markdown file
                fetch_statistics(root_dir=root_dir,
                                 dataset_lst=dataset_lst,
                                 reorder_tag=reorder_tag,
                                 t_lst=t_lst,
                                 algorithm=my_md_algorithm_name,
                                 json_file_path=json_file_path)
from data_analysis.figures_icde19.play.common import *


def parse_ept():
    mount_dir = '{}/workspace/yche/git-repos/OutOfCoreSCAN/python_experiments/exp_results/exp-2019-10-06-parameters' \
                '/ustgpu2/{}/org/40/{}/'. \
        format('/home/yche/mnt/ustgpu2', 'webgraph_eu', 'dstc-tp')
    data = dict()
    for ept in [1.25 * (10 ** 8) * (2 ** i) for i in range(9)]:
        file_path = '{}/pkt-varying-{}.log'.format(mount_dir, ept)
        lines = get_file_lines(file_path)
        line = '\n'.join(lines)
        content = ret_all('TC-levels: .*, Time: .*]', line)[0]
        tc_dict = eval('{' + content.replace(
            'TC-levels', "'TC-levels'").replace('Time', "'Time'") + '}')
        time_dict = parse_lines(lines, local_tag_lst, total_time_tag, total_tag_lst)
        union(tc_dict, time_dict)
        data[ept] = tc_dict
        print(tc_dict)
    with open('{}/ept.json'.format(varying_parameter_json_dir), 'w') as ofs:
        ofs.write(json.dumps(data, indent=4))


if __name__ == '__main__':
    base_dir = '/home/yche/'
    os.system('mkdir -p {}logs/'.format(base_dir))
    logger = get_logger('/home/yche/tmp.log', __name__)
    parse_ept()
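
# A minimal sketch (the content string below is a made-up example) of the
# quote-and-eval trick used in parse_ept above: the two bare keys are quoted
# so that the log fragment becomes a dict literal. ast.literal_eval is a
# safer drop-in here, since the payload is a pure Python literal.
import ast

content = 'TC-levels: [3, 5], Time: [0.12, 0.34]'
quoted = '{' + content.replace('TC-levels', "'TC-levels'") \
    .replace('Time', "'Time'") + '}'
print(ast.literal_eval(quoted))
# -> {'TC-levels': [3, 5], 'Time': [0.12, 0.34]}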
import os

from exec_utilities.exec_utils import get_logger

if __name__ == '__main__':
    files = [
        # 'run_k_truss_reorderd_graph_PP_SI.py',
        # 'run_k_truss_reorderd_graph_nvprof.py',
        # 'run_k_truss_reorderd_graph_cuda.py',
        # 'run_k_truss_reorderd_graph.py',
        'run_k_truss_performance.py',
        'run_necleus_decomposition.py',
    ]
    work_dir = '/home/zlai/workspace/yche/git-repos/' \
               'OutOfCoreSCAN/python_experiments/exp_results'
    os.system('mkdir -p {}'.format(work_dir))

    hostname = 'ustgpu2'
    logger = get_logger(os.sep.join([work_dir, hostname + '.log']), name=__name__)
    for i in range(3):
        for f in files:
            cmd = 'python /home/zlai/workspace/yche/git-repos/' \
                  'OutOfCoreSCAN/python_experiments/run_experiments/{}'.format(f)
            logger.info(cmd)
            os.system(cmd)
import os

# assumption: both helpers live in the exec_utilities package, as in the
# sibling scripts that import exec_utils from there
from exec_utilities import exec_utils, time_out_util

if __name__ == '__main__':
    data_set_lst = [
        # "snap_livejournal",
        # "snap_orkut",
        # "webgraph_eu",
        # "webgraph_uk",
        # "webgraph_webbase",
        # "webgraph_it",
        # "webgraph_twitter",
        # "snap_friendster",
        # "rmat_v50m_e0.5g",
        # "rmat_v5m_e0.5g",
        # "rmat_v0.5m_e0.5g",
        "s22-16", "s23-16", "s24-16", "s25-16",
        "s26-16", "s27-16", "s28-16", "s29-16"
    ]
    # folder_root = '/home/yche/mnt/luocpu9/mnt/storage1/yche/datasets'
    folder_root = '/home/yche/mnt/gpu24/mnt/nvme-ssd/yche/datasets'
    logger = exec_utils.get_logger("/home/yche/log.log", name=__name__)
    for data_set in data_set_lst:
        cmd = 'md5sum ' + os.sep.join(
            [folder_root, data_set, 'ktruss-pkt-inter-shrink.histogram'])
        logger.info(time_out_util.run_with_timeout(cmd, timeout_sec=10))
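
# A minimal sketch of what a command-with-timeout helper can look like using
# only the standard library; this is an assumption about the behaviour of
# exec_utilities.time_out_util.run_with_timeout, not its actual code.
import subprocess


def run_with_timeout_sketch(cmd, timeout_sec):
    try:
        completed = subprocess.run(cmd, shell=True, capture_output=True,
                                   text=True, timeout=timeout_sec)
        return completed.stdout
    except subprocess.TimeoutExpired:
        return 'timeout after {}s'.format(timeout_sec)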
if __name__ == '__main__':
    os.system('mkdir -p /home/yche/logs/')
    my_res_log_file_folder = config_lst[0]
    my_gpu_lst = config_lst[2]
    dataset_lst = load_data_sets()
    t_num = '60'
    for hostname in my_gpu_lst:
        app_md_path = init_folder_md_json_file('..', hostname, user_output_md_file)
        app_md_simple_path = init_folder_md_json_file('..', hostname,
                                                      user_output_md_file_simple)
        for my_md_algorithm_name in config_lst[1]:
            json_file_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.json'
            json_file_path = os.sep.join(['../data-json/', hostname, json_file_path])
            log_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.log'
            logger = exec_utils.get_logger('/home/yche/logs/' + log_path, __name__)
            with open(app_md_path, 'a+') as output_md_file:
                with open(app_md_simple_path, 'a+') as output_md_file_simple:
                    # Dataset -> Thread Num -> Detailed Time Info
                    config_dict = get_config_dict_via_hostname(hostname)
                    root_dir = os.sep.join([
                        config_dict[exp_res_root_mount_path_tag],
                        my_res_log_file_folder,
                        hostname,
                    ])
                    # dataset_lst = config_dict[data_set_lst_tag]
                    reorder_tag = 'org'
                    t_lst = list(map(str, config_dict[thread_num_lst_tag]))
                    # Fetch data and parse it as a markdown file
                    fetch_statistics(root_dir=root_dir,
                                     dataset_lst=dataset_lst,
                                     reorder_tag=reorder_tag,
                                     t_lst=t_lst,
                                     algorithm=my_md_algorithm_name,
                                     json_file_path=json_file_path)
        return eval(''.join(ifs.readlines()))


# data set abbreviation dictionary
data_names = get_name_dict()

# figure parameters
FIG_SIZE_MULTIPLE = (32, 6)
LABEL_SIZE = 22
TICK_SIZE = 22
LEGEND_SIZE = 22

# get the data for figures
index_info_dict = get_index_dict_with_reads('../../data_analysis')
logger = get_logger(
    '{}/tkde_indexing_time_drawing_log.log'.format(yche_logger_root_dir),
    __name__)
logger.info(index_info_dict)

digg_data_tag = 'digg-friends'
flickr_data_tag = 'flickr-growth'


def get_tsf_index_disk_size(v_num):
    sample_one_way_graph_num = 100
    return float(
        format_str(sample_one_way_graph_num * size_of_int * v_num / (1024. ** 2)))


other_indexing_dict = {
    local_push_tag: {
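        # A worked example for get_tsf_index_disk_size above, assuming
        # size_of_int == 4 bytes: with v_num = 10**6,
        # 100 * 4 * 10**6 / 1024.**2 ~= 381.47 MB
        # (100 sampled one-way graphs, one int per vertex each).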
        for t_num in t_lst:
            file_path = os.sep.join(
                [root_dir, dataset, reorder_tag, t_num, algorithm + '.log'])
            logger.info(file_path)
            lines = get_file_lines(file_path)
            eid_time_lst = get_tc_time_lst('\n'.join(lines))
            my_dict[dataset][t_num] = min(eid_time_lst)
    with open(json_file_path, 'w') as ofs:
        ofs.write(json.dumps(my_dict, indent=4))


if __name__ == '__main__':
    base_dir = '/home/yche/'
    os.system('mkdir -p {}'.format(si_dir))
    logger = exec_utils.get_logger(
        '{}logs/'.format(base_dir) + 'ustgpu2-si.log', __name__)
    root_dir_template = '{}mnt/ustgpu2/workspace/yche/git-repos/' \
                        'OutOfCoreSCAN/python_experiments/exp_results/{}/ustgpu2'
    data_set_lst = ['webgraph_eu', 'webgraph_it', 'webgraph_twitter']
    dtc_wp_root_dir = 'exp-2019-10-05-eid'
    baseline_root_dir = 'exp-2019-10-04-eid'
    for name in ['pkt-eval-tc-wp']:
        fetch_statistics(root_dir_template.format(base_dir, dtc_wp_root_dir),
                         data_set_lst, 'org',
                         [str(i) for i in [1, 2, 4, 8, 16, 32, 40]], name,
                         '{}/{}'.format(si_dir, name + '.json'))
    for name in ['pkt-eval-tc-dtc']:
        fetch_statistics(root_dir_template.format(base_dir, dtc_wp_root_dir),
                         data_set_lst, 'org', [str(i) for i in [40]], name,
                         '{}/{}'.format(si_dir, name + '.json'))
from data_analysis.util.read_file_utils_updated import *
from config import *
from exec_utilities import exec_utils

iter_tag = 'iter'
time_tag = 'time'
left_tag = 'left'
total_tag = 'total'

if __name__ == '__main__':
    logger = exec_utils.get_logger('/home/yche/analyzing_peel.log', __name__)
    config_dict = get_config_dict(gpu23_tag, '../..')
    root_dir = os.sep.join([
        config_dict[exp_res_root_mount_path_tag],
        'exp-2019-06-24-k-truss-refactor',
        gpu23_tag,
    ])
    dataset_lst = config_dict[data_set_lst_tag]

    def parse_line(line):
        lst = line.split()[get_log_header_col():]
        # pick the iteration count, elapsed time, and 'left/total' fields,
        # stripping the trailing time unit and commas
        triple = [literal.replace('s', '').replace(',', '')
                  for literal in [lst[-6], lst[-3], lst[-1]]]
        left = triple[2].split('/')[0]
        right = triple[2].split('/')[1]
        return dict(
            zip([iter_tag, time_tag, left_tag, total_tag],
                [int(triple[0]), float(triple[1]), int(left), int(right)]))

    for dataset in dataset_lst:
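    # A hedged worked example for parse_line above, assuming the payload after
    # the log header ends with tokens shaped like
    # 'iter 3, elapsed time is 0.42s, left/total 123/456', so that
    #   lst[-6] == '3,', lst[-3] == '0.42s,', lst[-1] == '123/456'
    # and the result is {'iter': 3, 'time': 0.42, 'left': 123, 'total': 456}.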
    my_dict = dict()
    for dataset in dataset_lst:
        my_dict[dataset] = dict()
        for t_num in t_lst:
            file_path = os.sep.join(
                [root_dir, dataset, reorder_tag, t_num, algorithm + '.log'])
            logger.info(file_path)
            lines = get_file_lines(file_path)
            # pkt-eid keeps the maximum reported time, the others the minimum
            functor = max if algorithm == 'pkt-eid' else min
            eid_time_lst = get_eid_time_lst('\n'.join(lines))
            my_dict[dataset][t_num] = functor(eid_time_lst)
    with open(json_file_path, 'w') as ofs:
        ofs.write(json.dumps(my_dict, indent=4))


if __name__ == '__main__':
    base_dir = '/home/yche/'
    os.system('mkdir -p {}'.format(gpu23_pp_dir))
    logger = exec_utils.get_logger('{}logs/'.format(base_dir) + 'gpu23-pp.log',
                                   __name__)
    my_res_log_file_folder = 'exp-2019-10-07-eid'
    root_dir = '{}mnt/luocpu9/mnt/storage1/yche/git-repos/' \
               'OutOfCoreSCAN/python_experiments/exp_results/{}/gpu23'.format(
                   base_dir, my_res_log_file_folder)
    for name in ['pkt-eid', 'pkt-eid-parallel']:
        fetch_statistics(root_dir,
                         ['webgraph_eu', 'webgraph_it', 'webgraph_twitter'],
                         'org',
                         [str(i) for i in [1, 2, 4, 8, 16, 32, 40]], name,
                         '{}/{}'.format(gpu23_pp_dir, name + '.json'))