def one_round():
    for data_set_name in data_set_lst:
        params_lst = map(str, [exec_path, data_set_path + os.sep + data_set_name, statistics_file_path])
        cmd = ' '.join(params_lst)
        # print cmd
        time_out = 700
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')

        params_lst = map(str, [verify_path, data_set_path + os.sep + data_set_name, statistics_file_path])
        cmd = ' '.join(params_lst)
        # print 'verify', cmd
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')

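# These drivers all assume a helper time_out_util.run_with_timeout(cmd, timeout_sec, env=None)
# that returns a triple (tle_flag, info, correct_info). Below is a minimal sketch consistent
# with that usage, built on subprocess; this is an assumption, not the real time_out_util
# (which may capture output or kill the process tree differently). Here stderr is mapped to
# `info` and stdout to `correct_info`:
import subprocess


def run_with_timeout(cmd, timeout_sec, env=None):
    try:
        proc = subprocess.run(cmd, shell=True, env=env, timeout=timeout_sec,
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                              universal_newlines=True)
        return False, proc.stderr, proc.stdout
    except subprocess.TimeoutExpired:
        # this sketch discards any partial output on timeout
        return True, '', ''
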
def check_result():
    # compare the md5 of the scanxp result file against the ground-truth result file
    tmp_lst = []
    gt_res_file_name = '-'.join(map(str, ['result', eps, mu])) + '.txt'
    res_file_path = os.sep.join([data_set_path, data_set_name, 'scanxp-' + gt_res_file_name])
    tmp_tle_flag, tmp_info, tmp_correct_info = time_out_util.run_with_timeout(
        ' '.join(['md5sum', res_file_path]), timeout_sec=time_out)
    tmp_lst.append(tmp_info + tmp_correct_info)

    tmp_tle_flag, tmp_info, tmp_correct_info = time_out_util.run_with_timeout(
        ' '.join(['md5sum', os.sep.join([data_set_path, data_set_name, gt_res_file_name])]),
        timeout_sec=time_out)
    tmp_lst.append(tmp_info + tmp_correct_info)

    if len(tmp_lst[0].split()) > 0 and len(tmp_lst[1].split()) > 0 and \
            tmp_lst[0].split()[0] == tmp_lst[1].split()[0]:
        return True
    return 'False\n' + '\n'.join(map(str, tmp_lst))

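# The check above shells out to `md5sum`; the same comparison could be done in-process with
# hashlib. This is a sketch of an alternative, not the helper the scripts actually use:
import hashlib


def md5_of(path, chunk_size=1 << 20):
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(chunk_size), b''):
            h.update(block)
    return h.hexdigest()
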
def one_round():
    statistics_dir = os.sep.join(map(str, ['.', exp_res_root_name, folder_name]))
    os.system('mkdir -p ' + statistics_dir)
    statistics_file_path = statistics_dir + os.sep + folder_name + '.txt'
    logger.info(statistics_file_path)
    thread_num = my_config_dict[thread_num_tag]
    for n, m, dataset_name in n_m_lst:
        # 1st: write header
        os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

        # 2nd: run rmat cmd
        rmat_path = our_exec_path + os.sep + rmat_exec_name
        params_lst = map(str, [rmat_path, '-nVertices', n, '-nEdges', m,
                               '-noEdgeToSelf', '-noDuplicateEdges', '-undirected',
                               '-memUsage', 0.8, '-threads', thread_num,
                               '-outbin', data_set_path + os.sep + dataset_name] +
                         ['>>&', statistics_file_path])
        os.system('mkdir -p ' + data_set_path + os.sep + dataset_name)
        cmd = ' '.join(params_lst)
        logger.info('cmd: {}'.format(cmd))
        time_out = 7200
        my_env = os.environ.copy()
        my_env['OMP_NUM_THREADS'] = str(thread_num)
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)

        # 3rd: append outputs
        write_split(statistics_file_path)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')
        logger.info('finish: {}'.format(cmd))

        # 4th: run reordering cmd
        converter_path = reorder_path + os.sep + reorder_exec_name
        params_lst = map(str, [converter_path, data_set_path + os.sep + dataset_name, statistics_file_path])
        cmd = ' '.join(params_lst)
        logger.info('cmd: {}'.format(cmd))
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
        write_split(statistics_file_path)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')
        logger.info('finish: {}'.format(cmd))

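# `write_split` is defined elsewhere; judging from how the drivers interleave it with the
# statistics appends, it writes a separator line into the statistics file. A hedged sketch
# of the assumed behavior, reusing the module-level `my_splitter`:
def write_split(statistics_file_path):
    with open(statistics_file_path, 'a+') as ofs:
        ofs.write('\n' + my_splitter + my_splitter + '\n')
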
def one_round():
    for data_set_name in data_set_lst:
        # assume we already have all the edge-list binaries
        reorder_dict = dict(zip(['gpu15', 'gpu16', 'gpu17', 'gpu21', 'gpu22'],
                                ['hybrid', 'slashburn', 'bfsr', 'dfs', 'gro']))
        order_tag = reorder_dict[hostname]
        # for order_tag in ['hybrid', 'slashburn', 'bfsr', 'dfs', 'gro']:
        time_out = 7200000 if order_tag == 'gro' else 3600000
        statistics_file_path = root_path + 'han-' + order_tag + '.log'
        params_lst = map(str, [han_exec_path, os.sep.join([data_set_path, data_set_name]),
                               '-order', order_tag])
        cmd = ' '.join(params_lst)
        # print cmd
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(info)
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')

def one_round():
    for data_set_name in data_set_lst:
        for our_algorithm in our_exec_name_lst:
            statistics_dir = os.sep.join(map(str, ['.', tag, folder_name, data_set_name]))
            os.system('mkdir -p ' + statistics_dir)
            statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'

            # 1st: write header
            os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

            algorithm_path = our_exec_path + os.sep + our_algorithm
            params_lst = map(str, [algorithm_path, data_set_name, '>>', statistics_file_path])
            cmd = ' '.join(params_lst)
            time_out = 1000 if data_set_name != 'soc-LiveJournal1' else 3600
            tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)

            write_split(statistics_file_path)
            with open(statistics_file_path, 'a+') as ofs:
                ofs.write(correct_info)
                ofs.write(my_splitter + time.ctime() + my_splitter)
                ofs.write('is_time_out:' + str(tle_flag))
                ofs.write('\n\n\n\n')

def one_round():
    for data_set_name in data_set_lst:
        for our_algorithm in our_exec_name_lst:
            # for sample_num in list(reversed([10 ** 3, 10 ** 4, 10 ** 5, 10 ** 6])):
            for sample_num in [10 ** 6]:
                is_cur_sample_scale_tle = False
                for round_idx in range_idx_lst:
                    statistics_dir = os.sep.join(map(str, ['.', tag, folder_name, data_set_name,
                                                           sample_num, round_idx]))
                    os.system('mkdir -p ' + statistics_dir)
                    statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'

                    # 1st: write header
                    os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

                    algorithm_path = our_exec_path + os.sep + our_algorithm
                    params_lst = map(str, [algorithm_path, data_set_name, sample_num, round_idx,
                                           '>>', statistics_file_path])
                    cmd = ' '.join(params_lst)
                    time_out = 3600
                    tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)

                    write_split(statistics_file_path)
                    with open(statistics_file_path, 'a+') as ofs:
                        ofs.write(correct_info)
                        ofs.write(my_splitter + time.ctime() + my_splitter)
                        ofs.write('is_time_out:' + str(tle_flag))
                        ofs.write('\n\n\n\n')

                    if tle_flag:
                        is_cur_sample_scale_tle = True
                        break
                if is_cur_sample_scale_tle:
                    break

def one_round():
    for data_set_name in data_set_lst:
        statistics_dir = os.sep.join(map(str, ['.', exp_res_root_name, folder_name]))
        os.system('mkdir -p ' + statistics_dir)
        statistics_file_path = statistics_dir + os.sep + data_set_name + '.txt'

        # 1st: write header
        os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

        # 2nd: run exec cmd
        algorithm_path = '/mnt/nfs/var/nfsshare/yche/git-repos/OutOfCoreSCAN/degree-statistics/build/' \
                         'statistics_deg_ordered_directed_graph'
        params_lst = map(str, [algorithm_path, os.sep.join([data_set_path, data_set_name]),
                               statistics_file_path])
        cmd = ' '.join(params_lst)
        # print cmd
        time_out = 1000
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)

        # 3rd: append outputs
        write_split(statistics_file_path)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')

def one_round():
    order_tag = 'rcm-cache'
    weihao_exec_path = my_config_dict[weihao_order_exec_path_tag]
    if hostname.startswith('gpu13'):
        order_tag = 'cache'
        weihao_exec_path = my_config_dict[weihao_order_exec_path_tag] + '_no_rcm'
    data_set_lst = filter(lambda name: 'rmat' in name, my_config_dict[data_set_lst_tag])
    # print data_set_lst
    for data_set_name in data_set_lst:
        # assume already have all the edge list binaries
        time_out = 72000
        statistics_file_path = root_path + 'weihao-' + order_tag + '.log'
        params_lst = map(str, [weihao_exec_path, os.sep.join([data_set_path, data_set_name])])
        cmd = ' '.join(params_lst)
        # print cmd
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(info)
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')

def one_round():
    # algorithm by algorithm to finish the experiment
    for algorithm_path in exec_path_lst:
        for c in c_lst:
            algorithm = algorithm_path.split('/')[-1]
            statistics_dir = os.sep.join(map(str, ['.', tag, folder_name, data_set_name,
                                                   sample_num_dict[algorithm_path], c]))
            os.system('mkdir -p ' + statistics_dir)
            statistics_file_path = statistics_dir + os.sep + algorithm + '.txt'

            # 1st: write header
            os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

            params_lst = map(str, [algorithm_path, data_set_name, sample_num_dict[algorithm_path],
                                   round_idx, c, eps, delta, 200, '>>', statistics_file_path])
            cmd = ' '.join(params_lst)
            time_out = 3600

            # 2nd: run cmd
            tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
            write_split(statistics_file_path)
            with open(statistics_file_path, 'a+') as ofs:
                ofs.write(correct_info)
                ofs.write(my_splitter + time.ctime() + my_splitter)
                ofs.write('is_time_out:' + str(tle_flag))
                ofs.write('\n\n\n\n')

            # 3rd: if tle, break
            if tle_flag:
                break

def one_round(is_rev_deg=False):
    for data_set_name in data_set_lst:
        # dropping the page cache requires running as the user `root`
        if env_tag == knl_tag:
            os.system('echo 3 > /proc/sys/vm/drop_caches')  # avoid page-cache influence
        if is_rev_deg:
            data_set_name = data_set_name + os.sep + rev_deg_order_tag
        for _ in range(3 if env_tag == lccpu12_tag else 1):
            for our_algorithm in our_exec_name_lst:
                statistics_dir = os.sep.join(map(str, ['.', exp_res_root_name, folder_name,
                                                       data_set_name, normal_no_reorder_tag]))
                os.system('mkdir -p ' + statistics_dir)
                statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'

                # 1st: write header
                os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

                # 2nd: run exec cmd
                algorithm_path = our_exec_path + os.sep + our_algorithm
                params_lst = map(str, [algorithm_path, data_set_path + os.sep + data_set_name,
                                       statistics_file_path])
                cmd = ' '.join(params_lst)
                time_out = 1200
                my_env = os.environ.copy()
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)

                # 3rd: append outputs
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write('\nis_time_out:' + str(tle_flag))
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('\n\n\n\n')

def one_round():
    for data_set_name in data_set_lst:
        for our_algorithm in our_algorithm_lst:
            statistics_folder_path = os.sep.join(['plp_parallel_gen_idx_0405', our_algorithm, data_set_name])
            os.system('mkdir -p ' + statistics_folder_path)
            for thread_num in list(reversed(thread_num_lst)):
                # os.system('export OMP_NUM_THREADS=' + str(thread_num))
                algorithm_path = os.sep.join([our_exec_path, our_algorithm])
                statistics_file_path = os.sep.join([statistics_folder_path, str(thread_num) + '.txt'])
                params_lst = map(str, [algorithm_path, data_set_name, 0.116040, 'save',
                                       '>>', statistics_file_path])
                cmd = ' '.join(params_lst)
                time_out = 36000
                my_env = os.environ.copy()
                my_env['OMP_NUM_THREADS'] = str(thread_num)
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('is_time_out:' + str(tle_flag))
                    ofs.write('\n\n\n\n')

def one_round(bool_val):
    timestamp_str = str(datetime.datetime.now()).split('.')[0].replace(' ', '-').replace(':', '-')
    name_str = 'rlp_dynamic_update_time_' if 'rlp' in our_algorithm \
        else 'flp_dynamic_update_time_'
    statistics_file_path = 'exp_results/' + name_str + str(insert_edge_num) + '_' + timestamp_str + '.txt'
    for data_set_name in data_set_lst:
        algorithm_path = our_exec_path + os.sep + our_algorithm
        params_lst = map(str, [algorithm_path, data_set_name, insert_edge_num,
                               'ins' if bool_val else 'del', '>>', statistics_file_path])
        cmd = ' '.join(params_lst)
        # time_out = 1000 if data_set_name != 'soc-LiveJournal1' else 3600
        time_out = 72000
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
        write_split(statistics_file_path)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('is_time_out:' + str(tle_flag))
            ofs.write('\n\n\n\n')

def one_round():
    t_num = 64
    for data_set_name in data_set_lst:
        for our_algorithm in our_exec_name_lst:
            for kt_type_name in ['12', '23']:
                # for hierarchy in ['NO', 'YES']:
                # for hierarchy in ['YES']:
                for hierarchy in ['NO']:
                    statistics_dir = os.sep.join(map(str, [work_dir, data_set_name, t_num, hierarchy]))
                    output_dir = os.sep.join([statistics_dir, 'output'])
                    os.system('mkdir -p ' + os.sep.join([statistics_dir, 'log']))
                    os.system('mkdir -p ' + output_dir)
                    dstat_file_path = statistics_dir + os.sep + our_algorithm + '-' + kt_type_name + '-dstat.log'
                    log_file_path = os.sep.join([statistics_dir, 'log',
                                                 '-'.join([our_algorithm, kt_type_name, 'raw.log'])])
                    logger.info('log file path: {}'.format(log_file_path))

                    # 1st: append headers
                    append_header(dstat_file_path)
                    append_header(log_file_path)

                    # 2nd: run exec cmd
                    algorithm_path = our_exec_path + os.sep + our_algorithm
                    os.chdir(output_dir)
                    params_lst = map(str, ['cgexec -g memory:yche-exp' if with_c_group else '',
                                           algorithm_path, data_set_path + os.sep + data_set_name,
                                           kt_type_name, hierarchy])
                    cmd = ' '.join(params_lst)
                    logger.info('exec-cmd: {}'.format(cmd))
                    time_out = 3600 * 10
                    my_env = os.environ.copy()

                    def execute_cmd(my_cmd):
                        logger.info('sub-process: {}'.format(my_cmd))
                        os.system(my_cmd)

                    # 3rd: spawn a new process to run the dstat monitor
                    dstat_cmd = 'dstat -tcdrlmgyn --fs >> ' + dstat_file_path
                    p = Process(target=execute_cmd, args=(dstat_cmd,))
                    p.start()
                    my_env['OMP_NUM_THREADS'] = str(t_num)
                    tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
                    time_out_util.kill_term_recursive(p.pid)
                    modify_dstat_file(dstat_file_path)

                    # 4th: append outputs
                    if len(info) > 0:
                        with open(log_file_path, 'a+') as ofs:
                            ofs.write(info)
                    logger.info('finish: {}'.format(cmd))

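# `append_header` and `time_out_util.kill_term_recursive` are likewise defined elsewhere.
# Hedged sketches consistent with their call sites: append_header presumably mirrors the
# `echo` headers used by the other drivers, and kill_term_recursive can be approximated
# with psutil (an assumption; the real helper may signal the process group instead):
import psutil


def append_header(file_path):
    with open(file_path, 'a+') as ofs:
        ofs.write(my_splitter + time.ctime() + my_splitter + '\n')


def kill_term_recursive(pid):
    # terminate the monitor process together with any children it spawned
    parent = psutil.Process(pid)
    for child in parent.children(recursive=True):
        child.terminate()
    parent.terminate()
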
def one_round():
    for data_set_name in data_set_lst:
        for reorder_method in filtered_reorder_lst:
            for our_algorithm in our_exec_name_lst:
                for t_num in thread_num_lst:
                    statistics_dir = os.sep.join(map(str, ['.', exp_res_root_name, folder_name,
                                                           data_set_name, reorder_method, t_num]))
                    os.system('mkdir -p ' + os.sep.join([statistics_dir, 'log']))
                    os.system('mkdir -p ' + os.sep.join([statistics_dir, 'dstat']))
                    os.system('mkdir -p ' + os.sep.join([statistics_dir, 'dstat_clean']))
                    statistics_file_path = statistics_dir + os.sep + our_algorithm + '.log'
                    dstat_file_path = os.sep.join([statistics_dir, 'dstat', our_algorithm + '-dstat.log'])
                    log_file_path = os.sep.join([statistics_dir, 'log', our_algorithm + '-raw.log'])
                    logger.info('stat file path: {}'.format(statistics_file_path))

                    # 1st: write header
                    append_header(statistics_file_path)
                    append_header(dstat_file_path)
                    append_header(log_file_path)

                    # 2nd: run exec cmd
                    algorithm_path = our_exec_path + os.sep + our_algorithm
                    params_lst = map(str, ['cgexec -g memory:yche-exp' if with_c_group else '',
                                           algorithm_path, data_set_path + os.sep + data_set_name,
                                           reorder_method, statistics_file_path])
                    cmd = ' '.join(params_lst)
                    logger.info('exec-cmd: {}'.format(cmd))
                    time_out = 3600 * 5 if 'cuda' not in our_algorithm else 600
                    my_env = os.environ.copy()

                    def execute_cmd(my_cmd):
                        logger.info('sub-process: {}'.format(my_cmd))
                        os.system(my_cmd)

                    # 3rd: spawn a new process to run the dstat monitor
                    dstat_cmd = 'dstat -tcdrlmgyn --fs >> ' + dstat_file_path
                    p = Process(target=execute_cmd, args=(dstat_cmd,))
                    p.start()
                    my_env['OMP_NUM_THREADS'] = str(t_num)
                    tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
                    time_out_util.kill_term_recursive(p.pid)
                    modify_dstat_file(dstat_file_path)

                    # 4th: append outputs
                    write_split(statistics_file_path)
                    with open(statistics_file_path, 'a+') as ofs:
                        ofs.write(correct_info)
                        ofs.write('\nis_time_out:' + str(tle_flag))
                        ofs.write(my_splitter + time.ctime() + my_splitter)
                        ofs.write('\n\n\n\n')
                    if len(info) > 0:
                        with open(log_file_path, 'a+') as ofs:
                            ofs.write(info)
                    logger.info('finish: {}'.format(cmd))

def exec_compile_lst(build_dir, src_dir, extra_cmake_options):
    compile_cmd_lst = [
        # ' '.join(['rm -r', build_dir]),
        ' '.join(['mkdir -p', build_dir]),
        ' '.join(['cmake', '-B' + build_dir, '-H' + src_dir] + extra_cmake_options),
        ' '.join(['make', '-C', build_dir, '-j'])
    ]
    for compile_cmd in compile_cmd_lst:
        logger.info(compile_cmd)
        is_time_out, stderr, stdout = time_out_util.run_with_timeout(compile_cmd, timeout_sec=100)
        logger.info('is_time_out:{}'.format(is_time_out))
        logger.info('stderr: {}'.format(stderr))
        logger.info('stdout: {}'.format(stdout))

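# Example invocation (the build/source paths and the cmake option are illustrative only):
# exec_compile_lst(build_dir='build', src_dir='.', extra_cmake_options=['-DCMAKE_BUILD_TYPE=Release'])
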
def one_round():
    for data_set_name in data_set_lst:
        algorithm_path = our_exec_path + os.sep + our_algorithm
        statistics_file_path = 'exp_results/rlp_dynamic.txt'
        params_lst = map(str, [algorithm_path, data_set_name, '>>', statistics_file_path])
        cmd = ' '.join(params_lst)
        time_out = 1000 if data_set_name != 'soc-LiveJournal1' else 3600
        tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
        write_split(statistics_file_path)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('is_time_out:' + str(tle_flag))
            ofs.write('\n\n\n\n')
        print('finish:', cmd)

def one_round():
    for exec_name in dynamic_exec_tag_lst:
        for data_set_name in data_set_lst:
            algorithm_path = our_exec_path + os.sep + exec_name
            statistics_file_path = ('exp_results/' + exec_name + '_dynamic_update_time_' +
                                    str(insert_edge_num) + '_0407.txt')
            params_lst = map(str, [algorithm_path, data_set_name, '>>', statistics_file_path])
            cmd = ' '.join(params_lst)
            time_out = 72000
            tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
            write_split(statistics_file_path)
            with open(statistics_file_path, 'a+') as ofs:
                ofs.write(correct_info)
                ofs.write(my_splitter + time.ctime() + my_splitter)
                ofs.write('is_time_out:' + str(tle_flag))
                ofs.write('\n\n\n\n')

def one_round():
    for data_set_name in data_set_lst:
        data_set_name = data_set_name.replace('0.5', '0dot5')
        for our_algorithm in our_exec_name_lst:
            thread_num_lst = my_config_dict[thread_num_lst_tag]
            for t_num in thread_num_lst:
                statistics_dir = os.sep.join(map(str, ['.', exp_res_root_name, folder_name,
                                                       data_set_name, t_num]))
                os.system('mkdir -p ' + statistics_dir)
                statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'
                # print statistics_file_path

                # 1st: write header
                os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

                # 2nd: run exec cmd
                algorithm_path = our_exec_path + os.sep + our_algorithm
                params_lst = map(str, [algorithm_path, '-f',
                                       data_set_path + os.sep + data_set_name + os.sep + 'undir_edge_list.bin',
                                       '-a', '0', '&>>', statistics_file_path])
                cmd = ' '.join(params_lst)
                # print 'cmd: ', cmd
                time_out = 80000
                my_env = os.environ.copy()
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
                if tle_flag:
                    break

                # 3rd: append outputs
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write('\nis_time_out:' + str(tle_flag))
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('\n\n\n\n')

def one_round():
    for data_set_name in data_set_lst:
        algorithm_path = our_exec_path + os.sep + tsf_dynamic
        for parameter in ['ins', 'del']:
            statistics_file_path = ('exp_results/' + 'tsf_dynamic_update_time_' +
                                    str(insert_edge_num) + '_' + parameter + '_0407.txt')
            params_lst = map(str, [algorithm_path, data_set_name, insert_edge_num, parameter,
                                   '>>', statistics_file_path])
            cmd = ' '.join(params_lst)
            time_out = 1000 if data_set_name != 'soc-LiveJournal1' else 3600
            tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
            write_split(statistics_file_path)
            with open(statistics_file_path, 'a+') as ofs:
                ofs.write(correct_info)
                ofs.write(my_splitter + time.ctime() + my_splitter)
                ofs.write('is_time_out:' + str(tle_flag))
                ofs.write('\n\n\n\n')

def one_round():
    for data_set_name in data_set_lst:
        # dropping the page cache requires running as the user `root`
        if env_tag == knl_tag:
            os.system('echo 3 > /proc/sys/vm/drop_caches')
        data_set_name = data_set_name + os.sep + rev_deg_order_tag
        for our_algorithm in our_exec_name_lst:
            for vid in range(0, 1000, 10):
                t_num = thread_num
                statistics_dir = os.sep.join(map(str, ['.', exp_res_root_name, folder_name,
                                                       data_set_name, t_num]))
                os.system('mkdir -p ' + statistics_dir)
                statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'

                # 1st: write header
                os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

                # 2nd: run exec cmd
                algorithm_path = our_exec_path + os.sep + our_algorithm
                params_lst = map(str, [algorithm_path, data_set_path + os.sep + data_set_name,
                                       vid, t_num, statistics_file_path, '> /dev/null 2>&1'])
                cmd = ' '.join(params_lst)
                time_out = 20000
                my_env = os.environ.copy()
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)

                # 3rd: append outputs
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write('\nis_time_out:' + str(tle_flag))
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('\n\n\n\n')

def one_round(reorder_method='.'):
    statistics_file_path = root_path + 'han-' + reorder_method + '.log'
    for data_set_name in data_set_lst:
        for eps in eps_lst:
            for mu in mu_lst:
                algorithm_path = my_config_dict[ppSCAN_exec_path_tag]
                params_lst = map(str, [algorithm_path,
                                       os.sep.join([data_set_path, data_set_name, reorder_method]),
                                       eps, mu, 'output', '> /dev/null 2>&1'])
                cmd = ' '.join(params_lst)
                # print cmd
                time_out = 7000
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(info)
                    ofs.write(correct_info)
                    ofs.write('\nis_time_out:' + str(tle_flag))
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('\n\n\n\n')

def one_round():
    for data_set_name in data_set_lst:
        for our_algorithm in our_exec_name_lst:
            thread_num_lst = my_config_dict[thread_num_lst_tag]
            for t_num in thread_num_lst:
                for reorder_method in reorder_method_lst:
                    statistics_dir = os.sep.join(map(str, ['.', exp_res_root_name, folder_name,
                                                           data_set_name, reorder_method, t_num]))
                    os.system('mkdir -p ' + statistics_dir)
                    statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'

                    # 1st: write header
                    os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

                    # 2nd: run exec cmd
                    algorithm_path = our_exec_path + os.sep + our_algorithm
                    params_lst = map(str, [algorithm_path, data_set_path + os.sep + data_set_name,
                                           t_num, reorder_method, statistics_file_path,
                                           '> /dev/null 2>&1'])
                    cmd = ' '.join(params_lst)
                    time_out = 300
                    my_env = os.environ.copy()
                    tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)

                    # 3rd: append outputs
                    write_split(statistics_file_path)
                    with open(statistics_file_path, 'a+') as ofs:
                        ofs.write(correct_info)
                        ofs.write('\nis_time_out:' + str(tle_flag) + '\n')
                        ofs.write(my_splitter + time.ctime() + my_splitter)
                        ofs.write('\n\n\n\n')

def one_round():
    # algorithm by algorithm to finish the experiment
    for other_algorithm_path in other_exec_path_lst:
        for data_set_name in data_set_lst:
            for sample_num in sample_num_lst:
                is_cur_sample_scale_tle = False
                for round_idx in range_idx_lst:
                    other_algorithm = other_algorithm_path.split('/')[-1]
                    statistics_dir = os.sep.join(map(str, ['.', tag, folder_name, data_set_name,
                                                           sample_num, round_idx]))
                    os.system('mkdir -p ' + statistics_dir)
                    statistics_file_path = statistics_dir + os.sep + other_algorithm + '.txt'

                    # 1st: write header
                    os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

                    params_lst = map(str, [other_algorithm_path, data_set_name, sample_num, round_idx,
                                           '0.6 0.01 0.01' if (other_algorithm_path.endswith('ProbeSim-gt') or
                                                               other_algorithm_path.endswith('ProbeSim')) else '',
                                           '>>', statistics_file_path])
                    cmd = ' '.join(params_lst)
                    # time_out = 1200 if data_set_name != 'soc-LiveJournal1' else 3600
                    time_out = 36000 if other_algorithm.endswith('sling-rand-bench') else 3600
                    tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
                    write_split(statistics_file_path)
                    with open(statistics_file_path, 'a+') as ofs:
                        ofs.write(correct_info)
                        ofs.write(my_splitter + time.ctime() + my_splitter)
                        ofs.write('is_time_out:' + str(tle_flag))
                        ofs.write('\n\n\n\n')

                    if tle_flag:
                        is_cur_sample_scale_tle = True
                        break
                if is_cur_sample_scale_tle:
                    break

def one_round():
    for data_set_name in data_set_lst:
        for our_algorithm in our_algorithm_lst:
            statistics_folder_path = os.sep.join(['plp_scalability_results_04_24',
                                                  our_algorithm, data_set_name])
            os.system('mkdir -p ' + statistics_folder_path)
            for thread_num in list(reversed(thread_num_lst)):
                # os.system('export OMP_NUM_THREADS=' + str(thread_num))
                algorithm_path = os.sep.join([our_exec_path, our_algorithm])
                statistics_file_path = os.sep.join([statistics_folder_path, str(thread_num) + '.txt'])
                # params_lst = map(str, [algorithm_path, data_set_name, 0.01, 'save', '>>', statistics_file_path])
                params_lst = map(str, [algorithm_path, data_set_name, 0.01, '>>', statistics_file_path])
                cmd = ' '.join(params_lst)
                # time_out = 3600 if data_set_name != 'soc-LiveJournal1' else 7200
                time_out = 36000
                my_env = os.environ.copy()
                my_env['OMP_NUM_THREADS'] = str(thread_num)
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('is_time_out:' + str(tle_flag))
                    ofs.write('\n\n\n\n')
                print('finish:' + cmd)

def one_round():
    # algorithm by algorithm to finish the experiment
    for algorithm_path in exec_path_lst:
        for data_set_name in data_set_name_lst:
            eps_lst = list(reversed([0.0001, 0.0004, 0.0016, 0.0064, 0.0256]))[2:3]
            if 'sling' in algorithm_path or 'reads-d' in algorithm_path:
                eps_lst = list(reversed([0.0016, 0.0064, 0.0256]))
            elif 'tsf' in algorithm_path:
                eps_lst = list(reversed([0.0064, 0.0256]))
            for eps in eps_lst:
                algorithm = algorithm_path.split('/')[-1]
                statistics_dir = os.sep.join(map(str, ['.', tag, folder_name, sample_num, k,
                                                       data_set_name, eps]))
                os.system('mkdir -p ' + statistics_dir)
                statistics_file_path = statistics_dir + os.sep + algorithm + '.txt'

                # 1st: write header
                os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

                if algorithm_path.endswith('reads-rq-rand-bench'):
                    eps = max(0.008, eps)
                params_lst = map(str, [algorithm_path, data_set_name, sample_num, round_idx, k, eps,
                                       '>>', statistics_file_path])
                if algorithm_path.split('/')[-1] in ['lind-rand-ben-gt', 'cw-rand-gen-g']:
                    params_lst = map(str, [algorithm_path, data_set_name, sample_num, round_idx, k,
                                           '>>', statistics_file_path])
                if algorithm_path.endswith('ProbeSim-gt'):
                    params_lst = map(str, [algorithm_path, data_set_name, sample_num, round_idx,
                                           0.6, eps, 0.01, k, '>>', statistics_file_path])
                cmd = ' '.join(params_lst)
                time_out = 7200

                # 2nd: run cmd
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out)
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('is_time_out:' + str(tle_flag))
                    ofs.write('\n\n\n\n')

                # if lind or cw, break
                if algorithm_path.split('/')[-1] in ['lind-rand-ben-gt', 'cw-rand-gen-g']:
                    break
                # 3rd: if tle, break
                if tle_flag:
                    break

def one_round(is_rev_deg=False, num_of_gpus=1):
    for data_set_name in data_set_lst:
        if is_rev_deg:
            data_set_name = data_set_name + os.sep + rev_deg_order_tag
        for our_algorithm in our_exec_name_lst:
            if 'friendster' in data_set_name and 'bitmap' in our_algorithm:
                num_pass_lst = list(reversed(range(3, 11)))
            else:
                num_pass_lst = list(reversed(range(1, 11)))
            for eps in eps_lst:
                for mu in mu_lst:
                    for t_num in thread_num_lst:
                        for num_pass in num_pass_lst:
                            statistics_dir = os.sep.join(map(str, [
                                '.', exp_res_root_name, folder_name + '-' + str(num_of_gpus),
                                num_pass, data_set_name, eps, mu, t_num
                            ]))
                            os.system('mkdir -p ' + statistics_dir)
                            statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'

                            # 1st: write header
                            os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter,
                                                '>>', statistics_file_path]))

                            # 2nd: run exec cmd
                            algorithm_path = our_exec_path + os.sep + our_algorithm
                            params_lst = map(str, [algorithm_path,
                                                   data_set_path + os.sep + data_set_name,
                                                   eps, mu, t_num, num_pass, statistics_file_path,
                                                   '> /dev/null 2>&1'])
                            cmd = ' '.join(params_lst)
                            time_out = 3600
                            my_env = os.environ.copy()
                            my_env['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, range(4, 4 + num_of_gpus)))
                            tle_flag, info, correct_info = time_out_util.run_with_timeout(
                                cmd, timeout_sec=time_out, env=my_env)

                            # check md5 for correctness, assuming result generated by ppSCAN
                            def check_result():
                                tmp_lst = []
                                gt_res_file_name = '-'.join(map(str, ['result', eps, mu])) + '.txt'
                                res_file_path = os.sep.join([data_set_path, data_set_name,
                                                             'scanxp-' + gt_res_file_name])
                                tmp_tle_flag, tmp_info, tmp_correct_info = time_out_util.run_with_timeout(
                                    ' '.join(['md5sum', res_file_path]), timeout_sec=time_out)
                                tmp_lst.append(tmp_info + tmp_correct_info)
                                tmp_tle_flag, tmp_info, tmp_correct_info = time_out_util.run_with_timeout(
                                    ' '.join(['md5sum',
                                              os.sep.join([data_set_path, data_set_name, gt_res_file_name])]),
                                    timeout_sec=time_out)
                                tmp_lst.append(tmp_info + tmp_correct_info)
                                if len(tmp_lst[0].split()) > 0 and len(tmp_lst[1].split()) > 0 and \
                                        tmp_lst[0].split()[0] == tmp_lst[1].split()[0]:
                                    return True
                                return 'False\n' + '\n'.join(map(str, tmp_lst))

                            # 3rd: append outputs
                            write_split(statistics_file_path)
                            with open(statistics_file_path, 'a+') as ofs:
                                ofs.write(correct_info)
                                ofs.write('\nis_time_out:' + str(tle_flag))
                                ofs.write('\nis_correct:' + str(check_result()) + '\n')
                                ofs.write(my_splitter + time.ctime() + my_splitter)
                                ofs.write('\n\n\n\n')

import os

if __name__ == '__main__':
    data_set_lst = [
        # "snap_livejournal",
        # "snap_orkut",
        # "webgraph_eu",
        # "webgraph_uk",
        # "webgraph_webbase",
        # "webgraph_it",
        # "webgraph_twitter",
        # "snap_friendster",
        # "rmat_v50m_e0.5g",
        # "rmat_v5m_e0.5g",
        # "rmat_v0.5m_e0.5g",
        "s22-16", "s23-16", "s24-16", "s25-16",
        "s26-16", "s27-16", "s28-16", "s29-16"
    ]
    # folder_root = '/home/yche/mnt/luocpu9/mnt/storage1/yche/datasets'
    folder_root = '/home/yche/mnt/gpu24/mnt/nvme-ssd/yche/datasets'
    logger = exec_utils.get_logger("/home/yche/log.log", name=__name__)
    for data_set in data_set_lst:
        cmd = 'md5sum ' + os.sep.join([folder_root, data_set, 'ktruss-pkt-inter-shrink.histogram'])
        logger.info(time_out_util.run_with_timeout(cmd, timeout_sec=10))

def one_round(eps_lst, c_lst):
    data_set_name = 'ca-GrQc'
    for our_algorithm in our_exec_name_lst:
        for eps in list(reversed(eps_lst)):
            for c in c_lst:
                statistics_dir = os.sep.join(map(str, [exp_res_root_name, folder_name,
                                                       data_set_name, c, eps]))
                os.system('mkdir -p ' + os.sep.join([statistics_dir, 'log']))
                os.system('mkdir -p ' + os.sep.join([statistics_dir, 'dstat']))
                os.system('mkdir -p ' + os.sep.join([statistics_dir, 'dstat_clean']))
                statistics_file_path = statistics_dir + os.sep + our_algorithm + '.log'
                dstat_file_path = os.sep.join([statistics_dir, 'dstat', our_algorithm + '-dstat.log'])
                log_file_path = os.sep.join([statistics_dir, 'log', our_algorithm + '-raw.log'])
                logger.info('stat file path: {}'.format(statistics_file_path))

                # 1st: write header
                append_header(statistics_file_path)
                append_header(dstat_file_path)
                append_header(log_file_path)

                # 2nd: run exec cmd
                algorithm_path = our_exec_path + os.sep + our_algorithm
                params_lst = map(str, ['cgexec -g memory:yche-exp' if with_c_group else '',
                                       algorithm_path, data_set_name, eps, c, statistics_file_path])
                cmd = ' '.join(params_lst)
                logger.info('exec-cmd: {}'.format(cmd))
                time_out = 600
                my_env = os.environ.copy()
                my_env['OMP_NUM_THREADS'] = '1' if our_algorithm != 'probesim_ss_ap_bench' else '56'

                def execute_cmd(my_cmd):
                    logger.info('sub-process: {}'.format(my_cmd))
                    os.system(my_cmd)

                # 3rd: spawn a new process to run the dstat monitor
                dstat_cmd = 'dstat -tcdrlmgyn --fs >> ' + dstat_file_path
                p = Process(target=execute_cmd, args=(dstat_cmd,))
                p.start()
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
                time_out_util.kill_term_recursive(p.pid)
                modify_dstat_file(dstat_file_path)

                # 4th: append outputs
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write('\nis_time_out:' + str(tle_flag))
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('\n\n\n\n')
                if len(info) > 0:
                    with open(log_file_path, 'a+') as ofs:
                        ofs.write(info)
                logger.info('finish: {}'.format(cmd))

def one_round():
    data_set_name = 'ca-HepTh'
    for our_algorithm in our_exec_name_lst:
        for update_tag in ['ins', 'del']:
            for edge_num in [str(i) for i in range(5000, 30000, 5000)]:
                statistics_dir = os.sep.join(map(str, [exp_res_root_name, folder_name,
                                                       data_set_name, update_tag, edge_num]))
                os.system('mkdir -p ' + os.sep.join([statistics_dir, 'log']))
                os.system('mkdir -p ' + os.sep.join([statistics_dir, 'dstat']))
                os.system('mkdir -p ' + os.sep.join([statistics_dir, 'dstat_clean']))
                statistics_file_path = statistics_dir + os.sep + our_algorithm + '.log'
                dstat_file_path = os.sep.join([statistics_dir, 'dstat', our_algorithm + '-dstat.log'])
                log_file_path = os.sep.join([statistics_dir, 'log', our_algorithm + '-raw.log'])
                logger.info('stat file path: {}'.format(statistics_file_path))

                # 1st: write header
                append_header(statistics_file_path)
                append_header(dstat_file_path)
                append_header(log_file_path)

                # 2nd: run exec cmd
                algorithm_path = our_exec_path + os.sep + our_algorithm
                params_lst = map(str, ['cgexec -g memory:yche-exp' if with_c_group else '',
                                       algorithm_path, data_set_name, edge_num, update_tag,
                                       statistics_file_path])
                cmd = ' '.join(params_lst)
                logger.info('exec-cmd: {}'.format(cmd))
                time_out = 800000
                my_env = os.environ.copy()

                def execute_cmd(my_cmd):
                    logger.info('sub-process: {}'.format(my_cmd))
                    os.system(my_cmd)

                # 3rd: spawn a new process to run the dstat monitor
                dstat_cmd = 'dstat -tcdrlmgyn --fs >> ' + dstat_file_path
                p = Process(target=execute_cmd, args=(dstat_cmd,))
                p.start()
                tle_flag, info, correct_info = time_out_util.run_with_timeout(cmd, timeout_sec=time_out, env=my_env)
                time_out_util.kill_term_recursive(p.pid)
                modify_dstat_file(dstat_file_path)

                # 4th: append outputs
                write_split(statistics_file_path)
                with open(statistics_file_path, 'a+') as ofs:
                    ofs.write(correct_info)
                    ofs.write('\nis_time_out:' + str(tle_flag))
                    ofs.write(my_splitter + time.ctime() + my_splitter)
                    ofs.write('\n\n\n\n')
                if len(info) > 0:
                    with open(log_file_path, 'a+') as ofs:
                        ofs.write(info)
                logger.info('finish: {}'.format(cmd))

def one_round(reorder_method='.'):
    for data_set_name in data_set_lst:
        # for data_set_name in ['webgraph_twitter']:
        for our_algorithm in our_exec_name_lst:
            def is_time_out(name):
                if env_tag == gpu23_tag or env_tag == gpu_other_tag:
                    return 'roaring' in name or 'bsr' in name
                elif env_tag == knl_tag:
                    return 'roaring' in name
                return False

            if data_set_name == 'snap_friendster' and is_time_out(our_algorithm):
                continue
            if data_path_tag == exec_path_non_hbw_tag and 'hbw' in our_algorithm:
                continue
            for eps in eps_lst:
                for mu in mu_lst:
                    thread_num_lst = my_config_dict[thread_num_lst_tag]
                    for t_num in thread_num_lst:
                        statistics_dir = os.sep.join(map(str, [
                            '.', exp_res_root_name,
                            (folder_name + '-non-hbw') if data_path_tag == exec_path_non_hbw_tag else folder_name,
                            data_set_name, reorder_method,
                            # eps, mu,
                            t_num
                        ]))
                        os.system('mkdir -p ' + statistics_dir)
                        statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'

                        # skip if the graph does not exist
                        graph_dir = os.sep.join([data_set_path, data_set_name, reorder_method])
                        if not os.path.exists(graph_dir):
                            os.system('echo graph not exist >> ' + statistics_file_path)
                            continue
                        rm_cmd = 'rm ' + os.sep.join([graph_dir,
                                                      'scanxp-result-' + str(eps) + '-' + str(mu) + '.txt'])
                        os.system(rm_cmd)

                        # 1st: write header
                        os.system(' '.join(['echo', my_splitter + time.ctime() + my_splitter,
                                            '>>', statistics_file_path]))

                        # 2nd: run exec cmd
                        algorithm_path = our_exec_path + os.sep + our_algorithm
                        params_lst = map(str, [algorithm_path, graph_dir, eps, mu, t_num,
                                               statistics_file_path, '> /dev/null 2>&1'])
                        cmd = ' '.join(params_lst)
                        time_out = 600
                        my_env = os.environ.copy()
                        tle_flag, info, correct_info = time_out_util.run_with_timeout(
                            cmd, timeout_sec=time_out, env=my_env)
                        if tle_flag:
                            break

                        # check md5 for correctness, assuming result generated by ppSCAN
                        def check_result():
                            tmp_lst = []
                            gt_res_file_name = '-'.join(map(str, ['result', eps, mu])) + '.txt'
                            res_file_path = os.sep.join([graph_dir, 'scanxp-' + gt_res_file_name])
                            tmp_tle_flag, tmp_info, tmp_correct_info = time_out_util.run_with_timeout(
                                ' '.join(['md5sum', res_file_path]), timeout_sec=time_out)
                            tmp_lst.append(tmp_info + tmp_correct_info)
                            tmp_tle_flag, tmp_info, tmp_correct_info = time_out_util.run_with_timeout(
                                ' '.join(['md5sum', os.sep.join([graph_dir, gt_res_file_name])]),
                                timeout_sec=time_out)
                            tmp_lst.append(tmp_info + tmp_correct_info)
                            os.system(' '.join(['echo', str(tmp_lst), '>>', statistics_file_path]))
                            if len(tmp_lst[0].split()) > 0 and len(tmp_lst[1].split()) > 0 and \
                                    tmp_lst[0].split()[0] == tmp_lst[1].split()[0]:
                                return True
                            return 'False\n' + '\n'.join(map(str, tmp_lst))

                        # 3rd: append outputs
                        write_split(statistics_file_path)
                        with open(statistics_file_path, 'a+') as ofs:
                            ofs.write(correct_info)
                            ofs.write('\nis_time_out:' + str(tle_flag))
                            ofs.write('\nis_correct:' + str(check_result()) + '\n')
                            ofs.write(my_splitter + time.ctime() + my_splitter)
                            ofs.write('\n\n\n\n')