def test_accuracy():
    """Benchmark ``accuracy`` for every (size, file_size) combination.

    For each pair drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built, one item is pulled from it and passed to
    ``accuracy`` (as both positional data arguments) through a ``Timer``.
    ``timer.avg_cost`` is then multiplied by ``len(dfmaker)`` and the
    result is written as a small CSV report under ``report/``.

    Combinations whose report file already exists, or whose data maker
    reports a length of zero, are skipped.
    """
    # Loop-invariant call options, built once instead of on every iteration.
    options = {
        'true_posi': 'right',
        'average': 'binary',
        'truth_column': 'label',
        'pred_column': 'label',
    }
    for size in size_list:
        for file_size in filesize_list:
            result_file = 'report/test_accuracy_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(result_file):
                print('skip {}'.format(result_file))
                continue

            dfmaker = LabelDataMaker(size, file_size, 100, 2)
            multip = len(dfmaker)
            if multip == 0:
                continue

            timer = Timer()
            print('running {}'.format(result_file))
            new_data = next(dfmaker)
            timer(accuracy, new_data, new_data, **options)
            # Extrapolate the single measured call to the full length.
            cost = timer.avg_cost * multip

            # Create the report directory on demand so a finished run is
            # not lost to a missing-directory error when writing.
            report_dir = os.path.dirname(result_file)
            if not os.path.isdir(report_dir):
                os.makedirs(report_dir)
            with open(result_file, 'w') as f:
                f.write('total,each,number\n')
                f.write(','.join(map(str, (cost, timer.avg_cost, multip))))
def test_kmeans():
    """Benchmark ``kmeans`` for every (size, file_size) combination.

    For each pair drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built, one item is pulled from it and passed to
    ``kmeans`` through a ``Timer``. ``timer.avg_cost`` is then multiplied
    by ``len(dfmaker)`` and the result is written as a small CSV report
    under ``report/``.

    Combinations whose report file already exists, or whose data maker
    reports a length of zero, are skipped.
    """
    # Loop-invariant call options, built once instead of on every iteration.
    options = {
        'n_cluster': '2',
        'max_iter': '300',
        'predict_labels': 'label',
        'store_origin': 'False',
        'n_jobs': '10',
    }
    for size in size_list:
        for file_size in filesize_list:
            result_file = 'report/test_kmeans_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(result_file):
                print('skip {}'.format(result_file))
                continue

            dfmaker = LabelDataMaker(size, file_size, 100, 2)
            multip = len(dfmaker)
            if multip == 0:
                continue

            timer = Timer()
            print('running {}'.format(result_file))
            timer(kmeans, next(dfmaker), **options)
            # Extrapolate the single measured call to the full length.
            cost = timer.avg_cost * multip

            # Create the report directory on demand so a finished run is
            # not lost to a missing-directory error when writing.
            report_dir = os.path.dirname(result_file)
            if not os.path.isdir(report_dir):
                os.makedirs(report_dir)
            with open(result_file, 'w') as f:
                f.write('total,each,number\n')
                f.write(','.join(map(str, (cost, timer.avg_cost, multip))))
def test_iforest():
    """Benchmark ``outlier_iforest`` for every (size, file_size) combination.

    For each pair drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built, one item is pulled from it and passed to
    ``outlier_iforest`` through a ``Timer``. ``timer.avg_cost`` is then
    multiplied by ``len(dfmaker)`` and the result is written as a small
    CSV report under ``report/``.

    Combinations whose report file already exists, or whose data maker
    reports a length of zero, are skipped.
    """
    # Loop-invariant call options, built once instead of on every iteration.
    options = {
        'contamination': '0.1',
        'n_jobs': '10',
    }
    for size in size_list:
        for file_size in filesize_list:
            result_file = 'report/test_iforest_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(result_file):
                print('skip {}'.format(result_file))
                continue

            dfmaker = LabelDataMaker(size, file_size, 100, 2)
            multip = len(dfmaker)
            if multip == 0:
                continue

            timer = Timer()
            print('running {}'.format(result_file))
            new_data = next(dfmaker)
            timer(outlier_iforest, new_data, **options)
            # Extrapolate the single measured call to the full length.
            cost = timer.avg_cost * multip

            # Create the report directory on demand so a finished run is
            # not lost to a missing-directory error when writing.
            report_dir = os.path.dirname(result_file)
            if not os.path.isdir(report_dir):
                os.makedirs(report_dir)
            with open(result_file, 'w') as f:
                f.write('total,each,number\n')
                f.write(','.join(map(str, (cost, timer.avg_cost, multip))))
def test_random():
    """Benchmark ``random`` for every (size, file_size) combination.

    For each pair drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built, one item is pulled from it and passed to
    ``random`` (with no keyword options) through a ``Timer``.
    ``timer.avg_cost`` is then multiplied by ``len(dfmaker)`` and the
    result is written as a small CSV report under ``report/``.

    Combinations whose report file already exists, or whose data maker
    reports a length of zero, are skipped.
    """
    # No keyword options for this benchmark; kept as a dict so the call
    # has the same shape as the sibling benchmarks.
    options = {}
    for size in size_list:
        for file_size in filesize_list:
            result_file = 'report/test_random_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(result_file):
                print('skip {}'.format(result_file))
                continue

            dfmaker = LabelDataMaker(size, file_size, 100, 2)
            multip = len(dfmaker)
            if multip == 0:
                continue

            timer = Timer()
            print('running {}'.format(result_file))
            timer(random, next(dfmaker), **options)
            # Extrapolate the single measured call to the full length.
            cost = timer.avg_cost * multip

            # Create the report directory on demand so a finished run is
            # not lost to a missing-directory error when writing.
            report_dir = os.path.dirname(result_file)
            if not os.path.isdir(report_dir):
                os.makedirs(report_dir)
            with open(result_file, 'w') as f:
                f.write('total,each,number\n')
                f.write(','.join(map(str, (cost, timer.avg_cost, multip))))
def test_sort():
    """Benchmark ``sort`` for every (size, file_size) combination.

    For each pair drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built, one item is pulled from it and passed to
    ``sort`` through a ``Timer``. ``timer.avg_cost`` is then multiplied by
    ``len(dfmaker)`` and the result is written as a small CSV report
    under ``report/``.

    Combinations whose report file already exists, or whose data maker
    reports a length of zero, are skipped.
    """
    # Loop-invariant call options, built once instead of on every iteration.
    options = {
        'columns': '1',
        'ascending': 'True',
        'na_position': 'last',
    }
    for size in size_list:
        for file_size in filesize_list:
            result_file = 'report/test_sort_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(result_file):
                print('skip {}'.format(result_file))
                continue

            dfmaker = LabelDataMaker(size, file_size, 100, 2)
            multip = len(dfmaker)
            if multip == 0:
                continue

            timer = Timer()
            print('running {}'.format(result_file))
            timer(sort, next(dfmaker), **options)
            # Extrapolate the single measured call to the full length.
            cost = timer.avg_cost * multip

            # Create the report directory on demand so a finished run is
            # not lost to a missing-directory error when writing.
            report_dir = os.path.dirname(result_file)
            if not os.path.isdir(report_dir):
                os.makedirs(report_dir)
            with open(result_file, 'w') as f:
                f.write('total,each,number\n')
                f.write(','.join(map(str, (cost, timer.avg_cost, multip))))