Esempio n. 1
0
def test_accuracy():
    """Time ``accuracy`` for each (size, file_size) pair and save a report.

    For every combination drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built and its first item is handed to ``accuracy``
    (as both positional data arguments) through a ``Timer``.  The report at
    ``report/test_accuracy_<size>_<file_size>.txt`` stores
    ``timer.avg_cost * len(maker)``, ``timer.avg_cost`` and ``len(maker)``.
    Combinations whose report already exists, or whose maker has length
    zero, are skipped.
    """
    # The call options are the same for every combination: build them once.
    options = {
        'true_posi': 'right',
        'average': 'binary',
        'truth_column': 'label',
        'pred_column': 'label',
    }
    for size in size_list:
        for file_size in filesize_list:
            report_path = 'report/test_accuracy_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(report_path):
                # A previous run already produced this report.
                print('skip {}'.format(report_path))
                continue

            maker = LabelDataMaker(size, file_size, 100, 2)
            multiplier = len(maker)
            if not multiplier:
                continue

            stopwatch = Timer()
            print('running {}'.format(report_path))
            sample = next(maker)
            # The same sample serves as both data arguments.
            stopwatch(accuracy, sample, sample, **options)

            # Only one item is timed; the total is that cost scaled by len().
            per_call = stopwatch.avg_cost
            fields = (per_call * multiplier, per_call, multiplier)
            row = ','.join(str(value) for value in fields)
            with open(report_path, 'w') as report:
                report.writelines(['total,each,number\n', row])
def test_kmeans():
    """Time ``kmeans`` for each (size, file_size) pair and save a report.

    For every combination drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built and its first item is handed to ``kmeans``
    through a ``Timer``.  The report at
    ``report/test_kmeans_<size>_<file_size>.txt`` stores
    ``timer.avg_cost * len(maker)``, ``timer.avg_cost`` and ``len(maker)``.
    Combinations whose report already exists, or whose maker has length
    zero, are skipped.
    """
    # All option values are passed as strings, exactly as written here.
    options = {
        'n_cluster': '2',
        'max_iter': '300',
        'predict_labels': 'label',
        'store_origin': 'False',
        'n_jobs': '10',
    }
    for size in size_list:
        for file_size in filesize_list:
            report_path = 'report/test_kmeans_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(report_path):
                # A previous run already produced this report.
                print('skip {}'.format(report_path))
                continue

            maker = LabelDataMaker(size, file_size, 100, 2)
            multiplier = len(maker)
            if not multiplier:
                continue

            stopwatch = Timer()
            print('running {}'.format(report_path))
            sample = next(maker)
            stopwatch(kmeans, sample, **options)

            # Only one item is timed; the total is that cost scaled by len().
            per_call = stopwatch.avg_cost
            fields = (per_call * multiplier, per_call, multiplier)
            row = ','.join(str(value) for value in fields)
            with open(report_path, 'w') as report:
                report.writelines(['total,each,number\n', row])
def test_iforest():
    """Time ``outlier_iforest`` for each (size, file_size) pair and save a report.

    For every combination drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built and its first item is handed to
    ``outlier_iforest`` through a ``Timer``.  The report at
    ``report/test_iforest_<size>_<file_size>.txt`` stores
    ``timer.avg_cost * len(maker)``, ``timer.avg_cost`` and ``len(maker)``.
    Combinations whose report already exists, or whose maker has length
    zero, are skipped.
    """
    # The call options are the same for every combination: build them once.
    options = {
        'contamination': '0.1',
        'n_jobs': '10',
    }
    for size in size_list:
        for file_size in filesize_list:
            report_path = 'report/test_iforest_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(report_path):
                # A previous run already produced this report.
                print('skip {}'.format(report_path))
                continue

            maker = LabelDataMaker(size, file_size, 100, 2)
            multiplier = len(maker)
            if not multiplier:
                continue

            stopwatch = Timer()
            print('running {}'.format(report_path))
            sample = next(maker)
            stopwatch(outlier_iforest, sample, **options)

            # Only one item is timed; the total is that cost scaled by len().
            per_call = stopwatch.avg_cost
            fields = (per_call * multiplier, per_call, multiplier)
            row = ','.join(str(value) for value in fields)
            with open(report_path, 'w') as report:
                report.writelines(['total,each,number\n', row])
Esempio n. 4
0
def test_random():
    """Time ``random`` for each (size, file_size) pair and save a report.

    For every combination drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built and its first item is handed to ``random``
    (with no keyword options) through a ``Timer``.  The report at
    ``report/test_random_<size>_<file_size>.txt`` stores
    ``timer.avg_cost * len(maker)``, ``timer.avg_cost`` and ``len(maker)``.
    Combinations whose report already exists, or whose maker has length
    zero, are skipped.
    """
    for size in size_list:
        for file_size in filesize_list:
            report_path = 'report/test_random_{}_{}.txt'.format(
                size, file_size)
            if os.path.exists(report_path):
                # A previous run already produced this report.
                print('skip {}'.format(report_path))
                continue

            maker = LabelDataMaker(size, file_size, 100, 2)
            multiplier = len(maker)
            if not multiplier:
                continue

            stopwatch = Timer()
            print('running {}'.format(report_path))
            sample = next(maker)
            # This benchmark passes no keyword options to the timed callable.
            stopwatch(random, sample)

            # Only one item is timed; the total is that cost scaled by len().
            per_call = stopwatch.avg_cost
            fields = (per_call * multiplier, per_call, multiplier)
            row = ','.join(str(value) for value in fields)
            with open(report_path, 'w') as report:
                report.writelines(['total,each,number\n', row])
Esempio n. 5
0
def test_sort():
    """Time ``sort`` for each (size, file_size) pair and save a report.

    For every combination drawn from ``size_list`` and ``filesize_list`` a
    ``LabelDataMaker`` is built and its first item is handed to ``sort``
    through a ``Timer``.  The report at
    ``report/test_sort_<size>_<file_size>.txt`` stores
    ``timer.avg_cost * len(maker)``, ``timer.avg_cost`` and ``len(maker)``.
    Combinations whose report already exists, or whose maker has length
    zero, are skipped.
    """
    # All option values are passed as strings, exactly as written here.
    options = {
        'columns': '1',
        'ascending': 'True',
        'na_position': 'last',
    }
    for size in size_list:
        for file_size in filesize_list:
            report_path = 'report/test_sort_{}_{}.txt'.format(size, file_size)
            if os.path.exists(report_path):
                # A previous run already produced this report.
                print('skip {}'.format(report_path))
                continue

            maker = LabelDataMaker(size, file_size, 100, 2)
            multiplier = len(maker)
            if not multiplier:
                continue

            stopwatch = Timer()
            print('running {}'.format(report_path))
            sample = next(maker)
            stopwatch(sort, sample, **options)

            # Only one item is timed; the total is that cost scaled by len().
            per_call = stopwatch.avg_cost
            fields = (per_call * multiplier, per_call, multiplier)
            row = ','.join(str(value) for value in fields)
            with open(report_path, 'w') as report:
                report.writelines(['total,each,number\n', row])