import numpy as np
from sklearn.linear_model import LogisticRegressionCV

import lib_regression  # helper module from the deep_Mahalanobis_detector codebase

# `args` is assumed to be an argparse namespace built elsewhere in the script;
# the snippets below only use `args.net_type`. (The later examples in this
# collection assume the same imports.)


def main():
    # initial setup
    dataset_list = ['skin_cancer']
    score_list = ['Mahalanobis_0.0', 'Mahalanobis_0.01', 'Mahalanobis_0.005', 'Mahalanobis_0.002', 'Mahalanobis_0.0014', 'Mahalanobis_0.001', 'Mahalanobis_0.0005']
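    # the numeric suffix of each score name is the input-noise magnitude used
    # when the Mahalanobis characteristics were generated (printed later as
    # 'Input noise')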
    
    # train and measure the performance of Mahalanobis detector
    list_best_results, list_best_results_index = [], []
    for dataset in dataset_list:
        print('In-distribution: ', dataset)
        outf = './output/' + args.net_type + '_' + dataset + '/'
        out_list = ['imgnet', 'skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'nct']
        
        list_best_results_out = [[] for _ in range(len(out_list))]
        list_best_results_index_out = [[] for _ in range(len(out_list))]
        for out in out_list:
            best_lr = None
            best_score = None
            best_tnr = 0
            for score in score_list:
                total_X, total_Y = lib_regression.load_characteristics(score, dataset, out, outf)
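                # the last 2533 rows of the loaded characteristics are assumed
                # to be the in-distribution samples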
                X_in = total_X[-2533:,:]
                Y_in = total_Y[-2533:]
                X_out = total_X[:-2533,:]
                Y_out = total_Y[:-2533]
                
                # 80% of the OOD samples go to regressor training
                l = int(0.8 * X_out.shape[0])
                
                X_train = np.concatenate((X_in[:500], X_out[:l]))
                Y_train = np.concatenate((Y_in[:500], Y_out[:l]))
                
                # the slice l:2*l is clipped at the array end, i.e. it covers
                # the remaining 20% of the OOD samples
                X_val_for_test = np.concatenate((X_in[500:550], X_out[l:2*l]))
                Y_val_for_test = np.concatenate((Y_in[500:550], Y_out[l:2*l]))
                
                lr = LogisticRegressionCV(n_jobs=-1, max_iter=1000).fit(X_train, Y_train)
                results = lib_regression.detection_performance(lr, X_val_for_test, Y_val_for_test, outf)
                
                if best_tnr < results['TMP']['TNR']:
                    best_tnr = results['TMP']['TNR']
                    best_lr = lr
                    best_score = score 

            # cross-evaluate the detector selected above on every OOD set;
            # a distinct name avoids shadowing the outer loop variable `out`
            for i, out_eval in enumerate(out_list):
                print('Out-of-distribution: ', out_eval)
                total_X, total_Y = lib_regression.load_characteristics(best_score, dataset, out_eval, outf)
                X_in = total_X[-2533:,:]
                Y_in = total_Y[-2533:]
                X_out = total_X[:-2533,:]
                Y_out = total_Y[:-2533]
                
                # only X_out is shuffled; Y_out is assumed to be a constant
                # OOD label, so reordering the features does not break pairs
                np.random.seed(seed=0)
                np.random.shuffle(X_out)

                # the entire (shuffled) OOD set goes to the test split; only
                # in-distribution samples beyond index 550 are held out
                X_test = np.concatenate((X_in[550:], X_out))
                Y_test = np.concatenate((Y_in[550:], Y_out))
                    
                best_result = lib_regression.detection_performance(best_lr, X_test, Y_test, outf)
                list_best_results_out[i].append(best_result)
                list_best_results_index_out[i].append(best_score)
        list_best_results.append(list_best_results_out)
        list_best_results_index.append(list_best_results_index_out)
    
    # sanity checks on the collected result lists
    print(len(list_best_results_out))
    print(len(list_best_results))
    # print the results
    count_in = 0
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    
    for in_list in list_best_results:
        print('in_distribution: ' + dataset_list[count_in] + '==========')
        # out_list = ['skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'imgnet', 'nct', 'final_test']
        print(len(in_list))
        count_out = 0
        for results in in_list:
            print('out_distribution: '+ out_list[count_out])
            summary_results = {"TMP":{}}
            
            for r in results:
                # each list holds one test result per tuned detector, so
                # dividing by len(out_list) averages over the detectors
                for mtype in mtypes:
                    summary_results["TMP"][mtype] = (
                        summary_results["TMP"].get(mtype, 0)
                        + r["TMP"][mtype] / len(out_list))
            for mtype in mtypes:
                print(' {mtype:6s}'.format(mtype=mtype), end='')
            print('\n{val:6.2f}'.format(val=100.*summary_results['TMP']['TNR']), end='')
            print(' {val:6.2f}'.format(val=100.*summary_results['TMP']['AUROC']), end='')
            print(' {val:6.2f}'.format(val=100.*summary_results['TMP']['DTACC']), end='')
            print(' {val:6.2f}'.format(val=100.*summary_results['TMP']['AUIN']), end='')
            print(' {val:6.2f}\n'.format(val=100.*summary_results['TMP']['AUOUT']), end='')
            print('Input noise: ' + str(list_best_results_index[count_in][count_out]))
            print('')
            count_out += 1
        count_in += 1
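
All of these snippets lean on a small lib_regression helper module from the deep_Mahalanobis_detector codebase. As a reading aid, here is a minimal sketch of the interface the calls above assume; the file layout, shapes, and return values are inferred from the call sites, not taken from the real module.

# Hypothetical sketch of the lib_regression interface, inferred from usage;
# the real module ships with the deep_Mahalanobis_detector repository.
import numpy as np
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import train_test_split


def load_characteristics(score, dataset, out, outf):
    """Load precomputed detector features for one (in-dist, OOD) pair.

    Assumed to read '<outf>/<score>_<dataset>_<out>.npy' holding features in
    the leading columns and a {0: OOD, 1: in-dist} label in the last one.
    """
    data = np.load(outf + '%s_%s_%s.npy' % (score, dataset, out))
    return data[:, :-1], data[:, -1]


def stratified_split(X, Y, test_size=0.5):
    """Class-balanced split; returns (X_a, Y_a, X_b, Y_b)."""
    X_a, X_b, Y_a, Y_b = train_test_split(
        X, Y, test_size=test_size, stratify=Y, random_state=0)
    return X_a, Y_a, X_b, Y_b


def detection_performance(lr, X, Y, outf):
    """Score a fitted regressor. The real helper computes all five metrics
    (and writes score files under outf); this sketch fills in only TNR at
    95% TPR and AUROC, with placeholders for the rest."""
    scores = lr.predict_proba(X)[:, 1]
    fpr, tpr, _ = roc_curve(Y, scores)
    tnr_at_tpr95 = 1.0 - fpr[np.argmax(tpr >= 0.95)]
    return {'TMP': {'TNR': tnr_at_tpr95, 'AUROC': auc(fpr, tpr),
                    'DTACC': 0.0, 'AUIN': 0.0, 'AUOUT': 0.0}}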
Example #2
def main():
    # initial setup
    dataset_list = ['cifar10', 'cifar100', 'svhn']
    score_list = [
        'Mahalanobis_0.0', 'Mahalanobis_0.01', 'Mahalanobis_0.005',
        'Mahalanobis_0.002', 'Mahalanobis_0.0014', 'Mahalanobis_0.001',
        'Mahalanobis_0.0005'
    ]

    # train and measure the performance of Mahalanobis detector
    list_best_results, list_best_results_index = [], []
    for dataset in dataset_list:
        print('In-distribution: ', dataset)
        outf = './output/' + args.net_type + '_' + dataset + '/'
        out_list = ['svhn', 'imagenet_resize', 'lsun_resize']
        if dataset == 'svhn':
            out_list = ['cifar10', 'imagenet_resize', 'lsun_resize']

        list_best_results_out, list_best_results_index_out = [], []
        for out in out_list:
            print('Out-of-distribution: ', out)
            best_tnr, best_result, best_index = 0, 0, 0
            for score in score_list:
                total_X, total_Y = lib_regression.load_characteristics(
                    score, dataset, out, outf)
                X_val, Y_val, X_test, Y_test = lib_regression.block_split(
                    total_X, total_Y, out)
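                # block_split is assumed to return X_val laid out as 1000
                # in-distribution rows followed by 1000 OOD rows; the slices
                # below therefore take 500 of each for training and leave the
                # other 500 of each for threshold selection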
                X_train = np.concatenate((X_val[:500], X_val[1000:1500]))
                Y_train = np.concatenate((Y_val[:500], Y_val[1000:1500]))
                X_val_for_test = np.concatenate(
                    (X_val[500:1000], X_val[1500:]))
                Y_val_for_test = np.concatenate(
                    (Y_val[500:1000], Y_val[1500:]))
                lr = LogisticRegressionCV(n_jobs=-1).fit(X_train, Y_train)
                y_pred = lr.predict_proba(X_train)[:, 1]
                #print('training mse: {:.4f}'.format(np.mean(y_pred - Y_train)))
                y_pred = lr.predict_proba(X_val_for_test)[:, 1]
                #print('test mse: {:.4f}'.format(np.mean(y_pred - Y_val_for_test)))
                results = lib_regression.detection_performance(
                    lr, X_val_for_test, Y_val_for_test, outf)
                if best_tnr < results['TMP']['TNR']:
                    best_tnr = results['TMP']['TNR']
                    best_index = score
                    best_result = lib_regression.detection_performance(
                        lr, X_test, Y_test, outf)
            list_best_results_out.append(best_result)
            list_best_results_index_out.append(best_index)
        list_best_results.append(list_best_results_out)
        list_best_results_index.append(list_best_results_index_out)

    # print the results
    count_in = 0
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']

    for in_list in list_best_results:
        print('in_distribution: ' + dataset_list[count_in] + '==========')
        out_list = ['svhn', 'imagenet_resize', 'lsun_resize']
        if dataset_list[count_in] == 'svhn':
            out_list = ['cifar10', 'imagenet_resize', 'lsun_resize']
        count_out = 0
        for results in in_list:
            print('out_distribution: ' + out_list[count_out])
            for mtype in mtypes:
                print(' {mtype:6s}'.format(mtype=mtype), end='')
            print('\n{val:6.2f}'.format(val=100. * results['TMP']['TNR']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUROC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['DTACC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUIN']),
                  end='')
            print(' {val:6.2f}\n'.format(val=100. * results['TMP']['AUOUT']),
                  end='')
            print('Input noise: ' +
                  list_best_results_index[count_in][count_out])
            print('')
            count_out += 1
        count_in += 1
def main():
    # initial setup
    # dataset_list = ['cifar10', 'cifar100', 'svhn']  # full option set
    dataset_list = ['cifar10']
    if args.net_type == 'densenet121':
        dataset_list = ['ham10000']
    adv_test_list = ['FGSM', 'BIM', 'DeepFool', 'CWL2']

    print('evaluate the LID estimator')
    score_list = [
        'LID_10', 'LID_20', 'LID_30', 'LID_40', 'LID_50', 'LID_60', 'LID_70',
        'LID_80', 'LID_90'
    ]
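    # the numeric suffix is assumed to be the neighbourhood size used by the
    # LID (Local Intrinsic Dimensionality) estimator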
    list_best_results, list_best_results_index = [], []
    for dataset in dataset_list:
        print('load train data: ', dataset)
        outf = './adv_output/' + args.net_type + '_' + dataset + '/'

        list_best_results_out, list_best_results_index_out = [], []
        for out in adv_test_list:
            best_auroc, best_result, best_index = 0, 0, 0
            for score in score_list:
                print('load train data: ', out, ' of ', score)
                total_X, total_Y = lib_regression.load_characteristics(
                    score, dataset, out, outf)
                #X_val, Y_val, X_test, Y_test = lib_regression.block_split_adv(total_X, total_Y)
                X_val, Y_val, X_test, Y_test = lib_regression.stratified_split(
                    total_X, total_Y)
                #pivot = int(X_val.shape[0] / 6)
                #X_train = np.concatenate((X_val[:pivot], X_val[2*pivot:3*pivot], X_val[4*pivot:5*pivot]))
                #Y_train = np.concatenate((Y_val[:pivot], Y_val[2*pivot:3*pivot], Y_val[4*pivot:5*pivot]))
                #X_val_for_test = np.concatenate((X_val[pivot:2*pivot], X_val[3*pivot:4*pivot], X_val[5*pivot:]))
                #Y_val_for_test = np.concatenate((Y_val[pivot:2*pivot], Y_val[3*pivot:4*pivot], Y_val[5*pivot:]))
                X_train, Y_train, X_val_for_test, Y_val_for_test = lib_regression.stratified_split(
                    X_val, Y_val, test_size=0.5)
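                # the validation block is itself split 50/50: one half trains
                # the logistic regressor, the other half picks the best score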
                lr = LogisticRegressionCV(n_jobs=-1,
                                          max_iter=1000).fit(X_train, Y_train)
                y_pred = lr.predict_proba(X_train)[:, 1]
                #print('training mse: {:.4f}'.format(np.mean(y_pred - Y_train)))
                y_pred = lr.predict_proba(X_val_for_test)[:, 1]
                #print('test mse: {:.4f}'.format(np.mean(y_pred - Y_val_for_test)))
                results = lib_regression.detection_performance(
                    lr, X_val_for_test, Y_val_for_test, outf)
                if best_auroc < results['TMP']['AUROC']:
                    best_auroc = results['TMP']['AUROC']
                    best_index = score
                    best_result = lib_regression.detection_performance(
                        lr, X_test, Y_test, outf)
            list_best_results_out.append(best_result)
            list_best_results_index_out.append(best_index)
        list_best_results.append(list_best_results_out)
        list_best_results_index.append(list_best_results_index_out)

    print('evaluate the Mahalanobis estimator')
    score_list = ['Mahalanobis_0.0', 'Mahalanobis_0.01', 'Mahalanobis_0.005', \
                  'Mahalanobis_0.002', 'Mahalanobis_0.0014', 'Mahalanobis_0.001', 'Mahalanobis_0.0005']
    list_best_results_ours, list_best_results_index_ours = [], []
    for dataset in dataset_list:
        print('load train data: ', dataset)
        outf = './adv_output/' + args.net_type + '_' + dataset + '/'
        list_best_results_out, list_best_results_index_out = [], []
        for out in adv_test_list:
            best_auroc, best_result, best_index = 0, 0, 0
            for score in score_list:
                print('load train data: ', out, ' of ', score)
                total_X, total_Y = lib_regression.load_characteristics(
                    score, dataset, out, outf)
                #X_val, Y_val, X_test, Y_test = lib_regression.block_split_adv(total_X, total_Y)
                X_val, Y_val, X_test, Y_test = lib_regression.stratified_split(
                    total_X, total_Y)
                #pivot = int(X_val.shape[0] / 6)
                #X_train = np.concatenate((X_val[:pivot], X_val[2*pivot:3*pivot], X_val[4*pivot:5*pivot]))
                #Y_train = np.concatenate((Y_val[:pivot], Y_val[2*pivot:3*pivot], Y_val[4*pivot:5*pivot]))
                #X_val_for_test = np.concatenate((X_val[pivot:2*pivot], X_val[3*pivot:4*pivot], X_val[5*pivot:]))
                #Y_val_for_test = np.concatenate((Y_val[pivot:2*pivot], Y_val[3*pivot:4*pivot], Y_val[5*pivot:]))
                X_train, Y_train, X_val_for_test, Y_val_for_test = lib_regression.stratified_split(
                    X_val, Y_val, test_size=0.5)
                lr = LogisticRegressionCV(n_jobs=-1,
                                          max_iter=1000).fit(X_train, Y_train)
                y_pred = lr.predict_proba(X_train)[:, 1]
                #print('training mse: {:.4f}'.format(np.mean(y_pred - Y_train)))
                y_pred = lr.predict_proba(X_val_for_test)[:, 1]
                #print('test mse: {:.4f}'.format(np.mean(y_pred - Y_val_for_test)))
                results = lib_regression.detection_performance(
                    lr, X_val_for_test, Y_val_for_test, outf)
                if best_auroc < results['TMP']['AUROC']:
                    best_auroc = results['TMP']['AUROC']
                    best_index = score
                    best_result = lib_regression.detection_performance(
                        lr, X_test, Y_test, outf)
            list_best_results_out.append(best_result)
            list_best_results_index_out.append(best_index)
        list_best_results_ours.append(list_best_results_out)
        list_best_results_index_ours.append(list_best_results_index_out)

    count_in = 0
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    print("results of LID")
    for in_list in list_best_results:
        print('in_distribution: ' + dataset_list[count_in] + '==========')
        count_out = 0
        for results in in_list:
            print('out_distribution: ' + adv_test_list[count_out])
            for mtype in mtypes:
                print(' {mtype:6s}'.format(mtype=mtype), end='')
            print('\n{val:6.2f}'.format(val=100. * results['TMP']['TNR']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUROC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['DTACC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUIN']),
                  end='')
            print(' {val:6.2f}\n'.format(val=100. * results['TMP']['AUOUT']),
                  end='')
            print('Input noise: ' +
                  list_best_results_index[count_in][count_out])
            print('')
            count_out += 1
        count_in += 1

    count_in = 0
    print("results of Mahalanobis")
    for in_list in list_best_results_ours:
        print('in_distribution: ' + dataset_list[count_in] + '==========')
        count_out = 0
        for results in in_list:
            print('out_distribution: ' + adv_test_list[count_out])
            for mtype in mtypes:
                print(' {mtype:6s}'.format(mtype=mtype), end='')
            print('\n{val:6.2f}'.format(val=100. * results['TMP']['TNR']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUROC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['DTACC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUIN']),
                  end='')
            print(' {val:6.2f}\n'.format(val=100. * results['TMP']['AUOUT']),
                  end='')
            print('Input noise: ' +
                  list_best_results_index_ours[count_in][count_out])
            print('')
            count_out += 1
        count_in += 1
def train_detector(dataset_list, out_list):
    net_type = "model"
    outf = "/output/"

    score_list = [
        'Mahalanobis_0.0', 'Mahalanobis_0.01', 'Mahalanobis_0.005',
        'Mahalanobis_0.002', 'Mahalanobis_0.0014', 'Mahalanobis_0.001',
        'Mahalanobis_0.0005'
    ]

    list_best_results, list_best_results_index = [], []
    for dataset in dataset_list:
        list_best_results_out, list_best_results_index_out = [], []
        for out in out_list:
            print('Out-of-distribution: ', out)
            best_tnr, best_result, best_index = 0, 0, 0
            for score in score_list:
                total_X, total_Y = lib_regression.load_characteristics(
                    score, dataset, out, outf)
                X_val, Y_val, X_test, Y_test = lib_regression.stratified_split(
                    total_X, total_Y)

                X_train, Y_train, X_val_for_test, Y_val_for_test = lib_regression.stratified_split(
                    X_val, Y_val, test_size=0.5)
                lr = LogisticRegressionCV(n_jobs=-1,
                                          max_iter=1000).fit(X_train, Y_train)
                results = lib_regression.detection_performance(
                    lr, X_val_for_test, Y_val_for_test, outf)
                if best_tnr < results['TMP']['TNR']:
                    best_tnr = results['TMP']['TNR']
                    best_index = score
                    best_result = lib_regression.detection_performance(
                        lr, X_test, Y_test, outf)
            list_best_results_out.append(best_result)
            list_best_results_index_out.append(best_index)
        list_best_results.append(list_best_results_out)
        list_best_results_index.append(list_best_results_index_out)

    # print the results
    count_in = 0
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']

    for in_list in list_best_results:
        print('in_distribution: ' + dataset_list[count_in] + '==========')

        count_out = 0
        for results in in_list:
            print('out_distribution: ' + out_list[count_out])
            for mtype in mtypes:
                print(' {mtype:6s}'.format(mtype=mtype), end='')
            print('\n{val:6.2f}'.format(val=100. * results['TMP']['TNR']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUROC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['DTACC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUIN']),
                  end='')
            print(' {val:6.2f}\n'.format(val=100. * results['TMP']['AUOUT']),
                  end='')
            print('Input noise: ' +
                  list_best_results_index[count_in][count_out])
            print('')
            count_out += 1
        count_in += 1


# if __name__ == '__main__':
#     main()
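
Since the __main__ guard above is commented out, train_detector is only ever invoked externally. A minimal, hypothetical call, assuming the Mahalanobis characteristic files for these dataset/OOD pairs already exist under the hard-coded outf directory:

# hypothetical invocation; names must match the saved characteristic files
train_detector(dataset_list=['cifar10'],
               out_list=['svhn', 'imagenet_resize', 'lsun_resize'])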
Example #6
def main():
    # initial setup
    dataset_list = ['cifar10', 'cifar100', 'svhn']

    if args.net_type == 'densenet121':
        dataset_list = ['ham10000']
    elif args.net_type == "parkinsonsNet-rest":
        dataset_list = ['mpower-rest']
    elif args.net_type == "parkinsonsNet-return":
        dataset_list = ['mpower-return']
    elif args.net_type == "parkinsonsNet-outbound":
        dataset_list = ['mpower-outbound']
    score_list = [
        'Mahalanobis_0.0', 'Mahalanobis_0.01', 'Mahalanobis_0.005',
        'Mahalanobis_0.002', 'Mahalanobis_0.0014', 'Mahalanobis_0.001',
        'Mahalanobis_0.0005'
    ]

    # train and measure the performance of Mahalanobis detector
    list_best_results, list_best_results_index = [], []
    for dataset in dataset_list:
        print('In-distribution: ', dataset)
        outf = '/home/anasa2/deep_Mahalanobis_detector/output/' + args.net_type + '_' + dataset + '/'
        out_list = ['svhn', 'imagenet_resize', 'lsun_resize']
        if dataset == 'svhn':
            out_list = ['cifar10', 'imagenet_resize', 'lsun_resize']
        elif dataset == 'ham10000':
            #out_list = ['cifar10', 'cifar100', 'svhn', 'imagenet_resize', 'lsun_resize', 'face', 'face_age', 'isic-2017', 'isic-2016'] #face #face_age
            out_list = [
                'ham10000-avg-smoothing', 'ham10000-brightness',
                'ham10000-contrast', 'ham10000-dilation', 'ham10000-erosion',
                'ham10000-med-smoothing', 'ham10000-rotation', 'ham10000-shift'
            ]
        elif dataset == 'mpower-rest':
            out_list = [
                'mHealth', 'MotionSense', 'oodParkinsonsData', 'mpower-rst'
            ]
        elif dataset == 'mpower-return':
            out_list = [
                'mHealth', 'MotionSense', 'oodParkinsonsData', 'mpower-ret'
            ]
        elif dataset == 'mpower-outbound':
            out_list = [
                'mHealth', 'MotionSense', 'oodParkinsonsData', 'mpower-out'
            ]

        list_best_results_out, list_best_results_index_out = [], []
        for out in out_list:
            print('Out-of-distribution: ', out)
            best_tnr, best_result, best_index = 0, 0, 0
            for score in score_list:
                total_X, total_Y = lib_regression.load_characteristics(
                    score, dataset, out, outf)
                #X_val, Y_val, X_test, Y_test = lib_regression.block_split(total_X, total_Y, out)
                #X_val, Y_val, X_test, Y_test = lib_regression.block_split_adv(total_X, total_Y)
                X_val, Y_val, X_test, Y_test = lib_regression.stratified_split(
                    total_X, total_Y)
                #X_train = np.concatenate((X_val[:500], X_val[1000:1500]))
                #Y_train = np.concatenate((Y_val[:500], Y_val[1000:1500]))
                #X_val_for_test = np.concatenate((X_val[500:1000], X_val[1500:]))
                #Y_val_for_test = np.concatenate((Y_val[500:1000], Y_val[1500:]))
                #partition = int(len(X_val)/2)
                #X_train = np.concatenate((X_val[:partition], X_val[:partition]))
                #Y_train = np.concatenate((Y_val[:partition], Y_val[:partition]))
                #X_val_for_test = np.concatenate((X_val[partition:], X_val[partition:]))
                #Y_val_for_test = np.concatenate((Y_val[partition:], Y_val[partition:]))
                X_train, Y_train, X_val_for_test, Y_val_for_test = lib_regression.stratified_split(
                    X_val, Y_val, test_size=0.5)
                lr = LogisticRegressionCV(n_jobs=-1,
                                          max_iter=1000).fit(X_train, Y_train)
                y_pred = lr.predict_proba(X_train)[:, 1]
                #print('training mse: {:.4f}'.format(np.mean(y_pred - Y_train)))
                y_pred = lr.predict_proba(X_val_for_test)[:, 1]
                #print('test mse: {:.4f}'.format(np.mean(y_pred - Y_val_for_test)))
                results = lib_regression.detection_performance(
                    lr, X_val_for_test, Y_val_for_test, outf)
                if best_tnr < results['TMP']['TNR']:
                    best_tnr = results['TMP']['TNR']
                    best_index = score
                    best_result = lib_regression.detection_performance(
                        lr, X_test, Y_test, outf)
            list_best_results_out.append(best_result)
            list_best_results_index_out.append(best_index)
        list_best_results.append(list_best_results_out)
        list_best_results_index.append(list_best_results_index_out)

    # print the results
    count_in = 0
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']

    for in_list in list_best_results:
        print('in_distribution: ' + dataset_list[count_in] + '==========')
        out_list = ['svhn', 'imagenet_resize', 'lsun_resize']
        if dataset_list[count_in] == 'svhn':
            out_list = ['cifar10', 'imagenet_resize', 'lsun_resize']
        elif dataset_list[count_in] == 'ham10000':
            #out_list = ['cifar10', 'cifar100', 'svhn', 'imagenet_resize', 'lsun_resize', 'face', 'face_age', 'isic-2017', 'isic-2016']
            out_list = [
                'ham10000-avg-smoothing', 'ham10000-brightness',
                'ham10000-contrast', 'ham10000-dilation', 'ham10000-erosion',
                'ham10000-med-smoothing', 'ham10000-rotation', 'ham10000-shift'
            ]
        elif dataset_list[count_in] == 'mpower-rest':
            out_list = [
                'mHealth', 'MotionSense', 'oodParkinsonsData', 'mpower-rst'
            ]
        elif dataset_list[count_in] == 'mpower-outbound':
            out_list = [
                'mHealth', 'MotionSense', 'oodParkinsonsData', 'mpower-out'
            ]
        elif dataset_list[count_in] == 'mpower-return':
            out_list = [
                'mHealth', 'MotionSense', 'oodParkinsonsData', 'mpower-ret'
            ]

        count_out = 0
        for results in in_list:
            print('out_distribution: ' + out_list[count_out])
            for mtype in mtypes:
                print(' {mtype:6s}'.format(mtype=mtype), end='')
            print('\n{val:6.2f}'.format(val=100. * results['TMP']['TNR']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUROC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['DTACC']),
                  end='')
            print(' {val:6.2f}'.format(val=100. * results['TMP']['AUIN']),
                  end='')
            print(' {val:6.2f}\n'.format(val=100. * results['TMP']['AUOUT']),
                  end='')
            print('Input noise: ' +
                  list_best_results_index[count_in][count_out])
            print('')
            count_out += 1
        count_in += 1