Code Example #1
def data_change(file_path):
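    # Read '<file_path>_conv_old.txt', perturb every value with Gaussian
    # noise (sigma 0.1), force the last row to be non-increasing, then write
    # the result to '<file_path>_conv_change.txt'.
    # Assumes module-level imports of random and of the fo / list2string
    # helpers (a sketch of those helpers follows this example).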
    file_name = file_path + '_conv_old.txt'

    data = fo.FileReader(file_name)

    new_data = []
    for data_i in data:

        data_i = data_i.split(' ')

        new_data_i = []
        for each_num in data_i:
            each_num = float(each_num)
            each_num = each_num + random.normalvariate(0, 0.1)
            new_data_i.append(each_num)

        new_data.append(new_data_i)

    change_index = len(new_data) - 1

    for i in range(len(new_data[change_index]) - 1):
        if new_data[change_index][i] < new_data[change_index][i + 1]:
            new_data[change_index][i + 1] = new_data[change_index][i]

    print('print data: ', new_data[change_index])

    buff = []
    for buff_data in new_data:
        buff.append(list2string(buff_data))

    new_file_name = file_path + '_conv_change.txt'

    fo.FileWriter(new_file_name, buff, style='w')

    return
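
These examples call several shared helpers (fo.FileReader, fo.FileWriter, list2string, string2list) that are defined elsewhere in the original project. The following is a minimal sketch of them, inferred only from how they are called here; the real implementations may differ.

def list2string(values, sep=' '):
    # join a list of values into one separator-delimited string
    return sep.join(str(v) for v in values)

def string2list(text, sep=' '):
    # inverse of list2string: parse a delimited string back into floats
    return [float(t) for t in text.split(sep) if t]

class fo:
    # stand-in for the project's file-IO module
    @staticmethod
    def FileReader(file_name):
        # return the file contents as a list of lines
        with open(file_name) as f:
            return [line.rstrip('\n') for line in f]

    @staticmethod
    def FileWriter(file_name, buff, style='w'):
        # write one buffered entry per line; style is the open() mode
        with open(file_name, style) as f:
            for line in buff:
                f.write(str(line) + '\n')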
Code Example #2
def run_exp_racos_for_synthetic_problem_analysis():

    # parameters
    sample_size = 10  # the instance number of sampling in an iteration
    budget = 500  # budget in online style
    positive_num = 2  # the set size of PosPop
    rand_probability = 0.99  # the probability of sample in model
    uncertain_bit = 1  # the dimension size that is sampled randomly
    adv_threshold = 10  # advance sample size

    opt_repeat = 10

    dimension_size = 10
    problem_name = 'sphere'
    problem_num = 200
    start_index = 0
    bias_region = 0.2

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    log_buffer = []

    # logging
    learner_path = './ExpLearner/SyntheticProbsLearner/' + problem_name + '/dimension' + str(dimension_size)\
                   + '/DirectionalModel/' + 'learner-' + problem_name + '-' + 'dim' + str(dimension_size) + '-'\
                   + 'bias' + str(bias_region) + '-'
    problem_path = './ExpLog/SyntheticProbsLog/' + problem_name + '/dimension' + str(dimension_size)\
                   + '/DirectionalModel/' + 'bias-' + problem_name + '-' + 'dim' + str(dimension_size) + '-'\
                   + 'bias' + str(bias_region) + '-'

    func = DistributedFunction(dimension, bias_region=[-0.5, 0.5])
    target_bias = [0.1 for _ in range(dimension_size)]
    func.setBias(target_bias)

    if problem_name == 'ackley':
        prob_fct = func.DisAckley
    else:
        prob_fct = func.DisSphere

    relate_error_list = []

    for prob_i in range(problem_num):

        print(
            start_index + prob_i,
            '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
        log_buffer.append(
            str(start_index + prob_i) +
            '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

        log_buffer.append('+++++++++++++++++++++++++++++++')
        log_buffer.append('optimization parameters')
        log_buffer.append('sample size: ' + str(sample_size))
        log_buffer.append('budget: ' + str(budget))
        log_buffer.append('positive num: ' + str(positive_num))
        log_buffer.append('random probability: ' + str(rand_probability))
        log_buffer.append('uncertain bits: ' + str(uncertain_bit))
        log_buffer.append('advance num: ' + str(adv_threshold))
        log_buffer.append('+++++++++++++++++++++++++++++++')
        log_buffer.append('problem parameters')
        log_buffer.append('dimension size: ' + str(dimension_size))
        log_buffer.append('problem name: ' + problem_name)
        log_buffer.append('bias_region: ' + str(bias_region))
        log_buffer.append('+++++++++++++++++++++++++++++++')

        problem_file = problem_path + str(start_index + prob_i) + '.txt'
        problem_str = fo.FileReader(problem_file)[0].split(',')
        problem_index = int(problem_str[0])
        problem_bias = string2list(problem_str[1])
        if problem_index != (start_index + prob_i):
            print('problem error!')
            exit(0)
        print('source bias: ', problem_bias)
        log_buffer.append('source bias: ' + list2string(problem_bias))

        residual = np.array(target_bias) - np.array(problem_bias)
        this_distance = np.sum(residual * residual)  # squared L2 distance to the source bias

        learner_file = learner_path + str(start_index + prob_i) + '.pkl'
        log_buffer.append('learner file: ' + learner_file)
        print('learner file: ', learner_file)

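        # load the pre-trained learner network saved for this problem instance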
        net = torch.load(learner_file)

        net_list = [net]

        opt_error_list = []

        for i in range(opt_repeat):

            print('optimize ', i,
                  '===================================================')
            log_buffer.append(
                'optimize ' + str(i) +
                '===================================================')

            exp_racos = ExpRacosOptimization(dimension, net_list)

            start_t = time.time()
            exp_racos.exp_mix_opt(obj_fct=prob_fct,
                                  ss=sample_size,
                                  bud=budget,
                                  pn=positive_num,
                                  rp=rand_probability,
                                  ub=uncertain_bit,
                                  at=adv_threshold)
            end_t = time.time()

            print('total budget is ', budget)
            log_buffer.append('total budget is ' + str(budget))

            hour, minute, second = time_formulate(start_t, end_t)
            print('spending time: ', hour, ':', minute, ':', second)
            log_buffer.append('spending time: ' + str(hour) + '+' +
                              str(minute) + '+' + str(second))

            optimal = exp_racos.get_optimal()
            opt_error = optimal.get_fitness()
            optimal_x = optimal.get_features()

            opt_error_list.append(opt_error)
            print('validation optimal value: ', opt_error)
            log_buffer.append('validation optimal value: ' + str(opt_error))
            print('optimal x: ', optimal_x)
            log_buffer.append('optimal nn structure: ' +
                              list2string(optimal_x))

        opt_mean = np.mean(np.array(opt_error_list))
        relate_error_list.append([this_distance, opt_mean])
        opt_std = np.std(np.array(opt_error_list))
        print('--------------------------------------------------')
        print('optimization result: ', opt_mean, '#', opt_std)
        log_buffer.append('--------------------------------------------------')
        log_buffer.append('optimization result: ' + str(opt_mean) + '#' +
                          str(opt_std))

    result_path = './Results/SyntheticProbs/' + problem_name + '/dimension' + str(
        dimension_size) + '/'
    relate_error_file = result_path + 'relate-error-' + problem_name + '-dim' + str(dimension_size) + '-bias'\
                            + str(bias_region) + '.txt'
    temp_buffer = []
    for i in range(len(relate_error_list)):
        relate, error = relate_error_list[i]
        temp_buffer.append(str(relate) + ',' + str(error))
    print('relate error logging: ', relate_error_file)
    log_buffer.append('relate error logging: ' + relate_error_file)
    fo.FileWriter(relate_error_file, temp_buffer, style='w')

    optimization_log_file = result_path + 'opt-log-' + problem_name + '-dim' + str(dimension_size) + '-bias'\
                            + str(bias_region) + '.txt'
    print('optimization logging: ', optimization_log_file)
    fo.FileWriter(optimization_log_file, log_buffer, style='w')
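
Most of these examples also call time_formulate(start_t, end_t), which is not defined in this file. A plausible sketch, assuming it simply splits the elapsed wall-clock time into hours, minutes, and seconds:

def time_formulate(start_t, end_t):
    # split elapsed seconds into (hour, minute, second) integers
    elapsed = int(end_t - start_t)
    hour = elapsed // 3600
    minute = (elapsed % 3600) // 60
    second = elapsed % 60
    return hour, minute, second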
Code Example #3
def chosen_single_classifier(dataset_list):

    data_path = 'data_set/'
    logging_path = 'results/baseline/'

    repeat = 5

    for dataset_name in dataset_list:

        log_buffer = []
        print('========================================================')
        log_buffer.append(
            '========================================================')
        print('dataset: ', dataset_name)
        log_buffer.append('dataset: ' + dataset_name)

        train_file = data_path + dataset_name + '/' + dataset_name + '_train_data.pkl'
        test_file = data_path + dataset_name + '/' + dataset_name + '_test_data.pkl'

        train_feature, train_label, test_feature, test_label = dataset_reader(
            train_file, test_file)

        print('     train feature size:', train_feature.shape[0],
              ', feature dimension:', train_feature.shape[1])
        log_buffer.append('     train feature size:' +
                          str(train_feature.shape[0]) +
                          ', feature dimension:' + str(train_feature.shape[1]))
        print('     test feature size:', test_feature.shape[0],
              ', feature dimension:', test_feature.shape[1])
        log_buffer.append('     test feature size:' + str(test_feature.shape[0]) +
                          ', feature dimension:' + str(test_feature.shape[1]))

        dtc = DecisionTreeClassifier()
        mlpc = MLPClassifier()
        lr = LogisticRegression()
        svc = classes.SVC()
        gpc = GaussianProcessClassifier()
        pac = PassiveAggressiveClassifier()
        gnb = GaussianNB()
        sgdc = SGDClassifier()
        rfc = RandomForestClassifier()
        knn = KNeighborsClassifier()

        classifiers = [dtc, mlpc, lr, svc, gpc, pac, gnb, sgdc, rfc, knn]

        classifier_names = [
            'Decision Tree Classifier', 'MLPClassifier', 'LogisticRegression',
            'SVC', 'Gaussian Process Classifier',
            'Passive Aggressive Classifier', 'GaussianNB', 'SGDClassifier',
            'Random Forest Classifier', 'K-Neighbors Classifier'
        ]

        classifier_vali = []
        for c_i in range(len(classifiers)):

            classifier = classifiers[c_i]
            classifier_name = classifier_names[c_i]

            print('--------------------------------------------------------')
            log_buffer.append(
                '--------------------------------------------------------')
            print(classifier_name, ':')
            log_buffer.append(classifier_name + ':')
            start_t = time.time()
            vali_error = validation_error(classifier,
                                          train_feature,
                                          train_label,
                                          k=5)
            end_t = time.time()
            hour, minute, second = time_formulate(start_t, end_t)
            print('     training time: ', hour, ' hours, ', minute,
                  ' minutes, ', second, ' seconds')
            log_buffer.append('     training time: ' + str(hour) + ' hours, ' +
                              str(minute) + ' minutes, ' + str(second) +
                              ' seconds')
            classifier_vali.append(vali_error)
            print('validation error:', vali_error)
            log_buffer.append('validation error: ' + str(vali_error))

        min_index = classifier_vali.index(min(classifier_vali))
        best_c = classifiers[min_index]
        best_c_name = classifier_names[min_index]

        print('test best============================================')
        print('best classifier: ', best_c_name)
        log_buffer.append('===================================')
        log_buffer.append('best classifier: ' + best_c_name)

        test_errors = []
        for r_i in range(repeat):
            print('test repeat ', r_i, '-----------------------------')
            start_t = time.time()
            best_c = best_c.fit(train_feature, train_label)
            end_t = time.time()
            hour, minute, second = time_formulate(start_t, end_t)
            print('     training time: ', hour, ' hours, ', minute,
                  ' minutes, ', second, ' seconds')
            log_buffer.append('     training time: ' + str(hour) + ' hours, ' +
                              str(minute) + ' minutes, ' + str(second) +
                              ' seconds')
            predictions = best_c.predict(test_feature)
            accuracy = accuracy_score(test_label, predictions)
            print('error: ', 1 - accuracy)
            test_errors.append(1 - accuracy)

        log_buffer.append('errors: ' + list2string(test_errors))
        mean_error = np.mean(np.array(test_errors))
        print('mean error: ', mean_error)
        log_buffer.append('mean_error: ' + str(mean_error))

        logging_file = logging_path + dataset_name + '_chosen_single.txt'
        print(dataset_name, ' logging...')
        fo.FileWriter(logging_file, log_buffer, 'w')
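
chosen_single_classifier depends on validation_error(classifier, features, labels, k), which is also not shown; the classifier constructors above are the standard scikit-learn estimators (classes.SVC referring to the SVC in sklearn.svm). A minimal sketch under the assumption that validation_error returns the mean k-fold cross-validation error (1 - accuracy):

def validation_error(classifier, train_feature, train_label, k=5):
    # mean k-fold cross-validation error (1 - accuracy)
    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(classifier, train_feature, train_label, cv=k)
    return 1.0 - scores.mean()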
Code Example #4
def dataset_processing():
    path = 'data_set/'
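    # Each "if False:" block below configures one dataset; exactly one dataset
    # block is flipped to True per run (house_vote here), together with the
    # matching processing branch (No.1 here).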
    # feature types: False means categorical, True means numerical
    # cylinder
    if False:
        dataset_name = 'cylinder'
        ori_file = 'bands.data.txt'
        feature_types = [True]
        for i in range(19):
            feature_types.append(False)
        for i in range(19):
            feature_types.append(True)
        feature_types.append(False)
        label_index = 39

    # abalone
    if False:
        dataset_name = 'abalone'
        ori_file = 'abalone.data.txt'
        feature_types = [False]
        for i in range(7):
            feature_types.append(True)
        feature_types.append(False)
        label_index = 8

    # anneal
    if False:
        dataset_name = 'annealing'
        ori_train_file = 'anneal.data.txt'
        ori_test_file = 'anneal.test.txt'
        feature_types = [False for i in range(39)]
        number_index = [3, 4, 7, 12, 32, 33, 34, 37]
        for index in number_index:
            feature_types[index] = True
        useless_index = [
            10, 12, 13, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
            35, 37
        ]
        label_index = 38

    # balanceScale
    if False:
        dataset_name = 'balanceScale'
        ori_file = 'balance-scale.data.txt'
        feature_types = [False, True, True, True, True]
        label_index = 0

    # banknote
    if False:
        dataset_name = 'banknote'
        ori_file = 'data_banknote_authentication.txt'
        feature_types = [True, True, True, True, False]
        label_index = 4

    # car
    if False:
        dataset_name = 'car'
        ori_file = 'car.data.txt'
        feature_types = [False, False, False, False, False, False, False]
        label_index = 6

    # chess
    if False:
        dataset_name = 'chess'
        ori_file = 'chess.data.txt'
        feature_types = [False for i in range(37)]
        label_index = 36

    # chess2
    if False:
        dataset_name = 'chess2'
        ori_file = 'krkopt.data.txt'
        feature_types = [False for i in range(7)]
        label_index = 6

    # cmc
    if False:
        dataset_name = 'cmc'
        ori_file = 'cmc.data.txt'
        feature_types = [
            True, False, False, True, False, False, False, False, False, False
        ]
        label_index = 9

    # CNAE9
    if False:
        dataset_name = 'CNAE9'
        ori_file = 'CNAE-9.data.txt'
        feature_types = [False]
        for i in range(856):
            feature_types.append(True)
        label_index = 0

    # credit
    if False:
        dataset_name = 'credit'
        ori_file = 'crx.data.txt'
        feature_types = [
            False, True, True, False, False, False, False, True, False, False,
            True, False, False, True, True, False
        ]
        label_index = 15

    # eeg
    if False:
        dataset_name = 'eeg'
        ori_file = 'EEG_Eye_State.arff.txt'
        feature_types = [True for i in range(14)]
        feature_types.append(False)
        label_index = 14

    # german credit
    if False:
        dataset_name = 'german credit'
        ori_file = 'german.data.txt'
        feature_types = [False for i in range(21)]
        feature_types[1] = True
        feature_types[4] = True
        feature_types[7] = True
        feature_types[10] = True
        feature_types[12] = True
        feature_types[15] = True
        feature_types[17] = True
        label_index = 20

    # gisette, uses No.3 processing branch
    if False:
        dataset_name = 'gisette'
        ori_train_data = 'gisette_train.data.txt'
        ori_train_label = 'gisette_train.labels.txt'
        ori_test_data = 'gisette_valid.data'
        ori_test_label = 'gisette_valid.labels'
        feature_types = [True for i in range(5000)]

    # jsbach, No.1
    if False:
        dataset_name = 'jsbach'
        ori_file = 'jsbach_chorals_harmony.data'
        feature_types = [False for i in range(17)]
        feature_types[15] = True
        label_index = 16

    # imageSegmentation_car, No.2
    if False:
        dataset_name = 'imageSegmentation_car'
        ori_train_file = 'segmentation.data.txt'
        ori_test_file = 'segmentation.test.txt'
        feature_types = [True for i in range(20)]
        feature_types[0] = False
        useless_index = []
        label_index = 0

    # iris, No.1
    if False:
        dataset_name = 'iris'
        ori_file = 'iris.data'
        feature_types = [True, True, True, True, False]
        label_index = 4

    # letterRecognition, No.1
    if False:
        dataset_name = 'letterRecognition'
        ori_file = 'letter-recognition.data'
        feature_types = [True for i in range(17)]
        feature_types[0] = False
        label_index = 0

    # madelon, No.3
    if False:
        dataset_name = 'madelon'
        ori_train_data = 'madelon_train.data.txt'
        ori_train_label = 'madelon_train.labels.txt'
        ori_test_data = 'madelon_valid.data.txt'
        ori_test_label = 'madelon_valid.labels.txt'
        feature_types = [True for i in range(500)]

    # magic04, No.1
    if False:
        dataset_name = 'magic04'
        ori_file = 'magic04.data.txt'
        feature_types = [True for i in range(11)]
        feature_types[10] = False
        label_index = 10

    # Diabetic Retinopathy Debrecen Data Set Data Set, No.1
    if False:
        dataset_name = 'messidor'
        ori_file = 'messidor_features.arff'
        feature_types = [True for i in range(20)]
        feature_types[19] = False
        label_index = 19

    # mushroom, No.1
    if False:
        dataset_name = 'mushroom'
        ori_file = 'agaricus-lepiota.data.txt'
        feature_types = [False for i in range(23)]
        label_index = 0

    # nursery, No.1
    if False:
        dataset_name = 'nursery'
        ori_file = 'nursery.data.txt'
        feature_types = [False for i in range(9)]
        label_index = 8

    # occupancy, No.4, delete the 0th and 1st features
    if False:
        dataset_name = 'occupancy'
        ori_file = 'datatraining.txt'
        feature_types = [False, False, True, True, True, True, True, False]
        useless_index = [0, 1]
        label_index = 7

    # seismic, No.1
    if False:
        dataset_name = 'seismic'
        ori_file = 'seismic-bumps.arff.txt'
        feature_types = [True for i in range(19)]
        feature_types[0] = False
        feature_types[1] = False
        feature_types[2] = False
        feature_types[7] = False
        feature_types[18] = False
        label_index = 18

    # spambase, No.1
    if False:
        dataset_name = 'spambase'
        ori_file = 'spambase.data.txt'
        feature_types = [True for i in range(58)]
        feature_types[57] = False
        label_index = 57

    # statlogSegment, No.1
    if False:
        dataset_name = 'statlogSegment'
        ori_file = 'segment.data.txt'
        feature_types = [True for i in range(20)]
        feature_types[19] = False
        label_index = 19

    # wilt, No.5
    if False:
        dataset_name = 'wilt'
        ori_train_file = 'training.csv'
        ori_test_file = 'testing.csv'
        feature_types = [False, True, True, True, True, True]
        label_index = 0

    # wine_quality_red, No.1
    if False:
        dataset_name = 'wine_quality_red'
        ori_file = 'winequality-red.csv'
        feature_types = [True for i in range(12)]
        feature_types[11] = False
        label_index = 11

    # wine_quality_white, No.1
    if False:
        dataset_name = 'wine_quality_white'
        ori_file = 'winequality-white.csv'
        feature_types = [True for i in range(12)]
        feature_types[11] = False
        label_index = 11

    # yeast, No.4
    if False:
        dataset_name = 'yeast'
        ori_file = 'yeast.data.txt'
        feature_types = [
            False, True, True, True, True, True, True, True, True, False
        ]
        useless_index = [0]
        label_index = 9

    # adult, No.6
    if False:
        dataset_name = 'adult'
        ori_train_file = 'adult.data'
        ori_test_file = 'adult.test'
        feature_types = [
            True, False, True, False, True, False, False, False, False, False,
            True, True, True, False, False
        ]
        label_index = 14

    # arcene, No.3
    if False:
        dataset_name = 'arcene'
        ori_train_data = 'arcene_train.data'
        ori_train_label = 'arcene_train.labels'
        ori_test_data = 'arcene_valid.data'
        ori_test_label = 'arcene_valid.labels'
        feature_types = [True for i in range(10000)]

    # breast_cancer_wisconsin, No.4
    if False:
        dataset_name = 'breast_cancer_wisconsin'
        ori_file = 'breast-cancer-wisconsin.data'
        feature_types = [False for i in range(11)]
        useless_index = [0]
        label_index = 10

    # covtype, No.1
    if False:
        dataset_name = 'covtype'
        ori_file = 'covtype.data'
        feature_types = [True for i in range(10)]
        for i in range(45):
            feature_types.append(False)
        label_index = 54

    # drug_consumption, No.1
    if False:
        dataset_name = 'drug_consumption'
        ori_file = 'drug_consumption.data'
        feature_types = [False]
        for i in range(12):
            feature_types.append(True)
        for i in range(19):
            feature_types.append(False)
        useless_index = [0]
        label_index = 31

    # ecoli, No.1
    if False:
        dataset_name = 'ecoli'
        ori_file = 'ecoli.data'
        feature_types = [
            False, True, True, True, True, True, True, True, False
        ]
        label_index = 8

    # flags, No.4, choose religion as prediction target
    if False:
        dataset_name = 'flag'
        ori_file = 'flag.data'
        feature_types = [False for i in range(30)]
        feature_types[3] = True
        feature_types[4] = True
        for i in range(3):
            feature_types[7 + i] = True
        for i in range(5):
            feature_types[18 + i] = True
        useless_index = [0]
        label_index = 6

    # glass, No.4
    if False:
        dataset_name = 'glass'
        ori_file = 'glass.data'
        feature_types = [False]
        for i in range(9):
            feature_types.append(True)
        feature_types.append(False)
        useless_index = [0]
        label_index = 10

    # horse_colic, No.2
    if False:
        dataset_name = 'horse_colic'
        ori_train_file = 'horse-colic.data'
        ori_test_file = 'horse-colic.test'
        feature_types = [False for i in range(28)]
        feature_types[3] = True
        feature_types[4] = True
        feature_types[5] = True
        feature_types[15] = True
        feature_types[18] = True
        feature_types[19] = True
        feature_types[21] = True
        useless_index = [2]
        label_index = 23

    # HTRU2, No.1, line is split by '\r'
    if False:
        dataset_name = 'HTRU2'
        ori_file = 'HTRU_2.arff'
        feature_types = [True for i in range(8)]
        feature_types.append(False)
        label_index = 8

    # wdbc, No.4
    if False:
        dataset_name = 'wdbc'
        ori_file = 'wdbc.data'
        feature_types = [True for i in range(32)]
        feature_types[0] = False
        feature_types[1] = False
        useless_index = [0]
        label_index = 1

    # wpbc, No.4
    if False:
        dataset_name = 'wpbc'
        ori_file = 'wpbc.data'
        feature_types = [True for i in range(35)]
        feature_types[0] = False
        feature_types[1] = False
        useless_index = [0]
        label_index = 1

    if True:
        dataset_name = 'house_vote'
        ori_file = 'house-votes-84.data.txt'
        feature_types = [False for i in range(17)]
        useless_index = []
        label_index = 0

    # processing--------------------------------------------------------------
    # No.1
    if True:
        # a single data file: split it into training and testing sets
        # cylinder, abalone, balanceScale, banknote, car, chess, chess2
        file_name = path + dataset_name + '/' + ori_file
        # reading data from file
        data = data_reader(file_name, feature_types)
        # missing value processing
        data = missing_value_processing(data, feature_types)
        # extract label
        data, label = extracting_label(data, label_index)
        del feature_types[label_index]
        # categorical feature encoding
        data = categorical_feature_encoding(data, feature_types)
        label, label_name = label_encoding(label)
        # split data into training and testing
        train_data, train_label, test_data, test_label = split_data(
            data, label, percent=0.2)

    # No.2
    if False:
        # training and testing data are in different files
        # anneal
        train_file_name = path + dataset_name + '/' + ori_train_file
        test_file_name = path + dataset_name + '/' + ori_test_file
        # get training and testing data
        train_data = data_reader(train_file_name, feature_types)
        test_data = data_reader(test_file_name, feature_types)
        # extract label from data
        train_data, train_label = extracting_label(train_data, label_index)
        test_data, test_label = extracting_label(test_data, label_index)
        del feature_types[label_index]
        # delete useless feature from data
        train_data = feature_selected(train_data, useless_index)
        test_data = feature_selected(test_data, useless_index)
        new_feature_types = []
        for i in range(len(feature_types)):
            if not (i in useless_index):
                new_feature_types.append(feature_types[i])
        feature_types = new_feature_types
        # processing training and testing data at the same time
        train_data_len = len(train_data)
        test_data_len = len(test_data)
        data = []
        data.extend(train_data)
        data.extend(test_data)
        label = []
        label.extend(train_label)
        label.extend(test_label)
        # missing feature processing
        data = missing_value_processing(data, feature_types)
        # encoding categorical feature
        data = categorical_feature_encoding(data, feature_types)
        # encoding label
        label, label_name = label_encoding(label)
        # regain training and testing data from data
        train_data = []
        train_label = []
        for i in range(train_data_len):
            train_data.append(data[0])
            train_label.append(label[0])
            del data[0]
            del label[0]
        test_data = data
        test_label = label

    # No.3
    if False:
        # training and testing data are in different files
        # gisette, madelon
        train_data_file_name = path + dataset_name + '/' + ori_train_data
        train_label_file_name = path + dataset_name + '/' + ori_train_label
        test_data_file_name = path + dataset_name + '/' + ori_test_data
        test_label_file_name = path + dataset_name + '/' + ori_test_label
        # get training and testing data
        train_data = data_reader(train_data_file_name, feature_types)
        train_label = data_reader(train_label_file_name, [False])
        train_label = np.array(train_label).reshape(len(train_label)).tolist()
        test_data = data_reader(test_data_file_name, feature_types)
        test_label = data_reader(test_label_file_name, [False])
        test_label = np.array(test_label).reshape(len(test_label)).tolist()
        # processing training and testing data at the same time
        train_data_len = len(train_data)
        print('training data length:', train_data_len)
        test_data_len = len(test_data)
        print('testing data length:', test_data_len)
        data = []
        data.extend(train_data)
        data.extend(test_data)
        label = []
        label.extend(train_label)
        label.extend(test_label)
        # missing feature processing
        data = missing_value_processing(data, feature_types)
        # encoding categorical feature
        data = categorical_feature_encoding(data, feature_types)
        # encoding label
        label, label_name = label_encoding(label)
        # regain training and testing data from data
        train_data = []
        train_label = []
        for i in range(train_data_len):
            train_data.append(data[0])
            train_label.append(label[0])
            del data[0]
            del label[0]
        test_data = data
        test_label = label

    # No.4
    # delete some feature based on No.1
    # occupancy, yeast
    if False:
        file_name = path + dataset_name + '/' + ori_file
        # reading data from file
        data = data_reader(file_name, feature_types)
        # missing value processing
        data = missing_value_processing(data, feature_types)
        # extract label
        data, label = extracting_label(data, label_index)
        del feature_types[label_index]
        # delete useless feature
        data = feature_selected(data, useless_index)
        new_feature_types = []
        for i in range(len(feature_types)):
            if not (i in useless_index):
                new_feature_types.append(feature_types[i])
        feature_types = new_feature_types
        # categorical feature encoding
        data = categorical_feature_encoding(data, feature_types)
        label, label_name = label_encoding(label)
        # split data into training and testing
        train_data, train_label, test_data, test_label = split_data(
            data, label, percent=0.2)

    # No.5
    # reading csv file
    if False:
        train_file_name = path + dataset_name + '/' + ori_train_file
        test_file_name = path + dataset_name + '/' + ori_test_file

        train_data = csv_data_reader(train_file_name, feature_types)
        test_data = csv_data_reader(test_file_name, feature_types)
        train_data, train_label = extracting_label(train_data, label_index)
        test_data, test_label = extracting_label(test_data, label_index)
        del feature_types[label_index]

        train_data_len = len(train_data)
        test_data_len = len(test_data)

        data = []
        label = []
        data.extend(train_data)
        data.extend(test_data)
        label.extend(train_label)
        label.extend(test_label)

        data = missing_value_processing(data, feature_types)

        data = categorical_feature_encoding(data, feature_types)

        label, label_name = label_encoding(label)

        # regain training and testing data from data
        train_data = []
        train_label = []
        for i in range(train_data_len):
            train_data.append(data[0])
            train_label.append(label[0])
            del data[0]
            del label[0]
        test_data = data
        test_label = label

    # No.6
    # training and testing data are in different files, processing based on No.1
    if False:
        train_file_name = path + dataset_name + '/' + ori_train_file
        test_file_name = path + dataset_name + '/' + ori_test_file

        train_data = data_reader(train_file_name, feature_types)
        test_data = data_reader(test_file_name, feature_types)
        train_data, train_label = extracting_label(train_data, label_index)
        test_data, test_label = extracting_label(test_data, label_index)
        del feature_types[label_index]

        train_data_len = len(train_data)
        test_data_len = len(test_data)

        data = []
        label = []
        data.extend(train_data)
        data.extend(test_data)
        label.extend(train_label)
        label.extend(test_label)

        data = missing_value_processing(data, feature_types)

        data = categorical_feature_encoding(data, feature_types)

        label, label_name = label_encoding(label)

        # regain training and testing data from data
        train_data = []
        train_label = []
        for i in range(train_data_len):
            train_data.append(data[0])
            train_label.append(label[0])
            del data[0]
            del label[0]
        test_data = data
        test_label = label

    print(
        '-------------------------------------------------------------------------------------'
    )
    print('training data length:', len(train_data), ', testing data length:',
          len(test_data))
    print('data feature size:', len(train_data[0]))
    print(
        '-------------------------------------------------------------------------------------'
    )
    train_data_file = path + dataset_name + '/' + dataset_name + '_train_data.pkl'
    test_data_file = path + dataset_name + '/' + dataset_name + '_test_data.pkl'
    label_class_file = path + dataset_name + '/' + dataset_name + '_label_class.txt'

    # writing train feature and label into pickle file
    print('writing training data...')
    f = open(train_data_file, 'wb')
    pickle.dump(train_data, f, 2)
    pickle.dump(train_label, f, 2)
    f.close()

    # writing test feature and label into pickle file
    print('writing testing data...')
    f = open(test_data_file, 'wb')
    pickle.dump(test_data, f, 2)
    pickle.dump(test_label, f, 2)
    f.close()

    # writing label class into txt file
    print('writing label class file...')
    buff = []
    buff.append(list2string(label_name))
    fo.FileWriter(label_class_file, buff, style='w')
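
dataset_processing calls split_data(data, label, percent=0.2), which is not defined in this file. Judging by how its return values are used, percent is the fraction of samples held out for testing; a minimal sketch:

def split_data(data, label, percent=0.2):
    # shuffle, then hold out a fraction `percent` of the samples for testing
    import random
    indices = list(range(len(data)))
    random.shuffle(indices)
    test_len = int(len(data) * percent)
    train_data = [data[i] for i in indices[test_len:]]
    train_label = [label[i] for i in indices[test_len:]]
    test_data = [data[i] for i in indices[:test_len]]
    test_label = [label[i] for i in indices[:test_len]]
    return train_data, train_label, test_data, test_label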
Code Example #5
def synthetic_problems_sample(budget=500,
                              problem_name='sphere',
                              problem_size=5,
                              max_bias=0.5,
                              bias_step=0):
    sample_size = 10  # the instance number of sampling in an iteration
    positive_num = 2  # the set size of PosPop
    rand_probability = 0.99  # the probability of sample in model
    uncertain_bits = 2  # the dimension size that is sampled randomly

    start_index = 0

    repeat_num = 10

    exp_path = path + '/ExpLog/SyntheticProbsLog/'

    bias = 0

    dimension_size = 10

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    if bias_step > 0:
        problem_name += '_group-sample'

    for prob_i in range(problem_size):

        if bias_step > 0:
            # group sampling: step the bias up once at each group boundary
            if prob_i % int(problem_size / max_bias * bias_step) == 0:
                bias += bias_step
        else:
            # no group sampling: use the maximum bias for every problem
            bias = max_bias

        # bias log format: 'index,bias_list: dim1 dim2 dim3...'
        bias_log = []
        running_log = []
        running_log.append('+++++++++++++++++++++++++++++++++')
        running_log.append('optimization setting: ')
        running_log.append('sample_size: ' + str(sample_size))
        running_log.append('positive_num: ' + str(positive_num))
        running_log.append('rand_probability: ' + str(rand_probability))
        running_log.append('uncertain_bits: ' + str(uncertain_bits))
        running_log.append('budget: ' + str(budget))
        running_log.append('group sample step: ' + str(bias_step))
        running_log.append('+++++++++++++++++++++++++++++++++')

        print(problem_name, ': ', start_index + prob_i,
              ' ==============================================')
        running_log.append(problem_name + ': ' + str(start_index + prob_i) +
                           ' ==============================================')

        # problem setting
        func = DistributedFunction(dim=dimension, bias_region=[-bias, bias])
        if 'ackley' in problem_name:
            prob = func.DisAckley
        elif 'sphere' in problem_name:
            prob = func.DisSphere
        elif 'rosenbrock' in problem_name:
            prob = func.DisRosenbrock
        else:
            print('Wrong function!')
            return

        # bias log
        bias_log.append(str(prob_i) + ',' + list2string(func.getBias()))
        print('function: ', problem_name, ', this bias: ', func.getBias())
        running_log.append('function: ' + problem_name + ', this bias: ' +
                           list2string(func.getBias()))

        # optimization setting
        optimizer = RacosOptimization(dimension)

        positive_set = []
        negative_set = []
        new_sample_set = []
        label_set = []

        for repeat_i in range(repeat_num):
            print('repeat ', repeat_i,
                  ' ----------------------------------------')
            running_log.append('repeat ' + str(repeat_i) +
                               ' ----------------------------------------')

            # optimization process
            start_t = time.time()
            optimizer.mix_opt(obj_fct=prob,
                              ss=sample_size,
                              bud=budget,
                              pn=positive_num,
                              rp=rand_probability,
                              ub=uncertain_bits)
            end_t = time.time()
            hour, minute, second = time_formulate(start_t, end_t)

            # optimization results
            optimal = optimizer.get_optimal()
            print('optimal v: ', optimal.get_fitness(), ' - ',
                  optimal.get_features())
            running_log.append('optimal v: ' + str(optimal.get_fitness()) +
                               ' - ' + list2string(optimal.get_features()))
            print('spent time: ', hour, ':', minute, ':', second)
            running_log.append('spent time: ' + str(hour) + ':' + str(minute) +
                               ':' + str(second))

            # log samples
            this_positive, this_negative, this_new, this_label = optimizer.get_log()

            print('sample number: ', len(this_positive), ':', len(this_label))
            running_log.append('sample number: ' + str(len(this_positive)) +
                               ':' + str(len(this_label)))

            positive_set.extend(this_positive)
            negative_set.extend(this_negative)
            new_sample_set.extend(this_new)
            label_set.extend(this_label)
        print('----------------------------------------------')
        print('sample finish!')
        print('all sample number: ', len(positive_set), '-', len(negative_set),
              '-', len(new_sample_set), '-', len(label_set))
        running_log.append('----------------------------------------------')
        running_log.append('all sample number: ' + str(len(positive_set)) +
                           '-' + str(len(negative_set)) + '-' +
                           str(len(new_sample_set)) + '-' +
                           str(len(label_set)))

        data_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/DataLog/' + \
                        'data-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                        + str(bias) + '-' + str(start_index + prob_i) + '.pkl'
        bias_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + 'bias-' \
                        + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' + str(bias) \
                        + '-' + str(start_index + prob_i) + '.txt'
        running_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + \
                           'running-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                           + str(bias) + '-' + str(start_index + prob_i) + '.txt'

        print('data logging: ', data_log_file)
        running_log.append('data log path: ' + data_log_file)
        save_log(positive_set, negative_set, new_sample_set, label_set,
                 data_log_file)

        print('bias logging: ', bias_log_file)
        running_log.append('bias log path: ' + bias_log_file)
        fo.FileWriter(bias_log_file, bias_log, style='w')

        print('running logging: ', running_log_file)
        fo.FileWriter(running_log_file, running_log, style='w')

    return
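
save_log(positive_set, negative_set, new_sample_set, label_set, file_name) is also undefined here, and the path prefix used to build exp_path above is likewise assumed to be a module-level constant. Since Code Example #4 writes pickle files with pickle.dump(obj, f, 2), a plausible sketch of save_log in the same style:

def save_log(positive_set, negative_set, new_sample_set, label_set, file_name):
    # pickle the four sample sets into one file, in a fixed order
    import pickle
    with open(file_name, 'wb') as f:
        pickle.dump(positive_set, f, 2)
        pickle.dump(negative_set, f, 2)
        pickle.dump(new_sample_set, f, 2)
        pickle.dump(label_set, f, 2)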
Code Example #6
def run_racos():
    # parameters
    sample_size = 10  # the instance number of sampling in an iteration
    budget = 500  # budget in online style
    positive_num = 2  # the set size of PosPop
    rand_probability = 0.99  # the probability of sample in model
    uncertain_bit = 1  # the dimension size that is sampled randomly
    bias_region = 0.5

    repeat = 10

    # NOTE: problem_name and log_buffer are read below but never set in this
    # snippet; they are defined here, as assumptions, so the function is
    # self-contained
    problem_name = 'sphere'  # assumed default; 'rosenbrock' selects DisRosenbrock
    log_buffer = []

    # dimension setting
    dimension_size = 10

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    func = DistributedFunction(dim=dimension,
                               bias_region=[-bias_region, bias_region])
    if problem_name == 'rosenbrock':
        prob = func.DisRosenbrock
    else:
        prob = func.DisSphere

    # optimization
    racos = RacosOptimization(dimension)
    opt_error_list = []

    for i in range(repeat):
        start_t = time.time()
        racos.mix_opt(prob,
                      ss=sample_size,
                      bud=budget,
                      pn=positive_num,
                      rp=rand_probability,
                      ub=uncertain_bit)
        end_t = time.time()

        optimal = racos.get_optimal()

        hour, minute, second = time_formulate(start_t, end_t)

        print('total budget is ', budget, '------------------------------')
        print('spending time: ', hour, ' hours ', minute, ' minutes ', second,
              ' seconds')
        print('optimal value: ', optimal.get_fitness())
        opt_error = optimal.get_fitness()
        optimal_x = optimal.get_features()

        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))
        print('optimal x: ', optimal_x)
        log_buffer.append('optimal nn structure: ' + list2string(optimal_x))
    opt_mean = np.mean(np.array(opt_error_list))
    opt_std = np.std(np.array(opt_error_list))
    print('--------------------------------------------------')
    print('optimization result: ', opt_mean, '#', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result: ' + str(opt_mean) + '#' +
                      str(opt_std))

    return opt_mean
Code Example #7
def run_for_synthetic_problem():
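    # NOTE: path (used to build result_path below) and the helpers
    # get_predicotrs, Experts, and ExpAdaRacosOptimization are assumed to be
    # imported or defined at module level in the original script.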

    sample_size = 10  # the instance number of sampling in an iteration
    budget = 50  # budget in online style
    positive_num = 2  # the set size of PosPop
    rand_probability = 0.99  # the probability of sample in model
    uncertain_bit = 1  # the dimension size that is sampled randomly
    adv_threshold = 10  # advance sample size

    opt_repeat = 10

    dimension_size = 10
    problem_name = 'sphere'
    bias_region = 0.5

    eta = 0.9
    step = 100

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    log_buffer = []

    # problem define
    func = DistributedFunction(dimension, bias_region=[-0.5, 0.5])
    target_bias = [0.2 for _ in range(dimension_size)]
    func.setBias(target_bias)

    if problem_name == 'ackley':
        prob_fct = func.DisAckley
    else:
        prob_fct = func.DisSphere

    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('optimization parameters')
    log_buffer.append('sample size: ' + str(sample_size))
    log_buffer.append('budget: ' + str(budget))
    log_buffer.append('positive num: ' + str(positive_num))
    log_buffer.append('random probability: ' + str(rand_probability))
    log_buffer.append('uncertain bits: ' + str(uncertain_bit))
    log_buffer.append('advance num: ' + str(adv_threshold))
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('problem parameters')
    log_buffer.append('dimension size: ' + str(dimension_size))
    log_buffer.append('problem name: ' + problem_name)
    log_buffer.append('bias: ' + list2string(target_bias))
    log_buffer.append('+++++++++++++++++++++++++++++++')

    predictors, load_buffer = get_predicotrs()
    expert = Experts(predictors=predictors, eta=eta, step=step)
    log_buffer.extend(load_buffer)

    opt_error_list = []

    for i in range(opt_repeat):
        print('optimize ', i,
              '===================================================')
        log_buffer.append(
            'optimize ' + str(i) +
            '===================================================')

        exp_racos = ExpAdaRacosOptimization(dimension, expert)

        start_t = time.time()
        exp_racos.exp_ada_mix_opt(obj_fct=prob_fct,
                                  ss=sample_size,
                                  bud=budget,
                                  pn=positive_num,
                                  rp=rand_probability,
                                  ub=uncertain_bit,
                                  at=adv_threshold)
        end_t = time.time()

        print('total budget is ', budget)
        log_buffer.append('total budget is ' + str(budget))

        hour, minute, second = time_formulate(start_t, end_t)
        print('spending time: ', hour, ':', minute, ':', second)
        log_buffer.append('spending time: ' + str(hour) + '+' + str(minute) +
                          '+' + str(second))

        optimal = exp_racos.get_optimal()
        opt_error = optimal.get_fitness()
        optimal_x = optimal.get_features()

        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))
        print('optimal x: ', optimal_x)
        log_buffer.append('optimal nn structure: ' + list2string(optimal_x))

    opt_mean = np.mean(np.array(opt_error_list))
    opt_std = np.std(np.array(opt_error_list))
    print('--------------------------------------------------')
    print('optimization result: ', opt_mean, '#', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result: ' + str(opt_mean) + '#' +
                      str(opt_std))

    result_path = path + '/Results/Ada/' + problem_name + '/dimension' + str(
        dimension_size) + '/'

    optimization_log_file = result_path + 'opt-log-' + problem_name + '-dim' + str(dimension_size) + '-bias' \
                            + str(bias_region) + '.txt'
    print('optimization logging: ', optimization_log_file)
    fo.FileWriter(optimization_log_file, log_buffer, style='w')

    return
Code Example #8
def run(type):
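    # NOTE: this snippet reads several names from module scope (log_buffer,
    # predictors, nets, dimension, prob_fct, sample_size, budget, positive_num,
    # rand_probability, uncertain_bit, adv_threshold, eta, step, opt_repeat);
    # they are assumed to be defined elsewhere in the original script.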
    opt_error_list = []
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('Running: ' + type)
    log_buffer.append('+++++++++++++++++++++++++++++++')
    print('+++++++++++++++++++++++++++++++')
    print('Running: ' + type)
    print('+++++++++++++++++++++++++++++++')
    if type == 'ada':
        # pre=sorted(predictors,key=lambda a:a.dist)
        expert = Experts(predictors=predictors, eta=eta, bg=budget)

    for i in range(opt_repeat):
        print('optimize ', i,
              '===================================================')
        log_buffer.append(
            'optimize ' + str(i) +
            '===================================================')
        start_t = time.time()
        if type == 'ave':
            exp_racos = ExpRacosOptimization(dimension, nets)
            opt_error = exp_racos.exp_mix_opt(obj_fct=prob_fct,
                                              ss=sample_size,
                                              bud=budget,
                                              pn=positive_num,
                                              rp=rand_probability,
                                              ub=uncertain_bit,
                                              at=adv_threshold)
        elif type == 'ada':
            exp_racos = ExpAdaRacosOptimization(dimension, expert)
            opt_error = exp_racos.exp_ada_mix_opt(obj_fct=prob_fct,
                                                  ss=sample_size,
                                                  bud=budget,
                                                  pn=positive_num,
                                                  rp=rand_probability,
                                                  ub=uncertain_bit,
                                                  at=adv_threshold,
                                                  step=step)
        elif type == 'ground truth':
            exp_racos = ExpRacosOptimization(dimension, nets[:step])
            exp_racos.exp_mix_opt(obj_fct=prob_fct,
                                  ss=sample_size,
                                  bud=budget,
                                  pn=positive_num,
                                  rp=rand_probability,
                                  ub=uncertain_bit,
                                  at=adv_threshold)
        else:
            print('Wrong type!')
            return

        end_t = time.time()

        hour, minute, second = time_formulate(start_t, end_t)
        print('spending time: ', hour, ':', minute, ':', second)
        log_buffer.append('spending time: ' + str(hour) + '+' + str(minute) +
                          '+' + str(second))

        optimal = exp_racos.get_optimal()
        opt_error = optimal.get_fitness()
        optimal_x = optimal.get_features()

        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))
        print('optimal x: ', optimal_x)
        log_buffer.append('optimal nn structure: ' + list2string(optimal_x))

    opt_mean = np.mean(np.array(opt_error_list), axis=0)
    opt_std = np.std(np.array(opt_error_list), axis=0)
    print('--------------------------------------------------')
    print('optimization result for ' + str(opt_repeat) + ' times average: ',
          opt_mean, ', standard variance is: ', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result for ' + str(opt_repeat) +
                      ' times average: ' + str(opt_mean) +
                      ', standard variance is: ' + str(opt_std))

    return opt_mean, opt_std
Code Example #9
        print('Wrong function!')
        exit()

    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('optimization parameters')
    log_buffer.append('sample size: ' + str(sample_size))
    log_buffer.append('budget: ' + str(budget))
    log_buffer.append('positive num: ' + str(positive_num))
    log_buffer.append('random probability: ' + str(rand_probability))
    log_buffer.append('uncertain bits: ' + str(uncertain_bit))
    log_buffer.append('advance num: ' + str(adv_threshold))
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('problem parameters')
    log_buffer.append('dimension size: ' + str(dimension_size))
    log_buffer.append('problem name: ' + problem_name)
    log_buffer.append('bias: ' + list2string(target_bias))
    log_buffer.append('+++++++++++++++++++++++++++++++')

    predictors, nets = get_mixed_predicotrs()

    opt_mean_gt, opt_std_gt = run('ground truth')
    opt_mean_ada, opt_std_ada = run('ada')
    opt_mean_ave, opt_std_ave = run('ave')
    opt_mean_ne, opt_std_ne = run_no_expert()
    x = list(range(len(opt_mean_ada)))
    y0 = [opt_mean_ne for _ in range(len(opt_mean_ada))]
    plt.plot(x, y0)
    plt.plot(x, opt_mean_ada)
    plt.plot(x, opt_mean_ave)
    plt.show()
Code Example #10
def synthetic_problems_sample(prob_i):
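    # NOTE: like Code Example #8, this snippet assumes module-level definitions
    # (sample_size, positive_num, rand_probability, uncertain_bits, budget,
    # problem_name, start_index, problem_num, bias_region, dimension,
    # dimension_size, repeat_num, exp_path) plus the shared helper functions.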
    # bias log format: 'index,bias_list: dim1 dim2 dim3...'
    bias_log = []
    running_log = []
    running_log.append('+++++++++++++++++++++++++++++++++')
    running_log.append('optimization setting: ')
    running_log.append('sample_size: ' + str(sample_size))
    running_log.append('positive_num: ' + str(positive_num))
    running_log.append('rand_probability: ' + str(rand_probability))
    running_log.append('uncertain_bits: ' + str(uncertain_bits))
    running_log.append('budget: ' + str(budget))
    running_log.append('+++++++++++++++++++++++++++++++++')

    print(problem_name, ': ', start_index + prob_i,
          ' ==============================================')
    running_log.append(problem_name + ': ' + str(start_index + prob_i) +
                       ' ==============================================')

    # bias setting
    group_num = 10
    group_size = problem_num // group_num
    bias_step = bias_region / group_num
    new_bias_region = (prob_i // group_size) * bias_step

    # problem setting
    func = DistributedFunction(dim=dimension,
                               bias_region=[-new_bias_region, new_bias_region])
    if 'ackley' in problem_name:
        prob_fct = func.DisAckley
    elif 'sphere' in problem_name:
        prob_fct = func.DisSphere
    elif 'rosenbrock' in problem_name:
        prob_fct = func.DisRosenbrock
    else:
        print('Wrong Function!')
        exit()

    # bias log
    bias_log.append(
        str(prob_i + start_index) + ',' + list2string(func.getBias()))
    print('function: ', problem_name, ', this bias: ', func.getBias())
    running_log.append('function: ' + problem_name + ', this bias: ' +
                       list2string(func.getBias()))

    # optimization setting
    optimizer = RacosOptimization(dimension)

    positive_set = []
    negative_set = []
    new_sample_set = []
    label_set = []

    for repeat_i in range(repeat_num):
        print('repeat ', repeat_i, ' ----------------------------------------')
        running_log.append('repeat ' + str(repeat_i) +
                           ' ----------------------------------------')

        # optimization process
        start_t = time.time()
        optimizer.mix_opt(obj_fct=prob_fct,
                          ss=sample_size,
                          bud=budget,
                          pn=positive_num,
                          rp=rand_probability,
                          ub=uncertain_bits)
        end_t = time.time()
        hour, minute, second = time_formulate(start_t, end_t)

        # optimization results
        optimal = optimizer.get_optimal()
        print('optimal v: ', optimal.get_fitness(), ' - ',
              optimal.get_features())
        running_log.append('optimal v: ' + str(optimal.get_fitness()) + ' - ' +
                           list2string(optimal.get_features()))
        print('spent time: ', hour, ':', minute, ':', second)
        running_log.append('spent time: ' + str(hour) + ':' + str(minute) +
                           ':' + str(second))

        # log samples
        this_positive, this_negative, this_new, this_label = optimizer.get_log()

        print('sample number: ', len(this_positive), ':', len(this_label))
        running_log.append('sample number: ' + str(len(this_positive)) + ':' +
                           str(len(this_label)))

        positive_set.extend(this_positive)
        negative_set.extend(this_negative)
        new_sample_set.extend(this_new)
        label_set.extend(this_label)
    print('----------------------------------------------')
    print('sample finish!')
    print('all sample number: ', len(positive_set), '-', len(negative_set),
          '-', len(new_sample_set), '-', len(label_set))
    running_log.append('----------------------------------------------')
    running_log.append('all sample number: ' + str(len(positive_set)) + '-' +
                       str(len(negative_set)) + '-' +
                       str(len(new_sample_set)) + '-' + str(len(label_set)))

    data_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/DataLog/' + \
                    'data-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                    + str(bias_region) + '-' + str(start_index + prob_i) + '.pkl'
    bias_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + 'bias-' \
                    + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' + str(bias_region) \
                    + '-' + str(start_index + prob_i) + '.txt'
    running_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + \
                       'running-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                       + str(bias_region) + '-' + str(start_index + prob_i) + '.txt'

    print('data logging: ', data_log_file)
    running_log.append('data log path: ' + data_log_file)
    save_log(positive_set, negative_set, new_sample_set, label_set,
             data_log_file)

    print('bias logging: ', bias_log_file)
    running_log.append('bias log path: ' + bias_log_file)
    fo.FileWriter(bias_log_file, bias_log, style='w')

    print('running logging: ', running_log_file)
    fo.FileWriter(running_log_file, running_log, style='w')

    return