def main():
    config = utils.read_config()
    elogger = logger.get_logger()

    # 256x256 grids of accumulators for short-term and long-term traffic
    # features; each grid cell maps a key (e.g. a time bucket) to lists of
    # observed speeds and travel times
    speed_array = 'speeds'
    time_array = 'times'
    short_ttf = [
        [collections.defaultdict(lambda: {speed_array: [], time_array: []}) for _ in range(256)] for _ in range(256)
    ]
    long_ttf = [
        [collections.defaultdict(lambda: {speed_array: [], time_array: []}) for _ in range(256)] for _ in range(256)
    ]

    for data_file in config['data']:
        elogger.info('Generating G and T paths and extracting traffic features from {} ...'.format(data_file))

        data = utils.read_data(data_file)

        define_travel_grid_path(data, config['coords'], short_ttf, long_ttf, args.grid_size)

        elogger.info('Saving data extended with G and T paths to {}{}.\n'.format(args.data_destination_folder, data_file))
        utils.save_processed_data(data, args.data_destination_folder, data_file)

    elogger.info('Aggregating historical traffic features ...')
    utils.aggregate_historical_data(short_ttf, long_ttf)
    elogger.info('Saving extracted traffic features to {}'.format(args.ttf_destination_folder))
    utils.save_extracted_traffic_features(short_ttf, long_ttf, args.ttf_destination_folder)
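Every example on this page ultimately calls utils.save_processed_data, but the helper itself is never shown. A minimal sketch, assuming it is a plain pickle round-trip keyed by filename (the signature varies between examples, sometimes taking a destination folder as well); the folder name and .pkl layout here are assumptions, not the original implementation:

import os
import pickle

def save_processed_data(data, filename, folder='processed_data'):
    # Hypothetical sketch: persist any Python object (dict, ndarray, ...)
    # as <folder>/<filename>.pkl so load_processed_data can restore it.
    os.makedirs(folder, exist_ok=True)
    with open(os.path.join(folder, filename + '.pkl'), 'wb') as f:
        pickle.dump(data, f)

def load_processed_data(filename, folder='processed_data'):
    # Hypothetical counterpart used by the experiments below.
    with open(os.path.join(folder, filename + '.pkl'), 'rb') as f:
        return pickle.load(f)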
def load_process_digits(pathlist):
    # Load digit images from each path in pathlist, bucket them by stroke
    # thickness (min_thickness and max_thickness are module-level globals),
    # and save a per-dataset snapshot before combining everything.
    labels = np.array([])
    combined_data_dict = {}
    for t in range(min_thickness, max_thickness + 1):
        combined_data_dict[t] = []

    dataset_index = 0
    start_time = time.time()
    for p in pathlist:
        dataset_index += 1
        digits_data, labels_data = utils.load_image_data(p,
                                                         side=200,
                                                         padding=40)
        labels = np.concatenate((labels, labels_data))
        #digits_data = normalize_digit_thickness(digits_data)
        #digits_data = change_thickness(digits_data,min_thickness)
        tau_data_dict = build_thickness_data(digits_data, min_thickness,
                                             max_thickness)
        processed_digit_copy = copy.deepcopy(tau_data_dict)
        processed_digit_copy["labels"] = labels_data
        filename = 'Dataset-%i-save' % dataset_index
        utils.save_processed_data(processed_digit_copy, filename)

        for k in tau_data_dict.keys():
            d = tau_data_dict[k]
            combined_data_dict[k].append(d)
        print(labels.shape)

    for t in combined_data_dict.keys():
        combined_data_dict[t] = np.concatenate(combined_data_dict[t], axis=0)
    elapsed = time.time() - start_time
    print("This ran for: %s" % datetime.timedelta(seconds=elapsed))
    return combined_data_dict, labels
def replace_null_with_zero(file_name: str, column_name: str,
                           **context) -> Path:
    df = load_data(file_name, raw_data=True)
    clean_series = df[column_name].fillna(0)

    saved_file_path = get_saved_file_path(context[TASK].task_id)
    save_processed_data(clean_series, saved_file_path, use_index=False)
    return saved_file_path
def create_xy_column(**context) -> Path:
    df = get_upstream_tasks_output_concat_df(context, remove_none_values=True)
    xy_series = df[X].astype(str) + "/" + df[Y].astype(str)
    xy_series.name = XY

    saved_file_path = get_saved_file_path(context[TASK].task_id)
    save_processed_data(xy_series, saved_file_path, use_index=False)
    return saved_file_path
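replace_null_with_zero and create_xy_column read like Airflow task callables (context[TASK].task_id is the running task's id, and the upstream helper concatenates the outputs of parent tasks). A minimal wiring sketch, assuming Airflow 2.4+ context injection; the DAG id, file name, and column names are hypothetical:

import pendulum
from airflow import DAG
from airflow.operators.python import PythonOperator

with DAG(dag_id="clean_coordinates",
         start_date=pendulum.datetime(2021, 1, 1),
         schedule=None) as dag:
    # Fill missing x and y independently, then combine them into "x/y".
    fill_x = PythonOperator(
        task_id="fill_x",
        python_callable=replace_null_with_zero,
        op_kwargs={"file_name": "raw.csv", "column_name": "x"})
    fill_y = PythonOperator(
        task_id="fill_y",
        python_callable=replace_null_with_zero,
        op_kwargs={"file_name": "raw.csv", "column_name": "y"})
    make_xy = PythonOperator(task_id="make_xy",
                             python_callable=create_xy_column)
    [fill_x, fill_y] >> make_xy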
Example 5
def experiment(network_model, reshape_mode='mlp'):
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }
    xtrain, ytrain, xtest, ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain, xtest = reshape_fun(xtrain), reshape_fun(xtest)
    digits_data = utils.load_processed_data('digits_og_and_optimal')
    digits = digits_data['optimal_lw']
    labels = utils.create_one_hot(digits_data['labels'].astype('uint'))

    ensemble_size = 20
    epochs = 50
    small_digits = reshape_fun(np.array(list(map(scale_down, digits))))
    small_digits = utils.normalize_data(small_digits)
    trials = 5

    for t in range(1, trials + 1):
        gc.collect()

        l_xtrain = []
        l_xval = []
        l_ytrain = []
        l_yval = []
        for _ in range(ensemble_size):
            t_xtrain, t_ytrain, t_xval, t_yval = utils.create_validation(
                xtrain, ytrain, (1 / 6))
            l_xtrain.append(t_xtrain)
            l_xval.append(t_xval)
            l_ytrain.append(t_ytrain)
            l_yval.append(t_yval)

        inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble(
            [network_model], pop_per_type=ensemble_size, merge_type="Average")
        es = clb.EarlyStopping(monitor='val_loss', patience=2,
                               restore_best_weights=True)

        train_model.compile(optimizer="adam",
                            loss="categorical_crossentropy",
                            metrics=['acc'])
        train_model.fit(x=l_xtrain,
                        y=l_ytrain,
                        verbose=1,
                        batch_size=100,
                        epochs=epochs,
                        validation_data=(l_xval, l_yval),
                        callbacks=[es])
        merge_model.compile(optimizer="adam",
                            loss="categorical_crossentropy",
                            metrics=['acc'])

        results = test_digits(merge_model, digits, labels, ensemble_size, reshape_fun)

        #entropy = ann.test_model(merge_model, [small_digits]*ensemble_size, labels, metric = 'entropy')
        #c_error = ann.test_model(merge_model, [small_digits]*ensemble_size, labels, metric = 'c_error')

        #results['c_error'][0] = c_error
        #results['entropy'][0] = entropy

        filename = "saltpepper_norm_trial-%s" % t
        utils.save_processed_data(results, filename)
Example 6
                layers.Dense(10,
                             kernel_initializer=inits.RandomUniform(
                                 maxval=0.5, minval=-0.5)))
            model.add(layers.Activation("softmax"))

            es = clb.EarlyStopping(monitor='val_loss',
                                   patience=10,
                                   restore_best_weights=True)
            model.compile(optimizer=opt.Adam(),
                          loss="categorical_crossentropy",
                          metrics=['acc'])
            model.fit(xtrain,
                      ytrain,
                      epochs=epochs,
                      batch_size=100,
                      validation_split=(1 / 6),
                      callbacks=[es])
            model_list.append(model)

            inputs.extend(model.inputs)
            outputs.extend(model.outputs)

        merge_layer = layers.Average()(
            outputs) if ensemble_size > 1 else outputs
        ensemble = Model(inputs=inputs, outputs=merge_layer)
        pred = ensemble.predict([notmnist] * ensemble_size)
        h = list(map(stats.entropy, pred))
        results[ensemble_size].extend(h)

    utils.save_processed_data(results, 'notmnist_sim-trial-%s' % (t + 1))

images_path = os.path.join(".", "images")

xm_digits_path = os.path.join(images_path, "XiaoMing_Digits")
ob_digits_path = os.path.join(images_path, "60 Images")
m_digits_path = os.path.join(images_path, "mnumbers")
hubben1_path = os.path.join(images_path, "Size1")
hubben2_path = os.path.join(images_path, "Size2")
hubben3_path = os.path.join(images_path, "Size3")
hubben4_path = os.path.join(images_path, "Size4")

digit_paths = [
    #  xm_digits_path
    ob_digits_path
    #, m_digits_path
    #, hubben1_path
    #, hubben2_path
    #, hubben3_path
    #, hubben4_path
]

print("===== LOADING DIGITS =====")

data_dict, labels = load_process_digits(digit_paths)
print(data_dict)
data_dict['labels'] = labels
utils.save_processed_data(data_dict, 'combined_testing_data')

print("Script done running")
        digits_mbits[t].extend(d_mbits[t])

mnist_preds = np.mean(np.array(mnist_mpreds), axis=0)
mnist_cerr = calc_cerror(mnist_preds, ytest)
mnist_bits = np.mean(list(map(entropy, mnist_preds)))

digits_cerr = {}
digits_bits = {}

for t in taus:
    preds = np.mean(np.array(digits_mpreds[t]), axis=0)
    digits_cerr[t] = calc_cerror(preds, labels)
    digits_bits[t] = np.mean(list(map(entropy, preds)))

results = {
    "ensembles": {
        "mnist_bits": mnist_bits,
        "mnist_cerr": mnist_cerr,
        "digits_cerr": digits_cerr,
        "digits_bits": digits_bits
    },
    "individuals": {
        "mnist_bits": mnist_mbits,
        "mnist_cerr": mnist_mcerr,
        "digits_cerr": digits_mcerr,
        "digits_bits": digits_mbits
    }
}

utils.save_processed_data(results, "cnn_results_ltsim_100")
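Reading the results back for analysis is the mirror image of the save; a minimal sketch, assuming load_processed_data is the pickle counterpart used elsewhere on this page:

# Hypothetical quick inspection of the saved ensemble results.
res = utils.load_processed_data("cnn_results_ltsim_100")
print("ensemble MNIST cerr:", res["ensembles"]["mnist_cerr"])
for tau, cerr in sorted(res["ensembles"]["digits_cerr"].items()):
    print("tau=%s  digits cerr=%.4f" % (tau, cerr))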
def calc_acc():
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }

    acc_mnist = np.zeros(trials)
    bits_mnist = np.zeros(trials)
    acc_digits = np.zeros(trials)
    bits_digits = np.zeros(trials)

    cnn_acc_mnist = np.zeros(trials)
    cnn_bits_mnist = np.zeros(trials)
    cnn_acc_digits = np.zeros(trials)
    cnn_bits_digits = np.zeros(trials)

    for i in range(trials):
        t_xtrain, t_ytrain, xval, yval = utils.create_validation(
            xtrain, ytrain)

        reshape_fun = reshape_funs['mlp']

        t_xtrain, c_xtest = reshape_fun(t_xtrain), reshape_fun(xtest)
        xval = reshape_fun(xval)
        c_digits = reshape_fun(digits)

        model = ann.parse_model_js(network_model2)
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['acc'])
        model.fit(t_xtrain, t_ytrain, epochs=5, validation_data=(xval, yval))

        acc_mnist[i] = ann.test_model(model, c_xtest, ytest, metric='accuracy')
        bits_mnist[i] = ann.test_model(model, c_xtest, ytest, metric='entropy')
        acc_digits[i] = ann.test_model(model,
                                       c_digits,
                                       digits_labels,
                                       metric='accuracy')
        bits_digits[i] = ann.test_model(model,
                                        c_digits,
                                        digits_labels,
                                        metric='entropy')

        reshape_fun = reshape_funs['conv']
        t_xtrain, c_xtest = reshape_fun(t_xtrain), reshape_fun(xtest)
        xval = reshape_fun(xval)
        c_digits = reshape_fun(digits)

        model = ann.parse_model_js(network_model1)
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['acc'])
        model.fit(t_xtrain, t_ytrain, epochs=3, validation_data=(xval, yval))

        cnn_acc_mnist[i] = ann.test_model(model,
                                          c_xtest,
                                          ytest,
                                          metric='accuracy')
        cnn_bits_mnist[i] = ann.test_model(model,
                                           c_xtest,
                                           ytest,
                                           metric='entropy')
        cnn_acc_digits[i] = ann.test_model(model,
                                           c_digits,
                                           digits_labels,
                                           metric='accuracy')
        cnn_bits_digits[i] = ann.test_model(model,
                                            c_digits,
                                            digits_labels,
                                            metric='entropy')

    results_mlp = {
        'mnist_acc': acc_mnist,
        'mnist_bits': bits_mnist,
        'digits_acc': acc_digits,
        'digits_bits': bits_digits
    }

    results_cnn = {
        'mnist_acc': cnn_acc_mnist,
        'mnist_bits': cnn_bits_mnist,
        'digits_acc': cnn_acc_digits,
        'digits_bits': cnn_bits_digits
    }

    utils.save_processed_data(results_mlp, '100-trials-mlp')
    utils.save_processed_data(results_cnn, '100-trials-cnn')
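The per-trial arrays saved above lend themselves to a quick mean/spread summary; a minimal sketch, again assuming the pickle helpers:

# Hypothetical summary of the saved per-trial accuracies.
for name in ('100-trials-mlp', '100-trials-cnn'):
    res = utils.load_processed_data(name)
    print("%s: mnist acc %.4f +/- %.4f" %
          (name, res['mnist_acc'].mean(), res['mnist_acc'].std()))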
def experiment(network_model, reshape_mode='mlp'):
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }
    xtrain, ytrain, xtest, ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain, xtest = reshape_fun(xtrain), reshape_fun(xtest)
    digits_data = utils.load_processed_data('digits_og_and_optimal')
    digits = digits_data['optimal_lw']
    digits = utils.normalize_data(digits)
    digits_og = digits_data['lecunn_big']
    digits_og_small = reshape_fun(digits_data['lecunn'])
    digits_og_small = utils.normalize_data(digits_og_small)
    labels = utils.create_one_hot(digits_data['labels'].astype('uint'))

    ensemble_size = 20
    epochs = 3
    small_digits = reshape_fun(np.array(list(map(scale_down, digits))))

    trials = 5

    for t in range(1, trials + 1):

        gc.collect()

        l_xtrain = []
        l_xval = []
        l_ytrain = []
        l_yval = []
        for _ in range(ensemble_size):
            t_xtrain, t_ytrain, t_xval, t_yval = utils.create_validation(
                xtrain, ytrain, (1 / 6))
            l_xtrain.append(t_xtrain)
            l_xval.append(t_xval)
            l_ytrain.append(t_ytrain)
            l_yval.append(t_yval)

        inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble(
            [network_model], pop_per_type=ensemble_size, merge_type="Average")
        train_model.compile(optimizer="adam",
                            loss="categorical_crossentropy",
                            metrics=['acc'])
        train_model.fit(x=l_xtrain,
                        y=l_ytrain,
                        verbose=1,
                        batch_size=100,
                        epochs=epochs,
                        validation_data=(l_xval, l_yval))
        merge_model.compile(optimizer="adam",
                            loss="categorical_crossentropy",
                            metrics=['acc'])

        results_linewidth = test_digits(merge_model, digits, labels,
                                        ensemble_size, reshape_fun)

        entropy = ann.test_model(merge_model, [small_digits] * ensemble_size,
                                 labels,
                                 metric='entropy')
        c_error = ann.test_model(merge_model, [small_digits] * ensemble_size,
                                 labels,
                                 metric='c_error')

        results_linewidth['c_error'][0] = c_error
        results_linewidth['entropy'][0] = entropy

        results_lecunn = test_digits(merge_model, digits_og, labels,
                                     ensemble_size, reshape_fun)

        entropy = ann.test_model(merge_model,
                                 [digits_og_small] * ensemble_size,
                                 labels,
                                 metric='entropy')
        c_error = ann.test_model(merge_model,
                                 [digits_og_small] * ensemble_size,
                                 labels,
                                 metric='c_error')

        results_lecunn['c_error'][0] = c_error
        results_lecunn['entropy'][0] = entropy

        total_results = {
            'optimal_lw': results_linewidth,
            'lecunn': results_lecunn
        }

        filename = "saltpepper_random_trial-%s" % t
        utils.save_processed_data(total_results, filename)
    individual = {
        'd1_correct': d1_mncorrect,
        'd1_wrong': d1_mnwrong,
        'd2_correct': d2_mncorrect,
        'd2_wrong': d2_mnwrong,
        'lecunn_correct': d3_mncorrect,
        'lecunn_wrong': d3_mnwrong
    }

    return individual


utils.setup_gpu_session()
individual = experiment(network_model1, 'mlp')
utils.save_processed_data(individual, 'individual_bin')

#plt.figure()
#plt.subplot(231)
#plt.hist(individual['d1_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('n_correct')
#plt.subplot(232)
#plt.hist(individual['d2_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('n_correct')
#plt.subplot(233)
#plt.hist(individual['lecunn_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('n_correct')
#plt.subplot(234)
def experiment(network_model, reshape_mode='mlp'):
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }
    xtrain, ytrain, xtest, ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain, xtest = reshape_fun(xtrain), reshape_fun(xtest)

    custom_digits_dict = utils.load_processed_data("combined_testing_data")
    digits_labels = custom_digits_dict['labels']
    digits_taus = [t for t in custom_digits_dict.keys() if t != "labels"]
    digits_data = list(map(reshape_fun, [custom_digits_dict[t] for t in digits_taus]))
    digits_data = list(map(utils.normalize_data, digits_data))
    digits_labels = utils.create_one_hot(digits_labels.astype('uint'))

    for t in range(trials):
        print("===== START TRIAL %s =====" % (t + 1))
        # Preparing Results
        # Classification Error
        ensemble_cerror = []
        #t_ensemble_adv_cerror = []
        digits_cerror = []
        #t_digits_adv_cerror = []

        # Prediction Entropy
        entropy_ensemble = []
        #t_entropy_adv_ensemble = []
        digits_entropy = []
        #t_digits_adv_entropy = []

        # Voting entropy
        #entropy_vote = []
        #entropy_adv_vote = []
        #digits_vote = []
        #digits_adv_entropy = []

        epochs = 5

        l_xtrain = []
        l_xval = []
        l_ytrain = []
        l_yval = []
        for _ in range(ensemble_size):
            t_xtrain, t_ytrain, t_xval, t_yval = utils.create_validation(
                xtrain, ytrain, (1 / 6))
            l_xtrain.append(t_xtrain)
            l_xval.append(t_xval)
            l_ytrain.append(t_ytrain)
            l_yval.append(t_yval)
        # Without adversarial training
        inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble(
            [network_model], pop_per_type=ensemble_size, merge_type="Average")
        losses = list(
            map(lambda m: ann.adveserial_loss(
                klosses.categorical_crossentropy, m, eps=0.01), model_list))
        train_model.compile(optimizer="adam", loss=losses, metrics=['acc'])
        train_model.fit(x=l_xtrain,
                        y=l_ytrain,
                        verbose=1,
                        batch_size=100,
                        epochs=epochs,
                        validation_data=(l_xval, l_yval))
        merge_model.compile(optimizer="adam",
                            loss="categorical_crossentropy",
                            metrics=['acc'])
        c_error = ann.test_model(merge_model, [xtest] * ensemble_size,
                                 ytest, metric='accuracy')
        entropy = ann.test_model(merge_model, [xtest] * ensemble_size,
                                 ytest, metric='entropy')
        ensemble_cerror.append(c_error)
        entropy_ensemble.append(entropy)
        #entropy_vote.append(calc_vote_entropy(model_list,m,xtest))
        d_cerror, d_entropy = test_digits(merge_model, model_list,
                                          ensemble_size, digits_data,
                                          digits_labels)
        digits_cerror.append(d_cerror)
        digits_entropy.append(d_entropy)

        #digits_vote.append(d_vote)
        # Adversarial training
        #inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble([network_model], pop_per_type=m, merge_type="Average")
        #losses = list(
        #    map( lambda m : ann.adveserial_loss(klosses.categorical_crossentropy,m,eps=0.01), model_list)
        #)
        #train_model.compile(optimizer="adam", loss=losses, metrics = ['acc'])
        #train_model.fit(x=l_xtrain, y=l_ytrain, verbose=1, epochs = epochs ,validation_data=(l_xval,l_yval))
        #c_error = ann.test_model(merge_model, [xtest]*m, ytest, metric = 'c_error' )
        #entropy = ann.test_model(merge_model, [xtest]*m, ytest, metric = 'entropy' )
        #
        #t_ensemble_adv_cerror.append(c_error)
        #t_entropy_adv_ensemble.append(entropy)
        #
        #d_cerror,d_entropy,d_vote = test_digits(merge_model,model_list,m,digits_data,digits_labels)
        #
        #t_digits_adv_cerror.append(d_cerror)
        #t_digits_adv_entropy.append(d_entropy)

        filename1 = 'adv_mnist_results_5-20-50-trial%s' % t
        filename2 = 'adv_digits_results_5-20-50-trial%s' % t

        mnist_results = {
            'ensemble_cerror' : ensemble_cerror,
            'ensemble_entropy' : entropy_ensemble
            #'ensemble_adv_cerror' : ensemble_adv_cerror,
            #'ensemble_adv_entropy' : entropy_adv_ensemble,
            #'voting_entropy' : entropy_vote
            #'voting_adv_entropy' : entropy_adv_vote
        }

        digits_results = {
            'ensemble_cerror' : digits_cerror,
            'ensemble_entropy' : digits_entropy
            #'ensemble_adv_cerror' : digits_adv_cerror,
            #'ensemble_adv_entropy' : digits_adv_entropy,
            #'voting_entropy' : digits_vote
            #'voting_adv_entropy' : digits_adv_vote
        }


        utils.save_processed_data(mnist_results,filename1)
        utils.save_processed_data(digits_results,filename2)

    return digits_taus, mnist_results, digits_results
            inputs.extend(model.inputs)
            outputs.extend(model.outputs)
        
        merge_model = Model(inputs=inputs, outputs=layers.Average()(outputs))

        preds = merge_model.predict([notmnist] * ensemble_size)
        mem_preds = np.array(
            list(map(lambda m: m.predict(notmnist),
                     model_list))).transpose(1, 2, 0)
        print(mem_preds.shape)
        bits = list(map(stats.entropy, preds))
        s_q = list(map(calc_pred_vars, mem_preds))
        results.extend(list(zip(bits, s_q)))
    return results
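calc_pred_vars is not defined in this excerpt. A plausible stand-in, assuming mem_preds is transposed to (n_samples, n_classes, n_members) as above and member disagreement is summarized as the mean per-class variance; this is an assumption, not the original helper:

import numpy as np

def calc_pred_vars(member_preds):
    # member_preds: (n_classes, n_members) probabilities for one sample.
    # Hypothetical: average over classes of the variance across members.
    return float(np.mean(np.var(member_preds, axis=1)))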
utils.setup_gpu_session()

ensemble = experiment('mlp')
utils.save_processed_data(ensemble, "distribution_not_mnist")

#plt.figure()
#plt.subplot(221)
#plt.hist(ensemble['mnist_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(222)
#plt.hist(ensemble['mnist_wrong'],color = 'red')
#plt.xlabel('entropy')
#plt.ylabel('nwrong')
#plt.subplot(223)
#plt.hist(ensemble['digits_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(224)
Example 14
        ensemble = {
            #'mnist_correct' : mnist_correct,
            #'mnist_wrong' : mnist_wrong,
            'digits_correct': digits_correct,
            'digits_wrong': digits_wrong,
            'lecunn_correct': d2_correct,
            'lecunn_wrong': d2_wrong
        }

    return ensemble


utils.setup_gpu_session()
ensemble = experiment(network_model1, 'mlp')
utils.save_processed_data(ensemble, "error_entropy5sep-bins")

#plt.figure()
#plt.subplot(221)
#plt.hist(ensemble['mnist_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(222)
#plt.hist(ensemble['mnist_wrong'],color = 'red')
#plt.xlabel('entropy')
#plt.ylabel('nwrong')
#plt.subplot(223)
#plt.hist(ensemble['digits_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(224)
Example 15
        model.fit(xtrain,
                  ytrain,
                  epochs=epochs,
                  batch_size=100,
                  callbacks=[es],
                  validation_split=(1 / 6))
        members.append(model)

    i = 0
    for ensemble_size in range(2, ensemble_size_top + 1):
        inputs = []
        outputs = []
        members_to_use = members[i:i + ensemble_size]
        for m in members_to_use:
            inputs.extend(m.inputs)
            outputs.extend(m.outputs)

        print(outputs)
        ensemble = Model(inputs=inputs, outputs=layers.Average()(outputs))
        ensemble.compile(loss='categorical_crossentropy',
                         optimizer='adam',
                         metrics=['acc'])
        accuracy = ann.test_model(ensemble, [xtest] * ensemble_size, ytest,
                                  'accuracy')
        i += ensemble_size
        t_accuracies.append(accuracy)

    t_accuracies = np.array(t_accuracies)
    utils.save_processed_data(t_accuracies,
                              'ensemble_sizesim-trial-%s' % (t + 1))
images_path = os.path.join(".", "images")

xm_digits_path = os.path.join(images_path, "XiaoMing_Digits")
ob_digits_path = os.path.join(images_path, "60 Images")
m_digits_path = os.path.join(images_path, "mnumbers")
hubben1_path = os.path.join(images_path, "Size1")
hubben2_path = os.path.join(images_path, "Size2")
hubben3_path = os.path.join(images_path, "Size3")
hubben4_path = os.path.join(images_path, "Size4")

digit_paths = [
    xm_digits_path, ob_digits_path, m_digits_path, hubben1_path, hubben2_path,
    hubben3_path, hubben4_path
]

print("===== LOADING DIGITS =====")

bw, bw_small, optimal_lw, labels = load_process_digits(digit_paths)

data_dict = {
    'lecunn_big': bw,
    'lecunn': bw_small,
    'optimal_lw': optimal_lw,
    'labels': labels
}

utils.save_processed_data(data_dict, 'digits_og_and_optimal')

print("Script done running")
        for img in images:
            img_arr = load_image(os.path.join(letter_path, img), True)
            if img_arr is not None:
                img_arr = dutils.unpad_img(img_arr)
                if has_shape(img_arr):
                    img_list.append(img_arr)

        img_list = list(map(dutils.unpad_img, img_list))

        img_list = list(
            map(
                lambda img: dutils.center_box_image(
                    dutils.resize_image(img, 20), 20, 4), img_list))

        return np.array(img_list)

    path = os.path.join('notMNIST_small')
    letters_dict = {}
    letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    for letter in letters:
        l_list = load_letter(os.path.join(path, letter))
        count, _, _ = l_list.shape
        l_index = np.random.permutation(count)
        letters_dict[letter] = l_list[l_index[:100]]

    return letters_dict


not_mnist = create_not_mnist()
utils.save_processed_data(not_mnist, 'notMNIST1000')
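Assuming center_box_image pads the 20 px glyphs to 20 + 2*4 = 28 px, each letter ends up as a (100, 28, 28) MNIST-shaped array. A minimal reload sketch using the pickle helpers assumed earlier:

# Hypothetical reload of the sampled notMNIST letters.
letters = utils.load_processed_data('notMNIST1000')
notmnist = np.concatenate([letters[k] for k in sorted(letters)], axis=0)
print(notmnist.shape)  # expected: (1000, 28, 28)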
    individual = {
        'mnist_correct': mnist_mncorrect,
        'mnist_wrong': mnist_mnwrong,
        'digits_correct': digits_mncorrect,
        'digits_wrong': digits_mnwrong,
        'lecunn_correct': d2_mncorrect,
        'lecunn_wrong': d2_mnwrong
    }

    return individual


utils.setup_gpu_session()
individual = experiment(network_model1, 'mlp')
utils.save_processed_data(individual, 'individual_entropy_bins')

#plt.figure()
#plt.subplot(231)
#plt.hist(individual['mnist_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('n_correct')
#plt.subplot(232)
#plt.hist(individual['lecunn_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('n_correct')
#plt.subplot(233)
#plt.hist(individual['digits_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('n_correct')
#plt.subplot(234)
        #d2_wrong.extend(wrong)
        #
        #for d in digits:
        #    digits_preds = merge_model.predict([d]*ensemble_size)
        #    mempreds = np.array(train_model.predict([d]*ensemble_size)).transpose(1,2,0)
        #    correct, wrong = bin_entropies(digits_preds,mempreds,d_labels)
        #    digits_wrong.extend(wrong)
        #    digits_correct.extend(correct)

        ensemble = {'mnist_correct': mnist_correct, 'mnist_wrong': mnist_wrong}

    return ensemble


ensemble = experiment(network_model1, 'mlp')
utils.save_processed_data(ensemble, "lookalikes_entropy5sep-bins")

#plt.figure()
#plt.subplot(221)
#plt.hist(ensemble['mnist_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(222)
#plt.hist(ensemble['mnist_wrong'],color = 'red')
#plt.xlabel('entropy')
#plt.ylabel('nwrong')
#plt.subplot(223)
#plt.hist(ensemble['digits_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(224)
            d2.extend(wrong)
            d2.extend(correct)

        ensemble = {
            'd1_correct' : d1_correct,
            'd1_wrong' : d1_wrong,
            'd2_correct' : d2_correct,
            'd2_wrong' : d2_wrong,
            'lecunn_correct' : dog_correct,
            'lecunn_wrong' : dog_wrong
        }

    return ensemble

ensemble = experiment(network_model2, 'conv')
utils.save_processed_data(ensemble, "cnn_entropy5trial-bins")

#plt.figure()
#plt.subplot(221)
#plt.hist(ensemble['mnist_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(222)
#plt.hist(ensemble['mnist_wrong'],color = 'red')
#plt.xlabel('entropy')
#plt.ylabel('nwrong')
#plt.subplot(223)
#plt.hist(ensemble['digits_correct'],color = 'blue')
#plt.xlabel('entropy')
#plt.ylabel('ncorrect')
#plt.subplot(224)
def experiment(network_model, reshape_mode='mlp'):
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }
    xtrain, ytrain, xtest, ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain, xtest = reshape_fun(xtrain), reshape_fun(xtest)

    custom_digits_dict = utils.load_processed_data(
        "combined_testing_data_more")
    digits_labels = custom_digits_dict['labels']
    digits_taus = [t for t in custom_digits_dict.keys() if t != "labels"]
    digits_data = list(
        map(reshape_fun, [custom_digits_dict[t] for t in digits_taus]))
    digits_data = list(map(utils.normalize_data, digits_data))
    digits_labels = utils.create_one_hot(digits_labels.astype('uint'))
    for tr in range(1, trials + 1):
        gc.collect()
        print("==== TRIAL %s ====" % tr)
        # Preparing Results
        # Classification Error
        ensemble_cerror = []
        #t_ensemble_adv_cerror = []
        digits_cerror = []
        #t_digits_adv_cerror = []

        # Prediction Entropy
        entropy_ensemble = []
        #t_entropy_adv_ensemble = []
        digits_entropy = []
        #t_digits_adv_entropy = []

        # Voting entropy
        #entropy_vote = []
        #entropy_adv_vote = []
        #digits_vote = []
        #digits_adv_entropy = []

        epochs = 50

        for m in ensemble_sizes:
            print('Working now with ensemble of size m = %s' % m)
            l_xtrain = []
            l_xval = []
            l_ytrain = []
            l_yval = []

            for _ in range(m):
                t_xtrain, t_ytrain, t_xval, t_yval = utils.create_validation(
                    xtrain, ytrain, (1 / 6))
                l_xtrain.append(t_xtrain)
                l_xval.append(t_xval)
                l_ytrain.append(t_ytrain)
                l_yval.append(t_yval)
            # Without adversarial training

            es = clb.EarlyStopping(monitor='val_loss',
                                   patience=2,
                                   restore_best_weights=True)
            inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble(
                [network_model], pop_per_type=m, merge_type="Average")
            train_model.compile(optimizer="adam",
                                loss="categorical_crossentropy",
                                metrics=['acc'])
            train_model.fit(x=l_xtrain,
                            y=l_ytrain,
                            verbose=1,
                            epochs=epochs,
                            validation_data=(l_xval, l_yval),
                            callbacks=[es])
            merge_model.compile(optimizer="adam",
                                loss="categorical_crossentropy",
                                metrics=['acc'])
            c_error = ann.test_model(merge_model, [xtest] * m,
                                     ytest,
                                     metric='accuracy')
            entropy = ann.test_model(merge_model, [xtest] * m,
                                     ytest,
                                     metric='entropy')

            ensemble_cerror.append(c_error)
            entropy_ensemble.append(entropy)
            #entropy_vote.append(calc_vote_entropy(model_list,m,xtest))

            d_cerror, d_entropy = test_digits(merge_model, model_list, m,
                                              digits_data, digits_labels)

            digits_cerror.append(d_cerror)
            digits_entropy.append(d_entropy)
            gc.collect()

            #digits_vote.append(d_vote)
            # Adversarial training

            #inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble([network_model], pop_per_type=m, merge_type="Average")
            #losses = list(
            #    map( lambda m : ann.adveserial_loss(klosses.categorical_crossentropy,m,eps=0.01), model_list)
            #)
            #train_model.compile(optimizer="adam", loss=losses, metrics = ['acc'])
            #train_model.fit(x=l_xtrain, y=l_ytrain, verbose=1, epochs = epochs ,validation_data=(l_xval,l_yval))
            #c_error = ann.test_model(merge_model, [xtest]*m, ytest, metric = 'c_error' )
            #entropy = ann.test_model(merge_model, [xtest]*m, ytest, metric = 'entropy' )


            #
            #t_ensemble_adv_cerror.append(c_error)
            #t_entropy_adv_ensemble.append(entropy)
            #
            #d_cerror,d_entropy,d_vote = test_digits(merge_model,model_list,m,digits_data,digits_labels)
            #
            #t_digits_adv_cerror.append(d_cerror)
            #t_digits_adv_entropy.append(d_entropy)

        filename1 = 'mnist_results_20-trial%s' % tr
        filename2 = 'digits_results_20-trial%s' % tr

        mnist_results = {
            'ensemble_cerror': ensemble_cerror,
            'ensemble_entropy': entropy_ensemble
            #'ensemble_adv_cerror' : ensemble_adv_cerror,
            #'ensemble_adv_entropy' : entropy_adv_ensemble,
            #'voting_entropy' : entropy_vote
            #'voting_adv_entropy' : entropy_adv_vote
        }

        digits_results = {
            'ensemble_cerror': digits_cerror,
            'ensemble_entropy': digits_entropy
            #'ensemble_adv_cerror' : digits_adv_cerror,
            #'ensemble_adv_entropy' : digits_adv_entropy,
            #'voting_entropy' : digits_vote
            #'voting_adv_entropy' : digits_adv_vote
        }

        utils.save_processed_data(mnist_results, filename1)
        utils.save_processed_data(digits_results, filename2)

    return digits_taus, mnist_results, digits_results