def _pull_layers(self):
        """ Sets layers """

        # pull layers from database
        conn = SQLConnector()
        jsonlist = conn.pull_best_results(attack=self.attack_type,
                                          num=5,
                                          verbose=False)
        if not jsonlist:
            raise Exception('Hyper data does not exist for ' +
                            self.attack_type)
        json = jsonlist[0]
        layersstr = json['layers']

        # parse the three comma-separated ints from the "a,b,c" layers string
        num1, num2, num3 = [int(part) for part in layersstr.split(",")]

        self.layers = [num1, num2, num3]

    def setup(self):
        """ Sets up the GAN """
        # TODO: make this a separate method called from __init__, with options passed in

        print("Attack type: " + self.attack_type)

        conn = SQLConnector()
        data = conn.pull_kdd99(attack=self.attack_type, num=4000)
        dataframe = pd.DataFrame.from_records(
            data=data, columns=conn.pull_kdd99_columns(allQ=True))

        # ==========
        # ENCODING
        # ==========
        # https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn

        d = defaultdict(LabelEncoder)

        fit = dataframe.apply(
            lambda x: d[x.name].fit_transform(x))  # fit is encoded dataframe
        dataset = fit.values  # transform to ndarray

        # to visually judge encoded dataset
        print("Real encoded " + self.attack_type + " attacks:")
        print(dataset[:1])

        # Set X as our input data and Y as our label
        self.X_train = dataset[:, 0:41].astype(float)
        Y_train = dataset[:, 41]

        # labels for data. 1 for valid attacks, 0 for fake (generated) attacks
        self.valid = np.ones((self.batch_size, 1))
        self.fake = np.zeros((self.batch_size, 1))
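
# A minimal, self-contained illustration (not part of the snippet above) of the
# defaultdict(LabelEncoder) pattern used in setup(): one encoder is fitted per column,
# and the same dict can later decode the integer codes back to the original values.
# The toy dataframe below is purely illustrative.
from collections import defaultdict

import pandas as pd
from sklearn.preprocessing import LabelEncoder

toy = pd.DataFrame({'protocol_type': ['tcp', 'udp', 'tcp'],
                    'service': ['http', 'domain_u', 'smtp']})
enc = defaultdict(LabelEncoder)

encoded = toy.apply(lambda col: enc[col.name].fit_transform(col))          # per-column int codes
decoded = encoded.apply(lambda col: enc[col.name].inverse_transform(col))  # back to strings
print(decoded.equals(toy))  # True: the encoding is reversible column by column
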
Example 3
def main():
    conn = SQLConnector()
    data = np.asarray(conn.pull_evaluator_data(30000, 'satan'))
    dataframe = pd.DataFrame.from_records(
        data=data, columns=conn.pull_kdd99_columns(allQ=True))

    features = dataframe.iloc[:, :41]
    attacks = dataframe.iloc[:, 41:]

    print(attacks.at[0, 'attack_type'])
    print(type(attacks.at[0, 'attack_type']))
    for i in range(0, attacks.size):
        attacks.at[i, 'attack_type'] = util.attacks_to_num(
            attacks.at[i, 'attack_type'])

    # using 0 as the label for non-satan data
    for i in range(0, attacks.size):
        if (attacks.at[i, 'attack_type'] == 16):
            attacks.at[i, 'attack_type'] = 1
        else:
            attacks.at[i, 'attack_type'] = 0

    print(attacks)

    d = defaultdict(LabelEncoder)
    encoded_features_df = features.apply(lambda x: d[x.name].fit_transform(x))
    eval_dataset_df = encoded_features_df.join(attacks)
    eval_dataset_df = shuffle(eval_dataset_df)
    print(eval_dataset_df)

    # write the encoded values to a CSV
    eval_dataset_df.to_csv('SatanAndNonsatan.csv', header=False, index=False)
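
    # Hedged aside, not part of the original script: the two per-row loops above could be
    # written in vectorized form, assuming util.attacks_to_num maps an attack-name string
    # to an integer code and 16 is the code tested for above:
    # attacks['attack_type'] = attacks['attack_type'].map(util.attacks_to_num)
    # attacks['attack_type'] = (attacks['attack_type'] == 16).astype(int)
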
    def signal_handler(sig, frame):
        """ Catches Crl-C command to print from database before ending """
        conn = SQLConnector()
        hypers = conn.read_hyper()  # by epoch?
        gens = conn.read_gens()  # by epoch?
        print("\n\nMYSQL DATA:\n==============")
        print("hypers  " + str(hypers))
        print("\ngens  " + str(gens) + "\n")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    def train(self):
        """ Trains the GAN system """
        # break condition for training (when diverging)
        loss_increase_count = 0
        prev_g_loss = 0


        conn = SQLConnector()

        idx = np.arange(self.batch_size)

        for epoch in range(self.max_epochs):
            # selecting batch_size random attacks from our training data
            # idx = np.random.randint(0, X_train.shape[0], batch_size)
            attacks = self.X_train[idx]

            # generate a matrix of noise vectors
            noise = np.random.normal(0, 1, (self.batch_size, 41))

            # create an array of generated attacks
            gen_attacks = self.generator.predict(noise)

            # loss functions, based on what metrics we specify at model compile time
            d_loss_real = self.discriminator.train_on_batch(
                    attacks, self.valid)
            d_loss_fake = self.discriminator.train_on_batch(
                    gen_attacks, self.fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # generator loss function
            g_loss = self.gan.train_on_batch(noise, self.valid)

            if epoch % 500 == 0:
                print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f] [Loss change: %.3f, Loss increases: %.0f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss, g_loss - prev_g_loss, loss_increase_count))
    def train(self):
        """ Trains the GAN system """
        # break condition for training (when diverging)
        loss_increase_count = 0
        prev_g_loss = 0

        conn = SQLConnector()

        idx = np.arange(self.batch_size)

        ones = np.ones((self.batch_size, 1))
        zeros = np.zeros((self.batch_size, 1))

        for epoch in range(50000):
            # print('Epoch ({}/{})-------------------------------------------------'.format(epoch, self.max_epochs))
            # selecting batch_size random attacks from our training data
            # idx = np.random.randint(0, X_train.shape[0], batch_size)
            attacks = self.X_train[idx]

            # generate a matrix of noise vectors (unused in this autoencoder-style
            # variant, where the generator reconstructs the real attacks instead)
            noise = np.random.normal(0, 1, (self.batch_size, 41))

            # create an array of reconstructed (generated) attacks from the real ones
            gen_attacks = self.generator.predict(attacks)

            # loss functions, based on what metrics we specify at model compile time
            d_loss_real = self.discriminator.train_on_batch(
                attacks, self.valid)
            d_loss_fake = self.discriminator.train_on_batch(
                gen_attacks, self.fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # generator loss function (the combined model is updated twice per
            # discriminator update in this variant)
            g_loss = self.gan.train_on_batch(attacks, [gen_attacks, ones])
            g_loss = self.gan.train_on_batch(attacks, [gen_attacks, ones])

            if epoch % 499 == 0:
                print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                      (epoch, d_loss[0], 100 * d_loss[1], g_loss[0]))
                print('Real attack:')
                print(attacks[150])
                print('Reconstructed attack:')
                print(gen_attacks[150].round(3))
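
# Hedged sketch (assumption, not this project's code): how a two-output combined model
# like the `self.gan` trained above with train_on_batch(attacks, [gen_attacks, ones])
# might be wired in Keras, so the generator is optimized both for reconstruction and
# for fooling the discriminator. Layer sizes and losses here are illustrative only.
from keras.layers import Dense, Input
from keras.models import Model

gen_in = Input(shape=(41,))
generator = Model(gen_in, Dense(41, activation='tanh')(gen_in), name='generator')

disc_in = Input(shape=(41,))
discriminator = Model(disc_in, Dense(1, activation='sigmoid')(disc_in), name='discriminator')
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

attack_in = Input(shape=(41,))
reconstruction = generator(attack_in)
discriminator.trainable = False           # freeze the critic while training the generator
validity = discriminator(reconstruction)  # score the reconstruction as real/fake
gan = Model(attack_in, [reconstruction, validity])
gan.compile(loss=['mse', 'binary_crossentropy'], optimizer='adam')
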
def main(argv):
    parser = argparse.ArgumentParser()

    parser.add_argument("--mode", "-m", type = str, dest = "mode", 
                        required = False, default = "show",
                        help = "Whether to show or save the heatmap. Use -m show or -m save.")
    parser.add_argument("--save_dir", "-dir", type = str, dest = "save_dir", 
                        required = False, default = "figs",
                        help = "Directory to save heatmap figures to, if any. Will be created if does not exist.")
    parser.add_argument("--num", "-n", type = str, dest = "num", 
                        required = False, default = 40000,
                        help = "Number of samples to pull from the database.")
    parser.add_argument("--host", "-ht", type = str, dest = "host", 
                        required = False, default = "localhost",
                        help = "Database host.")

    args = parser.parse_args()
    mode = args.mode
    save_dir = args.save_dir
    num = args.num
    host = args.host

    conn = SQLConnector(host=host)
    data = conn.pull_all_attacks(num, nodupes=True)
    columns = conn.pull_kdd99_columns()
    col_len = len(columns) - 1
    dataframe = pd.DataFrame(data=data, columns=columns)

    # Using Tim's method.
    dataframe = dataframe.iloc[:, :col_len]

    print(type(columns))
    
    # ==========
    # ENCODING
    # ==========
    # https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn

    d = defaultdict(LabelEncoder)

    fit = dataframe.apply(lambda x: d[x.name].fit_transform(x))  # fit is encoded dataframe
    dataset = fit.values  # transform to ndarray

    print(dataset)
    print(dataset.size)

    # TODO: Figure out what parameters the method actually takes

    # Using the Pandas .corr function.
    corr_matrix = fit.corr()
    correlation_heatmap(corr_matrix)

    correlation_matrix = np.zeros(shape=(col_len, col_len))

    # start at 0 so the first feature's row of the matrix is filled in as well
    for i in range(0, len(columns) - 1):
        for j in range(0, len(columns) - 1):
            correlation_matrix[i, j] = correlation_ratio(dataset[:, i], dataset[:, j])

    print(type(columns))
    correlation_dataframe = pd.DataFrame(data=correlation_matrix,
                                         index=columns[:col_len],
                                         columns=columns[:col_len])
    print(correlation_dataframe)
    print(correlation_matrix.shape)
    
    correlation_heatmap(correlation_dataframe, mode=mode, save_dir=save_dir, num=num)
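
# Hedged sketch: the project's correlation_ratio() helper is not shown in this excerpt.
# A common definition of the correlation ratio (eta) between a categorical series and a
# numeric series looks roughly like the function below; this is an assumption about what
# the helper computes, not the project's implementation.
import numpy as np

def correlation_ratio_sketch(categories, measurements):
    categories = np.asarray(categories)
    measurements = np.asarray(measurements, dtype=float)
    overall_mean = measurements.mean()
    between = 0.0  # between-group sum of squares, weighted by group size
    for cat in np.unique(categories):
        group = measurements[categories == cat]
        between += len(group) * (group.mean() - overall_mean) ** 2
    total = ((measurements - overall_mean) ** 2).sum()  # total sum of squares
    return np.sqrt(between / total) if total > 0 else 0.0
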
Example 8
def signal_handler(sig, frame):
    """ Catches Crl-C command to print from database before ending """
    conn = SQLConnector()
Example 9
    def train(self):
        """ Trains the GAN system """
        # break condition for training (when diverging)
        loss_increase_count = 0
        prev_g_loss = 0

        conn = SQLConnector()

        idx = np.arange(self.batch_size)

        for epoch in range(self.max_epochs):
            # selecting batch_size random attacks from our training data
            # idx = np.random.randint(0, X_train.shape[0], batch_size)
            attacks = self.X_train[idx]

            # generate a matrix of noise vectors
            noise = np.random.normal(0, 1, (self.batch_size, 41))

            # create an array of generated attacks
            gen_attacks = self.generator.predict(noise)

            # loss functions, based on what metrics we specify at model compile time
            d_loss_real = self.discriminator.train_on_batch(
                    attacks, self.valid)
            d_loss_fake = self.discriminator.train_on_batch(
                    gen_attacks, self.fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # generator loss function
            g_loss = self.gan.train_on_batch(noise, self.valid)

            if epoch % 500 == 0:
                print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f] [Loss change: %.3f, Loss increases: %.0f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss, g_loss - prev_g_loss, loss_increase_count))

            '''
            # ======================
            # Decoding attacks
            # ======================
            if epoch % 20 == 0:
                decode = gen_attacks[:1]  # take a slice from the ndarray that we want to decode
                #MAX QUESTION: Do we plan on changing the shape of this at some
                #point? If not just do
                #decode = gen_attacks[0]
                #decode_ints = decode.astype(int)
                #print("decoded floats ======= " + str(decode))
                #print("decoded ints ======= " + str(decode_ints))
                accuracy_threshold = 55
                accuracy = (d_loss[1] * 100)
                if(accuracy > accuracy_threshold):
                    # print out first result
                    list_of_lists = util.decode_gen(decode)
                    print(list_of_lists)

                    # ??????
                    gennum = 1  # pickle
                    modelnum = 1

                    layersstr = str(self.generator_layers[0]) + "," + str(self.generator_layers[1]) + "," + str(self.generator_layers[2])
                    attack_num = util.attacks_to_num(self.attack_type)

                    # send all to database
                    print(np.shape(list_of_lists))
                    for lis in list_of_lists:
                        #print(len(lis))
                        conn.write(gennum=gennum, modelnum=modelnum, layersstr=layersstr,
                                attack_type=attack_num, accuracy=accuracy, gen_list=lis)

                        # peek at our results
            '''
            accuracy = (d_loss[1] * 100)
            layersstr = str(self.generator_layers[0]) + "," + str(self.generator_layers[1]) + "," + str(
                self.generator_layers[2])
            attack_num = util.attacks_to_num(self.attack_type)

        conn.write_hypers(layerstr=layersstr, attack_encoded=attack_num, accuracy=accuracy)

        # TODO: Get the evaluation model implemented and replace the accuracy parameter with that metric
        # TODO: Log our generated attacks to the gens table
        # TODO: Refactor our sql methods with the new database structure
        # TODO: Add foreign key for attack type in hypers table
Example 10
def main():

    print()
    conn = SQLConnector()
    data = conn.pull_all_attacks(num=10000)
    dataframe = pd.DataFrame.from_records(
        data=data, columns=conn.pull_kdd99_columns(allQ=True))
    d = defaultdict(LabelEncoder)
    features = dataframe.iloc[:, :41]
    attack_labels = dataframe.iloc[:, 41:]

    for i in range(0, attack_labels.size):
        attack_labels.at[i, 'attack_type'] = util.attacks_to_num(
            attack_labels.at[i, 'attack_type'])

    fit = features.apply(lambda x: d[x.name].fit_transform(x))

    unbalanced_df = fit.join(attack_labels)
    balanced_df = unbalanced_df.copy(deep=True)

    gen_data = np.asarray(conn.read_gen_attacks_acc_thresh(.90, 1000))
    gen_df = pd.DataFrame.from_records(
        gen_data, columns=conn.pull_kdd99_columns(allQ=True))
    gen_df = gen_df.fillna(0)
    balanced_df = pd.concat([balanced_df, gen_df])
    print(len(balanced_df))

    unbalanced_array = unbalanced_df.values
    balanced_array = balanced_df.values

    # BEGIN LOOP
    # Create two identical multi-class classifiers, make sure their output dimensions match the number of classes in our data

    layers = [16, 32, 16]
    alpha = 0.1
    dropout = 0.3

    unb_labels = unbalanced_array[:, 41]
    [unb_classes, unb_counts] = np.unique(unb_labels, return_counts=True)
    print("Unique classes in unbalanced labels: ")
    print(unb_classes)
    print("Counts for the classes in unbalanced labels: ")
    print(unb_counts)
    unb_class_count = len(unb_classes)
    print("Number of classes in unbalanced dataset: " + str(unb_class_count))

    bal_labels = balanced_array[:, 41]
    [bal_classes, bal_counts] = np.unique(bal_labels, return_counts=True)

    dummy_bal_labels = np_utils.to_categorical(bal_labels)
    bal_class_count = len(bal_classes)
    print("Number of classes in balanced dataset: " + str(bal_class_count))

    print("Unique classes in balanced labels: ")
    print(bal_classes)
    print("Counts for the classes in balanced labels: ")
    print(bal_counts)

    for j in range(100):
        unbalanced_classifier = build_discriminator(layers, alpha, dropout,
                                                    unb_class_count)
        balanced_classifier = build_discriminator(layers, alpha, dropout,
                                                  bal_class_count)

        optimizer = Adam(.001)
        unbalanced_classifier.compile(loss='sparse_categorical_crossentropy',
                                      optimizer=optimizer,
                                      metrics=['accuracy'])
        balanced_classifier.compile(loss='sparse_categorical_crossentropy',
                                    optimizer=optimizer,
                                    metrics=['accuracy'])

        # encoding labels, classifier wants them in range 0 to num_classes
        unb_enc = LabelEncoder()
        bal_enc = LabelEncoder()

        unb_labels = unbalanced_array[:, 41]
        bal_labels = balanced_array[:, 41]

        unb_enc = unb_enc.fit(unb_labels)
        bal_enc = bal_enc.fit(bal_labels)

        unbalanced_array[:, 41] = unb_enc.transform(unbalanced_array[:, 41])
        balanced_array[:, 41] = bal_enc.transform(balanced_array[:, 41])
        [unb_classes, _] = np.unique(unbalanced_array[:, 41],
                                     return_counts=True)
        train_data = unbalanced_array[:, :41].astype(int)
        unb_cm = train(unbalanced_classifier, unbalanced_array, train_data)
        bal_cm = train(balanced_classifier, balanced_array, train_data)

        print("Metrics for iteration " + str(j))
        # print("Confusion matrix of unbalanced: ")
        # print
        print("Accuracy of unbalanced: " + str(getmetrics(unb_cm)))

        # print("Confusion matrix of balanced: ")
        # print(bal_cm)
        print("Accuracy of balanced" + str(getmetrics(bal_cm)))

        print("Diff: " + str(getmetrics(bal_cm) - getmetrics(unb_cm)))
Example 11
    def setup(self):
        """ Setups the GAN """
        # TODO: make this a separate method called from __init__, with options passed in

        print("Attack type: " + self.attack_type)

        conn = SQLConnector()
        data = conn.pull_kdd99(attack=self.attack_type, num=5000)
        dataframe = pd.DataFrame.from_records(
            data=data, columns=conn.pull_kdd99_columns(allQ=True))

        # ==========
        # ENCODING
        # ==========
        # https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn

        d = defaultdict(LabelEncoder)

        # Split the data into features and labels. We want the labels to be consistent
        # with the evaluator's encoding, so we use the util.attacks_to_num function
        features = dataframe.iloc[:, :41]
        attack_labels = dataframe.iloc[:, 41:]

        for i in range(0, attack_labels.size):
            attack_labels.at[i, 'attack_type'] = util.attacks_to_num(
                attack_labels.at[i, 'attack_type'])

        features = features.apply(
            lambda x: d[x.name].fit_transform(x))  # fit is encoded dataframe

        # feature scaling, recommended by the GitHub implementation
        self.scaler = MinMaxScaler(feature_range=(-1, 1))
        scaled_features = self.scaler.fit_transform(features.astype(float))
        scaled_df = pd.DataFrame(data=scaled_features)

        # Join the separately encoded sections back into one dataframe
        dataframe = scaled_df.join(attack_labels)
        dataset = dataframe.values  # transform to ndarray
        print(dataset)

        # TODO: Feature scaling? May be necessary. Has to be on a per-feature basis?

        # Splitting up the evaluation dataset. Should maybe be moved?
        eval_dataset = pd.read_csv('PortsweepAndNonportsweep.csv', header=None)
        eval_dataset = eval_dataset.values

        self.eval_dataset_X = eval_dataset[:, 0:41].astype(int)
        self.eval_dataset_Y = eval_dataset[:, 41]

        validationToTrainRatio = 0.05
        validationSize = int(validationToTrainRatio * len(self.eval_dataset_X))
        self.eval_validation_data = self.eval_dataset_X[:validationSize]
        self.eval_validation_labels = self.eval_dataset_Y[:validationSize]
        self.eval_dataset_X = self.eval_dataset_X[validationSize:]
        self.eval_dataset_Y = self.eval_dataset_Y[validationSize:]

        testToTrainRatio = 0.05
        testSize = int(testToTrainRatio * len(self.eval_dataset_X))
        self.eval_test_data = self.eval_dataset_X[:testSize]
        self.eval_test_labels = self.eval_dataset_Y[:testSize]
        self.eval_dataset_X = self.eval_dataset_X[testSize:]
        self.eval_dataset_Y = self.eval_dataset_Y[testSize:]

        # to visually judge encoded dataset
        print("Real encoded " + self.attack_type + " attacks:")
        print(dataset[:1])

        # Set X as our input data and Y as our label
        self.X_train = dataset[:, 0:41].astype(float)
        Y_train = dataset[:, 41]

        # labels for data. 1 for valid attacks, 0 for fake (generated) attacks
        self.valid = np.ones((self.batch_size, 1))
        self.fake = np.zeros((self.batch_size, 1))
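
# A small, self-contained illustration (not part of the snippet above) of the
# MinMaxScaler round-trip this GAN relies on: setup() fits the scaler on the real
# features, and train() later calls inverse_transform on generated samples to map
# them back into the original feature ranges.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

demo = np.array([[0.0, 10.0], [5.0, 20.0], [10.0, 30.0]])
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(demo)          # each column mapped into [-1, 1]
restored = scaler.inverse_transform(scaled)  # back to the original ranges
print(np.allclose(demo, restored))           # True
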
Example 12
def signal_handler(sig, frame):
    """ Catches Crl-C command to print from database before ending """
    conn = SQLConnector()
    writeOut(conn)
    print("did it work?")
    sys.exit(0)
Example 13
    def train(self):
        """ Trains the GAN system """
        # break condition for training (when diverging)
        loss_increase_count = 0
        prev_g_loss = 0

        conn = SQLConnector()

        idx = np.arange(self.batch_size)

        for epoch in range(self.max_epochs):
            # selecting batch_size random attacks from our training data
            # idx = np.random.randint(0, X_train.shape[0], batch_size)
            attacks = self.X_train[idx]

            # generate a matrix of noise vectors
            noise = np.random.normal(0, 1, (self.batch_size, 41))

            # create an array of generated attacks
            gen_attacks = self.generator.predict(noise)

            # loss functions, based on what metrics we specify at model compile time
            c_loss_real = self.critic.train_on_batch(attacks, self.valid)
            c_loss_fake = self.critic.train_on_batch(gen_attacks, self.fake)
            d_loss = 0.5 * np.add(c_loss_real, c_loss_fake)

            # clip the critic's weights into [-clip_value, clip_value]; this weight
            # clipping is the Lipschitz-enforcement step of the original WGAN
            for l in self.critic.layers:
                weights = l.get_weights()
                weights = [
                    np.clip(w, -self.clip_value, self.clip_value)
                    for w in weights
                ]
                l.set_weights(weights)

            # generator loss function
            g_loss = self.gan.train_on_batch(noise, self.valid)

            if epoch % 500 == 0:
                print(
                    "%d [D loss: %f, acc.: %.2f%%] [G loss: %f] [Loss change: %.3f, Loss increases: %.0f]"
                    % (epoch, d_loss[0], 100 * d_loss[1], g_loss,
                       g_loss - prev_g_loss, loss_increase_count))

        gen_attacks = self.scaler.inverse_transform(gen_attacks)
        predicted_gen_attack_labels = self.evaluator.predict(
            gen_attacks).transpose().astype(int)
        gen_attack_labels = np.full(predicted_gen_attack_labels.shape, 1)

        print("Generated attack labels: ")
        print(gen_attack_labels)
        print("Predicted labels of generated attacks: ")
        print(predicted_gen_attack_labels)

        right = (predicted_gen_attack_labels == 1).sum()
        wrong = (predicted_gen_attack_labels != 1).sum()

        accuracy = (right / float(right + wrong))

        print("5 generated attacks: ")
        print(gen_attacks[:5, :])
        print()
        print("Accuracy of evaluator on generated data: %.4f " % accuracy)
        if accuracy > .50:
            conn.write_gens(gen_attacks, util.attacks_to_num(self.attack_type))

        layersstr = str(self.generator_layers[0]) + "," + str(
            self.generator_layers[1]) + "," + str(self.generator_layers[2])
        attack_num = util.attacks_to_num(self.attack_type)

        conn.write_hypers(layerstr=layersstr,
                          attack_encoded=attack_num,
                          accuracy=accuracy)

    def setup(self):
        """ Sets up the GAN """
        # TODO: make this a separate method called from __init__, with options passed in

        print("Attack type: " + self.attack_type)

        conn = SQLConnector()
        data = conn.pull_kdd99(attack=self.attack_type, num=5000)
        dataframe = pd.DataFrame.from_records(data=data,
                columns=conn.pull_kdd99_columns(allQ=True))

        # ==========
        # ENCODING
        # ==========
        # https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn

        d = defaultdict(LabelEncoder)
        features = dataframe.iloc[:, :41]
        attack_labels = dataframe.iloc[:, 41:]

        for i in range(0, attack_labels.size):
            attack_labels.at[i, 'attack_type'] = util.attacks_to_num(attack_labels.at[i, 'attack_type'])

        fit = features.apply(lambda x: d[x.name].fit_transform(x))  # fit is encoded dataframe

        dataframe = fit.join(attack_labels)
        dataset = dataframe.values   # transform to ndarray

        # TODO: Move this entire process outside of gan.py? Creating the evaluation model takes
        # time and doesn't need to be redone for every GAN model; moving it, along with the
        # evaluation and database uploading, to another script (like the automation script)
        # may be more efficient.


        #pulling and encoding data for evaluation model
        '''
        eval_data = np.asarray(conn.pull_evaluator_data(1000000, self.attack_type))
        eval_dataframe = pd.DataFrame.from_records(data=eval_data,
                                              columns=conn.pull_kdd99_columns(allQ=True))
        encoded_eval_df = eval_dataframe.apply(lambda x: d[x.name].fit_transform(x))
        '''
        eval_dataset = pd.read_csv('PortsweepAndNonportsweep.csv', header=None)
        eval_dataset = eval_dataset.values

        self.eval_dataset_X = eval_dataset[:, 0:41].astype(int)
        self.eval_dataset_Y = eval_dataset[:, 41]

        validationToTrainRatio = 0.05
        validationSize = int(validationToTrainRatio * len(self.eval_dataset_X))
        self.eval_validation_data = self.eval_dataset_X[:validationSize]
        self.eval_validation_labels = self.eval_dataset_Y[:validationSize]
        self.eval_dataset_X = self.eval_dataset_X[validationSize:]
        self.eval_dataset_Y = self.eval_dataset_Y[validationSize:]

        testToTrainRatio = 0.05
        testSize = int(testToTrainRatio * len(self.eval_dataset_X))
        self.eval_test_data = self.eval_dataset_X[:testSize]
        self.eval_test_labels = self.eval_dataset_Y[:testSize]
        self.eval_dataset_X = self.eval_dataset_X[testSize:]
        self.eval_dataset_Y = self.eval_dataset_Y[testSize:]



        # print(fit)

        # ==========
        # DECODING
        # ==========

#         print("===============================================")
#         print("decoded:")
#         print("===============================================")
#         decode_test = dataset[:5]  # take a slice from the ndarray that we want to decode
#         decode_test_df = pd.DataFrame(decode_test, columns=conn.pull_kdd99_columns())  # turn that ndarray into a dataframe with correct column names and order
#         decoded = decode_test_df.apply(lambda x: d[x.name].inverse_transform(x))  # decode that dataframe
#         print(decoded)


        # to visually judge encoded dataset
        print("Real encoded " + self.attack_type + " attacks:")
        print(dataset[:1])

        # Set X as our input data and Y as our label
        self.X_train = dataset[:, 0:41].astype(float)
        Y_train = dataset[:, 41]

        # labels for data. 1 for valid attacks, 0 for fake (generated) attacks
        self.valid = np.ones((self.batch_size, 1))
        self.fake = np.zeros((self.batch_size, 1))