Example #1
    def __init__(self, seed=999, n_splits=10):

        self.seed = seed
        self.n_splits = n_splits

        import sys
        sys.path.append(
            r"C:\Users\Kelvin\CloudStation\MSC COMPUTER SCIENCE\Dissertation\CODE\Dissertation\Dissertation"
        )
        #sys.path.append(r"C:\Users\Kelvi\CloudStation\MSC COMPUTER SCIENCE\Dissertation\CODE\Dissertation\Dissertation")

        from Models import Models

        self.models = Models()

        from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

        tscv = TimeSeriesSplit(n_splits=n_splits)

        from sklearn.neural_network import MLPClassifier
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.svm import SVC
        from sklearn.gaussian_process import GaussianProcessClassifier
        from sklearn.gaussian_process.kernels import RBF
        from sklearn.tree import DecisionTreeClassifier
        from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
        from sklearn.naive_bayes import GaussianNB
        from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

        #self.models.add_model(model = GridSearchCV(estimator=MLPClassifier(random_state=seed), param_grid={}, cv=tscv), model_name = 'Neural Net')
        #self.models.add_model(model = GridSearchCV(estimator=KNeighborsClassifier(), param_grid={}, cv=tscv), model_name = 'KNN')
        #self.models.add_model(model = GridSearchCV(estimator=SVC(kernel='linear', random_state=seed), param_grid={}, cv=tscv), model_name = 'Linear SVM')
        #self.models.add_model(model = GridSearchCV(estimator=SVC(kernel='rbf', random_state=seed), param_grid={}, cv=tscv), model_name = 'RBF SVM')
        #self.models.add_model(model = GridSearchCV(estimator=GaussianProcessClassifier(random_state=seed), param_grid={}, cv=tscv), model_name = 'Gaussian Process')
        #self.models.add_model(model = GridSearchCV(estimator=DecisionTreeClassifier(random_state=seed), param_grid={}, cv=tscv), model_name = 'Decision Tree')
        #self.models.add_model(model = GridSearchCV(estimator=RandomForestClassifier(random_state=seed), param_grid={}, cv=tscv), model_name = 'Random Forest')
        #self.models.add_model(model = GridSearchCV(estimator=AdaBoostClassifier(random_state=seed), param_grid={}, cv=tscv), model_name = 'AdaBoost')
        #self.models.add_model(model = GridSearchCV(estimator=GaussianNB(), param_grid={}, cv=tscv), model_name = 'Naive Bayes')
        ##self.models.add_model(model = GridSearchCV(estimator=QuadraticDiscriminantAnalysis(), param_grid={}, cv=tscv), model_name = 'QDA')

        self.models.add_model(model=MLPClassifier(random_state=seed),
                              model_name='Neural Net')
        self.models.add_model(model=KNeighborsClassifier(), model_name='KNN')
        self.models.add_model(model=SVC(kernel='linear', random_state=seed),
                              model_name='Linear SVM')
        self.models.add_model(model=SVC(kernel='rbf', random_state=seed),
                              model_name='RBF SVM')
        self.models.add_model(
            model=GaussianProcessClassifier(random_state=seed),
            model_name='Gaussian Process')
        self.models.add_model(model=DecisionTreeClassifier(random_state=seed),
                              model_name='Decision Tree')
        self.models.add_model(model=RandomForestClassifier(random_state=seed),
                              model_name='Random Forest')
        self.models.add_model(model=AdaBoostClassifier(random_state=seed),
                              model_name='AdaBoost')
        self.models.add_model(model=GaussianNB(), model_name='Naive Bayes')
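For orientation: every snippet on this page exercises a user-defined Models class, not a library API. A minimal, purely hypothetical sketch of the registry that Example #1 appears to assume (only the add_model signature is inferred from the calls above; everything else is illustrative):

class Models:
    # Hypothetical sketch inferred from the calls above; not the original source.
    def __init__(self):
        self._models = {}  # model_name -> (unfitted) estimator

    def add_model(self, model, model_name):
        # Register an estimator under a human-readable name.
        self._models[model_name] = model

    def items(self):
        # Iterate over (name, estimator) pairs, e.g. to fit and score each model.
        return self._models.items()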
Example #2
def train_model(new_data):
    #declare objects
    Data_preparation = data_preparation()
    models = Models()
    if new_data:
        #read_data
        data = Data_preparation.read_data_add_labels()
        add_article_topic_col(data)
        data = Data_preparation.add_full_text(data)
        data = Data_preparation.add_binary_topics_col(data)
        data.to_csv('new_data/new_processed_data.csv')
    else:
        data = pd.read_csv('new_data/new_processed_data.csv', index_col=0)

    #for fast debug
    #data = data.sample(n=1000)

    train, test = train_test_split(data, test_size=0.1)
    train1, train2 = train_test_split(train, test_size=0.5)

    # train naive bayes model
    nb_model_obj = models.train_NB_model(train1)
    zero_one_train_matrix = Data_preparation.create_zero_one_matrix(
        nb_model_obj, train2)
    lr_model_obj = models.train_lr_model(zero_one_train_matrix,
                                         train2['LABEL'])

    #save model
    if save_model:
        nb_pkl_filename = 'nb_pickle_model.pkl'
        with open(nb_pkl_filename, 'wb') as file:
            pickle.dump(nb_model_obj, file)
        lr_pkl_filename = 'lr_pickle_model.pkl'
        with open(lr_pkl_filename, 'wb') as file:
            pickle.dump(lr_model_obj, file)

    predict_obj = Predict(nb_model_obj, lr_model_obj)
    nb_prediction = predict_obj.nb_predict(test, Data_preparation)
    print('test nb score: ' + str(np.mean(nb_prediction == test['LABEL'])))
    lr_proba, lr_prediction = predict_obj.lr_predict(test, Data_preparation)
    print('test lr score: ' + str(np.mean(lr_prediction == test['LABEL'])))
    predict_obj.get_confusion_matrix(test['LABEL'], lr_prediction, 'all')
    quantile_data, quantile_accurate = predict_obj.get_quantile_accurate(
        test, lr_prediction, lr_proba)
    with pd.option_context('display.max_rows', None, 'display.max_columns',
                           None):
        print(quantile_accurate)
    #todo add confusion matrix for each band
    for index, row in quantile_accurate.iterrows():
        print(row['probaBand'])
        quantile = quantile_data[quantile_data['probaBand'] ==
                                 row['probaBand']]
Example #3
    def test_challenge5(self):
        self.driver.get("https://www.copart.com")
        s = CopartSearchBar(self.driver)
        model = "porsche"
        s.search_input(model)
        entryNumber100 = self.driver.find_element(
            By.XPATH, "//*[@id='serverSideDataTable_length']//option[3]")
        entryNumber100.click()
        WebDriverWait(self.driver, 10).until(
            expected_conditions.visibility_of_element_located(
                (By.XPATH, "//*[@id='serverSideDataTable']/tbody/tr[100]")))
        m = Models(self.driver)
        m.unique_model_counter(100)
        d = Damages(self.driver)
        d.damage_finder(100)
Example #4
    def __init__(self, formal, caps, group):
        # Load the dataset
        data = DatasetLoader()
        # Create and train the models
        modelz = Models(data)
        # modelz.showPerformances()
        modelz.setSingleModel()  # this will set the multiNB model

        # Wait for the models to finish loading
        while not modelz.endLoading:
            time.sleep(1)

        # Initialize the chat and run the dialogs
        chat = ChatManager(modelz, group, formal, caps)
        chat.run()
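Example #4 busy-waits on a modelz.endLoading flag. If Models loads on a background thread, a threading.Event is the more idiomatic hand-off; a hypothetical variant (the Event and its name are illustrative, not part of the original Models class):

import threading

models_ready = threading.Event()

# The loader thread calls models_ready.set() once loading completes;
# the caller then blocks without a one-second polling loop:
models_ready.wait()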
Example #5
def Classify(X, Y, cls, rep, k=5000):
    # Start moment
    Start_moment = time.time()
    title = 'Classifying with {} and {} k={}'.format(cls, rep, k)
    print(title)

    # Load cached w2v representations if present; otherwise split, vectorize and oversample
    if 'w2v' in rep:
        train_x = load(open('w2v_rep/{}_train_x.pkl'.format(rep), 'rb'))
        train_y = load(open('w2v_rep/{}_train_y.pkl'.format(rep), 'rb'))
        test_x = load(open('w2v_rep/{}_test_x.pkl'.format(rep), 'rb'))
        test_y = load(open('w2v_rep/{}_test_y.pkl'.format(rep), 'rb'))
    else:

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            Y,
                                                            test_size=0.2,
                                                            random_state=123,
                                                            stratify=Y)
        train_x, train_y, test_x, test_y = Representations(
        ).get_representation(rep=rep,
                             train_x=X_train,
                             train_y=y_train,
                             test_x=X_test,
                             test_y=y_test,
                             k=k,
                             cat=None)
        sm = SMOTE(sampling_strategy='minority', random_state=None)
        train_x, train_y = sm.fit_sample(train_x, train_y)

    # dump(train_x, open('w2v_rep/{}_train_x.pkl'.format(rep), 'wb'))
    # dump(train_y, open('w2v_rep/{}_train_y.pkl'.format(rep), 'wb'))
    # dump(test_x, open('w2v_rep/{}_test_x.pkl'.format(rep), 'wb'))
    # dump(test_y, open('w2v_rep/{}_test_y.pkl'.format(rep), 'wb'))
    # return

    classifier = Models().get_classifier(cls)
    classifier.fit(train_x, train_y)
    # Train_Classifier(classifier, X_train, Y_train)

    pred = classifier.predict(test_x)

    # report = classification_report(test_labels, test_pred, target_names=['Contrário', 'Favorável'] if plb =='polaridade' else ['neutro', 'opiniao'])
    report = classification_report(test_y, pred, target_names=['no', 'yes'])
    print(report)
    Finish_moment = time.time()
    tm = "It took " + str((Finish_moment - Start_moment)) + " seconds"
    print(tm)
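A note on the SMOTE call: fit_sample was the original imbalanced-learn method name; it was deprecated in favour of fit_resample and removed in later releases, so on a current install the equivalent line would be:

# imbalanced-learn >= 0.4: fit_resample replaces the deprecated fit_sample
train_x, train_y = sm.fit_resample(train_x, train_y)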
Example #6
def test():
    models = Models()

    exp = RegressionExperiment("med_random")
    exp.predict(FeaturePipeline.ngram_reg(models.get("svr"), 1))
Example #7
    if args.method.lower() in ['sorecgatitem']:
        dataset = SocialItem_Dataset(args)
    elif args.method.lower() in ['sorecgatuser']:
        dataset = SocialUser_Dataset(args)
    else:
        dataset = Dataset(args)

    params = Parameters(args, dataset)
    print(
        """Load data done [%.1f s]. #user:%d, #item:%d, #dom:%d, #train:%d, #test:%d, #valid:%d"""
        % (time() - t1, params.num_users, params.num_items, params.num_doms,
           params.num_train_instances, params.num_test_instances,
           params.num_valid_instances))
    print('Method: %s' % (params.method))
    if params.method in ['sorecgatitem', 'sorecgatuser']:
        model = Models(params)
    model.define_model()
    model.define_loss('all')
    print("Model definition completed: in %.2fs" % (time() - t1))

    train_step = get_optimizer(params.learn_rate,
                               params.optimizer).minimize(model.loss)
    init = tf.global_variables_initializer()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    print('train instances: {}'.format(params.train_matrix.nnz))

    error_plot = Error_plot(save_flag=True,
                            res_path=params.result_path,
                            args_str=args_str,
                            args=args)
Example #8
    def execute(self):
        # parameters
        epsilon = .5  # exploration
        epsilon_decay = 0.95
        epsilon_min = 0.1

        epoch = 4000  # is number of cycles...
        max_memory = 2000  #  NEEDS TO BE AS BIG AS AT LEAST 1 TRADING DAY!!!

        batch_size = 50  # 50
        sequence_length = 250  # 500
        discount = 0.95

        training_days = 1
        testing_days = 1

        features_list = list(range(1, 33))  ## FULL
        features_list = list(range(1, 6))  ## SHORT!!

        training_store = ds.DataStore(training_days=training_days,
                                      features_list=features_list,
                                      sequence_length=sequence_length)
        features_length = training_store.get_features_length()
        env = Trading(data_store=training_store,
                      sequence_length=sequence_length,
                      features_length=features_length)

        num_actions = env.get_action_count(
        )  # [sell, buy, flat] # get From TRADING!!

        #testing_store = ds.DataStore(training_days=training_days, testing_days=10, features_list=features_list, sequence_length=sequence_length)

        mo = Models()
        rms = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-06)

        use_ufcnn = True
        if use_ufcnn:
            model = mo.model_ufcnn_concat(sequence_length=sequence_length,
                                          features=features_length,
                                          nb_filter=15,
                                          filter_length=5,
                                          output_dim=num_actions,
                                          optimizer=rms,
                                          loss='mse',
                                          batch_size=batch_size,
                                          init="normal")
            base_model_name = "ufcnn"
        else:
            model = mo.atari_conv_model(output_dim=num_actions,
                                        features=features_length,
                                        loss='mse',
                                        sequence_length=sequence_length,
                                        optimizer=rms,
                                        batch_size=batch_size,
                                        init="normal")
            base_model_name = "atari"

        testing_store = ds.DataStore(training_days=training_days,
                                     testing_days=testing_days,
                                     features_list=features_list,
                                     sequence_length=sequence_length,
                                     mean=training_store.mean,
                                     std=training_store.std)

        test_env = Trading(data_store=testing_store,
                           sequence_length=sequence_length,
                           features_length=features_length)

        #model = mo.atari_conv_model(regression=False, output_dim=num_actions, features=features_length, nb_filter=50,
        #                           loss='mse', sequence_length=sequence_length, optimizer=rms, batch_size=batch_size)

        # If you want to continue training from a previous model, just uncomment the line below
        #mo.load_model("ufcnn_rl_training")

        # Define environment/game

        # Initialize experience replay object

        start_time = time.time()
        best_pnl = -99999.
        best_rndless_pnl = -99999.

        exp_replay = ExperienceReplay(max_memory=max_memory,
                                      env=env,
                                      sequence_dim=(sequence_length,
                                                    features_length),
                                      discount=discount)
        lineindex = 0

        # Train
        for e in range(epoch):
            loss = 0.
            game_over = False

            total_reward = 0

            win_cnt = 0
            loss_cnt = 0
            random_cnt = 0
            no_random_cnt = 0

            ### loop over days-...
            for i in range(training_days):
                input_t = env.reset()

                j = 0
                while not game_over:  # game_over ... end of trading day...
                    input_tm1 = input_t
                    #print("INPUT ",input_tm1)
                    # get next action
                    if np.random.rand() <= epsilon:
                        action = np.random.randint(0, num_actions, size=1)[0]
                        random_cnt += 1
                        #print("RANDOM")
                    else:
                        q = model.predict(exp_replay.resize_input(input_tm1))
                        action = np.argmax(q[0])
                        no_random_cnt += 1
                        #print("SELECT")
                        ##action = np.argmax(q)

                    # apply action, get rewards and new state
                    input_t, reward, game_over, idays, lineindex = env.act(
                        action)

                    if reward > 0:
                        win_cnt += 1

                    if reward < 0:
                        loss_cnt += 1

                    total_reward += reward
                    if reward > 1.:
                        reward = 1.

                    if reward < -1.:
                        reward = -1.

                    # store experience
                    exp_replay.remember([action, reward, idays, lineindex - 1],
                                        game_over)

                    # adapt model

                    if j > batch_size:  # do not run exp_rep if the store is empty...
                        inputs, targets = exp_replay.get_batch(
                            model, batch_size=batch_size)
                        curr_loss = model.train_on_batch(
                            exp_replay.resize_input(inputs), targets)
                        loss += curr_loss

                    j += 1

            rndless_pnl = self.get_randomless_pnl(test_env=test_env,
                                                  model=model,
                                                  testing_days=testing_days)

            secs = time.time() - start_time
            print(
                "Epoch {:05d}/{} | Time {:7.1f} | Loss {:11.4f} | Win trades {:5d} | Loss trades {:5d} | Total PnL {:8.2f} | Rndless PnL {:8.2f} | Eps {:.4f} | Rnd: {:5d}| No Rnd: {:5d}  "
                .format(e, epoch, secs, loss, win_cnt, loss_cnt, total_reward,
                        rndless_pnl, epsilon, random_cnt, no_random_cnt),
                flush=True)
            if epsilon > epsilon_min:
                epsilon *= epsilon_decay
            # Save trained model weights and architecture, this will be used by the visualization code

            if total_reward > best_pnl:
                mo.save_model(model, base_model_name + "_rl_best")
                best_pnl = total_reward
            else:
                mo.save_model(model, base_model_name + "_rl_training")

            # Compare against the best randomless PnL, not the reward-based best_pnl.
            if rndless_pnl > best_rndless_pnl:
                mo.save_model(model, base_model_name + "_rl_rndless_best")
                best_rndless_pnl = rndless_pnl
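The loop above leans on an ExperienceReplay class whose implementation is not shown. As a rough sketch of the underlying technique only (a bounded memory of transitions, sampled uniformly for minibatch updates; the class and method names here are hypothetical, not from the original code):

import random
from collections import deque

class ReplayBuffer:
    # Minimal experience-replay sketch: bounded memory + uniform minibatch sampling.
    def __init__(self, max_memory=2000):
        self.memory = deque(maxlen=max_memory)  # oldest entries drop off automatically

    def remember(self, state, action, reward, next_state, game_over):
        self.memory.append((state, action, reward, next_state, game_over))

    def sample(self, batch_size=50):
        # Uniform sample; the caller builds Q-learning targets from these transitions.
        return random.sample(list(self.memory), min(batch_size, len(self.memory)))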
Example #9
    def initModels(self):
        self.corp = Models()
        self.corp.createLetterModel()
        self.corp.createWordUniGramModel()
Example #10
def Classify(X, Y, cls, rep, k=5000):
    # Start moment
    Start_moment = time.time()
    title = 'Classifying with {} and {} k={}'.format(cls, rep, k)
    print(title)

    # Creating the K-fold cross validator
    K_fold = KFold(n_splits=10, shuffle=True)

    # Labels
    test_labels = np.array([], 'int32')
    test_pred = np.array([], 'int32')

    # Confusion Matrix
    confusion = np.array([[0, 0], [0, 0]])

    # The test
    for train_indices, test_indices in K_fold.split(X):
        print('Running .... =)')
        X_train = [X[i] for i in train_indices]
        Y_train = [Y[i] for i in train_indices]

        X_test = [X[i] for i in test_indices]
        Y_test = [Y[i] for i in test_indices]

        train_x, train_y, test_x, test_y = Representations(
        ).get_representation(rep=rep,
                             train_x=X_train,
                             train_y=Y_train,
                             test_x=X_test,
                             test_y=Y_test,
                             k=k,
                             cat=None)
        # c = Counter(Y_train)
        # print(Counter(train_y))
        # print({1:c.most_common(1)[0][1], 0:c.most_common(1)[0][1], 2:c.most_common(1)[0][1]})

        sm = SMOTE(sampling_strategy='minority', random_state=None)
        # sm = SMOTE(sampling_strategy={1:c.most_common(1)[0][1], 0:c.most_common(1)[0][1], 2:c.most_common(1)[0][1]}, random_state=None)
        # print(len(train_y))
        train_x, train_y = sm.fit_sample(train_x, train_y)

        # print(Counter(train_y))

        test_labels = np.append(test_labels, Y_test)

        classifier = Models().get_classifier(cls)
        classifier.fit(train_x, train_y)
        # Train_Classifier(classifier, X_train, Y_train)

        pred = classifier.predict(test_x)
        test_pred = np.append(test_pred, pred)
        # print(test_y)
        # print(pred)
        confusion += confusion_matrix(test_y, pred)

    # report = classification_report(test_labels, test_pred, target_names=['Contrário', 'Favorável'] if plb =='polaridade' else ['neutro', 'opiniao'])
    report = classification_report(test_labels,
                                   test_pred,
                                   target_names=['no', 'yes'])
    print(report)
    print("Confusion matrix:")
    print(confusion)
    Finish_moment = time.time()
    tm = "It took " + str((Finish_moment - Start_moment)) + " seconds"
    print(tm)
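For comparison, the same fold-wise pattern (vectorize inside each fold, oversample only the training split, then fit and score) can be written directly against current scikit-learn/imbalanced-learn APIs. This is an illustrative sketch, with TfidfVectorizer and LogisticRegression standing in for the project's own Representations and Models helpers:

import numpy as np
from sklearn.model_selection import KFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

def classify_cv(texts, labels, n_splits=10):
    texts, labels = np.asarray(texts), np.asarray(labels)
    all_true, all_pred = [], []
    for train_idx, test_idx in KFold(n_splits=n_splits, shuffle=True).split(texts):
        vec = TfidfVectorizer()  # fit on the training fold only, to avoid leakage
        X_train = vec.fit_transform(texts[train_idx])
        X_test = vec.transform(texts[test_idx])
        # Oversample the minority class in the training fold only.
        X_train, y_train = SMOTE(sampling_strategy='minority').fit_resample(
            X_train, labels[train_idx])
        clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
        all_true.extend(labels[test_idx])
        all_pred.extend(clf.predict(X_test))
    print(classification_report(all_true, all_pred))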
Example #11
    def test_makeModels(self):
        m1 = Models()
        self.assertEqual(len(m1.modelList), 6)
Example #12
__author__ = 'Placinta'

from Models import Models

encrypted_string = "Esp qtcde nzyqpcpynp zy esp ezatn zq Lcetqtntlw Tyepwwtrpynp hld spwo le Olcexzfes Nzwwprp ty estd jplc".upper()
new_string = ''
count = len(encrypted_string)
possibilities = []
corp = Models()
corp.createWordUniGramModel()
for i in range(1, 37):
    character_list = []
    new_string = ""
    for j in range(0, count):
        if encrypted_string[j] == ' ':
            character_list.append(' ')
        else:
            character_list.append(
                chr((ord(encrypted_string[j]) - 0x41 + i) % 26 + 0x41))
    new_string = "".join(character_list).lower()
    possibilities.append(new_string)
    #print "{0}:".format(i)

max_prob = -10000
max_string = ''
for string in possibilities:
    probability = corp.getWordProbability(string)
    if probability > max_prob:
        max_prob = probability
        max_string = string
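The expression chr((ord(c) - 0x41 + i) % 26 + 0x41) in the loop is a plain Caesar shift over the uppercase alphabet (0x41 is 'A'); the unigram model then scores every candidate shift and keeps the most probable decryption. The shift itself, as a standalone illustration:

def caesar_shift(text, shift):
    # Rotate uppercase letters by `shift` positions, wrapping past 'Z'; keep spaces.
    return ''.join(
        chr((ord(c) - ord('A') + shift) % 26 + ord('A')) if c != ' ' else c
        for c in text)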
Example #13
if es.ping():
  print('Connected to Elasticsearch')
else:
  print('Could not connect to elasticsearch')
  sys.exit()

def remove_special_chars(text):
  '''This function removes the special chars from the text'''
  text = str(text)
  text = re.sub('[^A-Za-z0-9]+', ' ', text)
  text = text.lower()
  return text

# Load the Universal Sentence Encoder model
model = Models()

def get_query_doc(query):
  query = remove_special_chars(query)
  query_vector = model.get_vec_rep(query)  # Get the vector representation of the text from the model
  # Create a document structure to search with the query.
  query_doc = {
    "query" : 
    {
            "script_score" :
            {
                "query" : {
                    "match_all": {}
                },
                "script" : {
                    "source": "cosineSimilarity(params.query_vector, 'text_vector') + 1.0",
Example #14
def main():
    N_CLASSES = 2
    PREPROCESSING1 = 0
    PREPROCESSING2 = 0
    LOAD_AUTOENCODER1 = 1
    LOAD_CLASSIFIER = 1
    LOAD_MODEL = 1
    VALIDATION_SPLIT = .1
    LABELS = ["Attacks", "Normal"]
    pd.set_option('display.expand_frame_repr', False)
    pathModels = 'models/'
    pathDataset = 'datasets/'
    path = 'KDDTrain+aggregateOneCls10Features'
    pathTest = 'KDDTest+aggregateOneCls10Features'
    testpath = 'KDDTest+'
    train = pd.read_csv(pathDataset+path + ".csv")
    test = pd.read_csv(pathDataset+pathTest + ".csv")
    pathOutputTrain = pathDataset+path + 'Numeric.csv'
    pathOutputTest = pathDataset+pathTest + 'Numeric.csv'

    listNumerical10 = [
        ' src_bytes', ' dst_bytes', ' diff_srv_rate', ' same_srv_rate', ' dst_host_srv_count',
        ' dst_host_same_srv_rate',
        ' dst_host_diff_srv_rate', ' dst_host_serror_rate']

    prp = prep(train, test)

    tic_preprocessing1 = time.time()

    if (PREPROCESSING1 == 1):
        train, test = preprocessing(train, test, prp)
        train, test = scaler(train, test, listNumerical10)
        # pathOutputTrain/pathOutputTest already include pathDataset (see above),
        # so write to them directly rather than prepending pathDataset again.
        train.to_csv(pathOutputTrain, index=False)
        test.to_csv(pathOutputTest, index=False)
    else:
        train = pd.read_csv(pathDataset + path + 'Numeric.csv')
        test = pd.read_csv(pathDataset + pathTest + 'Numeric.csv')

    clsT, clsTest = prp.getCls()
    train_normal = train[(train[clsT] == 1)]
    print("train normal:", train_normal.shape)

    train_anormal = train[(train[clsT] == 0)]
    test_normal = test[(test[clsTest] == 1)]
    test_anormal = test[(test[clsTest] == 0)]

    train_XN, train_YN, test_XN, test_YN = prp.getXY(train_normal, test_normal)

    train_XA, train_YA, test_XA, test_YA = prp.getXY(train_anormal, test_anormal)
    train_X, train_Y, test_X, test_Y = prp.getXY(train, test)

    toc_preprocessing1 = time.time()
    time_preprocessing1 = toc_preprocessing1 - tic_preprocessing1

    print('Train data shape normal', train_XN.shape)
    print('Train target shape normal', train_YN.shape)
    print('Test data shape normal', test_XN.shape)
    print('Test target shape normal', test_YN.shape)

    print('Train data shape anormal', train_XA.shape)
    print('Train target shape anormal', train_YA.shape)
    print('Test data shape anormal', test_XA.shape)
    print('Test target shape anormal', test_YA.shape)

    # convert class vectors to binary class matrices for softmax
    #print(train_Y.head())
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
    print("Target train shape after", train_Y2.shape)
    test_Y2 = np_utils.to_categorical(test_Y, N_CLASSES)
    print("Target test shape after", test_Y2.shape)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=6, restore_best_weights=True),
    ]

    m = Models(N_CLASSES)

    if (LOAD_AUTOENCODER1 == 0):
        tic_autoencoder1 = time.time()
        print('Autoencoder only normal')
        # autoencoder parameters
        p1 = {
            'first_layer': 60,
            'second_layer': 30,
            'third_layer': 10,
            'four_layer': 40,
            'five_layer': 20,
            'six_layer': 10,
            'batch_size': 64,
            'epochs': 150,
            'optimizer': optimizers.Adam,
            'kernel_initializer': 'glorot_uniform',
            'losses': 'mse',
            'first_activation': 'tanh',
            'second_activation': 'tanh',
            'third_activation': 'tanh'}

        autoencoder = m.deepAutoEncoder(train_XN, p1)
        autoencoder.summary()

        history = autoencoder.fit(train_XN, train_XN,
                                  validation_split=VALIDATION_SPLIT,
                                  batch_size=p1['batch_size'],
                                  epochs=p1['epochs'], shuffle=True,
                                  callbacks=callbacks_list,
                                  verbose=1)

        printPlotAccuracy(history, 'autoencoder')
        printPlotLoss(history, 'autoencoder')

        toc_autoencoder1 = time.time()
        time_autoencoder1 = toc_autoencoder1 - tic_autoencoder1
        autoencoder.save(pathModels + 'autoencoderNormal.h5')
    else:
        print("Load autoencoder from disk")
        autoencoder = load_model(pathModels + 'autoencoderNormal.h5')
    # autoencoder.summary()

    # train predictions

    predictionsT = autoencoder.predict(train_X)
    mseT = np.mean(np.power(train_X - predictionsT, 2), axis=1)
    error_dfT = pd.DataFrame({'reconstruction_error': mseT})
    error_dfT['true_class'] = train_Y[clsT]

    pathOutputError = 'ErrorTraining.csv'
    error_dfT.to_csv(pathDataset + pathOutputError, index=False)

    ################# mse test #################################

    # test predictions
    tic_prediction_autoencoder1 = time.time()
    predictions = autoencoder.predict(test_X)
    mse = np.mean(np.power(test_X - predictions, 2), axis=1)
    toc_prediction_autoencoder1 = time.time()
    time_prediction_autoencoder1 = toc_prediction_autoencoder1 - tic_prediction_autoencoder1
    error_df = pd.DataFrame({'reconstruction_error': mse})
    error_df['true_class'] = test_Y[clsTest]

    pathOutputError = 'ErrorTest'
    error_df.to_csv(pathDataset + pathOutputError + testpath + '.csv', index=False)

    # =============================================================================
    #                   C2
    #
    # =============================================================================

    pathmseTrain = 'ErrorTraining'
    pathmseTest = 'ErrorTest'
    columnNameErrorN = 'reconstruction_error'

#    prp = prep(train, test)

    mseTrain = pd.read_csv(pathDataset + pathmseTrain + '.csv')
    mseTest = pd.read_csv(pathDataset + pathmseTest + testpath + '.csv')

    pathOutputTrain = pathDataset + path + 'mse_Numeric.csv'
    pathOutputTest = pathDataset + pathTest + 'mse_Numeric.csv'
    train = pd.read_csv(pathDataset+path + ".csv")
    test = pd.read_csv(pathDataset+pathTest + ".csv")

    train[columnNameErrorN] = mseTrain[columnNameErrorN]
    test[columnNameErrorN] = mseTest[columnNameErrorN]

    listNumerical10 = [
        ' src_bytes', ' dst_bytes', ' diff_srv_rate', ' same_srv_rate', ' dst_host_srv_count',
        ' dst_host_same_srv_rate',
        ' dst_host_diff_srv_rate', ' dst_host_serror_rate']

    tic_preprocessing = time.time()
    if (PREPROCESSING2 == 1):
        train, test = preprocessing(train, test, prp)
        train, test = scaler(train, test, listNumerical10)
        train.to_csv(pathOutputTrain, index=False)
        test.to_csv(pathOutputTest, index=False)

    else:
        train = pd.read_csv(pathOutputTrain)
        test = pd.read_csv(pathOutputTest)

    clsT, clsTest = prp.getCls()

    train_normal = train[(train[clsT] == 1)]

    train_anormal = train[(train[clsT] == 0)]
    test_normal = test[(test[clsTest] == 1)]
    test_anormal = test[(test[clsTest] == 0)]

    train_XN, train_YN, test_XN, test_YN = prp.getXY(train_normal, test_normal)

    train_XA, train_YA, test_XA, test_YA = prp.getXY(train_anormal, test_anormal)
    train_X, train_Y, test_X, test_Y = prp.getXY(train, test)

    toc_preprocessing = time.time()
    time_preprocessing = toc_preprocessing - tic_preprocessing

    print('Train data shape normal', train_XN.shape)
    print('Train target shape normal', train_YN.shape)
    print('Test data shape normal', test_XN.shape)
    print('Test target shape normal', test_YN.shape)

    print('Train data shape anormal', train_XA.shape)
    print('Train target shape anormal', train_YA.shape)
    print('Test data shape anormal', test_XA.shape)
    print('Test target shape anormal', test_YA.shape)

    # convert class vectors to binary class matrices fo softmax
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
    print("Train shape after", train_X.shape)
    print("Target train shape after", train_Y2.shape)
    test_Y2 = np_utils.to_categorical(test_Y, N_CLASSES)
    print("Target test shape after", test_Y2.shape)
    print("Test shape after", test_X.shape)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=6, restore_best_weights=True),
    ]

    m = Models(N_CLASSES)

    if (LOAD_MODEL == 0):
        tic_autoencoder = time.time()
        print('Autoencoder only normal')
        # autoencoder parameters
        p1 = {
            'first_layer': 60,
            'second_layer': 30,
            'third_layer': 10,
            'four_layer': 40,
            'five_layer': 20,
            'six_layer': 10,
            'batch_size': 128,
            'epochs': 150,
            'optimizer': optimizers.Adam,
            'kernel_initializer': 'glorot_uniform',
            'losses': 'mse',
            'first_activation': 'tanh',
            'second_activation': 'tanh',
            'third_activation': 'tanh'}

        autoencoder = m.deepAutoEncoder(train_XN, p1)
        autoencoder.summary()

        history = autoencoder.fit(train_XN, train_XN,
                                  validation_split=VALIDATION_SPLIT,
                                  batch_size=p1['batch_size'],
                                  epochs=p1['epochs'], shuffle=True,
                                  callbacks=callbacks_list,
                                  verbose=1)

        toc_autoencoder = time.time()
        time_autoencoder = toc_autoencoder - tic_autoencoder

        printPlotAccuracy(history, 'autoencoder')
        printPlotLoss(history, 'autoencoder')
        autoencoder.save(pathModels + 'autoencoderNormal2.h5')
    else:
        print("Load autoencoder from disk")
        autoencoder = load_model(pathModels + 'autoencoderNormal2.h5')
        #plot_model(autoencoder, to_file='autoencoder.png')

    # scale to improve classifier (!! change in fit!!)
    train_XS, test_XS = scaleSimple(train_X, test_X)

    print("Using softmax classifier:")
    if (LOAD_CLASSIFIER == 0):
        tic_classifier = time.time()
        # parameters for final model
        p2 = {
            'batch_size': 64,
            'epochs': 150,
            'optimizer': optimizers.Adam,
            'kernel_initializer': 'glorot_uniform',
            'losses': 'binary_crossentropy',
            'first_activation': 'tanh',
            'second_activation': 'tanh',
            'third_activation': 'tanh'}

        # model = m.modelWeightFixed(encoder, train_X, p2, encoder2)
        # class_weight = {0: 3, 1: 1}
        model = m.baselineModel(train_XS, p2)

        history3 = model.fit(train_XS, train_Y2,
                             # validation_data=(test_X, test_Y2),
                             validation_split=VALIDATION_SPLIT,
                             batch_size=p2['batch_size'],
                             epochs=p2['epochs'], shuffle=False,
                             callbacks=callbacks_list,  # class_weight=class_weight,
                             verbose=1)

        toc_classifier = time.time()
        time_classifier = toc_classifier - tic_classifier
        printPlotAccuracy(history3, 'finalModel1')
        printPlotLoss(history3, 'finalModel1')
        model.save(pathModels + 'modelsoftmax2.h5')
    else:
        print("Load softmax from disk")
        model = load_model(pathModels + 'modelsoftmax2.h5')
        model.summary()
        #plot_model(model, to_file='model.png')

    ################# mse train  ###########################

    # train predictions
    predictionsT = autoencoder.predict(train_X)
    pathOutputErrorT = 'ErrorTrain2.csv'
    mseT = np.mean(np.power(train_X - predictionsT, 2), axis=1)
    error_dfT = pd.DataFrame({'reconstruction_error': mseT})
    error_dfT['true_class'] = train_Y[clsT]
    error_dfT.to_csv(pathDataset + pathOutputErrorT)

    #################test#################################

    # test predictions
    pathOutputErrorTest = 'ErrorTest2'
    tic_prediction_autoencoder = time.time()
    predictions = autoencoder.predict(test_X)
    mse = np.mean(np.power(test_X - predictions, 2), axis=1)
    toc_prediction_autoencoder = time.time()
    # Phase-2 autoencoder prediction time (distinct from time_prediction_autoencoder1 above)
    time_prediction_autoencoder2 = toc_prediction_autoencoder - tic_prediction_autoencoder
    error_df = pd.DataFrame({'reconstruction_error': mse})
    error_df['true_class'] = test_Y[clsTest]

    ################### classifier prediction ###################
    tic_prediction_classifier = time.time()
    predictions = model.predict(test_XS)
    toc_prediction_classifier = time.time()
    time_prediction_classifier = toc_prediction_classifier - tic_prediction_classifier
    predictionsT = model.predict(train_XS)

    ############# create confusion matrix ######################

    # Predicting the Training set results
    y_predT = np.argmax(predictionsT, axis=1)
    cm = confusion_matrix(train_Y, y_predT)
    acc = accuracy_score(train_Y, y_predT, normalize=True)
    print('Softmax on training set')
    print(cm)
    print(acc)
    # Append the prediction to the reconstruction-error dataframe
    error_dfT['predict_softmax'] = y_predT
    error_dfT.to_csv(pathDataset+pathOutputErrorT, index=False)

    # Predicting the Test set results
   # prob = np.amax(predictions, axis=1)
   # print(prob)
    y_pred = np.argmax(predictions, axis=1)
    print(y_pred)
    cm = confusion_matrix(test_Y, y_pred)
    acc = accuracy_score(test_Y, y_pred, normalize=True)
    print('Softmax on test set')
    print(cm)
    print(acc)
    # Append the prediction to the reconstruction-error dataframe
    error_df['predict_softmax'] = y_pred
   # error_df['prob'] = prob
    error_df.to_csv(pathDataset+pathOutputErrorTest + testpath + '.csv', index=False)

    #########################################Phase after classification##############################

    # take to dataframe only prediction equals to 1
    error_OnlyNormal = error_df[error_df['predict_softmax'] == 1]
    # error_OnlyNormalT = error_dfT[error_dfT['predict_softmax'] == 1]
    # error_OnlyNormalT.to_csv("onlyNormal2.csv", index=False)

    threshold = 0.002

    tic_prediction_anomaly1 = time.time()
    y_predA = [0 if (e > threshold) else 1 for e in error_df.reconstruction_error.values]
    toc_prediction_anomaly1 = time.time()
    time_prediction_anomaly1 = toc_prediction_anomaly1 - tic_prediction_anomaly1
    conf_matrix = confusion_matrix(error_df.true_class, y_predA)
    plt.figure(figsize=(12, 12))
    sns.heatmap(conf_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d")
    plt.title("Confusion matrix All")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig("first matrix")
    plt.show()
    plt.close()

    tic_prediction_anomaly2 = time.time()
    y_predNormal = [0 if (e > threshold) else 1 for e in error_OnlyNormal.reconstruction_error.values]
    toc_prediction_anomaly2 = time.time()
    time_prediction_anomaly2 = toc_prediction_anomaly2 - tic_prediction_anomaly2
    conf_matrix2 = confusion_matrix(error_OnlyNormal.true_class, y_predNormal)
    print(conf_matrix2)
    plt.figure(figsize=(12, 12))
    sns.heatmap(conf_matrix2, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d")
    plt.title("Confusion matrix Normal")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig("second matrix")
    plt.show()

    if (PREPROCESSING1 == 1):
        print("Time for preprocessing 1 %s " % time_preprocessing1)

    if (PREPROCESSING2 == 1):
        print("Time for preprocessing 2 %s " % time_preprocessing)

    if (LOAD_AUTOENCODER1 == 0):
        print("Time for train autoencoder 1 %s " % time_autoencoder1)

    if (LOAD_MODEL == 0):
        print("Time for train autoencoder 2 %s " % time_autoencoder)
    if (LOAD_CLASSIFIER == 0):
        print("Time for train classifier %s " % time_classifier)

    print("Time for anomaly prediction %s " % (time_prediction_autoencoder1 + time_prediction_anomaly1))
    print("Time for classifier prediction %s " % time_prediction_classifier)
    print("Time for 2 phase prediction %s " % (time_prediction_autoencoder1 +
                                               time_prediction_classifier + time_prediction_autoencoder1 + time_prediction_anomaly2))
Example #15
from Models import Models
import generator as gen

data_gen_args = dict(rotation_range=0.2,
                     width_shift_range=0.05,
                     height_shift_range=0.05,
                     shear_range=0.05,
                     zoom_range=0.05,
                     horizontal_flip=True,
                     fill_mode='nearest')

path = 'data/shapes/'
myGene = gen.trainGenerator(2,
                            path + 'train',
                            'image',
                            'label',
                            data_gen_args,
                            save_to_dir=None)
model = Models(input_size=(256, 256, 1),
               model='unet',
               modelPath='unet_shapes.hdf5')
model.train(myGene, steps=10, epochs=2)
model.predict_images(path + 'test/')
Example #16
config_file_path = results_folder + '/params.ini'
args = train_utils.parse_params(config_file_path)
L = args.num_classes
args.image_dim = [128, 128, 3]

#%% load saved network parameters and open new session
tf.reset_default_graph()
in_placeholder = tf.placeholder(
    tf.float32,
    shape=[None, None, None, L + args.image_dim[2]],
    name="in_placeholder")
out_placeholder = tf.placeholder(tf.float32,
                                 shape=[None, None, None, L],
                                 name='out_placeholder')
phase = tf.placeholder(tf.bool, name='phase')
net_class = Models(args)
net_class.build_model(in_placeholder, phase)

sess = tf.Session()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
ckpt = tf.train.get_checkpoint_state(checkpoints_path)
if ckpt and ckpt.model_checkpoint_path:
    ckpt_path = checkpoints_path + 'my_model-' + str(checkpoint[0])
    saver.restore(sess, ckpt_path)

#%%
if not os.path.exists(output_path):
    os.makedirs(output_path)

image_list = sorted(os.listdir(pascal_path + '/images'))
Example #17
def getModels(client):
    saveTickDateInString = Properties.SAVE_TICKDATE_IN_STRING
    models = Models(client, saveTickDateInString=saveTickDateInString)
    return models
Example #18
    def __init__(self):
        # super(self).__init__()
        # initialize the classifiers
        self.models = Models()
Example #19
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    test_mkdir(args.save_path)

    if args.modelname == 'mnist_2nn' or args.modelname == 'mnist_cnn':
        datasetname = 'mnist'
        with tf.variable_scope('inputs') as scope:
            inputsx = tf.placeholder(tf.float32, [None, 784])
            inputsy = tf.placeholder(tf.float32, [None, 10])
    elif args.modelname == 'cifar10_cnn':
        datasetname = 'cifar10'
        with tf.variable_scope('inputs') as scope:
            inputsx = tf.placeholder(tf.float32, [None, 24, 24, 3])
            inputsy = tf.placeholder(tf.float32, [None, 10])

    myModel = Models(args.modelname, inputsx)

    predict_label = tf.nn.softmax(myModel.outputs)
    with tf.variable_scope('loss') as scope:
        Cross_entropy = -tf.reduce_mean(inputsy * tf.log(predict_label), axis=1)

    with tf.variable_scope('train') as scope:
        optimizer = tf.train.GradientDescentOptimizer(args.learning_rate)
        train = optimizer.minimize(Cross_entropy)

    with tf.variable_scope('validation') as scope:
        correct_prediction = tf.equal(tf.argmax(predict_label, axis=1), tf.argmax(inputsy, axis=1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    saver = tf.train.Saver(max_to_keep=3)
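A side note on the loss in Example #19: applying tf.log to the softmax output directly is numerically fragile, and reduce_mean over the class axis scales the usual cross-entropy by 1/num_classes. If myModel.outputs are raw logits, the conventional TF 1.x formulation would be roughly:

# Numerically stable alternative (TF 1.x): fused softmax + cross-entropy on logits.
Cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=inputsy,
                                               logits=myModel.outputs))
train = optimizer.minimize(Cross_entropy)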
Example #20
        dataset = Dataset(args)

    params = Parameters(args,dataset)
    print("""Load data done [%.1f s]. #user:%d, #list:%d, #item:%d, #train:%d, #valid:%d, #test:%d"""% (time() - t1, params.num_user, params.num_list,
        params.num_item,params.num_train_instances,params.num_valid_instances,params.num_test_instances))

    args.args_str        = params.get_args_to_string()
    t1                   = time()
    print("args str: ",args.args_str)

    print("leng from list_items_list: ",len(utils.get_value_lists_as_list(params.list_items_dct)))
    print("leng from trainArrTriplets: ", len((params.trainArrTriplets[0])))
    print("non-zero entries in train_matrix: ", params.train_matrix.nnz)

    # model-loss-optimizer defn =======================================================================
    models               = Models(params,device=device)
    model                = models.get_model()

    if params.loss not in ['bpr']: #bpr
        criterion_li     = torch.nn.BCELoss()
        #criterion_li     = torch.nn.BCEWithLogitsLoss() ## new change made
    if params.optimizer == 'adam':
        optimizer_gnn     = torch.optim.Adam(model.parameters(), lr=params.lr)
        optimizer_seq     = torch.optim.Adam(model.parameters(), lr=params.lr)
    elif params.optimizer == 'rmsprop':
        optimizer_gnn     = torch.optim.RMSprop(model.parameters(), lr=params.lr)
        optimizer_seq     = torch.optim.RMSprop(model.parameters(), lr=params.lr)
    model.to(device)

    # training =======================================================================
    ## param =============================
Example #21
    testing_store = ds.DataStore(training_days=training_days,
                                 testing_days=testing_days,
                                 features_list=features_list,
                                 sequence_length=sequence_length,
                                 mean=training_store.mean,
                                 std=training_store.std)

    features_length = training_store.get_features_length()

    env = Trading(data_store=testing_store,
                  sequence_length=sequence_length,
                  features_length=features_length)
    num_actions = env.get_action_count(
    )  # [sell, buy, flat] # get From TRADING!!

    mo = Models()

    start_time = time.time()
    best_pnl = -99999.
    exp_replay = ExperienceReplay(max_memory=max_memory,
                                  env=env,
                                  sequence_dim=(sequence_length,
                                                features_length))

    if len(sys.argv) == 2:
        model_name = sys.argv[1]
    else:
        model_name = None

    if model_name is not None:
        model = mo.load_model(model_name)
Example #22
from Models import Models
from api import get_model_name

models = Models()
model_ids = models.model_ids

model_names = []
for model_id in model_ids:
    model_names.append((model_id, get_model_name(model_id)))

models.write_model_names(model_names)