def fit(self, x, y):
    # log the class distribution of the training labels
    Utilities.count_occurences(y)

    # tune hyper-parameters, then build and train the final classifier
    self.grid_search(x, y)
    self.clf = self.construct_classifier()
    logger.info("Training...")
    return self.clf.fit(x, y)
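
# construct_classifier is called above but not shown; a minimal sketch of what it
# might return, assuming the wrapper keeps the grid-searched SVM parameters in a
# hypothetical self.best_params dict (both the attribute name and the SVC choice
# are assumptions, not the original implementation):
def construct_classifier(self):
    from sklearn.svm import SVC
    # probability=True is needed later if predict_proba is used on the test set
    return SVC(C=self.best_params.get('C', 1.0),
               gamma=self.best_params.get('gamma', 'scale'),
               probability=True)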
def processing(price_source, stock, load_articles, full_articles, processing_type='SVM'):

    x_data, y_data = get_data(full_articles, load_articles, price_source, stock)
    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.25, random_state=42)

    if processing_type == 'SVM':
        y_result, y_result_prob = svm_prediction(x_train, x_test, y_train)
    else:
        y_result, y_result_prob = lstm_prediction(processing_type, x_train, x_test, y_train)

    Utilities.measure_performance(y_test, y_result)
    # pass the predicted probabilities (not the hard labels twice) to the AUC metric
    Utilities.measure_performance_auc(y_test, y_result, y_result_prob)
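
# svm_prediction is referenced above but not shown; a minimal sketch under the
# assumption that it mirrors lstm_prediction and wraps a scikit-learn SVC
# (hypothetical implementation, not the original):
def svm_prediction(x_train, x_test, y_train):
    from sklearn.svm import SVC
    clf = SVC(probability=True)
    clf.fit(x_train, y_train)
    y_result = clf.predict(x_test)
    # probability of the positive class for the AUC computation
    y_result_prob = clf.predict_proba(x_test)[:, 1]
    return y_result, y_result_prob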
def lstm_prediction(model_type, x_train, x_test, y_train):

    # convert labels to a two-column (one-hot) target and add a feature axis for the LSTM
    y_train = Utilities.make_dual(y_train, 2)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    # initialise model architecture
    market_model = build_model(x_train, model_type)
    market_model.summary()
    # train model on data; early stopping halts training once the validation loss
    # stops improving for 25 epochs (the History object returned by fit holds the
    # per-epoch training error)
    cbks = [callbacks.EarlyStopping(monitor='val_loss', patience=25)]
    market_model.fit(x_train, y_train, batch_size=1000, callbacks=cbks, epochs=50,
                     validation_split=0.25, shuffle=True)
    y_result_prob = market_model.predict(x_test)
    y_result = Utilities.make_single_dimension(y_result_prob)
    return y_result, y_result_prob
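
# build_model is used above but not shown; a minimal sketch of a binary-classification
# recurrent model, assuming model_type merely selects between an LSTM and a GRU layer
# (layer sizes and the model_type convention are assumptions, not the original code):
from keras.models import Sequential
from keras.layers import LSTM, GRU, Dense, Dropout

def build_model(x_train, model_type='LSTM'):
    recurrent = LSTM if model_type == 'LSTM' else GRU
    model = Sequential()
    # input shape is (timesteps, features) = (x_train.shape[1], 1) after the reshape above
    model.add(recurrent(32, input_shape=(x_train.shape[1], x_train.shape[2])))
    model.add(Dropout(0.25))
    # two output units to match the two-column targets from Utilities.make_dual
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model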
def compute_cost_function(self):
    # E-step: compute the responsibility r[i][j] of mixture component j for row i
    k = len(self.theta)
    r = np.ones((self.nrows, k))
    for i in range(0, self.nrows):
        input_x = self.x[i]
        sum_lh = 0
        for j in range(0, k):
            # weighted likelihood of row i under component j (diagonal covariance)
            r[i][j] = self.theta[j] * Utilities.pdf_mnd(
                self.mu[j], np.diag(np.diag(self.cov[j])), input_x)
            sum_lh += r[i][j]
        # normalise so the responsibilities for row i sum to one
        r[i] /= sum_lh
    return r
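
# Utilities.pdf_mnd is used above but not shown; a minimal sketch of a multivariate
# normal density, assuming the signature pdf_mnd(mu, cov, x) seen in the call above
# (hypothetical helper, not the original):
import numpy as np

def pdf_mnd(mu, cov, x):
    d = len(mu)
    diff = np.asarray(x) - np.asarray(mu)
    cov_inv = np.linalg.inv(cov)
    # normalisation constant of the d-dimensional Gaussian
    norm = 1.0 / np.sqrt(((2 * np.pi) ** d) * np.linalg.det(cov))
    return norm * np.exp(-0.5 * diff @ cov_inv @ diff)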
def update_vars(self, cost_function):
    e_hi, e_log_hi = cost_function
    # M-step: re-estimate the mean as a weighted average of the data
    self.mu = np.zeros(self.dimensions)
    for i in range(self.nrows):
        self.mu += e_hi[i] * self.x[i]
    self.mu /= sum(e_hi)
    x_minus_mean = self.x - self.mu
    # re-estimate the covariance from the weighted outer products of the residuals;
    # np.outer is used because np.matmul on two 1-D vectors collapses to a dot product
    self.cov = np.zeros((self.dimensions, self.dimensions))
    for i in range(self.nrows):
        self.cov += e_hi[i] * np.outer(x_minus_mean[i], x_minus_mean[i])
    self.cov /= sum(e_hi)
    self.cov = np.diag(np.diag(self.cov))
    # re-estimate the degrees of freedom by a line search over the t-distribution cost
    self.nu = Utilities.line_search_min(Utilities.t_cost, 0.1, 1000, 10000, e_hi, e_log_hi)
    return self.compute_cost_function()
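
# Utilities.line_search_min is used above but not shown; a minimal sketch under the
# assumption that it grid-searches [low, high] at `steps` points for the value that
# minimises cost_fn(value, *args) (the signature is inferred from the call above and
# is an assumption, not the original helper):
import numpy as np

def line_search_min(cost_fn, low, high, steps, *args):
    grid = np.linspace(low, high, steps)
    costs = [cost_fn(v, *args) for v in grid]
    return grid[int(np.argmin(costs))]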
def __init__(self, type):
    # load the two-class training images, project them with PCA and fit one
    # Gaussian (mean and full covariance) per class
    mu0, image0, mu1, image1, shape = self.load_model(type)
    image0, image1 = self.reduce_dimensions(image0), self.reduce_dimensions(image1)
    mu0, mu1 = np.mean(image0, axis=0), np.mean(image1, axis=0)
    print(mu0)
    cov0 = np.cov(image0.T)
    cov1 = np.cov(image1.T)
    # precompute determinants and inverses for the density evaluations below
    cov0_det, cov0_inv = np.linalg.det(cov0), np.linalg.inv(cov0)
    cov1_det, cov1_inv = np.linalg.det(cov1), np.linalg.inv(cov1)
    self.ground_truth = list()
    test_images = self.load_test_data(type)
    test_pca = self.reduce_dimensions(test_images)
    prediction = list()
    correct_pred = 0
    for x, pred in zip(test_pca, self.ground_truth):
        # class-conditional likelihoods and posterior probability of class 1
        lh_0 = self.pdf_nd(mu0, cov0_det, cov0_inv, x)
        lh_1 = self.pdf_nd(mu1, cov1_det, cov1_inv, x)
        prob = self.compute_posterior(lh_0, lh_1)
        # threshold the posterior at 0.5 to get a hard prediction
        prob = 1 if prob >= 0.5 else 0
        if prob == pred:
            correct_pred += 1
        prediction.append(prob)
        # print("Correct/Total Predictions: " + repr((correct_pred, len(prediction))))
    print("Accuracy: " + repr(float(correct_pred) * 100 / len(prediction)))
    print(Utilities.performance_Metrics(prediction, self.ground_truth))
    # ROC curve and AUC over the binary predictions
    fp, tp, _ = roc_curve(self.ground_truth, prediction)
    area_under_curve = auc(fp, tp)
    print((fp, tp))
    plt.plot(fp, tp, 'b', label='AUC = %0.2f' % area_under_curve)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.1])
    plt.ylim([-0.1, 1.1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()
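
# pdf_nd and compute_posterior are used above but not shown; minimal sketches, assuming
# pdf_nd evaluates a multivariate normal density from a precomputed determinant/inverse
# and compute_posterior applies Bayes' rule with equal class priors (both are
# assumptions about the original helpers):
import numpy as np

def pdf_nd(mu, cov_det, cov_inv, x):
    d = len(mu)
    diff = np.asarray(x) - np.asarray(mu)
    norm = 1.0 / np.sqrt(((2 * np.pi) ** d) * cov_det)
    return norm * np.exp(-0.5 * diff @ cov_inv @ diff)

def compute_posterior(lh_0, lh_1):
    # P(class 1 | x) under equal priors
    return lh_1 / (lh_0 + lh_1)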