Example 1
    def learn_method(self):
        # Assumes `import random`, the `calculator` module, and the
        # sortable `Weight` class are available in the enclosing module.
        population = []

        # Seed the population with 5 random weight vectors.
        for i in range(5):
            weights = [random.randint(1, 10) for _ in range(4)]
            cal = calculator.cal_map(weights[0], weights[1], weights[2], weights[3])
            score = cal.get("map") * cal.get("ndcg")
            population.append(Weight(weights, score))
        population.sort()

        # We use an evolutionary algorithm to find the best weights.
        # There are two ways to generate new weights; which one is used
        # is decided randomly on each iteration.
        for n in range(1000):
            choice = random.randint(0, 1)
            child = [0, 0, 0, 0]

            if choice == 0:
                # First way: blend two parents' weights using a random
                # alpha (arithmetic crossover).
                alpha = random.random()
                c1 = random.randint(0, 3)
                c2 = random.randint(0, 3)
                for i in range(4):
                    child[i] = (population[c1].weights[i] * alpha
                                + population[c2].weights[i] * (1 - alpha))
            else:
                # Second way: splice the two parents at a random
                # crossover point (one-point crossover).
                alpha = random.randint(1, 3)
                c1 = random.randint(0, 3)
                c2 = random.randint(0, 3)
                for i in range(alpha):
                    child[i] = population[c1].weights[i]
                for i in range(alpha, 4):
                    child[i] = population[c2].weights[i]

            # Here we query Elasticsearch and calculate the child's score.
            cal = calculator.cal_map(child[0], child[1], child[2], child[3])
            score = cal.get("map") * cal.get("ndcg")
            population.append(Weight(child, score))
            population.sort()
            population.pop()  # evict the worst individual

        print("done")
Example 2
def mini_batch_SGD(eta, batch_size, epochs):
    MB_start_time = time.time()
    np.random.seed(seed)
    beta_init = np.random.randn(X_train.shape[1], 1)
    w1 = Weight(X_train, y_train, beta_init, eta, epochs, batch_size=batch_size)
    final_betas_MB, _ = w1.train(w1.mini_batch_gradient_descent)
    prob_MB, y_pred_MB = classification(X_test, final_betas_MB, y_test)[0:2]
    false_pos_MB, true_pos_MB = roc_curve(y_test, prob_MB)[0:2]
    AUC_MB = auc(false_pos_MB, true_pos_MB)
    print("Area under curve MB%s: " %batch_size, AUC_MB)
    MB_time = time.time() - MB_start_time
    return AUC_MB, MB_time, false_pos_MB, true_pos_MB
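A minimal call, assuming X_train, y_train, X_test, y_test, seed, and the Weight class are already in scope; the learning rate and batch size here are illustrative:

auc_mb, mb_time, fpr_mb, tpr_mb = mini_batch_SGD(eta=1e-3, batch_size=32, epochs=100)
print("AUC %.4f computed in %.2f s" % (auc_mb, mb_time))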
Example 3
def Best_Parameters(epochs, batch_size, method, etamin, etamax, step, y_val, X_val):
    beta_init = np.random.randn(X_train.shape[1], 1)
    eta_vals = np.logspace(etamin, etamax, step)
    auc_array = np.zeros((2, step))
    for i, eta in enumerate(eta_vals):
        np.random.seed(seed)
        print("Iteration: ",i)
        print("eta: ", eta)
        w = Weight(X_train, y_train, beta_init, eta, epochs, batch_size=batch_size)
        method_ = getattr(w, method)
        final_betas, _ = w.train(method_)
        prob = sigmoid(X_val, final_betas)
        auc_array[0][i] = roc_auc_score(y_val, prob)

        auc_array[1][i] = eta
    max_auc = np.max(auc_array[0])
    best_eta = auc_array[1][np.argmax(auc_array[0])]

    return max_auc, best_eta
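For example, to scan 20 learning rates between 1e-5 and 1e-1 for the mini-batch method (argument values illustrative; X_val and y_val assumed to exist):

max_auc, best_eta = Best_Parameters(epochs=100, batch_size=32,
                                    method="mini_batch_gradient_descent",
                                    etamin=-5, etamax=-1, step=20,
                                    y_val=y_val, X_val=X_val)
print("Best AUC %.4f at eta = %.2e" % (max_auc, best_eta))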
Example 4
    tmpNeuron = Neuron("INPUT", "I" + str(i))
    neuronList.append(tmpNeuron)
for i in range(0, numberOfHiddenNeurons):
    tmpNeuron = Neuron("HIDDEN", "H" + str(i))
    neuronList.append(tmpNeuron)
for i in range(0, numberOfOutputNeurons):
    tmpNeuron = Neuron("OUTPUT", "O" + str(i))
    neuronList.append(tmpNeuron)
# Now, generate all of the weights.
# Start from the hidden layer.
for i in range(numberOfInputNeurons,
               numberOfInputNeurons + numberOfHiddenNeurons):
    outputNeuron = neuronList[i]
    for j in range(0, numberOfInputNeurons):
        inputNeuron = neuronList[j]
        tmpWeight = Weight("W" + str(numberofWeights), random.uniform(0, 1),
                           inputNeuron, outputNeuron)
        inputNeuron.UpdateOutputWeights(tmpWeight)
        outputNeuron.UpdateInputWeights(tmpWeight)
        weightList.append(tmpWeight)
        numberofWeights += 1
for i in range(
        numberOfInputNeurons + numberOfHiddenNeurons,
        numberOfInputNeurons + numberOfHiddenNeurons + numberOfOutputNeurons):
    outputNeuron = neuronList[i]
    for j in range(numberOfInputNeurons,
                   numberOfInputNeurons + numberOfHiddenNeurons):
        inputNeuron = neuronList[j]
        tmpWeight = Weight("W" + str(numberofWeights), random.uniform(0, 1),
                           inputNeuron, outputNeuron)
        inputNeuron.UpdateOutputWeights(tmpWeight)
        outputNeuron.UpdateInputWeights(tmpWeight)
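The wiring above assumes Neuron and Weight classes matching the constructor and UpdateInputWeights/UpdateOutputWeights calls, plus import random and the neuronList/weightList/numberofWeights bookkeeping initialized earlier. A minimal sketch consistent with those calls (the real definitions may differ):

class Neuron:
    def __init__(self, layer_type, name):
        self.layerType = layer_type
        self.name = name
        self.inputWeights = []      # weights feeding into this neuron
        self.outputWeights = []     # weights leaving this neuron

    def UpdateInputWeights(self, weight):
        self.inputWeights.append(weight)

    def UpdateOutputWeights(self, weight):
        self.outputWeights.append(weight)


class Weight:
    def __init__(self, name, value, input_neuron, output_neuron):
        self.name = name
        self.value = value
        self.inputNeuron = input_neuron
        self.outputNeuron = output_neuron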
Example 5
    if args.physico:
        feats.physicochem()

    if args.kmer is None and args.pseaac is None and not args.physico:
        print("You must specify at least one feature type (-k, -p, -y).")

    else:
        # Weight if needed
        if args.weight:
            # Get distance threshold
            d = args.dist[0]
            # Get cluster type
            cluster_type = args.cluster_type[0]
            # Weight GTA
            pairwiseGTA = Weight.load(args.weight[0])
            GTA_weight = Weight(gta_profs, pairwiseGTA)
            GTA_clusters = GTA_weight.cluster(cluster_type, d)
            GTA_weight.weight(GTA_clusters)
            # Weight Virus
            pairwiseViral = Weight.load(args.weight[1])
            virus_weight = Weight(viral_profs, pairwiseViral)
            virus_clusters = virus_weight.cluster(cluster_type, d)
            virus_weight.weight(virus_clusters)

        # Create SVM
        c = args.c[0]
        kernel = args.kernel[0]
        kernel_var = float(args.kernel[1])

        svm = SVM(gta_profs, viral_profs, c, kernel, kernel_var)
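For illustration, this block expects an argparse namespace shaped roughly as follows; the attribute names mirror the accesses above, while the values are invented:

from types import SimpleNamespace

args = SimpleNamespace(
    kmer=[4],                      # -k
    pseaac=None,                   # -p
    physico=False,                 # -y
    weight=["gta_pairwise.dist", "viral_pairwise.dist"],
    dist=[0.03],                   # clustering distance threshold
    cluster_type=["farthest"],
    c=[1.0],                       # SVM soft-margin parameter
    kernel=["gaussian", "0.5"],    # kernel name and its parameter
)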
Example 6
    def xval(self,
             nfold=5,
             nrep=10,
             pairwiseGTA=None,
             pairwiseViral=None,
             cluster_type='farthest',
             d=0.03):
        """n-fold cross validation of
			the test set. 
			Input:
				n (int): number of folds for xval
			Returns:
				(fpr, fnr): false positive and false 
					negative rates from the n xvals
		"""
        # keep track of label classification
        score0 = 0.0
        score1 = 0.0
        gta_as_phage = []
        phage_as_gta = []

        # repeat xval results nrep times
        for i in range(nrep):
            if not mini:
                sys.stdout.flush()
                sys.stdout.write("Starting rep: %d\r" % (i + 1))
            # randomly sort profiles
            random.shuffle(self.profiles)
            # split into folds
            split = [self.profiles[i::nfold] for i in range(nfold)]
            # cross val
            for j in range(nfold):
                # Build train and test sets
                train_fold = np.array([
                    x for sublist in (split[:j] + split[j + 1:])
                    for x in sublist
                ])
                test_fold = split[j]
                trainX = np.array([x.features for x in train_fold])
                testX = np.array([x.features for x in test_fold])
                trainY = np.array([
                    -1.0 if y.label == self.label0 else 1.0 for y in train_fold
                ])
                testY = np.array([
                    -1.0 if y.label == self.label0 else 1.0 for y in test_fold
                ])
                testNames = np.array([x.org_name for x in test_fold])
                # randomize labels
                # random.shuffle(trainY)
                # Get training set weights
                if pairwiseGTA:
                    # Reweight based on training set
                    # GTA
                    GTA_weight = Weight(
                        [x for x in train_fold if x.label == self.label0],
                        pairwiseGTA)
                    GTA_clusters = GTA_weight.cluster(cluster_type, d)
                    GTA_weight.weight(GTA_clusters)
                    # Virus
                    virus_weight = Weight(
                        [x for x in train_fold if x.label != self.label0],
                        pairwiseViral)
                    virus_clusters = virus_weight.cluster(cluster_type, d)
                    virus_weight.weight(virus_clusters)
                    # Grab updated weights
                    weights = np.array([x.weight for x in train_fold])
                else:
                    weights = np.array([1 for x in train_fold])
                # evaluate results
                predictor = SVMTrain(self.kernel,
                                     self.c).train(trainX, trainY, weights)
                for r in range(len(testX)):
                    # Positive product is correct classification
                    if predictor.predict(testX[r]) * testY[r] > 0:
                        # Update label0 if negative, label1 otherwise
                        if testY[r] < 0:
                            score0 += 1
                        else:
                            score1 += 1
                    else:  # predicted incorrectly
                        if testY[r] > 0:  # virus as GTA
                            phage_as_gta.append(testNames[r])
                        else:  # GTA as virus
                            gta_as_phage.append(testNames[r])

        if not mini:
            print("\nPhages (%d) misclassified over %d reps: %s" %
                  (len(phage_as_gta), nrep, phage_as_gta))
            print("\nGTA (%d) misclassified over %d reps: %s\n" %
                  (len(gta_as_phage), nrep, gta_as_phage))

        return (score0 / nrep, score1 / nrep)
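The stride-based split deals profiles into folds round-robin; a quick illustration:

profiles = list(range(10))
nfold = 3
split = [profiles[i::nfold] for i in range(nfold)]
# split == [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]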
Example 7
"""计算不同因素的权重"""

from Weight import Weight
import os


def dir_name(dirname):
    # Collect files ending in "model" from the top level of dirname
    # (the early return stops os.walk after the first directory).
    for root, dirs, files in os.walk(dirname):
        models = [file for file in files if file.endswith('model')]
        return models


if __name__ == '__main__':
    d_name = os.path.join(os.getcwd(), 'Main')
    models = dir_name(d_name)

    TI = Weight('news.csv', 'positive.txt', 'negative.txt', models)
    TI.run()
Example 8
import pytest

# Weight and WeightUnit are assumed to come from the project's weight
# module, e.g. `from weight import Weight, WeightUnit`.


class TestWeight:
    @pytest.mark.regression
    @pytest.mark.parametrize(
        "weight1, weight2, expected_result",
        [(Weight(10, WeightUnit.G), Weight(10, WeightUnit.G), 20),
         (Weight(1, WeightUnit.KG), Weight(10, WeightUnit.G), 1.01),
         (Weight(10, WeightUnit.KG), Weight(10, WeightUnit.LB), 14.53592),
         (10, Weight(10, WeightUnit.LB), 10.022046244201839)])
    def test_adding(self, weight1: Weight, weight2: Weight,
                    expected_result: float):
        assert weight1 + weight2 == expected_result
        weight1 += weight2
        if isinstance(weight1, Weight):
            assert weight1.weight == expected_result

        else:
            assert weight1 == expected_result

    @pytest.mark.regression
    @pytest.mark.parametrize(
        "weight1, weight2, expected_result",
        [(Weight(10, WeightUnit.G), Weight(10, WeightUnit.G), 0),
         (Weight(1, WeightUnit.KG), Weight(10, WeightUnit.G), 0.99),
         (Weight(10, WeightUnit.KG), Weight(10, WeightUnit.LB), 5.46408),
         (10, Weight(10, WeightUnit.LB), 9.977953755798163)])
    def test_subtracting(self, weight1: Weight, weight2: Weight,
                         expected_result: float):
        assert weight1 - weight2 == expected_result
        weight1 -= weight2
        if isinstance(weight1, Weight):
            assert weight1.weight == expected_result

        else:
            assert weight1 == expected_result
Example 9
'''
Created on 11-Oct-2014

@author: ghantasa
'''

from Temperature import Temperature
from Distance import Distance
from Memory import Memory
from Weight import Weight

if __name__ == '__main__':
    while True:
        choice = input('Available conversions ... \n' + '1. Temperature\n' +
                       '2. Distance\n' + '3. Memory\n' + '4. Weight\n' +
                       '5. Exit\n' + 'Please enter your choice ... ')
        if choice == '1':
            t = Temperature()
            t.convert()
        elif choice == '2':
            d = Distance()
            d.convert()
        elif choice == '3':
            m = Memory()
            m.convert()
        elif choice == '4':
            w = Weight()
            w.convert()
        else:
            print('Exiting now ... Bye!')
            break
Example 10
def Plots(epochs, AUC_time_plot = 0, ROC_plot = 0, Lift_plot_test_NN = 0, Lift_plot_train_NN = 0, GD_plot = 0, MB_GD_plot = 0, Stoch_GD_plot = 0,
          Newton_plot = 0, Scatter_GD_plot = 0):

    if (ROC_plot == 1 or AUC_time_plot == 1):
        # The hard-coded learning rates below are presumably tuned
        # values (cf. Best_Parameters).
        GRAD_start_time = time.time()
        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w = Weight(X_train, y_train, beta_init, 6.892612104349695e-05, epochs)
        final_betas_grad, cost = w.train(w.gradient_descent)
        prob_grad, y_pred_grad = classification(X_test, final_betas_grad, y_test)[0:2]
        false_pos_grad, true_pos_grad = roc_curve(y_test, prob_grad)[0:2]
        AUC_GRAD = auc(false_pos_grad, true_pos_grad)
        print("Area under curve gradient: ", AUC_GRAD)
        GRAD_time = time.time() - GRAD_start_time

        SGD_start_time = time.time()
        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w2 = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)
        final_betas_ST, _ = w2.train(w2.stochastic_gradient_descent)
        prob_ST, y_pred_ST = classification(X_test, final_betas_ST, y_test)[0:2]
        false_pos_ST, true_pos_ST = roc_curve(y_test, prob_ST)[0:2]
        AUC_SGD = auc(false_pos_ST, true_pos_ST)
        print("Area under curve ST: ", AUC_SGD)
        SGD_time = time.time() - SGD_start_time

        """np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1],1)
        w3 = Weight(X_train,y_train,beta_init,0.001, 20)
        final_betas_Newton,_ = w3.train(w3.newtons_method)
        prob_Newton, y_pred_Newton = classification(X_train,final_betas_Newton, y_test)[0:2]
        false_pos_Newton, true_pos_Newton = roc_curve(y_test, prob_Newton)[0:2]
        print("Area under curve Newton: ", auc(false_pos_Newton, true_pos_Newton))"""

        AUC_MB5 = 0
        MB5_time = 0
        AUC_MB1000 = 0
        MB1000_time = 0
        AUC_MB6000 = 0
        MB6000_time = 0
        AUC_MB = 0
        false_pos_MB = 0
        true_pos_MB = 0
        if AUC_time_plot != 0:
            AUC_MB5, MB5_time, _, _ = mini_batch_SGD(0.0038625017292608175, 5, epochs)
            AUC_MB1000, MB1000_time, _, _ = mini_batch_SGD(0.0009501185073181439, 1000, epochs)
            AUC_MB6000, MB6000_time, _, _ = mini_batch_SGD(0.0001999908383831537, 6000, epochs)
            return AUC_SGD, AUC_GRAD, AUC_MB5, AUC_MB1000, AUC_MB6000, SGD_time, GRAD_time, MB5_time, MB1000_time, MB6000_time
        else:
            AUC_MB, _, false_pos_MB, true_pos_MB = mini_batch_SGD(0.0038625017292608175, 32, epochs)

        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w4 = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)
        final_betas_ST_Skl, _ = w4.train(w4.stochastic_gradient_descent_Skl)
        prob_ST_Skl, y_pred_ST_Skl = classification(X_test, final_betas_ST_Skl[0], y_test)[0:2]
        false_pos_ST_Skl, true_pos_ST_Skl = roc_curve(y_test, prob_ST_Skl)[0:2]
        print("Area under curve ST_skl: ", auc(false_pos_ST_Skl, true_pos_ST_Skl))

        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41
        ####################
        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66

        np.random.seed(seed)
        n_categories = 1

        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                    n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                    cost_grad = 'crossentropy', activation = 'sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()

        y_predict = dnn.predict_probabilities(X_test)

        false_pos_NN, true_pos_NN = roc_curve(y_test, y_predict)[0:2]
        print("AUC score NN: ", auc(false_pos_NN, true_pos_NN))

        plt.plot([0, 1], [0, 1], "k--")
        plt.plot(false_pos_grad, true_pos_grad,label="Gradient")
        plt.plot(false_pos_ST, true_pos_ST, label="Stoch")
        plt.plot(false_pos_ST_Skl, true_pos_ST_Skl, label="Stoch_Skl")
        plt.plot(false_pos_MB, true_pos_MB, label="Mini")
        # plt.plot(false_pos_Newton, true_pos_Newton, label="Newton")
        plt.plot(false_pos_NN, true_pos_NN, label='NeuralNetwork')
        plt.legend()
        plt.xlabel("False Positive rate")
        plt.ylabel("True Positive rate")
        plt.title("ROC curve")
        plt.show()

    """Creates cumulative gain charts/lift plots for Neural network. The two optimal parameters sets from tuning are listed below"""
    if (Lift_plot_test_NN == 1):

        np.random.seed(seed)

        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66
        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41

        n_categories = 1

        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                 n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                 cost_grad='crossentropy', activation='sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()

        y_predict_proba = dnn.predict_probabilities(X_test)
        y_predict_proba_tuple = np.concatenate((1 - y_predict_proba, y_predict_proba), axis=1)

        pos_true = y_test.sum()
        pos_true_perc = pos_true / len(y_test)

        x = np.linspace(0, 1, len(y_test))
        m = 1 / pos_true_perc

        best_line = np.zeros((len(x)))
        for i in range(len(x)):
            best_line[i] = m * x[i]
            if (x[i] > pos_true_perc):
                best_line[i] = 1
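        # Equivalently: best_line = np.minimum(x / pos_true_perc, 1.0).
        # The ideal gain curve climbs with slope 1/pos_true_perc until
        # all positives are captured, then stays flat at 1.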

        x_, y_ = skplt.helpers.cumulative_gain_curve(y_test, y_predict_proba_tuple[:, 1])

        Score = (np.trapz(y_, x=x_) - 0.5) / (np.trapz(best_line, dx=(1 / len(y_predict_proba))) - 0.5)
        print('Area ratio score (test):', Score)  # e.g. area ratio = 0.49129354889528054 for the NN on test data
        perc = np.linspace(0, 100, len(y_test))
        plt.plot(x_*100, y_*100)
        plt.plot(perc, best_line*100)
        plt.plot(perc, perc, "k--")

        plt.xlabel("Percentage of clients")
        plt.ylabel("Cumulative % of defaults")
        plt.title("Cumulative Gain Chart for Test Data")
        plt.show()

        """Let's you insert a threshold and classify"""
        _, y_predict, y_predict_tot = classification(y_prob_input=y_predict_proba, threshold=0.5)
        pos = y_predict.sum()
        neg = len(y_predict) - pos
        pos_perc = (pos / len(y_predict))
        neg_perc = (neg / len(y_predict))
        print("default: ", pos_perc)
        print("Non-default: ", neg_perc)

    if (Lift_plot_train_NN == 1):

        np.random.seed(seed)

        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66
        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41
        n_categories = 1

        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                 n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                 cost_grad='crossentropy', activation='sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()

        y_predict_proba = dnn.predict_probabilities(X_train)
        y_predict_proba_tuple = np.concatenate((1 - y_predict_proba, y_predict_proba), axis=1)

        pos_true = y_train.sum()
        pos_true_perc = pos_true / len(y_train)

        x = np.linspace(0, 1, len(y_train))
        m = 1 / pos_true_perc

        best_line = np.zeros((len(x)))
        for i in range(len(x)):
            best_line[i] = m * x[i]
            if (x[i] > pos_true_perc):
                best_line[i] = 1

        x_, y_ = skplt.helpers.cumulative_gain_curve(y_train, y_predict_proba_tuple[:, 1])

        Score = (np.trapz(y_, x=x_) - 0.5) / (np.trapz(best_line, dx=(1 / len(y_predict_proba))) - 0.5)
        print('Area ratio score (train):', Score)
        perc = np.linspace(0, 100, len(y_train))
        plt.plot(x_ * 100, y_ * 100)
        plt.plot(perc, best_line * 100)
        plt.plot(perc, perc, "k--")

        plt.xlabel("Percentage of clients")
        plt.ylabel("Cumulative % of defaults")
        plt.title("Cumulative Gain Chart for Train Data")
        plt.show()

        """Let's you insert a threshold and classify"""
        _, y_predict, y_predict_tot = classification(y_prob_input=y_predict_proba, threshold=0.5)
        pos = y_predict.sum()
        neg = len(y_predict) - pos
        pos_perc = (pos / len(y_predict))
        neg_perc = (neg / len(y_predict))
        print("default: ", pos_perc)
        print("Non-default: ", neg_perc)

    beta_init = np.random.randn(X_train.shape[1], 1)
    w = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)

    if (GD_plot == 1):
        _, cost_all = w.train(w.gradient_descent)
        epoch = np.arange(len(cost_all))

        plt.plot(epoch, cost_all)
        plt.show()

    if (MB_GD_plot == 1):
        _, cost_all = w.train(w.mini_batch_gradient_descent)
        batch = np.arange(len(cost_all))

        plt.plot(batch, cost_all)
        plt.show()

    if (Stoch_GD_plot == 1):
        _, cost_all = w.train(w.stochastic_gradient_descent)
        batch = np.arange(len(cost_all))

        plt.plot(batch, cost_all)
        plt.show()

    if (Newton_plot == 1):
        _, cost_all = w.train(w.newtons_method)
        epochs = np.arange(len(cost_all))

        plt.plot(epochs, cost_all)
        plt.show()

    if (Scatter_GD_plot == 1):
        final_betas, _ = w.train(w.gradient_descent)
        prob_train = classification(X_train, final_betas)[0]
        x_sigmoid = np.dot(X_train, final_betas)
        plt.scatter(x_sigmoid, prob_train)
        plt.show()
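A minimal driver, assuming the module-level setup used above (X_train, y_train, X_test, y_test, seed, plus the numpy, matplotlib, time, and sklearn.metrics imports); the flag values are illustrative:

if __name__ == "__main__":
    # Draw the ROC comparison and the gradient-descent cost curve.
    Plots(epochs=100, ROC_plot=1, GD_plot=1)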