Example #1
    def learn_method(self):

        # Seed an initial population of five random weight vectors.
        population = []
        for i in range(5):
            l = [0, 0, 0, 0]
            for v in range(4):
                l[v] = random.randint(1, 10)

            cal = calculator.cal_map(l[0], l[1], l[2], l[3])
            score = cal.get("map") * cal.get("ndcg")

            w = Weight(l, score)
            population.append(w)
        population.sort()

        # We use an evolutionary algorithm to find the best weights.
        # There are two ways to generate a new weight vector, chosen at
        # random each iteration: blend two parents with a random alpha,
        # or splice them at a random crossover point.
        for n in range(1000):

            choice = random.randint(0, 1)
            l1 = [0, 0, 0, 0]

            if choice == 0:
                # Blend crossover: mix two parents with a random alpha.
                alpha = random.random()
                c1 = random.randint(0, 3)
                c2 = random.randint(0, 3)

                for i in range(4):
                    l1[i] = (population[c1].weights[i] *
                             alpha) + (population[c2].weights[i] * (1 - alpha))

            else:
                # Splice crossover: take the first `alpha` weights from one
                # parent and the rest from the other.
                alpha = random.randint(1, 3)
                c1 = random.randint(0, 3)
                c2 = random.randint(0, 3)

                for i in range(0, alpha):
                    l1[i] = population[c1].weights[i]
                for i in range(alpha, 4):
                    l1[i] = population[c2].weights[i]

            # Score the offspring (in the full system this is where we call
            # Elasticsearch and compute the score).
            cal = calculator.cal_map(l1[0], l1[1], l1[2], l1[3])
            s = cal.get("map") * cal.get("ndcg")
            w1 = Weight(l1, s)
            population.append(w1)
            population.sort()
            population.pop()  # drop the worst candidate

        print("done")
Example #2
def mini_batch_SGD(eta, batch_size, epochs):
    MB_start_time = time.time()
    np.random.seed(seed)
    beta_init = np.random.randn(X_train.shape[1], 1)
    w1 = Weight(X_train, y_train, beta_init, eta, epochs, batch_size=batch_size)
    final_betas_MB, _ = w1.train(w1.mini_batch_gradient_descent)
    prob_MB, y_pred_MB = classification(X_test, final_betas_MB, y_test)[0:2]
    false_pos_MB, true_pos_MB = roc_curve(y_test, prob_MB)[0:2]
    AUC_MB = auc(false_pos_MB, true_pos_MB)
    print("Area under curve MB%s: " %batch_size, AUC_MB)
    MB_time = time.time() - MB_start_time
    return AUC_MB, MB_time, false_pos_MB, true_pos_MB
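A hedged usage sketch: `mini_batch_SGD` closes over module-level globals (`X_train`, `y_train`, `X_test`, `y_test`, `seed`) and the `Weight`, `classification`, `roc_curve`, and `auc` helpers, so those must already be defined; the learning rate here is an arbitrary placeholder:

auc_mb, elapsed, fpr, tpr = mini_batch_SGD(eta=1e-3, batch_size=32, epochs=50)
print("AUC %.3f in %.1f s" % (auc_mb, elapsed))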
Example #3
def lmmd(source,
         target,
         s_label,
         t_label,
         kernel_mul=2.0,
         kernel_num=5,
         fix_sigma=None,
         num_classes=10):
    batch_size = source.size()[0]
    weight_ss, weight_tt, weight_st = Weight.cal_weight(s_label,
                                                        t_label,
                                                        type='visual',
                                                        class_nums=num_classes)
    weight_ss = torch.from_numpy(weight_ss).cuda()
    weight_tt = torch.from_numpy(weight_tt).cuda()
    weight_st = torch.from_numpy(weight_st).cuda()

    kernels = guassian_kernel(source,
                              target,
                              kernel_mul=kernel_mul,
                              kernel_num=kernel_num,
                              fix_sigma=fix_sigma)
    loss = torch.Tensor([0]).cuda()
    #print(kernels)
    if torch.sum(torch.isnan(sum(kernels))):
        return loss
    SS = kernels[:batch_size, :batch_size]
    TT = kernels[batch_size:, batch_size:]
    ST = kernels[:batch_size, batch_size:]

    loss += torch.sum(weight_ss * SS + weight_tt * TT - 2 * weight_st * ST)
    return loss
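`lmmd` leans on two helpers that are not shown: `Weight.cal_weight`, assumed to return three numpy weight matrices of shape (batch_size, batch_size), and `guassian_kernel` (spelling as in the source). Below is a sketch of the kernel consistent with how `lmmd` slices its result, i.e. a single summed (2*batch, 2*batch) Gram matrix over the concatenated batch; the helper in the original repository may differ:

import torch

def guassian_kernel(source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
    # Stack source and target so one Gram matrix covers all pairs.
    total = torch.cat([source, target], dim=0)
    n = total.size(0)
    # Pairwise squared L2 distances, shape (n, n).
    l2 = ((total.unsqueeze(0) - total.unsqueeze(1)) ** 2).sum(2)
    # Mean-distance bandwidth heuristic unless a sigma is fixed.
    bandwidth = fix_sigma if fix_sigma else l2.sum() / (n * n - n)
    bandwidth /= kernel_mul ** (kernel_num // 2)
    # Sum of kernel_num Gaussians at geometrically spaced bandwidths.
    return sum(torch.exp(-l2 / (bandwidth * kernel_mul ** i))
               for i in range(kernel_num))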
Example #4
    def conditional(self,
                    source,
                    target,
                    s_label,
                    t_label,
                    kernel_mul=2.0,
                    kernel_num=5,
                    fix_sigma=None):
        batch_size = source.size()[0]
        weight_ss, weight_tt, weight_st = Weight.cal_weight(s_label,
                                                            t_label,
                                                            type='visual')
        weight_ss = torch.from_numpy(weight_ss).cuda()
        weight_tt = torch.from_numpy(weight_tt).cuda()
        weight_st = torch.from_numpy(weight_st).cuda()

        kernels = self.guassian_kernel(source,
                                       target,
                                       kernel_mul=kernel_mul,
                                       kernel_num=kernel_num,
                                       fix_sigma=fix_sigma)
        loss = torch.Tensor([0]).cuda()
        if torch.sum(torch.isnan(sum(kernels))):
            return loss
        SS = kernels[:batch_size, :batch_size]
        TT = kernels[batch_size:, batch_size:]
        ST = kernels[:batch_size, batch_size:]

        loss += torch.sum(weight_ss * SS + weight_tt * TT - 2 * weight_st * ST)
        return loss
Example #5
def Best_Parameters(epochs, batch_size, method, etamin, etamax, step, y_val, X_val):
    beta_init = np.random.randn(X_train.shape[1], 1)
    eta_vals = np.logspace(etamin, etamax, step)
    auc_array = np.zeros((2, step))
    for i, eta in enumerate(eta_vals):
        np.random.seed(seed)
        print("Iteration: ",i)
        print("eta: ", eta)
        w = Weight(X_train, y_train, beta_init, eta, epochs, batch_size=batch_size)
        method_ = getattr(w, method)
        final_betas, _ = w.train(method_)
        prob = sigmoid(X_val, final_betas)
        auc_array[0][i] = roc_auc_score(y_val, prob)

        auc_array[1][i] = eta
    max_auc = np.max(auc_array[0])
    best_eta = auc_array[1][np.argmax(auc_array[0])]

    return max_auc, best_eta
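Because `Best_Parameters` resolves the trainer by name with `getattr`, `method` is a string naming one of `Weight`'s training methods. A hedged call, assuming the module-level data and a log-spaced eta grid from 1e-5 to 1e-1:

max_auc, best_eta = Best_Parameters(epochs=50, batch_size=32,
                                    method="mini_batch_gradient_descent",
                                    etamin=-5, etamax=-1, step=20,
                                    y_val=y_val, X_val=X_val)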
Example #6
	def get(self):
		statusCode = 200
		emailAddress = str(self.request.get('emailAddress'))
		all_weight = []
		for weight in Weight.query(Weight.emailAddress == emailAddress):
			all_weight.append(weight)
		all_weight = sorted(all_weight,key=lambda r: r.dateTime,reverse=True)
		return_list = []
		for weight in all_weight:
			return_list.append({'weight': float(weight.weight), 'date': str(weight.dateTime)})
		self.response.write(json.dumps({'statusCode': statusCode, 'weights': return_list}))
Example #7
	def post(self):
		statusCode = 202
		emailAddress = str(self.request.get('emailAddress'))
		content = json.loads(self.request.body)
		logging.info(content)
		weight = float(content['weight'])
		date = ''
		try:
			date = str(content['date'])
		except KeyError:
			date = ''
		newWeight = ''
		if date == '':
			newWeight = Weight(weight=weight, emailAddress=emailAddress)
		else:
			newWeight = Weight(weight=weight, emailAddress=emailAddress, dateTime=datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f"))
		if newWeight:
			newWeight.put()
			statusCode = 200

		self.response.write(json.dumps({'statusCode': statusCode}))
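Both handlers assume a Google App Engine `ndb` model named `Weight`. A minimal sketch of what they imply (property names are taken from the handlers; types are inferred):

from google.appengine.ext import ndb

class Weight(ndb.Model):
    weight = ndb.FloatProperty(required=True)
    emailAddress = ndb.StringProperty(required=True)
    # Defaults to the insertion time when the POST body carries no date.
    dateTime = ndb.DateTimeProperty(auto_now_add=True)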
Example #8
    def Build_Weight_Database(  self,
                                database,
                                version,
                                num_of_kpts,
                                cutoff,
                                force_update = False,
                                K = 10,
                                depth = 4,
                                nleaves = 10000):

        updated = self.Train_Database_Sign(database, version, num_of_kpts, force_update, K, depth, nleaves)
        
        #load signature
        sign_dir = os.path.join(self.SIGN_DIR, 'db_version_' + str(version))
        sign = Signature()
        sign.load_sign(sign_dir, self.SIGN_FILE+str(num_of_kpts))
        
        print 'Signature Loaded'
        
        wt_file = os.path.join(sign_dir, self.WEIGHT_FILE+str(num_of_kpts)+'_'+str(cutoff))
        wts_file = os.path.join(sign_dir, self.WEIGHT_SIGN_FILE+str(num_of_kpts)+'_'+str(cutoff))
        
        if updated or (not os.path.isfile(wt_file) and not os.path.isfile(wts_file)):
            wt = Weight(cutoff)
            wt.get_weight(sign.sign_database)        
            wt.weight_train_database(sign.sign_database)
        
            wt.save_weights(sign_dir, self.WEIGHT_FILE+str(num_of_kpts)+'_'+str(cutoff))
            wt.save_weighted_sign(sign_dir, self.WEIGHT_SIGN_FILE+str(num_of_kpts)+'_'+str(cutoff))
            
            print ' '
            print 'Weighted Sign Generated'
        
        else:
            print 'Weighted Sign Has Already Been Generated'
Example #9
        feats.pseaac(lam=int(args.pseaac), weight=PSE_WEIGHT)
    if args.physico:
        feats.physicochem()

    if args.kmer is None and args.pseaac is None and not args.physico:
        print("You must specify at least one feature type (-k, -p, -y).")

    else:
        # Weight if needed
        if args.weight:
            # Get distance threshold
            d = args.dist[0]
            # Get cluster type
            cluster_type = args.cluster_type[0]
            # Weight GTA
            pairwiseGTA = Weight.load(args.weight[0])
            GTA_weight = Weight(gta_profs, pairwiseGTA)
            GTA_clusters = GTA_weight.cluster(cluster_type, d)
            GTA_weight.weight(GTA_clusters)
            # Weight Virus
            pairwiseViral = Weight.load(args.weight[1])
            virus_weight = Weight(viral_profs, pairwiseViral)
            virus_clusters = virus_weight.cluster(cluster_type, d)
            virus_weight.weight(virus_clusters)

        # Create SVM
        c = args.c[0]
        kernel = args.kernel[0]
        kernel_var = float(args.kernel[1])

        svm = SVM(gta_profs, viral_profs, c, kernel, kernel_var)
Example #10
def Plots(epochs, AUC_time_plot=0, ROC_plot=0, Lift_plot_test_NN=0, Lift_plot_train_NN=0, GD_plot=0, MB_GD_plot=0, Stoch_GD_plot=0,
          Newton_plot=0, Scatter_GD_plot=0):

    if (ROC_plot == 1 or AUC_time_plot == 1):
        GRAD_start_time = time.time()
        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w = Weight(X_train, y_train, beta_init, 6.892612104349695e-05, epochs)
        final_betas_grad, cost = w.train(w.gradient_descent)
        prob_grad, y_pred_grad = classification(X_test, final_betas_grad, y_test)[0:2]
        false_pos_grad, true_pos_grad = roc_curve(y_test, prob_grad)[0:2]
        AUC_GRAD = auc(false_pos_grad, true_pos_grad)
        print("Area under curve gradient: ", AUC_GRAD)
        GRAD_time = time.time() - GRAD_start_time

        SGD_start_time = time.time()
        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w2 = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)
        final_betas_ST, _ = w2.train(w2.stochastic_gradient_descent)
        prob_ST, y_pred_ST = classification(X_test, final_betas_ST, y_test)[0:2]
        false_pos_ST, true_pos_ST = roc_curve(y_test, prob_ST)[0:2]
        AUC_SGD = auc(false_pos_ST, true_pos_ST)
        print("Area under curve ST: ", AUC_SGD)
        SGD_time = time.time() - SGD_start_time

        """np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1],1)
        w3 = Weight(X_train,y_train,beta_init,0.001, 20)
        final_betas_Newton,_ = w3.train(w3.newtons_method)
        prob_Newton, y_pred_Newton = classification(X_train,final_betas_Newton, y_test)[0:2]
        false_pos_Newton, true_pos_Newton = roc_curve(y_test, prob_Newton)[0:2]
        print("Area under curve Newton: ", auc(false_pos_Newton, true_pos_Newton))"""

        AUC_MB5 = 0
        MB5_time = 0
        AUC_MB1000 = 0
        MB1000_time = 0
        AUC_MB6000 = 0
        MB6000_time = 0
        AUC_MB = 0
        false_pos_MB = 0
        true_pos_MB = 0
        if AUC_time_plot != 0:
            AUC_MB5, MB5_time, _, _ = mini_batch_SGD(0.0038625017292608175, 5, epochs)
            AUC_MB1000, MB1000_time, _, _ = mini_batch_SGD(0.0009501185073181439, 1000, epochs)
            AUC_MB6000, MB6000_time, _, _ = mini_batch_SGD(0.0001999908383831537, 6000, epochs)
            return AUC_SGD, AUC_GRAD, AUC_MB5, AUC_MB1000, AUC_MB6000, SGD_time, GRAD_time, MB5_time, MB1000_time, MB6000_time
        else:
            AUC_MB, _, false_pos_MB, true_pos_MB = mini_batch_SGD(0.0038625017292608175, 32, epochs)

        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w4 = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)
        final_betas_ST_Skl, _ = w4.train(w4.stochastic_gradient_descent_Skl)
        prob_ST_Skl, y_pred_ST_Skl = classification(X_test, final_betas_ST_Skl[0], y_test)[0:2]
        false_pos_ST_Skl, true_pos_ST_Skl = roc_curve(y_test, prob_ST_Skl)[0:2]
        print("Area under curve ST_skl: ", auc(false_pos_ST_Skl, true_pos_ST_Skl))

        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41
        ####################
        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66

        np.random.seed(seed)
        n_categories = 1

        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                    n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                    cost_grad = 'crossentropy', activation = 'sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()

        y_predict = dnn.predict_probabilities(X_test)

        false_pos_NN, true_pos_NN = roc_curve(y_test, y_predict)[0:2]
        print("AUC score NN: ", auc(false_pos_NN, true_pos_NN))

        plt.plot([0, 1], [0, 1], "k--")
        plt.plot(false_pos_grad, true_pos_grad, label="Gradient")
        plt.plot(false_pos_ST, true_pos_ST, label="Stoch")
        plt.plot(false_pos_ST_Skl, true_pos_ST_Skl, label="Stoch_Skl")
        plt.plot(false_pos_MB, true_pos_MB, label="Mini")
        # plt.plot(false_pos_Newton, true_pos_Newton, label="Newton")
        plt.plot(false_pos_NN, true_pos_NN, label='NeuralNetwork')
        plt.legend()
        plt.xlabel("False Positive rate")
        plt.ylabel("True Positive rate")
        plt.title("ROC curve")
        plt.show()

    """Creates cumulative gain charts/lift plots for Neural network. The two optimal parameters sets from tuning are listed below"""
    if (Lift_plot_test_NN == 1):

        np.random.seed(seed)

        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66
        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41

        n_categories = 1

        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                 n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                 cost_grad='crossentropy', activation='sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()

        y_predict_proba = dnn.predict_probabilities(X_test)
        y_predict_proba_tuple = np.concatenate((1 - y_predict_proba, y_predict_proba), axis=1)

        pos_true = y_test.sum()
        pos_true_perc = pos_true / len(y_test)

        x = np.linspace(0, 1, len(y_test))
        m = 1 / pos_true_perc

        best_line = np.zeros((len(x)))
        for i in range(len(x)):
            best_line[i] = m * x[i]
            if (x[i] > pos_true_perc):
                best_line[i] = 1

        x_, y_ = skplt.helpers.cumulative_gain_curve(y_test, y_predict_proba_tuple[:, 1])

        Score = (np.trapz(y_, x=x_) - 0.5) / (np.trapz(best_line, dx=(1 / len(y_predict_proba))) - 0.5)
        print('Area ratio score(test)', Score)  # The score  Area ratio = 0.49129354889528054 Neural Network test against predicted
        perc = np.linspace(0, 100, len(y_test))
        plt.plot(x_*100, y_*100)
        plt.plot(perc, best_line*100)
        plt.plot(perc, perc, "k--")

        plt.xlabel("Percentage of clients")
        plt.ylabel("Cumulative % of defaults")
        plt.title("Cumulative Gain Chart for Test Data")
        plt.show()

        """Let's you insert a threshold and classify"""
        _, y_predict, y_predict_tot = classification(y_prob_input=y_predict_proba, threshold=0.5)
        pos = y_predict.sum()
        neg = len(y_predict) - pos
        pos_perc = (pos / len(y_predict))
        neg_perc = (neg / len(y_predict))
        print("default: ", pos_perc)
        print("Non-default: ", neg_perc)

    if (Lift_plot_train_NN == 1):

        np.random.seed(seed)

        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66
        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41
        n_categories = 1

        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                 n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                 cost_grad='crossentropy', activation='sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()

        y_predict_proba = dnn.predict_probabilities(X_train)
        y_predict_proba_tuple = np.concatenate((1 - y_predict_proba, y_predict_proba), axis=1)

        pos_true = y_train.sum()
        pos_true_perc = pos_true / len(y_train)

        x = np.linspace(0, 1, len(y_train))
        m = 1 / pos_true_perc

        best_line = np.zeros((len(x)))
        for i in range(len(x)):
            best_line[i] = m * x[i]
            if (x[i] > pos_true_perc):
                best_line[i] = 1

        x_, y_ = skplt.helpers.cumulative_gain_curve(y_train, y_predict_proba_tuple[:, 1])

        Score = (np.trapz(y_, x=x_) - 0.5) / (np.trapz(best_line, dx=(1 / len(y_predict_proba))) - 0.5)
        print('Area ratio score(train)', Score)
        perc = np.linspace(0, 100, len(y_train))
        plt.plot(x_ * 100, y_ * 100)
        plt.plot(perc, best_line * 100)
        plt.plot(perc, perc, "k--")

        plt.xlabel("Percentage of clients")
        plt.ylabel("Cumulative % of defaults")
        plt.title("Cumulative Gain Chart for Train Data")
        plt.show()

        """Let's you insert a threshold and classify"""
        _, y_predict, y_predict_tot = classification(y_prob_input=y_predict_proba, threshold=0.5)
        pos = y_predict.sum()
        neg = len(y_predict) - pos
        pos_perc = (pos / len(y_predict))
        neg_perc = (neg / len(y_predict))
        print("default: ", pos_perc)
        print("Non-default: ", neg_perc)

    beta_init = np.random.randn(X_train.shape[1], 1)
    w = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)

    if (GD_plot == 1):
        _, cost_all = w.train(w.gradient_descent)
        epoch = np.arange(len(cost_all))

        plt.plot(epoch, cost_all)
        plt.show()

    if (MB_GD_plot == 1):
        _, cost_all = w.train(w.mini_batch_gradient_descent)
        batch = np.arange(len(cost_all))

        plt.plot(batch, cost_all)
        plt.show()

    if (Stoch_GD_plot == 1):
        _, cost_all = w.train(w.stochastic_gradient_descent)
        batch = np.arange(len(cost_all))

        plt.plot(batch, cost_all)
        plt.show()

    if (Newton_plot == 1):
        _, cost_all = w.train(w.newtons_method)
        epochs = np.arange(len(cost_all))

        plt.plot(epochs, cost_all)
        plt.show()

    if (Scatter_GD_plot == 1):
        final_betas, _ = w.train(w.gradient_descent)
        prob_train = classification(X_train, final_betas)[0]
        x_sigmoid = np.dot(X_train, final_betas)
        plt.scatter(x_sigmoid, prob_train)
        plt.show()
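A hedged invocation of the driver above; each flag switches one family of figures on, so a ROC-only run would look like:

Plots(epochs=100, ROC_plot=1)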
Example #11
    tmpNeuron = Neuron("INPUT", "I" + str(i))
    neuronList.append(tmpNeuron)
for i in range(0, numberOfHiddenNeurons):
    tmpNeuron = Neuron("HIDDEN", "H" + str(i))
    neuronList.append(tmpNeuron)
for i in range(0, numberOfOutputNeurons):
    tmpNeuron = Neuron("OUTPUT", "O" + str(i))
    neuronList.append(tmpNeuron)
# Now, generate all of the weights.
# Start from the hidden layer.
for i in range(numberOfInputNeurons,
               numberOfInputNeurons + numberOfHiddenNeurons):
    outputNeuron = neuronList[i]
    for j in range(0, numberOfInputNeurons):
        inputNeuron = neuronList[j]
        tmpWeight = Weight("W" + str(numberofWeights), random.uniform(0, 1),
                           inputNeuron, outputNeuron)
        inputNeuron.UpdateOutputWeights(tmpWeight)
        outputNeuron.UpdateInputWeights(tmpWeight)
        weightList.append(tmpWeight)
        numberofWeights += 1
for i in range(
        numberOfInputNeurons + numberOfHiddenNeurons,
        numberOfInputNeurons + numberOfHiddenNeurons + numberOfOutputNeurons):
    outputNeuron = neuronList[i]
    for j in range(numberOfInputNeurons,
                   numberOfInputNeurons + numberOfHiddenNeurons):
        inputNeuron = neuronList[j]
        tmpWeight = Weight("W" + str(numberofWeights), random.uniform(0, 1),
                           inputNeuron, outputNeuron)
        inputNeuron.UpdateOutputWeights(tmpWeight)
        outputNeuron.UpdateInputWeights(tmpWeight)
        weightList.append(tmpWeight)
        numberofWeights += 1
Example #12
from Temperature import Temperature
from Distance import Distance
from Memory import Memory
from Weight import Weight

if __name__ == '__main__':
    while True:
        choice = input('Available conversions ... \n' +
                       '1. Temperature\n' +
                       '2. Distance\n' +
                       '3. Memory\n' +
                       '4. Weight\n' +
                       '5. Exit\n' +
                       'Please enter your choice ... ')
        if choice == '1':
            t = Temperature()
            t.convert()
        elif choice == '2':
            d = Distance()
            d.convert()
        elif choice == '3':
            m = Memory()
            m.convert()
        elif choice == '4':
            w = Weight()
            w.convert()
        else:
            print('Exiting now ... Bye!')
            break
Example #13
'''
Created on 11-Oct-2014

@author: ghantasa
'''

from Temperature import Temperature
from Distance import Distance
from Memory import Memory
from Weight import Weight

if __name__ == '__main__':
    while (True):
        choice = input('Available conversions ... \n' + '1. Temperature\n' +
                       '2. Distance\n' + '3. Memory\n' + '4. Weight\n' +
                       '5. Exit\n' + 'Please enter your choice ... ')
        if choice == '1':
            t = Temperature()
            t.convert()
        elif choice == '2':
            d = Distance()
            d.convert()
        elif choice == '3':
            m = Memory()
            m.convert()
        elif choice == '4':
            w = Weight()
            w.convert()
        else:
            print('Exiting now ... Bye!')
            break
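Both menu scripts expect a `Weight` class exposing an interactive `convert()`. A minimal sketch of one possible implementation (the unit table and prompts are assumptions, not part of the source):

class Weight:
    # Conversion factors to kilograms.
    FACTORS = {'kg': 1.0, 'g': 0.001, 'lb': 0.453592, 'oz': 0.0283495}

    def convert(self):
        src = input('Convert from (kg/g/lb/oz): ').strip().lower()
        dst = input('Convert to (kg/g/lb/oz): ').strip().lower()
        value = float(input('Value: '))
        in_kg = value * self.FACTORS[src]
        print('%g %s = %g %s' % (value, src, in_kg / self.FACTORS[dst], dst))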
Example #14
class TestWeight:
    @pytest.mark.regression
    @pytest.mark.parametrize(
        "weight1, weight2, expected_result",
        [(Weight(10, WeightUnit.G), Weight(10, WeightUnit.G), 20),
         (Weight(1, WeightUnit.KG), Weight(10, WeightUnit.G), 1.01),
         (Weight(10, WeightUnit.KG), Weight(10, WeightUnit.LB), 14.53592),
         (10, Weight(10, WeightUnit.LB), 10.022046244201839)])
    def test_adding(self, weight1: Weight, weight2: Weight,
                    expected_result: float):
        assert weight1 + weight2 == expected_result
        weight1 += weight2
        if type(weight1) == Weight:
            assert weight1.weight == expected_result

        else:
            assert weight1 == expected_result

    @pytest.mark.regression
    @pytest.mark.parametrize(
        "weight1, weight2, expected_result",
        [(Weight(10, WeightUnit.G), Weight(10, WeightUnit.G), 0),
         (Weight(1, WeightUnit.KG), Weight(10, WeightUnit.G), 0.99),
         (Weight(10, WeightUnit.KG), Weight(10, WeightUnit.LB), 5.46408),
         (10, Weight(10, WeightUnit.LB), 9.977953755798163)])
    def test_subtracting(self, weight1: Weight, weight2: Weight,
                         expected_result: float):
        assert weight1 - weight2 == expected_result
        weight1 -= weight2
        if type(weight1) == Weight:
            assert weight1.weight == expected_result

        else:
            assert weight1 == expected_result
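The parametrized cases imply a `Weight` value type whose `__add__`/`__sub__` convert the right operand into the left operand's unit and whose `__eq__` accepts bare numbers. A sketch covering the Weight-with-Weight rows (the mixed `10 + Weight(...)` rows follow a convention this sketch does not reproduce):

from enum import Enum

class WeightUnit(Enum):
    G = 0.001      # factor to kilograms
    KG = 1.0
    LB = 0.453592

class Weight:
    def __init__(self, weight, unit):
        self.weight = weight
        self.unit = unit

    def _in(self, unit):
        # Express this magnitude in another unit via kilograms.
        return self.weight * self.unit.value / unit.value

    def __add__(self, other):
        return Weight(self.weight + other._in(self.unit), self.unit)

    def __sub__(self, other):
        return Weight(self.weight - other._in(self.unit), self.unit)

    def __eq__(self, other):
        if isinstance(other, (int, float)):
            return self.weight == other
        return self.weight == other._in(self.unit)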
Example #15
    def Classifier(self,
                   database,
                   version,
                   num_of_kpts,
                   cutoff,
                   top = 5,
                   K = 10,
                   depth=4):
        
        num_in_set = database[0]
        num_of_sets = database[1]
        test_database = database[3]  
        
        total = num_in_set * num_of_sets
        num_in_test_set = len(test_database) / num_of_sets 
        
        classify_score = np.zeros(num_of_sets)
        class_name = [[] for i in range(num_of_sets)]
        #load weight
        wt = Weight(cutoff)
        sign_dir = os.path.join(self.SIGN_DIR, 'db_version_' + str(version))
        wt.load_weights(sign_dir, self.WEIGHT_FILE+str(num_of_kpts)+'_'+str(cutoff))
        wt.load_weighted_sign(sign_dir, self.WEIGHT_SIGN_FILE+str(num_of_kpts)+'_'+str(cutoff))
    
        #Load tree
        tree_dir = os.path.join(self.TREE_DIR, 'db_version_' + str(version))
        tr = vl._vlfeat.VlHIKMTree(0, 0)
        tr.load(os.path.join(tree_dir, str(num_of_kpts) + self.TREE_FILE))    

        for k in test_database:
            #randomly get image from the img_dir
            img = Image.open(k[2]).convert('L')
            img = self.StandalizeImage(img, 480)
            img_data = np.asarray(img, dtype=float)
            
            #generate desc, sign and weighted sign
            kp = Keypoint()
            #kp.load_keypoint(self.KEYPOINT_DIR, self.KEYPOINT_FILE+str(num_of_kpts))
            kp.generate_keypoint(num_of_kpts, img.size[0], img.size[1], self.SIGMA)
            desc = Descriptor()
            desc.generate_desc(img_data, kp.kpt)
            #very important !! convert desc to float type
            #desc_f = np.array(desc.desc, dtype=float)
            
            sign = Signature()
            s = sign.generate_sign(tr,desc.desc, K, depth)
            weighted_sign = wt.weight_sign(s)
            
            #vote
            d = np.empty(total)
            for i in range(0, total):
                d[i] = self.dist(wt.weighted_sign[i, :], weighted_sign)
            
            perm = np.argsort(d)
            vote_for = np.floor((perm[0:top])/num_in_set)+1                      
            votes = vl.vl_binsum(np.zeros(num_of_sets), np.ones(top), vote_for)
            
            #print votes
            best = np.argmax(votes)
            
            if best == k[1]:
                classify_score[k[1]] += 1
                
            print '=>'+str(k[0])
            
            class_name[k[1]] = k[3]
        
        classify_score = classify_score / num_in_test_set
        
        return zip(class_name, classify_score.tolist())
Example #16
	def xval(self, nfold=5, nrep=10, pairwiseGTA=None, pairwiseViral=None, cluster_type='farthest', d=0.03):
		"""n-fold cross validation of
			the test set. 
			Input:
				n (int): number of folds for xval
			Returns:
				(fpr, fnr): false positive and false 
					negative rates from the n xvals
		"""
		# keep track of label classification
		score0 = 0.0
		score1 = 0.0
		gta_as_phage = []
		phage_as_gta = []

		# repeat xval results nrep times
		for i in range(nrep):
			if not mini:
				sys.stdout.flush()
				sys.stdout.write("Starting rep: %d\r" % (i+1))
			# randomly sort profiles
			random.shuffle(self.profiles)
			# split into folds
			split = [self.profiles[i::nfold] for i in range(nfold)]
			# cross val
			for j in range(nfold):
				# Build train and test sets
				train_fold = np.array([x for sublist in (split[:j]+split[j+1:]) for x in sublist])
				test_fold = split[j]
				trainX = np.array([x.features for x in train_fold])
				testX = np.array([x.features for x in test_fold])
				trainY = np.array([-1.0 if y.label == self.label0 else 1.0 for y in train_fold])
				testY = np.array([-1.0 if y.label == self.label0 else 1.0 for y in test_fold])
				testNames = np.array([x.org_name for x in test_fold])
				# randomize labels
				# random.shuffle(trainY)
				# Get training set weights
				if pairwiseGTA:
					# Reweight based on training set
					# GTA
					GTA_weight = Weight([x for x in train_fold if x.label == self.label0], pairwiseGTA)
					GTA_clusters = GTA_weight.cluster(cluster_type, d)
					GTA_weight.weight(GTA_clusters)
					# Virus
					virus_weight = Weight([x for x in train_fold if x.label != self.label0], pairwiseViral)
					virus_clusters = virus_weight.cluster(cluster_type, d)
					virus_weight.weight(virus_clusters)
					# Grab updated weights
					weights = np.array([x.weight for x in train_fold])
				else:
					weights = np.array([1 for x in train_fold])
				# evaluate results
				predictor = SVMTrain(self.kernel, self.c).train(trainX, trainY, weights)
				for r in range(len(testX)):
					# Positive product is correct classification
					if predictor.predict(testX[r]) * testY[r] > 0:
						# Update label0 if negative, label1 otherwise
						if testY[r] < 0:
							score0 += 1
						else:
							score1 += 1
					else: # predicted incorrectly
						if testY[r] > 0: #virus as GTA
							phage_as_gta.append(testNames[r])
						else: #gta as virus
							gta_as_phage.append(testNames[r])

		if not mini:
			print("\nPhages (%d) misclassified over %d reps: %s" % (len(phage_as_gta), nrep, phage_as_gta))
			print("\nGTA (%d) misclassified over %d reps: %s\n" % (len(gta_as_phage), nrep, gta_as_phage))

		return (score0/nrep, score1/nrep)
Example #17
		feats.pseaac(lam=int(args.pseaac), weight=PSE_WEIGHT)
	if args.physico:
		feats.physicochem()

	if args.kmer is None and args.pseaac is None and not args.physico:
		print("You must specify at least one feature type (-k, -p, -y).")

	else:
		# Weight if needed
		if args.weight:
			# Get distance threshold
			d = args.dist[0]
			# Get cluster type
			cluster_type = args.cluster_type[0]
			# Weight GTA
			pairwiseGTA = Weight.load(args.weight[0])
			GTA_weight = Weight(gta_profs, pairwiseGTA)
			GTA_clusters = GTA_weight.cluster(cluster_type, d)
			GTA_weight.weight(GTA_clusters)
			# Weight Virus
			pairwiseViral = Weight.load(args.weight[1])
			virus_weight = Weight(viral_profs, pairwiseViral)
			virus_clusters = virus_weight.cluster(cluster_type, d)
			virus_weight.weight(virus_clusters)

		# Create SVM
		c = args.c[0]
		kernel = args.kernel[0]
		kernel_var = float(args.kernel[1])

		svm = SVM(gta_profs, viral_profs, c, kernel, kernel_var)
Example #18
    def xval(self,
             nfold=5,
             nrep=10,
             pairwiseGTA=None,
             pairwiseViral=None,
             cluster_type='farthest',
             d=0.03):
        """n-fold cross validation of
			the test set. 
			Input:
				n (int): number of folds for xval
			Returns:
				(fpr, fnr): false positive and false 
					negative rates from the n xvals
		"""
        # keep track of label classification
        score0 = 0.0
        score1 = 0.0
        gta_as_phage = []
        phage_as_gta = []

        # repeat xval results nrep times
        for i in range(nrep):
            if not mini:
                sys.stdout.flush()
                sys.stdout.write("Starting rep: %d\r" % (i + 1))
            # randomly sort profiles
            random.shuffle(self.profiles)
            # split into folds
            split = [self.profiles[i::nfold] for i in range(nfold)]
            # cross val
            for j in range(nfold):
                # Build train and test sets
                train_fold = np.array([
                    x for sublist in (split[:j] + split[j + 1:])
                    for x in sublist
                ])
                test_fold = split[j]
                trainX = np.array([x.features for x in train_fold])
                testX = np.array([x.features for x in test_fold])
                trainY = np.array([
                    -1.0 if y.label == self.label0 else 1.0 for y in train_fold
                ])
                testY = np.array([
                    -1.0 if y.label == self.label0 else 1.0 for y in test_fold
                ])
                testNames = np.array([x.org_name for x in test_fold])
                # randomize labels
                # random.shuffle(trainY)
                # Get training set weights
                if pairwiseGTA:
                    # Reweight based on training set
                    # GTA
                    GTA_weight = Weight(
                        [x for x in train_fold if x.label == self.label0],
                        pairwiseGTA)
                    GTA_clusters = GTA_weight.cluster(cluster_type, d)
                    GTA_weight.weight(GTA_clusters)
                    # Virus
                    virus_weight = Weight(
                        [x for x in train_fold if x.label != self.label0],
                        pairwiseViral)
                    virus_clusters = virus_weight.cluster(cluster_type, d)
                    virus_weight.weight(virus_clusters)
                    # Grab updated weights
                    weights = np.array([x.weight for x in train_fold])
                else:
                    weights = np.array([1 for x in train_fold])
                # evaluate results
                predictor = SVMTrain(self.kernel,
                                     self.c).train(trainX, trainY, weights)
                for r in range(len(testX)):
                    # Positive product is correct classification
                    if predictor.predict(testX[r]) * testY[r] > 0:
                        # Update label0 if negative, label1 otherwise
                        if testY[r] < 0:
                            score0 += 1
                        else:
                            score1 += 1
                    else:  # predicted incorrectly
                        if testY[r] > 0:  #virus as GTA
                            phage_as_gta.append(testNames[r])
                        else:  #gta as virus
                            gta_as_phage.append(testNames[r])

        if not mini:
            print("\nPhages (%d) misclassified over %d reps: %s" %
                  (len(phage_as_gta), nrep, phage_as_gta))
            print("\nGTA (%d) misclassified over %d reps: %s\n" %
                  (len(gta_as_phage), nrep, gta_as_phage))

        return (score0 / nrep, score1 / nrep)
Example #19
"""计算不同因素的权重"""

from Weight import Weight
import os


def dir_name(dirname):
    for root, dirs, files in os.walk(dirname):
        models = [file for file in files if file.endswith('model')]
        return models


if __name__ == '__main__':
    d_name = os.path.join(os.getcwd(), 'Main')
    models = dir_name(d_name)

    TI = Weight('news.csv', 'positive.txt', 'negative.txt', models)
    TI.run()