Example #1
0
 def SVM_eval_func(self, chromosome):
     """Return the cross-validated AUC of an SVM built from ``chromosome``.

     ``chromosome`` is decoded into ``(cost, gamma, window_size)``.  When
     ``check_log`` reports that triple as already evaluated, the first
     entry of ``get_means_from_log`` is returned and nothing is retrained
     (presumably the logged mean AUC -- confirm against the log helpers).

     Otherwise an ``svm.SVC`` is fitted and scored once per fold.  As soon
     as a fold's test + training label count exceeds 1000, evaluation stops
     after that fold and the accumulated scores are left undivided;
     otherwise they are averaged over ``self.fold``.  The resulting AUC,
     decision value and MCC are passed to ``write_log`` and ``add_log``.

     Returns:
         The averaged (or undivided, see above) AUC.
     """
     cost, gamma, window_size = self.decode_chromosome(chromosome)
     if self.check_log(cost, gamma, window_size):
         return self.get_means_from_log(cost, gamma, window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = (
             folded_dataset.get_test_and_training_dataset(test_fold))
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         # NOTE(review): scikit-learn deprecated class_weight='auto' in
         # favour of 'balanced' -- confirm against the installed version.
         clf = svm.SVC(C=cost, gamma=gamma, class_weight='auto')
         clf.fit(train_dataset, train_labels)
         decision_values = clf.decision_function(test_dataset)
         # decision_function may hand back one sequence per sample; keep
         # only its first element so the scorer receives flat values.
         if isinstance(decision_values[0], (list, numpy.ndarray)):
             decision_values = [row[0] for row in decision_values]
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             # Large sample: one fold is considered enough.
             break
     if not sample_size_over_thousand_flag:
         # float() guards against truncating division if the sums are ints.
         fold_count = float(self.fold)
         mean_AUC /= fold_count
         mean_decision_value /= fold_count
         mean_mcc /= fold_count
     self.write_log(cost, gamma, window_size, mean_AUC, mean_decision_value, mean_mcc)
     self.add_log(cost, gamma, window_size, mean_AUC, mean_decision_value, mean_mcc)
     return mean_AUC
Example #2
0
 def neuralNetwork_eval_func(self, chromosome):
     """Return the cross-validated AUC of a neural network for ``chromosome``.

     ``chromosome`` is decoded into ``(node_num, learning_rate,
     window_size)``.  When ``check_log`` reports that triple as already
     evaluated, the first entry of ``get_means_from_log`` is returned and
     nothing is retrained (presumably the logged mean AUC -- confirm
     against the log helpers).

     Otherwise, for each fold, a network with ``21 * (2 * window_size + 1)``
     inputs, ``node_num`` hidden units and one sigmoid output is trained
     with back-propagation for at most ``self.maxEpochs_for_trainer``
     epochs, then scored on the test samples.  As soon as a fold's test +
     training label count exceeds 1000, evaluation stops after that fold
     and the accumulated scores are left undivided; otherwise they are
     averaged over ``self.fold``.  The results are passed to ``write_log``
     and ``add_log``.

     Returns:
         The averaged (or undivided, see above) AUC.
     """
     node_num, learning_rate, window_size = self.decode_chromosome(chromosome)
     if self.check_log(node_num, learning_rate, window_size):
         return self.get_means_from_log(node_num, learning_rate, window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     indim = 21 * (2 * window_size + 1)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = (
             folded_dataset.get_test_and_training_dataset(test_fold))
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         ds = SupervisedDataSet(indim, 1)
         for sample, label in zip(train_dataset, train_labels):
             ds.appendLinked(sample, [label])
         net = buildNetwork(indim, node_num, 1, outclass=SigmoidLayer, bias=True)
         trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
         trainer.trainUntilConvergence(maxEpochs=self.maxEpochs_for_trainer)
         # The network has a single output; take it as the decision value.
         # Pairing with the labels keeps one value per labelled sample.
         decision_values = [net.activate(sample)[0]
                            for sample, _label in zip(test_dataset, test_labels)]
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             # Large sample: one fold is considered enough.
             break
     if not sample_size_over_thousand_flag:
         # float() guards against truncating division if the sums are ints.
         fold_count = float(self.fold)
         mean_AUC /= fold_count
         mean_decision_value /= fold_count
         mean_mcc /= fold_count
     self.write_log(node_num, learning_rate, window_size, mean_AUC, mean_decision_value, mean_mcc)
     self.add_log(node_num, learning_rate, window_size, mean_AUC, mean_decision_value, mean_mcc)
     return mean_AUC
Example #3
0
 def randomForest_eval_func(self, chromosome):
     """Return the cross-validated AUC of a random forest for ``chromosome``.

     ``chromosome`` is decoded into ``(n_estimators, max_features,
     window_size)``.  When ``check_log`` reports that triple as already
     evaluated, the first entry of ``get_means_from_log`` is returned and
     nothing is retrained (presumably the logged mean AUC -- confirm
     against the log helpers).

     Otherwise a ``RandomForestClassifier`` is fitted and scored once per
     fold.  As soon as a fold's test + training label count exceeds 1000,
     evaluation stops after that fold and the accumulated scores are left
     undivided; otherwise they are averaged over ``self.fold``.  The
     results are passed to ``write_log`` and ``add_log``.

     Returns:
         The averaged (or undivided, see above) AUC.
     """
     n_estimators, max_features, window_size = self.decode_chromosome(chromosome)
     if self.check_log(n_estimators, max_features, window_size):
         return self.get_means_from_log(n_estimators, max_features, window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = (
             folded_dataset.get_test_and_training_dataset(test_fold))
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         clf = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features)
         clf.fit(train_dataset, train_labels)
         probas = clf.predict_proba(test_dataset)
         # Second column: probability of being binding residue.
         decision_values = [row[1] for row in probas]
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             # Large sample: one fold is considered enough.
             break
     if not sample_size_over_thousand_flag:
         # float() guards against truncating division if the sums are ints.
         fold_count = float(self.fold)
         mean_AUC /= fold_count
         mean_decision_value /= fold_count
         mean_mcc /= fold_count
     self.write_log(n_estimators, max_features, window_size, mean_AUC, mean_decision_value, mean_mcc)
     self.add_log(n_estimators, max_features, window_size, mean_AUC, mean_decision_value, mean_mcc)
     return mean_AUC
 def run_SVM(self, cost, gamma):
     """Cross-validate an SVM with fixed ``cost``/``gamma`` and log the means.

     For every fold of ``self.folded_dataset`` an ``svm.SVC`` is fitted and
     its test-set decision values are scored with
     ``validate_performance.calculate_AUC``.  Labels are then predicted two
     ways -- by thresholding the decision values at the first element of
     the value returned next to the AUC (presumably the max-MCC cut-off),
     and by ``clf.predict`` -- and each prediction set is scored with
     ``validate_performance.calculate_performance``.

     All scores are averaged over ``self.fold`` and handed to
     ``write_log``.  Nothing is returned.
     """
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     mean_performance_by_decision_value = [0, 0, 0, 0]   # [SE, SP, ACC, MCC]
     mean_performance_by_predict_function = [0, 0, 0, 0] # [SE, SP, ACC, MCC]
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = (
             self.folded_dataset.get_test_and_training_dataset(test_fold))
         # NOTE(review): scikit-learn deprecated class_weight='auto' in
         # favour of 'balanced' -- confirm against the installed version.
         clf = svm.SVC(C=cost, gamma=gamma, class_weight='auto')
         clf.fit(train_dataset, train_labels)
         decision_values = clf.decision_function(test_dataset)
         # decision_function may hand back one sequence per sample; keep
         # only its first element.  A real list is built because the
         # values are consumed twice below.
         if isinstance(decision_values[0], (list, numpy.ndarray)):
             decision_values = [row[0] for row in decision_values]
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         threshold = decision_value_and_max_mcc[0]
         predicted_labels_by_decision_value = [
             1 if decision_value >= threshold else 0
             for decision_value in decision_values]
         predicted_labels_by_predict_function = clf.predict(test_dataset)
         # [SE, SP, ACC, MCC]
         performances = validate_performance.calculate_performance(
             test_labels, predicted_labels_by_decision_value)
         mean_performance_by_decision_value = self.add_performances(
             mean_performance_by_decision_value, performances)
         performances = validate_performance.calculate_performance(
             test_labels, predicted_labels_by_predict_function)
         mean_performance_by_predict_function = self.add_performances(
             mean_performance_by_predict_function, performances)
         mean_AUC += AUC
         mean_decision_value += threshold
         mean_mcc += decision_value_and_max_mcc[1]
     # float() guards against truncating division if the sums are ints.
     fold_count = float(self.fold)
     mean_AUC /= fold_count
     mean_decision_value /= fold_count
     mean_mcc /= fold_count
     mean_performance_by_decision_value = [
         total / fold_count for total in mean_performance_by_decision_value]
     mean_performance_by_predict_function = [
         total / fold_count for total in mean_performance_by_predict_function]
     self.write_log(cost, gamma, mean_AUC, mean_decision_value, mean_mcc,
                     mean_performance_by_decision_value, mean_performance_by_predict_function)
Example #5
0
 def neuralNetwork_eval_func(self, chromosome):
     """Return the cross-validated AUC of a neural network for ``chromosome``.

     ``chromosome`` is decoded into ``(node_num, learning_rate,
     window_size)``.  When ``check_log`` reports that triple as already
     evaluated, the first entry of ``get_means_from_log`` is returned and
     nothing is retrained (presumably the logged mean AUC -- confirm
     against the log helpers).

     Otherwise, for each fold, a network with ``21 * (2 * window_size + 1)``
     inputs, ``node_num`` hidden units and one sigmoid output is trained
     with back-propagation for at most ``self.maxEpochs_for_trainer``
     epochs, then scored on the test samples.  As soon as a fold's test +
     training label count exceeds 1000, evaluation stops after that fold
     and the accumulated scores are left undivided; otherwise they are
     averaged over ``self.fold``.  The results are passed to ``write_log``
     and ``add_log``.

     Returns:
         The averaged (or undivided, see above) AUC.
     """
     node_num, learning_rate, window_size = self.decode_chromosome(
         chromosome)
     if self.check_log(node_num, learning_rate, window_size):
         return self.get_means_from_log(node_num, learning_rate,
                                        window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     indim = 21 * (2 * window_size + 1)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = (
             folded_dataset.get_test_and_training_dataset(test_fold))
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         ds = SupervisedDataSet(indim, 1)
         for sample, label in zip(train_dataset, train_labels):
             ds.appendLinked(sample, [label])
         net = buildNetwork(indim,
                            node_num,
                            1,
                            outclass=SigmoidLayer,
                            bias=True)
         trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
         trainer.trainUntilConvergence(maxEpochs=self.maxEpochs_for_trainer)
         # The network has a single output; take it as the decision value.
         # Pairing with the labels keeps one value per labelled sample.
         decision_values = [
             net.activate(sample)[0]
             for sample, _label in zip(test_dataset, test_labels)
         ]
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             # Large sample: one fold is considered enough.
             break
     if not sample_size_over_thousand_flag:
         # float() guards against truncating division if the sums are ints.
         fold_count = float(self.fold)
         mean_AUC /= fold_count
         mean_decision_value /= fold_count
         mean_mcc /= fold_count
     self.write_log(node_num, learning_rate, window_size, mean_AUC,
                    mean_decision_value, mean_mcc)
     self.add_log(node_num, learning_rate, window_size, mean_AUC,
                  mean_decision_value, mean_mcc)
     return mean_AUC
Example #6
0
    def test_calculate_AUC(self):
        """Check ``validate_performance.calculate_AUC`` on hand-built inputs.

        The first group of cases must reproduce the expected AUC exactly;
        the second group is compared after rounding to five decimal places.
        """
        # (decision values, correct labels, expected AUC) -- exact match.
        exact_cases = (
            ([-1, -0.5, -0.1,  0.1, 0.5, 1], [0, 0, 0, 1, 1, 1], 1.0),
            ([-1, 0.5, -0.1,  0.1, -0.5, 1], [0, 1, 0, 1, 0, 1], 1.0),
            ([1.0]*6, [0, 1, 0, 1, 0, 1], 0.5),
            ([-1, -0.5, 0.5, 1], [0, 1, 0, 1], 0.75),
        )
        for scores, labels, expected in exact_cases:
            AUC, mcc = validate_performance.calculate_AUC(scores, labels)
            self.assertEqual(AUC, expected)

        # (decision values, expected AUC); labels are ten 0s then ten 1s.
        rounded_cases = (
            ([1.0] * 2 + [-1.0] * 8 + [1.0] * 2 + [0.5] * 8, 0.82),
            ([-1.0] * 8 + [0.0] * 2 + [0.0] * 2 + [0.5] * 8, 0.98),
            ([-1.0] * 8 + [0.2] * 2 + [0.1]*3 +[0.2]*2 + [0.5]*5, 0.92),
            ([-1.0]*6 + [0.0]*2+ [0.2]*2 + [0.0]*2+ [0.1]*1 +[0.2]*2 + [0.5]*5, 0.9),
        )
        scale = 10**5
        for scores, expected in rounded_cases:
            labels = [0] * 10 + [1] * 10
            AUC, mcc = validate_performance.calculate_AUC(scores, labels)
            self.assertEqual(round(AUC*scale)/scale, expected)
Example #7
0
 def randomForest_eval_func(self, chromosome):
     """Return the cross-validated AUC of a random forest for ``chromosome``.

     ``chromosome`` is decoded into ``(n_estimators, max_features,
     window_size)``.  When ``check_log`` reports that triple as already
     evaluated, the first entry of ``get_means_from_log`` is returned and
     nothing is retrained (presumably the logged mean AUC -- confirm
     against the log helpers).

     Otherwise a ``RandomForestClassifier`` is fitted and scored once per
     fold.  As soon as a fold's test + training label count exceeds 1000,
     evaluation stops after that fold and the accumulated scores are left
     undivided; otherwise they are averaged over ``self.fold``.  The
     results are passed to ``write_log`` and ``add_log``.

     Returns:
         The averaged (or undivided, see above) AUC.
     """
     n_estimators, max_features, window_size = self.decode_chromosome(
         chromosome)
     if self.check_log(n_estimators, max_features, window_size):
         return self.get_means_from_log(n_estimators, max_features,
                                        window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = (
             folded_dataset.get_test_and_training_dataset(test_fold))
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         clf = RandomForestClassifier(n_estimators=n_estimators,
                                      max_features=max_features)
         clf.fit(train_dataset, train_labels)
         probas = clf.predict_proba(test_dataset)
         # Second column: probability of being binding residue.
         decision_values = [row[1] for row in probas]
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             # Large sample: one fold is considered enough.
             break
     if not sample_size_over_thousand_flag:
         # float() guards against truncating division if the sums are ints.
         fold_count = float(self.fold)
         mean_AUC /= fold_count
         mean_decision_value /= fold_count
         mean_mcc /= fold_count
     self.write_log(n_estimators, max_features, window_size, mean_AUC,
                    mean_decision_value, mean_mcc)
     self.add_log(n_estimators, max_features, window_size, mean_AUC,
                  mean_decision_value, mean_mcc)
     return mean_AUC
Example #8
0
 def SVM_eval_func(self, chromosome):
     """Return the cross-validated AUC of an SVM built from ``chromosome``.

     ``chromosome`` is decoded into ``(cost, gamma, window_size)``.  When
     ``check_log`` reports that triple as already evaluated, the first
     entry of ``get_means_from_log`` is returned and nothing is retrained
     (presumably the logged mean AUC -- confirm against the log helpers).

     Otherwise an ``svm.SVC`` is fitted and scored once per fold.  As soon
     as a fold's test + training label count exceeds 1000, evaluation stops
     after that fold and the accumulated scores are left undivided;
     otherwise they are averaged over ``self.fold``.  The resulting AUC,
     decision value and MCC are passed to ``write_log`` and ``add_log``.

     Returns:
         The averaged (or undivided, see above) AUC.
     """
     cost, gamma, window_size = self.decode_chromosome(chromosome)
     if self.check_log(cost, gamma, window_size):
         return self.get_means_from_log(cost, gamma, window_size)[0]
     folded_dataset = self.create_folded_dataset(window_size)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     for test_fold in xrange(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = (
             folded_dataset.get_test_and_training_dataset(test_fold))
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         # NOTE(review): scikit-learn deprecated class_weight='auto' in
         # favour of 'balanced' -- confirm against the installed version.
         clf = svm.SVC(C=cost, gamma=gamma, class_weight='auto')
         clf.fit(train_dataset, train_labels)
         decision_values = clf.decision_function(test_dataset)
         # decision_function may hand back one sequence per sample; keep
         # only its first element so the scorer receives flat values.
         if isinstance(decision_values[0], (list, numpy.ndarray)):
             decision_values = [row[0] for row in decision_values]
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(
             decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             # Large sample: one fold is considered enough.
             break
     if not sample_size_over_thousand_flag:
         # float() guards against truncating division if the sums are ints.
         fold_count = float(self.fold)
         mean_AUC /= fold_count
         mean_decision_value /= fold_count
         mean_mcc /= fold_count
     self.write_log(cost, gamma, window_size, mean_AUC, mean_decision_value,
                    mean_mcc)
     self.add_log(cost, gamma, window_size, mean_AUC, mean_decision_value,
                  mean_mcc)
     return mean_AUC