def tune_svm_using_10_fold():
    """Estimate SVM accuracy with 10-fold cross-validation on the
    feature-engineered training set and print the mean fold accuracy.
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, train_features, train_prediction_labels = dh.get_numeric_data_set()

    data_sets = dh.get_cross_validation_data_sets(10, train_features, train_prediction_labels)

    fold_accuracies = []
    for data_set_number in data_sets:
        # Each entry is a (training_set, tuning_set) pair of dicts with
        # "data_points" and "labels" keys.
        training_set, tuning_set = data_sets.get(data_set_number)
        train_features = training_set["data_points"]
        train_prediction_labels = training_set["labels"]

        # Feature selection on the training fold; keep the selected feature
        # indices so the tuning fold can be projected onto the same columns.
        train_features, selected_features = dh.get_k_best_features(
            len(train_features[0]), train_features, train_prediction_labels)

        test_features = tuning_set["data_points"]
        test_prediction_labels = tuning_set["labels"]
        test_features = dh.get_new_feature_vec(test_features, selected_features)

        svm = Svm(train_features, train_prediction_labels, 200, 1, 2)
        svm.train()
        eval_metrics = EvaluationMetrics(svm, test_features, test_prediction_labels)
        # 'metrics' rather than 'eval' to avoid shadowing the builtin.
        metrics = eval_metrics.evaluate()
        fold_accuracies.append(metrics['accuracy'])

    average_accuracy = sum(fold_accuracies) / len(fold_accuracies)
    # print() call: the rest of this file is Python 3 (no-arg super(),
    # print() elsewhere); the old 'print x' statement was a Python 3
    # syntax error.
    print(average_accuracy)
Exemple #2
0
 def __init__(self, features, false_positive_loss=1, false_negative_loss=1):
     """Build a classifier around a fresh Svm over a fixed feature set,
     with configurable asymmetric misclassification losses.
     """
     super().__init__()
     self._features = features
     # One slot per feature plus one trailing slot for the label.
     self._row_len = len(features) + 1
     self.examples = np.empty((0, self._row_len))
     classifier = Svm()
     classifier.false_positive_loss = false_positive_loss
     classifier.false_negative_loss = false_negative_loss
     self.svm = classifier
def evaluate_svm():
    """Train an SVM on the full feature-engineered training set, then
    evaluate it on the held-out test set (metrics, AUC plot, AU-ROC).
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, train_features, train_prediction_labels = dh.get_numeric_data_set()

    # Feature selection: keep the selected feature indices so the test set
    # can be projected onto exactly the same columns.
    train_features, selected_features = dh.get_k_best_features(
        len(train_features[0]), train_features, train_prediction_labels)

    svm = Svm(train_features, train_prediction_labels, 20, 0)
    svm.train()

    dh_test = DataHandler('data/test-set-feature-engineered.csv', 'prediction_label')
    headers, test_features, test_prediction_labels = dh_test.get_numeric_data_set()

    test_features = dh_test.get_new_feature_vec(test_features, selected_features)

    eval_metrics = EvaluationMetrics(svm, test_features, test_prediction_labels)
    # Renamed from 'eval' to avoid shadowing the builtin of that name.
    results = eval_metrics.evaluate()
    eval_metrics.compute_and_plot_auc(results['predicted'], test_prediction_labels)
    eval_metrics.compute_au_roc(results['predicted'], test_prediction_labels)
Exemple #4
0
class _SubClassifier(AbstractClassfier):
    """Classifier backed by an Svm evaluated over a fixed list of feature
    extractors; accumulates labeled examples row by row before learning.
    """

    def __init__(self, features, false_positive_loss=1, false_negative_loss=1):
        super().__init__()
        self.svm = Svm()
        self._features = features
        self.svm.false_negative_loss = false_negative_loss
        self.svm.false_positive_loss = false_positive_loss
        # One column per feature plus a trailing label column.
        self._row_len = len(features) + 1
        self.examples = np.empty((0, self._row_len))

    def calc_features(self, rect=None):
        """Evaluate every feature extractor at rect's top-left corner
        (or at the origin when no rect is given)."""
        if rect is None:
            start = (0, 0)
        else:
            start = (rect[0][0], rect[1][0])
        values = [feature.calc(start) for feature in self._features]
        return np.array(values)

    def add_current_image(self, y):
        """Append one labeled example row (features..., y) to the pool."""
        row = np.empty(self._row_len)
        row[:-1] = self.calc_features()
        row[-1] = y
        self.examples = np.vstack((self.examples, row))

    def learn(self):
        """Fit the underlying Svm on a copy of the accumulated examples."""
        c_candidates = 2 ** np.arange(-5, 15, 2.0)
        self.svm.learn(np.copy(self.examples), c_arr=c_candidates,
                       epoch=15, cross_validation_times=5, learning_part=0.7)

    def classify(self, rect):
        return self.svm.classify(self.calc_features(rect))

    def classify_vec(self, rect, axis=-1):
        return self.svm.classify_vec(self.calc_features(rect), axis)

    def valuefy(self, rect):
        return self.svm.valuefy(self.calc_features(rect))

    def to_list(self):
        return self.svm.to_list()

    def from_list(self, mat):
        self.svm.from_list(mat)
Exemple #5
0
        # NOTE(review): fragment — the enclosing function/indent context is
        # not visible here; these lines belong to a larger performance-
        # evaluation routine.
        print('\nPerformance is being computed:')
        with open(filein) as f:
            for line in f:
                # Each input line lists, whitespace-separated: fold id,
                # pickled dataset path, id file, .dat file, prediction file.
                items = line.split()
                print(items)

                fold = items[0]
                data_pkl = items[1]
                data_id = items[2]
                data_dat = items[3]
                pred_file = items[4]

                ### Build the dataset from dataset.pkl ###
                # Fluent chain: load the pickled dataset, then decode the
                # model's prediction file against it.
                model = Svm() \
                            .load(dataset=data_pkl, id_file=data_id, encode=True, pkl=True)\
                            .decoding(prediction_file=pred_file)

                dictionary = model.fetch_dictionary()

                ##############################################
                ## Performance Steps:                       ##
                ## 1. Confusion Matrix and related scores   ##
                ## 2. Segment OVerlapping score             ##
                ##############################################

                prediction = []
                expectation = []

                # Collect expected DSSP labels per entry; the matching
                # predictions are presumably gathered below this view.
                for key in dictionary:
                    expectation.append(dictionary[key]['dssp'])
Exemple #6
0
        # NOTE(review): fragment — the opening 'try:' for the 'except'
        # below is above the visible region.
        filein = sys.argv[1]
        setype = sys.argv[2]
    except:
        # NOTE(review): bare except — catches everything, including
        # SystemExit/KeyboardInterrupt; narrow to IndexError if possible.
        print(
            'Program Usage: python3 svm_encode.py <file.txt> <set type (trainingset, testset)>'
        )
        raise SystemExit
    else:
        with open(filein) as f:
            for line in f:
                # Each input line lists, whitespace-separated: output path,
                # pickle path, id file, .dat file.
                items = line.split()
                print(items)

                path = items[0]
                data_pkl = items[1]
                data_id = items[2]
                data_dat = items[3]

                # Build the dataset from scratch
                prof = Pssm(data_id, setype=setype,
                            raw_file=False).parse(normalize=False)
                dict_prof = prof.fetch_dict()
                dssp = Dssp(data_id, setype=setype, raw_file=False).parse()
                dict_dssp = dssp.fetch_dict()
                dataset = Dataset(data_id, setype=setype).build(
                    profile=dict_prof, dssp=dict_dssp).fetch_dict()

                # Encode the dataset and persist it in both .dat and .pkl
                # formats via the fluent Svm API.
                model = Svm(id_file=data_id, setype=setype)\
                            .load(dataset=dataset, id_file=data_id, encode=True)\
                            .save(path=data_dat, format='dat') \
                            .save(path=data_pkl, format='pkl')
Exemple #7
0
def main(argv):
    """Train Perceptron, SVM and PA classifiers and print their
    predictions for every test example.

    NOTE(review): the 'argv' parameter is ignored — file paths are read
    from sys.argv directly (sys.argv[1]=train_x, [2]=train_y,
    [3]=test_x). Kept as-is so existing callers are unaffected.
    """
    train_x = read_from_file(sys.argv[1])
    train_x = one_hot_encode(train_x).astype(float)

    train_y = read_from_file(sys.argv[2])
    # Two-step cast: the raw strings parse as floats first, then ints.
    train_y = train_y.astype(float).astype(int)
    num_of_labels = len(Counter(train_y).keys())

    test_x = read_from_file(sys.argv[3])
    test_x = one_hot_encode(test_x).astype(float)

    # Normalize with training-set statistics, then apply the same
    # transforms to the test set.
    train_x_z_score, mean, std_dev = z_score_norm(train_x)
    train_x_min_max, min_train, max_train = min_max_norm(train_x)
    test_x_z_score = z_score_norm_by_mean_std(test_x, mean, std_dev)
    test_x_min_max = min_max_norm_by_min_max(test_x, min_train, max_train)

    # Perceptron and SVM are trained on min-max scaled data; PA on
    # z-score scaled data.
    perceptron = Perceptron(train_x_min_max, train_y, num_of_labels)
    svm = Svm(train_x_min_max, train_y, num_of_labels)
    pa = Pa(train_x_z_score, train_y, num_of_labels)
    perceptron.train()
    svm.train()
    pa.train()

    predict_perceptron = []
    predict_svm = []
    predict_pa = []
    for test_min_max, test_z_score in zip(test_x_min_max, test_x_z_score):
        predict_perceptron.append(perceptron.predict(test_min_max))
        predict_svm.append(svm.predict(test_min_max))
        predict_pa.append(pa.predict(test_z_score))

    print_predict(predict_perceptron, predict_svm, predict_pa)