コード例 #1
0
    def predict(self, data):
        """
        Predict a label for every sample in ``data``.

        Each sample is passed through ``self._use_predictors``. The resulting
        output vector is matched against ``self.matrix`` by
        ``MT.closet_vector`` (with ``y_euclidean_distance`` and the predictor
        weights), and the match is translated into a label with
        ``MT.get_key(self.index, ...)``.

        Side effect: the per-sample output vectors are accumulated in
        ``self.predicted_vector``. Once at least one sample has been
        processed, the accumulated values are stored as a list holding one
        list per column of ``self.matrix``.

        :param data: array exposing ``.shape``; a 1-D input is treated as a
            single sample
        :return: numpy array with one predicted label per sample
        """
        res = []
        if len(self.predictors) == 0:
            logging.debug('The Model has not been fitted!')
        if len(data.shape) == 1:
            data = np.reshape(data, [1, -1])

        for sample in data:
            predict_res = self._use_predictors(sample)

            # len() is a safe emptiness test for both the initial empty list
            # and an ndarray; comparing an ndarray with ``[]`` is elementwise
            # and fails on mismatched shapes in recent NumPy releases.
            if len(self.predicted_vector) == 0:
                self.predicted_vector = copy.deepcopy(predict_res)
            else:
                # np.vstack is the supported spelling of the deprecated
                # np.row_stack alias.
                self.predicted_vector = np.vstack(
                    (self.predicted_vector, predict_res))

            value = MT.closet_vector(predict_res, self.matrix,
                                     y_euclidean_distance,
                                     np.array(self.predictor_weights))
            res.append(MT.get_key(self.index, value))

        if not res:
            # Empty batch: nothing was accumulated, leave the state untouched.
            return np.array(res)

        # A single processed sample leaves a 1-D vector; promote it to 2-D so
        # the column slicing below works for any number of samples.
        stacked = np.atleast_2d(self.predicted_vector)
        self.predicted_vector = [
            list(stacked[:, col]) for col in range(self.matrix.shape[1])
        ]

        return np.array(res)
コード例 #2
0
    def predict(self, data):
        """
        Predict a label for every sample in ``data``.

        For each sample a 3-nearest-neighbour classifier fitted on
        ``self.train_data`` / ``self.train_label`` proposes a class. The
        output vector from ``self._use_predictors`` is then zeroed at every
        position where the matrix row of that proposed class holds 0. The
        masked vector is appended to ``self.predicted_vector`` and decoded
        with ``MT.closet_vector`` / ``MT.get_key``.

        :param data: data to predict; a 1-D array is treated as one sample
        :return: numpy array of predicted labels, one per sample
        """
        res = []
        if len(self.predictors) == 0:
            logging.debug('The Model has not been fitted!')
        if len(data.shape) == 1:
            data = np.reshape(data, [1, -1])
        if data.shape[0] == 0:
            # Nothing to predict, so do not fit the kNN helper at all.
            return np.array(res)

        # The kNN helper and the label -> row map depend only on the training
        # data, so they are built once rather than once per sample.
        knn_model = neighbors.KNeighborsClassifier(
            algorithm='ball_tree', n_neighbors=3).fit(
                self.train_data, self.train_label)
        # NOTE(review): assumes the rows of self.matrix follow the sorted
        # order of the unique training labels -- confirm against self.index.
        label_row = {
            label: row
            for row, label in enumerate(np.unique(self.train_label))
        }

        for sample in data:
            # class proposed by the nearest neighbours of this sample
            knn_pre_label = knn_model.predict([sample])
            predicted_vector = self._use_predictors(sample)  # one row
            knn_pre_index = label_row[knn_pre_label[0]]
            # zero every position where the matrix row of the kNN-proposed
            # class holds 0
            for j in range(len(self.matrix[0])):
                if self.matrix[knn_pre_index][j] == 0:
                    predicted_vector[j] = 0

            self.predicted_vector.append(list(predicted_vector))
            value = MT.closet_vector(predicted_vector, self.matrix,
                                     self.distance_measure)
            res.append(MT.get_key(self.index, value))
        return np.array(res)
コード例 #3
0
 def fit(self, data, label):
     """
     Fit the model and then try to extend the coding matrix.

     The input is split 75/25 into a training part and a validation part and
     the initial matrix, index, predictors and predictor weights are built by
     ``self.create_matrix``. Then, for ``self.iter_num`` rounds:

     1. predict the validation part and build a confusion matrix;
     2. take the largest cell of that matrix and look for two disjoint
        feature subsets that put the two labels of the cell on opposite
        sides;
     3. for every such pair whose column is not already in the matrix, train
        an estimator and score it on the validation part, keeping the best;
     4. if nothing usable is found for the cell, zero it and retry with the
        next largest cell until the confusion matrix sums to 0;
     5. append the winning column, estimator and weight to the model and the
        merged subset to the candidate list.

     :param data: training samples
     :param label: training labels
     :return: None; the fitted state is stored on ``self``
     """
     self.train_data, self.validate_data, self.train_label, self.validation_y = train_test_split(
         data, label, test_size=0.25)
     self.matrix, self.index, self.predictors, self.predictor_weights = \
         self.create_matrix(self.train_data, self.train_label, self.validate_data, self.validation_y, self.estimator,
                            **self.param)
     feature_subset = MT.get_subset_feature_from_matrix(
         self.matrix, self.index)
     for _ in range(self.iter_num):
         y_pred = self.predict(self.validate_data)
         y_true = self.validation_y
         confusion_matrix = MT.create_confusion_matrix(
             y_true, y_pred, self.index)
         while True:
             # argmax indexes the flattened matrix; recover integer
             # (row, column) coordinates so they can be used both as label
             # positions and as array indices.
             max_index_y, max_index_x = divmod(
                 int(np.argmax(confusion_matrix)),
                 confusion_matrix.shape[1])
             label_y = MT.get_key(self.index, max_index_y)
             label_x = MT.get_key(self.index, max_index_x)
             score_result = 0
             col_result = None
             est_result = None
             est_weight_result = None
             feature_subset_m = None
             feature_subset_n = None
             for m in range(len(feature_subset) - 1):
                 for n in range(m + 1, len(feature_subset)):
                     part_m = feature_subset[m]
                     part_n = feature_subset[n]
                     separates = ((label_y in part_m and label_x in part_n)
                                  or (label_y in part_n and label_x in part_m))
                     # only disjoint subsets that split the two labels
                     # qualify as a new column
                     if not separates or not set(part_m).isdisjoint(part_n):
                         continue
                     col = MT.create_col_from_partition(
                         part_m, part_n, self.index)
                     if MT.have_same_col(col, self.matrix):
                         continue
                     train_data, train_cla = MT.get_data_from_col(
                         self.train_data, self.train_label, col,
                         self.index)
                     est = self.estimator(**self.param).fit(
                         train_data, train_cla)
                     validation_data, validation_cla = MT.get_data_from_col(
                         self.validate_data, self.validation_y, col,
                         self.index)
                     if validation_data is None:
                         # no validation samples for this column: fall back
                         # to a fixed score
                         score = 0.8
                     else:
                         score = est.score(validation_data,
                                           validation_cla)
                     if score >= score_result:
                         score_result = score
                         col_result = col
                         est_result = est
                         est_weight_result = MT.estimate_weight(
                             1 - score_result)
                         feature_subset_m = m
                         feature_subset_n = n
             if col_result is not None:
                 break
             # No usable column for this cell: discard it and move on to the
             # next largest one, stopping once nothing is left.
             confusion_matrix[max_index_y, max_index_x] = 0
             if np.sum(confusion_matrix) == 0:
                 break
         if col_result is None:
             # Nothing to add in this round; checked explicitly instead of
             # relying on np.hstack raising on a None column.
             continue
         self.matrix = np.hstack((self.matrix, col_result))
         self.predictors.append(est_result)
         self.predictor_weights.append(est_weight_result)
         feature_subset.append(feature_subset[feature_subset_m] +
                               feature_subset[feature_subset_n])