コード例 #1
0
 def create_matrix(self, data, label):
     """Build a coding matrix by merging label groups bottom-up.

     Every distinct label starts as its own group.  While more than one
     group remains, every pair of groups is scored with
     ``Criterion.agg_score`` (using ``Criterion.max_distance_score``) and
     the pair with the smallest score is merged.  Each merge contributes
     one column: +1 for the labels of the first group, -1 for the labels
     of the second group and 0 for every other label.

     :param data: the whole data
     :param label: the whole labels
     :return: ``(matrix, index)``; ``index`` maps each distinct label to
         its row, and ``matrix`` is ``None`` when there are fewer than
         two distinct labels (no merge ever happens)
     """
     unique_labels = np.unique(label)
     index = {lab: row for row, lab in enumerate(unique_labels)}
     groups = [[lab] for lab in unique_labels]
     n_rows = len(groups)

     best_first = []
     best_second = []
     columns = []
     while len(groups) > 1:
         lowest = np.inf
         for pos, first in enumerate(groups[:-1]):
             for second in groups[pos + 1:]:
                 # presumably returns the samples/labels belonging to the
                 # listed classes -- helper is defined outside this block
                 first_data, first_label = MT.get_data_subset(
                     data, label, first)
                 second_data, second_label = MT.get_data_subset(
                     data, label, second)
                 candidate = Criterion.agg_score(
                     first_data,
                     first_label,
                     second_data,
                     second_label,
                     score=Criterion.max_distance_score)
                 # strict comparison: on ties the earliest pair is kept
                 if candidate < lowest:
                     lowest = candidate
                     best_first, best_second = first, second

         column = np.zeros((n_rows, 1))
         for member in best_first:
             column[index[member]] = 1
         for member in best_second:
             column[index[member]] = -1
         columns.append(column)

         # replace the two merged groups by their union, placed at the front
         merged = best_first + best_second
         groups.remove(best_first)
         groups.remove(best_second)
         groups.insert(0, merged)

     matrix = np.hstack(columns) if columns else None
     return matrix, index
コード例 #2
0
def get_DC_value(data, labels, group1, group2, dc_option):
    """
    Compute a data-complexity (DC) value between two groups of classes.

    The measure is looked up by name on ``GC`` as
    ``get_complexity_<dc_option>`` and called with the data and labels of
    both groups converted to plain lists.

    :param data: the whole data
    :param labels: the whole labels
    :param group1: group1 classes
    :param group2: group2 classes
    :param dc_option: select which dc is used (suffix of the
        ``GC.get_complexity_*`` function name)
    :return: DC value
    :raises NameError: if ``dc_option`` does not name a complexity function
        on ``GC``
    """
    group1_data, group1_label = MT.get_data_subset(data, labels, group1)
    group2_data, group2_label = MT.get_data_subset(data, labels, group2)

    # Only the lookup is guarded: errors raised while computing the measure
    # must surface as themselves instead of being reported as a wrong option.
    try:
        fun = getattr(GC, 'get_complexity_' + dc_option)
    except (AttributeError, TypeError) as err:
        logging.error('DC option is wrong')
        raise NameError('DC option is wrong') from err

    return fun(list(group1_data), list(group1_label), list(group2_data),
               list(group2_label))
コード例 #3
0
    def create_matrix(self, data, label, dc_option):
        """Build a coding matrix by recursively splitting the label set.

        Starting from the full set of distinct labels, each pending set is
        split in two by ``Greedy_Search.greedy_search`` and one column is
        emitted per split: +1 for the labels of the first part, -1 for the
        labels of the second part, 0 for labels outside the set.  Parts
        with more than one label are queued (first in, first out) for
        further splitting.

        :param data: the whole data
        :param label: the whole labels
        :param dc_option: forwarded to ``Greedy_Search.greedy_search`` as
            its ``dc_option`` keyword
        :return: ``(matrix, index)`` where ``index`` maps each distinct
            label to its row in ``matrix``
        """
        index = {lab: row for row, lab in enumerate(np.unique(label))}
        pending = [np.unique(label)]
        columns = []

        while pending:
            current = pending.pop(0)

            # restrict the data set to the classes being split
            subset_data, subset_labels = MT.get_data_subset(
                data, label, current)

            positive, negative = Greedy_Search.greedy_search(
                subset_data, subset_labels, dc_option=dc_option)

            column = np.zeros((len(index), 1))
            for cls in positive:
                column[index[cls]] = 1
            for cls in negative:
                column[index[cls]] = -1
            columns.append(column)

            # single-label parts are leaves; anything larger is split again
            pending.extend(
                part for part in (positive, negative) if len(part) > 1)

        return np.hstack(columns), index
コード例 #4
0
 def create_matrix(self, data, label):
     """Build a coding matrix by recursively splitting the label set.

     Each pending label set is split in two by ``sffs`` and one column is
     emitted per split: +1 for the first part, -1 for the second part and
     0 for labels outside the set.  Parts holding more than one label are
     queued (first in, first out) to be split again.

     :param data: the whole data
     :param label: the whole labels
     :return: ``(matrix, index)`` where ``index`` maps each distinct label
         to its row in ``matrix``
     """
     index = {lab: row for row, lab in enumerate(np.unique(label))}
     pending = [np.unique(label)]
     columns = []

     while pending:
         current = pending.pop(0)
         subset_data, subset_labels = MT.get_data_subset(
             data, label, current)
         positive, negative = sffs(subset_data, subset_labels)

         column = np.zeros((len(index), 1))
         for cls in positive:
             column[index[cls]] = 1
         for cls in negative:
             column[index[cls]] = -1
         columns.append(column)

         # only parts that still contain several labels need another split
         for part in (positive, negative):
             if len(part) > 1:
                 pending.append(part)

     return np.hstack(columns), index
コード例 #5
0
 def create_matrix(self, train_data, train_label, validate_data,
                   validate_label, estimator, **param):
     """Build a coding matrix by estimator-driven recursive splitting.

     For every pending label set, each way of choosing
     ``ceil(len(set) / 2)`` labels as the first part (the remaining labels
     form the second part) is tried: ``estimator(**param)`` is fitted on
     the training samples of both parts (targets +1 / -1) and scored on
     the corresponding validation samples.  The best-scoring split yields
     one column (+1 / -1 / 0) and its fitted estimator; parts with more
     than one label are queued for further splitting.

     :param train_data: training samples
     :param train_label: training labels
     :param validate_data: validation samples
     :param validate_label: validation labels
     :param estimator: callable returning an object with ``fit`` and
         ``score`` methods when called as ``estimator(**param)``
     :param param: keyword arguments forwarded to ``estimator``
     :return: ``(matrix, index, predictors, predictor_weights)``; ``index``
         maps each distinct training label to its row, ``predictors`` holds
         the selected estimator per column and ``predictor_weights`` the
         value of ``MT.estimate_weight(1 - best_score)`` per column
     """
     index = {l: i for i, l in enumerate(np.unique(train_label))}
     matrix = None
     predictors = []
     predictor_weights = []
     labels_to_divide = [np.unique(train_label)]
     while len(labels_to_divide) > 0:
         label_set = labels_to_divide.pop(0)
         label_count = len(label_set)
         # The builtin int replaces np.int, which was deprecated in
         # NumPy 1.20 and removed in NumPy 1.24.
         groups = combinations(range(label_count),
                               int(np.ceil(label_count / 2)))
         score_result = 0
         est_result = None
         for group in groups:
             class_1_variety = np.array([label_set[i] for i in group])
             class_2_variety = np.array(
                 [l for l in label_set if l not in class_1_variety])

             # fit a binary estimator: first part -> +1, second part -> -1
             class_1_data, class_1_label = MT.get_data_subset(
                 train_data, train_label, class_1_variety)
             class_2_data, class_2_label = MT.get_data_subset(
                 train_data, train_label, class_2_variety)
             class_1_cla = np.ones(len(class_1_data))
             class_2_cla = -np.ones(len(class_2_data))
             train_d = np.vstack((class_1_data, class_2_data))
             train_c = np.hstack((class_1_cla, class_2_cla))
             est = estimator(**param).fit(train_d, train_c)

             # assemble the validation set for the same split
             class_1_data, class_1_label = MT.get_data_subset(
                 validate_data, validate_label, class_1_variety)
             class_2_data, class_2_label = MT.get_data_subset(
                 validate_data, validate_label, class_2_variety)
             class_1_cla = np.ones(len(class_1_data))
             class_2_cla = -np.ones(len(class_2_data))
             validation_d = np.array([])
             validation_c = np.array([])
             try:
                 validation_d = np.vstack((class_1_data, class_2_data))
                 validation_c = np.hstack((class_1_cla, class_2_cla))
             except Exception:
                 # stacking failed (e.g. one side has no validation
                 # samples): validate on whichever side is non-empty
                 if len(class_1_data) > 0:
                     validation_d = class_1_data
                     validation_c = class_1_cla
                 elif len(class_2_data) > 0:
                     validation_d = class_2_data
                     validation_c = class_2_cla
             if validation_d.shape[0] > 0 and validation_d.shape[1] > 0:
                 score = est.score(validation_d, validation_c)
             else:
                 # no usable validation samples: fixed fallback score
                 # NOTE(review): the 0.8 constant is not explained here
                 score = 0.8
             # ">=" means that among equally good splits the last one wins
             if score >= score_result:
                 score_result = score
                 est_result = est
                 class_1_variety_result = class_1_variety
                 class_2_variety_result = class_2_variety

         new_col = np.zeros((len(index), 1))
         for i in class_1_variety_result:
             new_col[index[i]] = 1
         for i in class_2_variety_result:
             new_col[index[i]] = -1
         if matrix is None:
             matrix = copy.copy(new_col)
         else:
             matrix = np.hstack((matrix, new_col))
         predictors.append(est_result)
         predictor_weights.append(MT.estimate_weight(1 - score_result))
         if len(class_1_variety_result) > 1:
             labels_to_divide.append(class_1_variety_result)
         if len(class_2_variety_result) > 1:
             labels_to_divide.append(class_2_variety_result)
     return matrix, index, predictors, predictor_weights
コード例 #6
0
 def create_matrix(self, data, label):
     """Build a ratio-valued coding matrix by recursive splitting.

     Each pending label set is split in two by ``sffs`` (scored with
     ``Criterion.max_center_distance_score``).  For both parts the mean
     sample (center) is computed, and for every label the fraction of its
     samples that lie at least as close to their own part's center as to
     the other part's center is determined.  The column holds that
     fraction for labels of the first part, its negation for labels of the
     second part and 0 for labels outside the set.  Parts with more than
     one label are queued to be split again.

     :param data: the whole data
     :param label: the whole labels
     :return: ``(matrix, index)`` where ``index`` maps each distinct label
         to its row in ``matrix``
     """
     index = {lab: row for row, lab in enumerate(np.unique(label))}
     pending = [np.unique(label)]
     columns = []

     while pending:
         current = pending.pop(0)
         subset_data, subset_labels = MT.get_data_subset(
             data, label, current)
         positive, negative = sffs(
             subset_data,
             subset_labels,
             score=Criterion.max_center_distance_score)

         pos_data, pos_labels = MT.get_data_subset(data, label, positive)
         neg_data, neg_labels = MT.get_data_subset(data, label, negative)
         pos_center = np.average(pos_data, axis=0)
         neg_center = np.average(neg_data, axis=0)

         # NOTE(review): the first mask calls the bare euclidean_distance
         # while the second calls MT.euclidean_distance -- confirm that
         # both names refer to the same function.
         pos_mask = [
             euclidean_distance(sample, pos_center) <=
             euclidean_distance(sample, neg_center)
             for sample in pos_data
         ]
         neg_mask = [
             MT.euclidean_distance(sample, neg_center) <=
             MT.euclidean_distance(sample, pos_center)
             for sample in neg_data
         ]

         # per-label count of samples nearest to their own center
         pos_hits = dict.fromkeys(positive, 0)
         neg_hits = dict.fromkeys(negative, 0)
         for lab in pos_labels[pos_mask]:
             pos_hits[lab] += 1
         for lab in neg_labels[neg_mask]:
             neg_hits[lab] += 1

         pos_label_list = list(pos_labels)
         neg_label_list = list(neg_labels)

         column = np.zeros((len(index), 1))
         for cls in positive:
             column[index[cls]] = pos_hits[cls] / pos_label_list.count(cls)
         for cls in negative:
             column[index[cls]] = -neg_hits[cls] / neg_label_list.count(cls)
         columns.append(column)

         for part in (positive, negative):
             if len(part) > 1:
                 pending.append(part)

     return np.hstack(columns), index