def create_matrix(self, data, label):
    """
    Build a coding matrix bottom-up by repeatedly merging two label groups.

    Every unique label starts as its own group. On each round all pairs of
    groups are scored with ``Criterion.agg_score`` (using
    ``Criterion.max_distance_score``) and the pair with the smallest score
    is merged; the merge is recorded as one matrix column.

    :param data: the whole data
    :param label: the whole labels
    :return: ``(matrix, index)`` - ``matrix`` has one row per unique label
        and one column per merge (+1 for the first merged group, -1 for the
        second, 0 elsewhere), or is ``None`` when there are fewer than two
        unique labels; ``index`` maps each unique label to its row
    """
    unique_labels = np.unique(label)
    index = dict(zip(unique_labels, range(len(unique_labels))))
    row_of = dict(zip(unique_labels, range(unique_labels.shape[0])))
    groups = [[single] for single in unique_labels]
    row_count = len(groups)

    columns = []
    # The winning pair deliberately lives outside the loop: if no pair
    # scores below infinity in a round, the previous round's pair is reused.
    best_first = []
    best_second = []
    while len(groups) > 1:
        lowest = np.inf
        for pos, first_group in enumerate(groups[:-1]):
            for second_group in groups[pos + 1:]:
                first_data, first_label = MT.get_data_subset(
                    data, label, first_group)
                second_data, second_label = MT.get_data_subset(
                    data, label, second_group)
                candidate = Criterion.agg_score(
                    first_data, first_label, second_data, second_label,
                    score=Criterion.max_distance_score)
                if candidate < lowest:
                    lowest = candidate
                    best_first = first_group
                    best_second = second_group

        column = np.zeros((row_count, 1))
        for member in best_first:
            column[row_of[member]] = 1
        for member in best_second:
            column[row_of[member]] = -1
        columns.append(column)

        # Replace the two merged groups by their union at the front.
        merged = best_first + best_second
        groups.remove(best_first)
        groups.remove(best_second)
        groups.insert(0, merged)

    matrix = np.hstack(columns) if columns else None
    return matrix, index
def get_DC_value(data, labels, group1, group2, dc_option):
    """
    Compute a data-complexity (DC) value between two groups of classes.

    The measure is looked up by name on ``GC`` as
    ``'get_complexity_' + dc_option`` and called with the data and labels
    of both groups converted to lists.

    :param data: the whole data
    :param labels: the whole labels
    :param group1: group1 classes
    :param group2: group2 classes
    :param dc_option: select which dc is used
    :return: DC value
    :raises NameError: if the lookup or the computation fails; the
        underlying exception is attached as ``__cause__``
    """
    group1_data, group1_label = MT.get_data_subset(data, labels, group1)
    group2_data, group2_label = MT.get_data_subset(data, labels, group2)
    try:
        complexity_fn = getattr(GC, 'get_complexity_' + dc_option)
        return complexity_fn(list(group1_data), list(group1_label),
                             list(group2_data), list(group2_label))
    except Exception as err:
        # Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit pass through, and chain the cause so the real failure
        # is not lost behind the generic message.
        logging.error('DC option is wrong')
        raise NameError('DC option is wrong') from err
def create_matrix(self, data, label, dc_option):
    """
    Build a coding matrix top-down by recursively splitting the label set.

    Starting from the set of all unique labels, each pending set is split
    in two by ``Greedy_Search.greedy_search``; the split is recorded as one
    matrix column and any part with more than one label is queued for
    further splitting.

    :param data: the whole data
    :param label: the whole labels
    :param dc_option: forwarded to ``Greedy_Search.greedy_search``
    :return: ``(matrix, index)`` - ``matrix`` has one row per unique label
        and one column per split (+1 for the first part, -1 for the second,
        0 elsewhere); ``index`` maps each unique label to its row
    """
    all_labels = np.unique(label)
    index = {name: row for row, name in enumerate(all_labels)}
    row_count = len(index)

    matrix = None
    pending = [all_labels]
    while pending:
        current_set = pending.pop(0)
        # Restrict data and labels to the set being split.
        subset_data, subset_labels = MT.get_data_subset(
            data, label, current_set)
        positive, negative = Greedy_Search.greedy_search(
            subset_data, subset_labels, dc_option=dc_option)

        column = np.zeros((row_count, 1))
        for member in positive:
            column[index[member]] = 1
        for member in negative:
            column[index[member]] = -1

        matrix = (copy.copy(column) if matrix is None
                  else np.hstack((matrix, column)))

        # Only parts that still hold several labels need another split.
        for part in (positive, negative):
            if len(part) > 1:
                pending.append(part)
    return matrix, index
def create_matrix(self, data, label):
    """
    Build a coding matrix top-down, splitting each label set with ``sffs``.

    Starting from the set of all unique labels, each pending set is split
    in two by ``sffs``; the split is recorded as one matrix column and any
    part with more than one label is queued for further splitting.

    :param data: the whole data
    :param label: the whole labels
    :return: ``(matrix, index)`` - ``matrix`` has one row per unique label
        and one column per split (+1 for the first part, -1 for the second,
        0 elsewhere); ``index`` maps each unique label to its row
    """
    all_labels = np.unique(label)
    index = {name: row for row, name in enumerate(all_labels)}
    row_count = len(index)

    columns = []
    queue = [all_labels]
    while queue:
        current_set = queue.pop(0)
        subset_data, subset_labels = MT.get_data_subset(
            data, label, current_set)
        positive, negative = sffs(subset_data, subset_labels)

        column = np.zeros((row_count, 1))
        for member in positive:
            column[index[member]] = 1
        for member in negative:
            column[index[member]] = -1
        columns.append(column)

        # Only parts that still hold several labels need another split.
        for part in (positive, negative):
            if len(part) > 1:
                queue.append(part)

    # The queue starts non-empty, so at least one column always exists.
    return np.hstack(columns), index
def create_matrix(self, train_data, train_label, validate_data,
                  validate_label, estimator, **param):
    """
    Build a coding matrix top-down, choosing each split by validation score.

    For every pending label set, each way of picking ``ceil(n / 2)`` labels
    as the first group is tried: an ``estimator(**param)`` is fitted on the
    training samples of both groups (targets +1 / -1) and scored on the
    matching validation samples. The best-scoring split (ties go to the
    later candidate) becomes a matrix column, and its fitted estimator and
    weight are recorded.

    :param train_data: training data
    :param train_label: training labels
    :param validate_data: validation data
    :param validate_label: validation labels
    :param estimator: callable returning an object with ``fit`` and
        ``score`` methods
    :param param: keyword arguments forwarded to ``estimator``
    :return: ``(matrix, index, predictors, predictor_weights)`` - ``matrix``
        has one row per unique training label and one column per split
        (+1 / -1 / 0); ``index`` maps each label to its row; ``predictors``
        holds the winning estimator of each column; ``predictor_weights``
        holds ``MT.estimate_weight(1 - best_score)`` for each column
    """
    index = {l: i for i, l in enumerate(np.unique(train_label))}
    matrix = None
    predictors = []
    predictor_weights = []
    labels_to_divide = [np.unique(train_label)]
    while len(labels_to_divide) > 0:
        label_set = labels_to_divide.pop(0)
        label_count = len(label_set)
        # The np.int alias was removed in NumPy 1.24; builtin int is the
        # documented replacement and yields the same value.
        groups = combinations(range(label_count),
                              int(np.ceil(label_count / 2)))
        score_result = 0
        est_result = None
        for group in groups:
            class_1_variety = np.array([label_set[i] for i in group])
            class_2_variety = np.array(
                [l for l in label_set if l not in class_1_variety])

            # Fit a binary estimator: group 1 -> +1, group 2 -> -1.
            class_1_data, _ = MT.get_data_subset(
                train_data, train_label, class_1_variety)
            class_2_data, _ = MT.get_data_subset(
                train_data, train_label, class_2_variety)
            class_1_cla = np.ones(len(class_1_data))
            class_2_cla = -np.ones(len(class_2_data))
            train_d = np.vstack((class_1_data, class_2_data))
            train_c = np.hstack((class_1_cla, class_2_cla))
            est = estimator(**param).fit(train_d, train_c)

            # Assemble the validation set for the same split.
            class_1_data, _ = MT.get_data_subset(
                validate_data, validate_label, class_1_variety)
            class_2_data, _ = MT.get_data_subset(
                validate_data, validate_label, class_2_variety)
            class_1_cla = np.ones(len(class_1_data))
            class_2_cla = -np.ones(len(class_2_data))
            validation_d = np.array([])
            validation_c = np.array([])
            try:
                validation_d = np.vstack((class_1_data, class_2_data))
                validation_c = np.hstack((class_1_cla, class_2_cla))
            except Exception:
                # Best effort: if stacking fails (e.g. one side has no
                # validation samples), fall back to the non-empty side.
                if len(class_1_data) > 0:
                    validation_d = class_1_data
                    validation_c = class_1_cla
                elif len(class_2_data) > 0:
                    validation_d = class_2_data
                    validation_c = class_2_cla

            if validation_d.shape[0] > 0 and validation_d.shape[1] > 0:
                score = est.score(validation_d, validation_c)
            else:
                # No usable validation samples: fixed fallback score.
                score = 0.8

            if score >= score_result:
                score_result = score
                est_result = est
                class_1_variety_result = class_1_variety
                class_2_variety_result = class_2_variety

        new_col = np.zeros((len(index), 1))
        for i in class_1_variety_result:
            new_col[index[i]] = 1
        for i in class_2_variety_result:
            new_col[index[i]] = -1
        if matrix is None:
            matrix = copy.copy(new_col)
        else:
            matrix = np.hstack((matrix, new_col))
        predictors.append(est_result)
        predictor_weights.append(MT.estimate_weight(1 - score_result))

        # Only groups that still hold several labels need another split.
        if len(class_1_variety_result) > 1:
            labels_to_divide.append(class_1_variety_result)
        if len(class_2_variety_result) > 1:
            labels_to_divide.append(class_2_variety_result)
    return matrix, index, predictors, predictor_weights
def create_matrix(self, data, label):
    """
    Build a ratio-weighted coding matrix by recursive ``sffs`` splits.

    Each pending label set is split in two by ``sffs`` (scored with
    ``Criterion.max_center_distance_score``). For every label in a group,
    the matrix entry is the fraction of that label's samples lying at least
    as close to their own group's center as to the other group's center:
    positive for the first group, negated for the second.

    :param data: the whole data
    :param label: the whole labels
    :return: ``(matrix, index)`` - ``matrix`` has one row per unique label
        and one column per split; ``index`` maps each unique label to its
        row
    """
    index = {l: i for i, l in enumerate(np.unique(label))}
    matrix = None
    labels_to_divide = [np.unique(label)]
    while len(labels_to_divide) > 0:
        label_set = labels_to_divide.pop(0)
        datas, labels = MT.get_data_subset(data, label, label_set)
        class_1_variety_result, class_2_variety_result = sffs(
            datas, labels, score=Criterion.max_center_distance_score)

        class_1_data_result, class_1_label_result = MT.get_data_subset(
            data, label, class_1_variety_result)
        class_2_data_result, class_2_label_result = MT.get_data_subset(
            data, label, class_2_variety_result)
        class_1_center_result = np.average(class_1_data_result, axis=0)
        class_2_center_result = np.average(class_2_data_result, axis=0)

        # Both masks use MT.euclidean_distance (previously only the second
        # did). Explicit bool arrays keep the masks valid even when a group
        # has no samples, where a plain empty list is not a boolean mask.
        belong_to_class_1 = np.array([
            MT.euclidean_distance(x, class_1_center_result)
            <= MT.euclidean_distance(x, class_2_center_result)
            for x in class_1_data_result
        ], dtype=bool)
        belong_to_class_2 = np.array([
            MT.euclidean_distance(x, class_2_center_result)
            <= MT.euclidean_distance(x, class_1_center_result)
            for x in class_2_data_result
        ], dtype=bool)

        # Per label: how many samples sit closer to their own center.
        class_1_true_num = {k: 0 for k in class_1_variety_result}
        class_2_true_num = {k: 0 for k in class_2_variety_result}
        for y in class_1_label_result[belong_to_class_1]:
            class_1_true_num[y] += 1
        for y in class_2_label_result[belong_to_class_2]:
            class_2_true_num[y] += 1

        # Convert once instead of once per label inside the comprehension.
        class_1_label_list = list(class_1_label_result)
        class_2_label_list = list(class_2_label_result)
        class_1_label_count = {
            k: class_1_label_list.count(k) for k in class_1_variety_result
        }
        class_2_label_count = {
            k: class_2_label_list.count(k) for k in class_2_variety_result
        }

        class_1_ratio = {
            k: class_1_true_num[k] / class_1_label_count[k]
            for k in class_1_variety_result
        }
        class_2_ratio = {
            k: -class_2_true_num[k] / class_2_label_count[k]
            for k in class_2_variety_result
        }

        new_col = np.zeros((len(index), 1))
        for i in class_1_ratio:
            new_col[index[i]] = class_1_ratio[i]
        for i in class_2_ratio:
            new_col[index[i]] = class_2_ratio[i]
        if matrix is None:
            matrix = copy.copy(new_col)
        else:
            matrix = np.hstack((matrix, new_col))

        # Only groups that still hold several labels need another split.
        if len(class_1_variety_result) > 1:
            labels_to_divide.append(class_1_variety_result)
        if len(class_2_variety_result) > 1:
            labels_to_divide.append(class_2_variety_result)
    return matrix, index