def group_fit(features):
    """Fit a two-component Gaussian mixture used to tell noisy labels from clean ones.

    After fitting, the components are ordered so that index 1 is the
    component with the larger mean along feature column 1. The original
    author treats that higher-valued component as the "clean label" group.

    :param features: sample matrix handed unchanged to ``GaussianMixture.fit``.
        Column 1 of the fitted means is read, so presumably at least two
        feature columns are required -- confirm against callers.
    :return: the fitted ``GaussianMixture`` instance (possibly with its two
        components swapped).
    """
    mixture = GaussianMixture(n_components=2, max_iter=10, tol=1e-2, reg_covar=5e-4)
    mixture.fit(features)

    # Component 1 already has the larger column-1 mean: nothing to reorder.
    if np.argmax(mixture.means_[:, 1]) != 0:
        return mixture

    # Component 0 has the larger mean, so reverse every fitted parameter
    # array along its first axis to swap the two components. This goes
    # through scikit-learn's private parameter accessors.
    swapped = []
    for fitted_param in mixture._get_parameters():
        swapped.append(fitted_param[::-1])
    mixture._set_parameters(swapped)
    return mixture
# df[df[:, 0] == 'U'] = 20
# df[df[:, 0] == 'V'] = 21
# df[df[:, 0] == 'W'] = 22
# df[df[:, 0] == 'X'] = 23
# df[df[:, 0] == 'Y'] = 24
# df[df[:, 0] == 'Z'] = 25
# df_target_names = ['A', 'B', 'C']

# Work on the first 200 rows only. Column 0 is split off as the target
# (presumably the class label -- confirm against the loading code) and the
# remaining columns are used as the feature matrix.
df = df[:200, :]
df_data = df[:, 1:]
df_target = df[:, 0]

# Fit a 16-component mixture (10 initialisations) and inspect its output.
gmm = GaussianMixture(n_components=16, n_init=10)
gmm.fit(df_data)
prob = gmm.predict_proba(df_data)
print(prob.shape)

# NOTE(review): _get_parameters() is a private scikit-learn accessor; it is
# only used here to print the shape of each fitted parameter array.
para = gmm._get_parameters()
print(len(para))  # tuple length
for param_array in para:
    print(param_array.shape)

# X_train = df_data[:15000, 1:]
# y_train = df_target[:15000]
# X_test = df_data[15000:, 1:]
# y_test = df_target[15000:]
#
# n_classes = len(np.unique(y_train))
#
# # Try GMMs using different types of covariances.
# estimators = dict((cov_type, GaussianMixture(n_components=n_classes,
#                    covariance_type=cov_type, max_iter=200, random_state=0))
#                   for cov_type in ['spherical', 'diag', 'tied', 'full'])