Esempio n. 1
0
def group_fit(features):
    """
    Fit a two-component Gaussian mixture used to tell noisy-label samples
    apart from clean ones based on their features.

    After fitting, the components are ordered so that component 0 is never
    the one with the largest mean in feature column 1; if it is, the order
    of the two components is flipped.

    :param features: samples to fit; column index 1 must exist because the
        component means are compared on it (presumably an array of shape
        (n_samples, n_features) -- confirm against the caller).
    :return: the fitted ``GaussianMixture`` instance.
    """
    mixture = GaussianMixture(2, max_iter=10, tol=1e-2, reg_covar=5e-4)
    mixture.fit(features)

    # Index of the component whose mean in feature column 1 is largest.
    # Per the original author's note, that component is treated as the
    # "clean label" group.
    dominant = np.argmax(mixture.means_[:, 1])
    if dominant != 0:
        # Ordering is already the desired one; nothing to flip.
        return mixture

    # Reverse every fitted parameter so the two components trade places.
    # NOTE(review): this relies on scikit-learn's private
    # _get_parameters/_set_parameters helpers.
    flipped = []
    for param in mixture._get_parameters():
        flipped.append(param[::-1])
    mixture._set_parameters(flipped)

    return mixture
Esempio n. 2
0
# df[df[:, 0] == 'U'] = 20
# df[df[:, 0] == 'V'] = 21
# df[df[:, 0] == 'W'] = 22
# df[df[:, 0] == 'X'] = 23
# df[df[:, 0] == 'Y'] = 24
# df[df[:, 0] == 'Z'] = 25
# df_target_names = ['A', 'B', 'C']
# Keep only the first 200 rows of the data set.
# NOTE(review): the tuple-style indexing assumes `df` is a 2-D NumPy-like
# array rather than a pandas DataFrame -- confirm where `df` is loaded.
df = df[:200, :]

# Column 0 is taken as the target; the remaining columns are the features.
df_target = df[:, 0]
df_data = df[:, 1:]

# Fit a 16-component Gaussian mixture (n_init=10) on the feature columns.
gmm = GaussianMixture(n_components=16, n_init=10)
gmm.fit(df_data)

# Per-sample component membership probabilities.
prob = gmm.predict_proba(df_data)
print(prob.shape)

# Inspect the fitted parameters: how many there are, then each one's shape.
para = gmm._get_parameters()
print(len(para))  # tuple length
for fitted_param in para:
    print(fitted_param.shape)

# X_train = df_data[:15000, 1:]
# y_train = df_target[:15000]
# X_test = df_data[15000:, 1:]
# y_test = df_target[15000:]
#
# n_classes = len(np.unique(y_train))
#
# # Try GMMs using different types of covariances.
# estimators = dict((cov_type, GaussianMixture(n_components=n_classes,
#                    covariance_type=cov_type, max_iter=200, random_state=0))
#                   for cov_type in ['spherical', 'diag', 'tied', 'full'])