Esempio n. 1
0
def Em(name1, feature, n1, n2):
    """Run the EM solver on selected columns of the feature CSV.

    The CSV location and its GBK encoding are hard-coded in the body.

    :param name1: unused; kept so existing callers keep working.
    :param feature: a column label, or a list of column labels, to select.
    :param n1: unused; kept so existing callers keep working.
    :param n2: unused; kept so existing callers keep working.
    :return: whatever ``EM().solve`` returns for the selected values.
    """
    data = pd.read_csv('E:\\feature.csv', encoding='gbk')
    # The original indexed by ``feature`` twice. With a single label the
    # first lookup yields a Series, so the second one searched the row
    # index and failed. Select once, and wrap a bare label in a list so the
    # selection is always a two-dimensional block of columns.
    columns = [feature] if isinstance(feature, str) else feature
    values = data[columns].values
    return EM().solve(values)
Esempio n. 2
0
def EMmake(name1, name2):
    """Run the EM solver on values prepared by the ``share`` helpers.

    The values come from the first item returned by ``share.Csvopreation``
    and are passed through ``share.Standardize`` together with their
    NaN-ignoring axis-0 mean and standard deviation.

    :param name1: first argument forwarded to ``share.Csvopreation``.
    :param name2: second argument forwarded to ``share.Csvopreation``.
    :return: the result of ``EM().solve`` on the prepared values.
    """
    # Three items come back; only the first one is used here.
    trafficflow, _second, _third = share.Csvopreation(name1, name2)
    raw_values = trafficflow.values
    axis0_mean = np.nanmean(raw_values, axis=0)
    axis0_std = np.nanstd(raw_values, axis=0)
    prepared = share.Standardize(raw_values, axis0_mean, axis0_std)
    # EM algorithm
    return EM().solve(prepared)
Esempio n. 3
0
def test_em():
    """Print the RMSE of EM imputation on the masked entries.

    Relies on the module-level ``missing_data``, ``complete_data`` and
    ``missing_mask``. Both the reference data and the imputed data go
    through ``min_max_scale`` before they are compared.
    """
    imputed = EM().complete(missing_data)
    # min_max_scale returns three items; only the first one is needed.
    reference_scaled, _r1, _r2 = min_max_scale(complete_data)
    imputed_scaled, _i1, _i2 = min_max_scale(imputed)

    print(RMSE(reference_scaled[missing_mask], imputed_scaled[missing_mask]))
Esempio n. 4
0
 def evaluate(self, X_mis, X_full):
     """Return the RMSE of EM imputation at the missing positions.

     Inside this body the name ``evaluate`` resolves to the module-level
     ``evaluate`` object (the provider of ``get_missing_index`` and
     ``RMSE``), not to this method.

     :param X_mis: data containing NaN at the missing positions.
     :param X_full: reference data indexed with the same positions.
     :return: value of ``evaluate.RMSE`` for reference vs. imputed entries.
     """
     nan_mask = np.isnan(X_mis)
     missing_index = evaluate.get_missing_index(nan_mask)
     reference = X_full[missing_index]
     # The imputer is handed a copy rather than X_mis itself.
     imputed = EM().complete(copy.copy(X_mis))
     return evaluate.RMSE(reference, imputed[missing_index])
Esempio n. 5
0
    def em_missing_value(self, **params) -> pd.DataFrame:
        """
        Overwrite ``self.columns`` of ``self.df`` with EM-imputed values.

        Author's note: EM is based on a Gaussian distribution and can handle
        mixed data, but performs relatively better on continuous data. Under
        a variety of missing-data mechanisms, EM performs well compared with
        other methods.

        :param params: accepted but not used by this method.
        :return: ``self.df`` after the selected columns have been replaced.
        """
        target = self.columns
        raw = self.df[target].values
        self.df[target] = EM(max_iter=1000).complete(raw)
        return self.df
Esempio n. 6
0
#pip install ycimpute
#EM Algoritma
import  seaborn as sns
import missingno as msno
import numpy as np
import pandas as pd
from ycimpute.imputer import EM

# Load the Titanic sample and keep only its float64/int64 columns.
df = sns.load_dataset("titanic").select_dtypes(include=["float64", "int64"])
print(df)
print(df.isnull().sum())

# Column labels, kept so the imputed array can be turned back into a frame.
var_name = list(df.columns)
print(var_name)

arr_df = np.array(df)
print(arr_df)
print(arr_df.shape)

# Run EM imputation on the array, then rebuild a labelled DataFrame.
arr_dffill = EM().complete(arr_df)
df_fill = pd.DataFrame(data=arr_dffill, columns=var_name)
print(df_fill.isnull().sum())
 # Each imputer is tried independently. When one fails, NaN is recorded for
 # it and the failure is logged, so the remaining imputers still run.
 # Only ordinary errors are absorbed: KeyboardInterrupt and SystemExit are
 # no longer swallowed as they were with a bare ``except:``.
 try:
     imputedData = knnimput.KNN(k=3).complete(missData)
     score = evaluate.RMSE(originData, imputedData)
     knn_rmse.append(score)
     logger.info("knn missing rate:{},RMSE:{}".format(i, score))
 except Exception as exc:
     logger.warning("knn missing rate:{},failed:{}".format(i, exc))
     knn_rmse.append(np.nan)
 try:
     imputedData = mice.MICE().complete(missData)
     score = evaluate.RMSE(originData, imputedData)
     mice_rmse.append(score)
     logger.info("MICE missing rate:{},RMSE:{}".format(i, score))
 except Exception as exc:
     logger.warning("MICE missing rate:{},failed:{}".format(i, exc))
     mice_rmse.append(np.nan)
 try:
     imputedData = EM().complete(missData)
     score = evaluate.RMSE(originData, imputedData)
     em_rmse.append(score)
     logger.info("EM missing rate:{},RMSE:{}".format(i, score))
 except Exception as exc:
     logger.warning("EM missing rate:{},failed:{}".format(i, exc))
     em_rmse.append(np.nan)
 try:
     # BiScaler output is fed into SoftImpute before scoring.
     imputedData = BiScaler().fit_transform(missData)
     imputedData = SoftImpute().fit_transform(imputedData)
     score = evaluate.RMSE(originData, imputedData)
     fi_bs_rmse.append(score)
     logger.info("fi BiScaler  missing rate:{},RMSE:{}".format(
         i, score))
 except Exception as exc:
     logger.warning("fi BiScaler  missing rate:{},failed:{}".format(
         i, exc))
     fi_bs_rmse.append(np.nan)
 try:
Esempio n. 8
0
# Column labels, kept so the imputed array can be relabelled afterwards.
var_names = list(df.columns)

import numpy as np

n_df = np.array(df)

from ycimpute.imputer import iterforest

# Impute with the iterforest imputer.
imputer = iterforest.IterImput()
dff = imputer.complete(n_df)

import pandas as pd

# Back to a labelled DataFrame, then count the remaining nulls per column.
dff = pd.DataFrame(data=dff, columns=var_names)
dff.isnull().sum()

### EM

import seaborn as sns
import missingno as msno

# Titanic sample restricted to its float64/int64 columns.
df = sns.load_dataset('titanic').select_dtypes(include=['float64', 'int64'])

from ycimpute.imputer import EM

# Column labels, kept so the imputed array can be relabelled afterwards.
var_names = list(df.columns)

import numpy as np

n_df = np.array(df)

# Impute with EM and restore the column labels.
dff = pd.DataFrame(data=EM().complete(n_df), columns=var_names)

dff.isnull().sum()  # i.e. all of the missing values have now been filled.