def imputeMethodMICE(result, originData, missData, missRate, missPattern,
                     dataType='continuous'):
    """
    Impute ``missData`` with ``mice.MICE`` and record its error metrics.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` replaces it.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing missing entries to be filled.
    :param missRate: missing rate, forwarded unchanged to ``addResult``.
    :param missPattern: missing pattern, forwarded unchanged to ``addResult``.
    :param dataType: when not ``'continuous'``, the imputed data is additionally
        passed through ``modifier`` with the distinct non-NaN values of
        ``missData``.
    :return: ``(result, imputedData)``; if anything raises, the metrics are
        recorded as ``np.inf`` and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "MICE"
    try:
        # The MICE call is the same for every data type; only the
        # post-processing differs.
        imputedData = mice.MICE().complete(missData)
        if dataType != 'continuous':
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            # presumably maps imputed values onto the observed ones -- confirm
            # against modifier
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as exc:
        # Best effort: report the problem and log an "infinitely bad" score.
        print(exc)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
Beispiel #2
0
def imputeMethod(result,
                 loss,
                 firstImputedMethod,
                 autoMethod,
                 originData,
                 missData,
                 missRate,
                 missPattern,
                 dataType='continuous',
                 firstImputedData="None"):
    """
    Impute ``missData`` with a ``TAI`` imputer and record its error metrics.

    The method label stored with the metrics is
    ``"<firstImputedMethod>_<loss>_<autoMethod>"``.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` replaces it.
    :param loss: forwarded to ``TAI`` as ``loss``.
    :param firstImputedMethod: forwarded to ``TAI`` as
        ``first_imputation_method``.
    :param autoMethod: forwarded to ``TAI`` as ``Autoencoder_method``.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing missing entries; ``len(missData)`` is used
        as the batch size and ``int(len(missData[0]) / 3)`` as ``theta``.
    :param missRate: missing rate, forwarded unchanged to ``addResult``.
    :param missPattern: missing pattern, forwarded unchanged to ``addResult``.
    :param dataType: when not ``'continuous'``, the imputed data is additionally
        passed through ``modifier`` with the distinct non-NaN values of
        ``missData``.
    :param firstImputedData: forwarded to ``TAI``; replaced by the second value
        returned from ``complete`` once that call succeeds.
    :return: ``(result, imputedData)`` on success. If anything raises, the
        metrics are recorded as ``np.inf`` and ``(result, firstImputedData)``
        is returned instead, with whatever value ``firstImputedData`` holds at
        that point.
    """
    imputationMethod = "{}_{}_{}".format(firstImputedMethod, loss, autoMethod)
    try:
        imputer = TAI(first_imputation_method=firstImputedMethod,
                      firstImputedData=firstImputedData,
                      batch_size=len(missData),
                      epochs=500,
                      theta=int(len(missData[0]) / 3),
                      iterations=1000,
                      Autoencoder_method=autoMethod,
                      loss=loss,
                      use_cuda=False)
        # Rebinding firstImputedData is intentional: the failure path below
        # returns it, so a later error still hands back the fresh value.
        imputedData, firstImputedData = imputer.complete(missData)

        if dataType != 'continuous':
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as exc:
        print(exc)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
        return result, firstImputedData

    return result, imputedData
def imputeMethodMedain(result, originData, missData, missRate, missPattern,
                       dataType='continuous'):
    """
    Impute ``missData`` with ``SimpleFill("median")`` and record its metrics.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` replaces it.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing missing entries to be filled.
    :param missRate: missing rate, forwarded unchanged to ``addResult``.
    :param missPattern: missing pattern, forwarded unchanged to ``addResult``.
    :param dataType: when not ``'continuous'``, the imputed data is additionally
        passed through ``modifier`` with the distinct non-NaN values of
        ``missData``.
    :return: ``(result, imputedData)``; if anything raises, the metrics are
        recorded as ``np.inf`` and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "median"
    try:
        filler = SimpleFill("median")
        imputedData = filler.fit_transform(missData)
        if dataType != 'continuous':
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as exc:
        # Best effort: report the problem and log an "infinitely bad" score.
        print(exc)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
Beispiel #4
0
 def evaluate(self, X_mis, X_full):
     """
     Score EM imputation of ``X_mis`` against ``X_full`` on the missing cells.

     The NaN mask of ``X_mis`` is turned into an index with
     ``evaluate.get_missing_index``; that index selects the reference values
     from ``X_full`` and the filled values from the EM result, and the two
     selections are compared with ``evaluate.RMSE``.

     NOTE(review): ``evaluate`` inside the body is expected to resolve to a
     module-level object of the same name rather than this method -- confirm
     against the file's imports.

     :param X_mis: data containing NaN entries; a shallow copy is passed to EM.
     :param X_full: complete reference data, indexed like ``X_mis``.
     :return: the RMSE value produced by ``evaluate.RMSE``.
     """
     nan_mask = np.isnan(X_mis)
     missing_index = evaluate.get_missing_index(nan_mask)
     truth = X_full[missing_index]
     # EM is given a copy rather than X_mis itself.
     filled = EM().complete(copy.copy(X_mis))
     return evaluate.RMSE(truth, filled[missing_index])
Beispiel #5
0
def imputeMethod2(result, originData, missData, missRate, missPattern):
    """
    Impute ``missData`` with ``yKNN`` and record its error metrics.

    The neighbour count is ``int(sqrt(len(missData)))``.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` replaces it.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing missing entries to be filled.
    :param missRate: missing rate, forwarded unchanged to ``addResult``.
    :param missPattern: missing pattern, forwarded unchanged to ``addResult``.
    :return: the updated ``result`` only; if anything raises, the metrics are
        recorded as ``np.inf``.
    """
    imputationMethod = "ycimpute KNN"
    try:
        neighbours = int(math.sqrt(len(missData)))
        imputedData = yKNN(k=neighbours).complete(missData)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as exc:
        # Best effort: report the problem and log an "infinitely bad" score.
        print(exc)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result
def imputeMethodMR(result,
                   originData,
                   missData,
                   missRate,
                   missPattern,
                   dataType='continuous'):
    """
    Impute ``missData`` with a RandomForest ``PredictiveImputer`` and record
    its error metrics.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` replaces it.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing missing entries; the imputer is fitted on
        it and then transforms a copy made with ``missData.copy()``.
    :param missRate: missing rate, forwarded unchanged to ``addResult``.
    :param missPattern: missing pattern, forwarded unchanged to ``addResult``.
    :param dataType: when not ``'continuous'``, the imputed data is additionally
        passed through ``modifier`` with the distinct non-NaN values of
        ``missData``.
    :return: ``(result, imputedData)``; if anything raises, the metrics are
        recorded as ``np.inf`` and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "RandomForest"
    try:
        imputer = predictive_imputer.PredictiveImputer(f_model='RandomForest')
        fitted = imputer.fit(missData)
        # Transform a copy rather than missData itself.
        imputedData = fitted.transform(missData.copy())
        if dataType != 'continuous':
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as exc:
        # Best effort: report the problem and log an "infinitely bad" score.
        print(exc)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
def imputeMethodFixed(result, originData, missData, missRate, missPattern,
                      dataType='continuous'):
    """
    Default-value imputation via ``fixedImpute``, with its metrics recorded.

    Per the original author's note the fill value is 0; ``fixedImpute`` is
    defined elsewhere, so verify there.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` replaces it.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing missing entries to be filled.
    :param missRate: missing rate, forwarded unchanged to ``addResult``.
    :param missPattern: missing pattern, forwarded unchanged to ``addResult``.
    :param dataType: when not ``'continuous'``, the imputed data is additionally
        passed through ``modifier`` with the distinct non-NaN values of
        ``missData``.
    :return: ``(result, imputedData)``; if anything raises, the metrics are
        recorded as ``np.inf`` and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "Fixed"
    try:
        imputedData = fixedImpute(missData)
        if dataType != 'continuous':
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as exc:
        # Best effort: report the problem and log an "infinitely bad" score.
        print(exc)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
Beispiel #8
0
                missData = gene_missingdata_chara_bias(rate=i, data=originData)
            elif missPattern == 'block':
                missData = gene_missingdata_block_bias(rate=i, data=originData)
            else:
                raise Exception(
                    "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")

            mark = [
                temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna(
                    axis=0).values
            ]

            try:
                imputedData = mice.MICE().complete(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
                mice_misc[0].append(score)
                mice_misc[1].append(MAE(originData, imputedData))
                mice_misc[2].append(masked_mape_np(originData, imputedData))
                mice_misc[3].append(TF(originData, imputedData))
                logger.info("MICE missing rate:{},RMSE:{}".format(i, score))
            except:
                mice_misc[0].append(np.inf)
                mice_misc[1].append(np.inf)
                mice_misc[2].append(np.inf)
                mice_misc[3].append(np.inf)
            try:
                imputedData = IterativeImputer().fit_transform(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
                ii_misc[0].append(score)