def imputeMethodMICE(result,
                     originData,
                     missData,
                     missRate,
                     missPattern,
                     dataType='continuous'):
    """Impute ``missData`` with MICE and append evaluation metrics to ``result``.

    :param result: accumulator passed through to ``addResult``
    :param originData: complete ground-truth matrix used for scoring
    :param missData: matrix containing missing entries to impute
    :param missRate: missing rate recorded alongside the scores
    :param missPattern: missing pattern label recorded alongside the scores
    :param dataType: 'continuous' (default) or any other value for
        categorical-like data, which additionally snaps imputed values back
        to the observed value set via ``modifier``
    :return: tuple ``(result, imputedData)``; ``imputedData`` is the string
        'none' when imputation fails
    """
    imputationMethod = "MICE"
    try:
        # Run MICE once; the original code issued the identical call in
        # both branches, so it is hoisted out of the conditional.
        imputedData = mice.MICE().complete(missData)
        if dataType != 'continuous':
            # Observed (non-NaN) values of the input — presumably the legal
            # categories that imputed cells must be mapped back onto.
            mark = [
                temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna(
                    axis=0).values
            ]
            imputedData = modifier(imputedData, mark)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           evaluate.RMSE(originData, imputedData),
                           MAE(originData, imputedData),
                           masked_mape_np(originData, imputedData))
    except Exception as e:
        # Best-effort: record infinite scores instead of aborting the run.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
Beispiel #2
0
    def mice_missing_value(self, **params) -> pd.DataFrame:
        """Impute the selected columns in-place using MICE.

        The algorithm is based on linear regression. MICE can fill mixed
        data such as continuous, binary and discrete values while keeping
        the data consistent.

        :param params: unused; kept for interface compatibility
        :return: the dataframe with ``self.columns`` imputed
        """
        raw_values = self.df[self.columns].values
        completed = mice.MICE().complete(raw_values)
        self.df[self.columns] = completed
        return self.df
Beispiel #3
0
                missData = gene_missingdata_taxa_bias(rate=i, data=originData)
            elif missPattern == 'chara':
                missData = gene_missingdata_chara_bias(rate=i, data=originData)
            elif missPattern == 'block':
                missData = gene_missingdata_block_bias(rate=i, data=originData)
            else:
                raise Exception(
                    "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")

            mark = [
                temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna(
                    axis=0).values
            ]

            try:
                imputedData = mice.MICE().complete(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
                mice_misc[0].append(score)
                mice_misc[1].append(MAE(originData, imputedData))
                mice_misc[2].append(masked_mape_np(originData, imputedData))
                mice_misc[3].append(TF(originData, imputedData))
                logger.info("MICE missing rate:{},RMSE:{}".format(i, score))
            except:
                mice_misc[0].append(np.inf)
                mice_misc[1].append(np.inf)
                mice_misc[2].append(np.inf)
                mice_misc[3].append(np.inf)
            try:
                imputedData = IterativeImputer().fit_transform(missData)
                imputedData = modifier(imputedData, mark)
            imputedData = mice.MICE().complete(missData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           evaluate.RMSE(originData, imputedData),
                           MAE(originData, imputedData),
                           masked_mape_np(originData, imputedData))
    except Exception as e:
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData


if __name__ == "__main__":
    dataPath = r'C:\Users\pro\Desktop\实验二自编码器建树\古生物数据集测试\01起始数据集\01_Yang2015.nex'

    missData, missRow, speciesName, begin, end = readNex(dataPath)
    #missData = lableEncoder(originData)
    result = {}
    missRate = 0.3
    missPattern = "normal"
    print(missData)
    # Gather the distinct cell values as a quick sanity check on the data.
    distinct_values = {
        missData[row][col]
        for row in range(len(missData))
        for col in range(len(missData[0]))
    }
    print(distinct_values)
    print(np.isnan(missData).any())
    print(np.isfinite(missData).all())
    t = mice.MICE().complete(missData)
Beispiel #5
0
import torch.utils.data
from pandas import isnull
from functools import partial
from logger import logger
from sklearn.preprocessing import StandardScaler
#inherited base class and model
from utils.tools import Solver
from dnn.autoencoder_test_partice import Autoencoder,ResAutoencoder,StockedAutoencoder,StockedResAutoencoder
from utils.normalizer import NORMALIZERS,RECOVER


#baseline imputation methods
from ycimpute.imputer import  mice
from utils.base_impute import random_inpute
from fancyimpute import IterativeImputer, SimpleFill
# Baseline imputers keyed by short name; each value is a callable that,
# presumably, takes a matrix with missing entries and returns the
# completed matrix — TODO confirm against ycimpute/fancyimpute docs.
imputation = {'median':SimpleFill("median").fit_transform,'random':random_inpute,'mice':mice.MICE().complete,'ii':IterativeImputer().fit_transform}
# Autoencoder architectures selectable by name.
AUTOENCODER_METHOD={'Autoencoder':Autoencoder,'ResAutoencoder':ResAutoencoder,'StockedAutoencoder':StockedAutoencoder,'StockedResAutoencoder':StockedResAutoencoder}
# Loss functions selectable by name for training.
LOSS={'MSELoss':torch.nn.MSELoss(),'CrossEntropyLoss':torch.nn.CrossEntropyLoss()}


class TAI(Solver):
#original parameters
    def __init__(
            self,
            theta=5,
            epochs=50,
            use_cuda=False,
            batch_size=64,
            early_stop=1e-06,
            normalizer='zero_score',
            iterations=30,
Beispiel #6
0
from ray.tune.suggest.hyperopt import HyperOptSearch
from utils.handle_missingdata import gene_missingdata
# space = {
#     "lr": hp.loguniform("lr", 1e-10, 0.1),
#     "momentum": hp.uniform("momentum", 0.1, 0.9),
# }

#baseline imputation methods
from ycimpute.imputer import mice
from ycimpute.utils import evaluate
from utils.base_impute import random_inpute
from fancyimpute import IterativeImputer, SimpleFill
# Baseline imputers keyed by short name; each value is a callable that,
# presumably, completes a matrix containing missing entries — TODO
# confirm against ycimpute/fancyimpute documentation.
imputation = {
    'median': SimpleFill("median").fit_transform,
    'random': random_inpute,
    'mice': mice.MICE().complete,
    'ii': IterativeImputer().fit_transform
}


class TAI(Solver):
    def __init__(self,
                 theta=5,
                 epochs=50,
                 use_cuda=False,
                 batch_size=64,
                 early_stop=1e-06,
                 normalizer='zero_score',
                 iterations=30,
                 first_imputation_method='ii',
                 learning_rate=0.001,