def imputeMethod(result, loss, firstImputedMethod, autoMethod, originData,
                 missData, missRate, missPattern, dataType='continuous',
                 firstImputedData="None"):
    """
    Run the TAI imputer on ``missData`` and append its scores to ``result``.

    The recorded row is labelled ``"<firstImputedMethod>_<loss>_<autoMethod>"``.
    Any exception raised while imputing or scoring is printed, and a row whose
    three scores are ``np.inf`` is recorded instead.

    :param result: accumulator handed to ``addResult``; it is replaced by
        whatever ``addResult`` returns.
    :param loss: forwarded to ``TAI`` as ``loss`` and used in the label.
    :param firstImputedMethod: forwarded to ``TAI`` as
        ``first_imputation_method`` and used in the label.
    :param autoMethod: forwarded to ``TAI`` as ``Autoencoder_method`` and used
        in the label.
    :param originData: reference data the imputed output is scored against.
    :param missData: data handed to the imputer; its length is also used as
        the batch size.
    :param missRate: recorded next to the scores via ``addResult``.
    :param missPattern: recorded next to the scores via ``addResult``.
    :param dataType: any value other than ``'continuous'`` sends the imputed
        output through ``modifier`` together with the unique non-missing
        values found in ``missData``.
    :param firstImputedData: forwarded to ``TAI``; defaults to the string
        ``"None"`` (not the ``None`` object).
    :return: ``(result, imputedData)`` on success; on failure
        ``(result, firstImputedData)`` with whatever value
        ``firstImputedData`` held when the exception was raised.
    """
    label = "{}_{}_{}".format(firstImputedMethod, loss, autoMethod)
    try:
        imputer = TAI(
            first_imputation_method=firstImputedMethod,
            firstImputedData=firstImputedData,
            batch_size=len(missData),
            epochs=500,
            # A third of the first row's length, truncated towards zero.
            theta=int(len(missData[0]) / 3),
            iterations=1000,
            Autoencoder_method=autoMethod,
            loss=loss,
            use_cuda=False,
        )
        imputedData, firstImputedData = imputer.complete(missData)

        if dataType != 'continuous':
            # np.unique flattens the input; dropna removes the missing marker.
            unique_frame = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in unique_frame.values]
            # NOTE(review): modifier presumably maps imputed values back onto
            # the observed discrete values -- confirm against its definition.
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
        returned_data = imputedData
    except Exception as err:
        print(err)
        # Scoring stays inside the try on purpose: a failure anywhere above
        # is recorded as an all-infinite row rather than aborting the run.
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
        returned_data = firstImputedData
    return result, returned_data
def imputeMethodMICE(result, originData, missData, missRate, missPattern,
                     dataType='continuous'):
    """
    Impute ``missData`` with ``mice.MICE`` and append its scores to ``result``.

    The recorded row is labelled ``"MICE"``. Any exception raised while
    imputing or scoring is printed, and a row whose three scores are
    ``np.inf`` is recorded instead.

    :param result: accumulator handed to ``addResult``; it is replaced by
        whatever ``addResult`` returns.
    :param originData: reference data the imputed output is scored against.
    :param missData: data handed to the imputer.
    :param missRate: recorded next to the scores via ``addResult``.
    :param missPattern: recorded next to the scores via ``addResult``.
    :param dataType: any value other than ``'continuous'`` sends the imputed
        output through ``modifier`` together with the unique non-missing
        values found in ``missData``.
    :return: ``(result, imputedData)`` on success, or ``(result, 'none')``
        (the literal string) when an exception was caught.
    """
    label = "MICE"
    try:
        # Every data type starts from the same MICE completion; only the
        # post-processing step differs.
        imputedData = mice.MICE().complete(missData)
        if dataType != 'continuous':
            # np.unique flattens the input; dropna removes the missing marker.
            unique_frame = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in unique_frame.values]
            # NOTE(review): modifier presumably maps imputed values back onto
            # the observed discrete values -- confirm against its definition.
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as err:
        print(err)
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
        # String sentinel kept as-is: callers receive 'none' on failure.
        return result, 'none'
    return result, imputedData
def imputeMethodMedain(result, originData, missData, missRate, missPattern,
                       dataType='continuous'):
    """
    Impute ``missData`` with ``SimpleFill("median")`` and record its scores.

    The recorded row is labelled ``"median"``. Any exception raised while
    imputing or scoring is printed, and a row whose three scores are
    ``np.inf`` is recorded instead.

    :param result: accumulator handed to ``addResult``; it is replaced by
        whatever ``addResult`` returns.
    :param originData: reference data the imputed output is scored against.
    :param missData: data handed to the imputer.
    :param missRate: recorded next to the scores via ``addResult``.
    :param missPattern: recorded next to the scores via ``addResult``.
    :param dataType: any value other than ``'continuous'`` sends the imputed
        output through ``modifier`` together with the unique non-missing
        values found in ``missData``.
    :return: ``(result, imputedData)`` on success, or ``(result, 'none')``
        (the literal string) when an exception was caught.
    """
    label = "median"
    try:
        filler = SimpleFill("median")
        imputedData = filler.fit_transform(missData)
        if dataType != 'continuous':
            # np.unique flattens the input; dropna removes the missing marker.
            unique_frame = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in unique_frame.values]
            # NOTE(review): modifier presumably maps imputed values back onto
            # the observed discrete values -- confirm against its definition.
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as err:
        print(err)
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
        # String sentinel kept as-is: callers receive 'none' on failure.
        return result, 'none'
    return result, imputedData
def imputeMethodMR(result, originData, missData, missRate, missPattern,
                   dataType='continuous'):
    """
    Impute ``missData`` with a ``PredictiveImputer`` and record its scores.

    The imputer is built with ``f_model='RandomForest'`` and the recorded row
    is labelled ``"RandomForest"``. Any exception raised while imputing or
    scoring is printed, and a row whose three scores are ``np.inf`` is
    recorded instead.

    :param result: accumulator handed to ``addResult``; it is replaced by
        whatever ``addResult`` returns.
    :param originData: reference data the imputed output is scored against.
    :param missData: data the imputer is fitted on; a copy of it is what gets
        transformed.
    :param missRate: recorded next to the scores via ``addResult``.
    :param missPattern: recorded next to the scores via ``addResult``.
    :param dataType: any value other than ``'continuous'`` sends the imputed
        output through ``modifier`` together with the unique non-missing
        values found in ``missData``.
    :return: ``(result, imputedData)`` on success, or ``(result, 'none')``
        (the literal string) when an exception was caught.
    """
    label = "RandomForest"
    try:
        imputer = predictive_imputer.PredictiveImputer(f_model='RandomForest')
        fitted = imputer.fit(missData)
        # Transform a copy so the caller's missData is not the array handed
        # to transform().
        imputedData = fitted.transform(missData.copy())
        if dataType != 'continuous':
            # np.unique flattens the input; dropna removes the missing marker.
            unique_frame = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in unique_frame.values]
            # NOTE(review): modifier presumably maps imputed values back onto
            # the observed discrete values -- confirm against its definition.
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as err:
        print(err)
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
        # String sentinel kept as-is: callers receive 'none' on failure.
        return result, 'none'
    return result, imputedData
def imputeMethodFixed(result, originData, missData, missRate, missPattern,
                      dataType='continuous'):
    """
    Default-value imputation via ``fixedImpute``; records its scores.

    According to the original note on this function the missing entries are
    filled with 0 (``fixedImpute`` is defined elsewhere -- confirm there).
    The recorded row is labelled ``"Fixed"``. Any exception raised while
    imputing or scoring is printed, and a row whose three scores are
    ``np.inf`` is recorded instead.

    :param result: accumulator handed to ``addResult``; it is replaced by
        whatever ``addResult`` returns.
    :param originData: reference data the imputed output is scored against.
    :param missData: data handed to ``fixedImpute``.
    :param missRate: recorded next to the scores via ``addResult``.
    :param missPattern: recorded next to the scores via ``addResult``.
    :param dataType: any value other than ``'continuous'`` sends the imputed
        output through ``modifier`` together with the unique non-missing
        values found in ``missData``.
    :return: ``(result, imputedData)`` on success, or ``(result, 'none')``
        (the literal string) when an exception was caught.
    """
    label = "Fixed"
    try:
        imputedData = fixedImpute(missData)
        if dataType != 'continuous':
            # np.unique flattens the input; dropna removes the missing marker.
            unique_frame = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in unique_frame.values]
            # NOTE(review): modifier presumably maps imputed values back onto
            # the observed discrete values -- confirm against its definition.
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as err:
        print(err)
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
        # String sentinel kept as-is: callers receive 'none' on failure.
        return result, 'none'
    return result, imputedData
elif missPattern == 'chara': missData = gene_missingdata_chara_bias(rate=i, data=originData) elif missPattern == 'block': missData = gene_missingdata_block_bias(rate=i, data=originData) else: raise Exception( "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式") mark = [ temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna( axis=0).values ] try: imputedData = mice.MICE().complete(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) mice_misc[0].append(score) mice_misc[1].append(MAE(originData, imputedData)) mice_misc[2].append(masked_mape_np(originData, imputedData)) mice_misc[3].append(TF(originData, imputedData)) logger.info("MICE missing rate:{},RMSE:{}".format(i, score)) except: mice_misc[0].append(np.inf) mice_misc[1].append(np.inf) mice_misc[2].append(np.inf) mice_misc[3].append(np.inf) try: imputedData = IterativeImputer().fit_transform(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData)