def _generate_missing_data(pattern, rate, data):
    """Dispatch to the missing-data generator selected by ``pattern``.

    Only the selected branch is evaluated, so a generator name is looked up
    only when its pattern is actually requested.

    Args:
        pattern: one of 'normal', 'taxa', 'chara', 'block'.
        rate: missing rate forwarded to the generator as ``rate``.
        data: data forwarded to the generator as ``data``.

    Returns:
        Whatever the selected ``gene_missingdata*`` function returns.

    Raises:
        ValueError: if ``pattern`` is not one of the four supported names.
    """
    if pattern == 'normal':
        return gene_missingdata(rate=rate, data=data)
    if pattern == 'taxa':
        return gene_missingdata_taxa_bias(rate=rate, data=data)
    if pattern == 'chara':
        return gene_missingdata_chara_bias(rate=rate, data=data)
    if pattern == 'block':
        return gene_missingdata_block_bias(rate=rate, data=data)
    raise ValueError("缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")


def mainWork(path, savePath):
    """Run the MICE + TAI experiment on every xlsx/csv entry found in ``path``.

    For each matching file and each missing pattern, missing data is
    generated at several missing rates, imputed with ``MICE``, and the MICE
    output is handed to ``TAI`` as ``firstImputedData``.  The ``result``
    dict threaded through those calls is written with ``saveJson`` once per
    (pattern, file) pair, under a timestamped file name inside ``savePath``.

    Args:
        path: directory whose entries are listed with ``os.listdir``.
        savePath: directory receiving the JSON files; created if missing.

    Raises:
        ValueError: if an unsupported missing pattern is configured.
        KeyError: if a first-stage method has no imputed data registered.
    """
    pbar = tqdm.tqdm(os.listdir(path), desc='dirs')
    for file_name in pbar:
        pbar.set_description("Processing %s" % file_name)
        # str.endswith accepts a tuple of suffixes; skip everything else.
        if not file_name.endswith(('xlsx', 'csv')):
            continue

        originData = readAllTypeFile(os.path.join(path, file_name))
        for missPattern in ['normal']:
            # One result dict per (file, pattern); it accumulates every rate.
            result = {}
            for missRate in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]:
                missData = _generate_missing_data(missPattern, missRate, originData)

                result, MICEImputedData = MICE(result, originData, missData, missRate, missPattern)
                # Explicit lookup: an unknown method name fails with KeyError
                # instead of reusing data from an earlier iteration.
                first_stage_outputs = {'mice': MICEImputedData}
                for firstImputedMethod in ['mice']:
                    firstImputedData = first_stage_outputs[firstImputedMethod]
                    for loss in ['MSELoss']:
                        # Other autoMethod names listed by the original author:
                        # 'Autoencoder', 'ResAutoencoder', 'StockedAutoencoder'.
                        for autoMethod in ['StockedResAutoencoder']:
                            start = time.time()
                            result, _ = TAI(
                                result=result,
                                firstImputedMethod=firstImputedMethod,
                                firstImputedData=firstImputedData,
                                loss=loss,
                                autoMethod=autoMethod,
                                originData=originData,
                                missData=missData,
                                missRate=missRate,
                                missPattern=missPattern,
                            )
                            logger.info("{}-{}-{}训练耗时:{}".format(
                                firstImputedMethod, loss, autoMethod,
                                time.time() - start))

            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(savePath, exist_ok=True)
            timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            json_name = "{}_{}_{}_{}.json".format(
                "allMethod", missPattern, file_name, timestamp)
            saveJson(result, os.path.join(savePath, json_name))
Ejemplo n.º 2
0
                        'StockedResAutoencoder'
                ]:
                    varname = "{}_{}_{}".format(first_imputed_method, loss,
                                                method)
                    globals()[varname] = [[] for _ in range(4)]
                    methed_names_half.append(varname)
                    methed_names_all.append(varname)

        # for i in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
        for i in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]:
            if missPattern == 'normal':
                missData = gene_missingdata(rate=i, data=originData)
            elif missPattern == 'taxa':
                missData = gene_missingdata_taxa_bias(rate=i, data=originData)
            elif missPattern == 'chara':
                missData = gene_missingdata_chara_bias(rate=i, data=originData)
            elif missPattern == 'block':
                missData = gene_missingdata_block_bias(rate=i, data=originData)
            else:
                raise Exception(
                    "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")

            mark = [
                temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna(
                    axis=0).values
            ]

            try:
                imputedData = mice.MICE().complete(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
Ejemplo n.º 3
0
def _generate_missing_data(pattern, rate, data):
    """Dispatch to the missing-data generator selected by ``pattern``.

    Only the selected branch is evaluated, so a generator name is looked up
    only when its pattern is actually requested.

    Args:
        pattern: one of 'normal', 'taxa', 'chara', 'block'.
        rate: missing rate forwarded to the generator as ``rate``.
        data: data forwarded to the generator as ``data``.

    Returns:
        Whatever the selected ``gene_missingdata*`` function returns.

    Raises:
        ValueError: if ``pattern`` is not one of the four supported names.
    """
    if pattern == 'normal':
        return gene_missingdata(rate=rate, data=data)
    if pattern == 'taxa':
        return gene_missingdata_taxa_bias(rate=rate, data=data)
    if pattern == 'chara':
        return gene_missingdata_chara_bias(rate=rate, data=data)
    if pattern == 'block':
        return gene_missingdata_block_bias(rate=rate, data=data)
    raise ValueError("缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")


def mainWork(path, savePath):
    """Run every imputation method on each entry of ``path`` and save results.

    For each directory entry and each missing pattern, missing data is
    generated at several missing rates and passed, in order, to ``Random``,
    ``Medain``, ``KNN``, ``EM``, ``II``, ``GAIN``, ``MIDA`` and ``MICE``.
    The KNN, II and MICE outputs are then each handed (as a copy) to ``TAI``
    as ``firstImputedData``.  The ``result`` object threaded through those
    calls is written with ``saveJson`` once per (pattern, file) pair, under
    a timestamped file name inside ``savePath``.

    Args:
        path: directory whose entries are listed with ``os.listdir``.
        savePath: directory receiving the JSON files; created if missing.

    Raises:
        ValueError: if an unsupported missing pattern is configured.
        KeyError: if a first-stage method has no imputed data registered.
    """
    # NOTE(review): every directory entry is passed to readAllTypeFile without
    # an extension filter - confirm it copes with non-data files.
    for file_name in os.listdir(path):
        originData = readAllTypeFile(os.path.join(path, file_name))
        # Other patterns supported by the dispatcher: 'block', 'taxa', 'chara'.
        for missPattern in ['normal']:
            # One result object per (file, pattern); it accumulates every rate.
            result = {}
            for missRate in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]:
                missData = _generate_missing_data(missPattern, missRate, originData)

                # Each method receives the running result and returns it
                # updated; only the KNN, II and MICE outputs are reused below.
                common_args = (originData, missData, missRate, missPattern,
                               'disperse')
                result, _ = Random(result, *common_args)
                result, _ = Medain(result, *common_args)
                result, KNNImputedData = KNN(result, *common_args)
                result, _ = EM(result, *common_args)
                result, IIImputedData = II(result, *common_args)
                result, _ = GAIN(result, *common_args)
                result, _ = MIDA(result, *common_args)
                result, MICEImputedData = MICE(result, *common_args)

                # Explicit lookup: an unknown method name fails with KeyError
                # instead of silently reusing the previous iteration's data.
                first_stage_outputs = {
                    'knn': KNNImputedData,
                    'ii': IIImputedData,
                    'mice': MICEImputedData,
                }
                for firstImputedMethod in ['knn', 'ii', 'mice']:
                    firstImputedData = first_stage_outputs[firstImputedMethod]
                    for loss in ['MSELoss']:
                        # Other autoMethod names listed by the original author:
                        # 'ResAutoencoder', 'StockedAutoencoder',
                        # 'StockedResAutoencoder'.
                        for autoMethod in ['Autoencoder']:
                            start = time.time()
                            # TAI gets its own copy of the first-stage data.
                            result = TAI(
                                result=result,
                                firstImputedMethod=firstImputedMethod,
                                firstImputedData=firstImputedData.copy(),
                                loss=loss,
                                autoMethod=autoMethod,
                                originData=originData,
                                missData=missData,
                                missRate=missRate,
                                missPattern=missPattern)
                            logger.info("改后{}-{}-{}训练耗时:{}".format(
                                firstImputedMethod, loss, autoMethod,
                                time.time() - start))

            # Make sure the output directory exists so the finished run is
            # not lost to a missing-directory error inside saveJson.
            os.makedirs(savePath, exist_ok=True)
            timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            json_name = "{}_{}_{}_{}.json".format(
                "allmethod", missPattern, file_name, timestamp)
            saveJson(result, os.path.join(savePath, json_name))