def cal_1D(comb, i):
    """Score each single-taxon combination in *comb* and persist results for worker *i*.

    For every combination, calls ``cal_pm_socre_nD`` (defined elsewhere in
    this project) and collects the PM score / p-value into a DataFrame while
    accumulating the raw and KDE arrays it returns.  Progress is checkpointed
    every 3 successful combinations to ``<logs_place>/<i>_task_1D.json``; all
    accumulated arrays and a run log are written out when the loop finishes.

    Parameters
    ----------
    comb : iterable
        Iterable of 1-element combinations (each yields one taxon name).
    i : int
        Worker index, used only to build the output file names.

    Returns
    -------
    pandas.DataFrame
        Columns ``['taxa', 'pm', 'pvalue']``, one row per successfully
        scored combination.

    Notes
    -----
    Relies on module-level globals ``demodata``, ``logs_place`` and
    ``load_save_project`` -- assumed to be defined before this runs
    (TODO confirm; they are not visible in this chunk).
    """
    res = pd.DataFrame(columns=['taxa', 'pm', 'pvalue'])
    run_log = pd.DataFrame(columns=['taxa', 'numpyindex'])
    # Parallel accumulators for the raw arrays and their KDE counterparts.
    cc_list, ct_list, tt_list, tc_list = [], [], [], []
    cc_kde_list, ct_kde_list, tt_kde_list, tc_kde_list = [], [], [], []
    # Hoisted loop-invariant: common output-path prefix for this worker.
    prefix = logs_place + '/' + str(i)
    count = 0
    for m in comb:
        try:
            m = list(m)
            (pm_score, pvalue, cc, ct, tt, tc,
             cc_kde, ct_kde, tt_kde, tc_kde) = cal_pm_socre_nD(m, demodata)
            # Skip combinations where scoring or any KDE estimate failed.
            if (pm_score is None) or (cc_kde is None) or (ct_kde is None) or (
                    tt_kde is None) or (tc_kde is None):
                continue
            res.loc[count, 'taxa'] = m[0]
            res.loc[count, 'pm'] = pm_score
            res.loc[count, 'pvalue'] = pvalue
            run_log.loc[count, 'taxa'] = m[0]
            # 'count' doubles as the index into the saved numpy arrays below.
            run_log.loc[count, 'numpyindex'] = count
            count = count + 1
            cc_list.append(cc)
            ct_list.append(ct)
            tt_list.append(tt)
            tc_list.append(tc)
            cc_kde_list.append(cc_kde)
            ct_kde_list.append(ct_kde)
            tt_kde_list.append(tt_kde)
            tc_kde_list.append(tc_kde)
            # Checkpoint every 3 successes so a monitor can track progress.
            if count % 3 == 0:
                load_save_project.save_dict(
                    prefix + '_task_1D.json',
                    {'hasfinished': count, 'isfinished': False})
        except Exception:
            # Best-effort: a failing combination is skipped, not fatal.
            # (Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit and made the worker unkillable.)
            continue
    # Final checkpoint, then persist all accumulated arrays and the run log.
    load_save_project.save_dict(prefix + '_task_1D.json',
                                {'hasfinished': count, 'isfinished': True})
    np.save(prefix + '_cc_list_1D.npy', cc_list)
    np.save(prefix + '_ct_list_1D.npy', ct_list)
    np.save(prefix + '_tt_list_1D.npy', tt_list)
    np.save(prefix + '_tc_list_1D.npy', tc_list)
    np.save(prefix + '_cc_kde_list_1D.npy', cc_kde_list)
    np.save(prefix + '_ct_kde_list_1D.npy', ct_kde_list)
    np.save(prefix + '_tt_kde_list_1D.npy', tt_kde_list)
    np.save(prefix + '_tc_kde_list_1D.npy', tc_kde_list)
    # NOTE: original file name has no '_' before 'runlog' -- kept as-is.
    run_log.to_csv(prefix + 'runlog_1D.csv')
    return res
# NOTE(review): removed a scraping/paste artifact that was here
# ("Beispiel #2", a stray "0", and a dangling top-level "return res"
# that was a syntax error outside any function).

from multiprocessing import Pool
if __name__ == '__main__':
    success_1D = False
    pm_res=None
    try:
        comb = []
        for a in range(len(genuslist)):
            if genuslist[a] == condition:
                continue
            comb.append([genuslist[a]])
        totaltask = {'totalcomb': len(comb),
                     'isfinished': False
                     }
        load_save_project.save_dict(logs_place + '/totaltask_1D.json', totaltask)
        # print(datetime.datetime.now())
        cl = np.array_split(np.asarray(range(len(comb))), processers, axis=0)
        comb = np.asarray(comb)
        res_list = []
        p = Pool(processers)
        for i in range(processers):
            res_list.append(p.apply_async(cal_1D, args=(comb[cl[i]], i,)))
        p.close()
        p.join()
        pm_res = pd.concat([i.get() for i in res_list])
        tmpPvalue = pm_res['pvalue']
        # fdrcorrection:
        # This covers Benjamini/Hochberg for independent or positively correlated and Benjamini/Yekutieli
        # for general or negatively correlated tests. Both are available in the function multipletests, as method=`fdr_bh`, resp. fdr_by.
        if fdr == 'ON':