Exemple #1
0
def prepare_folds(hdf5, folds, pheno, derivatives, experiment):
    """Store stratified train/valid/test subject splits for each derivative.

    For every derivative a group named
    ``format_config(experiment, {"derivative": derivative})`` is created (or
    reused) under the ``experiments`` group of ``hdf5`` and tagged with a
    ``derivative`` attribute. Each stratified split then gets a sub-group
    named after its index, holding three entries -- ``train``, ``valid`` and
    ``test`` -- with the UTF-8 encoded ``FILE_ID`` values of that partition.

    Args:
        hdf5: Object exposing ``require_group`` (presumably an open h5py
            file -- confirm against the caller).
        folds: Number of splits handed to ``StratifiedKFold``.
        pheno: pandas table with ``FILE_ID`` (string identifiers) and
            ``STRAT`` (stratification label) columns.
        derivatives: Iterable of derivative names.
        experiment: Name template expanded through ``format_config``.

    Note:
        Neither ``StratifiedKFold`` nor ``train_test_split`` is given a
        ``random_state`` here, so the splits differ from run to run.
    """
    # Root group that holds every experiment.
    exps = hdf5.require_group("experiments")
    ids = pheno["FILE_ID"]

    for derivative in derivatives:
        # One experiment group per derivative (brain atlas).
        exp = exps.require_group(
            format_config(experiment, {
                "derivative": derivative,
            }))

        exp.attrs["derivative"] = derivative
        # Split into `folds` parts that keep the label proportions, shuffling
        # the samples first.
        skf = StratifiedKFold(n_splits=folds, shuffle=True)
        for i, (train_index,
                test_index) in enumerate(skf.split(ids, pheno["STRAT"])):
            # Carve a validation subset (33%) out of the training indices.
            train_index, valid_index = train_test_split(train_index,
                                                        test_size=0.33)
            # One sub-group per fold.
            fold = exp.require_group(str(i))

            # The split indices are positions, not labels, so they must go
            # through .iloc: plain ids[...] is label-based (or a deprecated
            # positional fallback) and picks the wrong rows / raises on
            # filtered frames whose index is no longer 0..n-1.
            fold['train'] = [
                ind.encode('utf8') for ind in ids.iloc[train_index]
            ]
            fold['valid'] = [
                indv.encode('utf8') for indv in ids.iloc[valid_index]
            ]
            fold["test"] = [
                indt.encode('utf8') for indt in ids.iloc[test_index]
            ]
Exemple #2
0
def load_patient(subj, tmpl):
    """Load one subject's ROI table and turn it into a feature array.

    The file path is ``format_config(tmpl, {"subject": subj})``; the file is
    read as tab-separated text with a header row, and every cell that cannot
    be parsed as a number becomes NaN (later replaced by 0).

    Column names are expected to be one prefix character followed by an
    integer; columns are selected in ascending numeric order as ``"#<n>"``.

    With the global ``arguments["--lstm"]`` set, the table is kept in file
    orientation, scaled along axis 1 and returned as is. Otherwise it is
    transposed (one row per column of the file), scaled along axis 1 and
    passed through ``compute_connectivity``.

    Returns:
        A ``(subj, values)`` tuple where ``values`` is the float32 result
        converted to (nested) Python lists.
    """
    # Resolve the path from the template and read the data.
    path = format_config(tmpl, {"subject": subj})
    table = pd.read_csv(path, sep="\t", header=0)
    table = table.apply(lambda column: pd.to_numeric(column, errors='coerce'))

    # ROI column names, ordered by their numeric id.
    roi_numbers = sorted(int(name[1:]) for name in table.keys().tolist())
    roi_columns = ["#" + str(number) for number in roi_numbers]

    use_lstm = arguments["--lstm"]

    raw = table[roi_columns].to_numpy()
    if not use_lstm:
        # One row per ROI column instead of one row per file line.
        raw = raw.T
    # Invalid (NaN) entries are replaced by 0.
    cleaned = np.nan_to_num(raw).tolist()

    # axis=1 standardises each row independently.
    scaled = preprocessing.scale(cleaned, axis=1)

    if use_lstm:
        features = np.array(scaled)
    else:
        # Pairwise connectivity between ROIs (helper defined elsewhere).
        features = compute_connectivity(scaled)

    # Hand back the subject id together with its features.
    return subj, features.astype(np.float32).tolist()
Exemple #3
0
        load_patients_to_file(hdf5, pheno, derivatives)

    # Build the cross-validation folds for the whole dataset.
    if arguments["--whole"]:
        print ("Preparing whole dataset")
        prepare_folds(hdf5, folds, pheno, derivatives, experiment="{derivative}_whole")

    # Build the cross-validation folds restricted to rows whose SEX is "M".
    if arguments["--male"]:
        print ("Preparing male dataset")
        pheno_male = pheno[pheno["SEX"] == "M"]
        prepare_folds(hdf5, folds, pheno_male, derivatives, experiment="{derivative}_male")

    # Build the cross-validation folds for the thresholded dataset: only rows
    # with MEAN_FD of at most 0.2 are kept.
    if arguments["--threshold"]:
        print ("Preparing thresholded dataset")
        pheno_thresh = pheno[pheno["MEAN_FD"] <= 0.2]
        prepare_folds(hdf5, folds, pheno_thresh, derivatives, experiment="{derivative}_threshold")

    # Build one set of cross-validation folds per site, each time excluding
    # that site's rows (leave-site-out).
    if arguments["--leave-site-out"]:
        print ("Preparing leave-site-out dataset")
        for site in pheno["SITE_ID"].unique():
            pheno_without_site = pheno[pheno["SITE_ID"] != site]
            # Only the site is substituted here; prepare_folds calls
            # format_config again with the derivative. NOTE(review): this
            # presumes format_config leaves unknown placeholders such as
            # "{derivative}" untouched -- confirm.
            prepare_folds(hdf5, folds, pheno_without_site, derivatives, experiment=format_config(
                "{derivative}_leavesiteout-{site}",
                {
                    "site": site,
                })
            )
Exemple #4
0
        pheno_male = pheno[pheno["SEX"] == "M"]
        prepare_folds(hdf5,
                      folds,
                      pheno_male,
                      derivatives,
                      experiment="{derivative}_male")

    # Build the cross-validation folds for the thresholded dataset: only rows
    # with MEAN_FD of at most 0.2 are kept.
    if arguments["--threshold"]:
        print("Preparing thresholded dataset")
        pheno_thresh = pheno[pheno["MEAN_FD"] <= 0.2]
        prepare_folds(hdf5,
                      folds,
                      pheno_thresh,
                      derivatives,
                      experiment="{derivative}_threshold")

    # Build one set of cross-validation folds per site, each time excluding
    # that site's rows (leave-site-out).
    if arguments["--leave-site-out"]:
        print("Preparing leave-site-out dataset")
        for site in pheno["SITE_ID"].unique():
            pheno_without_site = pheno[pheno["SITE_ID"] != site]
            # Only the site is substituted here; prepare_folds calls
            # format_config again with the derivative. NOTE(review): this
            # presumes format_config leaves unknown placeholders such as
            # "{derivative}" untouched -- confirm.
            prepare_folds(hdf5,
                          folds,
                          pheno_without_site,
                          derivatives,
                          experiment=format_config(
                              "{derivative}_leavesiteout-{site}", {
                                  "site": site,
                              }))
    # Atlas (derivative) selection: keep only the requested names that are in
    # the supported list, preserving the order given on the command line.
    valid_derivatives = ["cc200", "aal", "ez", "ho", "tt", "dosenbach160"]
    derivatives = [
        derivative for derivative in arguments["<derivative>"]
        if derivative in valid_derivatives
    ]

    # Collect the names of the experiments selected by the command-line flags.
    experiments = []
    for derivative in derivatives:

        config = {"derivative": derivative}

        if arguments["--whole"]:
            # A stray trailing comma used to follow the list literal here,
            # turning the right-hand side into a one-element tuple so that a
            # nested list (not the name string) ended up in `experiments`.
            experiments.append(
                PrepareUtils.format_config("{derivative}_whole", config))

        if arguments["--male"]:
            experiments.append(
                PrepareUtils.format_config("{derivative}_male", config))

        if arguments["--threshold"]:
            experiments.append(
                PrepareUtils.format_config("{derivative}_threshold", config))

        if arguments["--leave-site-out"]:
            for site in pheno["SITE_ID"].unique():
                site_config = {"site": site}