Example #1
0
def _nearest_known_diagnosis(bids_df, subject, session_nb, session_list, step,
                             boundary_fn, boundary_arg):
    """
    Scan the sessions of a subject in one direction until a diagnosis is found.

    Args:
        bids_df: DataFrame indexed by (participant_id, session_id) with a 'diagnosis' column
        subject: participant whose sessions are scanned
        session_nb: numeric part of the session id the scan starts from (this session is excluded)
        session_list: numeric parts of all the session ids of the subject
        step: -1 to scan towards previous sessions, +1 towards following ones
        boundary_fn: helper returning the label of the session at which the scan must stop
        boundary_arg: argument given to boundary_fn

    Returns:
        the first diagnosis met that is not a float, or the diagnosis of the boundary session
        (possibly still a float) when the scan reached it
    """
    session = neighbour_session(session_nb, session_list, step)
    while True:
        diagnosis = bids_df.loc[(subject, session), "diagnosis"]
        # The boundary is only evaluated when the diagnosis is still missing.
        if not isinstance(diagnosis, float) or session == boundary_fn(boundary_arg):
            return diagnosis
        session = neighbour_session(int(session[5:]), session_list, step)


def infer_or_drop_diagnosis(bids_df: pd.DataFrame) -> pd.DataFrame:
    """
    Fill in missing diagnoses from the surrounding sessions of the same subject, or drop the session.

    A session whose diagnosis is a float (missing values are expected to be float NaN) is handled as follows:
    - if it is the last session of the subject, it is dropped;
    - otherwise the closest diagnosed sessions before and after it are looked up. When both agree, their
      diagnosis is assigned to the session; when they differ, the session is dropped.

    The input DataFrame is not modified; all changes are applied to a copy.

    Args:
        bids_df: DataFrame with columns including ['participant_id', 'session_id', 'diagnosis']

    Returns:
        cleaned DataFrame
    """
    cleaned_df = copy(bids_df)
    nb_inferred = 0

    for subject, subject_df in bids_df.groupby(level=0):
        session_numbers = [
            int(label[5:]) for _, label in subject_df.index.values
        ]

        for _, session in subject_df.index.values:
            key = (subject, session)

            # Sessions that already carry a diagnosis are kept untouched.
            if not isinstance(subject_df.loc[key, "diagnosis"], float):
                continue

            # Nothing follows the last session, so the diagnosis cannot be confirmed.
            if session == last_session(session_numbers):
                cleaned_df.drop(key, inplace=True)
                continue

            session_nb = int(session[5:])
            prev_diagnosis = _nearest_known_diagnosis(
                bids_df, subject, session_nb, session_numbers, -1,
                first_session, subject_df)
            post_diagnosis = _nearest_known_diagnosis(
                bids_df, subject, session_nb, session_numbers, +1,
                last_session, session_numbers)

            if prev_diagnosis == post_diagnosis:
                nb_inferred += 1
                cleaned_df.loc[key, "diagnosis"] = prev_diagnosis
            else:
                cleaned_df.drop(key, inplace=True)

    logger.debug(f"Inferred diagnosis: {nb_inferred}")

    return cleaned_df
Example #2
0
def diagnosis_removal(MCI_df: pd.DataFrame,
                      diagnosis_list: List[str]) -> pd.DataFrame:
    """
    Discards every subject whose last session has a diagnosis belonging to the provided list
    (e.g. to avoid keeping rMCI and pMCI subjects in sMCI lists).

    The input DataFrame is not modified; subjects are removed from a copy.

    Args:
        MCI_df: DataFrame with columns including ['participant_id', 'session_id', 'diagnosis']
        diagnosis_list: list of diagnoses that will be removed

    Returns:
        cleaned DataFrame
    """
    kept_df = copy(MCI_df)

    for subject, subject_df in MCI_df.groupby(level=0):
        session_numbers = [
            int(label[5:]) for _, label in subject_df.index.values
        ]
        final_key = (subject, last_session(session_numbers))

        # Only the diagnosis of the last session decides; earlier sessions are ignored.
        if subject_df.loc[final_key, "diagnosis"] not in diagnosis_list:
            continue

        # Dropping on the first index level removes all the sessions of the subject.
        kept_df.drop(subject, inplace=True)

    return kept_df
Example #3
0
def _session_label(session_nb: int) -> str:
    """
    Build the session identifier matching a session number, zero-padded to two digits.

    Examples: 0 -> "ses-M00", 6 -> "ses-M06", 36 -> "ses-M36", 120 -> "ses-M120".
    """
    return f"ses-M{session_nb:02d}"


def mci_stability(bids_df: pd.DataFrame,
                  horizon_time: int = 36) -> pd.DataFrame:
    """
    A method to label all MCI sessions depending on their stability on the time horizon

    Processing steps:
    1. Only subjects whose baseline diagnosis is MCI, EMCI or LMCI are kept, then missing diagnoses are
       cleaned / inferred with `cleaning_nan_diagnoses` and `infer_or_drop_diagnosis`.
    2. Subjects whose diagnosis changes more than once along their chronologically sorted sessions are dropped.
    3. Sessions whose diagnosis is not "MCI" are dropped. Each remaining session is relabelled from the
       diagnosis observed `horizon_time` months later:
       - a session exists at the horizon: "rMCI" (CN), "sMCI" (MCI) or "pMCI" (AD);
       - the horizon is after the last session: "uMCI" if the last diagnosis is MCI, otherwise the label
         derived from the last diagnosis;
       - the horizon falls between two sessions: the label derived from the previous session if it is not
         MCI, otherwise "sMCI" if the following session is MCI and "uMCI" if it is not.

    Args:
        bids_df: DataFrame with columns including ['participant_id', 'session_id', 'diagnosis']
        horizon_time: time horizon in months

    Returns:
        DataFrame with new labels

    Raises:
        KeyError: if a diagnosis used to derive a label is not one of "CN", "MCI" or "AD".
    """

    baseline_labels = ["MCI", "EMCI", "LMCI"]
    bids_df = bids_df[bids_df.baseline_diagnosis.isin(baseline_labels)]
    bids_df = cleaning_nan_diagnoses(bids_df)
    bids_df = infer_or_drop_diagnosis(bids_df)

    # Check possible double change in diagnosis in time
    single_change_df = copy(bids_df)
    nb_subjects = 0
    for subject, subject_df in bids_df.groupby(level=0):
        session_numbers = sorted(
            int(session[5:]) for _, session in subject_df.index.values)
        # Diagnoses of the subject in chronological order
        timeline = [
            bids_df.loc[(subject, _session_label(session_nb)), "diagnosis"]
            for session_nb in session_numbers
        ]

        new_diagnosis = timeline[0]
        nb_change = 0
        for diagnosis in timeline:
            if new_diagnosis != diagnosis:
                new_diagnosis = diagnosis
                nb_change += 1

        if nb_change > 1:
            nb_subjects += 1
            single_change_df.drop(subject, inplace=True)

    logger.debug(f"Dropped subjects: {nb_subjects}")
    bids_df = single_change_df

    # Stability of sessions
    # NOTE(review): any other diagnosis (including a missing one) reaching this
    # mapping raises a KeyError -- confirm upstream cleaning guarantees these values.
    stability_dict = {
        "CN": "r",
        "MCI": "s",
        "AD": "p",
    }  # Do not take into account the case of missing diag = nan

    # bids_df is only read below; every change is written to output_df.
    output_df = copy(bids_df)
    for subject, subject_df in bids_df.groupby(level=0):
        session_list = [
            int(session[5:]) for _, session in subject_df.index.values
        ]

        for _, session in subject_df.index.values:
            key = (subject, session)

            # If the diagnosis is not MCI we remove the time point
            if subject_df.loc[key, "diagnosis"] != "MCI":
                output_df.drop(key, inplace=True)
                continue

            horizon_session_nb = int(session[5:]) + horizon_time

            if horizon_session_nb in session_list:
                # The label must be zero-padded like the index ("ses-M06", not
                # "ses-M6"), otherwise short horizons cannot be looked up.
                horizon_session = _session_label(horizon_session_nb)
                horizon_diagnosis = subject_df.loc[(subject, horizon_session),
                                                   "diagnosis"]
                update_diagnosis = stability_dict[horizon_diagnosis] + "MCI"

            elif after_end_screening(horizon_session_nb, session_list):
                # Two situations, change in last session AD or CN --> pMCI or rMCI
                # Last session MCI --> uMCI
                # This section must be discussed --> removed in Jorge's paper
                last_diagnosis = subject_df.loc[(
                    subject, last_session(session_list)), "diagnosis"]
                if last_diagnosis != "MCI":
                    update_diagnosis = stability_dict[last_diagnosis] + "MCI"
                else:
                    update_diagnosis = "uMCI"

            else:
                # The horizon falls between two existing sessions.
                prev_session = neighbour_session(horizon_session_nb,
                                                 session_list, -1)
                prev_diagnosis = subject_df.loc[(subject, prev_session),
                                                "diagnosis"]
                if prev_diagnosis != "MCI":
                    update_diagnosis = stability_dict[prev_diagnosis] + "MCI"
                else:
                    # The following session is only needed when the previous one is MCI.
                    post_session = neighbour_session(horizon_session_nb,
                                                     session_list, +1)
                    post_diagnosis = subject_df.loc[(subject, post_session),
                                                    "diagnosis"]
                    if post_diagnosis != "MCI":
                        update_diagnosis = "uMCI"
                    else:
                        update_diagnosis = "sMCI"

            output_df.loc[key, "diagnosis"] = update_diagnosis

    return output_df