def infer_or_drop_diagnosis(bids_df: pd.DataFrame) -> pd.DataFrame:
    """
    Fill in missing diagnoses from the neighbouring sessions of the same subject.

    A session whose diagnosis is missing receives the diagnosis of the closest
    previous and following sessions that have one, provided both agree. When they
    disagree the session is dropped. A session with a missing diagnosis is also
    dropped when it is the last session of the subject's follow-up.

    Args:
        bids_df: DataFrame indexed by (subject, session) pairs with a 'diagnosis'
            column. Session ids are parsed as an integer starting at their sixth
            character (e.g. 'ses-M06').

    Returns:
        cleaned copy of the DataFrame; the input is left untouched.
    """
    cleaned_df = copy(bids_df)
    nb_inferred = 0

    for subject, subject_df in bids_df.groupby(level=0):
        months = [int(session_id[5:]) for _, session_id in subject_df.index.values]

        for _, session_id in subject_df.index.values:
            current_diagnosis = subject_df.loc[(subject, session_id), "diagnosis"]
            month = int(session_id[5:])

            # Only float values are treated as missing (presumably NaN); any other
            # diagnosis is kept as is.
            if not isinstance(current_diagnosis, float):
                continue

            # Nothing follows the last session, so there is nothing to infer from.
            if session_id == last_session(months):
                cleaned_df.drop((subject, session_id), inplace=True)
                continue

            # Walk backwards until a known diagnosis or the first session is reached.
            # NOTE(review): assumes neighbour_session always yields a valid session id
            # here, including for the first session of a subject -- TODO confirm.
            before_id = neighbour_session(month, months, -1)
            before_diagnosis = bids_df.loc[(subject, before_id), "diagnosis"]
            while isinstance(before_diagnosis, float) and before_id != first_session(
                subject_df
            ):
                before_id = neighbour_session(int(before_id[5:]), months, -1)
                before_diagnosis = bids_df.loc[(subject, before_id), "diagnosis"]

            # Walk forwards until a known diagnosis or the last session is reached.
            after_id = neighbour_session(month, months, +1)
            after_diagnosis = bids_df.loc[(subject, after_id), "diagnosis"]
            while isinstance(after_diagnosis, float) and after_id != last_session(
                months
            ):
                after_id = neighbour_session(int(after_id[5:]), months, +1)
                after_diagnosis = bids_df.loc[(subject, after_id), "diagnosis"]

            # The gap is filled only when both sides agree.
            if before_diagnosis == after_diagnosis:
                nb_inferred += 1
                cleaned_df.loc[(subject, session_id), "diagnosis"] = before_diagnosis
            else:
                cleaned_df.drop((subject, session_id), inplace=True)

    logger.debug(f"Inferred diagnosis: {nb_inferred}")

    return cleaned_df
def diagnosis_removal(MCI_df: pd.DataFrame, diagnosis_list: List[str]) -> pd.DataFrame:
    """
    Drop every subject whose last diagnosis belongs to the provided list
    (avoids keeping rMCI and pMCI subjects in sMCI lists).

    Args:
        MCI_df: DataFrame indexed by (subject, session) pairs with a 'diagnosis'
            column. Session ids are parsed as an integer starting at their sixth
            character (e.g. 'ses-M06').
        diagnosis_list: diagnoses which, when found at the last session of a
            subject, cause all sessions of that subject to be removed.

    Returns:
        cleaned copy of the DataFrame; the input is left untouched.
    """
    cleaned_df = copy(MCI_df)

    # The whole subject is removed, however late in the follow-up the
    # unwanted diagnosis appears.
    for subject, sessions_df in MCI_df.groupby(level=0):
        months = [int(session_id[5:]) for _, session_id in sessions_df.index.values]
        final_session = last_session(months)
        final_diagnosis = sessions_df.loc[(subject, final_session), "diagnosis"]

        if final_diagnosis not in diagnosis_list:
            continue
        cleaned_df.drop(subject, inplace=True)

    return cleaned_df
def mci_stability(bids_df: pd.DataFrame, horizon_time: int = 36) -> pd.DataFrame:
    """
    Label all MCI sessions depending on their stability over the time horizon.

    Processing steps:

    1. Keep only the rows whose `baseline_diagnosis` is "MCI", "EMCI" or "LMCI",
       then apply `cleaning_nan_diagnoses` and `infer_or_drop_diagnosis`.
    2. Drop subjects whose diagnosis changes more than once along the follow-up.
    3. Remove every session whose diagnosis is not "MCI" and relabel the others:

       - a session exists exactly `horizon_time` months later: "rMCI", "sMCI" or
         "pMCI" when its diagnosis is "CN", "MCI" or "AD" respectively;
       - `after_end_screening` reports that the horizon lies beyond the follow-up:
         "rMCI" / "pMCI" when the last session is "CN" / "AD", "uMCI" when the
         last session is still "MCI";
       - otherwise the horizon lies between two sessions: "rMCI" / "pMCI" when the
         session before the horizon is "CN" / "AD"; when it is "MCI", the label is
         "sMCI" if the session after the horizon is also "MCI" and "uMCI" if not.

    Args:
        bids_df: DataFrame indexed by (subject, session) pairs with at least the
            columns 'diagnosis' and 'baseline_diagnosis'. Session ids are expected
            to be 'ses-M' followed by the month number, zero-padded to two digits.
        horizon_time: time horizon in months.

    Returns:
        DataFrame containing only the kept MCI sessions, with new labels in the
        'diagnosis' column.

    Raises:
        KeyError: if a diagnosis used to derive a label is not "CN", "MCI" or "AD".
    """
    baseline_diagnoses = ["MCI", "EMCI", "LMCI"]
    bids_df = bids_df[bids_df.baseline_diagnosis.isin(baseline_diagnoses)]
    bids_df = cleaning_nan_diagnoses(bids_df)
    bids_df = infer_or_drop_diagnosis(bids_df)

    # Check possible double change in diagnosis in time
    single_change_df = copy(bids_df)
    nb_subjects = 0
    for subject, subject_df in bids_df.groupby(level=0):
        session_list = sorted(
            int(session[5:]) for _, session in subject_df.index.values
        )
        # Diagnoses of the subject in chronological order.
        subject_diagnoses = [
            bids_df.loc[(subject, f"ses-M{session_nb:02d}"), "diagnosis"]
            for session_nb in session_list
        ]

        current_diagnosis = subject_diagnoses[0]
        nb_change = 0
        for diagnosis in subject_diagnoses:
            if current_diagnosis != diagnosis:
                current_diagnosis = diagnosis
                nb_change += 1

        if nb_change > 1:
            nb_subjects += 1
            single_change_df.drop(subject, inplace=True)

    logger.debug(f"Dropped subjects: {nb_subjects}")
    bids_df = single_change_df

    # Stability of sessions: prefix given to "MCI" depending on the diagnosis
    # observed at the horizon (regressive, stable, progressive).
    # Do not take into account the case of missing diag = nan
    stability_dict = {
        "CN": "r",
        "MCI": "s",
        "AD": "p",
    }

    labelled_df = copy(bids_df)
    for subject, subject_df in bids_df.groupby(level=0):
        session_list = [int(session[5:]) for _, session in subject_df.index.values]

        for _, session in subject_df.index.values:
            diagnosis = subject_df.loc[(subject, session), "diagnosis"]

            # If the diagnosis is not MCI we remove the time point
            if diagnosis != "MCI":
                labelled_df.drop((subject, session), inplace=True)
                continue

            horizon_session_nb = int(session[5:]) + horizon_time

            if horizon_session_nb in session_list:
                # Zero-pad the month like every other session id so that horizons
                # below 10 months (e.g. 'ses-M06') resolve to an existing session.
                horizon_session = f"ses-M{int(horizon_session_nb):02d}"
                horizon_diagnosis = subject_df.loc[
                    (subject, horizon_session), "diagnosis"
                ]
                update_diagnosis = stability_dict[horizon_diagnosis] + "MCI"

            elif after_end_screening(horizon_session_nb, session_list):
                # Two situations, change in last session AD or CN --> pMCI or rMCI
                # Last session MCI --> uMCI
                last_diagnosis = subject_df.loc[
                    (subject, last_session(session_list)), "diagnosis"
                ]
                # This section must be discussed --> removed in Jorge's paper
                if last_diagnosis != "MCI":
                    update_diagnosis = stability_dict[last_diagnosis] + "MCI"
                else:
                    update_diagnosis = "uMCI"

            else:
                # The horizon falls between two existing sessions.
                prev_session = neighbour_session(horizon_session_nb, session_list, -1)
                post_session = neighbour_session(horizon_session_nb, session_list, +1)
                prev_diagnosis = subject_df.loc[(subject, prev_session), "diagnosis"]
                if prev_diagnosis != "MCI":
                    update_diagnosis = stability_dict[prev_diagnosis] + "MCI"
                else:
                    post_diagnosis = subject_df.loc[
                        (subject, post_session), "diagnosis"
                    ]
                    if post_diagnosis != "MCI":
                        update_diagnosis = "uMCI"
                    else:
                        update_diagnosis = "sMCI"

            labelled_df.loc[(subject, session), "diagnosis"] = update_diagnosis

    return labelled_df