コード例 #1
0
def melt_df(df1):
    """
    Reshape a wide dataframe into long form around a fixed set of columns.

    The identifier columns are maven_constants.NAME, maven_constants.LABEL
    and maven_constants.FORMULA; every other column of the input is melted.
    The column headers are passed to check_column_headers together with the
    identifier columns before melting (presumably to verify the identifier
    columns are present -- confirm against that helper).

    Args:
        df1 : dataframe to be converted to long format

    Returns:
        The dataframe produced by pd.melt, with the identifier columns kept
        as id_vars and all remaining columns used as value_vars.
    """
    id_columns = [
        maven_constants.NAME,
        maven_constants.LABEL,
        maven_constants.FORMULA,
    ]
    all_columns = df1.columns.tolist()
    check_column_headers(all_columns, id_columns)

    # Everything that is not an identifier column gets unpivoted.
    value_columns = [col for col in all_columns if col not in id_columns]
    return pd.melt(df1, id_vars=id_columns, value_vars=value_columns)
コード例 #2
0
def merge_samples(merged_df, sample_metadata):
    """
    Merge the raw input dataframe with the sample metadata dataframe.

    When sample metadata is supplied the two frames are inner-joined. The
    join uses both the sample-name and cohort-name columns when background
    correction is possible, i.e. the header checks pass and every value in
    the background column of the sample metadata is also a value of its
    sample-name column. Otherwise a warning is issued and the join falls
    back to the sample-name column alone.

    After the optional merge, multiquant.MQ_COHORT_NAME is renamed to
    multiquant.COHORT and multiquant.MQ_SAMPLE_NAME to multiquant.SAMPLE,
    the frame is passed through remove_mq_stds, and the "Area" column of
    that result is renamed to multiquant.INTENSITY.

    Args:
        merged_df: raw input dataframe. NOTE: when sample_metadata is None
            this frame is never rebound, so the column renames below are
            applied to the caller's object in place.
        sample_metadata: sample metadata dataframe, or None to skip the
            merge step.

    Returns:
        The dataframe returned by remove_mq_stds, with "Area" renamed to
        multiquant.INTENSITY.

    Raises:
        Exception: if the sample-name + cohort-name merge yields an empty
            dataframe.
    """
    if sample_metadata is not None:
        col_headers_sample = sample_metadata.columns.values
        col_headers_merged = merged_df.columns.values
        bg_corr_col_names_sample = [
            multiquant.BACKGROUND, multiquant.MQ_COHORT_NAME
        ]
        bg_corr_col_names_merged = [multiquant.MQ_COHORT_NAME]

        try:
            # The handler below treats AssertionError from the header check
            # as "required columns missing".
            hlp.check_column_headers(col_headers_sample,
                                     bg_corr_col_names_sample)
            hlp.check_column_headers(col_headers_merged,
                                     bg_corr_col_names_merged)
        except AssertionError:
            bg_correction_possible = False
        else:
            # Explicit test instead of an `assert` statement: asserts are
            # removed under `python -O`, which would silently skip this
            # validation and always take the two-key merge.
            background_samples = set(sample_metadata[multiquant.BACKGROUND])
            known_samples = set(sample_metadata[multiquant.MQ_SAMPLE_NAME])
            bg_correction_possible = background_samples.issubset(
                known_samples)

        if bg_correction_possible:
            merged_df = merged_df.merge(
                sample_metadata,
                how='inner',
                on=[multiquant.MQ_SAMPLE_NAME, multiquant.MQ_COHORT_NAME])
            if merged_df.empty:
                raise Exception(
                    'Empty Merge, no common entries to process, please check input files'
                )
        else:
            warnings.warn("Background Correction can't be performed")
            merged_df = merged_df.merge(sample_metadata,
                                        how='inner',
                                        on=[multiquant.MQ_SAMPLE_NAME])

    # first change Sample Name to Cohort Name, then Original Filename to Sample
    # refer to multiquant raw output
    merged_df.rename(columns={multiquant.MQ_COHORT_NAME: multiquant.COHORT},
                     inplace=True)
    merged_df.rename(columns={multiquant.MQ_SAMPLE_NAME: multiquant.SAMPLE},
                     inplace=True)

    remove_stds = remove_mq_stds(merged_df)
    remove_stds.rename(columns={"Area": multiquant.INTENSITY}, inplace=True)
    return remove_stds
コード例 #3
0
def merge_mq_metadata(mq_df, metdata, sample_metdata):
    """
    Merge the raw input dataframe with the metadata and sample metadata.

    The three inputs are combined through mq_merge_dfs and missing values in
    the result are replaced with 0. When sample metadata is available and
    the merged frame passes the header check for the background and cohort
    columns, the sample metadata is restricted to the samples present in the
    merged data and used to derive the replicate and background lists.

    Args:
        mq_df: raw input df
        metdata: metadata df
        sample_metdata: sample metadata df, or None

    Returns:
        A 3-tuple (merged_data, list_of_replicates, sample_background):
            merged_data: merged dataframe with NaN filled with 0
            list_of_replicates: result of get_replicates (per the original
                docs, sample names belonging to the same cohort); an empty
                list when it cannot be computed
            sample_background: result of get_background_samples; an empty
                list when it cannot be computed
    """
    merged_data = mq_merge_dfs(mq_df, metdata, sample_metdata)
    merged_data.fillna(0, inplace=True)

    # Without sample metadata there is nothing to derive replicates from.
    if sample_metdata is None:
        return merged_data, [], []

    required_columns = [multiquant.BACKGROUND, multiquant.COHORT]
    try:
        hlp.check_column_headers(merged_data.columns.values, required_columns)
    except AssertionError:
        # Header check failed: hand back the merged data with empty lists.
        return merged_data, [], []

    # consider only those sample names which are present in raw file
    in_raw_file = sample_metdata[multiquant.MQ_SAMPLE_NAME].isin(
        merged_data[multiquant.SAMPLE])
    sample_metdata = sample_metdata[in_raw_file]

    replicates = get_replicates(sample_metdata,
                                multiquant.MQ_SAMPLE_NAME,
                                multiquant.MQ_COHORT_NAME,
                                multiquant.BACKGROUND)
    backgrounds = get_background_samples(sample_metdata,
                                         multiquant.MQ_SAMPLE_NAME,
                                         multiquant.BACKGROUND)
    return merged_data, replicates, backgrounds