def melt_df(df1):
    """
    Convert a wide dataframe into long form around a fixed set of id columns.

    The id columns are the NAME, LABEL and FORMULA constants from
    maven_constants. The dataframe's headers are passed through
    check_column_headers together with those id columns before melting.

    Args:
        df1 : dataframe to be converted to long format

    Returns:
        long_form : result of pd.melt, with the fixed columns as id_vars and
                    every remaining column as value_vars
    """
    id_columns = [
        maven_constants.NAME,
        maven_constants.LABEL,
        maven_constants.FORMULA,
    ]
    all_columns = df1.columns.tolist()
    check_column_headers(all_columns, id_columns)

    # Every header that is not an id column gets unpivoted.
    value_columns = [
        header for header in all_columns if header not in id_columns
    ]
    return pd.melt(df1, id_vars=id_columns, value_vars=value_columns)
def merge_samples(merged_df, sample_metadata):
    """
    Merge the raw input dataframe with the sample metadata dataframe.

    When sample metadata is supplied, the function first tries an inner merge
    on both the sample-name and cohort-name columns. That merge is only
    attempted if the header checks pass and every value in the background
    column is also present in the sample-name column of the metadata. If any
    of those checks raises AssertionError, a warning is issued and the
    function falls back to an inner merge on the sample-name column only.

    After the optional merge, the cohort and sample columns are renamed, the
    frame is passed through remove_mq_stds, and the "Area" column of the
    result is renamed to multiquant.INTENSITY.

    Args:
        merged_df: raw input dataframe. When sample_metadata is None no merge
            takes place and this frame is renamed in place.
        sample_metadata: sample metadata dataframe, or None to skip the merge.

    Returns:
        The dataframe returned by remove_mq_stds, with "Area" renamed to
        multiquant.INTENSITY.

    Raises:
        Exception: if the merge on sample name and cohort name is empty.
    """
    if sample_metadata is not None:
        sample_headers = sample_metadata.columns.values
        merged_headers = merged_df.columns.values
        required_in_sample = [multiquant.BACKGROUND, multiquant.MQ_COHORT_NAME]
        required_in_merged = [multiquant.MQ_COHORT_NAME]
        try:
            hlp.check_column_headers(sample_headers, required_in_sample)
            hlp.check_column_headers(merged_headers, required_in_merged)

            background_names = set(sample_metadata[multiquant.BACKGROUND])
            sample_names = set(sample_metadata[multiquant.MQ_SAMPLE_NAME])
            # Raised explicitly instead of using `assert`, so the check is
            # not stripped when Python runs with -O. AssertionError is kept
            # so the fallback handler below still catches it.
            if not background_names.issubset(sample_names):
                raise AssertionError(
                    'background samples are not a subset of sample names')

            merged_df = merged_df.merge(
                sample_metadata,
                how='inner',
                on=[multiquant.MQ_SAMPLE_NAME, multiquant.MQ_COHORT_NAME])
            if merged_df.empty:
                raise Exception(
                    'Empty Merge, no common entries to process, please check input files'
                )
        except AssertionError:
            # merged_df is still the caller's original frame here: every
            # AssertionError source above runs before the merge assignment.
            warnings.warn("Background Correction can't be performed")
            merged_df = merged_df.merge(sample_metadata,
                                        how='inner',
                                        on=[multiquant.MQ_SAMPLE_NAME])

    # first change Sample Name to Cohort Name, then Original Filename to Sample
    # refer to multiquant raw output
    merged_df.rename(columns={multiquant.MQ_COHORT_NAME: multiquant.COHORT},
                     inplace=True)
    merged_df.rename(columns={multiquant.MQ_SAMPLE_NAME: multiquant.SAMPLE},
                     inplace=True)

    remove_stds = remove_mq_stds(merged_df)
    remove_stds.rename(columns={"Area": multiquant.INTENSITY}, inplace=True)
    return remove_stds
def merge_mq_metadata(mq_df, metdata, sample_metdata):
    """
    Merge the raw input dataframe with the metadata and sample metadata, and
    collect replicate and background information when it is available.

    Args:
        mq_df: raw input df
        metdata: metadata df
        sample_metdata: sample metadata df, or None

    Returns:
        merged_data: dataframe produced by mq_merge_dfs, with missing values
                     filled with 0
        list_of_replicates: result of get_replicates on the filtered sample
                            metadata; an empty list when sample metadata is
                            None or the header check raises AssertionError
        sample_background: result of get_background_samples on the filtered
                           sample metadata; an empty list in the same cases
    """
    merged_data = mq_merge_dfs(mq_df, metdata, sample_metdata)
    merged_data.fillna(0, inplace=True)

    replicates = []
    backgrounds = []

    if sample_metdata is None:
        return merged_data, replicates, backgrounds

    required_headers = [multiquant.BACKGROUND, multiquant.COHORT]
    try:
        hlp.check_column_headers(merged_data.columns.values, required_headers)
    except AssertionError:
        # The header check failed, so return the merged data with empty lists.
        return merged_data, replicates, backgrounds

    # consider only those sample names which are present in raw file
    in_raw_file = sample_metdata[multiquant.MQ_SAMPLE_NAME].isin(
        merged_data[multiquant.SAMPLE])
    sample_metdata = sample_metdata[in_raw_file]

    replicates = get_replicates(sample_metdata,
                                multiquant.MQ_SAMPLE_NAME,
                                multiquant.MQ_COHORT_NAME,
                                multiquant.BACKGROUND)
    backgrounds = get_background_samples(sample_metdata,
                                         multiquant.MQ_SAMPLE_NAME,
                                         multiquant.BACKGROUND)
    return merged_data, replicates, backgrounds