Code Example #1
def output_psm_level_report(df, output_stem: str):
    """Write a PSM-level Excel report, excluding decoy and contaminant hits."""
    # drop reverse-database (decoy) and contaminant entries
    df = df[~df.uniprot.str.startswith('Reverse_')]
    df = df[~df.uniprot.str.startswith('contaminant')]
    psm_df = df.set_index(
        ['uniprot', 'symbol', 'description',
         'clean_sequence']).sort_index(level=1)
    psm_df = psm_df.drop(columns=['unique_sequence'])

    psm_df.to_excel(
        utils.get_timestamped_report_path(f'{output_stem}_{{}}.xlsx',
                                          DATA_OUTPUT_PATH), )
    return psm_df
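
Every example below writes through utils.get_timestamped_report_path, whose implementation is not included here. A minimal sketch of what such a helper could look like, assuming the '{}' placeholder in the template is filled with a timestamp:

import pathlib
from datetime import datetime

def get_timestamped_report_path(template: str, output_dir: pathlib.Path) -> pathlib.Path:
    # hypothetical stand-in for utils.get_timestamped_report_path: fill the
    # '{}' placeholder with a timestamp and anchor the file in output_dir
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return output_dir / template.format(stamp)

# get_timestamped_report_path('unfiltered_nb4_psm_level_{}.xlsx', pathlib.Path('output'))
# -> PosixPath('output/unfiltered_nb4_psm_level_20240101_120000.xlsx')
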
Code Example #2
def tabulate_timecourse(glob, name):
    """Compute t0/t1 and drug-vs-DMSO ratios and write one sheet per category."""
    time_course_path = utils.get_newest_file(DATA_OUTPUT_PATH, glob)

    df = pd.read_excel(time_course_path, index_col='uniprot')

    chase_col = 'DMSO t0/t1'
    palm_protect_col = 'Palm M t1/DMSO t1'
    abd_protect_col = 'ABD957 t1/DMSO t1'

    df[chase_col] = df['DMSO t0 (mean)'] / df['DMSO t1 (mean)']
    df[palm_protect_col] = df['Palm M t1 (mean)'] / df['DMSO t1 (mean)']
    df[abd_protect_col] = df['ABD957 t1 (mean)'] / df['DMSO t1 (mean)']

    dynamic = df[chase_col] >= DYNAMIC_FOLD_CHANGE_THRESHOLD
    palm_protected = df[palm_protect_col] >= CHASE_FOLD_CHANGE_THRESHOLD
    abd_protected = df[abd_protect_col] >= CHASE_FOLD_CHANGE_THRESHOLD

    dynamic_colname = 'Dynamically palmitoylated?'

    df[dynamic_colname] = 'no'
    df.loc[dynamic, dynamic_colname] = 'yes'

    desired_cols = [
        'symbol',
        'description',
        'DMSO t0 (mean)',
        'Palm M t0 (mean)',
        'ABD957 t0 (mean)',
        'DMSO t1 (mean)',
        'Palm M t1 (mean)',
        'ABD957 t1 (mean)',
        dynamic_colname,
    ]

    output_path = utils.get_timestamped_report_path(
        f'timecourse_{name.replace("-", "").lower()}_supp_table_extra_sheets_{{}}.xlsx',
        DATA_OUTPUT_PATH)

    with pd.ExcelWriter(output_path) as w:
        df[dynamic][desired_cols].drop(columns=[dynamic_colname]).to_excel(
            w, sheet_name=f'Dynamic ({name})')
        df[palm_protected][desired_cols].to_excel(
            w, sheet_name=f'Palm M Regulated ({name})')
        df[abd_protected][desired_cols].to_excel(
            w, sheet_name=f'ABD957 Regulated ({name})')
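
A self-contained toy of the fold-change logic above, using a hypothetical DYNAMIC_FOLD_CHANGE_THRESHOLD value (the real thresholds are module constants not shown here):

import pandas as pd

DYNAMIC_FOLD_CHANGE_THRESHOLD = 2.0  # hypothetical value for illustration

toy = pd.DataFrame(
    {'DMSO t0 (mean)': [80.0, 95.0], 'DMSO t1 (mean)': [20.0, 90.0]},
    index=['P11111', 'P22222'],
)
toy['DMSO t0/t1'] = toy['DMSO t0 (mean)'] / toy['DMSO t1 (mean)']
dynamic = toy['DMSO t0/t1'] >= DYNAMIC_FOLD_CHANGE_THRESHOLD
print(toy[dynamic])  # only P11111 passes the fold-change cut (80/20 = 4.0)
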
Code Example #3
def unfiltered_report(analysis, report_output_prefix: str):
    m = analysis.data_model

    query = (m
        .select(
            m.id,
            m.experiment,
            m.uniprot,
            m.symbol,
            m.description,
            m.sequence,
            m.mass,
            m.charge,
            m.rsquared,
            m.ratio,
        )
        .where(
            m.experiment_id.in_(analysis.experiment_ids_included)
        )
    )

    df = pd.DataFrame.from_records(list(query.dicts()))

    for dataset in analysis.datasets:
        df.loc[df.experiment.isin(dataset.experiment_ids_included), 'condition'] = dataset.name

    df = df[~df.uniprot.str.startswith('Reverse_')]
    # keep everything after the first whitespace-delimited token of the description
    df['description'] = df.description.str.split().str[1:].str.join(' ')

    df = df.set_index(['uniprot', 'symbol', 'description', 'condition', 'experiment']).sort_index(level=0)

    report_output_path = utils.get_timestamped_report_path(
        f'unfiltered_{report_output_prefix}_{{}}.xlsx',
        pathlib.Path(analysis.params.output_folder),
    )

    # the `encoding` kwarg was removed from DataFrame.to_excel in pandas 2.0
    df.to_excel(report_output_path)
    return df
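
The peewee-query-to-DataFrame pattern above depends on the project's data model, which is not shown. A minimal, self-contained sketch with a hypothetical Psm model and an in-memory SQLite database:

import pandas as pd
from peewee import SqliteDatabase, Model, CharField, FloatField, IntegerField

db = SqliteDatabase(':memory:')

class Psm(Model):
    # hypothetical stand-in for analysis.data_model
    experiment = IntegerField()
    uniprot = CharField()
    ratio = FloatField()

    class Meta:
        database = db

db.connect()
db.create_tables([Psm])
Psm.insert_many([
    {'experiment': 1, 'uniprot': 'P11111', 'ratio': 1.2},
    {'experiment': 2, 'uniprot': 'Reverse_P22222', 'ratio': 0.8},
]).execute()

query = Psm.select(Psm.experiment, Psm.uniprot, Psm.ratio).where(Psm.experiment.in_([1, 2]))
df = pd.DataFrame.from_records(list(query.dicts()))
df = df[~df.uniprot.str.startswith('Reverse_')]  # drop decoy entries, as above
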
Code Example #4
def tabulate_hydroxylamine():
    nb4_path = utils.get_newest_file(DATA_OUTPUT_PATH,
                                     'filtered_hydroxylamine_nb4_*.xlsx')
    oci_path = utils.get_newest_file(DATA_OUTPUT_PATH,
                                     'filtered_hydroxylamine_oci_p_*.xlsx')
    on_path = utils.get_newest_file(DATA_OUTPUT_PATH,
                                    'filtered_hydroxylamine_oci_o_*.xlsx')
    oci = get_hydroxylamine_sensitive_df(oci_path, name='OCI-AML3')
    nb4 = get_hydroxylamine_sensitive_df(nb4_path, name='NB-4')
    on = get_hydroxylamine_sensitive_df(on_path, name='ON')

    df = oci.join(nb4, how='outer')
    df = df.join(on, how='outer')

    df['symbol'] = df['symbol (OCI-AML3)'].fillna(df['symbol (NB-4)']).fillna(
        df['symbol (ON)'])
    df['description'] = df['description (OCI-AML3)'].fillna(
        df['description (NB-4)']).fillna(df['description (ON)'])

    df = df[[
        'symbol',
        'description',
        'mean_reduction (OCI-AML3)',
        'mean_reduction (NB-4)',
        'mean_reduction (ON)',
    ]]

    df = df.sort_values(by='mean_reduction (OCI-AML3)',
                        ascending=False).fillna('-')

    swisspalm = pd.read_excel('input/swisspalm_search.xlsx')

    swisspalm = swisspalm.set_index('Query identifier')
    swisspalm = swisspalm[[
        # 'Query identifier',
        # 'UniProt AC',
        # 'UniProt ID',
        # 'UniProt status',
        # 'Organism',
        # 'Gene names',
        # 'Description',
        'Number of palmitoyl-proteomics articles',
        'Number of palmitoyl-proteomics studies where the protein appears in a high confidence hit list',
        'Number of technique categories used in palmitoyl-proteomics studies',
        'Technique categories used in palmitoyl-proteomics studies',
        'Number of targeted studies',
        'Targeted studies (PMIDs)',
        # 'PATs',
        # 'APTs',
        'Number of sites',
        'Sites in main isoform',
        'Number of isoforms',
        'Max number of cysteines',
        'Max number of cysteines in TM or cytosolic domain',
        'Predicted to be S-palmitoylated?',
        'Predicted to be S-palmitoylated in cytosolic domains?',
        'Protein has hits in SwissPalm?',
        'Orthologs of this protein have hits in SwissPalm?'
    ]]

    df = df.join(swisspalm)
    df = df.fillna('-')

    output_path = utils.get_timestamped_report_path(
        'hydroxylamine_supp_table_extra_sheets_{}.xlsx', DATA_OUTPUT_PATH)

    with pd.ExcelWriter(output_path) as w:
        df.to_excel(w, sheet_name='Hydroxylamine Sensitive')
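
A toy illustration of the fillna coalescing used above to merge per-cell-line metadata into single symbol/description columns; the suffixed column names mirror what get_hydroxylamine_sensitive_df is assumed to produce:

import pandas as pd

toy = pd.DataFrame(
    {
        'symbol (OCI-AML3)': ['NRAS', None],
        'symbol (NB-4)': [None, 'ZDHHC5'],
    },
    index=['P01111', 'Q9C0B5'],
)
# take the OCI-AML3 symbol when present, otherwise fall back to NB-4
toy['symbol'] = toy['symbol (OCI-AML3)'].fillna(toy['symbol (NB-4)'])
# -> 'NRAS' for P01111 (from OCI-AML3), 'ZDHHC5' for Q9C0B5 (from NB-4)
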
Code Example #5
def timecourse(ha_proteins):
    OUTPUT_PSM_LEVEL = True
    OUTPUT_TO_EXCEL = True
    MIN_NUM_UNIQUE_SEQUENCES = 2

    oci = filter_timecourse_data(
        'oci_p',
        'resubmission_17odya.yaml',
        ha_proteins,
        min_num_unique_sequences=MIN_NUM_UNIQUE_SEQUENCES,
        min_num_datasets=2,
        output_psm_level=OUTPUT_PSM_LEVEL,
        output_to_excel=OUTPUT_TO_EXCEL)

    oci_on = filter_timecourse_data(
        'oci_o',
        'resubmission_ocio_17odya.yaml',
        ha_proteins,
        min_num_unique_sequences=MIN_NUM_UNIQUE_SEQUENCES,
        min_num_datasets=2,
        output_psm_level=OUTPUT_PSM_LEVEL,
        output_to_excel=OUTPUT_TO_EXCEL)

    nb4_10plex = filter_timecourse_data(
        'nb4',
        'resubmission_nb4_17odya.yaml',
        ha_proteins,
        min_num_unique_sequences=MIN_NUM_UNIQUE_SEQUENCES,
        min_num_datasets=1,
        output_psm_level=OUTPUT_PSM_LEVEL,
        output_to_excel=OUTPUT_TO_EXCEL)

    nb4_6plex = filter_timecourse_data(
        '6plex_nb4',
        'resubmission_nb4_17odya_6plex.yaml',
        ha_proteins,
        min_num_unique_sequences=MIN_NUM_UNIQUE_SEQUENCES,
        min_num_datasets=1,
        output_psm_level=OUTPUT_PSM_LEVEL,
        output_to_excel=OUTPUT_TO_EXCEL)

    #%%

    both = nb4_10plex.join(nb4_6plex,
                           how='outer',
                           lsuffix='_10plex',
                           rsuffix='_6plex')
    both = both.drop(columns=list(both.filter(regex='mean')))
    # both = both.drop(columns=both.filter(regex='Palm').columns)
    means = both.filter(regex='percent').groupby(lambda x: x.split('.')[0],
                                                 axis=1).mean()
    means.columns = [f'{c} (mean)' for c in list(means.columns)]
    counts = both.filter(regex='percent').groupby(lambda x: x.split('.')[0],
                                                  axis=1).count()
    counts.columns = [f'{c} (count)' for c in list(counts.columns)]

    by_condition = both.filter(regex='percent').groupby(
        lambda x: x.split('.')[0], axis=1)
    stdevs = by_condition.std(ddof=0)
    stdevs.columns = [f'{c} (stdev)' for c in list(stdevs.columns)]

    both = both.join(means)
    both = both.join(counts)
    both = both.join(stdevs)

    both['symbol'] = both.symbol_6plex.fillna(both.symbol_10plex)
    both['description'] = both.description_6plex.fillna(
        both.description_10plex)

    both = both[both['ABD957 t1 (count)'].ge(4)]
    both = both[both['DMSO t1 (count)'].ge(4)]
    both = both[both['ABD957 t0 (count)'].ge(2)]
    both = both[both['DMSO t0 (count)'].ge(2)]
    # stdevs was computed before the count filters above; pandas realigns the
    # boolean mask to the remaining rows when indexing
    both = both[~stdevs.ge(100).any(axis=1)]

    both = both[[
        'symbol',
        'description',
        'DMSO t0 (mean)',
        'Palm M t0 (mean)',
        'ABD957 t0 (mean)',
        'DMSO t1 (mean)',
        'ABD957 t1 (mean)',
        'Palm M t1 (mean)',
        # 'ABD957 t0 (count)',
        # 'ABD957 t1 (count)',
        # 'DMSO t0 (count)',
        # 'DMSO t1 (count)',
        # 'Palm M t0 (count)',
        # 'Palm M t1 (count)',
        # 'ABD957 t0 (stdev)',
        # 'ABD957 t1 (stdev)',
        # 'DMSO t0 (stdev)',
        # 'DMSO t1 (stdev)',
        # 'Palm M t0 (stdev)',
        # 'Palm M t1 (stdev)',
        'num_unique_peptides (Replicate 1)_10plex',
        'num_unique_peptides (Replicate 2)_10plex',
        'num_unique_peptides (Replicate 1)_6plex',
        'num_unique_peptides (Replicate 2)_6plex',
        'DMSO t0.percent_of_control_0 (Replicate 1)_10plex',
        'DMSO t0.percent_of_control_1 (Replicate 1)',
        'DMSO t0.percent_of_control_0 (Replicate 2)_10plex',
        'DMSO t0.percent_of_control_1 (Replicate 2)',
        'DMSO t0.percent_of_control_0 (Replicate 1)_6plex',
        'DMSO t0.percent_of_control_0 (Replicate 2)_6plex',
        'Palm M t0.percent_of_control_0 (Replicate 1)',
        'Palm M t0.percent_of_control_0 (Replicate 2)',
        'ABD957 t0.percent_of_control_0 (Replicate 1)_10plex',
        'ABD957 t0.percent_of_control_1 (Replicate 1)',
        'ABD957 t0.percent_of_control_0 (Replicate 2)_10plex',
        'ABD957 t0.percent_of_control_1 (Replicate 2)',
        'ABD957 t0.percent_of_control_0 (Replicate 1)_6plex',
        'ABD957 t0.percent_of_control_0 (Replicate 2)_6plex',
        'DMSO t1.percent_of_control_0 (Replicate 1)_10plex',
        'DMSO t1.percent_of_control_1 (Replicate 1)',
        'DMSO t1.percent_of_control_2 (Replicate 1)',
        'DMSO t1.percent_of_control_0 (Replicate 2)_10plex',
        'DMSO t1.percent_of_control_1 (Replicate 2)',
        'DMSO t1.percent_of_control_2 (Replicate 2)',
        'DMSO t1.percent_of_control_0 (Replicate 1)_6plex',
        'DMSO t1.percent_of_control_0 (Replicate 2)_6plex',
        'Palm M t1.percent_of_control_0 (Replicate 1)',
        'Palm M t1.percent_of_control_0 (Replicate 2)',
        'ABD957 t1.percent_of_control_0 (Replicate 1)_10plex',
        'ABD957 t1.percent_of_control_1 (Replicate 1)',
        'ABD957 t1.percent_of_control_2 (Replicate 1)',
        'ABD957 t1.percent_of_control_0 (Replicate 2)_10plex',
        'ABD957 t1.percent_of_control_1 (Replicate 2)',
        'ABD957 t1.percent_of_control_2 (Replicate 2)',
        'ABD957 t1.percent_of_control_0 (Replicate 1)_6plex',
        'ABD957 t1.percent_of_control_0 (Replicate 2)_6plex',
    ]]

    both_output_path = utils.get_timestamped_report_path(
        'nb4_6plex_10plex_{}.xlsx', DATA_OUTPUT_PATH)
    both.to_excel(both_output_path)
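
The replicate averaging above groups columns by their condition prefix with groupby(..., axis=1), which newer pandas deprecates. A toy equivalent using the transpose spelling:

import pandas as pd

toy = pd.DataFrame({
    'DMSO t1.percent_of_control_0 (Replicate 1)': [90.0, 40.0],
    'DMSO t1.percent_of_control_1 (Replicate 1)': [110.0, 60.0],
}, index=['P11111', 'P22222'])

# group replicate columns by the text before the first '.' and average row-wise
means = toy.T.groupby(lambda c: c.split('.')[0]).mean().T
means.columns = [f'{c} (mean)' for c in means.columns]
# -> a single 'DMSO t1 (mean)' column with values 100.0 and 50.0
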
Code Example #6
def filter_hydroxylamine_data(params_filename: str,
                              output_name: str,
                              output_psm_level=False,
                              output_to_excel=True,
                              min_num_unique_sequences=1,
                              min_num_datasets=1):
    analysis, params = analyze(params_filename, user='******')
    dataset = analysis.datasets[0]
    df = analysis_to_df(analysis, dataset.channel_layout,
                        dataset.control_channels)

    if output_psm_level:
        output_psm_level_report(df, f'unfiltered_{output_name}_psm_level')

    df = df[~df.index.isin(analysis.filtered_out)]

    df = df[~df.uniprot.str.startswith('contaminant_')]

    # processing for filtered protein level table and plotting
    def agg(x):
        result = x.filter(regex='percent_of_control').mean()
        result['num_unique_peptides'] = x['clean_sequence'].nunique()
        return result

    result = group_by_protein_and_filter(df, agg, min_num_unique_sequences,
                                         min_num_datasets)

    result.columns = pd.MultiIndex.from_tuples(
        (i.replace('.percent_of_control_', ' '), j) for i, j in result.columns)

    # MultiIndex.set_levels no longer accepts inplace=True in recent pandas;
    # assign the rebuilt index back instead
    result.columns = result.columns.set_levels(
        [f'Replicate {i}' for i in range(1, len(dataset.experiments) + 1)],
        level=1)
    means = result.drop(columns=['num_unique_peptides']).groupby(
        level=0, axis=1).mean()

    result = add_meta(result, meta=df[['uniprot', 'symbol', 'description']])
    result = pd.merge(result, means, left_index=True, right_index=True)

    result['mean_reduction'] = 100 - means.filter(regex='Hydroxylamine').mean(
        axis=1)

    # ordering for supp table
    cols = list(result.columns)
    first_cols = [
        'symbol', 'description', 'mean_reduction', 'PBS 0', 'PBS 1', 'PBS 2',
        'Hydroxylamine 0', 'Hydroxylamine 1', 'Hydroxylamine 2'
    ]
    replicate_cols = [
        c for c in cols
        if c not in first_cols and c[0] != 'num_unique_peptides'
    ]
    num_peptide_cols = [
        c for c in cols
        if c not in first_cols and c[0] == 'num_unique_peptides'
    ]
    result = result[first_cols + num_peptide_cols + replicate_cols]

    result = result.rename(columns=dict(
        zip(replicate_cols, ['{} ({})'.format(*c) for c in replicate_cols])))
    result = result.rename(columns=dict(
        zip(num_peptide_cols, ['{} ({})'.format(*c)
                               for c in num_peptide_cols])))

    result.index.rename('uniprot', inplace=True)

    if output_to_excel:
        result.to_excel(
            utils.get_timestamped_report_path(
                f'filtered_hydroxylamine_{output_name}_{{}}.xlsx',
                DATA_OUTPUT_PATH),
            freeze_panes=(1, 1),
            # index=False
        )

    return result
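
group_by_protein_and_filter is defined elsewhere; a minimal sketch of the protein-level roll-up it is assumed to perform (group PSMs by uniprot, apply agg, enforce the unique-sequence cutoff), with the per-dataset / min_num_datasets handling omitted:

import pandas as pd

def group_by_protein_and_filter_sketch(df, agg, min_num_unique_sequences=1):
    # hypothetical simplification of group_by_protein_and_filter
    result = df.groupby('uniprot').apply(agg)
    return result[result['num_unique_peptides'] >= min_num_unique_sequences]

toy = pd.DataFrame({
    'uniprot': ['P11111', 'P11111', 'P22222'],
    'clean_sequence': ['AAAK', 'CCCR', 'DDDK'],
    'Hydroxylamine.percent_of_control_0': [20.0, 30.0, 90.0],
})

def agg(x):
    result = x.filter(regex='percent_of_control').mean()
    result['num_unique_peptides'] = x['clean_sequence'].nunique()
    return result

filtered = group_by_protein_and_filter_sketch(toy, agg, min_num_unique_sequences=2)
# -> only P11111 remains (two unique peptides); its percent column is the PSM mean
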
Code Example #7
def filter_timecourse_data(
    output_name,
    params_filename,
    uniprots_passing_ha_filter,
    min_num_unique_sequences=1,
    min_num_datasets=1,
    output_psm_level=False,
    output_to_excel=True,
):
    analysis, params = analyze(params_filename, user='******')
    dataset = analysis.datasets[0]
    df = analysis_to_df(analysis, dataset.channel_layout,
                        dataset.control_channels)

    if output_psm_level:
        output_psm_level_report(
            df, f'unfiltered_timecourse_{output_name}_psm_level')

    df = df[~df.index.isin(analysis.filtered_out)]
    df = df[~df.uniprot.str.startswith('contaminant_')]
    df = df[df.uniprot.isin(uniprots_passing_ha_filter)]

    def agg(x):
        percentages = x.filter(regex='percent_of_control')
        cv = percentages.apply(stats.variation)
        have_cv_ge = percentages[cv[cv.ge(0.5)].index.values]

        if len(x) > 2 and not have_cv_ge.empty:
            to_filter_out = have_cv_ge.apply(
                stats.zscore).abs().idxmax().values
            x = x[~x.index.isin(to_filter_out)]

        result = x.filter(regex='percent_of_control').mean()
        result['num_unique_peptides'] = x['clean_sequence'].nunique()
        return result

    # group by protein
    result = group_by_protein_and_filter(df, agg, min_num_unique_sequences,
                                         min_num_datasets)

    # result.columns = pd.MultiIndex.from_tuples(
    #     (i.split('.percent')[0], j) for i, j in result.columns
    # )

    # MultiIndex.set_levels no longer accepts inplace=True in recent pandas;
    # assign the rebuilt index back instead
    result.columns = result.columns.set_levels(
        [f'Replicate {i}' for i in range(1, len(dataset.experiments) + 1)],
        level=1)
    means = result.drop(columns=['num_unique_peptides']).groupby(
        level=0, axis=1).mean()
    means = means.groupby(by=lambda x: x.split('.percent')[0],
                          axis='columns').mean()
    result = add_meta(result, meta=df[['uniprot', 'symbol', 'description']])
    result = pd.merge(result, means, left_index=True, right_index=True)

    # ordering for supp table
    cols = list(result.columns)
    unique_conditions = list(
        SortedSet([next(iter(x.values())) for x in dataset.channel_layout]))
    first_cols = ['symbol', 'description', *unique_conditions]
    replicate_cols = [
        c for c in cols
        if c not in first_cols and c[0] != 'num_unique_peptides'
    ]
    num_peptide_cols = [
        c for c in cols
        if c not in first_cols and c[0] == 'num_unique_peptides'
    ]
    result = result[first_cols + num_peptide_cols + replicate_cols]

    result = result.rename(
        columns={x: f'{x} (mean)'
                 for x in unique_conditions})
    result = result.rename(columns=dict(
        zip(replicate_cols, ['{} ({})'.format(*c) for c in replicate_cols])))
    result = result.rename(columns=dict(
        zip(num_peptide_cols, ['{} ({})'.format(*c)
                               for c in num_peptide_cols])))

    result.index.rename('uniprot', inplace=True)

    result.to_excel(
        utils.get_timestamped_report_path(
            f'filtered_timecourse_{output_name}_{{}}.xlsx', DATA_OUTPUT_PATH),
        freeze_panes=(1, 1),
        # index=False
    )

    return result
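
A toy walk-through of the outlier handling inside agg() above: any percent_of_control column whose coefficient of variation is >= 0.5 has its highest-|z| PSM dropped before averaging (only when more than two PSMs are present):

import pandas as pd
from scipy import stats

percentages = pd.DataFrame(
    {'DMSO t1.percent_of_control_0': [95.0, 100.0, 400.0]},
    index=['psm_1', 'psm_2', 'psm_3'],
)
cv = percentages.apply(stats.variation)                 # per-column CV
noisy_cols = percentages[cv[cv.ge(0.5)].index.values]   # columns with CV >= 0.5
to_drop = noisy_cols.apply(stats.zscore).abs().idxmax().values
print(to_drop)  # ['psm_3'] -- the outlying PSM is removed before the mean
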
Code Example #8
    def filter_report(self):
        datasets_to_filter = self.experiment_ids_included

        m = self.data_model

        query = (m
            .select(
                m.id,
                m.experiment,
                m.uniprot,
                m.symbol,
                m.description,
                m.sequence,
                m.clean_sequence,
                m.ratio,
                m.num_ms2,
                m.rsquared,
                m.charge,
                m.meta
            )
            .where(m.experiment_id.in_(datasets_to_filter))
        )

        df = pd.DataFrame.from_records(list(query.dicts()))
        df = self.dataset_class.generate_id(df)

        for dataset in self.datasets:
            df.loc[df.experiment.isin(dataset.experiment_ids_included), 'condition'] = dataset.name

        df = df.set_index('id')

        df = df[[
            '_id',
            'experiment',
            'condition',
            'uniprot',
            'symbol',
            'sequence',
            'meta',
            'num_ms2',
            'rsquared',
            'ratio',
        ]]

        for filtered_out in self._analysis.filters.values():
            for cat, f in filtered_out.items():
                for filter_name, filtered_ids in f.items():
                    df.loc[filtered_ids, f'{cat}.{filter_name}'] = False

        # this should probably be just done in SQL
        # experiment_ids = df.experiment.unique().tolist()
        # q2 = Experiment.select(Experiment.id, Experiment.source_url).where(Experiment.id.in_(experiment_ids))
        # experiments = dict(list(q2.tuples()))
        # df.link = df.apply(lambda x: experiments[x.experiment] + x.link.split('"')[1], axis=1)

        report_output_name_template = 'filter_report_{}_{}_{{}}.xlsx'.format(
            self.name,
            self._analysis.id
        )

        report_output_path = utils.get_timestamped_report_path(
            report_output_name_template,
            self.output_path
        )

        # # df.set_index(['seq_id', 'condition', 'experiment']).sort_index(level=0).to_excel(report_output_path)
        df.set_index(['_id', 'experiment', 'condition']).sort_index(level=0).to_excel(report_output_path)
        return df
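
A toy version of the filter-annotation loop above, with a hypothetical filters mapping: every id removed by a filter gets False in a '<category>.<filter>' column, while rows never touched by that filter stay NaN:

import pandas as pd

df = pd.DataFrame({'rsquared': [0.95, 0.40, 0.88]}, index=['id_1', 'id_2', 'id_3'])
filters = {'step_1': {'quality': {'min_rsquared': ['id_2']}}}  # hypothetical structure

for filtered_out in filters.values():
    for cat, f in filtered_out.items():
        for filter_name, filtered_ids in f.items():
            df.loc[filtered_ids, f'{cat}.{filter_name}'] = False
# -> df gains a 'quality.min_rsquared' column that is False for id_2 only
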
Code Example #9
def sh_analysis(params_file: str, report_output_prefix: str):
    with open('input/human.json') as f:
        whitelist = json.loads(f.read())

    blacklist = [
        'P35030', 'P07477', 'P07478', # trypsins 
        'O15427', # SLC16A3
        'P00734', # F2
        'Q14703', # MBTPS1 (serine protease)
        'Q8NBP7', # PCSK9
    ]

    analysis, params = analyze(params_file, user='******')

    m = analysis.data_model

    query = (m
        .select(
            m.id,
            m.experiment,
            m.uniprot,
            m.symbol,
            m.ratio,
        )
        .where(
            (m.experiment_id.in_(analysis.experiment_ids_included)) &
            (m.id.not_in(analysis.filtered_out))
        )
    )

    df = pd.DataFrame.from_records(list(query.dicts()))
    df = analysis.dataset_class.generate_id(df)

    for dataset in analysis.datasets:
        df.loc[df.experiment.isin(dataset.experiment_ids_included), 'condition'] = dataset.name

    df = df.set_index('id')

    df = df[df.uniprot.isin(whitelist)]
    df = df[~df.uniprot.isin(blacklist)]

    result_df = (df
        .groupby(['uniprot', 'symbol', 'condition', 'experiment'])
        .agg(ratio=('ratio', 'median'), num_peptides=('ratio', len))
        .groupby(level=('uniprot', 'symbol', 'condition'))  
        .agg(ratio=('ratio', 'median'), num_peptides=('num_peptides', 'sum'), ratio_list=('ratio', list))
    )

    # result_df['num_peptides'] = result_df.num_peptides.astype(int)
    result_df = result_df.unstack(level='condition')
    result_df['num_peptides'] = result_df.num_peptides.fillna(0).astype(int)
    # result_df['ratio'] = result_df.ratio.fillna('-')

    def ratios_to_string(ratios, invert=True):
        if not isinstance(ratios, list):
            return
        if invert:
            ratios = [1/r for r in ratios]
        return ', '.join(map(str, ratios))

    result_df['ratio_list'] = result_df.ratio_list.transform(lambda x: x.apply(ratios_to_string))
    result_df['ratio'] = result_df.ratio.rdiv(1)
    
    result_df.columns = result_df.columns.swaplevel()

    sorted_col_multiindex, _ = result_df.columns.sortlevel()
    result_df = result_df[sorted_col_multiindex]

    # result_df.loc[:, pd.IndexSlice[:, 'ratio']] = (result_df
    #     .loc[:, pd.IndexSlice[:, 'ratio']]
    #     .rdiv(1)
    # )

    result_df = (result_df
        .reset_index()
        .sort_values(by=[(analysis.datasets[0].name, 'ratio'), 'symbol'], ascending=True)
        .fillna('-')
    )

    report_output_path = utils.get_timestamped_report_path(
        f'{report_output_prefix}_{{}}.csv',
        pathlib.Path(analysis.params.output_folder),
    )

    result_df.to_csv(report_output_path, index=False, encoding='utf-8-sig')
    return analysis, result_df
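
A toy version of the column reshaping near the end of sh_analysis: after unstacking by condition the columns are (value, condition) pairs, and swapping plus sorting the levels groups each value type under its condition:

import pandas as pd

df = pd.DataFrame(
    {('ratio', 'DMSO'): [1.2], ('ratio', 'ABD957'): [0.4],
     ('num_peptides', 'DMSO'): [3], ('num_peptides', 'ABD957'): [2]},
    index=['P11111'],
)
df.columns = df.columns.swaplevel()        # (value, condition) -> (condition, value)
sorted_cols, _ = df.columns.sortlevel()    # lexicographic sort of both levels
df = df[sorted_cols]
# -> columns ordered as ('ABD957', 'num_peptides'), ('ABD957', 'ratio'),
#    ('DMSO', 'num_peptides'), ('DMSO', 'ratio')
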
Code Example #10
    'DKO1 ABD957 t0.percent_of_control_0 (Replicate 1)_16plex',
    'DKO1 ABD957 t0.percent_of_control_1 (Replicate 1)_16plex',
    'Parental ABD957 t0.percent_of_control_0 (Replicate 1)_16plex',
    'Parental ABD957 t0.percent_of_control_1 (Replicate 1)_16plex',
    'DKO1 DMSO t1.percent_of_control_0 (Replicate 1)_10plex',
    'DKO1 DMSO t1.percent_of_control_1 (Replicate 1)_10plex',
    'DKO1 DMSO t1.percent_of_control_2 (Replicate 1)_10plex',
    'DKO1 DMSO t1.percent_of_control_0 (Replicate 1)_16plex',
    'DKO1 DMSO t1.percent_of_control_1 (Replicate 1)_16plex',
    'Parental DMSO t1.percent_of_control_0 (Replicate 1)_16plex',
    'Parental DMSO t1.percent_of_control_1 (Replicate 1)_16plex',
    'DKO1 ABD957 t1.percent_of_control_0 (Replicate 1)_10plex',
    'DKO1 ABD957 t1.percent_of_control_1 (Replicate 1)_10plex',
    'DKO1 ABD957 t1.percent_of_control_2 (Replicate 1)_10plex',
    'DKO1 ABD957 t1.percent_of_control_0 (Replicate 1)_16plex',
    'DKO1 ABD957 t1.percent_of_control_1 (Replicate 1)_16plex',
    'Parental ABD957 t1.percent_of_control_0 (Replicate 1)_16plex',
    'Parental ABD957 t1.percent_of_control_1 (Replicate 1)_16plex',
]]

both = both.rename(
    columns=lambda x: x.replace('Replicate 1)_10plex', 'Experiment 1)_10plex'))
both = both.rename(
    columns=lambda x: x.replace('Replicate 1)_16plex', 'Experiment 2)_16plex'))

both_output_path = utils.get_timestamped_report_path(
    'dko_16plex_10plex_{}.xlsx', pathlib.Path('output'))
both.to_excel(both_output_path)

# %%