Beispiel #1
0
def test_calculate_psi(event_annotation, reads2d, isoform1_junctions,
                       isoform2_junctions, psi_df, summary_df):
    """Check that ``calculate_psi`` reproduces the expected psi and summary.

    ``psi_df`` and ``summary_df`` hold the expected frames; the remaining
    arguments are passed positionally to ``calculate_psi``.
    """
    from outrigger.psi.compute import calculate_psi

    observed_psi, observed_summary = calculate_psi(
        event_annotation, reads2d, isoform1_junctions, isoform2_junctions)

    # A CSV round trip keeps the index name but drops the columns' name, so
    # clear it on the freshly computed frame before comparing
    observed_psi.columns.name = None

    comparisons = ((observed_psi, psi_df), (observed_summary, summary_df))
    for observed, expected in comparisons:
        pdt.assert_frame_equal(observed, expected)
Beispiel #2
0
def test_calculate_psi(event_annotation, reads2d,
                       isoform1_junctions, isoform2_junctions,
                       psi_df, summary_df):
    """Compare ``calculate_psi`` results with the expected fixture frames.

    The first four arguments are forwarded positionally to
    ``calculate_psi``; ``psi_df`` and ``summary_df`` are the expected psi
    and summary frames respectively.
    """
    from outrigger.psi.compute import calculate_psi

    results = calculate_psi(event_annotation, reads2d,
                            isoform1_junctions, isoform2_junctions)
    computed_psi, computed_summary = results

    # The expected psi frame has no columns name (only the index name is
    # preserved when psi is written to CSV), so drop it here as well
    computed_psi.columns.name = None

    pdt.assert_frame_equal(computed_psi, psi_df)
    pdt.assert_frame_equal(computed_summary, summary_df)
Beispiel #3
0
    def execute(self):
        """Calculate percent spliced in (psi) of splicing events

        The junction reads returned by ``self.csv()`` are pivoted into a
        2d table (``sample_id_col`` as rows, ``junction_id_col`` as
        columns, ``reads_col`` as values, missing entries set to 0). Then,
        for every ``(name, abbreviation)`` pair in
        ``outrigger.common.SPLICE_TYPES``:

        - the event annotation CSV named by
          ``self.maybe_get_validated_events`` is read; splice types whose
          file does not exist are skipped,
        - ``compute.calculate_psi`` produces a Psi table and a summary,
        - they are written to ``<psi_folder>/<abbrev>/psi.csv`` and
          ``<psi_folder>/<abbrev>/summary.csv``.

        Finally all per-type Psi tables are concatenated column-wise,
        transposed and written to ``<psi_folder>/outrigger_psi.csv``.

        Raises
        ------
        ValueError
            If every splice type was skipped, so there are no Psi tables
            to concatenate.
        """

        logger = logging.getLogger('outrigger.psi')
        if self.debug:
            logger.setLevel(logging.DEBUG)

        junction_reads = self.csv()

        metadata_csv = os.path.join(self.junctions_folder, METADATA_CSV)
        # NOTE(review): presumably writes per-junction metadata to
        # metadata_csv -- confirm against junction_metadata()
        self.junction_metadata(junction_reads, metadata_csv)

        # Combinations absent from the long-form table become NaN after the
        # pivot; treat them as zero reads so the table can be integer-typed
        junction_reads_2d = junction_reads.pivot(
            index=self.sample_id_col, columns=self.junction_id_col,
            values=self.reads_col).fillna(0).astype(int)

        logger.debug('\n--- Splice Junction reads ---')
        logger.debug(repr(junction_reads.head()))

        psis = []
        for splice_name, splice_abbrev in outrigger.common.SPLICE_TYPES:
            filename = self.maybe_get_validated_events(splice_abbrev)
            if not os.path.exists(filename):
                util.progress('No {name} ({abbrev}) events found, '
                              'skipping.'.format(name=splice_name,
                                                 abbrev=splice_abbrev))
                continue
            util.progress('Reading {name} ({abbrev}) events from {filename}'
                          ' ...'.format(name=splice_name, abbrev=splice_abbrev,
                                        filename=filename))

            event_annotation = pd.read_csv(filename, index_col=0,
                                           low_memory=self.low_memory)
            util.done()

            # Keyword arguments for calculate_psi specific to this splice type
            isoform_junctions = outrigger.common.ISOFORM_JUNCTIONS[
                splice_abbrev]
            logger.debug('\n--- Splicing event annotation ---')
            logger.debug(repr(event_annotation.head()))

            util.progress(
                'Calculating percent spliced-in (Psi) scores on '
                '{name} ({abbrev}) events ...'.format(
                    name=splice_name, abbrev=splice_abbrev))
            # Splice type percent spliced-in (psi) and summary
            type_psi, summary = compute.calculate_psi(
                event_annotation, junction_reads_2d,
                min_reads=self.min_reads, n_jobs=self.n_jobs,
                method=self.method,
                uneven_coverage_multiplier=self.uneven_coverage_multiplier,
                **isoform_junctions)

            # Write this event's percent spliced-in matrix
            psi_csv = os.path.join(self.psi_folder, splice_abbrev, 'psi.csv')
            util.progress('Writing {name} ({abbrev}) Psi values to {filename}'
                          ' ...'.format(name=splice_name, abbrev=splice_abbrev,
                                        filename=psi_csv))
            self.maybe_make_folder(os.path.dirname(psi_csv))
            type_psi.to_csv(psi_csv, na_rep='NA')

            # Write this event's summary of events and why they weren't or were
            # calculated Psi on
            summary_csv = os.path.join(self.psi_folder, splice_abbrev,
                                       'summary.csv')
            util.progress('Writing {name} ({abbrev}) event summaries (e.g. '
                          'number of reads, why an event does not have a Psi '
                          'score) to {filename} ...'
                          ''.format(name=splice_name, abbrev=splice_abbrev,
                                    filename=summary_csv))
            self.maybe_make_folder(os.path.dirname(summary_csv))
            summary.to_csv(summary_csv, na_rep='NA', index=False)
            psis.append(type_psi)
            util.done()

        # pd.concat fails with an unhelpful "No objects to concatenate" on an
        # empty list, so report the actual cause when every type was skipped
        if not psis:
            raise ValueError(
                'No splicing events were found for any splice type, so '
                'there are no Psi scores to concatenate')

        util.progress('Concatenating all calculated psi scores '
                      'into one big matrix...')
        splicing = pd.concat(psis, axis=1)
        util.done()
        # NOTE(review): the matrix is transposed right before writing while
        # the message below says "samples x features" -- confirm which
        # orientation is intended
        splicing = splicing.T
        combined_csv = os.path.join(self.psi_folder, 'outrigger_psi.csv')
        util.progress('Writing a samples x features matrix of Psi '
                      'scores to {} ...'.format(combined_csv))
        splicing.to_csv(combined_csv, na_rep='NA')
        util.done()