Esempio n. 1
0
def _get_reads_info(aligned_reads_file):
    """
    Collect the hole numbers seen in an alignment dataset, grouped by cell.

    Returns a tuple of length 2.
    First item maps each cell name to the set of hole numbers found for it.
    Second item is the instrument name, derived from the first cell that is
    encountered; it is None when the dataset yields no reads.

    :param aligned_reads_file: (str) path to aligned_reads[.xml,.bam]
    :return tuple (reads_by_cell, instrument) (defaultdict of set, string or None)
    """
    inst = None
    reads_by_cell = defaultdict(set)
    with AlignmentSet(aligned_reads_file) as ds:
        for bamfile in ds.resourceReaders():
            if ds.isIndexed:
                logging.info("Indexed file - will use fast loop.")
                # The cell only depends on the read group, so resolve each
                # read group once instead of once per read. The cache is
                # scoped to this reader because the ids are resolved through
                # this reader's readGroupInfo().
                cell_by_rg = {}
                for (hole, rgId) in zip(bamfile.holeNumber, bamfile.qId):
                    cell = cell_by_rg.get(rgId)
                    if cell is None:
                        movie_name = bamfile.readGroupInfo(rgId).MovieName
                        cell = movie_to_cell(movie_name)
                        cell_by_rg[rgId] = cell
                    if inst is None:
                        inst = _cell_2_inst(cell)
                    reads_by_cell[cell].add(hole)
            else:
                # No index available: walk every alignment record, but still
                # convert each distinct movie name to a cell only once.
                cell_by_movie = {}
                for aln in bamfile:
                    hole = aln.HoleNumber
                    movie_name = aln.movieName
                    cell = cell_by_movie.get(movie_name)
                    if cell is None:
                        cell = movie_to_cell(movie_name)
                        cell_by_movie[movie_name] = cell
                    if inst is None:
                        inst = _cell_2_inst(cell)
                    reads_by_cell[cell].add(hole)
    return reads_by_cell, inst
Esempio n. 2
0
def _get_reads_info(aligned_reads_file):
    """
    Extract per-cell hole numbers and the instrument name from the BAM files.

    Returns a tuple of length 2.
    First item is a dictionary of sets: each cell name is mapped to the set
    of hole numbers observed for that cell.
    Second item is the instrument name taken from the first cell seen, or
    None if no read was found at all.

    :param aligned_reads_file: (str) path to aligned_reads[.xml,.bam]
    :return tuple (reads_by_cell, instrument) (defaultdict of set, string or None)
    """
    inst = None
    reads_by_cell = defaultdict(set)
    with AlignmentSet(aligned_reads_file) as ds:
        for bamfile in ds.resourceReaders():
            if ds.isIndexed:
                logging.info("Indexed file - will use fast loop.")
                # Memoise read group -> cell for this reader so the lookup
                # and the movie-to-cell conversion run once per read group
                # rather than once per read.
                rg_to_cell = {}
                for (hole, rgId) in zip(bamfile.holeNumber, bamfile.qId):
                    if rgId not in rg_to_cell:
                        movie_name = bamfile.readGroupInfo(rgId).MovieName
                        rg_to_cell[rgId] = movie_to_cell(movie_name)
                    cell = rg_to_cell[rgId]
                    if inst is None:
                        inst = _cell_2_inst(cell)
                    reads_by_cell[cell].add(hole)
            else:
                # Unindexed reader: iterate the records themselves, memoising
                # movie name -> cell for the same reason as above.
                movie_to_cell_cache = {}
                for aln in bamfile:
                    hole = aln.HoleNumber
                    movie_name = aln.movieName
                    if movie_name not in movie_to_cell_cache:
                        movie_to_cell_cache[movie_name] = movie_to_cell(movie_name)
                    cell = movie_to_cell_cache[movie_name]
                    if inst is None:
                        inst = _cell_2_inst(cell)
                    reads_by_cell[cell].add(hole)
    return reads_by_cell, inst
 def test_timestamped_moviename(self):
     # Checks how a timestamped movie name maps to a cell name and to an
     # instrument id.
     timestamped_movie = "m54004_151002_00100"
     derived_cell = movie_to_cell(timestamped_movie)
     # XXX for now the cell name is expected to be the timestamped movie
     # name itself, unchanged.
     self.assertEqual(timestamped_movie, derived_cell)
     # The instrument id is the numeric part that follows the leading "m".
     instrument = _cell_2_inst(derived_cell)
     self.assertEqual('54004', instrument)
 def test_timestamped_moviename(self):
     # A timestamped movie name should round-trip through movie_to_cell and
     # yield the expected instrument id.
     expected_instrument = '54004'
     movie = "m54004_151002_00100"
     cell = movie_to_cell(movie)
     # XXX temporary expectation: the cell name equals the timestamped movie
     # name.
     self.assertEqual(movie, cell)
     self.assertEqual(expected_instrument, _cell_2_inst(cell))
Esempio n. 5
0
    def test_movie_2_cell(self):
        """
        Parse a cell name from a movie name
        """
        # NOTE: the docstring above is written to the log at runtime via
        # __doc__, so its text is part of the test's output.
        full_movie_name = 'm120128_025832_42129_c100277632550000001523007907041250_s2_p0'
        expected_cell = 'm120128_025832_42129_c100277632550000001523007907041250'
        try:
            log.info(TestUtil.test_movie_2_cell.__doc__)
            actual_cell = movie_to_cell(full_movie_name)
            self.assertEqual(expected_cell, actual_cell)
        except:
            # Record the traceback in the log, then let the failure propagate.
            log.error(traceback.format_exc())
            raise
Esempio n. 6
0
    def test_movie_2_cell(self):
        """
        Parse a cell name from a movie name
        """
        # The cell name is the movie name without its trailing "_s2_p0" part.
        cell = 'm120128_025832_42129_c100277632550000001523007907041250'
        movie = cell + '_s2_p0'
        try:
            # The docstring doubles as the log message announcing this test.
            log.info(TestUtil.test_movie_2_cell.__doc__)
            self.assertEqual(cell, movie_to_cell(movie))
        except:
            # Log the full traceback before re-raising so the failure also
            # shows up in the log output.
            log.error(traceback.format_exc())
            raise
Esempio n. 7
0
def run(dataset_file):
    """Count the distinct movies and cells referenced by a dataset.

    Opens ``dataset_file``, gathers the movie name of every external file
    (from the file path for an ``HdfSubreadSet``, otherwise from the ``PU``
    field of each BAM read group), converts the movies to cell names and
    builds a report holding both counts.

    :param dataset_file: path of the dataset to open
    :return: the result of ``meta_rpt.apply_view`` applied to a ``Report``
        with the cell-count and movie-count attributes
    """
    with openDataSet(dataset_file) as ds:
        # The dataset type cannot change while looping over its files, so
        # evaluate the check once.
        is_hdf_subreads = type(ds).__name__ == "HdfSubreadSet"
        movies = set()
        for file_name in ds.toExternalFiles():
            if is_hdf_subreads:
                movies.add(path_to_movie(file_name))
            else:
                with BamReader(file_name) as bam:
                    for rg in bam.peer.header["RG"]:
                        movies.add(rg["PU"])
        # Several movies may belong to the same cell, hence the second set.
        cells = set(movie_to_cell(movie) for movie in movies)
        ncells_attr = Attribute(Constants.A_NCELLS, len(cells))
        nmovies_attr = Attribute(Constants.A_NMOVIES, len(movies))
        attrs = [ncells_attr, nmovies_attr]
        report = Report(meta_rpt.id, attributes=attrs)
        return meta_rpt.apply_view(report)
Esempio n. 8
0
def run(dataset_file):
    """Count the distinct movies and SMRT cells referenced by a dataset.

    Opens ``dataset_file`` and collects one movie name per external file
    (derived from the path for an ``HdfSubreadSet``, otherwise read from the
    ``PU`` field of every BAM read group). The movie names are then mapped to
    cell names and both totals are stored in an ``overview`` report.

    :param dataset_file: path of the dataset to open
    :return: a ``Report`` with the ``ncells`` and ``nmovies`` attributes
    """
    with openDataSet(dataset_file) as ds:
        # Loop-invariant: the dataset type is the same for every file.
        is_hdf_subreads = type(ds).__name__ == "HdfSubreadSet"
        movies = set()
        for file_name in ds.toExternalFiles():
            if is_hdf_subreads:
                movies.add(path_to_movie(file_name))
            else:
                with BamReader(file_name) as bam:
                    for rg in bam.peer.header["RG"]:
                        movies.add(rg["PU"])
        # De-duplicate again: different movies can map to the same cell.
        cells = set(movie_to_cell(movie) for movie in movies)
        ncells_attr = Attribute('ncells', len(cells), name="SMRT Cells")
        nmovies_attr = Attribute('nmovies', len(movies), name="Movies")
        attrs = [ncells_attr, nmovies_attr]
        report = Report('overview', attributes=attrs)
        return report
Esempio n. 9
0
def run(dataset_file):
    """Count the distinct movies and cells referenced by a dataset.

    Opens ``dataset_file``, gathers the movie name of every external file
    (from the file path for an ``HdfSubreadSet``, otherwise from the ``PU``
    field of each BAM read group), converts the movies to cell names and
    builds a report holding both counts.

    :param dataset_file: path of the dataset to open
    :return: the result of ``spec.apply_view`` applied to a ``Report`` with
        the cell-count and movie-count attributes
    """
    with openDataSet(dataset_file) as ds:
        # Evaluate the dataset-type check once; it does not depend on the
        # file being visited.
        is_hdf_subreads = type(ds).__name__ == "HdfSubreadSet"
        movies = set()
        for file_name in ds.toExternalFiles():
            if is_hdf_subreads:
                movies.add(path_to_movie(file_name))
            else:
                with BamReader(file_name) as bam:
                    for rg in bam.peer.header["RG"]:
                        movies.add(rg["PU"])
        # Several movies may share a cell, so collapse them into a set.
        cells = set(movie_to_cell(movie) for movie in movies)
        ncells_attr = Attribute(Constants.A_NCELLS, len(cells))
        nmovies_attr = Attribute(Constants.A_NMOVIES, len(movies))
        attrs = [ncells_attr, nmovies_attr]
        report = Report(Constants.R_ID, attributes=attrs)
        return spec.apply_view(report)