예제 #1
0
    def loadIndexedBam(self, filePath, viewRegions, stringency=STRICT):
        """
        Load alignment records from an indexed BAM file, limited to regions.

        Functions like loadAlignments, but uses BAM index files to look at
        fewer blocks, and only returns records that fall within the given
        ReferenceRegions. A BAM index file is required.

        :param str filePath: The path name to load indexed BAM formatted
        alignment records from. Globs/directories are supported.
        :param list<ReferenceRegion> viewRegions: The ReferenceRegions to
        filter on.
        :param int stringency: The validation stringency to use when validating
        the BAM/CRAM/SAM format header. Defaults to STRICT.

        :return: An AlignmentRecordRDD which wraps the RDD of alignment
        records, the sequence dictionary representing contigs the alignment
        records may be aligned to, and the record group dictionary for the
        alignment records if one is available.
        :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
        """

        jvm = self._jvm

        # every region has to be converted to its JVM form before the call
        javaRegions = [region._toJava(jvm) for region in viewRegions]

        # bind the loader first so the stringency conversion still happens
        # after the context lookup
        loadFn = self.__jac.loadIndexedBam
        indexedReads = loadFn(filePath, javaRegions, _toJava(stringency, jvm))

        return AlignmentRecordRDD(indexedReads, self._sc)
예제 #2
0
    def loadAlignments(self, filePath, stringency=STRICT):
        """
        Load alignment records from a path into an AlignmentRecordRDD.

        The format is chosen from the path name ending:
        * .bam/.cram/.sam is read as BAM/CRAM/SAM format,
        * .fa/.fasta is read as FASTA format,
        * .fq/.fastq is read as FASTQ format, and
        * .ifq is read as interleaved FASTQ format.

        If none of these match, fall back to Parquet + Avro.

        For FASTA, FASTQ, and interleaved FASTQ formats, compressed files are
        supported through compression codecs configured in Hadoop, which by
        default include .gz and .bz2, but can include more.

        :param str filePath: The path to load the file from.
        :param stringency: The validation stringency to apply. Defaults to
        STRICT.
        :return: An AlignmentRecordRDD wrapping the loaded reads.
        :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
        """

        loadFn = self.__jac.loadAlignments

        # the stringency is converted with _toJava before being handed over
        javaStringency = _toJava(stringency, self._jvm)
        loadedReads = loadFn(filePath, javaStringency)

        return AlignmentRecordRDD(loadedReads, self._sc)
예제 #3
0
    def loadAlignments(self, filePath):
        """
        Load a read file from a path into an AlignmentRecordRDD.

        This method can load SAM, BAM, and ADAM files. The format is chosen
        from the path name ending:
        * .bam/.cram/.sam is read as BAM/CRAM/SAM format,
        * .fa/.fasta is read as FASTA format,
        * .fq/.fastq is read as FASTQ format, and
        * .ifq is read as interleaved FASTQ format.

        If none of these match, fall back to Parquet + Avro.

        For FASTA, FASTQ, and interleaved FASTQ formats, compressed files are
        supported through compression codecs configured in Hadoop, which by
        default include .gz and .bz2, but can include more.

        :param str filePath: The path to load the file from.
        :return: An AlignmentRecordRDD wrapping the loaded reads.
        :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
        """

        # the path is passed unchanged to the wrapped context, which does
        # the actual loading
        loadFn = self.__jac.loadAlignments
        loadedReads = loadFn(filePath)

        return AlignmentRecordRDD(loadedReads, self._sc)