def loadIndexedBam(self, filePath, viewRegions, stringency=STRICT):
    """
    Load alignment records from indexed BAM, restricted to a set of regions.

    Functions like loadAlignments, but uses BAM index files to look at fewer
    blocks, and only returns records within the specified ReferenceRegions.
    BAM index file required.

    :param str filePath: The path name to load indexed BAM formatted
      alignment records from. Globs/directories are supported.
    :param list<ReferenceRegion> viewRegions: List of ReferenceRegion to
      filter on.
    :param int stringency: The validation stringency to use when validating
      the BAM/CRAM/SAM format header. Defaults to ValidationStringency.STRICT.
    :return: Returns an AlignmentRecordRDD which wraps the RDD of alignment
      records, sequence dictionary representing contigs the alignment records
      may be aligned to, and the record group dictionary for the alignment
      records if one is available.
    :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
    """

    # translate reference regions into jvm types
    javaRrs = [rr._toJava(self._jvm) for rr in viewRegions]

    # the stringency is likewise converted to its jvm counterpart before
    # being handed to the wrapped java context
    adamRdd = self.__jac.loadIndexedBam(filePath,
                                        javaRrs,
                                        _toJava(stringency, self._jvm))

    return AlignmentRecordRDD(adamRdd, self._sc)
def loadAlignments(self, filePath, stringency=STRICT):
    """
    Load alignment records into an AlignmentRecordRDD.

    The format is selected from the ending of the path name:

    * .bam/.cram/.sam as BAM/CRAM/SAM format,
    * .fa/.fasta as FASTA format,
    * .fq/.fastq as FASTQ format, and
    * .ifq as interleaved FASTQ format.

    If none of these match, fall back to Parquet + Avro.

    For FASTA, FASTQ, and interleaved FASTQ formats, compressed files are
    supported through compression codecs configured in Hadoop, which by
    default include .gz and .bz2, but can include more.

    :param str filePath: The path to load the file from.
    :param stringency: The validation stringency to apply. Defaults to STRICT.
    :return: Returns an RDD containing reads.
    :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
    """

    # grab the bound loader first, then convert the stringency to its jvm
    # representation as part of the call
    loader = self.__jac.loadAlignments
    jvmReads = loader(filePath, _toJava(stringency, self._jvm))

    # wrap the jvm-side result together with the context held in self._sc
    return AlignmentRecordRDD(jvmReads, self._sc)
def loadAlignments(self, filePath):
    """
    Loads in an ADAM read file. This method can load SAM, BAM, and ADAM files.

    The format is selected from the ending of the path name:

    * .bam/.cram/.sam as BAM/CRAM/SAM format,
    * .fa/.fasta as FASTA format,
    * .fq/.fastq as FASTQ format, and
    * .ifq as interleaved FASTQ format.

    If none of these match, fall back to Parquet + Avro.

    For FASTA, FASTQ, and interleaved FASTQ formats, compressed files are
    supported through compression codecs configured in Hadoop, which by
    default include .gz and .bz2, but can include more.

    :param str filePath: The path to load the file from.
    :return: Returns an RDD containing reads.
    :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
    """

    # NOTE(review): another loadAlignments definition that also accepts a
    # stringency argument appears in this source. If both live in the same
    # class, the later definition replaces the earlier one -- confirm which
    # signature is intended.
    javaContext = self.__jac
    jvmReads = javaContext.loadAlignments(filePath)

    # wrap the jvm-side result together with the context held in self._sc
    return AlignmentRecordRDD(jvmReads, self._sc)