def setUp(self):
  """Sets up the BAM test resource and the reader options used by the tests.

  Populates three attributes:
    bam: result of test_utils.genomics_core_testdata('test.bam').
    options: a default-constructed SamReaderOptions proto.
    indexed_options: a SamReaderOptions proto whose index_mode is
      INDEX_BASED_ON_FILENAME.
  """
  # Resolve the 'test.bam' test-data resource through the shared test helper.
  self.bam = test_utils.genomics_core_testdata('test.bam')

  # Reader options with every field left at its proto default.
  self.options = reads_pb2.SamReaderOptions()

  # Reader options that additionally request filename-based index lookup.
  index_mode = index_pb2.INDEX_BASED_ON_FILENAME
  self.indexed_options = reads_pb2.SamReaderOptions(index_mode=index_mode)
def setUp(self):
  """Sets up the BAM test resource and default reader options for the tests.

  Populates two attributes:
    bam: result of test_utils.genomics_core_testdata('test.bam').
    options: a default-constructed SamReaderOptions proto.
  """
  testdata_name = 'test.bam'
  # Resolve the test-data resource through the shared test helper.
  self.bam = test_utils.genomics_core_testdata(testdata_name)
  # Reader options with every field left at its proto default.
  self.options = reads_pb2.SamReaderOptions()
def __init__(self, input_path,
             read_requirements=None,
             parse_aux_fields=False,
             hts_block_size=None,
             downsample_fraction=None,
             random_seed=None):
  """Initializes a NativeSamReader.

  Inputs whose path ends in '.tfbam' are delegated to the lazily imported
  tfbam_lib.tfbam_reader; every other input is opened with
  sam_reader.SamReader.from_file.

  Args:
    input_path: str. A path to a resource containing SAM/BAM records.
      Currently supports SAM text format and BAM binary format.
    read_requirements: optional ReadRequirement proto. If not None, this proto
      is used to control which reads are filtered out by the reader before
      they are passed to the client.
    parse_aux_fields: optional bool, defaulting to False. If False, we do not
      parse the auxiliary fields of the SAM/BAM records (see SAM spec for
      details). Parsing the aux fields is unnecessary for many applications,
      and adds a significant parsing cost to access. If you need these aux
      fields, set parse_aux_fields to True and these fields will be parsed and
      populate the appropriate Read proto fields (e.g., read.info).
    hts_block_size: int or None. If specified, this configures the block size
      of the underlying htslib file object. Larger values (e.g. 1M) may be
      beneficial for reading remote files. If None, the reader uses the
      default htslib block size.
    downsample_fraction: float in the interval [0.0, 1.0] or None. If
      specified as a positive float, the reader will only keep each read with
      probability downsample_fraction, randomly. If None or zero, all reads
      are kept.
    random_seed: None or int. The random seed to use with this sam reader, if
      needed. If None and downsampling is requested on a non-.tfbam input, a
      fixed seed value is substituted.

  Raises:
    ValueError: If downsample_fraction is neither None nor zero and is not in
      the interval (0.0, 1.0]. This check is performed here only for
      non-.tfbam inputs.
    ImportError: If someone tries to load a tfbam file and the tfbam_lib
      module cannot be imported.
  """
  if input_path.endswith('.tfbam'):
    # Delayed loading of tfbam_lib. Only the import itself is guarded, so an
    # ImportError raised while constructing the reader is not misreported as
    # a missing tfbam_lib module.
    try:
      from tfbam_lib import tfbam_reader  # pylint: disable=g-import-not-at-top
    except ImportError:
      raise ImportError(
          'tfbam_lib module not found, cannot read .tfbam files.')
    self._reader = tfbam_reader.make_sam_reader(
        input_path,
        read_requirements=read_requirements,
        unused_block_size=hts_block_size,
        downsample_fraction=downsample_fraction,
        random_seed=random_seed)
  else:
    aux_field_handling = reads_pb2.SamReaderOptions.SKIP_AUX_FIELDS
    if parse_aux_fields:
      aux_field_handling = reads_pb2.SamReaderOptions.PARSE_ALL_AUX_FIELDS

    # We make 0 be a valid value that means "keep all reads" so that proto
    # defaults (=0) do not omit all reads.
    if downsample_fraction is not None and downsample_fraction != 0:
      if not 0.0 < downsample_fraction <= 1.0:
        raise ValueError(
            'downsample_fraction must be in the interval (0.0, 1.0]',
            downsample_fraction)

      if random_seed is None:
        # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
        random_seed = 2928130004

    self._reader = sam_reader.SamReader.from_file(
        input_path.encode('utf8'),
        reads_pb2.SamReaderOptions(
            read_requirements=read_requirements,
            aux_field_handling=aux_field_handling,
            # The proto field is numeric; None is mapped to 0.
            hts_block_size=(hts_block_size or 0),
            downsample_fraction=downsample_fraction,
            random_seed=random_seed))

  # Expose the underlying reader's header on this object.
  self.header = self._reader.header

  super(NativeSamReader, self).__init__()