def setUp(self):
    """Builds the per-test fixtures.

    Stores the 'test.bam' test data resource returned by
    test_utils.genomics_core_testdata and a default-constructed
    SamReaderOptions proto on the test instance.
    """
    bam_resource = test_utils.genomics_core_testdata('test.bam')
    default_options = reads_pb2.SamReaderOptions()
    self.bam, self.options = bam_resource, default_options
 def setUp(self):
     """Builds the per-test fixtures.

     Stores the 'test.bam' test data resource, a default-constructed
     SamReaderOptions proto, and a second SamReaderOptions proto whose
     index_mode is INDEX_BASED_ON_FILENAME on the test instance.
     """
     bam_resource = test_utils.genomics_core_testdata('test.bam')
     self.bam = bam_resource
     default_options = reads_pb2.SamReaderOptions()
     self.options = default_options
     # Same options type, but configured to use a filename-based index.
     filename_index_mode = index_pb2.INDEX_BASED_ON_FILENAME
     self.indexed_options = reads_pb2.SamReaderOptions(
         index_mode=filename_index_mode)
Beispiel #3
0
    def __init__(self,
                 input_path,
                 ref_path=None,
                 read_requirements=None,
                 parse_aux_fields=False,
                 hts_block_size=None,
                 downsample_fraction=None,
                 random_seed=None):
        """Initializes a NativeSamReader.

        Paths ending in '.tfbam' are delegated to the lazily imported
        tfbam_lib reader; every other path is opened through
        sam_reader.SamReader.from_file.

        Args:
          input_path: str. A path to a resource containing SAM/BAM/CRAM
            records. Currently supports SAM text format, BAM binary format,
            and CRAM.
          ref_path: optional str or None. Only used for CRAM decoding, and only
            necessary if the UR encoded path in the CRAM itself needs to be
            overridden. If provided, we will tell the CRAM decoder to use this
            FASTA for the reference sequence. Not forwarded to the .tfbam
            reader.
          read_requirements: optional ReadRequirement proto. If not None, this
            proto is used to control which reads are filtered out by the reader
            before they are passed to the client.
          parse_aux_fields: optional bool, defaulting to False. If False, we do
            not parse the auxiliary fields of the SAM/BAM/CRAM records (see SAM
            spec for details). Parsing the aux fields is unnecessary for many
            applications, and adds a significant parsing cost to access. If you
            need these aux fields, set parse_aux_fields to True and these
            fields will be parsed and populate the appropriate Read proto
            fields (e.g., read.info). Not forwarded to the .tfbam reader.
          hts_block_size: int or None. If specified, this configures the block
            size of the underlying htslib file object. Larger values (e.g. 1M)
            may be beneficial for reading remote files. If None, the reader
            uses the default htslib block size.
          downsample_fraction: float in the interval [0.0, 1.0] or None. If
            specified as a positive float, the reader will only keep each read
            with probability downsample_fraction, randomly. If None or zero,
            all reads are kept.
          random_seed: None or int. The random seed to use with this sam
            reader, if needed. For non-.tfbam inputs, None is replaced by a
            fixed seed value; for .tfbam inputs the value is passed through
            unchanged.

        Raises:
          ValueError: For non-.tfbam inputs, if downsample_fraction is neither
            None nor zero and is not in the interval (0.0, 1.0].
          ImportError: If input_path ends with '.tfbam' and the tfbam_lib
            module cannot be imported.
        """
        if input_path.endswith('.tfbam'):
            # Delayed loading of tfbam_lib. Only the import itself is guarded,
            # so an ImportError raised while constructing the reader is not
            # misreported as a missing tfbam_lib module.
            try:
                from tfbam_lib import tfbam_reader  # pylint: disable=g-import-not-at-top
            except ImportError:
                raise ImportError(
                    'tfbam_lib module not found, cannot read .tfbam files.')
            self._reader = tfbam_reader.make_sam_reader(
                input_path,
                read_requirements=read_requirements,
                unused_block_size=hts_block_size,
                downsample_fraction=downsample_fraction,
                random_seed=random_seed)
        else:
            aux_field_handling = reads_pb2.SamReaderOptions.SKIP_AUX_FIELDS
            if parse_aux_fields:
                aux_field_handling = reads_pb2.SamReaderOptions.PARSE_ALL_AUX_FIELDS

            # We make 0 be a valid value that means "keep all reads" so that proto
            # defaults (=0) do not omit all reads.
            if downsample_fraction is not None and downsample_fraction != 0:
                if not 0.0 < downsample_fraction <= 1.0:
                    raise ValueError(
                        'downsample_fraction must be in the interval (0.0, 1.0]',
                        downsample_fraction)

            if random_seed is None:
                # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
                random_seed = 2928130004

            self._reader = sam_reader.SamReader.from_file(
                input_path.encode('utf8'),
                ref_path.encode('utf8') if ref_path is not None else '',
                reads_pb2.SamReaderOptions(
                    read_requirements=read_requirements,
                    aux_field_handling=aux_field_handling,
                    # A missing block size is passed as 0.
                    hts_block_size=(hts_block_size or 0),
                    downsample_fraction=downsample_fraction,
                    random_seed=random_seed))

            # The header attribute is only populated on this (non-.tfbam) path.
            self.header = self._reader.header

        super(NativeSamReader, self).__init__()
Beispiel #4
0
    def __init__(self,
                 input_path,
                 use_index=True,
                 read_requirements=None,
                 parse_aux_fields=False,
                 hts_block_size=None,
                 downsample_fraction=None,
                 random_seed=None):
        """Initializes a NativeSamReader.

        Paths ending in '.tfbam' are delegated to the lazily imported
        tfbam_lib reader; every other path is opened through
        sam_reader.SamReader.from_file.

        Args:
          input_path: string. A path to a resource containing SAM/BAM records.
            Currently supports SAM text format and BAM binary format.
          use_index: optional bool, defaulting to True. If True, we will
            attempt to load an index file for input_path to enable the query()
            API call. If True an index file must exist. If False, we will not
            attempt to load an index for input_path, disabling the query()
            call.
          read_requirements: optional ReadRequirement proto. If not None, this
            proto is used to control which reads are filtered out by the reader
            before they are passed to the client.
          parse_aux_fields: optional bool. If False, the default, we will not
            parse the auxiliary fields of the SAM/BAM records (see SAM spec for
            details). Parsing the aux fields is often unnecessary for many
            applications, and adds a significant parsing cost to access. If you
            need these aux fields, set parse_aux_fields to True and these
            fields will be parsed and populate the appropriate Read proto
            fields (e.g., read.info). Not forwarded to the .tfbam reader.
          hts_block_size: integer or None. If None, will use the default htslib
            block size. Otherwise, will configure the block size of the
            underlying htslib file object. Larger values (e.g. 1M) may be
            beneficial for reading remote files.
          downsample_fraction: None or float in the interval [0.0, 1.0]. If not
            None or 0.0, the reader will only keep each read with probability
            downsample_fraction, randomly.
          random_seed: None or int. The random seed to use with this sam
            reader, if needed. For non-.tfbam inputs, None is replaced by a
            fixed seed value; for .tfbam inputs the value is passed through
            unchanged.

        Raises:
          ValueError: For non-.tfbam inputs, if downsample_fraction is neither
            None nor zero and is not in the interval (0.0, 1.0].
          ImportError: If input_path ends with '.tfbam' and the tfbam_lib
            module cannot be imported.
        """
        if input_path.endswith('.tfbam'):
            # Delayed loading of tfbam_lib. Only the import itself is guarded,
            # so an ImportError raised while constructing the reader is not
            # misreported as a missing tfbam_lib module.
            try:
                from tfbam_lib import tfbam_reader  # pylint: disable=g-import-not-at-top
            except ImportError:
                raise ImportError(
                    'tfbam_lib module not found, cannot read .tfbam files.')
            self._reader = tfbam_reader.make_sam_reader(
                input_path,
                read_requirements=read_requirements,
                use_index=use_index,
                unused_block_size=hts_block_size,
                downsample_fraction=downsample_fraction,
                random_seed=random_seed)
        else:
            index_mode = index_pb2.INDEX_BASED_ON_FILENAME
            if not use_index:
                index_mode = index_pb2.DONT_USE_INDEX

            aux_field_handling = reads_pb2.SamReaderOptions.SKIP_AUX_FIELDS
            if parse_aux_fields:
                aux_field_handling = reads_pb2.SamReaderOptions.PARSE_ALL_AUX_FIELDS

            # None and 0 both skip validation; only a nonzero fraction is
            # range-checked.
            if downsample_fraction:
                if not 0.0 < downsample_fraction <= 1.0:
                    raise ValueError(
                        'downsample_fraction must be in the interval (0.0, 1.0]',
                        downsample_fraction)

            if random_seed is None:
                # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
                random_seed = 2928130004

            self._reader = sam_reader.SamReader.from_file(
                input_path.encode('utf8'),
                reads_pb2.SamReaderOptions(
                    read_requirements=read_requirements,
                    index_mode=index_mode,
                    aux_field_handling=aux_field_handling,
                    # A missing block size is passed as 0.
                    hts_block_size=(hts_block_size or 0),
                    downsample_fraction=downsample_fraction,
                    random_seed=random_seed))

            # The header attribute is only populated on this (non-.tfbam) path.
            self.header = self._reader.header

        super(NativeSamReader, self).__init__()