def __init__(
    self,
    file_name,  # type: str
    range_tracker,  # type: range_trackers.OffsetRangeTracker
    file_pattern,  # type: str
    compression_type,  # type: str
    allow_malformed_records,  # type: bool
    representative_header_lines=None,  # type: List[str]
    **kwargs  # type: **str
):
    # type: (...) -> None
    """Set up the parser and open a record iterator over `file_name`.

    Args:
      file_name: File whose records are read through `range_tracker`.
      range_tracker: Tracker handed to `read_records()` with `file_name`.
      file_pattern: Pattern used to construct the underlying text source.
      compression_type: Compression setting forwarded to the text source.
      allow_malformed_records: Stored on the instance as
        `_allow_malformed_records`.
      representative_header_lines: If given, header lines in `file_name`
        are ignored; refer to _process_header_lines() logic.
      **kwargs: Extra keyword arguments forwarded to `textio._TextSource`.
    """

    def _is_header_line(line):
        # Blank (whitespace-only) lines and lines starting with '#' both
        # satisfy the header predicate.
        return not line.strip() or line.startswith('#')

    self._file_name = file_name
    self._allow_malformed_records = allow_malformed_records
    self._representative_header_lines = representative_header_lines

    header_fns = (_is_header_line, self._process_header_lines)
    source = textio._TextSource(
        file_pattern,
        0,  # min_bundle_size
        compression_type,
        True,  # strip_trailing_newlines
        coders.StrUtf8Coder(),  # coder
        validate=False,
        header_processor_fns=header_fns,
        **kwargs)
    self._text_lines = source.read_records(self._file_name, range_tracker)
def _create_source(self, path, schema):
    """Build the source used to read the export located at `path`.

    When `self.use_json_exports` is truthy, a `_TextSource` decoding each
    line with `_JsonToDictCoder(schema)` is returned; otherwise an Avro
    source is created via `_create_avro_source` (with `use_fastavro=True`).
    `schema` is only used on the JSON path.
    """
    if self.use_json_exports:
        json_coder = _JsonToDictCoder(schema)
        return _TextSource(
            path,
            min_bundle_size=0,
            compression_type=CompressionTypes.UNCOMPRESSED,
            strip_trailing_newlines=True,
            coder=json_coder)

    return _create_avro_source(path, use_fastavro=True)
def __init__(
    self,
    file_name,  # type: str
    range_tracker,  # type: range_trackers.OffsetRangeTracker
    file_pattern,  # type: str
    compression_type,  # type: str
    allow_malformed_records,  # type: bool
    representative_header_lines=None,  # type: List[str]
    **kwargs  # type: **str
):
    # type: (...) -> None
    """Set up the parser, open the line iterator and build the VCF reader.

    Args:
      file_name: File whose records are read through `range_tracker`.
      range_tracker: Tracker handed to `read_records()` with `file_name`.
      file_pattern: Pattern used to construct the underlying text source.
      compression_type: Compression setting forwarded to the text source.
      allow_malformed_records: Stored on the instance as
        `_allow_malformed_records`.
      representative_header_lines: If given, header lines in `file_name`
        are ignored.
      **kwargs: Extra keyword arguments forwarded to `textio._TextSource`.

    Raises:
      ValueError: If `vcf.Reader` raises `SyntaxError` while being
        constructed from the generated lines.
    """

    def _starts_with_hash(line):
        # Header predicate: only lines beginning with '#' qualify here.
        return line.startswith('#')

    # Plain state first; nothing below depends on assignment order.
    self._file_name = file_name
    self._allow_malformed_records = allow_malformed_records
    self._representative_header_lines = representative_header_lines
    self._header_lines = []
    self._last_record = None

    header_fns = (_starts_with_hash, self._store_header_lines)
    source = textio._TextSource(
        file_pattern,
        0,  # min_bundle_size
        compression_type,
        True,  # strip_trailing_newlines
        coders.StrUtf8Coder(),  # coder
        validate=False,
        header_processor_fns=header_fns,
        **kwargs)
    self._text_lines = source.read_records(self._file_name, range_tracker)

    try:
        self._vcf_reader = vcf.Reader(fsock=self._create_generator())
    except SyntaxError as e:
        # Re-raise header parse failures as ValueError, naming the file.
        message = 'Invalid VCF header in %s: %s' % (self._file_name, str(e))
        raise ValueError(message)
def __init__(
    self,
    file_name,  # type: str
    range_tracker,  # type: range_trackers.OffsetRangeTracker
    file_pattern,  # type: str
    compression_type,  # type: str
    allow_malformed_records,  # type: bool
    representative_header_lines=None,  # type: List[str]
    splittable_bgzf=False,  # type: bool
    pre_infer_headers=False,  # type: bool
    sample_name_encoding=SampleNameEncoding.WITHOUT_FILE_PATH,  # type: int
    use_1_based_coordinate=False,  # type: bool
    **kwargs  # type: **str
):
    # type: (...) -> None
    """Set up the parser and open a record iterator over `file_name`.

    The underlying source is chosen as follows:
      * `splittable_bgzf` truthy -> `bgzf.BGZFBlockSource`;
      * otherwise, `compression_type` equal to
        `filesystems.CompressionTypes.GZIP` -> `bgzf.BGZFSource`;
      * otherwise -> `textio._TextSource`.

    Args:
      file_name: File whose records are read through `range_tracker`.
      range_tracker: Tracker handed to `read_records()` with `file_name`.
      file_pattern: Pattern used to construct the non-block sources.
      compression_type: Compression setting forwarded to the source.
      allow_malformed_records: Stored as `_allow_malformed_records`.
      representative_header_lines: If given, header lines in `file_name`
        are ignored; refer to _process_header_lines() logic.
      splittable_bgzf: Selects the BGZF block source when truthy.
      pre_infer_headers: Stored as `_pre_infer_headers`.
      sample_name_encoding: Stored as `_sample_name_encoding`.
      use_1_based_coordinate: Stored as `_use_1_based_coordinate`.
      **kwargs: Extra keyword arguments forwarded to the chosen source.
    """

    def _is_header_line(line):
        # Blank (whitespace-only) lines and lines starting with '#' both
        # satisfy the header predicate.
        return not line.strip() or line.startswith('#')

    self._file_name = file_name
    self._representative_header_lines = representative_header_lines
    self._allow_malformed_records = allow_malformed_records
    self._pre_infer_headers = pre_infer_headers
    self._sample_name_encoding = sample_name_encoding
    self._use_1_based_coordinate = use_1_based_coordinate

    # Every source variant receives the same (predicate, processor) pair.
    header_fns = (_is_header_line, self._process_header_lines)

    if splittable_bgzf:
        source = bgzf.BGZFBlockSource(
            file_name,
            range_tracker,
            representative_header_lines,
            compression_type,
            header_processor_fns=header_fns,
            **kwargs)
    else:
        # The remaining two variants take identical arguments; only the
        # source class differs.
        if compression_type == filesystems.CompressionTypes.GZIP:
            source_cls = bgzf.BGZFSource
        else:
            source_cls = textio._TextSource
        source = source_cls(
            file_pattern,
            0,  # min_bundle_size
            compression_type,
            True,  # strip_trailing_newlines
            coders.StrUtf8Coder(),  # coder
            validate=False,
            header_processor_fns=header_fns,
            **kwargs)

    self._text_lines = source.read_records(self._file_name, range_tracker)