Python _TextSource 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: apache_beam.io.textio

메소드/함수: _TextSource

hotexamples.com에서의 예제들: 4

Python _TextSource - 4개의 예제를 찾았습니다. 아래 예제들은 오픈소스 프로젝트에서 추출한 apache_beam.io.textio._TextSource(Python)의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

예제 #1

파일 보기

파일: vcf_parser.py 프로젝트: vijbon01/gcp-variant-transforms

    def __init__(
            self,
            file_name,  # type: str
            range_tracker,  # type: range_trackers.OffsetRangeTracker
            file_pattern,  # type: str
            compression_type,  # type: str
            allow_malformed_records,  # type: bool
            representative_header_lines=None,  # type:  List[str]
            **kwargs  # type: **str
    ):
        # type: (...) -> None
        """Stores the reader settings and opens a record iterator on one file.

        A `textio._TextSource` is built over `file_pattern`, and the iterator
        returned by its `read_records(file_name, range_tracker)` is kept in
        `self._text_lines`.

        Args:
          file_name: File handed to `read_records`.
          range_tracker: Range tracker handed to `read_records`.
          file_pattern: Pattern given to the `textio._TextSource` constructor.
          compression_type: Compression type given to the source.
          allow_malformed_records: Stored on the instance; not consulted here.
          representative_header_lines: If given, header lines in `file_name`
            are ignored; refer to _process_header_lines() logic.
          **kwargs: Forwarded unchanged to `textio._TextSource`.
        """
        self._file_name = file_name
        self._allow_malformed_records = allow_malformed_records
        self._representative_header_lines = representative_header_lines

        # Pair passed as `header_processor_fns`: the first callable flags blank
        # lines and lines starting with '#'; the second is the instance's
        # header-line handler.
        header_fns = (
            lambda line: not line.strip() or line.startswith('#'),
            self._process_header_lines,
        )
        source = textio._TextSource(
            file_pattern,
            min_bundle_size=0,
            compression_type=compression_type,
            strip_trailing_newlines=True,
            coder=coders.StrUtf8Coder(),
            validate=False,
            header_processor_fns=header_fns,
            **kwargs)

        self._text_lines = source.read_records(self._file_name, range_tracker)

예제 #2

파일 보기

 def _create_source(self, path, schema):
     """Returns the source used to read `path`.

     When `self.use_json_exports` is truthy, a `_TextSource` is returned whose
     coder is `_JsonToDictCoder(schema)`; otherwise the result of
     `_create_avro_source(path, use_fastavro=True)` is returned and `schema`
     is not used.
     """
     if self.use_json_exports:
         json_coder = _JsonToDictCoder(schema)
         return _TextSource(
             path,
             min_bundle_size=0,
             compression_type=CompressionTypes.UNCOMPRESSED,
             strip_trailing_newlines=True,
             coder=json_coder)
     return _create_avro_source(path, use_fastavro=True)

예제 #3

파일 보기

파일: vcfio.py 프로젝트: sanjaysiddhanti/gcp-variant-transforms

        def __init__(
                self,
                file_name,  # type: str
                range_tracker,  # type: range_trackers.OffsetRangeTracker
                file_pattern,  # type: str
                compression_type,  # type: str
                allow_malformed_records,  # type: bool
                representative_header_lines=None,  # type:  List[str]
                **kwargs  # type: **str
        ):
            # type: (...) -> None
            """Sets up line reading for one file and builds the VCF reader.

            A `textio._TextSource` is built over `file_pattern`; the iterator
            returned by its `read_records(file_name, range_tracker)` is kept in
            `self._text_lines`, and a `vcf.Reader` is then created from
            `self._create_generator()`.

            Args:
              file_name: File handed to `read_records`; also named in the
                error message.
              range_tracker: Range tracker handed to `read_records`.
              file_pattern: Pattern given to the `textio._TextSource`
                constructor.
              compression_type: Compression type given to the source.
              allow_malformed_records: Stored on the instance; not consulted
                here.
              representative_header_lines: If given, header lines in
                `file_name` are ignored.
              **kwargs: Forwarded unchanged to `textio._TextSource`.

            Raises:
              ValueError: If creating the `vcf.Reader` raises `SyntaxError`.
            """
            self._file_name = file_name
            self._allow_malformed_records = allow_malformed_records
            self._representative_header_lines = representative_header_lines
            self._header_lines = []
            self._last_record = None

            # Pair passed as `header_processor_fns`: the first callable flags
            # lines starting with '#'; the second is the instance's
            # header-line handler.
            header_fns = (
                lambda line: line.startswith('#'),
                self._store_header_lines,
            )
            source = textio._TextSource(
                file_pattern,
                min_bundle_size=0,
                compression_type=compression_type,
                strip_trailing_newlines=True,
                coder=coders.StrUtf8Coder(),
                validate=False,
                header_processor_fns=header_fns,
                **kwargs)
            self._text_lines = source.read_records(self._file_name,
                                                   range_tracker)

            try:
                line_generator = self._create_generator()
                self._vcf_reader = vcf.Reader(fsock=line_generator)
            except SyntaxError as e:
                # Re-raised as ValueError with the file name included.
                message = 'Invalid VCF header in %s: %s' % (self._file_name,
                                                            str(e))
                raise ValueError(message)

예제 #4

파일 보기

    def __init__(
            self,
            file_name,  # type: str
            range_tracker,  # type: range_trackers.OffsetRangeTracker
            file_pattern,  # type: str
            compression_type,  # type: str
            allow_malformed_records,  # type: bool
            representative_header_lines=None,  # type:  List[str]
            splittable_bgzf=False,  # type: bool
            pre_infer_headers=False,  # type: bool
            sample_name_encoding=SampleNameEncoding.WITHOUT_FILE_PATH,  # type: int
            use_1_based_coordinate=False,  # type: bool
            **kwargs  # type: **str
    ):
        # type: (...) -> None
        """Stores the reader settings and opens a record iterator on one file.

        One of three sources is built, and the iterator returned by its
        `read_records(file_name, range_tracker)` is kept in
        `self._text_lines`:

        * `bgzf.BGZFBlockSource` when `splittable_bgzf` is truthy;
        * `bgzf.BGZFSource` when `compression_type` equals
          `filesystems.CompressionTypes.GZIP`;
        * `textio._TextSource` otherwise.

        Args:
          file_name: File handed to `read_records`; also given to
            `bgzf.BGZFBlockSource`.
          range_tracker: Range tracker handed to `read_records`; also given to
            `bgzf.BGZFBlockSource`.
          file_pattern: Pattern given to `bgzf.BGZFSource` or
            `textio._TextSource`.
          compression_type: Compression type given to the chosen source.
          allow_malformed_records: Stored on the instance; not consulted here.
          representative_header_lines: If given, header lines in `file_name`
            are ignored; refer to _process_header_lines() logic.
          splittable_bgzf: Selects the `bgzf.BGZFBlockSource` branch.
          pre_infer_headers: Stored on the instance; not consulted here.
          sample_name_encoding: Stored on the instance; not consulted here.
          use_1_based_coordinate: Stored on the instance; not consulted here.
          **kwargs: Forwarded unchanged to the chosen source constructor.
        """
        self._file_name = file_name
        self._representative_header_lines = representative_header_lines
        self._allow_malformed_records = allow_malformed_records
        self._use_1_based_coordinate = use_1_based_coordinate
        self._sample_name_encoding = sample_name_encoding
        self._pre_infer_headers = pre_infer_headers

        # Pair passed as `header_processor_fns` to every source variant: the
        # first callable flags blank lines and lines starting with '#'; the
        # second is the instance's header-line handler.
        header_fns = (
            lambda line: not line.strip() or line.startswith('#'),
            self._process_header_lines,
        )

        if splittable_bgzf:
            source = bgzf.BGZFBlockSource(
                file_name,
                range_tracker,
                representative_header_lines,
                compression_type,
                header_processor_fns=header_fns,
                **kwargs)
        else:
            # The remaining two sources take the same arguments, so only the
            # class differs.
            if compression_type == filesystems.CompressionTypes.GZIP:
                source_cls = bgzf.BGZFSource
            else:
                source_cls = textio._TextSource
            source = source_cls(
                file_pattern,
                0,  # min_bundle_size
                compression_type,
                True,  # strip_trailing_newlines
                coders.StrUtf8Coder(),  # coder
                validate=False,
                header_processor_fns=header_fns,
                **kwargs)

        self._text_lines = source.read_records(self._file_name, range_tracker)