def text_file_record_generator_creator(filename: str,
                                       compression: str = None
                                       ) -> RecordGeneratorType:
    """Open an input file for line iteration, decompressing it if necessary.

    Args:
        filename: The input filename.
        compression: The compression type of the input file: ``'gzip'``,
            ``'bz2'``, ``'zip'`` or ``'xz'``.  An empty string means the file
            is not compressed.  If None, the type is inferred from the
            filename via ``infer_compression``.

    Returns:
        An open, iterable file object.  Uncompressed files are opened in text
        mode; the compressed readers are opened in binary mode and yield
        bytes.  The caller is responsible for closing the returned object.

    Raises:
        ValueError: If the compression type is not recognized, or if a ZIP
            archive contains no files.
    """
    if compression is None:
        compression = infer_compression(filename)
    # After inference, None and '' both mean "not compressed".
    if not compression:
        return open(filename, 'r')
    if compression == 'gzip':
        import gzip
        return gzip.open(filename, 'r')
    if compression == 'bz2':
        import bz2
        return bz2.BZ2File(filename, 'r')
    if compression == 'zip':
        import zipfile
        zf = zipfile.ZipFile(filename, 'r')
        # Directory entries are not readable members, so skip them.
        zip_names = [zi.filename for zi in zf.infolist() if not zi.is_dir()]
        if not zip_names:
            zf.close()  # do not leak the archive handle on the error path
            raise ValueError(f'zero files found in ZIP file {filename}')
        # NOTE(review): only the first file in the archive is read; confirm
        # this is the intended choice for archives with several members.
        return zf.open(zip_names[0])
    if compression == 'xz':
        import lzma
        return lzma.LZMAFile(filename, 'r')
    raise ValueError(
        f'unrecognized compression: {compression} for file: {filename}')
    def __call__(self) -> Sequence[Tuple[str, str]]:
        """Find the files matching this object's path pattern and filters.

        ``self.path`` is expanded with ``glob.glob``.  If
        ``self.include_pattern`` is set, only filenames containing that
        substring are kept; if ``self.exclude_pattern`` is set, filenames
        containing that substring are dropped.

        Returns:
            All matching filenames, as a list of (filename, compression)
            tuples.  The compression comes from ``infer_compression``, with
            None replaced by an empty string.

        Raises:
            Exception: If the glob pattern matches no files, or if no files
                remain after the include / exclude filters are applied.
        """
        files = glob.glob(self.path)
        if not files:
            raise Exception(
                f'no matching files found with pattern: {self.path}')
        if self.include_pattern:
            files = [file for file in files if self.include_pattern in file]
        if self.exclude_pattern:
            files = [
                file for file in files if self.exclude_pattern not in file
            ]
        if not files:
            raise Exception(
                f'no matching files for: {self.path} including: {self.include_pattern} excluding: {self.exclude_pattern}'
            )
        inferred = [infer_compression(filename) for filename in files]
        # Downstream code expects a string, so None becomes ''.
        compression: Sequence[str] = [
            '' if item is None else item for item in inferred
        ]

        return list(zip(files, compression))
def process_marketdata_file(
        input_filename: str,
        compression: str,
        output_file_prefix_mapper: OutputFilePrefixMapperType,
        record_parser_creator: RecordParserCreatorType,
        aggregator_creator: AggregatorCreatorType,
        line_filter: LineFilterType = None,
        base_date_mapper: BaseDateMapperType = None,
        file_processor_creator:
        FileProcessorCreatorType = create_text_file_processor,
        header_parser_creator: HeaderParserCreatorType = lambda
        record_generator_creator: TextHeaderParser(record_generator_creator),
        header_record_generator:
        RecordGeneratorCreatorType = text_file_record_generator_creator,
        record_generator: RecordGeneratorType = TextFileDecompressor(),
        bad_line_handler: BadLineHandlerType = PrintBadLineHandler(),
        record_filter: RecordFilterType = None,
        missing_data_handler:
        MissingDataHandlerType = PriceQtyMissingDataHandler(),
        writer_creator: WriterCreatorType = None) -> None:
    """Processes a single market data file.

    Args:
        input_filename: File name (including path) to process.
        compression: Compression type for the input file.  If it is an empty
            string, we try to infer the compression type from the filename.
        output_file_prefix_mapper: A function that takes an input filename and
            returns the corresponding output file prefix we want.
        record_parser_creator: A function that takes a date and a list of
            column names and returns a function that can take a list of
            fields and return a subclass of Record.
        aggregator_creator: A function that takes a writer creator and returns
            a list of Aggregators.
        line_filter: A function that takes a line and decides whether we want
            to keep it or discard it.  Defaults to None.
        base_date_mapper: A function that takes an input filename and returns
            the date implied by the filename, represented as millis since
            epoch.  If None, a base date of 0 is used.
        file_processor_creator: A function that returns an object that we can
            use to iterate through lines in a file.  Defaults to helper
            function :obj:`create_text_file_processor`.
        header_parser_creator: A function that takes ``header_record_generator``
            and returns the parser used to read the column headers.  Defaults
            to creating a :obj:`TextHeaderParser`.
        header_record_generator: A function that takes a filename and
            compression and returns a generator that we can use to get column
            headers.  Defaults to :obj:`text_file_record_generator_creator`.
        record_generator: A function that takes a filename and compression and
            returns a generator that we can use to iterate through lines in
            the file.  Defaults to a :obj:`TextFileDecompressor` instance.
        bad_line_handler (optional): A function that takes a line that we
            could not parse, and either parses it or does something else like
            recording debugging info, or stopping the processing by raising an
            exception.  Defaults to a :obj:`PrintBadLineHandler` instance.
        record_filter (optional): A function that takes a parsed TradeRecord,
            QuoteRecord, OpenInterestRecord or OtherRecord and decides whether
            we want to keep it or discard it.  Defaults to None.
            NOTE(review): this function does not forward ``record_filter`` to
            ``file_processor_creator``; confirm whether that is intended.
        missing_data_handler (optional): A function that takes a parsed
            TradeRecord, QuoteRecord, OpenInterestRecord or OtherRecord, and
            deals with any data that is missing in those records.  For
            example, 0 for bid could be replaced by NAN.  Defaults to a
            :obj:`PriceQtyMissingDataHandler` instance.
        writer_creator (optional): A function that takes an
            output_file_prefix, schema, whether to create a batch id file, and
            batch_size and returns a subclass of :obj:`Writer`.  Defaults to
            an :obj:`HDF5WriterCreator` built from the output file prefix.
    """
    # Map the filename once and reuse the prefix for the default writer and
    # for the '.done' marker below.
    output_file_prefix = output_file_prefix_mapper(input_filename)

    if writer_creator is None:
        writer_creator = HDF5WriterCreator(output_file_prefix, ' ')

    base_date = 0
    if base_date_mapper is not None:
        base_date = base_date_mapper(input_filename)

    header_parser = header_parser_creator(header_record_generator)
    print(f'starting file: {input_filename}')
    if compression == "":
        compression_tmp = infer_compression(input_filename)
        compression = '' if compression_tmp is None else compression_tmp  # In C++ don't want virtual function with default argument, so don't allow it here

    # The parser was built only from a generator creator, so it has to be
    # told which file to read the headers from.
    headers = header_parser(
        input_filename,
        compression)  # type: ignore # cannot be None at this point.

    record_parser = record_parser_creator(base_date, headers)

    aggregators = aggregator_creator(writer_creator)

    file_processor = file_processor_creator(record_generator, line_filter,
                                            record_parser, bad_line_handler,
                                            missing_data_handler, aggregators)

    start = timer()
    lines_processed = file_processor(input_filename, compression)
    end = timer()
    duration = round((end - start) * 1000)  # seconds -> milliseconds
    # Mark the output as finished only after the whole file was processed.
    touch(output_file_prefix + '.done')
    print(
        f"processed: {input_filename} {lines_processed} lines in {duration} milliseconds"
    )