def open_record_stream(record_class=None, filename=None, file_handle=None,
                       mode="rb+", gzip="auto"):
    """Open an archive and return a stream of the records it contains.

    Normally called indirectly through a record class, e.g.
    ``WarcRecord.open_archive``.

    Args:
        record_class: Class whose ``make_parser()`` supplies the record
            parser. When None, it is guessed with ``guess_record_type``.
        filename: Path opened with ``open(filename, mode=mode)`` when no
            ``file_handle`` is given.
        file_handle: Already-open file object to read from. It takes
            precedence over ``filename`` and does not need a ``name``
            attribute.
        mode: Mode used when this function opens ``filename`` itself.
        gzip: ``'auto'`` decides with ``is_gzip_file``; ``'record'``
            selects ``GzipRecordStream``; ``'file'`` selects
            ``GzipFileStream``; any other value selects ``RecordStream``.

    Returns:
        A ``GzipRecordStream``, ``GzipFileStream`` or ``RecordStream``
        built from the file handle and the record parser.

    Raises:
        ValueError: If neither ``filename`` nor ``file_handle`` is given,
            or if the record type cannot be guessed.
    """
    if file_handle is None and filename is None:
        raise ValueError('Either filename or file_handle must be given')

    # Only a handle opened here may be closed here on failure; a handle
    # passed in by the caller stays the caller's responsibility.
    owns_handle = file_handle is None
    if owns_handle:
        file_handle = open(filename, mode=mode)

    try:
        if record_class is None:
            record_class = guess_record_type(file_handle)
            if record_class is None:
                # ValueError derives from StandardError on Python 2 and
                # still exists on Python 3, where StandardError is gone.
                raise ValueError('Failed to guess record type')

        record_parser = record_class.make_parser()

        if gzip == 'auto':
            # Anything not detected as gzip is assumed to be uncompressed.
            gzip = 'record' if is_gzip_file(file_handle) else None

        if gzip == 'record':
            return GzipRecordStream(file_handle, record_parser)
        elif gzip == 'file':
            return GzipFileStream(file_handle, record_parser)
        else:
            return RecordStream(file_handle, record_parser)
    except Exception:
        # Do not leak a handle the caller never got a reference to.
        if owns_handle:
            file_handle.close()
        raise
def open_record_stream(record_class=None, filename=None, file_handle=None,
                       mode="rb", gzip="auto", offset=None, length=None):
    """Open an archive and return a stream of the records it contains.

    Normally called indirectly through a record class, e.g.
    ``WarcRecord.open_archive``.

    Args:
        record_class: Class whose ``make_parser()`` supplies the record
            parser. When None, it is guessed with ``guess_record_type``.
        filename: Local path or ``s3://`` URL, opened here when no
            ``file_handle`` is given. Also consulted for a ``.gz`` suffix
            when ``gzip`` is ``'auto'``.
        file_handle: Already-open file object to read from; takes
            precedence over ``filename``.
        mode: Mode used when this function opens a local ``filename``.
        offset: Passed to ``s3.open_url`` for ``s3://`` URLs; for a local
            file opened here, the handle is seeked to this position. Not
            applied to a caller-supplied ``file_handle``.
        length: Passed to ``s3.open_url`` for ``s3://`` URLs only.
        gzip: ``'auto'`` picks ``'record'`` when ``filename`` ends in
            ``.gz`` or ``is_gzip_file`` says so; ``'record'`` selects
            ``GzipRecordStream``; ``'file'`` selects ``GzipFileStream``;
            any other value selects ``RecordStream``.

    Returns:
        A ``GzipRecordStream``, ``GzipFileStream`` or ``RecordStream``
        built from the file handle and the record parser.

    Raises:
        ValueError: If neither ``filename`` nor ``file_handle`` is given,
            or if the record type cannot be guessed.
    """
    if file_handle is None and filename is None:
        raise ValueError('Either filename or file_handle must be given')

    # Only a handle opened here may be closed here on failure; a handle
    # passed in by the caller stays the caller's responsibility.
    owns_handle = file_handle is None
    needs_seek = False
    if owns_handle:
        if filename.startswith('s3://'):
            # The s3 helper module is imported only on this branch.
            from . import s3
            file_handle = s3.open_url(filename, offset=offset, length=length)
        else:
            file_handle = open(filename, mode=mode)
            needs_seek = offset is not None

    try:
        if needs_seek:
            file_handle.seek(offset)

        if record_class is None:
            record_class = guess_record_type(file_handle)
            if record_class is None:
                raise ValueError('Failed to guess record type')

        record_parser = record_class.make_parser()

        if gzip == 'auto':
            named_gz = filename and filename.endswith('.gz')
            # Anything not recognised as gzip is assumed to be uncompressed.
            gzip = 'record' if named_gz or is_gzip_file(file_handle) else None

        if gzip == 'record':
            return GzipRecordStream(file_handle, record_parser)
        elif gzip == 'file':
            return GzipFileStream(file_handle, record_parser)
        else:
            return RecordStream(file_handle, record_parser)
    except Exception:
        # Do not leak a handle the caller never got a reference to.
        if owns_handle:
            file_handle.close()
        raise
def open_record_stream(record_class=None, filename=None, file_handle=None,
                       mode="rb+", gzip="auto", offset=None, length=None):
    """Open an archive and return a stream of the records it contains.

    Normally called indirectly through a record class, e.g.
    ``WarcRecord.open_archive``.

    Args:
        record_class: Class whose ``make_parser()`` supplies the record
            parser. When None, it is guessed with ``guess_record_type``.
        filename: Local path or ``s3://`` URL, opened here when no
            ``file_handle`` is given. Also consulted for a ``.gz`` suffix
            when ``gzip`` is ``'auto'``.
        file_handle: Already-open file object to read from; takes
            precedence over ``filename``.
        mode: Mode used when this function opens a local ``filename``.
        offset: Passed to ``s3.open_url`` for ``s3://`` URLs; for a local
            file opened here, the handle is seeked to this position. Not
            applied to a caller-supplied ``file_handle``.
        length: Passed to ``s3.open_url`` for ``s3://`` URLs only.
        gzip: ``'auto'`` picks ``'record'`` when ``filename`` ends in
            ``.gz`` or ``is_gzip_file`` says so; ``'record'`` selects
            ``GzipRecordStream``; ``'file'`` selects ``GzipFileStream``;
            any other value selects ``RecordStream``.

    Returns:
        A ``GzipRecordStream``, ``GzipFileStream`` or ``RecordStream``
        built from the file handle and the record parser.

    Raises:
        ValueError: If neither ``filename`` nor ``file_handle`` is given,
            or if the record type cannot be guessed.
    """
    if file_handle is None and filename is None:
        raise ValueError('Either filename or file_handle must be given')

    # Only a handle opened here may be closed here on failure; a handle
    # passed in by the caller stays the caller's responsibility.
    owns_handle = file_handle is None
    needs_seek = False
    if owns_handle:
        if filename.startswith('s3://'):
            # The s3 helper module is imported only on this branch.
            from . import s3
            file_handle = s3.open_url(filename, offset=offset, length=length)
        else:
            file_handle = open(filename, mode=mode)
            needs_seek = offset is not None

    try:
        if needs_seek:
            file_handle.seek(offset)

        if record_class is None:
            record_class = guess_record_type(file_handle)
            if record_class is None:
                # ValueError derives from StandardError on Python 2 and
                # still exists on Python 3, where StandardError is gone.
                raise ValueError('Failed to guess record type')

        record_parser = record_class.make_parser()

        if gzip == 'auto':
            named_gz = filename and filename.endswith('.gz')
            # Anything not recognised as gzip is assumed to be uncompressed.
            gzip = 'record' if named_gz or is_gzip_file(file_handle) else None

        if gzip == 'record':
            return GzipRecordStream(file_handle, record_parser)
        elif gzip == 'file':
            return GzipFileStream(file_handle, record_parser)
        else:
            return RecordStream(file_handle, record_parser)
    except Exception:
        # Do not leak a handle the caller never got a reference to.
        if owns_handle:
            file_handle.close()
        raise