def _cdx_index(self, out, input_, rel_root=None):
    """Index ``input_`` into ``out`` using pywb's multi-file cdx indexer.

    This is a thin wrapper around ``write_multi_cdx_index`` that always
    enables the ``append_post``, ``cdxj``, ``sort`` and ``recurse``
    options and forwards ``rel_root`` unchanged.

    :param out: output target, passed through as the first positional
        argument of ``write_multi_cdx_index``.
    :param input_: input(s) to index, passed through as the second
        positional argument.
    :param rel_root: forwarded as the ``rel_root`` keyword option;
        defaults to ``None``.
    """
    # The indexer is imported at call time rather than at module import.
    from pywb.warc.cdxindexer import write_multi_cdx_index

    write_multi_cdx_index(
        out,
        input_,
        append_post=True,
        cdxj=True,
        sort=True,
        recurse=True,
        rel_root=rel_root,
    )
def update_cdx(self, output_cdx, inputs):
    """Write a sorted, post-query resolving cdxj index of ``inputs``.

    The index is produced by ``write_multi_cdx_index`` using
    ``PageDetectSortedWriter`` as the writer class and is written to the
    path given by ``output_cdx.name``.

    NOTE(review): no temp-file/rename step happens in this method; the
    index is written straight to ``output_cdx.name``. If atomic
    replacement of the cdx is required, the caller presumably handles
    it (e.g. by passing a temporary file) -- confirm.

    :param output_cdx: object whose ``name`` attribute is the output
        path handed to the indexer.
    :param inputs: warc input(s) to index.
    :return: the ``pages`` attribute of the writer returned by
        ``write_multi_cdx_index``.
    """
    index_options = {
        'sort': True,
        'surt_ordered': True,
        'append_post': True,
        'cdxj': True,
        'include_all': True,
        'writer_add_mixin': True,
        'writer_cls': PageDetectSortedWriter,
    }

    index_writer = write_multi_cdx_index(output_cdx.name, inputs,
                                         **index_options)
    return index_writer.pages