Ejemplo n.º 1
0
    def _cdx_index(self, out, input_, rel_root=None):
        """Run pywb's ``write_multi_cdx_index`` on ``input_``, writing to ``out``.

        The indexer is always invoked with ``append_post``, ``cdxj``,
        ``sort`` and ``recurse`` switched on; ``rel_root`` is passed
        through unchanged.

        :param out: output target, forwarded as the indexer's first argument
        :param input_: input(s) to index, forwarded as the second argument
        :param rel_root: optional value forwarded as the ``rel_root`` option
        :return: None -- the indexer's return value is discarded
        """
        # Imported at call time, so pywb is only needed when indexing runs.
        from pywb.warc.cdxindexer import write_multi_cdx_index

        write_multi_cdx_index(
            out,
            input_,
            append_post=True,
            cdxj=True,
            sort=True,
            recurse=True,
            rel_root=rel_root,
        )
Ejemplo n.º 2
0
    def _cdx_index(self, out, input_, rel_root=None):
        """Index ``input_`` into ``out`` using pywb's multi-input cdx indexer.

        ``write_multi_cdx_index`` is called with the fixed options
        ``append_post``, ``cdxj``, ``sort`` and ``recurse`` all set to
        True, plus the caller-supplied ``rel_root``.

        :param out: output target, handed to the indexer as-is
        :param input_: input(s) to index, handed to the indexer as-is
        :param rel_root: optional value for the indexer's ``rel_root`` option
        :return: None -- whatever the indexer returns is not propagated
        """
        # Function-scope import: pywb is resolved only when this method runs.
        from pywb.warc.cdxindexer import write_multi_cdx_index

        write_multi_cdx_index(
            out,
            input_,
            append_post=True,
            cdxj=True,
            sort=True,
            recurse=True,
            rel_root=rel_root,
        )
Ejemplo n.º 3
0
    def update_cdx(self, output_cdx, inputs):
        """
        Index the warc(s) given by 'inputs' into the file named by
        'output_cdx.name' and return the 'pages' attribute of the
        writer that the indexer hands back.

        The indexer is asked for sorted, surt-ordered cdxj output with
        'append_post', 'include_all' and 'writer_add_mixin' enabled,
        using PageDetectSortedWriter as the writer class.

        NOTE(review): this method writes straight to 'output_cdx.name';
        no temp-file/rename step happens here. If atomic replacement of
        the cdx is required, it is presumably done by the caller --
        confirm.
        """
        # Build the full option set up front, writer class included.
        index_options = {
            'sort': True,
            'surt_ordered': True,
            'append_post': True,
            'cdxj': True,
            'include_all': True,
            'writer_add_mixin': True,
            'writer_cls': PageDetectSortedWriter,
        }

        cdx_writer = write_multi_cdx_index(output_cdx.name, inputs,
                                           **index_options)
        return cdx_writer.pages
Ejemplo n.º 4
0
    def update_cdx(self, output_cdx, inputs):
        """
        Build a cdx index from the warc(s) in 'inputs', written to the
        path 'output_cdx.name', and return the resulting writer's
        'pages' attribute.

        Options passed to the indexer: 'sort', 'surt_ordered',
        'append_post', 'cdxj', 'include_all' and 'writer_add_mixin'
        (all True), with PageDetectSortedWriter as 'writer_cls'.

        NOTE(review): output goes directly to 'output_cdx.name'; this
        method performs no write-to-temp-then-rename. Any atomic update
        of the cdx is presumably the caller's responsibility -- confirm.
        """
        # Single literal holding every indexer option, writer class included.
        index_options = {
            'sort': True,
            'surt_ordered': True,
            'append_post': True,
            'cdxj': True,
            'include_all': True,
            'writer_add_mixin': True,
            'writer_cls': PageDetectSortedWriter,
        }

        cdx_writer = write_multi_cdx_index(output_cdx.name, inputs,
                                           **index_options)
        return cdx_writer.pages