Python BlockLoader.read Examples

Programming Language: Python

Namespace/Package Name: pywb.utils.loaders

Class/Type: BlockLoader

Method/Function: read

Examples at hotexamples.com: 8

Python BlockLoader.read - 8 examples found. These are the top-rated real-world Python examples of pywb.utils.loaders.BlockLoader.read, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

BlockLoader(22)

load(7)

read(4)

load_file_or_resource(1)

Example #1

Show file

File: mementohandler.py Project: theletterf/memento-reconstruct

    def load_archive_info_xml(self, url):
        """Populate ``self.archive_infos`` from an XML archive listing.

        The document at ``url`` is fetched with ``BlockLoader`` and every
        ``<link>`` element that has both an ``<archive>`` and a
        ``<timegate>`` child is stored under the link's ``id`` attribute.

        Loading is best-effort: when ``url`` is empty, or the document
        cannot be loaded, ``self.archive_infos`` is left empty and the
        method returns without raising.

        :param url: location of the XML document; may be empty or ``None``.
        """
        self.archive_infos = {}
        logging.debug('Loading XML from %s', url)
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            logging.debug(e)
            logging.debug('Proceeding without xml archive info')
            return

        # Read the whole payload, then release the underlying file or
        # connection even if the read itself fails.
        try:
            xml_data = stream.read()
        finally:
            stream.close()

        root = ElementTree.fromstring(xml_data)

        for link in root.findall('link'):
            name = link.get('id')
            archive = link.find('archive')
            timegate = link.find('timegate')

            # Entries missing either child element are skipped.
            if timegate is None or archive is None:
                continue

            rewritten = (archive.get('rewritten-urls') == 'yes')
            unrewritten_url = archive.get('un-rewritten-api-url', '')
            uri = timegate.get('uri')

            self.archive_infos[name] = {
                'uri': uri,
                'rewritten': rewritten,
                'unrewritten_url': unrewritten_url,
            }

Example #2

Show file

File: archivereplayview.py Project: lorz54/test

    def load_archive_info_json(self, url):
        """Populate ``self.archive_infos`` from a JSON archive listing.

        Environment variables in ``url`` are expanded, the document is
        fetched with ``BlockLoader`` and parsed as a JSON list of archive
        entries. Each entry must provide ``id``, ``name`` and ``timegate``;
        ``base_url`` defaults to the timegate URI, and ``unrewritten_url``
        defaults to ``base_url + '{timestamp}id_/{url}'``.

        Loading is best-effort: when ``url`` is empty or ``None``, or the
        document cannot be loaded, ``self.archive_infos`` is left empty and
        the method returns without raising.

        :param url: location of the JSON document; may be empty or ``None``.
        :raises KeyError: if an entry lacks ``id``, ``name`` or ``timegate``.
        """
        self.archive_infos = {}
        # os.path.expandvars() raises TypeError on None, so only expand a
        # real value; the emptiness guard below then handles both cases.
        if url:
            url = os.path.expandvars(url)
        logging.debug('Loading JSON from %s', url)
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            logging.debug(e)
            logging.debug('Proceeding without json archive info')
            return

        # Read the whole payload, then release the underlying file or
        # connection even if the read itself fails.
        try:
            json_data = stream.read()
        finally:
            stream.close()

        archives = json.loads(json_data)
        for arc in archives:
            id_ = arc['id']
            name = arc['name']
            uri = arc['timegate']
            base_url = arc.get('base_url', uri)
            unrewritten_url = arc.get('unrewritten_url')
            if not unrewritten_url:
                unrewritten_url = base_url + '{timestamp}id_/{url}'

            self.archive_infos[id_] = {
                'id': id_,
                'uri': uri,
                'name': name,
                'base_url': base_url,
                'unrewritten_url': unrewritten_url
            }

Example #3

Show file

File: archivereplayview.py Project: lorz54/test

    def load_archive_info_xml(self, url):
        """Populate ``self.archive_infos`` from an XML archive listing.

        Environment variables in ``url`` are expanded, the document is
        fetched with ``BlockLoader`` and every ``<link>`` element that has
        both an ``<archive>`` and a ``<timegate>`` child is stored under
        the link's ``id`` attribute, with its ``longname`` attribute kept
        as ``name``.

        Loading is best-effort: when ``url`` is empty or ``None``, or the
        document cannot be loaded, ``self.archive_infos`` is left empty and
        the method returns without raising.

        :param url: location of the XML document; may be empty or ``None``.
        """
        self.archive_infos = {}
        # os.path.expandvars() raises TypeError on None, so only expand a
        # real value; the emptiness guard below then handles both cases.
        if url:
            url = os.path.expandvars(url)
        logging.debug('Loading XML from %s', url)
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            logging.debug(e)
            logging.debug('Proceeding without xml archive info')
            return

        # Read the whole payload, then release the underlying file or
        # connection even if the read itself fails.
        try:
            xml_data = stream.read()
        finally:
            stream.close()

        root = ElementTree.fromstring(xml_data)

        for link in root.findall('link'):
            name = link.get('id')
            longname = link.get('longname')
            archive = link.find('archive')
            timegate = link.find('timegate')

            # Entries missing either child element are skipped.
            if timegate is None or archive is None:
                continue

            rewritten = (archive.get('rewritten-urls') == 'yes')
            unrewritten_url = archive.get('un-rewritten-api-url', '')
            uri = timegate.get('uri')

            self.archive_infos[name] = {
                'uri': uri,
                'rewritten': rewritten,
                'unrewritten_url': unrewritten_url,
                'name': longname
            }

Example #4

Show file

File: archivereplayview.py Project: GovanifY/netcapsule

    def load_archive_info_json(self, url):
        """Populate ``self.archive_infos`` from a JSON archive listing.

        Environment variables in ``url`` are expanded, the document is
        fetched with ``BlockLoader`` and parsed as a JSON list of archive
        entries. Each entry must provide ``id``, ``name`` and ``timegate``;
        ``unrewritten_url`` defaults to the timegate URI followed by
        ``'{timestamp}id_/{url}'``. Every entry is stored with
        ``'rewritten': True``.

        Loading is best-effort: when ``url`` is empty or ``None``, or the
        document cannot be loaded, ``self.archive_infos`` is left empty and
        the method returns without raising.

        :param url: location of the JSON document; may be empty or ``None``.
        :raises KeyError: if an entry lacks ``id``, ``name`` or ``timegate``.
        """
        self.archive_infos = {}
        # os.path.expandvars() raises TypeError on None, so only expand a
        # real value; the emptiness guard below then handles both cases.
        if url:
            url = os.path.expandvars(url)
        logging.debug('Loading JSON from %s', url)
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            logging.debug(e)
            logging.debug('Proceeding without json archive info')
            return

        # Read the whole payload, then release the underlying file or
        # connection even if the read itself fails.
        try:
            json_data = stream.read()
        finally:
            stream.close()

        archives = json.loads(json_data)
        for arc in archives:
            id_ = arc['id']
            name = arc['name']
            uri = arc['timegate']
            unrewritten_url = arc.get('unrewritten_url')
            if not unrewritten_url:
                unrewritten_url = uri + '{timestamp}id_/{url}'

            self.archive_infos[id_] = {
                'id': id_,
                'uri': uri,
                'name': name,
                'rewritten': True,
                'unrewritten_url': unrewritten_url,
            }

Example #5

Show file

File: test_loaders.py Project: mirrorweb/pywb

def test_s3_read_2():
    """Load a whole S3 object through BlockLoader and check size and first line.

    Skipped when ``boto3`` is not importable.
    """
    pytest.importorskip('boto3')

    s3_url = 's3://commoncrawl/crawl-data/CC-MAIN-2015-11/index.html'
    stream = BlockLoader().load(s3_url)

    payload = stream.read()
    assert len(payload) == 2082

    # Re-wrap the bytes already read so the first line can be pulled out.
    line_reader = DecompressingBufferedReader(BytesIO(payload))
    first_line = line_reader.readline()
    assert first_line == b'<!DOCTYPE html>\n'

Example #6

Show file

File: test_loaders.py Project: ikreymer/pywb

def test_s3_read_2():
    """Load a whole S3 object through BlockLoader and check size and first line.

    Skipped when ``boto3`` is not importable.
    """
    pytest.importorskip('boto3')

    s3_url = 's3://commoncrawl/crawl-data/CC-MAIN-2015-11/index.html'
    stream = BlockLoader().load(s3_url)

    payload = stream.read()
    assert len(payload) == 2082

    # Re-wrap the bytes already read so the first line can be pulled out.
    line_reader = DecompressingBufferedReader(BytesIO(payload))
    first_line = line_reader.readline()
    assert first_line == b'<!DOCTYPE html>\n'

Example #7

Show file

def test_s3_read_1():
    """Load a byte range of an S3 WARC file and check the record's first lines.

    Skipped when ``boto`` is not importable.
    """
    pytest.importorskip('boto')

    warc_url = 's3://commoncrawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz'
    stream = BlockLoader().load(warc_url, offset=53235662, length=2526)

    payload = stream.read()
    assert len(payload) == 2526

    # Feed the ranged bytes through the decompressing reader line by line.
    line_reader = DecompressingBufferedReader(BytesIO(payload))
    assert line_reader.readline() == b'WARC/1.0\r\n'
    assert line_reader.readline() == b'WARC-Type: response\r\n'
    assert reader.readline() == b'WARC-Type: response\r\n'

Example #8

Show file

File: test_loaders.py Project: gwu-libraries/pywb

def test_s3_read_1():
    """Load a byte range of an S3 WARC file and check the record's first lines.

    Skipped when ``boto`` is not importable.
    """
    pytest.importorskip('boto')

    warc_url = 's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz'
    stream = BlockLoader().load(warc_url, offset=53235662, length=2526)

    payload = stream.read()
    assert len(payload) == 2526

    # Feed the ranged bytes through the decompressing reader line by line.
    line_reader = DecompressingBufferedReader(BytesIO(payload))
    assert line_reader.readline() == b'WARC/1.0\r\n'
    assert line_reader.readline() == b'WARC-Type: response\r\n'
    assert reader.readline() == b'WARC-Type: response\r\n'