Ejemplos de BlockLoader.read en Python

Lenguaje de programación: Python

Namespace/Package Name: pywb.utils.loaders

Clase / Tipo: BlockLoader

Método / Función: read

Ejemplos en hotexamples.com: 8

Python BlockLoader.read - 8 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de pywb.utils.loaders.BlockLoader.read extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

BlockLoader(22)

load(7)

read(4)

load_file_or_resource(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: mementohandler.py Proyecto: theletterf/memento-reconstruct

    def load_archive_info_xml(self, url):
        """Populate ``self.archive_infos`` from an XML archive list.

        ``self.archive_infos`` is always reset to an empty dict first. The
        document at ``url`` is fetched through ``BlockLoader().load`` and
        parsed with ``ElementTree``; every ``link`` element found by
        ``root.findall('link')`` that has both an ``archive`` and a
        ``timegate`` child becomes one entry keyed by the link's ``id``
        attribute.

        If ``url`` is falsy, or the load call raises, the dict is left empty
        and the method returns without raising. Errors raised while reading
        or parsing the document are not caught here.

        :param url: location of the XML document, handed to
            ``BlockLoader().load``
        """
        self.archive_infos = {}
        logging.debug('Loading XML from {0}'.format(url))
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            # Best effort: carry on with an empty archive list
            logging.debug(e)
            logging.debug('Proceeding without xml archive info')
            return

        # Read the whole payload, then release the stream even if read() fails
        try:
            data = stream.read()
        finally:
            stream.close()

        root = ElementTree.fromstring(data)

        for link in root.findall('link'):
            name = link.get('id')
            archive = link.find('archive')
            timegate = link.find('timegate')

            # Both children are required to build a usable entry
            if timegate is None or archive is None:
                continue

            rewritten = (archive.get('rewritten-urls') == 'yes')
            unrewritten_url = archive.get('un-rewritten-api-url', '')
            uri = timegate.get('uri')

            self.archive_infos[name] = {
                'uri': uri,
                'rewritten': rewritten,
                'unrewritten_url': unrewritten_url,
            }

Ejemplo n.º 2

Mostrar archivo

Archivo: archivereplayview.py Proyecto: lorz54/test

    def load_archive_info_json(self, url):
        """Populate ``self.archive_infos`` from a JSON archive list.

        ``self.archive_infos`` is always reset to an empty dict first.
        Environment variables in ``url`` are expanded, the document is
        fetched through ``BlockLoader().load`` and decoded with
        ``json.loads``. Each item must provide ``id``, ``name`` and
        ``timegate``; ``base_url`` defaults to the timegate uri, and a
        missing or empty ``unrewritten_url`` is replaced by
        ``base_url + '{timestamp}id_/{url}'``.

        If ``url`` is falsy (including ``None``), or the load call raises,
        the dict is left empty and the method returns without raising.
        Errors raised while reading or decoding the document are not caught
        here.

        :param url: location of the JSON document; may contain environment
            variable references
        """
        self.archive_infos = {}
        # os.path.expandvars() raises TypeError on None, so only expand
        # real strings and let the guard below handle the empty case.
        if url:
            url = os.path.expandvars(url)
        logging.debug('Loading JSON from {0}'.format(url))
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            # Best effort: carry on with an empty archive list
            logging.debug(e)
            logging.debug('Proceeding without json archive info')
            return

        # Read the whole payload, then release the stream even if read() fails
        try:
            raw = stream.read()
        finally:
            stream.close()

        archives = json.loads(raw)
        for arc in archives:
            id_ = arc['id']
            name = arc['name']
            uri = arc['timegate']
            base_url = arc.get('base_url', uri)
            unrewritten_url = arc.get('unrewritten_url')
            if not unrewritten_url:
                # Default template derived from the archive's base url
                unrewritten_url = base_url + '{timestamp}id_/{url}'

            self.archive_infos[id_] = {
                'id': id_,
                'uri': uri,
                'name': name,
                'base_url': base_url,
                'unrewritten_url': unrewritten_url,
            }

Ejemplo n.º 3

Mostrar archivo

Archivo: archivereplayview.py Proyecto: lorz54/test

    def load_archive_info_xml(self, url):
        """Populate ``self.archive_infos`` from an XML archive list.

        ``self.archive_infos`` is always reset to an empty dict first.
        Environment variables in ``url`` are expanded, the document is
        fetched through ``BlockLoader().load`` and parsed with
        ``ElementTree``. Every ``link`` element found by
        ``root.findall('link')`` that has both an ``archive`` and a
        ``timegate`` child becomes one entry keyed by the link's ``id``
        attribute; its ``longname`` attribute is stored under ``'name'``.

        If ``url`` is falsy (including ``None``), or the load call raises,
        the dict is left empty and the method returns without raising.
        Errors raised while reading or parsing the document are not caught
        here.

        :param url: location of the XML document; may contain environment
            variable references
        """
        self.archive_infos = {}
        # os.path.expandvars() raises TypeError on None, so only expand
        # real strings and let the guard below handle the empty case.
        if url:
            url = os.path.expandvars(url)
        logging.debug('Loading XML from {0}'.format(url))
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            # Best effort: carry on with an empty archive list
            logging.debug(e)
            logging.debug('Proceeding without xml archive info')
            return

        # Read the whole payload, then release the stream even if read() fails
        try:
            data = stream.read()
        finally:
            stream.close()

        root = ElementTree.fromstring(data)

        for link in root.findall('link'):
            name = link.get('id')
            longname = link.get('longname')
            archive = link.find('archive')
            timegate = link.find('timegate')

            # Both children are required to build a usable entry
            if timegate is None or archive is None:
                continue

            rewritten = (archive.get('rewritten-urls') == 'yes')
            unrewritten_url = archive.get('un-rewritten-api-url', '')
            uri = timegate.get('uri')

            self.archive_infos[name] = {
                'uri': uri,
                'rewritten': rewritten,
                'unrewritten_url': unrewritten_url,
                'name': longname,
            }

Ejemplo n.º 4

Mostrar archivo

Archivo: archivereplayview.py Proyecto: GovanifY/netcapsule

    def load_archive_info_json(self, url):
        """Populate ``self.archive_infos`` from a JSON archive list.

        ``self.archive_infos`` is always reset to an empty dict first.
        Environment variables in ``url`` are expanded, the document is
        fetched through ``BlockLoader().load`` and decoded with
        ``json.loads``. Each item must provide ``id``, ``name`` and
        ``timegate``; a missing or empty ``unrewritten_url`` is replaced by
        the timegate uri followed by ``'{timestamp}id_/{url}'``. Every entry
        is stored with ``'rewritten': True``.

        If ``url`` is falsy (including ``None``), or the load call raises,
        the dict is left empty and the method returns without raising.
        Errors raised while reading or decoding the document are not caught
        here.

        :param url: location of the JSON document; may contain environment
            variable references
        """
        self.archive_infos = {}
        # os.path.expandvars() raises TypeError on None, so only expand
        # real strings and let the guard below handle the empty case.
        if url:
            url = os.path.expandvars(url)
        logging.debug('Loading JSON from {0}'.format(url))
        if not url:
            return

        try:
            stream = BlockLoader().load(url)
        except Exception as e:
            # Best effort: carry on with an empty archive list
            logging.debug(e)
            logging.debug('Proceeding without json archive info')
            return

        # Read the whole payload, then release the stream even if read() fails
        try:
            raw = stream.read()
        finally:
            stream.close()

        archives = json.loads(raw)
        for arc in archives:
            id_ = arc['id']
            name = arc['name']
            uri = arc['timegate']
            unrewritten_url = arc.get('unrewritten_url')
            if not unrewritten_url:
                # Default template derived from the timegate uri
                unrewritten_url = uri + '{timestamp}id_/{url}'

            self.archive_infos[id_] = {
                'id': id_,
                'uri': uri,
                'name': name,
                'rewritten': True,
                'unrewritten_url': unrewritten_url,
            }

Ejemplo n.º 5

Mostrar archivo

Archivo: test_loaders.py Proyecto: mirrorweb/pywb

def test_s3_read_2():
    """Load a whole object from an ``s3://`` url and check size and first line.

    Skipped when ``boto3`` cannot be imported.
    """
    pytest.importorskip('boto3')

    index_url = 's3://commoncrawl/crawl-data/CC-MAIN-2015-11/index.html'
    payload = BlockLoader().load(index_url).read()
    assert len(payload) == 2082

    # Re-wrap the bytes so the first line can be read back
    first_line = DecompressingBufferedReader(BytesIO(payload)).readline()
    assert first_line == b'<!DOCTYPE html>\n'

Ejemplo n.º 6

Mostrar archivo

Archivo: test_loaders.py Proyecto: ikreymer/pywb

def test_s3_read_2():
    """Fetch a full ``s3://`` object; verify its length and opening line.

    The test is skipped if ``boto3`` is not importable.
    """
    pytest.importorskip('boto3')

    loader = BlockLoader()
    stream = loader.load('s3://commoncrawl/crawl-data/CC-MAIN-2015-11/index.html')

    content = stream.read()
    assert len(content) == 2082

    # Read the first line back through the buffered reader
    buffered = DecompressingBufferedReader(BytesIO(content))
    opening = buffered.readline()
    assert opening == b'<!DOCTYPE html>\n'

Ejemplo n.º 7

Mostrar archivo

def test_s3_read_1():
    """Load a byte range of a ``.warc.gz`` from S3 and check the record start.

    Skipped when ``boto`` cannot be imported.
    """
    pytest.importorskip('boto')

    warc_url = 's3://commoncrawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz'
    payload = BlockLoader().load(warc_url, offset=53235662, length=2526).read()
    assert len(payload) == 2526

    # The requested range should decompress to the start of a WARC record
    record = DecompressingBufferedReader(BytesIO(payload))
    assert record.readline() == b'WARC/1.0\r\n'
    assert record.readline() == b'WARC-Type: response\r\n'
    assert reader.readline() == b'WARC-Type: response\r\n'

Ejemplo n.º 8

Mostrar archivo

Archivo: test_loaders.py Proyecto: gwu-libraries/pywb

def test_s3_read_1():
    """Fetch a ranged slice of a ``.warc.gz`` on S3; verify the header lines.

    The test is skipped if ``boto`` is not importable.
    """
    pytest.importorskip('boto')

    warc_url = 's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz'
    stream = BlockLoader().load(warc_url, offset=53235662, length=2526)

    chunk = stream.read()
    assert len(chunk) == 2526

    # The requested range should decompress to the start of a WARC record
    record = DecompressingBufferedReader(BytesIO(chunk))
    version_line = record.readline()
    assert version_line == b'WARC/1.0\r\n'
    type_line = record.readline()
    assert type_line == b'WARC-Type: response\r\n'