コード例 #1
0
ファイル: cdxserver.py プロジェクト: akeprojecta/pywb
    def __init__(self, source, **kwargs):
        """Set up the server around a remote cdx source.

        :param source: either an existing ``RemoteCDXSource`` (used as-is)
            or a ``str`` accepted by ``is_http``, which is wrapped in a new
            ``RemoteCDXSource`` created with ``remote_processing=True``.
        :param kwargs: forwarded unchanged to the parent initializer.
        :raises Exception: if ``source`` is neither of the accepted forms.
        """
        super(RemoteCDXServer, self).__init__(**kwargs)

        if not isinstance(source, RemoteCDXSource):
            # Only http-style strings may be promoted to a remote source.
            if not (isinstance(source, str) and is_http(source)):
                raise Exception('Invalid remote cdx source: ' + str(source))
            source = RemoteCDXSource(source, remote_processing=True)

        self.source = source
コード例 #2
0
ファイル: cdxserver.py プロジェクト: tilgovi/pywb
    def __init__(self, source, **kwargs):
        """Initialize the parent server and store the remote cdx source.

        :param source: a ``RemoteCDXSource`` instance, stored directly, or a
            ``str`` for which ``is_http`` is true, from which a
            ``RemoteCDXSource(source, remote_processing=True)`` is built.
        :param kwargs: passed through to the parent ``__init__``.
        :raises Exception: when ``source`` matches neither form.
        """
        super(RemoteCDXServer, self).__init__(**kwargs)

        already_wrapped = isinstance(source, RemoteCDXSource)
        if not already_wrapped:
            usable_url = isinstance(source, str) and is_http(source)
            if not usable_url:
                raise Exception('Invalid remote cdx source: ' + str(source))

        if already_wrapped:
            self.source = source
        else:
            self.source = RemoteCDXSource(source, remote_processing=True)
コード例 #3
0
    def fetch_request(self,
                      url,
                      urlrewriter,
                      head_insert_func=None,
                      urlkey=None,
                      env=None,
                      req_headers=None,
                      timestamp=None,
                      follow_redirects=False,
                      proxies=None):
        """Fetch ``url`` and return its content run through the rewriter.

        The url is fetched with ``self.fetch_http`` when ``is_http(url)`` is
        true and with ``self.fetch_local_file`` otherwise. A cdx-style dict
        describing the fetch is built, handed to
        ``self.rewriter.rewrite_content`` and, when ``env`` is truthy, also
        stored in ``env['pywb.cdx']``.

        :param url: url to fetch; a stray ``///`` (other than ``file:///``)
            and a scheme-relative ``//`` prefix are repaired first.
        :param urlrewriter: passed to ``rewrite_content``.
        :param head_insert_func: passed to ``rewrite_content``.
        :param urlkey: explicit url key; computed with ``canonicalize(url)``
            when falsy.
        :param env: optional mapping passed to ``fetch_http``; receives the
            cdx dict under ``'pywb.cdx'`` when truthy.
        :param req_headers: headers dict passed to ``fetch_http``. A new
            empty dict is created for each call when omitted or ``None``.
        :param timestamp: cdx timestamp; defaults to the current UTC time
            converted with ``datetime_to_timestamp``.
        :param follow_redirects: passed to ``fetch_http``.
        :param proxies: passed to ``fetch_http``.
        :returns: the result of ``self.rewriter.rewrite_content``.
        """
        # Never share one default dict between calls: it is handed to
        # fetch_http, and any mutation there would otherwise leak headers
        # from one request into the next.
        if req_headers is None:
            req_headers = {}

        ts_err = url.split('///')

        # fixup for accidental erroneous rewrite which has ///
        # (unless file:///)
        if len(ts_err) > 1 and ts_err[0] != 'file:':
            url = 'http://' + ts_err[1]

        # scheme-relative url: assume http
        if url.startswith('//'):
            url = 'http:' + url

        if is_http(url):
            (status_headers, stream) = self.fetch_http(url, env, req_headers,
                                                       follow_redirects,
                                                       proxies)
        else:
            (status_headers, stream) = self.fetch_local_file(url)

        # explicit urlkey may be passed in (say for testing)
        if not urlkey:
            urlkey = canonicalize(url)

        if timestamp is None:
            timestamp = datetime_to_timestamp(datetime.datetime.utcnow())

        # minimal cdx record describing this live fetch
        cdx = {
            'urlkey': urlkey,
            'timestamp': timestamp,
            'original': url,
            'statuscode': status_headers.get_statuscode(),
            'mimetype': status_headers.get_header('Content-Type'),
            'is_live': True,
        }

        result = (self.rewriter.rewrite_content(
            urlrewriter,
            status_headers,
            stream,
            head_insert_func=head_insert_func,
            urlkey=urlkey,
            cdx=cdx))

        # expose the cdx record to the caller through the environ
        if env:
            env['pywb.cdx'] = cdx

        return result
コード例 #4
0
ファイル: rewrite_live.py プロジェクト: jasonliw93/recon
    def fetch_request(self, url, urlrewriter,
                      head_insert_func=None,
                      urlkey=None,
                      env=None,
                      req_headers=None,
                      timestamp=None,
                      follow_redirects=False,
                      proxies=None):
        """Fetch ``url`` and return the rewritten response.

        ``self.fetch_http`` is used when ``is_http(url)`` is true, otherwise
        ``self.fetch_local_file``. The response headers and stream are then
        passed to ``self.rewriter.rewrite_content`` together with a cdx-style
        dict describing the fetch; that dict is also stored in
        ``env['pywb.cdx']`` when ``env`` is truthy.

        :param url: url to fetch; an accidental ``///`` (except in
            ``file:///``) and a leading ``//`` are fixed up before fetching.
        :param urlrewriter: forwarded to ``rewrite_content``.
        :param head_insert_func: forwarded to ``rewrite_content``.
        :param urlkey: explicit url key; ``canonicalize(url)`` when falsy.
        :param env: optional mapping forwarded to ``fetch_http``.
        :param req_headers: headers dict forwarded to ``fetch_http``; a new
            empty dict is used when omitted or ``None``.
        :param timestamp: cdx timestamp; current UTC time when ``None``.
        :param follow_redirects: forwarded to ``fetch_http``.
        :param proxies: forwarded to ``fetch_http``.
        :returns: whatever ``self.rewriter.rewrite_content`` returns.
        """
        # A literal ``{}`` default would be a single dict shared by every
        # call; create a fresh one so nothing written into it downstream
        # can carry over between requests.
        if req_headers is None:
            req_headers = {}

        ts_err = url.split('///')

        # fixup for accidental erroneous rewrite which has ///
        # (unless file:///)
        if len(ts_err) > 1 and ts_err[0] != 'file:':
            url = 'http://' + ts_err[1]

        # scheme-relative url: assume http
        if url.startswith('//'):
            url = 'http:' + url

        if is_http(url):
            (status_headers, stream) = self.fetch_http(url, env, req_headers,
                                                       follow_redirects,
                                                       proxies)
        else:
            (status_headers, stream) = self.fetch_local_file(url)

        # explicit urlkey may be passed in (say for testing)
        if not urlkey:
            urlkey = canonicalize(url)

        if timestamp is None:
            timestamp = datetime_to_timestamp(datetime.datetime.utcnow())

        # minimal cdx record describing this live fetch
        cdx = {'urlkey': urlkey,
               'timestamp': timestamp,
               'original': url,
               'statuscode': status_headers.get_statuscode(),
               'mimetype': status_headers.get_header('Content-Type'),
               'is_live': True,
              }

        result = (self.rewriter.
                  rewrite_content(urlrewriter,
                                  status_headers,
                                  stream,
                                  head_insert_func=head_insert_func,
                                  urlkey=urlkey,
                                  cdx=cdx))

        # expose the cdx record to the caller through the environ
        if env:
            env['pywb.cdx'] = cdx

        return result
コード例 #5
0
ファイル: cdxserver.py プロジェクト: tilgovi/pywb
    def _create_cdx_source(self, filename, config):
        """Build the cdx source object that matches ``filename``.

        The checks run in order and the first match wins:

        * ``is_http(filename)`` is true -> ``RemoteCDXSource(filename)``
        * starts with ``redis://`` -> ``RedisCDXSource(filename, config)``
        * ends with ``.cdx`` -> ``CDXFile(filename)``
        * ends with ``.summary`` or ``.idx`` ->
          ``ZipNumCluster(filename, config)``

        :param filename: URI or path string identifying the source.
        :param config: passed to the redis and zipnum sources only.
        :returns: the created source, or ``None`` (after logging a warning)
            when ``filename`` matches none of the known forms.
        """
        if is_http(filename):
            return RemoteCDXSource(filename)

        if filename.startswith('redis://'):
            return RedisCDXSource(filename, config)

        if filename.endswith('.cdx'):
            return CDXFile(filename)

        if filename.endswith(('.summary', '.idx')):
            return ZipNumCluster(filename, config)

        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and emits the same record.
        logging.warning('skipping unrecognized URI:%s', filename)
        return None
コード例 #6
0
ファイル: cdxserver.py プロジェクト: akeprojecta/pywb
    def _create_cdx_source(self, filename, config):
        """Build the cdx source object that matches ``filename``.

        The checks run in order and the first match wins:

        * ``is_http(filename)`` is true -> ``RemoteCDXSource(filename)``
        * starts with ``redis://`` -> ``RedisCDXSource(filename, config)``
        * ends with ``.cdx`` or ``.cdxj`` -> ``CDXFile(filename)``
        * ends with ``.summary`` or ``.idx`` ->
          ``ZipNumCluster(filename, config)``

        :param filename: URI or path string identifying the source.
        :param config: passed to the redis and zipnum sources only.
        :returns: the created source, or ``None`` (after logging a warning)
            when ``filename`` matches none of the known forms.
        """
        if is_http(filename):
            return RemoteCDXSource(filename)

        if filename.startswith('redis://'):
            return RedisCDXSource(filename, config)

        if filename.endswith(('.cdx', '.cdxj')):
            return CDXFile(filename)

        if filename.endswith(('.summary', '.idx')):
            return ZipNumCluster(filename, config)

        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and emits the same record.
        logging.warning('skipping unrecognized URI:%s', filename)
        return None
コード例 #7
0
ファイル: pywb_init.py プロジェクト: akeprojecta/pywb
    def _add_dir_if_exists(self, coll, root_dir, dir_key, required=False):
        """Fill ``coll[dir_key]`` with a default directory when it exists.

        If ``coll`` already holds a truthy value for ``dir_key`` it is kept;
        unless ``is_http`` accepts it, it is normalized through
        ``self._norm_path(root_dir, value)`` with a trailing path separator.
        Otherwise the default name from ``self.config['paths'][dir_key]`` is
        joined onto ``root_dir`` and used if that directory exists.

        :returns: ``True`` only when the default directory was found and
            stored in ``coll``; ``False`` in every other non-raising case.
        :raises Exception: if the default directory is missing and
            ``required`` is true.
        """
        existing = coll.get(dir_key)
        if existing:
            if not is_http(existing):
                normalized = self._norm_path(root_dir, existing)
                coll[dir_key] = normalized + os.path.sep
            return False

        default_name = self.config.get('paths')[dir_key]
        candidate = os.path.join(root_dir, default_name)

        if not os.path.isdir(candidate):
            if required:
                raise Exception(
                    'Dir "{0}" does not exist for "{1}"'.format(candidate,
                                                                 dir_key))
            return False

        # store as an absolute path with a trailing separator
        coll[dir_key] = os.path.abspath(candidate) + os.path.sep
        return True
コード例 #8
0
ファイル: pywb_init.py プロジェクト: akeprojecta/pywb
    def _add_dir_if_exists(self, coll, root_dir, dir_key, required=False):
        """Ensure ``coll[dir_key]`` points at a directory, using a default.

        A truthy value already present in ``coll`` is left in place (and,
        when ``is_http`` rejects it, rewritten to
        ``self._norm_path(root_dir, value) + os.path.sep``); the method then
        returns ``False``. With no value present, the name configured under
        ``self.config['paths'][dir_key]`` is resolved against ``root_dir``.

        :returns: ``True`` if the resolved default directory exists and was
            written to ``coll``; ``False`` otherwise.
        :raises Exception: if ``required`` is true and the resolved default
            directory does not exist.
        """
        preset = coll.get(dir_key)

        if not preset:
            configured = self.config.get('paths')[dir_key]
            target = os.path.join(root_dir, configured)

            if os.path.isdir(target):
                # absolute path, always ending with the separator
                coll[dir_key] = os.path.abspath(target) + os.path.sep
                return True

            if required:
                template = 'Dir "{0}" does not exist for "{1}"'
                raise Exception(template.format(target, dir_key))

            return False

        if not is_http(preset):
            coll[dir_key] = self._norm_path(root_dir, preset) + os.path.sep

        return False
コード例 #9
0
ファイル: cdxserver.py プロジェクト: tilgovi/pywb
def create_cdx_server(config, ds_rules_file=None, server_cls=None):
    """Create a cdx server from a config mapping or a bare paths value.

    When ``config`` has a ``get`` attribute it is treated as a mapping:
    ``index_paths`` and ``surt_ordered`` (default ``True``) are read from it
    and the mapping itself is passed on as ``config``. Otherwise ``config``
    is used directly as the paths value, ``surt_ordered`` is ``True`` and
    ``None`` is passed on as ``config``.

    :param ds_rules_file: forwarded to the server class.
    :param server_cls: class (or callable) to instantiate; when falsy,
        ``RemoteCDXServer`` is chosen for an http string or a
        ``RemoteCDXSource``, and ``CDXServer`` for anything else.
    :returns: ``server_cls(paths, config=..., surt_ordered=...,
        ds_rules_file=...)``.
    """
    config_is_mapping = hasattr(config, 'get')

    if config_is_mapping:
        paths = config.get('index_paths')
        surt_ordered = config.get('surt_ordered', True)
    else:
        paths, surt_ordered = config, True

    pass_config = config if config_is_mapping else None

    logging.debug('CDX Surt-Ordered? ' + str(surt_ordered))

    if not server_cls:
        wants_remote = ((isinstance(paths, str) and is_http(paths)) or
                        isinstance(paths, RemoteCDXSource))
        server_cls = RemoteCDXServer if wants_remote else CDXServer

    server_kwargs = dict(config=pass_config,
                         surt_ordered=surt_ordered,
                         ds_rules_file=ds_rules_file)
    return server_cls(paths, **server_kwargs)
コード例 #10
0
ファイル: cdxserver.py プロジェクト: akeprojecta/pywb
def create_cdx_server(config, ds_rules_file=None, server_cls=None):
    """Instantiate a cdx server for the given configuration.

    ``config`` may be a mapping (anything with a ``get`` attribute), in
    which case ``index_paths`` and ``surt_ordered`` (default ``True``) are
    taken from it and the mapping is handed to the server as ``config``.
    Any other value is used as the paths argument itself, with
    ``surt_ordered=True`` and ``config=None``.

    :param ds_rules_file: passed through to the server class.
    :param server_cls: class (or callable) to build; if falsy,
        ``RemoteCDXServer`` is used when the paths value is an http string
        or a ``RemoteCDXSource``, else ``CDXServer``.
    :returns: the object produced by calling the selected server class.
    """
    if not hasattr(config, 'get'):
        # bare paths value: nothing else to read, fall back to defaults
        paths = config
        surt_ordered = True
        pass_config = None
    else:
        pass_config = config
        paths = pass_config.get('index_paths')
        surt_ordered = pass_config.get('surt_ordered', True)

    logging.debug('CDX Surt-Ordered? ' + str(surt_ordered))

    if server_cls:
        chosen_cls = server_cls
    elif isinstance(paths, str) and is_http(paths):
        chosen_cls = RemoteCDXServer
    elif isinstance(paths, RemoteCDXSource):
        chosen_cls = RemoteCDXServer
    else:
        chosen_cls = CDXServer

    return chosen_cls(paths,
                      config=pass_config,
                      surt_ordered=surt_ordered,
                      ds_rules_file=ds_rules_file)