コード例 #1
0
ファイル: cdxops.py プロジェクト: tilgovi/pywb
def cdx_sort_closest(closest, cdx_iter, limit=10):
    """
    Yield up to ``limit`` captures from ``cdx_iter``, nearest to ``closest``
    first.

    :param closest: target timestamp, in whatever form ``timestamp_to_sec``
        accepts
    :param cdx_iter: iterable of captures; each must expose a
        ``'timestamp'`` item
    :param limit: maximum number of captures to yield
    :return: generator of captures ordered by increasing absolute distance
        from ``closest`` (as computed from ``timestamp_to_sec`` values);
        captures at the same distance keep their input order
    """
    closest_sec = timestamp_to_sec(closest)

    # Distances and captures are kept in two parallel lists so that only
    # the numeric distances are ever compared. Sorting (distance, capture)
    # tuples falls back to comparing the captures themselves on a tie,
    # which gives an arbitrary order on Python 2 and raises TypeError on
    # Python 3 for captures without ordering (e.g. plain dicts).
    closest_keys = []
    closest_cdx = []

    for cdx in cdx_iter:
        key = abs(closest_sec - timestamp_to_sec(cdx['timestamp']))

        # bisect_right puts a new capture after existing ones at the same
        # distance, so ties stay in input order.
        pos = bisect.bisect_right(closest_keys, key)
        closest_keys.insert(pos, key)
        closest_cdx.insert(pos, cdx)

        if len(closest_cdx) > limit:
            # Drop the farthest capture together with its distance so the
            # two lists stay aligned.
            closest_keys.pop()
            closest_cdx.pop()

    # NOTE(review): the previous version tried to stop early once ``limit``
    # entries were held, but it compared the distance with a
    # (distance, capture) tuple: never true on CPython 2, TypeError on
    # Python 3. The full scan it effectively performed is kept, since it is
    # correct for any input order.
    for cdx in closest_cdx:
        yield cdx
コード例 #2
0
ファイル: cdxops.py プロジェクト: yumatchlab/pywb
def cdx_sort_closest(closest, cdx_iter, limit=10):
    """
    Yield up to ``limit`` captures from ``cdx_iter``, nearest to ``closest``
    first.

    :param closest: target timestamp, in whatever form ``timestamp_to_sec``
        accepts
    :param cdx_iter: iterable of captures; each must expose a ``TIMESTAMP``
        item
    :param limit: maximum number of captures to yield
    :return: generator of captures ordered by increasing absolute distance
        from ``closest`` (as computed from ``timestamp_to_sec`` values);
        captures at the same distance keep their input order
    """
    closest_sec = timestamp_to_sec(closest)

    # Parallel lists: closest_keys[i] is the distance of closest_cdx[i].
    # Only the numeric distances are compared, never the captures.
    closest_keys = []
    closest_cdx = []

    for cdx in cdx_iter:
        key = abs(closest_sec - timestamp_to_sec(cdx[TIMESTAMP]))

        # bisect_right puts a new capture after existing ones at the same
        # distance, so ties stay in input order.
        pos = bisect.bisect_right(closest_keys, key)
        closest_keys.insert(pos, key)
        closest_cdx.insert(pos, cdx)

        if len(closest_cdx) > limit:
            # Trim both lists. Previously only closest_cdx was trimmed, so
            # closest_keys grew by one entry per input capture.
            closest_keys.pop()
            closest_cdx.pop()

    # NOTE(review): the previous early-exit test (key > closest_keys[-1])
    # ran after ``key`` had been inserted into closest_keys, so it could
    # never be true. The full scan it effectively performed is kept, since
    # it is correct for any input order.
    for cdx in closest_cdx:
        yield cdx
コード例 #3
0
ファイル: cdxops.py プロジェクト: akeprojecta/pywb
def cdx_sort_closest(closest, cdx_iter, limit=10):
    """
    Yield up to ``limit`` captures from ``cdx_iter``, nearest to ``closest``
    first.

    :param closest: target timestamp, in whatever form ``timestamp_to_sec``
        accepts
    :param cdx_iter: iterable of captures; each must expose a ``TIMESTAMP``
        item
    :param limit: maximum number of captures to yield
    :return: generator of captures ordered by increasing absolute distance
        from ``closest`` (as computed from ``timestamp_to_sec`` values);
        captures at the same distance keep their input order
    """
    closest_sec = timestamp_to_sec(closest)

    # Distances and captures are kept in two parallel lists so that only
    # the numeric distances are ever compared. Sorting (distance, capture)
    # tuples falls back to comparing the captures themselves on a tie,
    # which gives an arbitrary order on Python 2 and raises TypeError on
    # Python 3 for captures without ordering (e.g. plain dicts).
    closest_keys = []
    closest_cdx = []

    for cdx in cdx_iter:
        key = abs(closest_sec - timestamp_to_sec(cdx[TIMESTAMP]))

        # bisect_right puts a new capture after existing ones at the same
        # distance, so ties stay in input order.
        pos = bisect.bisect_right(closest_keys, key)
        closest_keys.insert(pos, key)
        closest_cdx.insert(pos, cdx)

        if len(closest_cdx) > limit:
            # Drop the farthest capture together with its distance so the
            # two lists stay aligned.
            closest_keys.pop()
            closest_cdx.pop()

    # NOTE(review): the previous version tried to stop early once ``limit``
    # entries were held, but it compared the distance with a
    # (distance, capture) tuple: never true on CPython 2, TypeError on
    # Python 3. The full scan it effectively performed is kept, since it is
    # correct for any input order.
    for cdx in closest_cdx:
        yield cdx
コード例 #4
0
    def cdx_index(self, z_key, stream, filename):
        """
        Index ``stream`` as CDXJ and add the resulting lines under ``z_key``.

        The index is produced by ``write_cdx_index`` into an in-memory
        buffer. Each non-empty line is added with ``self.dst_redis.zadd``
        (score 0) unless ``self.dry`` is set, in which case nothing is
        written or counted.

        :param z_key: key passed to ``self.dst_redis.zadd``
        :param stream: input handed to ``write_cdx_index``
        :param filename: filename handed to ``write_cdx_index``
        :return: ``(min, max)`` of the added lines' ``'timestamp'`` values,
            each converted with ``timestamp_to_sec``; ``(None, None)`` when
            no line was added
        """
        buff = BytesIO()
        write_cdx_index(buff, stream, filename, cdxj=True, append_post=True)

        lines = buff.getvalue().rstrip().split(b'\n')

        num_added = 0
        min_ = None
        max_ = None

        for line in lines:
            # skip blank lines; a dry run skips everything
            if not line or self.dry:
                continue

            self.dst_redis.zadd(z_key, 0, line)

            ts = CDXObject(line)['timestamp']

            # track the smallest and largest raw timestamp seen so far
            if not min_:
                min_ = ts
            else:
                min_ = min(min_, ts)

            if not max_:
                max_ = ts
            else:
                max_ = max(max_, ts)

            num_added += 1

        if num_added:
            min_ = timestamp_to_sec(min_)
            max_ = timestamp_to_sec(max_)

        logging.info('  CDXJ: {0} {1} {2}'.format(num_added, min_, max_))
        return min_, max_
コード例 #5
0
ファイル: migrate3.0.py プロジェクト: webrecorder/webrecorder
    def cdx_index(self, z_key, stream, filename):
        """
        Index ``stream`` as CDXJ and add the resulting lines under ``z_key``.

        The index is produced by ``write_cdx_index`` into an in-memory
        buffer. Each non-empty line is added with ``self.dst_redis.zadd``
        (score 0) unless ``self.dry`` is set, in which case nothing is
        written or counted.

        :param z_key: key passed to ``self.dst_redis.zadd``
        :param stream: input handed to ``write_cdx_index``
        :param filename: filename handed to ``write_cdx_index``
        :return: ``(min, max)`` of the added lines' ``'timestamp'`` values,
            each converted with ``timestamp_to_sec``; ``(None, None)`` when
            no line was added
        """
        buff = BytesIO()
        write_cdx_index(buff, stream, filename, cdxj=True, append_post=True)

        lines = buff.getvalue().rstrip().split(b'\n')

        num_added = 0
        min_ = None
        max_ = None

        for line in lines:
            # skip blank lines; a dry run skips everything
            if not line or self.dry:
                continue

            self.dst_redis.zadd(z_key, 0, line)

            ts = CDXObject(line)['timestamp']

            # track the smallest and largest raw timestamp seen so far
            if not min_:
                min_ = ts
            else:
                min_ = min(min_, ts)

            if not max_:
                max_ = ts
            else:
                max_ = max(max_, ts)

            num_added += 1

        if num_added:
            min_ = timestamp_to_sec(min_)
            max_ = timestamp_to_sec(max_)

        logging.info('  CDXJ: {0} {1} {2}'.format(num_added, min_, max_))
        return min_, max_
コード例 #6
0
    def handle_not_found(self, wbrequest, nfe):
        """
        Run the parent not-found handling, then record the result as an
        embed entry when the request came from a replayed page.

        An entry is stored only when the request is not a query and its
        referrer starts with ``wbrequest.wb_prefix``. The entry is tagged
        ``'MISSING '`` for a 4xx status (unless ``skip_missing_count``
        says otherwise) or ``'LIVE '`` for a 2xx status.

        :param wbrequest: the current request
        :param nfe: the not-found error, forwarded to the parent handler
        :return: the parent handler's response, unchanged
        """
        response = super(MementoHandler, self).handle_not_found(wbrequest, nfe)

        if wbrequest.wb_url.is_query():
            return response

        referrer = wbrequest.referrer
        if not referrer:
            return response

        prefix = wbrequest.wb_prefix
        if not referrer.startswith(prefix):
            return response

        # the referring page, with the replay prefix stripped off
        wb_url = WbUrl(referrer[len(prefix):])

        status = response.status_headers.get_statuscode()

        if status.startswith('4') and not self.skip_missing_count(wb_url):
            key_name = 'MISSING '
        elif status.startswith('2'):
            key_name = 'LIVE '
        else:
            # any other status is not recorded
            return response

        page_key = redis_client.get_url_key(wb_url)

        ts = timestamp_now()

        value = key_name + ts + ' ' + wbrequest.wb_url.url
        save_value = str(timestamp_to_sec(ts)) + ' ' + 'text/html'

        redis_client.set_embed_entry(page_key, value, save_value)

        return response
コード例 #7
0
ファイル: archivereplayview.py プロジェクト: lorz54/test
    def __call__(self, cdx, skip_hosts, cdx_loader, wbrequest):
        """
        Fetch the capture described by ``cdx`` and return its status line,
        headers and raw body stream.

        Some bookkeeping about the fetch is also written to Redis under a
        key built from the client address and the requested timestamp; a
        failure there is printed and otherwise ignored.

        :param cdx: capture record; ``'timestamp'`` and ``'url'`` are read
        :param skip_hosts: forwarded to ``_get_urls_to_try`` and ``_do_req``
        :param cdx_loader: not used by this method
        :param wbrequest: current request; ``env`` and ``wb_url.timestamp``
            are read
        :return: ``(StatusAndHeaders, stream)`` where ``stream`` is
            ``response.raw``
        :raises CaptureException: if no response could be obtained
        """
        self.session.cookies.clear()

        try_urls, host, archive_name = self._get_urls_to_try(
            cdx, skip_hosts, wbrequest)

        try:
            response = self._do_req(try_urls, host, cdx, wbrequest.env,
                                    skip_hosts)
        except Exception as e:
            print(e)
            response = None

        if response is None:
            print(skip_hosts)
            raise CaptureException('Content Could Not Be Loaded')

        # redirects: un-rewrite the headers that point at another location
        if 300 <= response.status_code < 400:
            self.unrewrite_header(response, 'Location')
            self.unrewrite_header(response, 'Content-Location')

        # NOTE(review): if 'REMOTE_ADDR' is missing from env, ``remote`` is
        # None and the concatenation below raises TypeError -- confirm the
        # server always sets it.
        remote = wbrequest.env.get('REMOTE_ADDR')
        req_ts = wbrequest.wb_url.timestamp
        base_key = remote + ':' + req_ts

        sec = timestamp_to_sec(cdx['timestamp'])
        referrer = wbrequest.env.get('HTTP_REFERER')

        try:
            pi = redisclient.redis.pipeline(transaction=False)

            pi.hset(base_key + ':urls', cdx['url'], sec)
            pi.sadd(base_key + ':hosts', archive_name)

            if referrer and not referrer.endswith('.css'):
                pi.set(base_key + ':ref', referrer)
            elif not referrer:
                pi.set(base_key + ':base', cdx['url'])

            pi.execute()
        except Exception:
            import traceback
            # print_exc() reports the exception being handled; its first
            # positional argument is ``limit``, so the exception object
            # must not be passed to it.
            traceback.print_exc()

        statusline = str(response.status_code) + ' ' + response.reason

        headers = response.headers.items()

        stream = response.raw

        status_headers = StatusAndHeaders(statusline, headers)

        return (status_headers, stream)
コード例 #8
0
ファイル: test_redis_source.py プロジェクト: tilgovi/pywb
def zadd_cdx(source, cdx, key):
    """
    Add one cdx line to ``source.redis`` with ``zadd``.

    With a truthy ``key`` the whole line is added under that key with
    score 0. Otherwise the line is split on its first two spaces: the
    first field is appended to ``source.key_prefix`` to form the key, the
    second field is converted with ``timestamp_to_sec`` to form the score,
    and the stored member is the line without its first field.
    """
    if key:
        source.redis.zadd(key, 0, cdx)
    else:
        fields = cdx.split(' ', 2)

        timestamp = fields[1]
        member = ' '.join((timestamp, fields[2]))

        score = timestamp_to_sec(timestamp)
        source.redis.zadd(source.key_prefix + fields[0], score, member)
コード例 #9
0
    def __call__(self, cdx, skip_hosts, cdx_loader, wbrequest):
        """
        Fetch the capture described by ``cdx`` and return its status line,
        headers and raw body stream.

        Some bookkeeping about the fetch is also written to Redis under a
        key built from the client address and the requested timestamp; a
        failure there is printed and otherwise ignored.

        :param cdx: capture record; ``'timestamp'`` and ``'url'`` are read
        :param skip_hosts: forwarded to ``_get_urls_to_try`` and ``_do_req``
        :param cdx_loader: not used by this method
        :param wbrequest: current request; ``env`` and ``wb_url.timestamp``
            are read
        :return: ``(StatusAndHeaders, stream)`` where ``stream`` is
            ``response.raw``
        :raises CaptureException: if no response could be obtained
        """
        self.session.cookies.clear()

        try_urls, host, archive_name = self._get_urls_to_try(cdx, skip_hosts, wbrequest)

        try:
            response = self._do_req(try_urls, host, wbrequest.env, skip_hosts)
        except Exception as e:
            print(e)
            response = None

        if response is None:
            print(skip_hosts)
            raise CaptureException('Content Could Not Be Loaded')

        # redirects: un-rewrite the headers that point at another location
        if 300 <= response.status_code < 400:
            self.unrewrite_header(response, 'Location')
            self.unrewrite_header(response, 'Content-Location')

        # NOTE(review): if 'REMOTE_ADDR' is missing from env, ``remote`` is
        # None and the concatenation below raises TypeError -- confirm the
        # server always sets it.
        remote = wbrequest.env.get('REMOTE_ADDR')
        req_ts = wbrequest.wb_url.timestamp
        base_key = remote + ':' + req_ts

        sec = timestamp_to_sec(cdx['timestamp'])
        referrer = wbrequest.env.get('HTTP_REFERER')

        try:
            pi = redisclient.redis.pipeline(transaction=False)

            pi.hset(base_key + ':urls', cdx['url'], sec)
            pi.sadd(base_key + ':hosts', archive_name)

            if referrer and not referrer.endswith('.css'):
                pi.set(base_key + ':ref', referrer)
            elif not referrer:
                pi.set(base_key + ':base', cdx['url'])

            pi.execute()
        except Exception:
            import traceback
            # print_exc() reports the exception being handled; its first
            # positional argument is ``limit``, so the exception object
            # must not be passed to it.
            traceback.print_exc()

        statusline = str(response.status_code) + ' ' + response.reason

        headers = response.headers.items()

        stream = response.raw

        status_headers = StatusAndHeaders(statusline, headers)

        return (status_headers, stream)
コード例 #10
0
    def sort_cci_timestamp(self, cci_iter, query):
        """
        Yield at most ``query.limit`` items from ``cci_iter``, ordered by
        their ``data['arcFileDate']`` value.

        The order depends on ``query``:

        * ``query.closest`` set: nearest to that timestamp first
        * otherwise ``query.reverse`` set: largest ``arcFileDate`` first
        * otherwise: smallest ``arcFileDate`` first

        Items with the same sort key keep their input order.

        :param cci_iter: iterable of objects exposing
            ``data['arcFileDate']`` as a number
        :param query: object exposing ``limit``, ``closest`` and ``reverse``
        :return: generator over the selected items
        """
        limit = query.limit

        if query.closest:
            # presumably arcFileDate is in milliseconds, hence the * 1000
            # -- TODO confirm against the data source
            closest_sec = timestamp_to_sec(query.closest) * 1000
            key_func = lambda x: abs(closest_sec - x)
        elif query.reverse:
            key_func = lambda x: -x
        else:
            key_func = lambda x: x

        # Keys and items are kept in two parallel lists so that only the
        # numeric keys are ever compared. Sorting (key, item) tuples falls
        # back to comparing the items themselves on a tie, which gives an
        # arbitrary order on Python 2 and can raise TypeError on Python 3.
        sorted_keys = []
        sorted_cci = []

        for cci in cci_iter:
            key = key_func(cci.data['arcFileDate'])

            # bisect_right keeps equal keys in input order
            pos = bisect.bisect_right(sorted_keys, key)
            sorted_keys.insert(pos, key)
            sorted_cci.insert(pos, cci)

            if len(sorted_cci) > limit:
                # drop the worst-ranked item and its key together
                sorted_keys.pop()
                sorted_cci.pop()

        for cci in sorted_cci:
            yield cci
コード例 #11
0
ファイル: mementoquery.py プロジェクト: lorz54/test
 def __init__(self, api_loader, url, timestamp):
     """
     Store the query inputs and precompute the target time.

     :param api_loader: stored as ``self.api_loader``
     :param url: stored as ``self.url``
     :param timestamp: stored as ``self.target_timestamp``; also converted
         with ``timestamp_to_sec`` and stored as ``self.target_sec``
     """
     self.api_loader = api_loader
     self.url = url
     self.target_timestamp = timestamp
     # converted once here so later code can reuse the numeric value
     self.target_sec = timestamp_to_sec(timestamp)
コード例 #12
0
ファイル: views.py プロジェクト: peval/pywb
def format_ts(value, format_="%a, %b %d %Y %H:%M:%S"):
    """
    Format the timestamp ``value`` according to ``format_``.

    The special format ``"%s"`` returns the result of
    ``timestamp_to_sec(value)`` as-is (not converted to a string). Any
    other format is applied with ``strftime`` to the result of
    ``timestamp_to_datetime(value)``.
    """
    if format_ != "%s":
        return timestamp_to_datetime(value).strftime(format_)

    return timestamp_to_sec(value)
コード例 #13
0
def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
    """
    Format the timestamp ``value`` according to ``format_``.

    The special format ``'%s'`` returns the result of
    ``timestamp_to_sec(value)`` as-is (not converted to a string). Any
    other format is applied with ``strftime`` to the result of
    ``timestamp_to_datetime(value)``.
    """
    if format_ != '%s':
        return timestamp_to_datetime(value).strftime(format_)

    return timestamp_to_sec(value)