Example #1
0
def refresh_links_for_project(project_uuid, chunk_size, expiry_seconds):
    """Regenerate file links for a single project, soonest-expiring first.

    :param project_uuid: ID of the project whose file links to refresh.
    :param chunk_size: maximum number of files to handle; 0 means no limit.
    :param expiry_seconds: links expiring within this many seconds from now
        are considered due for refreshing.
    """
    if chunk_size:
        log.info('Refreshing the first %i links for project %s', chunk_size,
                 project_uuid)
    else:
        log.info('Refreshing all links for project %s', project_uuid)

    files_collection = current_app.data.driver.db['files']

    now = utils.utcnow()
    expire_before = now + datetime.timedelta(seconds=expiry_seconds)
    log.info('Limiting to links that expire before %s', expire_before)

    # Sort by expiry so the most urgent links are handled first, even when
    # chunk_size cuts the run short. limit(0) is a no-op in pymongo.
    expiring_files = files_collection \
        .find({'project': ObjectId(project_uuid),
               'link_expires': {'$lt': expire_before}}) \
        .sort([('link_expires', pymongo.ASCENDING)]) \
        .limit(chunk_size)

    refreshed = 0
    for file_doc in expiring_files:
        log.debug('Refreshing links for file %s', file_doc['_id'])
        generate_all_links(file_doc, now)
        refreshed += 1

    if refreshed:
        log.info('Refreshed %i links', refreshed)
Example #2
0
def ensure_valid_link(response):
    """Ensures the file item has valid file links using generate_link(...)."""

    # Function-specific logger, so this chatty check can be silenced on its
    # own without muting the rest of the module.
    log_link = logging.getLogger('%s.ensure_valid_link' % __name__)

    now = utils.utcnow()
    if 'link_expires' not in response:
        # Never had an expiry recorded; links must be (re)generated.
        log_link.debug('No expiry date for link; generating new link')
    else:
        link_expires = response['link_expires']
        if now < link_expires:
            # Still valid, so leave the existing links untouched.
            log_link.debug(
                'Link expires at %s, which is in the future, so not '
                'generating new link', link_expires)
            return

        log_link.debug(
            'Link expired at %s, which is in the past; generating '
            'new link', link_expires)

    generate_all_links(response, now)
Example #3
0
def on_pre_get_files(_, lookup):
    # Always serve the document from MongoDB: drop the client's
    # If-Modified-Since so Eve cannot short-circuit with a 304.
    parsed_req = eve.utils.parse_request('files')
    parsed_req.if_modified_since = None

    if not lookup:
        # An empty lookup would refresh *all* file documents, which is far
        # too heavy to do in one client HTTP request.
        return

    # Narrow the lookup to documents whose links have already expired.
    now = utils.utcnow()
    expired_lookup = dict(lookup, link_expires={'$lte': now})

    cursor, _ = current_app.data.find('files',
                                      parsed_req,
                                      expired_lookup,
                                      perform_count=False)
    for index, file_doc in enumerate(cursor):
        if not index:
            # Log once, on the first matching document only.
            log.debug(
                'Updating expired links for files that matched lookup %s',
                expired_lookup)
        generate_all_links(file_doc, now)
Example #4
0
def refresh_links_for_backend(backend_name, chunk_size, expiry_seconds):
    """Refresh file links stored on a specific storage backend.

    Picks up files on `backend_name` whose link is missing, has no expiry,
    or expires within `expiry_seconds` from now, and regenerates their
    links via generate_all_links(). Files belonging to deleted or
    nonexistent projects, or lacking a 'file_path', are skipped.

    :param backend_name: storage backend identifier matched against the
        files' 'backend' property.
    :param chunk_size: maximum number of file documents to process.
    :param expiry_seconds: links expiring within this many seconds from
        now are considered due for refreshing.
    """
    import gcloud.exceptions

    my_log = log.getChild(f'refresh_links_for_backend.{backend_name}')

    # Retrieve expired links.
    files_collection = current_app.data.driver.db['files']
    proj_coll = current_app.data.driver.db['projects']

    now = utils.utcnow()
    expire_before = now + datetime.timedelta(seconds=expiry_seconds)
    my_log.info('Limiting to links that expire before %s', expire_before)

    base_query = {'backend': backend_name, '_deleted': {'$ne': True}}
    to_refresh = files_collection.find({
        '$or': [{
            'link_expires': None,
            **base_query
        }, {
            'link_expires': {
                '$lt': expire_before
            },
            **base_query
        }, {
            'link': None,
            **base_query
        }]
    }).sort([('link_expires', pymongo.ASCENDING)
             ]).limit(chunk_size).batch_size(5)

    # BUGFIX: Cursor.count() defaults to with_limit_and_skip=False, which
    # ignores the .limit(chunk_size) above. Without it, document_count could
    # exceed chunk_size and the "probably limited by the chunk size" report
    # below would never trigger.
    document_count = to_refresh.count(with_limit_and_skip=True)
    if document_count == 0:
        my_log.info('No links to refresh.')
        return

    if 0 < chunk_size == document_count:
        my_log.info(
            'Found %d documents to refresh, probably limited by the chunk size.',
            document_count)
    else:
        my_log.info('Found %d documents to refresh.', document_count)

    refreshed = 0
    # Report progress roughly every 4% of the run, clamped to [5, 100] docs.
    report_chunks = min(max(5, document_count // 25), 100)
    for file_doc in to_refresh:
        try:
            file_id = file_doc['_id']
            project_id = file_doc.get('project')
            if project_id is None:
                my_log.debug('Skipping file %s, it has no project.', file_id)
                continue

            # Only refresh files whose project still exists and is not
            # soft-deleted.
            count = proj_coll.count({
                '_id':
                project_id,
                '$or': [
                    {
                        '_deleted': {
                            '$exists': False
                        }
                    },
                    {
                        '_deleted': False
                    },
                ]
            })

            if count == 0:
                my_log.debug('Skipping file %s, project %s does not exist.',
                             file_id, project_id)
                continue

            if 'file_path' not in file_doc:
                my_log.warning(
                    "Skipping file %s, missing 'file_path' property.", file_id)
                continue

            my_log.debug('Refreshing links for file %s', file_id)

            try:
                generate_all_links(file_doc, now)
            except gcloud.exceptions.Forbidden:
                # GCS denies access to some project buckets; skip rather
                # than abort the whole run.
                my_log.warning(
                    'Skipping file %s, GCS forbids us access to '
                    'project %s bucket.', file_id, project_id)
                continue
            refreshed += 1

            if refreshed % report_chunks == 0:
                my_log.info('Refreshed %i links', refreshed)
        except KeyboardInterrupt:
            # Allow a clean manual abort between documents.
            my_log.warning(
                'Aborting due to KeyboardInterrupt after refreshing %i '
                'links', refreshed)
            return

    my_log.info('Refreshed %i links', refreshed)