# Module-level imports these helpers rely on. `utils` (providing utcnow())
# is assumed to come from elsewhere in this package, and generate_all_links()
# is defined elsewhere in this module; the exact import paths are assumptions.
import datetime
import logging

import eve.utils
import pymongo
from bson import ObjectId
from flask import current_app

from pillar.api import utils  # assumed location of utils.utcnow()

log = logging.getLogger(__name__)


def refresh_links_for_project(project_uuid, chunk_size, expiry_seconds):
    if chunk_size:
        log.info('Refreshing the first %i links for project %s',
                 chunk_size, project_uuid)
    else:
        log.info('Refreshing all links for project %s', project_uuid)

    # Retrieve expired links.
    files_collection = current_app.data.driver.db['files']

    now = utils.utcnow()
    expire_before = now + datetime.timedelta(seconds=expiry_seconds)
    log.info('Limiting to links that expire before %s', expire_before)

    to_refresh = files_collection.find({
        'project': ObjectId(project_uuid),
        'link_expires': {'$lt': expire_before},
    }).sort([('link_expires', pymongo.ASCENDING)]).limit(chunk_size)

    refresh_count = 0
    for file_doc in to_refresh:
        log.debug('Refreshing links for file %s', file_doc['_id'])
        generate_all_links(file_doc, now)
        refresh_count += 1
    if refresh_count:
        log.info('Refreshed %i links', refresh_count)
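
# Hedged usage sketch, not part of the original module:
# refresh_links_for_project() touches Flask's `current_app`, so it must run
# inside an application context. `create_app()` and the project ID below are
# hypothetical, for illustration only.
def _example_refresh_project_links():
    from application import create_app  # hypothetical app factory

    app = create_app()
    with app.app_context():
        # Refresh up to 50 of this project's links that expire within
        # the next hour.
        refresh_links_for_project('577e23ad98377323f74c0cb8',
                                  chunk_size=50, expiry_seconds=3600)
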
def ensure_valid_link(response):
    """Ensures the file item has valid file links using generate_link(...)."""

    # Log to function-specific logger, so we can easily turn it off.
    log_link = logging.getLogger('%s.ensure_valid_link' % __name__)
    # log.debug('Inspecting link for file %s', response['_id'])

    # Check link expiry.
    now = utils.utcnow()
    if 'link_expires' in response:
        link_expires = response['link_expires']
        if now < link_expires:
            # Not expired yet, so don't bother regenerating anything.
            log_link.debug('Link expires at %s, which is in the future, so not '
                           'generating new link', link_expires)
            return

        log_link.debug('Link expired at %s, which is in the past; generating '
                       'new link', link_expires)
    else:
        log_link.debug('No expiry date for link; generating new link')

    generate_all_links(response, now)
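
# Hedged sketch, not part of the original module: ensure_valid_link() can
# also be called directly on a file document fetched outside of Eve's hook
# machinery. Whether that is a sanctioned pattern here is an assumption.
def _example_fetch_file_with_fresh_link(file_id: ObjectId) -> dict:
    files_collection = current_app.data.driver.db['files']
    file_doc = files_collection.find_one(file_id)
    if file_doc is None:
        raise KeyError(f'file {file_id} not found')
    # Regenerates the links in place only when 'link_expires' is absent
    # or in the past; otherwise this is a no-op.
    ensure_valid_link(file_doc)
    return file_doc
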
def on_pre_get_files(_, lookup):
    # Override the HTTP header; we always want to fetch the document from
    # MongoDB.
    parsed_req = eve.utils.parse_request('files')
    parsed_req.if_modified_since = None

    # If there is no lookup, we would refresh *all* file documents,
    # which is far too heavy to do in one client HTTP request.
    if not lookup:
        return

    # Only fetch documents whose links have already expired.
    now = utils.utcnow()
    lookup_expired = lookup.copy()
    lookup_expired['link_expires'] = {'$lte': now}

    cursor, _ = current_app.data.find('files', parsed_req, lookup_expired,
                                      perform_count=False)
    for idx, file_doc in enumerate(cursor):
        if idx == 0:
            log.debug('Updating expired links for files that matched lookup %s',
                      lookup_expired)
        # log.debug('Updating expired links for file %r.', file_doc['_id'])
        generate_all_links(file_doc, now)
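
# Hedged sketch of how these handlers might be registered; the actual wiring
# lives elsewhere, so treat this as an assumption. Eve exposes
# `on_pre_GET_<resource>` (called with the request and lookup, matching
# on_pre_get_files' signature) and `on_fetched_item_<resource>` (called with
# the fetched document) as event lists on the application object.
def _example_register_hooks(app):
    app.on_pre_GET_files += on_pre_get_files
    app.on_fetched_item_files += ensure_valid_link
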
def refresh_links_for_backend(backend_name, chunk_size, expiry_seconds):
    import gcloud.exceptions

    my_log = log.getChild(f'refresh_links_for_backend.{backend_name}')

    # Retrieve expired links.
    files_collection = current_app.data.driver.db['files']
    proj_coll = current_app.data.driver.db['projects']

    now = utils.utcnow()
    expire_before = now + datetime.timedelta(seconds=expiry_seconds)
    my_log.info('Limiting to links that expire before %s', expire_before)

    # Refresh links that expire soon, have no expiry date, or have no link
    # at all; deleted files are excluded in all three cases.
    base_query = {'backend': backend_name, '_deleted': {'$ne': True}}
    to_refresh = files_collection.find({
        '$or': [{'link_expires': None, **base_query},
                {'link_expires': {'$lt': expire_before}, **base_query},
                {'link': None, **base_query}],
    }).sort([('link_expires', pymongo.ASCENDING)]).limit(chunk_size).batch_size(5)

    document_count = to_refresh.count()
    if document_count == 0:
        my_log.info('No links to refresh.')
        return

    # Chained comparison: chunk_size > 0 and chunk_size == document_count.
    if 0 < chunk_size == document_count:
        my_log.info('Found %d documents to refresh, probably limited by the '
                    'chunk size.', document_count)
    else:
        my_log.info('Found %d documents to refresh.', document_count)

    refreshed = 0
    # Report progress roughly every 4% of the workload, clamped to [5, 100].
    report_chunks = min(max(5, document_count // 25), 100)
    for file_doc in to_refresh:
        try:
            file_id = file_doc['_id']
            project_id = file_doc.get('project')
            if project_id is None:
                my_log.debug('Skipping file %s, it has no project.', file_id)
                continue

            # Skip files whose project is missing or soft-deleted.
            count = proj_coll.count({'_id': project_id, '$or': [
                {'_deleted': {'$exists': False}},
                {'_deleted': False},
            ]})
            if count == 0:
                my_log.debug('Skipping file %s, project %s does not exist.',
                             file_id, project_id)
                continue

            if 'file_path' not in file_doc:
                my_log.warning("Skipping file %s, missing 'file_path' property.",
                               file_id)
                continue

            my_log.debug('Refreshing links for file %s', file_id)
            try:
                generate_all_links(file_doc, now)
            except gcloud.exceptions.Forbidden:
                my_log.warning('Skipping file %s, GCS forbids us access to '
                               'project %s bucket.', file_id, project_id)
                continue
            refreshed += 1

            if refreshed % report_chunks == 0:
                my_log.info('Refreshed %i links', refreshed)
        except KeyboardInterrupt:
            my_log.warning('Aborting due to KeyboardInterrupt after refreshing '
                           '%i links', refreshed)
            return

    my_log.info('Refreshed %i links', refreshed)
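
# Hedged sketch, not part of the original module: this kind of refresh would
# typically run from a periodic job (cron, Celery beat, or a manage command).
# The backend names and the app factory below are assumptions.
def _example_refresh_all_backends():
    from application import create_app  # hypothetical app factory

    app = create_app()
    with app.app_context():
        for backend in ('gcs', 'local'):  # assumed backend names
            # Renew up to 100 links per backend that expire within the
            # next two hours.
            refresh_links_for_backend(backend, chunk_size=100,
                                      expiry_seconds=7200)
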