Example #1
 def test_aborts_on_missing_term(self, app, caplog):
     from nessie.jobs.create_sis_schema import CreateSisSchema
     with mock_s3(app):
         daily_path = get_s3_sis_daily_path()
         historical_path = app.config['LOCH_S3_SIS_DATA_PATH'] + '/historical'
         self._upload_data_to_s3(daily_path, historical_path)
         s3.delete_objects([f'{daily_path}/enrollments/enrollments-2178.gz'])
         with capture_app_logs(app):
             with pytest.raises(BackgroundJobError) as e:
                 CreateSisSchema().update_manifests()
             assert 'Expected filename enrollments-2178.gz not found in S3, aborting' in str(e.value)
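The test above runs against a mocked bucket via mock_s3(app). A minimal sketch of what such a helper can look like, assuming it is built on the moto library and on the app's LOCH_S3_BUCKET config key (both assumptions, not shown in the example itself):

# Hypothetical mock_s3 helper built on moto (assumes moto < 5.x, where
# moto.mock_s3() is available; moto 5 renamed it to mock_aws()).
from contextlib import contextmanager

import boto3
import moto


@contextmanager
def mock_s3(app):
    with moto.mock_s3():
        # Create the bucket the job expects so uploads inside the test succeed.
        client = boto3.client('s3', region_name='us-east-1')
        client.create_bucket(Bucket=app.config['LOCH_S3_BUCKET'])
        yield client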
Example #2
 def delete_old_incrementals(self):
     old_incrementals = s3.get_keys_with_prefix(self.transient_path, bucket=self.transient_bucket)
     if old_incrementals is None:
         raise BackgroundJobError('Error listing old incrementals, aborting job.')
     if len(old_incrementals) > 0:
         delete_response = s3.delete_objects(old_incrementals, bucket=self.transient_bucket)
         if not delete_response:
             raise BackgroundJobError(f'Error deleting old incremental files from {self.transient_bucket}, aborting job.')
         else:
             app.logger.info(f'Deleted {len(old_incrementals)} old incremental files from {self.transient_bucket}.')
Example #3
 def delete_old_unloads(self):
     old_unloads = s3.get_keys_with_prefix(app.config['LRS_CANVAS_INCREMENTAL_ETL_PATH_REDSHIFT'], bucket=self.transient_bucket)
     if old_unloads is None:
         raise BackgroundJobError('Error listing old unloads, aborting job.')
     if len(old_unloads) > 0:
         delete_response = s3.delete_objects(old_unloads, bucket=self.transient_bucket)
         if not delete_response:
             raise BackgroundJobError(f'Error deleting old unloads from {self.transient_bucket}, aborting job.')
         else:
             app.logger.info(f'Deleted {len(old_unloads)} old unloads from {self.transient_bucket}.')
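delete_old_incrementals and delete_old_unloads above repeat the same list, check, delete, log pattern; a hypothetical consolidation into one helper method (a sketch only, not part of the job shown) could look like this:

def _delete_keys_with_prefix(self, prefix, description):
    # Hypothetical helper: list keys under the prefix, abort the job on a listing
    # error, then delete the batch and log what was removed.
    keys = s3.get_keys_with_prefix(prefix, bucket=self.transient_bucket)
    if keys is None:
        raise BackgroundJobError(f'Error listing {description}, aborting job.')
    if keys:
        if not s3.delete_objects(keys, bucket=self.transient_bucket):
            raise BackgroundJobError(f'Error deleting {description} from {self.transient_bucket}, aborting job.')
        app.logger.info(f'Deleted {len(keys)} {description} from {self.transient_bucket}.')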
Example #4
    def run(self, cleanup=True):
        job_id = self.generate_job_id()
        app.logger.info(f'Starting Canvas snapshot resync job... (id={job_id})')
        md = metadata.get_failures_from_last_sync()
        if not md['failures']:
            return f"No failures found for job_id {md['job_id']}, skipping resync."
        app.logger.info(f"Found {len(md['failures'])} failures for job_id {md['job_id']}, attempting resync.")

        failures = 0
        successes = 0

        for failure in md['failures']:
            if cleanup and failure['destination_url']:
                destination_key = failure['destination_url'].split(app.config['LOCH_S3_BUCKET'] + '/')[1]
                if s3.delete_objects([destination_key]):
                    metadata.delete_canvas_snapshots([destination_key])
                else:
                    app.logger.error(f"Could not delete failed snapshot from S3 (url={failure['destination_url']})")
            metadata.create_canvas_sync_status(
                job_id=job_id,
                filename=failure['filename'],
                canvas_table=failure['canvas_table'],
                # The original signed source URL will remain valid if the resync job is run within an hour of the sync job.
                # TODO Add logic to fetch a new signed URL from the Canvas Data API for older jobs.
                source_url=failure['source_url'],
            )

            # Regenerate the S3 key, since the failed job may not have progressed far enough to store a destination URL in its metadata.
            if failure['canvas_table'] == 'requests':
                key_components = [berkeley.s3_canvas_data_path_current_term(), failure['canvas_table'], failure['filename']]
            else:
                key_components = [get_s3_canvas_daily_path(), failure['canvas_table'], failure['filename']]
            key = '/'.join(key_components)
            response = dispatch('sync_file_to_s3', data={'canvas_sync_job_id': job_id, 'url': failure['source_url'], 'key': key})

            if not response:
                app.logger.error('Failed to dispatch S3 resync of snapshot ' + failure['filename'])
                metadata.update_canvas_sync_status(job_id, key, 'error', details=f'Failed to dispatch: {response}')
                failures += 1
            else:
                app.logger.info('Dispatched S3 resync of snapshot ' + failure['filename'])
                successes += 1

        return f'Canvas snapshot resync job dispatched to workers ({successes} successful dispatches, {failures} failures).'
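The destination key above is recovered by splitting the stored URL on the bucket name; with made-up values (none of these appear in the job itself), the split works like this:

# Illustration only, with hypothetical values.
bucket = 'my-loch-bucket'  # stands in for app.config['LOCH_S3_BUCKET']
destination_url = f'https://s3.amazonaws.com/{bucket}/canvas/daily/2021/requests/part-00000.gz'
destination_key = destination_url.split(bucket + '/')[1]
assert destination_key == 'canvas/daily/2021/requests/part-00000.gz'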
Example #5
def delete_objects_with_prefix(prefix, whitelist=()):
    keys_to_delete = []
    existing_keys = s3.get_keys_with_prefix(prefix)
    if existing_keys is None:
        app.logger.error('Error listing keys, aborting job.')
        return False
    for key in existing_keys:
        filename = key.split('/')[-1]
        if filename not in whitelist:
            keys_to_delete.append(key)
    app.logger.info(
        f'Found {len(existing_keys)} key(s) matching prefix "{prefix}", {len(existing_keys) - len(keys_to_delete)} '
        f'key(s) in whitelist, will delete {len(keys_to_delete)} object(s)')
    if not keys_to_delete:
        return True
    if s3.delete_objects(keys_to_delete):
        metadata.delete_canvas_snapshots(keys_to_delete)
        return True
    else:
        return False
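A hypothetical call, with a made-up prefix and whitelist (the real callers are not shown here):

# Clear a term's enrollment exports but keep the manifest file.
if not delete_objects_with_prefix('sis-data/term-2178/enrollments', whitelist=['manifest.json']):
    app.logger.error('Cleanup of stale enrollment exports failed.')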
Example #6
# Used as a pytest teardown fixture: the code after `yield` runs once the test
# finishes, clearing any objects the test left under the configured test prefix.
def cleanup_s3(app):
    yield
    from nessie.externals import s3
    keys = s3.get_keys_with_prefix(app.config['LOCH_S3_PREFIX_TESTEXT'])
    s3.delete_objects(keys)
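All of the examples above go through the project's s3.delete_objects helper. As a rough sketch only (assuming a boto3 client and a default bucket taken from app config; the real helper may differ), such a wrapper could look like this:

# Hypothetical delete_objects wrapper built on boto3. boto3's delete_objects call
# accepts at most 1000 keys per request, so larger key lists are sent in batches.
# `app` is the application object used throughout the examples above.
import boto3


def delete_objects(keys, bucket=None):
    bucket = bucket or app.config['LOCH_S3_BUCKET']
    client = boto3.client('s3')
    try:
        for i in range(0, len(keys), 1000):
            batch = [{'Key': key} for key in keys[i:i + 1000]]
            client.delete_objects(Bucket=bucket, Delete={'Objects': batch})
        return True
    except Exception as e:
        app.logger.error(f'Error deleting objects from S3 (bucket={bucket}): {e}')
        return False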