def test_fallback_update_manifests(self, app):
    """Uses yesterday's news if today's is unavailable."""
    with mock_s3(app):
        yesterday = datetime.now() - timedelta(days=1)
        daily_path = get_s3_sis_daily_path(yesterday)
        sis_data_path = app.config['LOCH_S3_SIS_DATA_PATH']
        historical_path = sis_data_path + '/historical'
        manifest_path = sis_data_path + '/manifests'

        # Seed yesterday's daily drop plus the historical archive; today's
        # daily path is deliberately left empty to force the fallback.
        fixtures = [
            ('some new course data', f'{daily_path}/courses/courses-aaa.gz'),
            ('some more new course data', f'{daily_path}/courses/courses-bbb.gz'),
            ('some new enrollment data', f'{daily_path}/enrollments/enrollments-ccc.gz'),
            ('some old course data', f'{historical_path}/courses/courses-ddd.gz'),
            ('some old enrollment data', f'{historical_path}/enrollments/enrollments-eee.gz'),
            ('some perfectly antique enrollment data', f'{historical_path}/enrollments/enrollments-fff.gz'),
        ]
        for contents, key in fixtures:
            s3.upload_data(contents, key)

        assert CreateSisSchema().update_manifests()

        bucket = app.config['LOCH_S3_BUCKET']

        courses_manifest = json.loads(s3.get_object_text(manifest_path + '/courses.json'))
        assert len(courses_manifest['entries']) == 3
        first_course = courses_manifest['entries'][0]
        assert first_course['url'] == f's3://{bucket}/{daily_path}/courses/courses-aaa.gz'
        assert first_course['meta']['content_length'] == 20

        enrollments_manifest = json.loads(s3.get_object_text(manifest_path + '/enrollments.json'))
        assert len(enrollments_manifest['entries']) == 3
        last_enrollment = enrollments_manifest['entries'][2]
        assert last_enrollment['url'] == f's3://{bucket}/{historical_path}/enrollments/enrollments-fff.gz'
        assert last_enrollment['meta']['content_length'] == 38
def _assert_complete_manifest(self, app, daily_path, historical_path):
    """Verify manifests spanning both the daily drop and the historical archive.

    Expects six entries per feed; spot-checks one daily courses entry and one
    historical enrollments entry for URL and content length.
    """
    bucket = app.config['LOCH_S3_BUCKET']
    manifest_path = app.config['LOCH_S3_SIS_DATA_PATH'] + '/manifests'

    manifest = json.loads(s3.get_object_text(manifest_path + '/courses.json'))
    assert len(manifest['entries']) == 6
    entry = manifest['entries'][0]
    assert entry['url'] == f's3://{bucket}/{daily_path}/courses/courses-2178.gz'
    assert entry['meta']['content_length'] == 20

    manifest = json.loads(s3.get_object_text(manifest_path + '/enrollments.json'))
    assert len(manifest['entries']) == 6
    entry = manifest['entries'][4]
    assert entry['url'] == f's3://{bucket}/{historical_path}/enrollments/enrollments-2172.gz'
    assert entry['meta']['content_length'] == 26
def copy_tsv_from_s3(table, s3_key):
    """Load tab-separated S3 data into a database table.

    In the test environment the object text is fetched from mock S3 and
    streamed into Postgres with COPY FROM STDIN; otherwise Redshift is
    instructed to pull the object directly from S3 using an IAM role.

    Returns True on success; in the test branch, False if a Postgres error
    was caught (the error is logged as a warning).
    """
    if app.config['NESSIE_ENV'] != 'test':
        # Real Redshift accepts an S3 URL with IAM role.
        # NOTE(review): table and s3_key are interpolated into the SQL text;
        # assumed to come from trusted internal callers — confirm.
        iam_role = app.config['REDSHIFT_IAM_ROLE']
        s3_prefix = 's3://' + app.config['LOCH_S3_BUCKET'] + '/'
        return execute(f"COPY {table} FROM '{s3_prefix}{s3_key}' IAM_ROLE '{iam_role}' DELIMITER '\\t';")

    # Test environment: retrieve object contents from mock S3 and use
    # Postgres COPY FROM STDIN.
    try:
        buf = io.StringIO(s3.get_object_text(s3_key))
        with _get_cursor(operation='read') as cursor:
            cursor.copy_from(buf, table)
        return True
    except psycopg2.Error as e:
        error_str = str(e)
        if e.pgcode:
            error_str += f'{e.pgcode}: {e.pgerror}\n'
        app.logger.warning({'message': error_str})
        return False