def test_update_sync_status(file_errors, site_errors, status):
    """update_sync_status should set the website sync_status and collect file and site errors"""
    sync_time = now_in_utc()
    website = WebsiteFactory.create(
        synced_on=sync_time,
        sync_status=WebsiteSyncStatus.PROCESSING,
        sync_errors=site_errors,
    )
    for file_error in file_errors:
        # Only drive files without an error are linked to a resource
        resource = None
        if not file_error:
            resource = WebsiteContentFactory.create(
                type=CONTENT_TYPE_RESOURCE, website=website
            )
        file_status = DriveFileStatus.FAILED
        if file_error is None:
            file_status = DriveFileStatus.COMPLETE
        DriveFileFactory.create(
            website=website,
            sync_error=file_error,
            sync_dt=sync_time,
            resource=resource,
            status=file_status,
        )
    # One extra drive file stamped 10 seconds after the sync time under test
    later_sync_time = now_in_utc() + timedelta(seconds=10)
    DriveFileFactory.create(
        website=website,
        sync_dt=later_sync_time,
        resource=WebsiteContentFactory.create(
            type=CONTENT_TYPE_RESOURCE, website=website
        ),
    )

    update_sync_status(website, sync_time)
    website.refresh_from_db()

    expected_errors = [error for error in file_errors if error]
    expected_errors += site_errors or []
    assert website.sync_status == status
    assert sorted(website.sync_errors) == sorted(expected_errors)
def publish_website(  # pylint: disable=too-many-arguments
    name: str,
    version: str,
    pipeline_api: Optional[object] = None,
    prepublish: Optional[bool] = True,
    trigger_pipeline: Optional[bool] = True,
):
    """
    Publish a live or draft version of a website.

    Runs the configured prepublish actions (when prepublish is truthy), syncs all
    content to the backend, merges the draft or live branch, and optionally triggers
    a pipeline build. The website's publishing fields are always written in the
    finally clause: reset to "not started" on success, or marked as errored when
    any exception occurs (the exception is re-raised).
    """
    status_field = f"{version}_publish_status"
    status_updated_field = f"{version}_publish_status_updated_on"
    unpublished_field = f"has_unpublished_{version}"
    try:
        website = Website.objects.get(name=name)
        if prepublish:
            for action_path in settings.PREPUBLISH_ACTIONS:
                prepublish_action = import_string(action_path)
                prepublish_action(website, version=version)

        backend = get_sync_backend(website)
        backend.sync_all_content_to_backend()
        merge_backend = (
            backend.merge_backend_draft
            if version == VERSION_DRAFT
            else backend.merge_backend_live
        )
        merge_backend()

        update_kwargs = {}
        if trigger_pipeline and settings.CONTENT_SYNC_PIPELINE_BACKEND:
            pipeline = get_sync_pipeline(website, api=pipeline_api)
            pipeline.unpause_pipeline(version)
            update_kwargs[f"latest_build_id_{version}"] = pipeline.trigger_pipeline_build(
                version
            )

        needs_status_reset = (
            getattr(website, status_field) != PUBLISH_STATUS_NOT_STARTED
            or getattr(website, status_updated_field) is None
        )
        if needs_status_reset:
            # Need to update additional fields
            update_kwargs = {
                status_field: PUBLISH_STATUS_NOT_STARTED,
                status_updated_field: now_in_utc(),
                f"{version}_last_published_by": None,
                unpublished_field: False,
                **update_kwargs,
            }
    except:  # pylint:disable=bare-except
        # Deliberately bare: update_kwargs must be bound for the finally clause
        # no matter which exception type was raised.
        update_kwargs = {
            status_field: PUBLISH_STATUS_ERRORED,
            status_updated_field: now_in_utc(),
            unpublished_field: True,
        }
        raise
    finally:
        Website.objects.filter(name=name).update(**update_kwargs)
def test_publish_website(  # pylint:disable=redefined-outer-name,too-many-arguments
    settings,
    mocker,
    mock_api_funcs,
    prepublish,
    prepublish_actions,
    has_api,
    version,
    status,
    trigger,
):
    """publish_website should make the expected backend/pipeline calls and reset the publish fields"""
    status_field = f"{version}_publish_status"
    status_updated_field = f"{version}_publish_status_updated_on"
    build_id_field = f"latest_build_id_{version}"
    expected_build_id = 123456

    settings.PREPUBLISH_ACTIONS = prepublish_actions
    website = WebsiteFactory.create()
    setattr(website, status_field, status)
    if status:
        setattr(website, status_updated_field, now_in_utc())
    website.save()

    pipeline_api = None
    if has_api:
        pipeline_api = mocker.Mock()
    mock_backend = mock_api_funcs.mock_get_backend.return_value
    mock_pipeline = mock_api_funcs.mock_get_pipeline.return_value
    mock_pipeline.trigger_pipeline_build.return_value = expected_build_id

    api.publish_website(
        website.name,
        version,
        pipeline_api=pipeline_api,
        prepublish=prepublish,
        trigger_pipeline=trigger,
    )

    mock_api_funcs.mock_get_backend.assert_called_once_with(website)
    mock_backend.sync_all_content_to_backend.assert_called_once()
    expected_merge = (
        mock_backend.merge_backend_draft
        if version == VERSION_DRAFT
        else mock_backend.merge_backend_live
    )
    expected_merge.assert_called_once()

    website.refresh_from_db()
    if trigger:
        mock_api_funcs.mock_get_pipeline.assert_called_once_with(
            website, api=pipeline_api
        )
        mock_pipeline.trigger_pipeline_build.assert_called_once_with(version)
        mock_pipeline.unpause_pipeline.assert_called_once_with(version)
        assert getattr(website, build_id_field) == expected_build_id
    else:
        mock_api_funcs.mock_get_pipeline.assert_not_called()
        mock_pipeline.trigger_pipeline_build.assert_not_called()
        mock_pipeline.unpause_pipeline.assert_not_called()
        assert getattr(website, build_id_field) is None

    was_not_started = status == PUBLISH_STATUS_NOT_STARTED
    assert getattr(website, status_field) == PUBLISH_STATUS_NOT_STARTED
    assert getattr(website, f"has_unpublished_{version}") is was_not_started
    assert getattr(website, f"{version}_last_published_by") is None
    assert getattr(website, status_updated_field) is not None

    if prepublish and len(prepublish_actions) > 0:
        mock_api_funcs.mock_import_string.assert_any_call("some.Action")
        mock_api_funcs.mock_import_string.return_value.assert_any_call(
            website, version=version
        )
def test_update_website_status(mocker):
    """The update_website_status task should pass the website and serialized sync date to api.update_sync_status"""
    website = WebsiteFactory.create()
    sync_date = now_in_utc()
    update_sync_status_mock = mocker.patch("gdrive_sync.tasks.api.update_sync_status")

    update_website_status.delay(website.pk, sync_date)

    # The mock is expected to receive the datetime in this string form
    expected_sync_date = sync_date.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    update_sync_status_mock.assert_called_once_with(website, expected_sync_date)
def handle(self, *args, **options):
    """Validate options, run the command, then sync unsynced websites to the backend if applicable"""
    self.validate_options(options)
    commit = options["commit"]
    self.do_handle(
        commit=commit,
        alias=options["alias"],
        out=options["out"],
        csv_only_changes=options["csv_only_changes"],
    )

    # Sync only when a backend is configured, changes were committed,
    # and the caller did not ask to skip the sync.
    if not (settings.CONTENT_SYNC_BACKEND and commit and not options["skip_sync"]):
        return

    self.stdout.write("Syncing all unsynced websites to the designated backend")
    started_at = now_in_utc()
    task = sync_unsynced_websites.delay(create_backends=True)
    self.stdout.write(f"Starting task {task}...")
    task.get()
    total_seconds = (now_in_utc() - started_at).total_seconds()
    self.stdout.write(f"Backend sync finished, took {total_seconds} seconds")
def import_website_files(self, name: str):
    """
    Query the Drive API for all children of a website folder and import the files.

    Does nothing when api.is_gdrive_enabled() is false. Otherwise the website is
    marked as PROCESSING with a fresh synced_on timestamp, each of the two "final"
    Drive subfolders is walked, and a process_drive_file signature is collected for
    every file that api.process_file_result returns. Error messages gathered along
    the way are stored in website.sync_errors and saved.

    If any file tasks were collected, this task is replaced by a workflow that
    processes the files, updates the website status, and then syncs the website
    content. Otherwise update_website_status is called directly.

    Args:
        name: The name of the Website to import files for
    """
    if not api.is_gdrive_enabled():
        return
    website = Website.objects.get(name=name)
    # These attribute changes are only persisted by the website.save() call
    # further down, after both subfolders have been queried.
    website.sync_status = WebsiteSyncStatus.PROCESSING
    website.synced_on = now_in_utc()
    website.sync_errors = []
    errors = []
    tasks = []
    for subfolder in [DRIVE_FOLDER_FILES_FINAL, DRIVE_FOLDER_VIDEOS_FINAL]:
        try:
            # Look for a non-trashed folder with this name directly under the website folder
            query = f'parents = "{website.gdrive_folder}" and name="{subfolder}" and mimeType = "{DRIVE_MIMETYPE_FOLDER}" and not trashed'
            subfolder_list = list(
                api.query_files(query=query, fields=DRIVE_FILE_FIELDS))
            if not subfolder_list:
                # A missing subfolder is recorded as an error; the other subfolder is still tried
                error_msg = f"Could not find drive subfolder {subfolder}"
                log.error("%s for %s", error_msg, website.short_id)
                errors.append(error_msg)
                continue
            # Only the first matching folder is walked
            for gdfile in api.walk_gdrive_folder(
                subfolder_list[0]["id"],
                DRIVE_FILE_FIELDS,
            ):
                try:
                    drive_file = api.process_file_result(
                        gdfile, sync_date=website.synced_on)
                    # A falsy result means there is nothing to queue for this file
                    if drive_file:
                        tasks.append(process_drive_file.s(drive_file.file_id))
                except:  # pylint:disable=bare-except
                    # A failure on one file is recorded and the walk continues
                    errors.append(
                        f"Error processing gdrive file {gdfile.get('name')}")
                    log.exception(
                        "Error processing gdrive file %s for %s",
                        gdfile.get("name"),
                        website.short_id,
                    )
        except:  # pylint:disable=bare-except
            # A failure while querying or walking a subfolder is recorded;
            # the loop then moves on to the next subfolder
            error_msg = f"An error occurred when querying the {subfolder} google drive subfolder"
            errors.append(error_msg)
            log.exception("%s for %s", error_msg, website.short_id)
    website.sync_errors = errors
    website.save()
    if tasks:
        # Import the files first, then sync the website for those files in git
        file_steps = chord(
            celery.group(*tasks),
            update_website_status.si(website.pk, website.synced_on),
        )
        website_step = sync_website_content.si(name)
        workflow = chain(file_steps, website_step)
        # NOTE(review): self is presumably a bound Celery task, so raising the
        # result of self.replace() hands execution over to the workflow — confirm
        raise self.replace(celery.group(workflow))
    # Only reached when no file tasks were queued: update the status right away
    update_website_status(website.pk, website.synced_on)
def handle(self, *args, **options):
    """
    Create Google Drive folders for the websites selected by the command options.

    Nothing is done unless both settings.DRIVE_SHARED_ID and
    settings.DRIVE_SERVICE_ACCOUNT_CREDS are set. Websites may be narrowed by the
    "filter" (name or short_id prefix), "starter" (starter slug) and "source"
    options. The short ids are passed to the create_gdrive_folders_chunked task
    and the command blocks until the task finishes.

    Raises:
        CommandError: if the task result contains anything other than True values
    """
    if not (settings.DRIVE_SHARED_ID and settings.DRIVE_SERVICE_ACCOUNT_CREDS):
        # Drive integration is not configured, so there is nothing to do
        return

    websites = Website.objects.all()
    website_filter = options["filter"].lower()
    starter_filter = options["starter"].lower()
    source_filter = options["source"].lower()
    chunk_size = int(options["chunk_size"])
    is_verbose = options["verbosity"] > 1

    if website_filter:
        websites = websites.filter(
            Q(name__startswith=website_filter)
            | Q(short_id__startswith=website_filter)
        )
    if starter_filter:
        websites = websites.filter(starter__slug=starter_filter)
    if source_filter:
        websites = websites.filter(source=source_filter)

    short_ids = list(websites.values_list("short_id", flat=True))

    start = now_in_utc()
    task = create_gdrive_folders_chunked.delay(short_ids, chunk_size=chunk_size)
    # This message previously said "upsert pipelines", which described a
    # different command; this one creates Google Drive folders.
    self.stdout.write(
        f"Started celery task {task} to create Google Drive folders "
        f"for {len(short_ids)} sites."
    )
    if is_verbose:
        self.stdout.write(f"{','.join(short_ids)}")

    self.stdout.write("Waiting on task...")
    result = task.get()
    # Every entry in the result must be True for the run to count as a success
    if set(result) != {True}:
        raise CommandError(f"Some errors occurred: {result}")

    total_seconds = (now_in_utc() - start).total_seconds()
    self.stdout.write(
        f"Google drive folder creation finished, took {total_seconds} seconds"
    )
def reset_publishing_fields(website_name: str):
    """Clear the draft and live publishing state of a website so a fresh publish can be requested"""
    reset_time = now_in_utc()
    field_values = {}
    for version in ("live", "draft"):
        field_values[f"has_unpublished_{version}"] = True
        field_values[f"{version}_publish_status"] = None
        field_values[f"{version}_publish_status_updated_on"] = reset_time
        field_values[f"latest_build_id_{version}"] = None
    Website.objects.filter(name=website_name).update(**field_values)
def test_update_website_status(mocker, status, notify, has_user, version):
    """update_website_status should set the publish status fields, and mail/log as expected"""
    mail_mock = mocker.patch("websites.api.mail_on_publish")
    log_error_mock = mocker.patch("websites.api.log.error")
    publisher = None
    if has_user:
        publisher = UserFactory.create()
    website = WebsiteFactory.create(**{f"{version}_last_published_by": publisher})
    updated_on = now_in_utc()

    update_website_status(website, version, status, updated_on)
    website.refresh_from_db()

    expected_mail_calls = 1 if has_user and notify else 0
    expected_log_calls = 1 if status == PUBLISH_STATUS_ERRORED else 0
    assert getattr(website, f"{version}_publish_status") == status
    assert getattr(website, f"{version}_publish_status_updated_on") == updated_on
    assert mail_mock.call_count == expected_mail_calls
    assert log_error_mock.call_count == expected_log_calls
def test_check_incomplete_publish_build_statuses_abort(settings, api_mock):
    """A build whose status is still unchanged past the cutoff time should be aborted"""
    past_cutoff = now_in_utc() - timedelta(
        seconds=settings.PUBLISH_STATUS_CUTOFF + 5
    )
    stuck_website = WebsiteFactory.create(
        draft_publish_status_updated_on=past_cutoff,
        draft_publish_status=PUBLISH_STATUS_NOT_STARTED,
        latest_build_id_draft=1,
    )
    pipeline_mock = api_mock.get_sync_pipeline.return_value
    pipeline_mock.get_build_status.return_value = PUBLISH_STATUS_NOT_STARTED

    tasks.check_incomplete_publish_build_statuses.delay()

    pipeline_mock.abort_build.assert_called_once_with(
        stuck_website.latest_build_id_draft
    )
    stuck_website.refresh_from_db()
    assert stuck_website.draft_publish_status == PUBLISH_STATUS_ABORTED
def test_check_incomplete_publish_build_statuses_no_setting(settings, api_mock):
    """No pipeline api calls should be made when settings.CONTENT_SYNC_PIPELINE_BACKEND is unset"""
    settings.CONTENT_SYNC_PIPELINE_BACKEND = None
    past_cutoff = now_in_utc() - timedelta(
        seconds=settings.PUBLISH_STATUS_CUTOFF + 5
    )
    stuck_website = WebsiteFactory.create(
        draft_publish_status_updated_on=past_cutoff,
        draft_publish_status=PUBLISH_STATUS_NOT_STARTED,
        latest_build_id_draft=1,
    )
    pipeline_mock = api_mock.get_sync_pipeline.return_value
    pipeline_mock.get_build_status.return_value = PUBLISH_STATUS_NOT_STARTED

    tasks.check_incomplete_publish_build_statuses.delay()

    api_mock.get_sync_pipeline.assert_not_called()
    stuck_website.refresh_from_db()
    # The status must be left exactly as it was
    assert stuck_website.draft_publish_status == PUBLISH_STATUS_NOT_STARTED
def test_sync_all_websites_rate_limit_low(mocker, settings, check_limit):
    """sync_unsynced_websites should pause when the GithubBackend is close to its rate limit"""
    settings.CONTENT_SYNC_BACKEND = "content_sync.backends.github.GithubBackend"
    settings.GITHUB_RATE_LIMIT_CHECK = check_limit
    sleep_mock = mocker.patch("content_sync.api.sleep")
    mock_git_wrapper = mocker.patch("content_sync.backends.github.GithubApiWrapper")
    now = now_in_utc()
    mocker.patch("content_sync.tasks.now_in_utc", return_value=now)

    # Mirror the attributes the throttling code reads: rate_limiting is a
    # (remaining, limit) pair and rate_limiting_resettime is a Unix timestamp.
    # The old setup added a timedelta to the patch object itself and never set
    # the reset time, so the reset time seen by the code was a bare mock.
    mock_git = mock_git_wrapper.return_value.git
    mock_git.rate_limiting = (5, 5000)
    mock_git.rate_limiting_resettime = int(
        (now + timedelta(seconds=1000)).timestamp()
    )

    ContentSyncStateFactory.create_batch(2)
    tasks.sync_unsynced_websites.delay()
    assert sleep_mock.call_count == (2 if check_limit else 0)
def test_check_incomplete_publish_build_statuses_500(settings, mocker, api_mock):
    """A non-404 error from the build status query should be logged and leave the status unchanged"""
    log_exception_mock = mocker.patch("content_sync.tasks.log.exception")
    past_wait_time = now_in_utc() - timedelta(
        seconds=settings.PUBLISH_STATUS_WAIT_TIME + 5
    )
    website = WebsiteFactory.create(
        live_publish_status_updated_on=past_wait_time,
        live_publish_status=PUBLISH_STATUS_NOT_STARTED,
        latest_build_id_live=1,
    )
    server_error = HTTPError(response=mocker.Mock(status_code=500))
    pipeline_mock = api_mock.get_sync_pipeline.return_value
    pipeline_mock.get_build_status.side_effect = server_error

    tasks.check_incomplete_publish_build_statuses.delay()

    log_exception_mock.assert_called_once_with(
        "Error updating publishing status for website %s", website.name
    )
    website.refresh_from_db()
    assert website.live_publish_status == PUBLISH_STATUS_NOT_STARTED
def test_update_unpublished_website_status(status, version):
    """update_website_status should set publish dates on an unpublished site only after a successful publish"""
    website = WebsiteFactory.create(unpublished=True, draft_publish_date=None)
    updated_on = now_in_utc()

    update_website_status(website, version, status, updated_on)
    website.refresh_from_db()

    assert getattr(website, f"{version}_publish_status") == status
    assert getattr(website, f"{version}_publish_status_updated_on") == updated_on

    if version == VERSION_LIVE:
        publish_date_field = "publish_date"
    else:
        publish_date_field = "draft_publish_date"

    if status != PUBLISH_STATUS_SUCCEEDED:
        assert getattr(website, publish_date_field) is None
    else:
        assert getattr(website, publish_date_field) == updated_on
        if version == VERSION_LIVE:
            assert getattr(website, "first_published_to_production") == updated_on
def build_digital_credential(
    certificate: Union[ProgramCertificate, CourseRunCertificate],
    learner_did: LearnerDID,
) -> Dict:
    """
    Build the digital credential request payload for a certificate.

    Args:
        certificate: the program or course run certificate to describe
        learner_did: supplies the subject's DID and the learner's name

    Returns:
        A dict with a "credential" entry and an "options" entry

    Raises:
        Exception: if certificate is neither a ProgramCertificate nor a CourseRunCertificate
    """
    if isinstance(certificate, ProgramCertificate):
        has_credential = build_program_credential(certificate)
    elif isinstance(certificate, CourseRunCertificate):
        has_credential = build_course_run_credential(certificate)
    else:
        raise Exception(
            f"Unexpected courseware object type for digital credentials: {type(certificate)}"
        )

    credential_id = urljoin(settings.SITE_BASE_URL, certificate.link)
    issuer = {
        "type": "Issuer",
        "id": settings.DIGITAL_CREDENTIALS_ISSUER_ID,
        "name": settings.SITE_NAME,
        "url": settings.SITE_BASE_URL,
    }
    issuance_date = now_in_utc().isoformat()
    credential_subject = {
        "type": "schema:Person",
        "id": learner_did.did,
        "name": learner_did.learner.name,
        "hasCredential": has_credential,
    }
    credential = {
        "@context": [
            "https://www.w3.org/2018/credentials/v1",
            "https://w3id.org/security/suites/ed25519-2020/v1",
            "https://w3id.org/dcc/v1",
        ],
        "id": credential_id,
        "type": ["VerifiableCredential", "LearningCredential"],
        "issuer": issuer,
        "issuanceDate": issuance_date,
        "credentialSubject": credential_subject,
    }
    options = {
        "verificationMethod": settings.DIGITAL_CREDENTIALS_VERIFICATION_METHOD
    }
    return {"credential": credential, "options": options}
def throttle_git_backend_calls(backend: object, min_delay: Optional[int] = None):
    """
    If the current git api limit is too low, sleep until it is reset.

    Nothing happens unless settings.GITHUB_RATE_LIMIT_CHECK is enabled and the
    backend is a GithubBackend. When the remaining request count is at or below
    settings.GITHUB_RATE_LIMIT_CUTOFF, sleep until the reported reset time;
    otherwise sleep for min_delay seconds.

    Args:
        backend: the sync backend about to be called
        min_delay: seconds to wait between calls; any falsy value (including 0)
            falls back to settings.GITHUB_RATE_LIMIT_MIN_SLEEP
    """
    min_delay = min_delay or settings.GITHUB_RATE_LIMIT_MIN_SLEEP
    if settings.GITHUB_RATE_LIMIT_CHECK and isinstance(backend, GithubBackend):
        requests_remaining, limit = backend.api.git.rate_limiting
        reset_time = datetime.fromtimestamp(
            backend.api.git.rate_limiting_resettime, tz=pytz.utc
        )
        log.debug(
            "Remaining github calls : %d/%d, reset: %s",
            requests_remaining,
            limit,
            reset_time.isoformat(),
        )
        if requests_remaining <= settings.GITHUB_RATE_LIMIT_CUTOFF:
            # timedelta.seconds is only the seconds component of the delta: for a
            # reset time already in the past it is close to 86400 (a near 24 hour
            # sleep), and it drops whole days. Use the full signed duration and
            # never sleep for a negative amount.
            seconds_until_reset = (reset_time - now_in_utc()).total_seconds()
            sleep(max(0, seconds_until_reset))
        else:
            # Always wait x seconds between git backend calls
            sleep(min_delay)
def test_check_incomplete_publish_build_statuses_404(settings, mocker, api_mock):
    """A website whose pipeline/build cannot be found should get an errored publishing status"""
    log_error_mock = mocker.patch("content_sync.tasks.log.error")
    past_cutoff = now_in_utc() - timedelta(
        seconds=settings.PUBLISH_STATUS_CUTOFF + 5
    )
    bad_build_website = WebsiteFactory.create(
        draft_publish_status_updated_on=past_cutoff,
        draft_publish_status=PUBLISH_STATUS_NOT_STARTED,
        latest_build_id_draft=1,
    )
    not_found = HTTPError(response=mocker.Mock(status_code=404))
    pipeline_mock = api_mock.get_sync_pipeline.return_value
    pipeline_mock.get_build_status.side_effect = not_found

    tasks.check_incomplete_publish_build_statuses.delay()

    log_error_mock.assert_called_once_with(
        "Could not find %s build %s for %s",
        VERSION_DRAFT,
        bad_build_website.latest_build_id_draft,
        bad_build_website.name,
    )
    bad_build_website.refresh_from_db()
    assert bad_build_website.draft_publish_status == PUBLISH_STATUS_ERRORED
def test_sync_all_websites_rate_limit_sleep_length(
    mocker, settings, calls_left, use_default_sleep
):
    """sync_unsynced_websites should sleep for the default delay, or longer when close to the rate limit"""
    settings.GITHUB_RATE_LIMIT_MIN_SLEEP = 12
    settings.GITHUB_RATE_LIMIT_CHECK = True
    settings.CONTENT_SYNC_BACKEND = "content_sync.backends.github.GithubBackend"
    sleep_mock = mocker.patch("content_sync.api.sleep")
    mock_git_wrapper = mocker.patch("content_sync.backends.github.GithubApiWrapper")
    now = now_in_utc()
    mocker.patch("content_sync.tasks.now_in_utc", return_value=now)

    # Mirror the attributes the throttling code reads: rate_limiting is a
    # (remaining, limit) pair and rate_limiting_resettime is a Unix timestamp.
    # The old setup added a timedelta to the patch object itself and never set
    # the reset time, so the asserted sleep length did not come from a real
    # reset time 1000 seconds in the future.
    mock_git = mock_git_wrapper.return_value.git
    mock_git.rate_limiting = (calls_left, 5000)
    mock_git.rate_limiting_resettime = int(
        (now + timedelta(seconds=1000)).timestamp()
    )

    ContentSyncStateFactory.create_batch(2)
    tasks.sync_unsynced_websites.delay()
    if use_default_sleep:
        sleep_mock.assert_any_call(12)
    else:
        # Roughly 1000 seconds remain until the reset, well above the default
        assert sleep_mock.call_args_list[0][0][0] > 12
def test_check_incomplete_publish_build_statuses(
    settings,
    mocker,
    api_mock,
    old_status,
    new_status,
    should_check,
    should_update,
    pipeline,
):  # pylint:disable=too-many-arguments,too-many-locals
    """
    check_incomplete_publish_build_statuses should update statuses of pipeline builds

    Six websites are created: one draft and one live site that are old enough and
    have old_status, plus two sites per version that should be skipped because they
    were updated too recently or have a succeeded/empty status.
    """
    settings.CONTENT_SYNC_PIPELINE_BACKEND = pipeline
    mock_update_status = mocker.patch(
        "content_sync.tasks.update_website_status")
    now = now_in_utc()
    # Updated longer ago than the wait time, with the parametrized status
    draft_site_in_query = WebsiteFactory.create(
        draft_publish_status_updated_on=now
        - timedelta(seconds=settings.PUBLISH_STATUS_WAIT_TIME + 5),
        draft_publish_status=old_status,
        latest_build_id_draft=1,
    )
    # Updated more recently than the wait time
    draft_site_to_exclude_time = WebsiteFactory.create(
        draft_publish_status_updated_on=now
        - timedelta(seconds=settings.PUBLISH_STATUS_WAIT_TIME - 5),
        draft_publish_status=old_status,
        latest_build_id_draft=2,
    )
    # Old enough, but already has a succeeded status
    draft_site_to_exclude_status = WebsiteFactory.create(
        draft_publish_status_updated_on=now
        - timedelta(seconds=settings.PUBLISH_STATUS_WAIT_TIME + 5),
        draft_publish_status=PUBLISH_STATUS_SUCCEEDED,
        latest_build_id_draft=2,
    )
    # Live counterparts of the three draft sites above
    live_site_in_query = WebsiteFactory.create(
        live_publish_status_updated_on=now
        - timedelta(seconds=settings.PUBLISH_STATUS_WAIT_TIME + 5),
        live_publish_status=old_status,
        latest_build_id_live=3,
    )
    live_site_excluded_time = WebsiteFactory.create(
        live_publish_status_updated_on=now
        - timedelta(seconds=settings.PUBLISH_STATUS_WAIT_TIME - 5),
        live_publish_status=old_status,
        latest_build_id_live=4,
    )
    # Old enough, but has no live status at all
    live_site_excluded_status = WebsiteFactory.create(
        live_publish_status_updated_on=now
        - timedelta(seconds=settings.PUBLISH_STATUS_WAIT_TIME + 5),
        live_publish_status=None,
        latest_build_id_live=4,
    )
    api_mock.get_sync_pipeline.return_value.get_build_status.return_value = new_status
    tasks.check_incomplete_publish_build_statuses.delay()
    for website, version in [
        (draft_site_in_query, VERSION_DRAFT),
        (live_site_in_query, VERSION_LIVE),
    ]:
        if should_check and pipeline is not None:
            api_mock.get_sync_pipeline.assert_any_call(website)
            api_mock.get_sync_pipeline.return_value.get_build_status.assert_any_call(
                getattr(website, f"latest_build_id_{version}"))
            if should_update:
                mock_update_status.assert_any_call(website, version, new_status,
                                                   mocker.ANY)
        else:
            # assert_any_call raising AssertionError shows the call never happened
            with pytest.raises(AssertionError):
                api_mock.get_sync_pipeline.assert_any_call(website)
            with pytest.raises(AssertionError):
                mock_update_status.assert_any_call(website, version, new_status,
                                                   mocker.ANY)
    # The excluded sites must never be looked up or updated
    for website, version in [
        (draft_site_to_exclude_time, VERSION_DRAFT),
        (draft_site_to_exclude_status, VERSION_DRAFT),
        (live_site_excluded_time, VERSION_LIVE),
        (live_site_excluded_status, VERSION_LIVE),
    ]:
        with pytest.raises(AssertionError):
            api_mock.get_sync_pipeline.assert_any_call(website)
        with pytest.raises(AssertionError):
            mock_update_status.assert_any_call(website, version, new_status,
                                               mocker.ANY)
def check_incomplete_publish_build_statuses():
    """
    Check the status of pipeline builds whose publish status has not been updated
    within settings.PUBLISH_STATUS_WAIT_TIME seconds.

    Does nothing when settings.CONTENT_SYNC_PIPELINE_BACKEND is not set. For each
    stale draft/live version that has a build id, the build status is fetched from
    the pipeline: a 404 response marks the version as errored, and a build that is
    still unfinished after settings.PUBLISH_STATUS_CUTOFF seconds is aborted. The
    website is updated only when the status changed. Any error for one website is
    logged and does not stop the remaining websites from being processed.
    """
    if not settings.CONTENT_SYNC_PIPELINE_BACKEND:
        return
    now = now_in_utc()
    wait_dt = now - timedelta(seconds=settings.PUBLISH_STATUS_WAIT_TIME)
    cutoff_dt = now - timedelta(seconds=settings.PUBLISH_STATUS_CUTOFF)

    # Skip websites where both versions are either unset or in a final state
    draft_finished = Q(draft_publish_status__isnull=True) | Q(
        draft_publish_status__in=PUBLISH_STATUSES_FINAL
    )
    live_finished = Q(live_publish_status__isnull=True) | Q(
        live_publish_status__in=PUBLISH_STATUSES_FINAL
    )
    is_stale = Q(draft_publish_status_updated_on__lte=wait_dt) | Q(
        live_publish_status_updated_on__lte=wait_dt
    )
    stale_websites = Website.objects.exclude(draft_finished & live_finished).filter(
        is_stale
    )

    for website in stale_websites.iterator():
        try:
            # Collect every version to check before any update is made
            versions_to_check = []
            for candidate in (VERSION_DRAFT, VERSION_LIVE):
                current_status = getattr(website, f"{candidate}_publish_status")
                updated_on = getattr(website, f"{candidate}_publish_status_updated_on")
                if (
                    current_status not in PUBLISH_STATUSES_FINAL
                    and updated_on
                    and updated_on <= wait_dt
                ):
                    versions_to_check.append((candidate, updated_on, current_status))

            for version, update_dt, last_status in versions_to_check:
                build_id = getattr(website, f"latest_build_id_{version}")
                if build_id is None:
                    continue
                pipeline = api.get_sync_pipeline(website)
                try:
                    status = pipeline.get_build_status(build_id)
                except HTTPError as err:
                    if err.response.status_code != 404:
                        raise
                    log.error(
                        "Could not find %s build %s for %s",
                        version,
                        build_id,
                        website.name,
                    )
                    status = PUBLISH_STATUS_ERRORED
                if status not in PUBLISH_STATUSES_FINAL and update_dt <= cutoff_dt:
                    # Abort so another attempt can be made
                    pipeline.abort_build(build_id)
                    status = PUBLISH_STATUS_ABORTED
                if status != last_status:
                    update_website_status(website, version, status, now)
        except:  # pylint: disable=bare-except
            log.exception(
                "Error updating publishing status for website %s", website.name
            )