def test_get_items_to_be_processed(self):
    """A PillowError whose next-attempt date is already past is picked up for processing."""
    now = datetime.datetime.utcnow()
    err = PillowError(
        date_created=now - datetime.timedelta(days=1),
        date_next_attempt=now - datetime.timedelta(days=2),
        date_last_attempt=now - datetime.timedelta(days=3),
    )
    err.save()
    self.addCleanup(err.delete)
    found = list(PillowRetryEnqueuingOperation.get_items_to_be_processed(
        datetime.datetime.utcnow()))
    self.assertTrue(found)
def test_bulk_reset(self):
    """Errors at the max processing-attempt count are skipped until bulk_reset_attempts re-queues them."""
    for n in range(5):
        err = create_error(_change(id=n), attempts=const.PILLOW_RETRY_QUEUE_MAX_PROCESSING_ATTEMPTS)
        err.save()
    self.assertEqual(len(PillowError.get_errors_to_process(datetime.utcnow()).all()), 0)
    PillowError.bulk_reset_attempts(datetime.utcnow())
    self.assertEqual(len(PillowError.get_errors_to_process(datetime.utcnow()).all()), 5)
def test_get_or_create(self):
    """get_or_create returns the saved record for the same doc+pillow, a fresh unsaved one otherwise."""
    msg = 'abcd'
    doc_id = '12335'
    existing = create_error({'id': doc_id}, message=msg, attempts=2)
    existing.save()
    fetched = PillowError.get_or_create({'id': doc_id}, FakePillow())
    self.assertEqual(fetched.total_attempts, 2)
    self.assertEqual(fetched.current_attempt, 2)
    self.assertTrue(msg in existing.error_traceback)
    # A different pillow must yield a brand-new, unsaved error record.
    fresh = PillowError.get_or_create({'id': doc_id}, FakePillow1())
    self.assertIsNone(fresh.id)
    self.assertEqual(fresh.current_attempt, 0)
def test_bulk_reset_cutoff(self):
    """bulk_reset_attempts interacts with the multi-attempt cutoff as asserted below."""
    for n in range(3):
        err = create_error({'id': n}, attempts=1)
        if n >= 1:
            # push two of the three errors past the cutoff
            err.total_attempts = PillowError.multi_attempts_cutoff() + 1
        err.save()
    self.assertEqual(len(PillowError.get_errors_to_process(datetime.utcnow()).all()), 0)
    PillowError.bulk_reset_attempts(datetime.utcnow())
    self.assertEqual(len(PillowError.get_errors_to_process(datetime.utcnow()).all()), 2)
def test_bulk_reset_cutoff(self):
    """bulk_reset_attempts interacts with PILLOW_RETRY_MULTI_ATTEMPTS_CUTOFF as asserted below."""
    for n in range(3):
        err = create_error(_change(id=n), attempts=1)
        if n >= 1:
            # push two of the three errors past the cutoff
            err.total_attempts = const.PILLOW_RETRY_MULTI_ATTEMPTS_CUTOFF + 1
        err.save()
    self.assertEqual(len(PillowError.get_errors_to_process(datetime.utcnow()).all()), 0)
    PillowError.bulk_reset_attempts(datetime.utcnow())
    self.assertEqual(len(PillowError.get_errors_to_process(datetime.utcnow()).all()), 2)
def test_get_or_create(self):
    """get_or_create returns the saved record for the same change+pillow, a fresh unsaved one otherwise."""
    msg = 'abcd'
    doc_id = '12335'
    existing = create_error(_change(id=doc_id), message=msg, attempts=2)
    existing.save()
    fetched = PillowError.get_or_create(_change(id=doc_id), FakePillow())
    self.assertEqual(fetched.total_attempts, 2)
    self.assertEqual(fetched.current_attempt, 2)
    self.assertTrue(msg in existing.error_traceback)
    # A different pillow must yield a brand-new, unsaved error record.
    another_pillow = make_fake_constructed_pillow('FakePillow1', '')
    fresh = PillowError.get_or_create(_change(id=doc_id), another_pillow)
    self.assertIsNone(fresh.id)
    self.assertEqual(fresh.current_attempt, 0)
def create_error(change, message='message', attempts=0, pillow=None, ex_class=None):
    """Return a PillowError for ``change`` (coerced to a Change) with ``attempts`` recorded failures.

    The returned error is not saved; callers save it themselves.
    """
    change = force_to_change(change)
    change.metadata = ChangeMeta(
        data_source_type='couch',
        data_source_name='test_commcarehq',
        document_id=change.id,
    )
    err = PillowError.get_or_create(change, pillow or FakePillow())
    for _ in range(attempts):
        err.add_attempt(*get_ex_tb(message, ex_class=ex_class))
    return err
def handle(self, *args, **options):
    """Record an HqDeploy snapshot, reset pillow errors, and optionally email admins.

    Python 2 management command (uses print statements).
    Indentation reconstructed from a collapsed source line.
    """
    root_dir = settings.FILEPATH
    git_snapshot = gitinfo.get_project_snapshot(root_dir, submodules=True)
    git_snapshot['diff_url'] = options.get('url', None)
    deploy = HqDeploy(
        date=datetime.utcnow(),
        user=options['user'],
        environment=options['environment'],
        code_snapshot=git_snapshot,
    )
    deploy.save()
    # reset PillowTop errors in the hope that a fix has been deployed
    rows_updated = PillowError.bulk_reset_attempts(datetime.utcnow())
    if rows_updated:
        print "\n---------------- Pillow Errors Reset ----------------\n" \
              "{} pillow errors queued for retry\n".format(rows_updated)
    if options['mail_admins']:
        # render an HTML snapshot of the deployed code and mail it to the admins
        snapshot_table = render_to_string('hqadmin/partials/project_snapshot.html', dictionary={'snapshot': git_snapshot})
        message = "Deployed by %s, cheers!" % options['user']
        snapshot_body = "<html><head><title>Deploy Snapshot</title></head><body><h2>%s</h2>%s</body></html>" % (message, snapshot_table)
        call_command('mail_admins', snapshot_body, **{'subject': 'Deploy successful', 'html': True})
def process_pillow_retry(error_doc_id):
    """Retry the change recorded in the PillowError with pk ``error_doc_id``.

    Takes a redis lock so a double-enqueued error is processed once. On a
    successful retry the error row is deleted; on failure another attempt is
    recorded and the row is un-queued so it can be picked up again later.
    """
    # Redis error logged in get_redis_client
    try:
        client = cache_core.get_redis_client()
    except cache_core.RedisClientError:
        return

    # Prevent more than one task from processing this error, just in case
    # it got enqueued twice.
    lock = client.lock(
        "pillow-retry-processing-%s" % error_doc_id,
        timeout=settings.PILLOW_RETRY_PROCESSING_LOCK_TIMEOUT*60
    )
    if lock.acquire(blocking=False):
        try:
            error_doc = PillowError.objects.get(id=error_doc_id)
        except PillowError.DoesNotExist:
            # Row already handled/deleted elsewhere; nothing to do.
            release_lock(lock, True)
            return

        pillow_name_or_class = error_doc.pillow
        try:
            pillow = get_pillow_by_name(pillow_name_or_class)
        except PillowNotFoundError:
            pillow = None

        if not pillow:
            notify_error((
                "Could not find pillowtop class '%s' while attempting a retry. "
                "If this pillow was recently deleted then this will be automatically cleaned up eventually. "
                "If not, then this should be looked into."
            ) % pillow_name_or_class)
            try:
                # Push past the cutoff so this orphaned error stops being
                # retried and is eventually cleaned up.
                error_doc.total_attempts = PillowError.multi_attempts_cutoff() + 1
                error_doc.save()
            finally:
                release_lock(lock, True)
            return

        change = error_doc.change_object
        try:
            change_metadata = change.metadata
            if change_metadata:
                # Re-attach a document store so the pillow can fetch the document.
                document_store = get_document_store(
                    data_source_type=change_metadata.data_source_type,
                    data_source_name=change_metadata.data_source_name,
                    domain=change_metadata.domain
                )
                change.document_store = document_store
            pillow.process_change(change)
        except Exception:
            ex_type, ex_value, ex_tb = sys.exc_info()
            error_doc.add_attempt(ex_value, ex_tb)
            error_doc.queued = False  # allow the next sweep to re-enqueue it
            error_doc.save()
        else:
            # Retry succeeded; the error record is no longer needed.
            error_doc.delete()
        finally:
            release_lock(lock, True)
def test_empty_metadata(self):
    """A change with no metadata still records a failed attempt instead of crashing."""
    change = _change(id='123')
    error = PillowError.get_or_create(change, GetDocPillow())
    error.save()
    # NOTE(review): other tests in this file pass error.id here — confirm whether
    # this version of process_pillow_retry expects the object or the pk.
    process_pillow_retry(error)
    error = PillowError.objects.get(pk=error.id)
    # assertEquals is a deprecated alias of assertEqual (removed in Python 3.12)
    self.assertEqual(error.total_attempts, 1)
def test_get_errors_to_process_queued_update(self):
    """The queryset returned by get_errors_to_process supports .update()."""
    date = datetime.utcnow()
    err = create_error({'id': 1}, attempts=0)
    err.date_next_attempt = date
    err.save()
    due = PillowError.get_errors_to_process(date).all()
    self.assertEqual(len(due), 1)
    # check that calling update on the return value has the desired effect
    due.update(queued=True)
    due = PillowError.get_errors_to_process(date).all()
    self.assertEqual(len(due), 0)
def test_get_errors_to_process(self):
    """Only re-process errors with
    current_attempt < const.PILLOW_RETRY_QUEUE_MAX_PROCESSING_ATTEMPTS."""
    base = datetime.utcnow()
    for n in range(5):
        err = create_error(_change(id=n), attempts=n + 1)
        err.date_next_attempt = base.replace(day=n + 1)
        err.save()
    due = PillowError.get_errors_to_process(base.replace(day=1)).all()
    self.assertEqual(len(due), 1)
    due = PillowError.get_errors_to_process(base.replace(day=5)).all()
    self.assertEqual(len(due), 3)
def test_null_meta_date(self):
    """A null 'date' entry in change metadata yields a doc_date of None."""
    doc_id = '12335'
    meta = {
        'domains': ['a' * 247, '123456789'],
        'doc_type': 'something',
        'date': None,
    }
    result = PillowError.get_or_create({'id': doc_id}, FakePillow(), meta)
    self.assertEqual(None, result.doc_date)
def handle(self, *args, **options):
    """Record an HqDeploy snapshot, reset pillow errors, and send deploy notifications.

    Python 2 management command: notifies Slack (MIA bot) and Datadog, prints a
    success banner, and optionally emails admins.
    Indentation reconstructed from a collapsed source line.
    """
    root_dir = settings.FILEPATH
    git_snapshot = gitinfo.get_project_snapshot(root_dir, submodules=True)
    compare_url = git_snapshot['diff_url'] = options.get('url', None)
    deploy = HqDeploy(
        date=datetime.utcnow(),
        user=options['user'],
        environment=options['environment'],
        code_snapshot=git_snapshot,
    )
    deploy.save()
    # reset PillowTop errors in the hope that a fix has been deployed
    rows_updated = PillowError.bulk_reset_attempts(datetime.utcnow())
    if rows_updated:
        print "\n---------------- Pillow Errors Reset ----------------\n" \
              "{} pillow errors queued for retry\n".format(rows_updated)
    # {{diff_link}} stays literal here and is filled per-notification below
    deploy_notification_text = (
        "CommCareHQ has been successfully deployed to *{}* by *{}*. "
        "Find the diff {{diff_link}}".format(
            options['environment'],
            options['user'],
        )
    )
    # Slack notification is best-effort and only configured on some environments.
    if hasattr(settings, 'MIA_THE_DEPLOY_BOT_API'):
        link = diff_link(STYLE_SLACK, compare_url)
        requests.post(settings.MIA_THE_DEPLOY_BOT_API, data=json.dumps({
            "username": "******",
            "text": deploy_notification_text.format(diff_link=link),
        }))
    if settings.DATADOG_API_KEY:
        tags = ['environment:{}'.format(options['environment'])]
        link = diff_link(STYLE_MARKDOWN, compare_url)
        datadog_api.Event.create(
            title="Deploy Success",
            text=deploy_notification_text.format(diff_link=link),
            tags=tags,
            alert_type="success"
        )
    print "\n=============================================================\n" \
          "Congratulations! Deploy Complete.\n\n" \
          "Don't forget to keep an eye on the deploy dashboard to " \
          "make sure everything is running smoothly.\n\n" \
          "https://p.datadoghq.com/sb/5c4af2ac8-1f739e93ef" \
          "\n=============================================================\n"
    if options['mail_admins']:
        message_body = get_deploy_email_message_body(
            environment=options['environment'], user=options['user'],
            compare_url=compare_url)
        call_command('mail_admins', message_body, **{'subject': 'Deploy successful', 'html': True})
def test_pillow_not_found(self):
    """Retrying an error whose pillow class no longer exists must not raise,
    and must bump total_attempts past the multi-attempt cutoff."""
    err = PillowError.objects.create(
        doc_id='missing-pillow',
        pillow='badmodule.NotARealPillow',
        date_created=datetime.utcnow(),
        date_last_attempt=datetime.utcnow(),
    )
    # make sure this doesn't error
    process_pillow_retry(err.id)
    # and that its total_attempts was bumped above the threshold
    refreshed = PillowError.objects.get(pk=err.pk)
    self.assertTrue(refreshed.total_attempts > PillowError.multi_attempts_cutoff())
def test_get_or_create_meta(self):
    """Metadata is persisted: domains truncated to fit, doc_type kept, date parsed."""
    doc_id = '12335'
    date = '2013-12-05T08:52:19Z'
    meta = {
        'domains': ['a' * 247, '123456789'],
        'doc_type': 'something',
        'date': date,
    }
    result = PillowError.get_or_create({'id': doc_id}, FakePillow(), meta)
    # long domain list is stored truncated with a '...' marker
    self.assertEqual(result.domains, 'a' * 247 + ',1234...')
    self.assertEqual(result.doc_type, 'something')
    self.assertEqual(result.doc_date, parse(date))
    result.save()
def handle(self, *args, **options):
    """Record an HqDeploy snapshot, reset pillow errors, notify Slack/Datadog,
    and optionally email a deploy snapshot to admins.

    Python 2 management command. Indentation reconstructed from a collapsed
    source line.
    """
    root_dir = settings.FILEPATH
    git_snapshot = gitinfo.get_project_snapshot(root_dir, submodules=True)
    git_snapshot['diff_url'] = options.get('url', None)
    deploy = HqDeploy(
        date=datetime.utcnow(),
        user=options['user'],
        environment=options['environment'],
        code_snapshot=git_snapshot,
    )
    deploy.save()
    # reset PillowTop errors in the hope that a fix has been deployed
    rows_updated = PillowError.bulk_reset_attempts(datetime.utcnow())
    if rows_updated:
        print "\n---------------- Pillow Errors Reset ----------------\n" \
              "{} pillow errors queued for retry\n".format(rows_updated)
    # {{diff_link}} stays literal here and is filled per-notification below
    deploy_notification_text = (
        "CommCareHQ has been successfully deployed to *{}* by *{}*. "
        "Find the diff {{diff_link}}".format(
            options['environment'],
            options['user'],
        )
    )
    # Slack notification is best-effort and only configured on some environments.
    if hasattr(settings, 'MIA_THE_DEPLOY_BOT_API'):
        link = diff_link(STYLE_SLACK, git_snapshot['diff_url'])
        requests.post(settings.MIA_THE_DEPLOY_BOT_API, data=json.dumps({
            "channel": "#dev",
            "username": "******",
            "text": deploy_notification_text.format(diff_link=link),
            "icon_emoji": ":see_no_evil:"
        }))
    if settings.DATADOG_API_KEY:
        tags = ['environment:{}'.format(options['environment'])]
        link = diff_link(STYLE_MARKDOWN, git_snapshot['diff_url'])
        datadog_api.Event.create(
            title="Deploy Success",
            text=deploy_notification_text.format(diff_link=link),
            tags=tags
        )
    if options['mail_admins']:
        # render an HTML snapshot of the deployed code and mail it to the admins
        snapshot_table = render_to_string('hqadmin/partials/project_snapshot.html', dictionary={'snapshot': git_snapshot})
        message = "Deployed by %s, cheers!" % options['user']
        snapshot_body = "<html><head><title>Deploy Snapshot</title></head><body><h2>%s</h2>%s</body></html>" % (message, snapshot_table)
        call_command('mail_admins', snapshot_body, **{'subject': 'Deploy successful', 'html': True})
def test_get_errors_to_process_queued(self):
    """Errors already marked queued are excluded from processing."""
    date = datetime.utcnow()
    ready = create_error({'id': 1}, attempts=0)
    ready.date_next_attempt = date
    ready.save()
    already_queued = create_error({'id': 2}, attempts=0)
    already_queued.date_next_attempt = date
    already_queued.queued = True
    already_queued.save()
    due = PillowError.get_errors_to_process(date).all()
    self.assertEqual(len(due), 1)
    self.assertEqual(ready.id, due[0]['id'])
def handle_pillow_error(pillow, change, exception):
    """Persist a PillowError for ``change`` (when the pillow retries errors) and log the failure."""
    from pillow_retry.models import PillowError

    error_id = None
    if pillow.retry_errors:
        err = PillowError.get_or_create(change, pillow)
        err.add_attempt(exception, sys.exc_info()[2])
        err.save()
        error_id = err.id

    pillow_logging.exception(
        "[%s] Error on change: %s, %s. Logged as: %s" % (
            pillow.get_name(), change['id'], exception, error_id
        )
    )
def _handle_pillow_error(self, change, exception):
    """Record a PillowError for ``change`` (with domain/date metadata when available) and log it."""
    try:
        # This breaks the module boundary by using a show function defined in commcare-hq
        # but it was decided that it wasn't worth the effort to maintain the separation.
        meta = self.get_couch_db().show('domain/domain_date', change['id'])
    except ResourceNotFound:
        # Show function does not exist
        meta = None
    err = PillowError.get_or_create(change, self, change_meta=meta)
    err.add_attempt(exception, sys.exc_info()[2])
    err.save()
    pillow_logging.exception(
        "[%s] Error on change: %s, %s. Logged as: %s" % (
            self.get_name(), change['id'], exception, err.id
        )
    )
def test_get_errors_to_process_max_limit(self):
    """Errors are eligible only while both attempt counters are within their limits
    (or current_attempt is 0). See settings.PILLOW_RETRY_MULTI_ATTEMPTS_CUTOFF."""
    date = datetime.utcnow()
    max_current = settings.PILLOW_RETRY_QUEUE_MAX_PROCESSING_ATTEMPTS
    max_total = settings.PILLOW_RETRY_MULTI_ATTEMPTS_CUTOFF
    cases = [
        # (doc_id, current_attempt, total_attempts)
        ('to-process1', max_current, max_total),         # both within limits
        ('to-process2', 0, max_total + 1),               # total over limit but current == 0
        ('not-processed1', max_current + 1, max_total),  # current over limit
        ('not-processed2', max_current, max_total + 1),  # total over limit
    ]
    for doc_id, current, total in cases:
        err = create_error({'id': doc_id})
        err.date_next_attempt = date
        err.current_attempt = current
        err.total_attempts = total
        err.save()
    eligible = PillowError.get_errors_to_process(date, fetch_full=True).all()
    self.assertEqual(len(eligible), 2)
    self.assertEqual({'to-process1', 'to-process2'}, {e.doc_id for e in eligible})
def handle_pillow_error(pillow, change, exception):
    """Record a PillowError for ``change``, attaching couch show-function metadata when available."""
    from couchdbkit import ResourceNotFound
    from pillow_retry.models import PillowError

    meta = None
    if hasattr(pillow, 'get_couch_db'):
        try:
            meta = pillow.get_couch_db().show('domain_shows/domain_date', change['id'])
        except ResourceNotFound:
            # show function not present in this database; proceed without metadata
            pass

    err = PillowError.get_or_create(change, pillow, change_meta=meta)
    err.add_attempt(exception, sys.exc_info()[2])
    err.save()
    pillow_logging.exception(
        "[%s] Error on change: %s, %s. Logged as: %s" % (
            pillow.get_name(), change['id'], exception, err.id
        )
    )
def handle_pillow_error(pillow, change, exception):
    """Log and count a change-processing failure; persist a PillowError when retryable."""
    from pillow_retry.models import PillowError

    pillow_logging.exception("[%s] Error on change: %s, %s" % (
        pillow.get_name(),
        change['id'],
        exception,
    ))
    datadog_counter('commcare.change_feed.changes.exceptions', tags=[
        'pillow_name:{}'.format(pillow.get_name()),
    ])
    # keep track of error attempt count
    change.increment_attempt_count()

    # always retry document missing errors, because the error is likely with couch
    if pillow.retry_errors or isinstance(exception, DocumentMissingError):
        err = PillowError.get_or_create(change, pillow)
        err.add_attempt(exception, sys.exc_info()[2], change.metadata)
        err.save()
def handle(self, **options):
    """Record an HqDeploy (including the commit), reset pillow errors, trigger
    the Jenkins integration-test jobs, and emit a Datadog deploy-success event.

    Indentation (and one string literal split by the collapse) reconstructed
    from a collapsed source line — NOTE(review): confirm nesting of the
    production/integration-test sub-blocks against history.
    """
    compare_url = options.get('url', None)
    minutes = options.get('minutes', None)
    deploy = HqDeploy(
        date=datetime.utcnow(),
        user=options['user'],
        environment=options['environment'],
        diff_url=compare_url,
        commit=options['commit'])
    deploy.save()
    # reset PillowTop errors in the hope that a fix has been deployed
    rows_updated = PillowError.bulk_reset_attempts(datetime.utcnow())
    if rows_updated:
        print("\n---------------- Pillow Errors Reset ----------------\n" \
              "{} pillow errors queued for retry\n".format(rows_updated))
    deploy_notification_text = (
        "CommCareHQ has been successfully deployed to *{}* by *{}* in *{}* minutes. "
        .format(
            options['environment'],
            options['user'],
            minutes or '?',
        ))
    if options['environment'] == 'production':
        deploy_notification_text += "Monitor the {dashboard_link}. "
        if settings.MOBILE_INTEGRATION_TEST_TOKEN:
            deploy_notification_text += "Check the integration {integration_tests_link}. "
            # kick off both Jenkins integration-test jobs for this deploy
            requests.get(
                'https://jenkins.dimagi.com/job/integration-tests/build',
                params={'token': settings.MOBILE_INTEGRATION_TEST_TOKEN},
            )
            requests.get(
                'https://jenkins.dimagi.com/job/integration-tests-pipeline/build',
                params={'token': settings.MOBILE_INTEGRATION_TEST_TOKEN},
            )
    deploy_notification_text += "Find the diff {diff_link}"
    if settings.DATADOG_API_KEY:
        link = diff_link(compare_url)
        create_metrics_event(
            title="Deploy Success",
            text=deploy_notification_text.format(
                dashboard_link=dashboard_link(DASHBOARD_URL),
                diff_link=link,
                integration_tests_link=integration_tests_link(
                    INTEGRATION_TEST_URL)),
            tags={'environment': options['environment']},
            alert_type="success")
    print(
        "\n=============================================================\n"
        "Congratulations! Deploy Complete.\n\n"
        "Don't forget to keep an eye on the deploy dashboard to "
        "make sure everything is running smoothly.\n\n"
        "https://app.datadoghq.com/dashboard/xch-zwt-vzv/hq-deploy-dashboard?tpl_var_environment={}"
        "\n=============================================================\n"
        .format(settings.SERVER_ENVIRONMENT))
def _get_items(self, utcnow):
    """Return a lazy stream of {id, key} dicts for errors due at ``utcnow``."""
    eligible = PillowError.get_errors_to_process(
        utcnow=utcnow,
    )
    return ({'id': row['id'], 'key': row['date_next_attempt']} for row in eligible)
def create_error(change, message='message', attempts=0, pillow=None, ex_class=None):
    """Return an (unsaved) PillowError for ``change`` with ``attempts`` recorded failures."""
    change.metadata = ChangeMeta(
        data_source_type='couch',
        data_source_name='test_commcarehq',
        document_id=change.id,
    )
    err = PillowError.get_or_create(change, pillow or FakePillow())
    for _ in range(attempts):
        err.add_attempt(*get_ex_tb(message, ex_class=ex_class))
    return err
def handle(self, *args, **options):
    """Record an HqDeploy, reset pillow errors, and notify Slack/Datadog/admins.

    Python 2 management command. Indentation reconstructed from collapsed
    source lines — NOTE(review): confirm nesting of the production and
    integration-test sub-blocks against history.
    """
    compare_url = options.get('url', None)
    minutes = options.get('minutes', None)
    deploy = HqDeploy(date=datetime.utcnow(), user=options['user'], environment=options['environment'], diff_url=compare_url)
    deploy.save()
    # reset PillowTop errors in the hope that a fix has been deployed
    rows_updated = PillowError.bulk_reset_attempts(datetime.utcnow())
    if rows_updated:
        print "\n---------------- Pillow Errors Reset ----------------\n" \
              "{} pillow errors queued for retry\n".format(rows_updated)
    deploy_notification_text = (
        "CommCareHQ has been successfully deployed to *{}* by *{}* in *{}* minutes. "
        .format(
            options['environment'],
            options['user'],
            minutes or '?',
        ))
    if options['environment'] == 'production':
        deploy_notification_text += "Monitor the {dashboard_link}. "
        if settings.MOBILE_INTEGRATION_TEST_TOKEN:
            deploy_notification_text += "Check the integration {integration_tests_link}. "
            # kick off the Jenkins integration-test job for this deploy
            requests.get(
                'https://jenkins.dimagi.com/job/integration-tests/build',
                params={'token': settings.MOBILE_INTEGRATION_TEST_TOKEN},
            )
    deploy_notification_text += "Find the diff {diff_link}"
    # Slack notification is best-effort and only configured on some environments.
    if hasattr(settings, 'MIA_THE_DEPLOY_BOT_API'):
        link = diff_link(STYLE_SLACK, compare_url)
        # route the notification to the channel that watches this environment
        if options['environment'] == 'staging':
            channel = '#staging'
        else:
            channel = '#hq-ops'
        requests.post(
            settings.MIA_THE_DEPLOY_BOT_API,
            data=json.dumps({
                "username": "******",
                "channel": channel,
                "text": deploy_notification_text.format(
                    dashboard_link=dashboard_link(STYLE_SLACK, DASHBOARD_URL),
                    diff_link=link,
                    integration_tests_link=integration_tests_link(
                        STYLE_SLACK, INTEGRATION_TEST_URL)),
            }))
    if settings.DATADOG_API_KEY:
        tags = ['environment:{}'.format(options['environment'])]
        link = diff_link(STYLE_MARKDOWN, compare_url)
        datadog_api.Event.create(
            title="Deploy Success",
            text=deploy_notification_text.format(
                dashboard_link=dashboard_link(STYLE_MARKDOWN, DASHBOARD_URL),
                diff_link=link,
                integration_tests_link=integration_tests_link(
                    STYLE_MARKDOWN, INTEGRATION_TEST_URL)),
            tags=tags, alert_type="success")
    print "\n=============================================================\n" \
          "Congratulations! Deploy Complete.\n\n" \
          "Don't forget to keep an eye on the deploy dashboard to " \
          "make sure everything is running smoothly.\n\n" \
          "https://p.datadoghq.com/sb/5c4af2ac8-1f739e93ef" \
          "\n=============================================================\n"
    if options['mail_admins']:
        message_body = get_deploy_email_message_body(
            environment=options['environment'], user=options['user'],
            compare_url=compare_url)
        call_command('mail_admins', message_body, **{
            'subject': 'Deploy successful',
            'html': True
        })
def handle(self, **options):
    """Record an HqDeploy, reset pillow errors, notify Slack/Datadog, email
    admins, and register the release with Sentry.

    Indentation reconstructed from collapsed source lines — NOTE(review):
    confirm nesting of the production / DAILY_DEPLOY_EMAIL sub-blocks against
    history (DAILY_DEPLOY_EMAIL is nested under mail_admins here because it
    uses message_body).
    """
    compare_url = options.get('url', None)
    minutes = options.get('minutes', None)
    deploy = HqDeploy(
        date=datetime.utcnow(),
        user=options['user'],
        environment=options['environment'],
        diff_url=compare_url
    )
    deploy.save()
    # reset PillowTop errors in the hope that a fix has been deployed
    rows_updated = PillowError.bulk_reset_attempts(datetime.utcnow())
    if rows_updated:
        print("\n---------------- Pillow Errors Reset ----------------\n" \
              "{} pillow errors queued for retry\n".format(rows_updated))
    deploy_notification_text = (
        "CommCareHQ has been successfully deployed to *{}* by *{}* in *{}* minutes. ".format(
            options['environment'],
            options['user'],
            minutes or '?',
        )
    )
    if options['environment'] == 'production':
        deploy_notification_text += "Monitor the {dashboard_link}. "
        if settings.MOBILE_INTEGRATION_TEST_TOKEN:
            deploy_notification_text += "Check the integration {integration_tests_link}. "
            # kick off both Jenkins integration-test jobs for this deploy
            requests.get(
                'https://jenkins.dimagi.com/job/integration-tests/build',
                params={'token': settings.MOBILE_INTEGRATION_TEST_TOKEN},
            )
            requests.get(
                'https://jenkins.dimagi.com/job/integration-tests-pipeline/build',
                params={'token': settings.MOBILE_INTEGRATION_TEST_TOKEN},
            )
    deploy_notification_text += "Find the diff {diff_link}"
    # Slack notification is best-effort and only configured on some environments.
    if hasattr(settings, 'MIA_THE_DEPLOY_BOT_API'):
        link = diff_link(STYLE_SLACK, compare_url)
        # route the notification to the channel that watches this environment
        if options['environment'] == 'staging':
            channel = '#staging'
        elif options['environment'] == 'icds':
            channel = '#nic-server-standup'
        else:
            channel = '#hq-ops'
        requests.post(settings.MIA_THE_DEPLOY_BOT_API, data=json.dumps({
            "username": "******",
            "channel": channel,
            "text": deploy_notification_text.format(
                dashboard_link=dashboard_link(STYLE_SLACK, DASHBOARD_URL),
                diff_link=link,
                integration_tests_link=integration_tests_link(STYLE_SLACK, INTEGRATION_TEST_URL)
            ),
        }))
    if settings.DATADOG_API_KEY:
        tags = ['environment:{}'.format(options['environment'])]
        link = diff_link(STYLE_MARKDOWN, compare_url)
        datadog_api.Event.create(
            title="Deploy Success",
            text=deploy_notification_text.format(
                dashboard_link=dashboard_link(STYLE_MARKDOWN, DASHBOARD_URL),
                diff_link=link,
                integration_tests_link=integration_tests_link(STYLE_MARKDOWN, INTEGRATION_TEST_URL)
            ),
            tags=tags,
            alert_type="success"
        )
    print("\n=============================================================\n" \
          "Congratulations! Deploy Complete.\n\n" \
          "Don't forget to keep an eye on the deploy dashboard to " \
          "make sure everything is running smoothly.\n\n" \
          "https://p.datadoghq.com/sb/5c4af2ac8-1f739e93ef" \
          "\n=============================================================\n")
    if options['mail_admins']:
        message_body = get_deploy_email_message_body(user=options['user'], compare_url=compare_url)
        call_command('mail_admins', message_body, **{'subject': 'Deploy successful', 'html': True})
        if settings.DAILY_DEPLOY_EMAIL:
            recipient = settings.DAILY_DEPLOY_EMAIL
            subject = 'Deploy Successful - {}'.format(options['environment'])
            send_HTML_email(subject=subject, recipient=recipient, html_content=message_body)
    if settings.SENTRY_CONFIGURED and settings.SENTRY_API_KEY:
        create_update_sentry_release()
        notify_sentry_deploy(minutes)
def handle(self, **options):
    """Record an HqDeploy, reset pillow errors, emit a Datadog deploy event,
    email admins, and register the release with Sentry.

    Indentation reconstructed from collapsed source lines — NOTE(review):
    confirm nesting of sub-blocks against history (DAILY_DEPLOY_EMAIL is
    nested under mail_admins here because it uses message_body and subject).
    """
    compare_url = options.get('url', None)
    minutes = options.get('minutes', None)
    deploy = HqDeploy(
        date=datetime.utcnow(),
        user=options['user'],
        environment=options['environment'],
        diff_url=compare_url
    )
    deploy.save()
    # reset PillowTop errors in the hope that a fix has been deployed
    rows_updated = PillowError.bulk_reset_attempts(datetime.utcnow())
    if rows_updated:
        print("\n---------------- Pillow Errors Reset ----------------\n" \
              "{} pillow errors queued for retry\n".format(rows_updated))
    deploy_notification_text = (
        "CommCareHQ has been successfully deployed to *{}* by *{}* in *{}* minutes. ".format(
            options['environment'],
            options['user'],
            minutes or '?',
        )
    )
    if options['environment'] == 'production':
        deploy_notification_text += "Monitor the {dashboard_link}. "
        if settings.MOBILE_INTEGRATION_TEST_TOKEN:
            deploy_notification_text += "Check the integration {integration_tests_link}. "
            # kick off both Jenkins integration-test jobs for this deploy
            requests.get(
                'https://jenkins.dimagi.com/job/integration-tests/build',
                params={'token': settings.MOBILE_INTEGRATION_TEST_TOKEN},
            )
            requests.get(
                'https://jenkins.dimagi.com/job/integration-tests-pipeline/build',
                params={'token': settings.MOBILE_INTEGRATION_TEST_TOKEN},
            )
    deploy_notification_text += "Find the diff {diff_link}"
    if settings.DATADOG_API_KEY:
        tags = ['environment:{}'.format(options['environment'])]
        link = diff_link(compare_url)
        datadog_api.Event.create(
            title="Deploy Success",
            text=deploy_notification_text.format(
                dashboard_link=dashboard_link(DASHBOARD_URL),
                diff_link=link,
                integration_tests_link=integration_tests_link(INTEGRATION_TEST_URL)
            ),
            tags=tags,
            alert_type="success"
        )
    print(
        "\n=============================================================\n"
        "Congratulations! Deploy Complete.\n\n"
        "Don't forget to keep an eye on the deploy dashboard to "
        "make sure everything is running smoothly.\n\n"
        "https://app.datadoghq.com/dashboard/xch-zwt-vzv/hq-deploy-dashboard?tpl_var_environment={}"
        "\n=============================================================\n".format(
            settings.SERVER_ENVIRONMENT
        )
    )
    if options['mail_admins']:
        message_body = get_deploy_email_message_body(user=options['user'], compare_url=compare_url)
        subject = 'Deploy Successful - {}'.format(options['environment'])
        call_command('mail_admins', message_body, **{'subject': subject, 'html': True})
        if settings.DAILY_DEPLOY_EMAIL:
            recipient = settings.DAILY_DEPLOY_EMAIL
            send_HTML_email(subject=subject, recipient=recipient, html_content=message_body)
    if settings.SENTRY_CONFIGURED and settings.SENTRY_API_KEY:
        create_update_sentry_release()
        notify_sentry_deploy(minutes)
def create_error(change, message='message', attempts=0, pillow=None, ex_class=None):
    """Return an (unsaved) PillowError for ``change`` with ``attempts`` recorded failures."""
    err = PillowError.get_or_create(change, pillow or FakePillow())
    for _ in range(attempts):
        err.add_attempt(*get_ex_tb(message, ex_class=ex_class))
    return err
def options(self):
    """Return (value, label) choice pairs, one per known pillow."""
    pillows = PillowError.get_pillows()
    return list(zip(pillows, pillows))
def options(self):
    """Return (value, label) choice pairs, one per known error type."""
    error_types = PillowError.get_error_types()
    return list(zip(error_types, error_types))
def _get_items(self, utcnow):
    """Return a generator of {id, key} dicts for errors due at ``utcnow``."""
    pending = PillowError.get_errors_to_process(utcnow=utcnow)
    return (dict(id=item['id'], key=item['date_next_attempt']) for item in pending)
def _get_items(utcnow):
    """Fetch up to 1000 due errors, wrapped as QueueItems keyed by next-attempt date."""
    due = PillowError.get_errors_to_process(utcnow=utcnow, limit=1000)
    items = []
    for err in due:
        items.append(QueueItem(id=err.id, key=err.date_next_attempt, object=err))
    return items
def process_pillow_retry(error_doc_id):
    """Retry the change recorded in the PillowError with pk ``error_doc_id``.

    Takes a redis lock so a double-enqueued error is processed once. On a
    successful retry the error row is deleted; on failure another attempt is
    recorded and the row is un-queued so it can be picked up again later.
    """
    # Redis error logged in get_redis_client
    try:
        client = cache_core.get_redis_client()
    except cache_core.RedisClientError:
        return

    # Prevent more than one task from processing this error, just in case
    # it got enqueued twice.
    lock = client.lock(
        "pillow-retry-processing-%s" % error_doc_id,
        timeout=settings.PILLOW_RETRY_PROCESSING_LOCK_TIMEOUT*60
    )
    if lock.acquire(blocking=False):
        try:
            error_doc = PillowError.objects.get(id=error_doc_id)
        except PillowError.DoesNotExist:
            # Row already handled/deleted elsewhere; nothing to do.
            release_lock(lock, True)
            return

        pillow_name_or_class = error_doc.pillow
        try:
            pillow = get_pillow_by_name(pillow_name_or_class)
        except PillowNotFoundError:
            if not settings.UNIT_TESTING:
                # non-fatally surface that a legacy pillow class name is still in use
                _assert = soft_assert(to='@'.join(['czue', 'dimagi.com']))
                _assert(False, 'Pillow retry {} is still using legacy class {}'.format(
                    error_doc.pk, pillow_name_or_class
                ))
            pillow = _try_legacy_import(pillow_name_or_class)

        if not pillow:
            notify_error((
                "Could not find pillowtop class '%s' while attempting a retry. "
                "If this pillow was recently deleted then this will be automatically cleaned up eventually. "
                "If not, then this should be looked into."
            ) % pillow_name_or_class)
            try:
                # Push past the cutoff so this orphaned error stops being
                # retried and is eventually cleaned up.
                error_doc.total_attempts = PillowError.multi_attempts_cutoff() + 1
                error_doc.save()
            finally:
                release_lock(lock, True)
            return

        change = error_doc.change_object
        if getattr(pillow, 'include_docs', False):
            # older pillows expect the full document attached to the change
            try:
                change.set_document(pillow.get_couch_db().open_doc(change.id))
            except ResourceNotFound:
                change.deleted = True

        try:
            try:
                from corehq.apps.userreports.pillow import ConfigurableReportKafkaPillow
                if isinstance(pillow, ConfigurableReportKafkaPillow):
                    # raised Exception propagates to the outer handler below
                    raise Exception('this is temporarily not supported!')
            except ImportError:
                pass  # UCR pillow not importable here; nothing to guard against
            pillow.process_change(change, is_retry_attempt=True)
        except Exception:
            ex_type, ex_value, ex_tb = sys.exc_info()
            error_doc.add_attempt(ex_value, ex_tb)
            error_doc.queued = False  # allow the next sweep to re-enqueue it
            error_doc.save()
        else:
            # Retry succeeded; the error record is no longer needed.
            error_doc.delete()
        finally:
            release_lock(lock, True)
def _get_items(utcnow):
    """Return id/key dicts for up to 1000 errors due at ``utcnow``."""
    due = PillowError.get_errors_to_process(utcnow=utcnow, limit=1000)
    return [{'id': row['id'], 'key': row['date_next_attempt']} for row in due]
def _get_items(utcnow):
    """Claim up to 1000 due errors: mark them queued and return their id/key dicts.

    Materializes the queryset exactly once. The original iterated the lazy
    queryset twice; if the first iteration did not populate the result cache,
    the second pass would re-run the query AFTER queued=True was set and
    return nothing. A single list() makes the claim/return set identical.
    """
    errors = list(PillowError.get_errors_to_process(utcnow=utcnow, limit=1000))
    error_pks = [error['id'] for error in errors]
    # Mark as queued so a concurrent enqueue run does not pick up the same errors.
    PillowError.objects.filter(pk__in=error_pks).update(queued=True)
    return [dict(id=e['id'], key=e['date_next_attempt']) for e in errors]