def get_canary_failures_since(self, canary_period_start):
    """
    Return the canary UploadEvents created at or after `canary_period_start`
    that have failed, ignoring uploads that are still being processed.
    """
    finished_canaries = UploadEvent.objects.filter(
        canary=True,
        created__gte=canary_period_start,
    ).exclude(
        status__in=UploadEventStatus.processing_statuses()
    )
    return self.get_failed_canaries(finished_canaries.all())
def wait_for_complete_deployment(self, timeout):
    """
    Wait up to `timeout` seconds for the deployment to finish, to ensure
    that there are no more running Lambdas before we exit.

    This is achieved by polling the number of UploadEvents that are still
    in a processing status, sleeping 3 seconds between polls.
    """
    # BUG FIX: the deadline was previously hard-coded to 30 seconds,
    # silently ignoring the `timeout` argument even though the message
    # below prints it.
    max_time = datetime.now() + timedelta(seconds=timeout)
    self.stdout.write("Waiting up to %i seconds" % (timeout))
    statuses = UploadEventStatus.processing_statuses()
    while True:
        uploads = UploadEvent.objects.filter(status__in=statuses)
        count = uploads.count()
        if not count:
            # No uploads are still processing; the deployment is quiesced.
            return
        if datetime.now() > max_time:
            self.stderr.write("Waited too long. Exiting.")
            return
        self.stdout.write("Found %i uploads... sleeping 3 seconds." % (count))
        time.sleep(3)
def wait_for_complete_deployment(self, timeout):
    """
    Wait up to `timeout` seconds for the deployment to finish, to ensure
    that there are no more running Lambdas before we exit.

    This is achieved by polling the number of UploadEvents that are still
    in a processing status, sleeping 3 seconds between polls.
    """
    # BUG FIX: the deadline was previously hard-coded to 30 seconds,
    # silently ignoring the `timeout` argument even though the message
    # below prints it.
    max_time = datetime.now() + timedelta(seconds=timeout)
    self.output("Waiting up to %i seconds" % (timeout))
    statuses = UploadEventStatus.processing_statuses()
    while True:
        uploads = UploadEvent.objects.filter(status__in=statuses)
        count = uploads.count()
        if not count:
            # No uploads are still processing; the deployment is quiesced.
            return
        if datetime.now() > max_time:
            self.output("Waited too long. Exiting.")
            return
        self.output("Found %i uploads... sleeping 3 seconds." % (count))
        time.sleep(3)
def handle(self, *args, **options):
    """
    Deploy a new Lambda version using a canary strategy.

    Publishes a new version of the canary function, points the CANARY
    alias at it, then waits for `min_canary_uploads` canary upload events
    to finish (up to `max_wait_seconds`). If any canary fails (or we time
    out), CANARY is reverted to the version PROD points at, the failed
    canary uploads are queued for reprocessing, and the process exits
    with status 1. Otherwise the PROD alias is promoted to the new
    version. `bypass_canary` (or processing being disabled) promotes
    immediately without a canary period.
    """
    func_name = self.canary_function_name
    prod_alias = LAMBDA.get_alias(FunctionName=func_name, Name="PROD")
    self.log("PROD Alias starting at version: %s" % prod_alias["FunctionVersion"])
    canary_alias = LAMBDA.get_alias(FunctionName=func_name, Name="CANARY")
    self.log("CANARY Alias starting at version: %s" % canary_alias["FunctionVersion"])

    # Create new version.
    # CONSISTENCY FIX: use self.log like every other message in this
    # method (was self.stdout.write).
    self.log("Publishing new version...")
    new_version = LAMBDA.publish_version(FunctionName=func_name)
    new_version_num = new_version["Version"]
    self.log("New version is: %s" % new_version_num)

    # This causes the new code to start getting used on canary upload events.
    canary_period_start = now()
    self.set_canary_version(new_version_num)

    if options["bypass_canary"]:
        # We promote all aliases and return immediately.
        self.log("Bypassing canary stage and promoting immediately")
        self.set_prod_version(new_version_num)
        self.log("Finished.")
        return

    if is_processing_disabled():
        # With processing off there will never be canary traffic to
        # observe, so waiting would always time out.
        self.log("Processing is disabled so we will not see any canaries")
        self.log("Bypassing canary stage and promoting immediately")
        self.set_prod_version(new_version_num)
        self.log("Finished.")
        return

    # If we did not exit already, then we are doing a canary deployment.
    max_wait_seconds = options["max_wait_seconds"]
    max_wait_time = now() + timedelta(seconds=max_wait_seconds)
    min_canary_uploads = options["min_canary_uploads"]

    wait_for_more_canary_uploads = True
    try:
        while wait_for_more_canary_uploads:
            # Canary uploads that have left the processing pipeline
            # (i.e. finished, successfully or not).
            canary_uploads = UploadEvent.objects.filter(
                canary=True,
                created__gte=canary_period_start,
            ).exclude(status__in=UploadEventStatus.processing_statuses())
            if canary_uploads.count() >= min_canary_uploads:
                wait_for_more_canary_uploads = False
            elif len(self.get_failed_canaries(canary_uploads.all())):
                # Exit early since some canaries have already failed
                wait_for_more_canary_uploads = False
            else:
                if now() > max_wait_time:
                    msg = "Waited too long for canary events. Exiting."
                    self.log(msg)
                    raise RuntimeError(msg)
                self.log("Found %i uploads... sleeping 5 seconds" % (canary_uploads.count()))
                time.sleep(5)

        self.log("%s canary upload events have been found" % (canary_uploads.count()))

        canary_failures = self.get_canary_failures_since(canary_period_start)
        if canary_failures:
            # We have canary failures, time to rollback.
            self.log("The following canary uploads have failed:")
            self.log(", ".join(u.shortid for u in canary_failures))
            raise RuntimeError("Failed canary events detected. Rolling back.")
    except Exception:
        # Any failure (canary failures or timeout) triggers a rollback.
        # Revert the canary alias back to what PROD still points to.
        prod_version = prod_alias["FunctionVersion"]
        self.log("CANARY will be reverted to version: %s" % prod_version)
        self.set_canary_version(prod_version)

        self.log("Initiating reprocessing to fix canary uploads")
        # Query again for all canary failures after reverting so we don't miss any
        canary_failures = self.get_canary_failures_since(canary_period_start)
        queue_upload_events_for_reprocessing(canary_failures, use_kinesis=True)
        self.log("Finished queuing canaries for reprocessing.")
        sys.exit(1)

    # We didn't have any canary failures so it's time to promote PROD.
    self.log("The canary version is a success! Promoting PROD.")
    self.set_prod_version(new_version_num)
    self.log("Finished.")
def handle(self, *args, **options):
    """
    Deploy a new Lambda version using a canary strategy.

    Publishes a new version of the canary function, points the CANARY
    alias at it, then waits for `min_canary_uploads` canary upload events
    to finish (up to `max_wait_seconds`). If any canary fails (or we time
    out), CANARY is reverted to the version PROD points at, the failed
    canary uploads are queued for reprocessing, and the process exits
    with status 1. Otherwise the PROD alias is promoted to the new
    version. `bypass_canary` (or processing being disabled) promotes
    immediately without a canary period.
    """
    func_name = self.canary_function_name
    prod_alias = LAMBDA.get_alias(FunctionName=func_name, Name="PROD")
    self.log("PROD Alias starting at version: %s" % prod_alias["FunctionVersion"])
    canary_alias = LAMBDA.get_alias(FunctionName=func_name, Name="CANARY")
    self.log("CANARY Alias starting at version: %s" % canary_alias["FunctionVersion"])

    # Create new version.
    # CONSISTENCY FIX: use self.log like every other message in this
    # method (was self.stdout.write).
    self.log("Publishing new version...")
    new_version = LAMBDA.publish_version(FunctionName=func_name)
    new_version_num = new_version["Version"]
    self.log("New version is: %s" % new_version_num)

    # This causes the new code to start getting used on canary upload events.
    canary_period_start = now()
    self.set_canary_version(new_version_num)

    if options["bypass_canary"]:
        # We promote all aliases and return immediately.
        self.log("Bypassing canary stage and promoting immediately")
        self.set_prod_version(new_version_num)
        self.log("Finished.")
        return

    if is_processing_disabled():
        # With processing off there will never be canary traffic to
        # observe, so waiting would always time out.
        self.log("Processing is disabled so we will not see any canaries")
        self.log("Bypassing canary stage and promoting immediately")
        self.set_prod_version(new_version_num)
        self.log("Finished.")
        return

    # If we did not exit already, then we are doing a canary deployment.
    max_wait_seconds = options["max_wait_seconds"]
    max_wait_time = now() + timedelta(seconds=max_wait_seconds)
    min_canary_uploads = options["min_canary_uploads"]

    wait_for_more_canary_uploads = True
    try:
        while wait_for_more_canary_uploads:
            # Canary uploads that have left the processing pipeline
            # (i.e. finished, successfully or not).
            canary_uploads = UploadEvent.objects.filter(
                canary=True,
                created__gte=canary_period_start,
            ).exclude(status__in=UploadEventStatus.processing_statuses())
            if canary_uploads.count() >= min_canary_uploads:
                wait_for_more_canary_uploads = False
            elif len(self.get_failed_canaries(canary_uploads.all())):
                # Exit early since some canaries have already failed
                wait_for_more_canary_uploads = False
            else:
                if now() > max_wait_time:
                    msg = "Waited too long for canary events. Exiting."
                    self.log(msg)
                    raise RuntimeError(msg)
                self.log(
                    "Found %i uploads... sleeping 5 seconds" % (canary_uploads.count(),)
                )
                time.sleep(5)

        self.log(
            "%s canary upload events have been found" % canary_uploads.count()
        )

        canary_failures = self.get_canary_failures_since(canary_period_start)
        if canary_failures:
            # We have canary failures, time to rollback.
            self.log("The following canary uploads have failed:")
            self.log(", ".join(u.shortid for u in canary_failures))
            raise RuntimeError("Failed canary events detected. Rolling back.")
    except Exception:
        # Any failure (canary failures or timeout) triggers a rollback.
        # Revert the canary alias back to what PROD still points to.
        prod_version = prod_alias["FunctionVersion"]
        self.log("CANARY will be reverted to version: %s" % prod_version)
        self.set_canary_version(prod_version)

        self.log("Initiating reprocessing to fix canary uploads")
        # Query again for all canary failures after reverting so we don't miss any
        canary_failures = self.get_canary_failures_since(canary_period_start)
        queue_upload_events_for_reprocessing(canary_failures, use_kinesis=True)
        self.log("Finished queuing canaries for reprocessing.")
        sys.exit(1)

    # We didn't have any canary failures so it's time to promote PROD.
    self.log("The canary version is a success! Promoting PROD.")
    self.set_prod_version(new_version_num)
    self.log("Finished.")