def register_task(self):
    """Create and save the history entry for a "switch_masters_in_zone" run.

    The entry uses the current timestamp as its task id, carries WARNING
    relevance, starts in RUNNING status and stores the local hostname in its
    context.

    Returns:
        The saved ``TaskHistory`` instance.
    """
    history = TaskHistory()
    field_values = (
        ('task_id', datetime.now().strftime("%Y%m%d%H%M%S")),
        ('task_name', "switch_masters_in_zone"),
        ('relevance', TaskHistory.RELEVANCE_WARNING),
        ('task_status', TaskHistory.STATUS_RUNNING),
        ('context', {'hostname': gethostname()}),
        ('user', '******'),
    )
    for field_name, value in field_values:
        setattr(history, field_name, value)
    history.save()
    return history
def test_can_get_waiting_tasks(self):
    """A task saved with WAITING status is returned by ``waiting_tasks``."""
    waiting_task = self.task
    waiting_task.task_status = TaskHistory.STATUS_WAITING
    waiting_task.save()

    found = TaskHistory.waiting_tasks()

    self.assertIsNotNone(found)
    self.assertIn(waiting_task, found)
def node_zone_migrate_rollback(self, migrate, task):
    """Register this worker run on ``task`` and roll back a node zone migration.

    Args:
        migrate: forwarded unchanged to ``rollback_node_zone_migrate``.
        task: existing task history; its ``user`` is reused for registration.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=task.user,
        worker_name=get_worker_name(),
    )
    registered_task = TaskHistory.register(**registration)

    # Imported at call time, after the task has been registered.
    from tasks_migrate import rollback_node_zone_migrate
    rollback_node_zone_migrate(migrate, registered_task)
def database_notification(self):
    """Analyze notifications for every team and record them on a task history.

    One entry per team is collected from ``analyzing_notification_for_team``.
    A task history is then registered and marked SUCCESS with one
    ``"<team>: <msg>, <msg>"`` line per team, or ERROR if recording fails.

    Raises:
        Exception: re-raised unchanged when the task history itself could not
            be registered, since there is then nothing to mark as failed.
    """
    LOG.info("retrieving all teams and sending database notification")
    msgs = {}
    for team in Team.objects.all():
        msgs[team] = analyzing_notification_for_team(team=team)

    # Bound before the try block so the handler can tell "registration failed"
    # apart from "failure after registration".
    task_history = None
    try:
        LOG.info("Messages: ")
        LOG.info(msgs)

        task_history = TaskHistory.register(
            request=self.request, user=None, worker_name=get_worker_name()
        )
        details = "\n".join(
            str(key) + ': ' + ', '.join(value) for key, value in msgs.items()
        )
        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS, details=details
        )
    except Exception as e:
        if task_history is None:
            # No task exists to flag; surface the original error.
            raise
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)

    return
def database_environment_migrate_rollback(self, migrate, task):
    """Register this worker run on ``task`` and roll back an environment migration.

    Args:
        migrate: forwarded unchanged to
            ``rollback_database_environment_migrate``.
        task: existing task history; its ``user`` is reused for registration.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=task.user,
        worker_name=get_worker_name(),
    )
    registered_task = TaskHistory.register(**registration)

    # Imported at call time, after the task has been registered.
    from tasks_database_migrate import rollback_database_environment_migrate
    rollback_database_environment_migrate(migrate, registered_task)
def make_databases_backup(self):
    """Snapshot every eligible instance of the CLOUDSTACK-provider infras.

    Instances rejected by the driver's
    ``check_instance_is_eligible_for_backup`` are skipped. Every attempted
    instance contributes one line to the task details. The task ends in ERROR
    if any backup failed or raised, otherwise in SUCCESS.
    """
    LOG.info("Making databases backups")
    task_history = TaskHistory.register(request=self.request, user=None)

    msgs = []
    status = TaskHistory.STATUS_SUCCESS
    databaseinfras = DatabaseInfra.objects.filter(
        plan__provider=Plan.CLOUDSTACK)

    for databaseinfra in databaseinfras:
        for instance in Instance.objects.filter(databaseinfra=databaseinfra):
            driver = instance.databaseinfra.get_driver()
            if not driver.check_instance_is_eligible_for_backup(instance):
                LOG.info(
                    'Instance %s is not eligible for backup' % (str(instance)))
                continue

            # Fresh dict per instance so a previous failure's message cannot
            # be reported for this one.
            error = {}
            try:
                if make_instance_snapshot_backup(instance=instance,
                                                 error=error):
                    msg = "Backup for %s was successful" % (str(instance))
                    LOG.info(msg)
                else:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), error.get('errormsg'))
                    LOG.error(msg)
                # Single-argument call form prints the same text on
                # Python 2 and Python 3.
                print(msg)
            except Exception as e:
                status = TaskHistory.STATUS_ERROR
                msg = "Backup for %s was unsuccessful. Error: %s" % (
                    str(instance), str(e))
                LOG.error(msg)

            msgs.append(msg)

    # Persist the outcome; without this the collected status and messages
    # are never written to the task history.
    task_history.update_status_for(status, details="\n".join(msgs))

    return
def remove_database_old_backups(self):
    """Remove the snapshots reported by ``get_snapshots_by_env`` for every environment.

    Each removal is delegated to ``remove_snapshot_backup``, which receives the
    shared message list. A failure is logged, appended to the messages and
    turns the final status into ERROR; remaining snapshots are still processed.
    """
    task_history = TaskHistory.register(
        request=self.request, worker_name=get_worker_name(), user=None
    )
    task_history.relevance = TaskHistory.RELEVANCE_WARNING

    to_purge = [
        snapshot
        for env in Environment.objects.all()
        for snapshot in get_snapshots_by_env(env)
    ]

    messages_log = []
    final_status = TaskHistory.STATUS_SUCCESS
    if not to_purge:
        messages_log.append("There is no snapshot to purge")

    for snapshot in to_purge:
        try:
            remove_snapshot_backup(snapshot=snapshot, msgs=messages_log)
        except Exception as e:
            failure = "Error removing backup {}. Error: {}".format(snapshot, e)
            final_status = TaskHistory.STATUS_ERROR
            LOG.error(failure)
            messages_log.append(failure)

    task_history.update_status_for(
        final_status, details="\n".join(messages_log))
    return
def remove_database_old_backups(self):
    """Remove snapshots older than the configured ``backup_retention_days``.

    Candidates are snapshots started on or before the cut-off date that are
    not purged yet and still have both an instance and a snapshot id. Each
    removal adds one line to the task details. A failure is logged with its
    cause, sets the final status to ERROR and does not stop the loop.
    """
    task_history = TaskHistory.register(request=self.request, user=None)

    backup_retention_days = Configuration.get_by_name_as_int(
        'backup_retention_days')
    LOG.info("Removing backups older than %s days" % (backup_retention_days))

    backup_time_dt = date.today() - timedelta(days=backup_retention_days)
    snapshots = Snapshot.objects.filter(
        start_at__lte=backup_time_dt,
        purge_at__isnull=True,
        instance__isnull=False,
        snapshopt_id__isnull=False,
    )

    msgs = []
    status = TaskHistory.STATUS_SUCCESS
    if len(snapshots) == 0:
        msgs.append("There is no snapshot to purge")

    for snapshot in snapshots:
        try:
            remove_snapshot_backup(snapshot=snapshot)
            msg = "Backup %s removed" % (snapshot)
            LOG.info(msg)
        except Exception as e:
            # Catch Exception rather than everything so SystemExit and
            # KeyboardInterrupt still propagate; keep the cause in the message.
            msg = "Error removing backup %s. Error: %s" % (snapshot, e)
            status = TaskHistory.STATUS_ERROR
            LOG.error(msg)
        msgs.append(msg)

    task_history.update_status_for(status, details="\n".join(msgs))

    return
def test_can_get_running_tasks(self):
    """A task saved with RUNNING status is returned by ``running_tasks``."""
    running_task = self.task
    running_task.task_status = TaskHistory.STATUS_RUNNING
    running_task.save()

    found = TaskHistory.running_tasks()

    self.assertIsNotNone(found)
    self.assertIn(running_task, found)
def _create_database_rollback(self, rollback_from, task, user):
    """Register this worker run on ``task`` and roll back a database creation.

    Args:
        rollback_from: forwarded unchanged to ``rollback_create``.
        task: existing task history to attach this run to.
        user: user recorded on the task and forwarded to ``rollback_create``.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=user,
        worker_name=get_worker_name(),
    )
    registered_task = TaskHistory.register(**registration)

    # Imported at call time, after the task has been registered.
    from tasks_create_database import rollback_create
    rollback_create(rollback_from, registered_task, user)
def delete_model(modeladmin, request, obj):
    """Delete a database, or queue its asynchronous destruction.

    A quarantined database whose plan provider is CLOUDSTACK gets a WAITING
    "destroy_database" task history and is handed to ``destroy_database.delay``.
    Any other database is deleted directly via ``database.delete()``.
    """
    LOG.debug("Deleting {}".format(obj))
    database = obj

    destroy_async = (
        database.is_in_quarantine
        and database.plan.provider == database.plan.CLOUDSTACK
    )
    if not destroy_async:
        database.delete()
        return

    LOG.debug(
        "call destroy_database - name=%s, team=%s, project=%s, user=%s" % (
            database.name, database.team, database.project, request.user))

    history = TaskHistory()
    history.task_name = "destroy_database"
    history.task_status = history.STATUS_WAITING
    history.arguments = "Database name: {}".format(database.name)
    history.user = request.user
    history.save()

    destroy_database.delay(
        database=database, task_history=history, user=request.user)

    # The reversed URL is computed but not used by this function.
    url = reverse('admin:notification_taskhistory_changelist')
def node_zone_migrate(
    self, host, zone, new_environment, task,
    since_step=None, step_manager=None
):
    """Register this worker run on ``task`` and start a node zone migration.

    Args:
        host, zone, new_environment: forwarded unchanged to the migration.
        task: existing task history; its ``user`` is reused for registration.
        since_step: forwarded positionally to the migration.
        step_manager: forwarded as a keyword argument.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=task.user,
        worker_name=get_worker_name(),
    )
    registered_task = TaskHistory.register(**registration)

    # Imported at call time; inside this scope the name refers to the imported
    # function, not to this task.
    from tasks_migrate import node_zone_migrate
    node_zone_migrate(
        host, zone, new_environment, registered_task, since_step,
        step_manager=step_manager
    )
def restore_snapshot(self, database, snapshot, user, task_history):
    """Restore ``database`` from a snapshot through the restore workflow.

    Args:
        database: database being restored.
        snapshot: id of the ``Snapshot`` to restore from.
        user: user recorded on the task history.
        task_history: id of the ``TaskHistory`` row to attach this run to.

    The task ends in ERROR, with the workflow's error codes and tracebacks as
    details, when the workflow reports a traceback; otherwise in SUCCESS.
    """
    from dbaas_nfsaas.models import HostAttr

    LOG.info("Restoring snapshot")
    worker_name = get_worker_name()

    stored_task = models.TaskHistory.objects.get(id=task_history)
    running_task = TaskHistory.register(
        request=self.request, task_history=stored_task, user=user,
        worker_name=worker_name)

    databaseinfra = database.databaseinfra

    snapshot_record = Snapshot.objects.get(id=snapshot)
    snapshot_id = snapshot_record.snapshopt_id

    # The host comes from the export the snapshot was taken on; the export
    # that is restored onto is that host's currently active one.
    snapshot_export = HostAttr.objects.get(
        nfsaas_path=snapshot_record.export_path)
    host = snapshot_export.host
    active_export = HostAttr.objects.get(host=host, is_active=True)
    export_id = active_export.nfsaas_export_id
    export_path = active_export.nfsaas_path

    is_mysql_ha = (
        databaseinfra.plan.is_ha and databaseinfra.engine_name == 'mysql')
    steps = RESTORE_SNAPSHOT_MYSQL_HA if is_mysql_ha else RESTORE_SNAPSHOT_SINGLE

    other_instances = databaseinfra.instances.exclude(
        hostname=host
    ).exclude(
        instance_type__in=[Instance.MONGODB_ARBITER, Instance.REDIS_SENTINEL]
    )
    not_primary_hosts = []
    for other in other_instances:
        not_primary_hosts.append(other.hostname)

    workflow_dict = build_dict(
        databaseinfra=databaseinfra,
        database=database,
        snapshot_id=snapshot_id,
        export_path=export_path,
        export_id=export_id,
        host=host,
        steps=steps,
        not_primary_hosts=not_primary_hosts,
    )

    start_workflow(workflow_dict=workflow_dict, task=running_task)

    if not workflow_dict['exceptions']['traceback']:
        running_task.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Database sucessfully recovered!')
        return

    codes = "\n".join(
        ": ".join(err) for err in workflow_dict['exceptions']['error_codes'])
    stack = "\nException Traceback\n".join(
        workflow_dict['exceptions']['traceback'])
    # The error codes appear both before and after the traceback.
    report = "{}\n{}\n{}".format(codes, stack, codes)
    running_task.update_status_for(TaskHistory.STATUS_ERROR, details=report)
    return
def restore_database(self, database, task, snapshot, user, retry_from=None):
    """Register this worker run on ``task`` and restore ``database`` from a snapshot group.

    Args:
        database: database to restore.
        task: existing task history to attach this run to.
        snapshot: id of the ``Snapshot`` whose ``group`` is restored.
        user: user recorded on the task history.
        retry_from: forwarded unchanged to ``restore_snapshot``.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=user,
        worker_name=get_worker_name(),
    )
    registered_task = TaskHistory.register(**registration)

    from backup.models import Snapshot
    snapshot_record = Snapshot.objects.get(id=snapshot)

    from tasks_restore_backup import restore_snapshot
    restore_snapshot(
        database, snapshot_record.group, registered_task, retry_from)
def databaseregionmigration_view(self, request, databaseregionmigration_id):
    """Schedule the next (or, with ``?rollback``, the previous) migration step.

    GET, or a POST with an invalid form, renders the scheduling template. The
    template context is ``locals()``, so local names in this function are part
    of the template contract.

    A valid POST saves a ``DatabaseRegionMigrationDetail``, creates a WAITING
    task history, queues the Celery task with the scheduled time as ``eta`` and
    redirects to the user's task history list.
    """
    form = DatabaseRegionMigrationDetailForm
    database_region_migration = DatabaseRegionMigration.objects.get(
        id=databaseregionmigration_id)

    if request.method == 'POST':
        form = DatabaseRegionMigrationDetailForm(request.POST)
        if form.is_valid():
            scheduled_for = form.cleaned_data['scheduled_for']

            database_region_migration_detail = DatabaseRegionMigrationDetail(
                database_region_migration=database_region_migration,
                step=database_region_migration.current_step,
                scheduled_for=scheduled_for,
                created_by=request.user.username)
            database_region_migration_detail.save()

            task_history = TaskHistory()
            task_history.task_name = "execute_database_region_migration"
            task_history.task_status = task_history.STATUS_WAITING
            description = database_region_migration.description()
            task_history.arguments = "Database name: {},\
 Macro step: {}".format(database_region_migration.database.name,
                        description)
            task_history.user = request.user
            task_history.save()

            is_rollback = request.GET.get('rollback')

            # datetime objects are immutable: replace()/astimezone() return
            # new values, so the converted time must be kept and used as eta.
            # A naive value is taken to be local time; an aware one keeps its
            # own zone.
            if scheduled_for.tzinfo is None:
                eta_utc = scheduled_for.replace(tzinfo=tz.tzlocal())
            else:
                eta_utc = scheduled_for
            eta_utc = eta_utc.astimezone(tz.tzutc())

            if is_rollback:
                LOG.info("Rollback!")
                database_region_migration_detail.step -= 1
                database_region_migration_detail.save()
                task = execute_database_region_migration_undo.apply_async(
                    args=[database_region_migration_detail.id,
                          task_history, request.user],
                    eta=eta_utc)
            else:
                task = execute_database_region_migration.apply_async(
                    args=[database_region_migration_detail.id,
                          task_history, request.user],
                    eta=eta_utc)

            database_region_migration_detail.celery_task_id = task.task_id
            database_region_migration_detail.save()

            url = reverse('admin:notification_taskhistory_changelist')
            return HttpResponseRedirect(
                url + "?user=%s" % request.user.username)

    return render_to_response(
        "region_migration/databaseregionmigrationdetail/schedule_next_step.html",
        locals(),
        context_instance=RequestContext(request))
def create(self, request):
    """Validate the payload and queue an asynchronous database creation.

    Returns:
        201 with ``{"task": <url>}`` pointing at the queued task when the
        serializer is valid, otherwise 400 with the serializer errors.
    """
    serializer = self.get_serializer(data=request.DATA, files=request.FILES)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    self.pre_save(serializer.object)
    data = serializer.restore_fields(request.DATA, request.FILES)

    history = TaskHistory()
    history.task_name = "create_database"
    history.task_status = history.STATUS_PENDING
    history.arguments = "Database name: {}".format(data['name'])
    history.save()

    creation_fields = (
        'name', 'plan', 'environment', 'team', 'project', 'description')
    creation_kwargs = dict((field, data[field]) for field in creation_fields)
    result = create_database.delay(
        task_history=history, user=request.user, **creation_kwargs)

    headers = self.get_success_headers(data)
    domain = Site.objects.get_current().domain
    task_url = domain + '/api/task?task_id=%s' % str(result.id)

    return Response(
        {"task": task_url}, status=status.HTTP_201_CREATED, headers=headers)
def restore_snapshot(self, request, database_id):
    """Admin view that lets a user restore a database from a snapshot.

    The request is refused with an error message and a redirect when the
    database is in quarantine, is not alive, is in use by another task, or
    has a migration started.

    A valid POST creates a WAITING "restore_snapshot" task history, calls
    ``Database.recover_snapshot`` with the chosen snapshot and redirects to the
    user's task history list. Any other request renders the restore form.
    """
    database = Database.objects.get(id=database_id)

    url = reverse('admin:logical_database_change', args=[database.id])

    if database.is_in_quarantine:
        self.message_user(
            request, "Database in quarantine and cannot be restored", level=messages.ERROR)
        return HttpResponseRedirect(url)

    if database.status != Database.ALIVE or not database.database_status.is_alive:
        self.message_user(
            request, "Database is dead and cannot be restored", level=messages.ERROR)
        return HttpResponseRedirect(url)

    if database.is_beeing_used_elsewhere():
        self.message_user(
            request, "Database is beeing used by another task, please check your tasks", level=messages.ERROR)
        return HttpResponseRedirect(url)

    if database.has_migration_started():
        self.message_user(
            request, "Database {} cannot be restored because it is beeing migrated.".format(database.name), level=messages.ERROR)
        # Unlike the checks above, this one redirects to the database list.
        url = reverse('admin:logical_database_changelist')
        return HttpResponseRedirect(url)

    form = None
    if request.method == 'POST':
        form = RestoreDatabaseForm(
            request.POST, initial={"database_id": database_id},)
        if form.is_valid():
            target_snapshot = request.POST.get('target_snapshot')

            task_history = TaskHistory()
            task_history.task_name = "restore_snapshot"
            task_history.task_status = task_history.STATUS_WAITING
            task_history.arguments = "Restoring {} to an older version.".format(
                database.name)
            task_history.user = request.user
            task_history.save()

            # Only the task history id is handed over, not the object itself.
            Database.recover_snapshot(database=database,
                                      snapshot=target_snapshot,
                                      user=request.user,
                                      task_history=task_history.id)

            url = reverse('admin:notification_taskhistory_changelist')

            return HttpResponseRedirect(url + "?user=%s" % request.user.username)
    else:
        form = RestoreDatabaseForm(initial={"database_id": database_id, })

    # locals() is the template context, so every local name above is
    # visible to the template; rename with care.
    return render_to_response("logical/database/restore.html",
                              locals(),
                              context_instance=RequestContext(request))
def set_celery_healthcheck_last_update(self):
    """Refresh the Celery health-check timestamp and record the outcome.

    Best effort: any failure is logged as a warning and, when a task history
    was registered, that task is marked ERROR. Nothing is re-raised.
    """
    # Bound up front so the handler never touches an undefined name when
    # registration itself is what failed.
    task_history = None
    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(
            request=self.request, user=None, worker_name=worker_name)

        LOG.info("Setting Celery healthcheck last update")
        CeleryHealthCheck.set_last_update()

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS, details="Finished")
    except Exception as e:
        LOG.warn("Oopss...{}".format(e))
        if task_history is not None:
            task_history.update_status_for(
                TaskHistory.STATUS_ERROR, details=e)
def database_environment_migrate(
    self, database, new_environment, new_offering, task, hosts_zones,
    since_step=None, step_manager=None
):
    """Register this worker run on ``task`` and start an environment migration.

    Args:
        database, new_environment, new_offering, hosts_zones: forwarded
            unchanged to the migration.
        task: existing task history; its ``user`` is reused for registration.
        since_step: forwarded positionally to the migration.
        step_manager: forwarded as a keyword argument.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=task.user,
        worker_name=get_worker_name(),
    )
    registered_task = TaskHistory.register(**registration)

    # Imported at call time; inside this scope the name refers to the imported
    # function, not to this task.
    from tasks_database_migrate import database_environment_migrate
    database_environment_migrate(
        database, new_environment, new_offering, registered_task,
        hosts_zones, since_step, step_manager=step_manager
    )
def purge_unused_exports_task(self):
    """Run ``purge_unused_exports`` and mark the task 'Done' or 'Error'."""
    from notification.tasks import TaskRegister

    pending_task = TaskRegister.purge_unused_exports()
    running_task = TaskHistory.register(
        request=self.request,
        worker_name=get_worker_name(),
        task_history=pending_task,
    )

    running_task.add_detail('Getting all inactive exports without snapshots')

    purged = purge_unused_exports(running_task)
    if not purged:
        running_task.set_status_error('Error')
        return
    running_task.set_status_success('Done')
def create_database(
    self, name, plan, environment, team, project, description, task,
    subscribe_to_email_events=True, is_protected=False, user=None,
    retry_from=None
):
    """Register this worker run on ``task`` and delegate the database creation.

    Every argument except ``self`` is forwarded positionally, in signature
    order, to ``tasks_create_database.create_database``, with ``task``
    replaced by the registered task history.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=user,
        worker_name=get_worker_name(),
    )
    registered_task = TaskHistory.register(**registration)

    # Imported at call time; inside this scope the name refers to the imported
    # function, not to this task.
    from tasks_create_database import create_database
    create_database(
        name, plan, environment, team, project, description, registered_task,
        subscribe_to_email_events, is_protected, user, retry_from
    )
def analyze_databases(self, task_history=None):
    """Run every execution plan against every non-quarantined database.

    For each (database, execution plan) pair not rejected by
    ``database_can_not_be_resized``, the analyze service is called. A
    ``'success'`` result appends a detail line to the task. When
    ``result['msg']`` equals the instance list that was sent, one repository
    record is inserted per instance. Any other status aborts the whole run.

    Args:
        task_history: optional existing task history to attach this run to.
    """
    endpoint, healh_check_route, healh_check_string = get_analyzing_credentials()
    user = User.objects.get(username='******')
    worker_name = get_worker_name()
    task_history = TaskHistory.register(task_history=task_history, request=self.request, user=user,
                                        worker_name=worker_name)
    task_history.update_details(persist=True, details="Loading Process...")
    AuditRequest.new_request("analyze_databases", user, "localhost")

    try:
        analyze_service = AnalyzeService(endpoint, healh_check_route,
                                         healh_check_string)
        with transaction.atomic():
            databases = Database.objects.filter(is_in_quarantine=False)
            # One timestamp shared by every record inserted in this run.
            today = datetime.now()
            for database in databases:
                database_name, engine, instances, environment_name, databaseinfra_name = setup_database_info(database)
                for execution_plan in ExecutionPlan.objects.all():
                    if database_can_not_be_resized(database, execution_plan):
                        continue
                    params = execution_plan.setup_execution_params()
                    result = analyze_service.run(engine=engine, database=database_name,
                                                 instances=instances, **params)
                    if result['status'] == 'success':
                        task_history.update_details(persist=True, details="\nDatabase {} {} was analised.".format(database, execution_plan.plan_name))
                        # Records are only written when the service returns
                        # exactly the instance list that was sent.
                        if result['msg'] != instances:
                            continue
                        for instance in result['msg']:
                            insert_analyze_repository_record(today, database_name, instance, engine,
                                                             databaseinfra_name, environment_name,
                                                             execution_plan)
                    else:
                        raise Exception("Check your service logs..")
        # NOTE(review): placement relative to the ``with`` block was
        # reconstructed from flattened source; confirm against the repository.
        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details='Analisys ok!')
    except Exception:
        try:
            # These lines read ``database``, ``execution_plan`` and ``result``,
            # which exist only if the loops above got far enough.
            task_history.update_details(persist=True, details="\nDatabase {} {} could not be analised.".format(database, execution_plan.plan_name))
            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details='Analisys finished with errors!\nError: {}'.format(result['msg']))
        except UnboundLocalError:
            # Reached when one of those loop variables was never assigned,
            # i.e. the failure happened before any analysis ran.
            task_history.update_details(persist=True, details="\nProccess crashed")
            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details='Analisys could not be started')
    finally:
        AuditRequest.cleanup_request()
def remove_database_backup(self, task, snapshot):
    """Force-remove a single snapshot backup and record the result on ``task``.

    Returns:
        True when the snapshot was removed, False when removal raised.
    """
    running_task = TaskHistory.register(
        request=self.request,
        worker_name=get_worker_name(),
        task_history=task,
    )
    running_task.add_detail('Removing {}'.format(snapshot))

    try:
        remove_snapshot_backup(snapshot, force=1)
    except Exception as e:
        running_task.add_detail('Error: {}'.format(e))
        task.set_status_error('Could not delete backup')
        return False

    task.set_status_success('Backup deleted with success')
    return True
def __init__(self):
    """Create the running "sync_celery_tasks" task history and command state."""
    super(Command, self).__init__()

    history = TaskHistory()
    # Assigned first because the attribute values below are read from it.
    self.task = history
    history.task_id = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    history.task_name = "sync_celery_tasks"
    history.relevance = TaskHistory.RELEVANCE_WARNING
    history.task_status = TaskHistory.STATUS_RUNNING
    history.context = {'hostname': socket.gethostname()}
    history.save()
    history.add_detail('Syncing metadata tasks with celery tasks')

    backup_task = {
        'name': 'backup.tasks.make_databases_backup',
        'unique_key': 'makedatabasebackupkey'
    }
    self.unique_tasks = [backup_task]
    self._redis_conn = None
def resize(cls, database, cloudstackpack, user):
    """Create a WAITING "resize_database" task history and queue the resize."""
    from notification.tasks import resize_database
    from notification.models import TaskHistory

    history = TaskHistory()
    field_values = (
        ('task_name', "resize_database"),
        ('task_status', history.STATUS_WAITING),
        ('arguments', "Database name: {}".format(database.name)),
        ('user', user),
    )
    for field_name, value in field_values:
        setattr(history, field_name, value)
    history.save()

    resize_database.delay(
        database=database,
        cloudstackpack=cloudstackpack,
        user=user,
        task_history=history,
    )
def delete_model(modeladmin, request, obj):
    """Delete a database, queue its destruction, or refuse with an admin error.

    Deletion is refused, with an error message and a redirect to the database
    list, when the database is not alive, is in use by another task, or has a
    migration started. Otherwise a quarantined database on a CLOUDSTACK plan is
    handed to ``destroy_database.delay`` with a WAITING task history, and any
    other database is deleted directly.
    """
    LOG.debug("Deleting {}".format(obj))
    database = obj

    def refuse(reason):
        # Report the reason in the admin and send the user back to the list.
        modeladmin.message_user(request, reason, level=messages.ERROR)
        return HttpResponseRedirect(
            reverse('admin:logical_database_changelist'))

    if database.status != Database.ALIVE or not database.database_status.is_alive:
        return refuse(
            "Database {} is not alive and cannot be deleted".format(
                database.name))

    if database.is_beeing_used_elsewhere():
        return refuse(
            "Database {} cannot be deleted because it is in use by another task.".format(
                database.name))

    if database.has_migration_started():
        return refuse(
            "Database {} cannot be deleted because it is beeing migrated.".format(
                database.name))

    destroy_async = (
        database.is_in_quarantine
        and database.plan.provider == database.plan.CLOUDSTACK
    )
    if not destroy_async:
        database.delete()
        return

    LOG.debug(
        "call destroy_database - name=%s, team=%s, project=%s, user=%s" % (
            database.name, database.team, database.project, request.user))

    history = TaskHistory()
    history.task_name = "destroy_database"
    history.task_status = history.STATUS_WAITING
    history.arguments = "Database name: {}".format(database.name)
    history.user = request.user
    history.save()

    destroy_database.delay(
        database=database, task_history=history, user=request.user)

    # The reversed URL is computed but not used by this function.
    url = reverse('admin:notification_taskhistory_changelist')
def post(self, request, *args, **kwargs):
    """Queue an "analyze_databases" run and redirect to the task history list."""
    from dbaas_services.analyzing.tasks import analyze_databases

    history = TaskHistory()
    field_values = (
        ('task_name', "analyze_databases"),
        ('task_status', history.STATUS_WAITING),
        ('arguments', "Waiting to start"),
    )
    for field_name, value in field_values:
        setattr(history, field_name, value)
    history.save()

    analyze_databases.delay(task_history=history)

    return HttpResponseRedirect(
        reverse('admin:notification_taskhistory_changelist'))
def purge_quarantine(self,):
    """Destroy databases quarantined for longer than the retention period.

    The cut-off date is today minus the ``quarantine_retention_days``
    configuration value. Every quarantined database whose ``quarantine_dt`` is
    on or before it is destroyed. The task ends in SUCCESS, or in ERROR with
    the exception text. The audit request is always cleaned up.

    Raises:
        Exception: re-raised unchanged when the task history itself could not
            be registered, since there is then nothing to mark as failed.
    """
    user = AccountUser.objects.get(username='******')
    AuditRequest.new_request("purge_quarantine", user, "localhost")

    # Bound before the try block so the handler can tell "registration failed"
    # apart from "failure after registration".
    task_history = None
    try:
        task_history = TaskHistory.register(request=self.request, user=user)
        task_history.relevance = TaskHistory.RELEVANCE_WARNING

        LOG.info(
            "id: {} | task: {} | kwargs: {} | args: {}".format(
                self.request.id, self.request.task,
                self.request.kwargs, str(self.request.args)
            )
        )

        quarantine_time = Configuration.get_by_name_as_int(
            'quarantine_retention_days'
        )
        quarantine_time_dt = date.today() - timedelta(days=quarantine_time)
        task_history.add_detail(
            "Quarantine date older than {}".format(quarantine_time_dt)
        )

        databases = Database.objects.filter(
            is_in_quarantine=True, quarantine_dt__lte=quarantine_time_dt
        )
        task_history.add_detail(
            "Databases to purge: {}".format(len(databases))
        )

        for database in databases:
            task_history.add_detail(
                'Deleting {}...'.format(database), level=2)
            database.destroy(user)

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Listed databases were destroyed successfully.'
        )
        return
    except Exception as e:
        if task_history is None:
            # No task exists to flag; surface the original error.
            raise
        task_history.update_status_for(
            TaskHistory.STATUS_ERROR, details="Error\n{}".format(e))
        return
    finally:
        AuditRequest.cleanup_request()
def clone(cls, database, clone_name, plan, environment, user):
    """Create a WAITING "clone_database" task history and queue the clone."""
    from notification.tasks import clone_database
    from notification.models import TaskHistory

    history = TaskHistory()
    field_values = (
        ('task_name', "clone_database"),
        ('task_status', history.STATUS_WAITING),
        ('arguments', "Database name: {}".format(database.name)),
        ('user', user),
    )
    for field_name, value in field_values:
        setattr(history, field_name, value)
    history.save()

    clone_database.delay(
        origin_database=database,
        clone_name=clone_name,
        plan=plan,
        environment=environment,
        user=user,
        task_history=history,
    )
def make_database_backup(self, database, task):
    """Take a manual backup of every backup-eligible instance of ``database``.

    Returns:
        False when the database cannot be pinned to ``task``, when no instance
        is returned by ``_get_backup_instance``, or when a snapshot could not
        be created. True otherwise, with the task marked as warning if any
        snapshot reported ``has_warning`` and as success if none did.
    """
    running_task = TaskHistory.register(
        request=self.request,
        worker_name=get_worker_name(),
        task_history=task,
    )

    pinned = database.pin_task(task)
    if not pinned:
        task.error_in_lock(database)
        return False

    running_task.add_detail('Starting database {} backup'.format(database))

    instances = _get_backup_instance(database, task)
    if not instances:
        task.set_status_error('Could not find eligible instances', database)
        return False

    _check_snapshot_limit(instances, task)

    # All snapshots of this run share one backup group.
    group = BackupGroup()
    group.save()

    any_warning = False
    for instance in instances:
        snapshot = _create_database_backup(instance, task, group)
        if not snapshot:
            failure = 'Backup was unsuccessful in {}'.format(instance)
            task.set_status_error(failure, database)
            return False

        snapshot.is_automatic = False
        snapshot.save()

        # Once a warning was seen, later snapshots are not consulted.
        any_warning = any_warning or snapshot.has_warning

    if any_warning:
        task.set_status_warning('Backup was warning', database)
    else:
        task.set_status_success('Backup was successful', database)

    return True
def register_task_history(self, task):
    """Register this worker run on ``task``, reusing the task's own user.

    Returns:
        Whatever ``TaskHistory.register`` returns.
    """
    registration = dict(
        request=self.request,
        task_history=task,
        user=task.user,
        worker_name=get_worker_name(),
    )
    return TaskHistory.register(**registration)
def mongodb_engine_version_upgrade(self, request, database_id):
    """Admin view that queues a MongoDB 2.4 to 3.0 upgrade for a database.

    The upgrade is refused, with an error message and a redirect back to the
    database page, when the database is in quarantine, is not alive, is in use
    by another task, has a flipperfox migration started, is not MongoDB 2.4,
    or the user lacks the upgrade permission. Otherwise a WAITING task history
    is created, the upgrade is queued and the user is redirected to the task
    history list.
    """
    from notification.tasks import upgrade_mongodb_24_to_30

    url = reverse('admin:logical_database_change', args=[database_id])
    database = Database.objects.get(id=database_id)

    def refuse(reason):
        # Report the reason and send the user back to the database page.
        self.message_user(request, reason, level=messages.ERROR)
        return HttpResponseRedirect(url)

    if database.is_in_quarantine:
        return refuse("Database in quarantine and cannot be upgraded!")

    if database.status != Database.ALIVE or not database.database_status.is_alive:
        return refuse("Database is dead and cannot be upgraded!")

    if database.is_being_used_elsewhere():
        return refuse(
            "Database is being used by another task, "
            "please check your tasks")

    if database.has_flipperfox_migration_started():
        return refuse(
            "Database {} is being migrated and cannot be upgraded!".format(
                database.name))

    if not database.is_mongodb_24:
        return refuse(
            "Database {} cannot be upgraded. Please contact you DBA".format(
                database.name))

    if not request.user.has_perm(constants.PERM_UPGRADE_MONGO24_TO_30):
        return refuse(
            "You have no permissions to upgrade {}. Please, contact your DBA".format(
                database.name))

    history = TaskHistory()
    history.task_name = "upgrade_mongodb_24_to_30"
    history.task_status = history.STATUS_WAITING
    history.arguments = "Upgrading MongoDB 2.4 to 3.0"
    history.user = request.user
    history.save()

    upgrade_mongodb_24_to_30.delay(
        database=database, user=request.user, task_history=history)

    return HttpResponseRedirect(
        reverse('admin:notification_taskhistory_changelist'))
def execute_scheduled_maintenance(self, maintenance_id):
    """Run a maintenance's main script on each of its hosts, with optional rollback.

    The maintenance is flagged RUNNING, every ``HostMaintenance`` row is
    processed in turn, and the maintenance is finally flagged FINISHED. The
    task history is marked SUCCESS at the end whatever the per-host results.

    Args:
        maintenance_id: primary key of the ``Maintenance`` to execute.
    """
    LOG.debug("Maintenance id: {}".format(maintenance_id))
    maintenance = models.Maintenance.objects.get(id=maintenance_id)

    # Status changes go through filter().update() rather than save().
    models.Maintenance.objects.filter(id=maintenance_id, ).update(
        status=maintenance.RUNNING, started_at=datetime.now())
    LOG.info("Maintenance {} is RUNNING".format(maintenance, ))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request, worker_name=worker_name)

    LOG.info("id: %s | task: %s | kwargs: %s | args: %s" % (self.request.id, self.request.task, self.request.kwargs, str(self.request.args)))

    task_history.update_details(
        persist=True, details="Executing Maintenance: {}".format(maintenance))

    for hm in models.HostMaintenance.objects.filter(maintenance=maintenance):
        # Passed to exec_remote_command as ``output`` and stored as main_log.
        main_output = {}
        hm.status = hm.RUNNING
        hm.started_at = datetime.now()
        hm.save()

        # A row without a host cannot be executed; flag it and move on.
        if hm.host is None:
            hm.status = hm.UNAVAILABLEHOST
            hm.finished_at = datetime.now()
            hm.save()
            continue

        host = hm.host
        update_task = "\nRunning Maintenance on {}".format(host)

        try:
            cloudstack_host_attributes = host.cs_host_attributes.get()
        except ObjectDoesNotExist as e:
            # These attributes hold the remote credentials used below, so the
            # host is flagged and skipped without them.
            LOG.warn("Host {} does not have cloudstack attrs...{}".format(
                hm.host, e))
            hm.status = hm.UNAVAILABLECSHOSTATTR
            hm.finished_at = datetime.now()
            hm.save()
            continue

        # Each maintenance parameter names a function that is called with the
        # host id; the results feed build_context_script.
        param_dict = {}
        for param in models.MaintenanceParameters.objects.filter(
                maintenance=maintenance):
            param_function = _get_function(param.function_name)
            param_dict[param.parameter_name] = param_function(host.id)

        main_script = build_context_script(param_dict, maintenance.main_script)
        exit_status = exec_remote_command(
            server=host.address,
            username=cloudstack_host_attributes.vm_user,
            password=cloudstack_host_attributes.vm_password,
            command=main_script, output=main_output)

        if exit_status == 0:
            hm.status = hm.SUCCESS
        else:
            if maintenance.rollback_script:
                rollback_output = {}
                # Saved immediately so ROLLBACK is stored while the rollback
                # script runs.
                hm.status = hm.ROLLBACK
                hm.save()

                rollback_script = build_context_script(
                    param_dict, maintenance.rollback_script)
                exit_status = exec_remote_command(
                    server=host.address,
                    username=cloudstack_host_attributes.vm_user,
                    password=cloudstack_host_attributes.vm_password,
                    command=rollback_script, output=rollback_output)

                if exit_status == 0:
                    hm.status = hm.ROLLBACK_SUCCESS
                else:
                    hm.status = hm.ROLLBACK_ERROR

                hm.rollback_log = get_dict_lines(rollback_output)
            else:
                hm.status = hm.ERROR

        update_task += "...status: {}".format(hm.status)

        task_history.update_details(persist=True,
                                    details=update_task)

        hm.main_log = get_dict_lines(main_output)
        hm.finished_at = datetime.now()
        hm.save()

    models.Maintenance.objects.filter(id=maintenance_id, ).update(
        status=maintenance.FINISHED, finished_at=datetime.now())

    # The task is marked SUCCESS even when individual hosts ended in
    # ERROR or ROLLBACK_ERROR; per-host results live on the rows above.
    task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                   details='Maintenance executed succesfully')

    LOG.info("Maintenance: {} has FINISHED".format(maintenance, ))
def restore_snapshot(self, database, snapshot, user, task_history):
    """Restore ``database`` from a snapshot through the restore workflow.

    Args:
        database: database being restored.
        snapshot: id of the ``Snapshot`` to restore from.
        user: user recorded on the task history.
        task_history: task history passed to ``TaskHistory.register``.

    Any failure marks the task ERROR. If the workflow dict had already been
    built, the details are the workflow's error codes and tracebacks;
    otherwise they are the text of the exception.
    """
    # Stays None until the workflow dict is built, so the handler knows
    # whether workflow diagnostics are available.
    workflow_dict = None
    try:
        from dbaas_nfsaas.models import HostAttr

        LOG.info("Restoring snapshot")
        worker_name = get_worker_name()

        task_history = TaskHistory.register(
            request=self.request, task_history=task_history,
            user=user, worker_name=worker_name)

        databaseinfra = database.databaseinfra

        snapshot = Snapshot.objects.get(id=snapshot)
        snapshot_id = snapshot.snapshopt_id

        # The host comes from the export the snapshot was taken on; the
        # restore target is that host's currently active export.
        host_attr_snapshot = HostAttr.objects.get(
            nfsaas_path=snapshot.export_path)
        host = host_attr_snapshot.host
        host_attr = HostAttr.objects.get(host=host, is_active=True)

        export_id_snapshot = host_attr_snapshot.nfsaas_export_id
        export_id = host_attr.nfsaas_export_id
        export_path = host_attr.nfsaas_path

        steps = get_restore_snapshot_settings(
            database.plan.replication_topology.class_path)

        not_primary_instances = databaseinfra.instances.exclude(
            hostname=host
        ).exclude(
            instance_type__in=[
                Instance.MONGODB_ARBITER, Instance.REDIS_SENTINEL
            ])

        # Arbiter hosts first, then every other non-primary host.
        not_primary_hosts = [
            arbiter.hostname
            for arbiter in databaseinfra.instances.filter(
                instance_type=Instance.MONGODB_ARBITER)
        ]
        not_primary_hosts.extend(
            instance.hostname for instance in not_primary_instances)

        tasks.disable_zabbix_alarms(database)

        workflow_dict = build_dict(
            databaseinfra=databaseinfra,
            database=database,
            snapshot_id=snapshot_id,
            export_path=export_path,
            export_id=export_id,
            export_id_snapshot=export_id_snapshot,
            host=host,
            steps=steps,
            not_primary_hosts=not_primary_hosts,
        )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if workflow_dict['exceptions']['traceback']:
            raise Exception('Restore could not be finished')

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Database sucessfully recovered!')
    except Exception as e:
        if workflow_dict is not None:
            error = "\n".join(
                ": ".join(err)
                for err in workflow_dict['exceptions']['error_codes'])
            traceback = "\nException Traceback\n".join(
                workflow_dict['exceptions']['traceback'])
            # The error codes appear both before and after the traceback.
            error = "{}\n{}\n{}".format(error, traceback, error)
        else:
            error = str(e)

        task_history.update_status_for(
            TaskHistory.STATUS_ERROR, details=error)
def execute_database_flipperfox_migration(self,
                                          database_flipperfox_migration_detail_id,
                                          task_history=None, user=None):
    """Run one step of a FlipperFox migration through ``start_workflow``.

    The migration detail is marked RUNNING, the workflow for its step is
    started and, depending on ``workflow_dict['created']``, the detail ends
    as SUCCESS (and the migration's ``current_step`` is incremented) or as
    ROLLBACK. The task history receives the matching final status.

    :param database_flipperfox_migration_detail_id: id of the
        ``DatabaseFlipperFoxMigrationDetail`` to execute.
    :param task_history: optional task history forwarded to
        ``TaskHistory.register``; its ``arguments`` are re-applied afterwards.
    :param user: user forwarded to the audit request and to the register call.
    """
    AuditRequest.new_request(
        "execute_database_flipperfox_migration", user, "localhost")

    # Bound up-front so the error handler never hits an unbound name when the
    # failure happens before the detail row is loaded.
    database_flipperfox_migration_detail = None
    try:
        arguments = task_history.arguments if task_history else None

        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=get_worker_name())

        if arguments:
            task_history.arguments = arguments
            task_history.save()

        database_flipperfox_migration_detail = DatabaseFlipperFoxMigrationDetail.objects.get(
            id=database_flipperfox_migration_detail_id)
        database_flipperfox_migration_detail.started_at = datetime.now()
        database_flipperfox_migration_detail.status = database_flipperfox_migration_detail.RUNNING
        database_flipperfox_migration_detail.save()

        database_flipperfox_migration = database_flipperfox_migration_detail.database_flipperfox_migration
        database = database_flipperfox_migration.database
        databaseinfra = database.databaseinfra

        steps = get_flipeerfox_migration_steps()
        workflow_steps = steps[
            database_flipperfox_migration_detail.step].step_classes

        source_instances = []
        source_hosts = []
        for instance in Instance.objects.filter(databaseinfra=databaseinfra):
            # After the first step only instances that already have a
            # future instance are considered sources.
            if (database_flipperfox_migration.current_step > 0 and
                    not instance.future_instance):
                continue
            source_instances.append(instance)
            if instance.instance_type != instance.REDIS:
                source_hosts.append(instance.hostname)

        source_plan = databaseinfra.plan
        target_plan = source_plan.flipperfox_equivalent_plan
        offering = databaseinfra.cs_dbinfra_offering.get().offering

        source_secondary_ips = list(
            DatabaseInfraAttr.objects.filter(databaseinfra=databaseinfra))

        workflow_dict = build_dict(
            databaseinfra=databaseinfra,
            environment=databaseinfra.environment,
            database=database,
            steps=workflow_steps,
            source_instances=source_instances,
            source_hosts=source_hosts,
            source_plan=source_plan,
            target_plan=target_plan,
            offering=offering,
            source_secondary_ips=source_secondary_ips,
        )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        # Equality with False is kept on purpose: a missing/None value is
        # not treated as a failure, exactly like the original comparison.
        if workflow_dict['created'] == False:  # noqa: E712
            if 'exceptions' in workflow_dict:
                error = "\n".join(
                    ": ".join(err)
                    for err in workflow_dict['exceptions']['error_codes'])
                trace = "\nException Traceback\n".join(
                    workflow_dict['exceptions']['traceback'])
                error = "{}\n{}\n{}".format(error, trace, error)
            else:
                error = "There is not any infra-structure to allocate this database."

            database_flipperfox_migration_detail.status = database_flipperfox_migration_detail.ROLLBACK
            database_flipperfox_migration_detail.finished_at = datetime.now()
            database_flipperfox_migration_detail.save()

            task_history.update_status_for(
                TaskHistory.STATUS_ERROR, details=error)
            return

        database_flipperfox_migration_detail.status = database_flipperfox_migration_detail.SUCCESS
        database_flipperfox_migration_detail.finished_at = datetime.now()
        database_flipperfox_migration_detail.save()

        current_step = database_flipperfox_migration.current_step
        database_flipperfox_migration.current_step = current_step + 1
        database_flipperfox_migration.save()

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Database flipper fox migration was succesfully')
        return

    except Exception as e:
        trace = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(trace)

        # The detail may not have been loaded yet; only touch it when it was.
        if database_flipperfox_migration_detail is not None:
            database_flipperfox_migration_detail.status = database_flipperfox_migration_detail.ROLLBACK
            database_flipperfox_migration_detail.finished_at = datetime.now()
            database_flipperfox_migration_detail.save()

        # No task history exists when registration itself failed without one
        # having been passed in.
        if task_history is not None:
            task_history.update_status_for(
                TaskHistory.STATUS_ERROR, details=trace)
        return

    finally:
        AuditRequest.cleanup_request()
class Command(BaseCommand):
    """Reconcile ``TaskHistory`` rows in RUNNING status with celery.

    Every running task that celery does not report as active is set to
    ERROR, together with the first database upgrade / resize linked to it.
    The command records its own progress in a ``TaskHistory`` row created in
    ``__init__``.
    """

    help = "Check if all Tasks with status running are in celery"

    option_list = BaseCommand.option_list + (
        make_option(
            "-n",
            "--celery_hosts",
            dest="celery_hosts",
            help="Number of celery hosts",
            type="int",
        ),
    )

    def __init__(self):
        super(Command, self).__init__()

        # Bookkeeping row for this very run; excluded from the check later.
        self.task = TaskHistory()
        self.task.task_id = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        self.task.task_name = "sync_celery_tasks"
        self.task.task_status = TaskHistory.STATUS_RUNNING
        self.task.context = {'hostname': socket.gethostname()}
        self.task.save()
        self.task.add_detail('Syncing metadata tasks with celery tasks')

    def handle(self, *args, **kwargs):
        """Run the check and store the outcome on the bookkeeping task.

        :raises CommandError: when ``--celery_hosts`` is missing or zero.
        """
        self.task.arguments = {'args': args, 'kwargs': kwargs}

        if not kwargs['celery_hosts']:
            # Close the bookkeeping row first; otherwise it would stay in
            # RUNNING status after the command aborts.
            self.task.update_status_for(
                TaskHistory.STATUS_ERROR,
                'Could not execute task.\n--celery_hosts was not specified')
            raise CommandError("Please specified the --celery_hosts count")

        try:
            tasks_with_problem = self.check_tasks(kwargs['celery_hosts'])
        except CeleryActivesNodeError as celery_error:
            self.task.update_status_for(
                TaskHistory.STATUS_WARNING,
                'Could not check celery tasks.\n{}{}'.format(
                    full_stack(), celery_error))
            return
        except Exception as e:
            self.task.update_status_for(
                TaskHistory.STATUS_ERROR,
                'Could not execute task.\n{}{}'.format(full_stack(), e))
            return

        problems = len(tasks_with_problem)
        status = TaskHistory.STATUS_SUCCESS
        if problems > 0:
            status = TaskHistory.STATUS_WARNING
        self.task.update_status_for(status, 'Problems: {}'.format(problems))

    def check_tasks(self, celery_hosts):
        """Set running tasks unknown to celery to ERROR and return them.

        :param celery_hosts: number of celery hosts expected to answer.
        :returns: list of ``TaskHistory`` objects that were set to ERROR.
        """
        tasks_running = TaskHistory.objects.filter(
            task_status=TaskHistory.STATUS_RUNNING).exclude(id=self.task.id)
        self.task.add_detail("\nTasks with status running: {}\n".format(
            len(tasks_running)))

        celery_tasks = self.get_celery_active_tasks(celery_hosts)
        self.task.add_detail("Celery running: {}\n".format(len(celery_tasks)))
        # Set for O(1) membership tests inside the loop below.
        celery_task_ids = set(celery_tasks)

        tasks_with_problem = []
        self.task.add_detail("Checking tasks status")
        for task in tasks_running:
            self.task.add_detail("{} - {}".format(task.task_id,
                                                  task.task_name), level=1)

            # Reload the row: its status may have changed since the list of
            # running tasks was fetched.
            task = TaskHistory.objects.get(id=task.id)
            if task.is_running and task.task_id in celery_task_ids:
                self.task.add_detail("OK: Running in celery", level=2)
                continue

            tasks_with_problem.append(task)
            self.task.add_detail("ERROR: Not running in celery", level=2)
            self.task.add_detail("Setting task to ERROR status", level=3)
            task.update_status_for(status=TaskHistory.STATUS_ERROR,
                                   details="Celery is not running this task")

            database_upgrade = task.database_upgrades.first()
            if database_upgrade:
                self.task.add_detail(
                    "Setting database upgrade {} status to ERROR".format(
                        database_upgrade.id), level=3)
                database_upgrade.set_error()

            database_resize = task.database_resizes.first()
            if database_resize:
                self.task.add_detail(
                    "Setting database resize {} status to ERROR".format(
                        database_resize.id), level=3)
                database_resize.set_error()

        return tasks_with_problem

    def get_celery_active_tasks(self, expected_hosts):
        """Return the ids of every task celery reports as active.

        :param expected_hosts: number of hosts that must answer.
        :raises CeleryActivesNodeError: when the number of answering hosts
            differs from ``expected_hosts``.
        """
        self.task.add_detail('Collecting celery tasks...')

        # inspect().active() may yield nothing at all; normalise that to an
        # empty mapping so the code below never touches None.
        actives = app.control.inspect().active() or {}

        activated_hosts = list(actives)
        if len(activated_hosts) != expected_hosts:
            raise CeleryActivesNodeError(expected_hosts, activated_hosts)

        active_tasks = []
        for host, tasks in actives.items():
            self.task.add_detail('Host {} tasks:'.format(host), level=1)
            for task in tasks:
                task_id = task['id']
                self.task.add_detail('{}'.format(task_id), level=2)
                active_tasks.append(task_id)

        return active_tasks
def update_ssl(self, database, task, since_step=None, step_manager=None,
               scheduled_task=None, auto_rollback=False):
    """Run the SSL update steps for ``database`` and record the outcome.

    A given ``step_manager`` is reused as a fresh record (id and start time
    cleared) resuming at its current step. Otherwise a new ``UpdateSsl`` is
    created, resuming from the last retryable failed one when there is any.
    When the steps fail and ``auto_rollback`` is set, a rollback run is
    executed and tracked on copies of the task and the step manager.

    :param database: database whose infra provides steps and instances.
    :param task: task history forwarded to ``TaskHistory.register``.
    :param since_step: step to resume from; overridden on reuse/retry.
    :param step_manager: optional existing ``UpdateSsl`` to reuse.
    :param scheduled_task: optional schedule attached to the step manager.
    :param auto_rollback: roll back automatically when the update fails.
    """
    from maintenance.models import UpdateSsl

    task = TaskHistory.register(
        request=self.request,
        task_history=task,
        user=task.user,
        worker_name=get_worker_name(),
    )

    if not step_manager:
        last_failed = UpdateSsl.objects.filter(
            can_do_retry=True,
            database=database,
            status=UpdateSsl.ERROR,
        ).last()
        step_manager = UpdateSsl()
        if last_failed:
            step_manager.current_step = last_failed.current_step
            since_step = last_failed.current_step
            step_manager.task_schedule = last_failed.task_schedule
    else:
        step_manager.id = None
        step_manager.started_at = None
        since_step = step_manager.current_step

    step_manager.database = database
    step_manager.task = task
    if scheduled_task:
        step_manager.task_schedule = scheduled_task
    step_manager.set_running()
    step_manager.save()

    steps = database.databaseinfra.update_ssl_steps()
    instances = database.infra.get_driver().get_database_instances()
    succeeded = steps_for_instances(
        steps, instances, task, step_manager.update_step, since_step,
        step_manager=step_manager
    )

    # Reload the manager after the steps ran.
    step_manager = UpdateSsl.objects.get(id=step_manager.id)

    if succeeded:
        step_manager.set_success()
        task.set_status_success('SSL Update with success')
        return

    step_manager.set_error()
    task.set_status_error('Could not update SSL')
    if not auto_rollback:
        return

    from workflow.workflow import rollback_for_instances_full

    # Same object as ``task``: the id is cleared before saving, presumably so
    # the rollback is stored as a record of its own.
    new_task = task
    new_task.id = None
    new_task.details = ''
    new_task.task_name += '_rollback'
    new_task.task_status = new_task.STATUS_RUNNING
    new_task.save()

    # Same trick for the step manager tracking the rollback.
    rollback_step_manager = step_manager
    rollback_step_manager.id = None
    rollback_step_manager.task_schedule = None
    rollback_step_manager.can_do_retry = 0
    rollback_step_manager.save()

    rolled_back = rollback_for_instances_full(
        steps, instances, new_task,
        rollback_step_manager.get_current_step,
        rollback_step_manager.update_step,
    )
    if rolled_back:
        rollback_step_manager.set_success()
        task.set_status_success('Rollback SSL Update with success')
        return

    if hasattr(rollback_step_manager, 'cleanup'):
        rollback_step_manager.cleanup(instances)
    rollback_step_manager.set_error()
    task.set_status_error('Could not rollback update SSL')
def databaseflipperfoxmigration_view(self, request,
                                     databaseflipperfoxmigration_id):
    """Schedule the next (or the rollback) step of a FlipperFox migration.

    On a valid POST a migration detail and a waiting ``TaskHistory`` are
    created, the matching celery task is queued for the requested time, and
    the user is redirected to the task history list. Otherwise the scheduling
    form is rendered.

    :param request: current HTTP request; ``?rollback`` selects the undo task.
    :param databaseflipperfoxmigration_id: id of the migration to schedule.
    """
    # NOTE: locals() is handed to the template at the end of this function,
    # so the local names below are part of the template context. Do not
    # rename them.
    form = DatabaseFlipperFoxMigrationDetailForm
    database_flipperfox_migration = DatabaseFlipperFoxMigration.objects.get(
        id=databaseflipperfoxmigration_id)

    if request.method == 'POST':
        form = DatabaseFlipperFoxMigrationDetailForm(request.POST)
        if form.is_valid():
            scheduled_for = form.cleaned_data['scheduled_for']

            database_flipperfox_migration_detail = DatabaseFlipperFoxMigrationDetail(
                database_flipperfox_migration=database_flipperfox_migration,
                step=database_flipperfox_migration.current_step,
                scheduled_for=scheduled_for,
                created_by=request.user.username)
            database_flipperfox_migration_detail.save()

            task_history = TaskHistory()
            task_history.task_name = "execute_database_flipperfox_migration"
            task_history.task_status = task_history.STATUS_WAITING

            description = database_flipperfox_migration.description()
            task_history.arguments = "Database name: {}, Macro step: {}".format(
                database_flipperfox_migration.database.name, description)
            task_history.user = request.user
            task_history.save()

            is_rollback = request.GET.get('rollback')

            # datetime objects are immutable: the converted value has to be
            # kept, otherwise the UTC conversion silently does nothing and
            # the unconverted time is used as the celery eta.
            eta = scheduled_for.replace(
                tzinfo=tz.tzlocal()).astimezone(tz.tzutc())

            if is_rollback:
                LOG.info("Rollback!")
                database_flipperfox_migration_detail.step -= 1
                database_flipperfox_migration_detail.save()
                task = execute_database_flipperfox_migration_undo.apply_async(
                    args=[
                        database_flipperfox_migration_detail.id,
                        task_history,
                        request.user
                    ],
                    eta=eta)
            else:
                task = execute_database_flipperfox_migration.apply_async(
                    args=[
                        database_flipperfox_migration_detail.id,
                        task_history,
                        request.user
                    ],
                    eta=eta)

            database_flipperfox_migration_detail.celery_task_id = task.task_id
            database_flipperfox_migration_detail.save()

            url = reverse('admin:notification_taskhistory_changelist')
            return HttpResponseRedirect(
                url + "?user=%s" % request.user.username)

    return render_to_response(
        "flipperfox_migration/databaseflipperfoxmigrationdetail/schedule_next_step.html",
        locals(),
        context_instance=RequestContext(request))
def execute_database_flipperfox_migration_undo(self,
                                               database_flipperfox_migration_detail_id,
                                               task_history=None, user=None):
    """Undo one step of a FlipperFox migration through ``stop_workflow``.

    The migration detail is marked RUNNING with ``is_migration_up = False``,
    the workflow for its step is stopped, the migration's ``current_step`` is
    decremented and the detail ends as SUCCESS. Any exception marks the task
    history and the detail as ERROR.

    :param database_flipperfox_migration_detail_id: id of the
        ``DatabaseFlipperFoxMigrationDetail`` to undo.
    :param task_history: optional task history forwarded to
        ``TaskHistory.register``; its ``arguments`` are re-applied afterwards.
    :param user: user forwarded to the audit request and to the register call.
    """
    AuditRequest.new_request(
        "execute_database_flipperfox_migration", user, "localhost")

    # Bound up-front so the error handler never hits an unbound name when the
    # failure happens before the detail row is loaded.
    database_flipperfox_migration_detail = None
    try:
        arguments = task_history.arguments if task_history else None

        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=get_worker_name())

        if arguments:
            task_history.arguments = arguments
            task_history.save()

        database_flipperfox_migration_detail = DatabaseFlipperFoxMigrationDetail.objects.get(
            id=database_flipperfox_migration_detail_id)
        database_flipperfox_migration_detail.started_at = datetime.now()
        database_flipperfox_migration_detail.status = database_flipperfox_migration_detail.RUNNING
        database_flipperfox_migration_detail.is_migration_up = False
        database_flipperfox_migration_detail.save()

        database_flipperfox_migration = database_flipperfox_migration_detail.database_flipperfox_migration
        database = database_flipperfox_migration.database
        databaseinfra = database.databaseinfra

        steps = get_flipeerfox_migration_steps()
        workflow_steps = steps[
            database_flipperfox_migration_detail.step].step_classes

        # Sources: instances that already point to a future instance.
        source_instances = []
        source_hosts = []
        for instance in databaseinfra.instances.filter(
                future_instance__isnull=False):
            source_instances.append(instance)
            if instance.instance_type != instance.REDIS:
                source_hosts.append(instance.hostname)

        # Targets: instances without a future instance.
        target_instances = []
        target_hosts = []
        for instance in databaseinfra.instances.filter(
                future_instance__isnull=True):
            target_instances.append(instance)
            if instance.instance_type != instance.REDIS:
                target_hosts.append(instance.hostname)

        source_plan = databaseinfra.plan
        target_plan = source_plan.flipperfox_equivalent_plan

        if not source_hosts:
            raise Exception('There is no source host')
        if not source_instances:
            raise Exception('There is no source instance')
        if not target_hosts:
            raise Exception('There is no target host')
        if not target_instances:
            raise Exception('There is no target instance')

        source_secondary_ips = list(
            DatabaseInfraAttr.objects.filter(databaseinfra=databaseinfra))

        workflow_dict = build_dict(
            database_flipperfox_migration_detail=database_flipperfox_migration_detail,
            database_flipperfox_migration=database_flipperfox_migration,
            database=database,
            databaseinfra=databaseinfra,
            environment=databaseinfra.environment,
            steps=workflow_steps,
            source_instances=source_instances,
            source_plan=source_plan,
            target_plan=target_plan,
            source_hosts=source_hosts,
            target_instances=target_instances,
            target_hosts=target_hosts,
            source_secondary_ips=source_secondary_ips,
        )

        stop_workflow(workflow_dict=workflow_dict, task=task_history)

        current_step = database_flipperfox_migration.current_step
        database_flipperfox_migration.current_step = current_step - 1
        database_flipperfox_migration.save()

        database_flipperfox_migration_detail.status = database_flipperfox_migration_detail.SUCCESS
        database_flipperfox_migration_detail.finished_at = datetime.now()
        database_flipperfox_migration_detail.save()

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Database flipper fox migration was succesfully')

    except Exception as e:
        trace = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(trace)

        # No task history exists when registration itself failed without one
        # having been passed in.
        if task_history is not None:
            task_history.update_status_for(
                TaskHistory.STATUS_ERROR, details=trace)

        # The detail may not have been loaded yet; only touch it when it was.
        if database_flipperfox_migration_detail is not None:
            database_flipperfox_migration_detail.status = database_flipperfox_migration_detail.ERROR
            database_flipperfox_migration_detail.finished_at = datetime.now()
            database_flipperfox_migration_detail.save()
        return

    finally:
        AuditRequest.cleanup_request()
def execute_scheduled_maintenance(self, maintenance_id):
    """Execute a maintenance script on every host attached to it.

    The maintenance is flagged RUNNING, each ``HostMaintenance`` gets the
    main script executed remotely (with the rollback script as fallback when
    one is configured and the main script fails), and the maintenance is
    flagged FINISHED at the end. Progress is appended to a ``TaskHistory``.

    :param maintenance_id: id of the ``Maintenance`` to execute.
    """
    LOG.debug("Maintenance id: {}".format(maintenance_id))
    maintenance = models.Maintenance.objects.get(id=maintenance_id)
    models.Maintenance.objects.filter(id=maintenance_id).update(
        status=maintenance.RUNNING, started_at=datetime.now()
    )
    LOG.info("Maintenance {} is RUNNING".format(maintenance,))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name
    )
    LOG.info("id: {} | task: {} | kwargs: {} | args: {}".format(
        self.request.id, self.request.task,
        self.request.kwargs, str(self.request.args)
    ))
    task_history.update_details(
        persist=True, details="Executing Maintenance: {}".format(maintenance)
    )

    # The filter does not depend on the host, so the queryset is built once
    # and shared by every iteration instead of being rebuilt per host.
    params = models.MaintenanceParameters.objects.filter(
        maintenance=maintenance
    )

    for hm in models.HostMaintenance.objects.filter(maintenance=maintenance):
        main_output = {}
        hm.status = hm.RUNNING
        hm.started_at = datetime.now()
        hm.save()

        if hm.host is None:
            hm.status = hm.UNAVAILABLEHOST
            hm.finished_at = datetime.now()
            hm.save()
            continue

        host = hm.host
        update_task = "\nRunning Maintenance on {}".format(host)

        if maintenance.disable_alarms:
            disable_alarms(host)

        try:
            # Each parameter value is produced by its function for this host.
            param_dict = {}
            for param in params:
                param_function = get_function(param.function_name)
                param_dict[param.parameter_name] = param_function(host.id)

            main_script = build_context_script(
                param_dict, maintenance.main_script
            )
            exit_status = exec_remote_command_host(
                host, main_script, main_output
            )

            if exit_status == 0:
                hm.status = hm.SUCCESS
            elif maintenance.rollback_script:
                rollback_output = {}
                hm.status = hm.ROLLBACK
                hm.save()

                rollback_script = build_context_script(
                    param_dict, maintenance.rollback_script
                )
                exit_status = exec_remote_command_host(
                    host, rollback_script, rollback_output
                )

                if exit_status == 0:
                    hm.status = hm.ROLLBACK_SUCCESS
                else:
                    hm.status = hm.ROLLBACK_ERROR

                hm.rollback_log = get_dict_lines(rollback_output)
            else:
                hm.status = hm.ERROR
        finally:
            # Alarms must come back even when the remote execution raises;
            # otherwise the host would stay unmonitored.
            if maintenance.disable_alarms:
                enable_alarms(host)

        update_task += "...status: {}".format(hm.status)
        task_history.update_details(persist=True, details=update_task)

        hm.main_log = get_dict_lines(main_output)
        hm.finished_at = datetime.now()
        hm.save()

    models.Maintenance.objects.filter(id=maintenance_id).update(
        status=maintenance.FINISHED, finished_at=datetime.now()
    )
    task_history.update_status_for(
        TaskHistory.STATUS_SUCCESS, details='Maintenance executed succesfully'
    )
    LOG.info("Maintenance: {} has FINISHED".format(maintenance))
def make_databases_backup(self):
    """Back up every persistent infra, environment by environment.

    Environments are processed in the configured ``prod_envs`` + ``dev_envs``
    order (or every environment when none is configured). Inside an
    environment the run pauses for five minutes after each group of
    ``len(infras) // 12`` backups. The task history ends with the most severe
    status seen: ERROR over WARNING over SUCCESS.
    """
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, user=None)
    task_history.relevance = TaskHistory.RELEVANCE_ERROR

    waiting_msg = "\nWaiting 5 minutes to start the next backup group"
    status = TaskHistory.STATUS_SUCCESS

    environments = Environment.objects.all()
    prod_envs = Configuration.get_by_name_as_list('prod_envs')
    dev_envs = Configuration.get_by_name_as_list('dev_envs')
    env_names_order = prod_envs + dev_envs
    if not env_names_order:
        env_names_order = [env.name for env in environments]

    infras = DatabaseInfra.objects.filter(plan__has_persistence=True)

    for env_name in env_names_order:
        try:
            env = environments.get(name=env_name)
        except Environment.DoesNotExist:
            continue

        msg = '\nStarting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)

        databaseinfras_by_env = infras.filter(environment=env)
        error = {}
        backup_number = 0
        # Floor division keeps the group size an integer regardless of the
        # division semantics in effect.
        backups_per_group = len(infras) // 12

        for infra in databaseinfras_by_env:
            if not infra.databases.first():
                continue

            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    task_history.update_details(waiting_msg, True)
                    sleep(300)

            group = BackupGroup()
            group.save()

            for instance in infra.instances.filter(read_only=False):
                try:
                    driver = instance.databaseinfra.get_driver()
                    is_eligible = driver.check_instance_is_eligible_for_backup(
                        instance)
                    if not is_eligible:
                        LOG.info(
                            'Instance {} is not eligible for backup'.format(
                                instance))
                        continue
                except Exception as e:
                    # NOTE(review): a failed eligibility check is recorded as
                    # an error but the backup is still attempted below, as in
                    # the original flow - confirm this is intended.
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(
                    time_now, instance)
                task_history.update_details(persist=True, details=start_msg)

                try:
                    snapshot = make_instance_snapshot_backup(
                        instance=instance, error=error, group=group)
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        # A warning must not hide an error already recorded
                        # for a previous instance.
                        if status != TaskHistory.STATUS_ERROR:
                            status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        # .get avoids raising KeyError just to build the
                        # message when no error text was provided.
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error.get('errormsg'))
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)

    task_history.update_status_for(status, details="\nBackup finished")

    return
def restore_snapshot(self, database, snapshot, user, task_history):
    """Restore ``database`` from a snapshot by running the restore workflow.

    Zabbix alarms are disabled before the workflow and always re-enabled at
    the end. The task history is finished with ``STATUS_SUCCESS`` when the
    workflow records no traceback and with ``STATUS_ERROR`` otherwise.

    :param database: database to restore.
    :param snapshot: id used to load the ``Snapshot`` row.
    :param user: user forwarded to ``TaskHistory.register``.
    :param task_history: id used to load the ``TaskHistory`` row.
    """
    # Stays None until the workflow context has been built, so the error
    # handler does not fail with a NameError on early failures.
    workflow_dict = None
    try:
        from dbaas_nfsaas.models import HostAttr

        LOG.info("Restoring snapshot")
        worker_name = get_worker_name()

        task_history = models.TaskHistory.objects.get(id=task_history)
        task_history = TaskHistory.register(
            request=self.request, task_history=task_history,
            user=user, worker_name=worker_name)

        databaseinfra = database.databaseinfra

        snapshot = Snapshot.objects.get(id=snapshot)
        # Attribute spelling kept exactly as the original code reads it.
        snapshot_id = snapshot.snapshopt_id

        host_attr_snapshot = HostAttr.objects.get(
            nfsaas_path=snapshot.export_path)
        host = host_attr_snapshot.host
        host_attr = HostAttr.objects.get(host=host, is_active=True)

        export_id_snapshot = host_attr_snapshot.nfsaas_export_id
        export_id = host_attr.nfsaas_export_id
        export_path = host_attr.nfsaas_path

        steps = RESTORE_SNAPSHOT_SINGLE

        tasks.disable_zabbix_alarms(database)

        if databaseinfra.plan.is_ha and databaseinfra.engine_name == 'mysql':
            steps = RESTORE_SNAPSHOT_MYSQL_HA

        # Computed for every topology so the workflow context below always
        # has a value for it.
        not_primary_instances = databaseinfra.instances.exclude(
            hostname=host).exclude(instance_type__in=[
                Instance.MONGODB_ARBITER, Instance.REDIS_SENTINEL
            ])
        not_primary_hosts = [
            instance.hostname for instance in not_primary_instances
        ]

        workflow_dict = build_dict(
            databaseinfra=databaseinfra,
            database=database,
            snapshot_id=snapshot_id,
            export_path=export_path,
            export_id=export_id,
            export_id_snapshot=export_id_snapshot,
            host=host,
            steps=steps,
            not_primary_hosts=not_primary_hosts,
        )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if workflow_dict['exceptions']['traceback']:
            raise Exception('Restore could not be finished')

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Database sucessfully recovered!')

    except Exception as e:
        if workflow_dict is not None:
            error = "\n".join(
                ": ".join(err)
                for err in workflow_dict['exceptions']['error_codes'])
            trace = "\nException Traceback\n".join(
                workflow_dict['exceptions']['traceback'])
            # The error summary is emitted both before and after the
            # traceback, exactly as before.
            error = "{}\n{}\n{}".format(error, trace, error)
        else:
            # Failure happened before the workflow context existed.
            error = str(e)

        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=error)
    finally:
        tasks.enable_zabbix_alarms(database)
def post(self, request, format=None):
    """Validate a database creation request and queue ``create_database``.

    Every failed validation is answered through ``log_and_response`` with
    HTTP 500. When all checks pass, a ``TaskHistory`` is saved, the
    ``create_database`` task is queued and HTTP 201 is returned.

    Expected payload keys: ``name``, ``user``, ``team``, ``description`` and
    ``plan``. The environment comes from the request URL.
    """
    def reject(msg, **extra):
        # Single exit point for all validation failures.
        return log_and_response(
            msg=msg,
            http_status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            **extra
        )

    payload = request.DATA
    name = payload['name']
    user = payload['user']
    team = payload['team']
    env = get_url_env(request)

    # A missing key and an empty value are rejected alike.
    description = payload.get('description')
    if not description:
        return reject("A description must be provided.")

    if re.match('^[a-z][a-z0-9_]+$', name) is None:
        return reject("Your database name must match /^[a-z][a-z0-9_]+$/ .")

    try:
        Database.objects.get(name=name, environment__name=env)
    except ObjectDoesNotExist:
        pass
    else:
        return reject(
            "There is already a database called {} in {}.".format(name, env)
        )

    if database_name_evironment_constraint(name, env):
        return reject("{} already exists in production!".format(name))

    try:
        dbaas_user = AccountUser.objects.get(email=user)
    except ObjectDoesNotExist as e:
        return reject("User does not exist.", e=e)

    try:
        dbaas_team = Team.objects.get(name=team)
    except ObjectDoesNotExist as e:
        return reject("Team does not exist.", e=e)

    try:
        dbaas_user.team_set.get(name=dbaas_team.name)
    except ObjectDoesNotExist as e:
        return reject(
            "The user is not on {} team.".format(dbaas_team.name), e=e
        )

    try:
        dbaas_environment = Environment.objects.get(name=env)
    except ObjectDoesNotExist:
        return reject("Environment does not exist.")

    databases_used_by_team = dbaas_team.count_databases_in_use(
        environment=dbaas_environment
    )
    database_alocation_limit = dbaas_team.database_alocation_limit
    if databases_used_by_team >= database_alocation_limit:
        return reject(
            "The database alocation limit of {} has been exceeded for the selected team: {}".format(
                database_alocation_limit, dbaas_team
            )
        )

    if 'plan' not in payload:
        return reject("Plan was not found")

    plan_name = payload['plan']
    hard_plans = Plan.objects.values(
        'name', 'description', 'pk', 'environments__name'
    ).extra(
        where=['is_active=True', 'provider={}'.format(Plan.CLOUDSTACK)]
    )
    plan = [
        candidate for candidate in get_plans_dict(hard_plans)
        if candidate['name'] == plan_name
    ]
    LOG.info("Plan: {}".format(plan))

    if not any(plan):
        return reject("Plan was not found")
    dbaas_plan = Plan.objects.get(pk=plan[0]['pk'])

    task_history = TaskHistory()
    task_history.task_name = "create_database"
    task_history.arguments = "Database name: {}".format(name)
    task_history.save()

    create_database.delay(
        name=name,
        plan=dbaas_plan,
        environment=dbaas_environment,
        team=dbaas_team,
        project=None,
        description=description,
        contacts=None,
        task_history=task_history,
        user=dbaas_user
    )

    return Response(status=status.HTTP_201_CREATED)
def register_task(self, database):
    """Create, save and return a RUNNING ``TaskHistory`` for ``database``.

    The task is named after ``self.TASK_NAME``, identified by the current
    timestamp and tagged with the local hostname and the database it refers
    to.
    """
    task_history = TaskHistory()

    # Applied in order, one attribute at a time, before the single save().
    attributes = (
        ('task_id', datetime.now().strftime("%Y%m%d%H%M%S")),
        ('task_name', self.TASK_NAME),
        ('relevance', TaskHistory.RELEVANCE_WARNING),
        ('task_status', TaskHistory.STATUS_RUNNING),
        ('context', {'hostname': gethostname()}),
        ('user', '******'),
        ('db_id', database.id),
        ('object_class', "logical_database"),
        ('object_id', database.id),
        ('database_name', database.name),
        ('arguments', 'Database_name: {}'.format(database.name)),
    )
    for attr_name, attr_value in attributes:
        setattr(task_history, attr_name, attr_value)

    task_history.save()
    return task_history
def make_databases_backup(self):
    """Back up the infras due at the current hour plus the pending ones.

    An infra is due when its ``backup_hour`` equals the current hour. It is
    pending when its ``backup_hour`` is earlier today and it is not among the
    infras with a backup of status 2 finished today. Environments are
    processed in the configured ``prod_envs`` + ``dev_envs`` order (or all of
    them when none is configured). Inside an environment the run pauses for
    ``backup_group_interval`` minutes after each group of
    ``len(infras) // 12`` backups. The task history ends with the most severe
    status seen: ERROR over WARNING over SUCCESS.
    """
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, user=None)
    task_history.relevance = TaskHistory.RELEVANCE_ERROR

    backup_group_interval = Configuration.get_by_name_as_int(
        'backup_group_interval', default=1)
    waiting_msg = "\nWaiting {} minute(s) to start the next backup group".format(
        backup_group_interval)
    status = TaskHistory.STATUS_SUCCESS

    environments = Environment.objects.all()
    prod_envs = Configuration.get_by_name_as_list('prod_envs')
    dev_envs = Configuration.get_by_name_as_list('dev_envs')
    env_names_order = prod_envs + dev_envs
    if not env_names_order:
        env_names_order = [env.name for env in environments]

    current_time = datetime.now()
    current_hour = current_time.hour

    # Get all infras with a backup today until the current hour
    infras_with_backup_today = DatabaseInfra.objects.filter(
        instances__backup_instance__status=2,
        backup_hour__lt=current_hour,
        plan__has_persistence=True,
        instances__backup_instance__end_at__year=current_time.year,
        instances__backup_instance__end_at__month=current_time.month,
        instances__backup_instance__end_at__day=current_time.day).distinct()

    # Get all infras with pending backups based on infras_with_backup_today
    infras_pending_backup = DatabaseInfra.objects.filter(
        backup_hour__lt=current_hour,
        plan__has_persistence=True,
    ).exclude(pk__in=[infra.pk for infra in infras_with_backup_today])

    # Get all infras to backup on the current hour
    infras_current_hour = DatabaseInfra.objects.filter(
        plan__has_persistence=True, backup_hour=current_time.hour)

    # Merging pending and current infras to backup list
    infras = infras_current_hour | infras_pending_backup

    for env_name in env_names_order:
        try:
            env = environments.get(name=env_name)
        except Environment.DoesNotExist:
            continue

        msg = '\nStarting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)

        databaseinfras_by_env = infras.filter(environment=env)
        error = {}
        backup_number = 0
        # Floor division keeps the group size an integer regardless of the
        # division semantics in effect.
        backups_per_group = len(infras) // 12

        for infra in databaseinfras_by_env:
            if not infra.databases.first():
                continue

            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    task_history.update_details(waiting_msg, True)
                    sleep(backup_group_interval * 60)

            group = BackupGroup()
            group.save()

            for instance in infra.instances.filter(read_only=False):
                try:
                    driver = instance.databaseinfra.get_driver()
                    is_eligible = driver.check_instance_is_eligible_for_backup(
                        instance)
                    if not is_eligible:
                        LOG.info(
                            'Instance {} is not eligible for backup'.format(
                                instance))
                        continue
                except Exception as e:
                    # NOTE(review): a failed eligibility check is recorded as
                    # an error but the backup is still attempted below, as in
                    # the original flow - confirm this is intended.
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(
                    time_now, instance)
                task_history.update_details(persist=True, details=start_msg)

                try:
                    snapshot = make_instance_snapshot_backup(
                        instance=instance, error=error, group=group)
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        # A warning must not hide an error already recorded
                        # for a previous instance.
                        if status != TaskHistory.STATUS_ERROR:
                            status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        # .get avoids raising KeyError just to build the
                        # message when no error text was provided.
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error.get('errormsg'))
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)

    task_history.update_status_for(status, details="\nBackup finished")

    return
def add_view(self, request, form_url='', extra_context=None):
    """Admin "add database" view that creates the database asynchronously.

    Non-POST requests are delegated to the parent admin view. On POST:

    * a user that belongs to no team gets an error message and is
      redirected to the database changelist;
    * a user with exactly one team and without ``perm_add_database_infra``
      has the posted ``team`` value replaced by that team's pk;
    * an invalid form is re-rendered by the parent view;
    * a valid form creates a waiting ``TaskHistory``, queues
      ``create_database`` and redirects to the task history changelist
      filtered by the requesting user.

    If ``DatabaseAlreadyExists`` is raised, an error message is shown and the
    parent view is rendered as if the request were a GET.
    """
    self.form = DatabaseForm
    try:
        if request.method != 'POST':
            return super(DatabaseAdmin, self).add_view(
                request, form_url, extra_context=extra_context)

        teams = Team.objects.filter(users=request.user)
        LOG.info("user %s teams: %s" % (request.user, teams))
        if not teams:
            self.message_user(
                request, self.database_add_perm_message,
                level=messages.ERROR)
            return HttpResponseRedirect(
                reverse('admin:logical_database_changelist'))

        # A user restricted to a single team cannot choose another one:
        # force the posted team to be the user's own.
        if teams.count() == 1 and not request.user.has_perm(
                self.perm_add_database_infra):
            post_data = request.POST.copy()
            if 'team' in post_data:
                post_data['team'] = u"%s" % teams[0].pk
            request.POST = post_data

        form = DatabaseForm(request.POST)
        if not form.is_valid():
            return super(DatabaseAdmin, self).add_view(
                request, form_url, extra_context=extra_context)

        data = form.cleaned_data
        LOG.debug(
            "call create_database - name=%s, plan=%s, environment=%s, "
            "team=%s, project=%s, description=%s, user=%s" % (
                data['name'], data['plan'], data['environment'],
                data['team'], data['project'], data['description'],
                request.user))

        task_history = TaskHistory()
        task_history.task_name = "create_database"
        task_history.task_status = task_history.STATUS_WAITING
        task_history.arguments = "Database name: {}".format(data['name'])
        task_history.user = request.user
        task_history.save()

        create_database.delay(
            name=data['name'],
            plan=data['plan'],
            environment=data['environment'],
            team=data['team'],
            project=data['project'],
            description=data['description'],
            task_history=task_history,
            user=request.user)

        # Redirect after POST.
        tasks_url = reverse('admin:notification_taskhistory_changelist')
        user_filter = "?user=%s" % request.user.username
        return HttpResponseRedirect(tasks_url + user_filter)
    except DatabaseAlreadyExists:
        self.message_user(
            request,
            _('An inconsistency was found: The database "%s" already exists in infra-structure but not in DBaaS.') % request.POST['name'],
            level=messages.ERROR)
        # Render the plain add form instead of re-processing the POST.
        request.method = 'GET'
        return super(DatabaseAdmin, self).add_view(
            request, form_url, extra_context=extra_context)
def make_databases_backup(self):
    """Take snapshot backups of every persistent CloudStack infra.

    Iterates all ``DatabaseInfra`` rows whose plan has
    ``provider=Plan.CLOUDSTACK`` and ``has_persistence=True``, and backs up
    each of their non read-only instances that the driver reports as
    eligible.

    Backups are throttled: once ``total_infras // 12`` infras have been
    handled, the task waits 5 minutes before starting the next group.

    Progress is appended to the task history details, and the task finishes
    with the worst status seen: SUCCESS, WARNING or ERROR.
    """
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, user=None)

    status = TaskHistory.STATUS_SUCCESS
    databaseinfras = DatabaseInfra.objects.filter(
        plan__provider=Plan.CLOUDSTACK, plan__has_persistence=True)

    waiting_msg = "\nWaiting 5 minutes to start the next backup group"
    backup_number = 0
    # ``//`` keeps the integer result regardless of the division semantics
    # in effect.
    backups_per_group = len(databaseinfras) // 12

    for databaseinfra in databaseinfras:
        if backups_per_group > 0:
            if backup_number < backups_per_group:
                backup_number += 1
            else:
                backup_number = 0
                task_history.update_details(
                    persist=True, details=waiting_msg)
                time.sleep(300)

        instances = Instance.objects.filter(
            databaseinfra=databaseinfra, read_only=False)

        group = BackupGroup()
        group.save()

        for instance in instances:
            try:
                driver = instance.databaseinfra.get_driver()
                if not driver.check_instance_is_eligible_for_backup(
                        instance):
                    LOG.info('Instance %s is not eligible for backup' %
                             (str(instance)))
                    continue
            except Exception as e:
                status = TaskHistory.STATUS_ERROR
                msg = "Backup for %s was unsuccessful. Error: %s" % (
                    str(instance), str(e))
                LOG.error(msg)
                # Record why the task ends in ERROR; previously this
                # message was overwritten before reaching the details.
                task_history.update_details(
                    persist=True,
                    details="\n{} - {}".format(
                        str(time.strftime("%m/%d/%Y %H:%M:%S")), msg))
                # NOTE(review): the backup is still attempted after a
                # failed eligibility check (original behaviour kept).

            time_now = str(time.strftime("%m/%d/%Y %H:%M:%S"))
            start_msg = "\n{} - Starting backup for {} ...".format(
                time_now, instance)
            task_history.update_details(persist=True, details=start_msg)

            # Fresh dict per instance so an error message left by a
            # previous instance is never reported for this one.
            error = {}
            try:
                snapshot = make_instance_snapshot_backup(
                    instance=instance, error=error, group=group)
                if snapshot and snapshot.was_successful:
                    msg = "Backup for %s was successful" % (str(instance))
                    LOG.info(msg)
                elif snapshot and snapshot.has_warning:
                    # Never downgrade an ERROR already recorded.
                    if status != TaskHistory.STATUS_ERROR:
                        status = TaskHistory.STATUS_WARNING
                    msg = "Backup for %s has warning" % (str(instance))
                    LOG.info(msg)
                else:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance),
                        error.get('errormsg', 'unknown error'))
                    LOG.error(msg)
            except Exception as e:
                status = TaskHistory.STATUS_ERROR
                msg = "Backup for %s was unsuccessful. Error: %s" % (
                    str(instance), str(e))
                LOG.error(msg)

            time_now = str(time.strftime("%m/%d/%Y %H:%M:%S"))
            msg = "\n{} - {}".format(time_now, msg)
            task_history.update_details(persist=True, details=msg)

    task_history.update_status_for(status, details="\nBackup finished")
    return
def restore_snapshot(self, request, database_id):
    """Admin view that restores a database to one of its snapshots.

    The restore is refused, with an error message and a redirect, when the
    database is in quarantine, is not alive, is being used by another task,
    or has a migration in progress. Otherwise a GET renders the restore
    form; a valid POST creates a waiting ``TaskHistory``, calls
    ``Database.recover_snapshot`` and redirects to the task history
    changelist filtered by the requesting user.

    The template receives ``locals()``, so the local variable names in this
    function are part of the template contract and must not be renamed.
    """
    database = Database.objects.get(id=database_id)
    url = reverse('admin:logical_database_change', args=[database.id])

    if database.is_in_quarantine:
        self.message_user(
            request, "Database in quarantine and cannot be restored",
            level=messages.ERROR)
        return HttpResponseRedirect(url)

    if (database.status != Database.ALIVE or
            not database.database_status.is_alive):
        self.message_user(
            request, "Database is dead and cannot be restored",
            level=messages.ERROR)
        return HttpResponseRedirect(url)

    if database.is_beeing_used_elsewhere():
        self.message_user(
            request,
            "Database is beeing used by another task, please check your tasks",
            level=messages.ERROR)
        return HttpResponseRedirect(url)

    if database.has_migration_started():
        self.message_user(
            request,
            "Database {} cannot be restored because it is beeing migrated."
            .format(database.name),
            level=messages.ERROR)
        # A migrating database sends the user back to the list, not to the
        # database's own change page.
        return HttpResponseRedirect(
            reverse('admin:logical_database_changelist'))

    if request.method != 'POST':
        form = RestoreDatabaseForm(initial={"database_id": database_id})
    else:
        form = RestoreDatabaseForm(
            request.POST, initial={"database_id": database_id})
        if form.is_valid():
            task_history = TaskHistory()
            task_history.user = request.user
            task_history.task_name = "restore_snapshot"
            task_history.task_status = task_history.STATUS_WAITING
            task_history.arguments = (
                "Restoring {} to an older version.".format(database.name))
            task_history.save()

            Database.recover_snapshot(
                database=database,
                snapshot=request.POST.get('target_snapshot'),
                user=request.user,
                task_history=task_history.id)

            return HttpResponseRedirect(
                reverse('admin:notification_taskhistory_changelist') +
                "?user=%s" % request.user.username)

    return render_to_response(
        "logical/database/restore.html", locals(),
        context_instance=RequestContext(request))
plan = [splan for splan in plans if splan['name']==plan] LOG.info("Plan: {}".format(plan)) if any(plan): dbaas_plan = Plan.objects.get(pk=plan[0]['pk']) else: msg = "Plan was not found" return log_and_response(msg=msg, http_status=status.HTTP_500_INTERNAL_SERVER_ERROR) try: dbaas_environment = Environment.objects.get(name= env) except(ObjectDoesNotExist,IndexError), e: msg = "Environment does not exist." return log_and_response(msg=msg, http_status=status.HTTP_500_INTERNAL_SERVER_ERROR) task_history = TaskHistory() task_history.task_name="create_database" task_history.arguments="Database name: {}".format(name) task_history.save() create_database.delay(name=name, plan=dbaas_plan, environment=dbaas_environment, team=dbaas_team,project=None, description='Database from Tsuru', task_history=task_history, user=dbaas_user) return Response(status=status.HTTP_201_CREATED,) class ServiceRemove(APIView): renderer_classes = (JSONRenderer, JSONPRenderer) model = Database
def region_migration_start(self, infra, instances, since_step=None):
    """Run the region migration workflow for ``instances`` of ``infra``.

    Registers a running ``TaskHistory`` named ``migrating_zone``, executes
    the step groups below through ``steps_for_instances`` (optionally
    resuming from ``since_step``) and marks the task as success or error
    according to its result. Afterwards the first database of the infra is
    saved with the infra's environment.
    """
    # Ordered (group title, step class paths) pairs; each pair becomes a
    # single-key dict, the shape ``steps_for_instances`` is given.
    workflow = (
        ('Disable monitoring and alarms', (
            'workflow.steps.util.zabbix.DestroyAlarms',
            'workflow.steps.util.db_monitor.DisableMonitoring',
        )),
        ('Stopping infra', (
            'workflow.steps.util.database.Stop',
            'workflow.steps.util.database.CheckIsDown',
        )),
        ('Creating new virtual machine', (
            'workflow.steps.util.vm.MigrationCreateNewVM',
        )),
        ('Creating new infra', (
            'workflow.steps.util.vm.MigrationWaitingBeReady',
            'workflow.steps.util.infra.MigrationCreateInstance',
            'workflow.steps.util.disk.MigrationCreateExport',
        )),
        ('Configuring new infra', (
            'workflow.steps.util.volume_provider.MountDataVolume',
            'workflow.steps.util.plan.InitializationMigration',
            'workflow.steps.util.plan.ConfigureMigration',
        )),
        ('Preparing new environment', (
            'workflow.steps.util.disk.AddDiskPermissionsOldest',
            'workflow.steps.util.disk.MountOldestExportMigration',
            'workflow.steps.util.disk.CopyDataBetweenExportsMigration',
            'workflow.steps.util.disk.FilePermissionsMigration',
            'workflow.steps.util.disk.UnmountNewerExportMigration',
            'workflow.steps.util.vm.ChangeInstanceHost',
            'workflow.steps.util.vm.UpdateOSDescription',
            'workflow.steps.util.infra.OfferingMigration',
            'workflow.steps.util.infra.UpdateMigrateEnvironment',
            'workflow.steps.util.infra.UpdateMigratePlan',
        )),
        ('Starting new infra', (
            'workflow.steps.util.database.Start',
            'workflow.steps.util.database.CheckIsUp',
        )),
        ('Enabling access', (
            'workflow.steps.util.dns.ChangeEndpoint',
            'workflow.steps.util.acl.ReplicateAclsMigration',
        )),
        ('Destroying old infra', (
            'workflow.steps.util.disk.DisableOldestExportMigration',
            'workflow.steps.util.disk.DiskUpdateHost',
            'workflow.steps.util.vm.RemoveHostMigration',
        )),
        ('Enabling monitoring and alarms', (
            'workflow.steps.util.db_monitor.EnableMonitoring',
            'workflow.steps.util.zabbix.CreateAlarms',
        )),
        ('Restart replication', (
            'workflow.steps.util.database.SetSlavesMigration',
        )),
    )
    steps = [{title: step_paths} for title, step_paths in workflow]

    task = TaskHistory()
    task.task_id = self.request.id
    task.task_name = "migrating_zone"
    task.task_status = TaskHistory.STATUS_RUNNING
    task.context = {'infra': infra, 'instances': instances}
    task.arguments = {'infra': infra, 'instances': instances}
    task.user = '******'
    task.save()

    migrated = steps_for_instances(
        steps, instances, task, since_step=since_step)
    if not migrated:
        task.set_status_error('Could not migrate region')
    else:
        task.set_status_success('Region migrated with success')

    # NOTE(review): this runs whether or not the migration succeeded.
    database = infra.databases.first()
    database.environment = infra.environment
    database.save()
def analyze_databases(self, task_history=None):
    """Run every execution plan against every non-quarantined database.

    For each database and each ``ExecutionPlan`` for which
    ``database_can_be_resized`` is true, the analyze service is called.
    When it answers with ``status == 'success'`` the task details are
    updated and, if ``result['msg']`` equals the database instances, one
    analyze repository record is inserted per instance. Any other answer, or
    any exception, is recorded in the task details and marks the task as
    ERROR; processing then continues with the next plan.

    The task is marked SUCCESS only when no plan failed. Previously the
    final SUCCESS update ran unconditionally and overwrote ERROR statuses
    recorded during the loop.

    :param task_history: existing task history to reuse, passed through to
        ``TaskHistory.register``.
    """
    (endpoint, healh_check_route,
     healh_check_string) = get_analyzing_credentials()
    user = User.objects.get(username='******')
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        task_history=task_history, request=self.request, user=user,
        worker_name=worker_name)
    task_history.update_details(persist=True, details="Loading Process...")
    AuditRequest.new_request("analyze_databases", user, "localhost")

    try:
        analyze_service = AnalyzeService(
            endpoint, healh_check_route, healh_check_string)
        had_errors = False

        with transaction.atomic():
            databases = Database.objects.filter(is_in_quarantine=False)
            today = datetime.now()
            for database in databases:
                (database_name, engine, instances, environment_name,
                 databaseinfra_name) = setup_database_info(database)

                for execution_plan in ExecutionPlan.objects.all():
                    if not database_can_be_resized(database, execution_plan):
                        continue

                    params = execution_plan.setup_execution_params()
                    # Fallback reported when the service call itself raises
                    # before returning a result.
                    result = {
                        'msg': 'Could not analyse {}'.format(database_name)
                    }
                    try:
                        result = analyze_service.run(
                            engine=engine, database=database_name,
                            instances=instances, **params)
                        if result['status'] == 'success':
                            task_history.update_details(
                                persist=True,
                                details=("\nDatabase {} {} was "
                                         "analysed.").format(
                                             database,
                                             execution_plan.plan_name))
                            if result['msg'] != instances:
                                continue
                            for instance in result['msg']:
                                insert_analyze_repository_record(
                                    today, database_name, instance, engine,
                                    databaseinfra_name, environment_name,
                                    execution_plan)
                        else:
                            raise Exception("Check your service logs..")
                    except Exception:
                        had_errors = True
                        task_history.update_details(
                            persist=True,
                            details=("\nDatabase {} {} could not be "
                                     "analysed.").format(
                                         database,
                                         execution_plan.plan_name))
                        task_history.update_status_for(
                            TaskHistory.STATUS_ERROR,
                            details='Analysis finished with errors!'
                            '\nError: {}'.format(result['msg']))

        # Do not overwrite an ERROR recorded for a failed plan.
        if not had_errors:
            task_history.update_status_for(
                TaskHistory.STATUS_SUCCESS, details='Analysis ok!')
    except Exception:
        task_history.update_details(persist=True, details="\nProcess crashed")
        task_history.update_status_for(
            TaskHistory.STATUS_ERROR, details='Analysis could not be started')
    finally:
        AuditRequest.cleanup_request()