Example #1
def purge_task_history(self):
    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request, user=None, worker_name=worker_name)

        now = datetime.datetime.now()
        retention_days = Configuration.get_by_name_as_int('task_history_retention_days')

        n_days_before = now - datetime.timedelta(days=retention_days)

        tasks_to_purge = TaskHistory.objects.filter(
            task_name__in=[
                'notification.tasks.database_notification',
                'notification.tasks.database_notification_for_team',
                'notification.tasks.update_database_status',
                'notification.tasks.update_database_used_size',
                'notification.tasks.update_instances_status',
                'system.tasks.set_celery_healthcheck_last_update'
            ],
            ended_at__lt=n_days_before,
            task_status__in=["SUCCESS", "ERROR"]
        )

        tasks_to_purge.delete()

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
            details='Purge successfully done!')
    except Exception as e:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)
Example #2
def node_zone_migrate_rollback(self, migrate, task):
    task = TaskHistory.register(
        request=self.request, task_history=task, user=task.user,
        worker_name=get_worker_name()
    )
    from tasks_migrate import rollback_node_zone_migrate
    rollback_node_zone_migrate(migrate, task)
Example #3
def remove_database_old_backups(self):
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, user=None
    )
    task_history.relevance = TaskHistory.RELEVANCE_WARNING

    snapshots = []
    for env in Environment.objects.all():
        snapshots += get_snapshots_by_env(env)

    msgs = []
    status = TaskHistory.STATUS_SUCCESS
    if len(snapshots) == 0:
        msgs.append("There is no snapshot to purge")

    for snapshot in snapshots:
        try:
            remove_snapshot_backup(snapshot=snapshot, msgs=msgs)
        except Exception as e:
            msg = "Error removing backup {}. Error: {}".format(snapshot, e)
            status = TaskHistory.STATUS_ERROR
            LOG.error(msg)
            msgs.append(msg)
    task_history.update_status_for(status, details="\n".join(msgs))
    return
Example #4
def update_database_used_size(self):
    LOG.info("Retrieving all databases")
    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(
            request=self.request, user=None, worker_name=worker_name)
        databases = Database.objects.all()
        msgs = []
        for database in databases:
            if database.database_status:
                database.used_size_in_bytes = float(
                    database.database_status.used_size_in_bytes)
            else:
                database.used_size_in_bytes = 0.0

            database.save()
            msg = "\nUpdating used size in bytes for database: {}, used size: {}".format(
                database, database.used_size_in_bytes)
            msgs.append(msg)
            LOG.info(msg)

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details="\n".join(
            value for value in msgs))
    except Exception as e:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)
Example #5
def database_notification(self):
    LOG.info("retrieving all teams and sending database notification")
    teams = Team.objects.all()
    msgs = {}

    for team in teams:
        ###############################################
        # create task
        ###############################################

        msgs[team] = analyzing_notification_for_team(team=team)
        ###############################################

    try:
        LOG.info("Messages: ")
        LOG.info(msgs)

        worker_name = get_worker_name()
        task_history = TaskHistory.register(
            request=self.request, user=None, worker_name=worker_name)
        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details="\n".join(
            str(key) + ': ' + ', '.join(value) for key, value in msgs.items()))
    except Exception as e:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)

    return
Example #6
def databaseinfra_notification(self, user=None):
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request, user=user, worker_name=worker_name)
    threshold_infra_notification = Configuration.get_by_name_as_int("threshold_infra_notification", default=0)
    if threshold_infra_notification <= 0:
        LOG.warning("database infra notification is disabled")
        return

    # Sum capacity per databaseinfra, grouped by plan, environment and engine
    infras = DatabaseInfra.objects.values('plan__name', 'environment__name', 'engine__engine_type__name',
                                          'plan__provider').annotate(capacity=Sum('capacity'))
    for infra in infras:
        # total databases created per plan, environment and engine
        used = DatabaseInfra.objects.filter(plan__name=infra['plan__name'],
                                            environment__name=infra['environment__name'],
                                            engine__engine_type__name=infra['engine__engine_type__name']).aggregate(
            used=Count('databases'))
        # calculate the percentage
        percent = int(used['used'] * 100 / infra['capacity'])
        if percent >= threshold_infra_notification and infra['plan__provider'] != Plan.CLOUDSTACK:
            LOG.info('Plan %s in environment %s with %s%% occupied' % (
                infra['plan__name'], infra['environment__name'], percent))
            LOG.info("Sending database infra notification...")
            context = {}
            context['plan'] = infra['plan__name']
            context['environment'] = infra['environment__name']
            context['used'] = used['used']
            context['capacity'] = infra['capacity']
            context['percent'] = percent
            email_notifications.databaseinfra_ending(context=context)

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='Databaseinfra Notification successfully sent to dbaas admins!')
    return
Example #7
def resize_database(self, database, cloudstackpack, task_history=None, user=None):

    AuditRequest.new_request("resize_database", user, "localhost")

    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request, task_history=task_history,
            user=user, worker_name=worker_name)
        from util.providers import resize_database

        result = resize_database(database=database, cloudstackpack=cloudstackpack, task=task_history)

        if result['created'] == False:

            if 'exceptions' in result:
                error = "\n".join(": ".join(err) for err in result['exceptions']['error_codes'])
                traceback = "\nException Traceback\n".join(result['exceptions']['traceback'])
                error = "{}\n{}\n{}".format(error, traceback, error)
            else:
                error = "Something went wrong."

            task_history.update_status_for(TaskHistory.STATUS_ERROR, details=error)
        else:
            task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details='Resize successfully done.')

    except Exception as e:
        error = "Resize Database ERROR: {}".format(e)
        LOG.error(error)
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=error)
Example #8
def database_notification(self):
    """
    Create tasks for database notification by team
    if threshold_database_notification <= 0, the notification is disabled.
    """
    # get all teams and for each one create a new task
    LOG.info("retrieving all teams and sendind database notification")
    teams = Team.objects.all()
    msgs = {}

    for team in teams:
        ###############################################
        # create task
        ###############################################

        msgs[team] = database_notification_for_team(team=team)
        ###############################################

    try:
        LOG.info("Messages: ")
        LOG.info(msgs)

        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request, user=None, worker_name=worker_name)
        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details="\n".join(
            str(key) + ': ' + ', '.join(value) for key, value in msgs.items()))
    except Exception as e:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)
Example #9
def update_instances_status(self):
    LOG.info("Retrieving all databaseinfras")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request, user=None, worker_name=worker_name)

    try:
        infras = DatabaseInfra.objects.all()
        msgs = []
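        # Mark each non-arbiter instance as ALIVE or DEAD based on its status check.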
        for databaseinfra in infras:
            LOG.info("Retrieving all instances for {}".format(databaseinfra))

            for instance in Instance.objects.filter(databaseinfra=databaseinfra, is_arbiter=False):
                if instance.check_status():
                    instance.status = Instance.ALIVE
                else:
                    instance.status = Instance.DEAD

                instance.save()

                msg = "\nUpdating instance status, instance: {}, status: {}".format(
                instance, instance.status)
                msgs.append(msg)
                LOG.info(msg)

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details="\n".join(
            value for value in msgs))
    except Exception as e:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)
Example #10
def remove_database_old_backups(self):

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
        worker_name=worker_name, user=None)

    backup_retention_days = Configuration.get_by_name_as_int('backup_retention_days')

    LOG.info("Removing backups older than %s days" % (backup_retention_days))

    backup_time_dt = date.today() - timedelta(days=backup_retention_days)
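    # Snapshots older than the retention window that have not been purged yet and still reference an instance.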
    snapshots = Snapshot.objects.filter(
        start_at__lte=backup_time_dt, purge_at__isnull=True,
        instance__isnull=False, snapshopt_id__isnull=False
    )
    msgs = []
    status = TaskHistory.STATUS_SUCCESS
    if len(snapshots) == 0:
        msgs.append("There is no snapshot to purge")
    for snapshot in snapshots:
        try:
            remove_snapshot_backup(snapshot=snapshot)
            msg = "Backup %s removed" % (snapshot)
            LOG.info(msg)
        except:
            msg = "Error removing backup %s" % (snapshot)
            status = TaskHistory.STATUS_ERROR
            LOG.error(msg)
        msgs.append(msg)

    task_history.update_status_for(status, details="\n".join(msgs))

    return
Example #11
def monitor_acl_job(self, database, job_id, bind_address, bind_status=models.CREATED, user=None):
    if not user:
        user = self.request.args[-1]
    AuditRequest.new_request("create_database", user, "localhost")

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request, user=user, worker_name=worker_name)
    LOG.info("id: %s | task: %s | kwargs: %s | args: %s" % (self.request.id, self.request.task, self.request.kwargs, str(self.request.args)))

    task_history.update_details(persist=True, details="Loading Process...")
    try:

        LOG.debug("database: {}, job_id: {}, bind_address: {}, bind_status: {}, user: {}".format(database, job_id, bind_address, bind_status, user))

        status = tasks.monitor_acl_job(database, job_id, bind_address,)

        LOG.debug("Job status return: {}".format(status))
        if status:
            from dbaas_aclapi.util import update_bind_status
            LOG.info("Updating Bind Status")
            update_bind_status(database, bind_address, bind_status)

            task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details='Bind created successfully')
            return
        else:
            raise Exception, "Error when monitoring the Bind Process"


    except Exception, e:
        LOG.info("DatabaseBindMonitoring ERROR: {}".format(e))
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details='Bind could not be granted')
        return
Example #12
def update_database_status(self):
    LOG.info("Retrieving all databases")
    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(
            request=self.request, user=None, worker_name=worker_name)
        databases = Database.objects.all()
        msgs = []
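        # ALIVE when the status check passes, ALERT when any instance is failing, DEAD otherwise.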
        for database in databases:
            if database.database_status.is_alive:
                database.status = Database.ALIVE

                instances_status = database.databaseinfra.check_instances_status()
                if instances_status == database.databaseinfra.ALERT:
                    database.status = Database.ALERT

            else:
                database.status = Database.DEAD

            database.save()
            msg = "\nUpdating status for database: {}, status: {}".format(
                database, database.status)
            msgs.append(msg)
            LOG.info(msg)

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details="\n".join(
            value for value in msgs))
    except Exception as e:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)
Example #13
def make_databases_backup(self):

    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
        worker_name=worker_name, user=None)

    msgs = []
    status = TaskHistory.STATUS_SUCCESS
    databaseinfras = DatabaseInfra.objects.filter(plan__provider=Plan.CLOUDSTACK)
    error = {}
    for databaseinfra in databaseinfras:
        instances = Instance.objects.filter(databaseinfra=databaseinfra)
        for instance in instances:

            if not instance.databaseinfra.get_driver().check_instance_is_eligible_for_backup(instance):
                LOG.info('Instance %s is not eligible for backup' % (str(instance)))
                continue

            try:
                if make_instance_snapshot_backup(instance=instance, error=error):
                    msg = "Backup for %s was successful" % (str(instance))
                    LOG.info(msg)
                else:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (str(instance), error['errormsg'])
                    LOG.error(msg)
                print(msg)
            except Exception as e:
                status = TaskHistory.STATUS_ERROR
                msg = "Backup for %s was unsuccessful. Error: %s" % (str(instance), str(e))
                LOG.error(msg)

            msgs.append(msg)
Example #14
def database_environment_migrate_rollback(self, migrate, task):
    task = TaskHistory.register(
        request=self.request, task_history=task, user=task.user,
        worker_name=get_worker_name()
    )
    from tasks_database_migrate import rollback_database_environment_migrate
    rollback_database_environment_migrate(migrate, task)
Example #15
def _create_database_rollback(self, rollback_from, task, user):
    task = TaskHistory.register(
        request=self.request, task_history=task, user=user,
        worker_name=get_worker_name()
    )

    from tasks_create_database import rollback_create
    rollback_create(rollback_from, task, user)
Example #16
def create_database(self, name, plan, environment, team, project, description, task_history=None, user=None):
    AuditRequest.new_request("create_database", user, "localhost")
    try:

        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request, task_history=task_history,
                                            user=user, worker_name=worker_name)

        LOG.info("id: %s | task: %s | kwargs: %s | args: %s" % (
            self.request.id, self.request.task, self.request.kwargs, str(self.request.args)))

        task_history.update_details(persist=True, details="Loading Process...")

        result = make_infra(plan=plan,
                            environment=environment,
                            name=name,
                            team=team,
                            project=project,
                            description=description,
                            task=task_history,
                            )

        if result['created'] == False:

            if 'exceptions' in result:
                error = "\n".join(": ".join(err)
                                  for err in result['exceptions']['error_codes'])
                traceback = "\nException Traceback\n".join(
                    result['exceptions']['traceback'])
                error = "{}\n{}\n{}".format(error, traceback, error)
            else:
                error = "There is not any infra-structure to allocate this database."

            task_history.update_status_for(
                TaskHistory.STATUS_ERROR, details=error)
            return

        task_history.update_dbid(db=result['database'])
        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS, details='Database created successfully')

        return

    except Exception as e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        if 'result' in locals() and result['created']:
            destroy_infra(
                databaseinfra=result['databaseinfra'], task=task_history)

        task_history.update_status_for(
            TaskHistory.STATUS_ERROR, details=traceback)
        return

    finally:
        AuditRequest.cleanup_request()
Example #17
def node_zone_migrate(
    self, host, zone, new_environment, task, since_step=None, step_manager=None
):
    task = TaskHistory.register(
        request=self.request, task_history=task, user=task.user,
        worker_name=get_worker_name()
    )
    from tasks_migrate import node_zone_migrate
    node_zone_migrate(host, zone, new_environment, task, since_step, step_manager=step_manager)
Example #18
def restore_snapshot(self, database, snapshot, user, task_history):
    from dbaas_nfsaas.models import HostAttr
    LOG.info("Restoring snapshot")
    worker_name = get_worker_name()

    task_history = models.TaskHistory.objects.get(id=task_history)
    task_history = TaskHistory.register(request=self.request, task_history=task_history,
                                        user=user, worker_name=worker_name)

    databaseinfra = database.databaseinfra

    snapshot = Snapshot.objects.get(id=snapshot)
    snapshot_id = snapshot.snapshopt_id

    host_attr = HostAttr.objects.get(nfsaas_path=snapshot.export_path)
    host = host_attr.host
    host_attr = HostAttr.objects.get(host=host, is_active=True)

    export_id = host_attr.nfsaas_export_id
    export_path = host_attr.nfsaas_path

    steps = RESTORE_SNAPSHOT_SINGLE

    if databaseinfra.plan.is_ha and databaseinfra.engine_name == 'mysql':
        steps = RESTORE_SNAPSHOT_MYSQL_HA

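    # Every instance except the snapshot's host, MongoDB arbiters and Redis sentinels.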
    not_primary_instances = databaseinfra.instances.exclude(hostname=host).exclude(
        instance_type__in=[Instance.MONGODB_ARBITER, Instance.REDIS_SENTINEL]
    )
    not_primary_hosts = [
        instance.hostname for instance in not_primary_instances]

    workflow_dict = build_dict(databaseinfra=databaseinfra,
                               database=database,
                               snapshot_id=snapshot_id,
                               export_path=export_path,
                               export_id=export_id,
                               host=host,
                               steps=steps,
                               not_primary_hosts=not_primary_hosts,
                               )

    start_workflow(workflow_dict=workflow_dict, task=task_history)

    if workflow_dict['exceptions']['traceback']:
        error = "\n".join(
            ": ".join(err) for err in workflow_dict['exceptions']['error_codes'])
        traceback = "\nException Traceback\n".join(
            workflow_dict['exceptions']['traceback'])
        error = "{}\n{}\n{}".format(error, traceback, error)
        task_history.update_status_for(
            TaskHistory.STATUS_ERROR, details=error)
    else:
        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS, details='Database successfully recovered!')

    return
Example #19
def restore_database(self, database, task, snapshot, user, retry_from=None):
    task = TaskHistory.register(
        request=self.request, task_history=task, user=user,
        worker_name=get_worker_name()
    )

    from backup.models import Snapshot
    snapshot = Snapshot.objects.get(id=snapshot)

    from tasks_restore_backup import restore_snapshot
    restore_snapshot(database, snapshot.group, task, retry_from)
Example #20
def database_environment_migrate(
    self, database, new_environment, new_offering, task, hosts_zones,
    since_step=None, step_manager=None
):
    task = TaskHistory.register(
        request=self.request, task_history=task, user=task.user,
        worker_name=get_worker_name()
    )
    from tasks_database_migrate import database_environment_migrate
    database_environment_migrate(
        database, new_environment, new_offering, task, hosts_zones, since_step,
        step_manager=step_manager
    )
Example #21
def purge_unused_exports_task(self):
    from notification.tasks import TaskRegister
    task = TaskRegister.purge_unused_exports()

    task = TaskHistory.register(
        request=self.request, worker_name=get_worker_name(), task_history=task
    )

    task.add_detail('Getting all inactive exports without snapshots')
    if purge_unused_exports(task):
        task.set_status_success('Done')
    else:
        task.set_status_error('Error')
Example #22
def create_database(
    self, name, plan, environment, team, project, description, task,
    subscribe_to_email_events=True, is_protected=False, user=None,
    retry_from=None
):
    task = TaskHistory.register(
        request=self.request, task_history=task, user=user,
        worker_name=get_worker_name()
    )

    from tasks_create_database import create_database
    create_database(
        name, plan, environment, team, project, description, task,
        subscribe_to_email_events, is_protected, user, retry_from
    )
Example #23
def analyze_databases(self, task_history=None):
    endpoint, healh_check_route, healh_check_string = get_analyzing_credentials()
    user = User.objects.get(username='******')
    worker_name = get_worker_name()
    task_history = TaskHistory.register(task_history=task_history, request=self.request, user=user,
                                        worker_name=worker_name)
    task_history.update_details(persist=True, details="Loading Process...")
    AuditRequest.new_request("analyze_databases", user, "localhost")

    try:
        analyze_service = AnalyzeService(endpoint, healh_check_route,
                                         healh_check_string)
        with transaction.atomic():
            databases = Database.objects.filter(is_in_quarantine=False)
            today = datetime.now()
            for database in databases:
                database_name, engine, instances, environment_name, databaseinfra_name = setup_database_info(database)
                for execution_plan in ExecutionPlan.objects.all():
                    if database_can_not_be_resized(database, execution_plan):
                        continue
                    params = execution_plan.setup_execution_params()
                    result = analyze_service.run(engine=engine, database=database_name,
                                                 instances=instances, **params)
                    if result['status'] == 'success':
                        task_history.update_details(persist=True, details="\nDatabase {} {} was analysed.".format(database, execution_plan.plan_name))
                        if result['msg'] != instances:
                            continue
                        for instance in result['msg']:
                            insert_analyze_repository_record(today, database_name, instance,
                                                             engine, databaseinfra_name,
                                                             environment_name,
                                                             execution_plan)
                    else:
                        raise Exception("Check your service logs..")
        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='Analysis ok!')
    except Exception:
        try:
            task_history.update_details(
                persist=True,
                details="\nDatabase {} {} could not be analysed.".format(
                    database, execution_plan.plan_name))
            task_history.update_status_for(
                TaskHistory.STATUS_ERROR,
                details='Analysis finished with errors!\nError: {}'.format(result['msg']))
        except UnboundLocalError:
            task_history.update_details(persist=True, details="\nProcess crashed")
            task_history.update_status_for(TaskHistory.STATUS_ERROR, details='Analysis could not be started')
    finally:
        AuditRequest.cleanup_request()
Example #24
def remove_database_backup(self, task, snapshot):
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, task_history=task
    )

    task_history.add_detail('Removing {}'.format(snapshot))
    try:
        remove_snapshot_backup(snapshot, force=1)
    except Exception as e:
        task_history.add_detail('Error: {}'.format(e))
        task.set_status_error('Could not delete backup')
        return False
    else:
        task.set_status_success('Backup deleted with success')
        return True
Example #25
def bind_address_on_database(self, database, acl_environment, acl_vlan, action="permit", user=None):
    if not user:
        user = self.request.args[-1]

    LOG.info("User: {}, action: {}".format(user, action))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request, user=user, worker_name=worker_name)
    LOG.info("id: %s | task: %s | kwargs: %s | args: %s" % (self.request.id, self.request.task, self.request.kwargs, str(self.request.args)))

    task_history.update_details(persist=True, details="Loading Process...")


    try:
        if action == "permit":
            bind_status = models.CREATING
        else:
            bind_status = models.DESTROYING

        LOG.info("Params database: {}, acl_environment: {}, acl_vlan: {}, action: {}, bind_status: {}".format(database, acl_environment,
            acl_vlan, action, bind_status))

        job = tasks.bind_unbind_address_on_database(database=database, acl_environment=acl_environment,
            acl_vlan=acl_vlan, action="permit", bind_status=bind_status)

        if not job:
            raise Exception("Error when executing the Bind")

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details='Bind created successfully')

        if bind_status == models.CREATING:
            bind_status = models.CREATED
        else:
            bind_status = models.ERROR

        LOG.debug("Bind Status: {}".format(bind_status))


        monitor_acl_job.delay(database, job, acl_environment+'/'+acl_vlan, bind_status, user=user)
        return

    except Exception as e:
        LOG.info("DatabaseBind ERROR: {}".format(e))
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details='Bind could not be created')
        return
Example #26
def make_database_backup(self, database, task):
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, task_history=task
    )

    if not database.pin_task(task):
        task.error_in_lock(database)
        return False

    task_history.add_detail('Starting database {} backup'.format(database))

    instances = _get_backup_instance(database, task)
    if not instances:
        task.set_status_error('Could not find eligible instances', database)
        return False

    _check_snapshot_limit(instances, task)

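    # All snapshots taken in this run share a single BackupGroup.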
    group = BackupGroup()
    group.save()

    has_warning = False
    for instance in instances:
        snapshot = _create_database_backup(instance, task, group)

        if not snapshot:
            task.set_status_error(
                'Backup was unsuccessful in {}'.format(instance), database
            )
            return False

        snapshot.is_automatic = False
        snapshot.save()

        if not has_warning:
            has_warning = snapshot.has_warning

    if has_warning:
        task.set_status_warning('Backup was warning', database)
    else:
        task.set_status_success('Backup was successful', database)

    return True
Example #27
def destroy_database(self, database, task_history=None, user=None):
    # register History
    AuditRequest.new_request("destroy_database", user, "localhost")
    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request, task_history=task_history,
            user=user, worker_name=worker_name)

        LOG.info("id: %s | task: %s | kwargs: %s | args: %s" % (
            self.request.id, self.request.task, self.request.kwargs, str(self.request.args)))

        task_history.update_details(persist=True, details="Loading Process...")

        databaseinfra = database.databaseinfra

        destroy_infra(databaseinfra=databaseinfra, task=task_history)

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details='Database destroyed successfully')
        return
    finally:
        AuditRequest.cleanup_request()
Example #28
def make_databases_backup(self):

    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name, user=None)

    msgs = []
    status = TaskHistory.STATUS_SUCCESS
    databaseinfras = DatabaseInfra.objects.filter(
        plan__provider=Plan.CLOUDSTACK)
    error = {}
    for databaseinfra in databaseinfras:
        instances = Instance.objects.filter(databaseinfra=databaseinfra)
        for instance in instances:

            if not instance.databaseinfra.get_driver().check_instance_is_eligible_for_backup(instance):
                LOG.info('Instance %s is not eligible for backup' %
                         (str(instance)))
                continue

            try:
                if make_instance_snapshot_backup(instance=instance,
                                                 error=error):
                    msg = "Backup for %s was successful" % (str(instance))
                    LOG.info(msg)
                else:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), error['errormsg'])
                    LOG.error(msg)
                print(msg)
            except Exception as e:
                status = TaskHistory.STATUS_ERROR
                msg = "Backup for %s was unsuccessful. Error: %s" % (
                    str(instance), str(e))
                LOG.error(msg)

            msgs.append(msg)
Example #29
def update_ssl(self, database, task, since_step=None, step_manager=None):
    from maintenance.models import UpdateSsl
    task = TaskHistory.register(request=self.request,
                                task_history=task,
                                user=task.user,
                                worker_name=get_worker_name())
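    # Resume from the given step_manager, otherwise retry from the last failed UpdateSsl for this database.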
    if step_manager:
        step_manager = step_manager
        step_manager.id = None
        step_manager.started_at = None
        since_step = step_manager.current_step
    else:
        retry_from = UpdateSsl.objects.filter(can_do_retry=True,
                                              database=database,
                                              status=UpdateSsl.ERROR).last()
        step_manager = UpdateSsl()
        if retry_from:
            step_manager.current_step = retry_from.current_step
            since_step = retry_from.current_step
    step_manager.database = database
    step_manager.task = task
    step_manager.save()

    steps = database.databaseinfra.update_ssl_steps()
    instances = database.infra.get_driver().get_database_instances()
    result = steps_for_instances(steps,
                                 instances,
                                 task,
                                 step_manager.update_step,
                                 since_step,
                                 step_manager=step_manager)
    step_manager = UpdateSsl.objects.get(id=step_manager.id)
    if result:
        step_manager.set_success()
        task.set_status_success('SSL Update with success')
    else:
        step_manager.set_error()
        task.set_status_error('Could not update SSL')
Example #30
def databaseinfra_notification(self, user=None):
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, user=user, worker_name=worker_name)
    threshold_infra_notification = Configuration.get_by_name_as_int(
        "threshold_infra_notification", default=0)
    if threshold_infra_notification <= 0:
        LOG.warning("database infra notification is disabled")
        return

    # Sum capacity per databaseinfra, grouped by plan, environment and engine
    infras = DatabaseInfra.objects.values('plan__name', 'environment__name', 'engine__engine_type__name',
                                          'plan__provider').annotate(capacity=Sum('capacity'))
    for infra in infras:
        # total databases created per plan, environment and engine
        used = DatabaseInfra.objects.filter(plan__name=infra['plan__name'],
                                            environment__name=infra[
                                                'environment__name'],
                                            engine__engine_type__name=infra['engine__engine_type__name']).aggregate(
            used=Count('databases'))
        # calculate the percentage
        percent = int(used['used'] * 100 / infra['capacity'])
        if percent >= threshold_infra_notification and infra['plan__provider'] != Plan.CLOUDSTACK:
            LOG.info('Plan %s in environment %s with %s%% occupied' % (
                infra['plan__name'], infra['environment__name'], percent))
            LOG.info("Sending database infra notification...")
            context = {}
            context['plan'] = infra['plan__name']
            context['environment'] = infra['environment__name']
            context['used'] = used['used']
            context['capacity'] = infra['capacity']
            context['percent'] = percent
            email_notifications.databaseinfra_ending(context=context)

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='Databaseinfra Notification successfully sent to dbaas admins!')
    return
Example #31
def recreate_slave(self, host, task, since_step=None, step_manager=None):
    from maintenance.models import RecreateSlave
    task = TaskHistory.register(request=self.request,
                                task_history=task,
                                user=task.user,
                                worker_name=get_worker_name())
    instance = host.instances.first()
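    # Resume from the given step_manager, otherwise retry from the last failed RecreateSlave for this host.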
    if step_manager:
        step_manager = step_manager
        step_manager.id = None
        step_manager.started_at = None
        since_step = step_manager.current_step
    else:
        retry_from = RecreateSlave.objects.filter(
            can_do_retry=True, host=host, status=RecreateSlave.ERROR).last()
        step_manager = RecreateSlave()
        if retry_from:
            step_manager.current_step = retry_from.current_step
            step_manager.snapshot = retry_from.snapshot
            since_step = retry_from.current_step
    step_manager.host = instance.hostname
    step_manager.task = task
    step_manager.save()

    steps = host.instances.first().databaseinfra.recreate_slave_steps()
    result = steps_for_instances(steps, [instance],
                                 task,
                                 step_manager.update_step,
                                 since_step,
                                 step_manager=step_manager)
    step_manager = RecreateSlave.objects.get(id=step_manager.id)
    if result:
        step_manager.set_success()
        task.set_status_success('Slave recreated with success')
    else:
        step_manager.set_error()
        task.set_status_error('Could not recreate slave')
Example #32
def check_ssl_expire_at(self):
    LOG.info("Retrieving all SSL MySQL databases")
    worker_name = get_worker_name()
    task = TaskHistory.register(
        request=self.request, user=None, worker_name=worker_name)
    task.relevance = TaskHistory.RELEVANCE_CRITICAL

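    # SSL certificates expiring within the next 30 days get a renewal task scheduled, unless one already exists.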
    one_month_later = date.today() + timedelta(days=30)
    try:
        infras = DatabaseInfra.objects.filter(
            ssl_configured=True,
            engine__engine_type__name='mysql',
            instances__hostname__ssl_expire_at__lte=one_month_later
        ).distinct()
        for infra in infras:
            database = infra.databases.first()
            task.update_details(
                "Checking database {}...".format(database), persist=True
            )
            scheduled_tasks = TaskSchedule.objects.filter(
                scheduled_for__lte=one_month_later,
                status=TaskSchedule.SCHEDULED,
                database=database
            )
            if scheduled_tasks:
                task.update_details("Already scheduled!\n", persist=True)
            else:
                TaskSchedule.objects.create(
                    method_path='ddd',
                    scheduled_for=one_month_later,
                    database=database
                )
                task.update_details("Schedule created!\n", persist=True)
        task.update_status_for(TaskHistory.STATUS_SUCCESS, details="\nDone")
    except Exception as err:
        task.update_status_for(TaskHistory.STATUS_ERROR, details=err)
        return
Example #33
def create_database(self,
                    name,
                    plan,
                    environment,
                    team,
                    project,
                    description,
                    task,
                    backup_hour,
                    maintenance_window,
                    maintenance_day,
                    subscribe_to_email_events=True,
                    is_protected=False,
                    user=None,
                    retry_from=None):
    task = TaskHistory.register(request=self.request,
                                task_history=task,
                                user=user,
                                worker_name=get_worker_name())

    from tasks_create_database import create_database
    create_database(name, plan, environment, team, project, description, task,
                    backup_hour, maintenance_window, maintenance_day,
                    subscribe_to_email_events, is_protected, user, retry_from)
Example #34
def make_databases_backup(self):

    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name, user=None)

    status = TaskHistory.STATUS_SUCCESS
    databaseinfras = DatabaseInfra.objects.filter(
        plan__provider=Plan.CLOUDSTACK, plan__has_persistence=True
    )
    error = {}
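    # Split the infras into 12 groups and wait 5 minutes between groups to spread the backup load.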
    backup_number = 0
    backups_per_group = len(databaseinfras) / 12
    for databaseinfra in databaseinfras:
        if backups_per_group > 0:
            if backup_number < backups_per_group:
                backup_number += 1
            else:
                backup_number = 0
                waiting_msg = "\nWaiting 5 minutes to start the next backup group"
                task_history.update_details(persist=True, details=waiting_msg)
                time.sleep(300)

        instances = Instance.objects.filter(databaseinfra=databaseinfra)
        for instance in instances:
            try:
                if not instance.databaseinfra.get_driver().check_instance_is_eligible_for_backup(instance):
                    LOG.info('Instance %s is not eligible for backup' % (str(instance)))
                    continue
            except Exception as e:
                status = TaskHistory.STATUS_ERROR
                msg = "Backup for %s was unsuccessful. Error: %s" % (
                    str(instance), str(e))
                LOG.error(msg)
            else:
                time_now = str(time.strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(time_now, instance)
                task_history.update_details(persist=True, details=start_msg)
                try:
                    snapshot = make_instance_snapshot_backup(
                        instance=instance, error=error
                    )
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error['errormsg'])
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

            time_now = str(time.strftime("%m/%d/%Y %H:%M:%S"))
            msg = "\n{} - {}".format(time_now, msg)
            task_history.update_details(persist=True, details=msg)

    task_history.update_status_for(status, details="\nBackup finished")

    return
Example #35
def execute_scheduled_maintenance(self, maintenance_id):
    LOG.debug("Maintenance id: {}".format(maintenance_id))
    maintenance = models.Maintenance.objects.get(id=maintenance_id)

    models.Maintenance.objects.filter(id=maintenance_id, ).update(
        status=maintenance.RUNNING, started_at=datetime.now())
    LOG.info("Maintenance {} is RUNNING".format(maintenance, ))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name)

    LOG.info("id: %s | task: %s | kwargs: %s | args: %s" %
             (self.request.id, self.request.task, self.request.kwargs,
              str(self.request.args)))

    task_history.update_details(
        persist=True, details="Executing Maintenance: {}".format(maintenance))

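    # Run the main script on each host; on failure, run the rollback script when one is defined.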
    for hm in models.HostMaintenance.objects.filter(maintenance=maintenance):
        main_output = {}
        hm.status = hm.RUNNING
        hm.started_at = datetime.now()
        hm.save()

        if hm.host is None:
            hm.status = hm.UNAVAILABLEHOST
            hm.finished_at = datetime.now()
            hm.save()
            continue

        host = hm.host
        update_task = "\nRunning Maintenance on {}".format(host)

        param_dict = {}
        for param in models.MaintenanceParameters.objects.filter(
                maintenance=maintenance):
            param_function = _get_function(param.function_name)
            param_dict[param.parameter_name] = param_function(host.id)

        main_script = build_context_script(param_dict, maintenance.main_script)
        exit_status = exec_remote_command_host(host, main_script, main_output)

        if exit_status == 0:
            hm.status = hm.SUCCESS
        else:

            if maintenance.rollback_script:
                rollback_output = {}
                hm.status = hm.ROLLBACK
                hm.save()

                rollback_script = build_context_script(
                    param_dict, maintenance.rollback_script)
                exit_status = exec_remote_command_host(host, rollback_script,
                                                       rollback_output)

                if exit_status == 0:
                    hm.status = hm.ROLLBACK_SUCCESS
                else:
                    hm.status = hm.ROLLBACK_ERROR

                hm.rollback_log = get_dict_lines(rollback_output)

            else:
                hm.status = hm.ERROR

        update_task += "...status: {}".format(hm.status)

        task_history.update_details(persist=True, details=update_task)

        hm.main_log = get_dict_lines(main_output)
        hm.finished_at = datetime.now()
        hm.save()

    models.Maintenance.objects.filter(id=maintenance_id, ).update(
        status=maintenance.FINISHED, finished_at=datetime.now())

    task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                   details='Maintenance executed successfully')

    LOG.info("Maintenance: {} has FINISHED".format(maintenance, ))
Example #36
def analyze_databases(self, task_history=None):
    (endpoint, healh_check_route,
     healh_check_string) = get_analyzing_credentials()
    user = User.objects.get(username='******')
    worker_name = get_worker_name()
    task_history = TaskHistory.register(task_history=task_history,
                                        request=self.request,
                                        user=user,
                                        worker_name=worker_name)
    task_history.update_details(persist=True, details="Loading Process...")
    AuditRequest.new_request("analyze_databases", user, "localhost")

    try:
        analyze_service = AnalyzeService(endpoint, healh_check_route,
                                         healh_check_string)
        with transaction.atomic():
            databases = Database.objects.filter(is_in_quarantine=False)
            today = datetime.now()
            for database in databases:
                (database_name, engine, instances, environment_name,
                 databaseinfra_name) = setup_database_info(database)
                for execution_plan in ExecutionPlan.objects.all():
                    if not database_can_be_resized(database, execution_plan):
                        continue

                    params = execution_plan.setup_execution_params()
                    result = {
                        'msg': 'Could not analyse {}'.format(database_name)
                    }

                    try:
                        result = analyze_service.run(engine=engine,
                                                     database=database_name,
                                                     instances=instances,
                                                     **params)
                        if result['status'] == 'success':
                            task_history.update_details(
                                persist=True,
                                details=("\nDatabase {} {} was "
                                         "analysed.").format(
                                             database,
                                             execution_plan.plan_name))

                            if result['msg'] != instances:
                                continue

                            for instance in result['msg']:
                                insert_analyze_repository_record(
                                    today, database_name, instance, engine,
                                    databaseinfra_name, environment_name,
                                    execution_plan)
                        else:
                            raise Exception("Check your service logs..")
                    except Exception:
                        task_history.update_details(
                            persist=True,
                            details=("\nDatabase {} {} could not be "
                                     "analysed.").format(
                                         database, execution_plan.plan_name))
                        task_history.update_status_for(
                            TaskHistory.STATUS_ERROR,
                            details='Analysis finished with errors!'
                            '\nError: {}'.format(result['msg']))

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='Analysis ok!')
    except Exception:
        task_history.update_details(persist=True, details="\nProcess crashed")
        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details='Analysis could not be started')
    finally:
        AuditRequest.cleanup_request()
Example #37
def upgrade_mongodb_24_to_30(self, database, user, task_history=None):

    from workflow.settings import MONGODB_UPGRADE_24_TO_30_SINGLE
    from workflow.settings import MONGODB_UPGRADE_24_TO_30_HA
    from util import build_dict
    from workflow.workflow import start_workflow

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        task_history=task_history,
                                        user=user,
                                        worker_name=worker_name)

    databaseinfra = database.databaseinfra
    driver = databaseinfra.get_driver()

    instances = driver.get_database_instances()
    source_plan = databaseinfra.plan
    target_plan = source_plan.engine_equivalent_plan

    source_engine = databaseinfra.engine
    target_engine = source_engine.engine_upgrade_option

    if source_plan.is_ha:
        steps = MONGODB_UPGRADE_24_TO_30_HA
    else:
        steps = MONGODB_UPGRADE_24_TO_30_SINGLE

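    # Pre-flight checks: abort the upgrade before any workflow step runs if a precondition fails.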
    stop_now = False

    if not target_plan:
        msg = "There is no Engine Equivalent Plan!"
        stop_now = True

    if not target_engine:
        msg = "There is no Engine Upgrade Option!"
        stop_now = True

    if database.status != Database.ALIVE or not database.database_status.is_alive:
        msg = "Database is not alive!"
        stop_now = True

    if database.is_beeing_used_elsewhere(task_id=self.request.id):
        msg = "Database is in use by another task!"
        stop_now = True

    if not source_engine.version.startswith('2.4.'):
        msg = "Database version must be 2.4!"
        stop_now = True

    if target_engine and target_engine.version != '3.0.12':
        msg = "Target database version must be 3.0.12!"
        stop_now = True

    if stop_now:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=msg)
        LOG.info("Upgrade finished")
        return

    try:
        delete_zabbix_alarms(database)
    except Exception as e:
        message = "Could not delete Zabbix alarms: {}".format(e)
        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=message)
        LOG.error(message)
        return

    try:
        workflow_dict = build_dict(steps=steps,
                                   databaseinfra=databaseinfra,
                                   instances=instances,
                                   source_plan=source_plan,
                                   target_plan=target_plan,
                                   source_engine=source_engine,
                                   target_engine=target_engine)

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if workflow_dict['exceptions']['traceback']:
            error = "\n".join(
                ": ".join(err)
                for err in workflow_dict['exceptions']['error_codes'])
            traceback = "\nException Traceback\n".join(
                workflow_dict['exceptions']['traceback'])
            error = "{}\n{}\n{}".format(error, traceback, error)
            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details=error)
            LOG.info("MongoDB Upgrade finished with errors")
            return

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='MongoDB successfully upgraded!')

        LOG.info("MongoDB Upgrade finished")
    except Exception as e:
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=e)
        LOG.warning("MongoDB Upgrade finished with errors")

    try:
        create_zabbix_alarms(database)
    except Exception as e:
        message = "Could not create Zabbix alarms: {}".format(e)
        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=message)
        LOG.error(message)
Example #38
def create_database(self,
                    name,
                    plan,
                    environment,
                    team,
                    project,
                    description,
                    subscribe_to_email_events=True,
                    task_history=None,
                    user=None,
                    is_protected=False):
    AuditRequest.new_request("create_database", user, "localhost")
    try:

        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=worker_name)

        LOG.info("id: %s | task: %s | kwargs: %s | args: %s" %
                 (self.request.id, self.request.task, self.request.kwargs,
                  str(self.request.args)))

        task_history.update_details(persist=True, details="Loading Process...")

        result = make_infra(
            plan=plan,
            environment=environment,
            name=name,
            team=team,
            project=project,
            description=description,
            subscribe_to_email_events=subscribe_to_email_events,
            task=task_history,
            is_protected=is_protected)

        if result['created'] is False:
            if 'exceptions' in result:
                error = "\n".join(
                    ": ".join(err)
                    for err in result['exceptions']['error_codes'])
                traceback = "\nException Traceback\n".join(
                    result['exceptions']['traceback'])
                error = "{}\n{}\n{}".format(error, traceback, error)
            else:
                error = "There is not any infra-structure to allocate this database."

            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details=error)
            return

        task_history.update_dbid(db=result['database'])
        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='Database created successfully')

        return

    except Exception as e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        if 'result' in locals() and result['created']:
            destroy_infra(databaseinfra=result['databaseinfra'],
                          task=task_history)

        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=traceback)
        return

    finally:
        AuditRequest.cleanup_request()
Example #39
def execute_database_region_migration(self,
                                      database_region_migration_detail_id,
                                      task_history=None,
                                      user=None):
    #AuditRequest.new_request("execute_database_region_migration", user, "localhost")
    try:

        if task_history:
            arguments = task_history.arguments
        else:
            arguments = None

        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=get_worker_name())

        if arguments:
            task_history.arguments = arguments
            task_history.save()

        database_region_migration_detail = DatabaseRegionMigrationDetail.objects.get(
            id=database_region_migration_detail_id)
        database_region_migration = database_region_migration_detail.database_region_migration
        database = database_region_migration.database
        databaseinfra = database.databaseinfra
        source_environment = databaseinfra.environment
        target_environment = source_environment.equivalent_environment
        engine = database.engine_type
        steps = get_engine_steps(engine)
        workflow_steps = steps[
            database_region_migration_detail.step].step_classes
        source_instances = []
        source_hosts = []
        for instance in Instance.objects.filter(databaseinfra=databaseinfra):
            source_instances.append(instance)
            if instance.instance_type != instance.REDIS_SENTINEL:
                source_hosts.append(instance.hostname)

        source_plan = databaseinfra.plan
        target_plan = source_plan.equivalent_plan_id

        workflow_dict = build_dict(
            #database_region_migration_detail = database_region_migration_detail,
            #database_region_migration = database_region_migration,
            #database = database,
            databaseinfra=databaseinfra,
            #source_environment = source_environment,
            target_environment=target_environment,
            steps=workflow_steps,
            #engine = engine,
            source_instances=source_instances,
            source_hosts=source_hosts,
            #source_plan = source_plan,
            target_plan=target_plan,
        )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if workflow_dict['created'] == False:

            if 'exceptions' in workflow_dict:
                error = "\n".join(
                    ": ".join(err)
                    for err in workflow_dict['exceptions']['error_codes'])
                traceback = "\nException Traceback\n".join(
                    workflow_dict['exceptions']['traceback'])
                error = "{}\n{}\n{}".format(error, traceback, error)
            else:
                error = "There is not any infra-structure to allocate this database."

            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details=error)

            return

        else:

            task_history.update_status_for(
                TaskHistory.STATUS_SUCCESS,
                details='Database region migration was successful')
            return

    except Exception as e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=traceback)
        return
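
Several examples repeat the same block for turning workflow_dict['exceptions'] (or result['exceptions']) into a task detail string, and as written they append the error text twice. A small illustrative helper, assuming the same {'error_codes': [...], 'traceback': [...]} shape, could do it once:

def format_workflow_errors(exceptions):
    # Hypothetical helper, not part of the project: join error codes and
    # tracebacks the same way the inline blocks above do, but emit the
    # error text a single time.
    error = "\n".join(": ".join(err) for err in exceptions['error_codes'])
    traceback = "\nException Traceback\n".join(exceptions['traceback'])
    return "{}\n{}".format(error, traceback)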
Example #40
0
def update_ssl(self,
               database,
               task,
               since_step=None,
               step_manager=None,
               scheduled_task=None,
               auto_rollback=False):
    from maintenance.models import UpdateSsl
    task = TaskHistory.register(request=self.request,
                                task_history=task,
                                user=task.user,
                                worker_name=get_worker_name())
    if step_manager:
        step_manager = step_manager
        step_manager.id = None
        step_manager.started_at = None
        since_step = step_manager.current_step
    else:
        retry_from = UpdateSsl.objects.filter(can_do_retry=True,
                                              database=database,
                                              status=UpdateSsl.ERROR).last()
        step_manager = UpdateSsl()
        if retry_from:
            step_manager.current_step = retry_from.current_step
            since_step = retry_from.current_step
            step_manager.task_schedule = retry_from.task_schedule
    step_manager.database = database
    step_manager.task = task
    if scheduled_task:
        step_manager.task_schedule = scheduled_task
    step_manager.set_running()
    step_manager.save()

    steps = database.databaseinfra.update_ssl_steps()

    hosts = []
    for instance in database.infra.instances.all():
        if instance.hostname not in hosts:
            hosts.append(instance.hostname)
    instances = []
    for host in hosts:
        instances.append(host.instances.all()[0])

    result = steps_for_instances(steps,
                                 instances,
                                 task,
                                 step_manager.update_step,
                                 since_step,
                                 step_manager=step_manager)
    step_manager = UpdateSsl.objects.get(id=step_manager.id)
    if result:
        step_manager.set_success()
        task.set_status_success('SSL updated successfully')
    else:
        step_manager.set_error()
        task.set_status_error('Could not update SSL')
        if auto_rollback:
            from workflow.workflow import rollback_for_instances_full
            new_task = task
            new_task.id = None
            new_task.details = ''
            new_task.task_name += '_rollback'
            new_task.task_status = new_task.STATUS_RUNNING
            new_task.save()
            rollback_step_manager = step_manager
            rollback_step_manager.id = None
            rollback_step_manager.task_schedule = None
            rollback_step_manager.can_do_retry = 0
            rollback_step_manager.save()
            result = rollback_for_instances_full(
                steps,
                instances,
                new_task,
                rollback_step_manager.get_current_step,
                rollback_step_manager.update_step,
            )
            if result:
                rollback_step_manager.set_success()
                task.set_status_success('SSL update rolled back successfully')
            else:
                if hasattr(rollback_step_manager, 'cleanup'):
                    rollback_step_manager.cleanup(instances)
                rollback_step_manager.set_error()
                task.set_status_error('Could not roll back SSL update')
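
update_ssl above resumes from since_step when a previous UpdateSsl run failed. The resume-from-last-failure idea in a generic, standalone form (steps_for_instances and the step manager are the project's own; the helper below is illustrative only):

def run_steps(steps, start_at=0):
    # Run callables in order; on failure return the index of the failing
    # step so a later retry can start from it (the role of since_step above).
    for index, step in enumerate(steps[start_at:], start=start_at):
        try:
            step()
        except Exception:
            return index
    return None

# usage sketch: failed_at = run_steps(steps); retry later with
# run_steps(steps, start_at=failed_at)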
Example #41
0
def make_databases_backup(self):
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, user=None
    )
    task_history.relevance = TaskHistory.RELEVANCE_ERROR

    backup_group_interval = Configuration.get_by_name_as_int(
        'backup_group_interval', default=1
    )
    waiting_msg = "\nWaiting {} minute(s) to start the next backup group".format(
        backup_group_interval
    )
    status = TaskHistory.STATUS_SUCCESS
    environments = Environment.objects.all()
    prod_envs = Environment.prod_envs()
    dev_envs = Environment.dev_envs()
    env_names_order = list(prod_envs) + list(dev_envs)
    if not env_names_order:
        env_names_order = [env.name for env in environments]

    current_time = datetime.now()
    current_hour = current_time.hour

    # Get all infras with a backup today until the current hour
    infras_with_backup_today = DatabaseInfra.objects.filter(
        instances__backup_instance__status=Snapshot.SUCCESS,
        backup_hour__lt=current_hour,
        plan__has_persistence=True,
        instances__backup_instance__end_at__year=current_time.year,
        instances__backup_instance__end_at__month=current_time.month,
        instances__backup_instance__end_at__day=current_time.day).distinct()

    # Get all infras with pending backups based on infras_with_backup_today
    infras_pending_backup = DatabaseInfra.objects.filter(
        backup_hour__lt=current_hour,
        plan__has_persistence=True,
    ).exclude(pk__in=[infra.pk for infra in infras_with_backup_today])

    # Get all infras to backup on the current hour
    infras_current_hour = DatabaseInfra.objects.filter(
        plan__has_persistence=True,
        backup_hour=current_time.hour
    )

    # Merging pending and current infras to backup list
    infras = infras_current_hour | infras_pending_backup

    for env_name in env_names_order:
        try:
            env = environments.get(name=env_name)
        except Environment.DoesNotExist:
            continue

        msg = '\nStarting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)
        databaseinfras_by_env = infras.filter(environment=env)
        error = {}
        backup_number = 0
        backups_per_group = len(infras) / 12
        for infra in databaseinfras_by_env:
            if not infra.databases.first():
                continue

            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    task_history.update_details(waiting_msg, True)
                    sleep(backup_group_interval*60)

            group = BackupGroup()
            group.save()

            instances_backup = infra.instances.filter(
                read_only=False, is_active=True
            )
            for instance in instances_backup:
                try:
                    driver = instance.databaseinfra.get_driver()
                    is_eligible = driver.check_instance_is_eligible_for_backup(
                        instance
                    )
                    if not is_eligible:
                        LOG.info(
                            'Instance {} is not eligible for backup'.format(
                                instance
                            )
                        )
                        continue
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(
                    time_now, instance
                )
                task_history.update_details(persist=True, details=start_msg)
                try:
                    snapshot = make_instance_snapshot_backup(
                        instance=instance, error=error, group=group,
                        current_hour=current_hour
                    )
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.was_error:
                        status = TaskHistory.STATUS_ERROR
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error['errormsg'])
                        LOG.error(msg)
                    else:
                        status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)

    task_history.update_status_for(status, details="\nBackup finished")

    return
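
make_databases_backup spaces the work out: after every backups_per_group snapshots it sleeps backup_group_interval minutes before starting the next group. A self-contained sketch of that grouping/throttling idea (names are illustrative, not the project's API):

from time import sleep

def in_throttled_groups(items, group_size, pause_minutes):
    # Yield items in order, pausing between groups of `group_size`; with
    # group_size <= 0 everything is yielded without a pause, mirroring the
    # backups_per_group > 0 check above.
    for index, item in enumerate(items):
        if group_size > 0 and index and index % group_size == 0:
            sleep(pause_minutes * 60)
        yield item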
Example #42
0
def make_databases_backup(self):

    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name, user=None)

    status = TaskHistory.STATUS_SUCCESS
    envs = Environment.objects.all()
    # TODO: back here to do right
    env_names_order = ['prod', 'qa2', 'dev-cta-nao-usar', 'dev']
    databaseinfras = DatabaseInfra.objects.filter(
        plan__provider=Plan.CLOUDSTACK, plan__has_persistence=True
    )

    for env_name in env_names_order:
        try:
            env = envs.get(name=env_name)
        except Environment.DoesNotExist:
            continue
        msg = 'Starting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)
        databaseinfras_by_env = databaseinfras.filter(environment=env)
        error = {}
        backup_number = 0
        backups_per_group = len(databaseinfras) / 12
        for databaseinfra in databaseinfras_by_env:
            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    waiting_msg = "\nWaiting 5 minutes to start the next backup group"
                    task_history.update_details(persist=True, details=waiting_msg)
                    time.sleep(300)

            instances = Instance.objects.filter(
                databaseinfra=databaseinfra, read_only=False
            )

            group = BackupGroup()
            group.save()

            for instance in instances:
                try:
                    if not instance.databaseinfra.get_driver().check_instance_is_eligible_for_backup(instance):
                        LOG.info('Instance %s is not eligible for backup' % (str(instance)))
                        continue
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(time.strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(time_now, instance)
                task_history.update_details(persist=True, details=start_msg)
                try:
                    snapshot = make_instance_snapshot_backup(
                        instance=instance, error=error, group=group
                    )
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error['errormsg'])
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(time.strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)

    task_history.update_status_for(status, details="\nBackup finished")

    return
Example #43
0
def execute_scheduled_maintenance(self, maintenance_id):
    LOG.debug("Maintenance id: {}".format(maintenance_id))
    maintenance = models.Maintenance.objects.get(id=maintenance_id)

    models.Maintenance.objects.filter(id=maintenance_id, ).update(
        status=maintenance.RUNNING, started_at=datetime.now())
    LOG.info("Maintenance {} is RUNNING".format(maintenance, ))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name)

    LOG.info("id: %s | task: %s | kwargs: %s | args: %s" %
             (self.request.id, self.request.task, self.request.kwargs,
              str(self.request.args)))

    task_history.update_details(
        persist=True, details="Executing Maintenance: {}".format(maintenance))

    for hm in models.HostMaintenance.objects.filter(maintenance=maintenance):
        main_output = {}
        hm.status = hm.RUNNING
        hm.started_at = datetime.now()
        hm.save()

        if hm.host is None:
            hm.status = hm.UNAVAILABLEHOST
            hm.finished_at = datetime.now()
            hm.save()
            continue

        host = hm.host
        update_task = "\nRunning Maintenance on {}".format(host)

        try:
            cloudstack_host_attributes = host.cs_host_attributes.get()
        except ObjectDoesNotExist as e:
            LOG.warn("Host {} does not have cloudstack attrs...{}".format(
                hm.host, e))
            hm.status = hm.UNAVAILABLECSHOSTATTR
            hm.finished_at = datetime.now()
            hm.save()
            continue

        param_dict = {}
        for param in models.MaintenanceParameters.objects.filter(
                maintenance=maintenance):
            param_function = _get_function(param.function_name)
            param_dict[param.parameter_name] = param_function(host.id)

        main_script = build_context_script(param_dict, maintenance.main_script)
        exit_status = exec_remote_command(
            server=host.address,
            username=cloudstack_host_attributes.vm_user,
            password=cloudstack_host_attributes.vm_password,
            command=main_script,
            output=main_output)

        if exit_status == 0:
            hm.status = hm.SUCCESS
        else:

            if maintenance.rollback_script:
                rollback_output = {}
                hm.status = hm.ROLLBACK
                hm.save()

                rollback_script = build_context_script(
                    param_dict, maintenance.rollback_script)
                exit_status = exec_remote_command(
                    server=host.address,
                    username=cloudstack_host_attributes.vm_user,
                    password=cloudstack_host_attributes.vm_password,
                    command=rollback_script,
                    output=rollback_output)

                if exit_status == 0:
                    hm.status = hm.ROLLBACK_SUCCESS
                else:
                    hm.status = hm.ROLLBACK_ERROR

                hm.rollback_log = get_dict_lines(rollback_output)

            else:
                hm.status = hm.ERROR

        update_task += "...status: {}".format(hm.status)

        task_history.update_details(persist=True, details=update_task)

        hm.main_log = get_dict_lines(main_output)
        hm.finished_at = datetime.now()
        hm.save()
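
The maintenance examples run a main script on the host and, if it fails and a rollback script exists, run the rollback and record its outcome. A generic local sketch of that flow, using the standard library in place of the project's remote-execution helpers (purely illustrative):

import subprocess

def run_with_rollback(main_script, rollback_script=None):
    # Returns a status string analogous to hm.status above: SUCCESS, ERROR,
    # ROLLBACK_SUCCESS or ROLLBACK_ERROR.
    if subprocess.call(main_script, shell=True) == 0:
        return "SUCCESS"
    if not rollback_script:
        return "ERROR"
    if subprocess.call(rollback_script, shell=True) == 0:
        return "ROLLBACK_SUCCESS"
    return "ROLLBACK_ERROR"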
Example #44
0
def execute_scheduled_maintenance(self, maintenance_id):
    LOG.debug("Maintenance id: {}".format(maintenance_id))
    maintenance = models.Maintenance.objects.get(id=maintenance_id)
    models.Maintenance.objects.filter(id=maintenance_id).update(
        status=maintenance.RUNNING, started_at=datetime.now()
    )
    LOG.info("Maintenance {} is RUNNING".format(maintenance,))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name
    )
    task_history.relevance = TaskHistory.RELEVANCE_CRITICAL
    LOG.info("id: {} | task: {} | kwargs: {} | args: {}".format(
        self.request.id, self.request.task,
        self.request.kwargs, str(self.request.args)
    ))
    task_history.update_details(
        persist=True, details="Executing Maintenance: {}".format(maintenance)
    )
    for hm in models.HostMaintenance.objects.filter(maintenance=maintenance):
        main_output = {}
        hm.status = hm.RUNNING
        hm.started_at = datetime.now()
        hm.save()
        if hm.host is None:
            hm.status = hm.UNAVAILABLEHOST
            hm.finished_at = datetime.now()
            hm.save()
            continue

        host = hm.host
        update_task = "\nRunning Maintenance on {}".format(host)

        if maintenance.disable_alarms:
            disable_alarms(hm.host)

        param_dict = {}
        params = models.MaintenanceParameters.objects.filter(
            maintenance=maintenance
        )
        for param in params:
            param_function = get_function(param.function_name)
            param_dict[param.parameter_name] = param_function(host.id)

        main_script = build_context_script(param_dict, maintenance.main_script)
        exit_status = exec_remote_command_host(host, main_script, main_output)

        if exit_status == 0:
            hm.status = hm.SUCCESS
        else:

            if maintenance.rollback_script:
                rollback_output = {}
                hm.status = hm.ROLLBACK
                hm.save()

                rollback_script = build_context_script(
                    param_dict, maintenance.rollback_script
                )
                exit_status = exec_remote_command_host(
                    host, rollback_script, rollback_output
                )

                if exit_status == 0:
                    hm.status = hm.ROLLBACK_SUCCESS
                else:
                    hm.status = hm.ROLLBACK_ERROR

                hm.rollback_log = get_dict_lines(rollback_output)

            else:
                hm.status = hm.ERROR

        if maintenance.disable_alarms:
            enable_alarms(hm.host)

        update_task += "...status: {}".format(hm.status)

        task_history.update_details(persist=True, details=update_task)

        hm.main_log = get_dict_lines(main_output)
        hm.finished_at = datetime.now()
        hm.save()

    models.Maintenance.objects.filter(id=maintenance_id).update(
        status=maintenance.FINISHED, finished_at=datetime.now()
    )
    task_history.update_status_for(
        TaskHistory.STATUS_SUCCESS, details='Maintenance executed successfully'
    )
    LOG.info("Maintenance: {} has FINISHED".format(maintenance))
Example #45
0
def execute_scheduled_maintenance(self, maintenance_id):
    LOG.debug("Maintenance id: {}".format(maintenance_id))
    maintenance = models.Maintenance.objects.get(id=maintenance_id)

    models.Maintenance.objects.filter(id=maintenance_id).update(
        status=maintenance.RUNNING, started_at=datetime.now())
    LOG.info("Maintenance {} is RUNNING".format(maintenance,))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request, worker_name=worker_name)

    LOG.info("id: %s | task: %s | kwargs: %s | args: %s" % (
            self.request.id, self.request.task, self.request.kwargs, str(self.request.args)))

    task_history.update_details(persist=True,
        details="Executing Maintenance: {}".format(maintenance))

    for hm in models.HostMaintenance.objects.filter(maintenance=maintenance):
        main_output = {}
        hm.status = hm.RUNNING
        hm.started_at = datetime.now()
        hm.save()

        if hm.host is None:
            hm.status = hm.UNAVAILABLEHOST
            hm.finished_at = datetime.now()
            hm.save()
            continue

        host = hm.host
        update_task = "\nRunning Maintenance on {}".format(host)

        try:
            cloudstack_host_attributes = host.cs_host_attributes.get()
        except ObjectDoesNotExist as e:
            LOG.warn("Host {} does not have cloudstack attrs...{}".format(hm.host, e))
            hm.status = hm.UNAVAILABLECSHOSTATTR
            hm.finished_at = datetime.now()
            hm.save()
            continue

        param_dict = {}
        for param in models.MaintenanceParameters.objects.filter(maintenance=maintenance):
            param_function = _get_function(param.function_name)
            param_dict[param.parameter_name] = param_function(host.id)

        main_script = build_context_script(param_dict, maintenance.main_script)
        exit_status = exec_remote_command(server=host.address,
            username=cloudstack_host_attributes.vm_user,
            password=cloudstack_host_attributes.vm_password,
            command=main_script, output=main_output)

        if exit_status == 0:
            hm.status = hm.SUCCESS
        else:

            if maintenance.rollback_script:
                rollback_output = {}
                hm.status = hm.ROLLBACK
                hm.save()

                rollback_script = build_context_script(param_dict, maintenance.rollback_script)
                exit_status = exec_remote_command(server=host.address,
                    username=cloudstack_host_attributes.vm_user,
                    password=cloudstack_host_attributes.vm_password,
                    command=rollback_script, output=rollback_output)

                if exit_status == 0:
                    hm.status = hm.ROLLBACK_SUCCESS
                else:
                    hm.status = hm.ROLLBACK_ERROR

                hm.rollback_log = get_dict_lines(rollback_output)

            else:
                hm.status = hm.ERROR

        update_task += "...status: {}".format(hm.status)

        task_history.update_details(persist=True,
            details=update_task)

        hm.main_log = get_dict_lines(main_output)
        hm.finished_at = datetime.now()
        hm.save()
Example #46
0
def bind_address_on_database(self,
                             database,
                             acl_environment,
                             acl_vlan,
                             action="permit",
                             user=None):
    if not user:
        user = self.request.args[-1]

    LOG.info("User: {}, action: {}".format(user, action))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        user=user,
                                        worker_name=worker_name)
    LOG.info("id: %s | task: %s | kwargs: %s | args: %s" %
             (self.request.id, self.request.task, self.request.kwargs,
              str(self.request.args)))

    task_history.update_details(persist=True, details="Loading Process...")

    try:
        if action == "permit":
            bind_status = models.CREATING
        else:
            bind_status = models.DESTROYING

        LOG.info(
            "Params database: {}, acl_environment: {}, acl_vlan: {}, action: {}, bind_status: {}"
            .format(database, acl_environment, acl_vlan, action, bind_status))

        job = tasks.bind_unbind_address_on_database(
            database=database,
            acl_environment=acl_environment,
            acl_vlan=acl_vlan,
            action="permit",
            bind_status=bind_status)

        if not job:
            raise Exception, "Error when executing the Bind"

        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='Bind created successfully')

        if bind_status == models.CREATING:
            bind_status = models.CREATED
        else:
            bind_status = models.ERROR

        LOG.debug("Bind Status: {}".format(bind_status))

        monitor_acl_job.delay(database,
                              job,
                              acl_environment + '/' + acl_vlan,
                              bind_status,
                              user=user)
        return

    except Exception as e:
        LOG.info("DatabaseBind ERROR: {}".format(e))
        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details='Bind could not be created')
        return
Example #47
0
def restore_snapshot(self, database, snapshot, user, task_history):
    try:
        from dbaas_nfsaas.models import HostAttr
        LOG.info("Restoring snapshot")
        worker_name = get_worker_name()

        task_history = models.TaskHistory.objects.get(id=task_history)
        task_history = TaskHistory.register(request=self.request, task_history=task_history,
                                            user=user, worker_name=worker_name)

        databaseinfra = database.databaseinfra

        snapshot = Snapshot.objects.get(id=snapshot)
        snapshot_id = snapshot.snapshopt_id

        host_attr_snapshot = HostAttr.objects.get(nfsaas_path=snapshot.export_path)
        host = host_attr_snapshot.host
        host_attr = HostAttr.objects.get(host=host, is_active=True)

        export_id_snapshot = host_attr_snapshot.nfsaas_export_id
        export_id = host_attr.nfsaas_export_id
        export_path = host_attr.nfsaas_path

        steps = get_restore_snapshot_settings(
            database.plan.replication_topology.class_path
        )

        not_primary_instances = databaseinfra.instances.exclude(hostname=host).exclude(instance_type__in=[Instance.MONGODB_ARBITER,
                                                                                                          Instance.REDIS_SENTINEL])
        not_primary_hosts = [
            instance.hostname for instance in not_primary_instances]

        workflow_dict = build_dict(databaseinfra=databaseinfra,
                                   database=database,
                                   snapshot_id=snapshot_id,
                                   export_path=export_path,
                                   export_id=export_id,
                                   export_id_snapshot=export_id_snapshot,
                                   host=host,
                                   steps=steps,
                                   not_primary_hosts=not_primary_hosts,
                                   )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if workflow_dict['exceptions']['traceback']:
            raise Exception('Restore could not be finished')
        else:
            task_history.update_status_for(
                TaskHistory.STATUS_SUCCESS, details='Database successfully recovered!')

    except Exception as e:
        if 'workflow_dict' in locals():
            error = "\n".join(": ".join(err) for err in
                              workflow_dict['exceptions']['error_codes'])
            traceback = "\nException Traceback\n".join(workflow_dict['exceptions']['traceback'])
            error = "{}\n{}\n{}".format(error, traceback, error)
        else:
            error = str(e)
        task_history.update_status_for(
            TaskHistory.STATUS_ERROR, details=error)
Example #48
0
def execute_scheduled_maintenance(self, maintenance_id):
    LOG.debug("Maintenance id: {}".format(maintenance_id))
    maintenance = models.Maintenance.objects.get(id=maintenance_id)
    models.Maintenance.objects.filter(id=maintenance_id).update(
        status=maintenance.RUNNING, started_at=datetime.now())
    LOG.info("Maintenance {} is RUNNING".format(maintenance, ))

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name)
    task_history.relevance = TaskHistory.RELEVANCE_CRITICAL
    LOG.info("id: {} | task: {} | kwargs: {} | args: {}".format(
        self.request.id, self.request.task, self.request.kwargs,
        str(self.request.args)))
    task_history.update_details(
        persist=True, details="Executing Maintenance: {}".format(maintenance))
    for hm in models.HostMaintenance.objects.filter(maintenance=maintenance):
        # main_output = {}
        hm.status = hm.RUNNING
        hm.started_at = datetime.now()
        hm.save()
        if hm.host is None:
            hm.status = hm.UNAVAILABLEHOST
            hm.finished_at = datetime.now()
            hm.save()
            continue

        host = hm.host
        update_task = "\nRunning Maintenance on {}".format(host)

        if maintenance.disable_alarms:
            disable_alarms(hm.host)

        param_dict = {}
        params = models.MaintenanceParameters.objects.filter(
            maintenance=maintenance)
        for param in params:
            param_function = get_function(param.function_name)
            param_dict[param.parameter_name] = param_function(host.id)

        main_script = build_context_script(param_dict, maintenance.main_script)
        main_output = host.ssh.run_script(script=main_script,
                                          raise_if_error=False)

        if main_output['exit_code'] == 0:
            hm.status = hm.SUCCESS
        else:
            if maintenance.rollback_script:
                hm.status = hm.ROLLBACK
                hm.save()

                rollback_script = build_context_script(
                    param_dict, maintenance.rollback_script)
                rollback_output = host.ssh.run_script(script=rollback_script,
                                                      raise_if_error=False)

                if rollback_output['exit_code'] == 0:
                    hm.status = hm.ROLLBACK_SUCCESS
                else:
                    hm.status = hm.ROLLBACK_ERROR

                hm.rollback_log = get_dict_lines(rollback_output)

            else:
                hm.status = hm.ERROR

        if maintenance.disable_alarms:
            enable_alarms(hm.host)

        update_task += "...status: {}".format(hm.status)

        task_history.update_details(persist=True, details=update_task)

        hm.main_log = get_dict_lines(main_output)
        hm.finished_at = datetime.now()
        hm.save()

    models.Maintenance.objects.filter(id=maintenance_id).update(
        status=maintenance.FINISHED, finished_at=datetime.now())
    task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                   details='Maintenance executed successfully')
    LOG.info("Maintenance: {} has FINISHED".format(maintenance))
Example #49
0
def volume_migration(self, database, user, task_history=None):
    from dbaas_nfsaas.models import HostAttr, PlanAttr
    from workflow.settings import VOLUME_MIGRATION
    from util import build_dict
    from workflow.workflow import start_workflow
    from time import sleep

    def switch_master(databaseinfra, instance):
        driver = databaseinfra.get_driver()
        for attempt in range(0, 21):
            if driver.is_replication_ok(instance):
                driver.switch_master()
                return
            LOG.info("Waiting 10s to check replication...")
            sleep(10)

    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        task_history=task_history,
                                        user=user,
                                        worker_name=worker_name)

    stop_now = False
    if database.status != Database.ALIVE or not database.database_status.is_alive:
        msg = "Database is not alive!"
        stop_now = True

    if database.is_beeing_used_elsewhere(task_id=self.request.id):
        msg = "Database is in use by another task!"
        stop_now = True

    if database.has_migration_started():
        msg = "Region migration for this database has already started!"
        stop_now = True

    if stop_now:
        task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details=msg)
        LOG.info("Migration finished")
        return

    default_plan_size = PlanAttr.objects.get(
        dbaas_plan=database.plan).nfsaas_plan
    LOG.info("Migrating {} volumes".format(database))

    databaseinfra = database.databaseinfra
    driver = databaseinfra.get_driver()

    environment = database.environment
    plan = database.plan

    instances = driver.get_slave_instances()
    master_instance = driver.get_master_instance()
    instances.append(master_instance)
    LOG.info('Instances: {}'.format(str(instances)))

    hosts = [instance.hostname for instance in instances]
    volumes = HostAttr.objects.filter(host__in=hosts,
                                      is_active=True,
                                      nfsaas_size_id=default_plan_size)

    if len(volumes) == len(hosts):
        task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                       details='Volumes already migrated!')
        LOG.info("Migration finished")
        return

    for index, instance in enumerate(instances):
        if not driver.check_instance_is_eligible_for_backup(instance=instance):
            LOG.info('Instance is not eligible for backup {}'.format(
                str(instance)))
            continue

        LOG.info('Volume migration for instance {}'.format(str(instance)))
        host = instance.hostname
        old_volume = HostAttr.objects.get(host=host, is_active=True)

        if old_volume.nfsaas_size_id == default_plan_size:
            if databaseinfra.plan.is_ha:
                switch_master(databaseinfra, instance)
            continue

        workflow_dict = build_dict(
            databaseinfra=databaseinfra,
            database=database,
            environment=environment,
            plan=plan,
            host=host,
            instance=instance,
            old_volume=old_volume,
            steps=VOLUME_MIGRATION,
        )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if workflow_dict['exceptions']['traceback']:
            error = "\n".join(
                ": ".join(err)
                for err in workflow_dict['exceptions']['error_codes'])
            traceback = "\nException Traceback\n".join(
                workflow_dict['exceptions']['traceback'])
            error = "{}\n{}\n{}".format(error, traceback, error)
            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details=error)
            LOG.info("Migration finished with errors")
            return

        if databaseinfra.plan.is_ha:
            LOG.info("Waiting 60s to check continue...")
            sleep(60)
            switch_master(databaseinfra, instance)
            LOG.info("Waiting 60s to check continue...")
            sleep(60)

    task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                   details='Volumes successfully migrated!')

    LOG.info("Migration finished")

    return
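
switch_master in the example above polls is_replication_ok up to 21 times, sleeping 10 seconds between checks. The same wait-until-healthy pattern as a standalone sketch (illustrative names only):

from time import sleep

def wait_until(condition, attempts=21, interval_seconds=10):
    # Re-check `condition` a fixed number of times with a fixed pause,
    # returning whether it ever became true.
    for _ in range(attempts):
        if condition():
            return True
        sleep(interval_seconds)
    return False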
Example #50
0
def execute_database_region_migration_undo(self,
                                           database_region_migration_detail_id,
                                           task_history=None,
                                           user=None):
    #AuditRequest.new_request("execute_database_region_migration", user, "localhost")
    try:

        if task_history:
            arguments = task_history.arguments
        else:
            arguments = None

        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=get_worker_name())

        if arguments:
            task_history.arguments = arguments
            task_history.save()

        database_region_migration_detail = DatabaseRegionMigrationDetail.objects.get(
            id=database_region_migration_detail_id)
        database_region_migration = database_region_migration_detail.database_region_migration
        database = database_region_migration.database
        databaseinfra = database.databaseinfra
        source_environment = databaseinfra.environment
        target_environment = source_environment.equivalent_environment
        engine = database.engine_type
        steps = get_engine_steps(engine)
        workflow_steps = steps[
            database_region_migration_detail.step].step_classes
        source_instances = []
        source_hosts = []
        for instance in databaseinfra.instances.filter(
                future_instance__isnull=False):
            source_instances.append(instance)
            source_hosts.append(instance.hostname)

        target_instances = []
        target_hosts = []
        for instance in databaseinfra.instances.filter(
                future_instance__isnull=True):
            target_instances.append(instance)
            target_hosts.append(instance.hostname)

        source_plan = databaseinfra.plan
        target_plan = source_plan.equivalent_plan_id

        workflow_dict = build_dict(
            database_region_migration_detail=database_region_migration_detail,
            database_region_migration=database_region_migration,
            database=database,
            databaseinfra=databaseinfra,
            source_environment=source_environment,
            target_environment=target_environment,
            steps=workflow_steps,
            engine=engine,
            source_instances=source_instances,
            source_plan=source_plan,
            target_plan=target_plan,
            source_hosts=source_hosts,
            target_instances=target_instances,
            target_hosts=target_hosts)

        stop_workflow(workflow_dict=workflow_dict, task=task_history)

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Database region migration was successful')

    except Exception as e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=traceback)
        return
Example #51
0
def clone_database(self,
                   origin_database,
                   clone_name,
                   plan,
                   environment,
                   task_history=None,
                   user=None):
    AuditRequest.new_request("clone_database", user, "localhost")
    try:
        worker_name = get_worker_name()
        LOG.info("id: %s | task: %s | kwargs: %s | args: %s" %
                 (self.request.id, self.request.task, self.request.kwargs,
                  str(self.request.args)))

        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=worker_name)

        LOG.info("origin_database: %s" % origin_database)

        task_history.update_details(persist=True, details="Loading Process...")
        result = clone_infra(
            plan=plan,
            environment=environment,
            name=clone_name,
            team=origin_database.team,
            project=origin_database.project,
            description=origin_database.description,
            task=task_history,
            clone=origin_database,
        )

        if result['created'] == False:

            if 'exceptions' in result:
                error = "\n\n".join(
                    ": ".join(err)
                    for err in result['exceptions']['error_codes'])
                traceback = "\n\nException Traceback\n".join(
                    result['exceptions']['traceback'])
                error = "{}\n{}".format(error, traceback)
            else:
                error = "There is not any infra-structure to allocate this database."

            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details=error)
            return

        task_history.update_dbid(db=result['database'])
        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='\nDatabase cloned successfully')

    except SoftTimeLimitExceeded:
        LOG.error("task id %s - timeout exceeded" % self.request.id)
        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details="timeout exceeded")
        if 'result' in locals() and result['created']:
            destroy_infra(databaseinfra=result['databaseinfra'],
                          task=task_history)
            return
    except Exception as e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        if 'result' in locals() and result['created']:
            destroy_infra(databaseinfra=result['databaseinfra'],
                          task=task_history)

        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=traceback)

        return
Example #52
0
def change_parameters_database(self, database, user, task, since_step=0):
    worker_name = get_worker_name()
    task = TaskHistory.register(self.request, user, task, worker_name)

    infra = database.infra
    plan = infra.plan
    class_path = plan.replication_topology.class_path

    from physical.models import DatabaseInfraParameter
    changed_parameters = DatabaseInfraParameter.get_databaseinfra_changed_parameters(
        databaseinfra=infra,
    )
    all_dinamic = True
    custom_procedure = None
    for changed_parameter in changed_parameters:
        if changed_parameter.parameter.dynamic is False:
            all_dinamic = False
            break
    for changed_parameter in changed_parameters:
        if changed_parameter.parameter.custom_method:
            custom_procedure = changed_parameter.parameter.custom_method
            break

    steps = get_database_change_parameter_setting(
        class_path, all_dinamic, custom_procedure)

    LOG.info(steps)

    task.add_detail("Changed parameters:", level=0)
    for changed_parameter in changed_parameters:
        msg = "{}: old value: [{}], new value: [{}]".format(
            changed_parameter.parameter.name,
            changed_parameter.current_value,
            changed_parameter.value

        )
        task.add_detail(msg, level=1)
    task.add_detail("", level=0)

    if since_step > 0:
        steps_dec = get_database_change_parameter_retry_steps_count(
            class_path, all_dinamic, custom_procedure)
        LOG.info('since_step: {}, steps_dec: {}'.format(since_step, steps_dec))
        since_step = since_step - steps_dec
        if since_step < 0:
            since_step = 0

    database_change_parameter = DatabaseChangeParameter()
    database_change_parameter.database = database
    database_change_parameter.task = task
    database_change_parameter.save()

    instances_to_change_parameters = infra.get_driver().get_database_instances()

    success = steps_for_instances(
        steps, instances_to_change_parameters, task,
        database_change_parameter.update_step, since_step
    )

    if success:
        database_change_parameter.set_success()
        task.update_status_for(TaskHistory.STATUS_SUCCESS, 'Done')
    else:
        database_change_parameter.set_error()
        task.update_status_for(
            TaskHistory.STATUS_ERROR,
            'Could not change the database parameters.\nChange parameters does not have rollback'
        )
Example #53
0
def execute_database_region_migration(self, database_region_migration_detail_id, task_history=None, user=None):
    #AuditRequest.new_request("execute_database_region_migration", user, "localhost")
    try:
    
        if task_history:
            arguments = task_history.arguments
        else:
            arguments = None
    
        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=get_worker_name())

        if arguments:
            task_history.arguments = arguments
            task_history.save()
    
        database_region_migration_detail = DatabaseRegionMigrationDetail.objects.get(id=database_region_migration_detail_id)
        database_region_migration = database_region_migration_detail.database_region_migration
        database = database_region_migration.database
        databaseinfra = database.databaseinfra
        source_environment = databaseinfra.environment
        target_environment = source_environment.equivalent_environment
        engine = database.engine_type
        steps = get_engine_steps(engine)
        workflow_steps = steps[database_region_migration_detail.step].step_classes
        source_instances = []
        source_hosts = []
        for instance in Instance.objects.filter(databaseinfra=databaseinfra):
            source_instances.append(instance)
            if instance.instance_type != instance.REDIS_SENTINEL:
                source_hosts.append(instance.hostname)
    
        source_plan = databaseinfra.plan
        target_plan = source_plan.equivalent_plan_id
    
        workflow_dict = build_dict(
            #database_region_migration_detail=database_region_migration_detail,
            #database_region_migration=database_region_migration,
            #database=database,
            databaseinfra=databaseinfra,
            #source_environment=source_environment,
            target_environment=target_environment,
            steps=workflow_steps,
            #engine=engine,
            source_instances=source_instances,
            source_hosts=source_hosts,
            #source_plan=source_plan,
            target_plan=target_plan,
        )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if workflow_dict['created'] == False:

            if 'exceptions' in workflow_dict:
                error = "\n".join(": ".join(err) for err in workflow_dict['exceptions']['error_codes'])
                traceback = "\nException Traceback\n".join(workflow_dict['exceptions']['traceback'])
                error = "{}\n{}\n{}".format(error, traceback, error)
            else:
                error = "There is not any infra-structure to allocate this database."

            task_history.update_status_for(TaskHistory.STATUS_ERROR, details=error)

            return

        else:

            task_history.update_status_for(TaskHistory.STATUS_SUCCESS, details='Database region migration was successful')
            return

    except Exception as e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=traceback)
        return
Example #54
0
def resize_database(self,
                    database,
                    cloudstackpack,
                    task_history=None,
                    user=None):
    AuditRequest.new_request("resize_database", user, "localhost")

    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=worker_name)
        from util.providers import resize_database_instances
        from util import get_credentials_for
        from dbaas_cloudstack.provider import CloudStackProvider
        from dbaas_credentials.models import CredentialType

        cs_credentials = get_credentials_for(
            environment=database.environment,
            credential_type=CredentialType.CLOUDSTACK)
        cs_provider = CloudStackProvider(credentials=cs_credentials)

        databaseinfra = database.databaseinfra
        driver = databaseinfra.get_driver()
        instances = driver.get_slave_instances()
        instances.append(driver.get_master_instance())
        instances_to_resize = []
        resized_instances = []

        disable_zabbix_alarms(database)

        for instance in instances:
            host = instance.hostname
            host_attr = host.cs_host_attributes.get()
            offering_id = cs_provider.get_vm_offering_id(
                vm_id=host_attr.vm_id, project_id=cs_credentials.project)

            if offering_id == cloudstackpack.offering.serviceofferingid:
                LOG.info("Instance offering: {}".format(offering_id))
                resized_instances.append(instance)
            else:
                instances_to_resize.append(instance)

        result = resize_database_instances(database=database,
                                           cloudstackpack=cloudstackpack,
                                           instances=instances_to_resize,
                                           task=task_history)

        if result['created']:
            resized_instances += result['completed_instances']
        else:
            if 'exceptions' not in result:
                error = "Something went wrong."
            else:
                error = "\n".join(
                    ": ".join(err)
                    for err in result['exceptions']['error_codes'])
                traceback = "\nException Traceback\n".join(
                    result['exceptions']['traceback'])

                error = "{}\n{}\n{}".format(error, traceback, error)

        if databaseinfra.plan.is_ha:
            LOG.info("Waiting 60s to check continue...")
            sleep(60)
            instance = driver.get_slave_instances()[0]
            driver.check_replication_and_switch(instance)

        if len(instances) == len(resized_instances):
            from dbaas_cloudstack.models import DatabaseInfraOffering
            LOG.info('Updating offering DatabaseInfra.')

            databaseinfraoffering = DatabaseInfraOffering.objects.get(
                databaseinfra=databaseinfra)
            databaseinfraoffering.offering = cloudstackpack.offering
            databaseinfraoffering.save()

            if databaseinfra.engine.engine_type.name == 'redis':
                new_max_memory = databaseinfraoffering.offering.memory_size_mb
                resize_factor = 0.5
                if new_max_memory > 1024:
                    resize_factor = 0.75

                new_max_memory *= resize_factor
                databaseinfra.per_database_size_mbytes = int(new_max_memory)
                databaseinfra.save()

            task_history.update_status_for(TaskHistory.STATUS_SUCCESS,
                                           details='Resize successfully done.')
            return

        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=error)
        return

    except Exception as e:
        error = "Resize Database ERROR: {}".format(e)
        LOG.error(error)
        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=error)

    finally:
        enable_zabbix_alarms(database)
        AuditRequest.cleanup_request()
Example #55
0
def register_task_history(self, task):
    return TaskHistory.register(request=self.request,
                                task_history=task,
                                user=task.user,
                                worker_name=get_worker_name())
Example #56
0
def database_disk_resize(self, database, disk_offering, task_history, user):
    from dbaas_nfsaas.models import HostAttr
    from workflow.steps.util.nfsaas_utils import resize_disk

    AuditRequest.new_request("database_disk_resize", user, "localhost")

    databaseinfra = database.databaseinfra
    old_disk_offering = database.databaseinfra.disk_offering
    resized = []

    try:
        worker_name = get_worker_name()
        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=worker_name)

        task_history.update_details(persist=True,
                                    details='\nLoading Disk offering')

        for instance in databaseinfra.get_driver().get_database_instances():
            if not HostAttr.objects.filter(
                    host_id=instance.hostname_id).exists():
                continue

            task_history.update_details(persist=True,
                                        details='\nChanging instance {} to '
                                        'NFS {}'.format(
                                            instance, disk_offering))
            if resize_disk(environment=database.environment,
                           host=instance.hostname,
                           disk_offering=disk_offering):
                resized.append(instance)

        task_history.update_details(
            persist=True,
            details='\nUpdate DBaaS metadata from {} to '
            '{}'.format(databaseinfra.disk_offering, disk_offering))
        databaseinfra.disk_offering = disk_offering
        databaseinfra.save()

        task_history.update_status_for(
            status=TaskHistory.STATUS_SUCCESS,
            details='\nDisk resize successfully done.')
        return True

    except Exception as e:
        error = "Disk resize ERROR: {}".format(e)
        LOG.error(error)

        if databaseinfra.disk_offering != old_disk_offering:
            task_history.update_details(persist=True,
                                        details='\nUndo update DBaaS metadata')
            databaseinfra.disk_offering = old_disk_offering
            databaseinfra.save()

        for instance in resized:
            task_history.update_details(
                persist=True,
                details='\nUndo NFS change for instance {}'.format(instance))
            resize_disk(environment=database.environment,
                        host=instance.hostname,
                        disk_offering=old_disk_offering)

        task_history.update_status_for(TaskHistory.STATUS_ERROR, details=error)
    finally:
        AuditRequest.cleanup_request()
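
The structure above is a resize-then-rollback pattern: every instance whose NFS disk was changed is appended to resized, and the except block first restores the DBaaS metadata and then undoes each recorded instance. The same pattern in isolation, with hypothetical apply/undo callables:

def apply_with_rollback(items, apply_change, undo_change):
    # Apply a change item by item; on any failure, undo only what has
    # already been changed, newest first, and re-raise.
    changed = []
    try:
        for item in items:
            if apply_change(item):
                changed.append(item)
        return True
    except Exception:
        for item in reversed(changed):
            undo_change(item)
        raise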
Example #57
0
def execute_database_region_migration_undo(self,
                                           database_region_migration_detail_id,
                                           task_history=None,
                                           user=None):
    AuditRequest.new_request("execute_database_region_migration", user,
                             "localhost")
    try:

        if task_history:
            arguments = task_history.arguments
        else:
            arguments = None

        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=get_worker_name())

        if arguments:
            task_history.arguments = arguments
            task_history.save()

        database_region_migration_detail = DatabaseRegionMigrationDetail.objects.get(
            id=database_region_migration_detail_id)

        database_region_migration_detail.started_at = datetime.now()
        database_region_migration_detail.status = database_region_migration_detail.RUNNING
        database_region_migration_detail.is_migration_up = False
        database_region_migration_detail.save()

        database_region_migration = database_region_migration_detail.database_region_migration
        database = database_region_migration.database
        databaseinfra = database.databaseinfra
        source_environment = databaseinfra.environment
        target_environment = source_environment.equivalent_environment
        engine = database.engine_type
        steps = get_engine_steps(engine)
        workflow_steps = steps[
            database_region_migration_detail.step].step_classes
        source_instances = []
        source_hosts = []
        for instance in databaseinfra.instances.filter(
                future_instance__isnull=False):
            source_instances.append(instance)
            if instance.instance_type != instance.REDIS:
                source_hosts.append(instance.hostname)

        target_instances = []
        target_hosts = []
        for instance in databaseinfra.instances.filter(
                future_instance__isnull=True):
            target_instances.append(instance)
            if instance.instance_type != instance.REDIS:
                target_hosts.append(instance.hostname)

        source_plan = databaseinfra.plan
        target_plan = source_plan.equivalent_plan_id

        if not source_hosts:
            raise Exception('There is no source host')
        if not source_instances:
            raise Exception('There is no source instance')
        if not target_hosts:
            raise Exception('There is no target host')
        if not target_instances:
            raise Exception('There is no target instance')

        source_secondary_ips = DatabaseInfraAttr.objects.filter(
            databaseinfra=databaseinfra, equivalent_dbinfraattr__isnull=False)

        source_secondary_ips = list(source_secondary_ips)

        workflow_dict = build_dict(
            database_region_migration_detail=database_region_migration_detail,
            database_region_migration=database_region_migration,
            database=database,
            databaseinfra=databaseinfra,
            source_environment=source_environment,
            target_environment=target_environment,
            steps=workflow_steps,
            engine=engine,
            source_instances=source_instances,
            source_plan=source_plan,
            target_plan=target_plan,
            source_hosts=source_hosts,
            target_instances=target_instances,
            target_hosts=target_hosts,
            source_secondary_ips=source_secondary_ips,
        )

        stop_workflow(workflow_dict=workflow_dict, task=task_history)

        current_step = database_region_migration.current_step
        database_region_migration.current_step = current_step - 1
        database_region_migration.save()

        database_region_migration_detail.status = database_region_migration_detail.SUCCESS
        database_region_migration_detail.finished_at = datetime.now()
        database_region_migration_detail.save()

        task_history.update_status_for(
            TaskHistory.STATUS_SUCCESS,
            details='Database region migration was successful')

    except Exception, e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=traceback)

        database_region_migration_detail.status = database_region_migration_detail.ERROR
        database_region_migration_detail.finished_at = datetime.now()
        database_region_migration_detail.save()

        return
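
Note that this undo task moves the migration cursor back one step after stop_workflow succeeds, while the forward task in Example #58 advances it after start_workflow. A tiny sketch of that bookkeeping on a hypothetical in-memory object:

class MigrationCursor(object):
    # Hypothetical stand-in for DatabaseRegionMigration.current_step handling.
    def __init__(self):
        self.current_step = 0

    def advance(self):    # forward step finished successfully
        self.current_step += 1

    def step_back(self):  # undo step finished successfully
        self.current_step = max(self.current_step - 1, 0)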
Example #58
0
def execute_database_region_migration(self,
                                      database_region_migration_detail_id,
                                      task_history=None,
                                      user=None):
    AuditRequest.new_request("execute_database_region_migration", user,
                             "localhost")
    try:

        if task_history:
            arguments = task_history.arguments
        else:
            arguments = None

        task_history = TaskHistory.register(request=self.request,
                                            task_history=task_history,
                                            user=user,
                                            worker_name=get_worker_name())

        if arguments:
            task_history.arguments = arguments
            task_history.save()

        database_region_migration_detail = DatabaseRegionMigrationDetail.objects.get(
            id=database_region_migration_detail_id)

        database_region_migration_detail.started_at = datetime.now()
        database_region_migration_detail.status = database_region_migration_detail.RUNNING
        database_region_migration_detail.save()

        database_region_migration = database_region_migration_detail.database_region_migration
        database = database_region_migration.database
        databaseinfra = database.databaseinfra
        source_environment = databaseinfra.environment
        target_environment = source_environment.equivalent_environment
        engine = database.engine_type
        steps = get_engine_steps(engine)
        workflow_steps = steps[
            database_region_migration_detail.step].step_classes
        source_instances = []
        source_hosts = []
        for instance in Instance.objects.filter(databaseinfra=databaseinfra):
            if database_region_migration.current_step > 0 and not instance.future_instance:
                continue
            source_instances.append(instance)
            if instance.instance_type != instance.REDIS:
                source_hosts.append(instance.hostname)

        source_plan = databaseinfra.plan
        target_plan = source_plan.equivalent_plan

        source_offering = databaseinfra.cs_dbinfra_offering.get().offering
        target_offering = source_offering.equivalent_offering

        source_secondary_ips = []

        for secondary_ip in DatabaseInfraAttr.objects.filter(
                databaseinfra=databaseinfra):
            if database_region_migration.current_step > 0 and\
                    not secondary_ip.equivalent_dbinfraattr:
                continue
            source_secondary_ips.append(secondary_ip)

        workflow_dict = build_dict(
            databaseinfra=databaseinfra,
            database=database,
            source_environment=source_environment,
            target_environment=target_environment,
            steps=workflow_steps,
            source_instances=source_instances,
            source_hosts=source_hosts,
            source_plan=source_plan,
            target_plan=target_plan,
            source_offering=source_offering,
            target_offering=target_offering,
            source_secondary_ips=source_secondary_ips,
        )

        start_workflow(workflow_dict=workflow_dict, task=task_history)

        if not workflow_dict['created']:

            if 'exceptions' in workflow_dict:
                error = "\n".join(
                    ": ".join(err)
                    for err in workflow_dict['exceptions']['error_codes'])
                traceback = "\nException Traceback\n".join(
                    workflow_dict['exceptions']['traceback'])
                error = "{}\n{}\n{}".format(error, traceback, error)
            else:
                error = "There is not any infra-structure to allocate this database."

            database_region_migration_detail.status = database_region_migration_detail.ROLLBACK
            database_region_migration_detail.finished_at = datetime.now()
            database_region_migration_detail.save()

            task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                           details=error)

            return

        else:
            database_region_migration_detail.status = database_region_migration_detail.SUCCESS
            database_region_migration_detail.finished_at = datetime.now()
            database_region_migration_detail.save()

            current_step = database_region_migration.current_step
            database_region_migration.current_step = current_step + 1
            database_region_migration.save()

            task_history.update_status_for(
                TaskHistory.STATUS_SUCCESS,
                details='Database region migration was successful')
            return

    except Exception, e:
        traceback = full_stack()
        LOG.error("Ops... something went wrong: %s" % e)
        LOG.error(traceback)

        database_region_migration_detail.status = database_region_migration_detail.ROLLBACK
        database_region_migration_detail.finished_at = datetime.now()
        database_region_migration_detail.save()

        task_history.update_status_for(TaskHistory.STATUS_ERROR,
                                       details=traceback)
        return
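
A short illustration of how the error string is assembled when start_workflow reports exceptions: error codes are joined pairwise, tracebacks are stitched together, and the codes are repeated after the tracebacks. All sample data below is made up:

exceptions = {
    'error_codes': [('DBAAS_0001', 'Could not create host'),
                    ('DBAAS_0002', 'DNS entry creation failed')],
    'traceback': ['Traceback (most recent call last): ...',
                  'Traceback (most recent call last): ...'],
}

error = "\n".join(": ".join(err) for err in exceptions['error_codes'])
traceback = "\nException Traceback\n".join(exceptions['traceback'])
error = "{}\n{}\n{}".format(error, traceback, error)
# error now lists both codes, then the joined tracebacks, then the codes again.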
Example #59
0
def make_databases_backup(self):
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name,
                                        user=None)
    task_history.relevance = TaskHistory.RELEVANCE_ERROR

    waiting_msg = "\nWaiting 5 minutes to start the next backup group"
    status = TaskHistory.STATUS_SUCCESS
    environments = Environment.objects.all()
    prod_envs = Configuration.get_by_name_as_list('prod_envs')
    dev_envs = Configuration.get_by_name_as_list('dev_envs')
    env_names_order = prod_envs + dev_envs
    if not env_names_order:
        env_names_order = [env.name for env in environments]

    infras = DatabaseInfra.objects.filter(plan__has_persistence=True)
    for env_name in env_names_order:
        try:
            env = environments.get(name=env_name)
        except Environment.DoesNotExist:
            continue

        msg = '\nStarting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)
        databaseinfras_by_env = infras.filter(environment=env)
        error = {}
        backup_number = 0
        backups_per_group = len(infras) / 12
        for infra in databaseinfras_by_env:
            if not infra.databases.first():
                continue

            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    task_history.update_details(details=waiting_msg,
                                                persist=True)
                    sleep(300)

            group = BackupGroup()
            group.save()

            for instance in infra.instances.filter(read_only=False):
                try:
                    driver = instance.databaseinfra.get_driver()
                    is_eligible = driver.check_instance_is_eligible_for_backup(
                        instance)
                    if not is_eligible:
                        LOG.info(
                            'Instance {} is not eligible for backup'.format(
                                instance))
                        continue
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(
                    time_now, instance)
                task_history.update_details(persist=True, details=start_msg)
                try:
                    snapshot = make_instance_snapshot_backup(instance=instance,
                                                             error=error,
                                                             group=group)
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error['errormsg'])
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)

                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)

    task_history.update_status_for(status, details="\nBackup finished")

    return
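
The group/sleep logic above throttles the run by splitting all persistent infras into roughly twelve groups and pausing five minutes between groups. The same throttle in isolation, with a hypothetical run_backup callable and the integer division made explicit:

from time import sleep

def run_backups_in_groups(infras, run_backup, groups=12, pause_seconds=300):
    backups_per_group = len(infras) // groups  # 0 disables throttling entirely
    backup_number = 0
    for infra in infras:
        if backups_per_group > 0:
            if backup_number < backups_per_group:
                backup_number += 1
            else:
                backup_number = 0
                sleep(pause_seconds)
        run_backup(infra)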