예제 #1
0
파일: tasks.py 프로젝트: Gazzonyx/mist.io
def post_deploy_steps(self, email, backend_id, machine_id, monitoring, command,
                      key_id=None, username=None, password=None, port=22,
                      script_id='', script_params='', job_id=None,
                      hostname='', plugins=None,
                      post_script_id='', post_script_params=''):
    from mist.io.methods import connect_provider, probe_ssh_only
    from mist.io.methods import notify_user, notify_admin
    from mist.io.methods import create_dns_a_record
    if multi_user:
        from mist.core.methods import enable_monitoring
        from mist.core.tasks import run_script
        from mist.core.helpers import log_event
    else:
        from mist.io.methods import enable_monitoring
        log_event = lambda *args, **kwargs: None

    job_id = job_id or uuid.uuid4().hex

    user = user_from_email(email)
    tmp_log = lambda msg, *args: log.error('Post deploy: %s' % msg, *args)
    tmp_log('Entering post deploy steps for %s %s %s',
            user.email, backend_id, machine_id)
    try:
        # find the node we're looking for and get its hostname
        node = None
        try:
            conn = connect_provider(user.backends[backend_id])
            nodes = conn.list_nodes() # TODO: use cache
            for n in nodes:
                if n.id == machine_id:
                    node = n
                    break
            tmp_log('run list_machines')
        except:
            raise self.retry(exc=Exception(), countdown=10, max_retries=10)

        if node and len(node.public_ips):
            # filter out IPv6 addresses
            ips = filter(lambda ip: ':' not in ip, node.public_ips)
            host = ips[0]
        else:
            tmp_log('ip not found, retrying')
            raise self.retry(exc=Exception(), countdown=60, max_retries=20)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            # connect with ssh even if no command, to create association
            # to be able to enable monitoring
            tmp_log('attempting to connect to shell')
            key_id, ssh_user = shell.autoconfigure(
                user, backend_id, node.id, key_id, username, password, port
            )
            tmp_log('connected to shell')
            result = probe_ssh_only(user, backend_id, machine_id, host=None,
                           key_id=key_id, ssh_user=ssh_user, shell=shell)
            log_dict = {
                    'email': email,
                    'event_type': 'job',
                    'backend_id': backend_id,
                    'machine_id': machine_id,
                    'job_id': job_id,
                    'host': host,
                    'key_id': key_id,
                    'ssh_user': ssh_user,
                }

            log_event(action='probe', result=result, **log_dict)
            backend = user.backends[backend_id]
            msg = "Backend:\n  Name: %s\n  Id: %s\n" % (backend.title,
                                                        backend_id)
            msg += "Machine:\n  Name: %s\n  Id: %s\n" % (node.name,
                                                             node.id)

            if hostname:
                try:
                    record = create_dns_a_record(user, hostname, host)
                    hostname = '.'.join((record.name, record.zone.domain))
                    log_event(action='create_dns_a_record', hostname=hostname,
                              **log_dict)
                except Exception as exc:
                    log_event(action='create_dns_a_record', error=str(exc),
                              **log_dict)

            error = False
            if script_id and multi_user:
                tmp_log('will run script_id %s', script_id)
                ret = run_script.run(
                    user.email, script_id, backend_id, machine_id,
                    params=script_params, host=host, job_id=job_id
                )
                error = ret['error']
                tmp_log('executed script_id %s', script_id)
            elif command:
                tmp_log('will run command %s', command)
                log_event(action='deployment_script_started', command=command, **log_dict)
                start_time = time()
                retval, output = shell.command(command)
                tmp_log('executed command %s', command)
                execution_time = time() - start_time
                output = output.decode('utf-8','ignore')
                title = "Deployment script %s" % ('failed' if retval
                                                  else 'succeeded')
                error = retval > 0
                notify_user(user, title,
                            backend_id=backend_id,
                            machine_id=machine_id,
                            machine_name=node.name,
                            command=command,
                            output=output,
                            duration=execution_time,
                            retval=retval,
                            error=retval > 0)
                log_event(action='deployment_script_finished',
                          error=retval > 0,
                          return_value=retval,
                          command=command,
                          stdout=output,
                          **log_dict)

            shell.disconnect()

            if monitoring:
                try:
                    enable_monitoring(user, backend_id, node.id,
                        name=node.name, dns_name=node.extra.get('dns_name',''),
                        public_ips=ips, no_ssh=False, dry=False, job_id=job_id,
                        plugins=plugins, deploy_async=False,
                    )
                except Exception as e:
                    print repr(e)
                    error = True
                    notify_user(user, "Enable monitoring failed for machine %s" % machine_id, repr(e))
                    notify_admin('Enable monitoring on creation failed for user %s machine %s: %r' % (email, machine_id, e))
                    log_event(action='enable_monitoring_failed', error=repr(e),
                              **log_dict)

            if post_script_id and multi_user:
                tmp_log('will run post_script_id %s', post_script_id)
                ret = run_script.run(
                    user.email, post_script_id, backend_id, machine_id,
                    params=post_script_params, host=host, job_id=job_id,
                    action_prefix='post_',
                )
                error = ret['error']
                tmp_log('executed post_script_id %s', script_id)

            log_event(action='post_deploy_finished', error=error, **log_dict)

        except (ServiceUnavailableError, SSHException) as exc:
            tmp_log(repr(exc))
            raise self.retry(exc=exc, countdown=60, max_retries=15)
    except Exception as exc:
        tmp_log(repr(exc))
        if str(exc).startswith('Retry'):
            raise
        notify_user(user, "Deployment script failed for machine %s" % machine_id)
        notify_admin("Deployment script failed for machine %s in backend %s by user %s" % (machine_id, backend_id, email), repr(exc))
        log_event(
            email=email,
            event_type='job',
            action='post_deploy_finished',
            backend_id=backend_id,
            machine_id=machine_id,
            enable_monitoring=bool(monitoring),
            command=command,
            error="Couldn't connect to run post deploy steps.",
            job_id=job_id
        )
예제 #2
0
def post_deploy_steps(self,
                      email,
                      cloud_id,
                      machine_id,
                      monitoring,
                      command,
                      key_id=None,
                      username=None,
                      password=None,
                      port=22,
                      script_id='',
                      script_params='',
                      job_id=None,
                      hostname='',
                      plugins=None,
                      post_script_id='',
                      post_script_params=''):
    from mist.io.methods import connect_provider, probe_ssh_only
    from mist.io.methods import notify_user, notify_admin
    from mist.io.methods import create_dns_a_record
    if multi_user:
        from mist.core.methods import enable_monitoring
        from mist.core.tasks import run_script
        from mist.core.helpers import log_event
    else:
        from mist.io.methods import enable_monitoring
        log_event = lambda *args, **kwargs: None

    job_id = job_id or uuid.uuid4().hex

    user = user_from_email(email)
    tmp_log = lambda msg, *args: log.error('Post deploy: %s' % msg, *args)
    tmp_log('Entering post deploy steps for %s %s %s', user.email, cloud_id,
            machine_id)
    try:
        # find the node we're looking for and get its hostname
        node = None
        try:
            conn = connect_provider(user.clouds[cloud_id])
            nodes = conn.list_nodes()  # TODO: use cache
            for n in nodes:
                if n.id == machine_id:
                    node = n
                    break
            tmp_log('run list_machines')
        except:
            raise self.retry(exc=Exception(), countdown=10, max_retries=10)

        if node and len(node.public_ips):
            # filter out IPv6 addresses
            ips = filter(lambda ip: ':' not in ip, node.public_ips)
            host = ips[0]
        else:
            tmp_log('ip not found, retrying')
            raise self.retry(exc=Exception(), countdown=60, max_retries=20)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            # connect with ssh even if no command, to create association
            # to be able to enable monitoring
            tmp_log('attempting to connect to shell')
            key_id, ssh_user = shell.autoconfigure(user, cloud_id, node.id,
                                                   key_id, username, password,
                                                   port)
            tmp_log('connected to shell')
            result = probe_ssh_only(user,
                                    cloud_id,
                                    machine_id,
                                    host=None,
                                    key_id=key_id,
                                    ssh_user=ssh_user,
                                    shell=shell)
            log_dict = {
                'email': email,
                'event_type': 'job',
                'cloud_id': cloud_id,
                'machine_id': machine_id,
                'job_id': job_id,
                'host': host,
                'key_id': key_id,
                'ssh_user': ssh_user,
            }

            log_event(action='probe', result=result, **log_dict)
            cloud = user.clouds[cloud_id]
            msg = "Cloud:\n  Name: %s\n  Id: %s\n" % (cloud.title, cloud_id)
            msg += "Machine:\n  Name: %s\n  Id: %s\n" % (node.name, node.id)

            if hostname:
                try:
                    record = create_dns_a_record(user, hostname, host)
                    hostname = '.'.join((record.name, record.zone.domain))
                    log_event(action='create_dns_a_record',
                              hostname=hostname,
                              **log_dict)
                except Exception as exc:
                    log_event(action='create_dns_a_record',
                              error=str(exc),
                              **log_dict)

            error = False
            if script_id and multi_user:
                tmp_log('will run script_id %s', script_id)
                ret = run_script.run(user.email,
                                     script_id,
                                     cloud_id,
                                     machine_id,
                                     params=script_params,
                                     host=host,
                                     job_id=job_id)
                error = ret['error']
                tmp_log('executed script_id %s', script_id)
            elif command:
                tmp_log('will run command %s', command)
                log_event(action='deployment_script_started',
                          command=command,
                          **log_dict)
                start_time = time()
                retval, output = shell.command(command)
                tmp_log('executed command %s', command)
                execution_time = time() - start_time
                output = output.decode('utf-8', 'ignore')
                title = "Deployment script %s" % ('failed'
                                                  if retval else 'succeeded')
                error = retval > 0
                notify_user(user,
                            title,
                            cloud_id=cloud_id,
                            machine_id=machine_id,
                            machine_name=node.name,
                            command=command,
                            output=output,
                            duration=execution_time,
                            retval=retval,
                            error=retval > 0)
                log_event(action='deployment_script_finished',
                          error=retval > 0,
                          return_value=retval,
                          command=command,
                          stdout=output,
                          **log_dict)

            shell.disconnect()

            if monitoring:
                try:
                    enable_monitoring(
                        user,
                        cloud_id,
                        node.id,
                        name=node.name,
                        dns_name=node.extra.get('dns_name', ''),
                        public_ips=ips,
                        no_ssh=False,
                        dry=False,
                        job_id=job_id,
                        plugins=plugins,
                        deploy_async=False,
                    )
                except Exception as e:
                    print repr(e)
                    error = True
                    notify_user(
                        user,
                        "Enable monitoring failed for machine %s" % machine_id,
                        repr(e))
                    notify_admin(
                        'Enable monitoring on creation failed for user %s machine %s: %r'
                        % (email, machine_id, e))
                    log_event(action='enable_monitoring_failed',
                              error=repr(e),
                              **log_dict)

            if post_script_id and multi_user:
                tmp_log('will run post_script_id %s', post_script_id)
                ret = run_script.run(
                    user.email,
                    post_script_id,
                    cloud_id,
                    machine_id,
                    params=post_script_params,
                    host=host,
                    job_id=job_id,
                    action_prefix='post_',
                )
                error = ret['error']
                tmp_log('executed post_script_id %s', script_id)

            log_event(action='post_deploy_finished', error=error, **log_dict)

        except (ServiceUnavailableError, SSHException) as exc:
            tmp_log(repr(exc))
            raise self.retry(exc=exc, countdown=60, max_retries=15)
    except Exception as exc:
        tmp_log(repr(exc))
        if str(exc).startswith('Retry'):
            raise
        notify_user(user,
                    "Deployment script failed for machine %s" % machine_id)
        notify_admin(
            "Deployment script failed for machine %s in cloud %s by user %s" %
            (machine_id, cloud_id, email), repr(exc))
        log_event(email=email,
                  event_type='job',
                  action='post_deploy_finished',
                  cloud_id=cloud_id,
                  machine_id=machine_id,
                  enable_monitoring=bool(monitoring),
                  command=command,
                  error="Couldn't connect to run post deploy steps.",
                  job_id=job_id)