Example #1
def smart_delay(self, *args, **kwargs):
    """Return cached result if it exists, send job to celery if needed."""
    # check cache
    id_str = json.dumps([self.task_key, args, kwargs])
    cache_key = b64encode(id_str)
    cached = self.memcache.get(cache_key)
    if cached:
        age = time() - cached['timestamp']
        if age > self.result_fresh:
            # stale result: schedule a refresh in the background
            amqp_log("%s: scheduling task" % id_str)
            self.delay(*args, **kwargs)
        if age < self.result_expires:
            # not yet expired: serve the cached payload, possibly stale
            amqp_log("%s: smart delay cache hit" % id_str)
            return cached['payload']
    else:
        self.delay(*args, **kwargs)
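The cache key is just the base64-encoded JSON of the task identity, so identical calls always hit the same memcache slot. A minimal standalone sketch of that construction follows; the helper name is hypothetical, and the `.encode()` call is only needed on Python 3 (the snippet above is Python 2):

import json
from base64 import b64encode

def make_cache_key(task_key, args, kwargs):
    # identical task name and call arguments yield an identical key
    id_str = json.dumps([task_key, args, kwargs])
    # b64encode() accepts str on Python 2 but requires bytes on Python 3
    return b64encode(id_str.encode('utf-8')).decode('ascii')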
Example #2
def on_stats(self, backend_id, machine_id, start, stop, step, requestID):
    try:
        # widen the window by 50 secs on each side; step appears to be in
        # milliseconds and is converted to seconds for get_stats
        data = get_stats(self.user, backend_id, machine_id, start - 50,
                         stop + 50, step / 1000)
    except Exception as exc:
        amqp_log("Error getting stats: %r" % exc)
        return
    ret = {
        'backend_id': backend_id,
        'machine_id': machine_id,
        'start': start,
        'stop': stop,
        'requestID': requestID,
        'metrics': data,
    }
    self.emit('stats', ret)
Example #3
    def on_stats(self, backend_id, machine_id, start, stop, step, request_id, metrics):
        error = False
        try:
            data = get_stats(self.user, backend_id, machine_id,
                             start, stop, step)
        except BadRequestError as exc:
            # user error: forward the message to the client with empty data
            error = str(exc)
            data = []
        except Exception as exc:
            # unexpected error: log it server-side and send nothing back
            amqp_log("Error getting stats: %r" % exc)
            return

        ret = {
            'backend_id': backend_id,
            'machine_id': machine_id,
            'start': start,
            'stop': stop,
            'request_id': request_id,
            'metrics': data,
        }
        if error:
            ret['error'] = error
        self.emit('stats', ret)
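Note that only a BadRequestError is surfaced to the client, via the optional 'error' key; anything else is merely logged. A hypothetical client-side callback might therefore look like this (the helper names are made up for illustration):

def handle_stats(ret):
    # 'error' is only present when get_stats raised BadRequestError
    if ret.get('error'):
        show_error(ret['error'])  # hypothetical UI helper
    else:
        plot_metrics(ret['metrics'], ret['start'], ret['stop'])  # hypothetical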
Example #4
def run_deploy_script(self, email, backend_id, machine_id, command,
                      key_id=None, username=None, password=None, port=22):
    from mist.io.methods import ssh_command, connect_provider
    from mist.io.methods import notify_user, notify_admin

    user = user_from_email(email)

    try:
        # find the node we're looking for and get its hostname
        conn = connect_provider(user.backends[backend_id])
        nodes = conn.list_nodes()
        node = None
        for n in nodes:
            if n.id == machine_id:
                node = n
                break

        # filter out IPv6 addresses; ips may be empty even when public_ips
        # is not, so guard the indexing instead of just checking public_ips
        ips = [ip for ip in node.public_ips if ':' not in ip] if node else []
        if ips:
            host = ips[0]
        else:
            # no node found or no IPv4 address yet, retry in a minute
            raise self.retry(exc=Exception(), countdown=60, max_retries=5)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            key_id, ssh_user = shell.autoconfigure(user, backend_id, node.id,
                                                   key_id, username, password, port)
            start_time = time()
            retval, output = shell.command(command)
            execution_time = time() - start_time
            shell.disconnect()
            msg = """
Command: %s
Return value: %s
Duration: %s seconds
Output:
%s""" % (command, retval, execution_time, output)

            if retval:
                notify_user(user, "Deployment script failed for machine %s (%s)" % (node.name, node.id), msg)
                amqp_log("Deployment script failed for user %s machine %s (%s): %s" % (user, node.name, node.id, msg))
            else:
                notify_user(user, "Deployment script succeeded for machine %s (%s)" % (node.name, node.id), msg)
                amqp_log("Deployment script succeeded for user %s machine %s (%s): %s" % (user, node.name, node.id, msg))

        except ServiceUnavailableError as exc:
            raise self.retry(exc=exc, countdown=60, max_retries=5)
    except Exception as exc:
        if str(exc).startswith('Retry'):
            return
        # report machine_id here: node may be unset if the lookup failed
        amqp_log("Deployment script failed for machine %s in backend %s by user %s after 5 retries: %s" % (machine_id, backend_id, email, repr(exc)))
        notify_user(user, "Deployment script failed for machine %s after 5 retries" % machine_id)
        notify_admin("Deployment script failed for machine %s in backend %s by user %s after 5 retries" % (machine_id, backend_id, email), repr(exc))
Example #5
def post_deploy_steps(self, email, backend_id, machine_id, monitoring, command,
                      key_id=None, username=None, password=None, port=22):
    from mist.io.methods import ssh_command, connect_provider
    from mist.io.methods import notify_user, notify_admin
    # pick the enable_monitoring implementation for this edition
    if multi_user:
        from mist.core.methods import enable_monitoring
    else:
        from mist.io.methods import enable_monitoring

    user = user_from_email(email)
    try:

        # find the node we're looking for and get its hostname
        conn = connect_provider(user.backends[backend_id])
        nodes = conn.list_nodes()
        node = None
        for n in nodes:
            if n.id == machine_id:
                node = n
                break

        # filter out IPv6 addresses; ips may be empty even when public_ips
        # is not, so guard the indexing instead of just checking public_ips
        ips = [ip for ip in node.public_ips if ':' not in ip] if node else []
        if ips:
            host = ips[0]
        else:
            # no node found or no IPv4 address yet, retry in two minutes
            raise self.retry(exc=Exception(), countdown=120, max_retries=5)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            # connect with ssh even if no command, to create association
            # to be able to enable monitoring
            key_id, ssh_user = shell.autoconfigure(
                user, backend_id, node.id, key_id, username, password, port
            )

            if command:
                start_time = time()
                retval, output = shell.command(command)
                execution_time = time() - start_time
                output = output.decode('utf-8', 'ignore')
                msg = ("Command: %s\n"
                       "Return value: %s\n"
                       "Duration: %d seconds\n"
                       "Output:%s\n") % (command, retval,
                                         execution_time, output)
                msg = msg.encode('utf-8', 'ignore')
                msg_title = "Deployment script %s for machine %s (%s)" % (
                    'failed' if retval else 'succeeded',
                    node.name, node.id
                )
                notify_user(user, msg_title, msg)

            shell.disconnect()

            if monitoring:
                try:
                    enable_monitoring(user, backend_id, node.id,
                        name=node.name, dns_name=node.extra.get('dns_name',''),
                        public_ips=ips, no_ssh=False, dry=False,
                    )
                except Exception as e:
                    print repr(e)
                    notify_user(user, "Enable monitoring failed for machine %s (%s)" % (node.name, node.id), repr(e))
                    notify_admin('Enable monitoring on creation failed for user %s machine %s: %r' % (email, node.name, e))

        except (ServiceUnavailableError, SSHException) as exc:
            raise self.retry(exc=exc, countdown=60, max_retries=5)
    except Exception as exc:
        if str(exc).startswith('Retry'):
            raise
        amqp_log("Deployment script failed for machine %s in backend %s by user %s after 5 retries: %s" % (node.id, backend_id, email, repr(exc)))
        notify_user(user, "Deployment script failed for machine %s after 5 retries" % node.id)
        notify_admin("Deployment script failed for machine %s in backend %s by user %s after 5 retries" % (node.id, backend_id, email), repr(exc))
Example #6
def run(self, *args, **kwargs):
    email = args[0]
    # seq_id identifies a sequence of periodic tasks, to avoid running
    # multiple concurrent sequences of the same task with the same
    # arguments. It is empty on the first run, constant afterwards.
    seq_id = kwargs.pop('seq_id', '')
    id_str = json.dumps([self.task_key, args, kwargs])
    cache_key = b64encode(id_str)
    cached_err = self.memcache.get(cache_key + 'error')
    if cached_err:
        # task has been failing recently
        if seq_id != cached_err['seq_id']:
            # another sequence of this task is already handling this error
            # flow. This is not working! Passing instead.
            #return
            pass
    if not amqp_user_listening(email):
        # no one is waiting for the result: stop trying, but flush cached
        # errors
        if cached_err:
            self.memcache.delete(cache_key + 'error')
        return
    # check cache to stop iteration if another sequence has started
    cached = self.memcache.get(cache_key)
    if cached:
        if seq_id and seq_id != cached['seq_id']:
            amqp_log("%s: found new cached seq_id [%s], "
                     "stopping iteration of [%s]" % (id_str,
                                                     cached['seq_id'],
                                                     seq_id))
            return
        elif not seq_id and time() - cached['timestamp'] < self.result_fresh:
            amqp_log("%s: fresh task submitted with fresh cached result, "
                     "dropping" % id_str)
            return
    if not seq_id:
        # this task was called externally, not a rerun: create a seq_id
        amqp_log("%s: fresh task submitted [%s]" % (id_str, seq_id))
        seq_id = uuid4().hex
    # actually run the task
    try:
        data = self.execute(*args, **kwargs)
    except Exception as exc:
        # error handling
        now = time()
        if not cached_err:
            cached_err = {'seq_id': seq_id, 'timestamps': []}
        cached_err['timestamps'].append(now)
        x0 = cached_err['timestamps'][0]
        rel_points = [x - x0 for x in cached_err['timestamps']]
        rerun = self.error_rerun_handler(exc, rel_points, *args, **kwargs)
        if rerun is not None:
            self.memcache.set(cache_key + 'error', cached_err)
            kwargs['seq_id'] = seq_id
            self.apply_async(args, kwargs, countdown=rerun)
        else:
            self.memcache.delete(cache_key + 'error')
        amqp_log("%s: error %r, rerun %s" % (id_str, exc, rerun))
        return
    else:
        if cached_err:
            self.memcache.delete(cache_key + 'error')
    cached = {'timestamp': time(), 'payload': data, 'seq_id': seq_id}
    ok = amqp_publish_user(email, routing_key=self.task_key, data=data)
    if not ok:
        # exchange closed, no one is listening, stop repeating
        amqp_log("%s: exchange closed" % id_str)
        return
    kwargs['seq_id'] = seq_id
    self.memcache.set(cache_key, cached)
    if self.polling:
        amqp_log("%s: will rerun in %d secs [%s]" % (id_str,
                                                     self.result_fresh,
                                                     seq_id))
        self.apply_async(args, kwargs, countdown=self.result_fresh)
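`error_rerun_handler` receives the exception plus the failure times relative to the first failure (`rel_points`), and returns a countdown in seconds, or None to give up. A minimal sketch with capped exponential backoff; the policy itself is an assumption, only the signature follows the call above:

def error_rerun_handler(self, exc, errors, *args, **kwargs):
    # `errors` holds seconds elapsed since the first failure, one per run
    if len(errors) > 10 or (errors and errors[-1] > 3600):
        return None                    # too many failures, or failing too long
    return min(2 ** len(errors), 300)  # 2, 4, 8, ... capped at 5 minutes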
Example #7
def run(self, *args, **kwargs):
    email = args[0]
    # seq_id identifies a sequence of periodic tasks, to avoid running
    # multiple concurrent sequences of the same task with the same
    # arguments. It is empty on the first run, constant afterwards.
    seq_id = kwargs.pop("seq_id", "")
    id_str = json.dumps([self.task_key, args, kwargs])
    cache_key = b64encode(id_str)
    cached_err = self.memcache.get(cache_key + "error")
    if cached_err:
        # task has been failing recently
        if seq_id != cached_err["seq_id"]:
            if seq_id:
                # another sequence of tasks has taken over
                return
            else:
                # taking over from another sequence
                cached_err = None
                # cached err will be deleted or overwritten in a while
                # self.memcache.delete(cache_key + 'error')
    if not amqp_user_listening(email):
        # no one is waiting for the result: stop trying, but flush cached
        # errors
        self.memcache.delete(cache_key + "error")
        return
    # check cache to stop iteration if another sequence has started
    cached = self.memcache.get(cache_key)
    if cached:
        if seq_id and seq_id != cached["seq_id"]:
            amqp_log("%s: found new cached seq_id [%s], "
                     "stopping iteration of [%s]" % (id_str,
                                                     cached["seq_id"],
                                                     seq_id))
            return
        elif not seq_id and time() - cached["timestamp"] < self.result_fresh:
            amqp_log("%s: fresh task submitted with fresh cached result, "
                     "dropping" % id_str)
            return
    if not seq_id:
        # this task was called externally, not a rerun: create a seq_id
        amqp_log("%s: fresh task submitted [%s]" % (id_str, seq_id))
        seq_id = uuid4().hex
    # actually run the task
    try:
        data = self.execute(*args, **kwargs)
    except Exception as exc:
        # error handling
        if isinstance(exc, SoftTimeLimitExceeded):
            log.error("SoftTimeLimitExceeded: %s", id_str)
        now = time()
        if not cached_err:
            cached_err = {"seq_id": seq_id, "timestamps": []}
        cached_err["timestamps"].append(now)
        x0 = cached_err["timestamps"][0]
        rel_points = [x - x0 for x in cached_err["timestamps"]]
        rerun = self.error_rerun_handler(exc, rel_points, *args, **kwargs)
        if rerun is not None:
            self.memcache.set(cache_key + "error", cached_err)
            kwargs["seq_id"] = seq_id
            self.apply_async(args, kwargs, countdown=rerun)
        else:
            self.memcache.delete(cache_key + "error")
        amqp_log("%s: error %r, rerun %s" % (id_str, exc, rerun))
        return
    else:
        self.memcache.delete(cache_key + "error")
    cached = {"timestamp": time(), "payload": data, "seq_id": seq_id}
    ok = amqp_publish_user(email, routing_key=self.task_key, data=data)
    if not ok:
        # exchange closed, no one is listening, stop repeating
        amqp_log("%s: exchange closed" % id_str)
        return
    kwargs["seq_id"] = seq_id
    self.memcache.set(cache_key, cached)
    if self.polling:
        amqp_log("%s: will rerun in %d secs [%s]" % (id_str,
                                                     self.result_fresh,
                                                     seq_id))
        self.apply_async(args, kwargs, countdown=self.result_fresh)
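Putting the pieces together, a concrete polling task only needs the class attributes and the `execute()` hook that `run()` and `smart_delay()` rely on. A minimal sketch; the base-class name `UserTask` and the example task are assumptions, not taken from the source:

class ListMachines(UserTask):
    task_key = 'list_machines'
    result_fresh = 10    # serve cached payloads younger than 10 secs
    result_expires = 60  # drop cached payloads older than 60 secs
    polling = True       # run() reschedules itself every result_fresh secs

    def execute(self, email, backend_id):
        user = user_from_email(email)
        conn = connect_provider(user.backends[backend_id])
        return {'machines': [node.name for node in conn.list_nodes()]}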