Example #1
def log_event(owner_id, event_type, action, error=None, **kwargs):
    """Log a new event.

    Log a new event composed of the arguments provided.

    Once the new event has been prepared, additional processing is applied
    in order to associate any relevant stories, and, finally, it is pushed
    to RabbitMQ.

    Arguments:
        - owner_id: the current Owner's ID.
        - event_type: the type of event; one of job, shell, session, request, incident.
        - action: the action described by the event, such as create_machine.
        - error: indicates the error included in the event, if one exists.
        - kwargs: extra parameters to be logged.

    """
    def _default(obj):
        return {'_python_object': str(obj)}

    try:
        # Prepare the base event to be logged.
        event = {
            'owner_id': owner_id or None,
            'log_id': uuid.uuid4().hex,
            'action': action,
            'type': event_type,
            'time': time.time(),
            'error': error if error else False,
            'extra': json.dumps(kwargs, default=_default)
        }
        # Bring more key-value pairs to the top level.
        for key in FIELDS:
            if key in kwargs:
                event[key] = kwargs.pop(key)
        if 'story_id' in kwargs:
            event['story_id'] = kwargs.pop('story_id')
        if 'user_id' in event:
            try:
                event['email'] = User.objects.get(id=event['user_id']).email
            except User.DoesNotExist:
                log.error('User %s does not exist', event['user_id'])

        # Associate event with relevant stories.
        for key in ('job_id', 'shell_id', 'session_id', 'incident_id'):
            if key == 'session_id' and event_type != 'session':  # AB Testing.
                continue
            if event.get(key):
                event.update({'story_id': event[key], 'stories': []})
                associate_stories(event)
                break
        else:
            # Special case for closing stories unless an error has been raised,
            # such as PolicyUnauthorizedError.
            # TODO: Can be used to close any type of story, not only incidents.
            if action in ('close_story', ) and not error:
                event['stories'] = [('closes', 'incident', event['story_id'])]
            # Attempt to close open incidents.
            if action in CLOSES_INCIDENT:
                try:
                    close_open_incidents(event)
                except Exception as exc:
                    log.error('Event %s failed to close open incidents: %s',
                              event['log_id'], exc)
        # Cross-populate session-log data.
        try:
            cross_populate_session_data(event, kwargs)
        except Exception as exc:
            log.error('Failed to cross-populate log/session data: %s', exc)
    except Exception as exc:
        log.error('Failed to log event %s: %s', event, exc)
    else:
        # FIXME: Deprecate
        conn = MongoClient(config.MONGO_URI)
        coll = conn['mist'].logging
        coll.insert_one(event.copy())
        conn.close()

        # Construct RabbitMQ routing key.
        keys = [str(owner_id), str(event_type), str(action)]
        keys.append('true' if error else 'false')
        routing_key = '.'.join(map(str.lower, keys))

        # Broadcast event to RabbitMQ's "events" exchange.
        amqp_publish('events',
                     routing_key,
                     event,
                     ex_type='topic',
                     ex_declare=True,
                     auto_delete=False)

        event.pop('extra')
        event.update(kwargs)
        return event
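
A quick usage sketch for reference; the IDs below are placeholders and the call assumes the module's own imports and helpers (FIELDS, associate_stories, amqp_publish, etc.) are in scope:

# Hypothetical invocation with placeholder IDs.
event = log_event(
    owner_id='0123456789ab',
    event_type='job',
    action='create_machine',
    user_id='ba9876543210',   # promoted to the top level if listed in FIELDS
    cloud_id='abcdef012345',  # otherwise kept in the JSON-encoded 'extra'
)
# The event is saved to MongoDB and published to the 'events' topic exchange
# under a routing key like '0123456789ab.job.create_machine.false'; the
# returned dict has 'extra' removed and any leftover kwargs merged back in.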
Example #2
def list_machines(schedule_id):
    """Perform list machines. Cloud controller stores results in mongodb."""

    # Fetch schedule and cloud from database.
    # FIXME: resolve circular deps error
    from mist.api.poller.models import ListMachinesPollingSchedule
    sched = ListMachinesPollingSchedule.objects.get(id=schedule_id)
    cloud = sched.cloud
    now = datetime.datetime.now()

    # Check if this cloud should be autodisabled.
    if sched.last_success:
        two_days = datetime.timedelta(days=2)
        if now - sched.last_success > two_days and sched.failure_count > 50:
            autodisable_cloud(sched.cloud)
            return
    elif sched.failure_count > 100:
        autodisable_cloud(sched.cloud)
        return

    # Find last run. If too recent, abort.
    if sched.last_success and sched.last_failure:
        last_run = max(sched.last_success, sched.last_failure)
    else:
        last_run = sched.last_success or sched.last_failure
    if last_run:
        if now - last_run < sched.interval.timedelta:
            log.warning("Running too soon for cloud %s, aborting!", cloud)
            return

    # Is another instance of the same task already running?
    if sched.last_attempt_started:
        # Another instance started recently; abort.
        if now - sched.last_attempt_started < datetime.timedelta(seconds=60):
            log.warning("Another instance of this task started recently, "
                        "aborting.")
            return
        # The other instance has been running for too long or has died;
        # ignore it.
        log.warning("Another instance of this task seems to have started, "
                    "but it's been quite a while, so ignoring it and "
                    "running normally.")
    sched.last_attempt_started = now
    sched.save()

    try:
        # Run list_machines.
        machines = cloud.ctl.compute.list_machines()
    except Exception as exc:
        # Store failure.
        log.warning("Failed to list_machines for cloud %s: %r", cloud, exc)
        sched.last_failure = datetime.datetime.now()
        sched.failure_count += 1
        sched.last_attempt_started = None
        sched.save()
        raise
    else:
        # Store success.
        log.info("Succeeded to list_machines for cloud %s", cloud)
        sched.last_success = datetime.datetime.now()
        sched.failure_count = 0
        sched.last_attempt_started = None
        cloud.save()

    # Publish results to rabbitmq (for backwards compatibility).
    if amqp_owner_listening(cloud.owner.id):
        amqp_publish_user(cloud.owner.id, routing_key='list_machines',
                          data={'cloud_id': cloud.id,
                                'machines': [machine.as_dict()
                                             for machine in machines]})

    # Push historic information for inventory and cost reporting.
    for machine in machines:
        data = {'owner_id': machine.cloud.owner.id,
                'machine_id': machine.id,
                'cost_per_month': machine.cost.monthly}
        log.info("Will push to elastic: %s", data)
        amqp_publish(exchange='machines_inventory', routing_key='',
                     auto_delete=False, data=data)
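
The throttle guard above is worth reading in isolation; here is a small, self-contained distillation (a hypothetical helper mirroring the logic, not part of the codebase):

import datetime

def should_run(last_success, last_failure, interval, now=None):
    """Return False if the most recent run is newer than the interval."""
    now = now or datetime.datetime.now()
    if last_success and last_failure:
        last_run = max(last_success, last_failure)
    else:
        last_run = last_success or last_failure
    return not (last_run and now - last_run < interval)

# E.g. with a 90-second interval, a run 60 seconds after the last success
# is skipped, while a run 120 seconds after it proceeds.
now = datetime.datetime.now()
assert should_run(now - datetime.timedelta(seconds=120), None,
                  datetime.timedelta(seconds=90), now=now)
assert not should_run(now - datetime.timedelta(seconds=60), None,
                      datetime.timedelta(seconds=90), now=now)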
Example #3
    def list_machines(self, persist=True):
        """Return list of machines for cloud

        A list of nodes is fetched from libcloud, the data is processed, stored
        on machine models, and a list of machine models is returned.

        Subclasses SHOULD NOT override or extend this method.

        This method wraps `_list_machines` which contains the core
        implementation.

        """

        task_key = 'cloud:list_machines:%s' % self.cloud.id
        task = PeriodicTaskInfo.get_or_add(task_key)
        try:
            with task.task_runner(persist=persist):
                old_machines = {
                    '%s-%s' % (m.id, m.machine_id): m.as_dict()
                    for m in self.list_cached_machines()
                }
                machines = self._list_machines()
        except PeriodicTaskThresholdExceeded:
            self.cloud.disable()
            raise

        # Initialize AMQP connection to reuse for multiple messages.
        amqp_conn = Connection(config.AMQP_URI)

        if amqp_owner_listening(self.cloud.owner.id):
            if not config.MACHINE_PATCHES:
                amqp_publish_user(
                    self.cloud.owner.id,
                    routing_key='list_machines',
                    connection=amqp_conn,
                    data={
                        'cloud_id': self.cloud.id,
                        'machines':
                        [machine.as_dict() for machine in machines]
                    })
            else:
                # Publish patches to rabbitmq.
                new_machines = {
                    '%s-%s' % (m.id, m.machine_id): m.as_dict()
                    for m in machines
                }
                # Exclude last seen and probe fields from patch.
                for md in old_machines, new_machines:
                    for m in md.values():
                        m.pop('last_seen', None)
                        m.pop('probe', None)
                patch = jsonpatch.JsonPatch.from_diff(old_machines,
                                                      new_machines).patch
                if patch:
                    amqp_publish_user(self.cloud.owner.id,
                                      routing_key='patch_machines',
                                      connection=amqp_conn,
                                      data={
                                          'cloud_id': self.cloud.id,
                                          'patch': patch
                                      })

        # Push historic information for inventory and cost reporting.
        for machine in machines:
            data = {
                'owner_id': self.cloud.owner.id,
                'machine_id': machine.id,
                'cost_per_month': machine.cost.monthly
            }
            amqp_publish(exchange='machines_inventory',
                         routing_key='',
                         auto_delete=False,
                         data=data,
                         connection=amqp_conn)

        return machines
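
For reference, the patch path above is a plain diff/apply round trip with the jsonpatch library; a standalone sketch with toy snapshots keyed like the '%s-%s' ids used above:

import jsonpatch

old = {'1-a': {'state': 'running'}, '2-b': {'state': 'stopped'}}
new = {'1-a': {'state': 'stopped'}, '3-c': {'state': 'running'}}

# from_diff produces an RFC 6902 patch: a list of op dicts such as
# {'op': 'replace', 'path': '/1-a/state', 'value': 'stopped'}.
patch = jsonpatch.JsonPatch.from_diff(old, new).patch

# A consumer of the 'patch_machines' message can apply it to its cached
# snapshot to bring it up to date.
assert jsonpatch.apply_patch(old, patch) == new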
Example #4
def log_request_to_elastic(exception):
    try:
        log_dict = g.log_dict
    except AttributeError:
        return

    # Log the original exception.
    if isinstance(exception, MistError):
        if exception.orig_exc:
            log_dict['_exc'] = repr(exception.orig_exc)
            log_dict['_exc_type'] = type(exception.orig_exc)
            if exception.orig_traceback:
                log_dict['_traceback'] = exception.orig_traceback
    elif isinstance(exception, Exception):
        log_dict['_exc'] = repr(exception)
        log_dict['_exc_type'] = type(exception)
        log_dict['_traceback'] = traceback.format_exc()

    if log_dict:
        log_methods.log_event(**log_dict)

    # If no bad exception occurred, return; otherwise log it to RabbitMQ
    # and to file below.
    if g.exc_flag is False or exception is None:
        return

    # Publish the traceback to RabbitMQ for Heka to parse and forward to
    # Elasticsearch.
    log.info('Bad exception occurred, logging to rabbitmq')
    es_dict = log_dict.copy()
    es_dict.pop('_exc_type', '')
    es_dict['time'] = time.time()
    es_dict['traceback'] = es_dict.pop('_traceback', '')
    es_dict['exception'] = es_dict.pop('_exc', '')
    es_dict['type'] = 'exception'
    routing_key = '%s.%s' % (es_dict['owner_id'], es_dict['action'])
    pickler = jsonpickle.pickler.Pickler()
    helpers.amqp_publish('exceptions',
                         routing_key,
                         pickler.flatten(es_dict),
                         ex_type='topic',
                         ex_declare=True,
                         auto_delete=False)

    # Log the bad exception to file.
    log.info('Bad exception occurred, logging to file')
    lines = []
    lines.append('Exception: %s' % log_dict.pop('_exc', ''))
    lines.append('Exception type: %s' % log_dict.pop('_exc_type', ''))
    lines.append('Time: %s' % time.strftime('%Y-%m-%d %H:%M %Z'))
    lines += ([
        '%s: %s' % (key, value) for key, value in list(log_dict.items())
        if value and key != '_traceback'
    ])
    for key in ('owner', 'user', 'sudoer'):
        id_ = log_dict.get('%s_id' % key)
        if id_:
            try:
                value = Owner.objects.get(id=id_)
                lines.append('%s: %s' % (key, value))
            except Owner.DoesNotExist:
                pass
            except Exception as exc:
                log.error('Error finding user in logged exc: %r', exc)
    lines.append('-' * 10)
    lines.append(log_dict.get('_traceback', ''))
    lines.append('=' * 10)
    msg = '\n'.join(lines) + '\n'
    directory = 'var/log/exceptions'
    if not os.path.exists(directory):
        os.makedirs(directory)
    filename = '%s/%s' % (directory, int(time.time()))
    with open(filename, 'w+') as f:
        f.write(msg)
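
A footnote on the serialization step above: jsonpickle's Pickler flattens values that plain json cannot encode; a minimal sketch:

import json
import jsonpickle

# Flatten a dict containing a non-JSON-serializable object (here an
# exception instance) into plain dicts/lists/strings before publishing.
pickler = jsonpickle.pickler.Pickler()
flat = pickler.flatten({'exception': ValueError('boom'), 'time': 1700000000.0})
json.dumps(flat)  # succeeds; json.dumps on the original dict would raise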