Example #1
File: shell.py Project: ghoul008/mist.api
    def connect(self, username, key=None, password=None, cert_file=None,
                port=22):
        """Initialize an SSH connection.

        Tries to connect and configure self. If only password is provided, it
        will be used for authentication. If key is provided, it is treated as
        an OpenSSH private RSA key and used for authentication. If both key
        and password are provided, password is used as a passphrase to unlock
        the private key.

        Raises MachineUnauthorizedError if it fails to connect.

        """

        if not key and not password:
            raise RequiredParameterMissingError("neither key nor password "
                                                "provided.")

        if key:
            private = key.private
            if isinstance(key, SignedSSHKey) and cert_file:
                # signed ssh key, use RSACert
                rsa_key = paramiko.RSACert(privkey_file_obj=StringIO(private),
                                           cert_file_obj=StringIO(cert_file))
            else:
                rsa_key = paramiko.RSAKey.from_private_key(StringIO(private))
        else:
            rsa_key = None

        attempts = 3
        while attempts:
            attempts -= 1
            try:
                self.ssh.connect(
                    self.host,
                    port=port,
                    username=username,
                    password=password,
                    pkey=rsa_key,
                    allow_agent=False,
                    look_for_keys=False,
                    timeout=10
                )
                break
            except paramiko.AuthenticationException as exc:
                log.error("ssh exception %r", exc)
                raise MachineUnauthorizedError("Couldn't connect to "
                                               "%s@%s:%s. %s"
                                               % (username, self.host,
                                                  port, exc))
            except socket.error as exc:
                log.error("Got ssh error: %r", exc)
                if not attempts:
                    raise ServiceUnavailableError("SSH timed-out repeatedly.")
            except Exception as exc:
                log.error("ssh exception %r", exc)
                # don't fail on SSHException or other paramiko exceptions,
                # e.g. network-related; keep trying until attempts run out
                if not attempts:
                    raise ServiceUnavailableError(repr(exc))
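
A minimal, self-contained sketch of the same retry-and-authenticate pattern using plain paramiko; connect_with_retries is a hypothetical helper that only distills the logic above, with the key's passphrase handling made explicit:

import socket
from io import StringIO

import paramiko


def connect_with_retries(host, username, password=None, pkey_pem=None,
                         port=22, attempts=3):
    # If a PEM-encoded key is given, `password` acts as its passphrase.
    pkey = (paramiko.RSAKey.from_private_key(StringIO(pkey_pem), password)
            if pkey_pem else None)
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    while attempts:
        attempts -= 1
        try:
            client.connect(host, port=port, username=username,
                           password=password, pkey=pkey, allow_agent=False,
                           look_for_keys=False, timeout=10)
            return client
        except paramiko.AuthenticationException:
            raise  # bad credentials won't improve with retries
        except (socket.error, paramiko.SSHException):
            if not attempts:
                raise  # transient errors are retried, then re-raised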
Example #2
    def _on_tornado_response(resp):
        if resp.code != 200:
            log.error('Code %d on get_stats: %s', resp.code, resp.body)
            if resp.code == 400:
                raise BadRequestError()
            raise ServiceUnavailableError()
        return callback(self._on_stats_callback(json.loads(resp.body)))
Example #3
def show_tag_keys(measurement=None):
    """Return all tag keys."""
    q = 'SHOW TAG KEYS'
    q += ' FROM %s' % measurement if measurement is not None else ''
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get('%s&q=%s' % (url, q))
    if not data.ok:
        log.error('Got %d on SHOW TAG KEY: %s', data.status_code, data.content)
        raise ServiceUnavailableError()
    tags = {}
    results = data.json().get('results', [])
    results = results[0] if results else {}
    for series in results.get('series', []):
        name = series['name']
        if not series['values']:
            continue
        if name not in tags:
            tags[name] = []
        for value in series['values']:
            value = value[0]
            if value not in (
                    'host',
                    'machine_id',
            ):
                tags[name].append(value)
    return tags
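
For reference, a minimal sketch of the response shape the loop above expects; the payload is illustrative, not captured from a live InfluxDB:

sample = {
    'results': [{
        'series': [{
            'name': 'system',                                # measurement
            'columns': ['tagKey'],
            'values': [['host'], ['machine_id'], ['core']],  # hypothetical
        }]
    }]
}
# Fed through the loop above, this yields {'system': ['core']},
# since 'host' and 'machine_id' are filtered out explicitly.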
Example #4
def _on_response_callback(response, tornado_async=False):
    """HTTP Response-handling callback.

    This method is meant to return HTTP Response objects generated either in a
    Tornado or synchronous execution context.

    Arguments:
        - response: HTTP Response object.
        - tornado_async: Denotes if a Tornado-safe HTTP request was issued.

    """
    if tornado_async:
        if response.code != 200:
            log.error(
                'Error on Elasticsearch query in tornado_async mode. '
                'Got %d status code: %s', response.code, response.body)
            if response.code == 400:
                raise BadRequestError()
            if response.code == 404:
                raise NotFoundError()
            if response.code == 429:
                raise RateLimitError()
            raise ServiceUnavailableError()
        response = json.loads(response.body)
    return response
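
A sketch of how this callback might be wired in both modes; the URL is a placeholder, and functools.partial is one way (assumed here, not shown in the example) to carry the tornado_async flag into the Tornado 5-style callback used in Example #12:

import functools

import requests
from tornado.httpclient import AsyncHTTPClient

# Synchronous mode: the raw Response object is returned unchanged.
response = requests.get('http://localhost:9200/_search')  # placeholder URL
result = _on_response_callback(response)

# Tornado mode: status codes are checked and the body is JSON-decoded.
AsyncHTTPClient().fetch(
    'http://localhost:9200/_search',  # placeholder URL
    callback=functools.partial(_on_response_callback, tornado_async=True))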
Example #5
def _get_multimachine_stats(owner, metric, start='', stop='', step='',
                            uuids=None):
    if not uuids:
        uuids = [machine.id for machine in Machine.objects(
            cloud__in=Cloud.objects(owner=owner, deleted=None),
            monitoring__hasmonitoring=True
        )]
    if not uuids:
        raise NotFoundError("No machine has monitoring enabled.")
    try:
        data = get_multi_uuid(uuids, metric, start=start, stop=stop,
                              interval_str=step)
    except Exception as exc:
        log.error("Error getting %s: %r", metric, exc)
        raise ServiceUnavailableError()
    ret = {}
    for item in data:
        target = item['target'].split('.')
        if len(target) > 1:
            uuid = target[1]
        else:
            uuid = target[0]
        item['name'] = uuid
        ret[uuid] = item
    return ret
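
The target parsing assumes Graphite-style dotted series names with the machine uuid in the second position; a quick worked example (names made up):

target = 'bucket.d2ca5f02.system.load1'.split('.')
uuid = target[1] if len(target) > 1 else target[0]  # -> 'd2ca5f02'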
Example #6
def _query_influxdb(query, owner_id):
    # Prepare base URL.
    url = '%s/query?db=metering' % config.INFLUX['host']

    # Request metering info.
    results = requests.get('%s&q=%s' % (url, query))
    if not results.ok:
        log.error('Failed to execute query "%s": %s', query, results.content)
        if results.status_code == 400:
            raise BadRequestError()
        raise ServiceUnavailableError()

    # Get the `results` key. If the query is valid, this shouldn't fail. Even
    # if InfluxDB returns an empty response, the `results` key should be there.
    results = results.json()
    results = results['results']

    try:
        series_list = results[0]['series']
    except IndexError:
        # raise BadRequestError('Failed to parse results: %s' % results)
        log.error('Failed to parse results: %s', results)
        series_list = []
    except KeyError:
        series_list = []
    else:
        if owner_id and len(series_list) > 1:
            raise BadRequestError("Got multiple series for single owner.")
    return series_list
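
A minimal usage sketch; the query string is illustrative and mirrors the metering query built in Example #15:

series_list = _query_influxdb(
    "SELECT MAX(cores) AS cores FROM usage"
    " WHERE owner = 'org1' GROUP BY time(1d)",  # 'org1' is hypothetical
    owner_id='org1')
# Each element is a dict along the lines of:
#   {'name': 'usage', 'columns': ['time', 'cores'], 'values': [[...], ...]}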
Example #7
def find_metrics(machine):
    if not machine.monitoring.hasmonitoring:
        raise ForbiddenError("Machine doesn't have monitoring enabled.")
    try:
        data = requests.get("%s/v1/resources/%s" %
                            (config.TSFDB_URI, machine.id),
                            headers={'x-org-id': machine.owner.id},
                            timeout=5)
    except Exception as exc:
        log.error('Got %r on find_metrics for resource %s', exc, machine.id)
        raise ServiceUnavailableError()

    if not data.ok:
        log.error('Got %d on find_metrics: %s', data.status_code, data.content)
        raise ServiceUnavailableError()

    return data.json().get("metrics", {})
Example #8
def show_fields(measurement=None):
    """Return field keys and their respective values, including tags."""
    # Make sure measurement names are inside quotes to escape special
    # characters, such as "." or "-".
    if isinstance(measurement, list):
        measurement = ','.join(['"%s"' % m for m in measurement])
    elif measurement:
        measurement = '"%s"' % measurement
    q = 'SHOW FIELD KEYS'
    q += ' FROM %s' % measurement if measurement else ''
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get('%s&q=%s' % (url, q))
    if not data.ok:
        log.error('Got %d on SHOW FIELDS: %s', data.status_code, data.content)
        raise ServiceUnavailableError()
    fields = []
    results = data.json().get('results', [])
    if results:
        tags = show_tags(measurement)
        results = results[0]
    else:
        results = {}
    for series in results.get('series', []):
        name = series['name']
        for value in series['values']:  # eg. value = [u'load1', u'float']
            pairs = []
            column = value[0]
            for key, values in tags[name].items():
                if key in (
                        'host',
                        'machine_id',
                ):
                    continue
                pairs += ['%s=%s' % (key, value) for value in values]
            if pairs:
                ids = ['%s.%s.%s' % (name, pair, column) for pair in pairs]
            else:
                ids = ['%s.%s' % (name, column)]
            for id in ids:
                fields.append({
                    'id': id,
                    'name': '%s %s' % (name.upper(),
                                       column.replace('_', ' ')),
                    'column': column,
                    'measurement': name,
                    'max_value': None,
                    'min_value': None,
                    'priority': 0,
                    'unit': '',
                })
    return fields
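
To make the id scheme concrete, suppose show_tags reported an 'interface' tag for a 'net' measurement carrying a 'bytes_sent' field; the ids built above would be (all names hypothetical):

name, column = 'net', 'bytes_sent'
pairs = ['interface=eth0', 'interface=lo']
ids = ['%s.%s.%s' % (name, pair, column) for pair in pairs]
# -> ['net.interface=eth0.bytes_sent', 'net.interface=lo.bytes_sent']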
Example #9
    def search(self, start, stop, terms=None, query_string=''):
        """Query Elasticsearch for documents within the given timeframe."""
        query = self._get_query(start, stop, terms, query_string)
        try:
            return self._run_query(query)
        except eexc.NotFoundError as err:
            log.error('%s: %s', self.__class__.__name__, err.info)
            raise NotFoundError(err.error)
        except (eexc.RequestError, eexc.TransportError) as err:
            log.error('%s: %s', self.__class__.__name__, err.info)
            raise BadRequestError(err.error)
        except (eexc.ConnectionError, eexc.ConnectionTimeout) as err:
            log.error('%s: %s', self.__class__.__name__, err.info)
            raise ServiceUnavailableError(err.error)
Example #10
def show_tag_values(key):
    """Return all tag values of the specified key."""
    q = 'SHOW TAG VALUES WITH KEY = "%s"' % key
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get('%s&q=%s' % (url, q))
    if not data.ok:
        log.error('Got %d on SHOW TAG VAL: %s', data.status_code, data.content)
        raise ServiceUnavailableError()
    tags = set()
    results = data.json().get('results', [])
    results = results[0] if results else {}
    for series in results.get('series', []):
        for value in series['values']:
            tags.add(value[1])
    return list(tags)
Example #11
def show_measurements(machine_id=None):
    """Return a list of measurements filtered by machine_id, if provided."""
    q = 'SHOW MEASUREMENTS'
    q += ' WHERE "machine_id" = \'%s\'' % machine_id if machine_id else ''
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get(url, params=dict(q=q))
    if not data.ok:
        log.error('Got %d on SHOW MEASUR: %s', data.status_code, data.content)
        raise ServiceUnavailableError()
    measurements = set()
    results = data.json().get('results', [])
    if results:
        results = results[0]
        series = results.get('series', [])
    else:
        series = []
    if series:
        values = series[0].get('values', [])
        measurements = set([value[0] for value in values])
    return list(measurements)
Example #12
    def get_stats(self, metric, start=None, stop=None, step=None,
                  callback=None, tornado_async=False):
        """Query InfluxDB for the given metric.

        This method, after parsing the metric given, is responsible for
        incrementally constructing the InfluxDB query.

        The provided metric should be in the form of:

            <measurement>.<tags>.<column>

        Also, metrics may be enclosed in nested functions, such as:

            MEAN(system.load1)

        or even:

            DERIVATIVE(MEAN(net.bytes_sent))

        """
        # A list of functions, extracted from `metric` to be applied later on.
        functions = []

        # Attempt to match nested functions in `metric` in order to extract the
        # actual metric and store any functions in `functions` so that they can
        # be re-applied later on.
        regex = r'^(\w+)\((.+)\)$'
        match = re.match(regex, metric)
        while match:
            groups = match.groups()
            metric = groups[1]
            functions.append(groups[0].upper())
            match = re.match(regex, metric)

        # Get the measurement and requested column(s). Update tags.
        self.measurement, self.column, tags = self.parse_path(metric)

        # Construct query.
        q = 'SELECT %s' % self.column
        for function in functions:
            if function not in AGGREGATIONS | TRANSFORMATIONS:
                raise BadRequestError('Function %s not supported' % function)
            q = q.replace(self.column, '%s(%s)' % (function, self.column))
        if functions and not re.match('^/.*/$', self.column):  # Not for regex.
            q += ' AS %s' % self.column
        q += ' FROM "%s"' % self.measurement
        q = group(add_filter(q, tags, start, stop), self.group, step)

        if not tornado_async:
            data = requests.get(self.influx, params=dict(q=q))
            log.warning('Query: %s', q)
            if not data.ok:
                log.error('Got %d HTTP status code on get_stats: %s',
                          data.status_code, data.content)
                log.error('Query: %s', q)
                if data.status_code == 400:
                    raise BadRequestError()
                raise ServiceUnavailableError()
            if callback is not None:
                return callback(self._on_stats_callback(data.json()))
            return self._on_stats_callback(data.json())

        def _on_tornado_response(resp):
            if resp.code != 200:
                log.error('Code %d on get_stats: %s', resp.code, resp.body)
                if resp.code == 400:
                    raise BadRequestError()
                raise ServiceUnavailableError()
            return callback(self._on_stats_callback(json.loads(resp.body)))

        AsyncHTTPClient().fetch(url_concat(self.influx, dict(q=q)),
                                callback=_on_tornado_response)
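
The unwrapping loop at the top of get_stats can be exercised in isolation; a quick worked example using the metric from the docstring:

import re

regex = r'^(\w+)\((.+)\)$'
metric, functions = 'DERIVATIVE(MEAN(net.bytes_sent))', []
match = re.match(regex, metric)
while match:
    functions.append(match.group(1).upper())  # outermost function first
    metric = match.group(2)                   # strip one level of nesting
    match = re.match(regex, metric)
print(metric, functions)  # net.bytes_sent ['DERIVATIVE', 'MEAN']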
Example #13
def get_stats(machine, start="", stop="", step="", metrics=None):
    data = {}

    # If no metrics are specified, then we get all of them
    if not metrics:
        metrics = [
            'fetch("{id}.*", start="{start}", stop="{stop}", step="{step}")'
        ]

    for metric in metrics:
        query = metric.format(id=machine.id, start=start, stop=stop, step=step)
        try:
            raw_machine_data = requests.get(
                "%s/v1/datapoints?query=%s" %
                (config.TSFDB_URI, urllib.parse.quote(query)),
                headers={'x-org-id': machine.owner.id},
                timeout=20)
        except Exception as exc:
            log.error('Got %r on get_stats for resource %s', exc, machine.id)
            raise ServiceUnavailableError()

        if not raw_machine_data.ok:
            log.error('Got %d on get_stats: %s', raw_machine_data.status_code,
                      raw_machine_data.content)
            raise ServiceUnavailableError()

        raw_machine_data = raw_machine_data.json()
        raw_metrics = list(raw_machine_data.get("series", {}).keys())
        for raw_metric in raw_metrics:
            # Use the metric name without the machine id as the key,
            # e.g. "id.system.load1" -> "system.load1"
            _, returned_metric = raw_metric.split(".", 1)
            data[returned_metric] = {
                "name": returned_metric,
                "datapoints": raw_machine_data["series"].get(raw_metric, []),
            }

    if not isinstance(machine, str):
        # set activated_at for collectd/telegraf installation status
        # if no data previously received for machine
        from mist.api.helpers import trigger_session_update
        from mist.api.rules.tasks import add_nodata_rule

        istatus = machine.monitoring.installation_status
        if not istatus.activated_at:
            for val in (point[0] for item in list(data.values())
                        for point in item['datapoints']
                        if point[1] >= istatus.started_at):
                if val is not None:
                    if not istatus.finished_at:
                        istatus.finished_at = time.time()
                    istatus.activated_at = time.time()
                    istatus.state = 'succeeded'
                    machine.save()
                    add_nodata_rule.delay(machine.owner.id)
                    trigger_session_update(machine.owner, ['monitoring'])
                    break

    return data
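
The key rewriting only strips the machine id prefix from each returned series key; for example (the id is made up):

raw_metric = 'd2ca5f02.system.load1'           # hypothetical series key
_, returned_metric = raw_metric.split('.', 1)  # -> 'system.load1'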
Example #14
def get_events(auth_context,
               owner_id='',
               user_id='',
               event_type='',
               action='',
               limit=0,
               start=0,
               stop=0,
               newest=True,
               error=None,
               **kwargs):
    """Fetch logged events.

    This generator yields a series of logs after querying Elasticsearch.

    The initial query is extended with additional terms based on the inputs
    provided. Also, extra filtering may be applied in order to perform RBAC
    on the fly given the permissions granted to the requesting User.

    All Elasticsearch indices are in the form of <app|ui>-logs-<date>.

    """
    # Restrict access to UI logs to Admins only.
    is_admin = auth_context and auth_context.user.role == 'Admin'

    # Attempt to enforce owner_id in case of non-Admins.
    if not is_admin and not owner_id:
        owner_id = auth_context.owner.id if auth_context else None

    # Construct base Elasticsearch query.
    index = "%s-logs-*" % ("*" if is_admin else "app")
    query = {
        "query": {
            "bool": {
                "filter": {
                    "bool": {
                        "must": [{
                            "range": {
                                "@timestamp": {
                                    "gte": int(start * 1000),
                                    "lte": int(stop * 1000) or "now"
                                }
                            }
                        }],
                        "must_not": []
                    }
                }
            }
        },
        "sort": [{
            "@timestamp": {
                "order": ("desc" if newest else "asc")
            }
        }],
        "size": (limit or 50)
    }
    # Match action.
    if action:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"action": action}})
    # Fetch logs corresponding to the current Organization.
    if owner_id:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"owner_id": owner_id}})
    # Match the user's ID, if provided.
    if user_id:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"user_id": user_id}})
    # Specify whether to fetch stories that ended with an error.
    if error:
        query["query"]["bool"]["filter"]["bool"]["must_not"].append(
            {"term": {"error": False}})
    elif error is False:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"error": False}})
    # Perform a complex "Query String" Query that may span fields.
    if 'filter' in kwargs:
        f = kwargs.pop('filter')
        query_string = {
            'query': f,
            'analyze_wildcard': True,
            'default_operator': 'and',
            'allow_leading_wildcard': False
        }
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {'query_string': query_string})

    # Extend query with additional kwargs.
    for key, value in kwargs.items():
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {key: value}})

    # Apply RBAC for non-Owners.
    if auth_context and not auth_context.is_owner():
        filter_logs(auth_context, query)

    # Query Elasticsearch.
    try:
        result = es().search(index=index, doc_type=event_type, body=query)
    except eexc.NotFoundError as err:
        log.error('Error %s during ES query: %s', err.status_code, err.info)
        raise NotFoundError(err.error)
    except (eexc.RequestError, eexc.TransportError) as err:
        log.error('Error %s during ES query: %s', err.status_code, err.info)
        raise BadRequestError(err.error)
    except (eexc.ConnectionError, eexc.ConnectionTimeout) as err:
        log.error('Error %s during ES query: %s', err.status_code, err.info)
        raise ServiceUnavailableError(err.error)

    for hit in result['hits']['hits']:
        event = hit['_source']
        if not event.get('action'):
            log.error('Skipped event %s, missing action', event['log_id'])
            continue
        try:
            extra = json.loads(event.pop('extra'))
        except Exception as exc:
            log.error('Failed to parse extra of event %s: %r', event, exc)
        else:
            for key, value in extra.items():
                event[key] = value
        if event.get('su') and not is_admin:
            continue
        yield event
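
For a call such as get_events(auth_context, owner_id='org1', action='login', error=False), the assembled query body would look roughly like this (defaults filled in, ids hypothetical):

{
    "query": {"bool": {"filter": {"bool": {
        "must": [
            {"range": {"@timestamp": {"gte": 0, "lte": "now"}}},
            {"term": {"action": "login"}},
            {"term": {"owner_id": "org1"}},
            {"term": {"error": False}},
        ],
        "must_not": [],
    }}}},
    "sort": [{"@timestamp": {"order": "desc"}}],
    "size": 50,
}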
Example #15
def get_usage(owner_id='', full_days=6):
    """Request metering data

    If no owner_id is specified, then sum for all owners.

    """

    assert isinstance(full_days, int)

    # Get the start of the samples' range.
    now = datetime.datetime.utcnow()
    today = datetime.datetime(year=now.year, month=now.month, day=now.day)
    start = today - datetime.timedelta(days=full_days)

    # Prepare base URL.
    url = '%s/query?db=metering' % config.INFLUX['host']

    # Prepare query.
    query = "SELECT"
    query += " MAX(cores) AS cores,"
    query += " NON_NEGATIVE_DERIVATIVE(MAX(checks)) AS checks,"
    query += " NON_NEGATIVE_DERIVATIVE(MAX(datapoints)) AS datapoints, "
    query += " MAX(cost) AS cost "
    query += "FROM usage"
    query += " WHERE time >= '%s'" % start.isoformat(sep=' ')
    if owner_id:
        query += " AND owner = '%s' " % owner_id
    query += "GROUP BY time(1d)"
    if not owner_id:
        query += ",owner"

    # Request metering info.
    results = requests.get('%s&q=%s' % (url, query))
    if not results.ok:
        log.error('Failed to execute query "%s": %s', query, results.content)
        if results.status_code == 400:
            raise BadRequestError()
        raise ServiceUnavailableError()

    try:
        results = results.json()
        series_list = results['results'][0]['series']
    except (KeyError, IndexError):
        log.error('Failed to execute: %s', query)
        raise BadRequestError('Failed to parse results: %s' % results)

    if owner_id and len(series_list) > 1:
        raise BadRequestError("Got multiple series for single owner.")

    data = {}
    for series in series_list:
        for value in series.get('values', []):
            usage = {k: v for k, v in zip(series['columns'], value)}
            date = usage.pop('time')
            if date not in data:
                data[date] = usage
            else:
                for k, v in usage.items():
                    if k not in data[date] or data[date][k] is None:
                        data[date][k] = v
                    elif v is not None:
                        data[date][k] += v

    return [{
        'date': d,
        'cost': data[d].pop('cost', 0),
        'usage': data[d]
    } for d in sorted(data)]
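
When no owner_id is given, InfluxDB returns one series per owner and the loop above sums usage per date; the rows returned are shaped like this (numbers invented):

[{'date': '2021-03-01T00:00:00Z',
  'cost': 12.5,
  'usage': {'cores': 4, 'checks': 1200, 'datapoints': 86400}}]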