def connect(self, username, key=None, password=None, cert_file=None,
            port=22):
    """Initialize an SSH connection.

    Tries to connect and configure self. If only password is provided,
    it will be used for authentication. If key is provided, it is
    treated as an OpenSSH private RSA key and used for authentication.
    If both key and password are provided, password is used as a
    passphrase to unlock the private key.

    Raises MachineUnauthorizedError if it fails to connect.

    """
    if not key and not password:
        raise RequiredParameterMissingError("neither key nor password "
                                            "provided.")
    if key:
        private = key.private
        if isinstance(key, SignedSSHKey) and cert_file:
            # signed ssh key, use RSACert
            rsa_key = paramiko.RSACert(
                privkey_file_obj=StringIO(private),
                cert_file_obj=StringIO(cert_file))
        else:
            rsa_key = paramiko.RSAKey.from_private_key(StringIO(private))
    else:
        rsa_key = None

    attempts = 3
    while attempts:
        attempts -= 1
        try:
            self.ssh.connect(
                self.host,
                port=port,
                username=username,
                password=password,
                pkey=rsa_key,
                allow_agent=False,
                look_for_keys=False,
                timeout=10
            )
            break
        except paramiko.AuthenticationException as exc:
            log.error("ssh exception %r", exc)
            raise MachineUnauthorizedError("Couldn't connect to "
                                           "%s@%s:%s. %s"
                                           % (username, self.host,
                                              port, exc))
        except socket.error as exc:
            log.error("Got ssh error: %r", exc)
            if not attempts:
                raise ServiceUnavailableError("SSH timed-out repeatedly.")
        except Exception as exc:
            log.error("ssh exception %r", exc)
            # Don't fail on SSHException or other paramiko exceptions,
            # e.g. related to the network, but keep retrying until all
            # attempts are exhausted.
            if not attempts:
                raise ServiceUnavailableError(repr(exc))
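# Illustrative sketch (assumption: plain paramiko API, nothing
# mist-specific): how the PEM text stored in `key.private` above is
# turned into the paramiko key object that connect() passes as `pkey`.
# `pem_text` is a hypothetical placeholder for the key material.
def _demo_load_rsa_key(pem_text):
    from io import StringIO

    import paramiko

    # Same call connect() uses for non-signed keys; an optional
    # `password` kwarg would unlock an encrypted key, mirroring the
    # passphrase behavior described in the docstring above.
    return paramiko.RSAKey.from_private_key(StringIO(pem_text))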
def show_tag_keys(measurement=None):
    """Return all tag keys."""
    q = 'SHOW TAG KEYS'
    q += ' FROM %s' % measurement if measurement is not None else ''
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get('%s&q=%s' % (url, q))
    if not data.ok:
        log.error('Got %d on SHOW TAG KEY: %s',
                  data.status_code, data.content)
        raise ServiceUnavailableError()
    tags = {}
    results = data.json().get('results', [])
    results = results[0] if results else {}
    for series in results.get('series', []):
        name = series['name']
        if not series['values']:
            continue
        if name not in tags:
            tags[name] = []
        for value in series['values']:
            value = value[0]
            if value not in (
                'host',
                'machine_id',
            ):
                tags[name].append(value)
    return tags
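# Illustrative sketch of the JSON shape show_tag_keys() parses. The
# payload below is hand-made to mimic InfluxDB's 'SHOW TAG KEYS'
# response; it was not captured from a live server.
def _demo_parse_tag_keys():
    sample = {
        'results': [{
            'series': [{
                'name': 'system',
                'values': [['host'], ['machine_id'], ['kernel']],
            }]
        }]
    }
    tags = {}
    for series in sample['results'][0].get('series', []):
        tags.setdefault(series['name'], [])
        for value in series['values']:
            # 'host' and 'machine_id' are skipped, as above.
            if value[0] not in ('host', 'machine_id'):
                tags[series['name']].append(value[0])
    return tags  # {'system': ['kernel']}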
def _on_response_callback(response, tornado_async=False):
    """HTTP Response-handling callback.

    This method is meant to return HTTP Response objects generated
    either in a Tornado or synchronous execution context.

    Arguments:
        - response: HTTP Response object.
        - tornado_async: Denotes if a Tornado-safe HTTP request was
          issued.

    """
    if tornado_async:
        if response.code != 200:
            log.error(
                'Error on Elasticsearch query in tornado_async mode. '
                'Got %d status code: %s', response.code, response.body)
            if response.code == 400:
                raise BadRequestError()
            if response.code == 404:
                raise NotFoundError()
            if response.code == 429:
                raise RateLimitError()
            raise ServiceUnavailableError()
        response = json.loads(response.body)
    return response
def _get_multimachine_stats(owner, metric, start='', stop='', step='',
                            uuids=None):
    if not uuids:
        uuids = [machine.id for machine in Machine.objects(
            cloud__in=Cloud.objects(owner=owner, deleted=None),
            monitoring__hasmonitoring=True
        )]
    if not uuids:
        raise NotFoundError("No machine has monitoring enabled.")
    try:
        data = get_multi_uuid(uuids, metric, start=start, stop=stop,
                              interval_str=step)
    except Exception as exc:
        log.error("Error getting %s: %r", metric, exc)
        raise ServiceUnavailableError()
    ret = {}
    for item in data:
        target = item['target'].split('.')
        if len(target) > 1:
            uuid = target[1]
        else:
            uuid = target[0]
        item['name'] = uuid
        ret[uuid] = item
    return ret
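# Illustrative sketch: how _get_multimachine_stats() extracts a machine
# uuid from a graphite-style target. Both sample targets are made up.
def _demo_uuid_from_target():
    for target in ('bucket.d821fa20.load.shortterm', 'd821fa20'):
        parts = target.split('.')
        uuid = parts[1] if len(parts) > 1 else parts[0]
        assert uuid == 'd821fa20'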
def _query_influxdb(query, owner_id):
    # Prepare base URL.
    url = '%s/query?db=metering' % config.INFLUX['host']

    # Request metering info.
    results = requests.get('%s&q=%s' % (url, query))
    if not results.ok:
        log.error('Failed to execute query "%s": %s',
                  query, results.content)
        if results.status_code == 400:
            raise BadRequestError()
        raise ServiceUnavailableError()

    # Get the `results` key. If the query is valid, this shouldn't
    # fail. Even if InfluxDB returns an empty response, the `results`
    # key should be there.
    results = results.json()
    results = results['results']
    try:
        series_list = results[0]['series']
    except IndexError:
        # raise BadRequestError('Failed to parse results: %s' % results)
        log.error('Failed to parse results: %s', results)
        series_list = []
    except KeyError:
        series_list = []
    else:
        if owner_id and len(series_list) > 1:
            raise BadRequestError("Got multiple series for single owner.")
    return series_list
def find_metrics(machine):
    if not machine.monitoring.hasmonitoring:
        raise ForbiddenError("Machine doesn't have monitoring enabled.")
    try:
        data = requests.get(
            "%s/v1/resources/%s" % (config.TSFDB_URI, machine.id),
            headers={'x-org-id': machine.owner.id}, timeout=5)
    except Exception as exc:
        log.error('Got %r on find_metrics for resource %s'
                  % (exc, machine.id))
        raise ServiceUnavailableError()
    if not data.ok:
        log.error('Got %d on find_metrics: %s',
                  data.status_code, data.content)
        raise ServiceUnavailableError()
    return data.json().get("metrics", {})
def show_fields(measurement=None):
    """Return field keys and their respective values, including tags."""
    # Make sure measurement names are inside quotes to escape special
    # characters, such as "." or "-".
    if isinstance(measurement, list):
        measurement = ','.join(['"%s"' % m for m in measurement])
    elif measurement:
        measurement = '"%s"' % measurement
    q = 'SHOW FIELD KEYS'
    q += ' FROM %s' % measurement if measurement else ''
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get('%s&q=%s' % (url, q))
    if not data.ok:
        log.error('Got %d on SHOW FIELDS: %s',
                  data.status_code, data.content)
        raise ServiceUnavailableError()
    fields = []
    results = data.json().get('results', [])
    if results:
        tags = show_tags(measurement)
        results = results[0]
    else:
        results = {}
    for series in results.get('series', []):
        name = series['name']
        for value in series['values']:
            # eg. value = [u'load1', u'float']
            pairs = []
            column = value[0]
            for key, values in tags[name].items():
                if key in (
                    'host',
                    'machine_id',
                ):
                    continue
                pairs += ['%s=%s' % (key, value) for value in values]
            if pairs:
                ids = ['%s.%s.%s' % (name, pair, column)
                       for pair in pairs]
            else:
                ids = ['%s.%s' % (name, column)]
            for id in ids:
                fields.append({
                    'id': id,
                    'name': '%s %s' % (name.upper(),
                                       column.replace('_', ' ')),
                    'column': column,
                    'measurement': name,
                    'max_value': None,
                    'min_value': None,
                    'priority': 0,
                    'unit': '',
                })
    return fields
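# Illustrative sketch of the '<measurement>.<tag>=<value>.<column>' ids
# built by show_fields() above. Measurement, tag and column names are
# made up.
def _demo_field_ids():
    name, column = 'net', 'bytes_sent'
    pairs = ['interface=eth0', 'interface=eth1']
    ids = ['%s.%s.%s' % (name, pair, column) for pair in pairs]
    assert ids == ['net.interface=eth0.bytes_sent',
                   'net.interface=eth1.bytes_sent']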
def search(self, start, stop, terms=None, query_string=''):
    """Query elasticsearch for documents within the given timeframe."""
    query = self._get_query(start, stop, terms, query_string)
    try:
        return self._run_query(query)
    except eexc.NotFoundError as err:
        log.error('%s: %s', self.__class__.__name__, err.info)
        raise NotFoundError(err.error)
    except (eexc.RequestError, eexc.TransportError) as err:
        log.error('%s: %s', self.__class__.__name__, err.info)
        raise BadRequestError(err.error)
    except (eexc.ConnectionError, eexc.ConnectionTimeout) as err:
        log.error('%s: %s', self.__class__.__name__, err.info)
        raise ServiceUnavailableError(err.error)
def show_tag_values(key):
    """Return all tag values of the specified key."""
    q = 'SHOW TAG VALUES WITH KEY = "%s"' % key
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get('%s&q=%s' % (url, q))
    if not data.ok:
        log.error('Got %d on SHOW TAG VAL: %s',
                  data.status_code, data.content)
        raise ServiceUnavailableError()
    tags = set()
    results = data.json().get('results', [])
    results = results[0] if results else {}
    for series in results.get('series', []):
        for value in series['values']:
            tags.add(value[1])
    return list(tags)
def show_measurements(machine_id=None):
    """Return a list of measurements filtered by machine_id, if provided."""
    q = 'SHOW MEASUREMENTS'
    q += ' WHERE "machine_id" = \'%s\'' % machine_id if machine_id else ''
    url = '%(host)s/query?db=%(db)s' % INFLUX
    data = requests.get(url, params=dict(q=q))
    if not data.ok:
        log.error('Got %d on SHOW MEASUR: %s',
                  data.status_code, data.content)
        raise ServiceUnavailableError()
    measurements = set()
    results = data.json().get('results', [])
    if results:
        results = results[0]
        series = results.get('series', [])
    else:
        series = []
    if series:
        values = series[0].get('values', [])
        measurements = set([value[0] for value in values])
    return list(measurements)
def get_stats(self, metric, start=None, stop=None, step=None,
              callback=None, tornado_async=False):
    """Query InfluxDB for the given metric.

    This method, after parsing the given metric, is responsible for
    incrementally constructing the InfluxDB query.

    The provided metric should be in the form of:

        <measurement>.<tags>.<column>

    Also, metrics may be enclosed in nested functions, such as:

        MEAN(system.load1)

    or even:

        DERIVATIVE(MEAN(net.bytes_sent))

    """
    # A list of functions, extracted from `metric`, to be applied later on.
    functions = []

    # Attempt to match nested functions in `metric` in order to extract
    # the actual metric and store any functions in `functions` so that
    # they can be re-applied later on.
    regex = r'^(\w+)\((.+)\)$'
    match = re.match(regex, metric)
    while match:
        groups = match.groups()
        metric = groups[1]
        functions.append(groups[0].upper())
        match = re.match(regex, metric)

    # Get the measurement and requested column(s). Update tags.
    self.measurement, self.column, tags = self.parse_path(metric)

    # Construct query.
    q = 'SELECT %s' % self.column
    for function in functions:
        if function not in AGGREGATIONS | TRANSFORMATIONS:
            raise BadRequestError('Function %s not supported' % function)
        q = q.replace(self.column, '%s(%s)' % (function, self.column))
    if functions and not re.match('^/.*/$', self.column):  # Not for regex.
        q += ' AS %s' % self.column
    q += ' FROM "%s"' % self.measurement
    q = group(add_filter(q, tags, start, stop), self.group, step)

    if not tornado_async:
        data = requests.get(self.influx, params=dict(q=q))
        log.warn('Query: %s' % q)
        if not data.ok:
            log.error('Got %d HTTP status code on get_stats: %s',
                      data.status_code, data.content)
            log.error('Query: %s' % q)
            if data.status_code == 400:
                raise BadRequestError()
            raise ServiceUnavailableError()
        if callback is not None:
            return callback(self._on_stats_callback(data.json()))
        return self._on_stats_callback(data.json())

    def _on_tornado_response(resp):
        if resp.code != 200:
            log.error('Code %d on get_stats: %s', resp.code, resp.body)
            if resp.code == 400:
                raise BadRequestError()
            raise ServiceUnavailableError()
        return callback(self._on_stats_callback(json.loads(resp.body)))

    AsyncHTTPClient().fetch(url_concat(self.influx, dict(q=q)),
                            callback=_on_tornado_response)
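# Illustrative sketch: the function-unwrapping loop of get_stats()
# applied to the docstring's own example metric.
def _demo_unwrap_functions():
    import re

    metric, functions = 'DERIVATIVE(MEAN(net.bytes_sent))', []
    regex = r'^(\w+)\((.+)\)$'
    match = re.match(regex, metric)
    while match:
        functions.append(match.group(1).upper())
        metric = match.group(2)
        match = re.match(regex, metric)
    # Outermost function first, exactly as get_stats() stores them.
    assert functions == ['DERIVATIVE', 'MEAN']
    assert metric == 'net.bytes_sent'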
def get_stats(machine, start="", stop="", step="", metrics=None):
    data = {}

    # If no metrics are specified, then we get all of them.
    if not metrics:
        metrics = [
            ('fetch("{id}.*"'
             ', start="{start}", stop="{stop}"'
             ', step="{step}")')
        ]

    for metric in metrics:
        query = metric.format(id=machine.id, start=start, stop=stop,
                              step=step)
        try:
            raw_machine_data = requests.get(
                "%s/v1/datapoints?query=%s"
                % (config.TSFDB_URI, urllib.parse.quote(query)),
                headers={'x-org-id': machine.owner.id},
                timeout=20)
        except Exception as exc:
            log.error('Got %r on get_stats for resource %s'
                      % (exc, machine.id))
            raise ServiceUnavailableError()

        if not raw_machine_data.ok:
            log.error('Got %d on get_stats: %s',
                      raw_machine_data.status_code,
                      raw_machine_data.content)
            raise ServiceUnavailableError()

        raw_machine_data = raw_machine_data.json()
        raw_metrics = list(raw_machine_data.get("series", {}).keys())
        for raw_metric in raw_metrics:
            # We use as key the metric name without the machine id,
            # e.g. "id.system.load1" => "system.load1"
            _, returned_metric = raw_metric.split(".", 1)
            data.update({
                returned_metric: {
                    "name": returned_metric,
                    "datapoints": raw_machine_data["series"].get(
                        raw_metric, []),
                }
            })

    if not isinstance(machine, str):
        # Set activated_at for collectd/telegraf installation status
        # if no data was previously received for the machine.
        from mist.api.helpers import trigger_session_update
        from mist.api.rules.tasks import add_nodata_rule
        istatus = machine.monitoring.installation_status
        if not istatus.activated_at:
            for val in (point[0] for item in list(data.values())
                        for point in item['datapoints']
                        if point[1] >= istatus.started_at):
                if val is not None:
                    if not istatus.finished_at:
                        istatus.finished_at = time.time()
                    istatus.activated_at = time.time()
                    istatus.state = 'succeeded'
                    machine.save()
                    add_nodata_rule.delay(machine.owner.id)
                    trigger_session_update(machine.owner, ['monitoring'])
                    break

    return data
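# Illustrative sketch: the default fetch() query get_stats() builds and
# how the machine-id prefix is stripped from returned series names. The
# machine id and time range are made up.
def _demo_tsfdb_query():
    template = ('fetch("{id}.*"'
                ', start="{start}", stop="{stop}"'
                ', step="{step}")')
    query = template.format(id='d821fa20', start='-10m', stop='',
                            step='10s')
    assert query == 'fetch("d821fa20.*", start="-10m", stop="", step="10s")'
    _, returned_metric = 'd821fa20.system.load1'.split('.', 1)
    assert returned_metric == 'system.load1'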
def get_events(auth_context, owner_id='', user_id='', event_type='',
               action='', limit=0, start=0, stop=0, newest=True,
               error=None, **kwargs):
    """Fetch logged events.

    This generator yields a series of logs after querying Elasticsearch.

    The initial query is extended with additional terms based on the
    inputs provided. Also, extra filtering may be applied in order to
    perform RBAC on the fly given the permissions granted to the
    requesting User.

    All Elasticsearch indices are in the form of <app|ui>-logs-<date>.

    """
    # Restrict access to UI logs to Admins only.
    is_admin = auth_context and auth_context.user.role == 'Admin'
    # Attempt to enforce owner_id in case of non-Admins.
    if not is_admin and not owner_id:
        owner_id = auth_context.owner.id if auth_context else None

    # Construct base Elasticsearch query.
    index = "%s-logs-*" % ("*" if is_admin else "app")
    query = {
        "query": {
            "bool": {
                "filter": {
                    "bool": {
                        "must": [{
                            "range": {
                                "@timestamp": {
                                    "gte": int(start * 1000),
                                    "lte": int(stop * 1000) or "now"
                                }
                            }
                        }],
                        "must_not": []
                    }
                }
            }
        },
        "sort": [{
            "@timestamp": {
                "order": ("desc" if newest else "asc")
            }
        }],
        "size": (limit or 50)
    }
    # Match action.
    if action:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"action": action}})
    # Fetch logs corresponding to the current Organization.
    if owner_id:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"owner_id": owner_id}})
    # Match the user's ID, if provided.
    if user_id:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"user_id": user_id}})
    # Specify whether to fetch stories that ended with an error.
    if error:
        query["query"]["bool"]["filter"]["bool"]["must_not"].append(
            {"term": {"error": False}})
    elif error is False:
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {"error": False}})
    # Perform a complex "Query String" Query that may span fields.
    if 'filter' in kwargs:
        f = kwargs.pop('filter')
        query_string = {
            'query': f,
            'analyze_wildcard': True,
            'default_operator': 'and',
            'allow_leading_wildcard': False
        }
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {'query_string': query_string})
    # Extend query with additional kwargs.
    for key, value in kwargs.items():
        query["query"]["bool"]["filter"]["bool"]["must"].append(
            {"term": {key: value}})

    # Apply RBAC for non-Owners.
    if auth_context and not auth_context.is_owner():
        filter_logs(auth_context, query)

    # Query Elasticsearch.
    try:
        result = es().search(index=index, doc_type=event_type, body=query)
    except eexc.NotFoundError as err:
        log.error('Error %s during ES query: %s',
                  err.status_code, err.info)
        raise NotFoundError(err.error)
    except (eexc.RequestError, eexc.TransportError) as err:
        log.error('Error %s during ES query: %s',
                  err.status_code, err.info)
        raise BadRequestError(err.error)
    except (eexc.ConnectionError, eexc.ConnectionTimeout) as err:
        log.error('Error %s during ES query: %s',
                  err.status_code, err.info)
        raise ServiceUnavailableError(err.error)

    for hit in result['hits']['hits']:
        event = hit['_source']
        if not event.get('action'):
            log.error('Skipped event %s, missing action', event['log_id'])
            continue
        try:
            extra = json.loads(event.pop('extra'))
        except Exception as exc:
            log.error('Failed to parse extra of event %s: %r', event, exc)
        else:
            for key, value in extra.items():
                event[key] = value
        if event.get('su') and not is_admin:
            continue
        yield event
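# Illustrative sketch: how get_events() translates the tri-state
# `error` flag into bool-query clauses (None means "don't filter").
def _demo_error_filter():
    for error, clause in ((True, 'must_not'), (False, 'must')):
        q = {'must': [], 'must_not': []}
        if error:
            q['must_not'].append({'term': {'error': False}})
        elif error is False:
            q['must'].append({'term': {'error': False}})
        assert q[clause] == [{'term': {'error': False}}]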
def get_usage(owner_id='', full_days=6):
    """Request metering data.

    If no owner_id is specified, then sum for all owners.

    """
    assert isinstance(full_days, int)

    # Get the start of the samples' range.
    now = datetime.datetime.utcnow()
    today = datetime.datetime(year=now.year, month=now.month, day=now.day)
    start = today - datetime.timedelta(days=full_days)

    # Prepare base URL.
    url = '%s/query?db=metering' % config.INFLUX['host']

    # Prepare query.
    query = "SELECT"
    query += " MAX(cores) AS cores,"
    query += " NON_NEGATIVE_DERIVATIVE(MAX(checks)) AS checks,"
    query += " NON_NEGATIVE_DERIVATIVE(MAX(datapoints)) AS datapoints,"
    query += " MAX(cost) AS cost "
    query += "FROM usage"
    query += " WHERE time >= '%s'" % start.isoformat(sep=' ')
    if owner_id:
        query += " AND owner = '%s' " % owner_id
    query += "GROUP BY time(1d)"
    if not owner_id:
        query += ",owner"

    # Request metering info.
    results = requests.get('%s&q=%s' % (url, query))
    if not results.ok:
        log.error('Failed to execute query "%s": %s',
                  query, results.content)
        if results.status_code == 400:
            raise BadRequestError()
        raise ServiceUnavailableError()

    try:
        results = results.json()
        series_list = results['results'][0]['series']
    except (KeyError, IndexError):
        log.error('Failed to execute: %s', query)
        raise BadRequestError('Failed to parse results: %s' % results)

    if owner_id and len(series_list) > 1:
        raise BadRequestError("Got multiple series for single owner.")

    data = {}
    for series in series_list:
        for value in series.get('values', []):
            usage = {k: v for k, v in zip(series['columns'], value)}
            date = usage.pop('time')
            if date not in data:
                data[date] = usage
            else:
                for k, v in usage.items():
                    if k not in data[date] or data[date][k] is None:
                        data[date][k] = v
                    elif v is not None:
                        data[date][k] += v

    return [{
        'date': d,
        'cost': data[d].pop('cost', 0),
        'usage': data[d]
    } for d in sorted(data)]
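# Illustrative sketch: how get_usage() zips an InfluxDB row with its
# column names into a per-date usage dict. The sample row is made up.
def _demo_zip_usage_row():
    columns = ['time', 'cores', 'checks', 'datapoints', 'cost']
    value = ['2020-01-01T00:00:00Z', 4, 120, 35000, 1.5]
    usage = {k: v for k, v in zip(columns, value)}
    assert usage.pop('time') == '2020-01-01T00:00:00Z'
    assert usage == {'cores': 4, 'checks': 120,
                     'datapoints': 35000, 'cost': 1.5}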