def disable(self):
    """Disable the cloud and mark its visible machines as missing.

    Sets `enabled` to False on `self.cloud`, saves the cloud, and then
    stamps `missing_since` on every machine of the cloud that is not
    already marked as missing.
    """
    self.cloud.enabled = False
    self.cloud.save()

    # FIXME: Circular dependency.
    from mist.api.machines.models import Machine

    # Store a naive UTC timestamp. The other code paths visible in this
    # file that write `missing_since`/`deleted` use `utcnow()`; a local
    # time here would not be comparable with those values.
    Machine.objects(cloud=self.cloud, missing_since=None).update(
        missing_since=datetime.datetime.utcnow())
def add(self, fail_on_error=True, fail_on_invalid_params=True, **kwargs):
    """Populate and persist a new Cloud.

    Meant to be invoked only through the `Cloud.add` classmethod. At that
    point `self.cloud` already carries `owner` and `title` but has not been
    saved yet.

    Params:
    fail_on_error: Forwarded to `self.update`. If a
        `CloudUnavailableError` or `CloudUnauthorizedError` comes back from
        the update, machines already stored for this cloud are removed and
        the error is re-raised.
    fail_on_invalid_params: Forwarded to `self.update`; if True, invalid
        keys in `kwargs` are expected to raise an error.

    Subclasses SHOULD NOT override or extend this method. Special parsing
    of `kwargs` belongs in `self._add__preparse_kwargs`.
    """
    # Transform params with extra underscores for compatibility.
    for key_pair in (('api_key', 'apikey'), ('api_secret', 'apisecret')):
        rename_kwargs(kwargs, *key_pair)

    # Cloud-wide arguments, common to every cloud type.
    # Only a literal True enables DNS; any other value counts as False.
    self.cloud.dns_enabled = kwargs.pop('dns_enabled', False) is True
    self.cloud.observation_logs_enabled = True
    self.cloud.polling_interval = kwargs.pop('polling_interval', 30 * 60)

    # Cloud specific kwargs preparsing.
    try:
        self._add__preparse_kwargs(kwargs)
    except MistError as exc:
        log.error("Error while adding cloud %s: %r", self.cloud, exc)
        raise
    except Exception as exc:
        log.exception("Error while preparsing kwargs on add %s", self.cloud)
        raise InternalServerError(exc=exc)

    update_options = dict(
        kwargs,
        fail_on_error=fail_on_error,
        fail_on_invalid_params=fail_on_invalid_params,
    )
    try:
        self.update(**update_options)
    except (CloudUnavailableError, CloudUnauthorizedError):
        # FIXME: Move this to top of the file once Machine model is
        # migrated. The import statement is currently here to avoid
        # circular import issues.
        from mist.api.machines.models import Machine
        # Remove any machines created from check_connection performing a
        # list_machines.
        Machine.objects(cloud=self.cloud).delete()
        # Propagate original error.
        raise

    # Add relevant polling schedules.
    self.add_polling_schedules()
def dockerhost(self):
    """This is a helper method to get the machine representing the host

    The result is cached on `self._dockerhost`. On a cache miss the
    machine is looked up in the database (first by `machine_type`, then by
    the older `extra.tags.type` marker) and created in memory if neither
    lookup matches. Its name, hostname, machine type, machine id and
    resolved IP address are then brought up to date, and the model is
    saved only if something changed.
    """
    if self._dockerhost is not None:
        return self._dockerhost

    try:
        # Find dockerhost from database.
        machine = Machine.objects.get(cloud=self.cloud,
                                      machine_type='container-host')
    except Machine.DoesNotExist:
        try:
            # Find dockerhost with previous format from database.
            machine = Machine.objects.get(
                cloud=self.cloud,
                # Nested query. Trailing underscores to avoid conflict
                # with mongo's $type operator. See:
                # https://github.com/MongoEngine/mongoengine/issues/1410
                **{'extra__tags__type__': 'docker_host'})
        except Machine.DoesNotExist:
            # Create dockerhost machine.
            machine = Machine(cloud=self.cloud,
                              machine_type='container-host')

    # Update dockerhost machine model fields.
    changed = False
    # `.items()` instead of the Python-2-only `.iteritems()`, so this runs
    # on both Python 2 and Python 3.
    for attr, val in {
        'name': self.cloud.title,
        'hostname': self.cloud.host,
        'machine_type': 'container-host',
    }.items():
        if getattr(machine, attr) != val:
            setattr(machine, attr, val)
            changed = True
    if not machine.machine_id:
        machine.machine_id = machine.id
        changed = True

    try:
        ip_addr = socket.gethostbyname(machine.hostname)
    except socket.gaierror:
        # Hostname does not resolve; leave the IP lists untouched.
        pass
    else:
        is_private = netaddr.IPAddress(ip_addr).is_private()
        ips = machine.private_ips if is_private else machine.public_ips
        if ip_addr not in ips:
            ips.insert(0, ip_addr)
            changed = True

    if changed:
        machine.save()

    self._dockerhost = machine
    return machine
def delete(self, expire=False):
    """Delete a Cloud.

    The cloud is always stamped as deleted (soft delete) and saved. The
    mongodb document is kept unless `expire` is set.

    :param expire: if True, the cloud's machines and the cloud document
        itself are additionally removed from their collections.
    """
    cloud = self.cloud
    cloud.deleted = datetime.datetime.utcnow()
    cloud.save()
    if not expire:
        return

    # FIXME: Circular dependency.
    from mist.api.machines.models import Machine
    Machine.objects(cloud=cloud).delete()
    cloud.delete()
def _get_multimachine_stats(owner, metric, start='', stop='', step='',
                            uuids=None):
    """Fetch `metric` for several machines and key the series by uuid.

    When `uuids` is empty, it defaults to the ids of all machines with
    monitoring enabled in the owner's non-deleted clouds.

    Raises NotFoundError if there is no machine to query, and
    ServiceUnavailableError if `get_multi_uuid` fails for any reason.
    """
    if not uuids:
        owner_clouds = Cloud.objects(owner=owner, deleted=None)
        monitored = Machine.objects(cloud__in=owner_clouds,
                                    monitoring__hasmonitoring=True)
        uuids = [machine.id for machine in monitored]
        if not uuids:
            raise NotFoundError("No machine has monitoring enabled.")

    try:
        data = get_multi_uuid(uuids, metric, start=start, stop=stop,
                              interval_str=step)
    except Exception as exc:
        log.error("Error getting %s: %r", metric, exc)
        raise ServiceUnavailableError()

    stats = {}
    for series in data:
        # The machine identifier is the second dotted component of the
        # target, or the whole target if it has no dots.
        parts = series['target'].split('.')
        machine_uuid = parts[1] if len(parts) > 1 else parts[0]
        series['name'] = machine_uuid
        stats[machine_uuid] = series
    return stats
def get_load(owner, start='', stop='', step='', uuids=None):
    """Get shortterm load for all monitored machines.

    :param owner: owner whose non-deleted clouds are searched.
    :param start: start of the time range, passed to the backends.
    :param stop: end of the time range, passed to the backends.
    :param step: aggregation step, passed to the backends.
    :param uuids: optional iterable of machine ids to restrict the query
        to. A falsy value (None or empty) applies no restriction.
    :return: dict merging the graphite and influxdb results.
    :raises NotFoundError: if neither backend returned any data.
    """
    clouds = Cloud.objects(owner=owner, deleted=None).only('id')
    machines = Machine.objects(cloud__in=clouds,
                               monitoring__hasmonitoring=True)
    if uuids:
        # QuerySet.filter() returns a *new* queryset. The result must be
        # kept, otherwise the restriction is silently dropped.
        machines = machines.filter(id__in=list(uuids))

    # Split machines by monitoring backend in a single pass, so the
    # queryset is evaluated once.
    graphite_uuids = []
    influx_uuids = []
    for machine in machines:
        method = machine.monitoring.method
        if method.endswith('-graphite'):
            graphite_uuids.append(machine.id)
        elif method.endswith('-influxdb'):
            influx_uuids.append(machine.id)

    graphite_data = {}
    influx_data = {}
    if graphite_uuids:
        graphite_data = graphite_get_load(owner, start=start, stop=stop,
                                          step=step, uuids=graphite_uuids)
    if influx_uuids:
        # Transform "min" and "sec" to "m" and "s", respectively.
        _start, _stop, _step = [
            re.sub('in|ec', repl='', string=x)
            for x in (start.strip('-'), stop.strip('-'), step)
        ]
        influx_data = InfluxMultiLoadHandler(influx_uuids).get_stats(
            metric='system.load1',
            start=_start, stop=_stop, step=_step,
        )

    if graphite_data or influx_data:
        return dict(list(graphite_data.items()) + list(influx_data.items()))
    raise NotFoundError('No machine has monitoring enabled')
def _list_vnfs(self, host=None):
    """List VNFs of one host, or of every non-missing top-level host.

    :param host: host machine to query. If falsy, all machines of this
        cloud with no parent that are not marked missing are queried.
    :return: flat list of the VNF dicts returned by each host driver's
        `ex_list_vnfs()`, each with a `location` key holding the id of the
        matching CloudLocation, or None if no location was found.
    """
    from mist.api.machines.models import Machine
    from mist.api.clouds.models import CloudLocation

    if not host:
        hosts = Machine.objects(
            cloud=self.cloud, parent=None, missing_since=None)
    else:
        hosts = [host]

    vnfs = []
    # A separate loop variable avoids rebinding the `host` parameter.
    for current_host in hosts:  # TODO: asyncio
        driver = self.cloud.ctl.compute._get_host_driver(current_host)
        host_vnfs = driver.ex_list_vnfs()

        try:
            location = CloudLocation.objects.get(cloud=self.cloud,
                                                 name=current_host.name)
        except CloudLocation.DoesNotExist:
            # Fall back to the dash-separated form of the host name.
            host_name = current_host.name.replace('.', '-')
            try:
                location = CloudLocation.objects.get(
                    cloud=self.cloud, external_id=host_name)
            except CloudLocation.DoesNotExist:
                location = None
        except Exception as e:
            log.error(e)
            location = None

        # `location` may be None here; dereferencing `.id` unconditionally
        # used to raise AttributeError for hosts without a location.
        location_id = location.id if location is not None else None
        for vnf in host_vnfs:
            vnf['location'] = location_id
        vnfs += host_vnfs
    return vnfs
def set_missing():
    """Declare machines, whose cloud has been marked as deleted, as missing

    For every cloud with a non-null `deleted` timestamp, machines that are
    not yet marked missing get `missing_since` set to that timestamp.
    Progress and a final summary are printed to stdout; tracebacks of
    failed updates go through `traceback.print_exc()`.
    """
    failed = succeeded = 0
    clouds = Cloud.objects(deleted__ne=None)

    # Single-argument print(...) calls behave identically on Python 2 and
    # Python 3, unlike the print statement used previously.
    print('')
    print('Searching through %d clouds' % clouds.count())
    print('')

    for c in clouds:
        label = 'Updating machines of %s' % c
        try:
            updated = Machine.objects(
                cloud=c, missing_since=None).update(missing_since=c.deleted)
        except Exception:
            print('%s [ERROR]' % label)
            traceback.print_exc()
            failed += 1
        else:
            print('%s [OK:%s]' % (label, updated))
            succeeded += 1

    print('')
    print('Failed: %s' % failed)
    print('Succeeded: %s' % succeeded)
    print('')
    print('Completed %s' % ('with errors!' if failed else 'successfully!'))
    print('')
def remove_string_field_type():
    """Unset the `size` field on every machine document.

    Machines are enumerated through the `Machine` model, and the raw
    `machines` collection of the `mist2` database is updated directly with
    `$unset`. Per-machine failures are printed and counted without
    aborting the run.
    """
    c = MongoClient(MONGO_URI)
    try:
        db = c.get_database('mist2')
        db_machines = db['machines']
        machines = Machine.objects().only('id')

        # Single-argument print(...) calls behave identically on Python 2
        # and Python 3, unlike the print statement used previously.
        print('')
        print('Removing size field from %d migrated machines'
              % db_machines.count())
        print('')

        failed = migrated = 0
        for machine in machines:
            label = 'Updating machine %s ...' % machine['id']
            try:
                db_machines.update_one({'_id': machine['id']},
                                       {'$unset': {'size': ''}})
            except Exception:
                print(label)
                traceback.print_exc()
                failed += 1
            else:
                print('%s OK' % label)
                migrated += 1

        print('migrated: %d' % migrated)
        # The failure counter used to be collected but never shown.
        if failed:
            print('failed: %d' % failed)
    finally:
        # Release the connection even if an unexpected error escapes.
        c.close()
def add(self, fail_on_error=True, fail_on_invalid_params=True, **kwargs):
    """This is a hack to associate a key with the VM hosting this cloud

    Runs the parent `add` (with an extra `add=True` keyword), then makes
    sure a machine entry exists for the host itself. If the cloud has a
    key, that key is associated with the host machine.
    """
    super(LibvirtMainController, self).add(
        fail_on_error=fail_on_error,
        fail_on_invalid_params=fail_on_invalid_params,
        add=True,
        **kwargs
    )

    # FIXME: Don't use self.cloud.host as machine_id, this prevents us from
    # changing the cloud's host.
    # FIXME: Add type field to differentiate between actual vm's and the
    # host.
    from mist.api.machines.models import Machine

    cloud = self.cloud
    host_machine_id = cloud.host.replace('.', '-')
    try:
        machine = Machine.objects.get(cloud=cloud,
                                      machine_id=host_machine_id)
    except me.DoesNotExist:
        machine = Machine(cloud=cloud, name=cloud.name,
                          machine_id=host_machine_id)
        machine = machine.save()

    if not cloud.key:
        return
    machine.ctl.associate_key(cloud.key, username=cloud.username,
                              port=cloud.port)
def list_keys(owner):
    """List owner's keys

    :param owner: owner whose non-deleted keys are listed.
    :return: list of dicts, one per key, with the key's id, name,
        ownership info, default flag, associated machines and tags.
    """
    owner_clouds = Cloud.objects(owner=owner, deleted=None)

    # FIXME: This must be taken care of in Keys.as_dict
    listing = []
    for key in Key.objects(owner=owner, deleted=None):
        # FIXME: Need to optimize this! It's potentially invoked per ssh
        # probe. Can't we expose associations directly from
        # Machine.key_associations?
        associated = Machine.objects(cloud__in=owner_clouds,
                                     key_associations__keypair__exact=key)
        listing.append({
            'id': key.id,
            'name': key.name,
            'owned_by': key.owned_by.id if key.owned_by else '',
            'created_by': key.created_by.id if key.created_by else '',
            'isDefault': key.default,
            'machines': transform_key_machine_associations(associated, key),
            'tags': get_tags_for_resource(owner, key),
        })
    return listing
def disassociate_key(request):
    """
    Disassociate a key from a machine
    Disassociates a key from a machine. If host is set it will also attempt to
    actually remove it from the machine.
    READ permission required on cloud.
    DISASSOCIATE_KEY permission required on machine.
    ---
    key:
      in: path
      required: true
      type: string
    machine:
      in: path
      required: true
      type: string
    """
    key_id = request.matchdict['key']
    cloud_id = request.matchdict.get('cloud')
    auth_context = auth_context_from_request(request)

    if cloud_id:
        # this is deprecated, keep it for backwards compatibility
        machine_id = request.matchdict['machine']
        try:
            Cloud.objects.get(owner=auth_context.owner,
                              id=cloud_id, deleted=None)
        except Cloud.DoesNotExist:
            raise NotFoundError('Cloud does not exist')

        auth_context.check_perm("cloud", "read", cloud_id)
        try:
            machine = Machine.objects.get(cloud=cloud_id,
                                          machine_id=machine_id,
                                          state__ne='terminated')
        except Machine.DoesNotExist:
            raise NotFoundError("Machine %s doesn't exist" % machine_id)
    else:
        machine_uuid = request.matchdict['machine']
        try:
            machine = Machine.objects.get(id=machine_uuid,
                                          state__ne='terminated')
        except Machine.DoesNotExist:
            raise NotFoundError("Machine %s doesn't exist" % machine_uuid)

        cloud_id = machine.cloud.id
        auth_context.check_perm("cloud", "read", cloud_id)

    auth_context.check_perm("machine", "disassociate_key", machine.id)

    # Report a missing key as a 404-style error, like the cloud and machine
    # lookups above, instead of letting DoesNotExist escape unhandled.
    try:
        key = Key.objects.get(owner=auth_context.owner,
                              id=key_id, deleted=None)
    except Key.DoesNotExist:
        raise NotFoundError('Key id does not exist')

    key.ctl.disassociate(machine)

    # Return the machines that are still associated with the key.
    clouds = Cloud.objects(owner=auth_context.owner, deleted=None)
    machines = Machine.objects(cloud__in=clouds,
                               key_associations__keypair__exact=key)
    return transform_key_machine_associations(machines, key)
def _gen_config():
    """Generate traefik config from scratch for all machines

    Every machine with monitoring enabled contributes one frontend and one
    backend entry, both keyed by the machine id.
    """
    frontends = {}
    backends = {}
    monitored = Machine.objects(monitoring__hasmonitoring=True)
    for machine in monitored:
        machine_frontend, machine_backend = _gen_machine_config(machine)
        frontends[machine.id] = machine_frontend
        backends[machine.id] = machine_backend
    return {'frontends': frontends, 'backends': backends}
def check_monitoring(owner):
    """Return the monitored machines, enabled metrics, and user details.

    :param owner: owner whose non-deleted clouds are inspected.
    :return: dict with the keys `machines`, `monitored_machines`, `rules`,
        `alerts_email` and `custom_metrics`, plus `builtin_metrics*`
        entries when the default monitoring method ends with `graphite` or
        `influxdb`. Every entry under `rules`, `builtin_metrics` and
        `custom_metrics` gets its own key injected as `id`.
    """
    custom_metrics = owner.get_metrics_dict()
    for metric in custom_metrics.values():
        metric['machines'] = []

    monitored_machines = []
    monitored_machines_2 = {}

    clouds = Cloud.objects(owner=owner, deleted=None)
    machines = Machine.objects(cloud__in=clouds,
                               monitoring__hasmonitoring=True)
    for machine in machines:
        monitored_machines.append([machine.cloud.id, machine.machine_id])
        try:
            commands = machine.monitoring.get_commands()
        except Exception as exc:
            log.error(exc)
            commands = {}
        monitored_machines_2[machine.id] = {
            'cloud_id': machine.cloud.id,
            'machine_id': machine.machine_id,
            'installation_status':
                machine.monitoring.installation_status.as_dict(),
            'commands': commands,
        }
        for metric_id in machine.monitoring.metrics:
            if metric_id in custom_metrics:
                # Append directly instead of rebinding `machines`, which is
                # the queryset this loop is iterating over.
                custom_metrics[metric_id]['machines'].append(
                    (machine.cloud.id, machine.machine_id))

    ret = {
        'machines': monitored_machines,
        'monitored_machines': monitored_machines_2,
        'rules': owner.get_rules_dict(),
        'alerts_email': owner.alerts_email,
        'custom_metrics': custom_metrics,
    }
    if config.DEFAULT_MONITORING_METHOD.endswith('graphite'):
        ret.update({
            # Keep for backwards compatibility
            'builtin_metrics': config.GRAPHITE_BUILTIN_METRICS,
            'builtin_metrics_graphite': config.GRAPHITE_BUILTIN_METRICS,
            'builtin_metrics_influxdb': config.INFLUXDB_BUILTIN_METRICS,
        })
    elif config.DEFAULT_MONITORING_METHOD.endswith('influxdb'):
        ret.update({
            # Keep for backwards compatibility
            'builtin_metrics': config.INFLUXDB_BUILTIN_METRICS,
            'builtin_metrics_influxdb': config.INFLUXDB_BUILTIN_METRICS,
        })

    for key in ('rules', 'builtin_metrics', 'custom_metrics'):
        # `builtin_metrics` is absent when the default method matches
        # neither branch above; skip it rather than raise KeyError.
        for item_id, item in ret.get(key, {}).items():
            item['id'] = item_id
    return ret
def push_metering_info(owner_id):
    """Collect and push new metering data to InfluxDB

    Gathers three counters for `owner_id` (cores of machines seen today,
    rule check counts, and datapoints read back from the `metering`
    database) and writes them as `usage` points.

    :param owner_id: id of the owner to meter.
    :raises Exception: if the `metering` database cannot be created.
    """
    now = datetime.datetime.utcnow()
    metering = {}

    # Base InfluxDB URL.
    url = config.INFLUX['host']

    # Create database for storing metering data, if missing.
    db = requests.post('%s/query?q=CREATE DATABASE metering' % url)
    if not db.ok:
        raise Exception(db.content)

    # CPUs
    for machine in Machine.objects(owner=owner_id,
                                   last_seen__gte=now.date()):
        metering.setdefault(
            owner_id,
            dict.fromkeys(('cores', 'checks', 'datapoints'), 0)
        )
        try:
            if _skip_metering(machine):
                continue
            metering[owner_id]['cores'] += machine.cores or 0
        except Exception as exc:
            log.error('Failed upon cores metering of %s: %r', machine.id, exc)

    # Checks
    # NOTE(review): the counters dict is only created in the machine loop
    # above, so with no machine seen today the updates below fail with
    # KeyError and are only logged -- confirm this is intended.
    for rule in Rule.objects(owner_id=owner_id):
        try:
            metering[rule.owner_id]['checks'] += rule.total_check_count
        except Exception as exc:
            log.error('Failed upon checks metering of %s: %r', rule.id, exc)

    # Datapoints
    try:
        q = "SELECT MAX(counter) FROM datapoints "
        q += "WHERE owner = '%s' AND time >= now() - 30m" % owner_id
        q += " GROUP BY machine"
        result = requests.get('%s/query?db=metering&q=%s' % (url, q)).json()
        result = result['results'][0]['series']
        for series in result:
            metering[owner_id]['datapoints'] += series['values'][0][-1]
    except Exception as exc:
        log.error('Failed upon datapoints metering: %r', exc)

    # Assemble points.
    # `.items()` instead of the Python-2-only `.iteritems()`, so this runs
    # on both Python 2 and Python 3.
    points = []
    for owner, counters in metering.items():
        value = ','.join('%s=%s' % (k, v) for k, v in counters.items())
        points.append('usage,owner=%s %s' % (owner, value))

    # Write metering data.
    data = '\n'.join(points)
    write = requests.post('%s/write?db=metering&precision=s' % url, data=data)
    if not write.ok:
        log.error('Failed to write metering data: %s', write.text)
def _list_networks__fetch_networks(self):
    """Collect the networks of every non-missing top-level host.

    Runs `self.list_networks_all_hosts` to completion on the current
    asyncio event loop and flattens its per-host results into one list.
    """
    from mist.api.machines.models import Machine

    hosts = Machine.objects(cloud=self.cloud, parent=None,
                            missing_since=None)
    loop = asyncio.get_event_loop()
    nets_per_host = loop.run_until_complete(
        self.list_networks_all_hosts(hosts, loop))

    networks = []
    for host_nets in nets_per_host:
        networks.extend(host_nets)
    return networks
def get_load(owner, start="", stop="", step="", uuids=None):
    """Get shortterm load for all monitored machines.

    :param owner: owner whose non-deleted clouds are searched.
    :param start: start of the time range, passed to the backends.
    :param stop: end of the time range, passed to the backends.
    :param step: aggregation step, passed to the backends.
    :param uuids: optional iterable of machine ids to restrict the query
        to. A falsy value (None or empty) applies no restriction.
    :return: dict merging the graphite, influxdb and tsfdb results.
    :raises NotFoundError: if no backend returned any data.
    """
    clouds = Cloud.objects(owner=owner, deleted=None).only("id")
    machines = Machine.objects(cloud__in=clouds,
                               monitoring__hasmonitoring=True)
    if uuids:
        # QuerySet.filter() returns a *new* queryset. The result must be
        # kept, otherwise the restriction is silently dropped.
        machines = machines.filter(id__in=list(uuids))

    # Split machines by monitoring backend in a single pass, so the
    # queryset is evaluated once instead of three times.
    graphite_uuids = []
    influx_uuids = []
    fdb_uuids = []
    for machine in machines:
        method = machine.monitoring.method
        if method.endswith("-graphite"):
            graphite_uuids.append(machine.id)
        elif method.endswith("-influxdb"):
            influx_uuids.append(machine.id)
        elif method.endswith("-tsfdb"):
            fdb_uuids.append(machine.id)

    graphite_data = {}
    influx_data = {}
    fdb_data = {}

    if graphite_uuids:
        graphite_data = graphite_get_load(owner, start=start, stop=stop,
                                          step=step, uuids=graphite_uuids)
    if influx_uuids:
        # Transform "min" and "sec" to "m" and "s", respectively.
        _start, _stop, _step = [
            re.sub("in|ec", repl="", string=x)
            for x in (start.strip("-"), stop.strip("-"), step)
        ]
        metric = "system.load1"
        if step:
            # Aggregate when a step is requested.
            metric = "MEAN(%s)" % metric
        influx_data = InfluxMultiLoadHandler(influx_uuids).get_stats(
            metric=metric,
            start=_start, stop=_stop, step=_step,
        )
    if fdb_uuids:
        fdb_data = fdb_get_load(owner, fdb_uuids, start, stop, step)

    if graphite_data or influx_data or fdb_data:
        return dict(
            list(graphite_data.items()) +
            list(influx_data.items()) +
            list(fdb_data.items()))
    raise NotFoundError("No machine has monitoring enabled")
def delete(self, expire=False):
    """Delete a Cloud.

    By default this is a soft delete: the cloud is stamped as deleted and
    saved, and the `set_missing_since` task is scheduled for it with a 30
    second countdown.

    :param expire: if True, the cloud's machines and the cloud document
        itself are removed from their collections instead.
    """
    if not expire:
        from mist.api.tasks import set_missing_since
        self.cloud.deleted = datetime.datetime.utcnow()
        self.cloud.save()
        set_missing_since.apply_async((self.cloud.id, ), countdown=30)
        return

    # FIXME: Set reverse_delete_rule=me.CASCADE?
    from mist.api.machines.models import Machine
    Machine.objects(cloud=self.cloud).delete()
    self.cloud.delete()
def add_key(request):
    """
    Tags: keys
    ---
    Adds key.
    ADD permission required on key.
    ---
    name:
      description: The key's name
      required: true
      type: string
    priv:
      description: The private key
      required: true
      type: string
    certificate:
      description: The signed public key, when using signed ssh keys
      type: string
    """
    params = params_from_request(request)
    # `name` is removed from params; the rest is forwarded to `add` below.
    key_name = params.pop('name', None)
    private_key = params.get('priv')
    certificate = params.get('certificate')

    auth_context = auth_context_from_request(request)
    key_tags = auth_context.check_perm("key", "add", None)

    if not key_name:
        raise BadRequestError("Key name is not provided")
    if not private_key:
        raise RequiredParameterMissingError("Private key is not provided")

    # A certificate means the key is a signed ssh key.
    key_cls = SignedSSHKey if certificate else SSHKey
    key = key_cls.add(auth_context.owner, key_name, **params)

    # Set ownership.
    key.assign_to(auth_context.user)

    if key_tags:
        add_tags_to_resource(auth_context.owner, key, key_tags.items())

    # since its a new key machines fields should be an empty list
    owner_clouds = Cloud.objects(owner=auth_context.owner, deleted=None)
    associated = Machine.objects(cloud__in=owner_clouds,
                                 key_associations__keypair__exact=key)
    assoc_machines = transform_key_machine_associations(associated, key)

    return {
        'id': key.id,
        'name': key.name,
        'machines': assoc_machines,
        'isDefault': key.default,
    }
def get_load(request):
    """
    Tags: monitoring
    ---
    Request load data for all monitored machines
    ---
    start:
      in: query
      type: string
      default: now
      required: false
      description: time (eg. '10s') since when to fetch stats
    stop:
      in: query
      type: string
      required: false
      description: time until when to fetch stats
    step:
      in: query
      type: string
      required: false
      description: step to fetch stats, used in aggregations
    request_id:
      in: query
      type: string
      required: false
    """
    auth_context = auth_context_from_request(request)

    # Only enabled clouds that the requester may list are considered.
    enabled_cloud_ids = []
    for cloud in filter_list_clouds(auth_context):
        if cloud['enabled']:
            enabled_cloud_ids.append(cloud['id'])

    monitored = Machine.objects(
        cloud__in=enabled_cloud_ids,
        monitoring__hasmonitoring=True,
    ).only('id')
    uuids = [machine.id for machine in monitored]

    if not auth_context.is_owner():
        # Non-owners only get the machines they are allowed to access.
        # NOTE(review): if this intersection is empty, the callee receives
        # an empty collection; confirm it does not treat that as "no
        # restriction".
        allowed_uuids = auth_context.get_allowed_resources(rtype='machines')
        uuids = set(uuids).intersection(set(allowed_uuids))

    params = params_from_request(request)
    data = mist.api.monitoring.methods.get_load(
        auth_context.owner,
        start=params.get('start', ''),
        stop=params.get('stop', ''),
        step=params.get('step', ''),
        uuids=uuids,
    )
    # Echo the request id back so the client can match the response.
    data['request_id'] = params.get('request_id')
    return data
def _gen_config():
    """Generate traefik config from scratch for all machines

    Only machines with monitoring enabled through one of the telegraf
    based methods contribute a frontend and a backend entry, both keyed
    by the machine id.
    """
    telegraf_methods = ['telegraf-graphite', 'telegraf-influxdb']
    monitored = Machine.objects(
        monitoring__hasmonitoring=True,
        monitoring__method__in=telegraf_methods,
    )

    frontends = {}
    backends = {}
    for machine in monitored:
        machine_frontend, machine_backend = _gen_machine_config(machine)
        frontends[machine.id] = machine_frontend
        backends[machine.id] = machine_backend
    return {'frontends': frontends, 'backends': backends}
def list_cached_machines(self, timedelta=datetime.timedelta(days=1)):
    """Return list of machines from database

    Only machines of this cloud that are not marked as missing and were
    last seen within the past `timedelta` (relative to the current UTC
    time) are included.
    """
    seen_after = datetime.datetime.utcnow() - timedelta
    return Machine.objects(cloud=self.cloud, missing_since=None,
                           last_seen__gt=seen_after)
def migrate_libvirt_clouds():
    """Move `images_location` from libvirt clouds to their hypervisor.

    For each LibvirtCloud whose raw document has an `images_location`, the
    value is copied into the `extra` dict of the first non-missing machine
    tagged as a hypervisor. The legacy connection fields are then unset on
    the raw cloud document and the cloud's machines are re-listed. Clouds
    without an `images_location` are skipped; failures are printed and
    counted without aborting the run.
    """
    c = MongoClient(MONGO_URI)
    try:
        db = c.get_database('mist2')
        db_clouds = db['clouds']
        clouds = LibvirtCloud.objects()

        failed = migrated = skipped = 0
        for cloud in clouds:
            try:
                machines = Machine.objects(cloud=cloud, missing_since=None)
                images_location = db_clouds.find_one(
                    {'_id': cloud['id']}).get('images_location')
                if not images_location:
                    skipped += 1
                    continue

                print('Updating cloud ' + cloud['id'])
                for machine in machines:
                    tags = machine.extra.get('tags', {})
                    if tags.get('type') == 'hypervisor':
                        machine.extra.update(
                            {'images_location': images_location})
                        machine.save()
                        # Only the first hypervisor is updated.
                        break

                db_clouds.update_one(
                    {'_id': cloud['id']},
                    {'$unset': {'host': '', 'username': '', 'port': '',
                                'key': '', 'images_location': ''}}
                )
                cloud.ctl.compute.list_machines()
            except Exception:
                traceback.print_exc()
                failed += 1
                continue
            else:
                print('OK')
                migrated += 1

        print('Clouds migrated: %d' % migrated)
        if skipped:
            print('Skipped: %d' % skipped)
        # The failure counter used to be collected but never shown.
        if failed:
            print('Failed: %d' % failed)
    finally:
        # Release the connection even if an unexpected error escapes.
        c.close()
def disable_monitoring_cloud(owner, cloud_id, no_ssh=False):
    """Disable monitoring for all machines of the specified Cloud.

    Raises NotFoundError if the owner has no such non-deleted cloud.
    Failures on individual machines are logged and do not interrupt the
    remaining ones.
    """
    try:
        cloud = Cloud.objects.get(owner=owner, id=cloud_id, deleted=None)
    except me.DoesNotExist:
        raise NotFoundError("Cloud doesn't exist")

    monitored = Machine.objects(cloud=cloud, monitoring__hasmonitoring=True)
    for machine in monitored.only('machine_id'):
        try:
            disable_monitoring(owner, cloud_id, machine.machine_id,
                               no_ssh=no_ssh)
        except Exception as exc:
            log.error("Error while disabling monitoring for all machines of "
                      "Cloud %s (%s): %s", cloud.id, owner.id, exc)
def list_clouds(self):
    """Send the cloud list, then cached per-cloud listings, to the client.

    Emits `list_clouds` first. Depending on `config.ACTIVATE_POLLER`,
    machines are either sent straight from the database (poller active)
    or handled as one more periodic task. For every periodic task and
    every enabled, non-deleted cloud of the owner, `smart_delay` is
    called and any cached result it returns is emitted.
    """
    if config.ACTIVATE_POLLER:
        self.update_poller()
    self.send('list_clouds', filter_list_clouds(self.auth_context))
    clouds = Cloud.objects(owner=self.owner, enabled=True, deleted=None)
    log.info(clouds)
    periodic_tasks = []
    if not config.ACTIVATE_POLLER:
        # Without the poller, machines are listed like any other resource.
        periodic_tasks.append(('list_machines', tasks.ListMachines()))
    else:
        for cloud in clouds:
            # Same selection window as a one-day machine cache: machines
            # not marked missing and seen within the last 24 hours.
            after = datetime.datetime.utcnow() - datetime.timedelta(days=1)
            machines = Machine.objects(cloud=cloud, missing_since=None,
                                       last_seen__gt=after)
            machines = filter_list_machines(
                self.auth_context, cloud_id=cloud.id,
                machines=[machine.as_dict() for machine in machines]
            )
            if machines:
                log.info("Emitting list_machines from poller's cache.")
                self.send('list_machines',
                          {'cloud_id': cloud.id, 'machines': machines})
    periodic_tasks.extend([('list_images', tasks.ListImages()),
                           ('list_sizes', tasks.ListSizes()),
                           ('list_networks', tasks.ListNetworks()),
                           ('list_zones', tasks.ListZones()),
                           ('list_locations', tasks.ListLocations()),
                           ('list_projects', tasks.ListProjects())])
    for key, task in periodic_tasks:
        for cloud in clouds:
            # NOTE(review): presumably `smart_delay` schedules the task and
            # returns a cached result if one exists -- confirm against the
            # task implementation.
            cached = task.smart_delay(self.owner.id, cloud.id)
            if cached is not None:
                log.info("Emitting %s from cache", key)
                if key == 'list_machines':
                    # Cached machine lists are filtered per requester
                    # before being sent.
                    cached['machines'] = filter_list_machines(
                        self.auth_context, **cached
                    )
                    if cached['machines'] is None:
                        continue
                self.send(key, cached)
def list_keys(owner):
    """List owner's keys

    :param owner: owner whose non-deleted keys are listed.
    :return: list of dicts, one per key, with the key's id, name, default
        flag, associated machines and tags.
    """
    owner_clouds = Cloud.objects(owner=owner, deleted=None)

    # FIXME: This must be taken care of in Keys.as_dict
    listing = []
    for key in Key.objects(owner=owner, deleted=None):
        associated = Machine.objects(cloud__in=owner_clouds,
                                     key_associations__keypair__exact=key)
        listing.append({
            'id': key.id,
            'name': key.name,
            'isDefault': key.default,
            'machines': transform_key_machine_associations(associated, key),
            'tags': get_tags_for_resource(owner, key),
        })
    return listing
def load(self, machines=None):
    """Populate `self.hosts` and `self.keys` for the given machines.

    :param machines: iterable of `(cloud_id, machine_id)` pairs. If falsy,
        every machine in the owner's non-deleted clouds is used.

    Machines whose details or ssh settings cannot be resolved are skipped
    after printing the error. Host names are made unique by appending
    `-<n>` (starting at 2) on collisions.
    """
    self.hosts = {}
    self.keys = {}
    if not machines:
        clouds = Cloud.objects(owner=self.owner, deleted=None)
        machines = [(machine.cloud.id, machine.machine_id)
                    for machine in Machine.objects(cloud__in=clouds)]

    for bid, mid in machines:
        try:
            name, ip_addr = self.find_machine_details(bid, mid)
            key_id, ssh_user, port = self.find_ssh_settings(bid, mid)
        except Exception as exc:
            # print(...) with one argument works on Python 2 and Python 3,
            # unlike the print statement used previously.
            print(exc)
            continue

        ip_addr, port = dnat(self.owner, ip_addr, port)

        if key_id not in self.keys:
            keypair = SSHKey.objects.get(owner=self.owner, name=key_id,
                                         deleted=None)
            self.keys[key_id] = keypair.private
            if isinstance(keypair, SignedSSHKey):
                # if signed ssh key, provide the key appending a -cert.pub
                # on the name since this is how ssh will include it as
                # an identify file
                self.keys['%s-cert.pub' % key_id] = keypair.certificate
                # pub key also needed for openssh 7.2
                self.keys['%s.pub' % key_id] = keypair.public

        if name in self.hosts:
            # Find the first free "<name>-<n>" suffix.
            num = 2
            while ('%s-%d' % (name, num)) in self.hosts:
                num += 1
            name = '%s-%d' % (name, num)

        self.hosts[name] = {
            'ansible_ssh_host': ip_addr,
            'ansible_ssh_port': port,
            'ansible_ssh_user': ssh_user,
            'ansible_ssh_private_key_file': 'id_rsa/%s' % key_id,
        }
def list_machines(self):
    """Return list of machines for cloud

    A list of nodes is fetched from libcloud, the data is processed, stored
    on machine models, and a list of machine models is returned.

    Subclasses SHOULD NOT override or extend this method.

    There are instead a number of methods that are called from this method,
    to allow subclasses to modify the data according to the specific of
    their cloud type. These methods currently are:

        `self._list_machines__fetch_machines`
        `self._list_machines__machine_actions`
        `self._list_machines__postparse_machine`
        `self._list_machines__cost_machine`
        `self._list_machines__fetch_generic_machines`

    Subclasses that require special handling should override these, by
    default, dummy methods.

    Raises:
        CloudUnauthorizedError: the provider rejected the credentials.
        SSLError: an SSL error occurred while listing nodes.
        CloudUnavailableError: any other failure while listing nodes.
        BadRequestError: a machine model failed validation on save.
        ConflictError: a machine model violated a uniqueness constraint.

    """

    # Try to query list of machines from provider API.
    try:
        nodes = self._list_machines__fetch_machines()
        log.info("List nodes returned %d results for %s.",
                 len(nodes), self.cloud)
    except InvalidCredsError as exc:
        log.warning("Invalid creds on running list_nodes on %s: %s",
                    self.cloud, exc)
        # NOTE(review): `exc.message` does not exist on Python 3 built-in
        # exceptions -- confirm the exception type provides it.
        raise CloudUnauthorizedError(msg=exc.message)
    except ssl.SSLError as exc:
        log.error("SSLError on running list_nodes on %s: %s",
                  self.cloud, exc)
        raise SSLError(exc=exc)
    except Exception as exc:
        log.exception("Error while running list_nodes on %s", self.cloud)
        raise CloudUnavailableError(exc=exc)

    machines = []
    now = datetime.datetime.utcnow()

    # Process each machine in returned list.
    # Store previously unseen machines separately.
    new_machines = []
    for node in nodes:

        # Fetch machine mongoengine model from db, or initialize one.
        try:
            machine = Machine.objects.get(cloud=self.cloud,
                                          machine_id=node.id)
        except Machine.DoesNotExist:
            machine = Machine(cloud=self.cloud, machine_id=node.id).save()
            new_machines.append(machine)

        # Update machine_model's last_seen fields.
        machine.last_seen = now
        machine.missing_since = None

        # Get misc libcloud metadata.
        image_id = str(node.image or node.extra.get('imageId') or
                       node.extra.get('image_id') or
                       node.extra.get('image') or '')
        size = (node.size or node.extra.get('flavorId') or
                node.extra.get('instancetype'))

        machine.name = node.name
        machine.image_id = image_id
        machine.size = size
        machine.state = config.STATES[node.state]
        machine.private_ips = node.private_ips
        machine.public_ips = node.public_ips

        # Set machine extra dict.
        # Make sure we don't meet any surprises when we try to json encode
        # later on in the HTTP response.
        extra = self._list_machines__get_machine_extra(machine, node)
        for key, val in extra.items():
            try:
                json.dumps(val)
            except TypeError:
                # Not JSON serializable: store its string form instead.
                extra[key] = str(val)
        machine.extra = extra

        # Set machine hostname
        if machine.extra.get('dns_name'):
            machine.hostname = machine.extra['dns_name']
        else:
            ips = machine.public_ips + machine.private_ips
            if not ips:
                ips = []
            for ip in ips:
                # Use the first non-empty address without a colon, which
                # skips IPv6-style addresses.
                if ip and ':' not in ip:
                    machine.hostname = ip
                    break

        # Get machine tags from db
        tags = {tag.key: tag.value for tag in Tag.objects(
            owner=self.cloud.owner, resource=machine,
        ).only('key', 'value')}

        # Get machine creation date.
        try:
            created = self._list_machines__machine_creation_date(machine,
                                                                 node)
            if created:
                machine.created = get_datetime(created)
        except Exception as exc:
            log.exception("Error finding creation date for %s in %s.",
                          self.cloud, machine)
        # TODO: Consider if we should fall back to using current date.
        # if not machine_model.created:
        #     machine_model.created = datetime.datetime.utcnow()

        # Update with available machine actions.
        try:
            self._list_machines__machine_actions(machine, node)
        except Exception as exc:
            log.exception("Error while finding machine actions "
                          "for machine %s:%s for %s",
                          machine.id, node.name, self.cloud)

        # Apply any cloud/provider specific post processing.
        try:
            self._list_machines__postparse_machine(machine, node)
        except Exception as exc:
            log.exception("Error while post parsing machine %s:%s for %s",
                          machine.id, node.name, self.cloud)

        # Apply any cloud/provider cost reporting.
        try:
            def parse_num(num):
                # Lenient float conversion: falsy or unparsable values
                # count as 0.
                try:
                    return float(num or 0)
                except (ValueError, TypeError):
                    log.warning("Can't parse %r as float.", num)
                    return 0

            month_days = calendar.monthrange(now.year, now.month)[1]

            # Cost tags take precedence over the provider's figures, unless
            # they are missing or exceed the sanity limits below.
            cph = parse_num(tags.get('cost_per_hour'))
            cpm = parse_num(tags.get('cost_per_month'))
            if not (cph or cpm) or cph > 100 or cpm > 100 * 24 * 31:
                cph, cpm = map(parse_num,
                               self._list_machines__cost_machine(machine,
                                                                 node))
            # Derive whichever of the two figures is missing from the other.
            if not cph:
                cph = float(cpm) / month_days / 24
            elif not cpm:
                cpm = cph * 24 * month_days
            machine.cost.hourly = cph
            machine.cost.monthly = cpm

        except Exception as exc:
            log.exception("Error while calculating cost "
                          "for machine %s:%s for %s",
                          machine.id, node.name, self.cloud)
        if node.state.lower() == 'terminated':
            # Terminated machines are reported with zero cost.
            machine.cost.hourly = 0
            machine.cost.monthly = 0

        # Save all changes to machine model on the database.
        try:
            machine.save()
        except me.ValidationError as exc:
            log.error("Error adding %s: %s", machine.name, exc.to_dict())
            raise BadRequestError({"msg": exc.message,
                                   "errors": exc.to_dict()})
        except me.NotUniqueError as exc:
            log.error("Machine %s not unique error: %s", machine.name, exc)
            raise ConflictError("Machine with this name already exists")

        machines.append(machine)

    # Append generic-type machines, which aren't handled by libcloud.
    for machine in self._list_machines__fetch_generic_machines():
        machine.last_seen = now
        machine.missing_since = None
        machine.state = config.STATES[NodeState.UNKNOWN]
        for action in ('start', 'stop', 'reboot', 'destroy', 'rename',
                       'resume', 'suspend', 'undefine'):
            setattr(machine.actions, action, False)
        machine.actions.tag = True
        # allow reboot action for bare metal with key associated
        if machine.key_associations:
            machine.actions.reboot = True
        machine.save()
        machines.append(machine)

    # Set missing_since on machine models that were not part of this
    # listing and are not already marked as missing.
    Machine.objects(cloud=self.cloud,
                    id__nin=[m.id for m in machines],
                    missing_since=None).update(missing_since=now)

    # Update RBAC Mappings given the list of nodes seen for the first time.
    self.cloud.owner.mapper.update(new_machines)

    # Update machine counts on cloud and org.
    # FIXME: resolve circular import issues
    from mist.api.clouds.models import Cloud
    self.cloud.machine_count = len(machines)
    self.cloud.save()
    self.cloud.owner.total_machine_count = sum(
        cloud.machine_count for cloud in Cloud.objects(
            owner=self.cloud.owner, deleted=None
        ).only('machine_count')
    )
    self.cloud.owner.save()

    # Close libcloud connection
    try:
        self.disconnect()
    except Exception as exc:
        log.warning("Error while closing connection: %r", exc)

    return machines
def add_machine(self, name, host='', ssh_user='******', ssh_port=22,
                ssh_key=None, os_type='unix', rdp_port=3389,
                fail_on_error=True):
    """Register a machine with this dummy Cloud.

    This special method exists only on this Cloud subclass.

    A `Machine` document is created and saved. If the machine is a unix
    host and an SSH key was supplied, an `uptime` command is run over SSH
    to verify connectivity.

    Params:
    name: Name stored on the new machine.
    host: Optional hostname/IP. It is sanitized and checked, stored as the
        machine's hostname and recorded as a private or public IP
        depending on what it resolves to.
    ssh_user: SSH username; an empty value falls back to the default.
    ssh_port: SSH port; values that cannot be cast to int become 22.
    ssh_key: Optional id of a Key owned by the cloud's owner.
    os_type: The SSH check is attempted only for 'unix'.
    rdp_port: RDP port; values that cannot be cast to int become 3389.
    fail_on_error: If True, the saved machine is deleted when the SSH
        check fails. The error is raised in either case.

    Returns the saved `Machine`.

    Raises `CloudUnauthorizedError` if SSH authorization fails,
    `MistError` if the host cannot be reached, and re-raises anything
    else (e.g. `BadRequestError` when a key is given without a host).
    """

    def _port_or(value, fallback):
        # Coerce a user supplied port to int, or use the fallback.
        try:
            return int(value)
        except (ValueError, TypeError):
            return fallback

    def _discard_if_required():
        # Roll back the saved machine when failures must be fatal.
        if fail_on_error:
            machine.delete()

    # Snapshot of the cached machines, used to publish a patch at the end.
    machines_before = [
        cached.as_dict()
        for cached in self.cloud.ctl.compute.list_cached_machines()
    ]

    # FIXME: Move ssh command to Machine controller once it is migrated.
    from mist.api.methods import ssh_command

    ssh_port = _port_or(ssh_port, 22)
    rdp_port = _port_or(rdp_port, 3389)

    # Swap the key id for the actual Key document.
    if ssh_key:
        ssh_key = Key.objects.get(owner=self.cloud.owner, id=ssh_key,
                                  deleted=None)

    from mist.api.machines.models import Machine

    # Build the machine entry; it is written to the database below.
    machine = Machine(
        cloud=self.cloud,
        name=name,
        machine_id=uuid.uuid4().hex,
        os_type=os_type,
        ssh_port=ssh_port,
        rdp_port=rdp_port,
        last_seen=datetime.datetime.utcnow()
    )

    if host:
        # Sanitize inputs.
        host = sanitize_host(host)
        check_host(host)
        machine.hostname = host
        resolved_ip = socket.gethostbyname(host)
        if is_private_subnet(resolved_ip):
            ips_field = 'private_ips'
        else:
            ips_field = 'public_ips'
        setattr(machine, ips_field, [host])

    machine.save(write_concern={'w': 1, 'fsync': True})

    # Attempt to connect.
    if os_type == 'unix' and ssh_key:
        ssh_user = ssh_user or '******'
        # A successful connection also creates the key association.
        try:
            if not host:
                raise BadRequestError("You have specified an SSH key but "
                                      "machine hostname is empty.")
            to_tunnel(self.cloud.owner, host)  # May raise VPNTunnelError
            ssh_command(self.cloud.owner, self.cloud.id, machine.id, host,
                        'uptime', key_id=ssh_key.id, username=ssh_user,
                        port=ssh_port)
        except MachineUnauthorizedError as exc:
            _discard_if_required()
            raise CloudUnauthorizedError(exc)
        except ServiceUnavailableError:
            _discard_if_required()
            raise MistError("Couldn't connect to host '%s'."
                            % host)
        except:
            # Deliberately bare: roll back on any failure, then re-raise.
            _discard_if_required()
            raise

    # Notify listeners about the change in the machine list.
    if amqp_owner_listening(self.cloud.owner.id):
        machines_after = self.cloud.ctl.compute.list_cached_machines()
        self.cloud.ctl.compute.produce_and_publish_patch(
            machines_before, machines_after)

    return machine
def create_machine(owner, cloud_id, key_id, machine_name, location_id,
                   image_id, size_id, image_extra, disk, image_name,
                   size_name, location_name, ips, monitoring, networks=[],
                   docker_env=[], docker_command=None, ssh_port=22, script='',
                   script_id='', script_params='', job_id=None, job=None,
                   docker_port_bindings={}, docker_exposed_ports={},
                   azure_port_bindings='', hostname='', plugins=None,
                   disk_size=None, disk_path=None,
                   post_script_id='', post_script_params='', cloud_init='',
                   associate_floating_ip=False,
                   associate_floating_ip_subnet=None, project_id=None,
                   schedule={}, command=None, tags=None,
                   bare_metal=False, hourly=True,
                   softlayer_backend_vlan_id=None,
                   size_ram=256, size_cpu=1,
                   size_disk_primary=5, size_disk_swap=1,
                   boot=True, build=True,
                   cpu_priority=1, cpu_sockets=1, cpu_threads=1,
                   port_speed=0, hypervisor_group_id=None):
    """Creates a new virtual machine on the specified cloud.

    The cloud is looked up by `owner` and `cloud_id`, a provider connection
    is opened, and creation is dispatched to the provider specific
    `_create_machine_*` helper. Afterwards the SSH key (if any) is
    associated with the machine, a post-deploy task is queued and tags are
    applied.

    If the cloud is Rackspace it attempts to deploy the node with an ssh key
    provided in config. the method used is the only one working in the old
    Rackspace cloud. create_node(), from libcloud.compute.base, with 'auth'
    kwarg doesn't do the trick. Didn't test if you can upload some ssh
    related files using the 'ex_files' kwarg from openstack 1.0 driver.

    In Linode creation is a bit different. There you can pass the key file
    directly during creation. The Linode API also requires to set a disk
    size and doesn't get it from size.id. So, send size.disk from the
    client and use it in all cases just to avoid provider checking. Finally,
    Linode API does not support association between a machine and the image
    it came from. We could set this, at least for machines created through
    mist.api in ex_comment, lroot or lconfig. lroot seems more appropriate.
    However, libcloud doesn't support linode.config.list at the moment, so
    no way to get them. Also, it will create inconsistencies for machines
    created through mist.api and those from the Linode interface.

    Params:
    owner, cloud_id: Identify the Cloud the machine is created on.
    key_id: Id of the Key to deploy. If empty, the owner's default key is
        used, except on Libvirt, Docker and OnApp.
    machine_name: Requested name; passed through `machine_name_validator`.
    location_id, location_name, image_id, image_name, image_extra,
    size_id, size_name, disk: Used to build placeholder libcloud
        NodeLocation / NodeImage / NodeSize objects. Some providers replace
        them with objects fetched from the provider.
    script, script_id, script_params, post_script_id, post_script_params,
    monitoring, plugins, hostname, schedule, job_id, job: Forwarded to the
        post-deploy task.
    tags: If truthy, applied to the machine after creation.
    command, associate_floating_ip_subnet: Accepted but not used here.
    Remaining params are provider specific and forwarded to the matching
    `_create_machine_*` helper.

    Returns a dict with the keys 'id', 'name', 'extra', 'public_ips',
    'private_ips' and 'job_id'.

    Raises `BadRequestError` if the provider type is not handled. Lookup
    failures of the cloud or key, and errors raised by the provider
    helpers, propagate to the caller.
    """
    # script: a command that is given once
    # script_id: id of a script that exists - for mist.core
    # script_params: extra params, for script_id
    # post_script_id: id of a script that exists - for mist.core. If script_id
    # or monitoring are supplied, this will run after both finish
    # post_script_params: extra params, for post_script_id

    # NOTE: the list/dict defaults in the signature are shared between
    # calls. This function only reads or rebinds them; whether the helpers
    # it calls mutate them has not been verified.

    log.info('Creating machine %s on cloud %s', machine_name, cloud_id)
    cloud = Cloud.objects.get(owner=owner, id=cloud_id, deleted=None)
    conn = connect_provider(cloud)

    machine_name = machine_name_validator(conn.type, machine_name)
    key = None
    if key_id:
        key = Key.objects.get(owner=owner, id=key_id, deleted=None)

    # if key_id not provided, search for default key
    if conn.type not in [Provider.LIBVIRT, Provider.DOCKER, Provider.ONAPP]:
        if not key_id:
            key = Key.objects.get(owner=owner, default=True, deleted=None)
            # NOTE(review): the default key's *name* is stored in key_id;
            # confirm downstream consumers expect a name here.
            key_id = key.name
    if key:
        private_key = key.private
        public_key = key.public.replace('\n', '')
    else:
        # Keep both names bound so the provider dispatch below can never
        # hit an unbound local.
        private_key = None
        public_key = None

    # Placeholder libcloud objects built from the supplied ids and names.
    size = NodeSize(size_id, name=size_name, ram='', disk=disk,
                    bandwidth='', price='', driver=conn)
    image = NodeImage(image_id, name=image_name, extra=image_extra,
                      driver=conn)
    location = NodeLocation(location_id, name=location_name, country='',
                            driver=conn)

    if conn.type == Provider.DOCKER:
        if public_key:
            node = _create_machine_docker(
                conn, machine_name, image_id, '',
                public_key=public_key,
                docker_env=docker_env,
                docker_command=docker_command,
                docker_port_bindings=docker_port_bindings,
                docker_exposed_ports=docker_exposed_ports
            )
            node_info = conn.inspect_node(node)
            # Best effort: use the host port that container port 22 was
            # mapped to, otherwise keep the ssh_port passed by the caller.
            try:
                ssh_port = int(
                    node_info.extra[
                        'network_settings']['Ports']['22/tcp'][0]['HostPort'])
            except Exception:
                log.warning("Could not determine mapped SSH port of docker "
                            "machine %s, keeping port %s",
                            machine_name, ssh_port)
        else:
            node = _create_machine_docker(
                conn, machine_name, image_id, script,
                docker_env=docker_env,
                docker_command=docker_command,
                docker_port_bindings=docker_port_bindings,
                docker_exposed_ports=docker_exposed_ports
            )
    elif conn.type in [Provider.RACKSPACE_FIRST_GEN, Provider.RACKSPACE]:
        node = _create_machine_rackspace(conn, public_key, machine_name,
                                         image, size, location,
                                         user_data=cloud_init)
    elif conn.type in [Provider.OPENSTACK]:
        node = _create_machine_openstack(conn, private_key, public_key,
                                         machine_name, image, size, location,
                                         networks, cloud_init)
    elif conn.type in config.EC2_PROVIDERS and private_key:
        # Use the provider's location object when the id is listed.
        for loc in conn.list_locations():
            if loc.id == location_id:
                location = loc
                break
        node = _create_machine_ec2(conn, key_id, private_key, public_key,
                                   machine_name, image, size, location,
                                   cloud_init)
    elif conn.type == Provider.NEPHOSCALE:
        node = _create_machine_nephoscale(conn, key_id, private_key,
                                          public_key, machine_name, image,
                                          size, location, ips)
    elif conn.type == Provider.GCE:
        # Use the provider's size object when the id is listed. A separate
        # loop variable keeps the placeholder intact when nothing matches.
        for candidate in conn.list_sizes(location=location_name):
            if candidate.id == size_id:
                size = candidate
                break
        node = _create_machine_gce(conn, key_id, private_key, public_key,
                                   machine_name, image, size, location,
                                   cloud_init)
    elif conn.type == Provider.SOFTLAYER:
        node = _create_machine_softlayer(
            conn, key_id, private_key, public_key,
            machine_name, image, size, location,
            bare_metal, cloud_init, hourly,
            softlayer_backend_vlan_id
        )
    elif conn.type == Provider.ONAPP:
        node = _create_machine_onapp(
            conn, public_key,
            machine_name, image, size_ram,
            size_cpu, size_disk_primary, size_disk_swap,
            boot, build, cpu_priority, cpu_sockets,
            cpu_threads, port_speed,
            location, networks, hypervisor_group_id
        )
    elif conn.type == Provider.DIGITAL_OCEAN:
        node = _create_machine_digital_ocean(
            conn, key_id, private_key, public_key,
            machine_name, image, size, location, cloud_init)
    elif conn.type == Provider.AZURE:
        node = _create_machine_azure(
            conn, key_id, private_key, public_key,
            machine_name, image, size, location,
            cloud_init=cloud_init,
            cloud_service_name=None,
            azure_port_bindings=azure_port_bindings
        )
    elif conn.type in [Provider.VCLOUD, Provider.INDONESIAN_VCLOUD]:
        node = _create_machine_vcloud(conn, machine_name, image,
                                      size, public_key, networks)
    elif conn.type == Provider.LINODE and private_key:
        # FIXME: The orchestration UI does not provide all the necessary
        # parameters, thus we need to fetch the proper size and image objects.
        # This should be properly fixed when migrated to the controllers.
        if not disk:
            for candidate in conn.list_sizes():
                if int(candidate.id) == int(size_id):
                    size = candidate
                    break
        if not image_extra:  # Missing: {'64bit': 1, 'pvops': 1}
            for candidate in conn.list_images():
                if int(candidate.id) == int(image_id):
                    image = candidate
                    break
        node = _create_machine_linode(conn, key_id, private_key, public_key,
                                      machine_name, image, size,
                                      location)
    elif conn.type == Provider.HOSTVIRTUAL:
        node = _create_machine_hostvirtual(conn, public_key,
                                           machine_name, image,
                                           size, location)
    elif conn.type == Provider.VULTR:
        node = _create_machine_vultr(conn, public_key, machine_name, image,
                                     size, location, cloud_init)
    elif conn.type == Provider.LIBVIRT:
        try:
            # size_id should have a format cpu:ram, eg 1:2048
            size_parts = size_id.split(':')
            cpu = size_parts[0]
            ram = size_parts[1]
        except Exception:
            # Malformed or missing size_id: fall back to the defaults.
            ram = 512
            cpu = 1
        node = _create_machine_libvirt(conn, machine_name,
                                       disk_size=disk_size, ram=ram, cpu=cpu,
                                       image=image_id, disk_path=disk_path,
                                       networks=networks,
                                       public_key=public_key,
                                       cloud_init=cloud_init)
    elif conn.type == Provider.PACKET:
        node = _create_machine_packet(conn, public_key, machine_name, image,
                                      size, location, cloud_init, project_id)
    else:
        raise BadRequestError("Provider unknown.")

    if key is not None:
        # we did this change because there was race condition with
        # list_machines
        try:
            machine = Machine(cloud=cloud, machine_id=node.id).save()
        except me.NotUniqueError:
            machine = Machine.objects.get(cloud=cloud, machine_id=node.id)

        username = node.extra.get('username', '')
        machine.ctl.associate_key(key, username=username,
                                  port=ssh_port, no_connect=True)

    # Call post_deploy_steps for every provider
    if conn.type == Provider.AZURE:
        # for Azure, connect with the generated password, deploy the ssh key
        # when this is ok, it calls post_deploy for script/monitoring
        mist.api.tasks.azure_post_create_steps.delay(
            owner.id, cloud_id, node.id, monitoring, key_id,
            node.extra.get('username'), node.extra.get('password'),
            public_key, script=script, script_id=script_id,
            script_params=script_params, job_id=job_id,
            hostname=hostname, plugins=plugins,
            post_script_id=post_script_id,
            post_script_params=post_script_params, schedule=schedule,
            job=job,
        )
    elif conn.type == Provider.OPENSTACK:
        # NOTE(review): the task is queued only when a floating IP was
        # requested, and post_script_id is not forwarded, unlike the other
        # branches. Confirm both are intended.
        if associate_floating_ip:
            networks = list_networks(owner, cloud_id)
            mist.api.tasks.openstack_post_create_steps.delay(
                owner.id, cloud_id, node.id, monitoring, key_id,
                node.extra.get('username'), node.extra.get('password'),
                public_key, script=script, script_id=script_id,
                script_params=script_params,
                job_id=job_id, job=job, hostname=hostname, plugins=plugins,
                post_script_params=post_script_params,
                networks=networks, schedule=schedule,
            )
    elif conn.type == Provider.RACKSPACE_FIRST_GEN:
        # for Rackspace First Gen, cannot specify ssh keys. When node is
        # created we have the generated password, so deploy the ssh key
        # when this is ok and call post_deploy for script/monitoring
        mist.api.tasks.rackspace_first_gen_post_create_steps.delay(
            owner.id, cloud_id, node.id, monitoring, key_id,
            node.extra.get('password'), public_key, script=script,
            script_id=script_id, script_params=script_params,
            job_id=job_id, job=job, hostname=hostname, plugins=plugins,
            post_script_id=post_script_id,
            post_script_params=post_script_params, schedule=schedule
        )
    elif key_id:
        mist.api.tasks.post_deploy_steps.delay(
            owner.id, cloud_id, node.id, monitoring, script=script,
            key_id=key_id, script_id=script_id, script_params=script_params,
            job_id=job_id, job=job, hostname=hostname, plugins=plugins,
            post_script_id=post_script_id,
            post_script_params=post_script_params, schedule=schedule,
        )

    if tags:
        resolve_id_and_set_tags(owner, 'machine', node.id, tags,
                                cloud_id=cloud_id)

    ret = {'id': node.id,
           'name': node.name,
           'extra': node.extra,
           'public_ips': node.public_ips,
           'private_ips': node.private_ips,
           'job_id': job_id,
           }

    return ret