def wrapper(*args, **kwargs):
    """Run the wrapped call locally, or proxy the request to the network node.

    When this node's NODE_IP equals NETWORK_NODE_IP the wrapped callable
    (``func``, taken from the enclosing scope) is invoked directly. Otherwise
    the current Flask request is replayed against the network node's API
    using a 'system' namespace token, and the remote status code and body
    are returned as a JSON Flask response.
    """
    network_node_ip = config.parsed.get('NETWORK_NODE_IP')
    if config.parsed.get('NODE_IP') == network_node_ip:
        # We are the network node, so handle the request here.
        return func(*args, **kwargs)

    api_port = config.parsed.get('API_PORT')
    admin_token = util.get_api_token(
        'http://%s:%d' % (network_node_ip, api_port), namespace='system')

    environ = flask.request.environ
    proxied = requests.request(
        environ['REQUEST_METHOD'],
        'http://%s:%d%s' % (network_node_ip, api_port, environ['PATH_INFO']),
        data=flask.request.data,
        headers={
            'Authorization': admin_token,
            'User-Agent': util.get_user_agent()
        })

    logutil.info(
        None,
        'Returning proxied request: %d, %s'
        % (proxied.status_code, proxied.text))

    # Relay the remote body and status code back to our caller.
    resp = flask.Response(proxied.text, mimetype='application/json')
    resp.status_code = proxied.status_code
    return resp
def wrapper(*args, **kwargs):
    """Run the wrapped call locally, or proxy it to the instance's node.

    If the 'instance_from_db_virt' keyword argument is present and its
    db_entry['node'] differs from this node's NODE_NAME, the current Flask
    request is replayed against that node's API with a token for the
    caller's JWT identity, and the remote status code and body are returned
    as a JSON Flask response. In every other case the wrapped callable
    (``func``, taken from the enclosing scope) is invoked directly.
    """
    instance = kwargs.get('instance_from_db_virt')
    if not instance or (
            instance.db_entry['node'] == config.parsed.get('NODE_NAME')):
        return func(*args, **kwargs)

    remote_node = instance.db_entry['node']
    api_port = config.parsed.get('API_PORT')
    method = flask.request.environ['REQUEST_METHOD']

    url = 'http://%s:%d%s' % (
        remote_node, api_port, flask.request.environ['PATH_INFO'])
    api_token = util.get_api_token(
        'http://%s:%d' % (remote_node, api_port),
        namespace=get_jwt_identity())

    proxied = requests.request(
        method, url,
        data=json.dumps(flask_get_post_body()),
        headers={
            'Authorization': api_token,
            'User-Agent': util.get_user_agent()
        })

    logutil.info(
        None,
        'Proxied %s %s returns: %d, %s'
        % (method, url, proxied.status_code, proxied.text))

    # Relay the remote body and status code back to our caller.
    resp = flask.Response(proxied.text, mimetype='application/json')
    resp.status_code = proxied.status_code
    return resp
def observe(path, instance_uuid):
    """Tail an instance console log and record an event per matched trigger.

    Waits for ``path`` to exist, seeks to its end, then polls it once a
    second forever. Every non-empty line read is matched against the known
    trigger patterns; each match is logged and stored as a 'trigger' event
    for ``instance_uuid``. This function never returns.

    Args:
        path: filesystem path of the console log to monitor.
        instance_uuid: UUID of the instance the log belongs to.
    """
    # Local import so this block does not rely on a file-level import.
    import codecs

    setproctitle.setproctitle(
        '%s-%s' % (daemon.process_name('triggers'), instance_uuid))
    regexps = {'login prompt': ['^.* login: .*', re.compile('.* login: .*')]}

    while not os.path.exists(path):
        time.sleep(1)
    fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)

    logutil.info([virt.ThinInstance(instance_uuid)],
                 'Monitoring %s for triggers' % path)
    db.add_event('instance', instance_uuid, 'trigger monitor',
                 'detected console log', None, None)

    # Only react to output written after we started watching.
    os.lseek(fd, 0, os.SEEK_END)

    # Reads are fixed-size byte chunks, so a multi-byte UTF-8 character can
    # straddle two reads and a console can emit arbitrary bytes. An
    # incremental decoder carries partial sequences over to the next read,
    # and errors='replace' keeps invalid bytes from killing the observer.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')

    buffer = ''
    while True:
        d = decoder.decode(os.read(fd, 1024))
        if d:
            buffer += d
            lines = buffer.split('\n')
            # Keep the unterminated tail so it is completed by later reads.
            buffer = lines[-1]

            # NOTE(review): the unterminated tail is matched too, presumably
            # because a login prompt is not followed by a newline - confirm.
            for line in lines:
                if line:
                    for trigger in regexps:
                        m = regexps[trigger][1].match(line)
                        if m:
                            logutil.info([virt.ThinInstance(instance_uuid)],
                                         'Trigger %s matched' % trigger)
                            db.add_event('instance', instance_uuid, 'trigger',
                                         None, None, trigger)

        time.sleep(1)
def resolve(name):
    """Resolve a cirros image name to a download URL.

    Args:
        name: either 'cirros' (use the newest version listed at CIRROS_URL)
            or a string of the form 'cirros:0.4.0'.

    Returns:
        The DOWNLOAD_URL_CIRROS config template with 'vernum' substituted.

    Raises:
        exceptions.HTTPError: fetching CIRROS_URL did not return 200.
        exceptions.VersionSpecificationError: the name could not be parsed,
            or no versions were found in the listing.
    """
    resp = requests.get(CIRROS_URL,
                        headers={'User-Agent': util.get_user_agent()})
    if resp.status_code != 200:
        raise exceptions.HTTPError(
            'Failed to fetch http://download.cirros-cloud.net/, '
            'status code %d' % resp.status_code)

    if name == 'cirros':
        dir_re = re.compile(r'.*<a href="([0-9]+\.[0-9]+\.[0-9]+)/">.*/</a>.*')
        versions = []
        for line in resp.text.split('\n'):
            m = dir_re.match(line)
            if m:
                versions.append(m.group(1))
        logutil.info(None, 'Found cirros versions: %s' % versions)

        if not versions:
            # Previously this surfaced as a bare IndexError.
            raise exceptions.VersionSpecificationError(
                'No cirros versions found for: %s' % name)

        # Compare numerically so that e.g. 0.10.0 beats 0.9.0, and so the
        # result does not depend on the order of the directory listing.
        vernum = max(
            versions, key=lambda v: tuple(int(part) for part in v.split('.')))
    else:
        try:
            # Name is assumed to be in the form cirros:0.4.0
            _, vernum = name.split(':')
        except Exception as e:
            raise exceptions.VersionSpecificationError(
                'Cannot parse version: %s' % name) from e

    return (config.parsed.get('DOWNLOAD_URL_CIRROS') % {'vernum': vernum})
def _get(self, locks, related_object):
    """Fetch image if not downloaded and return image path.

    Takes the per-node image lock, fetches the image when
    ``_requires_fetch`` reports dirty fields (recording an event against
    ``related_object`` if one is given), then transcodes the result.

    Args:
        locks: locks already held by the caller, handed on to the fetch and
            transcode steps (presumably so they can be refreshed during
            long operations - confirm). May be None or empty.
        related_object: optional object used for operation recording and
            event logging.

    Returns:
        Path of the versioned image file.
    """
    with db.get_lock('image', config.parsed.get('NODE_NAME'),
                     self.hashed_image_url) as image_lock:
        # list.append() returns None, so the previous
        # locks.append(image_lock) argument always handed None to _fetch
        # and also mutated the caller's list. Build a new list that holds
        # the caller's locks plus the image lock taken here.
        held_locks = list(locks) if locks else []
        held_locks.append(image_lock)

        with util.RecordedOperation('fetch image', related_object):
            dirty_fields, resp = self._requires_fetch()

            if dirty_fields:
                logutil.info([self],
                             'Starting fetch due to dirty fields %s'
                             % dirty_fields)
                if related_object:
                    t, u = related_object.get_describing_tuple()
                    dirty_fields_pretty = [
                        '%s: %s -> %s' % (field,
                                          dirty_fields[field]['before'],
                                          dirty_fields[field]['after'])
                        for field in dirty_fields
                    ]
                    db.add_event(t, u, 'image requires fetch', None, None,
                                 '\n'.join(dirty_fields_pretty))
                actual_image = self._fetch(resp, locks=held_locks)
            else:
                actual_image = '%s.v%03d' % (self.hashed_image_path,
                                             self.info['version'])

        # The image lock is still held here, so pass it along as well.
        _transcode(held_locks, actual_image, related_object)

    return actual_image
def _start_daemon(d):
    """Fork a child process running daemon ``d`` and record its pid.

    In the child the HANDLER log handler is removed from LOG and the
    daemon's Monitor is run. In the parent the child's pid is stored in
    DAEMON_PIDS and logged.

    Args:
        d: key into DAEMON_IMPLEMENTATIONS naming the daemon to start.
    """
    pid = os.fork()
    if pid == 0:
        LOG.removeHandler(HANDLER)
        DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
        # If run() ever returns, the child must not fall through into the
        # parent's bookkeeping below and carry on as a second supervisor.
        # os._exit is used so the parent's exit handlers are not run here.
        os._exit(0)

    DAEMON_PIDS[pid] = d
    logutil.info(None, '%s pid is %d' % (d, pid))
def refresh_lock(lock, relatedobjects=None):
    """Refresh a lock that is still held, or fail if it is not.

    Args:
        lock: object providing is_acquired(), refresh(), path and name.
        relatedobjects: optional objects passed to the log call.

    Raises:
        exceptions.LockException: lock.is_acquired() returned a false value.
    """
    if lock.is_acquired():
        lock.refresh()
        logutil.info(relatedobjects, 'Refreshed lock %s' % lock.name)
        return

    raise exceptions.LockException(
        'The lock on %s has expired.' % lock.path)
def get_api_token(base_url, namespace='system'):
    """Obtain a bearer token for ``namespace`` from the API at ``base_url``.

    Under the namespace lock, the namespace's 'service_key' is read from the
    database (one is generated and persisted first if absent) and POSTed to
    ``<base_url>/auth``.

    Args:
        base_url: scheme, host and port of the API, without a trailing path.
        namespace: namespace to authenticate as.

    Returns:
        The string 'Bearer <access_token>' for use in an Authorization
        header.

    Raises:
        Exception: the auth endpoint did not return HTTP 200.
    """
    with db.get_lock('namespace', None, namespace):
        auth_url = base_url + '/auth'
        logutil.info(None, 'Fetching %s auth token from %s'
                     % (namespace, auth_url))

        ns = db.get_namespace(namespace)
        if 'service_key' in ns:
            key = ns['service_key']
        else:
            # The key is a credential, so draw it from the OS CSPRNG rather
            # than the predictable default Mersenne Twister generator.
            rng = random.SystemRandom()
            key = ''.join(
                rng.choice(string.ascii_lowercase) for _ in range(50))
            ns['service_key'] = key
            db.persist_namespace(namespace, ns)

        r = requests.request('POST', auth_url,
                             data=json.dumps({
                                 'namespace': namespace,
                                 'key': key
                             }),
                             headers={
                                 'Content-Type': 'application/json',
                                 'User-Agent': get_user_agent()
                             })
        if r.status_code != 200:
            raise Exception('Unauthorized')
        return 'Bearer %s' % r.json()['access_token']
def _remove_mesh_element(self, node):
    """Delete the bridge forwarding entry that points at ``node``.

    Args:
        node: value substituted as the 'dst' of the fdb entry to remove.
    """
    logutil.info([self], 'Removing excess mesh element %s' % node)

    command_values = self.subst_dict()
    command_values.update(node=node)

    command = (
        'bridge fdb del to 00:00:00:00:00:00 dst %(node)s dev %(vx_interface)s'
        % command_values)
    util.execute(None, command)
def _add_mesh_element(self, node):
    """Append a bridge forwarding entry that points at ``node``.

    Args:
        node: value substituted as the 'dst' of the fdb entry to add.
    """
    logutil.info([self], 'Adding new mesh element %s' % node)

    command_values = self.subst_dict()
    command_values.update(node=node)

    command = (
        'bridge fdb append to 00:00:00:00:00:00 dst %(node)s dev %(vx_interface)s'
        % command_values)
    util.execute(None, command)
def run(self):
    """Launch the API server command built from configuration.

    The API_COMMAND_LINE template is filled with the configured port and
    timeout plus the 'api' process name, then executed with the current
    process environment.
    """
    logutil.info(None, 'Starting')

    template_values = {
        'port': config.parsed.get('API_PORT'),
        'timeout': config.parsed.get('API_TIMEOUT'),
        'name': daemon.process_name('api')
    }
    command_line = config.parsed.get('API_COMMAND_LINE') % template_values

    util.execute(None, command_line, env_variables=os.environ)
def __exit__(self, *args):
    """Log how long the operation took and record a 'finish' event.

    The duration is measured from self.start_time. An event is only stored
    when get_describing_tuple() yields both a type and a UUID. Nothing is
    returned, so exceptions from the managed block are not suppressed.
    """
    elapsed = time.time() - self.start_time
    logutil.info(
        [self.object],
        'Finish %s, duration %.02f seconds' % (self.operation, elapsed))

    object_type, object_uuid = self.get_describing_tuple()
    if not (object_type and object_uuid):
        # Nothing identifiable to attach the event to.
        return

    db.add_event(object_type, object_uuid, self.operation, 'finish',
                 elapsed, None)
def wrapper(*args, **kwargs):
    """Resolve the 'network_uuid' kwarg into 'network_from_db', or 404.

    When 'network_uuid' is supplied, the database record is looked up and
    stored under 'network_from_db'. If no truthy 'network_from_db' ends up
    in kwargs, a 404 error is returned instead of calling the wrapped
    callable (``func``, taken from the enclosing scope).
    """
    if 'network_uuid' in kwargs:
        kwargs['network_from_db'] = db.get_network(kwargs['network_uuid'])

    if kwargs.get('network_from_db'):
        return func(*args, **kwargs)

    # NOTE(review): 'network_uuid' is read unconditionally here, so a call
    # without that kwarg raises KeyError rather than returning the 404.
    logutil.info([net.ThinNetwork(kwargs['network_uuid'])],
                 'Network not found, missing or deleted')
    return error(404, 'network not found')
def wrapper(*args, **kwargs):
    """Resolve the 'instance_uuid' kwarg into 'instance_from_db_virt', or 404.

    When 'instance_uuid' is supplied, virt.from_db() is called and its
    result stored under 'instance_from_db_virt'. If no truthy
    'instance_from_db_virt' ends up in kwargs, a 404 error is returned
    instead of calling the wrapped callable (``func``, taken from the
    enclosing scope).
    """
    if 'instance_uuid' in kwargs:
        kwargs['instance_from_db_virt'] = virt.from_db(
            kwargs['instance_uuid'])

    if kwargs.get('instance_from_db_virt'):
        return func(*args, **kwargs)

    # NOTE(review): 'instance_uuid' is read unconditionally here, so a call
    # without that kwarg raises KeyError rather than returning the 404.
    logutil.info([virt.ThinInstance(kwargs['instance_uuid'])],
                 'Instance not found, genuinely missing')
    return error(404, 'instance not found')
def _read_local_info(self):
    """Return the cached metadata for this image.

    Reads and parses the JSON file at ``<hashed_image_path>.info``. If that
    file does not exist, a fresh record with version 0 is returned instead.
    """
    info_path = self.hashed_image_path + '.info'

    if os.path.exists(info_path):
        with open(info_path) as f:
            return json.load(f)

    logutil.info([self], 'No info in cache for this image')
    return {
        'url': self.url,
        'hash': self.hashed_image_url,
        'version': 0
    }
def update_metrics():
    """Publish the current stats to the gauges and to the database.

    Each stat from _get_stats() is written to a gauge of the same name,
    creating the gauge on first sight. The stats are then stored via
    db.update_metrics_bulk() and the 'updated_at' gauge is stamped with the
    current time.
    """
    global last_metrics

    stats = _get_stats()
    for name, value in stats.items():
        if name not in gauges:
            gauges[name] = Gauge(name, '')
        gauges[name].set(value)

    db.update_metrics_bulk(stats)
    logutil.info(None, 'Updated metrics')
    gauges['updated_at'].set_to_current_time()
def wrapper(*args, **kwargs):
    """Only call the wrapped function if the caller may see the instance.

    Returns a 404 error when 'instance_from_db' is missing from kwargs, or
    when the JWT identity is neither the instance's namespace nor 'system'.
    Otherwise the wrapped callable (``func``, taken from the enclosing
    scope) is invoked.
    """
    instance = kwargs.get('instance_from_db')
    if not instance:
        logutil.info([virt.ThinInstance(kwargs['instance_uuid'])],
                     'Instance not found, kwarg missing')
        return error(404, 'instance not found')

    caller = get_jwt_identity()
    if caller in (instance['namespace'], 'system'):
        return func(*args, **kwargs)

    # Report "not found" rather than "forbidden" to other namespaces.
    logutil.info([virt.ThinInstance(kwargs['instance_uuid'])],
                 'Instance not found, ownership test in decorator')
    return error(404, 'instance not found')
def enqueue(queuename, workitem):
    """Store ``workitem`` on the named queue under a unique job name.

    Job names have the form '<entry time>-<3 digit counter>'; the counter is
    incremented until no existing entry has that name. Everything happens
    while holding the queue's lock.

    Args:
        queuename: name of the queue to add to.
        workitem: the work payload to store.
    """
    with get_lock('queue', None, queuename):
        entry_time = time.time()

        # Probe successive counters until we find an unused job name.
        counter = 0
        while True:
            jobname = '%s-%03d' % (entry_time, counter)
            if not get('queue', queuename, jobname):
                break
            counter += 1

        put('queue', queuename, jobname, workitem)
        logutil.info(
            None,
            'Enqueued workitem %s for queue %s with work %s'
            % (jobname, queuename, workitem))
def wrapper(*args, **kwargs):
    """Only call the wrapped function if the caller may see the network.

    Returns a 404 error when 'network_from_db' is missing from kwargs, or
    when the JWT identity is neither the network's namespace nor 'system'.
    Otherwise the wrapped callable (``func``, taken from the enclosing
    scope) is invoked.
    """
    network = kwargs.get('network_from_db')
    if not network:
        logutil.info([net.ThinNetwork(kwargs['network_uuid'])],
                     'Network not found, kwarg missing')
        return error(404, 'network not found')

    caller = get_jwt_identity()
    if caller in (network['namespace'], 'system'):
        return func(*args, **kwargs)

    # Report "not found" rather than "forbidden" to other namespaces.
    logutil.info([net.ThinNetwork(kwargs['network_uuid'])],
                 'Network not found, ownership test in decorator')
    return error(404, 'network not found')
def run(self):
    """Keep one console-log observer process per created local instance.

    Loops forever, once a second:
      1. drops observers whose process has exited, recording a 'crashed'
         event for each;
      2. starts an observer for every instance on this node in state
         'created' that does not have one;
      3. kills observers for instances the database no longer lists for
         this node, recording a 'finished' event for each.
    """
    logutil.info(None, 'Starting')
    observers = {}

    while True:
        # Cleanup terminated observers
        for observed_uuid in list(observers):
            proc = observers[observed_uuid]
            if proc.is_alive():
                continue

            # Reap process
            proc.join(1)
            logutil.info([virt.ThinInstance(observed_uuid)],
                         'Trigger observer has terminated')
            db.add_event('instance', observed_uuid, 'trigger monitor',
                         'crashed', None, None)
            del observers[observed_uuid]

        # Start missing observers. Every observed instance is treated as
        # surplus until the database listing says it still lives here.
        extra_instances = list(observers)
        local_instances = db.get_instances(
            only_node=config.parsed.get('NODE_NAME'))
        for inst in local_instances:
            inst_uuid = inst['uuid']
            if inst_uuid in extra_instances:
                extra_instances.remove(inst_uuid)

            if inst['state'] != 'created' or inst_uuid in observers:
                continue

            console_path = os.path.join(
                config.parsed.get('STORAGE_PATH'), 'instances',
                inst_uuid, 'console.log')
            proc = multiprocessing.Process(
                target=observe,
                args=(console_path, inst_uuid),
                name='%s-%s' % (daemon.process_name('triggers'), inst_uuid))
            proc.start()
            observers[inst_uuid] = proc

            logutil.info([virt.ThinInstance(inst_uuid)],
                         'Started trigger observer')
            db.add_event('instance', inst_uuid, 'trigger monitor',
                         'started', None, None)

        # Cleanup extra observers
        for surplus_uuid in extra_instances:
            proc = observers[surplus_uuid]
            try:
                os.kill(proc.pid, signal.SIGKILL)
            except Exception:
                # Best effort: failures to signal the process are ignored.
                pass

            del observers[surplus_uuid]
            logutil.info([virt.ThinInstance(surplus_uuid)],
                         'Finished trigger observer')
            db.add_event('instance', surplus_uuid, 'trigger monitor',
                         'finished', None, None)

        time.sleep(1)
def remove_floating_ip(self, floating_address, inner_address):
    """Remove a floating IP and its DNAT rule.

    Deletes the floating address from the outer veth device, then deletes
    the PREROUTING DNAT rule mapping it to ``inner_address``.

    Args:
        floating_address: the externally visible address to remove.
        inner_address: the address the floating address was mapped to.
    """
    logutil.info([self],
                 'Removing floating ip %s -> %s'
                 % (floating_address, inner_address))

    subst = self.subst_dict()
    subst.update(floating_address=floating_address,
                 inner_address=inner_address)

    command_templates = (
        'ip addr del %(floating_address)s/%(netmask)s '
        'dev %(physical_veth_outer)s',
        '%(in_netns)s iptables -t nat -D PREROUTING '
        '-d %(floating_address)s -j DNAT --to-destination %(inner_address)s',
    )
    for template in command_templates:
        util.execute(None, template % subst)
def wrapper(*args, **kwargs):
    """Merge the POST body into kwargs, log the request and map errors.

    Keys of the request body are copied into kwargs, with 'uuid' renamed to
    'passed_uuid'. The request is logged at debug level for the root URL on
    0.0.0.0 and at info level otherwise, then the wrapped callable
    (``func``, taken from the enclosing scope) is invoked.

    Returns:
        The wrapped callable's result, or an error response: 400 for
        TypeError, 401 for JWT and authorization errors, 500 for anything
        else.
    """
    try:
        j = flask_get_post_body()
        if j:
            for key in j:
                # 'uuid' is renamed on its way into kwargs.
                destkey = 'passed_uuid' if key == 'uuid' else key
                kwargs[destkey] = j[key]

        # The previous version also built a list of stringified request
        # headers here that was never used; that dead code is removed.
        msg = 'API request: %s %s' % (flask.request.method,
                                      flask.request.url)
        msg += '\n    Args: %s\n    KWargs: %s' % (args, kwargs)

        # Dots are escaped so only the literal 0.0.0.0 address matches.
        if re.match(r'https?://0\.0\.0\.0:\d+/$', flask.request.url):
            logutil.debug(None, msg)
        else:
            logutil.info(None, msg)

        return func(*args, **kwargs)

    except TypeError as e:
        return error(400, str(e))

    except DecodeError:
        # Send a more informative message than 'Not enough segments'
        return error(401, 'invalid JWT in Authorization header')

    except (
            JWTDecodeError,
            NoAuthorizationError,
            InvalidHeaderError,
            WrongTokenError,
            RevokedTokenError,
            FreshTokenRequired,
            CSRFError,
            PyJWTError,
    ) as e:
        return error(401, str(e))

    except Exception:
        return error(500, 'server error')
def _compact_etcd(self):
    """Compact and defragment etcd, ignoring any failure.

    Any exception raised along the way is handed to util.ignore_exception()
    rather than propagated.
    """
    marker_key = '/sf/compact'

    try:
        # We need to determine what revision to compact to, so we keep a
        # key which stores when we last compacted and we use its latest
        # revision number as the revision to compact to. Note that we use
        # a different library for compaction as our primary library does
        # not support it.
        client = etcd3.client()
        marker_value = json.dumps({'compacted_at': time.time()})
        client.put(marker_key, marker_value)

        _, marker_meta = client.get(marker_key)
        client.compact(marker_meta.mod_revision, physical=True)
        client.defragment()

        logutil.info(None, 'Compacted etcd')

    except Exception as e:
        util.ignore_exception('etcd compaction', e)
def _audit_daemons():
    """Start any daemon that is not running and restart vanished ones.

    A daemon counts as running if it has an entry in DAEMON_PIDS. Daemons
    listed in DAEMON_IMPLEMENTATIONS without such an entry are started.
    Entries whose pid no longer exists are dropped and their daemon is
    started again.
    """
    running_daemons = list(DAEMON_PIDS.values())
    logutil.info(
        None, 'Daemons running: %s' % ', '.join(sorted(running_daemons)))

    for d in DAEMON_IMPLEMENTATIONS:
        if d not in running_daemons:
            _start_daemon(d)

    # _start_daemon() adds entries to DAEMON_PIDS, so iterate over a
    # snapshot; mutating the dict while iterating it raises RuntimeError.
    for pid in list(DAEMON_PIDS):
        if not psutil.pid_exists(pid):
            # Drop the stale entry, otherwise the same dead pid would
            # trigger another restart on every audit.
            name = DAEMON_PIDS.pop(pid)
            logutil.warning(
                None, '%s pid is missing, restarting' % name)
            _start_daemon(name)
def delete(self, confirm=False, namespace=None):
    """Delete all networks in the namespace.

    Args:
        confirm: must be exactly True for anything to be deleted.
        namespace: namespace whose networks are deleted. Required (as a
            string) for the 'system' identity. Other identities may omit it
            or pass their own namespace, and always operate on their own.

    Returns:
        A list of deleted network UUIDs, or an error response: 400 when
        confirm or the namespace is missing, 401 when a non-system caller
        names another namespace, 403 (with the 'deleted' and 'unable' UUID
        lists) when some networks were still in use.
    """
    if confirm is not True:
        return error(400, 'parameter confirm is not set true')

    identity = get_jwt_identity()
    if identity == 'system':
        if not isinstance(namespace, str):
            # A client using a system key must specify the namespace. This
            # ensures that deleting all networks in the cluster (by
            # specifying namespace='system') is a deliberate act.
            return error(400, 'system user must specify parameter namespace')
    else:
        if namespace and namespace != identity:
            return error(401, 'you cannot delete other namespaces')
        namespace = identity

    networks_del = []
    networks_unable = []
    # The original passed all=all, i.e. the builtin all() function, since
    # no local of that name exists. That value is always truthy, so pass
    # True explicitly. NOTE(review): assumes get_networks only tests the
    # truthiness of ``all`` - confirm.
    for n in list(db.get_networks(all=True, namespace=namespace)):
        if n['uuid'] == 'floating':
            continue

        if len(list(db.get_network_interfaces(n['uuid']))) > 0:
            logutil.info([n],
                         'Network in use, cannot be deleted by delete-all')
            networks_unable.append(n['uuid'])
            continue

        if n['state'] == 'deleted':
            continue

        _delete_network(n)
        networks_del.append(n['uuid'])

    if networks_unable:
        return error(403, {
            'deleted': networks_del,
            'unable': networks_unable
        })

    return networks_del
def from_db(uuid):
    """Build a Network object from its database record.

    Args:
        uuid: UUID of the network to load.

    Returns:
        A Network, or None when there is no record for ``uuid`` or the
        record's state is 'deleted'.
    """
    dbnet = db.get_network(uuid)
    if not dbnet:
        return None

    n = Network(uuid=dbnet['uuid'],
                vxlan_id=dbnet['vxid'],
                provide_dhcp=dbnet['provide_dhcp'],
                provide_nat=dbnet['provide_nat'],
                ipblock=dbnet['netblock'],
                physical_nic=config.parsed.get('NODE_EGRESS_NIC'),
                floating_gateway=dbnet['floating_gateway'],
                namespace=dbnet['namespace'])

    if dbnet['state'] == 'deleted':
        # The object is built first so it can be attached to the log line.
        # The message previously misspelled "Network".
        logutil.info([n], 'Network is deleted, returning None.')
        return None

    return n
def resolve(name):
    """Resolve an ubuntu image name to a download URL.

    Args:
        name: 'ubuntu' (use the newest release listed at UBUNTU_URL), or
            'ubuntu:<version>' where version is a number such as 18.04 or a
            release name such as bionic.

    Returns:
        The DOWNLOAD_URL_UBUNTU config template with 'vernum' and 'vername'
        substituted.

    Raises:
        exceptions.HTTPError: fetching UBUNTU_URL did not return 200.
        exceptions.VersionSpecificationError: the name could not be parsed
            or matched, or no versions were found in the listing.
    """
    resp = requests.get(UBUNTU_URL,
                        headers={'User-Agent': util.get_user_agent()})
    if resp.status_code != 200:
        raise exceptions.HTTPError('Failed to fetch %s, status code %d'
                                   % (UBUNTU_URL, resp.status_code))

    num_to_name = {}
    name_to_num = {}
    dir_re = re.compile(
        r'.*<a href="(.*)/">.*Ubuntu Server ([0-9]+\.[0-9]+).*')
    for line in resp.text.split('\n'):
        m = dir_re.match(line)
        if m:
            num_to_name[m.group(2)] = m.group(1)
            name_to_num[m.group(1)] = m.group(2)
    logutil.info(None, 'Found ubuntu versions: %s' % num_to_name)

    vernum = None
    vername = None

    if name == 'ubuntu':
        if not num_to_name:
            # Previously this surfaced as a bare IndexError.
            raise exceptions.VersionSpecificationError(
                'No ubuntu versions found for: %s' % name)

        # Compare numerically: a plain string sort would rank '9.10' above
        # '20.04'.
        vernum = max(
            num_to_name,
            key=lambda v: tuple(int(part) for part in v.split('.')))
        vername = num_to_name[vernum]
    else:
        try:
            # Name is assumed to be in the form ubuntu:18.04 or ubuntu:bionic
            _, version = name.split(':')
            if version in num_to_name:
                vernum = version
                vername = num_to_name[version]
            else:
                vername = version
                vernum = name_to_num[version]
        except Exception as e:
            raise exceptions.VersionSpecificationError(
                'Cannot parse version: %s' % name) from e

    return (config.parsed.get('DOWNLOAD_URL_UBUNTU') % {
        'vernum': vernum,
        'vername': vername
    })
def dequeue(queuename):
    """Move the first job on the named queue into 'processing'.

    Under the queue's lock, the entry with the lowest key is copied to the
    'processing' area under the same job name and deleted from the queue.

    Args:
        queuename: name of the queue to take work from.

    Returns:
        A (jobname, workitem) tuple, or (None, None) if the queue is empty.
    """
    queue_path = _construct_key('queue', queuename, None)
    client = Etcd3Client()

    with get_lock('queue', None, queuename):
        pending = client.get_prefix(queue_path,
                                    sort_order='ascend',
                                    sort_target='key')
        head = next(iter(pending), None)
        if head is None:
            return None, None

        data, metadata = head

        # The key is stringified rather than decoded, so a trailing quote
        # left by that conversion is stripped from the final component.
        key_text = str(metadata['key'])
        jobname = key_text.split('/')[-1].rstrip("'")
        workitem = json.loads(data)

        put('processing', queuename, jobname, workitem)
        client.delete(metadata['key'])
        logutil.info(
            None,
            'Moved workitem %s from queue to processing for %s with work %s'
            % (jobname, queuename, workitem))

        return jobname, workitem
def restore_instances():
    """Recreate the networks and instances that belong on this node.

    Collects every instance the database lists for NODE_NAME and the
    networks their interfaces use. Each network is then created, meshed and
    given DHCP. Each instance whose power state is 'on',
    'transition-to-on', 'initial' or 'unknown' is created. A failure on one
    network or instance is logged and does not stop the others; an instance
    that fails is additionally queued for deletion in state 'error'.
    """
    # Ensure all instances for this node are defined
    networks = []
    instances = []
    for inst in list(
            db.get_instances(only_node=config.parsed.get('NODE_NAME'))):
        for iface in db.get_instance_interfaces(inst['uuid']):
            if iface['network_uuid'] not in networks:
                networks.append(iface['network_uuid'])
        instances.append(inst['uuid'])

    with util.RecordedOperation('restore networks', None):
        for network in networks:
            try:
                n = net.from_db(network)
                if not n:
                    # Nothing to restore for this network. Skipping here
                    # mirrors the instance loop below and avoids an
                    # AttributeError on None.
                    continue
                logutil.info([n], 'Restoring network')
                n.create()
                n.ensure_mesh()
                n.update_dhcp()
            except Exception as e:
                util.ignore_exception('restore network %s' % network, e)

    with util.RecordedOperation('restore instances', None):
        for instance in instances:
            try:
                i = virt.from_db(instance)
                if not i:
                    continue
                if i.db_entry.get('power_state', 'unknown') not in [
                        'on', 'transition-to-on', 'initial', 'unknown'
                ]:
                    continue

                logutil.info([i], 'Restoring instance')
                i.create()
            except Exception as e:
                util.ignore_exception('restore instance %s' % instance, e)
                db.enqueue_instance_delete(
                    config.parsed.get('NODE_NAME'), instance, 'error',
                    'exception while restoring instance on daemon restart')
def _safe_get_network_interface(interface_uuid):
    """Look up an interface and its network, enforcing ownership.

    Args:
        interface_uuid: UUID of the network interface to fetch.

    Returns:
        A tuple (interface, network, error). On success the error is None.
        On failure the first two items are None and the third is a 404
        error response: the interface, its network or its instance is
        missing, or the JWT identity is neither 'system' nor the namespace
        of the network and of the instance.
    """
    ni = db.get_interface(interface_uuid)
    if not ni:
        return None, None, error(404, 'interface not found')

    n = net.from_db(ni['network_uuid'])
    if not n:
        logutil.info([
            net.ThinNetwork(ni['network_uuid']),
            net.ThinNetworkInterface(ni['uuid'])
        ], 'Network not found or deleted')
        return None, None, error(404, 'interface network not found')

    if get_jwt_identity() not in [n.namespace, 'system']:
        logutil.info([n, net.ThinNetworkInterface(ni['uuid'])],
                     'Interface not found, ownership test')
        return None, None, error(404, 'interface not found')

    i = virt.from_db(ni['instance_uuid'])
    if not i:
        # Without this guard a missing instance raised AttributeError on
        # i.db_entry and became a 500 instead of a 404.
        logutil.info([n, net.ThinNetworkInterface(ni['uuid'])],
                     'Instance not found for interface')
        return None, None, error(404, 'interface not found')

    if get_jwt_identity() not in [i.db_entry['namespace'], 'system']:
        logutil.info([n, i, net.ThinNetworkInterface(ni['uuid'])],
                     'Instance not found, ownership test')
        return None, None, error(404, 'interface not found')

    return ni, n, None