def test_check_node_is_locked(self):
    """Test LocalStorage.check_node_is_locked method."""
    kube_type = Kube.get_default_kube_type()
    node1 = Node(ip='192.168.1.2', hostname='host1', kube_id=kube_type)
    node2 = Node(ip='192.168.1.3', hostname='host2', kube_id=kube_type)
    db.session.add_all([node1, node2])
    db.session.commit()
    user, _ = self.fixtures.user_fixtures()
    pd = PersistentDisk(name='q', owner_id=user.id, size=1)
    db.session.add(pd)
    db.session.commit()

    # A node with no persistent disks bound to it is not locked.
    flag, reason = pstorage.LocalStorage.check_node_is_locked(node1.id)
    self.assertFalse(flag)
    self.assertIsNone(reason)
    flag, reason = pstorage.LocalStorage.check_node_is_locked(node2.id)
    self.assertFalse(flag)

    # Binding a persistent disk to a node must lock that node.
    pd = PersistentDisk(name='w', owner_id=user.id, size=1,
                        node_id=node1.id)
    db.session.add(pd)
    db.session.commit()
    flag, reason = pstorage.LocalStorage.check_node_is_locked(node1.id)
    self.assertTrue(flag)
    self.assertIsNotNone(reason)
def test_api_get_node_logs(self, get_logs_mock):
    hostname = 'qwerty'
    ip1 = '192.168.1.2'
    host1 = 'host1'
    ip2 = '192.168.1.3'
    host2 = 'host2'
    kube_type = Kube.get_default_kube_type()
    node1 = Node(ip=ip1, hostname=host1, kube_id=kube_type)
    node2 = Node(ip=ip2, hostname=host2, kube_id=kube_type)
    db.session.add_all((node1, node2))
    db.session.commit()

    # Unknown hostname: must return 404.
    url = self.url + '/node/' + hostname
    response = self.admin_open(url)
    self.assert404(response)

    # Known hostname: logs are fetched for the node's IP.
    url = self.url + '/node/' + host2
    get_logs_mock.return_value = {'2': 3}
    response = self.admin_open(url)
    self.assert200(response)
    self.assertEqual(response.json, {
        u'status': u'OK',
        u'data': get_logs_mock.return_value,
    })
    get_logs_mock.assert_called_once_with(host2, None, 100, host=ip2)
def add_two_nodes(self):
    ip1 = '192.168.1.2'
    host1 = 'host1'
    ip2 = '192.168.1.3'
    host2 = 'host2'
    kube_type = Kube.get_default_kube_type()
    node1 = Node(ip=ip1, hostname=host1, kube_id=kube_type)
    node2 = Node(ip=ip2, hostname=host2, kube_id=kube_type)
    db.session.add_all((node1, node2))
    db.session.commit()
    self.node1 = node1
    self.node2 = node2
    return (node1, node2)
def nodes(hostname):
    end = datetime.utcnow()
    start = end - timedelta(minutes=60)
    node = Node.get_by_name(hostname)
    if node is None:
        raise APIError('Unknown node', 404)
    if node.state != NODE_STATUSES.completed:
        raise NodeNotRunning
    resources = node_utils.get_one_node(node.id)['resources']
    if resources:
        data = kubestat.get_node_stat(hostname, start, end)
        cpu_capacity = float(resources.get('cpu')) * 1000
        memory_capacity = float(resources.get('memory'))
    else:
        # The DB record says the node is running, but empty resources
        # means k8s does not actually see it as running.
        raise InternalStatsError(
            'DB node is running, but k8s says it is not. Unknown error')
    diagrams = [
        CpuDiagram(_get_cpu_points(data, cpu_capacity)).to_dict(),
        MemoryDiagram(_get_memory_points(data, memory_capacity)).to_dict(),
        NetworkDiagram(_get_network_points(data)).to_dict()
    ] + [
        FsDiagram(fs_data['points'], title=fs_data['device']).to_dict()
        for fs_data in _get_fs_points(data)
    ]
    return diagrams
def wait_for_nodes(nodes_list, timeout, verbose=False):
    timeout = timeout or WAIT_TIMEOUT

    def _print(msg):
        if verbose:
            print msg

    wait_end = time.time() + timeout
    host_list = list(set(nodes_list))
    nodes_in_trouble = {}
    while host_list:
        if time.time() > wait_end:
            remaining_nodes = [Node.get_by_name(nhost)
                               for nhost in nodes_list]
            raise WaitTimeoutException(
                "These nodes did not become 'running' in the given "
                "timeout {}s:\n{}".format(timeout, remaining_nodes))
        time.sleep(WAIT_RETRY_DELAY)
        db.session.expire_all()
        for nhost in host_list[:]:  # Do not modify list while iterating it
            db_node = Node.get_by_name(nhost)
            if db_node is None:
                raise WaitTimeoutException(
                    "Node `%s` was not found." % nhost)
            k8s_node = get_one_node(db_node.id)
            state = k8s_node['status']
            if state == NODE_STATUSES.troubles:
                # Tolerate a transient 'troubles' state for up to
                # WAIT_TROUBLE_TIMEOUT seconds per node.
                if nhost not in nodes_in_trouble:
                    nodes_in_trouble[nhost] = (time.time() +
                                               WAIT_TROUBLE_TIMEOUT)
                if time.time() > nodes_in_trouble[nhost]:
                    raise WaitTroubleException(
                        "Node '{}' went into troubles and is still in the "
                        "troubles state after {} seconds.".format(
                            nhost, WAIT_TROUBLE_TIMEOUT))
                else:
                    _print("Node '{}' state is 'troubles', but the "
                           "acceptable troubles timeout of {}s is not "
                           "reached yet...".format(
                               nhost, WAIT_TROUBLE_TIMEOUT))
            elif state == NODE_STATUSES.running:
                host_list.remove(nhost)
            else:
                _print("Node '{}' state is '{}', continue waiting...".format(
                    nhost, state))
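# A minimal usage sketch for wait_for_nodes (the hostnames and timeout are
# hypothetical), showing how a deploy script might distinguish the two
# failure modes: a node stuck in 'troubles' vs. a plain wait timeout.
try:
    wait_for_nodes(['node1.example.com', 'node2.example.com'],
                   timeout=600, verbose=True)
except WaitTroubleException as err:
    print 'Node stuck in troubles: {}'.format(err)
except WaitTimeoutException as err:
    print 'Nodes did not become running in time: {}'.format(err)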
def node(hostname=None, ip=None, kube_id=None, owner=None):
    if owner is None:
        # NOTE: the owner fixture is created for its side effects only;
        # Node itself does not reference it.
        owner, _ = user_fixtures()
    if kube_id is None:
        kube_id = Kube.get_default_kube()
    if ip is None:
        ip = random_ip()
    if hostname is None:
        hostname = randstr()
    return Node(ip=ip, hostname=hostname, kube_id=kube_id.id,
                state=NODE_STATUSES.pending)
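# A short usage sketch for the node() fixture factory (the hostname here is
# hypothetical): build a pending node with random defaults, persist it, then
# query it the way production code would.
new_node = node(hostname='fixture-host-1')
db.session.add(new_node)
db.session.commit()
assert new_node.state == NODE_STATUSES.pending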
def run(self, nodename, flagname, value, delete):
    node = Node.get_by_name(nodename)
    if not node:
        raise InvalidCommand(u'Node "{0}" not found'.format(nodename))
    if delete:
        NodeFlag.delete_by_name(node.id, flagname)
        print u'Node flag "{0}" was deleted'.format(flagname)
        return
    NodeFlag.save_flag(node.id, flagname, value)
    if flagname == NodeFlagNames.CEPH_INSTALLED:
        tasks.add_k8s_node_labels(
            node.hostname, {NODE_CEPH_AWARE_KUBERDOCK_LABEL: "True"})
        check_namespace_exists(node.ip)
    print u'Node "{0}": flag "{1}" was set to "{2}"'.format(
        nodename, flagname, value)
def delete_nodes():
    """
    Delete all nodes from a restored cluster.

    When restoring a cluster after a full crash it makes sense to drop
    nodes from the restored DB, because they will be added again
    (re-deployed). This also implies purging the pods - in that case they
    also need to be re-deployed (or restored from pod backups).
    """
    from kubedock.core import db
    from kubedock.api import create_app
    from kubedock.pods.models import PersistentDisk, Pod, PodIP
    from kubedock.usage.models import IpState, ContainerState, PodState
    from kubedock.domains.models import PodDomain
    from kubedock.nodes.models import Node
    from kubedock.kapi.nodes import delete_node_from_db

    create_app(fake_sessions=True).app_context().push()
    IpState.query.delete()
    PodIP.query.delete()
    ContainerState.query.delete()
    PodDomain.query.delete()
    PersistentDisk.query.delete()
    PodState.query.delete()
    Pod.query.delete()
    logger.info("All pod data purged.")
    db.session.commit()

    devnull = open(os.devnull, 'w')
    subprocess.call(['kubectl', 'delete', '--all', 'namespaces'],
                    stderr=devnull, stdout=devnull)
    subprocess.call(['kubectl', 'delete', '--all', 'nodes'],
                    stderr=devnull, stdout=devnull)
    logger.info("Etcd data purged.")

    for node in Node.get_all():
        delete_node_from_db(node)
        logger.info("Node `{0}` purged.".format(node.hostname))
def test_execute_es_query(self, es_mock):
    """Test elasticsearch_utils.execute_es_query function."""
    # Add two log pod configs and two appropriate nodes.
    internal_user = User.get_internal()
    pod_id1 = str(uuid4())
    service1 = 'srv1'
    namespace1 = 'ns1'
    pod_id2 = str(uuid4())
    service2 = 'srv2'
    namespace2 = 'ns2'
    host1 = 'h1'
    host2 = 'h2'
    kube_id = Kube.get_default_kube_type()
    pod1 = Pod(id=pod_id1, name=get_kuberdock_logs_pod_name(host1),
               owner_id=internal_user.id, kube_id=kube_id,
               config=json.dumps({
                   "service": service1,
                   "namespace": namespace1
               }),
               status='RUNNING')
    pod2 = Pod(id=pod_id2, name=get_kuberdock_logs_pod_name(host2),
               owner_id=internal_user.id, kube_id=kube_id,
               config=json.dumps({
                   "service": service2,
                   "namespace": namespace2
               }),
               status='RUNNING')
    db.session.add_all([pod1, pod2])
    db.session.commit()
    node1 = Node(ip='123.123.123', hostname=host1, kube_id=kube_id,
                 state='completed', upgrade_status='applied')
    node2 = Node(ip='123.123.124', hostname=host2, kube_id=kube_id,
                 state='completed', upgrade_status='applied')
    db.session.add_all([node1, node2])
    db.session.commit()

    size = 123
    index = '1234qwerty'
    query = None
    sort = None
    search_result = {'hits': {'total': 333, 'hits': [1, 2, 3]}}
    search_mock = es_mock.return_value.search
    search_mock.return_value = search_result
    res = elasticsearch_utils.execute_es_query(index, query, size, sort)
    self.assertEqual(res, {
        'total': search_result['hits']['total'],
        'hits': search_result['hits']['hits'],
    })

    # The ES client must be built with one proxied endpoint per log pod.
    prefix1 = (elasticsearch_utils.K8S_PROXY_PREFIX +
               '/namespaces/' + namespace1 + '/services/' +
               service1 + ':9200/')
    prefix2 = (elasticsearch_utils.K8S_PROXY_PREFIX +
               '/namespaces/' + namespace2 + '/services/' +
               service2 + ':9200/')
    es_mock.assert_called_once_with([
        {
            'host': KUBE_API_HOST,
            'port': KUBE_API_PORT,
            'url_prefix': prefix1,
        },
        {
            'host': KUBE_API_HOST,
            'port': KUBE_API_PORT,
            'url_prefix': prefix2,
        },
    ])
    search_mock.assert_called_once_with(index=index, body={'size': size})

    query = {'a': 1}
    sort = {'b': 2}
    elasticsearch_utils.execute_es_query(index, query, size, sort)
    search_mock.assert_called_with(index=index, body={
        'size': size,
        'sort': sort,
        'query': query
    })

    search_mock.side_effect = RequestException('!!!')
    with self.assertRaises(elasticsearch_utils.LogsError):
        elasticsearch_utils.execute_es_query(index, query, size, sort)
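# For reference, a sketch of the client construction this test asserts
# (host, port, and prefix values here are hypothetical): elasticsearch-py
# accepts per-host dicts with a 'url_prefix', which is what routes queries
# through the Kubernetes API proxy to the in-cluster Elasticsearch service.
from elasticsearch import Elasticsearch

es = Elasticsearch([{
    'host': '10.0.0.1',    # KUBE_API_HOST in the test
    'port': 8080,          # KUBE_API_PORT in the test
    'url_prefix': '/api/v1/proxy/namespaces/ns1/services/srv1:9200/',
}])
# es.search(index='logs-*', body={'size': 10}) then goes through the proxy.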
def test_get_nodes_collection(self, get_all_nodes_mock,
                              fix_missed_nodes_mock, system_settings_mock):
    """Test for kapi.node_utils.get_nodes_collection function."""
    node1, node2 = self.add_two_nodes()
    ip3 = '192.168.1.4'
    host3 = 'host3'
    kube_type = Kube.get_default_kube_type()
    node3 = Node(ip=ip3, hostname=host3, kube_id=kube_type,
                 state=NODE_STATUSES.pending)
    db.session.add(node3)
    db.session.commit()
    get_all_nodes_mock.return_value = [
        {
            'metadata': {'name': node1.hostname},
            'status': {
                'conditions': [{'type': 'Ready', 'status': 'True'}]
            }
        },
        {
            'metadata': {'name': node2.hostname},
            'status': {
                'conditions': [{
                    'type': 'Unknown',
                    'status': 'True',
                    'reason': 'qwerty',
                    'lastTransitionTime': 'asdfg'
                }]
            }
        }
    ]
    fix_missed_nodes_mock.return_value = (node1, node2, node3)
    system_settings_mock.get_by_name = self.get_by_name
    res = node_utils.get_nodes_collection()
    get_all_nodes_mock.assert_called_once_with()
    # AC-3349 Fix. Not needed due to fix in 'get_nodes_collection'.
    # fix_missed_nodes_mock.assert_called_once_with(
    #     [node1, node2, node3],
    #     {x['metadata']['name']: x
    #      for x in get_all_nodes_mock.return_value})
    self.assertEqual(len(res), 3)
    self.assertEqual(res[0], {
        'id': node1.id,
        'ip': node1.ip,
        'hostname': node1.hostname,
        'kube_type': node1.kube_id,
        'status': NODE_STATUSES.running,
        'reason': '',
        'install_log': '',
        'resources': {}
    })
    self.assertEqual(res[1]['id'], node2.id)
    self.assertEqual(res[1]['ip'], node2.ip)
    self.assertEqual(res[1]['hostname'], node2.hostname)
    # assertTrue(a, b) treats b as a message and always passes for truthy
    # a; these must be equality checks.
    self.assertEqual(res[1]['status'], NODE_STATUSES.troubles)
    self.assertIn('qwerty', res[1]['reason'])
    self.assertIn('asdfg', res[1]['reason'])
    self.assertEqual(res[2]['id'], node3.id)
    self.assertEqual(res[2]['ip'], node3.ip)
    self.assertEqual(res[2]['hostname'], node3.hostname)
    self.assertEqual(res[2]['status'], NODE_STATUSES.pending)
def fetch_nodes():
    """Gets basic node data: currently only each node's IP address."""
    return [{'_ip': node.ip} for node in Node.get_all()]
def run(self, nodename):
    node = Node.get_by_name(nodename)
    if not node:
        raise InvalidCommand(u'Node "{0}" not found'.format(nodename))
    print json.dumps(node.to_dict())
def setUp(self):
    from kubedock.nodes.models import Node
    from kubedock.users.models import User
    from kubedock.kapi.podcollection import POD_STATUSES

    self.node = Node(
        ip='12.13.14.15',
        hostname='test-node-1',
        kube=bill_models.Kube.get_default_kube(),
        state=NODE_STATUSES.running,
    )
    self.internal_user = User.get_internal()
    self.pod = self.fixtures.pod(name='logs pod',
                                 status=POD_STATUSES.running)

    # Disable redis caching: replace check_logs_pod with its uncached
    # implementation. (The result of start() is the replacement function,
    # not a mock, so it is not stored.)
    patcher = mock.patch.object(es_logs, 'check_logs_pod',
                                es_logs._check_logs_pod)
    self.addCleanup(patcher.stop)
    patcher.start()

    patcher = mock.patch.object(es_logs, 'PodCollection')
    self.addCleanup(patcher.stop)
    self.PodCollectionMock = patcher.start()
    self.PodCollectionMock.return_value.get.return_value = [
        self.pod.to_dict()]

    patcher = mock.patch.object(es_logs, 'get_kuberdock_logs_pod_name')
    self.addCleanup(patcher.stop)
    self.get_kuberdock_logs_pod_name_mock = patcher.start()
    self.get_kuberdock_logs_pod_name_mock.return_value = self.pod.name

    pod_state = usage_models.PodState(
        pod_id=self.pod.id,
        start_time=datetime(2015, 2, 5),
        last_event_time=datetime.utcnow(),
        last_event='MODIFIED',
        hostname=self.node.hostname,
        kube_id=self.pod.kube_id,
    )
    db.session.add_all([
        self.node,
        pod_state,
        usage_models.ContainerState(
            pod_state=pod_state,
            container_name='elasticsearch',
            docker_id='om3xcnhonfao9nhc',
            start_time=datetime(2015, 2, 5),
            end_time=datetime.utcnow(),
            exit_code=2,
        ),
        usage_models.ContainerState(
            pod_state=pod_state,
            container_name='fluentd',
            docker_id='aoncrh47rhwdcevf',
            start_time=datetime(2015, 2, 5),
            end_time=datetime.utcnow(),
            exit_code=2,
        ),
        usage_models.ContainerState(
            pod_state=pod_state,
            container_name='elasticsearch',
            docker_id='p93urmqahdeef',
            start_time=datetime.utcnow() - timedelta(minutes=1),
        ),
        usage_models.ContainerState(
            pod_state=pod_state,
            container_name='fluentd',
            docker_id='1wlsj2enhdfo4838',
            start_time=datetime.utcnow() - timedelta(minutes=1),
        ),
    ])
    db.session.flush()
def upgrade_localstorage_paths(upd):
    upd.print_log('Update local storages on nodes...')

    node_to_pds = defaultdict(list)
    pod_to_pds = defaultdict(list)
    for pd in db.session.query(PersistentDisk).filter(
            PersistentDisk.state != PersistentDiskStatuses.TODELETE):
        node_to_pds[pd.node_id].append(pd)
        if pd.pod_id:
            pod_to_pds[pd.pod_id].append(pd)

    for node_id, pd_list in node_to_pds.iteritems():
        # Move the PDs on each node to the new location.
        if node_id:
            node = Node.get_by_id(node_id)
        else:
            node = None

        path_pairs = []
        for pd in pd_list:
            old_path = os.path.join(NODE_LOCAL_STORAGE_PREFIX,
                                    pd.drive_name)
            pd.drive_name = pd_utils.compose_pdname(pd.name, pd.owner_id)
            new_path = pstorage.LocalStorage.get_full_drive_path(
                pd.drive_name)
            path_pairs.append([old_path, new_path, pd.size])

        timeout = 60
        if node is not None:
            with settings(hide('running', 'warnings', 'stdout', 'stderr'),
                          host_string=node.hostname, warn_only=True):
                try:
                    put('/var/opt/kuberdock/node_network_plugin.py',
                        os.path.join(u138_PLUGIN_DIR, 'kuberdock.py'))
                    put('/var/opt/kuberdock/fslimit.py', u138_FSLIMIT_PATH)
                    for old_path, new_path, size in path_pairs:
                        result = run('test -d {}'.format(old_path),
                                     timeout=timeout)
                        if result.return_code != 0:
                            continue
                        run('[ -d {0} ] || mkdir -p {0}'.format(
                            os.path.dirname(new_path)), timeout=timeout)
                        run('mv {} {}'.format(old_path, new_path),
                            timeout=timeout)
                        run('/usr/bin/env python2 {} storage {}={}g'.format(
                            u138_FSLIMIT_PATH, new_path, size))
                except (CommandTimeout, NetworkError):
                    upd.print_log(
                        'Node {} is unavailable, skipping move of local '
                        'storages'.format(node.hostname))
        for pd in pd_list:
            pstorage.update_pods_volumes(pd)

    # Update RC and running pods.
    for pod_id in pod_to_pds:
        pod = Pod.query.filter(Pod.id == pod_id).first()
        config = pod.get_dbconfig()
        volumes = config.get('volumes', [])
        annotations = [vol.pop('annotation') for vol in volumes]
        res = PodCollection().patch_running_pod(
            pod_id,
            {
                'metadata': {
                    'annotations': {
                        'kuberdock-volume-annotations': json.dumps(
                            annotations)
                    }
                },
                'spec': {'volumes': volumes},
            },
            replace_lists=True,
            restart=True,
        )
        res = res or {}
        upd.print_log('Updated pod: {}'.format(res.get('name', 'Unknown')))
    db.session.commit()
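# For context, a minimal sketch of the fabric 1.x pattern the upgrade
# function above relies on (the host and path are hypothetical): hide()
# selects output groups to suppress, host_string targets a single node, and
# warn_only=True lets us inspect return codes instead of aborting on error.
from fabric.api import hide, run, settings


def remote_dir_exists(host, path):
    with settings(hide('running', 'warnings', 'stdout', 'stderr'),
                  host_string=host, warn_only=True):
        result = run('test -d {}'.format(path), timeout=60)
    return result.return_code == 0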