def generate_config(url: str, path_to_cdf: str) -> None:
    """Run ``configure`` against the CDF and publish the generated files.

    Args:
        url: ConfStore URL used to build the provider and to resolve the
            configuration and log directories.
        path_to_cdf: Path to the cluster description file handed to
            ``configure``.
    """
    provider = ConfStoreProvider(url)
    utils = Utils(provider)
    conf_dir = get_config_dir(url)

    search_path = os.getenv('PATH')
    if search_path:
        search_path = os.pathsep.join(
            [search_path, '/opt/seagate/cortx/hare/bin/'])
    module_path = os.pathsep.join(sys.path)

    transport = utils.get_transport_type()
    command = [
        'configure', '-c', conf_dir, path_to_cdf,
        '--transport', transport,
        '--log-dir', get_log_dir(url),
        '--log-file', LOG_FILE,
        '--uuid', provider.get_machine_id(),
    ]

    # Force the en_US UTF-8 locale for the child process, but only when
    # `locale -a` reports that it is available.
    available_locales = execute(['locale', '-a'])
    environment = {'PYTHONPATH': module_path, 'PATH': search_path}
    wanted_locales = ('en_US.utf-8', 'en_US.utf8')
    if any(name in available_locales for name in wanted_locales):
        environment['LC_ALL'] = "en_US.utf-8"
        environment['LANG'] = "en_US.utf-8"
    execute(command, environment)

    utils.copy_conf_files(conf_dir)
    utils.copy_consul_files(conf_dir, mode='client')

    # consul-kv.json holds the key-values of the entire cluster, so one
    # import is sufficient. Check whether the key-values were already
    # imported (e.g. during start up of another node); skipping the import
    # then avoids overwriting an updated cluster state with a duplicate.
    already_imported = is_kv_imported(utils)
    if not already_imported:
        utils.import_kv(conf_dir)
def generate_cdf(url: str, motr_md_url: str) -> str:
    """Generate the CDF from the main and the Motr metadata ConfStores.

    Args:
        url: URL of the main ConfStore.
        motr_md_url: URL of the Motr metadata ConfStore; the file it points
            to must already exist.

    Returns:
        Whatever ``CdfGenerator.generate()`` produces.

    Raises:
        FileNotFoundError: If the path part of ``motr_md_url`` is not an
            existing file.
    """
    # ConfStoreProvider creates an empty file when the target is missing,
    # so the presence check has to happen before the provider is built.
    md_file = urlparse(motr_md_url).path
    if not os.path.isfile(md_file):
        raise FileNotFoundError(f'config file: {motr_md_url} does not exist')

    motr_store = ConfStoreProvider(motr_md_url, index='motr_md')
    return CdfGenerator(ConfStoreProvider(url), motr_store).generate()
def test_is_cluster_first_node(self):
    """The validator reports the first node after the machine is updated."""
    store = ConfStoreProvider(URL)
    local_host = socket.gethostname()
    node_id = store.get_machine_id()
    update_machine(node_id, local_host)

    temp_store = ConfStoreProvider("json:///tmp/temp-test-conf-store.json")
    checker = Validator(temp_store)
    outcome = checker.is_first_node_in_cluster()
    self.assertEqual(True, outcome)
def test_invalid_machine_id(self):
    """``_get_machine_id`` raises RuntimeError for an unknown hostname."""
    store = ConfStoreProvider(URL)
    bogus_host = 'invalid-hostname'
    node_id = store.get_machine_id()
    update_machine(node_id, bogus_host)

    temp_store = ConfStoreProvider("json:///tmp/temp-test-conf-store.json")
    checker = Validator(temp_store)
    with self.assertRaises(RuntimeError):
        checker._get_machine_id()
def is_mkfs_required(url: str) -> bool:
    """Tell whether the current stage has to run on this node.

    The answer is ``Utils.is_motr_io_present()`` for the local machine id.

    Args:
        url: ConfStore URL.

    Returns:
        The result of the Motr IO presence check, or ``False`` when the
        check itself fails for any reason (the failure is logged as a
        warning and the stage is treated as not required).
    """
    try:
        conf = ConfStoreProvider(url)
        utils = Utils(conf)
        machine_id = conf.get_machine_id()
        return utils.is_motr_io_present(machine_id)
    except Exception as error:
        # logging.warn() is a deprecated alias; logging.warning() is the
        # supported spelling.
        logging.warning('Failed to get pod type (%s). Current stage will '
                        'be assumed as not required by default', error)
        return False
def get_server_type(url: str) -> str:
    """Classify the deployment as 'physical' or 'virtual'.

    Reads ``cortx>common>setup_type`` from the ConfStore (supported values
    are VM, HW and K8).

    Args:
        url: ConfStore URL.

    Returns:
        'physical' for a setup type of 'HW', 'virtual' for anything else,
        and 'unknown' when the lookup fails (the error is logged).
    """
    try:
        setup_type = ConfStoreProvider(url).get('cortx>common>setup_type')
        # Only 'HW' counts as physical; 'VM' and 'K8' (and any other value)
        # are treated as a virtual environment.
        if setup_type == 'HW':
            return 'physical'
        return 'virtual'
    except Exception as error:
        logging.error('Cannot get server type (%s)', error)
        return 'unknown'
def generate_config(url: str, path_to_cdf: str) -> None:
    """Run ``cfgen`` for the CDF and record the node name.

    Args:
        url: ConfStore URL used to look up the hostname.
        path_to_cdf: Path to the cluster description file given to
            ``cfgen``.
    """
    conf_dir = '/var/lib/hare'

    search_path = os.getenv('PATH')
    if search_path:
        search_path = os.pathsep.join(
            [search_path, '/opt/seagate/cortx/hare/bin/'])
    module_path = os.pathsep.join(sys.path)

    execute(['cfgen', '-o', conf_dir, path_to_cdf],
            env={'PYTHONPATH': module_path, 'PATH': search_path})

    node_name = ConfStoreProvider(url).get_hostname()
    save(f'{conf_dir}/node-name', node_name)
def get_server_type(url: str) -> str:
    """Classify this node as 'virtual' or 'physical'.

    Reads ``server_node>{machine_id}>type`` from the ConfStore.

    Args:
        url: ConfStore URL.

    Returns:
        'virtual' when the stored type is 'VM', 'physical' for any other
        value, and 'unknown' when the lookup fails (the error is logged).
    """
    try:
        store = ConfStoreProvider(url)
        node_id = store.get_machine_id()
        node_type = store.get(f'server_node>{node_id}>type')
        return 'virtual' if node_type == 'VM' else 'physical'
    except Exception as error:
        logging.error('Cannot get server type (%s)', error)
        return 'unknown'
def all_services_started(url: str, nr_svcs: int) -> bool:
    """Check that exactly ``nr_svcs`` processes of this host are started.

    Args:
        url: ConfStore URL used to resolve the local hostname.
        nr_svcs: Expected number of process entries under
            ``{hostname}/processes`` in the KV store.

    Returns:
        True only when the number of entries equals ``nr_svcs`` and every
        entry reports the state 'M0_CONF_HA_PROCESS_STARTED'.
    """
    node_name = ConfStoreProvider(url).get_hostname()
    entries = KVAdapter().kv_get(f'{node_name}/processes', recurse=True)

    # Each value is a UTF-8 encoded JSON document carrying a 'state' field.
    states = [json.loads(entry['Value'].decode('utf8'))['state']
              for entry in entries]
    flags = [state == 'M0_CONF_HA_PROCESS_STARTED' for state in states]
    return len(flags) == nr_svcs and all(flags)
def test_template_sane(self):
    """CDF generation succeeds for the ConfStore template.

    The template returned by ``self._get_confstore_template()`` is written
    to a temporary JSON file which backs the ConfStoreProvider under test.
    """
    # mkstemp() returns an already-open descriptor; wrap it with fdopen so
    # it is closed once the template is written instead of being leaked.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(self._get_confstore_template())

        store = ConfStoreProvider(f'json://{path}')
        store.get_machine_id = Mock(
            return_value='1114a50a6bf6f9c93ebd3c49d07d3fd4')
        #
        # the method will raise an exception if either
        # Dhall is unhappy or some values are not found in ConfStore
        cdf = CdfGenerator(provider=store, motr_provider=Mock())
        cdf._get_m0d_per_cvg = Mock(return_value=1)
        cdf.generate()
    finally:
        os.unlink(path)
def kv_cleanup(url):
    """Remove Hare's entries from the Consul KV store.

    Disk info and node facts are cleaned up first, a running cluster is
    shut down, and then the fixed set of Hare keys is deleted in a single
    transaction.

    Args:
        url: ConfStore URL.

    Raises:
        RuntimeError: If the transactional key deletion reports failure.
    """
    consul_util: ConsulUtil = ConsulUtil()
    utils = Utils(ConfStoreProvider(url))

    cleanup_disks_info(utils, consul_util)
    cleanup_node_facts(utils, consul_util)

    if is_cluster_running():
        logging.info('Cluster is running, shutting down')
        shutdown_cluster()

    # (key name, delete recursively?) in the order they are submitted.
    targets = (
        ('epoch', False),
        ('eq-epoch', False),
        ('last_fidk', False),
        ('leader', False),
        ('m0conf/', True),
        ('processes/', True),
        ('stats/', True),
        ('mkfs/', True),
        ('bytecount/', True),
        ('config_path', False),
        ('failvec', False),
        ('m0_client_types', True),
    )
    keys: List[KeyDelete] = [
        KeyDelete(name=key_name, recurse=recursive)
        for key_name, recursive in targets
    ]

    logging.info('Deleting Hare KV entries (%s)', keys)
    deleted = consul_util.kv.kv_delete_in_transaction(keys)
    if not deleted:
        raise RuntimeError('Error during key delete in transaction')
def is_content_ok(content: str, mocker, kv_adapter) -> bool:
    """Check that a ConfStore template can be turned into a CDF.

    Args:
        content: Template text; anything shorter than 4 characters is
            treated as an empty JSON and accepted without further checks.
        mocker: Object whose ``patch.object`` is used to stub provider
            methods (presumably the pytest-mock fixture - confirm).
        kv_adapter: KV adapter installed as ``generator.utils.kv``.

    Returns:
        True when the content is skipped or generation completes.

    Raises:
        Exception: Whatever ``CdfGenerator.generate()`` raises when Dhall
            is unhappy or values are missing from the ConfStore.
    """
    if len(content) < 4:
        # Some templates represent empty JSONs and should be skipped
        return True

    # mkstemp() returns an already-open descriptor; wrap it with fdopen so
    # it is closed after writing instead of being leaked on every call.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(content)

        store = ConfStoreProvider(f'json://{path}')
        mocker.patch.object(store,
                            'get_machine_id',
                            return_value='machine-id')
        mocker.patch.object(store,
                            'get_machine_ids_for_service',
                            return_value=['machine-id'])
        # NOTE(review): motr_store is patched but never handed to
        # CdfGenerator below - confirm whether it is still needed.
        motr_store = ValueProvider()
        mocker.patch.object(motr_store,
                            '_raw_get',
                            return_value='/dev/dummy')
        #
        # the method will raise an exception if either
        # Dhall is unhappy or some values are not found in ConfStore
        generator = CdfGenerator(provider=store)
        generator.utils.kv = kv_adapter
        generator.generate()
        return True
    finally:
        if os.path.isfile(path):
            os.unlink(path)
def setup_logging(url) -> None:
    """Configure root logging to stdout and a rotating deployment log.

    The log directory is built from the ConfStore value
    ``cortx>common>storage>log``, ``LOG_DIR_EXT``, the machine id and the
    ``hare_deployment`` sub-directory.

    Args:
        url: ConfStore URL.
    """
    store = ConfStoreProvider(url)
    node_id = store.get_machine_id()
    base_log_path = store.get('cortx>common>storage>log')

    deployment_dir = ''.join(
        [base_log_path, LOG_DIR_EXT, node_id, '/hare_deployment/'])
    deployment_log = deployment_dir + LOG_FILE
    create_logger_directory(deployment_dir)

    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    file_handler = logging.handlers.RotatingFileHandler(
        deployment_log,
        maxBytes=LOG_FILE_SIZE,
        mode='a',
        backupCount=5,
        encoding=None,
        delay=False)
    logging.basicConfig(
        level=logging.INFO,
        handlers=[stdout_handler, file_handler],
        format='%(asctime)s [%(levelname)s] %(message)s')
def init(args):
    """Run mkfs on this node and wait until all data nodes have done so.

    Consul and hax are started for the duration of the stage and stopped
    again afterwards. Nothing is done when ``is_mkfs_required()`` is false.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL.

    Raises:
        RuntimeError: On any failure; hax and Consul are stopped first if
            they were started.
    """
    # Bind the starters up front: the handler below inspects them, and a
    # failure before they are assigned must not turn into an
    # UnboundLocalError that hides the original error.
    consul_starter = None
    hax_starter = None
    try:
        url = args.config[0]

        if not is_mkfs_required(url):
            return

        conf = ConfStoreProvider(url)
        utils = Utils(conf)
        cns_utils = ConsulUtil()
        stop_event = Event()
        config_dir = get_config_dir(url)
        log_dir = get_log_dir(url)
        # Starting consul and hax
        consul_starter = _start_consul(utils, stop_event,
                                       config_dir, log_dir, url)
        hax_starter = _start_hax(utils, stop_event, config_dir, log_dir)
        hostname = utils.get_local_hostname()
        # Cleanup old mkfs state
        cleanup_mkfs_state(utils, cns_utils)
        start_mkfs_parallel(hostname, config_dir)
        # Update mkfs state
        set_mkfs_done_for(hostname, cns_utils)
        data_nodes = conf.get_hostnames_for_service(
            Const.SERVICE_MOTR_IO.value)

        # Wait for other nodes to complete.
        # This will block.
        while not is_mkfs_done_on_all_nodes(utils, cns_utils,
                                            data_nodes):
            sleep(5)
        # Stopping hax and consul
        stop_hax_blocking(hax_starter)
        stop_consul_blocking(consul_starter)
    except Exception as error:
        if hax_starter:
            stop_hax_blocking(hax_starter)
        if consul_starter:
            stop_consul_blocking(consul_starter)
        raise RuntimeError(
            f'Error while initializing cluster :key={error}') from error
def start(args):
    """Start the Hare services, with or without systemd.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL
            and ``args.systemd`` selects the systemd-managed start.
    """
    logging.info('Starting Hare services')
    url = args.config[0]
    utils = Utils(ConfStoreProvider(url))
    logrotate_generic(url)
    start_crond()

    if args.systemd:
        start_hax_with_systemd()
        return

    # Blocking call: it returns only once either the consul or the hax
    # process terminates.
    # TODO: Check if the respective processes need to be restarted.
    start_hax_and_consul_without_systemd(url, utils)
def _start_consul(utils: Utils,
                  stop_event: Event,
                  hare_local_dir: str,
                  hare_log_dir: str,
                  url: str):
    """Start a ConsulStarter for this host and record its node name.

    Args:
        utils: Helper used to resolve the local hostname.
        stop_event: Event handed over to the ConsulStarter.
        hare_local_dir: Base directory; Consul data and config live under
            ``consul/data`` and ``consul/config`` inside it.
        hare_log_dir: Directory passed to the starter as its log dir.
        url: ConfStore URL from which the Consul endpoints are read.

    Returns:
        The started ConsulStarter instance.
    """
    consul_data = f'{hare_local_dir}/consul/data'
    consul_config = f'{hare_local_dir}/consul/config'

    store = ConfStoreProvider(url)
    node_uuid = uuid.uuid4()
    endpoints = store.get('cortx>external>consul>endpoints')
    cns_utils: ConsulUtil = ConsulUtil()
    local_host = utils.get_local_hostname()

    # Only tcp endpoints are considered; every other scheme is ignored.
    # Dropping the first two '/'-separated parts removes the 'tcp://'.
    split_endpoints = (endpoint.split('/') for endpoint in endpoints)
    peers = ['/'.join(parts[2:])
             for parts in split_endpoints
             if parts[0] == 'tcp:']

    bind_addr = socket.gethostbyname(local_host)
    # Node name is the hostname plus the first 8 characters of the uuid.
    node_name = local_host + ':' + str(node_uuid)[:8]
    starter = ConsulStarter(utils=utils,
                            cns_utils=cns_utils,
                            stop_event=stop_event,
                            log_dir=hare_log_dir,
                            data_dir=consul_data,
                            config_dir=consul_config,
                            node_id=str(node_uuid),
                            node_name=node_name,
                            peers=peers,
                            bind_addr=bind_addr)
    starter.start()
    save_consul_node_name(cns_utils, node_name, local_host)
    return starter
def init_with_bootstrap(args):
    """Bootstrap the cluster from the first node, then shut it down.

    The hare-consul agent is disabled for the duration and re-enabled at
    the end. Only the first node in the cluster performs the bootstrap.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL
            and ``args.file`` optionally carries the CDF path.

    Raises:
        RuntimeError: If ``bootstrap_cluster`` returns a truthy value.
    """
    url = args.config[0]
    checker = Validator(ConfStoreProvider(url))
    disable_hare_consul_agent()

    if checker.is_first_node_in_cluster():
        # Fall back to cluster.yaml in the config dir when no CDF is given.
        path_to_cdf = (args.file[0] if args.file
                       else get_config_dir(url) + '/cluster.yaml')
        if not is_cluster_running():
            if bootstrap_cluster(path_to_cdf, True):
                raise RuntimeError('Failed to bootstrap the cluster')
        wait_for_cluster_start(url)
        shutdown_cluster()

    enable_hare_consul_agent()
def init(args):
    """Bootstrap the cluster from the first node and exit with a status.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL
            and ``args.file[0]`` the CDF path.

    Raises:
        RuntimeError: On any exception; the cluster is shut down first.
    """
    try:
        status = 0
        url = args.config[0]
        checker = Validator(ConfStoreProvider(url))
        disable_hare_consul_agent()

        if checker.is_first_node_in_cluster():
            path_to_cdf = args.file[0]
            if not is_cluster_running():
                if bootstrap_cluster(path_to_cdf, True):
                    logging.error('Failed to bootstrap the cluster')
                    status = -1
            # Waiting only makes sense when the bootstrap did not fail.
            if status == 0:
                wait_for_cluster_start(url)
            shutdown_cluster()

        enable_hare_consul_agent()
        exit(status)
    except Exception as error:
        shutdown_cluster()
        raise RuntimeError(f'Error while initializing cluster :key={error}')
def prepare(args):
    """Store paths, node facts and drive info while Consul is running.

    Consul is started for the duration of the stage and stopped at the end.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL.
    """
    url = args.config[0]
    utils = Utils(ConfStoreProvider(url))
    stop_event = Event()
    conf_dir = get_config_dir(url)
    log_dir = get_log_dir(url)

    _create_consul_namespace(conf_dir)
    starter = _start_consul(utils, stop_event, conf_dir, log_dir, url)

    utils.save_config_path(url)
    utils.save_log_path()
    utils.save_node_facts()
    utils.save_drives_info()

    # Best effort: destroy the leader session if there is one. Any failure
    # is only logged at debug level.
    try:
        consul_util: ConsulUtil = ConsulUtil()
        leader_session = consul_util.get_leader_session_no_wait()
        consul_util.destroy_session(leader_session)
    except Exception:
        logging.debug('No leader is elected yet')

    stop_consul_blocking(starter)
def test(args):
    """Bootstrap the cluster on the first node and verify its status.

    The cluster is always shut down at the end, whatever the outcome.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL
            and ``args.file`` optionally carries the CDF path.

    Raises:
        RuntimeError: If the bootstrap fails or ``check_cluster_status``
            returns a truthy value.
    """
    try:
        url = args.config[0]
        checker = Validator(ConfStoreProvider(url))
        # Nothing to verify from here unless this is the first node; the
        # `finally` clause still shuts the cluster down.
        if not checker.is_first_node_in_cluster():
            return

        path_to_cdf = (args.file[0] if args.file
                       else get_config_dir(url) + '/cluster.yaml')
        if not is_cluster_running():
            if bootstrap_cluster(path_to_cdf):
                raise RuntimeError("Failed to bootstrap the cluster")

        failure = check_cluster_status(path_to_cdf)
        wait_for_cluster_start(url)
        if failure:
            raise RuntimeError(f'Cluster status reports failure : {failure}')
    finally:
        shutdown_cluster()
def config(args):
    """Generate the CDF and the Hare configuration while Consul is running.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL
            and ``args.file`` optionally carries the CDF file name.

    Raises:
        RuntimeError: On any failure; Consul is stopped first if it was
            started.
    """
    consul_starter = None
    try:
        url = args.config[0]
        utils = Utils(ConfStoreProvider(url))
        stop_event = Event()
        conf_dir = get_config_dir(url)
        log_dir = get_log_dir(url)
        consul_starter = _start_consul(utils, stop_event, conf_dir,
                                       log_dir, url)

        # Fall back to cluster.yaml in the config dir when no file is given.
        cdf_path = (args.file[0] if args.file
                    else get_config_dir(url) + '/cluster.yaml')
        save(cdf_path, generate_cdf(url))
        update_hax_unit('/usr/lib/systemd/system/hare-hax.service')
        generate_config(url, cdf_path)
        stop_consul_blocking(consul_starter)
    except Exception as error:
        if consul_starter:
            stop_consul_blocking(consul_starter)
        raise RuntimeError(f'Error performing configuration : {error}')
def test(args):
    """Bootstrap the cluster on the first node and exit with its status.

    Exits with 0 on success and -1 when the bootstrap fails, the cluster
    status reports a failure, or any exception is raised.

    Args:
        args: Parsed command line; ``args.config[0]`` is the ConfStore URL
            and ``args.file[0]`` the CDF path.
    """
    try:
        status = 0
        url = args.config[0]
        checker = Validator(ConfStoreProvider(url))

        if checker.is_first_node_in_cluster():
            path_to_cdf = args.file[0]
            if not is_cluster_running():
                if bootstrap_cluster(path_to_cdf):
                    logging.error('Failed to bootstrap the cluster')
                    status = -1
            failure = check_cluster_status(path_to_cdf)
            # Waiting only makes sense when the bootstrap did not fail.
            if status == 0:
                wait_for_cluster_start(url)
            shutdown_cluster()
            if failure:
                logging.error('Cluster status reports failure')
                status = -1

        exit(status)
    except Exception as error:
        logging.error('Error while checking cluster status (%s)', error)
        shutdown_cluster()
        exit(-1)
def generate_cdf(url: str) -> str:
    """Generate the CDF from the ConfStore at ``url``.

    Args:
        url: ConfStore URL.

    Returns:
        Whatever ``CdfGenerator.generate()`` produces.
    """
    store = ConfStoreProvider(url)
    return CdfGenerator(store).generate()
def get_config_dir(url) -> str:
    """Build the per-machine configuration directory path.

    The path is the ConfStore value ``cortx>common>storage>local`` followed
    by ``CONF_DIR_EXT``, a slash and the machine id.

    Args:
        url: ConfStore URL.
    """
    store = ConfStoreProvider(url)
    node_id = store.get_machine_id()
    local_storage = store.get('cortx>common>storage>local')
    return ''.join([local_storage, CONF_DIR_EXT, '/', node_id])
def get_log_dir(url) -> str:
    """Build the per-machine log directory path.

    The path is the ConfStore value ``cortx>common>storage>log`` followed
    by ``LOG_DIR_EXT`` and the machine id.

    Args:
        url: ConfStore URL.
    """
    store = ConfStoreProvider(url)
    node_id = store.get_machine_id()
    log_storage = store.get('cortx>common>storage>log')
    return ''.join([log_storage, LOG_DIR_EXT, node_id])