def client_main(cluster_configuration):
    """
    Main loop of the cluster client (worker) process.

    Starts the internal-socket listener thread, then loops forever: each
    iteration builds a ClientManager connected to the master, runs the
    asyncore event loop until the connection drops, logs the disconnection
    and retries after the configured interval.

    :param cluster_configuration: Parsed cluster configuration dict passed to ClientManager.
    """
    # The manager is published as a module-level global so other parts of the
    # process (e.g. the internal socket handler) can reach it.
    global manager
    connection_retry_interval = get_cluster_items()['intervals']['client']['connection_retry']

    # Internal socket: serves local API requests while the client is running.
    internal_socket_thread = InternalSocketThread("c-internal", tag="[Client]")
    internal_socket_thread.start()

    # Loop: reconnect forever, sleeping `connection_retry_interval` between attempts.
    while True:
        try:
            manager = ClientManager(cluster_config=cluster_configuration)
            internal_socket_thread.setmanager(manager, ClientInternalSocketHandler)
            # Blocks until the socket map is empty, i.e. the connection to the
            # master is closed or lost.
            asyncore.loop(timeout=1, use_poll=False, map=manager.handler.map, count=None)
            # NOTE(review): `manager_tag` is not defined in this function —
            # presumably a module-level tag string; verify it exists, otherwise
            # this line raises NameError after a disconnection.
            logger.error("[{0}] Disconnected. Trying to connect again in {1}s.".format(
                manager_tag, connection_retry_interval))
            manager.exit()
        except socket.gaierror as e:
            # DNS resolution of the master's address failed.
            logger.error("[Client] Could not connect to master: {}. Review if the master's hostname or IP is correct. Trying to connect again in {}s".format(
                str(e), connection_retry_interval))
        time.sleep(connection_retry_interval)
def __init__(self, input_json: Dict, logger: logging.Logger, node: c_common.Handler = None, debug: bool = False, pretty: bool = False):
    """
    Class constructor

    :param input_json: JSON containing information/arguments about the request.
    :param logger: Logging logger to use
    :param node: Asyncio protocol object to use when sending requests to other nodes
    :param debug: Enable debug messages and raise exceptions.
    :param pretty: Return request result with pretty indent
    """
    self.input_json = input_json
    self.logger = logger
    self.debug = debug
    self.pretty = pretty

    # With no explicit node, fall back to the local client and read cluster
    # information directly; otherwise take it from the supplied node.
    if node is None:
        self.node = local_client
        self.cluster_items = cluster.get_cluster_items()
        self.node_info = cluster.get_node()
    else:
        self.node = node
        self.cluster_items = node.cluster_items
        self.node_info = node.get_node()

    # Short random identifier used to correlate this request in logs.
    self.request_id = str(random.randint(0, 2 ** 10 - 1))
def distribute_function(input_json, pretty=False, debug=False):
    """
    Distributes an API call.

    :param input_json: API call to execute.
    :param pretty: JSON pretty print.
    :param debug: whether to raise an exception or return an error.
    :return: a JSON response
    """
    try:
        node_info = cluster.get_node()
        request_type = rq.functions[input_json['function']]['type']
        is_dapi_enabled = cluster.get_cluster_items()['distributed_api']['enabled']
        logger.debug("[Cluster] [D API ] Distributed API is {}.".format(
            "enabled" if is_dapi_enabled else "disabled"))

        # Default the flag when the caller did not provide it.
        input_json['arguments'].setdefault('wait_for_complete', False)

        # The request is solved on this very node when:
        #  - the distributed API is disabled, or the cluster is not running;
        #  - the request type is 'local_any';
        #  - it is a 'local_master' request made on the master node;
        #  - it is a 'distributed_master' request already forwarded by the master.
        solve_locally = (not is_dapi_enabled
                         or not cluster.check_cluster_status()
                         or request_type == 'local_any'
                         or (request_type == 'local_master' and node_info['type'] == 'master')
                         or (request_type == 'distributed_master' and input_json['from_cluster']))
        if solve_locally:
            # local requests don't use this parameter
            del input_json['arguments']['wait_for_complete']
            return execute_local_request(input_json, pretty, debug)

        # Only the master node forwards requests, and only 'distributed_master' ones.
        if request_type == 'distributed_master' and node_info['type'] == 'master':
            return forward_request(input_json, node_info['node'], pretty, debug)

        # Otherwise the request was made in a worker node and its type isn't
        # 'local_any': execute it remotely.
        return execute_remote_request(input_json, pretty)
    except WazuhException as e:
        return print_json(data=e.message, error=e.code, pretty=pretty)
    except Exception as e:
        if debug:
            raise
        return print_json(data=str(e), error=1000, pretty=pretty)
def __init__(self, command: bytes, data: bytes, wait_for_complete: bool):
    """
    Class constructor

    :param command: Command to send
    :param data: Payload to send
    :param wait_for_complete: Whether to enable timeout or not
    """
    # Initialize the base client with the on-disk cluster configuration; the
    # performance/concurrency-test parameters are zeroed because this client
    # is only used to run a single local request.
    super().__init__(configuration=cluster.read_config(), enable_ssl=False,
                     performance_test=0, concurrency_test=0, file='', string=0,
                     logger=logging.getLogger(), tag="Local Client",
                     cluster_items=cluster.get_cluster_items())
    # Filled in once the response arrives.
    self.request_result = None
    self.command = command
    self.data = data
    self.wait_for_complete = wait_for_complete
    # Set when the asyncio connection is established.
    self.protocol = None
    self.transport = None
def __init__(self, command: bytes, data: bytes, wait_for_complete: bool):
    """
    Initialize a local client for a single request.

    :param command: Command to send
    :param data: Payload to send
    :param wait_for_complete: Whether to enable timeout or not
    """
    # Base-class arguments: real cluster configuration, no SSL, and the
    # performance/concurrency-test knobs disabled.
    base_kwargs = dict(configuration=cluster.read_config(),
                       enable_ssl=False,
                       performance_test=0,
                       concurrency_test=0,
                       file='',
                       string=0,
                       logger=logging.getLogger(),
                       tag="Local Client",
                       cluster_items=cluster.get_cluster_items())
    super().__init__(**base_kwargs)

    # Request payload.
    self.command = command
    self.data = data
    self.wait_for_complete = wait_for_complete

    # Populated later: the response, and the asyncio protocol/transport pair.
    self.request_result = None
    self.protocol = None
    self.transport = None
def _update_worker_files_in_master(self, json_file, zip_dir_path, worker_name,
                                   cluster_control_key, cluster_control_subkey, tag):
    """
    Apply on the master the files received from a worker node.

    Extracts each file (unmerging merged bundles) from the received zip
    directory and writes it under the ossec path, counting errors/warnings
    per cluster item, then stores the result for the healthcheck.

    :param json_file: Metadata dict {filename: file data} describing received files.
    :param zip_dir_path: Directory where the received zip was extracted.
    :param worker_name: Name of the sending worker (used for the tmp path).
    :param cluster_control_key: Healthcheck status key to update.
    :param cluster_control_subkey: Healthcheck status subkey to update.
    :param tag: Log tag string.
    """
    def update_file(n_errors, name, data, file_time=None, content=None, agents=None):
        # Writes a single file under an exclusive per-file lock; returns the
        # updated error counters and whether this file failed.
        # Full path
        full_path = common.ossec_path + name
        error_updating_file = False
        # Cluster items information: write mode and umask
        w_mode = cluster_items[data['cluster_item_key']]['write_mode']
        umask = cluster_items[data['cluster_item_key']]['umask']
        if content is None:
            # Content not supplied inline: read it from the extracted zip.
            zip_path = "{}/{}".format(zip_dir_path, name)
            with open(zip_path, 'rb') as f:
                content = f.read()
        # Per-file lock so concurrent syncs don't write the same file at once.
        lock_full_path = "{}/queue/cluster/lockdir/{}.lock".format(
            common.ossec_path, os.path.basename(full_path))
        lock_file = open(lock_full_path, 'a+')
        try:
            fcntl.lockf(lock_file, fcntl.LOCK_EX)
            _update_file(file_path=name, new_content=content, umask_int=umask,
                         mtime=file_time, w_mode=w_mode, tmp_dir=tmp_path,
                         whoami='master', agents=agents)
        except WazuhException as e:
            # Known/expected failures are only warnings.
            logger.debug2("{}: Warning updating file '{}': {}".format(tag, name, e))
            error_tag = 'warnings'
            error_updating_file = True
        except Exception as e:
            logger.debug2("{}: Error updating file '{}': {}".format(tag, name, e))
            error_tag = 'errors'
            error_updating_file = True
        if error_updating_file:
            # Increment the per-cluster-item counter for this error class.
            n_errors[error_tag][data['cluster_item_key']] = 1 if not n_errors[error_tag].get(data['cluster_item_key']) \
                else n_errors[error_tag][data['cluster_item_key']] + 1
        fcntl.lockf(lock_file, fcntl.LOCK_UN)
        lock_file.close()
        return n_errors, error_updating_file

    # tmp path
    tmp_path = "/queue/cluster/{}/tmp_files".format(worker_name)
    cluster_items = get_cluster_items()['files']
    n_merged_files = 0
    n_errors = {'errors': {}, 'warnings': {}}

    # create temporary directory for lock files
    lock_directory = "{}/queue/cluster/lockdir".format(common.ossec_path)
    if not os.path.exists(lock_directory):
        mkdir_with_mode(lock_directory)

    try:
        # NOTE(review): only 'name' is selected but 'id' is read below —
        # presumably get_agents_overview always returns 'id'; confirm.
        agents = Agent.get_agents_overview(select={'fields': ['name']}, limit=None)['items']
        agent_names = set(map(itemgetter('name'), agents))
        agent_ids = set(map(itemgetter('id'), agents))
    except Exception as e:
        logger.debug2("{}: Error getting agent ids and names: {}".format(tag, e))
        agent_names, agent_ids = {}, {}

    before = time.time()
    try:
        for filename, data in json_file.items():
            if data['merged']:
                # Merged bundle: unpack and update every contained file.
                for file_path, file_data, file_time in unmerge_agent_info(
                        data['merge_type'], zip_dir_path, data['merge_name']):
                    n_errors, error_updating_file = update_file(
                        n_errors, file_path, data, file_time, file_data,
                        (agent_names, agent_ids))
                    if not error_updating_file:
                        n_merged_files += 1
                    # Allow an orderly shutdown between files.
                    if self.stopper.is_set():
                        break
            else:
                n_errors, _ = update_file(n_errors, filename, data)
    except Exception as e:
        logger.error("{}: Error updating worker files: '{}'.".format(tag, e))
        raise e
    after = time.time()

    logger.debug("{0}: Time updating worker files: {1:.2f}s. Total of updated worker files: {2}.".format(
        tag, after - before, n_merged_files))

    if sum(n_errors['errors'].values()) > 0:
        logging.error("{}: Errors updating worker files: {}".format(
            tag, ' | '.join(['{}: {}'.format(key, value)
                             for key, value in n_errors['errors'].items()])))
    if sum(n_errors['warnings'].values()) > 0:
        # Warnings for agent-info/agent-groups files of agents that no longer
        # exist are expected and only reported at debug level.
        for key, value in n_errors['warnings'].items():
            if key == '/queue/agent-info/':
                logger.debug2("Received {} agent statuses for non-existent agents. Skipping.".format(value))
            elif key == '/queue/agent-groups/':
                logger.debug2("Received {} group assignments for non-existent agents. Skipping.".format(value))

    # Save info for healthcheck
    self.manager.set_worker_status(worker_id=self.name, key=cluster_control_key,
                                   subkey=cluster_control_subkey, status=n_merged_files)
def _update_master_files_in_worker(self, wrong_files, zip_path_dir, tag=None):
    """
    Apply on this worker the integrity fixes received from the master.

    Processes three categories from ``wrong_files``: 'shared' (overwrite
    existing files), 'missing' (create files) and 'extra' (remove files, and
    prune directories that became empty). Returns True when finished.

    :param wrong_files: Dict with 'shared', 'missing' and 'extra' file maps.
    :param zip_path_dir: Directory where the master's zip was extracted.
    :param tag: Optional log tag; defaults to "[Worker] [Sync process]".
    :return: True
    """
    def overwrite_or_create_files(filename, data, content=None):
        # Writes one file, taking its content either from `content` or from
        # the extracted zip directory.
        # Cluster items information: write mode and umask
        cluster_item_key = data['cluster_item_key']
        w_mode = cluster_items[cluster_item_key]['write_mode']
        umask = cluster_items[cluster_item_key]['umask']
        if content is None:
            # Full path
            file_path = common.ossec_path + filename
            zip_path = "{}/{}".format(zip_path_dir, filename)
            # File content and time
            # NOTE(review): opened in text mode here, unlike the 'rb' used by
            # the master-side updater — confirm this is intentional.
            with open(zip_path, 'r') as f:
                file_data = f.read()
        else:
            file_data = content
        tmp_path = '/queue/cluster/tmp_files'
        _update_file(file_path=filename, new_content=file_data, umask_int=umask,
                     w_mode=w_mode, tmp_dir=tmp_path, whoami='worker')

    if not tag:
        tag = "[Worker] [Sync process]"
    cluster_items = get_cluster_items()['files']
    before = time.time()

    # 1) Overwrite files the master reported as different ('shared').
    error_shared_files = 0
    if wrong_files['shared']:
        logger.debug("{0}: Received {1} wrong files to fix from master. Action: Overwrite files.".format(
            tag, len(wrong_files['shared'])))
        for file_to_overwrite, data in wrong_files['shared'].items():
            try:
                logger.debug2("{0}: Overwrite file: '{1}'".format(tag, file_to_overwrite))
                if data['merged']:
                    # Merged bundle: unpack and write each contained file.
                    for name, content, _ in unmerge_agent_info('agent-groups', zip_path_dir, file_to_overwrite):
                        overwrite_or_create_files(name, data, content)
                        if self.stopper.is_set():
                            break
                else:
                    overwrite_or_create_files(file_to_overwrite, data)
                    if self.stopper.is_set():
                        break
            except Exception as e:
                # Count the failure and keep processing the remaining files.
                error_shared_files += 1
                logger.debug2("{}: Error overwriting file '{}': {}".format(tag, file_to_overwrite, str(e)))
                continue

    # 2) Create files the master has but this worker is missing.
    error_missing_files = 0
    if wrong_files['missing']:
        logger.debug("{0}: Received {1} missing files from master. Action: Create files.".format(
            tag, len(wrong_files['missing'])))
        for file_to_create, data in wrong_files['missing'].items():
            try:
                logger.debug2("{0}: Create file: '{1}'".format(tag, file_to_create))
                if data['merged']:
                    for name, content, _ in unmerge_agent_info('agent-groups', zip_path_dir, file_to_create):
                        overwrite_or_create_files(name, data, content)
                        if self.stopper.is_set():
                            break
                else:
                    overwrite_or_create_files(file_to_create, data)
                    if self.stopper.is_set():
                        break
            except Exception as e:
                error_missing_files += 1
                logger.debug2("{}: Error creating file '{}': {}".format(tag, file_to_create, str(e)))
                continue

    # 3) Remove files this worker has but the master does not.
    error_extra_files = 0
    if wrong_files['extra']:
        logger.debug("{0}: Received {1} extra files from master. Action: Remove files.".format(
            tag, len(wrong_files['extra'])))
        for file_to_remove in wrong_files['extra']:
            try:
                logger.debug2("{0}: Remove file: '{1}'".format(tag, file_to_remove))
                file_path = common.ossec_path + file_to_remove
                try:
                    os.remove(file_path)
                except OSError as e:
                    # An already-absent agent-groups file is not an error.
                    if e.errno == errno.ENOENT and '/queue/agent-groups/' in file_path:
                        logger.debug2("{}: File {} doesn't exist.".format(tag, file_to_remove))
                        continue
                    else:
                        raise e
            except Exception as e:
                error_extra_files += 1
                logger.debug2("{}: Error removing file '{}': {}".format(tag, file_to_remove, str(e)))
                continue
            if self.stopper.is_set():
                break

        # Prune directories of removed files when their cluster item allows it
        # and they hold nothing but excluded files.
        directories_to_check = {os.path.dirname(f): cluster_items[data
                                ['cluster_item_key']]['remove_subdirs_if_empty']
                                for f, data in wrong_files['extra'].items()}
        for directory in map(itemgetter(0), filter(lambda x: x[1], directories_to_check.items())):
            try:
                full_path = common.ossec_path + directory
                dir_files = set(os.listdir(full_path))
                if not dir_files or dir_files.issubset(set(cluster_items['excluded_files'])):
                    shutil.rmtree(full_path)
            except Exception as e:
                error_extra_files += 1
                logger.debug2("{}: Error removing directory '{}': {}".format(tag, directory, str(e)))
                continue
            if self.stopper.is_set():
                break

    if error_extra_files or error_shared_files or error_missing_files:
        logger.error("{}: Found errors: {} overwriting, {} creating and {} removing".format(
            tag, error_shared_files, error_missing_files, error_extra_files))

    after = time.time()
    logger.debug2("{}: Time updating integrity from master: {}s".format(tag, after - before))
    return True
def _update_client_files_in_master(self, json_file, files_to_update_json, zip_dir_path,
                                   client_name, cluster_control_key, cluster_control_subkey, tag):
    """
    Apply on the master the agent-info/agent-groups files received from a client node.

    Unmerges the received bundles, writes each file under a per-file lock,
    counts updated files per merge type and stores the totals for the
    healthcheck.

    :param json_file: Metadata dict {filename: file data} describing received files.
    :param files_to_update_json: Additional received-files metadata (not read here —
        presumably consumed by callers; verify).
    :param zip_dir_path: Directory where the received zip was extracted.
    :param client_name: Name of the sending client (used for the tmp path).
    :param cluster_control_key: Healthcheck status key to update.
    :param cluster_control_subkey: Healthcheck status subkey to update.
    :param tag: Log tag string.
    """
    def update_file(n_errors, name, data, file_time=None, content=None, agents=None):
        # Writes a single file under an exclusive per-file lock; returns the
        # updated per-cluster-item error counters.
        # Full path
        full_path = common.ossec_path + name
        # Cluster items information: write mode and umask
        w_mode = cluster_items[data['cluster_item_key']]['write_mode']
        # umask is stored as a string (e.g. "0o660"); base=0 honours the prefix.
        umask = int(cluster_items[data['cluster_item_key']]['umask'], base=0)
        if content is None:
            # Content not supplied inline: read it from the extracted zip.
            zip_path = "{}/{}".format(zip_dir_path, name)
            with open(zip_path, 'rb') as f:
                content = f.read()
        # Per-file lock so concurrent syncs don't write the same file at once.
        lock_full_path = "{}/queue/cluster/lockdir/{}.lock".format(
            common.ossec_path, os.path.basename(full_path))
        lock_file = open(lock_full_path, 'a+')
        try:
            fcntl.lockf(lock_file, fcntl.LOCK_EX)
            _update_file(file_path=name, new_content=content, umask_int=umask,
                         mtime=file_time, w_mode=w_mode, tmp_dir=tmp_path,
                         whoami='master', agents=agents)
        except Exception as e:
            logger.debug2("{}: Error updating file '{}': {}".format(tag, name, e))
            n_errors[data['cluster_item_key']] = 1 if not n_errors.get(data['cluster_item_key']) \
                else n_errors[data['cluster_item_key']] + 1
        fcntl.lockf(lock_file, fcntl.LOCK_UN)
        lock_file.close()
        return n_errors

    # tmp path
    tmp_path = "/queue/cluster/{}/tmp_files".format(client_name)
    cluster_items = get_cluster_items()['files']
    n_agentsinfo = 0
    n_agentgroups = 0
    n_errors = {}

    # create temporary directory for lock files
    lock_directory = "{}/queue/cluster/lockdir".format(common.ossec_path)
    if not os.path.exists(lock_directory):
        mkdir_with_mode(lock_directory)

    try:
        # NOTE(review): only 'name' is selected but 'id' is read below —
        # presumably get_agents_overview always returns 'id'; confirm.
        agents = Agent.get_agents_overview(select={'fields': ['name']}, limit=None)['items']
        agent_names = set(map(itemgetter('name'), agents))
        agent_ids = set(map(itemgetter('id'), agents))
        # Drop the full listing once the name/id sets are built.
        agents = None
    except Exception as e:
        logger.debug2("{}: Error getting agent ids and names: {}".format(tag, e))
        agent_names, agent_ids = {}, {}

    before = time.time()
    try:
        for filename, data in json_file.items():
            if data['merged']:
                # Merged bundle: unpack and update every contained file,
                # counting agent-info and agent-groups entries separately.
                for file_path, file_data, file_time in unmerge_agent_info(
                        data['merge_type'], zip_dir_path, data['merge_name']):
                    n_errors = update_file(n_errors, file_path, data, file_time,
                                           file_data, (agent_names, agent_ids))
                    if data['merge_type'] == 'agent-info':
                        n_agentsinfo += 1
                    else:
                        n_agentgroups += 1
                    # Allow an orderly shutdown between files.
                    if self.stopper.is_set():
                        break
            else:
                n_errors = update_file(n_errors, filename, data)
    except Exception as e:
        logger.error("{}: Error updating client files: '{}'.".format(tag, e))
        raise e
    after = time.time()

    logger.debug("{0}: Time updating client files: {1:.2f}s. Agents-info updated total: {2}. Agent-groups updated total: {3}.".format(
        tag, after - before, n_agentsinfo, n_agentgroups))

    if sum(n_errors.values()) > 0:
        logging.error("{}: Errors updating client files: {}".format(
            tag, ' | '.join(['{}: {}'.format(key, value)
                             for key, value in n_errors.items()])))

    # Save info for healthcheck
    status_number = n_agentsinfo if cluster_control_key == 'last_sync_agentinfo' else n_agentgroups
    self.manager.set_client_status(client_id=self.name, key=cluster_control_key,
                                   subkey=cluster_control_subkey, status=status_number)
'wazuh_clusterd', 'debug', 2, 0) or args.debug_level except Exception: debug_mode = 0 # set correct permissions on cluster.log file if os.path.exists('{0}/logs/cluster.log'.format(common.ossec_path)): os.chown('{0}/logs/cluster.log'.format(common.ossec_path), common.ossec_uid, common.ossec_gid) os.chmod('{0}/logs/cluster.log'.format(common.ossec_path), 0o660) main_logger = set_logging(debug_mode) cluster_configuration = cluster.read_config(config_file=args.config_file) if cluster_configuration['disabled']: sys.exit(0) cluster_items = cluster.get_cluster_items() try: cluster.check_cluster_config(cluster_configuration) except Exception as e: main_logger.error(e) sys.exit(1) if args.test_config: sys.exit(0) # clean cluster.clean_up() # Foreground/Daemon if not args.foreground: pyDaemonModule.pyDaemon()