def _init_network_sockets(self):
    """
    Create, configure and bind the 0MQ command socket(s) of this server.

    The 0MQ identity is taken from get_server_uuid("config") and is also
    stored as self.bind_id. Every socket that could be bound is registered
    with the poller (POLLIN -> handler) and stored in self.socket_dict under
    its key; a socket that fails to bind is logged as critical and closed.
    """
    zmq_identity = get_server_uuid("config")
    self.bind_id = zmq_identity
    self.socket_dict = {}
    # (name of the zmq option, value), applied in this order before binding;
    # the option constant is looked up right before it is set
    socket_options = [
        ("IDENTITY", zmq_identity),
        ("LINGER", 100),
        ("RCVHWM", 256),
        ("SNDHWM", 256),
        ("BACKLOG", 1),
        ("RECONNECT_IVL_MAX", 500),
        ("RECONNECT_IVL", 200),
        ("TCP_KEEPALIVE", 1),
        ("TCP_KEEPALIVE_IDLE", 300),
    ]
    # (key in socket_dict, socket type, port to bind to, poll-in handler)
    socket_specs = [
        ("router", zmq.ROUTER, global_config["COMMAND_PORT"], self._new_com),  # @UndefinedVariable
    ]
    for key, sock_type, bind_port, target_func in socket_specs:
        client = self.zmq_context.socket(sock_type)
        for _opt_name, _opt_value in socket_options:
            client.setsockopt(getattr(zmq, _opt_name), _opt_value)
        conn_str = "tcp://*:%d" % (bind_port)
        try:
            client.bind(conn_str)
        except zmq.ZMQError:
            self.log(
                "error binding to {}{{{:d}}}: {}".format(
                    conn_str,
                    sock_type,
                    process_tools.get_except_info(),
                ),
                logging_tools.LOG_LEVEL_CRITICAL
            )
            client.close()
            # an unbound socket is neither polled nor stored
            continue
        self.log("bind to port {}{{{:d}}}".format(conn_str, sock_type))
        self.register_poller(client, zmq.POLLIN, target_func)  # @UndefinedVariable
        self.socket_dict[key] = client
def get_send_data(self):
    """
    Return a dict describing this sync config for the distribution info.

    The base entries (master flag, pk, pure uuid, directory offset, master
    port and master uuid) are always present. When a slave name is set the
    entries name, pk, master_ip, master_uuid, slave_ip and slave_uuid are
    added, overriding the base pk and master_uuid.
    """
    mon_server = self.monitor_server
    send_data = {}
    send_data["master"] = not self.__slave_name
    send_data["pk"] = mon_server.idx
    send_data["pure_uuid"] = mon_server.uuid
    send_data["dir_offset"] = self.__dir_offset
    # todo, FIXME
    send_data["master_port"] = 8010
    send_data["master_uuid"] = routing.get_server_uuid(
        icswServiceEnum.monitor_server,
        mon_server.uuid,
    )
    if not self.__slave_name:
        return send_data
    # slave specific entries
    send_data["name"] = self.__slave_name
    send_data["pk"] = self.monitor_server.pk
    send_data["master_ip"] = self.master_ip
    send_data["master_uuid"] = self.master_uuid
    send_data["slave_ip"] = self.slave_ip
    send_data["slave_uuid"] = self.slave_uuid
    return send_data
def _run_bg_jobs(self, cur_bg, to_run): if to_run: self.log("commands to execute: {:d}".format(len(to_run))) cur_bg.num_servers = len(to_run) cur_bg.save() for _run_job, _send_xml, _srv_type in to_run: _run_job.save() # set BackGroundJobRunID _send_xml["bgjrid"] = "{:d}".format(_run_job.pk) # add to waiting list _is_local = _run_job.server_id == self.__server_idx and _srv_type == icswServiceEnum.cluster_server _conn_str = self.srv_routing.get_connection_string( _srv_type, _run_job.server_id) self.__waiting_ids.append(_run_job.pk) if not _conn_str: self.log( u"got empty connection_string for {} ({})".format( _srv_type, _send_xml["*command"], ), logging_tools.LOG_LEVEL_ERROR) # set result _send_xml.set_result( "empty connection string", server_command.SRV_REPLY_STATE_CRITICAL, ) self.bg_notify_handle_result(_send_xml) else: _srv_uuid = get_server_uuid(_srv_type, _run_job.server.uuid) self.log( u"command to {} on {} {} ({}, command {}, {})".format( _srv_type.name, unicode(_run_job.server), _conn_str, _srv_uuid, _send_xml["*command"], "local" if _is_local else "remote", )) _ok = self.bg_send_to_server( _conn_str, _srv_uuid, _send_xml, local=_is_local, ) if not _ok: _send_xml.set_result( "error sending to {}".format(_conn_str), server_command.SRV_REPLY_STATE_CRITICAL) self.bg_notify_handle_result(_send_xml) else: self.bg_notify_check_for_bgj_finish(cur_bg)
def _init_network_sockets(self):
    """
    Initialise the connection bookkeeping and bind the command socket.

    Resets the connection and discovery dicts, stores the server uuid from
    get_server_uuid("server") as self.bind_id and starts with an empty list
    of virtual sockets. When a run command is set no socket is bound and
    self.main_socket is None. Otherwise network_bind() is called for the
    cluster-server service; any error raised by it is logged as critical and
    an exit is requested via self["exit_requested"].
    """
    self.__connection_dict = {}
    self.__discovery_dict = {}
    self.bind_id = get_server_uuid("server")
    self.virtual_sockets = []
    if self.__run_command:
        self.main_socket = None
    else:
        try:
            self.network_bind(
                service_type_enum=icswServiceEnum.cluster_server,
                need_all_binds=global_config["NEED_ALL_NETWORK_BINDS"],
                pollin=self._recv_command,
                bind_to_localhost=True,
            )
        except Exception:
            # only real errors are handled here; SystemExit and
            # KeyboardInterrupt must not be swallowed by the bind handler
            self.log(
                "error while bind: {}".format(process_tools.get_except_info()),
                logging_tools.LOG_LEVEL_CRITICAL
            )
            self["exit_requested"] = True
def _generate_config_step2(self, cur_c, b_dev, act_prod_net, boot_netdev, dev_sc):
    """
    Collect the configuration dictionary for a device and run the requested command.

    Steps performed (only when at least one cluster server is found):
      * determine the IP of the device on its boot netdevice,
      * for every service type except the ones in multiple_configs pick one
        server (preferring the bootserver for config / mother server, otherwise
        the route with the smallest first element) and store name, IP and uuid
        in conf_dict,
      * add image / system information, device information and all config
        variables to conf_dict,
      * decide which IPs of the device are taken into conf_dict["node_if"],
      * for the command "get_config_vars" store the variable tuple list on cur_c,
        for "build_config" run the scripts of all configs and write the tree,
        any other command is logged as an error.

    :param cur_c: request object, used for logging (log, state), its command
        attribute selects the action and results are stored on it
    :param b_dev: device to generate the config for
    :param act_prod_net: production network (identifier, postfix, idx_list are read)
    :param boot_netdev: netdevice whose pk selects the running IP
    :param dev_sc: lookup structure providing identifier_ip_lut and
        ip_netdevice_lut, also passed to the route lookup
    :raises net_ip.MultipleObjectsReturned: re-raised after logging when more
        than one net_ip matches the chosen server IP
    """
    self.router_obj.check_for_update()
    # NOTE(review): raises IndexError when no IP of the "p" list sits on boot_netdev
    running_ip = [
        ip.ip for ip in dev_sc.identifier_ip_lut["p"]
        if dev_sc.ip_netdevice_lut[ip.ip].pk == boot_netdev.pk
    ][0]
    cur_c.log(
        "IP in production network '{}' is {}, network_postfix is '{}'".
        format(act_prod_net.identifier, running_ip, act_prod_net.postfix))
    # multiple configs
    multiple_configs = [icswServiceEnum.cluster_server]
    all_servers = config_tools.icswDeviceWithConfig(service_type_enum=None)
    def_servers = all_servers.get(icswServiceEnum.cluster_server, [])
    # def_servers = []
    if not def_servers:
        cur_c.log("no Servers found", logging_tools.LOG_LEVEL_ERROR, state="done")
    else:
        srv_names = sorted([
            "{}{}".format(cur_srv.short_host_name, act_prod_net.postfix)
            for cur_srv in def_servers
        ])
        cur_c.log("{} found: {}".format(
            logging_tools.get_plural("server", len(def_servers)),
            ", ".join(srv_names)))
        # store in act_prod_net
        conf_dict = {}
        conf_dict["servers"] = srv_names
        # custom Enum cannot be compared against each other
        for srv_type in all_servers.keys():
            if srv_type not in multiple_configs:
                # start with a sentinel whose first element is larger than that
                # of any real route (presumably a penalty, TODO confirm)
                routing_info, act_server, routes_found = ([66666666], None, 0)
                for actual_server in all_servers[srv_type]:
                    act_routing_info = actual_server.get_route_to_other_device(
                        self.router_obj,
                        dev_sc,
                        filter_ip=running_ip,
                        allow_route_to_other_networks=True)
                    if act_routing_info:
                        routes_found += 1
                        # store in some dict-like structure
                        # print "***", actual_server.short_host_name, dir(actual_server)
                        # FIXME, postfix not handled
                        conf_dict["{}:{}".format(
                            actual_server.short_host_name,
                            srv_type.name
                        )] = actual_server.device.full_name
                        # presumably [0][2][1][0] is the first IP on the server
                        # side of the best route, verify against get_route_to_other_device
                        conf_dict["{}:{}_ip".format(
                            actual_server.short_host_name,
                            srv_type.name)] = act_routing_info[0][2][1][0]
                        # the bootserver always wins for config and mother server
                        if srv_type in [
                            icswServiceEnum.config_server,
                            icswServiceEnum.mother_server
                        ] and actual_server.device.pk == b_dev.bootserver_id:
                            routing_info, act_server = (
                                act_routing_info[0], actual_server)
                        else:
                            # otherwise keep the route with the smallest first element
                            if act_routing_info[0][0] < routing_info[0]:
                                routing_info, act_server = (
                                    act_routing_info[0], actual_server)
                    else:
                        cur_c.log(
                            "empty routing info for {} to {}".format(
                                srv_type.name,
                                actual_server.device.name,
                            ),
                            logging_tools.LOG_LEVEL_WARN)
                if act_server:
                    server_ip = routing_info[2][1][0]
                    # map from server_ip to localized name
                    try:
                        conf_dict[srv_type.name] = net_ip.objects.get(
                            Q(ip=server_ip)).full_name
                    except net_ip.MultipleObjectsReturned:
                        cur_c.log(
                            "more than one net_ip found for server_type {} (IP {})"
                            .format(
                                srv_type.name,
                                server_ip,
                            ),
                            logging_tools.LOG_LEVEL_ERROR)
                        raise
                    conf_dict["{}_ip".format(srv_type.name)] = server_ip
                    # a KeyError from get_server_uuid is only logged, the uuid
                    # entry is then missing from conf_dict
                    try:
                        conf_dict["{}_uuid".format(
                            srv_type.name)] = get_server_uuid(
                                srv_type, act_server.device.uuid)
                    except KeyError:
                        self.log(
                            " ... encountered slave config {}".format(
                                srv_type.name),
                            logging_tools.LOG_LEVEL_WARN)
                    cur_c.log(" {:<20s}: {:<25s} (IP {:15s}){}".format(
                        srv_type.name,
                        conf_dict[srv_type.name],
                        server_ip,
                        " (best of {} found)".format(
                            logging_tools.get_plural(
                                "route", routes_found))
                        if routes_found > 1 else ""))
                else:
                    cur_c.log(" {:20s}: not found".format(srv_type.name))
        # system information from the new image, with fixed defaults as fallback
        new_img = b_dev.new_image
        if new_img:
            conf_dict["system"] = {
                "vendor": new_img.sys_vendor,
                "version": new_img.sys_version,
                "release": new_img.sys_release,
            }
        else:
            self.log("no image defined, using defaults")
            conf_dict["system"] = {
                "vendor": "suse",
                "version": 13,
                "release": 1,
            }
        conf_dict["device"] = b_dev
        conf_dict["net"] = act_prod_net
        conf_dict["host"] = b_dev.name
        conf_dict["hostfq"] = b_dev.full_name
        conf_dict["device_idx"] = b_dev.pk
        # image is missing, FIXME
        # # dc.execute("SELECT * FROM image WHERE image_idx=%s", (self["new_image"]))
        # # if dc.rowcount:
        # #     act_prod_net["image"] = dc.fetchone()
        # # else:
        # #     act_prod_net["image"] = {}
        # pks of all configs set on the device itself or on the meta device of
        # its device group, highest priority first
        config_pks = list(
            config.objects.filter(
                Q(device_config__device=b_dev) |
                (Q(device_config__device__device_group=b_dev.device_group_id) &
                 Q(device_config__device__is_meta_device=True))).
            order_by("-priority", "name").distinct().values_list("pk", flat=True))
        # all configs (not only the ones of the device) with their variables
        pseudo_config_list = config.objects.all().prefetch_related(
            "config_str_set",
            "config_int_set",
            "config_bool_set",
            "config_blob_set",
            "config_script_set").order_by("-priority", "name")
        config_dict = {cur_pc.pk: cur_pc for cur_pc in pseudo_config_list}
        # copy variables
        for p_config in pseudo_config_list:
            for var_type in ["str", "int", "bool", "blob"]:
                for cur_var in getattr(
                        p_config,
                        "config_{}_set".format(var_type)).all():
                    conf_dict["{}.{}".format(p_config.name, cur_var.name)] = cur_var.value
        # loop is currently a no-op, the variable dump is disabled
        for _cur_conf in pseudo_config_list:
            # cur_conf.show_variables(cur_c.log, detail=global_config["DEBUG"])
            pass
        cur_c.log("{} found: {}".format(
            logging_tools.get_plural("config", len(config_pks)),
            ", ".join([config_dict[pk].name for pk in config_pks]) if config_pks else "no configs",
        ))
        # node interfaces
        conf_dict["node_if"] = []
        taken_list, not_taken_list = ([], [])
        for cur_net in b_dev.netdevice_set.exclude(
                Q(enabled=False)).prefetch_related(
                    "net_ip_set",
                    "net_ip_set__network",
                    "net_ip_set__network__network_type",
                    "net_ip_set__domain_tree_node"):
            for cur_ip in cur_net.net_ip_set.all():
                # if cur_ip.network_id
                # an IP is taken when its network is in the idx_list of the
                # production net, when it is a loopback IP or when its domain
                # tree node has always_create_ip set
                if cur_ip.network_id in act_prod_net.idx_list:
                    take_it, cause = (True, "network_index in list")
                elif cur_ip.network.network_type.identifier == "l":
                    take_it, cause = (True, "network_type is loopback")
                else:
                    if cur_ip.domain_tree_node and cur_ip.domain_tree_node.always_create_ip:
                        take_it, cause = (
                            True,
                            "network_index not in list but always_create_ip set"
                        )
                    else:
                        take_it, cause = (
                            False,
                            "network_index not in list and always_create_ip not set"
                        )
                if take_it:
                    conf_dict["node_if"].append(cur_ip)
                    taken_list.append((cur_ip, cause))
                else:
                    not_taken_list.append((cur_ip, cause))
        cur_c.log("{} in taken_list".format(
            logging_tools.get_plural("Netdevice", len(taken_list))))
        for entry, cause in taken_list:
            cur_c.log(
                " - {:<6s} (IP {:<15s}, network {:<20s}) : {}".format(
                    entry.netdevice.devname,
                    entry.ip,
                    str(entry.network),
                    cause))
        cur_c.log("{} in not_taken_list".format(
            logging_tools.get_plural("Netdevice", len(not_taken_list))))
        for entry, cause in not_taken_list:
            cur_c.log(
                " - {:<6s} (IP {:<15s}, network {:<20s}) : {}".format(
                    entry.netdevice.devname,
                    entry.ip,
                    str(entry.network),
                    cause))
        if cur_c.command == "get_config_vars":
            cur_c.var_tuple_list = self._generate_vtl(conf_dict)
            cur_c.add_set_keys("var_tuple_list")
            cur_c.log("vars created", state="done")
        elif cur_c.command == "build_config":
            # create config
            # dict: which config was called (sucessfully)
            conf_dict["called"] = {}
            cur_c.conf_dict, cur_c.link_dict, cur_c.erase_dict = ({}, {}, {})
            # cur_c.conf_dict[config_obj.dest] = config_obj
            new_tree = GeneratedTree()
            cur_bc = BuildContainer(cur_c, config_dict, conf_dict, new_tree, self.router_obj)
            for pk in config_pks:
                cur_bc.process_scripts(pk)
            new_tree.write_node_config(cur_c, cur_bc)
            # conf_dict["called"][False] holds (pk, err_lines) tuples of failed configs
            if False in conf_dict["called"]:
                cur_c.log("error in scripts for {}: {}".format(
                    logging_tools.get_plural(
                        "config", len(conf_dict["called"][False])),
                    ", ".join(
                        sorted([
                            str(config_dict[pk])
                            for pk, err_lines in conf_dict["called"][False]
                        ]))),
                    logging_tools.LOG_LEVEL_ERROR,
                    state="done")
                cur_c.add_set_keys("error_dict")
                cur_c.error_dict = {
                    str(config_dict[pk]): err_lines
                    for pk, err_lines in conf_dict["called"][False]
                }
            else:
                cur_c.log("config built", state="done")
            cur_bc.close()
        else:
            cur_c.log("unknown action '{}'".format(cur_c.command),
                      logging_tools.LOG_LEVEL_ERROR,
                      state="done")
def __init__(self, proc, monitor_server, **kwargs):
    """
    holds information about remote monitoring satellites

    :param proc: owning process, its router_obj is used for the route lookup
    :param monitor_server: device this sync config belongs to
    :param kwargs: slave_name (default None; when set this is a slave config)
        and distributed (default False)

    For a slave the uuids of master and slave and the IPs of the route
    between them are resolved (both IPs are None when no route is found).
    The version information is recovered from the latest build stored in the
    database, if there is one.
    """
    self.__process = proc
    self.__slave_name = kwargs.get("slave_name", None)
    self.__main_dir = global_config["MD_BASEDIR"]
    self.distributed = kwargs.get("distributed", False)
    self.master = not self.__slave_name
    if not self.__slave_name:
        # hm, for send_* commands
        self.slave_uuid = ""
        self.__dir_offset = "master"
    else:
        self.__dir_offset = os.path.join("slaves", self.__slave_name)
        master_cfg = config_tools.device_with_config(
            service_type_enum=icswServiceEnum.monitor_server
        )
        # first device with the monitor_server config acts as master
        master_entry = master_cfg[icswServiceEnum.monitor_server][0]
        self.master_uuid = routing.get_server_uuid(
            icswServiceEnum.monitor_slave,
            master_entry.effective_device.uuid,
        )
        slave_cfg = config_tools.server_check(
            host_name=monitor_server.full_name,
            service_type_enum=icswServiceEnum.monitor_slave,
            fetch_network_info=True
        )
        self.slave_uuid = routing.get_server_uuid(
            icswServiceEnum.monitor_slave,
            monitor_server.uuid,
        )
        route = master_entry.get_route_to_other_device(
            self.__process.router_obj,
            slave_cfg,
            allow_route_to_other_networks=True,
            global_sort_results=True,
        )
        if route:
            best_route = route[0]
            self.slave_ip = best_route[3][1][0]
            self.master_ip = best_route[2][1][0]
            self.log(
                "IP-address of slave {} is {} (master ip: {})".format(
                    unicode(monitor_server),
                    self.slave_ip,
                    self.master_ip
                )
            )
        else:
            self.slave_ip = None
            self.master_ip = None
            self.log(
                "no route to slave {} found".format(unicode(monitor_server)),
                logging_tools.LOG_LEVEL_ERROR
            )
        # target config version directory for distribute
        self.__tcv_dict = {}
    self.monitor_server = monitor_server
    self.__dict = {}
    self._create_directories()
    # flags
    # config state, one of
    #   u .... unknown
    #   b .... building
    #   d .... done
    self.config_state = "u"
    # version of config build
    self.config_version_build = 0
    # version of config in send state
    self.config_version_send = 0
    # version of config installed
    self.config_version_installed = 0
    # start of send
    self.send_time = 0
    # lut: send_time -> config_version_send
    self.send_time_lut = {}
    # lut: config_version_send -> number transmitted
    self.num_send = {}
    # distribution state
    self.dist_ok = True
    # flag for reload after sync
    self.reload_after_sync_flag = False
    # relayer info (== icsw software version)
    # clear md_struct
    self.__md_struct = None
    # raw info, versions are placeholders until recovered or reported
    version_info = {
        "relayer_version": "?.?-0",
        "mon_version": "?.?-0",
        "livestatus_version": "?.?",
    }
    self.__raw_info = {
        "version": version_info,
        # system flags
        "sysinfo": {},
        "name": self.__slave_name,
        "master": self.master or "",
        "latest_contact": 0,
    }
    # try to get relayer / mon_version from latest build
    build_model = mon_dist_master if self.master else mon_dist_slave
    stored_builds = build_model.objects.filter(
        Q(device=self.monitor_server)
    ).order_by("-pk")
    if len(stored_builds) > 0:
        latest_build = stored_builds[0]
        for _attr in ["mon_version", "relayer_version", "livestatus_version"]:
            self.__raw_info["version"][_attr] = getattr(latest_build, _attr)
        self.log("recovered {} from DB".format(self.vers_info))
def _call(self, cur_inst):
    """
    Report the 0MQ id of this server.

    The id from routing.get_server_uuid("server") is stored in the server
    command of cur_inst under "zmq_id" and repeated in the result string.
    """
    server_zmq_id = routing.get_server_uuid("server")
    srv_com = cur_inst.srv_com
    srv_com["zmq_id"] = server_zmq_id
    result_str = "0MQ_ID is {}".format(server_zmq_id)
    srv_com.set_result(result_str)
def _check_for_slaves(self, **kwargs):
    """
    Build the sync configs for the master and all slave servers.

    The master is the device with pk global_config["SERVER_IDX"], slaves are
    all other devices with the monitor_slave service config. For every
    server a SyncConfig is created and registered in the lookup tables (by
    pk, full name and uuid). The local relayer is registered as remote and
    the collected send data is distributed via pool message and as
    "distribute_info" sync command.

    :param kwargs: not used
    """
    master_server = device.objects.get(Q(pk=global_config["SERVER_IDX"]))
    slave_enum_name = icswServiceEnum.monitor_slave.name
    # all devices with the slave config, excluding the master server
    slave_servers = device.objects.exclude(
        Q(pk=master_server.idx)
    ).filter(
        Q(device_config__config__config_service_enum__enum_name=slave_enum_name)
    ).select_related(
        "domain_tree_node"
    )
    # slave configs
    self.__master_config = SyncConfig(
        self,
        master_server,
        distributed=len(slave_servers) > 0,
    )
    # the master config is part of the lookup tables, too
    self.__slave_configs = {master_server.pk: self.__master_config}
    self.__slave_lut = {
        master_server.full_name: master_server.pk,
        master_server.uuid: master_server.pk,
    }
    # connect to local relayer
    self.__primary_slave_uuid = routing.get_server_uuid(
        icswServiceEnum.monitor_slave,
        master_server.uuid
    )
    self.send_pool_message("set_sync_master_uuid", self.__primary_slave_uuid)
    self.log(
        " master {} (IP {}, {})".format(
            master_server.full_name,
            "127.0.0.1",
            self.__primary_slave_uuid
        )
    )
    self.send_pool_message(
        "register_remote",
        "127.0.0.1",
        self.__primary_slave_uuid,
        icswServiceEnum.monitor_slave.name
    )
    _send_data = [self.__master_config.get_send_data()]
    if not len(slave_servers):
        self.log("no slave-servers found")
    else:
        slave_names = sorted([cur_dev.full_name for cur_dev in slave_servers])
        self.log(
            "found {}: {}".format(
                logging_tools.get_plural("slave_server", len(slave_servers)),
                ", ".join(slave_names)
            )
        )
        for cur_dev in slave_servers:
            slave_config = SyncConfig(
                self,
                cur_dev,
                slave_name=cur_dev.full_name,
                master_server=master_server,
            )
            self.__slave_configs[cur_dev.pk] = slave_config
            self.__slave_lut[cur_dev.full_name] = cur_dev.pk
            self.__slave_lut[cur_dev.uuid] = cur_dev.pk
            self.log(
                " slave {} (IP {}, {})".format(
                    slave_config.monitor_server.full_name,
                    slave_config.slave_ip,
                    slave_config.monitor_server.uuid
                )
            )
            _send_data.append(slave_config.get_send_data())
            # slaves are not registered one by one here, the former
            # "register_slave" pool message is disabled
    # send distribution info to local syncer
    distr_info = server_command.srv_command(
        command="distribute_info",
        info=server_command.compress(_send_data, marshal=True),
    )
    self.send_pool_message("distribution_info", _send_data)
    self.send_sync_command(distr_info)
def network_bind(self, **kwargs):
    """
    Create and bind the 0MQ socket(s) of this process.

    The main socket is stored on self under main_socket_name, the sockets for
    virtual IPs (and an extra localhost bind) are appended to the list stored
    under virtual_sockets_name.

    Recognised keyword arguments:
      * server_type / service_type_enum: service whose "command" port is
        looked up via InstanceXML
      * bind_port: port to use when neither of the above is given
      * need_all_binds (False): raise ValueError if any bind failed
      * pollin (None): handler to register for POLLIN on the sockets
      * ext_call (False): passed through to register_poller
      * immediate (True): passed through to process_tools.get_socket
      * main_socket_name ("main_socket"), virtual_sockets_name ("virtual_sockets")
      * bind_to_localhost (False): add a bind to 127.0.0.1 if no local IP in
        127.0.0.0/8 is part of the bind list
      * socket_type ("ROUTER"): passed through to process_tools.get_socket
      * client_type: build the bind id from the local uuid and the uuid postfix
        of this client type, no device recognition is done
      * simple_server_bind (False): skip the device recognition

    :raises KeyError: if none of bind_port, service_type_enum, server_type is given
    :raises ValueError: if a bind failed and need_all_binds is set
    """
    _need_all_binds = kwargs.get("need_all_binds", False)
    pollin = kwargs.get("pollin", None)
    ext_call = kwargs.get("ext_call", False)
    immediate = kwargs.get("immediate", True)
    if "server_type" in kwargs:
        _inst = InstanceXML(log_com=self.log)
        _srv_type = kwargs["server_type"]
        bind_port = _inst.get_port_dict(_srv_type, ptype="command")
    elif "service_type_enum" in kwargs:
        _inst = InstanceXML(log_com=self.log)
        _srv_type = kwargs["service_type_enum"]
        bind_port = _inst.get_port_dict(_srv_type, ptype="command")
    elif "bind_port" in kwargs:
        # NOTE(review): _srv_type stays unbound on this path, so the call to
        # get_server_uuid(_srv_type) below fails unless client_type is given
        bind_port = kwargs["bind_port"]
    else:
        raise KeyError("neither bind_port, service_type_enum nor server_type defined in kwargs")
    main_socket_name = kwargs.get("main_socket_name", "main_socket")
    virtual_sockets_name = kwargs.get("virtual_sockets_name", "virtual_sockets")
    bind_to_localhost = kwargs.get("bind_to_localhost", False)
    _sock_type = kwargs.get("socket_type", "ROUTER")
    if "client_type" in kwargs:
        # client: id is "<urn of local uuid>:<postfix of client type>:"
        uuid = uuid_tools.get_uuid().get_urn()
        if not uuid.startswith("urn"):
            uuid = "urn:uuid:{}".format(uuid)
        self.bind_id = "{}:{}:".format(
            uuid,
            InstanceXML(quiet=True).get_uuid_postfix(kwargs["client_type"]),
        )
        dev_r = None
    else:
        # imported locally, only needed for the server case
        from initat.tools import cluster_location
        from initat.cluster.backbone.routing import get_server_uuid
        self.bind_id = get_server_uuid(_srv_type)
        if kwargs.get("simple_server_bind", False):
            dev_r = None
        else:
            # device recognition
            dev_r = cluster_location.DeviceRecognition()
    # virtual sockets
    if hasattr(self, virtual_sockets_name):
        _virtual_sockets = getattr(self, virtual_sockets_name)
    else:
        _virtual_sockets = []
    # main socket
    _main_socket = None
    # create bind list, entries are (is master bind, list of bind strings, bind id, device or None)
    if dev_r and dev_r.device_dict:
        # all IPs known to the device recognition (local and from ip_r_lut)
        _bind_ips = set(
            list(dev_r.local_ips) + sum(
                [
                    _list for _dev, _list in dev_r.ip_r_lut.iteritems()
                ],
                []
            )
        )
        # complex bind
        master_bind_list = [
            (
                True,
                [
                    "tcp://{}:{:d}".format(_local_ip, bind_port) for _local_ip in dev_r.local_ips
                ],
                self.bind_id,
                None,
            )
        ]
        _virt_list = []
        for _dev, _ip_list in dev_r.ip_r_lut.iteritems():
            if _dev.pk != dev_r.device.pk:
                # other device: bind with the uuid of that device
                _virt_list.append(
                    (
                        False,
                        [
                            "tcp://{}:{:d}".format(_virtual_ip, bind_port) for _virtual_ip in _ip_list
                        ],
                        # ignore local device
                        get_server_uuid(_srv_type, _dev.uuid),
                        _dev,
                    )
                )
            else:
                self.log(
                    "ignoring virtual IP list ({}) (same device)".format(
                        ", ".join(sorted(_ip_list)),
                    )
                )
        master_bind_list.extend(_virt_list)
        # we have to bind to localhost but localhost is not present in bind_list, add master_bind
        if bind_to_localhost and not any([_ip.startswith("127.") for _ip in _bind_ips]):
            self.log(
                "bind_to_localhost is set but not IP in range 127.0.0.0/8 found in list, adding virtual_bind",
                logging_tools.LOG_LEVEL_WARN
            )
            master_bind_list.append(
                (
                    False,
                    [
                        "tcp://127.0.0.1:{:d}".format(bind_port)
                    ],
                    self.bind_id,
                    None,
                )
            )
    else:
        # simple bind
        master_bind_list = [
            (
                True,
                [
                    "tcp://*:{:d}".format(bind_port)
                ],
                self.bind_id,
                None,
            )
        ]
    _errors = []
    # pprint.pprint(master_bind_list)
    # bind strings already used, every address is bound only once
    bound_list = set()
    for master_bind, bind_list, bind_id, bind_dev in master_bind_list:
        # one socket per entry, bound to all bind strings of the entry
        client = process_tools.get_socket(
            self.zmq_context,
            _sock_type,
            identity=bind_id,
            immediate=immediate
        )
        for _bind_str in bind_list:
            if _bind_str in bound_list:
                self.log(
                    "bind_str '{}' (for {}) already used, skipping ...".format(
                        _bind_str,
                        " device '{}'".format(bind_dev) if bind_dev is not None else " master device",
                    ),
                    logging_tools.LOG_LEVEL_ERROR
                )
            else:
                bound_list.add(_bind_str)
                try:
                    client.bind(_bind_str)
                except zmq.ZMQError:
                    # remember the failure, it is only raised at the end (need_all_binds)
                    self.log(
                        "error binding to {}: {}".format(
                            _bind_str,
                            process_tools.get_except_info(),
                        ),
                        logging_tools.LOG_LEVEL_CRITICAL
                    )
                    _errors.append(_bind_str)
                else:
                    self.log("bound {} to {} with id {}".format(_sock_type, _bind_str, bind_id))
        if pollin:
            self.register_poller(client, zmq.POLLIN, pollin, ext_call=ext_call, bind_id=bind_id)
        if master_bind:
            _main_socket = client
        else:
            _virtual_sockets.append(client)
    # the sockets are stored on self even if some binds failed
    setattr(self, main_socket_name, _main_socket)
    setattr(self, virtual_sockets_name, _virtual_sockets)
    if _errors and _need_all_binds:
        raise ValueError("{} went wrong: {}".format(logging_tools.get_plural("bind", len(_errors)), ", ".join(_errors)))