def zfm_load_configuration(config_file, zfm_config):
    """Load whitespace-separated ``key value`` pairs from a config file.

    Everything after a ``#`` on a line is treated as a comment and blank
    lines are skipped.  Every remaining line must consist of exactly two
    whitespace-separated tokens: the key and its (string) value.

    Args:
        config_file: path of the file to read.
        zfm_config: dict updated in place with the parsed entries.  Valid
            lines are stored even when other lines are rejected.

    Returns:
        True if the file was read and every line parsed; False if the file
        does not exist or at least one line was malformed.
    """
    #
    # Open the file for reading.
    #
    try:
        f = open(config_file)
    except FileNotFoundError:
        Log.error('no such file: {}', config_file)
        return False

    #
    # Read the file line by line. Strip out the comments and then split the
    # line into key and value. The 'with' guarantees the file is closed.
    #
    errors = False
    with f:
        for line in f:
            kv_line = line.partition('#')[0].strip()
            if not kv_line:
                continue

            tokens = kv_line.split()
            if len(tokens) == 2:
                key, value = tokens
                zfm_config[key] = value
            else:
                Log.error('invalid line: {}', line.rstrip())
                errors = True

    return not errors
def check(self):
    """Fetch the port metrics and report 'Gen-Z' counters that changed.

    The first successful call only records a baseline.  Later calls compare
    every counter under ``'Gen-Z'`` with the stored sample, log each one
    whose value differs, and then store the new sample.

    Returns:
        The falsy status from ``self.get()`` when the fetch fails, otherwise
        a truthy value (True on the baseline call, the fetch status after
        that).
    """
    ok, latest = self.get()
    if not ok:
        Log.error('can\'t fetch port metrics for sweep of remote node')
        return ok

    # No previous sample: remember this one, there is nothing to compare.
    if not self.data:
        self.data = copy.deepcopy(latest)
        return True

    # Flag every counter whose value moved since the last sample.
    for counter, current_value in latest['Gen-Z'].items():
        previous_value = self.data['Gen-Z'][counter]
        if current_value != previous_value:
            Log.error('{:<20} port {:<2} : {:<25} : {} -> {}',
                      self.node.name, self.port.index, counter,
                      previous_value, current_value)

    self.data = copy.deepcopy(latest)
    return ok
def validate(self):
    """Check that the node reports the remote neighbor we expect.

    Refreshes the port attributes, then compares the remote component UID
    and port found under ``Oem/Hpe/RemoteComponentID`` with
    ``self.remote_uid`` and ``self.remote_port``.

    Returns:
        True when both match.  False, with the port marked inactive, when
        the attributes can't be fetched or the values differ.
    """
    if not self.query():
        Log.error(
            '{}[{}].validate : can\'t fetch attributes - downing port',
            self.node.name, self.index)
        self.active = False
        return False

    # The node's own view of its remote neighbor.
    remote_id = self.current['Oem']['Hpe']['RemoteComponentID']
    reported_uid = remote_id['UID']
    reported_port = remote_id['Port']

    # Our view agrees with the node's view: the port is wired as expected.
    if reported_uid == self.remote_uid and reported_port == self.remote_port:
        return True

    Log.error('{}[{}] didn\'t validate: 0x{:X}:{:<2} vs 0x{:X}:{:<2}',
              self.node.name, self.index, self.remote_uid,
              self.remote_port, reported_uid, reported_port)
    self.active = False
    return False
def sweep(self):
    """Sweep this port: check its metrics, then its link/interface state.

    Returns:
        The falsy result of ``self.metrics.check()`` when the metrics can't
        be read, otherwise True if both link and interface are 'Enabled'
        and False if not.  The port is marked inactive on any failure.
    """
    metrics_ok = self.metrics.check()
    if not metrics_ok:
        Log.error('{}[{}].sweep : can\'t read metrics - downing port',
                  self.node.name, self.index)
        self.active = False
        return metrics_ok

    #
    # A good link with a disabled interface may mean someone is resetting
    # metrics (done by resetting the interface). Give the interface two
    # seconds to come back before judging it.
    #
    link_state, if_state = self.link_interface_state()
    if link_state == 'Enabled' and if_state == 'Disabled':
        time.sleep(2)
        link_state, if_state = self.link_interface_state()

    if link_state == 'Enabled' and if_state == 'Enabled':
        return True

    Log.error(
        '{}[{}].sweep : link/interface in bad state - downing port',
        self.node.name, self.index)
    self.active = False
    return False
def is_enabled(self):
    """Report whether this port is healthy and fully enabled.

    Refreshes the port attributes and requires health 'OK', state
    'Enabled' and interface state 'Enabled'.

    Returns:
        True when all three conditions hold, False otherwise.  The port is
        marked inactive when the attributes can't be read or the health is
        not 'OK'; a port that is merely not ready yet stays active.
    """
    #
    # Make sure that this port is enabled.
    #
    if not self.query():
        Log.error('{}[{}].enable : can\'t read status',
                  self.node.name, self.index)
        self.active = False
        return False

    port_status = self.current['Status']
    state, health = port_status['State'], port_status['Health']
    if_state = self.current['InterfaceState']

    if health != 'OK':
        Log.error('{}[{}].enable : port health {} is bad',
                  self.node.name, self.index, health)
        # Down *this* port; the original assigned to an undefined name
        # 'port', which raised NameError instead.
        self.active = False
        return False

    if state != 'Enabled':
        Log.debug('{}[{}].enable : state not yet ready {}/{}',
                  self.node.name, self.index, state, health)
        return False

    if if_state != 'Enabled':
        Log.debug('{}[{}].enable : interface not yet enabled {}',
                  self.node.name, self.index, if_state)
        return False

    return True
def query(self):
    """Refresh ``self.current`` with the port attributes from the node.

    Returns:
        The status returned by ``self.node.get``.  ``self.current`` is only
        replaced when that status is truthy.
    """
    ok, attributes = self.node.get(self.name)
    if not ok:
        Log.error('failed to get {}/{} attributes',
                  self.node.name, self.index)
        return ok

    # Good reply: this becomes the current view of the port.
    self.current = attributes
    return ok
def load(self):
    """Push the port's tables to the node and reset its metrics.

    Calls ``patch()`` on ``lprt``, ``mprt`` and ``vcat`` and ``reset()`` on
    ``metrics``, skipping any of them that is falsy.  Every step runs even
    if an earlier one failed.

    Returns:
        The combined (and-ed) status of all steps; the port is marked
        inactive when it is falsy.
    """
    steps = (
        ('lprt', 'patch'),
        ('mprt', 'patch'),
        ('vcat', 'patch'),
        ('metrics', 'reset'),
    )

    status = True
    for attribute, action in steps:
        target = getattr(self, attribute)
        if target:
            status &= getattr(target, action)()

    if not status:
        Log.error('{}[{}].load : can\'t load attributes - downing port',
                  self.node.name, self.index)
        self.active = False

    return status
def reply(self, status, headers=None, data=None):
    """Send an HTTP response to the requester (best effort).

    Args:
        status: HTTP status code passed to ``send_response``.
        headers: optional mapping of header names to values.
        data: optional response body as a str; it is encoded before being
            written to ``self.wfile``.

    Failures while sending are logged, not raised.
    """
    try:
        self.send_response(status)
        if headers:
            for key, value in headers.items():
                self.send_header(key, value)
        self.end_headers()

        if data:
            self.wfile.write(data.encode())
    except Exception:
        # Still best effort, but no longer a bare 'except:' so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        Log.error('can\'t reply to requester')
def initialize(self):
    """Load the node profiles and bring the fabric up.

    Validates ``self.sweep_type``, builds ``self.nodes`` from the JSON
    files in ``self.node_file_list``, then runs the initialization steps
    in order, stopping at the first one that fails.

    Returns:
        False if the sweep type is invalid or a profile file can't be
        loaded; otherwise the status of the last step that ran.
    """
    #
    # Validate the sweep type.
    #
    if self.sweep_type not in Fabric.sweep_types:
        Log.error('invalid sweep type {}', self.sweep_type)
        return False

    #
    # Read the node profiles.
    #
    self.nodes = {}
    for filename in self.node_file_list:
        Log.info('reading {}', filename)
        try:
            with open(filename) as f:
                node_profiles = json.load(f)

            for name, profile in node_profiles.items():
                node_type = profile['type']
                self.nodes[name] = Fabric.name_classes[node_type](
                    name, profile)
        except Exception:
            # Unreadable file, bad JSON, or a malformed profile. Not a bare
            # 'except:' so Ctrl-C / SystemExit still propagate.
            Log.error('error reading {}', filename)
            return False

    #
    # There are 5 steps for node initialization:
    #   1) load the GCIDs and UID into the endpoints
    #   2) train the port links
    #   3) validate that all ports are wired correctly
    #   4) load the node attributes
    #   5) enable the port interfaces
    # followed by a verification of the state of all connected ports.
    #
    steps = (
        self.init_ids,
        self.train_ports,
        self.validate_ports,
        self.load_nodes,
        self.enable_ports,
        self.verify_fabric_health,
    )

    status = True
    for step in steps:
        status = step()
        if not status:
            break

    Log.debug('fabric initialization status = {}', status)
    return status
def wait_for(self, command, args, kwargs):
    """Run ``command`` on every active node and wait for completion.

    The command is enqueued on each active node and the nodes'
    ``<command>_done()`` methods are polled once per second until
    ``self.wait_status`` reports a non-negative result or the timeout
    expires.

    Args:
        command: command name; also selects the timer and the
            ``<command>_done`` verification method.
        args: positional arguments passed to ``node.enqueue``.
        kwargs: keyword arguments passed to ``node.enqueue``.  Modified in
            place: ``kwargs['retries']`` is set to the command's timer.

    Returns:
        True if ``self.wait_status`` reports 1 for the final node statuses,
        False otherwise (including timeout).
    """
    Log.info('starting {}...', command)

    # One timer value drives both the per-node retry budget and this wait
    # loop. The loop used to read self.timers[command] directly, which
    # raised KeyError for commands that rely on the 'default' timer.
    timeout = self.timers.get(command, self.timers['default'])
    kwargs['retries'] = timeout
    verify_name = '{}_done'.format(command)

    #
    # Tell the node threads to start the command.
    #
    active_nodes = [node for node in self.nodes.values() if node.active]
    for node in active_nodes:
        node.enqueue(command, args, kwargs)

    #
    # Wait for all of the node threads to complete.
    #
    cycles = 0
    node_statuses = [-1]
    while self.wait_status(node_statuses) < 0 and cycles < timeout:
        time.sleep(1)
        cycles += 1

        node_statuses = [
            getattr(node, verify_name)() for node in active_nodes
        ]
        if cycles % 60 == 0:
            self.wait_display(command, cycles, node_statuses)

    #
    # Check for node timeouts.
    #
    if self.wait_status(node_statuses) < 0:
        self.wait_display(command, cycles, node_statuses)

        # zip() keeps this safe when the loop never ran and node_statuses
        # is still the one-element placeholder.
        for node, node_status in zip(active_nodes, node_statuses):
            if node_status == -1:
                Log.error('{} : {} timed out', command, node.name)

    #
    # Display the command status.
    #
    command_status = self.wait_status(node_statuses) == 1
    Log.info('{} done ... status = {} cycles = {}',
             command, command_status, cycles)

    return command_status
def GET_port(self, parameters):
    """Build the 'PORT' report for this port.

    Args:
        parameters: request parameters (not used by this method).

    Returns:
        ``(200, data)`` with the port report, or ``(404, None)`` when
        ``self.query_all()`` fails.
    """
    node = self.node

    # Refresh the port attribute; without it there is nothing to report.
    if not self.query_all():
        Log.error('can\'t retrieve port attribute for {}', self.name)
        return 404, None

    current = self.current
    port_status = current['Status']
    remote_id = current['Oem']['Hpe']['RemoteComponentID']

    # Keys are inserted in the same order as before.
    data = {}
    data['DataType'] = 'PORT'
    data['Timestamp'] = datetime.datetime.now().isoformat()
    data['Node'] = node.profile['name']
    data['Hostname'] = node.name
    data['Index'] = self.index
    data['ConfigState'] = 'Enabled' if self.active else 'Disabled'
    data['Status'] = '{}/{}'.format(port_status['State'],
                                    port_status['Health'])
    data['LinkState'] = current['LinkState']
    data['InterfaceState'] = current['InterfaceState']
    data['Remote'] = '0x{:08X}/{:<2}'.format(remote_id['UID'],
                                             remote_id['Port'])
    data['Metrics'] = self.current_metrics

    return 200, data
def locate_node(self, node_id):
    """Find a node by name, topoid, geoid, or UID.

    Args:
        node_id: an int UID, or a string that is matched against each
            node's name, ``topoid`` and ``geoid``; an all-digit string is
            additionally matched against ``uid`` as a decimal number.

    Returns:
        The first matching node in ``self.nodes`` order, or None (after
        logging an error) when nothing matches.
    """
    # -1 is the "no numeric identifier" placeholder.
    numeric_id = -1
    if type(node_id) is int:
        numeric_id = node_id
    elif type(node_id) is str and node_id.isdigit():
        # Parse as decimal. The previous int(node_id, 0) raised ValueError
        # for digit strings with leading zeros such as '007'.
        try:
            numeric_id = int(node_id)
        except ValueError:
            # isdigit() accepts a few characters that int() rejects.
            numeric_id = -1

    for name, node in self.nodes.items():
        if (name == node_id
                or node.topoid == node_id
                or node.geoid == node_id
                or node.uid == numeric_id):
            return node

    Log.error('invalid node identifier {}', node_id)
    return None
def get(node, attribute):
    """GET an attribute from a node over REST and decode the JSON reply.

    Args:
        node: object whose ``address`` is used as the host part of the URL.
        attribute: URL path appended directly after the address.

    Returns:
        ``(True, data)`` with the decoded JSON on success, or
        ``(False, None)`` when the request does not return status 200 or
        the reply is not valid JSON.
    """
    #
    # Send the REST command to the server.
    #
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    url = 'http://{address}{attribute}'.format(address=node.address,
                                               attribute=attribute)

    status, reply = Rest._rest_function(requests.get, url, headers=headers)
    if status != 200:
        Log.error('Rest(GET): {} failed with status {}', url, status)
        return False, None

    #
    # Good reply - need to convert from string -> data. Strip an optional
    # <pre>...</pre> wrapper first.
    #
    if reply.startswith('<pre>'):
        reply = reply[5:]
    if reply.endswith('</pre>'):
        reply = reply[:-6]

    try:
        data = json.loads(reply)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; anything else is
        # unexpected and is allowed to propagate.
        Log.error('Rest.get() : invalid JSON returned.')
        Log.error('Data={}', reply)
        return False, None

    return True, data
def __init__(self, hostname, fabric):
    """Create the ZFM HTTP server.

    Args:
        hostname: ``host`` or ``host:port``; the port defaults to 60000.
        fabric: object made available to request handlers as
            ``self.server.fabric``.

    Exits the process with status 1 when the host can't be resolved or the
    HTTP server can't be created.
    """
    #
    # Resolve the hostname.
    #
    hostname, _, hostport = hostname.partition(':')
    if not hostport:
        hostport = '60000'

    try:
        hostname = socket.gethostbyname(hostname)
    except (OSError, UnicodeError):
        Log.error('can\'t resolve the ZFM server address {}', hostname)
        # Non-zero so callers can tell startup failed.
        sys.exit(1)

    #
    # Setup the server environment.
    #
    self.address = hostname
    self.port = int(hostport)
    self.server = None

    try:
        self.server = HTTPServer((self.address, self.port), GenZHandler)
    except (OSError, OverflowError):
        Log.error('can\'t create HTTP server')

        # Best-effort diagnostic: show who owns the port. lsof exits
        # non-zero when nothing matches and may not be installed; neither
        # case may hide the real error above.
        try:
            output = subprocess.check_output(
                ['lsof', '-i:{}'.format(self.port)])
        except (OSError, subprocess.CalledProcessError):
            pass
        else:
            Log.error(output.decode('utf-8'))
        sys.exit(1)

    self.server.formatter = ZFMFormatter()
    self.server.node_address = hostname
    self.server.redfish_base = os.path.join('redfish', 'v1')
    self.server.fabric = fabric
def enable(self):
    """Enable the port interface if the port is ready for it.

    Refreshes the port attributes.  A port whose state is already
    'Enabled' is left alone.  Otherwise the interface is patched to
    'Enabled' only when the port is in state 'StandbyOffline' with link
    'Enabled' and interface 'Disabled'.

    Returns:
        True if the port was already enabled; the patch status if a patch
        was attempted; False if the port can't be read, is unhealthy, or
        is not in the required state.  The port is marked inactive on
        read failure, bad health, or patch failure.
    """
    #
    # Get the current port state.
    #
    if not self.query():
        Log.error('{}[{}].enable : can\'t read status',
                  self.node.name, self.index)
        self.active = False
        return False

    port_status = self.current['Status']
    state, health = port_status['State'], port_status['Health']
    link_state = self.current['LinkState']
    if_state = self.current['InterfaceState']

    if health != 'OK':
        Log.error('{}[{}].enable : port health {} is bad',
                  self.node.name, self.index, health)
        # Down *this* port; the original assigned to an undefined name
        # 'port', which raised NameError instead.
        self.active = False
        return False

    #
    # Verify that the port is in the correct state for enabling the
    # interface.
    #
    if state == 'Enabled':
        Log.debug('{}[{}].enable : already enabled {}/{}',
                  self.node.name, self.index, state, health)
        return True

    if state != 'StandbyOffline':
        Log.debug('{}[{}].enable : state not ready {}/{}',
                  self.node.name, self.index, state, health)
        return False

    if link_state != 'Enabled':
        Log.debug('{}[{}].enable : link state not ready {}',
                  self.node.name, self.index, link_state)
        return False

    if if_state != 'Disabled':
        Log.debug('{}[{}].enable : interface state not ready {}',
                  self.node.name, self.index, if_state)
        return False

    #
    # If we are in StandbyOffline mode, transition to Enabled.
    #
    values = {'InterfaceState': 'Enabled'}
    status, _ = self.node.patch(self.name, values)
    if not status:
        Log.error('{}[{}].enable : can\'t enable interface - downing port',
                  self.node.name, self.index)
        self.active = False

    Log.debug('{}[{}].enable : status={}',
              self.node.name, self.index, status)
    return status
def verify_fabric_health(self):
    """Check the remote end of every active port on every active node.

    For each such port the remote node is looked up by ``remote_uid`` and
    the remote port by ``remote_port``; the remote port must report health
    'OK' and interface state 'Enabled'.  Every failure is logged and
    counted.

    Returns:
        True when no failure was found, False otherwise.
    """
    failures = 0

    for name, node in self.nodes.items():
        if not node.active:
            continue

        for port in node.ports:
            if not port.active:
                continue

            peer_node = self.locate_node(port.remote_uid)
            if not peer_node:
                Log.error('failed node connectivity test : {}/{}',
                          name, port.index)
                failures += 1
                continue

            # At most one failure is recorded per port.
            peer_port = peer_node.ports[port.remote_port]
            if not peer_port:
                Log.error('failed port connectivity test : {}/{}',
                          name, port.index)
                failures += 1
            elif peer_port.current['Status']['Health'] != 'OK':
                Log.error('pair not healthy : {}/{} <-> {}/{}',
                          name, port.index, peer_node.name, peer_port.index)
                failures += 1
            elif peer_port.current['InterfaceState'] != 'Enabled':
                Log.error('pair not enabled : {}/{} <-> {}/{}',
                          name, port.index, peer_node.name, peer_port.index)
                failures += 1

    if failures > 0:
        Log.error('can\'t verify remote port state')

    return failures == 0
def format(self, data, consumer):
    """Format ``data`` for ``consumer`` with the registered formatter.

    Args:
        data: mapping with a 'DataType' key that selects the formatter.
        consumer: key into ``self.delimiters`` and into the per-type
            formatter table.

    Returns:
        The selected formatter's result, or None (after logging) when the
        consumer is unknown, 'DataType' is missing or empty, or no
        formatter is registered for the type.
    """
    data_type = data.get('DataType', None)

    if consumer not in self.delimiters:
        Log.error('invalid consumer ({}) specified for formatter', consumer)
        return None

    if not data_type:
        Log.error('data does not contain the "DataType" key')
        return None

    if data_type not in self.funcs:
        Log.error('invalid type ({}) specified for formatter',
                  data['DataType'])
        return None

    # We have a proper call, execute the formatter.
    formatter = self.funcs[data_type][consumer]
    delimiter = self.delimiters[consumer]
    return formatter(data, consumer, delimiter)
def is_trained(self):
    """Report whether the port has finished training.

    Refreshes the port attributes and inspects the status state and
    health.

    Returns:
        True for states 'StandbyOffline' and 'Enabled'; False for
        'Disabled' and 'Starting'.  Also False, with the port marked
        inactive, when the attributes can't be read, the health is not
        'OK', or the state is none of the above.
    """
    if not self.query():
        Log.error('{}[{}].train : can\'t read status',
                  self.node.name, self.index)
        self.active = False
        return False

    port_status = self.current['Status']
    state, health = port_status['State'], port_status['Health']

    # Check the port health.
    if health != 'OK':
        Log.error('{}[{}].train : port health {} is bad',
                  self.node.name, self.index, health)
        self.active = False
        return False

    # Check the state.
    if state in ('Disabled', 'Starting'):
        return False
    if state in ('StandbyOffline', 'Enabled'):
        return True

    Log.error('{}[{}].train : port state {} is bad',
              self.node.name, self.index, state)
    self.active = False
    return False
def patch(node, attribute, value):
    """PATCH an attribute on a node over REST.

    Args:
        node: object whose ``address`` is used as the host part of the URL.
        attribute: URL path appended directly after the address.
        value: JSON-serializable payload.

    Returns:
        ``(ok, None)`` where ``ok`` is True only when the server answered
        with status 204.  ``(False, None)`` when ``value`` can't be
        serialized.
    """
    #
    # Convert the input to JSON format.
    #
    try:
        data = json.dumps(value)
    except (TypeError, ValueError):
        # TypeError: unserializable object; ValueError: e.g. circular
        # reference. Anything else is unexpected and propagates.
        Log.error('Rest.(PATCH) : could not convert input to JSON.')
        Log.error('Input={}', value)
        return False, None

    #
    # Send the REST command to the server.
    #
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    url = 'http://{address}{attribute}'.format(address=node.address,
                                               attribute=attribute)

    status, _ = Rest._rest_function(requests.patch, url,
                                    headers=headers, data=data)
    succeeded = status == 204
    if not succeeded:
        Log.error('Rest(PATCH): {} failed with status {}', url, status)

    return succeeded, None
def enable_ports(self):
    """Run the 'enable' command via ``wait_for``; return its status."""
    enabled = self.wait_for('enable', [], {})
    if enabled:
        return enabled

    Log.error('ports not enabled')
    return enabled
def init_ids(self):
    """Run the 'init' command via ``wait_for``; return its status."""
    inited = self.wait_for('init', [], {})
    if inited:
        return inited

    Log.error('nodes not inited')
    return inited
def _resolve_get(self, tokens, parameters):
    """Run the GET handler selected by the URL path tokens.

    Valid requests are:
        []           -> fabric request
        [name]       -> node request
        [name, port] -> port request

    Returns:
        The ``(status, data)`` pair from the selected GET handler, or
        ``(404, None)`` after logging when the tokens don't identify a
        valid target.
    """
    fabric = self.server.fabric

    #
    # Validate that the parameters are in range and correct.
    #
    if len(tokens) > 2:
        Log.error('too many fields in URL {}', self.path)
        return 404, None

    if len(tokens) == 2 and not tokens[1].isdigit():
        Log.error('invalid URL (port incorrect) {}', self.path)
        return 404, None

    if not tokens:
        return fabric.GET(parameters)

    node = fabric.locate_node(tokens[0])
    if not node:
        Log.error('invalid URL (node incorrect) {}', self.path)
        return 404, None

    if len(tokens) == 1:
        return node.GET(parameters)

    value = int(tokens[1])
    if not (node.profile['portStart'] <= value < node.profile['portEnd']):
        Log.error('invalid URL (port out of range) {}', self.path)
        return 404, None

    return node.ports[value].GET(parameters)


def do_GET(self):
    """Handle an HTTP GET for the fabric, a node, or a port.

    The URL path selects the target (see ``_resolve_get``) and the
    ``format`` query parameter selects the output format (default
    'BROWSER').  A response is always sent through ``self.reply``.
    Previously the error paths only returned ``(404, None)``, which
    http.server ignores, so the client got no response at all.  This
    method now always returns None.
    """
    headers = {
        'Content-type': 'text/html',
        'Cache-Control': 'no-cache, no-store, must-revalidate',
        'Pragma': 'no-cache',
        'Expires': '0',
    }

    if 'favicon.ico' in self.path:
        self.reply(404, headers)
        return

    Log.info('GET {}:{}', self.server.node_address, self.path)

    parsed_url = parse.urlsplit(self.path)
    parameters = parse.parse_qs(parsed_url.query)
    if 'format' not in parameters:
        parameters['format'] = ['BROWSER']

    #
    # Remove empty strings from both ends of the parsed URL path.
    #
    tokens = parsed_url.path.split('/')
    while tokens and tokens[0] == '':
        del tokens[0]
    while tokens and tokens[-1] == '':
        del tokens[-1]

    #
    # Execute the command at the appropriate level.
    #
    status, data = self._resolve_get(tokens, parameters)

    payload = None
    if status == 200:
        data['ZFM'] = self.server.node_address
        payload = self.server.formatter.format(data, parameters['format'][0])

    #
    # Send the reply back to the requester.
    #
    self.reply(status, headers, payload)
def train_ports(self):
    """Run the 'train' command via ``wait_for``; return its status."""
    trained = self.wait_for('train', [], {})
    if trained:
        return trained

    Log.error('ports not trained')
    return trained
def validate_ports(self):
    """Run the 'validate' command via ``wait_for``; return its status."""
    validated = self.wait_for('validate', [], {})
    if validated:
        return validated

    Log.error('ports not validated')
    return validated
help='sweep type', required=False, default='light') args = vars(parser.parse_args()) args['conf'] = 'zfm.conf' Log.Init(args['log']) # # Chdir to the config directory. # try: os.chdir(args['dir']) except: Log.error('{} is not accessible', args['dir']) sys.exit(1) zfm_config_file = args['conf'] zfm_sweep_type = args['sweep'] hostname = args['host'] # # Read the ZFM configuration file. # zfm_configuration = {} if not zfm_load_configuration(zfm_config_file, zfm_configuration): Log.error('invalid ZFM config file') sys.exit(1) timers = {
def sweep(self):
    """Run the 'sweep' command with ``self.sweep_type`` via ``wait_for``.

    Returns:
        The status returned by ``wait_for``.
    """
    swept = self.wait_for('sweep', [self.sweep_type], {})
    if swept:
        return swept

    Log.error('nodes not swept')
    return swept
def load_nodes(self):
    """Run the 'load' command via ``wait_for``; return its status."""
    loaded = self.wait_for('load', [], {})
    if loaded:
        return loaded

    Log.error('ports not loaded')
    return loaded
def train(self):
    """Start link training on the port when it is fully disabled.

    Refreshes the port attributes, checks health, and checks that the
    (status, link, interface) combination is one of the consistent ones.
    Only a port that is 'Disabled' on all three gets its LinkState patched
    to 'Enabled'; every other consistent combination is left untouched.

    Returns:
        True when the port is in a consistent state and any required patch
        succeeded; False otherwise.  The port is marked inactive when it
        can't be read, is unhealthy, or the patch fails, but not when the
        state combination is merely inconsistent.
    """
    if not self.query():
        Log.error('{}[{}].train : can\'t read status',
                  self.node.name, self.index)
        self.active = False
        return False

    port_status = self.current['Status']
    state, health = port_status['State'], port_status['Health']
    link_state = self.current['LinkState']
    if_state = self.current['InterfaceState']

    Log.debug('{}[{}].train : S={}/{} LS={} IF={}',
              self.node.name, self.index,
              state, health, link_state, if_state)

    # Check the port health.
    if health != 'OK':
        Log.error('{}[{}].train : port health {} is bad',
                  self.node.name, self.index, health)
        self.active = False
        return False

    #
    # Check the port state for consistency. Only these combinations of
    # (status, link, interface) are accepted; every other combination,
    # including unknown values, is rejected.
    #
    consistent_states = {
        ('Disabled', 'Disabled', 'Disabled'),
        ('Starting', 'Enabled', 'Disabled'),
        ('StandbyOffline', 'Enabled', 'Disabled'),
        ('StandbyOffline', 'Enabled', 'Enabled'),
        ('Enabled', 'Enabled', 'Enabled'),
    }
    observed = (state, link_state, if_state)
    if observed not in consistent_states:
        Log.error('{}[{}].train : invalid port state {} {} {}',
                  self.node.name, self.index, state, link_state, if_state)
        return False

    # Only train up if we are in the correct (fully disabled) state.
    if observed != ('Disabled', 'Disabled', 'Disabled'):
        return True

    status, _ = self.node.patch(self.name, {'LinkState': 'Enabled'})
    if not status:
        Log.error('{}[{}].train : can\'t set LinkState',
                  self.node.name, self.index)
        self.active = False
        return False

    return True