def get(self, **kwargs):
    """Replacing the original interface name in returned result"""
    addnl_fields = kwargs.pop('addnl_fields', [])
    columns = kwargs.get('columns', ['default'])
    vrf = kwargs.pop('vrf', None)
    peer = kwargs.pop('peer', None)
    hostname = kwargs.pop('hostname', None)

    # Columns fetched purely for post-processing; stripped before returning
    drop_cols = ['origPeer', 'peerHost']
    addnl_fields.append('origPeer')
    if columns != ['*']:
        for extra in ('peerIP', 'updateSource'):
            if extra not in columns:
                addnl_fields.append(extra)
                drop_cols.append(extra)

    df = super().get(addnl_fields=addnl_fields, **kwargs)
    if df.empty:
        return df

    sch = SchemaForTable(self.iobj._table, self.schemas)
    query_str = build_query_str([], sch, vrf=vrf, peer=peer,
                                hostname=hostname)

    # Restore the user-configured peer name wherever one was recorded
    if 'peer' in df.columns:
        df['peer'] = np.where(df['origPeer'] != "", df['origPeer'],
                              df['peer'])

    if 'peerHostname' not in df.columns:
        mdf = df
    else:
        mdf = self._get_peer_matched_df(df)
        drop_cols = [col for col in drop_cols if col in mdf.columns]
        drop_cols.extend(list(mdf.filter(regex='_y')))

    if query_str:
        mdf = mdf.query(query_str)
    return mdf.drop(columns=drop_cols, errors='ignore')
def aver(self, **kwargs):
    """Assert that the OSPF state is OK.

    Checks for duplicate router IDs across the namespace, and compares
    per-interface OSPF parameters (timers, area, network type, subnet,
    passive, vrf) between LLDP-discovered neighbors.  Returns a dataframe
    with one row per (interface, assert reason) carrying an
    'assert' column of 'pass'/'fail'.
    """
    # Caller-supplied columns are ignored; we need a fixed column set to
    # perform the comparisons below
    kwargs.pop('columns', [])
    columns = [
        "namespace", "hostname", "vrf", "ifname", "routerId", "helloTime",
        "deadTime", "passive", "ipAddress", "isUnnumbered", "areaStub",
        "networkType", "timestamp", "area", "nbrCount",
    ]

    # we have to not filter hostname at this point because we need to
    # understand neighbor relationships
    orig_hostname = kwargs.pop('hostname', '')

    ospf_df = self.get_valid_df("ospfIf", columns=columns, **kwargs)
    if ospf_df.empty:
        return pd.DataFrame(columns=columns)

    # One (initially empty) reason-list per row; mismatch checks append to it
    ospf_df["assertReason"] = [[] for _ in range(len(ospf_df))]

    df = (ospf_df[ospf_df["routerId"] != ""].groupby(
        ["routerId", "namespace"], as_index=False)[[
            "hostname", "namespace"
        ]].agg(lambda x: x.unique().tolist())).dropna(how='any')
    # df is a dataframe with each row containing the routerId and the
    # corresponding list of hostnames with that routerId. In a good
    # configuration, the list must have exactly one entry
    # NOTE(review): the groupby above is per (routerId, namespace) but the
    # merge keys on routerId alone — confirm cross-namespace duplicates are
    # intended to match here
    ospf_df['assertReason'] = (ospf_df.merge(
        df, on=["routerId"], how="outer").apply(
            lambda x: ["duplicate routerId {}".format(x["hostname_y"])]
            if len(x['hostname_y']) != 1 else [], axis=1))

    # Now peering match
    lldpobj = LldpObj(context=self.ctxt)
    # hostname was popped above, so kwargs.get("hostname") is always '':
    # LLDP is deliberately fetched for all hosts so both ends of every
    # peering are visible
    lldp_df = lldpobj.get(namespace=kwargs.get("namespace", ""),
                          hostname=kwargs.get("hostname", ""),
                          ifname=kwargs.get("ifname", ""),
                          columns=[
                              "namespace", "hostname", "ifname",
                              "peerHostname", "peerIfname", "peerMacaddr"
                          ])
    if lldp_df.empty:
        # Without LLDP info no peering comparison is possible; exclude
        # loopbacks/SVIs (which never have LLDP peers) and fail the rest
        ospf_df = ospf_df[~(ospf_df.ifname.str.contains('loopback') |
                            ospf_df.ifname.str.contains('Vlan'))]
        ospf_df['assertReason'] = 'No LLDP peering info'
        ospf_df['assert'] = 'fail'
        return ospf_df[[
            'namespace', 'hostname', 'vrf', 'ifname', 'assertReason',
            'assert'
        ]]

    # Create a single massive DF with fields populated appropriately
    use_cols = [
        "namespace", "routerId", "hostname", "vrf", "ifname", "helloTime",
        "deadTime", "passive", "ipAddress", "areaStub", "isUnnumbered",
        "networkType", "area", "timestamp",
    ]
    int_df = ospf_df[use_cols].merge(
        lldp_df, on=["namespace", "hostname", "ifname"]) \
        .dropna(how="any")

    # filter by hostname now (int_df was built unfiltered so that peers on
    # other hosts are still present for the comparison)
    if orig_hostname:
        ospfschema = SchemaForTable('ospf', schema=self.schemas)
        hq = build_query_str([], ospfschema, hostname=orig_hostname)
        ospf_df = ospf_df.query(hq)

    if int_df.empty:
        # Weed out the loopback and SVI interfaces as they have no LLDP peers
        ospf_df = ospf_df[~(ospf_df.ifname.str.contains('loopback') |
                            ospf_df.ifname.str.contains('Vlan'))]
        ospf_df['assertReason'] = 'No LLDP peering info'
        ospf_df['assert'] = 'fail'
        return ospf_df[[
            'namespace', 'hostname', 'vrf', 'ifname', 'assertReason',
            'assert'
        ]]

    # Pair each interface (_x columns) with its LLDP peer's view (_y columns)
    ospf_df = ospf_df.merge(
        int_df,
        left_on=["namespace", "hostname", "ifname"],
        right_on=["namespace", "peerHostname", "peerIfname"]) \
        .dropna(how="any")

    # Now start comparing the various parameters
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["subnet mismatch"] if (
            (x["isUnnumbered_x"] != x["isUnnumbered_y"]) and
            (IPv4Network(x["ipAddress_x"], strict=False) != IPv4Network(
                x["ipAddress_y"], strict=False))) else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["area mismatch"] if (x["area_x"] != x["area_y"] and x[
            "areaStub_x"] != x["areaStub_y"]) else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["Hello timers mismatch"]
        if x["helloTime_x"] != x["helloTime_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["Dead timer mismatch"]
        if x["deadTime_x"] != x["deadTime_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["network type mismatch"]
        if x["networkType_x"] != x["networkType_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["passive config mismatch"]
        if x["passive_x"] != x["passive_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["vrf mismatch"] if x["vrf_x"] != x["vrf_y"] else [],
        axis=1,
    )

    # Fill up a single assert column now indicating pass/fail
    ospf_df['assert'] = ospf_df.apply(
        lambda x: 'pass' if not len(x['assertReason']) else 'fail', axis=1)

    # One output row per assert reason; interfaces with no reasons get '-'
    return (ospf_df.rename(
        index=str,
        columns={
            "hostname_x": "hostname",
            "ifname_x": "ifname",
            "vrf_x": "vrf",
        },
    )[[
        "namespace", "hostname", "ifname", "vrf", "assert", "assertReason",
        "timestamp"
    ]].explode(column='assertReason').fillna({'assertReason': '-'}))
def get_table_df(self, cfg, schemas, **kwargs) -> pd.DataFrame:
    """Use Pandas instead of Spark to retrieve the data.

    Reads the parquet folder for the requested table, applies the
    start/end/view/key-field filters, and returns the selected display
    fields, optionally sorted.
    """
    MAX_FILECNT_TO_READ_FOLDER = 10000

    self.cfg = cfg

    table = kwargs.pop("table")
    start = kwargs.pop("start_time")
    end = kwargs.pop("end_time")
    view = kwargs.pop("view")
    sort_fields = kwargs.pop("sort_fields")
    ign_key_fields = kwargs.pop("ign_key", [])
    addnl_fields = kwargs.pop("addnl_fields", [])

    # 'active' and 'timestamp' are always needed for the latest-view
    # dedup/filter logic below
    for f in ['active', 'timestamp']:
        if f not in addnl_fields:
            addnl_fields.append(f)

    sch = SchemaForTable(table, schema=schemas)
    phy_table = sch.get_phy_table_for_table()

    folder = self._get_table_directory(phy_table)

    # Restrict to a single DC if thats whats asked
    if "namespace" in kwargs:
        v = kwargs["namespace"]
        if v:
            if not isinstance(v, list):
                folder += "/namespace={}/".format(v)

    fcnt = self.get_filecnt(folder)
    if fcnt == 0:
        return pd.DataFrame()

    # We are going to hard code use_get_files until we have some automated
    # testing
    use_get_files = False

    # use_get_files = (
    #     (fcnt > MAX_FILECNT_TO_READ_FOLDER and view == "latest") or
    #     start or end
    # )

    if use_get_files:
        # Switch to more efficient method when there are lotsa files
        # Reduce I/O since that is the worst drag
        key_fields = []
        if len(kwargs.get("namespace", [])) > 1:
            del kwargs["namespace"]
        files = get_latest_files(folder, start, end, view)
    else:
        # ign_key_fields contains key fields that are not partition cols
        key_fields = [
            i for i in sch.key_fields() if i not in ign_key_fields
        ]
        filters = self.build_pa_filters(start, end, key_fields, **kwargs)

    if "columns" in kwargs:
        columns = kwargs["columns"]
        del kwargs["columns"]
    else:
        columns = ["default"]

    fields = sch.get_display_fields(columns)
    for f in addnl_fields:
        if f not in fields:
            fields.append(f)

    # Create the filter to select only specified columns
    addnl_filter = kwargs.pop('add_filter', None)
    query_str = build_query_str(key_fields, sch, **kwargs)

    # Add the ignored fields back to key fields to ensure we
    # do the drop_duplicates correctly below incl reading reqd cols
    key_fields.extend(ign_key_fields)

    # Handle the case where key fields are missing from display fields
    fldset = set(fields)
    kfldset = set(key_fields)
    add_flds = kfldset.difference(fldset)
    if add_flds:
        fields.extend(list(add_flds))

    if addnl_filter:
        # This is for special cases that are specific to an object
        if not query_str:
            query_str = addnl_filter
        else:
            query_str += ' and {}'.format(addnl_filter)

    # Restore the folder to what it needs to be (undo any namespace suffix
    # appended above)
    folder = self._get_table_directory(phy_table)
    if use_get_files:
        if not query_str:
            query_str = "active == True"
        pdf_list = []
        with Executor(max_workers=8) as exe:
            jobs = [
                exe.submit(self.read_pq_file, f, fields, query_str)
                for f in files
            ]
            pdf_list = [job.result() for job in jobs]
        if pdf_list:
            final_df = pd.concat(pdf_list)
        else:
            final_df = pd.DataFrame(columns=fields)
    elif view == "latest":
        if not query_str:
            # Make up a dummy query string to avoid if/then/else
            query_str = "timestamp != 0"
        try:
            final_df = (pa.ParquetDataset(
                folder, filters=filters or None,
                validate_schema=False).read(columns=fields).to_pandas(
                    split_blocks=True, self_destruct=True).query(
                        query_str).drop_duplicates(
                            subset=key_fields,
                            keep="last").query("active == True"))
        except pa.lib.ArrowInvalid:
            return pd.DataFrame(columns=fields)
    else:
        if not query_str:
            # Make up a dummy query string to avoid if/then/else
            query_str = 'timestamp != "0"'
        try:
            final_df = (pa.ParquetDataset(
                folder, filters=filters or None,
                validate_schema=False).read(
                    columns=fields).to_pandas().query(query_str))
        except pa.lib.ArrowInvalid:
            return pd.DataFrame(columns=fields)

    # NOTE(review): with columns == ['*'], 'active' is still dropped here;
    # confirm that is intended
    if 'active' not in columns:
        final_df.drop(columns=['active'], axis=1, inplace=True)
        fields.remove('active')

    final_df = df_timestamp_to_datetime(final_df)

    fields = [x for x in fields if x in final_df.columns]
    # NOTE(review): this condition looks inverted — presumably it should be
    # all(x in fields for x in sort_fields) (i.e. every sort key is a
    # selected field); as written it requires every field to be a sort key.
    # Confirm against callers before changing.
    if sort_fields and all(x in sort_fields for x in fields):
        return final_df[fields].sort_values(by=sort_fields)
    else:
        return final_df[fields]
def get(self, **kwargs):
    """Replacing the original interface name in returned result.

    Fetches BGP session data, substitutes the user-configured peer name
    for the resolved one, synthesizes the afiSafi column on demand, and
    applies vrf/peer/hostname filters plus any user query string.
    Returns an error dataframe if the underlying data predates the
    afi/safi schema migration.
    """
    addnl_fields = kwargs.pop('addnl_fields', [])
    columns = kwargs.get('columns', ['default'])
    vrf = kwargs.pop('vrf', None)
    peer = kwargs.pop('peer', None)
    hostname = kwargs.pop('hostname', None)
    user_query = kwargs.pop('query_str', None)

    # Columns fetched only for post-processing; stripped before returning
    drop_cols = ['origPeer', 'peerHost']
    addnl_fields.append('origPeer')
    sch = SchemaForTable(self.iobj.table, self.schemas)
    fields = sch.get_display_fields(columns)

    for col in ['peerIP', 'updateSource', 'state', 'namespace', 'vrf',
                'peer', 'hostname']:
        if col not in fields:
            addnl_fields.append(col)
            drop_cols.append(col)

    try:
        df = super().get(addnl_fields=addnl_fields, **kwargs)
    except KeyError as ex:
        # Pre-2.0 data lacks the afi/safi columns; tell the user to migrate
        if ('afi' in str(ex)) or ('safi' in str(ex)):
            return pd.DataFrame({
                'error': ['ERROR: Migrate BGP data first using sq-coalescer']
            })
        # Any other KeyError is unexpected; propagate it rather than
        # falling through with df unbound (which raised UnboundLocalError)
        raise

    if df.empty:
        return df

    if 'afiSafi' in columns or (columns == ['*']):
        df['afiSafi'] = df['afi'] + ' ' + df['safi']

    query_str = build_query_str([], sch, vrf=vrf, peer=peer,
                                hostname=hostname)
    # Restore the user-configured peer name wherever one was recorded
    if 'peer' in df.columns:
        df['peer'] = np.where(df['origPeer'] != "", df['origPeer'],
                              df['peer'])

    # Convert old data into new 2.0 data format
    if 'peerHostname' in df.columns:
        mdf = self._get_peer_matched_df(df)
        drop_cols = [x for x in drop_cols if x in mdf.columns]
        drop_cols.extend(list(mdf.filter(regex='_y')))
    else:
        mdf = df

    mdf = self._handle_user_query_str(mdf, user_query)
    if query_str:
        return mdf.query(query_str).drop(columns=drop_cols,
                                         errors='ignore')
    return mdf.drop(columns=drop_cols, errors='ignore')