def get(self, **kwargs):
    """Replacing the original interface name in returned result"""
    addnl_fields = kwargs.pop('addnl_fields', [])
    columns = kwargs.get('columns', ['default'])
    vrf = kwargs.pop('vrf', None)
    peer = kwargs.pop('peer', None)
    hostname = kwargs.pop('hostname', None)

    # Columns fetched purely for post-processing; stripped before returning
    drop_cols = ['origPeer', 'peerHost']
    addnl_fields.append('origPeer')
    if columns != ['*']:
        for extra in ('peerIP', 'updateSource'):
            if extra not in columns:
                addnl_fields.append(extra)
                drop_cols.append(extra)

    df = super().get(addnl_fields=addnl_fields, **kwargs)
    if df.empty:
        return df

    sch = SchemaForTable(self.iobj._table, self.schemas)
    query_str = build_query_str([], sch, vrf=vrf, peer=peer,
                                hostname=hostname)

    # Restore the user-configured peer name wherever one was recorded
    if 'peer' in df.columns:
        df['peer'] = np.where(df['origPeer'] != "", df['origPeer'],
                              df['peer'])

    if 'peerHostname' not in df.columns:
        mdf = df
    else:
        mdf = self._get_peer_matched_df(df)
        drop_cols = [col for col in drop_cols if col in mdf.columns]
        drop_cols.extend(list(mdf.filter(regex='_y')))

    if query_str:
        mdf = mdf.query(query_str)
    return mdf.drop(columns=drop_cols, errors='ignore')
def aver(self, **kwargs):
    """Assert that the OSPF state is OK.

    Checks for duplicate router IDs across the namespace, and compares
    per-interface OSPF parameters (timers, area, network type, subnet,
    passive, vrf) between LLDP-discovered neighbors.  Returns a dataframe
    with one row per (interface, assert reason) carrying an
    'assert' column of 'pass'/'fail'.
    """
    # Caller-supplied columns are ignored; we need a fixed column set to
    # perform the comparisons below
    kwargs.pop('columns', [])
    columns = [
        "namespace", "hostname", "vrf", "ifname", "routerId", "helloTime",
        "deadTime", "passive", "ipAddress", "isUnnumbered", "areaStub",
        "networkType", "timestamp", "area", "nbrCount",
    ]

    # we have to not filter hostname at this point because we need to
    # understand neighbor relationships
    orig_hostname = kwargs.pop('hostname', '')

    ospf_df = self.get_valid_df("ospfIf", columns=columns, **kwargs)
    if ospf_df.empty:
        return pd.DataFrame(columns=columns)

    # One (initially empty) reason-list per row; mismatch checks append to it
    ospf_df["assertReason"] = [[] for _ in range(len(ospf_df))]

    df = (ospf_df[ospf_df["routerId"] != ""].groupby(
        ["routerId", "namespace"], as_index=False)[[
            "hostname", "namespace"
        ]].agg(lambda x: x.unique().tolist())).dropna(how='any')
    # df is a dataframe with each row containing the routerId and the
    # corresponding list of hostnames with that routerId. In a good
    # configuration, the list must have exactly one entry
    # NOTE(review): the groupby above is per (routerId, namespace) but the
    # merge keys on routerId alone — confirm cross-namespace duplicates are
    # intended to match here
    ospf_df['assertReason'] = (ospf_df.merge(
        df, on=["routerId"], how="outer").apply(
            lambda x: ["duplicate routerId {}".format(x["hostname_y"])]
            if len(x['hostname_y']) != 1 else [], axis=1))

    # Now peering match
    lldpobj = LldpObj(context=self.ctxt)
    # hostname was popped above, so kwargs.get("hostname") is always '':
    # LLDP is deliberately fetched for all hosts so both ends of every
    # peering are visible
    lldp_df = lldpobj.get(namespace=kwargs.get("namespace", ""),
                          hostname=kwargs.get("hostname", ""),
                          ifname=kwargs.get("ifname", ""),
                          columns=[
                              "namespace", "hostname", "ifname",
                              "peerHostname", "peerIfname", "peerMacaddr"
                          ])
    if lldp_df.empty:
        # Without LLDP info no peering comparison is possible; exclude
        # loopbacks/SVIs (which never have LLDP peers) and fail the rest
        ospf_df = ospf_df[~(ospf_df.ifname.str.contains('loopback') |
                            ospf_df.ifname.str.contains('Vlan'))]
        ospf_df['assertReason'] = 'No LLDP peering info'
        ospf_df['assert'] = 'fail'
        return ospf_df[[
            'namespace', 'hostname', 'vrf', 'ifname', 'assertReason',
            'assert'
        ]]

    # Create a single massive DF with fields populated appropriately
    use_cols = [
        "namespace", "routerId", "hostname", "vrf", "ifname", "helloTime",
        "deadTime", "passive", "ipAddress", "areaStub", "isUnnumbered",
        "networkType", "area", "timestamp",
    ]
    int_df = ospf_df[use_cols].merge(
        lldp_df, on=["namespace", "hostname", "ifname"]) \
        .dropna(how="any")

    # filter by hostname now (int_df was built unfiltered so that peers on
    # other hosts are still present for the comparison)
    if orig_hostname:
        ospfschema = SchemaForTable('ospf', schema=self.schemas)
        hq = build_query_str([], ospfschema, hostname=orig_hostname)
        ospf_df = ospf_df.query(hq)

    if int_df.empty:
        # Weed out the loopback and SVI interfaces as they have no LLDP peers
        ospf_df = ospf_df[~(ospf_df.ifname.str.contains('loopback') |
                            ospf_df.ifname.str.contains('Vlan'))]
        ospf_df['assertReason'] = 'No LLDP peering info'
        ospf_df['assert'] = 'fail'
        return ospf_df[[
            'namespace', 'hostname', 'vrf', 'ifname', 'assertReason',
            'assert'
        ]]

    # Pair each interface (_x columns) with its LLDP peer's view (_y columns)
    ospf_df = ospf_df.merge(
        int_df,
        left_on=["namespace", "hostname", "ifname"],
        right_on=["namespace", "peerHostname", "peerIfname"]) \
        .dropna(how="any")

    # Now start comparing the various parameters
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["subnet mismatch"] if (
            (x["isUnnumbered_x"] != x["isUnnumbered_y"]) and
            (IPv4Network(x["ipAddress_x"], strict=False) != IPv4Network(
                x["ipAddress_y"], strict=False))) else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["area mismatch"] if (x["area_x"] != x["area_y"] and x[
            "areaStub_x"] != x["areaStub_y"]) else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["Hello timers mismatch"]
        if x["helloTime_x"] != x["helloTime_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["Dead timer mismatch"]
        if x["deadTime_x"] != x["deadTime_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["network type mismatch"]
        if x["networkType_x"] != x["networkType_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["passive config mismatch"]
        if x["passive_x"] != x["passive_y"] else [],
        axis=1,
    )
    ospf_df["assertReason"] += ospf_df.apply(
        lambda x: ["vrf mismatch"] if x["vrf_x"] != x["vrf_y"] else [],
        axis=1,
    )

    # Fill up a single assert column now indicating pass/fail
    ospf_df['assert'] = ospf_df.apply(
        lambda x: 'pass' if not len(x['assertReason']) else 'fail', axis=1)

    # One output row per assert reason; interfaces with no reasons get '-'
    return (ospf_df.rename(
        index=str,
        columns={
            "hostname_x": "hostname",
            "ifname_x": "ifname",
            "vrf_x": "vrf",
        },
    )[[
        "namespace", "hostname", "ifname", "vrf", "assert", "assertReason",
        "timestamp"
    ]].explode(column='assertReason').fillna({'assertReason': '-'}))
def get_table_df(self, cfg, schemas, **kwargs) -> pd.DataFrame:
    """Use Pandas instead of Spark to retrieve the data.

    Reads the parquet folder for the requested table, applies the
    start/end/view/key-field filters, and returns the selected display
    fields, optionally sorted.
    """
    MAX_FILECNT_TO_READ_FOLDER = 10000

    self.cfg = cfg

    table = kwargs.pop("table")
    start = kwargs.pop("start_time")
    end = kwargs.pop("end_time")
    view = kwargs.pop("view")
    sort_fields = kwargs.pop("sort_fields")
    ign_key_fields = kwargs.pop("ign_key", [])
    addnl_fields = kwargs.pop("addnl_fields", [])

    # 'active' and 'timestamp' are always needed for the latest-view
    # dedup/filter logic below
    for f in ['active', 'timestamp']:
        if f not in addnl_fields:
            addnl_fields.append(f)

    sch = SchemaForTable(table, schema=schemas)
    phy_table = sch.get_phy_table_for_table()

    folder = self._get_table_directory(phy_table)

    # Restrict to a single DC if thats whats asked
    if "namespace" in kwargs:
        v = kwargs["namespace"]
        if v:
            if not isinstance(v, list):
                folder += "/namespace={}/".format(v)

    fcnt = self.get_filecnt(folder)
    if fcnt == 0:
        return pd.DataFrame()

    # We are going to hard code use_get_files until we have some automated
    # testing
    use_get_files = False

    # use_get_files = (
    #     (fcnt > MAX_FILECNT_TO_READ_FOLDER and view == "latest") or
    #     start or end
    # )

    if use_get_files:
        # Switch to more efficient method when there are lotsa files
        # Reduce I/O since that is the worst drag
        key_fields = []
        if len(kwargs.get("namespace", [])) > 1:
            del kwargs["namespace"]
        files = get_latest_files(folder, start, end, view)
    else:
        # ign_key_fields contains key fields that are not partition cols
        key_fields = [
            i for i in sch.key_fields() if i not in ign_key_fields
        ]
        filters = self.build_pa_filters(start, end, key_fields, **kwargs)

    if "columns" in kwargs:
        columns = kwargs["columns"]
        del kwargs["columns"]
    else:
        columns = ["default"]

    fields = sch.get_display_fields(columns)
    for f in addnl_fields:
        if f not in fields:
            fields.append(f)

    # Create the filter to select only specified columns
    addnl_filter = kwargs.pop('add_filter', None)
    query_str = build_query_str(key_fields, sch, **kwargs)

    # Add the ignored fields back to key fields to ensure we
    # do the drop_duplicates correctly below incl reading reqd cols
    key_fields.extend(ign_key_fields)

    # Handle the case where key fields are missing from display fields
    fldset = set(fields)
    kfldset = set(key_fields)
    add_flds = kfldset.difference(fldset)
    if add_flds:
        fields.extend(list(add_flds))

    if addnl_filter:
        # This is for special cases that are specific to an object
        if not query_str:
            query_str = addnl_filter
        else:
            query_str += ' and {}'.format(addnl_filter)

    # Restore the folder to what it needs to be (undo any namespace suffix
    # appended above)
    folder = self._get_table_directory(phy_table)
    if use_get_files:
        if not query_str:
            query_str = "active == True"
        pdf_list = []
        with Executor(max_workers=8) as exe:
            jobs = [
                exe.submit(self.read_pq_file, f, fields, query_str)
                for f in files
            ]
            pdf_list = [job.result() for job in jobs]
        if pdf_list:
            final_df = pd.concat(pdf_list)
        else:
            final_df = pd.DataFrame(columns=fields)
    elif view == "latest":
        if not query_str:
            # Make up a dummy query string to avoid if/then/else
            query_str = "timestamp != 0"
        try:
            final_df = (pa.ParquetDataset(
                folder, filters=filters or None,
                validate_schema=False).read(columns=fields).to_pandas(
                    split_blocks=True, self_destruct=True).query(
                        query_str).drop_duplicates(
                            subset=key_fields,
                            keep="last").query("active == True"))
        except pa.lib.ArrowInvalid:
            return pd.DataFrame(columns=fields)
    else:
        if not query_str:
            # Make up a dummy query string to avoid if/then/else
            query_str = 'timestamp != "0"'
        try:
            final_df = (pa.ParquetDataset(
                folder, filters=filters or None,
                validate_schema=False).read(
                    columns=fields).to_pandas().query(query_str))
        except pa.lib.ArrowInvalid:
            return pd.DataFrame(columns=fields)

    # NOTE(review): with columns == ['*'], 'active' is still dropped here;
    # confirm that is intended
    if 'active' not in columns:
        final_df.drop(columns=['active'], axis=1, inplace=True)
        fields.remove('active')

    final_df = df_timestamp_to_datetime(final_df)

    fields = [x for x in fields if x in final_df.columns]
    # NOTE(review): this condition looks inverted — presumably it should be
    # all(x in fields for x in sort_fields) (i.e. every sort key is a
    # selected field); as written it requires every field to be a sort key.
    # Confirm against callers before changing.
    if sort_fields and all(x in sort_fields for x in fields):
        return final_df[fields].sort_values(by=sort_fields)
    else:
        return final_df[fields]
def get(self, **kwargs):
    """Replacing the original interface name in returned result.

    Fetches BGP session data, substitutes the user-configured peer name
    for the resolved one, synthesizes the afiSafi column on demand, and
    applies vrf/peer/hostname filters plus any user query string.
    Returns an error dataframe if the underlying data predates the
    afi/safi schema migration.
    """
    addnl_fields = kwargs.pop('addnl_fields', [])
    columns = kwargs.get('columns', ['default'])
    vrf = kwargs.pop('vrf', None)
    peer = kwargs.pop('peer', None)
    hostname = kwargs.pop('hostname', None)
    user_query = kwargs.pop('query_str', None)

    # Columns fetched only for post-processing; stripped before returning
    drop_cols = ['origPeer', 'peerHost']
    addnl_fields.append('origPeer')
    sch = SchemaForTable(self.iobj.table, self.schemas)
    fields = sch.get_display_fields(columns)

    for col in ['peerIP', 'updateSource', 'state', 'namespace', 'vrf',
                'peer', 'hostname']:
        if col not in fields:
            addnl_fields.append(col)
            drop_cols.append(col)

    try:
        df = super().get(addnl_fields=addnl_fields, **kwargs)
    except KeyError as ex:
        # Pre-2.0 data lacks the afi/safi columns; tell the user to migrate
        if ('afi' in str(ex)) or ('safi' in str(ex)):
            return pd.DataFrame({
                'error': ['ERROR: Migrate BGP data first using sq-coalescer']
            })
        # Any other KeyError is unexpected; propagate it rather than
        # falling through with df unbound (which raised UnboundLocalError)
        raise

    if df.empty:
        return df

    if 'afiSafi' in columns or (columns == ['*']):
        df['afiSafi'] = df['afi'] + ' ' + df['safi']

    query_str = build_query_str([], sch, vrf=vrf, peer=peer,
                                hostname=hostname)
    # Restore the user-configured peer name wherever one was recorded
    if 'peer' in df.columns:
        df['peer'] = np.where(df['origPeer'] != "", df['origPeer'],
                              df['peer'])

    # Convert old data into new 2.0 data format
    if 'peerHostname' in df.columns:
        mdf = self._get_peer_matched_df(df)
        drop_cols = [x for x in drop_cols if x in mdf.columns]
        drop_cols.extend(list(mdf.filter(regex='_y')))
    else:
        mdf = df

    mdf = self._handle_user_query_str(mdf, user_query)
    if query_str:
        return mdf.query(query_str).drop(columns=drop_cols,
                                         errors='ignore')
    return mdf.drop(columns=drop_cols, errors='ignore')