Exemple #1
0
    def test_get_whois_df(self):
        """Check get_whois_df with default and with custom output columns."""
        # Default invocation: one output row per input row, default ASN column.
        default_out = get_whois_df(data=self.input_df, ip_column="AllExtIPs")
        self.assertEqual(len(default_out), len(self.input_df))
        self.assertIn("AsnDescription", default_out.columns)

        # Custom invocation: caller-supplied names for the ASN/whois columns.
        custom_cols = {"asn_col": "asn", "whois_col": "whois"}
        custom_out = get_whois_df(
            data=self.input_df, ip_column="AllExtIPs", **custom_cols
        )
        self.assertEqual(len(custom_out), len(self.input_df))
        for col_name in custom_cols.values():
            self.assertIn(col_name, custom_out.columns)

        # Every row must carry a non-null value in each requested column.
        for col_name in custom_cols.values():
            populated = custom_out[custom_out[col_name].notna()]
            self.assertEqual(len(populated), len(self.input_df))
Exemple #2
0
def get_geoip_whois(geo_lookup, data: pd.DataFrame, ip_col: str):
    """
    Enrich a frame of IP addresses with geolocation and WhoIs details.

    Parameters
    ----------
    geo_lookup : GeoIpLookup
        Provider object whose `lookup_ips` method is queried
    data : pd.DataFrame
        Frame holding the addresses to enrich
    ip_col : str
        Column of `data` that contains the IP addresses

    Returns
    -------
    pd.DataFrame
        Output of `get_whois_df` applied to the geo-merged frame

    """
    normalized = _normalize_ip4(data, ip_col)

    # Step 1: geolocation lookup, left-joined back onto the input rows.
    nb_markdown(f"Querying geolocation for {len(normalized)} ip addresses...")
    located = geo_lookup.lookup_ips(normalized, column=ip_col)
    merged = normalized.merge(
        right=located,
        how="left",
        left_on=ip_col,
        right_on="IpAddress",
    )

    # Step 2: WhoIs lookup keyed on the provider's "IpAddress" column.
    nb_markdown(f"Querying WhoIs for {len(normalized)} ip addresses...")
    whois_enriched = get_whois_df(merged, "IpAddress", whois_col="Whois_data")
    return whois_enriched
def _get_flow_summary(flow_index):
    """
    Summarize allowed flows per (destination ASN, source ASN) pair.

    Parameters
    ----------
    flow_index : pd.DataFrame
        Flow data; must contain the columns "source", "dest",
        "L7Protocol", "FlowDirection" and "TotalAllowedFlows".

    Returns
    -------
    pd.DataFrame
        One row per ("DestASN", "SourceASN") pair with the columns
        "TotalAllowedFlows" (sum), "L7Protocols", "source_ips" and
        "dest_ips" (lists of the distinct values in each group).

    """

    def _unique_list(values):
        # Distinct values of a group, as a plain Python list.
        return values.unique().tolist()

    # Collapse the input to one row per source/dest/protocol/direction,
    # summing the allowed-flow counts.
    flows_df = (
        flow_index[
            ["source", "dest", "L7Protocol", "FlowDirection", "TotalAllowedFlows"]
        ]
        .groupby(["source", "dest", "L7Protocol", "FlowDirection"])
        .sum()
        .reset_index()
    )

    # Count distinct addresses across BOTH columns. Adding the per-column
    # unique counts would count an address twice when it appears as both a
    # source and a destination.
    num_ips = pd.concat(
        [flows_df["source"], flows_df["dest"]], ignore_index=True
    ).nunique()
    nb_markdown(f"Found {num_ips} unique IP Addresses.")

    nb_data_wait("Whois")
    # Two whois passes: first for destination addresses, then for source
    # addresses, each writing to its own pair of ASN columns.
    flows_df = get_whois_df(
        flows_df,
        ip_column="dest",
        asn_col="DestASN",
        whois_col="DestASNFull",
        show_progress=True,
    )
    flows_df = get_whois_df(
        flows_df,
        ip_column="source",
        asn_col="SourceASN",
        whois_col="SourceASNFull",
        show_progress=True,
    )

    return (
        flows_df.groupby(["DestASN", "SourceASN"])
        .agg(
            TotalAllowedFlows=pd.NamedAgg(column="TotalAllowedFlows", aggfunc="sum"),
            L7Protocols=pd.NamedAgg(column="L7Protocol", aggfunc=_unique_list),
            source_ips=pd.NamedAgg(column="source", aggfunc=_unique_list),
            dest_ips=pd.NamedAgg(column="dest", aggfunc=_unique_list),
        )
        .reset_index()
    )