def comapare_dataframes(
    cls,
    df1: DataFrame,
    df2: DataFrame,
    excluded_keys: Union[List, str, None] = None,
) -> bool:
    """
    Compare two DataFrames for an exact match, ignoring row order.

    The excluded columns are dropped from both frames, both frames are
    converted to pandas and sorted by every remaining column of ``df1``,
    and the results are checked with ``pandas.testing.assert_frame_equal``.

    :param df1: processed data
    :type df1: DataFrame
    :param df2: gold standard expected data
    :type df2: DataFrame
    :param excluded_keys: column(s) to be excluded from the comparison,
        optional. ``None`` or an empty list excludes nothing; a single
        value is treated as one column.
    :type excluded_keys: Union[List, str, None]
    :return: True when the frames match
    :rtype: bool
    :raises AssertionError: DataFrame mismatch
    """
    # Normalise excluded_keys to a list. None must mean "nothing to drop":
    # wrapping it as [None] would pass None to drop() as a column.
    if excluded_keys is None:
        columns_to_drop = []
    elif isinstance(excluded_keys, (list, tuple, set)):
        columns_to_drop = list(excluded_keys)
    else:
        columns_to_drop = [excluded_keys]

    df1 = df1.drop(*columns_to_drop)
    df2 = df2.drop(*columns_to_drop)

    # dtypes yields (column name, type) pairs; sorting by every column makes
    # the comparison independent of the original row order.
    sort_columns = [column[0] for column in df1.dtypes]
    df1_sorted = df1.toPandas().sort_values(by=sort_columns, ignore_index=True)
    df2_sorted = df2.toPandas().sort_values(by=sort_columns, ignore_index=True)

    assert_frame_equal(df1_sorted, df2_sorted)
    return True
def create_input_widgets(years: DataFrame, countries: DataFrame, ratings: DataFrame, dbutils: DBUtils):
    """
    Create the "year", "country" and "rating" dropdown widgets.

    :param years: frame whose first row holds the minimum year in its first
        column and the maximum year in its second column
    :type years: DataFrame
    :param countries: frame whose first column lists the selectable countries
    :type countries: DataFrame
    :param ratings: frame whose first column lists the selectable ratings;
        null entries are ignored
    :type ratings: DataFrame
    :param dbutils: object exposing ``widgets.dropdown``
    :type dbutils: DBUtils
    :raises IndexError: if ``years`` or ``countries`` has no rows
    """
    # Collect the years frame once; each toPandas() call materialises it again.
    year_bounds = years.toPandas().values[0]
    min_year = int(year_bounds[0])
    max_year = int(year_bounds[1])

    country_list = [row[0] for row in countries.toPandas().values.tolist()]

    # Filter nulls instead of list.remove(None), which raises ValueError when
    # the ratings contain no null and only removes the first one otherwise.
    rating_list = sorted(
        row[0] for row in ratings.toPandas().values.tolist() if row[0] is not None
    )

    year_choices = [str(year) for year in range(min_year, max_year + 1)]

    dbutils.widgets.dropdown("year", str(min_year), year_choices, "Select year")
    dbutils.widgets.dropdown("country", country_list[0], country_list, "Select country")
    dbutils.widgets.dropdown("rating", "C", rating_list, "Select rating")
def plot_defaults_per_country(df: DataFrame):
    """
    Draw a bar chart of "Defaults" per "Country".

    The chart title is built from the current values of the "year" and
    "rating" widgets.

    :param df: data to plot, with "Country" and "Defaults" columns
    :type df: DataFrame
    :return: None when ``df`` has no rows, otherwise the result of
        ``display`` for the plotted axes
    """
    selected_year = dbutils.widgets.get("year")  # noqa: F821
    selected_rating = dbutils.widgets.get("rating")  # noqa: F821

    # Fetch at most one row to decide whether there is anything to plot.
    first_rows = df.head(1)
    if len(first_rows) == 0:
        return None

    chart_data = df.toPandas()
    axes = sns.barplot(x="Country", y="Defaults", data=chart_data)
    axes.set_title(f"Defaults per Country of {selected_rating} rating during {selected_year}")
    return display(axes)  # noqa: F821
def plot_defaults_per_month(df: DataFrame):
    """
    Draw a bar chart of "Defaults" per "Month".

    The chart title is built from the current values of the "year",
    "country" and "rating" widgets.

    :param df: data to plot, with "Month" and "Defaults" columns
    :type df: DataFrame
    :return: None when ``df`` has no rows, otherwise the result of
        ``display`` for the plotted axes
    """
    selected_year = dbutils.widgets.get("year")  # noqa: F821
    selected_country = dbutils.widgets.get("country")  # noqa: F821
    selected_rating = dbutils.widgets.get("rating")  # noqa: F821

    # Fetch at most one row to decide whether there is anything to plot.
    first_rows = df.head(1)
    if len(first_rows) == 0:
        return None

    chart_data = df.toPandas()
    axes = sns.barplot(x="Month", y="Defaults", data=chart_data)
    axes.set_title(f"Defaults per Month in {selected_year} in {selected_country} of {selected_rating} rating")
    return display(axes)  # noqa: F821