def test_var_in(self):
    """Check that `gr.var_in` filtering agrees with a hand-written mask.

    Works on the first ten rows of the `cut` column of the diamonds
    dataset and keeps only the "Ideal" and "Premium" rows, once via plain
    pandas boolean indexing and once via the grama pipeline.
    """
    df_cut = data.df_diamonds[["cut"]].head(10)

    ## Reference result, built with plain pandas
    keep_mask = (df_cut.cut == "Ideal") | (df_cut.cut == "Premium")
    # The index is reset on the reference before comparing; presumably
    # tf_filter hands back a fresh 0..n-1 index -- confirm against grama.
    df_expected = df_cut[keep_mask].reset_index(drop=True)

    ## Result under test, built with the grama verb
    df_actual = df_cut >> gr.tf_filter(
        gr.var_in(X.cut, ["Ideal", "Premium"])
    )

    self.assertTrue(df_expected.equals(df_actual))
def test_kmeans(self):
    """Check that `fit.fit_kmeans` yields a usable, correct cluster model.

    A two-cluster model is fit on the `x` and `y` columns of
    `self.df_cluster`, then evaluated on those same columns. The labels
    in the reference column `c` and in the model's `cluster_id` output
    are paired up through the row with the smallest `x`; the (x, y)
    points of the paired clusters must then be equal.
    """
    ## Fit routine creates usable model
    columns = ["x", "y"]
    md_kmeans = fit.fit_kmeans(self.df_cluster, var=columns, n_clusters=2)
    df_eval = gr.eval_df(md_kmeans, self.df_cluster[columns])

    ## Check correctness
    # Match clusters by min(x): read off the label carried by the
    # minimum-x row in the reference data and in the model output
    df_min_true = self.df_cluster >> gr.tf_filter(X.x == gr.colmin(X.x))
    label_true = df_min_true.c[0]
    df_min_model = df_eval >> gr.tf_filter(X.x == gr.colmin(X.x))
    label_model = df_min_model.cluster_id[0]

    # Points belonging to the matched cluster, per the reference labels
    df_points_true = (
        self.df_cluster
        >> gr.tf_filter(X.c == label_true)
        >> gr.tf_select(X.x, X.y)
    )
    # Points belonging to the matched cluster, per the fitted model
    df_points_model = (
        df_eval
        >> gr.tf_filter(X.cluster_id == label_model)
        >> gr.tf_select(X.x, X.y)
    )

    self.assertTrue(gr.df_equal(df_points_true, df_points_model))