def test_eval_input_subsets(self):
    """Check eval_pnd() when `signs` names only a subset of the fitted outputs.

    The model is fit on three outputs ("y1", "y2", "y3"), but `signs` lists
    only "y1" and "y2". The result is expected to have one row per test row
    and two more columns than the test DataFrame.
    """
    # Model used to generate the dataset
    md_true = make_pareto_random(twoDim=False)

    # Sample the full dataset
    df_data = (
        md_true
        >> gr.ev_sample(n=2e3, seed=101, df_det="nom")
    )

    # Training set: 10 rows drawn from the dataset
    df_train = (
        df_data
        >> gr.tf_sample(n=10)
    )

    # Test set: anti-join against the training rows on x1 and x2,
    # then draw 200 rows from what remains
    df_test = (
        df_data
        >> gr.tf_anti_join(
            df_train,
            by=["x1", "x2"],
        )
        >> gr.tf_sample(n=200)
    )

    # Fit a model on all three inputs and all three outputs
    md_fit = (
        df_train
        >> ft_gp(
            var=["x1", "x2", "x3"],
            out=["y1", "y2", "y3"],
        )
    )

    # Call eval_pnd with only "y1" and "y2" in `signs`
    # TODO: open question from the original author --
    #       how to imply x1 and x2 from y1 and y2?
    df_pnd = (
        md_fit
        >> gr.ev_pnd(
            df_train,
            df_test,
            signs={"y1": 1, "y2": 1},
            seed=101,
        )
    )

    # Correctness by shape: one output row per test row.
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertEqual(len(df_pnd), df_test.shape[0])
    # Correctness by number of columns: two more than the test DataFrame
    self.assertEqual(
        len(df_pnd.columns.values),
        len(df_test.columns.values) + 2,
    )
def test_eval_append(self):
    """Check the `append` parameter of eval_pnd().

    With `append=False`, the result is expected to have one row per test row
    and exactly two columns.
    """
    # Model used to generate the dataset
    md_true = make_pareto_random(twoDim=False)

    # Sample the full dataset
    df_data = (
        md_true
        >> gr.ev_sample(n=2e3, seed=101, df_det="nom")
    )

    # Training set: 10 rows drawn from the dataset
    df_train = (
        df_data
        >> gr.tf_sample(n=10)
    )

    # Test set: anti-join against the training rows on x1 and x2,
    # then draw 200 rows from what remains
    df_test = (
        df_data
        >> gr.tf_anti_join(
            df_train,
            by=["x1", "x2"],
        )
        >> gr.tf_sample(n=200)
    )

    # Fit a model on all three inputs and all three outputs
    md_fit = (
        df_train
        >> ft_gp(
            var=["x1", "x2", "x3"],
            out=["y1", "y2", "y3"],
        )
    )

    # Call eval_pnd with append disabled
    df_pnd = (
        md_fit
        >> gr.ev_pnd(
            df_train,
            df_test,
            signs={"y1": 1, "y2": 1, "y3": 1},
            seed=101,
            append=False,
        )
    )

    # Correctness by shape: one output row per test row.
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertEqual(len(df_pnd), df_test.shape[0])
    # Correctness by number of columns: exactly two when append=False
    self.assertEqual(len(df_pnd.columns.values), 2)
def test_eval_faulty_inputs(self):
    """Check that eval_pnd() raises ValueError on faulty `signs` input.

    The model is fit on outputs "y1" and "y2", while `signs` contains the key
    "y", which is not one of the outputs the model was fit on.
    """
    # Model used to generate the dataset
    md_true = make_pareto_random()

    # Sample the full dataset
    df_data = (
        md_true
        >> gr.ev_sample(n=2e3, seed=101, df_det="nom")
    )

    # Training set: 10 rows drawn from the dataset
    df_train = (
        df_data
        >> gr.tf_sample(n=10)
    )

    # Test set: anti-join against the training rows on x1 and x2,
    # then draw 200 rows from what remains
    df_test = (
        df_data
        >> gr.tf_anti_join(
            df_train,
            by=["x1", "x2"],
        )
        >> gr.tf_sample(n=200)
    )

    # Fit a model on two inputs and two outputs
    md_fit = (
        df_train
        >> ft_gp(
            var=["x1", "x2"],
            out=["y1", "y2"],
        )
    )

    # Call eval_pnd with the mismatched `signs`; the result is deliberately
    # not bound to a name because the call is expected to raise.
    with self.assertRaises(ValueError):
        md_fit >> gr.ev_pnd(
            df_train,
            df_test,
            signs={"y": 1, "y2": 1},
            seed=101,
        )
def test_anti_join(self):
    """Check tf_anti_join() of self.dfA against self.dfB, keyed on "x1".

    The expected result is a single row ("C", 3) carrying index label 2.
    """
    # Expected frame after the anti-join
    df_expected = pd.DataFrame(
        {"x1": ["C"], "x2": [3]},
        index=[2],
    )

    # Anti-join dfA against dfB on the "x1" column
    df_result = self.dfA >> gr.tf_anti_join(self.dfB, by="x1")

    frames_match = df_result.equals(df_expected)
    self.assertTrue(frames_match)