def test_feature_union_multi(self):
    """Check a three-way feature union after compilation.

    Four uniform transformers each return a fixed single-row result for
    ("q1", "doc1") with scores 0, 5, 10 and 15.  The last three are combined
    with ``**``; the compiled pipeline must hold three models and, when
    composed after the first transformer, must produce a ``features`` value
    of [5, 10, 15] for that row.
    """
    # NOTE(review): another definition named test_feature_union_multi is
    # visible further down; if both live in the same class, the later one
    # replaces this one and this body never runs -- confirm.
    import numpy as np

    def _uniform(score):
        # Transformer that returns the same one-row frame with `score`.
        return ptt.UniformTransformer(
            pd.DataFrame(
                [["q1", "doc1", score]],
                columns=["qid", "docno", "score"],
            )
        )

    mock0 = _uniform(0)
    mock1 = _uniform(5)
    mock2 = _uniform(10)
    mock3 = _uniform(15)

    mock12a = mock1 ** mock2
    # Python groups this as mock1 ** (mock2 ** mock3).
    mock123a = mock1 ** mock2 ** mock3
    # Constructed, but not run through the result checks below.
    mock123b = mock12a ** mock3

    self.assertEqual(2, len(mock12a.models))

    ptt.setup_rewrites()
    mock123_simple = mock123a.compile()
    self.assertIsNotNone(mock123_simple)
    self.assertEqual(
        "FeatureUnionPipeline(UniformTransformer(), UniformTransformer(), UniformTransformer())",
        repr(mock123_simple),
    )
    self.assertEqual(3, len(mock123_simple.models))

    # Only the compiled pipeline is checked; transform() is given None
    # because the uniform transformers are expected to return their fixed
    # frame regardless of input.
    rtr = (mock0 >> mock123_simple).transform(None)
    self.assertIsNotNone(rtr)
    self.assertEqual(1, len(rtr))
    for column in ("qid", "docno", "score", "features"):
        self.assertIn(column, rtr.columns)
    self.assertIn("q1", rtr["qid"].values)
    self.assertIn("doc1", rtr["docno"].values)
    # One feature per union member, in the order the members were combined.
    self.assertTrue(
        np.array_equal(np.array([5, 10, 15]), rtr.iloc[0]["features"])
    )
def test_feature_union_multi(self):
    """Check nested feature unions, including empty and partial members.

    All fixtures are uniform transformers built from fixed frames for query
    "q1".  The "full" ones return rows for doc1 and doc2 (doc1 scores 0, 5,
    10, 15; doc2 always 0), the "partial" ones return only doc1, and the
    "empty" one returns no rows.

    Expectations exercised:
      * ``mock1 ** mock2`` holds two models, and the compiled three-way
        union holds three.
      * Every complete three-way union, however it is nested, yields two
        rows with a single ``features`` column (no ``features_x`` /
        ``features_y`` leftovers) whose first row is close to [5, 10, 15].
      * A union containing the empty member raises ``ValueError``.
      * A union containing a partial member still passes the result checks
        but emits a warning containing "Got number of results".
    """
    import numpy as np
    import pyterrier.transformer as ptt

    columns = ["qid", "docno", "score"]

    def _uniform(rows):
        # Uniform transformer that always returns `rows`.
        return pt.Transformer.from_df(
            pd.DataFrame(rows, columns=columns), uniform=True)

    def _both_docs(doc1_score):
        # Full result set: doc1 with the given score, doc2 with score 0.
        return _uniform([["q1", "doc1", doc1_score], ["q1", "doc2", 0]])

    mock0 = _both_docs(0)
    mock1 = _both_docs(5)
    mock2 = _both_docs(10)
    mock3 = _both_docs(15)
    mock3_empty = _uniform([])
    mock2_partial = _uniform([["q1", "doc1", 10]])
    mock3_partial = _uniform([["q1", "doc1", 15]])

    # Operator-built unions; ** is right-associative, so mock123a is
    # mock1 ** (mock2 ** mock3), while mock123b is (mock1 ** mock2) ** mock3.
    mock12a = mock1 ** mock2
    mock123a = mock1 ** mock2 ** mock3
    mock123b = mock12a ** mock3

    # The same two nestings, built by calling the constructor directly.
    mock123a_manual = ptt.FeatureUnionPipeline(
        ptt.FeatureUnionPipeline(mock1, mock2),
        mock3,
    )
    mock123b_manual = ptt.FeatureUnionPipeline(
        mock1,
        ptt.FeatureUnionPipeline(mock2, mock3),
    )

    # Unions where one inner member returns no rows at all.
    mock123e = ptt.FeatureUnionPipeline(
        mock1,
        ptt.FeatureUnionPipeline(mock2, mock3_empty),
    )
    mock12e3 = ptt.FeatureUnionPipeline(
        mock1,
        ptt.FeatureUnionPipeline(mock3_empty, mock3),
    )

    # Unions where one inner member returns doc1 only.
    mock123p = ptt.FeatureUnionPipeline(
        mock1,
        ptt.FeatureUnionPipeline(mock2, mock3_partial),
    )
    mock12p3 = ptt.FeatureUnionPipeline(
        mock1,
        ptt.FeatureUnionPipeline(mock2_partial, mock3),
    )

    self.assertEqual(2, len(mock12a.models))

    ptt.setup_rewrites()
    mock123_simple = mock123a.compile()
    self.assertIsNotNone(mock123_simple)
    self.assertEqual(
        "FeatureUnionPipeline(UniformTransformer(), UniformTransformer(), UniformTransformer())",
        repr(mock123_simple),
    )
    self.assertEqual(3, len(mock123_simple.models))

    def _test_expression(expression):
        # transform() is given None because the uniform transformers are
        # expected to return their fixed frames regardless of input.
        rtr = (mock0 >> expression).transform(None)
        self.assertIsNotNone(rtr)
        self.assertEqual(2, len(rtr))
        for column in ("qid", "docno", "features"):
            self.assertIn(column, rtr.columns)
        # No suffixed feature columns may survive in the result.
        for leftover in ("features_x", "features_y"):
            self.assertNotIn(leftover, rtr.columns)
        self.assertIn("q1", rtr["qid"].values)
        self.assertIn("doc1", rtr["docno"].values)
        # First row must carry one feature per union member, in order.
        self.assertTrue(
            np.allclose(np.array([5, 10, 15]), rtr.iloc[0]["features"])
        )

    # Complete unions: the hand-built nestings are checked alongside the
    # compiled and operator-built ones.
    for complete in (
        mock123_simple,
        mock123a,
        mock123b,
        mock123a_manual,
        mock123b_manual,
    ):
        _test_expression(complete)

    for with_empty_member in (mock123e, mock12e3):
        with self.assertRaises(ValueError):
            _test_expression(with_empty_member)

    # assertWarnsRegex records warnings regardless of the active filters and
    # passes if any recorded warning matches, so the check does not depend
    # on the warning being the last one emitted.
    for with_partial_member in (mock123p, mock12p3):
        with self.assertWarnsRegex(Warning, "Got number of results"):
            _test_expression(with_partial_member)