def test_then_dataframe(self):
    # Checks that a DataFrame-backed transformer can be used on its own and
    # as the left-hand side of a composition: DataFrame >> SOMETHINGELSE.
    import pyterrier.transformer as ptt

    def rewrite(topics):
        # Return a new frame with " test" appended to every query.
        # Writing to the rows yielded by iterrows() is not guaranteed to
        # reach the underlying frame (rows may be copies), so update the
        # column as a whole instead.
        return topics.assign(query=topics["query"] + " test")

    # Deliberately a lambda: the composition is exercised with a lambda
    # wrapped in ApplyGenericTransformer.
    fn1 = lambda topics: rewrite(topics)

    topicsSource = pd.DataFrame([["1", "AA"]], columns=["qid", "query"])
    topics = pd.DataFrame([["1", "A"]], columns=["qid", "query"])

    # The source transformer alone: for qid "1" the query comes from
    # topicsSource ("AA"), not from the input topics ("A").
    rtr = pt.Transformer.from_df(topicsSource)(topics)
    self.assertEqual(1, len(rtr))
    self.assertIn("query", rtr.columns)
    self.assertIn("qid", rtr.columns)
    self.assertEqual(2, len(rtr.columns))
    self.assertEqual("AA", rtr.iloc[0]["query"])

    # The source transformer followed by the rewriting step.
    sequence1 = (
        pt.Transformer.from_df(topicsSource)
        >> ptt.ApplyGenericTransformer(fn1)
    )
    rtr = sequence1(topics)
    self.assertIn("query", rtr.columns)
    self.assertIn("qid", rtr.columns)
    self.assertEqual(2, len(rtr.columns))
    self.assertEqual(1, len(rtr))
    self.assertEqual("AA test", rtr.iloc[0]["query"])
def test_then(self):
    # Checks that ">>" builds a two-stage pipeline for every supported
    # combination of transformer objects, lambdas and plain functions, and
    # that the resulting pipeline applies both stages in order.
    import pyterrier.transformer as ptt

    def rewrite(topics):
        # Return a new frame with " test" appended to every query.
        # Writing to the rows yielded by iterrows() is not guaranteed to
        # reach the underlying frame (rows may be copies), so update the
        # column as a whole instead.
        return topics.assign(query=topics["query"] + " test")

    # Deliberately lambdas: the compositions below cover lambdas as well as
    # the named function on either side of ">>".
    fn1 = lambda topics: rewrite(topics)
    fn2 = lambda topics: rewrite(topics)

    sequence1 = (
        ptt.ApplyGenericTransformer(fn1) >> ptt.ApplyGenericTransformer(fn2)
    )
    sequence2 = ptt.ApplyGenericTransformer(fn1) >> fn2
    sequence3 = ptt.ApplyGenericTransformer(fn1) >> rewrite
    sequence4 = fn1 >> ptt.ApplyGenericTransformer(fn2)
    sequence5 = rewrite >> ptt.ApplyGenericTransformer(fn2)

    sequences = [sequence1, sequence2, sequence3, sequence4, sequence5]
    for position, sequence in enumerate(sequences, start=1):
        # subTest makes a failure report say which composition form broke.
        with self.subTest(sequence=position):
            self.assertIsInstance(sequence, ptt.TransformerBase)

            # check we can access items
            self.assertEqual(2, len(sequence))
            # assertTrue(x, cls) would treat cls as the failure message and
            # pass for any truthy x; assertIsInstance checks the type.
            self.assertIsInstance(sequence[0], ptt.TransformerBase)
            self.assertIsInstance(sequence[1], ptt.TransformerBase)

            topics_in = pd.DataFrame(
                [["q1", "hello"]], columns=["qid", "query"]
            )
            output = sequence.transform(topics_in)
            self.assertEqual(1, len(output))
            self.assertEqual("q1", output.iloc[0]["qid"])
            # Both stages append " test", so the suffix appears twice.
            self.assertEqual("hello test test", output.iloc[0]["query"])