Esempio n. 1
0
    def test_then_dataframe(self):
        """Check that a DataFrame-backed transformer can be composed with ``>>``.

        Two things are verified:

        1. ``pt.Transformer.from_df(topicsSource)`` applied to a topics frame
           yields a single row with exactly the ``qid`` and ``query`` columns,
           and the query text comes from ``topicsSource`` ("AA").
        2. Chaining that transformer with ``ptt.ApplyGenericTransformer``
           applies the rewrite function to its output ("AA test").
        """
        import pyterrier.transformer as ptt

        topicsSource = pd.DataFrame([["1", "AA"]], columns=["qid", "query"])

        def rewrite(topics):
            # Update the whole column on a copy. Assigning into the rows
            # yielded by DataFrame.iterrows() is not guaranteed to write
            # through to the frame (the rows may be copies), so the original
            # row-by-row mutation could silently become a no-op.
            rewritten = topics.copy()
            rewritten["query"] = rewritten["query"] + " test"
            return rewritten

        # Deliberately wrapped in a lambda, as in the original test.
        fn1 = lambda topics: rewrite(topics)

        topics = pd.DataFrame([["1", "A"]], columns=["qid", "query"])

        # The DataFrame transformer on its own.
        rtr = pt.Transformer.from_df(topicsSource)(topics)
        self.assertEqual(1, len(rtr))
        self.assertIn("query", rtr.columns)
        self.assertIn("qid", rtr.columns)
        self.assertEqual(2, len(rtr.columns))
        self.assertEqual("AA", rtr.iloc[0]["query"])

        # DataFrame transformer >> generic function transformer.
        sequence1 = (
            pt.Transformer.from_df(topicsSource)
            >> ptt.ApplyGenericTransformer(fn1)
        )
        rtr = sequence1(topics)
        self.assertIn("query", rtr.columns)
        self.assertIn("qid", rtr.columns)
        self.assertEqual(2, len(rtr.columns))
        self.assertEqual(1, len(rtr))
        self.assertEqual("AA test", rtr.iloc[0]["query"])
Esempio n. 2
0
    def test_then(self):
        """Check that ``>>`` composes transformers and plain callables.

        Five pipelines are built, mixing ``ptt.ApplyGenericTransformer``
        instances, lambdas and a plain function on either side of ``>>``.
        Each pipeline must be a ``ptt.TransformerBase`` of length 2 whose
        items are themselves ``ptt.TransformerBase`` instances, and running
        it must apply the rewrite twice ("hello" -> "hello test test").
        """
        import pyterrier.transformer as ptt

        def rewrite(topics):
            # Update the whole column on a copy. Assigning into the rows
            # yielded by DataFrame.iterrows() is not guaranteed to write
            # through to the frame (the rows may be copies), so the original
            # row-by-row mutation could silently become a no-op.
            rewritten = topics.copy()
            rewritten["query"] = rewritten["query"] + " test"
            return rewritten

        # Lambdas and the bare function are all exercised on purpose: the
        # operator has to accept each kind of callable.
        fn1 = lambda topics: rewrite(topics)
        fn2 = lambda topics: rewrite(topics)

        sequence1 = (
            ptt.ApplyGenericTransformer(fn1) >> ptt.ApplyGenericTransformer(fn2)
        )
        sequence2 = ptt.ApplyGenericTransformer(fn1) >> fn2
        sequence3 = ptt.ApplyGenericTransformer(fn1) >> rewrite
        sequence4 = fn1 >> ptt.ApplyGenericTransformer(fn2)
        sequence5 = rewrite >> ptt.ApplyGenericTransformer(fn2)

        pipelines = [sequence1, sequence2, sequence3, sequence4, sequence5]
        for sequence in pipelines:
            self.assertIsInstance(sequence, ptt.TransformerBase)
            # check we can access items
            self.assertEqual(2, len(sequence))
            # assertTrue(x, cls) would treat cls as the failure message and
            # only test truthiness; assertIsInstance checks the type.
            self.assertIsInstance(sequence[0], ptt.TransformerBase)
            self.assertIsInstance(sequence[1], ptt.TransformerBase)

            # A fresh input frame for every pipeline.
            topics_in = pd.DataFrame(
                [["q1", "hello"]], columns=["qid", "query"])
            output = sequence.transform(topics_in)
            self.assertEqual(1, len(output))
            self.assertEqual("q1", output.iloc[0]["qid"])
            self.assertEqual("hello test test", output.iloc[0]["query"])