Exemple #1
0
 def testSubset(self):
     # Replace the "data" alias with ten inline tab-separated rows and
     # check that the "queries_limit" alias yields the three expected tuples.
     script_params = [
         "n=3",
         "reducers=1",
         "input=" + self.INPUT_FILE,
         "output=top_3_queries",
     ]
     rows = [
         "yahoo\t10", "twitter\t7", "facebook\t10",
         "yahoo\t15", "facebook\t5",
         "a\t1", "b\t2", "c\t3", "d\t4", "e\t5",
     ]
     expected = ["(yahoo,25)", "(facebook,15)", "(twitter,7)"]

     pig = PigProxy.from_file(self.PIG_SCRIPT, script_params)
     pig.override_to_data("data", rows)
     self.assertOutput(pig, "queries_limit", expected)
Exemple #2
0
 def testSubset(self):
     # Load the script with explicit parameters, swap the "data" alias for
     # the inline rows below, and verify what "queries_limit" produces.
     args = [
         "n=3",
         "reducers=1",
         "input=" + self.INPUT_FILE,
         "output=top_3_queries",
     ]
     proxy = PigProxy.from_file(self.PIG_SCRIPT, args)
     # Each row is "<query>\t<count>"; yahoo and facebook appear twice.
     input_data = [
         "yahoo\t10",
         "twitter\t7",
         "facebook\t10",
         "yahoo\t15",
         "facebook\t5",
         "a\t1",
         "b\t2",
         "c\t3",
         "d\t4",
         "e\t5",
     ]
     output = [
         "(yahoo,25)",
         "(facebook,15)",
         "(twitter,7)",
     ]
     proxy.override_to_data("data", input_data)
     self.assertOutput(proxy, "queries_limit", output)
Exemple #3
0
 def testLastStoreName(self):
     # The proxy should report "queries_limit" as its last stored alias.
     params = ["n=3", "reducers=1"]
     params.append("input=" + self.INPUT_FILE)
     params.append("output=top_3_queries")

     pig = PigProxy.from_file(self.PIG_SCRIPT, params)
     last_alias = pig.last_stored_alias_name()
     self.assertEqual("queries_limit", last_alias)
Exemple #4
0
 def testArgFiles(self):
     # Parameters come from a params file (arg_files=) instead of an
     # inline argument list; "queries_limit" must still match.
     expected = ["(yahoo,25)", "(facebook,15)", "(twitter,7)"]
     param_files = ["tests/data/top_queries_params.txt"]
     pig = PigProxy.from_file(self.PIG_SCRIPT, arg_files=param_files)
     self.assertOutput(pig, "queries_limit", expected)
Exemple #5
0
 def testLastStoreName(self):
     # Build the proxy from the script and confirm the name it reports
     # for the last stored alias.
     script_args = [
         "n=3",
         "reducers=1",
         "input=" + self.INPUT_FILE,
         "output=top_3_queries",
     ]
     pig_proxy = PigProxy.from_file(self.PIG_SCRIPT, script_args)
     reported = pig_proxy.last_stored_alias_name()
     self.assertEqual("queries_limit", reported)
Exemple #6
0
 def testSchemaFor(self):
     # schemaFor() on the "queries_sum" alias should return the schema
     # string asserted below.
     params = [
         "n=3",
         "reducers=1",
         "input=" + self.INPUT_FILE,
         "output=top_3_queries",
     ]
     pig = PigProxy.from_file(self.PIG_SCRIPT, params)
     self.assertEqual(
         pig.schemaFor('queries_sum'),
         '(query: chararray,count: long)',
     )
Exemple #7
0
 def testSchemaFor(self):
     # Ask the proxy for the schema of "queries_sum" and compare it with
     # the expected textual form.
     expected_schema = '(query: chararray,count: long)'
     script_args = ["n=3", "reducers=1"]
     script_args += ["input=" + self.INPUT_FILE, "output=top_3_queries"]

     pig_proxy = PigProxy.from_file(self.PIG_SCRIPT, script_args)
     actual_schema = pig_proxy.schemaFor('queries_sum')
     self.assertEqual(actual_schema, expected_schema)
Exemple #8
0
 def testArgFiles(self):
     # Same check as the inline-argument tests, but the script parameters
     # are supplied through a parameter file passed as arg_files.
     params_path = "tests/data/top_queries_params.txt"
     pig_proxy = PigProxy.from_file(self.PIG_SCRIPT, arg_files=[params_path])
     self.assertOutput(
         pig_proxy,
         "queries_limit",
         ["(yahoo,25)", "(facebook,15)", "(twitter,7)"],
     )
Exemple #9
0
 def testImplicitNtoN(self):
     # No alias is named here: assertLastOutput() is given only the proxy
     # and the expected tuples.
     expected = ["(yahoo,25)", "(facebook,15)", "(twitter,7)"]
     params = [
         "n=3",
         "reducers=1",
         "input=" + self.INPUT_FILE,
         "output=top_3_queries",
     ]
     pig = PigProxy.from_file(self.PIG_SCRIPT, params)
     self.assertLastOutput(pig, expected)
Exemple #10
0
 def testImplicitNtoN(self):
     # Run the unmodified script and compare its last output against the
     # three expected tuples, without naming an alias explicitly.
     script_args = ["n=3", "reducers=1"]
     script_args.extend(["input=" + self.INPUT_FILE, "output=top_3_queries"])
     pig_proxy = PigProxy.from_file(self.PIG_SCRIPT, script_args)
     self.assertLastOutput(
         pig_proxy,
         [
             "(yahoo,25)",
             "(facebook,15)",
             "(twitter,7)",
         ],
     )
Exemple #11
0
 def testOverride(self):
     # Redefine the "queries_limit" alias with a LIMIT of 2 and check that
     # the last output then contains only the first two tuples.
     args = [
         "n=3",
         "reducers=1",
         "input=" + self.INPUT_FILE,
         "output=top_3_queries",
     ]
     proxy = PigProxy.from_file(self.PIG_SCRIPT, args)
     # The ';' inside the string belongs to the Pig statement itself.
     proxy.override("queries_limit",
                    "queries_limit = LIMIT queries_ordered 2;")
     output = [
         "(yahoo,25)",
         "(facebook,15)",
     ]
     self.assertLastOutput(proxy, output)
Exemple #12
0
 def testOverride(self):
     # Swap the definition of "queries_limit" for one limited to two rows;
     # the last output should shrink to the two tuples listed below.
     replacement = "queries_limit = LIMIT queries_ordered 2;"
     expected = ["(yahoo,25)", "(facebook,15)"]
     params = [
         "n=3",
         "reducers=1",
         "input=" + self.INPUT_FILE,
         "output=top_3_queries",
     ]

     pig = PigProxy.from_file(self.PIG_SCRIPT, params)
     pig.override("queries_limit", replacement)
     self.assertLastOutput(pig, expected)
Exemple #13
0
    def testStore(self):
        # Run the script with its STORE command re-enabled, then verify
        # that the stored output path can be deleted through the cluster.
        #
        # NOTE(review): tempfile.mktemp() is deprecated because the name it
        # returns can be claimed by another process before it is used. It
        # is kept here since only a not-yet-existing path name is needed;
        # confirm whether tempfile.mkdtemp() fits the target filesystem.
        from tempfile import mktemp
        tempdir = mktemp()
        outfile = tempdir + '/top_3_queries'
        args = [
            "n=3",
            "reducers=1",
            "input=" + self.INPUT_FILE,
            "output=" + outfile,
        ]
        proxy = PigProxy.from_file(self.PIG_SCRIPT, args)

        # By default all STORE and DUMP commands are removed
        proxy.unoverride("STORE")
        proxy.run_script()
        cluster = Cluster(proxy.pig.getPigContext())
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(cluster.delete(Path(outfile)))
Exemple #14
0
    def testStore(self):
        # Exercise a real STORE: point "output" at a fresh temporary path,
        # run the script, and assert the cluster can delete that path.
        #
        # NOTE(review): tempfile.mktemp() is deprecated (the returned name
        # is not reserved, so it is race-prone). Left as-is because the
        # path must not exist before the script runs -- confirm whether
        # tempfile.mkdtemp() would be acceptable here.
        from tempfile import mktemp
        tempdir = mktemp()
        outfile = tempdir + '/top_3_queries'
        args = [
            "n=3",
            "reducers=1",
            "input=" + self.INPUT_FILE,
            "output=" + outfile,
        ]
        proxy = PigProxy.from_file(self.PIG_SCRIPT, args)

        # By default all STORE and DUMP commands are removed
        proxy.unoverride("STORE")
        proxy.run_script()
        cluster = Cluster(proxy.pig.getPigContext())
        # Use assertTrue; the assert_ alias is deprecated in unittest.
        deleted = cluster.delete(Path(outfile))
        self.assertTrue(deleted)
Exemple #15
0
    def testOverrideToData_SupportsNone(self):
        """override_to_data() w/None value results in Null value being loaded"""
        args = [
            "n=3",
            "reducers=1",
            "input=" + self.INPUT_FILE,
            "output=top_3_queries",
        ]
        proxy = PigProxy.from_file(self.PIG_SCRIPT, args)

        # Both replacement records have None as their first element.
        new_data = [
            (None, 3),
            (None, 4),
        ]
        proxy.override_to_data("data", new_data)
        # Redefine "queries_limit" to keep only records with a non-null query.
        proxy.override("queries_limit",
                       "queries_limit = FILTER data BY query IS NOT NULL")
        result_records = list(proxy.get_alias("queries_limit"))
        # If None was loaded as a Pig null, the filter drops every record.
        # Without this assertion the test could never fail on a wrong result.
        self.assertEqual([], result_records)
Exemple #16
0
    def testOverrideToData_SupportsNone(self):
        """override_to_data() w/None value results in Null value being loaded"""
        # Script parameters passed to PigProxy.from_file().
        args = [
            "n=3",
            "reducers=1",
            "input=" + self.INPUT_FILE,
            "output=top_3_queries",
        ]
        proxy = PigProxy.from_file(self.PIG_SCRIPT, args)

        # Both replacement records have None as their first element.
        new_data = [
            (None, 3),
            (None, 4),
        ]
        proxy.override_to_data("data", new_data)
        # Redefine "queries_limit" to keep only records with a non-null query.
        proxy.override("queries_limit",
                       "queries_limit = FILTER data BY query IS NOT NULL")
        # NOTE(review): no assertion on result_records is visible in this
        # view; the method may continue past this line -- confirm.
        result_records = list(proxy.get_alias("queries_limit"))