def testSubset(self):
    """Feed inline rows into the ``data`` alias and check ``queries_limit``.

    The inline rows add up to yahoo=25, facebook=15 and twitter=7; the
    single-letter queries all carry smaller counts and are expected to be
    missing from the checked output.
    """
    rows = [
        "yahoo\t10",
        "twitter\t7",
        "facebook\t10",
        "yahoo\t15",
        "facebook\t5",
        "a\t1",
        "b\t2",
        "c\t3",
        "d\t4",
        "e\t5",
    ]
    expected = ["(yahoo,25)", "(facebook,15)", "(twitter,7)"]

    script_args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=top_3_queries",
    ]
    pig = PigProxy.from_file(self.PIG_SCRIPT, script_args)
    # Swap the script's own "data" alias for the in-memory rows above.
    pig.override_to_data("data", rows)
    self.assertOutput(pig, "queries_limit", expected)
def testSubset(self):
    """Check ``queries_limit`` after replacing ``data`` with inline rows.

    Only the three queries with the largest summed counts in the inline
    rows (yahoo=25, facebook=15, twitter=7) are expected in the output.
    """
    params = ["n=3", "reducers=1"]
    params.append("input=" + self.INPUT_FILE)
    params.append("output=top_3_queries")
    proxy = PigProxy.from_file(self.PIG_SCRIPT, params)

    # Tab-separated "query<TAB>count" records used instead of the input file.
    proxy.override_to_data(
        "data",
        [
            "yahoo\t10",
            "twitter\t7",
            "facebook\t10",
            "yahoo\t15",
            "facebook\t5",
            "a\t1",
            "b\t2",
            "c\t3",
            "d\t4",
            "e\t5",
        ],
    )

    self.assertOutput(
        proxy,
        "queries_limit",
        ["(yahoo,25)", "(facebook,15)", "(twitter,7)"],
    )
def testLastStoreName(self):
    """Check that the proxy reports ``queries_limit`` as the last stored alias."""
    script_args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=top_3_queries",
    ]
    pig = PigProxy.from_file(self.PIG_SCRIPT, script_args)
    last_alias = pig.last_stored_alias_name()
    self.assertEqual("queries_limit", last_alias)
def testArgFiles(self):
    """Load the script parameters from a file and check ``queries_limit``."""
    expected = ["(yahoo,25)", "(facebook,15)", "(twitter,7)"]
    # Parameters come from a file here rather than an inline argument list.
    pig = PigProxy.from_file(
        self.PIG_SCRIPT,
        arg_files=["tests/data/top_queries_params.txt"],
    )
    self.assertOutput(pig, "queries_limit", expected)
def testLastStoreName(self):
    """Verify ``last_stored_alias_name()`` returns ``queries_limit``."""
    params = ["n=3", "reducers=1"]
    params.append("input=" + self.INPUT_FILE)
    params.append("output=top_3_queries")

    proxy = PigProxy.from_file(self.PIG_SCRIPT, params)
    self.assertEqual(
        "queries_limit",
        proxy.last_stored_alias_name(),
    )
def testSchemaFor(self):
    """Check the schema string the proxy reports for ``queries_sum``."""
    script_args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=top_3_queries",
    ]
    pig = PigProxy.from_file(self.PIG_SCRIPT, script_args)
    actual_schema = pig.schemaFor("queries_sum")
    self.assertEqual(actual_schema, "(query: chararray,count: long)")
def testSchemaFor(self):
    """Verify ``schemaFor('queries_sum')`` yields the expected schema text."""
    params = ["n=3", "reducers=1"]
    params.append("input=" + self.INPUT_FILE)
    params.append("output=top_3_queries")

    proxy = PigProxy.from_file(self.PIG_SCRIPT, params)
    self.assertEqual(
        proxy.schemaFor("queries_sum"),
        "(query: chararray,count: long)",
    )
def testArgFiles(self):
    """Check ``queries_limit`` when parameters are supplied via ``arg_files``."""
    param_files = ["tests/data/top_queries_params.txt"]
    proxy = PigProxy.from_file(self.PIG_SCRIPT, arg_files=param_files)
    self.assertOutput(
        proxy,
        "queries_limit",
        ["(yahoo,25)", "(facebook,15)", "(twitter,7)"],
    )
def testImplicitNtoN(self):
    """Check the script's last output without naming an alias explicitly."""
    expected = ["(yahoo,25)", "(facebook,15)", "(twitter,7)"]
    script_args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=top_3_queries",
    ]
    pig = PigProxy.from_file(self.PIG_SCRIPT, script_args)
    # No overrides: the script runs against its configured input.
    self.assertLastOutput(pig, expected)
def testImplicitNtoN(self):
    """Run the script unmodified and verify its last output."""
    params = ["n=3", "reducers=1"]
    params.append("input=" + self.INPUT_FILE)
    params.append("output=top_3_queries")

    proxy = PigProxy.from_file(self.PIG_SCRIPT, params)
    self.assertLastOutput(
        proxy,
        ["(yahoo,25)", "(facebook,15)", "(twitter,7)"],
    )
def testOverride(self):
    """Replace the ``queries_limit`` statement and check the last output.

    The replacement statement limits ``queries_ordered`` to 2 rows, so only
    the first two records are expected.
    """
    expected = ["(yahoo,25)", "(facebook,15)"]
    script_args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=top_3_queries",
    ]
    pig = PigProxy.from_file(self.PIG_SCRIPT, script_args)
    replacement = "queries_limit = LIMIT queries_ordered 2;"
    pig.override("queries_limit", replacement)
    self.assertLastOutput(pig, expected)
def testOverride(self):
    """Override ``queries_limit`` with a ``LIMIT ... 2`` statement and verify.

    With the alias redefined to keep 2 rows of ``queries_ordered``, the last
    output is expected to contain just the yahoo and facebook records.
    """
    params = ["n=3", "reducers=1"]
    params.append("input=" + self.INPUT_FILE)
    params.append("output=top_3_queries")

    proxy = PigProxy.from_file(self.PIG_SCRIPT, params)
    proxy.override(
        "queries_limit",
        "queries_limit = LIMIT queries_ordered 2;",
    )
    self.assertLastOutput(proxy, ["(yahoo,25)", "(facebook,15)"])
def testStore(self):
    """Run the script with STORE re-enabled and check the output can be deleted.

    The script is pointed at a fresh output path, STORE handling is restored
    via ``unoverride``, the script is run, and the test passes when deleting
    the output path through the cluster returns a true value.
    """
    from tempfile import mktemp

    # NOTE(review): mktemp() only reserves a name and is documented as
    # race-prone; presumably it is used because the output location must not
    # exist before the script stores into it -- confirm before changing.
    tempdir = mktemp()
    outfile = tempdir + '/top_3_queries'
    args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=" + outfile,
    ]
    proxy = PigProxy.from_file(self.PIG_SCRIPT, args)

    # By default all STORE and DUMP commands are removed
    proxy.unoverride("STORE")
    proxy.run_script()

    cluster = Cluster(proxy.pig.getPigContext())
    # assertTrue replaces the deprecated assert_ alias (same semantics).
    self.assertTrue(cluster.delete(Path(outfile)))
def testStore(self):
    """Verify that a real STORE writes output the cluster can delete.

    Builds a unique output path, re-enables the STORE command on the proxy,
    runs the script, and asserts that ``cluster.delete`` on the output path
    returns a true value.
    """
    from tempfile import mktemp

    # NOTE(review): mktemp() yields a path name without creating it and is
    # documented as unsafe against races; it appears to be used so that the
    # output path does not pre-exist -- confirm before swapping it out.
    tempdir = mktemp()
    outfile = tempdir + '/top_3_queries'

    args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=" + outfile,
    ]
    proxy = PigProxy.from_file(self.PIG_SCRIPT, args)

    # By default all STORE and DUMP commands are removed
    proxy.unoverride("STORE")
    proxy.run_script()

    cluster = Cluster(proxy.pig.getPigContext())
    deleted = cluster.delete(Path(outfile))
    # Use assertTrue: assert_ is a deprecated alias with identical behaviour.
    self.assertTrue(deleted)
def testOverrideToData_SupportsNone(self):
    """override_to_data() with a None field should load it as a null value."""
    script_args = [
        "n=3",
        "reducers=1",
        "input=" + self.INPUT_FILE,
        "output=top_3_queries",
    ]
    pig = PigProxy.from_file(self.PIG_SCRIPT, script_args)

    # Both records carry None in the first position.
    rows_with_none = [(None, 3), (None, 4)]
    pig.override_to_data("data", rows_with_none)
    pig.override(
        "queries_limit",
        "queries_limit = FILTER data BY query IS NOT NULL",
    )

    # NOTE(review): the materialised records are not asserted on within this
    # block; presumably an empty result is expected -- confirm.
    result_records = list(pig.get_alias("queries_limit"))
def testOverrideToData_SupportsNone(self):
    """Check that None values passed to override_to_data() load as nulls."""
    params = ["n=3", "reducers=1"]
    params.append("input=" + self.INPUT_FILE)
    params.append("output=top_3_queries")
    proxy = PigProxy.from_file(self.PIG_SCRIPT, params)

    # Every replacement record has None as its first field.
    proxy.override_to_data("data", [(None, 3), (None, 4)])

    null_filter = "queries_limit = FILTER data BY query IS NOT NULL"
    proxy.override("queries_limit", null_filter)

    # Materialise the alias so the overridden statement is evaluated.
    result_records = list(proxy.get_alias("queries_limit"))