def test_simple_pig(self):
    """Download standalone Java and Pig; the Pig run itself is disabled.

    The test returns immediately when ``is_travis_or_appveyor()`` is true.
    Otherwise it downloads the standalone Java and Pig distributions and
    checks that Java is reported as installed.

    The histogram job that follows the second ``return`` is currently
    unreachable on purpose ("it does not work for the time being"); it is
    kept so the scenario can be re-enabled by removing that ``return``.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor():
        return

    download_java_standalone()
    assert is_java_installed()

    try:
        download_pig_standalone(fLOG=fLOG)
    except FileNotFoundError:
        # for some unknown reason, it requires to be done twice
        # due to FileNotFoundError: [Errno 2] No such file or directory:
        # 'pyensae\\src\\pyensae\\file_helper\\pigjar\\pig-0.15.0\\contrib'
        # '\\piggybank\\java\\build\\classes\\org\\apache\\pig\\piggybank'
        # '\\storage\\IndexedStorage$IndexedStorageInputFormat'
        # '$IndexedStorageRecordReader'
        # '$IndexedStorageRecordReaderComparator.class'
        download_pig_standalone(fLOG=fLOG)

    # it does not work for the time being
    return

    # ------------------------------------------------------------------
    # Unreachable until the ``return`` above is removed.
    # ------------------------------------------------------------------
    here = os.path.abspath(os.path.dirname(__file__))
    temp = os.path.join(here, "temp_pig")
    os.makedirs(temp, exist_ok=True)

    # Sample data: 1000 uniform random numbers, one per line.
    sample = os.path.join(temp, "random.sample.txt")
    with open(sample, "w", encoding="utf8") as f:
        for _ in range(1000):
            f.write(str(random.random()) + "\n")

    # The script is written next to (not inside) the temporary folder.
    pig_script = os.path.normpath(os.path.join(temp, "..", "p1.pig"))
    # NOTE(review): on POSIX ``sample`` already starts with "/", so this
    # yields "file://..." -- confirm that is what run_pig/Pig expects.
    sample_uri = "file:/" + sample.replace("\\", "/")

    # Built from explicit lines: stripping spaces with str.replace(" ", "")
    # would also remove the spaces between Pig keywords and break the script.
    # '\t' is deliberately a real tab character inside the Pig quotes.
    script_lines = [
        "",
        "values = LOAD '%s' USING PigStorage('\t') AS (x:double);",
        "values_h = FOREACH values GENERATE x, ((int)(x / 0.1)) * 0.1 AS h ;",
        "hist_group = GROUP values_h BY h ;",
        "hist = FOREACH hist_group GENERATE group, COUNT(values_h) AS nb ;",
        "STORE hist INTO '%s' USING PigStorage('\t') ;",
        "",
    ]
    with open(pig_script, "w", encoding="utf8") as f:
        f.write("\n".join(script_lines) % (sample_uri, sample_uri + ".out.txt"))

    out, err = run_pig(pig_script, fLOG=fLOG, logpath=temp)
    if "first try with pig" not in out:
        raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))
def test_simple_pig(self):
    """Download standalone Java and Pig; the Pig run itself is disabled.

    The test returns immediately when the interpreter path
    (``sys.executable``) contains ``"travis"``. Otherwise it downloads the
    standalone Java and Pig distributions and checks that Java is reported
    as installed.

    The histogram job that follows the second ``return`` is currently
    unreachable on purpose ("it does not work for the time being"); it is
    kept so the scenario can be re-enabled by removing that ``return``.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if "travis" in sys.executable:
        return

    download_java_standalone()
    assert is_java_installed()
    download_pig_standalone(fLOG=fLOG)

    # it does not work for the time being
    return

    # ------------------------------------------------------------------
    # Unreachable until the ``return`` above is removed.
    # ------------------------------------------------------------------
    here = os.path.abspath(os.path.dirname(__file__))
    temp = os.path.join(here, "temp_pig")
    os.makedirs(temp, exist_ok=True)

    # Sample data: 1000 uniform random numbers, one per line.
    sample = os.path.join(temp, "random.sample.txt")
    with open(sample, "w", encoding="utf8") as f:
        for _ in range(1000):
            f.write(str(random.random()) + "\n")

    # The script is written next to (not inside) the temporary folder.
    pig_script = os.path.normpath(os.path.join(temp, "..", "p1.pig"))
    # NOTE(review): on POSIX ``sample`` already starts with "/", so this
    # yields "file://..." -- confirm that is what run_pig/Pig expects.
    sample_uri = "file:/" + sample.replace("\\", "/")

    # Built from explicit lines: stripping spaces with str.replace(" ", "")
    # would also remove the spaces between Pig keywords and break the script.
    # '\t' is deliberately a real tab character inside the Pig quotes.
    script_lines = [
        "",
        "values = LOAD '%s' USING PigStorage('\t') AS (x:double);",
        "values_h = FOREACH values GENERATE x, ((int)(x / 0.1)) * 0.1 AS h ;",
        "hist_group = GROUP values_h BY h ;",
        "hist = FOREACH hist_group GENERATE group, COUNT(values_h) AS nb ;",
        "STORE hist INTO '%s' USING PigStorage('\t') ;",
        "",
    ]
    with open(pig_script, "w", encoding="utf8") as f:
        f.write("\n".join(script_lines) % (sample_uri, sample_uri + ".out.txt"))

    out, err = run_pig(pig_script, fLOG=fLOG, logpath=temp)
    if "first try with pig" not in out:
        raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))