def build_grammar(g4):
    """
    compile the grammar for a specific file

    @param      g4      grammar format antlr4, the extension ``.g4`` is added if missing
    @return             standard output of the antlr4 tool

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at:
    `antlr4 options <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    The function downloads ``antlr-4.4-complete.jar`` next to this file if it
    is not already there, adds it to the environment variable ``CLASSPATH``
    and runs ``java org.antlr.v4.Tool -Dlanguage=Python3``.
    It raises *FileNotFoundError* if the jar cannot be downloaded and
    *Exception* if the tool writes anything on the standard error.
    """
    if not g4.endswith(".g4"):
        g4 = g4 + ".g4"

    version = "4.4"
    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(version)
    spl = url.split("/")
    domain, name = "/".join(spl[:-1]) + "/", spl[-1]
    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, name)
    if not os.path.exists(final):
        from ..resources.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        print(name)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # The jar is put in front of the existing CLASSPATH. The path is built
    # with os.path.join: the former literal "\antlr" contained the escape
    # sequence \a (bell character) instead of a backslash followed by "a",
    # and "%CLASSPATH%" was stored verbatim instead of the current value.
    classpath = os.environ.get("CLASSPATH", "")
    if name not in classpath:
        entries = [".", final]
        if classpath:
            entries.append(classpath)
        os.environ["CLASSPATH"] = os.pathsep.join(entries)

    cmd = "org.antlr.v4.Tool -Dlanguage=Python3 " + g4
    from pyquickhelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True)

    if len(err) > 0:
        # second attempt with a default location of java on Windows
        javapath = r"C:\Program Files\Java\jre7\bin"
        if sys.platform.startswith("win") and os.path.exists(javapath):
            # PATH is only extended when the folder really exists
            os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + javapath
            out, err = run_cmd("java " + cmd, wait=True)
            if len(err) > 0:
                raise Exception("unable to compile: " + final + "\nERR:\n" + err +
                                "\nCMD:\njava " + cmd + "\nYou should do it manually.")
        else:
            raise Exception("unable to compile: " + final + "\nERR:\n" + err +
                            "\nCMD:\njava " + cmd)

    return out
def runpy(self, line, cell=None):
    """
    defines command ``%%runpy``

    Runs a python script with the content of the cell sent to its standard
    input and displays what the script produces, a timeout is set up at 10s.
    The standard error is displayed instead of the standard output as soon
    as it is not empty.

    .. versionadded:: 1.1
    """
    parser = self.get_parser(MagicFile.runpy_parser, "runpy")
    parsed = self.get_args(line, parser)
    if parsed is None:
        return None

    script = parsed.file
    if len(script) == 0:
        # no script name: the magic command is called again with an empty line
        self.runpy("")
        return None

    python_exe = sys.executable.replace("pythonw", "python")
    command = python_exe + " " + script + " " + parsed.args
    out, err = run_cmd(command, wait=True, sin=cell, communicate=True,
                       timeout=10, shell=False)

    if len(err) > 0:
        return HTML(
            '<font color="#DD0000">Error</font><br /><pre>\n%s\n</pre>' % err)
    return HTML('<pre>\n%s\n</pre>' % out)
def run_jython(pyfile, argv=None, jython_path=None, sin=None, timeout=None, fLOG=noLOG):
    """
    runs a jython script and returns the standard output and error

    @param      pyfile          jython file
    @param      argv            arguments to send to the command line
    @param      jython_path     path to jython standalone
    @param      sin             data to send to the standard input
    @param      timeout         timeout
    @param      fLOG            logging function, forwarded to *run_cmd*
    @return                     out, err

    If *jython_path* is None, the function uses the location returned by
    *get_jython_jar*.
    """
    if jython_path is None:
        jython_path = get_jython_jar()

    def clean(i, p):
        # the first item (the java command) is left untouched,
        # the others get their double quotes escaped and are quoted
        # when they contain a space
        if i == 0:
            return p
        if '"' in p:
            p = p.replace('"', '\\"')
        if " " in p:
            p = '"{0}"'.format(p)
        return p

    cmd = [get_java_cmd(), "-jar", jython_path, pyfile]
    if argv is not None:
        cmd.extend(argv)
    cmd = " ".join(clean(i, _) for i, _ in enumerate(cmd))

    # fLOG was accepted but silently ignored, it is now given to run_cmd
    out, err = run_cmd(cmd, wait=True, sin=sin, communicate=True,
                       timeout=timeout, shell=False, fLOG=fLOG)
    return out, err
def test_flake8(self):
    """
    Runs flake8 on the folder ``src`` and fails if a message remains
    once the ignored codes and files are filtered out.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.dirname(__file__))
    sources = os.path.normpath(os.path.join(here, "..", "..", "src"))
    flake8 = os.path.join(os.path.dirname(sys.executable), "Scripts", "flake8")
    out, err = run_cmd(flake8 + " " + sources, fLOG=fLOG, wait=True)

    # messages containing one of these markers are not reported
    ignored = ("E501", "__init__.py", "E265", "W291")
    remaining = []
    for row in out.split("\n"):
        if any(marker in row for marker in ignored):
            continue
        if len(row) > 1:
            remaining.append(row)

    if __name__ == "__main__":
        for row in remaining:
            parts = row.split(":")
            if len(parts[0]) == 1:
                # a one character first field is merged back with the second
                # one (presumably a Windows drive letter split by ':')
                parts[1] = ":".join(parts[0:2])
                del parts[0]
            print(' File "{0}", line {1}, {2}'.format(parts[0], parts[1], parts[-1]))

    if len(remaining) > 0:
        raise Exception(
            "{0} lines\n{1}".format(len(remaining), "\n".join(remaining)))
def test_flake8(self):
    """
    Runs flake8 on the folder ``src`` and fails if more than one message
    remains once the ignored codes and files are filtered out.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.dirname(__file__))
    sources = os.path.normpath(os.path.join(here, "..", "..", "src"))
    flake8 = os.path.join(os.path.dirname(sys.executable), "Scripts", "flake8")
    out, err = run_cmd(flake8 + " " + sources, fLOG=fLOG, wait=True)

    # messages containing one of these markers are not reported
    ignored = ("E501", "__init__.py", "E265", "W291", "W293", "pandas_helper")
    remaining = []
    for row in out.split("\n"):
        if any(marker in row for marker in ignored):
            continue
        if len(row) > 1:
            remaining.append(row)

    if __name__ == "__main__":
        for row in remaining:
            parts = row.split(":")
            if len(parts[0]) == 1:
                # a one character first field is merged back with the second
                # one (presumably a Windows drive letter split by ':')
                parts[1] = ":".join(parts[0:2])
                del parts[0]
            print(' File "{0}", line {1}, {2}'.format(parts[0], parts[1], parts[-1]))

    # NOTE(review): a single remaining line is tolerated here - confirm it is intended
    if len(remaining) > 1:
        raise Exception(
            "{0} lines\n{1}".format(len(remaining), "\n".join(remaining)))
def is_java_installed(fLOG=noLOG):
    """
    this function checks if java is installed

    @param      fLOG        logging function, receives the standard output and error
    @return                 boolean

    The function runs ``java -showversion`` and looks for the banner of a
    Java runtime in what the command prints. Both the Oracle banner
    (``Java(TM)``) and the OpenJDK one (``OpenJDK``) are recognised.
    """
    cmd = get_java_cmd() + " -showversion"
    out, err = run_cmd(cmd, wait=True, log_error=False)
    fLOG("OUT:\n", out)
    fLOG("ERR:\n", err)

    # "Java(TM)" alone only matches Oracle builds; the banner is looked for
    # in both streams because its destination depends on the launcher
    banners = ("Java(TM)", "OpenJDK")
    return any(banner in stream for stream in (err, out) for banner in banners)
def _run_jython(self, cell, filename, func_name, args, true_jython=None):
    """
    run a jython script

    @param      cell            content of the cell, sent to the standard input
    @param      filename        filename used to store the content of the cell
    @param      func_name       function name
    @param      args            list of arguments to run
    @param      true_jython     jython (True) or this Python (False)
    @return                     out, err

    The function reads *filename* and writes a second script
    ``<filename>.temp.py`` made of a fallback definition of the decorator
    ``outputSchema``, the original content (``except Exception,`` is
    replaced by ``except Exception as``) and a loop which applies
    *func_name* to every line of the standard input.
    """
    with open(filename, 'r', encoding="utf8") as pyf:
        content = pyf.read()

    # The generated code is described line by line: its indentation does
    # not depend on the indentation of this function anymore.
    header = "\n".join([
        "",
        "# -*- coding: utf8 -*-",
        "if __name__ != '__lib__':",
        "    def outputSchema(dont_care):",
        "        def wrapper(func):",
        "            def inner(*args, **kwargs):",
        "                return func(*args, **kwargs)",
        "            return inner",
        "        return wrapper",
        "",
    ])
    footer = "\n".join([
        "",
        "if __name__ != '__lib__':",
        "    import sys",
        "    for row in sys.stdin:",
        "        row = row.strip()",
        "        res = {0}(row)",
        "        sys.stdout.write(str(res))",
        '        sys.stdout.write("\\n")',
        "        sys.stdout.flush()",
        "",
    ]).format(func_name)

    temp = filename.replace(".py", ".temp.py")
    with open(temp, "w", encoding="utf8") as pyf:
        pyf.write(header)
        pyf.write(content.replace("except Exception,", "except Exception as "))
        pyf.write(footer)

    if true_jython:
        download_java_standalone()
        out, err = run_jython(temp, sin=cell, timeout=10)
    else:
        cmd = sys.executable.replace("pythonw", "python") + " " + temp + " " + \
            " ".join("{}".format(_) for _ in args)
        out, err = run_cmd(cmd, wait=True, sin=cell, communicate=True,
                           timeout=10, shell=False)

    # the outputs were computed but never returned
    return out, err
def run_dot(dot_file, outimg):
    """
    calls graphivz on a dot file and produces an image

    @param      dot_file        file, format `DOT <http://www.graphviz.org/doc/info/lang.html>`_
    @param      outimg          output image, its extension gives the output format (option ``-T``)
    @return                     out, err (stdout, stderr from graphviz)

    The function raises *Exception* if graphviz writes anything
    on the standard error.

    .. versionadded:: 1.1
    """
    ext = os.path.splitext(outimg)[-1].strip(".")
    exe = dot_exe()
    # every path is quoted (not only the executable) to support spaces
    cmd = "\"{0}\" -T{1} -o \"{2}\" \"{3}\"".format(exe, ext, outimg, dot_file)
    out, err = run_cmd(cmd, wait=True)
    if len(err) > 0:
        raise Exception("unable to run graphviz on {0}.\nCMD:\n{1}\nOUT:\n{2}\nERR:\n{3}".format(
            dot_file, cmd, out, err))
    return out, err
def git_commit_all(local_folder, url_https, message, user=None, password=None, timeout=300, fLOG=noLOG):
    """
    commits and pushes every modification of a local git repository,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name`` (not used by the function)
    @param      message         message for the commit
    @param      user            part 1 of the credentials (not used by the function)
    @param      password        part 2 of the credentials (not used by the function)
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    We assume that git can be run without giving its full location.

    The function executes the following commands::

        cd [folder]
        git add -A
        git commit -m "[message]"
        git push -u origin master

    The outputs are checked by *git_check_error*.
    """
    # The commands are listed explicitly instead of being extracted from an
    # indented template: stripping whitespace from a template also alters
    # the folder and the message. Double quotes of the message are escaped
    # because the message is itself enclosed in double quotes.
    commands = [
        "cd {0}".format(local_folder),
        "git add -A",
        'git commit -m "{0}"'.format(message.replace('"', '\\"')),
        "git push -u origin master",
    ]
    cmd = "&".join(commands)
    sin = ""  # nothing is sent to the standard input
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
def test_script_pig(self):
    """
    Writes a small streaming script, sends it one line describing a list
    of stations on its standard input and checks it outputs something.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    data = os.path.join(os.path.abspath(os.path.split(__file__)[0]), "data")

    # The script is described line by line: its indentation is significant
    # and must not depend on the indentation of this test.
    pyth = "\n".join([
        "",
        "import sys, datetime",
        'cols = [ _ for _ in sys.argv if ".py" not in _ ]',
        "for row in sys.stdin:",
        "    js = eval(row)",
        "    for station in js:",
        "        vals = [ station[c] for c in cols ]",
        '        sys.stdout.write(",".join(vals))',
        '        sys.stdout.write("\\n")',
        "        sys.stdout.flush()",
        "",
    ])

    fold = os.path.join(data, "..", "temp_pypig_out")
    if not os.path.exists(fold):
        os.mkdir(fold)

    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)

    tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

    # the script is asked for the column "name" of every station
    cmd = sys.executable.replace("pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(cmd, wait=True, sin=tosend, communicate=True,
                       timeout=3, shell=False)
    fLOG("OUT\n", out)
    fLOG("ERR\n", err)
    assert len(out) > 0
def git_change_remote_origin(local_folder, url_https, user=None, password=None, add_fetch=False, timeout=10, fLOG=noLOG):
    """
    Change the origin of the repository. The url and the password
    refer to the new repository.

    @param      local_folder    local folder
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      add_fetch       add instruction ``fetch``
    @param      fLOG            logging function
    @return                     None

    The url given to git is built by *git_url_user_password*.
    The function runs the instructions::

        cd [folder]
        git remote remove origin
        git remote add origin url
        git fetch                   (only if add_fetch is True)

    The outputs are checked by *git_check_error*.
    """
    url_user = git_url_user_password(url_https, user, password)

    # The commands are listed explicitly instead of being extracted from an
    # indented template: stripping whitespace from a template also alters
    # the folder and the url.
    commands = [
        "cd {0}".format(local_folder),
        "git remote remove origin",
        "git remote add origin {0}".format(url_user),
    ]
    if add_fetch:
        commands.append("git fetch")

    cmd = "&".join(commands)
    sin = ""  # nothing is sent to the standard input
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
def build_grammar(g4, version="4.5", fLOG=noLOG):
    """
    compile the grammar for a specific file

    @param      g4          grammar format antlr4, a name without extension ``.g4``
                            is looked for in the folder of this file
    @param      version     version of *antlr4* to use, 4.4, 4.5-rc-2
    @param      fLOG        logging function
    @return                 standard output and error of the antlr4 tool in a single string

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at:
    `antlr4 options <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    The function downloads the jar next to this file if it is not already
    there and puts it in front of the environment variable ``CLASSPATH``.
    The compilation is considered as failed if the lexer
    (``<grammar>Lexer.py``) does not exist or if the standard error
    contains ``error``. It raises *FileNotFoundError* if the jar cannot be
    downloaded and *Exception* if the compilation fails.

    @example(Build a Antlr4 grammer)

    See `grammars-v4 <https://github.com/antlr/grammars-v4>`_

    @code
    build_grammar("R.g4")
    @endcode

    @endexample
    """
    folder = os.path.abspath(os.path.dirname(__file__))
    if not g4.endswith(".g4"):
        g4 = os.path.join(folder, g4 + ".g4")

    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(
        version)
    spl = url.split("/")
    domain, name = "/".join(spl[:-1]) + "/", spl[-1]
    final = os.path.join(folder, name)
    if not os.path.exists(final):
        from ..resources.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # The jar is added in front of the existing CLASSPATH only when it is
    # missing. The previous logic dropped the existing value in that case
    # and duplicated the entries when the jar was already there.
    classpath = os.environ.get("CLASSPATH", "")
    if name not in classpath:
        entries = [".", final]
        if classpath:
            entries.append(classpath)
        classpath = os.pathsep.join(entries)
    os.environ["CLASSPATH"] = classpath
    fLOG("CLASSPATH", os.environ["CLASSPATH"])

    cmd = "org.antlr.v4.Tool -Dlanguage=Python3 " + g4
    from pyquickhelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True, fLOG=fLOG)

    def compiled():
        # g4 always ends with ".g4" at this stage
        lexer = g4[:-len(".g4")] + "Lexer.py"
        return os.path.exists(lexer)

    def failed():
        return not compiled() or "error" in err

    def failure(suffix=""):
        return Exception(
            "unable to compile: " + final + "\nCLASSPATH:\n" + os.environ["CLASSPATH"] +
            "\nERR:\n" + err + "\nCMD:\njava " + cmd + suffix)

    if failed():
        # second attempt with a default location of java on Windows
        javapath = r'C:\Program Files\Java\jre7\bin\java.exe'
        if sys.platform.startswith("win") and os.path.exists(javapath):
            # PATH receives the folder of java.exe, not the executable itself
            os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + \
                os.path.dirname(javapath)
            out, err = run_cmd(
                '"' + javapath + '" ' + cmd, wait=True, fLOG=fLOG)
            if failed():
                raise failure("\nYou should do it manually.")
        else:
            raise failure()

    return out + "\nERR:\n" + err
def git_clone(local_folder, url_https, user=None, password=None, timeout=60, init=True, fLOG=noLOG):
    """
    clone a project from a git repository in a non empty local folder,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      init            see below (True, use fetch, False, use clone)
    @param      fLOG            logging function
    @return                     local_folder if *init* is True,
                                ``local_folder/<project name>`` otherwise

    The function creates *local_folder* if it does not exist and raises
    an exception if it already contains a folder ``.git``.
    We assume that git can be run without giving its full location.

    The function executes the following commands (if init is True)::

        cd [folder]
        git init
        git remote add origin [https://user.password@server/project.git]
        git fetch

    Otherwise, it does::

        cd [folder]
        git clone [https://user.password@server/project.git] .

    The outputs are checked by *git_check_error*.

    @example(Automation___Clone many folders in one row)

    @code
    eleves = "project1;project2;..."
    root = r"destination"

    for el in eleves.split(";"):
        cl = el.lower().replace(".","-")
        fold = os.path.join(root, el)
        if not os.path.exists(fold):
            print("clone", el)
            url = "https://<gitlab>/<group>/{0}.git".format(cl)
            git_clone(  fold, url,user=user,password=password, init=False,fLOG=print)
    @endcode

    @endexample
    """
    url_user = git_url_user_password(url_https, user, password)
    # parameter timeout is not overwritten anymore (it used to be forced to 60)
    local_folder = os.path.normpath(os.path.abspath(local_folder))

    # checks shared by both modes
    if not os.path.exists(local_folder):
        fLOG("creating folder", local_folder)
        os.mkdir(local_folder)

    hg = os.path.join(local_folder, ".git")
    if os.path.exists(hg):
        raise Exception("folder {0} should not exist".format(local_folder))

    if init:
        commands = [
            "cd {0}".format(local_folder),
            "git init",
            "git remote add origin {0}".format(url_user),
            "git fetch",
        ]
        result = local_folder
    else:
        final = os.path.split(url_user)[-1].replace(".git", "")
        locf = os.path.join(local_folder, final)
        if os.path.exists(locf):
            raise Exception(
                "folder {0} should not exists before cloning".format(locf))
        # NOTE(review): "git clone url ." clones into local_folder itself
        # whereas the function returns local_folder/<project name> - confirm
        commands = [
            "cd {0}".format(local_folder),
            "git clone {0} .".format(url_user),
        ]
        result = locf

    cmd = "&".join(commands)
    sin = ""  # nothing is sent to the standard input
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
    return result
def compile_cython_single_script(script, fLOG=noLOG):
    """
    This function considers a script ``.pyx``, writes
    the proper setup file, and compiles it.

    @param      script      filename
    @param      fLOG        logging function
    @return                 standard output of the compilation

    The function applies the steps described in the basic tutorial
    `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_.
    The function creates a file ``<script>.setup.py`` in the same location
    and runs it with ``build_ext --inplace`` from that location.
    It raises *ValueError* if the extension is not ``.pyx``,
    *FileNotFoundError* if the script does not exist and
    *CustomCythonError* if the compilation writes anything on the
    standard error.

    @FAQ(Cython___How to compile a Cython function?)

    This function compiles a `Cython <http://cython.org/>`_ script.
    This extension allows to implement Python functions in a pseudo-language
    close to `C <https://en.wikipedia.org/wiki/C_(programming_language)>`_.
    The instructions described in the tutorial
    `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_
    must be followed to be able to use a function written in Cython.
    That is what function :func:`compile_cython_single_script` does.

    Since the part written in pseudo C is compiled to make it much faster,
    the most difficult part is usually to make sure the Python interpreter
    finds the <b>right</b> compiler. This compiler must be the same as the
    one used to compile Python and it changes with every version. See
    `Compiling Python on Windows <https://docs.python.org/3/using/windows.html?highlight=visual%20studio#compiling-python-on-windows>`_
    and pay attention to the version of Python you are using.

    @endFAQ
    """
    ext = os.path.splitext(script)[-1]
    if ext != ".pyx":
        raise ValueError("no extension .pyx: " + script)
    if not os.path.exists(script):
        raise FileNotFoundError(script)

    # The command line is run from the folder of the script: absolute paths
    # are required, otherwise a relative script such as "sub/file.pyx"
    # leads to a setup file which cannot be found once the current folder
    # has changed.
    current, name = os.path.split(os.path.abspath(script))

    # distutils was removed from the standard library in Python 3.12,
    # setuptools is tried first and distutils remains as a fallback.
    # The script is described line by line to control its indentation.
    setup_script = "\n".join([
        "",
        "try:",
        "    from setuptools import setup",
        "except ImportError:",
        "    from distutils.core import setup",
        "from Cython.Build import cythonize",
        "",
        "setup(",
        '    ext_modules = cythonize("{0}")',
        ")",
        "",
    ]).format(name)

    filename = os.path.join(current, name + ".setup.py")
    with open(filename, "w", encoding="utf8") as f:
        f.write(setup_script)

    cmd = sys.executable + " -u {0} build_ext --inplace".format(filename)

    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=current)
    if len(err) > 0:
        raise CustomCythonError(
            "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}".format(cmd, out, err))

    return out
def run_pig(pigfile, argv=None, pig_path=None, hadoop_path=None, jython_path=None, timeout=None, logpath="logs", fLOG=noLOG):
    """
    runs a pig script and returns the standard output and error

    @param      pigfile         pig file
    @param      argv            arguments to send to the command line (not used by the function)
    @param      pig_path        path to pig 0.XX.0
    @param      hadoop_path     path to hadoop 2.5.2
    @param      jython_path     path to jython (not used by the function)
    @param      timeout         timeout
    @param      logpath         path to the logs
    @param      fLOG            logging function, also forwarded to *run_cmd*
    @return                     out, err

    If *pig_path* is None, the function uses ``pig-0.14.0`` in the folder
    returned by *get_pig_path*; if *hadoop_path* is None, it uses the
    folder returned by *get_hadoop_path*.
    The function defines the environment variables ``JAVA_HOME``,
    ``PIG_CONF_DIR``, ``HADOOP_HOME``, ``HADOOP_CLIENT_OPTS`` if they are
    missing and raises *FileNotFoundError* if the folder it assigns to
    ``PIG_CONF_DIR`` or ``HADOOP_HOME`` does not exist.
    The script is run in local mode (``-x local``).
    """
    if pig_path is None:
        pig_path = os.path.join(get_pig_path(), "pig-0.14.0")

    if hadoop_path is None:
        hadoop_path = get_hadoop_path()

    java = get_java_path()
    if "JAVA_HOME" not in os.environ:
        os.environ["JAVA_HOME"] = java

    if "PIG_CONF_DIR" not in os.environ:
        os.environ["PIG_CONF_DIR"] = os.path.normpath(
            os.path.join(pig_path, "conf"))
        if not os.path.exists(os.environ["PIG_CONF_DIR"]):
            raise FileNotFoundError(os.environ["PIG_CONF_DIR"])

    if "HADOOP_HOME" not in os.environ:
        os.environ["HADOOP_HOME"] = hadoop_path
        if not os.path.exists(os.environ["HADOOP_HOME"]):
            raise FileNotFoundError(os.environ["HADOOP_HOME"])

    if "HADOOP_CLIENT_OPTS" not in os.environ:
        os.environ["HADOOP_CLIENT_OPTS"] = "-Xmx1024m"

    fLOG("PIG_CONF_DIR=", os.environ["PIG_CONF_DIR"])

    def clean(i, p):
        # the first item (the java command) is left untouched,
        # the others get their double quotes escaped and are quoted
        # when they contain a space
        if i == 0:
            return p
        if '"' in p:
            p = p.replace('"', '\\"')
        if " " in p:
            p = '"{0}"'.format(p)
        return p

    # switch kept from the original code: the full list of jars is disabled
    full = False
    if full:
        folds = set(os.path.split(j)[0] for j in get_pig_jars())
        jars = [os.path.join(f, "*.jar") for f in folds]

        share = os.path.join(hadoop_path, "hadoop-2.5.2", "share", "hadoop")
        hadoop_jars = [
            ("common", "lib", "*.jar"),
            ("hdfs", "lib", "*.jar"),
            ("mapreduce", "lib", "*.jar"),
            ("httpfs", "tomcat", "lib", "*.jar"),
            ("tools", "lib", "*.jar"),
            ("yarn", "lib", "*.jar"),
            ("common", "hadoop-common-2.5.2.jar"),
            # the extension .jar was missing for this entry
            ("common", "hadoop-nfs-2.5.2.jar"),
            ("hdfs", "hadoop-hdfs-2.5.2.jar"),
            ("hdfs", "hadoop-hdfs-nfs-2.5.2.jar"),
            ("mapreduce", "*.jar"),
            ("yarn", "*.jar"),
        ]
        jars.extend(os.path.join(share, *parts) for parts in hadoop_jars)
        jars.append(os.path.join(pig_path, "pig-0.14.0-core-h1.jar"))
    else:
        jars = [os.path.join(pig_path, "pig-0.14", "legacy",
                             "pig-0.14.0-withouthadoop-h2.jar")]

    # the jars found on disk are only logged
    # NOTE(review): the classpath below is built from the patterns,
    # not from the files found by glob - confirm this is intended
    jarsall = []
    for j in jars:
        jarsall.extend(glob.glob(j))
    jarsall.sort()
    for j in jarsall:
        fLOG(j)

    # os.pathsep is ";" on Windows (the former hard-coded value)
    jars = os.pathsep.join(jars)
    fLOG("jars", jars)

    cmd = [get_java_cmd(), "-Xmx1024m",
           "-classpath", jars,
           "-Dpig.log.dir=" + logpath,
           "-Dhadoop.log.dir=" + logpath,
           "-Dhadoop.tmp.dir=" + logpath,
           "-Dpig.log.file=pid.log",
           "-Djava.io.tmpdir=" + logpath,
           "-Dpig.home.dir=" + pig_path,
           "org.apache.pig.Main",
           "-x", "local", pigfile,
           "-stop_on_failure"
           ]

    cmd = " ".join(clean(i, _) for i, _ in enumerate(cmd))
    # fLOG was accepted but not given to run_cmd
    out, err = run_cmd(cmd, wait=True, sin=None, communicate=True,
                       timeout=timeout, shell=False, fLOG=fLOG)
    return out, err
def test_script_pig(self):
    """
    Checks a PIG job which streams its rows through a python script.

    The test first runs the python script locally on two stations, then
    uploads the data if needed, submits the PIG job, waits for it,
    and downloads the merged results which must not be empty.
    The test is skipped if ``self.client`` is None.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    if self.client is None:
        return

    data = os.path.join(os.path.abspath(os.path.split(__file__)[0]), "data")

    fold = os.path.join(data, "..", "temp_pypig_az")
    if not os.path.exists(fold):
        os.mkdir(fold)

    # python script, described line by line because its indentation is
    # significant and must not depend on the indentation of this test
    pyth = "\n".join([
        "",
        "import sys, datetime",
        'cols = [ _ for _ in sys.argv if ".py" not in _ ]',
        "for row in sys.stdin:",
        "    row = row.strip()",
        "    if len(row) == 0 :",
        "        continue",
        "    js = eval(row)",
        "    for station in js:",
        "        vals = [ str(station[c]).strip() for c in cols ]",
        '        sys.stdout.write(",".join(vals))',
        '        sys.stdout.write("\\n")',
        "        sys.stdout.flush()",
        "",
    ])

    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)

    # the same station is sent twice: two lines are expected
    station = """{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}"""
    tosend = "[" + station + "," + station + "]"

    cmd = sys.executable.replace("pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(cmd, wait=True, sin=tosend, communicate=True,
                       timeout=3, shell=False)
    out = out.strip("\n\r ")
    spl = out.split("\n")
    if len(spl) != 2:
        # the message gives the number of lines, which is what is checked
        raise Exception("len:{2}\nOUT:\n{0}\nERR:\n{1}".format(out, err, len(spl)))

    # PIG script, one instruction per line: the comment starting with "--"
    # must end with its own line
    pig = "\n".join([
        "",
        "DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`",
        "    SHIP ('pystream.py')",
        "    INPUT(stdin USING PigStreaming(','))",
        "    OUTPUT (stdout USING PigStreaming(','));",
        "",
        "jspy = LOAD '$CONTAINER/$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);",
        "",
        "--DUMP jspy ;",
        "",
        "matrice = STREAM jspy THROUGH pystream AS",
        "    ( bonus:chararray,",
        "      available_bike_stands:double,",
        "      available_bikes:double,",
        "      lat:double,",
        "      lng:double,",
        "      name:chararray,",
        "      status:chararray) ;",
        "",
        "DUMP matrice ;",
        "",
        "DESCRIBE jspy ;",
        "DESCRIBE matrice ;",
        "",
        "STORE matrice INTO '$CONTAINER/$PSEUDO/unittest2/results.txt' USING PigStorage('\t') ;",
        "",
    ])

    pigfile = os.path.join(fold, "pystream.pig")
    with open(pigfile, "w", encoding="utf8") as f:
        f.write(pig)

    # we upload some files
    files = os.listdir(data)
    files = [os.path.join(data, _) for _ in files]
    files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

    content = self.client.ls(self.blob_serv, self.container, "unittest2")
    if len(content) == 0:
        self.client.upload(self.blob_serv, self.container, "unittest2", files)

    # previous results are removed
    if self.client.exists(self.blob_serv, self.container, "unittest2/results.txt"):
        self.client.delete_folder(self.blob_serv, self.container, "unittest2/results.txt")

    # we submit the job, recall can be set to an existing job id
    # to skip the submission
    recall = None
    if recall is None:
        job = self.client.pig_submit(
            self.blob_serv, self.container, pigfile,
            dependencies=[pyfile], params=dict(UTT="unittest2"))
        job_id = job["id"]
    else:
        job_id = recall

    status = self.client.wait_job(job_id, fLOG=fLOG)

    out, err = self.client.standard_outputs(status, self.blob_serv, self.container, fold)

    if "Total records written : 4" not in err:
        raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

    dest = os.path.join(fold, "out_merged.txt")
    fLOG("dest=", dest)
    if os.path.exists(dest):
        os.remove(dest)
    self.client.download_merge(self.blob_serv, self.container,
                               "$PSEUDO/unittest2/results.txt", dest)
    if not os.path.exists(dest):
        raise FileNotFoundError(dest)

    with open(dest, "r", encoding="utf8") as f:
        content = f.read()
    fLOG("-----\n", content)
    assert len(content.strip(" \n\r\t")) > 0