def run(self):
    """
    Starts a local Jupyter notebook server rooted at the
    ``_doc/notebooks`` subfolder of the configured location, falling
    back to the location itself when that subfolder does not exist.
    Token and password are set empty so no authentication is required.

    NOTE(review): the original source line was garbled by a credential
    scrubber around the ``--NotebookApp.password=`` argument; the
    command list and the ``run_cmd`` call were reconstructed — confirm
    against repository history.
    """
    from pyquickhelper.loghelper import run_cmd
    parameters = self.get_parameters()
    location = parameters['file_or_folder']
    folder = os.path.join(location, "_doc", "notebooks")
    if not os.path.exists(folder):
        folder = location
    cmd = [
        "jupyter-notebook",
        "--notebook-dir=%s" % folder,
        "--NotebookApp.token=",
        "--NotebookApp.password=",
    ]
    # communicate=False: the notebook server keeps running in foreground.
    run_cmd(" ".join(cmd), wait=True, fLOG=print, communicate=False)
def plot_pipeline(dot=None, pipeline=None, dataframe=None, name='pipeline'):
    """
    Renders a pipeline as a PNG image with :epkg:`GraphViz`.

    @param      dot         dot definition of the graph; computed with
                            *pipeline2dot* from *pipeline* and *dataframe*
                            when None
    @param      pipeline    pipeline to draw (used only when *dot* is None)
    @param      dataframe   sample data used to trace the pipeline
    @param      name        prefix of the produced files
                            (``<name>_graph.dot`` and ``<name>_graph.dot.png``)
    """
    dot_file = name + "_graph.dot"
    # Fix: identity comparison with None (`is None`) instead of `== None`.
    if dot is None:
        dot = pipeline2dot(pipeline, dataframe)
    with open(dot_file, "w", encoding="utf-8") as f:
        f.write(dot)
    # On Windows, Graphviz is usually installed but not on the PATH.
    if sys.platform.startswith("win") and "Graphviz" not in os.environ["PATH"]:
        os.environ['PATH'] = os.environ[
            'PATH'] + r';C:\Program Files (x86)\Graphviz2.38\bin'
    cmd = "dot -G=300 -Tpng {0} -o{0}.png".format(dot_file)
    run_cmd(cmd, wait=True, fLOG=print)
def build_machinelearningext(version="Release"):
    "Builds the module machinelearningext."
    from pyquickhelper.loghelper import run_cmd
    # Opt out of dotnet telemetry unless the caller already chose a value.
    env = os.environ.get('DOTNET_CLI_TELEMETRY_OPTOUT', None)
    if env is None:
        os.environ['DOTNET_CLI_TELEMETRY_OPTOUT'] = '1'
    print('[csharpyml.env] DOTNET_CLI_TELEMETRY_OPTOUT={0}'.format(
        os.environ['DOTNET_CLI_TELEMETRY_OPTOUT']))
    # builds the other libraries
    cmds = [
        'dotnet restore machinelearningext.sln',
        'dotnet build -c %s machinelearningext.sln' % version
    ]
    folder = os.path.abspath("cscode")
    # NOTE(review): the abspath("cscode") above is discarded by this
    # reassignment — the sibling build_module keeps abspath("cscode").
    # Confirm a relative "machinelearningext/machinelearningext" is intended.
    folder = os.path.join("machinelearningext", "machinelearningext")
    outs = []
    for cmd in cmds:
        # change_path: the dotnet commands run from the solution folder.
        out, err = run_cmd(cmd, fLOG=print, wait=True, change_path=folder)
        if len(err) > 0:
            raise RuntimeError(
                "Unable to compile C# code.\nCMD: {0}\n--ERR--\n{1}".format(
                    cmd, err))
        elif len(out) > 0:
            outs.append(out)
            print('[csharpyml.dotnet] OUT')
            print(out)
    # Copy specific files.
    copy_assemblies(version=version)
def run_graphviz(filename, image, engine="dot"):
    """
    Run :epkg:`GraphViz`.

    @param      filename        filename which contains the graph definition
    @param      image           output image
    @param      engine          *dot* or *neato*
    @return                     output of graphviz
    """
    ext = os.path.splitext(image)[-1]
    # Only PNG output is supported by this helper.
    if ext != ".png":
        raise Exception("extension should be .png not " + str(ext))
    if sys.platform.startswith("win"):
        # On Windows the engine binary is resolved next to dot.exe.
        bin_ = os.path.dirname(find_graphviz_dot())
        # if bin not in os.environ["PATH"]:
        #     os.environ["PATH"] = os.environ["PATH"] + ";" + bin
        cmd = '"{0}\\{3}" -Tpng "{1}" -o "{2}"'.format(
            bin_, filename, image, engine)
    else:
        cmd = '"{0}" -Tpng "{1}" -o "{2}"'.format(engine, filename, image)
    out, err = run_cmd(cmd, wait=True)
    # GraphViz reports failures on stderr.
    if len(err) > 0:
        raise Exception(
            "unable to run Graphviz\nCMD:\n{0}\nOUT:\n{1}\nERR:\n{2}".format(cmd, out, err))
    return out
def test_reap_children(self):
    # Starts a long-running child process without waiting for it, then
    # checks reap_children can find and terminate it by pid.
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    if sys.platform.startswith("win"):
        cmd = "pause"  # blocks until a key is pressed
    else:
        cmd = 'ls -la | less'  # less keeps the pipeline alive
    temp = get_temp_folder(__file__, "temp_reap_children")
    clog = CustomLog(temp)
    # wait=False: run_cmd returns the running process instead of output.
    proc, _ = run_cmd(cmd, wait=False, fLOG=clog)
    self.assertTrue(_ is None)
    clog('proc={} pid={}'.format(proc, proc.pid))
    ki = reap_children(fLOG=clog, subset={proc.pid})
    clog('ki={0}'.format(ki))
    # On some local setups the child may already be gone; only warn.
    if ki is None and not is_travis_or_appveyor() and __name__ != '__main__':
        warnings.warn(
            "reap_children could not be fully tested ki is None.")
        return
    self.assertTrue(ki is not None)
    self.assertEqual(len(ki), 1)
    # fLOG(ki)
    # To avoid a warning.
    proc.returncode = 0
def test_exe(self):
    # Builds a small CI YAML description, converts it into batch/shell
    # instructions, executes the script and checks every section ran.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    command = "dir" if sys.platform.startswith("win32") else "ls"
    # NOTE(review): internal indentation of this literal was reconstructed;
    # the dedent width must match the .replace argument below.
    yml = """
            language: python
            python:
                - {{Python35}}
            before_script:
                - %s
            after_script:
                - %s {{PLATFORM}}
            script:
                - %s
            """.replace("            ", "") % (command, command, command)
    temp = get_temp_folder(__file__, "temp_yaml_exe")
    context = dict(Python34="fake", Python35=os.path.dirname(sys.executable),
                   Python27=None, Anaconda3=None, Anaconda2=None,
                   WinPython35=None, project_name="pyquickhelper",
                   root_path="ROOT", PLATFORM="win32")
    obj, name = load_yaml(yml, context=context)
    self.assertTrue(name is not None)
    res = list(enumerate_convert_yaml_into_instructions(
        obj, variables=context))
    for r, var in res:
        conv = convert_sequence_into_batch_file(r, variables=var)
        # Each section of the yaml must invoke the chosen command.
        if ("%s " % command) not in conv:
            raise Exception("{0}\n--\n{1}".format(command, conv))
        fLOG("####", conv)
        ext = "bat" if command == "dir" else "sh"
        name = os.path.join(temp, "yml.%s" % ext)
        with open(name, "w") as f:
            f.write(conv)
        if is_travis_or_appveyor() == "__travis":
            # linux, unable to test TestYamlExe.test_exe.
            pass
        else:
            if sys.platform.startswith("win"):
                cmd = name
            else:
                cmd = "bash " + name
            try:
                out, err = run_cmd(cmd, wait=True)
            except PermissionError as e:
                raise Exception(
                    "Unable to execute '{0}' which contains\n{1}".format(
                        name, conv)) from e
            fLOG("###")
            fLOG(out)
            # The generated script echoes markers for every section.
            if "BEFORE_SCRIPT" not in out:
                raise Exception(
                    "{0}\nERR\n{2}\n#########\n{1}".format(out, conv, err))
            if "AFTER_SCRIPT" not in out:
                raise Exception(
                    "{0}\nERR\n{2}\n#########\n{1}".format(out, conv, err))
            if "SCRIPT" not in out:
                raise Exception(
                    "{0}\nERR\n{2}\n#########\n{1}".format(out, conv, err))
def convert(pathname, verbose=0): """ Converts into ONNX. """ # tflite lite = os.path.join(pathname, 'model.lite') if False and not os.path.exists(lite): import tensorflow.lite as tfl if verbose: print('[convert] to lite %r.' % pathname) converter = tfl.TFLiteConverter.from_saved_model(pathname) try: tflite_model = converter.convert() except Exception as e: print('[convert] lite failed due to %r.' % e) tflite_model = None if tflite_model is not None: if verbose: print('[convert] save lite %r.' % pathname) with open(lite, "wb") as f: f.write(tflite_model) # node list if verbose: res = get_node_list(pathname, verbose=verbose) print('[convert] node types: %r' % res) # onnx output = os.path.join(pathname, "model.onnx") lastname = pathname.replace("\\", "/").split('/')[-1] inputs = None if lastname not in MODELS: raise ValueError("Unknown model %r." % lastname) model = MODELS[lastname] inputs = model['inputs'] outputs = model['outputs'] tag = model['tag'] sig = model['signature_def'] onnx_inputs = model['onnx_inputs'] if inputs is None: if verbose: print('[convert] to ONNX %r.' % pathname) raise NotImplementedError("Unable to convert %r." % lastname) inputs = ",".join(inputs) outputs = ",".join(outputs) if not os.path.exists(output): def noprint(*args): pass cmd = [ "python", "-m", "tf2onnx.convert", "--saved-model", pathname, "--output", output, "--inputs", inputs, "--outputs", outputs, '--tag', tag, '--signature_def', sig ] out, err = run_cmd(" ".join(cmd), wait=True, fLOG=print if verbose else noprint, shell=True) return pathname, output, inputs, outputs, onnx_inputs
def run_graphviz(filename, image, engine="dot"):
    """
    Executes :epkg:`GraphViz` on a graph definition file and writes
    a PNG image.

    @param      filename        file which contains the graph definition
    @param      image           output image, must end with ``.png``
    @param      engine          *dot* or *neato*
    @return                     standard output of graphviz
    """
    extension = os.path.splitext(image)[-1]
    if extension != ".png":
        raise Exception("extension should be .png not " + str(extension))
    tail = '-Tpng "{0}" -o "{1}"'.format(filename, image)
    if sys.platform.startswith("win"):
        # The engine binary sits next to dot.exe in the Graphviz install.
        graphviz_bin = os.path.dirname(find_graphviz_dot())
        cmd = '"{0}\\{1}" {2}'.format(graphviz_bin, engine, tail)
    else:
        cmd = '"{0}" {1}'.format(engine, tail)
    out, err = run_cmd(cmd, wait=True)
    if err:
        # graphviz signals failures on stderr.
        raise Exception(
            "Unable to run Graphviz\nCMD:\n{0}\nOUT:\n{1}\nERR:\n{2}".format(
                cmd, out, err))
    return out
def git_change_remote_origin(local_folder, url_https, user=None, password=None,
                             add_fetch=False, timeout=10, fLOG=noLOG):
    """
    Changes the origin of the repository. The url and the password
    refer to the new repository.

    @param      local_folder    local folder
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      add_fetch       add instruction ``fetch``
    @param      fLOG            logging function
    @return                     something

    The function runs the instruction::

        git remote remove origin
        git remote add origin url

    """
    # Credentials are embedded into the url itself.
    url_user = git_url_user_password(url_https, user, password)
    # NOTE(review): the dedent width of this literal was reconstructed;
    # it must match the .replace argument below.
    cmds = """
            cd {0}
            git remote remove origin
            git remote add origin {1}
            """.format(local_folder, url_user).replace("            ", "").strip(" \n\r\t")
    if add_fetch:
        cmds += "\ngit fetch"
    # All commands run in one shell invocation, chained with '&'.
    cmd = cmds.replace("\n", "&")
    sin = ""  # "{0}\n".format(password)
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
def build_module(version="Release"): "build the module" # git submodule add https://github.com/dotnet/machinelearning.git cscode/machinelearning # We build a dotnet application. from pyquickhelper.loghelper import run_cmd env = os.environ.get('DOTNET_CLI_TELEMETRY_OPTOUT', None) if env is None: os.environ['DOTNET_CLI_TELEMETRY_OPTOUT'] = '1' print('[csharpyml.env] DOTNET_CLI_TELEMETRY_OPTOUT={0}'.format( os.environ['DOTNET_CLI_TELEMETRY_OPTOUT'])) # builds the other libraries cmds = [ 'dotnet restore CSharPyMLExtension_netcore.sln', 'dotnet build -c %s CSharPyMLExtension_netcore.sln' % version ] folder = os.path.abspath("cscode") outs = [] for cmd in cmds: out, err = run_cmd(cmd, fLOG=print, wait=True, change_path=folder) if len(err) > 0: raise RuntimeError( "Unable to compile C# code.\nCMD: {0}\n--ERR--\n{1}".format( cmd, err)) elif len(out) > 0: outs.append(out) print('[csharpyml.dotnet] OUT') print(out) # Copy specific files. copy_assemblies(version=version)
def run_benchmark(runtime, srcdir, logger, skip, white_list=None):
    """
    Runs the validation benchmark on every scikit-learn operator
    through ``python -m mlprodict validate_runtime``.

    @param      runtime     runtime to benchmark
    @param      srcdir      folder receiving the raw and summary CSV files
    @param      logger      logger for progress messages
    @param      skip        operator names to skip, or None
    @param      white_list  when given, restricts the run to these operators
    @return                 list of tuples (raw csv filename, summary csv filename)
    """
    filenames = []
    skls = sklearn_operators(extended=True)
    skls = [_['name'] for _ in skls]
    if white_list:
        skls = [_ for _ in skls if _ in white_list]
    skls.sort()
    pbar = tqdm(skls)
    for op in pbar:
        if skip is not None and op in skip:
            continue
        # Pads the name so the progress bar keeps a stable width.
        pbar.set_description("[%s]" % (op + " " * (25 - len(op))))
        out_raw = os.path.join(srcdir, "bench_raw_%s_%s.csv" % (runtime, op))
        out_sum = os.path.join(srcdir, "bench_sum_%s_%s.csv" % (runtime, op))
        # Each operator is benchmarked in its own subprocess so one crash
        # does not stop the whole run.
        cmd = ('{0} -m mlprodict validate_runtime --verbose=0 --out_raw={1} --out_summary={2} '
               '--benchmark=1 --dump_folder={3} --runtime={4} --models={5}'.format(
                   get_interpreter_path(), out_raw, out_sum, srcdir, runtime, op))
        logger.info("[mlprodict] cmd '{}'.".format(cmd))
        out, err = run_cmd(cmd, wait=True, fLOG=None)
        if not os.path.exists(out_sum):
            # The subprocess crashed: write a stub summary so downstream
            # processing (documentation build) can keep going.
            logger.warning("[mlprodict] unable to find '{}'.".format(out_sum))
            print("[mlprodict-sphinx] cmd '{}'".format(cmd))
            print("[mlprodict-sphinx] unable to find '{}'".format(out_sum))
            msg = "Unable to find '{}'\n--CMD--\n{}\n--OUT--\n{}\n--ERR--\n{}".format(
                out_sum, cmd, out, err)
            print(msg)
            rows = [{'name': op, 'scenario': 'CRASH',
                     'ERROR-msg': msg.replace("\n", " -- ")}]
            df = DataFrame(rows)
            df.to_csv(out_sum, index=False)
        filenames.append((out_raw, out_sum))
    return filenames
def is_java_installed(fLOG=noLOG):
    """
    Checks if :epkg:`java` is installed.

    @param      fLOG    logging function
    @return             boolean

    Runs ``java -showversion`` and looks for the vendor banner on the
    standard error stream (``Java(TM)`` on Windows, ``OpenJDK Runtime
    Environment`` elsewhere).
    """
    # Fix: both platform branches built, ran and logged the exact same
    # command; the duplication is hoisted, only the checked marker differs.
    cmd = get_java_cmd() + " -showversion"
    out, err = run_cmd(cmd, wait=True, log_error=False)
    fLOG("OUT:\n", out)
    fLOG("ERR:\n", err)
    if sys.platform.startswith("win"):
        return "Java(TM)" in err
    return "OpenJDK Runtime Environment" in err
def test_notebook_runner_ml_huge(self):
    # Runs the "ml_huge" notebook; on Windows, when numpy and tables come
    # from different distributions, the test relaunches itself once with
    # the interpreter that owns numpy so pytables DLLs load correctly.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    if is_travis_or_appveyor() == "appveyor":
        # pytables has an issue
        # return
        pass
    if sys.platform.startswith("win"):
        import numpy
        import tables
        foldn = os.path.abspath(os.path.dirname(numpy.__file__))
        foldt = os.path.normpath(os.path.dirname(tables.__file__))
        rootn = os.path.dirname(foldn)
        roott = os.path.dirname(foldt)
        if rootn != roott:
            pp = os.environ.get('PYTHONPATH', '')
            # Guard against relaunching forever: the marker is added
            # to PYTHONPATH before the relaunch below.
            if "SECONDTRY" in pp:
                raise Exception(
                    "Infinite loog\n{0}\n{1}\n**EXE\n{2}\n**PP\n{3}\n****".format(
                        rootn, roott, sys.executable, pp))
            # We need to run this file with the main python.
            # Otherwise it fails for tables: DLL load failed.
            exe = os.path.normpath(os.path.join(
                rootn, "..", "..", "python.exe"))
            cmd = '"{0}" -u "{1}"'.format(exe, os.path.abspath(__file__))
            import pyquickhelper
            import pyensae
            import jyquickhelper
            import src.ensae_teaching_cs
            import mlstatpy
            import pymyinstall
            add = ["SECONDTRY"]
            for mod in [pyquickhelper, pyensae, jyquickhelper,
                        src.ensae_teaching_cs, mlstatpy, pymyinstall]:
                add.append(os.path.normpath(os.path.join(
                    os.path.dirname(mod.__file__), "..")))
            fLOG("set PYTHONPATH={0}".format(";".join(add)))
            os.environ['PYTHONPATH'] = ";".join(add)
            out, err = run_cmd(cmd, wait=True, fLOG=fLOG)
            if len(err) > 0:
                # Drop indented traceback continuation lines and warnings
                # before deciding whether stderr is a real failure.
                lines = err.split("\n")
                lines = [_ for _ in lines if _[0] != " "]
                lines = [_ for _ in lines if "warning" not in _.lower()]
                if len(lines) > 0:
                    raise Exception("--CMD:\n{0}\n--OUT:\n{1}\n--ERR\n{2}\n--ERR2\n{3}\n--PP\n{4}".format(
                        cmd, out, err, "\n".join(lines), pp))
            return
    import tables
    assert tables is not None
    this = os.path.abspath(os.path.dirname(tables.__file__))
    self.a_test_notebook_runner(
        "ml_huge", "expose", additional_path=[this])
def is_java_installed(fLOG=noLOG):
    """
    Tells whether java is available on this machine.

    @param      fLOG    logging function
    @return             boolean
    """
    version_cmd = get_java_cmd() + " -showversion"
    out, err = run_cmd(version_cmd, wait=True, log_error=False)
    fLOG("OUT:\n", out)
    fLOG("ERR:\n", err)
    # java prints its version banner on stderr.
    return "Java(TM)" in err
def test_notebook_runner_ml_huge(self):
    # Runs the "ml_huge" notebook; on Windows, when numpy and tables come
    # from different distributions, the test relaunches itself once with
    # the interpreter that owns numpy so pytables DLLs load correctly.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    if sys.platform.startswith("win"):
        import numpy
        import tables  # pylint: disable=E0401
        foldn = os.path.abspath(os.path.dirname(numpy.__file__))
        foldt = os.path.normpath(os.path.dirname(tables.__file__))
        rootn = os.path.dirname(foldn)
        roott = os.path.dirname(foldt)
        if rootn != roott:
            pp = os.environ.get('PYTHONPATH', '')
            # Guard against relaunching forever: the marker is added
            # to PYTHONPATH before the relaunch below.
            if "SECONDTRY" in pp:
                raise Exception(
                    "Infinite loog\n{0}\n{1}\n**EXE\n{2}\n**PP\n{3}\n****".format(
                        rootn, roott, sys.executable, pp))
            # We need to run this file with the main python.
            # Otherwise it fails for tables: DLL load failed.
            exe = os.path.normpath(os.path.join(
                rootn, "..", "..", "python.exe"))
            cmd = '"{0}" -u "{1}"'.format(exe, os.path.abspath(__file__))
            import pyquickhelper
            import pyensae
            import jyquickhelper
            import ensae_teaching_cs
            import mlstatpy
            import pymyinstall
            add = ["SECONDTRY"]
            for mod in [pyquickhelper, pyensae, jyquickhelper,
                        ensae_teaching_cs, mlstatpy, pymyinstall]:
                add.append(os.path.normpath(os.path.join(
                    os.path.dirname(mod.__file__), "..")))
            fLOG("set PYTHONPATH={0}".format(";".join(add)))
            os.environ['PYTHONPATH'] = ";".join(add)
            out, err = run_cmd(cmd, wait=True, fLOG=fLOG)
            if len(err) > 0:
                # Drop indented traceback continuation lines and warnings
                # before deciding whether stderr is a real failure.
                lines = err.split("\n")
                lines = [_ for _ in lines if _[0] != " "]
                lines = [_ for _ in lines if "warning" not in _.lower()]
                if len(lines) > 0:
                    raise Exception("--CMD:\n{0}\n--OUT:\n{1}\n--ERR\n{2}\n--ERR2\n{3}\n--PP\n{4}".format(
                        cmd, out, err, "\n".join(lines), pp))
            return
    import tables  # pylint: disable=E0401
    assert tables is not None
    this = os.path.abspath(os.path.dirname(tables.__file__))
    self.a_test_notebook_runner(
        "ml_huge", "expose", additional_path=[this])
def runpy(self, line, cell=None):
    """
    Defines command ``%%runpy``.

    .. nbref::
        :title: runpy

        ``%%runpy`` runs a python script which accepts
        standards input and produces standard outputs,
        a timeout is set up at 10s. It is almost equivalent to::

            from pyquickhelper.loghelper import run_cmd
            import sys
            cmd = sys.executable.replace(
                "pythonw", "python") + " " + filename + " " + args
            out, err = run_cmd(
                cmd, wait=True, sin=cell, communicate=True, timeout=10, shell=False)

    .. versionadded:: 1.1
    """
    parser = self.get_parser(MagicFile.runpy_parser, "runpy")
    args = self.get_args(line, parser)
    if args is not None:
        filename = args.file
        if len(filename) == 0:
            # No file given: recurse with an empty line — presumably the
            # parser then prints its usage; verify against get_args.
            self.runpy("")
        else:
            args = args.args
            # Use the console interpreter, not pythonw, so output is captured.
            cmd = sys.executable.replace("pythonw", "python")
            cmd += " " + filename + " "
            cmd += " ".join(
                '"{0}"'.format(_) for _ in args) if isinstance(args, list) else args
            tosend = cell  # the cell body is sent to the script's stdin
            out, err = run_cmd(cmd, wait=True, sin=tosend,
                               communicate=True, timeout=10, shell=False)
            if len(err) > 0:
                return HTML(
                    '<font color="#DD0000">Error</font><br /><pre>\n%s\n</pre>' % err)
            else:
                return HTML('<pre>\n%s\n</pre>' % out)
    return None
def test_notebook_svg(self):
    """
    If the test fails, look into issue `216
    <https://github.com/sdpython/pyquickhelper/issues/216>`_.
    Avoid nbconvert==5.4.0,==5.4.1.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    path = os.path.abspath(os.path.split(__file__)[0])
    fold = os.path.normpath(os.path.join(path, "notebooks_svg"))
    nbs = [os.path.join(fold, _)
           for _ in os.listdir(fold) if ".ipynb" in _]
    formats = ["latex", "pdf"]
    temp = os.path.join(path, "temp_nb_bug_svg")
    if not os.path.exists(temp):
        os.mkdir(temp)
    # Start from an empty output folder.
    for file in os.listdir(temp):
        os.remove(os.path.join(temp, file))
    # inkscape/latex are unavailable on these CI environments.
    if is_travis_or_appveyor() in ('travis', 'appveyor', 'azurepipe', 'circleci'):
        return
    setup_environment_for_help()
    obj = SVG2PDFPreprocessor()
    self.assertIn('inkscape', obj.inkscape)
    cmd = '%s --version' % obj.inkscape
    out, err = run_cmd(cmd, wait=True, shell=False)
    self.assertIn('inkscape', out.lower())
    vers = obj.inkscape_version  # NOTE(review): unused after this line
    # NOTE(review): this repeats the assertion above on the same value.
    self.assertIn('inkscape', out.lower())
    res = process_notebooks(nbs, temp, temp, formats=formats)
    for _ in res:
        fLOG(_)
        self.assertExists(_[0])
    with open(os.path.join(temp, "seance4_projection_population_correction.tex"),
              "r", encoding="utf8") as f:
        content = f.read()
    # The SVG must have been converted into an embedded PDF figure.
    exp = "seance4_projection_population_correction_50_0.pdf"
    if exp not in content:
        raise Exception(content)
def test_py3to2(self):
    # Converts the whole source tree into python 2 syntax, then tries to
    # import the converted pyquickhelper with a locally installed 2.7.
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_py3to2")
    root = os.path.abspath(os.path.dirname(__file__))
    root = os.path.normpath(os.path.join(root, "..", ".."))
    conv = py3to2_convert_tree(root, temp, fLOG=fLOG)
    if len(conv) < 20:
        raise Exception("not enough copied files")
    # python 2 probe script (print statements, not functions).
    # NOTE(review): internal indentation of this literal was reconstructed;
    # the dedent width must match the .replace argument below.
    script = """
            import sys
            sys.path = [p for p in sys.path if "src" not in p and "ut_" not in p]
            sys.path.append(r"{0}")
            print ""
            for k in sys.path:
                print k
            import pyquickhelper
            """.replace("            ", "")
    script = script.format(os.path.join(temp, "src"))
    to = os.path.join(temp, "simpletry.py")
    with open(to, "w", encoding="utf8") as f:
        f.write(script)
    # Look for a python 2 interpreter among usual Windows locations.
    pyexe2 = None
    for location in [
            r"C:\Anaconda2",
            r"C:\Anaconda",
            r"C:\WinPython-64bit-2.7.9.3\python-2.7.9.amd64",
    ]:
        exe = os.path.join(location, "python.exe")
        if os.path.exists(exe):
            pyexe2 = exe
            break
    if pyexe2 is not None:
        cmd = "{0} {1}".format(pyexe2, to)
        out, err = run_cmd(cmd, wait=True)
        if len(err) > 0:
            raise Exception(
                "conversion did not work:\nOUT\n:{0}\nERR:\n{1}".format(
                    out, err))
    else:
        fLOG("python 2.7 was not found")
def test_script_pig(self):
    # Writes a small python streaming script (as pig would invoke it),
    # feeds it one JSON-like row on stdin and checks it writes output.
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    data = os.path.join(os.path.abspath(
        os.path.split(__file__)[0]), "data")
    # NOTE(review): internal indentation of this literal was reconstructed;
    # the dedent width must match the .replace argument below.
    pyth = """
            import sys, datetime
            cols = [ _ for _ in sys.argv if ".py" not in _ ]
            for row in sys.stdin:
                js = eval(row)
                for station in js:
                    vals = [ station[c] for c in cols ]
                    sys.stdout.write(",".join(vals))
                    sys.stdout.write("\\n")
                    sys.stdout.flush()
            """.replace("            ", "")
    fold = os.path.join(data, "..", "temp_pypig_out")
    if not os.path.exists(fold):
        os.mkdir(fold)
    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)
    # One velib station record, as produced by the collecting job.
    tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - """ + \
        """75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
        """'lng': 2.348395236282807, 'contract_name': """ + \
        """'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': """ + \
        """48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, """ + \
        """'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
        """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""
    cmd = sys.executable.replace("pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(cmd, wait=True, sin=tosend,
                       communicate=True, timeout=3, shell=False)
    fLOG("OUT\n", out)
    fLOG("ERR\n", err)
    assert len(out) > 0
def run_dot(dot_file, outimg):
    """
    calls graphivz on a dot file and produces an image

    @param      dot_file        file, format `DOT <http://www.graphviz.org/doc/info/lang.html>`_
    @param      outimg          output image
    @return                     out, err (stdout, stderr from graphviz)

    .. versionadded:: 1.1
    """
    # The output format (-T option) is deduced from the image extension.
    ext = os.path.splitext(outimg)[-1].strip(".")
    exe = dot_exe()
    cmd = "\"{0}\" -T{1} -o{2} {3}".format(exe, ext, outimg, dot_file)
    out, err = run_cmd(cmd, wait=True)
    # graphviz reports failures on stderr.
    if len(err) > 0:
        raise Exception("unable to run graphviz on {0}.\nCMD:\n{1}\nOUT:\n{2}\nERR:\n{3}".format(
            dot_file, cmd, out, err))
    return out, err
def run_jython(pyfile, argv=None, jython_path=None, sin=None,
               timeout=None, fLOG=noLOG):
    """
    Runs a jython script and returns the standard output and error.

    @param      pyfile          jython file
    @param      argv            arguments to send to the command line
    @param      jython_path     path to jython standalone
    @param      sin             data to send to the standard input
    @param      timeout         timeout
    @param      fLOG            logging function
    @return                     out, err

    If *jython_path* is None, the function looks into this directory.
    """
    if jython_path is None:
        jython_path = get_jython_jar()

    def _quote(pos, token):
        "Escapes quotes and wraps tokens with spaces; java itself is left as is."
        if pos == 0:
            return token
        if '"' in token:
            token = token.replace('"', '\\"')
        return '"{0}"'.format(token) if " " in token else token

    pieces = [get_java_cmd(), "-jar", jython_path, pyfile]
    if argv is not None:
        pieces.extend(argv)
    full_cmd = " ".join(_quote(pos, tok) for pos, tok in enumerate(pieces))
    out, err = run_cmd(full_cmd, wait=True, sin=sin, communicate=True,
                       timeout=timeout, shell=False)
    return out, err
def git_commit_all(local_folder, url_https, message, user=None,
                   password=None, timeout=300, fLOG=noLOG):
    """
    from a git repository,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      message         message for the commit
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    If the reposity has already been cloned, it does not do it again.
    We assume that git can be run without giving its full location.

    The function executes the following commands::

        cd [folder]
        git add -A
        git commit -m "[message]"
        git push -u origin master

    """
    # NOTE(review): the dedent width of this literal was reconstructed;
    # it must match the .replace argument below.
    cmds = """
            cd {0}
            git add -A
            git commit -m "{1}"
            git push -u origin master
            """.format(local_folder, message).replace("            ", "").strip(" \n\r\t")
    # All commands run in one shell invocation, chained with '&'.
    cmd = cmds.replace("\n", "&")
    sin = ""  # "{0}\n".format(password)
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
def run_dot(dot_file, outimg):
    """
    Runs graphviz on a dot file and produces an image.

    @param      dot_file        file, format `DOT <http://www.graphviz.org/doc/info/lang.html>`_
    @param      outimg          output image
    @return                     out, err (stdout, stderr from graphviz)

    .. versionadded:: 1.1
    """
    # The output format (-T option) comes from the image extension.
    img_format = os.path.splitext(outimg)[-1].strip(".")
    command = '"{0}" -T{1} -o{2} {3}'.format(
        dot_exe(), img_format, outimg, dot_file)
    out, err = run_cmd(command, wait=True)
    if err:
        raise Exception(
            "unable to run graphviz on {0}.\nCMD:\n{1}\nOUT:\n{2}\nERR:\n{3}".
            format(dot_file, command, out, err))
    return out, err
def runpy(self, line, cell=None):
    """
    Defines command ``%%runpy``.

    .. nbref::
        :title: runpy

        ``%%runpy`` runs a python script which accepts
        standards input and produces standard outputs,
        a timeout is set up at 10s. It is almost equivalent to::

            from pyquickhelper.loghelper import run_cmd
            import sys
            cmd = sys.executable.replace(
                "pythonw", "python") + " " + filename + " " + args
            out, err = run_cmd(
                cmd, wait=True, sin=cell, communicate=True, timeout=10, shell=False)

    .. versionadded:: 1.1
    """
    parser = self.get_parser(MagicFile.runpy_parser, "runpy")
    args = self.get_args(line, parser)
    if args is None:
        return None
    filename = args.file
    if len(filename) == 0:
        # No file: recurse with an empty line so the parser reports usage.
        self.runpy("")
        return None
    extra = args.args
    if isinstance(extra, list):
        extra = " ".join('"{0}"'.format(_) for _ in extra)
    # Use the console interpreter, not pythonw, so output is captured.
    command = sys.executable.replace("pythonw", "python")
    command += " " + filename + " " + extra
    # The cell body is sent to the script's standard input.
    out, err = run_cmd(command, wait=True, sin=cell,
                       communicate=True, timeout=10, shell=False)
    if len(err) > 0:
        return HTML(
            '<font color="#DD0000">Error</font><br /><pre>\n%s\n</pre>' % err)
    return HTML('<pre>\n%s\n</pre>' % out)
def git_commit_all(
        local_folder, url_https, message, user=None,
        password=None, timeout=300, fLOG=noLOG):
    """
    from a git repository,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      message         message for the commit
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    If the reposity has already been cloned, it does not do it again.
    We assume that git can be run without giving its full location.

    The function executes the following commands::

        cd [folder]
        git add -A
        git commit -m "[message]"
        git push -u origin master

    """
    # NOTE(review): the dedent width of this literal was reconstructed;
    # it must match the .replace argument below.
    cmds = """
            cd {0}
            git add -A
            git commit -m "{1}"
            git push -u origin master
            """.format(local_folder, message).replace("            ", "").strip(" \n\r\t")
    # All commands run in one shell invocation, chained with '&'.
    cmd = cmds.replace("\n", "&")
    sin = ""  # "{0}\n".format(password)
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
def build_machinelearning(version="Release"):
    "Builds machinelearning (ml.net)."
    from pyquickhelper.loghelper import run_cmd
    print('[csharpyml.machinelearning]')
    this = os.path.abspath(os.path.dirname(__file__))
    folder = os.path.join(this, 'cscode', 'machinelearning')
    # The repository ships build.cmd for Windows and build.sh elsewhere.
    cmd = "build{0}"
    if sys.platform.startswith("win"):
        cmd = cmd.format('.cmd')
    else:
        cmd = cmd.format('.sh')
    full = os.path.join(folder, cmd)
    if not os.path.exists(full):
        existing = os.listdir(folder)
        raise FileNotFoundError(
            "Unable to find '{0}', build failed. Found:\n{1}".format(
                full, "\n".join(existing)))
    if not sys.platform.startswith("win"):
        cmd = "bash --verbose " + cmd
    cmd += ' -' + version
    out, err = run_cmd(cmd, wait=True, change_path=folder)
    if len(err) > 0:
        # Filter out small errors.
        errs = []
        lines = err.split('\n')
        for line in lines:
            if 'ILAsmVersion.txt: No such file or directory' in line:
                continue
            errs.append(line)
        err = "\n".join(errs)
    if len(err) > 0:
        raise RuntimeError(
            "Unable to build machinelearning code.\nCMD: {0}\n--ERR--\n{1}".
            format(cmd, err))
    elif len(out) > 0:
        print('[csharpyml.machinelearning] OUT')
        print(out)
    # Fix: the local variable was named `bin`, shadowing the builtin.
    bin_dir = os.path.join(folder, "bin")
    if not os.path.exists(bin_dir):
        existing = os.listdir(folder)
        raise FileNotFoundError(
            "Unable to find '{0}', build failed. Found:\n{1}".format(
                bin_dir, "\n".join(existing)))
def test_script_pig(self):
    # Writes a small python streaming script (as pig would invoke it),
    # feeds it one JSON-like row on stdin and checks it writes output.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    data = os.path.join(
        os.path.abspath(
            os.path.split(__file__)[0]), "data")
    # NOTE(review): internal indentation of this literal was reconstructed;
    # the dedent width must match the .replace argument below.
    pyth = """
            import sys, datetime
            cols = [ _ for _ in sys.argv if ".py" not in _ ]
            for row in sys.stdin:
                js = eval(row)
                for station in js:
                    vals = [ station[c] for c in cols ]
                    sys.stdout.write(",".join(vals))
                    sys.stdout.write("\\n")
                    sys.stdout.flush()
            """.replace("            ", "")
    fold = os.path.join(data, "..", "temp_pypig_out")
    if not os.path.exists(fold):
        os.mkdir(fold)
    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)
    # One velib station record, as produced by the collecting job.
    tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': """ + \
        """'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
        """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""
    cmd = sys.executable.replace(
        "pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(cmd, wait=True, sin=tosend,
                       communicate=True, timeout=3, shell=False)
    fLOG("OUT\n", out)
    fLOG("ERR\n", err)
    assert len(out) > 0
def git_change_remote_origin(local_folder, url_https, user=None, password=None,
                             add_fetch=False, timeout=10, fLOG=noLOG):
    """
    Change the origin of the repository. The url and the password
    refer to the new repository.

    @param      local_folder    local folder
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      add_fetch       add instruction ``fetch``
    @param      fLOG            logging function
    @return                     something

    The function runs the instruction::

        git remote remove origin
        git remote add origin url

    """
    # Credentials are embedded into the url itself.
    url_user = git_url_user_password(url_https, user, password)
    # NOTE(review): the dedent width of this literal was reconstructed;
    # it must match the .replace argument below.
    cmds = """
            cd {0}
            git remote remove origin
            git remote add origin {1}
            """.format(local_folder, url_user).replace("            ", "").strip(" \n\r\t")
    if add_fetch:
        cmds += "\ngit fetch"
    # All commands run in one shell invocation, chained with '&'.
    cmd = cmds.replace("\n", "&")
    sin = ""  # "{0}\n".format(password)
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
def run_jython(pyfile, argv=None, jython_path=None, sin=None,
               timeout=None, fLOG=noLOG):
    """
    Runs a jython script and returns the standard output and error.

    @param      pyfile          jython file
    @param      argv            arguments to send to the command line
    @param      jython_path     path to jython standalone
    @param      sin             data to send to the standard input
    @param      timeout         timeout
    @param      fLOG            logging function
    @return                     out, err

    If *jython_path* is None, the function looks into this directory.
    """
    jar = jython_path if jython_path is not None else get_jython_jar()

    def _escape(index, part):
        "Escapes quotes and wraps tokens with spaces; java itself is untouched."
        if index == 0:
            return part
        if '"' in part:
            part = part.replace('"', '\\"')
        return '"{0}"'.format(part) if " " in part else part

    arguments = [get_java_cmd(), "-jar", jar, pyfile]
    if argv is not None:
        arguments.extend(argv)
    line = " ".join(_escape(i, a) for i, a in enumerate(arguments))
    out, err = run_cmd(line, wait=True, sin=sin, communicate=True,
                       timeout=timeout, shell=False)
    return out, err
def git_clone(local_folder, url_https, user=None, password=None, timeout=60,
              init=True, fLOG=noLOG):
    """
    Clones a project from a git repository in a non empty local folder,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      init            see below (True, use fetch, False, use clone)
    @param      fLOG            logging function
    @return                     local_folder

    If the reposity has already been cloned, it does not do it again.
    We assume that git can be run without giving its full location.

    The function executes the following commands (if init is True)::

        cd [folder]
        git init
        git remote add origin [https://user.password@server/project.git]
        git fetch

    Otherwise, it does::

        cd [folder]
        git clone origin [https://user.password@server/project.git]
        git fetch

    A folder will be created.

    .. exref::
        :tag: Automation
        :title: Clone many folders in one row

        ::

            eleves = "project1;project2;..."
            root = r"destination"

            for el in eleves.split(";"):
                cl = el.lower().replace(".", "-")
                fold = os.path.join(root, el)
                if not os.path.exists(fold):
                    print("clone", el)
                    url = "https://<gitlab>/<group>/{0}.git".format(cl)
                    git_clone(fold, url, user=user, password=password,
                              init=False, fLOG=print)
    """
    # Credentials are embedded into the url itself.
    url_user = git_url_user_password(url_https, user, password)
    # NOTE(review): this overwrites the *timeout* parameter with a
    # hard-coded value — confirm whether that is intended.
    timeout = 60
    local_folder = os.path.normpath(os.path.abspath(local_folder))

    if init:
        if not os.path.exists(local_folder):
            fLOG("creating folder", local_folder)
            os.mkdir(local_folder)

        hg = os.path.join(local_folder, ".git")
        if os.path.exists(hg):
            raise Exception(
                "folder {0} should not exist".format(local_folder))

        if not os.path.exists(hg):
            # NOTE(review): the dedent width of this literal was
            # reconstructed; it must match the .replace argument below.
            cmds = """
            cd {0}
            git init
            git remote add origin {1}
            git fetch
            """.format(local_folder, url_user).replace("            ", "").strip(" \n\r\t")
            cmd = cmds.replace("\n", "&")
            sin = ""  # "{0}\n".format(password)
            out, err = run_cmd(
                cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
            git_check_error(out, err, fLOG)

        return local_folder
    else:
        if not os.path.exists(local_folder):
            fLOG("creating folder", local_folder)
            os.mkdir(local_folder)

        hg = os.path.join(local_folder, ".git")
        if os.path.exists(hg):
            raise Exception(
                "folder {0} should not exist".format(local_folder))

        # clone creates a subfolder named after the project.
        final = os.path.split(url_user)[-1].replace(".git", "")
        locf = os.path.join(local_folder, final)
        if os.path.exists(locf):
            raise Exception(
                "folder {0} should not exists before cloning".format(locf))

        # NOTE(review): the dedent width of this literal was
        # reconstructed; it must match the .replace argument below.
        cmds = """
            cd {0}
            git clone {1} .
            """.format(local_folder, url_user).replace("            ", "").strip(" \n\r\t")
        cmd = cmds.replace("\n", "&")
        sin = ""  # "{0}\n".format(password)
        out, err = run_cmd(cmd, sin=sin, wait=True,
                           timeout=timeout, fLOG=fLOG)
        git_check_error(out, err, fLOG)

        return locf
def test_compile_module(self):
    # Compiles a small C++ extension module with distutils, imports it,
    # then exercises the documentation helpers on the produced files.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_compile_module")
    source = os.path.join(temp, "cdemo.cpp")
    init = os.path.join(temp, "__init__.py")
    setup = os.path.join(temp, "setup.py")
    # writes the C++ source and an empty __init__.py next to it
    with open(source, "w") as f:
        f.write(TestModuleC.content_c)
    with open(init, "w") as f:
        pass
    # minimal distutils setup script building the extension
    setup_content = dedent("""
        from distutils.core import setup, Extension
        module1 = Extension('stdchelper_demo', sources=['{0}'])
        setup (name = 'ccdemo', version = '1.0',
               description = 'This is a demo package.',
               ext_modules = [module1])
        """.format(source.replace("\\", "/")))
    with open(setup, "w") as f:
        f.write(setup_content)
    # builds the extension in place with the current interpreter
    cmd = "{0} {1} build_ext --inplace".format(sys.executable, setup)
    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=temp)
    if "error" in out or "error" in err:
        # '-Werror=format-security' contains the substring 'error' without
        # being an actual failure, remove it before checking again
        out_ = out.replace("-Werror=format-security", "")
        if "error" in out_:
            raise Exception(
                "Unable to compile\n--OUT--\n{0}\n--ERR--\n{1}".format(out, err))
    # the produced binary name depends on platform and python version
    # (the 'm' ABI suffix disappeared after python 3.7)
    if sys.platform == "win32":
        name = "stdchelper_demo.cp%d%d-win_amd64.pyd" % sys.version_info[:2]
    elif sys.platform == "darwin":
        if sys.version_info[:2] <= (3, 7):
            name = "stdchelper_demo.cpython-%d%dm-darwin.so" % sys.version_info[:2]
        else:
            name = "stdchelper_demo.cpython-%d%d-darwin.so" % sys.version_info[:2]
    else:
        if sys.version_info[:2] <= (3, 7):
            name = "stdchelper_demo.cpython-%d%dm-x86_64-linux-gnu.so" % sys.version_info[:2]
        else:
            name = "stdchelper_demo.cpython-%d%d-x86_64-linux-gnu.so" % sys.version_info[:2]
    fullname = os.path.join(temp, name)
    if not os.path.exists(fullname):
        files = os.listdir(os.path.dirname(fullname))
        raise FileNotFoundError(
            "Unable to find '{0}' (platform '{1}')\nFound:\n{2}".format(
                fullname, sys.platform, "\n".join(files)))
    # imports the compiled module and checks the returned tuple
    mo = import_module(None, fullname, fLOG,
                       additional_sys_path=None, first_try=True)
    self.assertIsInstance(mo, tuple)
    self.assertEqual(len(mo), 2)
    self.assertTrue(hasattr(mo[0], '__doc__'))
    # cleans sys.modules so later tests can re-import the module
    if 'stdchelper_demo' in sys.modules:
        del sys.modules['stdchelper_demo']
    # checks the documentation generation helpers on the compiled files
    temp2 = get_temp_folder(__file__, "temp_compile_module2")
    store_obj = {}
    actions = copy_source_files(temp, temp2, fLOG=fLOG)
    store_obj = {}
    indexes = {}
    add_file_rst(temp2, store_obj, actions, fLOG=fLOG,
                 rootrep=("stdchelper_demo.", ""), indexes=indexes)
    if sys.platform == "darwin":
        # NOTE(review): rst generation for C++ modules is known to fail
        # on MacOSX, the test stops early there
        warnings.warn(
            "add_file_rst does not work yet on MacOSX for C++ modules.")
        return
    self.assertNotEmpty(store_obj)
    self.assertEqual(len(store_obj), 1)
    if len(actions) not in (3, 4):
        raise Exception("{0}\n{1}".format(
            len(actions), "\n".join(str(_) for _ in actions)))
    self.assertEqual(len(indexes), 1)
def test_script_pig(self):
    # Streams sample velib JSON rows through a small local python script,
    # then submits the equivalent PIG streaming job to an Azure cluster
    # and validates the results stored in the blob container.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    if self.client is None:
        # no Azure client configured for this environment
        return
    data = os.path.join(
        os.path.abspath(
            os.path.split(__file__)[0]),
        "data")
    fold = os.path.join(data, "..", "temp_pypig_az")
    if not os.path.exists(fold):
        os.mkdir(fold)

    # python script: reads JSON rows on stdin, writes the columns given
    # on the command line as CSV on stdout
    pyth = """
            import sys, datetime
            cols = [ _ for _ in sys.argv if ".py" not in _ ]
            for row in sys.stdin:
                row = row.strip()
                if len(row) == 0 :
                    continue
                js = eval(row)
                for station in js:
                    vals = [ str(station[c]).strip() for c in cols ]
                    sys.stdout.write(",".join(vals))
                    sys.stdout.write("\\n")
                    sys.stdout.flush()
            """.replace("            ", "")

    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)

    # two identical velib station records sent on stdin
    tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
        """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
        """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33},""" + \
        """{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
        """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
        """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

    # runs the streaming script locally first (pythonw has no console)
    cmd = sys.executable.replace(
        "pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(
        cmd, wait=True, sin=tosend, communicate=True, timeout=3, shell=False)
    out = out.strip("\n\r ")
    spl = out.split("\n")
    if len(spl) != 2:
        # two records in, two lines out
        raise Exception(
            "len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
                out, err, len(out)))

    # PIG script running the same python script as a streaming step
    pig = """
            DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status` SHIP ('pystream.py') INPUT(stdin USING PigStreaming(',')) OUTPUT (stdout USING PigStreaming(','));
            jspy = LOAD '$CONTAINER/$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);
            --DUMP jspy ;
            matrice = STREAM jspy THROUGH pystream AS ( bonus:chararray, available_bike_stands:double, available_bikes:double, lat:double, lng:double, name:chararray, status:chararray) ;
            DUMP matrice ;
            DESCRIBE jspy ;
            DESCRIBE matrice ;
            STORE matrice INTO '$CONTAINER/$PSEUDO/unittest2/results.txt' USING PigStorage('\t') ;
            """.replace("            ", "")

    pigfile = os.path.join(fold, "pystream.pig")
    with open(pigfile, "w", encoding="utf8") as f:
        f.write(pig)

    # we upload some files
    files = os.listdir(data)
    files = [os.path.join(data, _) for _ in files]
    files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

    import azure.common
    try:
        content = self.client.ls(
            self.blob_serv, self.container, "unittest2")
    except azure.common.AzureException as e:
        # storage unreachable, the test cannot run
        warnings.warn(
            "Unable to test azure, storage is still up?\n" + str(e))
        return

    if len(content) == 0:
        self.client.upload(
            self.blob_serv, self.container, "unittest2", files)

    # removes a previous result before re-running the job
    if self.client.exists(
            self.blob_serv, self.container, "unittest2/results.txt"):
        self.client.delete_folder(
            self.blob_serv, self.container, "unittest2/results.txt")

    # we submit the job
    recall = None
    if recall is None:
        try:
            job = self.client.pig_submit(self.blob_serv, self.container,
                                         pigfile, dependencies=[pyfile],
                                         params=dict(UTT="unittest2"))
        except (ConnectionError, NewConnectionError):
            # the cluster is probably not set up
            warnings.warn("hadoop cluster is not set up")
            return
        job_id = job["id"]
    else:
        job_id = recall
    status = self.client.wait_job(job_id, fLOG=fLOG)

    out, err = self.client.standard_outputs(
        status, self.blob_serv, self.container, fold)
    if "Total records written : 4" not in err:
        # two stations per record, two records expected
        raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

    # downloads and merges the distributed result into one local file
    dest = os.path.join(fold, "out_merged.txt")
    fLOG("dest=", dest)
    if os.path.exists(dest):
        os.remove(dest)
    self.client.download_merge(
        self.blob_serv, self.container,
        "$PSEUDO/unittest2/results.txt", dest)
    if not os.path.exists(dest):
        raise FileNotFoundError(dest)
    with open(dest, "r", encoding="utf8") as f:
        content = f.read()
    fLOG("-----\n", content)
    assert len(content.strip(" \n\r\t")) > 0

    df = self.client.df_head(self.blob_serv, self.container,
                             "$PSEUDO/unittest2/results.txt",
                             sep=",", merge=True)
    fLOG(df)
    assert len(df) > 0
return df ######################### # Clones skl2onnx # +++++++++++++++ this = os.path.abspath(os.path.dirname(__file__)) skl = os.path.join(this, "sklearn-onnx") if os.path.exists(skl): pth = skl cmd = "git pull" else: pth = None cmd = "git clone https://github.com/onnx/sklearn-onnx.git " + skl run_cmd(cmd, wait=True, change_path=pth, fLOG=print) ######################### # Runs the benchmark # ++++++++++++++++++ folder = os.path.join(this, 'onnxruntime-skl2onnx') location = os.path.join(this, 'sklearn-onnx', "tests") filename = os.path.splitext(os.path.split(__file__)[-1])[0] full_filename = filename + ".perf.csv" if not os.path.exists(full_filename): with sklearn.config_context(assume_finite=True): df = run_all_tests(location, folder, verbose=True) print("[benchmark] saves into '{}'.".format(full_filename)) df.to_csv(full_filename, index=False) else:
def convert_short_latex_into_png(latex, temp_folder=".", fLOG=print,
                                 miktex=r"C:\Program Files\MiKTeX 2.9\miktex\bin\x64",
                                 final_name=None):
    """
    Convert a short latex script into an image.

    @param latex latex equation
    @param temp_folder temp_folder (where temporary files will be placed)
    @param fLOG logging function
    @param miktex miktex location
    @param final_name if not None, copy the image at this location using this name
    @return a location to the image (it should be copied), and its size

    You should not call the function twice at the same in the same folder.

    @warning The function ends the program if there was a failure.
             Something is missing on the command line.
    """
    if not os.path.exists(miktex):
        raise FileNotFoundError("unable to find miktex")

    if sys.platform.startswith("win"):
        htlatex = os.path.join(miktex, "htlatex.exe")
        if not os.path.exists(htlatex):
            raise FileNotFoundError("unable to find htlatex")
    else:
        htlatex = os.path.join(miktex, "htlatex")

    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    # writes a minimal document around the equation
    eq = os.path.join(temp_folder, "eq.tex")
    with open(eq, "w") as f:
        f.write(r"""\documentclass[12pt]{article}
            \pagestyle{empty}
            \begin{document}
            $$
            %s
            $$
            \end{document}""".replace("            ", "") % latex.strip("\n\r "))

    # htlatex must run inside the temporary folder
    cmd = '"' + htlatex + \
        '" eq.tex "html, graphics-300" "" "" "--interaction=nonstopmode"'
    cwd = os.getcwd()
    os.chdir(temp_folder)
    try:
        out, err = run_cmd(cmd, wait=True)
    finally:
        # BUG FIX: the working directory was previously not restored
        # when run_cmd raised, leaving the process in temp_folder.
        os.chdir(cwd)
    if "FAILED" in err:
        raise Exception("it failed\n-----\n{0}\n----------\n{1}".format(
            out, err))
    img = os.path.join(temp_folder, "eq0x.png")
    if not os.path.exists(img):
        # surfaces the latex log when no image was produced
        with open(os.path.join(temp_folder, "eq.log"), "r") as f:
            log = f.read()
        raise FileNotFoundError("the compilation did not work\n" + log)
    if final_name is not None:
        # size reduction
        im = Image.open(img)
        shutil.copy(img, final_name)
        return final_name, im.size
    else:
        im = Image.open(img)
        return img, im.size
def git_clone(local_folder, url_https, user=None, password=None,
              timeout=60, init=True, fLOG=noLOG):
    """
    Clones a project from a git repository in a non empty local folder,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param local_folder local folder of the project
    @param url_https url, example ``https://gitlab.server/folder/project_name``
    @param user part 1 of the credentials
    @param password part 2 of the credentials
    @param timeout timeout for the command line
    @param init see below (True, use fetch, False, use clone)
    @param fLOG logging function
    @return local_folder

    If the reposity has already been cloned, it does not do it again.
    We assume that git can be run without giving its full location.

    The function executes the following commands (if init is True)::

        cd [folder]
        git init
        git remote add origin [https://user.password@server/project.git]
        git fetch

    Otherwise, it does::

        cd [folder]
        git clone origin [https://user.password@server/project.git]
        git fetch

    A folder will be created.

    .. exref::
        :tag: Automation
        :title: Clone many folders in one row

        ::

            eleves = "project1;project2;..."
            root = r"destination"

            for el in eleves.split(";"):
                cl = el.lower().replace(".", "-")
                fold = os.path.join(root, el)
                if not os.path.exists(fold):
                    print("clone", el)
                    url = "https://<gitlab>/<group>/{0}.git".format(cl)
                    git_clone(fold, url, user=user, password=password,
                              init=False, fLOG=print)
    """
    url_user = git_url_user_password(url_https, user, password)
    # BUG FIX: the original body re-assigned ``timeout = 60`` here,
    # silently discarding the value passed by the caller; the parameter
    # is now honoured.
    local_folder = os.path.normpath(os.path.abspath(local_folder))

    if init:
        if not os.path.exists(local_folder):
            fLOG("creating folder", local_folder)
            os.mkdir(local_folder)

        hg = os.path.join(local_folder, ".git")
        if os.path.exists(hg):
            raise Exception("folder {0} should not exist".format(local_folder))

        # the previous raise guarantees .git does not exist, the original
        # redundant re-check was removed
        cmds = "\n".join([
            "cd {0}".format(local_folder),
            "git init",
            "git remote add origin {0}".format(url_user),
            "git fetch"])
        # the shell runs the chained commands separated by '&'
        cmd = cmds.replace("\n", "&")
        sin = ""  # "{0}\n".format(password)
        out, err = run_cmd(
            cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
        git_check_error(out, err, fLOG)
        return local_folder
    else:
        if not os.path.exists(local_folder):
            fLOG("creating folder", local_folder)
            os.mkdir(local_folder)

        hg = os.path.join(local_folder, ".git")
        if os.path.exists(hg):
            raise Exception("folder {0} should not exist".format(local_folder))

        # the clone creates a subfolder named after the repository
        final = os.path.split(url_user)[-1].replace(".git", "")
        locf = os.path.join(local_folder, final)
        if os.path.exists(locf):
            raise Exception(
                "folder {0} should not exists before cloning".format(locf))

        cmds = "\n".join([
            "cd {0}".format(local_folder),
            "git clone {0} .".format(url_user)])
        cmd = cmds.replace("\n", "&")
        sin = ""  # "{0}\n".format(password)
        out, err = run_cmd(
            cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
        git_check_error(out, err, fLOG)
        return locf
def compile_cython_single_script(script, skip_warn=True, fLOG=noLOG):
    """
    This function considers a script ``.pyx``, writes the proper
    setup file, and compiles it.

    @param script filename
    @param skip_warn skip warnings
    @param fLOG logging function

    The function applies the steps described in the basic tutorial
    `The Basics of Cython
    <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_.
    The function creates a ``setup.py`` in the same location
    and compiles it.

    The compilation requires a compiler
    (not `MinGW <http://www.mingw.org/>`_ or
    `Visual Studio (Community Edition)
    <https://www.microsoft.com/france/visual-studio/produits/community/Default.aspx>`_).
    If none was found, Python usually displays an error message like::

        Unable to find vcvarsall.bat

    You can also read this old blog post:
    `Build a Python 64 bit extension on Windows
    <http://www.xavierdupre.fr/blog/2013-07-07_nojs.html>`_
    about this file::

        ``C:\\Python35_x64\\lib\\distutils\\msvc9compiler.py``.

    .. faqref::
        :tag: cython
        :title: How to compile a Cython function?

        This function compiles a `Cython <http://cython.org/>`_ script.
        This extension makes it possible to implement Python functions in a
        pseudo-language close to
        `C <https://en.wikipedia.org/wiki/C_(programming_language)>`_.
        Follow the instructions described in the tutorial
        `The Basics of Cython
        <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_
        to successfully use a function written in Cython. That is what the
        function :func:`compile_cython_single_script` does.
        Since the pseudo-C part is compiled to make it much faster, the most
        difficult part is usually to make sure the Python interpreter finds
        the **right** compiler. This compiler is necessarily the same as the
        one used to compile Python itself and it changes with every version.
        See `Compiling Python on Windows
        <https://docs.python.org/3/using/windows.html?highlight=visual%20studio#compiling-python-on-windows>`_
        and pay attention to the version of Python you are using.
    """
    # only .pyx scripts are accepted
    ext = os.path.splitext(script)[-1]
    if ext != ".pyx":
        raise ValueError("no extension .pyx: " + script)
    if not os.path.exists(script):
        raise FileNotFoundError(script)
    name = os.path.split(script)[-1]
    namen = os.path.splitext(name)[0]
    # minimal setup script recommended by the Cython tutorial
    setup_script = """
            from distutils.core import setup
            from Cython.Build import cythonize
            setup(
                name='{1}',
                ext_modules=cythonize("{0}")
            )
            """.replace("            ", "").format(name, namen)
    current, name = os.path.split(script)
    # the setup file is written next to the script, e.g. 'x.pyx.setup.py'
    filename = os.path.join(os.path.dirname(script), name + ".setup.py")
    with open(filename, "w") as f:
        f.write(setup_script)

    # compiles in place from the script's folder
    cmd = sys.executable + " -u {0} build_ext --inplace".format(filename)
    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=current)
    if len(err) > 0:
        if skip_warn:
            # raises only if a top-level stderr line (continuation lines
            # start with a space) is not a UserWarning
            do_raise = False
            lines = err.split("\n")
            for line in lines:
                if len(line) > 0 and not line.startswith(" "):
                    if "UserWarning" not in line:
                        do_raise = True
                        break
        else:
            do_raise = True
        if do_raise:
            # includes the script content in the error for debugging
            with open(script, "r", encoding="utf-8") as f:
                content = f.read()
            raise CustomCythonError(
                "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}\nSCRIPT:\n{3}".format(
                    cmd, out, err, content))
        else:
            warnings.warn(
                "[compile_cython_single_script] CMD:\n{0}\nOUT:\n{1}ERR:\n{2}".format(
                    cmd, out, err))
    return out
def execute_python_scripts(root, df, col_names=None, url=None, eol="/",
                           fLOG=noLOG, gen_mail=None):
    """
    retrieve all python scripts and run them

    @param root main folder
    @param df dataframe
    @param col_names dictionary for columns:
        folder, mail, program, out, err, url, cmp, url_content, key, time
    @param eol if not None, replaces end of lines by *eof*
    @param gen_mail generator of mails
    @param fLOG logging function
    @return dataframe
    """
    # ROBUSTNESS: the function previously crashed on the first
    # ``col_names.get`` when the default None was kept
    if col_names is None:
        col_names = {}
    if gen_mail is None:
        def iter_mail(mail):
            yield mail
            yield mail.lower()
        gen_mail = iter_mail

    def post_process(out, eol):
        # normalizes a program output before storing or comparing it
        out = out.strip("\r\t\n").rstrip().replace(
            "\r", "").replace("\t", " ")
        if eol:
            out = out.replace("\n", eol)
        return out

    downloads = {}
    res = []
    for name, mail in zip(df[col_names.get("folder", "folder")],
                          df[col_names.get("mail", "mail")]):
        row = {col_names.get("folder", "folder"): name}
        fLOG("[execute_python_script], look into '{0}'".format(name))
        subf = os.path.join(root, name)
        col_find = col_names.get("exists", "exists")
        if not os.path.exists(subf):
            subf = os.path.join(root, name.replace("-", "."))
        if not os.path.exists(subf):
            row[col_find] = False
            res.append(row)
        else:
            row[col_find] = True
            store = []
            for py in explore_folder_iterfile(subf, ".*[.]py$"):
                store.append(py)
            fLOG(" -", len(store), "programs found")

            col_out = col_names.get("out", "out")
            col_err = col_names.get("err", "err")
            col_prog = col_names.get("program", "program")
            col_time = col_names.get("time", "time")
            col_key = col_names.get("key", "key")
            col_size = col_names.get("size", "size")
            col_url = col_names.get("url", "url")
            col_ind = col_names.get("pattern_id", "pattern_id")

            if len(store) == 0:
                # no program found: still emits one row per mail variation
                for mm in sorted(gen_mail(mail.strip())):
                    mailid = _get_code(mm.encode("utf-8"))
                    r = row.copy()
                    loc = url.format(mailid)
                    ind = {col_key: mm, col_ind: mailid, col_url: loc}
                    r.update(ind)
                    res.append(r)
                continue

            # test all programs
            outs = []
            for py in sorted(store):
                cmd = '"{0}" "{1}"'.format(sys.executable, py)
                # BUG FIX: time.clock() was removed in Python 3.8,
                # time.perf_counter() is its documented replacement.
                t1 = time.perf_counter()
                try:
                    out, err = run_cmd(cmd, wait=True)
                except Exception as e:
                    out = None
                    err = str(e)
                # BUG FIX: post_process was called unconditionally and
                # raised AttributeError when the command failed (out None).
                if out is not None:
                    out = post_process(out, eol)
                t2 = time.perf_counter()
                outs.append({col_out: out,
                             col_err: post_process(err, eol),
                             col_prog: os.path.split(py)[-1],
                             col_time: t2 - t1,
                             col_size: os.stat(py).st_size})

            if url is None:
                for o in outs:
                    r = row.copy()
                    r.update(o)
                    res.append(r)
            else:
                col_cmp = col_names.get("cmp", "cmp")
                col_in = col_names.get(
                    "sortie_dans_motif", "sortie_dans_motif")
                col_in2 = col_names.get(
                    "motif_dans_sortie", "motif_dans_sortie")
                col_dist = col_names.get("dist", "dist")
                col_content = col_names.get("content", "content")
                if out is None:
                    # last program failed: keep the rows without
                    # downloading the expected content.
                    # BUG FIX: the original wrote
                    # ``for ii, mm in gen_mail(...)`` although gen_mail
                    # yields plain strings (ValueError at runtime).
                    for mm in gen_mail(mail.strip()):
                        mailid = _get_code(mm.encode("utf-8"))
                        ind = {col_ind: mailid}
                        for o in outs:
                            r = row.copy()
                            r.update(o)
                            r.update(ind)
                            res.append(r)
                else:
                    for mm in sorted(gen_mail(mail.strip())):
                        mailid = _get_code(mm.encode("utf-8"))
                        loc = url.format(mailid)
                        ind = {col_key: mm, col_ind: mailid, col_url: loc}
                        # downloads each expected output only once
                        if loc not in downloads:
                            downloads[loc] = get_url_content_timeout(
                                loc).strip("\n\r\t ")
                        content = post_process(downloads[loc], eol)
                        ind[col_content] = content

                        for o in outs:
                            r = row.copy()
                            r.update(o)
                            r.update(ind)
                            out = r[col_out]
                            # several comparison flavours between the
                            # program output and the expected content
                            r[col_cmp] = out == content or out.strip(
                            ) == content.strip()
                            r[col_in] = out in content
                            r[col_in2] = content in out
                            r[col_dist] = (edit_distance(out, content)[0]) if (
                                len(content) > len(out) // 2) else abs(len(content) - len(out))
                            res.append(r)

    return pandas.DataFrame(res)
def build_grammar(g4, version="4.7.1", fLOG=noLOG):
    """
    Compiles the grammar for a specific file.

    @param g4 grammar format antlr4
    @param version version of *antlr4* to use, 4.7
    @param fLOG logging function
    @return list of files

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at:
    `antlr4 options
    <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    .. exref::
        :title: Builds a Antlr4 grammar

        See `grammars-v4 <https://github.com/antlr/grammars-v4>`_

        ::

            build_grammar("R.g4")
    """
    if not g4.endswith(".g4"):
        # resolves a short grammar name into a file next to this module
        fold = os.path.abspath(os.path.dirname(__file__))
        g4 = os.path.join(fold, g4 + ".g4")

    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(
        version)
    spl = url.split("/")
    domain, name = "/".join(spl[:-1]) + "/", spl[-1]

    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, name)
    if not os.path.exists(final):
        # downloads the antlr jar only once
        from ..datasource.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # NOTE(review): the classpath uses Windows separators ('\\' and ';')
    # unconditionally — confirm behaviour on Linux where ':' is expected.
    path = os.environ.get("CLASSPATH", "")
    if name not in path:
        path = ".;{0}\\antlr-{1}-complete.jar".format(folder, version)
    else:
        path = ".;{0}\\antlr-{1}-complete.jar;{2}".format(
            folder, version, os.environ["CLASSPATH"])
    os.environ["CLASSPATH"] = path
    fLOG("CLASSPATH", os.environ["CLASSPATH"])

    # we remove -rc...
    version = version.split("-")[0]

    cmd = "org.antlr.v4.Tool "
    if "Lexer" not in g4:
        # lexers do not need the python target option
        cmd += "-Dlanguage=Python3 "
    cmd += g4
    from pyquickhelper.loghelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True, fLOG=fLOG)

    def compiled():
        # the tool produces *.tokens for lexers and *.py otherwise
        if "Lexer" in g4:
            lexer = g4.replace(".g4", ".tokens")
        else:
            lexer = g4.replace(".g4", ".py")
        return os.path.exists(lexer)

    if not compiled() or (len(err) > 0 and "error" in err):
        # second attempt with an explicit java location (Windows)
        javapath = r'C:\Program Files\Java\jre7\bin\java.exe'
        os.environ["PATH"] = os.environ["PATH"] + ";" + javapath
        if sys.platform.startswith("win") and os.path.exists(javapath):
            out, err = run_cmd(
                '"' + javapath + '" ' + cmd, wait=True, fLOG=fLOG)
        if not compiled() or (len(err) > 0 and "error" in err):
            raise Exception(
                "unable to compile: " + final + "\nCLASSPATH:\n" +
                os.environ["CLASSPATH"] + "\nERR:\n" + err +
                "\nCMD:\njava " + cmd + "\nYou should do it manually.")
    elif err:
        # BUG FIX: the original used ``err.split(err)`` which always
        # returns ['', ''] and therefore never reported real errors;
        # stderr must be split line by line.
        err_lines = err.split("\n")
        err_lines = [_ for _ in err_lines if not _.startswith("warning(")]
        err2 = "\n".join(err_lines).strip("\n ")
        if len(err2) > 0:
            raise Exception(
                "unable to compile: " + final + "\nCLASSPATH:\n" +
                os.environ["CLASSPATH"] + "\nERR:\n" + err +
                "\nCMD:\njava " + cmd)

    if os.environ.get("USERNAME", os.environ.get("USER", "")) in g4:
        # removes the local folder name from the generated files so the
        # output does not leak the developer's path
        dest = os.path.dirname(g4)
        for name in os.listdir(dest):
            if "Parser" not in name and "Lexer" not in name and \
                    "Token" not in name and "Listener" not in name:
                continue
            full = os.path.join(dest, name)
            with open(full, "r", encoding="utf-8") as f:
                content = f.read()
            content1 = content.replace(dest, "")
            if content1 != content:
                fLOG("[build_grammar] modified", name)
                with open(full, "w", encoding="utf-8") as f:
                    f.write(content1)
    return out + "\n---ERR---\n" + err
def test_matplotlib_example(self):
    # Checks that a video encoder (ffmpeg, or avconv outside Windows)
    # is available, then builds a small FuncAnimation of a converging
    # beta distribution and saves it as an mp4 file.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    progs = ["ffmpeg"]
    if not sys.platform.startswith("win"):
        progs.append("avconv")
    errs = []
    prog = None
    for prog in progs:
        # running the program without arguments prints a usage message
        out, err = run_cmd(prog, wait=True, fLOG=fLOG)
        exps = "usage:"
        if (exps not in out and exps not in err) or err is None or len(err) == 0:
            errs.append((prog, err))
        else:
            break
    if len(errs) >= len(progs):
        # none of the encoders answered
        if sys.platform.startswith("win"):
            fLOG("download ffmpeg")
            add_missing_development_version(
                ["pyensae"], __file__, hide=True)
            from pyensae.datasource import download_data
            download_data("ffmpeg.zip", website="xd")
        else:
            raise FileNotFoundError(
                "Unable to find '{1}'.\nPATH='{0}'\n--------\n[OUT]\n{2}\n[ERR]\n{3}".format(
                    os.environ["PATH"], prog, out,
                    "\n----\n".join("{0}:\n{1}".format(*_) for _ in errs)))

    temp = get_temp_folder(__file__, "temp_example_example")
    fix_tkinter_issues_virtualenv()

    # update a distribution based on new data.
    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.stats as ss
    from matplotlib.animation import FuncAnimation, writers

    # To get the list of available writers
    if not writers.is_available(prog):
        writers.register(prog)
    fLOG(writers.list())

    class UpdateDist:
        # Keeps a running beta-distribution estimate of a Bernoulli
        # success probability and redraws it at every frame.

        def __init__(self, ax, prob=0.5):
            self.success = 0
            self.prob = prob
            self.line, = ax.plot([], [], 'k-')
            self.x = np.linspace(0, 1, 200)
            self.ax = ax

            # Set up plot parameters
            self.ax.set_xlim(0, 1)
            self.ax.set_ylim(0, 15)
            self.ax.grid(True)

            # This vertical line represents the theoretical value, to
            # which the plotted distribution should converge.
            self.ax.axvline(prob, linestyle='--', color='black')

        def init(self):
            # resets the counter and clears the line between runs
            self.success = 0
            self.line.set_data([], [])
            return self.line,

        def __call__(self, i):
            # This way the plot can continuously run and we just keep
            # watching new realizations of the process
            if i == 0:
                return self.init()

            # Choose success based on exceed a threshold with a uniform
            # pick
            if np.random.rand(1,) < self.prob:  # pylint: disable=W0143
                self.success += 1
            y = ss.beta.pdf(self.x, self.success + 1,
                            (i - self.success) + 1)
            self.line.set_data(self.x, y)
            return self.line,

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ud = UpdateDist(ax, prob=0.7)
    anim = FuncAnimation(fig, ud, frames=np.arange(100),
                         init_func=ud.init, interval=100, blit=True)
    try:
        Writer = writers[prog]
    except KeyError as e:
        # avconv may not be registered under its own name
        if prog == "avconv":
            from matplotlib.animation import AVConvWriter
            Writer = AVConvWriter
        else:
            raise e
    writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
    anim.save(os.path.join(temp, 'lines2.mp4'), writer=writer)
    plt.close('all')
    fLOG("end")
def test_script_pig(self):
    # Streams sample velib JSON rows through a small local python script,
    # then submits the equivalent PIG streaming job to an Azure cluster
    # and validates the results stored in the blob container.
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    if self.client is None:
        # no Azure client configured for this environment
        return
    data = os.path.join(os.path.abspath(
        os.path.split(__file__)[0]), "data")
    fold = os.path.join(data, "..", "temp_pypig_az")
    if not os.path.exists(fold):
        os.mkdir(fold)

    # python script: reads JSON rows on stdin, writes the columns given
    # on the command line as CSV on stdout
    pyth = """
            import sys, datetime
            cols = [ _ for _ in sys.argv if ".py" not in _ ]
            for row in sys.stdin:
                row = row.strip()
                if len(row) == 0 :
                    continue
                js = eval(row)
                for station in js:
                    vals = [ str(station[c]).strip() for c in cols ]
                    sys.stdout.write(",".join(vals))
                    sys.stdout.write("\\n")
                    sys.stdout.flush()
            """.replace("            ", "")

    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)

    # two identical velib station records sent on stdin
    tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", """ + \
        """'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
        """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - """ + \
        """ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
        """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
        """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33},""" + \
        """{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", """ + \
        """'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
        """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - """ + \
        """ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
        """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
        """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

    # runs the streaming script locally first (pythonw has no console)
    cmd = sys.executable.replace("pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(cmd, wait=True, sin=tosend,
                       communicate=True, timeout=3, shell=False)
    out = out.strip("\n\r ")
    spl = out.split("\n")
    if len(spl) != 2:
        # two records in, two lines out
        raise Exception("len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
            out, err, len(out)))

    # PIG script running the same python script as a streaming step
    pig = """
            DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status` SHIP ('pystream.py') INPUT(stdin USING PigStreaming(',')) OUTPUT (stdout USING PigStreaming(','));
            jspy = LOAD '$CONTAINER/$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);
            --DUMP jspy ;
            matrice = STREAM jspy THROUGH pystream AS ( bonus:chararray, available_bike_stands:double, available_bikes:double, lat:double, lng:double, name:chararray, status:chararray) ;
            DUMP matrice ;
            DESCRIBE jspy ;
            DESCRIBE matrice ;
            STORE matrice INTO '$CONTAINER/$PSEUDO/unittest2/results.txt' USING PigStorage('\t') ;
            """.replace("            ", "")

    pigfile = os.path.join(fold, "pystream.pig")
    with open(pigfile, "w", encoding="utf8") as f:
        f.write(pig)

    # we upload some files
    files = os.listdir(data)
    files = [os.path.join(data, _) for _ in files]
    files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

    import azure.common  # pylint: disable=C0415
    try:
        content = self.client.ls(self.blob_serv, self.container,
                                 "unittest2")
    except azure.common.AzureException as e:
        # storage unreachable, the test cannot run
        warnings.warn("Unable to test azure, storage is still up?\n" + str(e))
        return

    if len(content) == 0:
        self.client.upload(self.blob_serv, self.container,
                           "unittest2", files)

    # removes a previous result before re-running the job
    if self.client.exists(self.blob_serv, self.container,
                          "unittest2/results.txt"):
        self.client.delete_folder(self.blob_serv, self.container,
                                  "unittest2/results.txt")

    # we submit the job
    recall = None
    if recall is None:
        try:
            job = self.client.pig_submit(self.blob_serv, self.container,
                                         pigfile, dependencies=[pyfile],
                                         params=dict(UTT="unittest2"))
        except (ConnectionError, NewConnectionError):
            # the cluster is probably not set up
            warnings.warn("hadoop cluster is not set up")
            return
        job_id = job["id"]
    else:
        job_id = recall
    status = self.client.wait_job(job_id, fLOG=fLOG)

    out, err = self.client.standard_outputs(status, self.blob_serv,
                                            self.container, fold)
    if "Total records written : 4" not in err:
        # two stations per record, two records expected
        raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

    # downloads and merges the distributed result into one local file
    dest = os.path.join(fold, "out_merged.txt")
    fLOG("dest=", dest)
    if os.path.exists(dest):
        os.remove(dest)
    self.client.download_merge(self.blob_serv, self.container,
                               "$PSEUDO/unittest2/results.txt", dest)
    if not os.path.exists(dest):
        raise FileNotFoundError(dest)
    with open(dest, "r", encoding="utf8") as f:
        content = f.read()
    fLOG("-----\n", content)
    assert len(content.strip(" \n\r\t")) > 0

    df = self.client.df_head(self.blob_serv, self.container,
                             "$PSEUDO/unittest2/results.txt",
                             sep=",", merge=True)
    fLOG(df)
    assert len(df) > 0
def test_script_pig(self):
    # End-to-end test of a PIG streaming job: generates a Python streaming
    # script and a PIG script, uploads sample data to the cluster, checks
    # the PIG syntax, submits the job and validates the merged output file.
    # The HIVE part is currently disabled (early return below).
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    if self.client is None:
        # no cluster client configured in this environment: skip silently
        return
    data = os.path.join(
        os.path.abspath(
            os.path.split(__file__)[0]),
        "data")
    fLOG("AA")

    # python streaming script: reads rows on stdin, keeps the columns given
    # on the command line, writes csv rows on stdout
    pyth = """
        import sys, datetime
        cols = [ _ for _ in sys.argv if ".py" not in _ ]
        for row in sys.stdin:
            row = row.strip()
            if len(row) == 0 :
                continue
            js = eval(row)
            for station in js:
                vals = [ str(station[c]).strip() for c in cols ]
                sys.stdout.write(",".join(vals))
                sys.stdout.write("\\n")
                sys.stdout.flush()
        """.replace("        ", "")

    fold = os.path.join(data, "..", "temp_pypig")
    if not os.path.exists(fold):
        os.mkdir(fold)
    fLOG("BB")
    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)

    # two sample velib rows sent to the streaming script through stdin
    tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': """ + \
        """datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': """ + \
        """'10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', """ + \
        """'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, """ + \
        """'available_bike_stands': 1, 'bike_stands': 33},{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - """ + \
        """75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, """ + \
        """'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, """ + \
        """'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
        """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

    # run the streaming script locally first to validate it (expects one
    # output line per input row, hence 2 lines)
    cmd = sys.executable.replace(
        "pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(
        cmd, wait=True, sin=tosend, communicate=True, timeout=3, shell=False)
    out = out.strip("\n\r ")
    spl = out.split("\n")
    if len(spl) != 2:
        raise Exception("len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
            out, err, len(out)))
    fLOG("CC")

    # PIG script streaming the uploaded data through the python script
    pig = """
        DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`
            SHIP ('pystream.py')
            INPUT(stdin USING PigStreaming(','))
            OUTPUT (stdout USING PigStreaming(','));
        jspy = LOAD '$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);
        --DUMP jspy ;
        matrice = STREAM jspy THROUGH pystream AS
                    ( bonus:chararray,
                      available_bike_stands:double,
                      available_bikes:double,
                      lat:double,
                      lng:double,
                      name:chararray,
                      status:chararray) ;
        DUMP matrice ;
        DESCRIBE jspy ;
        DESCRIBE matrice ;
        STORE matrice INTO 'unittest2/results.txt' USING PigStorage('\t') ;
        """.replace("        ", "")

    fLOG(self.client.username)
    # HIVE script built for reference but never submitted (see below)
    hive_sql = """
DROP TABLE IF EXISTS bikes20;
CREATE TABLE bikes20 (sjson STRING);
LOAD DATA INPATH "/user/__USERNAME__/unittest2/paris*.txt" INTO TABLE bikes20;
SELECT * FROM bikes20 LIMIT 10;
""".replace("__USERNAME__", self.client.username.decode("ascii"))
    fLOG(hive_sql)
    # ${hiveconf:UTT}

    pigfile = os.path.join(fold, "pystream.pig")
    with open(pigfile, "w", encoding="utf8") as f:
        f.write(pig)
    fLOG("DD upload")

    # we upload some files
    files = os.listdir(data)
    files = [os.path.join(data, _) for _ in files]
    files = [_ for _ in files if os.path.isfile(_) and "paris" in _]
    if not self.client.dfs_exists("unittest2"):
        self.client.dfs_mkdir("unittest2")
    content = self.client.dfs_ls("unittest2")
    if len(content) == 0:
        # upload only once; re-runs reuse the existing data
        self.client.upload_cluster(files, "unittest2")
    if self.client.dfs_exists("unittest2/results.txt"):
        # remove a previous result so STORE does not fail
        self.client.dfs_rm("unittest2/results.txt", True)
    fLOG("FF")

    # we test the syntax
    out, err = self.client.pig_submit(pigfile, dependencies=[pyfile],
                                      check=True, no_exception=True,
                                      params=dict(UTT="unittest2"), fLOG=fLOG)
    if "pystream.pig syntax OK" not in err:
        raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))
    fLOG("II")

    # we submit the job
    out, err = self.client.pig_submit(pigfile, dependencies=[pyfile],
                                      stop_on_failure=True, no_exception=True,
                                      redirection=None,
                                      params=dict(UTT="unittest2"))
    fLOG("JJ")
    if "Total records written : 4" not in err:
        raise Exception("PIG OUT:\n{0}\nPIG ERR:\n{1}".format(out, err))

    dest = os.path.join(fold, "out_merged.txt")
    fLOG("dest=", dest)
    if os.path.exists(dest):
        os.remove(dest)
    fLOG("KK")
    self.client.download_cluster("unittest2/results.txt", dest, merge=True)
    assert os.path.exists(dest)
    with open(dest, "r", encoding="utf8") as f:
        content = f.read()
    fLOG("-----\n", content)
    assert len(content.strip(" \n\r\t")) > 0
    fLOG("LL")

    # we submit the job
    # disable HIVE for the time being (broken)
    warnings.warn("hive not being tested")
    return
    # NOTE: everything below is unreachable until HIVE is re-enabled
    out, err = self.client.hive_submit(hive_sql, redirection=None,
                                       params=dict(UTT="unittest2"), fLOG=fLOG)
    fLOG("HIVE OUT")
    fLOG(out)
    fLOG("HIVE ERR")
    fLOG(err)
    #assert "(0,1.0,32.0,48.8724200631,2.34839523628,10042" in out
    fLOG("END")
def compile_cython_single_script(script, skip_warn=True, fLOG=noLOG):
    """
    Considers a script ``.pyx``, writes the proper setup file,
    and compiles it.

    @param      script      filename of the ``.pyx`` script
    @param      skip_warn   if True, compiler output containing only warnings
                            (``UserWarning``) does not raise
    @param      fLOG        logging function
    @return                 standard output of the compilation

    The function applies the steps described in the basic tutorial
    `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_.
    The function creates a ``setup.py`` in the same location and compiles it.

    The compilation requires a compiler (not `MinGW <http://www.mingw.org/>`_ or
    `Visual Studio (Community Edition) <https://www.microsoft.com/france/visual-studio/produits/community/Default.aspx>`_).
    If none was found, Python usually displays an error message like::

        Unable to find vcvarsall.bat

    You can also read this old blog post:
    `Build a Python 64 bit extension on Windows <http://www.xavierdupre.fr/blog/2013-07-07_nojs.html>`_
    about this file:: ``C:\\Python35_x64\\lib\\distutils\\msvc9compiler.py``.

    .. faqref::
        :tag: cython
        :title: Compile a Cython function?

        This function compiles a `Cython <http://cython.org/>`_ script.
        This extension implements Python functions in a pseudo-language close
        to `C <https://en.wikipedia.org/wiki/C_(programming_language)>`_.
        Follow the instructions described in the tutorial
        `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_
        to use a function written in Cython. That is what the function
        :func:`compile_cython_single_script` does. Because the pseudo-C part
        is compiled to make it much faster, the most difficult part is
        usually making sure the Python interpreter finds the *right*
        compiler, which is necessarily the one used to compile Python itself
        and changes with every version. See
        `Compiling Python on Windows <https://docs.python.org/3/using/windows.html?highlight=visual%20studio#compiling-python-on-windows>`_
        and pay attention to the version of Python you use.
    """
    ext = os.path.splitext(script)[-1]
    if ext != ".pyx":
        raise ValueError("no extension .pyx: " + script)
    if not os.path.exists(script):
        raise FileNotFoundError(script)
    name = os.path.split(script)[-1]
    namen = os.path.splitext(name)[0]
    setup_script = """
    from distutils.core import setup
    from Cython.Build import cythonize
    setup(
        name='{1}',
        ext_modules=cythonize("{0}")
    )
    """.replace("    ", "").format(name, namen)
    # the directory of the script is both where the setup is written and
    # where the compilation runs
    current = os.path.dirname(script)
    filename = os.path.join(current, name + ".setup.py")
    # fix: write with an explicit encoding instead of the locale default
    with open(filename, "w", encoding="utf-8") as f:
        f.write(setup_script)
    cmd = sys.executable + " -u {0} build_ext --inplace".format(filename)
    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=current)
    if len(err) > 0:
        # fix: initialize do_raise so that skip_warn=False raises instead of
        # leaving the flag unbound
        do_raise = True
        if skip_warn:
            do_raise = False
            for line in err.split("\n"):
                # a non-indented line which is not a UserWarning is treated
                # as a real error
                if len(line) > 0 and not line.startswith(" "):
                    if "UserWarning" not in line:
                        do_raise = True
                        break
        if do_raise:
            with open(script, "r", encoding="utf-8") as f:
                content = f.read()
            raise CustomCythonError(
                "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}\nSCRIPT:\n{3}".format(
                    cmd, out, err, content))
        warnings.warn(
            "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}".format(cmd, out, err))
    return out
def build_grammar(g4, version="4.6", fLOG=noLOG):
    """
    Compiles the grammar for a specific file.

    @param      g4          grammar, format antlr4 (``.g4`` file or a name
                            resolved next to this module)
    @param      version     version of *antlr4* to use, 4.4, 4.5-rc-2
    @param      fLOG        logging function
    @return     output of the compilation (with stderr appended)

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at:
    `antlr4 options
    <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    .. exref::
        :title: Build an Antlr4 grammar

        See `grammars-v4 <https://github.com/antlr/grammars-v4>`_

        ::

            build_grammar("R.g4")
    """
    if not g4.endswith(".g4"):
        # resolve a bare name next to this module
        fold = os.path.abspath(os.path.dirname(__file__))
        g4 = os.path.join(fold, g4 + ".g4")
    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(
        version)
    spl = url.split("/")
    domain, name = "/".join(spl[:-1]) + "/", spl[-1]
    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, name)
    if not os.path.exists(final):
        from ..datasource.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)
    # fix: build CLASSPATH with os.pathsep / os.path.join so the function
    # also works on POSIX, and never drop an existing CLASSPATH
    jar = os.path.join(folder, "antlr-{0}-complete.jar".format(version))
    path = os.environ.get("CLASSPATH", "")
    if not path:
        path = ".{0}{1}".format(os.pathsep, jar)
    elif jar not in path:
        path = ".{0}{1}{0}{2}".format(os.pathsep, jar, path)
    os.environ["CLASSPATH"] = path
    fLOG("CLASSPATH", os.environ["CLASSPATH"])
    # we remove -rc...
    version = version.split("-")[0]
    cmd = "org.antlr.v4.Tool -Dlanguage=Python3 " + g4
    from pyquickhelper.loghelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True, fLOG=fLOG)

    def compiled():
        # antlr4 generates <name>Lexer.py next to the grammar on success
        lexer = g4.replace(".g4", "Lexer.py")
        return os.path.exists(lexer)

    if not compiled() or (len(err) > 0 and "error" in err):
        # retry with an explicit java location on Windows
        javapath = r'C:\Program Files\Java\jre7\bin\java.exe'
        if sys.platform.startswith("win") and os.path.exists(javapath):
            # fix: only touch PATH on Windows when the fallback java exists,
            # and append the directory, not the executable itself
            os.environ["PATH"] = os.environ["PATH"] + \
                os.pathsep + os.path.dirname(javapath)
            out, err = run_cmd(
                '"' + javapath + '" ' + cmd, wait=True, fLOG=fLOG)
            if not compiled() or (len(err) > 0 and "error" in err):
                raise Exception(
                    "unable to compile: " + final + "\nCLASSPATH:\n" +
                    os.environ["CLASSPATH"] + "\nERR:\n" + err +
                    "\nCMD:\njava " + cmd +
                    "\nYou should do it manually.")
        else:
            raise Exception(
                "unable to compile: " + final + "\nCLASSPATH:\n" +
                os.environ["CLASSPATH"] + "\nERR:\n" + err +
                "\nCMD:\njava " + cmd)
    return out + "\nERR:\n" + err
def test_compile_module(self):
    # Compiles a small C++ extension module with distutils, imports the
    # produced binary and checks that the documentation generation helpers
    # (copy_source_files / add_file_rst) handle it.
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_compile_module")
    source = os.path.join(temp, "cdemo.cpp")
    init = os.path.join(temp, "__init__.py")
    setup = os.path.join(temp, "setup.py")
    with open(source, "w") as f:
        f.write(TestModuleC.content_c)
    with open(init, "w") as f:
        # empty __init__.py makes the temp folder a package
        pass
    setup_content = dedent("""
        from distutils.core import setup, Extension
        module1 = Extension('stdchelper_demo',
                            sources=['{0}'])
        setup (name = 'ccdemo',
               version = '1.0',
               description = 'This is a demo package.',
               ext_modules = [module1])
        """.format(source.replace("\\", "/")))
    with open(setup, "w") as f:
        f.write(setup_content)
    cmd = "{0} {1} build_ext --inplace".format(sys.executable, setup)
    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=temp)
    if "error" in out or "error" in err:
        # gcc's '-Werror=format-security' flag contains the word "error"
        # and must not be mistaken for a compilation failure
        out_ = out.replace("-Werror=format-security", "")
        if "error" in out_:
            raise Exception(
                "Unable to compile\n--OUT--\n{0}\n--ERR--\n{1}".format(
                    out, err))
    # name of the produced binary depends on the platform and python version
    if sys.platform == "win32":
        name = "stdchelper_demo.cp%d%d-win_amd64.pyd" % sys.version_info[:2]
    elif sys.platform == "darwin":
        name = "stdchelper_demo.cpython-%d%dm-darwin.so" % sys.version_info[:2]
    else:
        name = "stdchelper_demo.cpython-%d%dm-x86_64-linux-gnu.so" % sys.version_info[:2]
    fullname = os.path.join(temp, name)
    if not os.path.exists(fullname):
        files = os.listdir(os.path.dirname(fullname))
        raise FileNotFoundError(
            "Unable to find '{0}' (platform '{1}')\nFound:\n{2}".format(
                fullname, sys.platform, "\n".join(files)))
    # import_module returns a tuple; the first element exposes the module
    mo = import_module(
        None, fullname, fLOG, additional_sys_path=None, first_try=True)
    self.assertIsInstance(mo, tuple)
    self.assertEqual(len(mo), 2)
    self.assertTrue(hasattr(mo[0], '__doc__'))
    if 'stdchelper_demo' in sys.modules:
        # unload the compiled module so following tests start clean
        del sys.modules['stdchelper_demo']
    temp2 = get_temp_folder(__file__, "temp_compile_module2")
    store_obj = {}
    actions = copy_source_files(temp, temp2, fLOG=fLOG)
    store_obj = {}
    indexes = {}
    add_file_rst(temp2, store_obj, actions, fLOG=fLOG,
                 rootrep=("stdchelper_demo.", ""),
                 indexes=indexes)
    if sys.platform == "darwin":
        warnings.warn(
            "add_file_rst does not work yet on MacOSX for C++ modules.")
        return
    self.assertNotEmpty(store_obj)
    self.assertEqual(len(store_obj), 1)
    # the number of copy actions varies with the generated artifacts
    if len(actions) not in (3, 4):
        raise Exception("{0}\n{1}".format(
            len(actions), "\n".join(str(_) for _ in actions)))
    self.assertEqual(len(indexes), 1)
def test_script_pig(self):
    # End-to-end test of a PIG streaming job: generates a Python streaming
    # script and a PIG script, uploads sample data to the cluster, checks
    # the PIG syntax, submits the job and validates the merged output file.
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    if self.client is None:
        # no cluster client configured in this environment: skip silently
        return
    data = os.path.join(os.path.abspath(
        os.path.split(__file__)[0]), "data")
    fLOG("AA")

    # python streaming script: reads rows on stdin, keeps the columns given
    # on the command line, writes csv rows on stdout
    pyth = """
        import sys, datetime
        cols = [ _ for _ in sys.argv if ".py" not in _ ]
        for row in sys.stdin:
            row = row.strip()
            if len(row) == 0 :
                continue
            js = eval(row)
            for station in js:
                vals = [ str(station[c]).strip() for c in cols ]
                sys.stdout.write(",".join(vals))
                sys.stdout.write("\\n")
                sys.stdout.flush()
        """.replace("        ", "")

    fold = os.path.join(data, "..", "temp_pypig")
    if not os.path.exists(fold):
        os.mkdir(fold)
    fLOG("BB")
    pyfile = os.path.join(fold, "pystream.py")
    with open(pyfile, "w", encoding="utf8") as f:
        f.write(pyth)

    # two sample velib rows sent to the streaming script through stdin
    tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': """ + \
        """datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': """ + \
        """'10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', """ + \
        """'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, """ + \
        """'available_bike_stands': 1, 'bike_stands': 33},{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - """ + \
        """75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, """ + \
        """'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, """ + \
        """'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
        """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

    # run the streaming script locally first to validate it (expects one
    # output line per input row, hence 2 lines)
    cmd = sys.executable.replace("pythonw", "python") + " " + pyfile + " name"
    out, err = run_cmd(cmd, wait=True, sin=tosend, communicate=True,
                       timeout=3, shell=False)
    out = out.strip("\n\r ")
    spl = out.split("\n")
    if len(spl) != 2:
        raise Exception(
            "len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
                out, err, len(out)))
    fLOG("CC")

    # PIG script streaming the uploaded data through the python script
    pig = """
        DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`
            SHIP ('pystream.py')
            INPUT(stdin USING PigStreaming(','))
            OUTPUT (stdout USING PigStreaming(','));
        jspy = LOAD '$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);
        --DUMP jspy ;
        matrice = STREAM jspy THROUGH pystream AS
                    ( bonus:chararray,
                      available_bike_stands:double,
                      available_bikes:double,
                      lat:double,
                      lng:double,
                      name:chararray,
                      status:chararray) ;
        DUMP matrice ;
        DESCRIBE jspy ;
        DESCRIBE matrice ;
        STORE matrice INTO 'unittest2/results.txt' USING PigStorage('\t') ;
        """.replace("        ", "")

    fLOG(self.client.username)
    # HIVE script built for reference but never submitted (see below)
    hive_sql = """
DROP TABLE IF EXISTS bikes20;
CREATE TABLE bikes20 (sjson STRING);
LOAD DATA INPATH "/user/__USERNAME__/unittest2/paris*.txt" INTO TABLE bikes20;
SELECT * FROM bikes20 LIMIT 10;
""".replace("__USERNAME__", self.client.username.decode("ascii"))
    fLOG(hive_sql)
    # ${hiveconf:UTT}

    pigfile = os.path.join(fold, "pystream.pig")
    with open(pigfile, "w", encoding="utf8") as f:
        f.write(pig)
    fLOG("DD upload")

    # we upload some files
    files = os.listdir(data)
    files = [os.path.join(data, _) for _ in files]
    files = [_ for _ in files if os.path.isfile(_) and "paris" in _]
    if not self.client.dfs_exists("unittest2"):
        self.client.dfs_mkdir("unittest2")
    content = self.client.dfs_ls("unittest2")
    if len(content) == 0:
        # upload only once; re-runs reuse the existing data
        self.client.upload_cluster(files, "unittest2")
    if self.client.dfs_exists("unittest2/results.txt"):
        # remove a previous result so STORE does not fail
        self.client.dfs_rm("unittest2/results.txt", True)
    fLOG("FF")

    # we test the syntax
    out, err = self.client.pig_submit(pigfile, dependencies=[pyfile],
                                      check=True, no_exception=True,
                                      params=dict(UTT="unittest2"), fLOG=fLOG)
    if "pystream.pig syntax OK" not in err:
        raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))
    fLOG("II")

    # we submit the job
    out, err = self.client.pig_submit(pigfile, dependencies=[pyfile],
                                      stop_on_failure=True, no_exception=True,
                                      redirection=None,
                                      params=dict(UTT="unittest2"))
    fLOG("JJ")
    if "Total records written : 4" not in err:
        raise Exception("PIG OUT:\n{0}\nPIG ERR:\n{1}".format(out, err))

    dest = os.path.join(fold, "out_merged.txt")
    fLOG("dest=", dest)
    if os.path.exists(dest):
        os.remove(dest)
    fLOG("KK")
    self.client.download_cluster("unittest2/results.txt", dest, merge=True)
    assert os.path.exists(dest)
    with open(dest, "r", encoding="utf8") as f:
        content = f.read()
    fLOG("-----\n", content)
    assert len(content.strip(" \n\r\t")) > 0
    fLOG("LL")

    # we submit the job
    # disable HIVE for the time being (broken)
    warnings.warn("hive not being tested")
    # fix: actually skip the HIVE submission as announced by the comment and
    # the warning above (the sibling version of this test already returns
    # here); previously hive_submit was still executed
    return
    out, err = self.client.hive_submit(hive_sql, redirection=None,
                                       params=dict(UTT="unittest2"),
                                       fLOG=fLOG)
    fLOG("HIVE OUT")
    fLOG(out)
    fLOG("HIVE ERR")
    fLOG(err)
    # assert "(0,1.0,32.0,48.8724200631,2.34839523628,10042" in out
    fLOG("END")
def execute_python_scripts(root, df, col_names=None, url=None, eol="/",
                           fLOG=noLOG, gen_mail=None):
    """
    Retrieves all :epkg:`python` scripts and run them.

    @param      root        main folder
    @param      df          dataframe
    @param      col_names   dictionary for columns:
                            folder, mail, program, out, err, url,
                            cmp, url_content, key, time
    @param      url         if not None, format string used to build the url
                            associated to a mail id (``url.format(mailid)``)
    @param      eol         if not None, replaces end of lines by *eol*
    @param      gen_mail    generator of mails
    @param      fLOG        logging function
    @return                 dataframe
    """
    # fix: col_names defaults to None but was dereferenced with .get,
    # which raised AttributeError whenever the default was used
    if col_names is None:
        col_names = {}
    if gen_mail is None:
        def iter_mail(mail):
            yield mail
            yield mail.lower()
        gen_mail = iter_mail

    def post_process(out, eol):
        # normalizes a program output before comparison
        out = out.strip("\r\t\n").rstrip().replace("\r", "").replace("\t", " ")
        if eol:
            out = out.replace("\n", eol)
        return out

    downloads = {}
    res = []
    for name, mail in zip(df[col_names.get("folder", "folder")],
                          df[col_names.get("mail", "mail")]):
        row = {col_names.get("folder", "folder"): name}
        fLOG("[execute_python_script], look into '{0}'".format(name))
        subf = os.path.join(root, name)
        col_find = col_names.get("exists", "exists")
        if not os.path.exists(subf):
            # second chance: '-' may have been replaced by '.'
            subf = os.path.join(root, name.replace("-", "."))
        if not os.path.exists(subf):
            row[col_find] = False
            res.append(row)
        else:
            row[col_find] = True
            store = []
            for py in explore_folder_iterfile(subf, ".*[.]py$"):
                store.append(py)
            fLOG(" -", len(store), "programs found")
            col_out = col_names.get("out", "out")
            col_err = col_names.get("err", "err")
            col_prog = col_names.get("program", "program")
            col_time = col_names.get("time", "time")
            col_key = col_names.get("key", "key")
            col_size = col_names.get("size", "size")
            col_url = col_names.get("url", "url")
            col_ind = col_names.get("pattern_id", "pattern_id")
            if len(store) == 0:
                # no program in this folder: only store the url information
                for mm in sorted(gen_mail(mail.strip())):
                    mailid = _get_code(mm.encode("utf-8"))
                    r = row.copy()
                    loc = url.format(mailid)
                    ind = {col_key: mm, col_ind: mailid, col_url: loc}
                    r.update(ind)
                    res.append(r)
                continue

            # test all programs
            outs = []
            for py in sorted(store):
                cmd = '"{0}" "{1}"'.format(sys.executable, py)
                # fix: time.clock was removed in Python 3.8,
                # time.perf_counter is its documented replacement
                t1 = time.perf_counter()
                try:
                    out, err = run_cmd(cmd, wait=True)
                except Exception as e:
                    out = None
                    err = str(e)
                # fix: post_process crashed on None when run_cmd raised;
                # keep None so the failure is still detected below
                out = None if out is None else post_process(out, eol)
                t2 = time.perf_counter()
                outs.append({
                    col_out: out,
                    col_err: post_process(err, eol),
                    col_prog: os.path.split(py)[-1],
                    col_time: t2 - t1,
                    col_size: os.stat(py).st_size})

            if url is None:
                for o in outs:
                    r = row.copy()
                    r.update(o)
                    res.append(r)
            else:
                col_cmp = col_names.get("cmp", "cmp")
                col_in = col_names.get(
                    "sortie_dans_motif", "sortie_dans_motif")
                col_in2 = col_names.get(
                    "motif_dans_sortie", "motif_dans_sortie")
                col_dist = col_names.get("dist", "dist")
                col_content = col_names.get("content", "content")
                # NOTE(review): 'out' is the output of the *last* executed
                # program; None means that last execution failed
                if out is None:
                    # fix: the original unpacked 'for _, mm in gen_mail(...)'
                    # although gen_mail yields plain strings
                    for mm in gen_mail(mail.strip()):
                        mailid = _get_code(mm.encode("utf-8"))
                        ind = {col_ind: mailid}
                        for o in outs:
                            r = row.copy()
                            r.update(o)
                            r.update(ind)
                            res.append(r)
                else:
                    for mm in sorted(gen_mail(mail.strip())):
                        mailid = _get_code(mm.encode("utf-8"))
                        loc = url.format(mailid)
                        ind = {col_key: mm, col_ind: mailid, col_url: loc}
                        if loc not in downloads:
                            # cache downloaded patterns, one fetch per url
                            downloads[loc] = get_url_content_timeout(
                                loc).strip("\n\r\t ")
                        content = post_process(downloads[loc], eol)
                        ind[col_content] = content
                        for o in outs:
                            r = row.copy()
                            r.update(o)
                            r.update(ind)
                            out = r[col_out]
                            r[col_cmp] = out == content or out.strip(
                            ) == content.strip()
                            r[col_in] = out in content
                            r[col_in2] = content in out
                            # very short outputs compare by length only
                            r[col_dist] = (edit_distance(out, content)[0]) if (
                                len(content) > len(out) // 2) else abs(
                                len(content) - len(out))
                            res.append(r)
    return pandas.DataFrame(res)
def convert_short_latex_into_png(latex, temp_folder=".", fLOG=print,
                                 miktex=r"C:\Program Files\MiKTeX 2.9\miktex\bin\x64",
                                 final_name=None):
    """
    Converts a short latex script into an image.

    @param      latex           latex equation
    @param      temp_folder     temp_folder (where temporary files will
                                be placed)
    @param      fLOG            logging function
    @param      miktex          miktex location
    @param      final_name      if not None, copy the image at this location
                                using this name
    @return                     a location to the image (it should be
                                copied), and its size

    You should not call the function twice at the same time in the same
    folder.

    @warning    The function raises an exception if the compilation failed.
    """
    if not os.path.exists(miktex):
        raise FileNotFoundError("unable to find miktex")
    if sys.platform.startswith("win"):
        htlatex = os.path.join(miktex, "htlatex.exe")
        if not os.path.exists(htlatex):
            raise FileNotFoundError("unable to find htlatex")
    else:
        htlatex = os.path.join(miktex, "htlatex")
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    # wrap the equation in a minimal latex document
    eq = os.path.join(temp_folder, "eq.tex")
    with open(eq, "w") as f:
        f.write(r"""\documentclass[12pt]{article}
        \pagestyle{empty}
        \begin{document}
        $$
        %s
        $$
        \end{document}""".replace("        ", "") % latex.strip("\n\r "))

    cmd = '"' + htlatex + \
        '" eq.tex "html, graphics-300" "" "" "--interaction=nonstopmode"'
    cwd = os.getcwd()
    os.chdir(temp_folder)
    try:
        # fix: always restore the working directory, even if run_cmd raises
        out, err = run_cmd(cmd, wait=True)
    finally:
        os.chdir(cwd)
    if "FAILED" in err:
        raise Exception(
            "it failed\n-----\n{0}\n----------\n{1}".format(out, err))
    img = os.path.join(temp_folder, "eq0x.png")
    if not os.path.exists(img):
        # the compilation did not produce the image, the log explains why
        with open(os.path.join(temp_folder, "eq.log"), "r") as f:
            log = f.read()
        raise FileNotFoundError("the compilation did not work\n" + log)
    im = Image.open(img)
    if final_name is not None:
        # size reduction
        shutil.copy(img, final_name)
        return final_name, im.size
    return img, im.size
docpath = os.path.normpath( os.path.join(thispath, "_doc", "presentation" + suffix)) os.chdir(docpath) lay = "html" build = "build" over = "" sconf = "" import_pyquickhelper() from pyquickhelper.helpgen import process_sphinx_cmd cmd_file = os.path.abspath(process_sphinx_cmd.__file__) cmd = '"{4}" "{5}" -b {1} -d {0}/doctrees{2}{3} source {0}/{1}'.format( build, lay, over, sconf, sys.executable, cmd_file) from pyquickhelper.loghelper import run_cmd out, err = run_cmd(cmd, wait=True, fLOG=print) print(out) print(err) if sys.platform.startswith("win"): os.chdir(pa) else: # builds the setup from pyquickhelper.pycode import clean_readme long_description = clean_readme(long_description) setup( name=project_var_name, version='%s%s' % (sversion, subversion),
def run_pig(pigfile, argv=None, pig_path=None, hadoop_path=None,
            jython_path=None, timeout=None, logpath="logs",
            pig_version="0.15.0", hadoop_version="2.7.2", fLOG=noLOG):
    """
    Runs a pig script and returns the standard output and error.

    @param      pigfile         pig file
    @param      argv            arguments to send to the command line
                                (NOTE(review): currently not added to the
                                java command line)
    @param      pig_path        path to pig 0.XX.0
    @param      hadoop_path     path to hadoop
    @param      jython_path     path to jython (currently unused by this
                                function)
    @param      timeout         timeout
    @param      logpath         path to the logs
    @param      pig_version     PIG version (if *pig_path* is not defined)
    @param      hadoop_version  Hadoop version (if *hadoop_path* is not
                                defined)
    @param      fLOG            logging function
    @return                     out, err

    If *pig_path* is None, the function looks into this directory.
    """
    if pig_path is None:
        pig_path = os.path.join(get_pig_path(), "pig-%s" % pig_version)
    if hadoop_path is None:
        hadoop_path = get_hadoop_path()
    java = get_java_path()
    if "JAVA_HOME" not in os.environ:
        # pig requires JAVA_HOME to be defined
        os.environ["JAVA_HOME"] = java
    if "PIG_CONF_DIR" not in os.environ:
        os.environ["PIG_CONF_DIR"] = os.path.normpath(
            os.path.join(pig_path, "conf"))
    if not os.path.exists(os.environ["PIG_CONF_DIR"]):
        raise FileNotFoundError(os.environ["PIG_CONF_DIR"])
    if "HADOOP_HOME" not in os.environ:
        os.environ["HADOOP_HOME"] = hadoop_path
    if not os.path.exists(os.environ["HADOOP_HOME"]):
        raise FileNotFoundError(os.environ["HADOOP_HOME"])
    if "HADOOP_CLIENT_OPTS" not in os.environ:
        os.environ["HADOOP_CLIENT_OPTS"] = "-Xmx1024m"
    fLOG("PIG_CONF_DIR=", os.environ["PIG_CONF_DIR"])

    def clean(i, p):
        # quotes command-line pieces containing spaces; the executable at
        # index 0 is left untouched
        if i == 0:
            return p
        if '"' in p:
            p = p.replace('"', '\\"')
        if " " in p:
            p = '"{0}"'.format(p)
        return p

    full = False
    jars = []

    if full:
        # experimental branch: build the full list of hadoop/pig jars
        jars.extend(get_pig_jars())  # + get_hadoop_jars()
        folds = set(os.path.split(j)[0] for j in jars)
        jars = [os.path.join(f, "*.jar") for f in folds]
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "common", "lib", "*.jar"))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "hdfs", "lib", "*.jar"))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "mapreduce", "lib", "*.jar"))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "httpfs", "tomcat", "lib", "*.jar"))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "tools", "lib", "*.jar"))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "yarn", "lib", "*.jar"))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "common",
                                 "hadoop-common-%s.jar" % hadoop_version))
        # fix: '.jar' extension was missing on the hadoop-nfs entry
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "common",
                                 "hadoop-nfs-%s.jar" % hadoop_version))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "hdfs",
                                 "hadoop-hdfs-%s.jar" % hadoop_version))
        # fix: hardcoded 'hadoop-2.5.2' replaced by the hadoop_version
        # parameter for consistency with every other entry
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "hdfs",
                                 "hadoop-hdfs-nfs-%s.jar" % hadoop_version))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "mapreduce", "*.jar"))
        jars.append(os.path.join(hadoop_path, "hadoop-%s" % hadoop_version,
                                 "share", "hadoop", "yarn", "*.jar"))
        jars.append(os.path.join(pig_path, "pig-%s-core-h1.jar" % pig_version))
    else:
        jars.append(os.path.join(pig_path, "pig-%s" % pig_version, "legacy",
                                 "pig-%s-withouthadoop-h2.jar" % pig_version))

    jarsall = []
    for j in jars:
        r = glob.glob(j)
        jarsall.extend(r)
    jarsall.sort()
    for j in jarsall:
        fLOG(j)

    # NOTE(review): the classpath is built from the glob *patterns* in
    # 'jars', not from the resolved list 'jarsall' which is only logged.
    # fix: use os.pathsep (':' on POSIX, ';' on Windows) instead of a
    # hardcoded ';' which java does not accept on POSIX
    jars = os.pathsep.join(jars)
    fLOG("jars", jars)

    cmd = [get_java_cmd(), "-Xmx1024m",
           "-classpath", jars,
           "-Dpig.log.dir=" + logpath,
           "-Dhadoop.log.dir=" + logpath,
           "-Dhadoop.tmp.dir=" + logpath,
           "-Dpig.log.file=pid.log",
           "-Djava.io.tmpdir=" + logpath,
           "-Dpig.home.dir=" + pig_path,
           #"-Dpig.schematuple=true",
           #"-Dpig.schematuple.local.dir=" + logpath,
           "org.apache.pig.Main",
           "-x", "local", pigfile,
           "-stop_on_failure"]
    cmd = " ".join(clean(i, _) for i, _ in enumerate(cmd))
    out, err = run_cmd(cmd, wait=True, sin=None, communicate=True,
                       timeout=timeout, shell=False)
    return out, err