Ejemplo n.º 1
0
 def run(self):
     """
     Builds the command line which starts a Jupyter notebook server.

     The notebook directory is the subfolder ``_doc/notebooks`` of the
     location stored in parameter ``file_or_folder`` if that subfolder
     exists, the location itself otherwise. Token and password options
     are passed with empty values.
     """
     from pyquickhelper.loghelper import run_cmd
     parameters = self.get_parameters()
     location = parameters['file_or_folder']
     # Prefer the documentation notebooks when the project has them.
     folder = os.path.join(location, "_doc", "notebooks")
     if not os.path.exists(folder):
         folder = location
     # NOTE(review): the last line below was damaged by a secret scrubber
     # (``******``) and is not valid Python as it stands; it presumably
     # closed the list and called ``run_cmd(" ".join(cmd), ...)`` -
     # TODO restore it from the original source.
     cmd = [
         "jupyter-notebook",
         "--notebook-dir=%s" % folder, "--NotebookApp.token=",
         "--NotebookApp.password="******" ".join(cmd), wait=True, fLOG=print, communicate=False)
Ejemplo n.º 2
0
def plot_pipeline(dot=None, pipeline=None, dataframe=None, name='pipeline'):
    """
    Renders the graph of a pipeline as a PNG image with Graphviz.

    @param      dot         graph description in DOT format, if None it is
                            produced by ``pipeline2dot(pipeline, dataframe)``
    @param      pipeline    pipeline to describe when *dot* is None
    @param      dataframe   data given to ``pipeline2dot`` when *dot* is None
    @param      name        prefix of the files written by the function

    The DOT description is saved in ``<name>_graph.dot`` and the command
    line asks Graphviz to write ``<name>_graph.dot.png``.
    """
    dot_file = name + "_graph.dot"

    if dot is None:
        dot = pipeline2dot(pipeline, dataframe)
    # The description is always saved: Graphviz reads the file, so a graph
    # supplied by the caller must reach the disk as well.
    with open(dot_file, "w", encoding="utf-8") as f:
        f.write(dot)

    # On Windows, Graphviz is appended to PATH when it does not seem to be
    # there; ``get`` avoids a KeyError when PATH is not defined at all.
    current_path = os.environ.get("PATH", "")
    if sys.platform.startswith("win") and "Graphviz" not in current_path:
        os.environ['PATH'] = (
            current_path + r';C:\Program Files (x86)\Graphviz2.38\bin')

    cmd = "dot -G=300 -Tpng {0} -o{0}.png".format(dot_file)
    run_cmd(cmd, wait=True, fLOG=print)
Ejemplo n.º 3
0
def build_machinelearningext(version="Release"):
    """
    Builds the module machinelearningext.

    @param      version     build configuration given to ``dotnet build -c``

    The function runs ``dotnet restore`` then ``dotnet build`` on
    ``machinelearningext.sln``, raises ``RuntimeError`` as soon as one
    command writes on the standard error, and finally calls
    ``copy_assemblies``.
    """
    from pyquickhelper.loghelper import run_cmd

    # Telemetry is disabled unless the user already made a choice.
    os.environ.setdefault('DOTNET_CLI_TELEMETRY_OPTOUT', '1')
    print('[csharpyml.env] DOTNET_CLI_TELEMETRY_OPTOUT={0}'.format(
        os.environ['DOTNET_CLI_TELEMETRY_OPTOUT']))

    # builds the other libraries
    cmds = [
        'dotnet restore machinelearningext.sln',
        'dotnet build -c %s machinelearningext.sln' % version
    ]
    # The absolute path of ``cscode`` used to be computed and then
    # overwritten by a path relative to the current directory; the
    # solution folder is now located below ``cscode``.
    # NOTE(review): confirm the solution lives in
    # cscode/machinelearningext/machinelearningext.
    folder = os.path.join(os.path.abspath("cscode"),
                          "machinelearningext", "machinelearningext")
    for cmd in cmds:
        out, err = run_cmd(cmd, fLOG=print, wait=True, change_path=folder)
        if len(err) > 0:
            raise RuntimeError(
                "Unable to compile C# code.\nCMD: {0}\n--ERR--\n{1}".format(
                    cmd, err))
        if len(out) > 0:
            print('[csharpyml.dotnet] OUT')
            print(out)

    # Copy specific files.
    copy_assemblies(version=version)
Ejemplo n.º 4
0
def run_graphviz(filename, image, engine="dot"):
    """
    Calls :epkg:`GraphViz` to render a graph description as a PNG image.

    @param      filename        file which holds the graph definition
    @param      image           image to produce, its extension must be ``.png``
    @param      engine          executable to run, *dot* or *neato*
    @return                     standard output of graphviz

    An exception is raised if the extension is not ``.png`` or
    if graphviz writes anything on the standard error.
    """
    _, extension = os.path.splitext(image)
    if extension != ".png":
        raise Exception("extension should be .png not " + str(extension))

    if not sys.platform.startswith("win"):
        cmd = '"{0}" -Tpng "{1}" -o "{2}"'.format(engine, filename, image)
    else:
        # On Windows, the engine is taken from the folder which contains dot.
        graphviz_dir = os.path.dirname(find_graphviz_dot())
        cmd = '"{0}\\{3}" -Tpng "{1}" -o "{2}"'.format(
            graphviz_dir, filename, image, engine)

    out, err = run_cmd(cmd, wait=True)
    if len(err) > 0:
        message = "unable to run Graphviz\nCMD:\n{0}\nOUT:\n{1}\nERR:\n{2}"
        raise Exception(message.format(cmd, out, err))
    return out
Ejemplo n.º 5
0
    def test_reap_children(self):
        """
        Starts a command without waiting for it and checks that
        ``reap_children`` returns exactly one item for that process.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        # The command depends on the platform.
        cmd = "pause" if sys.platform.startswith("win") else 'ls -la | less'
        folder = get_temp_folder(__file__, "temp_reap_children")
        logger = CustomLog(folder)
        proc, second = run_cmd(cmd, wait=False, fLOG=logger)
        self.assertTrue(second is None)
        logger('proc={} pid={}'.format(proc, proc.pid))
        reaped = reap_children(fLOG=logger, subset={proc.pid})
        logger('ki={0}'.format(reaped))
        if (reaped is None and not is_travis_or_appveyor()
                and __name__ != '__main__'):
            # Nothing was returned: the test only warns and stops here.
            warnings.warn(
                "reap_children could not be fully tested ki is None.")
            return
        self.assertTrue(reaped is not None)
        self.assertEqual(len(reaped), 1)
        # To avoid a warning.
        proc.returncode = 0
Ejemplo n.º 6
0
    def test_exe(self):
        """
        Converts a small yaml definition into a script, runs the script
        and checks the output mentions every section of the yaml.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        if sys.platform.startswith("win32"):
            command = "dir"
        else:
            command = "ls"
        yml = """
        language: python
        python:
            - {{Python35}}
        before_script:
            - %s
        after_script:
            - %s {{PLATFORM}}
        script:
            - %s
        """.replace("        ", "") % (command, command, command)
        temp = get_temp_folder(__file__, "temp_yaml_exe")
        context = {
            "Python34": "fake",
            "Python35": os.path.dirname(sys.executable),
            "Python27": None,
            "Anaconda3": None,
            "Anaconda2": None,
            "WinPython35": None,
            "project_name": "pyquickhelper",
            "root_path": "ROOT",
            "PLATFORM": "win32",
        }
        obj, name = load_yaml(yml, context=context)
        self.assertTrue(name is not None)
        instructions = list(enumerate_convert_yaml_into_instructions(
            obj, variables=context))

        # The script is a batch file or a shell script
        # depending on the command.
        extension = "bat" if command == "dir" else "sh"
        script_path = os.path.join(temp, "yml.%s" % extension)
        expected_markers = ("BEFORE_SCRIPT", "AFTER_SCRIPT", "SCRIPT")

        for sequence, variables in instructions:
            conv = convert_sequence_into_batch_file(
                sequence, variables=variables)
            if ("%s " % command) not in conv:
                raise Exception("{0}\n--\n{1}".format(command, conv))
            fLOG("####", conv)
            with open(script_path, "w") as f:
                f.write(conv)
            if is_travis_or_appveyor() == "__travis":
                # linux, unable to test TestYamlExe.test_exe.
                continue

            if sys.platform.startswith("win"):
                cmd = script_path
            else:
                cmd = "bash " + script_path
            try:
                out, err = run_cmd(cmd, wait=True)
            except PermissionError as e:
                raise Exception(
                    "Unable to execute '{0}' which contains\n{1}".format(
                        script_path, conv)) from e
            fLOG("###")
            fLOG(out)
            for marker in expected_markers:
                if marker not in out:
                    raise Exception(
                        "{0}\nERR\n{2}\n#########\n{1}".format(
                            out, conv, err))
Ejemplo n.º 7
0
def convert(pathname, verbose=0):
    """
    Converts a saved model into ONNX by calling ``tf2onnx.convert``
    in a separate process.

    @param      pathname    folder of the saved model, its last part must be
                            a key of ``MODELS``
    @param      verbose     displays more information if not null
    @return                 pathname, onnx filename, inputs, outputs,
                            onnx inputs (inputs and outputs are
                            comma separated strings)

    The conversion is skipped if ``model.onnx`` already exists
    in the folder.
    """
    # tflite: this conversion is switched off (``False and ...``).
    lite = os.path.join(pathname, 'model.lite')
    if False and not os.path.exists(lite):
        import tensorflow.lite as tfl
        if verbose:
            print('[convert] to lite %r.' % pathname)
        converter = tfl.TFLiteConverter.from_saved_model(pathname)
        try:
            tflite_model = converter.convert()
        except Exception as e:
            print('[convert] lite failed due to %r.' % e)
            tflite_model = None
        if tflite_model is not None:
            if verbose:
                print('[convert] save lite %r.' % pathname)
            with open(lite, "wb") as f:
                f.write(tflite_model)

    # node list
    if verbose:
        node_types = get_node_list(pathname, verbose=verbose)
        print('[convert] node types: %r' % node_types)

    # onnx
    output = os.path.join(pathname, "model.onnx")
    lastname = pathname.replace("\\", "/").split('/')[-1]
    if lastname not in MODELS:
        raise ValueError("Unknown model %r." % lastname)
    model = MODELS[lastname]
    keys = ('inputs', 'outputs', 'tag', 'signature_def', 'onnx_inputs')
    inputs, outputs, tag, sig, onnx_inputs = [model[key] for key in keys]
    if inputs is None:
        if verbose:
            print('[convert] to ONNX %r.' % pathname)
        raise NotImplementedError("Unable to convert %r." % lastname)
    inputs = ",".join(inputs)
    outputs = ",".join(outputs)

    if os.path.exists(output):
        # Already converted.
        return pathname, output, inputs, outputs, onnx_inputs

    # The logging function is silent unless verbose is set.
    log = print if verbose else (lambda *args: None)
    pieces = (
        "python", "-m", "tf2onnx.convert", "--saved-model", pathname,
        "--output", output, "--inputs", inputs, "--outputs", outputs,
        '--tag', tag, '--signature_def', sig,
    )
    run_cmd(" ".join(pieces), wait=True, fLOG=log, shell=True)
    return pathname, output, inputs, outputs, onnx_inputs
Ejemplo n.º 8
0
def run_graphviz(filename, image, engine="dot"):
    """
    Runs :epkg:`GraphViz` on a graph definition to produce a PNG image.

    @param      filename        file containing the graph definition
    @param      image           output image, only ``.png`` is accepted
    @param      engine          *dot* or *neato*
    @return                     what graphviz wrote on the standard output

    The function raises an exception if the image extension is not
    ``.png`` or if the standard error is not empty.
    """
    suffix = os.path.splitext(image)[-1]
    if suffix != ".png":
        raise Exception("extension should be .png not " + str(suffix))

    # On Windows, the executable is looked for next to dot.
    if sys.platform.startswith("win"):
        executable = os.path.dirname(find_graphviz_dot()) + "\\" + engine
    else:
        executable = engine
    cmd = '"{exe}" -Tpng "{src}" -o "{dst}"'.format(
        exe=executable, src=filename, dst=image)

    out, err = run_cmd(cmd, wait=True)
    if len(err) > 0:
        raise Exception(
            "Unable to run Graphviz\nCMD:\n{0}\nOUT:\n{1}\nERR:\n{2}".format(
                cmd, out, err))
    return out
Ejemplo n.º 9
0
def git_change_remote_origin(local_folder, url_https, user=None, password=None,
                             add_fetch=False, timeout=10, fLOG=noLOG):
    """
    Replaces the remote *origin* of a local repository. The url and
    the credentials refer to the new remote.

    @param      local_folder    local folder of the repository
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      add_fetch       appends instruction ``git fetch``
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    The function runs the instructions::

        cd local_folder
        git remote remove origin
        git remote add origin url

    The outputs are given to ``git_check_error``.
    """
    url_user = git_url_user_password(url_https, user, password)
    template = """
            cd {0}
            git remote remove origin
            git remote add origin {1}
            """
    filled = template.format(local_folder, url_user)
    script = filled.replace("            ", "").strip(" \n\r\t")
    if add_fetch:
        script += "\ngit fetch"
    # All instructions are chained into a single command line.
    cmd = script.replace("\n", "&")
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
Ejemplo n.º 10
0
def build_module(version="Release"):
    """
    Builds the dotnet application of the module.

    @param      version     build configuration given to ``dotnet build -c``

    The function runs ``dotnet restore`` then ``dotnet build`` on
    ``CSharPyMLExtension_netcore.sln`` from folder ``cscode``, raises
    ``RuntimeError`` if a command writes on the standard error
    and finally calls ``copy_assemblies``.
    """
    # git submodule add https://github.com/dotnet/machinelearning.git cscode/machinelearning
    from pyquickhelper.loghelper import run_cmd

    # The variable is only set when the user did not define it.
    os.environ.setdefault('DOTNET_CLI_TELEMETRY_OPTOUT', '1')
    print('[csharpyml.env] DOTNET_CLI_TELEMETRY_OPTOUT={0}'.format(
        os.environ['DOTNET_CLI_TELEMETRY_OPTOUT']))

    # builds the other libraries
    solution = 'CSharPyMLExtension_netcore.sln'
    steps = (
        'dotnet restore ' + solution,
        'dotnet build -c %s ' % version + solution,
    )
    folder = os.path.abspath("cscode")
    for step in steps:
        out, err = run_cmd(step, fLOG=print, wait=True, change_path=folder)
        if len(err) > 0:
            raise RuntimeError(
                "Unable to compile C# code.\nCMD: {0}\n--ERR--\n{1}".format(
                    step, err))
        if len(out) > 0:
            print('[csharpyml.dotnet] OUT')
            print(out)

    # Copy specific files.
    copy_assemblies(version=version)
Ejemplo n.º 11
0
def run_benchmark(runtime, srcdir, logger, skip, white_list=None):
    """
    Runs ``mlprodict validate_runtime`` in a separate process
    for every operator returned by ``sklearn_operators``.

    @param      runtime     runtime given to option ``--runtime``
    @param      srcdir      folder which receives the csv files
    @param      logger      logger
    @param      skip        operators to skip or None
    @param      white_list  if specified, only these operators are run
    @return                 list of pairs *(raw file, summary file)*

    If a summary file is missing after a run, the function writes one
    with scenario ``CRASH`` and the outputs of the command line.
    """
    operators = [info['name'] for info in sklearn_operators(extended=True)]
    if white_list:
        operators = [name for name in operators if name in white_list]
    operators.sort()

    template = ('{0} -m mlprodict validate_runtime --verbose=0 --out_raw={1} --out_summary={2} '
                '--benchmark=1 --dump_folder={3} --runtime={4} --models={5}')
    produced = []
    progress = tqdm(operators)
    for op in progress:
        if skip is not None and op in skip:
            continue
        progress.set_description("[%s]" % op.ljust(25))

        out_raw = os.path.join(srcdir, "bench_raw_%s_%s.csv" % (runtime, op))
        out_sum = os.path.join(srcdir, "bench_sum_%s_%s.csv" % (runtime, op))
        cmd = template.format(
            get_interpreter_path(), out_raw, out_sum, srcdir, runtime, op)
        logger.info("[mlprodict] cmd '{}'.".format(cmd))
        out, err = run_cmd(cmd, wait=True, fLOG=None)
        if os.path.exists(out_sum):
            produced.append((out_raw, out_sum))
            continue

        # The summary is missing: a placeholder keeps track of the failure.
        logger.warning("[mlprodict] unable to find '{}'.".format(out_sum))
        print("[mlprodict-sphinx] cmd '{}'".format(cmd))
        print("[mlprodict-sphinx] unable to find '{}'".format(out_sum))
        msg = "Unable to find '{}'\n--CMD--\n{}\n--OUT--\n{}\n--ERR--\n{}".format(
            out_sum, cmd, out, err)
        print(msg)
        crash = {'name': op, 'scenario': 'CRASH',
                 'ERROR-msg': msg.replace("\n", " -- ")}
        DataFrame([crash]).to_csv(out_sum, index=False)
        produced.append((out_raw, out_sum))
    return produced
Ejemplo n.º 12
0
def is_java_installed(fLOG=noLOG):
    """
    Checks if :epkg:`java` is installed.

    @param      fLOG    logging function
    @return             boolean

    The function runs ``java -showversion`` and looks for a marker in
    the standard error: ``Java(TM)`` on Windows,
    ``OpenJDK Runtime Environment`` on other platforms.
    """
    if sys.platform.startswith("win"):
        marker = "Java(TM)"
    else:
        marker = "OpenJDK Runtime Environment"
    out, err = run_cmd(get_java_cmd() + " -showversion",
                       wait=True, log_error=False)
    fLOG("OUT:\n", out)
    fLOG("ERR:\n", err)
    return marker in err
Ejemplo n.º 13
0
    def test_notebook_runner_ml_huge(self):
        """
        Runs the notebooks *ml_huge*.

        On Windows, if *numpy* and *tables* are not installed under the
        same root, the test file is run again in a separate process with
        the ``python.exe`` located two levels above the root of *numpy*.
        The marker ``SECONDTRY`` added to ``PYTHONPATH`` makes the
        second run fail instead of looping.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        if is_travis_or_appveyor() == "appveyor":
            # pytables has an issue
            # return
            pass

        if sys.platform.startswith("win"):
            import numpy
            import tables
            foldn = os.path.abspath(os.path.dirname(numpy.__file__))
            foldt = os.path.normpath(os.path.dirname(tables.__file__))
            rootn = os.path.dirname(foldn)
            roott = os.path.dirname(foldt)
            if rootn != roott:
                pp = os.environ.get('PYTHONPATH', '')
                if "SECONDTRY" in pp:
                    raise Exception(
                        "Infinite loog\n{0}\n{1}\n**EXE\n{2}\n**PP\n{3}\n****".format(rootn, roott, sys.executable, pp))
                # We need to run this file with the main python.
                # Otherwise it fails for tables: DLL load failed.
                exe = os.path.normpath(os.path.join(
                    rootn, "..", "..", "python.exe"))
                cmd = '"{0}" -u "{1}"'.format(exe, os.path.abspath(__file__))
                import pyquickhelper
                import pyensae
                import jyquickhelper
                import src.ensae_teaching_cs
                import mlstatpy
                import pymyinstall
                add = ["SECONDTRY"]
                for mod in [pyquickhelper, pyensae, jyquickhelper, src.ensae_teaching_cs, mlstatpy, pymyinstall]:
                    add.append(os.path.normpath(os.path.join(
                        os.path.dirname(mod.__file__), "..")))
                fLOG("set PYTHONPATH={0}".format(";".join(add)))
                os.environ['PYTHONPATH'] = ";".join(add)
                out, err = run_cmd(cmd, wait=True, fLOG=fLOG)
                if len(err) > 0:
                    lines = err.split("\n")
                    # Blank lines are skipped first: indexing an empty
                    # string raised IndexError. Indented lines and
                    # warnings are not considered as errors.
                    lines = [_ for _ in lines if _.strip() and _[0] != " "]
                    lines = [_ for _ in lines if "warning" not in _.lower()]
                    if len(lines) > 0:
                        raise Exception("--CMD:\n{0}\n--OUT:\n{1}\n--ERR\n{2}\n--ERR2\n{3}\n--PP\n{4}".format(
                            cmd, out, err, "\n".join(lines), pp))
            return

        import tables
        assert tables is not None
        this = os.path.abspath(os.path.dirname(tables.__file__))
        self.a_test_notebook_runner(
            "ml_huge", "expose", additional_path=[this])
Ejemplo n.º 14
0
def is_java_installed(fLOG=noLOG):
    """
    Tells if java is installed.

    @param      fLOG    logging function
    @return             boolean

    The function runs ``java -showversion`` and looks for
    ``Java(TM)`` in the standard error.
    """
    out, err = run_cmd(get_java_cmd() + " -showversion",
                       wait=True, log_error=False)
    for label, content in (("OUT:\n", out), ("ERR:\n", err)):
        fLOG(label, content)
    return "Java(TM)" in err
    def test_notebook_runner_ml_huge(self):
        """
        Runs the notebooks *ml_huge*.

        On Windows, if *numpy* and *tables* are not installed under the
        same root, the test file is run again in a separate process with
        the ``python.exe`` located two levels above the root of *numpy*.
        The marker ``SECONDTRY`` added to ``PYTHONPATH`` makes the
        second run fail instead of looping.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        if sys.platform.startswith("win"):
            import numpy
            import tables  # pylint: disable=E0401
            foldn = os.path.abspath(os.path.dirname(numpy.__file__))
            foldt = os.path.normpath(os.path.dirname(tables.__file__))
            rootn = os.path.dirname(foldn)
            roott = os.path.dirname(foldt)
            if rootn != roott:
                pp = os.environ.get('PYTHONPATH', '')
                if "SECONDTRY" in pp:
                    raise Exception(
                        "Infinite loog\n{0}\n{1}\n**EXE\n{2}\n**PP\n{3}\n****".format(rootn, roott, sys.executable, pp))
                # We need to run this file with the main python.
                # Otherwise it fails for tables: DLL load failed.
                exe = os.path.normpath(os.path.join(
                    rootn, "..", "..", "python.exe"))
                cmd = '"{0}" -u "{1}"'.format(exe, os.path.abspath(__file__))
                import pyquickhelper
                import pyensae
                import jyquickhelper
                import ensae_teaching_cs
                import mlstatpy
                import pymyinstall
                add = ["SECONDTRY"]
                for mod in [pyquickhelper, pyensae, jyquickhelper, ensae_teaching_cs,
                            mlstatpy, pymyinstall]:
                    add.append(os.path.normpath(os.path.join(
                        os.path.dirname(mod.__file__), "..")))
                fLOG("set PYTHONPATH={0}".format(";".join(add)))
                os.environ['PYTHONPATH'] = ";".join(add)
                out, err = run_cmd(cmd, wait=True, fLOG=fLOG)
                if len(err) > 0:
                    lines = err.split("\n")
                    # Blank lines are skipped first: indexing an empty
                    # string raised IndexError. Indented lines and
                    # warnings are not considered as errors.
                    lines = [_ for _ in lines if _.strip() and _[0] != " "]
                    lines = [_ for _ in lines if "warning" not in _.lower()]
                    if len(lines) > 0:
                        raise Exception("--CMD:\n{0}\n--OUT:\n{1}\n--ERR\n{2}\n--ERR2\n{3}\n--PP\n{4}".format(
                            cmd, out, err, "\n".join(lines), pp))
            return

        import tables  # pylint: disable=E0401
        assert tables is not None
        this = os.path.abspath(os.path.dirname(tables.__file__))
        self.a_test_notebook_runner(
            "ml_huge", "expose", additional_path=[this])
Ejemplo n.º 16
0
    def runpy(self, line, cell=None):
        """
        Defines command ``%%runpy``.

        .. nbref::
            :title: runpy

            ``%%runpy`` runs a python script in a separate process,
            the content of the cell is sent to its standard input,
            a timeout is set up at 10s. It is almost equivalent to::

                from pyquickhelper.loghelper import run_cmd
                import sys
                cmd = sys.executable.replace(
                    "pythonw",
                    "python") + " " + filename + " " + args
                out, err = run_cmd(
                    cmd, wait=True, sin=cell, communicate=True, timeout=10, shell=False)

        The method returns the standard output as HTML, or the standard
        error if it is not empty.

        .. versionadded:: 1.1
        """
        parser = self.get_parser(MagicFile.runpy_parser, "runpy")
        args = self.get_args(line, parser)
        if args is None:
            return None

        filename = args.file
        if len(filename) == 0:
            # No script was given, the command is called again with no argument.
            self.runpy("")
            return None

        script_args = args.args
        if isinstance(script_args, list):
            suffix = " ".join('"{0}"'.format(_) for _ in script_args)
        else:
            suffix = script_args
        python = sys.executable.replace("pythonw", "python")
        cmd = python + (" " + filename + " ") + suffix
        out, err = run_cmd(cmd, wait=True, sin=cell, communicate=True,
                           timeout=10, shell=False)
        if len(err) > 0:
            return HTML(
                '<font color="#DD0000">Error</font><br /><pre>\n%s\n</pre>'
                % err)
        return HTML('<pre>\n%s\n</pre>' % out)
    def test_notebook_svg(self):
        """
        Converts the notebooks of folder ``notebooks_svg`` into latex
        and pdf and checks the latex output refers to the expected
        pdf image.

        If the test fails, look into issue
        `216 <https://github.com/sdpython/pyquickhelper/issues/216>`_.
        Avoid nbconvert==5.4.0,==5.4.1.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        here = os.path.abspath(os.path.split(__file__)[0])
        nb_folder = os.path.normpath(os.path.join(here, "notebooks_svg"))
        notebooks = []
        for nb_name in os.listdir(nb_folder):
            if ".ipynb" in nb_name:
                notebooks.append(os.path.join(nb_folder, nb_name))
        formats = ["latex", "pdf"]

        # The output folder is created or emptied.
        temp = os.path.join(here, "temp_nb_bug_svg")
        if not os.path.exists(temp):
            os.mkdir(temp)
        for leftover in os.listdir(temp):
            os.remove(os.path.join(temp, leftover))

        ci_name = is_travis_or_appveyor()
        if ci_name in ('travis', 'appveyor', 'azurepipe', 'circleci'):
            return

        setup_environment_for_help()
        obj = SVG2PDFPreprocessor()
        self.assertIn('inkscape', obj.inkscape)
        out, err = run_cmd('%s --version' % obj.inkscape,
                           wait=True, shell=False)
        self.assertIn('inkscape', out.lower())
        # The attribute is read but its value is not checked.
        vers = obj.inkscape_version
        self.assertIn('inkscape', out.lower())

        res = process_notebooks(notebooks, temp, temp, formats=formats)
        for produced in res:
            fLOG(produced)
            self.assertExists(produced[0])

        tex_name = os.path.join(
            temp, "seance4_projection_population_correction.tex")
        with open(tex_name, "r", encoding="utf8") as f:
            content = f.read()
        if "seance4_projection_population_correction_50_0.pdf" not in content:
            raise Exception(content)
Ejemplo n.º 18
0
    def test_py3to2(self):
        """
        Converts the sources with ``py3to2_convert_tree`` and, if a
        python 2.7 interpreter is found at one of the known locations,
        checks the converted module can be imported with it.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        temp = get_temp_folder(__file__, "temp_py3to2")
        here = os.path.abspath(os.path.dirname(__file__))
        root = os.path.normpath(os.path.join(here, "..", ".."))
        conv = py3to2_convert_tree(root, temp, fLOG=fLOG)

        if len(conv) < 20:
            raise Exception("not enough copied files")

        # Script written for python 2, it imports the converted module.
        script = """
            import sys
            sys.path = [p for p in sys.path if "src" not in p and "ut_" not in p]
            sys.path.append(r"{0}")
            print ""
            for k in sys.path:
                print k
            import pyquickhelper
            """.replace("            ", "").format(os.path.join(temp, "src"))

        to = os.path.join(temp, "simpletry.py")
        with open(to, "w", encoding="utf8") as f:
            f.write(script)

        locations = (
            r"C:\Anaconda2",
            r"C:\Anaconda",
            r"C:\WinPython-64bit-2.7.9.3\python-2.7.9.amd64",
        )
        candidates = (os.path.join(loc, "python.exe") for loc in locations)
        pyexe2 = next((exe for exe in candidates if os.path.exists(exe)), None)

        if pyexe2 is None:
            fLOG("python 2.7 was not found")
            return

        out, err = run_cmd("{0} {1}".format(pyexe2, to), wait=True)
        if len(err) > 0:
            raise Exception(
                "conversion did not work:\nOUT\n:{0}\nERR:\n{1}".format(
                    out, err))
Ejemplo n.º 19
0
    def test_script_pig(self):
        """
        Writes a small streaming script, sends it one row through the
        standard input and checks the standard output is not empty.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        here = os.path.abspath(os.path.split(__file__)[0])
        data = os.path.join(here, "data")

        pyth = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        js = eval(row)
                        for station in js:
                            vals = [ station[c] for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace("                    ", "")

        fold = os.path.join(data, "..", "temp_pypig_out")
        if not os.path.exists(fold):
            os.mkdir(fold)

        pyfile = os.path.join(fold, "pystream.py")
        with open(pyfile, "w", encoding="utf8") as f:
            f.write(pyth)

        # One row, split over several literals to keep the lines short.
        tosend = "".join([
            """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - """,
            """75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """,
            """'lng': 2.348395236282807, 'contract_name': """,
            """'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': """,
            """48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, """,
            """'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """,
            """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]""",
        ])

        python = sys.executable.replace("pythonw", "python")
        cmd = python + " " + pyfile + " name"
        out, err = run_cmd(cmd, wait=True, sin=tosend, communicate=True,
                           timeout=3, shell=False)
        fLOG("OUT\n", out)
        fLOG("ERR\n", err)
        assert len(out) > 0
Ejemplo n.º 20
0
def run_dot(dot_file, outimg):
    """
    Calls graphviz on a dot file to produce an image. The output format
    is the extension of the image.

    @param      dot_file        file, format `DOT <http://www.graphviz.org/doc/info/lang.html>`_
    @param      outimg          output image
    @return                     out, err (stdout, stderr from graphviz)

    An exception is raised if graphviz writes on the standard error.

    .. versionadded:: 1.1
    """
    image_format = os.path.splitext(outimg)[-1].strip(".")
    cmd = "\"{0}\" -T{1} -o{2} {3}".format(
        dot_exe(), image_format, outimg, dot_file)
    out, err = run_cmd(cmd, wait=True)
    if len(err) > 0:
        message = "unable to run graphviz on {0}.\nCMD:\n{1}\nOUT:\n{2}\nERR:\n{3}"
        raise Exception(message.format(dot_file, cmd, out, err))
    return out, err
Ejemplo n.º 21
0
def run_jython(pyfile,
               argv=None,
               jython_path=None,
               sin=None,
               timeout=None,
               fLOG=noLOG):
    """
    Runs a jython script in a separate process and returns
    the standard output and error.

    @param      pyfile          jython file
    @param      argv            arguments to send to the command line
    @param      jython_path     path to jython standalone
    @param      sin             data to send to the standard input
    @param      timeout         timeout
    @param      fLOG            logging function
    @return                     out, err

    If *jython_path* is None, the function uses the path
    returned by ``get_jython_jar``.
    """
    if jython_path is None:
        jython_path = get_jython_jar()

    def quote(arg):
        "escapes double quotes and wraps an argument which contains spaces"
        if '"' in arg:
            arg = arg.replace('"', '\\"')
        return '"{0}"'.format(arg) if " " in arg else arg

    pieces = [get_java_cmd(), "-jar", jython_path, pyfile]
    if argv is not None:
        pieces.extend(argv)
    # The java command itself is left untouched.
    cmd = " ".join([pieces[0]] + [quote(_) for _ in pieces[1:]])
    return run_cmd(cmd,
                   wait=True,
                   sin=sin,
                   communicate=True,
                   timeout=timeout,
                   shell=False)
Ejemplo n.º 22
0
def git_commit_all(local_folder,
                   url_https,
                   message,
                   user=None,
                   password=None,
                   timeout=300,
                   fLOG=noLOG):
    """
    Adds, commits and pushes every modification of a local git
    repository. It requires `GIT <http://git-scm.com/>`_ to be
    installed and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      message         message for the commit
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    We assume that git can be run without giving its full location.
    The function executes the following commands::

        cd [folder]
        git add -A
        git commit -m "[message]"
        git push -u origin master

    The outputs are given to ``git_check_error``.
    """
    template = """
            cd {0}
            git add -A
            git commit -m "{1}"
            git push -u origin master
            """
    filled = template.format(local_folder, message)
    script = filled.replace("            ", "").strip(" \n\r\t")
    # All instructions are chained into a single command line.
    cmd = script.replace("\n", "&")
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
Ejemplo n.º 23
0
def run_dot(dot_file, outimg):
    """
    Runs graphviz on a dot file to produce an image. The output format
    is the extension of *outimg*.

    @param      dot_file        file, format `DOT <http://www.graphviz.org/doc/info/lang.html>`_
    @param      outimg          output image
    @return                     out, err (stdout, stderr from graphviz)

    An exception is raised if graphviz writes anything on the standard error.

    .. versionadded:: 1.1
    """
    fmt = os.path.splitext(outimg)[-1].strip(".")
    cmd = "\"{0}\" -T{1} -o{2} {3}".format(dot_exe(), fmt, outimg, dot_file)
    out, err = run_cmd(cmd, wait=True)
    if len(err) == 0:
        return out, err
    # Any content on the standard error is considered as a failure.
    raise Exception(
        "unable to run graphviz on {0}.\nCMD:\n{1}\nOUT:\n{2}\nERR:\n{3}".
        format(dot_file, cmd, out, err))
Ejemplo n.º 24
0
    def runpy(self, line, cell=None):
        """
        Defines command ``%%runpy``.

        .. nbref::
            :title: runpy

            ``%%runpy`` runs a python script which accepts
            standard input and produces standard output,
            a timeout is set up at 10s. It is almost equivalent to::

                from pyquickhelper.loghelper import run_cmd
                import sys
                cmd = sys.executable.replace(
                    "pythonw",
                    "python") + " " + filename + " " + args
                out, err = run_cmd(
                    cmd, wait=True, sin=cell, communicate=True, timeout=10, shell=False)

        @param      line    magic command line, parsed with ``MagicFile.runpy_parser``
        @param      cell    cell content, sent to the script on its standard input
        @return             ``HTML`` object showing the standard error if it is
                            not empty, the standard output otherwise, None if the
                            line could not be parsed or no file was given

        .. versionadded:: 1.1
        """
        parsed = self.get_args(
            line, self.get_parser(MagicFile.runpy_parser, "runpy"))
        if parsed is None:
            return None

        script = parsed.file
        if len(script) == 0:
            # No script given: the magic command is called again with an empty line.
            self.runpy("")
            return None

        extra = parsed.args
        if isinstance(extra, list):
            # Every argument is surrounded by double quotes.
            extra = " ".join('"{0}"'.format(a) for a in extra)
        cmd = sys.executable.replace("pythonw", "python") + " " + script + " " + extra

        out, err = run_cmd(
            cmd, wait=True, sin=cell, communicate=True, timeout=10, shell=False)
        if len(err) > 0:
            return HTML(
                '<font color="#DD0000">Error</font><br /><pre>\n%s\n</pre>' % err)
        return HTML('<pre>\n%s\n</pre>' % out)
Ejemplo n.º 25
0
def git_commit_all(
        local_folder,
        url_https,
        message,
        user=None,
        password=None,
        timeout=300,
        fLOG=noLOG):
    """
    Adds all the modifications of a local git repository,
    commits them and pushes the result to ``origin master``.
    It requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
                                (not used by the commands below)
    @param      message         message for the commit
    @param      user            part 1 of the credentials (not used by the commands below)
    @param      password        part 2 of the credentials (not used by the commands below)
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    We assume that git can be run without giving its full location.
    The outputs of the command line are handed to ``git_check_error``.

    The function executes the following commands::

        cd [folder]
        git add -A
        git commit -m "[message]"
        git push -u origin master

    """
    template = "\n".join((
        "cd {0}",
        "git add -A",
        'git commit -m "{1}"',
        "git push -u origin master",
    ))
    filled = template.format(local_folder, message)
    # After the substitution, runs of twelve spaces are dropped and both
    # ends are stripped (this applies to the arguments as well).
    filled = filled.replace(" " * 12, "").strip(" \n\r\t")
    # One command line, the steps being separated by ``&``.
    cmd = filled.replace("\n", "&")
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
Ejemplo n.º 26
0
def build_machinelearning(version="Release"):
    """
    Builds machinelearning (ml.net).

    @param      version     build configuration, appended to the build
                            script as ``-<version>``

    The function runs ``build.cmd`` (Windows) or ``build.sh`` (through
    ``bash --verbose``) from subfolder ``cscode/machinelearning`` next to
    this file. It raises *FileNotFoundError* if the build script or the
    ``bin`` folder is missing, and *RuntimeError* if the standard error
    contains anything else than the lines about ``ILAsmVersion.txt``.
    """
    from pyquickhelper.loghelper import run_cmd
    print('[csharpyml.machinelearning]')

    on_windows = sys.platform.startswith("win")
    folder = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), 'cscode', 'machinelearning')
    script = "build.cmd" if on_windows else "build.sh"

    script_path = os.path.join(folder, script)
    if not os.path.exists(script_path):
        raise FileNotFoundError(
            "Unable to find '{0}', build failed. Found:\n{1}".format(
                script_path, "\n".join(os.listdir(folder))))

    cmd = script if on_windows else "bash --verbose " + script
    cmd = cmd + ' -' + version
    out, err = run_cmd(cmd, wait=True, change_path=folder)

    if len(err) > 0:
        # Filter out small errors.
        ignored = 'ILAsmVersion.txt: No such file or directory'
        err = "\n".join(
            row for row in err.split('\n') if ignored not in row)

    if len(err) > 0:
        raise RuntimeError(
            "Unable to build machinelearning code.\nCMD: {0}\n--ERR--\n{1}".
            format(cmd, err))
    if len(out) > 0:
        print('[csharpyml.machinelearning] OUT')
        print(out)

    bin_folder = os.path.join(folder, "bin")
    if not os.path.exists(bin_folder):
        raise FileNotFoundError(
            "Unable to find '{0}', build failed. Found:\n{1}".format(
                bin_folder, "\n".join(os.listdir(folder))))
Ejemplo n.º 27
0
    def test_script_pig(self):
        # Writes a small streaming script into a temporary folder, sends it
        # one row on its standard input and checks it prints something.
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        here = os.path.abspath(os.path.split(__file__)[0])
        out_folder = os.path.join(here, "data", "..", "temp_pypig_out")
        if not os.path.exists(out_folder):
            os.mkdir(out_folder)

        # The script evaluates every input row and prints the columns
        # named on its command line.
        script = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        js = eval(row)
                        for station in js:
                            vals = [ station[c] for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace("                    ", "")

        pyfile = os.path.join(out_folder, "pystream.py")
        with open(pyfile, "w", encoding="utf8") as stream:
            stream.write(script)

        # One row holding a single station.
        payload = (
            """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': """
            """'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """
            """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]""")

        interpreter = sys.executable.replace("pythonw", "python")
        cmd = " ".join([interpreter, pyfile, "name"])
        out, err = run_cmd(cmd, wait=True, sin=payload,
                           communicate=True, timeout=3, shell=False)
        fLOG("OUT\n", out)
        fLOG("ERR\n", err)
        assert len(out) > 0
Ejemplo n.º 28
0
def git_change_remote_origin(local_folder,
                             url_https,
                             user=None,
                             password=None,
                             add_fetch=False,
                             timeout=10,
                             fLOG=noLOG):
    """
    Replaces the remote ``origin`` of a local repository. The url and
    the credentials refer to the new repository.

    @param      local_folder    local folder
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      add_fetch       add instruction ``fetch``
    @param      fLOG            logging function
    @return                     None

    The function runs the instructions (from *local_folder*)::

        git remote remove origin
        git remote add origin url

    The url includes the credentials as built by ``git_url_user_password``.
    The outputs of the command line are handed to ``git_check_error``.
    """
    url_user = git_url_user_password(url_https, user, password)
    steps = [
        "cd {0}",
        "git remote remove origin",
        "git remote add origin {1}",
    ]
    script = "\n".join(steps).format(local_folder, url_user)
    # Runs of twelve spaces are removed and both ends are stripped after
    # the substitution, arguments included.
    script = script.replace(" " * 12, "").strip(" \n\r\t")
    if add_fetch:
        script += "\ngit fetch"
    # The steps are sent as one command line, separated by ``&``.
    cmd = script.replace("\n", "&")
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
Ejemplo n.º 29
0
def run_jython(pyfile,
               argv=None,
               jython_path=None,
               sin=None,
               timeout=None,
               fLOG=noLOG):
    """
    Runs a jython script through ``java -jar`` and returns
    the standard output and error.

    @param      pyfile          jython file
    @param      argv            arguments to send to the command line
    @param      jython_path     path to jython standalone
    @param      sin             data to send to the standard input
    @param      timeout         timeout
    @param      fLOG            logging function (not used by this function)
    @return                     out, err

    If *jython_path* is None, the function uses the location
    returned by ``get_jython_jar``.
    """
    if jython_path is None:
        jython_path = get_jython_jar()

    def protect(arg):
        # Escapes double quotes, then surrounds the argument
        # with double quotes if it contains a space.
        if '"' in arg:
            arg = arg.replace('"', '\\"')
        return '"{0}"'.format(arg) if " " in arg else arg

    parts = [get_java_cmd(), "-jar", jython_path, pyfile]
    if argv is not None:
        parts.extend(argv)
    # The first element (the java command) is left untouched.
    cmd = " ".join(parts[:1] + [protect(p) for p in parts[1:]])
    out, err = run_cmd(
        cmd, wait=True, sin=sin, communicate=True, timeout=timeout, shell=False)
    return out, err
Ejemplo n.º 30
0
def git_clone(local_folder, url_https, user=None, password=None, timeout=60,
              init=True, fLOG=noLOG):
    """
    Clones a project from a git repository into a local folder
    which is not a git repository yet,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      init            see below (True, use fetch, False, use clone)
    @param      fLOG            logging function
    @return                     *local_folder* if *init* is True,
                                ``local_folder/<project name>`` otherwise

    The function raises an exception if *local_folder* already contains
    a ``.git`` folder, or, when *init* is False, if
    ``local_folder/<project name>`` already exists.
    We assume that git can be run without giving its full location.

    The function executes the following commands (if init is True)::

        cd [folder]
        git init
        git remote add origin [https://user.password@server/project.git]
        git fetch

    Otherwise, it does::

        cd [folder]
        git clone [https://user.password@server/project.git] .

    The folder is created if it does not exist.

    .. exref::
        :tag: Automation
        :title: Clone many folders in one row

        ::

            eleves = "project1;project2;..."
            root = r"destination"

            for el in eleves.split(";"):
                cl = el.lower().replace(".","-")
                fold = os.path.join(root, el)
                if not os.path.exists(fold):
                    print("clone", el)
                    url = "https://<gitlab>/<group>/{0}.git".format(cl)
                    git_clone(  fold, url,user=user,password=password, init=False,fLOG=print)

    """
    url_user = git_url_user_password(url_https, user, password)
    local_folder = os.path.normpath(os.path.abspath(local_folder))

    # Both modes start from an existing folder which is not a repository yet.
    if not os.path.exists(local_folder):
        fLOG("creating folder", local_folder)
        os.mkdir(local_folder)
    if os.path.exists(os.path.join(local_folder, ".git")):
        raise Exception("folder {0} should not exist".format(local_folder))

    if init:
        steps = [
            "cd {0}",
            "git init",
            "git remote add origin {1}",
            "git fetch",
        ]
        result = local_folder
    else:
        final = os.path.split(url_user)[-1].replace(".git", "")
        result = os.path.join(local_folder, final)
        if os.path.exists(result):
            raise Exception(
                "folder {0} should not exists before cloning".format(result))
        # NOTE(review): the clone goes into local_folder itself (``.``)
        # while the returned path is local_folder/<project name>; confirm
        # which one callers expect.
        steps = [
            "cd {0}",
            "git clone {1} .",
        ]

    # The steps are sent as one command line, separated by ``&``.
    cmd = "\n".join(steps).format(local_folder, url_user).replace("\n", "&")
    # The *timeout* given by the caller is used (it is no longer reset to 60).
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
    return result
Ejemplo n.º 31
0
    def test_compile_module(self):
        # Compiles a small C++ extension with distutils in a temporary folder,
        # imports the binary, then checks copy_source_files and add_file_rst
        # on the folder which contains it.
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_compile_module")
        source = os.path.join(temp, "cdemo.cpp")
        init = os.path.join(temp, "__init__.py")
        setup = os.path.join(temp, "setup.py")
        with open(source, "w") as f:
            f.write(TestModuleC.content_c)
        # empty __init__.py
        open(init, "w").close()

        setup_content = dedent("""
            from distutils.core import setup, Extension
            module1 = Extension('stdchelper_demo', sources=['{0}'])
            setup (name = 'ccdemo', version = '1.0',
                   description = 'This is a demo package.',
                   ext_modules = [module1])
            """.format(source.replace("\\", "/")))
        with open(setup, "w") as f:
            f.write(setup_content)

        cmd = "{0} {1} build_ext --inplace".format(sys.executable, setup)
        out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=temp)
        # The compiler option -Werror=format-security is not an error.
        mentions_error = "error" in out or "error" in err
        if mentions_error and "error" in out.replace("-Werror=format-security", ""):
            raise Exception(
                "Unable to compile\n--OUT--\n{0}\n--ERR--\n{1}".format(out, err))

        # Name of the compiled file, it depends on the platform
        # and the python version.
        version = sys.version_info[:2]
        old_abi = version <= (3, 7)
        if sys.platform == "win32":
            pattern = "stdchelper_demo.cp%d%d-win_amd64.pyd"
        elif sys.platform == "darwin":
            if old_abi:
                pattern = "stdchelper_demo.cpython-%d%dm-darwin.so"
            else:
                pattern = "stdchelper_demo.cpython-%d%d-darwin.so"
        elif old_abi:
            pattern = "stdchelper_demo.cpython-%d%dm-x86_64-linux-gnu.so"
        else:
            pattern = "stdchelper_demo.cpython-%d%d-x86_64-linux-gnu.so"
        fullname = os.path.join(temp, pattern % version)
        if not os.path.exists(fullname):
            found = os.listdir(os.path.dirname(fullname))
            raise FileNotFoundError(
                "Unable to find '{0}' (platform '{1}')\nFound:\n{2}".format(
                    fullname, sys.platform, "\n".join(found)))

        mo = import_module(None, fullname, fLOG,
                           additional_sys_path=None, first_try=True)
        self.assertIsInstance(mo, tuple)
        self.assertEqual(len(mo), 2)
        self.assertTrue(hasattr(mo[0], '__doc__'))

        if 'stdchelper_demo' in sys.modules:
            del sys.modules['stdchelper_demo']

        temp2 = get_temp_folder(__file__, "temp_compile_module2")
        actions = copy_source_files(temp, temp2, fLOG=fLOG)
        store_obj = {}
        indexes = {}
        add_file_rst(temp2, store_obj, actions, fLOG=fLOG,
                     rootrep=("stdchelper_demo.", ""), indexes=indexes)
        if sys.platform == "darwin":
            warnings.warn(
                "add_file_rst does not work yet on MacOSX for C++ modules.")
            return
        self.assertNotEmpty(store_obj)
        self.assertEqual(len(store_obj), 1)
        nb_actions = len(actions)
        if nb_actions not in (3, 4):
            raise Exception("{0}\n{1}".format(
                nb_actions, "\n".join(str(_) for _ in actions)))
        self.assertEqual(len(indexes), 1)
Ejemplo n.º 32
0
    def test_script_pig(self):
        # Checks a python streaming script locally, then submits a PIG job
        # using it through self.client and reads the results back.
        # The test does nothing if self.client is None.
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        if self.client is None:
            return
        data = os.path.join(
            os.path.abspath(
                os.path.split(__file__)[0]),
            "data")

        fold = os.path.join(data, "..", "temp_pypig_az")
        if not os.path.exists(fold):
            os.mkdir(fold)

        # python script
        # It evaluates every non empty row read from the standard input and
        # prints, for every station, the columns named on the command line.

        pyth = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        row = row.strip()
                        if len(row) == 0 :
                            continue
                        js = eval(row)
                        for station in js:
                            vals = [ str(station[c]).strip() for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace("                    ", "")

        pyfile = os.path.join(fold, "pystream.py")
        with open(pyfile, "w", encoding="utf8") as f:
            f.write(pyth)

        # One input row holding two stations: two output lines are expected.
        tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
                 """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
                 """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33},""" + \
                 """{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
                 """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
                 """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

        # Local check of the script before it is sent to the cluster.
        cmd = sys.executable.replace(
            "pythonw",
            "python") + " " + pyfile + " name"
        out, err = run_cmd(
            cmd, wait=True, sin=tosend, communicate=True, timeout=3, shell=False)
        out = out.strip("\n\r ")
        spl = out.split("\n")
        if len(spl) != 2:
            # NOTE(review): the message reports len(out) (number of
            # characters), not len(spl) (number of lines) - confirm intent.
            raise Exception(
                "len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
                    out,
                    err,
                    len(out)))

        # PIG script
        # It streams the loaded rows through pystream.py and stores the
        # result into '$CONTAINER/$PSEUDO/unittest2/results.txt'.

        pig = """
                DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`
                        SHIP ('pystream.py')
                        INPUT(stdin USING PigStreaming(',')) OUTPUT (stdout USING PigStreaming(','));

                jspy = LOAD '$CONTAINER/$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);

                --DUMP jspy ;

                matrice = STREAM jspy THROUGH pystream AS
                                (   bonus:chararray,
                                    available_bike_stands:double,
                                    available_bikes:double,
                                    lat:double,
                                    lng:double,
                                    name:chararray,
                                    status:chararray) ;

                DUMP matrice ;

                DESCRIBE jspy ;
                DESCRIBE matrice ;

                STORE matrice INTO '$CONTAINER/$PSEUDO/unittest2/results.txt' USING PigStorage('\t') ;
            """.replace("                ", "")

        pigfile = os.path.join(fold, "pystream.pig")
        with open(pigfile, "w", encoding="utf8") as f:
            f.write(pig)

        # we upload some files
        # Only the files of folder data whose full path contains "paris".

        files = os.listdir(data)
        files = [os.path.join(data, _) for _ in files]
        files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

        import azure.common
        try:
            content = self.client.ls(
                self.blob_serv, self.container, "unittest2")
        except azure.common.AzureException as e:
            # The storage cannot be listed: the test stops with a warning.
            warnings.warn(
                "Unable to test azure, storage is still up?\n" + str(e))
            return

        # The upload only happens if the listing of "unittest2" is empty.
        if len(content) == 0:
            self.client.upload(
                self.blob_serv,
                self.container,
                "unittest2",
                files)

        # Results of a previous run are removed.
        if self.client.exists(
                self.blob_serv, self.container, "unittest2/results.txt"):
            self.client.delete_folder(
                self.blob_serv,
                self.container,
                "unittest2/results.txt")

        # we submit the job
        # recall can be replaced by an existing job id to skip the submission.
        recall = None
        if recall is None:
            try:
                job = self.client.pig_submit(self.blob_serv, self.container,
                                             pigfile, dependencies=[pyfile],
                                             params=dict(UTT="unittest2"))
            except (ConnectionError, NewConnectionError):
                # the cluster is probably not set up
                warnings.warn("hadoop cluster is not set up")
                return
            job_id = job["id"]
        else:
            job_id = recall

        status = self.client.wait_job(job_id, fLOG=fLOG)

        out, err = self.client.standard_outputs(
            status, self.blob_serv, self.container, fold)

        # The standard error of the job must report four written records.
        if "Total records written : 4" not in err:
            raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

        # The results are downloaded into one local file,
        # removed first if it already exists.
        dest = os.path.join(fold, "out_merged.txt")
        fLOG("dest=", dest)
        if os.path.exists(dest):
            os.remove(dest)
        self.client.download_merge(
            self.blob_serv,
            self.container,
            "$PSEUDO/unittest2/results.txt",
            dest)

        if not os.path.exists(dest):
            raise FileNotFoundError(dest)
        with open(dest, "r", encoding="utf8") as f:
            content = f.read()
        fLOG("-----\n", content)
        assert len(content.strip(" \n\r\t")) > 0

        # Same results read through df_head; the result must not be empty.
        df = self.client.df_head(self.blob_serv, self.container,
                                 "$PSEUDO/unittest2/results.txt", sep=",", merge=True)
        fLOG(df)
        assert len(df) > 0
    return df


#########################
# Clones skl2onnx
# +++++++++++++++

# The sklearn-onnx sources are expected in a subfolder next to this script.
this = os.path.abspath(os.path.dirname(__file__))
skl = os.path.join(this, "sklearn-onnx")
if os.path.exists(skl):
    # The subfolder already exists: it is updated with ``git pull``;
    # change_path presumably makes run_cmd run from that folder - TODO confirm.
    pth = skl
    cmd = "git pull"
else:
    # The subfolder does not exist: the repository is cloned into it.
    pth = None
    cmd = "git clone https://github.com/onnx/sklearn-onnx.git " + skl
run_cmd(cmd, wait=True, change_path=pth, fLOG=print)

#########################
# Runs the benchmark
# ++++++++++++++++++

folder = os.path.join(this, 'onnxruntime-skl2onnx')
location = os.path.join(this, 'sklearn-onnx', "tests")
filename = os.path.splitext(os.path.split(__file__)[-1])[0]
full_filename = filename + ".perf.csv"
if not os.path.exists(full_filename):
    with sklearn.config_context(assume_finite=True):
        df = run_all_tests(location, folder, verbose=True)
    print("[benchmark] saves into '{}'.".format(full_filename))
    df.to_csv(full_filename, index=False)
else:
Ejemplo n.º 34
0
def convert_short_latex_into_png(
        latex,
        temp_folder=".",
        fLOG=print,
        miktex=r"C:\Program Files\MiKTeX 2.9\miktex\bin\x64",
        final_name=None):
    """
    Converts a short latex script into an image.

    @param      latex           latex equation
    @param      temp_folder     temp_folder  (where temporary files will be placed)
    @param      fLOG            logging function (not used by this function)
    @param      miktex          miktex location
    @param      final_name      if not None, copy the image at this location using this name
    @return                     a location to the image (it should be copied), and its size

    The function writes ``eq.tex`` into *temp_folder*, runs ``htlatex``
    from that folder and expects image ``eq0x.png`` to be produced.
    You should not call the function twice at the same time in the same folder.

    The function raises *FileNotFoundError* if *miktex*, ``htlatex.exe``
    (Windows only) or the produced image cannot be found, and an
    *Exception* if the standard error of ``htlatex`` contains ``FAILED``.
    """
    if not os.path.exists(miktex):
        raise FileNotFoundError("unable to find miktex")

    if sys.platform.startswith("win"):
        htlatex = os.path.join(miktex, "htlatex.exe")
        if not os.path.exists(htlatex):
            raise FileNotFoundError("unable to find htlatex")
    else:
        htlatex = os.path.join(miktex, "htlatex")

    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    # Minimal document displaying the equation.
    document = "\n".join([
        r"\documentclass[12pt]{article}",
        r"\pagestyle{empty}",
        r"\begin{document}",
        "$$",
        "%s",
        "$$",
        r"\end{document}",
    ]) % latex.strip("\n\r ")
    eq = os.path.join(temp_folder, "eq.tex")
    with open(eq, "w") as f:
        f.write(document)

    cmd = '"' + htlatex + '" eq.tex "html, graphics-300" "" "" "--interaction=nonstopmode"'
    # htlatex is run from temp_folder (eq.tex is given without its folder);
    # the current directory is restored even if the command line fails.
    cwd = os.getcwd()
    os.chdir(temp_folder)
    try:
        out, err = run_cmd(cmd, wait=True)
    finally:
        os.chdir(cwd)

    if "FAILED" in err:
        raise Exception("it failed\n-----\n{0}\n----------\n{1}".format(
            out, err))
    img = os.path.join(temp_folder, "eq0x.png")
    if not os.path.exists(img):
        with open(os.path.join(temp_folder, "eq.log"), "r") as f:
            log = f.read()
        raise FileNotFoundError("the compilation did not work\n" + log)

    # The image is only opened to get its size, the file is closed right after.
    with Image.open(img) as im:
        size = im.size

    if final_name is not None:
        shutil.copy(img, final_name)
        return final_name, size
    return img, size
Ejemplo n.º 35
0
def git_clone(local_folder,
              url_https,
              user=None,
              password=None,
              timeout=60,
              init=True,
              fLOG=noLOG):
    """
    Clones a project from a git repository into a local folder
    which is not a git repository yet,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      init            see below (True, use fetch, False, use clone)
    @param      fLOG            logging function
    @return                     *local_folder* if *init* is True,
                                ``local_folder/<project name>`` otherwise

    The function raises an exception if *local_folder* already contains
    a ``.git`` folder, or, when *init* is False, if
    ``local_folder/<project name>`` already exists.
    We assume that git can be run without giving its full location.

    The function executes the following commands (if init is True)::

        cd [folder]
        git init
        git remote add origin [https://user.password@server/project.git]
        git fetch

    Otherwise, it does::

        cd [folder]
        git clone [https://user.password@server/project.git] .

    The folder is created if it does not exist.

    .. exref::
        :tag: Automation
        :title: Clone many folders in one row

        ::

            eleves = "project1;project2;..."
            root = r"destination"

            for el in eleves.split(";"):
                cl = el.lower().replace(".","-")
                fold = os.path.join(root, el)
                if not os.path.exists(fold):
                    print("clone", el)
                    url = "https://<gitlab>/<group>/{0}.git".format(cl)
                    git_clone(  fold, url,user=user,password=password, init=False,fLOG=print)

    """
    url_user = git_url_user_password(url_https, user, password)
    local_folder = os.path.normpath(os.path.abspath(local_folder))

    # Both modes start from an existing folder which is not a repository yet.
    if not os.path.exists(local_folder):
        fLOG("creating folder", local_folder)
        os.mkdir(local_folder)
    if os.path.exists(os.path.join(local_folder, ".git")):
        raise Exception("folder {0} should not exist".format(local_folder))

    if init:
        steps = [
            "cd {0}",
            "git init",
            "git remote add origin {1}",
            "git fetch",
        ]
        result = local_folder
    else:
        final = os.path.split(url_user)[-1].replace(".git", "")
        result = os.path.join(local_folder, final)
        if os.path.exists(result):
            raise Exception(
                "folder {0} should not exists before cloning".format(result))
        # NOTE(review): the clone goes into local_folder itself (``.``)
        # while the returned path is local_folder/<project name>; confirm
        # which one callers expect.
        steps = [
            "cd {0}",
            "git clone {1} .",
        ]

    # The steps are sent as one command line, separated by ``&``.
    cmd = "\n".join(steps).format(local_folder, url_user).replace("\n", "&")
    # The *timeout* given by the caller is used (it is no longer reset to 60).
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
    return result
Ejemplo n.º 36
0
def compile_cython_single_script(script, skip_warn=True, fLOG=noLOG):
    """
    Compiles a single Cython script ``.pyx``: the function writes
    the matching setup file next to the script and builds the
    extension in place.

    @param      script      filename of the ``.pyx`` script
    @param      skip_warn   if True, an error output only made of
                            ``UserWarning`` lines (and indented lines)
                            produces a warning instead of an exception
    @param      fLOG        logging function
    @return                 standard output of the compilation

    The function raises *ValueError* if the extension is not ``.pyx``,
    *FileNotFoundError* if the script does not exist and
    *CustomCythonError* if the compilation wrote errors on the
    standard error.

    The function applies the steps described in the basic tutorial
    `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_.
    The function creates a file ``<script>.setup.py``
    in the same location and runs it with ``build_ext --inplace``.

    The compilation requires a compiler
    (not `MinGW <http://www.mingw.org/>`_ or
    `Visual Studio (Community Edition) <https://www.microsoft.com/france/visual-studio/produits/community/Default.aspx>`_).
    If none was found, Python usually displays an error message like::

        Unable to find vcvarsall.bat

    You can also read this old blog post:
    `Build a Python 64 bit extension on Windows <http://www.xavierdupre.fr/blog/2013-07-07_nojs.html>`_
    about this file:: ``C:\\Python35_x64\\lib\\distutils\\msvc9compiler.py``.

    .. faqref::
        :tag: cython
        :title: How to compile a Cython function?

        This function compiles a
        `Cython <http://cython.org/>`_ script.
        This extension implements Python functions in a
        pseudo-language close to `C <https://en.wikipedia.org/wiki/C_(programming_language)>`_.
        The instructions described in the tutorial
        `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_
        must be followed to use a function written in Cython.
        That is what function :func:`compile_cython_single_script` does.

        Because the pseudo C part is compiled to make it much faster,
        the most difficult part usually consists in making sure
        the Python interpreter finds the <b>right</b> compiler.
        This compiler is necessarily the same as the one used to compile
        Python and it changes with every version.
        See
        `Compiling Python on Windows <https://docs.python.org/3/using/windows.html?highlight=visual%20studio#compiling-python-on-windows>`_
        and pay attention to the version of Python you are using.
    """
    ext = os.path.splitext(script)[-1]
    if ext != ".pyx":
        raise ValueError("no extension .pyx: " + script)
    if not os.path.exists(script):
        raise FileNotFoundError(script)

    # The command runs from the script folder (change_path), every path
    # is made absolute so that it remains valid from that folder,
    # including when *script* is a bare file name or a relative path.
    current, name = os.path.split(os.path.abspath(script))
    namen = os.path.splitext(name)[0]
    # NOTE(review): distutils was removed from the standard library in
    # Python 3.12, the generated script is left unchanged on purpose.
    setup_script = """
        from distutils.core import setup
        from Cython.Build import cythonize
        setup(
            name='{1}',
            ext_modules=cythonize("{0}")
        )
        """.replace("        ", "").format(name, namen)

    filename = os.path.join(current, name + ".setup.py")
    # Python reads source files as UTF-8 by default, the setup file
    # must not depend on the locale encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(setup_script)

    # Quotes protect the interpreter and the setup file against spaces
    # in their paths.
    cmd = '"{0}" -u "{1}" build_ext --inplace'.format(
        sys.executable, filename)

    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=current)
    if len(err) > 0:
        if skip_warn:
            # Indented lines are considered as the continuation of a
            # previous message, any other line which is not a
            # UserWarning is a real error.
            do_raise = any(
                len(line) > 0 and not line.startswith(" ") and
                "UserWarning" not in line
                for line in err.split("\n"))
        else:
            do_raise = True
        if do_raise:
            with open(script, "r", encoding="utf-8") as f:
                content = f.read()
            raise CustomCythonError(
                "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}\nSCRIPT:\n{3}".format(cmd, out, err, content))
        else:
            warnings.warn(
                "[compile_cython_single_script] CMD:\n{0}\nOUT:\n{1}ERR:\n{2}".format(cmd, out, err))
    return out
Ejemplo n.º 37
0
def execute_python_scripts(root, df, col_names=None, url=None, eol="/", fLOG=noLOG, gen_mail=None):
    """
    Retrieves all python scripts found in a list of folders, runs them
    and optionally compares their output to a content downloaded
    from a url.

    @param      root            main folder, every folder listed in *df*
                                is looked for below this one
    @param      df              dataframe, one row per folder, it must contain
                                the columns *folder* and *mail*
    @param      col_names       dictionary which renames the columns:
                                folder, mail, exists, program, out, err, time, size,
                                key, pattern_id, url, content, cmp, dist,
                                sortie_dans_motif, motif_dans_sortie,
                                None to keep the default names
    @param      url             if not None, format string which receives the code
                                computed from a mail (``url.format(code)``),
                                the content of the resulting url is compared
                                to the output of every program
    @param      eol             if not empty, replaces end of lines by *eol*
    @param      gen_mail        generator of mails, a function which takes a mail
                                and yields the variations to try, by default
                                the mail itself and its lower case version
    @param      fLOG            logging function
    @return                     dataframe, one row per folder, program
                                and mail variation

    A program which cannot be run gets None as output and the exception
    message as error, the comparison columns are not filled for it.
    """
    if col_names is None:
        # the default value must behave like an empty mapping
        col_names = {}

    if gen_mail is None:
        def iter_mail(mail):
            yield mail
            yield mail.lower()
        gen_mail = iter_mail

    def post_process(out, eol):
        # None means the program could not be run, it is kept as is
        if out is None:
            return None
        out = out.strip("\r\t\n").rstrip().replace(
            "\r", "").replace("\t", "    ")
        if eol:
            out = out.replace("\n", eol)
        return out

    # column names do not depend on the row, they are resolved once
    col_folder = col_names.get("folder", "folder")
    col_mail = col_names.get("mail", "mail")
    col_find = col_names.get("exists", "exists")
    col_out = col_names.get("out", "out")
    col_err = col_names.get("err", "err")
    col_prog = col_names.get("program", "program")
    col_time = col_names.get("time", "time")
    col_key = col_names.get("key", "key")
    col_size = col_names.get("size", "size")
    col_url = col_names.get("url", "url")
    col_ind = col_names.get("pattern_id", "pattern_id")
    col_cmp = col_names.get("cmp", "cmp")
    col_in = col_names.get("sortie_dans_motif", "sortie_dans_motif")
    col_in2 = col_names.get("motif_dans_sortie", "motif_dans_sortie")
    col_dist = col_names.get("dist", "dist")
    col_content = col_names.get("content", "content")

    downloads = {}
    res = []
    for name, mail in zip(df[col_folder], df[col_mail]):
        row = {col_folder: name}
        fLOG("[execute_python_script], look into '{0}'".format(name))
        subf = os.path.join(root, name)
        if not os.path.exists(subf):
            # second chance, the folder may use dots instead of dashes
            subf = os.path.join(root, name.replace("-", "."))
        if not os.path.exists(subf):
            row[col_find] = False
            res.append(row)
            continue

        row[col_find] = True
        store = list(explore_folder_iterfile(subf, ".*[.]py$"))
        fLOG("     -", len(store), "programs found")

        if len(store) == 0:
            # no program, one row per mail variation with its code
            for mm in sorted(gen_mail(mail.strip())):
                mailid = _get_code(mm.encode("utf-8"))
                r = row.copy()
                # there is no url to build when none was given
                loc = url.format(mailid) if url is not None else None
                r.update({col_key: mm, col_ind: mailid, col_url: loc})
                res.append(r)
            continue

        # test all programs
        outs = []
        for py in sorted(store):
            cmd = '"{0}" "{1}"'.format(sys.executable, py)
            # time.clock was removed in Python 3.8
            t1 = time.perf_counter()
            try:
                out, err = run_cmd(cmd, wait=True)
            except Exception as e:  # any failure is reported in column err
                out = None
                err = str(e)
            out = post_process(out, eol)
            t2 = time.perf_counter()
            outs.append({col_out: out, col_err: post_process(err, eol),
                         col_prog: os.path.split(py)[-1], col_time: t2 - t1,
                         col_size: os.stat(py).st_size})

        if url is None:
            for o in outs:
                r = row.copy()
                r.update(o)
                res.append(r)
            continue

        for mm in sorted(gen_mail(mail.strip())):
            mailid = _get_code(mm.encode("utf-8"))
            loc = url.format(mailid)
            ind = {col_key: mm, col_ind: mailid, col_url: loc}

            # every url is downloaded only once
            if loc not in downloads:
                downloads[loc] = get_url_content_timeout(
                    loc).strip("\n\r\t ")
            content = post_process(downloads[loc], eol)
            ind[col_content] = content

            for o in outs:
                r = row.copy()
                r.update(o)
                r.update(ind)
                out = r[col_out]
                if out is not None:
                    # the comparison only makes sense if the program ran
                    r[col_cmp] = out == content or out.strip(
                    ) == content.strip()
                    r[col_in] = out in content
                    r[col_in2] = content in out
                    # the edit distance is only computed when both lengths
                    # are comparable, otherwise the length difference is used
                    r[col_dist] = (edit_distance(out, content)[0]) if (
                        len(content) > len(out) // 2) else abs(len(content) - len(out))
                res.append(r)
    return pandas.DataFrame(res)
Ejemplo n.º 38
0
def build_grammar(g4, version="4.7.1", fLOG=noLOG):
    """
    Compiles the grammar for a specific file.

    @param      g4          grammar format antlr4, a name without
                            extension ``.g4`` is looked for in the folder
                            of this module
    @param      version     version of *antlr4* to use, 4.7
    @param      fLOG        logging function
    @return                 standard output and standard error of the
                            compilation, separated by ``---ERR---``

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at:
    `antlr4 options <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.
    The function downloads the *antlr4* jar into the folder of this module
    if it is not already there and modifies the environment variable
    ``CLASSPATH`` of the current process. It raises *FileNotFoundError*
    if the jar cannot be downloaded and *Exception* if the compilation fails.

    .. exref::
        :title: Builds a Antlr4 grammar

        See `grammars-v4 <https://github.com/antlr/grammars-v4>`_

        ::

            build_grammar("R.g4")
    """
    if not g4.endswith(".g4"):
        fold = os.path.abspath(os.path.dirname(__file__))
        g4 = os.path.join(fold, g4 + ".g4")

    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(
        version)
    spl = url.split("/")
    domain, jar_name = "/".join(spl[:-1]) + "/", spl[-1]
    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, jar_name)

    if not os.path.exists(final):
        from ..datasource.http_retrieve import download_data
        name = download_data(jar_name, website=domain, whereTo=folder)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # os.pathsep and os.path.join give the same value as before on Windows
    # (';' and '\\') and a valid CLASSPATH on the other systems.
    # NOTE(review): the existing CLASSPATH is only kept when it already
    # mentions the jar, this is the original behaviour, confirm the intent.
    path = os.environ.get("CLASSPATH", "")
    if jar_name not in path:
        path = os.pathsep.join([".", final])
    else:
        path = os.pathsep.join([".", final, os.environ["CLASSPATH"]])

    os.environ["CLASSPATH"] = path
    fLOG("CLASSPATH", os.environ["CLASSPATH"])

    cmd = "org.antlr.v4.Tool "
    if "Lexer" not in g4:
        cmd += "-Dlanguage=Python3 "
    cmd += g4
    from pyquickhelper.loghelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True, fLOG=fLOG)

    def compiled():
        # the compilation is considered as done if the expected file exists
        if "Lexer" in g4:
            lexer = g4.replace(".g4", ".tokens")
        else:
            lexer = g4.replace(".g4", ".py")
        return os.path.exists(lexer)

    if not compiled() or (len(err) > 0 and "error" in err):

        javapath = r'C:\Program Files\Java\jre7\bin\java.exe'
        if sys.platform.startswith("win") and os.path.exists(javapath):
            # second try on Windows with an explicit java, PATH is only
            # modified here: ';' is not a separator on other systems
            # and would corrupt the last entry of PATH
            os.environ["PATH"] = os.environ["PATH"] + ";" + javapath
            out, err = run_cmd(
                '"' + javapath + '" ' + cmd, wait=True, fLOG=fLOG)
            if not compiled() or (len(err) > 0 and "error" in err):
                raise Exception(
                    "unable to compile: " +
                    final +
                    "\nCLASSPATH:\n" +
                    os.environ["CLASSPATH"] +
                    "\nERR:\n" +
                    err +
                    "\nCMD:\njava " +
                    cmd +
                    "\nYou should do it manually.")
        elif err:
            # warnings are not errors, only the remaining lines matter
            err_lines = err.split("\n")
            err_lines = [_ for _ in err_lines if not _.startswith("warning(")]
            err2 = "\n".join(err_lines).strip("\n ")
            if len(err2) > 0:
                raise Exception(
                    "unable to compile: " +
                    final +
                    "\nCLASSPATH:\n" +
                    os.environ["CLASSPATH"] +
                    "\nERR:\n" +
                    err +
                    "\nCMD:\njava " +
                    cmd)

    if os.environ.get("USERNAME", os.environ.get("USER", "")) in g4:
        # the generated files mention the folder of the grammar,
        # it is removed when this folder contains the user name
        dest = os.path.dirname(g4)
        for name in os.listdir(dest):
            if "Parser" not in name and "Lexer" not in name and \
                    "Token" not in name and "Listener" not in name:
                continue
            full = os.path.join(dest, name)
            with open(full, "r", encoding="utf-8") as f:
                content = f.read()
            content1 = content.replace(dest, "")
            if content1 != content:
                fLOG("[build_grammar] modified", name)
                with open(full, "w", encoding="utf-8") as f:
                    f.write(content1)

    return out + "\n---ERR---\n" + err
Ejemplo n.º 39
0
    def test_matplotlib_example(self):
        """
        Saves a matplotlib animation as a video with the first
        available encoder among *ffmpeg* and *avconv*
        (*avconv* is only tried outside Windows).
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        progs = ["ffmpeg"]
        if not sys.platform.startswith("win"):
            progs.append("avconv")
        errs = []
        prog = None
        for prog in progs:
            out, err = run_cmd(prog, wait=True, fLOG=fLOG)
            exps = "usage:"
            # an encoder is available if its usage is displayed,
            # the test on None comes first as ``in`` fails on None
            if err is None or len(err) == 0 or (exps not in out and exps not in err):
                errs.append((prog, err))
            else:
                break

        if len(errs) >= len(progs):
            # no encoder was found
            if sys.platform.startswith("win"):
                fLOG("download ffmpeg")
                add_missing_development_version(
                    ["pyensae"], __file__, hide=True)
                from pyensae.datasource import download_data
                download_data("ffmpeg.zip", website="xd")
            else:
                raise FileNotFoundError(
                    "Unable to find '{1}'.\nPATH='{0}'\n--------\n[OUT]\n{2}\n[ERR]\n{3}".format(
                        os.environ["PATH"], prog, out,
                        "\n----\n".join("{0}:\n{1}".format(*_) for _ in errs)))

        temp = get_temp_folder(__file__, "temp_example_example")
        fix_tkinter_issues_virtualenv()

        # update a distribution based on new data.
        import numpy as np
        import matplotlib.pyplot as plt
        import scipy.stats as ss
        from matplotlib.animation import FuncAnimation, writers

        # To get the list of available writers
        # NOTE(review): writers.register looks like a decorator factory,
        # calling it alone probably registers nothing - to be confirmed.
        if not writers.is_available(prog):
            writers.register(prog)
        fLOG(writers.list())

        class UpdateDist:
            # Callable given to FuncAnimation: every call draws the density
            # of a beta distribution updated with one more random draw.

            def __init__(self, ax, prob=0.5):
                self.success = 0
                self.prob = prob
                self.line, = ax.plot([], [], 'k-')
                self.x = np.linspace(0, 1, 200)
                self.ax = ax

                # Set up plot parameters
                self.ax.set_xlim(0, 1)
                self.ax.set_ylim(0, 15)
                self.ax.grid(True)

                # This vertical line represents the theoretical value, to
                # which the plotted distribution should converge.
                self.ax.axvline(prob, linestyle='--', color='black')

            def init(self):
                self.success = 0
                self.line.set_data([], [])
                return self.line,

            def __call__(self, i):
                # This way the plot can continuously run and we just keep
                # watching new realizations of the process
                if i == 0:
                    return self.init()

                # Choose success based on exceed a threshold with a uniform
                # pick
                if np.random.rand(1,) < self.prob:  # pylint: disable=W0143
                    self.success += 1
                y = ss.beta.pdf(self.x, self.success + 1,
                                (i - self.success) + 1)
                self.line.set_data(self.x, y)
                return self.line,

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ud = UpdateDist(ax, prob=0.7)
        anim = FuncAnimation(fig, ud, frames=np.arange(100), init_func=ud.init,
                             interval=100, blit=True)

        try:
            Writer = writers[prog]
        except KeyError:
            if prog == "avconv":
                # avconv is not registered, its writer is imported directly
                from matplotlib.animation import AVConvWriter
                Writer = AVConvWriter
            else:
                # bare raise keeps the original traceback
                raise
        writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
        anim.save(os.path.join(temp, 'lines2.mp4'), writer=writer)

        plt.close('all')
        fLOG("end")
Ejemplo n.º 40
0
    def test_script_pig(self):
        """
        Streams data through a python script from a PIG job submitted
        with ``self.client`` and checks the results stored in the blob storage.

        The test returns without doing anything if ``self.client`` is None,
        if the blob storage cannot be listed or if the job cannot be
        submitted because of a connection error.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        if self.client is None:
            return
        data = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                            "data")

        # working folder, next to folder data
        fold = os.path.join(data, "..", "temp_pypig_az")
        if not os.path.exists(fold):
            os.mkdir(fold)

        # python script

        # The script reads rows from the standard input, evaluates each of them
        # as a python list of dictionaries and writes the requested columns
        # (command line arguments which do not contain ".py") separated by commas.
        # The call to replace removes the indentation of this source file.
        pyth = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        row = row.strip()
                        if len(row) == 0 :
                            continue
                        js = eval(row)
                        for station in js:
                            vals = [ str(station[c]).strip() for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace("                    ", "")

        pyfile = os.path.join(fold, "pystream.py")
        with open(pyfile, "w", encoding="utf8") as f:
            f.write(pyth)

        # one row which contains a list of two stations
        tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", """ + \
                 """'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
                 """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - """ + \
                 """ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
                 """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
                 """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33},""" + \
                 """{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", """ + \
                 """'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), """ + \
                 """'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - """ + \
                 """ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': """ + \
                 """'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
                 """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

        # the script is first checked locally, it must write
        # one line per station for column name
        cmd = sys.executable.replace("pythonw",
                                     "python") + " " + pyfile + " name"
        out, err = run_cmd(cmd,
                           wait=True,
                           sin=tosend,
                           communicate=True,
                           timeout=3,
                           shell=False)
        out = out.strip("\n\r ")
        spl = out.split("\n")
        if len(spl) != 2:
            # the message displays the number of characters of the output
            raise Exception("len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
                out, err, len(out)))

        # PIG script

        # The script streams every row of the text files through pystream.py
        # and stores the result in unittest2/results.txt.
        pig = """
                DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`
                        SHIP ('pystream.py')
                        INPUT(stdin USING PigStreaming(',')) OUTPUT (stdout USING PigStreaming(','));

                jspy = LOAD '$CONTAINER/$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);

                --DUMP jspy ;

                matrice = STREAM jspy THROUGH pystream AS
                                (   bonus:chararray,
                                    available_bike_stands:double,
                                    available_bikes:double,
                                    lat:double,
                                    lng:double,
                                    name:chararray,
                                    status:chararray) ;

                DUMP matrice ;

                DESCRIBE jspy ;
                DESCRIBE matrice ;

                STORE matrice INTO '$CONTAINER/$PSEUDO/unittest2/results.txt' USING PigStorage('\t') ;
            """.replace("                ", "")

        pigfile = os.path.join(fold, "pystream.pig")
        with open(pigfile, "w", encoding="utf8") as f:
            f.write(pig)

        # we upload some files

        # only the files of folder data whose path contains "paris"
        files = os.listdir(data)
        files = [os.path.join(data, _) for _ in files]
        files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

        import azure.common  # pylint: disable=C0415
        try:
            content = self.client.ls(self.blob_serv, self.container,
                                     "unittest2")
        except azure.common.AzureException as e:
            warnings.warn("Unable to test azure, storage is still up?\n" +
                          str(e))
            return

        # the files are only uploaded if the remote folder is empty
        if len(content) == 0:
            self.client.upload(self.blob_serv, self.container, "unittest2",
                               files)

        # previous results are removed
        if self.client.exists(self.blob_serv, self.container,
                              "unittest2/results.txt"):
            self.client.delete_folder(self.blob_serv, self.container,
                                      "unittest2/results.txt")

        # we submit the job
        # recall can be replaced by the id of a job already submitted
        # to skip the submission while debugging
        recall = None
        if recall is None:
            try:
                job = self.client.pig_submit(self.blob_serv,
                                             self.container,
                                             pigfile,
                                             dependencies=[pyfile],
                                             params=dict(UTT="unittest2"))
            except (ConnectionError, NewConnectionError):
                # the cluster is probably not set up
                warnings.warn("hadoop cluster is not set up")
                return
            job_id = job["id"]
        else:
            job_id = recall

        status = self.client.wait_job(job_id, fLOG=fLOG)

        out, err = self.client.standard_outputs(status, self.blob_serv,
                                                self.container, fold)

        # the job is successful if this message is part of the error output
        if "Total records written : 4" not in err:
            raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

        # the results are downloaded and merged into a single local file
        dest = os.path.join(fold, "out_merged.txt")
        fLOG("dest=", dest)
        if os.path.exists(dest):
            os.remove(dest)
        self.client.download_merge(self.blob_serv, self.container,
                                   "$PSEUDO/unittest2/results.txt", dest)

        if not os.path.exists(dest):
            raise FileNotFoundError(dest)
        with open(dest, "r", encoding="utf8") as f:
            content = f.read()
        fLOG("-----\n", content)
        assert len(content.strip(" \n\r\t")) > 0

        # same results read as a dataframe
        df = self.client.df_head(self.blob_serv,
                                 self.container,
                                 "$PSEUDO/unittest2/results.txt",
                                 sep=",",
                                 merge=True)
        fLOG(df)
        assert len(df) > 0
Ejemplo n.º 41
0
    def test_script_pig(self):
        """
        Streams data through a python script from a PIG job submitted
        with ``self.client`` and checks the results downloaded from the cluster.

        The test returns without doing anything if ``self.client`` is None.
        The HIVE part at the end is disabled, the test stops before it.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        if self.client is None:
            return
        data = os.path.join(
            os.path.abspath(
                os.path.split(__file__)[0]),
            "data")

        # the two letter messages (AA, BB, ...) log the progress of the test
        fLOG("AA")
        # python script

        # The script reads rows from the standard input, evaluates each of them
        # as a python list of dictionaries and writes the requested columns
        # (command line arguments which do not contain ".py") separated by commas.
        # The call to replace removes the indentation of this source file.
        pyth = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        row = row.strip()
                        if len(row) == 0 :
                            continue
                        js = eval(row)
                        for station in js:
                            vals = [ str(station[c]).strip() for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace("                    ", "")

        # working folder, next to folder data
        fold = os.path.join(data, "..", "temp_pypig")
        if not os.path.exists(fold):
            os.mkdir(fold)

        fLOG("BB")

        pyfile = os.path.join(fold, "pystream.py")
        with open(pyfile, "w", encoding="utf8") as f:
            f.write(pyth)

        # one row which contains a list of two stations
        tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': """ + \
                 """datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': """ + \
                 """'10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', """ + \
                 """'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, """ + \
                 """'available_bike_stands': 1, 'bike_stands': 33},{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - """ + \
                 """75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, """ + \
                 """'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, """ + \
                 """'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
                 """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

        # the script is first checked locally, it must write
        # one line per station for column name
        cmd = sys.executable.replace(
            "pythonw",
            "python") + " " + pyfile + " name"
        out, err = run_cmd(
            cmd, wait=True, sin=tosend, communicate=True, timeout=3, shell=False)
        out = out.strip("\n\r ")
        spl = out.split("\n")
        if len(spl) != 2:
            # the message displays the number of characters of the output
            raise Exception(
                "len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
                    out,
                    err,
                    len(out)))

        fLOG("CC")
        # PIG script

        # The script streams every row of the text files through pystream.py
        # and stores the result in unittest2/results.txt.
        pig = """
                DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`
                        SHIP ('pystream.py')
                        INPUT(stdin USING PigStreaming(',')) OUTPUT (stdout USING PigStreaming(','));

                jspy = LOAD '$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);

                --DUMP jspy ;

                matrice = STREAM jspy THROUGH pystream AS
                                (   bonus:chararray,
                                    available_bike_stands:double,
                                    available_bikes:double,
                                    lat:double,
                                    lng:double,
                                    name:chararray,
                                    status:chararray) ;

                DUMP matrice ;

                DESCRIBE jspy ;
                DESCRIBE matrice ;

                STORE matrice INTO 'unittest2/results.txt' USING PigStorage('\t') ;
            """.replace("                ", "")

        fLOG(self.client.username)
        # HIVE query, only used by the disabled part at the end of the test
        hive_sql = """
            DROP TABLE IF EXISTS bikes20;
            CREATE TABLE bikes20 (sjson STRING);
            LOAD DATA INPATH "/user/__USERNAME__/unittest2/paris*.txt"
                INTO TABLE bikes20;
            SELECT * FROM bikes20 LIMIT 10;
            """.replace("__USERNAME__", self.client.username.decode("ascii"))
        fLOG(hive_sql)
        #${hiveconf:UTT}

        pigfile = os.path.join(fold, "pystream.pig")
        with open(pigfile, "w", encoding="utf8") as f:
            f.write(pig)

        fLOG("DD upload")

        # we upload some files

        # only the files of folder data whose path contains "paris"
        files = os.listdir(data)
        files = [os.path.join(data, _) for _ in files]
        files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

        if not self.client.dfs_exists("unittest2"):
            self.client.dfs_mkdir("unittest2")

        # the files are only uploaded if the remote folder is empty
        content = self.client.dfs_ls("unittest2")
        if len(content) == 0:
            self.client.upload_cluster(files, "unittest2")

        # previous results are removed
        if self.client.dfs_exists("unittest2/results.txt"):
            self.client.dfs_rm("unittest2/results.txt", True)

        fLOG("FF")

        # we test the syntax
        out, err = self.client.pig_submit(pigfile, dependencies=[pyfile], check=True,
                                          no_exception=True,
                                          params=dict(UTT="unittest2"),
                                          fLOG=fLOG)
        if "pystream.pig syntax OK" not in err:
            raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

        fLOG("II")

        # we submit the job
        out, err = self.client.pig_submit(pigfile, dependencies=[pyfile],
                                          stop_on_failure=True, no_exception=True,
                                          redirection=None,
                                          params=dict(UTT="unittest2"))

        fLOG("JJ")

        # the job is successful if this message is part of the error output
        if "Total records written : 4" not in err:
            raise Exception("PIG OUT:\n{0}\nPIG ERR:\n{1}".format(out, err))

        # the results are downloaded and merged into a single local file
        dest = os.path.join(fold, "out_merged.txt")
        fLOG("dest=", dest)
        if os.path.exists(dest):
            os.remove(dest)

        fLOG("KK")

        self.client.download_cluster("unittest2/results.txt", dest, merge=True)
        assert os.path.exists(dest)
        with open(dest, "r", encoding="utf8") as f:
            content = f.read()
        fLOG("-----\n", content)
        assert len(content.strip(" \n\r\t")) > 0

        fLOG("LL")

        # we submit the job
        # disable HIVE for the time being (broken)
        warnings.warn("hive not being tested")
        return
        # everything below is unreachable as long as the return above remains
        out, err = self.client.hive_submit(hive_sql,
                                           redirection=None,
                                           params=dict(UTT="unittest2"),
                                           fLOG=fLOG)

        fLOG("HIVE OUT")
        fLOG(out)
        fLOG("HIVE ERR")
        fLOG(err)
        #assert "(0,1.0,32.0,48.8724200631,2.34839523628,10042" in out

        fLOG("END")
Ejemplo n.º 42
0
def compile_cython_single_script(script, skip_warn=True, fLOG=noLOG):
    """
    Writes a setup file next to a ``.pyx`` script and compiles
    the script in place with ``build_ext --inplace``.

    @param      script      filename of the ``.pyx`` script
    @param      skip_warn   if True, lines of the standard error which contain
                            ``UserWarning`` (and indented lines) do not
                            trigger an exception, a warning is emitted instead;
                            if False, any content on the standard error
                            raises an exception
    @param      fLOG        logging function
    @return                 standard output of the compilation

    The function raises *ValueError* if *script* does not have the
    extension ``.pyx``, *FileNotFoundError* if it does not exist and
    *CustomCythonError* if the compilation reported an error.

    The function applies the steps described in the basic tutorial
    `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_.
    The function creates a setup file ``<script>.setup.py``
    in the same location and compiles it.

    The compilation requires a compiler
    (not `MinGW <http://www.mingw.org/>`_ or
    `Visual Studio (Community Edition) <https://www.microsoft.com/france/visual-studio/produits/community/Default.aspx>`_).
    If none was found, Python usually displays an error message like::

        Unable to find vcvarsall.bat

    You can also read this old blog post:
    `Build a Python 64 bit extension on Windows <http://www.xavierdupre.fr/blog/2013-07-07_nojs.html>`_
    about this file:: ``C:\\Python35_x64\\lib\\distutils\\msvc9compiler.py``.

    .. faqref::
        :tag: cython
        :title: How to compile a Cython function?

        This function compiles a
        `Cython <http://cython.org/>`_ script.
        This extension lets you implement Python functions in a
        pseudo-language close to `C <https://en.wikipedia.org/wiki/C_(programming_language)>`_.
        The instructions described in the tutorial
        `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_
        must be followed to use a function written in Cython.
        That is what function :func:`compile_cython_single_script` does.

        Because the pseudo C part is compiled to make it much faster,
        the most difficult part is usually to make sure
        the Python interpreter finds the <b>right</b> compiler.
        This compiler is necessarily the same as the one used to compile
        Python and it changes with every version.
        See
        `Compiling Python on Windows <https://docs.python.org/3/using/windows.html?highlight=visual%20studio#compiling-python-on-windows>`_
        and pay attention to the version of Python you are using.
    """
    ext = os.path.splitext(script)[-1]
    if ext != ".pyx":
        raise ValueError("no extension .pyx: " + script)
    if not os.path.exists(script):
        raise FileNotFoundError(script)

    current, name = os.path.split(script)
    namen = os.path.splitext(name)[0]
    setup_script = """
        from distutils.core import setup
        from Cython.Build import cythonize
        setup(
            name='{1}',
            ext_modules=cythonize("{0}")
        )
        """.replace("        ", "").format(name, namen)

    # The command is run with change_path=current: an absolute path keeps
    # the setup file reachable when *script* is a relative path which
    # includes a folder.
    filename = os.path.abspath(os.path.join(current, name + ".setup.py"))
    with open(filename, "w") as f:
        f.write(setup_script)

    cmd = sys.executable + " -u {0} build_ext --inplace".format(filename)

    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=current)
    if len(err) > 0:
        if skip_warn:
            # Only a non indented line without 'UserWarning' is an error.
            do_raise = any(
                len(line) > 0 and not line.startswith(" ") and
                "UserWarning" not in line
                for line in err.split("\n"))
        else:
            # Warnings are not skipped: anything on stderr is an error
            # (do_raise used to be undefined in that case).
            do_raise = True

        if do_raise:
            with open(script, "r", encoding="utf-8") as f:
                content = f.read()
            raise CustomCythonError(
                "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}\nSCRIPT:\n{3}".format(cmd, out, err, content))
        warnings.warn(
            "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}".format(cmd, out, err))
    return out
Ejemplo n.º 43
0
def build_grammar(g4, version="4.6", fLOG=noLOG):
    """
    Compiles a grammar with *antlr4* to produce a Python 3 lexer and parser.

    @param      g4          grammar file (format *antlr4*), if the name does not
                            end with ``.g4``, the function looks for
                            ``<g4>.g4`` in the folder of this module
    @param      version     version of *antlr4* to use, 4.4, 4.5-rc-2
    @param      fLOG        logging function
    @return                 standard output and standard error of the
                            compilation concatenated into a single string

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at: `antlr4 options <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    The function downloads the jar ``antlr-<version>-complete.jar`` in the
    folder of this module if it is not already there and prepends it to the
    environment variable ``CLASSPATH``. It raises *FileNotFoundError* if the
    download failed and *Exception* if the lexer was not produced or if the
    standard error contains ``error``.

    .. exref::
        :title: Build a Antlr4 grammar

        See `grammars-v4 <https://github.com/antlr/grammars-v4>`_

        ::

            build_grammar("R.g4")
    """
    if not g4.endswith(".g4"):
        fold = os.path.abspath(os.path.dirname(__file__))
        g4 = os.path.join(fold, g4 + ".g4")

    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(
        version)
    spl = url.split("/")
    domain, name = "/".join(spl[:-1]) + "/", spl[-1]
    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, name)

    if not os.path.exists(final):
        from ..datasource.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # The jar is placed first, the entries already defined are kept.
    # os.pathsep is ';' on Windows and ':' elsewhere.
    entries = [".", final]
    existing = os.environ.get("CLASSPATH", "")
    if existing:
        entries.append(existing)
    os.environ["CLASSPATH"] = os.pathsep.join(entries)
    fLOG("CLASSPATH", os.environ["CLASSPATH"])

    cmd = "org.antlr.v4.Tool -Dlanguage=Python3 " + g4
    from pyquickhelper.loghelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True, fLOG=fLOG)

    def compiled():
        # the compilation is successful if the lexer was produced
        lexer = g4.replace(".g4", "Lexer.py")
        return os.path.exists(lexer)

    def failed(err):
        return not compiled() or "error" in err

    def error_message(err, suffix=""):
        return ("unable to compile: " + final +
                "\nCLASSPATH:\n" + os.environ["CLASSPATH"] +
                "\nERR:\n" + err +
                "\nCMD:\njava " + cmd + suffix)

    if failed(err):
        # second attempt with a known location of java on Windows
        javapath = r'C:\Program Files\Java\jre7\bin\java.exe'
        if not (sys.platform.startswith("win") and os.path.exists(javapath)):
            raise Exception(error_message(err))

        # PATH contains folders, not executables
        os.environ["PATH"] = os.environ["PATH"] + \
            os.pathsep + os.path.dirname(javapath)
        out, err = run_cmd(
            '"' + javapath + '" ' + cmd, wait=True, fLOG=fLOG)
        if failed(err):
            raise Exception(
                error_message(err, "\nYou should do it manually."))

    return out + "\nERR:\n" + err
Ejemplo n.º 44
0
    def test_compile_module(self):
        # Compiles a small C++ extension in a temporary folder, imports it,
        # then checks the source files can be processed by add_file_rst.
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        build_dir = get_temp_folder(__file__, "temp_compile_module")
        cpp_file = os.path.join(build_dir, "cdemo.cpp")
        init_file = os.path.join(build_dir, "__init__.py")
        setup_file = os.path.join(build_dir, "setup.py")

        with open(cpp_file, "w") as f:
            f.write(TestModuleC.content_c)
        # empty __init__.py
        open(init_file, "w").close()

        template = """
            from distutils.core import setup, Extension
            module1 = Extension('stdchelper_demo', sources=['{0}'])
            setup (name = 'ccdemo', version = '1.0',
                   description = 'This is a demo package.',
                   ext_modules = [module1])
            """
        with open(setup_file, "w") as f:
            f.write(dedent(template.format(cpp_file.replace("\\", "/"))))

        # compilation
        out, err = run_cmd(
            "{0} {1} build_ext --inplace".format(sys.executable, setup_file),
            wait=True, fLOG=fLOG, change_path=build_dir)
        # the compiler flag -Werror=format-security is not an error
        if ("error" in out or "error" in err) and \
                "error" in out.replace("-Werror=format-security", ""):
            raise Exception(
                "Unable to compile\n--OUT--\n{0}\n--ERR--\n{1}".format(out, err))

        # name of the binary the compilation should have produced
        if sys.platform == "win32":
            pattern = "stdchelper_demo.cp%d%d-win_amd64.pyd"
        elif sys.platform == "darwin":
            pattern = "stdchelper_demo.cpython-%d%dm-darwin.so"
        else:
            pattern = "stdchelper_demo.cpython-%d%dm-x86_64-linux-gnu.so"
        binary = os.path.join(build_dir, pattern % sys.version_info[:2])
        if not os.path.exists(binary):
            found = os.listdir(os.path.dirname(binary))
            raise FileNotFoundError(
                "Unable to find '{0}' (platform '{1}')\nFound:\n{2}".format(
                    binary, sys.platform, "\n".join(found)))

        # import of the compiled module
        imported = import_module(None, binary, fLOG,
                                 additional_sys_path=None, first_try=True)
        self.assertIsInstance(imported, tuple)
        self.assertEqual(len(imported), 2)
        self.assertTrue(hasattr(imported[0], '__doc__'))

        sys.modules.pop('stdchelper_demo', None)

        # documentation of the copied sources
        copy_dir = get_temp_folder(__file__, "temp_compile_module2")
        actions = copy_source_files(build_dir, copy_dir, fLOG=fLOG)
        store_obj = {}
        indexes = {}
        add_file_rst(copy_dir, store_obj, actions, fLOG=fLOG,
                     rootrep=("stdchelper_demo.", ""), indexes=indexes)
        if sys.platform == "darwin":
            warnings.warn(
                "add_file_rst does not work yet on MacOSX for C++ modules.")
            return

        self.assertNotEmpty(store_obj)
        self.assertEqual(len(store_obj), 1)
        nb_actions = len(actions)
        if nb_actions not in (3, 4):
            raise Exception("{0}\n{1}".format(
                nb_actions, "\n".join(str(_) for _ in actions)))
        self.assertEqual(len(indexes), 1)
Ejemplo n.º 45
0
    def test_script_pig(self):
        # Checks a PIG job which streams its rows through a python script:
        # the script is first run locally, then the PIG script is checked
        # and submitted through self.client, finally the results are
        # downloaded. The test is skipped when no client is available.
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        if self.client is None:
            return
        data = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                            "data")

        fLOG("AA")
        # python script
        # NOTE(review): the streamed script calls eval on every input row,
        # presumably acceptable because the rows come from the test data;
        # it must not be reused on untrusted input.

        pyth = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        row = row.strip()
                        if len(row) == 0 :
                            continue
                        js = eval(row)
                        for station in js:
                            vals = [ str(station[c]).strip() for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace("                    ", "")

        fold = os.path.join(data, "..", "temp_pypig")
        if not os.path.exists(fold):
            os.mkdir(fold)

        fLOG("BB")

        pyfile = os.path.join(fold, "pystream.py")
        with open(pyfile, "w", encoding="utf8") as f:
            f.write(pyth)

        # one row which contains two stations: the script must write two lines
        tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': """ + \
                 """datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': """ + \
                 """'10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', """ + \
                 """'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, """ + \
                 """'available_bike_stands': 1, 'bike_stands': 33},{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - """ + \
                 """75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, """ + \
                 """'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, """ + \
                 """'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), """ + \
                 """'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

        # local run of the python script, the data is sent on stdin
        cmd = sys.executable.replace("pythonw",
                                     "python") + " " + pyfile + " name"
        out, err = run_cmd(cmd,
                           wait=True,
                           sin=tosend,
                           communicate=True,
                           timeout=3,
                           shell=False)
        out = out.strip("\n\r ")
        spl = out.split("\n")
        if len(spl) != 2:
            raise Exception("len:{2}\nOUT:\n{0}\nERR:\n{1}".format(
                out, err, len(out)))

        fLOG("CC")
        # PIG script

        pig = """
                DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`
                        SHIP ('pystream.py')
                        INPUT(stdin USING PigStreaming(',')) OUTPUT (stdout USING PigStreaming(','));

                jspy = LOAD '$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);

                --DUMP jspy ;

                matrice = STREAM jspy THROUGH pystream AS
                                (   bonus:chararray,
                                    available_bike_stands:double,
                                    available_bikes:double,
                                    lat:double,
                                    lng:double,
                                    name:chararray,
                                    status:chararray) ;

                DUMP matrice ;

                DESCRIBE jspy ;
                DESCRIBE matrice ;

                STORE matrice INTO 'unittest2/results.txt' USING PigStorage('\t') ;
            """.replace("                ", "")

        fLOG(self.client.username)
        hive_sql = """
            DROP TABLE IF EXISTS bikes20;
            CREATE TABLE bikes20 (sjson STRING);
            LOAD DATA INPATH "/user/__USERNAME__/unittest2/paris*.txt"
                INTO TABLE bikes20;
            SELECT * FROM bikes20 LIMIT 10;
            """.replace("__USERNAME__", self.client.username.decode("ascii"))
        fLOG(hive_sql)
        # ${hiveconf:UTT}

        pigfile = os.path.join(fold, "pystream.pig")
        with open(pigfile, "w", encoding="utf8") as f:
            f.write(pig)

        fLOG("DD upload")

        # we upload some files

        files = os.listdir(data)
        files = [os.path.join(data, _) for _ in files]
        files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

        if not self.client.dfs_exists("unittest2"):
            self.client.dfs_mkdir("unittest2")

        # the files are only uploaded when the remote folder is empty
        content = self.client.dfs_ls("unittest2")
        if len(content) == 0:
            self.client.upload_cluster(files, "unittest2")

        # results of a previous run are removed
        if self.client.dfs_exists("unittest2/results.txt"):
            self.client.dfs_rm("unittest2/results.txt", True)

        fLOG("FF")

        # we test the syntax
        out, err = self.client.pig_submit(pigfile,
                                          dependencies=[pyfile],
                                          check=True,
                                          no_exception=True,
                                          params=dict(UTT="unittest2"),
                                          fLOG=fLOG)
        if "pystream.pig syntax OK" not in err:
            raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

        fLOG("II")

        # we submit the job
        out, err = self.client.pig_submit(pigfile,
                                          dependencies=[pyfile],
                                          stop_on_failure=True,
                                          no_exception=True,
                                          redirection=None,
                                          params=dict(UTT="unittest2"))

        fLOG("JJ")

        if "Total records written : 4" not in err:
            raise Exception("PIG OUT:\n{0}\nPIG ERR:\n{1}".format(out, err))

        dest = os.path.join(fold, "out_merged.txt")
        fLOG("dest=", dest)
        if os.path.exists(dest):
            os.remove(dest)

        fLOG("KK")

        self.client.download_cluster("unittest2/results.txt", dest, merge=True)
        assert os.path.exists(dest)
        with open(dest, "r", encoding="utf8") as f:
            content = f.read()
        fLOG("-----\n", content)
        assert len(content.strip(" \n\r\t")) > 0

        fLOG("LL")

        # we submit the job
        # disable HIVE for the time being (broken)
        warnings.warn("hive not being tested")
        # The test stops here as the warning announces: the HIVE job below
        # is not submitted until this return is removed.
        return

        out, err = self.client.hive_submit(hive_sql,
                                           redirection=None,
                                           params=dict(UTT="unittest2"),
                                           fLOG=fLOG)

        fLOG("HIVE OUT")
        fLOG(out)
        fLOG("HIVE ERR")
        fLOG(err)
        #assert "(0,1.0,32.0,48.8724200631,2.34839523628,10042" in out

        fLOG("END")
Ejemplo n.º 46
0
def execute_python_scripts(root,
                           df,
                           col_names=None,
                           url=None,
                           eol="/",
                           fLOG=noLOG,
                           gen_mail=None):
    """
    Retrieves all :epkg:`python` scripts and run them.

    @param      root            main folder
    @param      df              dataframe, one row per folder to look into
    @param      col_names       dictionary for columns:
                                folder, mail, program, out, err, url, cmp, url_content, key, time,
                                a missing name is replaced by a default one,
                                None is the same as an empty dictionary
    @param      url             if not None, pattern formatted with the code
                                computed from every mail (``url.format(code)``),
                                the content it points to is downloaded and
                                compared to the output of every program
    @param      eol             if not None, replaces end of lines by *eol*
    @param      gen_mail        generator of mails, by default, it produces
                                the mail and the mail in lower case
    @param      fLOG            logging function
    @return                     dataframe

    Every program is run with the current interpreter. An exception raised
    while running a program is stored in the error column and the output
    of this program is None.
    """
    if col_names is None:
        col_names = {}

    if gen_mail is None:

        def iter_mail(mail):
            yield mail
            yield mail.lower()

        gen_mail = iter_mail

    def post_process(out, eol):
        # None means the program could not be run
        if out is None:
            return None
        out = out.strip("\r\t\n").rstrip().replace("\r",
                                                   "").replace("\t", "    ")
        if eol:
            out = out.replace("\n", eol)
        return out

    # column names do not depend on the row
    col_folder = col_names.get("folder", "folder")
    col_mail = col_names.get("mail", "mail")
    col_find = col_names.get("exists", "exists")
    col_out = col_names.get("out", "out")
    col_err = col_names.get("err", "err")
    col_prog = col_names.get("program", "program")
    col_time = col_names.get("time", "time")
    col_key = col_names.get("key", "key")
    col_size = col_names.get("size", "size")
    col_url = col_names.get("url", "url")
    col_ind = col_names.get("pattern_id", "pattern_id")
    col_cmp = col_names.get("cmp", "cmp")
    col_in = col_names.get("sortie_dans_motif", "sortie_dans_motif")
    col_in2 = col_names.get("motif_dans_sortie", "motif_dans_sortie")
    col_dist = col_names.get("dist", "dist")
    col_content = col_names.get("content", "content")

    downloads = {}
    res = []
    for name, mail in zip(df[col_folder], df[col_mail]):
        row = {col_folder: name}
        fLOG("[execute_python_script], look into '{0}'".format(name))
        subf = os.path.join(root, name)
        if not os.path.exists(subf):
            subf = os.path.join(root, name.replace("-", "."))
        if not os.path.exists(subf):
            row[col_find] = False
            res.append(row)
            continue

        row[col_find] = True
        store = list(explore_folder_iterfile(subf, ".*[.]py$"))
        fLOG("     -", len(store), "programs found")

        if len(store) == 0:
            # no program: one row per mail
            for mm in sorted(gen_mail(mail.strip())):
                mailid = _get_code(mm.encode("utf-8"))
                r = row.copy()
                r.update({col_key: mm, col_ind: mailid})
                if url is not None:
                    r[col_url] = url.format(mailid)
                res.append(r)
            continue

        # test all programs
        outs = []
        last_out = None
        for py in sorted(store):
            cmd = '"{0}" "{1}"'.format(sys.executable, py)
            # time.clock was removed in Python 3.8
            t1 = time.perf_counter()
            try:
                out, err = run_cmd(cmd, wait=True)
            except Exception as e:
                # a failing program must not stop the others
                out = None
                err = str(e)
            last_out = post_process(out, eol)
            t2 = time.perf_counter()
            outs.append({
                col_out: last_out,
                col_err: post_process(err, eol),
                col_prog: os.path.split(py)[-1],
                col_time: t2 - t1,
                col_size: os.stat(py).st_size
            })

        if url is None:
            for o in outs:
                r = row.copy()
                r.update(o)
                res.append(r)
            continue

        # NOTE(review): as in the original code, only the output of the last
        # program decides whether the expected content is downloaded.
        if last_out is None:
            for mm in gen_mail(mail.strip()):
                mailid = _get_code(mm.encode("utf-8"))
                ind = {col_ind: mailid}
                for o in outs:
                    r = row.copy()
                    r.update(o)
                    r.update(ind)
                    res.append(r)
            continue

        for mm in sorted(gen_mail(mail.strip())):
            mailid = _get_code(mm.encode("utf-8"))
            loc = url.format(mailid)
            ind = {col_key: mm, col_ind: mailid, col_url: loc}

            # every url is downloaded only once
            if loc not in downloads:
                downloads[loc] = get_url_content_timeout(
                    loc).strip("\n\r\t ")
            content = post_process(downloads[loc], eol)
            ind[col_content] = content

            for o in outs:
                r = row.copy()
                r.update(o)
                r.update(ind)
                prog_out = r[col_out]
                if prog_out is not None:
                    # a program which failed has nothing to compare
                    r[col_cmp] = prog_out == content or prog_out.strip(
                    ) == content.strip()
                    r[col_in] = prog_out in content
                    r[col_in2] = content in prog_out
                    r[col_dist] = (edit_distance(prog_out, content)[0]) if (
                        len(content) > len(prog_out) //
                        2) else abs(len(content) - len(prog_out))
                res.append(r)
    return pandas.DataFrame(res)
Ejemplo n.º 47
0
def convert_short_latex_into_png(latex, temp_folder=".", fLOG=print,
                                 miktex=r"C:\Program Files\MiKTeX 2.9\miktex\bin\x64",
                                 final_name=None):
    """
    Convert a short latex script into an image.

    @param      latex           latex equation
    @param      temp_folder     temp_folder  (where temporary files will be placed),
                                it is created if it does not exist
    @param      fLOG            logging function (not used by the function)
    @param      miktex          miktex location (folder which contains *htlatex*)
    @param      final_name      if not None, copy the image at this location using this name
    @return                     a location to the image (it should be copied), and its size

    You should not call the function twice at the same in the same folder:
    the function always writes ``eq.tex`` and reads ``eq0x.png`` in *temp_folder*.

    The function raises *FileNotFoundError* if *miktex* or *htlatex*
    (on Windows) cannot be found or if the image was not produced,
    and *Exception* if the standard error contains ``FAILED``.
    The current directory is changed while *htlatex* is running
    and restored afterwards, even if the run fails.
    """
    if not os.path.exists(miktex):
        raise FileNotFoundError("unable to find miktex")

    if sys.platform.startswith("win"):
        htlatex = os.path.join(miktex, "htlatex.exe")
        if not os.path.exists(htlatex):
            raise FileNotFoundError("unable to find htlatex")
    else:
        htlatex = os.path.join(miktex, "htlatex")

    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    eq = os.path.join(temp_folder, "eq.tex")
    with open(eq, "w") as f:
        f.write(r"""\documentclass[12pt]{article}
                \pagestyle{empty}
                \begin{document}
                $$
                %s
                $$
                \end{document}""".replace("                ", "") % latex.strip("\n\r "))

    cmd = '"' + htlatex + '" eq.tex "html, graphics-300" "" "" "--interaction=nonstopmode"'
    cwd = os.getcwd()
    os.chdir(temp_folder)
    try:
        out, err = run_cmd(cmd, wait=True)
    finally:
        # the caller must get its working directory back in every case
        os.chdir(cwd)

    if "FAILED" in err:
        raise Exception(
            "it failed\n-----\n{0}\n----------\n{1}".format(out, err))
    img = os.path.join(temp_folder, "eq0x.png")
    if not os.path.exists(img):
        with open(os.path.join(temp_folder, "eq.log"), "r") as f:
            log = f.read()
        raise FileNotFoundError("the compilation did not work\n" + log)

    # the image is only opened to retrieve its size
    im = Image.open(img)
    if final_name is not None:
        shutil.copy(img, final_name)
        return final_name, im.size
    return img, im.size
Ejemplo n.º 48
0
            docpath = os.path.normpath(
                os.path.join(thispath, "_doc", "presentation" + suffix))
            os.chdir(docpath)

        lay = "html"
        build = "build"
        over = ""
        sconf = ""

        import_pyquickhelper()
        from pyquickhelper.helpgen import process_sphinx_cmd
        cmd_file = os.path.abspath(process_sphinx_cmd.__file__)
        cmd = '"{4}" "{5}" -b {1} -d {0}/doctrees{2}{3} source {0}/{1}'.format(
            build, lay, over, sconf, sys.executable, cmd_file)
        from pyquickhelper.loghelper import run_cmd
        out, err = run_cmd(cmd, wait=True, fLOG=print)
        print(out)
        print(err)

        if sys.platform.startswith("win"):
            os.chdir(pa)

    else:
        # builds the setup

        from pyquickhelper.pycode import clean_readme
        long_description = clean_readme(long_description)

        setup(
            name=project_var_name,
            version='%s%s' % (sversion, subversion),
Ejemplo n.º 49
0
def run_pig(pigfile,
            argv=None,
            pig_path=None,
            hadoop_path=None,
            jython_path=None,
            timeout=None,
            logpath="logs",
            pig_version="0.15.0",
            hadoop_version="2.7.2",
            fLOG=noLOG):
    """
    runs a pig script in local mode and returns the standard output and error

    @param      pigfile         pig file
    @param      argv            arguments to send to the command line
                                (currently not added to the command line)
    @param      pig_path        path to pig 0.XX.0
    @param      hadoop_path     path to hadoop
    @param      jython_path     not used by this function
    @param      timeout         timeout, given to *run_cmd*
    @param      logpath         path to the logs, also used as temporary folder
    @param      pig_version     PIG version (if *pig_path* is not defined)
    @param      hadoop_version  Hadoop version (if *hadoop_path* is not defined)
    @param      fLOG            logging function
    @return                     out, err

    If *pig_path* is None, it defaults to subfolder ``pig-<pig_version>``
    of the folder returned by ``get_pig_path()``.
    If *hadoop_path* is None, it defaults to ``get_hadoop_path()``.

    The function defines the environment variables ``JAVA_HOME``,
    ``PIG_CONF_DIR``, ``HADOOP_HOME``, ``HADOOP_CLIENT_OPTS`` when they are
    missing. It raises *FileNotFoundError* if the folder it would assign to
    ``PIG_CONF_DIR`` or ``HADOOP_HOME`` does not exist; in that case the
    environment variable is left undefined.
    """
    if pig_path is None:
        pig_path = os.path.join(get_pig_path(), "pig-%s" % pig_version)

    if hadoop_path is None:
        hadoop_path = get_hadoop_path()

    java = get_java_path()
    if "JAVA_HOME" not in os.environ:
        os.environ["JAVA_HOME"] = java

    # The folders are checked *before* the environment is modified so that
    # a failing call does not leave an invalid value behind (which would
    # silently disable the check for every following call).
    if "PIG_CONF_DIR" not in os.environ:
        conf_dir = os.path.normpath(os.path.join(pig_path, "conf"))
        if not os.path.exists(conf_dir):
            raise FileNotFoundError(conf_dir)
        os.environ["PIG_CONF_DIR"] = conf_dir

    if "HADOOP_HOME" not in os.environ:
        if not os.path.exists(hadoop_path):
            raise FileNotFoundError(hadoop_path)
        os.environ["HADOOP_HOME"] = hadoop_path

    if "HADOOP_CLIENT_OPTS" not in os.environ:
        os.environ["HADOOP_CLIENT_OPTS"] = "-Xmx1024m"

    fLOG("PIG_CONF_DIR=", os.environ["PIG_CONF_DIR"])

    def quote(position, arg):
        # The executable (first item) is left untouched, the other
        # arguments get their double quotes escaped and are wrapped
        # into double quotes when they contain a space.
        if position == 0:
            return arg
        if '"' in arg:
            arg = arg.replace('"', '\\"')
        if " " in arg:
            arg = '"{0}"'.format(arg)
        return arg

    # Switch kept from the original implementation: the full classpath
    # (pig jars + hadoop jars) is disabled, only the legacy jar is used.
    full = False
    jars = []
    if full:
        # one pattern per folder containing a pig jar,
        # sorted to get a deterministic classpath
        folders = sorted(set(os.path.split(j)[0] for j in get_pig_jars()))
        jars.extend(os.path.join(f, "*.jar") for f in folders)
        jars.extend(_run_pig_hadoop_jars(hadoop_path, hadoop_version))
        jars.append(os.path.join(pig_path, "pig-%s-core-h1.jar" % pig_version))
    else:
        jars.append(
            os.path.join(
                pig_path,
                "pig-%s" % pig_version,
                "legacy",
                "pig-%s-withouthadoop-h2.jar" % pig_version))

    # The expanded list of jars is only logged.
    # NOTE(review): the classpath below is built from the unexpanded
    # patterns, not from this list - confirm this is intended.
    jarsall = []
    for pattern in jars:
        jarsall.extend(glob.glob(pattern))
    jarsall.sort()
    for j in jarsall:
        fLOG(j)

    # os.pathsep is ';' on Windows (the previous hard-coded value)
    # and ':' on the other platforms, which is what java expects.
    classpath = os.pathsep.join(jars)
    fLOG("jars", classpath)

    cmd = [get_java_cmd(), "-Xmx1024m",
           "-classpath", classpath,
           "-Dpig.log.dir=" + logpath,
           "-Dhadoop.log.dir=" + logpath,
           "-Dhadoop.tmp.dir=" + logpath,
           "-Dpig.log.file=pid.log",
           "-Djava.io.tmpdir=" + logpath,
           "-Dpig.home.dir=" + pig_path,
           "org.apache.pig.Main",
           "-x", "local", pigfile,
           "-stop_on_failure"
           ]

    cmd = " ".join(quote(i, arg) for i, arg in enumerate(cmd))
    out, err = run_cmd(
        cmd, wait=True, sin=None, communicate=True, timeout=timeout, shell=False)
    return out, err


def _run_pig_hadoop_jars(hadoop_path, hadoop_version):
    """
    returns the jars and jar patterns of a Hadoop distribution
    which @see fn run_pig adds to the classpath

    @param      hadoop_path     folder which contains ``hadoop-<hadoop_version>``
    @param      hadoop_version  Hadoop version
    @return                     list of paths, some of them end with ``*.jar``
    """
    share = os.path.join(
        hadoop_path, "hadoop-%s" % hadoop_version, "share", "hadoop")
    return [
        # dependencies of every component
        os.path.join(share, "common", "lib", "*.jar"),
        os.path.join(share, "hdfs", "lib", "*.jar"),
        os.path.join(share, "mapreduce", "lib", "*.jar"),
        os.path.join(share, "httpfs", "tomcat", "lib", "*.jar"),
        os.path.join(share, "tools", "lib", "*.jar"),
        os.path.join(share, "yarn", "lib", "*.jar"),
        # the components themselves
        os.path.join(share, "common", "hadoop-common-%s.jar" % hadoop_version),
        os.path.join(share, "common", "hadoop-nfs-%s.jar" % hadoop_version),
        os.path.join(share, "hdfs", "hadoop-hdfs-%s.jar" % hadoop_version),
        os.path.join(share, "hdfs", "hadoop-hdfs-nfs-%s.jar" % hadoop_version),
        os.path.join(share, "mapreduce", "*.jar"),
        os.path.join(share, "yarn", "*.jar"),
    ]