Beispiel #1
0
def build_grammar(g4):
    """
    compile the grammar for a specific file

    @param      g4      grammar format antlr4, extension ``.g4`` is added if missing
    @return             standard output of the *antlr4* command line

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at: `antlr4 options <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    The function downloads the *antlr4* jar (version 4.4) next to this file
    if it is not already there, adds it to ``CLASSPATH`` and runs
    ``java org.antlr.v4.Tool``. An exception is raised if the command
    writes anything on the standard error.
    """
    if not g4.endswith(".g4"):
        g4 = g4 + ".g4"

    version = "4.4"

    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(version)
    domain, name = url.rsplit("/", 1)
    domain += "/"
    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, name)
    if not os.path.exists(final):
        from ..resources.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        print(name)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # The jar location is built with os.path.join: writing it as
    # "{0}\antlr-..." in a regular string turns "\a" into the BEL character.
    # The previous CLASSPATH is kept by value, "%CLASSPATH%" is not expanded
    # when it is stored in os.environ.
    previous = os.environ.get("CLASSPATH", "")
    if name not in previous:
        entries = [".", final]
        if previous:
            entries.append(previous)
        os.environ["CLASSPATH"] = os.pathsep.join(entries)

    cmd = "org.antlr.v4.Tool -Dlanguage=Python3 " + g4
    from pyquickhelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True)

    if len(err) > 0:
        # second chance on Windows: java may be installed without being in PATH
        javapath = r"C:\Program Files\Java\jre7\bin"
        if sys.platform.startswith("win") and os.path.exists(javapath):
            os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + javapath
            out, err = run_cmd("java " + cmd, wait=True)
            if len(err) > 0:
                raise Exception("unable to compile: " + final + "\nERR:\n" + err +
                                "\nCMD:\njava " + cmd + "\nYou should do it manually.")
        else:
            raise Exception("unable to compile: " + final + "\nERR:\n" + err +
                            "\nCMD:\njava " + cmd)

    return out

    
Beispiel #2
0
    def runpy(self, line, cell=None):
        """
        defines command ``%%runpy``

        runs a python script with the interpreter running this process,
        the content of the cell is sent to its standard input,
        the timeout is 10s

        @param      line        command line (script name followed by its arguments)
        @param      cell        content of the cell
        @return                 HTML with the standard output, or with the
                                standard error if it is not empty

        .. versionadded:: 1.1
        """
        parser = self.get_parser(MagicFile.runpy_parser, "runpy")
        parsed = self.get_args(line, parser)
        if parsed is None:
            return None

        script = parsed.file
        if len(script) == 0:
            # no script: the magic command is called again with an empty line
            self.runpy("")
            return None

        interpreter = sys.executable.replace("pythonw", "python")
        command = " ".join([interpreter, script, parsed.args])
        out, err = run_cmd(command, wait=True, sin=cell, communicate=True,
                           timeout=10, shell=False)
        if len(err) > 0:
            return HTML(
                '<font color="#DD0000">Error</font><br /><pre>\n%s\n</pre>' % err)
        return HTML('<pre>\n%s\n</pre>' % out)
def run_jython(pyfile, argv=None, jython_path=None, sin=None, timeout=None, fLOG=noLOG):
    """
    runs a jython script and returns the standard output and error

    @param      pyfile          jython file
    @param      argv            arguments to send to the command line
    @param      jython_path     path to jython standalone
    @param      sin             data to send to the standard input
    @param      timeout         timeout
    @param      fLOG            logging function (not used by this function)
    @return                     out, err

    If *jython_path* is None, the function uses the jar returned by *get_jython_jar*.
    """
    if jython_path is None:
        jython_path = get_jython_jar()

    def quote(position, piece):
        # the executable (first item) is left untouched
        if position > 0:
            if '"' in piece:
                piece = piece.replace('"', '\\"')
            if " " in piece:
                piece = '"{0}"'.format(piece)
        return piece

    pieces = [get_java_cmd(), "-jar", jython_path, pyfile]
    if argv is not None:
        pieces.extend(argv)
    command = " ".join([quote(pos, piece) for pos, piece in enumerate(pieces)])
    return run_cmd(command, wait=True, sin=sin, communicate=True,
                   timeout=timeout, shell=False)
    def test_flake8(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        # runs flake8 (looked for in the folder Scripts of the interpreter)
        # on the folder src located two levels above this file
        here = os.path.abspath(os.path.dirname(__file__))
        sources = os.path.normpath(os.path.join(here, "..", "..", "src"))
        flake8 = os.path.join(os.path.dirname(sys.executable), "Scripts", "flake8")
        out, err = run_cmd(flake8 + " " + sources, fLOG=fLOG, wait=True)

        # messages which are ignored
        ignored = ("E501", "__init__.py", "E265", "W291")
        issues = [row for row in out.split("\n")
                  if len(row) > 1 and not any(tag in row for tag in ignored)]

        if __name__ == "__main__":
            for row in issues:
                fields = row.split(":")
                if len(fields[0]) != 1:
                    continue
                # a single letter before the first colon: the first two
                # fields are merged back together
                fields[1] = ":".join(fields[0:2])
                del fields[0]
                print(
                    '  File "{0}", line {1}, {2}'.format(fields[0], fields[1], fields[-1]))

        if len(issues) > 0:
            raise Exception(
                "{0} lines\n{1}".format(len(issues), "\n".join(issues)))
    def test_flake8(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        # runs flake8 (looked for in the folder Scripts of the interpreter)
        # on the folder src located two levels above this file
        here = os.path.abspath(os.path.dirname(__file__))
        sources = os.path.normpath(os.path.join(here, "..", "..", "src"))
        flake8 = os.path.join(os.path.dirname(sys.executable), "Scripts", "flake8")
        out, err = run_cmd(flake8 + " " + sources, fLOG=fLOG, wait=True)

        # messages which are ignored
        ignored = ("E501", "__init__.py", "E265", "W291", "W293", "pandas_helper")
        issues = [row for row in out.split("\n")
                  if len(row) > 1 and not any(tag in row for tag in ignored)]

        if __name__ == "__main__":
            for row in issues:
                fields = row.split(":")
                if len(fields[0]) != 1:
                    continue
                # a single letter before the first colon: the first two
                # fields are merged back together
                fields[1] = ":".join(fields[0:2])
                del fields[0]
                print(
                    '  File "{0}", line {1}, {2}'.format(fields[0], fields[1], fields[-1]))

        # a single remaining line is tolerated
        if len(issues) > 1:
            raise Exception(
                "{0} lines\n{1}".format(len(issues), "\n".join(issues)))
def is_java_installed(fLOG=noLOG):
    """
    this function checks if java is installed

    @param      fLOG    logging function, it receives the standard output and error
    @return             boolean

    The function runs ``java -showversion`` and looks for the name of a Java
    runtime in the standard error. ``Java(TM)`` is the name used by
    Oracle's runtime, ``OpenJDK`` the one used by OpenJDK builds.
    """
    cmd = get_java_cmd() + " -showversion"
    out, err = run_cmd(cmd, wait=True, log_error=False)
    fLOG("OUT:\n", out)
    fLOG("ERR:\n", err)
    # checking "Java(TM)" only reports an OpenJDK installation as missing
    return "Java(TM)" in err or "OpenJDK" in err
Beispiel #7
0
    def _run_jython(self, cell, filename, func_name, args, true_jython=None):
        """
        run a jython script

        @param      cell            content of the cell, sent to the standard input
        @param      filename        filename used to store the content of the cell
        @param      func_name       function name, called on every row of the standard input
        @param      args            list of arguments to run (only used when *true_jython* is false)
        @param      true_jython     jython (True) or this Python (False)
        @return                     out, err

        The function creates a second script (``.py`` replaced by ``.temp.py``)
        which contains a definition of decorator ``outputSchema``,
        the content of *filename* and a loop which applies *func_name*
        to every row read from the standard input.
        """
        import textwrap

        # textwrap.dedent removes the indentation of the templates whatever
        # the indentation of this method is, removing a fixed number of spaces
        # breaks the generated script as soon as both do not match
        header = textwrap.dedent("""
            # -*- coding: utf8 -*-
            if __name__ != '__lib__':
                def outputSchema(dont_care):
                    def wrapper(func):
                        def inner(*args, **kwargs):
                            return func(*args, **kwargs)
                        return inner
                    return wrapper
            """)
        footer = textwrap.dedent("""
            if __name__ != '__lib__':
                import sys
                for row in sys.stdin:
                    row = row.strip()
                    res = {0}(row)
                    sys.stdout.write(str(res))
                    sys.stdout.write("\\n")
                    sys.stdout.flush()
            """).format(func_name)

        with open(filename, 'r', encoding="utf8") as pyf:
            content = pyf.read()

        temp = filename.replace(".py", ".temp.py")
        with open(temp, "w", encoding="utf8") as pyf:
            pyf.write(header)
            # Python 2 syntax for exceptions is converted into Python 3 syntax
            pyf.write(
                content.replace(
                    "except Exception,",
                    "except Exception as "))
            pyf.write(footer)

        if true_jython:
            download_java_standalone()
            out, err = run_jython(temp, sin=cell, timeout=10)
        else:
            cmd = sys.executable.replace(
                "pythonw",
                "python") + " " + temp + " " + " ".join("{}".format(_) for _ in args)
            out, err = run_cmd(
                cmd, wait=True, sin=cell, communicate=True, timeout=10, shell=False)

        # the documented result was computed but never returned
        return out, err
def run_dot(dot_file, outimg):
    """
    calls graphviz on a dot file and produces an image

    @param      dot_file        file, format `DOT <http://www.graphviz.org/doc/info/lang.html>`_
    @param      outimg          output image, its extension defines the output format
    @return                     out, err (stdout, stderr from graphviz)

    The function raises an exception if graphviz writes anything
    on the standard error.

    .. versionadded:: 1.1
    """
    image_format = os.path.splitext(outimg)[-1].strip(".")
    graphviz = dot_exe()
    cmd = "\"{0}\" -T{1} -o{2} {3}".format(graphviz, image_format, outimg, dot_file)
    out, err = run_cmd(cmd, wait=True)
    if len(err) == 0:
        return out, err
    message = "unable to run graphviz on {0}.\nCMD:\n{1}\nOUT:\n{2}\nERR:\n{3}".format(
        dot_file, cmd, out, err)
    raise Exception(message)
def git_commit_all(local_folder,
                   url_https,
                   message,
                   user=None,
                   password=None,
                   timeout=300,
                   fLOG=noLOG):
    """
    commits and pushes every modification of a local git repository,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name`` (not used)
    @param      message         message for the commit
    @param      user            part 1 of the credentials (not used)
    @param      password        part 2 of the credentials (not used)
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    We assume that git can be run without giving its full location.

    The function executes the following commands::

        cd [folder]
        git add -A
        git commit -m "[message]"
        git push -u origin master

    The commands are chained with ``&`` and the outputs are
    checked by *git_check_error*.
    """
    steps = ("cd {0}",
             "git add -A",
             "git commit -m \"{1}\"",
             "git push -u origin master")
    script = "\n".join(steps).format(local_folder, message)
    script = script.replace("            ", "").strip(" \n\r\t")
    cmd = script.replace("\n", "&")
    # nothing is sent to the standard input
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
    def test_script_pig(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        # folder temp_pypig_out is created next to folder data
        here = os.path.abspath(os.path.dirname(__file__))
        out_dir = os.path.join(here, "data", "..", "temp_pypig_out")
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        # script which reads rows from the standard input and writes
        # the columns given on the command line
        pyth = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        js = eval(row)
                        for station in js:
                            vals = [ station[c] for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace("                    ", "")

        script = os.path.join(out_dir, "pystream.py")
        with open(script, "w", encoding="utf8") as stream:
            stream.write(pyth)

        # one row sent to the script
        tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

        interpreter = sys.executable.replace("pythonw", "python")
        out, err = run_cmd(interpreter + " " + script + " name",
                           wait=True, sin=tosend,
                           communicate=True, timeout=3, shell=False)
        fLOG("OUT\n", out)
        fLOG("ERR\n", err)
        assert len(out) > 0
def git_change_remote_origin(local_folder,
                             url_https,
                             user=None,
                             password=None,
                             add_fetch=False,
                             timeout=10,
                             fLOG=noLOG):
    """
    Change the origin of the repository. The url and the password
    refer to the new repository.

    @param      local_folder    local folder
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      add_fetch       add instruction ``fetch``
    @param      timeout         timeout for the command line
    @param      fLOG            logging function
    @return                     None

    The function runs the instructions::

        cd [folder]
        git remote remove origin
        git remote add origin url

    followed by ``git fetch`` if *add_fetch* is True.
    The commands are chained with ``&`` and the outputs are
    checked by *git_check_error*.
    """
    url_user = git_url_user_password(url_https, user, password)
    steps = ("cd {0}",
             "git remote remove origin",
             "git remote add origin {1}")
    script = "\n".join(steps).format(local_folder, url_user)
    script = script.replace("            ", "").strip(" \n\r\t")
    if add_fetch:
        script = script + "\ngit fetch"
    cmd = script.replace("\n", "&")
    # nothing is sent to the standard input
    out, err = run_cmd(cmd, sin="", wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)
def build_grammar(g4, version="4.5", fLOG=noLOG):
    """
    compile the grammar for a specific file

    @param      g4          grammar format antlr4, if the extension ``.g4`` is missing,
                            the grammar is looked for in the folder of this file
    @param      version     version of *antlr4* to use, 4.4, 4.5-rc-2
    @param      fLOG        logging function
    @return                 standard output and standard error of the command line
                            in a single string

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at: `antlr4 options <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    The function downloads the *antlr4* jar next to this file if it is not
    already there and adds it to ``CLASSPATH``. The compilation is considered
    as failed if the lexer file was not produced or if the standard error
    contains ``error``.

    @example(Build a Antlr4 grammer)

    See `grammars-v4 <https://github.com/antlr/grammars-v4>`_

    @code
    build_grammar("R.g4")
    @endcode
    @endexample
    """
    if not g4.endswith(".g4"):
        fold = os.path.abspath(os.path.dirname(__file__))
        g4 = os.path.join(fold, g4 + ".g4")

    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(
        version)
    domain, name = url.rsplit("/", 1)
    domain += "/"
    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, name)

    if not os.path.exists(final):
        from ..resources.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # the jar is added in front of the existing CLASSPATH which is kept
    # (it used to be dropped when the jar was not already part of it),
    # os.pathsep / os.path.join replace the Windows-only ";" and "\\"
    previous = os.environ.get("CLASSPATH", "")
    if name not in previous:
        entries = [".", final]
        if previous:
            entries.append(previous)
        os.environ["CLASSPATH"] = os.pathsep.join(entries)
    fLOG("CLASSPATH", os.environ["CLASSPATH"])

    cmd = "org.antlr.v4.Tool -Dlanguage=Python3 " + g4
    from pyquickhelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True, fLOG=fLOG)

    def failed(stderr):
        # g4 always ends with ".g4" at this stage
        lexer = g4[:-len(".g4")] + "Lexer.py"
        return not os.path.exists(lexer) or "error" in stderr

    def failure(stderr):
        return ("unable to compile: " + final +
                "\nCLASSPATH:\n" + os.environ["CLASSPATH"] +
                "\nERR:\n" + stderr +
                "\nCMD:\njava " + cmd)

    if failed(err):
        # second chance on Windows: java may be installed without being in PATH
        javapath = r'C:\Program Files\Java\jre7\bin\java.exe'
        if sys.platform.startswith("win") and os.path.exists(javapath):
            # PATH contains folders, not executables
            os.environ["PATH"] = os.environ.get("PATH", "") + \
                os.pathsep + os.path.dirname(javapath)
            out, err = run_cmd(
                '"' + javapath + '" ' + cmd, wait=True, fLOG=fLOG)
            if failed(err):
                raise Exception(failure(err) + "\nYou should do it manually.")
        else:
            raise Exception(failure(err))

    return out + "\nERR:\n" + err
def git_clone(local_folder,
              url_https,
              user=None,
              password=None,
              timeout=60,
              init=True,
              fLOG=noLOG):
    """
    clone a project from a git repository in a local folder,
    it requires `GIT <http://git-scm.com/>`_ to be installed
    and uses the command line.

    @param      local_folder    local folder of the project
    @param      url_https       url, example ``https://gitlab.server/folder/project_name``
    @param      user            part 1 of the credentials
    @param      password        part 2 of the credentials
    @param      timeout         timeout for the command line
    @param      init            see below (True, use fetch, False, use clone)
    @param      fLOG            logging function
    @return                     *local_folder* if *init* is True, otherwise
                                *local_folder* joined with the last part of the url
                                (without ``.git``)

    The folder is created if it does not exist. The function raises an
    exception if it already contains a subfolder ``.git``.
    We assume that git can be run without giving its full location.

    The function executes the following commands (if init is True)::

        cd [folder]
        git init
        git remote add origin [https://user.password@server/project.git]
        git fetch

    Otherwise, it does::

        cd [folder]
        git clone [https://user.password@server/project.git] .

    The commands are chained with ``&`` and the outputs are
    checked by *git_check_error*.

    @example(Automation___Clone many folders in one row)

    @code
    eleves = "project1;project2;..."
    root = r"destination"

    for el in eleves.split(";"):
        cl = el.lower().replace(".","-")
        fold = os.path.join(root, el)
        if not os.path.exists(fold):
            print("clone", el)
            url = "https://<gitlab>/<group>/{0}.git".format(cl)
            git_clone(  fold, url,user=user,password=password, init=False,fLOG=print)
    @endcode

    @endexample

    """
    url_user = git_url_user_password(url_https, user, password)
    # parameter timeout is used as given, it used to be overwritten by 60
    local_folder = os.path.normpath(os.path.abspath(local_folder))

    if not os.path.exists(local_folder):
        fLOG("creating folder", local_folder)
        os.mkdir(local_folder)

    if os.path.exists(os.path.join(local_folder, ".git")):
        raise Exception("folder {0} should not exist".format(local_folder))

    if init:
        commands = ["cd {0}".format(local_folder),
                    "git init",
                    "git remote add origin {0}".format(url_user),
                    "git fetch"]
        result = local_folder
    else:
        final = os.path.split(url_user)[-1].replace(".git", "")
        locf = os.path.join(local_folder, final)
        if os.path.exists(locf):
            raise Exception(
                "folder {0} should not exists before cloning".format(locf))
        commands = ["cd {0}".format(local_folder),
                    "git clone {0} .".format(url_user)]
        result = locf

    cmd = "&".join(commands)
    sin = ""  # nothing is sent to the standard input
    out, err = run_cmd(cmd, sin=sin, wait=True, timeout=timeout, fLOG=fLOG)
    git_check_error(out, err, fLOG)

    return result
def compile_cython_single_script(script, fLOG=noLOG):
    """
    This function considers a script ``.pyx``, writes
    the proper setup file, and compiles it.

    @param      script      filename, it must exist and have extension ``.pyx``
    @param      fLOG        logging function
    @return                 standard output of the compilation

    The function applies the steps described in the basic tutorial
    `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_.
    The function creates a setup script ``<script>.setup.py``
    in the same location and runs it with ``build_ext --inplace``.
    It raises *ValueError* if the extension is not ``.pyx``,
    *FileNotFoundError* if the script does not exist and
    *CustomCythonError* if the compilation writes anything on the standard error.

    @FAQ(Cython___Compiler une function Cython?)

    This function compiles a `Cython <http://cython.org/>`_ script.
    This extension lets you implement Python functions in a
    pseudo-language close to `C <https://en.wikipedia.org/wiki/C_(programming_language)>`_.
    The instructions described in the tutorial
    `The Basics of Cython <http://docs.cython.org/src/tutorial/cython_tutorial.html>`_
    must be followed to be able to use a function written in Cython.
    This is what function :func:`compile_cython_single_script` does.

    As the pseudo C part is compiled to make it much faster,
    the most difficult part is usually to make sure
    the Python interpreter finds the <b>right</b> compiler.
    This compiler is necessarily the same as the one used to compile
    Python and it changes with every version.
    See
    `Compiling Python on Windows <https://docs.python.org/3/using/windows.html?highlight=visual%20studio#compiling-python-on-windows>`_
    and pay attention to the version of Python you are using.

    @endFAQ
    """
    ext = os.path.splitext(script)[-1]
    if ext != ".pyx":
        raise ValueError("no extension .pyx: " + script)
    if not os.path.exists(script):
        raise FileNotFoundError(script)

    current, name = os.path.split(script)

    # distutils was removed from the standard library in Python 3.12,
    # the generated script falls back on setuptools when it is missing
    setup_script = "\n".join([
        "",
        "try:",
        "    from distutils.core import setup",
        "except ImportError:",
        "    from setuptools import setup",
        "from Cython.Build import cythonize",
        "setup(",
        "    ext_modules = cythonize(\"{0}\")",
        ")",
        "",
    ]).format(name)

    filename = os.path.join(current, name + ".setup.py")
    # Python reads its sources as UTF-8 by default
    with open(filename, "w", encoding="utf8") as f:
        f.write(setup_script)

    cmd = sys.executable + " -u {0} build_ext --inplace".format(filename)

    out, err = run_cmd(cmd, wait=True, fLOG=fLOG, change_path=current)
    if len(err) > 0:
        raise CustomCythonError(
            "CMD:\n{0}\nOUT:\n{1}ERR:\n{2}".format(cmd, out, err))

    return out
Beispiel #15
0
def run_pig(pigfile,
            argv=None,
            pig_path=None,
            hadoop_path=None,
            jython_path=None,
            timeout=None,
            logpath="logs",
            fLOG=noLOG):
    """
    runs a pig script in local mode and returns the standard output and error

    @param      pigfile         pig file
    @param      argv            arguments to send to the command line (not used by this function)
    @param      pig_path        path to pig 0.XX.0
    @param      hadoop_path     path to hadoop 2.5.2
    @param      jython_path     not used by this function
    @param      timeout         timeout
    @param      logpath         path to the logs
    @param      fLOG            logging function
    @return                     out, err

    If *pig_path* is None, the function uses subfolder ``pig-0.14.0`` of the
    folder returned by *get_pig_path*. If *hadoop_path* is None, it uses
    the folder returned by *get_hadoop_path*.
    The environment variables ``JAVA_HOME``, ``PIG_CONF_DIR``, ``HADOOP_HOME``,
    ``HADOOP_CLIENT_OPTS`` are defined if they are missing.
    """
    if pig_path is None:
        pig_path = os.path.join(get_pig_path(), "pig-0.14.0")
    if hadoop_path is None:
        hadoop_path = get_hadoop_path()

    # environment variables are only defined when they are missing
    java_home = get_java_path()
    os.environ.setdefault("JAVA_HOME", java_home)

    if "PIG_CONF_DIR" not in os.environ:
        conf_dir = os.path.normpath(os.path.join(pig_path, "conf"))
        os.environ["PIG_CONF_DIR"] = conf_dir
        if not os.path.exists(conf_dir):
            raise FileNotFoundError(conf_dir)

    if "HADOOP_HOME" not in os.environ:
        os.environ["HADOOP_HOME"] = hadoop_path
        if not os.path.exists(hadoop_path):
            raise FileNotFoundError(hadoop_path)

    os.environ.setdefault("HADOOP_CLIENT_OPTS", "-Xmx1024m")

    fLOG("PIG_CONF_DIR=", os.environ["PIG_CONF_DIR"])

    def quote(position, piece):
        # the executable (first item) is left untouched
        if position > 0:
            if '"' in piece:
                piece = piece.replace('"', '\\"')
            if " " in piece:
                piece = '"{0}"'.format(piece)
        return piece

    def hadoop_share(*parts):
        return os.path.join(hadoop_path, "hadoop-2.5.2", "share", "hadoop", *parts)

    # the complete list of jars is disabled, only the legacy jar is used
    full = False
    if full:
        folders = set(os.path.split(j)[0] for j in get_pig_jars())
        jars = [os.path.join(f, "*.jar") for f in folders]
        hadoop_jars = [
            ("common", "lib", "*.jar"),
            ("hdfs", "lib", "*.jar"),
            ("mapreduce", "lib", "*.jar"),
            ("httpfs", "tomcat", "lib", "*.jar"),
            ("tools", "lib", "*.jar"),
            ("yarn", "lib", "*.jar"),
            ("common", "hadoop-common-2.5.2.jar"),
            ("common", "hadoop-nfs-2.5.2"),
            ("hdfs", "hadoop-hdfs-2.5.2.jar"),
            ("hdfs", "hadoop-hdfs-nfs-2.5.2.jar"),
            ("mapreduce", "*.jar"),
            ("yarn", "*.jar"),
        ]
        jars.extend(hadoop_share(*parts) for parts in hadoop_jars)
        jars.append(os.path.join(pig_path, "pig-0.14.0-core-h1.jar"))
    else:
        jars = [os.path.join(pig_path, "pig-0.14", "legacy",
                             "pig-0.14.0-withouthadoop-h2.jar")]

    # the files matching the patterns are only logged
    for found in sorted(hit for pattern in jars for hit in glob.glob(pattern)):
        fLOG(found)

    jars = ";".join(jars)
    fLOG("jars", jars)

    pieces = [get_java_cmd(), "-Xmx1024m",
              "-classpath", jars,
              "-Dpig.log.dir=" + logpath,
              "-Dhadoop.log.dir=" + logpath,
              "-Dhadoop.tmp.dir=" + logpath,
              "-Dpig.log.file=pid.log",
              "-Djava.io.tmpdir=" + logpath,
              "-Dpig.home.dir=" + pig_path,
              "org.apache.pig.Main",
              "-x", "local", pigfile,
              "-stop_on_failure"]

    command = " ".join([quote(pos, piece) for pos, piece in enumerate(pieces)])
    return run_cmd(command, wait=True, sin=None, communicate=True,
                   timeout=timeout, shell=False)
Beispiel #16
0
    def test_script_pig(self):
        """
        Run a PIG job which streams its input through a python script
        and check that the job produces a non empty result.

        Steps:

        * write ``pystream.py`` into the folder ``temp_pypig_az`` and run it
          locally through ``run_cmd`` on a two-station sample,
        * write ``pystream.pig`` which ships that script and streams the
          rows of ``$CONTAINER/$UTT/*.txt`` through it,
        * upload the local data files whose path contains ``paris`` when the
          remote folder ``unittest2`` is empty, remove a previous result,
        * submit the job, wait for it, check its error output and download
          the merged result.

        The test returns immediately (without failing) when ``self.client``
        is ``None``.

        @raise  Exception           the local run does not produce two lines
                                    or the job output does not contain
                                    ``Total records written : 4``
        @raise  FileNotFoundError   the merged result was not downloaded
        @raise  AssertionError      the merged result is empty
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        if self.client is None:
            # no client available: nothing can be tested
            return
        data = os.path.join(os.path.abspath(os.path.split(__file__)[0]), "data")

        fold = os.path.join(data, "..", "temp_pypig_az")
        # exist_ok avoids the check-then-create race of exists() + mkdir()
        os.makedirs(fold, exist_ok=True)

        # python script
        # the leading 20 spaces of every line are removed by the replace below,
        # deeper levels keep their relative indentation

        pyth = """
                    import sys, datetime
                    cols = [ _ for _ in sys.argv if ".py" not in _ ]
                    for row in sys.stdin:
                        row = row.strip()
                        if len(row) == 0 :
                            continue
                        js = eval(row)
                        for station in js:
                            vals = [ str(station[c]).strip() for c in cols ]
                            sys.stdout.write(",".join(vals))
                            sys.stdout.write("\\n")
                            sys.stdout.flush()
                """.replace(
            "                    ", ""
        )

        pyfile = os.path.join(fold, "pystream.py")
        with open(pyfile, "w", encoding="utf8") as f:
            f.write(pyth)

        # sample given to the script for the local check: a list of two stations
        tosend = """[{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33},{'address': "52 RUE D'ENGHIEN / ANGLE RUE DU FAUBOURG POISSONIERE - 75010 PARIS", 'collect_date': datetime.datetime(2014, 11, 11, 22, 1, 18, 331070), 'lng': 2.348395236282807, 'contract_name': 'Paris', 'name': '10042 - POISSONNIÈRE - ENGHIEN', 'banking': 0, 'lat': 48.87242006305313, 'bonus': 0, 'status': 'OPEN', 'available_bikes': 32, 'last_update': datetime.datetime(2014, 11, 11, 21, 59, 5), 'number': 10042, 'available_bike_stands': 1, 'bike_stands': 33}]"""

        # local sanity check of the script: one output line per station is expected
        cmd = sys.executable.replace("pythonw", "python") + " " + pyfile + " name"
        out, err = run_cmd(cmd, wait=True, sin=tosend, communicate=True, timeout=3, shell=False)
        out = out.strip("\n\r ")
        spl = out.split("\n")
        if len(spl) != 2:
            # report the number of lines, which is the quantity which was checked
            raise Exception("len:{2}\nOUT:\n{0}\nERR:\n{1}".format(out, err, len(spl)))

        # PIG script
        # the leading 16 spaces of every line are removed by the replace below

        pig = """
                DEFINE pystream `python pystream.py bonus available_bike_stands available_bikes lat lng name status`
                        SHIP ('pystream.py')
                        INPUT(stdin USING PigStreaming(',')) OUTPUT (stdout USING PigStreaming(','));

                jspy = LOAD '$CONTAINER/$UTT/*.txt' USING PigStorage('\t') AS (arow:chararray);

                --DUMP jspy ;

                matrice = STREAM jspy THROUGH pystream AS
                                (   bonus:chararray,
                                    available_bike_stands:double,
                                    available_bikes:double,
                                    lat:double,
                                    lng:double,
                                    name:chararray,
                                    status:chararray) ;

                DUMP matrice ;

                DESCRIBE jspy ;
                DESCRIBE matrice ;

                STORE matrice INTO '$CONTAINER/$PSEUDO/unittest2/results.txt' USING PigStorage('\t') ;
            """.replace(
            "                ", ""
        )

        pigfile = os.path.join(fold, "pystream.pig")
        with open(pigfile, "w", encoding="utf8") as f:
            f.write(pig)

        # we upload some files

        files = [os.path.join(data, _) for _ in os.listdir(data)]
        files = [_ for _ in files if os.path.isfile(_) and "paris" in _]

        # len() is kept on purpose: the type returned by ls is not visible here
        # and may not support a truth test
        content = self.client.ls(self.blob_serv, self.container, "unittest2")
        if len(content) == 0:
            self.client.upload(self.blob_serv, self.container, "unittest2", files)

        # NOTE(review): the script stores into $PSEUDO/unittest2/results.txt whereas
        # this cleanup looks at unittest2/results.txt -- confirm both are the same place
        if self.client.exists(self.blob_serv, self.container, "unittest2/results.txt"):
            self.client.delete_folder(self.blob_serv, self.container, "unittest2/results.txt")

        # we submit the job
        # recall is always None here; presumably it can be set by hand to the id
        # of an already submitted job to skip the submission while debugging
        recall = None
        if recall is None:
            job = self.client.pig_submit(
                self.blob_serv, self.container, pigfile, dependencies=[pyfile], params=dict(UTT="unittest2")
            )
            job_id = job["id"]
        else:
            job_id = recall

        status = self.client.wait_job(job_id, fLOG=fLOG)

        out, err = self.client.standard_outputs(status, self.blob_serv, self.container, fold)

        if "Total records written : 4" not in err:
            raise Exception("OUT:\n{0}\nERR:\n{1}".format(out, err))

        # we download the results, a previous local copy is removed first
        dest = os.path.join(fold, "out_merged.txt")
        fLOG("dest=", dest)
        if os.path.exists(dest):
            os.remove(dest)
        self.client.download_merge(self.blob_serv, self.container, "$PSEUDO/unittest2/results.txt", dest)
        if not os.path.exists(dest):
            raise FileNotFoundError(dest)
        with open(dest, "r", encoding="utf8") as f:
            content = f.read()
        fLOG("-----\n", content)
        assert len(content.strip(" \n\r\t")) > 0, "the merged result is empty: " + dest