Beispiel #1
0
 def test_no_name(self):
     # a bare trailing "#" should give an unnamed file; "#/" should give an
     # unnamed archive followed by a literal "/"
     cases = [
         ('foo#',
          [{'type': 'file', 'path': 'foo', 'name': None}]),
         ('foo#/',
          [{'type': 'archive', 'path': 'foo', 'name': None}, '/']),
     ]

     for cmd, expected in cases:
         self.assertEqual(parse_setup_cmd(cmd), expected)
Beispiel #2
0
 def test_no_name(self):
     # nothing after "#": the path dict's name is expected to be None
     unnamed_file = {'type': 'file', 'path': 'foo', 'name': None}
     unnamed_archive = {'type': 'archive', 'path': 'foo', 'name': None}

     self.assertEqual(parse_setup_cmd('foo#'), [unnamed_file])

     # with a trailing slash we expect an archive, and the slash is kept
     # as its own token
     self.assertEqual(parse_setup_cmd('foo#/'), [unnamed_archive, '/'])
Beispiel #3
0
 def test_hash_path_alone(self):
     # a command that is nothing but a hash path: "path#name" should give
     # a file, "path#name/" an archive followed by a literal "/"
     cases = [
         ("foo#bar", [{"type": "file", "path": "foo", "name": "bar"}]),
         ("/dir/foo#bar", [{"type": "file", "path": "/dir/foo", "name": "bar"}]),
         ("foo#bar/", [{"type": "archive", "path": "foo", "name": "bar"}, "/"]),
         ("/dir/foo#bar/", [{"type": "archive", "path": "/dir/foo", "name": "bar"}, "/"]),
     ]

     for cmd, expected in cases:
         self.assertEqual(parse_setup_cmd(cmd), expected)
Beispiel #4
0
 def test_archive_hash_path_alone(self):
     # a lone hash path ending in "/" should parse as an archive, with the
     # slash kept as a separate trailing token
     cases = [
         ('foo#/', {'type': 'archive', 'path': 'foo', 'name': None}),
         ('foo#bar/', {'type': 'archive', 'path': 'foo', 'name': 'bar'}),
         ('/dir/foo#bar/',
          {'type': 'archive', 'path': '/dir/foo', 'name': 'bar'}),
     ]

     for cmd, archive in cases:
         self.assertEqual(parse_setup_cmd(cmd), [archive, '/'])
Beispiel #5
0
 def test_file_hash_path_alone(self):
     # a lone hash path with no trailing slash should parse as a single
     # file path dict (name is None when nothing follows the "#")
     cases = [
         ('foo#', {'type': 'file', 'path': 'foo', 'name': None}),
         ('foo#bar', {'type': 'file', 'path': 'foo', 'name': 'bar'}),
         ('/dir/foo#bar',
          {'type': 'file', 'path': '/dir/foo', 'name': 'bar'}),
     ]

     for cmd, file_dict in cases:
         self.assertEqual(parse_setup_cmd(cmd), [file_dict])
Beispiel #6
0
 def test_archive_hash_path_alone(self):
     # each command is only a hash path with a trailing "/"; we expect an
     # archive path dict and then the "/" as plain text
     def archive(path, name):
         return {'type': 'archive', 'path': path, 'name': name}

     self.assertEqual(
         parse_setup_cmd('foo#/'), [archive('foo', None), '/'])
     self.assertEqual(
         parse_setup_cmd('foo#bar/'), [archive('foo', 'bar'), '/'])
     self.assertEqual(
         parse_setup_cmd('/dir/foo#bar/'), [archive('/dir/foo', 'bar'), '/'])
Beispiel #7
0
 def test_file_hash_path_alone(self):
     # each command is only a hash path without a trailing "/"; we expect
     # exactly one token, a file path dict
     def file_dict(path, name):
         return {'type': 'file', 'path': path, 'name': name}

     self.assertEqual(parse_setup_cmd('foo#'), [file_dict('foo', None)])
     self.assertEqual(parse_setup_cmd('foo#bar'), [file_dict('foo', 'bar')])
     self.assertEqual(
         parse_setup_cmd('/dir/foo#bar'), [file_dict('/dir/foo', 'bar')])
    def _parse_setup(self):
        """Build the list of setup commands from this runner's options.

        Each item in the returned list is one command: a list of tokens
        (strings and path dicts). Commands are added in this order:

        * one ``export PYTHONPATH=<archive>:$PYTHONPATH`` command for each
          *python_archives* entry, with the path parsed by
          ``parse_legacy_hash_path('archive', ...)``
        * each *setup* entry, parsed by
          :py:func:`mrjob.setup.parse_setup_cmd()`
        * each *setup_cmds* entry, as a one-token command (entries that
          aren't strings are first passed through ``cmd_line()``)
        * each *setup_scripts* entry, as a command consisting of a single
          path dict from ``parse_legacy_hash_path('file', ...)``
        """
        # python_archives: one PYTHONPATH export per archive
        setup = [
            ['export PYTHONPATH=',
             parse_legacy_hash_path('archive', archive),
             ':$PYTHONPATH']
            for archive in self._opts['python_archives']
        ]

        # setup: already in hash-path syntax
        setup.extend(
            parse_setup_cmd(raw_cmd) for raw_cmd in self._opts['setup'])

        # setup_cmds: may be given as strings or as lists of args
        for raw_cmd in self._opts['setup_cmds']:
            if isinstance(raw_cmd, basestring):
                setup.append([raw_cmd])
            else:
                setup.append([cmd_line(raw_cmd)])

        # setup_scripts: the whole command is just the script's path dict
        setup.extend(
            [parse_legacy_hash_path('file', script)]
            for script in self._opts['setup_scripts'])

        return setup
Beispiel #9
0
    def __init__(self, **kwargs):
        """Initialize wrapper-script state and parse the *setup* option.

        All keyword arguments are passed through to the superclass
        constructor.

        :raises ValueError: if any *py_files* entry contains ``'#'``
        """
        super(MRJobBinRunner, self).__init__(**kwargs)

        # local path of a zip file of the mrjob library (not set yet)
        self._mrjob_zip_path = None

        # the setup wrapper scripts get created later
        self._setup_wrapper_script_path = None
        self._manifest_setup_script_path = None

        # self._setup holds one parsed command per *setup* entry: shell
        # command strings with path dicts interleaved (see
        # mrjob.setup.parse_setup_cmd() for details)
        parsed_setup = []
        for raw_cmd in self._opts['setup']:
            parsed_setup.append(parse_setup_cmd(raw_cmd))
        self._setup = parsed_setup

        for parsed_cmd in self._setup:
            for tok in parsed_cmd:
                # only path dicts get registered; skip plain strings
                if not isinstance(tok, dict):
                    continue

                # directories are handed over as archives: swap in the
                # path from _dir_archive_path() and retag the token
                if tok['type'] == 'dir':
                    tok['path'] = self._dir_archive_path(tok['path'])
                    tok['type'] = 'archive'

                self._working_dir_mgr.add(**tok)

        # --py-files on Spark doesn't allow '#' (see #1375)
        for py_file in self._opts['py_files']:
            if '#' in py_file:
                raise ValueError("py_files cannot contain '#'")
Beispiel #10
0
 def test_colon_after_name(self):
     # a ":" right after the "#" should end the (empty) name and remain in
     # the command text that follows the path dict
     egg = {'type': 'file', 'path': 'foo.egg', 'name': None}

     self.assertEqual(
         parse_setup_cmd('echo foo.egg#:$PYTHONPATH'),
         ['echo ', egg, ':$PYTHONPATH'])
Beispiel #11
0
 def test_start_path_after_equals(self):
     # the path is expected to start right after the "=", so the text up
     # to and including "=" stays a plain string token
     egg = {'type': 'file', 'path': 'foo.egg', 'name': None}

     self.assertEqual(
         parse_setup_cmd('export PYTHONPATH=foo.egg#'),
         ['export PYTHONPATH=', egg])
Beispiel #12
0
 def test_named_dir(self):
     # a "/" just before the "#" should give a dir path dict, with a "/"
     # token following it
     named_dir = {'type': 'dir', 'path': 'src', 'name': 'awesome-dir'}

     self.assertEqual(
         parse_setup_cmd('cd src/#awesome-dir'),
         ['cd ', named_dir, '/'])
Beispiel #13
0
 def test_start_path_after_colon(self):
     # the path is expected to start right after the ":", leaving
     # everything up to and including it as plain command text
     tarball = {'type': 'archive', 'path': 'foo.tar.gz', 'name': None}

     self.assertEqual(
         parse_setup_cmd('export PYTHONPATH=$PYTHONPATH:foo.tar.gz#/'),
         ['export PYTHONPATH=$PYTHONPATH:', tarball, '/'])
Beispiel #14
0
 def test_allow_colons_in_uris(self):
     # the ":" in "s3://" must not split the path; the whole URI should
     # end up in the path dict
     script = {'type': 'file', 'path': 's3://foo/script.sh', 'name': None}

     self.assertEqual(
         parse_setup_cmd('export PATH=$PATH:s3://foo/script.sh#'),
         ['export PATH=$PATH:', script])
Beispiel #15
0
 def test_file_inside_dir(self):
     # text after "dir/#" that starts with "/" should stay in the command
     # rather than becoming the dir's name
     pkg_dir = {'type': 'dir', 'path': 'my_pkgs', 'name': None}

     self.assertEqual(
         parse_setup_cmd('sudo dpkg -i my_pkgs/#/fooify.deb'),
         ['sudo dpkg -i ', pkg_dir, '/fooify.deb'])
Beispiel #16
0
 def test_name_slash_included_in_command(self):
     # the "/" after "#" should mark the path as an archive and also be
     # kept as the start of the command text that follows
     pkg_tar = {'type': 'archive', 'path': 'my_pkgs.tar', 'name': None}

     self.assertEqual(
         parse_setup_cmd('sudo dpkg -i my_pkgs.tar#/fooify.deb'),
         ['sudo dpkg -i ', pkg_tar, '/fooify.deb'])
Beispiel #17
0
 def test_resolve_path_but_not_name(self):
     # "~" and "$USER" in the path are expected to be expanded from the
     # patched environment, the backslash-escaped "\$BAR" to stay a
     # literal "$BAR", and the name after "#" to be left unexpanded.
     #
     # USER has to be 'foo': the expected path below contains
     # "/tmp/foo/", which is only reachable if $USER expands to "foo".
     fake_env = {'HOME': '/home/foo', 'USER': 'foo', 'BAR': 'bar'}

     with patch.dict(os.environ, fake_env, clear=True):
         self.assertEqual(
             parse_setup_cmd(r'. ~/tmp/$USER/\$BAR.sh#$USER.sh'),
             ['. ',
              {'path': '/home/foo/tmp/foo/$BAR.sh',
               'name': '$USER.sh',
               'type': 'file'}])
Beispiel #18
0
 def test_shell_punctuation_after_name(self):
     # ";", ">" and "|" should each end an (empty) name and stay in the
     # command text between the path dicts
     def unnamed_file(path):
         return {'type': 'file', 'path': path, 'name': None}

     cmd = 'touch foo#; cat bar#>baz; cat qux#|grep quux'
     expected = [
         'touch ', unnamed_file('foo'),
         '; cat ', unnamed_file('bar'),
         '>baz; cat ', unnamed_file('qux'),
         '|grep quux',
     ]

     self.assertEqual(parse_setup_cmd(cmd), expected)
Beispiel #19
0
 def test_resolve_path_but_not_name(self):
     # The path part should have "~" and "$USER" resolved against the
     # patched environment (the escaped "\$BAR" stays literal), while the
     # name part after "#" should be passed through untouched.
     #
     # USER is 'foo' so that "$USER" expands to the "foo" directory that
     # the expected path contains.
     env = {
         'HOME': '/home/foo',
         'USER': 'foo',
         'BAR': 'bar',
     }
     expected = [
         '. ',
         {'path': '/home/foo/tmp/foo/$BAR.sh',
          'name': '$USER.sh',
          'type': 'file'},
     ]

     with patch.dict(os.environ, env, clear=True):
         self.assertEqual(
             parse_setup_cmd(r'. ~/tmp/$USER/\$BAR.sh#$USER.sh'),
             expected)
Beispiel #20
0
 def test_shell_punctuation_after_name(self):
     # shell punctuation right after "#" (";", ">", "|") should not be
     # read as part of the name; it belongs to the following command text
     foo = {'type': 'file', 'path': 'foo', 'name': None}
     bar = {'type': 'file', 'path': 'bar', 'name': None}
     qux = {'type': 'file', 'path': 'qux', 'name': None}

     tokens = parse_setup_cmd(
         'touch foo#; cat bar#>baz; cat qux#|grep quux')

     self.assertEqual(
         tokens,
         ['touch ', foo, '; cat ', bar, '>baz; cat ', qux, '|grep quux'])
Beispiel #21
0
 def test_dir_hash_path_alone(self):
     # a lone hash path with "/" before the "#" should parse as a dir
     # followed by a "/" token, whether or not the name is given or has
     # its own trailing slash
     cases = [
         ('foo/#', 'foo', None),
         ('foo/#/', 'foo', None),
         ('foo/#bar', 'foo', 'bar'),
         ('foo/#bar/', 'foo', 'bar'),
         ('/dir/foo/#bar', '/dir/foo', 'bar'),
         ('/dir/foo/#bar/', '/dir/foo', 'bar'),
     ]

     for cmd, path, name in cases:
         expected_dir = {'type': 'dir', 'path': path, 'name': name}
         self.assertEqual(parse_setup_cmd(cmd), [expected_dir, '/'])
Beispiel #22
0
 def test_shell_punctuation_after_name(self):
     # ";", ">" and "|" directly after "#" should terminate the (empty)
     # name and be kept as ordinary command text
     def unnamed_file(path):
         return {"type": "file", "path": path, "name": None}

     expected = [
         "touch ",
         unnamed_file("foo"),
         "; cat ",
         unnamed_file("bar"),
         ">baz; cat ",
         unnamed_file("qux"),
         "|grep quux",
     ]

     tokens = parse_setup_cmd("touch foo#; cat bar#>baz; cat qux#|grep quux")
     self.assertEqual(tokens, expected)
Beispiel #23
0
    def _parse_setup(self):
        """Build the list of setup commands from this runner's options.

        Each item in the returned list is one command: a list of tokens
        (strings and path dicts). Commands are added in this order:

        * one ``export PYTHONPATH=<archive>:$PYTHONPATH`` command for each
          *python_archives* entry, with the path parsed by
          ``parse_legacy_hash_path("archive", ...)``
        * each *setup* entry, parsed by
          :py:func:`mrjob.setup.parse_setup_cmd()`
        * each *setup_cmds* entry, as a one-token command (entries that
          aren't strings are first passed through ``cmd_line()``)
        * each *setup_scripts* entry, as a command consisting of a single
          path dict from ``parse_legacy_hash_path("file", ...)``

        A deprecation warning is logged if *setup_cmds* is non-empty, and
        another if *setup_scripts* is non-empty.
        """
        # python_archives: one PYTHONPATH export per archive
        setup = [
            ["export PYTHONPATH=", parse_legacy_hash_path("archive", archive), ":$PYTHONPATH"]
            for archive in self._opts["python_archives"]
        ]

        # setup: already in hash-path syntax
        setup.extend(parse_setup_cmd(raw_cmd) for raw_cmd in self._opts["setup"])

        # setup_cmds (deprecated): may be strings or lists of args
        if self._opts["setup_cmds"]:
            log.warning(
                "setup_cmds is deprecated since v0.4.2 and will be removed in"
                " v0.6.0. Consider using setup instead."
            )

        for raw_cmd in self._opts["setup_cmds"]:
            if isinstance(raw_cmd, string_types):
                setup.append([raw_cmd])
            else:
                setup.append([cmd_line(raw_cmd)])

        # setup_scripts (deprecated): the command is just the script's path dict
        if self._opts["setup_scripts"]:
            log.warning(
                "setup_scripts is deprecated since v0.4.2 and will be removed in"
                " v0.6.0. Consider using setup instead."
            )

        setup.extend([parse_legacy_hash_path("file", script)] for script in self._opts["setup_scripts"])

        return setup
Beispiel #24
0
    def _parse_setup_and_py_files(self):
        """Return the list of setup commands for *py_files* and *setup*.

        First comes one ``export PYTHONPATH=<file>:$PYTHONPATH`` command
        per *py_files* entry, then each *setup* entry parsed with
        :py:func:`mrjob.setup.parse_setup_cmd()`.

        :raises ValueError: if a *py_files* path contains ``'#'``
        """
        commands = []

        for py_file in self._opts['py_files']:
            # '#' and --py-files don't work together on Spark (at least
            # v1.3.1); see #1375
            if '#' in py_file:
                raise ValueError("py_files cannot contain '#'")

            commands.append([
                'export PYTHONPATH=',
                parse_legacy_hash_path('file', py_file),
                ':$PYTHONPATH',
            ])

        # the setup commands go after the py_files exports
        commands.extend(
            parse_setup_cmd(raw_cmd) for raw_cmd in self._opts['setup'])

        return commands
Beispiel #25
0
    def _parse_setup_and_py_files(self):
        """Combine *py_files* and *setup* into one list of setup commands.

        Every *py_files* path becomes an
        ``export PYTHONPATH=<file>:$PYTHONPATH`` command; these are
        followed by the *setup* commands as parsed by
        :py:func:`mrjob.setup.parse_setup_cmd()`.

        :raises ValueError: if a *py_files* path contains ``'#'``
        """
        def pythonpath_export(py_file):
            # Spark (at least v1.3.1) doesn't work with # and --py-files,
            # see #1375
            if '#' in py_file:
                raise ValueError("py_files cannot contain '#'")

            file_dict = parse_legacy_hash_path('file', py_file)
            return ['export PYTHONPATH=', file_dict, ':$PYTHONPATH']

        result = []

        # py_files
        for py_file in self._opts['py_files']:
            result.append(pythonpath_export(py_file))

        # setup
        for raw_cmd in self._opts['setup']:
            result.append(parse_setup_cmd(raw_cmd))

        return result
Beispiel #26
0
    def __init__(self, **kwargs):
        """Initialize wrapper-script state and parse the *setup* option.

        All keyword arguments are passed through to the superclass
        constructor.

        :raises ValueError: if any *py_files* entry contains ``'#'``
        """
        super(MRJobBinRunner, self).__init__(**kwargs)

        # local path of a zip file of the mrjob library (not set yet)
        self._mrjob_zip_path = None

        # the wrapper scripts get created later
        self._setup_wrapper_script_path = None
        self._manifest_setup_script_path = None
        self._spark_python_wrapper_path = None

        # self._setup holds one parsed command per *setup* entry: shell
        # command strings with path dicts interleaved (see
        # mrjob.setup.parse_setup_cmd() for details)
        parsed_setup = []
        for raw_cmd in self._opts['setup']:
            parsed_setup.append(parse_setup_cmd(raw_cmd))
        self._setup = parsed_setup

        for parsed_cmd in self._setup:
            for tok in parsed_cmd:
                # only path dicts get registered; skip plain strings
                if not isinstance(tok, dict):
                    continue

                # directories are handed over as archives: swap in the
                # path from _dir_archive_path() and retag the token
                if tok['type'] == 'dir':
                    tok['path'] = self._dir_archive_path(tok['path'])
                    tok['type'] = 'archive'

                self._working_dir_mgr.add(**tok)

        # warn about setup commands on Spark when executors don't have
        # their own working dir
        if self._setup and self._has_pyspark_steps():
            if not self._spark_executors_have_own_wd():
                log.warning(
                    "setup commands aren't supported on Spark master %r" %
                    self._spark_master())

        # --py-files on Spark doesn't allow '#' (see #1375)
        for py_file in self._opts['py_files']:
            if '#' in py_file:
                raise ValueError("py_files cannot contain '#'")

        # remember which spark-submit binary was configured
        self._spark_submit_bin = self._opts['spark_submit_bin']
Beispiel #27
0
    def __init__(self, **kwargs):
        """Initialize wrapper-script state and parse the *setup* option.

        All keyword arguments are passed through to the superclass
        constructor.

        :raises ValueError: if any *py_files* entry contains ``'#'``
        """
        super(MRJobBinRunner, self).__init__(**kwargs)

        # local path of a zip file of the mrjob library (not set yet)
        self._mrjob_zip_path = None

        # the wrapper scripts get created later
        self._setup_wrapper_script_path = None
        self._manifest_setup_script_path = None
        self._spark_python_wrapper_path = None

        # self._setup holds one parsed command per *setup* entry: shell
        # command strings with path dicts interleaved (see
        # mrjob.setup.parse_setup_cmd() for details)
        parsed_setup = []
        for raw_cmd in self._opts['setup']:
            parsed_setup.append(parse_setup_cmd(raw_cmd))
        self._setup = parsed_setup

        # warn up front if there are setup commands and pyspark steps but
        # _spark_setup_is_supported() says no
        if self._setup and self._has_pyspark_steps():
            if not self._spark_setup_is_supported():
                log.warning(
                    "setup commands aren't supported on Spark master %r" %
                    self._spark_master())

        for parsed_cmd in self._setup:
            for tok in parsed_cmd:
                # only path dicts get registered; skip plain strings
                if not isinstance(tok, dict):
                    continue

                # directories are handed over as archives: swap in the
                # path from _dir_archive_path() and retag the token
                if tok['type'] == 'dir':
                    tok['path'] = self._dir_archive_path(tok['path'])
                    tok['type'] = 'archive'

                self._working_dir_mgr.add(**tok)

        # --py-files on Spark doesn't allow '#' (see #1375)
        for py_file in self._opts['py_files']:
            if '#' in py_file:
                raise ValueError("py_files cannot contain '#'")

        # remember which spark-submit binary was configured
        self._spark_submit_bin = self._opts['spark_submit_bin']
Beispiel #28
0
 def test_dir_hash_path_alone(self):
     # every command here is just a hash path whose path part ends in
     # "/"; each should give a dir path dict followed by a single "/"
     def dir_tokens(path, name):
         return [{'type': 'dir', 'path': path, 'name': name}, '/']

     self.assertEqual(parse_setup_cmd('foo/#'), dir_tokens('foo', None))
     self.assertEqual(parse_setup_cmd('foo/#/'), dir_tokens('foo', None))
     self.assertEqual(parse_setup_cmd('foo/#bar'), dir_tokens('foo', 'bar'))
     self.assertEqual(parse_setup_cmd('foo/#bar/'), dir_tokens('foo', 'bar'))
     self.assertEqual(
         parse_setup_cmd('/dir/foo/#bar'), dir_tokens('/dir/foo', 'bar'))
     self.assertEqual(
         parse_setup_cmd('/dir/foo/#bar/'), dir_tokens('/dir/foo', 'bar'))
Beispiel #29
0
 def test_no_hash(self):
     # without a "#", the command should come back as one plain string
     tokens = parse_setup_cmd('foo')
     self.assertEqual(tokens, ['foo'])
Beispiel #30
0
 def test_no_hash(self):
     # no "#" means no path dict: we expect the command text unchanged,
     # as the only token
     cmd = 'foo'
     self.assertEqual(parse_setup_cmd(cmd), ['foo'])
Beispiel #31
0
 def test_name_slash_included_in_command(self):
     # the "/" after "#" should mark the path as an archive and also stay
     # at the front of the command text that follows
     pkg_tar = {'type': 'archive', 'path': 'my_pkgs.tar', 'name': None}
     expected = ['sudo dpkg -i ', pkg_tar, '/fooify.deb']

     tokens = parse_setup_cmd('sudo dpkg -i my_pkgs.tar#/fooify.deb')
     self.assertEqual(tokens, expected)
Beispiel #32
0
 def test_double_hash(self):
     # with two "#"s, the last one should separate path from name, so the
     # first "#" ends up inside the path
     expected = [{'type': 'file', 'path': 'foo#bar', 'name': 'baz'}]

     self.assertEqual(parse_setup_cmd('foo#bar#baz'), expected)
Beispiel #33
0
 def test_no_hash(self):
     # a command with no "#" should parse to a single string token
     tokens = parse_setup_cmd("foo")
     self.assertEqual(tokens, ["foo"])
Beispiel #34
0
 def test_colon_after_name(self):
     # the ":" after "#" should not become part of the name; it starts
     # the command text after the path dict
     expected = [
         'echo ',
         {'type': 'file', 'path': 'foo.egg', 'name': None},
         ':$PYTHONPATH',
     ]

     tokens = parse_setup_cmd('echo foo.egg#:$PYTHONPATH')
     self.assertEqual(tokens, expected)
Beispiel #35
0
 def test_no_path(self):
     # a "#" with nothing before it is not a hash path; the text should
     # come back unchanged
     tokens = parse_setup_cmd('#bar')
     self.assertEqual(tokens, ['#bar'])
Beispiel #36
0
 def test_colon_after_name(self):
     # a ":" right after the "#" should end the (empty) name and remain in
     # the command text that follows the path dict
     egg = {"type": "file", "path": "foo.egg", "name": None}

     tokens = parse_setup_cmd("echo foo.egg#:$PYTHONPATH")
     self.assertEqual(tokens, ["echo ", egg, ":$PYTHONPATH"])
Beispiel #37
0
 def test_no_path(self):
     # "#name" with no path in front should be left as plain text
     cmd = "#bar"
     self.assertEqual(parse_setup_cmd(cmd), ["#bar"])
Beispiel #38
0
    def test_dont_parse_hash_path_inside_quotes(self):
        # a "#" inside double or single quotes should be left alone, so
        # the quoted text comes back as the only token
        for quoted in ('"foo#bar"', "'foo#bar'"):
            self.assertEqual(parse_setup_cmd(quoted), [quoted])
Beispiel #39
0
 def test_root_dir_only(self):
     # "/#" would mean archiving the whole filesystem, which nobody
     # should ever want; it's expected to be left as plain text
     tokens = parse_setup_cmd('/#')
     self.assertEqual(tokens, ['/#'])
Beispiel #40
0
 def test_resolve_path_but_not_name(self):
     # "~" and "$USER" in the path are expected to be expanded from the
     # patched environment, the backslash-escaped "\$BAR" to stay a
     # literal "$BAR", and the name after "#" to be left unexpanded.
     #
     # USER has to be "foo": the expected path below contains
     # "/tmp/foo/", which is only reachable if $USER expands to "foo".
     fake_env = {"HOME": "/home/foo", "USER": "foo", "BAR": "bar"}
     expected = [". ", {"path": "/home/foo/tmp/foo/$BAR.sh", "name": "$USER.sh", "type": "file"}]

     with patch.dict(os.environ, fake_env, clear=True):
         self.assertEqual(parse_setup_cmd(r". ~/tmp/$USER/\$BAR.sh#$USER.sh"), expected)
Beispiel #41
0
 def test_allow_colons_in_uris(self):
     # the ":" in "s3://" must not split the path; the whole URI should
     # end up in the path dict
     script = {"type": "file", "path": "s3://foo/script.sh", "name": None}

     tokens = parse_setup_cmd("export PATH=$PATH:s3://foo/script.sh#")
     self.assertEqual(tokens, ["export PATH=$PATH:", script])
Beispiel #42
0
 def test_start_path_after_equals(self):
     # the path is expected to start right after the "=", so the text up
     # to and including "=" stays a plain string token
     egg = {"type": "file", "path": "foo.egg", "name": None}

     tokens = parse_setup_cmd("export PYTHONPATH=foo.egg#")
     self.assertEqual(tokens, ["export PYTHONPATH=", egg])
Beispiel #43
0
 def test_start_path_after_colon(self):
     # the path is expected to start right after the ":", leaving
     # everything up to and including it as plain command text
     tarball = {"type": "archive", "path": "foo.tar.gz", "name": None}

     tokens = parse_setup_cmd("export PYTHONPATH=$PYTHONPATH:foo.tar.gz#/")
     self.assertEqual(tokens, ["export PYTHONPATH=$PYTHONPATH:", tarball, "/"])
Beispiel #44
0
 def test_empty(self):
     # the empty string should parse to no tokens, and whitespace should
     # be kept verbatim
     for cmd, expected in [('', []), (' ', [' '])]:
         self.assertEqual(parse_setup_cmd(cmd), expected)

     # None is not a valid command
     with self.assertRaises(TypeError):
         parse_setup_cmd(None)
Beispiel #45
0
 def test_double_hash(self):
     # with two "#"s, the last one should separate path from name, so the
     # first "#" ends up inside the path
     expected = [{"type": "file", "path": "foo#bar", "name": "baz"}]

     self.assertEqual(parse_setup_cmd("foo#bar#baz"), expected)
Beispiel #46
0
 def test_empty(self):
     # an empty command should give an empty token list
     no_tokens = parse_setup_cmd("")
     self.assertEqual(no_tokens, [])

     # a single space is not stripped; it comes back as a token
     blank_tokens = parse_setup_cmd(" ")
     self.assertEqual(blank_tokens, [" "])

     # passing None instead of a string should raise TypeError
     with self.assertRaises(TypeError):
         parse_setup_cmd(None)
Beispiel #47
0
 def test_name_slash_included_in_command(self):
     # the "/" after "#" should mark the path as an archive and also be
     # kept as the start of the command text that follows
     pkg_tar = {"type": "archive", "path": "my_pkgs.tar", "name": None}

     tokens = parse_setup_cmd("sudo dpkg -i my_pkgs.tar#/fooify.deb")
     self.assertEqual(tokens, ["sudo dpkg -i ", pkg_tar, "/fooify.deb"])
Beispiel #48
0
 def test_double_hash(self):
     # only the final "#" is expected to act as the path/name separator;
     # the earlier one stays part of the path
     tokens = parse_setup_cmd('foo#bar#baz')

     self.assertEqual(
         tokens,
         [{'type': 'file', 'path': 'foo#bar', 'name': 'baz'}])
Beispiel #49
0
    def test_dont_parse_hash_path_inside_quotes(self):
        # quoted text containing "#" should not be treated as a hash path
        double_quoted = '"foo#bar"'
        single_quoted = "'foo#bar'"

        self.assertEqual(parse_setup_cmd(double_quoted), ['"foo#bar"'])
        self.assertEqual(parse_setup_cmd(single_quoted), ["'foo#bar'"])
Beispiel #50
0
 def _parse_bootstrap(self):
     """Return the *bootstrap* option's commands, each one parsed by
     :py:func:`mrjob.setup.parse_setup_cmd()`.
     """
     parsed = []
     for raw_cmd in self._opts['bootstrap']:
         parsed.append(parse_setup_cmd(raw_cmd))
     return parsed
Beispiel #51
0
 def _parse_bootstrap(self):
     """Run every command in the *bootstrap* option through
     :py:func:`mrjob.setup.parse_setup_cmd()` and return the results
     as a list, in the same order.
     """
     bootstrap_cmds = self._opts['bootstrap']
     return list(map(parse_setup_cmd, bootstrap_cmds))
Beispiel #52
0
 def test_start_path_after_colon(self):
     # the ":" before the path should end the preceding command text, so
     # only "foo.tar.gz" lands in the path dict
     expected = [
         'export PYTHONPATH=$PYTHONPATH:',
         {'type': 'archive', 'path': 'foo.tar.gz', 'name': None},
         '/',
     ]

     tokens = parse_setup_cmd('export PYTHONPATH=$PYTHONPATH:foo.tar.gz#/')
     self.assertEqual(tokens, expected)
Beispiel #53
0
 def test_no_name(self):
     # a bare trailing "#" should give an unnamed file; "#/" should give an
     # unnamed archive followed by a literal "/"
     cases = [
         ("foo#", [{"type": "file", "path": "foo", "name": None}]),
         ("foo#/", [{"type": "archive", "path": "foo", "name": None}, "/"]),
     ]

     for cmd, expected in cases:
         self.assertEqual(parse_setup_cmd(cmd), expected)