Beispiel #1
0
    def libjars(self):
        """Return the list of jar paths to pass to Hadoop's ``-libjar``
        option. Setting :py:attr:`LIBJARS` is normally all you need.

        The default implementation merges :py:attr:`LIBJARS` with the
        :option:`libjars` options given on the command line, listing the
        command-line values last so that they take precedence. Relative
        paths in :py:attr:`LIBJARS` are resolved against the directory
        containing the job script, whereas command-line paths are left
        relative to the current working directory.

        ``~`` and environment variables in paths are always expanded later
        by the job runner (see :mrjob-opt:`libjars`).

        .. versionadded:: 0.5.3
        """
        base_dir = os.path.dirname(self.mr_job_script())

        # The paths are later merged with combine_path_lists, which expands
        # environment variables. A path such as $MY_DIR/some.jar may turn
        # out to be absolute once expanded, so the expanded form is used
        # only to decide whether the path is absolute; the value we keep
        # is still the unexpanded original.
        anchored = [
            jar if os.path.isabs(expand_path(jar))
            else os.path.join(base_dir, jar)
            for jar in (self.LIBJARS or [])
        ]

        return combine_lists(anchored, self.options.libjars)
Beispiel #2
0
 def _job_kwargs(self):
     """Build the keyword arguments for the runner class that the
     job/launcher itself is able to specify.

     Only the most basic combiners are applied here; path resolution and
     blanking out of jobconf values are left to the runner.
     """
     kwargs = {}

     # command-line has the final say on jobconf and libjars
     kwargs['jobconf'] = combine_dicts(
         self.jobconf(), self.options.jobconf)
     kwargs['libjars'] = combine_lists(
         self.libjars(), self.options.libjars)

     kwargs['partitioner'] = self.partitioner()
     kwargs['sort_values'] = self.sort_values()

     # TODO: should probably put self.options last below for consistency
     kwargs['upload_archives'] = combine_lists(
         self.options.upload_archives, self.archives())
     kwargs['upload_dirs'] = combine_lists(
         self.options.upload_dirs, self.dirs())
     kwargs['upload_files'] = combine_lists(
         self.options.upload_files, self.files())

     return kwargs
 def emr_job_runner_kwargs(self):
     """Extend the parent class's EMR runner kwargs with AWS credentials
     (via ``cmdenv``) and extra bootstrap commands.

     Raises ``KeyError`` if ``AWS_ACCESS_KEY_ID`` or
     ``AWS_SECRET_ACCESS_KEY`` is missing from the local environment.
     """
     runner_kwargs = super(DownloadToS3, self).emr_job_runner_kwargs()

     # set up AWS credentials on EMR instances by copying them from the
     # environment this process runs in
     aws_credentials = {
         'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
         'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
     }
     runner_kwargs['cmdenv'] = combine_dicts(
         runner_kwargs['cmdenv'], aws_credentials)

     # tweak TCP window scaling, then install pip, aws-cli, and boto
     # NOTE(review): apt-get is invoked without -y; confirm it does not
     # prompt when run during bootstrap.
     extra_bootstrap_cmds = [
         'sysctl -w "net.ipv4.tcp_window_scaling=0"',
         'sudo apt-get install python-pip',
         'sudo pip install awscli',
         'sudo pip install boto',
     ]
     runner_kwargs['bootstrap_cmds'] = combine_lists(
         runner_kwargs['bootstrap_cmds'], extra_bootstrap_cmds)

     return runner_kwargs
Beispiel #4
0
 def test_concatenation(self):
     # a list, a None and a tuple are expected to merge into one flat list
     combined = combine_lists([1, 2], None, (3, 4))
     assert_equal(combined, [1, 2, 3, 4])
Beispiel #5
0
 def test_empty(self):
     # calling with no arguments is expected to yield an empty list
     combined = combine_lists()
     assert_equal(combined, [])
Beispiel #6
0
 def test_mix_lists_and_scalars(self):
     # sequences and bare scalars may be interleaved; the expected result
     # keeps every value in argument order
     combined = combine_lists([1, 2], 3, (4, 5), 6)
     expected = [1, 2, 3, 4, 5, 6]
     self.assertEqual(combined, expected)
Beispiel #7
0
 def test_scalars(self):
     # None is expected to be dropped, while other falsy or scalar values
     # (False, a bytestring, numbers) are kept as single items
     combined = combine_lists(None, False, b'\x00', 42, 3.14)
     expected = [False, b'\x00', 42, 3.14]
     self.assertEqual(combined, expected)
Beispiel #8
0
 def test_strings(self):
     # strings are expected to be treated as single items, not split
     # into characters
     combined = combine_lists('one', None, 'two', u'three')
     expected = ['one', 'two', u'three']
     self.assertEqual(combined, expected)
Beispiel #9
0
 def test_concatenation(self):
     # a list, a None and a tuple are expected to merge into one flat list
     combined = combine_lists([1, 2], None, (3, 4))
     expected = [1, 2, 3, 4]
     self.assertEqual(combined, expected)
Beispiel #10
0
 def test_strings(self):
     # strings are expected to be treated as single items, not split
     # into characters
     combined = combine_lists("one", None, "two", u"three")
     expected = ["one", "two", u"three"]
     self.assertEqual(combined, expected)
Beispiel #11
0
 def test_empty(self):
     # calling with no arguments is expected to yield an empty list
     combined = combine_lists()
     self.assertEqual(combined, [])
Beispiel #12
0
 def test_dicts(self):
     # dicts (even an empty one) are expected to be kept whole as single
     # items; None is dropped
     combined = combine_lists({1: 2}, None, {})
     expected = [{1: 2}, {}]
     self.assertEqual(combined, expected)
Beispiel #13
0
 def test_mix_lists_and_scalars(self):
     # sequences and bare scalars may be interleaved; the expected result
     # keeps every value in argument order
     combined = combine_lists([1, 2], 3, (4, 5), 6)
     expected = [1, 2, 3, 4, 5, 6]
     self.assertEqual(combined, expected)
Beispiel #14
0
 def test_scalars(self):
     # None is expected to be dropped, while other falsy or scalar values
     # (False, a bytestring, numbers) are kept as single items
     combined = combine_lists(None, False, b'\x00', 42, 3.14)
     expected = [False, b'\x00', 42, 3.14]
     self.assertEqual(combined, expected)
Beispiel #15
0
 def test_dicts(self):
     # dicts (even an empty one) are expected to be kept whole as single
     # items; None is dropped
     combined = combine_lists({1: 2}, None, {})
     expected = [{1: 2}, {}]
     self.assertEqual(combined, expected)
Beispiel #16
0
 def test_strings(self):
     # strings are expected to be treated as single items, not split
     # into characters
     combined = combine_lists('one', None, 'two', u'three')
     expected = ['one', 'two', u'three']
     self.assertEqual(combined, expected)
Beispiel #17
0
 def test_empty(self):
     # calling with no arguments is expected to yield an empty list
     combined = combine_lists()
     self.assertEqual(combined, [])
Beispiel #18
0
 def test_empty(self):
     # calling with no arguments is expected to yield an empty list
     combined = combine_lists()
     assert_equal(combined, [])