Code Example #1
File: hadut.py  Project: tivvit/pydoop
def run_pipes(
    executable,
    input_path,
    output_path,
    more_args=None,
    properties=None,
    force_pydoop_submitter=False,
    hadoop_conf_dir=None,
    logger=None,
    keep_streams=False,
):
    """
    Run a pipes command.

    ``more_args`` (after setting input/output path) and ``properties``
    are passed to :func:`run_cmd`.

    If not specified otherwise, this function sets the properties
    ``hadoop.pipes.java.recordreader`` and ``hadoop.pipes.java.recordwriter``
    to ``"true"``.

    This function works around a bug in Hadoop pipes that affects
    versions of Hadoop with security when the local file system is
    used as the default FS (no HDFS); see
    https://issues.apache.org/jira/browse/MAPREDUCE-4000.  In those
    set-ups, the function uses Pydoop's own pipes submitter
    application.  You can force the use of Pydoop's submitter by
    passing the argument force_pydoop_submitter=True.
    """
    if logger is None:
        logger = utils.NullLogger()
    if not hdfs.path.exists(executable):
        raise IOError("executable %s not found" % executable)
    if not hdfs.path.exists(input_path) and not (set(input_path) & GLOB_CHARS):
        raise IOError("input path %s not found" % input_path)
    if properties is None:
        properties = {}
    properties.setdefault("hadoop.pipes.java.recordreader", "true")
    properties.setdefault("hadoop.pipes.java.recordwriter", "true")
    if force_pydoop_submitter:
        use_pydoop_submit = True
    else:
        use_pydoop_submit = False
        ver = pydoop.hadoop_version_info()
        if ver.has_security():
            if ver.is_cdh_mrv2() and hdfs.default_is_local():
                raise RuntimeError("mrv2 on local fs not supported yet")
            use_pydoop_submit = hdfs.default_is_local()
    args = ["-program", executable, "-input", input_path, "-output", output_path]
    if more_args is not None:
        args.extend(more_args)
    if use_pydoop_submit:
        submitter = "it.crs4.pydoop.pipes.Submitter"
        pydoop_jar = pydoop.jar_path()
        args.extend(("-libjars", pydoop_jar))
        return run_class(submitter, args, properties, classpath=pydoop_jar, logger=logger, keep_streams=keep_streams)
    else:
        return run_cmd(
            "pipes", args, properties, hadoop_conf_dir=hadoop_conf_dir, logger=logger, keep_streams=keep_streams
        )
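
For context, here is a minimal usage sketch for run_pipes. The paths, the extra job property, and the logger name are hypothetical placeholders, and the import assumes the function is exposed as pydoop.hadut.run_pipes, matching the hadut.py file shown above; the return value is simply whatever run_cmd (or run_class, when the Pydoop submitter is used) returns.

# Minimal usage sketch: hypothetical paths and property values, not taken from the sources above.
# run_pipes checks the executable and the input path with hdfs.path.exists() before submitting.
import logging

from pydoop import hadut

logging.basicConfig(level=logging.INFO)

out = hadut.run_pipes(
    "/user/me/bin/wordcount",                  # pipes executable on the default filesystem
    "/user/me/wc_input",                       # input path (may contain glob characters)
    "/user/me/wc_output",
    properties={"mapreduce.job.name": "wc"},   # merged with the java recordreader/recordwriter defaults
    logger=logging.getLogger("wordcount"),
)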
Code Example #2
 def stat_on_local(self):
     wd_ = tempfile.mkdtemp(prefix='pydoop_', suffix=UNI_CHR)
     p_ = os.path.join(wd_, make_random_str())
     if hdfs.default_is_local():
         wd, p = wd_, p_
         host = "default"
     else:
         wd, p = ('file:%s' % _ for _ in (wd_, p_))
         host = ""
     fs = hdfs.hdfs(host, 0)
     with fs.open_file(p_, 'w') as fo:
         fo.write(make_random_str())
     info = fs.get_path_info(p_)
     fs.close()
     s = hdfs.path.stat(p)
     os_s = os.stat(p_)
     for n in dir(s):
         if n.startswith('st_'):
             try:
                 exp_v = getattr(os_s, n)
             except AttributeError:
                 try:
                     exp_v = info[self.NMAP[n]]
                 except KeyError:
                     continue
             # compare every st_ attribute, whether it came from os.stat or from get_path_info
             self.assertEqual(getattr(s, n), exp_v)
     self.__check_extra_args(s, info)
     self.__check_wrapper_funcs(p)
     hdfs.rmr(wd)
Code Example #3
File: test_path.py  Project: onlynone/pydoop
 def without_user(self):
   p = 'foo/bar'
   abs_p = hdfs.path.abspath(p, user=None, local=False)
   if hdfs.default_is_local():
     self.assertEqual(abs_p, '%s%s' % (self.root, os.path.abspath(p)))
   else:
     self.assertEqual(abs_p, '%s/user/%s/%s' % (self.root, DEFAULT_USER, p))
Code Example #4
File: test_path.py  Project: ilveroluca/pydoop
 def with_user(self):
   p = 'foo/bar'
   abs_p = hdfs.path.abspath(p, user="pydoop", local=False)
   if hdfs.default_is_local():
     self.assertEqual(abs_p, '%s%s' % (self.root, os.path.abspath(p)))
   else:
     self.assertEqual(abs_p, '%s/user/pydoop/%s' % (self.root, p))
Code Example #5
File: test_path.py  Project: ilveroluca/pydoop
 def setUp(self):
   if hdfs.default_is_local():
     self.root = "file:"
   else:
     fs = hdfs.hdfs("default", 0)
     self.root = "hdfs://%s:%s" % (fs.host, fs.port)
     fs.close()
Code Example #6
File: test_path.py  Project: ilveroluca/pydoop
 def without_user(self):
   p = 'foo/bar'
   abs_p = hdfs.path.abspath(p, user=None, local=False)
   if hdfs.default_is_local():
     self.assertEqual(abs_p, '%s%s' % (self.root, os.path.abspath(p)))
   else:
     self.assertEqual(abs_p, '%s/user/%s/%s' % (self.root, DEFAULT_USER, p))
Code Example #7
File: test_path.py  Project: kikkomep/pydoop
 def good(self):
     cases = [
         ('hdfs://localhost:9000/', ('localhost', 9000, '/')),
         ('hdfs://localhost:9000/a/b', ('localhost', 9000, '/a/b')),
         ('hdfs://localhost/a/b', ('localhost', DEFAULT_PORT, '/a/b')),
         ('hdfs:///a/b', ('default', 0, '/a/b')),
         ('hdfs:/', ('default', 0, '/')),
         ('file:///a/b', ('', 0, '/a/b')),
         ('file:/a/b', ('', 0, '/a/b')),
         ('file:///a', ('', 0, '/a')),
         ('file:/a', ('', 0, '/a')),
         ('file://temp/foo.txt', ('', 0, 'temp/foo.txt')),
         ('file://temp', ('', 0, 'temp')),
     ]
     if hdfs.default_is_local():
         cases.extend([
             ('///a/b', ('', 0, '/a/b')),
             ('/a/b', ('', 0, '/a/b')),
             ('a/b', ('', 0, 'a/b')),
         ])
     else:
         cases.extend([
             ('///a/b', ('default', 0, '/a/b')),
             ('/a/b', ('default', 0, '/a/b')),
             ('a/b', ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),
         ])
     for p, r in cases:
         self.assertEqual(hdfs.path.split(p), r)
     for p, r in cases[1:]:
         self.assertEqual(hdfs.path.split(p+UNI_CHR), uni_last(r))
Code Example #8
 def with_user(self):
      abs_p = hdfs.path.abspath(self.p, user="pydoop", local=False)
     if hdfs.default_is_local():
         self.assertEqual(abs_p,
                          '%s%s' % (self.root, os.path.abspath(self.p)))
     else:
         self.assertEqual(abs_p, '%s/user/pydoop/%s' % (self.root, self.p))
Code Example #9
File: test_path.py  Project: kikkomep/pydoop
 def stat_on_local(self):
     wd_ = tempfile.mkdtemp(prefix='pydoop_', suffix=UNI_CHR)
     p_ = os.path.join(wd_, make_random_str())
     if hdfs.default_is_local():
         wd, p = wd_, p_
         host = "default"
     else:
         wd, p = ('file:%s' % _ for _ in (wd_, p_))
         host = ""
     fs = hdfs.hdfs(host, 0)
     with fs.open_file(p_, 'w') as fo:
         fo.write(make_random_str())
     info = fs.get_path_info(p_)
     fs.close()
     s = hdfs.path.stat(p)
     os_s = os.stat(p_)
     for n in dir(s):
         if n.startswith('st_'):
             try:
                 exp_v = getattr(os_s, n)
             except AttributeError:
                 try:
                     exp_v = info[self.NMAP[n]]
                 except KeyError:
                     continue
             # compare every st_ attribute, whether it came from os.stat or from get_path_info
             self.assertEqual(getattr(s, n), exp_v)
     self.__check_extra_args(s, info)
     self.__check_wrapper_funcs(p)
     hdfs.rmr(wd)
Code Example #10
 def good(self):
     cases = [
         ('hdfs://localhost:9000/', ('localhost', 9000, '/')),
         ('hdfs://localhost:9000/a/b', ('localhost', 9000, '/a/b')),
         ('hdfs://localhost/a/b', ('localhost', DEFAULT_PORT, '/a/b')),
         ('hdfs:///a/b', ('default', 0, '/a/b')),
         ('hdfs:/', ('default', 0, '/')),
         ('file:///a/b', ('', 0, '/a/b')),
         ('file:/a/b', ('', 0, '/a/b')),
         ('file:///a', ('', 0, '/a')),
         ('file:/a', ('', 0, '/a')),
         ('file://temp/foo.txt', ('', 0, 'temp/foo.txt')),
         ('file://temp', ('', 0, 'temp')),
     ]
     if hdfs.default_is_local():
         cases.extend([
             ('///a/b', ('', 0, '/a/b')),
             ('/a/b', ('', 0, '/a/b')),
             ('a/b', ('', 0, 'a/b')),
         ])
     else:
         cases.extend([
             ('///a/b', ('default', 0, '/a/b')),
             ('/a/b', ('default', 0, '/a/b')),
             ('a/b', ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),
         ])
     for p, r in cases:
         self.assertEqual(hdfs.path.split(p), r)
     for p, r in cases[1:]:
         self.assertEqual(hdfs.path.split(p + UNI_CHR), uni_last(r))
Code Example #11
File: test_path.py  Project: onlynone/pydoop
 def setUp(self):
   if hdfs.default_is_local():
     self.root = "file:"
   else:
     fs = hdfs.hdfs("default", 0)
     self.root = "hdfs://%s:%s" % (fs.host, fs.port)
     fs.close()
Code Example #12
 def capacity(self):
     fs = hdfs.hdfs("", 0)
     self.assertRaises(RuntimeError, fs.capacity)
     fs.close()
     if not hdfs.default_is_local():
         fs = hdfs.hdfs("default", 0)
         cap = fs.capacity()
         self.assertGreaterEqual(cap, 0)
Code Example #13
File: test_hdfs.py  Project: kikkomep/pydoop
 def capacity(self):
     fs = hdfs.hdfs("", 0)
     self.assertRaises(RuntimeError, fs.capacity)
     fs.close()
     if not hdfs.default_is_local():
         fs = hdfs.hdfs("default", 0)
         cap = fs.capacity()
         self.assertGreaterEqual(cap, 0)
Code Example #14
 def expanduser(self):
     for pre in '~', '~%s' % DEFAULT_USER:
         for rest in '', '/d':
             p = '%s%s' % (pre, rest)
             if hdfs.default_is_local():
                 self.assertEqual(hdfs.path.expanduser(p),
                                  os.path.expanduser(p))
             else:
                 exp_res = '/user/%s%s' % (DEFAULT_USER, rest)
                 self.assertEqual(hdfs.path.expanduser(p), exp_res)
Code Example #15
File: test_path.py  Project: kikkomep/pydoop
 def expanduser(self):
     for pre in '~', '~%s' % DEFAULT_USER:
         for rest in '', '/d':
             p = '%s%s' % (pre, rest)
             if hdfs.default_is_local():
                 self.assertEqual(
                     hdfs.path.expanduser(p), os.path.expanduser(p)
                 )
             else:
                 exp_res = '/user/%s%s' % (DEFAULT_USER, rest)
                 self.assertEqual(hdfs.path.expanduser(p), exp_res)
Code Example #16
File: test_path.py  Project: ilveroluca/pydoop
 def bad(self):
   cases = [
     'ftp://localhost:9000/',             # bad scheme
     'hdfs://localhost:spam/',            # port is not an int
     'hdfs://localhost:9000',             # path part is empty
     'hdfs://localhost:9000/a:b',         # colon outside netloc
     ]
   if not hdfs.default_is_local():
     cases.append('/localhost:9000/a/b')  # colon outside netloc
   for p in cases:
     self.assertRaises(ValueError, hdfs.path.split, p)
Code Example #17
File: test_path.py  Project: ilveroluca/pydoop
 def good_with_user(self):
   if hdfs.default_is_local():
      cases = [('a/b', u, ('', 0, 'a/b')) for u in (None, DEFAULT_USER, 'foo')]
   else:
     cases = [
       ('a/b', None, ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),
       ('a/b', DEFAULT_USER, ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),
       ('a/b', 'foo', ('default', 0, '/user/foo/a/b')),
       ]
   for p, u, r in cases:
     self.assertEqual(hdfs.path.split(p, u), r)
Code Example #18
File: test_path.py  Project: onlynone/pydoop
 def good_with_user(self):
   if hdfs.default_is_local():
      cases = [('a/b', u, ('', 0, 'a/b')) for u in (None, DEFAULT_USER, 'foo')]
   else:
     cases = [
       ('a/b', None, ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),
       ('a/b', DEFAULT_USER, ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),
       ('a/b', 'foo', ('default', 0, '/user/foo/a/b')),
       ]
   for p, u, r in cases:
     self.assertEqual(hdfs.path.split(p, u), r)
Code Example #19
File: test_path.py  Project: onlynone/pydoop
 def bad(self):
   cases = [
     'ftp://localhost:9000/',             # bad scheme
     'hdfs://localhost:spam/',            # port is not an int
     'hdfs://localhost:9000',             # path part is empty
     'hdfs://localhost:9000/a:b',         # colon outside netloc
     ]
   if not hdfs.default_is_local():
     cases.append('/localhost:9000/a/b')  # colon outside netloc
   for p in cases:
     self.assertRaises(ValueError, hdfs.path.split, p)
Code Example #20
 def get_hosts(self):
     if hdfs.default_is_local():
         # only run on HDFS
         return
     hdfs.dump(self.data, self.hdfs_paths[0], mode="wb")
     fs = hdfs.hdfs("default", 0)
     hs = fs.get_hosts(self.hdfs_paths[0], 0, 10)
     self.assertTrue(len(hs) > 0)
     self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], -10,
                       10)
     self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], 0, -10)
Code Example #21
File: test_hdfs.py  Project: kikkomep/pydoop
 def get_hosts(self):
     if hdfs.default_is_local():
         # only run on HDFS
         return
     hdfs.dump(self.data, self.hdfs_paths[0])
     fs = hdfs.hdfs("default", 0)
     hs = fs.get_hosts(self.hdfs_paths[0], 0, 10)
     self.assertTrue(len(hs) > 0)
     self.assertRaises(
         ValueError, fs.get_hosts, self.hdfs_paths[0], -10, 10
     )
     self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], 0, -10)
Code Example #22
 def setUp(self):
     self.hp_cases = [("default", 0)]
     self.u_cases = [None, CURRENT_USER]
     if not hdfs.default_is_local():
         self.hp_cases.append((u.HDFS_HOST, u.HDFS_PORT))
         self.u_cases.append("nobody")
         try:
             hdfs_ip = socket.gethostbyname(u.HDFS_HOST)
         except socket.gaierror:
             pass
         else:
             self.hp_cases.append((hdfs_ip, u.HDFS_PORT))
Code Example #23
File: test_hdfs_fs.py  Project: kmatzen/pydoop
 def setUp(self):
     self.hp_cases = [("default", 0)]
     self.u_cases = [None, CURRENT_USER]
     if not hdfs.default_is_local():
         self.hp_cases.append((u.HDFS_HOST, u.HDFS_PORT))
         self.u_cases.append("nobody")
         try:
             hdfs_ip = socket.gethostbyname(u.HDFS_HOST)
         except socket.gaierror:
             pass
         else:
             self.hp_cases.append((hdfs_ip, u.HDFS_PORT))
Code Example #24
 def bad(self):
     cases = [
         '',  # not allowed in the Java API
         'hdfs:',  # no scheme-specific part
         'hdfs://',  # path part is empty
         'ftp://localhost:9000/',  # bad scheme
         'hdfs://localhost:spam/',  # port is not an int
         'hdfs://localhost:9000',  # path part is empty
         'hdfs://localhost:9000/a:b',  # colon outside netloc
         '//localhost:9000/a/b',  # null scheme
     ]
     if not hdfs.default_is_local():
         cases.append('/localhost:9000/a/b')  # colon outside netloc
     for p in cases:
         self.assertRaises(ValueError, hdfs.path.split, p)
Code Example #25
File: test_path.py  Project: kikkomep/pydoop
 def bad(self):
     cases = [
         '',                                  # not allowed in the Java API
         'hdfs:',                             # no scheme-specific part
         'hdfs://',                           # path part is empty
         'ftp://localhost:9000/',             # bad scheme
         'hdfs://localhost:spam/',            # port is not an int
         'hdfs://localhost:9000',             # path part is empty
         'hdfs://localhost:9000/a:b',         # colon outside netloc
         '//localhost:9000/a/b',              # null scheme
     ]
     if not hdfs.default_is_local():
         cases.append('/localhost:9000/a/b')  # colon outside netloc
     for p in cases:
         self.assertRaises(ValueError, hdfs.path.split, p)
Code Example #26
File: test_hdfs.py  Project: ZEMUSHKA/pydoop
 def setUp(self):
   wd = tempfile.mkdtemp()
   wd_bn = os.path.basename(wd)
   self.local_wd = "file:%s" % wd
   fs = hdfs.hdfs("default", 0)
   fs.create_directory(wd_bn)
   self.hdfs_wd = fs.get_path_info(wd_bn)["name"]
   fs.close()
   basenames = ["test_path_%d" % i for i in xrange(2)]
   self.local_paths = ["%s/%s" % (self.local_wd, bn) for bn in basenames]
   self.hdfs_paths = ["%s/%s" % (self.hdfs_wd, bn) for bn in basenames]
   self.data = make_random_data(4*BUFSIZE + BUFSIZE/2)
   for path in self.local_paths:
     self.assertTrue(path.startswith("file:"))
   for path in self.hdfs_paths:
     if not hdfs.default_is_local():
       self.assertTrue(path.startswith("hdfs:"))
Code Example #27
File: test_hdfs.py  Project: onlynone/pydoop
 def setUp(self):
     wd = tempfile.mkdtemp()
     wd_bn = os.path.basename(wd)
     self.local_wd = "file:%s" % wd
     fs = hdfs.hdfs("default", 0)
     fs.create_directory(wd_bn)
     self.hdfs_wd = fs.get_path_info(wd_bn)["name"]
     fs.close()
     basenames = ["test_path_%d" % i for i in xrange(2)]
     self.local_paths = ["%s/%s" % (self.local_wd, bn) for bn in basenames]
     self.hdfs_paths = ["%s/%s" % (self.hdfs_wd, bn) for bn in basenames]
     self.data = make_random_data(4 * BUFSIZE + BUFSIZE / 2)
     for path in self.local_paths:
         self.assertTrue(path.startswith("file:"))
     for path in self.hdfs_paths:
         if not hdfs.default_is_local():
             self.assertTrue(path.startswith("hdfs:"))
Code Example #28
File: test_path.py  Project: kikkomep/pydoop
 def stat(self):
     if hdfs.default_is_local():
         return
     bn = '%s%s' % (make_random_str(), UNI_CHR)
     fn = '/user/%s/%s' % (DEFAULT_USER, bn)
     fs = hdfs.hdfs("default", 0)
     p = "hdfs://%s:%s%s" % (fs.host, fs.port, fn)
     with fs.open_file(fn, 'w') as fo:
         fo.write(make_random_str())
     info = fs.get_path_info(fn)
     fs.close()
     s = hdfs.path.stat(p)
     for n1, n2 in self.NMAP.iteritems():
         attr = getattr(s, n1, None)
         self.assertFalse(attr is None)
         self.assertEqual(attr, info[n2])
     self.__check_extra_args(s, info)
     self.__check_wrapper_funcs(p)
     hdfs.rmr(p)
Code Example #29
 def stat(self):
     if hdfs.default_is_local():
         return
     bn = '%s%s' % (make_random_str(), UNI_CHR)
     fn = '/user/%s/%s' % (DEFAULT_USER, bn)
     fs = hdfs.hdfs("default", 0)
     p = "hdfs://%s:%s%s" % (fs.host, fs.port, fn)
     with fs.open_file(fn, 'w') as fo:
         fo.write(make_random_str())
     info = fs.get_path_info(fn)
     fs.close()
     s = hdfs.path.stat(p)
     for n1, n2 in self.NMAP.iteritems():
         attr = getattr(s, n1, None)
         self.assertFalse(attr is None)
         self.assertEqual(attr, info[n2])
     self.__check_extra_args(s, info)
     self.__check_wrapper_funcs(p)
     hdfs.rmr(p)
Code Example #30
def suite():
    suite_ = unittest.TestSuite()
    suite_.addTest(TestConnection('connect'))
    suite_.addTest(TestConnection('cache'))
    tests = common_tests()
    if not hdfs.default_is_local():
        tests.extend([
            'capacity',
            'default_block_size',
            'used',
            'chown',
            'utime',
            'block_size',
            'replication',
            'set_replication',
            'readline_block_boundary',
            'get_hosts',
        ])
    for t in tests:
        suite_.addTest(TestHDFS(t))
    return suite_
Code Example #31
def suite():
    suite_ = unittest.TestSuite()
    suite_.addTest(TestConnection('connect'))
    suite_.addTest(TestConnection('cache'))
    tests = common_tests()
    if not hdfs.default_is_local():
        tests.extend([
            'capacity',
            'default_block_size',
            'used',
            'chown',
            'utime',
            'block_size',
            'replication',
            'set_replication',
            'readline_block_boundary',
            'get_hosts',
        ])
    for t in tests:
        suite_.addTest(TestHDFS(t))
    return suite_
Code Example #32
File: test_hdfs_fs.py  Project: kmatzen/pydoop
def suite():
    suite = unittest.TestSuite()
    suite.addTest(TestConnection("connect"))
    suite.addTest(TestConnection("cache"))
    tests = common_tests()
    if not hdfs.default_is_local():
        tests.extend(
            [
                "capacity",
                "default_block_size",
                "used",
                "chown",
                "utime",
                "block_size",
                "replication",
                "set_replication",
                "readline_block_boundary",
                "get_hosts",
            ]
        )
    for t in tests:
        suite.addTest(TestHDFS(t))
    return suite
Code Example #33
 def samefile_user(self):
     if not hdfs.default_is_local():
         self.assertTrue(hdfs.path.samefile('fn', '/user/u/fn', user='u'))
Code Example #34
File: test_path.py  Project: kikkomep/pydoop
 def samefile_user(self):
     if not hdfs.default_is_local():
         self.assertTrue(hdfs.path.samefile('fn', '/user/u/fn', user='u'))
Code Example #35
File: test_support.py  Project: kikkomep/pydoop
def get_wd_prefix(base="pydoop_"):
    if default_is_local():
        return os.path.join(tempfile.gettempdir(), "pydoop_")
    else:
        return base
Code Example #36
def get_wd_prefix(base="pydoop_"):
    if default_is_local():
        return os.path.join(tempfile.gettempdir(), "pydoop_")
    else:
        return base
Code Example #37
File: hadut.py  Project: onlynone/pydoop
def run_pipes(executable,
              input_path,
              output_path,
              more_args=None,
              properties=None,
              force_pydoop_submitter=False,
              hadoop_conf_dir=None,
              logger=None):
    """
  Run a pipes command.

  ``more_args`` (after setting input/output path) and ``properties``
  are passed to :func:`run_cmd`.

  If not specified otherwise, this function sets the properties
  hadoop.pipes.java.recordreader and hadoop.pipes.java.recordwriter to 'true'.

  This function works around a bug in Hadoop pipes that affects versions of
  Hadoop with security when the local file system is used as the default FS
  (no HDFS); see https://issues.apache.org/jira/browse/MAPREDUCE-4000.
  In those set-ups, the function uses Pydoop's own pipes submitter application.
  You can force the use of Pydoop's submitter by passing the argument
  force_pydoop_submitter=True.
  """
    if logger is None:
        logger = utils.NullLogger()
    if not hdfs.path.exists(executable):
        raise IOError("executable %s not found" % executable)
    if not hdfs.path.exists(input_path) and not (set(input_path) & GLOB_CHARS):
        raise IOError("input path %s not found" % input_path)
    if properties is None:
        properties = {}
    properties.setdefault('hadoop.pipes.java.recordreader', 'true')
    properties.setdefault('hadoop.pipes.java.recordwriter', 'true')
    if force_pydoop_submitter:
        use_pydoop_submit = True
    else:
        use_pydoop_submit = False
        ver = pydoop.hadoop_version_info()
        if ver.has_security():
            if ver.cdh >= (4, 0, 0) and not ver.ext and hdfs.default_is_local():
                raise RuntimeError("mrv2 on local fs not supported yet")  # FIXME
            use_pydoop_submit = hdfs.default_is_local()
    args = [
        "-program", executable, "-input", input_path, "-output", output_path
    ]
    if more_args is not None:
        args.extend(more_args)
    if use_pydoop_submit:
        submitter = "it.crs4.pydoop.pipes.Submitter"
        pydoop_jar = pydoop.jar_path()
        args.extend(("-libjars", pydoop_jar))
        return run_class(submitter,
                         args,
                         properties,
                         classpath=pydoop_jar,
                         logger=logger)
    else:
        return run_cmd("pipes",
                       args,
                       properties,
                       hadoop_conf_dir=hadoop_conf_dir,
                       logger=logger)