Beispiel #1
0
def move(src, dest, user=None):
  """
  Move or rename ``src`` to ``dest``.

  Both paths are parsed with ``path.split`` and a filesystem handle is
  opened for each of them; the move itself is delegated to the source
  handle's ``move`` method.

  :type src: string
  :param src: the path to move
  :type dest: string
  :param dest: the destination path
  :param user: passed to ``path.split`` and to the ``hdfs`` constructor
  :return: whatever the source filesystem's ``move`` returns
  """
  src_host, src_port, src_path = path.split(src, user)
  dest_host, dest_port, dest_path = path.split(dest, user)
  src_fs = hdfs(src_host, src_port, user)
  # Nested try/finally: the source handle is released even if opening the
  # destination fails, and a failing close on one handle cannot prevent
  # the other one from being closed.
  try:
    dest_fs = hdfs(dest_host, dest_port, user)
    try:
      return src_fs.move(src_path, dest_fs, dest_path)
    finally:
      dest_fs.close()
  finally:
    src_fs.close()
Beispiel #2
0
def move(src, dest, user=None):
    """
    Move or rename ``src`` to ``dest``.

    Both paths are parsed with ``path.split`` and a filesystem handle is
    opened for each of them; the move itself is delegated to the source
    handle's ``move`` method.

    :type src: string
    :param src: the path to move
    :type dest: string
    :param dest: the destination path
    :param user: passed to ``path.split`` and to the ``hdfs`` constructor
    :return: whatever the source filesystem's ``move`` returns
    """
    src_host, src_port, src_path = path.split(src, user)
    dest_host, dest_port, dest_path = path.split(dest, user)
    src_fs = hdfs(src_host, src_port, user)
    # Nested try/finally: the source handle is released even if opening
    # the destination fails, and a failing close on one handle cannot
    # prevent the other one from being closed.
    try:
        dest_fs = hdfs(dest_host, dest_port, user)
        try:
            return src_fs.move(src_path, dest_fs, dest_path)
        finally:
            dest_fs.close()
    finally:
        src_fs.close()
Beispiel #3
0
def abspath(hdfs_path, user=None, local=False):
  """
  Return an absolute path for ``hdfs_path``.

  The ``user`` arg is passed to :func:`split`. The ``local`` argument
  forces ``hdfs_path`` to be interpreted as an ordinary local path:

  .. code-block:: python

    >>> import os
    >>> os.chdir('/tmp')
    >>> import pydoop.hdfs.path as hpath
    >>> hpath.abspath('file:/tmp')
    'file:/tmp'
    >>> hpath.abspath('file:/tmp', local=True)
    'file:/tmp/file:/tmp'

  :type hdfs_path: string
  :param hdfs_path: the path to make absolute
  :param user: forwarded to :func:`split`
  :type local: bool
  :param local: if true, skip all parsing and treat ``hdfs_path`` as a
    local filesystem path
  :return: ``hdfs_path`` unchanged if it matches
    ``_HdfsPathSplitter.PATTERN``; otherwise an ``hdfs://host:port/...``
    string when :func:`split` yields a hostname, else a ``file:...``
    string built with :func:`os.path.abspath`
  """
  if local:
    return 'file:%s' % os.path.abspath(hdfs_path)
  if _HdfsPathSplitter.PATTERN.match(hdfs_path):
    return hdfs_path
  hostname, port, path = split(hdfs_path, user=user)
  if not hostname:
    return "file:%s" % os.path.abspath(path)
  # The handle is opened only to read back the host and port it reports;
  # close it even if building the result fails.
  fs = hdfs_fs.hdfs(hostname, port)
  try:
    return join("hdfs://%s:%s" % (fs.host, fs.port), path)
  finally:
    fs.close()
Beispiel #4
0
def abspath(hdfs_path, user=None, local=False):
    """
    Return an absolute path for ``hdfs_path``.

    The ``user`` arg is passed to :func:`split`. The ``local`` argument
    forces ``hdfs_path`` to be interpreted as an ordinary local path:

    .. code-block:: python

      >>> import os
      >>> os.chdir('/tmp')
      >>> import pydoop.hdfs.path as hpath
      >>> hpath.abspath('file:/tmp')
      'file:/tmp'
      >>> hpath.abspath('file:/tmp', local=True)
      'file:/tmp/file:/tmp'

    :type hdfs_path: string
    :param hdfs_path: the path to make absolute
    :param user: forwarded to :func:`split`
    :type local: bool
    :param local: if true, skip all parsing and treat ``hdfs_path`` as a
      local filesystem path
    :return: ``hdfs_path`` unchanged if it matches
      ``_HdfsPathSplitter.PATTERN``; otherwise an ``hdfs://host:port/...``
      string when :func:`split` yields a hostname, else a ``file:...``
      string built with :func:`os.path.abspath`
    """
    if local:
        return 'file:%s' % os.path.abspath(hdfs_path)
    if _HdfsPathSplitter.PATTERN.match(hdfs_path):
        return hdfs_path
    hostname, port, path = split(hdfs_path, user=user)
    if not hostname:
        return "file:%s" % os.path.abspath(path)
    # The handle is opened only to read back the host and port it
    # reports; close it even if building the result fails.
    fs = hdfs_fs.hdfs(hostname, port)
    try:
        return join("hdfs://%s:%s" % (fs.host, fs.port), path)
    finally:
        fs.close()
Beispiel #5
0
def mkdir(hdfs_path, user=None):
  """
  Create a directory and its parents as needed.

  :type hdfs_path: string
  :param hdfs_path: the directory to create
  :param user: passed to ``path.split`` and to the ``hdfs`` constructor
  :return: whatever the filesystem's ``create_directory`` returns
  """
  host, port, path_ = path.split(hdfs_path, user)
  fs = hdfs(host, port, user)
  # Release the handle even when directory creation raises.
  try:
    return fs.create_directory(path_)
  finally:
    fs.close()
Beispiel #6
0
def exists(hdfs_path, user=None):
    """
    Tell whether ``hdfs_path`` exists.

    The filesystem to query is selected from the hostname and port that
    :func:`split` extracts from ``hdfs_path``.

    :type hdfs_path: string
    :param hdfs_path: the path to test
    :param user: forwarded to :func:`split` only; the filesystem handle
      is opened without it
    :return: the result of the filesystem's ``exists`` call
    """
    hostname, port, path = split(hdfs_path, user=user)
    fs = hdfs_fs.hdfs(hostname, port)
    # Release the handle even when the existence check raises.
    try:
        return fs.exists(path)
    finally:
        fs.close()
Beispiel #7
0
def rmr(hdfs_path, user=None):
    """
    Recursively remove files and directories.

    :type hdfs_path: string
    :param hdfs_path: the path to remove
    :param user: passed to ``path.split`` and to the ``hdfs`` constructor
    :return: whatever the filesystem's ``delete`` returns
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    # Release the handle even when the deletion raises.
    try:
        return fs.delete(path_)
    finally:
        fs.close()
Beispiel #8
0
def mkdir(hdfs_path, user=None):
    """
    Create a directory and its parents as needed.

    :type hdfs_path: string
    :param hdfs_path: the directory to create
    :param user: passed to ``path.split`` and to the ``hdfs`` constructor
    :return: whatever the filesystem's ``create_directory`` returns
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    # Release the handle even when directory creation raises.
    try:
        return fs.create_directory(path_)
    finally:
        fs.close()
Beispiel #9
0
def rmr(hdfs_path, user=None):
  """
  Recursively remove files and directories.

  :type hdfs_path: string
  :param hdfs_path: the path to remove
  :param user: passed to ``path.split`` and to the ``hdfs`` constructor
  :return: whatever the filesystem's ``delete`` returns
  """
  host, port, path_ = path.split(hdfs_path, user)
  fs = hdfs(host, port, user)
  # Release the handle even when the deletion raises.
  try:
    return fs.delete(path_)
  finally:
    fs.close()
Beispiel #10
0
def exists(hdfs_path, user=None):
  """
  Tell whether ``hdfs_path`` exists.

  The filesystem to query is selected from the hostname and port that
  :func:`split` extracts from ``hdfs_path``.

  :type hdfs_path: string
  :param hdfs_path: the path to test
  :param user: forwarded to :func:`split` only; the filesystem handle
    is opened without it
  :return: the result of the filesystem's ``exists`` call
  """
  hostname, port, path = split(hdfs_path, user=user)
  fs = hdfs_fs.hdfs(hostname, port)
  # Release the handle even when the existence check raises.
  try:
    return fs.exists(path)
  finally:
    fs.close()
Beispiel #11
0
def cp(src_hdfs_path, dest_hdfs_path, **kwargs):
    """
    Copy ``src_hdfs_path`` to ``dest_hdfs_path``, recursing into
    directories.

    Extra keyword arguments are forwarded unchanged to ``_cp_file`` and
    to the recursive calls made for directory entries.

    When the destination does not exist, the source is copied onto that
    exact path.  When it is an existing directory, the source is copied
    inside it under the source's base name.

    :raises IOError: if the source cannot be found, if the destination
      is an existing file, or if the target inside an existing
      destination directory already exists
    """
    source, target = {}, {}
    try:
        for endpoint, location in ((source, src_hdfs_path),
                                   (target, dest_hdfs_path)):
            (endpoint["host"],
             endpoint["port"],
             endpoint["path"]) = path.split(location)
            endpoint["fs"] = hdfs(endpoint["host"], endpoint["port"])
        src_fs, src_path = source["fs"], source["path"]
        dest_fs, dest_path = target["fs"], target["path"]
        # The source must exist.
        try:
            src_info = src_fs.get_path_info(src_path)
        except IOError:
            raise IOError("no such file or directory: %r" % (src_path))
        # Work out the final destination path.
        try:
            dest_info = dest_fs.get_path_info(dest_path)
        except IOError:
            # Destination is missing: copy straight onto dest_path.
            pass
        else:
            if dest_info["kind"] == "file":
                raise IOError("%r already exists" % (dest_path))
            # Destination is a directory: copy into it.
            dest_path = path.join(dest_path, path.basename(src_path))
            if dest_fs.exists(dest_path):
                raise IOError("%r already exists" % (dest_path))
        if src_info["kind"] == "file":
            _cp_file(src_fs, src_path, dest_fs, dest_path, **kwargs)
            return
        # Source is a directory: create the target, then copy each entry.
        dest_fs.create_directory(dest_path)
        dest_root = dest_fs.get_path_info(dest_path)["name"]
        for entry in src_fs.list_directory(src_path):
            cp(entry["name"], dest_root, **kwargs)
    finally:
        # An endpoint has no "fs" key if opening it failed.
        for endpoint in (source, target):
            try:
                endpoint["fs"].close()
            except KeyError:
                pass
Beispiel #12
0
def lsl(hdfs_path, user=None):
  """
  Return a list of dictionaries of file properties.

  If ``hdfs_path`` is a directory, each list item corresponds to a
  file or directory contained by it; if it is a file, there is only
  one item corresponding to the file itself.

  :type hdfs_path: string
  :param hdfs_path: the path to list
  :param user: passed to ``path.split`` and to the ``hdfs`` constructor
  :return: the result of the filesystem's ``list_directory`` call
  """
  host, port, path_ = path.split(hdfs_path, user)
  fs = hdfs(host, port, user)
  # Release the handle even when the listing raises.
  try:
    return fs.list_directory(path_)
  finally:
    fs.close()
Beispiel #13
0
def open(hdfs_path, mode="r", buff_size=0, replication=0, blocksize=0,
         readline_chunk_size=common.BUFSIZE, user=None):
  """
  Open the file at ``hdfs_path`` and return the resulting file object.

  ``hdfs_path`` and ``user`` go to :func:`~path.split` to locate the
  filesystem; every remaining argument is handed, in order, to that
  filesystem's ``open_file`` method.

  The filesystem handle created here is not closed by this function.
  """
  hostname, port_number, file_path = path.split(hdfs_path, user)
  filesystem = hdfs(hostname, port_number, user)
  handle = filesystem.open_file(
    file_path, mode, buff_size, replication, blocksize,
    readline_chunk_size
  )
  return handle
Beispiel #14
0
def cp(src_hdfs_path, dest_hdfs_path, **kwargs):
  """
  Copy ``src_hdfs_path`` to ``dest_hdfs_path``, recursing into
  directories.

  Extra keyword arguments are forwarded unchanged to ``_cp_file`` and
  to the recursive calls made for directory entries.

  When the destination does not exist, the source is copied onto that
  exact path.  When it is an existing directory, the source is copied
  inside it under the source's base name.

  :raises IOError: if the source cannot be found, if the destination
    is an existing file, or if the target inside an existing
    destination directory already exists
  """
  source, target = {}, {}
  try:
    for endpoint, location in ((source, src_hdfs_path),
                               (target, dest_hdfs_path)):
      (endpoint["host"],
       endpoint["port"],
       endpoint["path"]) = path.split(location)
      endpoint["fs"] = hdfs(endpoint["host"], endpoint["port"])
    src_fs, src_path = source["fs"], source["path"]
    dest_fs, dest_path = target["fs"], target["path"]
    # The source must exist.
    try:
      src_info = src_fs.get_path_info(src_path)
    except IOError:
      raise IOError("no such file or directory: %r" % (src_path))
    # Work out the final destination path.
    try:
      dest_info = dest_fs.get_path_info(dest_path)
    except IOError:
      # Destination is missing: copy straight onto dest_path.
      pass
    else:
      if dest_info["kind"] == "file":
        raise IOError("%r already exists" % (dest_path))
      # Destination is a directory: copy into it.
      dest_path = path.join(dest_path, path.basename(src_path))
      if dest_fs.exists(dest_path):
        raise IOError("%r already exists" % (dest_path))
    if src_info["kind"] == "file":
      _cp_file(src_fs, src_path, dest_fs, dest_path, **kwargs)
      return
    # Source is a directory: create the target, then copy each entry.
    dest_fs.create_directory(dest_path)
    dest_root = dest_fs.get_path_info(dest_path)["name"]
    for entry in src_fs.list_directory(src_path):
      cp(entry["name"], dest_root, **kwargs)
  finally:
    # An endpoint has no "fs" key if opening it failed.
    for endpoint in (source, target):
      try:
        endpoint["fs"].close()
      except KeyError:
        pass
Beispiel #15
0
def kind(path, user=None):
    """
    Look up the ``'kind'`` entry of the path info for ``path``.

    ``path`` and ``user`` are given to :func:`split`; the filesystem is
    then asked for the path info.  If that lookup raises ``IOError``
    the function returns ``None`` instead.
    """
    host, port_number, inner_path = split(path, user=user)
    filesystem = hdfs_fs.hdfs(host, port_number)
    item_kind = None
    try:
        item_kind = filesystem.get_path_info(inner_path)['kind']
    except IOError:
        # Lookup failed: report "no kind" rather than propagating.
        pass
    finally:
        filesystem.close()
    return item_kind
Beispiel #16
0
def chmod(hdfs_path, mode, user=None):
  """
  Change file mode bits.

  :type hdfs_path: string
  :param hdfs_path: the path to the file or directory
  :type mode: int
  :param mode: the bitmask to set it to (e.g., 0777)
  :param user: passed to ``path.split`` and to the ``hdfs`` constructor
  :return: whatever the filesystem's ``chmod`` returns
  """
  host, port, path_ = path.split(hdfs_path, user)
  fs = hdfs(host, port, user)
  # Release the handle even when the mode change raises.
  try:
    return fs.chmod(path_, mode)
  finally:
    fs.close()
Beispiel #17
0
def kind(path, user=None):
  """
  Look up the ``'kind'`` entry of the path info for ``path``.

  ``path`` and ``user`` are given to :func:`split`; the filesystem is
  then asked for the path info.  If that lookup raises ``IOError`` the
  function returns ``None`` instead.
  """
  host, port_number, inner_path = split(path, user=user)
  filesystem = hdfs_fs.hdfs(host, port_number)
  item_kind = None
  try:
    item_kind = filesystem.get_path_info(inner_path)['kind']
  except IOError:
    # Lookup failed: report "no kind" rather than propagating.
    pass
  finally:
    filesystem.close()
  return item_kind
Beispiel #18
0
def chmod(hdfs_path, mode, user=None):
    """
    Change file mode bits.

    :type hdfs_path: string
    :param hdfs_path: the path to the file or directory
    :type mode: int
    :param mode: the bitmask to set it to (e.g., 0777)
    :param user: passed to ``path.split`` and to the ``hdfs`` constructor
    :return: whatever the filesystem's ``chmod`` returns
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    # Release the handle even when the mode change raises.
    try:
        return fs.chmod(path_, mode)
    finally:
        fs.close()
Beispiel #19
0
def open(hdfs_path,
         mode="r",
         buff_size=0,
         replication=0,
         blocksize=0,
         readline_chunk_size=common.BUFSIZE,
         user=None):
    """
    Open the file at ``hdfs_path`` and return the resulting file object.

    ``hdfs_path`` and ``user`` go to :func:`~path.split` to locate the
    filesystem; every remaining argument is handed, in order, to that
    filesystem's ``open_file`` method.

    The filesystem handle created here is not closed by this function.
    """
    hostname, port_number, file_path = path.split(hdfs_path, user)
    filesystem = hdfs(hostname, port_number, user)
    handle = filesystem.open_file(
        file_path, mode, buff_size, replication, blocksize,
        readline_chunk_size
    )
    return handle
Beispiel #20
0
def lsl(hdfs_path, user=None, recursive=False):
    """
    Return a list of dictionaries of file properties.

    If ``hdfs_path`` is a file, there is only one item corresponding to
    the file itself; if it is a directory and ``recursive`` is
    :obj:`False`, each list item corresponds to a file or directory
    contained by it; if it is a directory and ``recursive`` is
    :obj:`True`, the list contains one item for every file or directory
    in the tree rooted at ``hdfs_path``.

    :type hdfs_path: string
    :param hdfs_path: the path to list
    :param user: passed to ``path.split`` and to the ``hdfs`` constructor
    :type recursive: bool
    :param recursive: if true, use the filesystem's ``walk`` instead of
      ``list_directory``
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    # Release the handle even when listing or walking raises.
    try:
        if not recursive:
            return fs.list_directory(path_)
        treewalk = fs.walk(path_)
        # Builtin next() works on both Python 2 and 3 iterators, unlike
        # the Python-2-only ``.next()`` method.
        top = next(treewalk)
        if top['kind'] == 'directory':
            # The first item is the root itself; keep only what follows.
            return list(treewalk)
        return [top]
    finally:
        fs.close()
Beispiel #21
0
def lsl(hdfs_path, user=None, recursive=False):
  """
  Return a list of dictionaries of file properties.

  If ``hdfs_path`` is a file, there is only one item corresponding to
  the file itself; if it is a directory and ``recursive`` is
  :obj:`False`, each list item corresponds to a file or directory
  contained by it; if it is a directory and ``recursive`` is
  :obj:`True`, the list contains one item for every file or directory
  in the tree rooted at ``hdfs_path``.

  :type hdfs_path: string
  :param hdfs_path: the path to list
  :param user: passed to ``path.split`` and to the ``hdfs`` constructor
  :type recursive: bool
  :param recursive: if true, use the filesystem's ``walk`` instead of
    ``list_directory``
  """
  host, port, path_ = path.split(hdfs_path, user)
  fs = hdfs(host, port, user)
  # Release the handle even when listing or walking raises.
  try:
    if not recursive:
      return fs.list_directory(path_)
    treewalk = fs.walk(path_)
    # Builtin next() works on both Python 2 and 3 iterators, unlike the
    # Python-2-only ``.next()`` method.
    top = next(treewalk)
    if top['kind'] == 'directory':
      # The first item is the root itself; keep only what follows.
      return list(treewalk)
    return [top]
  finally:
    fs.close()