Esempio n. 1
0
    def test_filesystem(self):
        """Exercise exists(), rename() and delete() on a file made for the test."""
        # Open for writing and close straight away; the first assertion
        # below expects the path to exist after this.
        Hfile(hostname, port, path, mode='w').close()

        filesystem = Hfilesystem(hostname, port)
        renamed_path = path + 'renamed'
        missing_path = path + 'doesnotexist'

        self.assertTrue(filesystem.exists(path))
        self.assertFalse(filesystem.exists(missing_path))

        self.assertTrue(filesystem.rename(path, renamed_path))

        self.assertTrue(filesystem.delete(renamed_path))
        # The file now lives under the new name, so deleting the old name
        # is expected to report failure.
        self.assertFalse(filesystem.delete(path))
Esempio n. 2
0
  def test_filesystem(self):
    """Check exists/rename/delete against a file created by this test."""
    handle = Hfile(hostname, port, path, mode='w')
    handle.close()

    hdfs = Hfilesystem(hostname, port)
    moved_path = path + 'renamed'

    # The path just written must be reported; an unrelated one must not.
    path_found = hdfs.exists(path)
    self.assertTrue(path_found)
    bogus_found = hdfs.exists(path + 'doesnotexist')
    self.assertFalse(bogus_found)

    rename_ok = hdfs.rename(path, moved_path)
    self.assertTrue(rename_ok)

    # Deleting succeeds for the new name and is expected to fail for the
    # old one, which was renamed away above.
    delete_moved_ok = hdfs.delete(moved_path)
    self.assertTrue(delete_moved_ok)
    delete_old_ok = hdfs.delete(path)
    self.assertFalse(delete_old_ok)
Esempio n. 3
0
 def test_mkdir(self):
   """Create a directory at ``path`` and remove it again."""
   filesystem = Hfilesystem(hostname, port)

   created = filesystem.mkdir(path)
   self.assertTrue(created)

   # Clean up; delete() is expected to report success for the new directory.
   removed = filesystem.delete(path)
   self.assertTrue(removed)
Esempio n. 4
0
  hfile.write(line)

# And close them.
fh.close()
hfile.close()

# Read local_path into memory for comparison. The with-block closes the
# local handle as soon as its contents are loaded instead of leaking it.
with open(local_path) as local_file:
  motd = local_file.read()

# Now let's read the data back
hfile = Hfile(hostname, port, hdfs_path)

# With an iterator: join the lines in one pass rather than growing a
# string with repeated +=.
data_read_from_hdfs = ''.join(hfile)
print(motd == data_read_from_hdfs)

# All at once
# NOTE(review): the handle was just iterated to its end; whether read()
# starts again from offset 0 is not visible here - confirm, or reopen the
# file before this call.
data_read_from_hdfs = hfile.read()
print(motd == data_read_from_hdfs)

hfile.close()

# Hopefully you have enough info to get started!

from hdfs.hfilesystem import Hfilesystem
hfs = Hfilesystem(hostname, port)
print(hfs.getHosts(hdfs_path, 0, 1))

Esempio n. 5
0
 def test_mkdir(self):
     """mkdir() followed by delete() on ``path`` must both report success."""
     hdfs = Hfilesystem(hostname, port)
     # Run the two operations in order, asserting on each result as it
     # is produced.
     for operation in (hdfs.mkdir, hdfs.delete):
         self.assertTrue(operation(path))
Esempio n. 6
0
 def __init__(self, hostname='speedy', port=8020):
   """Connect to the filesystem and remember its ``hdfs://host:port`` prefix.

   hostname -- host passed to Hfilesystem (default 'speedy').
   port     -- port passed to Hfilesystem (default 8020).
   """
   self.hfs = Hfilesystem(hostname, port)
   # Keep the prefix and its length as attributes for later use.
   url_prefix = 'hdfs://' + hostname + ':' + str(port)
   self.urlhead = url_prefix
   self.urlheadlen = len(url_prefix)
Esempio n. 7
0
class DFS(object):
  """Convenience wrapper around an ``Hfilesystem`` connection.

  Adds existence-guarded directory operations, sub-directory listings
  filtered by marker files (``DONE_TAG``, ``STARTED_TAG``, ``FINISHED_TAG``)
  and recursive size accounting.

  NOTE(review): the helpers disagree about what ``Hfilesystem.listdir``
  returns. ``get_abs_subdirs`` strips a ``urlhead``-sized prefix from each
  entry and the size helpers hand entries straight back to the filesystem,
  while other helpers first join each entry onto ``dirname``. Confirm the
  entry format against the hdfs library before relying on the listings.
  """

  def __init__(self, hostname='speedy', port=8020):
    """Connect to ``hostname:port`` and remember the ``hdfs://`` URL prefix."""
    self.hfs = Hfilesystem(hostname, port)
    self.urlhead = 'hdfs://' + hostname + ':' + str(port)
    self.urlheadlen = len(self.urlhead)

  def mkdir(self, dirname):
    """Create ``dirname`` unless it already exists."""
    if not self.exists(dirname):
      self.hfs.mkdir(dirname)

  def rmdir(self, dirname):
    """Delete ``dirname`` if it exists; do nothing otherwise."""
    if self.exists(dirname):
      # The filesystem object itself is not callable; delete() is the
      # removal operation.
      self.hfs.delete(dirname)

  def rm_rf(self, d):
    """Remove ``d``; currently just an alias for ``rmdir``."""
    self.rmdir(d)

  def rename(self, srcpath, destpath):
    """Move ``srcpath`` to ``destpath``, replacing an existing destination."""
    # The destination lives on HDFS, so it is checked there and not on the
    # local disk.
    if self.exists(destpath):
      self.rm_rf(destpath)
    self.hfs.rename(srcpath, destpath)

  def is_done(self, dirname):
    """Return whether ``dirname`` contains the ``DONE_TAG`` marker."""
    return self.exists(os.path.join(dirname, DONE_TAG))

  def _subdir_entries(self, dirname, checkdone):
    """Return the raw ``listdir`` entries of ``dirname`` that are directories.

    With ``checkdone`` set, only directories for which ``is_done`` holds
    are kept. A missing ``dirname`` yields an empty list.
    """
    if not self.exists(dirname):
      return []
    entries = []
    for entry in self.hfs.listdir(dirname):
      entry_path = os.path.join(dirname, entry)
      if not self.hfs.isDir(entry_path):
        continue
      if checkdone and not self.is_done(entry_path):
        continue
      entries.append(entry)
    return entries

  def get_subdirs(self, dirname, checkdone = False):
    """Return the last path component of each sub-directory of ``dirname``."""
    # rsplit keeps everything after the final '/', and also copes with an
    # entry that contains no '/' at all.
    return [entry.rsplit('/', 1)[-1]
            for entry in self._subdir_entries(dirname, checkdone)]

  def get_abs_subdirs(self, dirname, checkdone = False):
    """Return each sub-directory entry with the first ``urlheadlen`` chars cut."""
    return [entry[self.urlheadlen:]
            for entry in self._subdir_entries(dirname, checkdone)]

  def get_unfinished_subdirs(self, dirname, jobname = '', checkdone = False):
    """Return sub-directory names lacking the ``jobname + FINISHED_TAG`` marker."""
    return [sdir for sdir in self.get_subdirs(dirname, checkdone) if
               not self.exists(os.path.join(dirname, sdir, jobname + FINISHED_TAG))]

  def get_buffered_subdirs(self, dirname, jobname = '', checkdone = False):
    """Return sub-directory names with neither a FINISHED nor a STARTED marker."""
    # checkdone is forwarded; it used to be accepted and silently ignored.
    return [sdir for sdir in self.get_subdirs(dirname, checkdone) if
               not self.exists(os.path.join(dirname, sdir, jobname + FINISHED_TAG))
               and not self.exists(os.path.join(dirname, sdir, jobname + STARTED_TAG))]

  def get_unfinished_abs_subdirs(self, dirname, jobname = '', checkdone = False):
    """Like ``get_unfinished_subdirs`` but based on ``get_abs_subdirs``."""
    return [sdir for sdir in self.get_abs_subdirs(dirname, checkdone) if
               not self.exists(os.path.join(dirname, sdir, jobname + FINISHED_TAG))]

  def get_buffered_abs_subdirs(self, dirname, jobname = '', checkdone = False):
    """Like ``get_buffered_subdirs`` but based on ``get_abs_subdirs``.

    Each result is ``os.path.join(dirname, sdir)``.
    """
    # checkdone is forwarded; it used to be accepted and silently ignored.
    return [os.path.join(dirname, sdir) for sdir in self.get_abs_subdirs(dirname, checkdone) if
               not self.exists(os.path.join(dirname, sdir, jobname + FINISHED_TAG))
               and not self.exists(os.path.join(dirname, sdir, jobname + STARTED_TAG))]

  def get_subfiles(self, dirname):
    """Return the ``listdir`` entries of ``dirname`` that are files."""
    if not self.exists(dirname):
      return []
    return [sfile for sfile in self.hfs.listdir(dirname)
            if self.hfs.isFile(os.path.join(dirname, sfile))]

  def get_abs_subfiles(self, dirname):
    """Return every entry of ``get_subfiles`` joined onto ``dirname``."""
    return [os.path.join(dirname, sfile) for sfile in self.get_subfiles(dirname)]

  def get_subdir_num(self, dirname):
    """Return the number of sub-directories of ``dirname``."""
    return len(self.get_subdirs(dirname))

  def get_unfinished_subdir_num(self, dirname, jobname = '', checkdone = False):
    """Return ``len(get_unfinished_subdirs(...))`` for the same arguments."""
    return len(self.get_unfinished_subdirs(dirname, jobname, checkdone = checkdone))

  def get_buffered_subdir_num(self, dirname, jobname = '', checkdone = False):
    """Return ``len(get_buffered_subdirs(...))`` for the same arguments."""
    return len(self.get_buffered_subdirs(dirname, jobname, checkdone = checkdone))

  def get_subfile_num(self, dirname):
    """Return the number of files directly inside ``dirname``."""
    return len(self.get_subfiles(dirname))

  def _children_size(self, dirname):
    """Sum the sizes of the entries listed under ``dirname``.

    Files contribute ``stat(entry).mSize``; every other entry is measured
    recursively through ``get_dir_size``.
    """
    # A plain 0 replaces the Python-2-only literal 0L; Python 2 promotes
    # to long automatically when the sum grows large.
    size = 0
    for node in self.hfs.listdir(dirname):
      if self.hfs.isFile(node):
        size += self.hfs.stat(node).mSize
      else:
        size += self.get_dir_size(node)
    return size

  # recursive
  def get_dir_size(self, dirname):
    """Return the total size of everything under ``dirname`` (0 if missing)."""
    if not self.exists(dirname):
      return 0
    return self._children_size(dirname)

  def get_unfinished_dir_size(self, dirname, jobname = ''):
    """Return the size of ``dirname`` unless it carries the FINISHED marker.

    A missing directory, or one containing ``jobname + FINISHED_TAG``,
    counts as 0. Nested directories are measured with ``get_dir_size``,
    i.e. without looking at their own markers.
    """
    if not self.exists(dirname):
      return 0
    if self.exists(os.path.join(dirname, jobname + FINISHED_TAG)):
      return 0
    return self._children_size(dirname)

  def get_buffered_dir_size(self, dirname, jobname = ''):
    """Return the size of ``dirname`` unless it carries the FINISHED marker.

    NOTE(review): unlike ``get_buffered_subdirs`` this does not look at
    ``STARTED_TAG``, so it returns the same value as
    ``get_unfinished_dir_size`` - confirm whether that is intended.
    """
    if not self.exists(dirname):
      return 0
    if self.exists(os.path.join(dirname, jobname + FINISHED_TAG)):
      return 0
    return self._children_size(dirname)

  def exists(self, pathname):
    """Return whether ``pathname`` exists on the wrapped filesystem."""
    return self.hfs.exists(pathname)