Ejemplo n.º 1
0
 def stream_read(self, path, bytes_range=None):
     """Yield the content of ``path`` in chunks read from its local copy.

     The local and HDFS locations are obtained from
     ``self._init_path(path)``. When the local file does not exist yet it
     is fetched with ``hadoopy.get`` before reading.

     Args:
         path: key passed to ``self._init_path``.
         bytes_range: optional ``(first, last)`` pair of byte offsets, both
             inclusive. When falsy the whole file is streamed.

     Yields:
         Byte strings of at most ``self.buffer_size`` bytes each.

     Raises:
         exceptions.FileNotFoundError: if an IOError occurs while opening
             or reading the local file.
     """
     local_path, hdfs_path = self._init_path(path)
     self._create_local(local_path)
     if not os.path.exists(local_path):
         hadoopy.get(hdfs_path, local_path)
     try:
         with open(local_path, mode='rb') as f:
             # ``remaining`` is None when no range was requested, otherwise
             # the number of bytes still to be served from the range.
             remaining = None
             if bytes_range:
                 f.seek(bytes_range[0])
                 # Both ends are inclusive, hence the + 1.
                 remaining = bytes_range[1] - bytes_range[0] + 1
             while True:
                 chunk_size = self.buffer_size
                 if remaining is not None:
                     # Never read past the end of the requested range.
                     chunk_size = min(chunk_size, remaining)
                     if chunk_size <= 0:
                         break
                 buf = f.read(chunk_size)
                 if not buf:
                     # End of file (possibly before the range was filled).
                     break
                 if remaining is not None:
                     remaining -= len(buf)
                 yield buf
     except IOError:
         raise exceptions.FileNotFoundError('%s is not there' % path)
Ejemplo n.º 2
0
    def get_content(self, path):
        """Return the whole content of ``path`` read from its local copy.

        The local and HDFS locations are obtained from
        ``self._init_path(path)``. When the local file does not exist yet
        it is fetched with ``hadoopy.get`` before reading.

        Args:
            path: key passed to ``self._init_path``.

        Returns:
            The file content, read in binary mode.

        Raises:
            exceptions.FileNotFoundError: if fetching or reading the file
                raises any exception; the message carries the cause.
        """
        local_path, hdfs_path = self._init_path(path)
        self._create_local(local_path)
        try:
            if not os.path.exists(local_path):
                hadoopy.get(hdfs_path, local_path)
            with open(local_path, mode='rb') as f:
                return f.read()
        except Exception as e:
            # Only IOError/OSError carry ``strerror``; fall back to the
            # exception itself so other failures do not raise
            # AttributeError here and hide the real cause.
            reason = getattr(e, 'strerror', None) or e
            raise exceptions.FileNotFoundError('%s is not there (%s)'
                                               % (local_path, reason))
Ejemplo n.º 3
0
def _record_to_fp(v):
    """Get data from a record 'v' and return a file object to it

    Args:
        v: record

    Returns:
        File object (either a NamedTemporaryFile or StringIO)
    """
    try:
        val = v['data']
        if not val:  # Empty data
            raise KeyError
        return StringIO.StringIO(val)
    except KeyError:
        try:
            fn = tempfile.NamedTemporaryFile().name
            hadoopy.get(v['hdfs_path'], fn)
            fp = _DelFile(fn)
            return fp
        except KeyError:
            raise ValueError("Can't find data or hdfs_path in record,"
                             " at least one is required.")
Ejemplo n.º 4
0
def copyFromHDFS(sourceMapfilePath,localDistPath):
    """Fetch ``sourceMapfilePath`` to ``localDistPath`` via ``hadoopy.get``.

    Args:
        sourceMapfilePath: source path handed to ``hadoopy.get``.
        localDistPath: destination path handed to ``hadoopy.get``.

    Returns:
        False when ``hadoopy.get`` raises; the exception is logged with
        its traceback instead of being propagated.
        NOTE(review): no explicit success value is returned in the lines
        visible here - confirm what callers expect on success.
    """
    try:
        hadoopy.get(sourceMapfilePath, localDistPath)
    # ``as`` is accepted by Python 2.6+ and 3.x; the comma form is
    # Python 2 only.
    except Exception as e:
        logging.exception(e)
        return False