def stream_read(self, path, bytes_range=None):
    """Yield the content of ``path`` in chunks of at most ``self.buffer_size``.

    ``path`` is resolved to a ``(local_path, hdfs_path)`` pair through
    ``self._init_path``. When nothing exists at ``local_path`` the file is
    first fetched with ``hadoopy.get(hdfs_path, local_path)``; the local
    copy is then opened in binary mode and streamed.

    Args:
        path: storage path, resolved through ``self._init_path``.
        bytes_range: optional ``(first, last)`` pair of inclusive byte
            offsets. When given (and truthy) only that slice is yielded;
            a range that extends past the end of the file is truncated at
            the end of the file, and an empty/reversed range yields nothing.

    Yields:
        Non-empty chunks read from the local copy.

    Raises:
        exceptions.FileNotFoundError: if opening or reading the local copy
            raises IOError.
    """
    local_path, hdfs_path = self._init_path(path)
    # NOTE(review): presumably prepares the local location for the copy;
    # one call is enough, the original repeated it before the fetch.
    self._create_local(local_path)
    if not os.path.exists(local_path):
        # NOTE(review): errors raised by the fetch itself propagate
        # unchanged; only the local open/read below is translated.
        hadoopy.get(hdfs_path, local_path)
    try:
        with open(local_path, mode='rb') as f:
            # ``remaining`` is None for an unbounded read, otherwise the
            # number of bytes of the requested range still to be sent.
            remaining = None
            if bytes_range:
                f.seek(bytes_range[0])
                remaining = bytes_range[1] - bytes_range[0] + 1
            while True:
                size = self.buffer_size
                if remaining is not None:
                    # Never read past the end of the requested range.
                    size = min(size, remaining)
                    if size <= 0:
                        break
                buf = f.read(size)
                if not buf:
                    # End of file (possibly before the end of the range).
                    break
                if remaining is not None:
                    remaining -= len(buf)
                yield buf
    except IOError:
        raise exceptions.FileNotFoundError('%s is not there' % path)
def get_content(self, path):
    """Return the full content of ``path``, read in binary mode.

    ``path`` is resolved to a ``(local_path, hdfs_path)`` pair through
    ``self._init_path``. When nothing exists at ``local_path`` the file is
    first fetched with ``hadoopy.get(hdfs_path, local_path)``.

    Args:
        path: storage path, resolved through ``self._init_path``.

    Returns:
        The content of the local copy.

    Raises:
        exceptions.FileNotFoundError: if fetching, opening or reading the
            file fails for any reason; the message carries the local path
            and the underlying cause.
    """
    local_path, hdfs_path = self._init_path(path)
    self._create_local(local_path)
    try:
        if not os.path.exists(local_path):
            hadoopy.get(hdfs_path, local_path)
        with open(local_path, mode='rb') as f:
            return f.read()
    except Exception as e:
        # Only OS-level errors carry ``strerror`` (and it may be None);
        # fall back to the exception itself so the handler never fails
        # with AttributeError and the real cause stays visible.
        reason = getattr(e, 'strerror', None) or e
        raise exceptions.FileNotFoundError(
            '%s is not there (%s)' % (local_path, reason))
def _record_to_fp(v): """Get data from a record 'v' and return a file object to it Args: v: record Returns: File object (either a NamedTemporaryFile or StringIO) """ try: val = v['data'] if not val: # Empty data raise KeyError return StringIO.StringIO(val) except KeyError: try: fn = tempfile.NamedTemporaryFile().name hadoopy.get(v['hdfs_path'], fn) fp = _DelFile(fn) return fp except KeyError: raise ValueError("Can't find data or hdfs_path in record," " at least one is required.")
def copyFromHDFS(sourceMapfilePath, localDistPath):
    """Copy ``sourceMapfilePath`` from HDFS to ``localDistPath``.

    Args:
        sourceMapfilePath: HDFS path handed to ``hadoopy.get`` as source.
        localDistPath: local path handed to ``hadoopy.get`` as destination.

    Returns:
        True when ``hadoopy.get`` completes without raising, False when it
        raises (the exception is logged together with its traceback).
    """
    try:
        hadoopy.get(sourceMapfilePath, localDistPath)
    except Exception as e:
        # Best-effort copy: report the failure instead of propagating it.
        logging.exception(e)
        return False
    # Explicit success value so callers can tell success from failure;
    # falling off the end returned None, which is falsy like False.
    return True