Example #1
    def _put(self, source, identity):
        '''Put the source to the remote, creating a compressed version if
        it is not originally compressed'''
        
        from databundles.util import bundle_file_type
        import gzip
        import os, tempfile, uuid
        from databundles.identity import ObjectNumber, DatasetNumber, PartitionNumber
        
        id_ = identity.id_
        
        on = ObjectNumber.parse(id_)
 
        if not on:
            raise ValueError("Failed to parse id: '{}'".format(id_))
 
        if not isinstance(on, (DatasetNumber, PartitionNumber)):
            raise ValueError("Object number '{}' is neither for a dataset nor partition".format(id_))
 
        type_ = bundle_file_type(source)

        if type_ in ('sqlite', 'hdf'):
            import shutil

            # It is an uncompressed sqlite or HDF file, so gzip it into a
            # temporary file before sending it.
            cf = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

            try:
                with gzip.open(cf, 'wb') as out_f:
                    try:
                        # `source` may already be an open file-like object ...
                        shutil.copyfileobj(source, out_f)
                    except AttributeError:
                        # ... or a path, in which case open it first.
                        with open(source, 'rb') as in_f:
                            shutil.copyfileobj(in_f, out_f)

                with open(cf, 'rb') as sf_:
                    if isinstance(on, DatasetNumber):
                        response = self.remote.datasets(id_).put(sf_)
                    else:
                        response = self.remote.datasets(str(on.dataset)).partitions(str(on)).put(sf_)

            finally:
                if os.path.exists(cf):
                    os.remove(cf)
       
        elif type_ == 'gzip':
            # The file is already gzipped, so send it as-is.

            if isinstance(on, DatasetNumber):
                response = self.remote.datasets(id_).put(source)
            else:
                response = self.remote.datasets(str(on.dataset)).partitions(str(on)).put(source)

        else:
            raise Exception("Bad file for id {}; got type: {}".format(id_, type_))

        raise_for_status(response)
        
        return response
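
Every example in this set keys its behavior off databundles.util.bundle_file_type(), whose implementation is not shown here. A minimal sketch, assuming it only needs to tell the formats used above apart by their magic bytes, might look like the following; the name bundle_file_type_sketch and the 16-byte probe are illustrative assumptions, not the library's actual code.

def bundle_file_type_sketch(path_or_file):
    """Guess a file's type from its magic bytes: 'gzip', 'sqlite', 'hdf', or None.

    Illustrative sketch only. Accepts either a path or a seekable file-like
    object, mirroring how the _put() methods above pass both kinds of source."""
    if hasattr(path_or_file, 'read'):
        pos = path_or_file.tell()
        header = path_or_file.read(16)
        path_or_file.seek(pos)  # leave the stream where we found it
    else:
        with open(path_or_file, 'rb') as f:
            header = f.read(16)

    if header[:2] == b'\x1f\x8b':              # gzip magic number
        return 'gzip'
    if header[:16] == b'SQLite format 3\x00':  # SQLite 3 header string
        return 'sqlite'
    if header[:8] == b'\x89HDF\r\n\x1a\n':     # HDF5 format signature
        return 'hdf'
    return None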
Example #2
    def _put(self, id_,source):
        '''Put the source to the remote, creating a compressed version if
        it is not originally compressed'''
        
        from databundles.util import bundle_file_type
        import gzip
        import os, tempfile, uuid
        from databundles.identity import ObjectNumber, DatasetNumber, PartitionNumber
        
        on = ObjectNumber.parse(id_)
 
        if not on:
            raise ValueError("Failed to parse id: '{}'".format(id_))
 
        if not isinstance(on, (DatasetNumber, PartitionNumber)):
            raise ValueError("Object number '{}' is neither for a dataset nor partition".format(id_))
 
        type_ = bundle_file_type(source)

        if type_ == 'sqlite':
            # It is a plain sqlite file, so compress it into a temporary file
            # before sending it.
            cf = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

            try:
                # `source` is expected to be an open file-like object here.
                with gzip.open(cf, 'wb') as f:
                    f.writelines(source)

                with open(cf, 'rb') as source:
                    if isinstance(on, DatasetNumber):
                        response = self.api.datasets(id_).put(source)
                    else:
                        response = self.api.datasets(str(on.dataset)).partitions(str(on)).put(source)

            finally:
                if os.path.exists(cf):
                    os.remove(cf)
       
        elif type_ == 'gzip':
            # The file is already gzipped, so send it as-is.

            if isinstance(on, DatasetNumber):
                response = self.api.datasets(id_).put(source)
            else:
                response = self.api.datasets(str(on.dataset)).partitions(str(on)).put(source)

        else:
            raise Exception("Bad file for id '{}'; got type: {}".format(id_, type_))


        raise_for_status(response)
        
        return response
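
Both _put() variants finish by calling raise_for_status(), which is not imported in these snippets; it presumably lives in the surrounding module and checks the REST response for an error status, much as requests' Response.raise_for_status() does. A hedged stand-in, assuming the response object exposes a numeric status attribute, could be:

def raise_for_status_sketch(response):
    """Raise if the remote returned an HTTP error status (illustrative only)."""
    status = getattr(response, 'status', None) or getattr(response, 'status_code', None)
    if status is not None and status >= 400:
        raise Exception("Remote request failed with status {}".format(status))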
Example #3
    def get_stream(self, rel_path, cb=None, return_meta=False):
        from ..util import bundle_file_type
        from ..util.flo import MetadataFlo
        import gzip

        source = self.upstream.get_stream(self._rename(rel_path), return_meta=return_meta)

        if not source:
            return None
  
        if bundle_file_type(source) == 'gzip':
            logger.debug("CC returning {} with decompression".format(rel_path)) 
            return MetadataFlo(gzip.GzipFile(fileobj=source), source.meta)
        else:
            logger.debug("CC returning {} with passthrough".format(rel_path)) 
            return source
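
MetadataFlo is imported from ..util.flo but not shown here. Judging from how it is used, a plausible minimal version is a thin file-like wrapper that attaches the upstream metadata to the decompressed stream; this sketch is an assumption, not the library's actual implementation.

class MetadataFloSketch(object):
    """File-like wrapper that carries a .meta attribute alongside the stream."""

    def __init__(self, flo, meta):
        self._flo = flo
        self.meta = meta  # metadata travels with the wrapped stream

    def read(self, *args, **kwargs):
        return self._flo.read(*args, **kwargs)

    def close(self):
        return self._flo.close()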
Example #4
    def put(self, source, rel_path, metadata=None):
        from databundles.util import bundle_file_type
        import gzip

        if not metadata:
            metadata = {}

        # Everything stored upstream ends up gzipped, so mark the entry accordingly.
        metadata['Content-Encoding'] = 'gzip'

        sink = self.upstream.put_stream(self._rename(rel_path), metadata=metadata)

        if bundle_file_type(source) == 'gzip':
            # Pass through if the file is already compressed.
            copy_file_or_flo(source, sink)
        else:
            # Otherwise compress it on the way into the upstream cache.
            gz = gzip.GzipFile(fileobj=sink, mode='wb')
            copy_file_or_flo(source, gz)
            gz.close()  # flush the gzip trailer before the sink is closed
      
        sink.close()
        
        #self.put_metadata(rel_path, metadata)
        
        return self.path(self._rename(rel_path))
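
copy_file_or_flo() is used here without an import, so it presumably comes from databundles.util. A minimal sketch of the behavior put() relies on, assuming it accepts either a path or a file-like object on each side (Python 2, hence basestring):

import shutil

def copy_file_or_flo_sketch(source, dest, buffer_size=64 * 1024):
    """Stream bytes from a path or file-like object to a path or file-like object."""
    src = open(source, 'rb') if isinstance(source, basestring) else source
    dst = open(dest, 'wb') if isinstance(dest, basestring) else dest
    try:
        shutil.copyfileobj(src, dst, buffer_size)
    finally:
        if src is not source:
            src.close()
        if dst is not dest:
            dst.close()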
Example #5
    def x_test_put_redirect(self):
        from databundles.bundle import DbBundle
        from databundles.library import QueryCommand
        from databundles.util import md5_for_file, rm_rf, bundle_file_type

        #
        # Simple out and retrieve
        # 
        cache = self.bundle.filesystem._get_cache(self.server_rc.filesystem, 'direct-remote')
        cache2 = self.bundle.filesystem._get_cache(self.server_rc.filesystem, 'direct-remote-2')

        rm_rf(os.path.dirname(cache.cache_dir))
        rm_rf(os.path.dirname(cache2.cache_dir))
        
        cache.put(self.bundle.database.path, 'direct')

        path = cache2.get('direct')

        self.assertEquals('sqlite', bundle_file_type(path))

        cache.remove('direct', propagate=True)

        #
        #  Connect through server. 
        #
        rm_rf('/tmp/server')
        self.start_server(name='default-remote')
        
        api = None  # Rest(self.server_url, self.rc.accounts) -- REST client stubbed out, so the calls below will not run

        # Upload directly, then download via the cache. 
        
        cache.remove(self.bundle.identity.cache_key, propagate=True)

        r = api.upload_file(self.bundle.identity, self.bundle.database.path, force=True)

        path = cache.get(self.bundle.identity.cache_key)
        
        b = DbBundle(path)

        self.assertEquals("source-dataset-subset-variation-ca0d",b.identity.name )
      
        #
        # Full service
        #

        p  = self.bundle.partitions.all[0]

        cache.remove(self.bundle.identity.cache_key, propagate=True)
        cache.remove(p.identity.cache_key, propagate=True)

        r = api.put(self.bundle.database.path, self.bundle.identity)
        print "Put {}".format(r.object)
        r = api.put(p.database.path, p.identity)
        print "Put {}".format(r.object)

        r = api.put(p.database.path, p.identity)

        r = api.get(p.identity, '/tmp/foo.db')
        print "Get {}".format(r)

        b = DbBundle(r)

        self.assertEquals("source-dataset-subset-variation-ca0d",b.identity.name )