def _put(self, source, identity):
    '''Put the source to the remote, creating a compressed version if it
    is not originally compressed.

    Args:
        source: a file path or a file-like object holding the bundle data.
        identity: object whose ``id_`` names a dataset or partition on
            the remote.

    Returns:
        The remote's response object (after ``raise_for_status``).

    Raises:
        ValueError: if ``identity.id_`` does not parse, or parses to
            something other than a dataset or partition number.
        Exception: if the source file type is not sqlite, hdf or gzip.
    '''
    from databundles.util import bundle_file_type
    import gzip
    import os, tempfile, uuid
    from databundles.identity import ObjectNumber, DatasetNumber, PartitionNumber

    id_ = identity.id_

    on = ObjectNumber.parse(id_)

    if not on:
        raise ValueError("Failed to parse id: '{}'".format(id_))

    if not isinstance(on, (DatasetNumber, PartitionNumber)):
        raise ValueError("Object number '{}' is neither for a dataset nor partition".format(id_))

    type_ = bundle_file_type(source)

    if type_ == 'sqlite' or type_ == 'hdf':
        import shutil

        # If it is a plain sqlite or hdf file, compress it before sending it.
        # Assign cf BEFORE the try so the finally clause never sees an
        # undefined name.
        cf = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            with gzip.open(cf, 'wb') as out_f:
                try:
                    # source may already be an open file-like object ...
                    shutil.copyfileobj(source, out_f)
                except AttributeError:
                    # ... or a path. The payload is a binary database
                    # file, so it must be opened in binary mode.
                    with open(source, 'rb') as in_f:
                        shutil.copyfileobj(in_f, out_f)

            # Re-open the compressed copy in binary mode for the upload.
            with open(cf, 'rb') as sf_:
                if isinstance(on, DatasetNumber):
                    response = self.remote.datasets(id_).put(sf_)
                else:
                    response = self.remote.datasets(str(on.dataset)).partitions(str(on)).put(sf_)
        finally:
            # Always remove the temporary compressed copy.
            if os.path.exists(cf):
                os.remove(cf)

    elif type_ == 'gzip':
        # The file is already gzipped, so send it unchanged.
        if isinstance(on, DatasetNumber):
            response = self.remote.datasets(id_).put(source)
        else:
            response = self.remote.datasets(str(on.dataset)).partitions(str(on)).put(source)

    else:
        raise Exception("Bad file for id {} got type: {} ".format(id_, type_))

    raise_for_status(response)

    return response
def _put(self, id_, source):
    '''Put the source to the remote, creating a compressed version if it
    is not originally compressed.

    Args:
        id_: the object id string for a dataset or partition.
        source: a file path or a file-like object holding the bundle data.

    Returns:
        The remote's response object (after ``raise_for_status``).

    Raises:
        ValueError: if ``id_`` does not parse, or parses to something
            other than a dataset or partition number.
        Exception: if the source file type is not sqlite or gzip.
    '''
    from databundles.util import bundle_file_type
    import gzip
    import os, tempfile, uuid
    from databundles.identity import ObjectNumber, DatasetNumber, PartitionNumber

    on = ObjectNumber.parse(id_)

    if not on:
        raise ValueError("Failed to parse id: '{}'".format(id_))

    if not isinstance(on, (DatasetNumber, PartitionNumber)):
        raise ValueError("Object number '{}' is neither for a dataset nor partition".format(id_))

    type_ = bundle_file_type(source)

    if type_ == 'sqlite':
        import shutil

        # If it is a plain sqlite file, compress it before sending it.
        # Assign cf BEFORE the try so the finally clause never sees an
        # undefined name.
        cf = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            # Use a context manager so the gzip stream is finalized and
            # closed even if the copy raises. The original
            # ``f.writelines(source)`` silently wrote the characters of
            # the *path* when source was a filename, producing a
            # corrupt upload; copy actual file contents instead.
            with gzip.open(cf, 'wb') as out_f:
                try:
                    # source may already be an open file-like object ...
                    shutil.copyfileobj(source, out_f)
                except AttributeError:
                    # ... or a path; open the binary database file in
                    # binary mode.
                    with open(source, 'rb') as in_f:
                        shutil.copyfileobj(in_f, out_f)

            # Re-open the compressed copy in binary mode for the upload.
            with open(cf, 'rb') as sf_:
                if isinstance(on, DatasetNumber):
                    response = self.api.datasets(id_).put(sf_)
                else:
                    response = self.api.datasets(str(on.dataset)).partitions(str(on)).put(sf_)
        finally:
            # Always remove the temporary compressed copy.
            if os.path.exists(cf):
                os.remove(cf)

    elif type_ == 'gzip':
        # The file is already gzipped, so send it unchanged.
        if isinstance(on, DatasetNumber):
            response = self.api.datasets(id_).put(source)
        else:
            response = self.api.datasets(str(on.dataset)).partitions(str(on)).put(source)

    else:
        raise Exception("Bad file got type: {} ".format(type_))

    raise_for_status(response)

    return response
def get_stream(self, rel_path, cb=None, return_meta=False):
    '''Return a readable stream for rel_path from the upstream cache,
    transparently decompressing it when the stored object is gzipped.

    Returns None when the upstream has no stream for the (renamed) path.
    '''
    from ..util import bundle_file_type
    from ..util.flo import MetadataFlo
    import gzip

    renamed = self._rename(rel_path)
    stream = self.upstream.get_stream(renamed, return_meta=return_meta)

    # Nothing upstream for this path.
    if not stream:
        return None

    # Plain (non-gzip) content passes through untouched.
    if bundle_file_type(stream) != 'gzip':
        logger.debug("CC returning {} with passthrough".format(rel_path))
        return stream

    # Wrap the compressed stream so callers read decompressed bytes,
    # preserving the upstream stream's metadata.
    logger.debug("CC returning {} with decompression".format(rel_path))
    return MetadataFlo(gzip.GzipFile(fileobj=stream), stream.meta)
def put(self, source, rel_path, metadata=None):
    '''Store source in the upstream cache under rel_path, gzipping it on
    the way in unless it is already gzip-compressed.

    Args:
        source: a file path or a file-like object to store.
        rel_path: the cache-relative path to store it under.
        metadata: optional dict of metadata to attach; a
            ``Content-Encoding: gzip`` entry is always added, since the
            stored object is always gzipped.

    Returns:
        The cache path for the (renamed) rel_path.
    '''
    from databundles.util import bundle_file_type
    import gzip

    if not metadata:
        metadata = {}

    # Everything stored through this cache is gzipped: either the source
    # already is, or we compress it below.
    metadata['Content-Encoding'] = 'gzip'

    sink = self.upstream.put_stream(self._rename(rel_path), metadata=metadata)

    if bundle_file_type(source) == 'gzip':
        # Pass through if the file is already compressed.
        copy_file_or_flo(source, sink)
    else:
        # The GzipFile wrapper MUST be closed before the sink: closing
        # flushes buffered data and writes the gzip trailer (CRC32 and
        # length). The original code never closed it, leaving a
        # truncated, unreadable gzip object in the cache.
        gz = gzip.GzipFile(fileobj=sink, mode='wb')
        copy_file_or_flo(source, gz)
        gz.close()

    sink.close()

    return self.path(self._rename(rel_path))
def x_test_put_redirect(self):
    '''Disabled integration test (note the ``x_`` prefix keeps the test
    runner from collecting it) exercising cache put/get against a local
    server: direct cache round-trip, upload via the REST api, then a
    full put/get cycle for a bundle and one of its partitions.

    NOTE(review): ``api`` is set to None below with the real constructor
    commented out, so every ``api.*`` call would raise AttributeError if
    this test were re-enabled — restore the Rest(...) call first.
    '''
    from databundles.bundle import DbBundle
    from databundles.library import QueryCommand
    from databundles.util import md5_for_file, rm_rf, bundle_file_type

    #
    # Simple out and retrieve
    #
    # Two independent caches pointed at the same remote filesystem, so a
    # put through one should be visible through the other.
    cache = self.bundle.filesystem._get_cache(self.server_rc.filesystem, 'direct-remote')
    cache2 = self.bundle.filesystem._get_cache(self.server_rc.filesystem, 'direct-remote-2')

    # Start from empty cache directories.
    rm_rf(os.path.dirname(cache.cache_dir))
    rm_rf(os.path.dirname(cache2.cache_dir))

    cache.put( self.bundle.database.path, 'direct')
    path = cache2.get('direct')
    # The retrieved copy must come back decompressed as a sqlite file.
    self.assertEquals('sqlite',bundle_file_type(path))

    cache.remove('direct', propagate = True)

    #
    # Connect through server.
    #
    rm_rf('/tmp/server')
    self.start_server(name='default-remote')

    api = None # Rest(self.server_url, self.rc.accounts)

    # Upload directly, then download via the cache.
    cache.remove(self.bundle.identity.cache_key, propagate = True)
    r = api.upload_file(self.bundle.identity, self.bundle.database.path, force=True )
    path = cache.get(self.bundle.identity.cache_key)
    b = DbBundle(path)
    # Sanity-check that the downloaded bundle is the one we uploaded.
    self.assertEquals("source-dataset-subset-variation-ca0d",b.identity.name )

    #
    # Full service
    #
    # Put the bundle and one partition through the api, then get the
    # partition back and verify it opens as the same bundle.
    p = self.bundle.partitions.all[0]
    cache.remove(self.bundle.identity.cache_key, propagate = True)
    cache.remove(p.identity.cache_key, propagate = True)

    r = api.put( self.bundle.database.path, self.bundle.identity )
    print "Put {}".format(r.object)

    r = api.put(p.database.path, p.identity )
    print "Put {}".format(r.object)

    # Second put of the same partition — presumably checking that a
    # repeated put is accepted; verify intent before re-enabling.
    r = api.put(p.database.path, p.identity )

    r = api.get(p.identity,'/tmp/foo.db')
    print "Get {}".format(r)

    b = DbBundle(r)
    self.assertEquals("source-dataset-subset-variation-ca0d",b.identity.name )