#instream is a bytes datatap but we want the file like object it reads mode = 'r' if self.compression: mode += ':' + self.compression archive = tarfile.TarFile(fileobj=instream.item_stream, mode=mode) manifest = None files = {} for tarinfo in archive: if not tarinfo.isreg(): continue fileobj = archive.extractfile(tarinfo) assert fileobj is not None files[tarinfo.path] = DjangoTarExtFile(fileobj, tarinfo) if tarinfo.name == 'manifest.json': manifest = fileobj if manifest is None: archive.extractfile('manifest.json') #TODO raise a proper exception filetap = self.get_filetap(archive, files) return JSONDataTap(StreamDataTap(manifest), filetap=filetap) def get_bytes_stream(self, instream): return instream command_option_list = [ Option('--gzip', action='store_const', const='gz', dest='compression'), Option('--bz', action='store_const', const='bz', dest='compression'), ] register_datatap('TarFile', TarFileDataTap)
item.save() yield item def commit(self): while self.deserialized_objects: instance = self.deserialized_objects.popleft() instance.save() self.deserialized_objects = None for instance in self: instance.save() @classmethod def load_from_command_line(cls, arglist, instream=None): ''' usage:: manage.py datatap Document [<app label>, ...] [<collection name>, ...] ''' parser = OptionParser(option_list=cls.command_option_list) options, args = parser.parse_args(arglist) document_sources = list() for arg in args: #list of apps and collection names if '.' in arg: document_sources.append(get_document(*arg.rsplit(".", 1))) else: #get docs from appname document_sources.extend(get_documents(arg)) return cls(instream=document_sources, **options.__dict__) register_datatap('Document', DocumentDataTap)
yield item def commit(self): while self.deserialized_objects: instance = self.deserialized_objects.popleft() instance.save() self.deserialized_objects = None for instance in self: instance.save() @classmethod def load_from_command_line(cls, arglist, instream=None): ''' usage:: manage.py datatap Document [<app label>, ...] [<collection name>, ...] ''' parser = OptionParser(option_list=cls.command_option_list) options, args = parser.parse_args(arglist) document_sources = list() for arg in args: #list of apps and collection names if '.' in arg: document_sources.append(get_document(*arg.rsplit(".", 1))) else: #get docs from appname document_sources.extend(get_documents(arg)) return cls(instream=document_sources, **options.__dict__) register_datatap('Document', DocumentDataTap)
parser = OptionParser(option_list=cls.command_option_list) options, args = parser.parse_args(arglist) kwargs = options.__dict__ kwargs['instream'] = instream if not kwargs.get('key_name') and args: kwargs['key_name'] = args.pop(0) return cls(**kwargs) @classmethod def load_from_command_line_for_write(cls, arglist, instream): ''' Retuns an instantiated DataTap with the provided arguments from commandline ''' parser = OptionParser(option_list=cls.command_option_list) options, args = parser.parse_args(arglist) kwargs = options.__dict__ kwargs['instream'] = instream if args: target = args.pop(0) else: target = kwargs.pop('key_name') datatap = cls(*args, **kwargs) def commit(*a, **k): datatap.send(target) datatap.commit = commit return datatap register_datatap('S3', S3DataTap)
return 'bytes' #bytes go in, bytes go out def get_bytes_stream(self, instream): return instream def read(self, *args, **kwargs): return self.item_stream.read(*args, **kwargs) def send(self, fileobj): #err? return self.instream.send(fileobj) def write(self, chunk): self.instream.write(chunk) register_datatap('Stream', StreamDataTap) class BufferedStreamDataTap(StreamDataTap): ''' A stream data tap that uses io.BytesIO to buffer read operations. This is useful for processing streams from the internet into modules that require random position reads like with zipfile. ''' def __init__(self, instream=None, **kwargs): if instream is not None: instream = BytesIO(instream) return super(BufferedStreamDataTap, self).__init__(instream=instream, **kwargs) def send(self, fileobj): fileobj = BytesIO(fileobj) return super(BufferedStreamDataTap, self).send(fileobj)
# NOTE(review): the statements down to `return result` are the tail of a
# method whose signature precedes this excerpt; nesting was reconstructed from
# whitespace-collapsed text -- confirm against the original file.
                result.append(Application.objects.get(slug=app_slug))
        if collections:
            for key in collections:
                collection = Collection.objects.get(key=key)
                result.append(collection)
                # remember virtual collections so their documents are appended
                # after everything else (see the final loop below)
                if isinstance(collection, VirtualDocumentCollection):
                    exported_collections.append(collection)
        if indexes:
            for name in indexes:
                # TODO ambiguous
                result.extend(Index.objects.filter(name=name))
        if subsites:
            for slug in subsites:
                result.append(Subsite.objects.get(slug=slug))
        if publicresources:
            for pk in publicresources:
                # TODO find a better identifier, uuid?
                result.append(PublicResource.objects.get(pk=pk))
        for collection in exported_collections:
            result.append(collection.get_document())
        return result

    # Every option uses action="append", so each flag may be repeated and the
    # values accumulate into a list under the given dest.
    command_option_list = [
        make_option("--application", action="append", type="string", dest="applications"),
        make_option("--collection", action="append", type="string", dest="collections"),
        make_option("--index", action="append", type="string", dest="indexes"),
        make_option("--subsite", action="append", type="string", dest="subsites"),
        make_option("--publicresource", action="append", type="string", dest="publicresources"),
    ]

register_datatap("DocKitCMS", DocKitCMSDataTap)
assert False, 'Unrecognized instream domain: %s' % self.instream.domain def get_filetap(self, archive): return ZipFileTap(archive) def send(self, fileobj): archive = zipfile.ZipFile(fileobj, 'w') filetap = self.get_filetap(archive) encoded_stream = JSONDataTap(self.item_stream, filetap=filetap) #encode our objects into json if isinstance(encoded_stream, basestring): manifest = encoded_stream else: manifest = ''.join(encoded_stream) archive.writestr('manifest.json', manifest) archive.close() def get_primitive_stream(self, instream): #instream is a bytes datatap but we want the file like object it reads archive = zipfile.ZipFile(instream.item_stream, 'r') filetap = self.get_filetap(archive) return JSONDataTap(StreamDataTap(archive.open('manifest.json')), filetap=filetap) def get_bytes_stream(self, instream): return instream #def detect_originating_datatap(self): # return lookup_datatap(self.zipfile.read('originator.txt')) register_datatap('Zip', ZipFileDataTap)
# NOTE(review): continuation of an option list (presumably
# command_option_list) whose opening lines precede this excerpt; the first
# entry below is the end of a make_option(...) call started there.
            dest='applications',
        ),
        make_option(
            '--collection',
            action='append',
            type='string',
            dest='collections',
        ),
        make_option(
            '--index',
            action='append',
            type='string',
            dest='indexes',
        ),
        make_option(
            '--subsite',
            action='append',
            type='string',
            dest='subsites',
        ),
        make_option(
            '--publicresource',
            action='append',
            type='string',
            dest='publicresources',
        )
    ]

register_datatap('DocKitCMS', DocKitCMSDataTap)
self.deserialized_objects = None for instance in self: instance.save() transaction.commit() if transaction.is_dirty(): transaction.commit() command_option_list = [ Option('--disable_natural_keys', action='store_false', dest='use_natural_keys'), ] @classmethod def load_from_command_line(cls, arglist, instream=None): parser = OptionParser(option_list=cls.command_option_list) options, args = parser.parse_args(arglist) kwargs = options.__dict__ if instream is None: model_sources = list() for arg in args: #list of apps and model names if '.' in arg: model_sources.append(models.get_model(*arg.split(".", 1))) else: #get models from appname model_sources.extend(models.get_models(models.get_app(arg))) kwargs['instream'] = model_sources else: kwargs['instream'] = instream return cls(**kwargs) register_datatap('Model', ModelDataTap)