Example #1
0
        #instream is a bytes datatap but we want the file like object it reads
        mode = 'r'
        if self.compression:
            mode += ':' + self.compression
        archive = tarfile.TarFile(fileobj=instream.item_stream, mode=mode)
        manifest = None
        files = {}
        for tarinfo in archive:
            if not tarinfo.isreg():
                continue
            fileobj = archive.extractfile(tarinfo)
            assert fileobj is not None
            files[tarinfo.path] = DjangoTarExtFile(fileobj, tarinfo)
            if tarinfo.name == 'manifest.json':
                manifest = fileobj
        if manifest is None:
            archive.extractfile('manifest.json') #TODO raise a proper exception
        filetap = self.get_filetap(archive, files)
        return JSONDataTap(StreamDataTap(manifest), filetap=filetap)
    
    def get_bytes_stream(self, instream):
        '''
        Return ``instream`` unchanged; no conversion is applied here.
        '''
        return instream
    
    # Command line switches. Both store a constant into ``compression``;
    # if both are given, optparse keeps whichever appears last.
    # NOTE(review): tarfile spells its bzip2 mode suffix 'bz2', while --bz
    # stores 'bz' -- confirm how ``compression`` is consumed before relying
    # on this option.
    command_option_list = [
        Option(
            '--gzip',
            action='store_const',
            const='gz',
            dest='compression',
        ),
        Option(
            '--bz',
            action='store_const',
            const='bz',
            dest='compression',
        ),
    ]

# Pass TarFileDataTap to register_datatap under the name 'TarFile'.
register_datatap('TarFile', TarFileDataTap)

Example #2
0
            item.save()
            yield item
    
    def commit(self):
        '''
        Save every pending instance.

        First drains ``self.deserialized_objects`` from the left, calling
        ``save()`` on each entry, then resets that attribute to ``None``.
        Finally iterates over ``self`` and calls ``save()`` on each instance
        that iteration yields.
        '''
        pending = self.deserialized_objects
        while pending:
            pending.popleft().save()
        self.deserialized_objects = None
        for remaining in self:
            remaining.save()
    
    @classmethod
    def load_from_command_line(cls, arglist, instream=None):
        '''
        Build a tap from command line arguments.

        usage::

            manage.py datatap Document [<app label>, ...] [<collection name>, ...]

        An argument containing a ``.`` is split on its last dot and resolved
        with ``get_document``; any other argument is handed to
        ``get_documents`` and every result is included. The resulting list is
        passed to the class as ``instream`` together with the parsed options.
        The ``instream`` parameter itself is accepted but not used here.
        '''
        option_parser = OptionParser(option_list=cls.command_option_list)
        parsed_options, positional = option_parser.parse_args(arglist)
        sources = []
        for name in positional:
            if '.' not in name:
                # no dot: treat the argument as an app name
                sources.extend(get_documents(name))
                continue
            head, tail = name.rsplit(".", 1)
            sources.append(get_document(head, tail))
        return cls(instream=sources, **vars(parsed_options))

# Pass DocumentDataTap to register_datatap under the name 'Document'.
register_datatap('Document', DocumentDataTap)
            yield item

    def commit(self):
        '''
        Save every pending instance.

        First drains ``self.deserialized_objects`` from the left, calling
        ``save()`` on each entry, then resets that attribute to ``None``.
        Finally iterates over ``self`` and calls ``save()`` on each instance
        that iteration yields.
        '''
        pending = self.deserialized_objects
        while pending:
            pending.popleft().save()
        self.deserialized_objects = None
        for remaining in self:
            remaining.save()

    @classmethod
    def load_from_command_line(cls, arglist, instream=None):
        '''
        Build a tap from command line arguments.

        usage::

            manage.py datatap Document [<app label>, ...] [<collection name>, ...]

        An argument containing a ``.`` is split on its last dot and resolved
        with ``get_document``; any other argument is handed to
        ``get_documents`` and every result is included. The resulting list is
        passed to the class as ``instream`` together with the parsed options.
        The ``instream`` parameter itself is accepted but not used here.
        '''
        option_parser = OptionParser(option_list=cls.command_option_list)
        parsed_options, positional = option_parser.parse_args(arglist)
        sources = []
        for name in positional:
            if '.' not in name:
                # no dot: treat the argument as an app name
                sources.extend(get_documents(name))
                continue
            head, tail = name.rsplit(".", 1)
            sources.append(get_document(head, tail))
        return cls(instream=sources, **vars(parsed_options))


# Pass DocumentDataTap to register_datatap under the name 'Document'.
register_datatap('Document', DocumentDataTap)
Example #4
0
        parser = OptionParser(option_list=cls.command_option_list)
        options, args = parser.parse_args(arglist)
        kwargs = options.__dict__
        kwargs['instream'] = instream
        if not kwargs.get('key_name') and args:
            kwargs['key_name'] = args.pop(0)
        return cls(**kwargs)
    
    @classmethod
    def load_from_command_line_for_write(cls, arglist, instream):
        '''
        Returns an instantiated DataTap with the provided arguments from commandline

        The send target is the first positional argument when one is given;
        otherwise it is the ``key_name`` option, which is then removed from
        the constructor keyword arguments. The returned tap has its ``commit``
        attribute replaced by a function that ignores its arguments and calls
        ``send`` with that target.
        '''
        option_parser = OptionParser(option_list=cls.command_option_list)
        parsed_options, positional = option_parser.parse_args(arglist)
        init_kwargs = vars(parsed_options)
        init_kwargs['instream'] = instream

        target = positional.pop(0) if positional else init_kwargs.pop('key_name')
        tap = cls(*positional, **init_kwargs)

        def commit(*a, **k):
            # committing a write tap means sending to the chosen target
            tap.send(target)

        tap.commit = commit
        return tap

# Pass S3DataTap to register_datatap under the name 'S3'.
register_datatap('S3', S3DataTap)

Example #5
0
        return 'bytes' #bytes go in, bytes go out
    
    def get_bytes_stream(self, instream):
        '''
        Return ``instream`` unchanged; no conversion is applied here.
        '''
        return instream
    
    def read(self, *args, **kwargs):
        '''
        Forward all arguments to ``self.item_stream.read`` and return its result.
        '''
        source = self.item_stream
        return source.read(*args, **kwargs)
    
    def send(self, fileobj):
        '''
        Call ``send(fileobj)`` on ``self.instream`` and return whatever it returns.
        '''
        #err?
        upstream = self.instream
        return upstream.send(fileobj)
    
    def write(self, chunk):
        '''
        Write ``chunk`` to ``self.instream``; the result of that write is discarded.
        '''
        sink = self.instream
        sink.write(chunk)

# Pass StreamDataTap to register_datatap under the name 'Stream'.
register_datatap('Stream', StreamDataTap)

class BufferedStreamDataTap(StreamDataTap):
    '''
    A stream data tap that uses io.BytesIO to buffer read operations.
    This is useful for processing streams from the internet into modules
    that require random position reads like with zipfile.

    NOTE(review): ``BytesIO(x)`` takes initial bytes, not a file-like
    object -- confirm that callers pass bytes as ``instream`` / ``fileobj``.
    '''
    def __init__(self, instream=None, **kwargs):
        '''
        Wrap ``instream`` in a ``BytesIO`` (when it is not ``None``) and pass
        it, with the remaining keyword arguments, to the parent initializer.
        '''
        buffered = None if instream is None else BytesIO(instream)
        return super(BufferedStreamDataTap, self).__init__(instream=buffered, **kwargs)

    def send(self, fileobj):
        '''
        Wrap ``fileobj`` in a ``BytesIO`` and delegate to the parent ``send``,
        returning its result.
        '''
        return super(BufferedStreamDataTap, self).send(BytesIO(fileobj))
Example #6
0
                result.append(Application.objects.get(slug=app_slug))
        if collections:
            for key in collections:
                collection = Collection.objects.get(key=key)
                result.append(collection)
                if isinstance(collection, VirtualDocumentCollection):
                    exported_collections.append(collection)
        if indexes:
            for name in indexes:  # TODO ambigious
                result.extend(Index.objects.filter(name=name))
        if subsites:
            for slug in subsites:
                result.append(Subsite.objects.get(slug=slug))
        if publicresources:
            for pk in publicresources:  # TODO find a better identifier, uuid?
                result.append(PublicResource.objects.get(pk=pk))
        for collection in exported_collections:
            result.append(collection.get_document())
        return result

    # Repeatable command line filters; each occurrence appends a string to
    # the list stored under the option's ``dest``.
    command_option_list = [
        make_option(
            "--application",
            action="append",
            type="string",
            dest="applications",
        ),
        make_option(
            "--collection",
            action="append",
            type="string",
            dest="collections",
        ),
        make_option(
            "--index",
            action="append",
            type="string",
            dest="indexes",
        ),
        make_option(
            "--subsite",
            action="append",
            type="string",
            dest="subsites",
        ),
        make_option(
            "--publicresource",
            action="append",
            type="string",
            dest="publicresources",
        ),
    ]


# Pass DocKitCMSDataTap to register_datatap under the name "DocKitCMS".
register_datatap("DocKitCMS", DocKitCMSDataTap)
Example #7
0
        assert False, 'Unrecognized instream domain: %s' % self.instream.domain
    
    def get_filetap(self, archive):
        '''
        Return a new ``ZipFileTap`` constructed around ``archive``.
        '''
        return ZipFileTap(archive)
    
    def send(self, fileobj):
        '''
        Write this tap's items to ``fileobj`` as a zip archive.

        The items in ``self.item_stream`` are encoded through ``JSONDataTap``
        (with a file tap bound to the archive) and the result is stored in
        the archive as ``manifest.json``.

        The archive is closed in a ``finally`` clause so that it is finalized
        even when encoding or writing the manifest raises; the exception
        still propagates to the caller.
        '''
        archive = zipfile.ZipFile(fileobj, 'w')
        try:
            filetap = self.get_filetap(archive)
            encoded_stream = JSONDataTap(self.item_stream, filetap=filetap) #encode our objects into json
            if isinstance(encoded_stream, basestring):
                manifest = encoded_stream
            else:
                # otherwise iterate the encoder and join the pieces it yields
                manifest = ''.join(encoded_stream)
            archive.writestr('manifest.json', manifest)
        finally:
            archive.close()
    
    def get_primitive_stream(self, instream):
        '''
        Open the zip archive behind ``instream`` and return a ``JSONDataTap``
        that reads its ``manifest.json`` member, with a file tap bound to the
        same archive.
        '''
        #instream is a bytes datatap but we want the file like object it reads
        zip_archive = zipfile.ZipFile(instream.item_stream, 'r')
        file_tap = self.get_filetap(zip_archive)
        manifest_tap = StreamDataTap(zip_archive.open('manifest.json'))
        return JSONDataTap(manifest_tap, filetap=file_tap)
    
    def get_bytes_stream(self, instream):
        '''
        Return ``instream`` unchanged; no conversion is applied here.
        '''
        return instream
    
    #def detect_originating_datatap(self):
    #    return lookup_datatap(self.zipfile.read('originator.txt'))

# Pass ZipFileDataTap to register_datatap under the name 'Zip'.
register_datatap('Zip', ZipFileDataTap)

Example #8
0
            dest='applications',
        ),
        make_option(
            '--collection',
            action='append',
            type='string',
            dest='collections',
        ),
        make_option(
            '--index',
            action='append',
            type='string',
            dest='indexes',
        ),
        make_option(
            '--subsite',
            action='append',
            type='string',
            dest='subsites',
        ),
        make_option(
            '--publicresource',
            action='append',
            type='string',
            dest='publicresources',
        )
    ]


# Pass DocKitCMSDataTap to register_datatap under the name 'DocKitCMS'.
register_datatap('DocKitCMS', DocKitCMSDataTap)
Example #9
0
        self.deserialized_objects = None
        for instance in self:
            instance.save()
            transaction.commit()
        if transaction.is_dirty():
            transaction.commit()
    
    # Single switch: when present it stores False into ``use_natural_keys``.
    command_option_list = [
        Option(
            '--disable_natural_keys',
            action='store_false',
            dest='use_natural_keys',
        ),
    ]
    
    @classmethod
    def load_from_command_line(cls, arglist, instream=None):
        '''
        Build a tap from command line arguments.

        The parsed options become constructor keyword arguments. When
        ``instream`` is given it is passed through as is. Otherwise the
        positional arguments are resolved into a list used as ``instream``:
        an argument containing a ``.`` is split on its first dot and looked up
        with ``models.get_model``; any other argument is treated as an app
        name and contributes everything returned by
        ``models.get_models(models.get_app(...))``.
        '''
        option_parser = OptionParser(option_list=cls.command_option_list)
        parsed_options, positional = option_parser.parse_args(arglist)
        init_kwargs = vars(parsed_options)
        if instream is not None:
            init_kwargs['instream'] = instream
            return cls(**init_kwargs)

        sources = []
        for name in positional:
            if '.' not in name:
                # no dot: treat the argument as an app name
                sources.extend(models.get_models(models.get_app(name)))
            else:
                head, tail = name.split(".", 1)
                sources.append(models.get_model(head, tail))
        init_kwargs['instream'] = sources
        return cls(**init_kwargs)

# Pass ModelDataTap to register_datatap under the name 'Model'.
register_datatap('Model', ModelDataTap)