class ExtractKV(StreamingCommand):
    # Splits the text of a source field into key/value pairs and writes
    # the resulting mapping to a destination field.
    _about_ = 'Extract keys/values pairs from a given field'
    _syntax_ = ('[field=]<source field> '
                '[[kvdelim=]<key/value delimiter>] '
                '[[pairdelim=]<key/value pairs delimiter>] '
                '[[dest=]<dest field>]')
    _aliases_ = ['extract_kv', 'extract_kvs']
    _schema_ = {'properties': {}}  # type: ignore

    def __init__(self,
                 field,
                 kvdelim: str = '=',
                 pairdelim: str = ',',
                 dest: str = None):
        """
        :param field: Source field
        :param kvdelim: Key and value delimiter regex;
            Defaults to an equal sign ``=``
        :param pairdelim: Key/value pairs delimiter regex;
            Defaults to a comma ``,``
        :param dest: Destination field;
            Defaults to the source field
        """
        super().__init__(field, kvdelim, pairdelim, dest)
        self.field = Field(field)
        self.kvdelim = Field(kvdelim, type=str, default='=')
        self.pairdelim = Field(pairdelim, type=str, default=',')
        # Write back to the source field when no destination is given.
        self.dest = Field(dest) if dest else self.field

    async def setup(self, event, pipeline, context):
        """
        Read both delimiters and compile them as regular expressions.

        After this call ``self.kvdelim`` and ``self.pairdelim`` hold the
        compiled patterns instead of the ``Field`` objects created in
        ``__init__``.
        """
        self.kvdelim = regex.compile(
            await self.kvdelim.read(event, pipeline, context))
        self.pairdelim = regex.compile(
            await self.pairdelim.read(event, pipeline, context))

    async def target(self, event, pipeline, context):
        """
        Split the source field into key/value pairs and yield the result
        of writing them, as a ``dict``, to the destination field.

        Empty pairs and pairs which do not contain the key/value
        delimiter are skipped; when a key appears several times, the last
        value wins.
        """
        line = await self.field.read(event, pipeline, context)
        pairs = {}
        for pair in filter(None, self.pairdelim.split(line)):
            # Split on the first delimiter only, so that a value which
            # itself contains the delimiter (e.g. ``token=abc==``) is
            # kept whole instead of being silently dropped.
            parts = self.kvdelim.split(pair, maxsplit=1)
            if len(parts) == 2:
                key, value = parts
                pairs[key] = value
        yield await self.dest.write(event, pairs)
# Example #2
# 0
class ExtractMap(StreamingCommand):
    # Splits the text of a source field into values, names each value
    # with a header and writes the resulting mapping to a destination
    # field.
    _about_ = 'Extract values from a given field.'
    _syntax_ = ('[field=]<source field> '
                '[[headers=](headers)] '
                '[[prefix=]<key prefix>] '
                '[[delim=]<values delimiter>] '
                '[[dest=]<dest field>]')
    _aliases_ = ['extract_map', 'extract_maps']
    _schema_ = {'properties': {}}  # type: ignore

    def __init__(self,
                 field,
                 headers: list = None,
                 prefix: str = '_',
                 delim: str = ',',
                 dest: str = None):
        """
        :param field: Source field
        :param headers: Values keys (names);
            Defaults to an empty list
        :param prefix: Values keys prefix, used to generate keys when no
            or not enough headers are provided;
            Defaults to an underscore ``_``
        :param delim: Values delimiter regex;
            Defaults to a comma ``,``
        :param dest: Destination field;
            Defaults to the source field
        """
        # Use a fresh list per instance rather than a shared mutable
        # default argument.
        headers = [] if headers is None else headers
        # Forward every constructor argument, in signature order.
        # NOTE(review): assumes the base constructor accepts variadic
        # arguments - confirm against `StreamingCommand`.
        super().__init__(field, headers, prefix, delim, dest)
        self.field = Field(field)
        self.headers = Field(headers, seqn=True, default=[])
        self.prefix = Field(prefix, default='_')
        # Fallback delimiter matches the documented default (a comma).
        self.delim = Field(delim, type=str, default=',')
        # Write back to the source field when no destination is given.
        self.dest = Field(dest) if dest else self.field

    def get_header(self):
        """
        Yield the values keys: first the configured headers, then an
        endless sequence of generated keys ``<prefix>0``, ``<prefix>1``...

        Must be called after ``setup``, which replaces ``self.headers``
        and ``self.prefix`` with the values read from their fields.
        """
        yield from self.headers
        index = 0
        while True:
            yield f'{self.prefix}{index}'
            index += 1

    async def setup(self, event, pipeline, context):
        """
        Read the headers, the prefix and the delimiter from their fields;
        the delimiter is compiled as a regular expression.

        After this call the three attributes hold the read values instead
        of the ``Field`` objects created in ``__init__``.
        """
        self.headers = await self.headers.read(event, pipeline, context)
        self.prefix = await self.prefix.read(event, pipeline, context)
        self.delim = regex.compile(
            await self.delim.read(event, pipeline, context))

    async def target(self, event, pipeline, context):
        """
        Split the source field into values, pair each value with a key
        from ``get_header`` and yield the result of writing the mapping
        to the destination field.
        """
        line = await self.field.read(event, pipeline, context)
        # NOTE(review): empty values are dropped before being paired with
        # the headers, so the values following an empty one shift to the
        # previous header - confirm this is intended.
        values = filter(None, self.delim.split(line))
        yield await self.dest.write(event, dict(zip(self.get_header(),
                                                    values)))