Example #1
0
def get_assignment(result, skip=False, **kwargs):
    # print(result)
    result = iter(listize(result))

    if skip:
        return None, result

    try:
        first_result = next(result)
    except StopIteration:
        first_result = None

    try:
        second_result = next(result)
    except StopIteration:
        # pipe delivers one result, e.g., strconcat
        result = chain([first_result], result)
        multiple = False
    else:
        # pipe delivers multiple results, e.g., fetchpage/tokenizer
        result = chain([first_result], [second_result], result)
        multiple = True

    first = kwargs.get('count') == 'first'
    _all = kwargs.get('count') == 'all'
    one = first or not (multiple or _all)
    return one, iter([first_result]) if one else result
Example #2
0
def _make_content(i, value=None, tag='content', append=True, strip=False):
    content = i.get(tag)

    try:
        value = value.strip() if value and strip else value
    except AttributeError:
        pass

    if content and value and append:
        content = listize(content)
        content.append(value)
    elif content and value:
        content = ''.join([content, value])
    elif value:
        content = value

    return {tag: content} if content else {}
Example #3
0
def _make_content(i, value=None, tag='content', append=True, strip=False):
    content = i.get(tag)

    try:
        value = value.strip() if value and strip else value
    except AttributeError:
        pass

    if content and value and append:
        content = listize(content)
        content.append(value)
    elif content and value:
        content = ''.join([content, value])
    elif value:
        content = value

    return {tag: content} if content else {}
Example #4
0
def get_broadcast_funcs(**kwargs):
    kw = Objectify(kwargs, conf={})
    pieces = kw.conf[kw.extract] if kw.extract else kw.conf
    no_conf = remove_keys(kwargs, 'conf')
    noop = partial(cast, _type='none')

    if kw.listize:
        listed = listize(pieces)
        piece_defs = map(DotDict, listed) if kw.pdictize else listed
        parser = partial(parse_conf, **no_conf)
        pfuncs = [partial(parser, conf=conf) for conf in piece_defs]
        get_pieces = lambda item: broadcast(item, *pfuncs)
    elif kw.ptype != 'none':
        conf = DotDict(pieces) if kw.pdictize and pieces else pieces
        get_pieces = partial(parse_conf, conf=conf, **no_conf)
    else:
        get_pieces = noop

    ffunc = noop if kw.ftype == 'none' else partial(get_field, **kwargs)
    return (ffunc, get_pieces)
Example #5
0
def get_broadcast_funcs(**kwargs):
    kw = Objectify(kwargs, conf={})
    pieces = kw.conf[kw.extract] if kw.extract else kw.conf
    no_conf = remove_keys(kwargs, 'conf')
    noop = partial(cast, _type='none')

    if kw.listize:
        listed = listize(pieces)
        piece_defs = map(DotDict, listed) if kw.pdictize else listed
        parser = partial(parse_conf, **no_conf)
        pfuncs = [partial(parser, conf=conf) for conf in piece_defs]
        get_pieces = lambda item: broadcast(item, *pfuncs)
    elif kw.ptype != 'none':
        conf = DotDict(pieces) if kw.pdictize and pieces else pieces
        get_pieces = partial(parse_conf, conf=conf, **no_conf)
    else:
        get_pieces = noop

    ffunc = noop if kw.ftype == 'none' else partial(get_field, **kwargs)
    return (ffunc, get_pieces)
Example #6
0
def get_skip(item, skip_if=None, **kwargs):
    item = item or {}

    if callable(skip_if):
        skip = skip_if(item)
    elif skip_if:
        skips = listize(skip_if)

        for _skip in skips:
            value = item.get(_skip['field'], '')
            text = _skip.get('text')
            op = _skip.get('op', 'contains')
            match = SKIP_SWITCH[op](text, value) if text else value
            skip = match if _skip.get('include') else not match

            if skip:
                break
    else:
        skip = False

    return skip
Example #7
0
def get_skip(item, skip_if=None, **kwargs):
    item = item or {}

    if callable(skip_if):
        skip = skip_if(item)
    elif skip_if:
        skips = listize(skip_if)

        for _skip in skips:
            value = item.get(_skip['field'], '')
            text = _skip.get('text')
            op = _skip.get('op', 'contains')
            match = SKIP_SWITCH[op](text, value) if text else value
            skip = match if _skip.get('include') else not match

            if skip:
                break
    else:
        skip = False

    return skip
Example #8
0
def get_assignment(result, skip=False, **kwargs):
    # print(result)
    result = iter(listize(result))

    if skip:
        return None, result

    first_result = next(result)

    try:
        second_result = next(result)
    except StopIteration:
        # pipe delivers one result, e.g., strconcat
        result = chain([first_result], result)
        multiple = False
    else:
        # pipe delivers multiple results, e.g., fetchpage/tokenizer
        result = chain([first_result], [second_result], result)
        multiple = True

    first = kwargs.get('count') == 'first'
    _all = kwargs.get('count') == 'all'
    one = first or not (multiple or _all)
    return one, iter([first_result]) if one else result