def get_assignment(result, skip=False, **kwargs):
    """Decide whether a pipe result is assigned as one value or as a stream.

    Args:
        result: The raw pipe output. It is passed through ``listize`` and
            wrapped in an iterator before inspection.
        skip (bool): When truthy, no inspection happens at all.
        **kwargs: Only ``count`` is read; ``'first'`` forces a single value
            and ``'all'`` forces the full stream.

    Returns:
        tuple: ``(None, iterator)`` when ``skip`` is truthy. Otherwise
        ``(one, iterator)`` where ``one`` is a bool; if ``one`` is true the
        iterator yields just the first value, else it yields every value.
        An empty result is treated as a single ``None`` value.
    """
    stream = iter(listize(result))

    if skip:
        return None, stream

    # A private sentinel distinguishes "no second item" from a real item
    # that happens to be None.
    missing = object()
    head = next(stream, None)
    follower = next(stream, missing)

    # One item: pipe delivers one result, e.g., strconcat.
    # Two or more: pipe delivers multiple results, e.g., fetchpage/tokenizer.
    multiple = follower is not missing

    count = kwargs.get('count')
    one = count == 'first' or not (multiple or count == 'all')

    if one:
        return one, iter([head])

    # Put the items consumed while peeking back in front of the remainder.
    consumed = [head, follower] if multiple else [head]
    return one, chain(consumed, stream)
def _make_content(i, value=None, tag='content', append=True, strip=False): content = i.get(tag) try: value = value.strip() if value and strip else value except AttributeError: pass if content and value and append: content = listize(content) content.append(value) elif content and value: content = ''.join([content, value]) elif value: content = value return {tag: content} if content else {}
def get_broadcast_funcs(**kwargs):
    """Build the field-getter and pieces-getter callables for a pipe.

    Args:
        **kwargs: Options wrapped in ``Objectify``. The attributes read here
            are ``conf``, ``extract``, ``listize``, ``pdictize``, ``ptype``
            and ``ftype``. Everything except ``conf`` is forwarded to
            ``parse_conf``; all of it is forwarded to ``get_field``.

    Returns:
        tuple: ``(ffunc, get_pieces)``.
        ``ffunc`` is ``cast`` bound to ``_type='none'`` when ``ftype`` is
        ``'none'``, otherwise ``get_field`` bound to ``kwargs``.
        ``get_pieces`` broadcasts an item over one ``parse_conf`` partial per
        piece when ``listize`` is set, is a single ``parse_conf`` partial
        when ``ptype`` is not ``'none'``, and is the same ``cast`` partial
        otherwise.
    """
    opts = Objectify(kwargs, conf={})

    # Optionally narrow the configuration to the entry named by `extract`.
    if opts.extract:
        pieces = opts.conf[opts.extract]
    else:
        pieces = opts.conf

    conf_free = remove_keys(kwargs, 'conf')
    passthrough = partial(cast, _type='none')

    if opts.listize:
        definitions = listize(pieces)

        if opts.pdictize:
            definitions = map(DotDict, definitions)

        parse = partial(parse_conf, **conf_free)
        parsers = [partial(parse, conf=definition) for definition in definitions]

        def get_pieces(item):
            return broadcast(item, *parsers)

    elif opts.ptype != 'none':
        # Empty/falsy pieces are deliberately left unwrapped.
        if opts.pdictize and pieces:
            pieces = DotDict(pieces)

        get_pieces = partial(parse_conf, conf=pieces, **conf_free)
    else:
        get_pieces = passthrough

    if opts.ftype == 'none':
        ffunc = passthrough
    else:
        ffunc = partial(get_field, **kwargs)

    return (ffunc, get_pieces)
def get_skip(item, skip_if=None, **kwargs):
    """Determine whether ``item`` should be skipped.

    Args:
        item: Mapping-like item; a falsy item is replaced by ``{}``.
        skip_if: Either a callable that receives the item and whose return
            value is used directly, or one or more rule mappings (passed
            through ``listize``). Each rule is read for the keys ``field``
            (required), ``text``, ``op`` (default ``'contains'``) and
            ``include``.
        **kwargs: Accepted but unused.

    Returns:
        The callable's result, ``False`` when ``skip_if`` is falsy, or the
        outcome of the first rule that yields a truthy skip (the last rule's
        outcome if none does).
    """
    item = item or {}

    if callable(skip_if):
        return skip_if(item)

    if not skip_if:
        return False

    for rule in listize(skip_if):
        value = item.get(rule['field'], '')
        text = rule.get('text')
        op = rule.get('op', 'contains')

        # Without a text to compare against, the field value itself is
        # used as the match result.
        if text:
            matched = SKIP_SWITCH[op](text, value)
        else:
            matched = value

        if rule.get('include'):
            skip = matched
        else:
            skip = not matched

        if skip:
            break

    return skip
def get_assignment(result, skip=False, **kwargs):
    """Decide whether a pipe result is assigned as one value or as a stream.

    Args:
        result: The raw pipe output. It is passed through ``listize`` and
            wrapped in an iterator before inspection.
        skip (bool): When truthy, the iterator is returned untouched.
        **kwargs: Only ``count`` is read; ``'first'`` forces a single value
            and ``'all'`` forces the full stream.

    Returns:
        tuple: ``(None, iterator)`` when ``skip`` is truthy. Otherwise
        ``(one, iterator)`` where ``one`` is a bool; if ``one`` is true the
        iterator yields only the first value, else it yields every value.
        An empty result is treated as a single ``None`` value instead of
        raising ``StopIteration``.
    """
    result = iter(listize(result))

    if skip:
        return None, result

    # An empty result must not leak StopIteration to the caller: inside a
    # generator that would surface as RuntimeError (PEP 479). Fall back to
    # None so the empty case behaves like a single None result.
    first_result = next(result, None)

    try:
        second_result = next(result)
    except StopIteration:
        # pipe delivers one result, e.g., strconcat
        result = chain([first_result], result)
        multiple = False
    else:
        # pipe delivers multiple results, e.g., fetchpage/tokenizer
        result = chain([first_result], [second_result], result)
        multiple = True

    count = kwargs.get('count')
    first = count == 'first'
    _all = count == 'all'

    # A single value is assigned when explicitly requested, or when the pipe
    # produced only one result and the caller did not ask for all of them.
    one = first or not (multiple or _all)
    return one, iter([first_result]) if one else result