def pipe_926a27580e74684fc63559b92bddaa30(context=None, _INPUT=None, conf=None, **kwargs):
    """Chain fetch, uniq, filter, sort and output stages and return the result.

    If ``context`` is truthy and ``context.describe_input`` is set, an empty
    list is returned and no stage is called.

    Stages, in call order:

    1. ``pipe_fetch`` fed by ``pipe_forever()``, configured with two URLs
       (guru.com and elance.com).
    2. ``pipe_uniq`` configured on the ``link`` field.
    3. ``pipe_filter`` with MODE ``block``, COMBINE ``or`` and a single rule:
       ``title`` / ``contains`` / ``php``.
    4. ``pipe_sort`` keyed on ``pubDate`` with direction ``DESC``.
    5. ``pipe_output`` with an empty configuration.

    ``_INPUT``, ``conf`` and ``kwargs`` are accepted but never passed on to
    any stage.
    """

    def _typed(kind, value):
        # Every setting handed to a stage is a {"type": ..., "value": ...} pair.
        return {"type": kind, "value": value}

    def _text(value):
        return _typed("text", value)

    conf = conf if conf else {}

    if context:
        if context.describe_input:
            return []

    source = pipe_forever()

    feed_urls = [
        _typed("url", "http://www.guru.com/rss/jobs/c/web-software-it/"),
        _typed(
            "url",
            "https://www.elance.com/r/rss/jobs/cat-it-programming/fxd-true/o-1/bgt-gt500-ns1/sct-database-development-10217-data-analysis-14174-database-administration-14177-business-intelligence-14173-data-engineering-14175-system-administration-10219-other-data-science-14178-technical-support-10218-other-it-programming-12350-software-application-10216-website-design-10225-web-programming-10224/tls-1/s-timelistedSort",
        ),
    ]
    fetched = pipe_fetch(context, source, conf={"URL": feed_urls})

    unique_items = pipe_uniq(context, fetched, conf={"field": _text("link")})

    title_rule = {
        "field": _text("title"),
        "value": _text("php"),
        "op": _text("contains"),
    }
    filter_conf = {
        "COMBINE": _text("or"),
        "MODE": _text("block"),
        "RULE": [title_rule],
    }
    filtered = pipe_filter(context, unique_items, conf=filter_conf)

    sort_key = {"field": _text("pubDate"), "dir": _text("DESC")}
    ordered = pipe_sort(context, filtered, conf={"KEY": [sort_key]})

    return pipe_output(context, ordered, conf={})
# Example no. 2
# 0
def pipe_6e30c269a69baf92cd420900b0645f88(context=None, _INPUT=None, conf=None, **kwargs):
    """Chain two fetches through union, uniq, filter, rename, regex, sort and output.

    Early exits (only when ``context`` is truthy):

    * ``context.describe_input`` set: returns ``[]``.
    * otherwise ``context.describe_dependencies`` set: returns the list of
      module names this pipeline depends on.

    Stages, in call order:

    1. Two ``pipe_fetch`` calls, each fed by the same ``pipe_forever()``
       value, reading the ``Topthemen`` and ``Politik`` XML files via
       ``file://data/...`` URLs.
    2. ``pipe_union`` receiving the forever value as its input and the two
       fetch results as ``_OTHER`` and ``_OTHER3``.
    3. ``pipe_uniq`` configured on the ``title`` field.
    4. ``pipe_filter`` with MODE ``block``, COMBINE ``or`` and two
       ``contains`` rules (``link`` / ``/sport/`` and ``title`` /
       ``Bildstrecke:``).
    5. ``pipe_rename`` with one ``copy`` rule (field ``y:id.value``, newval
       ``link``).
    6. ``pipe_regex`` with one rule on ``description`` and one on ``link``.
    7. ``pipe_sort`` keyed on ``pubDate`` with direction ``DESC``.
    8. ``pipe_output`` with an empty configuration.

    ``_INPUT``, ``conf`` and ``kwargs`` are accepted but never passed on to
    any stage.
    """

    def _text(value):
        # Settings are handed to the stages as {'type': 'text', 'value': ...}.
        return {'type': 'text', 'value': value}

    def _file_feed(url):
        return {'URL': {'type': 'url', 'value': url}}

    conf = conf or {}

    if context:
        if context.describe_input:
            return []
        if context.describe_dependencies:
            return [
                u'pipefetch',
                u'pipefilter',
                u'pipeoutput',
                u'piperegex',
                u'piperename',
                u'pipesort',
                u'pipeunion',
                u'pipeuniq',
            ]

    forever = pipe_forever()

    top_stories = pipe_fetch(
        context, forever,
        conf=_file_feed('file://data/rss.sueddeutsche.de_rss_Topthemen.xml'))
    politics = pipe_fetch(
        context, forever,
        conf=_file_feed('file://data/rss.sueddeutsche.de_rss_Politik.xml'))

    # Keyword order mirrors the generated call: _OTHER3, conf, _OTHER.
    merged = pipe_union(context, forever, _OTHER3=politics, conf={}, _OTHER=top_stories)

    unique_items = pipe_uniq(context, merged, conf={'field': _text('title')})

    sport_rule = {
        'field': _text('link'),
        'value': _text('/sport/'),
        'op': _text('contains'),
    }
    gallery_rule = {
        'field': _text('title'),
        'value': _text('Bildstrecke:'),
        'op': _text('contains'),
    }
    filtered = pipe_filter(
        context,
        unique_items,
        conf={
            'COMBINE': _text('or'),
            'MODE': _text('block'),
            'RULE': [sport_rule, gallery_rule],
        })

    copy_rule = {
        'field': _text('y:id.value'),
        'op': _text('copy'),
        'newval': _text('link'),
    }
    renamed = pipe_rename(context, filtered, conf={'RULE': [copy_rule]})

    description_rule = {
        'singlelinematch': _text('2'),
        'globalmatch': _text('1'),
        'replace': _text(''),
        'field': _text('description'),
        'casematch': _text('8'),
        'match': _text('</div>.*$'),
    }
    link_rule = {
        'field': _text('link'),
        'match': _text('^(.*\\/.*)\\/'),
        'replace': _text('$1/2.220/'),
    }
    rewritten = pipe_regex(
        context, renamed, conf={'RULE': [description_rule, link_rule]})

    sort_key = {'field': _text('pubDate'), 'dir': _text('DESC')}
    ordered = pipe_sort(context, rewritten, conf={'KEY': [sort_key]})

    return pipe_output(context, ordered, conf={})
def pipe_926a27580e74684fc63559b92bddaa30(context=None, _INPUT=None, conf=None, **kwargs):
    """Run the fetch -> uniq -> filter -> sort -> output chain.

    Returns ``[]`` without calling any stage when ``context`` is truthy and
    ``context.describe_input`` is set. Otherwise returns whatever
    ``pipe_output`` returns for the chained stages.

    The stage configurations are:

    * fetch: two URLs (guru.com and elance.com);
    * uniq: field ``link``;
    * filter: MODE ``block``, COMBINE ``or``, one rule
      (``title`` / ``contains`` / ``php``);
    * sort: key ``pubDate``, direction ``DESC``;
    * output: empty configuration.

    ``_INPUT``, ``conf`` and ``kwargs`` are accepted but never passed on to
    any stage.
    """
    conf = conf or {}

    describe_only = context and context.describe_input
    if describe_only:
        return []

    # Stage configurations, declared up front so the wiring below reads
    # as a straight line.
    fetch_conf = {
        'URL': [
            {'type': 'url', 'value': 'http://www.guru.com/rss/jobs/c/web-software-it/'},
            {
                'type': 'url',
                'value': 'https://www.elance.com/r/rss/jobs/cat-it-programming/fxd-true/o-1/bgt-gt500-ns1/sct-database-development-10217-data-analysis-14174-database-administration-14177-business-intelligence-14173-data-engineering-14175-system-administration-10219-other-data-science-14178-technical-support-10218-other-it-programming-12350-software-application-10216-website-design-10225-web-programming-10224/tls-1/s-timelistedSort',
            },
        ],
    }
    uniq_conf = {'field': {'type': 'text', 'value': 'link'}}
    filter_conf = {
        'COMBINE': {'type': 'text', 'value': 'or'},
        'MODE': {'type': 'text', 'value': 'block'},
        'RULE': [
            {
                'field': {'type': 'text', 'value': 'title'},
                'value': {'type': 'text', 'value': 'php'},
                'op': {'type': 'text', 'value': 'contains'},
            },
        ],
    }
    sort_conf = {
        'KEY': [
            {
                'field': {'type': 'text', 'value': 'pubDate'},
                'dir': {'type': 'text', 'value': 'DESC'},
            },
        ],
    }

    # Each stage consumes the previous stage's result.
    stream = pipe_forever()
    stream = pipe_fetch(context, stream, conf=fetch_conf)
    stream = pipe_uniq(context, stream, conf=uniq_conf)
    stream = pipe_filter(context, stream, conf=filter_conf)
    stream = pipe_sort(context, stream, conf=sort_conf)

    return pipe_output(context, stream, conf={})
def pipe_6e30c269a69baf92cd420900b0645f88(context=None, _INPUT=None, conf=None, **kwargs):
    """Combine two fetched feeds and pass them through the remaining stages.

    Early exits (only when ``context`` is truthy):

    * ``context.describe_input`` set: returns ``[]``.
    * otherwise ``context.describe_dependencies`` set: returns the list of
      module names this pipeline depends on.

    Otherwise the stages are called in this order and the value returned by
    ``pipe_output`` is returned:

    1. ``pipe_fetch`` twice (``Topthemen`` then ``Politik`` XML files, both
       via ``file://data/...`` URLs), each fed by ``pipe_forever()``.
    2. ``pipe_union`` with the forever value as input and the fetch results
       passed as ``_OTHER`` and ``_OTHER3``.
    3. ``pipe_uniq`` on field ``title``.
    4. ``pipe_filter``: MODE ``block``, COMBINE ``or``, rules
       ``link`` contains ``/sport/`` and ``title`` contains ``Bildstrecke:``.
    5. ``pipe_rename``: one ``copy`` rule, field ``y:id.value``, newval
       ``link``.
    6. ``pipe_regex``: one rule on ``description``, one on ``link``.
    7. ``pipe_sort``: key ``pubDate``, direction ``DESC``.
    8. ``pipe_output`` with an empty configuration.

    ``_INPUT``, ``conf`` and ``kwargs`` are accepted but never passed on to
    any stage.
    """

    def _text(value):
        # Settings are handed to the stages as {"type": "text", "value": ...}.
        return {"type": "text", "value": value}

    def _contains(field, needle):
        # Filter rule in the shape used by the pipe_filter configuration.
        return {"field": _text(field), "value": _text(needle), "op": _text("contains")}

    conf = conf if conf else {}

    if context:
        if context.describe_input:
            return []
        if context.describe_dependencies:
            return [
                u"pipefetch",
                u"pipefilter",
                u"pipeoutput",
                u"piperegex",
                u"piperename",
                u"pipesort",
                u"pipeunion",
                u"pipeuniq",
            ]

    forever = pipe_forever()

    top_stories_conf = {"URL": {"type": "url", "value": "file://data/rss.sueddeutsche.de_rss_Topthemen.xml"}}
    top_stories = pipe_fetch(context, forever, conf=top_stories_conf)

    politics_conf = {"URL": {"type": "url", "value": "file://data/rss.sueddeutsche.de_rss_Politik.xml"}}
    politics = pipe_fetch(context, forever, conf=politics_conf)

    # Keyword order mirrors the generated call: _OTHER3, conf, _OTHER.
    merged = pipe_union(context, forever, _OTHER3=politics, conf={}, _OTHER=top_stories)

    unique_items = pipe_uniq(context, merged, conf={"field": _text("title")})

    filter_conf = {
        "COMBINE": _text("or"),
        "MODE": _text("block"),
        "RULE": [
            _contains("link", "/sport/"),
            _contains("title", "Bildstrecke:"),
        ],
    }
    filtered = pipe_filter(context, unique_items, conf=filter_conf)

    rename_conf = {
        "RULE": [
            {
                "field": _text("y:id.value"),
                "op": _text("copy"),
                "newval": _text("link"),
            }
        ]
    }
    renamed = pipe_rename(context, filtered, conf=rename_conf)

    regex_conf = {
        "RULE": [
            {
                "singlelinematch": _text("2"),
                "globalmatch": _text("1"),
                "replace": _text(""),
                "field": _text("description"),
                "casematch": _text("8"),
                "match": _text("</div>.*$"),
            },
            {
                "field": _text("link"),
                "match": _text("^(.*\\/.*)\\/"),
                "replace": _text("$1/2.220/"),
            },
        ]
    }
    rewritten = pipe_regex(context, renamed, conf=regex_conf)

    sort_conf = {"KEY": [{"field": _text("pubDate"), "dir": _text("DESC")}]}
    ordered = pipe_sort(context, rewritten, conf=sort_conf)

    return pipe_output(context, ordered, conf={})