def pipe_926a27580e74684fc63559b92bddaa30(context=None, _INPUT=None, conf=None, **kwargs):
    """Assemble and run the job-listing feed pipeline.

    Stages, in the order they are wired together:

    1. ``pipe_fetch`` is configured with two feed URLs (guru.com and
       elance.com job listings).
    2. ``pipe_uniq`` is configured on the ``link`` field.
    3. ``pipe_filter`` is configured in ``block`` mode, combining with
       ``or``, with one rule: ``title`` ``contains`` ``php``.
    4. ``pipe_sort`` is keyed on ``pubDate`` with direction ``DESC``.
    5. ``pipe_output`` produces the value that is returned.

    Args:
        context: Optional execution context. Its ``describe_input`` and
            ``describe_dependencies`` attributes switch the call into
            introspection mode; it is also forwarded to every stage.
        _INPUT: Accepted for signature compatibility; not read here.
        conf: Accepted for signature compatibility; not read here.
        **kwargs: Accepted for signature compatibility; not read here.

    Returns:
        ``[]`` when ``context.describe_input`` is truthy; the list of pipe
        module names this pipeline calls when
        ``context.describe_dependencies`` is truthy; otherwise whatever
        ``pipe_output`` returns.
    """
    if context and context.describe_input:
        return []

    # Report the pipe modules used, in the same form as the sibling pipeline
    # in this file. getattr() keeps contexts that predate the attribute working.
    if context and getattr(context, 'describe_dependencies', False):
        return [u'pipefetch', u'pipefilter', u'pipeoutput', u'pipesort', u'pipeuniq']

    forever = pipe_forever()

    sw_68 = pipe_fetch(
        context,
        forever,
        conf={
            "URL": [
                {"type": "url", "value": "http://www.guru.com/rss/jobs/c/web-software-it/"},
                {
                    "type": "url",
                    "value": "https://www.elance.com/r/rss/jobs/cat-it-programming/fxd-true/o-1/bgt-gt500-ns1/sct-database-development-10217-data-analysis-14174-database-administration-14177-business-intelligence-14173-data-engineering-14175-system-administration-10219-other-data-science-14178-technical-support-10218-other-it-programming-12350-software-application-10216-website-design-10225-web-programming-10224/tls-1/s-timelistedSort",
                },
            ]
        },
    )

    sw_90 = pipe_uniq(context, sw_68, conf={"field": {"type": "text", "value": "link"}})

    sw_87 = pipe_filter(
        context,
        sw_90,
        conf={
            "COMBINE": {"type": "text", "value": "or"},
            "MODE": {"type": "text", "value": "block"},
            "RULE": [
                {
                    "field": {"type": "text", "value": "title"},
                    "value": {"type": "text", "value": "php"},
                    "op": {"type": "text", "value": "contains"},
                }
            ],
        },
    )

    sw_101 = pipe_sort(
        context,
        sw_87,
        conf={"KEY": [{"field": {"type": "text", "value": "pubDate"}, "dir": {"type": "text", "value": "DESC"}}]},
    )

    _OUTPUT = pipe_output(context, sw_101, conf={})
    return _OUTPUT
def pipe_6e30c269a69baf92cd420900b0645f88(context=None, _INPUT=None, conf=None, **kwargs):
    """Assemble and run the two-feed news pipeline.

    Stages, in the order they are wired together:

    1. ``pipe_fetch`` is called twice, once per local ``file://`` feed
       (``Topthemen`` and ``Politik``).
    2. ``pipe_union`` receives both fetch results as ``_OTHER`` and
       ``_OTHER3``.
    3. ``pipe_uniq`` is configured on the ``title`` field.
    4. ``pipe_filter`` is configured in ``block`` mode, combining with
       ``or``: ``link`` contains ``/sport/`` or ``title`` contains
       ``Bildstrecke:``.
    5. ``pipe_rename`` is given one ``copy`` rule from ``y:id.value`` to
       ``link``.
    6. ``pipe_regex`` is given two rules, one on ``description`` and one on
       ``link``.
    7. ``pipe_sort`` is keyed on ``pubDate`` with direction ``DESC``.
    8. ``pipe_output`` produces the value that is returned.

    Args:
        context: Optional execution context. Its ``describe_input`` and
            ``describe_dependencies`` attributes switch the call into
            introspection mode; it is also forwarded to every stage.
        _INPUT: Accepted for signature compatibility; not read here.
        conf: Normalised to a dict but not otherwise read here.
        **kwargs: Accepted for signature compatibility; not read here.

    Returns:
        ``[]`` when ``context.describe_input`` is truthy; the list of pipe
        module names when ``context.describe_dependencies`` is truthy;
        otherwise whatever ``pipe_output`` returns.
    """
    conf = conf or {}

    # Introspection modes short-circuit before any stage is run.
    if context:
        if context.describe_input:
            return []
        if context.describe_dependencies:
            return [
                u'pipefetch',
                u'pipefilter',
                u'pipeoutput',
                u'piperegex',
                u'piperename',
                u'pipesort',
                u'pipeunion',
                u'pipeuniq',
            ]

    def text(value):
        # Wrap a literal in the typed-value envelope every stage config uses.
        return {'type': 'text', 'value': value}

    def feed(location):
        # Build a single-URL fetch configuration.
        return {'URL': {'type': 'url', 'value': location}}

    source = pipe_forever()

    top_stories = pipe_fetch(
        context, source,
        conf=feed('file://data/rss.sueddeutsche.de_rss_Topthemen.xml'))
    politics = pipe_fetch(
        context, source,
        conf=feed('file://data/rss.sueddeutsche.de_rss_Politik.xml'))

    merged = pipe_union(
        context, source, _OTHER3=politics, conf={}, _OTHER=top_stories)

    distinct = pipe_uniq(context, merged, conf={'field': text('title')})

    kept = pipe_filter(
        context, distinct,
        conf={
            'COMBINE': text('or'),
            'MODE': text('block'),
            'RULE': [
                {'field': text('link'), 'value': text('/sport/'), 'op': text('contains')},
                {'field': text('title'), 'value': text('Bildstrecke:'), 'op': text('contains')},
            ],
        })

    relinked = pipe_rename(
        context, kept,
        conf={
            'RULE': [
                {'field': text('y:id.value'), 'op': text('copy'), 'newval': text('link')},
            ],
        })

    rewritten = pipe_regex(
        context, relinked,
        conf={
            'RULE': [
                {
                    'singlelinematch': text('2'),
                    'globalmatch': text('1'),
                    'replace': text(''),
                    'field': text('description'),
                    'casematch': text('8'),
                    'match': text('</div>.*$'),
                },
                {
                    'field': text('link'),
                    'match': text('^(.*\\/.*)\\/'),
                    'replace': text('$1/2.220/'),
                },
            ],
        })

    ordered = pipe_sort(
        context, rewritten,
        conf={'KEY': [{'field': text('pubDate'), 'dir': text('DESC')}]})

    _OUTPUT = pipe_output(context, ordered, conf={})
    return _OUTPUT
def pipe_926a27580e74684fc63559b92bddaa30(context=None, _INPUT=None, conf=None, **kwargs):
    """Build and execute the job-listing feed pipeline.

    The stages are chained in this order:

    1. ``pipe_fetch`` is configured with two feed URLs (guru.com and
       elance.com job listings).
    2. ``pipe_uniq`` is configured on the ``link`` field.
    3. ``pipe_filter`` is configured in ``block`` mode, combining with
       ``or``, with a single rule: ``title`` ``contains`` ``php``.
    4. ``pipe_sort`` is keyed on ``pubDate`` with direction ``DESC``.
    5. ``pipe_output`` produces the value that is returned.

    Args:
        context: Optional execution context. Its ``describe_input`` and
            ``describe_dependencies`` attributes select an introspection
            mode; it is also passed through to each stage.
        _INPUT: Accepted for signature compatibility; not read here.
        conf: Accepted for signature compatibility; not read here.
        **kwargs: Accepted for signature compatibility; not read here.

    Returns:
        ``[]`` when ``context.describe_input`` is truthy; the names of the
        pipe modules this pipeline calls when
        ``context.describe_dependencies`` is truthy; otherwise the result
        of ``pipe_output``.
    """
    if context and context.describe_input:
        return []

    # Answer dependency introspection the way the sibling pipeline in this
    # file does. getattr() tolerates contexts that lack the attribute.
    if context and getattr(context, 'describe_dependencies', False):
        return [u'pipefetch', u'pipefilter', u'pipeoutput', u'pipesort', u'pipeuniq']

    feed_urls = (
        'http://www.guru.com/rss/jobs/c/web-software-it/',
        'https://www.elance.com/r/rss/jobs/cat-it-programming/fxd-true/o-1/bgt-gt500-ns1/sct-database-development-10217-data-analysis-14174-database-administration-14177-business-intelligence-14173-data-engineering-14175-system-administration-10219-other-data-science-14178-technical-support-10218-other-it-programming-12350-software-application-10216-website-design-10225-web-programming-10224/tls-1/s-timelistedSort',
    )

    forever = pipe_forever()

    sw_68 = pipe_fetch(
        context,
        forever,
        conf={'URL': [{'type': 'url', 'value': address} for address in feed_urls]},
    )

    sw_90 = pipe_uniq(
        context,
        sw_68,
        conf={'field': {'type': 'text', 'value': 'link'}},
    )

    sw_87 = pipe_filter(
        context,
        sw_90,
        conf={
            'COMBINE': {'type': 'text', 'value': 'or'},
            'MODE': {'type': 'text', 'value': 'block'},
            'RULE': [
                {
                    'field': {'type': 'text', 'value': 'title'},
                    'value': {'type': 'text', 'value': 'php'},
                    'op': {'type': 'text', 'value': 'contains'},
                },
            ],
        },
    )

    sw_101 = pipe_sort(
        context,
        sw_87,
        conf={
            'KEY': [
                {
                    'field': {'type': 'text', 'value': 'pubDate'},
                    'dir': {'type': 'text', 'value': 'DESC'},
                },
            ],
        },
    )

    _OUTPUT = pipe_output(context, sw_101, conf={})
    return _OUTPUT
def pipe_6e30c269a69baf92cd420900b0645f88(context=None, _INPUT=None, conf=None, **kwargs):
    """Build and execute the two-feed news pipeline.

    The stages are chained in this order:

    1. ``pipe_fetch`` is called once per local ``file://`` feed
       (``Topthemen``, then ``Politik``).
    2. ``pipe_union`` receives the two fetch results as ``_OTHER`` and
       ``_OTHER3``.
    3. ``pipe_uniq`` is configured on the ``title`` field.
    4. ``pipe_filter`` is configured in ``block`` mode, combining with
       ``or``: ``link`` contains ``/sport/`` or ``title`` contains
       ``Bildstrecke:``.
    5. ``pipe_rename`` is given one ``copy`` rule from ``y:id.value`` to
       ``link``.
    6. ``pipe_regex`` is given two rules, one on ``description`` and one on
       ``link``.
    7. ``pipe_sort`` is keyed on ``pubDate`` with direction ``DESC``.
    8. ``pipe_output`` produces the value that is returned.

    Args:
        context: Optional execution context. Its ``describe_input`` and
            ``describe_dependencies`` attributes select an introspection
            mode; it is also passed through to each stage.
        _INPUT: Accepted for signature compatibility; not read here.
        conf: Normalised to a dict but not otherwise read here.
        **kwargs: Accepted for signature compatibility; not read here.

    Returns:
        ``[]`` when ``context.describe_input`` is truthy; the list of pipe
        module names when ``context.describe_dependencies`` is truthy;
        otherwise the result of ``pipe_output``.
    """
    conf = conf or {}

    if context and context.describe_input:
        return []

    if context and context.describe_dependencies:
        dependencies = [
            u"pipefetch", u"pipefilter", u"pipeoutput", u"piperegex",
            u"piperename", u"pipesort", u"pipeunion", u"pipeuniq",
        ]
        return dependencies

    def contains_rule(field_name, needle):
        # One filter rule: "<field_name> contains <needle>".
        return {
            "field": {"type": "text", "value": field_name},
            "value": {"type": "text", "value": needle},
            "op": {"type": "text", "value": "contains"},
        }

    forever = pipe_forever()

    # --- sources -----------------------------------------------------------
    topthemen_conf = {"URL": {"type": "url", "value": "file://data/rss.sueddeutsche.de_rss_Topthemen.xml"}}
    topthemen_items = pipe_fetch(context, forever, conf=topthemen_conf)

    politik_conf = {"URL": {"type": "url", "value": "file://data/rss.sueddeutsche.de_rss_Politik.xml"}}
    politik_items = pipe_fetch(context, forever, conf=politik_conf)

    combined_items = pipe_union(context, forever, _OTHER3=politik_items, conf={}, _OTHER=topthemen_items)

    # --- de-duplication and filtering -------------------------------------
    uniq_conf = {"field": {"type": "text", "value": "title"}}
    unique_items = pipe_uniq(context, combined_items, conf=uniq_conf)

    filter_conf = {
        "COMBINE": {"type": "text", "value": "or"},
        "MODE": {"type": "text", "value": "block"},
        "RULE": [
            contains_rule("link", "/sport/"),
            contains_rule("title", "Bildstrecke:"),
        ],
    }
    filtered_items = pipe_filter(context, unique_items, conf=filter_conf)

    # --- field rewriting ---------------------------------------------------
    rename_conf = {
        "RULE": [
            {
                "field": {"type": "text", "value": "y:id.value"},
                "op": {"type": "text", "value": "copy"},
                "newval": {"type": "text", "value": "link"},
            }
        ]
    }
    renamed_items = pipe_rename(context, filtered_items, conf=rename_conf)

    description_rule = {
        "singlelinematch": {"type": "text", "value": "2"},
        "globalmatch": {"type": "text", "value": "1"},
        "replace": {"type": "text", "value": ""},
        "field": {"type": "text", "value": "description"},
        "casematch": {"type": "text", "value": "8"},
        "match": {"type": "text", "value": "</div>.*$"},
    }
    link_rule = {
        "field": {"type": "text", "value": "link"},
        "match": {"type": "text", "value": "^(.*\\/.*)\\/"},
        "replace": {"type": "text", "value": "$1/2.220/"},
    }
    regexed_items = pipe_regex(context, renamed_items, conf={"RULE": [description_rule, link_rule]})

    # --- ordering and output -----------------------------------------------
    sort_key = {
        "field": {"type": "text", "value": "pubDate"},
        "dir": {"type": "text", "value": "DESC"},
    }
    sorted_items = pipe_sort(context, regexed_items, conf={"KEY": [sort_key]})

    _OUTPUT = pipe_output(context, sorted_items, conf={})
    return _OUTPUT