def main():
    """Run the RssSpider pipeline configured for the 91ri feed.

    The three steps run in a fixed order: list extraction, content
    extraction, then saving the RSS file.
    """
    # Two capture groups: the link href first, then the title attribute.
    entry_pattern = r'<div class="right-col">\s+<h1><a href="(.*?)" data-no-turbolink="true" target="_blank" title="(.*?)">'
    # Markers handed to get_content; presumably they delimit the article
    # body inside each fetched page -- confirm against RssSpider.
    body_open = '<article class="single-post">'
    body_close = '</article>'

    spider = RssSpider(myrss, '91ri.xml', '91ri.db')
    spider.get_list(entry_pattern, flag=re.S)
    spider.get_content(body_open, body_close)
    spider.save_rss_file()
def main():
    """Run the RssSpider pipeline configured for the 8She feed.

    The three steps run in a fixed order: list extraction, content
    extraction, then saving the RSS file.
    """
    # Two capture groups: the link href first, then the anchor text.
    entry_pattern = r'<header>.*?<i></i></a><h2><a href="(.*?)" title=".*?">(.*?)</a></h2></header>'
    # Markers handed to get_content; presumably they delimit the article
    # body inside each fetched page -- confirm against RssSpider.
    body_open = '<article class="article-content">'
    body_close = '</article>'

    spider = RssSpider(myrss, '8She.xml', '8She.db')
    spider.get_list(entry_pattern, flag=re.S)
    spider.get_content(body_open, body_close)
    spider.save_rss_file()
def main():
    """Run the RssSpider pipeline configured for the elm-world feed.

    A list of replacement rules is passed to ``get_list`` together with
    the link pattern. Each rule is a dict with the keys ``old``, ``new``,
    ``reg`` and ``flags``; presumably ``reg`` marks ``old`` as a regular
    expression and ``flags`` carries its ``re`` flags -- confirm against
    RssSpider.get_list.
    """
    replaces = [
        # Literal rule: remove the "New!" logo image tag.
        {
            'old': '<img src="extremesex/memder/ng/logo31.gif" width="47" height="25" border="0" alt="New!">',
            'new': '',
            'reg': False,
            'flags': 0,
        },
        # Literal rule: move a trailing "P" inside the preceding anchor.
        {'old': '</a>P<br>', 'new': 'P</a><br>', 'reg': False, 'flags': 0},
        # Regex rule. The pattern must be a raw string: "\s" in a normal
        # string literal is an invalid escape sequence and triggers a
        # warning at compile time.
        {
            'old': r'<BR>\s+</A>',
            'new': '</a><br>',
            'reg': True,
            'flags': re.S | re.I,
        },
        # Literal rule: remove one specific empty anchor.
        {
            'old': '<A target="_blank" href="extremesex/memder/photo/messi1/index.html"></a><br>',
            'new': '',
            'reg': False,
            'flags': 0,
        },
    ]

    # Two capture groups: the link href first, then the anchor text.
    link_pattern = r'<a.*?href="(.*?)".*?>(.*?)</a>'

    spider = RssSpider(myrss, 'elm-world.xml', charset='CP932')
    spider.get_list(link_pattern, replaces=replaces, flag=re.S | re.I)
    # Both content markers are empty strings for this feed.
    spider.get_content('', '')
    spider.save_rss_file()
def main():
    """Configure and run the RssSpider pipeline for the elm-world feed.

    ``get_list`` receives the link pattern plus a list of replacement
    rules. Every rule is a dict with the keys ``old``, ``new``, ``reg``
    and ``flags``; presumably ``reg`` says whether ``old`` is a regular
    expression and ``flags`` holds the ``re`` flags to use with it --
    confirm against RssSpider.get_list.
    """
    # Literal rule: strip the "New!" logo image tag.
    strip_logo = {
        'old': '<img src="extremesex/memder/ng/logo31.gif" width="47" height="25" border="0" alt="New!">',
        'new': '',
        'reg': False,
        'flags': 0,
    }
    # Literal rule: pull a trailing "P" inside the preceding anchor.
    move_suffix = {
        'old': '</a>P<br>',
        'new': 'P</a><br>',
        'reg': False,
        'flags': 0,
    }
    # Regex rule. Written as a raw string because "\s" is an invalid
    # escape sequence in a normal string literal and raises a warning.
    fix_break = {
        'old': r'<BR>\s+</A>',
        'new': '</a><br>',
        'reg': True,
        'flags': re.S | re.I,
    }
    # Literal rule: drop one specific empty anchor.
    drop_empty_anchor = {
        'old': '<A target="_blank" href="extremesex/memder/photo/messi1/index.html"></a><br>',
        'new': '',
        'reg': False,
        'flags': 0,
    }
    replaces = [strip_logo, move_suffix, fix_break, drop_empty_anchor]

    spider = RssSpider(myrss, 'elm-world.xml', charset='CP932')
    # Two capture groups: the link href first, then the anchor text.
    spider.get_list(
        r'<a.*?href="(.*?)".*?>(.*?)</a>',
        replaces=replaces,
        flag=re.S | re.I,
    )
    # Both content markers are empty strings for this feed.
    spider.get_content('', '')
    spider.save_rss_file()
def main():
    """Configure and run the RssSpider pipeline for the 91ri feed.

    Steps, in order: get_list with the entry regex, get_content with the
    article markers, then save_rss_file.
    """
    feed = RssSpider(myrss, '91ri.xml', '91ri.db')

    # The regex captures the link href and then the title attribute.
    list_regex = r'<div class="right-col">\s+<h1><a href="(.*?)" data-no-turbolink="true" target="_blank" title="(.*?)">'
    feed.get_list(list_regex, flag=re.S)

    # Opening and closing markers passed to get_content (assumed to
    # bracket the article body -- verify in RssSpider).
    article_markers = ('<article class="single-post">', '</article>')
    feed.get_content(*article_markers)

    feed.save_rss_file()
def main():
    """Configure and run the RssSpider pipeline for the 8She feed.

    Steps, in order: get_list with the entry regex, get_content with the
    article markers, then save_rss_file.
    """
    feed = RssSpider(myrss, '8She.xml', '8She.db')

    # The regex captures the link href and then the anchor text.
    list_regex = r'<header>.*?<i></i></a><h2><a href="(.*?)" title=".*?">(.*?)</a></h2></header>'
    feed.get_list(list_regex, flag=re.S)

    # Opening and closing markers passed to get_content (assumed to
    # bracket the article body -- verify in RssSpider).
    article_markers = ('<article class="article-content">', '</article>')
    feed.get_content(*article_markers)

    feed.save_rss_file()