Exemple #1
0
    def crawl(self, pub_date):
        """Collect both images of the release matching ``pub_date``.

        Uses the first entry yielded by ``feed.for_date(pub_date)`` and
        returns a two-element list: the image selected from the feed
        summary (with the entry title, site prefix removed) followed by
        the image selected with ``#aftercomic img`` on the entry's page.
        Falls through and returns ``None`` when the feed yields nothing.
        """
        title_prefix = 'Saturday Morning Breakfast Cereal - '
        feed = self.parse_feed('http://www.smbc-comics.com/rss.php')
        for item in feed.for_date(pub_date):
            caption = item.title.replace(title_prefix, '')
            strip_url = item.summary.src('img[src*="/comics/"]')
            # The second image is read from the entry's own page, not
            # from the feed summary.
            extra_url = self.parse_page(item.link).src('#aftercomic img')
            return [
                CrawlerImage(strip_url, caption),
                CrawlerImage(extra_url),
            ]
        return None
Exemple #2
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image URL is selected with ``img.comic-image`` on the page the
     entry links to; the entry title is passed along as the image title.
     Returns ``None`` when ``feed.for_date(pub_date)`` yields nothing.
     """
     feed = self.parse_feed('http://www.darklegacycomics.com/feed.xml')
     for item in feed.for_date(pub_date):
         caption = item.title
         image_url = self.parse_page(item.link).src('img.comic-image')
         return CrawlerImage(image_url, caption)
     return None
Exemple #3
0
 def crawl(self, pub_date):
     """Return the currently published strip if it belongs to ``pub_date``.

     The page is fetched from a fixed URL. The strip is accepted only
     when ``pub_date`` formatted as ``YYYY-MM-DD`` occurs in the image
     URL; otherwise ``None`` is returned.
     """
     strip_selector = '.comicStrip img'
     page = self.parse_page('http://www.redmeat.com/max-cannon/FreshMeat')
     image_url = page.src(strip_selector)
     caption = page.alt(strip_selector)
     date_stamp = pub_date.strftime('%Y-%m-%d')
     if date_stamp in image_url:
         return CrawlerImage(image_url, caption)
     return None
Exemple #4
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image URL is the first ``img`` in the entry summary. The title
     is the text between the first two single quotes of the entry title;
     an entry title without a single quote raises ``IndexError``.
     Returns ``None`` when ``feed.for_date(pub_date)`` yields nothing.
     """
     feed = self.parse_feed('http://www.phdcomics.com/gradfeed.php')
     for item in feed.for_date(pub_date):
         image_url = item.summary.src('img')
         quoted_parts = item.title.split("'")
         return CrawlerImage(image_url, quoted_parts[1])
     return None
Exemple #5
0
 def crawl(self, pub_date):
     """Return one image per matching ``img`` on the page for ``pub_date``.

     The page URL embeds ``pub_date`` formatted as ``YYYYMMDD``. Every
     URL returned by the multi-match ``src`` lookup becomes its own
     ``CrawlerImage``; the result is a list (empty if nothing matched).
     """
     date_stamp = pub_date.strftime('%Y%m%d')
     page = self.parse_page(
         'http://www.gunshowcomic.com/d/%s.html' % date_stamp)
     images = []
     for image_url in page.src(
             'img[src^="http://gunshowcomic.com/comics/"]',
             allow_multiple=True):
         images.append(CrawlerImage(image_url))
     return images
Exemple #6
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image URL comes from the entry summary (an ``img`` whose ``src``
     contains both ``/storage/`` and ``.gif``); the entry title is used
     as the image title. Returns ``None`` when the feed yields nothing.
     """
     feed_url = 'http://basicinstructions.net/basic-instructions/rss.xml'
     for item in self.parse_feed(feed_url).for_date(pub_date):
         return CrawlerImage(
             item.summary.src('img[src*="/storage/"][src*=".gif"]'),
             item.title)
     return None
Exemple #7
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The entry's page is fetched, the ``img`` whose ``src`` contains
     ``_thumb`` is selected, and every ``thumb`` in that URL is replaced
     with ``image``. Returns ``None`` when the feed yields nothing.
     """
     feed_url = 'http://www.creators.com/comics/liberty-meadows.rss'
     feed = self.parse_feed(feed_url)
     for item in feed.for_date(pub_date):
         thumb_url = self.parse_page(item.link).src('img[src*="_thumb"]')
         return CrawlerImage(thumb_url.replace('thumb', 'image'))
     return None
Exemple #8
0
 def crawl(self, pub_date):
     """Return the image for the first ``Comic``-tagged entry of the day.

     Entries yielded by ``feed.for_date(pub_date)`` whose tags do not
     include ``'Comic'`` are passed over. Returns ``None`` when no
     tagged entry is found.
     """
     feed = self.parse_feed('http://cdn.sheldoncomics.com/rss.xml')
     for item in feed.for_date(pub_date):
         if 'Comic' in item.tags:
             return CrawlerImage(
                 item.content0.src('img[src*="/strips/"]'))
     return None
Exemple #9
0
 def crawl(self, pub_date):
     """Return the strip found on the dated comic page for ``pub_date``.

     The page URL embeds ``pub_date`` formatted as ``YYYY/MM/DD``. The
     image's ``alt`` text becomes the title and its ``src`` the URL.
     """
     frame_selector = '#comicFrame img'
     date_path = pub_date.strftime('%Y/%m/%d')
     page = self.parse_page('http://penny-arcade.com/comic/%s' % date_path)
     caption = page.alt(frame_selector)
     image_url = page.src(frame_selector)
     return CrawlerImage(image_url, caption)
Exemple #10
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     URL and text are both read from the same summary ``img`` (``src``
     and ``title`` attributes); the entry title is the image title.
     Returns ``None`` when ``feed.for_date(pub_date)`` yields nothing.
     """
     strip_selector = 'img[src*="/strips/"]'
     feed = self.parse_feed('http://abstrusegoose.com/atomfeed.xml')
     for item in feed.for_date(pub_date):
         image_url = item.summary.src(strip_selector)
         caption = item.title
         hover_text = item.summary.title(strip_selector)
         return CrawlerImage(image_url, caption, hover_text)
     return None
 def crawl(self, pub_date):
     """Return the strip advertised by the dated page for ``pub_date``.

     The image URL is the ``href`` of the page's ``link[rel="image_src"]``
     element; the title is the ``alt`` text of the ``img`` whose ``src``
     equals that URL.
     """
     date_path = pub_date.strftime('%Y/%m/%d')
     page = self.parse_page('http://kindofnormal.com/wumo/%s' % date_path)
     image_url = page.href('link[rel="image_src"]')
     # Look the image up again by its exact src to read its alt text.
     image_selector = 'img[src="%s"]' % image_url
     return CrawlerImage(image_url, page.alt(image_selector))
Exemple #12
0
 def crawl(self, pub_date):
     """Return the image for the first ``Episode`` entry of the day.

     Entries whose title does not start with ``'Episode'`` are passed
     over. Returns ``None`` when no such entry is yielded by
     ``feed.for_date(pub_date)``.
     """
     feed = self.parse_feed('http://darthsanddroids.net/rss.xml')
     for item in feed.for_date(pub_date):
         if not item.title.startswith('Episode'):
             continue
         image_url = item.summary.src('img')
         return CrawlerImage(image_url, item.title)
     return None
Exemple #13
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image URL is taken from the summary ``img`` whose ``src``
     contains ``/tx_cenostripviewer/``; the entry title is the image
     title. Returns ``None`` when the feed yields nothing.
     """
     feed_url = ('http://www.goominet.com/unspeakable-vault/'
                 '?type=103&ecorss[clear_cache]=1')
     for item in self.parse_feed(feed_url).for_date(pub_date):
         return CrawlerImage(
             item.summary.src('img[src*="/tx_cenostripviewer/"]'),
             item.title)
     return None
Exemple #14
0
 def crawl(self, pub_date):
     """Return the image for the first entry of the feed.

     ``pub_date`` is not consulted: the first entry of ``feed.all()`` is
     used. The image is selected with ``img.comicnormal`` on the entry's
     page, and ``'SSSS page'`` in the title is rewritten to ``'Page'``.
     Returns ``None`` when the feed has no entries.
     """
     feed = self.parse_feed('http://sssscomic.com/ssss-feed.xml')
     for item in feed.all():
         image_url = self.parse_page(item.link).src('img.comicnormal')
         caption = item.title.replace('SSSS page', 'Page')
         return CrawlerImage(image_url, caption)
     return None
Exemple #15
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry that carries an upload.

     For each entry yielded by ``feed.for_date(pub_date)`` the summary is
     searched for an ``img`` whose ``src`` contains
     ``/wp-content/uploads/``. The ``-150x150`` size suffix is stripped
     from the URL and the entry title is used as the image title.

     Entries for which the lookup yields ``None`` are skipped instead of
     crashing on ``None.replace``. Returns ``None`` when no entry
     provides an image.
     """
     feed = self.parse_feed('http://axecop.com/feed/')
     for entry in feed.for_date(pub_date):
         title = entry.title
         url = entry.summary.src('img[src*="/wp-content/uploads/"]')
         if url is None:
             # NOTE(review): assumes src() yields None when nothing
             # matches the selector -- confirm against the parser.
             continue
         url = url.replace('-150x150', '')
         return CrawlerImage(url, title)
     return None
Exemple #16
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     URL and text are both read from the same summary ``img`` (``src``
     and ``title`` attributes); the entry title is the image title.
     Returns ``None`` when ``feed.for_date(pub_date)`` yields nothing.
     """
     comic_selector = 'img[src*="/comics/"]'
     feed = self.parse_feed('http://www.rsspect.com/rss/qwantz.xml')
     for item in feed.for_date(pub_date):
         image_url = item.summary.src(comic_selector)
         caption = item.title
         hover_text = item.summary.title(comic_selector)
         return CrawlerImage(image_url, caption, hover_text)
     return None
Exemple #17
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image URL is read from ``entry.content0`` (an ``img`` whose
     ``src`` contains ``/wp-content/``); the entry title is the image
     title. Returns ``None`` when the feed yields nothing.
     """
     feed_url = 'http://www.myextralife.com/category/comic/feed/'
     for item in self.parse_feed(feed_url).for_date(pub_date):
         return CrawlerImage(
             item.content0.src('img[src*="/wp-content/"]'),
             item.title)
     return None
Exemple #18
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image is selected with ``img#cc-comic`` on the entry's page; the
     ``'Blaster Nation - '`` prefix is removed from the entry title.
     Returns ``None`` when ``feed.for_date(pub_date)`` yields nothing.
     """
     title_prefix = 'Blaster Nation - '
     feed = self.parse_feed('http://www.blasternation.com/rss.php')
     for item in feed.for_date(pub_date):
         image_url = self.parse_page(item.link).src('img#cc-comic')
         return CrawlerImage(image_url, item.title.replace(title_prefix, ''))
     return None
Exemple #19
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image is looked up on the entry's page (an ``img`` whose ``src``
     contains ``/wp-content/uploads/``); the entry title is the image
     title. Returns ``None`` when the feed yields nothing.
     """
     feed = self.parse_feed('http://thisishistorictimes.com/feed/')
     for item in feed.for_date(pub_date):
         linked_page = self.parse_page(item.link)
         return CrawlerImage(
             linked_page.src('img[src*="/wp-content/uploads/"]'),
             item.title)
     return None
Exemple #20
0
 def crawl(self, pub_date):
     """Return the strip found on the dated page for ``pub_date``.

     Both the page URL and the image selector embed ``pub_date``
     formatted as ``YYYYMMDD``. Newline characters are removed from the
     selected image URL before it is wrapped in a ``CrawlerImage``.
     """
     date_stamp = pub_date.strftime('%Y%m%d')
     page = self.parse_page('http://www.crfh.net/d2/%s.html' % date_stamp)
     raw_url = page.src('img[src*="crfh%s"]' % date_stamp)
     return CrawlerImage(raw_url.replace('\n', ''))
Exemple #21
0
 def crawl(self, pub_date):
     """Return the image for the first ``Comics``-tagged entry of the day.

     Entries yielded by ``feed.for_date(pub_date)`` whose tags do not
     include ``'Comics'`` are passed over. Returns ``None`` when no
     tagged entry is found.
     """
     feed = self.parse_feed('http://www.goblinscomic.com/feed/')
     for item in feed.for_date(pub_date):
         if 'Comics' in item.tags:
             return CrawlerImage(
                 item.summary.src('img[src*="/comics/"]'))
     return None
Exemple #22
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image URL is the first ``img`` in the entry summary and the
     entry title is the image title. Returns ``None`` when
     ``feed.for_date(pub_date)`` yields nothing.
     """
     feed_url = 'http://www.donthitsave.com/donthitsavefeed.xml'
     for item in self.parse_feed(feed_url).for_date(pub_date):
         return CrawlerImage(item.summary.src('img'), item.title)
     return None
Exemple #23
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image URL is the first ``img`` in the entry summary and the
     entry title is the image title. Returns ``None`` when
     ``feed.for_date(pub_date)`` yields nothing.
     """
     feed_url = 'http://www.little-gamers.com/category/comic/feed'
     for item in self.parse_feed(feed_url).for_date(pub_date):
         return CrawlerImage(item.summary.src('img'), item.title)
     return None
Exemple #24
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     The image is selected on the entry's page by its ``alt`` text
     (starting with ``Teh Gladiators Webcomic``); the entry title is the
     image title. Returns ``None`` when the feed yields nothing.
     """
     feed = self.parse_feed('http://www.tehgladiators.com/rss.xml')
     for item in feed.for_date(pub_date):
         linked_page = self.parse_page(item.link)
         return CrawlerImage(
             linked_page.src('img[alt^="Teh Gladiators Webcomic"]'),
             item.title)
     return None
Exemple #25
0
 def crawl(self, pub_date):
     """Return the image for the first entry of the feed.

     ``pub_date`` is not consulted: the first entry of ``feed.all()`` is
     used. URL and text come from the entry's page (``itemprop="image"``
     and the stripped text of ``itemprop="articleBody"``); the entry
     title is the image title. Returns ``None`` for an empty feed.
     """
     feed = self.parse_feed('http://feeds.feedburner.com/satwcomic')
     for item in feed.all():
         linked_page = self.parse_page(item.link)
         image_url = linked_page.src('img[itemprop="image"]')
         caption = item.title
         body_text = linked_page.text('span[itemprop="articleBody"]')
         return CrawlerImage(image_url, caption, body_text.strip())
     return None
Exemple #26
0
 def crawl(self, pub_date):
     """Return the image for the first comic entry of the day.

     Only entries whose title starts with ``'Comic ['`` are considered.
     The image title is the text between the first two double quotes of
     the entry title, and the image is selected on the entry's page (an
     ``img`` whose ``src`` contains ``/strips/``). Returns ``None`` when
     no comic entry is yielded by ``feed.for_date(pub_date)``.
     """
     feed = self.parse_feed('http://www.megatokyo.com/rss/megatokyo.xml')
     for item in feed.for_date(pub_date):
         if not item.title.startswith('Comic ['):
             continue
         caption = item.title.split('"')[1]
         image_url = self.parse_page(item.link).src('img[src*="/strips/"]')
         return CrawlerImage(image_url, caption)
     return None
Exemple #27
0
 def crawl(self, pub_date):
     """Return the image for the first ``Cartoons``-tagged entry of the day.

     Entries yielded by ``feed.for_date(pub_date)`` whose tags do not
     include ``'Cartoons'`` are skipped. For the first tagged entry the
     image URL is read from ``entry.content0`` (an ``img`` whose ``src``
     contains ``/wp-content/``) and the entry title is the image title.
     Returns ``None`` when no tagged entry is found.
     """
     feed = self.parse_feed('http://www.savagechickens.com/feed')
     for entry in feed.for_date(pub_date):
         if 'Cartoons' not in entry.tags:
             # Actually skip the entry; the earlier debug print only
             # announced the skip and then processed the entry anyway.
             continue
         url = entry.content0.src('img[src*="/wp-content/"]')
         title = entry.title
         return CrawlerImage(url, title)
     return None
Exemple #28
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry matching ``pub_date``.

     URL and text are read from the ``.comic img`` element on the entry's
     page (``src`` and ``title`` attributes); the entry title is the
     image title. Returns ``None`` when the feed yields nothing.
     """
     comic_selector = '.comic img'
     feed = self.parse_feed('http://www.exocomics.com/feed')
     for item in feed.for_date(pub_date):
         caption = item.title
         linked_page = self.parse_page(item.link)
         image_url = linked_page.src(comic_selector)
         hover_text = linked_page.title(comic_selector)
         return CrawlerImage(image_url, caption, hover_text)
     return None
Exemple #29
0
 def crawl(self, pub_date):
     """Build the image URL for ``pub_date`` directly, without fetching.

     When ``pub_date.weekday()`` is 6 (Sunday) the ``.jpg`` URL on
     picayune.uclick.com is used; every other day uses the ``.gif`` URL
     on images.ucomics.com. Both embed ``pub_date`` formatted as
     ``%Y/ga%y%m%d``.
     """
     if pub_date.weekday() == 6:
         url_template = 'http://picayune.uclick.com/comics/ga/%s.jpg'
     else:
         url_template = 'http://images.ucomics.com/comics/ga/%s.gif'
     date_path = pub_date.strftime('%Y/ga%y%m%d')
     return CrawlerImage(url_template % date_path)
Exemple #30
0
 def crawl(self, pub_date):
     """Return the image for the first feed entry that carries an upload.

     For each entry yielded by ``feed.for_date(pub_date)`` the summary is
     searched for an ``img`` whose ``src`` contains
     ``/wp-content/uploads/``. Entries where that lookup gives ``None``
     are passed over. The ``-150x150`` suffix is removed from the URL and
     the entry title is the image title. Returns ``None`` when no entry
     provides an image.
     """
     feed = self.parse_feed('http://thepunchlineismachismo.com/feed')
     for item in feed.for_date(pub_date):
         thumb_url = item.summary.src('img[src*="/wp-content/uploads/"]')
         if thumb_url is None:
             continue
         return CrawlerImage(thumb_url.replace('-150x150', ''), item.title)
     return None