Esempio n. 1
0
    def test_keeps_non_tag_text_after_dash(self):
        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Pumpkin Seed Bark &#x2013; Dark Chocolate and Sea Salt</h1>"""
                                  )),
            u'Pumpkin Seed Bark \u2013 Dark Chocolate and Sea Salt')

        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Keto Flatbread Recipe &#x2013; Low Carb, Gluten Free</h1>"""
                                  )), u'Keto Flatbread Recipe')
Esempio n. 2
0
    def test_strips_tags_after_dash(self):
        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Spicy Chicken Sausage &#x2013; Low Carb, Gluten-Free</h1>"""
                                  )), u'Spicy Chicken Sausage')
Esempio n. 3
0
    def test_strips_bracketed_text(self):
        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Low Carb Moscow Mule &#8211; [Keto, Alcohol, Sugar Free]</h1>"""
                                  )), 'Low Carb Moscow Mule')
Esempio n. 4
0
    def test_scrapes_title_with_no_flavor_text(self):
        self.assertEqual(
            ruled_me.scrape_title(
                http.TextResponse(
                    url='', body="""
<h1>Keto Beef Wellington</h1>
""")), 'Keto Beef Wellington')
Esempio n. 5
0
    def test_strips_tags_after_pipe(self):
        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Spanish Cauliflower Rice | Low Carb</h1>""")),
            'Spanish Cauliflower Rice')
    def test_strips_tags_after_pipe(self):
        self.assertEqual(
            low_carb_yum.scrape_title(
                http.TextResponse(url='',
                                  body="""
<meta property="og:title" content="Almond Flour Biscuits - Paleo Low Carb" />"""
                                  )), 'Almond Flour Biscuits')
Esempio n. 7
0
    def test_when_meta_section_does_not_specify_category_raises_exception(self):
        with self.assertRaises(errors.NoRecipeFoundError):
            keto_size_me.scrape_category(
                http.TextResponse(
                    url='https://ketosizeme.com/keto-bulletproof-coffee/',
                    body="""
<meta property="article:section" content="Keto Brands We Love" />
"""))
    def test_strips_trailing_page_title(self):
        self.assertEqual(
            hey_keto_mama.scrape_title(
                http.TextResponse(
                    url='',
                    body="""
<meta property="og:title" content="Cream Cheese &amp; Salami Keto Pinwheels - Hey Keto Mama" />"""
                )), u'Cream Cheese & Salami Keto Pinwheels')
Esempio n. 9
0
    def test_scrapes_image(self):
        self.assertEqual(
            ruled_me.scrape_image(
                http.TextResponse(
                    url='',
                    body="""
 <meta property="og:image" content="https://ruled.me/recipe-image.jpg" />""")),
            'https://ruled.me/recipe-image.jpg')
    def test_scrapes_title_and_removes_flavor_text(self):
        self.assertEqual(
            ketoconnect.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">
  <a href="https://www.ketoconnect.net/recipe/cauliflower-waffles/">Cauliflower Waffles | Bacon and Cheddar!</a>
</h1>""")), 'Cauliflower Waffles')
    def test_scrapes_title_with_no_flavor_text(self):
        self.assertEqual(
            ketoconnect.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">
  <a href="https://www.ketoconnect.net/recipe/keto-butter-chicken/">Keto Butter Chicken</a>
</h1>""")), 'Keto Butter Chicken')
Esempio n. 12
0
    def test_scrapes_hierarchical_category(self):
        self.assertEqual(
            ruled_me.scrape_category(
                http.TextResponse(
                    url='',
                    body="""
<div class="postCategories">
Keto Recipes &gt; <a rel="nofollow" href="https://www.ruled.me/keto-recipes/" title="Dinner">Dinner</a>
</div>""")), 'entree')
    def test_scrapes_title_with_multiple_h1(self):
        self.assertEqual(
            ketoconnect.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">
  <a href="https://www.ketoconnect.net/recipe/cooked-oven-meat/">Cooked Oven Meat</a>
</h1>
<h1>Non-title text</h1>""")), 'Cooked Oven Meat')
Esempio n. 14
0
    def test_scrapes_simple_category(self):
        self.assertEqual(
            ruled_me.scrape_category(
                http.TextResponse(
                    url='',
                    body="""
<div class="postCategories">
&gt; <a rel="nofollow" href="" title="Dinner">Dinner</a>
</div>""")), 'entree')
    def test_scrapes_opengraph_image(self):
        self.assertEqual(
            ketoconnect.scrape_image(
                http.TextResponse(url='',
                                  body="""
<meta
  property="og:image"
  content="https://www.ketoconnect.net/recipe-image.jpg" />
""")), 'https://www.ketoconnect.net/recipe-image.jpg')
Esempio n. 16
0
    def test_strips_tags_after_colon(self):
        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Vodka Mojito: Low Carb and Sugar-Free</h1>""")),
            'Vodka Mojito')
        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Hot Buttered Rum Recipe: Low Carb, Sugar Free</h1>""")
            ), 'Hot Buttered Rum Recipe')
        self.assertEqual(
            ketogasm.scrape_title(
                http.TextResponse(url='',
                                  body="""
<h1 class="entry-title">Gin Fizz Cocktail Recipe &#8211; Low Carb &#038; Sugar Free!</h1>
""")), 'Gin Fizz Cocktail Recipe')
    def test_scrapes_non_opengraph_image(self):
        self.assertEqual(
            ketoconnect.scrape_image(
                http.TextResponse(url='',
                                  body="""
<div id="tve_editor">
<span class="junk">
<img class="tve_image" alt="" style="width: 400px;" src="https://www.ketoconnect.net/recipe-image.jpg" width="400" height="600" data-attachment-id="9282">
</span>
</div>""")), 'https://www.ketoconnect.net/recipe-image.jpg')
Esempio n. 18
0
    def test_reads_none_category_when_category_not_defined(self):
        self.assertEqual(
            ketogasm.scrape_category(
                http.TextResponse(url='',
                                  body="""
<script type="application/ld+json">
{
   "@context":"http:\/\/schema.org\/",
   "@type":"Recipe",
   "name":"Roasted Pumpkin Seeds Recipe"
}""")), None)
Esempio n. 19
0
    def test_scrapes_reverse_hierarchical_category(self):
        self.assertEqual(
            ruled_me.scrape_category(
                http.TextResponse(
                    url='',
                    body="""
<html>
<h1>Cauliflower Mac & Cheese</h1>
<div class="postCategories">
Side Items &gt; <a rel="nofollow" href="https://www.ruled.me/keto-recipes/side-items/" title="Keto Recipes">Keto Recipes</a>
</div>
</html>""")), 'side')
def parse(metadata, html):
    """Scrape one recipe page into a dictionary of recipe fields.

    Args:
        metadata: Mapping that must contain a 'url' entry; it is also passed
            unchanged to every scraper call.
        html: Page content used as the body of the rebuilt response.

    Returns:
        A dict with the keys 'url', 'title', 'category', 'mainImage',
        'ingredients' and 'publishedTime'.
    """
    # Rebuild the scrapy response from the HTML.
    page = http.TextResponse(url=metadata['url'], body=html)

    # The scraper is selected from the page URL.
    site_scraper = _find_scraper(metadata['url'])

    raw_title = site_scraper.scrape_title(page, metadata)
    canonical_title = titles.canonicalize(raw_title)

    raw_ingredients = site_scraper.scrape_ingredients(page, metadata)
    parsed_ingredients = _parse_ingredients(raw_ingredients)

    # Fill the result in a fixed key order, calling the remaining scrapers
    # as each field is added.
    recipe = {'url': metadata['url'], 'title': canonical_title}
    recipe['category'] = site_scraper.scrape_category(page, metadata)
    recipe['mainImage'] = site_scraper.scrape_image(page, metadata)
    recipe['ingredients'] = parsed_ingredients
    recipe['publishedTime'] = site_scraper.scrape_published_time(
        page, metadata)
    return recipe
 def test_scrapes_non_opengraph_image(self):
     # NOTE(review): despite the method name, this checks scrape_category,
     # not an image. The page body is empty and the only input besides it
     # is the 'referer' entry; 'entree' is the expected category.
     empty_page = http.TextResponse(url='', body='')
     crawl_metadata = {
         'referer': 'https://www.ketoconnect.net/main-dishes/',
     }
     category = ketoconnect.scrape_category(empty_page, crawl_metadata)
     self.assertEqual(category, 'entree')