Ejemplo n.º 1
0
    def parseDrink(self, response):
        """
        Build a Drink item from a single drink page.

        The name, tags, ingredients and directions are pulled out of the
        response with XPath queries. Rating and review count are not
        scraped here and are stored as None. The text of every ingredient
        list entry is run through Unit_Analyzer.get_triple and the result
        is what gets stored.

        Returns the populated Drink item.
        """
        selector = HtmlXPathSelector(response)
        item = Drink()

        title_nodes = selector.select("//h2[@class='pagetitle']/text()")
        item['name'] = title_nodes.extract()[0]

        # Rating and review count are intentionally left empty.
        item['rating'] = None
        item['num_reviews'] = None

        # One entry per tag link: the link's first text node.
        item['tags'] = [
            tag_node.select("text()").extract()[0]
            for tag_node in selector.select("//div[@class='posttags']/a[@rel='tag']")
        ]

        # Each <li> text is converted by the analyzer before being stored.
        analyzer = Unit_Analyzer()
        item['ingredients'] = [
            analyzer.get_triple(entry.select('text()').extract()[0])
            for entry in selector.select("//ul[@class='ingredients']/li")
        ]

        direction_nodes = selector.select("//div[@class='entry']/div[3]/p/text()")
        item['directions'] = direction_nodes.extract()

        message = 'Drink retrieved: %s' % item
        log.msg(message, level=log.INFO)
        return item
Ejemplo n.º 2
0
    def parseDrink(self, response):
        """
        Parse a single drink page into a Drink item.

        The recipe name, rating, review count, ingredients and directions
        are extracted from the response with XPath queries. Tags are not
        scraped by this method and are stored as None.

        Rating and review count are stored as the lists returned by
        extract(). Both are replaced by None when the rating node is
        missing, or when its first text is longer than 4 characters
        (presumably a "not rated" placeholder rather than a numeric
        score -- confirm against the site markup).

        Returns the populated Drink item.
        Raises IndexError when the name, or an ingredient's amount or
        linked name, is absent from the page.
        """
        hxs = HtmlXPathSelector(response)
        drink = Drink()

        # Name and ratings
        drink['name'] = hxs.select("//h1[@class='fn recipe_title']/text()").extract()[0]
        rating = hxs.select("//div[@class='ratingsBox rating']/div[1]/div[1]/text()").extract()
        num_reviews = hxs.select("//div[@class='ratingsBox rating']//span[@class='count']/text()").extract()

        # If there is no rating, store None. The emptiness check comes
        # first so a page without the rating node does not raise
        # IndexError on rating[0].
        if not rating or len(rating[0]) > 4:
            rating = None
            num_reviews = None
        drink['rating'] = rating
        drink['num_reviews'] = num_reviews

        # Tags are not extracted here.
        drink['tags'] = None

        # Ingredients: join amount and name into one string that
        # unit_analyzer can parse.
        drink['ingredients'] = []
        unit_analyzer = Unit_Analyzer()
        ingredient_nodes = hxs.select("//div[@class='ingredients']//span[@class='ingredient']")
        for ingredient_node in ingredient_nodes:
            amount = ingredient_node.select(".//span[@class='amount']/text()").extract()[0]
            name = ingredient_node.select(".//span[@class='name']//a/text()").extract()[0]
            final_triple = unit_analyzer.get_triple(amount + " " + name)
            drink['ingredients'].append(final_triple)

        # Directions
        drink['directions'] = hxs.select("//div[@class='RecipeDirections instructions']/text()").extract()

        return drink
Ejemplo n.º 3
0
    def parseDrink(self, response):
        """
        Build a Drink item from the 'drinkRecipe' section of a page.

        The name comes from the section's <h2> and the directions from
        the first text node of its second <p>. Rating, review count and
        tags are not scraped and are stored as None. For every <li> in
        the section's list, the entry's own first text node is joined
        with the first text node of its child link, and the result of
        Unit_Analyzer.get_triple on that string is stored.

        Returns the populated Drink item.
        """
        selector = HtmlXPathSelector(response)
        item = Drink()

        heading = selector.select("//div[@id='drinkRecipe']/h2/text()")
        item['name'] = heading.extract()[0]

        # Fields this method does not scrape.
        item['rating'] = item['num_reviews'] = item['tags'] = None

        paragraph = selector.select("//div[@id='drinkRecipe']/p[position()=2]/text()")
        item['directions'] = paragraph.extract()[0]

        item['ingredients'] = []
        analyzer = Unit_Analyzer()
        for entry in selector.select("//div[@id='drinkRecipe']/ul/li"):
            # Entry text first, then the text of the link inside the entry.
            leading_text = entry.select('text()').extract()[0]
            link_text = entry.select('a/text()').extract()[0]
            item['ingredients'].append(analyzer.get_triple(leading_text + link_text))

        return item