Ejemplo n.º 1
0
    def get_drawings(self):
        """Collect one drawing record for every ``<h3>`` found under ``self.html``.

        Each heading must contain a link (``<a>``) whose text carries the
        artist name and, optionally, the artist age; the link's ``href`` is
        used as the image filename. The name and age are extracted with the
        module-level patterns ``artist_name_exp`` and ``artist_age_exp``
        (defined outside this method).

        Returns:
            list of ``(artist_name, artist_age, image_filename, product_title)``
            tuples, in document order. ``artist_age`` is an ``int`` or
            ``None``; ``product_title`` is a ``str`` or ``None``.

        Raises:
            Exception: if a heading has no link, the link has no text, the
                name pattern does not match, or the link has no ``href``.
        """
        drawings = []
        for h3 in self.html.xpath('.//h3'):
            anchors = h3.xpath('./a')
            if not anchors:
                raise Exception('Artist name should be a link')

            # ARTIST NAME [required]
            artist_name_age = anchors[0].text
            if artist_name_age is None:
                raise Exception('Artist name section is empty')

            name_match = artist_name_exp.search(artist_name_age)
            if name_match is None:
                raise Exception('Artist name is not specified')
            artist_name = name_match.group(1).strip()

            # ARTIST AGE [optional]
            age_match = artist_age_exp.search(artist_name_age)
            age_text = None
            if age_match is not None and age_match.groups():
                age_text = age_match.group(1)
            # A capture group that did not participate in the match yields
            # None; treat that the same as "no age given" instead of letting
            # int(None) fail.
            artist_age = int(age_text) if age_text is not None else None

            # IMAGE FILENAME [required]
            hrefs = h3.xpath('./a/@href')
            if not hrefs:
                # Report a missing href explicitly rather than with a bare
                # IndexError from indexing an empty result.
                raise Exception('Artist link has no href')
            image_filename = hrefs[0]

            # PRODUCT TITLE [optional]
            drawing_data = lxml_utils.get_following_tags(h3, 'h3')
            title_text = drawing_data[0].text if len(drawing_data) > 0 else None
            if title_text is None:
                # No following tag, or the tag carries no text: no title.
                product_title = None
            else:
                # Drop the first and last character of the text
                # (presumably surrounding quotes/brackets -- TODO confirm).
                product_title = title_text[1:-1]

            drawings.append((artist_name, artist_age, image_filename, product_title))

        return drawings