예제 #1
0
 def test_element_setattr(self):
     """Set assorted values on every attribute of every element and read them back."""
     samples = (None, 0, 1, '1', 'long string')
     rss_item = RssItem()
     # Element names are taken from a separate, throwaway RssItem instance.
     for name in RssItem().elements:
         element = getattr(rss_item, name)
         for attr_name in element.attrs:
             for sample in samples:
                 setattr(element, attr_name, sample)
                 stored = getattr(element, attr_name)
                 self.assertEqual(stored, sample)
예제 #2
0
파일: utils.py 프로젝트: loleg/scrapy_rss
 def __init__(self, *args, **kwargs):
     """Register an equality function for the class of every default RssItem element.

     Elements that are MultipleElements instances get the multi-value
     comparison; all other elements get the single-value comparison.
     """
     super(RssTestCase, self).__init__(*args, **kwargs)
     for element in RssItem().elements.values():
         if isinstance(element, MultipleElements):
             comparator = self.assertMultipleRssElementsEqualsToValues
         else:
             comparator = self.assertRssElementEqualsToValue
         # The registration key is the pair (element class, None).
         self.addTypeEqualityFunc((element.__class__, None), comparator)
예제 #3
0
    def test_irregular_access(self):
        """Check direct attribute access on a MultipleElements container.

        Reading or writing ``category`` must raise AttributeError while the
        container holds zero or several elements, and must work once it
        holds exactly one.  The same checks are repeated through
        ``RssItem.category``.
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12.  Prefer the modern name and fall back to the alias on
        # interpreters that only provide the old one (Python 2).
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                               or self.assertRaisesRegexp)

        me = MultipleElements(CategoryElement)
        with assert_raises_regex(AttributeError, 'have not been assigned'):
            me.category
        me.add(['first', 'second'])
        with assert_raises_regex(AttributeError, 'more than one elements'):
            me.category
        with assert_raises_regex(AttributeError,
                                 'zero or more than one elements'):
            me.category = 'another'

        # With exactly one element the attribute is readable and writable.
        me.clear()
        me.add('single')
        me.category = 'another'
        self.assertEqual(me.category, 'another')

        item = RssItem()
        with assert_raises_regex(AttributeError, 'have not been assigned'):
            item.category.category
        item.category = ['first', 'second']
        with assert_raises_regex(AttributeError, 'more than one elements'):
            item.category.category
        with assert_raises_regex(AttributeError,
                                 'zero or more than one elements'):
            item.category.category = 'other'

        item.category.clear()
        item.category = 'single'
        item.category.category = 'another'
        self.assertEqual(item.category.category, 'another')
예제 #4
0
 def test_spider_output_handling(self):
     """Pass an RssItem, an ExtendableItem and an RssedItem through an opened scraper."""
     spider = self.MySpider()
     crawler = Crawler(spider)
     scraper = Scraper(crawler)
     scraper.open_spider(spider)
     # Each item is instantiated right before it is handed to the scraper.
     for item_cls in (RssItem, ExtendableItem, RssedItem):
         scraper._process_spidermw_output(item_cls(), None, None, None)
     scraper.close_spider(spider)
예제 #5
0
 def test_item_properties_v1(self, elem, elem_name, value):
     """Assign *value* to the element named *elem_name* of a fresh RssItem.

     If *elem* has required attributes the assignment must raise
     ValueError; otherwise the value must be readable back from the item.
     """
     rss_item = RssItem()
     if not elem.required_attrs:
         setattr(rss_item, elem_name, value)
         self.assertEqual(getattr(rss_item, elem_name), value)
         return
     with six.assertRaisesRegex(self, ValueError, 'Could not assign value'):
         setattr(rss_item, elem_name, value)
예제 #6
0
    def test_attributes_uniqueness(self, elem, elem_name, attr, attr_name):
        """Ensure attribute objects are not shared between instances.

        Two fresh instances of the attribute class must be distinct objects,
        and the attribute stored under ``attr_name.priv_name`` on the element
        *elem_name* must differ between two separate RssItem instances.
        """
        first_item = RssItem()
        second_item = RssItem()
        attr_cls = attr.__class__
        first_attr = attr_cls()
        second_attr = attr_cls()
        standalone_msg = "Instances of attribute [class '{}'] are identical".format(
            attr_cls.__name__)
        self.assertIsNot(first_attr, second_attr, msg=standalone_msg)

        first_stored = getattr(getattr(first_item, elem_name),
                               attr_name.priv_name)
        second_stored = getattr(getattr(second_item, elem_name),
                                attr_name.priv_name)
        stored_msg = (
            "Appropriate attributes [class '{}'] of appropriate elements [class '{}'] "
            "of RSS item instances are identical".format(
                attr_cls.__name__, elem.__class__.__name__))
        self.assertIsNot(first_stored, second_stored, msg=stored_msg)
예제 #7
0
    def test_elements_uniqueness(self, elem, elem_name):
        """Ensure element objects are not shared between instances.

        Two fresh instances of the element class must be distinct objects,
        and the element named *elem_name* must differ between two separate
        RssItem instances.
        """
        elem_cls = elem.__class__

        def fresh_element():
            # MultipleElements subclasses are constructed with ItemElement
            # as their argument; every other element class takes none.
            if isinstance(elem, MultipleElements):
                return elem_cls(ItemElement)
            return elem_cls()

        first_elem = fresh_element()
        second_elem = fresh_element()
        class_msg = "Instances of element class '{}' are identical".format(
            elem_cls.__name__)
        self.assertIsNot(first_elem, second_elem, msg=class_msg)

        first_item = RssItem()
        second_item = RssItem()
        item_msg = (
            "Appropriate elements [class '{}'] of RSS item instances are identical"
            .format(elem_cls.__name__))
        self.assertIsNot(getattr(first_item, elem_name),
                         getattr(second_item, elem_name),
                         msg=item_msg)
예제 #8
0
 def parse_item(self, response):
     """Build one RssItem from *response* and yield it.

     Title, publication date and description are taken from CSS
     selections on the response; link and guid are the response URL;
     the author is a fixed string.
     """
     rss_item = RssItem()
     og_title = response.css('meta[property="og:title"]::attr(content)')
     rss_item.title = og_title.get()
     page_url = response.url
     rss_item.link = page_url
     rss_item.guid = page_url
     rss_item.pubDate = response.css(".title h6::text").get()
     rss_item.author = "Liberal Victoria"
     # Every matched ".mr-content" node is concatenated into the description.
     content_nodes = response.css(".mr-content").extract()
     rss_item.description = "".join(content_nodes)
     yield rss_item
예제 #9
0
    def test_item_properties(self):
        """Verify element assignment rules and the values held by the fixture items.

        First, every element of a fresh RssItem is assigned a handful of
        plain values: elements with required attributes must reject them
        with ValueError, the others must return the value on read.  Then
        the fixture items prepared on ``self`` are compared with the
        constants they were built from.
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12.  Prefer the modern name and fall back to the alias on
        # interpreters that only provide the old one (Python 2).
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                               or self.assertRaisesRegexp)

        item = RssItem()
        for elem_name, elem in item.elements.items():
            for val in (None, 0, '', 1, '1'):
                if elem.required_attrs:
                    with assert_raises_regex(ValueError,
                                             'Could not assign value'):
                        setattr(item, elem_name, val)
                else:
                    setattr(item, elem_name, val)
                    self.assertEqual(getattr(item, elem_name), val)

        self.assertEqual(self.item_with_empty_title_only.title,
                         self.empty_text)

        self.assertEqual(self.item_with_empty_description_only.description,
                         self.empty_text)

        self.assertEqual(self.item_with_title_only.title, self.non_empty_title)

        self.assertEqual(self.item_with_description_only.description,
                         self.non_empty_description)

        self.assertEqual(self.item_with_single_category.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_single_category.category,
                         self.categories[0])

        # Title and category are checked on the SAME fixture in each pair.
        # Previously the title checks were shifted by one fixture, so the
        # 2-category title was never verified and the 4-category title was
        # verified twice.
        self.assertEqual(self.item_with_2_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_2_categories.category,
                         self.categories[:2])

        self.assertEqual(self.item_with_3_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_3_categories.category,
                         self.categories[:3])

        self.assertEqual(self.item_with_4_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_4_categories.category,
                         self.categories[:4])

        self.assertEqual(self.item_with_unescaped_text.title,
                         self.unescaped_title)

        self.assertEqual(self.item_with_unescaped_text.description,
                         self.unescaped_description)

        # Distinct loop names avoid shadowing the ``item`` created above.
        for idx, guid_items in self.items_with_guid.items():
            expected = self.guids[idx]
            for guid_item in guid_items:
                self.assertEqual(guid_item.guid, expected['guid'])
                self.assertEqual(guid_item.guid.guid, expected['guid'])
                self.assertEqual(guid_item.guid.isPermaLink,
                                 expected['isPermaLink'])
예제 #10
0
 def parse_item(self, response):
     """Build one RssItem from *response* and yield it.

     The title is the part of the page title after the first " - ";
     the author joins the alt texts of all "img.ministersPic" images.
     """
     rss_item = RssItem()
     page_title = response.css("title::text").get()
     rss_item.title = page_title.split(" - ", 1)[-1].strip()
     page_url = response.url
     rss_item.link = page_url
     rss_item.guid = page_url
     created = response.css("div.newsCreatedDate::text").get()
     rss_item.pubDate = created.strip()
     minister_names = response.css("img.ministersPic::attr(alt)").getall()
     rss_item.author = " & ".join(minister_names)
     rss_item.description = response.css("div.ms-rtestate-field").get()
     yield rss_item
예제 #11
0
    def test_uniqueness(self):
        """Ensure elements and their attributes are never shared between instances.

        For every default RssItem element: two fresh instances of its class
        are distinct, the element differs between two RssItem instances,
        and the same holds for every attribute of the element (looked up
        under the name ``'__' + attr_name``).
        """
        for elem_name, elem in RssItem().elements.items():
            elem_cls = elem.__class__
            # MultipleElements subclasses are constructed with ItemElement.
            if isinstance(elem, MultipleElements):
                first_elem = elem_cls(ItemElement)
                second_elem = elem_cls(ItemElement)
            else:
                first_elem = elem_cls()
                second_elem = elem_cls()
            class_msg = "Instances of element class '{}' are identical".format(
                elem_cls.__name__)
            self.assertIsNot(first_elem, second_elem, msg=class_msg)

            first_item = RssItem()
            second_item = RssItem()
            item_msg = (
                "Appropriate elements [class '{}'] of RSS item instances are identical"
                .format(elem_cls.__name__))
            self.assertIsNot(getattr(first_item, elem_name),
                             getattr(second_item, elem_name),
                             msg=item_msg)

            for attr_name, attr in elem.attrs.items():
                attr_cls = attr.__class__
                first_attr = attr_cls()
                second_attr = attr_cls()
                attr_msg = "Instances of attribute [class '{}'] are identical".format(
                    attr_cls.__name__)
                self.assertIsNot(first_attr, second_attr, msg=attr_msg)

                first_stored = getattr(getattr(first_item, elem_name),
                                       '__{}'.format(attr_name))
                second_stored = getattr(getattr(second_item, elem_name),
                                        '__{}'.format(attr_name))
                stored_msg = (
                    "Appropriate attributes [class '{}'] of appropriate elements [class '{}'] "
                    "of RSS item instances are identical".format(
                        attr_cls.__name__, elem_cls.__name__))
                self.assertIsNot(first_stored, second_stored, msg=stored_msg)
예제 #12
0
    def __init__(self, *args, **kwargs):
        """Prepare category names and fixture items holding one to four categories."""
        super(TestMultipleElements, self).__init__(*args, **kwargs)
        self.category_names = [
            '1st category name', '2nd category name', '3rd category name',
            '4th category'
        ]

        def titled_item(category):
            # Fresh item with the shared title and the given category value.
            rss_item = RssItem()
            rss_item.title = 'Item title'
            rss_item.category = category
            return rss_item

        # A single category is assigned as a plain string, several as a list.
        self.item_with_single_category = titled_item(self.category_names[0])
        self.item_with_2_categories = titled_item(self.category_names[:2])
        self.item_with_3_categories = titled_item(self.category_names[:3])
        self.item_with_4_categories = titled_item(self.category_names[:4])
예제 #13
0
 def parse_item(self, response):
     """Build one RssItem from *response* and return it.

     Title, publication date and author come from ``<meta>`` tags; the
     description concatenates the summary paragraphs and body paragraphs.
     """
     rss_item = RssItem()
     title_meta = response.css('meta[name="dcterms.title"]::attr(content)')
     rss_item.title = title_meta.get()
     page_url = response.url
     rss_item.link = page_url
     rss_item.guid = page_url
     issued_meta = response.css('meta[name="dcterms.issued"]::attr(content)')
     rss_item.pubDate = issued_meta.get()
     minister_meta = response.css('meta[name="article.minister"]::attr(content)')
     rss_item.author = minister_meta.get()
     summary_paragraphs = response.css("div.news-detail__summary p").getall()
     body_paragraphs = response.css("div.news-detail__body p").getall()
     rss_item.description = "".join(summary_paragraphs + body_paragraphs)
     return rss_item
예제 #14
0
    def parse_item(self, response):
        """Build one RssItem from *response* and yield it.

        The title is the page title up to the first " | ".  The author is
        the last "released by" text, or "NSW Government" if none is found.
        """
        rss_item = RssItem()
        page_title = response.css("title::text").get()
        rss_item.title = page_title.split(" | ")[0]
        page_url = response.url
        rss_item.link = page_url
        rss_item.guid = page_url
        date_meta = response.css('meta[name="dcterms.date"]::attr(content)')
        rss_item.pubDate = date_meta.get()
        rss_item.description = response.css("div.nsw-wysiwyg-content").get()
        released_by = response.css(
            "div.standard-header__released_by div::text").getall()
        rss_item.author = (released_by[-1].strip()
                           if released_by else "NSW Government")

        yield rss_item
예제 #15
0
 def test_item_with_single_elem(self, attr_name, attr, elem_name,
                                elem_kwargs):
     """Check ``repr`` of an RssItem subclass that adds one custom element.

     ``repr`` must always succeed; its exact text is compared only on
     Python 3.7+, where dicts keep insertion order.
     """
     item_cls_name = "Item0"
     element_cls = type("Element0", (ItemElement, ), {attr_name: attr})
     elem = element_cls(**elem_kwargs)
     item_cls = type(item_cls_name, (RssItem, ), {elem_name: elem})
     item = item_cls()
     repr(item)
     if sys.version_info < (3, 7):
         return
     # Default elements come first, followed by the custom one.
     all_elems = chain(RssItem().elements.items(), [(elem_name, elem)])
     expected_parts = ["{}={!r}".format(name, value)
                       for name, value in all_elems]
     expected = "{}({})".format(item_cls_name, ", ".join(expected_parts))
     assert repr(item) == expected
예제 #16
0
    def __init__(self, *args, **kwargs):
        """Build the RssItem fixtures and store them by name in ``self.items``."""
        super(TestExporting, self).__init__(*args, **kwargs)

        fixtures = {}

        item = fixtures['minimal_item'] = RssItem()
        item.title = 'Title of minimal item'

        item = fixtures['minimal_item2'] = RssItem()
        item.description = 'Description of minimal item'

        item = fixtures['simple_item'] = RssItem()
        item.title = 'Title of simple item'
        item.description = 'Description of simple item'

        item = fixtures['item_with_single_category'] = RssItem()
        item.title = 'Title of item with single category'
        item.category = 'Category 1'

        item = fixtures['item_with_multiple_categories'] = RssItem()
        item.title = 'Title of item with multiple categories'
        item.category = ['Category 1', 'Category 2']

        item = fixtures['item_with_guid'] = RssItem()
        item.title = 'Title of item with guid'
        item.guid = 'Identifier'

        item = fixtures['item_with_unicode'] = RssItem()
        item.title = 'Title of item with unicode and special characters'
        item.description = "[Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!]"

        item = fixtures['item_with_enclosure'] = RssItem()
        item.title = 'Title of item with enclosure'
        enclosure = item.enclosure
        enclosure.url = 'http://example.com/content'
        enclosure.length = 0
        enclosure.type = 'text/plain'

        self.items = fixtures
예제 #17
0
    def test_item_validation(self):
        """Check that invalid items are rejected when processed by the pipeline.

        Covers three situations: an RssItem whose enclosure has only some
        attributes set, a custom element with two required attributes of
        which only one is set, and ExtendableItem subclasses that are
        instantiated without an ``rss`` value.
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12.  Prefer the modern name and fall back to the alias on
        # interpreters that only provide the old one (Python 2).
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                               or self.assertRaisesRegexp)

        invalid_item = RssItem()
        invalid_item.enclosure.url = 'http://example.com/content'

        with assert_raises_regex(InvalidRssItemAttributesError,
                                 'required attributes .*? not set'):
            with CrawlerContext(**self.feed_settings) as context:
                context.ipm.process_item(invalid_item, context.spider)

        class NonStandardElement(ItemElement):
            first_attribute = ItemElementAttribute(required=True,
                                                   is_content=True)
            second_attribute = ItemElementAttribute(required=True)

        class NonStandardItem(RssItem):
            element = NonStandardElement()

        invalid_item = NonStandardItem()
        # Assigning a bare value to the element itself must be rejected.
        with assert_raises_regex(InvalidElementValueError,
                                 'Could not assign'):
            invalid_item.element = 'valid value'
        # Only one of the two required attributes is set from here on.
        invalid_item.element.first_attribute = 'valid value'

        with assert_raises_regex(InvalidRssItemAttributesError,
                                 'required attributes .*? not set'):
            with CrawlerContext(**self.feed_settings) as context:
                context.ipm.process_item(invalid_item, context.spider)

        class InvalidSuperItem1(ExtendableItem):
            pass

        class InvalidSuperItem2(ExtendableItem):
            field = scrapy.Field()

        class InvalidSuperItem3(ExtendableItem):
            rss = scrapy.Field()

        for invalid_item_cls in (InvalidSuperItem1, InvalidSuperItem2,
                                 InvalidSuperItem3):
            with assert_raises_regex(InvalidRssItemError,
                                     "Item must have 'rss'"):
                with CrawlerContext(**self.feed_settings) as context:
                    context.ipm.process_item(invalid_item_cls(),
                                             context.spider)
예제 #18
0
    def test_inner_cls_attr(self):
        """Assign categories as CategoryElement objects and as plain values.

        Single and multiple assignments are alternated, and after each one
        the item's category must compare equal to the assigned name(s).
        """
        names = self.category_names
        rss_item = RssItem()
        rss_item.category = CategoryElement(category=names[0])
        self.assertEqual(rss_item.category, names[0])
        upper_bound = 2 + len(names)
        for count in range(2, upper_bound):
            # Several element objects at once.
            elements = [CategoryElement(category=name)
                        for name in names[:count]]
            rss_item.category = elements
            self.assertEqual(rss_item.category, names[:count])

            # Back to a single element object.
            rss_item.category = CategoryElement(category=names[0])
            self.assertEqual(rss_item.category, names[0])

            # Several plain values at once.
            rss_item.category = names[:count]
            self.assertEqual(rss_item.category, names[:count])

            # Back to a single plain value.
            rss_item.category = names[0]
            self.assertEqual(rss_item.category, names[0])
예제 #19
0
 def parse_item(self, response):
     """Build one RssItem from *response* and yield it.

     The publication date is cut out of the first script text matching
     ``"datePublished": "..."``.  The description joins the "div div p"
     paragraphs after skipping a number of leading ones.
     """
     rss_item = RssItem()
     title_meta = response.css('meta[name="DCTERMS.title"]::attr(content)')
     rss_item.title = title_meta.get().strip()
     page_url = response.url
     rss_item.link = page_url
     rss_item.guid = page_url
     published = response.css("script::text").re_first(
         r'"datePublished": ".*"')
     # Keep what follows the first colon, minus surrounding spaces and quotes.
     rss_item.pubDate = published.split(":", 1)[-1].strip(' "')
     ministers = response.css("p.statement-ministers::text").getall()
     rss_item.author = " & ".join(ministers)
     paragraphs = response.css("div div p").getall()
     # publish date & author
     skipped = 2 + (len(ministers) if len(ministers) > 1 else 0)
     rss_item.description = "".join(paragraphs[skipped:])
     yield rss_item
예제 #20
0
 def test_item_with_multiple_elems(self, attrs, elems_descr):
     """Check ``repr`` of an RssItem subclass that adds several custom elements.

     *elems_descr* is unpacked into element names and constructor keyword
     arguments.  ``repr`` must always succeed; its exact text is compared
     only on Python 3.7+, where dicts keep insertion order.
     """
     elems_names, elems_kwargs = zip(*elems_descr)
     item_cls_name = "Item0"
     # One distinct element class per description, all with the same attrs.
     elem_clses = []
     for idx in range(len(elems_descr)):
         elem_clses.append(
             type("Element{}".format(idx), (ItemElement, ), dict(attrs)))
     elem_instances = [
         elem_cls(**cls_kwargs)
         for elem_cls, cls_kwargs in zip(elem_clses, elems_kwargs)
     ]
     namespace = dict(zip(elems_names, elem_instances))
     item_cls = type(item_cls_name, (RssItem, ), namespace)
     item = item_cls()
     repr(item)
     if sys.version_info < (3, 7):
         return
     # Default elements come first, followed by the custom ones.
     all_elems = chain(RssItem().elements.items(),
                       zip(elems_names, elem_instances))
     expected_parts = ["{}={}".format(name, element)
                       for name, element in all_elems]
     expected = "{}({})".format(item_cls_name, ", ".join(expected_parts))
     assert repr(item) == expected
예제 #21
0
    def __init__(self, *args, **kwargs):
        """Define namespaced element/item classes and build the fixture items.

        Results are stored in three places:

        * ``PredefinedItems.NSItem0`` .. ``NSItem3`` -- the item classes
          defined locally in this method;
        * ``self.items`` -- fixture items keyed by name;
        * ``self.ns_items_of_same_cls`` and ``self.ns_items`` -- lists of
          tuples pairing fixture items with a name and namespace settings.

        NOTE(review): ``*args`` and ``**kwargs`` are accepted but not used
        anywhere in this method, and no ``super().__init__`` call is made
        here -- confirm that this is intended.
        """
        # Element classes with namespaced attributes.  Namespaces are given
        # through ``ns_prefix``/``ns_uri`` keyword arguments; presumably the
        # part of an attribute name before ``__`` also acts as a prefix --
        # TODO confirm against ItemElement.
        class NSElement0(ItemElement):
            attr01 = ItemElementAttribute(ns_prefix="prefix01", ns_uri="id01")

        class NSElement1(ItemElement):
            prefix11__attr11 = ItemElementAttribute(ns_uri="id11")
            prefix12__attr12 = ItemElementAttribute(ns_prefix="prefix12",
                                                    ns_uri="id12")

        class NSElement2(ItemElement):
            attr21 = ItemElementAttribute(is_content=True)
            pseudo_prefix22__attr22 = ItemElementAttribute(
                ns_prefix="prefix22", ns_uri="id22")

        class NSElement3(ItemElement):
            attr31 = ItemElementAttribute(is_content=True)
            attr32 = ItemElementAttribute(ns_prefix="prefixa", ns_uri="id32")

        class NSElement4(ItemElement):
            attr41 = ItemElementAttribute()
            prefix42__attr41 = ItemElementAttribute(ns_uri="id42")

        # Item classes: every element uses its own prefix.
        class NSItem0(RssItem):
            elem0 = ItemElement()
            elem1 = NSElement0(ns_prefix="el_prefix1", ns_uri="el_id1")
            el_prefix2__elem2 = NSElement1(ns_uri="el_id2")
            el_prefix3__elem3 = NSElement2(ns_prefix="el_prefix3",
                                           ns_uri="el_id3")
            el_pseudo_prefix4__elem4 = NSElement0(ns_prefix="el_prefix4",
                                                  ns_uri="el_id4")

        # "el_prefix" appears both in an attribute name and as an explicit
        # ns_prefix, with different URIs.
        class NSItem1(RssItem):
            elem1 = NSElement0(ns_prefix="el_prefix1", ns_uri="el_id1")
            el_prefix__elem2 = NSElement1(ns_uri="el_id2")
            elem3 = NSElement2(ns_prefix="el_prefix", ns_uri="el_id3")
            el_pseudo_prefix4__elem4 = NSElement0(ns_prefix="el_prefix4",
                                                  ns_uri="el_id4")

        # The prefix "prefix" is used by all four elements, each with a
        # different URI.
        class NSItem2(RssItem):
            elem1 = NSElement3(ns_prefix="prefix", ns_uri="el_id1")
            prefix__elem2 = NSElement3(ns_uri="el_id2")
            elem3 = NSElement3(ns_prefix="prefix", ns_uri="el_id3")
            el_pseudo_prefix4__elem4 = NSElement3(ns_prefix="prefix",
                                                  ns_uri="el_id4")

        # elem1 and elem2 are given a URI but no prefix; elem4 and elem5
        # are given neither.
        class NSItem3(RssItem):
            elem1 = NSElement3(ns_uri="el_id1")
            elem2 = NSElement3(ns_uri="el_id2")
            elem3 = NSElement3(ns_prefix="prefix", ns_uri="el_id3")
            el_pseudo_prefix4__elem3 = NSElement3(ns_prefix="prefix2",
                                                  ns_uri="el_id4")
            elem4 = NSElement4()
            elem5 = NSElement4()

        # Expose the locally defined item classes as attributes of
        # PredefinedItems.
        PredefinedItems.NSItem0 = NSItem0
        PredefinedItems.NSItem1 = NSItem1
        PredefinedItems.NSItem2 = NSItem2
        PredefinedItems.NSItem3 = NSItem3

        # Fixture items built from the plain RssItem class.
        minimal_item = RssItem()
        minimal_item.title = 'Title of minimal item'

        minimal_item2 = RssItem()
        minimal_item2.description = 'Description of minimal item'

        simple_item = RssItem()
        simple_item.title = 'Title of simple item'
        simple_item.description = 'Description of simple item'

        item_with_single_category = RssItem()
        item_with_single_category.title = 'Title of item with single category'
        item_with_single_category.category = 'Category 1'

        item_with_multiple_categories = RssItem()
        item_with_multiple_categories.title = 'Title of item with multiple categories'
        item_with_multiple_categories.category = ['Category 1', 'Category 2']

        item_with_guid = RssItem()
        item_with_guid.title = 'Title of item with guid'
        item_with_guid.guid = 'Identifier'

        item_with_unicode = RssItem()
        item_with_unicode.title = 'Title of item with unicode and special characters'
        item_with_unicode.description = "[Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!]"

        item_with_enclosure = RssItem()
        item_with_enclosure.title = 'Title of item with enclosure'
        item_with_enclosure.enclosure.url = 'http://example.com/content'
        item_with_enclosure.enclosure.length = 0
        item_with_enclosure.enclosure.type = 'text/plain'

        # Fixture items built from the namespaced item classes above.
        item_with_unique_ns = NSItem0()
        item_with_unique_ns.title = "Title of item with unique namespaces"
        item_with_unique_ns.elem1.attr01 = ""
        item_with_unique_ns.el_prefix2__elem2.prefix11__attr11 = 0
        item_with_unique_ns.el_prefix2__elem2.prefix12__attr12 = ""
        item_with_unique_ns.el_prefix3__elem3.attr21 = "value3_21"
        item_with_unique_ns.el_prefix3__elem3.pseudo_prefix22__attr22 = 42
        item_with_unique_ns.el_pseudo_prefix4__elem4.attr01 = ""

        item_with_non_unique_ns = NSItem1()
        # NOTE(review): same title text as item_with_unique_ns above; may be
        # a copy-paste leftover -- check the expected outputs before changing.
        item_with_non_unique_ns.title = "Title of item with unique namespaces"
        item_with_non_unique_ns.elem1.attr01 = "-"
        item_with_non_unique_ns.el_prefix__elem2.prefix11__attr11 = -1
        item_with_non_unique_ns.el_prefix__elem2.prefix12__attr12 = "-"
        item_with_non_unique_ns.elem3.attr21 = "yet another value3_21"
        item_with_non_unique_ns.elem3.pseudo_prefix22__attr22 = 4224
        item_with_non_unique_ns.el_pseudo_prefix4__elem4.attr01 = "-"

        # This item is not added to self.items; it is only referenced from
        # self.ns_items_of_same_cls below.  prefix12__attr12 is left unset.
        item_with_non_unique_ns2 = NSItem1()
        item_with_non_unique_ns2.title = "Title of item with unique namespaces 2"
        item_with_non_unique_ns2.elem1.attr01 = "0"
        item_with_non_unique_ns2.el_prefix__elem2.prefix11__attr11 = -999
        item_with_non_unique_ns2.elem3.attr21 = "value"
        item_with_non_unique_ns2.elem3.pseudo_prefix22__attr22 = 42
        item_with_non_unique_ns2.el_pseudo_prefix4__elem4.attr01 = ""

        item_with_same_ns_prefixes = NSItem2()
        item_with_same_ns_prefixes.title = "Title of item with same namespace prefixes"
        item_with_same_ns_prefixes.elem1.attr31 = "Content value 11ё"
        item_with_same_ns_prefixes.prefix__elem2.attr32 = "Attribute value 22"
        item_with_same_ns_prefixes.elem3.attr31 = "Content value 11"
        item_with_same_ns_prefixes.elem3.attr32 = "Attribute value 32"
        item_with_same_ns_prefixes.el_pseudo_prefix4__elem4.attr32 = ""

        # elem5 of NSItem3 is left untouched.
        item_with_default_nses = NSItem3()
        item_with_default_nses.title = "Title of item with default namespaces"
        item_with_default_nses.elem1.attr31 = "Content value 11ё"
        item_with_default_nses.elem2.attr32 = "Attribute value 22"
        item_with_default_nses.elem3.attr31 = "Content value 11"
        item_with_default_nses.elem3.attr32 = "Attribute value 32"
        item_with_default_nses.el_pseudo_prefix4__elem3.attr32 = ""
        item_with_default_nses.elem4.attr41 = "A41 b"
        item_with_default_nses.elem4.prefix42__attr41 = "0"

        # Fixture items by name.
        self.items = {
            'minimal_item': minimal_item,
            'minimal_item2': minimal_item2,
            'simple_item': simple_item,
            'item_with_single_category': item_with_single_category,
            'item_with_multiple_categories': item_with_multiple_categories,
            'item_with_guid': item_with_guid,
            'item_with_unicode': item_with_unicode,
            'item_with_enclosure': item_with_enclosure,
            'item_with_unique_ns': item_with_unique_ns,
            'item_with_non_unique_ns': item_with_non_unique_ns,
            'item_with_same_ns_prefixes': item_with_same_ns_prefixes,
            'item_with_default_nses': item_with_default_nses
        }

        # 3-tuples of (name, item class, item); both entries share NSItem1.
        # The name is presumably the key of the expected output -- TODO confirm.
        self.ns_items_of_same_cls = [
            ('item_with_non_unique_ns5', NSItem1, item_with_non_unique_ns),
            ('item_with_non_unique_ns4', NSItem1, item_with_non_unique_ns2),
        ]
        # 4-tuples of (name, namespaces, item class, item).  The namespaces
        # slot holds a list/tuple of (prefix, uri) pairs, a dict, or None;
        # the item class slot holds a class object, a dotted path string,
        # None, or an empty tuple.  How consumers interpret each slot is not
        # visible here -- verify against the tests that read this list.
        self.ns_items = [
            ('item_with_unique_ns2',
             [("el_prefix1", "el_id1"), ("prefix01", "id01"),
              ("el_prefix2", "el_id2"), ("prefix11", "id11"),
              ("prefix12", "id12")], None, item_with_unique_ns),
            ('item_with_unique_ns2',
             (("el_prefix1", "el_id1"), ("prefix01", "id01"),
              ("el_prefix2", "el_id2"), ("prefix11", "id11"),
              ("prefix12", "id12")), tuple(), item_with_unique_ns),
            ('item_with_unique_ns2', {
                "el_prefix1": "el_id1",
                "prefix01": "id01",
                "el_prefix2": "el_id2",
                "prefix11": "id11",
                "prefix12": "id12"
            }, None, item_with_unique_ns),
            ('item_with_unique_ns3', None, NSItem0, item_with_unique_ns),
            ('item_with_unique_ns3', None, 'tests.test_exporter.NSItem0',
             item_with_unique_ns),
            ('item_with_non_unique_ns2', [("el_prefix1", "el_id1"),
                                          ("prefix01", "id01"),
                                          ("prefix11", "id11"),
                                          ("prefix12", "id12"),
                                          ("prefix22", "id22"),
                                          ("el_prefix4", "el_id4")], None,
             item_with_non_unique_ns),
            ('item_with_non_unique_ns3', {
                "el_prefix1": "el_id1",
                "prefix01": "id01",
                "prefix11": "id11",
                "prefix12": "id12",
                "prefix22": "id22"
            }, None, item_with_non_unique_ns),
            ('item_with_non_unique_ns2', None, NSItem1,
             item_with_non_unique_ns),
            ('item_with_non_unique_ns2', None, 'tests.test_exporter.NSItem1',
             item_with_non_unique_ns),
            ('item_with_same_ns_prefixes2', [("prefix", "el_id1"),
                                             ("prefixa", "id32"),
                                             ("unused_prefix", "id000")], None,
             item_with_same_ns_prefixes),
            ('item_with_same_ns_prefixes2', {
                "prefix": "el_id1",
                "prefixa": "id32",
                "unused_prefix": "id000"
            }, None, item_with_same_ns_prefixes),
            ('item_with_same_ns_prefixes3', None, NSItem2,
             item_with_same_ns_prefixes),
            ('item_with_same_ns_prefixes3', None,
             'tests.test_exporter.NSItem2', item_with_same_ns_prefixes),
            ('item_with_default_nses3', {
                'prefixa': 'id32',
                'prefix2': 'el_id4'
            }, None, item_with_default_nses),
            ('item_with_default_nses2', None, 'tests.test_exporter.NSItem3',
             item_with_default_nses),
            ('item_with_default_nses2', None, NSItem3, item_with_default_nses)
        ]
예제 #22
0
 def __init__(self):
     """Run the parent initializer, then store a fresh RssItem in ``self.rss``."""
     super(SuperItem, self).__init__()
     fresh_rss = RssItem()
     self.rss = fresh_rss
예제 #23
0
class TestSimpleElements(RssTestCase):
    """Tests for the elements of ``RssItem`` and for their attributes.

    The cases cover: isolation of element/attribute instances between items,
    assignment of values through item properties, and the argument checks
    performed by element initializers.
    """

    def __init__(self, *args, **kwargs):
        # Build the fixtures read back by ``test_item_properties_v2``: sample
        # strings plus items pre-filled with them.
        super(TestSimpleElements, self).__init__(*args, **kwargs)

        self.empty_text = ""
        self.non_empty_title = "Non-empty title"
        self.non_empty_description = "Non-empty description"
        self.categories = [
            "first category name", "second category name",
            "third category name", "fourth category name"
        ]
        self.unescaped_title = "<b>Non-empty<br/> title</b>"
        self.unescaped_description = "<b>Non-empty description</b><img src='url'/>"

        self.item_with_empty_title_only = RssItem()
        self.item_with_empty_title_only.title = self.empty_text

        self.item_with_empty_description_only = RssItem()
        self.item_with_empty_description_only.description = self.empty_text

        self.item_with_title_only = RssItem()
        self.item_with_title_only.title = self.non_empty_title

        self.item_with_description_only = RssItem()
        self.item_with_description_only.description = self.non_empty_description

        # A single category is assigned as a plain string, several categories
        # as a list slice.
        self.item_with_single_category = RssItem()
        self.item_with_single_category.title = self.non_empty_title
        self.item_with_single_category.category = self.categories[0]

        self.item_with_2_categories = RssItem()
        self.item_with_2_categories.title = self.non_empty_title
        self.item_with_2_categories.category = self.categories[:2]

        self.item_with_3_categories = RssItem()
        self.item_with_3_categories.title = self.non_empty_title
        self.item_with_3_categories.category = self.categories[:3]

        self.item_with_4_categories = RssItem()
        self.item_with_4_categories.title = self.non_empty_title
        self.item_with_4_categories.category = self.categories[:4]

        self.item_with_unescaped_text = RssItem()
        self.item_with_unescaped_text.title = self.unescaped_title
        self.item_with_unescaped_text.description = self.unescaped_description

        self.guids = [
            {
                'guid': 'identifier 1',
                'isPermaLink': False
            },
            {
                'guid': 'identifier 2',
                'isPermaLink': True
            },
        ]

        # Maps an index into ``self.guids`` to the items expected to match
        # that entry; every item is filled in through a different assignment
        # form (content value, dict, sub-attribute, element instance).
        self.items_with_guid = {0: [], 1: []}

        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[0]['guid']
        self.items_with_guid[0].append(item_with_guid)

        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[0]
        self.items_with_guid[0].append(item_with_guid)

        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid.guid = self.guids[0]['guid']
        self.items_with_guid[0].append(item_with_guid)

        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[1]
        self.items_with_guid[1].append(item_with_guid)

        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[1]['guid']
        item_with_guid.guid.isPermaLink = self.guids[1]['isPermaLink']
        self.items_with_guid[1].append(item_with_guid)

        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = GuidElement(**self.guids[1])
        self.items_with_guid[1].append(item_with_guid)

    @parameterized.expand((elem, str(elem_name))
                          for elem_name, elem in RssItem().elements.items())
    def test_elements_uniqueness(self, elem, elem_name):
        # Two freshly constructed elements of the same class, and the same
        # named element on two separate items, must be distinct objects.
        # MultipleElements needs the wrapped element class as an argument.
        elem1 = elem.__class__() if not isinstance(
            elem, MultipleElements) else elem.__class__(ItemElement)
        elem2 = elem.__class__() if not isinstance(
            elem, MultipleElements) else elem.__class__(ItemElement)
        self.assertIsNot(
            elem1,
            elem2,
            msg="Instances of element class '{}' are identical".format(
                elem.__class__.__name__))

        item1 = RssItem()
        item2 = RssItem()
        self.assertIsNot(
            getattr(item1, elem_name),
            getattr(item2, elem_name),
            msg=
            "Appropriate elements [class '{}'] of RSS item instances are identical"
            .format(elem.__class__.__name__))

    @parameterized.expand((elem, str(elem_name), attr, attr_name)
                          for elem_name, elem in RssItem().elements.items()
                          for attr_name, attr in elem.attrs.items())
    def test_attributes_uniqueness(self, elem, elem_name, attr, attr_name):
        # Attribute objects must not be shared: neither between two fresh
        # instances of the attribute class nor between the same element of
        # two separate items. ``attr_name`` is the key object taken from
        # ``elem.attrs``; its ``priv_name`` is used to reach the stored
        # attribute object on the element.
        item1 = RssItem()
        item2 = RssItem()
        attr1 = attr.__class__()
        attr2 = attr.__class__()
        self.assertIsNot(
            attr1,
            attr2,
            msg="Instances of attribute [class '{}'] are identical".format(
                attr.__class__.__name__))

        self.assertIsNot(
            getattr(getattr(item1, elem_name), attr_name.priv_name),
            getattr(getattr(item2, elem_name), attr_name.priv_name),
            msg=
            "Appropriate attributes [class '{}'] of appropriate elements [class '{}'] "
            "of RSS item instances are identical".format(
                attr.__class__.__name__, elem.__class__.__name__))

    @parameterized.expand((elem, str(elem_name), value)
                          for elem_name, elem in RssItem().elements.items()
                          for value in values)
    def test_item_properties_v1(self, elem, elem_name, value):
        # Assigning a bare value to an element that declares required
        # attributes must raise ValueError; otherwise the value must be
        # readable back from the item.
        item = RssItem()
        if elem.required_attrs:
            with six.assertRaisesRegex(self, ValueError,
                                       'Could not assign value'):
                setattr(item, elem_name, value)
        else:
            setattr(item, elem_name, value)
            self.assertEqual(getattr(item, elem_name), value)

    def test_item_properties_v2(self):
        # Every fixture item built in ``__init__`` must expose exactly the
        # values that were assigned to it.
        self.assertEqual(self.item_with_empty_title_only.title,
                         self.empty_text)

        self.assertEqual(self.item_with_empty_description_only.description,
                         self.empty_text)

        self.assertEqual(self.item_with_title_only.title, self.non_empty_title)

        self.assertEqual(self.item_with_description_only.description,
                         self.non_empty_description)

        self.assertEqual(self.item_with_single_category.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_single_category.category,
                         self.categories[0])

        # Title and categories are checked on the same item in each pair.
        self.assertEqual(self.item_with_2_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_2_categories.category,
                         self.categories[:2])

        self.assertEqual(self.item_with_3_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_3_categories.category,
                         self.categories[:3])

        self.assertEqual(self.item_with_4_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_4_categories.category,
                         self.categories[:4])

        self.assertEqual(self.item_with_unescaped_text.title,
                         self.unescaped_title)

        self.assertEqual(self.item_with_unescaped_text.description,
                         self.unescaped_description)

        # All items of a group must agree with the guid entry of that group,
        # whichever assignment form was used to fill them.
        for idx, items in self.items_with_guid.items():
            for item in items:
                self.assertEqual(item.guid, self.guids[idx]['guid'])
                self.assertEqual(item.guid.guid, self.guids[idx]['guid'])
                self.assertEqual(item.guid.isPermaLink,
                                 self.guids[idx]['isPermaLink'])

    @parameterized.expand((elem, ) for elem in RssItem().elements.values())
    def test_element_init_without_args(self, elem):
        # Every element class must be constructible without values;
        # MultipleElements is given the element class it wraps. The
        # isinstance check matches the one used by the other tests here.
        elem_cls = elem.__class__
        if isinstance(elem, MultipleElements):
            elem_cls(ItemElement)
        else:
            elem_cls()

    @parameterized.expand((elem, str(attr), value)
                          for elem in RssItem().elements.values()
                          for attr in elem.attrs for value in values
                          if not isinstance(elem, MultipleElements))
    def test_element_init_with_single_kwarg(self, elem, attr_name, value):
        # Each declared attribute must be accepted as a keyword argument.
        elem_cls = elem.__class__
        elem_cls(**{attr_name: value})

    @parameterized.expand(
        (elem, str(bad_attr), value) for elem in RssItem().elements.values()
        for bad_attr in chain(('impossible_attr', ),
                              set(attr for elem in RssItem().elements.values()
                                  for attr in elem.attrs) - set(elem.attrs))
        for value in values if not isinstance(elem, MultipleElements))
    def test_element_init_with_bad_kwarg(self, elem, bad_attr_name, value):
        # Keyword arguments that are not attributes of this element (taken
        # from the other elements' attributes plus a made-up name) must be
        # rejected with ValueError.
        elem_cls = elem.__class__
        with six.assertRaisesRegex(
                self,
                ValueError,
                'supports only the next named arguments',
                msg="Invalid attribute '{}' was passed to '{}' initializer".
                format(bad_attr_name, elem_cls.__name__)):
            elem_cls(**{bad_attr_name: value})

    @parameterized.expand((elem, value)
                          for elem in RssItem().elements.values()
                          for value in values
                          if not isinstance(elem, MultipleElements))
    def test_element_init_content_arg(self, elem, value):
        # An element with a content attribute takes one positional value and
        # compares equal to it; an element without one rejects positional
        # arguments.
        elem_cls = elem.__class__
        if elem.content_arg:
            el = elem_cls(value)
            self.assertEqual(el, getattr(el, str(el.content_arg)))
            self.assertEqual(el, value)
        else:
            with six.assertRaisesRegex(
                    self,
                    ValueError,
                    'does not support unnamed arguments',
                    msg="Invalid attribute was passed to '{}' initializer "
                    "(element must not have content)".format(
                        elem_cls.__name__)):
                elem_cls(value)

    @parameterized.expand((elem, value1, value2)
                          for elem in RssItem().elements.values()
                          for value1, value2 in zip(values, values)
                          if not isinstance(elem, MultipleElements))
    def test_element_init_with_multiple_args(self, elem, value1, value2):
        # Two positional arguments are never valid; only the expected error
        # text differs depending on whether the element accepts content.
        elem_cls = elem.__class__
        if elem.content_arg:
            with six.assertRaisesRegex(
                    self,
                    ValueError,
                    'supports only single unnamed argument',
                    msg="Several unnamed arguments were passed to '{}' "
                    "initializer (only a single one is supported)".format(
                        elem_cls.__name__)):
                elem_cls(value1, value2)
        else:
            with six.assertRaisesRegex(
                    self,
                    ValueError,
                    'does not support unnamed arguments',
                    msg="Invalid attribute was passed to '{}' initializer "
                    "(element must not have content)".format(
                        elem_cls.__name__)):
                elem_cls(value1, value2)

    @parameterized.expand(
        (str(elem_name), str(attr_name), value)
        for elem_name, elem_descr in RssItem().elements.items()
        for attr_name in elem_descr.attrs for value in values)
    def test_element_setattr(self, elem_name, attr_name, value):
        # A value written to an element attribute must be read back unchanged.
        item = RssItem()
        elem = getattr(item, elem_name)
        setattr(elem, attr_name, value)
        self.assertEqual(getattr(elem, attr_name), value)

    def test_multi_content_element(self):
        # Declaring more than one content attribute on an element class must
        # fail at class-definition time.
        with six.assertRaisesRegex(self, ValueError,
                                   r"More than one attributes.*as content"):

            class Element0(ItemElement):
                attr1 = ItemElementAttribute(is_content=True)
                attr2 = ItemElementAttribute(is_content=False)
                attr3 = ItemElementAttribute(is_content=True)
예제 #24
0
 def test_element_setattr(self, elem_name, attr_name, value):
     # Write ``value`` into attribute ``attr_name`` of the element called
     # ``elem_name`` on a brand-new item and expect to read the same value back.
     rss_item = RssItem()
     target_element = getattr(rss_item, elem_name)
     setattr(target_element, attr_name, value)
     stored_value = getattr(target_element, attr_name)
     self.assertEqual(stored_value, value)
예제 #25
0
    def __init__(self, *args, **kwargs):
        # Prepare the sample strings and the pre-filled RSS items that the
        # test methods of this case read back.
        super(TestSimpleElements, self).__init__(*args, **kwargs)

        self.empty_text = ""
        self.non_empty_title = "Non-empty title"
        self.non_empty_description = "Non-empty description"
        self.categories = [
            "first category name", "second category name",
            "third category name", "fourth category name"
        ]
        self.unescaped_title = "<b>Non-empty<br/> title</b>"
        self.unescaped_description = "<b>Non-empty description</b><img src='url'/>"

        def titled_item():
            # Fresh item carrying only the shared non-empty title.
            fresh = RssItem()
            fresh.title = self.non_empty_title
            return fresh

        # Items holding a single text field.
        blank_title_item = RssItem()
        blank_title_item.title = self.empty_text
        self.item_with_empty_title_only = blank_title_item

        blank_description_item = RssItem()
        blank_description_item.description = self.empty_text
        self.item_with_empty_description_only = blank_description_item

        self.item_with_title_only = titled_item()

        description_item = RssItem()
        description_item.description = self.non_empty_description
        self.item_with_description_only = description_item

        # One category is assigned as a plain string ...
        single_category_item = titled_item()
        single_category_item.category = self.categories[0]
        self.item_with_single_category = single_category_item

        # ... several categories as a list slice of growing length.
        for amount in (2, 3, 4):
            categorized_item = titled_item()
            categorized_item.category = self.categories[:amount]
            setattr(self, 'item_with_{}_categories'.format(amount),
                    categorized_item)

        unescaped_item = RssItem()
        unescaped_item.title = self.unescaped_title
        unescaped_item.description = self.unescaped_description
        self.item_with_unescaped_text = unescaped_item

        self.guids = [
            {
                'guid': 'identifier 1',
                'isPermaLink': False
            },
            {
                'guid': 'identifier 2',
                'isPermaLink': True
            },
        ]
        first_guid, second_guid = self.guids

        # Items are grouped by the index of the guid entry they were built
        # from; each one is filled in through a different assignment form.
        first_group = []
        second_group = []
        self.items_with_guid = {0: first_group, 1: second_group}

        # Content value only.
        guid_item = titled_item()
        guid_item.guid = first_guid['guid']
        first_group.append(guid_item)

        # Whole dict at once.
        guid_item = titled_item()
        guid_item.guid = first_guid
        first_group.append(guid_item)

        # Through the sub-attribute of the element.
        guid_item = titled_item()
        guid_item.guid.guid = first_guid['guid']
        first_group.append(guid_item)

        # Whole dict at once.
        guid_item = titled_item()
        guid_item.guid = second_guid
        second_group.append(guid_item)

        # Content value first, then the flag separately.
        guid_item = titled_item()
        guid_item.guid = second_guid['guid']
        guid_item.guid.isPermaLink = second_guid['isPermaLink']
        second_group.append(guid_item)

        # Ready-made element instance.
        guid_item = titled_item()
        guid_item.guid = GuidElement(**second_guid)
        second_group.append(guid_item)
예제 #26
0
    def test_element_initializer(self):
        """Check which arguments the element initializers accept or reject."""
        # NOTE(review): ``assertRaisesRegexp`` is a deprecated alias of
        # ``assertRaisesRegex`` and is gone in Python 3.12 -- confirm which
        # interpreter versions this snippet must run on.

        # Union of the attribute names of all elements, plus one name that no
        # element declares; used below to derive invalid keyword arguments.
        all_attrs = set(attr for elem in RssItem().elements.values()
                        for attr in elem.attrs)
        all_attrs.add('impossible_attr')
        for elem in RssItem().elements.values():
            elem_cls = elem.__class__
            if elem_cls == MultipleElements:
                # MultipleElements takes the wrapped element class; none of
                # the remaining checks are run for it.
                elem_cls(ItemElement)
                continue
            else:
                elem_cls()
            # Every declared attribute is accepted as a keyword argument.
            for attr in elem.attrs:
                elem_cls(**{attr: None})
                elem_cls(**{attr: 0})
                elem_cls(**{attr: ''})
                elem_cls(**{attr: 1})
                elem_cls(**{attr: '1'})
            # Attribute names this element does not declare must be rejected.
            for bad_attr in all_attrs - set(elem.attrs):
                for val in (None, 0, '', 1, '1'):
                    with self.assertRaisesRegexp(
                            ValueError,
                            'supports only the next named arguments',
                            msg=
                            "Invalid attribute '{}' was passed to '{}' initializer"
                            .format(bad_attr, elem_cls.__name__)):
                        elem_cls(**{bad_attr: val})
            # One positional argument: accepted only when the element has a
            # content attribute, and then the element equals that value.
            for val in (None, 0, '', 1, '1'):
                if elem.content_arg:
                    el = elem_cls(val)
                    self.assertEqual(el, getattr(el, el.content_arg))
                    self.assertEqual(el, val)
                else:
                    with self.assertRaisesRegexp(
                            ValueError,
                            'does not support unnamed arguments',
                            msg=
                            "Invalid attribute was passed to '{}' initializer "
                            "(element must not have content)".format(
                                elem_cls.__name__)):
                        elem_cls(val)

            # Two positional arguments are always rejected; only the expected
            # error text depends on whether the element has content.
            for val1, val2 in zip((None, 0, '', 1, '1'),
                                  (None, 0, '', 1, '1')):
                if elem.content_arg:
                    with self.assertRaisesRegexp(
                            ValueError,
                            'supports only single unnamed argument',
                            msg=
                            "Invalid attribute was passed to '{}' initializer "
                            "(element must not have content)".format(
                                elem_cls.__name__)):
                        elem_cls(val1, val2)
                else:
                    with self.assertRaisesRegexp(
                            ValueError,
                            'does not support unnamed arguments',
                            msg=
                            "Invalid attribute was passed to '{}' initializer "
                            "(element must not have content)".format(
                                elem_cls.__name__)):
                        elem_cls(val1, val2)