def test_element_setattr(self):
    """Check that element attributes give back exactly what was assigned.

    For every element name exposed by ``RssItem().elements`` and every
    attribute listed in that element's ``attrs``, a few sample values are
    assigned with ``setattr`` and read back with ``getattr``.
    """
    samples = (None, 0, 1, '1', 'long string')
    item = RssItem()
    # Names are taken from a throwaway item; attributes are set on ``item``.
    for element_name in RssItem().elements:
        element = getattr(item, element_name)
        for attribute in element.attrs:
            for sample in samples:
                setattr(element, attribute, sample)
                stored = getattr(element, attribute)
                self.assertEqual(stored, sample)
def __init__(self, *args, **kwargs):
    """Register an equality function for every default RSS element class.

    Elements that are ``MultipleElements`` instances are registered with
    ``assertMultipleRssElementsEqualsToValues``; all other elements are
    registered with ``assertRssElementEqualsToValue``.  The registration
    key is the tuple ``(element class, None)``.
    """
    super(RssTestCase, self).__init__(*args, **kwargs)
    for element in RssItem().elements.values():
        comparer = (self.assertMultipleRssElementsEqualsToValues
                    if isinstance(element, MultipleElements)
                    else self.assertRssElementEqualsToValue)
        self.addTypeEqualityFunc((element.__class__, None), comparer)
def test_irregular_access(self):
    """Check single-value access on a multi-valued ``category`` element.

    The same sequence is exercised twice: on a bare
    ``MultipleElements(CategoryElement)`` and on ``RssItem().category``.
    Reading ``.category`` must raise ``AttributeError`` while nothing is
    assigned or while more than one value is stored, writing it must raise
    while more than one value is stored, and both must work once exactly
    one value is stored.
    """
    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12.  Prefer the current name and fall back to the old one
    # only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                           or self.assertRaisesRegexp)

    # Stand-alone multiple-elements container.
    me = MultipleElements(CategoryElement)
    with assert_raises_regex(AttributeError, 'have not been assigned'):
        me.category
    me.add(['first', 'second'])
    with assert_raises_regex(AttributeError, 'more than one elements'):
        me.category
    with assert_raises_regex(AttributeError,
                             'zero or more than one elements'):
        me.category = 'another'
    me.clear()
    me.add('single')
    me.category = 'another'
    self.assertEqual(me.category, 'another')

    # The same container reached through an item.
    item = RssItem()
    with assert_raises_regex(AttributeError, 'have not been assigned'):
        item.category.category
    item.category = ['first', 'second']
    with assert_raises_regex(AttributeError, 'more than one elements'):
        item.category.category
    with assert_raises_regex(AttributeError,
                             'zero or more than one elements'):
        item.category.category = 'other'
    item.category.clear()
    item.category = 'single'
    item.category.category = 'another'
    self.assertEqual(item.category.category, 'another')
def test_spider_output_handling(self):
    """Feed one instance of each item class through the scraper.

    The test passes when ``_process_spidermw_output`` accepts an
    ``RssItem``, an ``ExtendableItem`` and an ``RssedItem`` without
    raising; no return value is inspected.
    """
    spider = self.MySpider()
    scraper = Scraper(Crawler(spider))
    scraper.open_spider(spider)
    for item_cls in (RssItem, ExtendableItem, RssedItem):
        scraper._process_spidermw_output(item_cls(), None, None, None)
    scraper.close_spider(spider)
def test_item_properties_v1(self, elem, elem_name, value):
    """Assign ``value`` to the item property named ``elem_name``.

    When ``elem.required_attrs`` is truthy the assignment must raise
    ``ValueError`` matching 'Could not assign value'; otherwise the
    property must read back equal to ``value``.
    """
    item = RssItem()
    if not elem.required_attrs:
        setattr(item, elem_name, value)
        self.assertEqual(getattr(item, elem_name), value)
        return

    with six.assertRaisesRegex(self, ValueError, 'Could not assign value'):
        setattr(item, elem_name, value)
def test_attributes_uniqueness(self, elem, elem_name, attr, attr_name):
    """Check that attribute objects are not shared between instances.

    Two fresh instances of the attribute's class must be distinct objects,
    and the object stored under ``attr_name.priv_name`` on the element
    ``elem_name`` must differ between two separate ``RssItem`` instances.
    """
    first_item = RssItem()
    second_item = RssItem()

    attr_cls = attr.__class__
    fresh_a = attr_cls()
    fresh_b = attr_cls()
    self.assertIsNot(
        fresh_a, fresh_b,
        msg="Instances of attribute [class '{}'] are identical".format(
            attr_cls.__name__))

    private_name = attr_name.priv_name
    owned_a = getattr(getattr(first_item, elem_name), private_name)
    owned_b = getattr(getattr(second_item, elem_name), private_name)
    shared_msg = (
        "Appropriate attributes [class '{}'] of appropriate elements [class '{}'] "
        "of RSS item instances are identical".format(
            attr_cls.__name__, elem.__class__.__name__))
    self.assertIsNot(owned_a, owned_b, msg=shared_msg)
def test_elements_uniqueness(self, elem, elem_name):
    """Check that element objects are not shared between instances.

    Two fresh instances of the element's class must be distinct objects,
    and the element reachable as ``elem_name`` must differ between two
    separate ``RssItem`` instances.
    """
    elem_cls = elem.__class__

    def make_element():
        # A ``MultipleElements`` instance is built with ``ItemElement``
        # as its constructor argument; other classes take none.
        if isinstance(elem, MultipleElements):
            return elem_cls(ItemElement)
        return elem_cls()

    fresh_a = make_element()
    fresh_b = make_element()
    self.assertIsNot(
        fresh_a, fresh_b,
        msg="Instances of element class '{}' are identical".format(
            elem_cls.__name__))

    first_item = RssItem()
    second_item = RssItem()
    shared_msg = (
        "Appropriate elements [class '{}'] of RSS item instances are identical"
        .format(elem_cls.__name__))
    self.assertIsNot(getattr(first_item, elem_name),
                     getattr(second_item, elem_name),
                     msg=shared_msg)
def parse_item(self, response):
    """Yield one ``RssItem`` built from ``response``.

    The title comes from the ``og:title`` meta tag, the publication date
    from the text of ``.title h6``, and the description is the
    concatenation of every ``.mr-content`` node.  Link and guid are both
    the response URL; the author is a fixed string.
    """
    page_url = response.url
    item = RssItem()

    og_title = response.css('meta[property="og:title"]::attr(content)')
    item.title = og_title.get()
    item.link = page_url
    item.guid = page_url
    item.pubDate = response.css(".title h6::text").get()
    item.author = "Liberal Victoria"

    content_nodes = response.css(".mr-content").extract()
    item.description = "".join(content_nodes)
    yield item
def test_item_properties(self):
    """Check property assignment on a fresh item and on the fixture items.

    First, every element of a new ``RssItem`` is assigned a few sample
    values: elements with ``required_attrs`` must reject the assignment
    with ``ValueError``, the others must read back the assigned value.
    Then the pre-built fixture items stored on ``self`` are compared with
    the values they were built from.
    """
    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12.  Prefer the current name and fall back to the old one
    # only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                           or self.assertRaisesRegexp)

    item = RssItem()
    for elem_name, elem in item.elements.items():
        for val in (None, 0, '', 1, '1'):
            if elem.required_attrs:
                with assert_raises_regex(ValueError,
                                         'Could not assign value'):
                    setattr(item, elem_name, val)
            else:
                setattr(item, elem_name, val)
                self.assertEqual(getattr(item, elem_name), val)

    self.assertEqual(self.item_with_empty_title_only.title,
                     self.empty_text)
    self.assertEqual(self.item_with_empty_description_only.description,
                     self.empty_text)
    self.assertEqual(self.item_with_title_only.title,
                     self.non_empty_title)
    self.assertEqual(self.item_with_description_only.description,
                     self.non_empty_description)

    # Each category fixture is checked for BOTH its own title and its own
    # category list.  Previously the titles of the 3- and 4-category
    # items were checked in place of the 2- and 3-category ones.
    self.assertEqual(self.item_with_single_category.title,
                     self.non_empty_title)
    self.assertEqual(self.item_with_single_category.category,
                     self.categories[0])
    self.assertEqual(self.item_with_2_categories.title,
                     self.non_empty_title)
    self.assertEqual(self.item_with_2_categories.category,
                     self.categories[:2])
    self.assertEqual(self.item_with_3_categories.title,
                     self.non_empty_title)
    self.assertEqual(self.item_with_3_categories.category,
                     self.categories[:3])
    self.assertEqual(self.item_with_4_categories.title,
                     self.non_empty_title)
    self.assertEqual(self.item_with_4_categories.category,
                     self.categories[:4])

    self.assertEqual(self.item_with_unescaped_text.title,
                     self.unescaped_title)
    self.assertEqual(self.item_with_unescaped_text.description,
                     self.unescaped_description)

    for idx, items in self.items_with_guid.items():
        expected = self.guids[idx]
        for guid_item in items:
            self.assertEqual(guid_item.guid, expected['guid'])
            self.assertEqual(guid_item.guid.guid, expected['guid'])
            self.assertEqual(guid_item.guid.isPermaLink,
                             expected['isPermaLink'])
def parse_item(self, response):
    """Yield one ``RssItem`` built from ``response``.

    The title is the page ``<title>`` text after the first " - "
    separator, the publication date is the stripped text of
    ``div.newsCreatedDate``, the author joins the ``alt`` text of every
    ``img.ministersPic`` with " & ", and the description is the first
    ``div.ms-rtestate-field`` node.  Link and guid are the response URL.
    """
    page_url = response.url
    item = RssItem()

    raw_title = response.css("title::text").get()
    item.title = raw_title.split(" - ", 1)[-1].strip()
    item.link = page_url
    item.guid = page_url

    raw_date = response.css("div.newsCreatedDate::text").get()
    item.pubDate = raw_date.strip()

    minister_names = response.css("img.ministersPic::attr(alt)").getall()
    item.author = " & ".join(minister_names)
    item.description = response.css("div.ms-rtestate-field").get()
    yield item
def test_uniqueness(self):
    """Check that elements and their attributes are never shared.

    For every default element: two fresh instances of its class must be
    distinct, and the element must differ between two separate items.
    For every attribute of that element: two fresh instances of the
    attribute class must be distinct, and the object stored under
    ``'__<attr name>'`` must differ between the two items' elements.
    """
    for elem_name, elem in RssItem().elements.items():
        elem_cls = elem.__class__
        is_multiple = isinstance(elem, MultipleElements)
        fresh_elem_a = elem_cls(ItemElement) if is_multiple else elem_cls()
        fresh_elem_b = elem_cls(ItemElement) if is_multiple else elem_cls()
        self.assertIsNot(
            fresh_elem_a, fresh_elem_b,
            msg="Instances of element class '{}' are identical".format(
                elem_cls.__name__))

        first_item = RssItem()
        second_item = RssItem()
        self.assertIsNot(
            getattr(first_item, elem_name),
            getattr(second_item, elem_name),
            msg=("Appropriate elements [class '{}'] of RSS item instances are identical"
                 .format(elem_cls.__name__)))

        for attr_name, attr in elem.attrs.items():
            attr_cls = attr.__class__
            fresh_attr_a = attr_cls()
            fresh_attr_b = attr_cls()
            self.assertIsNot(
                fresh_attr_a, fresh_attr_b,
                msg="Instances of attribute [class '{}'] are identical".format(
                    attr_cls.__name__))

            private_name = '__{}'.format(attr_name)
            owned_a = getattr(getattr(first_item, elem_name), private_name)
            owned_b = getattr(getattr(second_item, elem_name), private_name)
            self.assertIsNot(
                owned_a, owned_b,
                msg=("Appropriate attributes [class '{}'] of appropriate elements [class '{}'] "
                     "of RSS item instances are identical".format(
                         attr_cls.__name__, elem_cls.__name__)))
def __init__(self, *args, **kwargs):
    """Build fixture items carrying one, two, three and four categories."""
    super(TestMultipleElements, self).__init__(*args, **kwargs)
    self.category_names = [
        '1st category name',
        '2nd category name',
        '3rd category name',
        '4th category',
    ]
    non_empty_title = 'Item title'

    def item_with(category_value):
        # Every fixture shares the same title and differs only in category.
        fixture = RssItem()
        fixture.title = non_empty_title
        fixture.category = category_value
        return fixture

    self.item_with_single_category = item_with(self.category_names[0])
    self.item_with_2_categories = item_with(self.category_names[:2])
    self.item_with_3_categories = item_with(self.category_names[:3])
    self.item_with_4_categories = item_with(self.category_names[:4])
def parse_item(self, response):
    """Return one ``RssItem`` built from ``response``.

    Title, publication date and author are read from the
    ``dcterms.title``, ``dcterms.issued`` and ``article.minister`` meta
    tags.  The description concatenates the summary paragraphs followed by
    the body paragraphs.  Link and guid are the response URL.
    """
    page_url = response.url
    item = RssItem()

    title_meta = response.css('meta[name="dcterms.title"]::attr(content)')
    item.title = title_meta.get()
    item.link = page_url
    item.guid = page_url

    issued_meta = response.css('meta[name="dcterms.issued"]::attr(content)')
    item.pubDate = issued_meta.get()

    minister_meta = response.css(
        'meta[name="article.minister"]::attr(content)')
    item.author = minister_meta.get()

    paragraphs = response.css("div.news-detail__summary p").getall()
    paragraphs = paragraphs + response.css(
        "div.news-detail__body p").getall()
    item.description = "".join(paragraphs)
    return item
def parse_item(self, response):
    """Yield one ``RssItem`` built from ``response``.

    The title is the part of the page ``<title>`` before the first " | ",
    the publication date comes from the ``dcterms.date`` meta tag and the
    description is the first ``div.nsw-wysiwyg-content`` node.  The author
    is the last "released by" text node, stripped, or "NSW Government"
    when the page has none.  Link and guid are the response URL.
    """
    page_url = response.url
    item = RssItem()

    page_title = response.css("title::text").get()
    item.title = page_title.split(" | ")[0]
    item.link = page_url
    item.guid = page_url

    date_meta = response.css('meta[name="dcterms.date"]::attr(content)')
    item.pubDate = date_meta.get()
    item.description = response.css("div.nsw-wysiwyg-content").get()

    released_by = response.css(
        "div.standard-header__released_by div::text").getall()
    item.author = (released_by[-1].strip() if released_by
                   else "NSW Government")
    yield item
def test_item_with_single_elem(self, attr_name, attr, elem_name,
                               elem_kwargs):
    """Check ``repr`` of an item subclass that adds one custom element.

    An ``ItemElement`` subclass with the single attribute ``attr_name`` is
    created dynamically, instantiated with ``elem_kwargs`` and attached to
    a dynamically created ``RssItem`` subclass as ``elem_name``.  ``repr``
    must always succeed; its exact text is compared only on
    Python 3.7 and later.
    """
    elem_cls_name = "Element0"
    item_cls_name = "Item0"

    elem_cls = type(elem_cls_name, (ItemElement, ), {attr_name: attr})
    elem = elem_cls(**elem_kwargs)
    item_cls = type(item_cls_name, (RssItem, ), {elem_name: elem})
    item = item_cls()
    repr(item)

    if sys.version_info < (3, 7):
        return

    # insertion ordered dict
    expected_pairs = chain(RssItem().elements.items(), [(elem_name, elem)])
    rendered = ", ".join("{}={!r}".format(name, value)
                         for name, value in expected_pairs)
    assert repr(item) == "{}({})".format(item_cls_name, rendered)
def __init__(self, *args, **kwargs):
    """Build the named fixture items and store them in ``self.items``."""
    super(TestExporting, self).__init__(*args, **kwargs)
    fixtures = {}

    minimal = RssItem()
    minimal.title = 'Title of minimal item'
    fixtures['minimal_item'] = minimal

    minimal2 = RssItem()
    minimal2.description = 'Description of minimal item'
    fixtures['minimal_item2'] = minimal2

    simple = RssItem()
    simple.title = 'Title of simple item'
    simple.description = 'Description of simple item'
    fixtures['simple_item'] = simple

    single_category = RssItem()
    single_category.title = 'Title of item with single category'
    single_category.category = 'Category 1'
    fixtures['item_with_single_category'] = single_category

    multiple_categories = RssItem()
    multiple_categories.title = 'Title of item with multiple categories'
    multiple_categories.category = ['Category 1', 'Category 2']
    fixtures['item_with_multiple_categories'] = multiple_categories

    with_guid = RssItem()
    with_guid.title = 'Title of item with guid'
    with_guid.guid = 'Identifier'
    fixtures['item_with_guid'] = with_guid

    with_unicode = RssItem()
    with_unicode.title = 'Title of item with unicode and special characters'
    with_unicode.description = "[Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!]"
    fixtures['item_with_unicode'] = with_unicode

    with_enclosure = RssItem()
    with_enclosure.title = 'Title of item with enclosure'
    with_enclosure.enclosure.url = 'http://example.com/content'
    with_enclosure.enclosure.length = 0
    with_enclosure.enclosure.type = 'text/plain'
    fixtures['item_with_enclosure'] = with_enclosure

    self.items = fixtures
def test_item_validation(self):
    """Check that the item pipeline rejects invalid items.

    Three situations are covered, each by running
    ``context.ipm.process_item`` inside a ``CrawlerContext`` built from
    ``self.feed_settings``:

    * an ``RssItem`` whose enclosure has only ``url`` set;
    * a custom item whose element has two required attributes but only
      one of them set (assigning a plain value to that element must
      itself raise ``InvalidElementValueError``);
    * ``ExtendableItem`` subclasses, which must be rejected with
      "Item must have 'rss'".
    """
    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12.  Prefer the current name and fall back to the old one
    # only where the new one does not exist (Python 2).
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                           or self.assertRaisesRegexp)

    invalid_item = RssItem()
    invalid_item.enclosure.url = 'http://example.com/content'
    with assert_raises_regex(InvalidRssItemAttributesError,
                             'required attributes .*? not set'):
        with CrawlerContext(**self.feed_settings) as context:
            context.ipm.process_item(invalid_item, context.spider)

    class NonStandardElement(ItemElement):
        first_attribute = ItemElementAttribute(required=True,
                                               is_content=True)
        second_attribute = ItemElementAttribute(required=True)

    class NonStandardItem(RssItem):
        element = NonStandardElement()

    invalid_item = NonStandardItem()
    with assert_raises_regex(InvalidElementValueError, 'Could not assign'):
        invalid_item.element = 'valid value'
    # Only one of the two required attributes is set from here on.
    invalid_item.element.first_attribute = 'valid value'
    with assert_raises_regex(InvalidRssItemAttributesError,
                             'required attributes .*? not set'):
        with CrawlerContext(**self.feed_settings) as context:
            context.ipm.process_item(invalid_item, context.spider)

    class InvalidSuperItem1(ExtendableItem):
        pass

    class InvalidSuperItem2(ExtendableItem):
        field = scrapy.Field()

    class InvalidSuperItem3(ExtendableItem):
        rss = scrapy.Field()

    for invalid_item_cls in (InvalidSuperItem1, InvalidSuperItem2,
                             InvalidSuperItem3):
        with assert_raises_regex(InvalidRssItemError,
                                 "Item must have 'rss'"):
            with CrawlerContext(**self.feed_settings) as context:
                context.ipm.process_item(invalid_item_cls(),
                                         context.spider)
def test_inner_cls_attr(self):
    """Check assigning ``category`` as elements, lists and plain values.

    The same item is repeatedly re-assigned a single ``CategoryElement``,
    a list of ``CategoryElement`` objects, a list of plain names and a
    single plain name; after each assignment the property must compare
    equal to the corresponding name or list of names.
    """
    names = self.category_names
    first_name = names[0]

    item = RssItem()
    item.category = CategoryElement(category=first_name)
    self.assertEqual(item.category, first_name)

    for cnt in range(2, 2 + len(names)):
        expected_names = names[:cnt]

        item.category = [CategoryElement(category=name)
                         for name in names[:cnt]]
        self.assertEqual(item.category, expected_names)

        item.category = CategoryElement(category=first_name)
        self.assertEqual(item.category, first_name)

        # A fresh slice is assigned so the item never holds the list
        # used for the comparison.
        item.category = names[:cnt]
        self.assertEqual(item.category, expected_names)

        item.category = first_name
        self.assertEqual(item.category, first_name)
def parse_item(self, response):
    """Yield one ``RssItem`` built from ``response``.

    The title is the stripped ``DCTERMS.title`` meta content.  The
    publication date is cut out of the first ``"datePublished": "..."``
    match found in script text.  The author joins every
    ``p.statement-ministers`` text node with " & ".  The description joins
    the ``div div p`` nodes after skipping the leading ones counted by
    ``cutoff``.  Link and guid are the response URL.
    """
    page_url = response.url
    item = RssItem()

    title_meta = response.css('meta[name="DCTERMS.title"]::attr(content)')
    item.title = title_meta.get().strip()
    item.link = page_url
    item.guid = page_url

    published = response.css("script::text").re_first(
        r'"datePublished": ".*"')
    # Keep what follows the first colon and drop the quotes around it.
    item.pubDate = published.split(":", 1)[-1].strip(' "')

    author = response.css("p.statement-ministers::text").getall()
    item.author = " & ".join(author)

    description = response.css("div div p").getall()
    # publish date & author; with several authors, that many more
    # paragraphs are skipped as well.
    cutoff = 2 + (len(author) if len(author) > 1 else 0)
    item.description = "".join(description[cutoff:])
    yield item
def test_item_with_multiple_elems(self, attrs, elems_descr):
    """Check ``repr`` of an item subclass that adds several elements.

    ``elems_descr`` is a sequence of ``(element name, constructor
    kwargs)`` pairs.  One ``ItemElement`` subclass per pair is created
    from ``attrs``, instantiated with its kwargs and attached to a
    dynamically created ``RssItem`` subclass.  ``repr`` must always
    succeed; its exact text is compared only on Python 3.7 and later.
    """
    elems_names, elems_kwargs = zip(*elems_descr)
    item_cls_name = "Item0"

    elem_clses = [
        type("Element{}".format(idx), (ItemElement, ), dict(attrs))
        for idx in range(len(elems_descr))
    ]
    elem_instances = [
        elem_cls(**kwargs)
        for elem_cls, kwargs in zip(elem_clses, elems_kwargs)
    ]
    class_body = dict(zip(elems_names, elem_instances))
    item_cls = type(item_cls_name, (RssItem, ), class_body)
    item = item_cls()
    repr(item)

    if sys.version_info < (3, 7):
        return

    # insertion ordered dict
    expected_pairs = chain(RssItem().elements.items(),
                           zip(elems_names, elem_instances))
    rendered = ", ".join("{}={}".format(elem_name, elem)
                         for elem_name, elem in expected_pairs)
    item_repr = "{}({})".format(item_cls_name, rendered)
    assert repr(item) == item_repr
def __init__(self, *args, **kwargs):
    """Define namespaced element/item classes and build the fixture items.

    The locally defined ``NSItem0`` .. ``NSItem3`` classes are published
    as attributes of ``PredefinedItems``.  Three attributes are set on the
    instance: ``items`` (fixture name -> item), ``ns_items_of_same_cls``
    and ``ns_items`` (lists of tuples describing namespace scenarios).

    ``*args`` and ``**kwargs`` are accepted but not used in this method.
    """
    # NOTE(review): no ``super().__init__`` call is made here -- confirm
    # the enclosing class does not need one.

    # --- element classes with namespaced attributes -------------------
    class NSElement0(ItemElement):
        attr01 = ItemElementAttribute(ns_prefix="prefix01", ns_uri="id01")

    class NSElement1(ItemElement):
        # presumably the part before ``__`` in the name acts as the
        # namespace prefix when ``ns_prefix`` is not given -- TODO confirm
        prefix11__attr11 = ItemElementAttribute(ns_uri="id11")
        prefix12__attr12 = ItemElementAttribute(ns_prefix="prefix12", ns_uri="id12")

    class NSElement2(ItemElement):
        attr21 = ItemElementAttribute(is_content=True)
        pseudo_prefix22__attr22 = ItemElementAttribute(
            ns_prefix="prefix22", ns_uri="id22")

    class NSElement3(ItemElement):
        attr31 = ItemElementAttribute(is_content=True)
        attr32 = ItemElementAttribute(ns_prefix="prefixa", ns_uri="id32")

    class NSElement4(ItemElement):
        attr41 = ItemElementAttribute()
        prefix42__attr41 = ItemElementAttribute(ns_uri="id42")

    # --- item classes combining the elements above --------------------
    class NSItem0(RssItem):
        elem0 = ItemElement()
        elem1 = NSElement0(ns_prefix="el_prefix1", ns_uri="el_id1")
        el_prefix2__elem2 = NSElement1(ns_uri="el_id2")
        el_prefix3__elem3 = NSElement2(ns_prefix="el_prefix3", ns_uri="el_id3")
        el_pseudo_prefix4__elem4 = NSElement0(ns_prefix="el_prefix4", ns_uri="el_id4")

    class NSItem1(RssItem):
        elem1 = NSElement0(ns_prefix="el_prefix1", ns_uri="el_id1")
        el_prefix__elem2 = NSElement1(ns_uri="el_id2")
        elem3 = NSElement2(ns_prefix="el_prefix", ns_uri="el_id3")
        el_pseudo_prefix4__elem4 = NSElement0(ns_prefix="el_prefix4", ns_uri="el_id4")

    class NSItem2(RssItem):
        elem1 = NSElement3(ns_prefix="prefix", ns_uri="el_id1")
        prefix__elem2 = NSElement3(ns_uri="el_id2")
        elem3 = NSElement3(ns_prefix="prefix", ns_uri="el_id3")
        el_pseudo_prefix4__elem4 = NSElement3(ns_prefix="prefix", ns_uri="el_id4")

    class NSItem3(RssItem):
        elem1 = NSElement3(ns_uri="el_id1")
        elem2 = NSElement3(ns_uri="el_id2")
        elem3 = NSElement3(ns_prefix="prefix", ns_uri="el_id3")
        el_pseudo_prefix4__elem3 = NSElement3(ns_prefix="prefix2", ns_uri="el_id4")
        elem4 = NSElement4()
        elem5 = NSElement4()

    # Publish the local classes on ``PredefinedItems``.
    PredefinedItems.NSItem0 = NSItem0
    PredefinedItems.NSItem1 = NSItem1
    PredefinedItems.NSItem2 = NSItem2
    PredefinedItems.NSItem3 = NSItem3

    # --- plain RSS fixture items --------------------------------------
    minimal_item = RssItem()
    minimal_item.title = 'Title of minimal item'

    minimal_item2 = RssItem()
    minimal_item2.description = 'Description of minimal item'

    simple_item = RssItem()
    simple_item.title = 'Title of simple item'
    simple_item.description = 'Description of simple item'

    item_with_single_category = RssItem()
    item_with_single_category.title = 'Title of item with single category'
    item_with_single_category.category = 'Category 1'

    item_with_multiple_categories = RssItem()
    item_with_multiple_categories.title = 'Title of item with multiple categories'
    item_with_multiple_categories.category = ['Category 1', 'Category 2']

    item_with_guid = RssItem()
    item_with_guid.title = 'Title of item with guid'
    item_with_guid.guid = 'Identifier'

    item_with_unicode = RssItem()
    item_with_unicode.title = 'Title of item with unicode and special characters'
    item_with_unicode.description = "[Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!]"

    item_with_enclosure = RssItem()
    item_with_enclosure.title = 'Title of item with enclosure'
    item_with_enclosure.enclosure.url = 'http://example.com/content'
    item_with_enclosure.enclosure.length = 0
    item_with_enclosure.enclosure.type = 'text/plain'

    # --- namespaced fixture items -------------------------------------
    item_with_unique_ns = NSItem0()
    item_with_unique_ns.title = "Title of item with unique namespaces"
    item_with_unique_ns.elem1.attr01 = ""
    item_with_unique_ns.el_prefix2__elem2.prefix11__attr11 = 0
    item_with_unique_ns.el_prefix2__elem2.prefix12__attr12 = ""
    item_with_unique_ns.el_prefix3__elem3.attr21 = "value3_21"
    item_with_unique_ns.el_prefix3__elem3.pseudo_prefix22__attr22 = 42
    item_with_unique_ns.el_pseudo_prefix4__elem4.attr01 = ""

    item_with_non_unique_ns = NSItem1()
    # NOTE(review): the title says "unique" although the item is the
    # non-unique one; left as is because it is runtime data -- confirm
    # against the expected output before changing it.
    item_with_non_unique_ns.title = "Title of item with unique namespaces"
    item_with_non_unique_ns.elem1.attr01 = "-"
    item_with_non_unique_ns.el_prefix__elem2.prefix11__attr11 = -1
    item_with_non_unique_ns.el_prefix__elem2.prefix12__attr12 = "-"
    item_with_non_unique_ns.elem3.attr21 = "yet another value3_21"
    item_with_non_unique_ns.elem3.pseudo_prefix22__attr22 = 4224
    item_with_non_unique_ns.el_pseudo_prefix4__elem4.attr01 = "-"

    # Second item of the same class; it is used only in
    # ``ns_items_of_same_cls`` below and is not part of ``self.items``.
    item_with_non_unique_ns2 = NSItem1()
    item_with_non_unique_ns2.title = "Title of item with unique namespaces 2"
    item_with_non_unique_ns2.elem1.attr01 = "0"
    item_with_non_unique_ns2.el_prefix__elem2.prefix11__attr11 = -999
    item_with_non_unique_ns2.elem3.attr21 = "value"
    item_with_non_unique_ns2.elem3.pseudo_prefix22__attr22 = 42
    item_with_non_unique_ns2.el_pseudo_prefix4__elem4.attr01 = ""

    item_with_same_ns_prefixes = NSItem2()
    item_with_same_ns_prefixes.title = "Title of item with same namespace prefixes"
    item_with_same_ns_prefixes.elem1.attr31 = "Content value 11ё"
    item_with_same_ns_prefixes.prefix__elem2.attr32 = "Attribute value 22"
    item_with_same_ns_prefixes.elem3.attr31 = "Content value 11"
    item_with_same_ns_prefixes.elem3.attr32 = "Attribute value 32"
    item_with_same_ns_prefixes.el_pseudo_prefix4__elem4.attr32 = ""

    item_with_default_nses = NSItem3()
    item_with_default_nses.title = "Title of item with default namespaces"
    item_with_default_nses.elem1.attr31 = "Content value 11ё"
    item_with_default_nses.elem2.attr32 = "Attribute value 22"
    item_with_default_nses.elem3.attr31 = "Content value 11"
    item_with_default_nses.elem3.attr32 = "Attribute value 32"
    item_with_default_nses.el_pseudo_prefix4__elem3.attr32 = ""
    item_with_default_nses.elem4.attr41 = "A41 b"
    item_with_default_nses.elem4.prefix42__attr41 = "0"

    # Fixture name -> item.
    self.items = {
        'minimal_item': minimal_item,
        'minimal_item2': minimal_item2,
        'simple_item': simple_item,
        'item_with_single_category': item_with_single_category,
        'item_with_multiple_categories': item_with_multiple_categories,
        'item_with_guid': item_with_guid,
        'item_with_unicode': item_with_unicode,
        'item_with_enclosure': item_with_enclosure,
        'item_with_unique_ns': item_with_unique_ns,
        'item_with_non_unique_ns': item_with_non_unique_ns,
        'item_with_same_ns_prefixes': item_with_same_ns_prefixes,
        'item_with_default_nses': item_with_default_nses
    }

    # Tuples of (name, item class, item) for two items of one class.
    # presumably ``name`` identifies the expected output -- TODO confirm
    self.ns_items_of_same_cls = [
        ('item_with_non_unique_ns5', NSItem1, item_with_non_unique_ns),
        ('item_with_non_unique_ns4', NSItem1, item_with_non_unique_ns2),
    ]

    # Tuples of (name, namespaces, item class, item).  Namespaces are
    # given as a list of pairs, a tuple of pairs, a dict or ``None``; the
    # item class is given as a class, a dotted path string, an empty
    # tuple or ``None``.
    # NOTE(review): how the consuming test maps these onto settings is
    # not visible here -- confirm against the caller.
    self.ns_items = [
        ('item_with_unique_ns2',
         [("el_prefix1", "el_id1"), ("prefix01", "id01"),
          ("el_prefix2", "el_id2"), ("prefix11", "id11"),
          ("prefix12", "id12")],
         None, item_with_unique_ns),
        ('item_with_unique_ns2',
         (("el_prefix1", "el_id1"), ("prefix01", "id01"),
          ("el_prefix2", "el_id2"), ("prefix11", "id11"),
          ("prefix12", "id12")),
         tuple(), item_with_unique_ns),
        ('item_with_unique_ns2', {
            "el_prefix1": "el_id1",
            "prefix01": "id01",
            "el_prefix2": "el_id2",
            "prefix11": "id11",
            "prefix12": "id12"
        }, None, item_with_unique_ns),
        ('item_with_unique_ns3', None, NSItem0, item_with_unique_ns),
        ('item_with_unique_ns3', None, 'tests.test_exporter.NSItem0', item_with_unique_ns),
        ('item_with_non_unique_ns2',
         [("el_prefix1", "el_id1"), ("prefix01", "id01"),
          ("prefix11", "id11"), ("prefix12", "id12"),
          ("prefix22", "id22"), ("el_prefix4", "el_id4")],
         None, item_with_non_unique_ns),
        ('item_with_non_unique_ns3', {
            "el_prefix1": "el_id1",
            "prefix01": "id01",
            "prefix11": "id11",
            "prefix12": "id12",
            "prefix22": "id22"
        }, None, item_with_non_unique_ns),
        ('item_with_non_unique_ns2', None, NSItem1, item_with_non_unique_ns),
        ('item_with_non_unique_ns2', None, 'tests.test_exporter.NSItem1', item_with_non_unique_ns),
        ('item_with_same_ns_prefixes2',
         [("prefix", "el_id1"), ("prefixa", "id32"),
          ("unused_prefix", "id000")],
         None, item_with_same_ns_prefixes),
        ('item_with_same_ns_prefixes2', {
            "prefix": "el_id1",
            "prefixa": "id32",
            "unused_prefix": "id000"
        }, None, item_with_same_ns_prefixes),
        ('item_with_same_ns_prefixes3', None, NSItem2, item_with_same_ns_prefixes),
        ('item_with_same_ns_prefixes3', None, 'tests.test_exporter.NSItem2', item_with_same_ns_prefixes),
        ('item_with_default_nses3', {
            'prefixa': 'id32',
            'prefix2': 'el_id4'
        }, None, item_with_default_nses),
        ('item_with_default_nses2', None, 'tests.test_exporter.NSItem3', item_with_default_nses),
        ('item_with_default_nses2', None, NSItem3, item_with_default_nses)
    ]
def __init__(self):
    """Initialise the parent item, then attach a fresh ``RssItem`` as ``rss``."""
    super(SuperItem, self).__init__()
    rss_item = RssItem()
    self.rss = rss_item
class TestSimpleElements(RssTestCase):
    """Tests for assigning, reading and constructing the default RSS elements.

    ``__init__`` builds a set of fixture items; the parameterized tests
    are expanded over ``RssItem().elements`` and the module-level
    ``values`` sequence.
    """

    def __init__(self, *args, **kwargs):
        """Build the fixture items used by ``test_item_properties_v2``."""
        super(TestSimpleElements, self).__init__(*args, **kwargs)

        self.empty_text = ""
        self.non_empty_title = "Non-empty title"
        self.non_empty_description = "Non-empty description"
        self.categories = [
            "first category name", "second category name",
            "third category name", "fourth category name"
        ]
        self.unescaped_title = "<b>Non-empty<br/> title</b>"
        self.unescaped_description = "<b>Non-empty description</b><img src='url'/>"

        self.item_with_empty_title_only = RssItem()
        self.item_with_empty_title_only.title = self.empty_text

        self.item_with_empty_description_only = RssItem()
        self.item_with_empty_description_only.description = self.empty_text

        self.item_with_title_only = RssItem()
        self.item_with_title_only.title = self.non_empty_title

        self.item_with_description_only = RssItem()
        self.item_with_description_only.description = self.non_empty_description

        self.item_with_single_category = RssItem()
        self.item_with_single_category.title = self.non_empty_title
        self.item_with_single_category.category = self.categories[0]

        self.item_with_2_categories = RssItem()
        self.item_with_2_categories.title = self.non_empty_title
        self.item_with_2_categories.category = self.categories[:2]

        self.item_with_3_categories = RssItem()
        self.item_with_3_categories.title = self.non_empty_title
        self.item_with_3_categories.category = self.categories[:3]

        self.item_with_4_categories = RssItem()
        self.item_with_4_categories.title = self.non_empty_title
        self.item_with_4_categories.category = self.categories[:4]

        self.item_with_unescaped_text = RssItem()
        self.item_with_unescaped_text.title = self.unescaped_title
        self.item_with_unescaped_text.description = self.unescaped_description

        self.guids = [
            {
                'guid': 'identifier 1',
                'isPermaLink': False
            },
            {
                'guid': 'identifier 2',
                'isPermaLink': True
            },
        ]
        # Index into ``self.guids`` -> items expected to carry that guid,
        # each built through a different way of assigning it.
        self.items_with_guid = {0: [], 1: []}

        # guid given as a plain value
        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[0]['guid']
        self.items_with_guid[0].append(item_with_guid)

        # guid given as a dict of attributes
        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[0]
        self.items_with_guid[0].append(item_with_guid)

        # guid set through the element's own attribute
        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid.guid = self.guids[0]['guid']
        self.items_with_guid[0].append(item_with_guid)

        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[1]
        self.items_with_guid[1].append(item_with_guid)

        # plain value first, then ``isPermaLink`` set separately
        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = self.guids[1]['guid']
        item_with_guid.guid.isPermaLink = self.guids[1]['isPermaLink']
        self.items_with_guid[1].append(item_with_guid)

        # guid given as a ready-made element
        item_with_guid = RssItem()
        item_with_guid.title = self.non_empty_title
        item_with_guid.guid = GuidElement(**self.guids[1])
        self.items_with_guid[1].append(item_with_guid)

    @parameterized.expand((elem, str(elem_name))
                          for elem_name, elem in RssItem().elements.items())
    def test_elements_uniqueness(self, elem, elem_name):
        """Element objects must not be shared between instances or items."""
        elem1 = elem.__class__() if not isinstance(
            elem, MultipleElements) else elem.__class__(ItemElement)
        elem2 = elem.__class__() if not isinstance(
            elem, MultipleElements) else elem.__class__(ItemElement)
        self.assertIsNot(
            elem1, elem2,
            msg="Instances of element class '{}' are identical".format(
                elem.__class__.__name__))

        item1 = RssItem()
        item2 = RssItem()
        self.assertIsNot(
            getattr(item1, elem_name), getattr(item2, elem_name),
            msg=
            "Appropriate elements [class '{}'] of RSS item instances are identical"
            .format(elem.__class__.__name__))

    @parameterized.expand((elem, str(elem_name), attr, attr_name)
                          for elem_name, elem in RssItem().elements.items()
                          for attr_name, attr in elem.attrs.items())
    def test_attributes_uniqueness(self, elem, elem_name, attr, attr_name):
        """Attribute objects must not be shared between instances or items."""
        item1 = RssItem()
        item2 = RssItem()
        attr1 = attr.__class__()
        attr2 = attr.__class__()
        self.assertIsNot(
            attr1, attr2,
            msg="Instances of attribute [class '{}'] are identical".format(
                attr.__class__.__name__))
        self.assertIsNot(
            getattr(getattr(item1, elem_name), attr_name.priv_name),
            getattr(getattr(item2, elem_name), attr_name.priv_name),
            msg=
            "Appropriate attributes [class '{}'] of appropriate elements [class '{}'] "
            "of RSS item instances are identical".format(
                attr.__class__.__name__, elem.__class__.__name__))

    @parameterized.expand((elem, str(elem_name), value)
                          for elem_name, elem in RssItem().elements.items()
                          for value in values)
    def test_item_properties_v1(self, elem, elem_name, value):
        """Plain assignment works unless the element has required attributes."""
        item = RssItem()
        if elem.required_attrs:
            with six.assertRaisesRegex(self, ValueError,
                                       'Could not assign value'):
                setattr(item, elem_name, value)
        else:
            setattr(item, elem_name, value)
            self.assertEqual(getattr(item, elem_name), value)

    def test_item_properties_v2(self):
        """Fixture items must expose the values they were built from."""
        self.assertEqual(self.item_with_empty_title_only.title,
                         self.empty_text)
        self.assertEqual(self.item_with_empty_description_only.description,
                         self.empty_text)
        self.assertEqual(self.item_with_title_only.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_description_only.description,
                         self.non_empty_description)

        # Each category fixture is checked for BOTH its own title and its
        # own category list.  Previously the titles of the 3- and
        # 4-category items were checked in place of the 2- and 3-category
        # ones.
        self.assertEqual(self.item_with_single_category.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_single_category.category,
                         self.categories[0])
        self.assertEqual(self.item_with_2_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_2_categories.category,
                         self.categories[:2])
        self.assertEqual(self.item_with_3_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_3_categories.category,
                         self.categories[:3])
        self.assertEqual(self.item_with_4_categories.title,
                         self.non_empty_title)
        self.assertEqual(self.item_with_4_categories.category,
                         self.categories[:4])

        self.assertEqual(self.item_with_unescaped_text.title,
                         self.unescaped_title)
        self.assertEqual(self.item_with_unescaped_text.description,
                         self.unescaped_description)

        for idx, items in self.items_with_guid.items():
            for item in items:
                self.assertEqual(item.guid, self.guids[idx]['guid'])
                self.assertEqual(item.guid.guid, self.guids[idx]['guid'])
                self.assertEqual(item.guid.isPermaLink,
                                 self.guids[idx]['isPermaLink'])

    @parameterized.expand((elem, ) for elem in RssItem().elements.values())
    def test_element_init_without_args(self, elem):
        """Every element class can be constructed without values."""
        elem_cls = elem.__class__
        if elem_cls is MultipleElements:
            elem_cls(ItemElement)
        else:
            elem_cls()

    @parameterized.expand((elem, str(attr), value)
                          for elem in RssItem().elements.values()
                          for attr in elem.attrs
                          for value in values
                          if not isinstance(elem, MultipleElements))
    def test_element_init_with_single_kwarg(self, elem, attr_name, value):
        """Every own attribute is accepted as a keyword argument."""
        elem_cls = elem.__class__
        elem_cls(**{attr_name: value})

    @parameterized.expand(
        (elem, str(bad_attr), value)
        for elem in RssItem().elements.values()
        for bad_attr in chain(('impossible_attr', ),
                              set(attr
                                  for elem in RssItem().elements.values()
                                  for attr in elem.attrs) - set(elem.attrs))
        for value in values
        if not isinstance(elem, MultipleElements))
    def test_element_init_with_bad_kwarg(self, elem, bad_attr_name, value):
        """Keyword arguments that are not own attributes are rejected."""
        elem_cls = elem.__class__
        with six.assertRaisesRegex(
                self, ValueError, 'supports only the next named arguments',
                msg="Invalid attribute '{}' was passed to '{}' initializer".
                format(bad_attr_name, elem_cls.__name__)):
            elem_cls(**{bad_attr_name: value})

    @parameterized.expand((elem, value)
                          for elem in RssItem().elements.values()
                          for value in values
                          if not isinstance(elem, MultipleElements))
    def test_element_init_content_arg(self, elem, value):
        """A single positional argument is accepted only by content elements."""
        elem_cls = elem.__class__
        if elem.content_arg:
            el = elem_cls(value)
            self.assertEqual(el, getattr(el, str(el.content_arg)))
            self.assertEqual(el, value)
        else:
            with six.assertRaisesRegex(
                    self, ValueError, 'does not support unnamed arguments',
                    msg="Invalid attribute was passed to '{}' initializer "
                    "(element must not have content)".format(
                        elem_cls.__name__)):
                elem_cls(value)

    @parameterized.expand((elem, value1, value2)
                          for elem in RssItem().elements.values()
                          for value1, value2 in zip(values, values)
                          if not isinstance(elem, MultipleElements))
    def test_element_init_with_multiple_args(self, elem, value1, value2):
        """Two positional arguments are always rejected."""
        elem_cls = elem.__class__
        if elem.content_arg:
            with six.assertRaisesRegex(
                    self, ValueError, 'supports only single unnamed argument',
                    msg="Invalid attribute was passed to '{}' initializer "
                    "(element must not have content)".format(
                        elem_cls.__name__)):
                elem_cls(value1, value2)
        else:
            with six.assertRaisesRegex(
                    self, ValueError, 'does not support unnamed arguments',
                    msg="Invalid attribute was passed to '{}' initializer "
                    "(element must not have content)".format(
                        elem_cls.__name__)):
                elem_cls(value1, value2)

    @parameterized.expand(
        (str(elem_name), str(attr_name), value)
        for elem_name, elem_descr in RssItem().elements.items()
        for attr_name in elem_descr.attrs
        for value in values)
    def test_element_setattr(self, elem_name, attr_name, value):
        """An element attribute reads back the value assigned to it."""
        item = RssItem()
        elem = getattr(item, elem_name)
        setattr(elem, attr_name, value)
        self.assertEqual(getattr(elem, attr_name), value)

    def test_multi_content_element(self):
        """Declaring two content attributes on one element must fail."""
        with six.assertRaisesRegex(self, ValueError,
                                   r"More than one attributes.*as content"):
            class Element0(ItemElement):
                attr1 = ItemElementAttribute(is_content=True)
                attr2 = ItemElementAttribute(is_content=False)
                attr3 = ItemElementAttribute(is_content=True)
def test_element_setattr(self, elem_name, attr_name, value):
    """Set ``attr_name`` on the element ``elem_name`` and read it back.

    The element is taken from a fresh ``RssItem``; the attribute must
    compare equal to ``value`` after the assignment.
    """
    element = getattr(RssItem(), elem_name)
    setattr(element, attr_name, value)
    stored = getattr(element, attr_name)
    self.assertEqual(stored, value)
def __init__(self, *args, **kwargs):
    """Store sample texts and pre-built ``RssItem`` objects on the instance.

    After the base initializer runs, the following groups of attributes
    are created:

    * text samples (empty, plain and containing markup) and a list of
      four category names;
    * items having only a title or only a description (empty or not);
    * titled items carrying one to four categories;
    * an item whose title and description contain unescaped markup;
    * ``guids`` (two keyword dictionaries) and ``items_with_guid``, which
      maps the index of each dictionary to items whose guid was assigned
      in different ways from that dictionary.
    """
    super(TestSimpleElements, self).__init__(*args, **kwargs)

    def titled_item():
        # Category and guid samples all start from the same titled item.
        sample = RssItem()
        sample.title = self.non_empty_title
        return sample

    # --- text samples -------------------------------------------------
    self.empty_text = ""
    self.non_empty_title = "Non-empty title"
    self.non_empty_description = "Non-empty description"
    self.categories = [
        "first category name",
        "second category name",
        "third category name",
        "fourth category name",
    ]
    self.unescaped_title = "<b>Non-empty<br/> title</b>"
    self.unescaped_description = "<b>Non-empty description</b><img src='url'/>"

    # --- items with a single text element -----------------------------
    self.item_with_empty_title_only = RssItem()
    self.item_with_empty_title_only.title = self.empty_text

    self.item_with_empty_description_only = RssItem()
    self.item_with_empty_description_only.description = self.empty_text

    self.item_with_title_only = titled_item()

    self.item_with_description_only = RssItem()
    self.item_with_description_only.description = self.non_empty_description

    # --- titled items with a growing number of categories -------------
    self.item_with_single_category = titled_item()
    self.item_with_single_category.category = self.categories[0]

    self.item_with_2_categories = titled_item()
    self.item_with_2_categories.category = self.categories[:2]

    self.item_with_3_categories = titled_item()
    self.item_with_3_categories.category = self.categories[:3]

    self.item_with_4_categories = titled_item()
    self.item_with_4_categories.category = self.categories[:4]

    # --- item with markup inside its texts -----------------------------
    self.item_with_unescaped_text = RssItem()
    self.item_with_unescaped_text.title = self.unescaped_title
    self.item_with_unescaped_text.description = self.unescaped_description

    # --- guid samples ---------------------------------------------------
    self.guids = [
        {'guid': 'identifier 1', 'isPermaLink': False},
        {'guid': 'identifier 2', 'isPermaLink': True},
    ]
    first_guid, second_guid = self.guids

    # Items built from the first dictionary: bare content value, the
    # whole dictionary, and direct assignment of the nested attribute.
    from_content = titled_item()
    from_content.guid = first_guid['guid']

    from_dict = titled_item()
    from_dict.guid = first_guid

    from_nested_attr = titled_item()
    from_nested_attr.guid.guid = first_guid['guid']

    # Items built from the second dictionary: the whole dictionary,
    # content followed by the isPermaLink attribute, and a ready-made
    # GuidElement.
    from_second_dict = titled_item()
    from_second_dict.guid = second_guid

    from_separate_attrs = titled_item()
    from_separate_attrs.guid = second_guid['guid']
    from_separate_attrs.guid.isPermaLink = second_guid['isPermaLink']

    from_element = titled_item()
    from_element.guid = GuidElement(**second_guid)

    self.items_with_guid = {
        0: [from_content, from_dict, from_nested_attr],
        1: [from_second_dict, from_separate_attrs, from_element],
    }
def test_element_initializer(self):
    """Check which constructor arguments each ``RssItem`` element class accepts.

    For every element of a fresh ``RssItem`` the test verifies that:

    * the element class can be created without arguments and with each
      name from ``elem.attrs`` passed as a keyword argument;
    * a keyword that belongs only to other elements (or the made-up
      ``'impossible_attr'``) raises ``ValueError``;
    * a single positional argument becomes the content when
      ``elem.content_arg`` is set and raises ``ValueError`` otherwise;
    * two positional arguments always raise ``ValueError``.

    ``MultipleElements`` entries are only constructed once, around
    ``ItemElement``, and are excluded from the remaining checks.
    """
    values = (None, 0, '', 1, '1')
    elements = list(RssItem().elements.values())

    # Union of all attribute names plus one that no element declares.
    all_attrs = {attr for elem in elements for attr in elem.attrs}
    all_attrs.add('impossible_attr')

    for elem in elements:
        elem_cls = elem.__class__
        if isinstance(elem, MultipleElements):
            elem_cls(ItemElement)
            continue

        # Supported keyword arguments must be accepted for any value.
        elem_cls()
        for attr in elem.attrs:
            for value in values:
                elem_cls(**{attr: value})

        # Keywords the element does not declare must be rejected.
        # six.assertRaisesRegex is used because the assertRaisesRegexp
        # alias is no longer available on recent Python versions.
        for bad_attr in all_attrs - set(elem.attrs):
            for value in values:
                with six.assertRaisesRegex(
                        self, ValueError,
                        'supports only the next named arguments',
                        msg="Invalid attribute '{}' was passed to '{}' initializer"
                            .format(bad_attr, elem_cls.__name__)):
                    elem_cls(**{bad_attr: value})

        no_content_msg = ("Invalid attribute was passed to '{}' initializer "
                          "(element must not have content)".format(
                              elem_cls.__name__))

        # One positional argument: stored as content or rejected.
        for value in values:
            if elem.content_arg:
                el = elem_cls(value)
                self.assertEqual(el, getattr(el, el.content_arg))
                self.assertEqual(el, value)
            else:
                with six.assertRaisesRegex(
                        self, ValueError,
                        'does not support unnamed arguments',
                        msg=no_content_msg):
                    elem_cls(value)

        # Two positional arguments are never accepted; only the error
        # text depends on whether the element has a content argument.
        if elem.content_arg:
            two_args_error = 'supports only single unnamed argument'
        else:
            two_args_error = 'does not support unnamed arguments'
        for value in values:
            with six.assertRaisesRegex(
                    self, ValueError, two_args_error,
                    msg=no_content_msg):
                elem_cls(value, value)