def parse_item(self, response):
    """Turn a scraped page into a single RSS item.

    The title comes from the ``og:title`` meta tag, the publication date
    from the ``.title h6`` text node, and the description is the
    concatenated markup of every ``.mr-content`` match. The page URL is
    used as both link and guid; the author is a fixed string.

    Yields:
        One populated ``RssItem``.
    """
    page_url = response.url
    headline = response.css('meta[property="og:title"]::attr(content)').get()
    published = response.css(".title h6::text").get()
    content_fragments = response.css(".mr-content").extract()

    item = RssItem()
    item.title = headline
    item.link = page_url
    item.guid = page_url
    item.pubDate = published
    item.author = "Liberal Victoria"
    item.description = "".join(content_fragments)
    yield item
def parse_item(self, response):
    """Turn a scraped page into a single RSS item.

    The title is the ``<title>`` text after its first ``" - "`` separator
    (or the whole text when there is no separator). The date is read from
    ``div.newsCreatedDate`` and the author is built from the ``alt`` text
    of every ``img.ministersPic``, joined with ``" & "``.

    Yields:
        One populated ``RssItem``.
    """
    page_url = response.url

    raw_title = response.css("title::text").get()
    headline = raw_title.split(" - ", 1)[-1].strip()

    raw_date = response.css("div.newsCreatedDate::text").get()
    published = raw_date.strip()

    minister_names = response.css("img.ministersPic::attr(alt)").getall()
    body_html = response.css("div.ms-rtestate-field").get()

    item = RssItem()
    item.title = headline
    item.link = page_url
    item.guid = page_url
    item.pubDate = published
    item.author = " & ".join(minister_names)
    item.description = body_html
    yield item
def parse_item(self, response):
    """Turn a scraped page into a single RSS item.

    Title, publication date and author are read from the
    ``dcterms.title``, ``dcterms.issued`` and ``article.minister`` meta
    tags. The description is the markup of the summary paragraphs
    followed by the markup of the body paragraphs.

    Returns:
        The populated ``RssItem``.
    """
    page_url = response.url
    headline = response.css('meta[name="dcterms.title"]::attr(content)').get()
    issued = response.css('meta[name="dcterms.issued"]::attr(content)').get()
    minister = response.css(
        'meta[name="article.minister"]::attr(content)').get()

    summary_html = "".join(
        response.css("div.news-detail__summary p").getall())
    body_html = "".join(response.css("div.news-detail__body p").getall())

    item = RssItem()
    item.title = headline
    item.link = page_url
    item.guid = page_url
    item.pubDate = issued
    item.author = minister
    # Summary paragraphs come first, then the article body.
    item.description = summary_html + body_html
    return item
def parse_item(self, response):
    """Turn a scraped page into a single RSS item.

    The title is the ``<title>`` text up to the first ``" | "``. The date
    comes from the ``dcterms.date`` meta tag and the description from
    ``div.nsw-wysiwyg-content``. The author is the last text node found
    under ``div.standard-header__released_by div``; when there is none, a
    fixed fallback name is used.

    Yields:
        One populated ``RssItem``.
    """
    page_url = response.url
    headline = response.css("title::text").get().split(" | ")[0]
    published = response.css('meta[name="dcterms.date"]::attr(content)').get()
    body_html = response.css("div.nsw-wysiwyg-content").get()
    released_by = response.css(
        "div.standard-header__released_by div::text").getall()

    item = RssItem()
    item.title = headline
    item.link = page_url
    item.guid = page_url
    item.pubDate = published
    item.description = body_html
    item.author = released_by[-1].strip() if released_by else "NSW Government"
    yield item
def __init__(self, *args, **kwargs):
    """Create the RSS item fixtures and store them in ``self.items``.

    ``self.items`` maps a fixture name to an ``RssItem`` that has only
    the fields implied by that name filled in.
    """
    super(TestExporting, self).__init__(*args, **kwargs)

    def titled(title):
        # New item that has nothing but its title set.
        entry = RssItem()
        entry.title = title
        return entry

    items = {}

    items['minimal_item'] = titled('Title of minimal item')

    description_only = RssItem()
    description_only.description = 'Description of minimal item'
    items['minimal_item2'] = description_only

    simple = titled('Title of simple item')
    simple.description = 'Description of simple item'
    items['simple_item'] = simple

    one_category = titled('Title of item with single category')
    one_category.category = 'Category 1'
    items['item_with_single_category'] = one_category

    many_categories = titled('Title of item with multiple categories')
    many_categories.category = ['Category 1', 'Category 2']
    items['item_with_multiple_categories'] = many_categories

    with_guid = titled('Title of item with guid')
    with_guid.guid = 'Identifier'
    items['item_with_guid'] = with_guid

    # Description mixes non-ASCII text with XML-special characters.
    with_unicode = titled('Title of item with unicode and special characters')
    with_unicode.description = "[Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!]"
    items['item_with_unicode'] = with_unicode

    with_enclosure = titled('Title of item with enclosure')
    with_enclosure.enclosure.url = 'http://example.com/content'
    with_enclosure.enclosure.length = 0
    with_enclosure.enclosure.type = 'text/plain'
    items['item_with_enclosure'] = with_enclosure

    self.items = items
def parse_item(self, response):
    """Turn a scraped page into a single RSS item.

    The title comes from the ``DCTERMS.title`` meta tag. The publication
    date is the ``datePublished`` value found in the page's script text.
    The author is every ``p.statement-ministers`` text node joined with
    ``" & "``. The description is the markup of the ``div div p``
    elements after skipping the leading ones counted by ``cutoff``.

    Yields:
        One populated ``RssItem``.

    Raises:
        AttributeError: if the title meta tag or the ``datePublished``
            entry is missing (the selector call returns ``None``).
    """
    item = RssItem()
    item.title = (
        response.css('meta[name="DCTERMS.title"]::attr(content)').get().strip()
    )
    item.link = response.url
    item.guid = response.url

    # Capture only the quoted value. A greedy `".*"` pattern extends to the
    # last quote on the line, so when the JSON is emitted on a single line
    # the extracted "date" would also contain the keys that follow it.
    # `\s*` also accepts JSON written without a space after the colon.
    item.pubDate = (
        response.css("script::text")
        .re_first(r'"datePublished":\s*"([^"]*)"')
        .strip()
    )

    author = response.css("p.statement-ministers::text").getall()
    item.author = " & ".join(author)

    description = response.css("div div p").getall()
    cutoff = 2  # publish date & author
    if len(author) > 1:
        cutoff += len(author)
    item.description = "".join(description[cutoff:])
    yield item
def __init__(self, *args, **kwargs):
    """Create the raw values and ``RssItem`` fixtures used by the tests.

    Besides the individual ``self.item_with_*`` fixtures, this builds
    ``self.items_with_guid``: a dict whose key ``0`` holds items built
    from ``self.guids[0]`` and whose key ``1`` holds items built from
    ``self.guids[1]``, each list setting the guid in several ways.
    """
    super(TestSimpleElements, self).__init__(*args, **kwargs)

    # Raw values shared by the fixtures below.
    self.empty_text = ""
    self.non_empty_title = "Non-empty title"
    self.non_empty_description = "Non-empty description"
    self.categories = [
        "first category name",
        "second category name",
        "third category name",
        "fourth category name",
    ]
    self.unescaped_title = "<b>Non-empty<br/> title</b>"
    self.unescaped_description = "<b>Non-empty description</b><img src='url'/>"

    def titled_item():
        # New item carrying only the standard non-empty title.
        fixture = RssItem()
        fixture.title = self.non_empty_title
        return fixture

    # Items with a single element set.
    blank_title = RssItem()
    blank_title.title = self.empty_text
    self.item_with_empty_title_only = blank_title

    blank_description = RssItem()
    blank_description.description = self.empty_text
    self.item_with_empty_description_only = blank_description

    self.item_with_title_only = titled_item()

    only_description = RssItem()
    only_description.description = self.non_empty_description
    self.item_with_description_only = only_description

    # Items with one to four categories.
    one_category = titled_item()
    one_category.category = self.categories[0]
    self.item_with_single_category = one_category

    two_categories = titled_item()
    two_categories.category = self.categories[:2]
    self.item_with_2_categories = two_categories

    three_categories = titled_item()
    three_categories.category = self.categories[:3]
    self.item_with_3_categories = three_categories

    four_categories = titled_item()
    four_categories.category = self.categories[:4]
    self.item_with_4_categories = four_categories

    # Item whose title and description contain markup.
    unescaped = RssItem()
    unescaped.title = self.unescaped_title
    unescaped.description = self.unescaped_description
    self.item_with_unescaped_text = unescaped

    self.guids = [
        {'guid': 'identifier 1', 'isPermaLink': False},
        {'guid': 'identifier 2', 'isPermaLink': True},
    ]
    first_guid, second_guid = self.guids

    # Items built from the first guid description.
    from_first = []

    by_string = titled_item()
    by_string.guid = first_guid['guid']
    from_first.append(by_string)

    by_dict = titled_item()
    by_dict.guid = first_guid
    from_first.append(by_dict)

    by_attribute = titled_item()
    by_attribute.guid.guid = first_guid['guid']
    from_first.append(by_attribute)

    # Items built from the second guid description.
    from_second = []

    by_dict = titled_item()
    by_dict.guid = second_guid
    from_second.append(by_dict)

    by_parts = titled_item()
    by_parts.guid = second_guid['guid']
    by_parts.guid.isPermaLink = second_guid['isPermaLink']
    from_second.append(by_parts)

    by_element = titled_item()
    by_element.guid = GuidElement(**second_guid)
    from_second.append(by_element)

    self.items_with_guid = {0: from_first, 1: from_second}
def __init__(self, *args, **kwargs):
    """Define namespaced element/item classes and build the item fixtures.

    The four ``NSItem*`` classes are also published as attributes of
    ``PredefinedItems``. The fixtures end up in three attributes:

    * ``self.items`` - fixture name -> item.
    * ``self.ns_items_of_same_cls`` - ``(name, item class, item)`` tuples.
    * ``self.ns_items`` - ``(name, namespaces, item class, item)`` tuples,
      where namespaces is ``None`` or a list/tuple/dict of prefix-URI
      pairs and item class is ``None``, an empty tuple, a class or a
      dotted path string.
    """

    # --- element classes with namespaced attributes ---------------------
    class NSElement0(ItemElement):
        attr01 = ItemElementAttribute(ns_prefix="prefix01", ns_uri="id01")

    class NSElement1(ItemElement):
        prefix11__attr11 = ItemElementAttribute(ns_uri="id11")
        prefix12__attr12 = ItemElementAttribute(ns_prefix="prefix12",
                                                ns_uri="id12")

    class NSElement2(ItemElement):
        attr21 = ItemElementAttribute(is_content=True)
        pseudo_prefix22__attr22 = ItemElementAttribute(ns_prefix="prefix22",
                                                       ns_uri="id22")

    class NSElement3(ItemElement):
        attr31 = ItemElementAttribute(is_content=True)
        attr32 = ItemElementAttribute(ns_prefix="prefixa", ns_uri="id32")

    class NSElement4(ItemElement):
        attr41 = ItemElementAttribute()
        prefix42__attr41 = ItemElementAttribute(ns_uri="id42")

    # --- item classes built from the elements above ---------------------
    class NSItem0(RssItem):
        elem0 = ItemElement()
        elem1 = NSElement0(ns_prefix="el_prefix1", ns_uri="el_id1")
        el_prefix2__elem2 = NSElement1(ns_uri="el_id2")
        el_prefix3__elem3 = NSElement2(ns_prefix="el_prefix3",
                                       ns_uri="el_id3")
        el_pseudo_prefix4__elem4 = NSElement0(ns_prefix="el_prefix4",
                                              ns_uri="el_id4")

    class NSItem1(RssItem):
        elem1 = NSElement0(ns_prefix="el_prefix1", ns_uri="el_id1")
        el_prefix__elem2 = NSElement1(ns_uri="el_id2")
        elem3 = NSElement2(ns_prefix="el_prefix", ns_uri="el_id3")
        el_pseudo_prefix4__elem4 = NSElement0(ns_prefix="el_prefix4",
                                              ns_uri="el_id4")

    class NSItem2(RssItem):
        elem1 = NSElement3(ns_prefix="prefix", ns_uri="el_id1")
        prefix__elem2 = NSElement3(ns_uri="el_id2")
        elem3 = NSElement3(ns_prefix="prefix", ns_uri="el_id3")
        el_pseudo_prefix4__elem4 = NSElement3(ns_prefix="prefix",
                                              ns_uri="el_id4")

    class NSItem3(RssItem):
        elem1 = NSElement3(ns_uri="el_id1")
        elem2 = NSElement3(ns_uri="el_id2")
        elem3 = NSElement3(ns_prefix="prefix", ns_uri="el_id3")
        el_pseudo_prefix4__elem3 = NSElement3(ns_prefix="prefix2",
                                              ns_uri="el_id4")
        elem4 = NSElement4()
        elem5 = NSElement4()

    # Publish each item class on PredefinedItems under its own name.
    for item_cls in (NSItem0, NSItem1, NSItem2, NSItem3):
        setattr(PredefinedItems, item_cls.__name__, item_cls)

    def titled(title, item_cls=RssItem):
        # New instance of item_cls that has nothing but its title set.
        entry = item_cls()
        entry.title = title
        return entry

    # --- plain RSS items --------------------------------------------------
    minimal_item = titled('Title of minimal item')

    minimal_item2 = RssItem()
    minimal_item2.description = 'Description of minimal item'

    simple_item = titled('Title of simple item')
    simple_item.description = 'Description of simple item'

    item_with_single_category = titled('Title of item with single category')
    item_with_single_category.category = 'Category 1'

    item_with_multiple_categories = titled(
        'Title of item with multiple categories')
    item_with_multiple_categories.category = ['Category 1', 'Category 2']

    item_with_guid = titled('Title of item with guid')
    item_with_guid.guid = 'Identifier'

    item_with_unicode = titled(
        'Title of item with unicode and special characters')
    item_with_unicode.description = "[Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!]"

    item_with_enclosure = titled('Title of item with enclosure')
    item_with_enclosure.enclosure.url = 'http://example.com/content'
    item_with_enclosure.enclosure.length = 0
    item_with_enclosure.enclosure.type = 'text/plain'

    # --- namespaced items -------------------------------------------------
    item_with_unique_ns = titled("Title of item with unique namespaces",
                                 NSItem0)
    item_with_unique_ns.elem1.attr01 = ""
    item_with_unique_ns.el_prefix2__elem2.prefix11__attr11 = 0
    item_with_unique_ns.el_prefix2__elem2.prefix12__attr12 = ""
    item_with_unique_ns.el_prefix3__elem3.attr21 = "value3_21"
    item_with_unique_ns.el_prefix3__elem3.pseudo_prefix22__attr22 = 42
    item_with_unique_ns.el_pseudo_prefix4__elem4.attr01 = ""

    item_with_non_unique_ns = titled("Title of item with unique namespaces",
                                     NSItem1)
    item_with_non_unique_ns.elem1.attr01 = "-"
    item_with_non_unique_ns.el_prefix__elem2.prefix11__attr11 = -1
    item_with_non_unique_ns.el_prefix__elem2.prefix12__attr12 = "-"
    item_with_non_unique_ns.elem3.attr21 = "yet another value3_21"
    item_with_non_unique_ns.elem3.pseudo_prefix22__attr22 = 4224
    item_with_non_unique_ns.el_pseudo_prefix4__elem4.attr01 = "-"

    item_with_non_unique_ns2 = titled(
        "Title of item with unique namespaces 2", NSItem1)
    item_with_non_unique_ns2.elem1.attr01 = "0"
    item_with_non_unique_ns2.el_prefix__elem2.prefix11__attr11 = -999
    item_with_non_unique_ns2.elem3.attr21 = "value"
    item_with_non_unique_ns2.elem3.pseudo_prefix22__attr22 = 42
    item_with_non_unique_ns2.el_pseudo_prefix4__elem4.attr01 = ""

    item_with_same_ns_prefixes = titled(
        "Title of item with same namespace prefixes", NSItem2)
    item_with_same_ns_prefixes.elem1.attr31 = "Content value 11ё"
    item_with_same_ns_prefixes.prefix__elem2.attr32 = "Attribute value 22"
    item_with_same_ns_prefixes.elem3.attr31 = "Content value 11"
    item_with_same_ns_prefixes.elem3.attr32 = "Attribute value 32"
    item_with_same_ns_prefixes.el_pseudo_prefix4__elem4.attr32 = ""

    item_with_default_nses = titled("Title of item with default namespaces",
                                    NSItem3)
    item_with_default_nses.elem1.attr31 = "Content value 11ё"
    item_with_default_nses.elem2.attr32 = "Attribute value 22"
    item_with_default_nses.elem3.attr31 = "Content value 11"
    item_with_default_nses.elem3.attr32 = "Attribute value 32"
    item_with_default_nses.el_pseudo_prefix4__elem3.attr32 = ""
    item_with_default_nses.elem4.attr41 = "A41 b"
    item_with_default_nses.elem4.prefix42__attr41 = "0"

    # item_with_non_unique_ns2 is only referenced from ns_items_of_same_cls.
    self.items = {
        'minimal_item': minimal_item,
        'minimal_item2': minimal_item2,
        'simple_item': simple_item,
        'item_with_single_category': item_with_single_category,
        'item_with_multiple_categories': item_with_multiple_categories,
        'item_with_guid': item_with_guid,
        'item_with_unicode': item_with_unicode,
        'item_with_enclosure': item_with_enclosure,
        'item_with_unique_ns': item_with_unique_ns,
        'item_with_non_unique_ns': item_with_non_unique_ns,
        'item_with_same_ns_prefixes': item_with_same_ns_prefixes,
        'item_with_default_nses': item_with_default_nses,
    }

    self.ns_items_of_same_cls = [
        ('item_with_non_unique_ns5', NSItem1, item_with_non_unique_ns),
        ('item_with_non_unique_ns4', NSItem1, item_with_non_unique_ns2),
    ]

    # Prefix/URI pairs that are supplied in more than one container type.
    unique_ns_pairs = [
        ("el_prefix1", "el_id1"),
        ("prefix01", "id01"),
        ("el_prefix2", "el_id2"),
        ("prefix11", "id11"),
        ("prefix12", "id12"),
    ]
    same_prefix_pairs = [
        ("prefix", "el_id1"),
        ("prefixa", "id32"),
        ("unused_prefix", "id000"),
    ]

    self.ns_items = [
        # Unique namespaces: given as list, tuple and dict, then by class.
        ('item_with_unique_ns2', list(unique_ns_pairs), None,
         item_with_unique_ns),
        ('item_with_unique_ns2', tuple(unique_ns_pairs), tuple(),
         item_with_unique_ns),
        ('item_with_unique_ns2', dict(unique_ns_pairs), None,
         item_with_unique_ns),
        ('item_with_unique_ns3', None, NSItem0, item_with_unique_ns),
        ('item_with_unique_ns3', None, 'tests.test_exporter.NSItem0',
         item_with_unique_ns),

        # Non-unique namespaces.
        ('item_with_non_unique_ns2',
         [("el_prefix1", "el_id1"),
          ("prefix01", "id01"),
          ("prefix11", "id11"),
          ("prefix12", "id12"),
          ("prefix22", "id22"),
          ("el_prefix4", "el_id4")],
         None,
         item_with_non_unique_ns),
        ('item_with_non_unique_ns3',
         {"el_prefix1": "el_id1",
          "prefix01": "id01",
          "prefix11": "id11",
          "prefix12": "id12",
          "prefix22": "id22"},
         None,
         item_with_non_unique_ns),
        ('item_with_non_unique_ns2', None, NSItem1, item_with_non_unique_ns),
        ('item_with_non_unique_ns2', None, 'tests.test_exporter.NSItem1',
         item_with_non_unique_ns),

        # Same prefix reused for several URIs.
        ('item_with_same_ns_prefixes2', list(same_prefix_pairs), None,
         item_with_same_ns_prefixes),
        ('item_with_same_ns_prefixes2', dict(same_prefix_pairs), None,
         item_with_same_ns_prefixes),
        ('item_with_same_ns_prefixes3', None, NSItem2,
         item_with_same_ns_prefixes),
        ('item_with_same_ns_prefixes3', None, 'tests.test_exporter.NSItem2',
         item_with_same_ns_prefixes),

        # Elements declared without a prefix.
        ('item_with_default_nses3',
         {'prefixa': 'id32', 'prefix2': 'el_id4'},
         None,
         item_with_default_nses),
        ('item_with_default_nses2', None, 'tests.test_exporter.NSItem3',
         item_with_default_nses),
        ('item_with_default_nses2', None, NSItem3, item_with_default_nses),
    ]