def get_items(self, soup):
    """Extract feed items from *soup* using the configured selectors.

    Walks ``self.parent_selectors`` to narrow down to a single parent node,
    then builds one ``rfeed.Item`` per element matching
    ``self.element_selector``, filling ``self.item_template`` fields via
    ``str.format_map`` from the named extractions.
    """
    parent_node = soup
    for sel in self.parent_selectors:
        try:
            ordinal = sel['ordinal']
        except KeyError:
            # No ordinal key: a plain find() with the selector kwargs.
            parent_node = parent_node.find(**sel)
        else:
            # Strip 'ordinal' before calling find_all(); the original passed
            # it through, where BeautifulSoup treats unknown keyword args as
            # attribute filters and would match nothing.
            query = {k: v for k, v in sel.items() if k != 'ordinal'}
            parent_node = parent_node.find_all(**query)[ordinal]
    items = []
    for el in parent_node.find_all(**self.element_selector):
        extractions = {
            name: extraction.extract(el)
            for name, extraction in self.extractions.items()
        }
        items.append(rfeed.Item(**{
            k: v.format_map(extractions)
            for k, v in self.item_template.items()
        }))
    if self.reverse_items:
        items.reverse()
    return items
def main():
    """Scrape the pre-parsed ``html_main`` page and write ``feed.xml``.

    The original shadowed the builtin ``id``, used an index loop over
    ``range(len(links))`` and ``items.insert(i, ...)``; this version pairs
    links and titles with ``zip`` and builds the list directly.
    """
    links = []
    for node in html_main.find_all("div", {"class": "mt-2 pt-2 text-right"}):
        # The short-link text looks like "... aptm.in/protip/<slug> » ...".
        slug = node.get_text().split("aptm.in/protip/")[1].split(" »")[0]
        links.append(f"https://aptmasterclass.com/protip/{slug}")

    titles = [
        heading.get_text().split("\n")[1].strip()
        for heading in html_main.find_all("h1", {"class": "heading mb-3"})
    ]

    items = [
        rfeed.Item(title=title, link=link, description=get_description(link))
        for link, title in zip(links, titles)
    ]

    feed = rfeed.Feed(
        title=FEED_TITLE,
        description=FEED_DESCRIPTION,
        link=FEED_LINK,
        language="pl",
        lastBuildDate=datetime.now(),
        items=items,
    )
    with open("feed.xml", "w") as f:
        f.write(feed.rss())
def generate_rss(blog_container: BlogContainer):
    """Generate RSS feed using rfeed"""
    request = blog_container.request
    settings = request.registry.settings
    blog_title = settings.get("blog.title")
    blog_email = settings.get("blog.rss_feed_email", "*****@*****.**")

    items = []
    for post_resource in blog_container.get_posts():
        post = post_resource.post
        # NOTE(review): this description string looks like a leftover from
        # the rfeed example code — confirm whether it should instead carry
        # the post's own summary.
        items.append(rfeed.Item(
            title=post.title,
            link=request.resource_url(post_resource),
            description="This is the description of the first article",
            author=blog_email,
            creator=post.author,
            guid=rfeed.Guid(str(post.id)),
            pubDate=post.published_at,
            extensions=[ContentItem(post_resource)]))

    return rfeed.Feed(
        title=blog_title,
        link=request.resource_url(blog_container, "rss"),
        description="",
        language="en-US",
        lastBuildDate=now(),
        items=items,
        extensions=[Content()])
def test_import_items():
    """
    Check if we can properly publish a new feed
    :return:
    """
    rname = "TestField2"
    rdescription = "Trying to create a new field and see if no data was lsot"
    # Creating the new field
    feed, nameOfFeed = rss.newFeed(rname, rdescription, debug=True)
    assert feed.link, "The new feed doesn't have any link , can't test the created content"
    parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # os.path.join keeps the test portable; the original hard-coded "\\"
    # separators and only worked on Windows.
    localPath = os.path.join(parent, "static", "rss", nameOfFeed + ".xml")
    item = rfeed.Item(title=rname,
                      link=feed.link,
                      description=rdescription,
                      pubDate=datetime.datetime.now())
    feed.items.append(item)
    with open(localPath, 'w') as f:
        f.write(feed.rss())
    items = rss.import_items(localPath)
    print("len item {}".format(len(items)))
    for item in items:
        print(item.description)
        if (item.description == rdescription
                and item.title == rname.replace(" ", "_")):
            # Round-trip succeeded: the field was created and re-imported.
            # (The original's "assert True" here was a no-op.)
            return
    assert False, "The new rss feed creation and import failed"
def convert_rss_obj(inbody):
    '''Provisional function to return RSS object for each include.'''
    # A fresh UUID serves as the item's globally-unique identifier.
    item_guid = str(uuid.uuid4())
    return rss.Item(description=inbody, guid=rss.Guid(item_guid))
async def mangarock_manga_feed(oid):
    """Build an RSS feed for the MangaRock title identified by *oid*.

    Returns an RSS response, or a (text response, 404) tuple when the API
    reports no such title.
    """
    base_url = 'https://api.mangarockhd.com/query/web401/info'
    async with aiohttp.request('GET', base_url, params={'oid': oid}) as resp:
        if resp.status != 200:
            # Bug fix: the original formatted {id} — the builtin function —
            # instead of the oid parameter.
            return text_response(f'No manga with ID {oid} could be found'), 404
        data = await resp.json()
    if data.get('code') != 0:
        return text_response(f'No manga with ID {oid} could be found'), 404
    data = data.get('data', {})
    # Query-string values arrive as strings; islice() requires an int.
    max_items = int(request.args.get('max', 20))
    chapters = [
        rfeed.Item(
            title=c.get('name'),
            link=f'https://mangarock.com/manga/{oid}/chapter/{c["oid"]}',
            # pubDate deliberately omitted so chapters keep their feed order.
        )
        for c in islice(reversed(data.get('chapters', ())), max_items)]
    updated_time = datetime.fromtimestamp(data['last_update'])
    return rss_response(rfeed.Feed(
        title=data.get('name'),
        link=f'https://mangarock.com/manga/{oid}',
        description=data.get('description'),
        lastBuildDate=updated_time,
        pubDate=updated_time,
        generator=generator_name,
        items=chapters))
def entry_to_rss(title, link, abstract, authors, year, arxiv, date):
    """Convert one paper entry into an rfeed.Item (*date* is 'YYYY-MM-DD')."""
    # The year component of *date* is ignored; the explicit *year* argument
    # is used instead, matching the original behaviour.
    _, month, day = date.split("-")
    published = datetime.datetime(int(year), int(month), int(day), 0, 0)
    return rfeed.Item(title=title,
                      author=authors,
                      link=link,
                      description=abstract,
                      guid=rfeed.Guid(arxiv),
                      pubDate=published)
def format_tweet(tweet, user):
    """Wrap a single tweet in an rfeed Item carrying the Twitter extension."""
    permalink = f"https://twitter.com/{user}/status/{tweet.id}"
    return rf.Item(
        title=format_title(tweet.text),
        description=format_description(tweet.html),
        creator=f"@{user}",
        link=permalink,
        guid=rf.Guid(permalink),
        pubDate=tweet.timestamp,
        extensions=[TwitterItem()],
    )
def root():
    """Serve every stored document from feed.sqlite3 as an RSS document."""
    db = sqlite3.connect('feed.sqlite3')
    cur = db.cursor()
    rows = cur.execute('select d, link, desc, uuid from feed order by d asc')
    # Truncated description doubles as the item title.
    items = [
        rfeed.Item(title=desc[:30],
                   link=link,
                   description=desc,
                   guid=rfeed.Guid(uuid))
        for d, link, desc, uuid in rows
    ]
    feed = rfeed.Feed(title='Docfeed',
                      link='http://rje.li',
                      description='Docfeed',
                      items=items)
    return feed.rss()
def create_feed(profile_html, url, max_items=10):
    """Build an rfeed.Feed from an Instagram profile page's HTML.

    Extracts the JSON blob assigned to ``window._sharedData`` and turns the
    first *max_items* timeline posts into feed items.
    """
    # Parse html
    soup = BeautifulSoup(profile_html, features="html5lib")

    # Extract the data object. Raw strings fix the invalid-escape-sequence
    # warnings ('\s' in non-raw literals) the original produced on modern
    # Python; the pattern is compiled once instead of re-parsed per script.
    shared_data_re = re.compile(r'^\s*window._sharedData\s*=')
    data = None
    for script in soup.find_all('script'):
        text = str(script.string)
        if shared_data_re.match(text):
            data = json.loads(
                re.sub(r';\s*$', '', shared_data_re.sub('', text)))

    # Select relevant data and build feed
    try:
        user = data['entry_data']['ProfilePage'][0]['graphql']['user']
        timeline = user['edge_owner_to_timeline_media']
    except (KeyError, TypeError):
        print(data, file=sys.stderr)
        raise

    items = []
    for item in timeline['edges'][:max_items]:
        node = item['node']
        link = 'https://www.instagram.com/p/' + node['shortcode']
        caption = node['accessibility_caption']
        # First sentence becomes the title, the remainder the description.
        caption_begin = caption.split('. ')[0] if caption is not None else ''
        caption_end = ('. '.join(caption.split('. ')[1:])
                       if caption is not None else '')
        items.append(
            rfeed.Item(title=caption_begin,
                       link=link,
                       description=caption_end,
                       author=user['full_name'],
                       guid=rfeed.Guid(node['id']),
                       pubDate=datetime.datetime.fromtimestamp(
                           node['taken_at_timestamp'])))

    feed = rfeed.Feed(title=user['full_name'],
                      link=url,
                      description=soup.title.text.strip(),
                      language="en-US",
                      lastBuildDate=datetime.datetime.now(),
                      items=items)
    return feed
def create_random_feed(filename, feed_length, title_length):
    """Build a feed of *feed_length* randomly selected rows from *filename*."""
    entries = [
        rfeed.Item(title=truncate(text, title_length), description=text)
        for text in (row.strip() for row in get_rows(filename, feed_length))
    ]
    feed = rfeed.Feed(title="Sample RSS Feed",
                      link="https://github.com/madhuri2k/fantastic-spoon",
                      description="A Random selection of items",
                      language="en-US",
                      items=entries)
    print("Feed is {}".format(feed.rss()))
    return feed
async def fanfic_feed(id):
    """Build an RSS feed of chapters for the fanfiction.net story *id*.

    Returns an RSS response, or a (text response, 404) tuple when the story
    page is missing or lacks the expected markup.
    """
    story_url = f'https://www.fanfiction.net/s/{id}'
    async with aiohttp.request('GET', story_url) as resp:
        if resp.status != 200:
            return text_response(f'No story with ID {id} could be found'), 404
        story_html = await resp.content.read()
    soup = BeautifulSoup(story_html, 'html.parser')
    header = soup.find(id='profile_top')
    if not header:
        return text_response(f'No story with ID {id} could be found'), 404
    # Title, author and description appear as consecutive siblings in the
    # profile header.
    title = header.find('b')
    author = title.find_next_sibling('a')
    description = author.find_next_sibling('div')
    updated = description.find_next_sibling('span').find(
        'span', attrs={'data-xutime': True})
    updated_time = datetime.fromtimestamp(int(updated['data-xutime']))
    chapter_select = soup.find(id='chap_select')
    if not chapter_select:
        return text_response(f'No story with ID {id} could be found'), 404
    # Bug fix: query-string values arrive as strings; islice() needs an int.
    max_items = int(request.args.get('max', 20))
    chapters = []
    for option in islice(reversed(chapter_select.find_all('option')),
                         max_items):
        # Option text looks like "3. Chapter title".
        chap_title = option.find(
            text=True, recursive=False).split('.', 1)[1].strip()
        chapters.append(rfeed.Item(
            title=chap_title,
            link=f'{story_url}/{option["value"]}'))
    return rss_response(rfeed.Feed(
        title=f'{title.text} by {author.text}',
        link=story_url,
        description=description.text,
        lastBuildDate=updated_time,
        pubDate=updated_time,
        generator=generator_name,
        items=chapters))
def posts_to_feed(self, posts, title, description):
    """Convert raw post dicts into an rfeed.Feed titled *title*."""
    items = []
    for post_data in posts:
        post_id = post_data["id"]
        # Fetch each post so we can get at its text body
        post = self.fetch_post(request, post_id)
        kwargs = {
            "title": post_data["title"],
            "description": ("Post created by user "
                            + post_data["username"] + " on "
                            + post_data["date"] + " in "
                            + post_data["community"] + "\n"
                            + post["text"]),
            "author": post_data["username"],
            # No url in original scheme, so just use global ID
            "guid": rfeed.Guid(
                self.make_posting_post_url(request, post_id)),
            "pubDate": self.database_date_to_datetime_object(
                post_data["date"]),
        }
        # Use the resource URL as the "link"
        if "resource_url" in post_data:
            kwargs["link"] = post_data["resource_url"]
        items.append(rfeed.Item(**kwargs))
    return rfeed.Feed(title=title,
                      link=request.url,
                      description=description,
                      language="en-US",
                      lastBuildDate=datetime.datetime.now(),
                      items=items)
def run(publishing, channel_config):
    """Publish *publishing* to the RSS channel described by *channel_config*.

    Writes the resulting XML both to the channel-local file and to the
    server's static path, preserving older items already on the feed.
    """
    print("____________________________\n"
          "Runs the rss publication")
    json_data = json.loads(channel_config)
    rname = json_data['Feed title']
    rdescription = json_data['Feed description']
    rbaselineFeed = json_data['URL of original feed (optional)']
    # Boolean flag replaces the original 0/1 integer.
    has_baseline_feed = rbaselineFeed != "None"

    # publishing.date_until and publishing.image_url were read but never
    # used by the original; they have been dropped.
    item = rfeed.Item(title=publishing.title,
                      link=publishing.link_url,
                      description=publishing.description,
                      pubDate=publishing.date_from)

    # os.path.join keeps path construction portable.
    localPath = os.path.join(os.path.dirname(__file__), "rss",
                             "feed_" + str(publishing.channel_id) + ".xml")
    parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    feed, nof = newFeed(rname, rdescription)
    serverPath = os.path.join(parent, "static", "rss", nof + ".xml")
    feed.items.append(item)

    if os.path.isfile(localPath):
        # import older publishing if any
        feed.items.extend(import_items(localPath))
    elif has_baseline_feed:
        # Remote rss feed not yet in our server
        feed.items.extend(import_items(rbaselineFeed))

    xml = feed.rss()
    with open(localPath, 'w') as f:
        f.write(xml)
    with open(serverPath, 'w') as f:
        f.write(xml)
def import_items(xml_path):
    """
    :param xml_path: the path to an existing xml file
    :return: return the list of all previous publication (items) on the feed.
    Since when we publish, we overwrite the xml file, we need the history of
    the publication to re-write them aswell.
    """
    items = []
    parsed = feedparser.parse(xml_path)
    print("Parsed xml from ", xml_path, ":", parsed)
    for post in parsed.entries:
        title = post.title if 'title' in post else None
        link = post.link if 'link' in post else None
        body = post.description if 'description' in post else None
        pub_date = None
        if 'published' in post:
            # Force the timezone token to GMT so the fixed strptime format
            # below always matches, whatever zone the feed declared.
            parts = post.published.split(" ")
            parts[5] = "GMT"
            pub_date = datetime.datetime.strptime(
                " ".join(parts[:6]), "%a, %d %b %Y %X GMT")
        # Bug fix: the original passed the raw string — or None when the
        # entry had no 'published' field — straight to strptime, raising a
        # TypeError on such entries.
        items.append(rfeed.Item(title=title, link=link,
                                description=body, pubDate=pub_date))
    return items
def root():
    """Serve the code.kiwi.com public Google Calendar as an RSS feed."""
    r = requests.get(
        'https://calendar.google.com/calendar/ical/' +
        'skypicker.com_dq9oupgj7ngbo0j41b0smoc0dk%40group.calendar.google.com' +
        '/public/basic.ics',
        timeout=10,
    )
    r.raise_for_status()
    events = []
    feed = rfeed.Feed(
        title='code.kiwi.com events',
        link='https://goo.gl/aCCGCB',
        description='code.kiwi.com events',
        items=events,
    )
    gcal = icalendar.Calendar.from_ical(r.text)
    for component in gcal.walk():
        if component.name != "VEVENT":
            continue
        # Google Calendar event permalink, rebuilt from the iCal UID.
        url = 'https://www.google.com/calendar/event?eid=' + \
            base64.b64encode(
                (component['UID'].split('@')[0] +
                 ' skypicker.com_dq9oupgj7ngbo0j41b0smoc0dk@g'
                 ).encode()).decode()
        description = component.get('description')
        if description:
            # Strip the auto-appended Czech "join this event" boilerplate.
            description = re.sub('Tato událost.*\nPřipojit se.*', '',
                                 description).strip()
        events.append(
            rfeed.Item(
                title=component.get('dtstart').dt.strftime(
                    '%e %B').strip() + ' | ' + str(component.get('summary')),
                link=url,
                # Bug fix: use the cleaned description; the original
                # re-read component.get('description') here, discarding
                # the cleanup performed above.
                description=str(description),
                guid=rfeed.Guid(url),
            ))
    return Response(feed.rss(), mimetype='application/rss+xml')
def generate(self):
    """Render all collected jobs as an RSS 2.0 document string."""
    author = 'Workable'
    items = []
    for job in self.jobs:
        job_link = job.get_link()
        items.append(rfeed.Item(
            title=job.get_position() + ', ' + job.get_company(),
            link=job_link,
            description=job.get_description(),
            author=author,
            # The job's own link doubles as its GUID.
            guid=rfeed.Guid(job_link),
            pubDate=job.get_date()))
    feed = rfeed.Feed(title='Workable\'s missing RSS feed',
                      link='http://mylk.wtf/workable-missing-rss',
                      description='Workable\'s missing RSS feed',
                      language='el-GR',
                      lastBuildDate=datetime.now(),
                      items=items)
    return feed.rss()
async def make_feed():
    """Scrape the Awful Hospital archive page into an rfeed.Feed."""
    base_url = 'http://www.bogleech.com/awfulhospital/archive.html'
    async with aiohttp.request('GET', base_url) as resp:
        if resp.status != 200:
            raise NotFound(f'Awful Hospital seems to be down right now')
        story_html = await resp.text()
        last_modified = resp.headers.get('Last-Modified')
    if last_modified:
        last_modified = datetime.strptime(last_modified,
                                          '%a, %d %b %Y %H:%M:%S %Z')
    else:
        last_modified = datetime.now()
    # The HTML is so gunked up even BeautifulSoup won't cut it
    # I'm pretty sure Bogleech updates it directly in Notepad
    # May the old gods forgive me
    layers = []
    for match in re.finditer(r'<a href="(.*?)">(.*?)<br>', story_html):
        link, title = match.group(1, 2)
        # Bug fix: strip any markup left in the title. The original
        # replacement lambda returned None for tags other than '</a>',
        # which makes re.sub raise TypeError; removing every tag fixes
        # that and behaves identically for the usual '</a>' case.
        title = html.unescape(re.sub(r'<.*?>', '', title))
        layers.append(rfeed.Item(title=title, link=link))
    layers.reverse()
    return rfeed.Feed(title='Awful Hospital',
                      description='Seriously the worst ever.',
                      link='http://www.bogleech.com/awfulhospital/',
                      lastBuildDate=last_modified,
                      pubDate=last_modified,
                      generator=generator_name,
                      items=layers)
f'<p>Hover Text: {title}</p>', f'<p><img src="{votey_img}" alt="Extra joke"></p>', ] time_raw = entry.published pub_time = datetime.strptime(time_raw, '%a, %d %b %Y %H:%M:%S %z') print(f"\nLink: {link}") print(f" - Comic: {comic_img}") print(f" - Title: {title}") print(f" - Votey: {votey_img}") item = rfeed.Item( title=page_title, link=link, description='\n'.join(clines), guid=rfeed.Guid(link), pubDate=pub_time, ) item_list.append(item) limit -= 1 out_feed = rfeed.Feed( title="Saturday Morning Breakfast Cereal", link='https://www.smbc-comics.com/', description="RSS feed for Saturday Morning Breakfast Cereal", language="en-US", lastBuildDate=datetime.now(), items=item_list, generator=f"smbc-rss.py {github_url}"
def build_website(in_path,
                  ignore_empty_posts=True,
                  index_template="templates/index.html",
                  post_template="templates/post.html",
                  css_and_assets_path="templates",
                  extension="md",
                  index_paste_where="<!--###POSTS_LIST###-->",
                  post_paste_where="<!--###POST_CONTENT###-->",
                  title_paste_where="<!--###POSTPAGE_TITLE###-->",
                  ul_class="postlist",
                  post_wrapper="postcontent",
                  headerseparator="---",
                  obligatory_header=None,
                  optional_header=None,
                  excerpt_type="chars",
                  excerpt_len="500",
                  excerpts_on=False,
                  readmore="Read more >>",
                  posts_per_page=0,
                  pages_in_multiple_files=False,
                  postlist_date_format="%d %b '%y",
                  rss_feed_on=True,
                  rss_feed_url="rss",
                  blurb_is_manual_excerpt=False,
                  rss_max_posts_number=10,
                  blog_domain="",
                  rss_feed_description='',
                  rss_feed_title="My blog's RSS feed"):
    """Build the whole static site (index, post pages, RSS feed, assets).

    Every stage is wrapped in its own best-effort try/except that prints the
    failure and carries on, matching the original behaviour.
    """
    # Mutable-default fix: None sentinels replace the original list defaults
    # (['title'] and ['author', 'timestamp', 'tags', 'excerpt']).
    if obligatory_header is None:
        obligatory_header = ['title']
    if optional_header is None:
        optional_header = ['author', 'timestamp', 'tags', 'excerpt']

    # --- Load, filter, order and enrich posts -----------------------------
    try:
        fresh_posts = generate_posts(in_path, extension)
    except Exception as e:
        print(
            "Could not generate posts. Did you provide correct path to the post folder?"
        )
        print(str(e))
    try:
        filtered_posts = filter_bad_dates(fresh_posts)
    except Exception as e:
        print("Could not filter posts. Dunno why.")
        print(str(e))
    try:
        ordered_posts = order(filtered_posts)
    except Exception as e:
        print("Could not order posts. It's impossible.")
        print(str(e))
    try:
        for post in ordered_posts:
            post.get_content(headerseparator=headerseparator,
                             obligatory=obligatory_header,
                             optional=optional_header)
            post.build_pretty_date(date_format=postlist_date_format)
            post.get_excerpt(len_type=excerpt_type, excerpt_len=excerpt_len)
    except Exception as e:
        print(
            "Something went wrong with generating content and prettyfying dates. WHY?"
        )
        print(str(e))

    # --- Rebuild the output folder ----------------------------------------
    # Delete target folder so it can be rebuilt without conflicts
    try:
        shutil.rmtree("site", ignore_errors=True)
    except Exception as e:
        print(
            "Could not delete previous site folder. Check file permissions for the script."
        )
        print(str(e))
    try:
        build_site_folders()
    except Exception as e:
        print("Folders could not be built. Check file permissions.")
        print(str(e))

    # --- Index and post pages ---------------------------------------------
    try:
        build_index_page(ordered_posts,
                         index_template,
                         ignore_empty=ignore_empty_posts,
                         paste_where=index_paste_where,
                         ul_class=ul_class,
                         excerpts_on=excerpts_on,
                         readmore=readmore,
                         posts_per_page=posts_per_page,
                         pages_in_multiple_files=pages_in_multiple_files)
    except Exception as e:
        print("Could not build index page. Did you provide a template?")
        print(str(e))
    try:
        build_posts_folder(ordered_posts,
                           post_template,
                           ignore_empty=ignore_empty_posts,
                           in_path=in_path,
                           extension=extension,
                           paste_where=post_paste_where,
                           paste_where_title=title_paste_where,
                           wrapper_class=post_wrapper)
    except Exception as e:
        print("Could not build post pages. Did you provide a template?")
        print(str(e))

    # --- RSS feed -----------------------------------------------------------
    try:
        # Build RSS Feed
        if rss_feed_on:
            rss_item_list = []
            # Take at most rss_max_posts_number posts. Off-by-one fix: the
            # original sliced with min(rss_max_posts_number,
            # len(ordered_posts) - 1), which silently dropped the last post
            # whenever fewer than rss_max_posts_number posts existed.
            for post in ordered_posts[:rss_max_posts_number]:
                post_url = blog_domain + "/posts/" + post.filename + ".html"
                rss_item_list.append(
                    rfeed.Item(title=post.title,
                               link=post_url,
                               description=post.excerpt,
                               author=post.author,
                               guid=rfeed.Guid(post_url),
                               pubDate=post.original_ugly_date))
            rss_feed = rfeed.Feed(title=rss_feed_title,
                                  link=blog_domain + "/" + rss_feed_url,
                                  description=rss_feed_description,
                                  language=locale.getlocale()[0],
                                  lastBuildDate=datetime.datetime.now(),
                                  items=rss_item_list)
            # Writing the RSS feed to a file at the specified location
            with open("site/" + rss_feed_url, 'w+') as rss_target:
                rss_target.write(rss_feed.rss())
    except Exception as e:
        print(
            "Could not generate the RSS feed or decide whether it should be generated at all"
        )
        print(str(e))

    # --- Copy all css, assets and lib ---------------------------------------
    try:
        copytree(css_and_assets_path + "/css", "site/css")
    except Exception as e:
        print(
            "Tried to copy contents of", css_and_assets_path,
            "/css folder but the folder does not exist! Make one, even empty!")
        print(str(e))
    try:
        copytree(css_and_assets_path + "/assets", "site/assets")
    except Exception as e:
        print(
            "Tried to copy contents of", css_and_assets_path,
            "/assets folder but the folder does not exist! Make one, even empty!"
        )
        print(str(e))
    try:
        copytree(css_and_assets_path + "/lib", "site/lib")
    except Exception as e:
        print(
            "Tried to copy contents of", css_and_assets_path,
            "/lib folder but the folder does not exist! Make one, even empty!")
        print(str(e))
tor_tag = post_soup.find( 'a', attrs={'title': re.compile('Download attachment')}) mag_link = mag_tag.attrs['href'] tor_link = tor_tag.attrs['href'] data.append((post_name, post_url, tor_link, mag_link)) except: mag_link = None tor_link = None #print("%s:%s => %s"%(name,post_name,mag_link)) return (name, data) with Pool(16) as p: for name, data in p.map(fun, nms): movie_list[name] = data for ptype, purl, torurl, magurl in data: item_list.append( rfeed.Item(title=name + ' : ' + ptype, link=purl, enclosure=rfeed.Enclosure( magurl, 10000, 'application/x-bittorrent'))) #print(len(movie_list)) feed = rfeed.Feed(title="TR RSS Feed", description="Unofficial RSS feed for TR", link="localhost", lastBuildDate=datetime.datetime.now(), items=item_list) print(feed.rss())
feedItems = [] latestUpdate = parser.parse("1970-01-01T00:00:00Z") output.sort(key=lambda item: item["updated"] if "updated" in item else "---", reverse=True) for item in output: if "updated" in item and parser.parse(item["updated"]) > latestUpdate: latestUpdate = parser.parse(item["updated"]) if "updated" in item and (datetime.datetime.now(datetime.timezone.utc) - parser.parse(item["updated"])).days < 7: feedItems.append(rfeed.Item( title = item["title"] + " updated to " + item["version"] if "version" in item else "new version", link = "https://db.universal-team.net/" + webName(item["systems"][0]) + "/" + webName(item["title"]), description = (item["version_title"] if "version_title" in item else item["version"]) + (("<hr />" + item["update_notes"] if "update_notes" in item else "")), author = item["author"], guid = rfeed.Guid("https://db.universal-team.net/" + webName(item["systems"][0]) + "/" + webName(item["title"])), pubDate = parser.parse(item["updated"]), categories = item["systems"], extensions = [ rfeed.Enclosure( url = item["image"], length = len(requests.get(item["image"]).content), type = "image/png" ) if "image" in item else None ] )) if len(feedItems) > 0 and latestUpdate > oldUpdate: feed = rfeed.Feed( title = "Universal-DB", link = "https://db.universal-team.net", description = "A database of DS and 3DS homebrew", language = "en-US", lastBuildDate = datetime.datetime.now(),
the_date.strftime("%B %d, %Y")) img_url = "{}/{}".format(config.get("cache_url"), img_filename) clines = [] clines.append('<p><img src="{}" alt="{}"></p>'.format(img_url, title)) clines.append('<p>') clines.append(' <a href="{}">View on King Comics</a> -'.format(url)) clines.append(' <a href="{}">GitHub Project</a>'.format(github_url)) clines.append('</p>') pubtime = datetime.combine(the_date, datetime.min.time()) pubtime = pubtime.replace(tzinfo=pytz.UTC) item = rfeed.Item(title=title, link=url, description='\n'.join(clines), guid=rfeed.Guid(url), pubDate=pubtime) item_list.append(item) # Start building the feed feed = rfeed.Feed( title=entry.get('name'), link="{}/{}".format(root_url, slug), description="RSS feed for {}".format(entry.get('name')), language='en-US', lastBuildDate=datetime.now(), items=item_list, generator="comics-rss.py ({})".format(github_url), )
''' This is the example of the RSS library.'''
import datetime

import rfeed as rss

# Two hand-written sample articles demonstrate the Item constructor.
first_article = rss.Item(
    title="First article",
    link="http://www.example.com/articles/1",
    description="This is the description of the first article",
    author="Santiago L. Valdarrama",
    guid=rss.Guid("http://www.example.com/articles/1"),
    pubDate=datetime.datetime(2014, 12, 29, 10, 00))

second_article = rss.Item(
    title="Second article",
    link="http://www.example.com/articles/2",
    description="This is the description of the second article",
    author="Santiago L. Valdarrama",
    guid=rss.Guid("http://www.example.com/articles/2"),
    pubDate=datetime.datetime(2014, 12, 30, 14, 15))

items = [first_article, second_article]

feed = rss.Feed(
    title="Sample RSS Feed",
    link="http://www.example.com/rss",
    description=
    "This is an example of how to use rfeed to generate an RSS 2.0 feed",
    language="en-US",
    lastBuildDate=datetime.datetime.now(),
    items=items)

print(feed.rss())
def __createItem(self, item):
    """Turn a stored video record into a podcast-style rfeed.Item."""
    media_url = self.urlPrefix + 'media/' + item['video_id'] + '.m4a'
    # The enclosure URL doubles as the item's GUID.
    enclosure = rfeed.Enclosure(url=media_url, length=0, type='audio/mp4')
    return rfeed.Item(title=item['title'],
                      enclosure=enclosure,
                      guid=rfeed.Guid(media_url),
                      pubDate=item['date'])