def handle_latest_episode_intent(self, message):
    utter = message.data['utterance']
    podcast_names = [
        self.settings["nameone"], self.settings["nametwo"],
        self.settings["namethree"]
    ]
    podcast_urls = [
        self.settings["feedone"], self.settings["feedtwo"],
        self.settings["feedthree"]
    ]

    #check if the user specified a podcast to check for a new podcast
    for i in range(0, len(podcast_names)):
        #skip if podcast slot left empty
        if podcast_names[i] == "":
            continue
        elif podcast_names[i].lower() in utter.lower():
            parsed_feed = pp.parse(podcast_urls[i],
                                   urllib.urlopen(podcast_urls[i]))
            last_episode = parsed_feed['episodes'][0]['title']
            speech_string = "The latest episode of " + podcast_names[i] + \
                " is " + last_episode
            self.speak(speech_string)
            return True

    #if no podcast names are provided, list all new episodes
    new_episodes = []
    for i in range(0, len(podcast_urls)):
        if not podcast_urls[i]:
            continue
        parsed_feed = pp.parse(podcast_urls[i],
                               urllib.urlopen(podcast_urls[i]))
        last_episode = parsed_feed['episodes'][0]['title']
        new_episodes.append(last_episode)

    speech_string = "The latest episodes are the following: "
    for i in range(0, len(new_episodes)):
        #if the podcast is the last in a list add "and" before the podcast name
        if i == (len(new_episodes) - 1) and i > 0:
            speech_string = speech_string + "and " + podcast_names[i] + \
                ": " + new_episodes[i]
        else:
            speech_string = speech_string + podcast_names[i] + \
                ": " + new_episodes[i] + ", "
    self.speak(speech_string)

def CPS_start(self, phrase, data):
    self.log.info("CPS_start phrase: {} data: {}".format(phrase, data))
    tracklist = []
    parsed_feed = pp.parse(
        data,
        urllib.request.urlopen(
            Request(data, data=None,
                    headers={'User-Agent': self.user_agent})))
    episode_title = parsed_feed['episodes'][0]['title']

    # try and parse the rss feed, some are incompatible
    try:
        episode = parsed_feed["episodes"][0]["enclosures"][0]["url"]
    except:
        self.speak_dialog('badrss')

    # check for any redirects
    episode = urllib.request.urlopen(
        Request(episode, data=None,
                headers={'User-Agent': self.user_agent}))
    redirected_episode = episode.geturl()
    http_episode = re.sub('https', 'http', redirected_episode)
    self.log.info("http_episode: {}".format(http_episode))
    tracklist.append(http_episode)

    if self.state in ['playing', 'paused']:
        self.mediaplayer.stop()
        self.mediaplayer.clear_list()

    self.mediaplayer.add_list(tracklist)
    # self.speak(self._get_play_message(data))
    self.mediaplayer.play()
    self.state = 'playing'

def main():
    """Playing media."""
    parser = argparse.ArgumentParser(
        description="This script plays a given media on the mpd (local) server")
    parser.add_argument(
        "-c",
        choices=['play', 'stop', 'status', 'volup', 'voldown', 'pod'],
        required=True)
    parser.add_argument("-u", help="url to play")
    args = parser.parse_args()
    media = args.u
    command = args.c
    myplayer = player()
    if command == 'play':
        if media is not None:
            myplayer.play(media)
        else:
            print "You must enter a media url/path to play"
    elif command == 'stop':
        myplayer.stop()
    elif command == 'volup':
        myplayer.volup()
    elif command == 'voldown':
        myplayer.voldown()
    elif command == 'status':
        state = myplayer.status()
        for cle, val in state.items():
            print cle + " : " + val
    elif command == 'pod':
        parsed = podcastparser.parse(media, urllib.urlopen(media))
        pprint.pprint(parsed)

def run():
    try:
        for podcast in self.data["podcasts"]:
            raw = urlopen(
                podcast["feed_url"],
                timeout=self._podcast_timeout,
                context=ssl.create_default_context(cafile=certifi.where()),
            )
            parsed = podcastparser.parse(podcast["feed_url"], raw)
            episodes = parsed["episodes"]
            podcast["episodes"] = []
            for episode in episodes:
                title = episode["title"]
                media_url = episode["enclosures"][0]["url"]
                # podcast['episodes'].append({"title": unicodedata.normalize('NFKD', title).encode('ascii', 'ignore'), "url": media_url})
                podcast["episodes"].append({
                    "title": title,
                    "url": media_url
                })
        self.save()
        logger.info("Transistor Library: done downloading podcasts infos")
    except Exception as e:
        logger.error(
            "Transistor: Can't retrieve podcast data: {}".format(str(e)))

def _handle_paged_feed(self, max_episodes):
    page = 2
    remaining_episodes = max_episodes - len(self.parsed['episodes'])
    while ('paged_feed_next' in self.parsed and
           page < self.PAGED_FEED_MAX_PAGES and
           remaining_episodes > 0):
        # Take the next page from the paged feed
        url = self.parsed['paged_feed_next']
        del self.parsed['paged_feed_next']

        if not url:
            break

        try:
            logger.debug('Downloading page %d from %s', page, url)
            stream = util.urlopen(url)
            parsed = podcastparser.parse(url, stream, remaining_episodes)
            added_episodes = len(parsed['episodes'])
            remaining_episodes -= added_episodes
            logger.debug('Page %d contains %d additional episodes',
                         page, added_episodes)
            self.parsed['episodes'].extend(parsed['episodes'])

            # Next iteration if we still have a next page
            if 'paged_feed_next' in parsed:
                self.parsed['paged_feed_next'] = parsed['paged_feed_next']
        except Exception as e:
            logger.warn('Error while fetching feed page %d from %s: %s',
                        page, url, e)
            # Give up, don't try to download additional pages here
            break

        page += 1

def refresh():
    for rss in rss_links:
        parsed = podcastparser.parse(rss, urllib.request.urlopen(rss), 20)
        title = parsed.get('title')
        description = remove_html(parsed.get('description'))
        image = parsed.get('cover_url')
        link = parsed.get('link')

        podcast = Podcast.query.filter_by(link=parsed.get('link')).first()
        if podcast is None:
            podcast = Podcast(title=title, description=description,
                              image=image, link=link)
            db.session.add(podcast)
            db.session.commit()

        for episode in parsed.get('episodes'):
            episode_title = episode.get('title')
            episode_link = episode.get('link')
            episode_audio_url = episode.get('enclosures')[0]['url']
            episode_time_published = episode.get('published')
            episode_length = episode.get('total_time')
            episode_podcast = podcast

            episode = Episode.query.filter_by(audio_url=episode_audio_url).first()
            if episode is None:
                episode = Episode(title=episode_title, link=episode_link,
                                  audio_url=episode_audio_url,
                                  time_published=episode_time_published,
                                  length=episode_length,
                                  podcast=episode_podcast)
                db.session.add(episode)
                db.session.commit()

def __init__(self, channel, max_episodes):
    url = channel.authenticate_url(channel.url)
    logger.info('Parsing via podcastparser: %s', url)

    headers = {}
    if channel.http_etag:
        headers['If-None-Match'] = channel.http_etag
    if channel.http_last_modified:
        headers['If-Modified-Since'] = channel.http_last_modified

    try:
        stream = util.urlopen(url, headers)
        self.status = 200
        info = stream.info()
        self.etag = info.get('etag')
        self.modified = info.get('last-modified')
        self.parsed = podcastparser.parse(url, stream, max_episodes)
        self._handle_paged_feed(max_episodes)
    except urllib.error.HTTPError as error:
        self.status = error.code
        if error.code == 304:
            logger.info('Not modified')
        else:
            logger.warn('Feed update failed: %s', error)
            raise error

        self.etag = None
        self.modified = None
        self.parsed = None

def update_using_feedservice(urls):
    import podcastparser
    from urllib.request import urlopen

    podcasts = []
    for url in urls:
        feed = podcastparser.parse(url, urlopen(url), 5)
        if feed is None:
            _LOGGER.info("Feed not updated: %s", url)
            continue

        # Handle permanent redirects
        if feed.get("new_location", False):
            new_url = feed["new_location"]
            _LOGGER.info("Redirect %s => %s", url, new_url)
            url = new_url

        # Error handling
        if feed.get("errors", False):
            _LOGGER.error("Error parsing feed: %s", repr(feed["errors"]))
            continue

        # Update per-podcast metadata
        podcast = {
            "title": feed.get("title", ""),
            "link": feed.get("link", url),
            "description": feed.get("description", ""),
            "cover_url": feed.get("logo", ""),
            "episodes": [parse_entry(entry) for entry in feed["episodes"]],
        }
        podcasts.append(podcast)

    return podcasts

def parse_feed(event, context):
    feedurl = event['queryStringParameters']['feedUrl']
    parsed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl))
    body = {
        'title': parsed['title'],
        'link': parsed['link'],
        'description': parsed['description'],
        'episodes': [
            {
                'title': e['title'],
                'description': e['description'],
                'published': e['published'],
                'mediaUrl': e['enclosures'][0]['url']
            }
            for e in parsed['episodes']
        ]
    }
    response = {
        'statusCode': 200,
        'headers': {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        },
        'body': json.dumps(body),
        'isBase64Encoded': False
    }
    return response

def __init__(self, channel, max_episodes):
    url = channel.authenticate_url(channel.url)
    logger.info('Parsing via podcastparser: %s', url)

    headers = {}
    if channel.http_etag:
        headers['If-None-Match'] = channel.http_etag
    if channel.http_last_modified:
        headers['If-Modified-Since'] = channel.http_last_modified

    try:
        stream = util.urlopen(url, headers)
        self.status = 200
        info = stream.info()
        self.etag = info.get('etag')
        self.modified = info.get('last-modified')
        self.parsed = podcastparser.parse(url, stream, max_episodes)
    except urllib.error.HTTPError as error:
        self.status = error.code
        if error.code == 304:
            logger.info('Not modified')
        else:
            logger.warn('Feed update failed: %s', error)
            raise error

        self.etag = None
        self.modified = None
        self.parsed = None

def __init__(self, link):
    super().__init__()
    feed = podcastparser.parse(link, urlopen(link))
    self.title = feed['title']
    for item in feed['episodes']:
        self.items.append(PodcastArticle(item['title'],
                                         item['description'],
                                         item['link'],
                                         date.fromtimestamp(item['published'])))
    self.items.reverse()

def fetch_episodes(url):
    parsed = podcastparser.parse(url, urlopen(url))
    #print(parsed)
    new = [dict(title=episode['title'],
                pub=episode['published'],
                duration=str(datetime.timedelta(seconds=episode['total_time'])),
                description=episode['description'],
                uri_link=episode['enclosures'][0]['url'])
           for episode in parsed['episodes']]
    return new

def fetch_feed_title():
    if request.method == 'POST':
        posted_data = json.loads(json.dumps(request.json))
        url = posted_data['url']
        parsed = podcastparser.parse(url, urlopen(url))
        json_data = [{"title": parsed['title']}]
        return json.dumps(json_data)

def __init__(self, url):
    self.url = url
    # get the 5 last episodes from podcast at url (podcastparser sorts by published date)
    self.pc = podcastparser.parse(self.url,
                                  stream=urllib.urlopen(self.url),
                                  max_episodes=5)
    self.episodes = []

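# Note: the snippet above uses the Python 2 urllib.urlopen API. A minimal
# Python 3 sketch of the same call (the helper name fetch_latest is
# illustrative only and not taken from the snippet above):
import urllib.request
import podcastparser

def fetch_latest(feed_url, count=5):
    # Open the feed over HTTP and let podcastparser cap the episode list.
    with urllib.request.urlopen(feed_url) as stream:
        return podcastparser.parse(feed_url, stream, max_episodes=count)
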
def _parse_feed(self, url, etag, modified, autodiscovery=True):
    headers = {}
    if modified is not None:
        headers['If-Modified-Since'] = modified
    if etag is not None:
        headers['If-None-Match'] = etag

    if url.startswith('file://'):
        is_local = True
        url = url[len('file://'):]
        stream = open(url)
    else:
        is_local = False
        try:
            stream = util.urlopen(url, headers)
        except HTTPError as e:
            return self._check_statuscode(e, e.geturl())

    data = stream
    if autodiscovery and not is_local and stream.headers.get(
            'content-type', '').startswith('text/html'):
        # Not very robust attempt to detect encoding: http://stackoverflow.com/a/1495675/1072626
        charset = stream.headers.get_param('charset')
        if charset is None:
            charset = 'utf-8'  # utf-8 appears hard-coded elsewhere in this codebase

        # We use StringIO in case the stream needs to be read again
        data = StringIO(stream.read().decode(charset))
        ad = FeedAutodiscovery(url)
        ad.feed(data.getvalue())
        if ad._resolved_url:
            try:
                self._parse_feed(ad._resolved_url, None, None, False)
                return Result(NEW_LOCATION, ad._resolved_url)
            except Exception as e:
                logger.warn('Feed autodiscovery failed', exc_info=True)

            # Second, try to resolve the URL
            url = self._resolve_url(url)
            if url:
                return Result(NEW_LOCATION, url)

        # Reset the stream so podcastparser can give it a go
        data.seek(0)

    try:
        feed = podcastparser.parse(url, data)
    except ValueError as e:
        raise InvalidFeed('Could not parse feed: {msg}'.format(msg=e))

    if is_local:
        feed['headers'] = {}
        return Result(UPDATED_FEED, feed)
    else:
        feed['headers'] = stream.headers
        return self._check_statuscode(stream, feed)

def parseFeed(url):
    """ docstring """
    with urlopen(url) as response:
        try:
            return podcastparser.parse(url, response)
        except podcastparser.FeedParseError:
            return False

def play():
    feedurl = 'http://feeds.lds.org/ScriptureStories'
    parsed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl))
    episode = random.choice(parsed['episodes'])
    url = episode['enclosures'][0]['url']
    url = requests.get(url, allow_redirects=False).headers['location']
    url = url.replace('http', 'https')
    speech = f"Playing Scripture Story {episode['title']}"
    _infodump(f"{speech} ({url})")
    return audio(speech).play(url)  # , offset=93000)

def test_find_episode(self):
    feed_url = "https://feeds.megaphone.fm/theweeds"
    podcast = podcastparser.parse(feed_url,
                                  path.join(here, "data", "theweeds.rss"))
    title = "The reparations primary"
    stream_url = "https://traffic.megaphone.fm/VMP2975209749.mp3"
    item = utils.find_episode(podcast["episodes"], title, stream_url)
    self.assertEqual(item["guid"], "9aa25a44-ff17-11e8-89e8-dbfe1fc6a68f")
    self.assertEqual(item["total_time"], 3466)
    self.assertEqual(item["published"], 1553022019)

def handle_latest_episode_intent(self, message):
    utter = message.data['utterance']
    self.enclosure.mouth_think()
    podcast_names = [self.settings["nameone"],
                     self.settings["nametwo"],
                     self.settings["namethree"]]
    podcast_urls = [self.settings["feedone"],
                    self.settings["feedtwo"],
                    self.settings["feedthree"]]

    #check if the user specified a podcast to check for a new podcast
    for index, name in enumerate(podcast_names):
        #skip if podcast slot left empty
        if not name:
            continue
        if name.lower() in utter.lower():
            parsed_feed = pp.parse(podcast_urls[index],
                                   urllib.urlopen(podcast_urls[index]))
            last_episode = parsed_feed['episodes'][0]['title']
            speech_string = "The latest episode of " + name + " is " + last_episode
            break
    else:
        #if no podcast names are provided, list all new episodes
        new_episodes = []
        for index, url in enumerate(podcast_urls):
            #skip if url slot left empty
            if not url:
                continue
            parsed_feed = pp.parse(podcast_urls[index],
                                   urllib.urlopen(podcast_urls[index]))
            last_episode = parsed_feed['episodes'][0]['title']
            new_episodes.append(last_episode)

        #skip if i[0] slot left empty
        elements = [": ".join(i) for i in zip(podcast_names, new_episodes) if i[0]]
        speech_string = "The latest episodes are the following: "
        speech_string += ", ".join(elements[:-2] + [" and ".join(elements[-2:])])

    self.speak(speech_string)

def main():
    console.clear()
    url = None
    if appex.is_running_extension():
        url = appex.get_url()
    elif len(sys.argv) > 1:
        url = unquote(sys.argv[1])

    if url is None:
        print("No URL found")
        webbrowser.open("overcast://")
        return

    console.show_activity()
    print(url)

    parser = OvercastParser()
    reminders = Reminders()

    data = requests.get(url).text
    parser.feed(data)
    (itunes_id, stream_url, overcast_id, title) = parser.close()
    print(title)

    feed_url = extract_feed_id(itunes_id)
    print(feed_url)

    podcast = podcastparser.parse(feed_url, urllib.request.urlopen(feed_url))
    print(podcast["link"])

    item = utils.find_episode(podcast["episodes"], title, stream_url)
    print(item)

    result = {
        "title": title,
        "itunes_channel_id": itunes_id,
        "enclosure_url": stream_url,
        "overcast_id": overcast_id,
        "guid": item["guid"],
        "channel_link": podcast["link"],
        "duration": item["total_time"],
        "published_time": item["published"],
    }

    reminders.add(json.dumps(result))
    print("Added to reminders")
    console.hide_activity()
    webbrowser.open("overcast://")

def test_parse_rss(rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))
    parsed = podcastparser.parse('file://' + rss_filename,
                                 open(rss_filename), **params)

    assert_equal(expected, parsed)

def parseFeed(url):
    """ docstring """
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req) as response:
        try:
            return podcastparser.parse(url, response)
        except podcastparser.FeedParseError:
            return False
        except HTTPError:
            return False
        except URLError:
            return False

def test_parse_rss(rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))
    parsed = podcastparser.parse('file://' + rss_filename,
                                 open(rss_filename), **params)

    assert_equal.__self__.maxDiff = None
    assert_equal(expected, parsed)

def parse(url, max_episodes=0):
    with getfile(url) as file:
        parsed = podcastparser.parse(url, file, max_episodes)
        podcast = podcast_ripper.Podcast(parsed.get('title', 'Unknown'),
                                         parsed.get('link', ''),
                                         parsed.get('description', ''))
        for episode in parsed['episodes']:
            if len(episode['enclosures']) > 0:
                episode = podcast_ripper.Episode(episode['title'],
                                                 episode['published'],
                                                 episode['enclosures'][0]['url'])
                podcast.episodes.append(episode)
        return podcast

def _parse_feed(self, url, etag, modified, autodiscovery=True):
    headers = {}
    if modified is not None:
        headers['If-Modified-Since'] = modified
    if etag is not None:
        headers['If-None-Match'] = etag

    if url.startswith('file://'):
        is_local = True
        url = url[len('file://'):]
        stream = open(url)
    else:
        is_local = False
        try:
            stream = util.urlopen(url, headers)
        except HTTPError as e:
            return self._check_statuscode(e, e.geturl())

    if not is_local and stream.headers.get('content-type', '').startswith('text/html'):
        if autodiscovery:
            ad = FeedAutodiscovery(url)
            ad.feed(stream.read())
            if ad._resolved_url:
                try:
                    self._parse_feed(ad._resolved_url, None, None, False)
                    return Result(NEW_LOCATION, ad._resolved_url)
                except Exception as e:
                    logger.warn('Feed autodiscovery failed', exc_info=True)

                # Second, try to resolve the URL
                url = self._resolve_url(url)
                if url:
                    return Result(NEW_LOCATION, url)

        raise InvalidFeed('Got HTML document instead')

    feed = podcastparser.parse(url, stream)

    if is_local:
        feed['headers'] = {}
        return Result(UPDATED_FEED, feed)
    else:
        feed['headers'] = stream.headers
        return self._check_statuscode(stream, feed)

def test_parse_rss(rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))

    normalized_rss_filename = rss_filename
    if os.sep == '\\':
        normalized_rss_filename = normalized_rss_filename.replace(os.sep, '/')

    parsed = podcastparser.parse('file://' + normalized_rss_filename,
                                 open(rss_filename), **params)

    assert_equal.__self__.maxDiff = None
    assert_equal(expected, parsed)

def test_parse_rss(self, rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))

    normalized_rss_filename = rss_filename
    if os.sep == '\\':
        normalized_rss_filename = normalized_rss_filename.replace(os.sep, '/')

    parsed = podcastparser.parse('file://' + normalized_rss_filename,
                                 open(rss_filename), **params)

    assert expected == parsed

def parse(feedurl, count, root, override, managed):
    feed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl))
    for _, cast in zip(range(count), feed['episodes']):
        for item in cast['enclosures']:
            extension = os.path.splitext(item['url'])[1]
            if managed:
                directory = root + '/' + feed['title']
                if not os.path.isdir(directory):
                    os.mkdir(directory)
            else:
                directory = root
            filename = directory + '/' + cast['title'] + extension
            path = os.path.abspath(filename)
            if (not os.path.isfile(path)) or override:
                download(item['url'], path, cast['title'])
            else:
                print('{}: File already exists'.format(cast['title']))

def get_anchor_links_rss(newest=False):
    parsed = podcastparser.parse(anchor_fm_rss,
                                 urllib.request.urlopen(anchor_fm_rss))
    total_episodes = len(parsed['episodes'])
    if newest:
        # just get the newest episode
        return parsed['episodes'][0], total_episodes

    episodes = parsed['episodes'][:10]
    anchor_links = []
    anchor_titles = []
    anchor_descriptions = []
    for episode in episodes:
        anchor_links.append(episode['link'])
        anchor_titles.append(episode['title'])
        anchor_descriptions.append(
            format_description(episode['description_html']))
    return anchor_links, anchor_titles, anchor_descriptions, total_episodes

def update_using_feedservice(urls):
    import podcastparser
    from urllib.request import urlopen, Request

    podcasts = []
    for url in urls:
        try:
            feed = podcastparser.parse(
                url, urlopen(Request(url, headers=REQUEST_HEADERS)), 5)
        except Exception as error:  # pylint: disable=broad-except
            _LOGGER.error("Could not update %s - %s", url, error)
            feed = None

        if feed is None:
            _LOGGER.info("Feed not updated: %s", url)
            continue

        # Handle permanent redirects
        if feed.get("new_location", False):
            new_url = feed["new_location"]
            _LOGGER.info("Redirect %s => %s", url, new_url)
            url = new_url

        # Error handling
        if feed.get("errors", False):
            _LOGGER.error("Error parsing feed: %s", repr(feed["errors"]))
            continue

        # Update per-podcast metadata
        podcast = {
            "title": feed.get("title", ""),
            "link": feed.get("link", url),
            "description": feed.get("description", ""),
            "cover_url": feed.get("logo", ""),
            "episodes": [parse_entry(entry) for entry in feed["episodes"]],
        }
        podcasts.append(podcast)

    return podcasts

def make_playlist(feedurl, get_newest=False):
    # fetch podcast feed
    podcast = podcastparser.parse(feedurl, urllib.urlopen(feedurl),
                                  max_episodes=10)
    # create a name
    name = podcast['title'].encode('utf-8')
    name = name.replace(' ', '_')
    name = ''.join(['Podcast_', name])
    # extract episodes
    episodes = podcast['episodes']
    content = get_episodes(episodes)
    # create playlist
    filename = pls_generator(name, content)
    if get_newest:
        return filename, content[0]
    else:
        return filename

def getMP3Lists(podcastURL):
    print "-------------------------------------------------------"
    print "Getting MP3 Links"
    u = urllib2.urlopen(podcastURL)

    # Save the output to the xml file
    localFile = open('mp3s.xml', 'w')
    localFile.write(u.read())
    localFile.close()
    print "-------------------------------------------------------"

    del podcastMP3Array[:]

    # Parse the content through podcastparser
    parsed = podcastparser.parse(podcastURL, urllib.urlopen(podcastURL))
    # count the parsed episodes, not the keys of the feed dictionary
    print "Found: " + str(len(parsed['episodes']))
    for i in range(len(parsed['episodes'])):
        podcastMP3Array.append(parsed['episodes'][i]['enclosures'][0]['url'])
        print " " + parsed['episodes'][i]['enclosures'][0]['url']
    print "-------------------------------------------------------"

def _parse_feed(self, url, etag, modified, autodiscovery=True):
    headers = {}
    if modified is not None:
        headers['If-Modified-Since'] = modified
    if etag is not None:
        headers['If-None-Match'] = etag

    if url.startswith('file://'):
        is_local = True
        url = url[len('file://'):]
        stream = open(url)
    else:
        is_local = False
        try:
            stream = util.urlopen(url, headers)
        except HTTPError as e:
            return self._check_statuscode(e, e.geturl())

    if stream.headers.get('content-type', '').startswith('text/html'):
        if autodiscovery:
            ad = FeedAutodiscovery(url)
            ad.feed(stream.read())
            if ad._resolved_url:
                try:
                    self._parse_feed(ad._resolved_url, None, None, False)
                    return Result(NEW_LOCATION, ad._resolved_url)
                except Exception as e:
                    logger.warn('Feed autodiscovery failed', exc_info=True)

                # Second, try to resolve the URL
                url = self._resolve_url(url)
                if url:
                    return Result(NEW_LOCATION, url)

        raise InvalidFeed('Got HTML document instead')

    feed = podcastparser.parse(url, stream)
    feed['headers'] = stream.headers
    return self._check_statuscode(stream, feed)

def handle_play_podcast_intent(self, message):
    utter = message.data['utterance']
    podcast_names = [
        self.settings["nameone"], self.settings["nametwo"],
        self.settings["namethree"]
    ]
    podcast_urls = [
        self.settings["feedone"], self.settings["feedtwo"],
        self.settings["feedthree"]
    ]

    listen_url = self.chosen_podcast(utter, podcast_names, podcast_urls)

    #if misheard, retry and return false if Mycroft could not hear the name of the podcast
    try_count = 0
    while listen_url == "" and try_count < 2:
        try_count += 1
        response = self.get_response('nomatch')
        listen_url = self.chosen_podcast(response, podcast_names, podcast_urls)
        if try_count == 1 and listen_url == "":
            self.speak_dialog('not.found')
            return False

    #normalise feed and parse it
    normalised_feed = pp.normalize_feed_url(listen_url)
    parsed_feed = pp.parse(normalised_feed,
                           urllib.urlopen(normalised_feed))

    #Check what episode the user wants
    episode_index = 0
    #This block adds functionality for the user to choose an episode
    while True:
        episode_title = parsed_feed['episodes'][episode_index]['title']
        podcast_title = parsed_feed['title']
        data_dict = {
            "podcast_title": podcast_title,
            "episode_title": episode_title
        }
        if episode_index == 0:
            response = self.get_response('play.previous',
                                         data=data_dict,
                                         on_fail='please.repeat')
        else:
            response = self.get_response('play.next.previous',
                                         data=data_dict,
                                         on_fail='please.repeat')
        #error check
        if response is None:
            break
        if "stop" in response:
            self.speak("Operation cancelled.")
            return False
        elif "play" in response:
            break
        elif "previous" in response:
            episode_index += 1
        elif "next" in response:
            #ensure index doesn't go below zero
            if episode_index != 0:
                episode_index -= 1

    self.speak("Playing podcast.")
    time.sleep(1)

    #some feeds have different formats, these two were the most common ones I found so it will try them both
    try:
        episode = parsed_feed["episodes"][episode_index]["enclosures"][0]["url"]
    except:
        self.speak_dialog('badrss')

    #check for any redirects
    episode = urllib.urlopen(episode)
    redirected_episode = episode.geturl()

    # if audio service module is available use it
    if self.audioservice:
        self.audioservice.play(redirected_episode, message.data['utterance'])
    else:
        # otherwise use normal mp3 playback
        self.process = play_mp3(redirected_episode)

    self.enclosure.mouth_text(episode_title)

def podlist(self, media):
    """Get podcast infos(parsed)."""
    parsed = podcastparser.parse(media, urllib.urlopen(media))
    return parsed

def load_feed(path):
    print('loading ', path, '...')
    return podcastparser.parse(path, downloaded(path))

def test_fail_parse(self, feed):
    with pytest.raises(podcastparser.FeedParseError):
        podcastparser.parse('file://example.com/feed.xml', StringIO(feed))

except IndexError as e:
    pod_items = 5

# SET a user agent string because some podcast sites throw a 403 forbidden, if no UA set
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

feedrequest = urllib2.Request(feedurl, headers=hdr)

# GET the dictonary object from Podcastparser
try:
    data = podcastparser.parse(feedurl, urllib2.urlopen(feedrequest), int(pod_items))
except podcastparser.FeedParseError:
    print("Podcast Parser Error: Please file a bug report at github.com/gpodder/podcastparser")
    sys.exit()

pod_title = data["title"]
pod_timeformat = "%m/%d/%Y"
pod_m3u = "#EXTM3U\n"

# Let's do this, metaverse from podcastparser.py ...
# total_time,
# description

def infospodcast(self, url, max_episodes=0):
    """Get podcast shows list (parsed)."""
    parsed = podcastparser.parse(url, urllib.urlopen(url))
    return parsed

def fetch_feed_details(url):
    parsed = podcastparser.parse(url, urlopen(url))
    #print(parsed)
    feed_det = [dict(link=parsed['link'],
                     title=parsed['title'],
                     cover_url=parsed['cover_url'],
                     description=parsed['description'])]
    #print(feed_det)
    return feed_det

def function(media):
    """Get podcast infos(parsed)."""
    parsed = podcastparser.parse(media, urllib.urlopen(media))
    return parsed

# read arguments from the command line
args, unknown = parser.parse_known_args()

print("<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">")
print("<channel>")
print("<title>%s</title>" % (args.title))
print("<link>%s</link>" % (args.link))
print("<image>")
print("<url>%s</url>" % (args.image))
print("</image>")
print("\n")

for i in range(len(unknown)):
    feedurl = unknown[i]
    feed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl),
                               max_episodes=7)
    feedtitle = feed.get('title', '')
    feedlink = feed.get('link', '')
    feeddesc = feed.get('description', '')
    for ep in feed['episodes']:
        eptitle = ep['title']
        epdesc = ep['description']
        eppubdate = datetime.utcfromtimestamp(
            int(ep['published'])).strftime('%a, %d %b %Y %T')
        enclosure = ep['enclosures'][0]
        epurl = enclosure['url'].split("?")[0]
        guid = hashlib.md5(epurl.encode() + eppubdate.encode())
        print("<item>")
        print("<title>%s: %s</title>" %

os.mkdir(temp_dir)

for url in feed_list:
    ##should do some kind of validation for blank lines etc
    podcast_url = url.strip().split("|")[0]
    request = urllib.request.Request(podcast_url)
    request.add_header("User-Agent", user_agent)
    if len(url.strip().split("|")) == 3:
        podcast_username = url.strip().split("|")[1]
        podcast_password = url.strip().split("|")[2]
        auth_str = bytes(podcast_username + ":" + podcast_password, 'utf-8')
        base64string = base64.b64encode(auth_str).strip().decode('ascii')
        header = "Basic " + base64string
        request.add_header("Authorization", header)
    try:
        podcast = podcastparser.parse(podcast_url,
                                      urllib.request.urlopen(request))
    except KeyboardInterrupt:
        quit()
    except:
        feed_log.write(podcast_url + '\n')
        feed_log.flush()
        print("failed to parse: " + podcast_url)
        continue
    podcast_name = podcast['title']
    podcast_dir = base_directory + podcast_name + "/"
    if os.path.isdir(podcast_dir) == False:
        os.mkdir(podcast_dir)

def test_fail_parse(feed):
    with assert_raises(podcastparser.FeedParseError):
        podcastparser.parse('file://example.com/feed.xml', StringIO(feed))

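# Taken together, most of the examples above follow one pattern: open the feed
# URL (often with an explicit User-Agent header, since some hosts answer 403
# otherwise), hand the response stream to podcastparser.parse(), then read
# episode titles and enclosure URLs from the returned dict. A minimal
# self-contained sketch of that pattern; the feed URL is only a placeholder:
import urllib.request
import podcastparser

FEED_URL = "https://example.com/feed.xml"  # placeholder, replace with a real feed

# Some podcast hosts reject requests without a User-Agent, so set one explicitly.
request = urllib.request.Request(FEED_URL, headers={'User-Agent': 'Mozilla/5.0'})
with urllib.request.urlopen(request) as stream:
    parsed = podcastparser.parse(FEED_URL, stream, max_episodes=5)

print(parsed['title'])
for episode in parsed['episodes']:
    # An episode may have no enclosures; guard before indexing the first one.
    if episode['enclosures']:
        print(episode['title'], episode['enclosures'][0]['url'])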