def handle_latest_episode_intent(self, message):
    utter = message.data['utterance']
    podcast_names = [
        self.settings["nameone"], self.settings["nametwo"],
        self.settings["namethree"]
    ]
    podcast_urls = [
        self.settings["feedone"], self.settings["feedtwo"],
        self.settings["feedthree"]
    ]

    #check if the user specified a podcast to check for a new podcast
    for i in range(0, len(podcast_names)):
        #skip if podcast slot left empty
        if podcast_names[i] == "":
            continue
        elif podcast_names[i].lower() in utter.lower():
            parsed_feed = pp.parse(podcast_urls[i],
                                   urllib.urlopen(podcast_urls[i]))
            last_episode = parsed_feed['episodes'][0]['title']
            speech_string = "The latest episode of " + podcast_names[i] + \
                " is " + last_episode
            self.speak(speech_string)
            return True

    #if no podcast names are provided, list all new episodes
    new_episodes = []
    for i in range(0, len(podcast_urls)):
        if not podcast_urls[i]:
            continue
        parsed_feed = pp.parse(podcast_urls[i],
                               urllib.urlopen(podcast_urls[i]))
        last_episode = parsed_feed['episodes'][0]['title']
        new_episodes.append(last_episode)

    speech_string = "The latest episodes are the following: "
    for i in range(0, len(new_episodes)):
        #if the podcast is the last in a list add "and" before the podcast name
        if i == (len(new_episodes) - 1) and i > 0:
            speech_string = speech_string + "and " + podcast_names[i] + \
                ": " + new_episodes[i]
        else:
            speech_string = speech_string + podcast_names[i] + \
                ": " + new_episodes[i] + ", "
    self.speak(speech_string)

def CPS_start(self, phrase, data):
    self.log.info("CPS_start phrase: {} data: {}".format(phrase, data))
    tracklist = []
    parsed_feed = pp.parse(
        data,
        urllib.request.urlopen(
            Request(data, data=None,
                    headers={'User-Agent': self.user_agent})))
    episode_title = parsed_feed['episodes'][0]['title']

    # try and parse the rss feed, some are incompatible
    try:
        episode = parsed_feed["episodes"][0]["enclosures"][0]["url"]
    except:
        self.speak_dialog('badrss')

    # check for any redirects
    episode = urllib.request.urlopen(
        Request(episode, data=None,
                headers={'User-Agent': self.user_agent}))
    redirected_episode = episode.geturl()
    http_episode = re.sub('https', 'http', redirected_episode)
    self.log.info("http_episode: {}".format(http_episode))
    tracklist.append(http_episode)

    if self.state in ['playing', 'paused']:
        self.mediaplayer.stop()
        self.mediaplayer.clear_list()

    self.mediaplayer.add_list(tracklist)
    # self.speak(self._get_play_message(data))
    self.mediaplayer.play()
    self.state = 'playing'

def main():
    """Playing media."""
    parser = argparse.ArgumentParser(
        description="This script plays a given media on the mpd (local) server")
    parser.add_argument(
        "-c",
        choices=['play', 'stop', 'status', 'volup', 'voldown', 'pod'],
        required=True)
    parser.add_argument("-u", help="url to play")
    args = parser.parse_args()
    media = args.u
    command = args.c
    myplayer = player()
    if command == 'play':
        if media is not None:
            myplayer.play(media)
        else:
            print "You must enter a media url/path to play"
    elif command == 'stop':
        myplayer.stop()
    elif command == 'volup':
        myplayer.volup()
    elif command == 'voldown':
        myplayer.voldown()
    elif command == 'status':
        state = myplayer.status()
        for cle, val in state.items():
            print cle + " : " + val
    elif command == 'pod':
        parsed = podcastparser.parse(media, urllib.urlopen(media))
        pprint.pprint(parsed)

def run():
    try:
        for podcast in self.data["podcasts"]:
            raw = urlopen(
                podcast["feed_url"],
                timeout=self._podcast_timeout,
                context=ssl.create_default_context(cafile=certifi.where()),
            )
            parsed = podcastparser.parse(podcast["feed_url"], raw)
            episodes = parsed["episodes"]
            podcast["episodes"] = []
            for episode in episodes:
                title = episode["title"]
                media_url = episode["enclosures"][0]["url"]
                # podcast['episodes'].append({"title": unicodedata.normalize('NFKD', title).encode('ascii', 'ignore'), "url": media_url})
                podcast["episodes"].append({
                    "title": title,
                    "url": media_url
                })
        self.save()
        logger.info("Transistor Library: done downloading podcasts infos")
    except Exception as e:
        logger.error(
            "Transistor: Can't retrieve podcast data: {}".format(str(e)))

def _handle_paged_feed(self, max_episodes):
    page = 2
    remaining_episodes = max_episodes - len(self.parsed['episodes'])
    while ('paged_feed_next' in self.parsed and
           page < self.PAGED_FEED_MAX_PAGES and
           remaining_episodes > 0):
        # Take the next page from the paged feed
        url = self.parsed['paged_feed_next']
        del self.parsed['paged_feed_next']

        if not url:
            break

        try:
            logger.debug('Downloading page %d from %s', page, url)
            stream = util.urlopen(url)
            parsed = podcastparser.parse(url, stream, remaining_episodes)
            added_episodes = len(parsed['episodes'])
            remaining_episodes -= added_episodes
            logger.debug('Page %d contains %d additional episodes',
                         page, added_episodes)
            self.parsed['episodes'].extend(parsed['episodes'])

            # Next iteration if we still have a next page
            if 'paged_feed_next' in parsed:
                self.parsed['paged_feed_next'] = parsed['paged_feed_next']
        except Exception as e:
            logger.warn('Error while fetching feed page %d from %s: %s',
                        page, url, e)
            # Give up, don't try to download additional pages here
            break

        page += 1

def refresh():
    for rss in rss_links:
        parsed = podcastparser.parse(rss, urllib.request.urlopen(rss), 20)
        title = parsed.get('title')
        description = remove_html(parsed.get('description'))
        image = parsed.get('cover_url')
        link = parsed.get('link')

        podcast = Podcast.query.filter_by(link=parsed.get('link')).first()
        if podcast is None:
            podcast = Podcast(title=title, description=description,
                              image=image, link=link)
            db.session.add(podcast)
            db.session.commit()

        for episode in parsed.get('episodes'):
            episode_title = episode.get('title')
            episode_link = episode.get('link')
            episode_audio_url = episode.get('enclosures')[0]['url']
            episode_time_published = episode.get('published')
            episode_length = episode.get('total_time')
            episode_podcast = podcast

            episode = Episode.query.filter_by(audio_url=episode_audio_url).first()
            if episode is None:
                episode = Episode(title=episode_title, link=episode_link,
                                  audio_url=episode_audio_url,
                                  time_published=episode_time_published,
                                  length=episode_length,
                                  podcast=episode_podcast)
                db.session.add(episode)
                db.session.commit()

def __init__(self, channel, max_episodes):
    url = channel.authenticate_url(channel.url)
    logger.info('Parsing via podcastparser: %s', url)

    headers = {}
    if channel.http_etag:
        headers['If-None-Match'] = channel.http_etag
    if channel.http_last_modified:
        headers['If-Modified-Since'] = channel.http_last_modified

    try:
        stream = util.urlopen(url, headers)
        self.status = 200
        info = stream.info()
        self.etag = info.get('etag')
        self.modified = info.get('last-modified')
        self.parsed = podcastparser.parse(url, stream, max_episodes)
        self._handle_paged_feed(max_episodes)
    except urllib.error.HTTPError as error:
        self.status = error.code
        if error.code == 304:
            logger.info('Not modified')
        else:
            logger.warn('Feed update failed: %s', error)
            raise error

        self.etag = None
        self.modified = None
        self.parsed = None

def update_using_feedservice(urls):
    import podcastparser
    from urllib.request import urlopen

    podcasts = []
    for url in urls:
        feed = podcastparser.parse(url, urlopen(url), 5)
        if feed is None:
            _LOGGER.info("Feed not updated: %s", url)
            continue

        # Handle permanent redirects
        if feed.get("new_location", False):
            new_url = feed["new_location"]
            _LOGGER.info("Redirect %s => %s", url, new_url)
            url = new_url

        # Error handling
        if feed.get("errors", False):
            _LOGGER.error("Error parsing feed: %s", repr(feed["errors"]))
            continue

        # Update per-podcast metadata
        podcast = {
            "title": feed.get("title", ""),
            "link": feed.get("link", url),
            "description": feed.get("description", ""),
            "cover_url": feed.get("logo", ""),
            "episodes": [parse_entry(entry) for entry in feed["episodes"]],
        }
        podcasts.append(podcast)

    return podcasts

def parse_feed(event, context):
    feedurl = event['queryStringParameters']['feedUrl']
    parsed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl))
    body = {
        'title': parsed['title'],
        'link': parsed['link'],
        'description': parsed['description'],
        'episodes': [
            {
                'title': e['title'],
                'description': e['description'],
                'published': e['published'],
                'mediaUrl': e['enclosures'][0]['url']
            }
            for e in parsed['episodes']
        ]
    }
    response = {
        'statusCode': 200,
        'headers': {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        },
        'body': json.dumps(body),
        'isBase64Encoded': False
    }
    return response

def __init__(self, channel, max_episodes):
    url = channel.authenticate_url(channel.url)
    logger.info('Parsing via podcastparser: %s', url)

    headers = {}
    if channel.http_etag:
        headers['If-None-Match'] = channel.http_etag
    if channel.http_last_modified:
        headers['If-Modified-Since'] = channel.http_last_modified

    try:
        stream = util.urlopen(url, headers)
        self.status = 200
        info = stream.info()
        self.etag = info.get('etag')
        self.modified = info.get('last-modified')
        self.parsed = podcastparser.parse(url, stream, max_episodes)
    except urllib.error.HTTPError as error:
        self.status = error.code
        if error.code == 304:
            logger.info('Not modified')
        else:
            logger.warn('Feed update failed: %s', error)
            raise error

        self.etag = None
        self.modified = None
        self.parsed = None

def __init__(self, link):
    super().__init__()
    feed = podcastparser.parse(link, urlopen(link))
    self.title = feed['title']
    for item in feed['episodes']:
        self.items.append(PodcastArticle(item['title'],
                                         item['description'],
                                         item['link'],
                                         date.fromtimestamp(item['published'])))
    self.items.reverse()

def fetch_episodes(url):
    parsed = podcastparser.parse(url, urlopen(url))
    #print(parsed)
    new = [dict(title=episode['title'],
                pub=episode['published'],
                duration=str(datetime.timedelta(seconds=episode['total_time'])),
                description=episode['description'],
                uri_link=episode['enclosures'][0]['url'])
           for episode in parsed['episodes']]
    return new

def fetch_feed_title():
    if request.method == 'POST':
        posted_data = json.loads(json.dumps(request.json))
        url = posted_data['url']
        parsed = podcastparser.parse(url, urlopen(url))
        json_data = [{"title": parsed['title']}]
        return json.dumps(json_data)

def __init__(self, url):
    self.url = url
    # get the 5 last episodes from podcast at url (podcastparser sorts by published date)
    self.pc = podcastparser.parse(self.url,
                                  stream=urllib.urlopen(self.url),
                                  max_episodes=5)
    self.episodes = []

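# Note: the snippet above uses the Python 2 urllib.urlopen API. A minimal
# Python 3 sketch of the same call (the helper name fetch_latest is
# illustrative only and not taken from the snippet above):
import urllib.request
import podcastparser

def fetch_latest(feed_url, count=5):
    # Open the feed over HTTP and let podcastparser cap the episode list.
    with urllib.request.urlopen(feed_url) as stream:
        return podcastparser.parse(feed_url, stream, max_episodes=count)
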
def _parse_feed(self, url, etag, modified, autodiscovery=True):
    headers = {}
    if modified is not None:
        headers['If-Modified-Since'] = modified
    if etag is not None:
        headers['If-None-Match'] = etag

    if url.startswith('file://'):
        is_local = True
        url = url[len('file://'):]
        stream = open(url)
    else:
        is_local = False
        try:
            stream = util.urlopen(url, headers)
        except HTTPError as e:
            return self._check_statuscode(e, e.geturl())

    data = stream
    if autodiscovery and not is_local and stream.headers.get(
            'content-type', '').startswith('text/html'):
        # Not very robust attempt to detect encoding: http://stackoverflow.com/a/1495675/1072626
        charset = stream.headers.get_param('charset')
        if charset is None:
            charset = 'utf-8'  # utf-8 appears hard-coded elsewhere in this codebase

        # We use StringIO in case the stream needs to be read again
        data = StringIO(stream.read().decode(charset))
        ad = FeedAutodiscovery(url)
        ad.feed(data.getvalue())
        if ad._resolved_url:
            try:
                self._parse_feed(ad._resolved_url, None, None, False)
                return Result(NEW_LOCATION, ad._resolved_url)
            except Exception as e:
                logger.warn('Feed autodiscovery failed', exc_info=True)

            # Second, try to resolve the URL
            url = self._resolve_url(url)
            if url:
                return Result(NEW_LOCATION, url)

        # Reset the stream so podcastparser can give it a go
        data.seek(0)

    try:
        feed = podcastparser.parse(url, data)
    except ValueError as e:
        raise InvalidFeed('Could not parse feed: {msg}'.format(msg=e))

    if is_local:
        feed['headers'] = {}
        return Result(UPDATED_FEED, feed)
    else:
        feed['headers'] = stream.headers
        return self._check_statuscode(stream, feed)

def parseFeed(url):
    """ docstring """
    with urlopen(url) as response:
        try:
            return podcastparser.parse(url, response)
        except podcastparser.FeedParseError:
            return False

def play():
    feedurl = 'http://feeds.lds.org/ScriptureStories'
    parsed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl))
    episode = random.choice(parsed['episodes'])
    url = episode['enclosures'][0]['url']
    url = requests.get(url, allow_redirects=False).headers['location']
    url = url.replace('http', 'https')
    speech = f"Playing Scripture Story {episode['title']}"
    _infodump(f"{speech} ({url})")
    return audio(speech).play(url)  # , offset=93000)

def test_find_episode(self):
    feed_url = "https://feeds.megaphone.fm/theweeds"
    podcast = podcastparser.parse(feed_url,
                                  path.join(here, "data", "theweeds.rss"))
    title = "The reparations primary"
    stream_url = "https://traffic.megaphone.fm/VMP2975209749.mp3"
    item = utils.find_episode(podcast["episodes"], title, stream_url)
    self.assertEqual(item["guid"], "9aa25a44-ff17-11e8-89e8-dbfe1fc6a68f")
    self.assertEqual(item["total_time"], 3466)
    self.assertEqual(item["published"], 1553022019)

def handle_latest_episode_intent(self, message):
    utter = message.data['utterance']
    self.enclosure.mouth_think()
    podcast_names = [self.settings["nameone"],
                     self.settings["nametwo"],
                     self.settings["namethree"]]
    podcast_urls = [self.settings["feedone"],
                    self.settings["feedtwo"],
                    self.settings["feedthree"]]

    #check if the user specified a podcast to check for a new podcast
    for index, name in enumerate(podcast_names):
        #skip if podcast slot left empty
        if not name:
            continue
        if name.lower() in utter.lower():
            parsed_feed = pp.parse(podcast_urls[index],
                                   urllib.urlopen(podcast_urls[index]))
            last_episode = parsed_feed['episodes'][0]['title']
            speech_string = "The latest episode of " + name + " is " + last_episode
            break
    else:
        #if no podcast names are provided, list all new episodes
        new_episodes = []
        for index, url in enumerate(podcast_urls):
            #skip if url slot left empty
            if not url:
                continue
            parsed_feed = pp.parse(podcast_urls[index],
                                   urllib.urlopen(podcast_urls[index]))
            last_episode = parsed_feed['episodes'][0]['title']
            new_episodes.append(last_episode)

        #skip if i[0] slot left empty
        elements = [": ".join(i) for i in zip(podcast_names, new_episodes) if i[0]]
        speech_string = "The latest episodes are the following: "
        speech_string += ", ".join(elements[:-2] + [" and ".join(elements[-2:])])

    self.speak(speech_string)

def main():
    console.clear()
    url = None
    if appex.is_running_extension():
        url = appex.get_url()
    elif len(sys.argv) > 1:
        url = unquote(sys.argv[1])

    if url is None:
        print("No URL found")
        webbrowser.open("overcast://")
        return

    console.show_activity()
    print(url)

    parser = OvercastParser()
    reminders = Reminders()

    data = requests.get(url).text
    parser.feed(data)
    (itunes_id, stream_url, overcast_id, title) = parser.close()
    print(title)

    feed_url = extract_feed_id(itunes_id)
    print(feed_url)

    podcast = podcastparser.parse(feed_url, urllib.request.urlopen(feed_url))
    print(podcast["link"])

    item = utils.find_episode(podcast["episodes"], title, stream_url)
    print(item)

    result = {
        "title": title,
        "itunes_channel_id": itunes_id,
        "enclosure_url": stream_url,
        "overcast_id": overcast_id,
        "guid": item["guid"],
        "channel_link": podcast["link"],
        "duration": item["total_time"],
        "published_time": item["published"],
    }

    reminders.add(json.dumps(result))
    print("Added to reminders")
    console.hide_activity()
    webbrowser.open("overcast://")

def test_parse_rss(rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))
    parsed = podcastparser.parse('file://' + rss_filename,
                                 open(rss_filename), **params)

    assert_equal(expected, parsed)

def parseFeed(url):
    """ docstring """
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req) as response:
        try:
            return podcastparser.parse(url, response)
        except podcastparser.FeedParseError:
            return False
        except HTTPError:
            return False
        except URLError:
            return False

def test_parse_rss(rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))
    parsed = podcastparser.parse('file://' + rss_filename,
                                 open(rss_filename), **params)

    assert_equal.__self__.maxDiff = None
    assert_equal(expected, parsed)

def parse(url, max_episodes=0):
    with getfile(url) as file:
        parsed = podcastparser.parse(url, file, max_episodes)
        podcast = podcast_ripper.Podcast(parsed.get('title', 'Unknown'),
                                         parsed.get('link', ''),
                                         parsed.get('description', ''))
        for episode in parsed['episodes']:
            if len(episode['enclosures']) > 0:
                episode = podcast_ripper.Episode(episode['title'],
                                                 episode['published'],
                                                 episode['enclosures'][0]['url'])
                podcast.episodes.append(episode)
        return podcast

def _parse_feed(self, url, etag, modified, autodiscovery=True):
    headers = {}
    if modified is not None:
        headers['If-Modified-Since'] = modified
    if etag is not None:
        headers['If-None-Match'] = etag

    if url.startswith('file://'):
        is_local = True
        url = url[len('file://'):]
        stream = open(url)
    else:
        is_local = False
        try:
            stream = util.urlopen(url, headers)
        except HTTPError as e:
            return self._check_statuscode(e, e.geturl())

    if not is_local and stream.headers.get('content-type', '').startswith('text/html'):
        if autodiscovery:
            ad = FeedAutodiscovery(url)
            ad.feed(stream.read())
            if ad._resolved_url:
                try:
                    self._parse_feed(ad._resolved_url, None, None, False)
                    return Result(NEW_LOCATION, ad._resolved_url)
                except Exception as e:
                    logger.warn('Feed autodiscovery failed', exc_info=True)

                # Second, try to resolve the URL
                url = self._resolve_url(url)
                if url:
                    return Result(NEW_LOCATION, url)

        raise InvalidFeed('Got HTML document instead')

    feed = podcastparser.parse(url, stream)

    if is_local:
        feed['headers'] = {}
        return Result(UPDATED_FEED, feed)
    else:
        feed['headers'] = stream.headers
        return self._check_statuscode(stream, feed)

def test_parse_rss(rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))

    normalized_rss_filename = rss_filename
    if os.sep == '\\':
        normalized_rss_filename = normalized_rss_filename.replace(os.sep, '/')

    parsed = podcastparser.parse('file://' + normalized_rss_filename,
                                 open(rss_filename), **params)

    assert_equal.__self__.maxDiff = None
    assert_equal(expected, parsed)

def test_parse_rss(self, rss_filename):
    basename, _ = os.path.splitext(rss_filename)
    json_filename = basename + '.json'

    # read parameters to podcastparser.parse() from a separate file
    param_filename = basename + '.param.json'
    params = {}
    if os.path.exists(param_filename):
        params = json.load(open(param_filename))

    expected = json.load(open(json_filename))

    normalized_rss_filename = rss_filename
    if os.sep == '\\':
        normalized_rss_filename = normalized_rss_filename.replace(os.sep, '/')

    parsed = podcastparser.parse('file://' + normalized_rss_filename,
                                 open(rss_filename), **params)

    assert expected == parsed

def parse(feedurl, count, root, override, managed):
    feed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl))
    for _, cast in zip(range(count), feed['episodes']):
        for item in cast['enclosures']:
            extension = os.path.splitext(item['url'])[1]
            if managed:
                directory = root + '/' + feed['title']
                if not os.path.isdir(directory):
                    os.mkdir(directory)
            else:
                directory = root
            filename = directory + '/' + cast['title'] + extension
            path = os.path.abspath(filename)
            if (not os.path.isfile(path)) or override:
                download(item['url'], path, cast['title'])
            else:
                print('{}: File already exists'.format(cast['title']))

def get_anchor_links_rss(newest=False):
    parsed = podcastparser.parse(anchor_fm_rss,
                                 urllib.request.urlopen(anchor_fm_rss))
    total_episodes = len(parsed['episodes'])
    if newest:
        # just get the newest episode
        return parsed['episodes'][0], total_episodes

    episodes = parsed['episodes'][:10]
    anchor_links = []
    anchor_titles = []
    anchor_descriptions = []
    for episode in episodes:
        anchor_links.append(episode['link'])
        anchor_titles.append(episode['title'])
        anchor_descriptions.append(
            format_description(episode['description_html']))
    return anchor_links, anchor_titles, anchor_descriptions, total_episodes

def update_using_feedservice(urls):
    import podcastparser
    from urllib.request import urlopen, Request

    podcasts = []
    for url in urls:
        try:
            feed = podcastparser.parse(
                url, urlopen(Request(url, headers=REQUEST_HEADERS)), 5)
        except Exception as error:  # pylint: disable=broad-except
            _LOGGER.error("Could not update %s - %s", url, error)
            feed = None

        if feed is None:
            _LOGGER.info("Feed not updated: %s", url)
            continue

        # Handle permanent redirects
        if feed.get("new_location", False):
            new_url = feed["new_location"]
            _LOGGER.info("Redirect %s => %s", url, new_url)
            url = new_url

        # Error handling
        if feed.get("errors", False):
            _LOGGER.error("Error parsing feed: %s", repr(feed["errors"]))
            continue

        # Update per-podcast metadata
        podcast = {
            "title": feed.get("title", ""),
            "link": feed.get("link", url),
            "description": feed.get("description", ""),
            "cover_url": feed.get("logo", ""),
            "episodes": [parse_entry(entry) for entry in feed["episodes"]],
        }
        podcasts.append(podcast)

    return podcasts

def make_playlist(feedurl, get_newest=False):
    # fetch podcast feed
    podcast = podcastparser.parse(feedurl, urllib.urlopen(feedurl),
                                  max_episodes=10)
    # create a name
    name = podcast['title'].encode('utf-8')
    name = name.replace(' ', '_')
    name = ''.join(['Podcast_', name])
    # extract episodes
    episodes = podcast['episodes']
    content = get_episodes(episodes)
    # create playlist
    filename = pls_generator(name, content)
    if get_newest:
        return filename, content[0]
    else:
        return filename

def getMP3Lists(podcastURL):
    print "-------------------------------------------------------"
    print "Getting MP3 Links"
    u = urllib2.urlopen(podcastURL)

    # Save the output to the xml file
    localFile = open('mp3s.xml', 'w')
    localFile.write(u.read())
    localFile.close()
    print "-------------------------------------------------------"

    del podcastMP3Array[:]

    # Parse the content through podcastparser
    parsed = podcastparser.parse(podcastURL, urllib.urlopen(podcastURL))
    # count the parsed episodes, not the keys of the feed dictionary
    print "Found: " + str(len(parsed['episodes']))
    for i in range(len(parsed['episodes'])):
        podcastMP3Array.append(parsed['episodes'][i]['enclosures'][0]['url'])
        print " " + parsed['episodes'][i]['enclosures'][0]['url']
    print "-------------------------------------------------------"

def _parse_feed(self, url, etag, modified, autodiscovery=True):
    headers = {}
    if modified is not None:
        headers['If-Modified-Since'] = modified
    if etag is not None:
        headers['If-None-Match'] = etag

    if url.startswith('file://'):
        is_local = True
        url = url[len('file://'):]
        stream = open(url)
    else:
        is_local = False
        try:
            stream = util.urlopen(url, headers)
        except HTTPError as e:
            return self._check_statuscode(e, e.geturl())

    if stream.headers.get('content-type', '').startswith('text/html'):
        if autodiscovery:
            ad = FeedAutodiscovery(url)
            ad.feed(stream.read())
            if ad._resolved_url:
                try:
                    self._parse_feed(ad._resolved_url, None, None, False)
                    return Result(NEW_LOCATION, ad._resolved_url)
                except Exception as e:
                    logger.warn('Feed autodiscovery failed', exc_info=True)

                # Second, try to resolve the URL
                url = self._resolve_url(url)
                if url:
                    return Result(NEW_LOCATION, url)

        raise InvalidFeed('Got HTML document instead')

    feed = podcastparser.parse(url, stream)
    feed['headers'] = stream.headers
    return self._check_statuscode(stream, feed)

def handle_play_podcast_intent(self, message):
    utter = message.data['utterance']
    podcast_names = [
        self.settings["nameone"], self.settings["nametwo"],
        self.settings["namethree"]
    ]
    podcast_urls = [
        self.settings["feedone"], self.settings["feedtwo"],
        self.settings["feedthree"]
    ]

    listen_url = self.chosen_podcast(utter, podcast_names, podcast_urls)

    #if misheard, retry and return false if Mycroft could not hear the name of the podcast
    try_count = 0
    while listen_url == "" and try_count < 2:
        try_count += 1
        response = self.get_response('nomatch')
        listen_url = self.chosen_podcast(response, podcast_names, podcast_urls)
        if try_count == 1 and listen_url == "":
            self.speak_dialog('not.found')
            return False

    #normalise feed and parse it
    normalised_feed = pp.normalize_feed_url(listen_url)
    parsed_feed = pp.parse(normalised_feed,
                           urllib.urlopen(normalised_feed))

    #Check what episode the user wants
    episode_index = 0
    #This block adds functionality for the user to choose an episode
    while True:
        episode_title = parsed_feed['episodes'][episode_index]['title']
        podcast_title = parsed_feed['title']
        data_dict = {
            "podcast_title": podcast_title,
            "episode_title": episode_title
        }
        if episode_index == 0:
            response = self.get_response('play.previous',
                                         data=data_dict,
                                         on_fail='please.repeat')
        else:
            response = self.get_response('play.next.previous',
                                         data=data_dict,
                                         on_fail='please.repeat')
        #error check
        if response is None:
            break
        if "stop" in response:
            self.speak("Operation cancelled.")
            return False
        elif "play" in response:
            break
        elif "previous" in response:
            episode_index += 1
        elif "next" in response:
            #ensure index doesn't go below zero
            if episode_index != 0:
                episode_index -= 1

    self.speak("Playing podcast.")
    time.sleep(1)

    #some feeds have different formats, these two were the most common ones I found so it will try them both
    try:
        episode = parsed_feed["episodes"][episode_index]["enclosures"][0]["url"]
    except:
        self.speak_dialog('badrss')

    #check for any redirects
    episode = urllib.urlopen(episode)
    redirected_episode = episode.geturl()

    # if audio service module is available use it
    if self.audioservice:
        self.audioservice.play(redirected_episode, message.data['utterance'])
    else:
        # otherwise use normal mp3 playback
        self.process = play_mp3(redirected_episode)

    self.enclosure.mouth_text(episode_title)

def podlist(self, media):
    """Get podcast infos(parsed)."""
    parsed = podcastparser.parse(media, urllib.urlopen(media))
    return parsed

def load_feed(path):
    print('loading ', path, '...')
    return podcastparser.parse(path, downloaded(path))

def test_fail_parse(self, feed):
    with pytest.raises(podcastparser.FeedParseError):
        podcastparser.parse('file://example.com/feed.xml', StringIO(feed))

except IndexError as e:
    pod_items = 5

# SET a user agent string because some podcast sites throw a 403 forbidden, if no UA set
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

feedrequest = urllib2.Request(feedurl, headers=hdr)

# GET the dictonary object from Podcastparser
try:
    data = podcastparser.parse(feedurl, urllib2.urlopen(feedrequest), int(pod_items))
except podcastparser.FeedParseError:
    print("Podcast Parser Error: Please file a bug report at github.com/gpodder/podcastparser")
    sys.exit()

pod_title = data["title"]
pod_timeformat = "%m/%d/%Y"
pod_m3u = "#EXTM3U\n"

# Let's do this, metaverse from podcastparser.py ...
# total_time,
# description

def infospodcast(self, url, max_episodes=0):
    """Get podcast shows list (parsed)."""
    parsed = podcastparser.parse(url, urllib.urlopen(url))
    return parsed

def fetch_feed_details(url):
    parsed = podcastparser.parse(url, urlopen(url))
    #print(parsed)
    feed_det = [dict(link=parsed['link'],
                     title=parsed['title'],
                     cover_url=parsed['cover_url'],
                     description=parsed['description'])]
    #print(feed_det)
    return feed_det

def function(media):
    """Get podcast infos(parsed)."""
    parsed = podcastparser.parse(media, urllib.urlopen(media))
    return parsed

# read arguments from the command line
args, unknown = parser.parse_known_args()

print("<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">")
print("<channel>")
print("<title>%s</title>" % (args.title))
print("<link>%s</link>" % (args.link))
print("<image>")
print("<url>%s</url>" % (args.image))
print("</image>")
print("\n")

for i in range(len(unknown)):
    feedurl = unknown[i]
    feed = podcastparser.parse(feedurl, urllib.request.urlopen(feedurl),
                               max_episodes=7)
    feedtitle = feed.get('title', '')
    feedlink = feed.get('link', '')
    feeddesc = feed.get('description', '')
    for ep in feed['episodes']:
        eptitle = ep['title']
        epdesc = ep['description']
        eppubdate = datetime.utcfromtimestamp(
            int(ep['published'])).strftime('%a, %d %b %Y %T')
        enclosure = ep['enclosures'][0]
        epurl = enclosure['url'].split("?")[0]
        guid = hashlib.md5(epurl.encode() + eppubdate.encode())
        print("<item>")
        print("<title>%s: %s</title>" %

os.mkdir(temp_dir)

for url in feed_list:
    ##should do some kind of validation for blank lines etc
    podcast_url = url.strip().split("|")[0]
    request = urllib.request.Request(podcast_url)
    request.add_header("User-Agent", user_agent)
    if len(url.strip().split("|")) == 3:
        podcast_username = url.strip().split("|")[1]
        podcast_password = url.strip().split("|")[2]
        auth_str = bytes(podcast_username + ":" + podcast_password, 'utf-8')
        base64string = base64.b64encode(auth_str).strip().decode('ascii')
        header = "Basic " + base64string
        request.add_header("Authorization", header)
    try:
        podcast = podcastparser.parse(podcast_url,
                                      urllib.request.urlopen(request))
    except KeyboardInterrupt:
        quit()
    except:
        feed_log.write(podcast_url + '\n')
        feed_log.flush()
        print("failed to parse: " + podcast_url)
        continue
    podcast_name = podcast['title']
    podcast_dir = base_directory + podcast_name + "/"
    if os.path.isdir(podcast_dir) == False:
        os.mkdir(podcast_dir)

def test_fail_parse(feed):
    with assert_raises(podcastparser.FeedParseError):
        podcastparser.parse('file://example.com/feed.xml', StringIO(feed))

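# Taken together, most of the examples above follow one pattern: open the feed
# URL (often with an explicit User-Agent header, since some hosts answer 403
# otherwise), hand the response stream to podcastparser.parse(), then read
# episode titles and enclosure URLs from the returned dict. A minimal
# self-contained sketch of that pattern; the feed URL is only a placeholder:
import urllib.request
import podcastparser

FEED_URL = "https://example.com/feed.xml"  # placeholder, replace with a real feed

# Some podcast hosts reject requests without a User-Agent, so set one explicitly.
request = urllib.request.Request(FEED_URL, headers={'User-Agent': 'Mozilla/5.0'})
with urllib.request.urlopen(request) as stream:
    parsed = podcastparser.parse(FEED_URL, stream, max_episodes=5)

print(parsed['title'])
for episode in parsed['episodes']:
    # An episode may have no enclosures; guard before indexing the first one.
    if episode['enclosures']:
        print(episode['title'], episode['enclosures'][0]['url'])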