Esempio n. 1
0
def genero_feed(episodesList):
    if episodesList:
        # Creo un nuovo podcast
        p = Podcast()

        p.name = "NECST Tech Time"
        p.description = "The NECSTLab (Novel, Emerging Computing System Technologies Laboratory) is a laboratory inside DEIB department of Politecnico di Milano, where there are a number of different research lines on advanced topics in computing systems: from architectural characteristics, to hardware-software codesign methodologies, to security and dependability issues of complex system architectures (scaling from mobile devices to large virtualized datacenters)."
        p.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
        p.explicit = True
        p.image = "https://rss.draghetti.it/necst_image.jpg"
        p.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
        p.copyright = "Poli Radio"
        p.language = "it-IT"

        for episodedetails in episodesList:
            episode = Episode()

            episode.title = episodedetails[1].encode("ascii", "ignore")
            episode.link = episodedetails[2].encode("ascii", "ignore")

            # La dimensione e statistica in base alle puntante analizzate
            episode.media = Media(episodedetails[3], 30000000, type="audio/x-m4a", duration=None)
            episode.publication_date = episodedetails[4]

            p.episodes.append(episode)

        # Print to stdout, just as an example
        p.rss_file(rssfile, minimize=False)
Esempio n. 2
0
def generate_podcast_xml(base, books):
    from podgen import Podcast, Episode
    from datetime import timedelta
    from podgen import Media

    p = Podcast()

    p.name = "AeonNeo's Audiobooks"
    p.description = "Description"
    p.website = "www.yangvincent.com"
    p.explicit = False

    # create episode
    for book_name in books:
        ep = Episode()
        ep.title = book_name[:-4]
        full_path = base + '/files/' + book_name
        dev_path = 'files/' + book_name
        try:
            book_size = os.path.getsize(dev_path)
        except OSError as e:
            print(e)
            book_size = 0

        ep.media = Media(full_path, type='audio/mp4a', size=book_size)
        p.episodes.append(ep)

    # Generate rss
    p.rss_file('skeleton/rss.xml', minimize=True)
Esempio n. 3
0
def genero_feed(puntateList):
    if puntateList:
        # Creo un nuovo podcast
        p = Podcast()

        p.name = "Il Ruggito del Coniglio"
        p.description = "Il Ruggito del Coniglio, il programma cult di Radio 2 condotto da Marco Presta e Antonello Dose, racconta l'attualita con folgorante ironia."
        p.website = "http://www.raiplayradio.it/programmi/ilruggitodelconiglio/"
        p.explicit = True
        p.image = "https://rss.draghetti.it/ruggitodelconiglio_image.jpg"
        p.feed_url = "https://rss.draghetti.it/ruggitodelconiglio.xml"
        p.copyright = "Rai Radio 2"
        p.language = "it-IT"

        for puntata in puntateList:
            episode = Episode()

            episode.title = puntata[0].encode("ascii", "ignore")
            episode.link = puntata[1]

            # La dimensione del file e approssimativa
            episode.media = Media(puntata[3], puntata[4])

            if puntata[2]:
                episode.publication_date = datetime.datetime(int(puntata[2].split("/")[2]),
                                                             int(puntata[2].split("/")[1]),
                                                             int(puntata[2].split("/")[0]), 10,
                                                             00, tzinfo=pytz.utc)
            else:
                episode.publication_date = pytz.utc.localize(datetime.datetime.utcnow())

            p.episodes.append(episode)

        # Print to stdout, just as an example
        p.rss_file(rssfile, minimize=False)
Esempio n. 4
0
def genero_feed(episodesList):
    if episodesList:
        # Creo un nuovo podcast
        p = Podcast()

        p.name = "NECST Tech Time"
        p.description = "Feed Podcast non ufficiale di NECST Tech Time - Powered By Andrea Draghetti"
        p.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
        p.explicit = True
        p.image = "https://rss.draghetti.it/necst_image.jpg"
        p.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
        p.copyright = "Poli Radio"
        p.language = "it-IT"

        for episodedetails in episodesList:
            episode = Episode()

            episode.title = episodedetails[1].encode("ascii", "ignore")
            episode.link = episodedetails[2].encode("ascii", "ignore")

            # La dimensione e statistica in base alle puntante analizzate
            episode.media = Media(episodedetails[3],
                                  30000000,
                                  type="audio/x-m4a",
                                  duration=None)
            episode.publication_date = episodedetails[4]

            p.episodes.append(episode)

        # Print to stdout, just as an example
        p.rss_file(rssfile, minimize=False)
def genero_feed(puntateList):
    if puntateList:
        # Creo un nuovo podcast
        p = Podcast()

        p.name = "Il Ruggito del Coniglio"
        p.description = "Il Ruggito del Coniglio, il programma cult di Radio 2 condotto da Marco Presta e Antonello Dose, racconta l'attualita con folgorante ironia."
        p.website = "http://www.raiplayradio.it/programmi/ilruggitodelconiglio/"
        p.explicit = True
        p.image = "https://rss.draghetti.it/ruggitodelconiglio_image.jpg"
        p.feed_url = "https://rss.draghetti.it/ruggitodelconiglio.xml"
        p.copyright = "Rai Radio 2"
        p.language = "it-IT"

        for puntata in puntateList:
            episode = Episode()

            episode.title = puntata[0].encode("ascii", "ignore")
            episode.link = puntata[1]

            # La dimensione del file e approssimativa
            episode.media = Media(puntata[3], puntata[4])

            if puntata[2]:
                episode.publication_date = datetime.datetime(int(puntata[2].split("/")[2]),
                                                             int(puntata[2].split("/")[1]),
                                                             int(puntata[2].split("/")[0]), 10,
                                                             00, tzinfo=pytz.utc)
            else:
                episode.publication_date = pytz.utc.localize(datetime.datetime.utcnow())

            p.episodes.append(episode)

        # Print to stdout, just as an example
        p.rss_file(rssfile, minimize=False)
Esempio n. 6
0
def genero_feed(puntateList):
    if puntateList:
        # Creo un nuovo podcast
        p = Podcast()

        p.name = "Pascal Rai Radio 2"
        p.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
        p.website = "http://www.raiplayradio.it/programmi/pascal/"
        p.explicit = True
        p.image = "https://rss.draghetti.it/pascal_image.jpg"
        p.feed_url = "https://rss.draghetti.it/pascal.xml"
        p.copyright = "Rai Radio 2"
        p.language = "it-IT"

        for puntata in puntateList:
            episode = Episode()

            episode.title = puntata[0].encode("ascii", "ignore")
            episode.link = puntata[1]

            # La dimensione del file e approssimativa
            episode.media = Media(puntata[3], puntata[4])

            if puntata[2]:
                episode.publication_date = datetime.datetime(int(puntata[2].split("/")[2]),
                                                             int(puntata[2].split("/")[1]),
                                                             int(puntata[2].split("/")[0]), 20,
                                                             00, tzinfo=pytz.utc)
            else:
                episode.publication_date = pytz.utc.localize(datetime.datetime.utcnow())

            p.episodes.append(episode)

        # Print to stdout, just as an example
        p.rss_file(rssfile, minimize=False)
Esempio n. 7
0
def genero_feed(episodesList):
    if episodesList:
        # Creo un nuovo podcast
        p = Podcast()

        p.name = "All You Can Dance by Dino Brawn"
        p.description = "Feed Podcast non ufficiale di All You Can Dance by Dino Brown - Powered By Andrea Draghetti"
        p.website = "https://onedance.fm/"
        p.explicit = True
        p.image = "https://rss.draghetti.it/allyoucandance_image.jpg"
        p.feed_url = "https://rss.draghetti.it/allyoucandance.xml"
        p.copyright = "One Dance"
        p.language = "it-IT"

        for episodedetails in episodesList:
            episode = Episode()

            episode.title = episodedetails[1].encode("ascii", "ignore")
            episode.link = episodedetails[2].encode("ascii", "ignore")

            # La dimensione e statistica in base alle puntante analizzate
            episode.media = Media(episodedetails[3], 30000000, type="audio/x-m4a", duration=None)
            episode.publication_date = episodedetails[4]

            p.episodes.append(episode)

        # Print to stdout, just as an example
        p.rss_file(rssfile, minimize=False)
Esempio n. 8
0
def main():
    """Create an example podcast and print it or save it to a file."""
    # There must be exactly one argument, and it is must end with rss
    if len(sys.argv) != 2 or not (
            sys.argv[1].endswith('rss')):
        # Invalid usage, print help message
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc ('Usage: %s ( <file>.rss | rss )' % \
                'python -m podgen')
        print_enc ('')
        print_enc ('  rss              -- Generate RSS test output and print it to stdout.')
        print_enc ('  <file>.rss       -- Generate RSS test teed and write it to file.rss.')
        print_enc ('')
        exit()

    # Remember what type of feed the user wants
    arg = sys.argv[1]

    from podgen import Podcast, Person, Media, Category, htmlencode
    # Initialize the feed
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    e1 = p.add_episode()
    e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    e1.title = 'First Element'
    e1.summary = htmlencode('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
            aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista
            mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam
            domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas
            occultetur? Cum id fugiunt, re eadem defendunt, quae Peripatetici,
            verba <3.''')
    e1.link = 'http://example.com'
    e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    e1.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10, tzinfo=pytz.utc)
    e1.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964,
                     duration=
                     datetime.timedelta(hours=1, minutes=32, seconds=19))

    # Should we just print out, or write to file?
    if arg == 'rss':
        # Print
        print_enc(p.rss_str())
    elif arg.endswith('rss'):
        # Write to file
        p.rss_file(arg, minimize=True)
Esempio n. 9
0
class Qingting(object):
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.qingting.fm/channels/{}'.format(album_id)
        self.album_list_api = "http://api2.qingting.fm/v6/media/channelondemands/{}/programs/order/0/curpage/1/pagesize/100".format(
            album_id)
        self.album_info_api = "http://api2.qingting.fm/v6/media/channelondemands/{}".format(album_id)

    def album(self):
        album_info_content = requests.get(self.album_info_api).content
        album_info_data = json.loads(album_info_content)

        album_list_content = requests.get(self.album_list_api).content
        album_list_data = json.loads(album_list_content)

        self.podcast = Podcast()
        self.podcast.name = album_info_data['data']['title']
        self.podcast.authors.append(Person("Powered by maijver", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = album_info_data['data']['description']
        self.podcast.language = 'cn'
        self.podcast.image = album_info_data['data']['thumbs']['small_thumb'].replace('!200', '')
        self.podcast.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("maijver", '*****@*****.**')

        for each in album_list_data['data']:
            episode = self.podcast.add_episode()
            episode.id = str(each['id'])
            episode.title = each['title']
            print(episode.title)
            episode.image = album_info_data['data']['thumbs']['small_thumb'].replace('!200', '')
            episode.summary = each['title']
            episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, each['id'])
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(each['update_time'])
            episode.media = Media("http://od.qingting.fm/{}".format(each['mediainfo']['bitrates_url'][0]['file_path']),
                                  each['duration'])

        self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)

    @staticmethod
    def reduction_time(created_date):
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour, timestamp.minute,
                        tzinfo=pytz.utc)
Esempio n. 10
0
def genero_feed(puntateList):
    if puntateList:
        # Creo un nuovo podcast
        p = Podcast()

        p.name = "Pascal Rai Radio 2"
        p.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
        p.website = "http://www.raiplayradio.it/programmi/pascal/"
        p.explicit = True
        p.image = "https://rss.draghetti.it/pascal_image.jpg"
        p.feed_url = "https://rss.draghetti.it/pascal.xml"
        p.copyright = "Rai Radio 2"
        p.language = "it-IT"

        for puntata in puntateList:
            episode = Episode()

            episode.title = puntata[0].encode("ascii", "ignore")
            episode.link = puntata[1]

            # La dimensione del file e approssimativa
            episode.media = Media(puntata[3], puntata[4])

            if puntata[2]:
                episode.publication_date = datetime.datetime(
                    int(puntata[2].split("/")[2]),
                    int(puntata[2].split("/")[1]),
                    int(puntata[2].split("/")[0]),
                    20,
                    00,
                    tzinfo=pytz.utc)
            else:
                episode.publication_date = pytz.utc.localize(
                    datetime.datetime.utcnow())

            p.episodes.append(episode)

        # Print to stdout, just as an example
        p.rss_file(rssfile, minimize=False)
Esempio n. 11
0
                'filename': file_local_path,
                'name': video.title
            })
        except HTTPError as err:
            print('Can not parse this video bacause of HTTPError.')

# Create Podcast object and fill it with episodes
podcast_object = Podcast(
    name=CONFIG['podcast_name'],
    description=CONFIG['podcast_description'],
    website=CONFIG['podcast_website'],
    explicit=False,
    image=CONFIG['podcast_image'],
    language=CONFIG['podcast_language'],
    authors=[Person(CONFIG['podcast_author'], CONFIG['podcast_author_email'])],
    owner=Person(CONFIG['podcast_owner'], CONFIG['podcast_owner_email']),
    category=Category(CONFIG['podcast_category'],
                      CONFIG['podcast_subcategory']))

for item in db:
    web_media_path = "%s/podcast_%s.mp4" % (CONFIG['podcast_media_server'],
                                            item['link'][9:])
    podcast_object.add_episode(
        Episode(title=item['name'],
                media=Media(web_media_path,
                            os.stat(item['filename']).st_size,
                            type="audio/mpeg")))

# Generating RSS
podcast_object.rss_file(('%s/rss.xml' % CONFIG['data_path']), minimize=True)
Esempio n. 12
0
print('>>> wrote fresh sessions.json file')

# write the new rss file
p = Podcast()

p.name = "The Objectivism Seminar"
p.category = Category("Society &amp; Culture", "Philosophy")
p.language = "en-US"
p.explicit = True
p.description = (
    "A weekly online conference call to systematically study " +
    "the philosophy of Objectivism via the works of prominent Rand scholars.")
p.website = "https://www.ObjectivismSeminar.com"
p.image = "https://www.ObjectivismSeminar.com/assets/images/atlas-square.jpg"
p.feed_url = "https://www.ObjectivismSeminar.com/archives/rss"
p.authors = [Person("Greg Perkins, Host", "*****@*****.**")]
p.owner = Person("Greg Perkins", "*****@*****.**")

p.episodes += [
    Episode(title=x['title'],
            media=Media(x['link'], type="audio/mpeg", size=x['length']),
            id=x['GUID'],
            publication_date=x['pubDate'],
            summary=x['description']) for x in updated_session_items
]

p.rss_file(rss_filename)

print('>>> wrote fresh rss.xml file')
Esempio n. 13
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(
            album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # 初始化
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'album-intro').get_text()
        self.podcast.language = 'cn'
        self.podcast.image = soup.find(
            'div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api,
                                          headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        count = len(album_list_data['data']['tracksAudioPlay'])
        for each in album_list_data['data']['tracksAudioPlay']:
            try:
                page_info = requests.get('http://www.ximalaya.com/%s' %
                                         each['trackUrl'],
                                         headers=self.header)
                soup_info = BeautifulSoup(page_info.content, "lxml")
                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print self.podcast.name + '=====' + each['trackName']
                image = each['trackCoverPath'].split('!')[0]
                if (image[-4:] == '.gif' or image[-4:] == '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                if soup_info.find('article', 'intro'):
                    episode.summary = soup_info.find(
                        'article',
                        'intro').get_text().encode('gbk',
                                                   'ignore').decode('gbk')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com/%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    soup_info.find('span', 'time').get_text())
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                print('异常:', e)
                print('异常 URL:',
                      'http://www.ximalaya.com/%s' % each['trackUrl'])

        # 生成文件
        # print self.podcast.rss_str()
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    # 时间转换
    @staticmethod
    def reduction_time(created_date):
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return datetime(timestamp.year,
                        timestamp.month,
                        timestamp.day,
                        timestamp.hour,
                        timestamp.minute,
                        tzinfo=pytz.utc)
Esempio n. 14
0
class Ximalaya:
    def __init__(self, album_id):
        self.headers = tools.get_headers()
        self.podcast = None
        self.album_id = album_id
        self.episode_pre_page = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.episode_detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"

    def get_podcast(self):
        webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] == 200:
            album_info_data = album_info['data']

            self.podcast = Podcast()
            self.podcast.name = album_info_data['mainInfo']['albumTitle']
            self.podcast.website = self.album_url.format(self.album_id)
            if album_info_data['mainInfo']['richIntro']:
                self.podcast.description = album_info_data['mainInfo']['richIntro']
            self.podcast.language = 'cn'
            self.podcast.image = 'https:' + album_info_data['mainInfo']['cover'].split('!')[0]
            self.podcast.generator = 'kanemori.getpodcast'
            self.podcast.explicit = False
            self.podcast.withhold_from_itunes = True

            text = ''
            page_num = 1
            album_page_count = math.ceil(album_info_data['tracksInfo']['trackTotalCount'] / self.episode_pre_page) + 1
            while page_num <= album_page_count:
                webpage = tools.get_url(self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
                                        self.headers)
                album_list = json.loads(webpage.decode('utf-8'))
                for episode_info in album_list['data']['tracksAudioPlay']:
                    _, link = self.get_episode(episode_info['trackId'])
                    text += link

                page_num += 1

        path = './podcast/ximalaya'
        if not os.path.exists(path):
            os.makedirs(path)

        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        # tools.save_m4a(os.path.join(path, '{}.txt'.format(self.album_id)), text)
        print("「{}」が上手に焼きました".format(self.album_id))

    def get_episode(self, episode_id):
        trycount = 0
        findepisode = False

        while not findepisode:
            if trycount > 0:
                print("再接続中" + str(trycount) + "......")
            if trycount > 1:
                print("error url: " + self.episode_detail_url.format(episode_id) + "\n")
                return False, "error url: " + self.episode_detail_url.format(episode_id) + "\n"

            webpage = tools.get_url(self.episode_detail_url.format(episode_id), self.headers)
            detail = json.loads(webpage.decode('utf-8'))
            episode = self.podcast.add_episode()
            episode.id = str('ximalaya_' + str(episode_id))
            episode.title = detail['title']
            # print(self.podcast.name + '=====' + episode.title)
            if 'intro' in detail:
                episode.summary = detail['intro'].replace('\r', '\\r').replace('\n', '\\n')
            episode.publication_date = tools.publication_time(detail['createdAt'])
            episode.media = Media(detail['playUrl32'], duration=timedelta(milliseconds=detail['duration']))
            # episode.media = Media.create_from_server_response(detail['playUrl32'],
            #                                                   duration=timedelta(seconds=detail['duration']))
            episode.position = 1
            findepisode = True

            if not findepisode:
                trycount += 1
                print("30秒後に再接続する.......")
                sleep(30)

        return True, detail['playUrl32'] + '\n'
def lambda_handler(event, context):
    print('Starting cccRssBuilder Lambda function')
    # Get episodes from DynamoDB
    episodes = query_episodes()
    episodes.sort(key=lambda x: x['episode-num'])

    # Create the podcast feed
    # Main podcast info comes from "episode 0"
    episodeInfo = episodes[0]
    separator = ', '
    p = Podcast()
    p.name = episodeInfo['name']
    p.description = episodeInfo['description']
    p.website = episodeInfo['website']
    p.explicit = episodeInfo['explicit']
    p.image = episodeInfo['image']
    p.feed_url = episodeInfo['feed-url']
    p.language = episodeInfo['language']
    p.category = Category(episodeInfo['category'], episodeInfo['subcategory'])
    p.owner = Person(episodeInfo['owner-name'], episodeInfo['owner-email'])
    p.authors = [Person(episodeInfo['owner-name'], episodeInfo['owner-email'])]

    # Process each episode
    for episode in episodes:
        # Skip "Episode 0"
        if episode['episode-num'] == 0:
            continue
        # Check if episode contains media file info (name, duration, size).  If not, add it to db and episode object.
        if 'media-file' not in episode:
            episodeNum = episode['episode-num']
            print('Analyzing media file for episode', episodeNum)
            mediaFile = 'ccc-{:03d}-{}.mp3'.format(int(episodeNum),
                                                   episode['pub-date'])
            print('Media file:', mediaFile)
            localMediaFile = '/tmp/' + mediaFile
            s3 = boto3.client('s3')
            s3.download_file('kwksolutions.com', 'ccc/media/' + mediaFile,
                             localMediaFile)

            # Try to analyze the mp3 file - looking for duration and file size
            try:
                audio = MP3(localMediaFile)
            except:
                print('Not an MP3 file!')
                return
            duration = round(audio.info.length)
            hours = int(duration / 3600)
            minutes = int((duration % 3600) / 60)
            seconds = duration % 60
            if hours == 0:
                durationStr = '{:02d}:{:02d}'.format(minutes, seconds)
            else:
                durationStr = '{:02d}:{:02d}:{:02d}'.format(
                    hours, minutes, seconds)
            size = str(os.path.getsize(localMediaFile))
            update_episode(episodeNum, mediaFile, size, durationStr)
            episode['media-file'] = mediaFile
            episode['size'] = size
            episode['duration'] = durationStr

        # Figure out all the info needed for the episode object
        mediaURL = 'https://www.kwksolutions.com/ccc/media/' + episode[
            'media-file']
        durationList = episode['duration'].split(':')
        secs = int(durationList[-1])
        mins = int(durationList[-2])
        try:
            h = int(durationList[-3])
        except:
            h = 0
        pubdateList = episode['pub-date'].split('-')
        year = int(pubdateList[0])
        month = int(pubdateList[1])
        day = int(pubdateList[2])

        # Build the episode object
        e = p.add_episode()
        e.id = mediaURL
        e.title = 'Episode ' + str(episode['episode-num'])
        e.summary = episode['description']
        e.link = 'http://christcommunitycarmel.org/get-involved/podcasts'
        e.publication_date = datetime.datetime(year,
                                               month,
                                               day,
                                               12,
                                               00,
                                               00,
                                               tzinfo=pytz.timezone('EST'))
        e.media = Media(mediaURL,
                        episode['size'],
                        duration=datetime.timedelta(hours=h,
                                                    minutes=mins,
                                                    seconds=secs))

    # Write the rss file
    print('Writing RSS file to S3')
    rssLocalFile = '/tmp/podcast.rss'
    rssS3File = 'ccc/podcast.rss'
    p.rss_file(rssLocalFile)
    s3 = boto3.client('s3')
    s3.upload_file(rssLocalFile,
                   'kwksolutions.com',
                   rssS3File,
                   ExtraArgs={'ContentType': 'text/xml'})

    return
Esempio n. 16
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.ximalaya.com/album/%s/' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        page = requests.get(self.url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # 初始化
        self.podcast = Podcast()
        self.podcast.name = soup.find('div', 'detailContent_title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'mid_intro').get_text()
        self.podcast.language = 'cn'
        self.podcast.image = soup.find('a', 'albumface180').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        sound_ids = soup.find('div', class_='personal_body').get('sound_ids').split(',')

        for sound_id in sound_ids:
            date = soup.find('li', sound_id=sound_id).find('div', class_='operate').get_text().strip()
            self.detail(sound_id, date)
        # 生成文件
        # print self.podcast.rss_str()
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    def detail(self, sound_id, date):
        detail_url = 'http://www.ximalaya.com/tracks/%s.json' % sound_id
        response = requests.get(detail_url, headers=self.header)
        item = json.loads(response.content)

        episode = self.podcast.add_episode()
        episode.id = str(item['id'])
        episode.title = item['title']
        episode.image = item['cover_url_142'].split('?')[0]
        episode.summary = (item['intro'].replace('\n', '') if item['intro'] else '')
        episode.link = 'http://www.ximalaya.com/sound/%d' % item['id']
        episode.authors = [Person("forecho", '*****@*****.**')]
        episode.publication_date = self.reduction_time(
            date, item['formatted_created_at'])
        episode.media = Media(item['play_path_64'], 454599964)
        print self.podcast.name + '=====' + item['title']

    # 时间转换 第一个参数是年月日 第二个参数"12月11日 17:00"
    @staticmethod
    def reduction_time(date, created_date):
        timestamp = datetime.strptime(date, "%Y-%m-%d")
        created_at = datetime.strptime(created_date.split(' ')[1], "%H:%M")
        return datetime(timestamp.year, timestamp.month, timestamp.day, created_at.hour, created_at.minute,
                        tzinfo=pytz.utc)
Esempio n. 17
0
def create_rss(type, download):
    """Create an example podcast and print it or save it to a file."""
    
    # Create the Podcast & initialize the feed
    default_channel = Channel.defaultChannel()
    
    p = Podcast()
    p.name          = default_channel.name
    p.description   = default_channel.description
    p.website       = default_channel.website
    p.explicit      = default_channel.explicit
    p.image         = default_channel.image

    p.copyright     = default_channel.copyright
    p.language      = default_channel.language
    p.feed_url      = default_channel.feed_url
    p.category      = Category(default_channel.category)
    # p.category = Category('Technology', 'Podcasting')
    # p.xslt      = "https://example.com/feed/stylesheet.xsl"  # URL of XSLT stylesheet

    p.authors   = [Person(default_channel.authors, default_channel.authors_email)]
    p.owner     = Person(default_channel.owner, default_channel.owner_email)

    # Other Attributes
    p.generator = " "
    
    # Others for iTunes
    # p.complete = False
    # p.new_feed_url = 'http://example.com/new-feed.rss'

    # e1 = p.add_episode()
    # e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    # e1.title = 'First Element'
    # e1.summary = htmlencode('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
    #         aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista
    #         mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam
    #         domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas
    #         occultetur? Cum id fugiunt, re eadem defendunt, quae Peripatetici,
    #         verba <3.''')
    # e1.link = 'http://example.com'
    # e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    # e1.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10, tzinfo=pytz.utc)
    # # e1.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964,
    # #                  duration=
    # #                  datetime.timedelta(hours=1, minutes=32, seconds=19))
    # e1.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964)

    # Add some episodes
    p.episodes += [
       Episode(title = download.title, 
            subtitle = download.subtitle,
            # id=str(uuid.uuid4()),
            position =2,
            media = Media(download.media_url, size=download.media_size, duration=timedelta(seconds=download.media_duration)),
            image = download.image_url,
            publication_date = datetime(year=2021, month=1, day=8, hour=10, minute=0, tzinfo=pytz.utc),
            summary = download.summary)
    ,
       Episode(title="Episode 2 - The Crazy Ones",
            subtitle="this is a cool episode, this is for th crazy ones",
            position=1,
            image="https://github.com/oliverbarreto/PersonalPodcast/raw/main/site-logo-1400x1400.png",
            media=Media("https://github.com/oliverbarreto/PersonalPodcast/raw/main/downloaded_with_pytube_Apple%20Steve%20Jobs%20Heres%20To%20The%20Crazy%20Ones.mp4", type="audio/mpeg", size=989, duration=timedelta(hours=0, minutes=1, seconds=1)),
            publication_date = datetime(year=2021, month=1, day=6, hour=10, minute=0, tzinfo=pytz.utc),
            summary=htmlencode("wow wow wow summary"))
    ,
        Episode(title="Episode 3 - The Super Crazy",
            subtitle="crazy ones revisited",
            position=0,
            image="https://github.com/oliverbarreto/PersonalPodcast/raw/main/site-logo-1400x1400.png",
            media=Media("https://drive.google.com/file/d/1X5Mwa8V0Su1IDqhcQL7LdzEY0VaMC1Nn", type="audio/mpeg", size=989, duration=timedelta(hours=0, minutes=1, seconds=1)),
            publication_date = datetime(year=2021, month=1, day=10, hour=10, minute=0, tzinfo=pytz.utc),
            summary=download.summary)
    ]

    # Should we just print out, or write to file?
    if type == 'print':
        # Print
        print_enc(p.rss_str())
    elif type== 'feed.xml':
        # Write to file
        p.rss_file(type, minimize=False)
        print("\n")
        print("feed.xml created !!!")
Esempio n. 18
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.time_api = 'https://www.ximalaya.com/revision/time'
        self.s = requests.session()
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'User-Agent':
            'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Content-Type': 'application/json;charset=UTF-8',
            'Referer': self.album_url.format(self.album_id),
            'Accept-Encoding': "gzip, deflate",
            'Connection': "keep-alive",
            'cache-control': "no-cache",
        }

    def album(self):
        self.get_sign()
        album_info = self.s.get(self.album_info_url.format(self.album_id),
                                headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] == 200:
            album_info_data = album_info_content['data']

            # 初始化
            self.podcast = Podcast()
            self.podcast.name = album_info_data['mainInfo']['albumTitle']
            self.podcast.authors.append(
                Person("Powered by forecho", '*****@*****.**'))
            self.podcast.website = self.album_url.format(self.album_id)
            self.podcast.copyright = 'cc-by'
            if album_info_data['mainInfo']['richIntro']:
                self.podcast.description = album_info_data['mainInfo'][
                    'richIntro']
            else:
                self.podcast.description = self.podcast.name
            self.podcast.language = 'cn'
            self.podcast.image = 'https:' + album_info_data['mainInfo'][
                'cover'].split('!')[0]
            self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
            self.podcast.category = Category('Technology', 'Podcasting')
            self.podcast.explicit = False
            self.podcast.complete = False
            self.podcast.owner = Person("forecho", '*****@*****.**')
            page_num = 1
            # py2 +1
            track_total_count = math.ceil(
                album_info_data['tracksInfo']['trackTotalCount'] /
                self.page_size) + 1
            while page_num <= track_total_count:
                self.header["Host"] = "www.ximalaya.com"
                album_list = self.s.get(self.album_list_url.format(
                    self.album_id, page_num, self.page_size),
                                        headers=self.header).content
                album_list_content = json.loads(album_list.decode('utf-8'))
                count = len(album_list_content['data']['tracksAudioPlay'])
                for each in album_list_content['data']['tracksAudioPlay']:
                    try:
                        self.header["Host"] = "mobile.ximalaya.com"
                        detail = requests.get(self.detail_url.format(
                            each['trackId']),
                                              headers=self.header).content
                        detail_content = json.loads(detail.decode('utf-8'))
                        episode = self.podcast.add_episode()
                        episode.id = str(each['index'])
                        episode.title = each['trackName']
                        print(self.podcast.name + '=====' + each['trackName'])
                        image = each['trackCoverPath'].split('!')[0]
                        if image[-4:] == '.png' or image[-4:] == '.jpg':
                            episode.image = 'https:' + image
                        else:
                            episode.image = self.podcast.image
                        if 'intro' in detail_content:
                            episode.summary = detail_content['intro'].replace(
                                '\r\n', '')
                        else:
                            episode.summary = each['trackName']
                        episode.link = 'http://www.ximalaya.com%s' % each[
                            'albumUrl']
                        episode.authors = [
                            Person("forecho", '*****@*****.**')
                        ]
                        episode.publication_date = self.reduction_time(
                            detail_content['createdAt'])
                        episode.media = Media(each['src'], each['duration'])
                        episode.position = count - each['index'] + 1
                    except Exception as e:
                        print('异常:', e)
                        print('异常 URL:',
                              'https://www.ximalaya.com%s' % each['trackUrl'])
                        traceback.print_exc()
                # 生成文件
                # print self.podcast.rss_str()
                page_num = page_num + 1
            self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                                  minimize=True)

    def get_time(self):
        """
        获取服务器时间戳
        :return:
        """
        r = self.s.get(self.time_api, headers=self.header)
        return r.text

    def get_sign(self):
        """
        获取sign: md5(ximalaya-服务器时间戳)(100以内随机数)服务器时间戳(100以内随机数)现在时间戳
        :return: xm_sign
        """
        now_time = str(round(time.time() * 1000))
        server_time = self.get_time()
        sign = str(
            hashlib.md5("himalaya-{}".format(server_time).encode()).hexdigest(
            )) + "({})".format(str(round(
                random.random() * 100))) + server_time + "({})".format(
                    str(round(random.random() * 100))) + now_time
        self.header["xm-sign"] = sign
        # print(sign)
        # return sign

    # 时间转换 参数 毫秒时间戳
    @staticmethod
    def reduction_time(time):
        timestamp = datetime.fromtimestamp(time / 1000)
        return datetime(timestamp.year,
                        timestamp.month,
                        timestamp.day,
                        timestamp.hour,
                        timestamp.minute,
                        tzinfo=pytz.utc)
Esempio n. 19
0
p = Podcast(
    name="Chapo Cheaters Club",
    description="Just taking"
    "what isn't mine",
    website=host_address,
    explicit=False,
)

p.image = host_address + "logo.jpg"

for pod in find_pods():
    audio = MP3(os.path.join(base_dir, pod_dir, pod), ID3=EasyID3)
    if audio:
        title = str(*audio["title"])
    else:
        title = pod[:~3]
    size = os.path.getsize(os.path.join(base_dir, pod_dir, pod))
    duration = timedelta(seconds=audio.info.length)
    print(f'{pod}, {title}, {size}, {duration}')
    p.episodes += [
        Episode(title=title,
                media=Media(f"{host_address}{pod}",
                            size=size,
                            duration=duration),
                summary="summary goes here")
    ]

rss = p.rss_str()
p.rss_file(os.path.join(base_dir, pod_dir, 'rss.xml'))
Esempio n. 20
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # 初始化
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        if soup.find('div', 'album-intro') and soup.find('div', 'album-intro').get_text():
            self.podcast.description = soup.find('div', 'album-intro').get_text()
        else:
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        count = len(album_list_data['data']['tracksAudioPlay'])
        for each in album_list_data['data']['tracksAudioPlay']:
            try:
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                response = requests.get(detail_url, headers=self.header)
                item = json.loads(response.content)

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])
                image = each['trackCoverPath'].split('!')[0]
                if (image[-4:] == '.gif' or image[-4:] == '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(item['time_until_now'], item['formatted_created_at'])
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()
            
        # 生成文件
        # print self.podcast.rss_str()
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    # 时间转换 第一个参数是  "3年前", "12月11日 17:00"
    @staticmethod
    def reduction_time(time_until_now, created_at):
        date = datetime.strptime(created_at, "%m月%d日 %H:%M")
        reduction_year = datetime.now().year
        if '年前' in time_until_now:
            year = int(time_until_now.split('年前')[0])
            reduction = (datetime.now(tzlocal()) - relativedelta(years=year))
            if humanize_time(reduction) != ('%s years' % year):
                reduction_year = (datetime.now(tzlocal()) - relativedelta(years=year + 1)).year
            else:
                reduction_year = reduction.year
        elif '月前' in time_until_now:
            month = int(time_until_now.split('月前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(months=month)).year
        elif '天前' in time_until_now:
            day = int(time_until_now.split('天前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(days=day)).year

        return datetime(reduction_year, date.month, date.day, date.hour, date.second, tzinfo=pytz.utc)
Esempio n. 21
0
def generate(name, description, website, explicit, image, author_name,
             author_email, feed_path, copyright, language, category, blog,
             blog_path, verbose, folder):
    """Generate a podcast from mp3 files located in the provided FOLDER"""
    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    attrs = locals()
    logging.debug('Processing input: %s' % (attrs))
    del attrs['folder']
    del attrs['author_name']
    del attrs['author_email']
    del attrs['verbose']
    del attrs['feed_path']
    del attrs['blog']
    del attrs['blog_path']

    attrs['authors'] = [Person(author_name, author_email)]
    attrs['owner'] = attrs['authors'][0]

    attrs['category'] = Category(category)

    feed_name = name.lower().replace(' ', '_') + '.rss'
    feed_base = '%s/%s' % (website, feed_path)
    feed_url = '%s/%s' % (feed_base, feed_name)
    attrs['feed_url'] = feed_url

    logging.info('Creating podcast %s, feed %s' % (name, feed_url))
    p = Podcast(**attrs)

    for fpath in sorted(glob.glob('%s*.mp3' % (folder))):
        logging.info('Adding episode %s' % (fpath))
        fname = os.path.basename(fpath)
        size = os.path.getsize(fpath)
        logging.debug('Filename: %s, size %i' % (fname, size))
        try:
            tag = ID3(fpath)
        except ID3NoHeaderError:
            logging.error('%s is not a valid mp3 file, ignoring it' % (fpath))
            continue
        logging.debug('Read tag: %s' % (tag))
        e = Episode()
        if 'TPE1' in tag:
            e.authors = [Person(tag['TPE1'][0])]
        else:
            e.authors = attrs['authors']
        e.title = tag['TIT2'][0]
        e.subtitle = e.title
        if 'COMM::eng' in tag:
            e.summary = tag['COMM::eng'][0]
        else:
            e.summary = description
        episode_url = '%s/%s' % (feed_base, fname)
        logging.debug('Episode url: %s' % (episode_url))
        e.media = Media(episode_url, size, type='audio/mpeg')
        e.media.populate_duration_from(fpath)
        pubdate = datetime.strptime(tag['TDRC'][0].text[:10], '%Y-%m-%d')
        pubdate = pubdate.replace(tzinfo=pytz.utc)
        e.publication_date = pubdate
        if blog:
            blog_post = ''
            short_name = re.search('[a-z]*_-_([a-z_]*[#0-9]*)', fname)
            if short_name:
                blog_post = short_name.group(1).replace('_', '-').\
                    replace('#', '') + '.html'
                e.link = '%s/%s/%s' % (website, blog_path, blog_post)
        p.episodes.append(e)

    feed_local_path = '%s%s' % (folder, feed_name)
    logging.info('Generating feed in %s' % (feed_local_path))
    p.rss_file(feed_local_path, minimize=False)
Esempio n. 22
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url.format(self.album_id),
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        album_info = requests.get(self.album_info_url.format(self.album_id),
                                  headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] == 200:
            album_info_data = album_info_content['data']

            # 初始化
            self.podcast = Podcast()
            self.podcast.name = album_info_data['mainInfo']['albumTitle']
            self.podcast.authors.append(
                Person("Powered by forecho", '*****@*****.**'))
            self.podcast.website = self.album_url.format(self.album_id)
            self.podcast.copyright = 'cc-by'
            if album_info_data['mainInfo']['richIntro']:
                self.podcast.description = album_info_data['mainInfo'][
                    'richIntro']
            else:
                self.podcast.description = self.podcast.name
            self.podcast.language = 'cn'
            self.podcast.image = 'https:' + album_info_data['mainInfo'][
                'cover'].split('!')[0]
            self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
            self.podcast.category = Category('Technology', 'Podcasting')
            self.podcast.explicit = False
            self.podcast.complete = False
            self.podcast.owner = Person("forecho", '*****@*****.**')
            page_num = 1
            # py2 +1
            track_total_count = math.ceil(
                album_info_data['tracksInfo']['trackTotalCount'] /
                self.page_size) + 1
            while page_num <= track_total_count:
                album_list = requests.get(self.album_list_url.format(
                    self.album_id, page_num, self.page_size),
                                          headers=self.header).content
                album_list_content = json.loads(album_list.decode('utf-8'))
                count = len(album_list_content['data']['tracksAudioPlay'])
                for each in album_list_content['data']['tracksAudioPlay']:
                    try:
                        detail = requests.get(self.detail_url.format(
                            each['trackId']),
                                              headers=self.header).content
                        detail_content = json.loads(detail.decode('utf-8'))
                        episode = self.podcast.add_episode()
                        episode.id = str(each['index'])
                        episode.title = each['trackName']
                        print(self.podcast.name + '=====' + each['trackName'])
                        image = each['trackCoverPath'].split('!')[0]
                        if image[-4:] == '.png' or image[-4:] == '.jpg':
                            episode.image = 'https:' + image
                        else:
                            episode.image = self.podcast.image
                        if 'intro' in detail_content:
                            episode.summary = detail_content['intro'].replace(
                                '\r\n', '')
                        else:
                            episode.summary = each['trackName']
                        episode.link = 'http://www.ximalaya.com%s' % each[
                            'albumUrl']
                        episode.authors = [
                            Person("forecho", '*****@*****.**')
                        ]
                        episode.publication_date = self.reduction_time(
                            detail_content['createdAt'])
                        episode.media = Media(each['src'], each['duration'])
                        episode.position = count - each['index'] + 1
                    except Exception as e:
                        print('异常:', e)
                        print('异常 URL:',
                              'https://www.ximalaya.com%s' % each['trackUrl'])
                        traceback.print_exc()
                # 生成文件
                # print self.podcast.rss_str()
                page_num = page_num + 1
            self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                                  minimize=True)

    # 时间转换 参数 毫秒时间戳
    @staticmethod
    def reduction_time(time):
        timestamp = datetime.fromtimestamp(time / 1000)
        return datetime(timestamp.year,
                        timestamp.month,
                        timestamp.day,
                        timestamp.hour,
                        timestamp.minute,
                        tzinfo=pytz.utc)
Esempio n. 23
0
            11932295),
        summary="With an English name adapted directly from Afrikaans "
        '-- literally meaning "earth pig" -- this fascinating '
        "animal has both circular teeth and a knack for "
        "digging.",
    ),
    Episode(
        title="ya, das ist die heiße scheiße",
        media=Media(
            "https://github.com/ssk8/365days_of_plops/raw/main/poop02.mp3",
            15363464),
        summary="Colon evacuation "
        "hurt my anus ",
    ),
    Episode(
        title="Spicy Shit",
        media=Media(
            "https://github.com/ssk8/365days_of_plops/raw/main/poop03.mp3",
            15363464),
        summary="burning shit "
        "really hurt my anus "
        "Case in point: we have found clothing made from "
        "alpaca fiber that is 2000 years old. How is this "
        "possible, and what makes it different from llamas?",
    )
]
# Generate the RSS feed
rss = p.rss_str()

p.rss_file('rss.xml', minimize=True)