def genero_feed(episodesList):
    """Build the "NECST Tech Time" feed and write it to ``rssfile``.

    Nothing happens when ``episodesList`` is empty.  Each entry is read
    positionally: index 1 is the title, 2 the link, 3 the media URL and
    4 the publication date.  ``rssfile`` is not a parameter; it is resolved
    from the enclosing (module) scope.
    """
    if not episodesList:
        return

    # Channel-level metadata.
    feed = Podcast()
    feed.name = "NECST Tech Time"
    feed.description = "The NECSTLab (Novel, Emerging Computing System Technologies Laboratory) is a laboratory inside DEIB department of Politecnico di Milano, where there are a number of different research lines on advanced topics in computing systems: from architectural characteristics, to hardware-software codesign methodologies, to security and dependability issues of complex system architectures (scaling from mobile devices to large virtualized datacenters)."
    feed.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/necst_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
    feed.copyright = "Poli Radio"
    feed.language = "it-IT"

    for details in episodesList:
        item = Episode()
        item.title = details[1].encode("ascii", "ignore")
        item.link = details[2].encode("ascii", "ignore")
        # The size is a fixed estimate derived from previously analysed
        # episodes, not the real size of this file.
        item.media = Media(details[3], 30000000, type="audio/x-m4a",
                           duration=None)
        item.publication_date = details[4]
        feed.episodes.append(item)

    # Serialise the feed (pretty-printed) to the file named by ``rssfile``.
    feed.rss_file(rssfile, minimize=False)
def generate_podcast_xml(base, books):
    """Write ``skeleton/rss.xml`` with one episode per audiobook file.

    Args:
        base: URL prefix under which the ``files/`` directory is served.
        books: iterable of file names located in the local ``files/``
            directory.

    The episode title is the file name without its extension.  The media
    size is read from the local copy; when the file cannot be stat'ed the
    error is printed and a size of 0 is used instead.
    """
    from podgen import Podcast, Episode
    from podgen import Media

    p = Podcast()
    p.name = "AeonNeo's Audiobooks"
    p.description = "Description"
    p.website = "www.yangvincent.com"
    p.explicit = False

    # Create one episode per book.
    for book_name in books:
        ep = Episode()
        # splitext removes the real extension whatever its length; slicing
        # off a fixed four characters mangled names such as "x.flac".
        ep.title = os.path.splitext(book_name)[0]
        full_path = base + '/files/' + book_name
        dev_path = 'files/' + book_name
        try:
            book_size = os.path.getsize(dev_path)
        except OSError as e:
            print(e)
            book_size = 0
        ep.media = Media(full_path, type='audio/mp4a', size=book_size)
        p.episodes.append(ep)

    # Generate the (minimised) RSS file.
    p.rss_file('skeleton/rss.xml', minimize=True)
def genero_feed(puntateList):
    """Build the "Il Ruggito del Coniglio" feed and write it to ``rssfile``.

    Nothing happens when ``puntateList`` is empty.  Each entry is read
    positionally: index 0 is the title, 1 the link, 2 a ``dd/mm/yyyy`` date
    string (or a falsy value), 3 the media URL and 4 the media size.
    ``rssfile`` is resolved from the enclosing (module) scope.
    """
    if not puntateList:
        return

    # Channel-level metadata.
    feed = Podcast()
    feed.name = "Il Ruggito del Coniglio"
    feed.description = "Il Ruggito del Coniglio, il programma cult di Radio 2 condotto da Marco Presta e Antonello Dose, racconta l'attualita con folgorante ironia."
    feed.website = "http://www.raiplayradio.it/programmi/ilruggitodelconiglio/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/ruggitodelconiglio_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/ruggitodelconiglio.xml"
    feed.copyright = "Rai Radio 2"
    feed.language = "it-IT"

    for entry in puntateList:
        item = Episode()
        item.title = entry[0].encode("ascii", "ignore")
        item.link = entry[1]
        # The file size supplied by the caller is approximate.
        item.media = Media(entry[3], entry[4])

        raw_date = entry[2]
        if raw_date:
            # dd/mm/yyyy -> fixed 10:00 UTC on that day.
            parts = raw_date.split("/")
            item.publication_date = datetime.datetime(
                int(parts[2]), int(parts[1]), int(parts[0]),
                10, 0, tzinfo=pytz.utc)
        else:
            # No date available: fall back to the current UTC time.
            item.publication_date = pytz.utc.localize(
                datetime.datetime.utcnow())

        feed.episodes.append(item)

    # Serialise the feed (pretty-printed) to the file named by ``rssfile``.
    feed.rss_file(rssfile, minimize=False)
def genero_feed(episodesList):
    """Build the unofficial "NECST Tech Time" feed and write it to ``rssfile``.

    Nothing happens when ``episodesList`` is empty.  Each entry is read
    positionally: index 1 is the title, 2 the link, 3 the media URL and
    4 the publication date.  ``rssfile`` is resolved from the enclosing
    (module) scope.
    """
    if not episodesList:
        return

    # Channel-level metadata.
    feed = Podcast()
    feed.name = "NECST Tech Time"
    feed.description = "Feed Podcast non ufficiale di NECST Tech Time - Powered By Andrea Draghetti"
    feed.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/necst_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
    feed.copyright = "Poli Radio"
    feed.language = "it-IT"

    for details in episodesList:
        item = Episode()
        item.title = details[1].encode("ascii", "ignore")
        item.link = details[2].encode("ascii", "ignore")
        # The size is a fixed estimate derived from previously analysed
        # episodes, not the real size of this file.
        item.media = Media(details[3], 30000000, type="audio/x-m4a",
                           duration=None)
        item.publication_date = details[4]
        feed.episodes.append(item)

    # Serialise the feed (pretty-printed) to the file named by ``rssfile``.
    feed.rss_file(rssfile, minimize=False)
def genero_feed(puntateList):
    """Build the "Pascal Rai Radio 2" feed and write it to ``rssfile``.

    Nothing happens when ``puntateList`` is empty.  Each entry is read
    positionally: index 0 is the title, 1 the link, 2 a ``dd/mm/yyyy`` date
    string (or a falsy value), 3 the media URL and 4 the media size.
    ``rssfile`` is resolved from the enclosing (module) scope.
    """
    if not puntateList:
        return

    # Channel-level metadata.
    feed = Podcast()
    feed.name = "Pascal Rai Radio 2"
    feed.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    feed.website = "http://www.raiplayradio.it/programmi/pascal/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/pascal_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/pascal.xml"
    feed.copyright = "Rai Radio 2"
    feed.language = "it-IT"

    for entry in puntateList:
        item = Episode()
        item.title = entry[0].encode("ascii", "ignore")
        item.link = entry[1]
        # The file size supplied by the caller is approximate.
        item.media = Media(entry[3], entry[4])

        raw_date = entry[2]
        if raw_date:
            # dd/mm/yyyy -> fixed 20:00 UTC on that day.
            parts = raw_date.split("/")
            item.publication_date = datetime.datetime(
                int(parts[2]), int(parts[1]), int(parts[0]),
                20, 0, tzinfo=pytz.utc)
        else:
            # No date available: fall back to the current UTC time.
            item.publication_date = pytz.utc.localize(
                datetime.datetime.utcnow())

        feed.episodes.append(item)

    # Serialise the feed (pretty-printed) to the file named by ``rssfile``.
    feed.rss_file(rssfile, minimize=False)
def genero_feed(episodesList):
    """Build the unofficial "All You Can Dance" feed and write it to ``rssfile``.

    Nothing happens when ``episodesList`` is empty.  Each entry is read
    positionally: index 1 is the title, 2 the link, 3 the media URL and
    4 the publication date.  ``rssfile`` is resolved from the enclosing
    (module) scope.
    """
    if not episodesList:
        return

    # Channel-level metadata.
    # NOTE(review): the name says "Brawn" while the description says
    # "Brown" - confirm which spelling is intended.
    feed = Podcast()
    feed.name = "All You Can Dance by Dino Brawn"
    feed.description = "Feed Podcast non ufficiale di All You Can Dance by Dino Brown - Powered By Andrea Draghetti"
    feed.website = "https://onedance.fm/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/allyoucandance_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/allyoucandance.xml"
    feed.copyright = "One Dance"
    feed.language = "it-IT"

    for details in episodesList:
        item = Episode()
        item.title = details[1].encode("ascii", "ignore")
        item.link = details[2].encode("ascii", "ignore")
        # The size is a fixed estimate derived from previously analysed
        # episodes, not the real size of this file.
        item.media = Media(details[3], 30000000, type="audio/x-m4a",
                           duration=None)
        item.publication_date = details[4]
        feed.episodes.append(item)

    # Serialise the feed (pretty-printed) to the file named by ``rssfile``.
    feed.rss_file(rssfile, minimize=False)
def main():
    """Create an example podcast and print it or save it to a file.

    Expects exactly one command-line argument ending in ``rss``:
    the literal ``rss`` prints the feed to stdout, anything else ending in
    ``rss`` is used as the output file name.  With any other invocation a
    usage message is printed and the process exits.
    """
    # There must be exactly one argument, and it must end with rss.
    if len(sys.argv) != 2 or not sys.argv[1].endswith('rss'):
        # Invalid usage, print help message.
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc('Usage: %s ( <file>.rss | rss )' % 'python -m podgen')
        print_enc('')
        print_enc(' rss -- Generate RSS test output and print it to stdout.')
        print_enc(' <file>.rss -- Generate RSS test teed and write it to file.rss.')
        print_enc('')
        # sys.exit() rather than the site-provided exit(): the latter is an
        # interactive-shell helper and is not guaranteed to exist.
        sys.exit()

    # Remember what type of feed the user wants.
    arg = sys.argv[1]

    from podgen import Podcast, Person, Media, Category, htmlencode

    # Initialize the feed.
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    e1 = p.add_episode()
    e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    e1.title = 'First Element'
    # The embedded line break is written as an explicit "\n" so the value
    # does not depend on how this source file is wrapped.
    e1.summary = htmlencode(
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen "
        "aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista "
        "mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam "
        "domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas "
        "occultetur? \n"
        "Cum id fugiunt, re eadem defendunt, quae Peripatetici, verba <3.")
    e1.link = 'http://example.com'
    e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    e1.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10,
                                            tzinfo=pytz.utc)
    e1.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964,
                     duration=datetime.timedelta(hours=1, minutes=32,
                                                 seconds=19))

    # Should we just print out, or write to file?
    if arg == 'rss':
        print_enc(p.rss_str())
    else:
        # The argument was validated above, so it ends with "rss" and names
        # the output file.
        p.rss_file(arg, minimize=True)
class Qingting(object):
    """Turn a qingting.fm album into a podcast RSS file.

    The album metadata and the first page (up to 100 programmes) of its
    episode list are fetched from the qingting.fm v6 API endpoints built in
    ``__init__``.
    """

    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.qingting.fm/channels/{}'.format(album_id)
        self.album_list_api = "http://api2.qingting.fm/v6/media/channelondemands/{}/programs/order/0/curpage/1/pagesize/100".format(
            album_id)
        self.album_info_api = "http://api2.qingting.fm/v6/media/channelondemands/{}".format(album_id)

    def album(self):
        """Fetch the album, populate ``self.podcast`` and write the RSS file.

        The feed is written to ``qingting/<album_id>.rss``; every episode
        title is printed while the feed is being built.
        """
        info_response = json.loads(requests.get(self.album_info_api).content)
        list_response = json.loads(requests.get(self.album_list_api).content)

        feed = Podcast()
        self.podcast = feed
        album_meta = info_response['data']

        feed.name = album_meta['title']
        feed.authors.append(Person("Powered by maijver", '*****@*****.**'))
        feed.website = self.url
        feed.copyright = 'cc-by'
        feed.description = album_meta['description']
        feed.language = 'cn'
        # Strip the '!200' marker from the thumbnail URL; the same cover is
        # reused for every episode below.
        cover = album_meta['thumbs']['small_thumb'].replace('!200', '')
        feed.image = cover
        feed.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        feed.category = Category('Technology', 'Podcasting')
        feed.explicit = False
        feed.complete = False
        feed.owner = Person("maijver", '*****@*****.**')

        for program in list_response['data']:
            item = feed.add_episode()
            item.id = str(program['id'])
            item.title = program['title']
            print(item.title)
            item.image = cover
            item.summary = program['title']
            item.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(
                self.album_id, program['id'])
            item.authors = [Person("forecho", '*****@*****.**')]
            item.publication_date = self.reduction_time(program['update_time'])
            media_path = program['mediainfo']['bitrates_url'][0]['file_path']
            # NOTE(review): the second positional argument of podgen's Media
            # is the file size, yet a duration is passed here - confirm
            # whether this is intentional.
            item.media = Media("http://od.qingting.fm/{}".format(media_path),
                               program['duration'])

        feed.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)

    @staticmethod
    def reduction_time(created_date):
        """Parse ``YYYY-MM-DD HH:MM:SS`` into a UTC datetime without seconds."""
        parsed = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        # Seconds are deliberately dropped, minutes are kept.
        return parsed.replace(second=0, microsecond=0, tzinfo=pytz.utc)
def genero_feed(puntateList):
    """Build the "Pascal Rai Radio 2" feed and write it to ``rssfile``.

    Nothing happens when ``puntateList`` is empty.  Each entry is read
    positionally: index 0 is the title, 1 the link, 2 a ``dd/mm/yyyy`` date
    string (or a falsy value), 3 the media URL and 4 the media size.
    ``rssfile`` is resolved from the enclosing (module) scope.
    """
    if not puntateList:
        return

    # Channel-level metadata.
    feed = Podcast()
    feed.name = "Pascal Rai Radio 2"
    feed.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    feed.website = "http://www.raiplayradio.it/programmi/pascal/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/pascal_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/pascal.xml"
    feed.copyright = "Rai Radio 2"
    feed.language = "it-IT"

    for entry in puntateList:
        item = Episode()
        item.title = entry[0].encode("ascii", "ignore")
        item.link = entry[1]
        # The file size supplied by the caller is approximate.
        item.media = Media(entry[3], entry[4])

        raw_date = entry[2]
        if raw_date:
            # dd/mm/yyyy -> fixed 20:00 UTC on that day.
            parts = raw_date.split("/")
            item.publication_date = datetime.datetime(
                int(parts[2]), int(parts[1]), int(parts[0]),
                20, 0, tzinfo=pytz.utc)
        else:
            # No date available: fall back to the current UTC time.
            item.publication_date = pytz.utc.localize(
                datetime.datetime.utcnow())

        feed.episodes.append(item)

    # Serialise the feed (pretty-printed) to the file named by ``rssfile``.
    feed.rss_file(rssfile, minimize=False)
'filename': file_local_path, 'name': video.title }) except HTTPError as err: print('Can not parse this video bacause of HTTPError.') # Create Podcast object and fill it with episodes podcast_object = Podcast( name=CONFIG['podcast_name'], description=CONFIG['podcast_description'], website=CONFIG['podcast_website'], explicit=False, image=CONFIG['podcast_image'], language=CONFIG['podcast_language'], authors=[Person(CONFIG['podcast_author'], CONFIG['podcast_author_email'])], owner=Person(CONFIG['podcast_owner'], CONFIG['podcast_owner_email']), category=Category(CONFIG['podcast_category'], CONFIG['podcast_subcategory'])) for item in db: web_media_path = "%s/podcast_%s.mp4" % (CONFIG['podcast_media_server'], item['link'][9:]) podcast_object.add_episode( Episode(title=item['name'], media=Media(web_media_path, os.stat(item['filename']).st_size, type="audio/mpeg"))) # Generating RSS podcast_object.rss_file(('%s/rss.xml' % CONFIG['data_path']), minimize=True)
print('>>> wrote fresh sessions.json file') # write the new rss file p = Podcast() p.name = "The Objectivism Seminar" p.category = Category("Society & Culture", "Philosophy") p.language = "en-US" p.explicit = True p.description = ( "A weekly online conference call to systematically study " + "the philosophy of Objectivism via the works of prominent Rand scholars.") p.website = "https://www.ObjectivismSeminar.com" p.image = "https://www.ObjectivismSeminar.com/assets/images/atlas-square.jpg" p.feed_url = "https://www.ObjectivismSeminar.com/archives/rss" p.authors = [Person("Greg Perkins, Host", "*****@*****.**")] p.owner = Person("Greg Perkins", "*****@*****.**") p.episodes += [ Episode(title=x['title'], media=Media(x['link'], type="audio/mpeg", size=x['length']), id=x['GUID'], publication_date=x['pubDate'], summary=x['description']) for x in updated_session_items ] p.rss_file(rss_filename) print('>>> wrote fresh rss.xml file')
class Ximalaya():
    """Turn a ximalaya.com album into a podcast RSS file.

    Album metadata is scraped from the album HTML page, the track list comes
    from the ``revision/play/album`` JSON endpoint, and each track page is
    fetched once more for its summary and publication time.
    """

    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(
            album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        # Headers sent with every request made by this class.
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Scrape the album, populate ``self.podcast`` and write the RSS file.

        The feed is written to ``ximalaya/<album_id>.rss``.  A failure while
        processing a single track is printed and the remaining tracks are
        still processed.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the channel-level metadata.
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'album-intro').get_text()
        self.podcast.language = 'cn'
        # Drop everything after '!' in the cover URL.
        self.podcast.image = soup.find(
            'div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api,
                                          headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)

        for each in tracks:
            try:
                page_info = requests.get(
                    'http://www.ximalaya.com/%s' % each['trackUrl'],
                    headers=self.header)
                soup_info = BeautifulSoup(page_info.content, "lxml")

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                # Call syntax works on both Python 2 and 3; the former
                # print statement was a SyntaxError on Python 3.
                print(self.podcast.name + '=====' + each['trackName'])

                # .gif/.bmp covers are replaced by the album cover.
                image = each['trackCoverPath'].split('!')[0]
                if image[-4:] == '.gif' or image[-4:] == '.bmp':
                    episode.image = self.podcast.image
                else:
                    episode.image = image

                intro = soup_info.find('article', 'intro')
                if intro:
                    # Round-trip through GBK to drop characters that GBK
                    # cannot represent.
                    episode.summary = intro.get_text().encode(
                        'gbk', 'ignore').decode('gbk')
                else:
                    episode.summary = each['trackName']

                episode.link = 'http://www.ximalaya.com/%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    soup_info.find('span', 'time').get_text())
                # NOTE(review): the second positional argument of podgen's
                # Media is the file size, yet a duration is passed here -
                # confirm whether this is intentional.
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and carry on.
                # NOTE(review): an episode added before the failure stays in
                # the feed partially filled - confirm this is acceptable.
                print('异常:', e)
                print('异常 URL:',
                      'http://www.ximalaya.com/%s' % each['trackUrl'])

        # Write the feed file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(created_date):
        """Parse ``YYYY-MM-DD HH:MM:SS`` into a UTC datetime without seconds."""
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return datetime(timestamp.year, timestamp.month, timestamp.day,
                        timestamp.hour, timestamp.minute, tzinfo=pytz.utc)
class Ximalaya:
    """Turn a ximalaya.com album into a podcast RSS file.

    Album metadata and paged track lists come from the ``revision`` JSON
    endpoints; per-track details come from the mobile ``baseInfo`` endpoint.
    All HTTP access goes through the project's ``tools`` helpers.
    """

    # Number of times a track's details are requested before giving up.
    _MAX_EPISODE_ATTEMPTS = 2
    # Pause between attempts, in seconds.
    _RETRY_DELAY_SECONDS = 30

    def __init__(self, album_id):
        self.headers = tools.get_headers()
        self.podcast = None
        self.album_id = album_id
        self.episode_pre_page = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.episode_detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"

    def get_podcast(self):
        """Fetch the album and write ``./podcast/ximalaya/<album_id>.xml``.

        Nothing is written when the album-info response does not carry
        ``ret == 200``.
        """
        webpage = tools.get_url(self.album_info_url.format(self.album_id),
                                self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] != 200:
            return

        album_info_data = album_info['data']
        main_info = album_info_data['mainInfo']

        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.website = self.album_url.format(self.album_id)
        if main_info['richIntro']:
            self.podcast.description = main_info['richIntro']
        self.podcast.language = 'cn'
        # Cover URLs are scheme-relative; drop everything after '!'.
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.generator = 'kanemori.getpodcast'
        self.podcast.explicit = False
        self.podcast.withhold_from_itunes = True

        # Media links (or error lines) of every processed track, kept for
        # the optional tools.save_m4a export, which is currently disabled.
        text = ''
        page_num = 1
        # NOTE(review): the extra "+ 1" requests one page beyond the
        # computed page count - confirm whether that is still needed.
        album_page_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount']
            / self.episode_pre_page) + 1
        while page_num <= album_page_count:
            webpage = tools.get_url(
                self.album_list_url.format(self.album_id, page_num,
                                           self.episode_pre_page),
                self.headers)
            album_list = json.loads(webpage.decode('utf-8'))
            for episode_info in album_list['data']['tracksAudioPlay']:
                _, link = self.get_episode(episode_info['trackId'])
                text += link
            page_num += 1

        path = './podcast/ximalaya'
        if not os.path.exists(path):
            os.makedirs(path)
        self.podcast.rss_file(
            os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        # tools.save_m4a(os.path.join(path, '{}.txt'.format(self.album_id)), text)
        print("「{}」が上手に焼きました".format(self.album_id))

    def get_episode(self, episode_id):
        """Fetch one track and append it to ``self.podcast``.

        The details are requested up to ``_MAX_EPISODE_ATTEMPTS`` times with
        a ``_RETRY_DELAY_SECONDS`` pause between attempts.  The episode is
        added to the feed only after all of its data has been obtained, so a
        failed attempt never leaves a half-filled episode behind.

        Returns:
            ``(True, "<media url>\\n")`` on success, or
            ``(False, "error url: <detail url>\\n")`` once every attempt
            has failed.
        """
        detail_url = self.episode_detail_url.format(episode_id)

        for attempt in range(self._MAX_EPISODE_ATTEMPTS):
            if attempt > 0:
                print("再接続中" + str(attempt) + "......")
            try:
                webpage = tools.get_url(detail_url, self.headers)
                detail = json.loads(webpage.decode('utf-8'))
                title = detail['title']
                published = tools.publication_time(detail['createdAt'])
                media = Media(detail['playUrl32'],
                              duration=timedelta(
                                  milliseconds=detail['duration']))
            except Exception as err:
                # Retry boundary: any failure while fetching or decoding
                # the track counts as a failed attempt.
                print(err)
                if attempt + 1 < self._MAX_EPISODE_ATTEMPTS:
                    print("30秒後に再接続する.......")
                    sleep(self._RETRY_DELAY_SECONDS)
                continue

            episode = self.podcast.add_episode()
            episode.id = str('ximalaya_' + str(episode_id))
            episode.title = title
            if 'intro' in detail:
                # Line breaks are stored as literal "\r" / "\n" escapes.
                episode.summary = detail['intro'].replace(
                    '\r', '\\r').replace('\n', '\\n')
            episode.publication_date = published
            episode.media = media
            episode.position = 1
            return True, detail['playUrl32'] + '\n'

        error_line = "error url: " + detail_url + "\n"
        print(error_line)
        return False, error_line
def lambda_handler(event, context):
    """Rebuild the podcast RSS feed and upload it to S3.

    Episode records come from ``query_episodes()``.  The record with
    ``episode-num == 0`` carries the channel-level metadata; every other
    record becomes one episode.  Records without a ``media-file`` entry have
    their MP3 downloaded from S3 and analysed for size and duration, and the
    result is stored through ``update_episode()``.

    Args:
        event: unused.
        context: unused.

    Returns:
        None.  The handler stops early, without uploading a feed, when no
        episodes are available or a downloaded media file cannot be read
        as an MP3.
    """
    print('Starting cccRssBuilder Lambda function')

    # Get the episode records.
    episodes = query_episodes()
    if not episodes:
        # Without "episode 0" there is no channel metadata to build from.
        print('No episodes found; nothing to build')
        return
    episodes.sort(key=lambda x: x['episode-num'])

    # Main podcast info comes from "episode 0" (first after sorting).
    episodeInfo = episodes[0]

    p = Podcast()
    p.name = episodeInfo['name']
    p.description = episodeInfo['description']
    p.website = episodeInfo['website']
    p.explicit = episodeInfo['explicit']
    p.image = episodeInfo['image']
    p.feed_url = episodeInfo['feed-url']
    p.language = episodeInfo['language']
    p.category = Category(episodeInfo['category'], episodeInfo['subcategory'])
    p.owner = Person(episodeInfo['owner-name'], episodeInfo['owner-email'])
    p.authors = [Person(episodeInfo['owner-name'], episodeInfo['owner-email'])]

    # Created once and shared by the downloads and the final upload.
    s3 = boto3.client('s3')
    publish_tz = pytz.timezone('EST')

    for episode in episodes:
        # Skip "Episode 0": it only holds the channel metadata.
        if episode['episode-num'] == 0:
            continue

        # If the record has no media info yet (name, duration, size),
        # analyse the file and store the result in the db and the record.
        if 'media-file' not in episode:
            episodeNum = episode['episode-num']
            print('Analyzing media file for episode', episodeNum)
            mediaFile = 'ccc-{:03d}-{}.mp3'.format(int(episodeNum),
                                                   episode['pub-date'])
            print('Media file:', mediaFile)
            localMediaFile = '/tmp/' + mediaFile
            s3.download_file('kwksolutions.com', 'ccc/media/' + mediaFile,
                             localMediaFile)

            # Try to analyse the mp3 file - duration and file size.
            try:
                audio = MP3(localMediaFile)
            except Exception:
                # Unlike a bare except, this lets SystemExit and
                # KeyboardInterrupt propagate.
                print('Not an MP3 file!')
                return

            duration = int(round(audio.info.length))
            minutes, seconds = divmod(duration, 60)
            hours, minutes = divmod(minutes, 60)
            if hours == 0:
                durationStr = '{:02d}:{:02d}'.format(minutes, seconds)
            else:
                durationStr = '{:02d}:{:02d}:{:02d}'.format(
                    hours, minutes, seconds)

            size = str(os.path.getsize(localMediaFile))
            update_episode(episodeNum, mediaFile, size, durationStr)
            episode['media-file'] = mediaFile
            episode['size'] = size
            episode['duration'] = durationStr

        # Figure out all the info needed for the episode object.
        mediaURL = ('https://www.kwksolutions.com/ccc/media/'
                    + episode['media-file'])

        # Duration is stored as "MM:SS" or "HH:MM:SS".
        durationList = episode['duration'].split(':')
        secs = int(durationList[-1])
        mins = int(durationList[-2])
        try:
            h = int(durationList[-3])
        except IndexError:
            # "MM:SS" form: there is no hours component.
            h = 0

        # Publication date is stored as "YYYY-MM-DD".
        pubdateList = episode['pub-date'].split('-')
        year = int(pubdateList[0])
        month = int(pubdateList[1])
        day = int(pubdateList[2])

        # Build the episode object.
        e = p.add_episode()
        e.id = mediaURL
        e.title = 'Episode ' + str(episode['episode-num'])
        e.summary = episode['description']
        e.link = 'http://christcommunitycarmel.org/get-involved/podcasts'
        e.publication_date = datetime.datetime(year, month, day, 12, 0, 0,
                                               tzinfo=publish_tz)
        e.media = Media(mediaURL, episode['size'],
                        duration=datetime.timedelta(hours=h, minutes=mins,
                                                    seconds=secs))

    # Write the rss file locally, then upload it.
    print('Writing RSS file to S3')
    rssLocalFile = '/tmp/podcast.rss'
    rssS3File = 'ccc/podcast.rss'
    p.rss_file(rssLocalFile)
    s3.upload_file(rssLocalFile, 'kwksolutions.com', rssS3File,
                   ExtraArgs={'ContentType': 'text/xml'})
    return
class Ximalaya():
    """Turn a ximalaya.com album into a podcast RSS file.

    The album page is scraped for metadata and for the list of sound ids;
    each sound's details are then fetched from ``/tracks/<id>.json``.
    """

    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.ximalaya.com/album/%s/' % album_id
        # Headers sent with every request made by this class.
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Scrape the album, populate ``self.podcast`` and write the RSS file.

        The feed is written to ``ximalaya/<album_id>.rss``.
        """
        page = requests.get(self.url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the channel-level metadata.
        self.podcast = Podcast()
        self.podcast.name = soup.find('div', 'detailContent_title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'mid_intro').get_text()
        self.podcast.language = 'cn'
        # Drop everything after '!' in the cover URL.
        self.podcast.image = soup.find(
            'a', 'albumface180').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        sound_ids = soup.find(
            'div', class_='personal_body').get('sound_ids').split(',')
        for sound_id in sound_ids:
            # The date shown in the album list supplies year/month/day.
            date = soup.find('li', sound_id=sound_id).find(
                'div', class_='operate').get_text().strip()
            self.detail(sound_id, date)

        # Write the feed file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    def detail(self, sound_id, date):
        """Fetch one sound's JSON details and append it as an episode.

        Args:
            sound_id: id of the sound, used in the ``/tracks/<id>.json`` URL.
            date: ``YYYY-MM-DD`` string taken from the album page.
        """
        detail_url = 'http://www.ximalaya.com/tracks/%s.json' % sound_id
        response = requests.get(detail_url, headers=self.header)
        item = json.loads(response.content)

        episode = self.podcast.add_episode()
        episode.id = str(item['id'])
        episode.title = item['title']
        # Drop the query string from the cover URL.
        episode.image = item['cover_url_142'].split('?')[0]
        episode.summary = (item['intro'].replace('\n', '')
                           if item['intro'] else '')
        episode.link = 'http://www.ximalaya.com/sound/%d' % item['id']
        episode.authors = [Person("forecho", '*****@*****.**')]
        episode.publication_date = self.reduction_time(
            date, item['formatted_created_at'])
        # The size is a fixed placeholder, not the real file size.
        episode.media = Media(item['play_path_64'], 454599964)
        # Call syntax works on both Python 2 and 3; the former print
        # statement was a SyntaxError on Python 3.
        print(self.podcast.name + '=====' + item['title'])

    @staticmethod
    def reduction_time(date, created_date):
        """Combine a date and a time-of-day into one UTC datetime.

        Args:
            date: ``YYYY-MM-DD`` string providing year, month and day.
            created_date: string such as ``"12月11日 17:00"``; only the
                ``HH:MM`` part after the first space is used.
        """
        timestamp = datetime.strptime(date, "%Y-%m-%d")
        created_at = datetime.strptime(created_date.split(' ')[1], "%H:%M")
        return datetime(timestamp.year, timestamp.month, timestamp.day,
                        created_at.hour, created_at.minute, tzinfo=pytz.utc)
def create_rss(type, download):
    """Build the personal podcast feed and either print it or save it.

    Channel metadata comes from ``Channel.defaultChannel()``.  The feed holds
    three episodes: one built from ``download`` and two hard-coded samples
    (the last of which reuses ``download.summary``).

    Args:
        type: ``'print'`` sends the RSS through ``print_enc``;
            ``'feed.xml'`` writes it to a file of that name.  Any other
            value builds the feed and outputs nothing.
        download: object exposing ``title``, ``subtitle``, ``media_url``,
            ``media_size``, ``media_duration`` (seconds), ``image_url`` and
            ``summary``.
    """
    channel = Channel.defaultChannel()

    # Channel-level metadata.
    p = Podcast()
    p.name = channel.name
    p.description = channel.description
    p.website = channel.website
    p.explicit = channel.explicit
    p.image = channel.image
    p.copyright = channel.copyright
    p.language = channel.language
    p.feed_url = channel.feed_url
    p.category = Category(channel.category)
    p.authors = [Person(channel.authors, channel.authors_email)]
    p.owner = Person(channel.owner, channel.owner_email)

    # Other attributes.
    p.generator = " "

    sample_logo = "https://github.com/oliverbarreto/PersonalPodcast/raw/main/site-logo-1400x1400.png"

    # Episode built from the supplied download.
    downloaded_episode = Episode(
        title=download.title,
        subtitle=download.subtitle,
        position=2,
        media=Media(download.media_url,
                    size=download.media_size,
                    duration=timedelta(seconds=download.media_duration)),
        image=download.image_url,
        publication_date=datetime(year=2021, month=1, day=8, hour=10,
                                  minute=0, tzinfo=pytz.utc),
        summary=download.summary)

    # Two hard-coded sample episodes.
    crazy_ones_episode = Episode(
        title="Episode 2 - The Crazy Ones",
        subtitle="this is a cool episode, this is for th crazy ones",
        position=1,
        image=sample_logo,
        media=Media("https://github.com/oliverbarreto/PersonalPodcast/raw/main/downloaded_with_pytube_Apple%20Steve%20Jobs%20Heres%20To%20The%20Crazy%20Ones.mp4",
                    type="audio/mpeg",
                    size=989,
                    duration=timedelta(hours=0, minutes=1, seconds=1)),
        publication_date=datetime(year=2021, month=1, day=6, hour=10,
                                  minute=0, tzinfo=pytz.utc),
        summary=htmlencode("wow wow wow summary"))

    super_crazy_episode = Episode(
        title="Episode 3 - The Super Crazy",
        subtitle="crazy ones revisited",
        position=0,
        image=sample_logo,
        media=Media("https://drive.google.com/file/d/1X5Mwa8V0Su1IDqhcQL7LdzEY0VaMC1Nn",
                    type="audio/mpeg",
                    size=989,
                    duration=timedelta(hours=0, minutes=1, seconds=1)),
        publication_date=datetime(year=2021, month=1, day=10, hour=10,
                                  minute=0, tzinfo=pytz.utc),
        summary=download.summary)

    p.episodes += [downloaded_episode, crazy_ones_episode,
                   super_crazy_episode]

    # Should we just print out, or write to file?
    if type == 'print':
        print_enc(p.rss_str())
        return

    if type == 'feed.xml':
        # ``type`` doubles as the output file name here.
        p.rss_file(type, minimize=False)
        print("\n")
        print("feed.xml created !!!")
class Ximalaya():
    """Build a podcast RSS feed for one Ximalaya album.

    The album metadata, the paginated track list and per-track details are
    fetched over HTTP, turned into a ``Podcast`` with one episode per track,
    and written to ``ximalaya/<album_id>.rss``.
    """

    def __init__(self, album_id):
        """Store the album id, the endpoint templates and the request headers.

        :param album_id: identifier substituted into the URL templates.
        """
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.time_api = 'https://www.ximalaya.com/revision/time'
        self.s = requests.session()
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Content-Type': 'application/json;charset=UTF-8',
            'Referer': self.album_url.format(self.album_id),
            'Accept-Encoding': "gzip, deflate",
            'Connection': "keep-alive",
            'cache-control': "no-cache",
        }

    def album(self):
        """Fetch the album and write its RSS feed.

        Nothing is written when the album-info response does not carry
        ``ret == 200``. A failure while processing a single track is printed
        together with its traceback and the remaining tracks are still
        processed.
        """
        self.get_sign()
        album_info = self.s.get(self.album_info_url.format(self.album_id),
                                headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            return

        album_info_data = album_info_content['data']
        main_info = album_info_data['mainInfo']

        # Initialise the podcast-level metadata.
        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        if main_info['richIntro']:
            self.podcast.description = main_info['richIntro']
        else:
            # Fall back to the album title when there is no intro text.
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # The cover URL is scheme-relative; anything after '!' is dropped.
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        page_num = 1
        # NOTE(review): the trailing "+ 1" was added for Python 2 (original
        # comment: "py2 +1"); with true division it presumably requests one
        # extra page past the end -- left unchanged, confirm before removing.
        track_total_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] / self.page_size) + 1
        while page_num <= track_total_count:
            self.header["Host"] = "www.ximalaya.com"
            album_list = self.s.get(
                self.album_list_url.format(self.album_id, page_num, self.page_size),
                headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            count = len(tracks)
            for each in tracks:
                try:
                    # The detail endpoint lives on a different host.
                    self.header["Host"] = "mobile.ximalaya.com"
                    detail = requests.get(
                        self.detail_url.format(each['trackId']),
                        headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))

                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])

                    image = each['trackCoverPath'].split('!')[0]
                    if image.endswith(('.png', '.jpg')):
                        episode.image = 'https:' + image
                    else:
                        # Any other cover type falls back to the album cover.
                        episode.image = self.podcast.image

                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace('\r\n', '')
                    else:
                        episode.summary = each['trackName']

                    episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                    episode.authors = [Person("forecho", '*****@*****.**')]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    # NOTE(review): the second positional argument of Media is
                    # presumably the size in bytes, yet a duration is passed
                    # here -- confirm against the podgen Media signature.
                    episode.media = Media(each['src'], each['duration'])
                    # NOTE(review): count is the size of the current page
                    # only; confirm this yields the intended ordering when an
                    # album spans several pages.
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Best effort: report the failing track and carry on.
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()
            page_num = page_num + 1

        # Write the feed file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    def get_time(self):
        """Fetch the server timestamp.

        :return: the body text of the response from ``self.time_api``.
        """
        r = self.s.get(self.time_api, headers=self.header)
        return r.text

    def get_sign(self):
        """Compute the request signature and store it in the ``xm-sign`` header.

        The signature is
        ``md5("himalaya-<server time>")(<r1>)<server time>(<r2>)<now in ms>``
        where ``r1`` and ``r2`` are random integers between 0 and 100.
        """
        now_time = str(round(time.time() * 1000))
        server_time = self.get_time()
        digest = hashlib.md5(
            "himalaya-{}".format(server_time).encode()).hexdigest()
        first_salt = "({})".format(str(round(random.random() * 100)))
        second_salt = "({})".format(str(round(random.random() * 100)))
        sign = str(digest) + first_salt + server_time + second_salt + now_time
        self.header["xm-sign"] = sign

    @staticmethod
    def reduction_time(time):
        """Convert a millisecond epoch timestamp to a UTC datetime.

        The conversion is done in UTC. Converting to host-local time and then
        labelling the fields as UTC would shift every date by the host's UTC
        offset. Seconds and microseconds are truncated.

        :param time: epoch timestamp in milliseconds.
        :return: timezone-aware ``datetime`` (``pytz.utc``) at minute precision.
        """
        moment = datetime.fromtimestamp(time / 1000, tz=pytz.utc)
        return moment.replace(second=0, microsecond=0)
# Build a podcast feed from the audio files reported by find_pods() and write
# it to rss.xml next to them.
p = Podcast(
    name="Chapo Cheaters Club",
    # The space after "taking" was missing, so the two literals ran together.
    description="Just taking "
    "what isn't mine",
    website=host_address,
    explicit=False,
)
p.image = host_address + "logo.jpg"

for pod in find_pods():
    pod_path = os.path.join(base_dir, pod_dir, pod)
    audio = MP3(pod_path, ID3=EasyID3)

    # Use the first tagged title; fall back to the file name without its
    # extension when the file has no tags or no title tag at all.
    try:
        titles = audio["title"]
    except KeyError:
        titles = []
    title = str(titles[0]) if titles else os.path.splitext(pod)[0]

    size = os.path.getsize(pod_path)
    duration = timedelta(seconds=audio.info.length)
    print(f'{pod}, {title}, {size}, {duration}')

    p.episodes += [
        Episode(title=title,
                media=Media(f"{host_address}{pod}", size=size,
                            duration=duration),
                summary="summary goes here")
    ]

# The feed is kept as a string in `rss` and also written to disk.
rss = p.rss_str()
p.rss_file(os.path.join(base_dir, pod_dir, 'rss.xml'))
class Ximalaya():
    """Build a podcast RSS feed for one Ximalaya album by scraping its page.

    Album metadata is read from the album HTML page, the track list from a
    JSON endpoint, and per-track details from ``/tracks/<id>.json``. The feed
    is written to ``ximalaya/<album_id>.rss``.
    """

    def __init__(self, album_id):
        """Store the album id, the derived URLs and the request headers.

        :param album_id: identifier substituted into the album URLs.
        """
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Fetch the album and write its RSS feed.

        A failure while processing a single track is printed together with
        its traceback and the remaining tracks are still processed.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the podcast-level metadata.
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        intro = soup.find('div', 'album-intro')
        if intro and intro.get_text():
            self.podcast.description = intro.get_text()
        else:
            # Fall back to the album title when there is no intro text.
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # Anything after '!' in the cover URL is dropped.
        self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)
        for each in tracks:
            try:
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                response = requests.get(detail_url, headers=self.header)
                # Decode explicitly, as is done for the track list above.
                item = json.loads(response.content.decode('utf-8'))

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])

                image = each['trackCoverPath'].split('!')[0]
                if image.endswith(('.gif', '.bmp')):
                    # GIF/BMP covers are replaced by the album cover.
                    episode.image = self.podcast.image
                else:
                    episode.image = image

                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']

                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    item['time_until_now'], item['formatted_created_at'])
                # NOTE(review): the second positional argument of Media is
                # presumably the size in bytes, yet a duration is passed
                # here -- confirm against the podgen Media signature.
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and carry on.
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()

        # Write the feed file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(time_until_now, created_at):
        """Rebuild a full publication date from two partial site strings.

        ``created_at`` carries month, day, hour and minute but no year; the
        year is inferred from the relative age in ``time_until_now``.

        :param time_until_now: relative age such as "3年前" (years ago),
            "N月前" (months ago) or "N天前" (days ago); any other value keeps
            the current year.
        :param created_at: date string in the form "12月11日 17:00".
        :return: timezone-aware ``datetime`` tagged with ``pytz.utc``.
        """
        date = datetime.strptime(created_at, "%m月%d日 %H:%M")
        reduction_year = datetime.now().year
        if '年前' in time_until_now:
            year = int(time_until_now.split('年前')[0])
            reduction = datetime.now(tzlocal()) - relativedelta(years=year)
            if humanize_time(reduction) != ('%s years' % year):
                # Going back `year` years does not humanize to that many
                # years, so step back one more year.
                reduction_year = (datetime.now(tzlocal())
                                  - relativedelta(years=year + 1)).year
            else:
                reduction_year = reduction.year
        elif '月前' in time_until_now:
            month = int(time_until_now.split('月前')[0])
            reduction_year = (datetime.now(tzlocal())
                              - relativedelta(months=month)).year
        elif '天前' in time_until_now:
            day = int(time_until_now.split('天前')[0])
            reduction_year = (datetime.now(tzlocal())
                              - relativedelta(days=day)).year
        # The fifth positional field is the minute; passing the (always zero)
        # parsed second here would discard the minutes.
        return datetime(reduction_year, date.month, date.day, date.hour,
                        date.minute, tzinfo=pytz.utc)
def generate(name, description, website, explicit, image, author_name,
             author_email, feed_path, copyright, language, category, blog,
             blog_path, verbose, folder):
    """Generate a podcast from mp3 files located in the provided FOLDER"""
    # The docstring above is left untouched because it may be surfaced as
    # command help text; the details are documented here instead.
    #
    # name, description, website, explicit, image, copyright, language and
    # category are passed through to Podcast (category wrapped in Category).
    # author_name / author_email become the single author and the owner.
    # feed_path is appended to website to form the public base URL of the
    # feed and of every episode file.
    # blog / blog_path: when blog is truthy each episode gets a link under
    # website/blog_path derived from its file name.
    # verbose switches logging from INFO to DEBUG.
    # folder is the local directory that is scanned for *.mp3 files and that
    # receives the generated <name>.rss file.
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
    logging.debug('Processing input: %s', locals())

    authors = [Person(author_name, author_email)]
    feed_name = name.lower().replace(' ', '_') + '.rss'
    feed_base = '%s/%s' % (website, feed_path)
    feed_url = '%s/%s' % (feed_base, feed_name)

    # Explicit keyword set for Podcast rather than a pruned copy of locals().
    attrs = {
        'name': name,
        'description': description,
        'website': website,
        'explicit': explicit,
        'image': image,
        'copyright': copyright,
        'language': language,
        'category': Category(category),
        'authors': authors,
        'owner': authors[0],
        'feed_url': feed_url,
    }

    logging.info('Creating podcast %s, feed %s', name, feed_url)
    p = Podcast(**attrs)

    # os.path.join works whether or not folder ends with a path separator.
    for fpath in sorted(glob.glob(os.path.join(folder, '*.mp3'))):
        logging.info('Adding episode %s', fpath)
        fname = os.path.basename(fpath)
        size = os.path.getsize(fpath)
        logging.debug('Filename: %s, size %i', fname, size)

        try:
            tag = ID3(fpath)
        except ID3NoHeaderError:
            logging.error('%s is not a valid mp3 file, ignoring it', fpath)
            continue
        logging.debug('Read tag: %s', tag)

        e = Episode()
        # Prefer the artist stored in the file over the feed-level author.
        if 'TPE1' in tag:
            e.authors = [Person(tag['TPE1'][0])]
        else:
            e.authors = attrs['authors']
        e.title = tag['TIT2'][0]
        e.subtitle = e.title
        if 'COMM::eng' in tag:
            e.summary = tag['COMM::eng'][0]
        else:
            # No English comment frame: reuse the podcast description.
            e.summary = description

        episode_url = '%s/%s' % (feed_base, fname)
        logging.debug('Episode url: %s', episode_url)
        e.media = Media(episode_url, size, type='audio/mpeg')
        e.media.populate_duration_from(fpath)

        # Only the leading YYYY-MM-DD part of the recording date is used.
        pubdate = datetime.strptime(tag['TDRC'][0].text[:10], '%Y-%m-%d')
        e.publication_date = pubdate.replace(tzinfo=pytz.utc)

        if blog:
            blog_post = ''
            short_name = re.search('[a-z]*_-_([a-z_]*[#0-9]*)', fname)
            if short_name:
                slug = short_name.group(1).replace('_', '-').replace('#', '')
                blog_post = slug + '.html'
            e.link = '%s/%s/%s' % (website, blog_path, blog_post)

        p.episodes.append(e)

    feed_local_path = os.path.join(folder, feed_name)
    logging.info('Generating feed in %s', feed_local_path)
    p.rss_file(feed_local_path, minimize=False)
class Ximalaya():
    """Build a podcast RSS feed for one Ximalaya album.

    The album metadata, the paginated track list and per-track details are
    fetched over HTTP, turned into a ``Podcast`` with one episode per track,
    and written to ``ximalaya/<album_id>.rss``.
    """

    def __init__(self, album_id):
        """Store the album id, the endpoint templates and the request headers.

        :param album_id: identifier substituted into the URL templates.
        """
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url.format(self.album_id),
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Fetch the album and write its RSS feed.

        Nothing is written when the album-info response does not carry
        ``ret == 200``. A failure while processing a single track is printed
        together with its traceback and the remaining tracks are still
        processed.
        """
        album_info = requests.get(self.album_info_url.format(self.album_id),
                                  headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            return

        album_info_data = album_info_content['data']
        main_info = album_info_data['mainInfo']

        # Initialise the podcast-level metadata.
        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        if main_info['richIntro']:
            self.podcast.description = main_info['richIntro']
        else:
            # Fall back to the album title when there is no intro text.
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # The cover URL is scheme-relative; anything after '!' is dropped.
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        page_num = 1
        # NOTE(review): the trailing "+ 1" was added for Python 2 (original
        # comment: "py2 +1"); with true division it presumably requests one
        # extra page past the end -- left unchanged, confirm before removing.
        track_total_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] / self.page_size) + 1
        while page_num <= track_total_count:
            album_list = requests.get(
                self.album_list_url.format(self.album_id, page_num, self.page_size),
                headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            count = len(tracks)
            for each in tracks:
                try:
                    detail = requests.get(
                        self.detail_url.format(each['trackId']),
                        headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))

                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])

                    image = each['trackCoverPath'].split('!')[0]
                    if image.endswith(('.png', '.jpg')):
                        episode.image = 'https:' + image
                    else:
                        # Any other cover type falls back to the album cover.
                        episode.image = self.podcast.image

                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace('\r\n', '')
                    else:
                        episode.summary = each['trackName']

                    episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                    episode.authors = [Person("forecho", '*****@*****.**')]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    # NOTE(review): the second positional argument of Media is
                    # presumably the size in bytes, yet a duration is passed
                    # here -- confirm against the podgen Media signature.
                    episode.media = Media(each['src'], each['duration'])
                    # NOTE(review): count is the size of the current page
                    # only; confirm this yields the intended ordering when an
                    # album spans several pages.
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Best effort: report the failing track and carry on.
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()
            page_num = page_num + 1

        # Write the feed file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(time):
        """Convert a millisecond epoch timestamp to a UTC datetime.

        The conversion is done in UTC. Converting to host-local time and then
        labelling the fields as UTC would shift every date by the host's UTC
        offset. Seconds and microseconds are truncated.

        :param time: epoch timestamp in milliseconds.
        :return: timezone-aware ``datetime`` (``pytz.utc``) at minute precision.
        """
        moment = datetime.fromtimestamp(time / 1000, tz=pytz.utc)
        return moment.replace(second=0, microsecond=0)
11932295), summary="With an English name adapted directly from Afrikaans " '-- literally meaning "earth pig" -- this fascinating ' "animal has both circular teeth and a knack for " "digging.", ), Episode( title="ya, das ist die heiße scheiße", media=Media( "https://github.com/ssk8/365days_of_plops/raw/main/poop02.mp3", 15363464), summary="Colon evacuation " "hurt my anus ", ), Episode( title="Spicy Shit", media=Media( "https://github.com/ssk8/365days_of_plops/raw/main/poop03.mp3", 15363464), summary="burning shit " "really hurt my anus " "Case in point: we have found clothing made from " "alpaca fiber that is 2000 years old. How is this " "possible, and what makes it different from llamas?", ) ] # Generate the RSS feed rss = p.rss_str() p.rss_file('rss.xml', minimize=True)