def main():
    """Build the Bugle archive feed from ``thebugle.json`` and print it as RSS.

    Every record in the JSON file is expected to carry ``id``, ``title``,
    ``file`` and a dash-separated ``date`` (year-month-day). One episode is
    added per record, its media is created from the server response for the
    file URL, and its duration is fetched afterwards.
    """
    with open('thebugle.json') as source:
        records = json.load(source)

    feed = Podcast(
        name="TimesOnLine Bugle Archive",
        description="Old Bugle episodes, podcast feed",
        website="https://www.thebuglepodcast.com/",
        explicit=False,
    )

    for record in records:
        entry = feed.add_episode(
            Episode(title=f"{record['id']}: {record['title']}"))

        media_url = f"{MEDIA_BASE_URL}/{record['file']}"
        entry.media = Media.create_from_server_response(media_url)
        entry.media.fetch_duration()

        # The date has no time component, so publish at midnight UTC.
        fields = record['date'].split('-')
        entry.publication_date = datetime(
            int(fields[0]), int(fields[1]), int(fields[2]),
            tzinfo=pytz.utc)

    print(feed.rss_str())
def main():
    """Create an example podcast and print it or save it to a file.

    Exactly one command-line argument is expected: ``rss`` prints the feed
    to stdout, any other value ending in ``rss`` is used as the name of the
    file the feed is written to. Any other invocation prints a usage
    message and exits.
    """
    # There must be exactly one argument, and it must end with rss
    if len(sys.argv) != 2 or not sys.argv[1].endswith('rss'):
        # Invalid usage, print help message.
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc('Usage: %s ( <file>.rss | rss )' % 'python -m podgen')
        print_enc('')
        print_enc(' rss -- Generate RSS test output and print it to stdout.')
        print_enc(' <file>.rss -- Generate RSS test feed and write it to file.rss.')
        print_enc('')
        # sys.exit() is always available, whereas the bare exit() builtin is
        # only injected by the site module.
        sys.exit()

    # Remember what type of feed the user wants
    arg = sys.argv[1]

    from podgen import Podcast, Person, Media, Category, htmlencode

    # Initialize the feed
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    e1 = p.add_episode()
    e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    e1.title = 'First Element'
    # The summary contains "<3", so it is passed through htmlencode.
    e1.summary = htmlencode(
        'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen '
        'aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista '
        'mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam '
        'domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas '
        'occultetur? \n'
        'Cum id fugiunt, re eadem defendunt, quae Peripatetici, verba <3.')
    e1.link = 'http://example.com'
    e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    e1.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10,
                                            tzinfo=pytz.utc)
    e1.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964,
                     duration=datetime.timedelta(hours=1, minutes=32,
                                                 seconds=19))

    # Should we just print out, or write to file?
    if arg == 'rss':
        # Print
        print_enc(p.rss_str())
    else:
        # Write to file (the argument was validated above to end with rss)
        p.rss_file(arg, minimize=True)
def test_removeEntryByIndex(self):
    """Popping the only episode by index leaves the episode list empty."""
    podcast = Podcast()
    self.feedId = 'http://example.com'
    self.title = 'Some Testfeed'

    episode = podcast.add_episode()
    episode.id = 'http://lernfunk.de/media/654321/1'
    episode.title = 'The Third BaseEpisode'
    assert len(podcast.episodes) == 1

    podcast.episodes.pop(0)
    assert len(podcast.episodes) == 0
def setUp(self):
    """Create a podcast fixture with three episodes and mute warning output.

    The fixture values are stored on ``self`` (``title``, ``link``,
    ``description``, ``explicit``), the first episode is kept as
    ``self.fe`` and the podcast itself as ``self.fg``.
    """
    self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
    self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

    podcast = Podcast()

    self.title = 'Some Testfeed'
    self.link = 'http://lernfunk.de'
    self.description = 'A cool tent'
    self.explicit = False

    podcast.name = self.title
    podcast.website = self.link
    podcast.description = self.description
    podcast.explicit = self.explicit

    first = podcast.add_episode()
    first.id = 'http://lernfunk.de/media/654321/1'
    first.title = 'The First Episode'
    self.fe = first

    # Also exercise adding an episode through the list directly.
    second = Episode()
    podcast.episodes.append(second)
    second.id = 'http://lernfunk.de/media/654321/1'
    second.title = 'The Second Episode'

    third = podcast.add_episode()
    third.id = 'http://lernfunk.de/media/654321/1'
    third.title = 'The Third Episode'

    self.fg = podcast

    # Let every warning fire, but replace the hook that would display it.
    warnings.simplefilter("always")
    warnings.showwarning = lambda *args, **kwargs: None
class Qingting(object):
    """Build a podcast RSS file from a qingting.fm on-demand channel."""

    # Seconds to wait for each API response before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, album_id):
        """Store the channel id and derive the page and API URLs from it."""
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.qingting.fm/channels/{}'.format(album_id)
        self.album_list_api = "http://api2.qingting.fm/v6/media/channelondemands/{}/programs/order/0/curpage/1/pagesize/100".format(
            album_id)
        self.album_info_api = "http://api2.qingting.fm/v6/media/channelondemands/{}".format(album_id)

    def _fetch_json(self, url):
        """Return the decoded JSON body of *url*, failing loudly on HTTP errors."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Without this an error page would be handed to the JSON parser and
        # surface as a confusing decode/KeyError further down.
        response.raise_for_status()
        return json.loads(response.content)

    def album(self):
        """Fetch the channel and write ``qingting/<album_id>.rss``.

        Only the first page of the programme list (page size 100, as encoded
        in ``album_list_api``) is requested. The populated podcast is also
        kept on ``self.podcast``.

        :raises requests.RequestException: if a request fails, times out or
            returns an HTTP error status.
        """
        album_info_data = self._fetch_json(self.album_info_api)
        album_list_data = self._fetch_json(self.album_list_api)

        # The same cover (with the '!200' marker removed) is used for the
        # podcast and for every episode, so compute it once.
        cover = album_info_data['data']['thumbs']['small_thumb'].replace('!200', '')

        self.podcast = Podcast()
        self.podcast.name = album_info_data['data']['title']
        self.podcast.authors.append(Person("Powered by maijver", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = album_info_data['data']['description']
        # NOTE(review): 'cn' is a country code rather than a language code;
        # 'zh' / 'zh-cn' is probably what is meant - confirm before changing.
        self.podcast.language = 'cn'
        self.podcast.image = cover
        self.podcast.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("maijver", '*****@*****.**')

        for each in album_list_data['data']:
            episode = self.podcast.add_episode()
            episode.id = str(each['id'])
            episode.title = each['title']
            print(episode.title)
            episode.image = cover
            episode.summary = each['title']
            episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, each['id'])
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(each['update_time'])
            # NOTE(review): the second positional argument of podgen's Media
            # appears to be the file size, yet a duration is passed here -
            # confirm against the podgen API before relying on this value.
            episode.media = Media("http://od.qingting.fm/{}".format(each['mediainfo']['bitrates_url'][0]['file_path']),
                                  each['duration'])

        self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)

    @staticmethod
    def reduction_time(created_date):
        """Parse ``"%Y-%m-%d %H:%M:%S"`` text into a minute-precision UTC datetime.

        The seconds are dropped and the wall-clock value is tagged as UTC
        without any conversion.
        """
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return timestamp.replace(second=0, tzinfo=pytz.utc)
def generate_rss_from_articles(feed_settings, articles):
    """
    Create a podgen ``Podcast`` from feed settings and a set of articles.

    :param feed_settings: object providing ``title``, ``author`` (a mapping
        with ``name`` and ``email``), ``source_page_url``, ``copyright``,
        ``subtitle``, ``output_url`` and ``img_url``
    :param articles: iterable of objects providing ``link``, ``title``,
        ``description``, ``pub_date``, ``media`` and ``type``; ``pub_date``
        is compared against a timezone-aware sentinel, so it has to be
        timezone-aware as well
    :return: the populated ``Podcast``; its ``last_updated`` and
        ``publication_date`` are the newest article date, or
        2000-01-01 00:00 Vietnam time when there are no newer articles
    """
    # Initialize the feed
    podcast = Podcast()
    podcast.name = feed_settings.title

    author = Person(feed_settings.author['name'], feed_settings.author['email'])
    podcast.authors.append(author)
    podcast.owner = author

    podcast.website = feed_settings.source_page_url
    podcast.copyright = feed_settings.copyright
    podcast.description = feed_settings.subtitle
    podcast.summary = feed_settings.subtitle
    podcast.subtitle = feed_settings.subtitle
    podcast.language = 'vi'
    podcast.feed_url = feed_settings.output_url
    podcast.image = feed_settings.img_url
    podcast.category = Category('Music', 'Music Commentary')
    podcast.explicit = False

    vt_tz = pytz.timezone('Asia/Ho_Chi_Minh')
    # Lower bound for the "newest article" search. localize() attaches the
    # zone to the naive value; calling astimezone() on a naive datetime would
    # first interpret it in the host's local timezone and make the sentinel
    # machine-dependent.
    pastdate = vt_tz.localize(datetime.datetime(2000, 1, 1, 0, 0))

    for article in articles:
        episode = podcast.add_episode()
        episode.id = article.link
        episode.title = article.title
        episode.summary = article.description
        episode.link = article.link
        episode.publication_date = article.pub_date
        pastdate = max(pastdate, article.pub_date)
        # Size and duration are not known here, only the URL and MIME type.
        episode.media = Media(article.media, size=None, duration=None,
                              type=article.type)

    podcast.last_updated = pastdate
    podcast.publication_date = pastdate
    return podcast
class Ximalaya():
    """Build a podcast RSS file from a ximalaya.com album page and track API."""

    def __init__(self, album_id):
        """Store the album id and prepare the URLs and request headers."""
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Scrape the album, add one episode per track and write the RSS file.

        Album metadata comes from the album HTML page, the track list from
        ``album_list_api`` and per-track details from the ``tracks/<id>.json``
        endpoint. A track that fails is reported (message, URL, traceback)
        and processing continues with the next track. The result is written
        to ``ximalaya/<album_id>.rss``.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the podcast
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'

        # Fall back to the album name when there is no (non-empty) intro.
        intro_node = soup.find('div', 'album-intro')
        intro_text = intro_node.get_text() if intro_node else ''
        self.podcast.description = intro_text if intro_text else self.podcast.name

        # NOTE(review): 'cn' is a country code rather than a language code -
        # confirm whether 'zh' / 'zh-cn' is intended.
        self.podcast.language = 'cn'
        self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)

        for each in tracks:
            try:
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                response = requests.get(detail_url, headers=self.header)
                item = json.loads(response.content)

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])

                # GIF and BMP covers are replaced by the album cover.
                image = each['trackCoverPath'].split('!')[0]
                if image[-4:] in ('.gif', '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image

                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']

                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    item['time_until_now'], item['formatted_created_at'])
                # NOTE(review): the second positional argument of podgen's
                # Media appears to be the file size, yet a duration is passed
                # - confirm against the podgen API.
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and keep going.
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(time_until_now, created_at):
        """Rebuild a full publication datetime from two partial site values.

        :param time_until_now: relative age text such as "3年前"
            (years ago), "N月前" (months ago) or "N天前" (days ago)
        :param created_at: month/day/time text such as "12月11日 17:00"
        :return: datetime tagged as UTC, built from the year derived from
            *time_until_now* and the month, day, hour and minute of
            *created_at*
        """
        date = datetime.strptime(created_at, "%m月%d日 %H:%M")
        # Default to the current year when no relative age is recognised.
        reduction_year = datetime.now().year

        if '年前' in time_until_now:
            year = int(time_until_now.split('年前')[0])
            reduction = datetime.now(tzlocal()) - relativedelta(years=year)
            # If going back `year` years does not humanize to that many
            # years, the original code steps back one more year.
            if humanize_time(reduction) != ('%s years' % year):
                reduction_year = (datetime.now(tzlocal()) - relativedelta(years=year + 1)).year
            else:
                reduction_year = reduction.year
        elif '月前' in time_until_now:
            month = int(time_until_now.split('月前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(months=month)).year
        elif '天前' in time_until_now:
            day = int(time_until_now.split('天前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(days=day)).year

        # The fifth positional field of datetime() is the minute. The parsed
        # format carries no seconds, so passing date.second here (as before)
        # always produced minute 0.
        return datetime(reduction_year, date.month, date.day, date.hour,
                        date.minute, tzinfo=pytz.utc)
class Ximalaya():
    """Build a podcast RSS file from the ximalaya.com JSON APIs (signed requests)."""

    def __init__(self, album_id):
        """Store the album id and set up URL templates, session and headers."""
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.time_api = 'https://www.ximalaya.com/revision/time'
        self.s = requests.session()
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Content-Type': 'application/json;charset=UTF-8',
            'Referer': self.album_url.format(self.album_id),
            'Accept-Encoding': "gzip, deflate",
            'Connection': "keep-alive",
            'cache-control': "no-cache",
        }

    def album(self):
        """Fetch album info and all track pages, then write the RSS file.

        The request signature is refreshed first. When the album-info
        response does not report ``ret == 200`` nothing is built or written
        and ``self.podcast`` stays ``None``. A track that fails is reported
        (message, URL, traceback) and processing continues. The result is
        written to ``ximalaya/<album_id>.rss``.
        """
        self.get_sign()
        album_info = self.s.get(self.album_info_url.format(self.album_id),
                                headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            # No usable album data, hence no podcast to populate or write.
            return
        album_info_data = album_info_content['data']

        # Initialise the podcast
        self.podcast = Podcast()
        self.podcast.name = album_info_data['mainInfo']['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        if album_info_data['mainInfo']['richIntro']:
            self.podcast.description = album_info_data['mainInfo']['richIntro']
        else:
            self.podcast.description = self.podcast.name
        # NOTE(review): 'cn' is a country code rather than a language code -
        # confirm whether 'zh' / 'zh-cn' is intended.
        self.podcast.language = 'cn'
        self.podcast.image = 'https:' + album_info_data['mainInfo']['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        # Number of pages, rounded up. The float() keeps the division exact
        # under Python 2 as well, so no extra "+ 1" page (which only ever
        # returned an empty list under true division) is requested.
        page_count = int(math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] / float(self.page_size)))

        for page_num in range(1, page_count + 1):
            self.header["Host"] = "www.ximalaya.com"
            album_list = self.s.get(
                self.album_list_url.format(self.album_id, page_num, self.page_size),
                headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            count = len(tracks)

            for each in tracks:
                try:
                    # The detail endpoint lives on a different host.
                    self.header["Host"] = "mobile.ximalaya.com"
                    detail = requests.get(
                        self.detail_url.format(each['trackId']),
                        headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))

                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])

                    # Only PNG/JPG covers are used; anything else falls back
                    # to the album cover.
                    image = each['trackCoverPath'].split('!')[0]
                    if image[-4:] == '.png' or image[-4:] == '.jpg':
                        episode.image = 'https:' + image
                    else:
                        episode.image = self.podcast.image

                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace('\r\n', '')
                    else:
                        episode.summary = each['trackName']

                    episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                    episode.authors = [Person("forecho", '*****@*****.**')]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    # NOTE(review): the second positional argument of
                    # podgen's Media appears to be the file size, yet a
                    # duration is passed - confirm against the podgen API.
                    episode.media = Media(each['src'], each['duration'])
                    # NOTE(review): `count` is the size of the current page;
                    # confirm that each['index'] is page-relative too.
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Best effort: report the failing track and keep going.
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    def get_time(self):
        """Return the body of the server time endpoint as text.

        :return: response text of ``time_api`` (used as the server timestamp)
        """
        r = self.s.get(self.time_api, headers=self.header)
        return r.text

    def get_sign(self):
        """Compute the request signature and store it in the ``xm-sign`` header.

        Layout of the value, concatenated in this order: the MD5 hex digest
        of ``"himalaya-<server timestamp>"``, a random number up to 100 in
        parentheses, the server timestamp, another random number up to 100
        in parentheses, and the current local timestamp in milliseconds.
        """
        now_time = str(round(time.time() * 1000))
        server_time = self.get_time()

        digest = hashlib.md5(
            "himalaya-{}".format(server_time).encode()).hexdigest()
        first_nonce = "({})".format(str(round(random.random() * 100)))
        second_nonce = "({})".format(str(round(random.random() * 100)))

        self.header["xm-sign"] = (
            str(digest) + first_nonce + server_time + second_nonce + now_time)

    @staticmethod
    def reduction_time(time):
        """Convert a millisecond epoch timestamp to a minute-precision UTC datetime.

        The timestamp is converted directly in UTC. Converting to local time
        first and then labelling that wall-clock value as UTC (the previous
        behaviour) shifted every date by the host's UTC offset.
        """
        timestamp = datetime.fromtimestamp(time / 1000, tz=pytz.utc)
        return timestamp.replace(second=0, microsecond=0)
class Ximalaya:
    """Build a podcast RSS file for a ximalaya.com album via the ``tools`` helpers."""

    def __init__(self, album_id):
        """Store the album id and prepare request headers and URL templates."""
        self.headers = tools.get_headers()
        self.podcast = None
        self.album_id = album_id
        self.episode_pre_page = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.episode_detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"

    def get_podcast(self):
        """Fetch the album and every track, then write ``<album_id>.xml``.

        When the album-info response does not report ``ret == 200`` nothing
        is built or written and ``self.podcast`` stays ``None``. Otherwise
        the feed is written below ``./podcast/ximalaya`` (created if
        missing) and a completion message is printed.
        """
        webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] != 200:
            # No usable album data, hence no podcast to populate or write.
            return
        album_info_data = album_info['data']

        self.podcast = Podcast()
        self.podcast.name = album_info_data['mainInfo']['albumTitle']
        self.podcast.website = self.album_url.format(self.album_id)
        if album_info_data['mainInfo']['richIntro']:
            self.podcast.description = album_info_data['mainInfo']['richIntro']
        # NOTE(review): 'cn' is a country code rather than a language code -
        # confirm whether 'zh' / 'zh-cn' is intended.
        self.podcast.language = 'cn'
        self.podcast.image = 'https:' + album_info_data['mainInfo']['cover'].split('!')[0]
        self.podcast.generator = 'kanemori.getpodcast'
        self.podcast.explicit = False
        self.podcast.withhold_from_itunes = True

        # Media links (or error lines) collected for the optional text dump.
        links = []
        # Rounded-up page count; the former "+ 1" only produced one more
        # request for an empty page.
        album_page_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] / self.episode_pre_page)
        for page_num in range(1, album_page_count + 1):
            webpage = tools.get_url(
                self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
                self.headers)
            album_list = json.loads(webpage.decode('utf-8'))
            for episode_info in album_list['data']['tracksAudioPlay']:
                _, link = self.get_episode(episode_info['trackId'])
                links.append(link)

        path = './podcast/ximalaya'
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(path, exist_ok=True)
        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        # tools.save_m4a(os.path.join(path, '{}.txt'.format(self.album_id)), ''.join(links))
        print("「{}」が上手に焼きました".format(self.album_id))

    def get_episode(self, episode_id):
        """Fetch one track and add it to the podcast, retrying once on failure.

        The episode is only added after the detail response was fetched and
        all required fields were read, so a failed attempt leaves no
        half-filled episode behind. After a failure the method waits 30
        seconds before the next step; after the second failure it gives up.

        :param episode_id: track id used in the detail URL and episode id
        :return: ``(True, "<media url>\\n")`` on success, or
            ``(False, "error url: <detail url>\\n")`` after two failures
        """
        detail_url = self.episode_detail_url.format(episode_id)
        trycount = 0
        while True:
            if trycount > 0:
                print("再接続中" + str(trycount) + "......")
            if trycount > 1:
                print("error url: " + detail_url + "\n")
                return False, "error url: " + detail_url + "\n"
            try:
                webpage = tools.get_url(detail_url, self.headers)
                detail = json.loads(webpage.decode('utf-8'))
                title = detail['title']
                published = tools.publication_time(detail['createdAt'])
                media = Media(detail['playUrl32'],
                              duration=timedelta(milliseconds=detail['duration']))
            except Exception as err:
                # tools.get_url is opaque from here, so any failure of the
                # fetch/parse step counts as "retry". Previously nothing was
                # caught and the retry branches were unreachable.
                print("error: {}".format(err))
                trycount += 1
                print("30秒後に再接続する.......")
                sleep(30)
                continue
            break

        episode = self.podcast.add_episode()
        episode.id = str('ximalaya_' + str(episode_id))
        episode.title = title
        if 'intro' in detail:
            # Line breaks are stored as literal backslash escapes.
            episode.summary = detail['intro'].replace('\r', '\\r').replace('\n', '\\n')
        episode.publication_date = published
        episode.media = media
        episode.position = 1
        return True, detail['playUrl32'] + '\n'
def lambda_handler(event, context):
    """Rebuild the podcast RSS file from the stored episodes and upload it to S3.

    The entry with the lowest ``episode-num`` ("episode 0") carries the
    podcast-level fields. For an episode without ``media-file`` the MP3 is
    downloaded from S3, its duration and size are measured, and the values
    are stored via ``update_episode`` and on the episode record. The feed is
    written to ``/tmp/podcast.rss`` and uploaded as ``ccc/podcast.rss``.

    :param event: Lambda event (not used)
    :param context: Lambda context (not used)
    :return: ``None``; returns early when there are no episodes or a
        downloaded media file cannot be analysed
    """
    print('Starting cccRssBuilder Lambda function')

    # Get episodes from DynamoDB
    episodes = query_episodes()
    if not episodes:
        # Without "episode 0" there is no podcast-level information.
        print('No episodes found')
        return
    episodes.sort(key=lambda x: x['episode-num'])

    # One client serves every download and the final upload.
    s3 = boto3.client('s3')

    # Create the podcast feed
    # Main podcast info comes from "episode 0"
    episodeInfo = episodes[0]
    p = Podcast()
    p.name = episodeInfo['name']
    p.description = episodeInfo['description']
    p.website = episodeInfo['website']
    p.explicit = episodeInfo['explicit']
    p.image = episodeInfo['image']
    p.feed_url = episodeInfo['feed-url']
    p.language = episodeInfo['language']
    p.category = Category(episodeInfo['category'], episodeInfo['subcategory'])
    p.owner = Person(episodeInfo['owner-name'], episodeInfo['owner-email'])
    p.authors = [Person(episodeInfo['owner-name'], episodeInfo['owner-email'])]

    # Process each episode
    for episode in episodes:
        # Skip "Episode 0"
        if episode['episode-num'] == 0:
            continue

        # Check if episode contains media file info (name, duration, size).
        # If not, add it to db and episode object.
        if 'media-file' not in episode:
            episodeNum = episode['episode-num']
            print('Analyzing media file for episode', episodeNum)
            mediaFile = 'ccc-{:03d}-{}.mp3'.format(int(episodeNum), episode['pub-date'])
            print('Media file:', mediaFile)
            localMediaFile = '/tmp/' + mediaFile
            s3.download_file('kwksolutions.com', 'ccc/media/' + mediaFile, localMediaFile)

            # Try to analyze the mp3 file - looking for duration and file size
            try:
                audio = MP3(localMediaFile)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                print('Not an MP3 file!')
                return

            duration = round(audio.info.length)
            total_minutes, seconds = divmod(duration, 60)
            hours, minutes = divmod(total_minutes, 60)
            if hours == 0:
                durationStr = '{:02d}:{:02d}'.format(minutes, seconds)
            else:
                durationStr = '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
            size = str(os.path.getsize(localMediaFile))

            update_episode(episodeNum, mediaFile, size, durationStr)
            episode['media-file'] = mediaFile
            episode['size'] = size
            episode['duration'] = durationStr

        # Figure out all the info needed for the episode object
        mediaURL = 'https://www.kwksolutions.com/ccc/media/' + episode['media-file']

        # Duration is "MM:SS" or "HH:MM:SS"; hours default to 0 when absent.
        durationList = episode['duration'].split(':')
        secs = int(durationList[-1])
        mins = int(durationList[-2])
        h = int(durationList[-3]) if len(durationList) > 2 else 0

        pubdateList = episode['pub-date'].split('-')
        year = int(pubdateList[0])
        month = int(pubdateList[1])
        day = int(pubdateList[2])

        # Build the episode object
        e = p.add_episode()
        e.id = mediaURL
        e.title = 'Episode ' + str(episode['episode-num'])
        e.summary = episode['description']
        e.link = 'http://christcommunitycarmel.org/get-involved/podcasts'
        # Published at noon in the 'EST' zone on the given date.
        e.publication_date = datetime.datetime(year, month, day, 12, 0, 0,
                                               tzinfo=pytz.timezone('EST'))
        e.media = Media(mediaURL, episode['size'],
                        duration=datetime.timedelta(hours=h, minutes=mins,
                                                    seconds=secs))

    # Write the rss file
    print('Writing RSS file to S3')
    rssLocalFile = '/tmp/podcast.rss'
    rssS3File = 'ccc/podcast.rss'
    p.rss_file(rssLocalFile)
    s3.upload_file(rssLocalFile, 'kwksolutions.com', rssS3File,
                   ExtraArgs={'ContentType': 'text/xml'})
    return
'filename': file_local_path, 'name': video.title }) except HTTPError as err: print('Can not parse this video bacause of HTTPError.') # Create Podcast object and fill it with episodes podcast_object = Podcast( name=CONFIG['podcast_name'], description=CONFIG['podcast_description'], website=CONFIG['podcast_website'], explicit=False, image=CONFIG['podcast_image'], language=CONFIG['podcast_language'], authors=[Person(CONFIG['podcast_author'], CONFIG['podcast_author_email'])], owner=Person(CONFIG['podcast_owner'], CONFIG['podcast_owner_email']), category=Category(CONFIG['podcast_category'], CONFIG['podcast_subcategory'])) for item in db: web_media_path = "%s/podcast_%s.mp4" % (CONFIG['podcast_media_server'], item['link'][9:]) podcast_object.add_episode( Episode(title=item['name'], media=Media(web_media_path, os.stat(item['filename']).st_size, type="audio/mpeg"))) # Generating RSS podcast_object.rss_file(('%s/rss.xml' % CONFIG['data_path']), minimize=True)
# Save the meeting details.
with open(f"{output_prefix}.yaml", "w") as fp:
    fp.write(yaml.dump(meeting))

# Update podcast: rebuild it from every meeting file found next to the
# recordings (all paths matching "<recordings_dir_path>*.yaml").
podcast = Podcast(
    name="Zoomerang",
    description="Telecons you missed while you were sleeping.",
    website=config["zoomerang_remote_addr"],
    explicit=False)

meeting_paths = glob(f"{recordings_dir_path}*.yaml")
for meeting_path in meeting_paths:
    with open(meeting_path, "r") as fp:
        # safe_load builds plain data types only. yaml.load() without an
        # explicit Loader can construct arbitrary Python objects and is
        # rejected outright by current PyYAML releases.
        # NOTE(review): assumes the stored meetings contain only plain YAML
        # types (no python-specific tags) - confirm against existing files.
        meeting = yaml.safe_load(fp)

    podcast.add_episode(
        Episode(title=meeting["summary"],
                media=Media(meeting["url"],
                            size=meeting["estimated_file_size"],
                            type="audio/mpeg",
                            duration=datetime.timedelta(
                                seconds=meeting["duration"])),
                publication_date=dateutil.parser.parse(
                    meeting["created_datetime"])))

# The podcast is written through its string form.
with open(config["zoomerang_podcast_path"], "w") as fp:
    fp.write(f"{podcast}")
print("Updated podcast.")
class Ximalaya():
    """Build a podcast RSS file from the ximalaya.com JSON APIs (paged track list)."""

    def __init__(self, album_id):
        """Store the album id and set up URL templates and request headers."""
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url.format(self.album_id),
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Fetch album info and all track pages, then write the RSS file.

        When the album-info response does not report ``ret == 200`` nothing
        is built or written and ``self.podcast`` stays ``None``. A track
        that fails is reported (message, URL, traceback) and processing
        continues. The result is written to ``ximalaya/<album_id>.rss``.
        """
        album_info = requests.get(self.album_info_url.format(self.album_id),
                                  headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            # No usable album data, hence no podcast to populate or write.
            return
        album_info_data = album_info_content['data']

        # Initialise the podcast
        self.podcast = Podcast()
        self.podcast.name = album_info_data['mainInfo']['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        if album_info_data['mainInfo']['richIntro']:
            self.podcast.description = album_info_data['mainInfo']['richIntro']
        else:
            self.podcast.description = self.podcast.name
        # NOTE(review): 'cn' is a country code rather than a language code -
        # confirm whether 'zh' / 'zh-cn' is intended.
        self.podcast.language = 'cn'
        self.podcast.image = 'https:' + album_info_data['mainInfo']['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        # Number of pages, rounded up. The float() keeps the division exact
        # under Python 2 as well, so no extra "+ 1" page (which only ever
        # returned an empty list under true division) is requested.
        page_count = int(math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] / float(self.page_size)))

        for page_num in range(1, page_count + 1):
            album_list = requests.get(
                self.album_list_url.format(self.album_id, page_num, self.page_size),
                headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            count = len(tracks)

            for each in tracks:
                try:
                    detail = requests.get(
                        self.detail_url.format(each['trackId']),
                        headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))

                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])

                    # Only PNG/JPG covers are used; anything else falls back
                    # to the album cover.
                    image = each['trackCoverPath'].split('!')[0]
                    if image[-4:] == '.png' or image[-4:] == '.jpg':
                        episode.image = 'https:' + image
                    else:
                        episode.image = self.podcast.image

                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace('\r\n', '')
                    else:
                        episode.summary = each['trackName']

                    episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                    episode.authors = [Person("forecho", '*****@*****.**')]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    # NOTE(review): the second positional argument of
                    # podgen's Media appears to be the file size, yet a
                    # duration is passed - confirm against the podgen API.
                    episode.media = Media(each['src'], each['duration'])
                    # NOTE(review): `count` is the size of the current page;
                    # confirm that each['index'] is page-relative too.
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Best effort: report the failing track and keep going.
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(time):
        """Convert a millisecond epoch timestamp to a minute-precision UTC datetime.

        The timestamp is converted directly in UTC. Converting to local time
        first and then labelling that wall-clock value as UTC (the previous
        behaviour) shifted every date by the host's UTC offset.
        """
        timestamp = datetime.fromtimestamp(time / 1000, tz=pytz.utc)
        return timestamp.replace(second=0, microsecond=0)
class Ximalaya():
    """Build a podcast RSS file from ximalaya.com by scraping album and track pages."""

    def __init__(self, album_id):
        """Store the album id and prepare the URLs and request headers."""
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(
            album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Scrape the album, add one episode per track and write the RSS file.

        Album metadata comes from the album HTML page, the track list from
        ``album_list_api`` and the per-track intro and time from each track's
        HTML page. A track that fails is reported (message and URL) and
        processing continues with the next track. The result is written to
        ``ximalaya/<album_id>.rss``.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the podcast
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'

        # Fall back to the album name when the page has no (non-empty)
        # intro; an unconditional .get_text() raised AttributeError for
        # albums without that element.
        intro_node = soup.find('div', 'album-intro')
        intro_text = intro_node.get_text() if intro_node else ''
        self.podcast.description = intro_text if intro_text else self.podcast.name

        # NOTE(review): 'cn' is a country code rather than a language code -
        # confirm whether 'zh' / 'zh-cn' is intended.
        self.podcast.language = 'cn'
        self.podcast.image = soup.find(
            'div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api,
                                          headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)

        for each in tracks:
            try:
                page_info = requests.get(
                    'http://www.ximalaya.com/%s' % each['trackUrl'],
                    headers=self.header)
                soup_info = BeautifulSoup(page_info.content, "lxml")

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                # Function-call form: the former print statement is a syntax
                # error on Python 3, and print() is used elsewhere in this
                # method.
                print(self.podcast.name + '=====' + each['trackName'])

                # GIF and BMP covers are replaced by the album cover.
                image = each['trackCoverPath'].split('!')[0]
                if image[-4:] in ('.gif', '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image

                intro_article = soup_info.find('article', 'intro')
                if intro_article:
                    # Round-trip through GBK drops characters that encoding
                    # cannot represent.
                    episode.summary = intro_article.get_text().encode(
                        'gbk', 'ignore').decode('gbk')
                else:
                    episode.summary = each['trackName']

                episode.link = 'http://www.ximalaya.com/%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    soup_info.find('span', 'time').get_text())
                # NOTE(review): the second positional argument of podgen's
                # Media appears to be the file size, yet a duration is passed
                # - confirm against the podgen API.
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and keep going.
                print('异常:', e)
                print('异常 URL:',
                      'http://www.ximalaya.com/%s' % each['trackUrl'])

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(created_date):
        """Parse ``"%Y-%m-%d %H:%M:%S"`` text into a minute-precision UTC datetime.

        The seconds are dropped and the wall-clock value is tagged as UTC
        without any conversion.
        """
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return datetime(timestamp.year, timestamp.month, timestamp.day,
                        timestamp.hour, timestamp.minute, tzinfo=pytz.utc)
def generate_podcast(self, feed_name: str) -> str:
    """
    Create podcast XML based on the files found in podcastDir.

    Taken from https://podgen.readthedocs.io/en/latest/usage_guide/podcasts.html

    Every ``*.mp3`` found (recursively) below ``<search_dir>/<feed_name>``
    becomes one episode. Episode details come from a ``.json`` file with the
    same base name when it exists; otherwise they are derived from the audio
    file and the derived values are written to that ``.json`` file so they
    can be edited later.

    :param self: PodcastService class
    :param feed_name: name of the feed and the sub-directory for files
    :return: string of the podcast
    """
    # Initialize the feed
    p = Podcast()

    # Required fields
    p.name = f'{feed_name} Archive'
    p.description = 'Stuff to listen to later'
    p.website = self.base_url
    p.complete = False

    # Optional
    p.language = 'en-US'
    p.feed_url = f'{p.website}/feeds/{feed_name}/rss'
    p.explicit = False
    p.authors.append(Person("Anthology"))

    search_root = Path(self.search_dir)

    def public_url(local_path: Path) -> str:
        """Map a file below search_dir to its URL under the website."""
        # relative_to() removes search_dir as a real path prefix;
        # str.lstrip() would strip a *set of characters* and could eat the
        # beginning of the feed directory name.
        relative = local_path.relative_to(search_root).as_posix()
        return f'{p.website}/{relative}'.replace(' ', '+')

    for path in Path(f'{self.search_dir}/{feed_name}').glob('**/*.mp3'):
        filepath = str(path)
        episode = p.add_episode()

        # Attempt to load saved metadata. with_suffix() only swaps the final
        # extension, so a '.mp3' elsewhere in the path is left alone.
        metadata_file_name = str(path.with_suffix('.json'))
        try:
            with open(metadata_file_name) as metadata_file:
                metadata = json.load(metadata_file)
        except FileNotFoundError:
            metadata = {}
        except JSONDecodeError:
            metadata = {}
            self.logger.error(f'Failed to read {metadata_file_name}')

        # Build the episode based on either the saved metadata or the file
        # details. path.stem is the file name without its extension;
        # str.rstrip('.mp3') would also remove trailing '.', 'm', 'p' and '3'
        # characters that belong to the name itself.
        episode.title = metadata.get('title', path.stem)
        episode.summary = metadata.get('summary', htmlencode('Some Summary'))
        if 'link' in metadata:
            episode.link = metadata.get('link')
        if 'authors' in metadata:
            episode.authors = [
                Person(author) for author in metadata.get('authors')
            ]

        # Fall back to the file's modification time when no date was saved.
        if 'publication_date' in metadata:
            episode.publication_date = isoparse(
                metadata.get('publication_date'))
        else:
            episode.publication_date = datetime.fromtimestamp(
                os.path.getmtime(filepath), tz=pytz.utc)

        episode.media = Media(public_url(path), os.path.getsize(filepath))
        episode.media.populate_duration_from(filepath)

        if "image" in metadata:
            episode.image = metadata.get('image')
        else:
            # Use the first cover image that sits next to the audio file.
            for ext in ['.jpg', '.png']:
                image_path = path.with_suffix(ext)
                if os.path.isfile(str(image_path)):
                    episode.image = public_url(image_path)
                    break

        # Save the metadata for future editing
        if not os.path.exists(metadata_file_name):
            metadata = {
                'title': episode.title,
                'summary': episode.summary,
                'publication_date': episode.publication_date,
                'authors': episode.authors
            }
            with open(metadata_file_name, 'w') as outFile:
                # default=str stringifies the values json cannot encode
                # natively (the date and the author objects).
                json.dump(metadata, outFile, indent=2, default=str)

    return p.rss_str()
class Ximalaya():
    """Build a podcast RSS feed from a Ximalaya album page.

    The album page is scraped for feed-level metadata and for the list of
    sound ids; each sound's details are then read from the
    ``tracks/<id>.json`` endpoint. The result is written to
    ``ximalaya/<album_id>.rss``.
    """

    def __init__(self, album_id):
        """Derive the album URL and request headers used for this album.

        :param album_id: album identifier, interpolated into the album URL
            and the output file name.
        """
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.ximalaya.com/album/%s/' % album_id
        # Headers sent with every request; the Referer is the album page.
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Scrape the album page, add every sound and write the RSS file."""
        page = requests.get(self.url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the feed-level metadata from the album page.
        self.podcast = Podcast()
        self.podcast.name = soup.find('div', 'detailContent_title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'mid_intro').get_text()
        self.podcast.language = 'cn'
        # Keep only the part of the cover URL before the first '!'.
        self.podcast.image = soup.find(
            'a', 'albumface180').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        # The comma-separated sound ids are stored in an attribute of the
        # 'personal_body' element.
        sound_ids = soup.find(
            'div', class_='personal_body').get('sound_ids').split(',')
        for sound_id in sound_ids:
            # The text of the sound's 'operate' element is passed on as the
            # date part of the publication time.
            date = soup.find('li', sound_id=sound_id).find(
                'div', class_='operate').get_text().strip()
            self.detail(sound_id, date)

        # Write the feed to disk.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    def detail(self, sound_id, date):
        """Fetch one sound's JSON details and add it to the feed as an episode.

        :param sound_id: id of the sound, interpolated into the details URL.
        :param date: ``YYYY-MM-DD`` text providing the publication date.
        """
        detail_url = 'http://www.ximalaya.com/tracks/%s.json' % sound_id
        response = requests.get(detail_url, headers=self.header)
        item = json.loads(response.content)

        episode = self.podcast.add_episode()
        episode.id = str(item['id'])
        episode.title = item['title']
        # Drop the query string from the cover URL.
        episode.image = item['cover_url_142'].split('?')[0]
        episode.summary = (item['intro'].replace('\n', '')
                           if item['intro'] else '')
        episode.link = 'http://www.ximalaya.com/sound/%d' % item['id']
        episode.authors = [Person("forecho", '*****@*****.**')]
        episode.publication_date = self.reduction_time(
            date, item['formatted_created_at'])
        # NOTE(review): 454599964 is a hard-coded value passed as the second
        # positional argument, which podgen's Media appears to treat as the
        # file size in bytes - confirm whether a real size is available.
        episode.media = Media(item['play_path_64'], 454599964)
        # Progress output; a print() call with a single argument behaves the
        # same on Python 2 and Python 3.
        print(self.podcast.name + '=====' + item['title'])

    @staticmethod
    def reduction_time(date, created_date):
        """Combine a date string and a "month-day time" string into a datetime.

        Only the part of ``created_date`` after the first space (``HH:MM``)
        is used; year, month and day come from ``date``. The result is tagged
        as UTC without any offset conversion.

        :param date: date text in ``%Y-%m-%d`` format.
        :param created_date: text such as ``"12月11日 17:00"``.
        :return: ``datetime`` with minute precision and ``tzinfo=pytz.utc``.
        :raises ValueError: if either part does not match its format.
        :raises IndexError: if ``created_date`` contains no space.
        """
        timestamp = datetime.strptime(date, "%Y-%m-%d")
        created_at = datetime.strptime(created_date.split(' ')[1], "%H:%M")
        return datetime(timestamp.year, timestamp.month, timestamp.day,
                        created_at.hour, created_at.minute, tzinfo=pytz.utc)