예제 #1
0
def main():
    with open('thebugle.json') as f:
        episodes = json.load(f)

    p = Podcast(
        name="TimesOnLine Bugle Archive",
        description="Old Bugle episodes, podcast feed",
        website="https://www.thebuglepodcast.com/",
        explicit=False,
    )

    for episode in episodes:
        ep = p.add_episode(
            Episode(title=f"{episode['id']}: {episode['title']}"))
        ep.media = Media.create_from_server_response(
            f"{MEDIA_BASE_URL}/{episode['file']}")

        ep.media.fetch_duration()

        date = episode['date'].split('-')
        ep.publication_date = datetime(int(date[0]),
                                       int(date[1]),
                                       int(date[2]),
                                       0,
                                       0,
                                       0,
                                       tzinfo=pytz.utc)

    print(p.rss_str())
예제 #2
0
def main():
    """Create an example podcast and print it or save it to a file."""
    # There must be exactly one argument, and it is must end with rss
    if len(sys.argv) != 2 or not (
            sys.argv[1].endswith('rss')):
        # Invalid usage, print help message
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc ('Usage: %s ( <file>.rss | rss )' % \
                'python -m podgen')
        print_enc ('')
        print_enc ('  rss              -- Generate RSS test output and print it to stdout.')
        print_enc ('  <file>.rss       -- Generate RSS test teed and write it to file.rss.')
        print_enc ('')
        exit()

    # Remember what type of feed the user wants
    arg = sys.argv[1]

    from podgen import Podcast, Person, Media, Category, htmlencode
    # Initialize the feed
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    e1 = p.add_episode()
    e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    e1.title = 'First Element'
    e1.summary = htmlencode('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
            aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista
            mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam
            domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas
            occultetur? Cum id fugiunt, re eadem defendunt, quae Peripatetici,
            verba <3.''')
    e1.link = 'http://example.com'
    e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    e1.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10, tzinfo=pytz.utc)
    e1.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964,
                     duration=
                     datetime.timedelta(hours=1, minutes=32, seconds=19))

    # Should we just print out, or write to file?
    if arg == 'rss':
        # Print
        print_enc(p.rss_str())
    elif arg.endswith('rss'):
        # Write to file
        p.rss_file(arg, minimize=True)
예제 #3
0
    def test_removeEntryByIndex(self):
        fg = Podcast()
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'

        fe = fg.add_episode()
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The Third BaseEpisode'
        assert len(fg.episodes) == 1
        fg.episodes.pop(0)
        assert len(fg.episodes) == 0
예제 #4
0
    def test_removeEntryByIndex(self):
        fg = Podcast()
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'

        fe = fg.add_episode()
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The Third BaseEpisode'
        assert len(fg.episodes) == 1
        fg.episodes.pop(0)
        assert len(fg.episodes) == 0
예제 #5
0
    def setUp(self):

        self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
        self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

        fg = Podcast()
        self.title = 'Some Testfeed'
        self.link = 'http://lernfunk.de'
        self.description = 'A cool tent'
        self.explicit = False

        fg.name = self.title
        fg.website = self.link
        fg.description = self.description
        fg.explicit = self.explicit

        fe = fg.add_episode()
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The First Episode'
        self.fe = fe

        #Use also the list directly
        fe = Episode()
        fg.episodes.append(fe)
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The Second Episode'

        fe = fg.add_episode()
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The Third Episode'

        self.fg = fg

        warnings.simplefilter("always")

        def noop(*args, **kwargs):
            pass

        warnings.showwarning = noop
예제 #6
0
class Qingting(object):
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.qingting.fm/channels/{}'.format(album_id)
        self.album_list_api = "http://api2.qingting.fm/v6/media/channelondemands/{}/programs/order/0/curpage/1/pagesize/100".format(
            album_id)
        self.album_info_api = "http://api2.qingting.fm/v6/media/channelondemands/{}".format(album_id)

    def album(self):
        album_info_content = requests.get(self.album_info_api).content
        album_info_data = json.loads(album_info_content)

        album_list_content = requests.get(self.album_list_api).content
        album_list_data = json.loads(album_list_content)

        self.podcast = Podcast()
        self.podcast.name = album_info_data['data']['title']
        self.podcast.authors.append(Person("Powered by maijver", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = album_info_data['data']['description']
        self.podcast.language = 'cn'
        self.podcast.image = album_info_data['data']['thumbs']['small_thumb'].replace('!200', '')
        self.podcast.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("maijver", '*****@*****.**')

        for each in album_list_data['data']:
            episode = self.podcast.add_episode()
            episode.id = str(each['id'])
            episode.title = each['title']
            print(episode.title)
            episode.image = album_info_data['data']['thumbs']['small_thumb'].replace('!200', '')
            episode.summary = each['title']
            episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, each['id'])
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(each['update_time'])
            episode.media = Media("http://od.qingting.fm/{}".format(each['mediainfo']['bitrates_url'][0]['file_path']),
                                  each['duration'])

        self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)

    @staticmethod
    def reduction_time(created_date):
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour, timestamp.minute,
                        tzinfo=pytz.utc)
예제 #7
0
    def setUp(self):

        self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
        self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

        fg = Podcast()
        self.title = 'Some Testfeed'
        self.link = 'http://lernfunk.de'
        self.description = 'A cool tent'
        self.explicit = False

        fg.name = self.title
        fg.website = self.link
        fg.description = self.description
        fg.explicit = self.explicit

        fe = fg.add_episode()
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The First Episode'
        self.fe = fe

        #Use also the list directly
        fe = Episode()
        fg.episodes.append(fe)
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The Second Episode'

        fe = fg.add_episode()
        fe.id = 'http://lernfunk.de/media/654321/1'
        fe.title = 'The Third Episode'

        self.fg = fg

        warnings.simplefilter("always")
        def noop(*args, **kwargs):
            pass
        warnings.showwarning = noop
예제 #8
0
def generate_rss_from_articles(feed_settings, articles):
    """
    Creates a FeedGenerator feed from a set of feed_entries.

    :param feed_settings: a feed_settings object containing
    :param articles:
    :return:
    """
    # Initialize the feed
    podcast = Podcast()
    podcast.name = feed_settings.title
    author = Person(feed_settings.author['name'], feed_settings.author['email'])
    podcast.authors.append(author)
    podcast.website = feed_settings.source_page_url
    podcast.copyright = feed_settings.copyright
    podcast.description = feed_settings.subtitle
    podcast.summary = feed_settings.subtitle
    podcast.subtitle = feed_settings.subtitle
    podcast.language = 'vi'
    podcast.feed_url = feed_settings.output_url
    podcast.image = feed_settings.img_url
    podcast.category = Category('Music', 'Music Commentary')
    podcast.explicit = False
    # p.complete = False
    # p.new_feed_url = 'http://example.com/new-feed.rss'
    podcast.owner = author
    # p.xslt = "http://example.com/stylesheet.xsl"

    vt_tz = pytz.timezone('Asia/Ho_Chi_Minh')
    pastdate = datetime.datetime(2000, 1, 1, 0, 0).astimezone(vt_tz)
    # podcast.last_updated = datetime.datetime.now(vt_tz)

    for article in articles:
        episode = podcast.add_episode()
        episode.id = article.link
        episode.title = article.title
        episode.summary = article.description
        episode.link = article.link
        # episode.authors = [Person('Lars Kiesow', '*****@*****.**')]
        episode.publication_date = article.pub_date
        pastdate = max(pastdate, article.pub_date)
        # episode.media = Media.create_from_server_response(article.media, size=None, duration=None)
        episode.media = Media(article.media, size=None, duration=None, type=article.type)

    podcast.last_updated = pastdate
    podcast.publication_date = pastdate

    return podcast
예제 #9
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # 初始化
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        if soup.find('div', 'album-intro') and soup.find('div', 'album-intro').get_text():
            self.podcast.description = soup.find('div', 'album-intro').get_text()
        else:
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        count = len(album_list_data['data']['tracksAudioPlay'])
        for each in album_list_data['data']['tracksAudioPlay']:
            try:
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                response = requests.get(detail_url, headers=self.header)
                item = json.loads(response.content)

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])
                image = each['trackCoverPath'].split('!')[0]
                if (image[-4:] == '.gif' or image[-4:] == '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(item['time_until_now'], item['formatted_created_at'])
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()
            
        # 生成文件
        # print self.podcast.rss_str()
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    # 时间转换 第一个参数是  "3年前", "12月11日 17:00"
    @staticmethod
    def reduction_time(time_until_now, created_at):
        date = datetime.strptime(created_at, "%m月%d日 %H:%M")
        reduction_year = datetime.now().year
        if '年前' in time_until_now:
            year = int(time_until_now.split('年前')[0])
            reduction = (datetime.now(tzlocal()) - relativedelta(years=year))
            if humanize_time(reduction) != ('%s years' % year):
                reduction_year = (datetime.now(tzlocal()) - relativedelta(years=year + 1)).year
            else:
                reduction_year = reduction.year
        elif '月前' in time_until_now:
            month = int(time_until_now.split('月前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(months=month)).year
        elif '天前' in time_until_now:
            day = int(time_until_now.split('天前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(days=day)).year

        return datetime(reduction_year, date.month, date.day, date.hour, date.second, tzinfo=pytz.utc)
예제 #10
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.time_api = 'https://www.ximalaya.com/revision/time'
        self.s = requests.session()
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'User-Agent':
            'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Content-Type': 'application/json;charset=UTF-8',
            'Referer': self.album_url.format(self.album_id),
            'Accept-Encoding': "gzip, deflate",
            'Connection': "keep-alive",
            'cache-control': "no-cache",
        }

    def album(self):
        self.get_sign()
        album_info = self.s.get(self.album_info_url.format(self.album_id),
                                headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] == 200:
            album_info_data = album_info_content['data']

            # 初始化
            self.podcast = Podcast()
            self.podcast.name = album_info_data['mainInfo']['albumTitle']
            self.podcast.authors.append(
                Person("Powered by forecho", '*****@*****.**'))
            self.podcast.website = self.album_url.format(self.album_id)
            self.podcast.copyright = 'cc-by'
            if album_info_data['mainInfo']['richIntro']:
                self.podcast.description = album_info_data['mainInfo'][
                    'richIntro']
            else:
                self.podcast.description = self.podcast.name
            self.podcast.language = 'cn'
            self.podcast.image = 'https:' + album_info_data['mainInfo'][
                'cover'].split('!')[0]
            self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
            self.podcast.category = Category('Technology', 'Podcasting')
            self.podcast.explicit = False
            self.podcast.complete = False
            self.podcast.owner = Person("forecho", '*****@*****.**')
            page_num = 1
            # py2 +1
            track_total_count = math.ceil(
                album_info_data['tracksInfo']['trackTotalCount'] /
                self.page_size) + 1
            while page_num <= track_total_count:
                self.header["Host"] = "www.ximalaya.com"
                album_list = self.s.get(self.album_list_url.format(
                    self.album_id, page_num, self.page_size),
                                        headers=self.header).content
                album_list_content = json.loads(album_list.decode('utf-8'))
                count = len(album_list_content['data']['tracksAudioPlay'])
                for each in album_list_content['data']['tracksAudioPlay']:
                    try:
                        self.header["Host"] = "mobile.ximalaya.com"
                        detail = requests.get(self.detail_url.format(
                            each['trackId']),
                                              headers=self.header).content
                        detail_content = json.loads(detail.decode('utf-8'))
                        episode = self.podcast.add_episode()
                        episode.id = str(each['index'])
                        episode.title = each['trackName']
                        print(self.podcast.name + '=====' + each['trackName'])
                        image = each['trackCoverPath'].split('!')[0]
                        if image[-4:] == '.png' or image[-4:] == '.jpg':
                            episode.image = 'https:' + image
                        else:
                            episode.image = self.podcast.image
                        if 'intro' in detail_content:
                            episode.summary = detail_content['intro'].replace(
                                '\r\n', '')
                        else:
                            episode.summary = each['trackName']
                        episode.link = 'http://www.ximalaya.com%s' % each[
                            'albumUrl']
                        episode.authors = [
                            Person("forecho", '*****@*****.**')
                        ]
                        episode.publication_date = self.reduction_time(
                            detail_content['createdAt'])
                        episode.media = Media(each['src'], each['duration'])
                        episode.position = count - each['index'] + 1
                    except Exception as e:
                        print('异常:', e)
                        print('异常 URL:',
                              'https://www.ximalaya.com%s' % each['trackUrl'])
                        traceback.print_exc()
                # 生成文件
                # print self.podcast.rss_str()
                page_num = page_num + 1
            self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                                  minimize=True)

    def get_time(self):
        """
        获取服务器时间戳
        :return:
        """
        r = self.s.get(self.time_api, headers=self.header)
        return r.text

    def get_sign(self):
        """
        获取sign: md5(ximalaya-服务器时间戳)(100以内随机数)服务器时间戳(100以内随机数)现在时间戳
        :return: xm_sign
        """
        now_time = str(round(time.time() * 1000))
        server_time = self.get_time()
        sign = str(
            hashlib.md5("himalaya-{}".format(server_time).encode()).hexdigest(
            )) + "({})".format(str(round(
                random.random() * 100))) + server_time + "({})".format(
                    str(round(random.random() * 100))) + now_time
        self.header["xm-sign"] = sign
        # print(sign)
        # return sign

    # 时间转换 参数 毫秒时间戳
    @staticmethod
    def reduction_time(time):
        timestamp = datetime.fromtimestamp(time / 1000)
        return datetime(timestamp.year,
                        timestamp.month,
                        timestamp.day,
                        timestamp.hour,
                        timestamp.minute,
                        tzinfo=pytz.utc)
예제 #11
0
class Ximalaya:
    def __init__(self, album_id):
        self.headers = tools.get_headers()
        self.podcast = None
        self.album_id = album_id
        self.episode_pre_page = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.episode_detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"

    def get_podcast(self):
        webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] == 200:
            album_info_data = album_info['data']

            self.podcast = Podcast()
            self.podcast.name = album_info_data['mainInfo']['albumTitle']
            self.podcast.website = self.album_url.format(self.album_id)
            if album_info_data['mainInfo']['richIntro']:
                self.podcast.description = album_info_data['mainInfo']['richIntro']
            self.podcast.language = 'cn'
            self.podcast.image = 'https:' + album_info_data['mainInfo']['cover'].split('!')[0]
            self.podcast.generator = 'kanemori.getpodcast'
            self.podcast.explicit = False
            self.podcast.withhold_from_itunes = True

            text = ''
            page_num = 1
            album_page_count = math.ceil(album_info_data['tracksInfo']['trackTotalCount'] / self.episode_pre_page) + 1
            while page_num <= album_page_count:
                webpage = tools.get_url(self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
                                        self.headers)
                album_list = json.loads(webpage.decode('utf-8'))
                for episode_info in album_list['data']['tracksAudioPlay']:
                    _, link = self.get_episode(episode_info['trackId'])
                    text += link

                page_num += 1

        path = './podcast/ximalaya'
        if not os.path.exists(path):
            os.makedirs(path)

        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        # tools.save_m4a(os.path.join(path, '{}.txt'.format(self.album_id)), text)
        print("「{}」が上手に焼きました".format(self.album_id))

    def get_episode(self, episode_id):
        trycount = 0
        findepisode = False

        while not findepisode:
            if trycount > 0:
                print("再接続中" + str(trycount) + "......")
            if trycount > 1:
                print("error url: " + self.episode_detail_url.format(episode_id) + "\n")
                return False, "error url: " + self.episode_detail_url.format(episode_id) + "\n"

            webpage = tools.get_url(self.episode_detail_url.format(episode_id), self.headers)
            detail = json.loads(webpage.decode('utf-8'))
            episode = self.podcast.add_episode()
            episode.id = str('ximalaya_' + str(episode_id))
            episode.title = detail['title']
            # print(self.podcast.name + '=====' + episode.title)
            if 'intro' in detail:
                episode.summary = detail['intro'].replace('\r', '\\r').replace('\n', '\\n')
            episode.publication_date = tools.publication_time(detail['createdAt'])
            episode.media = Media(detail['playUrl32'], duration=timedelta(milliseconds=detail['duration']))
            # episode.media = Media.create_from_server_response(detail['playUrl32'],
            #                                                   duration=timedelta(seconds=detail['duration']))
            episode.position = 1
            findepisode = True

            if not findepisode:
                trycount += 1
                print("30秒後に再接続する.......")
                sleep(30)

        return True, detail['playUrl32'] + '\n'
def lambda_handler(event, context):
    print('Starting cccRssBuilder Lambda function')
    # Get episodes from DynamoDB
    episodes = query_episodes()
    episodes.sort(key=lambda x: x['episode-num'])

    # Create the podcast feed
    # Main podcast info comes from "episode 0"
    episodeInfo = episodes[0]
    separator = ', '
    p = Podcast()
    p.name = episodeInfo['name']
    p.description = episodeInfo['description']
    p.website = episodeInfo['website']
    p.explicit = episodeInfo['explicit']
    p.image = episodeInfo['image']
    p.feed_url = episodeInfo['feed-url']
    p.language = episodeInfo['language']
    p.category = Category(episodeInfo['category'], episodeInfo['subcategory'])
    p.owner = Person(episodeInfo['owner-name'], episodeInfo['owner-email'])
    p.authors = [Person(episodeInfo['owner-name'], episodeInfo['owner-email'])]

    # Process each episode
    for episode in episodes:
        # Skip "Episode 0"
        if episode['episode-num'] == 0:
            continue
        # Check if episode contains media file info (name, duration, size).  If not, add it to db and episode object.
        if 'media-file' not in episode:
            episodeNum = episode['episode-num']
            print('Analyzing media file for episode', episodeNum)
            mediaFile = 'ccc-{:03d}-{}.mp3'.format(int(episodeNum),
                                                   episode['pub-date'])
            print('Media file:', mediaFile)
            localMediaFile = '/tmp/' + mediaFile
            s3 = boto3.client('s3')
            s3.download_file('kwksolutions.com', 'ccc/media/' + mediaFile,
                             localMediaFile)

            # Try to analyze the mp3 file - looking for duration and file size
            try:
                audio = MP3(localMediaFile)
            except:
                print('Not an MP3 file!')
                return
            duration = round(audio.info.length)
            hours = int(duration / 3600)
            minutes = int((duration % 3600) / 60)
            seconds = duration % 60
            if hours == 0:
                durationStr = '{:02d}:{:02d}'.format(minutes, seconds)
            else:
                durationStr = '{:02d}:{:02d}:{:02d}'.format(
                    hours, minutes, seconds)
            size = str(os.path.getsize(localMediaFile))
            update_episode(episodeNum, mediaFile, size, durationStr)
            episode['media-file'] = mediaFile
            episode['size'] = size
            episode['duration'] = durationStr

        # Figure out all the info needed for the episode object
        mediaURL = 'https://www.kwksolutions.com/ccc/media/' + episode[
            'media-file']
        durationList = episode['duration'].split(':')
        secs = int(durationList[-1])
        mins = int(durationList[-2])
        try:
            h = int(durationList[-3])
        except:
            h = 0
        pubdateList = episode['pub-date'].split('-')
        year = int(pubdateList[0])
        month = int(pubdateList[1])
        day = int(pubdateList[2])

        # Build the episode object
        e = p.add_episode()
        e.id = mediaURL
        e.title = 'Episode ' + str(episode['episode-num'])
        e.summary = episode['description']
        e.link = 'http://christcommunitycarmel.org/get-involved/podcasts'
        e.publication_date = datetime.datetime(year,
                                               month,
                                               day,
                                               12,
                                               00,
                                               00,
                                               tzinfo=pytz.timezone('EST'))
        e.media = Media(mediaURL,
                        episode['size'],
                        duration=datetime.timedelta(hours=h,
                                                    minutes=mins,
                                                    seconds=secs))

    # Write the rss file
    print('Writing RSS file to S3')
    rssLocalFile = '/tmp/podcast.rss'
    rssS3File = 'ccc/podcast.rss'
    p.rss_file(rssLocalFile)
    s3 = boto3.client('s3')
    s3.upload_file(rssLocalFile,
                   'kwksolutions.com',
                   rssS3File,
                   ExtraArgs={'ContentType': 'text/xml'})

    return
예제 #13
0
                'filename': file_local_path,
                'name': video.title
            })
        except HTTPError as err:
            print('Can not parse this video bacause of HTTPError.')

# Create Podcast object and fill it with episodes
podcast_object = Podcast(
    name=CONFIG['podcast_name'],
    description=CONFIG['podcast_description'],
    website=CONFIG['podcast_website'],
    explicit=False,
    image=CONFIG['podcast_image'],
    language=CONFIG['podcast_language'],
    authors=[Person(CONFIG['podcast_author'], CONFIG['podcast_author_email'])],
    owner=Person(CONFIG['podcast_owner'], CONFIG['podcast_owner_email']),
    category=Category(CONFIG['podcast_category'],
                      CONFIG['podcast_subcategory']))

for item in db:
    web_media_path = "%s/podcast_%s.mp4" % (CONFIG['podcast_media_server'],
                                            item['link'][9:])
    podcast_object.add_episode(
        Episode(title=item['name'],
                media=Media(web_media_path,
                            os.stat(item['filename']).st_size,
                            type="audio/mpeg")))

# Generating RSS
podcast_object.rss_file(('%s/rss.xml' % CONFIG['data_path']), minimize=True)
예제 #14
0
        # Save the meeting details.
        with open(f"{output_prefix}.yaml", "w") as fp:
            fp.write(yaml.dump(meeting))

    # Update podcast.
    podcast = Podcast(
        name="Zoomerang",
        description="Telecons you missed while you were sleeping.",
        website=config["zoomerang_remote_addr"],
        explicit=False)

    meeting_paths = glob(f"{recordings_dir_path}*.yaml")
    for meeting_path in meeting_paths:
        with open(meeting_path, "r") as fp:
            meeting = yaml.load(fp)

        podcast.add_episode(
            Episode(title=meeting["summary"],
                    media=Media(meeting["url"],
                                size=meeting["estimated_file_size"],
                                type="audio/mpeg",
                                duration=datetime.timedelta(
                                    seconds=meeting["duration"])),
                    publication_date=dateutil.parser.parse(
                        meeting["created_datetime"])))

    with open(config["zoomerang_podcast_path"], "w") as fp:
        fp.write(f"{podcast}")

    print("Updated podcast.")
예제 #15
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url.format(self.album_id),
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        album_info = requests.get(self.album_info_url.format(self.album_id),
                                  headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] == 200:
            album_info_data = album_info_content['data']

            # 初始化
            self.podcast = Podcast()
            self.podcast.name = album_info_data['mainInfo']['albumTitle']
            self.podcast.authors.append(
                Person("Powered by forecho", '*****@*****.**'))
            self.podcast.website = self.album_url.format(self.album_id)
            self.podcast.copyright = 'cc-by'
            if album_info_data['mainInfo']['richIntro']:
                self.podcast.description = album_info_data['mainInfo'][
                    'richIntro']
            else:
                self.podcast.description = self.podcast.name
            self.podcast.language = 'cn'
            self.podcast.image = 'https:' + album_info_data['mainInfo'][
                'cover'].split('!')[0]
            self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
            self.podcast.category = Category('Technology', 'Podcasting')
            self.podcast.explicit = False
            self.podcast.complete = False
            self.podcast.owner = Person("forecho", '*****@*****.**')
            page_num = 1
            # py2 +1
            track_total_count = math.ceil(
                album_info_data['tracksInfo']['trackTotalCount'] /
                self.page_size) + 1
            while page_num <= track_total_count:
                album_list = requests.get(self.album_list_url.format(
                    self.album_id, page_num, self.page_size),
                                          headers=self.header).content
                album_list_content = json.loads(album_list.decode('utf-8'))
                count = len(album_list_content['data']['tracksAudioPlay'])
                for each in album_list_content['data']['tracksAudioPlay']:
                    try:
                        detail = requests.get(self.detail_url.format(
                            each['trackId']),
                                              headers=self.header).content
                        detail_content = json.loads(detail.decode('utf-8'))
                        episode = self.podcast.add_episode()
                        episode.id = str(each['index'])
                        episode.title = each['trackName']
                        print(self.podcast.name + '=====' + each['trackName'])
                        image = each['trackCoverPath'].split('!')[0]
                        if image[-4:] == '.png' or image[-4:] == '.jpg':
                            episode.image = 'https:' + image
                        else:
                            episode.image = self.podcast.image
                        if 'intro' in detail_content:
                            episode.summary = detail_content['intro'].replace(
                                '\r\n', '')
                        else:
                            episode.summary = each['trackName']
                        episode.link = 'http://www.ximalaya.com%s' % each[
                            'albumUrl']
                        episode.authors = [
                            Person("forecho", '*****@*****.**')
                        ]
                        episode.publication_date = self.reduction_time(
                            detail_content['createdAt'])
                        episode.media = Media(each['src'], each['duration'])
                        episode.position = count - each['index'] + 1
                    except Exception as e:
                        print('异常:', e)
                        print('异常 URL:',
                              'https://www.ximalaya.com%s' % each['trackUrl'])
                        traceback.print_exc()
                # 生成文件
                # print self.podcast.rss_str()
                page_num = page_num + 1
            self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                                  minimize=True)

    # 时间转换 参数 毫秒时间戳
    @staticmethod
    def reduction_time(time):
        timestamp = datetime.fromtimestamp(time / 1000)
        return datetime(timestamp.year,
                        timestamp.month,
                        timestamp.day,
                        timestamp.hour,
                        timestamp.minute,
                        tzinfo=pytz.utc)
예제 #16
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(
            album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # 初始化
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'album-intro').get_text()
        self.podcast.language = 'cn'
        self.podcast.image = soup.find(
            'div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api,
                                          headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        count = len(album_list_data['data']['tracksAudioPlay'])
        for each in album_list_data['data']['tracksAudioPlay']:
            try:
                page_info = requests.get('http://www.ximalaya.com/%s' %
                                         each['trackUrl'],
                                         headers=self.header)
                soup_info = BeautifulSoup(page_info.content, "lxml")
                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print self.podcast.name + '=====' + each['trackName']
                image = each['trackCoverPath'].split('!')[0]
                if (image[-4:] == '.gif' or image[-4:] == '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                if soup_info.find('article', 'intro'):
                    episode.summary = soup_info.find(
                        'article',
                        'intro').get_text().encode('gbk',
                                                   'ignore').decode('gbk')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com/%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    soup_info.find('span', 'time').get_text())
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                print('异常:', e)
                print('异常 URL:',
                      'http://www.ximalaya.com/%s' % each['trackUrl'])

        # 生成文件
        # print self.podcast.rss_str()
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    # 时间转换
    @staticmethod
    def reduction_time(created_date):
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return datetime(timestamp.year,
                        timestamp.month,
                        timestamp.day,
                        timestamp.hour,
                        timestamp.minute,
                        tzinfo=pytz.utc)
예제 #17
0
    def generate_podcast(self, feed_name: str) -> str:
        """
        Create podcast XML based on the files found in podcastDir. Taken from
        https://podgen.readthedocs.io/en/latest/usage_guide/podcasts.html

        :param self: PodcastService class
        :param feed_name: name of the feed and the sub-directory for files
        :return:  string of the podcast
        """
        # Initialize the feed
        p = Podcast()

        # Required fields
        p.name = f'{feed_name} Archive'
        p.description = 'Stuff to listen to later'
        p.website = self.base_url
        p.complete = False

        # Optional
        p.language = 'en-US'
        p.feed_url = f'{p.website}/feeds/{feed_name}/rss'
        p.explicit = False
        p.authors.append(Person("Anthology"))

        # for filepath in glob.iglob(f'{self.search_dir}/{feed_name}/*.mp3'):
        for path in Path(f'{self.search_dir}/{feed_name}').glob('**/*.mp3'):
            filepath = str(path)
            episode = p.add_episode()

            # Attempt to load saved metadata
            metadata_file_name = filepath.replace('.mp3', '.json')
            try:
                with open(metadata_file_name) as metadata_file:
                    metadata = json.load(metadata_file)
            except FileNotFoundError:
                metadata = {}
            except JSONDecodeError:
                metadata = {}
                self.logger.error(f'Failed to read {metadata_file_name}')

            # Build the episode based on either the saved metadata or the file details
            episode.title = metadata.get(
                'title',
                filepath.split('/')[-1].rstrip('.mp3'))
            episode.summary = metadata.get('summary',
                                           htmlencode('Some Summary'))
            if 'link' in metadata:
                episode.link = metadata.get('link')
            if 'authors' in metadata:
                episode.authors = [
                    Person(author) for author in metadata.get('authors')
                ]
            episode.publication_date = \
                isoparse(metadata.get('publication_date')) if 'publication_date' in metadata \
                else datetime.fromtimestamp(os.path.getmtime(filepath), tz=pytz.utc)
            episode.media = Media(
                f'{p.website}/{filepath.lstrip(self.search_dir)}'.replace(
                    ' ', '+'), os.path.getsize(filepath))
            episode.media.populate_duration_from(filepath)

            if "image" in metadata:
                episode.image = metadata.get('image')
            else:
                for ext in ['.jpg', '.png']:
                    image_file_name = filepath.replace('.mp3', ext)
                    if os.path.isfile(image_file_name):
                        episode.image = f'{p.website}/{image_file_name.lstrip(self.search_dir)}'.replace(
                            ' ', '+')
                        break

            # Save the metadata for future editing
            if not os.path.exists(metadata_file_name):
                metadata = {
                    'title': episode.title,
                    'summary': episode.summary,
                    'publication_date': episode.publication_date,
                    'authors': episode.authors
                }
                with open(metadata_file_name, 'w') as outFile:
                    json.dump(metadata, outFile, indent=2, default=str)

        return p.rss_str()
예제 #18
0
class Ximalaya():
    def __init__(self, album_id):
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.ximalaya.com/album/%s/' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        page = requests.get(self.url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # 初始化
        self.podcast = Podcast()
        self.podcast.name = soup.find('div', 'detailContent_title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'mid_intro').get_text()
        self.podcast.language = 'cn'
        self.podcast.image = soup.find('a', 'albumface180').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        sound_ids = soup.find('div', class_='personal_body').get('sound_ids').split(',')

        for sound_id in sound_ids:
            date = soup.find('li', sound_id=sound_id).find('div', class_='operate').get_text().strip()
            self.detail(sound_id, date)
        # 生成文件
        # print self.podcast.rss_str()
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    def detail(self, sound_id, date):
        detail_url = 'http://www.ximalaya.com/tracks/%s.json' % sound_id
        response = requests.get(detail_url, headers=self.header)
        item = json.loads(response.content)

        episode = self.podcast.add_episode()
        episode.id = str(item['id'])
        episode.title = item['title']
        episode.image = item['cover_url_142'].split('?')[0]
        episode.summary = (item['intro'].replace('\n', '') if item['intro'] else '')
        episode.link = 'http://www.ximalaya.com/sound/%d' % item['id']
        episode.authors = [Person("forecho", '*****@*****.**')]
        episode.publication_date = self.reduction_time(
            date, item['formatted_created_at'])
        episode.media = Media(item['play_path_64'], 454599964)
        print self.podcast.name + '=====' + item['title']

    # 时间转换 第一个参数是年月日 第二个参数"12月11日 17:00"
    @staticmethod
    def reduction_time(date, created_date):
        timestamp = datetime.strptime(date, "%Y-%m-%d")
        created_at = datetime.strptime(created_date.split(' ')[1], "%H:%M")
        return datetime(timestamp.year, timestamp.month, timestamp.day, created_at.hour, created_at.minute,
                        tzinfo=pytz.utc)