def broadcast_update(self, broadcast_id, max_results=None, filters=None):
    """Parse the RSS feed at ``broadcast_id`` and return episode dicts.

    broadcast_id : URL of the RSS feed
    max_results  : stop once this many episodes pass the title filters (None for all)
    filters      : title filter list passed to verify_title_filters (None for none)

    Returns: list of dicts with keys download_link/title/date/description
    Raises: HathorException when the feed request is not HTTP 200
    """
    self.logger.debug("Getting episode info from RSS feed:%s", broadcast_id)
    response = requests.get(broadcast_id)
    if response.status_code != 200:
        raise HathorException("Getting invalid status code:%s for rss feed" % response.status_code)
    feed = i_like_soup(response.text, "html.parser")
    title_filters = filters or []
    episodes = []
    for entry in feed.find_all("item"):
        # Early exit once enough episodes have been collected
        if max_results and len(episodes) >= max_results:
            self.logger.debug("Exiting rss update early, at max results:%s", max_results)
            return episodes
        episode_data = {
            "download_link": utils.clean_string(entry.find("enclosure").attrs["url"]),
            "title": utils.clean_string(entry.find("title").string),
            # NOTE(review): html.parser lowercases tag names, hence "pubdate" not "pubDate"
            "date": parser.parse(entry.find("pubdate").string),
            "description": utils.clean_string(entry.find("description").string),
        }
        if not verify_title_filters(title_filters, episode_data["title"]):
            self.logger.debug("Title:%s , does not pass filters, skipping", episode_data["title"])
            continue
        episodes.append(episode_data)
    return episodes
def podcast_create(self, archive_type, broadcast_id, podcast_name, max_allowed=None,
                   remove_commercials=False, file_location=None, artist_name=None,
                   automatic_download=True):
    '''
    Create new podcast
    archive_type        :   Where podcast is downloaded from (rss/soundcloud/youtube)
    broadcast_id        :   Identifier of podcast by archive_type, such as youtube channel ID
    podcast_name        :   Name to identify podcast in database
    max_allowed         :   When syncing the podcast, keep the last N episodes(if none keep all)
    remove_commercials  :   Attempt to remove commercials once audio files are downloaded
    file_location       :   Where podcast files will be stored
    artist_name         :   Name of artist to use when updating media file metadata
    automatic_download  :   Automatically download new episodes with file-sync

    Returns: Integer ID of created podcast
    '''
    self._check_argument_type(podcast_name, basestring, 'Podcast name must be string type')
    # Fixed typo in validation message ('Brodcast' -> 'Broadcast')
    self._check_argument_type(broadcast_id, basestring, 'Broadcast ID must be string type')
    self._check_argument_type(archive_type, basestring, 'Archive Type must be string type')
    self._check_argument_type(automatic_download, bool, 'Automatic download must be boolean type')
    self._check_argument_type(remove_commercials, bool, 'Remove commercials must be boolean type')
    self._check_argument_type(max_allowed, [None, int], 'Max allowed must be None or int type')
    self._check_argument_type(file_location, [None, basestring], 'File location must be None or string type')
    # Fixed copy-pasted error message: previously said 'File location' for the artist name check
    self._check_argument_type(artist_name, [None, basestring], 'Artist name must be None or string type')
    self._check_argument_oneof(archive_type, ARCHIVE_KEYS, 'Archive Type must be in accepted list of keys')
    if max_allowed is not None and max_allowed < 1:
        self._fail('Max allowed must be positive integer, %s given' % max_allowed)
    # Fall back to the default podcast directory when no explicit location is given
    if file_location is None:
        if self.podcast_directory is None:
            self._fail("No default podcast directory specified, will need specific file location to create podcast")
        file_location = os.path.join(self.podcast_directory, utils.normalize_name(podcast_name))
    pod_args = {
        'name': utils.clean_string(podcast_name),
        'archive_type': archive_type,
        'broadcast_id': utils.clean_string(broadcast_id),
        'max_allowed': max_allowed,
        'remove_commercial': remove_commercials,
        'file_location': os.path.abspath(file_location),
        'artist_name': utils.clean_string(artist_name),
        'automatic_episode_download': automatic_download,
    }
    new_pod = Podcast(**pod_args)
    try:
        self.db_session.add(new_pod)
        self.db_session.commit()
        self.logger.info("Podcast created in database, id:%d, args %s", new_pod.id,
                         ' -- '.join('%s-%s' % (k, v) for k, v in pod_args.items()))
    except IntegrityError:
        # Name collides with an existing podcast; roll back so the session stays usable
        self.db_session.rollback()
        self._fail('Cannot create podcast, name was %s' % pod_args['name'])
    self.logger.debug("Ensuring podcast %d path exists %s", new_pod.id, file_location)
    self._ensure_path(file_location)
    return new_pod.id
def broadcast_update(self, broadcast_id, max_results=None, filters=None):
    """Gather episode data for a youtube channel, following API pagination.

    broadcast_id : youtube channel identifier
    max_results  : stop once this many episodes pass the title filters (None for all)
    filters      : title filter list passed to verify_title_filters (None for none)

    Returns: list of dicts with keys title/description/download_link/date
    Raises: HathorException on HTTP 400/403 (bad request or key/quota problem)
    """
    self.logger.debug("Getting episodes for youtube broadcast:%s", broadcast_id)
    pagetoken = None
    archive_data = []
    filters = filters or []
    while True:
        url = urls.youtube_channel_get(broadcast_id, self.google_api_key, page_token=pagetoken)
        req = requests.get(url)
        if req.status_code in (400, 403):
            raise HathorException("Invalid status code:%s" % req.status_code)
        data = json.loads(req.text)
        for item in data["items"]:
            # Results may include channels/playlists; only videos count as episodes
            if item["id"]["kind"] != "youtube#video":
                # Fixed eager %-formatting: lazy logger args, consistent with rest of module
                self.logger.debug("Item %s is not a video, skipping", item["id"])
                continue
            title = utils.clean_string(item["snippet"]["title"])
            if not verify_title_filters(filters, title):
                self.logger.debug("Title:%s , does not pass filters, skipping", title)
                continue
            download_url = "https://www.youtube.com/watch?v=%s" % item["id"]["videoId"]
            date = datetime.strptime(item["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%S.000Z")
            episode_data = {
                "title": title,
                "description": utils.clean_string(item["snippet"]["description"]),
                "download_link": download_url,
                "date": date,
            }
            archive_data.append(episode_data)
            if max_results and len(archive_data) >= max_results:
                self.logger.debug("At max results:%s, exiting early", max_results)
                return archive_data
        try:
            pagetoken = data["nextPageToken"]
        except KeyError:
            # Fixed log message: the missing key is 'nextPageToken', not 'pagetoken'
            self.logger.debug("No key 'nextPageToken' in youtube data, exiting")
            break
        if not pagetoken:
            self.logger.debug("Page token is none in youtube data, exiting")
            break
    return archive_data
def broadcast_update(self, broadcast_id, max_results=None, filters=None):
    """Collect downloadable soundcloud tracks for a broadcast.

    Follows pagination via 'next_href' until exhausted or until max_results
    episodes pass the title filters.

    Returns: list of dicts with keys date/title/download_link/description
    Raises: HathorException when a track-list request is not HTTP 200
    """
    self.logger.debug("Getting episodes from soundcloud broadcast:%s", broadcast_id)
    url = urls.soundcloud_track_list(broadcast_id, self.soundcloud_client_id)
    archive_data = []
    title_filters = filters or []
    while True:
        response = requests.get(url)
        if response.status_code != 200:
            raise HathorException("Error getting soundcloud track list, request error:%s" % response.status_code)
        page = json.loads(response.text)
        for track in page["collection"]:
            # Skip tracks the API marks as not downloadable
            if not track["downloadable"]:
                self.logger.debug("Item with title:%s not downloadable, skipping", track["title"])
                continue
            episode = {
                "date": datetime.strptime(track["created_at"], "%Y/%m/%d %H:%M:%S +0000"),
                "title": utils.clean_string(track["title"]),
                "download_link": utils.clean_string(track["download_url"]),
                "description": utils.clean_string(track["description"]),
            }
            if not verify_title_filters(title_filters, episode["title"]):
                self.logger.debug("Title:%s , does not pass filters, skipping", episode["title"])
                continue
            archive_data.append(episode)
            if max_results and max_results <= len(archive_data):
                self.logger.debug("At max results limit:%s, exiting early", len(archive_data))
                return archive_data
        # Absence of 'next_href' means there are no further pages
        if "next_href" not in page:
            self.logger.debug("No more soundcloud episodes found, exiting")
            break
        url = page["next_href"]
    return archive_data
def __episode_download_input(self, episode_input):
    """Download each episode in episode_input and record results in the database.

    episode_input : iterable of episode ORM objects with podcast_id/download_url/
                    title/date attributes

    Returns: list of episode IDs that were successfully downloaded
    """
    def build_episode_path(episode, podcast):
        # Path prefix is "<normalized date>.<normalized title>" inside the podcast dir
        pod_path = podcast['file_location']
        title = utils.normalize_name(episode.title)
        date = utils.normalize_name(episode.date.strftime(self.datetime_output_format))
        file_name = '%s.%s' % (date, title)
        return os.path.join(pod_path, file_name)
    # Cache podcast rows so multiple episodes of one podcast hit the DB only once
    podcast_cache = dict()
    episodes_downloaded = []
    for episode in episode_input:
        try:
            podcast = podcast_cache[episode.podcast_id]
        except KeyError:
            podcast = self.db_session.query(Podcast).get(episode.podcast_id).as_dict(self.datetime_output_format)
            podcast_cache[episode.podcast_id] = podcast
        manager = self._archive_manager(podcast['archive_type'])
        self.logger.debug("Downloading data from url:%s", episode.download_url)
        episode_path_prefix = build_episode_path(episode, podcast)
        output_path, download_size = manager.episode_download(episode.download_url, episode_path_prefix)
        # Manager signals download failure with a (None, None) pair; skip this episode
        if output_path is None and download_size is None:
            self.logger.error("Unable to download episode:%s, skipping", episode.id)
            continue
        self.logger.info("Downloaded episode %s data to file %s", episode.id, output_path)
        if podcast['remove_commercial']:
            self.logger.info("Removing commercials for episode:%s", episode.id)
            # Commercial removal rewrites the file, so it returns the new size
            download_size = self.__remove_commercials(output_path)
        episode.file_path = utils.clean_string(output_path)
        episode.file_size = download_size
        # Persist path/size before tagging so a tag failure doesn't lose the download
        self.db_session.commit()
        # use artist name if possible
        artist_name = podcast['artist_name'] or podcast['name']
        try:
            metadata.tags_update(output_path, artist=artist_name, album_artist=artist_name,
                                 album=podcast['name'], title=episode.title,
                                 date=episode.date.strftime(self.datetime_output_format))
            self.logger.debug("Updated database audio tags for episode %s", episode.id)
        except AudioFileException as error:
            # Tag failure is non-fatal; the episode file is already downloaded
            self.logger.warn("Unable to update tags on file %s : %s", output_path, str(error))
        episodes_downloaded.append(episode.id)
    return episodes_downloaded
def test_clean_stringy(self):
    """clean_string returns None only for None input; any string yields a value."""
    self.assert_none(utils.clean_string(None))
    for value in ('', 'foo'):
        self.assert_not_none(utils.clean_string(value))
def podcast_update(self, podcast_id, podcast_name=None, broadcast_id=None, archive_type=None,
                   max_allowed=None, remove_commercials=None, artist_name=None,
                   automatic_download=None):
    '''
    Update a single podcast
    podcast_id          :   ID of podcast to edit
    archive_type        :   Where podcast is downloaded from (rss/soundcloud/youtube)
    broadcast_id        :   Identifier of podcast by archive_type, such as youtube channel ID
    podcast_name        :   Name to identify podcast in database
    max_allowed         :   When syncing the podcast, keep the last N episodes. Set to 0 for unlimited
    remove_commercials  :   Attempt to remove commercials once audio files are downloaded
    artist_name         :   Name of artist to use when updating media file metadata
    automatic_download  :   Automatically download episodes with file-sync

    Returns: null
    '''
    self._check_argument_type(podcast_id, int, 'Podcast ID must be int type')
    pod = self.db_session.query(Podcast).get(podcast_id)
    if not pod:
        self._fail("Podcast not found for ID:%s" % podcast_id)
    # Only arguments explicitly given (not None) are applied to the row
    if podcast_name is not None:
        self._check_argument_type(podcast_name, basestring, 'Podcast name must be string type or None')
        self.logger.debug("Updating podcast name to %s for podcast %s", podcast_name, podcast_id)
        pod.name = utils.clean_string(podcast_name)
    if artist_name is not None:
        # Fixed copy-pasted error message: previously said 'Podcast name'
        self._check_argument_type(artist_name, basestring, 'Artist name must be string type or None')
        self.logger.debug("Updating artist name to %s for podcast %s", artist_name, podcast_id)
        pod.artist_name = utils.clean_string(artist_name)
    if archive_type is not None:
        self._check_argument_type(archive_type, basestring, 'Archive Type must be string type or None')
        self._check_argument_oneof(archive_type, ARCHIVE_KEYS, 'Archive Type must be in accepted list')
        self.logger.debug("Updating archive to %s for podcast %s", archive_type, podcast_id)
        pod.archive_type = archive_type
    if broadcast_id is not None:
        self._check_argument_type(broadcast_id, basestring, 'Broadcast ID must be string type or None')
        self.logger.debug("Updating broadcast id to %s for podcast %s", broadcast_id, podcast_id)
        pod.broadcast_id = utils.clean_string(broadcast_id)
    if max_allowed is not None:
        self._check_argument_type(max_allowed, int, 'Max allowed must be int type or None')
        if max_allowed < 0:
            self._fail('Max allowed must be positive integer or 0')
        # 0 is the sentinel for "unlimited", stored as NULL
        if max_allowed == 0:
            pod.max_allowed = None
        else:
            pod.max_allowed = max_allowed
        self.logger.debug("Updating max allowed to %s for podcast %s", max_allowed, podcast_id)
    if remove_commercials is not None:
        self._check_argument_type(remove_commercials, bool, 'Remove commercials must be bool type')
        self.logger.debug("Updating remove commercials to %s for podcast %s", remove_commercials, podcast_id)
        pod.remove_commercial = remove_commercials
    if automatic_download is not None:
        self._check_argument_type(automatic_download, bool, 'Automatic download must be bool type')
        self.logger.debug("Updating automatic download to %s for podcast %s", automatic_download, podcast_id)
        pod.automatic_episode_download = automatic_download
    try:
        self.db_session.commit()
        # Fixed typo in log message ('commited' -> 'committed')
        self.logger.info("Podcast %s update committed", pod.id)
    except IntegrityError:
        # Likely a unique-name collision; roll back so the session stays usable
        self.db_session.rollback()
        self._fail('Cannot update podcast id:%s' % podcast_id)