def broadcast_update(self, broadcast_id, max_results=None, filters=None):
    """Parse the RSS feed at ``broadcast_id`` and return episode dicts.

    broadcast_id : URL of the RSS feed
    max_results  : stop once this many episodes pass the title filters (None for all)
    filters      : title filter list passed to verify_title_filters (None for none)

    Returns: list of dicts with keys download_link/title/date/description
    Raises: HathorException when the feed request is not HTTP 200
    """
    self.logger.debug("Getting episode info from RSS feed:%s", broadcast_id)
    response = requests.get(broadcast_id)
    if response.status_code != 200:
        raise HathorException("Getting invalid status code:%s for rss feed" % response.status_code)
    feed = i_like_soup(response.text, "html.parser")
    title_filters = filters or []
    episodes = []
    for entry in feed.find_all("item"):
        # Early exit once enough episodes have been collected
        if max_results and len(episodes) >= max_results:
            self.logger.debug("Exiting rss update early, at max results:%s", max_results)
            return episodes
        episode_data = {
            "download_link": utils.clean_string(entry.find("enclosure").attrs["url"]),
            "title": utils.clean_string(entry.find("title").string),
            # NOTE(review): html.parser lowercases tag names, hence "pubdate" not "pubDate"
            "date": parser.parse(entry.find("pubdate").string),
            "description": utils.clean_string(entry.find("description").string),
        }
        if not verify_title_filters(title_filters, episode_data["title"]):
            self.logger.debug("Title:%s , does not pass filters, skipping", episode_data["title"])
            continue
        episodes.append(episode_data)
    return episodes
def podcast_create(self, archive_type, broadcast_id, podcast_name, max_allowed=None,
                   remove_commercials=False, file_location=None, artist_name=None,
                   automatic_download=True):
    '''
    Create new podcast
    archive_type        :   Where podcast is downloaded from (rss/soundcloud/youtube)
    broadcast_id        :   Identifier of podcast by archive_type, such as youtube channel ID
    podcast_name        :   Name to identify podcast in database
    max_allowed         :   When syncing the podcast, keep the last N episodes(if none keep all)
    remove_commercials  :   Attempt to remove commercials once audio files are downloaded
    file_location       :   Where podcast files will be stored
    artist_name         :   Name of artist to use when updating media file metadata
    automatic_download  :   Automatically download new episodes with file-sync

    Returns: Integer ID of created podcast
    '''
    self._check_argument_type(podcast_name, basestring, 'Podcast name must be string type')
    # Fixed typo in validation message ('Brodcast' -> 'Broadcast')
    self._check_argument_type(broadcast_id, basestring, 'Broadcast ID must be string type')
    self._check_argument_type(archive_type, basestring, 'Archive Type must be string type')
    self._check_argument_type(automatic_download, bool, 'Automatic download must be boolean type')
    self._check_argument_type(remove_commercials, bool, 'Remove commercials must be boolean type')
    self._check_argument_type(max_allowed, [None, int], 'Max allowed must be None or int type')
    self._check_argument_type(file_location, [None, basestring], 'File location must be None or string type')
    # Fixed copy-pasted error message: previously said 'File location' for the artist name check
    self._check_argument_type(artist_name, [None, basestring], 'Artist name must be None or string type')
    self._check_argument_oneof(archive_type, ARCHIVE_KEYS, 'Archive Type must be in accepted list of keys')
    if max_allowed is not None and max_allowed < 1:
        self._fail('Max allowed must be positive integer, %s given' % max_allowed)
    # Fall back to the default podcast directory when no explicit location is given
    if file_location is None:
        if self.podcast_directory is None:
            self._fail("No default podcast directory specified, will need specific file location to create podcast")
        file_location = os.path.join(self.podcast_directory, utils.normalize_name(podcast_name))
    pod_args = {
        'name': utils.clean_string(podcast_name),
        'archive_type': archive_type,
        'broadcast_id': utils.clean_string(broadcast_id),
        'max_allowed': max_allowed,
        'remove_commercial': remove_commercials,
        'file_location': os.path.abspath(file_location),
        'artist_name': utils.clean_string(artist_name),
        'automatic_episode_download': automatic_download,
    }
    new_pod = Podcast(**pod_args)
    try:
        self.db_session.add(new_pod)
        self.db_session.commit()
        self.logger.info("Podcast created in database, id:%d, args %s", new_pod.id,
                         ' -- '.join('%s-%s' % (k, v) for k, v in pod_args.items()))
    except IntegrityError:
        # Name collides with an existing podcast; roll back so the session stays usable
        self.db_session.rollback()
        self._fail('Cannot create podcast, name was %s' % pod_args['name'])
    self.logger.debug("Ensuring podcast %d path exists %s", new_pod.id, file_location)
    self._ensure_path(file_location)
    return new_pod.id
def broadcast_update(self, broadcast_id, max_results=None, filters=None):
    """Gather episode data for a youtube channel, following API pagination.

    broadcast_id : youtube channel identifier
    max_results  : stop once this many episodes pass the title filters (None for all)
    filters      : title filter list passed to verify_title_filters (None for none)

    Returns: list of dicts with keys title/description/download_link/date
    Raises: HathorException on HTTP 400/403 (bad request or key/quota problem)
    """
    self.logger.debug("Getting episodes for youtube broadcast:%s", broadcast_id)
    pagetoken = None
    archive_data = []
    filters = filters or []
    while True:
        url = urls.youtube_channel_get(broadcast_id, self.google_api_key, page_token=pagetoken)
        req = requests.get(url)
        if req.status_code in (400, 403):
            raise HathorException("Invalid status code:%s" % req.status_code)
        data = json.loads(req.text)
        for item in data["items"]:
            # Results may include channels/playlists; only videos count as episodes
            if item["id"]["kind"] != "youtube#video":
                # Fixed eager %-formatting: lazy logger args, consistent with rest of module
                self.logger.debug("Item %s is not a video, skipping", item["id"])
                continue
            title = utils.clean_string(item["snippet"]["title"])
            if not verify_title_filters(filters, title):
                self.logger.debug("Title:%s , does not pass filters, skipping", title)
                continue
            download_url = "https://www.youtube.com/watch?v=%s" % item["id"]["videoId"]
            date = datetime.strptime(item["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%S.000Z")
            episode_data = {
                "title": title,
                "description": utils.clean_string(item["snippet"]["description"]),
                "download_link": download_url,
                "date": date,
            }
            archive_data.append(episode_data)
            if max_results and len(archive_data) >= max_results:
                self.logger.debug("At max results:%s, exiting early", max_results)
                return archive_data
        try:
            pagetoken = data["nextPageToken"]
        except KeyError:
            # Fixed log message: the missing key is 'nextPageToken', not 'pagetoken'
            self.logger.debug("No key 'nextPageToken' in youtube data, exiting")
            break
        if not pagetoken:
            self.logger.debug("Page token is none in youtube data, exiting")
            break
    return archive_data
def broadcast_update(self, broadcast_id, max_results=None, filters=None):
    """Collect downloadable soundcloud tracks for a broadcast.

    Follows pagination via 'next_href' until exhausted or until max_results
    episodes pass the title filters.

    Returns: list of dicts with keys date/title/download_link/description
    Raises: HathorException when a track-list request is not HTTP 200
    """
    self.logger.debug("Getting episodes from soundcloud broadcast:%s", broadcast_id)
    url = urls.soundcloud_track_list(broadcast_id, self.soundcloud_client_id)
    archive_data = []
    title_filters = filters or []
    while True:
        response = requests.get(url)
        if response.status_code != 200:
            raise HathorException("Error getting soundcloud track list, request error:%s" % response.status_code)
        page = json.loads(response.text)
        for track in page["collection"]:
            # Skip tracks the API marks as not downloadable
            if not track["downloadable"]:
                self.logger.debug("Item with title:%s not downloadable, skipping", track["title"])
                continue
            episode = {
                "date": datetime.strptime(track["created_at"], "%Y/%m/%d %H:%M:%S +0000"),
                "title": utils.clean_string(track["title"]),
                "download_link": utils.clean_string(track["download_url"]),
                "description": utils.clean_string(track["description"]),
            }
            if not verify_title_filters(title_filters, episode["title"]):
                self.logger.debug("Title:%s , does not pass filters, skipping", episode["title"])
                continue
            archive_data.append(episode)
            if max_results and max_results <= len(archive_data):
                self.logger.debug("At max results limit:%s, exiting early", len(archive_data))
                return archive_data
        # Absence of 'next_href' means there are no further pages
        if "next_href" not in page:
            self.logger.debug("No more soundcloud episodes found, exiting")
            break
        url = page["next_href"]
    return archive_data
def __episode_download_input(self, episode_input):
    """Download each episode in episode_input and record results in the database.

    episode_input : iterable of episode ORM objects with podcast_id/download_url/
                    title/date attributes

    Returns: list of episode IDs that were successfully downloaded
    """
    def build_episode_path(episode, podcast):
        # Path prefix is "<normalized date>.<normalized title>" inside the podcast dir
        pod_path = podcast['file_location']
        title = utils.normalize_name(episode.title)
        date = utils.normalize_name(episode.date.strftime(self.datetime_output_format))
        file_name = '%s.%s' % (date, title)
        return os.path.join(pod_path, file_name)
    # Cache podcast rows so multiple episodes of one podcast hit the DB only once
    podcast_cache = dict()
    episodes_downloaded = []
    for episode in episode_input:
        try:
            podcast = podcast_cache[episode.podcast_id]
        except KeyError:
            podcast = self.db_session.query(Podcast).get(episode.podcast_id).as_dict(self.datetime_output_format)
            podcast_cache[episode.podcast_id] = podcast
        manager = self._archive_manager(podcast['archive_type'])
        self.logger.debug("Downloading data from url:%s", episode.download_url)
        episode_path_prefix = build_episode_path(episode, podcast)
        output_path, download_size = manager.episode_download(episode.download_url, episode_path_prefix)
        # Manager signals download failure with a (None, None) pair; skip this episode
        if output_path is None and download_size is None:
            self.logger.error("Unable to download episode:%s, skipping", episode.id)
            continue
        self.logger.info("Downloaded episode %s data to file %s", episode.id, output_path)
        if podcast['remove_commercial']:
            self.logger.info("Removing commercials for episode:%s", episode.id)
            # Commercial removal rewrites the file, so it returns the new size
            download_size = self.__remove_commercials(output_path)
        episode.file_path = utils.clean_string(output_path)
        episode.file_size = download_size
        # Persist path/size before tagging so a tag failure doesn't lose the download
        self.db_session.commit()
        # use artist name if possible
        artist_name = podcast['artist_name'] or podcast['name']
        try:
            metadata.tags_update(output_path, artist=artist_name, album_artist=artist_name,
                                 album=podcast['name'], title=episode.title,
                                 date=episode.date.strftime(self.datetime_output_format))
            self.logger.debug("Updated database audio tags for episode %s", episode.id)
        except AudioFileException as error:
            # Tag failure is non-fatal; the episode file is already downloaded
            self.logger.warn("Unable to update tags on file %s : %s", output_path, str(error))
        episodes_downloaded.append(episode.id)
    return episodes_downloaded
def test_clean_stringy(self):
    """clean_string returns None only for None input; any string yields a value."""
    self.assert_none(utils.clean_string(None))
    for value in ('', 'foo'):
        self.assert_not_none(utils.clean_string(value))
def podcast_update(self, podcast_id, podcast_name=None, broadcast_id=None, archive_type=None,
                   max_allowed=None, remove_commercials=None, artist_name=None,
                   automatic_download=None):
    '''
    Update a single podcast
    podcast_id          :   ID of podcast to edit
    archive_type        :   Where podcast is downloaded from (rss/soundcloud/youtube)
    broadcast_id        :   Identifier of podcast by archive_type, such as youtube channel ID
    podcast_name        :   Name to identify podcast in database
    max_allowed         :   When syncing the podcast, keep the last N episodes. Set to 0 for unlimited
    remove_commercials  :   Attempt to remove commercials once audio files are downloaded
    artist_name         :   Name of artist to use when updating media file metadata
    automatic_download  :   Automatically download episodes with file-sync

    Returns: null
    '''
    self._check_argument_type(podcast_id, int, 'Podcast ID must be int type')
    pod = self.db_session.query(Podcast).get(podcast_id)
    if not pod:
        self._fail("Podcast not found for ID:%s" % podcast_id)
    # Only arguments explicitly given (not None) are applied to the row
    if podcast_name is not None:
        self._check_argument_type(podcast_name, basestring, 'Podcast name must be string type or None')
        self.logger.debug("Updating podcast name to %s for podcast %s", podcast_name, podcast_id)
        pod.name = utils.clean_string(podcast_name)
    if artist_name is not None:
        # Fixed copy-pasted error message: previously said 'Podcast name'
        self._check_argument_type(artist_name, basestring, 'Artist name must be string type or None')
        self.logger.debug("Updating artist name to %s for podcast %s", artist_name, podcast_id)
        pod.artist_name = utils.clean_string(artist_name)
    if archive_type is not None:
        self._check_argument_type(archive_type, basestring, 'Archive Type must be string type or None')
        self._check_argument_oneof(archive_type, ARCHIVE_KEYS, 'Archive Type must be in accepted list')
        self.logger.debug("Updating archive to %s for podcast %s", archive_type, podcast_id)
        pod.archive_type = archive_type
    if broadcast_id is not None:
        self._check_argument_type(broadcast_id, basestring, 'Broadcast ID must be string type or None')
        self.logger.debug("Updating broadcast id to %s for podcast %s", broadcast_id, podcast_id)
        pod.broadcast_id = utils.clean_string(broadcast_id)
    if max_allowed is not None:
        self._check_argument_type(max_allowed, int, 'Max allowed must be int type or None')
        if max_allowed < 0:
            self._fail('Max allowed must be positive integer or 0')
        # 0 is the sentinel for "unlimited", stored as NULL
        if max_allowed == 0:
            pod.max_allowed = None
        else:
            pod.max_allowed = max_allowed
        self.logger.debug("Updating max allowed to %s for podcast %s", max_allowed, podcast_id)
    if remove_commercials is not None:
        self._check_argument_type(remove_commercials, bool, 'Remove commercials must be bool type')
        self.logger.debug("Updating remove commercials to %s for podcast %s", remove_commercials, podcast_id)
        pod.remove_commercial = remove_commercials
    if automatic_download is not None:
        self._check_argument_type(automatic_download, bool, 'Automatic download must be bool type')
        self.logger.debug("Updating automatic download to %s for podcast %s", automatic_download, podcast_id)
        pod.automatic_episode_download = automatic_download
    try:
        self.db_session.commit()
        # Fixed typo in log message ('commited' -> 'committed')
        self.logger.info("Podcast %s update committed", pod.id)
    except IntegrityError:
        # Likely a unique-name collision; roll back so the session stays usable
        self.db_session.rollback()
        self._fail('Cannot update podcast id:%s' % podcast_id)