def get_playlist(self):
    """
    Parses the playlist page source and returns [playlist_name, songs], where
    songs is a list of dictionaries with 'title', 'artist', 'album', and
    'time' (in seconds) fields.
    """
    # Retrieve the HTML source if it has not been retrieved already
    if not self.html_src:
        source = self.retrieve_html_source()
    else:
        source = self.html_src

    # Split to find the playlist name
    name_source = source.split(r'<h1 class="main">')[1]
    name_source = name_source.split('</span>')[0]
    playlist_name = re.findall(r'\">(.*)</a>', name_source)[0]

    # Remove everything before the playlist section
    songs_source = source.split("<tbody data-bind=\"foreach: tracks\"")[1]

    # Divide up into songs
    songs = songs_source.split("</tr>")

    # Create a list of dictionaries of all the songs
    songs_dict = []
    for song in songs:
        try:
            title = re.findall(r'<td.*>(.*)</div>', song, re.S)[0]
            artist = re.findall(r'spotify:artist:.*>(.*)</a>', song)[0]
            album = re.findall(r'spotify:album.*>(.*)</a>', song)[0]
            song_time = re.findall(r'tl-time\">([\w|:]*)</td>', song, re.S)[0]

            # Strip edit/version/remaster suffixes from the title so later
            # searches match the original recording
            title = re.sub(r" - \w* Edit", "", title, flags=re.IGNORECASE)
            title = re.sub(r" -.*Version.*", "", title, flags=re.IGNORECASE)
            title = re.sub(r" -.*Remaster(ed)?.*", "", title, flags=re.IGNORECASE)
            title = re.sub(r" \(Remaster(ed)?\) *", "", title, flags=re.IGNORECASE)
            title = re.sub(r" -.*Anniversary Mix.*", "", title, flags=re.IGNORECASE)

            song_dict = {
                'title': Util.html_to_ascii(title),
                'artist': Util.html_to_ascii(artist),
                'album': Util.html_to_ascii(album),
                'time': Util.time_in_seconds(song_time),
            }
            songs_dict.append(song_dict)
        except IndexError:
            # Skip table rows that are not song entries
            pass

    return [playlist_name, songs_dict]
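# A minimal, standalone sketch of the title-cleanup step above. The helper
# name and the sample title are illustrative only, not taken from the
# project or from a real playlist.
import re

def _strip_title_suffixes(title):
    """Remove edit/version/remaster suffixes so search queries stay clean."""
    patterns = [
        r" - \w* Edit",
        r" -.*Version.*",
        r" -.*Remaster(ed)?.*",
        r" \(Remaster(ed)?\) *",
        r" -.*Anniversary Mix.*",
    ]
    for pattern in patterns:
        title = re.sub(pattern, "", title, flags=re.IGNORECASE)
    return title

# Example: _strip_title_suffixes("Heroes - 1999 Remastered Version") -> "Heroes"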
def _get_search_info(self, song_search_url):
    """
    Downloads the page source of the song_search_url, and returns a list of
    dictionaries containing the information for each search result. The
    dictionaries contain 'title', 'url', and 'time' (in seconds) fields.

    :param song_search_url: The url of a search for a song
    :return: A list of dictionaries, each containing the 'title', 'url', and
             'time' (in seconds) info of each search result
    """
    with urllib.request.urlopen(song_search_url) as response:
        html = response.read()

    # Decode the HTML source from bytes to a string
    search_source = html.decode("UTF-8", "ignore")

    # Parse the source for video info
    search_info = []

    # Isolate the list of results in the source
    results_source = re.split(
        r"<ol id=\"item-section-.*?\" class=\"item-section\">",
        search_source)[1]
    results_source = re.split(
        r"<div class=\"branded-page-box search-pager.*\">",
        results_source, 1)[0]

    # Split by result in the list; because the pattern has a capture group,
    # the split alternates between the type of entry (video, playlist,
    # channel) and the entry's source
    results_source = re.split(
        r"<li><div class=\"yt-lockup yt-lockup-tile yt-lockup-(.*?) vve-check clearfix.*?\"",
        results_source)[1:]

    index = 0
    while len(search_info) < self.MAX_NUM_SEARCH_RESULTS and index < len(results_source) - 1:
        source_type = results_source[index]
        source = results_source[index + 1]

        # Only videos have a watch URL and a duration
        if source_type == "video":
            video_url = re.findall(r"href=\"/watch\?v=(.*?)\"", source)[0]
            video_url = self.SONG_URL_RESULT_ROOT + video_url

            video_title = re.findall(r"title=\"(.*?)\"", source)[2]
            video_title = Util.html_to_ascii(video_title)

            # Convert the "minutes:seconds" duration into seconds
            video_time = re.findall(r"Duration: (\d+:\d+)", source)[0]
            video_time = re.split(r":", video_time)
            video_time = int(video_time[0]) * 60 + int(video_time[1])

            search_info.append({
                "url": video_url,
                "title": video_title,
                "time": video_time
            })

        index += 2

    return search_info
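# A standalone sketch of the interleaved re.split technique used above: when
# the split pattern contains a capture group, re.split returns the captured
# text (here, the result type) followed by the chunk that comes after it.
# The sample HTML below is a made-up stand-in, not real YouTube markup.
import re

sample = (
    '<li><div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix"'
    ' ...>first result</div>'
    '<li><div class="yt-lockup yt-lockup-tile yt-lockup-playlist vve-check clearfix"'
    ' ...>second result</div>'
)
pieces = re.split(
    r"<li><div class=\"yt-lockup yt-lockup-tile yt-lockup-(.*?) vve-check clearfix.*?\"",
    sample)[1:]

# pieces alternates [type, chunk, type, chunk, ...]
for result_type, chunk in zip(pieces[0::2], pieces[1::2]):
    print(result_type, "->", chunk)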
def _get_search_info(self, song_search_url):
    """
    Downloads the page source of the song_search_url, and returns a list of
    dictionaries containing the information for each search result. The
    dictionaries contain 'title', 'url', and 'time' (in seconds) fields.

    :param song_search_url: The url of a search for a song
    :return: A list of dictionaries, each containing the 'title', 'url', and
             'time' (in seconds) info of each search result
    """
    with urllib.request.urlopen(song_search_url) as response:
        html = response.read()

    # Decode the HTML source from bytes to a string
    search_source = html.decode("UTF-8", "ignore")

    # Parse the source for track info
    search_info = []

    # Isolate the list of results in the source
    results_source = re.split(r"<div class=\"searchResultGroupHeading\">",
                              search_source)[1]
    results_source = re.split(r"</ul>", results_source, 1)[0]

    # Split by search result
    results_source = re.split(r"<div class=\"searchItem\">", results_source)[1:]

    # This parsing theoretically works, but urllib cannot retrieve the full
    # SoundCloud page source because the request is rejected as coming from
    # an unsupported browser
    index = 0
    while len(search_info) < self.MAX_NUM_SEARCH_RESULTS and index < len(results_source):
        source = results_source[index]

        artist = re.findall(
            r"<span class=\"soundTitle_+usernameText\">(.*)</span>", source)[0]
        title = re.findall(r"<span class=\"\">(.*)</span>", source)[0]
        url = re.findall(
            r"<a class=\"soundTitle_+title sc-link-dark\" href=\"(.*)\">",
            source)[0]

        title = Util.html_to_ascii(artist + " " + title)
        url = self.SONG_URL_RESULT_ROOT + url

        search_info.append({"url": url, "title": title, "time": None})
        index += 1

    return search_info
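# A possible workaround for the urllib limitation noted above: send a
# browser-like User-Agent header via urllib.request.Request. This is only a
# sketch, under the assumption that SoundCloud serves fuller markup to
# recognized browsers; the helper name is hypothetical and this is not
# verified against the live site.
import urllib.request

def _fetch_with_user_agent(url):
    """Fetch a page while presenting a desktop-browser User-Agent header."""
    request = urllib.request.Request(
        url, headers={"User-Agent": "Mozilla/5.0"})
    with urllib.request.urlopen(request) as response:
        return response.read().decode("UTF-8", "ignore")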