Esempio n. 1
0
    def scrape(self, tries_remaining=5):
        """ Search and scrape YouTube, returning the best match among the results.

        The search query is ``self.raw_song`` when ``self.meta_tags`` is None,
        otherwise the name built by ``internals.generate_songname`` from
        ``const.args.file_format`` and the metadata.

        Each result tile accepted by ``is_video`` is collected as a dict with
        the keys ``link``, ``title``, ``videotime`` and ``seconds``.  Without
        metadata only the first accepted tile is collected.

        If a tile has no duration element the whole search is retried, at most
        ``tries_remaining`` times in total.

        Returns the value of ``self._best_match(videos)``, or None once the
        retries are exhausted.
        """

        # prevents an infinite loop but allows for a few retries
        if tries_remaining == 0:
            log.debug('No tries left. I quit.')
            return None

        if self.meta_tags is None:
            song = self.raw_song
        else:
            song = internals.generate_songname(const.args.file_format,
                                               self.meta_tags)
        search_url = generate_search_url(song)
        log.debug('Opening URL: {0}'.format(search_url))

        # close the HTTP response as soon as the page has been read
        with urllib.request.urlopen(search_url) as response:
            item = response.read()
        items_parse = BeautifulSoup(item, "html.parser")

        videos = []
        for x in items_parse.find_all(
                'div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):

            if not is_video(x):
                continue

            anchor = x.find('div', class_='yt-lockup-content').find('a')
            # keep only the last 11 characters of the href
            # (presumably the video id -- TODO confirm)
            link = anchor['href'][-11:]
            title = anchor['title']

            try:
                videotime = x.find('span', class_="video-time").get_text()
            except AttributeError:
                # find() returned None: the tile carries no duration element
                log.debug(
                    'Could not find video duration on YouTube, retrying..')
                # retry through this method so the result keeps the same
                # shape as the normal return value below
                return self.scrape(tries_remaining=tries_remaining - 1)

            youtubedetails = {
                'link': link,
                'title': title,
                'videotime': videotime,
                'seconds': internals.get_sec(videotime)
            }
            videos.append(youtubedetails)
            # without metadata there is nothing to compare against,
            # so the first video is all that is needed
            if self.meta_tags is None:
                break

        return self._best_match(videos)
Esempio n. 2
0
 def test_raise_error(self):
     """ get_sec raises ValueError for '*' and ',' separated strings. """
     for malformed in ('10*05', '02,28,46'):
         with pytest.raises(ValueError):
             internals.get_sec(malformed)
Esempio n. 3
0
 def test_from_hours(self):
     """ 1h 30m 05s written with '.' or ':' converts to 5405 seconds. """
     for timestamp in ('1.30.05', '1:30:05'):
         assert internals.get_sec(timestamp) == 5405
Esempio n. 4
0
 def test_from_minutes(self):
     """ 3m 33s written with '.' or ':' converts to 213 seconds. """
     for timestamp in ('3.33', '3:33'):
         assert internals.get_sec(timestamp) == 213
Esempio n. 5
0
 def test_from_seconds(self):
     """ 0m 45s written with ':' or '.' converts to 45 seconds. """
     for timestamp in ('0:45', '0.45'):
         assert internals.get_sec(timestamp) == 45
Esempio n. 6
0
def _first_video_within_tolerance(videos, target_seconds,
                                  start_tolerance=10, max_tolerance=20):
    """ Return the first video whose length is close enough to target_seconds.

    The allowed difference is widened one second at a time, from
    start_tolerance up to and including max_tolerance.  At the smallest
    tolerance that admits any video, the first admitted video in list order
    is returned.  Returns None when no video lies within max_tolerance
    seconds of target_seconds.
    """
    for tolerance in range(start_tolerance, max_tolerance + 1):
        for video in videos:
            if abs(video['seconds'] - target_seconds) <= tolerance:
                return video
    return None


def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
    """ Search for the song on YouTube and generate a URL to its video.

    raw_song is used as the search query when meta_tags is None; otherwise
    the query is built with generate_songname(meta_tags).  tries_remaining
    bounds how many times the search is repeated when a result tile has no
    duration element.

    Selection of the video:
      * args.manual set: the results are listed and the user chooses one
        through internals.input_link.
      * no meta_tags: the first result is taken.
      * otherwise: the first result whose length is within 10 to 20 seconds
        of meta_tags['duration_ms'], preferring the tightest tolerance.

    Returns the full 'http://youtube.com...' link, or None when the retries
    are exhausted, nothing was scraped, the user skipped the song, or no
    result has a suitable duration.
    """
    # prevents an infinite loop but allows for a few retries
    if tries_remaining == 0:
        log.debug('No tries left. I quit.')
        return None

    if meta_tags is None:
        song = raw_song
        search_url = internals.generate_search_url(song, viewsort=False)
    else:
        song = generate_songname(meta_tags)
        search_url = internals.generate_search_url(song, viewsort=True)
    log.debug('Opening URL: {0}'.format(search_url))

    # close the HTTP response as soon as the page has been read
    with urllib.request.urlopen(search_url) as response:
        item = response.read()
    items_parse = BeautifulSoup(item, "html.parser")

    videos = []
    for x in items_parse.find_all(
            'div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):

        if not is_video(x):
            continue

        anchor = x.find('div', class_='yt-lockup-content').find('a')
        link = anchor['href']
        title = anchor['title']

        try:
            videotime = x.find('span', class_="video-time").get_text()
        except AttributeError:
            # find() returned None: the tile carries no duration element
            log.debug('Could not find video duration on YouTube, retrying..')
            return generate_youtube_url(raw_song, meta_tags,
                                        tries_remaining - 1)

        youtubedetails = {
            'link': link,
            'title': title,
            'videotime': videotime,
            'seconds': internals.get_sec(videotime)
        }
        videos.append(youtubedetails)
        # without metadata only the first result is ever used
        if meta_tags is None:
            break

    if not videos:
        return None

    log.debug(pprint.pformat(videos))

    if args.manual:
        log.info(song)
        log.info('0. Skip downloading this song.\n')
        # fetch all video links on first page on YouTube
        for i, v in enumerate(videos):
            log.info(u'{0}. {1} {2} {3}'.format(
                i + 1, v['title'], v['videotime'],
                "http://youtube.com" + v['link']))
        # let user select the song to download
        result = internals.input_link(videos)
        if result is None:
            return None
    elif meta_tags is None:
        # if the metadata could not be acquired, take the first result
        # from Youtube because the proper song length is unknown
        result = videos[0]
        log.debug(
            'Since no metadata found on Spotify, going with the first result'
        )
    else:
        # filter out videos that do not have a similar length to the
        # Spotify song: start with a reasonable tolerance and widen it until
        # one of the Youtube results fits or the maximum tolerance is reached
        target_seconds = int(meta_tags['duration_ms']) / 1000
        result = _first_video_within_tolerance(
            videos, target_seconds, start_tolerance=10, max_tolerance=20)
        if result is None:
            log.error("{0} by {1} was not found.\n".format(
                meta_tags['name'], meta_tags['artists'][0]['name']))
            return None

    if result:
        full_link = u'http://youtube.com{0}'.format(result['link'])
    else:
        full_link = None

    log.debug('Best matching video link: {}'.format(full_link))
    return full_link