Exemple #1
0
class YoutubeMusic:
    """Searches YouTube for music videos of an artist and filters the hits."""

    def __init__(self):
        """Create the YouTubeService client used to run the queries."""
        self.service = YouTubeService()

    def search(self, artist):
        """Query YouTube for ``artist`` and return the entries that pass
        ``is_valid_entry``.

        The query requests up to 50 results from the "/Music" category,
        ordered by relevance, with racy content excluded and format '5'.

        Returns a dict ``{'artist': artist, 'results': [...]}`` where each
        result is a dict with the keys 'url', 'title' and 'duration'
        (duration as an int, taken from ``media.duration.seconds``).
        """
        query = YouTubeVideoQuery()
        query.vq = artist
        query.orderby = 'relevance'
        query.racy = 'exclude'
        query.format = '5'
        query.max_results = 50
        query.categories.append("/Music")
        feed = self.service.YouTubeQuery(query)
        results = [
            {
                'url': entry.media.player.url,
                'title': smart_str(entry.media.title.text),
                'duration': int(entry.media.duration.seconds),
            }
            for entry in feed.entry
            if self.is_valid_entry(artist, entry)
        ]
        return {'artist': artist, 'results': results}

    def is_valid_entry(self, artist, entry):
        """Return True when ``entry`` looks like a studio track by ``artist``.

        An entry is rejected when any of the following holds:
        - it has no media, duration or title element,
        - it has a rating and the average is below 3.5,
        - it has no statistics or fewer than 1000 views,
        - its duration is shorter than 2 or longer than 9 minutes,
        - the lower-cased title does not contain the lower-cased artist,
        - the title contains one of the words perform, performance,
          concert or cover.
        """
        media = entry.media
        # rating and statistics are already treated as optional below; apply
        # the same care to the media elements so that one incomplete entry is
        # skipped instead of aborting the whole search with AttributeError.
        if media is None or media.duration is None or media.title is None:
            return False

        duration = int(media.duration.seconds)
        title = smart_str(media.title.text).lower()
        if entry.rating is not None and float(entry.rating.average) < 3.5:
            return False
        if entry.statistics is None or int(entry.statistics.view_count) < 1000:
            return False
        if duration < (2 * 60) or duration > (9 * 60):
            return False
        if artist.lower() not in title:
            return False
        # Live recordings and cover versions are not wanted.
        if re.search(r"\b(perform|performance|concert|cover)\b", title):
            return False
        return True
Exemple #2
0
class YouTube(WebDataSource):
    '''
    searches youtube video library
    '''

    # Maps dotted attribute paths (e.g. 'media.title.text') to flat result
    # keys (e.g. 'title').
    # NOTE(review): presumably used by convert_feed_entry to turn a video feed
    # entry into a dictionary -- that method is not visible here, confirm.
    YT_ATOM_RESULT_TO_DICT_MAPPING = {
        'media.title.text': 'title',
        'published.text': 'published',
        'media.description.text': 'content',
        'media.duration.seconds': 'duration',
        'statistics.view_count': 'statistics_viewcount',
        'statistics.favorite_count': 'statistics_favoritecount',
        'rating.average': 'rating_average',
        'rating.max': 'rating_max',
        'rating.min': 'rating_min',
        'rating.num_raters': 'rating_numraters',
        'summary': 'summary',
        'rights': 'rights',
        'updated.text': 'last_modified',
        'source': 'yt_source'
    }

    # Same kind of mapping (dotted attribute path -> flat result key).
    # NOTE(review): judging by the name this one is meant for comment
    # entries -- its consumer is not visible here, confirm.
    YT_COMMENTS_MAPPING = {
        'id.text': 'id',
        'title.text': 'title',
        'published.text': 'published',
        'updated.text': 'last_modified',
        'content.text': 'content'
    }

    def __init__(self):
        ''' initializes the WebDataSource base class and creates the
        YouTubeService client that the search methods of this class query
        '''
        WebDataSource.__init__(self)
        self.youtube_service = YouTubeService()

    def search(self,
               search_terms,
               location=None,
               max_results=MAX_RESULTS_PER_QUERY,
               max_age=None,
               orderby='published',
               max_comment_count=0):
        """
        Searches for youtube videos.

        @param search_terms: list, tuple or set of search terms; the terms
                             are joined with ', ' to form the query string
        @param location: latitude and longitude, e.g. 37.42307,-122.08427;
                         assigned unchanged to the query's location
        @param max_results: maximum number of results to return
        @param max_age: datetime of the oldest entry or age in minutes,
                        translated by get_query_time
        @param orderby: order search results by (relevance, published,
                        viewCount, rating)
        @param max_comment_count: maximum number of comments to fetch
                                  (default: 0)
        @return: the result of search_youtube
        @raise ValueError: if search_terms is not a list, tuple or set
        """
        if not isinstance(search_terms, (list, tuple, set)):
            # Adjacent string literals keep the message free of the run of
            # indentation spaces a backslash continuation would embed.
            raise ValueError('Warning search requires a list of search terms, '
                             'rather than a single term')

        # all youtube search parameter are here:
        # https://developers.google.com/youtube/2.0/reference?hl=de#Custom_parameters
        query = YouTubeVideoQuery()
        query.vq = ', '.join(search_terms)
        query.orderby = orderby
        query.racy = 'include'
        query.time = self.get_query_time(max_age)
        # Size of one result page; the overall limit (max_results) is
        # enforced by search_youtube while it pages through the feed.
        query.max_results = MAX_RESULTS_PER_QUERY

        if location:
            query.location = location

        return self.search_youtube(query, max_results, max_comment_count)

    @classmethod
    def get_query_time(cls, max_age):
        ''' converts a datetime or int (age in minutes) to the youtube specific
        query parameter (e.g. this_month, today ...)
        @param max_age: int or datetime object
        @return: youtube specific query_time 
        '''
        if not max_age:
            return 'all_time'

        if isinstance(max_age, datetime):
            # convert datetime to minutes
            max_age = (datetime.now() - max_age).total_seconds() / 60

        if max_age <= 1440:
            query_time = 'today'
        elif max_age > 1440 and max_age <= 10080:
            query_time = 'this_week'
        else:
            query_time = 'this_month'

        return query_time

    def search_youtube(self,
                       query,
                       max_results=MAX_RESULTS_PER_QUERY,
                       max_comment_count=0):
        ''' executes the youtube query and pages through the result feed
        until max_results entries were collected or no further page exists
        @param query: YouTubeVideoQuery
        @param max_results: number of collected entries at which to stop
        @param max_comment_count: maximum number of comments to fetch;
                                  forwarded to convert_feed_entry
        @return: list of dictionaries (the values returned by
                 convert_feed_entry)
        '''
        result = []
        feed = self.youtube_service.YouTubeQuery(query)

        while feed:
            for entry in feed.entry:
                try:
                    yt_dict = self.convert_feed_entry(entry, max_comment_count)
                    result.append(yt_dict)
                except Exception as e:
                    # best effort: an entry that cannot be converted is
                    # logged and skipped, the remaining entries are kept
                    logger.exception('Exception converting entry: %s', e)

                if len(result) == max_results:
                    return result

            next_link = feed.GetNextLink()
            if not next_link:
                break

            feed = self.youtube_service.GetYouTubeVideoFeed(next_link.href)

        return result