Esempio n. 1
0
 def get_folder_contents(self):
     """Fetch the videos stored in the configured Vimeo project folder.

     Requests ``/users/<vimeo_user_id>/projects/<vimeo_project_id>/videos``
     with a client built from the instance's Vimeo credentials.

     Returns:
         CPSSVimeoCollectionType: Wrapper built from the decoded JSON body
         of the response.

     Raises:
         AssertionError: If the API answers with a status other than 200.
     """
     v = VimeoClient(token=self.vimeo_access_token,
                     key=self.vimeo_client_id,
                     secret=self.vimeo_client_secret)
     response = v.get('/users/' + self.vimeo_user_id + '/projects/' +
                      self.vimeo_project_id + '/videos')
     # Raise explicitly instead of using ``assert``: assert statements are
     # removed when Python runs with -O, which would let error responses
     # through. The exception type is kept so existing callers still work.
     if response.status_code != 200:
         raise AssertionError(
             'Vimeo folder request failed with status {}'.format(
                 response.status_code))
     return CPSSVimeoCollectionType(response.json())
Esempio n. 2
0
def search():
    """Handle POST requests issued by autocomplete.js.

    Reads the search term from the ``search`` key of the request's JSON
    body, looks up matching YouTube playlists and Vimeo channels (at most
    ``max_results`` of each) and returns them serialized with ``dumps``.

    Each entry of the serialized list has the keys ``source``, ``id``,
    ``title``, ``thumb`` and ``amount``.

    Returns:
        The serialized result list for POST requests; ``None`` for any
        other request method (unchanged from the previous behavior).
    """
    # Function-scope import so the block does not depend on the file header.
    from contextlib import closing

    if request.method != "POST":
        return None

    max_results = 3
    result = []

    search_term = request.json["search"]
    debug(("Incoming POST request: {}").format(search_term))

    # Values reused by every request below; looked up once instead of once
    # per playlist.
    quoted_term = quote(search_term)
    youtube_api_url = read_config("YOUTUBE_API_URL")
    youtube_api_key = read_config("YOUTUBE_API_KEY")

    yt_search_request = (
        "{}/search?q={}&type=playlist&part=id,snippet"
        "&fields=items(id/playlistId,snippet(thumbnails/medium/url,title))"
        "&maxResults={}&key={}").format(
            youtube_api_url, quoted_term, max_results, youtube_api_key)
    # closing() releases the HTTP connection even if reading/decoding fails.
    with closing(urllib_request.urlopen(yt_search_request)) as yt_response:
        youtube = loads(yt_response.read().decode())

    VIMEO = VimeoClient(
        token=read_config("VIMEO_TOKEN"),
        key=read_config("VIMEO_KEY"),
        secret=read_config("VIMEO_SECRET"))

    vim_search_request = VIMEO.get(
        ("/channels?query={}&per_page={}").format(quoted_term, max_results),
        params={"fields": "name, uri, pictures.uri, metadata.connections.videos.total"})

    vimeo = vim_search_request.json()

    for playlist in youtube["items"]:
        playlist_id = playlist["id"]["playlistId"]
        snippet = playlist["snippet"]

        # A second call per playlist is needed to learn how many videos it
        # contains.
        # NOTE(review): the parameter is spelled "maxresults" (lowercase r)
        # here but "maxResults" in the search request above - confirm which
        # spelling the API honors before changing it.
        req = (
            "{}/playlistItems?playlistId={}"
            "&part=id&fields=pageInfo/totalResults"
            "&maxresults=1&key={}").format(
                youtube_api_url, playlist_id, youtube_api_key)
        with closing(urllib_request.urlopen(req)) as request_send:
            videos_in_playlist = loads(request_send.read().decode())

        #TODO: decide what to return in case of missing thumbnail
        # The playlist's own thumbnail is used because it is not necessarily
        # the thumbnail of its first video. Falls back to "" when any level
        # of the thumbnail structure is missing.
        thumbnail_url = (
            snippet.get("thumbnails", {}).get("medium", {}).get("url", ""))

        result.append({
            "source": "youtube",
            "id": playlist_id,
            "title": snippet["title"],
            "thumb": thumbnail_url,
            "amount": videos_in_playlist["pageInfo"]["totalResults"]})

    for video in vimeo["data"]:
        result.append({
            "source": "vimeo",
            # The id is the third "/"-separated segment of the uri.
            "id": video["uri"].split("/")[2],
            "title": video["name"],
            #TODO: check if thumbnail of first video is always thumbnail of channel (or customizable as on YouTube)
            "thumb": ("https://i.vimeocdn.com/video/{}_100x75.jpg").format(video["pictures"]["uri"].split("/")[4]),
            "amount": video["metadata"]["connections"]["videos"]["total"]
        })

    return dumps(result)
Esempio n. 3
0
class VimeoCrawler(object):
    """Crawler that collects channel and video metadata through the Vimeo API."""

    def __init__(self, vimeo_data, channel_file, video_file, log_file):
        """Creates crawler for vimeo.

        Args:
            vimeo_data (dictionary): Dictionary with vimeo client informations: accessToken, clientId, clientSecret.
            channel_file (string): Path to a file where metadata from channels will be saved.
            video_file (string): Path to a file where metadata from videos will be saved.
            log_file (string): Path to a file where logs will be saved.

        """
        # Function-scope import so the class does not depend on the file header.
        from os.path import splitext

        #: VimeoClient: Object used for sending requests and getting
        #: responses using Vimeo API.
        self.v = VimeoClient(token=vimeo_data['accessToken'],
                             key=vimeo_data['clientId'],
                             secret=vimeo_data['clientSecret'])

        #: string: Path to a file where metadata from channels will be saved.
        self.channel_file = channel_file

        # splitext also handles a name without any extension; slicing at
        # rfind('.') would cut off the last character in that case because
        # rfind returns -1.
        #: string: Path (without extension) and extension of the file where
        #: metadata from videos will be saved. The channel id is inserted
        #: between the two parts for every analyzed channel.
        self.video_file_name, self.video_file_extension = splitext(video_file)

        # ``propagate`` is not a basicConfig option (Python 3 rejects
        # unknown keywords with ValueError), so it is not passed here.
        logging.basicConfig(level=logging.INFO,
                            filename=log_file,
                            format='%(asctime)-15s %(message)s')
        logging.getLogger("requests").setLevel(logging.WARNING)
        #: Logger: Object used for logging.
        self.logger = logging.getLogger(__name__)

        #: array: Names of the channels which will be analyzed.
        self.channels_array = []

        #: array: Metadata from channels.
        self.vimeo_channels = []

        #: int: The total number of metadata from videos successfully
        #: obtained during crawling.
        self.total_videos = 0

        #: int: The number of requests after which metadata from videos
        #: will be saved to a file.
        self.max_requests_per_save = 10

        #: array: Permitted 'YYYY-MM' dates. Only videos created in one of
        #: these months fulfill the conditions for filtering.
        self.condition_array = [
            '2016-08', '2016-07', '2016-06', '2016-05', '2016-04', '2016-03',
            '2016-02', '2016-01', '2015-12', '2015-11', '2015-10', '2015-09'
        ]

        #: int: The minimum number of views that fulfill the condition for
        #: filtering.
        self.min_views = 0

    def add_content_providers(self, csv_file):
        """Adds names of the channels to be analyzed.

        The first row of the file is treated as a header and skipped; the
        channel name is read from the second column of every other row.
        Completely empty rows are ignored.

        Args:
            csv_file (string): Path to a csv file with names of the channels.

        Raises:
            Exception: If the file can not be opened or a row has no second
            column.

        """
        try:
            with open(csv_file, 'r') as f:
                data = reader(f)
                # Skip the header of *this* file. Popping the first element
                # of channels_array afterwards would remove a real channel
                # when the method is called more than once.
                next(data, None)
                for row in data:
                    if not row:
                        continue
                    self.channels_array.append(row[1])
        except Exception as e:
            raise Exception('Can not read data from file: {}'.format(str(e)))

    def perform_filtering(self, video_data):
        """Performs filtering. Checks whether the video meets the conditions.

        A video passes when the first seven characters of its
        'created_time' are listed in ``condition_array`` and its play count
        is known and not below ``min_views``.

        Args:
            video_data (dictionary): The dictionary with video's metadata.

        Returns:
            boolean: True if video meets the conditions, False otherwise.

        """
        date = video_data['created_time'][:7]
        if date not in self.condition_array:
            return False
        views = video_data['stats']['plays']
        if views is None or views < self.min_views:
            return False
        return True

    def analyze_channel(self, channel):
        """Gets metadata from the channel.
        Metadata obtained from the channel: 'metadata', 'user.metadata'.

        The collected metadata of all channels analyzed so far is rewritten
        to ``channel_file`` on every call.

        Args:
            channel (string): The id of the channel.

        Returns:
            dictionary: The dictionary with basic channel informations (keys: 'channel_id',
            'channel_likes')

        Raises:
            Exception: If the request fails, the response lacks the expected
            keys, or the channel file can not be written.

        """
        try:
            response = self.v.get('/channels/{}'.format(channel)).json()
            response = {
                'channel_id': channel,
                'channel_meta': response['metadata'],
                'user_meta': response['user']['metadata']
            }
        except Exception as e:
            raise Exception('Request for channel {} failed: {}'.format(
                channel, str(e)))
        self.vimeo_channels.append(response)
        try:
            with open(self.channel_file, 'w') as f:
                f.write(dumps(self.vimeo_channels, indent=4))
        except Exception as e:
            raise Exception('Can not save vimeo channels to file: {}'.format(
                str(e)))
        channel_info = {
            'channel_id':
            channel,
            'channel_likes':
            response['channel_meta']['connections']['users']['total']
        }
        return channel_info

    def save_videos(self, channel_videos, video_file, total_channel_videos):
        """Saves metadata from videos for currently analyzed channel to a file.

        Args:
            channel_videos (array): Array with metadata from videos for currently analyzed channel.
            video_file (string): Path to a file where metadata from videos for currently analyzed
            channel will be saved.
            total_channel_videos (int): The number of metadata from videos successfully obtained
            for currently analyzed channel.

        Raises:
            Exception: If the file can not be written.

        """
        self.logger.info('Saving to file...')
        self.logger.info(
            'Total channel videos: {}'.format(total_channel_videos))
        try:
            with open(video_file, 'w') as f:
                f.write(dumps(channel_videos, indent=4))
        except Exception as e:
            # Keep the underlying reason, like the other error messages of
            # this class do.
            raise Exception('Can not save videos to file: {}'.format(str(e)))
        self.logger.info('Saving finished.')

    def analyze_channel_videos(self, channel, channel_info):
        """Gets metadata from videos for currently analyzed channel.

        Follows the 'next' paging link of every response until there is
        none, keeps the videos accepted by ``perform_filtering`` (each uri
        only once) and saves them to a per-channel file after every
        ``max_requests_per_save`` requests and once more at the end.

        Args:
            channel (string): The id of the channel.
            channel_info (dictionary): The dictionary with basic channel informations (keys: 'channel_id',
            'channel_likes')

        """
        channel_videos = []
        # Set of already stored uris: constant-time duplicate check.
        seen_uris = set()
        video_file = '{}_{}{}'.format(self.video_file_name, channel,
                                      self.video_file_extension)
        request_counter = 0
        total_channel_videos = 0
        # Tuple instead of a set so the generated URL is deterministic.
        fields = (
            'uri',
            'name',
            'description',
            'link',
            'duration',
            'width',
            'height',
            'language',
            'created_time',
            'modified_time',
            'privacy',
            'pictures',
            'tags',
            'stats',
            'metadata',
            'user.uri',
        )
        request = '/channels/{}/videos?fields={}&per_page=50&page=1&sort=date&direction=desc'.format(
            channel, ','.join(fields))
        while request is not None:
            try:
                response = self.v.get(request).json()
            except Exception as e:
                self.logger.error(
                    'Request for video data from channel {} failed: {}'.format(
                        channel, str(e)))
                break
            if 'data' not in response:
                break
            request_counter += 1
            for video_d in response['data']:
                if not self.perform_filtering(video_d):
                    continue
                if video_d['uri'] in seen_uris:
                    continue
                seen_uris.add(video_d['uri'])
                video_d.update(channel_info)
                channel_videos.append(video_d)
                total_channel_videos += 1
                self.total_videos += 1
            if request_counter == self.max_requests_per_save:
                request_counter = 0
                self.save_videos(channel_videos, video_file,
                                 total_channel_videos)
            # A response without paging information ends the loop instead of
            # raising, so the final save below still runs.
            request = (response.get('paging') or {}).get('next')
        self.save_videos(channel_videos, video_file, total_channel_videos)
        self.logger.info('Total videos: {}'.format(self.total_videos))

    def start(self):
        """Starts crawling.

        Every channel in ``channels_array`` is analyzed in turn; a failure
        for one channel is logged and does not stop the remaining ones.

        """
        self.logger.info('Start crawling')
        for channel in self.channels_array:
            self.logger.info('Analyzing channel: {}'.format(channel))
            try:
                channel_info = self.analyze_channel(channel)
                self.analyze_channel_videos(channel, channel_info)
            except Exception as e:
                self.logger.error(str(e))
        self.logger.info('Crawling finished.')
Esempio n. 4
0
class Vlog(object):
    """Wrapper around a VimeoClient for querying, uploading and editing videos."""

    def __init__(self, conf, blog):
        """Store the configuration and build the Vimeo client.

        Args:
            conf: Object providing ``vimeo_token``, ``vimeo_client_id`` and
                ``vimeo_client_secret``.
            blog: Stored on the instance as ``self.blog``.
        """
        self.log = getLogger(__name__)
        self.conf = conf
        self.blog = blog

        self.client = VimeoClient(
            token=self.conf.vimeo_token,
            key=self.conf.vimeo_client_id,
            secret=self.conf.vimeo_client_secret
        )

    def get(self, *uri, **param):
        """Send a GET request and return the decoded JSON body.

        Args:
            *uri: Path segments; they are joined with '/' and prefixed with
                a single leading '/'.
            **param: Keyword arguments forwarded to the client's ``get``.

        Returns:
            The decoded response for status 200, otherwise ``None`` (a
            warning is logged).
        """
        url = '/{}'.format('/'.join(uri).lstrip('/'))
        self.log.debug('request vlog info "%s"', url)
        req = self.client.get(url, **param)
        res = req.json()
        if req.status_code != 200:
            self.log.warning('vlog info error response "%s"', res)
            return None
        return res

    def quota(self, size):
        """Tell whether the free upload space exceeds ``size``.

        Returns:
            ``True``/``False`` from comparing the reported free space with
            ``size``, or ``None`` when the account info is unavailable.
        """
        info = self.get('me')
        if not info:
            return None
        return info['upload_quota']['space']['free'] > size

    def upload(self, source):
        """Request a pull upload of the video located at ``source``.

        Returns:
            The decoded response for status 200, otherwise ``None`` (an
            error is logged).
        """
        res = self.client.post('/me/videos', data=dict(
            type='pull', link=source
        ))
        video = res.json()
        if res.status_code != 200:
            self.log.error('video upload error "%s"', video)
            return None
        return video

    def change(self, video, *, title, caption, public, tags=None):
        """Update title, description, privacy and tags of a video.

        Args:
            video: Mapping with the video's ``uri``.
            title: New name of the video.
            caption: New description of the video.
            public: Truthy for view privacy 'anybody', otherwise 'nobody'.
            tags: Tags sent to the video's ``/tags`` endpoint; an empty
                list when omitted.

        Returns:
            ``video`` when both requests succeed, otherwise ``None`` (an
            error is logged).
        """
        # ``None`` replaces the former mutable ``[]`` default; the request
        # still receives an empty list when no tags are given.
        if tags is None:
            tags = []

        res = self.client.patch(video['uri'], data=dict(
            name=title,
            description=caption,
            privacy=dict(
                embed='public',
                view=('anybody' if public else 'nobody'),
            )
        ))
        if res.status_code != 200:
            self.log.error('video edit error "%s"', res.json())
            return None

        res = self.client.put('{}/tags'.format(video['uri']), data=tags)
        if res.status_code not in [200, 201]:
            self.log.error('video tag error "%s"', res.json())
            return None
        return video

    def pull_videos(self):
        """Yield every video of the authenticated user, page by page.

        Pages are requested with consecutive page numbers (1, 2, 3, ...)
        until the number of requested items reaches the reported ``total``.
        Iteration stops early when a page request fails.
        """
        per_page = 25
        page = 1
        while True:
            # Query parameters belong in ``params``; the page number (not an
            # item offset) selects the page.
            res = self.get(
                'me', 'videos', params=dict(page=page, per_page=per_page)
            )
            if not res:
                return
            for post in res.get('data', []):
                yield post
            if page * per_page >= res.get('total', 0):
                return
            page += 1