def get_folder_contents(self):
    """Fetch all videos in the configured Vimeo project (folder).

    Returns:
        CPSSVimeoCollectionType: Collection built from the parsed JSON
        response of ``/users/<user>/projects/<project>/videos``.

    Raises:
        RuntimeError: If the Vimeo API does not answer with HTTP 200.
    """
    client = VimeoClient(
        token=self.vimeo_access_token,
        key=self.vimeo_client_id,
        secret=self.vimeo_client_secret)
    response = client.get(
        '/users/{}/projects/{}/videos'.format(
            self.vimeo_user_id, self.vimeo_project_id))
    # BUG FIX: the original used `assert response.status_code == 200`.
    # Asserts are stripped when Python runs with -O, which would silently
    # hide API failures; raise an explicit exception instead.
    if response.status_code != 200:
        raise RuntimeError(
            'Vimeo folder request failed with status {}'.format(
                response.status_code))
    return CPSSVimeoCollectionType(response.json())
def search():
    """Handling of POST requests for autocomplete.js.

    Searches YouTube (playlists) and Vimeo (channels) for the submitted
    term and returns up to ``max_results`` hits per provider as a JSON
    array of dicts with keys: source, id, title, thumb, amount.
    """
    if request.method == "POST":
        max_results = 3
        result = []
        debug(("Incoming POST request: {}").format(request.json["search"]))

        # --- YouTube: search for playlists matching the query ---
        yt_search_request = (
            "{}/search?q={}&type=playlist&part=id,snippet"
            "&fields=items(id/playlistId,snippet(thumbnails/medium/url,title))"
            "&maxResults={}&key={}").format(
                read_config("YOUTUBE_API_URL"),
                quote(request.json["search"]),
                max_results,
                read_config("YOUTUBE_API_KEY"))
        yt_search_response = urllib_request.urlopen(yt_search_request)
        youtube = loads(yt_search_response.read().decode())

        # --- Vimeo: search for channels matching the query ---
        VIMEO = VimeoClient(
            token=read_config("VIMEO_TOKEN"),
            key=read_config("VIMEO_KEY"),
            secret=read_config("VIMEO_SECRET"))
        vim_search_request = VIMEO.get(
            ("/channels?query={}&per_page={}").format(
                quote(request.json["search"]), max_results),
            params={"fields": "name, uri, pictures.uri, "
                              "metadata.connections.videos.total"})
        vimeo = vim_search_request.json()

        for playlist in youtube["items"]:
            # Extra API call needed: a playlist search hit does not carry
            # its own video count, so ask playlistItems for totalResults.
            # BUG FIX: the parameter was spelled "maxresults"; YouTube API
            # parameter names are case-sensitive ("maxResults"), so the
            # limit was silently ignored.
            req = (
                "{}/playlistItems?playlistId={}"
                "&part=id&fields=pageInfo/totalResults"
                "&maxResults=1&key={}").format(
                    read_config("YOUTUBE_API_URL"),
                    playlist["id"]["playlistId"],
                    read_config("YOUTUBE_API_KEY"))
            request_send = urllib_request.urlopen(req)
            videos_in_playlist = loads(request_send.read().decode())

            # TODO: decide what to return in case of missing thumbnail
            thumbnail_url = ""
            if "thumbnails" in playlist["snippet"]:
                # api call needed as playlist thumbnail != thumbnail of
                # first video (or not inevitable)
                thumbnail_url = playlist["snippet"]["thumbnails"]["medium"]["url"]
            result.append({
                "source": "youtube",
                "id": playlist["id"]["playlistId"],
                "title": playlist["snippet"]["title"],
                "thumb": thumbnail_url,
                "amount": videos_in_playlist["pageInfo"]["totalResults"]})

        for video in vimeo["data"]:
            result.append({
                "source": "vimeo",
                "id": video["uri"].split("/")[2],
                "title": video["name"],
                # TODO: check if thumbnail of first video is always
                # thumbnail of channel (or customizable as on YouTube)
                "thumb": ("https://i.vimeocdn.com/video/{}_100x75.jpg").format(
                    video["pictures"]["uri"].split("/")[4]),
                "amount": video["metadata"]["connections"]["videos"]["total"]
            })
        return dumps(result)
class VimeoCrawler(object):
    def __init__(self, vimeo_data, channel_file, video_file, log_file):
        """Creates crawler for Vimeo.

        (The original docstring said "dailymotion" — copy/paste error;
        this class only talks to the Vimeo API.)

        Args:
            vimeo_data (dictionary): Dictionary with vimeo client
                informations: accessToken, clientId, clientSecret.
            channel_file (string): Path to a file where metadata from
                channels will be saved.
            video_file (string): Path to a file where metadata from
                videos will be saved.
            log_file (string): Path to a file where logs will be saved.
        """
        # VimeoClient object used for sending requests and getting
        # responses using the Vimeo API.
        self.v = VimeoClient(
            token=vimeo_data['accessToken'],
            key=vimeo_data['clientId'],
            secret=vimeo_data['clientSecret'])
        # Path to a file where metadata from channels will be saved.
        self.channel_file = channel_file
        # Per-channel video files are built as "<name>_<channel><ext>",
        # so keep name and extension of video_file separately.
        dot_idx = video_file.rfind('.')
        self.video_file_name = video_file[:dot_idx]
        self.video_file_extension = video_file[dot_idx:]
        # BUG FIX: the original passed propagate=False to basicConfig();
        # that is not a valid basicConfig keyword and makes Python 3 raise
        # ValueError ("Unrecognised argument(s): propagate") on startup.
        logging.basicConfig(level=logging.INFO,
                            filename=log_file,
                            format='%(asctime)-15s %(message)s')
        logging.getLogger("requests").setLevel(logging.WARNING)
        # Logger object used for logging.
        self.logger = logging.getLogger(__name__)
        # Names of the channels which will be analyzed.
        self.channels_array = []
        # Metadata collected from channels.
        self.vimeo_channels = []
        # Total number of video metadata records obtained while crawling.
        self.total_videos = 0
        # Number of page requests after which video metadata is flushed
        # to a file.
        self.max_requests_per_save = 10
        # Permitted creation months (YYYY-MM); only these pass filtering.
        self.condition_array = [
            '2016-08', '2016-07', '2016-06', '2016-05', '2016-04',
            '2016-03', '2016-02', '2016-01', '2015-12', '2015-11',
            '2015-10', '2015-09'
        ]
        # Minimum number of plays that fulfills the filtering condition.
        self.min_views = 0

    def add_content_providers(self, csv_file):
        """Adds names of the channels to be analyzed.

        Args:
            csv_file (string): Path to a csv file with names of the
                channels (channel name expected in the second column).

        Raises:
            Exception: If the csv file can not be read.
        """
        try:
            with open(csv_file, 'r') as f:
                for row in reader(f):
                    self.channels_array.append(row[1])
            # Drop the header row of the csv file.
            self.channels_array.pop(0)
        except Exception as e:
            raise Exception('Can not read data from file: {}'.format(str(e)))

    def perform_filtering(self, video_data):
        """Performs filtering. Checks whether the video meets the conditions.

        Args:
            video_data (dictionary): The dictionary with video's metadata.

        Returns:
            boolean: True if video meets the conditions, False otherwise.
        """
        # 'created_time' is ISO-8601; its first 7 characters are "YYYY-MM".
        if video_data['created_time'][:7] not in self.condition_array:
            return False
        views = video_data['stats']['plays']
        # 'plays' may be null in the API response; treat that as too few.
        return views is not None and views >= self.min_views

    def analyze_channel(self, channel):
        """Gets metadata from the channel.

        Metadata obtained from the channel: 'metadata', 'user.metadata'.
        The accumulated channel list is rewritten to ``channel_file``.

        Args:
            channel (string): The id of the channel.

        Returns:
            dictionary: The dictionary with basic channel informations
            (keys: 'channel_id', 'channel_likes').

        Raises:
            Exception: If the API request fails or the channel file can
                not be written.
        """
        try:
            response = self.v.get('/channels/{}'.format(channel)).json()
            response = {
                'channel_id': channel,
                'channel_meta': response['metadata'],
                'user_meta': response['user']['metadata']
            }
        except Exception as e:
            raise Exception('Request for channel {} failed: {}'.format(
                channel, str(e)))
        self.vimeo_channels.append(response)
        try:
            with open(self.channel_file, 'w') as f:
                f.write(dumps(self.vimeo_channels, indent=4))
        except Exception as e:
            raise Exception('Can not save vimeo channels to file: {}'.format(
                str(e)))
        return {
            'channel_id': channel,
            # NOTE(review): 'connections.users.total' looks like the
            # follower count rather than "likes" — TODO confirm against
            # the Vimeo API before relying on the key name.
            'channel_likes':
                response['channel_meta']['connections']['users']['total']
        }

    def save_videos(self, channel_videos, video_file, total_channel_videos):
        """Saves metadata from videos for currently analyzed channel.

        Args:
            channel_videos (array): Array with metadata from videos for
                currently analyzed channel.
            video_file (string): Path to a file where the metadata will
                be saved.
            total_channel_videos (int): The number of metadata records
                successfully obtained for the channel so far.

        Raises:
            Exception: If the file can not be written.
        """
        self.logger.info('Saving to file...')
        self.logger.info(
            'Total channel videos: {}'.format(total_channel_videos))
        try:
            with open(video_file, 'w') as f:
                f.write(dumps(channel_videos, indent=4))
        except Exception as e:
            # BUG FIX: the original discarded the underlying error, making
            # write failures undiagnosable; include it in the message.
            raise Exception('Can not save videos to file: {}'.format(str(e)))
        self.logger.info('Saving finished.')

    def analyze_channel_videos(self, channel, channel_info):
        """Gets metadata from videos for currently analyzed channel.

        Args:
            channel (string): The id of the channel.
            channel_info (dictionary): The dictionary with basic channel
                informations (keys: 'channel_id', 'channel_likes').
        """
        channel_videos = []
        integrity_array = []  # video uris already seen (dedup guard)
        video_file = '{}_{}{}'.format(self.video_file_name, channel,
                                      self.video_file_extension)
        request_counter = 0
        total_channel_videos = 0
        fields = {
            'uri', 'name', 'description', 'link', 'duration', 'width',
            'height', 'language', 'created_time', 'modified_time',
            'privacy', 'pictures', 'tags', 'stats', 'metadata', 'user.uri',
        }
        next_request = ('/channels/{}/videos?fields={}&per_page=50'
                        '&page=1&sort=date&direction=desc').format(
                            channel, ','.join(fields))
        while next_request is not None:
            try:
                response = self.v.get(next_request).json()
            except Exception as e:
                self.logger.error(
                    'Request for video data from channel {} failed: {}'
                    .format(channel, str(e)))
                break
            if 'data' not in response:
                break
            request_counter += 1
            for video_d in response['data']:
                if not self.perform_filtering(video_d):
                    continue
                if video_d['uri'] in integrity_array:
                    continue
                integrity_array.append(video_d['uri'])
                video_d.update(channel_info)
                channel_videos.append(video_d)
                total_channel_videos += 1
                self.total_videos += 1
            # Periodically flush to disk so a crash loses little work.
            if request_counter == self.max_requests_per_save:
                request_counter = 0
                self.save_videos(channel_videos, video_file,
                                 total_channel_videos)
            # ROBUSTNESS: tolerate responses without a 'paging' section
            # (the original raised KeyError there); None ends the loop.
            next_request = response.get('paging', {}).get('next')
        self.save_videos(channel_videos, video_file, total_channel_videos)
        self.logger.info('Total videos: {}'.format(self.total_videos))

    def start(self):
        """Starts crawling.

        Per-channel failures are logged and skipped; crawling continues
        with the next channel.
        """
        self.logger.info('Start crawling')
        for channel in self.channels_array:
            self.logger.info('Analyzing channel: {}'.format(channel))
            try:
                channel_info = self.analyze_channel(channel)
                self.analyze_channel_videos(channel, channel_info)
            except Exception as e:
                self.logger.error(str(e))
        self.logger.info('Crawling finished.')
class Vlog(object):
    """Thin wrapper around the Vimeo API for managing the blog's videos."""

    def __init__(self, conf, blog):
        self.log = getLogger(__name__)
        self.conf = conf
        self.blog = blog
        self.client = VimeoClient(
            token=self.conf.vimeo_token,
            key=self.conf.vimeo_client_id,
            secret=self.conf.vimeo_client_secret
        )

    def get(self, *uri, **param):
        """GET the API path joined from *uri* parts.

        Returns the parsed JSON body, or None on a non-200 response
        (the error body is logged as a warning).
        """
        url = '/{}'.format('/'.join(uri).lstrip('/'))
        self.log.debug('request vlog info "%s"', url)
        req = self.client.get(url, **param)
        res = req.json()
        if req.status_code != 200:
            self.log.warning('vlog info error response "%s"', res)
            return
        return res

    def quota(self, size):
        """Return True if more than *size* bytes of upload quota are free.

        Returns None (falsy) when the account info can not be fetched.
        """
        info = self.get('me')
        if info:
            return info['upload_quota']['space']['free'] > size

    def upload(self, source):
        """Start a pull upload of *source*; return the video dict or None."""
        res = self.client.post('/me/videos', data=dict(
            type='pull',
            link=source
        ))
        video = res.json()
        if res.status_code != 200:
            self.log.error('video upload error "%s"', video)
            return
        return video

    def change(self, video, *, title, caption, public, tags=None):
        """Update title, description, privacy and tags of *video*.

        Returns the video dict on success, None on error.
        """
        # BUG FIX: the original used a mutable default argument
        # (tags=[]); use None as a sentinel instead. Passing an explicit
        # list keeps working unchanged.
        if tags is None:
            tags = []
        res = self.client.patch(video['uri'], data=dict(
            name=title,
            description=caption,
            privacy=dict(
                embed='public',
                view=('anybody' if public else 'nobody'),
            )
        ))
        if res.status_code != 200:
            self.log.error('video edit error "%s"', res.json())
            return
        res = self.client.put('{}/tags'.format(video['uri']), data=tags)
        if res.status_code not in [200, 201]:
            self.log.error('video tag error "%s"', res.json())
            return
        return video

    def pull_videos(self):
        """Yield every video of the authenticated user, page by page."""
        # ROBUSTNESS: self.get() returns None on an error response; fall
        # back to an empty dict so .get() below does not raise.
        info = self.get('me', 'videos') or {}
        total = info.get('total', 1)
        # BUG FIX: the Vimeo API paginates with a 1-based `page` parameter
        # and a default page size of 25. The original stepped the *page
        # number* by 25 (range(1, total, 25) → pages 1, 26, 51, ...),
        # skipping almost all results, and fetched nothing when total <= 1.
        # Iterate ceil(total / 25) consecutive pages instead.
        pages = max(1, -(-total // 25))
        for page in range(1, pages + 1):
            for post in self.get(
                'me', 'videos', data=dict(page=page)
            )['data']:
                yield post