def import_video_restrictions(self, automatic_flush=True):
    """Copy per-country allow/deny restrictions onto indexed video documents.

    For every visible video on a public channel, groups its restriction
    rows by video instance and writes the country list into
    ``country_restriction.allow`` / ``country_restriction.deny`` via an
    ES partial (script) update.

    :param automatic_flush: when True, force-flush the bulk queue at the end.
    """
    from rockpack.mainsite.services.video.models import VideoRestriction, VideoInstance, Video, Channel
    with app.test_request_context():
        # Restrictions joined to their instances, limited to content that
        # is actually exposed (visible video, public channel). Ordered by
        # instance id so groupby below sees contiguous groups.
        query = VideoRestriction.query.join(
            VideoInstance,
            VideoInstance.video == VideoRestriction.video
        ).join(Channel, VideoInstance.channel == Channel.id).join(Video, Video.id == VideoRestriction.video).filter(Video.visible == True, Channel.public == True).order_by(VideoInstance.id)

        for relationship in ('allow', 'deny',):
            # One pass per relationship kind; rows yield (instance_id, country).
            for instance_id, group in groupby(query.filter(VideoRestriction.relationship == relationship).yield_per(6000).values(VideoInstance.id, VideoRestriction.country), lambda x: x[0]):
                countries = [c.encode('utf8') for i, c in group]
                try:
                    # NOTE(review): the country list is interpolated into the
                    # update script via str() — relies on Python list repr
                    # being valid script syntax; verify against the ES
                    # scripting language in use.
                    self.conn.partial_update(
                        self.indexing['video']['index'],
                        self.indexing['video']['type'],
                        instance_id,
                        "ctx._source.country_restriction.%s = %s" % (relationship, str(countries),)
                    )
                except pyes.exceptions.ElasticSearchException:
                    # Best effort: skip documents missing from the index.
                    pass
                sys.stdout.flush()

    if automatic_flush:
        self.conn.flush_bulk(forced=True)
def import_channel_video_counts(self, channel_ids):
    """Refresh the ``video_counts`` field on the given channel documents.

    Counts non-deleted video instances per channel and pushes each count
    into the channel index with an ES partial update. No-op when
    ``channel_ids`` is empty.
    """
    if not channel_ids:
        return
    from rockpack.mainsite.services.video.models import Channel, VideoInstance
    with app.test_request_context():
        # (channel_id, instance_count) pairs restricted to the channels requested.
        counts = Channel.query.join(
            VideoInstance,
            (VideoInstance.channel == Channel.id) & (VideoInstance.deleted == False)
        ).filter(
            Channel.id.in_(channel_ids)
        ).group_by(
            Channel.id
        ).values(Channel.id, func.count(Channel.id))

        index_name = self.indexing['channel']['index']
        doc_type = self.indexing['channel']['type']
        for channel_id, video_count in counts:
            try:
                self.conn.partial_update(
                    index_name,
                    doc_type,
                    channel_id,
                    "ctx._source.video_counts = %s" % video_count
                )
            except pyes.exceptions.ElasticSearchException:
                # Best effort: document may be missing from the index.
                pass
def import_users(self, start=None, stop=None, automatic_flush=True):
    """Bulk-index all active users.

    :param start: with `stop`, limits the import to users whose
        ``date_updated`` falls in [start, stop].
    :param automatic_flush: when True, force-flush the bulk queue at the end.
    """
    from rockpack.mainsite.services.user import models
    with app.test_request_context():
        query = models.User.query.filter(models.User.is_active == True)
        if start:
            query = query.filter(models.User.date_updated.between(start, stop))
        total = query.count()
        app.logger.info('importing %d users', total)
        begun = time.time()
        for position, user in enumerate(query.yield_per(6000), 1):
            api.add_user_to_index(user, bulk=True, no_check=True)
            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(position, total)
        if automatic_flush:
            self.conn.flush_bulk(forced=True)
        app.logger.debug('finished in %d seconds', time.time() - begun)
def full_video_import(start=None, stop=None, prefix=None):
    """Rebuild video documents in the search index.

    First purges instances that are deleted (or whose video is no longer
    visible) from the index, then runs the video importers with flushing
    deferred until the end.

    :param start: with `stop`, limits work to rows updated in [start, stop].
    :param prefix: limits work to instance ids starting with this prefix.
    """
    from rockpack.mainsite.services.video import models
    with app.test_request_context():
        stale = models.VideoInstance.query.join(
            models.Video,
            models.VideoInstance.video == models.Video.id
        ).filter(
            (models.VideoInstance.deleted == True) |
            (models.Video.visible == False)
        )
        if start:
            stale = stale.filter(
                (models.VideoInstance.date_updated.between(start, stop)) |
                (models.Video.date_updated.between(start, stop)))
        if prefix:
            # Escape '_' so LIKE treats it literally rather than as a wildcard.
            stale = stale.filter(
                models.VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

        stale_ids = [row[0] for row in stale.values(models.VideoInstance.id)]
        if stale_ids:
            api.ESVideo.delete(stale_ids)
            api.ESVideo.flush()

        importer = DBImport()
        importer.import_videos(prefix=prefix, start=start, stop=stop, automatic_flush=False)
        importer.import_dolly_video_owners(prefix=prefix, start=start, stop=stop, automatic_flush=False)
        importer.import_video_stars(prefix=prefix, automatic_flush=False, start=start, stop=stop)
        # Comments support disabled
        # importer.import_comment_counts(prefix=prefix, automatic_flush=False, start=start, stop=stop)
        api.ESVideo.flush()
def full_channel_import(start=None, stop=None):
    """Rebuild channel documents in the search index.

    Purges channels that are non-public, deleted, or invisible, then
    runs the channel importers with flushing deferred until the end.

    :param start: with `stop`, limits work to channels updated in [start, stop].
    """
    from rockpack.mainsite.services.video import models
    with app.test_request_context():
        stale = models.Channel.query.filter(
            (models.Channel.public == False) |
            (models.Channel.deleted == True) |
            (models.Channel.visible == False)
        )
        if start:
            stale = stale.filter(models.Channel.date_updated.between(start, stop))

        stale_ids = [row[0] for row in stale.values('id')]
        if stale_ids:
            api.ESChannel.delete(stale_ids)
            api.ESChannel.flush()

        importer = DBImport()
        importer.import_channels(start=start, stop=stop, automatic_flush=False)
        importer.import_average_category(start=start, stop=stop, automatic_flush=False)
        importer.import_video_channel_terms(start=start, stop=stop, automatic_flush=False)
        api.ESChannel.flush()
def import_dolly_video_owners(self, prefix=None, start=None, stop=None, automatic_flush=True):
    """ Import all the owner attributes of a video instance belonging to a channel

    Writes the channel owner's user data into the ``owner`` field of every
    matching video document via bulk ES updates.

    :param prefix: limit to instance ids starting with this prefix.
    :param start: with `stop`, limit to instances updated in [start, stop].
    :param automatic_flush: when True, force-flush the bulk queue at the end.
    """
    from rockpack.mainsite.services.video.models import Channel, VideoInstance
    from rockpack.mainsite.services.user.models import User

    with app.test_request_context():
        instances = VideoInstance.query.join(
            Channel,
            (VideoInstance.channel == Channel.id) &
            (Channel.public == True) &
            (Channel.visible == True) &
            (Channel.deleted == False)
        ).join(
            User, User.id == Channel.owner
        ).filter(
            VideoInstance.deleted == False
        )
        if prefix:
            # Escape '_' so LIKE treats it literally.
            instances = instances.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))
        if start:
            instances = instances.filter(VideoInstance.date_updated.between(start, stop))

        # Ordering by channel lets us reuse the mapped owner dict for every
        # consecutive instance of the same channel (see cache below).
        instances = instances.with_entities(VideoInstance, User).order_by(VideoInstance.channel)

        total = instances.count()
        done = 1

        # Cache of the last channel's owner mapping, valid because rows
        # arrive grouped by channel.
        current_user_dict = None
        current_channel = None

        ec = ESVideo.updater(bulk=True)
        for instance, user in instances.yield_per(6000):
            if instance.channel != current_channel:
                # Duck punch the user on for perf
                instance.owner_rel = user
                mapped = ESVideoAttributeMap(instance)
                current_user_dict = mapped.owner
                current_channel = instance.channel

            ec.set_document_id(instance.id)
            ec.add_field('owner', current_user_dict)
            ec.update()
            ec.reset()

            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)
            done += 1

    app.logger.info('Video owner size: %d', done)
    if automatic_flush:
        self.conn.flush_bulk(forced=True)
def import_channels(self, start=None, stop=None, automatic_flush=True):
    """Bulk-index all public, visible, non-deleted channels.

    A side query precomputes per-channel video counts (visible videos,
    non-deleted instances) which are attached to each channel as
    ``_video_count`` before insertion.

    :param start: with `stop`, limits the import to channels updated in
        [start, stop].
    :param automatic_flush: when True, flush the bulk inserter at the end.
    """
    from rockpack.mainsite.services.video.models import Channel, VideoInstance, Video
    with app.test_request_context():
        channels = Channel.query.filter(
            Channel.public == True,
            Channel.visible == True,
            Channel.deleted == False
        ).options(
            # Eager-load relations the ES mapping reads, avoiding N+1 queries.
            joinedload(Channel.category_rel),
            joinedload(Channel.metas),
            joinedload(Channel.owner_rel)
        )
        if start:
            channels = channels.filter(Channel.date_updated.between(start, stop))

        total = channels.count()
        app.logger.info('importing %d channels', total)
        start_time = time.time()
        ec = ESChannel.inserter(bulk=True)
        count = 1

        # Per-channel counts of non-deleted instances of visible videos.
        query = VideoInstance.query.join(
            Video,
            (Video.id == VideoInstance.video) &
            (Video.visible == True)
        ).filter(VideoInstance.deleted == False).group_by(VideoInstance.channel)

        if start:
            # Restrict the counts selected to the channels we want
            query = query.join(
                Channel,
                (Channel.id == VideoInstance.channel) &
                (Channel.date_updated.between(start, stop))
            )

        query = query.values(VideoInstance.channel, func.count(VideoInstance.id))
        video_counts = dict(query)

        for channel in channels.yield_per(6000):
            # Channels with no counted videos default to 0.
            channel._video_count = video_counts.get(channel.id) or 0
            ec.insert(channel.id, channel)
            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(count, total)
            count += 1

    if automatic_flush:
        ec.flush_bulk()
    app.logger.debug('finished in %d seconds', time.time() - start_time)
def import_search_suggestions(self):
    """Populate the search-suggestion index from all visible videos.

    Each suggestion uses the video title as the query text and the
    video's star count as its weight.
    """
    from rockpack.mainsite.services.video.models import Video
    with app.test_request_context():
        suggestions = Video.query.filter_by(visible=True).with_entities(
            Video.id,
            literal('video').label('type'),
            Video.title.label('query'),
            Video.star_count.label('weight'),
        )
        writer = ESSearchSuggestion.inserter(bulk=True)
        for row in suggestions.yield_per(6000):
            writer.insert(row.id, row)
        writer.flush_bulk()
def import_video_stars(self, prefix=None, automatic_flush=True, start=None, stop=None):
    """Write the ``recent_user_stars`` field (up to 5 most recent starring
    users) onto each starred video document.

    Fix: the previous implementation ran the users through ``set()`` before
    slicing, which destroyed the ``date_actioned.desc()`` ordering supplied
    by the query — the 5 users chosen were arbitrary rather than the most
    recent. Users are now de-duplicated while preserving query order.

    :param prefix: limit to instance ids starting with this prefix.
    :param start: with `stop`, limit to instances with star activity in
        [start, stop].
    :param automatic_flush: when True, force-flush the bulk queue at the end.
    """
    from rockpack.mainsite.services.user.models import UserActivity
    updated_activity = aliased(UserActivity, name='updated_activity')
    with app.test_request_context():
        # Most recent stars first within each instance group.
        query = UserActivity.query.filter(
            UserActivity.action == 'star',
            UserActivity.object_type == 'video_instance'
        ).order_by(
            UserActivity.object_id,
            UserActivity.date_actioned.desc()
        )
        if start:
            # Self-join restricts to instances that saw activity in the window.
            query = query.join(
                updated_activity,
                (updated_activity.object_id == UserActivity.object_id) &
                (updated_activity.date_actioned.between(start, stop)))
        if prefix:
            # Escape '_' so LIKE treats it literally.
            query = query.filter(UserActivity.object_id.like(prefix.replace('_', '\\_') + '%'))

        ec = ESVideo.updater(bulk=True)
        done = 1
        for instance_id, group in groupby(
                query.yield_per(200).values(UserActivity.object_id, UserActivity.user),
                lambda x: x[0]):
            # De-duplicate users while keeping most-recent-first order;
            # stop once we have the 5 we need (groupby skips the rest).
            seen = set()
            recent_users = []
            for _, user in group:
                encoded = user.encode('utf8')
                if encoded not in seen:
                    seen.add(encoded)
                    recent_users.append(encoded)
                    if len(recent_users) == 5:
                        break
            try:
                ec.set_document_id(instance_id)
                ec.add_field('recent_user_stars', recent_users)
                ec.update()
            except pyes.exceptions.ElasticSearchException:
                # Best effort: skip documents missing from the index.
                pass
            finally:
                ec.reset()
            done += 1

    app.logger.info('Video star size: %d', done)
    if automatic_flush:
        self.conn.flush_bulk(forced=True)
def full_user_import(start=None, stop=None):
    """Rebuild user documents in the search index.

    Purges inactive users from the index, then re-imports active users
    and their derived fields, flushing once at the end.

    :param start: with `stop`, limits work to users updated in [start, stop].
    """
    from rockpack.mainsite.services.user import models
    with app.test_request_context():
        stale = models.User.query.filter(models.User.is_active == False)
        if start:
            stale = stale.filter(models.User.date_updated.between(start, stop))

        stale_ids = [row[0] for row in stale.values('id')]
        if stale_ids:
            api.ESUser.delete(stale_ids)
            api.ESUser.flush()

        importer = DBImport()
        importer.import_users(start=start, stop=stop, automatic_flush=False)
        update_user_categories(automatic_flush=False, start=start, stop=stop)
        update_user_subscription_count(start=start, stop=stop, automatic_flush=False)
        api.ESUser.flush()
def install(*args):
    """Set up the given fixture datasets and commit them to the database."""
    # Map fixture-data class names to their corresponding model classes.
    fixture_env = {
        'LocaleData': video_models.Locale,
        'CategoryData': video_models.Category,
        'CategoryTranslationData': video_models.CategoryTranslation,
        'RockpackCoverArtData': RockpackCoverArt,
        'SourceData': video_models.Source,
        'UserData': User,
        'ChannelData': video_models.Channel,
        'ChannelLocaleMetaData': video_models.ChannelLocaleMeta,
        'VideoData': video_models.Video,
        'VideoInstanceData': video_models.VideoInstance,
        'MoodData': video_models.Mood,
        'ExternalTokenData': oauth_models.ExternalToken,
    }
    fixture = SQLAlchemyFixture(env=fixture_env, session=db.session)
    data = fixture.data(*args)
    with app.test_request_context():
        data.setup()
        db.session.commit()
def import_dolly_repin_counts(self, prefix=None, start=None, stop=None, automatic_flush=True):
    """Compute repin (child-instance) counts per video instance and mark
    the "most influential" instance for each video.

    An instance's children are instances of the same video whose
    ``source_channel`` is this instance's channel. The instance with the
    highest child count per video (ties broken in favour of original,
    non-repinned instances) is flagged ``most_influential``.

    :param prefix: limit to instance ids starting with this prefix.
    :param start: with `stop`, limit to videos with instances updated in
        [start, stop].
    :param automatic_flush: when True, flush the video index at the end.
    """
    from rockpack.mainsite.services.video.models import VideoInstance, Video
    with app.test_request_context():
        child = aliased(VideoInstance, name='child')
        # (instance id, video id, child source_channel, count) rows; the
        # outerjoin keeps instances with no children (count of 1, see below).
        query = readonly_session.query(
            VideoInstance.id,
            VideoInstance.video,
            child.source_channel,
            func.count(VideoInstance.id)
        ).outerjoin(
            child,
            (VideoInstance.video == child.video) &
            (VideoInstance.channel == child.source_channel)
        ).join(
            Video,
            (Video.id == VideoInstance.video) &
            (Video.visible == True) &
            (VideoInstance.deleted == False))

        if prefix:
            # Escape '_' so LIKE treats it literally.
            query = query.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

        if start:
            # Include every instance of any video touched in the window.
            video_ids = VideoInstance.query.filter(
                VideoInstance.date_updated.between(start, stop)
            ).with_entities(VideoInstance.video).subquery()
            query = query.filter(Video.id.in_(video_ids))

        query = query.group_by(VideoInstance.id, VideoInstance.video, child.source_channel)

        instance_counts = {}
        influential_index = {}

        total = query.count()
        done = 1

        for _id, video, source_channel, count in query.yield_per(6000):
            # Set the count for the video instance
            instance_counts[(_id, video)] = count
            # If the count is higher for the same video that
            # the previous instance, mark this instance as the
            # influential one for this video
            i_id, i_count = influential_index.get(video, [None, 0])

            # Count will always be at least 1
            # but should really be zero if no children
            if not source_channel and count == 1:
                count = 0

            # Higher count wins; on a tie, an instance without a
            # source_channel (i.e. an original, not a repin) wins.
            if (count > i_count) or\
                    (count == i_count) and not source_channel:
                influential_index.update({video: (_id, count,)})

            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)
            done += 1

        total = len(instance_counts)
        done = 1
        ec = ESVideo.updater(bulk=True)
        for (_id, video), count in instance_counts.iteritems():
            ec.set_document_id(_id)
            ec.add_field('child_instance_count', count)
            ec.add_field('most_influential', True if influential_index.get(video, '')[0] == _id else False)
            ec.update()
            ec.reset()
            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)
            done += 1

    if automatic_flush:
        ESVideo.flush()
def import_videos(self, prefix=None, start=None, stop=None, recent_user_stars=False, automatic_flush=True):
    """Bulk-index video instances as full ES documents.

    Builds each document through ``ESVideoAttributeMap``; star counts,
    country restrictions and comments are inserted empty here and filled
    by their dedicated importers (unless ``recent_user_stars`` is True).

    :param prefix: limit to instance ids starting with this prefix.
    :param start: with `stop`, limit to instances updated in [start, stop];
        also triggers a channel video-count refresh for affected channels.
    :param recent_user_stars: when True, populate star data inline.
    :param automatic_flush: when True, flush the bulk inserter at the end.
    """
    from rockpack.mainsite.services.video.models import (Channel, Video, VideoInstanceLocaleMeta, VideoInstance)
    from rockpack.mainsite.services.user.models import User
    with app.test_request_context():
        query = VideoInstance.query.join(
            Channel,
            Channel.id == VideoInstance.channel
        ).join(Video).outerjoin(
            VideoInstanceLocaleMeta,
            VideoInstance.id == VideoInstanceLocaleMeta.video_instance
        ).outerjoin(
            # Original channel owner only when that user is still active.
            User,
            (User.id == VideoInstance.original_channel_owner) & (User.is_active == True)
        ).options(
            # Eager-load everything the attribute map touches.
            joinedload(VideoInstance.metas)
        ).options(
            joinedload(VideoInstance.video_rel)
        ).options(
            joinedload(VideoInstance.video_channel)
        ).options(
            joinedload(VideoInstance.original_channel_owner_rel)
        ).filter(
            VideoInstance.deleted == False,
            Video.visible == True
        )

        if prefix:
            # Escape '_' so LIKE treats it literally.
            query = query.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))
        if start:
            query = query.filter(VideoInstance.date_updated.between(start, stop))

        total = query.count()
        app.logger.info('importing %d videos', total)
        start_time = time.time()
        done = 1
        ev = ESVideo.inserter(bulk=True)
        channel_ids = []
        for v in query.yield_per(6000):
            mapped = ESVideoAttributeMap(v)
            rep = dict(
                id=mapped.id,
                public=mapped.public,
                video=mapped.video,
                title=mapped.title,
                channel=mapped.channel,
                channel_title=mapped.channel_title,
                category=mapped.category,
                date_added=mapped.date_added,
                position=mapped.position,
                locales=mapped.locales,
                recent_user_stars=mapped.recent_user_stars(empty=not recent_user_stars),
                country_restriction=mapped.country_restriction(empty=True),
                comments=mapped.comments(empty=True),
                child_instance_count=mapped.child_instance_count,
                link_url=mapped.link_url,
                link_title=mapped.link_title,
                tags=mapped.tags,
                is_favourite=mapped.is_favourite,
                most_influential=mapped.most_influential,
                original_channel_owner=mapped.original_channel_owner,
                label=mapped.label,
            )
            ev.manager.indexer.insert(v.id, rep)
            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)
            done += 1
            # Track which channels were touched so counts can be refreshed.
            if start and v.channel not in channel_ids:
                channel_ids.append(v.channel)

        if start:
            # We'll want to update channel counts that
            # may not get trigger elsewhere
            self.import_channel_video_counts(channel_ids)

    if automatic_flush:
        ev.flush_bulk()
    app.logger.debug('finished in %d seconds', time.time() - start_time)
def test_video_comments(self):
    """End-to-end check of video comments: create a channel, add videos,
    post a comment, verify the indexed comment total is 1, then delete
    the comment and verify the total returns to 0."""
    with app.test_request_context():
        user_id = self.create_test_user().id
        with self.app.test_client() as client:
            # create new channel
            r = client.post(
                '/ws/{}/channels/'.format(user_id),
                data=json.dumps(dict(
                    title='test',
                    description='test',
                    category='',
                    cover='',
                    public=True,
                )),
                content_type='application/json',
                headers=[get_auth_header(user_id)]
            )
            self.assertEquals(r.status_code, 201)
            channel_id = json.loads(r.data)['id']

            # add videos
            r = client.put(
                '/ws/{}/channels/{}/videos/'.format(user_id, channel_id),
                data=json.dumps([
                    VideoInstanceData.video_instance1.id,
                    VideoInstanceData.video_instance2.id,
                ]),
                content_type='application/json',
                headers=[get_auth_header(user_id)]
            )
            self.assertEquals(r.status_code, 204)

            # add comment
            instance_data = dict(userid=user_id, channelid=channel_id)
            # Fetch the channel to learn the instance id of the first video.
            r = client.get(
                '/ws/{userid}/channels/{channelid}/'.format(**instance_data),
                content_type='application/json',
                headers=[get_auth_header(user_id)])
            instance_data['videoid'] = json.loads(r.data)['videos']['items'][0]['id']
            r = client.post(
                '/ws/{userid}/channels/{channelid}/videos/{videoid}/comments/'.format(**instance_data),
                data=json.dumps(dict(comment="this is a comment")),
                content_type='application/json',
                headers=[get_auth_header(user_id)]
            )
            self.assertEquals(r.status_code, 201)

            # Wait for the comment to reach the search index, then check it.
            self.wait_for_es()
            v = VideoSearch('en-gb')
            v.add_id(instance_data['videoid'])
            instance = v.videos()[0]
            self.assertEquals(instance['comments']['total'], 1)

            # delete comment
            r = client.get(
                '/ws/{userid}/channels/{channelid}/videos/{videoid}/comments/'.format(**instance_data),
                content_type='application/json',
                headers=[get_auth_header(user_id)]
            )
            comment_id = json.loads(r.data)['comments']['items'][0]['id']
            instance_data.update({'commentid': comment_id})
            r = client.delete(
                '/ws/{userid}/channels/{channelid}/videos/{videoid}/comments/{commentid}/'.format(**instance_data),
                content_type='application/json',
                headers=[get_auth_header(user_id)]
            )
            # Index should reflect the deletion.
            self.wait_for_es()
            v = VideoSearch('en-gb')
            v.add_id(instance_data['videoid'])
            instance = v.videos()[0]
            self.assertEquals(instance['comments']['total'], 0)