def import_comment_counts(self, prefix=None, automatic_flush=True, start=None, stop=None):
    """Bulk-update the `comments.count` field on ES video documents.

    Counts VideoInstanceComment rows per visible, non-deleted video
    instance and writes each total to the video index.

    :param prefix: optional instance-id prefix filter (LIKE underscores escaped).
    :param automatic_flush: flush the ES index when done.
    :param start: with `stop`, restrict to comments added in this date window.
    :param stop: upper bound of the date window (only used when `start` is set).
    """
    from rockpack.mainsite.services.video.models import VideoInstanceComment, VideoInstance, Video
    from rockpack.mainsite.core.dbapi import db

    counts = db.session.query(
        VideoInstance.id, func.count(VideoInstance.id)
    ).join(
        VideoInstanceComment,
        VideoInstanceComment.video_instance == VideoInstance.id
    ).join(
        Video,
        (Video.id == VideoInstance.video) & (Video.visible == True)
    ).filter(VideoInstance.deleted == False)

    if prefix:
        # Escape literal underscores so LIKE doesn't treat them as wildcards
        counts = counts.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

    if start:
        counts = counts.filter(VideoInstanceComment.date_added.between(start, stop))

    counts = counts.group_by(VideoInstance.id)

    # Hoist count(): each .count() call issues a separate SQL query, and the
    # original evaluated it twice just to build the log message.
    total = counts.count()
    app.logger.debug('%d video%s with comments' % (total, 's' if total > 1 else ''))
    app.logger.debug('Processing ...')

    ev = ESVideo.updater(bulk=True)
    for videoid, count in counts:
        ev.set_document_id(videoid)
        ev.add_field('comments.count', count)
        ev.update()
        ev.reset()

    if automatic_flush:
        ESVideo.flush()
def import_dolly_video_owners(self, prefix=None, start=None, stop=None, automatic_flush=True):
    """ Import all the owner attributes of a video instance belonging to a channel """
    from rockpack.mainsite.services.video.models import Channel, VideoInstance
    from rockpack.mainsite.services.user.models import User

    with app.test_request_context():
        instances = VideoInstance.query.join(
            Channel,
            (VideoInstance.channel == Channel.id) &
            (Channel.public == True) &
            (Channel.visible == True) &
            (Channel.deleted == False)
        ).join(
            User, User.id == Channel.owner
        ).filter(VideoInstance.deleted == False)

        if prefix:
            # Escape literal underscores so LIKE doesn't treat them as wildcards
            instances = instances.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

        if start:
            instances = instances.filter(VideoInstance.date_updated.between(start, stop))

        # Order by channel so the mapped owner dict can be reused across
        # consecutive instances sharing the same channel (see loop below)
        instances = instances.with_entities(VideoInstance, User).order_by(VideoInstance.channel)

        total = instances.count()
        done = 0
        current_user_dict = None
        current_channel = None

        ec = ESVideo.updater(bulk=True)
        for instance, user in instances.yield_per(6000):
            if instance.channel != current_channel:
                # Duck punch the user on for perf
                instance.owner_rel = user
                mapped = ESVideoAttributeMap(instance)
                current_user_dict = mapped.owner
                current_channel = instance.channel

            ec.set_document_id(instance.id)
            ec.add_field('owner', current_user_dict)
            ec.update()
            ec.reset()

            done += 1
            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)

        # `done` now equals the number of instances processed; the previous
        # 1-based counter over-reported the total by one in this log line.
        app.logger.info('Video owner size: %d', done)

        if automatic_flush:
            self.conn.flush_bulk(forced=True)
def import_video_stars(self, prefix=None, automatic_flush=True, start=None, stop=None):
    """Update `recent_user_stars` on ES video documents from star activity.

    For each video instance, collects up to 5 distinct user ids from
    'star' activity (history ordered most-recent-first) and writes them
    to the video index.

    :param prefix: optional object-id prefix filter (LIKE underscores escaped).
    :param automatic_flush: force a bulk flush when done.
    :param start: with `stop`, restrict to instances starred in this window
        (the full star history is still fetched for matching instances).
    :param stop: upper bound of the date window.
    """
    from rockpack.mainsite.services.user.models import UserActivity

    updated_activity = aliased(UserActivity, name='updated_activity')

    with app.test_request_context():
        # Ordering by object_id is required for the groupby() below
        query = UserActivity.query.filter(
            UserActivity.action == 'star',
            UserActivity.object_type == 'video_instance'
        ).order_by(
            UserActivity.object_id,
            UserActivity.date_actioned.desc()
        )

        if start:
            # Self-join restricts to instances with activity in the window
            # while keeping every star row for those instances
            query = query.join(
                updated_activity,
                (updated_activity.object_id == UserActivity.object_id) &
                (updated_activity.date_actioned.between(start, stop))
            )

        if prefix:
            query = query.filter(UserActivity.object_id.like(prefix.replace('_', '\\_') + '%'))

        ec = ESVideo.updater(bulk=True)
        done = 0
        for instance_id, group in groupby(
                query.yield_per(200).values(UserActivity.object_id, UserActivity.user),
                lambda x: x[0]):
            try:
                ec.set_document_id(instance_id)
                ec.add_field(
                    'recent_user_stars',
                    list(set([u.encode('utf8') for v, u in group]))[:5]
                )
                ec.update()
            except pyes.exceptions.ElasticSearchException:
                # Best-effort update: skip documents that fail, but leave a
                # trace instead of swallowing the failure silently
                app.logger.exception('Failed to update stars for %s', instance_id)
            finally:
                ec.reset()
            done += 1

        # `done` is now the number of instances processed; the previous
        # 1-based counter over-reported by one here.
        app.logger.info('Video star size: %d', done)

        if automatic_flush:
            self.conn.flush_bulk(forced=True)
def import_dolly_repin_counts(self, prefix=None, start=None, stop=None, automatic_flush=True):
    """Compute repin (child-instance) counts and flag influential instances.

    For each video instance, counts instances repinned from it
    (child rows whose `source_channel` is this instance's channel) and
    writes `child_instance_count` plus a `most_influential` flag — set on
    the instance with the highest count for each video, with ties broken
    in favour of the original (no `source_channel`) instance.

    :param prefix: optional instance-id prefix filter (LIKE underscores escaped).
    :param start: with `stop`, restrict to videos whose instances were
        updated in this date window.
    :param stop: upper bound of the date window.
    :param automatic_flush: flush the ES index when done.
    """
    from rockpack.mainsite.services.video.models import VideoInstance, Video

    with app.test_request_context():
        child = aliased(VideoInstance, name='child')
        query = readonly_session.query(
            VideoInstance.id,
            VideoInstance.video,
            child.source_channel,
            func.count(VideoInstance.id)
        ).outerjoin(
            child,
            (VideoInstance.video == child.video) &
            (VideoInstance.channel == child.source_channel)
        ).join(
            Video,
            (Video.id == VideoInstance.video) &
            (Video.visible == True) &
            (VideoInstance.deleted == False))

        if prefix:
            query = query.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

        if start:
            video_ids = VideoInstance.query.filter(
                VideoInstance.date_updated.between(start, stop)
            ).with_entities(VideoInstance.video).subquery()
            query = query.filter(Video.id.in_(video_ids))

        query = query.group_by(VideoInstance.id, VideoInstance.video, child.source_channel)

        instance_counts = {}
        influential_index = {}

        total = query.count()
        done = 1

        for _id, video, source_channel, count in query.yield_per(6000):
            # Set the count for the video instance
            instance_counts[(_id, video)] = count
            # If the count is higher than the previous instance's for the
            # same video, mark this instance as the influential one
            i_id, i_count = influential_index.get(video, (None, 0))
            # Count will always be at least 1 (the outer join row itself)
            # but should really be zero if there are no children
            if not source_channel and count == 1:
                count = 0
            # Ties go to the original (no source_channel) instance.
            # Parentheses make the and/or precedence explicit.
            if (count > i_count) or ((count == i_count) and not source_channel):
                influential_index[video] = (_id, count)

            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)
            done += 1

        total = len(instance_counts)
        done = 1

        ec = ESVideo.updater(bulk=True)
        for (_id, video), count in instance_counts.iteritems():
            ec.set_document_id(_id)
            ec.add_field('child_instance_count', count)
            # Fixed: the previous default of '' would raise IndexError on
            # ''[0] if a video were ever absent; a (None, 0) pair is safe
            # and the comparison already yields a bool.
            ec.add_field('most_influential', influential_index.get(video, (None, 0))[0] == _id)
            ec.update()
            ec.reset()
            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)
            done += 1

        if automatic_flush:
            ESVideo.flush()
def import_videos(self, prefix=None, start=None, stop=None, recent_user_stars=False, automatic_flush=True):
    """Bulk-(re)index visible, non-deleted video instances into Elasticsearch.

    :param prefix: optional instance-id prefix filter (LIKE underscores escaped).
    :param start: with `stop`, restrict to instances updated in this window;
        also refreshes video counts for the affected channels.
    :param stop: upper bound of the date window.
    :param recent_user_stars: include recent star data (otherwise left empty).
    :param automatic_flush: flush the ES bulk buffer when done.
    """
    from rockpack.mainsite.services.video.models import (Channel, Video, VideoInstanceLocaleMeta, VideoInstance)
    from rockpack.mainsite.services.user.models import User

    with app.test_request_context():
        query = VideoInstance.query.join(
            Channel, Channel.id == VideoInstance.channel
        ).join(Video).outerjoin(
            VideoInstanceLocaleMeta,
            VideoInstance.id == VideoInstanceLocaleMeta.video_instance
        ).outerjoin(
            User,
            (User.id == VideoInstance.original_channel_owner) &
            (User.is_active == True)
        ).options(
            joinedload(VideoInstance.metas)
        ).options(
            joinedload(VideoInstance.video_rel)
        ).options(
            joinedload(VideoInstance.video_channel)
        ).options(
            joinedload(VideoInstance.original_channel_owner_rel)
        ).filter(
            VideoInstance.deleted == False,
            Video.visible == True
        )

        if prefix:
            query = query.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

        if start:
            query = query.filter(VideoInstance.date_updated.between(start, stop))

        total = query.count()
        app.logger.info('importing %d videos', total)

        start_time = time.time()
        done = 1

        ev = ESVideo.inserter(bulk=True)
        # A set gives O(1) membership; the original list made the
        # `not in` check O(n) per row (accidental O(n^2) overall).
        channel_ids = set()

        for v in query.yield_per(6000):
            mapped = ESVideoAttributeMap(v)
            rep = dict(
                id=mapped.id,
                public=mapped.public,
                video=mapped.video,
                title=mapped.title,
                channel=mapped.channel,
                channel_title=mapped.channel_title,
                category=mapped.category,
                date_added=mapped.date_added,
                position=mapped.position,
                locales=mapped.locales,
                recent_user_stars=mapped.recent_user_stars(empty=not recent_user_stars),
                country_restriction=mapped.country_restriction(empty=True),
                comments=mapped.comments(empty=True),
                child_instance_count=mapped.child_instance_count,
                link_url=mapped.link_url,
                link_title=mapped.link_title,
                tags=mapped.tags,
                is_favourite=mapped.is_favourite,
                most_influential=mapped.most_influential,
                original_channel_owner=mapped.original_channel_owner,
                label=mapped.label,
            )
            ev.manager.indexer.insert(v.id, rep)

            if app.logger.isEnabledFor(logging.DEBUG):
                self.print_percent_complete(done, total)
            done += 1

            if start:
                channel_ids.add(v.channel)

        if start:
            # We'll want to update channel counts that
            # may not get trigger elsewhere
            self.import_channel_video_counts(list(channel_ids))

        if automatic_flush:
            ev.flush_bulk()

        app.logger.debug('finished in %d seconds', time.time() - start_time)