Beispiel #1
0
    def import_comment_counts(self, prefix=None, automatic_flush=True, start=None, stop=None):
        """Write per-video-instance comment counts to the video index.

        Counts the comments attached to each non-deleted video instance whose
        video is visible, and queues an update of the ``comments.count``
        field for every instance that has at least one matching comment.

        :param prefix: if given, only instances whose id starts with this
            value are processed (``_`` is backslash-escaped before it is used
            in the LIKE pattern).
        :param automatic_flush: when true, ``ESVideo.flush()`` is called once
            all updates have been queued.
        :param start: if given, only comments whose ``date_added`` lies
            between ``start`` and ``stop`` are counted.
        :param stop: upper bound of the date window; only read when ``start``
            is set.
        """
        from rockpack.mainsite.services.video.models import VideoInstanceComment, VideoInstance, Video
        from rockpack.mainsite.core.dbapi import db

        counts = db.session.query(VideoInstance.id, func.count(VideoInstance.id)).join(
            VideoInstanceComment,
            VideoInstanceComment.video_instance == VideoInstance.id
        ).join(
            Video,
            (Video.id == VideoInstance.video) &
            (Video.visible == True)
        ).filter(VideoInstance.deleted == False)

        if prefix:
            counts = counts.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

        if start:
            counts = counts.filter(VideoInstanceComment.date_added.between(start, stop))

        counts = counts.group_by(
            VideoInstance.id
        )

        # Query.count() hits the database, so run it once and reuse the value
        # instead of issuing the same COUNT twice for a single log line.
        total = counts.count()
        app.logger.debug('%d video%s with comments', total, '' if total == 1 else 's')
        app.logger.debug('Processing ...')

        ev = ESVideo.updater(bulk=True)
        for videoid, count in counts:
            ev.set_document_id(videoid)
            ev.add_field('comments.count', count)
            ev.update()
            # Clear the updater state before it is reused for the next document.
            ev.reset()

        if automatic_flush:
            ESVideo.flush()
Beispiel #2
0
    def import_dolly_video_owners(self, prefix=None, start=None, stop=None, automatic_flush=True):
        """Import the owner attributes of every video instance belonging to a channel.

        Selects non-deleted video instances whose channel is public, visible
        and not deleted, and queues an update of each document's ``owner``
        field with the mapped owner of that channel.

        :param prefix: if given, only instances whose id starts with this
            value are processed (``_`` is backslash-escaped before it is used
            in the LIKE pattern).
        :param start: if given, only instances whose ``date_updated`` lies
            between ``start`` and ``stop`` are processed.
        :param stop: upper bound of the date window; only read when ``start``
            is set.
        :param automatic_flush: when true, the bulk queue is force-flushed via
            ``self.conn.flush_bulk`` after all updates have been queued.
        """

        from rockpack.mainsite.services.video.models import Channel, VideoInstance
        from rockpack.mainsite.services.user.models import User

        with app.test_request_context():
            instances = VideoInstance.query.join(
                Channel,
                (VideoInstance.channel == Channel.id) &
                (Channel.public == True) &
                (Channel.visible == True) &
                (Channel.deleted == False)
            ).join(
                User,
                User.id == Channel.owner
            ).filter(
                VideoInstance.deleted == False
            )

            if prefix:
                instances = instances.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

            if start:
                instances = instances.filter(VideoInstance.date_updated.between(start, stop))

            # Ordering by channel keeps rows of the same channel adjacent, so
            # the mapped owner only has to be rebuilt when the channel changes.
            instances = instances.with_entities(VideoInstance, User).order_by(VideoInstance.channel)

            total = instances.count()
            # Number of instances handled so far. Starting at zero keeps the
            # final size log exact (the previous 1-based counter reported N + 1).
            processed = 0

            current_user_dict = None
            current_channel = None

            ec = ESVideo.updater(bulk=True)
            for instance, user in instances.yield_per(6000):
                if instance.channel != current_channel:
                    # Duck punch the already-joined user onto the instance so
                    # the attribute map does not have to load it again.
                    instance.owner_rel = user
                    mapped = ESVideoAttributeMap(instance)
                    current_user_dict = mapped.owner
                    current_channel = instance.channel

                ec.set_document_id(instance.id)
                ec.add_field('owner', current_user_dict)
                ec.update()
                ec.reset()

                processed += 1
                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(processed, total)

            app.logger.info('Video owner size: %d', processed)

            if automatic_flush:
                self.conn.flush_bulk(forced=True)
Beispiel #3
0
    def import_video_stars(self, prefix=None, automatic_flush=True, start=None, stop=None):
        """Update ``recent_user_stars`` for video instances from star activity.

        Reads ``star`` activity rows for ``video_instance`` objects, ordered by
        object id and then newest first, and queues an update that sets each
        document's ``recent_user_stars`` field to the (up to) five most recent
        distinct users.

        :param prefix: if given, only activity whose ``object_id`` starts with
            this value is processed (``_`` is backslash-escaped before it is
            used in the LIKE pattern).
        :param automatic_flush: when true, the bulk queue is force-flushed via
            ``self.conn.flush_bulk`` after all updates have been queued.
        :param start: if given, only objects that have an activity row dated
            between ``start`` and ``stop`` are processed.
        :param stop: upper bound of the date window; only read when ``start``
            is set.
        """
        from rockpack.mainsite.services.user.models import UserActivity

        updated_activity = aliased(UserActivity, name='updated_activity')

        with app.test_request_context():
            query = UserActivity.query.filter(
                UserActivity.action == 'star',
                UserActivity.object_type == 'video_instance'
            ).order_by(
                UserActivity.object_id, UserActivity.date_actioned.desc()
            )

            if start:
                # Self-join: keep every star of an object as long as that
                # object has some activity row inside the window. The join can
                # repeat rows; the de-duplication below absorbs that.
                query = query.join(
                    updated_activity,
                    (updated_activity.object_id == UserActivity.object_id) &
                    (updated_activity.date_actioned.between(start, stop)))

            if prefix:
                query = query.filter(UserActivity.object_id.like(prefix.replace('_', '\\_') + '%'))

            ec = ESVideo.updater(bulk=True)
            # Number of instance groups handled; zero-based so the final log
            # reports the real count rather than N + 1.
            processed = 0
            rows = query.yield_per(200).values(UserActivity.object_id, UserActivity.user)
            # groupby relies on the rows being ordered by object_id (see above).
            for instance_id, group in groupby(rows, lambda x: x[0]):
                # De-duplicate while preserving the newest-first row order.
                # Going through a plain set() would scramble the order and make
                # the subsequent "first five" an arbitrary selection.
                recent_users = []
                seen = set()
                for _, user in group:
                    encoded = user.encode('utf8')
                    if encoded in seen:
                        continue
                    seen.add(encoded)
                    recent_users.append(encoded)
                    if len(recent_users) == 5:
                        break

                try:
                    ec.set_document_id(instance_id)
                    ec.add_field('recent_user_stars', recent_users)
                    ec.update()
                except pyes.exceptions.ElasticSearchException:
                    # Best effort: skip this document but leave a trace
                    # instead of dropping the failure silently.
                    app.logger.warning(
                        'Failed to update recent_user_stars for %s',
                        instance_id, exc_info=True)
                finally:
                    ec.reset()
                processed += 1

            app.logger.info('Video star size: %d', processed)

            if automatic_flush:
                self.conn.flush_bulk(forced=True)
Beispiel #4
0
    def import_dolly_repin_counts(self, prefix=None, start=None, stop=None, automatic_flush=True):
        """Update child-instance counts and the ``most_influential`` flag.

        For every non-deleted video instance of a visible video, counts the
        rows produced by outer-joining "child" instances (same video, with
        ``source_channel`` equal to this instance's channel). It then queues an
        update of ``child_instance_count`` and ``most_influential`` for each
        instance. For each video, the instance with the highest adjusted count
        is flagged as the most influential one.

        :param prefix: if given, only instances whose id starts with this
            value are processed (``_`` is backslash-escaped before it is used
            in the LIKE pattern).
        :param start: if given, only videos having an instance whose
            ``date_updated`` lies between ``start`` and ``stop`` are processed.
        :param stop: upper bound of the date window; only read when ``start``
            is set.
        :param automatic_flush: when true, ``ESVideo.flush()`` is called once
            all updates have been queued.
        """
        from rockpack.mainsite.services.video.models import VideoInstance, Video

        with app.test_request_context():
            child = aliased(VideoInstance, name='child')
            query = readonly_session.query(
                VideoInstance.id,
                VideoInstance.video,
                child.source_channel,
                func.count(VideoInstance.id)
            ).outerjoin(
                child,
                (VideoInstance.video == child.video) &
                (VideoInstance.channel == child.source_channel)
            ).join(
                Video,
                (Video.id == VideoInstance.video) &
                (Video.visible == True) &
                (VideoInstance.deleted == False))

            if prefix:
                query = query.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

            if start:
                video_ids = VideoInstance.query.filter(
                    VideoInstance.date_updated.between(start, stop)
                ).with_entities(VideoInstance.video).subquery()

                query = query.filter(Video.id.in_(video_ids))

            query = query.group_by(VideoInstance.id, VideoInstance.video, child.source_channel)

            # (instance id, video id) -> count as returned by the query
            instance_counts = {}
            # video id -> (instance id, adjusted count) of the current leader
            influential_index = {}

            total = query.count()

            for done, (_id, video, source_channel, count) in enumerate(query.yield_per(6000), 1):
                # Set the count for the video instance.
                # NOTE(review): this stores the raw count, i.e. 1 for an
                # instance without children, because the zero adjustment below
                # happens afterwards. Confirm whether that is intended.
                instance_counts[(_id, video)] = count

                # Best count seen so far for this video (0 if none yet).
                i_count = influential_index.get(video, (None, 0))[1]

                # Count will always be at least 1 because of the outer join,
                # but should really be zero if there are no children.
                if not source_channel and count == 1:
                    count = 0

                # If the count is higher than that of the previous leader for
                # the same video, or ties with it while this row has no
                # source channel, mark this instance as the influential one.
                if count > i_count or (count == i_count and not source_channel):
                    influential_index[video] = (_id, count)

                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(done, total)

            total = len(instance_counts)

            ec = ESVideo.updater(bulk=True)
            for done, ((_id, video), count) in enumerate(instance_counts.iteritems(), 1):
                # Default to a (None, 0) pair so a missing video cannot raise
                # IndexError the way indexing into an empty string would.
                influential_id = influential_index.get(video, (None, 0))[0]

                ec.set_document_id(_id)
                ec.add_field('child_instance_count', count)
                ec.add_field('most_influential', influential_id == _id)
                ec.update()
                ec.reset()

                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(done, total)

            if automatic_flush:
                ESVideo.flush()
Beispiel #5
0
    def import_videos(self, prefix=None, start=None, stop=None, recent_user_stars=False, automatic_flush=True):
        """Index video instances into the video index.

        Loads every non-deleted video instance of a visible video (with its
        channel, video, locale metas and original channel owner), maps each
        one through ``ESVideoAttributeMap`` and inserts the resulting document.

        :param prefix: if given, only instances whose id starts with this
            value are processed (``_`` is backslash-escaped before it is used
            in the LIKE pattern).
        :param start: if given, only instances whose ``date_updated`` lies
            between ``start`` and ``stop`` are processed, and the channels of
            those instances are afterwards passed to
            ``import_channel_video_counts``.
        :param stop: upper bound of the date window; only read when ``start``
            is set.
        :param recent_user_stars: when false, ``recent_user_stars`` is
            requested from the attribute map with ``empty=True``.
        :param automatic_flush: when true, the inserter's bulk queue is
            flushed after all documents have been queued.
        """
        from rockpack.mainsite.services.video.models import (Channel, Video, VideoInstanceLocaleMeta,
                                                             VideoInstance)
        from rockpack.mainsite.services.user.models import User

        with app.test_request_context():
            query = VideoInstance.query.join(
                Channel,
                Channel.id == VideoInstance.channel
            ).join(Video).outerjoin(
                VideoInstanceLocaleMeta,
                VideoInstance.id == VideoInstanceLocaleMeta.video_instance
            ).outerjoin(
                User,
                (User.id == VideoInstance.original_channel_owner) &
                (User.is_active == True)
            ).options(
                joinedload(VideoInstance.metas)
            ).options(
                joinedload(VideoInstance.video_rel)
            ).options(
                joinedload(VideoInstance.video_channel)
            ).options(
                joinedload(VideoInstance.original_channel_owner_rel)
            ).filter(
                VideoInstance.deleted == False,
                Video.visible == True
            )

            if prefix:
                query = query.filter(VideoInstance.id.like(prefix.replace('_', '\\_') + '%'))

            if start:
                query = query.filter(VideoInstance.date_updated.between(start, stop))

            total = query.count()
            app.logger.info('importing %d videos', total)
            start_time = time.time()

            ev = ESVideo.inserter(bulk=True)
            # Channels touched by this run, in first-seen order. The set gives
            # O(1) membership checks; the list preserves the order handed on.
            channel_ids = []
            seen_channels = set()
            for done, v in enumerate(query.yield_per(6000), 1):
                mapped = ESVideoAttributeMap(v)
                rep = dict(
                    id=mapped.id,
                    public=mapped.public,
                    video=mapped.video,
                    title=mapped.title,
                    channel=mapped.channel,
                    channel_title=mapped.channel_title,
                    category=mapped.category,
                    date_added=mapped.date_added,
                    position=mapped.position,
                    locales=mapped.locales,
                    recent_user_stars=mapped.recent_user_stars(empty=not recent_user_stars),
                    country_restriction=mapped.country_restriction(empty=True),
                    comments=mapped.comments(empty=True),
                    child_instance_count=mapped.child_instance_count,
                    link_url=mapped.link_url,
                    link_title=mapped.link_title,
                    tags=mapped.tags,
                    is_favourite=mapped.is_favourite,
                    most_influential=mapped.most_influential,
                    original_channel_owner=mapped.original_channel_owner,
                    label=mapped.label,
                )
                ev.manager.indexer.insert(v.id, rep)

                if app.logger.isEnabledFor(logging.DEBUG):
                    self.print_percent_complete(done, total)

                if start and v.channel not in seen_channels:
                    seen_channels.add(v.channel)
                    channel_ids.append(v.channel)

            if start:
                # We'll want to update channel counts that
                # may not get triggered elsewhere.
                self.import_channel_video_counts(channel_ids)

            if automatic_flush:
                ev.flush_bulk()

            app.logger.debug('finished in %d seconds', time.time() - start_time)