Beispiel #1
0
def initialize_content_caches(force=False):
    """
    Regenerate any in-memory content caches that need to be annotated
    with file-availability information.
    """
    # Each step: a log message plus the cache builder to invoke.
    preload_steps = (
        ("Preloading content data.", topic_tools.get_content_cache),
        ("Preloading topic tree data.", topic_tools.get_topic_tree),
    )
    for message, build_cache in preload_steps:
        logging.info(message)
        build_cache(force=force, annotate=True)
Beispiel #2
0
 def browse_to_random_video(self):
     """
     Navigate the test browser to a randomly chosen video page.

     We deliberately do not require the video file to be downloaded
     (checking ``video['languages']`` could hang the test indefinitely on a
     bare install), so any cached video entry is accepted.
     """
     # Fetch the cache once instead of twice per lookup; list() keeps
     # random.choice working with Python 3 dict views as well.
     content_cache = get_content_cache()
     video = content_cache[choice(list(content_cache.keys()))]
     video_url = video['path']
     self.browse_to(self.reverse("learn") + video_url)
 def browse_to_random_video(self):
     """
     Navigate the test browser to a randomly chosen video page.

     We deliberately do not require the video file to be downloaded
     (checking ``video['languages']`` could hang the test indefinitely on a
     bare install), so any cached video entry is accepted.
     """
     # Fetch the cache once instead of twice per lookup; list() keeps
     # random.choice working with Python 3 dict views as well.
     content_cache = get_content_cache()
     video = content_cache[choice(list(content_cache.keys()))]
     video_url = video['path']
     self.browse_to(self.reverse("learn") + video_url)
Beispiel #4
0
 def create_random_content_file(self):
     """
     Helper function for testing content files.

     Picks a random item from the content cache, creates an empty
     ``<youtube_id>.mp4`` file under ``settings.CONTENT_ROOT``, and
     returns ``(fake_content_file, content_id, youtube_id)``.
     """
     # Fetch the cache once; list() keeps random.choice working with
     # Python 3 dict views as well.
     content_cache = get_content_cache()
     content_id = random.choice(list(content_cache.keys()))
     youtube_id = content_cache[content_id]["youtube_id"]
     fake_content_file = os.path.join(settings.CONTENT_ROOT, "%s.mp4" % youtube_id)
     with open(fake_content_file, "w") as fh:
         fh.write("")
     self.assertTrue(os.path.exists(fake_content_file), "Make sure the content file was created, youtube_id='%s'." % youtube_id)
     return (fake_content_file, content_id, youtube_id)
Beispiel #5
0
 def create_random_content_file(self):
     """
     Helper function for testing content files.

     Picks a random item from the content cache, creates an empty
     ``<youtube_id>.mp4`` file under ``settings.CONTENT_ROOT``, and
     returns ``(fake_content_file, content_id, youtube_id)``.
     """
     # Fetch the cache once; list() keeps random.choice working with
     # Python 3 dict views as well.
     content_cache = get_content_cache()
     content_id = random.choice(list(content_cache.keys()))
     youtube_id = content_cache[content_id]["youtube_id"]
     fake_content_file = os.path.join(settings.CONTENT_ROOT, "%s.mp4" % youtube_id)
     with open(fake_content_file, "w") as fh:
         fh.write("")
     self.assertTrue(os.path.exists(fake_content_file), "Make sure the content file was created, youtube_id='%s'." % youtube_id)
     return (fake_content_file, content_id, youtube_id)
Beispiel #6
0
def initialize_content_caches(force=False):
    """
    Regenerate any in-memory content caches that need file-availability
    annotation, once for every installed language pack.
    """
    # (label, builder) pairs; the builders close over `force`.
    preload_plan = (
        ("exercise", lambda lang: topic_tools.get_exercise_cache(force=force, language=lang)),
        ("content", lambda lang: topic_tools.get_content_cache(force=force, annotate=True, language=lang)),
        ("topic tree", lambda lang: topic_tools.get_topic_tree(force=force, annotate=True, language=lang)),
    )
    for lang in i18n.get_installed_language_packs(force=True).keys():
        for label, build_cache in preload_plan:
            logging.info("Preloading {what} data for language {lang}.".format(what=label, lang=lang))
            build_cache(lang)
Beispiel #7
0
def before_feature(context, feature):
    """
    Standard feature setup; additionally, for features tagged
    ``with_progress``, seed two random exercise logs and two random video
    logs for the test user so progress-dependent scenarios have data.
    """
    base_before_feature(context, feature)

    if "with_progress" in feature.tags:
        user = FacilityUser.objects.get(username=context.user, facility=getattr(context, "facility", None))
        # list() is required: random.sample no longer accepts set-like
        # dict views (removed in Python 3.11).
        exercises = random.sample(list(get_exercise_cache().keys()), 2)
        for exercise in exercises:
            log = ExerciseLog(
                exercise_id=exercise,
                user=user,
                streak_progress=50,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now()
                )
            log.save()
        context.exercises = exercises

        videos = random.sample(list(get_content_cache().keys()), 2)

        for video in videos:
            # The cache key doubles as both youtube_id and video_id here.
            log = VideoLog(
                youtube_id=video,
                video_id=video,
                user=user,
                total_seconds_watched=100,
                points=600,
                latest_activity_timestamp=datetime.datetime.now()
                )
            log.save()
        context.videos = videos
Beispiel #8
0
def learner_logs(request):
    """
    Return a paginated JSON summary of learner activity logs.

    GET parameters: ``page``, ``limit``, ``time_window`` (days, default 7),
    ``start_date``/``end_date`` ('%Y/%m/%d'), ``topic_id`` (repeatable),
    ``log_type`` (repeatable; defaults to exercise/video/content).
    """
    lang = request.language
    # Querystring values arrive as strings; coerce the numeric ones so the
    # pagination arithmetic below works (e.g. page*limit would otherwise be
    # string repetition or a TypeError, and timedelta(time_window) would fail).
    page = int(request.GET.get("page", 1))

    limit = int(request.GET.get("limit", 50))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    start_date = request.GET.get("start_date", None)

    end_date = request.GET.get("end_date", None)

    topic_ids = request.GET.getlist("topic_id", [])

    learners = get_learners_from_GET(request)

    pages = int(ceil(len(learners)/float(limit)))

    if page*limit < len(learners):

        learners = learners[(page - 1)*limit: page*limit]

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []

    output_objects = []

    end_date = datetime.datetime.strptime(end_date,'%Y/%m/%d') if end_date else datetime.datetime.now()

    start_date = datetime.datetime.strptime(start_date,'%Y/%m/%d') if start_date else end_date - datetime.timedelta(time_window)

    for log_type in log_types:
        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(user__in=learners, **obj_ids).values(*fields)
        if not topic_ids:
            # No topic filter: prefer logs active inside the date window,
            # falling back to all logs when the window is empty.
            topic_objects = log_objects.filter(latest_activity_timestamp__gte=start_date, latest_activity_timestamp__lte=end_date)
            if topic_objects.count() == 0:
                topic_objects = log_objects
            objects = dict([(obj[id_field], get_content_cache(language=lang).get(obj[id_field], get_exercise_cache(language=lang).get(obj[id_field]))) for obj in topic_objects]).values()
        output_objects.extend(objects)
        output_logs.extend(log_objects)

    return JsonResponse({
        "logs": output_logs,
        "contents": output_objects,
        # Sometimes 'learners' gets collapsed to a list from the Queryset. This insures against that eventuality.
        "learners": [{
            "first_name": learner.first_name,
            "last_name": learner.last_name,
            "username": learner.username,
            "pk": learner.pk
            } for learner in learners],
        "page": page,
        "pages": pages,
        "limit": limit
    })
Beispiel #9
0
def before_scenario(context, scenario):
    """
    Standard scenario setup; additionally, for scenarios tagged
    ``with_progress``, seed two random exercise logs and two random video
    logs for the test user so progress-dependent steps have data.
    """
    base_before_scenario(context, scenario)

    if "with_progress" in context.tags:
        user = FacilityUser.objects.get(username=context.user,
                                        facility=getattr(
                                            context, "facility", None))
        # list() is required: random.sample no longer accepts set-like
        # dict views (removed in Python 3.11).
        exercises = random.sample(list(get_exercise_cache().keys()), 2)
        for exercise in exercises:
            log = ExerciseLog(
                exercise_id=exercise,
                user=user,
                streak_progress=50,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now())
            log.save()
        context.exercises = exercises

        videos = random.sample(list(get_content_cache().keys()), 2)

        for video in videos:
            # The cache key doubles as both youtube_id and video_id here.
            log = VideoLog(youtube_id=video,
                           video_id=video,
                           user=user,
                           total_seconds_watched=100,
                           points=600,
                           latest_activity_timestamp=datetime.datetime.now())
            log.save()
        context.videos = videos
Beispiel #10
0
def aggregate_learner_logs(request):
    """
    Return a JSON roll-up of learner activity over a recent time window:
    total content time (hours), exercise attempt count, average mastery,
    the most recent learner events, and total time logged.

    GET parameters: ``event_limit``, ``time_window`` (days, default 7),
    ``topic_id`` (repeatable), ``log_type`` (repeatable).
    """
    learners = get_learners_from_GET(request)

    # Querystring values arrive as strings; coerce the numeric ones so the
    # list slicing and timedelta arithmetic below work as intended.
    event_limit = int(request.GET.get("event_limit", 10))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    topic_ids = request.GET.getlist("topic_id", [])

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []

    output_dict = {
        "content_time_spent": 0,
        "exercise_attempts": 0,
        "exercise_mastery": None,
    }
    start_date = datetime.datetime.now() - datetime.timedelta(time_window)
    end_date = datetime.datetime.now()

    for log_type in log_types:

        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(
            user__in=learners,
            latest_activity_timestamp__gte=start_date,
            latest_activity_timestamp__lte=end_date, **obj_ids).order_by("-latest_activity_timestamp")

        if log_type == "video":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("total_seconds_watched"))["total_seconds_watched__sum"] or 0
        elif log_type == "content":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("time_spent"))["time_spent__sum"] or 0
        elif log_type == "exercise":
            output_dict["exercise_attempts"] = AttemptLog.objects.filter(user__in=learners,
                timestamp__gte=start_date,
                timestamp__lte=end_date).count()
            # Aggregate once instead of twice — each .aggregate() call is a
            # separate database query.
            streak_avg = log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"]
            if streak_avg is not None:
                output_dict["exercise_mastery"] = round(streak_avg)
        output_logs.extend(log_objects)

    # Report total time in hours
    output_dict["content_time_spent"] = round(output_dict["content_time_spent"]/3600.0,1)
    output_logs.sort(key=lambda x: x.latest_activity_timestamp, reverse=True)
    output_dict["learner_events"] = [{
        "learner": log.user.get_name(),
        "complete": log.complete,
        "struggling": getattr(log, "struggling", None),
        "progress": getattr(log, "streak_progress", getattr(log, "progress", None)),
        "content": get_exercise_cache().get(getattr(log, "exercise_id", ""), get_content_cache().get(getattr(log, "video_id", getattr(log, "content_id", "")), {})),
        } for log in output_logs[:event_limit]]
    output_dict["total_time_logged"] = UserLogSummary.objects\
        .filter(user__in=learners, last_activity_datetime__gte=start_date, last_activity_datetime__lte=end_date)\
        .aggregate(Sum("total_seconds")).get("total_seconds__sum") or 0
    return JsonResponse(output_dict)
Beispiel #11
0
def initialize_content_caches(force=False):
    """
    Regenerate the in-memory exercise, content, and topic-tree caches
    (the latter two annotated with file availability) for each installed
    language pack.
    """
    installed = i18n.get_installed_language_packs(force=True).keys()
    for lang in installed:
        logging.info("Preloading exercise data for language {lang}.".format(lang=lang))
        topic_tools.get_exercise_cache(force=force, language=lang)
        logging.info("Preloading content data for language {lang}.".format(lang=lang))
        topic_tools.get_content_cache(force=force, annotate=True, language=lang)
        logging.info("Preloading topic tree data for language {lang}.".format(lang=lang))
        topic_tools.get_topic_tree(force=force, annotate=True, language=lang)
def _get_content_by_readable_id(readable_id):
    """
    Look up a content node by its ``readable_id``.

    Falls back to a normalized form (runs of '-' collapsed, lowercased)
    when the exact id is not found. Returns the content dict, or None.
    """
    global CONTENT_BY_READABLE_ID
    if not CONTENT_BY_READABLE_ID:
        # Build the lookup table lazily, once, from the content cache.
        CONTENT_BY_READABLE_ID = dict([(c["readable_id"], c) for c in get_content_cache().values()])
    try:
        return CONTENT_BY_READABLE_ID[readable_id]
    except KeyError:
        # Raw string: "\-" is an invalid escape sequence (deprecated since
        # Python 3.6, an error in newer versions).
        return CONTENT_BY_READABLE_ID.get(re.sub(r"-+", "-", readable_id).lower(), None)
Beispiel #13
0
 def test_video_availability(self):
     """
     The number of videos actually on disk must be at least the number of
     locally-available videos counted in the content cache.
     """
     per_node_counts = (
         len(node.get("languages", []))
         for node in get_content_cache().values()
     )
     ncontent_local = sum(per_node_counts)
     self.assertTrue(
         self.n_content >= ncontent_local,
         "# videos actually on disk should be >= # videos in topic tree")
def _get_content_by_readable_id(readable_id):
    """
    Look up a content node by its ``readable_id``.

    Falls back to a normalized form (runs of '-' collapsed, lowercased)
    when the exact id is not found. Returns the content dict, or None.
    """
    global CONTENT_BY_READABLE_ID
    if not CONTENT_BY_READABLE_ID:
        # Build the lookup table lazily, once, from the content cache.
        CONTENT_BY_READABLE_ID = dict([(c["readable_id"], c)
                                       for c in get_content_cache().values()])
    try:
        return CONTENT_BY_READABLE_ID[readable_id]
    except KeyError:
        # Raw string: "\-" is an invalid escape sequence (deprecated since
        # Python 3.6, an error in newer versions).
        return CONTENT_BY_READABLE_ID.get(
            re.sub(r"-+", "-", readable_id).lower(), None)
Beispiel #15
0
    def create_empty_entry(cls, entity_id, kind, playlist):
        """
        Build a "not started" progress entry for a playlist item.

        For quizzes the title comes from the playlist itself; for videos
        and exercises it is read from the corresponding cache.

        Raises:
            ValueError: if ``kind`` is not "Quiz", "Video", or "Exercise".
                (The original code fell through and crashed with an
                UnboundLocalError on ``topic_node`` instead.)
        """
        if kind != "Quiz":
            if kind == "Video":
                topic_node = get_content_cache().get(entity_id)
            elif kind == "Exercise":
                topic_node = get_exercise_cache().get(entity_id)
            else:
                raise ValueError("Unknown playlist entry kind: %s" % kind)
            title = topic_node["title"]
            path = topic_node["path"]
        else:
            title = playlist["title"]
            path = ""
        entry = {"id": entity_id, "kind": kind, "status": "notstarted", "score": 0, "title": title, "path": path}

        return entry
Beispiel #16
0
    def obj_get(self, bundle, **kwargs):
        """
        Fetch a single playlist by primary key, annotating its entries with
        full titles from the content cache and attaching youtube ids to
        video entries. Raises NotFound when no playlist matches ``pk``.
        """
        pk = kwargs['pk']
        content_dict = get_content_cache()
        for playlist in Playlist.all():
            if str(playlist.id) != pk:
                continue
            # Add the full titles onto the playlist entries
            annotated = []
            for entry in playlist.entries:
                annotated.append(PlaylistEntry.add_full_title_from_topic_tree(entry, content_dict))
            playlist.entries = annotated
            for entry in playlist.entries:
                if entry["entity_kind"] == "Video":
                    entry["youtube_id"] = get_slug2id_map()[entry["entity_id"]]
            return playlist
        raise NotFound('Playlist with pk %s not found' % pk)
Beispiel #17
0
    def obj_get(self, bundle, **kwargs):
        """
        Fetch a single playlist by primary key, annotating its entries with
        full titles from the content cache and attaching youtube ids to
        video entries. Raises NotFound when no playlist matches ``pk``.
        """
        pk = kwargs['pk']
        content_dict = get_content_cache()

        # Locate the first playlist whose id matches the requested pk.
        match = None
        for candidate in Playlist.all():
            if str(candidate.id) == pk:
                match = candidate
                break
        if match is None:
            raise NotFound('Playlist with pk %s not found' % pk)

        # Add the full titles onto the playlist entries
        match.entries = [
            PlaylistEntry.add_full_title_from_topic_tree(entry, content_dict)
            for entry in match.entries
        ]
        for entry in match.entries:
            if entry["entity_kind"] == "Video":
                entry["youtube_id"] = get_slug2id_map()[entry["entity_id"]]
        return match
Beispiel #18
0
    def create_empty_entry(cls, entity_id, kind, playlist):
        """
        Build a "not started" progress entry for a playlist item.

        For quizzes the title comes from the playlist itself; for videos
        and exercises it is read from the corresponding cache.

        Raises:
            ValueError: if ``kind`` is not "Quiz", "Video", or "Exercise".
                (The original code fell through and crashed with an
                UnboundLocalError on ``topic_node`` instead.)
        """
        if kind != "Quiz":
            if kind == "Video":
                topic_node = get_content_cache().get(entity_id)
            elif kind == "Exercise":
                topic_node = get_exercise_cache().get(entity_id)
            else:
                raise ValueError("Unknown playlist entry kind: %s" % kind)
            title = topic_node["title"]
            path = topic_node["path"]
        else:
            title = playlist["title"]
            path = ""
        entry = {
            "id": entity_id,
            "kind": kind,
            "status": "notstarted",
            "score": 0,
            "title": title,
            "path": path,
        }

        return entry
Beispiel #19
0
def impl(context):
    """Assert the browser is on the page of the last in-progress video."""
    expected_path = get_content_cache().get(context.videos[1]).get("path")
    current_url = context.browser.current_url
    assert expected_path in current_url, "Last in progress video not in %s" % current_url
def generate_fake_video_logs(
    facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)
):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file.

    Returns a list of VideoLog instances in the single-user case, or a
    nested list of such lists in the recursive multi-user case.

    NOTE(review): the `start_date` default is evaluated once at import time,
    so "six months before now" is relative to module load, not call time —
    confirm this is intended.
    """

    # Total simulated time span, from start_date up to now.
    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        # Recurse once per (topic, user) pair; each call appends its own
        # list of logs, so the result is nested here.
        for topic in topics:
            for user in facility_user:
                video_logs.append(generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date))

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
        # intent appears to be "fall back to fresh settings when notes is
        # empty or invalid JSON" — consider narrowing to ValueError.
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            try:
                facility_user.save()
            except Exception as e:
                logging.error("Error saving facility user: %s" % e)

        date_diff_started = datetime.timedelta(
            seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"]
        )  # when this user started in the program, relative to NOW

        # contains the video duration key
        video_cache = get_content_cache()

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            # Exercises may carry either an "id" or a "name" key; accept both.
            exercise_ids = [ex["id"] if "id" in ex else ex["name"] for ex in exercises]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video", user_settings=user_settings)
            logging.debug(
                "# videos: %d; p(videos)=%4.3f, user settings: %s\n"
                % (len(videos), p_video_outer, json.dumps(user_settings))
            )

            for video in videos:
                p_completed = probability_of("completed", user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(
                            user=facility_user, id=video["related_exercise"]["id"]
                        )
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.0
                else:  # Slower students will use videos more.  Effort also important.
                    pct_completed = 100.0 * min(
                        1.0,
                        sqrt(
                            random.random()
                            * sqrt(
                                user_settings["effort_level"]
                                * user_settings["time_in_program"]
                                / sqrt(user_settings["speed_of_learning"])
                            )
                        ),
                    )

                # get the video duration on the video cache
                video_id = video.get("id", "")
                video_duration = 0
                if video_id and video_cache:
                    video_item = video_cache.get(video_id, None)
                    if video_item:
                        # duration is in seconds; missing durations count as 0
                        video_duration = video_item.get("duration", 0)

                # Compute quantities based on sample
                total_seconds_watched = int(video_duration * pct_completed / 100.0)
                points = int(750 * pct_completed / 100.0)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(
                        days=max(0, random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"]))
                    )
                    date_completed = exercise_log[0].completion_timestamp - date_jitter
                else:
                    rate_of_videos = (
                        0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]
                    )  # exercises per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(
                        seconds=random.randint(
                            int(time_for_watching), int(datediff(date_diff_started, units="seconds"))
                        )
                    )
                    date_completed = datetime.datetime.now() - time_delta_completed

                # Only create a log if one doesn't already exist for this video.
                try:
                    vlog = VideoLog.objects.get(user=facility_user, video_id=video_id)
                except VideoLog.DoesNotExist:

                    logging.info(
                        "Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s"
                        % (
                            facility_user.first_name,
                            video["title"],
                            pct_completed,
                            points,
                            " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                        )
                    )
                    youtube_id = video.get("youtube_id", video_id)
                    vlog = VideoLog(
                        user=facility_user,
                        video_id=video_id,
                        youtube_id=youtube_id,
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.0),
                        completion_timestamp=date_completed,
                        latest_activity_timestamp=date_completed,
                    )
                    try:
                        vlog.save()  # avoid userlog issues
                    except Exception as e:
                        logging.error("Error saving video log: %s" % e)
                        continue

                video_logs.append(vlog)

    return video_logs
Beispiel #21
0
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(), 'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info("Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory.".format(
                        tmp_dir
                    )
                )

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning("FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm.")

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError("Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path")

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get('khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json'))
            )
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json'))
            )

        topic_tree = softload_json(topic_tree_json_path, logger=logger.debug, raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning("Found a duplicate ID for {}, re-downloading".format(node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        # Two-pass VP8/Vorbis encode; remove any stale pass logfile first.
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir, video_file_name)
                        if os.path.isfile(video_file_dest):
                            # Skip re-encoding when the output already exists (resumable builds).
                            logger.info("Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i", video_file_src,
                                "-codec:v", "libvpx",
                                "-quality", "best",
                                "-cpu-used", "0",
                                "-b:v", "300k",
                                "-qmin", "10",  # 10=lowest value
                                "-qmax", "35",  # 42=highest value
                                "-maxrate", "300k",
                                "-bufsize", "600k",
                                "-threads", "8",
                                # "-vf", "scale=-1",
                                "-codec:a", "libvorbis",
                                # "-b:a", "128k",
                                "-aq", "5",
                                "-f", "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass", "1",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass", "2",
                                "-y", "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate()
                                # NOTE(review): stderr is not piped, so _stderr_data is always
                                # None here; the "or" guards below account for that.
                                if process.returncode != 0:
                                    logger.error("Error invoking ffmpeg: {}".format((_stderr_data or "") + (stdout_data or "")))
                                    logger.error("Command was: {}".format(" ".join(cmd)))
                                    raise CommandError("Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        # NOTE(review): video_file_name / video_file_dest are only assigned
                        # in the transcode2webm branch above, so this path raises NameError
                        # (here and at the video_url join below) — confirm and fix upstream.
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(
                        node["path"],
                        video_file_name
                    )
                    # Running total kept on the function object itself (reset at the bottom).
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src, output_format="png")
                        if fp is None:
                            logger.error("Failed to create thumbnail for {}".format(video_file_src))
                        else:
                            logger.info("Successfully created thumbnail for {}".format(video_file_src))
                            # NOTE(review): Python 2 file() builtin; the handle is never
                            # explicitly closed (relies on refcount finalization).
                            file(thumb_file_src, 'wb').write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"],
                            node['id'] + '.png'
                        )
                        # Hard-link into the staging dir only once (idempotent reruns).
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(
                        subtitle_src_dir,
                        node['id'] + '.srt'
                    )
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(
                            node_dir,
                            node['id'] + '.vtt'
                        )
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(subtitle_srt))
                        else:
                            logger.info("Subtitle convert from SRT to VTT: {}".format(subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"],
                                node['id'] + '.vtt'
                            )

                else:
                    # Source video missing; only an error if downloading was requested.
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            # Recurse into children, pruning topics that ended up empty and
            # videos that are not available, so no dead pages get rendered.
            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get("children", [])
                unavailable_video = child["kind"] == "Video" and not child.get("content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children
        copy_media.videos_found = 0

        def render_topic_pages(node):
            """
            Recursively render one HTML page per topic node into tmp_dir.

            Builds the node's ancestor chain (used by the template, e.g. for
            breadcrumbs), renders "kalite_zim/topic.html" in the target
            language, rewrites absolute '/static' references to relative ones,
            and writes the result to <tmp_dir>/<node id>.html.  Increments
            render_topic_pages.pages_rendered for the summary report.
            """
            # The node itself is included only when it has children to list.
            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html", template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            # BUG FIX: the original leaked the file handle via
            # open(dest_html, "w").write(...); use a context manager instead.
            with open(dest_html, "w") as html_file:
                html_file.write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)
        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html", template_context)
            about_html = render_to_string("kalite_zim/about.html", template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html and about.html files.
        # BUG FIX: use context managers so the handles are closed promptly
        # (previously open(...).write(...) leaked them).
        with open(os.path.join(tmp_dir, 'welcome.html'), 'w') as html_file:
            html_file.write(welcome_html)
        with open(os.path.join(tmp_dir, 'about.html'), 'w') as html_file:
            html_file.write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'), os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        # NOTE(review): "--description" is passed twice; the first one was
        # probably intended to be "--title" (zimwriterfs expects one) — confirm
        # against the zimwriterfs CLI before changing.
        zimwriterfs_args = (
            zimwriterfs,
            "--welcome", "welcome.html",
            "--favicon", "static/img/ka_leaf.png",
            "--publisher", publisher,
            "--creator", "KhanAcademy.org",
            "--description", "Khan Academy ({})".format(language),
            "--description", "Videos from Khan Academy",
            "--language", language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            # BUG FIX: previously .format() was called on logger.error()'s return
            # value (None -> AttributeError), and _stderr_data is None because
            # stderr is not piped (None + bytes -> TypeError).  Guard both,
            # matching the ffmpeg error handling above.
            logger.error("Error invoking zimwriterfs: {}".format((_stderr_data or "") + (stdout_data or "")))

        logger.info(
            "Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
                h=duration // 3600,
                m=(duration % 3600) // 60,
                s=duration % 60,
            )
        )
Beispiel #22
0
    def user_progress_detail(cls, user_id, playlist_id, language=None):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.

        Args:
            user_id: primary key of the FacilityUser to report on.
            playlist_id: id of the leafed topic serving as the playlist.
            language: optional language code; defaults to the stored
                "default_language" setting, falling back to
                settings.LANGUAGE_CODE.

        Returns:
            A list of cls instances, one per playlist child, in playlist order.
        """
        if not language:
            language = Settings.get(
                "default_language") or settings.LANGUAGE_CODE

        user = FacilityUser.objects.get(id=user_id)
        # First leafed topic whose id matches; None when not found.
        playlist = next(
            (pl for pl in get_leafed_topics() if pl.get("id") == playlist_id),
            None)
        # NOTE(review): when playlist is None, the playlist.get(...) calls
        # below raise AttributeError — confirm callers always pass a valid id.

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(
            user, pl_video_ids, pl_exercise_ids)

        # Format & append quiz the quiz log, if it exists
        # quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for entity_id in playlist.get("children"):
            entry = {}
            # Look the child up in the content cache first, then the exercise
            # cache; fall back to an empty dict when in neither.
            leaf_node = get_content_cache(
                language=language).get(entity_id) or get_exercise_cache(
                    language=language).get(entity_id) or {}
            kind = leaf_node.get("kind")

            # Defaults used when no matching log is found.
            status = "notstarted"
            score = 0

            if kind == "Video":
                vid_log = next((vid_log for vid_log in user_vid_logs
                                if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"

                    # 750 points corresponds to 100% for a video.
                    score = int(
                        float(vid_log.get("points")) / float(750) * 100)

            elif kind == "Exercise":
                ex_log = next((ex_log for ex_log in user_ex_logs
                               if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"

                    score = ex_log.get('streak_progress')

            # NOTE(review): leaf_node may be {} when the id was in neither
            # cache; the "title"/"path" lookups would then raise KeyError.
            entry = {
                "id": entity_id,
                "kind": kind,
                "status": status,
                "score": score,
                "title": leaf_node["title"],
                "path": leaf_node["path"],
            }

            progress_details.append(cls(**entry))

        return progress_details
 def test_video_availability(self):
     """Check that the locally-annotated video count never exceeds self.n_content."""
     # Count, across the whole content cache, how many (node, language) pairs exist.
     language_counts = (len(node.get("languages", [])) for node in get_content_cache().values())
     locally_available = sum(language_counts)
     self.assertTrue(
         self.n_content >= locally_available,
         "# videos actually on disk should be >= # videos in topic tree"
     )
Beispiel #24
0
    def user_progress_detail(cls, user_id, playlist_id):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.

        Args:
            user_id: primary key of the FacilityUser to report on.
            playlist_id: id of a Playlist or of a leafed topic.

        Returns:
            A list of cls instances, one per non-Divider playlist entry.
        """
        user = FacilityUser.objects.get(id=user_id)
        # Search both explicit Playlists (as attribute dicts) and leafed topics.
        playlist = next(
            (
                pl
                for pl in [plist.__dict__ for plist in Playlist.all()] + get_leafed_topics()
                if pl.get("id") == playlist_id
            ),
            None,
        )
        # NOTE(review): playlist is None for an unknown id; the .get() calls
        # below would then raise AttributeError — confirm callers validate it.

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)

        # Format & append quiz the quiz log, if it exists
        quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(
            user, (playlist.get("entries") or playlist.get("children")), playlist.get("id")
        )

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for ent in playlist.get("entries") or playlist.get("children"):
            entry = {}
            # Playlist entries use "entity_kind"; topic children use "kind".
            kind = ent.get("entity_kind") or ent.get("kind")
            if kind == "Divider":
                # Dividers are visual separators with no progress to report.
                continue
            elif kind == "Video":
                # Playlist entries reference videos by slug; map to the id.
                entity_id = get_slug2id_map().get(ent.get("entity_id")) or ent.get("id")
                vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    leaf_node = get_content_cache().get(vid_log["video_id"])

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        # 750 points corresponds to 100% for a video.
                        "score": int(float(vid_log.get("points")) / float(750) * 100),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Exercise":
                entity_id = ent.get("entity_id") or ent.get("id")
                ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    # NOTE(review): unlike the Video branch, there is no final
                    # "else: notstarted" here — status stays unbound (or stale
                    # from a previous iteration) if none of these match.
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"

                    ex_log_id = ex_log.get("exercise_id")
                    leaf_node = get_exercise_cache().get(ex_log_id)

                    entry = {
                        "id": ex_log_id,
                        "kind": kind,
                        "status": status,
                        "score": ex_log.get("streak_progress"),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Quiz":
                # A quiz is identified by its playlist's id.
                entity_id = playlist["id"]
                if quiz_log:
                    if quiz_log.complete:
                        # Grade bands: <=59 fail, 60-79 borderline, 80+ pass.
                        if quiz_pct_score <= 59:
                            status = "fail"
                        elif quiz_pct_score <= 79:
                            status = "borderline"
                        else:
                            status = "pass"
                    elif quiz_log.attempts:
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    quiz_log_id = quiz_log.quiz

                    entry = {
                        "id": quiz_log_id,
                        "kind": "Quiz",
                        "status": status,
                        "score": quiz_pct_score,
                        "title": playlist.get("title"),
                        "path": "",
                    }

            # No log found for this entry: emit an empty/"notstarted" record.
            # NOTE(review): entity_id is unbound here for any kind not handled
            # above — confirm all playlists only contain the four known kinds.
            if not entry:
                entry = cls.create_empty_entry(entity_id, kind, playlist)

            progress_details.append(cls(**entry))

        return progress_details
Beispiel #25
0
class CachingTest(KALiteTestCase):
    """Integration tests for page-cache creation, lookup, and invalidation."""

    # Snapshot of the content cache, built once when the class body executes.
    content_cache = get_content_cache()

    @unittest.skipIf(
        True, "Failing test that I'm tired of debugging."
    )  # TODO(bcipolli): re-enable when we need to be able to auto-cache
    @unittest.skipIf(settings.CACHE_TIME == 0,
                     "Test only relevant when caching is enabled")
    def test_cache_invalidation(self):
        """Create the cache item, then invalidate it and show that it is deleted."""

        # Get a random content id
        n_contents = len(self.content_cache)
        # NOTE(review): dict.keys()[10] only works on Python 2, where keys()
        # returns a list; n_contents above is unused.
        content_id = self.content_cache.keys()[
            10]  #random.choice(self.content_cache.keys())
        logging.debug("Testing on content_id = %s" % content_id)
        content_path = self.content_cache[content_id]['path']

        # Clean the cache for this item
        caching.expire_page(path=content_path, failure_ok=True)

        # Create the cache item, and check it
        self.assertFalse(caching.has_cache_key(path=content_path),
                         "expect: no cache key after expiring the page")

        caching.regenerate_all_pages_related_to_contents(
            content_ids=[content_id])
        self.assertTrue(caching.has_cache_key(path=content_path),
                        "expect: Cache key exists after Django Client get")

        # Invalidate the cache item, and check it
        caching.invalidate_all_caches()  # test the convenience function
        self.assertTrue(not caching.has_cache_key(path=content_path),
                        "expect: no cache key after expiring the page")

    @unittest.skipIf(settings.CACHE_TIME == 0,
                     "Test only relevant when caching is enabled")
    def test_cache_across_clients(self):
        """Show that caching is accessible across all clients
        (i.e. that different clients don't generate different cache keys)"""

        # Get a random content id
        n_contents = len(self.content_cache)
        # NOTE(review): random.choice over keys() requires Python 2 (list keys).
        content_id = random.choice(self.content_cache.keys())
        logging.debug("Testing on content_id = %s" % content_id)
        content_path = self.content_cache[content_id]['path']

        # Clean the cache for this item
        caching.expire_page(path=content_path, failure_ok=True)
        self.assertTrue(not caching.has_cache_key(path=content_path),
                        "expect: No cache key after expiring the page")

        # Set up the cache with Django client
        Client().get(content_path)
        self.assertTrue(caching.has_cache_key(path=content_path),
                        "expect: Cache key exists after Django Client get")
        caching.expire_page(path=content_path)  # clean cache
        self.assertTrue(not caching.has_cache_key(path=content_path),
                        "expect: No cache key after expiring the page")

        # Get the same cache key when getting with urllib, and make sure the cache is created again
        # NOTE(review): urllib.urlopen is the Python 2 API (moved in Python 3).
        urllib.urlopen(self.live_server_url + content_path).close()
        self.assertTrue(caching.has_cache_key(path=content_path),
                        "expect: Cache key exists after urllib get")
        caching.expire_page(path=content_path)  # clean cache
        self.assertTrue(not caching.has_cache_key(path=content_path),
                        "expect: No cache key after expiring the page")

        # Same deal, now using requests library
        requests.get(self.live_server_url + content_path)
        self.assertTrue(caching.has_cache_key(path=content_path),
                        "expect: Cache key exists after requestsget")
        caching.expire_page(path=content_path)  # clean cache
        self.assertTrue(not caching.has_cache_key(path=content_path),
                        "expect: No cache key after expiring the page")
Beispiel #26
0
class CachingTest(KALiteTestCase):
    """Test-case scaffold holding a shared snapshot of the content cache."""

    # Built once when the class body executes (import time), not per-test.
    content_cache = get_content_cache()
Beispiel #27
0
    def user_progress_detail(cls, user_id, playlist_id, language=None):
        """
        Build one PlaylistProgressDetail object per child of the playlist,
        reflecting the given user's video/exercise progress for each child.
        """
        if not language:
            language = Settings.get("default_language") or settings.LANGUAGE_CODE

        user = FacilityUser.objects.get(id=user_id)

        # Locate the playlist among the leafed topics by id.
        playlist = None
        for candidate in get_leafed_topics():
            if candidate.get("id") == playlist_id:
                playlist = candidate
                break

        video_ids, exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Pull this user's logs, restricted to the playlist's entries.
        vid_logs, ex_logs = cls.get_user_logs(user, video_ids, exercise_ids)

        details = []
        for child_id in playlist.get("children"):
            # Content cache first, then exercise cache, then an empty dict.
            node = (
                get_content_cache(language=language).get(child_id)
                or get_exercise_cache(language=language).get(child_id)
                or {}
            )
            node_kind = node.get("kind")

            # Defaults when no matching log exists for this child.
            progress_status = "notstarted"
            progress_score = 0

            if node_kind == "Video":
                matching = [log for log in vid_logs if log["video_id"] == child_id]
                if matching:
                    log = matching[0]
                    if log.get("complete"):
                        progress_status = "complete"
                    elif log.get("total_seconds_watched"):
                        progress_status = "inprogress"
                    # 750 points corresponds to 100% for a video.
                    progress_score = int(float(log.get("points")) / float(750) * 100)

            elif node_kind == "Exercise":
                matching = [log for log in ex_logs if log["exercise_id"] == child_id]
                if matching:
                    log = matching[0]
                    if log.get("struggling"):
                        progress_status = "struggling"
                    elif log.get("complete"):
                        progress_status = "complete"
                    elif log.get("attempts"):
                        progress_status = "inprogress"
                    progress_score = log.get('streak_progress')

            details.append(cls(**{
                "id": child_id,
                "kind": node_kind,
                "status": progress_status,
                "score": progress_score,
                "title": node["title"],
                "path": node["path"],
            }))

        return details
def generate_fake_video_logs(facility_user=None,
                             topics=topics,
                             start_date=datetime.datetime.now() -
                             datetime.timedelta(days=30 * 6)):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file."""
    # NOTE(review): the start_date default is evaluated once at import time,
    # so "now" here is module-load time, not call time.

    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        # Fan out: one recursive call per (topic, user) pair.
        for topic in topics:
            for user in facility_user:
                video_logs.append(
                    generate_fake_video_logs(facility_user=user,
                                             topics=[topic],
                                             start_date=start_date))

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        # NOTE(review): bare except swallows everything (including
        # KeyboardInterrupt) — probably meant to catch ValueError only.
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            try:
                facility_user.save()
            except Exception as e:
                logging.error("Error saving facility user: %s" % e)

        date_diff_started = datetime.timedelta(
            seconds=datediff(date_diff, units="seconds") *
            user_settings["time_in_program"]
        )  # when this user started in the program, relative to NOW

        # contains the video duration key
        video_cache = get_content_cache()

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            exercise_ids = [
                ex["id"] if "id" in ex else ex['name'] for ex in exercises
            ]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user,
                                                       id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video",
                                           user_settings=user_settings)
            logging.debug(
                "# videos: %d; p(videos)=%4.3f, user settings: %s\n" %
                (len(videos), p_video_outer, json.dumps(user_settings)))

            for video in videos:
                p_completed = probability_of("completed",
                                             user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(
                            user=facility_user,
                            id=video["related_exercise"]["id"])
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.
                else:  # Slower students will use videos more.  Effort also important.
                    pct_completed = 100. * min(
                        1.,
                        sqrt(random.random() *
                             sqrt(user_settings["effort_level"] *
                                  user_settings["time_in_program"] /
                                  sqrt(user_settings["speed_of_learning"]))))

                # get the video duration on the video cache
                video_id = video.get("id", "")
                video_duration = 0
                if video_id and video_cache:
                    video_item = video_cache.get(video_id, None)
                    if video_item:
                        video_duration = video_item.get("duration", 0)

                # Compute quantities based on sample
                total_seconds_watched = int(video_duration * pct_completed /
                                            100.)
                # 750 points corresponds to 100% completion.
                points = int(750 * pct_completed / 100.)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(days=max(
                        0,
                        random.gauss(
                            1, user_settings["speed_of_learning"] /
                            user_settings["effort_level"])))
                    date_completed = exercise_log[
                        0].completion_timestamp - date_jitter
                else:
                    # NOTE(review): rate_of_videos is computed but never used.
                    rate_of_videos = 0.66 * user_settings[
                        "effort_level"] + 0.33 * user_settings[
                            "speed_of_learning"]  # exercises per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(
                        seconds=random.randint(
                            int(time_for_watching),
                            int(datediff(date_diff_started, units="seconds"))))
                    date_completed = datetime.datetime.now(
                    ) - time_delta_completed

                # Only create a log when one doesn't already exist for this video.
                try:
                    vlog = VideoLog.objects.get(user=facility_user,
                                                video_id=video_id)
                except VideoLog.DoesNotExist:

                    logging.info(
                        "Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s"
                        % (
                            facility_user.first_name,
                            video["title"],
                            pct_completed,
                            points,
                            " COMPLETE on %s!" %
                            date_completed if pct_completed == 100 else "",
                        ))
                    youtube_id = video.get("youtube_id", video_id)
                    vlog = VideoLog(
                        user=facility_user,
                        video_id=video_id,
                        youtube_id=youtube_id,
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.),
                        completion_timestamp=date_completed,
                        latest_activity_timestamp=date_completed,
                    )
                    try:
                        vlog.save()  # avoid userlog issues
                    except Exception as e:
                        logging.error("Error saving video log: %s" % e)
                        continue

                video_logs.append(vlog)

    return video_logs
Beispiel #29
0
def aggregate_learner_logs(request):
    """
    Return aggregate activity statistics for a set of learners as JSON.

    Reads from request.GET:
        event_limit -- max number of recent learner events to return (default 10)
        time_window -- days to look back when start_date absent (default 7)
        start_date / end_date -- 'YYYY/MM/DD' bounds for the report window
        topic_id (repeatable) -- restrict logs to these topics
        log_type (repeatable) -- which log models to include
            (default: exercise, video, content)

    Returns a JsonResponse with total content time (hours), exercise attempt
    count, average exercise mastery, recent learner events, and total logged
    time (hours).
    """
    lang = request.language
    learners = get_learners_from_GET(request)

    # GET parameters arrive as strings; cast to int so the list slice and
    # timedelta below don't raise TypeError when a client supplies a value.
    event_limit = int(request.GET.get("event_limit", 10))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    start_date = request.GET.get("start_date", None)

    end_date = request.GET.get("end_date", None)

    topic_ids = request.GET.getlist("topic_id", [])

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []

    output_dict = {
        "content_time_spent": 0,
        "exercise_attempts": 0,
        "exercise_mastery": None,
    }

    end_date = datetime.datetime.strptime(end_date,'%Y/%m/%d') if end_date else datetime.datetime.now()

    start_date = datetime.datetime.strptime(start_date,'%Y/%m/%d') if start_date else end_date - datetime.timedelta(days=time_window)

    for log_type in log_types:

        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(
            user__in=learners,
            latest_activity_timestamp__gte=start_date,
            latest_activity_timestamp__lte=end_date, **obj_ids).order_by("-latest_activity_timestamp")


        if log_type == "video":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("total_seconds_watched"))["total_seconds_watched__sum"] or 0
        elif log_type == "content":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("time_spent"))["time_spent__sum"] or 0
        elif log_type == "exercise":
            output_dict["exercise_attempts"] = AttemptLog.objects.filter(user__in=learners,
                timestamp__gte=start_date,
                timestamp__lte=end_date).count()
            # Compute the aggregate once instead of twice (it's a DB query).
            streak_avg = log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"]
            if streak_avg is not None:
                output_dict["exercise_mastery"] = round(streak_avg)
        output_logs.extend(log_objects)

    # Report total time in hours
    output_dict["content_time_spent"] = round(output_dict["content_time_spent"]/3600.0,1)
    output_logs.sort(key=lambda x: x.latest_activity_timestamp, reverse=True)
    output_dict["learner_events"] = [{
        "learner": log.user.get_name(),
        "complete": log.complete,
        "struggling": getattr(log, "struggling", None),
        "progress": getattr(log, "streak_progress", getattr(log, "progress", None)),
        "content": get_exercise_cache(language=lang).get(getattr(log, "exercise_id", "")) or get_content_cache(language=lang).get(getattr(log, "video_id", None) or getattr(log, "content_id", "")) or {}
        } for log in output_logs[:event_limit]]
    output_dict["total_time_logged"] = round((UserLogSummary.objects\
        .filter(user__in=learners, start_datetime__gte=start_date, start_datetime__lte=end_date)\
        .aggregate(Sum("total_seconds")).get("total_seconds__sum") or 0)/3600.0, 1)
    return JsonResponse(output_dict)
Beispiel #30
0
    def user_progress_detail(cls, user_id, playlist_id):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.
        """
        user = FacilityUser.objects.get(id=user_id)
        # NOTE(review): `playlist` is None when no id matches, which would
        # raise AttributeError below — presumably callers pass valid ids;
        # confirm.
        playlist = next((pl for pl in [plist.__dict__ for plist in Playlist.all()] + get_leafed_topics() if pl.get("id") == playlist_id), None)

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)

        # Format & append the quiz log, if it exists
        quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for ent in (playlist.get("entries") or playlist.get("children")):
            entry = {}
            kind = ent.get("entity_kind") or ent.get("kind")
            if kind == "Divider":
                continue
            elif kind == "Video":
                entity_id = get_slug2id_map().get(ent.get("entity_id")) or ent.get("id")
                vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    leaf_node = get_content_cache().get(vid_log["video_id"])

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        # assumes a video is worth at most 750 points — TODO confirm
                        "score": int(float(vid_log.get("points")) / float(750) * 100),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Exercise":
                entity_id = (ent.get("entity_id") or ent.get("id"))
                ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"
                    else:
                        # Bug fix: `status` was previously left unbound when an
                        # exercise log exists with no attempts, raising
                        # NameError when building `entry` below.
                        status = "notstarted"

                    ex_log_id = ex_log.get("exercise_id")
                    leaf_node = get_exercise_cache().get(ex_log_id)

                    entry = {
                        "id": ex_log_id,
                        "kind": kind,
                        "status": status,
                        "score": ex_log.get("streak_progress"),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Quiz":
                entity_id = playlist["id"]
                if quiz_log:
                    if quiz_log.complete:
                        if quiz_pct_score <= 59:
                            status = "fail"
                        elif quiz_pct_score <= 79:
                            status = "borderline"
                        else:
                            status = "pass"
                    elif quiz_log.attempts:
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    quiz_log_id = quiz_log.quiz

                    entry = {
                        "id": quiz_log_id,
                        "kind": "Quiz",
                        "status": status,
                        "score": quiz_pct_score,
                        "title": playlist.get("title"),
                        "path": "",
                    }

            # No user activity for this entry: emit a blank placeholder.
            if not entry:
                entry = cls.create_empty_entry(entity_id, kind, playlist)

            progress_details.append(cls(**entry))

        return progress_details
Beispiel #31
0
    def user_progress_detail(cls, user_id, playlist_id):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.
        """
        user = FacilityUser.objects.get(id=user_id)
        # NOTE(review): `playlist` is None when no id matches, which would
        # raise AttributeError below — presumably callers pass valid ids;
        # confirm.
        playlist = next((pl for pl in get_leafed_topics() if pl.get("id") == playlist_id), None)

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for entity_id in playlist.get("children"):
            entry = {}
            # Look the id up in the content cache first, then the exercise cache.
            leaf_node = get_content_cache().get(entity_id, get_exercise_cache().get(entity_id, {}))
            kind = leaf_node.get("kind")

            if kind == "Video":
                vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        # assumes a video is worth at most 750 points — TODO confirm
                        "score": int(float(vid_log.get("points")) / float(750) * 100),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Exercise":
                ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"
                    else:
                        # Bug fix: `status` was previously left unbound when an
                        # exercise log exists with no attempts, raising
                        # NameError when building `entry` below.
                        status = "notstarted"

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        "score": ex_log.get("streak_progress"),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            # Oh Quizzes, we hardly knew ye!
            # TODO (rtibbles): Sort out the status of Quizzes, and either reinstate them or remove them.
            # Quizzes were introduced to provide a way of practicing multiple types of exercise at once
            # However, there is currently no way to access them, and the manner for generating them (from the now deprecated Playlist models) is inaccessible
            # (the dead Quiz-handling branch that used to live here has been removed;
            # see version control history if it ever needs to be resurrected).

            # No user activity for this entry: emit a blank placeholder.
            if not entry:
                entry = cls.create_empty_entry(entity_id, kind, playlist)

            progress_details.append(cls(**entry))

        return progress_details
def impl(context):
    # Behave step: verify the browser landed on the last in-progress video.
    expected_path = get_content_cache().get(context.videos[1]).get("path")
    current_url = context.browser.current_url
    failure_message = "Last in progress video not in %s" % current_url
    assert expected_path in current_url, failure_message
Beispiel #33
0
    def handle(self, *args, **options):
        """
        Export KA Lite content for one language as a ZIM archive.

        Takes exactly one positional argument: the destination ``.zim`` file
        path. Content is staged into a temporary directory (``--tmp-dir``),
        videos are hard-linked from CONTENT_ROOT (or transcoded to webm when
        ``--transcode2webm`` is set), topic pages are rendered to HTML, and
        finally the external ``zimwriterfs`` binary builds the archive.

        Raises CommandError on bad arguments, missing external binaries, a
        dirty tmp directory (without -c/-r), or a failed transcode.
        """
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(),
                                   'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info(
                    "Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory."
                    .format(tmp_dir))

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning(
                "FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm."
            )

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError(
                    "Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path"
                )

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get(
                'khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json')))
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json')))

        topic_tree = softload_json(topic_tree_json_path,
                                   logger=logger.debug,
                                   raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(
                        topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get(
                    "description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            """Copy/transcode one node's media into tmp_dir and mark availability."""
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                # NOTE(review): this warning fires when the cache already says
                # "webm", yet resets the format to mp4 — the message about a
                # "duplicate ID" looks unrelated; confirm the intent.
                if node['content']['format'] == "webm":
                    logger.warning(
                        "Found a duplicate ID for {}, re-downloading".format(
                            node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(
                        video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir,
                                                       video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info(
                                "Already encoded: {}".format(video_file_dest))
                        else:
                            # Two-pass VP8/Vorbis encode.
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i",
                                video_file_src,
                                "-codec:v",
                                "libvpx",
                                "-quality",
                                "best",
                                "-cpu-used",
                                "0",
                                "-b:v",
                                "300k",
                                "-qmin",
                                "10",  # 10=lowest value
                                "-qmax",
                                "35",  # 42=highest value
                                "-maxrate",
                                "300k",
                                "-bufsize",
                                "600k",
                                "-threads",
                                "8",
                                # "-vf", "scale=-1",
                                "-codec:a",
                                "libvorbis",
                                # "-b:a", "128k",
                                "-aq",
                                "5",
                                "-f",
                                "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass",
                                "1",
                                "-passlogfile",
                                ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass",
                                "2",
                                "-y",
                                "-passlogfile",
                                ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(
                                    cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate(
                                )
                                if process.returncode != 0:
                                    logger.error(
                                        "Error invoking ffmpeg: {}".format(
                                            (_stderr_data or "") +
                                            (stdout_data or "")))
                                    logger.error("Command was: {}".format(
                                        " ".join(cmd)))
                                    raise CommandError(
                                        "Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(node["path"],
                                                     video_file_name)
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(
                        copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src,
                                              output_format="png")
                        if fp is None:
                            logger.error(
                                "Failed to create thumbnail for {}".format(
                                    video_file_src))
                        else:
                            logger.info(
                                "Successfully created thumbnail for {}".format(
                                    video_file_src))
                            # Bug fix: `file()` is Python-2-only (NameError on
                            # Python 3) and leaked the handle; use a
                            # context-managed open() instead.
                            with open(thumb_file_src, 'wb') as thumb_file:
                                thumb_file.write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"], node['id'] + '.png')
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(subtitle_src_dir,
                                                node['id'] + '.srt')
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(node_dir,
                                                    node['id'] + '.vtt')
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(
                                subtitle_srt))
                        else:
                            logger.info(
                                "Subtitle convert from SRT to VTT: {}".format(
                                    subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"], node['id'] + '.vtt')

                else:
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(
                            video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(
                    node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            # Recurse, pruning empty topics and unavailable videos.
            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get(
                    "children", [])
                unavailable_video = child["kind"] == "Video" and not child.get(
                    "content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children

        copy_media.videos_found = 0

        def render_topic_pages(node):
            """Render this topic node (and recursively its children) to HTML."""
            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html",
                                              template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            # Context-managed so the handle is closed deterministically.
            with open(dest_html, "w") as html_file:
                html_file.write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)

        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html",
                                            template_context)
            about_html = render_to_string("kalite_zim/about.html",
                                          template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html and about.html files
        with open(os.path.join(tmp_dir, 'welcome.html'), 'w') as welcome_file:
            welcome_file.write(welcome_html)
        with open(os.path.join(tmp_dir, 'about.html'), 'w') as about_file:
            about_file.write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'),
                        os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(
            copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(
            render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        # NOTE(review): "--description" is passed twice; the first occurrence
        # was likely intended to be "--title" — confirm against the
        # zimwriterfs CLI before changing.
        zimwriterfs_args = (
            zimwriterfs,
            "--welcome",
            "welcome.html",
            "--favicon",
            "static/img/ka_leaf.png",
            "--publisher",
            publisher,
            "--creator",
            "KhanAcademy.org",
            "--description",
            "Khan Academy ({})".format(language),
            "--description",
            "Videos from Khan Academy",
            "--language",
            language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            # Bug fix: `.format` was previously called on the return value of
            # logger.error() (None), raising AttributeError; also guard
            # against None stderr (stderr is not piped), matching the ffmpeg
            # error handling above.
            logger.error("Error invoking zimwriterfs: {}".format(
                (_stderr_data or "") + (stdout_data or "")))

        logger.info("Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
            h=duration // 3600,
            m=(duration % 3600) // 60,
            s=duration % 60,
        ))