Example #1
0
    def save(self, update_userlog=False, *args, **kwargs):
        # To deal with backwards compatibility,
        #   check video_id, whether imported or not.
        if not self.video_id:
            assert kwargs.get("imported", False), "video_id better be set by internal code."
            assert self.youtube_id, "If not video_id, you better have set youtube_id!"
            self.video_id = (
                i18n.get_video_id(self.youtube_id) or self.youtube_id
            )  # for unknown videos, default to the youtube_id

        if not kwargs.get("imported", False):
            self.full_clean()

            # Compute learner status
            already_complete = self.complete
            self.complete = self.points >= VideoLog.POINTS_PER_VIDEO
            if not already_complete and self.complete:
                self.completion_timestamp = datetime.now()

            # Tell logins that they are still active (ignoring validation failures).
            #   TODO(bcipolli): Could log video information in the future.
            if update_userlog:
                try:
                    UserLog.update_user_activity(
                        self.user,
                        activity_type="login",
                        update_datetime=(self.completion_timestamp or datetime.now()),
                        language=self.language,
                    )
                except ValidationError as e:
                    logging.error("Failed to update userlog during video: %s" % e)

        super(VideoLog, self).save(*args, **kwargs)
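A minimal usage sketch for the save() override above; the FacilityUser instance and the point value are illustrative assumptions, not taken from the source:

    # Hypothetical caller: record progress for a watched video.
    vlog = VideoLog(user=some_facility_user, youtube_id="aNqG4ChKShI")      # some_facility_user is assumed
    vlog.video_id = i18n.get_video_id(vlog.youtube_id) or vlog.youtube_id   # save() asserts unless video_id is set (or imported=True is passed)
    vlog.points = VideoLog.POINTS_PER_VIDEO                                 # reaching the threshold marks the log complete
    vlog.save(update_userlog=True)                                          # also pings UserLog.update_user_activity; failures are only logged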
Example #2
0
        def add_missing_objects_to_db(youtube_ids_in_filesystem,
                                      videos_marked_at_all):
            # Files that exist, but are not in the DB, should be assumed to be good videos,
            #   and just needing to be added to the DB.  Add them to the DB in this way,
            #   so that these files also trigger the update code below (and trigger cache invalidation)
            youtube_ids_needing_model_creation = list(
                youtube_ids_in_filesystem - videos_marked_at_all)
            new_video_files = []
            if youtube_ids_needing_model_creation:
                for lang_code, youtube_ids in divide_videos_by_language(
                        youtube_ids_needing_model_creation).iteritems():
                    # OK to do bulk_create; cache invalidation triggered via save download
                    lang_video_files = [
                        VideoFile(youtube_id=id,
                                  percent_complete=100,
                                  download_in_progress=False,
                                  language=lang_code) for id in youtube_ids
                    ]
                    VideoFile.objects.bulk_create(lang_video_files)
                    new_video_files += lang_video_files
                    caching.invalidate_all_caches(
                    )  # Do this within the loop, to update users ASAP
                self.stdout.write(
                    "Created %d VideoFile models (and marked them as complete, since the files exist)\n"
                    % len(new_video_files))

            return [
                i18n.get_video_id(video_file.youtube_id)
                for video_file in new_video_files
            ]
Example #3
0
    def save(self, update_userlog=False, *args, **kwargs):
        # To deal with backwards compatibility,
        #   check video_id, whether imported or not.
        if not self.video_id:
            assert kwargs.get(
                "imported", False), "video_id better be set by internal code."
            assert self.youtube_id, "If not video_id, you better have set youtube_id!"
            self.video_id = i18n.get_video_id(
                self.youtube_id
            ) or self.youtube_id  # for unknown videos, default to the youtube_id

        if not kwargs.get("imported", False):
            self.full_clean()

            # Tell logins that they are still active (ignoring validation failures).
            #   TODO(bcipolli): Could log video information in the future.
            if update_userlog:
                try:
                    UserLog.update_user_activity(
                        self.user,
                        activity_type="login",
                        update_datetime=(self.completion_timestamp
                                         or datetime.now()),
                        language=self.language)
                except ValidationError as e:
                    logging.error("Failed to update userlog during video: %s" %
                                  e)

        super(VideoLog, self).save(*args, **kwargs)
Example #4
0
 def delete_objects_for_incomplete_videos():
     # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
     video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
     deleted_video_ids = [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_to_delete]
     video_files_to_delete.delete()
     if deleted_video_ids:
         self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(deleted_video_ids))
     return deleted_video_ids
Example #5
0
 def delete_objects_for_incomplete_videos():
     # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
     video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
     deleted_video_ids = [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_to_delete]
     video_files_to_delete.delete()
     if deleted_video_ids:
         self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(deleted_video_ids))
     return deleted_video_ids
Example #6
0
        def update_objects_to_be_complete(youtube_ids_in_filesystem):
            # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
            updated_video_ids = []
            for chunk in break_into_chunks(youtube_ids_in_filesystem):
                video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
                # Collect ids before calling .update(); once the rows are updated, re-evaluating
                #   this lazy queryset (which filters on percent_complete=0) would match nothing.
                updated_video_ids += [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]
                video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)

            if updated_video_ids:
                self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
            return updated_video_ids
Example #7
0
        def update_objects_to_be_complete(youtube_ids_in_filesystem):
            # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
            updated_video_ids = []
            for chunk in break_into_chunks(youtube_ids_in_filesystem):
                video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
                # Collect ids before calling .update(); once the rows are updated, re-evaluating
                #   this lazy queryset (which filters on percent_complete=0) would match nothing.
                updated_video_ids += [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]
                video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)

            if updated_video_ids:
                self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
            return updated_video_ids
Example #8
0
        def add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all):
            # Files that exist, but are not in the DB, should be assumed to be good videos,
            #   and just needing to be added to the DB.  Add them to the DB in this way,
            #   so that these files also trigger the update code below (and trigger cache invalidation)
            youtube_ids_needing_model_creation = list(youtube_ids_in_filesystem - videos_marked_at_all)
            new_video_files = []
            if youtube_ids_needing_model_creation:
                for lang_code, youtube_ids in divide_videos_by_language(youtube_ids_needing_model_creation).iteritems():
                    # OK to do bulk_create; cache invalidation triggered via save download
                    lang_video_files = [VideoFile(youtube_id=id, percent_complete=100, download_in_progress=False, language=lang_code) for id in youtube_ids]
                    VideoFile.objects.bulk_create(lang_video_files)
                    new_video_files += lang_video_files
                self.stdout.write("Created %d VideoFile models (and marked them as complete, since the files exist)\n" % len(new_video_files))

            return [i18n.get_video_id(video_file.youtube_id) for video_file in new_video_files]
Example #9
0
def get_video_node_by_youtube_id(youtube_id):
    """Returns the video node corresponding to the video_id of the given youtube_id, or None"""
    video_id = i18n.get_video_id(youtube_id=youtube_id)
    return topic_tools.get_node_cache("Video").get(video_id, [None])[0]
Example #10
0
 def forwards(self, orm):
     # Setting the video ID
     for vlog in orm["main.VideoLog"].objects.all():
         vlog.video_id = i18n.get_video_id(vlog.youtube_id) or vlog.youtube_id
         vlog.save()
Example #11
0
def update_all_central_callback(request):
    """
    Callback after authentication.

    Parses out the request token verification.
    Then finishes the request by getting an auth token.
    """
    if not "ACCESS_TOKEN" in request.session:
        finish_auth(request)

    exercises = get_api_resource(request, "/api/v1/user/exercises")
    videos = get_api_resource(request, "/api/v1/user/videos")
    node_cache = get_node_cache()

    # Collate videos
    video_logs = []
    for video in videos:
        # Assume that KA videos are all english-language, not dubbed (for now)
        youtube_id = video.get('video', {}).get('youtube_id', "")
        video_id = get_video_id(youtube_id)  # map from youtube_id to video_id (across all languages)

        # Only save videos with progress
        if not video.get('seconds_watched', None):
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            video_logs.append({
                "video_id": video_id,
                "youtube_id": youtube_id,
                "total_seconds_watched": video['seconds_watched'],
                "points": VideoLog.calc_points(video['seconds_watched'], video['duration']),
                "complete": video['completed'],
                "completion_timestamp": convert_ka_date(video['last_watched']) if video['completed'] else None,
            })
            logging.debug("Got video log for %s: %s" % (video_id, video_logs[-1]))
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)

    # Collate exercises
    exercise_logs = []
    for exercise in exercises:
        # Only save exercises that have any progress.
        if not exercise.get('last_done', None):
            continue

        # Only save exercise logs for exercises that we recognize.
        slug = exercise.get('exercise', "")
        if slug not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % slug)
            continue

        try:
            completed = exercise['streak'] >= 10
            basepoints = node_cache['Exercise'][slug][0]['basepoints']
            exercise_logs.append({
                "exercise_id": slug,
                "streak_progress": min(100, 100 * exercise['streak']/10),  # duplicates logic elsewhere
                "attempts": exercise['total_done'],
                "points": ExerciseLog.calc_points(basepoints, ncorrect=exercise['streak'], add_randomness=False),  # no randomness when importing from KA
                "complete": completed,
                "attempts_before_completion": exercise['total_done'] if not exercise['practiced'] else None,  #can't figure this out if they practiced after mastery.
                "completion_timestamp": convert_ka_date(exercise['proficient_date']) if completed else None,
            })
            logging.debug("Got exercise log for %s: %s" % (slug, exercise_logs[-1]))
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)

    # POST the data back to the distributed server
    try:

        dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
        logging.debug("POST'ing to %s" % request.session["distributed_callback_url"])
        response = requests.post(
            request.session["distributed_callback_url"],
            cookies={ "csrftoken": request.session["distributed_csrf_token"] },
            data = {
                "csrfmiddlewaretoken": request.session["distributed_csrf_token"],
                "video_logs": json.dumps(video_logs, default=dthandler),
                "exercise_logs": json.dumps(exercise_logs, default=dthandler),
                "user_id": request.session["distributed_user_id"],
            }
        )
        logging.debug("Response (%d): %s" % (response.status_code, response.content))
    except requests.exceptions.ConnectionError as e:
        return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
            "message_type": "error",
            "message": _("Could not connect to your KA Lite installation to share Khan Academy data."),
            "message_id": "id_khanload",
        }))
    except Exception as e:
        return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
            "message_type": "error",
            "message": _("Failure to send data to your KA Lite installation: %s") % e,
            "message_id": "id_khanload",
        }))


    try:
        json_response = json.loads(response.content)
        if not isinstance(json_response, dict) or len(json_response) != 1:
            # Could not validate the message is a single key-value pair
            raise Exception(_("Unexpected response format from your KA Lite installation."))
        message_type = json_response.keys()[0]
        message = json_response.values()[0]
    except ValueError as e:
        message_type = "error"
        message = unicode(e)
    except Exception as e:
        message_type = "error"
        message = _("Loading json object: %s") % e

    # If something broke on the distributed server, we have no way to recover.
    #   For now, just show the error to users.
    #
    # Ultimately, we have a message that we would like to share with the distributed server.
#    if response.status_code != 200:
#        return HttpResponseServerError(response.content)

    return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
        "message_type": message_type,
        "message": message,
        "message_id": "id_khanload",
    }))
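The dthandler lambda above is what lets json.dumps serialize the datetime fields in the collated logs; a minimal sketch of the same idea (the field values are made up):

    import datetime, json

    dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
    payload = {"completion_timestamp": datetime.datetime(2014, 1, 15, 12, 30), "points": 500}
    print json.dumps(payload, default=dthandler, sort_keys=True)
    # -> {"completion_timestamp": "2014-01-15T12:30:00", "points": 500}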
Example #12
0
class TestSaveVideoLog(KALiteTestCase):

    ORIGINAL_POINTS = 84
    ORIGINAL_SECONDS_WATCHED = 32
    NEW_POINTS = 32
    NEW_SECONDS_WATCHED = 15
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"
    YOUTUBE_ID2 = "b22tMEc6Kko"
    VIDEO_ID2 = i18n.get_video_id(YOUTUBE_ID2) or "dummy2"
    USERNAME = "******"
    PASSWORD = "******"

    def setUp(self):
        super(TestSaveVideoLog, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username=self.USERNAME,
                                 facility=self.facility)
        self.user.set_password(self.PASSWORD)
        self.user.save()

        # create an initial VideoLog instance so we have something to update later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID,
                                          youtube_id=self.YOUTUBE_ID,
                                          user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

    def test_new_videolog(self):

        # make sure the target video log does not already exist
        videologs = VideoLog.objects.filter(video_id=self.VIDEO_ID2,
                                            user__username=self.USERNAME)
        self.assertEqual(
            videologs.count(), 0,
            "The target video log to be newly created already exists")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME,
                          password=self.PASSWORD,
                          facility=self.facility.id)
        self.assertTrue(success, "Was not able to login as the test user")

        # save a new video log
        result = c.save_video_log(
            video_id=self.VIDEO_ID2,
            youtube_id=self.YOUTUBE_ID2,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED,
            points=self.NEW_POINTS,
            user=self.USERNAME,
        )
        self.assertEqual(
            result.status_code, 201,
            "An error (%d) was thrown while saving the video log." %
            result.status_code)

        # get a reference to the newly created VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID2,
                                        user__username=self.USERNAME)

        # make sure the VideoLog was properly created
        self.assertEqual(videolog.points, self.NEW_POINTS,
                         "The VideoLog's points were not saved correctly.")
        self.assertEqual(
            videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED,
            "The VideoLog's seconds watched was not saved correctly.")

    def test_update_videolog(self):

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog hasn't already been changed
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS,
                         "The VideoLog's points have already changed.")
        self.assertEqual(videolog.total_seconds_watched,
                         self.ORIGINAL_SECONDS_WATCHED,
                         "The VideoLog's seconds watched already changed.")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME,
                          password=self.PASSWORD,
                          facility=self.facility.id)
        self.assertTrue(success, "Was not able to login as the test user")

        # save a new record onto the video log, with a correct answer (increasing the points and streak)
        result = c.save_video_log(
            video_id=self.VIDEO_ID,
            youtube_id=self.YOUTUBE_ID,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED +
            self.NEW_SECONDS_WATCHED,
            points=self.ORIGINAL_POINTS + self.NEW_POINTS,
            user=self.USERNAME,
        )
        self.assertEqual(
            result.status_code, 201,
            "An error (%d) was thrown while saving the video log." %
            result.status_code)

        # get a reference to the updated VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID,
                                        user__username=self.USERNAME)

        # make sure the VideoLog was properly updated
        self.assertEqual(videolog.points,
                         self.ORIGINAL_POINTS + self.NEW_POINTS,
                         "The VideoLog's points were not updated correctly.")
        self.assertEqual(
            videolog.total_seconds_watched,
            self.ORIGINAL_SECONDS_WATCHED + self.NEW_SECONDS_WATCHED,
            "The VideoLog's seconds watched was not updated correctly.")
Example #13
0
def update_all_central_callback(request):
    """
    Callback after authentication.

    Parses out the request token verification.
    Then finishes the request by getting an auth token.
    """
    if not "ACCESS_TOKEN" in request.session:
        finish_auth(request)

    exercises = get_api_resource(request, "/api/v1/user/exercises")
    videos = get_api_resource(request, "/api/v1/user/videos")
    node_cache = get_node_cache()

    # Collate videos
    video_logs = []
    for video in videos:
        # Assume that KA videos are all english-language, not dubbed (for now)
        youtube_id = video.get('video', {}).get('youtube_id', "")
        video_id = get_video_id(
            youtube_id
        )  # map from youtube_id to video_id (across all languages)

        # Only save videos with progress
        if not video.get('seconds_watched', None):
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            video_logs.append({
                "video_id":
                video_id,
                "youtube_id":
                youtube_id,
                "total_seconds_watched":
                video['seconds_watched'],
                "points":
                VideoLog.calc_points(video['seconds_watched'],
                                     video['duration']),
                "complete":
                video['completed'],
                "completion_timestamp":
                convert_ka_date(video['last_watched'])
                if video['completed'] else None,
            })
            logging.debug("Got video log for %s: %s" %
                          (video_id, video_logs[-1]))
        except KeyError:
            logging.error(
                "Could not save video log for data with missing values: %s" %
                video)

    # Collate exercises
    exercise_logs = []
    for exercise in exercises:
        # Only save exercises that have any progress.
        if not exercise.get('last_done', None):
            continue

        # Only save exercise logs for exercises that we recognize.
        slug = exercise.get('exercise', "")
        if slug not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % slug)
            continue

        try:
            completed = exercise['streak'] >= 10
            basepoints = node_cache['Exercise'][slug][0]['basepoints']
            exercise_logs.append({
                "exercise_id":
                slug,
                "streak_progress":
                min(100, 100 * exercise['streak'] /
                    10),  # duplicates logic elsewhere
                "attempts":
                exercise['total_done'],
                "points":
                ExerciseLog.calc_points(
                    basepoints,
                    ncorrect=exercise['streak'],
                    add_randomness=False
                ),  # no randomness when importing from KA
                "complete":
                completed,
                "attempts_before_completion":
                exercise['total_done'] if not exercise['practiced'] else
                None,  #can't figure this out if they practiced after mastery.
                "completion_timestamp":
                convert_ka_date(exercise['proficient_date'])
                if completed else None,
            })
            logging.debug("Got exercise log for %s: %s" %
                          (slug, exercise_logs[-1]))
        except KeyError:
            logging.error(
                "Could not save exercise log for data with missing values: %s"
                % exercise)

    # POST the data back to the distributed server
    try:

        dthandler = lambda obj: obj.isoformat() if isinstance(
            obj, datetime.datetime) else None
        logging.debug("POST'ing to %s" %
                      request.session["distributed_callback_url"])
        response = requests.post(
            request.session["distributed_callback_url"],
            cookies={"csrftoken": request.session["distributed_csrf_token"]},
            data={
                "csrfmiddlewaretoken":
                request.session["distributed_csrf_token"],
                "video_logs": json.dumps(video_logs, default=dthandler),
                "exercise_logs": json.dumps(exercise_logs, default=dthandler),
                "user_id": request.session["distributed_user_id"],
            })
        logging.debug("Response (%d): %s" %
                      (response.status_code, response.content))
    except requests.exceptions.ConnectionError as e:
        return HttpResponseRedirect(
            set_query_params(
                request.session["distributed_redirect_url"], {
                    "message_type":
                    "error",
                    "message":
                    _("Could not connect to your KA Lite installation to share Khan Academy data."
                      ),
                    "message_id":
                    "id_khanload",
                }))
    except Exception as e:
        return HttpResponseRedirect(
            set_query_params(
                request.session["distributed_redirect_url"], {
                    "message_type":
                    "error",
                    "message":
                    _("Failure to send data to your KA Lite installation: %s")
                    % e,
                    "message_id":
                    "id_khanload",
                }))

    try:
        json_response = json.loads(response.content)
        if not isinstance(json_response, dict) or len(json_response) != 1:
            # Could not validate the message is a single key-value pair
            raise Exception(
                _("Unexpected response format from your KA Lite installation.")
            )
        message_type = json_response.keys()[0]
        message = json_response.values()[0]
    except ValueError as e:
        message_type = "error"
        message = unicode(e)
    except Exception as e:
        message_type = "error"
        message = _("Loading json object: %s") % e

    # If something broke on the distributed server, we have no way to recover.
    #   For now, just show the error to users.
    #
    # Ultimately, we have a message that we would like to share with the distributed server.
#    if response.status_code != 200:
#        return HttpResponseServerError(response.content)

    return HttpResponseRedirect(
        set_query_params(
            request.session["distributed_redirect_url"], {
                "message_type": message_type,
                "message": message,
                "message_id": "id_khanload",
            }))
Example #14
0
 def forwards(self, orm):
     # Setting the video ID
     for vlog in orm["main.VideoLog"].objects.all():
         vlog.video_id = i18n.get_video_id(
             vlog.youtube_id) or vlog.youtube_id
         vlog.save()
Example #15
0
class TestVideoLogs(KALiteTestCase):

    ORIGINAL_POINTS = 37
    ORIGINAL_SECONDS_WATCHED = 3
    NEW_POINTS = 22
    NEW_SECONDS_WATCHED = 5
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"

    def setUp(self):
        super(TestVideoLogs, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username="******", facility=self.facility)
        self.user.set_password("dumber")
        self.user.save()

        # create an initial VideoLog instance so we have something to collide with later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was created correctly
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS, "The VideoLog's points have already changed.")
        self.assertEqual(videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED, "The VideoLog's total seconds watched have already changed.")

    def test_videolog_update(self):

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # update the VideoLog
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was updated
        self.assertEqual(videolog2.points, self.NEW_POINTS, "The VideoLog's points were not updated.")
        self.assertEqual(videolog2.total_seconds_watched, self.NEW_SECONDS_WATCHED, "The VideoLog's total seconds watched were not updated.")

    @unittest.skip("Auto-merging is not yet automatic, so skip this")
    def test_videolog_collision(self):

        # create a new video log with the same youtube_id and user, but different points/total seconds watched
        videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED

        # try saving the new VideoLog: this is where the collision will happen, hopefully leading to a merge
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog has been properly merged
        self.assertEqual(videolog.points, max(self.ORIGINAL_POINTS, self.NEW_POINTS), "The VideoLog's points were not properly merged.")
        self.assertEqual(videolog.total_seconds_watched, max(self.ORIGINAL_SECONDS_WATCHED, self.NEW_SECONDS_WATCHED), "The VideoLog's total seconds watched were not properly merged.")
Example #16
0
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True:  # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects.filter(flagged_for_download=True, download_in_progress=False).exclude(
                    youtube_id__in=failed_youtube_ids
                )
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write(_("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write(
                    (_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id}
                )

                # Update the progress logging
                self.set_stages(
                    num_stages=video_count
                    + len(handled_youtube_ids)
                    + len(failed_youtube_ids)
                    + int(options["auto_cache"])
                )
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    ensure_dir(settings.CONTENT_ROOT)

                    progress_callback = partial(self.download_progress_callback, video)
                    try:
                        # Download via urllib
                        download_video(video.youtube_id, callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(
                            _("Retrieving youtube video %(youtube_id)s via youtube-dl")
                            % {"youtube_id": video.youtube_id}
                        )

                        def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                            if stats["status"] == "finished":
                                percent = 100.0
                            elif stats["status"] == "downloading":
                                percent = 100.0 * stats["downloaded_bytes"] / stats["total_bytes"]
                            else:
                                percent = 0.0
                            progress_callback(percent=percent)

                        scrape_video(
                            video.youtube_id,
                            quiet=not settings.DEBUG,
                            callback=partial(youtube_dl_cb, progress_callback=progress_callback),
                        )

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                    # caching.invalidate_all_caches()  # Unnecessary; we have a database listener for this.

                except DownloadCancelled:
                    # Cancellation event
                    video.percent_complete = 0
                    video.flagged_for_download = False
                    video.download_in_progress = False
                    video.save()
                    failed_youtube_ids.append(video.youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {
                        "youtube_id": video.youtube_id,
                        "error_msg": unicode(e),
                    }
                    self.stderr.write("%s\n" % msg)

                    # If a connection error, we should retry.
                    if isinstance(e, DownloadError):
                        connection_error = "[Errno 8]" in e.message
                    elif isinstance(e, IOError) and hasattr(e, "strerror"):
                        connection_error = e.strerror[0] == 8
                    else:
                        connection_error = False

                    video.download_in_progress = False
                    video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                    video.save()

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(
                        stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg}
                    )
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
                self.update_stage(
                    stage_name=handled_youtube_ids[-1],  # self.video is never assigned in this method; handled ids are guaranteed non-empty here
                    stage_percent=0,
                    notes=_("Generating all pages related to videos."),
                )
                caching.regenerate_all_pages_related_to_videos(
                    video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids]))
                )

            # Update
            self.complete(
                notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.")
                % {
                    "num_handled_videos": len(handled_youtube_ids),
                    "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
                }
            )

        except Exception as e:
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise
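A small sketch of how the youtube_dl_cb helper above maps youtube-dl style progress dicts onto a single percentage for download_progress_callback; the byte counts are made-up values:

    collected = []
    youtube_dl_cb({"status": "downloading", "downloaded_bytes": 512, "total_bytes": 2048},
                  progress_callback=lambda percent: collected.append(percent))
    youtube_dl_cb({"status": "finished"},
                  progress_callback=lambda percent: collected.append(percent))
    # collected == [25.0, 100.0]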
Example #17
0
def show_logs(request, ndays=None):
    """Show file-based logging info for video downloads, language packs, and subtitles"""
    ndays = ndays or int(request.GET.get("days", 7))

    def get_logger_filename(logger_type):
        return stats_logger(logger_type).handlers[0].baseFilename

    def parse_data(logger_type, data_fields, windowsize=128, ndays=None):
        parsed_data = {}
        nparts = len(data_fields)
        summary_data = dict([(fld, {}) for fld in (data_fields + ["date"])])

        filepath = get_logger_filename(logger_type)
        if not os.path.exists(filepath):
            return (parsed_data, summary_data)

        # Group by ip, date, and youtube_id
        old_data = ""
        first_loop = True
        last_loop = False
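        # Read the log backwards in windowsize-sized chunks so that only the most
        #   recent entries need to be parsed; old_data carries any partial line
        #   between reads, and last_loop marks the final pass from the start of the file.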
        with open(filepath, "r") as fp:
            fp.seek(0, 2)  # go to the end of the stream
            while True:
                # Read the next chunk of data
                try:
                    # Get the data
                    try:
                        if first_loop:
                            fp.seek(-windowsize, 1)  # go backwards by a few
                            first_loop = False
                        else:
                            fp.seek(-2 * windowsize, 1)  # go backwards by a few

                        cur_data = fp.read(windowsize) + old_data
                    except:
                        if last_loop and not old_data:
                            raise
                        elif last_loop:
                            cur_data = old_data
                            old_data = ""
                        else:
                            last_loop = True
                            fp.seek(0)
                            cur_data = fp.read(windowsize) + old_data  # could be some overlap...

                    if not cur_data:
                        break
                except:
                    break

                # Parse the data
                lines = cur_data.split("\n")
                old_data = lines[0] if len(lines) > 1 else ""
                new_data = lines[1:] if len(lines) > 1 else lines
                for l in new_data:
                    if not l:
                        continue

                    # All start with a date
                    parts = l.split(" - ", 2)
                    if len(parts) != 2:
                        continue
                    tim = parts[0]
                    dat = tim.split(" ")[0]

                    # Validate that this date is within the accepted range
                    parsed_date = datetime.datetime.strptime(dat, "%Y-%m-%d")
                    #logging.debug("%s %s" % (parsed_date, (datetime.datetime.now() - timedelta(days=ndays))))
                    if ndays is not None and datetime.datetime.now() - timedelta(days=ndays) > parsed_date:
                        last_loop = True
                        old_data = ""
                        break

                    # The rest is semicolon-delimited
                    parts = parts[1].split(";")  # vd;127.0.0.1;xvnpSRO9IDM

                    # Now save things off
                    parsed_data[tim] = dict([(data_fields[idx], parts[idx]) for idx in range(nparts)])
                    summary_data["date"][dat] = 1 + summary_data["date"].get(dat, 0)
                    for idx in range(nparts):
                        summary_data[data_fields[idx]][parts[idx]] = 1 + summary_data[data_fields[idx]].get(parts[idx], 0)

        for key, val in summary_data.iteritems():
            summary_data[key] = sorted_dict(val, key=lambda t: t[0])

        return (parsed_data, summary_data)

    (video_raw_data, video_summary_data) = parse_data("videos", ["task_id", "ip_address", "youtube_id"], ndays=ndays)
    (lp_raw_data, lp_summary_data)       = parse_data("language_packs", ["task_id", "ip_address", "lang_code", "version"], ndays=ndays)
    (srt_raw_data, srt_summary_data)     = parse_data("subtitles", ["task_id", "ip_address", "lang_code", "youtube_id"], ndays=ndays)

    return {
        "ndays": ndays,
        "videos": {
            "raw": video_raw_data,
            "dates": video_summary_data["date"],
            "ips": video_summary_data["ip_address"],
            "slugs": sum_counter(video_summary_data["youtube_id"], fn=lambda yid: get_id2slug_map().get(get_video_id(yid))),
            "lang_codes": sum_counter(video_summary_data["youtube_id"], fn=lambda yid: get_video_language(yid)),
        },
        "language_packs": {
            "raw": lp_raw_data,
            "dates": lp_summary_data["date"],
            "ips": lp_summary_data["ip_address"],
            "lang_codes": lp_summary_data["lang_code"],
            "versions": lp_summary_data["version"],
        },
        "subtitles": {
            "raw": srt_raw_data,
            "dates": srt_summary_data["date"],
            "ips": srt_summary_data["ip_address"],
            "lang_codes": srt_summary_data["lang_code"],
        },
    }
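For reference, parse_data expects each log line to start with a timestamp whose date portion is "%Y-%m-%d", followed by " - " and the semicolon-delimited fields named in data_fields; a minimal sketch of how one such line splits, using made-up values (the time format after the date is an assumption):

    line = "2014-01-15 12:30:01 - vd;127.0.0.1;xvnpSRO9IDM"
    tim, rest = line.split(" - ", 1)
    dat = tim.split(" ")[0]                            # "2014-01-15", keyed into the per-day summary
    task_id, ip_address, youtube_id = rest.split(";")  # field order matches the "videos" call above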
Example #18
0
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True:  # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write(
                        _("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write(
                    (_("Downloading video '%(youtube_id)s'...") + "\n") %
                    {"youtube_id": video.youtube_id})

                # Update the progress logging
                self.set_stages(
                    num_stages=video_count + len(handled_youtube_ids) +
                    len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    ensure_dir(settings.CONTENT_ROOT)

                    progress_callback = partial(
                        self.download_progress_callback, video)
                    try:
                        # Download via urllib
                        download_video(video.youtube_id,
                                       callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(
                            _("Retrieving youtube video %(youtube_id)s via youtube-dl"
                              ) % {"youtube_id": video.youtube_id})

                        def youtube_dl_cb(stats, progress_callback, *args,
                                          **kwargs):
                            if stats['status'] == "finished":
                                percent = 100.
                            elif stats['status'] == "downloading":
                                percent = 100. * stats[
                                    'downloaded_bytes'] / stats['total_bytes']
                            else:
                                percent = 0.
                            progress_callback(percent=percent)

                        scrape_video(video.youtube_id,
                                     quiet=not settings.DEBUG,
                                     callback=partial(
                                         youtube_dl_cb,
                                         progress_callback=progress_callback))

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                    # caching.invalidate_all_caches()  # Unnecessary; we have a database listener for this.

                except DownloadCancelled:
                    # Cancellation event
                    video.percent_complete = 0
                    video.flagged_for_download = False
                    video.download_in_progress = False
                    video.save()
                    failed_youtube_ids.append(video.youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _(
                        "Error in downloading %(youtube_id)s: %(error_msg)s"
                    ) % {
                        "youtube_id": video.youtube_id,
                        "error_msg": unicode(e)
                    }
                    self.stderr.write("%s\n" % msg)

                    # If a connection error, we should retry.
                    if isinstance(e, DownloadError):
                        connection_error = "[Errno 8]" in e.message
                    elif isinstance(e, IOError) and hasattr(e, "strerror"):
                        connection_error = e.strerror[0] == 8
                    else:
                        connection_error = False

                    video.download_in_progress = False
                    video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                    video.save()

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(
                        stage_status="error",
                        notes=_("%(error_msg)s; continuing to next video.") %
                        {"error_msg": msg})
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled(
            ) and handled_youtube_ids:
                self.update_stage(
                    stage_name=handled_youtube_ids[-1],  # self.video is never assigned in this method; handled ids are guaranteed non-empty here
                    stage_percent=0,
                    notes=_("Generating all pages related to videos."))
                caching.regenerate_all_pages_related_to_videos(video_ids=list(
                    set([
                        i18n.get_video_id(yid) or yid
                        for yid in handled_youtube_ids
                    ])))

            # Update
            self.complete(
                notes=
                _("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully."
                  ) % {
                      "num_handled_videos":
                      len(handled_youtube_ids),
                      "num_total_videos":
                      len(handled_youtube_ids) + len(failed_youtube_ids),
                  })

        except Exception as e:
            self.cancel(stage_status="error",
                        notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise
Example #19
0
def get_video_node_by_youtube_id(youtube_id):
    """Returns the video node corresponding to the video_id of the given youtube_id, or None"""
    video_id = i18n.get_video_id(youtube_id=youtube_id)
    return topic_tools.get_node_cache("Content").get(video_id, [None])
Example #20
0
def show_logs(request, ndays=None):
    """Show file-based logging info for video downloads, language packs, and subtitles"""
    ndays = ndays or int(request.GET.get("days", 7))

    def get_logger_filename(logger_type):
        return stats_logger(logger_type).handlers[0].baseFilename

    def parse_data(logger_type, data_fields, windowsize=128, ndays=None):
        parsed_data = {}
        nparts = len(data_fields)
        summary_data = dict([(fld, {}) for fld in (data_fields + ["date"])])

        filepath = get_logger_filename(logger_type)
        if not os.path.exists(filepath):
            return (parsed_data, summary_data)

        # Group by ip, date, and youtube_id
        old_data = ""
        first_loop = True
        last_loop = False
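        # Read the log backwards in windowsize-sized chunks so that only the most
        #   recent entries need to be parsed; old_data carries any partial line
        #   between reads, and last_loop marks the final pass from the start of the file.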
        with open(filepath, "r") as fp:
            fp.seek(0, 2)  # go to the end of the stream
            while True:
                # Read the next chunk of data
                try:
                    # Get the data
                    try:
                        if first_loop:
                            fp.seek(-windowsize, 1)  # go backwards by a few
                            first_loop = False
                        else:
                            fp.seek(-2 * windowsize,
                                    1)  # go backwards by a few

                        cur_data = fp.read(windowsize) + old_data
                    except:
                        if last_loop and not old_data:
                            raise
                        elif last_loop:
                            cur_data = old_data
                            old_data = ""
                        else:
                            last_loop = True
                            fp.seek(0)
                            cur_data = fp.read(
                                windowsize
                            ) + old_data  # could be some overlap...

                    if not cur_data:
                        break
                except:
                    break

                # Parse the data
                lines = cur_data.split("\n")
                old_data = lines[0] if len(lines) > 1 else ""
                new_data = lines[1:] if len(lines) > 1 else lines
                for l in new_data:
                    if not l:
                        continue

                    # All start with a date
                    parts = l.split(" - ", 2)
                    if len(parts) != 2:
                        continue
                    tim = parts[0]
                    dat = tim.split(" ")[0]

                    # Validate that this date is within the accepted range
                    parsed_date = datetime.datetime.strptime(dat, "%Y-%m-%d")
                    #logging.debug("%s %s" % (parsed_date, (datetime.datetime.now() - timedelta(days=ndays))))
                    if ndays is not None and datetime.datetime.now(
                    ) - timedelta(days=ndays) > parsed_date:
                        last_loop = True
                        old_data = ""
                        break

                    # The rest is semicolon-delimited
                    parts = parts[1].split(";")  # vd;127.0.0.1;xvnpSRO9IDM

                    # Now save things off
                    parsed_data[tim] = dict([(data_fields[idx], parts[idx])
                                             for idx in range(nparts)])
                    summary_data["date"][dat] = 1 + summary_data["date"].get(
                        dat, 0)
                    for idx in range(nparts):
                        summary_data[data_fields[idx]][parts[
                            idx]] = 1 + summary_data[data_fields[idx]].get(
                                parts[idx], 0)

        for key, val in summary_data.iteritems():
            summary_data[key] = sorted_dict(val, key=lambda t: t[0])

        return (parsed_data, summary_data)

    (video_raw_data,
     video_summary_data) = parse_data("videos",
                                      ["task_id", "ip_address", "youtube_id"],
                                      ndays=ndays)
    (lp_raw_data, lp_summary_data) = parse_data(
        "language_packs", ["task_id", "ip_address", "lang_code", "version"],
        ndays=ndays)
    (srt_raw_data, srt_summary_data) = parse_data(
        "subtitles", ["task_id", "ip_address", "lang_code", "youtube_id"],
        ndays=ndays)

    return {
        "ndays": ndays,
        "videos": {
            "raw":
            video_raw_data,
            "dates":
            video_summary_data["date"],
            "ips":
            video_summary_data["ip_address"],
            "slugs":
            sum_counter(
                video_summary_data["youtube_id"],
                fn=lambda yid: get_id2slug_map().get(get_video_id(yid))),
            "lang_codes":
            sum_counter(video_summary_data["youtube_id"],
                        fn=lambda yid: get_video_language(yid)),
        },
        "language_packs": {
            "raw": lp_raw_data,
            "dates": lp_summary_data["date"],
            "ips": lp_summary_data["ip_address"],
            "lang_codes": lp_summary_data["lang_code"],
            "versions": lp_summary_data["version"],
        },
        "subtitles": {
            "raw": srt_raw_data,
            "dates": srt_summary_data["date"],
            "ips": srt_summary_data["ip_address"],
            "lang_codes": srt_summary_data["lang_code"],
        },
    }
Example #21
0
def get_video_by_youtube_id(youtube_id):
    # TODO(bcipolli): will need to change for dubbed videos
    video_id = i18n.get_video_id(youtube_id=youtube_id)
    return get_node_cache("Video").get(video_id, [None])[0]