def validate_times(srt_content, srt_issues):
            times = re.findall("([0-9:,]+) --> ([0-9:,]+)\r\n", srt_content, re.S | re.M)

            parse_time = lambda str: datetime.datetime.strptime(str, "%H:%M:%S,%f")
            for i in range(len(times)):
                try:
                    between_subtitle_time = datediff(parse_time(times[i][0]), parse_time(times[i-1][1] if i > 0 else "00:00:00,000"))
                    within_subtitle_time  = datediff(parse_time(times[i][1]), parse_time(times[i][0]))

                    if between_subtitle_time > 60.:
                        srt_issues.append("Between-subtitle gap of %5.2f seconds" % between_subtitle_time)

                    if within_subtitle_time > 60.:
                        srt_issues.append("Within-subtitle duration of %5.2f seconds" % within_subtitle_time)
                    elif within_subtitle_time == 0.:
                        logging.debug("Subtitle flies by too fast (%s --> %s)." % times[i])

                    #print "Start: %s\tB: %5.2f\tW: %5.2f" % (parse_time(times[i][0]), between_subtitle_time, within_subtitle_time)
                except Exception as e:
                    if not times[i][1].startswith('99:59:59'):
                        srt_issues.append("Error checking times: %s" % e)
                    else:
                        if len(times) - i > 1 and len(times) - i - 1 > len(times)/10.:
                            if i == 0:
                                srt_issues.append("No subtitles have a valid starting point.")
                            else:
                                logging.debug("Hit end of movie, but %d (of %d) subtitle(s) remain in the queue." % (len(times) - i - 1, len(times)))
                        break
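
For reference, the regex and strptime pattern above assume standard SRT cue timing lines: two HH:MM:SS,mmm timestamps joined by " --> " and terminated by CRLF. A small self-contained check of that same parsing logic (the sample subtitle content below is made up for illustration):

import datetime
import re

sample_srt = "1\r\n00:00:01,000 --> 00:00:03,500\r\nHello world\r\n\r\n"

# Same pattern as validate_times: capture the start and end timestamp of each cue.
pairs = re.findall("([0-9:,]+) --> ([0-9:,]+)\r\n", sample_srt, re.S | re.M)
start, end = pairs[0]

# Mirrors the parse_time helper above.
parse_time = lambda s: datetime.datetime.strptime(s, "%H:%M:%S,%f")
print(parse_time(end) - parse_time(start))  # 0:00:02.500000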
Example #2
    def test_sign(self):
        """
        Test the diff in both directions; validate that the results have opposite signs.
        """
        d1 = datetime.datetime(2000, 1, 1, 0, 0, 0, 0)
        d2 = datetime.datetime(2000, 1, 1, 0, 0, 0, 1)  # one microsecond after d1

        self.assertTrue(datediff(d1,d2) < 0, "First date earlier than the second returns negative.")
        self.assertTrue(datediff(d2,d1) > 0, "Second date earlier than the first returns positive.")
        self.assertTrue(datediff(d2,d2) == 0, "First date equals the second returns 0.")
Example #3
    def test_sign(self):
        """
        Test the diff in both directions; validate that the results have opposite signs.
        """
        d1 = datetime.datetime(2000, 1, 1, 0, 0, 0, 0)
        d2 = datetime.datetime(2000, 1, 1, 0, 0, 0, 1)  # one microsecond after d1

        self.assertTrue(
            datediff(d1, d2) < 0,
            "First date earlier than the second returns negative.")
        self.assertTrue(
            datediff(d2, d1) > 0,
            "Second date earlier than the first returns positive.")
        self.assertTrue(
            datediff(d2, d2) == 0, "First date equals the second returns 0.")
Example #4
    def handle(self, *args, **options):

        # Get the CSV data, either from a recent cache_file
        #   or from the internet
        cache_dir = settings.MEDIA_ROOT
        cache_file = os.path.join(cache_dir, "dubbed_videos.csv")
        if os.path.exists(cache_file) and datediff(datetime.datetime.now(), datetime.datetime.fromtimestamp(os.path.getctime(cache_file)), units="days") <= 14.0:
            # Use cached data to generate the video map
            csv_data = open(cache_file, "r").read()
            (video_map, _) = generate_dubbed_video_mappings(csv_data=csv_data)
    
        else:
            # Download fresh data and generate the video map
            (video_map, csv_data) = generate_dubbed_video_mappings()

            try:
                ensure_dir(cache_dir)
                with open(cache_file, "w") as fp:
                    fp.write(csv_data)
            except Exception as e:
                logging.error("Failed to make a local cache of the CSV data: %s" % e)



        # Now we've built the map.  Save it.
        out_file = DUBBED_VIDEOS_MAPPING_FILE
        ensure_dir(os.path.dirname(out_file))
        logging.info("Saving data to %s" % out_file)
        with open(out_file, "w") as fp:
            json.dump(video_map, fp)

        logging.info("Done.")
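
The freshness check above (the cache file exists and its creation time is within 14 days) recurs in several of these examples. Below is a standalone sketch of that pattern using the same datediff call; the helper name is_fresh is illustrative only, and it assumes datediff is importable from the project's utility module:

import datetime
import os

def is_fresh(path, max_age_days=14.0):
    """Return True if `path` exists and was created within `max_age_days` days."""
    if not os.path.exists(path):
        return False
    created = datetime.datetime.fromtimestamp(os.path.getctime(path))
    # datediff: the utility these examples demonstrate (assumed to be in scope).
    return datediff(datetime.datetime.now(), created, units="days") <= max_age_days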
Example #5
def add_to_summary(sender, **kwargs):
    assert UserLog.is_enabled(), "We shouldn't be saving unless UserLog is enabled."
    
    instance = kwargs["instance"]

    if not instance.start_datetime:
        raise ValidationError("start_datetime cannot be None")
    if instance.last_active_datetime and instance.start_datetime > instance.last_active_datetime:
        raise ValidationError("UserLog date consistency check for start_datetime and last_active_datetime")

    if instance.end_datetime and not instance.total_seconds:
        # Compute total_seconds, save to summary
        #   Note: only supports setting end_datetime once!
        instance.full_clean()

        # The top computation is more lenient: user activity is just time logged in, literally.
        # The bottom computation is more strict: user activity is from start until the last "action"
        #   recorded--in the current case, that means from login until the last moment an exercise or
        #   video log was updated.
        #instance.total_seconds = datediff(instance.end_datetime, instance.start_datetime, units="seconds")
        instance.total_seconds = 0 if not instance.last_active_datetime else datediff(instance.last_active_datetime, instance.start_datetime, units="seconds")

        # Confirm the result (output info first for easier debugging)
        if instance.total_seconds < 0:
            raise ValidationError("Total learning time should always be non-negative.")
        logging.debug("%s: total time (%d): %d seconds" % (instance.user.username, instance.activity_type, instance.total_seconds))

        # Save only completed log items to the UserLogSummary
        UserLogSummary.add_log_to_summary(instance)
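
The add_to_summary(sender, **kwargs) signature, with the saved object arriving as kwargs["instance"], is the shape Django expects of a model-signal receiver. A hedged sketch of how such a receiver could be registered (the source may connect it elsewhere, or use the @receiver decorator):

from django.db.models.signals import pre_save

# Assumed wiring: run add_to_summary before each UserLog save.
pre_save.connect(add_to_summary, sender=UserLog)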
Example #6
    def handle(self, *args, **options):

        # Get the CSV data, either from a recent cache_file
        #   or from the internet
        cache_dir = settings.MEDIA_ROOT
        cache_file = os.path.join(cache_dir, "dubbed_videos.csv")
        if os.path.exists(cache_file) and datediff(
                datetime.datetime.now(),
                datetime.datetime.fromtimestamp(os.path.getctime(cache_file)),
                units="days") <= 14.0:
            # Use cached data to generate the video map
            csv_data = open(cache_file, "r").read()
            (video_map, _) = generate_dubbed_video_mappings(csv_data=csv_data)

        else:
            # Download fresh data and generate the video map
            (video_map, csv_data) = generate_dubbed_video_mappings()

            try:
                ensure_dir(cache_dir)
                with open(cache_file, "w") as fp:
                    fp.write(csv_data)
            except Exception as e:
                logging.error(
                    "Failed to make a local cache of the CSV data: %s" % e)

        # Now we've built the map.  Save it.
        out_file = DUBBED_VIDEOS_MAPPING_FILE
        ensure_dir(os.path.dirname(out_file))
        logging.info("Saving data to %s" % out_file)
        with open(out_file, "w") as fp:
            json.dump(video_map, fp)

        logging.info("Done.")
Example #7
def add_to_summary(sender, **kwargs):
    assert UserLog.is_enabled(), "We shouldn't be saving unless UserLog is enabled."

    instance = kwargs["instance"]

    if not instance.start_datetime:
        raise ValidationError("start_datetime cannot be None")
    if instance.last_active_datetime and instance.start_datetime > instance.last_active_datetime:
        raise ValidationError("UserLog date consistency check for start_datetime and last_active_datetime")

    if instance.end_datetime and not instance.total_seconds:
        # Compute total_seconds, save to summary
        #   Note: only supports setting end_datetime once!
        instance.full_clean()

        # The top computation is more lenient: user activity is just time logged in, literally.
        # The bottom computation is more strict: user activity is from start until the last "action"
        #   recorded--in the current case, that means from login until the last moment an exercise or
        #   video log was updated.
        #instance.total_seconds = datediff(instance.end_datetime, instance.start_datetime, units="seconds")
        instance.total_seconds = 0 if not instance.last_active_datetime else datediff(instance.last_active_datetime, instance.start_datetime, units="seconds")

        # Confirm the result (output info first for easier debugging)
        if instance.total_seconds < 0:
            raise ValidationError("Total learning time should always be non-negative.")
        logging.debug("%s: total time (%d): %d seconds" % (instance.user.username, instance.activity_type, instance.total_seconds))

        # Save only completed log items to the UserLogSummary
        UserLogSummary.add_log_to_summary(instance)
Example #8
    def save(self, *args, **kwargs):
        """When this model is saved, check if the activity is ended.
        If so, compute total_seconds and update the corresponding summary log."""

        # Do nothing if the max # of records is zero or None
        # (i.e. this functionality is disabled)
        if not settings.USER_LOG_MAX_RECORDS:
            return

        # Compute total_seconds, save to summary
        #   Note: only supports setting end_datetime once!
        if self.end_datetime and not self.total_seconds:
            self.full_clean()

            # The top computation is more lenient: user activity is just time logged in, literally.
            # The bottom computation is more strict: user activity is from start until the last "action"
            #   recorded--in the current case, that means from login until the last moment an exercise or
            #   video log was updated.
            #self.total_seconds = datediff(self.end_datetime, self.start_datetime, units="seconds")
            self.total_seconds = 0 if not self.last_active_datetime else datediff(self.last_active_datetime, self.start_datetime, units="seconds")

            # Confirm the result (output info first for easier debugging)
            logging.debug("%s: total learning time: %d seconds" % (self.user.username, self.total_seconds))
            assert self.total_seconds >= 0, "Total learning time should always be non-negative."

            # Save only completed log items to the UserLogSummary
            UserLogSummary.add_log_to_summary(self)
        super(UserLog, self).save(*args, **kwargs)

        if UserLog.objects.count() > settings.USER_LOG_MAX_RECORDS:
            # Unfortunately, could not do an aggregate delete when doing a
            #   slice in query
            to_discard = UserLog.objects.order_by("start_datetime")[0:UserLog.objects.count()-settings.USER_LOG_MAX_RECORDS]
            UserLog.objects.filter(pk__in=to_discard).delete()
Example #9
def download_khan_data(url, debug_cache_file=None, debug_cache_dir=khanload.KHANLOAD_CACHE_DIR):
    """Download data from the given url.

    In DEBUG mode, these downloads are slow.  So for the sake of faster iteration,
    save the download to disk and re-serve it up again, rather than download again,
    if the cached file is recent enough (per the threshold below).
    """
    # Get the filename
    if not debug_cache_file:
        debug_cache_file = url.split("/")[-1] + ".json"

    # Create a directory to store these cached json files
    if not os.path.exists(debug_cache_dir):
        os.mkdir(debug_cache_dir)
    debug_cache_file = os.path.join(debug_cache_dir, debug_cache_file)

    # Use the cache file if:
    # a) We're in DEBUG mode
    # b) The debug cache file exists
    # c) It hasn't exceeded the freshness threshold (1E6 days here, i.e. effectively always).
    if settings.DEBUG and os.path.exists(debug_cache_file) and datediff(datetime.datetime.now(), datetime.datetime.fromtimestamp(os.path.getctime(debug_cache_file)), units="days") <= 1E6:
        # Slow to debug, so keep a local cache in the debug case only.
        #sys.stdout.write("Using cached file: %s\n" % debug_cache_file)
        with open(debug_cache_file, "r") as fp:
            data = json.load(fp)
    else:
        sys.stdout.write("Downloading data from %s..." % url)
        sys.stdout.flush()
        data = json.loads(requests.get(url).content)
        sys.stdout.write("done.\n")
        # In DEBUG mode, store the debug cache file.
        if settings.DEBUG:
            with open(debug_cache_file, "w") as fh:
                fh.write(json.dumps(data))
    return data
Example #10
def download_khan_data(url, debug_cache_file=None, debug_cache_dir=settings.PROJECT_PATH + "../_khanload_cache"):
    """Download data from the given url.

    In DEBUG mode, these downloads are slow.  So for the sake of faster iteration,
    save the download to disk and re-serve it up again, rather than download again,
    if the cached file is recent enough (per the threshold below).
    """
    # Get the filename
    if not debug_cache_file:
        debug_cache_file = url.split("/")[-1] + ".json"

    # Create a directory to store these cached json files
    if not os.path.exists(debug_cache_dir):
        os.mkdir(debug_cache_dir)
    debug_cache_file = os.path.join(debug_cache_dir, debug_cache_file)

    # Use the cache file if:
    # a) We're in DEBUG mode
    # b) The debug cache file exists
    # c) It's less than 14 days old.
    if settings.DEBUG and os.path.exists(debug_cache_file) and datediff(datetime.datetime.now(), datetime.datetime.fromtimestamp(os.path.getctime(debug_cache_file)), units="days") <= 14.0:
        # Slow to debug, so keep a local cache in the debug case only.
        #sys.stdout.write("Using cached file: %s\n" % debug_cache_file)
        data = json.loads(open(debug_cache_file).read())
    else:
        sys.stdout.write("Downloading data from %s..." % url)
        sys.stdout.flush()
        data = json.loads(requests.get(url).content)
        sys.stdout.write("done.\n")
        # In DEBUG mode, store the debug cache file.
        if settings.DEBUG:
            with open(debug_cache_file, "w") as fh:
                fh.write(json.dumps(data))
    return data
Example #11
    def save(self, *args, **kwargs):
        """When this model is saved, check if the activity is ended.
        If so, compute total_seconds and update the corresponding summary log."""

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not self.is_enabled():
            return

        if not self.start_datetime:
            raise ValidationError("start_datetime cannot be None")
        if self.last_active_datetime and self.start_datetime > self.last_active_datetime:
            raise ValidationError("UserLog date consistency check for start_datetime and last_active_datetime")

        if not self.end_datetime:
            # Conflict_resolution
            related_open_logs = UserLog.objects \
                .filter(user=self.user, activity_type=self.activity_type, end_datetime__isnull=True) \
                .exclude(pk=self.pk)
            for log in related_open_logs:
                log.end_datetime = datetime.now()
                log.save()

        elif not self.total_seconds:
            # Compute total_seconds, save to summary
            #   Note: only supports setting end_datetime once!
            self.full_clean()

            # The top computation is more lenient: user activity is just time logged in, literally.
            # The bottom computation is more strict: user activity is from start until the last "action"
            #   recorded--in the current case, that means from login until the last moment an exercise or
            #   video log was updated.
            #self.total_seconds = datediff(self.end_datetime, self.start_datetime, units="seconds")
            self.total_seconds = 0 if not self.last_active_datetime else datediff(self.last_active_datetime, self.start_datetime, units="seconds")

            # Confirm the result (output info first for easier debugging)
            logging.debug("%s: total time (%d): %d seconds" % (self.user.username, self.activity_type, self.total_seconds))
            if self.total_seconds < 0:
                raise ValidationError("Total learning time should always be non-negative.")

            # Save only completed log items to the UserLogSummary
            UserLogSummary.add_log_to_summary(self)

        # This is inefficient only if something goes awry.  Otherwise,
        #   this will really only do something on ADD.
        #   AND, if you're using recommended config (USER_LOG_MAX_RECORDS_PER_USER == 1),
        #   this will be very efficient.
        if settings.USER_LOG_MAX_RECORDS_PER_USER:  # Works for None, out of the box
            current_models = UserLog.objects.filter(user=self.user, activity_type=self.activity_type)
            if current_models.count() > settings.USER_LOG_MAX_RECORDS_PER_USER:
                # Unfortunately, could not do an aggregate delete when doing a
                #   slice in query
                to_discard = current_models \
                    .order_by("start_datetime")[0:current_models.count() - settings.USER_LOG_MAX_RECORDS_PER_USER]
                UserLog.objects.filter(pk__in=to_discard).delete()

        # Do it here, for efficiency of the above delete.
        super(UserLog, self).save(*args, **kwargs)
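
Both save() variants in these examples are gated by settings flags: USER_LOG_MAX_RECORDS (a global cap on stored UserLog rows; a falsy value short-circuits the method) and USER_LOG_MAX_RECORDS_PER_USER (a per-user, per-activity cap, with 1 described above as the recommended value). An illustrative settings snippet; the numeric values here are placeholders, not taken from the source:

# settings.py (illustrative values only)
USER_LOG_MAX_RECORDS = 1000           # falsy (0 or None) disables this logging path
USER_LOG_MAX_RECORDS_PER_USER = 1     # keep only the newest record per user and activity_type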
Example #12
        def validate_times(srt_content, srt_issues):
            times = re.findall("([0-9:,]+) --> ([0-9:,]+)\r\n", srt_content,
                               re.S | re.M)

            parse_time = lambda str: datetime.datetime.strptime(
                str, "%H:%M:%S,%f")
            for i in range(len(times)):
                try:
                    between_subtitle_time = datediff(
                        parse_time(times[i][0]),
                        parse_time(times[i -
                                         1][1] if i > 0 else "00:00:00,000"))
                    within_subtitle_time = datediff(parse_time(times[i][1]),
                                                    parse_time(times[i][0]))

                    if between_subtitle_time > 60.:
                        srt_issues.append(
                            "Between-subtitle gap of %5.2f seconds" %
                            between_subtitle_time)

                    if within_subtitle_time > 60.:
                        srt_issues.append(
                            "Within-subtitle duration of %5.2f seconds" %
                            within_subtitle_time)
                    elif within_subtitle_time == 0.:
                        logging.debug(
                            "Subtitle flies by too fast (%s --> %s)." %
                            times[i])

                    #print "Start: %s\tB: %5.2f\tW: %5.2f" % (parse_time(times[i][0]), between_subtitle_time, within_subtitle_time)
                except Exception as e:
                    if not times[i][1].startswith('99:59:59'):
                        srt_issues.append("Error checking times: %s" % e)
                    else:
                        if len(times) - i > 1 and len(
                                times) - i - 1 > len(times) / 10.:
                            if i == 0:
                                srt_issues.append(
                                    "No subtitles have a valid starting point."
                                )
                            else:
                                logging.debug(
                                    "Hit end of movie, but %d (of %d) subtitle(s) remain in the queue."
                                    % (len(times) - i - 1, len(times)))
                        break
Example #13
    def test_cancel_progress(self):
    
        # Create the object
        progress_log = UpdateProgressLog(process_name="test_process", total_stages=1)
        progress_log.save()

        # Complete the process
        progress_log.cancel_progress()
        self.assertTrue(abs(datediff(progress_log.end_time, datetime.datetime.now())) < 10, "end time is within 10 seconds")
        self.assertEqual(progress_log.completed, False, "completed is False")
Example #14
    def test_completion(self):
    
        # Create the object
        progress_log = UpdateProgressLog(process_name="test_process", total_stages=1)
        progress_log.save()

        # Complete the process
        progress_log.mark_as_completed()
        self.assertTrue(abs(datediff(progress_log.end_time, datetime.datetime.now())) < 10, "end time is within 10 seconds")
        self.assertEqual(progress_log.stage_percent, 1., "stage_percent==1")
        self.assertEqual(progress_log.process_percent, 1., "process_percent==1")
        self.assertEqual(progress_log.completed, True, "completed is True")
Example #15
    def test_cancel_progress(self):

        # Create the object
        progress_log = UpdateProgressLog(process_name="test_process",
                                         total_stages=1)
        progress_log.save()

        # Complete the process
        progress_log.cancel_progress()
        self.assertTrue(
            abs(datediff(progress_log.end_time, datetime.datetime.now())) < 10,
            "end time is within 10 seconds")
        self.assertEqual(progress_log.completed, False, "completed is False")
Example #16
    def test_completion(self):

        # Create the object
        progress_log = UpdateProgressLog(process_name="test_process",
                                         total_stages=1)
        progress_log.save()

        # Complete the process
        progress_log.mark_as_completed()
        self.assertTrue(
            abs(datediff(progress_log.end_time, datetime.datetime.now())) < 10,
            "end time is within 10 seconds")
        self.assertEqual(progress_log.stage_percent, 1., "stage_percent==1")
        self.assertEqual(progress_log.process_percent, 1., "process_percent==1")
        self.assertEqual(progress_log.completed, True, "completed is True")
Example #17
    def save(self, *args, **kwargs):
        """When this model is saved, check if the activity is ended.
        If so, compute total_seconds and update the corresponding summary log."""

        # Do nothing if the max # of records is zero or None
        # (i.e. this functionality is disabled)
        if not settings.USER_LOG_MAX_RECORDS:
            return

        # Compute total_seconds, save to summary
        #   Note: only supports setting end_datetime once!
        if self.end_datetime and not self.total_seconds:
            self.full_clean()

            # The top computation is more lenient: user activity is just time logged in, literally.
            # The bottom computation is more strict: user activity is from start until the last "action"
            #   recorded--in the current case, that means from login until the last moment an exercise or
            #   video log was updated.
            #self.total_seconds = datediff(self.end_datetime, self.start_datetime, units="seconds")
            self.total_seconds = 0 if not self.last_active_datetime else datediff(
                self.last_active_datetime,
                self.start_datetime,
                units="seconds")

            # Confirm the result (output info first for easier debugging)
            logging.debug("%s: total learning time: %d seconds" %
                          (self.user.username, self.total_seconds))
            assert self.total_seconds >= 0, "Total learning time should always be non-negative."

            # Save only completed log items to the UserLogSummary
            UserLogSummary.add_log_to_summary(self)
        super(UserLog, self).save(*args, **kwargs)

        if UserLog.objects.count() > settings.USER_LOG_MAX_RECORDS:
            # Unfortunately, could not do an aggregate delete when doing a
            #   slice in query
            to_discard = UserLog.objects.order_by(
                "start_datetime")[0:UserLog.objects.count() -
                                  settings.USER_LOG_MAX_RECORDS]
            UserLog.objects.filter(pk__in=to_discard).delete()
Example #18
def generate_fake_exercise_logs(facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)):
    """Add exercise logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file.

    By default, users start learning randomly between 6 months ago and now.
    """

    own_device = Device.get_own_device()
    date_diff = datetime.datetime.now() - start_date
    exercise_logs = []
    user_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                (elogs, ulogs) = generate_fake_exercise_logs(facility_user=user, topics=[topic], start_date=start_date)
                exercise_logs.append(elogs)
                user_logs.append(ulogs)

    # Actually generate!
    else:
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()
        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])  # when this user started in the program, relative to NOW

        for topic in topics:
            # Get all exercises related to the topic
            exercises = get_topic_exercises(topic_id=topic)

            # Problem:
            #   Not realistic for students to have lots of unfinished exercises.
            #   If they start them, they tend to get stuck, right?
            #
            # So, need to make it more probable that they will finish an exercise,
            #   and less probable that they start one.
            #
            # What we need is P(streak|started), not P(streak)

            # Probability of doing any particular exercise
            p_exercise = probability_of(qty="exercise", user_settings=user_settings)
            logging.debug("# exercises: %d; p(exercise)=%4.3f, user settings: %s\n" % (len(exercises), p_exercise, json.dumps(user_settings)))

            # of exercises is related to
            for j, exercise in enumerate(exercises):
                if random.random() > p_exercise:
                    continue

                # Probability of completing this exercise, and .. proportion of attempts
                p_completed = probability_of(qty="completed", user_settings=user_settings)
                p_attempts = probability_of(qty="attempts", user_settings=user_settings)

                attempts = int(random.random() * p_attempts * 30 + 10)  # always enough to have completed
                completed = (random.random() < p_completed)
                if completed:
                    streak_progress = 100
                else:
                    streak_progress = max(0, min(90, random.gauss(100 * user_settings["speed_of_learning"], 20)))
                    streak_progress = int(floor(streak_progress / 10.)) * 10
                points = streak_progress / 10 * 12 if completed else 0  # only get points when you master.

                # Choose a rate of exercises, based on their effort level and speed of learning.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                rate_of_exercises = 0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]  # exercises per day
                time_for_attempts = min(datetime.timedelta(days=rate_of_exercises * attempts), date_diff_started)  # protect with min
                time_delta_completed = datetime.timedelta(seconds=random.randint(int(datediff(time_for_attempts, units="seconds")), int(datediff(date_diff_started, units="seconds"))))
                date_completed = datetime.datetime.now() - time_delta_completed

                # Always create new
                logging.info("Creating exercise log: %-12s: %-25s (%d points, %d attempts, %d%% streak on %s)" % (
                    facility_user.first_name,
                    exercise["name"],
                    points,
                    attempts,
                    streak_progress,
                    date_completed,
                ))
                try:
                    elog = ExerciseLog.objects.get(user=facility_user, exercise_id=exercise["name"])
                except ExerciseLog.DoesNotExist:
                    elog = ExerciseLog(
                        user=facility_user,
                        exercise_id=exercise["name"],
                        attempts=int(attempts),
                        streak_progress=streak_progress,
                        points=int(points),
                        complete=completed,
                        completion_timestamp=date_completed,
                        completion_counter=datediff(date_completed, start_date, units="seconds"),
                    )
                    elog.counter = own_device.increment_and_get_counter()
                    elog.sign(own_device)  # have to sign after setting the counter
                    elog.save(imported=True)  # avoid userlog issues

                    # For now, make all attempts on an exercise into a single UserLog.
                    seconds_per_attempt = 10 * (1 + user_settings["speed_of_learning"] * random.random())
                    time_to_navigate = 15 * (0.5 + random.random())  #between 7.5s and 22.5s
                    time_to_logout = 5 * (0.5 + random.random()) # between 2.5 and 7.5s
                    if settings.USER_LOG_MAX_RECORDS_PER_USER != 0:
                        ulog = UserLog(
                            user=facility_user,
                            activity_type=1,
                            start_datetime = date_completed - datetime.timedelta(seconds=int(attempts * seconds_per_attempt + time_to_navigate)),
                            end_datetime = date_completed + datetime.timedelta(seconds=time_to_logout),
                            last_active_datetime = date_completed,
                        )
                        ulog.full_clean()
                        ulog.save()
                        user_logs.append(ulog)
                exercise_logs.append(elog)

    return (exercise_logs, user_logs)
Example #19
def update_language_packs(lang_codes, options):

    package_metadata = {}

    since_date = datetime.datetime.now() - datetime.timedelta(int(options["days"]))

    if options['update_dubbed']:
        # Get the latest dubbed video map; it's shared across language packs
        force_dubbed_download = not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) \
            or 0 < datediff(since_date, datetime.datetime.fromtimestamp(os.path.getctime(DUBBED_VIDEOS_MAPPING_FILEPATH)))
        get_dubbed_video_map(force=force_dubbed_download)

    for lang_code in lang_codes:
        lang_code_map = get_supported_language_map(lang_code)
        lang_metadata = {}

        # Step 1: Update / collect srts.  No version needed, we want to share latest always.
        if options['update_srts']:
            update_srts(since_date=since_date, lang_codes=[lang_code_map["amara"]])
        lang_metadata["subtitle_count"] = get_subtitle_count(lang_code_map["amara"])

        # Step 2: Update the dubbed video mappings. No version needed, we want to share latest always.
        dv_map = get_dubbed_video_map(lang_code_map["dubbed_videos"])
        lang_metadata["num_dubbed_videos"] = len(dv_map) if dv_map and version_diff(options["version"], "0.10.3") > 0 else 0

        # Step 3: Update the exercises.  No version needed, we want to share latest always.
        #  TODO(bcipolli): make sure that each language pack only grabs exercises that are included in its topic tree.
        if options['update_exercises'] and version_diff(options["version"], "0.10.3") > 0:
            call_command("scrape_exercises", lang_code=lang_code_map["exercises"])
        lang_metadata["num_exercises"] = get_localized_exercise_count(lang_code_map["exercises"]) if version_diff(options["version"], "0.10.3") > 0 else 0

        # Step 4: Update the crowdin translations.  Version needed!
        #   TODO(bcipolli): skip this when we're going backwards in version.
        if options["no_update"] or version_diff(options["version"], "0.10.3") == 0:
            trans_metadata = {lang_code: get_po_metadata(get_po_build_path(lang_code))}
        else:
            try:
                trans_metadata = update_translations(
                    lang_codes=[lang_code],  # will be converted, as needed
                    zip_file=options['zip_file'],
                    ka_zip_file=options['ka_zip_file'],
                    download_ka_translations=options['update_ka_trans'],
                    download_kalite_translations=options['update_kalite_trans'],
                    use_local=options["use_local"],
                    version=options["version"],
                )
            except SkipTranslations:
                trans_metadata = {lang_code: get_po_metadata(get_po_build_path(lang_code))}
        lang_metadata.update(trans_metadata.get(lang_code, {}))

        # Now create/update unified meta data

        generate_metadata(package_metadata={lang_code: lang_metadata}, version=options["version"], force_version_update=options["force_update"])

        # Zip into language packs
        package_sizes = zip_language_packs(lang_codes=[lang_code], version=options["version"])
        logging.debug("%s sizes: %s" % (lang_code, package_sizes.get(lang_code, {})))

        lang_metadata.update(package_sizes.get(lang_code, {}))

        # Update the metadata with the package size information
        update_metadata({lang_code: lang_metadata}, version=options["version"])

        # Update package metadata
        package_metadata[lang_code] = lang_metadata

    return package_metadata
Example #20
    def test_units(self):
        """
        A single difference, tested across different units
        """
        d1 = datetime.datetime(2000, 1, 1)
        d2 = datetime.datetime(2004, 1, 1)  # test 4 years apart

        self.assertEqual(datediff(d2, d1, units="microsecond"), 1E6*60*60*24*(365*4 + 1), "4 years (with leap year), in microseconds")
        self.assertEqual(datediff(d2, d1, units="microseconds"), 1E6*60*60*24*(365*4 + 1), "4 years (with leap year), in microseconds")

        self.assertEqual(datediff(d2, d1), 60*60*24*(365*4 + 1), "4 years (with leap year), in seconds (default)")
        self.assertEqual(datediff(d2, d1, units="second"), 60*60*24*(365*4 + 1), "4 years (with leap year), in seconds")
        self.assertEqual(datediff(d2, d1, units="seconds"), 60*60*24*(365*4 + 1), "4 years (with leap year), in seconds")

        self.assertEqual(datediff(d2, d1, units="minute"), 60*24*(365*4 + 1), "4 years (with leap year), in minutes")
        self.assertEqual(datediff(d2, d1, units="minutes"), 60*24*(365*4 + 1), "4 years (with leap year), in minutes")

        self.assertEqual(datediff(d2, d1, units="hour"), 24*(365*4 + 1), "4 years (with leap year), in hours")
        self.assertEqual(datediff(d2, d1, units="hours"), 24*(365*4 + 1), "4 years (with leap year), in hours")

        self.assertEqual(datediff(d2, d1, units="day"), 365*4 + 1, "4 years (with leap year), in days")
        self.assertEqual(datediff(d2, d1, units="days"), 365*4 + 1, "4 years (with leap year), in days")

        self.assertEqual(datediff(d2, d1, units="week"), (365*4 + 1)/7., "4 years (with leap year), in weeks")
        self.assertEqual(datediff(d2, d1, units="weeks"), (365*4 + 1)/7., "4 years (with leap year), in weeks")
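
Taken together, the sign tests (Examples #2 and #3) and the unit tests here pin down datediff's contract: it returns the first argument minus the second as a signed float, defaults to seconds, and accepts singular or plural unit names from microseconds up to weeks; other examples also pass a single timedelta (e.g. datediff(date_diff, units="seconds")). A minimal sketch consistent with those usages, offered as an assumption rather than the project's actual implementation:

# Seconds per unit; keys are singular so "day" and "days" both resolve via rstrip("s").
_UNIT_SECONDS = {
    "microsecond": 1e-6,
    "second": 1.0,
    "minute": 60.0,
    "hour": 3600.0,
    "day": 86400.0,
    "week": 604800.0,
}

def datediff(t1, t2=None, units="seconds"):
    """Return a signed difference expressed in `units` as a float.

    Accepts either two datetimes (returns t1 - t2) or a single timedelta.
    """
    delta = t1 if t2 is None else (t1 - t2)
    return delta.total_seconds() / _UNIT_SECONDS[units.rstrip("s")]

# e.g. datediff(datetime.datetime(2004, 1, 1), datetime.datetime(2000, 1, 1), units="days") == 1461.0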
Example #21
def generate_fake_video_logs(facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file."""

    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                video_logs.append(generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date))

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()

        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])  # when this user started in the program, relative to NOW

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            exercise_ids = [ex["id"] if "id" in ex else ex['name'] for ex in exercises]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video", user_settings=user_settings)
            logging.debug("# videos: %d; p(videos)=%4.3f, user settings: %s\n" % (len(videos), p_video_outer, json.dumps(user_settings)))

            for video in videos:
                p_completed = probability_of("completed", user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(user=facility_user, id=video["related_exercise"]["id"])
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.
                else:      # Slower students will use videos more.  Effort also important.
                    pct_completed = 100. * min(1., sqrt(random.random() * sqrt(user_settings["effort_level"] * user_settings["time_in_program"] / sqrt(user_settings["speed_of_learning"]))))
                # Compute quantities based on sample
                total_seconds_watched = int(video["duration"] * pct_completed / 100.)
                points = int(750 * pct_completed / 100.)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(days=max(0, random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"])))
                    date_completed = exercise_log[0].completion_timestamp - date_jitter
                else:
                    rate_of_videos = 0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]  # videos per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(seconds=random.randint(int(time_for_watching), int(datediff(date_diff_started, units="seconds"))))
                    date_completed = datetime.datetime.now() - time_delta_completed

                try:
                    log = VideoLog.objects.get(user=facility_user, youtube_id=video["youtube_id"])
                except VideoLog.DoesNotExist:

                    logging.info("Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s" % (
                        facility_user.first_name,
                        video["title"],
                        pct_completed,
                        points,
                        " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                    ))
                    log = VideoLog(
                        user=facility_user,
                        youtube_id=video["youtube_id"],
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        completion_timestamp=date_completed,
                        completion_counter=datediff(date_completed, start_date, units="seconds"),
                    )
                    log.full_clean()
                    # TODO(bcipolli): bulk saving of logs
                    log.save()

                video_logs.append(log)

    return video_logs
Example #22
def generate_fake_exercise_logs(facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)):
    """Add exercise logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file.

    By default, users start learning randomly between 6 months ago and now.
    """

    date_diff = datetime.datetime.now() - start_date
    exercise_logs = []
    user_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                (elogs, ulogs) = generate_fake_exercise_logs(facility_user=user, topics=[topic], start_date=start_date)
                exercise_logs.append(elogs)
                user_logs.append(ulogs)

    # Actually generate!
    else:
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()
        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])  # when this user started in the program, relative to NOW

        for topic in topics:
            # Get all exercises related to the topic
            exercises = get_topic_exercises(topic_id=topic)

            # Problem:
            #   Not realistic for students to have lots of unfinished exercises.
            #   If they start them, they tend to get stuck, right?
            #
            # So, need to make it more probable that they will finish an exercise,
            #   and less probable that they start one.
            #
            # What we need is P(streak|started), not P(streak)

            # Probability of doing any particular exercise
            p_exercise = probability_of(qty="exercise", user_settings=user_settings)
            logging.debug("# exercises: %d; p(exercise)=%4.3f, user settings: %s\n" % (len(exercises), p_exercise, json.dumps(user_settings)))

            # of exercises is related to
            for j, exercise in enumerate(exercises):
                if random.random() > p_exercise:
                    continue

                # Probability of completing this exercise, and .. proportion of attempts
                p_completed = probability_of(qty="completed", user_settings=user_settings)
                p_attempts = probability_of(qty="attempts", user_settings=user_settings)

                attempts = int(random.random() * p_attempts * 30 + 10)  # always enough to have completed
                completed = (random.random() < p_completed)
                if completed:
                    streak_progress = 100
                else:
                    streak_progress = max(0, min(90, random.gauss(100 * user_settings["speed_of_learning"], 20)))
                    streak_progress = int(floor(streak_progress / 10.)) * 10
                points = streak_progress / 10 * 12 if completed else 0  # only get points when you master.

                # Choose a rate of exercises, based on their effort level and speed of learning.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                rate_of_exercises = 0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]  # exercises per day
                time_for_attempts = min(datetime.timedelta(days=rate_of_exercises * attempts), date_diff_started)  # protect with min
                time_delta_completed = datetime.timedelta(seconds=random.randint(int(datediff(time_for_attempts, units="seconds")), int(datediff(date_diff_started, units="seconds"))))
                date_completed = datetime.datetime.now() - time_delta_completed

                # Always create new
                logging.info("Creating exercise log: %-12s: %-25s (%d points, %d attempts, %d%% streak on %s)" % (
                    facility_user.first_name,
                    exercise["name"],
                    points,
                    attempts,
                    streak_progress,
                    date_completed,
                ))
                try:
                    elog = ExerciseLog.objects.get(user=facility_user, exercise_id=exercise["name"])
                except ExerciseLog.DoesNotExist:
                    elog = ExerciseLog(
                        user=facility_user,
                        exercise_id=exercise["name"],
                        attempts=int(attempts),
                        streak_progress=streak_progress,
                        points=int(points),
                        completion_timestamp=date_completed,
                        completion_counter=datediff(date_completed, start_date, units="seconds"),
                    )
                    elog.full_clean()
                    elog.save()   # TODO(bcipolli): bulk saving of logs

                    # For now, make all attempts on an exercise into a single UserLog.
                    seconds_per_attempt = 10 * (1 + user_settings["speed_of_learning"] * random.random())
                    time_to_navigate = 15 * (0.5 + random.random())  #between 7.5s and 22.5s
                    time_to_logout = 5 * (0.5 + random.random()) # between 2.5 and 7.5s
                    ulog = UserLog(
                        user=facility_user,
                        activity_type=1,
                        start_datetime = date_completed - datetime.timedelta(seconds=int(attempts * seconds_per_attempt + time_to_navigate)),
                        end_datetime = date_completed + datetime.timedelta(seconds=time_to_logout),
                        last_active_datetime = date_completed,
                    )
                    ulog.full_clean()
                    ulog.save()
                    user_logs.append(ulog)
                exercise_logs.append(elog)

    return (exercise_logs, user_logs)
Example #23
    def save(self, *args, **kwargs):
        """When this model is saved, check if the activity is ended.
        If so, compute total_seconds and update the corresponding summary log."""

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not self.is_enabled():
            return

        if not self.start_datetime:
            raise ValidationError("start_datetime cannot be None")
        if self.last_active_datetime and self.start_datetime > self.last_active_datetime:
            raise ValidationError(
                "UserLog date consistency check for start_datetime and last_active_datetime"
            )

        if not self.end_datetime:
            # Conflict_resolution
            related_open_logs = UserLog.objects \
                .filter(user=self.user, activity_type=self.activity_type, end_datetime__isnull=True) \
                .exclude(pk=self.pk)
            for log in related_open_logs:
                log.end_datetime = datetime.now()
                log.save()

        elif not self.total_seconds:
            # Compute total_seconds, save to summary
            #   Note: only supports setting end_datetime once!
            self.full_clean()

            # The top computation is more lenient: user activity is just time logged in, literally.
            # The bottom computation is more strict: user activity is from start until the last "action"
            #   recorded--in the current case, that means from login until the last moment an exercise or
            #   video log was updated.
            #self.total_seconds = datediff(self.end_datetime, self.start_datetime, units="seconds")
            self.total_seconds = 0 if not self.last_active_datetime else datediff(
                self.last_active_datetime,
                self.start_datetime,
                units="seconds")

            # Confirm the result (output info first for easier debugging)
            logging.debug(
                "%s: total time (%d): %d seconds" %
                (self.user.username, self.activity_type, self.total_seconds))
            if self.total_seconds < 0:
                raise ValidationError(
                    "Total learning time should always be non-negative.")

            # Save only completed log items to the UserLogSummary
            UserLogSummary.add_log_to_summary(self)

        # This is inefficient only if something goes awry.  Otherwise,
        #   this will really only do something on ADD.
        #   AND, if you're using recommended config (USER_LOG_MAX_RECORDS_PER_USER == 1),
        #   this will be very efficient.
        if settings.USER_LOG_MAX_RECORDS_PER_USER:  # Works for None, out of the box
            current_models = UserLog.objects.filter(
                user=self.user, activity_type=self.activity_type)
            if current_models.count() > settings.USER_LOG_MAX_RECORDS_PER_USER:
                # Unfortunately, could not do an aggregate delete when doing a
                #   slice in query
                to_discard = current_models \
                    .order_by("start_datetime")[0:current_models.count() - settings.USER_LOG_MAX_RECORDS_PER_USER]
                UserLog.objects.filter(pk__in=to_discard).delete()

        # Do it here, for efficiency of the above delete.
        super(UserLog, self).save(*args, **kwargs)
Example #24
def generate_fake_video_logs(facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file."""

    own_device = Device.get_own_device()
    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                video_logs.append(generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date))

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()

        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])  # when this user started in the program, relative to NOW

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            exercise_ids = [ex["id"] if "id" in ex else ex['name'] for ex in exercises]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video", user_settings=user_settings)
            logging.debug("# videos: %d; p(videos)=%4.3f, user settings: %s\n" % (len(videos), p_video_outer, json.dumps(user_settings)))

            for video in videos:
                p_completed = probability_of("completed", user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(user=facility_user, id=video["related_exercise"]["id"])
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.
                else:      # Slower students will use videos more.  Effort also important.
                    pct_completed = 100. * min(1., sqrt(random.random() * sqrt(user_settings["effort_level"] * user_settings["time_in_program"] / sqrt(user_settings["speed_of_learning"]))))
                # Compute quantities based on sample
                total_seconds_watched = int(video["duration"] * pct_completed / 100.)
                points = int(750 * pct_completed / 100.)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(days=max(0, random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"])))
                    date_completed = exercise_log[0].completion_timestamp - date_jitter
                else:
                    rate_of_videos = 0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]  # exercises per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(seconds=random.randint(int(time_for_watching), int(datediff(date_diff_started, units="seconds"))))
                    date_completed = datetime.datetime.now() - time_delta_completed

                try:
                    vlog = VideoLog.objects.get(user=facility_user, youtube_id=video["youtube_id"])
                except VideoLog.DoesNotExist:
                    logging.info("Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s" % (
                        facility_user.first_name,
                        video["title"],
                        pct_completed,
                        points,
                        " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                    ))
                    vlog = VideoLog(
                        user=facility_user,
                        youtube_id=video["youtube_id"],
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.),
                        completion_timestamp=date_completed,
                        completion_counter=datediff(date_completed, start_date, units="seconds"),
                    )
                    vlog.full_clean()
                    # TODO(bcipolli): bulk saving of logs
                    vlog.counter = own_device.increment_and_get_counter()
                    vlog.sign(own_device)  # have to sign after setting the counter
                    vlog.save(imported=True)  # avoid userlog issues

                video_logs.append(vlog)

    return video_logs
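
# A stripped-down sketch of the completion-time sampling above, for illustration
# only (sample_completion_time is not a function in this codebase; it just
# restates the random.randint step in isolation, reusing the datetime and random
# imports already in use here): pick a random "age" in seconds, at least the
# watch time and at most the user's total seconds in the program, then count
# back from now.
def sample_completion_time(total_seconds_watched, seconds_in_program):
    age_in_seconds = random.randint(int(total_seconds_watched), int(seconds_in_program))
    return datetime.datetime.now() - datetime.timedelta(seconds=age_in_seconds)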
Ejemplo n.º 25
def update_language_packs(lang_codes, options):

    package_metadata = {}

    since_date = datetime.datetime.now() - datetime.timedelta(days=int(options["days"]))

    if options['update_dubbed']:
        # Get the latest dubbed video map; it's shared across language packs
        force_dubbed_download = not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) \
            or 0 < datediff(since_date, datetime.datetime.fromtimestamp(os.path.getctime(DUBBED_VIDEOS_MAPPING_FILEPATH)))
        get_dubbed_video_map(force=force_dubbed_download)

    for lang_code in lang_codes:
        lang_code_map = get_supported_language_map(lang_code)
        lang_metadata = {}

        # Step 1: Update / collect srts.  No version needed, we want to share latest always.
        if options['update_srts']:
            update_srts(since_date=since_date,
                        lang_codes=[lang_code_map["amara"]])
        lang_metadata["subtitle_count"] = get_subtitle_count(
            lang_code_map["amara"])

        # Step 2: Update the dubbed video mappings. No version needed, we want to share latest always.
        dv_map = get_dubbed_video_map(lang_code_map["dubbed_videos"])
        lang_metadata["num_dubbed_videos"] = (
            len(dv_map)
            if dv_map and version_diff(options["version"], "0.10.3") > 0
            else 0
        )

        # Step 3: Update the exercises.  No version needed, we want to share latest always.
        #  TODO(bcipolli): make sure that each language pack only grabs exercises that are included in its topic tree.
        if options['update_exercises'] and version_diff(
                options["version"], "0.10.3") > 0:
            call_command("scrape_exercises",
                         lang_code=lang_code_map["exercises"])
        lang_metadata["num_exercises"] = (
            get_localized_exercise_count(lang_code_map["exercises"])
            if version_diff(options["version"], "0.10.3") > 0
            else 0
        )

        # Step 4: Update the crowdin translations.  Version needed!
        #   TODO(bcipolli): skip this when we're going backwards in version.
        if options["no_update"] or version_diff(options["version"],
                                                "0.10.3") == 0:
            trans_metadata = {
                lang_code: get_po_metadata(get_po_build_path(lang_code))
            }
        else:
            try:
                trans_metadata = update_translations(
                    lang_codes=[lang_code],  # will be converted, as needed
                    zip_file=options['zip_file'],
                    ka_zip_file=options['ka_zip_file'],
                    download_ka_translations=options['update_ka_trans'],
                    download_kalite_translations=options[
                        'update_kalite_trans'],
                    use_local=options["use_local"],
                    version=options["version"],
                )
            except SkipTranslations:
                trans_metadata = {
                    lang_code: get_po_metadata(get_po_build_path(lang_code))
                }
        lang_metadata.update(trans_metadata.get(lang_code, {}))

        # Now create/update unified meta data

        generate_metadata(package_metadata={lang_code: lang_metadata},
                          version=options["version"],
                          force_version_update=options["force_update"])

        # Zip into language packs
        package_sizes = zip_language_packs(lang_codes=[lang_code],
                                           version=options["version"])
        logging.debug("%s sizes: %s" %
                      (lang_code, package_sizes.get(lang_code, {})))

        lang_metadata.update(package_sizes.get(lang_code, {}))

        # Update the metadata with the package size information
        update_metadata({lang_code: lang_metadata}, version=options["version"])

        # Update package metadata
        package_metadata[lang_code] = lang_metadata

    return package_metadata
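
# version_diff() is used above as a cmp-style comparison of dotted version
# strings: positive when the first version is newer than the second, zero when
# equal, negative when older.  The helper below only sketches those assumed
# semantics for illustration (version_diff_sketch is not part of this codebase):
def version_diff_sketch(v1, v2):
    parts1 = [int(piece) for piece in v1.split(".")]
    parts2 = [int(piece) for piece in v2.split(".")]
    # Pad the shorter version with zeros, so "0.10" compares like "0.10.0".
    length = max(len(parts1), len(parts2))
    parts1 += [0] * (length - len(parts1))
    parts2 += [0] * (length - len(parts2))
    return (parts1 > parts2) - (parts1 < parts2)

# e.g. version_diff_sketch("0.10.3", "0.10.3") == 0 and version_diff_sketch("0.11", "0.10.3") > 0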
Ejemplo n.º 26
    def test_units(self):
        """
        A single four-year difference, tested across every supported unit (singular and plural).
        """
        d1 = datetime.datetime(2000, 1, 1)
        d2 = datetime.datetime(2004, 1, 1)  # test 4 years apart

        self.assertEqual(datediff(d2, d1, units="microsecond"),
                         1E6 * 60 * 60 * 24 * (365 * 4 + 1),
                         "4 years (with leap year), in microseconds")
        self.assertEqual(datediff(d2, d1, units="microseconds"),
                         1E6 * 60 * 60 * 24 * (365 * 4 + 1),
                         "4 years (with leap year), in microseconds")

        self.assertEqual(datediff(d2, d1), 60 * 60 * 24 * (365 * 4 + 1),
                         "4 years (with leap year), in seconds (default)")
        self.assertEqual(datediff(d2, d1, units="second"),
                         60 * 60 * 24 * (365 * 4 + 1),
                         "4 years (with leap year), in seconds")
        self.assertEqual(datediff(d2, d1, units="seconds"),
                         60 * 60 * 24 * (365 * 4 + 1),
                         "4 years (with leap year), in seconds")

        self.assertEqual(datediff(d2, d1,
                                  units="minute"), 60 * 24 * (365 * 4 + 1),
                         "4 years (with leap year), in minutes")
        self.assertEqual(datediff(d2, d1,
                                  units="minutes"), 60 * 24 * (365 * 4 + 1),
                         "4 years (with leap year), in minutes")

        self.assertEqual(datediff(d2, d1, units="hour"), 24 * (365 * 4 + 1),
                         "4 years (with leap year), in hours")
        self.assertEqual(datediff(d2, d1, units="hours"), 24 * (365 * 4 + 1),
                         "4 years (with leap year), in hours")

        self.assertEqual(datediff(d2, d1, units="day"), 365 * 4 + 1,
                         "4 years (with leap year), in days")
        self.assertEqual(datediff(d2, d1, units="days"), 365 * 4 + 1,
                         "4 years (with leap year), in days")

        self.assertEqual(datediff(d2, d1, units="week"), (365 * 4 + 1) / 7.,
                         "4 years (with leap year), in weeks")
        self.assertEqual(datediff(d2, d1, units="weeks"), (365 * 4 + 1) / 7.,
                         "4 years (with leap year), in weeks")