Esempio n. 1
0
 def save(self, *args, **kwargs):
     """
     Persist this estimate, fill in its add/drop-period fields, then persist again.

     The add/drop period comes from the optional ``add_drop_period`` keyword argument,
     which is removed from ``kwargs`` before they are forwarded to the parent ``save``.
     When the keyword is absent, the period is looked up with
     ``get_or_create_add_drop_period(self.semester)``; passing it explicitly avoids
     that lookup in bulk operations.

     ``in_add_drop_period`` and ``percent_through_add_drop_period`` are derived from
     where ``self.created_at`` falls relative to the period's ``estimated_start`` and
     ``estimated_end``: 0 before the start, 1 after the end, and the elapsed fraction
     in between.
     """
     caller_supplied_period = "add_drop_period" in kwargs
     period = (kwargs.pop("add_drop_period") if caller_supplied_period else
               get_or_create_add_drop_period(self.semester))

     # created_at is read only after this first save
     # (presumably the save is what populates it on creation -- confirm on the model).
     super().save(*args, **kwargs)
     timestamp = self.created_at
     period_start = period.estimated_start
     period_end = period.estimated_end

     if timestamp < period_start:
         self.in_add_drop_period, self.percent_through_add_drop_period = False, 0
     elif timestamp > period_end:
         self.in_add_drop_period, self.percent_through_add_drop_period = False, 1
     else:
         self.in_add_drop_period = True
         elapsed = timestamp - period_start
         self.percent_through_add_drop_period = elapsed / (period_end -
                                                           period_start)

     # Second save stores the two fields computed above.
     super().save()
Esempio n. 2
0
def should_send_pca_alert(course_term, course_status):
    """
    Decide whether a PCA alert should be sent for a reported course status.

    Returns False immediately when ``course_term`` is not the current semester.
    Otherwise the result is truthy only if the ``SEND_FROM_WEBHOOK`` option is enabled,
    ``course_status`` is ``"O"`` or ``"C"``, and the semester's add/drop period either
    has no ``end`` set or has not ended yet.

    :param course_term: the semester the status report belongs to
    :param course_status: the status code being reported
    :return: a truthy value if an alert should be sent, a falsy value otherwise
    """
    if get_current_semester() != course_term:
        return False
    add_drop_period = get_or_create_add_drop_period(course_term)
    # Ask for an aware "now" directly. Taking the naive UTC wall-clock reading and
    # stamping it with the local zone would shift "now" by that zone's UTC offset
    # and move the cut-off below by several hours.
    now = datetime.now(tz=gettz(TIME_ZONE))
    return (get_bool("SEND_FROM_WEBHOOK", False)
            and course_status in ("O", "C")
            and (add_drop_period.end is None or now < add_drop_period.end))
Esempio n. 3
0
def pca_registration_open():
    """
    Returns True iff PCA should be accepting new registrations.

    Registrations are accepted when the ``REGISTRATION_OPEN`` option is enabled
    (it defaults to True) and the current semester's add/drop period either has no
    ``end`` set or has not ended yet.
    """
    current_adp = get_or_create_add_drop_period(
        semester=get_current_semester())
    # Ask for an aware "now" directly. Taking the naive UTC wall-clock reading and
    # stamping it with the local zone would shift "now" by that zone's UTC offset
    # and close registration several hours off from the stored end.
    now = datetime.now(tz=gettz(TIME_ZONE))
    return get_bool("REGISTRATION_OPEN", True) and (
        current_adp.end is None or now < current_adp.end)
Esempio n. 4
0
    def save(self, *args, **kwargs):
        """
        Store this StatusUpdate, derive its add/drop-period fields, and notify dependents.

        An optional ``add_drop_period`` keyword argument is removed from ``kwargs`` before
        they are forwarded to the parent ``save``; supplying it avoids a lookup per object
        in bulk operations. After the first save, if ``validate_add_drop_semester`` rejects
        the section's semester, the method returns without doing anything further.
        Otherwise it sets ``in_add_drop_period`` and ``percent_through_add_drop_period``
        from where ``created_at`` falls in the period, saves again, marks the section as
        having status updates (and saves it), and queues ``section_demand_change`` for
        the section.
        """
        # Imported inside the method to avoid circular imports.
        from alert.models import validate_add_drop_semester
        from alert.tasks import section_demand_change
        from courses.util import get_or_create_add_drop_period

        period = kwargs.pop("add_drop_period", None)

        super().save(*args, **kwargs)

        # Only semesters accepted by the validator get add/drop-period fields.
        try:
            validate_add_drop_semester(self.section.semester)
        except ValidationError:
            return

        if period is None:
            period = get_or_create_add_drop_period(self.section.semester)

        timestamp = self.created_at
        period_start = period.estimated_start
        period_end = period.estimated_end

        if timestamp < period_start:
            self.in_add_drop_period, self.percent_through_add_drop_period = False, 0
        elif timestamp > period_end:
            self.in_add_drop_period, self.percent_through_add_drop_period = False, 1
        else:
            self.in_add_drop_period = True
            elapsed = timestamp - period_start
            self.percent_through_add_drop_period = elapsed / (period_end -
                                                              period_start)
        super().save()

        self.section.has_status_updates = True
        self.section.save()

        section_demand_change.delay(self.section.id, self.created_at)
Esempio n. 5
0
    def current_percent_open(self):
        """
        The percentage (expressed as a decimal number between 0 and 1) of the period between
        the beginning of its add/drop period and min[the current time, the end of its
        registration period] that this section was open. If this section's registration
        period hasn't started yet, this property is null (None in Python).

        For a section that is not in the current semester, the stored ``percent_open``
        is returned unchanged. For the current semester the value combines the stored
        ``percent_open`` (weighted by the time up to the latest status update inside the
        add/drop period) with ``is_open`` (weighted by the time since that update).
        """
        from courses.util import get_current_semester, get_or_create_add_drop_period

        # ^ imported here to avoid circular imports

        if self.semester != get_current_semester():
            return self.percent_open

        add_drop = get_or_create_add_drop_period(self.semester)
        add_drop_start = add_drop.estimated_start
        add_drop_end = add_drop.estimated_end
        current_time = timezone.now()
        if current_time <= add_drop_start:
            return None

        # The measured window stops at the end of add/drop. Both the denominator and the
        # "time since last update" numerator must use this clamped end; otherwise the
        # result keeps growing past 1 once the add/drop period is over.
        window_end = min(current_time, add_drop_end)

        try:
            last_status_update = StatusUpdate.objects.filter(
                section=self,
                created_at__gt=add_drop_start,
                created_at__lt=add_drop_end).latest("created_at")
        except StatusUpdate.DoesNotExist:
            last_status_update = None
        # With no update inside the period, the current status covers the whole window.
        last_update_dt = last_status_update.created_at if last_status_update else add_drop_start

        period_seconds = float((window_end - add_drop_start).total_seconds())
        percent_after_update = (float(self.is_open) * float(
            (window_end - last_update_dt).total_seconds()) / period_seconds)
        if last_status_update is None:
            return percent_after_update
        percent_before_update = (float(self.percent_open) * float(
            (last_update_dt - add_drop_start).total_seconds()) /
                                 period_seconds)
        return percent_before_update + percent_after_update
def _last_month_named_in(text, months):
    """
    Return the last entry of ``months`` that occurs in ``text`` (case-insensitively),
    or None when no entry occurs.
    """
    lowered = text.lower()
    found = None
    for month in months:
        if month in lowered:
            found = month
    return found


def load_add_drop_dates(verbose=False):
    """
    Scrape the Penn Almanac academic calendar and store add/drop dates for the
    current semester on its add/drop period object.

    The start is only estimated when the period has no ``start`` set: it becomes 07:00
    on the day after the last day of the "advance registration" row of the preceding
    term. The end is taken from the "course selection period ends" row of the
    semester's own term, at 11:59. The results are written to ``adp.estimated_start``
    and ``adp.end`` and saved inside a transaction.

    NOTE(review): the end time is parsed with ``%H:%M`` as "11:59", i.e. 11:59 in the
    morning -- confirm whether 23:59 was intended.

    :param verbose: if True, print progress messages
    :raises ValueError: if the semester code's fifth character is neither "A" nor "C"
    """
    semester = get_current_semester()
    validate_add_drop_semester(semester)

    if verbose:
        print(
            f"Loading course selection period dates for semester {semester} from the Almanac"
        )
    with transaction.atomic():
        adp = get_or_create_add_drop_period(semester)
        start_date = adp.start
        end_date = adp.end
        html = requests.get(
            "https://almanac.upenn.edu/penn-academic-calendar").content
        soup = BeautifulSoup(html, "html.parser")
        # start_sem: calendar section holding the advance registration row;
        # end_sem: calendar section holding the "course selection period ends" row.
        if semester[4] == "C":
            start_sem = semester[:4] + " spring"
            end_sem = semester[:4] + " fall"
        elif semester[4] == "A":
            start_sem = str(int(semester[:4]) - 1) + " fall"
            end_sem = semester[:4] + " spring"
        else:
            raise ValueError(
                "This script currently only supports fall or spring semesters; "
                f"{semester} is invalid")
        tz = gettz(TIME_ZONE)

        s_year, s_month, s_day, e_year, e_month, e_day = (None, ) * 6
        start_mode = 0  # 0 if start semester hasn't been found, 1 if it has, 2 if finished sem
        end_mode = 0  # 0 if end semester hasn't been found, 1 if it has, 2 if finished sem
        all_th_parents = {el.parent for el in soup.find_all("th")}
        months = [
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
        ]
        for tr_el in soup.find_all("tr"):
            if tr_el in all_th_parents:
                # Header row: track whether we are entering or leaving the wanted sections.
                sem_name = tr_el.th.get_text().lower()
                if start_sem in sem_name:
                    start_mode = 1
                elif start_mode == 1:
                    start_mode = 2
                if end_sem in sem_name:
                    end_mode = 1
                elif end_mode == 1:
                    end_mode = 2
                continue

            children = list(tr_el.findChildren("td", recursive=False))
            if len(children) < 2:
                # Not a "title | date" row, so there is nothing to parse.
                continue
            title_text = children[0].get_text().lower()
            date_string = children[1].get_text()

            if start_mode == 1 and "advance registration" in title_text:
                dates = date_string.split("-")
                # The part after the hyphen describes the last day of the range; a date
                # with no hyphen is treated as a one-day range.
                end_part = dates[1] if len(dates) > 1 else dates[0]
                # The end month must be read from the END of the range. A range inside
                # one month ("March 21-28") names the month only before the hyphen, so
                # fall back to the begin month in that case.
                ar_end_month = _last_month_named_in(end_part, months)
                if ar_end_month is None:
                    ar_end_month = _last_month_named_in(dates[0], months)
                s_year = int(start_sem[:4])
                if ar_end_month is not None:
                    s_month = months.index(ar_end_month) + 1
                day_candidates = [
                    int(s) for s in end_part.split() if s.isdigit()
                ]
                if day_candidates:
                    s_day = day_candidates[0]

            if end_mode == 1 and "course selection period ends" in title_text:
                course_sel_end_month = _last_month_named_in(
                    date_string, months)
                e_year = int(end_sem[:4])
                if course_sel_end_month is not None:
                    e_month = months.index(course_sel_end_month) + 1
                day_candidates = [
                    int(s) for s in date_string.split() if s.isdigit()
                ]
                if day_candidates:
                    e_day = day_candidates[0]

        # Only estimate a start when none has been set on the period already.
        if None not in [s_year, s_month, s_day] and start_date is None:
            start_date = make_aware(
                datetime.strptime(f"{s_year}-{s_month}-{s_day} 07:00",
                                  "%Y-%m-%d %H:%M") + timedelta(days=1),
                timezone=tz,
            )
            if verbose:
                print(
                    "NOTE: Add/drop date start was estimated as the end of the advanced "
                    "registration period. Replace this date with the actual start of the "
                    "add/drop period through the Django admin console when it is announced "
                    "to students each semester.")
        if None not in [e_year, e_month, e_day]:
            end_date = make_aware(
                datetime.strptime(f"{e_year}-{e_month}-{e_day} 11:59",
                                  "%Y-%m-%d %H:%M"),
                timezone=tz,
            )
        adp.estimated_start, adp.end = start_date, end_date
        adp.save()
    if verbose:
        print("Done!")
def get_demand_data(semesters, section_query="", verbose=False):
    """
    Replay registrations and status updates for each given semester and collect
    snapshots of the raw demand values of closed sections.

    :param semesters: a sized sequence of semester strings (it is enumerated and its
        length is printed in verbose mode)
    :param section_query: only sections whose ``full_code`` starts with this string are
        considered (the default "" matches every section)
    :param verbose: if True, print progress and wrap the loops in ``tqdm``
    :return: a dict mapping each processed semester to a list of dicts with keys
        ``"percent_through"`` and ``"demands"`` (the demand values of the sections whose
        tracked status is ``"C"`` at the time of the snapshot). Semesters rejected by
        ``validate_add_drop_semester`` are absent from the result; a semester whose
        add/drop period has not started maps to an empty list.
    """
    current_semester = get_current_semester()
    output_dict = dict()

    # NOTE(review): this call passes verbose=True regardless of this function's own
    # `verbose` argument -- confirm whether verbose=verbose was intended.
    recompute_precomputed_fields(verbose=True)

    if verbose:
        print(f"Computing demand data for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(
                    f"Skipping semester {semester} (unsupported kind for stats)."
                )
            continue
        add_drop_period = get_or_create_add_drop_period(semester)

        if verbose:
            print(f"Processing semester {semester}, "
                  f"{(semester_num+1)}/{len(semesters)}.\n")

        output_dict[semester] = []  # list of demand data dicts
        section_id_to_object = dict(
        )  # maps section id to section object (for this semester)
        volume_changes_map = dict(
        )  # maps section id to list of volume changes
        status_updates_map = dict(
        )  # maps section id to list of status updates

        # In verbose mode every long loop below is wrapped in a tqdm progress bar.
        iterator_wrapper = tqdm if verbose else (lambda x: x)
        if verbose:
            print("Indexing relevant sections...")
        for section in iterator_wrapper(
                Section.objects.filter(
                    extra_metrics_section_filters,
                    full_code__startswith=section_query,
                    course__semester=semester,
                ).annotate(
                    efficient_semester=F("course__semester"), ).distinct()):
            section_id_to_object[section.id] = section
            volume_changes_map[section.id] = []
            status_updates_map[section.id] = []

        if verbose:
            print(
                "Computing registration volume changes over time for each section..."
            )
        # Each registration contributes +1 at its creation time and, if it was
        # deactivated, -1 at its deactivation time.
        for registration in iterator_wrapper(
                Registration.objects.filter(
                    section_id__in=section_id_to_object.keys()).annotate(
                        section_capacity=F("section__capacity"))):
            section_id = registration.section_id
            volume_changes_map[section_id].append({
                "date": registration.created_at,
                "volume_change": 1
            })
            deactivated_at = registration.deactivated_at
            if deactivated_at is not None:
                volume_changes_map[section_id].append({
                    "date": deactivated_at,
                    "volume_change": -1
                })

        if verbose:
            print("Collecting status updates over time for each section...")
        # Only status updates flagged as inside the add/drop period are collected.
        for status_update in iterator_wrapper(
                StatusUpdate.objects.filter(
                    section_id__in=section_id_to_object.keys(),
                    in_add_drop_period=True)):
            section_id = status_update.section_id
            status_updates_map[section_id].append({
                "date":
                status_update.created_at,
                "old_status":
                status_update.old_status,
                "new_status":
                status_update.new_status,
            })

        if verbose:
            print("Joining updates for each section and sorting...")
        # One chronological event list holding both kinds of change, each tagged with
        # its "type" and "section_id".
        all_changes = sorted(
            [{
                "type": "status_update",
                "section_id": section_id,
                **update
            }
             for section_id, status_updates_list in status_updates_map.items()
             for update in status_updates_list] + [{
                 "type": "volume_change",
                 "section_id": section_id,
                 **change
             } for section_id, changes_list in volume_changes_map.items()
                                                   for change in changes_list],
            key=lambda x: (x["date"], int(x["type"] != "status_update")),
            # put status updates first on matching dates
        )

        # Initialize variables to be maintained in our main all_changes loop
        latest_popularity_dist_estimate = None
        registration_volumes = {
            section_id: 0
            for section_id in section_id_to_object.keys()
        }
        demands = {section_id: 0 for section_id in section_id_to_object.keys()}

        # Initialize section statuses
        # (each section starts from the old_status of its earliest status update, if any)
        section_status = {
            section_id: None
            for section_id in section_id_to_object.keys()
        }
        for change in all_changes:
            section_id = change["section_id"]
            if change["type"] == "status_update":
                if section_status[section_id] is None:
                    section_status[section_id] = change["old_status"]

        # Past semesters count as fully elapsed; the current one uses its actual progress.
        percent_through = (add_drop_period.get_percent_through_add_drop(
            timezone.now()) if semester == current_semester else 1)
        if percent_through == 0:
            if verbose:
                print(
                    f"Skipping semester {semester} because the add/drop period "
                    f"hasn't started yet.")
            continue
        # Maximum number of consecutive volume changes allowed without recording a
        # snapshot: total volume changes divided by the rough minimum number of
        # estimates scaled by how far through the period we are.
        distribution_estimate_threshold = sum(
            len(changes_list)
            for changes_list in volume_changes_map.values()) // (
                ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
        num_changes_without_estimate = 0

        if verbose:
            print(f"Compiling demand data for semester {semester}...")
        for change in iterator_wrapper(all_changes):
            section_id = change["section_id"]

            # A section with no status updates falls back to a status guessed from
            # its percent_open.
            if section_status[section_id] is None:
                section_status[section_id] = (
                    "O" if section_id_to_object[section_id].percent_open > 0.5
                    else "C")
            if change["type"] == "status_update":
                section_status[section_id] = change["new_status"]
                continue

            # From here on the change is a volume change.
            date = change["date"]
            volume_change = change["volume_change"]
            registration_volumes[section_id] += volume_change
            demands[section_id] = (registration_volumes[section_id] /
                                   section_id_to_object[section_id].capacity)
            max_id = max(demands.keys(), key=lambda x: demands[x])
            min_id = min(demands.keys(), key=lambda x: demands[x])
            # Record a snapshot when there is none yet, when the changed section is the
            # recorded highest/lowest-demand section, when the highest/lowest-demand
            # section has changed, or when the threshold of unrecorded changes is reached.
            if (latest_popularity_dist_estimate is None
                    or section_id == latest_popularity_dist_estimate[
                        "highest_demand_section"].id or section_id ==
                    latest_popularity_dist_estimate["lowest_demand_section"].id
                    or
                    latest_popularity_dist_estimate["highest_demand_section"].
                    id != max_id or
                    latest_popularity_dist_estimate["lowest_demand_section"].id
                    != min_id or num_changes_without_estimate >=
                    distribution_estimate_threshold):
                num_changes_without_estimate = 0
                output_dict[semester].append({
                    "percent_through":
                    percent_through,
                    "demands": [
                        val for sec_id, val in demands.items()
                        if section_status[sec_id] == "C"
                    ],
                })

                latest_popularity_dist_estimate = {
                    "created_at": date,
                    "semester": semester,
                    "highest_demand_section": section_id_to_object[max_id],
                    "highest_demand_section_volume":
                    registration_volumes[max_id],
                    "lowest_demand_section": section_id_to_object[min_id],
                    "lowest_demand_section_volume":
                    registration_volumes[min_id],
                }
            else:
                num_changes_without_estimate += 1

    return output_dict
    def handle(self, *args, **kwargs):
        """
        Load status updates from the CSV file named by ``kwargs["src"]`` into the database.

        Returns the string "File does not exist." or "File is not a csv." without touching
        the database when the path is missing or does not have a ``.csv`` extension.
        Otherwise the file is read twice: once to collect the (full code, semester) pairs
        and count rows, and once inside a transaction to create a ``StatusUpdate`` for
        every row. Each row is read as: full code, semester, creation timestamp,
        old status, new status, alert-sent flag. Statuses other than "O", "C" and "X"
        are stored as "". After loading, stats are recomputed for the semesters seen.

        :raises ValueError: if a row refers to a section that was not found in the db
        """
        src = os.path.abspath(kwargs["src"])
        file_extension = os.path.splitext(kwargs["src"])[1]
        if not os.path.exists(src):
            return "File does not exist."
        if file_extension != ".csv":
            return "File is not a csv."

        # First pass: count rows and gather the distinct (full_code, semester) pairs.
        row_count = 0
        sections_to_fetch = set()
        with open(src) as history_file:
            for row_count, row in enumerate(csv.reader(history_file), start=1):
                sections_to_fetch.add((row[0], row[1]))
        full_codes = list({pair[0] for pair in sections_to_fetch})
        semesters = list({pair[1] for pair in sections_to_fetch})

        # maps (full_code, semester) to section id
        section_obs = Section.objects.filter(
            full_code__in=full_codes,
            course__semester__in=semesters).annotate(
                efficient_semester=F("course__semester"))
        sections_map = {(section_ob.full_code, section_ob.efficient_semester):
                        section_ob.id
                        for section_ob in section_obs}

        # maps semester to AddDropPeriod object
        add_drop_periods = {
            adp.semester: adp
            for adp in AddDropPeriod.objects.filter(semester__in=semesters)
        }

        print(
            "This script is atomic, meaning either all the status updates from the given "
            "CSV will be loaded into the database, or otherwise if an error is encountered, "
            "all changes will be rolled back and the database will remain as it was "
            "before the script was run.")
        known_statuses = ("O", "C", "X")
        with transaction.atomic():
            with open(src) as history_file:
                print(f"Beginning to load status history from {src}")
                history_reader = csv.reader(history_file)
                for row in tqdm(history_reader, total=row_count):
                    full_code = row[0]
                    semester = row[1]
                    created_at = make_aware(
                        datetime.strptime(row[2], "%Y-%m-%d %H:%M:%S.%f %Z"),
                        timezone=gettz(TIME_ZONE),
                        is_dst=None,
                    )
                    old_status = row[3]
                    new_status = row[4]
                    alert_sent = row[5]
                    # Anything that is not a recognised status code is stored as blank.
                    if old_status not in known_statuses:
                        old_status = ""
                    if new_status not in known_statuses:
                        new_status = ""
                    if (full_code, semester) not in sections_map:
                        raise ValueError(
                            f"Section {full_code} {semester} not found in db.")
                    status_update = StatusUpdate(
                        section_id=sections_map[full_code, semester],
                        old_status=old_status,
                        new_status=new_status,
                        created_at=created_at,
                        alert_sent=alert_sent,
                    )
                    # Look each semester's period up at most once and hand it to save().
                    if semester not in add_drop_periods:
                        add_drop_periods[
                            semester] = get_or_create_add_drop_period(semester)
                    status_update.save(
                        add_drop_period=add_drop_periods[semester])

                print(
                    f"Finished loading status history from {src}... processed {row_count} rows. "
                )

                print(
                    f"Recomputing PCA Stats for {len(semesters)} semesters...")
                recompute_stats(semesters=",".join(semesters), verbose=True)
Esempio n. 9
0
def section_demand_change(section_id, updated_at):
    """
    This function should be called when a section's demand changes (i.e. the number of
    active registrations changes, or the section's status is updated). It updates the
    `PcaDemandDistributionEstimate` model and `current_demand_distribution_estimate`
    cache to reflect the demand change.

    Nothing happens when the section is not in the current semester or when no section
    matches the metrics filters. A new estimate is stored only when there is no cached
    estimate for this semester, or when the highest/lowest raw demand has moved beyond
    the cached estimate's bounds.

    :param: section_id: the id of the section involved in the demand change
    :param: updated_at: the datetime at which the demand change occurred
    """
    section = Section.objects.get(id=section_id)
    semester = section.semester
    if semester != get_current_semester():
        return

    with transaction.atomic():
        create_new_distribution_estimate = False
        # A unique sentinel distinguishes "nothing cached" from any cached value.
        sentinel = object()
        current_demand_distribution_estimate = cache.get(
            "current_demand_distribution_estimate", sentinel)
        if (current_demand_distribution_estimate is sentinel
                or current_demand_distribution_estimate.semester != semester):
            create_new_distribution_estimate = True

        # Sections of this semester, annotated with raw_demand
        # (registration_volume / capacity, or NULL when capacity is not positive)
        # and ordered by it ascending.
        sections_qs = (Section.objects.filter(
            extra_metrics_section_filters,
            course__semester=semester).select_for_update().annotate(
                raw_demand=Case(
                    When(
                        Q(capacity__gt=0),
                        then=(Cast(
                            "registration_volume",
                            models.FloatField(),
                        ) / Cast("capacity", models.FloatField())),
                    ),
                    default=None,
                    output_field=models.FloatField(),
                ), ).order_by("raw_demand"))

        try:
            lowest_demand_section = sections_qs[:1].get()
            # Django querysets do not support negative indexing/slicing, so the last
            # row of the ascending ordering is fetched as the first row of the
            # reversed ordering.
            highest_demand_section = sections_qs.reverse()[:1].get()
        except Section.DoesNotExist:
            return  # Don't add a PcaDemandDistributionEstimate -- there are no valid sections yet

        if (create_new_distribution_estimate
                or highest_demand_section.raw_demand >
                current_demand_distribution_estimate.highest_raw_demand
                or lowest_demand_section.raw_demand <
                current_demand_distribution_estimate.lowest_raw_demand):
            closed_sections_demand_values = np.asarray(
                sections_qs.filter(status="C").values_list("raw_demand",
                                                           flat=True))
            # The term 'closed sections positive raw demand values' is
            # sometimes abbreviated as 'csprdv'; 'csrdv' drops the 'positive'.
            csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None,
                                                              None)
            if len(closed_sections_demand_values) > 0:
                closed_sections_positive_demand_values = closed_sections_demand_values[
                    np.where(closed_sections_demand_values > 0)]
                # Fraction of closed sections whose raw demand is not positive.
                csrdv_frac_zero = 1 - len(
                    closed_sections_positive_demand_values) / len(
                        closed_sections_demand_values)
                if len(closed_sections_positive_demand_values) > 0:
                    fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                        closed_sections_positive_demand_values)
            new_demand_distribution_estimate = PcaDemandDistributionEstimate(
                semester=semester,
                highest_demand_section=highest_demand_section,
                highest_demand_section_volume=highest_demand_section.
                registration_volume,
                lowest_demand_section=lowest_demand_section,
                lowest_demand_section_volume=lowest_demand_section.
                registration_volume,
                csrdv_frac_zero=csrdv_frac_zero,
                csprdv_lognorm_param_shape=fit_shape,
                csprdv_lognorm_param_loc=fit_loc,
                csprdv_lognorm_param_scale=fit_scale,
            )
            add_drop_period = get_or_create_add_drop_period(semester)
            # Saved twice: once to create the row, then again after overriding
            # created_at with the time the demand change actually occurred.
            new_demand_distribution_estimate.save(
                add_drop_period=add_drop_period)
            new_demand_distribution_estimate.created_at = updated_at
            new_demand_distribution_estimate.save(
                add_drop_period=add_drop_period)
            cache.set(
                "current_demand_distribution_estimate",
                new_demand_distribution_estimate,
                timeout=(add_drop_period.estimated_end -
                         add_drop_period.estimated_start).total_seconds() //
                ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES,
            )  # set timeout to roughly follow ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES
Esempio n. 10
0
def recompute_demand_distribution_estimates(
    semesters=None, semesters_precomputed=False, verbose=False
):
    """
    This script recomputes all PcaDemandDistributionEstimate objects for the given semester(s)
    based on saved Registration objects. In doing so, it also recomputes the registration_volume
    and percent_open fields for all sections in the given semester(s)
    (by calling recompute_registration_volumes and recompute_percent_open), after first
    calling recompute_precomputed_fields.

    For each semester, the existing estimates are deleted and then rebuilt by replaying, in
    chronological order, every registration creation/deactivation and every in-add/drop
    status update for the relevant sections. Semesters rejected by
    validate_add_drop_semester are skipped. For the current semester, the last estimate
    created (or None) is stored in cache under "current_demand_distribution_estimate".

    :param semesters: The semesters argument should be a comma-separated list of string semesters
        corresponding to the semesters for which you want to recompute demand distribution
        estimate, i.e. "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. It
        defaults to None, in which case only the current semester is used. If you supply the
        string "all", it will recompute for all semesters found in Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """

    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    # Refresh the fields the replay below reads (e.g. percent_open is used as a fallback
    # for a section's open/closed status when it has no status updates).
    recompute_precomputed_fields(verbose=verbose)
    recompute_registration_volumes(semesters=semesters, semesters_precomputed=True, verbose=verbose)
    recompute_percent_open(semesters=semesters, semesters_precomputed=True, verbose=verbose)

    if verbose:
        print(f"Recomputing demand distribution estimates for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        # Semesters that fail add/drop validation are skipped rather than aborting the run.
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(f"Skipping semester {semester} (unsupported kind for stats).")
            continue
        add_drop_period = get_or_create_add_drop_period(semester)
        # Only the current semester's final estimate is written to cache.
        set_cache = semester == current_semester

        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.
            # If set_cache is True, we will set the current_demand_distribution_estimate variable
            # in cache

            if verbose:
                print(f"Processing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.\n")
                print(
                    "Deleting existing PcaDemandDistributionEstimate objects for semester "
                    f"{semester} (so that we can recompute these objects)..."
                )
            PcaDemandDistributionEstimate.objects.filter(
                semester=semester
            ).select_for_update().delete()

            section_id_to_object = dict()  # maps section id to section object (for this semester)
            volume_changes_map = dict()  # maps section id to list of volume changes
            status_updates_map = dict()  # maps section id to list of status updates

            # tqdm shows a progress bar when verbose; otherwise iterate the iterable as-is.
            iterator_wrapper = tqdm if verbose else (lambda x: x)
            if verbose:
                print("Indexing relevant sections...")
            for section in iterator_wrapper(
                Section.objects.filter(extra_metrics_section_filters, course__semester=semester)
                .annotate(
                    efficient_semester=F("course__semester"),
                )
                .distinct()
            ):
                section_id_to_object[section.id] = section
                volume_changes_map[section.id] = []
                status_updates_map[section.id] = []

            if verbose:
                print("Computing registration volume changes over time for each section...")
            # Each registration contributes +1 at its creation time and, if it was ever
            # deactivated, -1 at its deactivation time.
            # NOTE(review): the section_capacity annotation is not read anywhere in this
            # function; capacity is taken from section_id_to_object below.
            for registration in iterator_wrapper(
                Registration.objects.filter(section_id__in=section_id_to_object.keys())
                .annotate(section_capacity=F("section__capacity"))
                .select_for_update()
            ):
                section_id = registration.section_id
                volume_changes_map[section_id].append(
                    {"date": registration.created_at, "volume_change": 1}
                )
                deactivated_at = registration.deactivated_at
                if deactivated_at is not None:
                    volume_changes_map[section_id].append(
                        {"date": deactivated_at, "volume_change": -1}
                    )

            if verbose:
                print("Collecting status updates over time for each section...")
            # Only status updates flagged as in_add_drop_period are replayed.
            for status_update in iterator_wrapper(
                StatusUpdate.objects.filter(
                    section_id__in=section_id_to_object.keys(), in_add_drop_period=True
                ).select_for_update()
            ):
                section_id = status_update.section_id
                status_updates_map[section_id].append(
                    {
                        "date": status_update.created_at,
                        "old_status": status_update.old_status,
                        "new_status": status_update.new_status,
                    }
                )

            if verbose:
                print("Joining updates for each section and sorting...")
            # Merge both event kinds into a single chronological timeline across all sections.
            all_changes = sorted(
                [
                    {"type": "status_update", "section_id": section_id, **update}
                    for section_id, status_updates_list in status_updates_map.items()
                    for update in status_updates_list
                ]
                + [
                    {"type": "volume_change", "section_id": section_id, **change}
                    for section_id, changes_list in volume_changes_map.items()
                    for change in changes_list
                ],
                key=lambda x: (x["date"], int(x["type"] != "status_update")),
                # put status updates first on matching dates
            )

            # Initialize variables to be maintained in our main all_changes loop
            latest_popularity_dist_estimate = None
            registration_volumes = {section_id: 0 for section_id in section_id_to_object.keys()}
            demands = {section_id: 0 for section_id in section_id_to_object.keys()}

            # Initialize section statuses
            # A section's starting status is the old_status of its earliest status update;
            # sections with no status updates stay None here and get a fallback in the main loop.
            section_status = {section_id: None for section_id in section_id_to_object.keys()}
            for change in all_changes:
                section_id = change["section_id"]
                if change["type"] == "status_update":
                    if section_status[section_id] is None:
                        section_status[section_id] = change["old_status"]

            # Past semesters are treated as fully elapsed (1); the current semester uses
            # how far through add/drop we are right now.
            percent_through = (
                add_drop_period.get_percent_through_add_drop(timezone.now())
                if semester == current_semester
                else 1
            )
            if percent_through == 0:
                if verbose:
                    print(
                        f"Skipping semester {semester} because the add/drop period "
                        f"hasn't started yet."
                    )
                # NOTE(review): this `continue` runs after the delete above and leaves the
                # atomic block without an exception, so the semester's existing estimates
                # are removed, none are recreated, and the cache is left untouched.
                # Confirm this is intended.
                continue
            # Maximum number of consecutive volume changes allowed without creating an
            # estimate: total volume changes divided by the rough minimum number of
            # estimates, scaled by the elapsed fraction of add/drop.
            distribution_estimate_threshold = sum(
                len(changes_list) for changes_list in volume_changes_map.values()
            ) // (ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
            num_changes_without_estimate = 0

            if verbose:
                print(f"Creating PcaDemandDistributionEstimate objects for semester {semester}...")
            for change in iterator_wrapper(all_changes):
                section_id = change["section_id"]

                # Fallback for sections that never had a status update: guess the status
                # from whether the section was open more than half of the time.
                if section_status[section_id] is None:
                    section_status[section_id] = (
                        "O" if section_id_to_object[section_id].percent_open > 0.5 else "C"
                    )
                # Status updates only advance the tracked status; they never trigger an estimate.
                if change["type"] == "status_update":
                    section_status[section_id] = change["new_status"]
                    continue

                date = change["date"]
                volume_change = change["volume_change"]
                registration_volumes[section_id] += volume_change
                # Demand is the running registration volume relative to section capacity.
                # NOTE(review): assumes capacity is nonzero, presumably guaranteed by
                # extra_metrics_section_filters; confirm.
                demands[section_id] = (
                    registration_volumes[section_id] / section_id_to_object[section_id].capacity
                )

                max_id = max(demands.keys(), key=lambda x: demands[x])
                min_id = min(demands.keys(), key=lambda x: demands[x])
                # Create a new estimate when there is none yet, when this change touches the
                # section recorded as highest/lowest demand, when the highest/lowest demand
                # section has changed, or when too many changes have passed without one.
                if (
                    latest_popularity_dist_estimate is None
                    or section_id == latest_popularity_dist_estimate.highest_demand_section_id
                    or section_id == latest_popularity_dist_estimate.lowest_demand_section_id
                    or latest_popularity_dist_estimate.highest_demand_section_id != max_id
                    or latest_popularity_dist_estimate.lowest_demand_section_id != min_id
                    or num_changes_without_estimate >= distribution_estimate_threshold
                ):
                    num_changes_without_estimate = 0
                    # The distribution is fit only over sections currently tracked as closed.
                    closed_sections_demand_values = np.asarray(
                        [val for sec_id, val in demands.items() if section_status[sec_id] == "C"]
                    )
                    csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None, None)
                    if len(closed_sections_demand_values) > 0:
                        closed_sections_positive_demand_values = closed_sections_demand_values[
                            np.where(closed_sections_demand_values > 0)
                        ]
                        # Fraction of closed sections whose demand is not positive.
                        csrdv_frac_zero = 1 - len(closed_sections_positive_demand_values) / len(
                            closed_sections_demand_values
                        )
                        # Fit a lognormal distribution to the positive demand values only.
                        if len(closed_sections_positive_demand_values) > 0:
                            fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                                closed_sections_positive_demand_values
                            )

                    latest_popularity_dist_estimate = PcaDemandDistributionEstimate(
                        created_at=date,
                        semester=semester,
                        highest_demand_section=section_id_to_object[max_id],
                        highest_demand_section_volume=registration_volumes[max_id],
                        lowest_demand_section=section_id_to_object[min_id],
                        lowest_demand_section_volume=registration_volumes[min_id],
                        csrdv_frac_zero=csrdv_frac_zero,
                        csprdv_lognorm_param_shape=fit_shape,
                        csprdv_lognorm_param_loc=fit_loc,
                        csprdv_lognorm_param_scale=fit_scale,
                    )
                    # The estimate is saved, created_at is reassigned to the historical
                    # date, and it is saved again.
                    # NOTE(review): presumably the first save overwrites created_at
                    # (e.g. an auto-populated timestamp), hence the second save; confirm
                    # against the model definition.
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                    latest_popularity_dist_estimate.created_at = date
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                else:
                    num_changes_without_estimate += 1

            # For the current semester, publish the most recent estimate (or None if no
            # estimate was created) with no cache expiry.
            if set_cache:
                if latest_popularity_dist_estimate is not None:
                    cache.set(
                        "current_demand_distribution_estimate",
                        latest_popularity_dist_estimate,
                        timeout=None,
                    )
                else:
                    cache.set("current_demand_distribution_estimate", None, timeout=None)

    if verbose:
        print(
            "Finished recomputing demand distribution estimate and section registration_volume "
            f"fields for semesters {str(semesters)}."
        )
Esempio n. 11
0
def recompute_percent_open(semesters=None, verbose=False, semesters_precomputed=False):
    """
    Recomputes the percent_open field for each section in the given semester(s).

    For every section, the status updates created strictly inside the semester's estimated
    add/drop period are replayed in chronological order, accumulating the number of seconds
    the section spent in status "O":

    - If the section has no such status updates, percent_open is 1.0 or 0.0 depending on
      whether its guessed status is "O" (taken from the latest status update at or before
      the start of add/drop, falling back to the section's current status).
    - For a section in the current semester, percent_open is the open time divided by the
      time from the start of add/drop to the last status update.
    - For a section in any other semester, the status after the last update is assumed to
      hold until the end of add/drop, and percent_open is the open time divided by the
      full length of the add/drop period.

    Each semester is processed inside its own atomic transaction, and the status updates
    read for each section are locked with SELECT ... FOR UPDATE while that transaction runs.

    :param semesters: The semesters argument should be a comma-separated list of string semesters
        corresponding to the semesters for which you want to recompute percent_open fields,
        i.e. "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. It defaults to None,
        in which case only the current semester is used. If you supply the string "all",
        it will recompute for all semesters found in Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """

    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    if verbose:
        print(f"Recomputing open percentages for semesters {str(semesters)}...")

    for semester_num, semester in enumerate(semesters):
        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.

            if verbose:
                print(f"\nProcessing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.")

            add_drop = get_or_create_add_drop_period(semester)
            add_drop_start = add_drop.estimated_start
            add_drop_end = add_drop.estimated_end

            sections = Section.objects.filter(course__semester=semester)
            num_erroneous_updates = 0
            num_total_updates = 0
            for section in sections:
                # Materialize the rows once. Querysets are lazy, so it is this evaluation
                # that actually issues the SELECT ... FOR UPDATE and takes the row locks;
                # the resulting list is then reused below without any further queries.
                status_updates = list(
                    StatusUpdate.objects.filter(
                        section=section, created_at__gt=add_drop_start, created_at__lt=add_drop_end
                    )
                    .order_by("created_at")
                    .select_for_update()
                )
                num_total_updates += len(status_updates)

                if not status_updates:
                    # No updates during add/drop: the section held a single status for the
                    # whole period, so it was open either all or none of the time.
                    try:
                        guess_status = (
                            StatusUpdate.objects.filter(
                                section=section, created_at__lte=add_drop_start
                            )
                            .latest("created_at")
                            .new_status
                        )
                    except StatusUpdate.DoesNotExist:
                        guess_status = section.status
                    section.percent_open = float(guess_status == "O")
                else:
                    total_open_seconds = 0
                    last_dt = add_drop_start
                    last_status = status_updates[0].old_status
                    for update in status_updates:
                        # An update whose old_status disagrees with the status we are
                        # tracking indicates a gap or inconsistency in the recorded history.
                        if last_status != update.old_status:
                            num_erroneous_updates += 1
                        # Open time is credited when an open interval ends.
                        if last_status == "O" and update.new_status != "O":
                            total_open_seconds += (update.created_at - last_dt).total_seconds()
                        last_dt = update.created_at
                        last_status = update.new_status

                    if section.semester != current_semester:
                        # Finished semester: the final status holds until add/drop ends,
                        # and the whole add/drop period is the denominator.
                        section.percent_open = float(
                            total_open_seconds
                            + int(last_status == "O") * (add_drop_end - last_dt).total_seconds()
                        ) / float((add_drop_end - add_drop_start).total_seconds())
                    else:
                        # Current semester: measure only up to the last status update
                        # (last_dt). The created_at__gt filter guarantees last_dt is
                        # strictly after add_drop_start, so the denominator is positive.
                        section.percent_open = float(total_open_seconds) / float(
                            (last_dt - add_drop_start).total_seconds()
                        )
                section.save()
            if verbose:
                print(
                    f"Finished calculating percent_open for {len(sections)} sections from "
                    f"semester {semester}, encountered {num_erroneous_updates} erroneous "
                    f"Status Updates (out of {num_total_updates} total Status Updates)"
                )
    if verbose:
        print(f"Finished recomputing open percentages for semesters {str(semesters)}.")