Example #1
def get_semesters(semesters=None, verbose=False):
    """
    Validate a given string semesters argument, and return a list of the individual string semesters
    specified by the argument.
    """
    possible_semesters = all_semesters()
    if semesters is None:
        semesters = [get_current_semester()]
    elif semesters == "all":
        semesters = list(possible_semesters)
    else:
        semesters = semesters.strip().split(",")
        for s in semesters:
            if s not in possible_semesters:
                raise ValueError(f"Provided semester {s} was not found in the db.")
    if verbose:
        if len(semesters) > 1:
            print(
                "This script's updates for each semester are atomic, i.e. either all the "
                "updates for a certain semester are accepted by the database, or none of them are "
                "(if an error is encountered). If an error is encountered during the "
                "processing of a certain semester, any correctly completed updates for previously "
                "processed semesters will have already been accepted by the database."
            )
        else:
            print(
                "This script's updates for the given semester are atomic, i.e. either all the "
                "updates will be accepted by the database, or none of them will be "
                "(if an error is encountered)."
            )
    return semesters
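
For reference, a minimal usage sketch of the three accepted argument forms (the semester strings below are illustrative and must exist in the db, or a ValueError is raised):

# Hypothetical usage; semester strings are illustrative
get_semesters()                         # -> [get_current_semester()]
get_semesters(semesters="all")          # -> every semester found via all_semesters()
get_semesters(semesters="2019C,2020A")  # -> ["2019C", "2020A"], each validated against the db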
Example #2
def registrar_import(semester=None, query=""):
    if semester is None:
        semester = get_current_semester()

    print("Loading in courses with prefix %s from %s..." % (query, semester))
    results = registrar.get_courses(query, semester)

    missing_sections = set(
        Section.objects.filter(course__semester=semester).values_list(
            "full_code", flat=True))
    for info in tqdm(results):
        upsert_course_from_opendata(info, semester, missing_sections)
    # Mark sections missing from the registrar results as cancelled (for this semester only)
    Section.objects.filter(
        course__semester=semester, full_code__in=missing_sections
    ).update(status="X")

    print("Updating department names...")
    departments = registrar.get_departments()
    for dept_code, dept_name in tqdm(departments.items()):
        dept, _ = Department.objects.get_or_create(code=dept_code)
        dept.name = dept_name
        dept.save()

    print("Loading course statuses from registrar...")
    set_all_status(semester=semester)

    recompute_stats(semesters=semester, verbose=True)

    fill_topics(verbose=True)
    load_crosswalk(print_missing=False, verbose=True)
Example #3
def best_recommendations(
    cluster,
    curr_course_vectors_dict,
    user_vector,
    exclude: Optional[Set[str]] = None,
    n_recommendations=5,
):
    recs = []
    for course in cluster:
        if exclude is not None and course in exclude:
            continue
        course_vector = curr_course_vectors_dict[course]
        similarity = cosine_similarity(course_vector, user_vector)
        recs.append((course, similarity))
    rec_course_to_score = {course: score for course, score in recs}
    recs = [(c.full_code, rec_course_to_score[c.full_code])
            for c in Course.objects.filter(semester=get_current_semester(),
                                           full_code__in=list(
                                               rec_course_to_score.keys()))
            ]  # only recommend currently offered courses
    if n_recommendations > len(recs):
        n_recommendations = len(recs)

    return [
        course for course, _ in heapq.nlargest(n_recommendations, recs,
                                               lambda x: x[1])
    ]
Example #4
    def get_semester(self):
        semester = self.kwargs.get("semester", "current")
        if semester == "current":
            semester = get_current_semester(allow_not_found=True)
            semester = semester if semester is not None else "all"

        return semester
Example #5
    def handle(self, *args, **kwargs):
        root_logger = logging.getLogger("")
        root_logger.setLevel(logging.DEBUG)

        semester = get_current_semester()
        statuses = registrar.get_all_course_status(semester)
        stats = {
            "missing_data": 0,
            "section_not_found": 0,
            "duplicate_updates": 0,
            "sent": 0,
            "parse_error": 0,
            "error": 0,
            "skipped": 0,
        }
        for status in tqdm(statuses):
            data = status
            section_code = data.get("section_id_normalized")
            if section_code is None:
                stats["missing_data"] += 1
                continue

            course_status = data.get("status")
            if course_status is None:
                stats["missing_data"] += 1
                continue

            course_term = data.get("term")
            if course_term is None:
                stats["missing_data"] += 1
                continue

            # Ignore sections not in db
            try:
                _, section = get_course_and_section(section_code, semester)
            except (Section.DoesNotExist, Course.DoesNotExist):
                stats["section_not_found"] += 1
                continue

            # Ignore duplicate updates
            last_status_update = section.last_status_update
            if last_status_update and last_status_update.new_status == course_status:
                stats["duplicate_updates"] += 1
                continue

            if should_send_pca_alert(course_term, course_status):
                try:
                    alert_for_course(
                        section_code,
                        semester=course_term,
                        sent_by="WEB",
                        course_status=course_status,
                    )
                    stats["sent"] += 1
                except ValueError:
                    stats["parse_error"] += 1
            else:
                stats["skipped"] += 1

        print(stats)
Example #6
def should_send_pca_alert(course_term, course_status):
    if get_current_semester() != course_term:
        return False
    add_drop_period = get_or_create_add_drop_period(course_term)
    return (get_bool("SEND_FROM_WEBHOOK", False)
            and (course_status == "O" or course_status == "C")
            and (add_drop_period.end is None or datetime.utcnow().replace(
                tzinfo=gettz(TIME_ZONE)) < add_drop_period.end))
Example #7
def pca_registration_open():
    """
    Returns True iff PCA should be accepting new registrations.
    """
    current_adp = get_or_create_add_drop_period(
        semester=get_current_semester())
    return get_bool("REGISTRATION_OPEN", True) and (
        current_adp.end is None or
        datetime.utcnow().replace(tzinfo=gettz(TIME_ZONE)) < current_adp.end)
Example #8
    def create(self, request, *args, **kwargs):
        if Schedule.objects.filter(id=request.data.get("id")).exists():
            return self.update(request, request.data.get("id"))

        try:
            sections = self.get_sections(request.data)
        except ObjectDoesNotExist:
            return Response(
                {"detail": "One or more sections not found in database."},
                status=status.HTTP_400_BAD_REQUEST,
            )

        semester_check_response = self.check_semester(request.data, sections)
        if semester_check_response is not None:
            return semester_check_response

        try:
            if (
                "id" in request.data
            ):  # Also from above we know that this id does not conflict with existing schedules.
                schedule = self.get_queryset().create(
                    person=request.user,
                    semester=request.data.get("semester", get_current_semester()),
                    name=request.data.get("name"),
                    id=request.data.get("id"),
                )
            else:
                schedule = self.get_queryset().create(
                    person=request.user,
                    semester=request.data.get("semester", get_current_semester()),
                    name=request.data.get("name"),
                )
            schedule.sections.set(sections)
            return Response(
                {"message": "success", "id": schedule.id}, status=status.HTTP_201_CREATED
            )
        except IntegrityError as e:
            return Response(
                {
                    "detail": "IntegrityError encountered while trying to create: "
                    + str(e.__cause__)
                },
                status=status.HTTP_400_BAD_REQUEST,
            )
Example #9
def send_course_alerts(course_code, course_status, semester=None, sent_by=""):
    if semester is None:
        semester = get_current_semester()

    for reg in get_registrations_for_alerts(course_code,
                                            semester,
                                            course_status=course_status):
        send_alert.delay(reg.id,
                         close_notification=(course_status == "C"),
                         sent_by=sent_by)
Example #10
def register_for_course(
    course_code,
    email_address=None,
    phone=None,
    source=SOURCE_PCA,
    api_key=None,
    user=None,
    auto_resub=False,
    close_notification=False,
):
    """
    This method is for the PCA 3rd party API (originally planned to service
    Penn Course Notify, until Notify's rejection of PCA's help and eventual downfall
    (coincidence? we think not...). It still may be used in the future so we are
    keeping the code.
    Returns RegStatus.<STATUS>, section.full_code, registration
    or None for the second two when appropriate
    """
    if (not user and not email_address
            and not phone) or (user and not user.profile.email
                               and not user.profile.phone
                               and not user.profile.push_notifications):
        return RegStatus.NO_CONTACT_INFO, None, None
    try:
        _, section = get_course_and_section(course_code,
                                            get_current_semester())
    except (Course.DoesNotExist, Section.DoesNotExist, ValueError):
        return RegStatus.COURSE_NOT_FOUND, None, None

    if user is None:
        registration = Registration(section=section,
                                    email=email_address,
                                    phone=phone,
                                    source=source)
        registration.validate_phone()
        if section.registrations.filter(
                email=email_address,
                phone=registration.phone,
                **Registration.is_active_filter()).exists():
            return RegStatus.OPEN_REG_EXISTS, section.full_code, None
    else:
        if section.registrations.filter(
                user=user, **Registration.is_active_filter()).exists():
            return RegStatus.OPEN_REG_EXISTS, section.full_code, None
        if close_notification and not user.profile.email and not user.profile.push_notifications:
            return RegStatus.TEXT_CLOSE_NOTIFICATION, section.full_code, None
        registration = Registration(section=section, user=user, source=source)
        registration.auto_resubscribe = auto_resub
        registration.close_notification = close_notification

    registration.api_key = api_key
    registration.save()

    return RegStatus.SUCCESS, section.full_code, registration
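
As a hedged sketch of how a caller might consume the return value described in the docstring (the course code and email address are made-up examples):

# Hypothetical caller; course code and email address are illustrative
reg_status, full_code, registration = register_for_course(
    "CIS-120-001", email_address="student@example.com"
)
if reg_status == RegStatus.SUCCESS:
    print(f"Created registration {registration.id} for {full_code}")
else:
    # full_code and/or registration may be None for non-success statuses
    print(f"No registration created: {reg_status}")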
Example #11
    def get_queryset(self):
        sem = get_current_semester()
        queryset = Schedule.objects.filter(person=self.request.user, semester=sem)
        queryset = queryset.prefetch_related(
            Prefetch("sections", Section.with_reviews.all()),
            "sections__associated_sections",
            "sections__instructors",
            "sections__meetings",
            "sections__meetings__room",
        )
        return queryset
Example #12
def pre_ngss_requirement_filter(queryset, req_ids):
    if not req_ids:
        return queryset
    query = Q()
    for req_id in req_ids.split(","):
        code, school = req_id.split("@")
        try:
            requirement = PreNGSSRequirement.objects.get(
                code=code, school=school, semester=get_current_semester())
        except PreNGSSRequirement.DoesNotExist:
            continue
        query &= Q(id__in=requirement.satisfying_courses.all())

    return queryset.filter(query)
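
The req_ids argument is a comma-separated list of "CODE@SCHOOL" identifiers; a minimal, hypothetical invocation (the requirement ids are illustrative):

# Hypothetical call; requirement ids are illustrative
filtered_courses = pre_ngss_requirement_filter(
    Course.objects.filter(semester=get_current_semester()),
    "MFR@SAS,ENG@SEAS",
)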
Example #13
def set_all_status(semester=None):
    if semester is None:
        semester = get_current_semester()
    statuses = registrar.get_all_course_status(semester)
    for status in tqdm(statuses):
        section_code = status.get("section_id_normalized")
        if section_code is None:
            continue

        try:
            _, section = get_course_and_section(section_code, semester)
        except (Section.DoesNotExist, Course.DoesNotExist):
            continue
        section.status = status["status"]
        section.save()
Example #14
    def handle(self, *args, **options):
        days = options["days"]
        send_to_slack = options["slack"]

        start = timezone.now() - timezone.timedelta(days=days)

        qs = Registration.objects.filter(
            section__course__semester=get_current_semester())

        num_registrations = qs.filter(created_at__gte=start,
                                      resubscribed_from__isnull=True).count()
        num_alerts_sent = qs.filter(notification_sent=True,
                                    notification_sent_at__gte=start).count()
        num_resubscribe = qs.filter(resubscribed_from__isnull=False,
                                    created_at__gte=start,
                                    auto_resubscribe=False).count()
        num_status_updates = StatusUpdate.objects.filter(
            created_at__gte=start).count()
        num_active_perpetual = qs.filter(
            resubscribed_to__isnull=True,
            auto_resubscribe=True,
            deleted=False,
            cancelled=False,
            notification_sent=False,
        ).count()
        num_cancelled_perpetual = (qs.filter(
            resubscribed_to__isnull=True,
            auto_resubscribe=True,
        ).filter(Q(deleted=True) | Q(cancelled=True)).count())

        message = dedent(f"""
        {f'Penn Course Alert stats in the past {days} day(s)'
         f' since {start.strftime("%H:%M on %d %B, %Y")}'}:
        New registrations: {num_registrations}
        Alerts sent: {num_alerts_sent}
        Manual resubscribes: {num_resubscribe}
        Active auto-resubscribe requests: {num_active_perpetual}
        Cancelled auto-resubscribe requests: {num_cancelled_perpetual}
        Status Updates from Penn InTouch: {num_status_updates}
        """)

        if send_to_slack:
            url = settings.STATS_WEBHOOK
            print("sending to Slack...")
            requests.post(url, data=json.dumps({"text": message}))
        else:
            print(message)
Example #15
def vectorize_user_by_courses(curr_courses, past_courses,
                              curr_course_vectors_dict,
                              past_course_vectors_dict):
    n = len(next(iter(curr_course_vectors_dict.values())))

    # Input validation
    all_courses = set(curr_courses) | set(past_courses)
    if len(all_courses) != len(curr_courses) + len(past_courses):
        raise ValueError(
            "Repeated courses given in curr_courses and/or past_courses. "
            f"curr_courses: {str(curr_courses)}. past_courses: {str(past_courses)}"
        )
    invalid_curr_courses = set(curr_courses) - {
        c.full_code
        for c in Course.objects.filter(semester=get_current_semester(),
                                       full_code__in=curr_courses)
    }
    if len(invalid_curr_courses) > 0:
        raise ValueError(
            "The following courses in curr_courses are invalid or not offered this semester: "
            f"{str(invalid_curr_courses)}")
    invalid_past_courses = set(past_courses) - {
        c.full_code
        for c in Course.objects.filter(full_code__in=past_courses)
    }
    if len(invalid_past_courses) > 0:
        raise ValueError(
            f"The following courses in past_courses are invalid: {str(invalid_past_courses)}"
        )

    # Eliminate courses not in the model
    curr_courses = [c for c in curr_courses if c in curr_course_vectors_dict]
    past_courses = [c for c in past_courses if c in past_course_vectors_dict]

    curr_courses_vector = (np.zeros(n) if len(curr_courses) == 0 else sum(
        curr_course_vectors_dict[course] for course in curr_courses))
    past_courses_vector = (np.zeros(n) if len(past_courses) == 0 else sum(
        past_course_vectors_dict[course] for course in past_courses))

    vector = curr_courses_vector * CURR_COURSES_BIAS + past_courses_vector
    norm = np.linalg.norm(vector)
    vector = vector / norm if norm > 0 else vector
    return vector, all_courses
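
A minimal sketch of calling this helper directly (the course codes are illustrative, and curr_course_vectors_dict / past_course_vectors_dict are assumed to come from the trained recommendation model):

# Hypothetical call; course codes are illustrative and must pass the validation above
user_vector, user_courses = vectorize_user_by_courses(
    curr_courses=["CIS-120", "CIS-160"],
    past_courses=["MATH-104"],
    curr_course_vectors_dict=curr_course_vectors_dict,
    past_course_vectors_dict=past_course_vectors_dict,
)
# user_vector is a normalized numpy vector; user_courses is the set of all given codes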
Example #16
def vectorize_user(user, curr_course_vectors_dict, past_course_vectors_dict):
    """
    Aggregates a vector over all the courses in the user's schedule
    """
    curr_semester = get_current_semester()
    curr_courses = set([
        s
        for s in Schedule.objects.filter(person=user, semester=curr_semester).
        values_list("sections__course__full_code", flat=True) if s is not None
    ])
    past_courses = set([
        s for s in Schedule.objects.filter(
            person=user, semester__lt=curr_semester).values_list(
                "sections__course__full_code", flat=True) if s is not None
    ])
    past_courses = past_courses - curr_courses
    return vectorize_user_by_courses(list(curr_courses), list(past_courses),
                                     curr_course_vectors_dict,
                                     past_course_vectors_dict)
Example #17
    def current_percent_open(self):
        """
        The percentage (expressed as a decimal number between 0 and 1) of the period between
        the beginning of its add/drop period and min[the current time, the end of its
        registration period] that this section was open. If this section's registration
        period hasn't started yet, this property is null (None in Python).
        """
        from courses.util import get_current_semester, get_or_create_add_drop_period

        # ^ imported here to avoid circular imports

        if self.semester == get_current_semester():
            add_drop = get_or_create_add_drop_period(self.semester)
            add_drop_start = add_drop.estimated_start
            add_drop_end = add_drop.estimated_end
            current_time = timezone.now()
            if current_time <= add_drop_start:
                return None
            try:
                last_status_update = StatusUpdate.objects.filter(
                    section=self,
                    created_at__gt=add_drop_start,
                    created_at__lt=add_drop_end).latest("created_at")
            except StatusUpdate.DoesNotExist:
                last_status_update = None
            last_update_dt = last_status_update.created_at if last_status_update else add_drop_start
            period_seconds = float((min(current_time, add_drop_end) -
                                    add_drop_start).total_seconds())
            percent_after_update = (float(self.is_open) * float(
                (current_time - last_update_dt).total_seconds()) /
                                    period_seconds)
            if last_status_update is None:
                return percent_after_update
            percent_before_update = (float(self.percent_open) * float(
                (last_update_dt - add_drop_start).total_seconds()) /
                                     period_seconds)
            return percent_before_update + percent_after_update
        else:
            return self.percent_open
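
As a purely numerical illustration of the weighting above (all numbers are made up, not from the codebase):

# Toy numbers mirroring the computation above
period_seconds = 100_000.0        # elapsed add/drop window so far
percent_open_before = 0.40        # self.percent_open as of the last status update
seconds_before_update = 60_000.0  # last_update_dt - add_drop_start
seconds_after_update = 40_000.0   # current_time - last_update_dt (section open since then)
current_percent_open = (
    percent_open_before * seconds_before_update / period_seconds
    + 1.0 * seconds_after_update / period_seconds
)
assert abs(current_percent_open - 0.64) < 1e-9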
Example #18
    def update(self, request, pk=None):
        if not Schedule.objects.filter(id=pk).exists():
            return Response({"detail": "Not found."}, status=status.HTTP_404_NOT_FOUND)
        try:
            schedule = self.get_queryset().get(id=pk)
        except Schedule.DoesNotExist:
            return Response(
                {"detail": "You do not have access to the specified schedule."},
                status=status.HTTP_403_FORBIDDEN,
            )

        try:
            sections = self.get_sections(request.data)
        except ObjectDoesNotExist:
            return Response(
                {"detail": "One or more sections not found in database."},
                status=status.HTTP_400_BAD_REQUEST,
            )

        semester_check_response = self.check_semester(request.data, sections)
        if semester_check_response is not None:
            return semester_check_response

        try:
            schedule.person = request.user
            schedule.semester = request.data.get("semester", get_current_semester())
            schedule.name = request.data.get("name")
            schedule.save()
            schedule.sections.set(sections)
            return Response({"message": "success", "id": schedule.id}, status=status.HTTP_200_OK)
        except IntegrityError as e:
            return Response(
                {
                    "detail": "IntegrityError encountered while trying to update: "
                    + str(e.__cause__)
                },
                status=status.HTTP_400_BAD_REQUEST,
            )
Example #19
def load_add_drop_dates(verbose=False):
    semester = get_current_semester()
    validate_add_drop_semester(semester)

    if verbose:
        print(
            f"Loading course selection period dates for semester {semester} from the Almanac"
        )
    with transaction.atomic():
        adp = get_or_create_add_drop_period(semester)
        start_date = adp.start
        end_date = adp.end
        html = requests.get(
            "https://almanac.upenn.edu/penn-academic-calendar").content
        soup = BeautifulSoup(html, "html.parser")
        if semester[4] == "C":
            start_sem = semester[:4] + " spring"
            end_sem = semester[:4] + " fall"
        elif semester[4] == "A":
            start_sem = str(int(semester[:4]) - 1) + " fall"
            end_sem = semester[:4] + " spring"
        else:
            raise ValueError(
                "This script currently only supports fall or spring semesters; "
                f"{semester} is invalid")
        tz = gettz(TIME_ZONE)

        s_year, s_month, s_day, e_year, e_month, e_day = (None, ) * 6
        start_mode = 0  # 0 if start semester hasn't been found, 1 if it has, 2 if finished sem
        end_mode = 0  # 0 if end semester hasn't been found, 1 if it has, 2 if finished sem
        all_th_parents = {el.parent for el in soup.find_all("th")}
        months = [
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
        ]
        for tr_el in soup.find_all("tr"):
            if tr_el in all_th_parents:
                sem_name = tr_el.th.get_text().lower()
                if start_sem in sem_name:
                    start_mode = 1
                elif start_mode == 1:
                    start_mode = 2
                if end_sem in sem_name:
                    end_mode = 1
                elif end_mode == 1:
                    end_mode = 2
            else:
                children = list(tr_el.findChildren("td", recursive=False))
                title = children[0]
                date_string = children[1].get_text()
                if title is not None and "advance registration" in title.get_text(
                ).lower():
                    if start_mode == 1:
                        dates = date_string.split("-")
                        ar_begin_month = None
                        for month in months:
                            if month in dates[0].lower():
                                ar_begin_month = month
                        ar_end_month = None
                        for month in months:
                            if month in dates[1].lower():
                                ar_end_month = month
                        if ar_end_month is None:
                            ar_end_month = ar_begin_month
                        s_year = int(start_sem[:4])
                        if ar_end_month is not None:
                            s_month = months.index(ar_end_month) + 1
                        day_candidates = [
                            int(s) for s in dates[1].split() if s.isdigit()
                        ]
                        if len(day_candidates) > 0:
                            s_day = day_candidates[0]
                if title is not None and "course selection period ends" in title.get_text(
                ).lower():
                    if end_mode == 1:
                        course_sel_end_month = None
                        for month in months:
                            if month in date_string.lower():
                                course_sel_end_month = month
                        e_year = int(end_sem[:4])
                        if course_sel_end_month is not None:
                            e_month = months.index(course_sel_end_month) + 1
                        day_candidates = [
                            int(s) for s in date_string.split() if s.isdigit()
                        ]
                        if len(day_candidates) > 0:
                            e_day = day_candidates[0]
        if None not in [s_year, s_month, s_day] and start_date is None:
            start_date = make_aware(
                datetime.strptime(f"{s_year}-{s_month}-{s_day} 07:00",
                                  "%Y-%m-%d %H:%M") + timedelta(days=1),
                timezone=tz,
            )
            if verbose:
                print(
                    "NOTE: Add/drop date start was estimated as the end of the advanced "
                    "registration period. Replace this date with the actual start of the "
                    "add/drop period through the Django admin console when it is announced "
                    "to students each semester.")
        if None not in [e_year, e_month, e_day]:
            end_date = make_aware(
                datetime.strptime(f"{e_year}-{e_month}-{e_day} 11:59",
                                  "%Y-%m-%d %H:%M"),
                timezone=tz,
            )
        adp.estimated_start, adp.end = start_date, end_date
        adp.save()
    if verbose:
        print("Done!")
Example #20
def get_demand_data(semesters, section_query="", verbose=False):
    current_semester = get_current_semester()
    output_dict = dict()

    recompute_precomputed_fields(verbose=True)

    if verbose:
        print(f"Computing demand data for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(
                    f"Skipping semester {semester} (unsupported kind for stats)."
                )
            continue
        add_drop_period = get_or_create_add_drop_period(semester)

        if verbose:
            print(f"Processing semester {semester}, "
                  f"{(semester_num+1)}/{len(semesters)}.\n")

        output_dict[semester] = []  # list of demand data dicts
        section_id_to_object = dict()  # maps section id to section object (for this semester)
        volume_changes_map = dict()  # maps section id to list of volume changes
        status_updates_map = dict()  # maps section id to list of status updates

        iterator_wrapper = tqdm if verbose else (lambda x: x)
        if verbose:
            print("Indexing relevant sections...")
        for section in iterator_wrapper(
                Section.objects.filter(
                    extra_metrics_section_filters,
                    full_code__startswith=section_query,
                    course__semester=semester,
                ).annotate(
                    efficient_semester=F("course__semester"), ).distinct()):
            section_id_to_object[section.id] = section
            volume_changes_map[section.id] = []
            status_updates_map[section.id] = []

        if verbose:
            print(
                "Computing registration volume changes over time for each section..."
            )
        for registration in iterator_wrapper(
                Registration.objects.filter(
                    section_id__in=section_id_to_object.keys()).annotate(
                        section_capacity=F("section__capacity"))):
            section_id = registration.section_id
            volume_changes_map[section_id].append({
                "date": registration.created_at,
                "volume_change": 1
            })
            deactivated_at = registration.deactivated_at
            if deactivated_at is not None:
                volume_changes_map[section_id].append({
                    "date": deactivated_at,
                    "volume_change": -1
                })

        if verbose:
            print("Collecting status updates over time for each section...")
        for status_update in iterator_wrapper(
                StatusUpdate.objects.filter(
                    section_id__in=section_id_to_object.keys(),
                    in_add_drop_period=True)):
            section_id = status_update.section_id
            status_updates_map[section_id].append({
                "date":
                status_update.created_at,
                "old_status":
                status_update.old_status,
                "new_status":
                status_update.new_status,
            })

        if verbose:
            print("Joining updates for each section and sorting...")
        all_changes = sorted(
            [{
                "type": "status_update",
                "section_id": section_id,
                **update
            }
             for section_id, status_updates_list in status_updates_map.items()
             for update in status_updates_list] + [{
                 "type": "volume_change",
                 "section_id": section_id,
                 **change
             } for section_id, changes_list in volume_changes_map.items()
                                                   for change in changes_list],
            key=lambda x: (x["date"], int(x["type"] != "status_update")),
            # put status updates first on matching dates
        )

        # Initialize variables to be maintained in our main all_changes loop
        latest_popularity_dist_estimate = None
        registration_volumes = {
            section_id: 0
            for section_id in section_id_to_object.keys()
        }
        demands = {section_id: 0 for section_id in section_id_to_object.keys()}

        # Initialize section statuses
        section_status = {
            section_id: None
            for section_id in section_id_to_object.keys()
        }
        for change in all_changes:
            section_id = change["section_id"]
            if change["type"] == "status_update":
                if section_status[section_id] is None:
                    section_status[section_id] = change["old_status"]

        percent_through = (add_drop_period.get_percent_through_add_drop(
            timezone.now()) if semester == current_semester else 1)
        if percent_through == 0:
            if verbose:
                print(
                    f"Skipping semester {semester} because the add/drop period "
                    f"hasn't started yet.")
            continue
        distribution_estimate_threshold = sum(
            len(changes_list)
            for changes_list in volume_changes_map.values()) // (
                ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
        num_changes_without_estimate = 0

        if verbose:
            print(f"Compiling demand data for semester {semester}...")
        for change in iterator_wrapper(all_changes):
            section_id = change["section_id"]

            if section_status[section_id] is None:
                section_status[section_id] = (
                    "O" if section_id_to_object[section_id].percent_open > 0.5
                    else "C")
            if change["type"] == "status_update":
                section_status[section_id] = change["new_status"]
                continue

            date = change["date"]
            volume_change = change["volume_change"]
            registration_volumes[section_id] += volume_change
            demands[section_id] = (registration_volumes[section_id] /
                                   section_id_to_object[section_id].capacity)
            max_id = max(demands.keys(), key=lambda x: demands[x])
            min_id = min(demands.keys(), key=lambda x: demands[x])
            if (
                latest_popularity_dist_estimate is None
                or section_id == latest_popularity_dist_estimate["highest_demand_section"].id
                or section_id == latest_popularity_dist_estimate["lowest_demand_section"].id
                or latest_popularity_dist_estimate["highest_demand_section"].id != max_id
                or latest_popularity_dist_estimate["lowest_demand_section"].id != min_id
                or num_changes_without_estimate >= distribution_estimate_threshold
            ):
                num_changes_without_estimate = 0
                output_dict[semester].append({
                    "percent_through":
                    percent_through,
                    "demands": [
                        val for sec_id, val in demands.items()
                        if section_status[sec_id] == "C"
                    ],
                })

                latest_popularity_dist_estimate = {
                    "created_at": date,
                    "semester": semester,
                    "highest_demand_section": section_id_to_object[max_id],
                    "highest_demand_section_volume":
                    registration_volumes[max_id],
                    "lowest_demand_section": section_id_to_object[min_id],
                    "lowest_demand_section_volume":
                    registration_volumes[min_id],
                }
            else:
                num_changes_without_estimate += 1

    return output_dict
Example #21
    def get_queryset(self):
        return StatusUpdate.objects.filter(
            section__full_code=self.kwargs["full_code"],
            section__course__semester=get_current_semester(),
            in_add_drop_period=True,
        ).order_by("created_at")
Example #22
    def save(self, load_script=False, *args, **kwargs):
        """
        This save method enforces the following invariants on the registration:
          - The `phone` field is converted to E164 format, or set to `None` if unparseable.
          - If the `user` field is not `None`, but either of the legacy `phone`
            or `email` fields are not `None`, the contents of the `phone` / `email` fields
            are moved to the `profile` of the `user` object (this was only a concern during the
            PCA refresh transition process, when we switched away from using these legacy fields).
          - If `head_registration` is `None`, it is set to a self-reference.
          - Any other registration whose `head_registration` equals `self.resubscribed_from`
            are updated to have `self` as their `head_registration`.
          - The `original_created_at` field is set to the `created_at` of the tail of the
            resubscribe chain.

        If `load_script` is set to False (indicating this registration is being actively
        created by a PCA user, rather than being loaded in from an external data source),
        and the registration's semester is the current semester, and the registration
        has just been created or deactivated, then the `PcaDemandDistributionEstimate` model
        and `current_demand_distribution_estimate` cache are asynchronously updated
        (via a celery task) to reflect the resulting section demand change.
        """
        from alert.tasks import section_demand_change
        from courses.util import get_set_id, is_fk_set

        # ^ imported here to avoid circular imports

        with transaction.atomic():
            self.validate_phone()
            if self.user is not None:
                if self.email is not None:
                    user_data, _ = UserProfile.objects.get_or_create(
                        user=self.user)
                    user_data.email = self.email
                    user_data.save()
                    self.user.profile = user_data
                    self.user.save()
                    self.email = None
                if self.phone is not None:
                    user_data, _ = UserProfile.objects.get_or_create(
                        user=self.user)
                    user_data.phone = self.phone
                    user_data.save()
                    self.user.profile = user_data
                    self.user.save()
                    self.phone = None

            # Find old registration
            old_registration = Registration.objects.get(
                id=self.id) if self.id else None
            was_active = bool(old_registration and old_registration.is_active)

            # Set head_registration to self if not set
            if not is_fk_set(self, "head_registration"):
                self.head_registration_id = self.id or get_set_id(self)

            super().save(*args, **kwargs)

            if self.resubscribed_from_id:
                Registration.objects.filter(
                    head_registration_id=self.resubscribed_from_id).update(
                        head_registration=self)

            if self.original_created_at is None:
                self.original_created_at = self.get_original_registration(
                ).created_at
                super().save()

            if (not load_script
                    and self.section.semester == get_current_semester()
                    and was_active != self.is_active):
                section = self.section
                volume_change = int(self.is_active) - int(was_active)
                if volume_change > 0 or section.registration_volume >= 1:
                    section.registration_volume += volume_change
                    section.save()
                section_demand_change.delay(section.id, self.updated_at)
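
Based on the docstring, a bulk data-load path would presumably pass load_script=True so that the celery demand-update task is skipped (a sketch only; the field values are illustrative):

# Hypothetical load path; load_script=True skips the asynchronous
# demand-distribution update described in the docstring
registration = Registration(section=section, user=user, source=SOURCE_PCA)
registration.save(load_script=True)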
Example #23
def section_demand_change(section_id, updated_at):
    """
    This function should be called when a section's demand changes (i.e. the number of
    active registrations changes, or the section's status is updated). It updates the
    `PcaDemandDistributionEstimate` model and `current_demand_distribution_estimate`
    cache to reflect the demand change.

    :param: section_id: the id of the section involved in the demand change
    :param: updated_at: the datetime at which the demand change occurred
    """
    section = Section.objects.get(id=section_id)
    semester = section.semester
    if semester != get_current_semester():
        return

    with transaction.atomic():
        create_new_distribution_estimate = False
        sentinel = object()
        current_demand_distribution_estimate = cache.get(
            "current_demand_distribution_estimate", sentinel)
        if (current_demand_distribution_estimate == sentinel
                or current_demand_distribution_estimate.semester != semester):
            create_new_distribution_estimate = True

        sections_qs = (Section.objects.filter(
            extra_metrics_section_filters,
            course__semester=semester).select_for_update().annotate(
                raw_demand=Case(
                    When(
                        Q(capacity__gt=0),
                        then=(Cast(
                            "registration_volume",
                            models.FloatField(),
                        ) / Cast("capacity", models.FloatField())),
                    ),
                    default=None,
                    output_field=models.FloatField(),
                ), ).order_by("raw_demand"))

        try:
            lowest_demand_section = sections_qs[:1].get()
            # QuerySets don't support negative slicing, so reverse the ordering
            # to get the section with the highest raw demand
            highest_demand_section = sections_qs.reverse()[:1].get()
        except Section.DoesNotExist:
            return  # Don't add a PcaDemandDistributionEstimate -- there are no valid sections yet

        if (create_new_distribution_estimate
                or highest_demand_section.raw_demand >
                current_demand_distribution_estimate.highest_raw_demand
                or lowest_demand_section.raw_demand <
                current_demand_distribution_estimate.lowest_raw_demand):
            closed_sections_demand_values = np.asarray(
                sections_qs.filter(status="C").values_list("raw_demand",
                                                           flat=True))
            # "The term 'closed sections positive raw demand values' is
            # sometimes abbreviated as 'csprdv'
            csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None,
                                                              None)
            if len(closed_sections_demand_values) > 0:
                closed_sections_positive_demand_values = closed_sections_demand_values[
                    np.where(closed_sections_demand_values > 0)]
                csrdv_frac_zero = 1 - len(
                    closed_sections_positive_demand_values) / len(
                        closed_sections_demand_values)
                if len(closed_sections_positive_demand_values) > 0:
                    fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                        closed_sections_positive_demand_values)
            new_demand_distribution_estimate = PcaDemandDistributionEstimate(
                semester=semester,
                highest_demand_section=highest_demand_section,
                highest_demand_section_volume=highest_demand_section.registration_volume,
                lowest_demand_section=lowest_demand_section,
                lowest_demand_section_volume=lowest_demand_section.registration_volume,
                csrdv_frac_zero=csrdv_frac_zero,
                csprdv_lognorm_param_shape=fit_shape,
                csprdv_lognorm_param_loc=fit_loc,
                csprdv_lognorm_param_scale=fit_scale,
            )
            add_drop_period = get_or_create_add_drop_period(semester)
            new_demand_distribution_estimate.save(
                add_drop_period=add_drop_period)
            new_demand_distribution_estimate.created_at = updated_at
            new_demand_distribution_estimate.save(
                add_drop_period=add_drop_period)
            cache.set(
                "current_demand_distribution_estimate",
                new_demand_distribution_estimate,
                timeout=(add_drop_period.estimated_end -
                         add_drop_period.estimated_start).total_seconds() //
                ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES,
            )  # set timeout to roughly follow ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES
Example #24
def recompute_percent_open(semesters=None, verbose=False, semesters_precomputed=False):
    """
    Recomputes the percent_open field for each section in the given semester(s).

    :param semesters: The semesters argument should be a comma-separated list of string semesters
        corresponding to the semesters for which you want to recompute percent_open fields,
        i.e. "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. It defaults to None,
        in which case only the current semester is used. If you supply the string "all",
        it will recompute for all semesters found in Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """

    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    if verbose:
        print(f"Recomputing open percentages for semesters {str(semesters)}...")

    for semester_num, semester in enumerate(semesters):
        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.

            if verbose:
                print(f"\nProcessing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.")

            add_drop = get_or_create_add_drop_period(semester)
            add_drop_start = add_drop.estimated_start
            add_drop_end = add_drop.estimated_end

            StatusUpdate.objects.filter(section__course__semester=semester).select_for_update()

            sections = Section.objects.filter(course__semester=semester)
            num_erroneous_updates = 0
            num_total_updates = 0
            for section in sections:
                status_updates = StatusUpdate.objects.filter(
                    section=section, created_at__gt=add_drop_start, created_at__lt=add_drop_end
                ).order_by("created_at")
                num_total_updates += len(status_updates)
                total_open_seconds = 0
                if not status_updates.exists():
                    try:
                        guess_status = (
                            StatusUpdate.objects.filter(
                                section=section, created_at__lte=add_drop_start
                            )
                            .latest("created_at")
                            .new_status
                        )
                    except StatusUpdate.DoesNotExist:
                        guess_status = section.status
                    section.percent_open = float(guess_status == "O")
                else:
                    last_dt = add_drop_start
                    last_status = status_updates.first().old_status
                    for update in status_updates:
                        if last_status != update.old_status:
                            num_erroneous_updates += 1
                        if last_status == "O" and update.new_status != "O":
                            total_open_seconds += (update.created_at - last_dt).total_seconds()
                        last_dt = update.created_at
                        last_status = update.new_status
                    section.percent_open = float(total_open_seconds) / float(
                        (status_updates.last().created_at - add_drop_start).total_seconds()
                    )
                    if section.semester != current_semester:
                        section.percent_open = float(
                            total_open_seconds
                            + int(last_status == "O") * (add_drop_end - last_dt).total_seconds()
                        ) / float((add_drop_end - add_drop_start).total_seconds())
                section.save()
            if verbose:
                print(
                    f"Finished calculating percent_open for {len(sections)} sections from "
                    f"semester {semester}, encountered {num_erroneous_updates} erroneous "
                    f"Status Updates (out of {num_total_updates} total Status Updates)"
                )
    if verbose:
        print(f"Finished recomputing open percentages for semesters {str(semesters)}.")
Example #25
def recompute_demand_distribution_estimates(
    semesters=None, semesters_precomputed=False, verbose=False
):
    """
    This script recomputes all PcaDemandDistributionEstimate objects for the given semester(s)
    based on saved Registration objects. In doing so, it also recomputes the registration_volume
    and percent_open fields for all sections in the given semester(s)
    (by calling recompute_registration_volumes and recompute_percent_open).

    :param semesters: The semesters argument should be a comma-separated list of string semesters
        corresponding to the semesters for which you want to recompute demand distribution
        estimate, i.e. "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. It
        defaults to None, in which case only the current semester is used. If you supply the
        string "all", it will recompute for all semesters found in Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """

    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    recompute_precomputed_fields(verbose=verbose)
    recompute_registration_volumes(semesters=semesters, semesters_precomputed=True, verbose=verbose)
    recompute_percent_open(semesters=semesters, semesters_precomputed=True, verbose=verbose)

    if verbose:
        print(f"Recomputing demand distribution estimates for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(f"Skipping semester {semester} (unsupported kind for stats).")
            continue
        add_drop_period = get_or_create_add_drop_period(semester)
        set_cache = semester == current_semester

        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.
            # If set_cache is True, we will set the current_demand_distribution_estimate variable
            # in cache

            if verbose:
                print(f"Processing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.\n")
                print(
                    "Deleting existing PcaDemandDistributionEstimate objects for semester "
                    f"{semester} (so that we can recompute these objects)..."
                )
            PcaDemandDistributionEstimate.objects.filter(
                semester=semester
            ).select_for_update().delete()

            section_id_to_object = dict()  # maps section id to section object (for this semester)
            volume_changes_map = dict()  # maps section id to list of volume changes
            status_updates_map = dict()  # maps section id to list of status updates

            iterator_wrapper = tqdm if verbose else (lambda x: x)
            if verbose:
                print("Indexing relevant sections...")
            for section in iterator_wrapper(
                Section.objects.filter(extra_metrics_section_filters, course__semester=semester)
                .annotate(
                    efficient_semester=F("course__semester"),
                )
                .distinct()
            ):
                section_id_to_object[section.id] = section
                volume_changes_map[section.id] = []
                status_updates_map[section.id] = []

            if verbose:
                print("Computing registration volume changes over time for each section...")
            for registration in iterator_wrapper(
                Registration.objects.filter(section_id__in=section_id_to_object.keys())
                .annotate(section_capacity=F("section__capacity"))
                .select_for_update()
            ):
                section_id = registration.section_id
                volume_changes_map[section_id].append(
                    {"date": registration.created_at, "volume_change": 1}
                )
                deactivated_at = registration.deactivated_at
                if deactivated_at is not None:
                    volume_changes_map[section_id].append(
                        {"date": deactivated_at, "volume_change": -1}
                    )

            if verbose:
                print("Collecting status updates over time for each section...")
            for status_update in iterator_wrapper(
                StatusUpdate.objects.filter(
                    section_id__in=section_id_to_object.keys(), in_add_drop_period=True
                ).select_for_update()
            ):
                section_id = status_update.section_id
                status_updates_map[section_id].append(
                    {
                        "date": status_update.created_at,
                        "old_status": status_update.old_status,
                        "new_status": status_update.new_status,
                    }
                )

            if verbose:
                print("Joining updates for each section and sorting...")
            all_changes = sorted(
                [
                    {"type": "status_update", "section_id": section_id, **update}
                    for section_id, status_updates_list in status_updates_map.items()
                    for update in status_updates_list
                ]
                + [
                    {"type": "volume_change", "section_id": section_id, **change}
                    for section_id, changes_list in volume_changes_map.items()
                    for change in changes_list
                ],
                key=lambda x: (x["date"], int(x["type"] != "status_update")),
                # put status updates first on matching dates
            )

            # Initialize variables to be maintained in our main all_changes loop
            latest_popularity_dist_estimate = None
            registration_volumes = {section_id: 0 for section_id in section_id_to_object.keys()}
            demands = {section_id: 0 for section_id in section_id_to_object.keys()}

            # Initialize section statuses
            section_status = {section_id: None for section_id in section_id_to_object.keys()}
            for change in all_changes:
                section_id = change["section_id"]
                if change["type"] == "status_update":
                    if section_status[section_id] is None:
                        section_status[section_id] = change["old_status"]

            percent_through = (
                add_drop_period.get_percent_through_add_drop(timezone.now())
                if semester == current_semester
                else 1
            )
            if percent_through == 0:
                if verbose:
                    print(
                        f"Skipping semester {semester} because the add/drop period "
                        f"hasn't started yet."
                    )
                continue
            distribution_estimate_threshold = sum(
                len(changes_list) for changes_list in volume_changes_map.values()
            ) // (ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
            num_changes_without_estimate = 0

            if verbose:
                print(f"Creating PcaDemandDistributionEstimate objects for semester {semester}...")
            for change in iterator_wrapper(all_changes):
                section_id = change["section_id"]

                if section_status[section_id] is None:
                    section_status[section_id] = (
                        "O" if section_id_to_object[section_id].percent_open > 0.5 else "C"
                    )
                if change["type"] == "status_update":
                    section_status[section_id] = change["new_status"]
                    continue

                date = change["date"]
                volume_change = change["volume_change"]
                registration_volumes[section_id] += volume_change
                demands[section_id] = (
                    registration_volumes[section_id] / section_id_to_object[section_id].capacity
                )

                max_id = max(demands.keys(), key=lambda x: demands[x])
                min_id = min(demands.keys(), key=lambda x: demands[x])
                if (
                    latest_popularity_dist_estimate is None
                    or section_id == latest_popularity_dist_estimate.highest_demand_section_id
                    or section_id == latest_popularity_dist_estimate.lowest_demand_section_id
                    or latest_popularity_dist_estimate.highest_demand_section_id != max_id
                    or latest_popularity_dist_estimate.lowest_demand_section_id != min_id
                    or num_changes_without_estimate >= distribution_estimate_threshold
                ):
                    num_changes_without_estimate = 0
                    closed_sections_demand_values = np.asarray(
                        [val for sec_id, val in demands.items() if section_status[sec_id] == "C"]
                    )
                    csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None, None)
                    if len(closed_sections_demand_values) > 0:
                        closed_sections_positive_demand_values = closed_sections_demand_values[
                            np.where(closed_sections_demand_values > 0)
                        ]
                        csrdv_frac_zero = 1 - len(closed_sections_positive_demand_values) / len(
                            closed_sections_demand_values
                        )
                        if len(closed_sections_positive_demand_values) > 0:
                            fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                                closed_sections_positive_demand_values
                            )

                    latest_popularity_dist_estimate = PcaDemandDistributionEstimate(
                        created_at=date,
                        semester=semester,
                        highest_demand_section=section_id_to_object[max_id],
                        highest_demand_section_volume=registration_volumes[max_id],
                        lowest_demand_section=section_id_to_object[min_id],
                        lowest_demand_section_volume=registration_volumes[min_id],
                        csrdv_frac_zero=csrdv_frac_zero,
                        csprdv_lognorm_param_shape=fit_shape,
                        csprdv_lognorm_param_loc=fit_loc,
                        csprdv_lognorm_param_scale=fit_scale,
                    )
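                    # Save once, then re-set created_at to the historical `date` and save
                    # again, so the intended date is persisted even if the model
                    # auto-populates created_at on the first insert (e.g. auto_now_add).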
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                    latest_popularity_dist_estimate.created_at = date
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                else:
                    num_changes_without_estimate += 1

            if set_cache:
                if latest_popularity_dist_estimate is not None:
                    cache.set(
                        "current_demand_distribution_estimate",
                        latest_popularity_dist_estimate,
                        timeout=None,
                    )
                else:
                    cache.set("current_demand_distribution_estimate", None, timeout=None)

    if verbose:
        print(
            "Finished recomputing demand distribution estimate and section registration_volume "
            f"fields for semesters {str(semesters)}."
        )
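
As a minimal illustration of the distribution fit performed inside the loop above (the stats.lognorm.fit call on the positive demand values of closed sections), here is a self-contained sketch on synthetic data; the demand values below are made up for demonstration and are not taken from the database.

import numpy as np
from scipy import stats

# Hypothetical closed-section demand values (assumption, for illustration only)
closed_demands = np.asarray([0.0, 0.4, 0.9, 1.2, 0.0, 2.5])

positive = closed_demands[closed_demands > 0]
frac_zero = 1 - len(positive) / len(closed_demands)  # analogous to csrdv_frac_zero

# Same call shape as in the loop: returns (shape, loc, scale) of the fitted lognormal
fit_shape, fit_loc, fit_scale = stats.lognorm.fit(positive)
print(frac_zero, fit_shape, fit_loc, fit_scale)
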
Example #26
def recommend_courses_view(request):
    """
    This route optionally takes in current and past courses. To make recommendations
    based solely on the user's courses in past and current PCP schedules, simply omit
    both the curr_courses and past_courses fields from your request.
    Otherwise, to specify past and current courses explicitly, include "curr_courses"
    and/or "past_courses" attributes in the request, each containing an array of
    string course full codes of the form DEPT-XXX (e.g. CIS-120).
    If successful, this route will return a list of recommended courses, with the same schema
    as the List Courses route, starting with the most relevant course. The number of
    recommended courses returned can be specified using the n_recommendations attribute in the
    request body, but if this attribute is omitted, the default will be 5.
    If n_recommendations is not an integer, or is <=0, a 400 will be returned.
    If curr_courses contains repeated courses or invalid courses or non-current courses, a
    400 will be returned.
    If past_courses contains repeated courses or invalid courses, a 400 will be returned.
    If curr_courses and past_courses contain overlapping courses, a 400 will be returned.
    """

    user = request.user
    curr_courses = request.data.get("curr_courses", None)
    curr_courses = curr_courses if curr_courses is not None else []
    past_courses = request.data.get("past_courses", None)
    past_courses = past_courses if past_courses is not None else []
    n_recommendations = request.data.get("n_recommendations", 5)

    # input validation
    try:
        n_recommendations = int(n_recommendations)
    except ValueError:
        return Response(
            f"n_recommendations: {n_recommendations} is not int",
            status=status.HTTP_400_BAD_REQUEST,
        )
    if n_recommendations <= 0:
        return Response(
            f"n_recommendations: {n_recommendations} <= 0",
            status=status.HTTP_400_BAD_REQUEST,
        )

    course_clusters = retrieve_course_clusters()

    (
        cluster_centroids,
        clusters,
        curr_course_vectors_dict,
        past_course_vectors_dict,
    ) = course_clusters

    if curr_courses or past_courses:
        try:
            user_vector, user_courses = vectorize_user_by_courses(
                clean_course_input(curr_courses),
                clean_course_input(past_courses),
                curr_course_vectors_dict,
                past_course_vectors_dict,
            )
        except ValueError as e:
            return Response(
                str(e),
                status=status.HTTP_400_BAD_REQUEST,
            )
    else:
        user_vector, user_courses = vectorize_user(
            user, curr_course_vectors_dict, past_course_vectors_dict
        )

    recommended_course_codes = recommend_courses(
        curr_course_vectors_dict,
        cluster_centroids,
        clusters,
        user_vector,
        user_courses,
        n_recommendations,
    )

    queryset = Course.with_reviews.filter(
        semester=get_current_semester(), full_code__in=recommended_course_codes
    )
    queryset = queryset.prefetch_related(
        Prefetch(
            "sections",
            Section.with_reviews.all()
            .filter(credits__isnull=False)
            .filter(Q(status="O") | Q(status="C"))
            .distinct()
            .prefetch_related("course", "meetings__room"),
        )
    )

    return Response(
        CourseListSerializer(
            queryset,
            many=True,
        ).data,
        status=status.HTTP_200_OK,
    )
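
A hedged usage sketch for the view above: the endpoint path and authentication scheme are assumptions that depend on the project's URL configuration; only the request-body keys (curr_courses, past_courses, n_recommendations) and the documented 200/400 behavior come from the docstring.

import requests

payload = {
    "curr_courses": ["CIS-120"],        # full codes of the form DEPT-XXX
    "past_courses": ["MATH-104"],       # must not overlap with curr_courses
    "n_recommendations": 3,             # optional; defaults to 5, must be a positive int
}
resp = requests.post(
    "https://example.com/api/plan/recommendations/",  # hypothetical path
    json=payload,
    headers={"Authorization": "Token <your-token>"},  # hypothetical auth scheme
)
if resp.status_code == 200:
    for course in resp.json():          # same schema as the List Courses route
        print(course["full_code"])
else:
    print(resp.status_code, resp.text)  # 400 on invalid input, per the docstring
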
Example #27
    def handle(self, *args, **kwargs):
        root_logger = logging.getLogger("")
        root_logger.setLevel(logging.DEBUG)

        src = kwargs["src"]
        semesters = kwargs["semester"]
        import_all = kwargs["import_all"]
        s3_bucket = kwargs["s3_bucket"]
        is_zip_file = kwargs["zip"] or s3_bucket is not None
        summary_file = kwargs["summary_file"]
        import_details = kwargs["import_details"]
        import_descriptions = kwargs["import_descriptions"]
        show_progress_bar = kwargs["show_progress_bar"]
        force = kwargs["force"]

        if src is None:
            raise CommandError("source directory or zip must be defined.")

        if semesters is None and not import_all:
            raise CommandError(
                "Must define semester with (-s) or explicitly import all semesters with (-a)."
            )
        if semesters is not None:
            current_semester = get_current_semester()
            for semester in semesters:
                if semester == current_semester:
                    raise ValueError(
                        f"You cannot import reviews for the current semester ({current_semester}). "
                        f"Did you forget to update the SEMESTER option in the Django admin console?"
                    )

        if s3_bucket is not None:
            fp = "/tmp/pcrdump.zip"
            # Make sure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
            # are loaded in as environment variables.
            print(f"downloading zip from s3 bucket: {src}")
            S3_client.download_file(s3_bucket, src, fp)
            src = fp

        print(
            "This script is an atomic transaction, meaning the database will only be "
            "modified if the whole script succeeds.")

        # Only commit changes if the whole script succeeds
        with transaction.atomic():
            # TODO: When we import details and crosslistings, get their data here too.
            tables_to_get = [summary_file]
            idx = 1
            detail_idx = -1
            if import_details:
                tables_to_get.append(ISC_RATING_TABLE)
                detail_idx = idx
                idx += 1

            description_idx = -1
            if import_descriptions:
                tables_to_get.append(ISC_DESC_TABLE)
                description_idx = idx
                idx += 1

            files = self.get_files(src, is_zip_file, tables_to_get)

            summary_fo = files[0]
            print("Loading summary file...")
            summary_rows = load_sql_dump(summary_fo,
                                         progress=show_progress_bar,
                                         lazy=False)
            gc.collect()
            print("SQL parsed and loaded!")

            if not import_all:
                full_len = len(summary_rows)
                summary_rows = [
                    r for r in summary_rows if r["TERM"] in semesters
                ]
                gc.collect()
                filtered_len = len(summary_rows)
                print(f"Filtered {full_len} rows down to {filtered_len} rows.")

            semesters = sorted(list({r["TERM"] for r in summary_rows}))
            gc.collect()
            to_delete = Review.objects.filter(
                section__course__semester__in=semesters)
            delete_count = to_delete.count()

            if delete_count > 0:
                if not force:
                    prompt = input(
                        f"This import will overwrite {delete_count} rows that have already been "
                        + "imported. Continue? (y/N) ")
                    if prompt.strip().upper() != "Y":
                        print("Aborting...")
                        return 0

                print(
                    f"Deleting {delete_count} existing reviews for semesters from the database..."
                )
                to_delete.delete()

            print(f"Importing reviews for semester(s) {', '.join(semesters)}")
            stats = import_summary_rows(summary_rows, show_progress_bar)
            print(stats)

            gc.collect()

            if import_details:
                print("Loading details file...")
                stats = import_ratings_rows(*load_sql_dump(files[detail_idx]),
                                            semesters, show_progress_bar)
                print(stats)

            gc.collect()

            if import_descriptions:
                print("Loading descriptions file...")
                stats = import_description_rows(
                    *load_sql_dump(files[description_idx]),
                    None if import_all else semesters,
                    show_progress_bar,
                )
                print(stats)

            self.close_files(files)
            # invalidate cached views
            print("Invalidating cache...")
            del_count = clear_cache()
            print(
                f"{del_count if del_count >=0 else 'all'} cache entries removed."
            )

            gc.collect()

            print(
                f"Recomputing stats for semester(s) {', '.join(semesters)}...")
            recompute_stats(
                semesters=semesters,
                semesters_precomputed=True,
                verbose=True,
            )

        return 0
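
For completeness, a hedged invocation sketch for the handle() method above: the command name and the argument dest names are assumptions inferred from the kwargs the method reads (the real names are defined in the command's add_arguments()), so treat this as a template rather than a working call.

from django.core.management import call_command

call_command(
    "iscimport",                    # hypothetical command name
    src="/tmp/pcr_dump.zip",        # source directory or zip
    semester=["2021C"],             # semesters to import (must not be the current one)
    import_all=False,
    s3_bucket=None,                 # or an S3 bucket name to download `src` from
    zip=True,                       # `src` is a zip archive
    summary_file="SUMMARY",         # hypothetical summary table/file name
    import_details=False,
    import_descriptions=False,
    show_progress_bar=True,
    force=True,                     # skip the interactive overwrite prompt
)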