Beispiel #1
0
    def __init__(self, args):
        """Capture the parsed arguments and set up output workers and the ID generator.

        :param args: parsed command-line arguments; this reads out_dir, state_name,
                     state_code, state_type, hier_source, xml_out, subject_source,
                     pkg_source, gen_sum, gen_ica, gen_iab and gen_item
        """
        self._args = args

        # the output root has to exist before a worker is pointed at it
        out_dir = args.out_dir
        self.out_path_root = out_dir
        os.makedirs(out_dir, exist_ok=True)

        self.state_cfg = dict(name=args.state_name,
                              code=args.state_code,
                              type=args.state_type)
        self.hier_source = args.hier_source

        # an XML worker is attached only when XML output was requested
        self.workers = [XmlWorker(out_dir)] if args.xml_out else []

        self.subject_source = args.subject_source

        # assessment package settings
        self.pkg_source = args.pkg_source
        for flag in ('gen_sum', 'gen_ica', 'gen_iab', 'gen_item'):
            setattr(self, flag, getattr(args, flag))

        self.id_gen = IDGen()
def assign_student_groups(school, grade, grade_students, id_gen: IDGen,
                          subject_codes: [str]):
    """
    Put every student of a grade into one randomly chosen group per subject.

    For each subject a fresh set of groups is created; the number of groups is the
    student count divided by the school's group size, rounded up. Each student is then
    handed one of those groups at random through ``set_group``. No "staff_based" groups
    are assigned.

    @param school: The school (its ``group_size`` determines the number of groups)
    @param grade: The grade in the school to assign groupings to.
    @param grade_students: The students currently in the grade for this school
    @param id_gen: The IDGen instance that supplies the group ids
    @param subject_codes: The list of subject codes
    """
    group_count = int(ceil(len(grade_students) / school.group_size))
    name_prefix = 'G' + str(grade) + '-'

    for subject_code in subject_codes:
        # one new batch of (id, name) pairs per subject
        fresh_ids = [id_gen.get_group_id('group') for _ in range(group_count)]
        candidates = [(gid, name_prefix + str(gid)) for gid in fresh_ids]

        # every student draws one of this subject's groups at random
        for member in grade_students:
            picked_id, picked_name = random.choice(candidates)
            member.set_group(StudentGroup(subject_code, picked_id, picked_name))
def generate_teaching_staff_member(school: School,
                                   id_gen: IDGen = IDGen,
                                   sub_class=None):
    """Create a teacher with a random gender and a generated name for a school.

    :param school: The school the teacher teaches in
    :param id_gen: id generator used for the GUID
    :param sub_class: Optional type to instantiate instead of TeachingStaff (expected to be a
                      subclass of TeachingStaff)
    :returns: The staff member
    """
    staff_type = TeachingStaff if sub_class is None else sub_class
    teacher = staff_type()
    teacher.guid = id_gen.get_uuid()
    teacher.gender = random.choice(['male', 'female'])

    # the name generator is given the gender that was just drawn
    first, middle, last = name_gen.generate_person_name(teacher.gender)
    teacher.first_name = first
    teacher.middle_name = middle
    teacher.last_name = last

    teacher.school = school
    return teacher
def generate_district_staff_member(district: District,
                                   id_gen: IDGen = IDGen,
                                   sub_class=None):
    """Create a district-level staff member with a random gender and a generated name.

    :param district: The district the staff member belongs to
    :param id_gen: id generator used for the GUID
    :param sub_class: Optional type to instantiate instead of DistrictStaff (expected to be a
                      subclass of DistrictStaff)
    :return: The staff member
    """
    staff_type = DistrictStaff if sub_class is None else sub_class
    member = staff_type()
    member.guid = id_gen.get_uuid()
    member.gender = random.choice(['male', 'female'])

    # the name generator is given the gender that was just drawn
    first, middle, last = name_gen.generate_person_name(member.gender)
    member.first_name = first
    member.middle_name = middle
    member.last_name = last

    member.district = district
    return member
def generate_class(name, subject_code, school: School, sub_class=None):
    """Create a class object for one subject in a school.

    :param name: The name of the class
    :param subject_code: The subject this class covers
    :param school: The school containing the class
    :param sub_class: Optional type to instantiate instead of Class (expected to be a
                      subclass of Class)
    :returns: A class object
    """
    class_type = Class if sub_class is None else sub_class
    new_class = class_type()

    # NOTE(review): id_gen is not a parameter here; it is resolved from the enclosing module
    new_class.guid = id_gen.get_uuid()
    new_class.school = school
    new_class.name = name
    new_class.subject_code = subject_code
    return new_class
Beispiel #6
0
def _extract_state(row: dict, cur: State) -> (State, bool):
    """Build the state described by a row, reusing the current state when nothing changed.

    :param row: row providing 'state_type', 'state_id', 'state_code' and 'state_name'
    :param cur: the state currently in effect (may be falsy)
    :returns: ``(cur, False)`` when the row describes the same state as ``cur``, otherwise
              ``(new_state, True)`` with config, demographics config and GUID filled in
    :raises ValueError: if the row's state type is not in state_config.STATE_TYPES
    """
    candidate = State()
    candidate.type_str = row['state_type']
    candidate.id = row['state_id']
    candidate.code = row['state_code']
    candidate.name = row['state_name']

    if cur:
        unchanged = (cur.type_str == candidate.type_str
                     and cur.id == candidate.id
                     and cur.code == candidate.code
                     and cur.name == candidate.name)
        if unchanged:
            return cur, False

    known_types = state_config.STATE_TYPES
    if candidate.type_str not in known_types:
        raise ValueError("State type '" + candidate.type_str + "' not found")

    # a genuinely new state: attach its configuration and give it an identity
    type_config = known_types[candidate.type_str]
    candidate.config = type_config
    candidate.demo_config = pop_config.DEMOGRAPHICS[type_config['demographics']]
    candidate.guid = IDGen.get_uuid()

    return candidate, True
Beispiel #7
0
def _extract_district(row: dict, cur: District,
                      state: State) -> (District, bool):
    """Build the district described by a row, reusing the current district when unchanged.

    :param row: row providing 'district_type', 'district_id' and 'district_name'
    :param cur: the district currently in effect (may be falsy)
    :param state: the state the district belongs to; its demographics config is inherited
    :returns: ``(cur, False)`` when the row describes the same district as ``cur``, otherwise
              ``(new_district, True)``
    :raises ValueError: if the row's district type is not in hier_config.DISTRICT_TYPES
    """
    candidate = District()
    candidate.type_str = row['district_type']
    candidate.id = row['district_id']
    candidate.name = row['district_name']

    if cur:
        unchanged = (cur.type_str == candidate.type_str
                     and cur.id == candidate.id
                     and cur.name == candidate.name)
        if unchanged:
            return cur, False

    known_types = hier_config.DISTRICT_TYPES
    if candidate.type_str not in known_types:
        raise ValueError("District type '" + candidate.type_str + "' not found")

    # a genuinely new district: attach configuration, parent state and identity
    candidate.config = known_types[candidate.type_str]
    candidate.demo_config = state.demo_config
    candidate.state = state
    candidate.guid = IDGen.get_uuid()

    return candidate, True
def generate_enrollment(section: Section,
                        student: Student,
                        grade=None,
                        sub_class=None):
    """Create the enrollment record that links a student to a section.

    :param section: The section the student is enrolled in
    :param student: The student enrolled in the section
    :param grade: The grade at the time of enrollment; when None the student's current
                  grade is used
    :param sub_class: Optional type to instantiate instead of Enrollment (expected to be a
                      subclass of Enrollment)
    :returns: An enrollment object
    """
    enrollment_type = Enrollment if sub_class is None else sub_class
    enrollment = enrollment_type()

    # NOTE(review): id_gen is not a parameter here; it is resolved from the enclosing module
    enrollment.guid = id_gen.get_uuid()
    enrollment.section = section
    enrollment.student = student
    if grade is None:
        enrollment.grade = student.grade
    else:
        enrollment.grade = grade
    return enrollment
Beispiel #9
0
def _extract_school(row: dict, cur: School,
                    district: District) -> (School, bool):
    """Build the school described by a row, reusing the current school when unchanged.

    :param row: row providing 'school_type', 'school_id', 'school_name' and
                'school_interims'
    :param cur: the school currently in effect (may be falsy)
    :param district: the district the school belongs to; its demographics config is inherited
    :returns: ``(cur, False)`` when the row describes the same school as ``cur``, otherwise
              ``(new_school, True)``
    :raises ValueError: if the row's school type is not in hier_config.SCHOOL_TYPES
    """
    candidate = School()
    candidate.type_str = row['school_type']
    candidate.id = row['school_id']
    candidate.name = row['school_name']

    if cur:
        unchanged = (cur.type_str == candidate.type_str
                     and cur.id == candidate.id
                     and cur.name == candidate.name)
        if unchanged:
            return cur, False

    known_types = hier_config.SCHOOL_TYPES
    if candidate.type_str not in known_types:
        raise ValueError("School type '" + candidate.type_str + "' not found")

    # a genuinely new school: attach configuration, parent district and identity
    candidate.config = known_types[candidate.type_str]
    candidate.demo_config = district.demo_config
    candidate.district = district
    candidate.guid = IDGen.get_uuid()

    # the interims column is read as a boolean, case-insensitively
    interims_text = str(row['school_interims']).lower()
    candidate.takes_interim_asmts = interims_text in ['1', 't', 'y', 'true', 'yes']

    return candidate, True
Beispiel #10
0
def generate_section(
        clss: Class,
        name,
        grade,
        year=None,
        most_recent=False,
        teachers_for_section=general_enroll_config.TEACHERS_PER_SECTION,
        sub_class=None):
    """Generate a section for a given class. This will also generate the necessary number of teaching staff for the
    section.

    :param clss: The class to create a section for
    :param name: The name of the section
    :param grade: The grade of students for the section
    :param year: The academic year this section is in; when None, the current calendar year at
                 the time of the call is used
    :param most_recent: If the section is the most recent section for this grade and class
    :param teachers_for_section: The number of teachers to generate for this section
    :param sub_class: The sub-class of section to create (if requested, must be subclass of Section)
    :returns: A section object
    """
    # Resolve the default per call: a `datetime.now()` expression in the signature would be
    # evaluated only once, when the module is imported, and could go stale.
    if year is None:
        year = datetime.datetime.now().year

    # Create the section
    s = Section() if sub_class is None else sub_class()
    # NOTE(review): id_gen is not a parameter here; it is resolved from the enclosing module
    s.guid = id_gen.get_uuid()
    s.clss = clss
    s.name = name
    s.grade = grade

    # Sections start on September 1st; only sections that are not the most recent get an
    # end date (June 1st of the following year)
    s.from_date = datetime.date(year, 9, 1)
    s.most_recent = most_recent
    if not most_recent:
        s.to_date = datetime.date(year + 1, 6, 1)

    # Generate teaching staff
    for _ in range(teachers_for_section):
        s.teachers.append(
            general_pop_gen.generate_teaching_staff_member(clss.school))

    return s
Beispiel #11
0
def test_guid():
    """A generated UUID must begin with text matching GUID_REGEX."""
    generated = IDGen().get_uuid()
    assert re.match(GUID_REGEX, generated)
Beispiel #12
0
def test_rec_id_from_two_types_bigger():
    """The first record id handed out for each of two object types is 1000000000."""
    generator = IDGen()
    for object_type in ('some_object_type_1', 'some_object_type_2'):
        assert generator.get_rec_id(object_type) == 1000000000
Beispiel #13
0
"""
Unit tests for the hierarchy module.

"""

import datetime
import re

import pytest

import datagen.generators.hierarchy as hier_gen
from datagen.model.district import District
from datagen.model.school import School
from datagen.model.state import State
from datagen.util.id_gen import IDGen

# ID generator instance shared by the tests in this module
ID_GEN = IDGen()

# Lower-case hexadecimal GUID laid out as 8-4-4-4-12 characters
GUID_REGEX = '[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}'


def test_generate_state():
    """generate_state returns a State carrying the expected id, name and code."""
    generated = hier_gen.generate_state('devel', 'Example State', 'ES', ID_GEN)

    assert isinstance(generated, State)
    expected = {'id': '00', 'name': 'Example State', 'code': 'ES'}
    for attribute, value in expected.items():
        assert getattr(generated, attribute) == value

Beispiel #14
0
def generate_student(
        school: School,
        grade,
        id_gen: IDGen,
        acad_year,
        subject_codes: [str],
        military_connected_dist=pop_config.MILITARY_CONNECTED_DIST,
        has_email_address_rate=pop_config.HAS_EMAIL_ADDRESS_RATE,
        has_physical_address_rate=pop_config.HAS_PHYSICAL_ADDRESS_RATE,
        has_address_line_2_rate=pop_config.HAS_ADDRESS_LINE_2_RATE):
    """
    Create one student for a school and grade.

    The student receives identifiers, a date of birth, demographics drawn from the school's
    demographic config for the grade, a generated name, optional contact details, program
    flags, language items and a capability value per subject.

    :param school: The school the student belongs to
    :param grade: The grade the student belongs to
    :param id_gen: id generator
    :param acad_year: The academic year the student is being created for
    :param subject_codes: list of subject codes (for generating student capability)
    :param military_connected_dist: distribution handed to _pick_demo_option to choose the
                                    military-connected value
    :param has_email_address_rate: The rate at which to generate an email address for the student
    :param has_physical_address_rate: The rate at which to generate a physical address for the student
    :param has_address_line_2_rate: The rate at which to generate a line two address for the student
    :return: The student
    """
    # --- basics -----------------------------------------------------------------------
    student = Student()
    student.guid = id_gen.get_uuid()
    student.grade = grade
    student.school = school
    student.dob = _determine_student_dob(student.grade, acad_year)

    # --- demographics -----------------------------------------------------------------
    grade_demo_config = school.demo_config[str(grade)]
    gender, ethnicities, iep, sec504, lep, econ_disad = _determine_demographics(
        grade_demo_config)
    student.gender = gender
    student.prg_iep = iep
    student.prg_sec504 = sec504
    student.prg_lep = lep
    student.prg_econ_disad = econ_disad

    # ethnicity key -> student flag; a flag is only touched when its key was drawn
    ethnicity_flags = (
        ('amer_ind', 'eth_amer_ind'),
        ('black', 'eth_black'),
        ('hispanic', 'eth_hispanic'),
        ('asian', 'eth_asian'),
        ('filipino', 'eth_filipino'),
        ('pac_isl', 'eth_pacific'),
        ('white', 'eth_white'),
        ('multi', 'eth_multi'),
        ('none', 'eth_none'),
    )
    for ethnicity_key, flag_name in ethnicity_flags:
        if ethnicity_key in ethnicities:
            setattr(student, flag_name, True)

    # --- name -------------------------------------------------------------------------
    first, middle, last = name_gen.generate_person_name(student.gender)
    student.first_name = first
    student.middle_name = middle
    student.last_name = last

    # --- contact details (each is present only at its configured rate) ----------------
    if random.random() < has_email_address_rate:
        # first.last.#@example.com
        local_part = '.'.join((student.first_name, student.last_name,
                               str(random.randint(1, 5000))))
        student.email = local_part + '@example.com'

    if random.random() < has_physical_address_rate:
        student.address_line_1 = name_gen.generate_street_address_line_1()
        if random.random() < has_address_line_2_rate:
            student.address_line_2 = name_gen.generate_street_address_line_2()
        student.address_city = name_gen.generate_street_address_city()
        student.address_zip = random.randint(10000, 99999)

    # --- remaining specifics ----------------------------------------------------------
    district = school.district
    student.state = district.state
    student.district = district
    student.id = id_gen.get_student_id()
    student.external_ssid = hashlib.md5(student.id.encode('utf-8')).hexdigest()
    student.rec_id = id_gen.get_rec_id('student')
    student.school_entry_date = _generate_date_enter_us_school(student.grade, acad_year)
    student.derived_demographic = _generate_derived_demographic(student)
    student.prg_migrant = determine_demo_option_selected(grade_demo_config['migrant'])
    student.prg_idea = determine_demo_option_selected(grade_demo_config['idea'])

    # a disability type is always drawn, but only kept when some program flag justifies it
    drawn_disability = random.choice(cfg.PRG_DISABILITY_TYPES)
    student.military_connected = _pick_demo_option(military_connected_dist)
    if student.prg_iep or student.prg_idea or student.prg_sec504:
        student.prg_primary_disability = drawn_disability
    else:
        student.prg_primary_disability = None

    # --- language items ---------------------------------------------------------------
    _set_lang_items(student, acad_year)

    # --- capability per subject, based on demographics and the school adjustment ------
    school_adjustment = hier_config.SCHOOL_TYPES[school.type_str]['students'].get(
        'adjust_pld', 0.0)
    for subject_code in subject_codes:
        adjustment = school_adjustment
        # hack to make performance in EL-related subjects reflect the student's
        # english-learner status: low proficiency levels pull the adjustment down
        if get_el_adjacent(subject_code) and student.elas == 'EL':
            level_index = cfg.LEP_PROFICIENCY_LEVELS.index(student.lang_prof_level)
            if level_index < 3:
                adjustment += 0.4 * (level_index - 3)
        generator, demo = _get_level_demographics(student, subject_code)
        student.capability[subject_code] = random_capability(
            generator.distribution(demo), adjustment)

    return student
Beispiel #15
0
def test_student_id():
    """Student ids are ten digits long and never start with zero."""
    generator = IDGen()
    pattern = re.compile('^[1-9][0-9]{9}$')
    for _ in range(10):
        assert pattern.match(generator.get_student_id())
Beispiel #16
0
def test_school_id():
    """School ids keep the expected leading digits and are padded out with digits."""
    generator = IDGen()

    long_input_id = generator.get_school_id('88800120000000')
    assert re.match('^8880012[0-9]{7}$', long_input_id)

    district_pattern = re.compile('^0603465[0-9]{5}$')
    for _ in range(10):
        assert district_pattern.match(generator.get_school_id('0603465'))
Beispiel #17
0
def test_district_id():
    """District ids start with the given two-digit prefix followed by five digits."""
    generator = IDGen()
    pattern = re.compile('^06[0-9]{5}$')

    for _ in range(10):
        assert pattern.match(generator.get_district_id('06'))
Beispiel #18
0
def generate_assessment_outcome(student: Student, assessment: Assessment,
                                id_gen: IDGen):
    """Generate an assessment outcome for a given student.

    The outcome gets a GUID and record id from ``id_gen``, one legacy accommodation code per
    legacy accommodation field, and the "real" accommodations that apply given the
    assessment's allowed accommodations and the student's disability / language data.

    :param student: The student to create the outcome for
    :param assessment: The assessment to create the outcome for
    :param id_gen: ID generator
    :returns: The assessment outcome
    """
    # Create the object
    ao = AssessmentOutcome()
    # use the generator that was passed in, consistent with the rec_id below
    ao.guid = id_gen.get_uuid()
    ao.student = student
    ao.assessment = assessment

    # Set common behaviors
    # Be careful, there is some order dependency that mean most of this happens in the sub-generators
    ao.rec_id = id_gen.get_rec_id('assessment_outcome')

    # Create legacy accommodations details
    # hack for custom subjects: anything other than Math/ELA uses the ELA settings
    subject_code = assessment.subject.code if assessment.subject.code in [
        'Math', 'ELA'
    ] else 'ELA'

    # Outcome attribute names double as keys into cfg.LEGACY_ACCOMMODATIONS. The order is
    # kept as it always was so the sequence of random picks stays stable.
    legacy_fields = (
        'acc_asl_video_embed',
        'acc_print_on_demand_items_nonembed',
        'acc_noise_buffer_nonembed',
        'acc_braile_embed',
        'acc_closed_captioning_embed',
        'acc_text_to_speech_embed',
        'acc_abacus_nonembed',
        'acc_alternate_response_options_nonembed',
        'acc_calculator_nonembed',
        'acc_multiplication_table_nonembed',
        'acc_print_on_demand_nonembed',
        'acc_read_aloud_nonembed',
        'acc_scribe_nonembed',
        'acc_speech_to_text_nonembed',
        'acc_streamline_mode',
    )
    legacy_config = cfg.LEGACY_ACCOMMODATIONS
    for field in legacy_fields:
        if field == 'acc_print_on_demand_nonembed' and field not in legacy_config:
            # This field used to be drawn from the ASL video entry (a copy/paste slip).
            # Keep that as a fallback only for configs that lack a dedicated entry.
            field_config = legacy_config['acc_asl_video_embed']
        else:
            field_config = legacy_config[field]
        setattr(ao, field, _pick_accommodation_code(field_config[subject_code]))

    # Create real accommodations based on assessment and other data.
    # Yeah, this should be driven by configuration at some point but for now, let's get a couple emitted ...
    # FYI, student disability codes:
    # DB (Deaf-blindness)
    # HI (Hearing impairment)
    # MD (multiple disabilities)
    # SLI (speech or language impairment)
    # VI (visual impairment)
    if 'AmericanSignLanguage' in assessment.accommodations and student.prg_primary_disability in (
            'DB', 'HI', 'MD'):
        ao.accommodations.append(
            ('AmericanSignLanguage', 'TDS_ASL1', 'Show ASL videos'))
    if 'Braille' in assessment.accommodations and student.prg_primary_disability in (
            'DB', 'MD', 'VI'):
        ao.accommodations.append(('BrailleType', 'TDS_BT_UCT', 'UEB'))
    if 'Calculator' in assessment.accommodations:
        ao.accommodations.append(
            ('Calculator', 'TDS_CalcBasic', 'Calculator on'))
        ao.accommodations.append(
            ('Non-Embedded Accommodations', 'NEA_Calc', 'Calculator'))
    if 'Spanish' in assessment.accommodations and student.lang_code == 'spa' and student.prg_lep:
        ao.accommodations.append(('Language', 'ESN', 'Spanish'))
        ao.accommodations.append(
            ('Translation', 'TDS_WL_ESNGlossary', 'Spanish'))

    return ao
Beispiel #19
0
class WorkerManager(Worker):
    """Worker that drives a complete data generation run.

    It builds or loads the state/district/school hierarchy, loads subjects and assessment
    packages, generates students and assessment outcomes year by year, and forwards every
    write to the configured output workers.
    """

    def __init__(self, args):
        """Capture the parsed arguments and set up output workers and the ID generator.

        :param args: parsed command-line arguments; this reads out_dir, state_name,
                     state_code, state_type, hier_source, xml_out, subject_source,
                     pkg_source, gen_sum, gen_ica, gen_iab and gen_item
        """
        self._args = args

        self.out_path_root = args.out_dir
        os.makedirs(self.out_path_root, exist_ok=True)

        self.state_cfg = {
            'name': args.state_name,
            'code': args.state_code,
            'type': args.state_type
        }
        self.hier_source = args.hier_source

        self.workers = []
        if args.xml_out:
            self.workers.append(XmlWorker(self.out_path_root))

        self.subject_source = args.subject_source

        # assessment package settings
        self.pkg_source = args.pkg_source
        self.gen_sum = args.gen_sum
        self.gen_ica = args.gen_ica
        self.gen_iab = args.gen_iab
        self.gen_item = args.gen_item

        self.id_gen = IDGen()

    def cleanup(self):
        """Forward the cleanup call to every output worker."""
        for worker in self.workers:
            worker.cleanup()

    def prepare(self):
        """Forward the prepare call to every output worker."""
        for worker in self.workers:
            worker.prepare()

    def run(self):
        """Execute a full generation run.

        Resolves the hierarchy, the subjects and the assessment packages, records the
        effective command line in ``args.txt`` under the output root, then generates the
        data for the state. Returns early, after printing a message, when no subject
        definitions or no assessment packages are found.
        """
        state, districts, schools = self.__hierarchy()

        if self.subject_source in ('generate', 'default'):
            subjects = generate_default_subjects()
        else:
            subjects = load_subjects(self.subject_source)
        if not subjects:
            print('No subject definitions found')
            return

        assessments = load_assessments(self.pkg_source, subjects, self.gen_sum,
                                       self.gen_ica, self.gen_iab,
                                       self.gen_item)
        if not assessments:
            print('No assessment packages found')
            return

        # generate and emit inferred command line from args
        cl = ' '.join('--' + k + ' ' + str(v)
                      for (k, v) in vars(self._args).items())
        print(cl)
        # the file is opened in append mode, so terminate the line to keep the command
        # lines of successive runs apart
        with open(os.path.join(self.out_path_root, 'args.txt'), "a") as f:
            f.write(cl + '\n')

        # Process the state
        self.__generate_state_data(state, districts, schools, assessments)

    def __hierarchy(self):
        """
        Generate or load the hierarchy of state, districts, schools and hand the
        per-school institution hierarchies to the workers.

        :return: tuple of (state, districts, schools)
        """
        if self.hier_source == 'generate':
            state, districts, schools = hier_util.generate_hierarchy(
                self.state_cfg['type'], self.state_cfg['name'],
                self.state_cfg['code'], self.id_gen)
        else:
            state, districts, schools = hier_util.read_hierarchy(
                self.hier_source)

        # call hook for workers to write hierarchies
        hierarchies = [
            hier_gen.generate_institution_hierarchy(school.district.state,
                                                    school.district, school,
                                                    self.id_gen)
            for school in schools
        ]
        for worker in self.workers:
            worker.write_hierarchies(hierarchies)
        del hierarchies

        return state, districts, schools

    def __years(self, assessments: [Assessment]):
        """
        Return the sorted list of years represented by assessment packages.
        :param assessments: assessments
        :return: sorted list of years, e.g. [2015, 2016, 2017]
        """
        return sorted({asmt.year for asmt in assessments})

    def __subject_codes(self, assessments: [Assessment]):
        """
        Return the sorted list of subject codes represented by assessment packages.
        :param assessments: assessments
        :return: sorted list of subject codes, e.g. ['ELA', 'Math']
        """
        return sorted({asmt.subject.code for asmt in assessments})

    def __grades(self, assessments: [Assessment]):
        """
        Return the set of grades represented by assessment packages.
        :param assessments: assessments
        :return: set of grades, e.g. {1, 2, 6}
        """
        return {asmt.grade for asmt in assessments}

    def __generate_state_data(self, state: State, districts: [District],
                              schools: [School], assessments: [Assessment]):
        """
        Generate an entire data set for a single state.

        @param state: State to generate data for
        @param districts: Districts of the state
        @param schools: Schools of the state (each is matched to its district)
        @param assessments: Assessments to generate outcomes for
        """
        print('Creating results for state: {}'.format(state.name))

        # build registration system by years
        rs_by_year = self.__build_registration_system(
            self.__years(assessments))

        # Build the districts
        student_avg_count = 0
        student_unique_count = 0
        for district in districts:
            print('\nCreating results for district {} ({} District)'.format(
                district.name, district.type_str))

            # collect schools for the district
            district_schools = [s for s in schools if s.district == district]

            # Generate the district data set
            avg_year, unique = self.__generate_district_data(
                district_schools, rs_by_year, assessments)

            # Print completion of district
            print(
                'District results created with average of {} students/year and {} total unique'
                .format(avg_year, unique))
            student_avg_count += avg_year
            student_unique_count += unique

        # Print completion of state
        print(
            'State results created with average of {} students/year and {} total unique'
            .format(student_avg_count, student_unique_count))

    def __build_registration_system(self, years):
        """
        Build the registration systems that will be used during the data generation run.

        @param years: The sorted years for which data will be generated
        @returns: A dictionary mapping each year to the registration system for that year
        @raises ValueError: if no years are given
        """
        # Validate years
        if len(years) == 0:
            raise ValueError('Number of specified years is zero')

        # Build the registration systems for every year
        rs_by_year = {}
        start_year = years[0] - 1
        # Build the original system
        rs = hier_gen.generate_registration_system(
            start_year,
            str(start_year - 1) + '-02-25', self.id_gen)

        # Update it over every year
        for year in years:
            # Update the system and keep an independent snapshot for the year
            rs.academic_year = year
            rs.extract_date = str(year - 1) + '-02-27'
            rs_by_year[year] = copy.deepcopy(rs)

            # NOTE(review): workers receive the live object, which is updated again on the
            # next iteration -- confirm that they do not hold on to it
            for worker in self.workers:
                worker.write_student_registration_config(year, rs)

        # Return the registration systems keyed by year
        return rs_by_year

    def __generate_district_data(self, schools: [School],
                                 reg_sys_by_year: {int: RegistrationSystem},
                                 assessments: [Assessment]):
        """
        Generate an entire data set for all schools in a single district.

        @param schools: schools for the district
        @param reg_sys_by_year: registration system for each year
        @param assessments: all assessment objects
        @returns: tuple of (average student count per year, number of unique students)
        """
        # Sort the schools
        schools_by_grade = hier_gen.sort_schools_by_grade(schools)

        # Begin processing the years for data
        unique_students = {}
        students = {}
        student_count = 0

        # get range of years from assessment packages
        years = self.__years(assessments)
        print('School years: {}'.format(years))

        # start with "standard" SB grades and add any grade found in the assessments
        hierarchy_grades = {3, 4, 5, 6, 7, 8, 11}
        hierarchy_grades.update(self.__grades(assessments))
        print('Hierarchy grades: {}'.format(hierarchy_grades))

        # calculate the progress bar max and start the progress
        progress_max = len(
            hier_gen.set_up_schools_with_grades(schools,
                                                hierarchy_grades)) * len(years)
        bar = pyprind.ProgBar(
            progress_max,
            stream=sys.stdout,
            title='Generating assessments outcome for schools')

        for year in years:
            reg_system = reg_sys_by_year[year]

            # Set up a dictionary of schools and their grades
            schools_with_grades = hier_gen.set_up_schools_with_grades(
                schools, hierarchy_grades)

            # Advance the students forward in the grades
            for student in students.values():
                # Assign the registration system and bump up the record ID
                student.reg_sys = reg_system
                student.rec_id = self.id_gen.get_rec_id('student')

                # Move the student forward (false from the advance method means the student disappears)
                # If the student is now in a grade that isn't a concern (i.e. no assessments) leave them out
                if pop_gen.advance_student(student, schools_by_grade):
                    if student.grade in schools_with_grades[student.school]:
                        schools_with_grades[student.school][
                            student.grade].append(student)

            # With the students moved around, we will re-populate empty grades
            # and create assessments with outcomes for the students
            for school, grades in schools_with_grades.items():
                # Process the whole school
                student_count += self.__process_school(grades, school,
                                                       students,
                                                       unique_students,
                                                       reg_system, year,
                                                       assessments)
                bar.update()

        unique_student_count = len(unique_students)

        # Some explicit garbage collection
        del schools_by_grade
        del students
        del unique_students

        # Return the average student count
        return int(student_count // len(years)), unique_student_count

    def __process_school(self, grades, school, students, unique_students,
                         reg_system: RegistrationSystem, year,
                         assessments: [Assessment]):
        """
        Populate one school for one year, generate outcomes and write the school's data.

        @param grades: dictionary of grade -> list of students currently in that grade
        @param school: the school to process
        @param students: dictionary of guid -> student carried across years (updated here)
        @param unique_students: dictionary of guids seen so far (updated here)
        @param reg_system: registration system for the year
        @param year: academic year being processed
        @param assessments: all assessment objects
        @returns: number of students in the school for this year
        """
        district = school.district
        state = district.state

        # get all subjects represented by assessment packages
        subject_codes = self.__subject_codes(assessments)

        # Grab the assessment rates by subjects. Work on a copy so that the fallback
        # entries added below do not leak into the shared state configuration.
        asmt_skip_rates_by_subject = dict(
            state.config['subject_skip_percentages'])
        # hack for custom subjects: they borrow the Math skip rate
        for subject_code in subject_codes:
            if subject_code not in asmt_skip_rates_by_subject:
                asmt_skip_rates_by_subject[
                    subject_code] = asmt_skip_rates_by_subject['Math']

        # Process the whole school
        assessment_results = {}
        iab_results = {}
        sr_students = []
        dim_students = []
        student_count = 0

        for grade, grade_students in grades.items():
            # Potentially re-populate the student population
            pop_gen.repopulate_school_grade(school, grade, grade_students,
                                            self.id_gen, reg_system, year,
                                            subject_codes)
            student_count += len(grade_students)

            # collect any assessments for this year and grade
            asmts = [
                asmt for asmt in assessments
                if asmt.year == year and asmt.grade == grade
            ]

            # note: only use subjects for the assessments for this year and grade
            pop_gen.assign_student_groups(school, grade, grade_students,
                                          self.id_gen,
                                          self.__subject_codes(asmts))

            for asmt in asmts:
                date_taken = self.__date_taken_for_asmt(asmt)
                for student in grade_students:
                    if asmt.is_iab():
                        # IABs are only taken by schools that take interims, and then
                        # only by a random share of the students
                        if school.takes_interim_asmts and random.random(
                        ) < cfg.IAB_STUDENT_RATE:
                            iab_asmt_gen.create_iab_outcome_object(
                                date_taken,
                                student,
                                asmt,
                                self.id_gen,
                                iab_results,
                                gen_item=self.gen_item)
                    else:
                        asmt_gen.create_assessment_outcome_object(
                            date_taken,
                            student,
                            asmt,
                            self.id_gen,
                            assessment_results,
                            asmt_skip_rates_by_subject[asmt.subject.code],
                            gen_item=self.gen_item)

                    # Make sure we have the student for the next run and for metrics
                    # (bit repetitive to do it in the inner loop but probably okay for now)
                    if student.guid not in students:
                        students[student.guid] = student
                        dim_students.append(student)
                    if student.guid not in unique_students:
                        unique_students[student.guid] = True

            # collect all the students for registration output (randomly missing a few)
            sr_students.extend([
                s for s in grade_students
                if random.random() < cfg.HAS_ASMT_RESULT_IN_SR_FILE_RATE
            ])

        # Write out the school
        self.__write_school_data(year, reg_system.guid, dim_students,
                                 sr_students, assessment_results, iab_results,
                                 state.code, district.guid)

        del dim_students
        del sr_students
        del assessment_results
        del iab_results

        return student_count

    def __write_school_data(self, year, rs_guid, dim_students, sr_students,
                            assessment_results, iab_results, state_code,
                            district_id):
        """
        Write student and assessment data for a school to one or more output formats.

        @param year: Current academic year
        @param rs_guid: GUID of the registration system for the year
        @param dim_students: Students to write as dimension records
        @param sr_students: Students to write as registration records
        @param assessment_results: Assessment outcomes to write, keyed by guid
        @param iab_results: IAB assessment outcomes, keyed by guid
        @param state_code: state code
        @param district_id: district id
        """

        for worker in self.workers:
            worker.write_students_dim(dim_students)
            worker.write_students_reg(sr_students, rs_guid, year)

        # Write assessment results if we have them
        for guid, iab_result in iab_results.items():
            for worker in self.workers:
                worker.write_iab_outcome(iab_result, guid)
        for guid, results in assessment_results.items():
            for worker in self.workers:
                worker.write_assessment_outcome(results, guid, state_code,
                                                district_id)

    def __date_taken_for_asmt(self, asmt: Assessment):
        """
        Generates a random date for an assessment.
        IABs can be pretty much any time from mid-Sep to mid-March
        ICAs will be late-January
        Summatives will be early May

        :param asmt: assessment
        :return: date taken
        """
        if asmt.is_iab():
            date_taken = datetime.date(
                asmt.year - 1, 9,
                15) + datetime.timedelta(days=random.randint(0, 180))
        elif asmt.is_summative():
            date_taken = datetime.date(asmt.year, 5, 10)
        else:
            date_taken = datetime.date(asmt.year, 1, 21)
        return self.__weekday_near(date_taken)

    def __weekday_near(self, value: datetime.date):
        """
        Generates a random date that is near the given target date and is a weekday.
        For now this is simple: shift date randomly +-3, then make sure it's not a weekend.

        :param value: date to be near
        :return: new date
        """
        value += datetime.timedelta(days=random.randint(-3, 3))
        if value.weekday() == 5:
            value += datetime.timedelta(days=-1)  # Sat -> Fri
        elif value.weekday() == 6:
            value += datetime.timedelta(days=+1)  # Sun -> Mon
        return value
Beispiel #20
0
def __load_row(row, asmt: Assessment, parse_asmt, parse_item):
    """
    Apply a single row of tabulated package data to an assessment.

    Depending on the flags, the row contributes assessment-level settings,
    one item for the item bank, or both. Accommodation columns are inspected
    on every row regardless of the flags.

    :param row: mapping of column name to value for one row
    :param asmt: assessment being populated (modified in place)
    :param parse_asmt: when true, read the assessment-level columns from this row
    :param parse_item: when true, read the item columns and add the item to the bank
    """
    if parse_asmt:
        asmt.id = row['AssessmentId']
        asmt.name = row['AssessmentName']
        asmt.grade = __mapGrade(row['AssessmentGrade'])
        asmt.type = __mapAssessmentType(row['AssessmentType'], row['AssessmentSubtype'])
        asmt.version = row['AssessmentVersion']
        asmt.year = int(row['AcademicYear'])

        # validity window opens Aug 15 of the year before the academic year
        start_date = datetime.date(asmt.year - 1, 8, 15)
        asmt.effective_date = start_date
        asmt.from_date = start_date
        asmt.to_date = cfg.ASMT_TO_DATE

        asmt.overall = __getScorable(row, 'Scaled', 'Overall', 'Overall')

        # one scorable per subject alt definition, keyed Alt1, Alt2, ...
        # (there may be up to 6 alt scores for an assessment)
        alt_defs = asmt.subject.alts
        if alt_defs:
            asmt.alts = [
                __getScorable(row, 'Alt' + str(index), alt_def.code, alt_def.name, alt_def.weight)
                for index, alt_def in enumerate(alt_defs, start=1)
            ]

        # claims: IABs never carry them; otherwise copy the subject's claim
        # definitions using the overall score range
        if not asmt.is_iab() and asmt.subject.claims:
            score_min = asmt.overall.score_min
            score_max = asmt.overall.score_max
            asmt.claims = [
                __copyScorable(claim_def, score_min, score_max)
                for claim_def in asmt.subject.claims
            ]
        else:
            asmt.claims = []

        # if items are being parsed, start a fresh segment and an empty item bank
        if parse_item:
            segment = AssessmentSegment()
            asmt.segment = segment
            segment.id = IDGen.get_uuid()
            asmt.item_bank = []
            asmt.item_total_score = 0

    # infer allowed accommodations even if not parsing items:
    # a non-empty value in the column enables the accommodation
    accommodation_columns = (
        ('ASL', 'AmericanSignLanguage'),
        ('Braille', 'Braille'),
        ('AllowCalculator', 'Calculator'),
        ('Spanish', 'Spanish'),
    )
    for column, accommodation in accommodation_columns:
        if column in row and len(row[column]) > 0:
            asmt.accommodations.add(accommodation)

    if not parse_item:
        return

    item = AssessmentItem()
    item.bank_key = row['BankKey']
    item.item_key = row['ItemId']
    item.type = row['ItemType']
    item.position = __getInt(row['ItemPosition'], 0)
    item.segment_id = asmt.segment.id
    item.max_score = int(row['MaxPoints'])
    item.dok = int(row['DOK'])
    item.difficulty = float(row['avg_b'])

    # field-test items are flagged as not operational
    if row['IsFieldTest'] == 'true':
        item.operational = '0'
    else:
        item.operational = '1'

    # the remaining columns are optional
    if 'AnswerKey' in row:
        item.answer_key = row['AnswerKey']
    else:
        item.answer_key = None

    if 'NumberOfAnswerOptions' in row:
        item.options_count = int(row['NumberOfAnswerOptions'])
    else:
        item.options_count = 0

    # these are messy in tabulator output so split, strip, rejoin
    if 'ClaimContentTarget' in row:
        pieces = row['ClaimContentTarget'].split('|')
        item.target = '|'.join(piece.strip() for piece in pieces)
    else:
        item.target = None

    asmt.item_bank.append(item)
    asmt.item_total_score += item.max_score
Beispiel #21
0
def test_group_id():
    """The first group id handed out by a fresh IDGen is the integer 100."""
    generator = IDGen()
    group_id = generator.get_group_id('some_object_type')
    assert isinstance(group_id, int)
    assert group_id == 100