def combine_contiguous_enrollments(nsc_data, daysgap):
    """Merge each student's contiguous enrollments at the same college.

    Rows of nsc_data are grouped first by student ("sId") and then by
    college ("NCES ID"). Each group is handed to combine_s_c_enrollments
    together with the header dict and daysgap, and whatever rows it
    returns are collected into a single result table.

    Args:
        nsc_data: table object providing get_columns, get_header_dict
            and get_match_rows.
        daysgap: forwarded unchanged to combine_s_c_enrollments; the
            intent is that enrollments separated by fewer than this
            many days are treated as contiguous.

    Returns:
        A tc.Table built from the combined rows, with the header of
        nsc_data attached via tt.add_header.
    """
    # Only students with at least one record flagged "Y" are processed.
    found_flags = nsc_data.get_columns(["sId", n.RECORD_FOUND_Y_N])
    students = {entry[0] for entry in found_flags if entry[1] == "Y"}

    header = nsc_data.get_header_dict()  # column name -> row index
    combined_rows = []
    print("Beginning to process %d students" % len(students))

    for processed, student_id in enumerate(students, start=1):
        student_rows = list(nsc_data.get_match_rows("sId", student_id))
        college_ids = {row[header["NCES ID"]] for row in student_rows}

        for college_id in college_ids:
            # All of this student's rows at one college
            rows_at_college = [
                row
                for row in student_rows
                if row[header["NCES ID"]] == college_id
            ]
            combined_rows += combine_s_c_enrollments(
                rows_at_college, header, daysgap
            )

        # Progress message every 100 students
        if processed % 100 == 0:
            print("%d contacts processed." % processed, flush=True)

    tt.add_header(combined_rows, header)

    # TODO (carried over): records with no date still need handling.
    return tc.Table(combined_rows)
def specify_high_schools(raw_con, hs):
    '''Return the list of high schools the report should cover.

    If hs is truthy it is returned as a one-element list and raw_con is
    not touched. Otherwise the distinct string values found in the
    contact table's high school column are sorted and offered to the
    user through tktools.check_pick_from_list, whose result is returned.

    The header is sliced off raw_con while the values are collected and
    is always put back before this function exits, including when the
    collection step or the picker raises.
    '''
    if hs:  # User specified a single HS already
        return [hs]

    hc = tt.slice_header(raw_con)  # restored in the finally clause below
    try:
        hs_col = hc[c.High_School__c]
        # Non-string cells are skipped so the values can be sorted.
        choices = sorted(
            {row[hs_col] for row in raw_con if isinstance(row[hs_col], str)}
        )
        return tktools.check_pick_from_list(
            choices, 'Pick which High Schools to include in the report')
    finally:
        # Runs after the picker returns (or raises), so the caller's
        # table never stays header-less.
        tt.add_header(raw_con, hc)
# Example #3
# 0  (stray counter from the scraped listing, presumably a vote count)
def specify_high_schools(raw_con, hs):
    '''Work out which high schools to include in the report.

    A truthy hs short-circuits: it is wrapped in a list and returned
    without looking at raw_con. Otherwise every distinct string in the
    high school column of raw_con is gathered, sorted, and passed to
    tktools.check_pick_from_list; the picker's return value is returned.

    raw_con has its header removed only for the duration of this call;
    the try/finally guarantees it is re-attached even if gathering the
    values or the picker fails.
    '''
    if hs:  # User specified a single HS already
        return [hs]

    hc = tt.slice_header(raw_con)  # must be added back before leaving
    try:
        column = hc[c.High_School__c]
        # Keep only string cells; other values cannot be sorted with them.
        distinct_names = {
            row[column] for row in raw_con if isinstance(row[column], str)
        }
        return tktools.check_pick_from_list(
            sorted(distinct_names),
            'Pick which High Schools to include in the report')
    finally:
        tt.add_header(raw_con, hc)
def remove_extra_rows_and_columns(raw_con, raw_acc, raw_enr, hs_set):
    '''Trim the raw tables to the chosen high schools and known columns.

    Rows are filtered in a chain: contacts whose high school is in
    hs_set, then enrollments belonging to those contacts, then accounts
    that are the college of one of those enrollments. Each filtered
    list gets its header re-attached and is wrapped in a tc.Table,
    which is finally cut down to the columns named by the module-level
    *_fields / *_names lists.

    Returns a (contacts, accounts, enrollments) tuple of the tables
    produced by new_subtable.

    NOTE(review): the sliced headers are attached to the filtered
    copies only. If tt.slice_header removes the header in place, the
    raw_* arguments are left without one -- confirm callers do not
    reuse them afterwards.
    '''
    # Contacts: keep students from the selected high schools
    con_header = tt.slice_header(raw_con)
    kept_contacts = [
        row for row in raw_con
        if row[con_header[c.High_School__c]] in hs_set
    ]
    # Ids are collected before the header row is added back
    kept_student_ids = {row[con_header[c.Id]] for row in kept_contacts}
    tt.add_header(kept_contacts, con_header)
    contact_table = tc.Table(kept_contacts)

    # Enrollments: keep those belonging to the kept students
    enr_header = tt.slice_header(raw_enr)
    kept_enrollments = [
        row for row in raw_enr
        if row[enr_header[e.Student__c]] in kept_student_ids
    ]
    kept_college_ids = {
        row[enr_header[e.College__c]] for row in kept_enrollments
    }
    tt.add_header(kept_enrollments, enr_header)
    enrollment_table = tc.Table(kept_enrollments)

    # Accounts: keep colleges referenced by a kept enrollment
    acc_header = tt.slice_header(raw_acc)
    kept_accounts = [
        row for row in raw_acc if row[acc_header[a.Id]] in kept_college_ids
    ]
    tt.add_header(kept_accounts, acc_header)
    account_table = tc.Table(kept_accounts)

    # Column reduction means a user-supplied CSV does not have to carry
    # exactly the right set of columns.
    return (
        contact_table.new_subtable(con_fields, con_names),
        account_table.new_subtable(acc_fields, acc_names),
        enrollment_table.new_subtable(enr_fields, enr_names),
    )
# Example #5
# 0  (stray counter from the scraped listing, presumably a vote count)
def remove_extra_rows_and_columns(raw_con, raw_acc, raw_enr, hs_set):
    '''Reduce the contact, account and enrollment tables.

    Step 1 keeps contact rows whose high school is in hs_set.
    Step 2 keeps enrollment rows whose student is one of those contacts.
    Step 3 keeps account rows whose id is a college of a kept enrollment.
    Step 4 narrows each resulting tc.Table to the columns listed in the
    module-level con/acc/enr field and name lists.

    Returns the narrowed tables as (contacts, accounts, enrollments).
    '''
    # Step 1: contacts
    hc = tt.slice_header(raw_con)
    contacts = list(
        filter(lambda row: row[hc[c.High_School__c]] in hs_set, raw_con))
    # Gather ids while the list is still header-free
    students = set(row[hc[c.Id]] for row in contacts)
    tt.add_header(contacts, hc)
    contacts_full = tc.Table(contacts)

    # Step 2: enrollments for those students
    he = tt.slice_header(raw_enr)
    enrollments = list(
        filter(lambda row: row[he[e.Student__c]] in students, raw_enr))
    colleges = set(row[he[e.College__c]] for row in enrollments)
    tt.add_header(enrollments, he)
    enrollments_full = tc.Table(enrollments)

    # Step 3: accounts for those colleges
    ha = tt.slice_header(raw_acc)
    accounts = list(filter(lambda row: row[ha[a.Id]] in colleges, raw_acc))
    tt.add_header(accounts, ha)
    accounts_full = tc.Table(accounts)

    # Step 4: cut down to the configured columns, so that users who
    # supply a CSV file need not provide exactly the right columns
    contacts_slim = contacts_full.new_subtable(con_fields, con_names)
    accounts_slim = accounts_full.new_subtable(acc_fields, acc_names)
    enrollments_slim = enrollments_full.new_subtable(enr_fields, enr_names)

    return (contacts_slim, accounts_slim, enrollments_slim)
def combine_contiguous_enrollments(nsc_data, daysgap):
    '''Collapse contiguous enrollments per student and college.

    For every student that has at least one record flagged 'Y', the
    student's rows are split by 'NCES ID'. Rows whose
    'RECORD_FOUND_Y/N' value is 'N' are left out of each split. Every
    split is passed to combine_s_c_enrollments along with the header
    dict and daysgap, and the returned rows are gathered into the
    result.

    daysgap is only forwarded; the contiguity test itself is performed
    by combine_s_c_enrollments.

    Returns a tc.Table of the gathered rows with the nsc_data header
    attached.
    '''
    # Students with a match
    flag_pairs = nsc_data.get_columns(['sId', n.RECORD_FOUND_Y_N])
    students = {pair[0] for pair in flag_pairs if pair[1] == 'Y'}

    hd = nsc_data.get_header_dict()  # maps column names to row positions
    gathered = []
    print('Beginning to process %d students' % len(students))

    for tally, student_id in enumerate(students, 1):
        rows = list(nsc_data.get_match_rows('sId', student_id))

        for college in {r[hd['NCES ID']] for r in rows}:
            # Rows for this college, skipping those flagged 'N'.
            # NOTE(review): a college whose rows are all 'N' yields an
            # empty list here -- confirm combine_s_c_enrollments copes.
            at_college = []
            for r in rows:
                if r[hd['NCES ID']] == college and \
                        r[hd['RECORD_FOUND_Y/N']] != 'N':
                    at_college.append(r)

            gathered.extend(combine_s_c_enrollments(at_college, hd, daysgap))

        # Report progress every 100 students
        if tally % 100 == 0:
            print('%d contacts processed.' % tally, flush=True)

    tt.add_header(gathered, hd)

    # TODO (carried over): records with no date still need handling.
    return tc.Table(gathered)
            else:
                enrolled_at = school[dA[a.Name]]
    student.append(len(records))
    student.append(enrolled_at)
    if enrolled_at == student[dC[c.Currently_Enrolled_At__c]]:
        student.append('NO CHANGE')
    else:
        student.append('CHANGE')
        sf.Contact.update(student[dC[c.Id]],
                          {c.Currently_Enrolled_At__c: enrolled_at})
# Register header positions for the three values appended to each student
# row in the loop above. Each new column takes the next free index after
# the current maximum, so the three end up consecutive.
dC.update({
    column: max(dC.values()) + 1 + offset
    for offset, column in enumerate(
        ('EnrollmentCount', 'NewEnrolledAt', 'EnrollChangeStatus'))
})

# Put the header rows back on all three tables
tt.add_header(contacts, dC)
tt.add_header(accounts, dA)
tt.add_header(enrollments, dE)

# Write each table to its own sheet of the report workbook
import xlsxwriter
workbook = xlsxwriter.Workbook('FixCurrentlyEnrolledAtReport.xlsx')
ws = tt.table_to_exsheet(
    workbook, 'Contacts', contacts, bold=True, space=True)
ws.freeze_panes(1, 3)
ws = tt.table_to_exsheet(
    workbook, 'Accounts', accounts, bold=True, space=True)
ws.freeze_panes(1, 2)
# NOTE(review): nothing in the visible lines freezes panes on this sheet
# or closes the workbook -- confirm that happens after this point.
ws = tt.table_to_exsheet(
    workbook, 'Enrollments', enrollments, bold=True, space=True)
            else:
                enrolled_at = school[dA[a.Name]]
    student.append(len(records))
    student.append(enrolled_at)
    if enrolled_at == student[dC[c.Currently_Enrolled_At__c]]:
        student.append('NO CHANGE')
    else:
        student.append('CHANGE')
        sf.Contact.update(student[dC[c.Id]],
                          {c.Currently_Enrolled_At__c: enrolled_at})
# Give each of the three values appended per student its own header
# index, always one past the largest index currently in use.
dC['EnrollmentCount'] = 1 + max(dC.values())
dC['NewEnrolledAt'] = 1 + max(dC.values())
dC['EnrollChangeStatus'] = 1 + max(dC.values())

# Reconstitute headers
tt.add_header(contacts, dC)
tt.add_header(accounts, dA)
tt.add_header(enrollments, dE)

# Now write everything to the file, one sheet per table
import xlsxwriter
workbook = xlsxwriter.Workbook('FixCurrentlyEnrolledAtReport.xlsx')
# (sheet name, table, column argument for freeze_panes)
for _sheet_name, _sheet_rows, _freeze_col in (
        ('Contacts', contacts, 3),
        ('Accounts', accounts, 2),
        ('Enrollments', enrollments, 3)):
    ws = tt.table_to_exsheet(workbook, _sheet_name, _sheet_rows,
                             bold=True, space=True)
    ws.freeze_panes(1, _freeze_col)
workbook.close()