def combine_contiguous_enrollments(nsc_data, daysgap):
    """Merge each student's contiguous enrollments at the same college.

    Every student with at least one row whose n.RECORD_FOUND_Y_N value is
    "Y" is processed. That student's rows are split by "NCES ID" and each
    per-college group is handed to combine_s_c_enrollments together with the
    header dict and daysgap. The condensed rows from all groups are
    collected, the header is re-attached, and the result is returned as a
    tc.Table.

    Per the original notes, enrollments count as contiguous when the days
    between them is less than daysgap, and an enrollment has no end date if
    its end date is after effdate. Both rules are presumably applied inside
    combine_s_c_enrollments -- they are not visible in this function.
    """
    # Identify the students that have at least one matched record
    found_flags = nsc_data.get_columns(["sId", n.RECORD_FOUND_Y_N])
    students = {pair[0] for pair in found_flags if pair[1] == "Y"}

    hd = nsc_data.get_header_dict()  # column name -> index for row lookups
    results_table = []

    print("Beginning to process %d students" % len(students))
    # The running count only drives the progress output
    for processed, sId in enumerate(students, start=1):
        student_rows = list(nsc_data.get_match_rows("sId", sId))

        # Condense one college at a time for this student
        for college in {row[hd["NCES ID"]] for row in student_rows}:
            college_rows = [
                row for row in student_rows
                if row[hd["NCES ID"]] == college
            ]
            results_table.extend(
                combine_s_c_enrollments(college_rows, hd, daysgap))

        # A per-10 "dot" progress marker used to live here (disabled)
        if processed % 100 == 0:
            print("%d contacts processed." % processed, flush=True)

    tt.add_header(results_table, hd)
    # TODO (carried over): also need to handle records with no date
    return tc.Table(results_table)
def specify_high_schools(raw_con, hs):
    '''Return the list of high schools the report should cover.

    If hs is truthy, the user already named a single high school and it is
    returned as a one-element list; raw_con is not touched. Otherwise the
    distinct string values found in the c.High_School__c column of raw_con
    are sorted and offered through tktools.check_pick_from_list, and
    whatever the picker returns is returned.

    raw_con is expected to carry its header row on entry. The header is
    sliced off while the rows are scanned and is always added back before
    this function exits, even when the scan or the picker raises, so the
    caller's table is never left without its header.
    '''
    if hs:  # User specified a single HS already
        return [hs]

    hc = tt.slice_header(raw_con)  # must be added back (see finally)
    try:
        values = (row[hc[c.High_School__c]] for row in raw_con)
        # Non-string entries (presumably blank/missing cells -- TODO
        # confirm) are dropped so the remaining names can be sorted.
        initial_hs = sorted({h for h in values if isinstance(h, str)})
        return tktools.check_pick_from_list(
            initial_hs, 'Pick which High Schools to include in the report')
    finally:
        tt.add_header(raw_con, hc)
def specify_high_schools(raw_con, hs):
    '''Return the list of high schools the report should cover.

    If hs is truthy, the user already named a single high school and it is
    returned as a one-element list; raw_con is not touched. Otherwise the
    distinct string values found in the c.High_School__c column of raw_con
    are sorted and offered through tktools.check_pick_from_list, and
    whatever the picker returns is returned.

    raw_con is expected to carry its header row on entry. The header is
    sliced off while the rows are scanned and is always added back before
    this function exits, even when the scan or the picker raises, so the
    caller's table is never left without its header.
    '''
    if hs:  # User specified a single HS already
        return [hs]

    hc = tt.slice_header(raw_con)  # must be added back (see finally)
    try:
        values = (row[hc[c.High_School__c]] for row in raw_con)
        # Non-string entries (presumably blank/missing cells -- TODO
        # confirm) are dropped so the remaining names can be sorted.
        initial_hs = sorted({h for h in values if isinstance(h, str)})
        return tktools.check_pick_from_list(
            initial_hs, 'Pick which High Schools to include in the report')
    finally:
        tt.add_header(raw_con, hc)
def remove_extra_rows_and_columns(raw_con, raw_acc, raw_enr, hs_set):
    '''Trim the raw contact, account and enrollment tables for the report.

    Rows are narrowed in a chain:
      1. contacts whose c.High_School__c value is in hs_set,
      2. enrollments whose e.Student__c is the c.Id of a kept contact,
      3. accounts whose a.Id is the e.College__c of a kept enrollment.
    Each filtered row list gets its header re-attached and is wrapped in a
    tc.Table. Columns are then cut down with new_subtable using the
    module-level field/name lists, so that users who supply a CSV file do
    not have to supply exactly the right columns.

    Returns a (contacts, accounts, enrollments) tuple of the reduced
    tables.
    '''
    # NOTE(review): the headers sliced off the raw_* inputs are attached to
    # the filtered copies only, not put back on the inputs -- confirm that
    # callers do not reuse the raw tables afterwards.

    # Contacts: keep students from the selected high schools
    con_header = tt.slice_header(raw_con)
    kept_contacts = [
        row for row in raw_con
        if row[con_header[c.High_School__c]] in hs_set
    ]
    student_ids = {row[con_header[c.Id]] for row in kept_contacts}
    tt.add_header(kept_contacts, con_header)
    contacts_table = tc.Table(kept_contacts)

    # Enrollments: keep those belonging to the kept students
    enr_header = tt.slice_header(raw_enr)
    kept_enrollments = [
        row for row in raw_enr
        if row[enr_header[e.Student__c]] in student_ids
    ]
    college_ids = {row[enr_header[e.College__c]] for row in kept_enrollments}
    tt.add_header(kept_enrollments, enr_header)
    enrollments_table = tc.Table(kept_enrollments)

    # Accounts: keep the colleges referenced by the kept enrollments
    acc_header = tt.slice_header(raw_acc)
    kept_accounts = [
        row for row in raw_acc
        if row[acc_header[a.Id]] in college_ids
    ]
    tt.add_header(kept_accounts, acc_header)
    accounts_table = tc.Table(kept_accounts)

    # Column reduction, in the same contact / account / enrollment order
    slim_contacts = contacts_table.new_subtable(con_fields, con_names)
    slim_accounts = accounts_table.new_subtable(acc_fields, acc_names)
    slim_enrollments = enrollments_table.new_subtable(enr_fields, enr_names)
    return (slim_contacts, slim_accounts, slim_enrollments)
def remove_extra_rows_and_columns(raw_con, raw_acc, raw_enr, hs_set):
    '''Trim the raw contact, account and enrollment tables for the report.

    Rows are narrowed in a chain:
      1. contacts whose c.High_School__c value is in hs_set,
      2. enrollments whose e.Student__c is the c.Id of a kept contact,
      3. accounts whose a.Id is the e.College__c of a kept enrollment.
    Each filtered row list gets its header re-attached and is wrapped in a
    tc.Table. Columns are then cut down with new_subtable using the
    module-level field/name lists, so that users who supply a CSV file do
    not have to supply exactly the right columns.

    Returns a (contacts, accounts, enrollments) tuple of the reduced
    tables.
    '''
    # NOTE(review): the headers sliced off the raw_* inputs are attached to
    # the filtered copies only, not put back on the inputs -- confirm that
    # callers do not reuse the raw tables afterwards.

    # Contacts: keep students from the selected high schools
    con_header = tt.slice_header(raw_con)
    kept_contacts = [
        row for row in raw_con
        if row[con_header[c.High_School__c]] in hs_set
    ]
    student_ids = {row[con_header[c.Id]] for row in kept_contacts}
    tt.add_header(kept_contacts, con_header)
    contacts_table = tc.Table(kept_contacts)

    # Enrollments: keep those belonging to the kept students
    enr_header = tt.slice_header(raw_enr)
    kept_enrollments = [
        row for row in raw_enr
        if row[enr_header[e.Student__c]] in student_ids
    ]
    college_ids = {row[enr_header[e.College__c]] for row in kept_enrollments}
    tt.add_header(kept_enrollments, enr_header)
    enrollments_table = tc.Table(kept_enrollments)

    # Accounts: keep the colleges referenced by the kept enrollments
    acc_header = tt.slice_header(raw_acc)
    kept_accounts = [
        row for row in raw_acc
        if row[acc_header[a.Id]] in college_ids
    ]
    tt.add_header(kept_accounts, acc_header)
    accounts_table = tc.Table(kept_accounts)

    # Column reduction, in the same contact / account / enrollment order
    slim_contacts = contacts_table.new_subtable(con_fields, con_names)
    slim_accounts = accounts_table.new_subtable(acc_fields, acc_names)
    slim_enrollments = enrollments_table.new_subtable(enr_fields, enr_names)
    return (slim_contacts, slim_accounts, slim_enrollments)
def combine_contiguous_enrollments(nsc_data, daysgap):
    '''Merge each student's contiguous enrollments at the same college.

    Every student with at least one row whose n.RECORD_FOUND_Y_N value is
    'Y' is processed. That student's rows, excluding any whose
    'RECORD_FOUND_Y/N' value is 'N', are grouped by 'NCES ID' and each
    group is handed to combine_s_c_enrollments together with the header
    dict and daysgap. The condensed rows from all groups are collected,
    the header is re-attached, and the result is returned as a tc.Table.

    Students are processed in the order they first appear in nsc_data and
    colleges in the order they first appear in a student's rows, so the
    output row order is reproducible from run to run (on Python 3.7+,
    where dicts keep insertion order). combine_s_c_enrollments is only
    called with non-empty groups.

    Per the original notes, enrollments count as contiguous when the days
    between them is less than daysgap, and an enrollment has no end date if
    its end date is after effdate. Both rules are presumably applied inside
    combine_s_c_enrollments -- they are not visible in this function.
    '''
    # First get the students with matches; dict.fromkeys de-duplicates
    # while keeping first-seen order (a set would iterate in hash order).
    students_raw = nsc_data.get_columns(['sId', n.RECORD_FOUND_Y_N])
    students = list(dict.fromkeys(
        student[0] for student in students_raw if student[1] == 'Y'))

    hd = nsc_data.get_header_dict()  # So we can reference the elements
    results_table = []

    # Now process the records for each student
    print('Beginning to process %d students' % len(students))
    for sCount, sId in enumerate(students, start=1):  # count is for display
        # Group this student's usable rows by college in a single pass.
        # NOTE(review): 'RECORD_FOUND_Y/N' is presumably the same column
        # as n.RECORD_FOUND_Y_N -- confirm and use the constant if so.
        rows_by_college = {}
        for row in nsc_data.get_match_rows('sId', sId):
            if row[hd['RECORD_FOUND_Y/N']] != 'N':
                rows_by_college.setdefault(row[hd['NCES ID']], []).append(row)

        for s_c_table in rows_by_college.values():
            s_c_condensed = combine_s_c_enrollments(s_c_table, hd, daysgap)
            results_table.extend(s_c_condensed)

        if not sCount % 100:
            print('%d contacts processed.' % sCount, flush=True)

    tt.add_header(results_table, hd)
    # TODO (carried over): also need to handle records with no date
    return tc.Table(results_table)
else: enrolled_at = school[dA[a.Name]] student.append(len(records)) student.append(enrolled_at) if enrolled_at == student[dC[c.Currently_Enrolled_At__c]]: student.append('NO CHANGE') else: student.append('CHANGE') sf.Contact.update(student[dC[c.Id]], {c.Currently_Enrolled_At__c: enrolled_at}) dC['EnrollmentCount'] = max(dC.values()) + 1 dC['NewEnrolledAt'] = max(dC.values()) + 1 dC['EnrollChangeStatus'] = max(dC.values()) + 1 #Reconstitute headers tt.add_header(contacts, dC) tt.add_header(accounts, dA) tt.add_header(enrollments, dE) #Now write everything to the file import xlsxwriter workbook = xlsxwriter.Workbook('FixCurrentlyEnrolledAtReport.xlsx') ws = tt.table_to_exsheet(workbook, 'Contacts', contacts, bold=True, space=True) ws.freeze_panes(1, 3) ws = tt.table_to_exsheet(workbook, 'Accounts', accounts, bold=True, space=True) ws.freeze_panes(1, 2) ws = tt.table_to_exsheet(workbook, 'Enrollments', enrollments, bold=True, space=True)
else: enrolled_at = school[dA[a.Name]] student.append(len(records)) student.append(enrolled_at) if enrolled_at == student[dC[c.Currently_Enrolled_At__c]]: student.append('NO CHANGE') else: student.append('CHANGE') sf.Contact.update(student[dC[c.Id]], {c.Currently_Enrolled_At__c: enrolled_at}) dC['EnrollmentCount']=max(dC.values())+1 dC['NewEnrolledAt']=max(dC.values())+1 dC['EnrollChangeStatus']=max(dC.values())+1 #Reconstitute headers tt.add_header(contacts, dC) tt.add_header(accounts, dA) tt.add_header(enrollments, dE) #Now write everything to the file import xlsxwriter workbook = xlsxwriter.Workbook('FixCurrentlyEnrolledAtReport.xlsx') ws =tt.table_to_exsheet(workbook, 'Contacts', contacts, bold=True, space=True) ws.freeze_panes(1,3) ws = tt.table_to_exsheet(workbook, 'Accounts', accounts, bold=True, space=True) ws.freeze_panes(1,2) ws = tt.table_to_exsheet(workbook, 'Enrollments', enrollments, bold=True, space=True) ws.freeze_panes(1,3) workbook.close()