def check_college_table_complete(nsc_data, college_dict): """performs check and if not, creates an update file and exits""" nsc_colleges = set(nsc_data.get_column(n.COLLEGE_CODE_BRANCH)) - set([""]) # The OPEIDs we're comparing have follow the format '001650-01' in NSC # and 165001 in the IPEDS database. Also, if the two digit ending is # not found, '00' will specify the same school at the main branch opeid_missing = [] for entry in nsc_colleges: parts = entry.split("-") opeid1 = str(int(parts[0] + parts[1])) opeid2 = str(int(parts[0] + "00")) if opeid1 not in college_dict and opeid2 not in college_dict: opeid_missing.append(entry) if opeid_missing: nsc_dict = nsc_data.create_dict_list(n.COLLEGE_CODE_BRANCH, [n.COLLEGE_NAME, n.COLLEGE_STATE, n.PUBLIC_PRIVATE]) output = [["OPEID", "NCESID", "Name", "State", "Control"]] for college in opeid_missing: data = nsc_dict[college] parts = college.split("-") opeid1 = str(int(parts[0] + parts[1])) row = [opeid1, "?", data[0], data[1], data[2]] output.append(row) print( "Some colleges from the NSC file are not included in your\n" + "degree list (default filename collegelist.csv). Please\n" + 'inspect the file "missing_colleges.csv" and use it to\n' + "add new colleges to the bottom of collegelist.csv\n" + "before running again." ) tt.table_to_csv("missing_colleges.csv", output) exit()
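# For illustration, a standalone sketch of the OPEID normalization performed
# above; the helper name is hypothetical and not part of this module.
def _nsc_to_ipeds_opeids(nsc_code):
    """'001650-01' -> ('165001', '165000'): the branch-specific IPEDS key
    and the main-branch ('00') fallback key."""
    stem, branch = nsc_code.split("-")
    return str(int(stem + branch)), str(int(stem + "00"))

# e.g. _nsc_to_ipeds_opeids('001650-01') == ('165001', '165000')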
def main(infile, casesfile, outfile):
    '''Main control flow for doing admissions analysis'''
    print('Loading raw data from %s' % infile)
    raw_data = get_data.get_CSV(infile)  # Table class
    print('Total of %d rows loaded' % len(raw_data))
    print('Loading analysis cases from %s' % casesfile)
    output_table = [['Case', 'N', 'GPAcoef', 'ACTcoef', 'Int', 'Score', 'Loss']]
    cases = get_data.get_cases(casesfile)  # dictionary of cases
    for case in cases:
        print('Running case (%s)' % case)
        current_table = get_data.get_slice_by_case(cases[case], raw_data)

        # Huge hack: exactly one of the three lines below is uncommented
        # to pick the intended analysis. create_school_array is for
        # school-specific analyses, while the "standard" arrays are for
        # Barron's-based general ones (ACT25 for Black/Hispanic and ACT50
        # for White/Asian)
        school_data = do_analysis.create_school_array(current_table)
        #school_data = do_analysis.create_standard_array(current_table, 'ACT25')
        #school_data = do_analysis.create_standard_array(current_table, 'ACT50')

        if len(school_data) > 1:
            try:
                school_results = do_analysis.run_lregression(school_data)
                new_row = [case, len(current_table)]
                new_row.extend(school_results)
                output_table.append(new_row)
            except Exception:
                # Skip cases where the regression fails
                pass
    tt.table_to_csv(outfile, output_table)
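# A hedged sketch of replacing the comment-toggling above with an explicit
# argument; 'mode' and this wrapper are hypothetical, but the three calls
# are the ones named in the comment.
def build_analysis_array(current_table, mode='school'):
    """mode is 'school' for school-specific arrays, or 'ACT25'/'ACT50'
    for the Barron's-based standard arrays."""
    if mode == 'school':
        return do_analysis.create_school_array(current_table)
    return do_analysis.create_standard_array(current_table, mode)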
def check_degrees_table_complete(nsc_data, degree_dict): """performs check and if not, creates an update file and exits""" nsc_degrees = set(nsc_data.get_column(n.DEGREE_TITLE)) - set([""]) missing_degrees = nsc_degrees - set(degree_dict) if missing_degrees: degree_lookup = {"A": "Associate's", "B": "Bachelor's", "C": "Certificate", "M": "Master's"} output_table = [] for miss in missing_degrees: if miss: if miss[0] in degree_lookup: output_table.append([miss, degree_lookup[miss[0]]]) else: output_table.append([miss, "?"]) print( "Some degrees from the NSC file are not included in your\n" + "degree list (default filename degreelist.csv). Please\n" + 'inspect the file "missing_degrees.csv" and copy the values\n' + "to the bottom of degreelist.csv before running again." ) tt.table_to_csv("missing_degrees.csv", output_table) exit()
#!python3
'''This module backs up the main Salesforce tables and saves them to CSV'''
from botutils.ADB import ssf
from botutils.tabletools import tabletools as tt
import botutils.fileutils.zipup as zipup  # for compressing the output
import datetime
import os

ends = datetime.datetime.now().strftime('%m_%d_%Y')
os.mkdir(ends)
sf = ssf.getSF()
objects = [
    'Contact',
    'Account',
    'Enrollment__c',
    #'Contact_Note__c',
]
for obj in objects:
    oj = getattr(sf, obj)  # look up the Salesforce object by name
    table = ssf.getAll(sf, oj)
    tt.table_to_csv(ends + '/' + obj + '_' + ends + '.csv', table)

zipup.compress(ends)  # Compresses the output directory into a single zip file
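# For reference, a stdlib stand-in with roughly the behavior assumed of
# zipup.compress (archiving the dated output directory into a single zip
# file); the real helper in botutils may differ.
import shutil

def compress_dir(dirname):
    """Create dirname.zip from the contents of dirname."""
    shutil.make_archive(dirname, 'zip', dirname)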
def main(nsc, db_flag, enr, con, acc):
    '''Main control flow for merging new enrollments with old in database'''
    print('-' * 40)
    print('Output file from intake_nsc.py is %s' % nsc)
    if db_flag:
        print('Grabbing data tables directly from database')
    else:
        print('Accounts file: %s' % acc)
        print('Contacts file: %s' % con)
        print('Enrollments file: %s' % enr)
    print('-' * 40)

    # Load the intake_nsc file
    intake_raw = tc.Table(nsc)
    nsc_enr = em.get_enrollments_and_chg_vartype(intake_raw)
    year_range = set(intake_raw.get_column('HS Class'))
    print('Year range of %s-%s.' % (min(year_range), max(year_range)))

    # Get the database information
    if db_flag:
        restr = c.HS_Class__c + ' IN '
        restr += "('" + "','".join(year_range) + "')"
        db_res = aDBi.grabThreeMainTables_Analysis(contactRestriction=restr,
                                                   mode='NSCmerge')
        con_raw = tc.Table(db_res[0])
        acc_raw = tc.Table(db_res[1])
        enr_raw = tc.Table(db_res[2])
    else:
        acc_raw = tc.Table(acc)
        con_raw = tc.Table(con)
        enr_raw = tc.Table(enr)

    # Clean up the database info
    db_enr = em.get_enrollments_and_chg_vartype(enr_raw, True)
    acc_dict = get_acc_dict(acc_raw)
    con_dict = get_con_dict(con_raw)

    # Prior to cycling through, append an index column to the two
    # enrollment tables so we can reference the original row even
    # if we're inspecting a subset of rows
    db_enr.add_column('Index', list(range(len(db_enr))))
    nsc_enr.add_column('Index', list(range(len(nsc_enr))))

    # Do many passes through the tables to find matches
    print('-' * 40)
    print('Looking for matches: %d from db, %d from nsc' %
          (len(db_enr), len(nsc_enr)))
    db_enr_map, nsc_enr_map, match_table = em.find_matches(db_enr, nsc_enr)
    unmatched_db = [x for x in db_enr.rows() if x[-1] not in db_enr_map]
    unmatched_nsc = [x for x in nsc_enr.rows() if x[-1] not in nsc_enr_map]
    print('Still not matched: %d from db, %d from nsc' %
          (len(unmatched_db), len(unmatched_nsc)))

    # Use the matching information to generate output tables
    print('-' * 40)
    print('Generating output files')
    enr_update = [[
        e.Id,
        e.Start_Date__c,
        e.End_Date__c,
        e.Date_Last_Verified__c,
        e.Status__c,
        e.Degree_Type__c,
        e.Data_Source__c,
        e.Degree_Text__c,
        e.Major_Text__c,
    ]]
    new_enr = [em.get_enr_field_list()]
    con_flag = [[c.Id, c.Needs_NSC_Review__c, c.NSC_Review_Reason__c]]
    for row in match_table:
        # row[0] is always a MatchCase subclass that has a custom
        # function for each of these three lines if pertinent for this
        # case. Otherwise, the function returns the default None
        enr_update.append(row[0].enr_update(row[1:], acc_dict))
        new_enr.append(row[0].new_enr(row[1:]))
        con_flag.append(row[0].con_flag(row[1:], acc_dict))

    # Scrub out the empty rows if a MatchCase had nothing to say
    enr_update = [x for x in enr_update if x]
    new_enr = [x for x in new_enr if x]
    con_flag = [x for x in con_flag if x]

    # Roll up the contact flags to have a single row per student
    con_update = []
    for student in set(x[0] for x in con_flag[1:]):
        student_set = [x for x in con_flag if x[0] == student]
        new_flags = '; '.join([x[2] for x in student_set])
        con_update.append([student, True, new_flags])
    con_update.insert(0, con_flag[0])

    # Write output tables to files
    today_ending = date.today().strftime('%m_%d_%Y')
    new_enr_fn = 'new_enr_' + today_ending + '.csv'
    enr_update_fn = 'enr_update_' + today_ending + '.csv'
    con_update_fn = 'con_update_' + today_ending + '.csv'
    tt.table_to_csv(enr_update_fn, enr_update)
    tt.table_to_csv(new_enr_fn, new_enr)
    tt.table_to_csv(con_update_fn, con_update)

    # Debugging output
    case_list = Counter([x[0] for x in match_table]).most_common()
    case_table = [['Matching case', 'Frequency']]
    case_table.extend([[c[0], c[1]] for c in case_list])
    nsc_h = nsc_enr.get_full_table()[0]
    db_h = db_enr.get_full_table()[0]
    unmatched_nsc.insert(0, nsc_h)
    unmatched_db.insert(0, db_h)
    match_h = ['Case']
    match_h.extend(nsc_h)
    match_h.extend(db_h)
    match_table.insert(0, match_h)
    if not os.path.exists('debugging_output'):
        os.makedirs('debugging_output')
    tt.table_to_csv('debugging_output/__match_table.csv', match_table)
    tt.table_to_csv('debugging_output/__unmatched_nsc.csv', unmatched_nsc)
    tt.table_to_csv('debugging_output/__unmatched_db.csv', unmatched_db)
    tt.table_to_csv('debugging_output/__matching_cases.csv', case_table)
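# A minimal sketch of the MatchCase pattern the loop above relies on, as
# described in its comments: the base class returns the default None for
# every hook, and each subclass overrides only the hooks pertinent to its
# case. The base-class shape here is assumed, not taken from the source.
class MatchCase:
    def enr_update(self, row, acc_dict):
        return None  # overridden to return an enrollment-update row

    def new_enr(self, row):
        return None  # overridden to return a new-enrollment row

    def con_flag(self, row, acc_dict):
        return None  # overridden to return a contact-flag row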
'''This module backs up the 5 main Salesforce tables and saves them to CSV'''
from botutils.ADB import ssf
from botutils.tabletools import tabletools as tt
import datetime
import os
from botutils.fileutils import zipup  # for compressing the output file at the end

dataDir = os.path.abspath('../SalesforceExports/KIPP_Chicago')
if not os.path.exists(dataDir):
    os.makedirs(dataDir)
ends = datetime.datetime.now().strftime('%m_%d_%Y')
outDir = os.path.join(dataDir, ends)
if not os.path.exists(outDir):
    os.makedirs(outDir)

sf = ssf.getSF()
objects = ['Contact', 'Account', 'Enrollment__c', 'Contact_Note__c',
           'Relationship__c']
for obj in objects:
    oj = getattr(sf, obj)  # look up the Salesforce object by name
    table = ssf.getAll(sf, oj)
    tt.table_to_csv(os.path.join(outDir, obj + '_' + ends + '.csv'), table)

zipup.compress(outDir)  # Compresses the output directory into a single zip file