예제 #1
0
def check_college_table_complete(nsc_data, college_dict):
    '''Checks that every college in the NSC data appears in college_dict;
    if any are missing, writes "missing_colleges.csv" and exits so the
    user can extend collegelist.csv before re-running.

    Args:
        nsc_data: NSC table (must support get_column and create_dict_list)
        college_dict: mapping keyed by IPEDS-style OPEID strings
    '''
    nsc_colleges = set(nsc_data.get_column(
                        n.COLLEGE_CODE_BRANCH)) - set([''])
    # The OPEIDs we're comparing follow the format '001650-01' in NSC
    # and 165001 in the IPEDS database. Also, if the two digit ending is
    # not found, '00' will specify the same school at the main branch

    def _opeid_forms(code_branch):
        '''Returns (branch-specific, main-branch) IPEDS-style OPEIDs.'''
        parts = code_branch.split('-')
        # Some NSC rows omit the branch suffix; treat those as main branch
        branch = parts[1] if len(parts) > 1 else '00'
        return (str(int(parts[0] + branch)), str(int(parts[0] + '00')))

    opeid_missing = []
    for entry in nsc_colleges:
        opeid1, opeid2 = _opeid_forms(entry)
        if opeid1 not in college_dict and opeid2 not in college_dict:
            opeid_missing.append(entry)

    if opeid_missing:
        nsc_dict = nsc_data.create_dict_list(n.COLLEGE_CODE_BRANCH,
                                       [n.COLLEGE_NAME, n.COLLEGE_STATE,
                                        n.PUBLIC_PRIVATE])
        output = [['OPEID',  'NCESID', 'Name', 'State', 'Control']]
        for college in opeid_missing:
            data = nsc_dict[college]
            # Reuse the same normalization instead of repeating the math
            opeid1, _ = _opeid_forms(college)
            row = [opeid1, '?', data[0], data[1], data[2]]
            output.append(row)
        print('Some colleges from the NSC file are not included in your\n'+
              'degree list (default filename collegelist.csv). Please\n'+
              'inspect the file "missing_colleges.csv" and use it to\n'+
              'add new colleges to the bottom of collegelist.csv\n'+
              'before running again.')
        tt.table_to_csv('missing_colleges.csv',output)
        exit()
예제 #2
0
def check_college_table_complete(nsc_data, college_dict):
    """Verify every NSC college code has an entry in college_dict;
    if not, write "missing_colleges.csv" and exit so the user can
    update the college list first."""
    nsc_colleges = set(nsc_data.get_column(n.COLLEGE_CODE_BRANCH)) - set([""])
    # NSC codes look like '001650-01' while the IPEDS database stores the
    # same school as 165001; a two digit ending of '00' denotes the main
    # branch when the specific branch code isn't found.
    opeid_missing = []
    for code in nsc_colleges:
        pieces = code.split("-")
        as_branch = str(int(pieces[0] + pieces[1]))
        as_main = str(int(pieces[0] + "00"))
        if not (as_branch in college_dict or as_main in college_dict):
            opeid_missing.append(code)

    if opeid_missing:
        nsc_dict = nsc_data.create_dict_list(
            n.COLLEGE_CODE_BRANCH,
            [n.COLLEGE_NAME, n.COLLEGE_STATE, n.PUBLIC_PRIVATE],
        )
        output = [["OPEID", "NCESID", "Name", "State", "Control"]]
        for code in opeid_missing:
            data = nsc_dict[code]
            pieces = code.split("-")
            output.append(
                [str(int(pieces[0] + pieces[1])), "?", data[0], data[1], data[2]]
            )
        print(
            "Some colleges from the NSC file are not included in your\n"
            "degree list (default filename collegelist.csv). Please\n"
            'inspect the file "missing_colleges.csv" and use it to\n'
            "add new colleges to the bottom of collegelist.csv\n"
            "before running again."
        )
        tt.table_to_csv("missing_colleges.csv", output)
        exit()
예제 #3
0
def main(infile, casesfile, outfile):
    '''Main control flow for doing admissions analysis.

    Loads the raw admissions CSV from infile, runs a regression for each
    analysis case defined in casesfile, and writes the summary table of
    coefficients/scores to outfile.
    '''
    print('Loading raw data from %s' % infile)
    raw_data = get_data.get_CSV(infile)  # Table Class
    print('Total of %d rows loaded' % len(raw_data))

    print('Loading analysis cases from %s' % casesfile)
    output_table = [[
        'Case', 'N', 'GPAcoef', 'ACTcoef', 'Int', 'Score', 'Loss'
    ]]
    cases = get_data.get_cases(casesfile)  # dictionary of cases
    for case in cases:
        print('Running case (%s)' % case)
        current_table = get_data.get_slice_by_case(cases[case], raw_data)
        # Huge hack: the 3 different lines below are just commented out
        # to pick the intended one of the three. School_Array is for
        # school specific analyses, while "standard" is for Barron's
        # based general ones (ACT25 for Black/Hispanic and ACT50 for
        # White/Asian)
        school_data = do_analysis.create_school_array(current_table)
        #school_data = do_analysis.create_standard_array(current_table,'ACT25')
        #school_data = do_analysis.create_standard_array(current_table,'ACT50')
        if len(school_data) > 1:
            try:
                school_results = do_analysis.run_lregression(school_data)
            except Exception as error:
                # A failed regression for one case (e.g. a singular matrix)
                # shouldn't abort the whole run, but it should be visible
                # rather than silently swallowed by a bare except.
                print('Case (%s) failed regression; skipping: %s'
                      % (case, error))
            else:
                new_row = [case, len(current_table)]
                new_row.extend(school_results)
                output_table.append(new_row)

    tt.table_to_csv(outfile, output_table)
예제 #4
0
def check_degrees_table_complete(nsc_data, degree_dict):
    """Verify every NSC degree title is present in degree_dict; if not,
    write "missing_degrees.csv" with guessed degree types and exit."""
    nsc_degrees = set(nsc_data.get_column(n.DEGREE_TITLE)) - set([""])
    missing_degrees = nsc_degrees - set(degree_dict)
    if not missing_degrees:
        return
    # Guess the degree type from the title's leading letter
    degree_lookup = {
        "A": "Associate's",
        "B": "Bachelor's",
        "C": "Certificate",
        "M": "Master's",
    }
    output_table = [
        [title, degree_lookup.get(title[0], "?")]
        for title in missing_degrees
        if title
    ]
    print(
        "Some degrees from the NSC file are not included in your\n"
        "degree list (default filename degreelist.csv). Please\n"
        'inspect the file "missing_degrees.csv" and copy the values\n'
        "to the bottom of degreelist.csv before running again."
    )
    tt.table_to_csv("missing_degrees.csv", output_table)
    exit()
예제 #5
0
def check_degrees_table_complete(nsc_data, degree_dict):
    '''Confirms degree_dict covers every degree title in the NSC data;
    writes "missing_degrees.csv" and exits when it does not.'''
    found_titles = set(nsc_data.get_column(n.DEGREE_TITLE)) - set([''])
    unknown_titles = found_titles - set(degree_dict)
    if unknown_titles:
        # Best-guess degree type keyed off the first letter of the title
        guesses = {'A': "Associate's",
                   'B': "Bachelor's",
                   'C': "Certificate",
                   'M': "Master's"}
        rows = []
        for title in unknown_titles:
            if not title:
                continue
            rows.append([title, guesses.get(title[0], '?')])
        print('Some degrees from the NSC file are not included in your\n'
              'degree list (default filename degreelist.csv). Please\n'
              'inspect the file "missing_degrees.csv" and copy the values\n'
              'to the bottom of degreelist.csv before running again.')
        tt.table_to_csv('missing_degrees.csv', rows)
        exit()
예제 #6
0
#!python3
'''
This module backs up the 5 main Salesforce tables and saves to CSV
'''
from botutils.ADB import ssf
from botutils.tabletools import tabletools as tt
import botutils.fileutils.zipup as zipup  #for compressing the output
import datetime
import os

# Name the output directory after today's date, e.g. '06_30_2024'
ends = datetime.datetime.now().strftime('%m_%d_%Y')
os.mkdir(ends)
sf = ssf.getSF()
# Salesforce objects to back up; commented-out entries are skipped
objects = [
    'Contact',
    'Account',
    'Enrollment__c',
    #'Contact_Note__c',
]
for obj in objects:
    # getattr is the safe, direct way to do a dynamic attribute lookup;
    # building source with exec() is both slower and an injection hazard
    oj = getattr(sf, obj)
    table = ssf.getAll(sf, oj)
    tt.table_to_csv(ends + '/' + obj + '_' + ends + '.csv', table)

zipup.compress(ends)  #Compresses the output directory into a single zip file
예제 #7
0
def main(nsc, db_flag, enr, con, acc):
    '''Main control flow for merging new enrollments with old in database.

    Args:
        nsc: filename of the output file produced by intake_nsc.py
        db_flag: if truthy, pull contacts/accounts/enrollments directly
                 from the database; otherwise read the three CSVs below
        enr: enrollments CSV filename (only used when db_flag is falsy)
        con: contacts CSV filename (only used when db_flag is falsy)
        acc: accounts CSV filename (only used when db_flag is falsy)

    Side effects: writes three dated update CSVs to the current
    directory plus four debugging CSVs under debugging_output/.
    '''
    # Echo the run configuration so console logs show what was used
    print('-' * 40)
    print('Output file from intake_nsc.py is %s' % nsc)
    if db_flag:
        print('Grabbing data tables directly from database')
    else:
        print('Accounts file: %s' % acc)
        print('Contacts file: %s' % con)
        print('Enrollments file: %s' % enr)
    print('-' * 40)

    # Load the intake_nsc file
    intake_raw = tc.Table(nsc)
    nsc_enr = em.get_enrollments_and_chg_vartype(intake_raw)
    # HS Class years present in the NSC file; used to restrict the db pull
    year_range = set(intake_raw.get_column('HS Class'))
    print('Year range of %s-%s.' % (min(year_range), max(year_range)))

    # Get the database information
    if db_flag:
        # Build an IN clause restricting contacts to the NSC class years,
        # e.g. "HS_Class__c IN ('2015','2016')"
        restr = c.HS_Class__c + ' IN '
        restr += "('" + "','".join(year_range) + "')"
        db_res = aDBi.grabThreeMainTables_Analysis(contactRestriction=restr,
                                                   mode='NSCmerge')
        con_raw = tc.Table(db_res[0])
        acc_raw = tc.Table(db_res[1])
        enr_raw = tc.Table(db_res[2])
    else:
        acc_raw = tc.Table(acc)
        con_raw = tc.Table(con)
        enr_raw = tc.Table(enr)

    # Cleanup the database info
    db_enr = em.get_enrollments_and_chg_vartype(enr_raw, True)
    acc_dict = get_acc_dict(acc_raw)
    # NOTE(review): con_dict is built but never referenced below -- confirm
    # whether it is still needed or is a leftover from an earlier version
    con_dict = get_con_dict(con_raw)

    # Prior to cycling through, append an index column to the two
    # enrollment tables so we can reference the original row even
    # if we're inspecting a subset of rows
    db_enr.add_column('Index', list(range(len(db_enr))))
    nsc_enr.add_column('Index', list(range(len(nsc_enr))))

    # Do many passes through tables to find matches
    print('-' * 40)
    print('Looking for matches: %d from db, %d from nsc' %
          (len(db_enr), len(nsc_enr)))

    db_enr_map, nsc_enr_map, match_table = em.find_matches(db_enr, nsc_enr)
    # x[-1] is the Index column added above; any index absent from the
    # match maps is still unmatched after all passes
    unmatched_db = [x for x in db_enr.rows() if x[-1] not in db_enr_map]
    unmatched_nsc = [x for x in nsc_enr.rows() if x[-1] not in nsc_enr_map]

    print('Still not matched: %d from db, %d from nsc' %
          (len(unmatched_db), len(unmatched_nsc)))

    # Use matching information to generate output tables
    print('-' * 40)
    print('Generating output files')

    # Each output table starts with a header row of field names
    enr_update = [[
        e.Id,
        e.Start_Date__c,
        e.End_Date__c,
        e.Date_Last_Verified__c,
        e.Status__c,
        e.Degree_Type__c,
        e.Data_Source__c,
        e.Degree_Text__c,
        e.Major_Text__c,
    ]]
    new_enr = [em.get_enr_field_list()]
    con_flag = [[c.Id, c.Needs_NSC_Review__c, c.NSC_Review_Reason__c]]

    for row in match_table:
        # row[0] is always a MatchCase subclass that will have a custom
        # function for each of these three lines if pertinent for this
        # case. Otherwise, the function will return the default None
        enr_update.append(row[0].enr_update(row[1:], acc_dict))
        new_enr.append(row[0].new_enr(row[1:]))
        con_flag.append(row[0].con_flag(row[1:], acc_dict))

    # Scrub out the empty rows if a MatchCase had nothing to say
    enr_update = [x for x in enr_update if x]
    new_enr = [x for x in new_enr if x]
    con_flag = [x for x in con_flag if x]

    #Rollup the contact flags to have a single row per student
    # (a student can appear in many rows; join their reasons with '; ')
    con_update = []
    for student in set(x[0] for x in con_flag[1:]):
        student_set = [x for x in con_flag if x[0] == student]
        new_flags = '; '.join([x[2] for x in student_set])
        con_update.append([student, True, new_flags])
    con_update.insert(0, con_flag[0])

    # Write output tables to files
    today_ending = date.today().strftime('%m_%d_%Y')
    new_enr_fn = 'new_enr_' + today_ending + '.csv'
    enr_update_fn = 'enr_update_' + today_ending + '.csv'
    con_update_fn = 'con_update_' + today_ending + '.csv'

    tt.table_to_csv(enr_update_fn, enr_update)
    tt.table_to_csv(new_enr_fn, new_enr)
    tt.table_to_csv(con_update_fn, con_update)

    # debugging lines
    case_list = Counter([x[0] for x in match_table]).most_common()
    case_table = [['Matching case', 'Frequency']]
    # NOTE(review): the comprehension variable c shadows the module-level
    # name c used above (harmless in Py3 comprehension scope, but confusing)
    case_table.extend([[c[0], c[1]] for c in case_list])
    nsc_h = nsc_enr.get_full_table()[0]
    db_h = db_enr.get_full_table()[0]
    unmatched_nsc.insert(0, nsc_h)
    unmatched_db.insert(0, db_h)
    match_h = ['Case']
    match_h.extend(nsc_h)
    match_h.extend(db_h)
    match_table.insert(0, match_h)

    if not os.path.exists('debugging_output'):
        os.makedirs('debugging_output')
    tt.table_to_csv('debugging_output/__match_table.csv', match_table)
    tt.table_to_csv('debugging_output/__unmatched_nsc.csv', unmatched_nsc)
    tt.table_to_csv('debugging_output/__unmatched_db.csv', unmatched_db)
    tt.table_to_csv('debugging_output/__matching_cases.csv', case_table)
예제 #8
0
'''
This module backs up the 5 main Salesforce tables and saves to CSV
'''
from botutils.ADB import ssf
from botutils.tabletools import tabletools as tt
import datetime
import os
from botutils.fileutils import zipup #for compressing the output file at the end

# All exports live under a shared parent directory
dataDir = os.path.abspath('../SalesforceExports/KIPP_Chicago')
if not os.path.exists(dataDir):
    os.makedirs(dataDir)

# One dated subdirectory per run, e.g. '06_30_2024'
ends = datetime.datetime.now().strftime('%m_%d_%Y')
outDir = os.path.join(dataDir, ends)
if not os.path.exists(outDir):
    os.makedirs(outDir)

sf = ssf.getSF()
# The five main Salesforce tables this module backs up
objects = ['Contact',
           'Account',
           'Enrollment__c',
           'Contact_Note__c',
           'Relationship__c']
for obj in objects:
    # getattr is the safe, direct way to do a dynamic attribute lookup;
    # building source with exec() is both slower and an injection hazard
    oj = getattr(sf, obj)
    table = ssf.getAll(sf, oj)
    tt.table_to_csv(os.path.join(outDir, obj + '_' + ends + '.csv'), table)

zipup.compress(outDir)  # Compresses the output directory into a single zip file
예제 #9
0
def main(nsc, db_flag, enr, con, acc):
    '''Main control flow for merging new enrollments with old in database.

    Args:
        nsc: filename of the output file produced by intake_nsc.py
        db_flag: if truthy, pull contacts/accounts/enrollments directly
                 from the database; otherwise read the three CSVs below
        enr: enrollments CSV filename (only used when db_flag is falsy)
        con: contacts CSV filename (only used when db_flag is falsy)
        acc: accounts CSV filename (only used when db_flag is falsy)

    Side effects: writes three dated update CSVs to the current
    directory plus four debugging CSVs under debugging_output/.
    '''
    # Echo the run configuration so console logs show what was used
    print('-'*40)
    print('Output file from intake_nsc.py is %s' % nsc)
    if db_flag:
        print('Grabbing data tables directly from database')
    else:
        print('Accounts file: %s' % acc)
        print('Contacts file: %s' % con)
        print('Enrollments file: %s' % enr)
    print('-'*40)

    # Load the intake_nsc file
    intake_raw = tc.Table(nsc)
    nsc_enr = em.get_enrollments_and_chg_vartype(intake_raw)
    # HS Class years present in the NSC file; used to restrict the db pull
    year_range = set(intake_raw.get_column('HS Class'))
    print('Year range of %s-%s.' % (min(year_range), max(year_range)))

    # Get the database information
    if db_flag:
        # Build an IN clause restricting contacts to the NSC class years,
        # e.g. "HS_Class__c IN ('2015','2016')"
        restr = c.HS_Class__c + ' IN '
        restr += "('" + "','".join(year_range) + "')"
        db_res = aDBi.grabThreeMainTables_Analysis(
                                        contactRestriction=restr,
                                        mode='NSCmerge')
        con_raw = tc.Table(db_res[0])
        acc_raw = tc.Table(db_res[1])
        enr_raw = tc.Table(db_res[2])
    else:
        acc_raw = tc.Table(acc)
        con_raw = tc.Table(con)
        enr_raw = tc.Table(enr)

    # Cleanup the database info
    db_enr = em.get_enrollments_and_chg_vartype(enr_raw, True)
    acc_dict = get_acc_dict(acc_raw)
    # NOTE(review): con_dict is built but never referenced below -- confirm
    # whether it is still needed or is a leftover from an earlier version
    con_dict = get_con_dict(con_raw)

    # Prior to cycling through, append an index column to the two
    # enrollment tables so we can reference the original row even
    # if we're inspecting a subset of rows
    db_enr.add_column('Index',list(range(len(db_enr))))
    nsc_enr.add_column('Index',list(range(len(nsc_enr))))

    # Do many passes through tables to find matches
    print('-'*40)
    print('Looking for matches: %d from db, %d from nsc'
            % (len(db_enr), len(nsc_enr)))

    db_enr_map, nsc_enr_map, match_table = em.find_matches(db_enr, nsc_enr)
    # x[-1] is the Index column added above; any index absent from the
    # match maps is still unmatched after all passes
    unmatched_db =  [x for x in db_enr.rows()  if x[-1] not in db_enr_map]
    unmatched_nsc = [x for x in nsc_enr.rows() if x[-1] not in nsc_enr_map]

    print('Still not matched: %d from db, %d from nsc' %
            (len(unmatched_db), len(unmatched_nsc)))

    # Use matching information to generate output tables
    print('-'*40)
    print('Generating output files')

    # Each output table starts with a header row of field names
    enr_update = [[e.Id, e.Start_Date__c, e.End_Date__c,
                  e.Date_Last_Verified__c, e.Status__c,
                  e.Degree_Type__c, e.Data_Source__c,
                  e.Degree_Text__c, e.Major_Text__c, ]]
    new_enr = [em.get_enr_field_list()]
    con_flag = [[c.Id, c.Needs_NSC_Review__c, c.NSC_Review_Reason__c]]

    for row in match_table:
        # row[0] is always a MatchCase subclass that will have a custom
        # function for each of these three lines if pertinent for this
        # case. Otherwise, the function will return the default None
        enr_update.append(row[0].enr_update(row[1:], acc_dict))
        new_enr.append(row[0].new_enr(row[1:]))
        con_flag.append(row[0].con_flag(row[1:], acc_dict))

    # Scrub out the empty rows if a MatchCase had nothing to say
    enr_update = [x for x in enr_update if x]
    new_enr = [x for x in new_enr if x]
    con_flag = [x for x in con_flag if x]

    #Rollup the contact flags to have a single row per student
    # (a student can appear in many rows; join their reasons with '; ')
    con_update = []
    for student in set(x[0] for x in con_flag[1:]):
        student_set = [x for x in con_flag if x[0] == student]
        new_flags = '; '.join([x[2] for x in student_set])
        con_update.append([student, True, new_flags])
    con_update.insert(0,con_flag[0])

    # Write output tables to files
    today_ending = date.today().strftime('%m_%d_%Y')
    new_enr_fn = 'new_enr_' + today_ending + '.csv'
    enr_update_fn = 'enr_update_' + today_ending + '.csv'
    con_update_fn = 'con_update_' + today_ending + '.csv'

    tt.table_to_csv(enr_update_fn, enr_update)
    tt.table_to_csv(new_enr_fn, new_enr)
    tt.table_to_csv(con_update_fn, con_update)

    # debugging lines
    case_list = Counter([x[0] for x in match_table]).most_common()
    case_table = [['Matching case','Frequency']]
    # NOTE(review): the comprehension variable c shadows the module-level
    # name c used above (harmless in Py3 comprehension scope, but confusing)
    case_table.extend([[c[0], c[1]] for c in case_list])
    nsc_h = nsc_enr.get_full_table()[0]
    db_h = db_enr.get_full_table()[0]
    unmatched_nsc.insert(0,nsc_h)
    unmatched_db.insert(0,db_h)
    match_h = ['Case']
    match_h.extend(nsc_h)
    match_h.extend(db_h)
    match_table.insert(0,match_h)

    if not os.path.exists('debugging_output'):
        os.makedirs('debugging_output')
    tt.table_to_csv('debugging_output/__match_table.csv',match_table)
    tt.table_to_csv('debugging_output/__unmatched_nsc.csv',unmatched_nsc)
    tt.table_to_csv('debugging_output/__unmatched_db.csv',unmatched_db)
    tt.table_to_csv('debugging_output/__matching_cases.csv',case_table)