def main():
    """ Build the school key, load every feature database, then join them.

    Steps, in order:
      1. find_school_key() is called.
      2. For each class in Database_l, extract(cur, year) is called for every
         year in config.year_l using a cursor on the 'temp' connection, and
         the per-year tables are joined on 'ENTITY_CD' via join_years.
      3. For each class in DistrictDatabase_l, extract(year) is called for
         every year (no cursor is passed), and the per-year tables are joined
         on 'district' via join_years.
      4. join_databases is called once with both class lists.
    """

    find_school_key()

    # School-level features: extract each year, then join the years on school ID
    for SchoolDatabase in Database_l:
        school_feature = SchoolDatabase()
        temp_con = utilities.connect_to_sql('temp')
        with temp_con:
            temp_cur = temp_con.cursor()
            for year in config.year_l:
                school_feature.extract(temp_cur, year)
        joined_con = utilities.connect_to_sql('joined')
        with joined_con:
            joined_cur = joined_con.cursor()
            join_years(joined_cur, school_feature.new_table_s, 'ENTITY_CD')

    # District-level features: extract each year, then join the years on district ID
    for DistrictDatabase in DistrictDatabase_l:
        district_feature = DistrictDatabase()
        for year in config.year_l:
            district_feature.extract(year)
        joined_con = utilities.connect_to_sql('joined')
        with joined_con:
            joined_cur = joined_con.cursor()
            join_years(joined_cur, district_feature.new_table_s, 'district')

    # Finally, join all databases of features together
    joined_con = utilities.connect_to_sql('joined')
    with joined_con:
        joined_cur = joined_con.cursor()
        join_databases(joined_cur, Database_l, DistrictDatabase_l)
Esempio n. 2
0
def main():
    """ Build the school key, load every feature database, then join them.

    Steps, in order:
      1. find_school_key() is called.
      2. For each class in Database_l, extract(cur, year) is called for every
         year in config.year_l using a cursor on the 'temp' connection, and
         the per-year tables are joined on 'ENTITY_CD' via join_years.
      3. For each class in DistrictDatabase_l, extract(year) is called for
         every year (no cursor is passed), and the per-year tables are joined
         on 'district' via join_years.
      4. join_databases is called once with both class lists.
    """

    find_school_key()

    # School-level features: extract each year, then join the years on school ID
    for SchoolDatabase in Database_l:
        school_feature = SchoolDatabase()
        temp_con = utilities.connect_to_sql('temp')
        with temp_con:
            temp_cur = temp_con.cursor()
            for year in config.year_l:
                school_feature.extract(temp_cur, year)
        joined_con = utilities.connect_to_sql('joined')
        with joined_con:
            joined_cur = joined_con.cursor()
            join_years(joined_cur, school_feature.new_table_s, 'ENTITY_CD')

    # District-level features: extract each year, then join the years on district ID
    for DistrictDatabase in DistrictDatabase_l:
        district_feature = DistrictDatabase()
        for year in config.year_l:
            district_feature.extract(year)
        joined_con = utilities.connect_to_sql('joined')
        with joined_con:
            joined_cur = joined_con.cursor()
            join_years(joined_cur, district_feature.new_table_s, 'district')

    # Finally, join all databases of features together
    joined_con = utilities.connect_to_sql('joined')
    with joined_con:
        joined_cur = joined_con.cursor()
        join_databases(joined_cur, Database_l, DistrictDatabase_l)
def find_school_key():
    """ Recreate the school_key table of school IDs and names.

    The table is dropped if present and rebuilt in the 'joined' connection
    from the RegentsPassRate source table for the last year in config.year_l.
    A 'district' column holding the first six characters of ENTITY_CD is then
    added, followed by an index on ENTITY_CD.
    """

    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS school_key;')

        # Restricting to SUBJECT = 'REG_ENG' is kind of a hack. The IDs
        # 240901040001 and 241001060003 are excluded because, per the
        # original author, they made rows multiply in the database.
        create_template_s = """CREATE TABLE school_key
SELECT ENTITY_CD, ENTITY_NAME FROM SRC{0:d}.`{1}`
WHERE YEAR = {0:d}
AND SUBJECT = 'REG_ENG'
AND SUBGROUP_NAME = 'General Education'
AND ENTITY_CD NOT LIKE '%0000'
AND ENTITY_CD NOT LIKE '00000000000%'
AND ENTITY_CD != '111111111111'
AND ENTITY_CD != '240901040001'
AND ENTITY_CD != '241001060003'"""
        regents = RegentsPassRate()
        latest_year = config.year_l[-1]
        cur.execute(create_template_s.format(
            latest_year, regents.orig_table_s_d[latest_year]))

        # Post-processing: add the district column, fill it, index the key
        followup_s_l = (
            """ALTER TABLE school_key ADD district CHAR(6)""",
            """UPDATE school_key SET district = SUBSTRING(ENTITY_CD, 1, 6);""",
            """ALTER TABLE school_key
ADD INDEX ENTITY_CD (ENTITY_CD)""",
        )
        for followup_s in followup_s_l:
            cur.execute(followup_s)
Esempio n. 4
0
def find_school_key():
    """ Recreate the school_key table of school IDs and names.

    The table is dropped if present and rebuilt in the 'joined' connection
    from the RegentsPassRate source table for the last year in config.year_l.
    A 'district' column holding the first six characters of ENTITY_CD is then
    added, followed by an index on ENTITY_CD.
    """

    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS school_key;')

        # Restricting to SUBJECT = 'REG_ENG' is kind of a hack. The IDs
        # 240901040001 and 241001060003 are excluded because, per the
        # original author, they made rows multiply in the database.
        create_template_s = """CREATE TABLE school_key
SELECT ENTITY_CD, ENTITY_NAME FROM SRC{0:d}.`{1}`
WHERE YEAR = {0:d}
AND SUBJECT = 'REG_ENG'
AND SUBGROUP_NAME = 'General Education'
AND ENTITY_CD NOT LIKE '%0000'
AND ENTITY_CD NOT LIKE '00000000000%'
AND ENTITY_CD != '111111111111'
AND ENTITY_CD != '240901040001'
AND ENTITY_CD != '241001060003'"""
        regents = RegentsPassRate()
        latest_year = config.year_l[-1]
        cur.execute(create_template_s.format(
            latest_year, regents.orig_table_s_d[latest_year]))

        # Post-processing: add the district column, fill it, index the key
        followup_s_l = (
            """ALTER TABLE school_key ADD district CHAR(6)""",
            """UPDATE school_key SET district = SUBSTRING(ENTITY_CD, 1, 6);""",
            """ALTER TABLE school_key
ADD INDEX ENTITY_CD (ENTITY_CD)""",
        )
        for followup_s in followup_s_l:
            cur.execute(followup_s)
Esempio n. 5
0
def plot_feature_histograms():
    """ Plot histograms of all features.

    For each name in database_s_l, the columns ENTITY_CD and
    '<name>_<year>' (one per year in config.year_l) are requested from the
    'master' table. One figure is drawn per feature, with one step histogram
    per year, and saved as '<name>.png' under save_path.

    Each figure is closed after it is saved; pyplot otherwise keeps every
    figure alive, so memory would grow with the number of features.
    """

    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        for database_s in database_s_l:
            field_s_l = ['ENTITY_CD'] + \
                ['{0}_{1:d}'.format(database_s, year) for year in config.year_l]
            raw_data_a = utilities.select_data(con, cur, field_s_l, 'master',
                                      output_type='np_array')
            # Drop the first column (ENTITY_CD); the rest are the per-year values
            data_a = raw_data_a[:, 1:]
            # NaN entries are excluded from the histograms
            valid_la = ~np.isnan(data_a)

            fig = plt.figure()
            try:
                ax = fig.add_subplot(111)
                for i, year in enumerate(config.year_l):
                    col_a = data_a[:, i]
                    ax.hist(col_a[valid_la[:, i]], bins=20,
                            color=config.year_plot_color_d[year],
                            histtype='step')
                ax.set_xlabel(database_s)
                ax.set_ylabel('Frequency')
                ax.ticklabel_format(useOffset=False)
                fig.savefig(os.path.join(save_path, database_s + '.png'))
            finally:
                # Release the figure even if plotting or saving failed
                plt.close(fig)
def main(**kwargs):
    """ Read in all data and run the fits/predictions over all school statistics separately.

    For every database class in join_data.Database_l and
    join_data.DistrictDatabase_l, the columns ENTITY_CD and
    '<feature>_<year>' (one per year in config.year_l) are requested from
    utilities.select_data for the 'master' table with output_type='np_array'.
    predict_a_feature is then called once per feature.

    Args:
        **kwargs: forwarded unchanged to every predict_a_feature call.
    """

    ## Read in data
    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        data_a_d = {}
        all_Database_l = join_data.Database_l + join_data.DistrictDatabase_l
        for Database in all_Database_l:
            Instance = Database()
            feature_s = Instance.new_table_s
            field_s_l = ['ENTITY_CD'] + \
                ['{0}_{1:d}'.format(feature_s, year) for year in config.year_l]
            data_a_d[feature_s] = utilities.select_data(con, cur, field_s_l,
                                                        'master',
                                                        output_type='np_array')

    ## Run prediction over all features
    # Iterate the dict directly: dict.iterkeys() does not exist on Python 3,
    # while plain iteration yields the same keys on both Python 2 and 3.
    for feature_s in data_a_d:
        predict_a_feature(data_a_d, feature_s, **kwargs)
Esempio n. 7
0
def main(**kwargs):
    """ Read in all data and run the fits/predictions over all school statistics separately.

    For every database class in join_data.Database_l and
    join_data.DistrictDatabase_l, the columns ENTITY_CD and
    '<feature>_<year>' (one per year in config.year_l) are requested from
    utilities.select_data for the 'master' table with output_type='np_array'.
    predict_a_feature is then called once per feature.

    Args:
        **kwargs: forwarded unchanged to every predict_a_feature call.
    """

    ## Read in data
    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        data_a_d = {}
        all_Database_l = join_data.Database_l + join_data.DistrictDatabase_l
        for Database in all_Database_l:
            Instance = Database()
            feature_s = Instance.new_table_s
            field_s_l = ['ENTITY_CD'] + \
                ['{0}_{1:d}'.format(feature_s, year) for year in config.year_l]
            data_a_d[feature_s] = utilities.select_data(con,
                                                        cur,
                                                        field_s_l,
                                                        'master',
                                                        output_type='np_array')

    ## Run prediction over all features
    # Iterate the dict directly: dict.iterkeys() does not exist on Python 3,
    # while plain iteration yields the same keys on both Python 2 and 3.
    for feature_s in data_a_d:
        predict_a_feature(data_a_d, feature_s, **kwargs)
Esempio n. 8
0
    return ax



# Directory that receives the plots written by this module
save_path = os.path.join(config.plot_path, 'explore_data')
if not os.path.isdir(save_path):
    # makedirs also creates config.plot_path when it is missing, where
    # os.mkdir would raise because the parent directory does not exist.
    os.makedirs(save_path)

# Feature names: new_table_s of every school-level and district-level database
database_s_l = []
for Database in join_data.Database_l + join_data.DistrictDatabase_l:
    Instance = Database()
    database_s_l.append(Instance.new_table_s)


# Map each feature name to its selected data with the first column
# (ENTITY_CD) dropped, leaving one column per year in config.year_l
data_a_d = {}
con = utilities.connect_to_sql('joined')
with con:
    cur = con.cursor()
    for database_s in database_s_l:
        field_s_l = ['ENTITY_CD'] + \
            ['{0}_{1:d}'.format(database_s, year) for year in config.year_l]
        raw_data_a = utilities.select_data(con, cur, field_s_l, 'master',
                                  output_type='np_array')
        data_a_d[database_s] = raw_data_a[:, 1:]



if __name__ == '__main__':
    main()