コード例 #1
0
def bulk_insert_into_db_gdb(num_min_year, num_max_year):
    """Insert World Bank GDP rows from the CSV file into ``worldbank_gdp_data``.

    The first four lines of the CSV are skipped, the fifth line is used as
    the header, and every following non-empty row produces one INSERT made
    of the country name followed by the contiguous run of columns from the
    ``num_min_year`` column through the ``num_max_year`` column (inclusive).

    Row values are bound through ``?`` placeholders instead of being spliced
    into the SQL text, so a quote character inside a CSV field can no longer
    break or alter the statement.
    NOTE(review): this assumes ``uf.connect_db`` returns a sqlite3-style
    connection (qmark paramstyle) -- the ``.db`` file name and the
    ``with con:`` usage suggest so; confirm.

    All rows are inserted inside a single ``with con:`` block rather than
    one block per row, so (on a sqlite3 connection) the load is committed
    once at the end and a failing row rolls back the whole load.

    Args:
        num_min_year: First year column to copy; located in the header via
            ``uf.stringify_text``.
        num_max_year: Last year column to copy (inclusive); located the
            same way.

    Raises:
        StopIteration: If the file has fewer than five lines.
        KeyError: If the country column or either year column is missing
            from the header.

    The target column list is fixed to ``Country`` plus ``GDP_1999`` ..
    ``GDP_2010``; a year range that does not yield exactly twelve values is
    expected to be rejected by the database because the value count will
    not match the column count.
    """
    con = uf.connect_db('scrappd_education_data.db')
    cur = con.cursor()
    # NOTE(review): the connection is never closed here; ownership of the
    # object returned by uf.connect_db is not visible from this function.

    # Only the fixed part of the statement lives in the SQL text; values are
    # supplied separately as bound parameters.
    insert_head = (
        'INSERT INTO worldbank_gdp_data '
        '(Country, GDP_1999, GDP_2000, GDP_2001, '
        'GDP_2002, GDP_2003, GDP_2004, GDP_2005, '
        'GDP_2006, GDP_2007, GDP_2008, GDP_2009, GDP_2010) '
        'VALUES ('
    )

    with open('world_bank_gdp_data/9612cab5-6177-41d5-a04f-55d22c4169b7_v2.csv', 'r') as input_file:
        # Skip the first four irrelevant lines.
        for _ in range(4):
            next(input_file)

        # Parse the header to find the columns that feed the database.
        header = next(input_file)
        col_indexes = get_csv_column_idx(header, num_min_year, num_max_year)
        country_idx = col_indexes['"Country Name"']
        min_year_idx = col_indexes[uf.stringify_text(num_min_year)]
        max_year_idx = col_indexes[uf.stringify_text(num_max_year)]

        input_reader = csv.reader(input_file)
        with con:
            for line in input_reader:
                if not line:
                    continue
                params = [line[country_idx]] + line[min_year_idx:max_year_idx + 1]
                # One placeholder per bound value; a count that disagrees
                # with the column list is left for the database to reject.
                placeholders = ', '.join('?' * len(params))
                cur.execute(insert_head + placeholders + ');', params)
コード例 #2
0
def get_csv_column_idx(csv_headers, num_min_year, num_max_year):
    """Return the column indexes of the country and boundary-year headers.

    The header line is split on plain commas and each field is compared,
    as-is, against ``uf.stringify_text('Country Name')``,
    ``uf.stringify_text(num_min_year)`` and
    ``uf.stringify_text(num_max_year)``.

    A trailing line terminator is stripped before splitting, so the last
    header field can still match when the line is passed in exactly as it
    was read from the file.

    Args:
        csv_headers: The raw header line of the CSV file, as one string.
        num_min_year: Lower boundary year whose column index is wanted.
        num_max_year: Upper boundary year whose column index is wanted.

    Returns:
        A dict mapping each matched header field (the raw field text) to
        its zero-based position. Headers that are not found are simply
        absent; if a field occurs more than once the last position wins.

    NOTE(review): splitting on ``,`` assumes no quoted header field
    contains a comma; otherwise these indexes would not line up with rows
    parsed by ``csv.reader``.
    """
    str_min_year = uf.stringify_text(num_min_year)
    str_max_year = uf.stringify_text(num_max_year)
    country_field = uf.stringify_text('Country Name')
    wanted = (country_field, str_min_year, str_max_year)

    # Drop the line ending left by file iteration; without this the final
    # field would carry the newline and never compare equal.
    header_list = csv_headers.rstrip('\r\n').split(',')
    return {val: idx for idx, val in enumerate(header_list) if val in wanted}