def bulk_insert_into_db_gdb(num_min_year, num_max_year):
    """Insert World Bank GDP data into the worldbank_gdp_data table.

    Args:
        num_min_year: first GDP year column to load (e.g. 1999).
        num_max_year: last GDP year column to load (e.g. 2010).

    The CSV is expected to have four preamble lines before the header,
    and the table has 13 columns: Country plus GDP_1999..GDP_2010.
    """
    con = uf.connect_db('scrappd_education_data.db')
    cur = con.cursor()
    with open('world_bank_gdp_data/9612cab5-6177-41d5-a04f-55d22c4169b7_v2.csv', 'r') as input_file:
        # Skip the first four irrelevant preamble lines.
        for _ in range(4):
            next(input_file)
        # Parse the header to locate the country column and the year-range columns.
        header = next(input_file)
        col_indexes = get_csv_column_idx(header, num_min_year, num_max_year)
        country_idx = col_indexes['"Country Name"']
        min_year_idx = col_indexes[uf.stringify_text(num_min_year)]
        max_year_idx = col_indexes[uf.stringify_text(num_max_year)]
        input_reader = csv.reader(input_file)
        # Parameterized statement: 1 country value + 12 GDP year values.
        # Bound parameters (?) avoid SQL injection and quoting bugs — the old
        # string-built query broke on any field containing a double quote.
        insert_sql = ('INSERT INTO worldbank_gdp_data '
                      '(Country, GDP_1999, GDP_2000, GDP_2001, '
                      'GDP_2002, GDP_2003, GDP_2004, GDP_2005, '
                      'GDP_2006, GDP_2007, GDP_2008, GDP_2009, GDP_2010) '
                      'VALUES (' + ','.join(['?'] * 13) + ')')
        for line in input_reader:
            if line:
                # `with con` commits each row (or rolls back on error),
                # matching the original per-row transaction behavior.
                with con:
                    cur.execute(insert_sql,
                                [line[country_idx]]
                                + line[min_year_idx:max_year_idx + 1])
def get_csv_column_idx(csv_headers, num_min_year, num_max_year):
    """Map the relevant header fields of the education CSV to column indexes.

    Args:
        csv_headers: the raw header line of the CSV file.
        num_min_year: first year of interest.
        num_max_year: last year of interest.

    Returns:
        Dict mapping each matching header field (country name, min year,
        max year — as produced by uf.stringify_text) to its column position.
    """
    # The three header fields we care about, in the exact textual form
    # they appear in the CSV header.
    wanted = {
        uf.stringify_text('Country Name'),
        uf.stringify_text(num_min_year),
        uf.stringify_text(num_max_year),
    }
    return {field: pos
            for pos, field in enumerate(csv_headers.split(','))
            if field in wanted}