def bulk_insert_into_db_gdb(num_min_year, num_max_year):
    """
    Insert data into gdp database

    Reads the World Bank GDP CSV, skips its first four lines, and treats the
    fifth line as the header. The header is handed to get_csv_column_idx to
    find the position of the country-name column and of the columns for
    num_min_year and num_max_year. Every non-empty row after the header is
    inserted into worldbank_gdp_data as the country name followed by the
    values from the min-year column through the max-year column (inclusive).

    The target column list is fixed (Country, GDP_1999 .. GDP_2010), so the
    selected year range has to span exactly twelve CSV columns; a row that
    yields a different number of values is rejected by the database.

    num_min_year -- first year to load; used to look up the header column.
    num_max_year -- last year to load; used to look up the header column.
    """
    con = uf.connect_db('scrappd_education_data.db')
    cur = con.cursor()

    table_columns = (
        'Country',
        'GDP_1999', 'GDP_2000', 'GDP_2001', 'GDP_2002',
        'GDP_2003', 'GDP_2004', 'GDP_2005', 'GDP_2006',
        'GDP_2007', 'GDP_2008', 'GDP_2009', 'GDP_2010',
    )
    insert_head = ('INSERT INTO worldbank_gdp_data ('
                   + ', '.join(table_columns) + ') VALUES (')

    with open('world_bank_gdp_data/9612cab5-6177-41d5-a04f-55d22c4169b7_v2.csv', 'r') as input_file:
        # skip the first four irrelevant lines
        for _ in range(4):
            next(input_file)

        # Get csv header and parse information to return relevant column
        # names to the db
        header = next(input_file)
        col_indexes = get_csv_column_idx(header, num_min_year, num_max_year)
        country_idx = col_indexes['"Country Name"']
        min_year_idx = col_indexes[uf.stringify_text(num_min_year)]
        max_year_idx = col_indexes[uf.stringify_text(num_max_year)]

        input_reader = csv.reader(input_file)
        # One `with con:` block around the whole load, as in
        # bulk_insert_into_db_education, instead of one per row.
        # NOTE(review): assumes con is a sqlite3-style connection whose
        # context manager commits on success and rolls back on error.
        with con:
            for line in input_reader:
                if not line:
                    continue
                values = [line[country_idx]] + line[min_year_idx:max_year_idx + 1]
                # Bind the values through placeholders rather than pasting
                # them into the statement: CSV text containing quotes can no
                # longer break or alter the SQL, and the values are no longer
                # written as double-quoted tokens (identifiers in standard SQL).
                placeholders = ', '.join('?' * len(values))
                cur.execute(insert_head + placeholders + ');', values)
def build_dataframe_gdp():
    """
    Build DataFrame - World Bank GDP.

    Loads every row of the worldbank_gdp_data table into a pandas DataFrame
    and returns it with the columns that contain no data at all removed.
    """
    connection = uf.connect_db('scrappd_education_data.db')
    with connection:
        gdp_frame = pd.read_sql_query("SELECT * FROM worldbank_gdp_data", connection)
    # axis=1 with how="all": drop the columns in which every value is missing
    return gdp_frame.dropna(axis=1, how="all")
def create_table_education():
    """
    (Re)create the education_life_info table.

    Any existing education_life_info table is dropped first, then a fresh one
    is created with Country as the primary key plus Year, total_school_time,
    Male_Expectancy and Female_Expectancy columns.
    """
    drop_statement = "DROP TABLE IF EXISTS education_life_info"
    create_statement = (
        "CREATE TABLE education_life_info (Country TEXT PRIMARY KEY, "
        "Year INT, total_school_time INT, Male_Expectancy INT, "
        "Female_Expectancy INT);"
    )

    connection = uf.connect_db('scrappd_education_data.db')
    cursor = connection.cursor()
    with connection:
        # Drop before create so the function can be re-run on an existing db
        for statement in (drop_statement, create_statement):
            cursor.execute(statement)
def bulk_insert_into_db_education(dataframe_education):
    """
    Insert the rows of a DataFrame into the education_life_info table.

    dataframe_education -- object whose `.values` yields one sequence per
        row; the first five entries of each row are written, in order, to
        Country, Year, Total_School_Time, Male_Expectancy and
        Female_Expectancy.
    """
    insert_statement = (
        "INSERT INTO education_life_info (Country, Year, "
        "Total_School_Time, Male_Expectancy, Female_Expectancy) "
        "VALUES (?,?,?,?,?)"
    )
    connection = uf.connect_db('scrappd_education_data.db')
    cursor = connection.cursor()

    # Lazily produce one 5-tuple of bound parameters per DataFrame row
    parameter_rows = (
        (record[0], record[1], record[2], record[3], record[4])
        for record in dataframe_education.values
    )
    with connection:
        cursor.executemany(insert_statement, parameter_rows)
def create_table_gdp():
    """
    Create table gdp

    Drops worldbank_gdp_data if it already exists and recreates it with a
    Country column followed by one NUMERIC column per year, GDP_1999 through
    GDP_2010.
    """
    con = uf.connect_db('scrappd_education_data.db')
    cur = con.cursor()
    with con:
        cur.execute("DROP TABLE IF EXISTS worldbank_gdp_data")
        # Country stores country names, so it is declared TEXT (it used to be
        # REAL), matching the Country column of education_life_info.
        cur.execute(
            'CREATE TABLE worldbank_gdp_data (Country TEXT, '
            'GDP_1999 NUMERIC, GDP_2000 NUMERIC, GDP_2001 NUMERIC, '
            'GDP_2002 NUMERIC, GDP_2003 NUMERIC, GDP_2004 NUMERIC, '
            'GDP_2005 NUMERIC, GDP_2006 NUMERIC, GDP_2007 NUMERIC, '
            'GDP_2008 NUMERIC, GDP_2009 NUMERIC, GDP_2010 NUMERIC);'
        )