select = """ SELECT * FROM Demographic WHERE element_id BETWEEN 511 AND 603 """ xs = np.ma.masked_less_equal(np.array(cursor.execute(select).fetchall()), 0)[:,1:] # Calculate masked differences along the first axis diff_xs = np.ma.masked_all(np.shape(xs[:, 4:])) diff_xs[:, 1:] = np.ma.diff(xs[:, 4:], axis=1) diff_xs = np.ma.filled(diff_xs, -1) # Stack id fields with net change values xs = np.ma.filled(xs, -1) ys = np.hstack((xs[:, :4], diff_xs)) # Convert ndarray to recarray ys = ys.reshape(-1, ).view(get_dtype(connection, 'Demographic', remove_id=True)) # Replace element values w/ their corresponding net change values for element_id in [511, 512, 513, 551, 561, 571, 581, 591, 592, 593, 601, 602, 603]: np.put(ys['element_id'], np.where(ys['element_id']==element_id), element_id + 100) # Get last index value + 1 of Demographic table for primary key values max_id, = np.array(cursor.execute("SELECT MAX(id) FROM Demographic").fetchall()).flatten() + 1 # Insert new data into the database using sqlite_io structure import sqlite_io sqlite_io.tosqlite(ys, max_id, DB, "Demographic", autoid=True, create=False) # Close the cursor and the connection cursor.close() connection.close()
# Resize the insert array to the rows actually filled
insert_xs = np.ma.resize(insert_xs, (count, len(names)))

# Fill masked values with -1
insert_xs = np.ma.filled(insert_xs, -1)

# Convert ndarray to recarray
insert_xs = insert_xs.view(ndtype).flatten()

# Replace element_ids with those for the per capita elements
np.put(insert_xs['element_id'], np.where(insert_xs['element_id'] == 100), 101)  # per capita consumption
np.put(insert_xs['element_id'], np.where(insert_xs['element_id'] == 51), 52)    # per capita production

# Replace unit_ids with those for the per capita elements
np.put(insert_xs['unit_id'], np.where(insert_xs['unit_id'] == 3), 17)   # tonnes/1000 people
np.put(insert_xs['unit_id'], np.where(insert_xs['unit_id'] == 9), 18)   # 1000 No/1000 people

# Replace source_ids with the new code for 'InnovoSoy Calculated'
insert_xs['source_id'] = 8

# Get last index value + 1 of the Commodity table for primary key values
max_id, = np.array(cursor.execute("SELECT MAX(id) FROM Commodity").fetchall()).flatten() + 1

# Insert the new data into the database using sqlite_io
sqlite_io.tosqlite(insert_xs, max_id, DB, "Commodity", autoid=True, create=False)

# Close the cursor and the connection
cursor.close()
connection.close()
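# Illustration (toy recarray, made-up ids and values): the np.put pattern
# used above rewrites id codes in place on a structured-array field.
import numpy as np
toy = np.array([(100, 2.5), (51, 7.0), (100, 3.1)],
               dtype=[('element_id', '<i4'), ('yr1961', '<f8')])
np.put(toy['element_id'], np.where(toy['element_id'] == 100), 101)
np.put(toy['element_id'], np.where(toy['element_id'] == 51), 52)
print(toy['element_id'])  # [101  52 101]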
TABLE_NAME = "Commodity_Raw_Data" #Countries to merge with country one being the country to stay in the database country_one = 185 country_two = 228 country_name = "Russian_Federation" #table_name #Query to merge the rows of the two countries query = """ SELECT country_id, item_id, element_id, unit_id, source_id, %s FROM %%s WHERE country_id=%s OR country_id=%s GROUP BY item_id||element_id||source_id """%(",".join("SUM(yr%s) AS yr%s"%(x, x) for x in xrange(1961, 2011)), country_one, country_two) #Run query through sqlite_io file, creating a temporary table and then dropping when complete xs = sqlite_io.fromsqlite(DB, query%TABLE_NAME, "tmp") print xs[xs['item_id']==1012] exit() #Extract out merged data for country remaining in the database xs_merged = xs[xs['country_id']==country_one] #Create a new table in the database for this new merged country count = 0 foreign_keys = {'country_id':'Country', 'element_id':'Element', 'unit_id':'Unit', 'source_id':'Source'} index = ['source_id', 'element_id', 'item_id', 'country_id'] #index in order sqlite_io.tosqlite(xs_merged, count, DB, country_name, autoid=True, foreign_keys=foreign_keys, index=index)
trade_row = np.ma.masked_all(YEARS.size, dtype=float)  # masked array spanning all year values
trade_country_id, element_id = column.split("_")  # split the column name on the _ separator
idx = y['year_id'] - MIN_YEAR  # index of the years to include
item_lookup = find_key(aggregate_item_lookup, item_id)[0]  # look up the new item code
trade_row[idx] = z  # assign values at the positions matching their years
trade_rows[count] = trade_row
id_rows[count] = np.array([country_id, element_id, item_lookup,
                           trade_country_id, SOURCE_ID], dtype=int)
count += 1

# Resize the arrays to match the actual size of the data
trade_rows = np.ma.filled(np.ma.resize(trade_rows, (count,)), -1)  # fill masked values with -1 for the database
id_rows = np.ma.resize(id_rows, (count,))

# Merge and flatten the two arrays to keep the proper data types
from numpy.lib.recfunctions import merge_arrays
xs_rows = merge_arrays((id_rows, trade_rows), flatten=True)  # values for the database
xs_rows = np.sort(xs_rows, order=["element_id", "item_id", "country_id", "trade_country_id"])

# Create a new database table for trade relationships
import sqlite_io
DB = r".\GFIN_DB.db3"
TABLE_NAME = "Trade"
foreign_keys = {'country_id': 'Country', 'element_id': 'Element', 'unit_id': 'Unit',
                'source_id': 'Source', 'trade_country_id': 'Country'}
index = ['element_id', 'item_id', 'country_id']  # index in order
sqlite_io.tosqlite(xs_rows, 0, DB, TABLE_NAME, autoid=True,
                   foreign_keys=foreign_keys, index=index,
                   create=True)  # create a new table for the trade matrix
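# Illustration (toy arrays, made-up fields and values): merge_arrays with
# flatten=True zips separate structured arrays into one recarray, keeping
# the integer id fields and float value fields at their own dtypes.
import numpy as np
from numpy.lib.recfunctions import merge_arrays
ids = np.array([(1, 100)], dtype=[('country_id', '<i4'), ('element_id', '<i4')])
vals = np.array([(3.5,)], dtype=[('yr1961', '<f8')])
merged = merge_arrays((ids, vals), flatten=True)
print(merged.dtype.names)  # ('country_id', 'element_id', 'yr1961')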
top_splits = create.split(",")[1:] for i, top in enumerate(top_splits): splits = top.split(" ")[:2] for j, split in enumerate(splits): splits[j] = split.replace("\n", "").replace(")", "").replace("FLOAT", "<f8").replace("INTEGER", "<f8") top_splits[i] = splits ndtype = [i for i in imap(tuple, top_splits)] # View combined data as a recarray zs = zs.view(ndtype).flatten() # Sort new array by index fields in order zs = np.sort(zs, order=['element_id', 'item_id', 'country_id']) # Insert data into the new table import sqlite_io sqlite_io.tosqlite(zs, 0, DB, "Datum", autoid=True, create=False) # Add on index index = "CREATE INDEX Datum_index ON Datum (element_id, item_id, country_id)" cursor.execute(index) connection.commit() # Drop Demographic and Commodity table drops = ["DROP TABLE Demographic", "DROP TABLE Commodity"] [cursor.execute(drop) for drop in drops] connection.commit() # Close the cursor and the connection cursor.close() connection.close()
# Create the tables in the new database
for statement in create_strs:
    new_cursor.execute(statement)
new_connection.commit()

# Insert data into each table
for table in copy_tables:
    is_autoid = table in ('SchemeColor', 'AreaGroup')  # tables with id as primary key
    ndtype, names = get_dtype(connection, table, remove_id=is_autoid, nameReturn=True)
    # Get data from the master database for copying
    xs = np.ma.array(cursor.execute("SELECT %s FROM %s" % (",".join(names), table)).fetchall(), ndtype)
    # Mask all None values and choose how primary keys are assigned
    autoid = is_autoid  # let sqlite_io assign primary keys for the autoid tables
    primary_key = False if is_autoid else "%s_id" % table.lower()
    xs = mask_none_values(xs)
    sqlite_io.tosqlite(xs, 0, NEW_DB, table, autoid=autoid, create=False, primary_key=primary_key)

# Format the value tables, using -1 for missing values
for table in TABLES:
    names, typestr = zip(*(row[1:3] for row in connection.execute("PRAGMA TABLE_INFO(%s)" % table).fetchall()))
    names = ",".join(name.strip() for name in names if name.strip() != 'id')
    xs = sqlite_io.fromsqlite(DB, "SELECT %s FROM %s" % (names, table), "tmp_table")
    ndtype = xs.dtype
    xs = xs.view(float).reshape((-1, len(names.split(","))))
    xs = np.ma.masked_less_equal(xs, 0)  # mask any value less than or equal to 0
    # Remove Commodity rows that have fewer than 5 values
    if table == 'Commodity':
        id_field_idx = 5  # number of columns separating the foreign keys from the values
        id_fields = xs[:, :id_field_idx]  # foreign key fields
        value_fields = xs[:, id_field_idx:]  # data value fields
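# The excerpt ends before the filter itself; a hypothetical completion
# (toy data, made-up values), assuming "fewer than 5 values" means fewer
# than 5 unmasked yearly entries per row:
import numpy as np
vals = np.ma.masked_less_equal(np.array([[7., 0., 0., 0., 0., 0.],
                                         [1., 2., 3., 4., 5., 6.]]), 0)
keep = vals.count(axis=1) >= 5  # count() tallies unmasked entries per row
print(vals[keep])  # only the second row survives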