def test_multiple_object_ids():
    data = [{'x': 1, 'y': 2, 'other': ObjectId('1' * 24)},
            {'x': 3, 'y': 4, 'other': ObjectId('2' * 24)}]
    with coll(data) as c:
        assert discover(c) == dshape('2 * {x: int64, y: int64}')
        assert convert(list, c) == [(1, 2), (3, 4)]
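# The Mongo tests here rely on a `coll` context manager whose definition only
# appears in fragments further down. A minimal sketch of such a helper,
# assuming a module-level pymongo `db` handle (hypothetical reconstruction,
# not the verbatim fixture):
from contextlib import contextmanager
from copy import deepcopy


@contextmanager
def coll(data):
    # Create a throwaway collection, load `data`, and drop it afterwards.
    c = db.my_collection
    if data:
        c.insert(deepcopy(data))
    try:
        yield c
    finally:
        c.drop()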
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from odo import odo, dshape


def prepare_database():
    # Train set
    df = dd.read_csv("./train.csv")
    col_map = dict(zip(df.columns, [
        "week_num", "agency_id", "channel_id", "route_id", "client_id",
        "product_id", "sales_unit", "sales_peso", "returns_unit",
        "returns_peso", "adjusted_demand"
    ]))
    ds = dshape(
        "var * {week_num: int64, agency_id: int64, channel_id: int64, "
        "route_id: int64, client_id: int64, product_id: int64, "
        "sales_unit: int64, sales_peso: float64, returns_unit: int64, "
        "returns_peso: float64, adjusted_demand: int64}")
    df = df.rename(columns=col_map)
    print("translating the train set...")
    with ProgressBar():
        odo(df, "sqlite:///data.sqlite3::train", dshape=ds)  # the dirty part

    # Test set: discard the `id` (first) column
    df = dd.read_csv("./test.csv", usecols=range(1, 7))
    col_map = dict(zip(df.columns, [
        "week_num", "agency_id", "channel_id", "route_id", "client_id",
        "product_id"
    ]))
    ds = dshape(
        "var * {week_num: int64, agency_id: int64, channel_id: int64, "
        "route_id: int64, client_id: int64, product_id: int64}")
    df = df.rename(columns=col_map)
    print("translating the test set...")
    with ProgressBar():
        odo(df, "sqlite:///data.sqlite3::test", dshape=ds)
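# A quick sanity check for prepare_database() above -- a sketch that assumes
# the script has already populated data.sqlite3 in the working directory:
import pandas as pd
from odo import odo

train = odo('sqlite:///data.sqlite3::train', pd.DataFrame)  # table -> DataFrame
print(train.head())
print(len(train), "train rows loaded")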
def test_header_with_quotes():
    csv = CSV(os.path.join(os.path.dirname(__file__), 'encoding.csv'),
              encoding='latin1')
    expected = dshape("""var * {
        D_PROC: ?string,
        NUM_SEQ: int64,
        COD_TIP_RELAC: ?float64,
        COMPL: ?string,
        COD_ASSUNTO: int64
    }""")
    assert discover(csv) == expected
import os

import pandas as pd
import psycopg2
import odo

# `next_gen_cbm_bucket` (a boto3 S3 bucket handle) and `getCorrectShape`
# are assumed to be defined elsewhere in this module.


def saveTagsToPostgreSQL(installations, db_url, table):
    print("..........Loading and merging csv files............")
    for inst in installations:
        print(inst)
        conn = psycopg2.connect(db_url)
        files = [obj.key for obj in
                 next_gen_cbm_bucket.objects.filter(Prefix="W46F/" + inst)
                 if obj.key.endswith("csv.gz")]
        print(len(files))
        try:
            os.remove("temp.csv")
        except OSError:
            pass
        print("..........inserting into postgreSQL............")
        with open("output.txt", "a") as outputfile:
            outputfile.writelines("........." + inst + ".............")
        for f, key in enumerate(files):
            try:
                print(f)
                print("s3://next-gen-cbm-data/" + key)
                ds = odo.dshape(getCorrectShape(
                    odo.discover(odo.resource("s3://next-gen-cbm-data/" + key))))
                odo.odo("s3://next-gen-cbm-data/" + key, 'temp.csv', dshape=ds,
                        usecols=["ts", "v", "tag", "engine_id", "installation_id"])
                # Round-trip through pandas to enforce column order and drop NAs.
                df = pd.read_csv('temp.csv', dtype=str)
                df = df[["ts", "v", "tag", "engine_id", "installation_id"]]
                df = df.dropna()
                df.to_csv('temp.csv', index=False)
                copy_sql = """
                    COPY """ + table + """ FROM stdin WITH CSV HEADER
                    DELIMITER as ','
                """
                cur = conn.cursor()
                with open('temp.csv', 'r') as tempfile:
                    cur.copy_expert(sql=copy_sql, file=tempfile)
                    conn.commit()
                cur.close()
                os.remove("temp.csv")
                with open("output.txt", "a") as outputfile:
                    outputfile.writelines("\n ..." + str(f) +
                                          "...s3://next-gen-cbm-data/" + key)
            except Exception as err:
                print(err)
                print(key + " does not work")
                conn = psycopg2.connect(db_url)  # reconnect after a failure
                with open("output_failed.txt", "a") as outputfile:
                    outputfile.writelines("\n ..." + str(f) +
                                          "...s3://next-gen-cbm-data/" + key)
    print(".............Done.............")
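# Hypothetical invocation of saveTagsToPostgreSQL(); the installation IDs,
# DSN, and target table below are placeholders, not values from the source:
saveTagsToPostgreSQL(
    installations=["INST001", "INST002"],
    db_url="postgresql://user:password@localhost/cbm",
    table="tags",
)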
def main(argv=sys.argv):
    if len(argv) != 2:
        usage(argv)
    config_uri = argv[1]
    setup_logging(config_uri)
    settings = get_appsettings(config_uri)
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    print("Beginning stationfix.")
    # Re-download stations.json unless a copy less than a week old exists.
    if os.path.isfile('stations.json') and datetime.fromtimestamp(
            os.path.getmtime('stations.json')) > datetime.today() - timedelta(days=7):
        print("Using cached stations.json")
    else:
        print("Downloading stations.jsonl from EDDB.io...")
        r = requests.get("https://eddb.io/archive/v5/stations.jsonl", stream=True)
        with open('stations.json', 'wb') as f:
            for chunk in r.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)
    print("Saved stations.json. Creating temporary table and importing.")
    DBSession.execute("CREATE TABLE IF NOT EXISTS stations_tmp (LIKE stations)")
    mark_changed(DBSession())
    transaction.commit()
    url = str(engine.url) + "::stations_tmp"
    ds = dshape(
        "var *{ id: ?int64, name: ?string, system_id: ?int64, updated_at: ?int64, "
        "max_landing_pad_size: ?string, distance_to_star: ?int64, government_id: ?int64, "
        "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
        "state_id: ?int64, state: ?string, type_id: ?int64, type: ?string, "
        "has_blackmarket: ?bool, has_market: ?bool, has_refuel: ?bool, "
        "has_repair: ?bool, has_rearm: ?bool, has_outfitting: ?bool, "
        "has_shipyard: ?bool, has_docking: ?bool, has_commodities: ?bool, "
        "import_commodities: ?json, export_commodities: ?json, prohibited_commodities: ?json, "
        "economies: ?json, shipyard_updated_at: ?int64, outfitting_updated_at: ?int64, "
        "market_updated_at: ?int64, is_planetary: ?bool, selling_ships: ?json, "
        "selling_modules: ?json, settlement_size_id: ?string, settlement_size: ?int64, "
        "settlement_security_id: ?int64, settlement_security: ?string, body_id: ?int64, "
        "controlling_minor_faction_id: ?int64 }")
    t = odo('jsonlines://stations.json', url, dshape=ds)
def test_discover_csv_yields_string_on_totally_empty_columns():
    expected = dshape('var * {a: int64, b: ?string, c: int64}')
    with filetext('a,b,c\n1,,3\n4,,6\n7,,9') as fn:
        csv = CSV(fn, has_header=True)
        assert discover(csv) == expected
    tot_num_withdrawal: float64,
    total_withdrawal_amt: float64,
    min_withdrawal_amt: float64,
    max_withdrawal_amt: float64,
    avg_withdrawal_amt: float64,
    avg_deposit_amount_per_day: float64,
    avg_num_deposit_per_day: float64,
    avg_withdrawal_amount_per_day: float64,
    avg_num_withdrawal_per_day: float64,
    last_deposit_amount: float64,
    last_withdrawal_amount: float64,
    time_since_last_deposit: float64,
    time_since_last_withdrawal: float64,
    balance: float64,
    weighted_bal: float64,
    customer_type: ?string
}
"""
ds = dshape(sh)

# Load CSVs to Postgres
odo('customer_type.csv',
    'postgresql://*****:*****@localhost/sfl_demo_nigeria::customers',
    dshape=ds)
# odo('hist3.csv', 'postgresql://*****:*****@localhost/sfl_fid_nig_demo::historical', dshape=ds)
# odo('transactions.csv', 'postgresql://*****:*****@localhost/sfl_fid_nig_demo::transactions')

# export DATABASE_URL='postgres://*****:*****@localhost/westcape'
# 'postgres://*****:*****@localhost/flask_demo_auth'
def main(argv=sys.argv):
    if len(argv) != 2:
        usage(argv)
    config_uri = argv[1]
    setup_logging(config_uri)
    settings = get_appsettings(config_uri)
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)

    #
    # Systems
    #
    # Re-download each dump unless a copy less than a week old exists.
    if os.path.isfile('systems.csv') and datetime.fromtimestamp(
            os.path.getmtime('systems.csv')) > datetime.today() - timedelta(days=7):
        print("Using cached systems.csv")
    else:
        print("Downloading systems.csv from EDDB.io...")
        r = requests.get("https://eddb.io/archive/v5/systems.csv", stream=True)
        with open('systems.csv', 'wb') as f:
            for chunk in r.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)
    print("Saved systems.csv. Converting CSV to SQL.")
    ds = dshape(
        "var *{ id: ?int64, edsm_id: ?int64, name: ?string, x: ?float64, y: ?float64, "
        "z: ?float64, population: ?int64, is_populated: ?bool, government_id: ?int64, "
        "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
        "state_id: ?int64, state: ?string, security_id: ?float64, security: ?string, "
        "primary_economy_id: ?float64, primary_economy: ?string, power: ?string, "
        "power_state: ?string, power_state_id: ?string, needs_permit: ?bool, "
        "updated_at: ?int64, simbad_ref: ?string, controlling_minor_faction_id: ?string, "
        "controlling_minor_faction: ?string, reserve_type_id: ?float64, reserve_type: ?string }")
    url = str(engine.url) + "::" + System.__tablename__
    t = odo('systems.csv', url, dshape=ds)
    print("Uppercasing system names...")
    DBSession.execute("UPDATE systems SET name = UPPER(name)")
    print("Creating indexes...")
    DBSession.execute(
        "CREATE INDEX index_system_names_trigram ON systems USING GIN(name gin_trgm_ops)")
    DBSession.execute("CREATE INDEX index_system_names_btree ON systems (name)")
    print("Done!")

    #
    # Populated Systems
    #
    if os.path.isfile('systems_populated.json') and datetime.fromtimestamp(
            os.path.getmtime('systems_populated.json')) > datetime.today() - timedelta(days=7):
        print("Using cached systems_populated.json")
    else:
        print("Downloading systems_populated.jsonl from EDDB.io...")
        r = requests.get("https://eddb.io/archive/v5/systems_populated.jsonl", stream=True)
        with open('systems_populated.json', 'wb') as f:
            for chunk in r.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)
    print("Saved systems_populated.json. Converting JSONL to SQL.")
    url = str(engine.url) + "::" + PopulatedSystem.__tablename__
    ds = dshape(
        "var *{ id: ?int64, edsm_id: ?int64, name: ?string, x: ?float64, y: ?float64, "
        "z: ?float64, population: ?int64, is_populated: ?bool, government_id: ?int64, "
        "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
        "state_id: ?int64, state: ?string, security_id: ?float64, security: ?string, "
        "primary_economy_id: ?float64, primary_economy: ?string, power: ?string, "
        "power_state: ?string, power_state_id: ?string, needs_permit: ?int64, "
        "updated_at: ?int64, simbad_ref: ?string, controlling_minor_faction_id: ?string, "
        "controlling_minor_faction: ?string, reserve_type_id: ?float64, reserve_type: ?string, "
        "minor_faction_presences: ?json }")
    t = odo('jsonlines://systems_populated.json', url, dshape=ds)
    print("Uppercasing system names...")
    DBSession.execute("UPDATE populated_systems SET name = UPPER(name)")
    print("Creating indexes...")
    DBSession.execute(
        "CREATE INDEX index_populated_system_names_trigram ON populated_systems "
        "USING GIN(name gin_trgm_ops)")
    DBSession.execute(
        "CREATE INDEX index_populated_system_names_btree ON populated_systems (name)")
    print("Done!")

    #
    # Factions
    #
    if os.path.isfile('factions.json') and datetime.fromtimestamp(
            os.path.getmtime('factions.json')) > datetime.today() - timedelta(days=7):
        print("Using cached factions.json")
    else:
        print("Downloading factions.jsonl from EDDB.io...")
        r = requests.get("https://eddb.io/archive/v5/factions.jsonl", stream=True)
        with open('factions.json', 'wb') as f:
            for chunk in r.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)
    print("Saved factions.json. Converting JSONL to SQL.")
    url = str(engine.url) + "::" + Faction.__tablename__
    ds = dshape(
        "var *{ id: ?int64, name: ?string, updated_at: ?int64, government_id: ?int64, "
        "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
        "state_id: ?int64, state: ?string, home_system_id: ?int64, "
        "is_player_faction: ?bool }")
    t = odo('jsonlines://factions.json', url, dshape=ds)
    print("Done!")
    DBSession.execute("CREATE INDEX factions_idx ON factions(id)")

    #
    # Bodies
    #
    if os.path.isfile('bodies.json') and datetime.fromtimestamp(
            os.path.getmtime('bodies.json')) > datetime.today() - timedelta(days=7):
        print("Using cached bodies.json")
    else:
        print("Downloading bodies.jsonl from EDDB.io...")
        r = requests.get("https://eddb.io/archive/v5/bodies.jsonl", stream=True)
        with open('bodies.json', 'wb') as f:
            for chunk in r.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)
    print("Saved bodies.jsonl. Converting JSONL to SQL.")
    ds = dshape(
        "var *{ id: ?int64, created_at: ?int64, updated_at: ?int64, name: ?string, "
        "system_id: ?int64, group_id: ?int64, group_name: ?string, type_id: ?int64, "
        "type_name: ?string, distance_to_arrival: ?int64, full_spectral_class: ?string, "
        "spectral_class: ?string, spectral_sub_class: ?string, luminosity_class: ?string, "
        "luminosity_sub_class: ?string, surface_temperature: ?int64, is_main_star: ?bool, "
        "age: ?int64, solar_masses: ?float64, solar_radius: ?float64, catalogue_gliese_id: ?string, "
        "catalogue_hipp_id: ?string, catalogue_hd_id: ?string, volcanism_type_id: ?int64, "
        "volcanism_type_name: ?string, atmosphere_type_id: ?int64, atmosphere_type_name: ?string, "
        "terraforming_state_id: ?int64, terraforming_state_name: ?string, earth_masses: ?float64, "
        "radius: ?int64, gravity: ?float64, surface_pressure: ?int64, orbital_period: ?float64, "
        "semi_major_axis: ?float64, orbital_eccentricity: ?float64, orbital_inclination: ?float64, "
        "arg_of_periapsis: ?float64, rotational_period: ?float64, "
        "is_rotational_period_tidally_locked: ?bool, axis_tilt: ?float64, eg_id: ?int64, "
        "belt_moon_masses: ?float64, ring_type_id: ?int64, ring_type_name: ?string, "
        "ring_mass: ?int64, ring_inner_radius: ?float64, ring_outer_radius: ?float64, "
        "rings: ?json, atmosphere_composition: ?json, solid_composition: ?json, "
        "materials: ?json, is_landable: ?bool}")
    url = str(engine.url) + "::" + Body.__tablename__
    t = odo('jsonlines://bodies.json', url, dshape=ds)
    print("Creating indexes...")
    DBSession.execute("CREATE INDEX bodies_idx ON bodies(name text_pattern_ops)")
    DBSession.execute("CREATE INDEX systemid_idx ON bodies(system_id)")
    print("Done!")
import pandas as pd
from sas7bdat import SAS7BDAT

from odo.backends.sas import discover, sas_to_iterator
from odo.utils import tmpfile, into_path
from odo import append, convert, resource, dshape

test_path = into_path('backends', 'tests', 'airline.sas7bdat')
sasfile = SAS7BDAT(test_path)

columns = ("DATE", "AIR", "mon1", "mon2", "mon3", "mon4", "mon5", "mon6",
           "mon7", "mon8", "mon9", "mon10", "mon11", "mon12", "t", "Lair")

ds = dshape('''var * {DATE: date, AIR: float64, mon1: float64, mon2: float64,
                      mon3: float64, mon4: float64, mon5: float64,
                      mon6: float64, mon7: float64, mon8: float64,
                      mon9: float64, mon10: float64, mon11: float64,
                      mon12: float64, t: float64, Lair: float64}''')


def test_resource_sas7bdat():
    assert isinstance(resource(test_path), SAS7BDAT)


def test_discover_sas():
    assert discover(sasfile) == ds


def test_convert_sas_to_dataframe():
    df = convert(pd.DataFrame, sasfile)
    assert isinstance(df, pd.DataFrame)
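# Beyond the assertions above, the same machinery can move the SAS file into
# other containers. A sketch (the CSV path is illustrative, not from the
# test suite):
from odo import odo

df = odo(sasfile, pd.DataFrame)  # SAS7BDAT -> DataFrame via convert()
odo(df, 'airline.csv')           # DataFrame -> CSV on disk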
    if data:
        c.insert(deepcopy(data))
    try:
        yield c
    finally:
        c.drop()


bank = ({'name': 'Alice', 'amount': 100},
        {'name': 'Alice', 'amount': 200},
        {'name': 'Bob', 'amount': 100},
        {'name': 'Bob', 'amount': 200},
        {'name': 'Bob', 'amount': 300})

ds = dshape('var * {name: string, amount: int}')


def test_discover():
    with coll(bank) as c:
        assert discover(bank) == discover(c)


def test_discover_db():
    with coll(bank):
        assert set(discover(db).measure.names) == set(
            ['system.indexes', 'my_collection'])
    # After the context exits, the collection has been dropped.
    assert discover(db).measure.names == ['system.indexes']


def test_resource_db():
def main(argv=sys.argv):
    if len(argv) != 2:
        usage(argv)
    config_uri = argv[1]
    setup_logging(config_uri)
    settings = get_appsettings(config_uri)
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    print("Beginning update.")
    PopulatedSystem.__table__.drop(engine)
    Listing.__table__.drop(engine)
    Station.__table__.drop(engine)
    Faction.__table__.drop(engine)
    Body.__table__.drop(engine)
    Faction.__table__.create(engine)
    PopulatedSystem.__table__.create(engine)
    Body.__table__.create(engine)
    Station.__table__.create(engine)
    Listing.__table__.create(engine)
    mark_changed(DBSession())
    transaction.commit()

    #
    # Factions
    #
    print("Updating factions...")
    print("Downloading factions.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/factions.jsonl", stream=True)
    with open('factions.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved factions.json. Updating...")
    url = str(engine.url) + "::" + Faction.__tablename__
    ds = dshape("var *{ id: ?int64, name: ?string, updated_at: ?int64, government_id: ?int64, "
                "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
                "state_id: ?int64, state: ?string, home_system_id: ?int64, "
                "is_player_faction: ?bool }")
    t = odo('jsonlines://factions.json', url, dshape=ds)
    print("Done! Creating index...")
    DBSession.execute("CREATE INDEX factions_idx ON factions(id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Completed processing factions.")

    #
    # Systems
    #
    print("Downloading systems_recently.csv from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/systems_recently.csv", stream=True)
    with open('systems_recently.csv', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved systems_recently.csv. Creating temporary table and importing...")
    DBSession.execute("CREATE TEMP TABLE systems_tmp (LIKE systems)")
    url = str(engine.url) + "::systems_tmp"
    ds = dshape("var *{ id: ?int64, edsm_id: ?int64, name: ?string, x: ?float64, y: ?float64, "
                "z: ?float64, population: ?int64, is_populated: ?bool, government_id: ?int64, "
                "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
                "state_id: ?int64, state: ?string, security_id: ?float64, security: ?string, "
                "primary_economy_id: ?float64, primary_economy: ?string, power: ?string, "
                "power_state: ?string, power_state_id: ?string, needs_permit: ?bool, "
                "updated_at: ?int64, simbad_ref: ?string, controlling_minor_faction_id: ?string, "
                "controlling_minor_faction: ?string, reserve_type_id: ?float64, reserve_type: ?string }")
    t = odo('systems_recently.csv', url, dshape=ds)
    print("Updating systems...")
    # Upsert from the temp table. ON CONFLICT requires an explicit conflict
    # target (id), and each SET clause must reference its own EXCLUDED column.
    DBSession.execute(
        "INSERT INTO systems(id, edsm_id, name, x, y, z, population, is_populated, government_id, "
        "government, allegiance_id, allegiance, state_id, state, security_id, security, "
        "primary_economy_id, primary_economy, power, power_state, power_state_id, needs_permit, "
        "updated_at, simbad_ref, controlling_minor_faction_id, controlling_minor_faction, "
        "reserve_type_id, reserve_type) SELECT id, edsm_id, name, x, y, z, population, is_populated, "
        "government_id, government, allegiance_id, allegiance, state_id, state, security_id, security, "
        "primary_economy_id, primary_economy, power, power_state, power_state_id, needs_permit, "
        "updated_at, simbad_ref, controlling_minor_faction_id, controlling_minor_faction, "
        "reserve_type_id, reserve_type FROM systems_tmp ON CONFLICT (id) DO UPDATE "
        "SET edsm_id = EXCLUDED.edsm_id, name = EXCLUDED.name, x = EXCLUDED.x, "
        "y = EXCLUDED.y, z = EXCLUDED.z, population = EXCLUDED.population, "
        "is_populated = EXCLUDED.is_populated, government_id = EXCLUDED.government_id, "
        "government = EXCLUDED.government, allegiance_id = EXCLUDED.allegiance_id, "
        "allegiance = EXCLUDED.allegiance, state_id = EXCLUDED.state_id, "
        "state = EXCLUDED.state, security_id = EXCLUDED.security_id, security = EXCLUDED.security, "
        "primary_economy_id = EXCLUDED.primary_economy_id, primary_economy = EXCLUDED.primary_economy, "
        "power = EXCLUDED.power, power_state = EXCLUDED.power_state, "
        "power_state_id = EXCLUDED.power_state_id, needs_permit = EXCLUDED.needs_permit, "
        "updated_at = EXCLUDED.updated_at, simbad_ref = EXCLUDED.simbad_ref, "
        "controlling_minor_faction_id = EXCLUDED.controlling_minor_faction_id, "
        "controlling_minor_faction = EXCLUDED.controlling_minor_faction, "
        "reserve_type_id = EXCLUDED.reserve_type_id, reserve_type = EXCLUDED.reserve_type")
    mark_changed(DBSession())
    transaction.commit()
    print("Done!")

    #
    # Bodies
    #
    print("Downloading bodies.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/bodies.jsonl", stream=True)
    with open('bodies.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved bodies.jsonl. Converting JSONL to SQL.")
    DBSession.execute("CREATE TEMP TABLE bodies_tmp (LIKE bodies)")
    url = str(engine.url) + "::bodies_tmp"
    ds = dshape("var *{ id: ?int64, created_at: ?int64, updated_at: ?int64, name: ?string, "
                "system_id: ?int64, group_id: ?int64, group_name: ?string, type_id: ?int64, "
                "type_name: ?string, distance_to_arrival: ?int64, full_spectral_class: ?string, "
                "spectral_class: ?string, spectral_sub_class: ?string, luminosity_class: ?string, "
                "luminosity_sub_class: ?string, surface_temperature: ?int64, is_main_star: ?bool, "
                "age: ?int64, solar_masses: ?float64, solar_radius: ?float64, catalogue_gliese_id: ?string, "
                "catalogue_hipp_id: ?string, catalogue_hd_id: ?string, volcanism_type_id: ?int64, "
                "volcanism_type_name: ?string, atmosphere_type_id: ?int64, atmosphere_type_name: ?string, "
                "terraforming_state_id: ?int64, terraforming_state_name: ?string, earth_masses: ?float64, "
                "radius: ?int64, gravity: ?float64, surface_pressure: ?int64, orbital_period: ?float64, "
                "semi_major_axis: ?float64, orbital_eccentricity: ?float64, orbital_inclination: ?float64, "
                "arg_of_periapsis: ?float64, rotational_period: ?float64, "
                "is_rotational_period_tidally_locked: ?bool, axis_tilt: ?float64, eg_id: ?int64, "
                "belt_moon_masses: ?float64, ring_type_id: ?int64, ring_type_name: ?string, "
                "ring_mass: ?int64, ring_inner_radius: ?float64, ring_outer_radius: ?float64, "
                "rings: ?json, atmosphere_composition: ?json, solid_composition: ?json, "
                "materials: ?json, is_landable: ?bool}")
    # url = str(engine.url) + "::" + Body.__tablename__
    t = odo('jsonlines://bodies.json', url, dshape=ds)
    print("Creating indexes...")
    DBSession.execute("CREATE INDEX bodies_idx ON bodies(name text_pattern_ops)")
    mark_changed(DBSession())
    transaction.commit()
    DBSession.execute("CREATE INDEX systemid_idx ON bodies(system_id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Done!")

    #
    # Populated systems
    #
    print("Downloading systems_populated.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/systems_populated.jsonl", stream=True)
    with open('systems_populated.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved systems_populated.json. Updating...")
    url = str(engine.url) + "::" + PopulatedSystem.__tablename__
    ds = dshape("var *{ id: ?int64, edsm_id: ?int64, name: ?string, x: ?float64, y: ?float64, "
                "z: ?float64, population: ?int64, is_populated: ?bool, government_id: ?int64, "
                "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
                "state_id: ?int64, state: ?string, security_id: ?float64, security: ?string, "
                "primary_economy_id: ?float64, primary_economy: ?string, power: ?string, "
                "power_state: ?string, power_state_id: ?string, needs_permit: ?int64, "
                "updated_at: ?int64, simbad_ref: ?string, controlling_minor_faction_id: ?string, "
                "controlling_minor_faction: ?string, reserve_type_id: ?float64, reserve_type: ?string, "
                "minor_faction_presences: ?json }")
    t = odo('jsonlines://systems_populated.json', url, dshape=ds)
    print("Done! Uppercasing system names...")
    DBSession.execute("UPDATE populated_systems SET name = UPPER(name)")
    mark_changed(DBSession())
    transaction.commit()
    print("Creating indexes...")
    DBSession.execute("CREATE INDEX index_populated_system_names_trigram ON populated_systems "
                      "USING GIN(name gin_trgm_ops)")
    mark_changed(DBSession())
    transaction.commit()
    DBSession.execute("CREATE INDEX index_populated_system_names_btree ON populated_systems (name)")
    mark_changed(DBSession())
    transaction.commit()
    print("Completed processing populated systems.")

    #
    # Stations
    #
    print("Downloading stations.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/stations.jsonl", stream=True)
    with open('stations.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved stations.json. Updating...")
    DBSession.execute("CREATE TEMP TABLE stations_tmp (LIKE stations)")
    url = str(engine.url) + "::stations_tmp"
    # url = str(engine.url) + "::" + Station.__tablename__
    ds = dshape("var *{ id: ?int64, name: ?string, system_id: ?int64, updated_at: ?int64, "
                "max_landing_pad_size: ?string, distance_to_star: ?int64, government_id: ?int64, "
                "government: ?string, allegiance_id: ?int64, allegiance: ?string, "
                "state_id: ?int64, state: ?string, type_id: ?int64, type: ?string, "
                "has_blackmarket: ?bool, has_market: ?bool, has_refuel: ?bool, "
                "has_repair: ?bool, has_rearm: ?bool, has_outfitting: ?bool, "
                "has_shipyard: ?bool, has_docking: ?bool, has_commodities: ?bool, "
                "import_commodities: ?json, export_commodities: ?json, prohibited_commodities: ?json, "
                "economies: ?json, shipyard_updated_at: ?int64, outfitting_updated_at: ?int64, "
                "market_updated_at: ?int64, is_planetary: ?bool, selling_ships: ?json, "
                "selling_modules: ?json, settlement_size_id: ?string, settlement_size: ?int64, "
                "settlement_security_id: ?int64, settlement_security: ?string, body_id: ?int64, "
                "controlling_minor_faction_id: ?int64 }")
    t = odo('jsonlines://stations.json', url, dshape=ds)
    print("Done! Cleaning stations without body references...")
    DBSession.execute("DELETE FROM stations_tmp WHERE body_id NOT IN (SELECT b.id FROM bodies b)")
    mark_changed(DBSession())
    transaction.commit()
    # TODO: the stations merge below is incomplete (statement truncated in the
    # source); see the upsert sketch after this script.
    DBSession.execute("UPDATE stations SET id=t.id, name=t.name, system_id=t.system_id, "
                      "updated_at=t.updated_at, max_landing_pad_size=t.max_landing_pad_size, ")
    DBSession.execute("CREATE INDEX index_stations_systemid_btree ON stations(system_id)")
    mark_changed(DBSession())
    transaction.commit()
    DBSession.execute("CREATE INDEX index_stations_btree ON stations(id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Completed processing stations.")

    #
    # Listings
    #
    print("Downloading listings.csv from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/listings.csv", stream=True)
    with open('listings.csv', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved listings.csv. Updating...")
    url = str(engine.url) + "::" + Listing.__tablename__
    ds = dshape("var *{ id: ?int64, station_id: ?int64, commodity: ?int64, supply: ?int64, "
                "buy_price: ?int64, sell_price: ?int64, demand: ?int64, collected_at: ?int64 }")
    t = odo('listings.csv', url, dshape=ds)
    print("Creating indexes...")
    DBSession.execute("CREATE INDEX index_listings_stationid_btree ON listings(station_id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Updates complete.")
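# The stations merge in the script above is truncated in the source. For
# reference, a complete temp-table upsert in PostgreSQL typically looks like
# the sketch below; the column list is abbreviated and this is NOT the
# script's original statement:
DBSession.execute(
    "INSERT INTO stations (id, name, system_id, updated_at, max_landing_pad_size) "
    "SELECT id, name, system_id, updated_at, max_landing_pad_size "
    "FROM stations_tmp "
    "ON CONFLICT (id) DO UPDATE SET "
    "name = EXCLUDED.name, system_id = EXCLUDED.system_id, "
    "updated_at = EXCLUDED.updated_at, "
    "max_landing_pad_size = EXCLUDED.max_landing_pad_size")
mark_changed(DBSession())
transaction.commit()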
def test_multiple_object_ids(multiple_object_ids):
    assert discover(multiple_object_ids) == dshape('2 * {x: int64, y: int64}')
    assert convert(list, multiple_object_ids) == [(1, 2), (3, 4)]
def test_multiple_object_ids(): data = [{"x": 1, "y": 2, "other": ObjectId("1" * 24)}, {"x": 3, "y": 4, "other": ObjectId("2" * 24)}] with coll(data) as c: assert discover(c) == dshape("2 * {x: int64, y: int64}") assert convert(list, c) == [(1, 2), (3, 4)]
    try:
        yield c
    finally:
        c.drop()


bank = (
    {"name": "Alice", "amount": 100},
    {"name": "Alice", "amount": 200},
    {"name": "Bob", "amount": 100},
    {"name": "Bob", "amount": 200},
    {"name": "Bob", "amount": 300},
)

ds = dshape("var * {name: string, amount: int}")


def test_discover():
    with coll(bank) as c:
        assert discover(bank) == discover(c)


def test_append_convert():
    with coll([]) as c:
        append(c, bank, dshape=ds)
        assert convert(list, c, dshape=ds) == list(
            pluck(["name", "amount"], bank))


def test_resource():