Example #1
0
def test_multiple_object_ids():
    """ObjectId-valued fields must be dropped from discovery and conversion."""
    docs = []
    for x_val, y_val, seed in ((1, 2, '1'), (3, 4, '2')):
        docs.append({'x': x_val, 'y': y_val, 'other': ObjectId(seed * 24)})
    with coll(docs) as collection:
        expected = dshape('2 * {x: int64, y: int64}')
        assert discover(collection) == expected
        assert convert(list, collection) == [(1, 2), (3, 4)]
Example #2
0
def prepare_database():
    """Translate train.csv and test.csv into tables of data.sqlite3.

    Each CSV is read lazily with dask, its raw columns are renamed to
    snake_case identifiers, and the frame is bulk-loaded via odo into the
    ``train`` / ``test`` table respectively.  The duplicated train/test
    logic of the original is factored into ``_translate_csv``.
    """
    # Train set
    _translate_csv(
        "./train.csv",
        "train",
        [
            "week_num", "agency_id", "channel_id", "route_id", "client_id",
            "product_id", "sales_unit", "sales_peso", "returns_unit",
            "returns_peso", "adjusted_demand"
        ],
        dshape(
            "var * {week_num:int64,agency_id:int64,channel_id:int64,route_id:int64,client_id:int64,product_id:int64,\
                sales_unit:int64,sales_peso:float64,returns_unit:int64,returns_peso:float64, adjusted_demand:int64}"
        ),
    )

    # Test set
    _translate_csv(
        "./test.csv",
        "test",
        [
            "week_num", "agency_id", "channel_id", "route_id", "client_id",
            "product_id"
        ],
        dshape(
            "var * {week_num:int64,agency_id:int64,channel_id:int64,route_id:int64,client_id:int64,product_id:int64}"
        ),
        usecols=range(1, 7),  # discard the `id` (first) column
    )


def _translate_csv(path, table, names, ds, usecols=None):
    """Read *path* with dask, rename its columns to *names*, and bulk-load
    the frame into data.sqlite3::*table* with the datashape *ds*.

    ``usecols`` is forwarded to ``dd.read_csv`` when given.
    """
    kwargs = {} if usecols is None else {"usecols": usecols}
    df = dd.read_csv(path, **kwargs)
    df = df.rename(columns=dict(zip(df.columns, names)))
    print("translating the %s set..." % table)
    with ProgressBar():
        odo(df, "sqlite:///data.sqlite3::" + table, dshape=ds)  # the dirty part
Example #3
0
def test_header_with_quotes():
    """Discovery must handle a latin1-encoded CSV with a quoted header row."""
    path = os.path.join(os.path.dirname(__file__), 'encoding.csv')
    source = CSV(path, encoding='latin1')
    expected = dshape("""var * {
        D_PROC: ?string,
        NUM_SEQ: int64,
        COD_TIP_RELAC: ?float64,
        COMPL: ?string,
        COD_ASSUNTO: int64
    }
    """)
    assert discover(source) == expected
Example #4
0
def test_multiple_object_ids():
    """discover/convert should ignore the ObjectId-typed 'other' field."""
    records = [
        dict(x=1, y=2, other=ObjectId('1' * 24)),
        dict(x=3, y=4, other=ObjectId('2' * 24)),
    ]
    with coll(records) as c:
        assert discover(c) == dshape('2 * {x: int64, y: int64}')
        assert convert(list, c) == [(1, 2), (3, 4)]
def saveTagsToPostgreSQL(installations, db_url, table):
    """Copy per-installation gzipped CSVs from S3 into a PostgreSQL table.

    For every installation prefix, each ``*.csv.gz`` object under
    ``W46F/<installation>`` in ``next_gen_cbm_bucket`` is converted to a
    local temp.csv via odo, cleaned with pandas (column subset + dropna),
    and bulk-loaded with ``COPY``.  Progress is appended to output.txt and
    failures to output_failed.txt.

    NOTE(review): *table* is interpolated directly into the COPY statement
    (identifiers cannot be bound as parameters) — callers must pass a
    trusted table name only.
    """
    print("..........Loading and merging csv files............")

    for inst in installations:
        print(inst)
        conn = psycopg2.connect(db_url)
        try:
            # `obj` instead of the original `object`, which shadowed the builtin.
            files = [obj.key
                     for obj in next_gen_cbm_bucket.objects.filter(Prefix="W46F/" + inst)
                     if obj.key.endswith("csv.gz")]
            print(len(files))
            # Remove any stale scratch file left by a previous run.
            try:
                os.remove("temp.csv")
            except OSError:
                pass
            print("..........inserting into postgreSQL............")
            with open("output.txt", "a") as outputfile:
                outputfile.write("........." + inst + ".............")
            for i, key in enumerate(files):
                uri = "s3://next-gen-cbm-data/" + key
                try:
                    print(i)
                    print(uri)
                    ds = odo.dshape(getCorrectShape(odo.discover(odo.resource(uri))))
                    odo.odo(uri, 'temp.csv', dshape=ds,
                            usecols=["ts", "v", "tag", "engine_id", "installation_id"])
                    # Re-read, keep only the wanted columns, and drop
                    # incomplete rows before the COPY.
                    df = pd.read_csv('temp.csv', dtype=str)
                    df = df[["ts", "v", "tag", "engine_id", "installation_id"]]
                    df = df.dropna()
                    df.to_csv('temp.csv', index=False)
                    copy_sql = """
                   COPY """ + table + """ FROM stdin WITH CSV HEADER
                   DELIMITER as ','
                   """
                    cur = conn.cursor()
                    try:
                        with open('temp.csv', 'r') as tempfile:
                            cur.copy_expert(sql=copy_sql, file=tempfile)
                        conn.commit()
                    finally:
                        cur.close()  # always release the cursor, even on error
                    os.remove("temp.csv")

                    with open("output.txt", "a") as outputfile:
                        outputfile.write("\n ..." + str(i) + "..." + uri)

                except Exception as err:
                    print(err)
                    print(key + " does not work")
                    # A failed COPY can leave the connection in an aborted
                    # transaction; close it and reconnect so later files load.
                    conn.close()
                    conn = psycopg2.connect(db_url)
                    with open("output_failed.txt", "a") as outputfile:
                        outputfile.write("\n ..." + str(i) + "..." + uri)
        finally:
            conn.close()  # original leaked one connection per installation

        print(".............Done.............")
Example #6
0
def main(argv=sys.argv):
    """Import the EDDB stations dump into a ``stations_tmp`` table.

    Downloads stations.jsonl from EDDB.io unless a cached copy newer than
    seven days exists, creates ``stations_tmp`` mirroring ``stations``, and
    bulk-loads the JSON lines into it with odo.
    """
    if len(argv) != 2:
        usage(argv)
    config_uri = argv[1]
    setup_logging(config_uri)
    settings = get_appsettings(config_uri)
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    print("Beginning stationfix.")
    # BUG FIX: the original only downloaded when the file was missing
    # entirely, so a cached file older than 7 days was silently reused.
    # Now any missing OR stale file triggers a fresh download.
    if os.path.isfile('stations.json') and datetime.fromtimestamp(
            os.path.getmtime('stations.json')) > datetime.today() - timedelta(days=7):
        print("Using cached stations.json")
    else:
        print("Downloading stations.jsonl from EDDB.io...")
        r = requests.get("https://eddb.io/archive/v5/stations.jsonl",
                         stream=True)
        with open('stations.json', 'wb') as f:
            for chunk in r.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)
        print("Saved stations.json. Creating temporary table and importing.")
    DBSession.execute(
        "CREATE TABLE IF NOT EXISTS stations_tmp (LIKE stations)")
    mark_changed(DBSession())
    transaction.commit()

    url = str(engine.url) + "::stations_tmp"
    ds = dshape(
        "var *{  id: ?int64,  name: ?string,  system_id: ?int64,  updated_at: ?int64,  "
        "max_landing_pad_size: ?string,  distance_to_star: ?int64,  government_id: ?int64,  "
        "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
        "state_id: ?int64,  state: ?string,  type_id: ?int64,  type: ?string,  "
        "has_blackmarket: ?bool,  has_market: ?bool,  has_refuel: ?bool,  "
        "has_repair: ?bool,  has_rearm: ?bool,  has_outfitting: ?bool,  "
        "has_shipyard: ?bool,  has_docking: ?bool,  has_commodities: ?bool,  "
        "import_commodities: ?json,  export_commodities: ?json,  prohibited_commodities: ?json, "
        "economies: ?json, shipyard_updated_at: ?int64, outfitting_updated_at: ?int64, "
        "market_updated_at: ?int64, is_planetary: ?bool, selling_ships: ?json, "
        "selling_modules: ?json, settlement_size_id: ?string, settlement_size: ?int64, "
        "settlement_security_id: ?int64, settlement_security: ?string, body_id: ?int64,"
        "controlling_minor_faction_id: ?int64 }")
    # The odo return value was never used; drop the dead `t` binding.
    odo('jsonlines://stations.json', url, dshape=ds)
Example #7
0
def test_discover_csv_yields_string_on_totally_empty_columns():
    """A column that is empty in every row should be typed ?string."""
    with filetext('a,b,c\n1,,3\n4,,6\n7,,9') as fn:
        source = CSV(fn, has_header=True)
        expected = dshape('var * {a: int64, b: ?string, c: int64}')
        assert discover(source) == expected
Example #8
0
  tot_num_withdrawal: float64,
  total_withdrawal_amt: float64,
  min_withdrawal_amt: float64,
  max_withdrawal_amt: float64,
  avg_withdrawal_amt: float64,
  avg_deposit_amount_per_day: float64,
  avg_num_deposit_per_day: float64,
  avg_withdrawal_amount_per_day: float64,
  avg_num_withdrawal_per_day: float64,
  last_deposit_amount: float64,
  last_withdrawal_amount: float64,
  time_since_last_deposit: float64,
  time_since_last_withdrawal: float64,
  balance: float64,
  weighted_bal: float64,
  customer_type: ?string
  }
  """

# Parse the datashape string assembled above into a DataShape object.
ds = dshape(sh)

# Bulk-load the customer CSV into the `customers` table of the demo
# database (connection credentials redacted in this copy).
odo('customer_type.csv',
    'postgresql://*****:*****@localhost/sfl_demo_nigeria::customers',
    dshape=ds)  # Load CSVs to Postgres
# odo('hist3.csv', 'postgresql://*****:*****@localhost/sfl_fid_nig_demo::historical', dshape=ds)  # Load CSVs to Postgres

# odo('transactions.csv', 'postgresql://*****:*****@localhost/sfl_fid_nig_demo::transactions')  # Load CSVs to Postgres

# export DATABASE_URL='postgres://*****:*****@localhost/westcape'
# 'postgres://*****:*****@localhost/flask_demo_auth'
Example #9
0
    'amount': 100
}, {
    'name': 'Alice',
    'amount': 200
}, {
    'name': 'Bob',
    'amount': 100
}, {
    'name': 'Bob',
    'amount': 200
}, {
    'name': 'Bob',
    'amount': 300
})

ds = dshape('var * {name: string, amount: int}')


def test_discover():
    """A populated collection discovers the same shape as the raw tuples."""
    with coll(bank) as mongo_coll:
        assert discover(bank) == discover(mongo_coll)


def test_discover_db():
    """my_collection appears in the db measure only while it exists."""
    with coll(bank):
        names = set(discover(db).measure.names)
        assert names == {'system.indexes', 'my_collection'}
    assert discover(db).measure.names == ['system.indexes']


def test_resource_db():
Example #10
0
def main(argv=sys.argv):
    """Rebuild the EDDB-derived database from scratch.

    Drops and recreates all tables, then downloads (or reuses a <7-day-old
    cached copy of) the EDDB systems, populated-systems, factions and
    bodies dumps, bulk-loads each with odo, and creates the indexes.
    """
    if len(argv) != 2:
        usage(argv)
    config_uri = argv[1]
    setup_logging(config_uri)
    settings = get_appsettings(config_uri)
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)

    #
    # Systems
    #
    _refresh_eddb_file('systems.csv', 'systems.csv',
                       "Saved systems.csv. Converting CSV to SQL.")
    ds = dshape(
        "var *{  id: ?int64,  edsm_id: ?int64,  name: ?string,  x: ?float64,  y: ?float64,  "
        "z: ?float64,  population: ?int64,  is_populated: ?bool,  government_id: ?int64,  "
        "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
        "state_id: ?int64,  state: ?string,  security_id: ?float64,  security: ?string,  "
        "primary_economy_id: ?float64,  primary_economy: ?string,  power: ?string,  "
        "power_state: ?string,  power_state_id: ?string,  needs_permit: ?bool,  "
        "updated_at: ?int64,  simbad_ref: ?string,  controlling_minor_faction_id: ?string,  "
        "controlling_minor_faction: ?string,  reserve_type_id: ?float64,  reserve_type: ?string  }"
    )
    url = str(engine.url) + "::" + System.__tablename__
    odo('systems.csv', url, dshape=ds)

    print("Uppercasing system names...")
    DBSession.execute("UPDATE systems set name = UPPER(name)")
    print("Creating indexes...")
    DBSession.execute(
        "create index index_system_names_trigram on systems using gin(name gin_trgm_ops)"
    )
    DBSession.execute(
        "create index index_system_names_btree on systems (name)")
    print("Done!")

    #
    # Populated Systems
    #
    # BUG FIX: the original printed "Using cached systems.csv" here even
    # though the cached file being checked is systems_populated.json.
    _refresh_eddb_file('systems_populated.json', 'systems_populated.jsonl',
                       "Saved systems_populated.json. Converting JSONL to SQL.")
    url = str(engine.url) + "::" + PopulatedSystem.__tablename__
    ds = dshape(
        "var *{  id: ?int64,  edsm_id: ?int64,  name: ?string,  x: ?float64,  y: ?float64,  "
        "z: ?float64,  population: ?int64,  is_populated: ?bool,  government_id: ?int64,  "
        "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
        "state_id: ?int64,  state: ?string,  security_id: ?float64,  security: ?string,  "
        "primary_economy_id: ?float64,  primary_economy: ?string,  power: ?string,  "
        "power_state: ?string,  power_state_id: ?string,  needs_permit: ?int64,  "
        "updated_at: ?int64,  simbad_ref: ?string,  controlling_minor_faction_id: ?string,  "
        "controlling_minor_faction: ?string,  reserve_type_id: ?float64,  reserve_type: ?string,"
        "minor_faction_presences: ?json }")
    odo('jsonlines://systems_populated.json', url, dshape=ds)

    print("Uppercasing system names...")
    DBSession.execute("UPDATE populated_systems set name = UPPER(name)")
    print("Creating indexes...")
    DBSession.execute(
        "CREATE index index_populated_system_names_trigram on populated_systems using gin(name gin_trgm_ops)"
    )
    # BUG FIX: this btree index was created on `systems`; it belongs on
    # populated_systems (the trigram index above and the sibling updater
    # script both target populated_systems).
    DBSession.execute(
        "create index index_populated_system_names_btree on populated_systems (name)")

    print("Done!")

    #
    # Factions
    #
    _refresh_eddb_file('factions.json', 'factions.jsonl',
                       "Saved factions.json. Converting JSONL to SQL.")
    url = str(engine.url) + "::" + Faction.__tablename__
    ds = dshape(
        "var *{  id: ?int64,  name: ?string,  updated_at: ?int64,  government_id: ?int64,  "
        "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
        "state_id: ?int64,  state: ?string, home_system_id: ?int64,  "
        "is_player_faction: ?bool }")
    odo('jsonlines://factions.json', url, dshape=ds)
    print("Done!")
    DBSession.execute("create index factions_idx on factions(id)")

    #
    # Bodies
    #
    # BUG FIX: the "Saved bodies.jsonl..." message printed even when the
    # download was skipped; it is now emitted only after an actual download,
    # consistent with the other sections.
    _refresh_eddb_file('bodies.json', 'bodies.jsonl',
                       "Saved bodies.jsonl. Converting JSONL to SQL.")
    ds = dshape(
        "var *{ id: ?int64, created_at: ?int64, updated_at: ?int64, name: ?string, "
        "system_id: ?int64, group_id: ?int64, group_name: ?string, type_id: ?int64, "
        "type_name: ?string, distance_to_arrival: ?int64, full_spectral_class: ?string, "
        "spectral_class: ?string, spectral_sub_class: ?string, luminosity_class: ?string, "
        "luminosity_sub_class: ?string, surface_temperature: ?int64, is_main_star: ?bool, "
        "age: ?int64, solar_masses: ?float64, solar_radius: ?float64, catalogue_gliese_id : ?string, "
        "catalogue_hipp_id: ?string, catalogue_hd_id: ?string, volcanism_type_id: ?int64, "
        "volcanism_type_name: ?string, atmosphere_type_id: ?int64, atmosphere_type_name: ?string, "
        "terraforming_state_id: ?int64, terraforming_state_name: ?string, earth_masses: ?float64, "
        "radius: ?int64, gravity: ?float64, surface_pressure: ?int64, orbital_period: ?float64, "
        "semi_major_axis: ?float64, orbital_eccentricity: ?float64, orbital_inclination: ?float64, "
        "arg_of_periapsis: ?float64, rotational_period: ?float64, "
        "is_rotational_period_tidally_locked: ?bool, axis_tilt: ?float64, eg_id: ?int64, "
        "belt_moon_masses: ?float64, ring_type_id: ?int64, ring_type_name: ?string, "
        "ring_mass: ?int64, ring_inner_radius: ?float64, ring_outer_radius: ?float64, "
        "rings: ?json, atmosphere_composition: ?json, solid_composition: ?json, "
        "materials: ?json, is_landable: ?bool}")
    url = str(engine.url) + "::" + Body.__tablename__
    odo('jsonlines://bodies.json', url, dshape=ds)
    print("Creating indexes...")
    DBSession.execute(
        "CREATE INDEX bodies_idx on bodies(name text_pattern_ops)")
    DBSession.execute("CREATE INDEX systemid_idx on bodies(system_id)")
    print("Done!")


def _refresh_eddb_file(local, remote, saved_msg):
    """Download https://eddb.io/archive/v5/<remote> to *local* and print
    *saved_msg*, unless a cached copy newer than 7 days already exists.

    BUG FIX over the original inline logic: a file that existed but was
    older than 7 days was previously neither re-downloaded nor reported;
    any missing OR stale file now triggers a fresh download.
    """
    if os.path.isfile(local) and datetime.fromtimestamp(
            os.path.getmtime(local)) > datetime.today() - timedelta(days=7):
        print("Using cached " + local)
        return
    print("Downloading %s from EDDB.io..." % remote)
    r = requests.get("https://eddb.io/archive/v5/" + remote, stream=True)
    with open(local, 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print(saved_msg)
Example #11
0
from odo.backends.sas import discover, sas_to_iterator
from odo.utils import tmpfile, into_path
from odo import append, convert, resource, dshape


# Path to the bundled SAS sample dataset used by the tests below.
test_path = into_path('backends', 'tests', 'airline.sas7bdat')
# Module-level reader handle shared by the tests below.
sasfile = SAS7BDAT(test_path)


# Expected column order of the airline dataset.
columns = ("DATE", "AIR", "mon1", "mon2", "mon3", "mon4", "mon5", "mon6",
           "mon7", "mon8", "mon9", "mon10", "mon11", "mon12", "t", "Lair")

# Expected datashape discovered from the sample file.
ds = dshape('''var * {DATE: date, AIR: float64, mon1: float64, mon2: float64,
                      mon3: float64, mon4: float64, mon5: float64,
                      mon6: float64, mon7: float64, mon8: float64,
                      mon9: float64, mon10: float64, mon11: float64,
                      mon12: float64, t: float64, Lair: float64}''')


def test_resource_sas7bdat():
    """resource() should dispatch a .sas7bdat path to the SAS7BDAT reader."""
    handle = resource(test_path)
    assert isinstance(handle, SAS7BDAT)


def test_discover_sas():
    """Discovery of the sample file must match the expected datashape."""
    discovered = discover(sasfile)
    assert discovered == ds


def test_convert_sas_to_dataframe():
    """Converting the SAS file to pandas yields a DataFrame instance."""
    frame = convert(pd.DataFrame, sasfile)
    assert isinstance(frame, pd.DataFrame)
Example #12
0
    if data:
        c.insert(deepcopy(data))

    try:
        yield c
    finally:
        c.drop()

# Sample account documents inserted into the test collection by the
# tests below.
bank = ({'name': 'Alice', 'amount': 100},
        {'name': 'Alice', 'amount': 200},
        {'name': 'Bob', 'amount': 100},
        {'name': 'Bob', 'amount': 200},
        {'name': 'Bob', 'amount': 300})


# Datashape of the `bank` records, used for append/convert calls.
ds = dshape('var * {name: string, amount: int}')


def test_discover():
    """Discovered shape of the collection matches that of the raw data."""
    with coll(bank) as inserted:
        assert discover(bank) == discover(inserted)


def test_discover_db():
    """The database measure lists my_collection only while it is populated."""
    with coll(bank):
        observed = set(discover(db).measure.names)
        assert observed == {'system.indexes', 'my_collection'}
    assert discover(db).measure.names == ['system.indexes']


def test_resource_db():
Example #13
0
def main(argv=sys.argv):
    """Incrementally refresh the EDDB-derived tables.

    Drops and recreates the dependent tables, then downloads each EDDB v5
    dump (factions, recently-changed systems, bodies, populated systems,
    stations, listings), bulk-loads it with odo — via temp tables where a
    merge is needed — and rebuilds the indexes.

    NOTE(review): several raw SQL strings below look defective; they are
    flagged inline but deliberately left unchanged in this pass.
    """
    if len(argv) != 2:
        usage(argv)
    config_uri = argv[1]
    setup_logging(config_uri)
    settings = get_appsettings(config_uri)
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    print("Beginning update.")
    # Recreate all dependent tables from scratch; presumably the drop/create
    # order satisfies FK constraints — verify against the model definitions.
    PopulatedSystem.__table__.drop(engine)
    Listing.__table__.drop(engine)
    Station.__table__.drop(engine)
    Faction.__table__.drop(engine)
    Body.__table__.drop(engine)
    Faction.__table__.create(engine)
    PopulatedSystem.__table__.create(engine)
    Body.__table__.create(engine)
    Station.__table__.create(engine)
    Listing.__table__.create(engine)
    mark_changed(DBSession())
    transaction.commit()

    #
    # Factions
    #
    print("Updating factions...")
    print("Downloading factions.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/factions.jsonl", stream=True)
    with open('factions.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved factions.json. Updating...")
    url = str(engine.url) + "::" + Faction.__tablename__
    ds = dshape("var *{  id: ?int64,  name: ?string,  updated_at: ?int64,  government_id: ?int64,  "
                "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
                "state_id: ?int64,  state: ?string, home_system_id: ?int64,  "
                "is_player_faction: ?bool }")
    t = odo('jsonlines://factions.json', url, dshape=ds)
    print("Done! Creating index...")
    DBSession.execute("CREATE INDEX factions_idx ON factions(id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Completed processing factions.")

    #
    # Systems
    #
    print("Downloading systems_recently.csv from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/systems_recently.csv", stream=True)
    with open('systems_recently.csv', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved systems_recently.csv. Creating temporary table and importing...")
    # Load into a temp clone of `systems`, then merge into the real table.
    DBSession.execute("CREATE TEMP TABLE systems_tmp (LIKE systems)")
    url = str(engine.url) + "::systems_tmp"
    ds = dshape("var *{  id: ?int64,  edsm_id: ?int64,  name: ?string,  x: ?float64,  y: ?float64,  "
                "z: ?float64,  population: ?int64,  is_populated: ?bool,  government_id: ?int64,  "
                "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
                "state_id: ?int64,  state: ?string,  security_id: ?float64,  security: ?string,  "
                "primary_economy_id: ?float64,  primary_economy: ?string,  power: ?string,  "
                "power_state: ?string,  power_state_id: ?string,  needs_permit: ?bool,  "
                "updated_at: ?int64,  simbad_ref: ?string,  controlling_minor_faction_id: ?string,  "
                "controlling_minor_faction: ?string,  reserve_type_id: ?float64,  reserve_type: ?string  }")
    t = odo('systems_recently.csv', url, dshape=ds)
    print("Updating systems...")
    # NOTE(review): this upsert looks defective on three counts —
    # (1) PostgreSQL requires a conflict target for DO UPDATE, i.e.
    #     "ON CONFLICT (id) DO UPDATE"; as written it should fail to parse;
    # (2) "is_populated = EXCLUDED.population" assigns the wrong source
    #     column (should presumably be EXCLUDED.is_populated);
    # (3) the SET list never updates controlling_minor_faction.
    # Left as-is pending confirmation of the intended statement.
    DBSession.execute("INSERT INTO systems(id, edsm_id, name, x, y, z, population, is_populated, government_id, "
                      "government, allegiance_id, allegiance, state_id, state, security_id, security, "
                      "primary_economy_id, primary_economy, power, power_state, power_state_id, needs_permit, "
                      "updated_at, simbad_ref, controlling_minor_faction_id, controlling_minor_faction, "
                      "reserve_type_id, reserve_type) SELECT id, edsm_id, name, x, y, z, population, is_populated, "
                      "government_id, government, allegiance_id, allegiance, state_id, state, security_id, security, "
                      "primary_economy_id, primary_economy, power, power_state, power_state_id, needs_permit, "
                      "updated_at, simbad_ref, controlling_minor_faction_id, controlling_minor_faction, "
                      "reserve_type_id, reserve_type from systems_tmp ON CONFLICT DO UPDATE "
                      "SET edsm_id = EXCLUDED.edsm_id, name = EXCLUDED.name, x = EXCLUDED.x, "
                      "y = EXCLUDED.y, z = EXCLUDED.z, population = EXCLUDED.population, "
                      "is_populated = EXCLUDED.population, government_id = EXCLUDED.government_id, "
                      "government = EXCLUDED.government, allegiance_id = EXCLUDED.allegiance_id, "
                      "allegiance = EXCLUDED.allegiance, state_id = EXCLUDED.state_id, "
                      "state = EXCLUDED.state, security_id = EXCLUDED.security_id, security = EXCLUDED.security, "
                      "primary_economy_id = EXCLUDED.primary_economy_id, primary_economy = EXCLUDED.primary_economy, "
                      "power = EXCLUDED.power, power_state = EXCLUDED.power_state, power_state_id = "
                      "EXCLUDED.power_state_id, needs_permit = EXCLUDED.needs_permit, updated_at = "
                      "EXCLUDED.updated_at, simbad_ref = EXCLUDED.simbad_ref,"
                      "controlling_minor_faction_id = EXCLUDED.controlling_minor_faction_id, "
                      "reserve_type_id = EXCLUDED.reserve_type_id, reserve_type = EXCLUDED.reserve_type")
    mark_changed(DBSession())
    transaction.commit()
    print("Done!")

    #
    # Bodies
    #
    print("Downloading bodies.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/bodies.jsonl", stream=True)
    with open('bodies.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved bodies.jsonl. Converting JSONL to SQL.")
    # NOTE(review): bodies are loaded into a temp table, but the indexes
    # below are created on the real `bodies` table and the temp data is
    # never merged into it — confirm whether a merge step is missing.
    DBSession.execute("CREATE TEMP TABLE bodies_tmp (LIKE bodies)")
    url = str(engine.url) + "::bodies_tmp"
    ds = dshape("var *{ id: ?int64, created_at: ?int64, updated_at: ?int64, name: ?string, "
                "system_id: ?int64, group_id: ?int64, group_name: ?string, type_id: ?int64, "
                "type_name: ?string, distance_to_arrival: ?int64, full_spectral_class: ?string, "
                "spectral_class: ?string, spectral_sub_class: ?string, luminosity_class: ?string, "
                "luminosity_sub_class: ?string, surface_temperature: ?int64, is_main_star: ?bool, "
                "age: ?int64, solar_masses: ?float64, solar_radius: ?float64, catalogue_gliese_id : ?string, "
                "catalogue_hipp_id: ?string, catalogue_hd_id: ?string, volcanism_type_id: ?int64, "
                "volcanism_type_name: ?string, atmosphere_type_id: ?int64, atmosphere_type_name: ?string, "
                "terraforming_state_id: ?int64, terraforming_state_name: ?string, earth_masses: ?float64, "
                "radius: ?int64, gravity: ?float64, surface_pressure: ?int64, orbital_period: ?float64, "
                "semi_major_axis: ?float64, orbital_eccentricity: ?float64, orbital_inclination: ?float64, "
                "arg_of_periapsis: ?float64, rotational_period: ?float64, "
                "is_rotational_period_tidally_locked: ?bool, axis_tilt: ?float64, eg_id: ?int64, "
                "belt_moon_masses: ?float64, ring_type_id: ?int64, ring_type_name: ?string, "
                "ring_mass: ?int64, ring_inner_radius: ?float64, ring_outer_radius: ?float64, "
                "rings: ?json, atmosphere_composition: ?json, solid_composition: ?json, "
                "materials: ?json, is_landable: ?bool}")
    #url = str(engine.url) + "::" + Body.__tablename__
    t = odo('jsonlines://bodies.json', url, dshape=ds)
    print("Creating indexes...")
    DBSession.execute("CREATE INDEX bodies_idx ON bodies(name text_pattern_ops)")
    mark_changed(DBSession())
    transaction.commit()
    DBSession.execute("CREATE INDEX systemid_idx ON bodies(system_id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Done!")

    #
    # Populated systems
    #
    print("Downloading systems_populated.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/systems_populated.jsonl", stream=True)
    with open('systems_populated.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved systems_populated.json. Updating...")
    url = str(engine.url) + "::" + PopulatedSystem.__tablename__
    ds = dshape("var *{  id: ?int64,  edsm_id: ?int64,  name: ?string,  x: ?float64,  y: ?float64,  "
                "z: ?float64,  population: ?int64,  is_populated: ?bool,  government_id: ?int64,  "
                "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
                "state_id: ?int64,  state: ?string,  security_id: ?float64,  security: ?string,  "
                "primary_economy_id: ?float64,  primary_economy: ?string,  power: ?string,  "
                "power_state: ?string,  power_state_id: ?string,  needs_permit: ?int64,  "
                "updated_at: ?int64,  simbad_ref: ?string,  controlling_minor_faction_id: ?string,  "
                "controlling_minor_faction: ?string,  reserve_type_id: ?float64,  reserve_type: ?string,"
                "minor_faction_presences: ?json }")
    t = odo('jsonlines://systems_populated.json', url, dshape=ds)
    print("Done! Uppercasing system names...")
    DBSession.execute("UPDATE populated_systems SET name = UPPER(name)")
    mark_changed(DBSession())
    transaction.commit()
    print("Creating indexes...")
    DBSession.execute("CREATE INDEX index_populated_system_names_trigram ON populated_systems "
                      "USING GIN(name gin_trgm_ops)")
    mark_changed(DBSession())
    transaction.commit()
    DBSession.execute("CREATE INDEX index_populated_system_names_btree ON populated_systems (name)")
    mark_changed(DBSession())
    transaction.commit()
    print("Completed processing populated systems.")

    #
    # Stations
    #
    print("Downloading stations.jsonl from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/stations.jsonl", stream=True)
    with open('stations.json', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved stations.json. Updating...")
    DBSession.execute("CREATE TEMP TABLE stations_tmp (LIKE stations)")
    url = str(engine.url) + "::stations_tmp"
    #url = str(engine.url) + "::" + Station.__tablename__
    ds = dshape("var *{  id: ?int64,  name: ?string,  system_id: ?int64,  updated_at: ?int64,  "
                "max_landing_pad_size: ?string,  distance_to_star: ?int64,  government_id: ?int64,  "
                "government: ?string,  allegiance_id: ?int64,  allegiance: ?string,  "
                "state_id: ?int64,  state: ?string,  type_id: ?int64,  type: ?string,  "
                "has_blackmarket: ?bool,  has_market: ?bool,  has_refuel: ?bool,  "
                "has_repair: ?bool,  has_rearm: ?bool,  has_outfitting: ?bool,  "
                "has_shipyard: ?bool,  has_docking: ?bool,  has_commodities: ?bool,  "
                "import_commodities: ?json,  export_commodities: ?json,  prohibited_commodities: ?json, "
                "economies: ?json, shipyard_updated_at: ?int64, outfitting_updated_at: ?int64, "
                "market_updated_at: ?int64, is_planetary: ?bool, selling_ships: ?json, "
                "selling_modules: ?json, settlement_size_id: ?string, settlement_size: ?int64, "
                "settlement_security_id: ?int64, settlement_security: ?string, body_id: ?int64,"
                "controlling_minor_faction_id: ?int64 }")
    t = odo('jsonlines://stations.json', url, dshape=ds)
    print("Done! Cleaning stations without body references...")
    DBSession.execute("DELETE FROM stations_tmp WHERE body_id NOT IN (SELECT b.id from bodies b)")
    mark_changed(DBSession())
    transaction.commit()
    # NOTE(review): this UPDATE is truncated — it ends in a trailing comma,
    # never defines the `t` alias (no FROM stations_tmp), and assigns only a
    # fraction of the columns.  As written it should fail at runtime; the
    # intended full statement cannot be reconstructed from this file.
    DBSession.execute("UPDATE stations SET id=t.id, name=t.name, system_id=t.system_id, updated_at=t.updated_at, "
                      "max_landing_pad_size=t.max_landing_pad_size, ")
    DBSession.execute("CREATE INDEX index_stations_systemid_btree ON stations(system_id)")
    mark_changed(DBSession())
    transaction.commit()
    DBSession.execute("CREATE INDEX index_stations_btree ON stations(id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Completed processing stations.")

    #
    # Listings
    #
    print("Downloading listings.csv from EDDB.io...")
    r = requests.get("https://eddb.io/archive/v5/listings.csv", stream=True)
    with open('listings.csv', 'wb') as f:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    print("Saved listings.csv. Updating...")
    url = str(engine.url) + "::" + Listing.__tablename__
    ds = dshape("var *{  id: ?int64, station_id: ?int64, commodity: ?int64, supply: ?int64, "
                "buy_price: ?int64, sell_price: ?int64, demand: ?int64, collected_at: ?int64 }")
    t = odo('listings.csv', url, dshape=ds)

    print("Creating indexes...")
    DBSession.execute("CREATE INDEX index_listings_stationid_btree ON listings(station_id)")
    mark_changed(DBSession())
    transaction.commit()
    print("Updates complete.")
Example #14
0
def test_multiple_object_ids(multiple_object_ids):
    """The fixture collection discovers only the integer fields."""
    expected_shape = dshape('2 * {x: int64, y: int64}')
    assert discover(multiple_object_ids) == expected_shape
    assert convert(list, multiple_object_ids) == [(1, 2), (3, 4)]
Example #15
0
def test_multiple_object_ids(multiple_object_ids):
    """ObjectId columns are excluded from both discovery and conversion."""
    shape = discover(multiple_object_ids)
    assert shape == dshape('2 * {x: int64, y: int64}')
    rows = convert(list, multiple_object_ids)
    assert rows == [(1, 2), (3, 4)]
def test_multiple_object_ids():
    """An ObjectId-valued 'other' field must not leak into the schema."""
    documents = [
        {"x": 1, "y": 2, "other": ObjectId("1" * 24)},
        {"x": 3, "y": 4, "other": ObjectId("2" * 24)},
    ]
    with coll(documents) as c:
        assert discover(c) == dshape("2 * {x: int64, y: int64}")
        assert convert(list, c) == [(1, 2), (3, 4)]
    try:
        yield c
    finally:
        c.drop()


# Sample account documents inserted into the test collection by the
# tests below.
bank = (
    {"name": "Alice", "amount": 100},
    {"name": "Alice", "amount": 200},
    {"name": "Bob", "amount": 100},
    {"name": "Bob", "amount": 200},
    {"name": "Bob", "amount": 300},
)


# Datashape of the `bank` records, used for append/convert calls.
ds = dshape("var * {name: string, amount: int}")


def test_discover():
    """Inserting the raw tuples does not change the discovered shape."""
    with coll(bank) as populated:
        assert discover(populated) == discover(bank)


def test_append_convert():
    """Appending with an explicit dshape round-trips through convert."""
    with coll([]) as empty_coll:
        append(empty_coll, bank, dshape=ds)
        expected = list(pluck(["name", "amount"], bank))
        assert convert(list, empty_coll, dshape=ds) == expected


def test_resource():