def s3_bucket(extension):
    with conn():
        b = 's3://%s/%s%s' % (test_bucket_name, next(_tmps), extension)
        try:
            yield b
        finally:
            drop(resource(b))

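# A hedged usage sketch, not part of the original suite: s3_bucket above is a
# fixture factory, so a pytest generator fixture can delegate to it. The
# '.csv' extension and the test body below are illustrative assumptions.
import pytest

@pytest.fixture
def csv_bucket():
    yield from s3_bucket('.csv')  # temporary bucket URI, dropped on teardown

def test_csv_roundtrip(csv_bucket):
    # round-trip a tiny table through the temporary S3 key
    odo([(1, 'a')], csv_bucket, dshape='var * {n: int64, s: string}')
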
def fkey(url, pkey):
    # child table: each of the 10 rows references a random pkey id
    try:
        fkey = odo([(i,
                     int(np.random.randint(pkey.count().scalar())),
                     int(np.random.randint(10000)))
                    for i in range(10)],
                   url % 'fkey',
                   dshape=dshape('''var * {id: int64,
                                           sym_id: map[int64, {id: int64,
                                                               sym: string,
                                                               price: float64,
                                                               main: map[int64, {id: int64,
                                                                                 data: int64}]}],
                                           size: int64}'''),
                   foreign_keys=dict(sym_id=pkey.c.id),
                   primary_key=['id'])
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield fkey
        finally:
            drop(fkey)

def sql_with_null(url):
    ds = dshape("""var * {name: ?string,
                          sex: ?string,
                          amount: int,
                          id: int,
                          comment: ?string}""")
    rows = [('Alice', 'F', 100, 1, 'Alice comment'),
            (None, 'M', 300, 2, None),
            ('Drew', 'F', 100, 4, 'Drew comment'),
            ('Bob', 'M', 100, 5, 'Bob comment 2'),
            ('Drew', 'M', 200, 5, None),
            ('first', None, 300, 4, 'Missing info'),
            (None, None, 300, 6, None)]
    try:
        x = url % next(names)
        t = data(x, dshape=ds)
        print(x)
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        assert t.dshape == ds
        t = data(odo(rows, t))
        try:
            yield t
        finally:
            drop(t)

def pkey(url, main):
    # 100 rows of random symbols/prices, each referencing a random main.id
    choices = [u'AAPL', u'HPQ', u'ORCL', u'IBM', u'DOW', u'SBUX', u'AMD',
               u'INTC', u'GOOG', u'PRU', u'MSFT', u'AIG', u'TXN', u'DELL',
               u'PEP']
    n = 100
    data = list(zip(range(n),
                    np.random.choice(choices, size=n).tolist(),
                    np.random.uniform(10000, 20000, size=n).tolist(),
                    np.random.randint(main.count().scalar(), size=n).tolist()))
    try:
        pkey = odo(data,
                   url % 'pkey',
                   dshape=dshape('var * {id: int64, sym: string,'
                                 ' price: float64, main: map[int64, T]}'),
                   foreign_keys=dict(main=main.c.id),
                   primary_key=['id'])
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield pkey
        finally:
            drop(pkey)

def pkey(url, main):
    # same shape as the pkey fixture above, but with the referenced record
    # type spelled out in full rather than as a type variable
    choices = [u'AAPL', u'HPQ', u'ORCL', u'IBM', u'DOW', u'SBUX', u'AMD',
               u'INTC', u'GOOG', u'PRU', u'MSFT', u'AIG', u'TXN', u'DELL',
               u'PEP']
    n = 100
    data = list(zip(range(n),
                    np.random.choice(choices, size=n).tolist(),
                    np.random.uniform(10000, 20000, size=n).tolist(),
                    np.random.randint(main.count().scalar(), size=n).tolist()))
    try:
        pkey = odo(data,
                   url % 'pkey',
                   dshape=dshape('''var * {id: int64,
                                           sym: string,
                                           price: float64,
                                           main: map[int64, {id: int64,
                                                             data: int64}]}'''),
                   foreign_keys=dict(main=main.c.id),
                   primary_key=['id'])
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield pkey
        finally:
            drop(pkey)

def test_drop():
    with tmpfile('json') as fn:
        js = JSON(fn)
        append(js, [1, 2, 3])
        assert os.path.exists(fn)
        drop(js)
        assert not os.path.exists(fn)

def test_drop():
    with tmpfile('.bson') as fn:
        bs = BSON(fn)
        append(bs, dat)
        assert os.path.exists(fn)
        drop(bs)
        assert not os.path.exists(fn)

def sql(url):
    try:
        t = resource(url, dshape='var * {a: int32, b: int32}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        yield t
        drop(t)

def fsql(engine, fcsv, name):
    try:
        t = resource('%s::%s' % (url, name), dshape=discover(fcsv))
    except sqlalchemy.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        yield t
        drop(t)

def hive_table(host):
    name = ('temp' + str(uuid.uuid1()).replace('-', ''))[:30]
    uri = 'hive://hdfs@%s:10000/default::%s' % (host, name)
    try:
        yield uri
    finally:
        with ignoring(Exception):
            drop(uri)

def decimal_sql(engine, name): try: t = resource("%s::%s" % (url, name), dshape="var * {a: ?decimal[10, 3], b: decimal[11, 2]}") except sa.exc.OperationalError as e: pytest.skip(str(e)) else: try: yield t finally: drop(t)
def dsql(engine, dcsv, name):
    try:
        t = resource("%s::%s" % (url, name), dshape=discover(dcsv))
    except sqlalchemy.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def sql_with_float(url):
    try:
        t = data(url % next(names), dshape='var * {c: float64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def test_drop_reflects_database_state(url):
    data = list(zip(range(5), range(1, 6)))
    t = odo(data, url, dshape='var * {A: int64, B: int64}')
    assert t.exists()
    assert resource(url).exists()
    drop(url)
    with pytest.raises(ValueError):
        resource(url)  # Table doesn't exist and no dshape

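# A hedged companion sketch (an assumption, not in the original file): with an
# explicit dshape, resource() should recreate the dropped table instead of
# raising ValueError, mirroring how the fixtures in this file create tables.
def test_resource_recreates_dropped_table(url):
    odo([(0, 1)], url, dshape='var * {A: int64, B: int64}')
    drop(url)
    t = resource(url, dshape='var * {A: int64, B: int64}')  # recreated empty
    assert t.exists()
    drop(t)
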
def sql_with_float(url):
    try:
        t = resource(url % next(names), dshape='var * {c: float64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def sql_with_float(url):
    try:
        t = resource(url, dshape='var * {c: float64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def sql_with_schema(url):
    try:
        t = resource(url,
                     dshape='var * {a: int32, b: ?int32}',
                     schema=next(names))
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def quoted_sql(pg_ip, csv):
    url = 'postgresql://postgres@{}/test::foo bar'.format(pg_ip)
    try:
        t = resource(url, dshape=discover(csv))
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def quoted_sql(engine, fcsv):
    dshape = var * Record(list(zip('ab', discover(fcsv).measure.types)))
    try:
        t = resource('%s::foo bar' % url, dshape=dshape)
    except sqlalchemy.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def nyc():
    try:
        t = odo(example('nyc.csv'),
                'postgresql://postgres@localhost/test::nyc')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def decimal_sql(engine, name):
    try:
        t = resource('%s::%s' % (url, name),
                     dshape="var * {a: ?decimal[10, 3], b: decimal[11, 2]}")
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def sql(url):
    try:
        t = resource(url, dshape='var * {A: string, B: int64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([('a', 1), ('b', 2)], t)
        try:
            yield t
        finally:
            drop(t)

def complex_sql(url):
    ds = """var * {Name: string,
                   RegistrationDate: date,
                   ZipCode: int32,
                   Consts: float64}"""
    try:
        t = resource(url, dshape=ds)
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        yield t
        drop(t)

def test_drop_group():
    with tmpfile('.hdf5') as fn:
        f = h5py.File(fn)
        try:
            f.create_dataset('/group/data',
                             data=x,
                             chunks=True,
                             maxshape=(None,) + x.shape[1:])
            drop(f['/group'])
            assert '/group' not in f.keys()
        finally:
            with ignoring(Exception):
                f.close()

def data():
    try:
        t = odo(example('nyc.csv'),
                'mysql+pymysql://%s@localhost/test::nyc' % getuser())
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t.bind
        finally:
            drop(t)

def quoted_sql(csv):
    url = "postgresql://postgres@localhost/test::foo bar"
    try:
        t = resource(url, dshape=discover(csv))
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def sql(engine, csv, name):
    dshape = var * Record(list(zip('ab', discover(csv).measure.types)))
    try:
        t = resource('%s::%s' % (url, name), dshape=dshape)
    except sqlalchemy.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def big_sql(url):
    try:
        t = data(url % next(names), dshape='var * {A: string, B: int64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo(zip(list('a' * 100), list(range(100))), t)
        try:
            yield t
        finally:
            drop(t)

def fsql(engine, fcsv, name):
    dshape = discover(fcsv)
    dshape = DataShape(
        var, Record([(n, typ) for n, typ in zip('ab', dshape.measure.types)]))
    try:
        t = resource('%s::%s' % (url, name), dshape=dshape)
    except sqlalchemy.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        yield t
        drop(t)

def sql_with_timedeltas(url):
    try:
        t = data(url % next(names), dshape='var * {N: timedelta}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([(timedelta(seconds=n),) for n in range(10)], t)
        try:
            yield t
        finally:
            drop(t)

def sql_with_dts(url):
    try:
        t = resource(url, dshape="var * {A: datetime}")
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([(d,) for d in pd.date_range("2014-01-01", "2014-02-01")], t)
        try:
            yield t
        finally:
            drop(t)

def sql_with_dts(url):
    try:
        t = resource(url % next(names), dshape='var * {A: datetime}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([(d,) for d in pd.date_range('2014-01-01', '2014-02-01')], t)
        try:
            yield t
        finally:
            drop(t)

def sql_with_timedeltas(url):
    try:
        t = data(url % next(names), dshape='var * {N: timedelta}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([(timedelta(seconds=n),) for n in range(10)], t)
        try:
            yield t
        finally:
            drop(t)

def sqlb(url):
    try:
        t = data(url % next(names), dshape='var * {A: string, B: int64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([('a', 1), ('b', 2)], t)
        try:
            yield t
        finally:
            drop(t)

def sql_with_dts(url):
    try:
        t = data(url % next(names), dshape='var * {A: datetime}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([(d,) for d in pd.date_range('2014-01-01', '2014-02-01')], t)
        try:
            yield t
        finally:
            drop(t)

def big_sql(url):
    try:
        t = data(url % next(names), dshape='var * {A: string, B: int64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo(zip(list('a' * 100), list(range(100))), t)
        try:
            yield t
        finally:
            drop(t)

def sqla(url):
    try:
        t = data(url % next(names), dshape='var * {A: ?string, B: ?int32}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([('a', 1), (None, 1), ('c', None)], t)
        try:
            yield t
        finally:
            drop(t)

def sqla(url):
    try:
        t = resource(url % next(names), dshape='var * {A: ?string, B: ?int32}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([('a', 1), (None, 1), ('c', None)], t)
        try:
            yield t
        finally:
            drop(t)

def sqlb(url):
    try:
        t = resource(url % next(names), dshape='var * {A: string, B: int64}')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        t = odo([('a', 1), ('b', 2)], t)
        try:
            yield t
        finally:
            drop(t)

def sql_with_ugly_schema(url):
    try:
        t = resource(url,
                     dshape='var * {a: int32, b: ?int32}',
                     schema='foo.b.ar')
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)
            t.bind.execute(sa.sql.ddl.DropSchema(t.schema))

def sql_with_schema():
    url = 'postgresql://postgres@localhost/test::%s' % next(names)
    try:
        t = resource(url,
                     dshape='var * {a: int32, b: ?int32}',
                     schema=next(names))
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def nyc_csv(pg_ip):
    try:
        t = odo(example('nyc.csv'),
                'postgresql://postgres@{}/test::nyc'.format(pg_ip))
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def test_drop_group():
    with tmpfile('.hdf5') as fn:
        f = h5py.File(fn)
        try:
            f.create_dataset('/group/data',
                             data=x,
                             chunks=True,
                             maxshape=(None,) + x.shape[1:])
            drop(f['/group'])
            assert '/group' not in f.keys()
        finally:
            with ignoring(Exception):
                f.close()

def sql(url):
    ds = dshape('var * {A: string, B: int64}')
    try:
        t = data(url % next(names), dshape=ds)
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        assert t.dshape == ds
        t = data(odo([('a', 1), ('b', 2)], t))
        try:
            yield t
        finally:
            drop(t)

def main(url):
    # parent table: 13 rows of random data keyed by id
    try:
        main = odo([(i, int(np.random.randint(10))) for i in range(13)],
                   url % 'main',
                   dshape=dshape('var * {id: int64, data: int64}'),
                   primary_key=['id'])
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield main
        finally:
            drop(main)

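# Note: main, pkey, and fkey above form a fixture chain -- fkey.sym_id
# references pkey.id, and pkey.main references main.id. A hedged sketch of a
# test consuming the end of the chain (the test name and assertion are
# illustrative assumptions; fkey is assumed to be a sqlalchemy Table):
def test_foreign_key_chain(fkey):
    # requesting fkey builds all three linked tables via the fixture chain
    assert set(fkey.columns.keys()) >= {'id', 'sym_id', 'size'}
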
def sql_two_tables(url):
    dshape = 'var * {a: int32}'
    try:
        t = resource(url % next(names), dshape=dshape)
        u = resource(url % next(names), dshape=dshape)
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield u, t
        finally:
            drop(t)
            drop(u)

def nyc(pg_ip):
    # odoing csv -> pandas -> postgres is more robust, as it doesn't require
    # the postgres server to be on the same filesystem as the csv file.
    nyc_pd = odo(example('nyc.csv'), pd.DataFrame)
    try:
        t = odo(nyc_pd, 'postgresql://postgres@{}/test::nyc'.format(pg_ip))
    except sa.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        try:
            yield t
        finally:
            drop(t)

def import_asn():
    # drop any previous asn table; drop() raises ValueError if it is absent
    try:
        logging.info("Dropping old asn-lists if exists")
        drop("mysql+pymysql://{}:{}@{}:{}/{}::asn".format(
            os.environ.get("DB_USER", "root"),
            os.environ.get("DB_PASS", "abc123"),
            os.environ.get("DB_HOST", "localhost"),
            os.environ.get("DB_PORT", "3306"),
            os.environ.get("DB_NAME", "censorship"),
        ))
    except ValueError:
        pass
    # load the GeoLite2 CSV, prefixing each AS number with 'AS'
    with open(os.environ.get("IMPORT_FILE",
                             "geoip-list/GeoLite2-ASN-Blocks-IPv4.csv")) as f:
        logging.info("Importing asn-lists")
        odo(
            [dict(row,
                  autonomous_system_number=f'AS{row["autonomous_system_number"]}')
             for row in csv.DictReader(f, skipinitialspace=True)],
            "mysql+pymysql://{}:{}@{}:{}/{}::asn".format(
                os.environ.get("DB_USER", "root"),
                os.environ.get("DB_PASS", "abc123"),
                os.environ.get("DB_HOST", "localhost"),
                os.environ.get("DB_PORT", "3306"),
                os.environ.get("DB_NAME", "censorship"),
            ),
            dshape="""
            var * {
                network: string,
                autonomous_system_number: string[8],
                autonomous_system_organization: string
            }
            """,
            local="LOCAL",
        )
    # index the AS-number column for fast lookups
    logging.info("Building index for asn-lists")
    _conn = pymysql.connect(host=os.environ.get("DB_HOST", "localhost"),
                            port=int(os.environ.get("DB_PORT", 3306)),
                            user=os.environ.get("DB_USER", "root"),
                            password=os.environ.get("DB_PASS", "abc123"),
                            db=os.environ.get("DB_NAME", "censorship"))
    with _conn.cursor() as cur:
        cur.execute('''
            CREATE INDEX asn_idx_autonomous_system_number
            ON asn(autonomous_system_number);
        ''')
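
# A hedged usage sketch: import_asn is configured entirely through
# environment variables. The entry point below is an illustrative assumption,
# not part of the original module.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # IMPORT_FILE, DB_HOST, DB_USER, etc. fall back to the defaults read
    # inside import_asn() when unset.
    import_asn()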