def test_import_person(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                        "data", "person.txt")
    dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                       "temp_person.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    columns = "sequence tag timestamp dateformat x y z activity".split()
    try:
        import_flatfile_into_database(dbf, file, fLOG=fLOG,
                                      columns=columns, header=False)
    except NoHeaderException:
        return
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    view = db.execute_view("SELECT * FROM ACAPA")
    assert len(view) > 0
    assert len(view[0]) == 7
def test_makedirs_badengine(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_missing_makedirs2")
    dbf = os.path.join(temp, 'tr', 'td.db3')
    values = [{"name": "A", "age": 10, "score": 34.5},
              {"name": "B", "age": 20, "score": -34.5}]
    df = pandas.DataFrame(values)
    try:
        Database.fill_sql_table(df, dbf, "newtable", engine='rty')
        raise AssertionError("engine 'rty' was accepted but should not be")
    except DBException:
        pass
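    # For contrast: the same call without an explicit engine succeeds with the
    # default backend (presumably SQLite, given the .db3 files used throughout
    # these tests), as exercised in test_makedirs below. Only an unknown engine
    # string such as 'rty' is expected to raise DBException.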
def test_sqllite_sql(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                        "data", "taxi_trip.csv")
    dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                       "temp_taxi.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    try:
        import_flatfile_into_database(dbf, file, fLOG=fLOG, header=True)
    except NoHeaderException:
        return
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    view = db.execute_view("SELECT * FROM taxi_trip")
    fLOG(len(view))
    fLOG(view)
    exp = ('1B5C0970F2AE8CFFBA8AE4584BEAED29',
           'D961332334524990D1BBD462E2EFB8A4',
           'CMT', '2013-02-08 23:35:14', 'CRD',
           6.0, 0.5, 0.5, 0.0, 0, 7.0)
    assert len(view) > 0
    assert len(view[0]) == len(exp)
def test_import_flatflit(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    fold = os.path.abspath(os.path.split(__file__)[0])
    temp = os.path.join(fold, "temp_db_bug")
    if not os.path.exists(temp):
        os.mkdir(temp)
    text = ["one", "two", "three", "four", "five",
            "six", "seven", "eight", "nine", "ten"]
    data = [{"name": text[random.randint(0, 9)],
             "number": random.randint(0, 99)}
            for i in range(0, 10000)]
    filename = os.path.join(temp, "out_flatfile_tab_pos2.txt")
    datatab = data[:1] + [{"name": " one\ttab", "number": 100}] + data[1:]
    df = pandas.DataFrame(datatab)
    df.to_csv(filename, sep="\t", encoding="utf8", header=True, index=False)
    with open(filename, "r", encoding="utf8") as f:
        content = f.read()
    content = content.replace('"', '')
    with open(filename + ".2.txt", "w", encoding="utf8") as f:
        f.write(content)
    dbfile = os.path.join(fold, "out_db.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)
    import_flatfile_into_database(dbfile, filename + ".2.txt",
                                  table="example", fLOG=fLOG)
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    count = db.get_table_nb_lines("example")
    sch = db.get_table_columns("example")
    values = db.execute_view("SELECT * FROM example")
    db.close()
    if count != 10001:
        rows = [str(v) for v in values][:10]
        mes = "\n".join(rows)
        fLOG(datatab[:3])
        raise Exception("expect:10001 not {0}\nROWS:\n{1}".format(count, mes))
    exp = [('name', str), ('number', int)]
    if sch != exp:
        raise Exception("{0}!={1} ({2})".format(sch, exp, len(datatab)))
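    # Note on the expected count: 10001 = the 10000 generated rows plus one
    # injected row whose "name" value contains a literal tab. Stripping the
    # quotes pandas wrote around that field leaves an extra separator inside
    # the line, and the test still expects the importer to load 10001 rows.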
def test_import_df(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                       "temp_database_df.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    values = [{"name": "A", "age": 10, "score": 34.5},
              {"name": "B", "age": 20, "score": -34.5}]
    df = pandas.DataFrame(values)
    db = Database.fill_sql_table(df, dbf, "newtable")
    db.execute_view("SELECT * FROM newtable")
    df2 = db.to_df("SELECT * FROM newtable")
    df3 = df2[["age", "name", "score"]]
    self.assertGreater(len(df), 0)
    self.assertEqual(len(df3), len(df))
    for a, b in zip(df.values, df3.values):
        self.assertGreater(len(a), 0)
        self.assertEqual(len(a), len(b))
        for c, d in zip(a, b):
            self.assertEqual(c, d)
    db.close()
def test_import_flatflit(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                        "data", "ACA.PA.txt")
    dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                       "temp_database.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    import_flatfile_into_database(dbf, file, fLOG=fLOG)
    self.assertExists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    view = db.execute_view("SELECT * FROM ACAPA")
    self.assertGreater(len(view), 0)
    self.assertEqual(len(view[0]), 7)
def test_db_index(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_missing_makedirs")
    dbf = os.path.join(temp, 'tr', 'td.db3')
    values = [{"name": "A", "age": 10, "score": 34.5},
              {"name": "B", "age": 20, "score": -34.5}]
    df = pandas.DataFrame(values)
    db = Database.fill_sql_table(df, dbf, "newtable")
    has = db.has_index('ind')
    self.assertFalse(has)
    resi = db.get_index_on_table('newtable', full=True)
    self.assertEqual(resi, [])
    resi = db.get_index_on_table('newtable', full=False)
    self.assertEqual(resi, [])
    resi = db.get_column_type('newtable', 'name')
    self.assertEqual(resi, str)
    self.assertRaise(lambda: db.get_column_type('newtable', 'name2'),
                     DBException)
    resi = db.get_table_nfirst_lines('newtable', 2)
    self.assertEqual(resi, [(10, 'A', 34.5, 1), (20, 'B', -34.5, 2)])
    cur = db.execute_script('SELECT name FROM newtable')
    self.assertEqual(cur, None)
    cur = db.execute_script('SELECT name FROM newtable', close=False)
    self.assertFalse(cur is None)
    db.create_table("nextable2", columns={
        -1: ("key", int, "PRIMARYKEY", "AUTOINCREMENT"),
        0: ("name", str),
        1: ("number", float),
        2: ('tint', int),
        3: ('tint64', numpy.int64),
        4: ('tfloat64', numpy.float64),
        5: ('tdt', datetime.datetime),
        6: ('tdec', decimal.Decimal)})
    cur.close()
    db.close()
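    # Reading of create_table's "columns" argument, inferred from the call
    # above rather than from documentation: keys are 0-based column positions,
    # -1 marks the auto-increment primary key, and each value is a
    # (name, python_type[, constraints...]) tuple mapped to an SQLite type.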
def test_makedirs(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_missing_makedirs")
    dbf = os.path.join(temp, 'tr', 'td.db3')
    values = [{"name": "A", "age": 10, "score": 34.5},
              {"name": "B", "age": 20, "score": -34.5}]
    df = pandas.DataFrame(values)
    db = Database.fill_sql_table(df, dbf, "newtable")
    db.execute_view("SELECT * FROM newtable")
    db.close()
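    # The point of this test, as suggested by its name and the path built
    # above: the intermediate folder 'tr' does not exist when fill_sql_table
    # is called, so the function is expected to create the missing directory
    # before opening the database file.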
def test_import_flatflitand_copy(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                        "data", "ACA.PA.txt")
    dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                       "temp_database_copy.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    dbf2 = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                        "out_copy.db3")
    if os.path.exists(dbf2):
        os.remove(dbf2)
    import_flatfile_into_database(dbf, file, fLOG=fLOG)
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    dbm = Database(dbf2, LOG=fLOG)
    db.copy_to(dbm)
    db.connect()
    dbm.connect()
    tbls = dbm.get_table_list()
    if len(tbls) != 1:
        raise Exception("expected one table, not %d" % len(tbls))
    view = db.execute_view("SELECT * FROM ACAPA")
    viewm = dbm.execute_view("SELECT * FROM ACAPA")
    db.close()
    dbm.close()
    assert len(view) == len(viewm)
    dbm2 = Database(":memory:", LOG=fLOG)
    db.copy_to(dbm2)
    dbm2.connect()
    tbls = dbm2.get_table_list()
    if len(tbls) != 1:
        raise Exception("expected one table, not %d" % len(tbls))
    viewm2 = dbm2.execute_view("SELECT * FROM ACAPA")
    dbm2.close()
    assert len(view) == len(viewm2)
def test_import_index(self):
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                        "data", "ACA.PA.txt")
    dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                       "temp_database_index.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    fLOG("import flat file")
    ntbl2 = import_flatfile_into_database(dbf, file, fLOG=fLOG)
    fLOG("import flat file pandas")
    ntbl3 = import_flatfile_into_database_pandas(dbf, file, fLOG=fLOG,
                                                 table="pandas_table",
                                                 chunksize=500, sep="\t")
    fLOG("-----------------")
    db = Database(dbf, LOG=fLOG)
    db.connect()
    tbl1 = pandas.read_sql("SELECT * FROM " + ntbl2, db._connection)
    tbl2 = pandas.read_sql("SELECT * FROM " + ntbl3, db._connection)
    db.close()
    self.assertEqual(tbl1.shape, tbl2.shape)
    self.assertEqual(list(tbl1.columns), list(tbl2.columns))
    self.assertEqual(list(tbl1.dtypes), list(tbl2.dtypes))
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    db.create_index("index1", "ACAPA", "Date")
    li = db.get_index_list()
    self.assertEqual(
        li, [('index1', 'ACAPA', 'CREATE INDEX index1 ON ACAPA (Date)',
              ('Date',))])
    line = db.get_table_nfirst_lines("ACAPA")
    col = [_[0] for _ in db.get_table_columns("ACAPA")]
    line = line[0]
    add = {k: v for k, v in zip(col, line)}
    db.insert("ACAPA", add)
    db.commit()
    db.update("ACAPA", "Date", add["Date"], add)
    db.commit()
    db.close()
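    # A stand-alone way to double-check the same index with the sqlite3 module
    # from the standard library (a sketch, reusing 'dbf' and the index name
    # from the test above; it is not part of the original test):
    #
    #     import sqlite3
    #     with sqlite3.connect(dbf) as con:
    #         rows = con.execute(
    #             "SELECT name, tbl_name FROM sqlite_master "
    #             "WHERE type='index'").fetchall()
    #     # rows should contain ('index1', 'ACAPA')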