def test_dataset_from_sql(self): sql = "select * from etl_sale where etl_file_id = %(ETL_FILE_ID)s" binds = {"ETL_FILE_ID": 201723} connection = ConnectionHelper().get_named_connection("current") # pandas.read_sql(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, columns=None, chunksize=None)[source] now = datetime.datetime.now() sale_dataframe = Dataset.read_sql(sql, connection, params=binds) logger.debug("test_dataset_from_sql elapsed %s " % (datetime.datetime.now() - now)) logger.debug("rowcount %s" % len(sale_dataframe.rows))
def test_to_sqlite(self): logger = logging.getLogger(__name__ + ":test_to_sqlite") obj = [[1, "hello", datetime.date(2017, 5, 6)], [2, "goodbye", datetime.date(1965, 7, 31)] ] ft = Dataset.from_list_of_lists(obj, ["int", "str", "dt"]) now = datetime.datetime.now() connection = ft.to_sqlite("list_of_lists") logger.debug("elapsed %s " % (datetime.datetime.now() - now)) cursor = connection.cursor() rows = cursor.execute("select count(*) from list_of_lists") for row in rows: logger.debug ("count %s " % row[0])
def test_from_tuples(self): obj = [(1,"hello",datetime.date(2017,5,6)), (2,"goodbye",datetime.date(1965,7,31))] ft = Dataset.from_tuples(obj,["int","str","dt"]) self.assertEqual(ft.rows[0][0],1) self.assertEqual(ft.rows[0][1],"hello") self.assertEqual(ft.rows[0][2],datetime.date(2017,5,6)) self.assertEqual(ft.rows[1][0],2) self.assertEqual(ft.rows[1][1],"goodbye") self.assertEqual(ft.rows[1][2],datetime.date(1965,7,31)) self.assertEqual(len(ft.rows),2) self.assertEqual(len(ft.rows[0]),3) self.assertEqual(len(ft.rows[1]),3)
def test_from_items(self): logging.debug("test Dataset.from_items") ds = Dataset.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])]) self.assertEqual(ds.column_names[0], 'A') self.assertEqual(ds.column_names[1], 'B') self.assertEqual(len(ds.column_names), 2) self.assertEqual(ds.rows[0][0], 1) self.assertEqual(ds.rows[1][0], 2) self.assertEqual(ds.rows[2][0], 3) self.assertEqual(ds.rows[0][1], 4) self.assertEqual(ds.rows[1][1], 5) self.assertEqual(ds.rows[2][1], 6) self.assertEqual(len(ds.rows), 3) self.assertEqual(len(ds.rows[0]), 2) self.assertEqual(len(ds.rows[1]), 2) logger.debug ("test_from_items\n%s" % ds)
def dataset_list_of_lists(): a = datetime.datetime.now() df = Dataset.from_list_of_lists(list_of_lists, None) b = datetime.datetime.now() print("type df %s" % type(df)) print("dataset_list_of_lists time %s" % (b - a)) for row in df.rows: pass c = datetime.datetime.now() print("dataset_list_of_lists iter time %s" % (c - b)) for row in df.rows: for col in row: pass d = datetime.datetime.now() print("dataset_list_of_lists iter row col time %s" % (d - c)) return a, b
def test_from_list_of_lists(self): obj = [[1,"hello",datetime.date(2017,5,6)], [2,"goodbye",datetime.date(1965,7,31)] ] ft = Dataset.from_list_of_lists(obj,["int","str","dt"]) logger.debug("row 0 %s" % ft.rows ) self.assertEqual(ft.rows[0][0],1) self.assertEqual(ft.rows[0][1],"hello") self.assertEqual(ft.rows[0][2],datetime.date(2017,5,6)) self.assertEqual(ft.rows[1][0],2) self.assertEqual(ft.rows[1][1],"goodbye") self.assertEqual(ft.rows[1][2],datetime.date(1965,7,31)) self.assertEqual(len(ft.rows),2) self.assertEqual(len(ft.rows[0]),3) self.assertEqual(len(ft.rows[1]),3) self.assertEqual(ft.column_names[0],"int") self.assertEqual(ft.column_names[1],"str") self.assertEqual(ft.column_names[2],"dt") self.assertEqual(len(ft.column_names),3)
def test_from_items_index(self): logging.debug("test Dataset.from_items") ds = Dataset.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],orient="index", columns=['one','two','three']) self.assertEqual(ds.column_names[0],'one') self.assertEqual(ds.column_names[1],'two') self.assertEqual(ds.column_names[2],'three') self.assertEqual(len(ds.column_names),3) self.assertEqual(ds.rows[0][0],1) self.assertEqual(ds.rows[0][1],2) self.assertEqual(ds.rows[0][2],3) self.assertEqual(ds.rows[1][0],4) self.assertEqual(ds.rows[1][1],5) self.assertEqual(ds.rows[1][2],6) self.assertEqual(len(ds.rows),2) self.assertEqual(len(ds.rows[0]),3) self.assertEqual(len(ds.rows[1]),3) self.assertEqual(ds.row_key[0],'A') self.assertEqual(ds.row_key[1],'B') self.assertEqual(len(ds.row_key),2) logger.debug ("test_from_items_index\n%s" % ds)
import sys from pdsutil.DbUtil import ConnectionHelper, CursorHelper from pdsutil.Dataset import Dataset import datetime import logging logging.basicConfig(level=logging.INFO) sql = "select * from etl_sale where etl_file_id = %(ETL_FILE_ID)s" binds = {"ETL_FILE_ID" : 201723} connection = ConnectionHelper().get_named_connection("current") cursor = CursorHelper(connection.cursor()) sales = Dataset.from_sql(connection,sql,binds) # sales.to_csv(sys.stdout) # to_csv out_file = open("/tmp/sales.csv","w") sales.to_csv(out_file) # to_sqlite sales.set_column_meta("curr_cd",str,3) sales.set_column_meta("org_customer_id",str,10) db = sales.to_sqlite("etl_sale",verbose=False) cursor = CursorHelper(db.cursor()) rows = cursor.execute("select count(*) from etl_sale") for row in rows: print (row)
def get_dataset_from_list_of_lists(matrix): return Dataset.from_list_of_lists(matrix, None)
for col in row: cell_sum += col def to_html(dataset): dataset.to_html() def add_matrix(rows, columns): matrix = get_list_of_lists(rows, columns) name = "%s x %s" % (rows, columns) matrices[name] = matrix add_matrix(20000, 30) add_matrix(10000, 2) add_matrix(500, 12) for k, v in matrices.items(): dataframes[k] = pandas.DataFrame(v) datasets[k] = Dataset.from_list_of_lists(v, None) for k in datasets: df = dataframes[k] ds = datasets[k] benchmark_pair(k, iter_rows) benchmark_pair(k, iter_rows_len) benchmark_pair(k, iter_rows_and_columns) benchmark_pair(k, sum_cells) benchmark_pair(k, to_html)