def testXlsSpacey(self):
    """Round-trip PanDat objects through .xlsx with case_space_sheet_names for two schemas."""
    if not self.can_run:
        return
    ext = ".xlsx"
    # --- spaces schema round trip ---
    tic_factory = TicDatFactory(**spacesSchema())
    pan_factory = PanDatFactory(**spacesSchema())
    written = pan_dat_maker(spacesSchema(), tic_factory.TicDat(**spacesData()))
    target = os.path.join(_scratchDir, "spaces_2%s" % ext)
    pan_factory.xls.write_file(written, target, case_space_sheet_names=True)
    read_back = pan_factory.xls.create_pan_dat(target)
    self.assertTrue(pan_factory._same_data(written, read_back))
    # --- netflow schema round trip ---
    tic_factory = TicDatFactory(**netflowSchema())
    pan_factory = PanDatFactory(**netflowSchema())
    frozen = tic_factory.freeze_me(tic_factory.TicDat(
        **{t: getattr(netflowData(), t) for t in tic_factory.primary_key_fields}))
    written = pan_dat_maker(netflowSchema(), frozen)
    target = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
    pan_factory.xls.write_file(written, target, case_space_sheet_names=True)
    read_back = pan_factory.xls.create_pan_dat(target)
    self.assertTrue(pan_factory._same_data(written, read_back))
def testSqlSpaceyTwo(self):
    """Round-trip PanDat objects through SQLite connections with case_space_table_names."""
    if not self.can_run:
        return
    self.assertTrue(pandatio.sql, "this unit test requires SQLite installed")
    ext = ".db"
    # --- spaces schema, written/read over an explicit connection ---
    tic_factory = TicDatFactory(**spacesSchema())
    pan_factory = PanDatFactory(**spacesSchema())
    source = tic_factory.TicDat(
        a_table={1: [1, 2, "3"],
                 22.2: (12, 0.12, "something"),
                 0.23: (11, 12, "thirt")},
        b_table={(1, 2, "foo"): 1,
                 (1012.22, 4, "0012"): 12},
        c_table=(("this", 2, 3, 4),
                 ("that", 102.212, 3, 5.5),
                 ("another", 5, 12.5, 24)))
    written = pan_dat_maker(spacesSchema(), source)
    db_path = os.path.join(_scratchDir, "spaces_2%s" % ext)
    with pandatio.sql.connect(db_path) as con:
        # db_file_path=None exercises the "connection only" code path
        pan_factory.sql.write_file(written, db_file_path=None, con=con,
                                   case_space_table_names=True)
    with pandatio.sql.connect(db_path) as con:
        read_back = pan_factory.sql.create_pan_dat(db_file_path=None, con=con)
    self.assertTrue(pan_factory._same_data(written, read_back))
    # --- netflow schema, same round trip with the "" / positional variants ---
    tic_factory = TicDatFactory(**netflowSchema())
    pan_factory = PanDatFactory(**netflowSchema())
    frozen = tic_factory.freeze_me(tic_factory.TicDat(
        **{t: getattr(netflowData(), t) for t in tic_factory.primary_key_fields}))
    written = pan_dat_maker(netflowSchema(), frozen)
    db_path = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
    with pandatio.sql.connect(db_path) as con:
        # db_file_path="" is another falsy spelling of "use the connection"
        pan_factory.sql.write_file(written, db_file_path="", con=con,
                                   case_space_table_names=True)
    with pandatio.sql.connect(db_path) as con:
        read_back = pan_factory.sql.create_pan_dat(None, con)
    self.assertTrue(pan_factory._same_data(written, read_back))
def testSpacey(self):
    """Read the checked-in spaces.accdb file and compare it to the in-memory data."""
    if not _can_unit_test:
        return
    factory = TicDatFactory(**spacesSchema())
    # The same content that was used to create the read-only spaces.accdb fixture.
    expected = factory.TicDat(
        a_table={1: {"a Data 3": 3, "a Data 2": 2, "a Data 1": 1},
                 22: (1.1, 12, 12),
                 0.23: (11, 12, 11)},
        b_table={("1", "2", "3"): 1,
                 ("a", "b", "b"): 12},
        c_table=(("1", "2", "3", 4),
                 {"c Data 4": 55, "c Data 2": "b", "c Data 3": "c", "c Data 1": "a"},
                 ("a", "b", "12", 24)))
    file_path = "spaces.accdb"
    self.assertFalse(factory.mdb.find_duplicates(file_path))
    from_file = factory.mdb.create_tic_dat(file_path, freeze_it=True)
    self.assertTrue(factory._same_data(expected, from_file))
def testSpacey(self):
    # Round-trip "spacey" (space-containing) field names through an Access
    # .accdb file, then rename the tables themselves to contain spaces and
    # verify the data still reads back identically.
    if not _can_accdb_unit_test:
        return
    tdf = TicDatFactory(**spacesSchema())
    spacesData = {
        "a_table": {1: {"a Data 3": 3, "a Data 2": 2, "a Data 1": 1},
                    22: (1.1, 12, 12),
                    0.23: (11, 12, 11)},
        "b_table": {("1", "2", "3"): 1,
                    ("a", "b", "b"): 12},
        "c_table": (("1", "2", "3", 4),
                    {"c Data 4": 55, "c Data 2": "b", "c Data 3": "c", "c Data 1": "a"},
                    ("a", "b", "12", 24))}
    dat = tdf.TicDat(**spacesData)
    filePath = makeCleanPath(os.path.join(_scratchDir, "spacey.accdb"))
    # Explicit column types are needed because the data mixes numbers and text.
    tdf.mdb.write_schema(filePath,
                         a_table={"a Field": "double"},
                         c_table={"c Data 1": "text", "c Data 2": "text",
                                  "c Data 3": "text", "c Data 4": "int"})
    tdf.mdb.write_file(dat, filePath)
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    dat2 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(dat, dat2))
    # Copy each table to a space-containing name, then drop the original,
    # so the file now has only spacey table names.
    with py.connect(_connection_str(filePath)) as con:
        for t in tdf.all_tables:
            con.cursor().execute("SELECT * INTO [%s] FROM %s" % (t.replace("_", " "), t)).commit()
            con.cursor().execute("DROP TABLE %s" % t).commit()
    #shutil.copy(filePath, "spaces.accdb") #uncomment to make readonly test file as .accdb
    dat3 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(dat, dat3))
def testDenormalizedErrors(self):
    # Exercise utils.find_denormalized_sub_table_failures on progressively
    # dirtier data, checking the (cleaned) failure reports exactly.
    if not self.canRun:
        return
    c = clean_denormalization_errors
    f = utils.find_denormalized_sub_table_failures
    tdf = TicDatFactory(**spacesSchema())
    dat = tdf.TicDat(**spacesData())
    # p() rebuilds the pandas view of b_table after each mutation of dat
    p = lambda: tdf.copy_to_pandas(dat, drop_pk_columns=False).b_table
    self.assertFalse(f(p(), "b Field 1", ("b Field 2", "b Field 3")))
    dat.b_table[2, 2, 3] = "boger"
    # still no failure: "b Field 1" remains functionally determined
    self.assertFalse(f(p(), "b Field 1", ("b Field 2", "b Field 3")))
    chk = f(p(), "b Field 2", ("b Field 1", "b Field 3"))
    self.assertTrue(c(chk) == {2: {'b Field 1': {1, 2}}})
    dat.b_table[2, 2, 4] = "boger"
    dat.b_table[1, 'b', 'b'] = "boger"
    chk = f(p(), ["b Field 2"], ("b Field 1", "b Field 3", "b Data"))
    self.assertTrue(c(chk) == c({2: {'b Field 3': (3, 4),
                                     'b Data': (1, 'boger'),
                                     'b Field 1': (1, 2)},
                                 'b': {'b Data': ('boger', 12),
                                       'b Field 1': ('a', 1)}}))
    # a bogus column name should raise with a recognizable message
    ex = self.firesException(lambda: f(p(), ["b Data"], "wtf"))
    self.assertTrue("wtf isn't a column" in ex)
    # repeat the exercise on c_table (a generic, list-like table)
    p = lambda: tdf.copy_to_pandas(dat, drop_pk_columns=False).c_table
    chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
    self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12},
                                            'c Data 4': {24, 'd'}}})
    dat.c_table.append((1, 2, 3, 4))
    dat.c_table.append((1, 2, 1, 4))
    dat.c_table.append((1, 2, 1, 5))
    dat.c_table.append((1, 2, 3, 6))
    chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
    self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12},
                                            'c Data 4': {24, 'd'}},
                               (1, 2): {'c Data 3': {3, 1},
                                        'c Data 4': {4, 5, 6}}})
def testSpaces(self):
    """Verify the opalytics reader round-trips spacey data for every hack/raw_data combo."""
    if not self.can_run:
        return
    # nested loops cover the same four combinations as product([True, False], repeat=2)
    for hack in (True, False):
        for raw_data in (True, False):
            factory = TicDatFactory(**spacesSchema())
            original = factory.TicDat(**spacesData())
            round_tripped = factory.opalytics.create_tic_dat(
                create_inputset_mock(factory, original, hack), raw_data=raw_data)
            self.assertTrue(factory._same_data(original, round_tripped))
def testCsvSpacey(self):
    """Round-trip PanDat objects through csv directories with case_space_table_names."""
    if not self.can_run:
        return
    self.assertTrue(pandatio.sql, "this unit test requires SQLite installed")
    # --- spaces schema round trip ---
    tic_factory = TicDatFactory(**spacesSchema())
    pan_factory = PanDatFactory(**spacesSchema())
    source = tic_factory.TicDat(
        a_table={1: [1, 2, "3"],
                 22.2: (12, 0.12, "something"),
                 0.23: (11, 12, "thirt")},
        b_table={(1, 2, "foo"): 1,
                 (1012.22, 4, "0012"): 12},
        c_table=(("this", 2, 3, 4),
                 ("that", 102.212, 3, 5.5),
                 ("another", 5, 12.5, 24)))
    written = pan_dat_maker(spacesSchema(), source)
    target_dir = os.path.join(_scratchDir, "spaces_2_csv")
    pan_factory.csv.write_directory(written, target_dir, case_space_table_names=True)
    read_back = pan_factory.csv.create_pan_dat(target_dir)
    self.assertTrue(pan_factory._same_data(written, read_back))
    # --- netflow schema round trip, with a non-default separator ---
    tic_factory = TicDatFactory(**netflowSchema())
    pan_factory = PanDatFactory(**netflowSchema())
    frozen = tic_factory.freeze_me(tic_factory.TicDat(
        **{t: getattr(netflowData(), t) for t in tic_factory.primary_key_fields}))
    written = pan_dat_maker(netflowSchema(), frozen)
    target_dir = os.path.join(_scratchDir, "spaces_2_2_csv")
    pan_factory.csv.write_directory(written, target_dir, case_space_table_names=True, sep=":")
    read_back = pan_factory.csv.create_pan_dat(target_dir, sep=":")
    self.assertTrue(pan_factory._same_data(written, read_back))
def testSpacesOpalytics(self):
    """Verify the PanDat opalytics reader handles spacey data for every hack/raw_data combo."""
    if not self.can_run:
        return
    # nested loops cover the same four combinations as itertools.product([True, False], repeat=2)
    for hack in (True, False):
        for raw_data in (True, False):
            tic_factory = TicDatFactory(**spacesSchema())
            original = tic_factory.TicDat(**spacesData())
            mock_inputset = create_inputset_mock(tic_factory, original, hack)
            pan_factory = PanDatFactory(**tic_factory.schema())
            as_pan_dat = pan_factory.opalytics.create_pan_dat(mock_inputset, raw_data=raw_data)
            self.assertTrue(tic_factory._same_data(original, pan_factory.copy_to_tic_dat(as_pan_dat)))
def testSpacey(self):
    """Write spacey data to SQLite, rename tables to space-containing names, and re-read."""
    if not self.can_run:
        return
    factory = TicDatFactory(**spacesSchema())
    original = factory.TicDat(**spacesData())
    db_path = makeCleanPath(os.path.join(_scratchDir, "spacey.db"))
    factory.sql.write_db_data(original, db_path)
    first_read = factory.sql.create_tic_dat(db_path, freeze_it=True)
    self.assertTrue(factory._same_data(original, first_read))
    # give every table a space-containing name; [brackets] quote the new name
    with sql.connect(db_path) as con:
        for table in factory.all_tables:
            con.execute("ALTER TABLE %s RENAME TO [%s]" % (table, table.replace("_", " ")))
    second_read = factory.sql.create_tic_dat(db_path, freeze_it=True)
    self.assertTrue(factory._same_data(original, second_read))
def testJsonSpacey(self):
    """Round-trip PanDat objects through json (file and string) with case_space_table_names."""
    if not self.can_run:
        return
    ext = ".json"
    # --- spaces schema: file round trip, then string round trip ---
    tic_factory = TicDatFactory(**spacesSchema())
    pan_factory = PanDatFactory(**spacesSchema())
    written = pan_dat_maker(spacesSchema(), tic_factory.TicDat(**spacesData()))
    target = os.path.join(_scratchDir, "spaces_2%s" % ext)
    pan_factory.json.write_file(written, target, case_space_table_names=True)
    self.assertTrue(pan_factory._same_data(written, pan_factory.json.create_pan_dat(target)))
    # write_file with "" returns the json string instead of writing a file
    as_string = pan_factory.json.write_file(written, "", case_space_table_names=True)
    self.assertTrue(pan_factory._same_data(written, pan_factory.json.create_pan_dat(as_string)))
    # --- netflow schema: same file and string round trips ---
    tic_factory = TicDatFactory(**netflowSchema())
    pan_factory = PanDatFactory(**netflowSchema())
    frozen = tic_factory.freeze_me(tic_factory.TicDat(
        **{t: getattr(netflowData(), t) for t in tic_factory.primary_key_fields}))
    written = pan_dat_maker(netflowSchema(), frozen)
    target = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
    pan_factory.json.write_file(written, target, case_space_table_names=True)
    self.assertTrue(pan_factory._same_data(written, pan_factory.json.create_pan_dat(target)))
    as_string = pan_factory.json.write_file(written, "", case_space_table_names=True)
    self.assertTrue(pan_factory._same_data(written, pan_factory.json.create_pan_dat(as_string)))
    # orient='columns' output should reconstruct via the PanDat constructor,
    # modulo small float round-off (hence the epsilon)
    dicted = json.loads(pan_factory.json.write_file(written, "", orient='columns'))
    rebuilt = pan_factory.PanDat(**dicted)
    self.assertTrue(pan_factory._same_data(written, rebuilt, epsilon=1e-5))
def testSpacey(self):
    # Round-trip spacey data through .xls files written manually with xlwt,
    # once with underscore sheet names and once with space-containing names.
    if not self.can_run:
        return
    tdf = TicDatFactory(**spacesSchema())
    ticDat = tdf.TicDat(**spacesData())

    def writeData(insert_spaces):
        # Write ticDat to filePath by hand with xlwt; sheet names get spaces
        # instead of underscores when insert_spaces is truthy.
        # NOTE(review): reads the enclosing scope's filePath, which is
        # assigned below before the first call.
        import xlwt
        book = xlwt.Workbook()
        for t in tdf.all_tables:
            # replacing "_" with "_" is a deliberate no-op when insert_spaces is False
            sheet = book.add_sheet(t.replace("_", " " if insert_spaces else "_"))
            # header row: primary key fields followed by data fields
            for i, f in enumerate(
                    tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())):
                sheet.write(0, i, f)
            _t = getattr(ticDat, t)
            containerish = utils.containerish
            if utils.dictish(_t):
                # keyed table: each row is the primary key tuple followed by the data fields
                for row_ind, (p_key, data) in enumerate(_t.items()):
                    for field_ind, cell in enumerate(
                            (p_key if containerish(p_key) else (p_key,)) +
                            tuple(data[_f] for _f in tdf.data_fields.get(t, ()))):
                        sheet.write(row_ind + 1, field_ind, cell)
            else:
                # generic table (no primary key): rows carry data fields only
                for row_ind, data in enumerate(_t if containerish(_t) else _t()):
                    for field_ind, cell in enumerate(
                            tuple(data[_f] for _f in tdf.data_fields[t])):
                        sheet.write(row_ind + 1, field_ind, cell)
        if os.path.exists(filePath):
            os.remove(filePath)
        book.save(filePath)

    filePath = os.path.join(_scratchDir, "spaces.xls")
    writeData(insert_spaces=False)
    ticDat2 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat2))
    writeData(insert_spaces=True)
    ticDat3 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
def testSpacey2(self):
    """Round-trip TicDat objects through .xls and .xlsx with case_space_sheet_names."""
    if not self.can_run:
        return
    # --- spaces schema, both excel formats ---
    factory = TicDatFactory(**spacesSchema())
    original = factory.TicDat(**spacesData())
    for ext in (".xls", ".xlsx"):
        target = os.path.join(_scratchDir, "spaces_2%s" % ext)
        factory.xls.write_file(original, target, case_space_sheet_names=True)
        self.assertTrue(factory._same_data(original, factory.xls.create_tic_dat(target)))
    # --- netflow schema, both excel formats ---
    factory = TicDatFactory(**netflowSchema())
    original = factory.freeze_me(factory.TicDat(
        **{t: getattr(netflowData(), t) for t in factory.primary_key_fields}))
    for ext in (".xls", ".xlsx"):
        target = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        factory.xls.write_file(original, target, case_space_sheet_names=True)
        self.assertTrue(factory._same_data(original, factory.xls.create_tic_dat(target)))