def testDenormalizedErrors(self):
    """Exercise find_denormalized_sub_table_failures and
    clean_denormalization_errors against the 'spaces' schema's
    b_table and c_table, both before and after injecting bad rows."""
    # NOTE(review): the other tests in this source gate on self.can_run;
    # this one reads self.canRun -- confirm the attribute name matches
    # this test class.
    if not self.canRun: return
    c = clean_denormalization_errors
    f = utils.find_denormalized_sub_table_failures
    tdf = TicDatFactory(**spacesSchema())
    dat = tdf.TicDat(**spacesData())
    # p() re-projects the current dat into a pandas b_table on each call,
    # so mutations to dat below are picked up by later checks.
    p = lambda :tdf.copy_to_pandas(dat, drop_pk_columns=False).b_table
    # Initially "b Field 1" functionally determines the other two fields.
    self.assertFalse(f(p(),"b Field 1",("b Field 2", "b Field 3")))
    dat.b_table[2,2,3] = "boger"
    self.assertFalse(f(p(), "b Field 1",("b Field 2", "b Field 3")))
    # With "b Field 2" as the grouping field, "b Field 1" now takes two
    # values ({1, 2}) for key 2 -> a denormalization failure is reported.
    chk = f(p(), "b Field 2",("b Field 1", "b Field 3"))
    self.assertTrue(c(chk) == {2: {'b Field 1': {1, 2}}})
    dat.b_table[2,2,4] = "boger"
    dat.b_table[1,'b','b'] = "boger"
    chk = f(p(), ["b Field 2"],("b Field 1", "b Field 3", "b Data"))
    self.assertTrue(c(chk) == c({2: {'b Field 3': (3, 4),
                                     'b Data': (1, 'boger'),
                                     'b Field 1': (1, 2)},
                                 'b': {'b Data': ('boger', 12),
                                       'b Field 1': ('a', 1)}}))
    # A bad column name should raise with a recognizable message.
    ex = self.firesException(lambda : f(p(), ["b Data"],"wtf"))
    self.assertTrue("wtf isn't a column" in ex)
    # Repeat the exercise against c_table.
    p = lambda :tdf.copy_to_pandas(dat, drop_pk_columns=False).c_table
    chk = f(p(), pk_fields=["c Data 1", "c Data 2"],
            data_fields=["c Data 3", "c Data 4"])
    self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12},
                                            'c Data 4': {24, 'd'}}})
    dat.c_table.append((1, 2, 3, 4))
    dat.c_table.append((1, 2, 1, 4))
    dat.c_table.append((1, 2, 1, 5))
    dat.c_table.append((1, 2, 3, 6))
    # The freshly appended rows create a second failing key group (1, 2).
    chk = f(p(), pk_fields=["c Data 1", "c Data 2"],
            data_fields=["c Data 3", "c Data 4"])
    self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12},
                                            'c Data 4': {24, 'd'}},
                               (1,2):{'c Data 3':{3,1},
                                      'c Data 4':{4,5,6}}})
def testXlsSpacey(self):
    """Round-trip PanDat objects through .xlsx using case_space_sheet_names,
    for both the spaces schema and the netflow schema."""
    if not self.can_run:
        return
    ext = ".xlsx"

    # -- spaces schema --
    tic_factory = TicDatFactory(**spacesSchema())
    pan_factory = PanDatFactory(**spacesSchema())
    source_dat = pan_dat_maker(spacesSchema(),
                               tic_factory.TicDat(**spacesData()))
    target = os.path.join(_scratchDir, "spaces_2%s" % ext)
    pan_factory.xls.write_file(source_dat, target, case_space_sheet_names=True)
    round_tripped = pan_factory.xls.create_pan_dat(target)
    self.assertTrue(pan_factory._same_data(source_dat, round_tripped))

    # -- netflow schema --
    tic_factory = TicDatFactory(**netflowSchema())
    pan_factory = PanDatFactory(**netflowSchema())
    frozen = tic_factory.freeze_me(tic_factory.TicDat(
        **{t: getattr(netflowData(), t)
           for t in tic_factory.primary_key_fields}))
    source_dat = pan_dat_maker(netflowSchema(), frozen)
    target = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
    pan_factory.xls.write_file(source_dat, target, case_space_sheet_names=True)
    round_tripped = pan_factory.xls.create_pan_dat(target)
    self.assertTrue(pan_factory._same_data(source_dat, round_tripped))
def testSpaces(self):
    """A TicDat built on the spacey schema survives the opalytics inputset
    mock for every (hack, raw_data) combination."""
    if not self.can_run:
        return
    for hack in (True, False):
        for raw_data in (True, False):
            tdf = TicDatFactory(**spacesSchema())
            original = tdf.TicDat(**spacesData())
            recovered = tdf.opalytics.create_tic_dat(
                create_inputset_mock(tdf, original, hack), raw_data=raw_data)
            self.assertTrue(tdf._same_data(original, recovered))
def testSpacesOpalytics(self):
    """PanDat built from an opalytics inputset mock matches the source
    TicDat for every (hack, raw_data) combination."""
    if not self.can_run:
        return
    for hack in (True, False):
        for raw_data in (True, False):
            tdf = TicDatFactory(**spacesSchema())
            tic_dat = tdf.TicDat(**spacesData())
            mock_inputset = create_inputset_mock(tdf, tic_dat, hack)
            pdf = PanDatFactory(**tdf.schema())
            pan_dat = pdf.opalytics.create_pan_dat(mock_inputset,
                                                   raw_data=raw_data)
            self.assertTrue(tdf._same_data(tic_dat,
                                           pdf.copy_to_tic_dat(pan_dat)))
def testSpacey(self):
    """SQLite round trip still works after every table is renamed so its
    underscores become spaces."""
    if not self.can_run:
        return
    tdf = TicDatFactory(**spacesSchema())
    original = tdf.TicDat(**spacesData())
    db_path = makeCleanPath(os.path.join(_scratchDir, "spacey.db"))
    tdf.sql.write_db_data(original, db_path)
    self.assertTrue(tdf._same_data(
        original, tdf.sql.create_tic_dat(db_path, freeze_it=True)))
    # Rename each table in place, bracketing the spacey name for SQLite.
    with sql.connect(db_path) as con:
        for table in tdf.all_tables:
            con.execute("ALTER TABLE %s RENAME TO [%s]"%(
                table, table.replace("_", " ")))
    reread = tdf.sql.create_tic_dat(db_path, freeze_it=True)
    self.assertTrue(tdf._same_data(original, reread))
def testSpacey(self):
    """Hand-write the spacey TicDat to .xls via xlwt, with and without
    spaces in the sheet names, and check both files read back to the
    same data."""
    if not self.can_run: return
    tdf = TicDatFactory(**spacesSchema())
    ticDat = tdf.TicDat(**spacesData())
    def writeData(insert_spaces):
        # Build the workbook manually so the test controls exactly how
        # sheet names and header rows are spelled.
        import xlwt
        book = xlwt.Workbook()
        for t in tdf.all_tables:
            sheet = book.add_sheet(
                t.replace("_", " " if insert_spaces else "_"))
            # Header row: primary key fields followed by data fields.
            for i, f in enumerate(
                    tdf.primary_key_fields.get(t, ()) +
                    tdf.data_fields.get(t, ())):
                sheet.write(0, i, f)
            _t = getattr(ticDat, t)
            containerish = utils.containerish
            if utils.dictish(_t):
                # Keyed table: each row is the pk (tuple or scalar)
                # flattened together with that row's data field values.
                for row_ind, (p_key, data) in enumerate(_t.items()):
                    for field_ind, cell in enumerate(
                            (p_key if containerish(p_key) else (p_key, )) +
                            tuple(data[_f]
                                  for _f in tdf.data_fields.get(t, ()))):
                        sheet.write(row_ind + 1, field_ind, cell)
            else:
                # Pk-less table: rows carry data fields only; _t may be a
                # container or a callable producing one.
                for row_ind, data in enumerate(
                        _t if containerish(_t) else _t()):
                    for field_ind, cell in enumerate(
                            tuple(data[_f] for _f in tdf.data_fields[t])):
                        sheet.write(row_ind + 1, field_ind, cell)
        # filePath is a closure over the enclosing scope; it is assigned
        # below, before writeData is first called.
        if os.path.exists(filePath):
            os.remove(filePath)
        book.save(filePath)
    filePath = os.path.join(_scratchDir, "spaces.xls")
    writeData(insert_spaces=False)
    ticDat2 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat2))
    writeData(insert_spaces=True)
    ticDat3 = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, ticDat3))
def testSpacey2(self):
    """TicDat round trips through both .xls and .xlsx with
    case_space_sheet_names, for the spaces and netflow schemas."""
    if not self.can_run:
        return

    def _roundtrip(factory, dat, name_template):
        # Write then re-read in each Excel format, asserting no data loss.
        for ext in (".xls", ".xlsx"):
            path = os.path.join(_scratchDir, name_template % ext)
            factory.xls.write_file(dat, path, case_space_sheet_names=True)
            self.assertTrue(factory._same_data(
                dat, factory.xls.create_tic_dat(path)))

    tdf = TicDatFactory(**spacesSchema())
    _roundtrip(tdf, tdf.TicDat(**spacesData()), "spaces_2%s")

    tdf = TicDatFactory(**netflowSchema())
    frozen = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    _roundtrip(tdf, frozen, "spaces_2_2%s")
def testJsonSpacey(self):
    """PanDat round trips through json (file-based and string-based) with
    case_space_table_names, plus a column-oriented constructor rebuild."""
    if not self.can_run:
        return
    ext = ".json"

    def _roundtrips(pdf, pan_dat, file_name):
        # File-based round trip.
        path = os.path.join(_scratchDir, file_name)
        pdf.json.write_file(pan_dat, path, case_space_table_names=True)
        self.assertTrue(pdf._same_data(pan_dat, pdf.json.create_pan_dat(path)))
        # String-based round trip: write_file with "" yields a value that
        # create_pan_dat accepts directly.
        as_string = pdf.json.write_file(pan_dat, "",
                                        case_space_table_names=True)
        self.assertTrue(pdf._same_data(pan_dat,
                                       pdf.json.create_pan_dat(as_string)))

    tdf = TicDatFactory(**spacesSchema())
    pdf = PanDatFactory(**spacesSchema())
    pan_dat = pan_dat_maker(spacesSchema(), tdf.TicDat(**spacesData()))
    _roundtrips(pdf, pan_dat, "spaces_2%s" % ext)

    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    frozen = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    pan_dat = pan_dat_maker(netflowSchema(), frozen)
    _roundtrips(pdf, pan_dat, "spaces_2_2%s" % ext)

    # Column-oriented json can rebuild a PanDat via the constructor;
    # compare with a small epsilon to tolerate float serialization noise.
    dicted = json.loads(pdf.json.write_file(pan_dat, "", orient='columns'))
    rebuilt = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(pan_dat, rebuilt, epsilon=1e-5))