Example #1
0
    def testDenormalizedErrors(self):
        if not self.canRun:
            return
        c = clean_denormalization_errors
        f = utils.find_denormalized_sub_table_failures
        tdf = TicDatFactory(**spacesSchema())
        dat = tdf.TicDat(**spacesData())
        p = lambda :tdf.copy_to_pandas(dat, drop_pk_columns=False).b_table
        self.assertFalse(f(p(),"b Field 1",("b Field 2", "b Field 3")))
        dat.b_table[2,2,3] = "boger"
        self.assertFalse(f(p(), "b Field 1",("b Field 2", "b Field 3")))
        chk = f(p(), "b Field 2",("b Field 1", "b Field 3"))
        self.assertTrue(c(chk) == {2: {'b Field 1': {1, 2}}})
        dat.b_table[2,2,4] = "boger"
        dat.b_table[1,'b','b'] = "boger"
        chk = f(p(), ["b Field 2"],("b Field 1", "b Field 3", "b Data"))
        self.assertTrue(c(chk) == c({2: {'b Field 3': (3, 4), 'b Data': (1, 'boger'), 'b Field 1': (1, 2)},
                                 'b': {'b Data': ('boger', 12), 'b Field 1': ('a', 1)}}))

        ex = self.firesException(lambda : f(p(), ["b Data"],"wtf"))
        self.assertTrue("wtf isn't a column" in ex)


        p = lambda :tdf.copy_to_pandas(dat, drop_pk_columns=False).c_table
        chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
        self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}}})
        dat.c_table.append((1, 2, 3, 4))
        dat.c_table.append((1, 2, 1, 4))
        dat.c_table.append((1, 2, 1, 5))
        dat.c_table.append((1, 2, 3, 6))
        chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
        self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}},
                                   (1,2):{'c Data 3':{3,1}, 'c Data 4':{4,5,6}}})
Example #2
0
    def testXlsSpacey(self):
        if not self.can_run:
            return

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".xlsx"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
 def testSpaces(self):
     if not self.can_run:
         return
     for hack, raw_data in list(product(*(([True, False],)*2))):
         tdf = TicDatFactory(**spacesSchema())
         ticDat = tdf.TicDat(**spacesData())
         self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
             create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))
 def testSpacesOpalytics(self):
     if not self.can_run:
         return
     for hack, raw_data in list(itertools.product(*(([True, False], ) *
                                                    2))):
         tdf = TicDatFactory(**spacesSchema())
         ticDat = tdf.TicDat(**spacesData())
         inputset = create_inputset_mock(tdf, ticDat, hack)
         pdf = PanDatFactory(**tdf.schema())
         panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=raw_data)
         self.assertTrue(tdf._same_data(ticDat,
                                        pdf.copy_to_tic_dat(panDat)))
Example #5
0
    def testSpacey(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**spacesSchema())
        dat = tdf.TicDat(**spacesData())
        filePath = makeCleanPath(os.path.join(_scratchDir, "spacey.db"))
        tdf.sql.write_db_data(dat, filePath)
        dat2 = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat,dat2))

        with sql.connect(filePath) as con:
            for t in tdf.all_tables:
                con.execute("ALTER TABLE %s RENAME TO [%s]"%(t, t.replace("_", " ")))
        dat3 = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat3))
Example #6
0
    def testSpacey(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())

        def writeData(insert_spaces):
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables:
                sheet = book.add_sheet(
                    t.replace("_", " " if insert_spaces else "_"))
                for i, f in enumerate(
                        tdf.primary_key_fields.get(t, ()) +
                        tdf.data_fields.get(t, ())):
                    sheet.write(0, i, f)
                _t = getattr(ticDat, t)
                containerish = utils.containerish
                if utils.dictish(_t):
                    for row_ind, (p_key, data) in enumerate(_t.items()):
                        for field_ind, cell in enumerate(
                            (p_key if containerish(p_key) else (p_key, )) +
                                tuple(data[_f]
                                      for _f in tdf.data_fields.get(t, ()))):
                            sheet.write(row_ind + 1, field_ind, cell)
                else:
                    for row_ind, data in enumerate(
                            _t if containerish(_t) else _t()):
                        for field_ind, cell in enumerate(
                                tuple(data[_f] for _f in tdf.data_fields[t])):
                            sheet.write(row_ind + 1, field_ind, cell)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)

        filePath = os.path.join(_scratchDir, "spaces.xls")
        writeData(insert_spaces=False)
        ticDat2 = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        writeData(insert_spaces=True)
        ticDat3 = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))
Example #7
0
    def testSpacey2(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        for ext in [".xls", ".xlsx"]:
            filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
            tdf.xls.write_file(ticDat, filePath, case_space_sheet_names=True)
            ticDat2 = tdf.xls.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))

        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        for ext in [".xls", ".xlsx"]:
            filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
            tdf.xls.write_file(ticDat, filePath, case_space_sheet_names=True)
            ticDat2 = tdf.xls.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))
Example #8
0
    def testJsonSpacey(self):
        if not self.can_run:
            return

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".json"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        pdf.json.write_file(panDat, filePath, case_space_table_names=True)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        panDat3 = pdf.json.create_pan_dat(
            pdf.json.write_file(panDat, "", case_space_table_names=True))
        self.assertTrue(pdf._same_data(panDat, panDat3))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        pdf.json.write_file(panDat, filePath, case_space_table_names=True)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        panDat3 = pdf.json.create_pan_dat(
            pdf.json.write_file(panDat, "", case_space_table_names=True))
        self.assertTrue(pdf._same_data(panDat, panDat3))

        dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))