Example #1
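    # Round-trip a PanDat through an .xlsx file written with case_space_sheet_names=True
    # and check the data survives unchanged, for both the spacey and netflow schemas.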
    def testXlsSpacey(self):
        if not self.can_run:
            return

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".xlsx"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(
            **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
Example #2
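    # Round-trip a PanDat through SQLite, writing and reading over an already-open
    # connection (db_file_path passed as None or "") with case_space_table_names=True.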
    def testSqlSpaceyTwo(self):
        if not self.can_run:
            return
        self.assertTrue(pandatio.sql,
                        "this unit test requires SQLite installed")

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(
            **{
                "a_table": {
                    1: [1, 2, "3"],
                    22.2: (12, 0.12, "something"),
                    0.23: (11, 12, "thirt")
                },
                "b_table": {
                    (1, 2, "foo"): 1,
                    (1012.22, 4, "0012"): 12
                },
                "c_table": (("this", 2, 3, 4), ("that", 102.212, 3, 5.5),
                            ("another", 5, 12.5, 24))
            })
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".db"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        with pandatio.sql.connect(filePath) as con:
            pdf.sql.write_file(panDat,
                               db_file_path=None,
                               con=con,
                               case_space_table_names=True)
        with pandatio.sql.connect(filePath) as con:
            panDat2 = pdf.sql.create_pan_dat(db_file_path=None, con=con)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(
            **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        with pandatio.sql.connect(filePath) as con:
            pdf.sql.write_file(panDat,
                               db_file_path="",
                               con=con,
                               case_space_table_names=True)
        with pandatio.sql.connect(filePath) as con:
            panDat2 = pdf.sql.create_pan_dat(None, con)
        self.assertTrue(pdf._same_data(panDat, panDat2))
Example #3
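    # Read the spaces.accdb fixture file, confirm it contains no duplicate rows, and
    # check it matches a TicDat built from the inline spacesData dict.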
    def testSpacey(self):
        if not _can_unit_test:
            return
        tdf = TicDatFactory(**spacesSchema())
        spacesData = {
            "a_table": {
                1: {
                    "a Data 3": 3,
                    "a Data 2": 2,
                    "a Data 1": 1
                },
                22: (1.1, 12, 12),
                0.23: (11, 12, 11)
            },
            "b_table": {
                ("1", "2", "3"): 1,
                ("a", "b", "b"): 12
            },
            "c_table": (("1", "2", "3", 4), {
                "c Data 4": 55,
                "c Data 2": "b",
                "c Data 3": "c",
                "c Data 1": "a"
            }, ("a", "b", "12", 24))
        }

        dat = tdf.TicDat(**spacesData)
        filePath = "spaces.accdb"
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        dat2 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat2))
Example #4
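    # Write the spacey schema and data to a fresh .accdb file, read it back, then copy
    # each table to a name containing spaces and check the reader still matches.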
    def testSpacey(self):
        if not _can_accdb_unit_test:
            return
        tdf = TicDatFactory(**spacesSchema())
        spacesData = {
            "a_table": {1: {"a Data 3": 3, "a Data 2": 2, "a Data 1": 1},
                        22: (1.1, 12, 12), 0.23: (11, 12, 11)},
            "b_table": {("1", "2", "3"): 1, ("a", "b", "b"): 12},
            "c_table": (("1", "2", "3", 4),
                        {"c Data 4": 55, "c Data 2": "b", "c Data 3": "c", "c Data 1": "a"},
                        ("a", "b", "12", 24))}

        dat = tdf.TicDat(**spacesData)
        filePath = makeCleanPath(os.path.join(_scratchDir, "spacey.accdb"))
        tdf.mdb.write_schema(filePath, a_table={"a Field": "double"},
                             c_table={"c Data 1": "text", "c Data 2": "text",
                                      "c Data 3": "text", "c Data 4": "int"})
        tdf.mdb.write_file(dat, filePath)
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        dat2 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat2))

        with py.connect(_connection_str(filePath)) as con:
            for t in tdf.all_tables:
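                # copy each table into a spaced name, then drop the underscore-named original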
                con.cursor().execute("SELECT * INTO [%s] FROM %s" % (t.replace("_", " "), t)).commit()
                con.cursor().execute("DROP TABLE %s" % t).commit()
        #shutil.copy(filePath, "spaces.accdb") #uncomment to make readonly test file as .accdb
        dat3 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat3))
Example #5
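    # Exercise find_denormalized_sub_table_failures on pandas copies of the spacey
    # tables, before and after injecting rows that break sub-table consistency.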
    def testDenormalizedErrors(self):
        if not self.canRun:
            return
        c = clean_denormalization_errors
        f = utils.find_denormalized_sub_table_failures
        tdf = TicDatFactory(**spacesSchema())
        dat = tdf.TicDat(**spacesData())
        p = lambda: tdf.copy_to_pandas(dat, drop_pk_columns=False).b_table
        self.assertFalse(f(p(), "b Field 1", ("b Field 2", "b Field 3")))
        dat.b_table[2, 2, 3] = "boger"
        self.assertFalse(f(p(), "b Field 1", ("b Field 2", "b Field 3")))
        chk = f(p(), "b Field 2", ("b Field 1", "b Field 3"))
        self.assertTrue(c(chk) == {2: {'b Field 1': {1, 2}}})
        dat.b_table[2, 2, 4] = "boger"
        dat.b_table[1, 'b', 'b'] = "boger"
        chk = f(p(), ["b Field 2"], ("b Field 1", "b Field 3", "b Data"))
        self.assertTrue(c(chk) == c({2: {'b Field 3': (3, 4), 'b Data': (1, 'boger'), 'b Field 1': (1, 2)},
                                     'b': {'b Data': ('boger', 12), 'b Field 1': ('a', 1)}}))

        ex = self.firesException(lambda: f(p(), ["b Data"], "wtf"))
        self.assertTrue("wtf isn't a column" in ex)


        p = lambda: tdf.copy_to_pandas(dat, drop_pk_columns=False).c_table
        chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
        self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}}})
        dat.c_table.append((1, 2, 3, 4))
        dat.c_table.append((1, 2, 1, 4))
        dat.c_table.append((1, 2, 1, 5))
        dat.c_table.append((1, 2, 3, 6))
        chk = f(p(), pk_fields=["c Data 1", "c Data 2"], data_fields=["c Data 3", "c Data 4"])
        self.assertTrue(c(chk) == {('a', 'b'): {'c Data 3': {'c', 12}, 'c Data 4': {24, 'd'}},
                                   (1, 2): {'c Data 3': {3, 1}, 'c Data 4': {4, 5, 6}}})
Example #6
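    # Round-trip the spacey TicDat through an opalytics inputset mock for every
    # combination of the hack and raw_data flags.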
    def testSpaces(self):
        if not self.can_run:
            return
        for hack, raw_data in list(product(*(([True, False],) * 2))):
            tdf = TicDatFactory(**spacesSchema())
            ticDat = tdf.TicDat(**spacesData())
            self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
                create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))
Example #7
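    # Round-trip a PanDat through a directory of csv files written with
    # case_space_table_names=True, including a second pass that uses sep=":".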
    def testCsvSpacey(self):
        if not self.can_run:
            return
        self.assertTrue(pandatio.sql,
                        "this unit test requires SQLite installed")

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(
            **{
                "a_table": {
                    1: [1, 2, "3"],
                    22.2: (12, 0.12, "something"),
                    0.23: (11, 12, "thirt")
                },
                "b_table": {
                    (1, 2, "foo"): 1,
                    (1012.22, 4, "0012"): 12
                },
                "c_table": (("this", 2, 3, 4), ("that", 102.212, 3, 5.5),
                            ("another", 5, 12.5, 24))
            })
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "spaces_2_csv")
        pdf.csv.write_directory(panDat, dirPath, case_space_table_names=True)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(
            **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "spaces_2_2_csv")
        pdf.csv.write_directory(panDat,
                                dirPath,
                                case_space_table_names=True,
                                sep=":")
        panDat2 = pdf.csv.create_pan_dat(dirPath, sep=":")
        self.assertTrue(pdf._same_data(panDat, panDat2))
Example #8
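    # Build a PanDat from the opalytics inputset mock and check it matches the original
    # TicDat after copy_to_tic_dat, for every combination of the hack and raw_data flags.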
    def testSpacesOpalytics(self):
        if not self.can_run:
            return
        for hack, raw_data in list(itertools.product(*(([True, False],) * 2))):
            tdf = TicDatFactory(**spacesSchema())
            ticDat = tdf.TicDat(**spacesData())
            inputset = create_inputset_mock(tdf, ticDat, hack)
            pdf = PanDatFactory(**tdf.schema())
            panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=raw_data)
            self.assertTrue(tdf._same_data(ticDat, pdf.copy_to_tic_dat(panDat)))
Example #9
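    # Write the spacey TicDat to a SQLite file, read it back, then rename the tables to
    # names containing spaces and confirm the reader still returns the same data.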
    def testSpacey(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**spacesSchema())
        dat = tdf.TicDat(**spacesData())
        filePath = makeCleanPath(os.path.join(_scratchDir, "spacey.db"))
        tdf.sql.write_db_data(dat, filePath)
        dat2 = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat2))

        with sql.connect(filePath) as con:
            for t in tdf.all_tables:
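                # rename each table to a version whose name contains spaces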
                con.execute("ALTER TABLE %s RENAME TO [%s]" % (t, t.replace("_", " ")))
        dat3 = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat3))
Example #10
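    # Round-trip a PanDat through json files and json strings (write_file with an empty
    # path returns the json text, which create_pan_dat and json.loads consume below),
    # for both the spacey and netflow schemas.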
    def testJsonSpacey(self):
        if not self.can_run:
            return

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".json"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        pdf.json.write_file(panDat, filePath, case_space_table_names=True)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        panDat3 = pdf.json.create_pan_dat(
            pdf.json.write_file(panDat, "", case_space_table_names=True))
        self.assertTrue(pdf._same_data(panDat, panDat3))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(
            **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        pdf.json.write_file(panDat, filePath, case_space_table_names=True)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        panDat3 = pdf.json.create_pan_dat(
            pdf.json.write_file(panDat, "", case_space_table_names=True))
        self.assertTrue(pdf._same_data(panDat, panDat3))

        dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
Example #11
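    # Hand-write the spacey data to an .xls workbook with xlwt, using sheet names with
    # and without spaces, and check that tdf.xls.create_tic_dat reads both versions.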
    def testSpacey(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())

        def writeData(insert_spaces):
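            # build the workbook by hand: one sheet per table (underscores become spaces
            # when insert_spaces is True), a header row of field names, then one row per record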
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables:
                sheet = book.add_sheet(
                    t.replace("_", " " if insert_spaces else "_"))
                for i, f in enumerate(
                        tdf.primary_key_fields.get(t, ()) +
                        tdf.data_fields.get(t, ())):
                    sheet.write(0, i, f)
                _t = getattr(ticDat, t)
                containerish = utils.containerish
                if utils.dictish(_t):
                    for row_ind, (p_key, data) in enumerate(_t.items()):
                        for field_ind, cell in enumerate(
                            (p_key if containerish(p_key) else (p_key, )) +
                                tuple(data[_f]
                                      for _f in tdf.data_fields.get(t, ()))):
                            sheet.write(row_ind + 1, field_ind, cell)
                else:
                    for row_ind, data in enumerate(
                            _t if containerish(_t) else _t()):
                        for field_ind, cell in enumerate(
                                tuple(data[_f] for _f in tdf.data_fields[t])):
                            sheet.write(row_ind + 1, field_ind, cell)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)

        filePath = os.path.join(_scratchDir, "spaces.xls")
        writeData(insert_spaces=False)
        ticDat2 = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        writeData(insert_spaces=True)
        ticDat3 = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))
Example #12
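    # Write a TicDat to .xls and .xlsx with case_space_sheet_names=True and verify the
    # read-back matches, for both the spacey and netflow schemas.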
    def testSpacey2(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        for ext in [".xls", ".xlsx"]:
            filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
            tdf.xls.write_file(ticDat, filePath, case_space_sheet_names=True)
            ticDat2 = tdf.xls.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))

        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(
            **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
        for ext in [".xls", ".xlsx"]:
            filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
            tdf.xls.write_file(ticDat, filePath, case_space_sheet_names=True)
            ticDat2 = tdf.xls.create_tic_dat(filePath)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))
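
All of the examples above build their fixtures from the spacesSchema() and spacesData() helpers in the ticdat test utilities, which are not shown on this page. The snippet below is only a sketch of what spacesSchema() presumably returns, reconstructed from the field names the tests pass to write_schema and find_denormalized_sub_table_failures; the real helper may differ in detail.

def spacesSchema():
    # reconstruction for illustration only: table name -> [[primary key fields], [data fields]],
    # the keyword-argument format that TicDatFactory and PanDatFactory accept
    return {"a_table": [["a Field"], ["a Data 1", "a Data 2", "a Data 3"]],
            "b_table": [["b Field 1", "b Field 2", "b Field 3"], ["b Data"]],
            "c_table": [[], ["c Data 1", "c Data 2", "c Data 3", "c Data 4"]]}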