예제 #1
0
    def setUpClass(cls):
        """Build the database fixture shared by the tests of this class.

        In order, this connects to MySQL with the values from
        ``ks_db_settings``, registers four raw CSV files for company 1, runs
        ``load_precompute_normalize_URL``, adds the first merge table to
        analytics under the name ``BigTable<max id>``, patches two values in
        that analytics table, and finally registers derived facts and
        formulas.

        Sets ``cls.db``, ``cls.ks_fh``, ``cls.company_id`` and
        ``cls.ks_analytics`` for use by the tests.
        """
        #----------------------
        # set up db
        #----------------------
        cls.db = MySQLdb.connect(
            ks_db_settings.setting('host'),
            ks_db_settings.setting('user'),
            ks_db_settings.setting('password'),
            ks_db_settings.setting('database'))

        cls.ks_fh = filehandler(cls.db)
        cls.ks_fh.reset()
        cls.company_id = 1
        for csv_path in (
                "./tests/data2/Sales.csv",
                "./tests/data2/Currencyv2.csv",
                "./tests/data2/CountryRegion.csv",
                "./tests/data2/ComissionTax.csv"):
            register_raw_files(csv_path, cls.company_id, cls.db)

        ks_precompute = precompute(cls.db)
        ks_precompute.reset()
        load_precompute_normalize_URL(cls.company_id, cls.db)

        # A merge handle is only needed after the pipeline has run, to read
        # back the resulting table and its column metadata.
        ks_merge = merge(cls.db)
        mergeBigTable = ks_merge.getTables()
        metaData = ks_merge.getMetaDataFromTable(mergeBigTable[0])

        cls.ks_analytics = analytics(cls.db)
        newBigTable = "BigTable" + str(
            ks_precompute.getMaxBigTableIdForCompany(cls.company_id))
        cls.ks_analytics.reset()
        cls.ks_analytics.addBigTable(mergeBigTable[0], newBigTable, metaData)

        # clean up
        # The table name cannot be passed as a bound SQL parameter, so it is
        # interpolated; it is built from internal values only.
        qualified_table = "analytics." + newBigTable
        cursor = cls.db.cursor()
        try:
            sql = "update %s set TaxRate = TaxRate/100;" % (qualified_table)
            cursor.execute(sql)
            # ProductType changed from D to M see documentation of test case
            sql = "update %s set ProductType = 'M' where VendorId='0268_20140114_SOFA_ENGLIS' and DownloadDate='6/1/14';" % (qualified_table)
            cursor.execute(sql)
        finally:
            cursor.close()

        # Each fact is defined from columns/facts that already exist, so the
        # order of these three calls matters.
        cls.ks_analytics.addFactUsingBinaryOpAPI("NET_REVENUE", "Units", "RoyaltyPrice", "*", newBigTable)
        cls.ks_analytics.addFactUsingBinaryOpAPI("TAXES", "NET_REVENUE", "TaxRate", "*", newBigTable)
        cls.ks_analytics.addFactUsingBinaryOpAPI("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-", newBigTable)

        # NOTE(review): the "NonsenseSum" and "Nonsense" rows pass
        # "Individual_Tax" as their third argument instead of repeating their
        # own name -- confirm whether that is intended.
        formula_rows = (
            ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
            ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
            ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
            ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
            ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
            ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
            ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
            ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
            ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
            ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum"),
        )
        for formula_args in formula_rows:
            cls.ks_fh.registerFormula(*formula_args)
예제 #2
0
    def test_load_files(self):
        """Run the load pipeline for company 1 and print two measure queries.

        Registers the four raw CSV files, runs ``load_precompute_normalize``,
        registers the "Plus" and "Mult" formulas, then prints the result of
        ``measure_data`` for 2014-06-01: once for four measures, once for two
        measures with "Region" as an extra argument. The test makes no
        assertions; it only fails if one of the calls raises.
        """
        ks_fh = filehandler(self.db)
        ks_fh.reset()
        company_id = 1
        for csv_path in (
                "./tests/data2/Sales.csv",
                "./tests/data2/Currencyv2.csv",
                "./tests/data2/CountryRegion.csv",
                "./tests/data2/ComissionTax.csv"):
            register_raw_files(csv_path, company_id, self.db)

        ks_precompute = precompute(self.db)
        ks_precompute.reset()
        load_precompute_normalize(company_id, self.db)

        ks_fh.registerFormula("", "Plus", "Plus", "Units+RoyaltyPrice", "sum")
        ks_fh.registerFormula("", "Mult", "Mult", "Units*RoyaltyPrice", "sum")

        plus_id = ks_fh.getMeasureID("Plus")
        mult_id = ks_fh.getMeasureID("Mult")
        units_id = ks_fh.getMeasureID("Units")
        royalty_id = ks_fh.getMeasureID("RoyaltyPrice")

        # MEASURE DATA DEMO raw_facts + measures with formulas
        print(measure_data(self.db, company_id, [plus_id, mult_id, units_id, royalty_id], "day", "2014-06-01", "2014-06-01"))

        # MEASURE DATA DEMO raw_facts group by
        print(measure_data(self.db, company_id, [units_id, royalty_id], "day", "2014-06-01", "2014-06-01", "Region"))

        self.db.commit()
def load_precompute_normalize_URL(company_name, db):
    """Merge a company's latest registered files and publish the result.

    The latest tables registered for the company are added to a freshly
    reset merge store via ``addTableURL`` and merged automatically. The
    first table reported by the merge store is then added to a freshly
    reset precompute store and to a freshly reset analytics store, the
    latter under the name ``BigTable<max big-table id for the company>``.

    Args:
        company_name: Company identifier forwarded unchanged to
            ``getLatestTablesByCompany``, ``addBigTable`` and
            ``getMaxBigTableIdForCompany``.
        db: Database connection handed to every helper object.

    Returns:
        None.
    """
    ks_fh = filehandler(db)
    rows = ks_fh.getLatestTablesByCompany(company_name)

    ks_merge = merge(db)
    ks_merge.reset()
    for row in rows:
        # Column 1 is used as the table name and column 2 as the file path.
        table_name = row[1]
        file_path = row[2]
        ks_merge.addTableURL(file_path, table_name)

    ks_merge.automaticMerge()

    mergeBigTable = ks_merge.getTables()
    merged_table = mergeBigTable[0]

    ks_precompute = precompute(db)
    meta_data = ks_merge.getMetaDataFromTable(merged_table)
    ks_precompute.reset()
    ks_precompute.addBigTable(meta_data, merged_table, company_name)

    # The metadata is fetched a second time rather than reused, so analytics
    # receives a fresh object regardless of what addBigTable did with the
    # first one.
    metaData = ks_merge.getMetaDataFromTable(merged_table)

    ks_analytics = analytics(db)
    newBigTable = "BigTable" + str(
        ks_precompute.getMaxBigTableIdForCompany(company_name))
    ks_analytics.reset()
    ks_analytics.addBigTable(merged_table, newBigTable, metaData)
예제 #4
0
 def test_AddBigTable(self):
     """Add "Sales" to precompute as a big table with hand-written metadata.

     Four CSV files are first added to a freshly reset merge store. The
     metadata dict maps each column name to one of the tags "dim", "fact",
     "date" or "sys". The test makes no assertions; it only fails if one
     of the calls raises.
     """
     # compute BigTable
     ks_merge = merge(self.db)
     ks_merge.reset()
     for csv_path, table_name in (
             ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
             ("./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv", "Currencyv2"),
             ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion"),
             ("./ks_filehandler/ks_filehandler/data/ComissionTax.csv", "ComissionTax")):
         ks_merge.addTable(csv_path, table_name)
     # NOTE: automaticMerge() is not called in this test, so no automatic
     # merge is triggered before the big table is added.

     ks_precompute = precompute(self.db)
     column_tags = {
         "VendorId": "dim",
         "ProductType": "dim",
         "Units": "fact",
         "RoyaltyPrice": "fact",
         "DownloadDate": "date",
         "CustomerCurrency": "dim",
         "CountryCode": "dim",
         "Region": "dim",
         "ExchangeRate": "fact",
         "TaxRate": "fact",
         "RightsHolder": "dim",
         "ComissionRate": "fact",
         "id": "sys",
     }
     ks_precompute.reset()
     ks_precompute.addBigTable(column_tags, "Sales", 1)
예제 #5
0
 def test_Two_Tables_One_Link2(self):
     """Sales and Currencyv2 must not be reported as having one unique link."""
     first_table = "./ks_filehandler/ks_filehandler/data/Sales.csv"
     second_table = "./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv"
     ks_merge = merge(self.db)
     ks_merge.reset()
     ks_merge.addTable(first_table, "Sales")
     ks_merge.addTable(second_table, "Currencyv2")
     # Query by the names the tables were registered under, not by their CSV
     # paths. The paths are never registered as table names, so asking about
     # them would not exercise the link logic at all.
     self.assertEqual(False, ks_merge.isUniqueOneLink("Sales", "Currencyv2"))
예제 #6
0
 def test_Two_Tables_One_Link1(self):
     """Sales and CountryRegion must be reported as having one unique link."""
     ks_merge = merge(self.db)
     ks_merge.reset()
     for csv_path, table_name in (
             ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
             ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion")):
         ks_merge.addTable(csv_path, table_name)

     has_one_link = ks_merge.isUniqueOneLink("Sales", "CountryRegion")
     self.assertEqual(True, has_one_link)
예제 #7
0
 def test_JOIN_TWO_TABLES_TWO_UNIQUE_LINK(self):
     """Join Sales with Currencyv2 through ``joinUniqueTwoLinks``.

     The result of ``isUniqueTwoLinks`` is only printed, not asserted, so
     the test fails only if one of the calls raises.
     """
     ks_merge = merge(self.db)
     ks_merge.reset()
     for csv_path, table_name in (
             ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
             ("./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv", "Currencyv2")):
         ks_merge.addTable(csv_path, table_name)

     two_links = ks_merge.isUniqueTwoLinks("Sales", "Currencyv2")
     print(two_links)
     ks_merge.joinUniqueTwoLinks("Sales", "Currencyv2")
예제 #8
0
 def test_Two_Tables_One_Link_Case3(self):
     """``isEdge`` must return False for the two node names queried here."""
     ks_merge = merge(self.db)
     ks_merge.reset()
     for csv_path, table_name in (
             ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
             ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion")):
         ks_merge.addTable(csv_path, table_name)

     general_links = generalLinksDB(["Sales", "CountryRegion"], ks_merge)
     # NOTE(review): these node names carry a ".csv" suffix and the first one
     # contains an embedded space, unlike the "Table:Column" names used by
     # other tests in this file. Presumably they are meant to be nodes that
     # do not exist -- confirm the space is not a typo.
     edge_found = general_links.isEdge('Sales.csv:Cou ntryCode', 'CountryRegion.csv:CountryCode')
     self.assertEqual(False, edge_found)
     self.db.commit()
예제 #9
0
 def test_JOIN_TWO_TABLES_ONE_UNIQUE_LINK(self):
     """Join Sales with CountryRegion through ``joinUniqueOneLink``.

     The links reported by the merge store are printed before and after the
     join; nothing is asserted.
     """
     ks_merge = merge(self.db)
     ks_merge.reset()
     for csv_path, table_name in (
             ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
             ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion")):
         ks_merge.addTable(csv_path, table_name)

     links_before = ks_merge.getLinks()
     print(links_before)
     ks_merge.joinUniqueOneLink("Sales", "CountryRegion")
     links_after = ks_merge.getLinks()
     print(links_after)
예제 #10
0
 def test_AutomaticFourTableMerge(self):
     """Add four CSV tables to a reset merge store and run ``automaticMerge``.

     Nothing is asserted; the test fails only if one of the calls raises.
     """
     ks_merge = merge(self.db)
     ks_merge.reset()
     for csv_path, table_name in (
             ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
             ("./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv", "Currencyv2"),
             ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion"),
             ("./ks_filehandler/ks_filehandler/data/ComissionTax.csv", "ComissionTax")):
         ks_merge.addTable(csv_path, table_name)
     ks_merge.automaticMerge()
예제 #11
0
 def test_JOIN_TABLEWISE(self):
     """Join three tables onto Sales one at a time.

     Each further table is added right before it is joined: Currencyv2 and
     ComissionTax through ``joinUniqueTwoLinks``, CountryRegion through
     ``joinUniqueOneLink``. Nothing is asserted.
     """
     data_sales = "./ks_filehandler/ks_filehandler/data/Sales.csv"
     data_currency = "./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv"
     data_country = "./ks_filehandler/ks_filehandler/data/CountryRegion.csv"
     data_commission = "./ks_filehandler/ks_filehandler/data/ComissionTax.csv"

     ks_merge = merge(self.db)
     ks_merge.reset()
     ks_merge.addTable(data_sales, "Sales")

     # Step 1: Sales <-> Currencyv2
     ks_merge.addTable(data_currency, "Currencyv2")
     ks_merge.joinUniqueTwoLinks("Sales", "Currencyv2")

     # Step 2: Sales <-> CountryRegion
     ks_merge.addTable(data_country, "CountryRegion")
     ks_merge.joinUniqueOneLink("Sales", "CountryRegion")

     # Step 3: Sales <-> ComissionTax
     ks_merge.addTable(data_commission, "ComissionTax")
     ks_merge.joinUniqueTwoLinks("Sales", "ComissionTax")
예제 #12
0
 def test_Four_Tables_Five_Links(self):
     """Check that five expected edges exist between the four tables.

     The tables are added to a reset merge store, a ``generalLinksDB`` is
     built over them, its links are printed, and ``isEdge`` is asserted to
     be True for each expected pair of "Table:Column" nodes.
     """
     ks_merge = merge(self.db)
     ks_merge.reset()
     for csv_path, table_name in (
             ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
             ("./ks_filehandler/ks_filehandler/data/graph/CountryRegion.csv", "CountryRegion"),
             ("./ks_filehandler/ks_filehandler/data/ComissionTax.csv", "ComissionTax"),
             ("./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv", "Currencyv2")):
         ks_merge.addTable(csv_path, table_name)

     general_links = generalLinksDB(["Sales", "Currencyv2", "ComissionTax", "CountryRegion"], ks_merge)

     print("Links:")
     print(general_links.getLinks())

     expected_edges = (
         ('ComissionTax:Region', 'CountryRegion:Region'),
         ('ComissionTax:VendorId', 'Sales:VendorId'),
         ('Sales:CountryCode', 'CountryRegion:CountryCode'),
         ('Sales:DownloadDate', 'Currencyv2:DownloadDate'),
         ('Currencyv2:CustomerCurrency', 'Sales:CustomerCurrency'),
     )
     for left_node, right_node in expected_edges:
         self.assertEqual(True, general_links.isEdge(left_node, right_node))
     self.db.commit()
예제 #13
0
 def test_IS_UNIQUE_COL_CASE2(self):
     """``isUniqueCol`` must be False for the CountryCode column of Sales."""
     ks_merge = merge(self.db)
     ks_merge.reset()
     ks_merge.addTable("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales")

     is_unique = ks_merge.isUniqueCol("Sales", "CountryCode")
     self.assertEqual(False, is_unique)
예제 #14
0
 def test_IS_UNIQUE_COL_CASE1(self):
     """``isUniqueCol`` must be True for the CountryCode column of CountryRegion."""
     ks_merge = merge(self.db)
     ks_merge.reset()
     ks_merge.addTable("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion")

     is_unique = ks_merge.isUniqueCol("CountryRegion", "CountryCode")
     self.assertEqual(True, is_unique)
예제 #15
0
    def setUpClass(cls):
        """Build the database fixture shared by the tests of this class.

        In order, this connects to MySQL, registers several file versions
        with the file handler, loads three CSV tables into the merge store,
        joins them into ``BigTable`` with hand-written SQL, converts two
        columns of that table to FLOAT, divides ``TaxRate`` by 100, and
        defines three derived facts in analytics.

        Sets ``cls.db`` and ``cls.ks_analytics`` for use by the tests.
        """
        # ---- database connection ----
        connect_args = [
            ks_db_settings.setting(key)
            for key in ('host', 'user', 'password', 'database')
        ]
        cls.db = MySQLdb.connect(*connect_args)

        # ---- filehandler ----
        file_handler = filehandler(cls.db)
        file_handler.reset()
        initial_tables = (
            ("Sales", "1", "Sales.csv"),
            ("Sales", "2", "SalesCustomerTwo.csv"),
            ("ComissionTax", "1", "ComissionTax.csv"),
            ("ComissionTax", "2", "ComissionTaxCustomerTwo.csv"),
            ("CountryRegion", "1", "CountryRegion.csv"),
            ("CountryRegion", "2", "CountryRegionCustomerTwo.csv"),
            ("Currency2", "1", "Currencyv2.csv"),
        )
        for table_args in initial_tables:
            file_handler.addTable(*table_args)
        # Presumably the pause gives the next Sales entry a later timestamp
        # than the first one -- confirm against filehandler.
        time.sleep(1)
        file_handler.addTable("Sales", "1", "SalesNewVersion.csv")
        print(file_handler.getLatestTable("Sales", "2"))

        # ---- merge ----
        ks_merge = merge(cls.db)
        ks_merge.reset()
        for csv_path, table_name in (
                ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
                ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion"),
                ("./ks_filehandler/ks_filehandler/data/ComissionTax.csv", "ComissionTax")):
            ks_merge.addTable(csv_path, table_name)

        # The rate columns are created as VARCHAR here and converted to
        # FLOAT in the clean-up step below.
        create_big_table = "CREATE TABLE BigTable(id INT PRIMARY KEY AUTO_INCREMENT, \
             VendorId VARCHAR(25), \
             ProductType VARCHAR(25), \
             Units FLOAT, \
             RoyaltyPrice FLOAT, \
             DownloadDate VARCHAR(25), \
             CustomerCurrency VARCHAR(25), \
             CountryCode VARCHAR(25), \
             Region VARCHAR(25), \
             RightsHolder VARCHAR(25), \
             ComissionRate VARCHAR(25), \
             TaxRate VARCHAR(25))"

        fill_big_table = (
            "insert into BigTable select S.id,S.VendorId,S.ProductType, "
            "S.Units, S.RoyaltyPrice, S.DownloadDate, S.CustomerCurrency, "
            "S.CountryCode, C.Region, T.RightsHolder, T.ComissionRate, "
            "T.TaxRate from Sales S Inner Join CountryRegion C on "
            "S.CountryCode=C.CountryCode Inner join ComissionTax T on "
            "S.VendorId = T.VendorId and C.Region = T.Region;"
        )

        ks_merge.join(fill_big_table, create_big_table)

        # ---- clean up ----
        cursor = cls.db.cursor()
        cleanup_statements = (
            "use merge;",
            "ALTER TABLE BigTable change ComissionRate ComissionRate FLOAT;",
            "ALTER TABLE BigTable change TaxRate TaxRate FLOAT;",
            "update BigTable set TaxRate = TaxRate/100;",
        )
        for statement in cleanup_statements:
            cursor.execute(statement)

        # ---- analytics ----
        # Each fact is defined from columns/facts that already exist, so the
        # order of these definitions matters.
        cls.ks_analytics = analytics(cls.db)
        for fact_args in (
                ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
                ("TAXES", "NET_REVENUE", "TaxRate", "*"),
                ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-")):
            cls.ks_analytics.addFactUsingBinaryOp(*fact_args)