def setUpClass(cls):
    """Build the shared database fixture used by the tests of this class.

    In order, this:

    1. opens a MySQL connection from ``ks_db_settings`` (stored on ``cls.db``);
    2. resets the file handler and registers four CSV files for company 1;
    3. resets precompute and runs ``load_precompute_normalize_URL``;
    4. copies the first merged table into analytics as ``BigTable<max id>``;
    5. patches the copied data (TaxRate scaling, one ProductType override);
    6. adds three derived facts and registers the measure formulas.

    NOTE(review): unittest only invokes ``setUpClass`` when it is a
    classmethod -- confirm the decorator is present above this definition.
    """
    # ----------------------
    # set up db
    # ----------------------
    cls.db = MySQLdb.connect(
        ks_db_settings.setting('host'),
        ks_db_settings.setting('user'),
        ks_db_settings.setting('password'),
        ks_db_settings.setting('database'))

    cls.ks_fh = filehandler(cls.db)
    cls.ks_fh.reset()
    cls.company_id = 1

    register_raw_files("./tests/data2/Sales.csv", cls.company_id, cls.db)
    register_raw_files("./tests/data2/Currencyv2.csv", cls.company_id, cls.db)
    register_raw_files("./tests/data2/CountryRegion.csv", cls.company_id, cls.db)
    register_raw_files("./tests/data2/ComissionTax.csv", cls.company_id, cls.db)

    ks_precompute = precompute(cls.db)
    ks_precompute.reset()

    load_precompute_normalize_URL(cls.company_id, cls.db)

    # Re-read the merge result that the loader produced.
    ks_merge = merge(cls.db)
    merged_tables = ks_merge.getTables()
    merged_table = merged_tables[0]
    meta_data = ks_merge.getMetaDataFromTable(merged_table)

    cls.ks_analytics = analytics(cls.db)
    new_big_table = "BigTable" + str(
        ks_precompute.getMaxBigTableIdForCompany(cls.company_id))
    cls.ks_analytics.reset()
    cls.ks_analytics.addBigTable(merged_table, new_big_table, meta_data)

    qualified_table = "analytics." + new_big_table
    cursor = cls.db.cursor()
    try:
        # clean up
        cursor.execute(
            "update %s set TaxRate = TaxRate/100;" % (qualified_table))
        # ProductType changed from D to M see documentation of test case
        cursor.execute(
            "update %s set ProductType = 'M' where VendorId='0268_20140114_SOFA_ENGLIS' and DownloadDate='6/1/14';" % (qualified_table))
    finally:
        # The original left both cursors open; release the cursor explicitly.
        cursor.close()

    cls.ks_analytics.addFactUsingBinaryOpAPI(
        "NET_REVENUE", "Units", "RoyaltyPrice", "*", new_big_table)
    cls.ks_analytics.addFactUsingBinaryOpAPI(
        "TAXES", "NET_REVENUE", "TaxRate", "*", new_big_table)
    cls.ks_analytics.addFactUsingBinaryOpAPI(
        "REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-", new_big_table)

    # Argument tuples for registerFormula, registered in this exact order.
    formulas = (
        ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
        ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
        ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
        ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
        ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
        ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
        ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
        ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
        ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
        ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum"),
    )
    for formula_args in formulas:
        cls.ks_fh.registerFormula(*formula_args)
def test_load_files(self):
    """Load the four CSV files for company 1 and print two measure queries.

    The test registers the raw files, resets precompute, runs
    ``load_precompute_normalize``, registers the "Plus" and "Mult" formulas
    and then prints the result of ``measure_data`` twice: once for formula
    and raw measures together, once for raw measures with a "Region"
    argument. It makes no assertions; it only fails if a call raises.
    """
    ks_fh = filehandler(self.db)
    ks_fh.reset()
    company_id = 1

    register_raw_files("./tests/data2/Sales.csv", company_id, self.db)
    register_raw_files("./tests/data2/Currencyv2.csv", company_id, self.db)
    register_raw_files("./tests/data2/CountryRegion.csv", company_id, self.db)
    register_raw_files("./tests/data2/ComissionTax.csv", company_id, self.db)

    ks_precompute = precompute(self.db)
    ks_precompute.reset()

    # NOTE(review): the instance is never used afterwards; the construction
    # is kept in case merge() has side effects -- confirm and drop if not.
    merge(self.db)

    load_precompute_normalize(company_id, self.db)

    ks_fh.registerFormula("", "Plus", "Plus", "Units+RoyaltyPrice", "sum")
    ks_fh.registerFormula("", "Mult", "Mult", "Units*RoyaltyPrice", "sum")

    plus_id = ks_fh.getMeasureID("Plus")
    mult_id = ks_fh.getMeasureID("Mult")
    units_id = ks_fh.getMeasureID("Units")
    royalty_id = ks_fh.getMeasureID("RoyaltyPrice")

    # MEASURE DATA DEMO raw_facts + measures with formulas
    print(measure_data(self.db, company_id,
                       [plus_id, mult_id, units_id, royalty_id],
                       "day", "2014-06-01", "2014-06-01"))

    # MEASURE DATA DEMO raw_facts group by
    print(measure_data(self.db, company_id,
                       [units_id, royalty_id],
                       "day", "2014-06-01", "2014-06-01", "Region"))

    self.db.commit()
def load_precompute_normalize_URL(company_name, db):
    """Merge a company's latest tables and push the result downstream.

    The latest tables known to the file handler are added to a freshly reset
    merge via ``addTableURL`` and merged automatically. The first merged
    table is then added to a freshly reset precompute and, under the name
    ``BigTable<max id>``, to a freshly reset analytics.

    Args:
        company_name: company key passed to the file handler and precompute.
            NOTE(review): callers in this file pass a numeric id -- confirm
            the expected type.
        db: open database connection handed to every helper object.
    """
    ks_fh = filehandler(db)
    rows = ks_fh.getLatestTablesByCompany(company_name)

    ks_merge = merge(db)
    ks_merge.reset()
    for row in rows:
        # row[1] holds the table name and row[2] the file path/URL.
        table_name = row[1]
        file_path = row[2]
        ks_merge.addTableURL(file_path, table_name)
    ks_merge.automaticMerge()

    merged_table = ks_merge.getTables()[0]

    ks_precompute = precompute(db)
    meta_data = ks_merge.getMetaDataFromTable(merged_table)
    ks_precompute.reset()
    ks_precompute.addBigTable(meta_data, merged_table, company_name)

    # Metadata is read again after the precompute step, as before.
    analytics_meta_data = ks_merge.getMetaDataFromTable(merged_table)

    ks_analytics = analytics(db)
    new_big_table = "BigTable" + str(
        ks_precompute.getMaxBigTableIdForCompany(company_name))
    ks_analytics.reset()
    ks_analytics.addBigTable(merged_table, new_big_table, analytics_meta_data)
def test_AddBigTable(self):
    """Register four CSV tables, then add "Sales" to precompute.

    The column classification passed to ``addBigTable`` is written out by
    hand. The test makes no assertions; it only fails if a call raises.
    """
    # compute BigTable
    data_dir = "./ks_filehandler/ks_filehandler/data/"
    sources = (
        (data_dir + "Sales.csv", "Sales"),
        (data_dir + "graph/Currencyv2.csv", "Currencyv2"),
        (data_dir + "CountryRegion.csv", "CountryRegion"),
        (data_dir + "ComissionTax.csv", "ComissionTax"),
    )

    merger = merge(self.db)
    merger.reset()
    for csv_path, table_name in sources:
        merger.addTable(csv_path, table_name)
    # NOTE: automaticMerge() is not run here; "Sales" is passed on as-is.

    pre = precompute(self.db)
    column_kinds = {
        "VendorId": "dim",
        "ProductType": "dim",
        "Units": "fact",
        "RoyaltyPrice": "fact",
        "DownloadDate": "date",
        "CustomerCurrency": "dim",
        "CountryCode": "dim",
        "Region": "dim",
        "ExchangeRate": "fact",
        "TaxRate": "fact",
        "RightsHolder": "dim",
        "ComissionRate": "fact",
        "id": "sys",
    }
    pre.reset()
    pre.addBigTable(column_kinds, "Sales", 1)
def test_Two_Tables_One_Link2(self):
    """Sales and Currencyv2 must not be reported as a unique one-link pair.

    The tables are registered under the names "Sales" and "Currencyv2", so
    the check uses those names. The previous version passed the CSV file
    paths instead, which are not the names given to ``addTable``.
    NOTE(review): confirm ``isUniqueOneLink`` expects table names, as the
    sibling one-link test assumes.
    """
    first_table = "./ks_filehandler/ks_filehandler/data/Sales.csv"
    second_table = "./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv"

    ks_merge = merge(self.db)
    ks_merge.reset()
    ks_merge.addTable(first_table, "Sales")
    ks_merge.addTable(second_table, "Currencyv2")

    self.assertEqual(False, ks_merge.isUniqueOneLink("Sales", "Currencyv2"))
def test_Two_Tables_One_Link1(self):
    """Assert that isUniqueOneLink reports True for Sales/CountryRegion."""
    sources = (
        ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
        ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion"),
    )

    merger = merge(self.db)
    merger.reset()
    for csv_path, table_name in sources:
        merger.addTable(csv_path, table_name)

    has_one_link = merger.isUniqueOneLink("Sales", "CountryRegion")
    self.assertEqual(True, has_one_link)
def test_JOIN_TWO_TABLES_TWO_UNIQUE_LINK(self):
    """Print the two-link check for Sales/Currencyv2, then join them.

    No assertions are made; the test only fails if a call raises.
    """
    sources = (
        ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
        ("./ks_filehandler/ks_filehandler/data/graph/Currencyv2.csv", "Currencyv2"),
    )

    merger = merge(self.db)
    merger.reset()
    for csv_path, table_name in sources:
        merger.addTable(csv_path, table_name)

    two_links = merger.isUniqueTwoLinks("Sales", "Currencyv2")
    print(two_links)
    merger.joinUniqueTwoLinks("Sales", "Currencyv2")
def test_Two_Tables_One_Link_Case3(self):
    """Assert that a link graph over Sales/CountryRegion lacks one edge.

    NOTE(review): the first node name contains an embedded space
    ('Cou ntryCode') and both names carry a '.csv' suffix -- confirm this
    is the intended negative case rather than a typo.
    """
    sources = (
        ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
        ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion"),
    )

    merger = merge(self.db)
    merger.reset()
    for csv_path, table_name in sources:
        merger.addTable(csv_path, table_name)

    link_graph = generalLinksDB(["Sales", "CountryRegion"], merger)
    edge_present = link_graph.isEdge('Sales.csv:Cou ntryCode',
                                     'CountryRegion.csv:CountryCode')
    self.assertEqual(False, edge_present)
    self.db.commit()
def test_JOIN_TWO_TABLES_ONE_UNIQUE_LINK(self):
    """Join Sales and CountryRegion, printing the links before and after.

    No assertions are made; the test only fails if a call raises.
    """
    sources = (
        ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
        ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion"),
    )

    merger = merge(self.db)
    merger.reset()
    for csv_path, table_name in sources:
        merger.addTable(csv_path, table_name)

    links_before = merger.getLinks()
    print(links_before)
    merger.joinUniqueOneLink("Sales", "CountryRegion")
    links_after = merger.getLinks()
    print(links_after)
def test_AutomaticFourTableMerge(self):
    """Register four CSV tables and run the automatic merge over them.

    No assertions are made; the test only fails if a call raises.
    """
    data_dir = "./ks_filehandler/ks_filehandler/data/"
    sources = (
        (data_dir + "Sales.csv", "Sales"),
        (data_dir + "graph/Currencyv2.csv", "Currencyv2"),
        (data_dir + "CountryRegion.csv", "CountryRegion"),
        (data_dir + "ComissionTax.csv", "ComissionTax"),
    )

    merger = merge(self.db)
    merger.reset()
    for csv_path, table_name in sources:
        merger.addTable(csv_path, table_name)
    merger.automaticMerge()
def test_JOIN_TABLEWISE(self):
    """Join three tables onto Sales one at a time.

    Each table is added immediately before its join: Currencyv2 (two-link
    join), CountryRegion (one-link join), ComissionTax (two-link join).
    No assertions are made; the test only fails if a call raises.
    """
    data_dir = "./ks_filehandler/ks_filehandler/data/"

    merger = merge(self.db)
    merger.reset()

    # Sales + Currencyv2
    merger.addTable(data_dir + "Sales.csv", "Sales")
    merger.addTable(data_dir + "graph/Currencyv2.csv", "Currencyv2")
    merger.joinUniqueTwoLinks("Sales", "Currencyv2")

    # + CountryRegion
    merger.addTable(data_dir + "CountryRegion.csv", "CountryRegion")
    merger.joinUniqueOneLink("Sales", "CountryRegion")

    # + ComissionTax
    merger.addTable(data_dir + "ComissionTax.csv", "ComissionTax")
    merger.joinUniqueTwoLinks("Sales", "ComissionTax")
def test_Four_Tables_Five_Links(self):
    """Assert that five specific edges exist in the four-table link graph.

    NOTE(review): CountryRegion.csv is read from the ``graph/`` directory
    here, unlike the other tests in this file -- confirm the path.
    """
    data_dir = "./ks_filehandler/ks_filehandler/data/"
    sources = (
        (data_dir + "Sales.csv", "Sales"),
        (data_dir + "graph/CountryRegion.csv", "CountryRegion"),
        (data_dir + "ComissionTax.csv", "ComissionTax"),
        (data_dir + "graph/Currencyv2.csv", "Currencyv2"),
    )

    merger = merge(self.db)
    merger.reset()
    for csv_path, table_name in sources:
        merger.addTable(csv_path, table_name)

    link_graph = generalLinksDB(
        ["Sales", "Currencyv2", "ComissionTax", "CountryRegion"], merger)
    print("Links:")
    print(link_graph.getLinks())

    # Checked in this order; the first missing edge fails the test.
    expected_edges = (
        ('ComissionTax:Region', 'CountryRegion:Region'),
        ('ComissionTax:VendorId', 'Sales:VendorId'),
        ('Sales:CountryCode', 'CountryRegion:CountryCode'),
        ('Sales:DownloadDate', 'Currencyv2:DownloadDate'),
        ('Currencyv2:CustomerCurrency', 'Sales:CustomerCurrency'),
    )
    for left_node, right_node in expected_edges:
        self.assertEqual(True, link_graph.isEdge(left_node, right_node))

    self.db.commit()
def test_IS_UNIQUE_COL_CASE2(self):
    """Assert that isUniqueCol reports False for Sales.CountryCode."""
    sales_csv = "./ks_filehandler/ks_filehandler/data/Sales.csv"

    merger = merge(self.db)
    merger.reset()
    merger.addTable(sales_csv, "Sales")

    is_unique = merger.isUniqueCol("Sales", "CountryCode")
    self.assertEqual(False, is_unique)
def test_IS_UNIQUE_COL_CASE1(self):
    """Assert that isUniqueCol reports True for CountryRegion.CountryCode."""
    country_region_csv = "./ks_filehandler/ks_filehandler/data/CountryRegion.csv"

    merger = merge(self.db)
    merger.reset()
    merger.addTable(country_region_csv, "CountryRegion")

    is_unique = merger.isUniqueCol("CountryRegion", "CountryCode")
    self.assertEqual(True, is_unique)
def setUpClass(cls):
    """Build the shared database fixture used by the tests of this class.

    In order, this:

    1. opens a MySQL connection from ``ks_db_settings`` (stored on ``cls.db``);
    2. resets the file handler and registers table versions for customers
       "1" and "2", including a later second version of "Sales";
    3. resets merge, loads three CSV tables and joins them into ``BigTable``
       with hand-written SQL;
    4. converts ComissionRate/TaxRate to FLOAT and scales TaxRate by 1/100;
    5. creates ``cls.ks_analytics`` and adds three derived facts.

    NOTE(review): unittest only invokes ``setUpClass`` when it is a
    classmethod -- confirm the decorator is present above this definition.
    """
    # ----------------------
    # set up db
    # ----------------------
    cls.db = MySQLdb.connect(
        ks_db_settings.setting('host'),
        ks_db_settings.setting('user'),
        ks_db_settings.setting('password'),
        ks_db_settings.setting('database'))

    # ----------------------
    # filehandler
    # ----------------------
    file_handler = filehandler(cls.db)
    file_handler.reset()
    file_handler.addTable("Sales", "1", "Sales.csv")
    file_handler.addTable("Sales", "2", "SalesCustomerTwo.csv")
    file_handler.addTable("ComissionTax", "1", "ComissionTax.csv")
    file_handler.addTable("ComissionTax", "2", "ComissionTaxCustomerTwo.csv")
    file_handler.addTable("CountryRegion", "1", "CountryRegion.csv")
    file_handler.addTable("CountryRegion", "2", "CountryRegionCustomerTwo.csv")
    file_handler.addTable("Currency2", "1", "Currencyv2.csv")
    # Pause before registering the second "Sales" entry for customer "1".
    # presumably so the two versions get distinct timestamps -- TODO confirm
    time.sleep(1)
    file_handler.addTable("Sales", "1", "SalesNewVersion.csv")
    print(file_handler.getLatestTable("Sales", "2"))

    # ----------------------
    # merge
    # ----------------------
    ks_merge = merge(cls.db)
    ks_merge.reset()
    ks_merge.addTable("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales")
    ks_merge.addTable("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion")
    ks_merge.addTable("./ks_filehandler/ks_filehandler/data/ComissionTax.csv", "ComissionTax")

    sql_BigTable = "CREATE TABLE BigTable(id INT PRIMARY KEY AUTO_INCREMENT, \
                    VendorId VARCHAR(25), \
                    ProductType VARCHAR(25), \
                    Units FLOAT, \
                    RoyaltyPrice FLOAT, \
                    DownloadDate VARCHAR(25), \
                    CustomerCurrency VARCHAR(25), \
                    CountryCode VARCHAR(25), \
                    Region VARCHAR(25), \
                    RightsHolder VARCHAR(25), \
                    ComissionRate VARCHAR(25), \
                    TaxRate VARCHAR(25))"

    sql_join = (
        "insert into BigTable select S.id,S.VendorId,S.ProductType, "
        "S.Units, S.RoyaltyPrice, S.DownloadDate, S.CustomerCurrency, "
        "S.CountryCode, C.Region, T.RightsHolder, T.ComissionRate, "
        "T.TaxRate from Sales S Inner Join CountryRegion C on "
        "S.CountryCode=C.CountryCode Inner join ComissionTax T on "
        "S.VendorId = T.VendorId and C.Region = T.Region;"
    )
    ks_merge.join(sql_join, sql_BigTable)

    # ----------------------
    # clean up
    # ----------------------
    cleanup_statements = (
        "use merge;",
        "ALTER TABLE BigTable change ComissionRate ComissionRate FLOAT;",
        "ALTER TABLE BigTable change TaxRate TaxRate FLOAT;",
        "update BigTable set TaxRate = TaxRate/100;",
    )
    cursor = cls.db.cursor()
    try:
        for sql in cleanup_statements:
            cursor.execute(sql)
    finally:
        # The original never closed this cursor; release it explicitly.
        cursor.close()

    # ----------------------
    # analytics
    # ----------------------
    cls.ks_analytics = analytics(cls.db)
    cls.ks_analytics.addFactUsingBinaryOp("NET_REVENUE", "Units", "RoyaltyPrice", "*")
    cls.ks_analytics.addFactUsingBinaryOp("TAXES", "NET_REVENUE", "TaxRate", "*")
    cls.ks_analytics.addFactUsingBinaryOp("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-")