def load_precompute_normalize_URL(company_name, db):
    """Merge a company's latest tables and publish the result as a big table.

    Steps, in the order they are executed:
      1. fetch the latest table rows for ``company_name`` from the filehandler
      2. reset the merge stage, add every row with ``addTableURL`` and run
         ``automaticMerge``
      3. reset the precompute stage and add the first merged table to it
      4. reset the analytics stage and add the same merged table under the
         name ``"BigTable" + <max big table id for the company>``

    Parameters:
        company_name -- key handed to the filehandler / precompute lookups
                        (callers in this file pass the numeric company id)
        db           -- database connection shared by all pipeline stages

    Returns:
        None.

    Note: the first entry of ``getTables()`` is indexed unconditionally, so
    an empty merge result makes this function fail at that point.
    """
    latest_rows = filehandler(db).getLatestTablesByCompany(company_name)

    ks_merge = merge(db)
    ks_merge.reset()
    for row in latest_rows:
        # row[1] is used as the table name, row[2] as the file path
        ks_merge.addTableURL(row[2], row[1])
    ks_merge.automaticMerge()
    merged_tables = ks_merge.getTables()

    ks_precompute = precompute(db)
    merged_table = merged_tables[0]
    precompute_meta = ks_merge.getMetaDataFromTable(merged_table)
    ks_precompute.reset()
    ks_precompute.addBigTable(precompute_meta, merged_table, company_name)

    # NOTE(review): the metadata is read a second time instead of being
    # reused; kept as-is because it is not visible here whether
    # precompute.addBigTable modifies the object it is given - confirm.
    analytics_meta = ks_merge.getMetaDataFromTable(merged_table)
    ks_analytics = analytics(db)
    # The max id is looked up once, right where the table name is built
    # (the former unused ``id`` local shadowed the builtin).
    new_big_table = "BigTable" + str(
        ks_precompute.getMaxBigTableIdForCompany(company_name))
    ks_analytics.reset()
    ks_analytics.addBigTable(merged_table, new_big_table, analytics_meta)
def setUpClass(cls):
    """Build the shared fixture from the raw CSV files under ./tests/data2.

    Connects to MySQL, resets the filehandler and precompute stages,
    registers four raw CSV files for company 1, runs
    ``load_precompute_normalize_URL`` and then (again) adds the merged table
    to analytics.  Afterwards the analytics big table is patched with two
    SQL updates, three derived facts are added and ten formulas are
    registered.

    Sets on the class: ``db``, ``ks_fh``, ``company_id``, ``ks_analytics``.
    """
    #----------------------
    # set up db
    #----------------------
    cls.db = MySQLdb.connect(
        ks_db_settings.setting('host'),
        ks_db_settings.setting('user'),
        ks_db_settings.setting('password'),
        ks_db_settings.setting('database'))
    cls.ks_fh = filehandler(cls.db)
    cls.ks_fh.reset()
    cls.company_id = 1
    for csv_path in ("./tests/data2/Sales.csv",
                     "./tests/data2/Currencyv2.csv",
                     "./tests/data2/CountryRegion.csv",
                     "./tests/data2/ComissionTax.csv"):
        register_raw_files(csv_path, cls.company_id, cls.db)

    ks_precompute = precompute(cls.db)
    ks_precompute.reset()

    # precompute
    # NOTE(review): this merge instance is replaced below before it is ever
    # used; it is kept only because the constructor's side effects are not
    # visible here - confirm and remove.
    ks_merge = merge(cls.db)
    load_precompute_normalize_URL(cls.company_id, cls.db)

    ks_merge = merge(cls.db)
    merged_table = ks_merge.getTables()[0]
    meta_data = ks_merge.getMetaDataFromTable(merged_table)
    cls.ks_analytics = analytics(cls.db)
    new_big_table = "BigTable" + str(
        ks_precompute.getMaxBigTableIdForCompany(cls.company_id))
    cls.ks_analytics.reset()
    cls.ks_analytics.addBigTable(merged_table, new_big_table, meta_data)

    qualified_table = "analytics." + new_big_table

    # clean up
    sql = "update %s set TaxRate = TaxRate/100;" % qualified_table
    cls.db.cursor().execute(sql)

    # ProductType changed from D to M see documentation of test case
    sql = "update %s set ProductType = 'M' where VendorId='0268_20140114_SOFA_ENGLIS' and DownloadDate='6/1/14';" % qualified_table
    cls.db.cursor().execute(sql)

    # Derived facts; each one may build on the fact added before it.
    for fact_args in (
            ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
            ("TAXES", "NET_REVENUE", "TaxRate", "*"),
            ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-")):
        cls.ks_analytics.addFactUsingBinaryOpAPI(*(fact_args + (new_big_table,)))

    # Formulas, registered in this exact order with the original arguments.
    for formula_args in (
            ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
            ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
            ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
            ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
            ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
            ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
            ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
            ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
            ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
            ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum")):
        cls.ks_fh.registerFormula(*formula_args)
def setUpClass(cls):
    """Build the shared fixture from the "new version" CsvPy sources.

    Registers four CsvPy sources for company 1, rewrites two ``files`` rows
    so they point at ./tests/data2/version/, runs the CsvPy precompute
    pipeline, patches the resulting analytics big table with two SQL
    updates, adds three derived facts and registers ten formulas.

    Sets on the class: ``db``, ``ks_fh``, ``company_id``, ``ks_analytics``.
    """
    #----------------------
    # set up db
    #----------------------
    cls.db = ks_db_settings.connect()
    cls.ks_fh = filehandler(cls.db)
    cls.ks_fh.reset()
    cls.company_id = 1
    for source_name in ("Sales_new_version",
                        "CurrencyV2",
                        "ComissionTax_new_version",
                        "CountryRegion"):
        register_raw_filesCsvPy(source_name, cls.company_id, cls.db)

    # Point the Sales and ComissionTax file records at the version/ folder.
    for redirect_sql in (
            'update files set file_name = "./tests/data2/version/Sales.csv" where file_name = "./tests/data2/Sales.csv"',
            'update files set file_name = "./tests/data2/version/ComissionTax.csv" where file_name = "./tests/data2/ComissionTax.csv"'):
        cls.db.cursor().execute(redirect_sql)

    ks_precompute = precompute(cls.db)
    ks_precompute.reset()

    #precompute
    load_precompute_normalize_CsvPy(cls.company_id, cls.db)
    big_table_id = ks_precompute.getMaxBigTableIdForCompany(cls.company_id)
    big_table_name = "BigTable" + str(big_table_id)
    cls.ks_analytics = analytics(cls.db)

    qualified_table = "analytics." + big_table_name

    #clean up
    cls.db.cursor().execute(
        "update %s set TaxRate = TaxRate/100;" % qualified_table)

    # ProductType changed from D to M see documentation of test case
    cls.db.cursor().execute(
        "update %s set ProductType = 'M' where VendorId='0268_20140114_SOFA_ENGLIS' and DownloadDate='6/1/14';" % qualified_table)

    # Derived facts, added in dependency order.
    derived_facts = (
        ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
        ("TAXES", "NET_REVENUE", "TaxRate", "*"),
        ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-"),
    )
    for fact_name, left_operand, right_operand, operator_symbol in derived_facts:
        cls.ks_analytics.addFactUsingBinaryOpAPI(
            fact_name, left_operand, right_operand, operator_symbol,
            big_table_name)

    # Formulas, registered in the original order with the original arguments.
    formula_rows = (
        ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
        ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
        ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
        ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
        ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
        ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
        ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
        ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
        ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
        ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum"),
    )
    for formula_row in formula_rows:
        cls.ks_fh.registerFormula(*formula_row)
def setUpClass(cls):
    """Build a minimal fixture that only registers the ``UnitsSUM`` formula.

    Registers the four standard CsvPy sources for company 1, runs the CsvPy
    precompute pipeline and registers a single formula.

    Sets on the class: ``db``, ``ks_fh``, ``company_id``, ``ks_analytics``.
    """
    #----------------------
    # set up db
    #----------------------
    cls.db = ks_db_settings.connect()
    cls.ks_fh = filehandler(cls.db)
    cls.ks_fh.reset()
    cls.company_id = 1
    for source_name in ("Sales", "CurrencyV2", "ComissionTax", "CountryRegion"):
        register_raw_filesCsvPy(source_name, cls.company_id, cls.db)

    ks_precompute = precompute(cls.db)
    ks_precompute.reset()

    #precompute
    load_precompute_normalize_CsvPy(cls.company_id, cls.db)
    # The big table name is computed but not used further in this fixture.
    big_table_name = "BigTable" + str(
        ks_precompute.getMaxBigTableIdForCompany(cls.company_id))
    cls.ks_analytics = analytics(cls.db)

    cls.ks_fh.registerFormula("", "UnitsSUM", "UnitsSUM", "sum(Units)", "sum")
def setUpClass(cls):
    """Build the shared fixture on top of the ``Salesfreq`` source.

    Registers three CsvPy sources for company 1 (CurrencyV2 is not
    registered here), runs the CsvPy precompute pipeline, adds three derived
    facts to the resulting big table and registers ten formulas.

    Sets on the class: ``db``, ``ks_fh``, ``company_id``, ``ks_analytics``.
    """
    #----------------------
    # set up db
    #----------------------
    cls.db = ks_db_settings.connect()
    cls.ks_fh = filehandler(cls.db)
    cls.ks_fh.reset()
    cls.company_id = 1
    # The CurrencyV2 registration is disabled in this fixture:
    #register_raw_filesCsvPy("CurrencyV2",cls.company_id, cls.db)
    for source_name in ("Salesfreq", "ComissionTax", "CountryRegion"):
        register_raw_filesCsvPy(source_name, cls.company_id, cls.db)

    ks_precompute = precompute(cls.db)
    ks_precompute.reset()

    #precompute
    load_precompute_normalize_CsvPy(cls.company_id, cls.db)
    big_table_id = ks_precompute.getMaxBigTableIdForCompany(cls.company_id)
    big_table_name = "BigTable" + str(big_table_id)
    cls.ks_analytics = analytics(cls.db)

    # Derived facts, added in dependency order.
    derived_facts = (
        ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
        ("TAXES", "NET_REVENUE", "TaxRate", "*"),
        ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-"),
    )
    for fact_name, left_operand, right_operand, operator_symbol in derived_facts:
        cls.ks_analytics.addFactUsingBinaryOpAPI(
            fact_name, left_operand, right_operand, operator_symbol,
            big_table_name)

    # Formulas, registered in the original order with the original arguments.
    formula_rows = (
        ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
        ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
        ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
        ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
        ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
        ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
        ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
        ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
        ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
        ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum"),
    )
    for formula_row in formula_rows:
        cls.ks_fh.registerFormula(*formula_row)
# Ad-hoc end-to-end run: rebuild the pipeline for company 1 from the CsvPy
# sources, register the SumMult formula and print its daily values grouped by
# date and vendor.  ``db`` must already be bound before these statements run.
ks_fh = filehandler(db)
ks_fh.reset()
company_id = 1
register_raw_filesCsvPy("Sales", company_id, db)
register_raw_filesCsvPy("CurrencyV2", company_id, db)
register_raw_filesCsvPy("ComissionTax", company_id, db)
register_raw_filesCsvPy("CountryRegion", company_id, db)

ks_precompute = precompute(db)
ks_precompute.reset()

#precompute
load_precompute_normalize_CsvPy(company_id, db)
newBigTable = "BigTable" + str(ks_precompute.getMaxBigTableIdForCompany(company_id))
ks_analytics = analytics(db)

# Derived facts; TAXES and REVENUE_AFTER_TAX build on the facts added before.
ks_analytics.addFactUsingBinaryOpAPI("NET_REVENUE", "Units", "RoyaltyPrice", "*", newBigTable)
ks_analytics.addFactUsingBinaryOpAPI("TAXES", "NET_REVENUE", "TaxRate", "*", newBigTable)
ks_analytics.addFactUsingBinaryOpAPI("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-", newBigTable)

ks_fh.registerFormula("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum")

measure_ids = MeasureName2MeasureIds("SumMult")
group_by = "ks_date, VendorId"
print(ks_analytics.parseGroupBy(group_by))
result = measure_data(db, company_id, measure_ids, "day", "2014-06-01", "2014-06-02", group_by)
# Parenthesised form: same output on Python 2, valid syntax on Python 3,
# and consistent with the print call above.
print(result)
def measure_data(db, company_id, measures, frequency=None, start_date=None,
                 end_date=None, groupby=None, measure_filter=None,
                 dimension_filters=None, score_kpis=True):
    """Return measure values from the company's most recent big table.

    The table queried is ``"BigTable" + <max big table id for company_id>``;
    the actual work is delegated to ``analytics.getMeasureData``.

    Parameters:
        db                - database connection
        company_id        - company whose latest big table is queried
        measures          - set() of measure IDs
        frequency         - str, one of 'month', 'day' or 'quarter'
        start_date        - datetime.datetime object, use data >= this
        end_date          - datetime.datetime object, use data < this
        groupby           - str dimension name, or None
        measure_filter    - if passed, a set() of measure IDs.
                            Accepted but not forwarded to getMeasureData.
        dimension_filters - dict keyed by dimension name, or None for no
                            filtering on dimensions.  Each value describes a
                            logical expression over levels (like a WHERE
                            clause), e.g.

                                {
                                    'DIM1': 'LEV1',
                                    'DIM2': ('|', 'LEV5', 'LEV6'),
                                    'DIM3': ('&', 'LEV10', 'LEV11'),
                                }

                            which is equivalent to this simplified clause:

                                WHERE DIM1 = LEV1
                                  AND (DIM2 = LEV5 OR DIM2 = LEV6)
                                  AND (DIM3 = LEV10 AND DIM3 = LEV11)

                            The DIM3 part looks strange, but the user may
                            request it.  With current data it returns no
                            rows, but the fact table structure supports the
                            case, so it should be accessible.
        score_kpis        - if True, return 1-10 scores for KPIs; if False,
                            return values.
                            Accepted but not forwarded to getMeasureData.

    Return value:
        The shape depends on whether groupby is passed.

        Without groupby:

            {
                'CODE1': {'period1': 100.0, 'period5': 100.0},
                'CODE2': {'period2': 130.0, 'period3': 130.0},
            }

        CODE1 and CODE2 are the measure codes of the measure IDs passed.
        Periods are strings formatted as follows:

            day frequency:     YYYY-MM-DD
            month frequency:   YYYY-MM
            quarter frequency: YYYY-Q[1-4]

        With a groupby, one more level is added:

            {
                'CODE1': {
                    'LEV1': {'period1': 100.0, 'period5': 100.0},
                    'LEV2': {'period1': 100.0, 'period5': 100.0},
                },
            }

        where LEV1 and LEV2 are levels of the dimension specified.

        Periods and levels without data are omitted from the structure,
        e.g. there is no need for a '2011-03': None entry.
    """
    latest_id = precompute(db).getMaxBigTableIdForCompany(company_id)
    big_table_name = "BigTable" + str(latest_id)
    return analytics(db).getMeasureData(
        big_table_name, measures, start_date, end_date,
        groupby, dimension_filters, frequency)
def setUpClass(cls):
    """Build the fixture through the filehandler, merge and analytics stages.

    Registers table files for customers "1" and "2" with the filehandler,
    loads three CSV files into the merge stage, joins them into ``BigTable``,
    converts the rate columns to FLOAT (TaxRate is also divided by 100) and
    finally adds three derived facts through the analytics stage.

    Sets on the class: ``db``, ``ks_analytics``.
    """
    #----------------------
    # set up db
    #----------------------
    cls.db = MySQLdb.connect(
        ks_db_settings.setting('host'),
        ks_db_settings.setting('user'),
        ks_db_settings.setting('password'),
        ks_db_settings.setting('database'))

    #----------------------
    # filehandler
    #----------------------
    file_handler = filehandler(cls.db)
    file_handler.reset()
    for table_args in (
            ("Sales", "1", "Sales.csv"),
            ("Sales", "2", "SalesCustomerTwo.csv"),
            ("ComissionTax", "1", "ComissionTax.csv"),
            ("ComissionTax", "2", "ComissionTaxCustomerTwo.csv"),
            ("CountryRegion", "1", "CountryRegion.csv"),
            ("CountryRegion", "2", "CountryRegionCustomerTwo.csv"),
            ("Currency2", "1", "Currencyv2.csv")):
        file_handler.addTable(*table_args)
    # NOTE(review): presumably the pause gives the second Sales entry for
    # customer "1" a later timestamp than the first - confirm.
    time.sleep(1)
    file_handler.addTable("Sales", "1", "SalesNewVersion.csv")
    print(file_handler.getLatestTable("Sales", "2"))

    #----------------------
    # merge
    #----------------------
    ks_merge = merge(cls.db)
    ks_merge.reset()
    for csv_path, table_name in (
            ("./ks_filehandler/ks_filehandler/data/Sales.csv", "Sales"),
            ("./ks_filehandler/ks_filehandler/data/CountryRegion.csv", "CountryRegion"),
            ("./ks_filehandler/ks_filehandler/data/ComissionTax.csv", "ComissionTax")):
        ks_merge.addTable(csv_path, table_name)

    # Implicit string concatenation (as already used for the join statement
    # below) keeps source indentation out of the SQL text.
    create_big_table_sql = (
        "CREATE TABLE BigTable(id INT PRIMARY KEY AUTO_INCREMENT, "
        "VendorId VARCHAR(25), "
        "ProductType VARCHAR(25), "
        "Units FLOAT, "
        "RoyaltyPrice FLOAT, "
        "DownloadDate VARCHAR(25), "
        "CustomerCurrency VARCHAR(25), "
        "CountryCode VARCHAR(25), "
        "Region VARCHAR(25), "
        "RightsHolder VARCHAR(25), "
        "ComissionRate VARCHAR(25), "
        "TaxRate VARCHAR(25))")
    join_sql = (
        "insert into BigTable select S.id,S.VendorId,S.ProductType, "
        "S.Units, S.RoyaltyPrice, S.DownloadDate, S.CustomerCurrency, "
        "S.CountryCode, C.Region, T.RightsHolder, T.ComissionRate, "
        "T.TaxRate from Sales S Inner Join CountryRegion C on "
        "S.CountryCode=C.CountryCode Inner join ComissionTax T on "
        "S.VendorId = T.VendorId and C.Region = T.Region;")
    ks_merge.join(join_sql, create_big_table_sql)

    #----------------------
    # clean up
    #----------------------
    cursor = cls.db.cursor()
    try:
        for statement in (
                "use merge;",
                "ALTER TABLE BigTable change ComissionRate ComissionRate FLOAT;",
                "ALTER TABLE BigTable change TaxRate TaxRate FLOAT;",
                "update BigTable set TaxRate = TaxRate/100;"):
            cursor.execute(statement)
    finally:
        # Release the cursor even if one of the statements fails.
        cursor.close()

    #----------------------
    # analytics
    #----------------------
    cls.ks_analytics = analytics(cls.db)
    for fact_args in (
            ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
            ("TAXES", "NET_REVENUE", "TaxRate", "*"),
            ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-")):
        cls.ks_analytics.addFactUsingBinaryOp(*fact_args)