def load_precompute_normalize_URL(company_name, db):
    """Merge a company's latest tables and register the result downstream.

    Steps, in order:
      1. Ask ``filehandler`` for the latest tables of ``company_name``.
      2. Add each of them to a freshly reset ``merge`` instance via
         ``addTableURL`` and run ``automaticMerge``.
      3. Register the first merged table with a freshly reset ``precompute``.
      4. Register the same table with a freshly reset ``analytics`` under the
         name ``"BigTable<id>"``, where ``<id>`` is the value returned by
         ``precompute.getMaxBigTableIdForCompany``.

    Args:
        company_name: Company identifier forwarded to the handlers.
        db: Database handle forwarded to every handler constructor.

    Raises:
        IndexError: If the merge step yields no tables.
    """
    latest_rows = filehandler(db).getLatestTablesByCompany(company_name)

    ks_merge = merge(db)
    ks_merge.reset()
    for row in latest_rows:
        # Column 1 of each row is the table name, column 2 its file path.
        ks_merge.addTableURL(row[2], row[1])
    ks_merge.automaticMerge()

    merged_tables = ks_merge.getTables()
    if not merged_tables:
        # Same exception type the bare [0] lookup would raise, but with context.
        raise IndexError(
            "merge produced no tables for company %r" % (company_name,))
    big_table = merged_tables[0]

    ks_precompute = precompute(db)
    precompute_meta = ks_merge.getMetaDataFromTable(big_table)
    ks_precompute.reset()
    ks_precompute.addBigTable(precompute_meta, big_table, company_name)

    # NOTE(review): the metadata is fetched a second time instead of being
    # reused, in case precompute.addBigTable modifies the object it was
    # handed -- confirm before collapsing the two lookups.
    analytics_meta = ks_merge.getMetaDataFromTable(big_table)

    ks_analytics = analytics(db)
    new_big_table = "BigTable" + str(
        ks_precompute.getMaxBigTableIdForCompany(company_name))
    ks_analytics.reset()
    ks_analytics.addBigTable(big_table, new_big_table, analytics_meta)
Example #2
0
    def setUpClass(cls):
        """Build the class-wide fixture from the raw CSV files in tests/data2.

        Connects to MySQL using ``ks_db_settings``, registers four raw files
        for company 1, runs ``load_precompute_normalize_URL``, adds the first
        merged table to ``analytics`` as ``BigTable<id>``, applies two data
        corrections, derives three facts and registers the test formulas.

        Sets ``cls.db``, ``cls.ks_fh``, ``cls.company_id`` and
        ``cls.ks_analytics`` for use by the tests.
        """
        #----------------------
        # set up db
        #----------------------
        cls.db = MySQLdb.connect(
            ks_db_settings.setting('host'),
            ks_db_settings.setting('user'),
            ks_db_settings.setting('password'),
            ks_db_settings.setting('database'))

        cls.ks_fh = filehandler(cls.db)
        cls.ks_fh.reset()
        cls.company_id = 1
        for csv_path in (
            "./tests/data2/Sales.csv",
            "./tests/data2/Currencyv2.csv",
            "./tests/data2/CountryRegion.csv",
            "./tests/data2/ComissionTax.csv",
        ):
            register_raw_files(csv_path, cls.company_id, cls.db)

        ks_precompute = precompute(cls.db)
        ks_precompute.reset()
        # precompute
        load_precompute_normalize_URL(cls.company_id, cls.db)

        ks_merge = merge(cls.db)
        mergeBigTable = ks_merge.getTables()
        metaData = ks_merge.getMetaDataFromTable(mergeBigTable[0])

        cls.ks_analytics = analytics(cls.db)
        newBigTable = "BigTable" + str(
            ks_precompute.getMaxBigTableIdForCompany(cls.company_id))
        cls.ks_analytics.reset()
        cls.ks_analytics.addBigTable(mergeBigTable[0], newBigTable, metaData)

        #clean up
        qualified_table = "analytics." + newBigTable
        sql = "update %s set TaxRate = TaxRate/100;" % (qualified_table)
        cls.db.cursor().execute(sql)
        # ProductType changed from D to M see documentation of test case
        sql = "update %s set ProductType = 'M' where VendorId='0268_20140114_SOFA_ENGLIS' and DownloadDate='6/1/14';" % (qualified_table)
        cls.db.cursor().execute(sql)

        # Each fact is built from the two named columns and the operator;
        # later facts use earlier ones, so the order matters.
        for fact, lhs, rhs, op in (
            ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
            ("TAXES", "NET_REVENUE", "TaxRate", "*"),
            ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-"),
        ):
            cls.ks_analytics.addFactUsingBinaryOpAPI(fact, lhs, rhs, op, newBigTable)

        # Argument tuples are passed to registerFormula unchanged.
        for formula_args in (
            ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
            ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
            ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
            ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
            ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
            ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
            ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
            ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
            ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
            ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum"),
        ):
            cls.ks_fh.registerFormula(*formula_args)
Example #3
0
    def setUpClass(cls):
        """Build the class-wide fixture from the "new version" uploads.

        Registers four raw files for company 1, rewrites the stored file
        names of Sales and ComissionTax to the ``version`` directory, runs
        ``load_precompute_normalize_CsvPy``, applies two data corrections to
        the resulting analytics table, derives three facts and registers the
        test formulas.

        Sets ``cls.db``, ``cls.ks_fh``, ``cls.company_id`` and
        ``cls.ks_analytics`` for use by the tests.
        """
        #----------------------
        # set up db
        #----------------------
        cls.db = ks_db_settings.connect()

        cls.ks_fh = filehandler(cls.db)
        cls.ks_fh.reset()
        cls.company_id = 1

        for upload in ("Sales_new_version", "CurrencyV2",
                       "ComissionTax_new_version", "CountryRegion"):
            register_raw_filesCsvPy(upload, cls.company_id, cls.db)

        # Repoint the stored file names at the files under version/.
        for statement in (
            'update files set file_name = "./tests/data2/version/Sales.csv" where file_name = "./tests/data2/Sales.csv"',
            'update files set file_name = "./tests/data2/version/ComissionTax.csv" where file_name = "./tests/data2/ComissionTax.csv"',
        ):
            cls.db.cursor().execute(statement)

        pre = precompute(cls.db)
        pre.reset()
        #precompute
        load_precompute_normalize_CsvPy(cls.company_id, cls.db)
        big_table = "BigTable" + str(pre.getMaxBigTableIdForCompany(cls.company_id))
        cls.ks_analytics = analytics(cls.db)

        #clean up
        qualified_table = "analytics." + big_table
        cls.db.cursor().execute(
            "update %s set TaxRate = TaxRate/100;" % (qualified_table))
        # ProductType changed from D to M see documentation of test case
        cls.db.cursor().execute(
            "update %s set ProductType = 'M' where VendorId='0268_20140114_SOFA_ENGLIS' and DownloadDate='6/1/14';" % (qualified_table))

        # Later facts use earlier ones, so the order matters.
        for fact, lhs, rhs, op in (
            ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
            ("TAXES", "NET_REVENUE", "TaxRate", "*"),
            ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-"),
        ):
            cls.ks_analytics.addFactUsingBinaryOpAPI(fact, lhs, rhs, op, big_table)

        # Argument tuples are passed to registerFormula unchanged.
        for formula_args in (
            ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
            ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
            ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
            ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
            ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
            ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
            ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
            ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
            ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
            ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum"),
        ):
            cls.ks_fh.registerFormula(*formula_args)
Example #4
0
    def setUpClass(cls):
        """Build the class-wide fixture used by the ``UnitsSUM`` formula tests.

        Registers four raw files for company 1, runs
        ``load_precompute_normalize_CsvPy`` and registers the single formula
        ``UnitsSUM``.

        Sets ``cls.db``, ``cls.ks_fh``, ``cls.company_id`` and
        ``cls.ks_analytics`` for use by the tests.
        """
        #----------------------
        # set up db
        #----------------------
        cls.db = ks_db_settings.connect()

        cls.ks_fh = filehandler(cls.db)
        cls.ks_fh.reset()
        cls.company_id = 1

        for upload in ("Sales", "CurrencyV2", "ComissionTax", "CountryRegion"):
            register_raw_filesCsvPy(upload, cls.company_id, cls.db)

        pre = precompute(cls.db)
        pre.reset()
        #precompute
        load_precompute_normalize_CsvPy(cls.company_id, cls.db)
        # The table name is looked up as in the sibling fixtures, although
        # nothing below reads it.
        big_table = "BigTable" + str(pre.getMaxBigTableIdForCompany(cls.company_id))
        cls.ks_analytics = analytics(cls.db)
        cls.ks_fh.registerFormula("", "UnitsSUM", "UnitsSUM", "sum(Units)", "sum")
    def setUpClass(cls):
        """Build the class-wide fixture based on the ``Salesfreq`` upload.

        Registers three raw files for company 1, runs
        ``load_precompute_normalize_CsvPy``, derives three facts on the
        resulting big table and registers the test formulas.

        Sets ``cls.db``, ``cls.ks_fh``, ``cls.company_id`` and
        ``cls.ks_analytics`` for use by the tests.
        """
        #----------------------
        # set up db
        #----------------------
        cls.db = ks_db_settings.connect()

        cls.ks_fh = filehandler(cls.db)
        cls.ks_fh.reset()
        cls.company_id = 1

        # NOTE: the CurrencyV2 registration is disabled in this fixture.
        for upload in ("Salesfreq", "ComissionTax", "CountryRegion"):
            register_raw_filesCsvPy(upload, cls.company_id, cls.db)

        pre = precompute(cls.db)
        pre.reset()
        #precompute
        load_precompute_normalize_CsvPy(cls.company_id, cls.db)
        big_table = "BigTable" + str(pre.getMaxBigTableIdForCompany(cls.company_id))
        cls.ks_analytics = analytics(cls.db)

        # Later facts use earlier ones, so the order matters.
        for fact, lhs, rhs, op in (
            ("NET_REVENUE", "Units", "RoyaltyPrice", "*"),
            ("TAXES", "NET_REVENUE", "TaxRate", "*"),
            ("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-"),
        ):
            cls.ks_analytics.addFactUsingBinaryOpAPI(fact, lhs, rhs, op, big_table)

        # Argument tuples are passed to registerFormula unchanged.
        for formula_args in (
            ("", "Plus", "Plus", "Units+RoyaltyPrice", "sum"),
            ("", "Mult", "Mult", "Units*RoyaltyPrice", "sum"),
            ("", "Individual_Tax", "Individual_Tax", "RoyaltyPrice*TaxRate", "sum"),
            ("", "NET_REVENUE", "NET_REVENUE", "Units*RoyaltyPrice", "sum"),
            ("", "SumPlus", "SumPlus", "Sum(Units)+Sum(RoyaltyPrice)", "sum"),
            ("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum"),
            ("", "Individual_TaxSum", "Individual_TaxSum", "Sum(RoyaltyPrice)*Sum(TaxRate)", "sum"),
            ("", "NonsenseSum", "Individual_Tax", "Sum(RoyaltyPrice)+Sum(TaxRate)", "sum"),
            ("", "Nonsense", "Individual_Tax", "RoyaltyPrice+TaxRate", "sum"),
            ("", "REVENUE_AFTER_TAX", "REVENUE_AFTER_TAX", "", "sum"),
        ):
            cls.ks_fh.registerFormula(*formula_args)
Example #6
0
# Demo script: load the sample CSV uploads for company 1, derive the revenue
# facts, register the SumMult formula and print one day of measure data
# grouped by date and vendor.
# NOTE(review): `db` is not defined in this snippet; it must already hold an
# open database handle when this code runs.
ks_fh = filehandler(db)
ks_fh.reset()
company_id = 1

register_raw_filesCsvPy("Sales", company_id, db)
register_raw_filesCsvPy("CurrencyV2", company_id, db)
register_raw_filesCsvPy("ComissionTax", company_id, db)
register_raw_filesCsvPy("CountryRegion", company_id, db)

ks_precompute = precompute(db)
ks_precompute.reset()
#precompute
load_precompute_normalize_CsvPy(company_id, db)
newBigTable = "BigTable" + str(ks_precompute.getMaxBigTableIdForCompany(company_id))
ks_analytics = analytics(db)

# Later facts use earlier ones, so the order matters.
ks_analytics.addFactUsingBinaryOpAPI("NET_REVENUE", "Units", "RoyaltyPrice", "*", newBigTable)
ks_analytics.addFactUsingBinaryOpAPI("TAXES", "NET_REVENUE", "TaxRate", "*", newBigTable)
ks_analytics.addFactUsingBinaryOpAPI("REVENUE_AFTER_TAX", "NET_REVENUE", "TAXES", "-", newBigTable)


ks_fh.registerFormula("", "SumMult", "SumMult", "Sum(Units)*Sum(RoyaltyPrice)", "sum")
measure_ids = MeasureName2MeasureIds("SumMult")
group_by = "ks_date, VendorId"

print(ks_analytics.parseGroupBy(group_by))
result = measure_data(db, company_id, measure_ids, "day", "2014-06-01", "2014-06-02", group_by)
# Parenthesised single-argument form prints the same on Python 2 and Python 3.
print(result)

    
Example #7
0
def measure_data(db, company_id, measures, frequency=None, start_date=None, end_date=None,
    groupby=None, measure_filter=None, dimension_filters=None, score_kpis=True):
    """Fetch measure data for a company from its most recent big table.

    The table is ``"BigTable<id>"``, where ``<id>`` comes from
    ``precompute.getMaxBigTableIdForCompany(company_id)``. The query itself
    is delegated to ``analytics.getMeasureData``; its result is returned
    unchanged.

    Parameters (documented contract):
        db                - database handle given to the handler constructors
        company_id        - company whose latest big table is queried
        measures          - set() of measure IDs
        frequency         - str: 'month', 'day' or 'quarter'
        start_date        - lower bound, use data >= this (documented as a
                            datetime.datetime object)
        end_date          - upper bound, use data < this (documented as a
                            datetime.datetime object)
        groupby           - str dimension name, or None
        measure_filter    - documented as an optional set() of measure IDs;
                            NOTE: not read by this function
        dimension_filters - None for no dimension filtering, otherwise a dict
                            keyed by dimension name whose values describe a
                            logical expression over levels, e.g.
                            {
                                'DIM1': 'LEV1',
                                'DIM2': ('|', 'LEV5', 'LEV6'),
                                'DIM3': ('&', 'LEV10', 'LEV11'),
                            }
                            which corresponds to the simplified clause
                            WHERE DIM1 = LEV1
                              AND (DIM2 = LEV5 OR DIM2 = LEV6)
                              AND (DIM3 = LEV10 AND DIM3 = LEV11)
                            The DIM3 form looks odd and matches no rows with
                            current data, but the fact table structure
                            supports it, so it stays expressible.
        score_kpis        - documented as: True returns 1-10 scores for KPIs,
                            False returns values; NOTE: not read by this
                            function

    Return value (documented contract):

    Without groupby, a dict keyed by measure code, each holding a dict of
    period -> value:
    {
        'CODE1': {'period1': 100.0, 'period5': 100.0},
        'CODE2': {'period2': 130.0, 'period3': 130.0},
    }

    Period strings are formatted by frequency:
        day:     YYYY-MM-DD
        month:   YYYY-MM
        quarter: YYYY-Q[1-4]

    With groupby, one extra level keyed by the levels of that dimension:
    {
        'CODE1': {
            'LEV1': {'period1': 100.0, 'period5': 100.0},
            'LEV2': {'period1': 100.0, 'period5': 100.0},
        }
    }

    Periods and levels without data are omitted rather than mapped to None.
    """
    latest_id = precompute(db).getMaxBigTableIdForCompany(company_id)
    table_name = "BigTable" + str(latest_id)

    # Note the argument order expected by getMeasureData: frequency goes last.
    return analytics(db).getMeasureData(
        table_name,
        measures,
        start_date,
        end_date,
        groupby,
        dimension_filters,
        frequency)
    def setUpClass(cls):
        """Build the class-wide fixture by merging three CSV tables by hand.

        Connects to MySQL, registers several table versions with
        ``filehandler``, loads Sales, CountryRegion and ComissionTax into
        ``merge`` and joins them into ``BigTable``, converts the two rate
        columns to FLOAT, and derives three facts through ``analytics``.

        Sets ``cls.db`` and ``cls.ks_analytics`` for use by the tests.
        """
        #----------------------
        # set up db
        #----------------------
        cls.db = MySQLdb.connect(
                ks_db_settings.setting('host'), 
                ks_db_settings.setting('user'), 
                ks_db_settings.setting('password'), 
                ks_db_settings.setting('database'))


        #----------------------
        # filehandler
        #----------------------
        file_handler = filehandler(cls.db)
        file_handler.reset()
        # Arguments are (table name, "1" or "2", CSV file name).
        # NOTE(review): the second argument looks like a customer/company id,
        # judging by the "...CustomerTwo.csv" files registered under "2" -- confirm.
        file_handler.addTable("Sales", "1","Sales.csv")
        file_handler.addTable("Sales", "2","SalesCustomerTwo.csv")

        file_handler.addTable("ComissionTax", "1","ComissionTax.csv")
        file_handler.addTable("ComissionTax","2","ComissionTaxCustomerTwo.csv")

        file_handler.addTable("CountryRegion", "1","CountryRegion.csv")
        file_handler.addTable("CountryRegion","2","CountryRegionCustomerTwo.csv")

        file_handler.addTable("Currency2","1","Currencyv2.csv")
        # presumably gives the second Sales upload a later timestamp than the
        # first so it counts as the newer version -- TODO confirm
        time.sleep(1)
        file_handler.addTable("Sales", "1","SalesNewVersion.csv")
        # Debug output only; the result is not used by the fixture.
        print(file_handler.getLatestTable("Sales", "2"))

        #----------------------
        # merge
        #----------------------
        ks_merge = merge(cls.db)
        ks_merge.reset()
        ks_merge.addTable("./ks_filehandler/ks_filehandler/data/Sales.csv","Sales")
        ks_merge.addTable("./ks_filehandler/ks_filehandler/data/CountryRegion.csv","CountryRegion")
        ks_merge.addTable("./ks_filehandler/ks_filehandler/data/ComissionTax.csv","ComissionTax")
        # Schema of the joined table. ComissionRate and TaxRate are created as
        # VARCHAR here and converted to FLOAT in the clean-up step below.
        sql_BigTable = "CREATE TABLE BigTable(id INT PRIMARY KEY AUTO_INCREMENT, \
             VendorId VARCHAR(25), \
             ProductType VARCHAR(25), \
             Units FLOAT, \
             RoyaltyPrice FLOAT, \
             DownloadDate VARCHAR(25), \
             CustomerCurrency VARCHAR(25), \
             CountryCode VARCHAR(25), \
             Region VARCHAR(25), \
             RightsHolder VARCHAR(25), \
             ComissionRate VARCHAR(25), \
             TaxRate VARCHAR(25))"
        

        # Sales joined to CountryRegion on CountryCode, then to ComissionTax
        # on both VendorId and Region.
        sql_join = "insert into BigTable select S.id,S.VendorId,S.ProductType, "\
            "S.Units, S.RoyaltyPrice, S.DownloadDate, S.CustomerCurrency, "\
            "S.CountryCode, C.Region, T.RightsHolder, T.ComissionRate, "\
            "T.TaxRate from Sales S Inner Join CountryRegion C on "\
            "S.CountryCode=C.CountryCode Inner join ComissionTax T on " \
            "S.VendorId = T.VendorId and C.Region = T.Region;"
            
        ks_merge.join(sql_join, sql_BigTable)
        
        #----------------------
        # clean up
        #----------------------
        cursor = cls.db.cursor()
        # The statements below run against the `merge` database.
        sql = "use merge;"
        cursor.execute(sql)        
        sql = "ALTER TABLE BigTable change ComissionRate ComissionRate FLOAT;"
        cursor.execute(sql)
        sql = "ALTER TABLE BigTable change TaxRate TaxRate FLOAT;"
        cursor.execute(sql)
        # Divide TaxRate by 100 (presumably percent -> fraction -- confirm).
        sql ="update BigTable set TaxRate = TaxRate/100;"
        cursor.execute(sql)
        
        
        #----------------------
        # analytics
        #----------------------
        cls.ks_analytics = analytics(cls.db)
        # Later facts use earlier ones, so the order matters.
        cls.ks_analytics.addFactUsingBinaryOp("NET_REVENUE", "Units", "RoyaltyPrice", "*") 
        cls.ks_analytics.addFactUsingBinaryOp("TAXES", "NET_REVENUE","TaxRate","*")
        cls.ks_analytics.addFactUsingBinaryOp("REVENUE_AFTER_TAX", "NET_REVENUE","TAXES","-")