Ejemplo n.º 1
0
    def __init__(self, root, RunByUsername, log):
        """Insert new product URL rows into BANKINGADDITIONALINFORMATION.

        Reads the three product CSV extracts through ``Transform.getDF``,
        stacks them into one frame restricted to a fixed column order, and
        inserts every row whose pipe-joined string form is not already
        present among the rows selected from the table.

        Args:
            root: Forwarded unchanged to ``Connector`` and ``Transform``.
            RunByUsername: Forwarded unchanged to ``Connector``.
            log: Logger object; only its ``logComment`` method is used.

        Raises:
            KeyError: If the first product file lacks one of the expected
                columns (later files are padded instead).

        Side effects:
            Sets ``self.FailInd`` to the value returned by
            ``conn.openConnector()``, executes SELECT/INSERT statements on
            ``conn.cursor``, logs an insert/reject summary and closes the
            connector.
        """
        # Kept function-local, as in the original, so the block does not
        # depend on a file-level pandas import.
        import pandas as pd

        log.logComment("BankingAdditionalInfo (01) -> Initialized")
        conn = Connector(root, RunByUsername, log)

        self.FailInd = conn.openConnector()

        log.logComment("BankingAdditionalInfo (01) -> Connection Status: " + str(self.FailInd))

        transform = Transform(conn, root, log)

        query = ("SELECT PRODUCTID, OVERVIEWURI, TERMSURI, ELIGIBILITYURI, FEESANDPRICINGURI,"
         "BUNDLEURI, lastupdated from BANKINGADDITIONALINFORMATION").lower()

        # CSV columns in the same order as the table columns selected above.
        df_cols = ['Product ID (Mandatory)', 'Overview URL', 'Terms URL',
                   'Eligibility URL', 'Fees And PricingURL', 'Bundle URL', 'CUA Effective From (Mandatory) Date/Time'
                   ]

        productFiles = ['TeD_Products.csv', 'TnS_Products.csv', 'CCC_Products.csv']

        # NOTE(review): the connector is opened a second time here, exactly as
        # the original did; confirm whether openConnector() is idempotent or
        # whether this call can be dropped.
        conn.openConnector()
        conn.cursor.execute(query)
        product_table = pd.DataFrame(conn.cursor.fetchall())

        # Stack all product files into one frame with the df_cols layout.
        product = None
        for file in productFiles:
            frame = transform.getDF(file)
            if product is None:
                # The first file must already carry every expected column.
                product = frame[df_cols]
            else:
                # Later files may lack columns: reindex pads them with NaN
                # and drops anything outside df_cols in one step.
                product = pd.concat([product, frame.reindex(columns=df_cols)])

        # One pipe-joined string per row; every value goes through str()
        # because join does not accept timestamps or numbers.
        insertData = ['|'.join(str(x) for x in row)
                      for row in product.values.tolist()]

        # Rows already present in the table, in the same string format.
        # A set keeps the membership test below O(1) per row.
        tableData = {'|'.join(str(x) for x in row)
                     for row in product_table.values.tolist()}

        # Rows that still need to be inserted (file order preserved).
        insertSelection = [row for row in insertData if row not in tableData]

        insertQuery = ("INSERT INTO BANKINGADDITIONALINFORMATION "
                       "(PRODUCTID, OVERVIEWURI, TERMSURI, ELIGIBILITYURI, FEESANDPRICINGURI,"
                       "BUNDLEURI, lastupdated) VALUES (%s, %s, %s, %s, %s, %s, %s)").lower()

        reject = []

        for val in insertSelection:
            # Normalise string artefacts of missing/boolean values.
            val = val.replace("|False|", "|0|")
            val = val.replace("|nan", "|")
            val = val.replace("|None", "|")
            d = val.split('|')
            if d[1] in ('NaT', 'nan'):
                d[1] = None
            if d[2] in ('NaT', 'nan'):
                d[2] = None

            try:
                conn.cursor.execute(insertQuery, (d[0], d[1], d[2], d[3], d[4], d[5], d[6]))
            except Exception:
                # Best effort: a failing row is recorded and the load goes on.
                reject.append(val)

        # Report the real outcome instead of an unconditional "success".
        log.logComment("BankingAdditionalInfo (01) -> Rows inserted: "
                       + str(len(insertSelection) - len(reject))
                       + ", rows rejected: " + str(len(reject)))
        if reject:
            log.logComment("BankingAdditionalInfo (01) -> Rejected rows: " + str(reject))

        # NOTE(review): the original comment says closing the connector is
        # what commits the inserts; confirm against Connector.closeConnector.
        conn.closeConnector()
    def __init__(self, root, RunByUsername, log):
        """Insert new tier rows into bankingproductdepositratetier.

        Joins the rows of ``bankingproductdepositrate`` with the
        ``TnS_Rate.csv`` and ``RatesFile.csv`` extracts to obtain the
        deposit-rate id for each tier, drops tiers whose pipe-joined string
        form already exists in the tier table, and inserts the remainder with
        sequential ``depositRateTierId`` values continuing from the current
        maximum.

        Args:
            root: Forwarded unchanged to ``Connector`` and ``Transform``.
            RunByUsername: Forwarded unchanged to ``Connector``.
            log: Logger object; only its ``logComment`` method is used.

        Side effects:
            Sets ``self.FailInd`` to the value returned by
            ``conn.openConnector()``, sets ``transform.fileList``, inserts
            rows through ``conn.insertSingleRow``, logs the list of rejected
            rows and closes the connector.
        """
        log.logComment("BankingProductdepositRateTier (05) -> Initialized")
        conn = Connector(root, RunByUsername, log)
        self.FailInd = conn.openConnector()

        transform = Transform(conn, root, log)

        transform.fileList = ['RatesFile.csv']

        # Parent rows. The resulting frame has positional integer column
        # labels: 0 product id, 1 deposit rate id, 2 application frequency,
        # 3 rate type, 4 rate rendered as text with every '0' removed.
        parentTable = "select productid, depositrateid, applicationfrequency, depositratetype, trim(replace(cast(rate as char), '0','')) from bankingproductdepositrate".lower(
        )
        parentdata = pd.DataFrame(conn.executeQuery(parentTable))

        preLoad = (
            'select depositRateId, name, unitOfMeasure, minimumValue, maximumValue,'
            'rateApplicationMethod from bankingproductdepositRateTier'
        ).lower()
        preLoadData = conn.executeQuery(preLoad)

        # Tiers already in the table, as pipe-joined strings. A set keeps the
        # membership test below O(1) per row.
        preLoadKeys = {'|'.join(str(x) for x in row) for row in preLoadData}

        extractDataTS = transform.getDF('TnS_Rate.csv')

        extractDataTS = pd.merge(parentdata,
                                 extractDataTS,
                                 how='left',
                                 left_on=[0, 2, 3, 4],
                                 right_on=[
                                     'Product ID (Mandatory)',
                                     'applicationFrequency (Auto Generated)',
                                     'Rate Type (Mandatory)',
                                     'Rate (Mandatory)'
                                 ])
        # Left join: parent rows without a CSV match have NaN here.
        extractDataTS = extractDataTS.dropna(subset=['Rate Type (Mandatory)'])
        df_cols = [
            1, 'Name', 'Unit of Measure', 'Minimum Value', 'Maximum Value',
            'Rate Application Method'
        ]
        extractDataTS = extractDataTS[df_cols]

        extractDataRF = transform.getDF('RatesFile.csv')

        if extractDataRF.empty:
            # Nothing to add from RatesFile.csv. Concatenating the raw empty
            # frame (as the original did) would add its unrelated columns to
            # every row when the file only has a header line.
            extractData = extractDataTS
        else:
            extractDataRF = pd.merge(parentdata,
                                     extractDataRF,
                                     how='left',
                                     left_on=[0, 2, 3],
                                     right_on=[
                                         'Product ID',
                                         'Additional Value (Auto Generated)',
                                         'Rate Type (Mandatory)'
                                     ])
            extractDataRF = extractDataRF.dropna(
                subset=['Rate Type (Mandatory)'])

            df_cols = [
                1, 'Name', 'Unit of Measure', 'minimumValue', 'maximumValue',
                'Rate Application Method'
            ]
            extractDataRF = extractDataRF[df_cols]
            # Align the TnS column labels with the RatesFile ones so concat
            # stacks the rows instead of producing the union of both layouts.
            extractDataTS.columns = df_cols
            extractData = pd.concat([extractDataTS, extractDataRF])

        # One pipe-joined string per candidate tier, same format as preLoadKeys.
        extractdataOutput = ['|'.join(str(x) for x in row)
                             for row in extractData.values.tolist()]

        insertSelection = [
            row for row in extractdataOutput if row not in preLoadKeys
        ]

        seqQuery = 'Select max(cast(depositRateTierId as decimal)) from bankingproductdepositRateTier'.lower(
        )
        seq = conn.executeQuery(seqQuery)[0][0]
        # max() over an empty table yields NULL; start numbering from zero.
        seq = 0 if seq is None else int(seq)

        insertQuery = (
            'insert into bankingproductdepositRateTier '
            '(depositRateTierId, depositRateId, name, unitOfMeasure, minimumValue, '
            'maximumValue, rateApplicationMethod, '
            'createdOn, createdBy, systemCreatedOn, systemCreatedBy) '
            'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)').lower()

        reject = []

        for row in insertSelection:
            seq += 1
            # `row` already holds six pipe-joined fields, so the split below
            # yields the eleven parameters the insert statement expects.
            d = "|".join([
                str(x) for x in [
                    seq, row, conn.updatedDate, conn.createdBy,
                    conn.updatedDate, conn.createdBy
                ]
            ])
            d = d.replace("|False|", "|0|")
            d = d.replace("|nan", "|")
            d = d.replace("|None", "|")
            d = d.split("|")
            # Blank minimum/maximum values are stored as '0'.
            if d[4] == '':
                d[4] = '0'
            if d[5] == '':
                d[5] = '0'

            # A return value of 1 is recorded as a rejected row.
            if conn.insertSingleRow(insertQuery, d) == 1:
                reject.append(d)
        log.logComment(reject)
        conn.closeConnector()