def __init__(self, root, RunByUsername, log):
    """Insert missing product-URL rows into ``bankingadditionalinformation``.

    Reads the three product extracts through ``Transform.getDF``, keeps the
    seven columns listed in ``df_cols`` and inserts every extract row whose
    pipe-joined string form is not already present in the table.

    Args:
        root: Passed through to ``Connector`` and ``Transform``
            (e.g. "E:/CUA OpenBank API/OpenBanking/ETL").
        RunByUsername: Passed through to ``Connector``.
        log: Logger object; only its ``logComment`` method is used.

    Side effects:
        Sets ``self.FailInd`` to the value returned by
        ``conn.openConnector()``, executes INSERT statements on
        ``conn.cursor`` and always calls ``conn.closeConnector()``.
    """
    import pandas as pd

    tag = "BankingAdditionalInfo (01) -> "
    log.logComment(tag + "Initialized")
    conn = Connector(root, RunByUsername, log)
    # The connector is opened exactly once; the status is kept for callers.
    self.FailInd = conn.openConnector()
    log.logComment(tag + "Connection Status: " + str(self.FailInd))
    transform = Transform(conn, root, log)

    query = ("SELECT PRODUCTID, OVERVIEWURI, TERMSURI, ELIGIBILITYURI, FEESANDPRICINGURI,"
             "BUNDLEURI, lastupdated from BANKINGADDITIONALINFORMATION").lower()
    insertQuery = ("INSERT INTO BANKINGADDITIONALINFORMATION "
                   "(PRODUCTID, OVERVIEWURI, TERMSURI, ELIGIBILITYURI, FEESANDPRICINGURI,"
                   "BUNDLEURI, lastupdated) VALUES (%s, %s, %s, %s, %s, %s, %s)").lower()

    # Extract columns, in the same order as the table columns of the queries.
    df_cols = [
        'Product ID (Mandatory)',
        'Overview URL',
        'Terms URL',
        'Eligibility URL',
        'Fees And PricingURL',
        'Bundle URL',
        'CUA Effective From (Mandatory) Date/Time',
    ]
    productFiles = ['TeD_Products.csv', 'TnS_Products.csv', 'CCC_Products.csv']

    # Stringified placeholders that must not reach the database verbatim.
    placeholders = {'nan': '', 'None': '', 'False': '0'}

    try:
        # Rows already in the table, as pipe-joined strings. Every element is
        # converted with str() because join does not work on timestamps.
        conn.cursor.execute(query)
        product_table = pd.DataFrame(conn.cursor.fetchall())
        tableData = {
            '|'.join(str(x) for x in row)
            for row in product_table.values.tolist()
        }

        # Project every extract onto df_cols and stack them in file order.
        frames = []
        for position, file in enumerate(productFiles):
            frame = transform.getDF(file)
            if position > 0:
                # Later extracts may lack some columns; add them as NaN.
                # The first extract must already provide every column.
                for column in df_cols:
                    if column not in frame.columns:
                        frame[column] = float('nan')
            frames.append(frame[df_cols])
        product = pd.concat(frames)

        # Keep the rows that are not in the table yet. Rows stay as field
        # lists so that a '|' inside a value cannot shift the fields.
        insertSelection = []
        for row in product.values.tolist():
            fields = [str(x) for x in row]
            if '|'.join(fields) not in tableData:
                insertSelection.append(fields)

        reject = []
        for fields in insertSelection:
            # Only whole-field placeholders are replaced; the product id
            # (first field) is passed through unchanged.
            d = [fields[0]] + [placeholders.get(f, f) for f in fields[1:]]
            # A missing timestamp in the first two URL fields becomes NULL.
            for idx in (1, 2):
                if d[idx] == 'NaT':
                    d[idx] = None
            try:
                conn.cursor.execute(insertQuery, tuple(d))
            except Exception as exc:
                # Best effort: remember the row and carry on with the rest.
                reject.append(d)
                log.logComment(tag + "Insert rejected: "
                               + '|'.join(str(x) for x in d)
                               + " (" + str(exc) + ")")

        log.logComment(tag + "Rows inserted: "
                       + str(len(insertSelection) - len(reject))
                       + ", Rows rejected: " + str(len(reject)))
    finally:
        # NOTE(review): the original comment says the data is committed at
        # this point - presumably closeConnector() commits; confirm.
        conn.closeConnector()
def __init__(self, root, RunByUsername, log):
    """Insert missing tiers into ``bankingproductdepositratetier``.

    Joins the rows of ``bankingproductdepositrate`` with the 'TnS_Rate.csv'
    and 'RatesFile.csv' extracts, and inserts every resulting tier whose
    pipe-joined string form is not already present in the tier table. New
    rows get consecutive ids starting after the current maximum
    ``depositratetierid``.

    Args:
        root: Passed through to ``Connector`` and ``Transform``.
        RunByUsername: Passed through to ``Connector``.
        log: Logger object; only its ``logComment`` method is used.

    Side effects:
        Sets ``self.FailInd`` to the value returned by
        ``conn.openConnector()``, sets ``transform.fileList``, inserts rows
        through ``conn.insertSingleRow``, logs the rejected rows and always
        calls ``conn.closeConnector()``.
    """
    log.logComment("BankingProductdepositRateTier (05) -> Initialized")
    conn = Connector(root, RunByUsername, log)
    self.FailInd = conn.openConnector()
    transform = Transform(conn, root, log)
    transform.fileList = ['RatesFile.csv']

    # NOTE(review): replace(..., '0', '') removes every '0' of the rate, not
    # only trailing ones - presumably meant to match the extract's rate
    # format; confirm against the CSV data.
    parentTable = (
        "select productid, depositrateid, applicationfrequency, depositratetype, "
        "trim(replace(cast(rate as char), '0','')) from bankingproductdepositrate"
    ).lower()
    preLoad = (
        'select depositRateId, name, unitOfMeasure, minimumValue, maximumValue,'
        'rateApplicationMethod from bankingproductdepositRateTier'
    ).lower()
    seqQuery = ('Select max(cast(depositRateTierId as decimal)) '
                'from bankingproductdepositRateTier').lower()
    insertQuery = (
        'insert into bankingproductdepositRateTier '
        '(depositRateTierId, depositRateId, name, unitOfMeasure, minimumValue, '
        'maximumValue, rateApplicationMethod, '
        'createdOn, createdBy, systemCreatedOn, systemCreatedBy) '
        'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)').lower()

    # Column 1 is the depositrateid of the parent query (positional label).
    ts_cols = [
        1, 'Name', 'Unit of Measure', 'Minimum Value', 'Maximum Value',
        'Rate Application Method'
    ]
    rf_cols = [
        1, 'Name', 'Unit of Measure', 'minimumValue', 'maximumValue',
        'Rate Application Method'
    ]

    # Stringified placeholders that must not reach the database verbatim.
    placeholders = {'nan': '', 'None': '', 'False': '0'}

    try:
        parentdata = pd.DataFrame(conn.executeQuery(parentTable))

        # Tiers already in the table, as pipe-joined strings.
        preLoadData = conn.executeQuery(preLoad)
        preLoadDataList = {
            '|'.join(str(x) for x in row) for row in preLoadData
        }

        # Left merge followed by dropna keeps only the parent rows that
        # found a matching extract row.
        extractDataTS = transform.getDF('TnS_Rate.csv')
        extractDataTS = pd.merge(parentdata,
                                 extractDataTS,
                                 how='left',
                                 left_on=[0, 2, 3, 4],
                                 right_on=[
                                     'Product ID (Mandatory)',
                                     'applicationFrequency (Auto Generated)',
                                     'Rate Type (Mandatory)',
                                     'Rate (Mandatory)'
                                 ])
        extractDataTS = extractDataTS.dropna(subset=['Rate Type (Mandatory)'])
        extractDataTS = extractDataTS[ts_cols]
        # Both extracts share one set of column names so concat aligns them.
        extractDataTS.columns = rf_cols
        frames = [extractDataTS]

        extractDataRF = transform.getDF('RatesFile.csv')
        if not extractDataRF.empty:
            extractDataRF = pd.merge(parentdata,
                                     extractDataRF,
                                     how='left',
                                     left_on=[0, 2, 3],
                                     right_on=[
                                         'Product ID',
                                         'Additional Value (Auto Generated)',
                                         'Rate Type (Mandatory)'
                                     ])
            extractDataRF = extractDataRF.dropna(
                subset=['Rate Type (Mandatory)'])
            # An empty rates file is skipped entirely, so its unprojected
            # columns can never leak into the combined frame.
            frames.append(extractDataRF[rf_cols])
        extractData = pd.concat(frames)

        # Keep the tiers that are not in the table yet. Rows stay as field
        # lists so that a '|' inside a value cannot shift the fields.
        insertSelection = []
        for row in extractData.values.tolist():
            fields = [str(x) for x in row]
            if '|'.join(fields) not in preLoadDataList:
                insertSelection.append(fields)

        seq = conn.executeQuery(seqQuery)[0][0]
        seq = 0 if seq is None else int(seq)

        reject = []
        for fields in insertSelection:
            seq += 1
            audit = [
                str(x) for x in (conn.updatedDate, conn.createdBy,
                                 conn.updatedDate, conn.createdBy)
            ]
            # Only whole-field placeholders are replaced; the id is kept.
            d = [str(seq)] + [placeholders.get(f, f) for f in fields + audit]
            # Positions 4 and 5 are minimumValue / maximumValue: default 0.
            if d[4] == '':
                d[4] = '0'
            if d[5] == '':
                d[5] = '0'
            # A return value of 1 marks the row as rejected.
            if conn.insertSingleRow(insertQuery, d) == 1:
                reject.append(d)
        log.logComment(reject)
    finally:
        conn.closeConnector()