def generate_cc_credits(merchant, no_trans, count):
    """Write synthetic credit-card credit transactions for random accounts.

    For each of ``no_trans`` randomly chosen accounts a credit limit is
    drawn and between 100 and 149 transaction rows are written through the
    module-level ``writer`` (a ``csv.writer`` that must already be open).

    Args:
        merchant: sequence of merchant category codes to sample from; every
            code must be a key of ``python_merchant_cat.All_Merchant_Cat``.
        no_trans: number of accounts to generate transactions for.
        count: starting value of the running row counter that prefixes each
            row id (``"<count>_<timestamp>"``).

    Returns:
        None.  Rows are emitted as a side effect via ``writer.writerow``.
    """
    # Sentinel "earliest possible" date.  Spelled datetime(1, 1, 1) because
    # the former ``datetime(0001, 01, 01)`` relies on Python 2 octal literals
    # and is a SyntaxError on Python 3; the value is identical.
    min_date = datetime(1, 1, 1)

    for _account in range(no_trans):
        acct = random.choice(python_account_ID.accountid)
        cc_list = python_CC.CC_Dict[acct]

        # Credit limit: normally $1,000-$24,000.  When the first draw is 100
        # (1 in 100) the multiplier is 1 and the $25,000-$49,000 draw wins.
        # NOTE(review): the original comment claimed 10% for the high tier,
        # but the code yields 1% - confirm which is intended.
        limit = max(
            max(randrange(1, 101, 1) - 99, 0) * randrange(25000, 50000, 1000),
            randrange(1000, 25000, 1000))

        usedAmt = 0          # running total of this account's amounts
        maxDate = min_date   # latest posting date seen for this account

        for _txn in range(randrange(100, 150, 1)):
            dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Every amount generated here is positive.
            tmpAmt = randrange(1, limit, 100)
            usedAmt = usedAmt + tmpAmt

            cat = random.choice(merchant)
            tranType = random.choice(Transaction_Type_Credits)

            row = [str(count) + '_' + dt, acct,
                   gen_data.create_company_name(),
                   cat, python_merchant_cat.All_Merchant_Cat[cat]]

            # date1 is the posting date; date2 (transaction date) is the
            # day BEFORE it.
            date1 = gen_data.create_date(past=True)
            if date1 > maxDate:
                maxDate = date1
            date2 = date1 - timedelta(days=1)

            row.extend([
                date1, date2, tranType, 'US', limit, tmpAmt, usedAmt,
                cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)

        # After all transactions: if the account is overpaid by more than
        # its limit, add a refund row dated 90 days after the last posting.
        # NOTE(review): amounts above are always positive, so usedAmt can
        # never be below -limit here; the branch is kept unchanged for
        # parity with gen_cc_external.
        if usedAmt < limit * (-1):
            row = [str(count) + '_' + dt, acct, '', '', '']
            date1temp = maxDate + timedelta(days=90)
            date2 = date1temp - timedelta(days=1)
            row.extend([
                date1temp, date2, 'Refund', '', limit,
                abs(limit - abs(usedAmt)) * (-1), 0, cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)
def gen_cc_external(merchant, no_trans, count):
    """Write mixed credit/debit card transactions involving external parties.

    For each of ``no_trans`` randomly chosen accounts a credit limit is
    drawn and between 100 and 149 rows are written through the module-level
    ``writer`` (a ``csv.writer`` that must already be open).  Roughly half
    of the rows are credits (negative amount, merchant category
    ``'Non-BMO Acct'``) and half are debits against a random merchant
    category.  If the account ends up overpaid by more than its limit, a
    final ``'Refund'`` row is appended.

    Args:
        merchant: sequence of merchant category codes to sample from; every
            code must be a key of ``python_merchant_cat.All_Merchant_Cat``.
        no_trans: number of accounts to generate transactions for.
        count: starting value of the running row counter that prefixes each
            row id (``"<count>_<timestamp>"``).

    Returns:
        None.  Rows are emitted as a side effect via ``writer.writerow``.
    """
    # Sentinel "earliest possible" date.  Spelled datetime(1, 1, 1) because
    # the former ``datetime(0001,01,01)`` relies on Python 2 octal literals
    # and is a SyntaxError on Python 3; the value is identical.
    min_date = datetime(1, 1, 1)

    for _account in range(no_trans):
        acct = random.choice(python_account_ID.accountid)
        cc_list = python_CC.CC_Dict[acct]

        # Credit limit: normally $1,000-$24,000.  When the first draw is 100
        # (1 in 100) the multiplier is 1 and the $25,000-$49,000 draw wins.
        # NOTE(review): the original comment claimed 10% for the high tier,
        # but the code yields 1% - confirm which is intended.
        limit = max(
            max(randrange(1, 101, 1) - 99, 0) * randrange(25000, 50000, 1000),
            randrange(1000, 25000, 1000))

        usedAmt = 0          # running balance: debits positive, credits negative
        maxDate = min_date   # latest posting date seen for this account

        for _txn in range(randrange(100, 150, 1)):
            dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            # 50/50 split: randrange(1, 3) - 1 is 0 or 1.  A 1 produces a
            # negative (credit) amount; a 0 zeroes the product and a
            # positive (debit) amount is drawn instead.
            tmpAmt = (max(randrange(1, 3, 1) - 1, 0)
                      * randrange(1, limit, 100) * (-1))
            if tmpAmt == 0:
                tmpAmt = randrange(1, limit, 100)
            usedAmt = usedAmt + tmpAmt

            row = [str(count) + '_' + dt, acct,
                   gen_data.create_company_name()]

            if tmpAmt < 0:
                # Credit coming from an account outside the bank: there is
                # no merchant category description.
                tranType = random.choice(Transaction_Type_Credits)
                row.extend(['Non-BMO Acct', ''])
            else:
                tranType = random.choice(Transaction_Type_Debits)
                cat = random.choice(merchant)
                row.extend([cat, python_merchant_cat.All_Merchant_Cat[cat]])

            # date1 is the posting date; date2 (transaction date) is the
            # day BEFORE it.
            date1 = gen_data.create_date(past=True)
            if date1 > maxDate:
                maxDate = date1
            date2 = date1 - timedelta(days=1)

            row.extend([
                date1, date2, tranType, random.choice(Country_Red), limit,
                tmpAmt, usedAmt, cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)

        # After all transactions: if the account is overpaid by more than
        # its limit, add a refund row dated 90 days after the last posting.
        if usedAmt < limit * (-1):
            row = [str(count) + '_' + dt, acct, '', '', '']
            date1temp = maxDate + timedelta(days=90)
            date2 = date1temp - timedelta(days=1)
            row.extend([
                date1temp, date2, 'Refund', '', limit,
                abs(limit - abs(usedAmt)) * (-1), 0, cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)
# Current address: random zip code plus its lookup entry
# (presumably (city, state) - confirm against geo_data).
zip = random.choice(zips.zip)
addr = geo_data.create_city_state_zip[zip]

# Previous address, drawn independently of the current one.
zip2 = random.choice(zips.zip)
addr2 = geo_data.create_city_state_zip[zip2]

# Large-cash-exemption flag for the current csv row.
lrg_cash_ex = random.choice(Yes_No)

# SAR and demarketing flags are only drawn for closed accounts.
if Clsd == 'Yes':
    # Roughly 1% of closed accounts (1 draw in 101) are demarketed without
    # ever having had a SAR filed.
    if randrange(0, 101, 1) == 100 and SAR == 'No':
        demarket = 'Yes'
        dem_date = gen_data.create_date(past=True)
    # Roughly 10% of closed accounts (1 draw in 11) get a SAR, provided
    # they were not already demarketed above.
    if randrange(0, 11, 1) == 10 and demarket == 'No':
        SAR = 'Yes'
        # About 90% of those (10 draws in 11) are demarketed as well.
        if randrange(0, 11, 1) <= 9:
            demarket = 'Yes'
            dem_date = gen_data.create_date(past=True)

# Independently of the closed flag, roughly 1% of rows are marked PEP.
if randrange(0, 101, 1) == 100:
    PEP = 'Yes'

row.extend([
    addr[0], addr[1], zip, 'US',
    addr2[0], addr2[1], zip2, 'US',
    gen_data.create_birthday(min_age=2, max_age=85),
    PEP, SAR, Clsd,
])
'9223':'Bail and Bond Payments', '9311':'Tax Payments Government Agencies', '9399':'Government Services (Not Elsewhere Classified)', '9402':'Postal Services Government Only', '9405':'U.S. Federal Government Agencies or Departments', '9950':'Intra Company Purchases' }; with open('CreditCard_Transaction_MerchantCredits.csv','w') as f1: writer=csv.writer(f1, delimiter=',',lineterminator='\n',) writer.writerow(['rownum'] +['Account_Number'] + ['Merchant_Name']+['Merchant_Category_Code']+['Merchant_Category_Desc'] +\ ['Post_Date'] + ['Transaction_Date'] + ['Transaction_Type'] +['Merchant_Country']+['Credit_Limit']+['Amount']) for i in range(10): dt=datetime.now().strftime("%Y-%m-%d %H:%M:%S") row = [str(i)+'_'+dt]+[random.choice(python_account_ID.accountid)] +[gen_data.create_company_name()] cat=random.choice(Merchant_Category) row.append(cat) row.append(All_Merchant_Cat[cat]) date1= gen_data.create_date(past=True) date2=date1-timedelta(days=1) row.append(date1) row.append(date2) #Set customer credit limit - skew to clients with $1000-$25000 and 10% with $25K - $50K limit = max(max((randrange(1,101,1)-99),0)* randrange(25000,50000,1000),randrange(1000,25000,1000)) tmpAmt = randrange(1,limit,100) row.extend([random.choice(Transaction_Type_Credits),random.choice(Country),limit,tmpAmt]) writer.writerow(row)
'Thriller record': (8, 20), 'Harry Potter book': (5, 35), 'iPhone': (400, 900), 'Rubik’s Cube': (15, 19), 'banana': (10, 10), # how much is a banana, 10 dollars? } salesdata = [] for i in range(2): for i in range(130045): sel = random.sample(humans, 1)[0] product = random.sample(products.keys(), 1)[0] units = random.randint(1, 50) if product != 'Corolla' else random.randint(1, 10) unitprice = round(random.uniform(products[product][0], products[product][1]), 2) salesdata.append(sel + [ str(gen_data.create_date(past=True, max_years_future=1, max_years_past=3)), product, units, unitprice, ]) try: del products['Corolla'] del products['Thriller record'] except: pass df = pd.DataFrame(salesdata) columns = ['name', 'birthday', 'customer', 'orderdate', 'product', 'units', 'unitprice'] df.columns = columns
'6733 Trusts, Except Educational, Religious, and Charitable',\ '8999 Services, NEC',\ ] Stock_Market_Listing=['Australian Stock Exchange',\ 'Brussels Stock Exchange',\ 'Montreal Stock Exchange',\ 'Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found',\ 'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',\ 'Toronto Stock Exchange',\ ] Third_Party_Payment_Processor=Embassy_Consulate Transacting_Provider=Embassy_Consulate fake = Faker() gen_data.create_date(past=True) with open('large.csv','r') as csvinput: with open('large_extended.csv','w') as csvoutput: writer=csv.writer(csvoutput, delimiter=',',lineterminator='\n',) reader = csv.reader(csvinput) row = next(reader) row.append('RELATED_ACCT') row.append('RELATED_TYPE') row.append('PARTY_TYPE') row.append('PARTY_RELATION') row.append('PARTY_STARTDATE')
#Creates Current Address zip=random.choice(zips.zip) addr=geo_data.create_city_state_zip[zip] #Creates Previous address zip2=random.choice(zips.zip) addr2=geo_data.create_city_state_zip[zip2] #Add additional data elements to current csv row lrg_cash_ex=random.choice(Yes_No) #Condition for SARs and Demarketed Clients if(Clsd=='Yes'): #1% of closed accounts are demarketed but never had a SAR filed if (max((randrange(0,101,1)-99),0)==1 and SAR=='No'): demarket='Yes' dem_date=gen_data.create_date(past=True) if (max((randrange(0,11,1)-9),0)==1 and demarket=='No'): #10% of closed accounts have SARs SAR='Yes' #90% of closed accounts with SARs are demarketed if(max((randrange(0,11,1)-9),0)==0): demarket='Yes' dem_date=gen_data.create_date(past=True) if (max((randrange(0,101,1)-99),0)==1): PEP='Yes' row.extend([addr[0],addr[1],zip,'US',addr2[0],addr2[1],zip2,'US',gen_data.create_birthday(min_age=2, max_age=85),PEP,SAR,Clsd]) #Start Generating related accounts from account list once 10,000 accounts are generated if i > 10000: rel = int(random.choice(acct_list))*max((randrange(0,10001,1)-9999),0)
'Harry Potter book': (5, 35), 'iPhone': (400, 900), 'Rubik’s Cube': (15, 19), 'banana': (10, 10), # how much is a banana, 10 dollars? } salesdata = [] for i in range(2345): sel = random.sample(humans, 1)[0] units = random.randint(1, 50) product = random.sample(products.keys(), 1)[0] unitprice = round( random.uniform(products[product][0], products[product][1]), 2) salesdata.append(sel + [ str( gen_data.create_date( past=True, max_years_future=0, max_years_past=1)), product, units, unitprice, ]) df = pd.DataFrame(salesdata) columns = [ 'name', 'birthday', 'customer', 'orderdate', 'product', 'units', 'unitprice' ] df.columns = columns # df.to_json('blooth_sales_data_".json', orient='records') df.to_json('blooth_sales_data_2.json', orient='records',
with open('CreditCard_Transaction_Red_NonAccountHolders.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')

    # Header row: one name per column, in the order values are written below.
    writer.writerow([
        'rownum', 'Account_Number', 'Merchant_Name',
        'Merchant_Category_Code', 'Merchant_Category_Desc',
        'Post_Date', 'Transaction_Date', 'Transaction_Type',
        'Merchant_Country', 'Credit_Limit', 'Amount',
    ])

    # Ten sample transactions for accounts that are not bank customers.
    for i in range(10):
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Random account id, intended to fall outside the reference account
        # file (no collision check is performed here).
        acct = randrange(100000, 100000000, 1)
        row = [str(i) + '_' + dt, acct, gen_data.create_company_name()]

        # Merchant category code and its description.
        cat = random.choice(Merchant_Category)
        row += [cat, All_Merchant_Cat[cat]]

        # Posting date, and the transaction date one day before it.
        date1 = gen_data.create_date(past=True)
        date2 = date1 - timedelta(days=1)
        row += [date1, date2]

        # Credit limit: normally $1,000-$24,000; when the first draw is 100
        # (1 in 100) the $25,000-$49,000 draw is used instead.
        limit = max(
            max(randrange(1, 101, 1) - 99, 0) * randrange(25000, 50000, 1000),
            randrange(1000, 25000, 1000))
        tmpAmt = randrange(1, limit, 100)

        row += [
            random.choice(Transaction_Type),
            random.choice(Country),
            limit,
            tmpAmt,
        ]
        writer.writerow(row)
def createCusts(N):
    """Build ``N`` synthetic customer/account rows.

    Each row is a flat list of column values: account details, up to three
    additional card holders, a scrambled card number, current and previous
    address, risk flags, segmentation data, net worth and a use-case
    scenario.  Values are appended in the order of the ``row.extend`` /
    ``row.append`` calls below.

    Relies on module-level names: ``liSSNMaster`` (master SSN list),
    ``Faker``, ``gen_data``, ``geo_data``, ``zips`` and ``NAICS``.

    Args:
        N: number of rows to generate.  Row ``i`` reads ``liSSNMaster[i]``,
            so ``N`` must not exceed ``len(liSSNMaster)``.

    Returns:
        list: the generated rows, in generation order.
    """
    global liSSNMaster  # only read here; declaration kept from the original

    # ---- Weighted pools: repeating a value raises its probability --------
    HighNetWorth = ['Yes'] + ['No'] * 30          # net worth over $500K
    Related_Type = ['Primary', 'Secondary', 'Joint']
    Party_Type = ['Person', 'Non-Person']
    Party_Relation = ['Customer', 'Non-Customer']
    Yes_No = ['Yes'] + ['No'] * 12
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    Official_Lang = ['English'] * 3 + ['French']
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']

    # Segment rows in output order:
    # (model id, type, name, group, description, score).
    segments = [
        ('01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'),
        ('02', 'Profitability', 'CRS Risk Score', 'Group 1',
         'Mid Risk Tier', '300'),
        ('03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier', '400'),
        ('04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'),
        ('05', 'Risk Tolerance', 'CM Risk', 'Group 4',
         'Geographical Risk', '500'),
    ]

    # Every Yes/No/blank high-risk flag is drawn from the same distribution
    # (the original defined one identical list, or an alias, per flag).
    risk_flag = ['Yes'] + ['No'] * 2 + [''] * 392

    Channel_Onboarding = [
        'E-mail', 'In Person', 'In person - In Branch/Bank Office',
        'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
        'Request for Proposal (RFP)',
    ] + ['Not Applicable'] * 10
    Channel_Ongoing_Transactions = [
        'ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
        'OTC Communication System', 'Phone',
    ] + ['Online'] * 4 + ['In Person'] * 31
    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 \
      + ['Investment Agency - Institutional'] * 5
    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12
    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30

    LowNet = [1, 2] + [0] * 5                      # selects a net-worth band
    Acct_Type = ['B'] + ['C'] * 5                  # business vs consumer
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]  # cards per account
    # Scenario numbers, weighted by repetition.
    Use_Case = ([1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4
                + [2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38] * 7
                + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36] * 65
                + [37] * 73
                + [40, 41] * 2)
    refrating = ['1', '1', '1', '2', '3', '4', '2', '4', '5', '5',
                 '5', '5', '5', '5', '5', '5', '5', '5', '5', '5']

    fake = Faker()
    start = 10786147   # account numbers increase from this seed value
    acct_list = []     # every account number generated so far
    liCSV = []

    # range() instead of xrange() so the function also runs on Python 3.
    for i in range(N):
        # ---- Per-row risk state ------------------------------------------
        PEP = 'No'         # politically exposed person
        SAR = 'No'         # suspicious activity report filed
        Clsd = 'No'        # closed account
        high_risk = 'No'
        hr_rating = ''
        demarket = 'No'    # demarketed by the bank
        dem_date = ''

        # Closed-account flag: 1 draw in 98.
        if randrange(0, 98, 1) == 97:
            Clsd = 'Yes'

        No_CCs = random.choice(Number_CC)

        # Strictly increasing account number (step of 2 to 10).
        acct = start + 1 + randrange(1, 10, 1)
        start = acct

        name = fake.name()
        # tmp[0] is written as the middle name and tmp[1] is appended to
        # the employer name below.
        tmp = gen_data.create_name()
        acct_list.append(acct)

        row = [i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0],
               liSSNMaster[i]]

        # ---- Additional card holders (No_CCs - 1 of them) ----------------
        names = []
        mdl = []   # middle names, to reduce duplicate full names
        ssn = []
        for _holder in range(No_CCs - 1):
            names.append(fake.name())
            mdl.append(gen_data.create_name()[0])
            # Any master SSN except index 0 and the primary holder's own.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt == i:
                randInt -= 1
            ssn.append(liSSNMaster[randInt])

        # Pad with blanks so there are always three holder slots.
        blanks = [''] * (4 - No_CCs)
        names.extend(blanks)
        mdl.extend(blanks)
        ssn.extend(blanks)

        # ---- Card number --------------------------------------------------
        # CC_NO[1][0] is used as the raw card number and CC_NO[0] as the
        # card type.  The last four digits are combined with timestamp
        # digits and a random suffix to make collisions unlikely.
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        clean = re.sub(r'\W', '', str(datetime.now()))
        printCC = (str(CC_TRANS[-4:]) + str(clean[-12:-3])
                   + str(randrange(1111, 9999, randrange(1, 10, 1))))

        row.extend([
            names[0], mdl[0], ssn[0],
            names[1], mdl[1], ssn[1],
            names[2], mdl[2], ssn[2],
            printCC, CC_NO[0],
            gen_data.create_company_name() + ' ' + tmp[1],
            gen_data.create_email(), gen_data.create_job_title(),
        ])

        # ---- Current and previous address ---------------------------------
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        zip_code2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip_code2]

        lrg_cash_ex = random.choice(Yes_No)

        # ---- SAR / demarketing, closed accounts only -----------------------
        if Clsd == 'Yes':
            # ~1% are demarketed without a SAR ever having been filed.
            if randrange(0, 101, 1) == 100 and SAR == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # ~10% of the remaining ones get a SAR ...
            if randrange(0, 11, 1) == 10 and demarket == 'No':
                SAR = 'Yes'
                # ... and ~90% of those are demarketed too.
                if randrange(0, 11, 1) <= 9:
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        # ~1% of all rows are PEPs.
        if randrange(0, 101, 1) == 100:
            PEP = 'Yes'

        row.extend([
            addr[0], addr[1], zip_code, 'US',
            addr2[0], addr2[1], zip_code2, 'US',
            gen_data.create_birthday(min_age=2, max_age=85),
            PEP, SAR, Clsd,
        ])

        # ---- Related account ----------------------------------------------
        # Only after 10,000 rows, so there is a pool of accounts to pick
        # from; then about 1 row in 10,001 is linked to a random account.
        rel = 0
        if i > 10000:
            rel = (int(random.choice(acct_list))
                   * max(randrange(0, 10001, 1) - 9999, 0))
        if rel != 0:
            row.extend([rel, random.choice(Related_Type)])
        else:
            row.extend(['', ''])

        party_start = gen_data.create_date(past=True)
        Consent_Share = random.choice(Yes_No_Consent)

        row.extend([
            random.choice(Party_Type), random.choice(Party_Relation),
            party_start, gen_data.create_date(past=True),
            lrg_cash_ex, demarket, dem_date,
            randrange(0, 100, 1), random.choice(Official_Lang),
        ])

        # Preferred contact channel only when the customer consents.
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        # Zip code again (branch number column) and a 0-4 draw.
        row.extend([zip_code, randrange(0, 5, 1)])

        # ---- Segmentation: one of five fixed model rows --------------------
        row.extend(segments[randrange(0, 5, 1)])

        hr0 = random.choice(risk_flag)    # arms manufacturer
        hr01 = random.choice(risk_flag)   # auction
        hr02 = random.choice(risk_flag)   # cash-intensive business
        hr03 = random.choice(risk_flag)   # casino / gambling
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        # ---- Net worth ------------------------------------------------------
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            row.append(max(
                max(randrange(0, 101, 1) - 99, 0)
                * randrange(1000000, 25000000, 1),
                randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        # ---- Remaining high-risk attributes ---------------------------------
        hr1 = random.choice(risk_flag)    # complex HI vehicle
        hr2 = random.choice(risk_flag)    # dealer in precious metals
        hr3 = random.choice(risk_flag)    # digital PM operator
        hr4 = random.choice(risk_flag)    # embassy / consulate
        hr5 = random.choice(risk_flag)    # currency exchange
        hr6 = random.choice(risk_flag)    # foreign financial institution
        hr7 = random.choice(risk_flag)    # foreign government
        hr8 = random.choice(risk_flag)    # foreign non-bank financial inst.
        hr9 = random.choice(risk_flag)    # internet gambling
        hr10 = random.choice(risk_flag)   # medical marijuana dispensary
        hr11 = random.choice(risk_flag)   # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(risk_flag)   # non-regulated financial inst.
        hr14 = random.choice(risk_flag)   # not-for-profit
        hr16 = random.choice(risk_flag)   # privately owned ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(risk_flag)   # used vehicle sales
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(risk_flag)   # third-party payment processor
        hr23 = random.choice(risk_flag)   # transacting provider

        # Any single 'Yes' among these makes the customer high risk.
        risk_inputs = (
            PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03,
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
            hr13, hr14, hr16, hr17, hr18, hr22, hr23, HighNetWorthFlag,
        )
        if 'Yes' in risk_inputs:
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)

        # Otherwise up to three independent ~1% chances of being rated
        # high risk anyway.  (The original also tested SAR / PEP == 'No',
        # which is always true once high_risk is still 'No'.)
        for _attempt in range(3):
            if high_risk == 'No' and randrange(0, 101, 1) == 100:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)

        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            HighNetWorthFlag, high_risk, hr_rating, random.choice(Use_Case),
        ])
        liCSV.append(row)

    return liCSV
def __init__(self, i, acct, liSSNMaster, acct_list):
    """Populate one synthetic customer record.

    Relies on module-level choice pools (``Acct_Type``, ``Number_CC``,
    ``Yes_No_Cust_Flag``, ``Clsd_flag``, ...) and on ``fake``, ``gen_data``,
    ``geo_data``, ``zips`` and ``NAICS``.

    Args:
        i: row number; also the index of this customer's SSN in
            ``liSSNMaster``.
        acct: account id stored on the record.
        liSSNMaster: master list of SSN values.
        acct_list: account ids generated so far; sampled for
            ``RELATED_ACCT`` once ``i`` exceeds 10000.
    """
    self.ROWNUM = i
    self.ACCOUNTID = acct
    self.SSN = liSSNMaster[i]
    self.ACCT_TYPE = choice(Acct_Type)
    self.NUM_CCS = choice(Number_CC)
    self.NAME = fake.name()
    self.CUSTEMAIL = gen_data.create_email()
    self.OCCUPATION = gen_data.create_job_title()
    self.COUNTRY = 'US'
    self.PREVIOUS_COUNTRY = 'US'
    self.DOB = gen_data.create_birthday(min_age=2, max_age=85)
    self.PARTY_ENDDATE = gen_data.create_date(past=True)
    self.CONSENT_SHARING = choice(Yes_No_Consent)
    self.LARGE_CASH_EXEMPT = choice(Yes_No)
    self.PARTY_TYPE = choice(Party_Type)
    self.PARTY_RELATION = choice(Party_Relation)
    self.PROB_DEFAULT_RISKR = randrange(0, 100, 1)
    self.OFFICIAL_LANG_PREF = choice(Official_Lang)
    self.DEPENDANTS_COUNT = randrange(0, 5, 1)
    self.USE_CASE_SCENARIO = choice(Use_Case)
    self.CLOSEDACCOUNT = choice(Clsd_flag)
    self.HIGH_NET_WORTH = choice(HighNetWorth)
    self.PARTY_STARTDATE = gen_data.create_date(past=True)

    # ---- Business-type and channel attributes ---------------------------
    self.ARMS_MANUFACTURER = choice(Yes_No_Cust_Flag)
    self.AUCTION = choice(Yes_No_Cust_Flag)
    self.CASHINTENSIVE_BUSINESS = choice(Yes_No_Cust_Flag)
    self.CASINO_GAMBLING = choice(Yes_No_Cust_Flag)
    self.CHANNEL_ONBOARDING = choice(Channel_Onboarding)
    self.CHANNEL_ONGOING_TRANSACTIONS = choice(Channel_Ongoing_Transactions)
    self.COMPLEX_HI_VEHICLE = choice(Yes_No_Cust_Flag)
    self.DEALER_PRECIOUS_METAL = choice(Yes_No_Cust_Flag)
    self.DIGITAL_PM_OPERATOR = choice(Yes_No_Cust_Flag)
    self.EMBASSY_CONSULATE = choice(Yes_No_Cust_Flag)
    self.EXCHANGE_CURRENCY = choice(Yes_No_Cust_Flag)
    self.FOREIGN_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.FOREIGN_GOVERNMENT = choice(Yes_No_Cust_Flag)
    self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.INTERNET_GAMBLING = choice(Yes_No_Cust_Flag)
    self.MEDICAL_MARIJUANA_DISPENSARY = choice(Yes_No_Cust_Flag)
    self.MONEY_SERVICE_BUSINESS = choice(Yes_No_Cust_Flag)
    self.NAICS_CODE = choice(NAICS.NAICS_Code)
    self.NONREGULATED_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.NOT_PROFIT = choice(Yes_No_Cust_Flag)
    self.PRIVATELY_ATM_OPERATOR = choice(Yes_No_Cust_Flag)
    self.PRODUCTS = choice(Products)
    self.SALES_USED_VEHICLES = choice(Yes_No_Cust_Flag)
    self.SERVICES = choice(Services)
    self.SIC_CODE = choice(SIC_Code)
    self.STOCK_MARKET_LISTING = choice(Stock_Market_Listing)
    self.THIRD_PARTY_PAYMENT_PROCESSOR = choice(Yes_No_Cust_Flag)
    self.TRANSACTING_PROVIDER = choice(Yes_No_Cust_Flag)

    # ---- Current and previous address -----------------------------------
    self.ZIP = choice(zips.zip)
    self.PREVIOUS_ZIP = choice(zips.zip)
    addr = geo_data.create_city_state_zip[self.ZIP]
    addr2 = geo_data.create_city_state_zip[self.PREVIOUS_ZIP]
    self.CITY = addr[0]
    self.STATE = addr[1]
    self.PREVIOUS_CITY = addr2[0]
    self.PREVIOUS_STATE = addr2[1]
    self.PRIMARY_BRANCH_NO = self.ZIP

    # tmp[0] becomes the middle name, tmp[1] is appended to the employer.
    tmp = gen_data.create_name()
    self.M_NAME = tmp[0]
    self.EMPLOYER = gen_data.create_company_name() + ' ' + tmp[1]

    # ---- Additional (authorised) card holders ---------------------------
    # The primary holder is NAME/SSN, so an account with NUM_CCS cards has
    # NUM_CCS - 1 authorised users.  The previous code drew a second,
    # unrelated card count and generated one user too many, so the
    # authorised-user columns could contradict NUM_CCS.
    names = []
    mdl = []   # middle names, to reduce duplicate full names
    ssn = []
    for _holder in range(self.NUM_CCS - 1):
        names.append(fake.name())
        mdl.append(gen_data.create_name()[0])
        # Any master SSN except index 0 and the primary holder's own.
        randInt = randrange(1, len(liSSNMaster), 1)
        if randInt == i:
            randInt -= 1
        ssn.append(liSSNMaster[randInt])

    # Pad with blanks so the three authorised-user slots always exist.
    blanks = [''] * (3 - len(names))
    names.extend(blanks)
    mdl.extend(blanks)
    ssn.extend(blanks)

    self.AUTHORIZED_NAME2 = names[0]
    self.M_NAME2 = mdl[0]
    self.SSN2 = ssn[0]
    self.AUTHORIZED_NAME3 = names[1]
    self.M_NAME3 = mdl[1]
    self.SSN3 = ssn[1]
    self.AUTHORIZED_NAME4 = names[2]
    self.M_NAME4 = mdl[2]
    self.SSN4 = ssn[2]

    # ---- Card number ----------------------------------------------------
    # CC_NO[1][0] is used as the raw card number and CC_NO[0] as the card
    # type.  The last four digits are combined with timestamp digits and a
    # random suffix to make collisions unlikely.
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    clean = re.sub(r'\W', '', str(datetime.now()))
    self.CREDITCARDNUMBER = (
        str(CC_TRANS[-4:]) + str(clean[-12:-3])
        + str(randrange(1111, 9999, randrange(1, 10, 1))))
    self.CREDITCARDTYPE = CC_NO[0]

    # ---- Related account ------------------------------------------------
    # Only after 10,000 rows, so there is a pool of accounts to pick from;
    # then about 1 row in 10,001 is linked to a random account.
    self.RELATED_ACCT = ''
    self.RELATED_TYPE = ''
    if i > 10000:
        rel = int(choice(acct_list)) * max(randrange(0, 10001, 1) - 9999, 0)
        if rel != 0:
            self.RELATED_ACCT = rel
            self.RELATED_TYPE = choice(Related_Type)

    # Preferred contact channel only when the customer consents.
    self.PREFERRED_CHANNEL = ''
    if self.CONSENT_SHARING == 'Yes':
        self.PREFERRED_CHANNEL = choice(Prefered_Channel)

    # ---- Segmentation: one of five fixed model rows ----------------------
    # (model id, type, name, group, group description, score)
    segments = (
        ('01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'),
        ('02', 'Profitability', 'CRS Risk Score', 'Group 1',
         'Mid Risk Tier', '300'),
        ('03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier', '400'),
        ('04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'),
        ('05', 'Risk Tolerance', 'CM Risk', 'Group 4',
         'Geographical Risk', '500'),
    )
    (self.SEG_MODEL_ID, self.SEG_MODEL_TYPE, self.SEG_MODEL_NAME,
     self.SEG_MODEL_GROUP, self.SEG_M_GRP_DESC,
     self.SEG_MODEL_SCORE) = segments[randrange(0, 5, 1)]

    # ---- Net worth --------------------------------------------------------
    if self.HIGH_NET_WORTH == 'Yes':
        self.CLIENT_NET_WORTH = max(
            max(randrange(0, 101, 1) - 99, 0)
            * randrange(5000000, 25000000, 1),
            randrange(1000000, 5000000, 1))
    else:
        flag = choice(LowNet)
        if flag == 0:
            self.CLIENT_NET_WORTH = randrange(-250000, 600000, 1)
        elif flag == 1:
            self.CLIENT_NET_WORTH = randrange(149000, 151000, 1)
        else:
            self.CLIENT_NET_WORTH = randrange(40000, 50000, 1)

    # ---- PEP / SAR / demarketing -----------------------------------------
    # Roughly 1% of accounts (1 draw in 101) are politically exposed.
    self.PEP = 'No'
    if randrange(0, 101, 1) == 100:
        self.PEP = 'Yes'

    self.DEMARKET_FLAG = 'No'
    self.DEMARKET_DATE = ''
    self.SAR = 'No'
    if self.CLOSEDACCOUNT == 'Yes':
        # ~1% of closed accounts are demarketed without a SAR.
        if randrange(0, 101, 1) == 100:
            self.DEMARKET_FLAG = 'Yes'
            self.DEMARKET_DATE = gen_data.create_date(past=True)
        # ~10% of the remaining closed accounts get a SAR ...
        if self.DEMARKET_FLAG == 'No' and randrange(0, 11, 1) == 10:
            self.SAR = 'Yes'
            # ... and ~90% of those are demarketed as well.
            if randrange(0, 11, 1) <= 9:
                self.DEMARKET_FLAG = 'Yes'
                self.DEMARKET_DATE = gen_data.create_date(past=True)

    # ---- Overall high-risk rating ------------------------------------------
    # Any single 'Yes' among these flags makes the customer high risk;
    # otherwise there is still a ~1% chance of a high-risk rating.
    risk_inputs = (
        self.PEP, self.SAR, self.LARGE_CASH_EXEMPT, self.DEMARKET_FLAG,
        self.ARMS_MANUFACTURER, self.AUCTION, self.CASHINTENSIVE_BUSINESS,
        self.CASINO_GAMBLING, self.COMPLEX_HI_VEHICLE,
        self.DEALER_PRECIOUS_METAL, self.DIGITAL_PM_OPERATOR,
        self.EMBASSY_CONSULATE, self.EXCHANGE_CURRENCY,
        self.FOREIGN_FINANCIAL_INSTITUTION, self.FOREIGN_GOVERNMENT,
        self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION, self.INTERNET_GAMBLING,
        self.MEDICAL_MARIJUANA_DISPENSARY, self.MONEY_SERVICE_BUSINESS,
        self.NONREGULATED_FINANCIAL_INSTITUTION, self.NOT_PROFIT,
        self.PRIVATELY_ATM_OPERATOR, self.SALES_USED_VEHICLES,
        self.THIRD_PARTY_PAYMENT_PROCESSOR, self.TRANSACTING_PROVIDER,
        self.HIGH_NET_WORTH,
    )
    self.HIGH_RISK = 'No'
    self.RISK_RATING = ''
    if 'Yes' in risk_inputs:
        self.HIGH_RISK = 'Yes'
        self.RISK_RATING = choice(refrating)
    elif randrange(0, 101, 1) == 100:
        self.HIGH_RISK = 'Yes'
        self.RISK_RATING = choice(refrating)
def generate_customers():
    """Write 30 synthetic customer rows to ``uber_cust.csv``.

    The file is opened through ``get_file`` and written as a pipe-delimited
    CSV (newline-terminated): one header row, then one row per generated
    account.  Every value is random; the function takes no arguments and
    returns ``None``.

    NOTE(review): the header lists both 'accountCategory' and 'accountType'
    while each data row carries a single account-type value, so the header is
    one column wider than the rows (88 vs 87).  Left unchanged here; confirm
    the intended schema before realigning.
    """
    num_accounts = 30

    header = [
        'ROWNUM', 'accountNumber', 'accountCategory', 'accountType',
        'NUM_CCS', 'NAME', 'M_NAME', 'SSN',
        'AUTHORIZED_NAME2', 'M_NAME2', 'SSN2',
        'AUTHORIZED_NAME3', 'M_NAME3', 'SSN3',
        'AUTHORIZED_NAME4', 'M_NAME4', 'SSN4',
        'CREDITCARDNUMBER', 'CREDITCARDTYPE', 'EMPLOYER', 'CUSTEMAIL',
        'OCCUPATION', 'CITY', 'STATE', 'ZIP', 'COUNTRY',
        'PREVIOUS_CITY', 'PREVIOUS_STATE', 'PREVIOUS_ZIP',
        'PREVIOUS_COUNTRY', 'DOB', 'politically_exposed_person',
        'suspicious_activity_report', 'CLOSEDACCOUNT',
        'RELATED_ACCT', 'RELATED_TYPE', 'PARTY_TYPE', 'PARTY_RELATION',
        'PARTY_STARTDATE', 'PARTY_ENDDATE',
        'LARGE_CASH_EXEMPT', 'DEMARKET_FLAG', 'DEMARKET_DATE',
        'PROB_DEFAULT_RISKR', 'OFFICIAL_LANG_PREF', 'CONSENT_SHARING',
        'PREFERRED_CHANNEL', 'PRIMARY_BRANCH_NO', 'DEPENDANTS_COUNT',
        'SEG_MODEL_ID', 'SEG_MODEL_TYPE',
        'SEG_MODEL_NAME', 'SEG_MODEL_GROUP', 'SEG_M_GRP_DESC',
        'SEG_MODEL_SCORE', 'ARMS_MANUFACTURER', 'AUCTION',
        'CASHINTENSIVE_BUSINESS', 'CASINO_GAMBLING', 'CHANNEL_ONBOARDING',
        'CHANNEL_ONGOING_TRANSACTIONS', 'CLIENT_NET_WORTH',
        'COMPLEX_HI_VEHICLE', 'DEALER_PRECIOUS_METAL', 'DIGITAL_PM_OPERATOR',
        'EMBASSY_CONSULATE', 'EXCHANGE_CURRENCY',
        'FOREIGN_FINANCIAL_INSTITUTION', 'FOREIGN_GOVERNMENT',
        'FOREIGN_NONBANK_FINANCIAL_INSTITUTION', 'INTERNET_GAMBLING',
        'MEDICAL_MARIJUANA_DISPENSARY', 'MONEY_SERVICE_BUSINESS', 'NAICS_CODE',
        'NONREGULATED_FINANCIAL_INSTITUTION',
        'NOT_PROFIT', 'PRIVATELY_ATM_OPERATOR', 'PRODUCTS',
        'SALES_USED_VEHICLES', 'SERVICES',
        'SIC_CODE', 'STOCK_MARKET_LISTING',
        'THIRD_PARTY_PAYMENT_PROCESSOR', 'TRANSACTING_PROVIDER',
        'HIGH_NET_WORTH', 'HIGH_RISK', 'RISK_RATING',
        'USE_CASE_SCENARIO',
    ]

    with get_file('uber_cust.csv', 'w') as f1:
        # Pipe-delimited output, '\n' between rows.
        writer = csv.writer(f1, delimiter='|', lineterminator='\n')
        writer.writerow(header)

        start = 10
        acct_list = []

        # Pool of unique 9-digit SSN strings, one per account.  Keep drawing
        # until the pool is full: a single retry after a collision could still
        # leave it short and make li_ssn_master[i] fail below.
        ssn_pool = set()
        while len(ssn_pool) < num_accounts:
            ssn_pool.add(
                ''.join(str(random.randint(0, 9)) for _ in range(9)))
        li_ssn_master = list(ssn_pool)

        for i in range(num_accounts):
            # Risk-related flags all start cleared.
            politically_exposed_person = 'No'
            suspicious_activity_report = 'No'
            closed_cust_acct = 'No'
            high_risk = 'No'
            hr_rating = ''
            # Customer that was demarketed by the bank
            demarket = 'No'
            dem_date = ''

            # Closed-account flag: true only when randrange(0, 98) yields 97.
            if max((randrange(0, 98, 1) - 96), 0) == 1:
                closed_cust_acct = 'Yes'

            # Number of card holders on the account.
            no_ccs = weighted_options('number_cc')

            # Account numbers increase by 2..10 from the previous one.
            acct = start + 1 + randrange(1, 10, 1)
            start = acct

            name = fake.name()
            tmp = gen_data.create_name()
            acct_list.append(acct)

            # Main account holder takes the SSN at the current pool index.
            row = [
                i, acct, weighted_options('acct_type'), no_ccs, name, tmp[0],
                li_ssn_master[i]
            ]

            # Name / middle name / SSN for each additional card holder.
            names = []
            ssn = []
            mdl = []
            for _ in range(no_ccs - 1):
                names.append(fake.name())
                mdl.append(gen_data.create_name()[0])
                # Pick another SSN from the pool, stepping back one slot when
                # the draw lands on the main holder's own index.
                pick = randrange(1, len(li_ssn_master), 1)
                if pick == i:
                    pick -= 1
                ssn.append(li_ssn_master[pick])

            # Blank-fill so that three additional-holder slots always exist.
            padding = [''] * (4 - no_ccs)
            names.extend(padding)
            ssn.extend(padding)
            mdl.extend(padding)

            # create_cc_number() result is indexed as (type, [number, ...]).
            CC_NO = gen_data.create_cc_number()
            CC_TRANS = CC_NO[1][0]
            # Printed card number: last 4 generated digits + a slice of the
            # current timestamp digits + a random number.
            clean = re.sub(r'\W', '', str(datetime.now()))
            printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
                randrange(1111, 9999, randrange(1, 10, 1)))

            row.extend([
                names[0], mdl[0], ssn[0],
                names[1], mdl[1], ssn[1],
                names[2], mdl[2], ssn[2],
                printCC, CC_NO[0],
                gen_data.create_company_name() + ' ' + tmp[1],
                gen_data.create_email(),
                gen_data.create_job_title()
            ])

            # Current and previous address, each looked up from a random zip.
            zip_code = random.choice(zips.zip)
            addr = geo_data.create_city_state_zip[zip_code]
            prev_zip = random.choice(zips.zip)
            addr2 = geo_data.create_city_state_zip[prev_zip]

            lrg_cash_ex = weighted_options('yes_no')

            # SAR / demarketing only apply to closed accounts.
            if closed_cust_acct == 'Yes':
                # Demarketed without a SAR ever being filed.
                if risk_range() and suspicious_activity_report == 'No':
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)
                # Otherwise the account may get a SAR ...
                if risk_range() and demarket == 'No':
                    suspicious_activity_report = 'Yes'
                    # ... and most SAR accounts are then demarketed
                    # (every randrange(0, 11) draw except 10).
                    if max((randrange(0, 11, 1) - 9), 0) == 0:
                        demarket = 'Yes'
                        dem_date = gen_data.create_date(past=True)

            if risk_range():
                politically_exposed_person = 'Yes'

            row.extend([
                addr[0], addr[1], zip_code, 'US',
                addr2[0], addr2[1], prev_zip, 'US',
                gen_data.create_birthday(min_age=2, max_age=85),
                politically_exposed_person, suspicious_activity_report,
                closed_cust_acct
            ])

            # Related accounts are only generated after 10,000 accounts, which
            # cannot happen with the 30 accounts produced here; the branch is
            # kept to mirror the other generators in this file.
            if i > 10000:
                rel = int(random.choice(acct_list)) * max(
                    (randrange(0, 10001, 1) - 9999), 0)
                if rel != 0:
                    row.append(rel)
                    row.append(weighted_options('related_type'))
                else:
                    row.append('')
                    row.append('')
            else:
                row.append('')
                row.append('')

            # Randomly generated account start date.
            party_start = gen_data.create_date(past=True)
            # Consent option for sharing info.
            consent_share = weighted_options('yes_no')

            row.extend([
                weighted_options('party_type'),
                weighted_options('party_relation'),
                party_start,
                gen_data.create_date(past=True),
                lrg_cash_ex, demarket, dem_date,
                randrange(0, 100, 1),
                weighted_options('official_lang')
            ])

            # Preferred contact channel is only filled in when consent given.
            if consent_share == 'Yes':
                row.extend(['Yes', weighted_options('preferred_channel')])
            else:
                row.extend(['No', ''])

            # DO NOT USE CUST STATUS BELOW - NOT INTEGRATED WITH CLOSED STATUS!
            row.extend([zip_code, randrange(0, 5, 1)])

            # One segment (0..4) selects the same index in every model table.
            Segment_ID = randrange(0, 5, 1) % 5
            row.extend([
                MODEL_ID[Segment_ID], SEG_MODEL_TYPE[Segment_ID],
                SEG_MODEL_NAME[Segment_ID], SEG_MODEL_GROUP[Segment_ID],
                SEG_MODEL_DESCRIPTION[Segment_ID], SEG_MODEL_SCORE[Segment_ID]
            ])

            arms_manufacturer = weighted_options('arms_manufacturers')
            auction = weighted_options('auction')
            cash_intensive_business = weighted_options(
                'cash_intensive_business')
            casino_gambling = weighted_options('casino_gambling')
            chan_ob = weighted_options('channel_onboarding')
            chan_txn = weighted_options('channel_ongoing_txn')
            row.extend([
                arms_manufacturer, auction, cash_intensive_business,
                casino_gambling, chan_ob, chan_txn
            ])

            # Net worth depends on the high-net-worth flag.
            high_net_worth_flag = weighted_options('high_net_worth')
            if high_net_worth_flag == 'Yes':
                row.append(
                    max(
                        max((randrange(0, 101, 1) - 99), 0) *
                        randrange(1000000, 25000000, 1),
                        randrange(1000000, 5000000, 1)))
            else:
                flag = weighted_options('low_net')
                if flag == 0:
                    row.append(randrange(-250000, 600000, 1))
                elif flag == 1:
                    row.append(randrange(149000, 151000, 1))
                else:
                    row.append(randrange(40000, 50000, 1))

            # Remaining attributes (hr15, occupation, is intentionally
            # absent: the job title was already added above).
            hr1 = weighted_options('complex_hi_vehicle')
            hr2 = weighted_options('dealer_precious_metal')
            hr3 = weighted_options('digital_pm_operator')
            hr4 = weighted_options(EMBASSY_CONSULATE)
            hr5 = weighted_options(EXCHANGE_CURRENCY)
            hr6 = weighted_options(FOREIGN_FINANCIAL_INSTITUTION)
            hr7 = weighted_options(FOREIGN_GOVT)
            hr8 = weighted_options(FOREIGN_NONBANK_FINANCIAL_INSTITUTION)
            hr9 = weighted_options(INTERNET_GAMBLING)
            hr10 = weighted_options(MEDICAL_MARIJUANA_DISPENSARY)
            hr11 = weighted_options(MONEY_SERVICE_BUSINESS)
            hr12 = random.choice(NAICS.NAICS_Code)
            hr13 = weighted_options(NONREGULATED_FINANCIAL_INSTITUTION)
            hr14 = weighted_options(NOT_PROFIT)
            hr16 = weighted_options(PRIVATE_ATM_OPERATOR)
            hr17 = weighted_options('products')
            hr18 = weighted_options(SALES_USED_VEHICLES)
            hr19 = weighted_options('services')
            hr20 = weighted_options('sic_code')
            hr21 = weighted_options('stock_market_listing')
            hr22 = weighted_options(THIRD_PARTY_PAYMENT_PROCESSOR)
            hr23 = weighted_options(TRANSACTING_PROVIDER)

            # Any single 'Yes' among these makes the customer high risk.
            if 'Yes' in (politically_exposed_person,
                         suspicious_activity_report, lrg_cash_ex, demarket,
                         arms_manufacturer, auction, cash_intensive_business,
                         casino_gambling, hr1, hr2, hr3, hr4, hr5, hr6, hr7,
                         hr8, hr9, hr10, hr11, hr13, hr14, hr16, hr17, hr18,
                         hr22, hr23, high_net_worth_flag):
                high_risk = 'Yes'
                hr_rating = weighted_options('refrating')

            # Up to three further risk_range() draws for customers with no
            # flag set.  While high_risk is still 'No' the SAR and PEP flags
            # are necessarily 'No' too, so only high_risk needs checking.
            for _ in range(3):
                if high_risk == 'No' and risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')

            row.extend([
                hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
                hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
                high_net_worth_flag, high_risk, hr_rating,
                random.choice(USE_CASE)
            ])

            # End the current row
            writer.writerow(row)
def createCusts(N):
    """Build ``N`` synthetic customer rows and return them as a list.

    Args:
        N: number of customer rows to generate.

    Returns:
        A list of ``N`` rows; each row is a flat list of 87 values (account
        data, card holders, addresses, risk flags, segment data, ...).

    The module-level ``liSSNMaster`` list is read, not modified: row ``i``
    uses ``liSSNMaster[i]`` for the main holder, so the list needs at least
    ``N`` entries, and additional card holders draw from index 1 upwards.
    """
    # --- Weighted choice tables (repetition encodes the weight) ------------
    # Client whose net worth is over $500K
    HighNetWorth = ['Yes'] + ['No'] * 30
    # Type of related account
    Related_Type = ['Primary', 'Secondary', 'Joint']
    # How the account was opened
    Party_Type = ['Person', 'Non-Person']
    Party_Relation = ['Customer', 'Non-Customer']
    # Generic Yes/No flag and consent flag
    Yes_No = ['Yes'] + ['No'] * 12
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    # Official language
    Official_Lang = ['English'] * 3 + ['French']
    # Method of communication
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']

    # Segment model tables; one random index selects a value from each.
    Seg_Model_Type = [
        'LOB Specific', 'Profitability', 'Geographical', 'Behavioral',
        'Risk Tolerance'
    ]
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Name = [
        'IRRI', 'CRS Risk Score', 'Geo Risk', 'Financial Behavior Risk',
        'CM Risk'
    ]
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = [
        'High Risk Tier', 'Mid Risk Tier', 'Low Risk Tier', 'Vertical Risk',
        'Geographical Risk'
    ]

    # Every high-risk business flag (arms manufacturer, auction, casino,
    # embassy, ...) uses this same distribution: 1 'Yes', 2 'No', 392 blank.
    sparse_flag = ['Yes'] + ['No'] * 2 + [''] * 392

    # Client onboarding channel
    Channel_Onboarding = [
        'E-mail', 'In Person', 'In person - In Branch/Bank Office',
        'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
        'Request for Proposal (RFP)'
    ] + ['Not Applicable'] * 10
    # Ongoing transaction channel
    Channel_Ongoing_Transactions = [
        'ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
        'OTC Communication System', 'Phone'
    ] + ['Online'] * 4 + ['In Person'] * 31

    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 + [
        'Investment Agency - Institutional'
    ] * 5

    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19

    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12

    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30

    # Selector for the low-net-worth band
    LowNet = [1, 2] + [0] * 5
    # Consumer vs Business
    Acct_Type = ['B'] + ['C'] * 5
    # Number of credit card holders per account
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    # Use-case scenario ids
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 + [
        2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38
    ] * 7 + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36
             ] * 65 + [37] * 73 + [40, 41] * 2
    refrating = [
        '1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5', '5', '5',
        '5', '5', '5', '5', '5', '5'
    ]

    fake = Faker()
    start = 10786147
    acct_list = []
    liCSV = []

    # xrange: the file targets Python 2 and N may be large.
    for i in xrange(N):
        # Risk-related flags all start cleared.
        pep = 'No'          # politically exposed person
        sar = 'No'          # suspicious activity report
        closed = 'No'       # closed account
        high_risk = 'No'
        hr_rating = ''
        # Customer that was demarketed by the bank
        demarket = 'No'
        dem_date = ''

        # Closed-account flag: true only when randrange(0, 98) yields 97.
        if max((randrange(0, 98, 1) - 96), 0) == 1:
            closed = 'Yes'

        # Number of card holders on the account.
        no_ccs = random.choice(Number_CC)

        # Account numbers increase by 2..10 from the previous one.
        acct = start + 1 + randrange(1, 10, 1)
        start = acct

        # Customer name, plus a generated name used for the middle name.
        name = fake.name()
        tmp = gen_data.create_name()
        acct_list.append(acct)

        # Main account holder takes the SSN at the current master index.
        row = [
            i, acct, random.choice(Acct_Type), no_ccs, name, tmp[0],
            liSSNMaster[i]
        ]

        # Name / middle name / SSN for each additional card holder.
        names = []
        ssn = []
        mdl = []
        for _ in range(no_ccs - 1):
            names.append(fake.name())
            mdl.append(gen_data.create_name()[0])
            # Pick another SSN from the master list, stepping back one slot
            # when the draw lands on the main holder's own index.
            pick = randrange(1, len(liSSNMaster), 1)
            if pick == i:
                pick -= 1
            ssn.append(liSSNMaster[pick])

        # Blank-fill so that three additional-holder slots always exist.
        padding = [''] * (4 - no_ccs)
        names.extend(padding)
        ssn.extend(padding)
        mdl.extend(padding)

        # NOTE(review): the sibling generators in this file call
        # gen_data.create_cc_number(); confirm cc_number() is the intended
        # function here.  The result is indexed as (type, [number, ...]).
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        # Printed card number: last 4 generated digits + a slice of the
        # current timestamp digits + a random number.
        clean = re.sub(r'\W', '', str(datetime.now()))
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))

        row.extend([
            names[0], mdl[0], ssn[0],
            names[1], mdl[1], ssn[1],
            names[2], mdl[2], ssn[2],
            printCC, CC_NO[0],
            gen_data.create_company_name() + ' ' + tmp[1],
            gen_data.create_email(),
            gen_data.create_job_title()
        ])

        # Current and previous address, each looked up from a random zip.
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        prev_zip = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[prev_zip]

        lrg_cash_ex = random.choice(Yes_No)

        # SAR / demarketing only apply to closed accounts.
        if closed == 'Yes':
            # Demarketed without a SAR ever being filed
            # (only when randrange(0, 101) yields 100).
            if max((randrange(0, 101, 1) - 99), 0) == 1 and sar == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # Otherwise the account may get a SAR
            # (only when randrange(0, 11) yields 10) ...
            if max((randrange(0, 11, 1) - 9), 0) == 1 and demarket == 'No':
                sar = 'Yes'
                # ... and most SAR accounts are then demarketed
                # (every randrange(0, 11) draw except 10).
                if max((randrange(0, 11, 1) - 9), 0) == 0:
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        # PEP flag: only when randrange(0, 101) yields 100.
        if max((randrange(0, 101, 1) - 99), 0) == 1:
            pep = 'Yes'

        row.extend([
            addr[0], addr[1], zip_code, 'US',
            addr2[0], addr2[1], prev_zip, 'US',
            gen_data.create_birthday(min_age=2, max_age=85),
            pep, sar, closed
        ])

        # Related accounts are only drawn once 10,000 accounts exist, to
        # avoid duplicating accounts in the beginning.  The multiplier is 1
        # only when randrange(0, 10001) yields 10000, otherwise rel is 0.
        if i > 10000:
            rel = int(random.choice(acct_list)) * max(
                (randrange(0, 10001, 1) - 9999), 0)
            if rel != 0:
                row.append(rel)
                row.append(random.choice(Related_Type))
            else:
                row.append('')
                row.append('')
        else:
            row.append('')
            row.append('')

        # Randomly generated account start date.
        party_start = gen_data.create_date(past=True)
        # Consent option for sharing info.
        consent_share = random.choice(Yes_No_Consent)

        row.extend([
            random.choice(Party_Type),
            random.choice(Party_Relation),
            party_start,
            gen_data.create_date(past=True),
            lrg_cash_ex, demarket, dem_date,
            randrange(0, 100, 1),
            random.choice(Official_Lang)
        ])

        # Preferred contact channel is only filled in when consent is given.
        if consent_share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        row.extend([zip_code, randrange(0, 5, 1)])

        # One segment (0..4) selects the same index in every model table.
        Segment_ID = randrange(0, 5, 1) % 5
        row.extend([
            Model_ID[Segment_ID], Seg_Model_Type[Segment_ID],
            Seg_Model_Name[Segment_ID], Seg_Model_Group[Segment_ID],
            Seg_Model_Description[Segment_ID], Seg_Model_Score[Segment_ID]
        ])

        hr0 = random.choice(sparse_flag)    # arms manufacturer
        hr01 = random.choice(sparse_flag)   # auction
        hr02 = random.choice(sparse_flag)   # cash-intensive business
        hr03 = random.choice(sparse_flag)   # casino / gambling
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        # Net worth depends on the high-net-worth flag.
        high_net_worth_flag = random.choice(HighNetWorth)
        if high_net_worth_flag == 'Yes':
            row.append(
                max(
                    max((randrange(0, 101, 1) - 99), 0) *
                    randrange(1000000, 25000000, 1),
                    randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        # Remaining attributes (hr15, occupation, is intentionally absent:
        # the job title was already added above through gen_data).
        hr1 = random.choice(sparse_flag)    # complex HI vehicle
        hr2 = random.choice(sparse_flag)    # dealer in precious metal
        hr3 = random.choice(sparse_flag)    # digital PM operator
        hr4 = random.choice(sparse_flag)    # embassy / consulate
        hr5 = random.choice(sparse_flag)    # exchange currency
        hr6 = random.choice(sparse_flag)    # foreign financial institution
        hr7 = random.choice(sparse_flag)    # foreign government
        hr8 = random.choice(sparse_flag)    # foreign non-bank fin. inst.
        hr9 = random.choice(sparse_flag)    # internet gambling
        hr10 = random.choice(sparse_flag)   # medical marijuana dispensary
        hr11 = random.choice(sparse_flag)   # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(sparse_flag)   # non-regulated fin. inst.
        hr14 = random.choice(sparse_flag)   # not-for-profit
        hr16 = random.choice(sparse_flag)   # privately owned ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(sparse_flag)   # sales of used vehicles
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(sparse_flag)   # third-party payment processor
        hr23 = random.choice(sparse_flag)   # transacting provider

        # Any single 'Yes' among these makes the customer high risk.
        if 'Yes' in (pep, sar, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03,
                     hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
                     hr13, hr14, hr16, hr17, hr18, hr22, hr23,
                     high_net_worth_flag):
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)

        # Up to three further draws for customers with no flag set.  While
        # high_risk is still 'No' the SAR and PEP flags are necessarily 'No'
        # too, so only high_risk needs checking.
        for _ in range(3):
            if high_risk == 'No':
                if max((randrange(0, 101, 1) - 99), 0) == 1:
                    high_risk = 'Yes'
                    hr_rating = random.choice(refrating)

        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            high_net_worth_flag, high_risk, hr_rating,
            random.choice(Use_Case)
        ])
        liCSV.append(row)

    return liCSV
def gen_cust(liSSNMaster, acct_list, i):
    """Build one synthetic customer/account row for the customer CSV.

    The column order of the returned row is fixed; every value is either
    drawn at random from a module-level choice list or produced by the
    ``gen_data`` / ``geo_data`` / ``Faker`` helpers.

    Args:
        liSSNMaster: Indexable master list of SSNs. ``liSSNMaster[i]`` is
            used for the main account holder; the additional card holders
            get entries picked at random from the same list.
        acct_list: List of account numbers generated so far. The new
            account number is appended to it (mutated in place), and for
            ``i > 10000`` a related account may be picked from it.
        i: Index of the row being generated; it is also the first column
            of the row.

    Returns:
        list: The generated row.

    Side effects:
        Advances the module-level ``start`` account counter.
        NOTE(review): this assumes ``start`` is initialised at module level
        before the first call -- confirm against the rest of the file.
    """
    # ``start`` is rebound below; without this declaration Python treats it
    # as a local name and the read on the next lines raises
    # UnboundLocalError.
    global start

    fake = Faker()

    # High-risk flags, all off until a rule below switches them on.
    PEP = 'No'          # politically exposed person
    SAR = 'No'          # suspicious activity report filed
    high_risk = 'No'
    hr_rating = ''
    demarket = 'No'     # customer demarketed by the bank
    dem_date = ''

    # Closed-account flag and number of credit cards on the account.
    Clsd = choice(Clsd_flag)
    No_CCs = choice(Number_CC)

    # Next account number: previous one plus a small random gap.
    acct = start + 1 + randrange(1, 10, 1)
    start = acct

    name = fake.name()
    tmp = gen_data.create_name()
    acct_list.append(acct)

    # Main account holder; the SSN comes from the master list at index i.
    row = [i, acct, choice(Acct_Type), No_CCs, name, tmp[0], liSSNMaster[i]]

    # Name, middle name and SSN for every additional card holder.
    names = []
    mdl = []
    ssn = []
    for _ in range(No_CCs - 1):
        names.append(fake.name())
        mdl.append(gen_data.create_name()[0])
        # Pick an SSN from the master list, avoiding the main holder's own.
        rand_idx = randrange(1, len(liSSNMaster), 1)
        if rand_idx != i:
            ssn.append(liSSNMaster[rand_idx])
        else:
            ssn.append(liSSNMaster[rand_idx - 1])

    # Pad with blanks so the three additional-holder column groups exist
    # even when the account has fewer than four card holders.
    for _ in range(4 - No_CCs):
        names.append('')
        ssn.append('')
        mdl.append('')

    # Card number plus a printable id built from the last four characters of
    # CC_NO[1][0], a slice of the current timestamp and a random suffix.
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    clean = re.sub(r'\W', '', str(datetime.now()))
    printCC = (str(CC_TRANS[-4:]) + str(clean[-12:-3]) +
               str(randrange(1111, 9999, randrange(1, 10, 1))))

    row.extend([
        names[0], mdl[0], ssn[0],
        names[1], mdl[1], ssn[1],
        names[2], mdl[2], ssn[2],
        printCC, CC_NO[0],
        gen_data.create_company_name() + ' ' + tmp[1],
        gen_data.create_email(),
        gen_data.create_job_title(),
    ])

    # Current and previous address, looked up by a randomly chosen zip code.
    zip_code = choice(zips.zip)
    addr = geo_data.create_city_state_zip[zip_code]
    zip2 = choice(zips.zip)
    addr2 = geo_data.create_city_state_zip[zip2]

    lrg_cash_ex = choice(Yes_No)

    # SAR / demarketing rules only apply to closed accounts.
    # randrange(0, 101) == 100 is a 1-in-101 draw and randrange(0, 11) == 10
    # a 1-in-11 draw; these are the "1%" and "10%" rules.
    if Clsd == 'Yes':
        # ~1% of closed accounts are demarketed without a SAR.
        if randrange(0, 101, 1) == 100 and SAR == 'No':
            demarket = 'Yes'
            dem_date = gen_data.create_date(past=True)
        # ~10% of the remaining closed accounts have a SAR ...
        if randrange(0, 11, 1) == 10 and demarket == 'No':
            SAR = 'Yes'
            # ... and ~90% of those are demarketed as well.
            if randrange(0, 11, 1) != 10:
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)

    # ~1% of all accounts are PEP.
    if randrange(0, 101, 1) == 100:
        PEP = 'Yes'

    row.extend([
        addr[0], addr[1], zip_code, 'US',
        addr2[0], addr2[1], zip2, 'US',
        gen_data.create_birthday(min_age=2, max_age=85),
        PEP, SAR, Clsd,
    ])

    # Related accounts are only generated once more than 10,000 rows exist;
    # the multiplier is 1 for one draw in 10,001 and 0 otherwise.
    rel = 0
    if i > 10000:
        rel = int(choice(acct_list)) * max(randrange(0, 10001, 1) - 9999, 0)
    if rel != 0:
        row.extend([rel, choice(Related_Type)])
    else:
        row.extend(['', ''])

    # Account start date and consent to share information.
    party_start = gen_data.create_date(past=True)
    Consent_Share = choice(Yes_No_Consent)

    row.extend([
        choice(Party_Type), choice(Party_Relation), party_start,
        gen_data.create_date(past=True),
        lrg_cash_ex, demarket, dem_date,
        randrange(0, 100, 1), choice(Official_Lang),
    ])

    # Preferred contact channel is only filled in when consent was given.
    if Consent_Share == 'Yes':
        row.extend(['Yes', choice(Preffered_Channel)])
    else:
        row.extend(['No', ''])

    # DO NOT USE the customer status below - it is not integrated with the
    # closed-account flag.
    row.extend([zip_code, randrange(0, 5, 1)])

    # Segment data: one fixed six-column record per segment id (0-4).
    segment_rows = (
        ['01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'],
        ['02', 'Profitability', 'CRS Risk Score', 'Group 1',
         'Mid Risk Tier', '300'],
        ['03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier', '400'],
        ['04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'],
        ['05', 'Risk Tolerance', 'CM Risk', 'Group 4',
         'Geographical Risk', '500'],
    )
    Segment_ID = randrange(0, 5, 1)
    row.extend(segment_rows[Segment_ID])

    hr0 = choice(Yes_No_Cust_Flag)               # Arms manufacturer
    hr01 = choice(Yes_No_Cust_Flag)              # Auction
    hr02 = choice(Yes_No_Cust_Flag)              # Cash-intensive business
    hr03 = choice(Yes_No_Cust_Flag)              # Casino / gambling
    hr04 = choice(Channel_Onboarding)            # Onboarding channel
    hr05 = choice(Channel_Ongoing_Transactions)  # Ongoing-transaction channel
    row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

    # Net worth depends on the high-net-worth flag.
    HighNetWorthFlag = choice(HighNetWorth)
    if HighNetWorthFlag == 'Yes':
        # Usually 1M-5M; one draw in 101 switches on the 5M-25M value.
        row.append(
            max(
                max(randrange(0, 101, 1) - 99, 0) *
                randrange(5000000, 25000000, 1),
                randrange(1000000, 5000000, 1)))
    else:
        flag = choice(LowNet)
        if flag == 0:
            row.append(randrange(-250000, 600000, 1))
        elif flag == 1:
            row.append(randrange(149000, 151000, 1))
        else:
            row.append(randrange(40000, 50000, 1))

    hr1 = choice(Yes_No_Cust_Flag)      # Complex high-income vehicle
    hr2 = choice(Yes_No_Cust_Flag)      # Dealer in precious metals
    hr3 = choice(Yes_No_Cust_Flag)      # Digital precious-metal operator
    hr4 = choice(Yes_No_Cust_Flag)      # Embassy / consulate
    hr5 = choice(Yes_No_Cust_Flag)      # Currency exchange
    hr6 = choice(Yes_No_Cust_Flag)      # Foreign financial institution
    hr7 = choice(Yes_No_Cust_Flag)      # Foreign government
    hr8 = choice(Yes_No_Cust_Flag)      # Foreign non-bank financial inst.
    hr9 = choice(Yes_No_Cust_Flag)      # Internet gambling
    hr10 = choice(Yes_No_Cust_Flag)     # Medical marijuana dispensary
    hr11 = choice(Yes_No_Cust_Flag)     # Money service business
    hr12 = choice(NAICS.NAICS_Code)     # NAICS code
    hr13 = choice(Yes_No_Cust_Flag)     # Non-regulated financial inst.
    hr14 = choice(Yes_No_Cust_Flag)     # Not-for-profit
    # There is no hr15: the occupation draw is commented out in this function.
    hr16 = choice(Yes_No_Cust_Flag)     # Privately owned ATM operator
    hr17 = choice(Products)             # Products
    hr18 = choice(Yes_No_Cust_Flag)     # Sales of used vehicles
    hr19 = choice(Services)             # Services
    hr20 = choice(SIC_Code)             # SIC code
    hr21 = choice(Stock_Market_Listing)  # Stock market listing
    hr22 = choice(Yes_No_Cust_Flag)     # Third-party payment processor
    hr23 = choice(Yes_No_Cust_Flag)     # Transacting provider

    # Weighted pool of risk ratings; '5' is the most likely outcome.
    refrating = ['1'] * 3 + ['2', '4'] * 2 + ['3'] + ['5'] * 12

    # Any single 'Yes' among these flags makes the customer high risk.
    risk_flags = (
        PEP, SAR, lrg_cash_ex, demarket,
        hr0, hr01, hr02, hr03,
        hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
        hr13, hr14, hr16, hr17, hr18, hr22, hr23,
        HighNetWorthFlag,
    )
    if any(flag_value == 'Yes' for flag_value in risk_flags):
        high_risk = 'Yes'
        hr_rating = choice(refrating)

    # ~1% of the otherwise clean customers are flagged high risk anyway.
    if high_risk == 'No' and randrange(0, 101, 1) == 100:
        high_risk = 'Yes'
        hr_rating = choice(refrating)

    row.extend([
        hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
        hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
        HighNetWorthFlag, high_risk, hr_rating, choice(Use_Case),
    ])

    return row