def generate_accounts(amount):
    """Print ``amount`` random ``nco:PersonContact`` instances.

    For every contact the full, given and family names are printed, followed
    by zero to four ``nco:hasIMAccount`` URIs and finally the birth date
    (printed with ``final=True``).

    :param amount: number of contacts to generate; zero or a negative value
        generates nothing.
    :returns: list of every IM account URI that was printed, most recently
        generated first.
    """
    known_accounts = []
    for _ in range(amount):
        first_name, last_name = gen_data.create_name()
        # The next results are never printed.  The draws are kept (unbound)
        # so that a seeded run consumes random numbers exactly as before.
        # sys.maxsize replaces sys.maxint, which is gone in Python 3.
        gen_data.create_city_state_zip()
        random.randint(0, sys.maxsize)
        random.randint(0, sys.maxsize)
        birth_day = gen_data.create_birthday()
        gen_data.create_street()
        email_address = gen_data.create_email(name=(first_name, last_name))

        print_instance(get_random_uuid_uri(), "nco:PersonContact")
        print_property("nco:fullname", " ".join([first_name, last_name]))
        print_property("nco:nameGiven", first_name)
        print_property("nco:nameFamily", last_name)

        for j in range(random.randint(0, 4)):
            account_data = get_random_in_list(ACCOUNTS)
            # The index j keeps several accounts of one person distinct.
            user_account = "".join([account_data[2], str(j), email_address])
            print_property("nco:hasIMAccount", user_account, t="uri")
            known_accounts.append(user_account)

        print_property("nco:birthDate", str(birth_day), final=True)

    # Callers receive newest-first order; reversing once at the end avoids
    # the O(n) cost of inserting at the front for every account.
    known_accounts.reverse()
    return known_accounts
def generate_accounts(amount):
    """Emit ``amount`` random ``nco:PersonContact`` resources.

    Each contact gets its name properties, between zero and four
    ``nco:hasIMAccount`` URIs, and a closing ``nco:birthDate`` property
    (printed with ``final=True``).

    :param amount: how many contacts to print; nothing is printed for zero
        or negative values.
    :returns: list of all IM account URIs printed, newest first.
    """
    collected = []
    for _ in range(amount):
        first_name, last_name = gen_data.create_name()
        # These values are not used in the output.  They are still drawn so
        # the random stream (and therefore seeded output) stays the same.
        # sys.maxsize is used because sys.maxint was removed in Python 3.
        gen_data.create_city_state_zip()
        random.randint(0, sys.maxsize)
        random.randint(0, sys.maxsize)
        birth_day = gen_data.create_birthday()
        gen_data.create_street()
        email_address = gen_data.create_email(name=(first_name, last_name))

        print_instance(get_random_uuid_uri(), "nco:PersonContact")
        print_property("nco:fullname", ' '.join([first_name, last_name]))
        print_property("nco:nameGiven", first_name)
        print_property("nco:nameFamily", last_name)

        account_count = random.randint(0, 4)
        for j in range(account_count):
            account_data = get_random_in_list(ACCOUNTS)
            # Prefix + running index + e-mail makes the URI unique per person.
            user_account = ''.join([account_data[2], str(j), email_address])
            print_property("nco:hasIMAccount", user_account, t="uri")
            collected.append(user_account)

        print_property("nco:birthDate", str(birth_day), final=True)

    # Appending and reversing once yields the same newest-first order as
    # inserting every item at index 0, without the quadratic cost.
    collected.reverse()
    return collected
def generate_vCard():
    """Return a random vCard 4.0 document as one CRLF-delimited string.

    ``FN`` is always present.  Every other property (N, TEL, GENDER, EMAIL,
    IMPP, ADR, NOTE, ORG, BDAY) is included on an independent coin flip.
    The property lines are shuffled before being wrapped in the
    BEGIN/VERSION/END envelope.
    """
    gender_code = gender_vcard_list[random.randint(0, 4)]
    # Only 'M' and 'F' select a gender for the name; other codes leave it unset.
    gender = {'M': 'Male', 'F': 'Female'}.get(gender_code)

    first_name, last_name = gen_data.create_name(gender=gender)
    street = gen_data.create_street()
    postal_code, city, state = gen_data.create_city_state_zip()

    # One builder per optional property, in the order the coins are flipped.
    # A builder only runs (and only draws random data) when its flip succeeds.
    optional_builders = (
        lambda: 'N:{};{};;;\r\n'.format(last_name, first_name),
        lambda: 'TEL:tel:{}\r\n'.format(gen_data.create_phone()),
        lambda: 'GENDER:{}\r\n'.format(gender_code),
        lambda: 'EMAIL:{}\r\n'.format(
            gen_data.create_email(name=(first_name, last_name)).lower()),
        lambda: 'IMPP:sip:{}@{}\r\n'.format(first_name.lower(),
                                            'sip.linphone.org'),
        lambda: 'ADR:;;{};{};{};{};\r\n'.format(street, city, state,
                                                postal_code),
        lambda: 'NOTE:{}\r\n'.format(gen_data.create_sentence()),
        lambda: 'ORG:{}\r\n'.format(gen_data.create_company_name()),
        lambda: 'BDAY:{0:%Y%m%d}\r\n'.format(gen_data.create_birthday()),
    )

    lines = ['FN:{} {}\r\n'.format(first_name, last_name)]
    for build in optional_builders:
        if random.randint(0, 1):
            lines.append(build())
    shuffle(lines)

    envelope = ['BEGIN:VCARD\r\n', 'VERSION:4.0\r\n'] + lines + ['END:VCARD\r\n']
    return ''.join(envelope)
def generate_vCard():
    """Generate one random vCard (version 4.0) and return its text.

    The card always carries ``FN``; each remaining property is added only
    when ``random.randint(0, 1)`` comes up 1.  Property order is randomised
    with ``shuffle`` and every line ends in CRLF.
    """
    gender_initial = gender_vcard_list[random.randint(0, 4)]
    gender = ('Male' if gender_initial == 'M'
              else 'Female' if gender_initial == 'F'
              else None)

    first_name, last_name = gen_data.create_name(gender=gender)
    street_line = gen_data.create_street()
    zip_code, city, state = gen_data.create_city_state_zip()

    def include():
        """Flip the coin that decides whether the next property is emitted."""
        return random.randint(0, 1) == 1

    chosen = ['FN:{} {}\r\n'.format(first_name, last_name)]
    if include():
        chosen.append('N:{};{};;;\r\n'.format(last_name, first_name))
    if include():
        phone = gen_data.create_phone()
        chosen.append('TEL:tel:{}\r\n'.format(phone))
    if include():
        chosen.append('GENDER:{}\r\n'.format(gender_initial))
    if include():
        email = gen_data.create_email(name=(first_name, last_name))
        chosen.append('EMAIL:{}\r\n'.format(email.lower()))
    if include():
        sip_user = first_name.lower()
        chosen.append('IMPP:sip:{}@{}\r\n'.format(sip_user, 'sip.linphone.org'))
    if include():
        chosen.append('ADR:;;{};{};{};{};\r\n'.format(street_line, city, state,
                                                      zip_code))
    if include():
        note = gen_data.create_sentence()
        chosen.append('NOTE:{}\r\n'.format(note))
    if include():
        company = gen_data.create_company_name()
        chosen.append('ORG:{}\r\n'.format(company))
    if include():
        birthday = gen_data.create_birthday()
        chosen.append('BDAY:{0:%Y%m%d}\r\n'.format(birthday))

    shuffle(chosen)
    return ('BEGIN:VCARD\r\n' + 'VERSION:4.0\r\n' + ''.join(chosen)
            + 'END:VCARD\r\n')
def get_rows():
    """Yield 499 synthetic customer rows as dicts (``rownum`` 502 to 1000).

    Every row has the same 24 keys, in this order: rownum, dunno, CC,
    Employer, Custemail, name, occupation, address_street, DOB,
    previous_address_city_state_zip, altcustomer_name,
    altcustomer_occupation, altcustomer_dob, ssn, phone, AccountID, PepFlag,
    altcustomerssn, demarketed_customer_flag, SAR_flag, nolonger_a_customer,
    closed_account, High_risk_flag, Risk_rating.

    Values are the objects returned by the generators; they are not
    converted to text.

    :returns: a generator of ``dict`` rows.
    """
    # The rows used to be assembled as Python-literal text and parsed with
    # ast.literal_eval.  That text was never a valid literal (no ':' after
    # 'closed_account', string values unquoted), so the first next() always
    # raised.  Building the dict directly keeps the same keys and the same
    # order of random draws.
    fake = Faker()

    def flag(offset=99):
        """Return max(randrange(0, 101) - offset, 0); non-zero only for high draws."""
        return max(randrange(0, 101, 1) - offset, 0)

    def ssn_parts():
        """Return a random (3-digit, 2-digit, 4-digit) tuple."""
        return (randrange(101, 1000, 1), randrange(10, 100, 1),
                randrange(1000, 10000, 1))

    for rownum in range(502, 1001):
        yield {
            'rownum': rownum,
            'dunno': 10,
            'CC': gen_data.cc_number(),
            'Employer': gen_data.create_company_name(),
            'Custemail': gen_data.create_email(),
            'name': gen_data.create_name(),
            'occupation': gen_data.create_job_title(),
            'address_street': gen_data.create_city_state_zip(),
            'DOB': gen_data.create_birthday(min_age=2, max_age=85),
            'previous_address_city_state_zip': gen_data.create_city_state_zip(),
            'altcustomer_name': fake.name(),
            'altcustomer_occupation': gen_data.create_job_title(),
            'altcustomer_dob': gen_data.create_birthday(min_age=2, max_age=85),
            'ssn': ssn_parts(),
            'phone': (randrange(101, 1000, 1), randrange(101, 999, 1),
                      randrange(1000, 10000, 1)),
            'AccountID': randrange(100000, 100000000, 1),
            'PepFlag': flag(),
            'altcustomerssn': ssn_parts(),
            'demarketed_customer_flag': flag(),
            'SAR_flag': flag(),
            'nolonger_a_customer': flag(),
            # closed_account subtracts 90 rather than 99, so it ranges 0..10.
            'closed_account': flag(90),
            'High_risk_flag': flag(),
            'Risk_rating': flag(),
        }
def get_email_address():
    """Return a generated e-mail address requested with ``tld="co.uk"``."""
    address = gen_data.create_email(tld="co.uk")
    return address
# createCusts(N): build N synthetic customer/account rows and return them as a
# list of lists (liCSV), one inner list per customer.
#
# What the code below establishes:
#   * N is the number of rows generated (loop index i runs over xrange(N)).
#   * Each row starts with [i, acct, account type, No_CCs, name, tmp[0],
#     liSSNMaster[i]]; liSSNMaster is a module-level list (declared global here)
#     and is indexed by i, so it needs at least N entries.
#   * Account numbers start above 10786147 and grow by 2..10 per row
#     (acct = start + 1 + randrange(1, 10, 1)).
#   * The weighted lists at the top (e.g. ['Yes'] + ['No'] * 30) are sampled with
#     random.choice, so the repetition counts act as relative probabilities.
#   * Besides Faker and gen_data the body reads geo_data.create_city_state_zip,
#     zips.zip and NAICS.NAICS_Code, none of which are defined in this function.
#   * high_risk is set to 'Yes' when any of the listed flags is 'Yes', otherwise
#     by further random draws; hr_rating is then drawn from refrating.
#
# NOTE(review): Python 2 only as written - uses xrange and the `<>` operator; the
#   regex '\W' is also not a raw string.
# NOTE(review): the local name `zip` shadows the builtin inside this function.
# NOTE(review): Yes_No_50, Occupation and CC_list are assigned but never read here,
#   and acct_list is initialised twice.
# NOTE(review): this copy has lost its line breaks and indentation, so the nesting of
#   the closed-account / SAR / demarket / PEP conditionals cannot be confirmed from
#   this text - restore the formatting from version control before changing logic.
def createCusts(N): #List for client whose net worth is over $500K HighNetWorth = ['Yes'] + ['No'] * 30 #List for type of account Related_Type = ['Primary','Secondary','Joint'] #List for how the account was opened Party_Type = ['Person','Non-Person'] #List for a BMO customer Party_Relation = ['Customer','Non-Customer'] #List for random Yes/No Flag Yes_No = ['Yes'] + ['No'] * 12 #List for random Yes/No Consent Yes_No_Consent = ['Yes'] + ['No'] * 4 #List for equal Yes/No Flag Yes_No_50 = ['Yes','No'] #List for official language Official_Lang = ['English'] * 3 + ['French'] #List for method of communication Preffered_Channel = ['Direct Mail','Telemarketing','Email','SMS'] #List for status of customer #Customer_Status = ['Prospect','Inactive Customer','Past Customer'] + ['Active Customer'] * 56 #List for LOB Segment Type Seg_Model_Type = ['LOB Specific','Profitability','Geographical','Behavioral','Risk Tolerance'] #List for Model ID Model_ID = ['01','02','03','04','05'] #List for Model Name Seg_Model_Name = ['IRRI', 'CRS Risk Score','Geo Risk','Financial Behavior Risk','CM Risk'] #List for Model Score Seg_Model_Score = ['200','300','400','100','500'] #List for Model Group Seg_Model_Group = ['Group 1'] * 2 + ['Group 2','Group 3','Group 4'] #List for Model Description Seg_Model_Description = ['High Risk Tier','Mid Risk Tier','Low Risk Tier','Vertical Risk','Geographical Risk'] #List for random Arms Dealer flag Arms_Manufacturer=['Yes'] + ['No'] * 2 + [''] * 392 #List for random auction flag Auction=['Yes'] + ['No'] * 2 + [''] * 392 #List for random Cash Intensive flag CashIntensive_Business=['Yes'] + ['No'] * 2 + [''] * 392 #List for random Casino?Gaming flag Casino_Gambling=['Yes'] + ['No'] * 2 + [''] * 392 #List for random Client Onboarding flag Channel_Onboarding=['E-mail','In Person','In person - In Branch/Bank Office','In person - Offsite/Client Location','Mail','Online','Phone','Request for Proposal (RFP)'] + ['Not Applicable'] * 10 #List for random Transaction flag 
Channel_Ongoing_Transactions=['ATM','E-mail','Fax','Mail','Not Applicable','OTC Communication System','Phone'] + ['Online'] * 4 + ['In Person'] * 31 #List for random HI_Vehicle flag Complex_HI_Vehicle=['Yes'] + ['No'] * 2 + [''] * 392 #List for random Metals flag Dealer_Precious_Metal=['Yes'] + ['No'] * 2 + [''] * 392 #List for random Arms Dealer flag Digital_PM_Operator=['Yes'] + ['No'] * 2 + [''] * 392 #List for random Embassy flag Embassy_Consulate=['Yes'] + ['No'] * 2 + [''] * 392 #Sets variable to Embassy flag Exchange_Currency=Embassy_Consulate #Sets variable to Embassy flag Foreign_Financial_Institution=Embassy_Consulate #Sets variable to Embassy flag Foreign_Government=Embassy_Consulate #Sets variable to Embassy flag Foreign_NonBank_Financial_Institution=Embassy_Consulate #Sets variable to Embassy flag Internet_Gambling=Embassy_Consulate #Sets variable to Embassy flag Medical_Marijuana_Dispensary=Embassy_Consulate #Sets variable to Embassy flag Money_Service_Business=Embassy_Consulate #Sets variable to Embassy flag NonRegulated_Financial_Institution=Embassy_Consulate #Sets variable to Embassy flag Not_Profit=Embassy_Consulate #List for random occupation Occupation=['11-1011 Chief Executives',\ '11-3011 Administrative Services Managers',\ '11-3031 Financial Managers',\ '11-3061 Purchasing Managers',\ '13-1011 Agents and Business Managers of Artists Performers and Athletes',\ '13-1031 Claims Adjusters Examiners, and Investigators',\ '13-1199 Business Operations Specialists, All Other',\ '13-2099 Financial Specialists All Other',\ '17-1011 Architects Except Landscape and Naval',\ '23-1011 Lawyers',\ '23-1023 Judges, Magistrate Judges and Magistrates',\ '25-2012 Kindergarten Teachers Except Special Education',\ '25-2021 Elementary School Teachers Except Special Education',\ '29-1041 Optometrists',\ '29-2054 Respiratory Therapy Technicians',\ '33-2011 Firefighters',\ '37-1012 First-Line Supervisors of Landscaping Lawn Service and Groundskeeping Workers',\ 
'39-1011 Gaming Supervisors',\ '39-2011 Animal Trainers',\ '41-1011 First-Line Supervisors of Retail Sales Workers',\ '41-1012 First-Line Supervisors of Non-Retail Sales Workers',\ '41-2011 Cashiers',\ '41-2031 Retail Salespersons',\ '43-3021 Billing and Posting Clerks',\ '45-1011 First-Line Supervisors of Farming, Fishing, and Forestry Workers',\ '49-2011 Computer Automated Teller and Office Machine Repairers',\ '53-3021 Bus Drivers Transit and Intercity',\ '53-4031 Railroad Conductors and Yardmasters',\ '55-1011 Air Crew Officers',\ '55-1012 Aircraft Launch and Recovery Officers',\ '55-1013 Armored Assault Vehicle Officers',\ ] #Sets variable to Embassy flag Privately_ATM_Operator=Embassy_Consulate #List for random products Products=['Certificate of Deposit',\ 'Checking Account',\ 'Credit Card',\ 'Custodial and Investment Agency - Institutional',\ 'Custodial and Investment Agency - Personal',\ 'Custodial/Trust Outsourcing Services (BTOS)',\ 'Custody Accounts (PTIM)',\ 'Custody Accounts (RSTC)',\ 'DTF (BHFA)',\ 'Investment Agency - Personal',\ 'Investment Management Account (PTIM)',\ 'Lease',\ 'Loan / Letter of Credit',\ 'Money Market',\ 'Mortgage / Bond / Debentures',\ 'None',\ 'Savings Account',\ 'Trust Administration - Irrevocable and Revocable (PTIM)',\ 'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',\ ] + ['Nondeposit Investment Products'] * 14 + ['Investment Agency - Institutional'] * 5 #Sets variable to Embassy flag Sales_Used_Vehicles=Embassy_Consulate #Dictionary for random Services Services=['Benefit Payment Services',\ 'Domestic Wires and Direct Deposit / ACH',\ 'Family Office Services (FOS)',\ 'Fiduciary Services',\ 'International Wires and IAT',\ 'Investment Advisory Services (IAS)',\ 'Investment Services',\ 'None',\ 'Online / Mobile Banking',\ 'Payroll',\ 'Short Term Cash Management',\ 'Trust Services',\ 'Trustee Services',\ 'Vault Cash Services',\ ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19 #Dictionary for random 
SIC_Code SIC_Code=['6021 National Commercial Banks',\ '6211 Security Brokers Dealers and Flotation Companies',\ '6282 Investment Advice',\ '6311 Life Insurance',\ '6733 Trusts Except Educational Religious and Charitable',\ '8999 Services NEC',\ ] + ['6722 Management Investment Offices Open-End'] * 12 #Dictionary for random Market Listing Stock_Market_Listing=['Australian Stock Exchange',\ 'Brussels Stock Exchange',\ 'Montreal Stock Exchange',\ 'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',\ 'Toronto Stock Exchange',\ ] + ['Not Found'] * 30 #Sets variable to Embassy flag Third_Party_Payment_Processor=Embassy_Consulate #Sets variable to Embassy flag Transacting_Provider=Embassy_Consulate #Dictionary for random Low Net Worth LowNet=[1,2] + [0] * 5 #Dictionary for Consumer vs Business Acct_Type = ['B'] + ['C'] * 5 #Dictionary for random number of credits cards per account Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4] #Dictionary for Account list set to blank acct_list=[] #Dictionary for CreditCard list set to blank CC_list = [] #Dictionary for random Wolfsberg scenario Use_Case = [1,4,7,10,13,16,19,22,25,28,31,34,39] * 4 + [2,5,8,11,14,17,20,23,26,29,32,35,38] * 7 + [3,6,9,12,15,18,21,24,27,30,33,36] * 65 + [37] * 73 + [40,41] * 2 refrating = ['1','1','1','2','3','4','2','4','5','5','5','5','5','5','5','5','5','5','5','5'] fake = Faker() global liSSNMaster start=10786147 acct_list=[] liCSV = [] for i in xrange(N): #Initiate High Risk Flags #Politically Exposed Person PEP='No' #Customer with a Suspicous Activity Report SAR='No' #Customer with a closed account Clsd='No' #High risk customer flag high_risk='No' #High Risk Rating hr_rating='' #Customer that was demarketed by the bank demarket='No' dem_date='' #generate closed acct flag if (max((randrange(0,98,1)-96),0)==1): Clsd='Yes' #Random choice for number of credit card users per account number No_CCs = random.choice(Number_CC) #Generate account number 
acct=start+1+randrange(1,10,1) start=acct #Randomly generate customer name + middle name in tmp name = fake.name() tmp=gen_data.create_name() #Adds account number to account dictionary acct_list.extend([acct]) #Creates a new row and adds data elements row = [i]+[acct]+[random.choice(Acct_Type)]+[No_CCs]+[name]+[tmp[0]]+[liSSNMaster[i]] #Dictionary for names list set to blank names=[] #Dictionary for Social Security Number list set to blank ssn=[] #Middle Name to reduce name dups mdl=[] for j in range(No_CCs-1): names.insert(j,fake.name()) tmp2=gen_data.create_name() mdl.insert(j,tmp2[0]) ##Pull from SSN Master list randInt = randrange(1,len(liSSNMaster),1) if randInt != i: ssn.insert(j,liSSNMaster[randInt]) else: ssn.insert(j,liSSNMaster[randInt - 1]) #Name and SSN is set to blank if less than 4 customers on an account for k in range(4-No_CCs): names.insert(No_CCs+k,'') ssn.insert(No_CCs+k,'') mdl.insert(No_CCs,'') #Sets CC_NO to a random credit card number CC_NO=gen_data.cc_number() #Extract CC_Number from the tuple returned by CC_Number then scramble to ensure uniqueness...Tuple contains CC Number and Type CC_TRANS=CC_NO[1][0] dt = str(datetime.now()) clean=re.sub('\W','',dt) printCC=str(CC_TRANS[-4:])+str(clean[-12:-3])+str(randrange(1111,9999,randrange(1,10,1))) #Add data elements to current csv row row.extend([names[0],mdl[0],ssn[0],names[1],mdl[1],ssn[1],names[2],mdl[2],ssn[2],printCC,CC_NO[0],gen_data.create_company_name()+' '+tmp[1],\ gen_data.create_email(),gen_data.create_job_title()]) #Create Current Address zip=random.choice(zips.zip) addr=geo_data.create_city_state_zip[zip] #Create Previous address zip2=random.choice(zips.zip) addr2=geo_data.create_city_state_zip[zip2] #Add additional data elements to current csv row lrg_cash_ex=random.choice(Yes_No) #Condition for SARs and Demarketed Clients if(Clsd=='Yes'): #1% of closed accounts are demarketed but never had a SAR filed if (max((randrange(0,101,1)-99),0)==1 and SAR=='No'): demarket='Yes' 
dem_date=gen_data.create_date(past=True) if (max((randrange(0,11,1)-9),0)==1 and demarket=='No'): #10% of closed accounts have SARs SAR='Yes' #90% of closed accounts with SARs are demarketed if(max((randrange(0,11,1)-9),0)==0): demarket='Yes' dem_date=gen_data.create_date(past=True) if (max((randrange(0,101,1)-99),0)==1): PEP='Yes' row.extend([addr[0],addr[1],zip,'US',addr2[0],addr2[1],zip2,'US',gen_data.create_birthday(min_age=2, max_age=85),PEP,SAR,Clsd]) #Start Generating related accounts from account list once 10,000 accounts are generated - to avoid duplicating accounts in the beginning if i > 10000: rel = int(random.choice(acct_list))*max((randrange(0,10001,1)-9999),0) if rel <> 0: row.append(rel) row.append(random.choice(Related_Type)) else: row.append('') row.append('') else: row.append('') row.append('') #Randomly generates account start date party_start=gen_data.create_date(past=True) #Randomly selects consent option for sharing info Consent_Share = random.choice(Yes_No_Consent) #Add additional data elements to current csv row row.extend([random.choice(Party_Type),random.choice(Party_Relation),party_start,gen_data.create_date(past=True),\ lrg_cash_ex,demarket,dem_date,randrange(0,100,1),random.choice(Official_Lang)]) #Add data element preferred methond of contact for yes to share info...if not then blank to current row if Consent_Share == 'Yes': row.extend(['Yes',random.choice(Preffered_Channel)]) else: row.extend(['No','']) row.extend([zip,randrange(0,5,1)]) #Generate Segment ID then add additional Segment data based on the selection to the current csv row Segment_ID = randrange(0,5,1)%5 if Segment_ID == 0: row.extend([Model_ID[0],Seg_Model_Type[0],Seg_Model_Name[0],Seg_Model_Group[0],Seg_Model_Description[0],Seg_Model_Score[0]]) if Segment_ID == 1: row.extend([Model_ID[1],Seg_Model_Type[1],Seg_Model_Name[1],Seg_Model_Group[1],Seg_Model_Description[1],Seg_Model_Score[1]]) if Segment_ID == 2: 
row.extend([Model_ID[2],Seg_Model_Type[2],Seg_Model_Name[2],Seg_Model_Group[2],Seg_Model_Description[2],Seg_Model_Score[2]]) if Segment_ID == 3: row.extend([Model_ID[3],Seg_Model_Type[3],Seg_Model_Name[3],Seg_Model_Group[3],Seg_Model_Description[3],Seg_Model_Score[3]]) if Segment_ID == 4: row.extend([Model_ID[4],Seg_Model_Type[4],Seg_Model_Name[4],Seg_Model_Group[4],Seg_Model_Description[4],Seg_Model_Score[4]]) #Add additional data elements to current csv row hr0=random.choice(Arms_Manufacturer) hr01=random.choice(Auction) hr02=random.choice(CashIntensive_Business) hr03=random.choice(Casino_Gambling) hr04=random.choice(Channel_Onboarding) hr05=random.choice(Channel_Ongoing_Transactions) row.extend([hr0,hr01,hr02,hr03,hr04,hr05]) #Randomly select whether customer has a High Net Worth HighNetWorthFlag = random.choice(HighNetWorth) #Randomly Generate customer net worth based on the above flag if HighNetWorthFlag == 'Yes': row.append(max(max((randrange(0,101,1)-99),0)*randrange(1000000,25000000,1),randrange(1000000,5000000,1))) else: flag=random.choice(LowNet) if flag==0: row.append(randrange(-250000,600000,1)) else: if flag==1: row.append(randrange(149000,151000,1)) else: row.append(randrange(40000,50000,1)) #Add data elements to current csv row hr1=random.choice(Complex_HI_Vehicle) hr2=random.choice(Dealer_Precious_Metal) hr3=random.choice(Digital_PM_Operator) hr4=random.choice(Embassy_Consulate) hr5=random.choice(Exchange_Currency) hr6=random.choice(Foreign_Financial_Institution) hr7=random.choice(Foreign_Government) hr8=random.choice(Foreign_NonBank_Financial_Institution) hr9=random.choice(Internet_Gambling) hr10=random.choice(Medical_Marijuana_Dispensary) hr11=random.choice(Money_Service_Business) hr12=random.choice(NAICS.NAICS_Code) hr13=random.choice(NonRegulated_Financial_Institution) hr14=random.choice(Not_Profit) #hr15=random.choice(Occupation) - added before through gen_data hr16=random.choice(Privately_ATM_Operator) hr17=random.choice(Products) 
hr18=random.choice(Sales_Used_Vehicles) hr19=random.choice(Services) hr20=random.choice(SIC_Code) hr21=random.choice(Stock_Market_Listing) hr22=random.choice(Third_Party_Payment_Processor) hr23=random.choice(Transacting_Provider) if(PEP=='Yes' or SAR=='Yes' or lrg_cash_ex=='Yes' or demarket=='Yes' or hr0=='Yes' or hr01=='Yes' or hr02=='Yes' or hr03=='Yes' or hr1=='Yes' or hr2=='Yes' or hr3=='Yes' or hr4=='Yes' or hr5=='Yes' or hr6=='Yes' or hr7=='Yes' or hr8=='Yes' or hr9=='Yes' or hr10=='Yes' or hr11=='Yes' or hr13=='Yes' or hr14=='Yes' or hr16=='Yes' or hr17=='Yes' or hr18=='Yes' or hr22=='Yes' or hr23=='Yes' or HighNetWorthFlag=='Yes'): high_risk='Yes' hr_rating=random.choice(refrating) if(SAR=='No' and high_risk=='No'): if(max((randrange(0,101,1)-99),0)==1): high_risk='Yes' hr_rating=random.choice(refrating) if(PEP=='No' and high_risk=='No'): if(max((randrange(0,101,1)-99),0)==1): high_risk='Yes' hr_rating=random.choice(refrating) if(high_risk=='No'): if(max((randrange(0,101,1)-99),0)==1): high_risk='Yes' hr_rating=random.choice(refrating) row.extend([hr1,hr2,hr3,hr4,hr5,hr6,hr7,hr8,hr9,hr10,hr11,hr12,hr13,hr14,hr16,hr17,hr18,hr19,hr20,hr21,hr22,hr23, HighNetWorthFlag,high_risk,hr_rating,random.choice(Use_Case)]) liCSV.append(row) return liCSV
f1, delimiter=',', lineterminator='\n', ) writer.writerow(['rownum'] +['dunno'] + ['CC'] + ['Employer'] + ['Custemail'] + ['name'] \ + ['occupation'] + ['address_street'] + ['DOB']+['previous address_city_state_zip']+ ['altcustomer_name'] \ + ['altcustomer_occupation'] + ['altcustomer_dob'] + ['ssn'] + ['phone'] + \ ['AccountID'] + ['PepFlag'] + ['altcustomerssn'] + ['demarketed_customer_flag'] + \ ['SAR_flag'] + ['nolonger_a_customer'] + ['closed_account'] +['High_risk_flag'] +['Risk_rating']) while i < 50000000: #Pick an account number and store it in acct acct = randrange(100000, 100000000, 1) #if the account hasn't been already generated then generate a record with all fields if d.has_key(str(acct)) == False: row = [i] + [10] + [gen_data.cc_number()]+[gen_data.create_company_name()] + \ [gen_data.create_email()]+[gen_data.create_name()] +[gen_data.create_job_title()] + \ [gen_data.create_city_state_zip()] + [gen_data.create_birthday(min_age=2, max_age=85)] + \ [gen_data.create_city_state_zip()] + [fake.name()] + [gen_data.create_job_title()] + \ [gen_data.create_birthday(min_age=2, max_age=85)] +\ [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] + \ [(randrange(101,1000,1),randrange(101,999,1),randrange(1000,10000,1))] + \ [acct] + \ [max((randrange(0,101,1)-99),0)] + \ [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-99),0)] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-90),0)] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-99),0)] d[str(acct)] = acct i = i + 1 writer.writerow(row)
# pandas provides the DataFrame; barnum's gen_data provides the fake values.
import pandas as pd
from barnum import gen_data


def _random_user():
    """Return one fake user record as a tuple ordered like ``labels``."""
    company = gen_data.create_company_name()
    # First and last name come from two separate create_name calls.
    first = gen_data.create_name(full_name=False)
    last = gen_data.create_name(full_name=False)
    title = gen_data.create_job_title()
    # The e-mail address is derived from the two names drawn above.
    email = gen_data.create_email(name=(first, last))
    password = gen_data.create_pw()
    street = gen_data.create_street()
    city_state_zip = gen_data.create_city_state_zip()
    credit_card = gen_data.create_cc_number()
    return (company, first, last, title, email, password, street,
            city_state_zip, credit_card)


# Collect 1000 fake user records.
users = []
for i in range(1000):
    users.append(_random_user())

# Column headings, in the same order as the fields of each record.
labels = [
    'Company',
    'First',
    'Last',
    'Title',
    'Email',
    'Password',
    'Street',
    'City/State/ZIP',
    'Credit Card',
]

# Tabulate the records.
df = pd.DataFrame(data=users, columns=labels)
def sqlrun(column, update):
    """Execute ``update`` once for every (id, value) pair.

    ``update`` is a %-template taking (table, value, id).  Ids come from the
    module-level ``listId`` and are paired positionally with ``column``.

    NOTE(review): values are interpolated straight into the SQL text, so a
    value containing a quote (for example a name like O'Brien) would produce
    a broken statement - consider driver-side parameters.
    """
    for row_id, value in zip(listId, column):
        statement = update % (table, value, row_id)
        cursor.execute(statement)


def sqlmpesa(col1, col2, col3, col4, col5, update):
    """Execute ``update`` once per id with five column values.

    ``update`` is a %-template taking (table, v1, v2, v3, v4, v5, id); the
    five columns are paired positionally with the module-level ``listId``.
    The same interpolation caveat as for ``sqlrun`` applies.
    """
    for fields in zip(listId, col1, col2, col3, col4, col5):
        row_id = fields[0]
        # Template order is: table, the five values, then the id.
        cursor.execute(update % ((table,) + fields[1:] + (row_id,)))


# Faker instance used for the replacement names.
fake = Faker()

# Replacement data, ``ranrange`` entries each.
# (An earlier variant drew the e-mail addresses from fake.email().)
fakename = [fake.name() for _ in range(ranrange)]
fakemail = [gen_data.create_email() for _ in range(ranrange)]
# Random integers of up to 12 digits, used as phone numbers.
phone = [random.randint(0, 999999999999) for _ in range(ranrange)]

# Single-column UPDATE templates: (table, new value, id).
phonere = "UPDATE %s SET phone='%s' WHERE id = '%s';"
emailre = "UPDATE %s SET email='%s' WHERE id = '%s';"
namere = "UPDATE %s SET name='%s' WHERE id = '%s';"
# Five-column UPDATE template used by sqlmpesa.
mpesatb = ("UPDATE %s SET receipt_no='%s', details='%s', other_party_info='%s', "
           "ac_no='%s', operator='%s' WHERE id = '%s';")

# AUTO_INCREMENT resets.
# NOTE(review): all three statements target `admins`; confirm that is intended.
altad = "ALTER TABLE `admins` AUTO_INCREMENT = 0"
altch = "ALTER TABLE `admins` AUTO_INCREMENT = 1"
altu = "ALTER TABLE `admins` AUTO_INCREMENT = 2"

# Admin INSERT templates; adadmin is adadm plus an ON DUPLICATE KEY clause.
adadm = ("INSERT INTO `admins` (`id`,`name`, `email`, `phone`, `password`, `account_id`) "
         "VALUES (%s, %s, %s, %s, %s, %s)")
adadmin = adadm + " ON DUPLICATE KEY UPDATE `id`=`id`+100"

# Debug aid for spotting duplicate e-mail addresses (to be removed):
# print([k for k, v in Counter(fakemail).items() if v > 1])
import random

import article_collection_pb2
from barnum import gen_data

# Randomly generated pools of demo data, 15 entries each.
names = [gen_data.create_name() for _ in range(15)]
emails = [gen_data.create_email() for _ in range(15)]
titles = [gen_data.create_nouns() for _ in range(15)]
contents = [gen_data.create_paragraphs(8) for _ in range(15)]

# Build one article per title; body, author name and author e-mail are picked
# independently from the pools.
articles = []
for title in titles:
    body = random.choice(contents)
    author_name = random.choice(names)
    author_email = random.choice(emails)

    # The remaining random values are drawn in the order they appear in the
    # record, so a seeded run produces the same articles as before.
    article_id = random.randint(10010, 20020)
    featured = random.choice([False, True])
    topic_ids = random.sample([0, 1, 2, 3, 4], 3)
    author_id = random.randint(10010, 20020)

    author = {
        "id": author_id,
        "name": author_name[0] + " " + author_name[1],
        "email": author_email,
    }
    articles.append({
        "id": article_id,
        "title": title,
        # The snippet is the first 100 characters of the body.
        "snippet": body[0:100],
        "content": body,
        "isFeatured": featured,
        "topics": topic_ids,
        "author": author,
    })
def createCusts(N):
    """Generate ``N`` synthetic customer/account rows.

    Each row is a flat list of values (account number, holder names and SSNs,
    credit card, addresses, risk flags, segment data, ...) appended in a fixed
    column order; the order of the ``row.extend``/``row.append`` calls below
    is therefore significant.

    Args:
        N: number of rows to generate. The module-level ``liSSNMaster`` list
            is indexed by row number, so it must hold at least ``N`` entries.

    Returns:
        A list of ``N`` rows (list of lists).

    Relies on names provided by the enclosing module: ``Faker``, ``gen_data``,
    ``geo_data``, ``zips``, ``NAICS``, ``random``, ``randrange``, ``datetime``
    and ``re``.
    """
    global liSSNMaster

    # ---- Weighted choice tables (repetition sets the relative odds) ----
    # Client whose net worth is over $500K
    HighNetWorth = ['Yes'] + ['No'] * 30
    # Type of related account
    Related_Type = ['Primary', 'Secondary', 'Joint']
    # How the account was opened
    Party_Type = ['Person', 'Non-Person']
    # Customer relationship
    Party_Relation = ['Customer', 'Non-Customer']
    # Skewed Yes/No flag (1 in 13 'Yes')
    Yes_No = ['Yes'] + ['No'] * 12
    # Consent to share information (1 in 5 'Yes')
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    # Official language
    Official_Lang = ['English'] * 3 + ['French']
    # Preferred method of communication
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']

    # Segment model tables; one index selects the same position in each.
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Type = [
        'LOB Specific', 'Profitability', 'Geographical', 'Behavioral',
        'Risk Tolerance'
    ]
    Seg_Model_Name = [
        'IRRI', 'CRS Risk Score', 'Geo Risk', 'Financial Behavior Risk',
        'CM Risk'
    ]
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = [
        'High Risk Tier', 'Mid Risk Tier', 'Low Risk Tier', 'Vertical Risk',
        'Geographical Risk'
    ]

    # Shared table for every rare industry/risk flag (arms manufacturer,
    # auction, casino, embassy, ...): 1 'Yes', 2 'No', 392 blanks.
    Rare_Flag = ['Yes'] + ['No'] * 2 + [''] * 392

    # Client onboarding channel
    Channel_Onboarding = [
        'E-mail', 'In Person', 'In person - In Branch/Bank Office',
        'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
        'Request for Proposal (RFP)'
    ] + ['Not Applicable'] * 10
    # Ongoing transaction channel
    Channel_Ongoing_Transactions = [
        'ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
        'OTC Communication System', 'Phone'
    ] + ['Online'] * 4 + ['In Person'] * 31
    # Products
    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 + [
        'Investment Agency - Institutional'
    ] * 5
    # Services
    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    # SIC codes
    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12
    # Stock market listing
    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30
    # Selector for the low-net-worth band (0, 1 or 2)
    LowNet = [1, 2] + [0] * 5
    # Consumer ('C') vs business ('B') account
    Acct_Type = ['B'] + ['C'] * 5
    # Number of credit card users per account
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    # Use-case scenario numbers
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 + [
        2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38
    ] * 7 + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36
             ] * 65 + [37] * 73 + [40, 41] * 2
    # Risk ratings assigned to high-risk customers
    refrating = [
        '1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5', '5', '5',
        '5', '5', '5', '5', '5', '5'
    ]

    fake = Faker()
    # Account numbers grow monotonically from this seed.
    start = 10786147
    acct_list = []
    liCSV = []

    for i in xrange(N):
        # ---- Initial risk flags ----
        PEP = 'No'          # politically exposed person
        SAR = 'No'          # suspicious activity report on file
        Clsd = 'No'         # closed account
        high_risk = 'No'
        hr_rating = ''
        demarket = 'No'     # demarketed by the bank
        dem_date = ''

        # Closed-account flag: only the draw 97 (out of 0..97) qualifies.
        if max(randrange(0, 98, 1) - 96, 0) == 1:
            Clsd = 'Yes'

        No_CCs = random.choice(Number_CC)

        # Next account number: previous one plus a gap of 2..10.
        acct = start + 1 + randrange(1, 10, 1)
        start = acct

        # Holder name from Faker; first element of the barnum name is used as
        # the middle name, the second later goes into the employer field.
        name = fake.name()
        tmp = gen_data.create_name()

        acct_list.append(acct)

        row = [
            i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0],
            liSSNMaster[i]
        ]

        # ---- Additional card users (up to three) ----
        names = []
        mdl = []   # middle names, to reduce duplicate names
        ssn = []
        for j in range(No_CCs - 1):
            names.append(fake.name())
            tmp2 = gen_data.create_name()
            mdl.append(tmp2[0])
            # Pull the SSN from the master list, avoiding the primary
            # holder's own slot.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])

        # Blank out the unused user slots so each list has three entries.
        padding = [''] * (4 - No_CCs)
        names.extend(padding)
        ssn.extend(padding)
        mdl.extend(padding)

        # ---- Credit card ----
        # CC_NO[0] is written as the card type and CC_NO[1][0] is the number.
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        # Scramble for uniqueness: last four card digits + a slice of the
        # current timestamp's digits + a random number.
        dt = str(datetime.now())
        clean = re.sub(r'\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))

        row.extend([
            names[0], mdl[0], ssn[0],
            names[1], mdl[1], ssn[1],
            names[2], mdl[2], ssn[2],
            printCC, CC_NO[0],
            gen_data.create_company_name() + ' ' + tmp[1],
            gen_data.create_email(),
            gen_data.create_job_title()
        ])

        # ---- Current and previous address ----
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        zip_code2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip_code2]

        lrg_cash_ex = random.choice(Yes_No)

        # ---- SARs and demarketed clients (closed accounts only) ----
        if Clsd == 'Yes':
            # About 1% of closed accounts are demarketed without a SAR.
            if max(randrange(0, 101, 1) - 99, 0) == 1 and SAR == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # Roughly 1 in 11 of the remaining closed accounts get a SAR ...
            if max(randrange(0, 11, 1) - 9, 0) == 1 and demarket == 'No':
                SAR = 'Yes'
                # ... and about 10 in 11 of those are demarketed.
                if max(randrange(0, 11, 1) - 9, 0) == 0:
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        # About 1% of all accounts are PEP.
        if max(randrange(0, 101, 1) - 99, 0) == 1:
            PEP = 'Yes'

        row.extend([
            addr[0], addr[1], zip_code, 'US',
            addr2[0], addr2[1], zip_code2, 'US',
            gen_data.create_birthday(min_age=2, max_age=85),
            PEP, SAR, Clsd
        ])

        # ---- Related account ----
        # Only once more than 10,000 accounts exist, to avoid duplicating
        # accounts early on; the multiplier is 1 for a single draw value and
        # 0 otherwise, so a related account is rare.
        rel = 0
        if i > 10000:
            rel = int(random.choice(acct_list)) * max(
                randrange(0, 10001, 1) - 9999, 0)
        if rel != 0:
            row.append(rel)
            row.append(random.choice(Related_Type))
        else:
            row.append('')
            row.append('')

        # ---- Party data ----
        party_start = gen_data.create_date(past=True)
        Consent_Share = random.choice(Yes_No_Consent)
        row.extend([
            random.choice(Party_Type),
            random.choice(Party_Relation),
            party_start,
            gen_data.create_date(past=True),
            lrg_cash_ex, demarket, dem_date,
            randrange(0, 100, 1),
            random.choice(Official_Lang)
        ])

        # Preferred contact channel is only filled in when consent is given.
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        row.extend([zip_code, randrange(0, 5, 1)])

        # ---- Segment data: one index picks a slot from every table ----
        Segment_ID = randrange(0, 5, 1)
        row.extend([
            Model_ID[Segment_ID],
            Seg_Model_Type[Segment_ID],
            Seg_Model_Name[Segment_ID],
            Seg_Model_Group[Segment_ID],
            Seg_Model_Description[Segment_ID],
            Seg_Model_Score[Segment_ID]
        ])

        # ---- First batch of risk attributes ----
        hr0 = random.choice(Rare_Flag)    # arms manufacturer
        hr01 = random.choice(Rare_Flag)   # auction
        hr02 = random.choice(Rare_Flag)   # cash-intensive business
        hr03 = random.choice(Rare_Flag)   # casino / gambling
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        # ---- Net worth ----
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            # Usually 1M-5M; occasionally the larger 1M-25M draw wins.
            row.append(
                max(
                    max(randrange(0, 101, 1) - 99, 0) *
                    randrange(1000000, 25000000, 1),
                    randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        # ---- Second batch of risk attributes ----
        hr1 = random.choice(Rare_Flag)    # complex HI vehicle
        hr2 = random.choice(Rare_Flag)    # dealer in precious metal
        hr3 = random.choice(Rare_Flag)    # digital PM operator
        hr4 = random.choice(Rare_Flag)    # embassy / consulate
        hr5 = random.choice(Rare_Flag)    # currency exchange
        hr6 = random.choice(Rare_Flag)    # foreign financial institution
        hr7 = random.choice(Rare_Flag)    # foreign government
        hr8 = random.choice(Rare_Flag)    # foreign non-bank financial inst.
        hr9 = random.choice(Rare_Flag)    # internet gambling
        hr10 = random.choice(Rare_Flag)   # medical marijuana dispensary
        hr11 = random.choice(Rare_Flag)   # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(Rare_Flag)   # non-regulated financial inst.
        hr14 = random.choice(Rare_Flag)   # not-for-profit
        # (occupation was already added above via gen_data.create_job_title)
        hr16 = random.choice(Rare_Flag)   # privately owned ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(Rare_Flag)   # sales of used vehicles
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Rare_Flag)   # third-party payment processor
        hr23 = random.choice(Rare_Flag)   # transacting provider

        # ---- High-risk decision ----
        risk_inputs = (
            PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03, hr1, hr2,
            hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr13, hr14, hr16,
            hr17, hr18, hr22, hr23, HighNetWorthFlag
        )
        if any(value == 'Yes' for value in risk_inputs):
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)

        # Customers with no risk indicator get three further ~1% chances of
        # being rated high risk anyway. (When high_risk is still 'No' here,
        # SAR and PEP are necessarily 'No', so no extra guard is needed; the
        # short-circuit keeps the random draw from happening once flagged.)
        for _attempt in range(3):
            if high_risk == 'No' and max(randrange(0, 101, 1) - 99, 0) == 1:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)

        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            HighNetWorthFlag, high_risk, hr_rating,
            random.choice(Use_Case)
        ])
        liCSV.append(row)

    return liCSV
from random import random
from random import randrange
from random import shuffle
from faker import Faker
from barnum import gen_data
import csv

fake = Faker()


def _three_part_number(middle_start, middle_stop):
    """Return a (3-digit, middle, 4-digit) tuple of random integers.

    The middle part is drawn from ``randrange(middle_start, middle_stop)``.
    The tuple is written to the CSV as-is (its string representation).
    """
    return (randrange(101, 1000, 1),
            randrange(middle_start, middle_stop, 1),
            randrange(1000, 10000, 1))


def _excess(cutoff):
    """Return how far a random draw from 0..100 exceeds ``cutoff`` (min 0).

    With ``cutoff=99`` this is a 0/1 flag that is 1 only for the draw 100.
    """
    return max(randrange(0, 101, 1) - cutoff, 0)


with open('large.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')

    # Header row: 24 columns, in the same order as the values built below.
    writer.writerow([
        'rownum', 'dunno', 'CC', 'Employer', 'Custemail', 'name',
        'occupation', 'address_street', 'DOB',
        'previous address_city_state_zip', 'altcustomer_name',
        'altcustomer_occupation', 'altcustomer_dob', 'ssn', 'phone',
        'AccountID', 'PepFlag', 'altcustomerssn', 'demarketed_customer_flag',
        'SAR_flag', 'nolonger_a_customer', 'closed_account',
        'High_risk_flag', 'Risk_rating',
    ])

    for i in range(50000000):
        row = [
            i,                                                  # rownum
            10,                                                 # dunno
            gen_data.cc_number(),                               # CC
            gen_data.create_company_name(),                     # Employer
            gen_data.create_email(),                            # Custemail
            gen_data.create_name(),                             # name
            gen_data.create_job_title(),                        # occupation
            gen_data.create_city_state_zip(),                   # address
            gen_data.create_birthday(min_age=2, max_age=85),    # DOB
            gen_data.create_city_state_zip(),                   # previous address
            fake.name(),                                        # altcustomer_name
            gen_data.create_job_title(),                        # altcustomer_occupation
            gen_data.create_birthday(min_age=2, max_age=85),    # altcustomer_dob
            _three_part_number(10, 100),                        # ssn
            _three_part_number(101, 999),                       # phone
            randrange(100000, 100000000, 1),                    # AccountID
            _excess(99),                                        # PepFlag
            _three_part_number(10, 100),                        # altcustomerssn
            _excess(99),                                        # demarketed_customer_flag
            _excess(99),                                        # SAR_flag
            _excess(99),                                        # nolonger_a_customer
            _excess(90),                                        # closed_account
            _excess(99),                                        # High_risk_flag
            _excess(99),                                        # Risk_rating
        ]
        writer.writerow(row)
def gen_cust(liSSNMaster, acct_list, i):
    """Build one synthetic customer/account row.

    Values are appended in a fixed column order, so the order of the
    ``row.extend``/``row.append`` calls below is significant.

    Args:
        liSSNMaster: master list of SSNs; entry ``i`` is used for the primary
            account holder and random other entries for additional card users.
        acct_list: list of account numbers generated so far; the new account
            number is appended to it (the argument is mutated).
        i: row number of the customer being generated.

    Returns:
        The row as a flat list.

    Relies on names provided by the enclosing module: the running account
    counter ``start``, the weighted choice tables (``Clsd_flag``,
    ``Number_CC``, ``Acct_Type``, ``Yes_No``, ``Yes_No_Consent``,
    ``Yes_No_Cust_Flag``, ``HighNetWorth``, ``LowNet``, ...), and ``Faker``,
    ``gen_data``, ``geo_data``, ``zips``, ``NAICS``, ``choice``,
    ``randrange``, ``datetime`` and ``re``.
    """
    # ``start`` is rebound below; without this declaration it would be a
    # local name and reading it first would raise UnboundLocalError.
    # NOTE(review): assumes a module-level ``start`` is initialised before
    # the first call - confirm against the caller.
    global start

    fake = Faker()

    # ---- Initial risk flags ----
    PEP = 'No'                  # politically exposed person
    SAR = 'No'                  # suspicious activity report on file
    Clsd = choice(Clsd_flag)    # closed account
    high_risk = 'No'
    hr_rating = ''
    demarket = 'No'             # demarketed by the bank
    dem_date = ''

    No_CCs = choice(Number_CC)

    # Next account number: previous one plus a gap of 2..10.
    acct = start + 1 + randrange(1, 10, 1)
    start = acct

    # Holder name from Faker; first element of the barnum name is used as the
    # middle name, the second later goes into the employer field.
    name = fake.name()
    tmp = gen_data.create_name()

    acct_list.append(acct)

    # Main account holder's SSN is the current index in the master SSN list.
    row = [i, acct, choice(Acct_Type), No_CCs, name, tmp[0], liSSNMaster[i]]

    # ---- Additional card users (up to three) ----
    names = []
    mdl = []   # middle names, to reduce duplicate names
    ssn = []
    for j in range(No_CCs - 1):
        names.append(fake.name())
        tmp2 = gen_data.create_name()
        mdl.append(tmp2[0])
        # Pull the SSN from the master list, avoiding the primary holder's
        # own slot.
        randInt = randrange(1, len(liSSNMaster), 1)
        if randInt != i:
            ssn.append(liSSNMaster[randInt])
        else:
            ssn.append(liSSNMaster[randInt - 1])

    # Blank out the unused user slots so each list has three entries.
    padding = [''] * (4 - No_CCs)
    names.extend(padding)
    ssn.extend(padding)
    mdl.extend(padding)

    # ---- Credit card ----
    # CC_NO[0] is written as the card type and CC_NO[1][0] is the number.
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    # Scramble for uniqueness: last four card digits + a slice of the current
    # timestamp's digits + a random number.
    dt = str(datetime.now())
    clean = re.sub(r'\W', '', dt)
    printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
        randrange(1111, 9999, randrange(1, 10, 1)))

    row.extend([
        names[0], mdl[0], ssn[0],
        names[1], mdl[1], ssn[1],
        names[2], mdl[2], ssn[2],
        printCC, CC_NO[0],
        gen_data.create_company_name() + ' ' + tmp[1],
        gen_data.create_email(),
        gen_data.create_job_title()
    ])

    # ---- Current and previous address ----
    zip_code = choice(zips.zip)
    addr = geo_data.create_city_state_zip[zip_code]
    zip_code2 = choice(zips.zip)
    addr2 = geo_data.create_city_state_zip[zip_code2]

    lrg_cash_ex = choice(Yes_No)

    # ---- SARs and demarketed clients (closed accounts only) ----
    if Clsd == 'Yes':
        # About 1% of closed accounts are demarketed without a SAR.
        if max(randrange(0, 101, 1) - 99, 0) == 1 and SAR == 'No':
            demarket = 'Yes'
            dem_date = gen_data.create_date(past=True)
        # Roughly 1 in 11 of the remaining closed accounts get a SAR ...
        if max(randrange(0, 11, 1) - 9, 0) == 1 and demarket == 'No':
            SAR = 'Yes'
            # ... and about 10 in 11 of those are demarketed.
            if max(randrange(0, 11, 1) - 9, 0) == 0:
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)

    # About 1% of accounts are PEP.
    if max(randrange(0, 101, 1) - 99, 0) == 1:
        PEP = 'Yes'

    row.extend([
        addr[0], addr[1], zip_code, 'US',
        addr2[0], addr2[1], zip_code2, 'US',
        gen_data.create_birthday(min_age=2, max_age=85),
        PEP, SAR, Clsd
    ])

    # ---- Related account ----
    # Only once more than 10,000 accounts exist; the multiplier is 1 for a
    # single draw value and 0 otherwise, so a related account is rare.
    rel = 0
    if i > 10000:
        rel = int(choice(acct_list)) * max(randrange(0, 10001, 1) - 9999, 0)
    if rel != 0:
        row.append(rel)
        row.append(choice(Related_Type))
    else:
        row.append('')
        row.append('')

    # ---- Party data ----
    party_start = gen_data.create_date(past=True)
    Consent_Share = choice(Yes_No_Consent)
    row.extend([
        choice(Party_Type),
        choice(Party_Relation),
        party_start,
        gen_data.create_date(past=True),
        lrg_cash_ex, demarket, dem_date,
        randrange(0, 100, 1),
        choice(Official_Lang)
    ])

    # Preferred contact channel is only filled in when consent is given.
    if Consent_Share == 'Yes':
        row.extend(['Yes', choice(Preffered_Channel)])
    else:
        row.extend(['No', ''])

    # DO NOT USE CUST STATUS BELOW - NOT INTEGRATED WITH CLOSED STATUS!
    row.extend([zip_code, randrange(0, 5, 1)])

    # ---- Segment data, selected by a random segment id 0..4 ----
    segments = (
        ['01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'],
        ['02', 'Profitability', 'CRS Risk Score', 'Group 1', 'Mid Risk Tier',
         '300'],
        ['03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier', '400'],
        ['04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'],
        ['05', 'Risk Tolerance', 'CM Risk', 'Group 4', 'Geographical Risk',
         '500'],
    )
    Segment_ID = randrange(0, 5, 1)
    row.extend(segments[Segment_ID])

    # ---- First batch of risk attributes ----
    hr0 = choice(Yes_No_Cust_Flag)    # arms manufacturer
    hr01 = choice(Yes_No_Cust_Flag)   # auction
    hr02 = choice(Yes_No_Cust_Flag)   # cash-intensive business
    hr03 = choice(Yes_No_Cust_Flag)   # casino / gambling
    hr04 = choice(Channel_Onboarding)
    hr05 = choice(Channel_Ongoing_Transactions)
    row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

    # ---- Net worth ----
    HighNetWorthFlag = choice(HighNetWorth)
    if HighNetWorthFlag == 'Yes':
        # Usually 1M-5M; occasionally the larger 5M-25M draw wins.
        row.append(
            max(
                max(randrange(0, 101, 1) - 99, 0) *
                randrange(5000000, 25000000, 1),
                randrange(1000000, 5000000, 1)))
    else:
        flag = choice(LowNet)
        if flag == 0:
            row.append(randrange(-250000, 600000, 1))
        elif flag == 1:
            row.append(randrange(149000, 151000, 1))
        else:
            row.append(randrange(40000, 50000, 1))

    # ---- Second batch of risk attributes ----
    hr1 = choice(Yes_No_Cust_Flag)    # complex HI vehicle
    hr2 = choice(Yes_No_Cust_Flag)    # dealer in precious metal
    hr3 = choice(Yes_No_Cust_Flag)    # digital PM operator
    hr4 = choice(Yes_No_Cust_Flag)    # embassy / consulate
    hr5 = choice(Yes_No_Cust_Flag)    # currency exchange
    hr6 = choice(Yes_No_Cust_Flag)    # foreign financial institution
    hr7 = choice(Yes_No_Cust_Flag)    # foreign government
    hr8 = choice(Yes_No_Cust_Flag)    # foreign non-bank financial inst.
    hr9 = choice(Yes_No_Cust_Flag)    # internet gambling
    hr10 = choice(Yes_No_Cust_Flag)   # medical marijuana dispensary
    hr11 = choice(Yes_No_Cust_Flag)   # money service business
    hr12 = choice(NAICS.NAICS_Code)
    hr13 = choice(Yes_No_Cust_Flag)   # non-regulated financial institution
    hr14 = choice(Yes_No_Cust_Flag)   # not-for-profit
    # (occupation is not drawn here; a job title was added to the row above)
    hr16 = choice(Yes_No_Cust_Flag)   # privately owned ATM operator
    hr17 = choice(Products)
    hr18 = choice(Yes_No_Cust_Flag)   # sales of used vehicles
    hr19 = choice(Services)
    hr20 = choice(SIC_Code)
    hr21 = choice(Stock_Market_Listing)
    hr22 = choice(Yes_No_Cust_Flag)   # third-party payment processor
    hr23 = choice(Yes_No_Cust_Flag)   # transacting provider

    # Risk ratings assigned to high-risk customers (weighted towards '5').
    refrating = ['1'] * 3 + ['2', '4'] * 2 + ['3'] + ['5'] * 12

    # ---- High-risk decision ----
    risk_inputs = (
        PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03, hr1, hr2, hr3,
        hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr13, hr14, hr16, hr17,
        hr18, hr22, hr23, HighNetWorthFlag
    )
    if any(value == 'Yes' for value in risk_inputs):
        high_risk = 'Yes'
        hr_rating = choice(refrating)

    # Customers with no risk indicator still have a ~1% chance of being
    # rated high risk.
    if high_risk == 'No':
        if max(randrange(0, 101, 1) - 99, 0) == 1:
            high_risk = 'Yes'
            hr_rating = choice(refrating)

    row.extend([
        hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12, hr13,
        hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
        HighNetWorthFlag, high_risk, hr_rating,
        choice(Use_Case)
    ])

    # End of the current row
    return row
# Accumulators initialised empty before the loop; nothing in this fragment
# appends to them.
# NOTE(review): presumably filled by the generate*() helpers - confirm.
previousContacts = []
previousEmailAddresses = []
previousIMAccounts = []
# Translation table built from two empty strings; it is only used below
# together with a delete-characters argument (Python 2 str.translate form).
allchars = string.maketrans('', '')
# Generate `count` contacts. The loop only assigns the per-contact values and
# then calls helpers that take no arguments.
# NOTE(review): the helpers presumably read these variables from the enclosing
# scope - confirm before renaming or reordering anything here.
for dummy in range(0, count):
    firstName, lastName = gen_data.create_name()
    zip, city, state = gen_data.create_city_state_zip()
    postalAddressID = str(random.randint(0, sys.maxint))
    UID = str(random.randint(0, sys.maxint))
    phoneNumber = gen_data.create_phone()
    # Strip spaces, dashes and parentheses from the number for the tel: URI.
    phoneUri = 'tel:+1' + phoneNumber.translate(allchars, ' -()')
    birthDay = gen_data.create_birthday()
    streetAddress = gen_data.create_street()
    emailAddress = gen_data.create_email(name=(firstName, lastName))
    # Lower-cased "first.last@gmail.com" address.
    xmppAddress = str(firstName + "." + lastName + "@gmail.com").lower()
    hasIMAccount = False
    hasPhoneNumber = False
    jobTitle = gen_data.create_job_title()
    generatePostalAddress()
    generateEmailAddress()
    # To add variation, a contact gets an IM account or a phone number only
    # when randint(0, 3) draws 3, i.e. about one time in four; the two draws
    # are independent. When a single contact is requested (count == 1) it
    # always gets both.
    if random.randint(0, 3) > 2 or count == 1:
        generateIMAccount(gen_data, str)
        hasIMAccount = True
    if random.randint(0, 3) > 2 or count == 1:
        generatePhoneNumber()
        hasPhoneNumber = True
def generate_customers():
    """Write synthetic customer records to ``uber_cust.csv``.

    The file is pipe-delimited with a header row followed by 30 generated
    rows. Every value comes from ``random``, ``fake``, ``gen_data`` or the
    module-level ``weighted_options`` / ``risk_range`` helpers, so the output
    differs on every run unless the random generators are seeded by the
    caller.

    NOTE(review): the header names 88 columns but each row carries 87 values;
    the header column 'accountCategory' has no corresponding value, so every
    column after 'accountNumber' is shifted by one relative to its header.
    Left unchanged here because the intended value is not visible in this
    file -- confirm with the consumers of the CSV.
    """
    num_customers = 30

    # Column names in output order.
    header = [
        'ROWNUM', 'accountNumber', 'accountCategory', 'accountType',
        'NUM_CCS', 'NAME', 'M_NAME', 'SSN',
        'AUTHORIZED_NAME2', 'M_NAME2', 'SSN2',
        'AUTHORIZED_NAME3', 'M_NAME3', 'SSN3',
        'AUTHORIZED_NAME4', 'M_NAME4', 'SSN4',
        'CREDITCARDNUMBER', 'CREDITCARDTYPE', 'EMPLOYER', 'CUSTEMAIL',
        'OCCUPATION', 'CITY', 'STATE', 'ZIP', 'COUNTRY',
        'PREVIOUS_CITY', 'PREVIOUS_STATE', 'PREVIOUS_ZIP',
        'PREVIOUS_COUNTRY', 'DOB',
        'politically_exposed_person', 'suspicious_activity_report',
        'CLOSEDACCOUNT', 'RELATED_ACCT', 'RELATED_TYPE',
        'PARTY_TYPE', 'PARTY_RELATION', 'PARTY_STARTDATE', 'PARTY_ENDDATE',
        'LARGE_CASH_EXEMPT', 'DEMARKET_FLAG', 'DEMARKET_DATE',
        'PROB_DEFAULT_RISKR', 'OFFICIAL_LANG_PREF', 'CONSENT_SHARING',
        'PREFERRED_CHANNEL', 'PRIMARY_BRANCH_NO', 'DEPENDANTS_COUNT',
        'SEG_MODEL_ID', 'SEG_MODEL_TYPE', 'SEG_MODEL_NAME',
        'SEG_MODEL_GROUP', 'SEG_M_GRP_DESC', 'SEG_MODEL_SCORE',
        'ARMS_MANUFACTURER', 'AUCTION', 'CASHINTENSIVE_BUSINESS',
        'CASINO_GAMBLING', 'CHANNEL_ONBOARDING',
        'CHANNEL_ONGOING_TRANSACTIONS', 'CLIENT_NET_WORTH',
        'COMPLEX_HI_VEHICLE', 'DEALER_PRECIOUS_METAL', 'DIGITAL_PM_OPERATOR',
        'EMBASSY_CONSULATE', 'EXCHANGE_CURRENCY',
        'FOREIGN_FINANCIAL_INSTITUTION', 'FOREIGN_GOVERNMENT',
        'FOREIGN_NONBANK_FINANCIAL_INSTITUTION', 'INTERNET_GAMBLING',
        'MEDICAL_MARIJUANA_DISPENSARY', 'MONEY_SERVICE_BUSINESS',
        'NAICS_CODE', 'NONREGULATED_FINANCIAL_INSTITUTION', 'NOT_PROFIT',
        'PRIVATELY_ATM_OPERATOR', 'PRODUCTS', 'SALES_USED_VEHICLES',
        'SERVICES', 'SIC_CODE', 'STOCK_MARKET_LISTING',
        'THIRD_PARTY_PAYMENT_PROCESSOR', 'TRANSACTING_PROVIDER',
        'HIGH_NET_WORTH', 'HIGH_RISK', 'RISK_RATING', 'USE_CASE_SCENARIO',
    ]

    def _random_ssn():
        # Nine random decimal digits joined into one string.
        return ''.join(str(random.randint(0, 9)) for _ in xrange(9))

    with get_file('uber_cust.csv', 'w') as f1:
        # Writer for CSV: pipe delimited, '\n' terminates each row.
        writer = csv.writer(f1, delimiter='|', lineterminator='\n')
        writer.writerow(header)

        start = 10
        acct_list = []

        # Pool of unique SSNs, one per customer. Keep drawing until the pool
        # is full: a single retry (the previous behaviour) could still leave
        # fewer than num_customers entries and make li_ssn_master[i] raise
        # IndexError.
        ssn_pool = set(_random_ssn() for _ in xrange(num_customers))
        while len(ssn_pool) < num_customers:
            ssn_pool.add(_random_ssn())
        li_ssn_master = list(ssn_pool)

        for i in xrange(num_customers):
            # Initiate high risk flags.
            politically_exposed_person = 'No'
            suspicious_activity_report = 'No'
            closed_cust_acct = 'No'
            # High risk customer flag and rating.
            high_risk = 'No'
            hr_rating = ''
            # Customer that was demarketed by the bank.
            demarket = 'No'
            dem_date = ''

            # Closed-account flag: only the draw 97 (1 of 98 values) yields 1.
            if max((randrange(0, 98, 1) - 96), 0) == 1:
                closed_cust_acct = 'Yes'

            # Number of credit cards on the account.
            no_ccs = weighted_options('number_cc')

            # Account numbers strictly increase by a random step of 2..10.
            acct = start + 1 + randrange(1, 10, 1)
            start = acct
            name = fake.name()
            tmp = gen_data.create_name()
            acct_list.append(acct)

            # The main account holder's SSN is the pool entry at this row's
            # index.
            row = [
                i, acct, weighted_options('acct_type'), no_ccs, name, tmp[0],
                li_ssn_master[i]
            ]

            # Name, middle name (to reduce name dups) and SSN for each
            # additional credit user.
            names = []
            ssn = []
            mdl = []
            for j in range(no_ccs - 1):
                names.append(fake.name())
                tmp2 = gen_data.create_name()
                mdl.append(tmp2[0])
                # Pull from the SSN pool, stepping back one slot when the
                # draw lands on the account holder's own index.
                randInt = randrange(1, len(li_ssn_master), 1)
                if randInt != i:
                    ssn.append(li_ssn_master[randInt])
                else:
                    ssn.append(li_ssn_master[randInt - 1])

            # Name and SSN are blank if fewer than 4 customers on an account.
            for k in range(4 - no_ccs):
                names.append('')
                ssn.append('')
                mdl.append('')

            # create_cc_number() returns a tuple holding card type and
            # number(s); CC_NO[1][0] is the first number, CC_NO[0] the type.
            CC_NO = gen_data.create_cc_number()
            CC_TRANS = CC_NO[1][0]
            dt = str(datetime.now())
            clean = re.sub(r'\W', '', dt)
            # Printed card number: last 4 generated digits + digits taken
            # from the current timestamp + a random suffix.
            printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
                randrange(1111, 9999, randrange(1, 10, 1)))

            row.extend([
                names[0], mdl[0], ssn[0], names[1], mdl[1], ssn[1], names[2],
                mdl[2], ssn[2], printCC, CC_NO[0],
                gen_data.create_company_name() + ' ' + tmp[1],
                gen_data.create_email(),
                gen_data.create_job_title()
            ])

            # Current address.
            zip_code = random.choice(zips.zip)
            addr = geo_data.create_city_state_zip[zip_code]
            # Previous address.
            prev_zip_code = random.choice(zips.zip)
            addr2 = geo_data.create_city_state_zip[prev_zip_code]

            lrg_cash_ex = weighted_options('yes_no')

            # Condition for SARs and demarketed clients.
            # NOTE(review): the probabilities quoted in the original comments
            # (1% / 10%) depend on risk_range(), which is defined elsewhere.
            if closed_cust_acct == 'Yes':
                # Closed accounts demarketed without a SAR ever being filed.
                if risk_range() and suspicious_activity_report == 'No':
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)
                if risk_range() and demarket == 'No':
                    suspicious_activity_report = 'Yes'
                    # Most closed accounts with SARs are demarketed: 10 of
                    # the 11 possible draws satisfy this test.
                    if max((randrange(0, 11, 1) - 9), 0) == 0:
                        demarket = 'Yes'
                        dem_date = gen_data.create_date(past=True)

            if risk_range():
                politically_exposed_person = 'Yes'

            row.extend([
                addr[0], addr[1], zip_code, 'US', addr2[0], addr2[1],
                prev_zip_code, 'US',
                gen_data.create_birthday(min_age=2, max_age=85),
                politically_exposed_person, suspicious_activity_report,
                closed_cust_acct
            ])

            # Related accounts are only drawn once more than 10,000 accounts
            # exist; with num_customers rows this branch is never taken.
            if i > 10000:
                rel = int(random.choice(acct_list)) * max(
                    (randrange(0, 10001, 1) - 9999), 0)
                if rel != 0:
                    row.append(rel)
                    row.append(weighted_options('related_type'))
                else:
                    row.append('')
                    row.append('')
            else:
                row.append('')
                row.append('')

            # Account start date.
            party_start = gen_data.create_date(past=True)
            # Consent option for sharing info.
            consent_share = weighted_options('yes_no')

            row.extend([
                weighted_options('party_type'),
                weighted_options('party_relation'), party_start,
                gen_data.create_date(past=True), lrg_cash_ex, demarket,
                dem_date,
                randrange(0, 100, 1),
                weighted_options('official_lang')
            ])

            # Preferred contact channel only when the customer consents to
            # sharing; blank otherwise.
            if consent_share == 'Yes':
                row.extend(['Yes', weighted_options('preferred_channel')])
            else:
                row.extend(['No', ''])

            # Primary branch number (the current zip) and dependants count.
            row.extend([zip_code, randrange(0, 5, 1)])

            # Pick one of the five segment models and add its fields.
            Segment_ID = randrange(0, 5, 1) % 5
            row.extend([
                MODEL_ID[Segment_ID], SEG_MODEL_TYPE[Segment_ID],
                SEG_MODEL_NAME[Segment_ID], SEG_MODEL_GROUP[Segment_ID],
                SEG_MODEL_DESCRIPTION[Segment_ID],
                SEG_MODEL_SCORE[Segment_ID]
            ])

            arms_manufacturer = weighted_options('arms_manufacturers')
            auction = weighted_options('auction')
            cash_intensive_business = weighted_options(
                'cash_intensive_business')
            casino_gambling = weighted_options('casino_gambling')
            chan_ob = weighted_options('channel_onboarding')
            chan_txn = weighted_options('channel_ongoing_txn')
            row.extend([
                arms_manufacturer, auction, cash_intensive_business,
                casino_gambling, chan_ob, chan_txn
            ])

            # Net worth depends on the high-net-worth flag.
            high_net_worth_flag = weighted_options('high_net_worth')
            if high_net_worth_flag == 'Yes':
                row.append(
                    max(
                        max((randrange(0, 101, 1) - 99), 0) *
                        randrange(1000000, 25000000, 1),
                        randrange(1000000, 5000000, 1)))
            else:
                flag = weighted_options('low_net')
                if flag == 0:
                    row.append(randrange(-250000, 600000, 1))
                elif flag == 1:
                    row.append(randrange(149000, 151000, 1))
                else:
                    row.append(randrange(40000, 50000, 1))

            hr1 = weighted_options('complex_hi_vehicle')
            hr2 = weighted_options('dealer_precious_metal')
            hr3 = weighted_options('digital_pm_operator')
            hr4 = weighted_options(EMBASSY_CONSULATE)
            hr5 = weighted_options(EXCHANGE_CURRENCY)
            hr6 = weighted_options(FOREIGN_FINANCIAL_INSTITUTION)
            hr7 = weighted_options(FOREIGN_GOVT)
            hr8 = weighted_options(FOREIGN_NONBANK_FINANCIAL_INSTITUTION)
            hr9 = weighted_options(INTERNET_GAMBLING)
            hr10 = weighted_options(MEDICAL_MARIJUANA_DISPENSARY)
            hr11 = weighted_options(MONEY_SERVICE_BUSINESS)
            hr12 = random.choice(NAICS.NAICS_Code)
            hr13 = weighted_options(NONREGULATED_FINANCIAL_INSTITUTION)
            hr14 = weighted_options(NOT_PROFIT)
            # (hr15, occupation, is intentionally not generated.)
            hr16 = weighted_options(PRIVATE_ATM_OPERATOR)
            hr17 = weighted_options('products')
            hr18 = weighted_options(SALES_USED_VEHICLES)
            hr19 = weighted_options('services')
            hr20 = weighted_options('sic_code')
            hr21 = weighted_options('stock_market_listing')
            hr22 = weighted_options(THIRD_PARTY_PAYMENT_PROCESSOR)
            hr23 = weighted_options(TRANSACTING_PROVIDER)

            # Any single 'Yes' among these flags makes the customer high risk.
            if 'Yes' in (politically_exposed_person,
                         suspicious_activity_report, lrg_cash_ex, demarket,
                         arms_manufacturer, auction, cash_intensive_business,
                         casino_gambling, hr1, hr2, hr3, hr4, hr5, hr6, hr7,
                         hr8, hr9, hr10, hr11, hr13, hr14, hr16, hr17, hr18,
                         hr22, hr23, high_net_worth_flag):
                high_risk = 'Yes'
                hr_rating = weighted_options('refrating')

            # Three further independent chances for an otherwise clean
            # customer to be marked high risk.
            if suspicious_activity_report == 'No' and high_risk == 'No':
                if risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')
            if politically_exposed_person == 'No' and high_risk == 'No':
                if risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')
            if high_risk == 'No':
                if risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')

            row.extend([
                hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
                hr12, hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22,
                hr23, high_net_worth_flag, high_risk, hr_rating,
                random.choice(USE_CASE)
            ])

            # End the current row.
            writer.writerow(row)
# Generates `count` synthetic contacts. For each one, random personal data is
# drawn from barnum's gen_data and the generate*() helpers (defined outside
# this fragment) are called.
# NOTE(review): the helpers take no per-contact arguments, so they presumably
# read the variables assigned below from the enclosing scope -- confirm before
# renaming or removing any of them.
previousContacts = []
previousEmailAddresses = []
previousIMAccounts = []
# Identity translation table (Python 2 string.maketrans); it is only passed to
# translate() below so that the second argument can delete characters.
allchars = string.maketrans('','')
for dummy in range (0, count):
    firstName, lastName = gen_data.create_name()
    zip, city, state = gen_data.create_city_state_zip()
    postalAddressID=str(random.randint(0, sys.maxint))
    UID = str(random.randint(0, sys.maxint))
    phoneNumber = gen_data.create_phone()
    # Python 2 str.translate(table, deletechars): drops spaces, hyphens and
    # parentheses from the generated number before prefixing 'tel:+1'.
    phoneUri = 'tel:+1' + phoneNumber.translate(allchars,' -()')
    birthDay = gen_data.create_birthday()
    streetAddress = gen_data.create_street()
    emailAddress = gen_data.create_email(name=(firstName, lastName))
    xmppAddress = str(firstName+"." + lastName + "@gmail.com").lower()
    hasIMAccount = False
    hasPhoneNumber = False
    jobTitle = gen_data.create_job_title()
    generatePostalAddress()
    generateEmailAddress()
    # Phone and IM are optional to add variation: randint(0, 3) > 2 holds for
    # one value out of four, so roughly a quarter of contacts get each one.
    # When only a single contact is requested (count == 1) both are always
    # generated.
    if random.randint(0, 3) > 2 or count == 1:
        generateIMAccount(gen_data, str)
        hasIMAccount = True
    if random.randint(0, 3) > 2 or count == 1:
        generatePhoneNumber()
        hasPhoneNumber = True
mdl.insert(No_CCs,'') #Sets CC_NO to a random credit card number CC_NO=gen_data.cc_number() #Extract CC_Number from the tuple returned by CC_Number...Tuple contains CC Number and Type #while CC_list.count(CC_NO[1][0]) > 0: CC_TRANS=CC_NO[1][0] dt = str(datetime.now()) clean=re.sub('\W','',dt) printCC=str(CC_TRANS[-4:])+str(clean[-12:-3])+str(randrange(1111,9999,randrange(1,10,1))) #str(CC_TRANS[-4:])+str(clean[-12:-2])+str(randrange(1111,9999,randrange(1,10,1))) #Add CC_Number to control list to prevent duplicates #Add data elements to current csv row row.extend([names[0],mdl[0],ssn[0],names[1],mdl[1],ssn[1],names[2],mdl[2],ssn[2],printCC,CC_NO[0],gen_data.create_company_name()+' '+tmp[1],\ gen_data.create_email(),gen_data.create_job_title()]) #Creates Current Address zip=random.choice(zips.zip) addr=geo_data.create_city_state_zip[zip] #Creates Previous address zip2=random.choice(zips.zip) addr2=geo_data.create_city_state_zip[zip2] #Add additional data elements to current csv row lrg_cash_ex=random.choice(Yes_No) #Condition for SARs and Demarketed Clients if(Clsd=='Yes'): #1% of closed accounts are demarketed but never had a SAR filed if (max((randrange(0,101,1)-99),0)==1 and SAR=='No'):
def __init__(self, i, acct, liSSNMaster, acct_list):
    """Populate one synthetic customer record with randomly generated data.

    Every output column is stored as an upper-case attribute on ``self``.

    Args:
        i: Row number; also the index of this customer's SSN in
            ``liSSNMaster``.
        acct: Account identifier, stored unchanged as ``ACCOUNTID``.
        liSSNMaster: Indexable pool of SSN values. The customer's own SSN is
            ``liSSNMaster[i]``; SSNs of authorized users are drawn from
            indices 1 and above.
        acct_list: Previously generated account ids. Only read when
            ``i > 10000``, to occasionally link a related account.
    """
    self.ROWNUM = i
    self.ACCOUNTID = acct
    self.SSN = liSSNMaster[i]
    self.ACCT_TYPE = choice(Acct_Type)
    self.NUM_CCS = choice(Number_CC)
    self.NAME = fake.name()
    self.CUSTEMAIL = gen_data.create_email()
    self.OCCUPATION = gen_data.create_job_title()
    self.COUNTRY = 'US'
    self.PREVIOUS_COUNTRY = 'US'
    self.DOB = gen_data.create_birthday(min_age=2, max_age=85)
    self.PARTY_ENDDATE = gen_data.create_date(past=True)
    self.CONSENT_SHARING = choice(Yes_No_Consent)
    self.LARGE_CASH_EXEMPT = choice(Yes_No)
    self.PARTY_TYPE = choice(Party_Type)
    self.PARTY_RELATION = choice(Party_Relation)
    self.PROB_DEFAULT_RISKR = randrange(0, 100, 1)
    self.OFFICIAL_LANG_PREF = choice(Official_Lang)
    self.DEPENDANTS_COUNT = randrange(0, 5, 1)
    self.USE_CASE_SCENARIO = choice(Use_Case)
    self.CLOSEDACCOUNT = choice(Clsd_flag)
    self.HIGH_NET_WORTH = choice(HighNetWorth)
    self.PARTY_STARTDATE = gen_data.create_date(past=True)
    self.ARMS_MANUFACTURER = choice(Yes_No_Cust_Flag)
    self.AUCTION = choice(Yes_No_Cust_Flag)
    self.CASHINTENSIVE_BUSINESS = choice(Yes_No_Cust_Flag)
    self.CASINO_GAMBLING = choice(Yes_No_Cust_Flag)
    self.CHANNEL_ONBOARDING = choice(Channel_Onboarding)
    self.CHANNEL_ONGOING_TRANSACTIONS = choice(Channel_Ongoing_Transactions)
    self.COMPLEX_HI_VEHICLE = choice(Yes_No_Cust_Flag)
    self.DEALER_PRECIOUS_METAL = choice(Yes_No_Cust_Flag)
    self.DIGITAL_PM_OPERATOR = choice(Yes_No_Cust_Flag)
    self.EMBASSY_CONSULATE = choice(Yes_No_Cust_Flag)
    self.EXCHANGE_CURRENCY = choice(Yes_No_Cust_Flag)
    self.FOREIGN_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.FOREIGN_GOVERNMENT = choice(Yes_No_Cust_Flag)
    self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.INTERNET_GAMBLING = choice(Yes_No_Cust_Flag)
    self.MEDICAL_MARIJUANA_DISPENSARY = choice(Yes_No_Cust_Flag)
    self.MONEY_SERVICE_BUSINESS = choice(Yes_No_Cust_Flag)
    self.NAICS_CODE = choice(NAICS.NAICS_Code)
    self.NONREGULATED_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.NOT_PROFIT = choice(Yes_No_Cust_Flag)
    self.PRIVATELY_ATM_OPERATOR = choice(Yes_No_Cust_Flag)
    self.PRODUCTS = choice(Products)
    self.SALES_USED_VEHICLES = choice(Yes_No_Cust_Flag)
    self.SERVICES = choice(Services)
    self.SIC_CODE = choice(SIC_Code)
    self.STOCK_MARKET_LISTING = choice(Stock_Market_Listing)
    self.THIRD_PARTY_PAYMENT_PROCESSOR = choice(Yes_No_Cust_Flag)
    self.TRANSACTING_PROVIDER = choice(Yes_No_Cust_Flag)

    # Current and previous address, looked up by zip.
    self.ZIP = choice(zips.zip)
    self.PREVIOUS_ZIP = choice(zips.zip)
    addr = geo_data.create_city_state_zip[self.ZIP]
    addr2 = geo_data.create_city_state_zip[self.PREVIOUS_ZIP]
    self.CITY = addr[0]
    self.STATE = addr[1]
    self.PREVIOUS_CITY = addr2[0]
    self.PREVIOUS_STATE = addr2[1]
    self.PRIMARY_BRANCH_NO = self.ZIP

    tmp = gen_data.create_name()
    self.M_NAME = tmp[0]
    self.EMPLOYER = gen_data.create_company_name() + ' ' + tmp[1]

    # Use the card count already recorded in NUM_CCS. A second independent
    # choice(Number_CC) draw (the previous behaviour) let the number of
    # generated authorized users disagree with the NUM_CCS column.
    No_CCs = self.NUM_CCS

    # Name, middle name (to reduce name dups) and SSN of up to four credit
    # users; slots beyond No_CCs are left blank.
    names = []
    ssn = []
    mdl = []
    for j in range(4):
        if No_CCs > j:
            names.append(fake.name())
            tmp2 = gen_data.create_name()
            mdl.append(tmp2[0])
            # Step back one slot when the draw lands on the account holder's
            # own SSN index.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])
        else:
            names.append('')
            ssn.append('')
            mdl.append('')

    # Only the first three slots are exported.
    self.AUTHORIZED_NAME2 = names[0]
    self.M_NAME2 = mdl[0]
    self.SSN2 = ssn[0]
    self.AUTHORIZED_NAME3 = names[1]
    self.M_NAME3 = mdl[1]
    self.SSN3 = ssn[1]
    self.AUTHORIZED_NAME4 = names[2]
    self.M_NAME4 = mdl[2]
    self.SSN4 = ssn[2]

    # Printed card number: last 4 generated digits + digits taken from the
    # current timestamp + a random suffix. CC_NO[0] is the card type.
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    dt = str(datetime.now())
    clean = re.sub(r'\W', '', dt)
    self.CREDITCARDNUMBER = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
        randrange(1111, 9999, randrange(1, 10, 1)))
    self.CREDITCARDTYPE = CC_NO[0]

    # Related accounts are only drawn after more than 10,000 rows; the
    # multiplier is 1 for a single draw out of 10,001 and 0 otherwise.
    self.RELATED_ACCT = ''
    self.RELATED_TYPE = ''
    if i > 10000:
        rel = int(choice(acct_list)) * max(
            (randrange(0, 10001, 1) - 9999), 0)
        if rel != 0:
            self.RELATED_ACCT = rel
            self.RELATED_TYPE = choice(Related_Type)

    # Preferred channel only when the customer consents to sharing.
    self.PREFERRED_CHANNEL = ''
    if self.CONSENT_SHARING == 'Yes':
        self.PREFERRED_CHANNEL = choice(Prefered_Channel)

    # Segment model: one of five fixed
    # (id, type, name, group, group description, score) tuples.
    segments = (
        ('01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'),
        ('02', 'Profitability', 'CRS Risk Score', 'Group 1',
         'Mid Risk Tier', '300'),
        ('03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier',
         '400'),
        ('04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'),
        ('05', 'Risk Tolerance', 'CM Risk', 'Group 4', 'Geographical Risk',
         '500'),
    )
    Segment_ID = randrange(0, 5, 1)
    (self.SEG_MODEL_ID, self.SEG_MODEL_TYPE, self.SEG_MODEL_NAME,
     self.SEG_MODEL_GROUP, self.SEG_M_GRP_DESC,
     self.SEG_MODEL_SCORE) = segments[Segment_ID]

    # Net worth depends on the high-net-worth flag.
    self.CLIENT_NET_WORTH = ''
    if self.HIGH_NET_WORTH == 'Yes':
        self.CLIENT_NET_WORTH = max(
            max((randrange(0, 101, 1) - 99), 0) *
            randrange(5000000, 25000000, 1),
            randrange(1000000, 5000000, 1))
    else:
        flag = choice(LowNet)
        if flag == 0:
            self.CLIENT_NET_WORTH = randrange(-250000, 600000, 1)
        elif flag == 1:
            self.CLIENT_NET_WORTH = randrange(149000, 151000, 1)
        else:
            self.CLIENT_NET_WORTH = randrange(40000, 50000, 1)

    # Politically exposed person: about 1% of accounts (1 draw out of 101).
    self.PEP = 'No'
    if max((randrange(0, 101, 1) - 99), 0) == 1:
        self.PEP = 'Yes'

    # Customer that was demarketed by the bank.
    self.DEMARKET_FLAG = 'No'
    self.DEMARKET_DATE = ''
    # Customer with a Suspicious Activity Report.
    self.SAR = 'No'

    # SARs and demarketing only apply to closed accounts.
    if self.CLOSEDACCOUNT == 'Yes':
        # About 1% are demarketed without a SAR ever being filed.
        if max((randrange(0, 101, 1) - 99), 0) == 1:
            self.DEMARKET_FLAG = 'Yes'
            self.DEMARKET_DATE = gen_data.create_date(past=True)
        # Of the rest, 1 draw out of 11 gets a SAR.
        if (self.DEMARKET_FLAG == 'No'
                and max((randrange(0, 11, 1) - 9), 0) == 1):
            self.SAR = 'Yes'
            # Most accounts with a SAR are demarketed (10 of 11 draws).
            if max((randrange(0, 11, 1) - 9), 0) == 0:
                self.DEMARKET_FLAG = 'Yes'
                self.DEMARKET_DATE = gen_data.create_date(past=True)

    # Any single 'Yes' among these flags makes the customer high risk;
    # otherwise there is still a roughly 1% chance.
    self.HIGH_RISK = 'No'
    self.RISK_RATING = ''
    if 'Yes' in (self.PEP, self.SAR, self.LARGE_CASH_EXEMPT,
                 self.DEMARKET_FLAG, self.ARMS_MANUFACTURER, self.AUCTION,
                 self.CASHINTENSIVE_BUSINESS, self.CASINO_GAMBLING,
                 self.COMPLEX_HI_VEHICLE, self.DEALER_PRECIOUS_METAL,
                 self.DIGITAL_PM_OPERATOR, self.EMBASSY_CONSULATE,
                 self.EXCHANGE_CURRENCY, self.FOREIGN_FINANCIAL_INSTITUTION,
                 self.FOREIGN_GOVERNMENT,
                 self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION,
                 self.INTERNET_GAMBLING, self.MEDICAL_MARIJUANA_DISPENSARY,
                 self.MONEY_SERVICE_BUSINESS,
                 self.NONREGULATED_FINANCIAL_INSTITUTION, self.NOT_PROFIT,
                 self.PRIVATELY_ATM_OPERATOR, self.SALES_USED_VEHICLES,
                 self.THIRD_PARTY_PAYMENT_PROCESSOR,
                 self.TRANSACTING_PROVIDER, self.HIGH_NET_WORTH):
        self.HIGH_RISK = 'Yes'
        self.RISK_RATING = choice(refrating)
    elif max((randrange(0, 101, 1) - 99), 0) == 1:
        self.HIGH_RISK = 'Yes'
        self.RISK_RATING = choice(refrating)
+ ["nolonger_a_customer"] + ["closed_account"] + ["High_risk_flag"] + ["Risk_rating"] ) while i < 50000000: # Pick an account number and store it in acct acct = randrange(100000, 100000000, 1) # if the account hasn't been already generated then generate a record with all fields if d.has_key(str(acct)) == False: row = ( [i] + [10] + [gen_data.cc_number()] + [gen_data.create_company_name()] + [gen_data.create_email()] + [gen_data.create_name()] + [gen_data.create_job_title()] + [gen_data.create_city_state_zip()] + [gen_data.create_birthday(min_age=2, max_age=85)] + [gen_data.create_city_state_zip()] + [fake.name()] + [gen_data.create_job_title()] + [gen_data.create_birthday(min_age=2, max_age=85)] + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))] + [(randrange(101, 1000, 1), randrange(101, 999, 1), randrange(1000, 10000, 1))] + [acct] + [max((randrange(0, 101, 1) - 99), 0)] + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))] + [max((randrange(0, 101, 1) - 99), 0)] + [max((randrange(0, 101, 1) - 99), 0)]
from barnum import gen_data
import csv

# Writes 50,000,000 comma-separated rows of barnum-generated data to
# large.csv: row index, the constant 10, a credit card number, company name,
# email, name, job title, city/state/zip and birthday.
# NOTE(review): the header row has 11 cells ('' plus 0..9) while each data row
# has 9 values -- confirm which width is intended.
with open('large.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    # list() keeps this working where range() is not a list; concatenating a
    # list with a range object raises TypeError.
    writer.writerow([''] + list(range(10)))
    for i in range(50000000):
        row = [
            i,
            10,
            gen_data.cc_number(),
            gen_data.create_company_name(),
            gen_data.create_email(),
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_city_state_zip(),
            gen_data.create_birthday(min_age=2, max_age=85),
        ]
        writer.writerow(row)
#Sets CC_NO to a random credit card number CC_NO = gen_data.create_cc_number() #Extract CC_Number from the tuple returned by CC_Number...Tuple contains CC Number and Type #while CC_list.count(CC_NO[1][0]) > 0: CC_TRANS = CC_NO[1][0] dt = str(datetime.now()) clean = re.sub('\W', '', dt) printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str( randrange(1111, 9999, randrange(1, 10, 1))) #str(CC_TRANS[-4:])+str(clean[-12:-2])+str(randrange(1111,9999,randrange(1,10,1))) #Add CC_Number to control list to prevent duplicates #Add data elements to current csv row row.extend([names[0],mdl[0],ssn[0],names[1],mdl[1],ssn[1],names[2],mdl[2],ssn[2],printCC,CC_NO[0],gen_data.create_company_name()+' '+tmp[1],\ gen_data.create_email(),gen_data.create_job_title()]) #Creates Current Address zip = random.choice(zips.zip) addr = geo_data.create_city_state_zip[zip] #Creates Previous address zip2 = random.choice(zips.zip) addr2 = geo_data.create_city_state_zip[zip2] #Add additional data elements to current csv row lrg_cash_ex = random.choice(Yes_No) #Condition for SARs and Demarketed Clients if (Clsd == 'Yes'): #1% of closed accounts are demarketed but never had a SAR filed if (max((randrange(0, 101, 1) - 99), 0) == 1 and SAR == 'No'):
from barnum import gen_data
import json

# Copies the first three tab-separated fields of every line in Users_old.txt
# to Users.txt and appends a fourth field: a JSON object holding a generated
# birthday, email, mobile number and "city, state" string.
# Both files are managed by `with` so they are closed even if a line fails.
with open('Users_old.txt', 'r') as f, open('Users.txt', 'w') as f_out:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        stripped = line.strip()
        if not stripped:
            # A blank line has no fields to copy; indexing it would raise
            # IndexError.
            continue
        fields = stripped.split('\t')
        new_line = [fields[0], fields[1], fields[2]]

        info = {}
        info['birthday'] = str(
            gen_data.create_birthday(min_age=18, max_age=60))
        info['email'] = gen_data.create_email(tld="com")
        info['mobile'] = gen_data.create_phone()
        # The city/state/zip tuple is indexed at [1] and [2] for the
        # "city, state" value, matching the existing output format.
        tmp = gen_data.create_city_state_zip()
        info['city'] = tmp[1] + ', ' + tmp[2]

        new_line.append(json.dumps(info))
        f_out.write('\t'.join(new_line) + '\n')