def generate_accounts(amount):
    """Print ``amount`` random ``nco:PersonContact`` records and collect their IM accounts.

    For every person a name, address parts, ids, birthday, street and email
    are generated, then the contact is emitted through ``print_instance`` /
    ``print_property``.  Each person gets between 0 and 4 IM accounts whose
    URI is ``<ACCOUNTS entry[2]><index><email>``.

    :param amount: number of person contacts to emit
    :return: list of every IM account URI generated, most recent first
    """
    known_accounts = []
    for _ in range(amount):
        person_accounts = []
        given, family = gen_data.create_name()
        # The next values are not emitted by this function; they are still
        # generated so the sequence of generator calls stays unchanged.
        postal_code, town, region = gen_data.create_city_state_zip()
        address_id = str(random.randint(0, sys.maxint))
        uid = str(random.randint(0, sys.maxint))
        born = gen_data.create_birthday()
        street = gen_data.create_street()
        email = gen_data.create_email(name=(given, family))

        print_instance(get_random_uuid_uri(), "nco:PersonContact")
        print_property("nco:fullname", " ".join([given, family]))
        print_property("nco:nameGiven", given)
        print_property("nco:nameFamily", family)

        account_total = random.randint(0, 4)
        for index in range(account_total):
            provider = get_random_in_list(ACCOUNTS)
            account_uri = "".join([provider[2], str(index), email])
            print_property("nco:hasIMAccount", account_uri, t="uri")
            person_accounts.append((account_uri, provider))
            # Newest account goes to the front of the result list.
            known_accounts.insert(0, account_uri)

        print_property("nco:birthDate", str(born), final=True)
    return known_accounts
def generate_accounts(amount):
    """Emit ``amount`` generated person contacts and return their IM account URIs.

    Each contact is written with ``print_instance`` / ``print_property``
    (full name, given name, family name, 0-4 ``nco:hasIMAccount`` URIs and a
    final ``nco:birthDate``).

    :param amount: how many contacts to generate
    :return: list of all IM account URIs, with the latest one at index 0
    """
    known_accounts = []
    for _ in range(amount):
        contact_accounts = []
        first, last = gen_data.create_name()
        # Generated but not printed here; kept so the order of generator
        # calls is exactly what it was before.
        zip_code, city_name, state_name = gen_data.create_city_state_zip()
        address_id = str(random.randint(0, sys.maxint))
        unique_id = str(random.randint(0, sys.maxint))
        birthday = gen_data.create_birthday()
        street_line = gen_data.create_street()
        email = gen_data.create_email(name=(first, last))

        print_instance(get_random_uuid_uri(), "nco:PersonContact")
        full_name = ' '.join([first, last])
        print_property("nco:fullname", full_name)
        print_property("nco:nameGiven", first)
        print_property("nco:nameFamily", last)

        for position in range(random.randint(0, 4)):
            account_data = get_random_in_list(ACCOUNTS)
            # URI = account prefix (third field of the ACCOUNTS entry),
            # the running index, then the contact's email address.
            uri = ''.join([account_data[2], str(position), email])
            print_property("nco:hasIMAccount", uri, t="uri")
            contact_accounts.append((uri, account_data))
            known_accounts.insert(0, uri)

        print_property("nco:birthDate", str(birthday), final=True)
    return known_accounts
def open_account(person):
    """Fill a person object with dummy account data and print a greeting.

    Sets ``name`` (first element of ``gen_data.create_name()``),
    ``account_no`` (``[1][0]`` of ``gen_data.create_cc_number(length=10)``),
    ``pin_no`` (``gen_data.create_pw(length=2)``) and a fixed ``amount`` of
    5000 on ``person``.

    :param person: Proto buffer data structure object
    :return: None
    """
    person.name = gen_data.create_name()[0]
    person.account_no = gen_data.create_cc_number(length=10)[1][0]
    person.pin_no = gen_data.create_pw(length=2)
    person.amount = 5000
    # A single pre-formatted argument prints the same text on Python 2 and
    # Python 3; the former ``print a, b, c`` statement is a SyntaxError on 3.
    print("Thank You %s to open your account..." % (person.name,))
def new_character(user):
    """Create, save and return a new ``Character`` owned by ``user``.

    The character belongs to the game returned by ``get_game(user)``, starts
    with 0 experience, and uses a generated "<first> <last>" name for both
    ``name`` and ``display_name``.
    """
    current_game = get_game(user)
    name_parts = gen_data.create_name()
    full_name = name_parts[0] + ' ' + name_parts[1]
    character = Character(
        owner=user,
        name=full_name,
        display_name=full_name,
        game=current_game,
        experience=0,
    )
    character.save()
    return character
def generate_vCard():
    """Return one randomly populated vCard 4.0 document as a string.

    ``FN`` is always present; every other property (N, TEL, GENDER, EMAIL,
    IMPP, ADR, NOTE, ORG, BDAY) is included on an independent
    ``random.randint(0, 1)`` draw.  The property lines are shuffled before
    being wrapped in the BEGIN/VERSION/END envelope.
    """
    gender_initial = gender_vcard_list[random.randint(0, 4)]
    # Only 'M' and 'F' map to a named gender; anything else stays None.
    gender = 'Male' if gender_initial == 'M' else (
        'Female' if gender_initial == 'F' else None)

    first_name, last_name = gen_data.create_name(gender=gender)
    street = gen_data.create_street()
    postal_code, city, state = gen_data.create_city_state_zip()

    # Builders are evaluated lazily, so a generator helper is only called
    # when its coin flip succeeds, and in this exact order.
    optional_builders = (
        lambda: 'N:{};{};;;\r\n'.format(last_name, first_name),
        lambda: 'TEL:tel:{}\r\n'.format(gen_data.create_phone()),
        lambda: 'GENDER:{}\r\n'.format(gender_initial),
        lambda: 'EMAIL:{}\r\n'.format(
            gen_data.create_email(name=(first_name, last_name)).lower()),
        lambda: 'IMPP:sip:{}@{}\r\n'.format(first_name.lower(),
                                            'sip.linphone.org'),
        lambda: 'ADR:;;{};{};{};{};\r\n'.format(
            street, city, state, postal_code),
        lambda: 'NOTE:{}\r\n'.format(gen_data.create_sentence()),
        lambda: 'ORG:{}\r\n'.format(gen_data.create_company_name()),
        lambda: 'BDAY:{0:%Y%m%d}\r\n'.format(gen_data.create_birthday()),
    )

    lines = ['FN:{} {}\r\n'.format(first_name, last_name)]
    for build in optional_builders:
        if random.randint(0, 1):
            lines.append(build())
    shuffle(lines)

    envelope = ['BEGIN:VCARD\r\n', 'VERSION:4.0\r\n'] + lines
    envelope.append('END:VCARD\r\n')
    return ''.join(envelope)
def create_players_via_draft_batch(start, end):
    """Create ``NUM_OF_PLAYERS`` undrafted players for each draft year.

    One shared set of player attributes is generated up front and reused for
    every year in ``range(start, end)``; each player gets a freshly generated
    name and the potential overall taken from the first attribute column.

    :param start: first draft year (inclusive)
    :param end: last draft year (exclusive)
    :return: list of ``Player`` objects, ordered by year then player index
    """
    players_overalls = create_player_attributes(
        create_players_via_draft_batch_overalls(72, NUM_OF_PLAYERS))
    draft_years = range(start, end)

    # gen names for each player.  The previous code multiplied the range
    # object itself by NUM_OF_PLAYERS, which raises TypeError on Python 3;
    # the total player count is passed instead.
    # NOTE(review): the returned names are not used below (names come from
    # gen_data); the call is kept in case it has side effects - confirm.
    get_player_names(len(draft_years) * NUM_OF_PLAYERS)

    list_of_players = []
    for year in draft_years:
        for i in range(NUM_OF_PLAYERS):
            potential_overall = int(players_overalls[i][0])
            name = gen_data.create_name()
            list_of_players.append(
                Player(name=name,
                       potential_overall=potential_overall,
                       draft_year=year,
                       drafted_by=None))
        print("Year: ", year)
        # Report the count directly instead of a leftover loop index, which
        # was unbound when NUM_OF_PLAYERS is 0.
        print("Num of players: ", NUM_OF_PLAYERS)
    return list_of_players
def generate_vCard():
    """Assemble a random vCard 4.0 string.

    The card always carries an ``FN`` line.  N, TEL, GENDER, EMAIL, IMPP,
    ADR, NOTE, ORG and BDAY are each added only when
    ``random.randint(0, 1)`` is truthy.  All property lines are shuffled and
    then placed between the ``BEGIN:VCARD``/``VERSION:4.0`` header and the
    ``END:VCARD`` trailer.
    """
    gender_initial = gender_vcard_list[random.randint(0, 4)]
    if gender_initial == 'M':
        gender = 'Male'
    elif gender_initial == 'F':
        gender = 'Female'
    else:
        gender = None

    first_name, last_name = gen_data.create_name(gender=gender)
    street_address = gen_data.create_street()
    zip_code, city, state = gen_data.create_city_state_zip()

    card_lines = ['FN:{} {}\r\n'.format(first_name, last_name)]

    def add_optional(make_line):
        # Flip the coin first; only build the line (and call any generator
        # helper it needs) when the property is selected.
        if random.randint(0, 1):
            card_lines.append(make_line())

    add_optional(lambda: 'N:{};{};;;\r\n'.format(last_name, first_name))
    add_optional(lambda: 'TEL:tel:{}\r\n'.format(gen_data.create_phone()))
    add_optional(lambda: 'GENDER:{}\r\n'.format(gender_initial))
    add_optional(lambda: 'EMAIL:{}\r\n'.format(
        gen_data.create_email(name=(first_name, last_name)).lower()))
    add_optional(lambda: 'IMPP:sip:{}@{}\r\n'.format(
        first_name.lower(), 'sip.linphone.org'))
    add_optional(lambda: 'ADR:;;{};{};{};{};\r\n'.format(
        street_address, city, state, zip_code))
    add_optional(lambda: 'NOTE:{}\r\n'.format(gen_data.create_sentence()))
    add_optional(lambda: 'ORG:{}\r\n'.format(
        gen_data.create_company_name()))
    add_optional(lambda: 'BDAY:{0:%Y%m%d}\r\n'.format(
        gen_data.create_birthday()))

    shuffle(card_lines)

    header = 'BEGIN:VCARD\r\n' + 'VERSION:4.0\r\n'
    return header + ''.join(card_lines) + 'END:VCARD\r\n'
def create_players_via_draft_batch(start, end):
    """Build the players for every draft class in ``range(start, end)``.

    The attribute table is generated once and indexed by player position for
    each year, so every draft class shares the same potential overalls while
    names are generated per player.

    :param start: first draft year (inclusive)
    :param end: last draft year (exclusive)
    :return: list of ``Player`` objects for all years
    """
    players_overalls = create_player_attributes(
        create_players_via_draft_batch_overalls(72, NUM_OF_PLAYERS))
    number_of_draft_classes = range(start, end)

    # gen names for each player: pass the total head count.  Multiplying the
    # range object by an int (the previous code) is a TypeError on Python 3.
    # NOTE(review): the result is unused here; call kept for any side
    # effects it may have - confirm against get_player_names.
    total_players = len(number_of_draft_classes) * NUM_OF_PLAYERS
    get_player_names(total_players)

    list_of_players = []
    for year in number_of_draft_classes:
        for i in range(NUM_OF_PLAYERS):
            list_of_players.append(
                Player(name=gen_data.create_name(),
                       potential_overall=int(players_overalls[i][0]),
                       draft_year=year,
                       drafted_by=None))
        print("Year: ", year)
        # Previously printed the inner loop index + 1, which is undefined
        # when NUM_OF_PLAYERS is 0; the constant is the same value.
        print("Num of players: ", NUM_OF_PLAYERS)
    return list_of_players
def get_rows():
    """Yield synthetic customer rows as dicts, one per ``rownum`` 502..1000.

    The rows were previously assembled as a text dict literal and parsed with
    ``ast.literal_eval``.  That text left string values unquoted and omitted
    the colon after ``'closed_account'``, so parsing could not succeed.  The
    dict is now built directly; keys and the order in which the generator
    helpers are called are unchanged.

    Value types: generated text fields are kept as returned by their helper,
    name/city/SSN/phone fields are tuples, birthdays are stored as ``str()``
    of the generated value, and flags are small ints.

    :return: generator of dict rows
    """
    # One Faker instance is enough; it used to be rebuilt on every row.
    fake = Faker()

    def three_part(low_mid, high_mid):
        # (area, middle, last) style number triple used for SSNs and phones.
        return (randrange(101, 1000, 1),
                randrange(low_mid, high_mid, 1),
                randrange(1000, 10000, 1))

    def rare_flag(offset):
        # randrange(0, 101) - offset clipped at 0: offset 99 gives 0 or 1,
        # offset 90 gives 0..10.
        return max(randrange(0, 101, 1) - offset, 0)

    for rownum in range(502, 1001):
        # Dict displays evaluate entries top to bottom, matching the order
        # in which the original string concatenation called the helpers.
        yield {
            'rownum': rownum,
            'dunno': 10,
            'CC': gen_data.cc_number(),
            'Employer': gen_data.create_company_name(),
            'Custemail': gen_data.create_email(),
            'name': gen_data.create_name(),
            'occupation': gen_data.create_job_title(),
            'address_street': gen_data.create_city_state_zip(),
            'DOB': str(gen_data.create_birthday(min_age=2, max_age=85)),
            'previous_address_city_state_zip':
                gen_data.create_city_state_zip(),
            'altcustomer_name': fake.name(),
            'altcustomer_occupation': gen_data.create_job_title(),
            'altcustomer_dob':
                str(gen_data.create_birthday(min_age=2, max_age=85)),
            'ssn': three_part(10, 100),
            'phone': three_part(101, 999),
            'AccountID': randrange(100000, 100000000, 1),
            'PepFlag': rare_flag(99),
            'altcustomerssn': three_part(10, 100),
            'demarketed_customer_flag': rare_flag(99),
            'SAR_flag': rare_flag(99),
            'nolonger_a_customer': rare_flag(99),
            'closed_account': rare_flag(90),
            'High_risk_flag': rare_flag(99),
            'Risk_rating': rare_flag(99),
        }
# Finish the hard-coded record: write its email-address and phone-number
# statements, then the phone number resource itself, to stdout.
sys.stdout.write('\tnco:hasEmailAddress <mailto:[email protected]>;\n')
sys.stdout.write('\tnco:hasPhoneNumber <tel:+11111111111>.\n')
sys.stdout.write('\n')
sys.stdout.write('<tel:+11111111111> a nco:PhoneNumber; \n')
sys.stdout.write('\tnco:phoneNumber "(111) 111-1111".\n')
sys.stdout.write('\n')
#TODO need to create some email folders
# Same phone URI as written above for the hard-coded record.
myOwnPhoneNumberURI = "tel:+11111111111"
# Accumulators; nothing in this fragment fills them.
# NOTE(review): presumably populated by code outside this excerpt - confirm.
previousContacts = []
previousEmailAddresses = []
previousIMAccounts = []
# Identity translation table (Python 2 string.maketrans); combined with the
# deletechars argument of str.translate below it only removes characters.
allchars = string.maketrans('','')
# Generate `count` random contacts.
for dummy in range (0, count):
    firstName, lastName = gen_data.create_name()
    zip, city, state = gen_data.create_city_state_zip()
    # Random ids rendered as strings (sys.maxint is Python 2 only).
    postalAddressID=str(random.randint(0, sys.maxint))
    UID = str(random.randint(0, sys.maxint))
    phoneNumber = gen_data.create_phone()
    # Strip spaces, dashes and parentheses from the generated number and
    # prefix it with the +1 country code to form a tel: URI.
    phoneUri = 'tel:+1' + phoneNumber.translate(allchars,' -()')
    birthDay = gen_data.create_birthday()
    streetAddress = gen_data.create_street()
    emailAddress = gen_data.create_email(name=(firstName, lastName))
    # Lower-cased "<first>.<last>@gmail.com" address.
    xmppAddress = str(firstName+"." + lastName + "@gmail.com").lower()
    hasIMAccount = False
    hasPhoneNumber = False
    jobTitle = gen_data.create_job_title()
    # Called without arguments.
    # NOTE(review): presumably reads the variables assigned above - confirm.
    generatePostalAddress()
import random from barnum import gen_data import addressbook_pb2 # Barnum generates US data but that's ok for the example names = [gen_data.create_name() for _ in range(0, 15)] phones = [gen_data.create_phone() for _ in range(0, 30)] postcodes = [gen_data.create_city_state_zip() for _ in range(0, 15)] streets = [gen_data.create_street() for _ in range(0, 30)] contacts = [] for name in names: address = {} # Simulate the fact that postcode are optionals if random.choice([True, False]): address['postcode'] = random.choice(postcodes)[0] address['address_lines'] = random.sample(streets, random.randint(0, 2)) phone_numbers = [] for _ in range(0, random.randint(0, 2)): phone_numbers.append({ 'type': random.choice(['MOBILE', 'LANDLINE']), 'number': random.choice(phones) }) contacts.append({ 'first_name': name[0], 'last_name': name[1], 'address': address,
from barnum import gen_data
import csv

# Write 100 rows of generated demographic data (name, job title, phone,
# city/state/zip) to demographic.csv, using a space as the field delimiter.
# Values that are not strings are stringified by csv.writer into one cell.
# The ``with`` block closes the file, so no explicit close() is needed.
with open('demographic.csv','w') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=' ')
    for _ in range(100):
        # List elements are evaluated in order, so the generator helpers
        # are called in the same sequence as before.
        csvwriter.writerow([
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_phone(),
            gen_data.create_city_state_zip(),
        ])
# Write the closing statements of the hard-coded record (email address and
# phone number), followed by the phone number resource, to stdout.
sys.stdout.write('\tnco:hasEmailAddress <mailto:[email protected]>;\n')
sys.stdout.write('\tnco:hasPhoneNumber <tel:+11111111111>.\n')
sys.stdout.write('\n')
sys.stdout.write('<tel:+11111111111> a nco:PhoneNumber; \n')
sys.stdout.write('\tnco:phoneNumber "(111) 111-1111".\n')
sys.stdout.write('\n')
#TODO need to create some email folders
# Same phone URI as written above for the hard-coded record.
myOwnPhoneNumberURI = "tel:+11111111111"
# Accumulators that this fragment initialises but does not fill.
# NOTE(review): presumably populated by code outside this excerpt - confirm.
previousContacts = []
previousEmailAddresses = []
previousIMAccounts = []
# Identity table for Python 2 str.translate; only the deletechars argument
# used below changes the string.
allchars = string.maketrans('', '')
# One iteration per generated contact.
for dummy in range(0, count):
    firstName, lastName = gen_data.create_name()
    zip, city, state = gen_data.create_city_state_zip()
    # Random ids as strings (sys.maxint exists on Python 2 only).
    postalAddressID = str(random.randint(0, sys.maxint))
    UID = str(random.randint(0, sys.maxint))
    phoneNumber = gen_data.create_phone()
    # Remove spaces, dashes and parentheses, then prepend "tel:+1".
    phoneUri = 'tel:+1' + phoneNumber.translate(allchars, ' -()')
    birthDay = gen_data.create_birthday()
    streetAddress = gen_data.create_street()
    emailAddress = gen_data.create_email(name=(firstName, lastName))
    # Lower-cased "<first>.<last>@gmail.com" address.
    xmppAddress = str(firstName + "." + lastName + "@gmail.com").lower()
    hasIMAccount = False
    hasPhoneNumber = False
    jobTitle = gen_data.create_job_title()
    # Takes no arguments.
    # NOTE(review): presumably consumes the variables set above - confirm.
    generatePostalAddress()
from barnum import gen_data import random import pandas as pd import datetime """ creates a fake set of popular products being sold by the Blooth store to company customers. """ humans = [] for i in range(100): humans.append( [ gen_data.create_name(full_name=False), str(gen_data.create_birthday(min_age=18, max_age=65)), gen_data.create_company_name(biz_type='Generic'), ] ) humans.append( [ gen_data.create_name(full_name=False), str(gen_data.create_birthday(min_age=30, max_age=50)), gen_data.create_company_name(biz_type='Generic'), ] ) humans.append( [
+ ["closed_account"] + ["High_risk_flag"] + ["Risk_rating"] ) while i < 50000000: # Pick an account number and store it in acct acct = randrange(100000, 100000000, 1) # if the account hasn't been already generated then generate a record with all fields if d.has_key(str(acct)) == False: row = ( [i] + [10] + [gen_data.cc_number()] + [gen_data.create_company_name()] + [gen_data.create_email()] + [gen_data.create_name()] + [gen_data.create_job_title()] + [gen_data.create_city_state_zip()] + [gen_data.create_birthday(min_age=2, max_age=85)] + [gen_data.create_city_state_zip()] + [fake.name()] + [gen_data.create_job_title()] + [gen_data.create_birthday(min_age=2, max_age=85)] + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))] + [(randrange(101, 1000, 1), randrange(101, 999, 1), randrange(1000, 10000, 1))] + [acct] + [max((randrange(0, 101, 1) - 99), 0)] + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))] + [max((randrange(0, 101, 1) - 99), 0)] + [max((randrange(0, 101, 1) - 99), 0)] + [max((randrange(0, 101, 1) - 99), 0)]
def gen_tran(MCC_credits,MCC_debits,Tran_Country_Credits,Tran_Country_Debits,Tran_Type_C,Tran_Type_D,Upper_Limit,Delta,count,j,usecase):
    """Generate and write the credit-card transactions for one account.

    Simulates between 100 and 149 transactions against a randomly chosen
    credit limit, writing one row per transaction through the module-level
    ``writer``, then writes one closing row (a credit-balance refund or a
    customer payment).  Account details come from module-level parallel
    sequences indexed by ``j`` (``ACCTs``, ``CCs``, ``CCTypes``, ``Holders``,
    ``CCsCount``, ``Cities``, ``States``, ``ZIPs``, ``Countries``,
    ``UseCase``).

    :param MCC_credits: sequence of merchant category codes to pick from for credits
    :param MCC_debits: sequence of merchant category codes to pick from for debits
    :param Tran_Country_Credits: sequence of countries to pick from for credits
    :param Tran_Country_Debits: sequence of countries to pick from for debits
    :param Tran_Type_C: sequence of transaction types to pick from for credits
    :param Tran_Type_D: sequence of transaction types to pick from for debits
    :param Upper_Limit: exclusive upper bound of the draw that decides credit vs debit
    :param Delta: offset added to that draw before it is clipped at 0
    :param count: running row counter used in the row id; incremented locally only
    :param j: index of the account in the module-level sequences
    :param usecase: use-case value written to every row
    :return: None (no return statement; the updated ``count`` is not returned)

    NOTE(review): hotel/airline details test ``UseCase[j]`` (module level),
    not the ``usecase`` parameter - confirm they are meant to agree.
    NOTE(review): leading-zero integer literals (``01``, ``0001``) and
    ``limit/2`` as an integer are Python 2 constructs.
    """
    liTrans = []
    #Initiate start date for transactions
    startDate=date(2015,01,01)
    #Pick out account based on counter
    acct=ACCTs[j]
    #Set customer credit limit: normally $1000-$24000 in $1000 steps; when the
    #first draw hits 100 the $25K-$49K candidate is used if larger.
    #NOTE(review): that draw succeeds 1 time in 100, not 10% as the earlier
    #comment stated.
    limit = max(max((randrange(1,101,1)-99),0)* randrange(25000,50000,1000),randrange(1000,25000,1000))
    #local Amt variable to calculate customer total usage
    usedAmt = 0
    tmpAmt = 0
    Balance = limit
    maxDate= startDate
    #Random number generator for transactions per customer
    NoTrans = randrange(100,150,1)
    desc=''
    flag=0
    #Latest hotel check-in / airline booking date handed out so far ('' = none yet)
    maxCheckin=''
    maxBook=''
    #loop to generate NoTrans transactions per customer
    for k in range(NoTrans):
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cr_dbt='D'
        tranType = ''
        country=[]
        cat_desc=''
        flag=0
        #If Balance is within the credit limit, generate credits/debits
        if(Balance>0 and Balance<=limit*1.2):
            #Probability of credits (tmpAmt>0) and debits (tmpAmt==0) is driven by parameters Upper_Limit and Delta
            tmpAmt = max((randrange(1,Upper_Limit,1)+Delta),0)*randrange(1,Balance+1,1)
            flag=1
        #Define time delta for next transaction
        tdelta = timedelta(days=randrange(1,4,1))
        #Row id is "<count>_<timestamp>", followed by the account
        row = [str(count)+'_'+dt] + [acct]
        #If we have credit or debit within balance
        if tmpAmt == 0 and flag==1:
            #Debit: draw the amount now and reduce the balance
            tmpAmt=random.randrange(1,Balance+1,1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc=python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch=gen_data.create_company_name()
            row.append(merch)
            row.append(cat)
            row.append(cat_desc)
            country=random.choice(Tran_Country_Debits)
        else:
            if tmpAmt > 0 and flag==1:
                #Credit: the amount drawn above is added to the balance
                cr_dbt='C'
                tranType=random.choice(Tran_Type_C)
                Balance = Balance + tmpAmt
                merch=''
                cat = random.choice(MCC_credits)
                cat_desc=python_merchant_cat.All_Merchant_Cat[cat]
                #Merchant credits get a merchant name and a "Green" category
                if(tranType=='Merchant Credit'):
                    merch=gen_data.create_company_name()
                    cat=random.choice(Merchant_Category.Green)
                    cat_desc=python_merchant_cat.All_Merchant_Cat[cat]
                #Refunds always use category '0000'
                if(tranType=='Refund'):
                    cat='0000'
                    cat_desc=python_merchant_cat.All_Merchant_Cat[cat]
                row.append(merch)
                row.append(cat)
                row.append(cat_desc)
                country=random.choice(Tran_Country_Credits)
        #If we need to make a payment or get credit then assign codes
        #(flag==0 means the balance was outside the 0 < Balance <= 1.2*limit band)
        if Balance > limit and flag==0:
            #Balance above the limit: debit part of the excess
            tmpAmt=random.randrange(1,Balance-limit+1,1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc=python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch=gen_data.create_company_name()
            row.append(merch)
            row.append(cat)
            row.append(cat_desc)
            country=random.choice(Tran_Country_Debits)
        else:
            if ((Balance < 0 or Balance==0)and flag==0):
                #Nothing left to spend: customer payment, category '1111'
                cr_dbt='C'
                tranType='Payment'
                tmpAmt = random.randrange(1,limit/2,1)
                Balance = Balance + tmpAmt
                merch = ''
                cat = '1111'
                cat_desc=python_merchant_cat.All_Merchant_Cat[cat]
                row.append(merch)
                row.append(cat)
                row.append(cat_desc)
                country=random.choice(Tran_Country_Credits)
        #date posted
        date1 = maxDate+tdelta
        maxDate = date1
        #date of transaction: one day before the posted date
        date2 = date1-timedelta(days=1)
        row.extend([country,date1,date2,tranType,cr_dbt,limit,tmpAmt,Balance,CCs[j], CCTypes[j],usecase,Holders[j],CCsCount[j],Cities[j],States[j],ZIPs[j],Countries[j]])
        count = count + 1
        checkin=''
        checkout=''
        transDetail=''
        #Add details for Hotel Transactions (use cases 28/29: check-ins about a
        #year after posting; each later one follows the previous check-in)
        if((cat_desc=='Hotels/Motels/Inns/Resorts' or cat_desc=='Hotels, Motels, and Resorts') and (UseCase[j]=='28' or UseCase[j]=='29')):
            if (maxCheckin == ''):
                checkin=maxDate+timedelta(days=randrange(365,389,1))
                checkout=checkin+timedelta(days=randrange(4,11,1))
                maxCheckin=checkin
                tmp2=gen_data.create_name()
                addr=gen_data.create_city_state_zip()
                #Hotel name is built from the generated last name
                hotel=tmp2[1]+' Hotels; '+'; Address: '+addr[1]+' '+addr[2]+', '+addr[0]
                transDetail='Checkin: '+str(checkin)+'; Checkout: '+str(checkout)+'; Hotel: '+hotel
            else:
                checkin=maxCheckin + timedelta(days=randrange(2,5,1))
                checkout=checkin+timedelta(days=randrange(4,11,1))
                maxCheckin=checkin
                tmp2=gen_data.create_name()
                addr=gen_data.create_city_state_zip()
                hotel=tmp2[1]+' Hotels; '+'; Address: '+addr[1]+' '+addr[2]+', '+addr[0]
                transDetail='Checkin: '+str(checkin)+'; Checkout: '+str(checkout)+'; Hotel: '+hotel
        #Use case 30: hotel stay 30-199 days after posting, no chaining
        if((cat_desc=='Hotels/Motels/Inns/Resorts' or cat_desc=='Hotels, Motels, and Resorts') and UseCase[j]=='30'):
            checkin=maxDate+timedelta(days=randrange(30,200,1))
            checkout=checkin+timedelta(days=randrange(4,11,1))
            tmp2=gen_data.create_name()
            addr=gen_data.create_city_state_zip()
            hotel=tmp2[1]+' Hotels; '+'; Address: '+addr[1]+' '+addr[2]+', '+addr[0]
            transDetail='Checkin: '+str(checkin)+'; Checkout: '+str(checkout)+'; Hotel: '+hotel
        #Add details for Airline Transactions (use cases 31/32: each booking
        #date follows the previous booking)
        if(cat_desc=='Airlines' and (UseCase[j]=='31' or UseCase[j]=='32')):
            if (maxBook == ''):
                booking=maxDate+timedelta(days=randrange(1,15,1))
                maxBook=booking
                tmp2=gen_data.create_name()
                addr=gen_data.create_city_state_zip()
                transDetail='Date Booked: '+str(booking)+'; Name Booked: '+tmp2[0]+tmp2[1]+'; Address: '+addr[1]+' '+addr[2]+', '+addr[0]+'; Source :'+random.choice(Airport_Code)+'; Destination:'+random.choice(Airport_Code)
            else:
                booking=maxBook + timedelta(days=randrange(1,15,1))
                maxBook=booking
                tmp2=gen_data.create_name()
                addr=gen_data.create_city_state_zip()
                transDetail='Date Booked: '+str(booking)+'; Name Booked: '+ tmp2[0] + tmp2[1] + '; Address: '+ addr[1] + ' ' + addr[2]+', '+addr[0]+'; Source :'+random.choice(Airport_Code)+'; Destination:'+random.choice(Airport_Code)
        #Use case 33: booking 1-14 days after posting, no chaining
        if(cat_desc=='Airlines' and UseCase[j]=='33'):
            booking=maxDate + timedelta(days=randrange(1,15,1))
            tmp2=gen_data.create_name()
            addr=gen_data.create_city_state_zip()
            transDetail='Date Booked: '+str(booking)+'; Name Booked: '+ tmp2[0] + tmp2[1] + '; Address: '+addr[1]+' '+addr[2]+', '+addr[0]+'; Source :'+random.choice(Airport_Code)+'; Destination:'+random.choice(Airport_Code)
        row.append(transDetail)
        writer.writerow(row)
    #post generating all transactions, check account balance - if overpaid - refund $ and add a refund transaction
    #(dt and tdelta below are the values left over from the last loop iteration)
    if Balance > limit:
        row = [str(count)+'_'+dt]+ [acct]+['Uber Bank']+['0000']+['Refund to Customer from Bank']+[random.choice(Tran_Country_Debits)]
        date1=maxDate+timedelta(days=90)
        date2=date1-timedelta(days=1)
        row.extend([date1, date2, 'Credit Balance Refund','D',limit,Balance-limit,limit,CCs[j],CCTypes[j], usecase,Holders[j],CCsCount[j],Cities[j],States[j],ZIPs[j],Countries[j],''])
        count = count + 1
        usedAmt = 0
        maxDate= datetime(0001,01,01)
    else:
        #Otherwise close with a customer payment for the amount still owed
        date1 = maxDate+tdelta
        maxDate = date1
        #date of transaction: one day before the posted date
        date2 = date1-timedelta(days=1)
        row = [str(count)+'_'+dt]+[acct]+['Customer Payment']+['1111']+['Customer Payment']+[random.choice(Tran_Country_Credits)]
        row.extend([date1, date2, 'Payment','C',limit,limit-Balance,limit,CCs[j],CCTypes[j],usecase, Holders[j],CCsCount[j],Cities[j],States[j],ZIPs[j],Countries[j],''])
        count = count + 1
        usedAmt = 0
    writer.writerow(row)
def generate_customers():
    """Write 30 synthetic customer/account rows to ``uber_cust.csv``.

    The file is obtained from ``get_file`` and written pipe-delimited with a
    header row.  Each data row combines generated identity data (names, SSNs
    drawn from a shared pool, credit card, employer, addresses), closure /
    demarketing / suspicious-activity flags, a segmentation model, a set of
    risk indicators from ``weighted_options`` and a derived high-risk rating.

    Takes no parameters and has no return statement.

    NOTE(review): the header has 88 columns (both ``accountCategory`` and
    ``accountType``, and AUTHORIZED_NAME2..4), while each data row carries 87
    values (one account type value, three authorized users) - verify the
    column alignment.
    """
    with get_file('uber_cust.csv', 'w') as f1:
        # Writer for CSV...Pipe delimited...Return for a new line
        writer = csv.writer(
            f1,
            delimiter='|',
            lineterminator='\n', )
        # Header Row
        writer.writerow(
            ['ROWNUM'] + ['accountNumber'] + ['accountCategory'] + ['accountType'] + ['NUM_CCS'] + ['NAME'] + [
                'M_NAME'] + [
                'SSN'] + [
                'AUTHORIZED_NAME2'] + ['M_NAME2'] + ['SSN2'] + \
            ['AUTHORIZED_NAME3'] + ['M_NAME3'] + ['SSN3'] + ['AUTHORIZED_NAME4'] + ['M_NAME4'] + ['SSN4'] + [
                'CREDITCARDNUMBER'] + ['CREDITCARDTYPE'] + ['EMPLOYER'] + ['CUSTEMAIL'] + \
            ['OCCUPATION'] + ['CITY'] + ['STATE'] + ['ZIP'] + ['COUNTRY'] + ['PREVIOUS_CITY'] + [
                'PREVIOUS_STATE'] + \
            ['PREVIOUS_ZIP'] + ['PREVIOUS_COUNTRY'] + ['DOB'] + ['politically_exposed_person'] + [
                'suspicious_activity_report'] + ['CLOSEDACCOUNT'] + [
                'RELATED_ACCT'] + ['RELATED_TYPE'] + ['PARTY_TYPE'] + ['PARTY_RELATION'] + [
                'PARTY_STARTDATE'] + ['PARTY_ENDDATE'] + \
            ['LARGE_CASH_EXEMPT'] + ['DEMARKET_FLAG'] + ['DEMARKET_DATE'] + ['PROB_DEFAULT_RISKR'] + [
                'OFFICIAL_LANG_PREF'] + ['CONSENT_SHARING'] + \
            ['PREFERRED_CHANNEL'] + ['PRIMARY_BRANCH_NO'] + ['DEPENDANTS_COUNT'] + ['SEG_MODEL_ID'] + [
                'SEG_MODEL_TYPE'] + \
            ['SEG_MODEL_NAME'] + ['SEG_MODEL_GROUP'] + ['SEG_M_GRP_DESC'] + ['SEG_MODEL_SCORE'] + [
                'ARMS_MANUFACTURER'] + ['AUCTION'] + \
            ['CASHINTENSIVE_BUSINESS'] + ['CASINO_GAMBLING'] + ['CHANNEL_ONBOARDING'] + [
                'CHANNEL_ONGOING_TRANSACTIONS'] + ['CLIENT_NET_WORTH'] + \
            ['COMPLEX_HI_VEHICLE'] + ['DEALER_PRECIOUS_METAL'] + ['DIGITAL_PM_OPERATOR'] + [
                'EMBASSY_CONSULATE'] + ['EXCHANGE_CURRENCY'] + \
            ['FOREIGN_FINANCIAL_INSTITUTION'] + ['FOREIGN_GOVERNMENT'] + [
                'FOREIGN_NONBANK_FINANCIAL_INSTITUTION'] + ['INTERNET_GAMBLING'] + \
            ['MEDICAL_MARIJUANA_DISPENSARY'] + ['MONEY_SERVICE_BUSINESS'] + ['NAICS_CODE'] + [
                'NONREGULATED_FINANCIAL_INSTITUTION'] + \
            ['NOT_PROFIT'] + ['PRIVATELY_ATM_OPERATOR'] + ['PRODUCTS'] + ['SALES_USED_VEHICLES'] + [
                'SERVICES'] + \
            ['SIC_CODE'] + ['STOCK_MARKET_LISTING'] + ['THIRD_PARTY_PAYMENT_PROCESSOR'] + [
                'TRANSACTING_PROVIDER'] + ['HIGH_NET_WORTH'] + ['HIGH_RISK'] + ['RISK_RATING'] + [
                'USE_CASE_SCENARIO'])
        # Loop for number of accounts to generate
        start = 10
        acct_list = []
        # Pool of unique 9-digit SSN strings; set() drops duplicates, so the
        # pool is regenerated once if it came out shorter than 30.
        # NOTE(review): the regenerated pool is not re-checked; a second
        # collision would make li_ssn_master[i] fail below.
        li_ssn_master = list(
            set([
                ''.join(str(random.randint(0, 9)) for _ in xrange(9))
                for i in xrange(30)
            ]))
        if len(li_ssn_master) < 30:
            li_ssn_master = list(
                set([
                    ''.join(str(random.randint(0, 9)) for _ in xrange(9))
                    for i in xrange(30)
                ]))
        for i in xrange(30):
            # Initiate High Risk Flags
            politically_exposed_person = 'No'
            suspicious_activity_report = 'No'
            closed_cust_acct = 'No'
            # High risk customer flag
            high_risk = 'No'
            # High Risk Rating
            hr_rating = ''
            # Customer that was demarketed by the bank
            demarket = 'No'
            dem_date = ''
            # generate closed acct flag (true only when the draw is 97)
            if max((randrange(0, 98, 1) - 96), 0) == 1:
                closed_cust_acct = 'Yes'
            # Random choice for number of credit cards per account number
            no_ccs = weighted_options('number_cc')
            # Account numbers increase monotonically: previous + 1 + (1..9)
            acct = start + 1 + randrange(1, 10, 1)
            start = acct
            name = fake.name()
            tmp = gen_data.create_name()
            # Adds account number to the list of generated accounts
            acct_list.extend([acct])
            # Creates a new row and adds data elements
            # Main account holder SSN is the pool entry at the current index
            row = [i] + [acct] + [weighted_options('acct_type')] + [no_ccs] + [
                name
            ] + [tmp[0]] + [li_ssn_master[i]]
            # List of authorized-user names, initially empty
            names = []
            # List of authorized-user Social Security Numbers, initially empty
            ssn = []
            # Generates Name and SSN for Credit Users
            # Middle Name to reduce name dups
            mdl = []
            for j in range(no_ccs - 1):
                names.insert(j, fake.name())
                tmp2 = gen_data.create_name()
                mdl.insert(j, tmp2[0])
                # Pull from the SSN pool (indices 1..len-1), stepping back one
                # entry when the draw is the main holder's own index
                randInt = randrange(1, len(li_ssn_master), 1)
                if randInt != i:
                    ssn.insert(j, li_ssn_master[randInt])
                else:
                    ssn.insert(j, li_ssn_master[randInt - 1])
            # Name and SSN is set to blank if less than 4 customers on an account
            for k in range(4 - no_ccs):
                names.insert(no_ccs + k, '')
                ssn.insert(no_ccs + k, '')
                mdl.insert(no_ccs, '')
            # Sets CC_NO to a random credit card number
            CC_NO = gen_data.create_cc_number()
            # Extract CC_Number from the tuple returned by CC_Number...Tuple contains CC Number and Type
            CC_TRANS = CC_NO[1][0]
            # Card number written to the file: last 4 digits of the generated
            # number + a slice of the digits of the current timestamp + a
            # random 4-digit value
            dt = str(datetime.now())
            clean = re.sub('\W', '', dt)
            printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
                randrange(1111, 9999, randrange(1, 10, 1)))
            # Add data elements to current csv row
            row.extend([
                names[0], mdl[0], ssn[0], names[1], mdl[1], ssn[1], names[2],
                mdl[2], ssn[2], printCC, CC_NO[0],
                gen_data.create_company_name() + ' ' + tmp[1],
                gen_data.create_email(),
                gen_data.create_job_title()
            ])
            # Creates Current Address (lookup keyed by a random zip)
            zip = random.choice(zips.zip)
            addr = geo_data.create_city_state_zip[zip]
            # Creates Previous address
            zip2 = random.choice(zips.zip)
            addr2 = geo_data.create_city_state_zip[zip2]
            # Add additional data elements to current csv row
            lrg_cash_ex = weighted_options('yes_no')
            # Condition for SARs and Demarketed Clients
            if closed_cust_acct == 'Yes':
                # 1% of closed accounts are demarketed but never had a suspicious_activity_report filed
                # NOTE(review): percentages here depend on risk_range(), which is defined elsewhere
                if risk_range() and suspicious_activity_report == 'No':
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)
                if risk_range() and demarket == 'No':
                    # 10% of closed accounts have SARs
                    suspicious_activity_report = 'Yes'
                    # 90% of closed accounts with SARs are demarketed
                    if max((randrange(0, 11, 1) - 9), 0) == 0:
                        demarket = 'Yes'
                        dem_date = gen_data.create_date(past=True)
            if risk_range():
                politically_exposed_person = 'Yes'
            row.extend([
                addr[0], addr[1], zip, 'US', addr2[0], addr2[1], zip2, 'US',
                gen_data.create_birthday(min_age=2, max_age=85),
                politically_exposed_person, suspicious_activity_report,
                closed_cust_acct
            ])
            # Start Generating related accounts from account list once 10,000 accounts are generated
            # NOTE(review): i only runs to 29 here, so this branch is never taken
            if i > 10000:
                rel = int(random.choice(acct_list)) * max(
                    (randrange(0, 10001, 1) - 9999), 0)
                if rel <> 0:
                    row.append(rel)
                    row.append(weighted_options('related_type'))
                else:
                    row.append('')
                    row.append('')
            else:
                row.append('')
                row.append('')
            # Randomly generates account start date
            party_start = gen_data.create_date(past=True)
            # Randomly selects consent option for sharing info
            consent_share = weighted_options('yes_no')
            # Add additional data elements to current csv row
            row.extend([
                weighted_options('party_type'),
                weighted_options('party_relation'), party_start,
                gen_data.create_date(past=True), lrg_cash_ex, demarket,
                dem_date,
                randrange(0, 100, 1),
                weighted_options('official_lang')
            ])
            # Add data element preferred method of contact for yes to share info...if not then blank to current row
            if consent_share == 'Yes':
                row.extend(['Yes', weighted_options('preferred_channel')])
            else:
                row.extend(['No', ''])
            # DO NOT USE CUST STATUS BELOW - NOT INTEGRATED WITH CLOSED STATUS!
            # Add additional data elements to current csv row
            row.extend([zip, randrange(0, 5, 1)])
            # Generates Segment ID then adds additional Segment data based on the selection to the current csv row
            Segment_ID = randrange(0, 5, 1) % 5
            if Segment_ID == 0:
                row.extend([
                    MODEL_ID[0], SEG_MODEL_TYPE[0], SEG_MODEL_NAME[0],
                    SEG_MODEL_GROUP[0], SEG_MODEL_DESCRIPTION[0],
                    SEG_MODEL_SCORE[0]
                ])
            if Segment_ID == 1:
                row.extend([
                    MODEL_ID[1], SEG_MODEL_TYPE[1], SEG_MODEL_NAME[1],
                    SEG_MODEL_GROUP[1], SEG_MODEL_DESCRIPTION[1],
                    SEG_MODEL_SCORE[1]
                ])
            if Segment_ID == 2:
                row.extend([
                    MODEL_ID[2], SEG_MODEL_TYPE[2], SEG_MODEL_NAME[2],
                    SEG_MODEL_GROUP[2], SEG_MODEL_DESCRIPTION[2],
                    SEG_MODEL_SCORE[2]
                ])
            if Segment_ID == 3:
                row.extend([
                    MODEL_ID[3], SEG_MODEL_TYPE[3], SEG_MODEL_NAME[3],
                    SEG_MODEL_GROUP[3], SEG_MODEL_DESCRIPTION[3],
                    SEG_MODEL_SCORE[3]
                ])
            if Segment_ID == 4:
                row.extend([
                    MODEL_ID[4], SEG_MODEL_TYPE[4], SEG_MODEL_NAME[4],
                    SEG_MODEL_GROUP[4], SEG_MODEL_DESCRIPTION[4],
                    SEG_MODEL_SCORE[4]
                ])
            # Add additional data elements to current csv row
            arms_manufacturer = weighted_options('arms_manufacturers')
            auction = weighted_options('auction')
            cash_intensive_business = weighted_options(
                'cash_intensive_business')
            casino_gambling = weighted_options('casino_gambling')
            chan_ob = weighted_options('channel_onboarding')
            chan_txn = weighted_options('channel_ongoing_txn')
            row.extend([
                arms_manufacturer, auction, cash_intensive_business,
                casino_gambling, chan_ob, chan_txn
            ])
            # Randomly select whether customer has a High Net Worth
            high_net_worth_flag = weighted_options('high_net_worth')
            # Randomly Generates customer net worth based on the above flag
            if high_net_worth_flag == 'Yes':
                row.append(
                    max(
                        max((randrange(0, 101, 1) - 99), 0) *
                        randrange(1000000, 25000000, 1),
                        randrange(1000000, 5000000, 1)))
            else:
                # 'low_net' selects which of three net-worth ranges is used
                flag = weighted_options('low_net')
                if flag == 0:
                    row.append(randrange(-250000, 600000, 1))
                else:
                    if flag == 1:
                        row.append(randrange(149000, 151000, 1))
                    else:
                        row.append(randrange(40000, 50000, 1))
            # Add data elements to current csv row
            # NOTE(review): some calls pass a string key, others a module-level
            # name (e.g. EMBASSY_CONSULATE) - confirm both forms are accepted
            hr1 = weighted_options('complex_hi_vehicle')
            hr2 = weighted_options('dealer_precious_metal')
            hr3 = weighted_options('digital_pm_operator')
            hr4 = weighted_options(EMBASSY_CONSULATE)
            hr5 = weighted_options(EXCHANGE_CURRENCY)
            hr6 = weighted_options(FOREIGN_FINANCIAL_INSTITUTION)
            hr7 = weighted_options(FOREIGN_GOVT)
            hr8 = weighted_options(FOREIGN_NONBANK_FINANCIAL_INSTITUTION)
            hr9 = weighted_options(INTERNET_GAMBLING)
            hr10 = weighted_options(MEDICAL_MARIJUANA_DISPENSARY)
            hr11 = weighted_options(MONEY_SERVICE_BUSINESS)
            hr12 = random.choice(NAICS.NAICS_Code)
            hr13 = weighted_options(NONREGULATED_FINANCIAL_INSTITUTION)
            hr14 = weighted_options(NOT_PROFIT)
            # hr15 (occupation) is intentionally absent from the row
            hr16 = weighted_options(PRIVATE_ATM_OPERATOR)
            hr17 = weighted_options('products')
            hr18 = weighted_options(SALES_USED_VEHICLES)
            hr19 = weighted_options('services')
            hr20 = weighted_options('sic_code')
            hr21 = weighted_options('stock_market_listing')
            hr22 = weighted_options(THIRD_PARTY_PAYMENT_PROCESSOR)
            hr23 = weighted_options(TRANSACTING_PROVIDER)
            # Any 'Yes' among these indicators marks the customer high risk
            if 'Yes' in (politically_exposed_person,
                         suspicious_activity_report, lrg_cash_ex, demarket,
                         arms_manufacturer, auction, cash_intensive_business,
                         casino_gambling, hr1, hr2, hr3, hr4, hr5, hr6, hr7,
                         hr8, hr9, hr10, hr11, hr13, hr14, hr16, hr17, hr18,
                         hr22, hr23, high_net_worth_flag):
                high_risk = 'Yes'
                hr_rating = weighted_options('refrating')
            # Up to three further risk_range() draws can still flag a
            # customer who has no indicator set
            if suspicious_activity_report == 'No' and high_risk == 'No':
                if risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')
            if politically_exposed_person == 'No' and high_risk == 'No':
                if risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')
            if high_risk == 'No':
                if risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')
            row.extend([
                hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
                hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
                high_net_worth_flag, high_risk, hr_rating,
                random.choice(USE_CASE)
            ])
            # End the current row
            writer.writerow(row)
def createCusts(N):
    """Generate ``N`` synthetic customer rows.

    Each row is a flat list of values (account identity, authorized card
    users, addresses, risk flags, segment data, net worth, ...) assembled
    from random draws and the ``Faker`` / ``gen_data`` helpers.  Account
    numbers grow monotonically from a fixed starting value, so they never
    repeat within one call.

    Args:
        N: Number of rows to build.  Row ``i`` reads ``liSSNMaster[i]``, so
            the module-level ``liSSNMaster`` must hold at least ``N`` items.

    Returns:
        A list of ``N`` row lists, in generation order.
    """
    global liSSNMaster

    # --- Weighted choice pools (duplicates encode the weights) -------------
    # Client whose net worth is over $500K
    HighNetWorth = ['Yes'] + ['No'] * 30
    # Type of related account
    Related_Type = ['Primary', 'Secondary', 'Joint']
    # How the account was opened
    Party_Type = ['Person', 'Non-Person']
    # Customer relation
    Party_Relation = ['Customer', 'Non-Customer']
    # Random Yes/No flag
    Yes_No = ['Yes'] + ['No'] * 12
    # Random Yes/No consent
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    # Official language
    Official_Lang = ['English'] * 3 + ['French']
    # Method of communication
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']

    # Segment model data; the five lists are parallel and share one index.
    Seg_Model_Type = [
        'LOB Specific', 'Profitability', 'Geographical', 'Behavioral',
        'Risk Tolerance'
    ]
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Name = [
        'IRRI', 'CRS Risk Score', 'Geo Risk', 'Financial Behavior Risk',
        'CM Risk'
    ]
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = [
        'High Risk Tier', 'Mid Risk Tier', 'Low Risk Tier', 'Vertical Risk',
        'Geographical Risk'
    ]

    # Shared pool for every rare industry/risk flag (arms manufacturer,
    # auction, cash intensive, casino, embassy, ...): 1 'Yes', 2 'No',
    # 392 blanks.  The original built one identical list per flag.
    Rare_Flag = ['Yes'] + ['No'] * 2 + [''] * 392

    # Client onboarding channel
    Channel_Onboarding = [
        'E-mail', 'In Person', 'In person - In Branch/Bank Office',
        'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
        'Request for Proposal (RFP)'
    ] + ['Not Applicable'] * 10
    # Ongoing transaction channel
    Channel_Ongoing_Transactions = [
        'ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
        'OTC Communication System', 'Phone'
    ] + ['Online'] * 4 + ['In Person'] * 31

    # Products
    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 + [
        'Investment Agency - Institutional'
    ] * 5
    # Services
    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    # SIC codes
    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12
    # Stock market listings
    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30

    # Selector for the low-net-worth bands used below
    LowNet = [1, 2] + [0] * 5
    # Consumer vs business account
    Acct_Type = ['B'] + ['C'] * 5
    # Number of credit cards per account
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    # Use-case scenario ids
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 + [
        2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38
    ] * 7 + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36
             ] * 65 + [37] * 73 + [40, 41] * 2
    # Risk ratings assigned to high-risk customers
    refrating = [
        '1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5', '5', '5',
        '5', '5', '5', '5', '5', '5'
    ]

    fake = Faker()
    start = 10786147
    acct_list = []
    liCSV = []

    # ``range`` instead of the Python-2-only ``xrange`` keeps the function
    # runnable on both interpreter lines.
    for i in range(N):
        # Initial high-risk state for this customer
        PEP = 'No'        # politically exposed person
        SAR = 'No'        # suspicious activity report filed
        Clsd = 'No'       # closed account
        high_risk = 'No'
        hr_rating = ''
        demarket = 'No'   # demarketed by the bank
        dem_date = ''

        # Closed-account flag: hits only when the draw is 97 (1 in 98).
        if max(randrange(0, 98, 1) - 96, 0) == 1:
            Clsd = 'Yes'

        # Number of credit card users on the account
        No_CCs = random.choice(Number_CC)

        # Account numbers advance by 2..10 so they stay unique and ordered.
        acct = start + 1 + randrange(1, 10, 1)
        start = acct

        # Customer name, plus a second generated name used for the middle
        # name (tmp[0]) and as part of the employer name (tmp[1]).
        name = fake.name()
        tmp = gen_data.create_name()

        acct_list.append(acct)

        row = [
            i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0],
            liSSNMaster[i]
        ]

        # Additional card users: name, middle name and SSN for each
        names = []
        ssn = []
        mdl = []
        for j in range(No_CCs - 1):
            names.append(fake.name())
            tmp2 = gen_data.create_name()
            mdl.append(tmp2[0])
            # Pull an SSN from the master list, avoiding the primary
            # holder's own index.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])

        # Pad with blanks so exactly three extra-user slots always exist.
        for k in range(4 - No_CCs):
            names.append('')
            ssn.append('')
            mdl.append('')

        # Random credit card; the printed number is scrambled with the
        # current timestamp to make collisions unlikely.
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        dt = str(datetime.now())
        clean = re.sub(r'\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))

        row.extend([
            names[0], mdl[0], ssn[0],
            names[1], mdl[1], ssn[1],
            names[2], mdl[2], ssn[2],
            printCC, CC_NO[0],
            gen_data.create_company_name() + ' ' + tmp[1],
            gen_data.create_email(),
            gen_data.create_job_title(),
        ])

        # Current and previous address (renamed from ``zip`` so the builtin
        # is not shadowed).
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        zip_code2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip_code2]

        lrg_cash_ex = random.choice(Yes_No)

        # SAR / demarketing only applies to closed accounts.
        if Clsd == 'Yes':
            # Demarketed without a SAR: 1 in 101 draws.
            if max(randrange(0, 101, 1) - 99, 0) == 1 and SAR == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # SAR filed: 1 in 11 draws, only if not already demarketed.
            if max(randrange(0, 11, 1) - 9, 0) == 1 and demarket == 'No':
                SAR = 'Yes'
                # Most SAR accounts (10 in 11 draws) are also demarketed.
                if max(randrange(0, 11, 1) - 9, 0) == 0:
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        # PEP flag: 1 in 101 draws.
        if max(randrange(0, 101, 1) - 99, 0) == 1:
            PEP = 'Yes'

        row.extend([
            addr[0], addr[1], zip_code, 'US',
            addr2[0], addr2[1], zip_code2, 'US',
            gen_data.create_birthday(min_age=2, max_age=85),
            PEP, SAR, Clsd,
        ])

        # Related accounts are only drawn after 10,000 accounts exist, to
        # avoid linking to the same few early accounts.
        if i > 10000:
            # The multiplier is 1 for a single draw value, otherwise 0.
            rel = int(random.choice(acct_list)) * max(
                randrange(0, 10001, 1) - 9999, 0)
            if rel != 0:
                row.append(rel)
                row.append(random.choice(Related_Type))
            else:
                row.append('')
                row.append('')
        else:
            row.append('')
            row.append('')

        # Account start date and consent to share information
        party_start = gen_data.create_date(past=True)
        Consent_Share = random.choice(Yes_No_Consent)

        row.extend([
            random.choice(Party_Type),
            random.choice(Party_Relation),
            party_start,
            gen_data.create_date(past=True),
            lrg_cash_ex, demarket, dem_date,
            randrange(0, 100, 1),
            random.choice(Official_Lang),
        ])

        # Preferred contact channel is only filled when consent was given.
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        row.extend([zip_code, randrange(0, 5, 1)])

        # One index selects the matching entry of every segment list.
        Segment_ID = randrange(0, 5, 1) % 5
        row.extend([
            Model_ID[Segment_ID],
            Seg_Model_Type[Segment_ID],
            Seg_Model_Name[Segment_ID],
            Seg_Model_Group[Segment_ID],
            Seg_Model_Description[Segment_ID],
            Seg_Model_Score[Segment_ID],
        ])

        hr0 = random.choice(Rare_Flag)    # arms manufacturer
        hr01 = random.choice(Rare_Flag)   # auction
        hr02 = random.choice(Rare_Flag)   # cash intensive business
        hr03 = random.choice(Rare_Flag)   # casino / gambling
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        # Net worth depends on the high-net-worth flag.
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            row.append(
                max(
                    max(randrange(0, 101, 1) - 99, 0) *
                    randrange(1000000, 25000000, 1),
                    randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        hr1 = random.choice(Rare_Flag)    # complex HI vehicle
        hr2 = random.choice(Rare_Flag)    # dealer in precious metals
        hr3 = random.choice(Rare_Flag)    # digital PM operator
        hr4 = random.choice(Rare_Flag)    # embassy / consulate
        hr5 = random.choice(Rare_Flag)    # currency exchange
        hr6 = random.choice(Rare_Flag)    # foreign financial institution
        hr7 = random.choice(Rare_Flag)    # foreign government
        hr8 = random.choice(Rare_Flag)    # foreign non-bank financial inst.
        hr9 = random.choice(Rare_Flag)    # internet gambling
        hr10 = random.choice(Rare_Flag)   # medical marijuana dispensary
        hr11 = random.choice(Rare_Flag)   # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(Rare_Flag)   # non-regulated financial inst.
        hr14 = random.choice(Rare_Flag)   # not-for-profit
        # hr15 (occupation) is already added above via gen_data.
        hr16 = random.choice(Rare_Flag)   # private ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(Rare_Flag)   # sales of used vehicles
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Rare_Flag)   # third-party payment processor
        hr23 = random.choice(Rare_Flag)   # transacting provider

        # Any single 'Yes' makes the customer high risk.
        if 'Yes' in (PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03,
                     hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
                     hr13, hr14, hr16, hr17, hr18, hr22, hr23,
                     HighNetWorthFlag):
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)

        # Otherwise up to three further 1-in-101 chances of being flagged.
        if SAR == 'No' and high_risk == 'No':
            if max(randrange(0, 101, 1) - 99, 0) == 1:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if PEP == 'No' and high_risk == 'No':
            if max(randrange(0, 101, 1) - 99, 0) == 1:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if high_risk == 'No':
            if max(randrange(0, 101, 1) - 99, 0) == 1:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)

        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            HighNetWorthFlag, high_risk, hr_rating,
            random.choice(Use_Case),
        ])
        liCSV.append(row)

    return liCSV
from random import random
# randrange is used for every numeric column below but was never imported,
# so the original script raised NameError on the first row.
from random import randrange
from random import shuffle
from faker import Faker
from barnum import gen_data
import csv

fake = Faker()

# Writes a header plus 50,000,000 synthetic rows to ``large.csv``.
with open('large.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n',)
    writer.writerow([
        'rownum', 'dunno', 'CC', 'Employer', 'Custemail', 'name',
        'occupation', 'address_street', 'DOB',
        'previous address_city_state_zip', 'altcustomer_name',
        'altcustomer_occupation', 'altcustomer_dob', 'ssn', 'phone',
        'AccountID', 'PepFlag', 'altcustomerssn', 'demarketed_customer_flag',
        'SAR_flag', 'nolonger_a_customer', 'closed_account',
        'High_risk_flag', 'Risk_rating',
    ])
    for i in range(50000000):
        # The 0/1 flags use max(randrange(0, 101) - 99, 0): the value is 1
        # only when the draw is 100, otherwise 0.  The 'closed_account'
        # column subtracts 90 instead and can therefore range from 0 to 10.
        row = [
            i,
            10,
            gen_data.cc_number(),
            gen_data.create_company_name(),
            gen_data.create_email(),
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_city_state_zip(),
            gen_data.create_birthday(min_age=2, max_age=85),
            gen_data.create_city_state_zip(),
            fake.name(),
            gen_data.create_job_title(),
            gen_data.create_birthday(min_age=2, max_age=85),
            # ssn as a (3-digit, 2-digit, 4-digit) tuple
            (randrange(101, 1000, 1), randrange(10, 100, 1),
             randrange(1000, 10000, 1)),
            # phone as a (3-digit, 3-digit, 4-digit) tuple
            (randrange(101, 1000, 1), randrange(101, 999, 1),
             randrange(1000, 10000, 1)),
            randrange(100000, 100000000, 1),
            max((randrange(0, 101, 1) - 99), 0),
            # alternate customer ssn
            (randrange(101, 1000, 1), randrange(10, 100, 1),
             randrange(1000, 10000, 1)),
            max((randrange(0, 101, 1) - 99), 0),
            max((randrange(0, 101, 1) - 99), 0),
            max((randrange(0, 101, 1) - 99), 0),
            max((randrange(0, 101, 1) - 90), 0),
            max((randrange(0, 101, 1) - 99), 0),
            max((randrange(0, 101, 1) - 99), 0),
        ]
        writer.writerow(row)
def gen_cust(liSSNMaster, acct_list, i):
    """Build one synthetic customer row.

    Args:
        liSSNMaster: Sequence of SSNs; entry ``i`` becomes the primary
            holder's SSN and other entries are drawn for additional users.
        acct_list: List of account numbers generated so far.  The new
            account number is appended to it (mutated in place).
        i: Row number; also the index of the primary holder's SSN.

    Returns:
        The row as a flat list of values.

    Note:
        The running account number lives in the module-level ``start`` and
        is advanced on every call.
    """
    # ``start`` is assigned below, so without this declaration Python treats
    # it as a local and the first read raises UnboundLocalError.
    global start

    fake = Faker()

    # Initial high-risk state for this customer
    PEP = 'No'                 # politically exposed person
    SAR = 'No'                 # suspicious activity report filed
    Clsd = choice(Clsd_flag)   # closed account
    high_risk = 'No'
    hr_rating = ''
    demarket = 'No'            # demarketed by the bank
    dem_date = ''

    # Number of credit cards on the account
    No_CCs = choice(Number_CC)

    # Account numbers advance by 2..10 so they stay unique and ordered.
    acct = start + 1 + randrange(1, 10, 1)
    start = acct

    # Customer name, plus a second generated name used for the middle name
    # (tmp[0]) and as part of the employer name (tmp[1]).
    name = fake.name()
    tmp = gen_data.create_name()

    acct_list.append(acct)

    # Main account holder SSN is the current index in the master SSN list.
    row = [i, acct, choice(Acct_Type), No_CCs, name, tmp[0], liSSNMaster[i]]

    # Additional card users: name, middle name and SSN for each
    names = []
    ssn = []
    mdl = []
    for j in range(No_CCs - 1):
        names.append(fake.name())
        tmp2 = gen_data.create_name()
        mdl.append(tmp2[0])
        # Pull an SSN from the master list, avoiding the primary holder's
        # own index.
        randInt = randrange(1, len(liSSNMaster), 1)
        if randInt != i:
            ssn.append(liSSNMaster[randInt])
        else:
            ssn.append(liSSNMaster[randInt - 1])

    # Pad with blanks so exactly three extra-user slots always exist.
    for k in range(4 - No_CCs):
        names.append('')
        ssn.append('')
        mdl.append('')

    # Random credit card; the printed number is scrambled with the current
    # timestamp to make collisions unlikely.
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    dt = str(datetime.now())
    clean = re.sub(r'\W', '', dt)
    printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
        randrange(1111, 9999, randrange(1, 10, 1)))

    row.extend([
        names[0], mdl[0], ssn[0],
        names[1], mdl[1], ssn[1],
        names[2], mdl[2], ssn[2],
        printCC, CC_NO[0],
        gen_data.create_company_name() + ' ' + tmp[1],
        gen_data.create_email(),
        gen_data.create_job_title(),
    ])

    # Current and previous address (renamed from ``zip`` so the builtin is
    # not shadowed).
    zip_code = choice(zips.zip)
    addr = geo_data.create_city_state_zip[zip_code]
    zip_code2 = choice(zips.zip)
    addr2 = geo_data.create_city_state_zip[zip_code2]

    lrg_cash_ex = choice(Yes_No)

    # SAR / demarketing only applies to closed accounts.
    if Clsd == 'Yes':
        # Demarketed without a SAR: 1 in 101 draws.
        if max(randrange(0, 101, 1) - 99, 0) == 1 and SAR == 'No':
            demarket = 'Yes'
            dem_date = gen_data.create_date(past=True)
        # SAR filed: 1 in 11 draws, only if not already demarketed.
        if max(randrange(0, 11, 1) - 9, 0) == 1 and demarket == 'No':
            SAR = 'Yes'
            # Most SAR accounts (10 in 11 draws) are also demarketed.
            if max(randrange(0, 11, 1) - 9, 0) == 0:
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)

    # PEP flag: 1 in 101 draws.
    if max(randrange(0, 101, 1) - 99, 0) == 1:
        PEP = 'Yes'

    row.extend([
        addr[0], addr[1], zip_code, 'US',
        addr2[0], addr2[1], zip_code2, 'US',
        gen_data.create_birthday(min_age=2, max_age=85),
        PEP, SAR, Clsd,
    ])

    # Related accounts are only drawn once 10,000 accounts exist.
    if i > 10000:
        # The multiplier is 1 for a single draw value, otherwise 0.
        rel = int(choice(acct_list)) * max(randrange(0, 10001, 1) - 9999, 0)
        if rel != 0:
            row.append(rel)
            row.append(choice(Related_Type))
        else:
            row.append('')
            row.append('')
    else:
        row.append('')
        row.append('')

    # Account start date and consent to share information
    party_start = gen_data.create_date(past=True)
    Consent_Share = choice(Yes_No_Consent)

    row.extend([
        choice(Party_Type),
        choice(Party_Relation),
        party_start,
        gen_data.create_date(past=True),
        lrg_cash_ex, demarket, dem_date,
        randrange(0, 100, 1),
        choice(Official_Lang),
    ])

    # Preferred contact channel is only filled when consent was given.
    if Consent_Share == 'Yes':
        row.extend(['Yes', choice(Preffered_Channel)])
    else:
        row.extend(['No', ''])

    # DO NOT USE CUST STATUS HERE - not integrated with closed status.
    row.extend([zip_code, randrange(0, 5, 1)])

    # Segment data: id, type, name, group, group description, score.
    segments = (
        ['01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'],
        ['02', 'Profitability', 'CRS Risk Score', 'Group 1',
         'Mid Risk Tier', '300'],
        ['03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier',
         '400'],
        ['04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'],
        ['05', 'Risk Tolerance', 'CM Risk', 'Group 4', 'Geographical Risk',
         '500'],
    )
    Segment_ID = randrange(0, 5, 1)
    row.extend(segments[Segment_ID])

    hr0 = choice(Yes_No_Cust_Flag)    # arms manufacturer
    hr01 = choice(Yes_No_Cust_Flag)   # auction
    hr02 = choice(Yes_No_Cust_Flag)   # cash intensive business
    hr03 = choice(Yes_No_Cust_Flag)   # casino / gambling
    hr04 = choice(Channel_Onboarding)
    hr05 = choice(Channel_Ongoing_Transactions)
    row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

    # Net worth depends on the high-net-worth flag.
    HighNetWorthFlag = choice(HighNetWorth)
    if HighNetWorthFlag == 'Yes':
        row.append(
            max(
                max(randrange(0, 101, 1) - 99, 0) *
                randrange(5000000, 25000000, 1),
                randrange(1000000, 5000000, 1)))
    else:
        flag = choice(LowNet)
        if flag == 0:
            row.append(randrange(-250000, 600000, 1))
        elif flag == 1:
            row.append(randrange(149000, 151000, 1))
        else:
            row.append(randrange(40000, 50000, 1))

    hr1 = choice(Yes_No_Cust_Flag)    # complex HI vehicle
    hr2 = choice(Yes_No_Cust_Flag)    # dealer in precious metals
    hr3 = choice(Yes_No_Cust_Flag)    # digital PM operator
    hr4 = choice(Yes_No_Cust_Flag)    # embassy / consulate
    hr5 = choice(Yes_No_Cust_Flag)    # currency exchange
    hr6 = choice(Yes_No_Cust_Flag)    # foreign financial institution
    hr7 = choice(Yes_No_Cust_Flag)    # foreign government
    hr8 = choice(Yes_No_Cust_Flag)    # foreign non-bank financial inst.
    hr9 = choice(Yes_No_Cust_Flag)    # internet gambling
    hr10 = choice(Yes_No_Cust_Flag)   # medical marijuana dispensary
    hr11 = choice(Yes_No_Cust_Flag)   # money service business
    hr12 = choice(NAICS.NAICS_Code)
    hr13 = choice(Yes_No_Cust_Flag)   # non-regulated financial inst.
    hr14 = choice(Yes_No_Cust_Flag)   # not-for-profit
    # hr15 (occupation) is intentionally not generated here.
    hr16 = choice(Yes_No_Cust_Flag)   # private ATM operator
    hr17 = choice(Products)
    hr18 = choice(Yes_No_Cust_Flag)   # sales of used vehicles
    hr19 = choice(Services)
    hr20 = choice(SIC_Code)
    hr21 = choice(Stock_Market_Listing)
    hr22 = choice(Yes_No_Cust_Flag)   # third-party payment processor
    hr23 = choice(Yes_No_Cust_Flag)   # transacting provider

    # Risk ratings assigned to high-risk customers (weighted toward '5').
    refrating = ['1'] * 3 + ['2', '4'] * 2 + ['3'] + ['5'] * 12

    # Any single 'Yes' makes the customer high risk.
    if 'Yes' in (PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03,
                 hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
                 hr13, hr14, hr16, hr17, hr18, hr22, hr23, HighNetWorthFlag):
        high_risk = 'Yes'
        hr_rating = choice(refrating)

    # Otherwise one further 1-in-101 chance of being flagged.
    if high_risk == 'No':
        if max(randrange(0, 101, 1) - 99, 0) == 1:
            high_risk = 'Yes'
            hr_rating = choice(refrating)

    row.extend([
        hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12, hr13,
        hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
        HighNetWorthFlag, high_risk, hr_rating,
        choice(Use_Case),
    ])

    return row
if (max((randrange(0,98,1)-96),0)==1): Clsd='Yes' #Random number generator for account number #acct = randrange(100000,100000000,1) #Random choice for number of credit cards per account number No_CCs = random.choice(Number_CC) #while acct_list.count(acct) > 0: # acct = randrange(100000,100000000,1) #dt = str(datetime.now()) #acct=str(i)++re.sub('\W','',dt) acct=start+1+randrange(1,10,1) start=acct #Randomly generates customer name name = fake.name() tmp=gen_data.create_name() #Adds account number to account dictionary acct_list.extend([acct]) #Creates a new row and adds data elements row = [i]+[acct]+[random.choice(Acct_Type)]+[No_CCs]+[name]+[tmp[0]]+[(str(randrange(101,1000,1))+str(randrange(10,100,1))+str(randrange(1000,10000,1)))] #Dictionary for names list set to blank names=[] #Dictionary for Social Security Number list set to blank ssn=[] #Generates Name and SSN for Credit Users #Middle Name to reduce name dups mdl=[] for j in range(No_CCs-1): names.insert(j,fake.name()) tmp2=gen_data.create_name() mdl.insert(j,tmp2[0])
def __init__(self, i, acct, liSSNMaster, acct_list):
    """Populate one synthetic customer record.

    Args:
        i: Row number; also the index of this customer's SSN in
            ``liSSNMaster``.
        acct: Account number assigned to this customer.
        liSSNMaster: Sequence of SSNs used for the customer and for the
            authorized users.
        acct_list: Existing account numbers; a related account may be drawn
            from it once ``i`` exceeds 10,000.

    Attributes are assigned in a fixed order; keep that order if anything
    downstream relies on it (e.g. iterating ``vars(obj)``).
    """
    self.ROWNUM = i
    self.ACCOUNTID = acct
    self.SSN = liSSNMaster[i]
    self.ACCT_TYPE = choice(Acct_Type)
    self.NUM_CCS = choice(Number_CC)
    self.NAME = fake.name()
    self.CUSTEMAIL = gen_data.create_email()
    self.OCCUPATION = gen_data.create_job_title()
    self.COUNTRY = 'US'
    self.PREVIOUS_COUNTRY = 'US'
    self.DOB = gen_data.create_birthday(min_age=2, max_age=85)
    self.PARTY_ENDDATE = gen_data.create_date(past=True)
    self.CONSENT_SHARING = choice(Yes_No_Consent)
    self.LARGE_CASH_EXEMPT = choice(Yes_No)
    self.PARTY_TYPE = choice(Party_Type)
    self.PARTY_RELATION = choice(Party_Relation)
    self.PROB_DEFAULT_RISKR = randrange(0, 100, 1)
    self.OFFICIAL_LANG_PREF = choice(Official_Lang)
    self.DEPENDANTS_COUNT = randrange(0, 5, 1)
    self.USE_CASE_SCENARIO = choice(Use_Case)
    self.CLOSEDACCOUNT = choice(Clsd_flag)
    self.HIGH_NET_WORTH = choice(HighNetWorth)
    self.PARTY_STARTDATE = gen_data.create_date(past=True)
    self.ARMS_MANUFACTURER = choice(Yes_No_Cust_Flag)
    self.AUCTION = choice(Yes_No_Cust_Flag)
    self.CASHINTENSIVE_BUSINESS = choice(Yes_No_Cust_Flag)
    self.CASINO_GAMBLING = choice(Yes_No_Cust_Flag)
    self.CHANNEL_ONBOARDING = choice(Channel_Onboarding)
    self.CHANNEL_ONGOING_TRANSACTIONS = choice(Channel_Ongoing_Transactions)
    self.COMPLEX_HI_VEHICLE = choice(Yes_No_Cust_Flag)
    self.DEALER_PRECIOUS_METAL = choice(Yes_No_Cust_Flag)
    self.DIGITAL_PM_OPERATOR = choice(Yes_No_Cust_Flag)
    self.EMBASSY_CONSULATE = choice(Yes_No_Cust_Flag)
    self.EXCHANGE_CURRENCY = choice(Yes_No_Cust_Flag)
    self.FOREIGN_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.FOREIGN_GOVERNMENT = choice(Yes_No_Cust_Flag)
    self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.INTERNET_GAMBLING = choice(Yes_No_Cust_Flag)
    self.MEDICAL_MARIJUANA_DISPENSARY = choice(Yes_No_Cust_Flag)
    self.MONEY_SERVICE_BUSINESS = choice(Yes_No_Cust_Flag)
    self.NAICS_CODE = choice(NAICS.NAICS_Code)
    self.NONREGULATED_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
    self.NOT_PROFIT = choice(Yes_No_Cust_Flag)
    self.PRIVATELY_ATM_OPERATOR = choice(Yes_No_Cust_Flag)
    self.PRODUCTS = choice(Products)
    self.SALES_USED_VEHICLES = choice(Yes_No_Cust_Flag)
    self.SERVICES = choice(Services)
    self.SIC_CODE = choice(SIC_Code)
    self.STOCK_MARKET_LISTING = choice(Stock_Market_Listing)
    self.THIRD_PARTY_PAYMENT_PROCESSOR = choice(Yes_No_Cust_Flag)
    self.TRANSACTING_PROVIDER = choice(Yes_No_Cust_Flag)

    # Current and previous address, looked up by zip code.
    self.ZIP = choice(zips.zip)
    self.PREVIOUS_ZIP = choice(zips.zip)
    addr = geo_data.create_city_state_zip[self.ZIP]
    addr2 = geo_data.create_city_state_zip[self.PREVIOUS_ZIP]
    self.CITY = addr[0]
    self.STATE = addr[1]
    self.PREVIOUS_CITY = addr2[0]
    self.PREVIOUS_STATE = addr2[1]
    self.PRIMARY_BRANCH_NO = self.ZIP

    # A second generated name supplies the middle name (tmp[0]) and part of
    # the employer name (tmp[1]).
    tmp = gen_data.create_name()
    self.M_NAME = tmp[0]
    self.EMPLOYER = gen_data.create_company_name() + ' ' + tmp[1]

    # NOTE(review): this is a second, independent draw, so the number of
    # authorized users below need not match self.NUM_CCS - confirm intent.
    No_CCs = choice(Number_CC)
    names = []   # authorized user names
    ssn = []     # authorized user SSNs
    mdl = []     # middle names, to reduce duplicate full names

    # NOTE(review): four candidate slots are filled here but only the first
    # three are stored on the instance below.
    for j in range(4):
        if No_CCs > j:
            names.append(fake.name())
            tmp2 = gen_data.create_name()
            mdl.append(tmp2[0])
            # Pull an SSN from the master list, avoiding this customer's
            # own index.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])
        else:
            # Blank out the slots beyond the drawn number of users.
            names.append('')
            ssn.append('')
            mdl.append('')

    self.AUTHORIZED_NAME2 = names[0]
    self.M_NAME2 = mdl[0]
    self.SSN2 = ssn[0]
    self.AUTHORIZED_NAME3 = names[1]
    self.M_NAME3 = mdl[1]
    self.SSN3 = ssn[1]
    self.AUTHORIZED_NAME4 = names[2]
    self.M_NAME4 = mdl[2]
    self.SSN4 = ssn[2]

    # Random credit card; the stored number is scrambled with the current
    # timestamp to make collisions unlikely.
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    dt = str(datetime.now())
    clean = re.sub(r'\W', '', dt)
    self.CREDITCARDNUMBER = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
        randrange(1111, 9999, randrange(1, 10, 1)))
    self.CREDITCARDTYPE = CC_NO[0]

    # Related accounts are only drawn once 10,000 accounts exist.
    self.RELATED_ACCT = ''
    self.RELATED_TYPE = ''
    if i > 10000:
        # The multiplier is 1 for a single draw value, otherwise 0.
        rel = int(choice(acct_list)) * max(randrange(0, 10001, 1) - 9999, 0)
        if rel != 0:
            self.RELATED_ACCT = rel
            self.RELATED_TYPE = choice(Related_Type)

    # Preferred contact channel is only filled when consent was given.
    self.PREFERRED_CHANNEL = ''
    if self.CONSENT_SHARING == 'Yes':
        # NOTE(review): spelled 'Prefered_Channel' here; confirm it matches
        # the module-level list name.
        self.PREFERRED_CHANNEL = choice(Prefered_Channel)

    # Segment data: id, type, name, group, group description, score.
    segments = (
        ('01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'),
        ('02', 'Profitability', 'CRS Risk Score', 'Group 1',
         'Mid Risk Tier', '300'),
        ('03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier',
         '400'),
        ('04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'),
        ('05', 'Risk Tolerance', 'CM Risk', 'Group 4', 'Geographical Risk',
         '500'),
    )
    Segment_ID = randrange(0, 5, 1)
    segment = segments[Segment_ID]
    self.SEG_MODEL_ID = segment[0]
    self.SEG_MODEL_TYPE = segment[1]
    self.SEG_MODEL_NAME = segment[2]
    self.SEG_MODEL_GROUP = segment[3]
    self.SEG_M_GRP_DESC = segment[4]
    self.SEG_MODEL_SCORE = segment[5]

    # Net worth depends on the high-net-worth flag.
    self.CLIENT_NET_WORTH = ''
    if self.HIGH_NET_WORTH == 'Yes':
        self.CLIENT_NET_WORTH = max(
            max(randrange(0, 101, 1) - 99, 0) *
            randrange(5000000, 25000000, 1),
            randrange(1000000, 5000000, 1))
    else:
        flag = choice(LowNet)
        if flag == 0:
            self.CLIENT_NET_WORTH = randrange(-250000, 600000, 1)
        elif flag == 1:
            self.CLIENT_NET_WORTH = randrange(149000, 151000, 1)
        else:
            self.CLIENT_NET_WORTH = randrange(40000, 50000, 1)

    # Politically exposed person: 1 in 101 draws.
    self.PEP = 'No'
    if max(randrange(0, 101, 1) - 99, 0) == 1:
        self.PEP = 'Yes'

    # Demarketing and suspicious activity report state
    self.DEMARKET_FLAG = 'No'
    self.DEMARKET_DATE = ''
    self.SAR = 'No'

    # SAR / demarketing only applies to closed accounts.
    if self.CLOSEDACCOUNT == 'Yes':
        # Demarketed without a SAR: 1 in 101 draws.
        if max(randrange(0, 101, 1) - 99, 0) == 1:
            self.DEMARKET_FLAG = 'Yes'
            self.DEMARKET_DATE = gen_data.create_date(past=True)
        # SAR filed: 1 in 11 draws, only if not already demarketed.
        if (self.DEMARKET_FLAG == 'No'
                and max(randrange(0, 11, 1) - 9, 0) == 1):
            self.SAR = 'Yes'
            # Most SAR accounts (10 in 11 draws) are also demarketed.
            if max(randrange(0, 11, 1) - 9, 0) == 0:
                self.DEMARKET_FLAG = 'Yes'
                self.DEMARKET_DATE = gen_data.create_date(past=True)

    # Any single 'Yes' makes the customer high risk; otherwise there is one
    # further 1-in-101 chance of being flagged.
    self.HIGH_RISK = 'No'
    self.RISK_RATING = ''
    if 'Yes' in (
            self.PEP,
            self.SAR,
            self.LARGE_CASH_EXEMPT,
            self.DEMARKET_FLAG,
            self.ARMS_MANUFACTURER,
            self.AUCTION,
            self.CASHINTENSIVE_BUSINESS,
            self.CASINO_GAMBLING,
            self.COMPLEX_HI_VEHICLE,
            self.DEALER_PRECIOUS_METAL,
            self.DIGITAL_PM_OPERATOR,
            self.EMBASSY_CONSULATE,
            self.EXCHANGE_CURRENCY,
            self.FOREIGN_FINANCIAL_INSTITUTION,
            self.FOREIGN_GOVERNMENT,
            self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION,
            self.INTERNET_GAMBLING,
            self.MEDICAL_MARIJUANA_DISPENSARY,
            self.MONEY_SERVICE_BUSINESS,
            self.NONREGULATED_FINANCIAL_INSTITUTION,
            self.NOT_PROFIT,
            self.PRIVATELY_ATM_OPERATOR,
            self.SALES_USED_VEHICLES,
            self.THIRD_PARTY_PAYMENT_PROCESSOR,
            self.TRANSACTING_PROVIDER,
            self.HIGH_NET_WORTH):
        self.HIGH_RISK = 'Yes'
        self.RISK_RATING = choice(refrating)
    elif max(randrange(0, 101, 1) - 99, 0) == 1:
        self.HIGH_RISK = 'Yes'
        self.RISK_RATING = choice(refrating)
import random
import article_collection_pb2
from barnum import gen_data

# Randomly generated pools used to assemble the demo articles.
_POOL_SIZE = 15
names = [gen_data.create_name() for _ in range(_POOL_SIZE)]
emails = [gen_data.create_email() for _ in range(_POOL_SIZE)]
titles = [gen_data.create_nouns() for _ in range(_POOL_SIZE)]
contents = [gen_data.create_paragraphs(8) for _ in range(_POOL_SIZE)]

# One article per generated title; body text, author name and author e-mail
# are each picked independently from the pools above.
articles = []
for title in titles:
    content = random.choice(contents)
    name = random.choice(names)
    email = random.choice(emails)

    # The random draws below run in the same order as the keys are listed.
    article = {
        "id": random.randint(10010, 20020),
        "title": title,
        # First 100 characters of the body serve as the preview text.
        "snippet": content[:100],
        "content": content,
        "isFeatured": random.choice([False, True]),
        # Three distinct topic ids out of the five available.
        "topics": random.sample([0, 1, 2, 3, 4], 3),
        "author": {
            "id": random.randint(10010, 20020),
            "name": " ".join((name[0], name[1])),
            "email": email,
        },
    }
    articles.append(article)
from barnum import gen_data
import csv
import itertools

# Number of synthetic rows written to large.csv.
ROW_COUNT = 50000000

# Writes one header row followed by ROW_COUNT rows of generated customer
# data (row number, constant 10, credit card, employer, e-mail, name,
# job title, city/state/zip, birthday).
with open('large.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    # list(...) is required because range() is not a list on Python 3 and
    # cannot be concatenated to one; on Python 2 the result is unchanged.
    writer.writerow([''] + list(range(10)))
    # A lazy counter avoids materialising a 50-million-element list on
    # Python 2 while yielding the same 0..ROW_COUNT-1 row numbers.
    for i in itertools.islice(itertools.count(), ROW_COUNT):
        row = [
            i,
            10,
            gen_data.cc_number(),
            gen_data.create_company_name(),
            gen_data.create_email(),
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_city_state_zip(),
            gen_data.create_birthday(min_age=2, max_age=85),
        ]
        writer.writerow(row)
def pop_transDetail(cat_desc, maxDate, j, maxBook, maxCheckin, randomrange,
                    randomchoice):
    """Return the free-text detail string for a hotel or airline transaction.

    A random name and a random city/state/zip are always drawn from
    ``gen_data`` first, whatever the category.

    Args:
        cat_desc: merchant category description. Only the two hotel
            descriptions and 'Airlines' produce a non-empty result.
        maxDate: date that new check-in / booking dates are offset from.
        j: index into the module-level ``UseCase`` sequence.
        maxBook: previous booking date, or '' when there is none yet.
        maxCheckin: previous check-in date, or '' when there is none yet.
        randomrange: callable invoked as ``randomrange(start, stop, step)``.
        randomchoice: callable returning one element of a sequence.

    Returns:
        The detail string, or '' when ``cat_desc`` is neither a hotel
        category nor 'Airlines'.
    """
    # Date reported when the use case matches none of the handled scenarios.
    fallback = date(2000, 1, 1)
    person = gen_data.create_name()
    place = gen_data.create_city_state_zip()

    hotel_categories = ('Hotels/Motels/Inns/Resorts',
                        'Hotels, Motels, and Resorts')

    if cat_desc in hotel_categories:
        scenario = UseCase[j]
        arrival = fallback
        if scenario in ('28', '29'):
            if maxCheckin == '':
                arrival = maxDate + timedelta(days=randomrange(365, 389, 1))
            else:
                arrival = maxCheckin + timedelta(days=randomrange(2, 5, 1))
            # NOTE: rebinding the parameter is local only; the caller's
            # value is not updated.
            maxCheckin = arrival
        elif scenario == '30':
            arrival = maxDate + timedelta(days=randomrange(30, 200, 1))
        departure = arrival + timedelta(days=randomrange(4, 11, 1))

        hotel = ''.join([
            person[1], ' Hotels; ', '; Address: ',
            place[1], ' ', place[2], ', ', place[0],
        ])
        return ''.join([
            'Checkin: ', str(arrival),
            '; Checkout: ', str(departure),
            '; Hotel: ', hotel,
        ])

    if cat_desc == 'Airlines':
        scenario = UseCase[j]
        booked_on = fallback
        if scenario in ('31', '32'):
            # Same 1-14 day offset either way; only the base date differs.
            base = maxDate if maxBook == '' else maxBook
            booked_on = base + timedelta(days=randomrange(1, 15, 1))
            # NOTE: local rebinding only, not visible to the caller.
            maxBook = booked_on
        elif scenario == '33':
            booked_on = maxDate + timedelta(days=randomrange(1, 15, 1))

        Airport_Code = """
            0AK 16A 1G4 2A3 2A9 3A5 3T7 3W2 6R7 74S A61 A85 ABE ABI ABQ ABR
            ABY ACB ACK ACT ACV ACY ADK ADQ AEX AFM AGC AGN AGS AHN AIA AID
            AIY AIZ AKN AKP AKW ALB ALM ALN ALO ALS ALW AMA ANB ANC AND ANI
            AOO APF APN AQH AQT ART ASE ASN AST ATK ATL ATW ATY AUG AUK AUS
            AVL AVP AWI AXN AZO BAF BAK BCE BDE BDL BDR BED BEH BET BFD BFF
            BFI BFL BGM BGR BHB BHM BID BIG BIL BIS BJI BKL BKW BKX BLI BLM
            BLV BMG
            BMI BNA BOI BOS BPK BPT BQK BQN BRD BRL BRO BRW BTI BTL BTM BTR
            BTV BUF BUR BVK BWG BWI BZN CAE CAK CCR CDB CDC CDV CDW CEC CEF
            CEZ CFK CGA CGF CGI CGX CHA CHO CHS CIC CID CIU CKB CLE CLL CLM
            CLT CMH CMI CMX CNM CNY COD COE COS COU CPR CPX CRP CRQ CRW CSG
            CVG CVO CVX CWA CWI CYS D76 DAB DAL DAN DAY DBQ DCA DDC DDH DEC
            DEN DET DFW DHN DIK DLG DLH DNV DRO DRT DSM DTW DUJ DUT DUY DVL
            DVT DXR EAR EAT EAU EEK EEN EFD EFK EGE EKM EKO ELI ELM ELO ELP
            ELY ENA ENM ENW ERI ESC ESF EUG EVV EWB EWN EWR EWU EYW FAI FAQ
            FAR FAT FAY FHR FHU FKL FLG FLL FLO FMN FNL FNT FOD FOE FRG FRM
            FSD FSM FTW FWA FYU FYV GAL GAM GBD GBH GCC GCK GCN GED GEG GFK
            GFL GGG GGV GGW GJT GKN GLD GLH GLR GLS GNV GON GPI GPT GPZ GRB
            GRI GRK GRO GRR GSN GSO GSP GST GTF GTR GUC GUM GUP GYH GYR GYY
            HDN HFD HGR HIB HII HKS HKY HLA HLN HND HNH HNL HNM HNS HOB HOM
            HON HOT HOU HPB HPN HRL HRO HSL HSV HTS HUF HUT HVN HXD HYA HYL
            HYS IAD IAH IAN ICT IDA IFP IGM IIK ILE ILG ILI ILL ILM IMT IND
            INL INT IPL IPT IRK ISN ISO ISP ITH ITO IWA IWD IXD IYK JAC JAN
            JAX JBR JEF JFK JHW JLN JMS JNU JRB JST JVL JXN KAE KAL KDK KEB
            KKA KLG KOA KSM KTB KTN KVC KVL KWT LAA LAF LAL LAN
            LAR LAS LAW LAX LBB LBE LBF LBL LBX LCH LCK LEB LEX LFT LGA LGB
            LHD LIH LIT LMT LNK LNS LNY LPR LRD LRU LSE LUK LWB LWS LYH MAF
            MAZ MBA MBL MBS MCC MCE MCG MCI MCK MCN MCO MCW MDH MDM MDT MDW
            MDY MEI MEM MFD MFE MFR MGM MGW MHE MHK MHT MIA MIE MIV MJX MKC
            MKE MKG MKK MKL MKT MLB MLI MLL MLU MMH MMU MMV MNM MOB MOD MOT
            MOU MPV MQI MQJ MQY MRI MRY MSL MSN MSO MSP MSS MSV MSY MTH MTJ
            MTM MTO MUE MVL MVN MVY MWA MWH MYR MZJ N93 NEW NQA NUL OAJ OAK
            OCF OFK OGD OGG OGS OKC OLM OMA OME ONP ONT OOK OQU ORD ORF ORH
            ORI ORS ORV OSH OSU OTG OTH OTM OTZ OWB OXC OXR PAE PAH PBI PCW
            PDT PDX PFN PGA PGD PGM PGV PHF PHL PHO PHX PIA PIB PIE PIH PIR
            PIT PKB PLB PLK PLN PMD PNC PNS POU PPC PPG PQI PQL PRB PRC PSC
            PSE PSG PSM PSP PTH PTK PUB PUW PVC PVD PVU PWM PWT RAP RDD RDG
            RDM RDU RFD RHI RIC RIW RKD RKS RME RMG RNO ROA ROC ROW RSH RST
            RSW RUT RWI SAF SAN SAT SAV SAW SBA SBD SBN SBP SBY SCC SCK SCM
            SDF SDP SDY SEA SFB SFO SFZ SGF SGH SGJ SGU SGY SHD SHG SHH SHR
            SHV SIG SIT SJC SJT SJU SKX SLC SLE SLK SLN SMF SMX SNA SNP SOP
            SOV SOW SPI SPS SQI SRQ SRR STC STJ STL STP STS STT STX SUN SUS
            SUX SVA SVC SWF SWO SYR T44 TAL TBN TCL TEB TEX TIX
            TLH TLT TNI TOG TOL TPA TPL TRI TTN TUL TUP TUS TVC TVF TVL TVR
            TWF TXK TYR TYS UCA UIN UNK UNV UOX UUU VAK VCT VCV VDZ VGT VIS
            VLD VPS VPZ VQQ VQS VRB WBB WDG WLK WNA WRG WRL WST WTK WWD WYS
            X44 X95 XNA YAK YKM YKN YNG YUM Z08 Z09
        """.split()

        # List elements are evaluated left to right, so the source airport
        # is drawn before the destination.
        return ''.join([
            'Date Booked: ', str(booked_on),
            '; Name Booked: ', person[0], person[1],
            '; Address: ', place[1], ' ', place[2], ', ', place[0],
            '; Source :', randomchoice(Airport_Code),
            '; Destination:', randomchoice(Airport_Code),
        ])

    return ''
f1, delimiter=',', lineterminator='\n', ) writer.writerow(['rownum'] +['dunno'] + ['CC'] + ['Employer'] + ['Custemail'] + ['name'] \ + ['occupation'] + ['address_street'] + ['DOB']+['previous address_city_state_zip']+ ['altcustomer_name'] \ + ['altcustomer_occupation'] + ['altcustomer_dob'] + ['ssn'] + ['phone'] + \ ['AccountID'] + ['PepFlag'] + ['altcustomerssn'] + ['demarketed_customer_flag'] + \ ['SAR_flag'] + ['nolonger_a_customer'] + ['closed_account'] +['High_risk_flag'] +['Risk_rating']) while i < 50000000: #Pick an account number and store it in acct acct = randrange(100000, 100000000, 1) #if the account hasn't been already generated then generate a record with all fields if d.has_key(str(acct)) == False: row = [i] + [10] + [gen_data.cc_number()]+[gen_data.create_company_name()] + \ [gen_data.create_email()]+[gen_data.create_name()] +[gen_data.create_job_title()] + \ [gen_data.create_city_state_zip()] + [gen_data.create_birthday(min_age=2, max_age=85)] + \ [gen_data.create_city_state_zip()] + [fake.name()] + [gen_data.create_job_title()] + \ [gen_data.create_birthday(min_age=2, max_age=85)] +\ [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] + \ [(randrange(101,1000,1),randrange(101,999,1),randrange(1000,10000,1))] + \ [acct] + \ [max((randrange(0,101,1)-99),0)] + \ [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-99),0)] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-90),0)] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-99),0)] d[str(acct)] = acct i = i + 1 writer.writerow(row)
def gen_tran(MCC_credits, MCC_debits, Tran_Country_Credits,
             Tran_Country_Debits, Tran_Type_C, Tran_Type_D, Upper_Limit,
             Delta, count, j, usecase):
    """Generate one customer's synthetic card transactions and write them.

    Between 100 and 149 transactions are produced, followed by one closing
    row (a credit-balance refund when the balance exceeds the limit,
    otherwise a customer payment).  Every row is written through the
    module-level ``writer``.

    The function reads these module-level names: ``ACCTs``, ``CCs``,
    ``CCTypes``, ``Holders``, ``CCsCount``, ``Cities``, ``States``, ``ZIPs``,
    ``Countries``, ``UseCase`` (all indexed by ``j``), ``Airport_Code``,
    ``Merchant_Category``, ``python_merchant_cat``, ``gen_data`` and
    ``writer``.

    Args:
        MCC_credits: merchant category codes to pick from for credits.
        MCC_debits: merchant category codes to pick from for debits.
        Tran_Country_Credits: countries to pick from for credits.
        Tran_Country_Debits: countries to pick from for debits.
        Tran_Type_C: transaction types to pick from for credits.
        Tran_Type_D: transaction types to pick from for debits.
        Upper_Limit: exclusive upper bound of the draw that, together with
            ``Delta``, decides whether an in-limit transaction is a credit
            or a debit.
        Delta: offset added to that draw; a result <= 0 yields a debit.
        count: running row counter used in the row id.  It is advanced
            locally only; nothing is returned to the caller.
        j: index of the customer in the module-level per-customer lists.
        usecase: value written verbatim into each row.
    """
    # Start date for transactions.  Plain decimal literals are used because
    # leading-zero forms such as 01 are a SyntaxError on Python 3.
    startDate = date(2015, 1, 1)
    #Pick out account based on counter
    acct = ACCTs[j]
    # Credit limit: normally $1,000-$24,000; when the first draw is 100
    # (1 in 100) a $25,000-$49,000 limit can win the max() instead.
    limit = max(
        max((randrange(1, 101, 1) - 99), 0) * randrange(25000, 50000, 1000),
        randrange(1000, 25000, 1000))
    tmpAmt = 0
    Balance = limit
    maxDate = startDate
    #Random number generator for transactions per customer
    NoTrans = randrange(100, 150, 1)
    # '' means "no hotel check-in / airline booking generated yet".
    maxCheckin = ''
    maxBook = ''

    #loop to generate NoTrans transactions per customer
    for _ in range(NoTrans):
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cr_dbt = 'D'
        tranType = ''
        country = []
        cat_desc = ''
        flag = 0
        #If Balance is within the credit limit, generate credits/debits
        if (Balance > 0 and Balance <= limit * 1.2):
            # tmpAmt > 0 becomes a credit, tmpAmt == 0 becomes a debit; the
            # split is driven by Upper_Limit and Delta.
            tmpAmt = max(
                (randrange(1, Upper_Limit, 1) + Delta), 0) * randrange(
                    1, Balance + 1, 1)
            flag = 1
        #Define time delta for next transaction
        tdelta = timedelta(days=randrange(1, 4, 1))
        row = [str(count) + '_' + dt] + [acct]

        #If we have credit or debit within balance
        if tmpAmt == 0 and flag == 1:
            tmpAmt = random.randrange(1, Balance + 1, 1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch = gen_data.create_company_name()
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Debits)
        elif tmpAmt > 0 and flag == 1:
            cr_dbt = 'C'
            tranType = random.choice(Tran_Type_C)
            Balance = Balance + tmpAmt
            merch = ''
            cat = random.choice(MCC_credits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            if (tranType == 'Merchant Credit'):
                merch = gen_data.create_company_name()
                cat = random.choice(Merchant_Category.Green)
                cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            if (tranType == 'Refund'):
                cat = '0000'
                cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Credits)

        # Balance outside the normal band (flag == 0): force a debit when
        # it is above the limit, or a payment when it is zero or negative.
        if Balance > limit and flag == 0:
            tmpAmt = random.randrange(1, Balance - limit + 1, 1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch = gen_data.create_company_name()
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Debits)
        elif ((Balance < 0 or Balance == 0) and flag == 0):
            cr_dbt = 'C'
            tranType = 'Payment'
            # Floor division keeps the bound an int on Python 3 as well;
            # for the integer limit it equals the Python 2 result of '/'.
            tmpAmt = random.randrange(1, limit // 2, 1)
            Balance = Balance + tmpAmt
            merch = ''
            cat = '1111'
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Credits)

        #date posted
        date1 = maxDate + tdelta
        maxDate = date1
        # Second date column is one day before the posting date.
        date2 = date1 - timedelta(days=1)
        row.extend([
            country, date1, date2, tranType, cr_dbt, limit, tmpAmt, Balance,
            CCs[j], CCTypes[j], usecase, Holders[j], CCsCount[j], Cities[j],
            States[j], ZIPs[j], Countries[j]
        ])
        count = count + 1

        transDetail = ''
        is_hotel = (cat_desc == 'Hotels/Motels/Inns/Resorts'
                    or cat_desc == 'Hotels, Motels, and Resorts')
        is_airline = cat_desc == 'Airlines'

        #Add details for Hotel Transactions
        if is_hotel and UseCase[j] in ('28', '29', '30'):
            if UseCase[j] == '30':
                checkin = maxDate + timedelta(days=randrange(30, 200, 1))
            elif maxCheckin == '':
                # First stay: roughly a year after the posting date.
                checkin = maxDate + timedelta(days=randrange(365, 389, 1))
            else:
                # Later stays follow the previous check-in by 2-4 days.
                checkin = maxCheckin + timedelta(days=randrange(2, 5, 1))
            checkout = checkin + timedelta(days=randrange(4, 11, 1))
            if UseCase[j] != '30':
                maxCheckin = checkin
            tmp2 = gen_data.create_name()
            addr = gen_data.create_city_state_zip()
            hotel = tmp2[1] + ' Hotels; ' + '; Address: ' + addr[
                1] + ' ' + addr[2] + ', ' + addr[0]
            transDetail = 'Checkin: ' + str(checkin) + '; Checkout: ' + str(
                checkout) + '; Hotel: ' + hotel

        #Add details for Airline Transactions
        if is_airline and UseCase[j] in ('31', '32', '33'):
            if UseCase[j] == '33' or maxBook == '':
                booking = maxDate + timedelta(days=randrange(1, 15, 1))
            else:
                booking = maxBook + timedelta(days=randrange(1, 15, 1))
            if UseCase[j] != '33':
                maxBook = booking
            tmp2 = gen_data.create_name()
            addr = gen_data.create_city_state_zip()
            # Source is drawn before destination (left-to-right evaluation).
            transDetail = 'Date Booked: ' + str(
                booking) + '; Name Booked: ' + tmp2[0] + tmp2[
                    1] + '; Address: ' + addr[1] + ' ' + addr[
                        2] + ', ' + addr[0] + '; Source :' + random.choice(
                            Airport_Code
                        ) + '; Destination:' + random.choice(Airport_Code)

        row.append(transDetail)
        writer.writerow(row)

    # After all transactions: if the account is overpaid, refund the excess;
    # otherwise add a final customer payment.
    if Balance > limit:
        row = [str(count) + '_' + dt] + [acct] + ['Uber Bank'] + ['0000'] + [
            'Refund to Customer from Bank'
        ] + [random.choice(Tran_Country_Debits)]
        date1 = maxDate + timedelta(days=90)
        date2 = date1 - timedelta(days=1)
        row.extend([
            date1, date2, 'Credit Balance Refund', 'D', limit,
            Balance - limit, limit, CCs[j], CCTypes[j], usecase, Holders[j],
            CCsCount[j], Cities[j], States[j], ZIPs[j], Countries[j], ''
        ])
        count = count + 1
    else:
        date1 = maxDate + tdelta
        maxDate = date1
        date2 = date1 - timedelta(days=1)
        row = [str(count) + '_' + dt] + [acct] + ['Customer Payment'] + [
            '1111'
        ] + ['Customer Payment'] + [random.choice(Tran_Country_Credits)]
        row.extend([
            date1, date2, 'Payment', 'C', limit, limit - Balance, limit,
            CCs[j], CCTypes[j], usecase, Holders[j], CCsCount[j], Cities[j],
            States[j], ZIPs[j], Countries[j], ''
        ])
        count = count + 1
    writer.writerow(row)
def createCusts(N):
    """Build N synthetic customer/account rows and return them as a list.

    Each row is a flat list of column values (account data, up to three
    additional card holders, addresses, risk flags, segment data, products
    and a use-case id) in the order the ``row`` list is extended below.

    The function reads these module-level names: ``liSSNMaster`` (indexed
    by the row number, so it must hold at least N entries), ``zips.zip``,
    ``geo_data.create_city_state_zip`` (indexed by zip code),
    ``NAICS.NAICS_Code``, ``gen_data`` and ``Faker``.

    Args:
        N: number of customer rows to generate.

    Returns:
        list of rows, one list per customer.
    """
    global liSSNMaster

    # Weighted pick lists: repeating a value raises its probability.
    #Client whose net worth is over $500K
    HighNetWorth = ['Yes'] + ['No'] * 30
    #Type of related account
    Related_Type = ['Primary', 'Secondary', 'Joint']
    #How the account was opened
    Party_Type = ['Person', 'Non-Person']
    #Relationship to the bank
    Party_Relation = ['Customer', 'Non-Customer']
    #Random Yes/No flag (1 in 13 'Yes')
    Yes_No = ['Yes'] + ['No'] * 12
    #Random Yes/No consent (1 in 5 'Yes')
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    #Official language
    Official_Lang = ['English'] * 3 + ['French']
    #Method of communication
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']
    #Segment model attributes; index k of every list describes segment k
    Seg_Model_Type = ['LOB Specific', 'Profitability', 'Geographical',
                      'Behavioral', 'Risk Tolerance']
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Name = ['IRRI', 'CRS Risk Score', 'Geo Risk',
                      'Financial Behavior Risk', 'CM Risk']
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = ['High Risk Tier', 'Mid Risk Tier',
                             'Low Risk Tier', 'Vertical Risk',
                             'Geographical Risk']
    # Shared distribution for every rare high-risk business flag:
    # 1 'Yes', 2 'No' and 392 blanks.
    Rare_Flag = ['Yes'] + ['No'] * 2 + [''] * 392
    #Client onboarding channel
    Channel_Onboarding = [
        'E-mail', 'In Person', 'In person - In Branch/Bank Office',
        'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
        'Request for Proposal (RFP)'
    ] + ['Not Applicable'] * 10
    #Ongoing transaction channel
    Channel_Ongoing_Transactions = [
        'ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
        'OTC Communication System', 'Phone'
    ] + ['Online'] * 4 + ['In Person'] * 31
    # Occupation list; currently unused because the occupation column is
    # filled from gen_data.create_job_title() instead.
    Occupation = [
        '11-1011 Chief Executives',
        '11-3011 Administrative Services Managers',
        '11-3031 Financial Managers',
        '11-3061 Purchasing Managers',
        '13-1011 Agents and Business Managers of Artists Performers and Athletes',
        '13-1031 Claims Adjusters Examiners, and Investigators',
        '13-1199 Business Operations Specialists, All Other',
        '13-2099 Financial Specialists All Other',
        '17-1011 Architects Except Landscape and Naval',
        '23-1011 Lawyers',
        '23-1023 Judges, Magistrate Judges and Magistrates',
        '25-2012 Kindergarten Teachers Except Special Education',
        '25-2021 Elementary School Teachers Except Special Education',
        '29-1041 Optometrists',
        '29-2054 Respiratory Therapy Technicians',
        '33-2011 Firefighters',
        '37-1012 First-Line Supervisors of Landscaping Lawn Service and Groundskeeping Workers',
        '39-1011 Gaming Supervisors',
        '39-2011 Animal Trainers',
        '41-1011 First-Line Supervisors of Retail Sales Workers',
        '41-1012 First-Line Supervisors of Non-Retail Sales Workers',
        '41-2011 Cashiers',
        '41-2031 Retail Salespersons',
        '43-3021 Billing and Posting Clerks',
        '45-1011 First-Line Supervisors of Farming, Fishing, and Forestry Workers',
        '49-2011 Computer Automated Teller and Office Machine Repairers',
        '53-3021 Bus Drivers Transit and Intercity',
        '53-4031 Railroad Conductors and Yardmasters',
        '55-1011 Air Crew Officers',
        '55-1012 Aircraft Launch and Recovery Officers',
        '55-1013 Armored Assault Vehicle Officers',
    ]
    #Products
    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 + [
        'Investment Agency - Institutional'
    ] * 5
    #Services
    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    #SIC codes
    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12
    #Stock market listing
    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30
    #Selector for the net-worth range of non-high-net-worth customers
    LowNet = [1, 2] + [0] * 5
    #Business vs consumer account
    Acct_Type = ['B'] + ['C'] * 5
    #Number of credit card holders per account
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    #Use-case (scenario) ids, weighted
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 + [
        2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38
    ] * 7 + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36] * 65 + [37] * 73 + [
        40, 41
    ] * 2
    #High-risk rating values, weighted towards '5'
    refrating = ['1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5',
                 '5', '5', '5', '5', '5', '5', '5', '5']

    fake = Faker()
    # Account numbers grow from this seed by a random 2-10 per customer.
    start = 10786147
    acct_list = []
    liCSV = []

    # range() rather than the Python-2-only xrange(); liSSNMaster and liCSV
    # already hold N entries, so the extra list on Python 2 is not the
    # dominant cost.
    for i in range(N):
        #Initiate High Risk Flags
        PEP = 'No'        # politically exposed person
        SAR = 'No'        # suspicious activity report filed
        Clsd = 'No'       # closed account
        high_risk = 'No'
        hr_rating = ''
        demarket = 'No'   # customer was demarketed by the bank
        dem_date = ''

        #generate closed acct flag (1 in 98)
        if (max((randrange(0, 98, 1) - 96), 0) == 1):
            Clsd = 'Yes'
        #Number of credit card holders on this account
        No_CCs = random.choice(Number_CC)
        #Generate account number
        acct = start + 1 + randrange(1, 10, 1)
        start = acct
        #Customer name from Faker; tmp supplies the middle name (tmp[0])
        #and the employer suffix (tmp[1])
        name = fake.name()
        tmp = gen_data.create_name()
        acct_list.append(acct)
        row = [i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0],
               liSSNMaster[i]]

        # Additional card holders: name, middle name (to reduce duplicate
        # names) and SSN for each holder beyond the primary one.
        names = []
        ssn = []
        mdl = []
        for j in range(No_CCs - 1):
            names.append(fake.name())
            tmp2 = gen_data.create_name()
            mdl.append(tmp2[0])
            # Pull from the SSN master list, avoiding the primary
            # holder's own entry.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])
        # Pad with blanks so all three holder slots exist.
        padding = [''] * (4 - No_CCs)
        names.extend(padding)
        ssn.extend(padding)
        mdl.extend(padding)

        #Random credit card; CC_NO[1][0] is the card number
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        # Scramble: last 4 card digits + part of the current timestamp + a
        # random number, to make the printed number unique.
        dt = str(datetime.now())
        clean = re.sub(r'\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))
        row.extend([
            names[0], mdl[0], ssn[0], names[1], mdl[1], ssn[1], names[2],
            mdl[2], ssn[2], printCC, CC_NO[0],
            gen_data.create_company_name() + ' ' + tmp[1],
            gen_data.create_email(), gen_data.create_job_title()
        ])

        #Create current address
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        #Create previous address
        zip_code2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip_code2]
        lrg_cash_ex = random.choice(Yes_No)

        #Condition for SARs and Demarketed Clients
        if (Clsd == 'Yes'):
            #About 1% of closed accounts are demarketed without a SAR
            if (max((randrange(0, 101, 1) - 99), 0) == 1 and SAR == 'No'):
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            if (max((randrange(0, 11, 1) - 9), 0) == 1 and demarket == 'No'):
                #About 10% of closed accounts have SARs
                SAR = 'Yes'
                #About 90% of closed accounts with SARs are demarketed
                if (max((randrange(0, 11, 1) - 9), 0) == 0):
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)
        #About 1% of accounts are PEP
        if (max((randrange(0, 101, 1) - 99), 0) == 1):
            PEP = 'Yes'
        row.extend([
            addr[0], addr[1], zip_code, 'US', addr2[0], addr2[1], zip_code2,
            'US', gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR,
            Clsd
        ])

        # Related accounts are only generated once more than 10,000
        # accounts exist, to avoid duplicates early on.
        if i > 10000:
            rel = int(random.choice(acct_list)) * max(
                (randrange(0, 10001, 1) - 9999), 0)
            if rel != 0:
                row.append(rel)
                row.append(random.choice(Related_Type))
            else:
                row.append('')
                row.append('')
        else:
            row.append('')
            row.append('')

        #Account start date
        party_start = gen_data.create_date(past=True)
        #Consent option for sharing info
        Consent_Share = random.choice(Yes_No_Consent)
        row.extend([
            random.choice(Party_Type), random.choice(Party_Relation),
            party_start, gen_data.create_date(past=True), lrg_cash_ex,
            demarket, dem_date, randrange(0, 100, 1),
            random.choice(Official_Lang)
        ])
        #Preferred contact channel only when the customer consented
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])
        row.extend([zip_code, randrange(0, 5, 1)])

        #Pick a segment (0-4) and add its attributes
        Segment_ID = randrange(0, 5, 1) % 5
        row.extend([
            Model_ID[Segment_ID], Seg_Model_Type[Segment_ID],
            Seg_Model_Name[Segment_ID], Seg_Model_Group[Segment_ID],
            Seg_Model_Description[Segment_ID], Seg_Model_Score[Segment_ID]
        ])

        hr0 = random.choice(Rare_Flag)    # arms manufacturer
        hr01 = random.choice(Rare_Flag)   # auction
        hr02 = random.choice(Rare_Flag)   # cash-intensive business
        hr03 = random.choice(Rare_Flag)   # casino / gambling
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        #High net worth flag and a matching net worth figure
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            row.append(
                max(
                    max((randrange(0, 101, 1) - 99), 0) * randrange(
                        1000000, 25000000, 1),
                    randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        hr1 = random.choice(Rare_Flag)    # complex HI vehicle
        hr2 = random.choice(Rare_Flag)    # dealer in precious metals
        hr3 = random.choice(Rare_Flag)    # digital PM operator
        hr4 = random.choice(Rare_Flag)    # embassy / consulate
        hr5 = random.choice(Rare_Flag)    # currency exchange
        hr6 = random.choice(Rare_Flag)    # foreign financial institution
        hr7 = random.choice(Rare_Flag)    # foreign government
        hr8 = random.choice(Rare_Flag)    # foreign non-bank financial inst.
        hr9 = random.choice(Rare_Flag)    # internet gambling
        hr10 = random.choice(Rare_Flag)   # medical marijuana dispensary
        hr11 = random.choice(Rare_Flag)   # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(Rare_Flag)   # non-regulated financial inst.
        hr14 = random.choice(Rare_Flag)   # not-for-profit
        hr16 = random.choice(Rare_Flag)   # privately owned ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(Rare_Flag)   # sales of used vehicles
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Rare_Flag)   # third-party payment processor
        hr23 = random.choice(Rare_Flag)   # transacting provider

        # Any single 'Yes' among these marks the customer as high risk.
        risk_inputs = (PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02,
                       hr03, hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9,
                       hr10, hr11, hr13, hr14, hr16, hr17, hr18, hr22, hr23,
                       HighNetWorthFlag)
        if any(value == 'Yes' for value in risk_inputs):
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)
        # Up to three further ~1% chances of being rated high risk anyway.
        if (SAR == 'No' and high_risk == 'No'):
            if (max((randrange(0, 101, 1) - 99), 0) == 1):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if (PEP == 'No' and high_risk == 'No'):
            if (max((randrange(0, 101, 1) - 99), 0) == 1):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if (high_risk == 'No'):
            if (max((randrange(0, 101, 1) - 99), 0) == 1):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)

        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            HighNetWorthFlag, high_risk, hr_rating, random.choice(Use_Case)
        ])
        liCSV.append(row)
    return liCSV
if (max((randrange(0, 98, 1) - 96), 0) == 1): Clsd = 'Yes' #Random number generator for account number #acct = randrange(100000,100000000,1) #Random choice for number of credit cards per account number No_CCs = random.choice(Number_CC) #while acct_list.count(acct) > 0: # acct = randrange(100000,100000000,1) #dt = str(datetime.now()) #acct=str(i)++re.sub('\W','',dt) acct = start + 1 + randrange(1, 10, 1) start = acct #Randomly generates customer name name = fake.name() tmp = gen_data.create_name() #Adds account number to account dictionary acct_list.extend([acct]) #Creates a new row and adds data elements ## JS - Main Account Holder SSN as current index in master SSN list ## row = [i]+[acct]+[random.choice(Acct_Type)]+[No_CCs]+[name]+[tmp[0]]+[(str(randrange(101,1000,1))+str(randrange(10,100,1))+str(randrange(1000,10000,1)))] row = [i] + [acct] + [random.choice(Acct_Type)] + [No_CCs] + [name] + [ tmp[0] ] + [liSSNMaster[i]] #Dictionary for names list set to blank names = [] #Dictionary for Social Security Number list set to blank ssn = [] #Generates Name and SSN for Credit Users #Middle Name to reduce name dups mdl = []
# barnum Python library - https://pypi.org/project/barnum/

# Import the pandas library
import pandas as pd
# Import the barnum library
from barnum import gen_data

# Number of fake user records to generate
NUM_RECORDS = 1000

# Create an empty list to store users; each entry is one tuple whose
# fields line up positionally with `labels` below
users = []

# Create the records
for i in range(NUM_RECORDS):
    company = gen_data.create_company_name()
    # A single create_name() call yields a (first, last) pair. Calling
    # create_name(full_name=False) twice would produce two given names,
    # so the "Last" column would never hold a real surname.
    fname, lname = gen_data.create_name()
    title = gen_data.create_job_title()
    # The e-mail address is derived from the same name pair so it matches
    # the First/Last columns of the record
    email = gen_data.create_email(name=(fname, lname))
    pw = gen_data.create_pw()
    street = gen_data.create_street()
    # Stored unsplit, as whatever create_city_state_zip() returns, in a
    # single column
    city_state_zip = gen_data.create_city_state_zip()
    cc = gen_data.create_cc_number()

    # Append a new user to the users list
    users.append(
        (company, fname, lname, title, email, pw, street, city_state_zip, cc))

# Create a set of labels for the first row of the excel spreadsheet
labels = [
    'Company',
    'First',
    'Last',
    'Title',
    'Email',
    'Password',
    'Street',
    'City/State/ZIP',
    'Credit Card',
]

# Create a pandas dataframe