def generate_accounts(amount):
    """Print `amount` random nco:PersonContact instances and return their IM accounts.

    For every contact a name, birthday and e-mail address are generated and
    written through ``print_instance`` / ``print_property``.  Each contact
    gets between 0 and 4 IM accounts whose URI is built from a randomly
    picked ``ACCOUNTS`` entry (element 2), the account index and the
    contact's e-mail address.

    :param amount: number of contacts to emit; values <= 0 emit nothing.
    :returns: list of every generated account URI, most recently generated
        first.
    """
    # ``sys.maxint`` only exists on Python 2; fall back to ``sys.maxsize``
    # so the function also runs on Python 3.
    max_int = getattr(sys, 'maxint', sys.maxsize)

    known_accounts = []
    for _ in range(0, amount):
        first_name, last_name = gen_data.create_name()

        # The next values are never emitted.  The calls are kept (results
        # discarded) because they presumably draw from the shared random
        # generator; dropping them would change the output for a fixed seed.
        gen_data.create_city_state_zip()
        random.randint(0, max_int)
        random.randint(0, max_int)

        birth_day = gen_data.create_birthday()
        gen_data.create_street()  # unused, kept for the same reason as above
        email_address = gen_data.create_email(name=(first_name, last_name))

        print_instance(get_random_uuid_uri(), "nco:PersonContact")
        print_property("nco:fullname", ' '.join([first_name, last_name]))
        print_property("nco:nameGiven", first_name)
        print_property("nco:nameFamily", last_name)

        for j in range(0, random.randint(0, 4)):
            account_data = get_random_in_list(ACCOUNTS)
            user_account = ''.join([account_data[2], str(j), email_address])
            print_property("nco:hasIMAccount", user_account, t="uri")
            # Appending and reversing once at the end is O(n); the former
            # insert(0, ...) shifted the whole list on every account.
            known_accounts.append(user_account)

        print_property("nco:birthDate", str(birth_day), final=True)

    known_accounts.reverse()
    return known_accounts
def generate_accounts(amount):
    """Emit `amount` random ``nco:PersonContact`` records and collect their IM accounts.

    Each record is printed via ``print_instance`` / ``print_property`` with a
    full name, given name, family name, zero to four ``nco:hasIMAccount``
    URIs and a birth date (printed last, with ``final=True``).

    :param amount: how many contacts to generate; nothing is generated for
        values <= 0.
    :returns: list of all account URIs created, newest first.
    """
    # Python 2 exposes sys.maxint, Python 3 only sys.maxsize.
    upper_bound = getattr(sys, 'maxint', sys.maxsize)

    created = []  # oldest first; reversed on return
    for _person in range(0, amount):
        first_name, last_name = gen_data.create_name()

        # Results below are unused by the output.  The calls themselves are
        # retained so that the sequence of random draws (and therefore the
        # generated data under a fixed seed) stays as before -- presumably
        # gen_data shares the module-level RNG; verify before removing.
        _zip_code, _city, _state = gen_data.create_city_state_zip()
        _address_id = str(random.randint(0, upper_bound))
        _uid = str(random.randint(0, upper_bound))
        birth_day = gen_data.create_birthday()
        _street_address = gen_data.create_street()

        email_address = gen_data.create_email(name=(first_name, last_name))

        print_instance(get_random_uuid_uri(), "nco:PersonContact")
        print_property("nco:fullname", " ".join([first_name, last_name]))
        print_property("nco:nameGiven", first_name)
        print_property("nco:nameFamily", last_name)

        account_count = random.randint(0, 4)
        for j in range(0, account_count):
            account_data = get_random_in_list(ACCOUNTS)
            user_account = "".join([account_data[2], str(j), email_address])
            print_property("nco:hasIMAccount", user_account, t="uri")
            created.append(user_account)

        print_property("nco:birthDate", str(birth_day), final=True)

    # Callers received the accounts newest-first (the old code used
    # insert(0, ...), which is quadratic); keep that order.
    return created[::-1]
def get_rows():
    """Yield synthetic customer rows as dicts, one per ``rownum`` 502..1000.

    The previous implementation concatenated every value into a string and
    fed it to ``ast.literal_eval``.  That could not work: the
    ``'closed_account'`` key had no ``:`` separator, and free text such as
    company names, e-mail addresses and dates are not Python literals.  The
    dict is now built directly, with the same keys in the same order and the
    same sequence of generator calls.

    Values are kept as native Python objects (ints, tuples, strings).  The
    two birthdays are stored via ``str()``, matching the textual form the
    old code attempted to embed.

    :returns: generator of 499 dicts.
    """
    fake = Faker()  # one instance is enough; it was re-created per row before

    def flag(threshold=99):
        # randrange(0, 101) - threshold, clamped at 0: with the default this
        # is 1 only when 100 is drawn, otherwise 0.
        return max(randrange(0, 101, 1) - threshold, 0)

    def three_part(mid_low, mid_high):
        # (3-digit, middle, 4-digit) tuple used for SSN- and phone-like values.
        return (
            randrange(101, 1000, 1),
            randrange(mid_low, mid_high, 1),
            randrange(1000, 10000, 1),
        )

    for rownum in range(502, 1001):
        yield {
            'rownum': rownum,
            'dunno': 10,
            'CC': gen_data.cc_number(),
            'Employer': gen_data.create_company_name(),
            'Custemail': gen_data.create_email(),
            'name': gen_data.create_name(),
            'occupation': gen_data.create_job_title(),
            'address_street': gen_data.create_city_state_zip(),
            'DOB': str(gen_data.create_birthday(min_age=2, max_age=85)),
            'previous_address_city_state_zip': gen_data.create_city_state_zip(),
            'altcustomer_name': fake.name(),
            'altcustomer_occupation': gen_data.create_job_title(),
            'altcustomer_dob': str(gen_data.create_birthday(min_age=2, max_age=85)),
            'ssn': three_part(10, 100),
            'phone': three_part(101, 999),
            'AccountID': randrange(100000, 100000000, 1),
            'PepFlag': flag(),
            'altcustomerssn': three_part(10, 100),
            'demarketed_customer_flag': flag(),
            'SAR_flag': flag(),
            'nolonger_a_customer': flag(),
            # threshold 90 (not 99): yields 0..10 rather than 0/1, as before
            'closed_account': flag(90),
            'High_risk_flag': flag(),
            'Risk_rating': flag(),
        }
def generate_vCard():
    """Return a random vCard 4.0 document as a single CRLF-terminated string.

    ``FN`` is always present.  Each of N, TEL, GENDER, EMAIL, IMPP, ADR,
    NOTE, ORG and BDAY is included on an independent ``random.randint(0, 1)``
    draw; the value generator for a property is only invoked when that
    property is included.  The chosen property lines are shuffled before
    being wrapped in ``BEGIN:VCARD`` / ``VERSION:4.0`` / ``END:VCARD``.
    """
    gender_initial = gender_vcard_list[random.randint(0, 4)]
    if gender_initial == 'M':
        gender = 'Male'
    else:
        gender = 'Female' if gender_initial == 'F' else None

    first_name, last_name = gen_data.create_name(gender=gender)
    street = gen_data.create_street()
    postal_code, city, state = gen_data.create_city_state_zip()

    # Builders are lazy so a value is generated only after its coin flip
    # succeeds, in this exact order.
    optional_builders = (
        lambda: 'N:{};{};;;\r\n'.format(last_name, first_name),
        lambda: 'TEL:tel:{}\r\n'.format(gen_data.create_phone()),
        lambda: 'GENDER:{}\r\n'.format(gender_initial),
        lambda: 'EMAIL:{}\r\n'.format(
            gen_data.create_email(name=(first_name, last_name)).lower()),
        lambda: 'IMPP:sip:{}@{}\r\n'.format(first_name.lower(),
                                            'sip.linphone.org'),
        lambda: 'ADR:;;{};{};{};{};\r\n'.format(street, city, state,
                                                postal_code),
        lambda: 'NOTE:{}\r\n'.format(gen_data.create_sentence()),
        lambda: 'ORG:{}\r\n'.format(gen_data.create_company_name()),
        lambda: 'BDAY:{0:%Y%m%d}\r\n'.format(gen_data.create_birthday()),
    )

    lines = ['FN:{} {}\r\n'.format(first_name, last_name)]
    for build in optional_builders:
        if random.randint(0, 1):
            lines.append(build())

    shuffle(lines)
    return ''.join(['BEGIN:VCARD\r\n', 'VERSION:4.0\r\n'] + lines
                   + ['END:VCARD\r\n'])
def fake_user():
    """Write 100 pipe-delimited fake user records to ``test_user.txt``.

    Each line holds, in order: first name, last name, street, city, state,
    zip code, date, job title, running index, e-mail, password, latitude and
    longitude.  Latitude starts at 40.4365 and grows by 0.001 per record;
    longitude starts at -99.3925 and shrinks by 0.001 per record.  Job titles
    are drawn from a pool of 200 titles generated up front.

    The file is overwritten on every call.
    """
    lat = 40.4365
    lng = -99.3925
    # range() instead of xrange() so the function runs on Python 2 and 3.
    jobs = [gen_data.create_job_title() for _ in range(200)]

    # ``with`` guarantees the handle is closed even if a generator raises
    # part-way through; the old open()/close() pair leaked it in that case.
    with open('test_user.txt', 'w') as f:
        for i in range(100):
            lat += 0.001
            lng -= 0.001
            zipcode, city, state = gen_data.create_city_state_zip()
            fields = [
                fake.first_name(),
                fake.last_name(),
                gen_data.create_street(),
                city,
                state,
                zipcode,
                fake.date(),
                str(sample(jobs, 1)[0]),
                str(i),
                fake.email(),
                fake.password(length=6, special_chars=True, digits=True,
                              upper_case=True, lower_case=True),
                str(lat),
                str(lng),
            ]
            f.write('|'.join(fields) + '\n')
def generate_vCard():
    """Build one randomised vCard (version 4.0) and return it as text.

    The card always carries an ``FN`` line.  N, TEL, GENDER, EMAIL, IMPP,
    ADR, NOTE, ORG and BDAY are each added when ``random.randint(0, 1)``
    comes up 1.  All property lines are shuffled, then framed by the
    BEGIN/VERSION/END lines.  Every line ends with CRLF.
    """
    def heads():
        # One coin flip per optional property.
        return random.randint(0, 1)

    gender_initial = gender_vcard_list[random.randint(0, 4)]
    gender = None
    if gender_initial == 'M':
        gender = 'Male'
    elif gender_initial == 'F':
        gender = 'Female'

    name_pair = gen_data.create_name(gender=gender)
    (first_name, last_name) = name_pair
    street_line = gen_data.create_street()
    zip_code, city, state = gen_data.create_city_state_zip()

    card_lines = ['FN:{} {}\r\n'.format(first_name, last_name)]

    if heads():
        card_lines.append('N:{};{};;;\r\n'.format(last_name, first_name))
    if heads():
        phone = gen_data.create_phone()
        card_lines.append('TEL:tel:{}\r\n'.format(phone))
    if heads():
        card_lines.append('GENDER:{}\r\n'.format(gender_initial))
    if heads():
        email = gen_data.create_email(name=(first_name, last_name))
        card_lines.append('EMAIL:{}\r\n'.format(email.lower()))
    if heads():
        card_lines.append(
            'IMPP:sip:{}@{}\r\n'.format(first_name.lower(), 'sip.linphone.org'))
    if heads():
        card_lines.append(
            'ADR:;;{};{};{};{};\r\n'.format(street_line, city, state, zip_code))
    if heads():
        card_lines.append('NOTE:{}\r\n'.format(gen_data.create_sentence()))
    if heads():
        card_lines.append('ORG:{}\r\n'.format(gen_data.create_company_name()))
    if heads():
        card_lines.append(
            'BDAY:{0:%Y%m%d}\r\n'.format(gen_data.create_birthday()))

    shuffle(card_lines)

    header = 'BEGIN:VCARD\r\n' + 'VERSION:4.0\r\n'
    return header + ''.join(card_lines) + 'END:VCARD\r\n'
from random import random
from random import randrange
from random import shuffle
from faker import Faker
from barnum import gen_data
import csv

# Script: writes a header plus 50,000,000 synthetic customer rows to
# 'large.csv'.
#
# Fix: ``randrange`` is used for every numeric column but was never imported
# (only ``random`` and ``shuffle`` were), so the first row raised NameError.

fake = Faker()

# Column names, in the same order as the values assembled for each row.
HEADER = [
    'rownum', 'dunno', 'CC', 'Employer', 'Custemail', 'name',
    'occupation', 'address_street', 'DOB', 'previous address_city_state_zip',
    'altcustomer_name', 'altcustomer_occupation', 'altcustomer_dob', 'ssn',
    'phone', 'AccountID', 'PepFlag', 'altcustomerssn',
    'demarketed_customer_flag', 'SAR_flag', 'nolonger_a_customer',
    'closed_account', 'High_risk_flag', 'Risk_rating',
]

with open('large.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n',)
    writer.writerow(HEADER)
    for i in range(50000000):
        row = [
            i,
            10,
            gen_data.cc_number(),
            gen_data.create_company_name(),
            gen_data.create_email(),
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_city_state_zip(),
            gen_data.create_birthday(min_age=2, max_age=85),
            gen_data.create_city_state_zip(),
            fake.name(),
            gen_data.create_job_title(),
            gen_data.create_birthday(min_age=2, max_age=85),
            # ssn: written as a 3-tuple
            (randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1)),
            # phone: written as a 3-tuple
            (randrange(101, 1000, 1), randrange(101, 999, 1), randrange(1000, 10000, 1)),
            randrange(100000, 100000000, 1),
            # Flags: 1 only when randrange draws 100, else 0.
            max((randrange(0, 101, 1) - 99), 0),
            (randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1)),
            max((randrange(0, 101, 1) - 99), 0),
            max((randrange(0, 101, 1) - 99), 0),
            max((randrange(0, 101, 1) - 99), 0),
            # closed_account uses threshold 90, so it ranges 0..10.
            max((randrange(0, 101, 1) - 90), 0),
            max((randrange(0, 101, 1) - 99), 0),
            max((randrange(0, 101, 1) - 99), 0),
        ]
        writer.writerow(row)
from barnum import gen_data
import csv

# Script: writes 100 space-delimited rows of random demographic data
# (name, job title, phone, city/state/zip) to 'demographic.csv'.
# ``name`` and ``address`` are written exactly as gen_data returns them.
with open('demographic.csv', 'w') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=' ')
    for i in range(0, 100):
        name = gen_data.create_name()
        job_title = gen_data.create_job_title()
        phone = gen_data.create_phone()
        address = gen_data.create_city_state_zip()
        csvwriter.writerow([name, job_title, phone, address])
# No explicit csvfile.close(): the ``with`` block already closes the file,
# so the former extra close() call was redundant.
f1, delimiter=',', lineterminator='\n', ) writer.writerow(['rownum'] +['dunno'] + ['CC'] + ['Employer'] + ['Custemail'] + ['name'] \ + ['occupation'] + ['address_street'] + ['DOB']+['previous address_city_state_zip']+ ['altcustomer_name'] \ + ['altcustomer_occupation'] + ['altcustomer_dob'] + ['ssn'] + ['phone'] + \ ['AccountID'] + ['PepFlag'] + ['altcustomerssn'] + ['demarketed_customer_flag'] + \ ['SAR_flag'] + ['nolonger_a_customer'] + ['closed_account'] +['High_risk_flag'] +['Risk_rating']) while i < 50000000: #Pick an account number and store it in acct acct = randrange(100000, 100000000, 1) #if the account hasn't been already generated then generate a record with all fields if d.has_key(str(acct)) == False: row = [i] + [10] + [gen_data.cc_number()]+[gen_data.create_company_name()] + \ [gen_data.create_email()]+[gen_data.create_name()] +[gen_data.create_job_title()] + \ [gen_data.create_city_state_zip()] + [gen_data.create_birthday(min_age=2, max_age=85)] + \ [gen_data.create_city_state_zip()] + [fake.name()] + [gen_data.create_job_title()] + \ [gen_data.create_birthday(min_age=2, max_age=85)] +\ [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] + \ [(randrange(101,1000,1),randrange(101,999,1),randrange(1000,10000,1))] + \ [acct] + \ [max((randrange(0,101,1)-99),0)] + \ [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-99),0)] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-90),0)] + \ [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-99),0)] d[str(acct)] = acct i = i + 1 writer.writerow(row)
#date of transaction a day later date2 = date1-timedelta(days=1) row.extend([country,date1,date2,tranType,cr_dbt,limit,tmpAmt,Balance,CCs[j], CCTypes[j],usecase,Holders[j],CCsCount[j],Cities[j],States[j],ZIPs[j],Countries[j]]) count = count + 1 checkin='' checkout='' transDetail='' #Add details or Hotel Transactions if((cat_desc=='Hotels/Motels/Inns/Resorts' or cat_desc=='Hotels, Motels, and Resorts') and (UseCase[j]=='28' or UseCase[j]=='29')): if (maxCheckin == ''): checkin=maxDate+timedelta(days=randrange(365,389,1)) checkout=checkin+timedelta(days=randrange(4,11,1)) maxCheckin=checkin tmp2=gen_data.create_name() addr=gen_data.create_city_state_zip() hotel=tmp2[1]+' Hotels; '+'; Address: '+addr[1]+' '+addr[2]+', '+addr[0] transDetail='Checkin: '+str(checkin)+'; Checkout: '+str(checkout)+'; Hotel: '+hotel else: checkin=maxCheckin + timedelta(days=randrange(2,5,1)) checkout=checkin+timedelta(days=randrange(4,11,1)) maxCheckin=checkin tmp2=gen_data.create_name() addr=gen_data.create_city_state_zip() hotel=tmp2[1]+' Hotels; '+'; Address: '+addr[1]+' '+addr[2]+', '+addr[0] transDetail='Checkin: '+str(checkin)+'; Checkout: '+str(checkout)+'; Hotel: '+hotel if((cat_desc=='Hotels/Motels/Inns/Resorts' or cat_desc=='Hotels, Motels, and Resorts') and UseCase[j]=='30'): checkin=maxDate+timedelta(days=randrange(30,200,1)) checkout=checkin+timedelta(days=randrange(4,11,1)) tmp2=gen_data.create_name() addr=gen_data.create_city_state_zip()
def create_customers(n):
    """Generate `n` synthetic customer/account rows and return them as a list.

    Each row is a flat list whose fields are appended in this order:

    1. row index, account number, account type, number of card holders,
       primary name, middle name, primary SSN (``liSSNMaster[i]``)
    2. up to three additional holders (name, middle name, SSN; blank when
       absent), scrambled card number, card type, employer, e-mail, job title
    3. current and previous address, birthday, PEP / SAR / closed flags
    4. related account number and relation type (blank unless drawn)
    5. party type/relation, start dates, large-cash flag, demarketing flag
       and date, a 0-99 number, official language, consent and channel
    6. zip code, a 0-4 number, segmentation model fields
    7. high-risk indicator flags, onboarding/transaction channels, net worth
    8. further high-risk indicators, NAICS code, products, services, SIC
       code, stock listing, high-net-worth flag, high-risk flag and rating,
       use case

    Changes relative to the earlier revision (row contents and the order of
    random draws are unchanged):

    * ``rel <> 0`` -> ``rel != 0`` (``<>`` is a syntax error on Python 3)
    * ``'\\W'`` regex written as a raw string
    * ``xrange`` -> ``range`` so the loop runs on Python 2 and 3
    * local ``zip`` renamed so the builtin is no longer shadowed
    * unused locals removed (``Yes_No_50``, ``Occupation``, ``CC_list``,
      ``risk_flags``, the duplicate ``acct_list``)
    * five copy-pasted ``if Segment_ID == k`` branches collapsed to indexing

    :param n: number of rows to generate.  Reads the module-level
        ``liSSNMaster`` list, which must hold at least ``n`` entries.
    :returns: list of rows (each a list).
    """
    global liSSNMaster

    # ---- weighted pick lists (duplicates encode the weights) -------------
    # Client whose net worth is over $500K
    HighNetWorth = ['Yes'] + ['No'] * 30
    # Type of related account
    Related_Type = ['Primary', 'Secondary', 'Joint']
    # How the account was opened
    Party_Type = ['Person', 'Non-Person']
    # Customer vs non-customer
    Party_Relation = ['Customer', 'Non-Customer']
    # Random Yes/No flag
    Yes_No = ['Yes'] + ['No'] * 12
    # Random Yes/No consent
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    # Official language
    Official_Lang = ['English'] * 3 + ['French']
    # Method of communication
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']

    # Segmentation model tables; all six are indexed by the same Segment_ID.
    Seg_Model_Type = ['LOB Specific', 'Profitability', 'Geographical', 'Behavioral', 'Risk Tolerance']
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Name = ['IRRI', 'CRS Risk Score', 'Geo Risk', 'Financial Behavior Risk', 'CM Risk']
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = ['High Risk Tier', 'Mid Risk Tier', 'Low Risk Tier', 'Vertical Risk',
                             'Geographical Risk']

    # Every rare Yes/No/blank indicator draws from the same distribution:
    # 1 'Yes', 2 'No', 392 blanks.
    Embassy_Consulate = ['Yes'] + ['No'] * 2 + [''] * 392
    Arms_Manufacturer = Embassy_Consulate
    Auction = Embassy_Consulate
    CashIntensive_Business = Embassy_Consulate
    Casino_Gambling = Embassy_Consulate
    Complex_HI_Vehicle = Embassy_Consulate
    Dealer_Precious_Metal = Embassy_Consulate
    Digital_PM_Operator = Embassy_Consulate
    Exchange_Currency = Embassy_Consulate
    Foreign_Financial_Institution = Embassy_Consulate
    Foreign_Government = Embassy_Consulate
    Foreign_NonBank_Financial_Institution = Embassy_Consulate
    Internet_Gambling = Embassy_Consulate
    Medical_Marijuana_Dispensary = Embassy_Consulate
    Money_Service_Business = Embassy_Consulate
    NonRegulated_Financial_Institution = Embassy_Consulate
    Not_Profit = Embassy_Consulate
    Privately_ATM_Operator = Embassy_Consulate
    Sales_Used_Vehicles = Embassy_Consulate
    Third_Party_Payment_Processor = Embassy_Consulate
    Transacting_Provider = Embassy_Consulate

    # Client onboarding channel
    Channel_Onboarding = ['E-mail', 'In Person', 'In person - In Branch/Bank Office',
                          'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
                          'Request for Proposal (RFP)'] + ['Not Applicable'] * 10
    # Ongoing transaction channel
    Channel_Ongoing_Transactions = ['ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
                                    'OTC Communication System', 'Phone'] \
        + ['Online'] * 4 + ['In Person'] * 31

    # Products
    Products = ['Certificate of Deposit',
                'Checking Account',
                'Credit Card',
                'Custodial and Investment Agency - Institutional',
                'Custodial and Investment Agency - Personal',
                'Custodial/Trust Outsourcing services (BTOS)',
                'Custody Accounts (PTIM)',
                'Custody Accounts (RSTC)',
                'DTF (BHFA)',
                'Investment Agency - Personal',
                'Investment Management Account (PTIM)',
                'Lease',
                'Loan / Letter of Credit',
                'Money Market',
                'Mortgage / Bond / Debentures',
                'None',
                'Savings Account',
                'Trust Administration - Irrevocable and Revocable (PTIM)',
                'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
                ] + ['Nondeposit Investment products'] * 14 + ['Investment Agency - Institutional'] * 5
    # Services
    Services = ['Benefit Payment services',
                'Domestic Wires and Direct Deposit / ACH',
                'Family Office services (FOS)',
                'Fiduciary services',
                'International Wires and IAT',
                'Investment Advisory services (IAS)',
                'Investment services',
                'None',
                'Online / Mobile Banking',
                'Payroll',
                'Short Term Cash Management',
                'Trust services',
                'Trustee services',
                'Vault Cash services',
                ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    # SIC codes
    SIC_Code = ['6021 National Commercial Banks',
                '6211 Security Brokers Dealers and Flotation Companies',
                '6282 Investment Advice',
                '6311 Life Insurance',
                '6733 Trusts Except Educational Religious and Charitable',
                '8999 services NEC',
                ] + ['6722 Management Investment Offices Open-End'] * 12
    # Stock market listing
    Stock_Market_Listing = ['Australian Stock Exchange',
                            'Brussels Stock Exchange',
                            'Montreal Stock Exchange',
                            'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
                            'Toronto Stock Exchange',
                            ] + ['Not Found'] * 30
    # Selector for the low-net-worth bracket
    LowNet = [1, 2] + [0] * 5
    # Consumer vs business account
    Acct_Type = ['B'] + ['C'] * 5
    # Number of credit card holders per account
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    # Use-case (scenario) codes
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 \
        + [2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38] * 7 \
        + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36] * 65 \
        + [37] * 73 + [40, 41] * 2
    # High-risk rating values
    refrating = ['1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5',
                 '5', '5', '5']

    fake = Faker()
    start = 10786147
    acct_list = []  # every account number generated so far
    liCSV = []

    for i in range(n):
        # ---- per-customer risk flags ---------------------------------
        PEP = 'No'         # Politically Exposed Person
        SAR = 'No'         # Suspicious Activity Report on file
        Clsd = 'No'        # closed account
        high_risk = 'No'   # high-risk customer flag
        hr_rating = ''     # high-risk rating
        demarket = 'No'    # demarketed by the bank
        dem_date = ''

        # Closed-account flag: true only when randrange(0, 98) draws 97.
        if max((randrange(0, 98, 1) - 96), 0) == 1:
            Clsd = 'Yes'

        # Number of credit card holders on this account
        No_CCs = random.choice(Number_CC)

        # Account numbers are strictly increasing with a random gap.
        acct = start + 1 + randrange(1, 10, 1)
        start = acct

        # Primary holder: full name from Faker, middle name from gen_data.
        name = fake.name()
        tmp = gen_data.create_name()
        acct_list.append(acct)

        row = [i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0], liSSNMaster[i]]

        # ---- additional holders --------------------------------------
        names = []
        ssn = []
        mdl = []  # middle names, to reduce name duplicates
        for j in range(No_CCs - 1):
            names.append(fake.name())
            tmp2 = gen_data.create_name()
            mdl.append(tmp2[0])
            # Pull from the SSN master list, avoiding the primary's index.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])
        # Pad with blanks so each list always has exactly three entries.
        for k in range(4 - No_CCs):
            names.append('')
            ssn.append('')
            mdl.append('')

        # ---- credit card ---------------------------------------------
        CC_NO = gen_data.create_cc_number()
        # CC_NO[1][0] is taken as the card number; its last four digits are
        # combined with timestamp digits and a random number to scramble it.
        CC_TRANS = CC_NO[1][0]
        dt = str(datetime.now())
        clean = re.sub(r'\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))

        row.extend([names[0], mdl[0], ssn[0],
                    names[1], mdl[1], ssn[1],
                    names[2], mdl[2], ssn[2],
                    printCC, CC_NO[0],
                    gen_data.create_company_name() + ' ' + tmp[1],
                    gen_data.create_email(), gen_data.create_job_title()])

        # ---- addresses -----------------------------------------------
        zip_code = random.choice(zips.zip)
        addr = gen_data.create_city_state_zip([zip_code])
        zip2 = random.choice(zips.zip)
        addr2 = gen_data.create_city_state_zip([zip2])

        lrg_cash_ex = random.choice(Yes_No)

        # ---- SARs and demarketed clients (closed accounts only) ------
        if Clsd == 'Yes':
            # Closed accounts demarketed without ever having a SAR filed
            if max((randrange(0, 101, 1) - 99), 0) == 1 and SAR == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # Closed accounts with a SAR
            if max((randrange(0, 11, 1) - 9), 0) == 1 and demarket == 'No':
                SAR = 'Yes'
                # Most closed accounts with SARs are demarketed
                if max((randrange(0, 11, 1) - 9), 0) == 0:
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)
        # NOTE(review): the original indentation was lost; the PEP draw is
        # assumed to apply to every customer rather than only to closed
        # accounts -- confirm against the original file.
        if max((randrange(0, 101, 1) - 99), 0) == 1:
            PEP = 'Yes'

        row.extend([addr[0], addr[1], zip_code, 'US',
                    addr2[0], addr2[1], zip2, 'US',
                    gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR, Clsd])

        # ---- related account -----------------------------------------
        # Only start linking once 10,000 accounts exist, to avoid
        # duplicating accounts at the beginning.
        rel = 0
        if i > 10000:
            rel = int(random.choice(acct_list)) * max((randrange(0, 10001, 1) - 9999), 0)
        if rel != 0:
            row.append(rel)
            row.append(random.choice(Related_Type))
        else:
            row.append('')
            row.append('')

        # ---- party / consent -----------------------------------------
        party_start = gen_data.create_date(past=True)
        Consent_Share = random.choice(Yes_No_Consent)
        row.extend([random.choice(Party_Type), random.choice(Party_Relation), party_start,
                    gen_data.create_date(past=True), lrg_cash_ex, demarket, dem_date,
                    randrange(0, 100, 1), random.choice(Official_Lang)])
        # Preferred contact channel only when the customer consents.
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])
        row.extend([zip_code, randrange(0, 5, 1)])

        # ---- segmentation model --------------------------------------
        Segment_ID = randrange(0, 5, 1) % 5
        row.extend([Model_ID[Segment_ID], Seg_Model_Type[Segment_ID],
                    Seg_Model_Name[Segment_ID], Seg_Model_Group[Segment_ID],
                    Seg_Model_Description[Segment_ID], Seg_Model_Score[Segment_ID]])

        # ---- first group of risk indicators --------------------------
        hr0 = random.choice(Arms_Manufacturer)
        hr01 = random.choice(Auction)
        hr02 = random.choice(CashIntensive_Business)
        hr03 = random.choice(Casino_Gambling)
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        # ---- net worth -----------------------------------------------
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            row.append(max(max((randrange(0, 101, 1) - 99), 0) * randrange(1000000, 25000000, 1),
                           randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        # ---- second group of risk indicators -------------------------
        hr1 = random.choice(Complex_HI_Vehicle)
        hr2 = random.choice(Dealer_Precious_Metal)
        hr3 = random.choice(Digital_PM_Operator)
        hr4 = random.choice(Embassy_Consulate)
        hr5 = random.choice(Exchange_Currency)
        hr6 = random.choice(Foreign_Financial_Institution)
        hr7 = random.choice(Foreign_Government)
        hr8 = random.choice(Foreign_NonBank_Financial_Institution)
        hr9 = random.choice(Internet_Gambling)
        hr10 = random.choice(Medical_Marijuana_Dispensary)
        hr11 = random.choice(Money_Service_Business)
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(NonRegulated_Financial_Institution)
        hr14 = random.choice(Not_Profit)
        # (no hr15: occupation is already added earlier through gen_data)
        hr16 = random.choice(Privately_ATM_Operator)
        hr17 = random.choice(Products)
        hr18 = random.choice(Sales_Used_Vehicles)
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Third_Party_Payment_Processor)
        hr23 = random.choice(Transacting_Provider)

        # Any positive indicator makes the customer high risk.
        risk_indicators = (PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03,
                           hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
                           hr13, hr14, hr16, hr17, hr18, hr22, hr23, HighNetWorthFlag)
        if 'Yes' in risk_indicators:
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)

        # Up to three further independent chances to be flagged high risk.
        # ``and`` short-circuits, so a draw happens only when the guard
        # holds -- exactly as with the former nested ifs.
        if SAR == 'No' and high_risk == 'No':
            if max((randrange(0, 101, 1) - 99), 0) == 1:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if PEP == 'No' and high_risk == 'No':
            if max((randrange(0, 101, 1) - 99), 0) == 1:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if high_risk == 'No':
            if max((randrange(0, 101, 1) - 99), 0) == 1:
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)

        row.extend([hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12, hr13, hr14,
                    hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
                    HighNetWorthFlag, high_risk, hr_rating, random.choice(Use_Case)])
        liCSV.append(row)

    return liCSV
+ ["Risk_rating"] ) while i < 50000000: # Pick an account number and store it in acct acct = randrange(100000, 100000000, 1) # if the account hasn't been already generated then generate a record with all fields if d.has_key(str(acct)) == False: row = ( [i] + [10] + [gen_data.cc_number()] + [gen_data.create_company_name()] + [gen_data.create_email()] + [gen_data.create_name()] + [gen_data.create_job_title()] + [gen_data.create_city_state_zip()] + [gen_data.create_birthday(min_age=2, max_age=85)] + [gen_data.create_city_state_zip()] + [fake.name()] + [gen_data.create_job_title()] + [gen_data.create_birthday(min_age=2, max_age=85)] + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))] + [(randrange(101, 1000, 1), randrange(101, 999, 1), randrange(1000, 10000, 1))] + [acct] + [max((randrange(0, 101, 1) - 99), 0)] + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))] + [max((randrange(0, 101, 1) - 99), 0)] + [max((randrange(0, 101, 1) - 99), 0)] + [max((randrange(0, 101, 1) - 99), 0)] + [max((randrange(0, 101, 1) - 90), 0)] + [max((randrange(0, 101, 1) - 99), 0)]
def gen_tran(MCC_credits, MCC_debits, Tran_Country_Credits, Tran_Country_Debits,
             Tran_Type_C, Tran_Type_D, Upper_Limit, Delta, count, j, usecase):
    """Generate the transaction history of one account and write it out.

    Draws a credit limit for the account at index ``j``, simulates 100-149
    transactions with posting dates starting after 2015-01-01, and writes one
    row per transaction through the module-level ``writer``.  A final
    settlement row is written afterwards: a 'Credit Balance Refund' when the
    closing balance exceeds the limit, otherwise a 'Payment' of
    ``limit - Balance``.

    Besides its arguments the function reads the module-level per-account
    lists ``ACCTs``, ``CCs``, ``CCTypes``, ``Holders``, ``CCsCount``,
    ``Cities``, ``States``, ``ZIPs``, ``Countries`` and ``UseCase``, plus
    ``Airport_Code``, ``Merchant_Category`` and ``python_merchant_cat``.

    Args:
        MCC_credits: merchant category codes to draw from for credits.
        MCC_debits: merchant category codes to draw from for debits.
        Tran_Country_Credits: countries to draw from for credits.
        Tran_Country_Debits: countries to draw from for debits.
        Tran_Type_C: transaction type names to draw from for credits.
        Tran_Type_D: transaction type names to draw from for debits.
        Upper_Limit: exclusive upper bound of the credit/debit draw.
        Delta: offset added to that draw; together with ``Upper_Limit`` it
            controls how often an in-limit transaction becomes a credit.
        count: running row counter used as the prefix of each row id.
        j: index of the account in the module-level per-account lists.
        usecase: value copied verbatim into every row.

    Returns:
        None.  ``count`` is advanced locally only; the caller does not see
        the updated value.
    """
    hotel_categories = ('Hotels/Motels/Inns/Resorts',
                        'Hotels, Motels, and Resorts')

    # Plain decimal literals: the previous ``date(2015,01,01)`` spelling is a
    # SyntaxError on Python 3.
    startDate = date(2015, 1, 1)
    # Pick out account based on counter
    acct = ACCTs[j]
    # Credit limit: normally 1000-24000; when the 1-100 draw comes up 100
    # (about 1 customer in 100) the 25000-49000 draw wins instead.
    limit = max(
        max((randrange(1, 101, 1) - 99), 0) * randrange(25000, 50000, 1000),
        randrange(1000, 25000, 1000))
    tmpAmt = 0
    Balance = limit
    maxDate = startDate
    # Number of transactions generated for this customer
    NoTrans = randrange(100, 150, 1)
    # Latest check-in / booking date handed out so far ('' until first use)
    maxCheckin = ''
    maxBook = ''

    for k in range(NoTrans):
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cr_dbt = 'D'
        tranType = ''
        country = []
        cat_desc = ''
        flag = 0
        # While the balance is positive and at most 120% of the limit, draw
        # a credit (tmpAmt > 0) or a debit (tmpAmt == 0); the odds are set
        # by Upper_Limit and Delta.
        if Balance > 0 and Balance <= limit * 1.2:
            tmpAmt = (max((randrange(1, Upper_Limit, 1) + Delta), 0)
                      * randrange(1, Balance + 1, 1))
            flag = 1
        # Gap between this posting date and the previous one
        tdelta = timedelta(days=randrange(1, 4, 1))
        row = [str(count) + '_' + dt] + [acct]

        if flag == 1 and tmpAmt == 0:
            # Debit drawn against the available balance
            tmpAmt = random.randrange(1, Balance + 1, 1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch = gen_data.create_company_name()
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Debits)
        elif flag == 1 and tmpAmt > 0:
            # Credit while within the limit
            cr_dbt = 'C'
            tranType = random.choice(Tran_Type_C)
            Balance = Balance + tmpAmt
            merch = ''
            cat = random.choice(MCC_credits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            if tranType == 'Merchant Credit':
                merch = gen_data.create_company_name()
                cat = random.choice(Merchant_Category.Green)
                cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            if tranType == 'Refund':
                cat = '0000'
                cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Credits)
        elif flag == 0 and Balance > limit:
            # Balance ran past 120% of the limit: spend part of the excess
            tmpAmt = random.randrange(1, Balance - limit + 1, 1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch = gen_data.create_company_name()
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Debits)
        elif flag == 0 and Balance <= 0:
            # Nothing left to spend: the customer makes a payment
            cr_dbt = 'C'
            tranType = 'Payment'
            # Floor division keeps the bound an int when ``/`` is true
            # division; randrange rejects a float stop value.
            tmpAmt = random.randrange(1, limit // 2, 1)
            Balance = Balance + tmpAmt
            merch = ''
            cat = '1111'
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Credits)

        # date1 is the posting date; date2 (the transaction date) is the day
        # before it.
        date1 = maxDate + tdelta
        maxDate = date1
        date2 = date1 - timedelta(days=1)
        row.extend([country, date1, date2, tranType, cr_dbt, limit, tmpAmt,
                    Balance, CCs[j], CCTypes[j], usecase, Holders[j],
                    CCsCount[j], Cities[j], States[j], ZIPs[j], Countries[j]])
        count = count + 1

        transDetail = ''
        if cat_desc in hotel_categories:
            use_case = UseCase[j]
            if use_case in ('28', '29', '30'):
                if use_case == '30':
                    checkin = maxDate + timedelta(days=randrange(30, 200, 1))
                elif maxCheckin == '':
                    checkin = maxDate + timedelta(days=randrange(365, 389, 1))
                else:
                    checkin = maxCheckin + timedelta(days=randrange(2, 5, 1))
                checkout = checkin + timedelta(days=randrange(4, 11, 1))
                if use_case != '30':
                    # Only use cases 28/29 chain each stay after the last one
                    maxCheckin = checkin
                tmp2 = gen_data.create_name()
                addr = gen_data.create_city_state_zip()
                hotel = (tmp2[1] + ' Hotels; ' + '; Address: ' + addr[1] + ' '
                         + addr[2] + ', ' + addr[0])
                transDetail = ('Checkin: ' + str(checkin) + '; Checkout: '
                               + str(checkout) + '; Hotel: ' + hotel)
        elif cat_desc == 'Airlines':
            use_case = UseCase[j]
            if use_case in ('31', '32', '33'):
                if use_case == '33' or maxBook == '':
                    booking = maxDate + timedelta(days=randrange(1, 15, 1))
                else:
                    booking = maxBook + timedelta(days=randrange(1, 15, 1))
                if use_case != '33':
                    # Only use cases 31/32 chain each booking after the last
                    maxBook = booking
                tmp2 = gen_data.create_name()
                addr = gen_data.create_city_state_zip()
                transDetail = ('Date Booked: ' + str(booking)
                               + '; Name Booked: ' + tmp2[0] + tmp2[1]
                               + '; Address: ' + addr[1] + ' ' + addr[2]
                               + ', ' + addr[0]
                               + '; Source :' + random.choice(Airport_Code)
                               + '; Destination:' + random.choice(Airport_Code))
        row.append(transDetail)
        writer.writerow(row)

    # After all transactions: refund an overpaid account, otherwise add a
    # customer payment of limit - Balance.
    if Balance > limit:
        row = ([str(count) + '_' + dt] + [acct] + ['Uber Bank'] + ['0000']
               + ['Refund to Customer from Bank']
               + [random.choice(Tran_Country_Debits)])
        date1 = maxDate + timedelta(days=90)
        date2 = date1 - timedelta(days=1)
        row.extend([date1, date2, 'Credit Balance Refund', 'D', limit,
                    Balance - limit, limit, CCs[j], CCTypes[j], usecase,
                    Holders[j], CCsCount[j], Cities[j], States[j], ZIPs[j],
                    Countries[j], ''])
    else:
        date1 = maxDate + tdelta
        date2 = date1 - timedelta(days=1)
        row = ([str(count) + '_' + dt] + [acct] + ['Customer Payment']
               + ['1111'] + ['Customer Payment']
               + [random.choice(Tran_Country_Credits)])
        row.extend([date1, date2, 'Payment', 'C', limit, limit - Balance,
                    limit, CCs[j], CCTypes[j], usecase, Holders[j],
                    CCsCount[j], Cities[j], States[j], ZIPs[j], Countries[j],
                    ''])
    writer.writerow(row)
sys.stdout.write('\tnco:hasPhoneNumber <tel:+11111111111>.\n') sys.stdout.write('\n') sys.stdout.write('<tel:+11111111111> a nco:PhoneNumber; \n') sys.stdout.write('\tnco:phoneNumber "(111) 111-1111".\n') sys.stdout.write('\n') #TODO need to create some email folders myOwnPhoneNumberURI = "tel:+11111111111" previousContacts = [] previousEmailAddresses = [] previousIMAccounts = [] allchars = string.maketrans('', '') for dummy in range(0, count): firstName, lastName = gen_data.create_name() zip, city, state = gen_data.create_city_state_zip() postalAddressID = str(random.randint(0, sys.maxint)) UID = str(random.randint(0, sys.maxint)) phoneNumber = gen_data.create_phone() phoneUri = 'tel:+1' + phoneNumber.translate(allchars, ' -()') birthDay = gen_data.create_birthday() streetAddress = gen_data.create_street() emailAddress = gen_data.create_email(name=(firstName, lastName)) xmppAddress = str(firstName + "." + lastName + "@gmail.com").lower() hasIMAccount = False hasPhoneNumber = False jobTitle = gen_data.create_job_title() generatePostalAddress() generateEmailAddress()
import random from barnum import gen_data import addressbook_pb2 # Barnum generates US data but that's ok for the example names = [gen_data.create_name() for _ in range(0, 15)] phones = [gen_data.create_phone() for _ in range(0, 30)] postcodes = [gen_data.create_city_state_zip() for _ in range(0, 15)] streets = [gen_data.create_street() for _ in range(0, 30)] contacts = [] for name in names: address = {} # Simulate the fact that postcode are optionals if random.choice([True, False]): address['postcode'] = random.choice(postcodes)[0] address['address_lines'] = random.sample(streets, random.randint(0, 2)) phone_numbers = [] for _ in range(0, random.randint(0, 2)): phone_numbers.append({ 'type': random.choice(['MOBILE', 'LANDLINE']), 'number': random.choice(phones) }) contacts.append({ 'first_name': name[0], 'last_name': name[1], 'address': address, 'phone_numbers': phone_numbers
comments_count = "0" community_id = "1" created_at = str(datetime.datetime.now())[0:-7] currency = "USD" deleted = "0" delta = "1" description = "shipping" destination = "NULL" language = "NULL" last_modified = "NULL" listing_shape_id = "1" listing_type_old = "NULL" old_category_id = "NULL" _open = "1" organization_id = "NULL" origin = gen_data.create_city_state_zip()[0] pickup_enabled = "0" price_cents = str(random.randint(0, 10000)) privacy = "private" quantity = "NULL" quantity_selector = "NULL" require_shipping_address = "1" shape_name_tr_key = get_random_tr_key() share_type_id = "NULL" share_type_old = "NULL" #INSERT INTO `listings` (`action_button_tr_key`, `author_id`, `category_id`, `category_old`, #`comments_count`, `community_id`, `created_at`, `currency`, `deleted`, `delta`, `description`, `destination`, #`language`, `last_modified`, `listing_shape_id`, `listing_type_old`, `old_category_id`, `open`, `organization_id`, #`origin`, `pickup_enabled`, `price_cents`, `privacy`, `quantity`, `quantity_selector`, `require_shipping_address`, #`shape_name_tr_key`, `share_type_id`, `share_type_old`, `shipping_price_additional_cents`,
import random from barnum import gen_data import addressbook_pb2 # Barnum generates US data but that's ok for the example names = [gen_data.create_name() for _ in range(0, 15)] phones = [gen_data.create_phone() for _ in range(0, 30)] postcodes = [gen_data.create_city_state_zip() for _ in range(0, 15)] streets = [gen_data.create_street() for _ in range(0, 30)] contacts = [] for name in names: address = {} # Simulate the fact that postcode are optionals if random.choice([True, False]): address['postcode'] = random.choice(postcodes)[0] address['address_lines'] = random.sample(streets, random.randint(0, 2)) phone_numbers = [] for _ in range(0, random.randint(0, 2)): phone_numbers.append({ 'type': random.choice(['MOBILE', 'LANDLINE']), 'number': random.choice(phones) }) contacts.append({ 'first_name': name[0], 'last_name': name[1], 'address': address,
import random
from random import shuffle
from faker import Faker
from barnum import gen_data
import csv

# Writes 500 synthetic transaction rows to largetrns.csv.
# NOTE(review): Python_aba_transit_numbers and python_account_ID are used
# below but are not imported in the visible part of this script - confirm
# they are brought into scope elsewhere.

fake = Faker()
ON_US_INDICATOR = ['ON', 'OFF']
DebitCredit = ['CR', 'DR']
bank = ['0', '8', '9']
select = '1'
ATM_FLAG = ['1', '0']


def _rare_hit():
    """Return 1 when a 0-100 draw comes up 100, otherwise 0."""
    # random.randrange: the bare name randrange is never imported here, so
    # the original call raised NameError on the first row.
    return max((random.randrange(0, 101, 1) - 99), 0)


with open('largetrns.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n',)
    writer.writerow(['rownum', 'TXN_ROUTING_TRANSIT', 'DEBIT_CREDIT_INDICATOR',
                     'Bank', 'AccountID', 'Amount', 'ON_US_INDICATOR',
                     'COMPANY_NAME', 'DATE_POSTED', 'City_State_Zip',
                     'ATM_FLAG', 'RDCFLAG'])
    for i in range(500):
        # Amount: usually 5-34999; on a rare hit the 20000-499999 draw wins.
        amount = max(_rare_hit() * random.randrange(20000, 500000, 1),
                     random.randrange(5, 35000, 1))
        row = [
            i,
            random.choice(Python_aba_transit_numbers.aba),
            random.choice(DebitCredit),
            random.choice(bank),
            random.choice(python_account_ID.accountid),
            amount,
            random.choice(ON_US_INDICATOR),
            gen_data.create_company_name(),
            gen_data.create_birthday(min_age=1, max_age=8),
            gen_data.create_city_state_zip(),
            # str * int: the flag text on a rare hit, otherwise ''.
            random.choice(ATM_FLAG) * _rare_hit(),
            random.choice(ATM_FLAG) * _rare_hit(),
        ]
        writer.writerow(row)
sys.stdout.write('\tnco:hasPhoneNumber <tel:+11111111111>.\n') sys.stdout.write('\n') sys.stdout.write('<tel:+11111111111> a nco:PhoneNumber; \n') sys.stdout.write('\tnco:phoneNumber "(111) 111-1111".\n') sys.stdout.write('\n') #TODO need to create some email folders myOwnPhoneNumberURI = "tel:+11111111111" previousContacts = [] previousEmailAddresses = [] previousIMAccounts = [] allchars = string.maketrans('','') for dummy in range (0, count): firstName, lastName = gen_data.create_name() zip, city, state = gen_data.create_city_state_zip() postalAddressID=str(random.randint(0, sys.maxint)) UID = str(random.randint(0, sys.maxint)) phoneNumber = gen_data.create_phone() phoneUri = 'tel:+1' + phoneNumber.translate(allchars,' -()') birthDay = gen_data.create_birthday() streetAddress = gen_data.create_street() emailAddress = gen_data.create_email(name=(firstName, lastName)) xmppAddress = str(firstName+"." + lastName + "@gmail.com").lower() hasIMAccount = False hasPhoneNumber = False jobTitle = gen_data.create_job_title() generatePostalAddress() generateEmailAddress()
def pop_transDetail(cat_desc, maxDate, j, maxBook, maxCheckin, randomrange,
                    randomchoice):
    """Build the free-text detail for a hotel or airline transaction.

    The use case is looked up as ``UseCase[j]`` in the module-level
    ``UseCase`` list.  For a matching category whose use case is not one of
    the codes handled below, the 2000-01-01 placeholder date is used.

    Args:
        cat_desc: merchant category description of the transaction.
        maxDate: date the check-in / booking date is offset from.
        j: index into the module-level ``UseCase`` list.
        maxBook: previous booking date for use cases 31/32, or '' if none.
        maxCheckin: previous check-in date for use cases 28/29, or '' if
            none.
        randomrange: callable invoked as ``randomrange(start, stop, step)``.
        randomchoice: callable invoked with the list of airport codes.

    Returns:
        The detail string, or '' when ``cat_desc`` is neither one of the
        two hotel descriptions nor 'Airlines'.  ``maxBook`` and
        ``maxCheckin`` are only read; nothing is handed back for them, so a
        caller that chains dates has to track them itself.
    """
    if cat_desc in ('Hotels/Motels/Inns/Resorts',
                    'Hotels, Motels, and Resorts'):
        # Name and address are generated only once a category has matched;
        # every other transaction returns '' without paying for them.
        tmp2 = gen_data.create_name()
        addr = gen_data.create_city_state_zip()
        checkin = date(2000, 1, 1)
        use_case = UseCase[j]
        if use_case == '28' or use_case == '29':
            if maxCheckin == '':
                checkin = maxDate + timedelta(days=randomrange(365, 389, 1))
            else:
                checkin = maxCheckin + timedelta(days=randomrange(2, 5, 1))
        elif use_case == '30':
            checkin = maxDate + timedelta(days=randomrange(30, 200, 1))
        checkout = checkin + timedelta(days=randomrange(4, 11, 1))
        hotel = (tmp2[1] + ' Hotels; ' + '; Address: ' + addr[1] + ' '
                 + addr[2] + ', ' + addr[0])
        return ('Checkin: ' + str(checkin) + '; Checkout: ' + str(checkout)
                + '; Hotel: ' + hotel)

    if cat_desc == 'Airlines':
        tmp2 = gen_data.create_name()
        addr = gen_data.create_city_state_zip()
        booking = date(2000, 1, 1)
        use_case = UseCase[j]
        if use_case == '31' or use_case == '32':
            if maxBook == '':
                booking = maxDate + timedelta(days=randomrange(1, 15, 1))
            else:
                booking = maxBook + timedelta(days=randomrange(1, 15, 1))
        elif use_case == '33':
            booking = maxDate + timedelta(days=randomrange(1, 15, 1))
        Airport_Code = [
            '0AK', '16A', '1G4', '2A3', '2A9', '3A5', '3T7', '3W2', '6R7', '74S',
            'A61', 'A85', 'ABE', 'ABI', 'ABQ', 'ABR', 'ABY', 'ACB', 'ACK', 'ACT',
            'ACV', 'ACY', 'ADK', 'ADQ', 'AEX', 'AFM', 'AGC', 'AGN', 'AGS', 'AHN',
            'AIA', 'AID', 'AIY', 'AIZ', 'AKN', 'AKP', 'AKW', 'ALB', 'ALM', 'ALN',
            'ALO', 'ALS', 'ALW', 'AMA', 'ANB', 'ANC', 'AND', 'ANI', 'AOO', 'APF',
            'APN', 'AQH', 'AQT', 'ART', 'ASE', 'ASN', 'AST', 'ATK', 'ATL', 'ATW',
            'ATY', 'AUG', 'AUK', 'AUS', 'AVL', 'AVP', 'AWI', 'AXN', 'AZO', 'BAF',
            'BAK', 'BCE', 'BDE', 'BDL', 'BDR', 'BED', 'BEH', 'BET', 'BFD', 'BFF',
            'BFI', 'BFL', 'BGM', 'BGR', 'BHB', 'BHM', 'BID', 'BIG', 'BIL', 'BIS',
            'BJI', 'BKL', 'BKW', 'BKX', 'BLI', 'BLM', 'BLV', 'BMG',
            'BMI', 'BNA', 'BOI', 'BOS', 'BPK', 'BPT', 'BQK', 'BQN', 'BRD', 'BRL',
            'BRO', 'BRW', 'BTI', 'BTL', 'BTM', 'BTR', 'BTV', 'BUF', 'BUR', 'BVK',
            'BWG', 'BWI', 'BZN', 'CAE', 'CAK', 'CCR', 'CDB', 'CDC', 'CDV', 'CDW',
            'CEC', 'CEF', 'CEZ', 'CFK', 'CGA', 'CGF', 'CGI', 'CGX', 'CHA', 'CHO',
            'CHS', 'CIC', 'CID', 'CIU', 'CKB', 'CLE', 'CLL', 'CLM', 'CLT', 'CMH',
            'CMI', 'CMX', 'CNM', 'CNY', 'COD', 'COE', 'COS', 'COU', 'CPR', 'CPX',
            'CRP', 'CRQ', 'CRW', 'CSG', 'CVG', 'CVO', 'CVX', 'CWA', 'CWI', 'CYS',
            'D76', 'DAB', 'DAL', 'DAN', 'DAY', 'DBQ', 'DCA', 'DDC', 'DDH', 'DEC',
            'DEN', 'DET', 'DFW', 'DHN', 'DIK', 'DLG', 'DLH', 'DNV', 'DRO', 'DRT',
            'DSM', 'DTW', 'DUJ', 'DUT', 'DUY', 'DVL', 'DVT', 'DXR', 'EAR', 'EAT',
            'EAU', 'EEK', 'EEN', 'EFD', 'EFK', 'EGE', 'EKM', 'EKO', 'ELI', 'ELM',
            'ELO', 'ELP', 'ELY', 'ENA', 'ENM', 'ENW', 'ERI', 'ESC', 'ESF', 'EUG',
            'EVV', 'EWB', 'EWN', 'EWR', 'EWU', 'EYW', 'FAI', 'FAQ', 'FAR', 'FAT',
            'FAY', 'FHR', 'FHU', 'FKL', 'FLG', 'FLL', 'FLO', 'FMN', 'FNL', 'FNT',
            'FOD', 'FOE', 'FRG', 'FRM', 'FSD', 'FSM', 'FTW', 'FWA', 'FYU', 'FYV',
            'GAL', 'GAM', 'GBD', 'GBH', 'GCC', 'GCK', 'GCN', 'GED', 'GEG', 'GFK',
            'GFL', 'GGG', 'GGV', 'GGW', 'GJT', 'GKN', 'GLD', 'GLH', 'GLR', 'GLS',
            'GNV', 'GON', 'GPI', 'GPT', 'GPZ', 'GRB', 'GRI', 'GRK', 'GRO', 'GRR',
            'GSN', 'GSO', 'GSP', 'GST', 'GTF', 'GTR', 'GUC', 'GUM', 'GUP', 'GYH',
            'GYR', 'GYY', 'HDN', 'HFD', 'HGR', 'HIB', 'HII', 'HKS', 'HKY', 'HLA',
            'HLN', 'HND', 'HNH', 'HNL', 'HNM', 'HNS', 'HOB', 'HOM', 'HON', 'HOT',
            'HOU', 'HPB', 'HPN', 'HRL', 'HRO', 'HSL', 'HSV', 'HTS', 'HUF', 'HUT',
            'HVN', 'HXD', 'HYA', 'HYL', 'HYS', 'IAD', 'IAH', 'IAN', 'ICT', 'IDA',
            'IFP', 'IGM', 'IIK', 'ILE', 'ILG', 'ILI', 'ILL', 'ILM', 'IMT', 'IND',
            'INL', 'INT', 'IPL', 'IPT', 'IRK', 'ISN', 'ISO', 'ISP', 'ITH', 'ITO',
            'IWA', 'IWD', 'IXD', 'IYK', 'JAC', 'JAN', 'JAX', 'JBR', 'JEF', 'JFK',
            'JHW', 'JLN', 'JMS', 'JNU', 'JRB', 'JST', 'JVL', 'JXN', 'KAE', 'KAL',
            'KDK', 'KEB', 'KKA', 'KLG', 'KOA', 'KSM', 'KTB', 'KTN', 'KVC', 'KVL',
            'KWT', 'LAA', 'LAF', 'LAL', 'LAN',
            'LAR', 'LAS', 'LAW', 'LAX', 'LBB', 'LBE', 'LBF', 'LBL', 'LBX', 'LCH',
            'LCK', 'LEB', 'LEX', 'LFT', 'LGA', 'LGB', 'LHD', 'LIH', 'LIT', 'LMT',
            'LNK', 'LNS', 'LNY', 'LPR', 'LRD', 'LRU', 'LSE', 'LUK', 'LWB', 'LWS',
            'LYH', 'MAF', 'MAZ', 'MBA', 'MBL', 'MBS', 'MCC', 'MCE', 'MCG', 'MCI',
            'MCK', 'MCN', 'MCO', 'MCW', 'MDH', 'MDM', 'MDT', 'MDW', 'MDY', 'MEI',
            'MEM', 'MFD', 'MFE', 'MFR', 'MGM', 'MGW', 'MHE', 'MHK', 'MHT', 'MIA',
            'MIE', 'MIV', 'MJX', 'MKC', 'MKE', 'MKG', 'MKK', 'MKL', 'MKT', 'MLB',
            'MLI', 'MLL', 'MLU', 'MMH', 'MMU', 'MMV', 'MNM', 'MOB', 'MOD', 'MOT',
            'MOU', 'MPV', 'MQI', 'MQJ', 'MQY', 'MRI', 'MRY', 'MSL', 'MSN', 'MSO',
            'MSP', 'MSS', 'MSV', 'MSY', 'MTH', 'MTJ', 'MTM', 'MTO', 'MUE', 'MVL',
            'MVN', 'MVY', 'MWA', 'MWH', 'MYR', 'MZJ', 'N93', 'NEW', 'NQA', 'NUL',
            'OA<br>J', 'OAK', 'OCF', 'OFK', 'OGD', 'OGG', 'OGS', 'OKC', 'OLM', 'OMA',
            'OME', 'ONP', 'ONT', 'OOK', 'OQU', 'ORD', 'ORF', 'ORH', 'ORI', 'ORS',
            'ORV', 'OSH', 'OSU', 'OTG', 'OTH', 'OTM', 'OTZ', 'OWB', 'OXC', 'OXR',
            'PAE', 'PAH', 'PBI', 'PCW', 'PDT', 'PDX', 'PFN', 'PGA', 'PGD', 'PGM',
            'PGV', 'PHF', 'PHL', 'PHO', 'PHX', 'PIA', 'PIB', 'PIE', 'PIH', 'PIR',
            'PIT', 'PKB', 'PLB', 'PLK', 'PLN', 'PMD', 'PNC', 'PNS', 'POU', 'PPC',
            'PPG', 'PQI', 'PQL', 'PRB', 'PRC', 'PSC', 'PSE', 'PSG', 'PSM', 'PSP',
            'PTH', 'PTK', 'PUB', 'PUW', 'PVC', 'PVD', 'PVU', 'PWM', 'PWT', 'RAP',
            'RDD', 'RDG', 'RDM', 'RDU', 'RFD', 'RHI', 'RIC', 'RIW', 'RKD', 'RKS',
            'RME', 'RMG', 'RNO', 'ROA', 'ROC', 'ROW', 'RSH', 'RST', 'RSW', 'RUT',
            'RWI', 'SAF', 'SAN', 'SAT', 'SAV', 'SAW', 'SBA', 'SBD', 'SBN', 'SBP',
            'SBY', 'SCC', 'SCK', 'SCM', 'SDF', 'SDP', 'SDY', 'SEA', 'SFB', 'SFO',
            'SFZ', 'SGF', 'SGH', 'SGJ', 'SGU', 'SGY', 'SHD', 'SHG', 'SHH', 'SHR',
            'SHV', 'SIG', 'SIT', 'SJC', 'SJT', 'SJU', 'SKX', 'SLC', 'SLE', 'SLK',
            'SLN', 'SMF', 'SMX', 'SNA', 'SNP', 'SOP', 'SOV', 'SOW', 'SPI', 'SPS',
            'SQI', 'SRQ', 'SRR', 'STC', 'STJ', 'STL', 'STP', 'STS', 'STT', 'STX',
            'SUN', 'SUS', 'SUX', 'SVA', 'SVC', 'SWF', 'SWO', 'SYR', 'T44', 'TAL',
            'TBN', 'TCL', 'TEB', 'TEX', 'TIX',
            'TLH', 'TLT', 'TNI', 'TOG', 'TOL', 'TPA', 'TPL', 'TRI', 'TTN', 'TUL',
            'TUP', 'TUS', 'TVC', 'TVF', 'TVL', 'TVR', 'TWF', 'TXK', 'TYR', 'TYS',
            'UCA', 'UIN', 'UNK', 'UNV', 'UOX', 'UUU', 'VAK', 'VCT', 'VCV', 'VDZ',
            'VGT', 'VIS', 'VLD', 'VPS', 'VPZ', 'VQQ', 'VQS', 'VRB', 'WBB', 'WDG',
            'WLK', 'WNA', 'WRG', 'WRL', 'WST', 'WTK', 'WWD', 'WYS', 'X44', 'X95',
            'XNA', 'YAK', 'YKM', 'YKN', 'YNG', 'YUM', 'Z08', 'Z09'
        ]
        return ('Date Booked: ' + str(booking) + '; Name Booked: ' + tmp2[0]
                + tmp2[1] + '; Address: ' + addr[1] + ' ' + addr[2] + ', '
                + addr[0] + '; Source :' + randomchoice(Airport_Code)
                + '; Destination:' + randomchoice(Airport_Code))

    return ''
from barnum import gen_data
import csv

# Writes 50,000,000 synthetic customer rows to large.csv.
with open('large.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n',)
    # list(...) keeps the header building where range() is not a list
    # (Python 3); the bare ``[''] + range(10)`` raises TypeError there.
    # NOTE(review): the header has 11 cells while each data row has 9 -
    # confirm whether that is intended.
    writer.writerow([''] + list(range(10)))
    for i in range(50000000):
        writer.writerow([
            i,
            10,
            gen_data.cc_number(),
            gen_data.create_company_name(),
            gen_data.create_email(),
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_city_state_zip(),
            gen_data.create_birthday(min_age=2, max_age=85),
        ])
def gen_tran(MCC_credits, MCC_debits, Tran_Country_Credits, Tran_Country_Debits,
             Tran_Type_C, Tran_Type_D, Upper_Limit, Delta, count, j, usecase):
    """Generate the transaction history of one account and write it out.

    Draws a credit limit for the account at index ``j``, simulates 100-149
    transactions with posting dates starting after 2015-01-01, and writes one
    row per transaction through the module-level ``writer``.  A final
    settlement row is written afterwards: a 'Credit Balance Refund' when the
    closing balance exceeds the limit, otherwise a 'Payment' of
    ``limit - Balance``.

    Besides its arguments the function reads the module-level per-account
    lists ``ACCTs``, ``CCs``, ``CCTypes``, ``Holders``, ``CCsCount``,
    ``Cities``, ``States``, ``ZIPs``, ``Countries`` and ``UseCase``, plus
    ``Airport_Code``, ``Merchant_Category`` and ``python_merchant_cat``.

    Args:
        MCC_credits: merchant category codes to draw from for credits.
        MCC_debits: merchant category codes to draw from for debits.
        Tran_Country_Credits: countries to draw from for credits.
        Tran_Country_Debits: countries to draw from for debits.
        Tran_Type_C: transaction type names to draw from for credits.
        Tran_Type_D: transaction type names to draw from for debits.
        Upper_Limit: exclusive upper bound of the credit/debit draw.
        Delta: offset added to that draw; together with ``Upper_Limit`` it
            controls how often an in-limit transaction becomes a credit.
        count: running row counter used as the prefix of each row id.
        j: index of the account in the module-level per-account lists.
        usecase: value copied verbatim into every row.

    Returns:
        None.  ``count`` is advanced locally only; the caller does not see
        the updated value.
    """
    hotel_categories = ('Hotels/Motels/Inns/Resorts',
                        'Hotels, Motels, and Resorts')

    # Plain decimal literals: the previous ``date(2015, 01, 01)`` spelling is
    # a SyntaxError on Python 3.
    startDate = date(2015, 1, 1)
    # Pick out account based on counter
    acct = ACCTs[j]
    # Credit limit: normally 1000-24000; when the 1-100 draw comes up 100
    # (about 1 customer in 100) the 25000-49000 draw wins instead.
    limit = max(
        max((randrange(1, 101, 1) - 99), 0) * randrange(25000, 50000, 1000),
        randrange(1000, 25000, 1000))
    tmpAmt = 0
    Balance = limit
    maxDate = startDate
    # Number of transactions generated for this customer
    NoTrans = randrange(100, 150, 1)
    # Latest check-in / booking date handed out so far ('' until first use)
    maxCheckin = ''
    maxBook = ''

    for k in range(NoTrans):
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cr_dbt = 'D'
        tranType = ''
        country = []
        cat_desc = ''
        flag = 0
        # While the balance is positive and at most 120% of the limit, draw
        # a credit (tmpAmt > 0) or a debit (tmpAmt == 0); the odds are set
        # by Upper_Limit and Delta.
        if Balance > 0 and Balance <= limit * 1.2:
            tmpAmt = (max((randrange(1, Upper_Limit, 1) + Delta), 0)
                      * randrange(1, Balance + 1, 1))
            flag = 1
        # Gap between this posting date and the previous one
        tdelta = timedelta(days=randrange(1, 4, 1))
        row = [str(count) + '_' + dt] + [acct]

        if flag == 1 and tmpAmt == 0:
            # Debit drawn against the available balance
            tmpAmt = random.randrange(1, Balance + 1, 1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch = gen_data.create_company_name()
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Debits)
        elif flag == 1 and tmpAmt > 0:
            # Credit while within the limit
            cr_dbt = 'C'
            tranType = random.choice(Tran_Type_C)
            Balance = Balance + tmpAmt
            merch = ''
            cat = random.choice(MCC_credits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            if tranType == 'Merchant Credit':
                merch = gen_data.create_company_name()
                cat = random.choice(Merchant_Category.Green)
                cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            if tranType == 'Refund':
                cat = '0000'
                cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Credits)
        elif flag == 0 and Balance > limit:
            # Balance ran past 120% of the limit: spend part of the excess
            tmpAmt = random.randrange(1, Balance - limit + 1, 1)
            tranType = random.choice(Tran_Type_D)
            cat = random.choice(MCC_debits)
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            Balance = Balance - tmpAmt
            merch = gen_data.create_company_name()
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Debits)
        elif flag == 0 and Balance <= 0:
            # Nothing left to spend: the customer makes a payment
            cr_dbt = 'C'
            tranType = 'Payment'
            # Floor division keeps the bound an int when ``/`` is true
            # division; randrange rejects a float stop value.
            tmpAmt = random.randrange(1, limit // 2, 1)
            Balance = Balance + tmpAmt
            merch = ''
            cat = '1111'
            cat_desc = python_merchant_cat.All_Merchant_Cat[cat]
            row.extend([merch, cat, cat_desc])
            country = random.choice(Tran_Country_Credits)

        # date1 is the posting date; date2 (the transaction date) is the day
        # before it.
        date1 = maxDate + tdelta
        maxDate = date1
        date2 = date1 - timedelta(days=1)
        row.extend([country, date1, date2, tranType, cr_dbt, limit, tmpAmt,
                    Balance, CCs[j], CCTypes[j], usecase, Holders[j],
                    CCsCount[j], Cities[j], States[j], ZIPs[j], Countries[j]])
        count = count + 1

        transDetail = ''
        if cat_desc in hotel_categories:
            use_case = UseCase[j]
            if use_case in ('28', '29', '30'):
                if use_case == '30':
                    checkin = maxDate + timedelta(days=randrange(30, 200, 1))
                elif maxCheckin == '':
                    checkin = maxDate + timedelta(days=randrange(365, 389, 1))
                else:
                    checkin = maxCheckin + timedelta(days=randrange(2, 5, 1))
                checkout = checkin + timedelta(days=randrange(4, 11, 1))
                if use_case != '30':
                    # Only use cases 28/29 chain each stay after the last one
                    maxCheckin = checkin
                tmp2 = gen_data.create_name()
                addr = gen_data.create_city_state_zip()
                hotel = (tmp2[1] + ' Hotels; ' + '; Address: ' + addr[1] + ' '
                         + addr[2] + ', ' + addr[0])
                transDetail = ('Checkin: ' + str(checkin) + '; Checkout: '
                               + str(checkout) + '; Hotel: ' + hotel)
        elif cat_desc == 'Airlines':
            use_case = UseCase[j]
            if use_case in ('31', '32', '33'):
                if use_case == '33' or maxBook == '':
                    booking = maxDate + timedelta(days=randrange(1, 15, 1))
                else:
                    booking = maxBook + timedelta(days=randrange(1, 15, 1))
                if use_case != '33':
                    # Only use cases 31/32 chain each booking after the last
                    maxBook = booking
                tmp2 = gen_data.create_name()
                addr = gen_data.create_city_state_zip()
                transDetail = ('Date Booked: ' + str(booking)
                               + '; Name Booked: ' + tmp2[0] + tmp2[1]
                               + '; Address: ' + addr[1] + ' ' + addr[2]
                               + ', ' + addr[0]
                               + '; Source :' + random.choice(Airport_Code)
                               + '; Destination:' + random.choice(Airport_Code))
        row.append(transDetail)
        writer.writerow(row)

    # After all transactions: refund an overpaid account, otherwise add a
    # customer payment of limit - Balance.
    if Balance > limit:
        row = ([str(count) + '_' + dt] + [acct] + ['Uber Bank'] + ['0000']
               + ['Refund to Customer from Bank']
               + [random.choice(Tran_Country_Debits)])
        date1 = maxDate + timedelta(days=90)
        date2 = date1 - timedelta(days=1)
        row.extend([date1, date2, 'Credit Balance Refund', 'D', limit,
                    Balance - limit, limit, CCs[j], CCTypes[j], usecase,
                    Holders[j], CCsCount[j], Cities[j], States[j], ZIPs[j],
                    Countries[j], ''])
    else:
        date1 = maxDate + tdelta
        date2 = date1 - timedelta(days=1)
        row = ([str(count) + '_' + dt] + [acct] + ['Customer Payment']
               + ['1111'] + ['Customer Payment']
               + [random.choice(Tran_Country_Credits)])
        row.extend([date1, date2, 'Payment', 'C', limit, limit - Balance,
                    limit, CCs[j], CCTypes[j], usecase, Holders[j],
                    CCsCount[j], Cities[j], States[j], ZIPs[j], Countries[j],
                    ''])
    writer.writerow(row)
from barnum import gen_data
import json

# Copies the first three tab-separated fields of every line of Users_old.txt
# to Users.txt and appends a JSON blob of generated profile data as a fourth
# field.  The with-block closes both files even if a line fails to parse.
with open('Users_old.txt', 'r') as f, open('Users.txt', 'w') as f_out:
    # Iterate the file directly instead of loading every line up front.
    for line in f:
        line = line.strip().split('\t')
        # Explicit indexing: a line with fewer than three fields still
        # raises IndexError, as before.
        new_line = [line[0], line[1], line[2]]
        info = {}
        info['birthday'] = str(gen_data.create_birthday(min_age=18, max_age=60))
        info['email'] = gen_data.create_email(tld="com")
        info['mobile'] = gen_data.create_phone()
        tmp = gen_data.create_city_state_zip()
        info['city'] = tmp[1] + ', ' + tmp[2]
        new_line.append(json.dumps(info))
        f_out.write('\t'.join(new_line) + '\n')