def generate_actors():
    """Write 5000 fake actor rows to ``actors.csv``.

    Rows alternate male/female (2500 of each).  Every row is a plain
    comma-joined line (no CSV quoting) with the fields, in order:

        id_PK, id_FK, full name, age (18..70), 'M' or 'F', nationality,
        a random integer in 1000..1000000, a random integer in 0..5

    ``id_PK`` counts up from 1; ``id_FK`` is a random integer in 1..1000.
    Progress markers are printed before and after generation.
    """
    person = Person()
    id_PK = 1
    print("Generating...")
    # The context manager guarantees the file is flushed and closed even if
    # one of the generator calls raises part-way through.
    with open("actors.csv", 'w') as f:
        for _ in range(2500):
            # One male row followed by one female row per iteration.
            for gender, letter in ((Gender.MALE, 'M'), (Gender.FEMALE, 'F')):
                id_FK = randint(1, 1000)
                fields = [
                    str(id_PK),
                    str(id_FK),
                    person.full_name(gender=gender),
                    str(person.age(minimum=18, maximum=70)),
                    letter,
                    person.nationality(),
                    str(randint(1000, 1000000)),
                    str(randint(0, 5)),
                ]
                f.write(','.join(fields) + '\n')
                id_PK += 1
    print("DONE")
def handle(self, *args, **options):
    """Reseed the database with fake user profiles and authors.

    Deletes every ``UserProfile`` and ``Author``, creates ``COUNT_ELEM``
    new pairs that share the same first name, last name and birth year,
    then deletes all ``User`` rows and creates the ``admin`` superuser.
    """
    for model in (UserProfile, Author):
        model.objects.all().delete()

    fake = Person('en')
    for _ in range(COUNT_ELEM):
        given = fake.first_name()
        family = fake.last_name()
        login = f'{given}.{family}'
        email = fake.email()
        born = datetime.today().year - fake.age(minimum=14, maximum=100)
        print(f'Created:{login} - {email}')

        profile = UserProfile(
            user_name=login,
            first_name=given,
            last_name=family,
            gender=fake.sex('M', 'F'),
            birthday_year=born,
            email=email,
        )
        writer = Author(
            first_name=given,
            last_name=family,
            birthday_year=born,
        )
        profile.save()
        writer.save()

    # Reset the auth users last so a fresh admin account always exists.
    User.objects.all().delete()
    User.objects.create_superuser('admin', 'admin@localhost', 'admin')
def create_human(n):
    """
    Print a randomly generated client profile to stdout.

    :param n: locale selector; 1 selects the 'ru' locale, anything else 'en'
    :return: None (the generated data is only printed, not returned)
    """
    log.info("Генерируем тестовые данные")
    fake = Person('ru' if n == 1 else 'en')
    log.info("person: " + str(fake))

    id_info = generate_id_passport()

    # Name block
    print('\nфамилия:\t', fake.last_name())
    print('имя:\t\t', fake.name())
    print('отчество:\t', fake.surname(), '\n')

    # Identity block
    doc_number = generate_number_passport()
    print('личный №:\t', id_info.get('idn'))
    print('документ:\t', doc_number)
    print('пол:\t\t', id_info.get('sex'))
    print('возраст:\t', fake.age(13, 70), '\n')

    # Avatar output is intentionally disabled:
    # print('аватар\t', fake.avatar())

    # Contact / origin block
    print('гражд-во:\t', fake.get_current_locale())
    print('Нац-сть:\t', fake.nationality())
    print('телефон:\t', fake.telephone('+37529#######'))
    print('email:\t\t', fake.email())
    print('проф-ия:\t', fake.occupation(), '\n')

    # Misc block
    print('обращение:\t', fake.title())
    print('взгляды:\t', fake.views_on())
    print('вера:\t\t', fake.worldview(), '\n')
def create_human(n):
    """Print a freshly generated fake client to stdout.

    ``n == 1`` selects the 'ru' locale; every other value selects 'en'.
    The e-mail line prints a fixed masked placeholder rather than a
    generated address.  Nothing is returned.
    """
    locale = 'en'
    if n == 1:
        locale = 'ru'
    generator = Person(locale)

    identity = generate_id_passport()
    print('\nфамилия:\t', generator.last_name())
    print('имя:\t\t', generator.name())
    print('отчество:\t', generator.surname(), '\n')

    document = generate_number_passport()
    print('личный №:\t', identity.get('idn'))
    print('документ:\t', document)
    print('пол:\t\t', identity.get('sex'))
    print('возраст:\t', generator.age(13, 70), '\n')

    # Disabled: print('аватар\t', generator.avatar())
    print('гражд-во:\t', generator.get_current_locale())
    print('Нац-сть:\t', generator.nationality())
    print('телефон:\t', generator.telephone('+37529#######'))
    # Disabled: print('email:\t\t', generator.email())
    print('email:\t\t', '*****@*****.**')
    print('проф-ия:\t', generator.occupation(), '\n')

    print('обращение:\t', generator.title())
    print('взгляды:\t', generator.views_on())
    print('вера:\t\t', generator.worldview(), '\n')
def createPlayers(team):
    """Generate five fake players per team and write them to ``Players.csv``.

    :param team: sized collection of teams; only ``len(team)`` is used.
        Team ids 1..len(team) are handed out in random order, one per
        group of five players.

    Within a team every player gets a distinct position (1..5) and a
    distinct shirt number (1..98).  Each output line is comma-joined, without
    CSV quoting, in the order::

        id,team_id,name,position,height,weight,number,age,country

    Returns ``None``; the result is the file written to the working directory.
    """
    person = Person("en")
    adress = Address("en")
    players = []
    team_count = len(team)
    free_team_ids = list(range(1, team_count + 1))
    for _team in range(team_count):
        # Fresh pools per team so positions/numbers are unique within it.
        positions = [1, 2, 3, 4, 5]
        numbers = list(range(1, 99))
        cur_team_id = choice(free_team_ids)
        free_team_ids.remove(cur_team_id)
        # range() is evaluated once, so shrinking `positions` below still
        # yields exactly five players.
        for _slot in range(len(positions)):
            cur_id = str(len(players) + 1)
            name = person.full_name()
            position = choice(positions)
            height = str(randint(150, 220))
            weight = str(person.weight())
            number = choice(numbers)
            age = str(person.age())
            country = adress.country(allow_random=True)
            positions.remove(position)
            numbers.remove(number)
            players.append([cur_id, str(cur_team_id), name, position, height,
                            weight, str(number), age, country])

    # Context manager closes the file even if a write fails.
    with open("Players.csv", "w", encoding='utf-8') as f:
        f.writelines(','.join(map(str, player)) + '\n' for player in players)
def seedDatabase(rows):
    """Insert ``rows`` fake users and redirect to the index view.

    :param rows: number of users to create; converted with ``int()``
    :return: the redirect response for the ``index`` endpoint
    """
    fake = Person('en')
    total = int(rows)
    for _ in range(total):
        Db.session.add(User(first_name=fake.first_name(), age=fake.age()))
    Db.session.commit()
    target = url_for('index')
    return redirect(target)
def create_personalData():
    """Fill the module-level ``personalData`` dict with fake client data.

    Sets the keys ``lastname``, ``name``, ``surname``, ``gender``,
    ``passport``, ``age`` and ``mobile`` and returns the same dict.
    """
    fake = Person('ru')

    personalData['lastname'] = fake.last_name()
    personalData['name'] = fake.name()
    personalData['surname'] = fake.surname()
    personalData['gender'] = fake.gender()

    # Passport: two random uppercase ASCII letters, then the '#######' mask
    # expanded by the provider.
    prefix = [random.choice(string.ascii_uppercase) for _ in range(2)]
    personalData['passport'] = ''.join(prefix) + fake.telephone('#######')

    # Birthday.
    # NOTE(review): presumably currentDate() refreshes the module-level `age`
    # sequence whose item 2 is the year -- confirm against its definition.
    currentDate()
    personalData['age'] = age
    birth = personalData['age']  # same object as the global `age`
    birth[2] = str(int(birth[2]) - fake.age(16, 70))

    # Mobile number
    personalData['mobile'] = fake.telephone('+37529#######')

    return personalData
def gen_data_simple_schema(self, data_path, partition_date, num_rows,
                           file_format):
    """
    Create a data sample with a simple (flat) schema and write it as one
    partition.

    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

    The rows are written with ``mode('overwrite')`` partitioned by ``date``;
    the row count and schema are printed afterwards.  Returns None.
    """
    person = Person('en')

    # Create a simple schema
    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('date', DateType(), True)
    ])

    # Generate all rows first and build ONE DataFrame.  The previous
    # row-by-row union relied on a bare `except:` to detect the first
    # iteration, which also swallowed genuine union errors, and it left `df`
    # unbound when num_rows was 0.
    rows = [
        [
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            partition_date,
        ]
        for _ in range(num_rows)
    ]
    df = self.spark.createDataFrame(rows, schema_df)

    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')

    return
def generate_student() -> dict:
    """Build and return one fake student record as a plain dict."""
    fake = Person()
    return dict(
        academic_degree=fake.academic_degree(),
        age=fake.age(),
        full_name=fake.full_name(),
        gender=fake.gender(),
        nationality=fake.nationality(),
        university=fake.university(),
    )
def generate_directors():
    """Write 1000 fake director rows to ``directors.csv``.

    Rows alternate male/female (500 of each).  Every row is a plain
    comma-joined line (no CSV quoting) with the fields, in order:

        ID, full name, age (24..70), 'M' or 'F', a random integer in 1..9

    ``ID`` counts up from 1.
    """
    person = Person()
    ID = 1
    # The context manager guarantees the file is closed even if a generator
    # call raises part-way through.
    with open("directors.csv", 'w') as f:
        for _ in range(500):
            # One male row followed by one female row per iteration.
            for gender, letter in ((Gender.MALE, 'M'), (Gender.FEMALE, 'F')):
                fields = [
                    str(ID),
                    person.full_name(gender=gender),
                    str(person.age(minimum=24, maximum=70)),
                    letter,
                    str(randint(1, 9)),
                ]
                f.write(','.join(fields) + '\n')
                ID += 1
def make_full_name(self, loc, value):
    """Generate a fake person's name parts, age and occupation.

    :param loc: locale passed to ``Person``
    :param value: ``'male'`` or ``'female'``; the historical misspelling
        ``'famale'`` is still accepted for existing callers
    :return: tuple ``(name, surname, patronymic, age, occupation)``

    The chosen gender is stored in the module-level ``sex``.  Any other
    ``value`` leaves ``sex`` untouched, so the gender from a previous call is
    reused (or a NameError is raised if it was never set).
    """
    global sex
    per = Person(loc)
    rsp = RussiaSpecProvider()
    if value == 'male':
        sex = Gender.MALE
    elif value in ('female', 'famale'):
        # Previously only the typo 'famale' matched, so the correctly spelled
        # 'female' silently fell through.
        sex = Gender.FEMALE
    name = per.name(sex)
    surname = per.surname(sex)
    patron = rsp.patronymic(sex)
    age = per.age(16, 66)
    occup = per.occupation()
    return name, surname, patron, age, occup
def get_user(number):
    """
    Build a list of randomly generated users.

    :param number: how many users to generate
    :return: list of OrderedDict records with the keys first_name,
        last_name, age, email and username
    """
    users = []
    for _ in range(number):
        # Pick one gender per user so first and last name agree.
        (picked,) = random.sample([Gender.MALE, Gender.FEMALE], 1)
        fake = Person('ru')
        record = OrderedDict(
            first_name=fake.first_name(gender=picked),
            last_name=fake.last_name(gender=picked),
            age=fake.age(minimum=18, maximum=66),
            email=fake.email(domains=('yandex.ru', 'gmail.com')),
            username=fake.username(template='UU_d'),
        )
        users.append(record)
    return users
def update():
    """Empty the ``users`` and ``goods`` collections and insert 30 fake users.

    Each user document has the keys ``name``, ``age``, ``sex``, ``mobile``,
    ``wechat``, ``status`` and ``isVip``.  ``goods`` is only cleared, never
    repopulated here.
    """
    # NOTE(review): `db` looks like a PyMongo database (insert_many is used
    # below) -- confirm.  Collection.remove() is deprecated and was dropped
    # in PyMongo 4; delete_many({}) is the supported way to clear a collection.
    db.users.delete_many({})
    db.goods.delete_many({})

    users = []
    for _ in range(30):
        person = Person(locales.ZH)
        user = {
            "name": person.name(),
            "age": person.age(),
            "sex": choice(["boy", "girl"]),
            "mobile": person.telephone(),
            "wechat": person.email(),
            "status": choice(["normal", "in_debt"]),
            "isVip": Development().boolean(),
        }
        users.append(user)
    db.users.insert_many(users)
""") conn.commit() for i in range(20): #idd = g.code.imei() idd = n gender = random.choice(gen) if gender == 'Женский': first_name = p.first_name(gender=Gender.FEMALE) last_name = p.last_name(gender=Gender.FEMALE) #self.patron = rus.patronymic(gender = Gender.FEMALE) elif gender == 'Мужской': first_name = p.first_name(gender=Gender.MALE) last_name = p.last_name(gender=Gender.MALE) #self.patron = rus.patronymic(gender = Gender.MALE) age = p.age(minimum=18, maximum=70) month_ob = dt.month() day_ob = dt.day_of_month() year_ob = 2020 - age city_ob = adr.city() city = adr.city() address = adr.address() phone = p.telephone(mask='+7(###)-###-####') email = p.email(domains=['mimesis.name']) user = (int(idd), str(gender), str(first_name), str(last_name), str(age), str(month_ob), str(day_ob), str(year_ob), str(city_ob), str(city), str(address), str(phone), str(email)) cur.execute( "INSERT INTO people VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);",
def gen_data_remove_column(self, data_path, partition_date, num_rows,
                           file_format):
    """
    Create a data sample whose schema has some columns removed and write it
    as one partition.

    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

    The fields marked "column removed" below are deliberately left out of
    the schema and of the generated rows.  The rows are written with
    ``mode('overwrite')`` partitioned by ``date``; the row count and schema
    are printed afterwards.  Returns None.
    """
    person = Person('en')
    address = Address('en')

    schema_street = StructType([
        StructField('street_name', StringType(), True)
        # StructField('lat', FloatType(), True),  # column removed
        # StructField('long', FloatType(), True)  # column removed
    ])

    schema_address_details = StructType([
        StructField('street', schema_street, True),
        StructField('number', IntegerType(), True)
    ])

    schema_address = StructType([
        StructField('address_details', schema_address_details, True),
        StructField('city', StringType(), True),
        StructField('country', StringType(), True),
        # StructField('country_code', StringType(), True),  # column removed
        StructField('state', StringType(), True),
        StructField('postal_code', IntegerType(), True)
    ])

    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('address', schema_address, True),
        # StructField('title_name', StringType(), True),  # column removed
        StructField('date', DateType(), True)
    ])

    # Generate all rows first and build ONE DataFrame.  The previous
    # row-by-row union relied on a bare `except:` to detect the first
    # iteration, which also swallowed genuine union errors, and it left `df`
    # unbound when num_rows was 0.
    rows = [
        [
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            [  # address
                [  # address_details
                    [  # street
                        address.street_name()
                        # float(address.latitude()),   # column removed
                        # float(address.longitude())   # column removed
                    ],
                    int(address.street_number())
                ],
                address.city(),
                address.country(),
                # address.country_code(),  # column removed
                address.state(),
                int(address.postal_code())
            ],
            # person.title(),  # column removed
            partition_date
        ]
        for _ in range(num_rows)
    ]
    df = self.spark.createDataFrame(rows, schema_df)

    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')

    return
from mimesis import Person

# Keep the provider instance under its own lowercase name: rebinding `Person`
# to an instance shadowed the imported class and made it impossible to create
# another provider later in the script.
person = Person('de')

# Print one sample e-mail, phone number and age for the German locale.
print(person.email())
print(person.telephone())
print(person.age())
from collections import OrderedDict
from csv import DictWriter
import os

from mimesis import Person

# Number of fake records to generate and the mimesis locale to use.
COUNT = 100
LOCAL = 'en-gb'
# The output file is written next to this script.
HERE = os.path.dirname(__file__)

person = Person(LOCAL)

records = []
for _ in range(COUNT):
    record = OrderedDict(
        name=person.full_name(),
        email=person.email(),
        age=person.age(minimum=18, maximum=45),
        height=person.height(),
        blood_type=person.blood_type()
    )
    records.append(record)

# newline='' is required by the csv module: without it the writer's own
# '\r\n' terminators get translated again on Windows, producing blank rows.
with open(os.path.join(HERE, './fakedata.csv'), 'w', newline='') as f:
    # Column order follows the key order of the first record.
    writer = DictWriter(f, fieldnames=records[0].keys())
    writer.writeheader()
    writer.writerows(records)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# (The declaration above used to read "conding:utf8", which the interpreter
# does not recognise as an encoding declaration.)

from mimesis import Person

# English-locale sample: full name, age and favorite movie.
person_en = Person('en')
print(person_en.full_name())
print(person_en.age())
print(person_en.favorite_movie())

# Visual separator between the two samples.
print('*' * 20)

# Chinese-locale sample with the same three fields.
person_zh = Person('zh')
print(person_zh.full_name())
print(person_zh.age())
print(person_zh.favorite_movie())
def person(
    cls,
    *,
    locale=Locales.EN,
    qualification=None,
    age=None,
    blood_type=None,
    email=None,
    first_name=None,
    last_name=None,
    gender=None,
    height=None,
    id=None,
    language=None,
    nationality=None,
    occupation=None,
    phone=None,
    title=None,
    university=None,
    weight=None,
    work_experience=None,
):
    '''
        Create a Person Data Entity object.

        All individual fields are automatically randomly generated based on locale. If provided, the corresponding values are overridden.

        Any value other than None counts as provided, including falsy ones
        such as 0 or an empty string.

        Note:
            All individual fields are randomly generated. Don't expect correct correlation e.g. correct postal code for the generated city.

        Keyword Arguments:
            locale: Appropriate Random.locale.<local_name> object. Default is Random.locale.EN
            qualification: Educational Qualification
            age: Age
            blood_type: Blood type
            email: Email address
            first_name: First name
            last_name: Last name
            gender: Gender
            height: Height
            id: Identifier
            language: Language
            nationality: Nationality
            occupation: Occupation
            phone: Phone number
            title: Title
            university: University
            weight: Weight
            work_experience: Work Experience

        Returns:
            The Person entity; its ``name`` is ``first_name + " " + last_name``.
    '''
    person = Person(locale=locale)
    from arjuna.engine.data.entity.person import Person as ArjPerson

    def pick(value, generate):
        # The old `value is not None and value or generate()` idiom threw
        # away explicit falsy overrides (age=0, title=''); only None may
        # trigger generation.  `generate` is called lazily, as before.
        return value if value is not None else generate()

    first_name = pick(first_name, person.first_name)
    last_name = pick(last_name, person.last_name)
    return ArjPerson(
        qualification=pick(qualification, person.academic_degree),
        age=pick(age, person.age),
        blood_type=pick(blood_type, person.blood_type),
        email=pick(email, person.email),
        first_name=first_name,
        last_name=last_name,
        name=first_name + " " + last_name,
        gender=pick(gender, person.gender),
        height=pick(height, person.height),
        id=pick(id, person.identifier),
        language=pick(language, person.language),
        nationality=pick(nationality, person.nationality),
        occupation=pick(occupation, person.occupation),
        phone=pick(phone, person.telephone),
        title=pick(title, person.title),
        university=pick(university, person.university),
        weight=pick(weight, person.weight),
        work_experience=pick(work_experience, person.work_experience),
    )
return 0.01*pow(np.abs(df[age] - 30), 2.5) + df[age] + 50*df[marketing_level]*gated_age + 2*df[account_balance] + noise ################################################## ################################################## ### Generate a DataFrame of user information ################################################## # Generate 10,000 rows of the following: # user_id, first_name, last_name, email, password, address, # birth_date, credit_card_num, credit_card_exp, security_answer, # account_balance user_df = pd.DataFrame([[x, person.name(), person.surname(), person.gender(), person.email(), hashed_passwd(person.password()), address.address(), person.age(), payment.credit_card_number(), payment.credit_card_expiration_date(), text.word(), account_balance(), np.random.randint(1, 11)] for x in range(10000)]) user_df.columns = ["user_id", "first_name", "last_name", "gender", "email", "password_hashed", "address", "age", "credit_card_num", "credit_card_exp", "security_answer", "account_balance", "marketing_level"] # Generate sales, based on a noisy linear model user_df['sales'] = generate_sales(user_df) user_df['sales'] = user_df['sales'] - user_df['sales'].min() user_df['sales'] /= 40
### Generate a DataFrame of user information
##################################################
# Build 10,000 rows, one per user_id, with these columns:
# user_id, first_name, last_name, gender, email, password_hashed, address,
# age, credit_card_num, credit_card_exp, security_answer,
# account_balance, marketing_level (random integer from 1 to 10)
user_df = pd.DataFrame(
    [
        [
            x,
            person.name(),
            person.surname(),
            person.gender(),
            person.email(),
            hashed_passwd(person.password()),
            address.address(),
            person.age(),
            payment.credit_card_number(),
            payment.credit_card_expiration_date(),
            text.word(),
            account_balance(),
            np.random.randint(1, 11),
        ]
        for x in range(10000)
    ],
    columns=[
        "user_id",
        "first_name",
        "last_name",
        "gender",
        "email",
        "password_hashed",
        "address",
        "age",
        "credit_card_num",
        "credit_card_exp",
        "security_answer",
        "account_balance",
        "marketing_level",
    ],
)

# Generate sales, based on a noisy linear model
user_df['sales'] = generate_sales(user_df)
# -*- encoding : utf-8 -*-
"""
@File : __init__.py.py
@Time :2021/3/29 18:46
@Author :kuang congxian
@Contact :[email protected]
@Description : null
"""
from mimesis import Person
from pprint import pprint

person = Person("zh")

# Gather the four fields in the order they appear in the output, then
# pretty-print the one-line summary.
details = (
    person.name(),
    person.age(),
    person.sex(),
    person.university(),
)
pprint("{}[age:{}, sex:{}, university:{}]".format(*details))