def create_human(n):
    """Print one randomly generated client profile to stdout.

    :param n: locale selector; ``1`` builds ``Person('ru')``, any other
        value builds ``Person('en')``
    :return: None; every field is written with ``print``
    """
    def emit(label, value, *tail):
        # Thin wrapper so each profile field reads as one short line.
        print(label, value, *tail)

    locale = 'ru' if n == 1 else 'en'
    person = Person(locale)
    client_info = generate_id_passport()

    # --- names ---
    emit('\nфамилия:\t', person.last_name())
    emit('имя:\t\t', person.name())
    # NOTE(review): the label reads "patronymic" but the value comes from
    # person.surname() - confirm this is intended.
    emit('отчество:\t', person.surname(), '\n')

    # --- documents and demographics ---
    passport = generate_number_passport()
    emit('личный №:\t', client_info.get('idn'))
    emit('документ:\t', passport)
    emit('пол:\t\t', client_info.get('sex'))
    emit('возраст:\t', person.age(13, 70), '\n')

    # print('аватар\t', person.avatar())
    # --- citizenship and contacts ---
    emit('гражд-во:\t', person.get_current_locale())
    emit('Нац-сть:\t', person.nationality())
    emit('телефон:\t', person.telephone('+37529#######'))
    emit('email:\t\t', '*****@*****.**')
    # print('email:\t\t', person.email())
    emit('проф-ия:\t', person.occupation(), '\n')

    # --- title and outlook ---
    emit('обращение:\t', person.title())
    emit('взгляды:\t', person.views_on())
    emit('вера:\t\t', person.worldview(), '\n')
def create_human(n):
    """
    Log and print a set of generated fake personal data.

    :param n: data-generation language selector (``1`` -> ``'ru'``,
        anything else -> ``'en'``)
    :return: None; the generated fake data is printed to stdout
    """
    log.info("Генерируем тестовые данные")
    person = Person('ru' if n == 1 else 'en')
    log.info("person: " + str(person))

    client_info = generate_id_passport()

    # Name block.
    last_name = person.last_name()
    print('\nфамилия:\t', last_name)
    first_name = person.name()
    print('имя:\t\t', first_name)
    # NOTE(review): the label reads "patronymic" but the value comes from
    # person.surname() - confirm this is intended.
    third_name = person.surname()
    print('отчество:\t', third_name, '\n')

    # Document block; the passport number is generated only at this point.
    passport = generate_number_passport()
    print('личный №:\t', client_info.get('idn'))
    print('документ:\t', passport)
    print('пол:\t\t', client_info.get('sex'))
    age = person.age(13, 70)
    print('возраст:\t', age, '\n')

    # print('аватар\t', person.avatar())
    # Citizenship and contact block.
    print('гражд-во:\t', person.get_current_locale())
    print('Нац-сть:\t', person.nationality())
    phone = person.telephone('+37529#######')
    print('телефон:\t', phone)
    print('email:\t\t', person.email())
    print('проф-ия:\t', person.occupation(), '\n')

    # Title and outlook block.
    print('обращение:\t', person.title())
    print('взгляды:\t', person.views_on())
    print('вера:\t\t', person.worldview(), '\n')
def gen_data_simple_schema(self, data_path, partition_date, num_rows,
                           file_format):
    """
    Create a data sample with a simple (flat) schema and write it as a
    single partition.

    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

    All rows are generated first and turned into one DataFrame with a
    single ``createDataFrame`` call. This replaces a chain of per-row
    unions whose first iteration was detected with a bare ``except``; that
    pattern also hid genuine union errors and left ``df`` undefined when
    ``num_rows`` was 0. With ``num_rows == 0`` an empty DataFrame with the
    schema below is now written instead.

    Returns None; the row count and schema are printed.
    """
    person = Person('en')

    # Create a simple schema
    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('date', DateType(), True)
    ])

    # Generate data: one list per row, fields in schema order.
    rows = [
        [
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            partition_date,
        ]
        for _ in range(num_rows)
    ]
    df = self.spark.createDataFrame(rows, schema_df)

    # coalesce(1) so the partition is written from a single task.
    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')

    return
def make_full_name(self, loc, value):
    """Generate a name, surname, patronymic, age and occupation.

    :param loc: locale passed to ``Person``
    :param value: ``'male'`` or ``'female'``; the historical misspelling
        ``'famale'`` is still accepted so existing callers keep working
    :return: tuple ``(name, surname, patronymic, age, occupation)``
    :raises ValueError: if ``value`` is not one of the accepted strings.
        Previously such a value silently reused the gender left in the
        module-level ``sex`` by an earlier call, or raised ``NameError``
        on the first call.
    """
    # The module-level ``sex`` is still assigned because code outside this
    # function may read it - TODO confirm and drop the global if unused.
    global sex

    if value == 'male':
        sex = Gender.MALE
    elif value in ('female', 'famale'):
        sex = Gender.FEMALE
    else:
        raise ValueError(
            "value must be 'male' or 'female', got {!r}".format(value))

    per = Person(loc)
    rsp = RussiaSpecProvider()

    name = per.name(sex)
    surname = per.surname(sex)
    patron = rsp.patronymic(sex)
    age = per.age(16, 66)
    occup = per.occupation()
    return name, surname, patron, age, occup
def generate_random_data():
    """Build a dict of randomly generated user fields.

    Values come from default-constructed ``Person``, ``Address`` and
    ``Datetime`` providers. The ``created`` entry is the ISO-format string
    of a generated date.

    :return: dict with the keys ``name``, ``email``, ``nationality``,
        ``occupation``, ``password``, ``phone``, ``address``, ``city``,
        ``street_no`` and ``created``
    """
    person = Person()
    address = Address()

    record = {}

    # Personal fields.
    record["name"] = person.full_name()
    record["email"] = person.email()
    record["nationality"] = person.nationality()
    record["occupation"] = person.occupation()
    record["password"] = person.password()
    record["phone"] = person.telephone()

    # Address fields.
    record["address"] = address.address()
    record["city"] = address.city()
    record["street_no"] = address.street_number()

    # Creation date as an ISO-format string.
    created_on = Datetime().date()
    record["created"] = created_on.isoformat()

    return record
def main():
    """Generate random DE/AT/CH customers and write them to a CSV file.

    Reads the number of customers from ``sys.argv[1]``. When the argument
    is missing, not an integer, or negative, a usage message is printed
    and nothing is written.

    Birth dates are drawn with ``fake_de.date_time_between`` between 100
    and 18 years ago, sorted ascending and truncated to midnight. Each
    customer's country comes from ``random_country()``; ``'DE'`` and
    ``'AT'`` select the ``de`` / ``de-at`` locales and every other value
    is treated as Switzerland (``de-ch``).

    The result is written to ``output/customers__de_at_ch.csv``.
    """
    try:
        num_gen = int(sys.argv[1])
    except (IndexError, ValueError):
        # Missing or non-numeric argument.
        num_gen = None

    if num_gen is None or num_gen < 0:
        print(
            "Usage: python faker_customers_to_csv.py <INT: Number of Customers to generate>"
        )
        print("Example: python faker_customers_to_csv.py 10000")
        return

    # Generate Customers that are between 18 and 100 years old
    ls_dates = [
        fake_de.date_time_between(start_date="-100y", end_date="-18y",
                                  tzinfo=None)
        for _ in range(num_gen)
    ]
    ls_dates.sort()

    # country code -> (mimesis locale, nationality label)
    locale_by_country = {
        'DE': ('de', 'Germany'),
        'AT': ('de-at', 'Austria'),
    }
    fallback_locale = ('de-ch', 'Switzerland')
    # locale -> (Address, Person); built on first use and then reused so
    # the providers are not re-created for every customer.
    providers = {}

    ls_customer = []
    for i, birth_date in enumerate(ls_dates):
        s_country = random_country()
        locale, s_nationality = locale_by_country.get(s_country,
                                                      fallback_locale)
        if locale not in providers:
            providers[locale] = (Address(locale), Person(locale))
        address, person = providers[locale]

        s_sex = random_mf_flag()
        gender = (mimesis.enums.Gender.FEMALE if s_sex == 'F'
                  else mimesis.enums.Gender.MALE)
        s_first_name = person.name(gender)
        s_last_name = person.last_name(gender)
        s_marital_status = random_marital_status_flag()
        s_job = person.occupation()
        s_email = person.email()
        s_phone = person.telephone()
        i_number_children = random.randint(0, 4)
        s_address_street = address.address()
        s_address_zip = address.postal_code()
        s_address_city = address.city()

        t_customer = (
            customer_number(i), s_sex, s_first_name, s_last_name, s_job,
            s_email, s_phone, i_number_children, s_marital_status,
            # Keep only the date part of the generated timestamp.
            birth_date.replace(hour=0, minute=0, second=0, microsecond=0),
            s_address_street, s_address_zip, s_address_city, s_country,
            s_nationality,
        )
        ls_customer.append(t_customer)

    ls_columns = [
        'customer_id', 'gender', 'first_name', 'last_name', 'job', 'email',
        'phone', 'number_children', 'marital_status', 'date_of_birth',
        'street', 'zip', 'city', 'country_code', 'nationality'
    ]
    df_customer = pd.DataFrame(ls_customer, columns=ls_columns)
    df_customer.to_csv('output/customers__de_at_ch.csv',
                       sep=',',
                       index=False,
                       header=ls_columns)
def person(
    cls,
    *,
    locale=Locales.EN,
    qualification=None,
    age=None,
    blood_type=None,
    email=None,
    first_name=None,
    last_name=None,
    gender=None,
    height=None,
    id=None,
    language=None,
    nationality=None,
    occupation=None,
    phone=None,
    title=None,
    university=None,
    weight=None,
    work_experience=None,
):
    '''
    Create a Person Data Entity object.

    All individual fields are automatically randomly generated based on
    locale. If provided, the corresponding values are overridden.

    An override is used whenever it is not None. Falsy values such as
    ``0`` or ``''`` are honoured; previously they were silently replaced
    by a generated value.

    Note:
        All individual fields are randomly generated. Don't expect correct
        correlation between the generated values.

    Keyword Arguments:
        locale: Appropriate locale object. Default is ``Locales.EN``
        qualification: Educational Qualification
        age: Age
        blood_type: Blood type
        email: Email address
        first_name: First name
        last_name: Last name
        gender: Gender
        height: Height
        id: Identifier
        language: Language
        nationality: Nationality
        occupation: Occupation
        phone: Phone number
        title: Title
        university: University
        weight: Weight
        work_experience: Work Experience

    Returns:
        The Person entity object; its ``name`` is ``first_name`` and
        ``last_name`` joined by a single space.
    '''
    person = Person(locale=locale)
    from arjuna.engine.data.entity.person import Person as ArjPerson

    def _pick(value, generate):
        # Use the caller's value unless it is None; the generator is only
        # called when no override was given.
        return value if value is not None else generate()

    first_name = _pick(first_name, person.first_name)
    last_name = _pick(last_name, person.last_name)

    return ArjPerson(
        qualification=_pick(qualification, person.academic_degree),
        age=_pick(age, person.age),
        blood_type=_pick(blood_type, person.blood_type),
        email=_pick(email, person.email),
        first_name=first_name,
        last_name=last_name,
        name=first_name + " " + last_name,
        gender=_pick(gender, person.gender),
        height=_pick(height, person.height),
        id=_pick(id, person.identifier),
        language=_pick(language, person.language),
        nationality=_pick(nationality, person.nationality),
        occupation=_pick(occupation, person.occupation),
        phone=_pick(phone, person.telephone),
        title=_pick(title, person.title),
        university=_pick(university, person.university),
        weight=_pick(weight, person.weight),
        work_experience=_pick(work_experience, person.work_experience),
    )
# Build one directory entry for the current user and write it to the JSON
# output. The names person, userName, fn, mail, domain, usePhoto, fileName
# and i come from the enclosing code, which is not shown here.

# Download the generated avatar and keep it as base64 text.
avatarUrl = person.avatar()
r = requests.get(avatarUrl)
imageString = ""
if r.status_code == 200:
    imageString = base64.b64encode(r.content).decode("utf-8")
else:
    # Download failed: render a pagan avatar locally instead.
    # NOTE(review): presumably save('.\\', 't') produces '.\\t.png', which
    # is read back below - confirm against the pagan API.
    img = pagan.Avatar(userName, pagan.SHA512)
    img.save('.\\', 't')
    with open(".\\t.png", "rb") as binary_file:
        dataImage = binary_file.read()
    imageString = base64.b64encode(dataImage).decode("utf-8")

entry = {
    'cn': fn,
    'name': userName,
    'displayName': fn,
    'mail': mail,
    # Text before the first space. split() keeps the whole string when fn
    # has no space, whereas fn[:fn.find(' ')] dropped the last character
    # because find() returns -1 in that case.
    'givenName': fn.split(' ', 1)[0],
    'title': person.occupation(),
    "mobile": person.telephone(),
    "sAMAccountName": userName,
    "company": domain,
    "userPrincipalName": userName + "@" + domain
}
if usePhoto:
    entry["photo"] = imageString
writeToJson(fileName, entry)
print(i, fn, userName, mail, userName, userName + "@" + domain)
def gen_data_remove_column(self, data_path, partition_date, num_rows,
                           file_format):
    """
    Create a data sample in which some columns have been removed, and
    write it as a single partition.

    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

    The removed columns (lat/long inside street, country_code inside
    address, top-level title_name) are kept as commented-out schema
    entries.

    All rows are generated first and turned into one DataFrame with a
    single ``createDataFrame`` call. This replaces a chain of per-row
    unions whose first iteration was detected with a bare ``except``; that
    pattern also hid genuine union errors and left ``df`` undefined when
    ``num_rows`` was 0. With ``num_rows == 0`` an empty DataFrame with the
    schema below is now written instead.

    Returns None; the row count and schema are printed.
    """
    person = Person('en')
    address = Address('en')

    schema_street = StructType([
        StructField('street_name', StringType(), True)
        # StructField('lat', FloatType(), True), #column removed
        # StructField('long', FloatType(), True) #column removed
    ])

    schema_address_details = StructType([
        StructField('street', schema_street, True),
        StructField('number', IntegerType(), True)
    ])

    schema_address = StructType([
        StructField('address_details', schema_address_details, True),
        StructField('city', StringType(), True),
        StructField('country', StringType(), True),
        # StructField('country_code', StringType(), True), #column removed
        StructField('state', StringType(), True),
        StructField('postal_code', IntegerType(), True)
    ])

    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('address', schema_address, True),
        # StructField('title_name', StringType(), True), #column removed
        StructField('date', DateType(), True)
    ])

    # One nested list per row; the nesting mirrors the struct schema.
    rows = [
        [
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            [  # address
                [  # address_details
                    [  # street
                        address.street_name()
                        # float(address.latitude()),
                        # float(address.longitude())
                    ],
                    int(address.street_number())
                ],
                address.city(),
                address.country(),
                # address.country_code(),
                address.state(),
                int(address.postal_code())
            ],
            # person.title(),
            partition_date
        ]
        for _ in range(num_rows)
    ]
    df = self.spark.createDataFrame(rows, schema_df)

    # coalesce(1) so the partition is written from a single task.
    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')

    return