def address(cls, *, locale=Locales.EN, calling_code=None, city=None,
            country=None, country_code=None, latitude=None, longitude=None,
            postal_code=None, state=None, street_name=None,
            street_number=None, street_suffix=None):
    '''
        Create an Address Data Entity object.

        All individual fields are automatically randomly generated based on locale.
        If provided, the corresponding values are overridden.

        Note:
            All individual fields are randomly generated. Don't expect correct
            correlation e.g. correct postal code for the generated city.

        Keyword Arguments:
            locale: Appropriate Random.locale.<local_name> object. Default is Random.locale.EN
            calling_code: Calling Code
            city: City
            country: Country Name
            country_code: Country Code
            latitude: Latitude
            longitude: Longitude
            postal_code: Postal Code
            state: State
            street_name: Street Name
            street_number: Street Number
            street_suffix: Street Suffix

        Returns:
            The address entity built from the supplied overrides and the
            generated values.
    '''
    from arjuna.engine.data.entity.address import Address as ArjAddress

    generator = Address(locale=locale)

    def pick(supplied, generate):
        # Only None means "not supplied": falsy overrides such as 0 or ''
        # are kept, and the generator is called only when it is needed.
        return supplied if supplied is not None else generate()

    return ArjAddress(
        calling_code=pick(calling_code, generator.calling_code),
        city=pick(city, generator.city),
        country=pick(country, generator.country),
        country_code=pick(country_code, generator.country_code),
        latitude=pick(latitude, generator.latitude),
        longitude=pick(longitude, generator.longitude),
        postal_code=pick(postal_code, generator.postal_code),
        state=pick(state, generator.state),
        street_name=pick(street_name, generator.street_name),
        street_number=pick(street_number, generator.street_number),
        street_suffix=pick(street_suffix, generator.street_suffix),
    )
def get_fake_address(self, loc):
    """Generate one fake address for locale ``loc``.

    Returns:
        A 7-tuple ordered as (zip code, city, street suffix, street name,
        street number, state, country).
    """
    provider = Address(loc)

    # The provider is queried in the same order as before; only the tuple
    # returned at the end is ordered differently from the draw order.
    city_name = provider.city()
    suffix = provider.street_suffix()
    street_name = provider.street_name()
    house_number = provider.street_number()
    state_name = provider.state()
    zip_code = provider.zip_code()
    country_name = provider.country()

    return (
        zip_code,
        city_name,
        suffix,
        street_name,
        house_number,
        state_name,
        country_name,
    )
def generate_addresses():
    """Build a single address record from the ``en-gb`` Address provider.

    Returns:
        A dict with the keys ``id``, ``street_number``, ``street_name``,
        ``city``, ``postal_code`` and ``country``.
    """
    provider = Address('en-gb')

    town = provider.city()
    nation = provider.country()
    postcode = provider.postal_code()
    road = provider.street_name()
    house_number = provider.street_number()

    # NOTE(review): no local ``id`` exists in this function, so unless the
    # module defines its own ``id`` this stores the builtin function object.
    # Confirm what identifier was intended.
    record = {'id': id}
    record["street_number"] = house_number
    record["street_name"] = road
    record["city"] = town
    record["postal_code"] = postcode
    record["country"] = nation
    return record
def generate_random_data():
    """Return one randomly generated user record as a dict.

    Personal fields come from a ``Person`` provider, location fields from an
    ``Address`` provider, and ``created`` is an ISO-formatted date string
    obtained from ``Datetime().date()``.
    """
    person = Person()
    location = Address()

    record = {}
    record["name"] = person.full_name()
    record["email"] = person.email()
    record["nationality"] = person.nationality()
    record["occupation"] = person.occupation()
    record["password"] = person.password()
    record["phone"] = person.telephone()
    record["address"] = location.address()
    record["city"] = location.city()
    record["street_no"] = location.street_number()
    record["created"] = Datetime().date().isoformat()
    return record
class CSVData:
    """Produce lines of fake, CSV-like data from a set of mimesis providers."""

    def __init__(self):
        """Create the providers; the locale-aware ones are built with 'zh'."""
        zh = 'zh'
        self.person = Person(locale=zh)
        self.address = Address(locale=zh)
        self.code = Code()
        self.business = Business(locale=zh)
        self.text = Text(locale=zh)
        self.datetime = Datetime(locale=zh)
        self.file = File()
        self.path = Path()
        self.internet = Internet()
        self.structure = Structure()

    def mime_data(self):
        """Return one newline-terminated line holding 23 generated fields.

        Every field is wrapped in double quotes except the fifth (longitude),
        which is written bare. Values are inserted as-is: nothing escapes
        quotes or commas that a generated value may contain.
        """
        fields = (
            # Family name immediately followed by given name, no separator.
            self.person.last_name() + self.person.first_name(),
            self.address.city(),
            self.address.street_name(),
            self.address.calling_code(),
            self.address.longitude(),
            self.code.imei(),
            self.business.company(),
            self.text.hex_color(),
            self.datetime.formatted_datetime(),
            self.datetime.time(),
            self.file.file_name(),
            self.path.dev_dir(),
            self.internet.ip_v4(),
            self.internet.ip_v6(),
            self.internet.home_page(),
            self.internet.stock_image(),
            self.internet.user_agent(),
            self.internet.mac_address(),
            self.person.email(),
            self.person.telephone(),
            self.code.issn(),
            self.person.social_media_profile(),
            self.structure.html(),
        )
        template = '\"{0}\", \"{1}\", \"{2}\", \"{3}\", {4}, \"{5}\", \"{6}\" , \"{7}\" , \"{8}\" , \"{9}\" , \"{10}\" , \"{11}\" , \"{12}\" , \"{13}\" , \"{14}\" , \"{15}\" , \"{16}\" , \"{17}\" , \"{18}\" , \"{19}\" , \"{20}\" , \"{21}\" , \"{22}\"\n'
        return template.format(*fields)
def get_data(address: Address, lang: str):
    """Collect one value for each address field into a dict.

    Args:
        address: Provider whose methods supply every field except
            ``country_code``.
        lang: Passed to ``get_country_code`` to obtain ``country_code``.

    Returns:
        A dict keyed by field name, in the order the fields are generated.
    """
    data = {}
    data['address'] = address.address()
    data['calling_code'] = address.calling_code()
    data['city'] = address.city()
    data['continent'] = address.continent()
    data['coordinates'] = address.coordinates()
    data['country'] = address.country()
    # The country code comes from the requested language, not the provider.
    data['country_code'] = get_country_code(lang)
    data['latitude'] = address.latitude()
    data['longitude'] = address.longitude()
    data['postal_code'] = address.postal_code()
    data['state'] = address.state()
    data['street_name'] = address.street_name()
    data['street_number'] = address.street_number()
    data['street_suffix'] = address.street_suffix()
    data['zip_code'] = address.zip_code()
    return data
def generatingData(num, locale, country):
    """Return ``num`` fake people as semicolon-separated text.

    Args:
        num: Number of rows; converted with ``int()``.
        locale: Locale handed to the Person and Address providers.
        country: Country written for a row when the coin flip picks it;
            otherwise a generated country is written.

    Returns:
        The text of all rows, each terminated by a newline. Each row holds
        full name, country, "province, city, address" and telephone.
    """
    people = Person(locale)
    places = Address(locale)

    out = io.StringIO()
    rows = csv.writer(
        out,
        delimiter=';',
        lineterminator="\n",
        quoting=csv.QUOTE_NONE,
        escapechar='\\',
    )

    for _ in range(int(num)):
        full_name = people.full_name()
        if randint(0, 1) == 1:
            row_country = country
        else:
            row_country = places.country()
        location = ', '.join([places.province(), places.city(), places.address()])
        phone = people.telephone()
        rows.writerow([full_name, row_country, location, phone])

    return out.getvalue()
def generatePeople(row_num: int, region: str):
    """Write ``row_num`` fake people to stdout as semicolon-separated rows.

    Args:
        row_num: Number of rows to write.
        region: 'en_US' selects the 'en' locale, 'ru_RU' selects 'ru', and
            any other value selects 'uk'.
    """
    locale = 'en' if region == 'en_US' else ('ru' if region == 'ru_RU' else 'uk')

    # Quoting is disabled and the escape character is a double quote.
    out = csv.writer(stdout, quoting=csv.QUOTE_NONE, delimiter=';', escapechar='"')
    people = Person(locale)
    places = Address(locale)

    for _ in range(row_num):
        record = [
            people.full_name(),
            places.country(),
            places.region(),
            places.city(),
            places.address(),
            places.zip_code(),
            people.telephone(),
        ]
        out.writerow(record)
def getting_started_example():
    """Print sample values from several mimesis providers and locales."""
    # --- Generic provider with its default locale ---
    default_generic = Generic()
    print('Month =', default_generic.datetime.month())
    # The datetime provider returns a datetime.datetime here.
    print('Datetime =', default_generic.datetime.datetime(start=1900, end=2035, timezone=None))
    print('IMEI =', default_generic.code.imei())
    print('Fruit =', default_generic.food.fruit())
    print('RNA =', default_generic.science.rna_sequence())
    print('Word =', default_generic.text.word())
    with default_generic.text.override_locale(locales.FR):
        print('Word =', default_generic.text.word())
    # Printed once more after the locale override has ended.
    print('Word =', default_generic.text.word())

    # --- Country-specific providers ---
    # USASpecProvider offers ssn() but has no cpf(); BrazilSpecProvider
    # offers cpf() but has no ssn().
    usa_generic = Generic('en')
    usa_generic.add_provider(USASpecProvider)
    print('SSN =', usa_generic.usa_provider.ssn())

    brazil_generic = Generic('pt-br')
    brazil_generic.add_provider(BrazilSpecProvider)
    print('CPF =', brazil_generic.brazil_provider.cpf())

    # --- Numbers (both calls return int) ---
    number_source = Numbers()
    print('Numbers =', number_source.between())
    print('Numbers =', number_source.between(10, 10000000000000000))

    # --- Person ---
    korean_person = Person(locales.KO)
    print('Full name =', korean_person.full_name(gender=Gender.FEMALE))
    print('Full name =', korean_person.full_name(gender=Gender.MALE, reverse=True))
    with korean_person.override_locale(locales.RU):
        print('Full name =', korean_person.full_name())
    print('Full name =', korean_person.full_name())
    print('Telephone =', korean_person.telephone())
    print('Telephone =', korean_person.telephone(mask='(###)-###-####'))
    print('Identifier =', korean_person.identifier())
    print('Identifier =', korean_person.identifier(mask='######-#######'))

    # --- Address ---
    german_address = Address('de')
    russian_address = Address('ru')
    print('Region =', german_address.region())
    print('Federal subject =', russian_address.federal_subject())
    print('Address =', german_address.address())
    print('Address =', russian_address.address())

    korean_address = Address('ko')
    print('Address =', korean_address.province(), korean_address.city(), korean_address.address())
    print('Zip code =', korean_address.zip_code())

    # --- Business ---
    korean_business = Business('ko')
    # price() returns a str; the last five characters are cut off.
    print('Price =', korean_business.price(minimum=1.0, maximum=1000000000.0)[:-5])

    # --- Payment (credit_card_number returns a str) ---
    payment_source = Payment()
    print('Credit card =', payment_source.credit_card_number(card_type=None))
def main():
    """Generate fake DE/AT/CH customers and write them to a CSV file.

    The number of customers is read from ``sys.argv[1]``. When it is missing,
    not an integer, or negative, a usage message is printed and nothing is
    written. Otherwise ``output/customers__de_at_ch.csv`` is created (along
    with the ``output`` directory if it does not exist yet).
    """
    import os

    try:
        num_gen = int(sys.argv[1])
    except (IndexError, ValueError):
        # Missing or non-numeric argument.
        num_gen = None

    if num_gen is None or num_gen < 0:
        print(
            "Usage: python faker_customers_to_csv.py <INT: Number of Customers to generate>"
        )
        print("Example: python faker_customers_to_csv.py 10000")
        return

    # Generate customers that are between 18 and 100 years old, oldest first.
    ls_dates = sorted(
        fake_de.date_time_between(start_date="-100y",
                                  end_date="-18y",
                                  tzinfo=None)
        for _ in range(num_gen)
    )

    # Country code -> (mimesis locale, nationality). Any country other than
    # DE and AT is treated as Switzerland.
    locale_by_country = {
        'DE': ('de', 'Germany'),
        'AT': ('de-at', 'Austria'),
    }
    default_locale = ('de-ch', 'Switzerland')
    # Providers are created once per locale instead of once per customer.
    providers = {}

    ls_customer = []
    for i, birth_date in enumerate(ls_dates):
        s_country = random_country()
        locale, s_nationality = locale_by_country.get(s_country, default_locale)
        if locale not in providers:
            providers[locale] = (Address(locale), Person(locale))
        address, person = providers[locale]

        s_sex = random_mf_flag()
        if s_sex == 'F':
            gender = mimesis.enums.Gender.FEMALE
        else:
            gender = mimesis.enums.Gender.MALE

        s_first_name = person.name(gender)
        s_last_name = person.last_name(gender)
        s_marital_status = random_marital_status_flag()
        s_job = person.occupation()
        s_email = person.email()
        s_phone = person.telephone()
        i_number_children = random.randint(0, 4)
        s_address_street = address.address()
        s_address_zip = address.postal_code()
        s_address_city = address.city()

        ls_customer.append((
            customer_number(i),
            s_sex,
            s_first_name,
            s_last_name,
            s_job,
            s_email,
            s_phone,
            i_number_children,
            s_marital_status,
            # Only the date part is relevant for a date of birth.
            birth_date.replace(hour=0, minute=0, second=0, microsecond=0),
            s_address_street,
            s_address_zip,
            s_address_city,
            s_country,
            s_nationality,
        ))

    ls_columns = [
        'customer_id', 'gender', 'first_name', 'last_name', 'job', 'email',
        'phone', 'number_children', 'marital_status', 'date_of_birth',
        'street', 'zip', 'city', 'country_code', 'nationality'
    ]
    df_customer = pd.DataFrame(ls_customer, columns=ls_columns)

    output_path = 'output/customers__de_at_ch.csv'
    # Make sure the target directory exists so the generated data is not lost.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df_customer.to_csv(output_path, sep=',', index=False, header=ls_columns)
print(person.nationality())
print(person.work_experience())
print(person.political_views())
print(person.worldview())

# Custom username patterns
templates = ['l-d', 'U-d']
for item in templates:
    print(person.username(template=item))
print('\n')

# Address section
print('#' * 5 + '地址' + '#' * 5)
address = Address('zh')
print(address.coordinates())
print(address.city())
print('\n')

# Business section: it gets its own header label instead of repeating the
# address label used by the section above.
print('#' * 5 + '商业' + '#' * 5)
business = Business('zh')
print(business.company())
print(business.company_type())
print('\n')

# Payment section
print('#' * 5 + '支付' + '#' * 5)
payment = Payment('zh')
print(payment.paypal())
print(payment.credit_card_expiration_date())
print('\n')

# Text section
print('#' * 5 + '文字' + '#' * 5)
from mimesis import Person from mimesis import Address from mimesis import random from mimesis.enums import Gender import random person = Person('ru') address = Address('ru') file = open("shops.txt", 'w') list_rating = [] list_cities = [] for i in range(50): list_cities.append(address.city()) print(list_cities) for i in range(1000): list_rating.append(round(random.uniform(0, 5), 2)) ''' for i in range(1000): if (list_age[i] < 30): list_seniority.append(random.randint(0, 5)) elif (list_age[i] > 30 and list_age[i] < 35): list_seniority.append(random.randint(0, 10)) elif (list_age[i] > 35 and list_age[i] < 45): list_seniority.append(random.randint(0, 15))
def _split_car(car_name):
    """Split a car name into (marque, model) at the first space."""
    marque, _, model = car_name.partition(' ')
    return marque, model


def generatorStr(columns, numbOfEl):
    """Generate ``numbOfEl`` rows of fake values for the given table columns.

    Args:
        columns: Two-dimensional sequence; each entry holds the column name
            at index 0 and the column type at index 1. The first entry is
            skipped and never generated.
        numbOfEl: Number of rows to generate.

    Returns:
        A list of dicts, one per row, mapping column name to generated value.
        A ``'sex'`` entry is also present whenever a ``last_name`` or
        ``first_name`` column needed a gender, even without a ``sex`` column.

    Known column names get a dedicated generator. Any remaining column is
    filled by type: ``int`` becomes 0, ``tinyint`` is always 0, and
    ``varchar(255)`` becomes 12 random ASCII letters.
    """
    filler_length = 12
    generated_columns = columns[1:]
    names = [column[0] for column in generated_columns]
    # Marque and model are only meaningful together: both come from one car.
    # When just one of the two columns exists it keeps the '0' placeholder.
    has_car_pair = 'marque' in names and 'model' in names

    arrDictGenerator = []
    for _ in range(numbOfEl):
        row = {}
        car = None  # (marque, model) shared by both car columns of this row

        for column in generated_columns:
            name = column[0]
            col_type = column[1]

            if name == 'sex':
                if 'sex' not in row:
                    row['sex'] = generatorGender()
            elif name in ('last_name', 'first_name'):
                # Names must match the row's gender; draw one if none yet.
                if 'sex' not in row:
                    row['sex'] = generatorGender()
                if name == 'last_name':
                    row['last_name'] = generatorLastName(row['sex'])
                else:
                    row['first_name'] = generatorFirstName(row['sex'])
            elif name == 'OGRN':
                row['OGRN'] = rsp().ogrn()
            elif name in ('marque', 'model'):
                if not has_car_pair:
                    row[name] = '0'
                else:
                    if car is None:
                        car = _split_car(Transport().car())
                    row[name] = car[0] if name == 'marque' else car[1]
            elif name == 'name':
                row['name'] = Business().company()
            elif name == 'deal_date':
                row['deal_date'] = generatorDate()
            elif name == 'city':
                row['city'] = Address().city()
            elif name == 'transmission':
                row['transmission'] = generatorTrans()
            elif name == 'engine':
                row['engine'] = generatorEngine()
            elif name == 'color':
                row['color'] = generatorColor()
            elif name == 'VIN':
                row['VIN'] = generatorVIN()
            elif name == 'kilometrage':
                row[name] = random.randrange(5000, 150000, 5000)
            elif name == 'salary':
                row[name] = random.randrange(1000, 6000, 100)
            elif name == 'power':
                row[name] = random.randrange(130, 220, 10)
            elif name == 'price':
                row[name] = random.randrange(1300, 40000, 1000)

            # Type-based defaults for columns without a dedicated generator.
            if col_type == 'int' and name not in row:
                row[name] = 0
            if col_type == 'tinyint':
                row[name] = 0
            if col_type == 'varchar(255)' and name not in row:
                row[name] = ''.join(
                    random.choice(string.ascii_letters)
                    for _ in range(filler_length)
                )

        arrDictGenerator.append(row)

    return arrDictGenerator
#idd = g.code.imei() idd = n gender = random.choice(gen) if gender == 'Женский': first_name = p.first_name(gender=Gender.FEMALE) last_name = p.last_name(gender=Gender.FEMALE) #self.patron = rus.patronymic(gender = Gender.FEMALE) elif gender == 'Мужской': first_name = p.first_name(gender=Gender.MALE) last_name = p.last_name(gender=Gender.MALE) #self.patron = rus.patronymic(gender = Gender.MALE) age = p.age(minimum=18, maximum=70) month_ob = dt.month() day_ob = dt.day_of_month() year_ob = 2020 - age city_ob = adr.city() city = adr.city() address = adr.address() phone = p.telephone(mask='+7(###)-###-####') email = p.email(domains=['mimesis.name']) user = (int(idd), str(gender), str(first_name), str(last_name), str(age), str(month_ob), str(day_ob), str(year_ob), str(city_ob), str(city), str(address), str(phone), str(email)) cur.execute( "INSERT INTO people VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", user) conn.commit() n += 1
def gen_data_add_nested_struct(self, data_path, partition_date, num_rows, file_format):
    """
    Create a data sample whose schema contains a nested ``address`` struct.

    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

    The rows are written to ``data_path`` partitioned by ``date`` in
    overwrite mode, then the row count and schema are printed.
    """
    person = Person('en')
    address = Address('en')

    # Create schema
    schema_address = StructType([
        StructField('address', StringType(), True),
        StructField('city', StringType(), True),
        StructField('country', StringType(), True),
        StructField('state', StringType(), True),
        StructField('postal_code', StringType(), True)
    ])

    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('address', schema_address, True),
        StructField('date', DateType(), True)
    ])

    # Generate all rows first and build a single DataFrame from them. This
    # also yields an empty DataFrame with the right schema for num_rows == 0.
    rows = [
        [
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            [
                address.address(),
                address.city(),
                address.country(),
                address.state(),
                address.postal_code()
            ],
            partition_date
        ]
        for _ in range(num_rows)
    ]
    df = self.spark.createDataFrame(rows, schema_df)

    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')

    return
def gen_data_remove_column(self, data_path, partition_date, num_rows, file_format):
    """
    Create a data sample with some columns removed from the schema.

    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

    The removed columns are ``lat`` and ``long`` (street level),
    ``country_code`` (address level) and ``title_name`` (top level). The rows
    are written to ``data_path`` partitioned by ``date`` in overwrite mode,
    then the row count and schema are printed.
    """
    person = Person('en')
    address = Address('en')

    # 'lat' and 'long' are no longer part of the street struct.
    schema_street = StructType([
        StructField('street_name', StringType(), True)
    ])

    schema_address_details = StructType([
        StructField('street', schema_street, True),
        StructField('number', IntegerType(), True)
    ])

    # 'country_code' is no longer part of the address struct.
    schema_address = StructType([
        StructField('address_details', schema_address_details, True),
        StructField('city', StringType(), True),
        StructField('country', StringType(), True),
        StructField('state', StringType(), True),
        StructField('postal_code', IntegerType(), True)
    ])

    # 'title_name' is no longer part of the top-level schema.
    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('address', schema_address, True),
        StructField('date', DateType(), True)
    ])

    # Generate all rows first and build a single DataFrame from them. This
    # also yields an empty DataFrame with the right schema for num_rows == 0.
    rows = [
        [
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            [
                [
                    [
                        address.street_name()
                    ],
                    # Converted with int() to fit the IntegerType field.
                    int(address.street_number())
                ],
                address.city(),
                address.country(),
                address.state(),
                # Converted with int() to fit the IntegerType field.
                int(address.postal_code())
            ],
            partition_date
        ]
        for _ in range(num_rows)
    ]
    df = self.spark.createDataFrame(rows, schema_df)

    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')

    return