Example #1
0
    def address(cls,
                *,
                locale=Locales.EN,
                calling_code=None,
                city=None,
                country=None,
                country_code=None,
                latitude=None,
                longitude=None,
                postal_code=None,
                state=None,
                street_name=None,
                street_number=None,
                street_suffix=None):
        '''
            Create an Address Data Entity object.

            All individual fields are automatically randomly generated based on locale. If provided, the corresponding values are overridden.

            Note:
                All individual fields are randomly generated. Don't expect correct correlation e.g. correct postal code for the generated city.

                Only ``None`` means "generate this field". Any other value, including falsy ones such as ``0`` or an empty string, is used as given.

            Keyword Arguments:
                locale: Appropriate Random.locale.<local_name> object. Default is Random.locale.EN
                calling_code: Calling Code
                city: City
                country: Country Name
                country_code: Country Code
                latitude: Latitude
                longitude: Longitude
                postal_code: Postal Code
                state: State
                street_name: Street Name
                street_number: Street Number
                street_suffix: Street Suffix
        '''
        address = Address(locale=locale)
        from arjuna.engine.data.entity.address import Address as ArjAddress

        def pick(value, generate):
            # Explicit None check: the previous `x is not None and x or gen()` idiom
            # threw away falsy overrides, and the `city` variant returned True.
            return value if value is not None else generate()

        return ArjAddress(
            calling_code=pick(calling_code, address.calling_code),
            city=pick(city, address.city),
            country=pick(country, address.country),
            country_code=pick(country_code, address.country_code),
            latitude=pick(latitude, address.latitude),
            longitude=pick(longitude, address.longitude),
            postal_code=pick(postal_code, address.postal_code),
            state=pick(state, address.state),
            street_name=pick(street_name, address.street_name),
            street_number=pick(street_number, address.street_number),
            street_suffix=pick(street_suffix, address.street_suffix),
        )
Example #2
0
 def get_fake_address(self, loc):
     """Generate one fake address for locale ``loc`` and return its parts.

     Returns a tuple ordered as (zip code, city, street suffix, street name,
     street number, state, country).
     """
     provider = Address(loc)
     # The parts are drawn in this order; the returned tuple is ordered differently.
     town = provider.city()
     suffix = provider.street_suffix()
     street_name = provider.street_name()
     house_number = provider.street_number()
     region = provider.state()
     zip_code = provider.zip_code()
     nation = provider.country()
     return (zip_code, town, suffix, street_name, house_number, region,
             nation)
Example #3
0
def generate_addresses():
    """Return a dict describing one randomly generated ``en-gb`` address."""
    provider = Address('en-gb')

    # Draw the values first, in a fixed order, then assemble the record.
    town = provider.city()
    nation = provider.country()
    postcode = provider.postal_code()
    street = provider.street_name()
    house_number = provider.street_number()

    # NOTE(review): `id` is not assigned in this function, so it resolves to a
    # module-level name or, failing that, the builtin `id` function - confirm.
    record = {'id': id}
    record.update(
        street_number=house_number,
        street_name=street,
        city=town,
        postal_code=postcode,
        country=nation,
    )
    return record
Example #4
0
def generate_random_data():
    """Return a dict of randomly generated person and address fields."""
    person = Person()
    location = Address()

    # (output key, zero-argument generator) pairs, evaluated top to bottom.
    field_makers = (
        ("name", person.full_name),
        ("email", person.email),
        ("nationality", person.nationality),
        ("occupation", person.occupation),
        ("password", person.password),
        ("phone", person.telephone),
        ("address", location.address),
        ("city", location.city),
        ("street_no", location.street_number),
    )
    record = {key: make() for key, make in field_makers}
    record["created"] = Datetime().date().isoformat()
    return record
Example #5
0
class CSVData:
    """Holds a set of data providers and renders one line of fake values."""

    def __init__(self):
        """Create the providers; the locale-aware ones are built with 'zh'."""
        zh = 'zh'
        self.person = Person(locale=zh)
        self.address = Address(locale=zh)
        self.code = Code()
        self.business = Business(locale=zh)
        self.text = Text(locale=zh)
        self.datetime = Datetime(locale=zh)
        self.file = File()
        self.path = Path()
        self.internet = Internet()
        self.structure = Structure()

    def mime_data(self):
        """Return one newline-terminated line containing 23 generated values.

        Every value is wrapped in double quotes except the fifth one
        (longitude), which is written bare.
        """
        person = self.person
        address = self.address
        code = self.code
        clock = self.datetime
        internet = self.internet

        # Tuple elements are evaluated left to right, so the providers are
        # called in exactly this order.
        values = (
            # Last name directly followed by first name, no separator.
            person.last_name() + person.first_name(),
            address.city(),
            address.street_name(),
            address.calling_code(),
            address.longitude(),
            code.imei(),
            self.business.company(),
            self.text.hex_color(),
            clock.formatted_datetime(),
            clock.time(),
            self.file.file_name(),
            self.path.dev_dir(),
            internet.ip_v4(),
            internet.ip_v6(),
            internet.home_page(),
            internet.stock_image(),
            internet.user_agent(),
            internet.mac_address(),
            person.email(),
            person.telephone(),
            code.issn(),
            person.social_media_profile(),
            self.structure.html(),
        )

        template = '\"{0}\", \"{1}\", \"{2}\", \"{3}\", {4}, \"{5}\", \"{6}\" , \"{7}\" , \"{8}\" , \"{9}\" , \"{10}\" , \"{11}\" , \"{12}\" , \"{13}\" , \"{14}\" , \"{15}\" , \"{16}\" , \"{17}\" , \"{18}\" , \"{19}\" , \"{20}\" , \"{21}\" , \"{22}\"\n'
        return template.format(*values)
Example #6
0
def get_data(address: Address, lang: str):
    """Collect one value from each listed ``address`` generator into a dict.

    ``country_code`` is the exception: it comes from ``get_country_code(lang)``
    rather than from ``address``.
    """
    # Generator method names; each doubles as the output key.
    before_country_code = (
        'address', 'calling_code', 'city', 'continent', 'coordinates',
        'country',
    )
    after_country_code = (
        'latitude', 'longitude', 'postal_code', 'state', 'street_name',
        'street_number', 'street_suffix', 'zip_code',
    )

    data = {}
    for field in before_country_code:
        data[field] = getattr(address, field)()
    data['country_code'] = get_country_code(lang)
    for field in after_country_code:
        data[field] = getattr(address, field)()
    return data
Example #7
0
def generatingData(num, locale, country):
    """Return ``int(num)`` fake person rows as ';'-separated text.

    Each row holds: full name, a country (either ``country`` or a generated
    one, chosen by a coin flip), "province, city, address", and a telephone.
    """
    people = Person(locale)
    places = Address(locale)
    out = io.StringIO()
    csv_out = csv.writer(
        out,
        delimiter=';',
        lineterminator="\n",
        quoting=csv.QUOTE_NONE,
        escapechar='\\',
    )

    def make_row():
        # Generators are called in the same order the columns appear.
        name = people.full_name()
        if randint(0, 1) == 1:
            nation = country
        else:
            nation = places.country()
        location = ', '.join(
            (places.province(), places.city(), places.address()))
        return [name, nation, location, people.telephone()]

    for _ in range(int(num)):
        csv_out.writerow(make_row())
    return out.getvalue()
Example #8
0
def generatePeople(row_num: int, region: str):
    """Write ``row_num`` fake person rows to stdout as ';'-separated text.

    ``region`` selects the provider locale: 'en_US' -> 'en', 'ru_RU' -> 'ru',
    anything else -> 'uk'.
    """
    locale = {'en_US': 'en', 'ru_RU': 'ru'}.get(region, 'uk')

    csv_out = csv.writer(
        stdout,
        quoting=csv.QUOTE_NONE,
        delimiter=';',
        escapechar='"',
    )
    people = Person(locale)
    places = Address(locale)

    for _ in range(row_num):
        row = [
            people.full_name(),
            places.country(),
            places.region(),
            places.city(),
            places.address(),
            places.zip_code(),
            people.telephone(),
        ]
        csv_out.writerow(row)
Example #9
0
def getting_started_example():
    """Print sample values from several data providers to stdout.

    Walks through Generic (including a temporary locale override and
    locale-specific providers registered with ``add_provider``), Numbers,
    Person, Address, Business and Payment, printing one labelled value per
    call. Returns nothing.
    """
    # No locale argument is passed, so the provider's own default applies.
    generic = Generic()
    #generic = Generic(locales.EN)

    print('Month =', generic.datetime.month())
    print('Datetime =',
          generic.datetime.datetime(start=1900, end=2035,
                                    timezone=None))  # Type: datetime.datetime.
    print('IMEI =', generic.code.imei())
    print('Fruit =', generic.food.fruit())
    print('RNA =', generic.science.rna_sequence())

    # A word is printed before, inside and after the locale override block.
    print('Word =', generic.text.word())

    with generic.text.override_locale(locales.FR):
        print('Word =', generic.text.word())

    print('Word =', generic.text.word())

    # Country-specific provider registered on an 'en' Generic.
    generic = Generic('en')
    generic.add_provider(USASpecProvider)

    print('SSN =', generic.usa_provider.ssn())
    #print('CPF =', generic.usa_provider.cpf())  # AttributeError: 'USASpecProvider' object has no attribute 'cpf'.

    # Country-specific provider registered on a 'pt-br' Generic.
    generic = Generic('pt-br')
    #generic = Generic(locales.PT_BR)
    generic.add_provider(BrazilSpecProvider)

    #print('SSN =', generic.brazil_provider.ssn())  # AttributeError: 'BrazilSpecProvider' object has no attribute 'ssn'.
    print('CPF =', generic.brazil_provider.cpf())

    # ---- Numbers ----
    numbers = Numbers()

    print('Numbers =', numbers.between())  # Type: int.
    print('Numbers =', numbers.between(10, 10000000000000000))  # Type: int.

    # ---- Person (locales.KO, with a temporary locales.RU override) ----
    person = Person(locales.KO)

    print('Full name =', person.full_name(gender=Gender.FEMALE))
    print('Full name =', person.full_name(gender=Gender.MALE, reverse=True))

    with person.override_locale(locales.RU):
        print('Full name =', person.full_name())

    print('Full name =', person.full_name())
    print('Telephone =', person.telephone())
    print('Telephone =', person.telephone(mask='(###)-###-####'))
    print('Identifier =', person.identifier())
    print('Identifier =', person.identifier(mask='######-#######'))

    # ---- Address ('de', 'ru' and 'ko' providers) ----
    de = Address('de')
    ru = Address('ru')

    print('Region =', de.region())
    print('Federal subject =', ru.federal_subject())
    print('Address =', de.address())
    print('Address =', ru.address())

    ko = Address('ko')

    print('Address =', ko.province(), ko.city(), ko.address())
    print('Zip code =', ko.zip_code())

    # ---- Business ----
    business = Business('ko')

    #print('Price =', business.price(minimum=1.0, maximum=1000000000.0))  # Type: str.
    #print('Price =', business.price(minimum=1.0, maximum=1000000000.0)[:-2])  # Type: str.
    # The slice drops the last five characters of the returned string
    # (presumably the decimals and currency suffix - TODO confirm).
    print('Price =',
          business.price(minimum=1.0, maximum=1000000000.0)[:-5])  # Type: str.

    # ---- Payment ----
    payment = Payment()

    print('Credit card =',
          payment.credit_card_number(card_type=None))  # Type: str.
def main():
    """Generate fake DE/AT/CH customers and write them to a CSV file.

    The number of customers is read from ``sys.argv[1]``. When it is missing,
    not an integer, or negative, a usage message is printed and the function
    returns without writing anything. Otherwise the customers are written to
    ``output/customers__de_at_ch.csv``, ordered by date of birth.
    """
    import os

    # Only a missing or non-numeric argument is expected here; a bare
    # `except:` would also swallow KeyboardInterrupt and SystemExit.
    try:
        num_gen = int(sys.argv[1])
    except (IndexError, ValueError):
        num_gen = None

    if num_gen is None or num_gen < 0:
        print(
            "Usage: python faker_customers_to_csv.py <INT: Number of Customers to generate>"
        )
        print("Example: python faker_customers_to_csv.py 10000")
        return

    # Generate Customers that are between 18 and 100 years old
    ls_dates = [
        fake_de.date_time_between(start_date="-100y",
                                  end_date="-18y",
                                  tzinfo=None) for _ in range(num_gen)
    ]
    ls_dates.sort()

    # Build each locale's providers once instead of once per customer.
    providers = {
        'DE': (Address('de'), Person('de'), 'Germany'),
        'AT': (Address('de-at'), Person('de-at'), 'Austria'),
    }
    # Any country code other than DE/AT is treated as Switzerland.
    swiss_providers = (Address('de-ch'), Person('de-ch'), 'Switzerland')

    ls_customer = []
    for i, birth_date in enumerate(ls_dates):
        s_country = random_country()
        address, person, s_nationality = providers.get(s_country,
                                                       swiss_providers)

        s_sex = random_mf_flag()
        gender = mimesis.enums.Gender.FEMALE if s_sex == 'F' else mimesis.enums.Gender.MALE
        s_first_name = person.name(gender)
        s_last_name = person.last_name(gender)
        s_marital_status = random_marital_status_flag()
        s_job = person.occupation()
        s_email = person.email()
        s_phone = person.telephone()
        i_number_children = random.randint(0, 4)
        s_address_street = address.address()
        s_address_zip = address.postal_code()
        s_address_city = address.city()

        # Only the calendar date matters for a date of birth.
        date_of_birth = birth_date.replace(hour=0,
                                           minute=0,
                                           second=0,
                                           microsecond=0)

        t_customer = (customer_number(i), s_sex, s_first_name, s_last_name,
                      s_job, s_email, s_phone, i_number_children,
                      s_marital_status, date_of_birth, s_address_street,
                      s_address_zip, s_address_city, s_country,
                      s_nationality)
        ls_customer.append(t_customer)

    ls_columns = [
        'customer_id', 'gender', 'first_name', 'last_name', 'job', 'email',
        'phone', 'number_children', 'marital_status', 'date_of_birth',
        'street', 'zip', 'city', 'country_code', 'nationality'
    ]

    df_customer = pd.DataFrame(ls_customer, columns=ls_columns)

    # to_csv does not create missing directories.
    os.makedirs('output', exist_ok=True)
    df_customer.to_csv('output/customers__de_at_ch.csv',
                       sep=',',
                       index=False,
                       header=ls_columns)
# NOTE(review): `person` is created before this fragment begins; its setup is
# not visible here.
print(person.nationality())
print(person.work_experience())
print(person.political_views())
print(person.worldview())

# Custom username patterns
templates = ['l-d', 'U-d']
for item in templates:
    print(person.username(template=item))

print('\n')

# Section heading (Chinese for "address"), then sample Address values.
print('#' * 5 + '地址' + '#' * 5)
address = Address('zh')
print(address.coordinates())
print(address.city())
print('\n')

# NOTE(review): this heading repeats the "address" text above although the
# values that follow come from Business - possibly a copy-paste slip; confirm.
print('#' * 5 + '地址' + '#' * 5)
business = Business('zh')
print(business.company())
print(business.company_type())
print('\n')

# Section heading (Chinese for "payment"), then sample Payment values.
print('#' * 5 + '支付' + '#' * 5)
payment = Payment('zh')
print(payment.paypal())
print(payment.credit_card_expiration_date())
print('\n')

# Section heading (Chinese for "text"); the section body is not in this fragment.
print('#' * 5 + '文字' + '#' * 5)
Example #12
0
from mimesis import Person
from mimesis import Address
from mimesis import random
from mimesis.enums import Gender
import random

# Russian-locale providers used by this script.
person = Person('ru')
address = Address('ru')
# NOTE(review): this handle is not closed anywhere in the visible part of the
# script - confirm it is closed further down.
file = open("shops.txt", 'w')

# 50 randomly generated city names.
list_cities = [address.city() for _ in range(50)]

print(list_cities)

# 1000 ratings drawn uniformly from [0, 5], rounded to two decimals.
list_rating = [round(random.uniform(0, 5), 2) for _ in range(1000)]



'''
    for i in range(1000):
    if (list_age[i] < 30):
    list_seniority.append(random.randint(0, 5))
    elif (list_age[i] > 30 and list_age[i] < 35):
    list_seniority.append(random.randint(0, 10))
    elif (list_age[i] > 35 and list_age[i] < 45):
    list_seniority.append(random.randint(0, 15))
def _split_car_name(car_name):
    """Split a car name into (marque, model) at the first space."""
    marque, _, model = car_name.partition(' ')
    return marque, model


def generatorStr(columns, numbOfEl):  # columns: 2-D sequence of (column name, column type)
    """Generate ``numbOfEl`` rows of fake data for the given table columns.

    Args:
        columns: sequence whose items are indexable as ``(name, type)``.
            The first item is skipped; generation starts at index 1.
        numbOfEl: number of rows to generate.

    Returns:
        A list of ``numbOfEl`` dicts mapping column name to generated value.

    Columns with a known name get a dedicated generator. Remaining columns
    get a default by type: ``int`` and ``tinyint`` -> 0, ``varchar(255)`` ->
    12 random ASCII letters. ``sex``, ``first_name`` and ``last_name`` share
    one gender per row; ``marque`` and ``model`` share one car per row.
    """
    arrDictGenerator = []
    for _ in range(numbOfEl):
        dictGenerator = {}
        # [marque, model] of the car generated for this row; empty until needed.
        marqModel = ['', '']
        for column in columns[1:]:
            name, col_type = column[0], column[1]

            if name == 'sex':
                # Stored under 'sex' (not the column index) so the name
                # columns reuse the same gender.
                if 'sex' not in dictGenerator:
                    dictGenerator['sex'] = generatorGender()

            elif name in ('last_name', 'first_name'):
                if 'sex' not in dictGenerator:
                    dictGenerator['sex'] = generatorGender()
                gender = dictGenerator['sex']
                if name == 'last_name':
                    dictGenerator[name] = generatorLastName(gender)
                else:
                    dictGenerator[name] = generatorFirstName(gender)

            elif name == 'OGRN':
                dictGenerator['OGRN'] = rsp().ogrn()

            elif name == 'marque':
                if 'model' not in dictGenerator:
                    # Placeholder; replaced once the 'model' column is handled.
                    dictGenerator['marque'] = '0'
                else:
                    if marqModel == ['', '']:
                        marqModel[0], marqModel[1] = _split_car_name(
                            Transport().car())
                    dictGenerator['marque'] = marqModel[0]
                    # Mirror of the 'model' branch: resolve its placeholder.
                    if dictGenerator['model'] == '0':
                        dictGenerator['model'] = marqModel[1]

            elif name == 'model':
                if 'marque' not in dictGenerator:
                    # Placeholder; replaced once the 'marque' column is handled.
                    dictGenerator['model'] = '0'
                else:
                    if marqModel == ['', '']:
                        marqModel[0], marqModel[1] = _split_car_name(
                            Transport().car())
                    dictGenerator['model'] = marqModel[1]
                    if dictGenerator['marque'] == '0':
                        dictGenerator['marque'] = marqModel[0]

            elif name == 'name':
                dictGenerator['name'] = Business().company()

            elif name == 'deal_date':
                dictGenerator['deal_date'] = generatorDate()

            elif name == 'city':
                dictGenerator['city'] = Address().city()

            elif name == 'transmission':
                dictGenerator['transmission'] = generatorTrans()

            elif name == 'engine':
                dictGenerator['engine'] = generatorEngine()

            elif name == 'color':
                dictGenerator['color'] = generatorColor()

            elif name == 'VIN':
                dictGenerator['VIN'] = generatorVIN()

            elif name == 'kilometrage':
                dictGenerator[name] = random.randrange(5000, 150000, 5000)

            elif name == 'salary':
                dictGenerator[name] = random.randrange(1000, 6000, 100)

            elif name == 'power':
                dictGenerator[name] = random.randrange(130, 220, 10)

            elif name == 'price':
                dictGenerator[name] = random.randrange(1300, 40000, 1000)

            # Type-based defaults for columns not filled above.
            if col_type == 'int' and name not in dictGenerator:
                dictGenerator[name] = 0

            if col_type == 'tinyint':
                dictGenerator[name] = 0

            if col_type == 'varchar(255)' and name not in dictGenerator:
                dictGenerator[name] = ''.join(
                    random.choice(string.ascii_letters) for _ in range(12))
        arrDictGenerator.append(dictGenerator)
    return arrDictGenerator
Example #14
0
    # Builds one person record and inserts it into the `people` table.
    # NOTE(review): n, gen, p, dt, adr, cur and conn are defined outside this
    # fragment.
    #idd =  g.code.imei()
    idd = n
    gender = random.choice(gen)
    # The two literals below are Russian for "female" and "male".
    # NOTE(review): if `gender` matches neither, first_name and last_name are
    # not assigned here - confirm `gen` holds only these two values.
    if gender == 'Женский':
        first_name = p.first_name(gender=Gender.FEMALE)
        last_name = p.last_name(gender=Gender.FEMALE)
        #self.patron = rus.patronymic(gender = Gender.FEMALE)
    elif gender == 'Мужской':
        first_name = p.first_name(gender=Gender.MALE)
        last_name = p.last_name(gender=Gender.MALE)
        #self.patron = rus.patronymic(gender = Gender.MALE)
    age = p.age(minimum=18, maximum=70)
    month_ob = dt.month()
    day_ob = dt.day_of_month()
    # Birth year is derived from the age with 2020 hard-coded as reference year.
    year_ob = 2020 - age
    # Two independent city draws (presumably birth city and current city -
    # TODO confirm against the table schema).
    city_ob = adr.city()
    city = adr.city()
    address = adr.address()
    phone = p.telephone(mask='+7(###)-###-####')
    email = p.email(domains=['mimesis.name'])

    # All fields except the id are stored as strings.
    user = (int(idd), str(gender), str(first_name), str(last_name), str(age),
            str(month_ob), str(day_ob), str(year_ob), str(city_ob), str(city),
            str(address), str(phone), str(email))

    # Parameterized insert: 13 placeholders for the 13 values in `user`.
    cur.execute(
        "INSERT INTO people VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);",
        user)
    conn.commit()
    n += 1
Example #15
0
    def gen_data_add_nested_struct(self, data_path, partition_date, num_rows,
                                   file_format):
        """
        Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

        This function creates a data sample adding a nested struct to the schema

        All rows are generated first and turned into a single DataFrame, so
        num_rows == 0 yields an empty DataFrame with the schema below.
        """

        person = Person('en')
        address = Address('en')

        # Create schema
        schema_address = StructType([
            StructField('address', StringType(), True),
            StructField('city', StringType(), True),
            StructField('country', StringType(), True),
            StructField('state', StringType(), True),
            StructField('postal_code', StringType(), True)
        ])

        schema_df = StructType([
            StructField('identifier', StringType(), True),
            StructField('first_name', StringType(), True),
            StructField('last_name', StringType(), True),
            StructField('occupation', StringType(), True),
            StructField('age', IntegerType(), True),
            StructField('address', schema_address, True),
            StructField('date', DateType(), True)
        ])

        # Generate data
        # One createDataFrame call replaces the per-row union, whose bare
        # `except:` also hid genuine union errors by restarting from one row.
        rows = [[
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            [
                address.address(),
                address.city(),
                address.country(),
                address.state(),
                address.postal_code()
            ], partition_date
        ] for _ in range(num_rows)]
        df = self.spark.createDataFrame(rows, schema_df)

        df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
            file_format).save(data_path)

        print('Partition created: {data_path}/date={date}'.format(
            data_path=data_path, date=partition_date))
        print('# Rows:', df.count())
        print('Schema:')
        df.printSchema()
        print('\n')
Example #16
0
    def gen_data_remove_column(self, data_path, partition_date, num_rows,
                               file_format):
        """
        Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

        This function creates a data sample removing some columns

        All rows are generated first and turned into a single DataFrame, so
        num_rows == 0 yields an empty DataFrame with the schema below.
        """

        person = Person('en')
        address = Address('en')

        schema_street = StructType([
            StructField('street_name', StringType(), True)
            # StructField('lat', FloatType(), True), #column removed
            # StructField('long', FloatType(), True) #column removed
        ])

        schema_address_details = StructType([
            StructField('street', schema_street, True),
            StructField('number', IntegerType(), True)
        ])

        schema_address = StructType([
            StructField('address_details', schema_address_details, True),
            StructField('city', StringType(), True),
            StructField('country', StringType(), True),
            # StructField('country_code', StringType(), True), #column removed
            StructField('state', StringType(), True),
            StructField('postal_code', IntegerType(), True)
        ])

        schema_df = StructType([
            StructField('identifier', StringType(), True),
            StructField('first_name', StringType(), True),
            StructField('last_name', StringType(), True),
            StructField('occupation', StringType(), True),
            StructField('age', IntegerType(), True),
            StructField('address', schema_address, True),
            # StructField('title_name', StringType(), True), #column removed
            StructField('date', DateType(), True)
        ])

        # One createDataFrame call replaces the per-row union, whose bare
        # `except:` also hid genuine union errors by restarting from one row.
        rows = [
            [
                person.identifier(),
                person.first_name(),
                person.last_name(),
                person.occupation(),
                person.age(),
                [
                    [
                        [
                            address.street_name()
                            #float(address.latitude()),
                            #float(address.longitude())
                        ],
                        int(address.street_number())
                    ],
                    address.city(),
                    address.country(),
                    #address.country_code(),
                    address.state(),
                    int(address.postal_code())
                ],
                #person.title(),
                partition_date
            ] for _ in range(num_rows)
        ]
        df = self.spark.createDataFrame(rows, schema_df)

        df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
            file_format).save(data_path)

        print('Partition created: {data_path}/date={date}'.format(
            data_path=data_path, date=partition_date))
        print('# Rows:', df.count())
        print('Schema:')
        df.printSchema()
        print('\n')