Ejemplo n.º 1
0
def create_human(n):
    """Print a randomly generated client profile to stdout.

    :param n: locale selector; ``1`` builds the person with the 'ru'
        locale, any other value uses 'en'.
    """
    locale = 'ru' if n == 1 else 'en'
    person = Person(locale)

    # Personal id number and sex come from the passport helper, not from
    # the Person provider.
    id_info = generate_id_passport()

    # Name block. The third line is labelled as a patronymic but is filled
    # from person.surname().
    print('\nфамилия:\t', person.last_name())
    print('имя:\t\t', person.name())
    print('отчество:\t', person.surname(), '\n')

    # Identity documents.
    passport_number = generate_number_passport()
    print('личный №:\t', id_info.get('idn'))
    print('документ:\t', passport_number)

    # Demographics.
    print('пол:\t\t', id_info.get('sex'))
    print('возраст:\t', person.age(13, 70), '\n')
    print('гражд-во:\t', person.get_current_locale())
    print('Нац-сть:\t', person.nationality())

    # Contacts; the e-mail line prints a fixed masked placeholder.
    print('телефон:\t', person.telephone('+37529#######'))
    print('email:\t\t', '*****@*****.**')
    print('проф-ия:\t', person.occupation(), '\n')

    # Title and outlook.
    print('обращение:\t', person.title())
    print('взгляды:\t', person.views_on())
    print('вера:\t\t', person.worldview(), '\n')
Ejemplo n.º 2
0
def create_human(n):
    """Log and print a randomly generated client profile.

    :param n: locale selector; ``1`` builds the person with the 'ru'
        locale, any other value uses 'en'.
    :return: nothing; the generated data is only printed to stdout.
    """
    log.info("Генерируем тестовые данные")
    locale = 'ru' if n == 1 else 'en'
    person = Person(locale)
    log.info("person: %s" % (person,))

    # Personal id number and sex come from the passport helper, not from
    # the Person provider.
    id_info = generate_id_passport()

    # Name block. The third line is labelled as a patronymic but is filled
    # from person.surname().
    print('\nфамилия:\t', person.last_name())
    print('имя:\t\t', person.name())
    print('отчество:\t', person.surname(), '\n')

    # Identity documents.
    passport_number = generate_number_passport()
    print('личный №:\t', id_info.get('idn'))
    print('документ:\t', passport_number)

    # Demographics.
    print('пол:\t\t', id_info.get('sex'))
    print('возраст:\t', person.age(13, 70), '\n')
    print('гражд-во:\t', person.get_current_locale())
    print('Нац-сть:\t', person.nationality())

    # Contacts and occupation.
    print('телефон:\t', person.telephone('+37529#######'))
    print('email:\t\t', person.email())
    print('проф-ия:\t', person.occupation(), '\n')

    # Title and outlook.
    print('обращение:\t', person.title())
    print('взгляды:\t', person.views_on())
    print('вера:\t\t', person.worldview(), '\n')
Ejemplo n.º 3
0
    def gen_data_simple_schema(self, data_path, partition_date, num_rows,
                               file_format):
        """
        Create a data sample with a simple (flat) schema and write it as a
        single date partition.

        Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

        Prints the partition path, row count and schema; returns None.
        """

        person = Person('en')

        # Create a simple schema
        schema_df = StructType([
            StructField('identifier', StringType(), True),
            StructField('first_name', StringType(), True),
            StructField('last_name', StringType(), True),
            StructField('occupation', StringType(), True),
            StructField('age', IntegerType(), True),
            StructField('date', DateType(), True)
        ])

        # Generate all rows up front and build the DataFrame in one call.
        # This replaces a per-row createDataFrame + union that relied on a
        # bare `except:` to detect the first iteration; that hid real
        # errors and left `df` undefined when num_rows was 0.
        rows = [[
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            partition_date,
        ] for _ in range(num_rows)]
        df = self.spark.createDataFrame(rows, schema_df)

        df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
            file_format).save(data_path)

        print('Partition created: {data_path}/date={date}'.format(
            data_path=data_path, date=partition_date))
        print('# Rows:', df.count())
        print('Schema:')
        df.printSchema()
        print('\n')

        return
Ejemplo n.º 4
0
    def make_full_name(self, loc, value):
        """Generate a random name, surname, patronymic, age and occupation.

        :param loc: locale passed to the ``Person`` provider.
        :param value: gender selector, ``'male'`` or ``'female'``; the
            historical misspelling ``'famale'`` is still accepted.
        :return: tuple ``(name, surname, patronymic, age, occupation)``.
        :raises ValueError: if ``value`` is not a recognised gender.
        """
        # NOTE(review): the chosen gender is still published through the
        # module-level ``sex`` global, as before, in case other code reads
        # it. Consider making it a plain local once callers are checked.
        global sex

        if value == 'male':
            sex = Gender.MALE
        elif value in ('female', 'famale'):
            # 'famale' was the only spelling the original code matched.
            sex = Gender.FEMALE
        else:
            # Previously an unknown value silently reused whatever gender
            # the last call left in the global, or raised NameError.
            raise ValueError(
                "value must be 'male' or 'female', got {!r}".format(value))

        per = Person(loc)
        rsp = RussiaSpecProvider()

        name = per.name(sex)
        surname = per.surname(sex)
        patron = rsp.patronymic(sex)
        age = per.age(16, 66)
        occup = per.occupation()
        return name, surname, patron, age, occup
Ejemplo n.º 5
0
def generate_random_data():
    """Build one record of fake personal and address data.

    Returns a dict with the keys ``name``, ``email``, ``nationality``,
    ``occupation``, ``password``, ``phone``, ``address``, ``city``,
    ``street_no`` and ``created`` (an ISO-formatted date string).
    """
    person = Person()
    address = Address()

    # Keyword order matches the original dict literal, so the providers
    # are called in the same order and the keys keep their order.
    record = dict(
        name=person.full_name(),
        email=person.email(),
        nationality=person.nationality(),
        occupation=person.occupation(),
        password=person.password(),
        phone=person.telephone(),
        address=address.address(),
        city=address.city(),
        street_no=address.street_number(),
    )
    record["created"] = Datetime().date().isoformat()
    return record
def main():
    """Generate fake DE/AT/CH customers and write them to a CSV file.

    The number of customers is read from ``sys.argv[1]``. If it is
    missing, not an integer, or negative, a usage message is printed and
    the function returns without writing anything. Otherwise the rows are
    written to ``output/customers__de_at_ch.csv``.
    """
    try:
        num_gen = int(sys.argv[1])
    except (IndexError, ValueError):
        # Missing or non-numeric argument: handled as a usage error below.
        num_gen = -1

    if num_gen < 0:
        print(
            "Usage: python faker_customers_to_csv.py <INT: Number of Customers to generate>"
        )
        print("Example: python faker_customers_to_csv.py 10000")
        return

    # Generate Customers that are between 18 and 100 years old
    ls_dates = [
        fake_de.date_time_between(start_date="-100y",
                                  end_date="-18y",
                                  tzinfo=None) for _ in range(num_gen)
    ]
    ls_dates.sort()

    # country code -> (mimesis locale, nationality); any other code is
    # treated as Switzerland, as before.
    locale_by_country = {
        'DE': ('de', 'Germany'),
        'AT': ('de-at', 'Austria'),
    }
    default_locale = ('de-ch', 'Switzerland')
    # Providers are built once per locale instead of once per customer.
    providers = {}

    ls_customer = []
    for i, birth_date in enumerate(ls_dates):
        s_country = random_country()
        s_locale, s_nationality = locale_by_country.get(
            s_country, default_locale)
        if s_locale not in providers:
            providers[s_locale] = (Address(s_locale), Person(s_locale))
        address, person = providers[s_locale]

        s_sex = random_mf_flag()
        gender = (mimesis.enums.Gender.FEMALE
                  if s_sex == 'F' else mimesis.enums.Gender.MALE)
        s_first_name = person.name(gender)
        s_last_name = person.last_name(gender)
        s_marital_status = random_marital_status_flag()
        s_job = person.occupation()
        s_email = person.email()
        s_phone = person.telephone()
        i_number_children = random.randint(0, 4)
        s_address_street = address.address()
        s_address_zip = address.postal_code()
        s_address_city = address.city()

        # Keep only the date part of the generated birth timestamp.
        date_of_birth = birth_date.replace(hour=0,
                                           minute=0,
                                           second=0,
                                           microsecond=0)

        t_customer = (customer_number(i), s_sex, s_first_name, s_last_name,
                      s_job, s_email, s_phone, i_number_children,
                      s_marital_status, date_of_birth,
                      s_address_street, s_address_zip, s_address_city,
                      s_country, s_nationality)
        ls_customer.append(t_customer)

    ls_columns = [
        'customer_id', 'gender', 'first_name', 'last_name', 'job', 'email',
        'phone', 'number_children', 'marital_status', 'date_of_birth',
        'street', 'zip', 'city', 'country_code', 'nationality'
    ]

    df_customer = pd.DataFrame(ls_customer, columns=ls_columns)

    df_customer.to_csv('output/customers__de_at_ch.csv',
                       sep=',',
                       index=False,
                       header=ls_columns)
Ejemplo n.º 7
0
    def person(
        cls,
        *,
        locale=Locales.EN,
        qualification=None,
        age=None,
        blood_type=None,
        email=None,
        first_name=None,
        last_name=None,
        gender=None,
        height=None,
        id=None,
        language=None,
        nationality=None,
        occupation=None,
        phone=None,
        title=None,
        university=None,
        weight=None,
        work_experience=None,
    ):
        '''
            Create a Person Data Entity object.

            All individual fields are automatically randomly generated based on locale. Any value that is provided (i.e. is not None) overrides the generated one, including falsy values such as 0 or an empty string.

            Note:
                All individual fields are randomly generated. Don't expect correct correlation e.g. correct postal code for the generated city.

            Keyword Arguments:
                locale: Appropriate Random.locale.<local_name> object. Default is Random.locale.EN
                qualification: Educational Qualification
                age: Age
                blood_type: Blood type
                email: Email address
                first_name: First name
                last_name: Last name
                gender: Gender
                height: Height
                id: Identifier
                language: Language
                nationality: Nationality
                occupation: Occupation
                phone: Phone number
                title: Title
                university: University
                weight: Weight
                work_experience: Work Experience

            Returns:
                The Person data entity built from the provided and generated values. Its name is first_name and last_name joined by a space.
        '''
        person = Person(locale=locale)
        from arjuna.engine.data.entity.person import Person as ArjPerson

        def _given_or(value, generate):
            # Explicit None test: the former `v is not None and v or gen()`
            # idiom discarded valid falsy overrides such as age=0 or "".
            # The generator is only called when no override was given.
            return value if value is not None else generate()

        first_name = _given_or(first_name, person.first_name)
        last_name = _given_or(last_name, person.last_name)
        return ArjPerson(
            qualification=_given_or(qualification, person.academic_degree),
            age=_given_or(age, person.age),
            blood_type=_given_or(blood_type, person.blood_type),
            email=_given_or(email, person.email),
            first_name=first_name,
            last_name=last_name,
            name=first_name + " " + last_name,
            gender=_given_or(gender, person.gender),
            height=_given_or(height, person.height),
            id=_given_or(id, person.identifier),
            language=_given_or(language, person.language),
            nationality=_given_or(nationality, person.nationality),
            occupation=_given_or(occupation, person.occupation),
            phone=_given_or(phone, person.telephone),
            title=_given_or(title, person.title),
            university=_given_or(university, person.university),
            weight=_given_or(weight, person.weight),
            work_experience=_given_or(work_experience,
                                      person.work_experience),
        )
Ejemplo n.º 8
0
        avatarUrl = person.avatar()
        r = requests.get(avatarUrl)
        imageString = ""
        if r.status_code == 200:
            imageString = base64.b64encode(r.content).decode("utf-8")
        else:
            img = pagan.Avatar(userName, pagan.SHA512)
            img.save('.\\', 't')
            with open(".\\t.png", "rb") as binary_file:
                dataImage = binary_file.read()
            imageString = base64.b64encode(dataImage).decode("utf-8")

    entry = {
        'cn': fn,
        'name': userName,
        'displayName': fn,
        'mail': mail,
        'givenName': fn[:fn.find(' ')],
        'title': person.occupation(),
        "mobile": person.telephone(),
        "sAMAccountName": userName,
        "company": domain,
        "userPrincipalName": userName + "@" + domain
    }

    if usePhoto:
        entry["photo"] = imageString

    writeToJson(fileName, entry)
    print(i, fn, userName, mail, userName, userName + "@" + domain)
Ejemplo n.º 9
0
    def gen_data_remove_column(self, data_path, partition_date, num_rows,
                               file_format):
        """
        Create a data sample with some columns removed from the nested
        schema and write it as a single date partition.

        Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

        Removed columns: street lat/long, address country_code and the
        top-level title_name.

        Prints the partition path, row count and schema; returns None.
        """

        person = Person('en')
        address = Address('en')

        # street: only the name is kept (lat / long removed)
        schema_street = StructType([
            StructField('street_name', StringType(), True)
        ])

        schema_address_details = StructType([
            StructField('street', schema_street, True),
            StructField('number', IntegerType(), True)
        ])

        # address: country_code removed
        schema_address = StructType([
            StructField('address_details', schema_address_details, True),
            StructField('city', StringType(), True),
            StructField('country', StringType(), True),
            StructField('state', StringType(), True),
            StructField('postal_code', IntegerType(), True)
        ])

        # top level: title_name removed
        schema_df = StructType([
            StructField('identifier', StringType(), True),
            StructField('first_name', StringType(), True),
            StructField('last_name', StringType(), True),
            StructField('occupation', StringType(), True),
            StructField('age', IntegerType(), True),
            StructField('address', schema_address, True),
            StructField('date', DateType(), True)
        ])

        # Generate all rows up front and build the DataFrame in one call.
        # This replaces a per-row createDataFrame + union that relied on a
        # bare `except:` to detect the first iteration; that hid real
        # errors and left `df` undefined when num_rows was 0.
        rows = [[
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            # address struct: [address_details, city, country, state, zip]
            [
                # address_details struct: [street, number]
                [
                    [address.street_name()],
                    int(address.street_number())
                ],
                address.city(),
                address.country(),
                address.state(),
                int(address.postal_code())
            ],
            partition_date,
        ] for _ in range(num_rows)]
        df = self.spark.createDataFrame(rows, schema_df)

        df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
            file_format).save(data_path)

        print('Partition created: {data_path}/date={date}'.format(
            data_path=data_path, date=partition_date))
        print('# Rows:', df.count())
        print('Schema:')
        df.printSchema()
        print('\n')

        return