Example #1
0
def generate_personal_data(num_records):
    """Build a structured NumPy array of synthetic personal records.

    Every record carries a zero-padded ``id``, a ``full_name`` produced by
    the ``Person`` provider, a VISA ``credit_card_number`` and a
    ``credit_card_expiration_date`` produced by the ``Payment`` provider,
    and a one-letter ``gender`` code (``'M'`` or ``'F'``) chosen by a
    random 0/1 draw.

    :param num_records: Number of records to generate.
    :return: Structured array with ``num_records`` entries.
    """
    person = Person('en')
    payment = Payment()

    # ``np.str_`` is the type that ``np.unicode_`` used to alias; the old
    # alias no longer exists in NumPy 2.0.
    dtype = np.dtype([
        ('id', np.str_, 16),
        ('full_name', np.str_, 32),
        ('credit_card_number', np.str_, 19),
        ('credit_card_expiration_date', np.str_, 7),
        ('gender', np.str_, 1),
    ])

    # No explicit seed is passed, so the global NumPy generator is
    # re-seeded on every call.
    np.random.seed()
    draws = np.random.randint(0, 2, num_records)

    records_npy = np.empty(num_records, dtype=dtype)
    for idx, draw in enumerate(draws):
        # A draw of 0 means male; decide once and reuse for both the
        # generated name and the stored gender code so they cannot diverge.
        is_male = draw == 0
        records_npy['id'][idx] = str(idx).zfill(16)
        records_npy['full_name'][idx] = person.full_name(
            gender=Gender.MALE if is_male else Gender.FEMALE)
        records_npy['credit_card_number'][idx] = payment.credit_card_number(
            card_type=CardType.VISA)
        records_npy['credit_card_expiration_date'][
            idx] = payment.credit_card_expiration_date(maximum=21)
        records_npy['gender'][idx] = 'M' if is_male else 'F'

    return records_npy
Example #2
0
    def generate_data(self):
        """Fill ``self.main_array`` with synthetic loan records.

        Writes the ``id``, ``full_name``, ``credit_card_number``,
        ``credit_card_expiration_date``, ``gender`` and ``loan`` fields for
        ``self.num`` records, then copies the name and gender of one
        randomly chosen record onto ``self.number_same_name_entries``
        randomly chosen records so that several entries share a name.

        All random sources are seeded with ``self.seed``.
        """
        np.random.seed(seed=self.seed)
        gender = np.random.randint(2, size=self.num)
        loan = np.random.randint(self.loan_min / self.loan_step,
                                 self.loan_max / self.loan_step,
                                 size=self.num) * self.loan_step

        # Seed the mimesis providers as well: seeding NumPy alone leaves
        # the generated names and card data different on every run.
        person = Person('en', seed=self.seed)
        payment = Payment(seed=self.seed)

        self.main_array = np.empty(shape=self.num, dtype=self.dtype)

        for idx in range(self.num):
            # A truthy draw means male; reuse the flag for name and code.
            is_male = bool(gender[idx])
            self.main_array['id'][idx] = str(idx).zfill(16)
            self.main_array['full_name'][idx] = person.full_name(
                gender=Gender.MALE if is_male else Gender.FEMALE)
            self.main_array['credit_card_number'][
                idx] = payment.credit_card_number(card_type=CardType.VISA)
            self.main_array['credit_card_expiration_date'][
                idx] = payment.credit_card_expiration_date(maximum=22)
            self.main_array['gender'][idx] = 'M' if is_male else 'F'
            self.main_array['loan'][idx] = loan[idx]

        same_entries_indices = np.random.choice(np.arange(self.num),
                                                self.number_same_name_entries,
                                                replace=False)

        # With zero requested duplicates there is no template record to
        # copy from; indexing element 0 below would raise IndexError.
        if same_entries_indices.size == 0:
            return

        template = same_entries_indices[0]
        self.main_array['full_name'][same_entries_indices] = self.main_array[
            'full_name'][template]
        # Keep the gender code in step with the duplicated name.
        self.main_array['gender'][same_entries_indices] = self.main_array[
            'gender'][template]
Example #3
0
 def _initialize_counts_seed(self, seed, similar_people_count,
                             max_repeat_count):
     """Store the generator settings and seed every random source.

     :param seed: Seed handed to NumPy and to both mimesis providers.
     :param similar_people_count: Stored as ``self.similar_people_count``.
     :param max_repeat_count: Stored as ``self.max_repeat_count``.
     """
     # Settings go first: the record count is computed by
     # ``_compute_records_count()`` only after both counts are stored.
     self.seed = seed
     self.similar_people_count = similar_people_count
     self.max_repeat_count = max_repeat_count
     self.records_count = self._compute_records_count()

     # One seed value drives NumPy and both providers.
     np.random.seed(seed)
     self.person = Person('en', seed=seed)
     self.payment = Payment(seed=seed)
Example #4
0
    def __init__(self, *args, **kwargs):
        """Set up every provider attribute of this object.

        Providers assigned to underscore-prefixed attributes are stored as
        classes, not instances; the remaining providers are instantiated
        here, each receiving this object's seed.

        :param args: Arguments forwarded to the parent initializer.
        :param kwargs: Keyword arguments forwarded to the parent initializer.
        """
        super().__init__(*args, **kwargs)

        # Stored uninstantiated (no call parentheses).
        self._person = Person
        self._address = Address
        self._datetime = Datetime
        self._business = Business
        self._text = Text
        self._food = Food
        self._science = Science
        self._code = Code
        self._transport = Transport

        # Instantiated right away, all sharing the same seed value.
        seed = self.seed
        self.unit_system = UnitSystem(seed=seed)
        self.file = File(seed=seed)
        self.numbers = Numbers(seed=seed)
        self.development = Development(seed=seed)
        self.hardware = Hardware(seed=seed)
        self.clothing_size = ClothingSize(seed=seed)
        self.internet = Internet(seed=seed)
        self.path = Path(seed=seed)
        self.payment = Payment(seed=seed)
        self.games = Games(seed=seed)
        self.cryptographic = Cryptographic(seed=seed)
        self.structure = Structure(seed=seed)
Example #5
0
 def __init__(self, *args, **kwargs):
     """Set up the provider attributes of this object.

     All positional and keyword arguments are forwarded unchanged to the
     parent initializer.

     :param args: Arguments.
     :param kwargs: Keyword arguments.
     """
     super().__init__(*args, **kwargs)
     # Stored as classes, not instances (no call parentheses).
     self._personal = Personal
     self._address = Address
     self._datetime = Datetime
     self._business = Business
     self._text = Text
     self._food = Food
     self._science = Science
     self._code = Code
     self._transport = Transport
     # Instantiated immediately with their default arguments.
     self.unit_system = UnitSystem()
     self.file = File()
     self.numbers = Numbers()
     self.development = Development()
     self.hardware = Hardware()
     self.clothing_sizes = ClothingSizes()
     self.internet = Internet()
     self.path = Path()
     self.payment = Payment()
     self.games = Games()
     self.cryptographic = Cryptographic()
Example #6
0
# Draw before any of the seeds below has been applied.
print('other', np.random.randint(100, size=12))

# Re-seed NumPy before each draw so the output can be compared per seed.
for seed in [seed1, seed2, seed3, seed4]:
    np.random.seed(seed=seed)
    print(seed, '\t', np.random.randint(100, size=12))

print('Numpy in comprehension')
for seed in [seed1, seed2, seed3, seed4]:
    np.random.seed(seed=seed)
    print(seed, [np.random.randint(100) for _ in range(12)])


print('Mimesis')
payment = Payment()

# Two identical passes: a provider seeded through its own ``seed``
# argument, so both passes can be compared line by line.
for _ in range(2):
    for seed in [seed1, seed2, seed3, seed4]:
        person = Person('en', seed=seed)
        print(seed, [person.full_name() for _ in range(5)])

# Two identical passes: only NumPy is seeded and the provider is created
# without a seed. The NumPy seed now matches the printed label; it used
# to be the constant ``seed4`` on every iteration.
for _ in range(2):
    for seed in [seed1, seed2, seed3, seed4]:
        np.random.seed(seed=seed)
        person = Person('en')
        print(seed, [person.full_name() for _ in range(5)])
Example #7
0
class DataGenerator:
    """Generate seeded synthetic people and their credit-card loan records.

    People are created in consecutive groups of ``similar_people_count``.
    Everyone in the first group gets one card record, everyone in the
    second group two, and so on, up to ``max_repeat_count`` records per
    person. The people and card tables are joined on ``id_person`` into
    ``df_records``.
    """

    def __init__(self, seed=13, similar_people_count=5, max_repeat_count=12):
        """Generate all tables eagerly.

        :param seed: Seed for NumPy and for the mimesis providers.
        :param similar_people_count: Number of people per group.
        :param max_repeat_count: Number of groups, and the largest number
            of card records a single person receives.
        """
        self._initialize_counts_seed(seed, similar_people_count,
                                     max_repeat_count)

        # Cards are generated after people because the card ids are
        # derived from ``df_people``.
        self.df_people = self._generate_people()
        self.df_cards = self._generate_cards()
        self.df_records = pd.merge(self.df_people,
                                   self.df_cards,
                                   left_on='id_person',
                                   right_on='id_person')

    def _initialize_counts_seed(self, seed, similar_people_count,
                                max_repeat_count):
        """Store the settings and seed every random source."""
        self.seed = seed
        self.similar_people_count = similar_people_count
        self.max_repeat_count = max_repeat_count
        self.records_count = self._compute_records_count()
        np.random.seed(self.seed)
        self.person = Person('en', seed=self.seed)
        self.payment = Payment(seed=self.seed)

    def _compute_records_count(self):
        """Return the total number of card records.

        Group ``i`` (1-based) contributes ``i * similar_people_count``
        records, for ``i`` from 1 to ``max_repeat_count``.
        """
        return self.similar_people_count * sum(
            range(1, self.max_repeat_count + 1))

    def get_records(self):
        """Return the merged people/cards DataFrame."""
        return self.df_records

    def _generate_cards(self):
        """Return a DataFrame with one row per card record."""
        person_ids = iter(self._generate_ids())
        # Loans are whole thousands between 1000.0 and 100000.0.
        int_loans = np.random.randint(1, 101, self.records_count)
        float_loans = np.array(int_loans * 1000, np.float64)
        loans = iter(float_loans)

        def describe_card():
            # Each call consumes the next person id and the next loan.
            return {
                'id_person':
                next(person_ids),
                'credit_card_num':
                self.payment.credit_card_number(card_type=CardType.VISA),
                'credit_card_exp_date':
                self.payment.credit_card_expiration_date(maximum=21,
                                                         minimum=19),
                'loan':
                next(loans),
            }

        schema_card = Schema(schema=describe_card)
        cards = schema_card.create(iterations=self.records_count)
        return pd.DataFrame(cards)

    def _generate_ids(self):
        """Return the person id for every card record, in order.

        The repeat count starts at 1 for the first group of
        ``similar_people_count`` people and grows by one with each new
        group, never exceeding ``max_repeat_count``.
        """
        person_ids = self.df_people['id_person'].tolist()
        ids_for_cards = []
        repeat_count = 0

        for position, person_id in enumerate(person_ids):
            starts_new_group = position % self.similar_people_count == 0
            if starts_new_group and repeat_count < self.max_repeat_count:
                repeat_count += 1

            # Extend in place; rebuilding the list with ``+`` on every
            # iteration made this loop quadratic.
            ids_for_cards.extend([person_id] * repeat_count)

        return ids_for_cards

    def _generate_people(self):
        """Return a DataFrame of people: females first, then males.

        Ids are consecutive integers starting at 0 and are shared between
        both halves, so the female rows receive the lower ids.
        """
        people_count = self.similar_people_count * self.max_repeat_count
        ids = iter(range(people_count))

        def describe_female():
            return {
                'id_person': next(ids),
                'full_name': self.person.full_name(Gender.FEMALE),
                'gender': 'F',
            }

        def describe_male():
            return {
                'id_person': next(ids),
                'full_name': self.person.full_name(Gender.MALE),
                'gender': 'M',
            }

        # With an odd total the extra person is male.
        female_count = people_count // 2
        male_count = people_count - female_count

        schema_female = Schema(schema=describe_female)
        females = schema_female.create(iterations=female_count)

        schema_male = Schema(schema=describe_male)
        males = schema_male.create(iterations=male_count)
        return pd.DataFrame(females + males)
Example #8
0
import timeit


# Record layout for the generated people. ``np.str_`` is the type that
# ``np.unicode_`` used to alias; the old alias no longer exists in NumPy 2.0.
dtype = np.dtype([
    ('id', np.str_, 16),
    ('full_name', np.str_, 32),
    ('credit_card_number', np.str_, 32),
    ('credit_card_expiration_date', np.str_, 8),
    ('gender', np.str_, 1),
])

num = 10**2
rng = np.random.randint(0, 2, num)
person = Person('en')
payment = Payment()
array = np.empty(num, dtype=dtype)
for idx in range(num):
    # A draw of 0 means male; the same draw picks the stored gender code.
    gender = Gender.MALE if rng[idx] == 0 else Gender.FEMALE
    array['id'][idx] = str(idx).zfill(16)
    array['full_name'][idx] = person.full_name(gender=gender)
    array['credit_card_number'][idx] = payment.credit_card_number(card_type=CardType.VISA)
    array['credit_card_expiration_date'][idx] = payment.credit_card_expiration_date(maximum=21)
    array['gender'][idx] = 'M' if rng[idx] == 0 else 'F'

print(array)
print(array['full_name'])


# Numeric views of two fields, addressed by field name rather than by
# position inside the record and computed as whole-array operations.
ar_id = array['id'].astype(int)
ar_gen = (array['gender'] == 'M').astype(int)