def generate_personal_data(num_records):
    """Build a structured array of fake personal and payment records.

    Every record gets a zero-padded sequential id, a full name matching a
    randomly drawn gender, a VISA card number, a card expiration date and
    a one-letter gender code (``'M'`` or ``'F'``).

    :param num_records: Number of records to generate.
    :return: Structured NumPy array of length ``num_records`` with the
        fields ``id``, ``full_name``, ``credit_card_number``,
        ``credit_card_expiration_date`` and ``gender``.
    """
    person = Person('en')
    payment = Payment()

    # ``np.str_`` replaces the ``np.unicode_`` alias, which no longer exists
    # in NumPy 2.0; both names denote the same fixed-width unicode type.
    dtype = np.dtype([
        ('id', np.str_, 16),
        ('full_name', np.str_, 32),
        ('credit_card_number', np.str_, 19),
        ('credit_card_expiration_date', np.str_, 7),
        ('gender', np.str_, 1),
    ])

    # No seed is passed, so the global generator is re-seeded without a
    # fixed value and the drawn genders are not reproducible across calls.
    np.random.seed()
    rng = np.random.randint(0, 2, num_records)

    records_npy = np.empty(num_records, dtype=dtype)
    for idx in range(num_records):
        # A draw of 0 means male, 1 means female; decide once per record.
        is_male = rng[idx] == 0
        records_npy[idx] = (
            str(idx).zfill(16),
            person.full_name(
                gender=Gender.MALE if is_male else Gender.FEMALE),
            payment.credit_card_number(card_type=CardType.VISA),
            payment.credit_card_expiration_date(maximum=21),
            'M' if is_male else 'F',
        )
    return records_npy
def generate_data(self):
    """Fill ``self.main_array`` with fake records, some sharing one name.

    Reads ``self.seed``, ``self.num``, ``self.dtype``, ``self.loan_min``,
    ``self.loan_max``, ``self.loan_step`` and
    ``self.number_same_name_entries``. After the records are generated,
    ``number_same_name_entries`` randomly chosen records are overwritten
    with the name and gender of the first chosen record.

    :return: None; the result is stored in ``self.main_array``.
    """
    np.random.seed(seed=self.seed)
    gender = np.random.randint(2, size=self.num)
    # Loans are drawn as a number of steps and scaled back, so every value
    # is a multiple of ``loan_step``.
    loan = np.random.randint(self.loan_min / self.loan_step,
                             self.loan_max / self.loan_step,
                             size=self.num) * self.loan_step

    person = Person('en')
    payment = Payment()

    self.main_array = np.empty(shape=self.num, dtype=self.dtype)
    for idx in range(self.num):
        # A truthy draw (1) means male, 0 means female.
        is_male = gender[idx]
        self.main_array['id'][idx] = str(idx).zfill(16)
        self.main_array['full_name'][idx] = person.full_name(
            gender=Gender.MALE if is_male else Gender.FEMALE)
        self.main_array['credit_card_number'][idx] = \
            payment.credit_card_number(card_type=CardType.VISA)
        self.main_array['credit_card_expiration_date'][idx] = \
            payment.credit_card_expiration_date(maximum=22)
        self.main_array['gender'][idx] = 'M' if is_male else 'F'
        self.main_array['loan'][idx] = loan[idx]

    same_entries_indices = np.random.choice(np.arange(self.num),
                                            self.number_same_name_entries,
                                            replace=False)
    # With zero requested duplicates the selection is empty and there is no
    # first record to copy from, so skip instead of raising IndexError.
    if same_entries_indices.size:
        source_idx = same_entries_indices[0]
        self.main_array['full_name'][same_entries_indices] = \
            self.main_array['full_name'][source_idx]
        # Copy the gender as well so the shared name still matches it.
        self.main_array['gender'][same_entries_indices] = \
            self.main_array['gender'][source_idx]
def _initialize_counts_seed(self, seed, similar_people_count,
                            max_repeat_count):
    """Store the configuration and seed every random source used.

    :param seed: Seed given to NumPy's global generator and to the
        ``Person`` and ``Payment`` providers.
    :param similar_people_count: Stored as ``self.similar_people_count``.
    :param max_repeat_count: Stored as ``self.max_repeat_count``.
    """
    self.seed = seed
    self.similar_people_count, self.max_repeat_count = (
        similar_people_count,
        max_repeat_count,
    )
    # The record count is derived from the two counts stored just above.
    self.records_count = self._compute_records_count()

    np.random.seed(self.seed)
    self.person, self.payment = (
        Person('en', seed=self.seed),
        Payment(seed=self.seed),
    )
def __init__(self, *args, **kwargs):
    """Initialize attributes lazily.

    Some providers are stored as classes under underscore-prefixed names;
    the rest are instantiated right away with ``self.seed``.

    :param args: Arguments.
    :param kwargs: Keyword arguments.
    """
    super().__init__(*args, **kwargs)

    # Stored as classes, not instances (presumably instantiated on first
    # access elsewhere in the class; verify against the attribute lookup).
    deferred_providers = (
        ('_person', Person),
        ('_address', Address),
        ('_datetime', Datetime),
        ('_business', Business),
        ('_text', Text),
        ('_food', Food),
        ('_science', Science),
        ('_code', Code),
        ('_transport', Transport),
    )
    for attribute, provider_cls in deferred_providers:
        setattr(self, attribute, provider_cls)

    # Instantiated immediately, each with the seed held by this object.
    seeded_providers = (
        ('unit_system', UnitSystem),
        ('file', File),
        ('numbers', Numbers),
        ('development', Development),
        ('hardware', Hardware),
        ('clothing_size', ClothingSize),
        ('internet', Internet),
        ('path', Path),
        ('payment', Payment),
        ('games', Games),
        ('cryptographic', Cryptographic),
        ('structure', Structure),
    )
    for attribute, provider_cls in seeded_providers:
        setattr(self, attribute, provider_cls(seed=self.seed))
def __init__(self, *args, **kwargs):
    """Set up the provider attributes of this object.

    Some providers are stored as classes under underscore-prefixed names;
    the rest are instantiated right away with their default arguments.

    :param args: Positional arguments forwarded to the parent initializer.
    :param kwargs: Keyword arguments forwarded to the parent initializer.
    """
    super().__init__(*args, **kwargs)

    # Stored as classes, not instances (presumably instantiated on first
    # access elsewhere in the class; verify against the attribute lookup).
    deferred_providers = (
        ('_personal', Personal),
        ('_address', Address),
        ('_datetime', Datetime),
        ('_business', Business),
        ('_text', Text),
        ('_food', Food),
        ('_science', Science),
        ('_code', Code),
        ('_transport', Transport),
    )
    for attribute, provider_cls in deferred_providers:
        setattr(self, attribute, provider_cls)

    # Instantiated immediately with default constructor arguments.
    eager_providers = (
        ('unit_system', UnitSystem),
        ('file', File),
        ('numbers', Numbers),
        ('development', Development),
        ('hardware', Hardware),
        ('clothing_sizes', ClothingSizes),
        ('internet', Internet),
        ('path', Path),
        ('payment', Payment),
        ('games', Games),
        ('cryptographic', Cryptographic),
    )
    for attribute, provider_cls in eager_providers:
        setattr(self, attribute, provider_cls())
# Experiment: how seeding NumPy and mimesis affects the generated values.

# One draw from the global NumPy generator in whatever state it is in.
print('other', np.random.randint(100, size=12))

# Vectorised draws after seeding NumPy with each seed in turn.
for seed in [seed1, seed2, seed3, seed4]:
    np.random.seed(seed=seed)
    vector_draw = np.random.randint(100, size=12)
    print(seed, '\t', vector_draw)

print('Numpy in comprehension')
# Same seeds, but drawing one scalar at a time.
for seed in [seed1, seed2, seed3, seed4]:
    np.random.seed(seed=seed)
    scalar_draws = [np.random.randint(100) for _ in range(12)]
    print(seed, scalar_draws)

print('Mimesis')
person = Person('en')
payment = Payment()

# Two identical passes with a provider seeded directly; comparing their
# output shows whether a seeded provider repeats its names.
for _pass in range(2):
    for seed in [seed1, seed2, seed3, seed4]:
        person = Person('en', seed=seed)
        names = [person.full_name() for _ in range(5)]
        print(seed, names)

# Two identical passes with only NumPy seeded and an unseeded provider.
# NOTE(review): NumPy is always seeded with seed4 here while the loop's
# own seed is what gets printed -- confirm this is intentional.
for _pass in range(2):
    for seed in [seed1, seed2, seed3, seed4]:
        np.random.seed(seed=seed4)
        person = Person('en')
        names = [person.full_name() for _ in range(5)]
        print(seed, names)
class DataGenerator:
    """Generate fake people and credit-card records joined on ``id_person``.

    People are created in consecutive groups of ``similar_people_count``.
    Each person in the first group gets one card, each person in the
    second group two cards, and so on up to ``max_repeat_count`` cards.
    """

    def __init__(self, seed=13, similar_people_count=5, max_repeat_count=12):
        """Generate the people, their cards and the merged record table.

        :param seed: Seed for NumPy and for the mimesis providers.
        :param similar_people_count: Number of people per group.
        :param max_repeat_count: Number of groups, and the largest number
            of cards a single person receives.
        """
        self._initialize_counts_seed(seed, similar_people_count,
                                     max_repeat_count)
        self.df_people = self._generate_people()
        self.df_cards = self._generate_cards()
        self.df_records = pd.merge(self.df_people, self.df_cards,
                                   left_on='id_person', right_on='id_person')

    def _initialize_counts_seed(self, seed, similar_people_count,
                                max_repeat_count):
        """Store the configuration and seed every random source used."""
        self.seed = seed
        self.similar_people_count = similar_people_count
        self.max_repeat_count = max_repeat_count
        self.records_count = self._compute_records_count()
        np.random.seed(self.seed)
        self.person = Person('en', seed=self.seed)
        self.payment = Payment(seed=self.seed)

    def _compute_records_count(self):
        """Return the total number of card records to generate.

        Group ``i`` (1-based) contributes ``i * similar_people_count``
        records, for ``i`` from 1 to ``max_repeat_count``.
        """
        return self.similar_people_count * sum(
            range(1, self.max_repeat_count + 1))

    def get_records(self):
        """Return the merged people/cards data frame."""
        return self.df_records

    def _generate_cards(self):
        """Return a data frame with one row per generated card."""
        person_ids = iter(self._generate_ids())
        int_loans = np.random.randint(1, 101, self.records_count)
        # Loans are whole thousands between 1000.0 and 100000.0.
        float_loans = np.array(int_loans * 1000, np.float64)
        loans = iter(float_loans)
        # Each call of the schema callable consumes the next person id and
        # the next loan, so cards are produced in ``_generate_ids`` order.
        description_c = (lambda: {
            'id_person': next(person_ids),
            'credit_card_num':
                self.payment.credit_card_number(card_type=CardType.VISA),
            'credit_card_exp_date':
                self.payment.credit_card_expiration_date(maximum=21,
                                                         minimum=19),
            'loan': next(loans),
        })
        schema_card = Schema(schema=description_c)
        cards = schema_card.create(iterations=self.records_count)
        return pd.DataFrame(cards)

    def _generate_ids(self):
        """Return the person id of every card, in generation order.

        The repeat count grows by one at the start of each group of
        ``similar_people_count`` people until it reaches
        ``max_repeat_count``; every person's id is then emitted that many
        times.
        """
        person_ids = self.df_people['id_person'].tolist()
        repeat_count = 0
        ids_for_cards = []
        for position, person_id in enumerate(person_ids):
            starts_new_group = position % self.similar_people_count == 0
            if starts_new_group and repeat_count < self.max_repeat_count:
                repeat_count += 1
            # Emit the real id value rather than the row position, and
            # extend in place instead of rebuilding the list every time.
            ids_for_cards.extend([person_id] * repeat_count)
        return ids_for_cards

    def _generate_people(self):
        """Return a data frame of people: females first, then males.

        Ids are assigned sequentially from 0 across both groups. Half of
        the people (rounded down) are female and the remainder male.
        """
        people_count = self.similar_people_count * self.max_repeat_count
        ids = iter(range(people_count))
        description_female = (lambda: {
            'id_person': next(ids),
            'full_name': self.person.full_name(Gender.FEMALE),
            'gender': 'F',
        })
        description_male = (lambda: {
            'id_person': next(ids),
            'full_name': self.person.full_name(Gender.MALE),
            'gender': 'M',
        })
        female_count = people_count // 2
        male_count = people_count - female_count
        schema_female = Schema(schema=description_female)
        females = schema_female.create(iterations=female_count)
        schema_male = Schema(schema=description_male)
        males = schema_male.create(iterations=male_count)
        return pd.DataFrame(females + males)
import timeit

# Record layout for the generated people. ``np.str_`` replaces the
# ``np.unicode_`` alias, which no longer exists in NumPy 2.0; both names
# denote the same fixed-width unicode type.
dtype = np.dtype([
    ('id', np.str_, 16),
    ('full_name', np.str_, 32),
    ('credit_card_number', np.str_, 32),
    ('credit_card_expiration_date', np.str_, 8),
    ('gender', np.str_, 1),
])

num = 10**2
# One 0/1 draw per record: 0 means male, 1 means female.
rng = np.random.randint(0, 2, num)
person = Person('en')
payment = Payment()

array = np.empty(num, dtype=dtype)
for idx in range(num):
    gender = Gender.MALE if rng[idx] == 0 else Gender.FEMALE
    array['id'][idx] = str(idx).zfill(16)
    array['full_name'][idx] = person.full_name(gender=gender)
    array['credit_card_number'][idx] = payment.credit_card_number(
        card_type=CardType.VISA)
    array['credit_card_expiration_date'][idx] = \
        payment.credit_card_expiration_date(maximum=21)
    array['gender'][idx] = 'M' if rng[idx] == 0 else 'F'

print(array)
print(array['full_name'])

# Each element handed to the vectorised function is one whole record:
# position 0 is the ``id`` field and position 4 is the ``gender`` field.
ar_id = np.vectorize(lambda x: int(x[0]))(array)
ar_gen = np.vectorize(lambda x: 1 if x[4] == 'M' else 0)(array)