def create_first_df(num_rows):
    """Build a synthetic DataFrame with 'name' and 'age' columns.

    The frame is generated by pydbgen; the 'age' column is then overwritten
    with Zipf(a=2) samples and the ages on even index labels are negated so
    the result contains some invalid (negative) ages.

    Args:
        num_rows: number of rows requested from ``gen_dataframe``.

    Returns:
        The generated DataFrame, with the 'age' column modified in place.
    """
    myDB = pydbgen.pydb()
    df1 = myDB.gen_dataframe(num_rows, ['name', 'age'])

    # Assign the whole column at once: writing to the `row` yielded by
    # DataFrame.iterrows() changes a copy and never reaches the frame.
    # df1['age'] = np.random.randint(10, size=len(df1))
    df1['age'] = np.random.zipf(2, size=len(df1))

    # Change some age values to negative values (every even index label).
    # The previous code tested `index / 2 == 0` (true only for index 0) and
    # used `==` instead of `=`, so no value was ever negated.
    even_rows = df1.index % 2 == 0
    df1.loc[even_rows, 'age'] = -df1.loc[even_rows, 'age']

    return df1
# NOTE(review): this excerpt starts inside a function whose header and matching
# `if` are not visible here; `address` comes from that enclosing scope.
# The original indentation of this fragment was lost - confirm nesting.
else:
    # Pick one of five outcomes according to a uniform draw in [0, 1).
    b = random.random()
    if b < .5:
        # b in [0, .5): return the address unchanged.
        return address
    elif b < .65:
        # b in [.5, .65): only the first space-separated token.
        return address.split(" ")[0]
    elif b < .8:
        # b in [.65, .8): only the second token.
        return address.split(" ")[1]
    elif b < .95:
        # b in [.8, .95): only the third token.
        # NOTE(review): indexes 1 and 2 assume the address has at least three
        # space-separated parts - confirm against the caller.
        return address.split(" ")[2]
    else:
        # b in [.95, 1): a fixed placeholder address.
        return "8876 Heather Ave."


if __name__ == '__main__':
    myDB = pydb()
    df = pd.DataFrame()
    # Base data set: 10000 generated rows with the listed fields.
    data = myDB.gen_dataframe(10000, [
        'name', 'date', 'ssn', 'country', 'street_address', 'city', 'state',
        'zipcode', 'company', 'phone_number_full'
    ])
    df = pd.concat([df, data])
    # Append four samples of 9000 rows each drawn from the base data, so the
    # combined frame contains repeated rows (and repeated index labels).
    for i in range(4):
        df = pd.concat([df, data.sample(9000)])
    # Sort by index label, which places rows sharing a label next to each other.
    df.sort_index(inplace=True)
    # Derive two numeric ids from the ssn: remove the "-" separators, parse the
    # result as an int and add 1 (duns) or 2 (rssd_id).
    # presumably ssn is a digits-and-hyphens string - verify against pydbgen.
    df['duns'] = df['ssn'].apply(lambda x: 1 + int("".join(x.split("-"))))
    df['rssd_id'] = df['ssn'].apply(lambda x: 2 + int("".join(x.split("-"))))
# -*- coding: utf-8 -*-
""" Created on Sun Oct 27 14:07:22 2019 @author: arifr """
import pydbgen
import random
from pydbgen import pydbgen
db = pydbgen.pydb()
import mysql.connector
from mysql.connector import Error
import sys

# 1000 random ids in [5000, 6000); values may repeat.
nurseidlist = [random.randrange(5000, 6000) for _ in range(5000, 6000)]

# 1000 random ids in [1000, 2100); values may repeat.
pidlist = [random.randrange(1000, 2100) for _ in range(5000, 6000)]

l = []
i = 0
# Consume both id lists front to back, pairing one entry of each per pass.
# NOTE(review): this excerpt ends inside the loop body; whatever is done with
# `x` and `l` afterwards is outside this view.
while i < 1000:
    i += 1
    x = [nurseidlist.pop(0), pidlist.pop(0)]
# NOTE(review): the original indentation of this script was lost in this
# excerpt; the nesting of the print calls below is a best reading - confirm.

# Filter string for the file dialog: JSON files first, then all files.
filter = "JSON file (*.json)|*.json|All Files (*.*)|*.*||"
# The backslash is escaped explicitly: "\e" is not a valid escape sequence and
# triggers a warning on current Python versions. The runtime string is unchanged.
# NOTE(review): the path is passed as the first positional argument - confirm
# that OpenFileName treats it as a default file rather than a dialog title.
filename = rs.OpenFileName(
    "DataBase\\etablissements-denseignement-superieur.json", filter)

# Read the JSON data into the datastore variable.
if filename:
    with open(filename, 'r') as f:
        datastore = json.load(f)

    # Use the new datastore structure. Kept under the `if` so that a cancelled
    # dialog (falsy filename) does not raise NameError on `datastore`.
    print(datastore["lon"])

# Collect the distinct names among 200 generated French-locale names.
Names = []
fake = Faker("fr_FR")
for i in range(0, 200):
    name = fake.name()
    if name not in Names:
        Names.append(name)
        print(name)

GenDB = pydbgen.pydb()

# Collect the distinct locations among 200 generated FR locations.
Loc = []
for i in range(0, 200):
    loc = fake.local_latlng(country_code='FR', coords_only=False)
    if loc not in Loc:
        Loc.append(loc)
        print(loc)

print(fake.address())
# genère des données random grace à faker et pydbgen import faker import pydbgen from pydbgen import pydbgen print("import success") # création d'un objet base de donnée myDB = pydbgen.pydb() # File faker\proxy.py", line 83, in __getattribute__ # raise TypeError(msg) erreur génerée ici !!! # Enlever ça : # if attr == 'seed': # msg = ( # 'Calling `.seed()` on instances is deprecated. ' # 'Use the class method `Faker.seed()` instead.' # ) # raise TypeError(msg) # else: # car MAJ de Faker ^^ # affiche 10 villes avec des noms random print(myDB.gen_data_series(num=8, data_type='city')) # genère un fichier excel, il faut installer openpy et ajouter DOmain.txt au dossier !!! myDB.gen_excel(10000, fields=['name', 'year', 'email', 'license_plate', 'Job title'], filename='Employe.xlsx', real_email=True) print("Le fichier Excel a été généré !!!")