def gen_syn_adv(self):
    """Generate an adverbial group.

    Picks one entry from ``self.advs`` and, with probability 0.2, prefixes
    it with another adverbial group produced by a recursive call.

    Returns:
        Whatever ``utility.concat`` returns for ``(prefix, picked entry)``;
        the prefix is ``""`` when no recursive group was generated.
    """
    head = random.choice(self.advs)
    # The draw happens after the pick so the RNG is consumed in a fixed order.
    prefix = self.gen_syn_adv() if random.random() < 0.2 else ""
    return utility.concat(prefix, head)
def gen_syn_adj(self):
    """Generate an adjectival group.

    Picks one entry from ``self.adjs`` and, with probability 0.2, prefixes
    it with an adverbial group from ``self.gen_syn_adv()``.

    Returns:
        Whatever ``utility.concat`` returns for ``(prefix, picked entry)``;
        the prefix is ``""`` when no adverbial group was generated.
    """
    head = random.choice(self.adjs)
    prefix = self.gen_syn_adv() if random.random() < 0.2 else ""
    return utility.concat(prefix, head)
def gen_syn_nominale(self):
    """Generate a nominal group.

    Always picks one entry from ``self.articles`` and one from
    ``self.noms``. Two optional parts are then added independently, each
    with probability 0.3: a prepositional group (``self.gen_syn_prep()``)
    and an adjectival group (``self.gen_syn_adj()``).

    Returns:
        Whatever ``utility.concat`` returns for
        ``(article, noun, prepositional part, adjectival part)``; an
        optional part that was not generated is passed as ``""``.
    """
    determiner = random.choice(self.articles)
    noun = random.choice(self.noms)

    # Optional structures added at random. The prepositional draw (and its
    # generation, if any) comes before the adjectival draw.
    prep_part = self.gen_syn_prep() if random.random() < 0.3 else ""
    adj_part = self.gen_syn_adj() if random.random() < 0.3 else ""

    return utility.concat(determiner, noun, prep_part, adj_part)
def prep_dummy_csv():
    """Write an anonymised copy of the order data to ``dummy.csv``.

    Steps, in order:

    1. Load the data with ``u.concat('CSV_Files')`` and pass it through
       ``u.clean``. The result is used as a pandas-style DataFrame.
    2. Replace every ``Email_Billing`` value with the first six hex
       characters of its SHA-256 digest (a semi-unique customer identifier).
    3. Overwrite the billing/shipping name and shipping address columns
       with the literal ``'Restricted'`` and drop the ``Unnamed:_0`` column.
    4. Fill missing ``State_Name_Shipping`` with ``'Missing'`` and missing
       ``Discount_Amount`` with the column mean.
    5. Print the remaining per-column missing-value counts, drop rows that
       still contain missing values, and keep only rows whose
       ``Order_Date`` is ``<= '2020-12-31-23:59'``.
    6. Print the head, columns and tail of the result and write it to
       ``dummy.csv`` in the current working directory.

    Returns:
        None. The function prints to stdout and writes ``dummy.csv``.
    """
    from hashlib import sha256

    def short_digest(text):
        """Return the first six hex characters of the SHA-256 of *text*."""
        # Named so that it does not shadow the builtins ``hash`` / ``bytes``.
        return sha256(text.encode('utf-8')).hexdigest()[:6]

    frame = u.clean(u.concat('CSV_Files'))

    # We need to hash customer emails as a semi-unique identifier.
    frame['Email_Billing'] = [short_digest(email) for email in frame['Email_Billing']]
    frame[['Full_Name_Billing', 'Full_Name_Shipping', 'Address_1_Shipping']] = 'Restricted'
    frame.drop('Unnamed:_0', axis=1, inplace=True)

    # Assign through a single .loc[rows, column] call. The chained form
    # frame[col].loc[mask] = value writes to a temporary Series and leaves
    # the frame unchanged when pandas Copy-on-Write is active.
    frame.loc[frame['State_Name_Shipping'].isna(), 'State_Name_Shipping'] = 'Missing'
    # The mean is evaluated before the assignment, i.e. over non-missing values.
    discount_mean = frame['Discount_Amount'].mean()
    frame.loc[frame['Discount_Amount'].isna(), 'Discount_Amount'] = discount_mean

    print(frame.isna().sum())
    frame.dropna(inplace=True)

    # NOTE(review): whether this is a string or a datetime comparison depends
    # on the dtype u.clean gives Order_Date - confirm the cutoff format.
    frame = frame.loc[frame['Order_Date'] <= '2020-12-31-23:59']

    print(frame.head(5))
    print(frame.columns)
    print(frame.tail(5))
    frame.to_csv('dummy.csv')
def serve_frame(file):
    """Load *file* with ``u.concat`` and return it after ``u.clean``.

    Args:
        file: Passed unchanged to ``u.concat``.

    Returns:
        The value returned by ``u.clean`` for the concatenated data.
    """
    return u.clean(u.concat(file))
dfa = ma.DataframeAnalysis(frame) dfa.avg_discount_rate() elif sys.argv[1] == 'pcp': frame = serve_frame('CSV_Files') dfpa = ma.ProductAnalysis(frame) dfpa.highest_positive_product_change_over_month_analysis() elif sys.argv[1] == 'pcp': frame = serve_frame('CSV_Files') dfpa = ma.ProductAnalysis(frame) dfpa.highest_negative_product_change_over_month_analysis() elif sys.argv[1] == 'plg': frame = serve_frame('CSV_Files') dfpa = ma.ProductAnalysis(frame) dfpa.product_line_change_over_month_graph() file = u.concat('CSV_FIles') # Test slice for functionality file = u.clean(file) cp1 = u.FieldLoader(file) u.date_difference() u.date_difference_email_list_compiler() #profiles_master_init() '''Work log: 2/25 functionality and indexing for time: reading, saving and analyzing time fields (completed?) Streamline initializations: maybe make a initialization function in Analysis so we can simply click on the py file and run the program **As of now customer_profile line 63 should protect from repeated inputs of sales information. This will need to be tested.
def gen_syn_verbale(self):
    """Generate a verbal group.

    Combines the result of ``self.gen_verbe_conjugue()`` with a nominal
    group from ``self.gen_syn_nominale()``, generated in that order.

    Returns:
        Whatever ``utility.concat`` returns for ``(verb, complement)``.
    """
    # Arguments are evaluated left to right: the verb first, then its complement.
    return utility.concat(self.gen_verbe_conjugue(), self.gen_syn_nominale())
def gen_phrase(self):
    """Generate a full sentence.

    Builds a nominal group, then a verbal group, appends ``"."`` to the
    verbal group and combines the two with ``utility.concat``.

    Returns:
        Whatever ``utility.concat`` returns for
        ``(nominal group, verbal group + ".")``.
    """
    subject = self.gen_syn_nominale()
    # A full stop is appended to close the sentence.
    predicate = self.gen_syn_verbale() + "."
    return utility.concat(subject, predicate)
def gen_syn_prep(self):
    """Generate a prepositional group.

    Picks one entry from ``self.preps`` and follows it with a nominal group
    from ``self.gen_syn_nominale()``.

    Returns:
        Whatever ``utility.concat`` returns for
        ``(picked entry, nominal group)``.
    """
    # The preposition is picked before the nominal group is generated.
    return utility.concat(random.choice(self.preps), self.gen_syn_nominale())