def score(tp,fp,fn,epsilon=10**-5):
    """Report precision, recall and F-beta (beta = 2) for one class.

    ``tp``, ``fp`` and ``fn`` are the true-positive, false-positive and
    false-negative counts. ``epsilon`` is added to every denominator so that
    all-zero counts do not raise ZeroDivisionError.

    Nothing is returned: the three figures are sent to
    ``def_context.Report``, followed by a separator line.
    """
    beta_sq = 2 ** 2
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)
    weighted_product = (beta_sq + 1) * (precision * recall)
    fbeta = weighted_product / (beta_sq * precision + recall + epsilon)
    summary = ("|| precison: " + str(precision)
               + "|| recall: " + str(recall)
               + "|| fbeta: " + str(fbeta))
    def_context.Report(summary)
    def_context.Report('--------------------------------------------------')
def mesure(y_score,y_test,p1=0.5,p2=0.5):
    """Report precision / recall / F-beta for classes 0, 1 and 2.

    ``y_score`` is turned into labels with ``get_label(y_score, p1, p2)``;
    the counts for each class come from ``mesure_class`` and are handed to
    ``score``. Nothing is returned.
    """
    predicted = get_label(y_score, p1, p2)

    # Gather the counts of all three classes before reporting anything, so
    # the calls to mesure_class happen in the same order as before.
    per_class = []
    for label in (0, 1, 2):
        tp, fp, fn = mesure_class(predicted, y_test, label)
        per_class.append((tp, fp, fn))

    headers = ("pour la classe 0", "pour la classe 1", "pour la classe 2")
    for header, (tp, fp, fn) in zip(headers, per_class):
        def_context.Report(header)
        score(tp, fp, fn)
def load(fileX):
    """Read ``PATH_IN/hop/<fileX>`` and split it into features and labels.

    Returns ``(features, labels)`` where ``labels`` is the 'labels' column
    with missing values replaced by 0 and ``features`` is the frame without
    that column.

    A file without a 'labels' column is reported through
    ``def_context.Report``; the column lookup that follows then raises
    KeyError.
    """
    frame = pd.read_csv(PATH_IN+'hop/'+fileX)
    has_labels = 'labels' in frame.columns.values
    if not has_labels:
        def_context.Report('Pas de labels pour le fichier '+str(fileX))
    labels = frame['labels'].fillna(0)
    features = frame.drop(['labels'],axis=1)
    return features, labels
def acc(y_score,y_test,p1=0.5,p2=0.5):
    """Report the share of predicted labels that equal the expected ones.

    Labels are obtained with ``get_label(y_score, p1, p2)`` and compared
    position by position with ``y_test``. The ratio is only reported via
    ``def_context.Report``; nothing is returned.
    """
    predicted = get_label(y_score, p1, p2)
    total = len(predicted)
    hits = 0
    for idx in range(total):
        hits += 1 if predicted[idx] == y_test[idx] else 0
    def_context.Report("accuracy: "+str(hits/total))
def pred(file,numb_folder):
    """Build the predicted schedule for one PTV file and save it in folder T<numb_folder>.

    ``file`` is split on '.' and '_': the third '_'-separated field of the
    stem becomes ``f`` and the last one ``c`` (they look like a date and a
    channel name -- TODO confirm against the file naming scheme).

    If the output CSV for (f, c) can already be read from
    ``PATH_OUT/T<numb_folder>/new_ptv/``, nothing is recomputed. Otherwise
    the prediction runs and the result is written as HTML and CSV under both
    PATH_IN and PATH_OUT.

    Returns 0 when ``def_context.load_file`` yields an empty PTV and None in
    every other case. Any exception raised while predicting is reported
    through ``def_context.Report`` and swallowed.
    """
    relecture = True  # never changed in this function, so the RLPTV pass runs whenever l1 > 0
    EPSILON = 1e-15  # not read anywhere in this function
    f = ((file.split('.'))[0].split('_'))[2]
    c = ((file.split('.'))[0].split('_'))[-1]
    try:
        # The read is only an existence/readability probe: df is not used afterwards.
        df = pd.read_csv(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv')
        def_context.Report("file %s already exists. I won't do it again"%(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv'))
    except Exception as e:
        # Any failure of the probe (missing, empty or unparsable file) triggers a fresh computation.
        try:
            def_context.Report(str(f)+"-"+str(c))
            PTV,proba = def_context.load_file(str(f),str(c))
            if(len(PTV) == 0):
                return 0
            # First programme that starts at or before minute 180 and ends after minute 185.
            # Raises IndexError (caught below) when no programme matches.
            index_PTV = PTV.index[(PTV['debut'] <= 3*60) & (PTV['debut']+PTV['DUREE'] > 3*60+5)].tolist()[0]
            def_context.Report('Starting with: %s'%(PTV['TITRE'].iloc[index_PTV]))
            lastend = PTV['debut'].loc[index_PTV]
            currentduree = PTV['DUREE'].loc[index_PTV]
            newPTV = def_context.init_newPTV(PTV,str(c))
            historyofpoints = def_context.init_history(str(c),PTV,lastend,currentduree)
            temp_context = historyofpoints.iloc[0]
            importantpts = def_context.get_important_points(c,PTV,index_PTV)
            #{#{#{#{#{#{#{#{{{{{{{{{{{{#############}}}}}}}}}}}}}}}}}}}
            if(numb_folder == '0'):
                # Folder '0' uses the per-channel scripts: PTVTF1 for TF1, PTVM6 for everything else.
                # NOTE(review): as laid out here, the temp_newPTV / temp_history returned by this
                # branch are never merged into newPTV / historyofpoints before saving -- confirm
                # the intended nesting of the `if(l == 4)` block below.
                if(c == 'TF1'):
                    from PTVTF1 import main as arbre1
                    l,temp_newPTV,temp_history,index_PTV,temp_context = arbre1([str(f),str(c)])
                else:
                    from PTVM6 import main as arbre2
                    l,temp_newPTV,temp_history,index_PTV,temp_context = arbre2([str(f),str(c)])
            else:
                # Three passes; each one starts from the last row of newPTV built so far.
                for i in range(3):
                    def_context.Report(str(i)+' '+str(c)+' '+str(f))
                    from predictPTV import main as pred1
                    l1,temp_newPTV1,temp_history1,index_PTV1,temp_context1 = pred1([str(c),str(f),i,newPTV.iloc[newPTV.shape[0]-1],temp_context,index_PTV,importantpts,PATH_OUT+'T'+str(numb_folder)+'/'])
                    if(l1>0 and relecture):
                        # A positive l1 triggers a second attempt with RLPTV on the same inputs;
                        # its result replaces the first one.
                        def_context.Report("Utilisation de la relecture "+str(i)+' '+str(c)+' '+str(f))
                        from RLPTV import main as RL
                        l,temp_newPTV,temp_history,index_PTV,temp_context = RL([str(c),str(f),i,newPTV.iloc[newPTV.shape[0]-1],temp_context,index_PTV,importantpts,PATH_OUT+'T'+str(numb_folder)+'/'])
                    else:
                        l,temp_newPTV,temp_history,index_PTV,temp_context =l1,temp_newPTV1,temp_history1,index_PTV1,temp_context1
                    # l == 4 means "nothing to merge"; otherwise append the new rows, dropping
                    # the first row of temp_newPTV.
                    if(l == 4):
                        pass
                    else:
                        newPTV = pd.concat([newPTV,temp_newPTV.iloc[1:]])
                        historyofpoints = pd.concat([historyofpoints,temp_history])
            # Human-readable time built from the minute count ("H:M", no zero padding).
            newPTV['Heure'] = newPTV['minute'].apply(lambda x: str(int(x/60))+':'+str(x%60))
            historyofpoints['Heure'] = historyofpoints['minute'].apply(lambda x: str(int(x/60))+':'+str(x%60))
            newPTV.to_html(PATH_IN+'new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.html')
            newPTV.to_csv(PATH_IN+'new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv',index=False)
            historyofpoints.to_html(PATH_IN+'hop/historyofpoints_'+str(f)+'_'+str(c)+'.html')
            historyofpoints.to_csv(PATH_IN+'hop/historyofpoints_'+str(f)+'_'+str(c)+'.csv',index=False)
            newPTV.to_html(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.html')
            newPTV.to_csv(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv',index=False)
        except Exception as e:
            # Report the failing file, line and source file, then give up on this file.
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            def_context.Report("Failed to process {0} at line {2} in {3}: {1}".format(str(file), str(e),sys.exc_info()[-1].tb_lineno,fname))
def mismatch(y_score,y_test,p1=0.5,p2=0.5):
    """Count and report the confusions between classes 1 and 2.

    Labels come from ``get_label(y_score, p1, p2)``. Two counters are
    reported through ``def_context.Report``: predictions of 1 where the
    expected label is 2, and predictions of 2 where the expected label is 1.
    Always returns 0.
    """
    predicted = get_label(y_score, p1, p2)
    predicted_1_expected_2 = 0
    predicted_2_expected_1 = 0
    for idx in range(len(predicted)):
        if predicted[idx] == 1 and y_test[idx] == 2:
            predicted_1_expected_2 += 1
        if predicted[idx] == 2 and y_test[idx] == 1:
            predicted_2_expected_1 += 1
    def_context.Report("fausses publicités")
    def_context.Report(predicted_1_expected_2)
    def_context.Report("fausses fins")
    def_context.Report(predicted_2_expected_1)
    return 0
def get_tuple(argv):
    """Look a channel up in ``Equivalence.csv`` by numeric id or by name.

    ``argv`` is matched against the 'id_unique' column when ``int(argv)``
    succeeds and against 'nom_chaine' otherwise. The file is read with ';'
    as separator on every call.

    Returns ``(id_unique, nom_chaine)`` as two strings, or ``(0, 0)`` after
    reporting the problem when the lookup fails.
    """
    table = pd.read_csv('Equivalence.csv', sep=';')

    try:
        argv = int(argv)
    except Exception:
        key = 'nom_chaine'
    else:
        key = 'id_unique'

    try:
        unique_id = str(table[table[key] == argv]['id_unique'].values[0])
        channel_name = str(table[table[key] == argv]['nom_chaine'].values[0])
        return unique_id, channel_name
    except Exception as e:
        trace = sys.exc_info()[2]
        fname = os.path.basename(trace.tb_frame.f_code.co_filename)
        line_number = sys.exc_info()[-1].tb_lineno
        def_context.Report(
            "Failed to process {0} at line {2} in {3}: {1}".format(
                str(argv), str(e), line_number, fname))
        Report("Mauvais numéro/nom de chaîne")
        return 0, 0
def load_all(CHAINE):
    """Stack every usable history file of ``PATH_IN/hop/`` into one dataset.

    Files are skipped when they are not ``.csv``, when the second-to-last
    '_'-separated field of their name is '2017-12-20' or starts with '2018',
    or when ``load`` returns a single-row frame. In the kept frames, infinite
    and missing values are replaced by 0.

    ``CHAINE`` is accepted but not used.

    Returns ``(def_context.process(features), labels)``.
    """
    features = pd.DataFrame()
    labels = pd.DataFrame()
    for name in os.listdir(PATH_IN+'hop/'):
        if name.split('.')[-1] != 'csv':
            continue
        day = name.split('_')[-2]
        if day in ['2017-12-20'] or day.split('-')[0] == '2018':
            continue
        def_context.Report(day)
        frame, y = load(name)
        if len(frame) == 1:
            continue
        frame = frame.replace([np.inf, -np.inf], np.nan).fillna(0)
        features = pd.concat([features, frame])
        labels = pd.concat([labels, y])
    return def_context.process(features), labels
def main(argv):
    """Run the prediction / training rounds, or fan out one month of files.

    The behaviour depends on ``len(argv)``:

    * 0 -- run rounds 0 to 29 and report the duration of every round.
    * 1 -- ``argv[0]`` is the first round to run (rounds go up to 29). If it
      is not an integer, or if anything fails while the rounds are running,
      ``pred(argv[0])`` is called instead.
    * 2 -- ``argv[0]`` is a ``YYYY-MM`` month and ``argv[1]`` a channel name:
      one ``fasttrain.py <file>`` process is started for every matching file
      of ``PATH_IN/PTV/``, with fewer than ``MAX_PROCESSES`` other processes
      running when each one is launched.

    Compared with the previous version, the process-reaping loops no longer
    index the list with the undefined name ``enp`` (which raised NameError as
    soon as a process had to be awaited), and the duplicated round code is
    shared between the 0- and 1-argument modes.
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = get_path()
    end = 30

    def run_round(i):
        """Run round ``i``: prediction scripts first, then MLforPTV.py."""
        update_temp_path(i)
        if i == 0:
            # Round 0 has no trained model yet and uses PTVall.py.
            Popen(['python', PATH_SCRIPT + 'PTVall.py', '2017-12']).wait()
        else:
            # Later rounds run one fasttrain.py per channel, in parallel.
            workers = [
                Popen(['python', PATH_SCRIPT + 'fasttrain.py', '2017-12', channel])
                for channel in ('TF1', 'M6', 'France 2', 'France 3')
            ]
            for worker in workers:
                worker.wait()
            print('end of prediction')
        time.sleep(60)
        os.system('python ' + PATH_SCRIPT + 'MLforPTV.py')

    def wait_until_fewer_than(processes, limit):
        """Drop finished processes from ``processes`` until fewer than ``limit`` remain."""
        while len(processes) >= limit:
            # poll() returns None while a process is still running.
            processes[:] = [proc for proc in processes if proc.poll() is None]
            time.sleep(5)

    if len(argv) == 0:
        for i in range(0, end):
            t = time.time()
            run_round(i)
            def_context.Report("fin du tour " + str(i))
            def_context.Report(time.time() - t)
        def_context.Report(time.time() - t)
    elif len(argv) == 1:
        try:
            start = int(argv[0])
            for i in range(start, end):
                run_round(i)
                time.sleep(60)
                def_context.Report("fin du tour " + str(i))
        except Exception:
            # Not a round number (or a round failed): treat the argument as a
            # single file to predict.
            pred(argv[0])
    elif len(argv) == 2:
        PATH_OUT = get_temp_path()
        running = []
        for file in os.listdir(PATH_IN + 'PTV/'):
            f = ((file.split('.'))[0].split('_'))[2]
            c = ((file.split('.'))[0].split('_'))[-1]
            excluded = (
                f == '2017-12-20'
                or (f in ['2017-12-09', '2017-12-06', '2018-02-22'] and c == 'TF1')
                or (f in ['2018-02-22'] and c == 'M6')
                or (f.split('-')[0] != str(argv[0].split('-')[0]))
                or f.split('-')[1] != argv[0].split('-')[1]
            )
            if excluded or c == '' or '2018' in f:
                continue
            if c != argv[1]:
                continue
            def_context.Report(file)
            wait_until_fewer_than(running, MAX_PROCESSES)
            running.append(
                Popen(['python', PATH_SCRIPT + 'fasttrain.py', file]))
        # Wait for every remaining process before returning.
        wait_until_fewer_than(running, 1)
def main(argv):
    """Run the whole pipeline, starting at a step chosen by the user.

    * With no argument the user is asked for the starting step and, when it
      is below 1, for the '-'-separated list of channels to process.
    * With two arguments, ``pred(argv[0], argv[1])`` is called and 0 is
      returned.
    * With any other number of arguments ``start`` stays at 10, so every
      step below is skipped and only the final report is emitted.

    Every step runs when ``start`` is at or below its threshold:
    data extraction and cleaning (start < 1), ``predict.py`` (<= 1), 31
    rounds of ``out.py`` (<= 2), ``cost.py`` (<= 3), selection of the best
    folder per day part (<= 4) and error rates in ``res_out.csv`` (<= 5).

    NOTE(review): this layout was rebuilt from a source whose indentation
    was lost; the nesting at the places marked below should be confirmed.
    """
    start = 10
    if(len(argv) == 0):
        print('bonjour')
        start = int(input("A quelle partie voulez vous commencer?"))
        if(start<1):
            Chaines = str(input("Quelle Chaînes devont nous traiter?(separez les par un '-'):"))
            chaines = Chaines.split('-')
            # C is not read again in this function.
            C = [[def_context.get_tuple(chaine)] for chaine in chaines]
    Processes = []
    if(len(argv) == 2):
        pred(argv[0],argv[1])
        return 0
    ##### Part one #####
    if(start < 1):
        for chaine in chaines:
            # Keep the number of running processes below MAX_PROCESSES/2.
            while(len(Processes)>= MAX_PROCESSES/2):
                lenp = len(Processes)
                for p in range(lenp): # Check the processes in reverse order
                    if Processes[lenp-1-p].poll() is not None: # If the process hasn't finished will return None
                        del Processes[lenp-1-p] # Remove from list - this is why we needed reverse order
                time.sleep(5)
            Processes.append(Popen(['python','extractdatafromPTV.py',chaine]))
        # NOTE(review): assumed to run once after the loop over channels -- confirm.
        Processes.append(Popen(['python','cleaningRTSfiles.py','0',Chaines,'0']))
        ##### emptying the process queue ######
        while(len(Processes)):
            lenp = len(Processes)
            for p in range(lenp): # Check the processes in reverse order
                if Processes[lenp-1-p].poll() is not None: # If the process hasn't finished will return None
                    del Processes[lenp-1-p] # Remove from list - this is why we needed reverse order
            time.sleep(5)
        Processes.append(Popen(['python','processingdata.py']))
    if(start<=1):
        # Wait for anything still running, then run predict.py and wait for it too.
        while(len(Processes)):
            lenp = len(Processes)
            for p in range(lenp): # Check the processes in reverse order
                if Processes[lenp-1-p].poll() is not None: # If the process hasn't finished will return None
                    del Processes[lenp-1-p] # Remove from list - this is why we needed reverse order
            time.sleep(5)
        Processes.append(Popen(['python','predict.py']))
        while(len(Processes)):
            lenp = len(Processes)
            for p in range(lenp): # Check the processes in reverse order
                if Processes[lenp-1-p].poll() is not None: # If the process hasn't finished will return None
                    del Processes[lenp-1-p] # Remove from list - this is why we needed reverse order
            time.sleep(5)
    if (start <= 2):
        Processes = []
        pass_files = []  # files that no longer need to be processed in later rounds
        nb_files_true = 0
        for i in range(31):
            update_temp_path(i)
            files = os.listdir(PATH_IN+'PTV/')
            nb_files = len(files)
            nb_files_true =0
            for file in files:
                if(file in pass_files):
                    pass
                elif(i%10 != 0 or i ==0):
                    # Ordinary round: launch out.py as soon as a slot is free.
                    while (len(Processes)>= MAX_PROCESSES):
                        lenp = len(Processes)
                        for p in range(lenp): # Check the processes in reverse order
                            if Processes[lenp-1-p].poll() is not None: # If the process hasn't finished will return None
                                del Processes[lenp-1-p] # Remove from list - this is why we needed reverse order
                        time.sleep(5)
                    def_context.Report('process launch for %s at turn %s'%(file,i))
                    Processes.append(Popen(['python', 'out.py', file ,str(i)]))
                else:
                    # Rounds 10, 20 and 30: recompute the cost of this file first and
                    # stop processing it when the cost column meets the criterion below.
                    date = file.split('_')[2]
                    chaine = file.split('_')[-1].split('.')[0]
                    numero,nom_chaine = def_context.get_tuple(chaine)
                    os.system('python cost.py '+str(chaine)+' '+str(date))
                    couts = pd.read_csv('cout.csv')
                    l = np.bincount(couts[date+'_'+nom_chaine+'_tout'])
                    if(min(couts[date+'_'+nom_chaine+'_tout'])<1 and max(l)>=4):
                        pass_files.append(file)
                    else:
                        while(len(Processes)>= MAX_PROCESSES):
                            lenp = len(Processes)
                            for p in range(lenp): # Check the processes in reverse order
                                if Processes[lenp-1-p].poll() is not None: # If the process hasn't finished will return None
                                    del Processes[lenp-1-p] # Remove from list - this is why we needed reverse order
                            time.sleep(5)
                        Processes.append(Popen(['python','out.py',file,str(i)]))
            # NOTE(review): pause assumed to be once per round -- confirm.
            time.sleep(2)
        # NOTE(review): nb_files_true is never incremented in this function, and the two
        # values appear in the opposite order to what the message says -- confirm.
        def_context.Report("treated %s files instead of %s"%(nb_files*30,nb_files_true))
    ######### All predictions have been made ########
    if(start<=3):
        os.system("python cost.py")
        time.sleep(10)
    ######################################################
    if(start <=4):
        create_res_file()
        df = pd.read_csv('cout.csv')
        index_of_best = [0]*31  # how many times each folder index was selected
        for col in df.columns.values:
            if('tout' not in col):
                pass
            else:
                df_final=[]
                # Drop the trailing 4 characters ('tout') to get the column prefix.
                col = ''.join(list(col)[:-4])
                for mm in ['matinee','apresmidi','soiree']:
                    i = find_best(df[col+mm])
                    index_of_best[i]+=1
                    a,b = def_context.get_tuple(col.split('_')[1])
                    df_final.append(exit_file(col.split('_')[0],a,b,i,mm))
                    def_context.Report('Best Prediction for %s %s %s occured at %s'%(col.split('_')[1],col.split('_')[0],mm,str(i)))
                #def_context.Report(str(df_final[0].shape)+' '+str(df_final[1].shape)+' '+str(df_final[2].shape))
                # When exit_file returned its error value (a list), list.append returns None here
                # and the failure is reported by the except clause below.
                # NOTE(review): DataFrame.append no longer exists in pandas >= 2.0.
                df_final = df_final[0].append(df_final[1].append(df_final[2]))
                try:
                    df_final.to_csv('../DatasOut/out/new_PTV_'+col.split('_')[0]+'_'+b+'.csv',index=False)
                except Exception as e:
                    exc_type, exc_obj, exc_tb = sys.exc_info()
                    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                    def_context.Report("Failed to process {0} at line {2} in {3}: {1}".format(str(argv), str(e),sys.exc_info()[-1].tb_lineno,fname))
        with open('res.txt', 'w') as f:
            for item in index_of_best:
                f.write("%s\n" % item)
    if(start<=5):
        res = pd.read_csv('res_out.csv')
        # Row 1 starts as zeros and receives, for every error column, 1 - errors / count.
        res.loc[1] = res.loc[0]*0
        for col in res:
            if('count' in col):
                pass
            elif(col == 'nombre_fichier'):
                pass
            elif(col in [str(i) for i in range(31)]):
                pass
            else:
                res[col].loc[1] = 1-(res[col].loc[0])/res['count_'+col].loc[0]
                def_context.Report('pour %s : %s erreurs soit %s '%(col,res[col][0],res[col][1]))
        res.to_csv('res_out.csv',index=False)
    def_context.Report("EXIT THE PROGRAM WITH NO ERROR. Congratulation bro!")
def exit_file(date,numero,nom_chaine,num_folder,part):
    """Build the final schedule of one day part and update ``res_out.csv``.

    The original PTV (``PATH_IN/PTV/IPTV_<numero>_<date>_<nom_chaine>.csv``)
    is matched against the predicted one
    (``PATH_OUT/T<num_folder>/new_ptv/new_PTV_<date>_<nom_chaine>.csv``):
    every programme receives the predicted end ('vrai fin'), the derived
    start ('vrai debut') and a flag 'ND'. One extra row is added for every
    'publicité' entry of the prediction.

    As a side effect the counters of ``res_out.csv`` are incremented: the
    'count_*' columns once per call, the error columns once per
    'HARD RESET OF ALGORITHM' event falling inside ``part``, plus
    'nombre_fichier' and the column named after ``num_folder``.

    Returns the rows whose 'fin' lies in ``part`` ('matinee', 'apresmidi' or
    'soiree'), None for any other ``part``, and the list ``[0,0,0,0]`` when
    one of the two input files cannot be read.

    ``pd.concat`` is used instead of ``DataFrame.append``, which no longer
    exists in pandas >= 2.0.
    """
    def bump(table, column):
        """Add 1 to ``column`` of ``table``; create it with value 1 if absent."""
        if column in table.columns:
            table[column] = table[column] + 1
        else:
            table[column] = 1

    file = PATH_IN+'PTV/IPTV_'+numero+'_'+date+'_'+nom_chaine+'.csv'
    otherfile = PATH_OUT+'T'+str(num_folder)+'/new_ptv/new_PTV_'+date+'_'+nom_chaine+'.csv'
    try:
        ptv = pd.read_csv(file)
        new_ptv = pd.read_csv(otherfile)
        df = pd.DataFrame()
    except Exception as e:
        def_context.Report('petit problème: '+str(e))
        return [0,0,0,0]

    df['titre'] = ptv['TITRE']
    df['clediff'] = ptv['@CLEDIF']
    # Minutes are wrapped to a single day.
    df['debut'] = ptv['debut']%1440
    df['duree'] = ptv['DUREE']%1440
    df['fin'] = (ptv['debut']+ptv['DUREE'])%1440
    df['vrai fin'] = 0
    df['ND'] = 0
    df['pourcentage vu'] = 0

    # Only the predicted events whose label starts with 'fin' are matched.
    new_ptv_ = new_ptv[new_ptv['Évenement'].apply(lambda x: x.split(' ')[0]) == 'fin']
    current = 0
    for j in range(df.shape[0]):
        for i in range(current,new_ptv_.shape[0]):
            if(new_ptv_['TITRE'].iloc[i] == df['titre'].iloc[j]):
                # Accept the match when the predicted end is within 40 minutes of the
                # planned end, or when the title occurs only once in the day.
                if(abs(df['fin'].iloc[j] - new_ptv_['minute'].iloc[i])<40 or df[df['titre'] == df['titre'].iloc[j]].shape[0] == 1):
                    # NOTE(review): chained assignment; relies on pandas writing through
                    # to df, which is not guaranteed under copy-on-write.
                    df['vrai fin'].iloc[j] = new_ptv_['minute'].iloc[i]
                    df['pourcentage vu'].iloc[j] = new_ptv_['pourcentage vu'].iloc[i]
                    if(new_ptv_['Évenement'].iloc[i] == "fin d'un programme" ):
                        df['ND'].iloc[j] = 0
                    else:
                        df['ND'].iloc[j] = 1
                    # Later programmes only look at later predicted events.
                    current = i+1
                    break

    df['vrai debut'] = (df['vrai fin'] - df['duree']*df['pourcentage vu'])%1440
    # The last programme ends one planned duration after the previous real end.
    df['vrai fin'].iloc[df.shape[0]-1] = df['vrai fin'].iloc[df.shape[0]-2] + df['duree'].iloc[df.shape[0]-1]
    df['chaine'] = nom_chaine
    df['date'] = date

    # One row per predicted 'publicité' entry, ending 6 minutes after it starts.
    temp_df = pd.DataFrame()
    temp_df[['titre','vrai debut']] = new_ptv[new_ptv['TITRE'] == 'publicité'][['TITRE','minute']]
    for v in df.columns.values:
        if v not in ['titre','vrai debut']:
            temp_df[v] = 0
    temp_df['chaine'] = nom_chaine
    temp_df['date'] = date
    temp_df['vrai fin'] = temp_df['vrai debut'].apply(lambda x: x+6)
    temp_df['fin'] = temp_df['vrai fin']
    temp_df['debut'] = temp_df['vrai debut']
    df = pd.concat([df, temp_df]).reset_index(drop=True)

    r = pd.read_csv('res_out.csv')
    month = '-'.join(date.split('-')[:-1])
    # Error counters, from the most specific to the most general; the
    # matching 'count_' columns count how many times each one was evaluated.
    error_columns = [
        nom_chaine+'_'+month+'_'+part,
        nom_chaine+'_'+month,
        part,
        month,
        nom_chaine,
    ]
    for column in error_columns:
        bump(r, 'count_'+column)

    for index,x in new_ptv[['Évenement','minute']].iterrows():
        if 'HARD RESET OF ALGORITHM' not in x['Évenement']:
            continue
        minute = x['minute']
        in_part = (
            (part == 'matinee' and minute<=13*60+40 and minute>180)
            or (part == 'apresmidi' and minute<=20*60+35 and minute>13*60+40)
            or (part == 'soiree' and (minute>20*60+35 or minute<180))
        )
        if in_part:
            for column in error_columns:
                bump(r, column)
            def_context.Report('message: %s à la minute %s de la journée %s pour la chaîne %s pour la partie %s. Recherche dans le folder %s ' %(x['Évenement'],x['minute'],date,nom_chaine,part,num_folder))

    r['nombre_fichier'] = r['nombre_fichier']+1
    bump(r, str(num_folder))
    r.to_csv('res_out.csv',index=False)

    if(part == 'matinee'):
        return df[(df['fin']<=13*60+40) & (df['fin']>180)]
    elif(part == 'apresmidi'):
        return df[(df['fin']<=20*60+35) & (df['fin']>13*60+40)]
    elif(part == 'soiree'):
        return pd.concat([df[df['fin']>20*60+35], df[df['fin']<=180]])
def find_cost(date, numero, nom_chaine, i):
    """Compute the cost of the prediction stored in folder ``T<i>``.

    Builds and fills a DataFrame so that the cost can be computed easily.
    (The original author notes that some PTV files are so inconsistent that
    the error cannot really be computed and should perhaps be discarded.)

    The planned schedule (``PATH_IN/PTV/IPTV_<numero>_<date>_<nom_chaine>.csv``)
    is matched against the predicted one
    (``PATH_OUT/T<i>/new_ptv/new_PTV_<date>_<nom_chaine>.csv``). Matched
    programmes cost the start and end deviations multiplied by ``coef``;
    unmatched ones (share seen equal to 0) cost a flat penalty; every
    'HARD RESET OF ALGORITHM' event adds ``2000 + i``.

    A per-programme table is written to ``test/<date>_<nom_chaine>_<i>.html``.

    Returns ``[total, morning, afternoon, evening]``, each divided by 2000,
    or ``[3 + i/2000, 1 + i/2000, 1 + i/2000, 1 + i/2000]`` when an input
    file cannot be read.

    Fix: the matching loop used ``i`` as its index and overwrote the folder
    index, so the reset penalty and the HTML file name depended on the last
    matched event instead of the folder.
    """
    file = PATH_IN + 'PTV/IPTV_' + numero + '_' + date + '_' + nom_chaine + '.csv'
    otherfile = PATH_OUT + 'T' + str(
        i) + '/new_ptv/new_PTV_' + date + '_' + nom_chaine + '.csv'
    try:
        ptv = pd.read_csv(file)
        new_ptv = pd.read_csv(otherfile)
        df = pd.DataFrame()
    except Exception as e:
        def_context.Report('petit problème: ' + str(e))
        return [3 + i / 2000, 1 + i / 2000, 1 + i / 2000, 1 + i / 2000]

    df['titre'] = ptv['TITRE']
    # Minutes are wrapped to a single day.
    df['debut'] = ptv['debut'] % 1440
    df['duree'] = ptv['DUREE'] % 1440
    df['fin'] = (ptv['debut'] + ptv['DUREE']) % 1440
    df['vrai fin'] = 0
    df['coef'] = df['fin'].apply(lambda x: coef(x))
    df['ND'] = 0
    df['pourcentage vu'] = 0

    # Only the predicted events whose label starts with 'fin' are matched.
    new_ptv_ = new_ptv[new_ptv['Évenement'].apply(lambda x: x.split(' ')[0])
                       == 'fin']
    current = 0
    for j in range(df.shape[0]):
        for k in range(current, new_ptv_.shape[0]):
            if (new_ptv_['TITRE'].iloc[k] == df['titre'].iloc[j]):
                # Accept the match when the predicted end is within 40 minutes of
                # the planned end, or when the title occurs only once in the day.
                if (abs(df['fin'].iloc[j] - new_ptv_['minute'].iloc[k]) < 40
                        or df[df['titre'] == df['titre'].iloc[j]].shape[0] == 1):
                    df['vrai fin'].iloc[j] = new_ptv_['minute'].iloc[k]
                    df['pourcentage vu'].iloc[j] = new_ptv_[
                        'pourcentage vu'].iloc[k]
                    if (new_ptv_['Évenement'].iloc[k] == "fin d'un programme"):
                        df['ND'].iloc[j] = 0
                    else:
                        df['ND'].iloc[j] = 1
                    current = k
                    break

    df['vrai debut'] = (df['vrai fin'] -
                        df['duree'] * df['pourcentage vu']) % 1440
    # The last programme ends one planned duration after the previous real end.
    df['vrai fin'].iloc[df.shape[0] - 1] = df['vrai fin'].iloc[
        df.shape[0] - 2] + df['duree'].iloc[df.shape[0] - 1]

    # df2: programmes that were never matched; df: the matched ones.
    df2 = df[df['pourcentage vu'] == 0]
    df = df[df['pourcentage vu'] > 0]
    df['cout'] = 0
    df2['cout'] = 1
    df = df.reset_index(drop=True)
    df2 = df2.reset_index(drop=True)
    for index, row in df.iterrows():
        start_gap = min(
            abs(row['debut'] - row['vrai debut']) % 1440,
            abs(row['debut'] % 1440 - row['vrai debut'] % 1440))
        end_gap = min(
            abs(row['fin'] - row['vrai fin']) % 1440,
            abs(row['fin'] % 1440 - row['vrai fin'] % 1440))
        # NOTE(review): the cost of row `index` is stored at position
        # `index - 1` (row 0 lands on the last row). Kept as is because the
        # per-part totals depend on it -- confirm whether this is intended.
        df['cout'].iloc[index - 1] = (start_gap + end_gap) * row['coef']

    morning = (df['fin'] <= 13 * 60 + 40) & (df['fin'] > 180)
    morning2 = (df2['fin'] <= 13 * 60 + 40) & (df2['fin'] > 180)
    afternoon = (df['fin'] <= 20 * 60 + 35) & (df['fin'] > 13 * 60 + 40)
    afternoon2 = (df2['fin'] <= 20 * 60 + 35) & (df2['fin'] > 13 * 60 + 40)

    cout = np.sum(df['cout']) + 20 * np.sum(df2['cout'])
    cout_matin = np.sum(df[morning]['cout']) + np.sum(
        df2[morning2]['cout']) * 20
    cout_aprem = np.sum(df[afternoon]['cout']) + 100 * np.sum(
        df2[afternoon2]['cout'])
    cout_soir = np.sum(df[df['fin'] > 20 * 60 + 35]['cout']) + np.sum(
        df[df['fin'] <= 180]['cout']) + 100 * (
            np.sum(df2[df2['fin'] > 20 * 60 + 35]['cout']) +
            np.sum(df2[df2['fin'] <= 180]['cout']))

    # Every hard reset is charged to the day part it falls in; adding the
    # folder index makes the penalty grow with the folder number.
    for index, x in new_ptv[['Évenement', 'minute']].iterrows():
        if 'HARD RESET OF ALGORITHM' in x['Évenement']:
            if (x['minute'] <= 13 * 60 + 40 and x['minute'] > 180):
                cout_matin += 2000 + i
                cout += 2000 + i
            elif (x['minute'] < 20 * 60 + 35 and x['minute'] > 13 * 60 + 40):
                cout_aprem += 2000 + i
                cout += 2000 + i
            else:
                cout_soir += 2000 + i
                cout += 2000 + i

    df['cout'] = df['cout'] / 2000
    df.to_html('test/' + date + '_' + nom_chaine + '_' + str(i) + '.html')
    return ([
        cout / 2000, cout_matin / 2000, cout_aprem / 2000, cout_soir / 2000
    ])
def pred(file):
    """Build the predicted schedule for one PTV file in the current temp folder.

    ``file`` is split on '.' and '_': the third '_'-separated field of the
    stem becomes ``f`` and the last one ``c`` (they look like a date and a
    channel name -- TODO confirm against the file naming scheme).

    Three passes are run. Each pass calls ``predictPTV.main``; when it
    returns a positive code the pass is redone with ``RLPTV.main``; when
    that code is above 5 the channel script (``PTVTF1`` for 'TF1', ``PTVM6``
    for 'M6') is tried, and its result is kept only if its code is not
    positive. The result of a pass is merged unless its code is 4.

    The schedule and the history are written as HTML and CSV under PATH_IN
    and under the folder returned by ``get_temp_path()``.

    Returns 0 when ``def_context.load_file`` yields an empty PTV, None
    otherwise. Any exception is reported through ``def_context.Report`` and
    swallowed.

    Fixes: the channel test used the undefined name ``chaine`` instead of
    ``c``, and channels without a dedicated script left ``l3`` unbound; both
    raised NameError and silently dropped the file. ``predictPTV.main`` is
    now imported as ``pred1`` so that it no longer shadows this function.
    """
    try:
        PATH_OUT = get_temp_path()
        relecture = True
        f = ((file.split('.'))[0].split('_'))[2]
        c = ((file.split('.'))[0].split('_'))[-1]
        PTV, proba = def_context.load_file(str(f), str(c))
        if (len(PTV) == 0):
            return 0
        # First programme that starts at or before minute 180 and ends after minute 185.
        index_PTV = PTV.index[(PTV['debut'] <= 3 * 60) & (
            PTV['debut'] + PTV['DUREE'] > 3 * 60 + 5)].tolist()[0]
        def_context.Report('Starting with: %s' %
                           (PTV['TITRE'].iloc[index_PTV]))
        lastend = PTV['debut'].loc[index_PTV]
        currentduree = PTV['DUREE'].loc[index_PTV]
        newPTV = def_context.init_newPTV(PTV, str(c))
        historyofpoints = def_context.init_history(str(c), PTV, lastend,
                                                   currentduree)
        temp_context = historyofpoints.iloc[0]
        importantpts = def_context.get_important_points(c, PTV, index_PTV)

        for i in range(3):
            def_context.Report(str(i) + ' ' + str(c) + ' ' + str(f))
            from predictPTV import main as pred1
            l, temp_newPTV, temp_history, next_index, next_context = pred1([
                str(c),
                str(f), i, newPTV.iloc[newPTV.shape[0] - 1], temp_context,
                index_PTV, importantpts
            ])
            if (l > 0 and relecture):
                def_context.Report("Utilisation de la relecture " + str(i) +
                                   ' ' + str(c) + ' ' + str(f))
                from RLPTV import main as RL
                # Same inputs as the first attempt: index_PTV and temp_context
                # are only updated once the pass is settled.
                l, temp_newPTV, temp_history, next_index, next_context = RL([
                    str(c),
                    str(f), i, newPTV.iloc[newPTV.shape[0] - 1], temp_context,
                    index_PTV, importantpts
                ])
                if (l > 5):
                    def_context.Report("Utilisation de l'arbre de décision",
                                       f, c, i)
                    arbre = None
                    if (c == 'TF1'):
                        from PTVTF1 import main as arbre
                    elif (c == 'M6'):
                        from PTVM6 import main as arbre
                    tree_accepted = False
                    if arbre is not None:
                        # NOTE(review): label-based .loc here, whereas the other
                        # calls use .iloc -- confirm which one is intended.
                        l3, temp_newPTV3, temp_history3, index_PTV3, temp_context3 = arbre(
                            [
                                str(c),
                                str(f), i, newPTV.loc[newPTV.shape[0] - 1],
                                temp_context, index_PTV, importantpts
                            ])
                        tree_accepted = not (l3 > 0)
                    if tree_accepted:
                        l, temp_newPTV, temp_history, next_index, next_context = l3, temp_newPTV3, temp_history3, index_PTV3, temp_context3
                    else:
                        # No script for this channel, or it failed too: keep the
                        # RLPTV result.
                        def_context.Report("AUCUNE DÉCISION NE CONVIENT", f, c)
            index_PTV, temp_context = next_index, next_context

            # Code 4 means there is nothing to merge for this pass.
            if (l != 4):
                newPTV = pd.concat([newPTV, temp_newPTV.iloc[1:]])
                historyofpoints = pd.concat([historyofpoints, temp_history])

        # Human-readable time built from the minute count ("H:M", no zero padding).
        newPTV['Heure'] = newPTV['minute'].apply(
            lambda x: str(int(x / 60)) + ':' + str(x % 60))
        historyofpoints['Heure'] = historyofpoints['minute'].apply(
            lambda x: str(int(x / 60)) + ':' + str(x % 60))
        newPTV.to_html(PATH_IN + 'new_ptv/new_PTV_' + str(f) + '_' + str(c) +
                       '.html')
        newPTV.to_csv(PATH_IN + 'new_ptv/new_PTV_' + str(f) + '_' + str(c) +
                      '.csv',
                      index=False)
        historyofpoints.to_html(PATH_IN + 'hop/historyofpoints_' + str(f) +
                                '_' + str(c) + '.html')
        historyofpoints.to_csv(PATH_IN + 'hop/historyofpoints_' + str(f) +
                               '_' + str(c) + '.csv',
                               index=False)
        newPTV.to_html(PATH_OUT + 'new_ptv/new_PTV_' + str(f) + '_' + str(c) +
                       '.html')
        newPTV.to_csv(PATH_OUT + 'new_ptv/new_PTV_' + str(f) + '_' + str(c) +
                      '.csv',
                      index=False)
    except Exception as e:
        # Report the failing file, line and source file, then give up on this file.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        def_context.Report(
            "Failed to process {0} at line {2} in {3}: {1}".format(
                str(file), str(e),
                sys.exc_info()[-1].tb_lineno, fname))
def main(argv):
    """Score the per-channel PTV scripts over one month of files.

    ``argv[0]`` is the month to process, formatted ``YYYY-MM``. For every
    matching file of ``PATH_IN/PTV/`` the script ``PTV<channel>.py`` is run
    (``PTVM6`` for 'M6', ``PTVTF1`` for every other channel) and its exit
    status, divided by 256, is read as three decimal digits: errors of the
    morning, the afternoon and the evening. A value of 4 means the file is
    ignored.

    The scores (1 - errors / parts) are reported and appended as a new row
    to ``scores.csv``, which is created with a placeholder row if it cannot
    be read.

    Fixes: the 'France 2' counters were guarded by ``c == 'France 3'`` and
    therefore accumulated France 3 results; the "no file processed" message
    called the ``def_context`` module instead of ``def_context.Report``; and
    with no argument the function now returns after asking for a month
    instead of failing later on ``argv[0]``.
    """
    t = time.time()
    if (len(argv) == 0):
        Report("Merci de renseigner une année et un mois (ex: 2017-12)")
        return
    EPSILON = 1e-15  # avoids a division by zero when nothing was processed

    def score_of(errors, parts):
        """Share of parts without error."""
        return 1 - (errors / (parts + EPSILON))

    err = 0
    m = 0
    # [errors, parts] per channel.
    per_channel = {
        'TF1': [0, 0],
        'M6': [0, 0],
        'France 2': [0, 0],
        'France 3': [0, 0],
    }
    err_type_1 = 0
    err_type_2 = 0
    err_type_3 = 0

    try:
        pd.read_csv('scores.csv')
    except Exception:
        # No usable scores file yet: create one with a placeholder row.
        df = pd.DataFrame()
        df['score TF1'] = [0]
        df['score M6'] = 0
        df['score France 2'] = 0
        df['score France 3'] = 0
        df['score Total'] = 0
        df['score sur la matinée'] = 0
        df["score sur l'après midi"] = 0
        df['score sur la soirée'] = 0
        df['part de relecture'] = 0
        df['temps de calcul'] = 0
        df['mois'] = '55-55'
        df.to_csv('scores.csv', index=False)

    for file in os.listdir(PATH_IN + 'PTV/'):
        def_context.Report('-------------------------------------')
        f = ((file.split('.'))[0].split('_'))[2]
        c = ((file.split('.'))[0].split('_'))[-1]
        if (f == '2017-12-20' or
            (f in ['2017-12-09', '2017-12-06', '2018-02-22'] and c == 'TF1')
                or (f in ['2018-02-22'] and c == 'M6')
                or (f.split('-')[0] != str(argv[0].split('-')[0]))
                or f.split('-')[1] != argv[0].split('-')[1]):
            # Excluded day, or not in the requested month.
            def_context.Report(f)
            continue
        if (c == ''):
            continue

        def_context.Report(c)
        # Channels without a dedicated script fall back to the TF1 one.
        chaine = c if c in ['M6', 'TF1'] else 'TF1'
        number, name = get_tuple(c)
        if (len(number) < 4):
            number = "0" + number
        def_context.Report('Using PTV%s for %s' % (chaine, f))
        # NOTE(review): the command line is built from file names; os.system
        # is kept because the decoding below relies on its return value.
        l = os.system('python ' + PATH_SCRIPT + 'PTV' + str(chaine) + '.py ' +
                      str(f) + ' ' + str(number))
        if (l / 256 != 4):
            l = l / 256
            morning_err = int(l / 100)
            afternoon_err = int((l % 100) / 10)
            evening_err = int((l % 10))
            file_err = morning_err + afternoon_err + evening_err
            err += file_err
            err_type_1 += morning_err
            err_type_2 += afternoon_err
            err_type_3 += evening_err
            m += 3
            if c in per_channel:
                per_channel[c][0] += file_err
                per_channel[c][1] += 3
        def_context.Report(err)
        def_context.Report(m)

    if (m == 0):
        def_context.Report(
            "aucun fichier n'a été traité. Merci de vérifier la date et les données d'entrée."
        )

    score_total = score_of(err, m)
    score_TF1 = score_of(*per_channel['TF1'])
    score_M6 = score_of(*per_channel['M6'])
    score_F2 = score_of(*per_channel['France 2'])
    score_F3 = score_of(*per_channel['France 3'])
    score_matin = 1 - ((err_type_1 * 3) / (m + EPSILON))
    score_aprem = 1 - ((err_type_2 * 3) / (m + EPSILON))
    score_soir = 1 - ((err_type_3 * 3) / (m + EPSILON))

    def_context.Report("score Total:" + str(score_total))
    def_context.Report("score TF1:" + str(score_TF1))
    def_context.Report("score M6:" + str(score_M6))
    def_context.Report("score France 2:" + str(score_F2))
    def_context.Report("score France 3:" + str(score_F3))
    def_context.Report("score sur la matinée:" + str(score_matin))
    def_context.Report("score sur l'après midi:" + str(score_aprem))
    def_context.Report("score sur la soirée:" + str(score_soir))
    def_context.Report("temps de calcul:" + str(time.time() - t))

    try:
        df = pd.read_csv('scores.csv')
        # Same column order as the placeholder row created above.
        df.loc[df.shape[0]] = [
            score_TF1, score_M6, score_F2, score_F3, score_total,
            score_matin, score_aprem, score_soir,
            0.5 * m / (m + EPSILON),
            time.time() - t, argv[0]
        ]
        df.to_csv('scores.csv', index=False)
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        def_context.Report(
            "Failed to process {0} at line {2} in {3}: {1}".format(
                '', str(e),
                sys.exc_info()[-1].tb_lineno, fname))
        Report("fichier non conforme ou non existant: %s" % (e))
def _evaluate_model(banner, y_score, y_true, p1, p2):
    """Log *banner*, then run mesure, mismatch and acc on one model's scores."""
    def_context.Report(banner)
    mesure(y_score, y_true, p1, p2)
    mismatch(y_score, y_true, p1, p2)
    acc(y_score, y_true, p1, p2)


def main(argv):
    """Train the PTV classifiers and their stacked model, or re-evaluate saved predictions.

    Args:
        argv: command-line arguments.
            * ``[]`` is treated as ``['all']``.
            * ``['test', ...]`` reloads ``results.csv`` and the ``y_pred*.csv``
              files written by a previous training run and reports the metrics
              of each model and of the stacked logistic regression.
            * any other single argument is passed to ``load_all``; the models
              are trained on an 80/20 split, saved under
              ``PATH_OUT + 'model_PTV/'`` and their test predictions are
              written to CSV files.

    Returns:
        The string ``"process achevé sans erreures"``.
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = def_context.get_path()
    PATH_OUT = get_temp_path()
    if not os.path.exists(PATH_OUT + 'model_PTV/'):
        os.makedirs(PATH_OUT + 'model_PTV/')
    if len(argv) == 0:
        argv = ['all']

    if argv[0] == 'test':
        Y_test = pd.read_csv('results.csv').values
        y_pred = pd.read_csv('y_pred.csv')
        y_pred2 = pd.read_csv('y_pred2.csv')
        # The training branch writes the decision-tree scores to y_pred3.csv.
        y_pred3 = pd.read_csv('y_pred3.csv')
        y_pred4 = pd.read_csv('y_pred4.csv')
        y_pred5 = pd.read_csv('y_pred5.csv')
        logreg = use_logisticreg(y_pred, y_pred2, y_pred3, y_pred4, y_pred5,
                                 Y_test)
        res = pd.concat([y_pred, y_pred2, y_pred3, y_pred4, y_pred5],
                        axis=1).values
        res = logreg.predict_proba(res)
        for p1 in [0]:
            for p2 in [0]:
                def_context.Report('################### ' + str(p1) +
                                   ' ### ' + str(p2) + '###################')
                _evaluate_model('############XGB##############',
                                y_pred.values, Y_test, p1, p2)
                _evaluate_model('############CatBoost##############',
                                y_pred2.values, Y_test, p1, p2)
                _evaluate_model(
                    '############GradientBoostingClassifier##############',
                    y_pred4.values, Y_test, p1, p2)
                _evaluate_model(
                    '############RandomForestClassifier##############',
                    y_pred5.values, Y_test, p1, p2)
                _evaluate_model('############Stack##############',
                                res, Y_test, p1, p2)
    elif len(argv) == 1:
        X, Y = load_all(argv[0])
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                            test_size=0.2)
        # Infinite and missing feature values are both replaced by 0.
        X_train = X_train.replace([np.inf, -np.inf], np.nan)
        X_train = X_train.fillna(0)
        X_test = X_test.replace([np.inf, -np.inf], np.nan)
        X_test = X_test.fillna(0)
        Y_test = [row[0] for row in Y_test.values]
        ##########################################
        # NOTE(review): the seed is set after train_test_split, so the split
        # itself is not made reproducible by this call.
        np.random.seed(42)
        clf = Classifier()
        clf.fit(X_train, Y_train)
        y_pred = clf.predict_proba(X_test)
        clf2 = Classifier2()
        clf2.fit(X_train, Y_train)
        y_pred2 = clf2.predict_proba(X_test)
        dtree_model = DecisionTreeClassifier(max_depth=10).fit(
            X_train, Y_train)
        y_pred3 = dtree_model.predict_proba(X_test)
        tpot = GradientBoostingClassifier(learning_rate=0.05,
                                          max_depth=10,
                                          max_features=0.75,
                                          min_samples_leaf=7,
                                          min_samples_split=16,
                                          n_estimators=500,
                                          subsample=0.9)
        tpot.fit(X_train, Y_train)
        def_context.Report(tpot.score(X_test, Y_test))
        y_pred4 = tpot.predict_proba(X_test)
        RF_model = RandomForestClassifier(max_depth=10).fit(X_train, Y_train)
        y_pred5 = RF_model.predict_proba(X_test)

        # The stacking model is fitted on the base models' training scores.
        y_p = clf.predict_proba(X_train)
        y_p2 = clf2.predict_proba(X_train)
        y_p3 = dtree_model.predict_proba(X_train)
        y_p4 = tpot.predict_proba(X_train)
        y_p5 = RF_model.predict_proba(X_train)
        logreg = use_logisticreg(y_p, y_p2, y_p3, y_p4, y_p5, Y_train)
        ##########################################
        save_model_xgb(clf)
        save_model_cat(clf2)
        save_model(dtree_model, "DT")
        save_model(RF_model, "RF")
        with open(PATH_OUT + "model_PTV/GradientBoostingClassifier.pickle.dat",
                  "wb") as model_file:
            pickle.dump(tpot, model_file)
        with open(PATH_OUT + "model_PTV/RandomForestClassifier.pickle.dat",
                  "wb") as model_file:
            pickle.dump(RF_model, model_file)

        X = pd.concat([
            pd.DataFrame(y_pred),
            pd.DataFrame(y_pred2),
            pd.DataFrame(y_pred3),
            pd.DataFrame(y_pred4),
            pd.DataFrame(y_pred5)
        ], axis=1).values
        res = logreg.predict_proba(X)
        for p1, p2 in zip([0], [0]):
            _evaluate_model('############XGB##############',
                            y_pred, Y_test, p1, p2)
            _evaluate_model('############CatBoost##############',
                            y_pred2, Y_test, p1, p2)
            _evaluate_model(
                '############DecisionTreeClassifier##############',
                y_pred3, Y_test, p1, p2)
            _evaluate_model(
                '############GradientBoostingClassifier##############',
                y_pred4, Y_test, p1, p2)
            _evaluate_model(
                '############RandomForestClassifier##############',
                y_pred5, Y_test, p1, p2)
            _evaluate_model('############Stack##############',
                            res, Y_test, p1, p2)
        # Keep the test labels and scores so that the 'test' mode can replay them.
        pd.DataFrame(Y_test).to_csv('results.csv', index=False)
        pd.DataFrame(y_pred).to_csv('y_pred.csv', index=False)
        pd.DataFrame(y_pred2).to_csv('y_pred2.csv', index=False)
        pd.DataFrame(y_pred3).to_csv('y_pred3.csv', index=False)
        pd.DataFrame(y_pred4).to_csv('y_pred4.csv', index=False)
        pd.DataFrame(y_pred5).to_csv('y_pred5.csv', index=False)
    return ("process achevé sans erreures")
def Report(error):
    """Append *error* to the LOG file, then forward it to def_context.Report."""
    with open(LOG, 'a+') as log_file:
        log_file.write('%s \n' % (str(error),))
    def_context.Report(str(error))
def update_temp_path(i):
    """Set the 'temp_path' column of path.csv to the T<i>/ folder of the output path.

    The new path of the first row is reported before path.csv is rewritten.
    """
    folder = 'T' + str(i) + "/"
    paths = pd.read_csv('path.csv')
    output_root = paths['PathtoDatasOut']
    paths['temp_path'] = output_root + folder
    def_context.Report('Updated Temp path to: ' + output_root[0] + folder)
    paths.to_csv('path.csv', index=False)
def _create_scores_file():
    """Reset scores.csv to a single placeholder row holding every score column."""
    import pandas as pd
    df = pd.DataFrame()
    df['score TF1'] = [0]
    df['score M6'] = 0
    df['score France 2'] = 0
    df['score France 3'] = 0
    df['score Total'] = 0
    df['score sur la matinée'] = 0
    df["score sur l'après midi"] = 0
    df['score sur la soirée'] = 0
    df['part de relecture'] = 0
    df['temps de calcul'] = 0
    df['mois'] = '55-55'
    df.to_csv('scores.csv', index=False)


def _first_programme_index(PTV, margin):
    """Return the index of the first programme on air at 3:00 that ends after 3:00 + margin."""
    on_air = (PTV['debut'] <= 3 * 60) & (
        PTV['debut'] + PTV['DUREE'] > 3 * 60 + margin)
    return PTV.index[on_air].tolist()[0]


def _run_prediction_cascade(c, f, i, newPTV, temp_context, index_PTV,
                            importantpts, path_out, relecture=True):
    """Predict one part of the day, falling back to relecture then to the decision tree.

    Returns ``(outcome, used_relecture)`` where ``outcome`` is the 5-item
    result of the model finally retained
    (error count, new PTV rows, history rows, PTV index, last context).
    """
    from predictPTV import main as pred

    def slot_args():
        # A fresh list per call, ending with the last row written so far.
        return [
            str(c), str(f), i, newPTV.iloc[newPTV.shape[0] - 1],
            temp_context, index_PTV, importantpts, path_out
        ]

    first = pred(slot_args())
    if not (first[0] > 0 and relecture):
        return first, False

    def_context.Report("Utilisation de la relecture " + str(i) + ' ' +
                       str(c) + ' ' + str(f))
    from RLPTV import main as RL
    second = RL(slot_args())
    if not (second[0] > 5):
        return second, True

    def_context.Report("Utilisation de l'arbre de décision", f, c, i)
    # Only TF1 and M6 have a hand-written decision tree.
    if c == 'TF1':
        from PTVTF1 import main as arbre
    elif c == 'M6':
        from PTVM6 import main as arbre
    else:
        arbre = None
    if arbre is not None:
        third = arbre([
            str(c), str(f), i, newPTV.loc[newPTV.shape[0] - 1],
            temp_context, index_PTV, importantpts
        ])
        if not (third[0] > 0):
            return third, True
    # No tree for this channel, or the tree failed too: keep the relecture.
    def_context.Report("AUCUNE DÉCISION NE CONVIENT", f, c)
    return second, True


def _save_day_outputs(newPTV, historyofpoints, f, c, path_in, path_out=None):
    """Add the 'Heure' column and write the day's PTV and history as HTML and CSV."""
    def to_clock(x):
        return str(int(x / 60)) + ':' + str(x % 60)

    newPTV['Heure'] = newPTV['minute'].apply(to_clock)
    historyofpoints['Heure'] = historyofpoints['minute'].apply(to_clock)
    ptv_name = 'new_ptv/new_PTV_' + str(f) + '_' + str(c)
    hop_name = 'hop/historyofpoints_' + str(f) + '_' + str(c)
    newPTV.to_html(path_in + ptv_name + '.html')
    newPTV.to_csv(path_in + ptv_name + '.csv', index=False)
    historyofpoints.to_html(path_in + hop_name + '.html')
    historyofpoints.to_csv(path_in + hop_name + '.csv', index=False)
    if path_out is not None:
        newPTV.to_html(path_out + ptv_name + '.html')
        newPTV.to_csv(path_out + ptv_name + '.csv', index=False)


def main(argv):
    """Entry point of the new-PTV generation script.

    Args:
        argv: command-line arguments; an empty list is treated as ``['2015']``.
            * ``['start']`` or ``['start', n]``: run rounds ``n`` (default 0)
              to 29. Each round updates the temp path, empties ``res.txt``,
              runs a month script for three months in parallel and then
              ``MLforPTV.py``. The very first round of a fresh start uses
              ``PTVall.py``; the others use ``makenewPTV.py``.
            * ``['YYYY-MM']``: rebuild the PTV of every file of that month,
              write the outputs, and append the scores to ``scores.csv``.
            * ``[chaine, date]``: rebuild the PTV of a single day.

    Returns:
        ``"Fichier manquant"`` in single-day mode when the PTV is empty,
        otherwise None.
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = get_path()
    import pandas as pd
    createfile = False
    end = 30
    t = time.time()
    if len(argv) == 0:
        argv = ['2015']

    if argv[0] == 'start':
        if len(argv) == 1:
            start = 0
            createfile = True
        else:
            start = int(argv[1])
            if start == 0:
                createfile = True
        if createfile:
            _create_scores_file()
        time.sleep(10)
        for i in range(start, end):
            update_temp_path(i)
            try:
                open(PATH_OUT + 'res.txt', 'w').close()
                def_context.Report('file cleaned')
            except Exception:
                # Best effort: a missing output folder must not stop the round.
                pass
            first_round = createfile and i == 0
            script = 'PTVall.py' if first_round else 'makenewPTV.py'
            children = [
                Popen(['python', PATH_SCRIPT + script, month])
                for month in ('2017-12', '2018-02', '2018-03')
            ]
            for child in children:
                child.wait()
            time.sleep(60)
            os.system('python ' + PATH_SCRIPT + 'MLforPTV.py')
            time.sleep(60)
            def_context.Report("fin du tour " + str(i))

    elif len(argv) == 1 and argv[0] != 'start':
        PATH_OUT = get_temp_path()
        relecture = True
        EPSILON = 1e-15  # keeps the ratios defined when a counter is still 0
        tracked = ('TF1', 'M6', 'France 2', 'France 3')
        err = 0
        m = 0
        err_chaine = dict.fromkeys(tracked, 0)
        m_chaine = dict.fromkeys(tracked, 0)
        err_type = [0, 0, 0]  # morning, afternoon, evening
        nb_rel = 0
        files = os.listdir(PATH_IN + 'PTV/')
        nb_files = len(files)
        for file in files:
            def_context.Report('Il reste encore %s fichiers à traiter' %
                               (nb_files))
            nb_files -= 1
            try:
                parts = file.split('.')[0].split('_')
                f = parts[2]
                c = parts[-1]
                day = f.split('-')
                wanted = argv[0].split('-')
                if (f == '2017-12-20'
                        or (f in ['2017-12-09', '2017-12-06', '2018-02-22']
                            and c == 'TF1')
                        or (f in ['2018-02-22'] and c == 'M6')
                        or day[0] != str(wanted[0])
                        or day[1] != wanted[1]):
                    continue
                if c == '':
                    continue
                PTV, proba = def_context.load_file(str(f), str(c))
                if len(PTV) == 0:
                    continue
                index_PTV = _first_programme_index(PTV, 5)
                def_context.Report('Starting with: %s' %
                                   (PTV['TITRE'].iloc[index_PTV]))
                lastend = PTV['debut'].loc[index_PTV]
                currentduree = PTV['DUREE'].loc[index_PTV]
                newPTV = def_context.init_newPTV(PTV, str(c))
                historyofpoints = def_context.init_history(
                    str(c), PTV, lastend, currentduree)
                temp_context = historyofpoints.iloc[0]
                importantpts = def_context.get_important_points(
                    c, PTV, index_PTV)
                # The context manager closes res.txt even if a model fails.
                with open(PATH_OUT + 'res.txt', 'a+') as file_:
                    file_.write(str(f + ' ' + c + ':').rstrip('\n'))
                    for i in range(3):
                        def_context.Report(
                            str(i) + ' ' + str(c) + ' ' + str(f))
                        outcome, used_relecture = _run_prediction_cascade(
                            c, f, i, newPTV, temp_context, index_PTV,
                            importantpts, PATH_OUT, relecture)
                        if used_relecture:
                            nb_rel += 1
                        (l, temp_newPTV, temp_history, index_PTV,
                         temp_context) = outcome
                        # A result of 4 marks a part of the day to ignore.
                        if l != 4:
                            newPTV = pd.concat(
                                [newPTV, temp_newPTV.iloc[1:]])
                            historyofpoints = pd.concat(
                                [historyofpoints, temp_history])
                            err += l
                            err_type[i] += l
                            m += 1
                            if c in err_chaine:
                                err_chaine[c] += l
                                m_chaine[c] += 1
                        file_.write(str(l).rstrip('\n'))
                        file_.write(" ".rstrip('\n'))
                    _save_day_outputs(newPTV, historyofpoints, f, c,
                                      PATH_IN, PATH_OUT)
                    file_.write("\n")
                def_context.Report(err)
            except Exception as e:
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                def_context.Report(
                    "Failed to process {0} at line {2} in {3}: {1}".format(
                        str(file), str(e), exc_tb.tb_lineno, fname))

        def ratio(errors, total):
            return 1 - (errors / (total + EPSILON))

        score_tf1 = ratio(err_chaine['TF1'], m_chaine['TF1'])
        score_m6 = ratio(err_chaine['M6'], m_chaine['M6'])
        score_f2 = ratio(err_chaine['France 2'], m_chaine['France 2'])
        score_f3 = ratio(err_chaine['France 3'], m_chaine['France 3'])
        score_total = ratio(err, m)
        # Each part of the day accounts for a third of the measures.
        score_morning = ratio(err_type[0] * 3, m)
        score_afternoon = ratio(err_type[1] * 3, m)
        score_evening = ratio(err_type[2] * 3, m)

        def_context.Report(m)
        def_context.Report("score Total:" + str(score_total))
        def_context.Report("score TF1:" + str(score_tf1))
        def_context.Report("score M6:" + str(score_m6))
        def_context.Report("score France 2:" + str(score_f2))
        def_context.Report("score France 3:" + str(score_f3))
        def_context.Report("score sur la matinée:" + str(score_morning))
        def_context.Report("score sur l'après midi:" + str(score_afternoon))
        def_context.Report("score sur la soirée:" + str(score_evening))
        def_context.Report("temps de calcul:" + str(time.time() - t))
        df = pd.read_csv('scores.csv')
        df.loc[df.shape[0]] = [
            score_tf1, score_m6, score_f2, score_f3, score_total,
            score_morning, score_afternoon, score_evening,
            nb_rel / (m + EPSILON),
            (time.time() - t) * 3 / (m + EPSILON),
            argv[0]
        ]
        df.to_csv('scores.csv', index=False)

    elif len(argv) == 2:
        PATH_OUT = get_temp_path()
        relecture = True
        c = argv[0]
        f = argv[1]
        PTV, proba = def_context.load_file(str(f), str(c))
        # Check for a missing file before indexing into the (empty) PTV.
        if len(PTV) == 0:
            return ("Fichier manquant")
        index_PTV = _first_programme_index(PTV, 5)
        def_context.Report('Starting with: %s' %
                           (PTV['TITRE'].iloc[index_PTV]))
        lastend = PTV['debut'].loc[index_PTV]
        currentduree = PTV['DUREE'].loc[index_PTV]
        newPTV = def_context.init_newPTV(PTV, str(c))
        historyofpoints = def_context.init_history(str(c), PTV, lastend,
                                                   currentduree)
        # The starting index is recomputed without the 5-minute margin.
        index_PTV = _first_programme_index(PTV, 0)
        temp_context = historyofpoints.iloc[0]
        importantpts = def_context.get_important_points(c, PTV, index_PTV)
        help_points = def_context.get_help(c, PTV)
        print(help_points)
        for i in range(3):
            def_context.Report(str(i) + ' ' + str(c) + ' ' + str(f))
            outcome, _ = _run_prediction_cascade(
                c, f, i, newPTV, temp_context, index_PTV, importantpts,
                PATH_OUT, relecture)
            l, temp_newPTV, temp_history, index_PTV, temp_context = outcome
            if l != 4:
                newPTV = pd.concat([newPTV, temp_newPTV.iloc[1:]])
                historyofpoints = pd.concat([historyofpoints, temp_history])
        _save_day_outputs(newPTV, historyofpoints, f, c, PATH_IN)
def make_newPTV(PTV, proba, chaine, index, lastPTV, lastcontext, index_PTV, importantpts, date, path):
    """Simulate one part of the day minute by minute and build the predicted schedule.

    The loop runs from ``lastcontext[0] + 1`` up to (but excluding)
    ``min(importantpts[index][0] + 5, 1620)``. For every minute a context row
    is requested from ``def_context.get_context``; depending on that context
    the function either realigns itself on an "important point", asks the
    stacked classifiers whether the minute is a commercial break (class 1)
    or the end of a programme (class 2), or counts down the overrun allowed
    to a programme that has passed its planned end.

    Args:
        PTV: schedule table; the columns read here are 'TITRE', 'DUREE',
            'debut' and 'GENRESIMPLE'.
        proba: forwarded unchanged to ``def_context.get_context``.
        chaine: channel name ('TF1' and 'M6' have dedicated rules).
        index: position in ``importantpts`` of the point ending this part
            of the day.
        lastPTV: not used in this function.
        lastcontext: last context row of the previous part of the day
            (presumably laid out like the rows built here — TODO confirm).
        index_PTV: positional index in ``PTV`` of the current programme.
        importantpts: sequence whose items are indexed ``[0]`` (a minute)
            and ``[1]`` (a programme title).
        date: forwarded unchanged to ``def_context.get_context``.
        path: folder handed to ``def_context.load_models``.

    Returns:
        ``(newPTV, historyofpoints, labels, error, index_PTV, context)``:
        the rows written for this period, the contexts kept, the class
        recorded for each of them, the number of resets counted as errors,
        the final programme index and the last context computed.
    """
    # Initialisation of the variables
    verbose = False
    index_PTV = index_PTV
    ##########################
    # Minutes a programme may still overrun; 200 acts as "not armed yet".
    Predictiontimer = 200
    # State restored from the last context of the previous period.
    Pubinhour = lastcontext[12]
    lastCP = lastcontext[8]
    lastPub = lastcontext[9]
    lastend = lastcontext[10]
    currentduree = lastcontext[11]
    planifiedend = lastcontext[10] + lastcontext[11]
    begin = True
    nbpub = 0
    Recall = 1
    wait = 4
    error = 0
    per = 1
    index_ipts = index
    importantpts = importantpts
    # NOTE(review): `help` shadows the builtin of the same name.
    help = def_context.get_help(chaine, PTV)
    newPTV = def_context.init_newPTV(PTV, chaine)
    historyofpoints = def_context.init_history(chaine, PTV, lastend,
                                               currentduree)
    ####################################
    historyofpoints.loc[0] = lastcontext
    labels = [0]
    start = lastcontext[0] + 1
    end = importantpts[index][0]
    ######### init Classifier #############
    XGB, CatBoost, rf, dt, gb, logistic = def_context.load_models(path)
    ####################################
    for i in tqdm(range(start, min(end + 5, 1620))):
        # NOTE(review): range() excludes its upper bound, so i never equals
        # end + 5 and this branch looks unreachable — confirm the intent.
        if (i == end + 5 and index == 2):
            newPTV.loc[newPTV.shape[0]] = [(i + currentduree) % 1440,
                                           PTV['TITRE'].iloc[index_PTV],
                                           'non', 1, "fin d'un programme"]
        # Update time of commercials (reset at every full hour)
        if (i % 60 == 0):
            Pubinhour = 0
        # Update timers
        lastPub += 1
        lastCP += 1
        # Stay on the last important point once all of them have been used.
        if (index_ipts == len(importantpts)):
            index_ipts -= 1
        # Let's get the context:
        context = def_context.get_context(i, PTV.iloc[index_PTV], lastCP,
                                          lastPub, lastend, currentduree,
                                          planifiedend, Pubinhour, proba,
                                          nbpub, chaine, per, PTV, index_PTV,
                                          date)
        # On M6 there are 16 minutes of commercials between two films.
        if (PTV['GENRESIMPLE'].iloc[index_PTV].split(' ')[0] ==
                PTV['GENRESIMPLE'].iloc[index_PTV - 1].split(' ')[0]
                and PTV['GENRESIMPLE'].iloc[index_PTV].split(' ')[0] ==
                'Téléfilm' and (i - lastend) < 2 and Recall > 0
                and per < 0.97 and chaine == 'M6'):
            lastend = i + 5
            lastPub = -25
            Recall -= 0.5
        elif ((i - lastend) < 2 and Recall > 0 and per < 0.97
              and chaine == 'M6' and 15 * 60 < i < 16 * 60):
            lastend = i + 5
            lastPub = -25
            Recall -= 0.5
        # Check that the algorithm is not drifting; if it is, reset its state.
        elif (i == importantpts[index_ipts][0]):
            if (3 * 60 < i < 22 * 60):
                # We are at an important point: compare it with the prediction.
                if (PTV['TITRE'].iloc[index_PTV] == importantpts[index_ipts][1]
                    ):  # The predicted programme is the right one.
                    # Now check that we are at a plausible point of it.
                    if (i - lastend > 13):
                        # The programme began way too early: something went
                        # wrong before. Reset the state and count an error.
                        Predictiontimer = 200
                        Pubinhour = 0
                        lastCP = 0
                        lastPub = 0
                        lastend = i
                        currentduree = PTV['DUREE'].iloc[index_PTV]
                        planifiedend = (lastend + currentduree)
                        nbpub = 0
                        if (index_ipts == 0):
                            def_context.Report("erreur sur la matinée")
                        elif (index_ipts == 1):
                            def_context.Report("erreur sur l'après midi")
                        else:
                            def_context.Report("erreur sur la soirée")
                        error += 1
                        # Carry on as before; just add a line to the output
                        # saying that a reset occurred.
                        newPTV.loc[newPTV.shape[0]] = [
                            i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                            context[3],
                            "--HARD RESET OF ALGORITHM--(in programme)"
                        ]
                        index_ipts += 1
                    else:
                        # The algorithm is right: realign the timers anyway,
                        # without counting an error.
                        Predictiontimer = 200
                        Pubinhour = 0
                        lastCP = 0
                        lastPub = 0
                        lastend = i
                        currentduree = PTV['DUREE'].iloc[index_PTV]
                        planifiedend = (lastend + currentduree)
                        nbpub = 0
                        # Carry on as before; just add a line to the output
                        # saying that a reset occurred.
                        newPTV.loc[newPTV.shape[0]] = [
                            i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                            context[3], "--soft reset to avoid any error--"
                        ]
                        index_ipts += 1
                else:
                    # Maybe the important programme is the next one.
                    if (PTV['TITRE'].iloc[(index_PTV + 1) % PTV.shape[0]] ==
                            importantpts[index_ipts][1]):
                        if (planifiedend - i < 10):
                            # It is the next one and the current programme is
                            # about to end: terminate it and move on.
                            newPTV.loc[newPTV.shape[0]] = [
                                i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                                context[3], "fin d'un programme"
                            ]
                            lastend = i
                            lastCP = 0
                            index_PTV += 1
                            index_PTV = index_PTV % (PTV.shape[0])
                            currentduree = PTV['DUREE'].iloc[index_PTV]
                            planifiedend = (lastend + currentduree)
                            Predictiontimer = 200
                            nbpub = 0
                            index_ipts += 1
                        else:
                            # It is the next one, but the current programme
                            # was far from its end: force it, count an error.
                            newPTV.loc[newPTV.shape[0]] = [
                                i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                                context[3],
                                "--HARD RESET OF ALGORITHM--(Oups)"
                            ]
                            lastend = i
                            lastCP = 0
                            index_PTV += 1
                            index_PTV = index_PTV % (PTV.shape[0])
                            currentduree = PTV['DUREE'].iloc[index_PTV]
                            planifiedend = (lastend + currentduree)
                            Predictiontimer = 200
                            nbpub = 0
                            if (index_ipts == 0):
                                def_context.Report("erreur sur la matinée")
                            elif (index_ipts == 1):
                                def_context.Report("erreur sur l'après midi")
                            else:
                                def_context.Report("erreur sur la soirée")
                            error += 1
                            index_ipts += 1
                    else:
                        # The programme is wrong and we are not even close to
                        # it. Before resetting, check that the important
                        # programme really starts at this minute that day.
                        l = PTV.index[
                            (PTV['TITRE'] == importantpts[index_ipts][1])
                            & (PTV['debut'] == i)].tolist()
                        if (len(l) > 0):
                            index_PTV = l[0]
                            ##########################
                            Predictiontimer = 200
                            Pubinhour = 0
                            lastCP = 0
                            lastPub = 0
                            lastend = i
                            currentduree = PTV['DUREE'].iloc[index_PTV]
                            planifiedend = (lastend + currentduree)
                            nbpub = 0
                            # Carry on as before; just add a line to the
                            # output saying that a reset occurred.
                            if (index_ipts == 0):
                                def_context.Report("erreur sur la matinée")
                            elif (index_ipts == 1):
                                def_context.Report("erreur sur l'après midi")
                            else:
                                def_context.Report("erreur sur la soirée")
                            error += 1
                            newPTV.loc[newPTV.shape[0]] = [
                                i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                                context[3],
                                "--HARD RESET OF ALGORITHM--(out of programme)"
                            ]
                            index_ipts += 1
                        else:
                            index_ipts += 1
            else:
                # Same checks outside 3:00-22:00, with 20-minute tolerances
                # instead of 13 and 10.
                # We are at an important point: compare it with the prediction.
                if (PTV['TITRE'].iloc[index_PTV] == importantpts[index_ipts][1]
                    ):  # The predicted programme is the right one.
                    # Now check that we are at a plausible point of it.
                    if (i - lastend > 20):
                        # The programme began way too early: something went
                        # wrong before. Reset the state and count an error.
                        Predictiontimer = 200
                        Pubinhour = 0
                        lastCP = 0
                        lastPub = 0
                        lastend = i
                        currentduree = PTV['DUREE'].iloc[index_PTV]
                        planifiedend = (lastend + currentduree)
                        nbpub = 0
                        if (index_ipts == 0):
                            def_context.Report("erreur sur la matinée")
                        elif (index_ipts == 1):
                            def_context.Report("erreur sur l'après midi")
                        else:
                            def_context.Report("erreur sur la soirée")
                        error += 1
                        # Carry on as before; just add a line to the output
                        # saying that a reset occurred.
                        newPTV.loc[newPTV.shape[0]] = [
                            i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                            context[3],
                            "--HARD RESET OF ALGORITHM--(in programme)"
                        ]
                        index_ipts += 1
                    else:
                        # The algorithm is right: realign the timers anyway,
                        # without counting an error.
                        Predictiontimer = 200
                        Pubinhour = 0
                        lastCP = 0
                        lastPub = 0
                        lastend = i
                        currentduree = PTV['DUREE'].iloc[index_PTV]
                        planifiedend = (lastend + currentduree)
                        nbpub = 0
                        # Carry on as before; just add a line to the output
                        # saying that a reset occurred.
                        newPTV.loc[newPTV.shape[0]] = [
                            i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                            context[3], "--soft reset to avoid any error--"
                        ]
                        index_ipts += 1
                else:
                    # Maybe the important programme is the next one.
                    if (PTV['TITRE'].iloc[(index_PTV + 1) % PTV.shape[0]] ==
                            importantpts[index_ipts][1]):
                        if (planifiedend - i < 20):
                            # It is the next one and the current programme is
                            # about to end: terminate it and move on.
                            newPTV.loc[newPTV.shape[0]] = [
                                i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                                context[3], "fin d'un programme"
                            ]
                            lastend = i
                            lastCP = 0
                            index_PTV += 1
                            index_PTV = index_PTV % (PTV.shape[0])
                            currentduree = PTV['DUREE'].iloc[index_PTV]
                            planifiedend = (lastend + currentduree)
                            Predictiontimer = 200
                            nbpub = 0
                            index_ipts += 1
                        else:
                            # It is the next one, but the current programme
                            # was far from its end: force it, count an error.
                            newPTV.loc[newPTV.shape[0]] = [
                                i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                                context[3],
                                "--HARD RESET OF ALGORITHM--(Oups)"
                            ]
                            lastend = i
                            lastCP = 0
                            index_PTV += 1
                            index_PTV = index_PTV % (PTV.shape[0])
                            currentduree = PTV['DUREE'].iloc[index_PTV]
                            planifiedend = (lastend + currentduree)
                            Predictiontimer = 200
                            nbpub = 0
                            if (index_ipts == 0):
                                def_context.Report("erreur sur la matinée")
                            elif (index_ipts == 1):
                                def_context.Report("erreur sur l'après midi")
                            else:
                                def_context.Report("erreur sur la soirée")
                            error += 1
                            index_ipts += 1
                    else:
                        # The programme is wrong and we are not even close to
                        # it. Before resetting, check that the important
                        # programme really starts at this minute that day.
                        l = PTV.index[
                            (PTV['TITRE'] == importantpts[index_ipts][1])
                            & (PTV['debut'] == i)].tolist()
                        if (len(l) > 0):
                            index_PTV = l[0]
                            ##########################
                            Predictiontimer = 200
                            Pubinhour = 0
                            lastCP = 0
                            lastPub = 0
                            lastend = i
                            currentduree = PTV['DUREE'].iloc[index_PTV]
                            planifiedend = (lastend + currentduree)
                            nbpub = 0
                            # Carry on as before; just add a line to the
                            # output saying that a reset occurred.
                            if (index_ipts == 0):
                                def_context.Report("erreur sur la matinée")
                            elif (index_ipts == 1):
                                def_context.Report("erreur sur l'après midi")
                            else:
                                def_context.Report("erreur sur la soirée")
                            error += 1
                            newPTV.loc[newPTV.shape[0]] = [
                                i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                                context[3],
                                "--HARD RESET OF ALGORITHM--(out of programme)"
                            ]
                            index_ipts += 1
                        else:
                            index_ipts += 1
        # context[2] is the 'Change Point' entry of the context row.
        if (context[2]):
            historyofpoints.loc[historyofpoints.shape[0]] = context
            # Too soon after the previous change point: record class 0.
            if (lastCP < min(4, currentduree)):
                labels.append(0)
                continue
            else:
                X = def_context.process(
                    pd.DataFrame(
                        [context],
                        index=[0],
                        columns=[
                            'minute', 'partie de la journée', 'Change Point',
                            'pourcentage', 'partie du programme', 'programme',
                            'duree', 'nombre de pub potentiel', 'lastCP',
                            'lastPub', 'lastend', 'currentduree', 'Pubinhour',
                            'probability of CP', 'nb de pubs encore possible',
                            'chaine', 'CLE-FORMAT', 'CLE-GENRE', 'day', 'part'
                        ])).values  #,'per'
                res1 = CatBoost[0].predict_proba(X)
                res2 = CatBoost[1].predict_proba(X)
                res3 = XGB[0].predict(xgb.DMatrix(X),
                                      ntree_limit=XGB[0].best_ntree_limit)
                res4 = XGB[1].predict(xgb.DMatrix(X),
                                      ntree_limit=XGB[1].best_ntree_limit)
                res5 = rf.predict_proba(X)
                res6 = gb.predict_proba(X)
                res7 = dt.predict_proba(X)
                # NOTE(review): this average is overwritten by the logistic
                # model below before being read.
                res = [(res1[0][0] + res2[0][0] + res3[0][0] + res4[0][0] +
                        res5[0][0] + res6[0][0]) / 6,
                       (res1[0][1] + res2[0][1] + res3[0][1] + res4[0][1] +
                        res5[0][1] + res6[0][1]) / 6,
                       (res1[0][2] + res2[0][2] + res3[0][2] + res4[0][2] +
                        res5[0][2] + res6[0][2]) / 6]
                # Average the two CatBoost models, then the two XGB models.
                y_pred = [(res1[0][0] + res2[0][0]) * 0.5,
                          (res1[0][1] + res2[0][1]) * 0.5,
                          (res1[0][2] + res2[0][2]) * 0.5]
                y_pred2 = [(res3[0][0] + res4[0][0]) * 0.5,
                           (res3[0][1] + res4[0][1]) * 0.5,
                           (res3[0][2] + res4[0][2]) * 0.5]
                # Input of the stacking model: one row of per-model scores.
                X = pd.concat([
                    pd.DataFrame(y_pred).T,
                    pd.DataFrame(y_pred2).T,
                    pd.DataFrame(res7),
                    pd.DataFrame(res6),
                    pd.DataFrame(res5)
                ],
                              axis=1)
                X = X.replace([np.inf, -np.inf], np.nan)
                X = X.fillna(1)
                X = X.values
                res = logistic.predict_proba(X)
                cla = np.argmax(res)
                # Overrides: downgrade the predicted class to 0 when the
                # context contradicts it ([14] 'nb de pubs encore possible',
                # [3] 'pourcentage', [11] 'currentduree').
                if (cla == 1 and context[14] == 0):
                    cla = 0
                if (cla == 2 and context[3] < 0.5 and context[11] > 30):
                    cla = 0
                if (cla == 2 and context[3] < 0.9 and context[11] >= 180):
                    cla = 0
                if (cla == 2 and PTV['TITRE'].loc[index_PTV]
                        == 'Programmes de la nuit' and context[3] < 1):
                    cla = 0
                if (cla == 1):
                    # Commercial break inside the current programme.
                    newPTV.loc[newPTV.shape[0]] = [
                        i % 1440, "publicité", 'oui', context[3],
                        "publicité dans un programme"
                    ]
                    lastCP = 0
                    lastPub = 0
                    Pubinhour += 4
                    nbpub += 1
                    wait = 4
                    labels.append(1)
                elif (cla == 2):
                    # End of the current programme: move to the next one.
                    newPTV.loc[newPTV.shape[0]] = [
                        i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                        context[3], "fin d'un programme"
                    ]
                    lastend = i
                    lastCP = 0
                    index_PTV += 1
                    index_PTV = index_PTV % (PTV.shape[0])
                    currentduree = PTV['DUREE'].iloc[index_PTV]
                    planifiedend = (lastend + currentduree)
                    Predictiontimer = 200
                    nbpub = 0
                    wait = 5
                    per = context[3]
                    labels.append(2)
                else:
                    labels.append(0)
        # Minutes listed by def_context.get_help get the same treatment as a
        # change point.
        elif (i in help):
            historyofpoints.loc[historyofpoints.shape[0]] = context
            # Too soon after the previous change point: record class 0.
            if (lastCP < min(4, currentduree)):
                labels.append(0)
                continue
            else:
                X = def_context.process(
                    pd.DataFrame(
                        [context],
                        index=[0],
                        columns=[
                            'minute', 'partie de la journée', 'Change Point',
                            'pourcentage', 'partie du programme', 'programme',
                            'duree', 'nombre de pub potentiel', 'lastCP',
                            'lastPub', 'lastend', 'currentduree', 'Pubinhour',
                            'probability of CP', 'nb de pubs encore possible',
                            'chaine', 'CLE-FORMAT', 'CLE-GENRE', 'day', 'part'
                        ])).values  #,'per'
                res1 = CatBoost[0].predict_proba(X)
                res2 = CatBoost[1].predict_proba(X)
                res3 = XGB[0].predict(xgb.DMatrix(X),
                                      ntree_limit=XGB[0].best_ntree_limit)
                res4 = XGB[1].predict(xgb.DMatrix(X),
                                      ntree_limit=XGB[1].best_ntree_limit)
                res5 = rf.predict_proba(X)
                res6 = gb.predict_proba(X)
                res7 = dt.predict_proba(X)
                # NOTE(review): this average is overwritten by the logistic
                # model below before being read.
                res = [(res1[0][0] + res2[0][0] + res3[0][0] + res4[0][0] +
                        res5[0][0] + res6[0][0]) / 6,
                       (res1[0][1] + res2[0][1] + res3[0][1] + res4[0][1] +
                        res5[0][1] + res6[0][1]) / 6,
                       (res1[0][2] + res2[0][2] + res3[0][2] + res4[0][2] +
                        res5[0][2] + res6[0][2]) / 6]
                # Average the two CatBoost models, then the two XGB models.
                y_pred = [(res1[0][0] + res2[0][0]) * 0.5,
                          (res1[0][1] + res2[0][1]) * 0.5,
                          (res1[0][2] + res2[0][2]) * 0.5]
                y_pred2 = [(res3[0][0] + res4[0][0]) * 0.5,
                           (res3[0][1] + res4[0][1]) * 0.5,
                           (res3[0][2] + res4[0][2]) * 0.5]
                # Input of the stacking model: one row of per-model scores.
                X = pd.concat([
                    pd.DataFrame(y_pred).T,
                    pd.DataFrame(y_pred2).T,
                    pd.DataFrame(res7),
                    pd.DataFrame(res6),
                    pd.DataFrame(res5)
                ],
                              axis=1)
                X = X.replace([np.inf, -np.inf], np.nan)
                X = X.fillna(1)
                X = X.values
                res = logistic.predict_proba(X)
                cla = np.argmax(res)
                # Same overrides as in the change-point branch.
                if (cla == 1 and context[14] == 0):
                    cla = 0
                if (cla == 2 and context[3] < 0.5 and context[11] > 30):
                    cla = 0
                if (cla == 2 and context[3] < 0.9 and context[11] >= 180):
                    cla = 0
                if (cla == 2 and PTV['TITRE'].loc[index_PTV]
                        == 'Programmes de la nuit' and context[3] < 1):
                    cla = 0
                if (cla == 1):
                    # Commercial break inside the current programme.
                    newPTV.loc[newPTV.shape[0]] = [
                        i % 1440, "publicité", 'oui', context[3],
                        "publicité dans un programme"
                    ]
                    lastCP = 0
                    lastPub = 0
                    Pubinhour += 4
                    nbpub += 1
                    wait = 4
                    labels.append(1)
                elif (cla == 2):
                    # End of the current programme: move to the next one.
                    newPTV.loc[newPTV.shape[0]] = [
                        i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                        context[3], "fin d'un programme"
                    ]
                    lastend = i
                    lastCP = 0
                    index_PTV += 1
                    index_PTV = index_PTV % (PTV.shape[0])
                    currentduree = PTV['DUREE'].iloc[index_PTV]
                    planifiedend = (lastend + currentduree)
                    Predictiontimer = 200
                    nbpub = 0
                    wait = 5
                    per = context[3]
                    labels.append(2)
                else:
                    labels.append(0)
        else:
            #labels.append(0)
            # Not a change point: just check that nothing is wrong in the PTV
            # at this time.
            if (Predictiontimer <= 0):
                # The allowed overrun is exhausted: force the end of the
                # programme even though no change point was detected.
                historyofpoints.loc[historyofpoints.shape[0]] = context
                labels.append(2)
                newPTV.loc[newPTV.shape[0]] = [
                    i % 1440, PTV['TITRE'].iloc[index_PTV], 'non', context[3],
                    "fin non détectée d'un programme"
                ]
                lastend = i
                lastCP = 0
                index_PTV += 1
                index_PTV = index_PTV % (PTV.shape[0])
                currentduree = PTV['DUREE'].iloc[index_PTV]
                planifiedend = (lastend + currentduree)
                Predictiontimer = 200
                nbpub = 0
                per = context[3]
            elif (context[3] == 1):
                # Allowed overrun, tunable by time of day if needed.
                # These are the values to adjust to get the best possible PTV:
                # the larger they are, the more the algorithm is trusted.
                # The day should be split into the time slots considered
                # "under tension" when several short programmes follow each
                # other; this analysis is meant to be automated later.
                if (i < 20 * 60 + 30):
                    if (chaine == 'TF1'):
                        if (11.5 * 60 <= i <= 14 * 60
                                or 19.5 * 60 < i < 21 * 60):
                            Predictiontimer = 1
                        elif (context[6] == "très court"):
                            Predictiontimer = 0
                        elif (PTV['TITRE'].iloc[index_PTV] == 'Téléshopping'):
                            Predictiontimer = 5
                        elif (context[6] == "court"):
                            Predictiontimer = 5
                        elif (context[6] == "moyen"):
                            Predictiontimer = 5
                        elif (context[6] == "très long"
                              or context[6] == "long"):
                            Predictiontimer = 15
                        else:
                            Predictiontimer = 5
                    elif (chaine == 'M6'):
                        # Allowed overrun, tunable by time of day if needed.
                        if (i < 8 * 60 + 56):
                            Predictiontimer = 0
                        elif (13 * 60 < i < 14 * 60):
                            Predictiontimer = 5
                        elif (PTV['TITRE'].iloc[index_PTV] in ['M6 boutique']):
                            Predictiontimer = 0
                        elif (context[6] == "très court"):
                            Predictiontimer = 0
                        elif (context[6] == "court"):
                            Predictiontimer = 2
                        elif (context[6] == "moyen"):
                            Predictiontimer = 5
                        elif (context[6] == "très long"):
                            Predictiontimer = 5
                        elif (context[6] == 'long'):
                            Predictiontimer = 15
                        else:
                            Predictiontimer = 5
                    else:
                        # Other channels use the same values as TF1.
                        if (11.5 * 60 <= i <= 14 * 60
                                or 19.5 * 60 < i < 21 * 60):
                            Predictiontimer = 1
                        elif (context[6] == "très court"):
                            Predictiontimer = 0
                        elif (PTV['TITRE'].iloc[index_PTV] == 'Téléshopping'):
                            Predictiontimer = 5
                        elif (context[6] == "court"):
                            Predictiontimer = 5
                        elif (context[6] == "moyen"):
                            Predictiontimer = 5
                        elif (context[6] == "très long"
                              or context[6] == "long"):
                            Predictiontimer = 15
                        else:
                            Predictiontimer = 5
                else:
                    # From 20:30 onwards.
                    if (chaine == 'TF1'):
                        if (context[5] == 'Journal'):
                            if (i < 20 * 60):
                                Predictiontimer = 10
                            else:
                                Predictiontimer = 0
                        elif (context[6] == "très court"):
                            Predictiontimer = 4
                        elif (context[6] == "court"):
                            Predictiontimer = 5
                        elif (context[6] == "moyen"):
                            Predictiontimer = 5
                        elif (context[6] == "très long"):
                            Predictiontimer = 5
                        elif (context[6] == 'long'):
                            Predictiontimer = 15
                        else:
                            Predictiontimer = 5
                    elif (chaine == 'M6'):
                        # Allowed overrun, tunable by time of day if needed.
                        if (context[6] == "très court"):
                            Predictiontimer = 15
                        elif (context[6] == "court"):
                            Predictiontimer = 15
                        elif (context[6] == "moyen"):
                            Predictiontimer = 15
                        elif (context[6] == "très long"):
                            Predictiontimer = 15
                        elif (context[6] == 'long'):
                            Predictiontimer = 15
                        else:
                            Predictiontimer = 5
                    else:
                        # Other channels use the same values as TF1.
                        if (context[5] == 'Journal'):
                            if (i < 20 * 60):
                                Predictiontimer = 10
                            else:
                                Predictiontimer = 0
                        elif (context[6] == "très court"):
                            Predictiontimer = 4
                        elif (context[6] == "court"):
                            Predictiontimer = 5
                        elif (context[6] == "moyen"):
                            Predictiontimer = 5
                        elif (context[6] == "très long"):
                            Predictiontimer = 5
                        elif (context[6] == 'long'):
                            Predictiontimer = 15
                        else:
                            Predictiontimer = 5
            elif (context[3] > 1):
                # Past the planned end: consume one minute of allowed overrun.
                Predictiontimer -= 1
            else:
                pass
    # NOTE(review): `context` is unbound here if the loop body never runs.
    return newPTV, historyofpoints, labels, error, index_PTV, context
def _reap_finished(processes):
    """Remove from *processes*, in place, every child that has exited.

    A child is considered finished when its ``poll()`` returns something
    other than None.
    """
    processes[:] = [proc for proc in processes if proc.poll() is None]


def _wait_until_fewer_than(processes, limit, delay=5):
    """Block until fewer than *limit* children of *processes* are running.

    Finished children are dropped from the list; the function sleeps *delay*
    seconds between two polling rounds while the pool is still full.
    """
    while len(processes) >= limit:
        _reap_finished(processes)
        if len(processes) >= limit:
            time.sleep(delay)


def _date_and_channel(filename):
    """Split a PTV file name into ``(date, chaine)``.

    The date is the third ``_``-separated field and the channel is the last
    field with everything from its first dot onwards removed.  Returns None
    when the name has fewer than three fields.
    """
    parts = filename.split('_')
    if len(parts) < 3:
        return None
    return parts[2], parts[-1].split('.')[0]


def main(argv):
    """Compute costs for one day/channel, or fan the work out to children.

    The behaviour depends on the number of arguments:

    * two arguments ``[chaine, date]``: call ``find_cost`` once per entry
      found in ``PATH_OUT``, report the results and store them as four
      columns (``_tout``, ``_matinee``, ``_apresmidi``, ``_soiree``) of
      ``cout.csv`` while holding ``LOCK``;
    * one argument: start ``python cost.py <chaine> <date>`` for every file
      of ``PATH_IN + 'PTV/'`` whose date or channel equals that argument,
      keeping at most ``MAX_PROCESSES`` children alive.  This mode does not
      wait for the last children before returning;
    * any other count: start a child for every file, at most 5 at a time,
      wait for all of them, then report the file count and elapsed time.

    Args:
        argv: command-line arguments without the program name.
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = def_context.get_path()

    if len(argv) == 2:
        chaine = argv[0]
        date = argv[1]
        numero, nom_chaine = def_context.get_tuple(chaine)
        files_ = os.listdir(PATH_OUT)
        res = [find_cost(date, numero, nom_chaine, i)
               for i in range(len(files_))]
        def_context.Report(res)

        # try/finally guarantees the lock is released even when reading or
        # writing cout.csv fails.
        LOCK.acquire()
        try:
            try:
                couts = pd.read_csv('cout.csv')
            except Exception:
                # Best effort: start from an empty table when the file is
                # missing or unreadable.
                couts = pd.DataFrame()
            prefix = date + '_' + nom_chaine
            try:
                couts[prefix + '_tout'] = [row[0] for row in res]
                couts[prefix + '_matinee'] = [row[1] for row in res]
                couts[prefix + '_apresmidi'] = [row[2] for row in res]
                couts[prefix + '_soiree'] = [row[3] for row in res]
            except Exception as e:
                def_context.Report('humm: ' + str(e))
            couts.to_csv('cout.csv', index=False)
        finally:
            LOCK.release()

    elif len(argv) == 1:
        Processes = []
        files = os.listdir(PATH_IN + 'PTV/')
        for file in files:
            parsed = _date_and_channel(file)
            if parsed is None:
                def_context.Report('skipping unexpected file name: ' + str(file))
                continue
            date, chaine = parsed
            print(type(date), type(chaine), date, chaine)
            if str(argv[0]) not in [str(date), str(chaine)]:
                continue
            print(argv[0])
            _wait_until_fewer_than(Processes, MAX_PROCESSES)
            Processes.append(
                Popen(['python', 'cost.py', str(chaine), str(date)]))
            def_context.Report(
                'calcul des coûts pour la journée du %s sur la chaîne %s' %
                (date, chaine))

    else:
        t = time.time()
        Processes = []
        files = os.listdir(PATH_IN + 'PTV/')
        for file in files:
            print(len(Processes))
            parsed = _date_and_channel(file)
            if parsed is None:
                def_context.Report('skipping unexpected file name: ' + str(file))
                continue
            date, chaine = parsed
            _wait_until_fewer_than(Processes, 5)
            Processes.append(
                Popen(['python', 'cost.py', str(chaine), str(date)]))
            def_context.Report(
                'calcul des coûts pour la journée du %s sur la chaîne %s' %
                (date, chaine))
            time.sleep(2)
        # A limit of 1 means "wait until no child is left".
        _wait_until_fewer_than(Processes, 1)
        def_context.Report(len(files))
        def_context.Report(time.time() - t)