Beispiel #1
0
def score(tp,fp,fn,epsilon=10**-5):
    """Report precision, recall and F-beta (beta = 2) for one class.

    Args:
        tp: number of true positives.
        fp: number of false positives.
        fn: number of false negatives.
        epsilon: small constant added to every denominator so that empty
            classes do not raise ZeroDivisionError.

    Returns:
        None. The three metrics are only sent to ``def_context.Report``.
    """
    beta_sq = 2 ** 2
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)
    weighted_product = (beta_sq + 1) * (precision * recall)
    fbeta = weighted_product / (beta_sq * precision + recall + epsilon)

    summary = "".join([
        "|| precison: ", str(precision),
        "|| recall: ", str(recall),
        "|| fbeta: ", str(fbeta),
    ])
    def_context.Report(summary)
    def_context.Report('--------------------------------------------------')
Beispiel #2
0
def mesure(y_score,y_test,p1=0.5,p2=0.5):
    """Report per-class precision/recall/F-beta for classes 0, 1 and 2.

    Scores are turned into labels with ``get_label(y_score, p1, p2)``; the
    (TP, FP, FN) counts of each class come from ``mesure_class`` and are
    passed to ``score``, which does the reporting.
    """
    predicted = get_label(y_score,p1,p2)

    # All counts are computed before anything is reported.
    per_class = []
    for class_id in (0, 1, 2):
        tp, fp, fn = mesure_class(predicted,y_test,class_id)
        per_class.append((tp, fp, fn))

    headers = ("pour la classe 0", "pour la classe 1", "pour la classe 2")
    for header, (tp, fp, fn) in zip(headers, per_class):
        def_context.Report(header)
        score(tp,fp,fn)
Beispiel #3
0
def load(fileX):
    """Read ``PATH_IN + 'hop/' + fileX`` and split it into features and labels.

    Returns:
        A ``(features, labels)`` pair: the frame without its 'labels' column,
        and the 'labels' column with missing values replaced by 0.

    Raises:
        KeyError: when the file has no 'labels' column. The problem is
            reported first, but the lookup is still attempted.
    """
    frame = pd.read_csv(PATH_IN+'hop/'+fileX)
    has_labels = 'labels' in frame.columns.values
    if not has_labels:
        def_context.Report('Pas de labels pour le fichier '+str(fileX))
    # NOTE(review): no fallback here - a file without labels still fails below.
    labels = frame['labels'].fillna(0)
    features = frame.drop(['labels'],axis=1)
    return features,labels
Beispiel #4
0
def acc(y_score,y_test,p1=0.5,p2=0.5):
    """Report the fraction of positions where the predicted label equals y_test.

    Labels are obtained from ``get_label(y_score, p1, p2)``. Both sequences
    are read by position index ``0 .. len(labels) - 1``.

    Raises:
        ZeroDivisionError: when ``get_label`` returns an empty sequence.
    """
    predicted = get_label(y_score,p1,p2)
    hits = sum(
        1 for idx in range(len(predicted)) if predicted[idx] == y_test[idx]
    )
    def_context.Report("accuracy: "+str(hits/len(predicted)))
Beispiel #5
0
def pred(file,numb_folder):
    """Build and save the predicted schedule ("new PTV") for one input file.

    The file name is split on '.' and '_': ``f`` is the third
    underscore-separated field (presumably the broadcast date - confirm
    against the naming scheme) and ``c`` is the last field (presumably the
    channel name).

    If the output CSV for this (f, c) pair can already be read from
    ``PATH_OUT/T<numb_folder>/new_ptv/``, nothing is recomputed. Otherwise
    the PTV is loaded, the prediction is run and the results are written as
    HTML and CSV.

    Args:
        file: name of the input file.
        numb_folder: identifier of the output folder; compared against the
            string '0' to choose the prediction strategy.

    Returns:
        0 when the loaded PTV is empty, otherwise None. Any exception raised
        while processing is caught and reported, not propagated.
    """
    relecture = True
    EPSILON = 1e-15  # not used anywhere in this function
    f = ((file.split('.'))[0].split('_'))[2]
    c = ((file.split('.'))[0].split('_'))[-1]
    try:
        # Existence check: a successful read means the result is already there.
        df = pd.read_csv(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv')
        def_context.Report("file %s already exists. I won't do it again"%(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv'))
    except Exception as e:
        # Any failure of the read above (not only a missing file) triggers a
        # full computation.
        try:
            def_context.Report(str(f)+"-"+str(c))
            PTV,proba = def_context.load_file(str(f),str(c))
            if(len(PTV) == 0):
                return 0
            # First row whose [debut, debut+DUREE) interval covers minute 180..185.
            index_PTV = PTV.index[(PTV['debut'] <= 3*60) & (PTV['debut']+PTV['DUREE'] > 3*60+5)].tolist()[0]
            def_context.Report('Starting with: %s'%(PTV['TITRE'].iloc[index_PTV]))
            lastend = PTV['debut'].loc[index_PTV]
            currentduree = PTV['DUREE'].loc[index_PTV]
            newPTV = def_context.init_newPTV(PTV,str(c))
            historyofpoints = def_context.init_history(str(c),PTV,lastend,currentduree)
            temp_context = historyofpoints.iloc[0]
            importantpts = def_context.get_important_points(c,PTV,index_PTV)
            # ---- choice of the prediction strategy ----
            if(numb_folder == '0'):
                # NOTE(review): the values returned in this branch are never
                # merged into newPTV / historyofpoints, so only the initial
                # frames are saved below - confirm this is intended.
                if(c == 'TF1'):
                    from PTVTF1 import main as arbre1
                    l,temp_newPTV,temp_history,index_PTV,temp_context = arbre1([str(f),str(c)])
                else:
                    from PTVM6 import main as arbre2
                    l,temp_newPTV,temp_history,index_PTV,temp_context = arbre2([str(f),str(c)])

            else:
                # Three passes; each one starts from the last row of newPTV and
                # from the context/index left by the previous pass.
                for i in range(3):
                    def_context.Report(str(i)+' '+str(c)+' '+str(f))
                    from predictPTV import main as pred1
                    l1,temp_newPTV1,temp_history1,index_PTV1,temp_context1 = pred1([str(c),str(f),i,newPTV.iloc[newPTV.shape[0]-1],temp_context,index_PTV,importantpts,PATH_OUT+'T'+str(numb_folder)+'/'])
                    if(l1>0 and relecture):
                        # A positive status from the first predictor triggers a
                        # second run with RLPTV, whose result replaces the first.
                        def_context.Report("Utilisation de la relecture "+str(i)+' '+str(c)+' '+str(f))
                        from RLPTV import main as RL
                        l,temp_newPTV,temp_history,index_PTV,temp_context = RL([str(c),str(f),i,newPTV.iloc[newPTV.shape[0]-1],temp_context,index_PTV,importantpts,PATH_OUT+'T'+str(numb_folder)+'/'])
                    else:
                        l,temp_newPTV,temp_history,index_PTV,temp_context =l1,temp_newPTV1,temp_history1,index_PTV1,temp_context1
                    if(l == 4):
                        # Status 4: the result of this pass is discarded.
                        pass
                    else:
                        # The first row of temp_newPTV is skipped when appending.
                        newPTV = pd.concat([newPTV,temp_newPTV.iloc[1:]])
                        historyofpoints = pd.concat([historyofpoints,temp_history])

            # 'minute' is rendered as "<hours>:<minutes>" without zero padding.
            newPTV['Heure'] = newPTV['minute'].apply(lambda x: str(int(x/60))+':'+str(x%60))
            historyofpoints['Heure'] = historyofpoints['minute'].apply(lambda x: str(int(x/60))+':'+str(x%60))
            # Results are written under PATH_IN as well as under PATH_OUT.
            newPTV.to_html(PATH_IN+'new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.html')
            newPTV.to_csv(PATH_IN+'new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv',index=False)
            historyofpoints.to_html(PATH_IN+'hop/historyofpoints_'+str(f)+'_'+str(c)+'.html')
            historyofpoints.to_csv(PATH_IN+'hop/historyofpoints_'+str(f)+'_'+str(c)+'.csv',index=False)
            newPTV.to_html(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.html')
            newPTV.to_csv(PATH_OUT+'T'+str(numb_folder)+'/new_ptv/new_PTV_'+str(f)+'_'+str(c)+'.csv',index=False)
        except Exception as e:
            # Report the failing file, the line number and the source file of
            # the frame that caught the exception, then carry on.
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            def_context.Report("Failed to process {0} at line {2} in {3}: {1}".format(str(file), str(e),sys.exc_info()[-1].tb_lineno,fname))
Beispiel #6
0
def mismatch(y_score,y_test,p1=0.5,p2=0.5):
    """Count and report confusions between labels 1 and 2.

    Labels come from ``get_label(y_score, p1, p2)``. Two counters are
    reported through ``def_context.Report``:

    * "fausses publicités": predicted 1 where y_test is 2;
    * "fausses fins": predicted 2 where y_test is 1.

    Returns:
        Always 0.
    """
    predicted = get_label(y_score,p1,p2)
    false_ads = 0
    false_ends = 0
    for idx in range(len(predicted)):
        if predicted[idx] == 1 and y_test[idx] == 2:
            false_ads += 1
        if predicted[idx] == 2 and y_test[idx] == 1:
            false_ends += 1
    for message in ("fausses publicités", false_ads, "fausses fins", false_ends):
        def_context.Report(message)
    return 0
Beispiel #7
0
def get_tuple(argv):
    """Look up a channel in 'Equivalence.csv' by id or by name.

    If ``argv`` can be converted with ``int()`` it is matched against the
    'id_unique' column, otherwise against 'nom_chaine'. The file is read
    with ';' as separator on every call.

    Returns:
        ``(id_unique, nom_chaine)`` of the first matching row, both as
        strings, or ``(0, 0)`` after reporting the failure when the lookup
        raises (for instance when no row matches).
    """
    table = pd.read_csv('Equivalence.csv', sep=';')

    key = 'nom_chaine'
    try:
        argv = int(argv)
    except Exception:
        pass
    else:
        key = 'id_unique'

    try:
        matches = table[table[key] == argv]
        unique_id = str(matches['id_unique'].values[0])
        channel_name = str(matches['nom_chaine'].values[0])
        return unique_id, channel_name
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        lineno = sys.exc_info()[-1].tb_lineno
        def_context.Report(
            "Failed to process {0} at line {2} in {3}: {1}".format(
                str(argv), str(e), lineno, fname))
        Report("Mauvais numéro/nom de chaîne")
        return 0, 0
Beispiel #8
0
def load_all(CHAINE):
    """Concatenate every usable CSV of ``PATH_IN + 'hop/'`` into one data set.

    A file is skipped when it is not a '.csv', when the next-to-last
    underscore-separated field of its name is '2017-12-20' or starts with
    the year '2018', or when it holds exactly one row. Infinite and missing
    values of the remaining files are replaced by 0.

    Args:
        CHAINE: not used by this function.

    Returns:
        ``(def_context.process(X), Y)`` where X and Y are the concatenated
        features and labels.
    """
    X = pd.DataFrame()
    Y = pd.DataFrame()
    for name in os.listdir(PATH_IN+'hop/'):
        if name.split('.')[-1] != 'csv':
            continue
        day = name.split('_')[-2]
        if day in ['2017-12-20'] or day.split('-')[0] == '2018':
            continue

        def_context.Report(day)
        features,labels = load(name)
        if len(features) == 1:
            continue
        features = features.replace([np.inf, -np.inf], np.nan).fillna(0)
        X = pd.concat([X,features])
        Y = pd.concat([Y,labels])
    return def_context.process(X),Y
Beispiel #9
0
def main(argv):
    """Run the training/prediction rounds, or fan out one job per PTV file.

    Behaviour depends on ``len(argv)``:

    * 0 - run rounds ``0 .. 29`` and report the duration of each round.
    * 1 - ``argv[0]`` is the first round to run (rounds up to 29 follow).
      If anything in that branch raises - typically ``int(argv[0])`` because
      the argument is a file name - ``pred(argv[0])`` is called instead.
    * 2 - ``argv[0]`` is a 'YYYY-MM' period and ``argv[1]`` a channel name:
      every matching file of ``PATH_IN + 'PTV/'`` is handed to a
      ``fasttrain.py`` subprocess, with at most ``MAX_PROCESSES`` running at
      once, and the function waits for all of them.

    Round 0 runs ``PTVall.py``; later rounds run ``fasttrain.py`` for four
    channels in parallel. Every round ends with ``MLforPTV.py``.

    Fixes over the previous version: the process-pool polling used the
    undefined name ``enp`` (NameError as soon as the pool had to be polled),
    and the elapsed time of a round was reported twice.
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = get_path()
    end = 30

    def run_round(i):
        # One round: produce predictions, then retrain with MLforPTV.py.
        update_temp_path(i)
        if i == 0:
            Popen(['python', PATH_SCRIPT + 'PTVall.py', '2017-12']).wait()
        else:
            workers = [
                Popen(['python', PATH_SCRIPT + 'fasttrain.py', '2017-12',
                       chaine])
                for chaine in ('TF1', 'M6', 'France 2', 'France 3')
            ]
            for worker in workers:
                worker.wait()
            print('end of prediction')
        time.sleep(60)
        os.system('python ' + PATH_SCRIPT + 'MLforPTV.py')

    def wait_until_below(processes, limit):
        # Poll until fewer than `limit` subprocesses are still running.
        while len(processes) >= limit:
            # Walk backwards so that deleting an entry does not shift the
            # entries that are still to be checked.
            for idx in range(len(processes) - 1, -1, -1):
                # poll() returns None while the process is still running.
                if processes[idx].poll() is not None:
                    del processes[idx]
            time.sleep(5)

    if len(argv) == 0:
        for i in range(0, end):
            t = time.time()
            run_round(i)
            def_context.Report("fin du tour " + str(i))
            def_context.Report(time.time() - t)
    if len(argv) == 1:
        try:
            start = int(argv[0])
            for i in range(start, end):
                run_round(i)
                time.sleep(60)
                def_context.Report("fin du tour " + str(i))
        except Exception:
            # NOTE(review): this also catches failures inside the rounds, not
            # only a non-numeric argv[0]; kept as-is to preserve behaviour.
            pred(argv[0])
    elif len(argv) == 2:
        PATH_OUT = get_temp_path()
        Processes = []
        for file in os.listdir(PATH_IN + 'PTV/'):
            fields = file.split('.')[0].split('_')
            f = fields[2]
            c = fields[-1]
            if (f == '2017-12-20'
                    or (f in ['2017-12-09', '2017-12-06', '2018-02-22']
                        and c == 'TF1') or (f in ['2018-02-22'] and c == 'M6')
                    or (f.split('-')[0] != str(argv[0].split('-')[0]))
                    or f.split('-')[1] != argv[0].split('-')[1]):
                # Excluded day, or not in the requested year/month.
                continue
            if c == '' or '2018' in f or c != argv[1]:
                continue
            def_context.Report(file)
            wait_until_below(Processes, MAX_PROCESSES)
            Processes.append(
                Popen(['python', PATH_SCRIPT + 'fasttrain.py', file]))
        # Wait for the remaining jobs before returning.
        wait_until_below(Processes, 1)
Beispiel #10
0
def main(argv):
    """Run the whole pipeline interactively, or predict a single file.

    * ``len(argv) == 2``: call ``pred(argv[0], argv[1])`` and return 0.
    * ``len(argv) == 0``: ask at which part to start, then run every part
      whose number is greater than or equal to that answer:

      - part 0: data extraction and cleaning for the channels typed by the
        user, then ``processingdata.py``;
      - part 1: ``predict.py``;
      - part 2: 31 rounds of ``out.py`` over the files of PATH_IN/PTV; on
        rounds 10, 20 and 30 a file is dropped from later rounds when its
        cost column has a minimum below 1 and some value occurring at least
        four times;
      - part 3: ``cost.py``;
      - part 4: for every '..._tout' column of cout.csv, pick the best
        folder per day part and write the merged result;
      - part 5: turn the counters of res_out.csv into rates.
    * any other length: ``start`` stays at 10, so only the final message is
      reported.

    Fixes over the previous version: the three day-part frames are merged
    with ``pd.concat`` (``DataFrame.append`` no longer exists in pandas 2),
    and the merge happens inside the try/except so that a failure
    placeholder returned by ``exit_file`` is reported instead of crashing.
    """
    start = 10
    if len(argv) == 0:
        print('bonjour')
        start = int(input("A quelle partie voulez vous commencer?"))
        if start < 1:
            Chaines = str(input("Quelle Chaînes devont nous traiter?(separez les par un '-'):"))
            chaines = Chaines.split('-')
            # The result is not needed; get_tuple reports unknown channels.
            for chaine in chaines:
                def_context.get_tuple(chaine)
        Processes = []
    if len(argv) == 2:
        pred(argv[0],argv[1])
        return 0

    def wait_until_below(processes, limit):
        # Poll until fewer than `limit` subprocesses are still running.
        while len(processes) >= limit:
            # Walk backwards so that deleting an entry does not shift the
            # entries that are still to be checked.
            for idx in range(len(processes) - 1, -1, -1):
                # poll() returns None while the process is still running.
                if processes[idx].poll() is not None:
                    del processes[idx]
            time.sleep(5)

    ##### Part 0: extraction and cleaning #####
    if start < 1:
        for chaine in chaines:
            wait_until_below(Processes, MAX_PROCESSES/2)
            Processes.append(Popen(['python','extractdatafromPTV.py',chaine]))
        Processes.append(Popen(['python','cleaningRTSfiles.py','0',Chaines,'0']))
        wait_until_below(Processes, 1)
        Processes.append(Popen(['python','processingdata.py']))

    ##### Part 1: first prediction #####
    if start <= 1:
        wait_until_below(Processes, 1)
        Processes.append(Popen(['python','predict.py']))
        wait_until_below(Processes, 1)

    ##### Part 2: iterative rounds #####
    if start <= 2:
        Processes = []
        pass_files = []
        nb_files_true = 0
        for i in range(31):
            update_temp_path(i)
            files = os.listdir(PATH_IN+'PTV/')
            nb_files = len(files)
            nb_files_true = 0
            for file in files:
                if file in pass_files:
                    continue
                if i % 10 != 0 or i == 0:
                    wait_until_below(Processes, MAX_PROCESSES)
                    def_context.Report('process launch for %s at turn %s'%(file,i))
                    Processes.append(Popen(['python', 'out.py', file ,str(i)]))
                    continue
                # Rounds 10, 20 and 30: evaluate the cost of this file first.
                date = file.split('_')[2]
                chaine = file.split('_')[-1].split('.')[0]
                _numero,nom_chaine = def_context.get_tuple(chaine)
                os.system('python cost.py '+str(chaine)+' '+str(date))
                couts = pd.read_csv('cout.csv')
                cost_column = couts[date+'_'+nom_chaine+'_tout']
                occurrences = np.bincount(cost_column)
                if min(cost_column) < 1 and max(occurrences) >= 4:
                    pass_files.append(file)
                else:
                    wait_until_below(Processes, MAX_PROCESSES)
                    Processes.append(Popen(['python','out.py',file,str(i)]))
                    time.sleep(2)
        def_context.Report("treated %s files instead of %s"%(nb_files*30,nb_files_true))

    ##### Part 3: all predictions are done, compute the costs #####
    if start <= 3:
        os.system("python cost.py")
        time.sleep(10)

    ##### Part 4: keep the best folder for each day part #####
    if start <= 4:
        create_res_file()
        df = pd.read_csv('cout.csv')
        index_of_best = [0]*31
        for col in df.columns.values:
            if 'tout' not in col:
                continue
            parts = []
            prefix = col[:-4]  # column name without its trailing 'tout'
            for mm in ['matinee','apresmidi','soiree']:
                i = find_best(df[prefix+mm])
                index_of_best[i] += 1
                a,b = def_context.get_tuple(prefix.split('_')[1])
                parts.append(exit_file(prefix.split('_')[0],a,b,i,mm))
                def_context.Report('Best Prediction for %s %s %s occured at %s'%(prefix.split('_')[1],prefix.split('_')[0],mm,str(i)))
            try:
                # exit_file returns a plain list when it could not read its
                # inputs; pd.concat then raises and the failure is reported.
                merged = pd.concat(parts)
                merged.to_csv('../DatasOut/out/new_PTV_'+prefix.split('_')[0]+'_'+b+'.csv',index=False)
            except Exception as e:
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                def_context.Report("Failed to process {0} at line {2} in {3}: {1}".format(str(argv), str(e),sys.exc_info()[-1].tb_lineno,fname))
        with open('res.txt', 'w') as f:
            for item in index_of_best:
                f.write("%s\n" % item)

    ##### Part 5: counters -> rates #####
    if start <= 5:
        res = pd.read_csv('res_out.csv')
        res.loc[1] = res.loc[0]*0
        folder_columns = [str(i) for i in range(31)]
        for col in res:
            if 'count' in col or col == 'nombre_fichier' or col in folder_columns:
                continue
            # NOTE(review): chained assignment; it has no effect under pandas
            # copy-on-write. Kept as in the original.
            res[col].loc[1] = 1-(res[col].loc[0])/res['count_'+col].loc[0]
            def_context.Report('pour %s : %s erreurs soit %s '%(col,res[col][0],res[col][1]))
        res.to_csv('res_out.csv',index=False)
    def_context.Report("EXIT THE PROGRAM WITH NO ERROR. Congratulation bro!")
Beispiel #11
0
def exit_file(date,numero,nom_chaine,num_folder,part):
    """Build the output frame of one day part and update the counters file.

    The original schedule (``PATH_IN/PTV/IPTV_<numero>_<date>_<chaine>.csv``)
    is matched against the predicted one
    (``PATH_OUT/T<num_folder>/new_ptv/new_PTV_<date>_<chaine>.csv``): for
    each programme, the first not-yet-used 'fin ...' event with the same
    title gives its real end, provided the end is less than 40 minutes away
    from the planned one or the title is unique. Rows titled 'publicité' of
    the prediction are appended as 6-minute entries.

    'res_out.csv' is read, its counters are incremented and it is written
    back: one set of 'count_*' columns per call, one set of error columns
    per 'HARD RESET OF ALGORITHM' event falling inside ``part``, plus
    'nombre_fichier' and the column named after ``num_folder``.

    Args:
        date: day, as 'YYYY-MM-DD'; its first two fields form the month key.
        numero: channel identifier used in the input file name (string).
        nom_chaine: channel name.
        num_folder: folder index of the prediction to read.
        part: 'matinee', 'apresmidi' or 'soiree'.

    Returns:
        The rows whose planned end falls inside ``part``; ``[0,0,0,0]`` when
        an input file cannot be read; None for any other ``part`` value.

    ``DataFrame.append`` was removed in pandas 2.0, so frames are now merged
    with ``pd.concat``; the repeated counter updates share one helper.
    """
    file = PATH_IN+'PTV/IPTV_'+numero+'_'+date+'_'+nom_chaine+'.csv'
    otherfile = PATH_OUT+'T'+str(num_folder)+'/new_ptv/new_PTV_'+date+'_'+nom_chaine+'.csv'
    try:
        ptv = pd.read_csv(file)
        new_ptv = pd.read_csv(otherfile)
    except Exception as e:
        def_context.Report('petit problème: '+str(e))
        return [0,0,0,0]

    morning_end = 13*60+40
    afternoon_end = 20*60+35

    df = pd.DataFrame()
    df['titre'] = ptv['TITRE']
    df['clediff'] = ptv['@CLEDIF']
    df['debut'] = ptv['debut']%1440
    df['duree'] = ptv['DUREE']%1440
    df['fin'] = (ptv['debut']+ptv['DUREE'])%1440
    df['vrai fin'] = 0
    df['ND'] = 0
    df['pourcentage vu'] = 0

    # Only the events whose first word is 'fin' mark the end of a programme.
    new_ptv_ = new_ptv[new_ptv['Évenement'].apply(lambda x: x.split(' ')[0]) == 'fin']
    current = 0
    # NOTE(review): the chained assignments below (df[col].iloc[j] = ...) are
    # kept as in the original; they have no effect under pandas copy-on-write.
    for j in range(df.shape[0]):
        for i in range(current,new_ptv_.shape[0]):
            if new_ptv_['TITRE'].iloc[i] != df['titre'].iloc[j]:
                continue
            close_enough = abs(df['fin'].iloc[j] - new_ptv_['minute'].iloc[i]) < 40
            if close_enough or df[df['titre'] == df['titre'].iloc[j]].shape[0] == 1:
                df['vrai fin'].iloc[j] = new_ptv_['minute'].iloc[i]
                df['pourcentage vu'].iloc[j] = new_ptv_['pourcentage vu'].iloc[i]
                if new_ptv_['Évenement'].iloc[i] == "fin d'un programme":
                    df['ND'].iloc[j] = 0
                else:
                    df['ND'].iloc[j] = 1
                # The next programme starts looking after this event.
                current = i+1
                break

    df['vrai debut'] = (df['vrai fin'] - df['duree']*df['pourcentage vu'])%1440
    # The last programme ends one duration after the previous real end.
    df['vrai fin'].iloc[df.shape[0]-1] = df['vrai fin'].iloc[df.shape[0]-2] + df['duree'].iloc[df.shape[0]-1]
    df['chaine'] = nom_chaine
    df['date'] = date

    # Advertising breaks of the prediction, as 6-minute rows.
    temp_df = pd.DataFrame()
    temp_df[['titre','vrai debut']] = new_ptv[new_ptv['TITRE'] == 'publicité'][['TITRE','minute']]
    for v in df.columns.values:
        if v not in ['titre','vrai debut']:
            temp_df[v] = 0
    temp_df['chaine'] = nom_chaine
    temp_df['date'] = date
    temp_df['vrai fin'] = temp_df['vrai debut'].apply(lambda x: x+6)
    temp_df['fin'] = temp_df['vrai fin']
    temp_df['debut'] = temp_df['vrai debut']
    df = pd.concat([df, temp_df]).reset_index(drop=True)

    r = pd.read_csv('res_out.csv')
    month = '-'.join(date.split('-')[:-1])

    def bump(column):
        # Add 1 to a counter column, creating it with value 1 if missing.
        if column in r.columns:
            r[column] = r[column]+1
        else:
            r[column] = 1

    # Order matters: it is the order in which new columns are created.
    counter_keys = [
        nom_chaine+'_'+month+'_'+part,
        nom_chaine+'_'+month,
        part,
        month,
        nom_chaine,
    ]
    for key in counter_keys:
        bump('count_'+key)

    for index,x in new_ptv[['Évenement','minute']].iterrows():
        if 'HARD RESET OF ALGORITHM' not in x['Évenement']:
            continue
        # Only the resets that happened during the requested part count.
        in_part = (
            (x['minute']<=morning_end and part == 'matinee' and x['minute']>180)
            or (x['minute']<=afternoon_end and part == 'apresmidi' and x['minute']>morning_end)
            or (part == 'soiree' and (x['minute']>afternoon_end or x['minute']<180))
        )
        if in_part:
            for key in counter_keys:
                bump(key)
            def_context.Report('message: %s à la minute %s de la journée %s pour la chaîne %s pour la partie %s. Recherche dans le folder %s ' %(x['Évenement'],x['minute'],date,nom_chaine,part,num_folder))

    r['nombre_fichier'] = r['nombre_fichier']+1
    bump(str(num_folder))
    r.to_csv('res_out.csv',index=False)

    if part == 'matinee':
        return df[(df['fin']<=morning_end) & (df['fin']>180)]
    elif part == 'apresmidi':
        return df[(df['fin']<=afternoon_end) & (df['fin']>morning_end)]
    elif part == 'soiree':
        return pd.concat([df[df['fin']>afternoon_end], df[df['fin']<=180]])
Beispiel #12
0
def find_cost(date, numero, nom_chaine, i):
    """Compute the cost of the prediction stored in folder ``i`` for one day.

    The original schedule (``PATH_IN/PTV/IPTV_<numero>_<date>_<chaine>.csv``)
    is matched against the prediction
    (``PATH_OUT/T<i>/new_ptv/new_PTV_<date>_<chaine>.csv``). A matched
    programme costs the start error plus the end error (in minutes, modulo
    one day) times ``coef(fin)``; an unmatched programme costs a flat
    penalty (20 in the total and the morning, 100 in the afternoon and the
    evening); every 'HARD RESET OF ALGORITHM' event adds ``2000 + i``.
    The per-programme costs are also written to
    ``test/<date>_<chaine>_<i>.html``.

    Some schedules are too inconsistent for the matching to work at all;
    they simply end up with a high cost.

    Args:
        date: day, as used in the file names.
        numero: channel identifier used in the input file name (string).
        nom_chaine: channel name.
        i: index of the prediction folder; also used as a small tie-breaker
            in the penalties.

    Returns:
        ``[total, morning, afternoon, evening]``, each divided by 2000.
        When an input file cannot be read:
        ``[3 + i/2000, 1 + i/2000, 1 + i/2000, 1 + i/2000]``.

    Fix over the previous version: the matching loop reused the name ``i``
    and so overwrote the folder index; the hard-reset penalty and the HTML
    file name were then computed from the last loop position.
    """
    file = PATH_IN + 'PTV/IPTV_' + numero + '_' + date + '_' + nom_chaine + '.csv'
    otherfile = (PATH_OUT + 'T' + str(i) + '/new_ptv/new_PTV_' + date + '_'
                 + nom_chaine + '.csv')
    try:
        ptv = pd.read_csv(file)
        new_ptv = pd.read_csv(otherfile)
    except Exception as e:
        def_context.Report('petit problème: ' + str(e))
        return [3 + i / 2000, 1 + i / 2000, 1 + i / 2000, 1 + i / 2000]

    morning_end = 13 * 60 + 40
    afternoon_end = 20 * 60 + 35

    df = pd.DataFrame()
    df['titre'] = ptv['TITRE']
    df['debut'] = ptv['debut'] % 1440
    df['duree'] = ptv['DUREE'] % 1440
    df['fin'] = (ptv['debut'] + ptv['DUREE']) % 1440
    df['vrai fin'] = 0
    df['coef'] = df['fin'].apply(lambda x: coef(x))
    df['ND'] = 0
    df['pourcentage vu'] = 0

    # Only the events whose first word is 'fin' mark the end of a programme.
    new_ptv_ = new_ptv[
        new_ptv['Évenement'].apply(lambda x: x.split(' ')[0]) == 'fin']
    current = 0
    # NOTE(review): the chained assignments below (df[col].iloc[j] = ...) are
    # kept as in the original; they have no effect under pandas copy-on-write.
    for j in range(df.shape[0]):
        # `k`, not `i`: the folder index must survive this loop.
        for k in range(current, new_ptv_.shape[0]):
            if new_ptv_['TITRE'].iloc[k] != df['titre'].iloc[j]:
                continue
            close_enough = abs(
                df['fin'].iloc[j] - new_ptv_['minute'].iloc[k]) < 40
            if (close_enough
                    or df[df['titre'] == df['titre'].iloc[j]].shape[0] == 1):
                df['vrai fin'].iloc[j] = new_ptv_['minute'].iloc[k]
                df['pourcentage vu'].iloc[j] = new_ptv_[
                    'pourcentage vu'].iloc[k]
                if new_ptv_['Évenement'].iloc[k] == "fin d'un programme":
                    df['ND'].iloc[j] = 0
                else:
                    df['ND'].iloc[j] = 1
                # NOTE(review): exit_file uses `i+1` here, so the same event
                # can be matched again by the next programme - confirm.
                current = k
                break

    df['vrai debut'] = (df['vrai fin'] -
                        df['duree'] * df['pourcentage vu']) % 1440
    # The last programme ends one duration after the previous real end.
    df['vrai fin'].iloc[df.shape[0] - 1] = df['vrai fin'].iloc[
        df.shape[0] - 2] + df['duree'].iloc[df.shape[0] - 1]

    # df2: programmes that were never matched; df: the matched ones.
    df2 = df[df['pourcentage vu'] == 0]
    df = df[df['pourcentage vu'] > 0]
    df['cout'] = 0
    df2['cout'] = 1
    df = df.reset_index(drop=True)
    df2 = df2.reset_index(drop=True)
    for index, row in df.iterrows():
        start_error = min(
            abs(row['debut'] - row['vrai debut']) % 1440,
            abs(row['debut'] % 1440 - row['vrai debut'] % 1440))
        end_error = min(
            abs(row['fin'] - row['vrai fin']) % 1440,
            abs(row['fin'] % 1440 - row['vrai fin'] % 1440))
        # NOTE(review): the cost of row `index` is stored on row `index - 1`
        # (row 0 lands on the last row). The total is unaffected but the
        # per-part sums may be - looks like an off-by-one, confirm.
        df['cout'].iloc[index - 1] = (start_error + end_error) * row['coef']

    def in_morning(frame):
        return (frame['fin'] <= morning_end) & (frame['fin'] > 180)

    def in_afternoon(frame):
        return (frame['fin'] <= afternoon_end) & (frame['fin'] > morning_end)

    cout = np.sum(df['cout']) + 20 * np.sum(df2['cout'])
    cout_matin = (np.sum(df[in_morning(df)]['cout'])
                  + np.sum(df2[in_morning(df2)]['cout']) * 20)
    cout_aprem = (np.sum(df[in_afternoon(df)]['cout'])
                  + 100 * np.sum(df2[in_afternoon(df2)]['cout']))
    cout_soir = (np.sum(df[df['fin'] > afternoon_end]['cout'])
                 + np.sum(df[df['fin'] <= 180]['cout'])
                 + 100 * (np.sum(df2[df2['fin'] > afternoon_end]['cout'])
                          + np.sum(df2[df2['fin'] <= 180]['cout'])))

    reset_penalty = 2000 + i
    for index, x in new_ptv[['Évenement', 'minute']].iterrows():
        if 'HARD RESET OF ALGORITHM' not in x['Évenement']:
            continue
        if x['minute'] <= morning_end and x['minute'] > 180:
            cout_matin += reset_penalty
        elif x['minute'] < afternoon_end and x['minute'] > morning_end:
            cout_aprem += reset_penalty
        else:
            cout_soir += reset_penalty
        cout += reset_penalty

    df['cout'] = df['cout'] / 2000
    df.to_html('test/' + date + '_' + nom_chaine + '_' + str(i) + '.html')
    return ([
        cout / 2000, cout_matin / 2000, cout_aprem / 2000, cout_soir / 2000
    ])
Beispiel #13
0
def pred(file):
    """Build and save the predicted schedule for one PTV input file.

    The date and the channel are parsed from the file name: everything from
    the first '.' is dropped, the rest is split on '_', the third field is
    used as the date and the last field as the channel.

    The day is processed in three passes (``i`` = 0, 1, 2).  Each pass first
    calls ``predictPTV.main``.  When that returns a positive status the pass
    is redone with ``RLPTV.main`` ("relecture"); when that status is still
    above 5, a channel-specific decision tree (``PTVTF1`` / ``PTVM6``) is
    tried and its result is kept only when its status is not positive.
    A status of 4 means the pass contributes nothing to the output.

    The resulting tables are written as HTML and CSV under
    ``PATH_IN + 'new_ptv/'``, ``PATH_IN + 'hop/'`` and the temporary output
    directory returned by ``get_temp_path()``.

    Args:
        file: Name of the PTV file to process.

    Returns:
        0 when ``def_context.load_file`` yields an empty PTV, otherwise None.
        Exceptions are not propagated: they are reported through
        ``def_context.Report`` together with the failing line number.
    """
    try:
        PATH_OUT = get_temp_path()
        relecture = True
        name_fields = (file.split('.'))[0].split('_')
        f = name_fields[2]
        c = name_fields[-1]
        PTV, proba = def_context.load_file(str(f), str(c))
        if (len(PTV) == 0):
            return 0
        # First programme that starts at or before 03:00 and is still on air
        # after 03:05.
        index_PTV = PTV.index[(PTV['debut'] <= 3 * 60) & (
            PTV['debut'] + PTV['DUREE'] > 3 * 60 + 5)].tolist()[0]
        def_context.Report('Starting with: %s' %
                           (PTV['TITRE'].iloc[index_PTV]))
        lastend = PTV['debut'].loc[index_PTV]
        currentduree = PTV['DUREE'].loc[index_PTV]
        newPTV = def_context.init_newPTV(PTV, str(c))
        historyofpoints = def_context.init_history(str(c), PTV, lastend,
                                                   currentduree)
        temp_context = historyofpoints.iloc[0]
        importantpts = def_context.get_important_points(c, PTV, index_PTV)
        for i in range(3):
            def_context.Report(str(i) + ' ' + str(c) + ' ' + str(f))
            from predictPTV import main as predict_main
            l1, temp_newPTV1, temp_history1, index_PTV1, temp_context1 = predict_main(
                [
                    str(c),
                    str(f), i, newPTV.iloc[newPTV.shape[0] - 1], temp_context,
                    index_PTV, importantpts
                ])
            outcome = (l1, temp_newPTV1, temp_history1, index_PTV1,
                       temp_context1)
            if (l1 > 0 and relecture):
                def_context.Report("Utilisation de la relecture " + str(i) +
                                   ' ' + str(c) + ' ' + str(f))
                from RLPTV import main as RL
                l2, temp_newPTV2, temp_history2, index_PTV2, temp_context2 = RL(
                    [
                        str(c),
                        str(f), i, newPTV.iloc[newPTV.shape[0] - 1],
                        temp_context, index_PTV, importantpts
                    ])
                # Unless the decision tree does better, the relecture result
                # is the one that is kept.
                outcome = (l2, temp_newPTV2, temp_history2, index_PTV2,
                           temp_context2)
                if (l2 > 5):
                    # NOTE(review): Report is called with several positional
                    # arguments here but with a single string everywhere
                    # else - confirm def_context.Report accepts this.
                    def_context.Report("Utilisation de l'arbre de décision", f,
                                       c, i)
                    # NOTE(review): `chaine` is not bound anywhere in this
                    # function; presumably a module-level global - confirm.
                    if (chaine == 'TF1'):
                        from PTVTF1 import main as arbre
                    elif (chaine == 'M6'):
                        from PTVM6 import main as arbre
                    else:
                        arbre = None
                    if arbre is None:
                        # No decision tree for this channel.  The original
                        # code had the no-op comparison `l3 > 5` here; what
                        # is needed is a positive status so that the
                        # relecture result is kept below.
                        l3 = l2
                    else:
                        l3, temp_newPTV3, temp_history3, index_PTV3, temp_context3 = arbre(
                            [
                                str(c),
                                str(f), i, newPTV.loc[newPTV.shape[0] - 1],
                                temp_context, index_PTV, importantpts
                            ])
                    if (l3 > 0):
                        def_context.Report("AUCUNE DÉCISION NE CONVIENT", f, c)
                    else:
                        outcome = (l3, temp_newPTV3, temp_history3,
                                   index_PTV3, temp_context3)
            # index_PTV and temp_context are carried over to the next pass
            # even when the pass itself is discarded (status 4).
            l, temp_newPTV, temp_history, index_PTV, temp_context = outcome
            if (l != 4):
                newPTV = pd.concat([newPTV, temp_newPTV.iloc[1:]])
                historyofpoints = pd.concat([historyofpoints, temp_history])

        # Human readable H:M column derived from the minute of the day.
        newPTV['Heure'] = newPTV['minute'].apply(
            lambda x: str(int(x / 60)) + ':' + str(x % 60))
        historyofpoints['Heure'] = historyofpoints['minute'].apply(
            lambda x: str(int(x / 60)) + ':' + str(x % 60))
        ptv_name = 'new_ptv/new_PTV_' + str(f) + '_' + str(c)
        hop_name = 'hop/historyofpoints_' + str(f) + '_' + str(c)
        newPTV.to_html(PATH_IN + ptv_name + '.html')
        newPTV.to_csv(PATH_IN + ptv_name + '.csv', index=False)
        historyofpoints.to_html(PATH_IN + hop_name + '.html')
        historyofpoints.to_csv(PATH_IN + hop_name + '.csv', index=False)
        newPTV.to_html(PATH_OUT + ptv_name + '.html')
        newPTV.to_csv(PATH_OUT + ptv_name + '.csv', index=False)
    except Exception as e:
        # Best effort: one broken file must not stop the caller, so the
        # failure is only logged with its location.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        def_context.Report(
            "Failed to process {0} at line {2} in {3}: {1}".format(
                str(file), str(e),
                sys.exc_info()[-1].tb_lineno, fname))
Beispiel #14
0
def main(argv):
    """Run the channel PTV scripts for one month and record the scores.

    Every file found in ``PATH_IN + 'PTV/'`` whose date matches the requested
    month is processed by launching ``PTV<chaine>.py <date> <number>`` with
    ``os.system`` (``PTVM6.py`` for M6, ``PTVTF1.py`` for every other
    channel).  The value returned by ``os.system`` divided by 256 is read as
    a three digit number: hundreds, tens and units are added to the first,
    second and third error counters respectively (reported below as morning,
    afternoon and evening scores).  A value of 4 means the file is ignored.

    The aggregated scores are reported and appended as a new row to
    ``scores.csv`` (the file is created with a placeholder row when it cannot
    be read).

    Args:
        argv: Command line arguments; ``argv[0]`` is the month to process in
            ``YYYY-MM`` form (ex: ``2017-12``).

    Returns:
        None.
    """
    t = time.time()
    if (len(argv) == 0):
        Report("Merci de renseigner une année et un mois (ex: 2017-12)")
        # Everything below needs argv[0]; stop instead of failing later on
        # an IndexError.
        return
    EPSILON = 1e-15
    err = 0
    m = 0
    err_TF1 = 0
    m_TF1 = 0
    err_M6 = 0
    m_M6 = 0
    err_F2 = 0
    m_F2 = 0
    err_F3 = 0
    m_F3 = 0

    err_type_1 = 0
    err_type_2 = 0
    err_type_3 = 0
    try:
        pd.read_csv('scores.csv')
    except Exception:
        # scores.csv is missing or unreadable: start a new one with a
        # placeholder row so that the column layout exists.
        df = pd.DataFrame()
        df['score TF1'] = [0]
        df['score M6'] = 0
        df['score France 2'] = 0
        df['score France 3'] = 0
        df['score Total'] = 0
        df['score sur la matinée'] = 0
        df["score sur l'après midi"] = 0
        df['score sur la soirée'] = 0
        df['part de relecture'] = 0
        df['temps de calcul'] = 0
        df['mois'] = '55-55'
        df.to_csv('scores.csv', index=False)

    files = os.listdir(PATH_IN + 'PTV/')
    for file in files:
        def_context.Report('-------------------------------------')
        name_fields = (file.split('.'))[0].split('_')
        f = name_fields[2]
        c = name_fields[-1]
        if (f == '2017-12-20' or
            (f in ['2017-12-09', '2017-12-06', '2018-02-22'] and c == 'TF1')
                or (f in ['2018-02-22'] and c == 'M6')
                or (f.split('-')[0] != str(argv[0].split('-')[0]))
                or f.split('-')[1] != argv[0].split('-')[1]):
            # Excluded day, or not in the requested year/month.
            def_context.Report(f)
        elif (c == ''):
            pass
        else:
            def_context.Report(c)
            if (c in ['M6', 'TF1']):
                chaine = c
            else:
                chaine = 'TF1'
            number, _ = get_tuple(c)
            if (len(number) < 4):
                number = "0" + number
            def_context.Report('Using PTV%s for %s' % (chaine, f))
            status = os.system('python ' + PATH_SCRIPT + 'PTV' + str(chaine) +
                               '.py ' + str(f) + ' ' + str(number))
            code = status / 256
            if (code != 4):
                # One digit per part of the day.
                e1 = int(code / 100)
                e2 = int((code % 100) / 10)
                e3 = int((code % 10))
                file_err = e1 + e2 + e3
                err += file_err
                err_type_1 += e1
                err_type_2 += e2
                err_type_3 += e3
                m += 3
                if (c == 'M6'):
                    err_M6 += file_err
                    m_M6 += 3
                if (c == 'TF1'):
                    err_TF1 += file_err
                    m_TF1 += 3
                # The original tested 'France 3' here as well, so France 2
                # was never counted and France 3 went into both buckets.
                if (c == 'France 2'):
                    err_F2 += file_err
                    m_F2 += 3
                if (c == 'France 3'):
                    err_F3 += file_err
                    m_F3 += 3

        def_context.Report(err)
    def_context.Report(m)
    if (m == 0):
        # The original called the def_context module itself (TypeError).
        def_context.Report(
            "aucun fichier n'a été traité. Merci de vérifier la date et les données d'entrée."
        )

    # EPSILON keeps the divisions defined when nothing was processed.
    score_total = 1 - (err / (m + EPSILON))
    score_TF1 = 1 - (err_TF1 / (m_TF1 + EPSILON))
    score_M6 = 1 - (err_M6 / (m_M6 + EPSILON))
    score_F2 = 1 - (err_F2 / (m_F2 + EPSILON))
    score_F3 = 1 - (err_F3 / (m_F3 + EPSILON))
    score_matin = 1 - ((err_type_1 * 3) / (m + EPSILON))
    score_aprem = 1 - ((err_type_2 * 3) / (m + EPSILON))
    score_soir = 1 - ((err_type_3 * 3) / (m + EPSILON))

    def_context.Report("score Total:" + str(score_total))
    def_context.Report("score TF1:" + str(score_TF1))
    def_context.Report("score M6:" + str(score_M6))
    def_context.Report("score France 2:" + str(score_F2))
    def_context.Report("score France 3:" + str(score_F3))
    def_context.Report("score sur la matinée:" + str(score_matin))
    def_context.Report("score sur l'après midi:" + str(score_aprem))
    def_context.Report("score sur la soirée:" + str(score_soir))
    def_context.Report("temps de calcul:" + str(time.time() - t))
    try:
        df = pd.read_csv('scores.csv')
        # Same column order as the placeholder row created above.
        df.loc[df.shape[0]] = [
            score_TF1, score_M6, score_F2, score_F3, score_total,
            score_matin, score_aprem, score_soir, 0.5 * m / (m + EPSILON),
            time.time() - t, argv[0]
        ]
        df.to_csv('scores.csv', index=False)
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        def_context.Report(
            "Failed to process {0} at line {2} in {3}: {1}".format(
                '', str(e),
                sys.exc_info()[-1].tb_lineno, fname))
        Report("fichier non conforme ou non existant: %s" % (e))
Beispiel #15
0
def main(argv):
    """Train, stack and evaluate the PTV classifiers, or re-evaluate saved predictions.

    Two modes are selected by ``argv``:

    * ``['test']``: reload ``results.csv`` and the ``y_pred*.csv`` files
      written by a previous training run, fit the stacking logistic
      regression on them and report the metrics of each model.
    * a single other argument (``['all']`` when ``argv`` is empty): load the
      data with ``load_all(argv[0])``, hold out 20% for testing, fit the five
      base classifiers and the stacking logistic regression, save the models
      under ``PATH_OUT + 'model_PTV/'``, report the metrics and write the
      test labels and predictions to CSV files in the working directory.

    With any other argument list nothing is computed.

    Args:
        argv: Command line arguments as described above.

    Returns:
        The status string "process achevé sans erreures" in every case.
    """
    global PATH_IN,PATH_SCRIPT,PATH_OUT
    PATH_IN,PATH_SCRIPT,PATH_OUT = def_context.get_path()
    PATH_OUT = get_temp_path()
    model_dir = PATH_OUT+'model_PTV/'
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if(len(argv) == 0):
        argv = ['all']
    if(argv[0] == 'test'):
        Y_test = pd.read_csv('results.csv').values
        y_pred = pd.read_csv('y_pred.csv')
        y_pred2 = pd.read_csv('y_pred2.csv')
        # The original re-read 'y_pred2.csv' here, so the decision tree
        # predictions saved by the training branch were never used.
        y_pred3 = pd.read_csv('y_pred3.csv')
        y_pred4 = pd.read_csv('y_pred4.csv')
        y_pred5 = pd.read_csv('y_pred5.csv')

        logreg = use_logisticreg(y_pred,y_pred2,y_pred3,y_pred4,y_pred5,Y_test)
        res = pd.concat([y_pred,y_pred2,y_pred3,y_pred4,y_pred5],axis=1).values
        res = logreg.predict_proba(res)
        # The decision tree is not part of this report, as in the original.
        reports = (
            ('############XGB##############', y_pred.values),
            ('############CatBoost##############', y_pred2.values),
            ('############GradientBoostingClassifier##############', y_pred4.values),
            ('############RandomForestClassifier##############', y_pred5.values),
            ('############Stack##############', res),
        )
        for p1 in [0]:
            for p2 in [0]:
                def_context.Report('################### '+str(p1)+' ### '+str(p2)+'###################')
                for title, scores in reports:
                    def_context.Report(title)
                    mesure(scores,Y_test,p1,p2)
                    mismatch(scores,Y_test,p1,p2)
                    acc(scores,Y_test,p1,p2)

    elif(len(argv) == 1):
        X,Y = load_all(argv[0])
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
        # Infinite and missing feature values are replaced by 0.
        X_train = X_train.replace([np.inf, -np.inf], np.nan)
        X_train = X_train.fillna(0)
        X_test = X_test.replace([np.inf, -np.inf], np.nan)
        X_test = X_test.fillna(0)
        # Keep only the first column of the test labels, as a plain list.
        Y_test  = [row[0] for row in Y_test.values]
        ##########################################
        np.random.seed(42)
        clf = Classifier()
        clf.fit(X_train,Y_train)
        y_pred = clf.predict_proba(X_test)
        clf2 = Classifier2()
        clf2.fit(X_train,Y_train)
        y_pred2 = clf2.predict_proba(X_test)

        dtree_model = DecisionTreeClassifier(max_depth = 10).fit(X_train,Y_train)
        y_pred3 = dtree_model.predict_proba(X_test)

        tpot = GradientBoostingClassifier(learning_rate=0.05, max_depth=10, max_features=0.75, min_samples_leaf=7, min_samples_split=16, n_estimators=500, subsample=0.9)
        tpot.fit(X_train,Y_train)
        def_context.Report(tpot.score(X_test, Y_test))
        y_pred4 = tpot.predict_proba(X_test)

        RF_model = RandomForestClassifier(max_depth = 10).fit(X_train,Y_train)
        y_pred5 = RF_model.predict_proba(X_test)

        # The stacking model is fitted on the base models' predictions for
        # the training set.
        y_p = clf.predict_proba(X_train)
        y_p2 = clf2.predict_proba(X_train)
        y_p3 = dtree_model.predict_proba(X_train)
        y_p4 = tpot.predict_proba(X_train)
        y_p5 = RF_model.predict_proba(X_train)

        logreg = use_logisticreg(y_p,y_p2,y_p3,y_p4,y_p5,Y_train)

        ##########################################
        save_model_xgb(clf)
        save_model_cat(clf2)
        save_model(dtree_model,"DT")
        save_model(RF_model,"RF")
        # `with` closes the files; the original left both handles open.
        with open(model_dir+"GradientBoostingClassifier.pickle.dat", "wb") as model_file:
            pickle.dump(tpot, model_file)
        with open(model_dir+"RandomForestClassifier.pickle.dat", "wb") as model_file:
            pickle.dump(RF_model, model_file)
        X = pd.concat([pd.DataFrame(y_pred),pd.DataFrame(y_pred2),pd.DataFrame(y_pred3),pd.DataFrame(y_pred4),pd.DataFrame(y_pred5)],axis = 1).values
        res = logreg.predict_proba(X)
        reports = (
            ('############XGB##############', y_pred),
            ('############CatBoost##############', y_pred2),
            ('############DecisionTreeClassifier##############', y_pred3),
            ('############GradientBoostingClassifier##############', y_pred4),
            ('############RandomForestClassifier##############', y_pred5),
            ('############Stack##############', res),
        )
        for p1,p2 in zip([0],[0]):
            for title, scores in reports:
                def_context.Report(title)
                mesure(scores,Y_test,p1,p2)
                mismatch(scores,Y_test,p1,p2)
                acc(scores,Y_test,p1,p2)

        # Saved so that the 'test' mode can be replayed without retraining.
        outputs = (
            ('results.csv', Y_test),
            ('y_pred.csv', y_pred),
            ('y_pred2.csv', y_pred2),
            ('y_pred3.csv', y_pred3),
            ('y_pred4.csv', y_pred4),
            ('y_pred5.csv', y_pred5),
        )
        for csv_name, values in outputs:
            pd.DataFrame(values).to_csv(csv_name,index=False)


    return ("process achevé sans erreures")
Beispiel #16
0
def Report(error):
    """Append ``error`` to the LOG file and forward it to def_context.Report.

    Args:
        error: Any object; its ``str()`` form is what gets logged.
    """
    message = str(error)
    with open(LOG, 'a+') as log_file:
        log_file.write(message + ' \n')
        # Forwarded while the log file is still open, as before.
        def_context.Report(message)
Beispiel #17
0
def update_temp_path(i):
    """Point the 'temp_path' column of path.csv at the sub-directory T<i>/.

    The new value is the 'PathtoDatasOut' column followed by 'T<i>/'.  The
    path built from the first row is reported and path.csv is rewritten in
    place.

    Args:
        i: Value used to build the 'T<i>/' sub-directory name.
    """
    suffix = 'T' + str(i) + "/"
    datas = pd.read_csv('path.csv')
    out_root = datas['PathtoDatasOut']
    datas['temp_path'] = out_root + suffix
    def_context.Report('Updated Temp path to: ' + out_root[0] + suffix)
    datas.to_csv('path.csv', index=False)
Beispiel #18
0
def _ptv_run_slot(c, f, i, newPTV, temp_context, index_PTV, importantpts,
                  path_out, stats=None, relecture=True):
    """Run the prediction cascade for pass ``i`` of one day.

    ``predictPTV.main`` is tried first.  When it returns a positive status
    and ``relecture`` is set, ``RLPTV.main`` is run; when that status is
    above 5 the channel decision tree (``PTVTF1`` / ``PTVM6``) is tried and
    kept only when its status is not positive.

    Args:
        c: Channel name.
        f: Date string.
        i: Pass number.
        newPTV: Schedule built so far; its last row is handed to the models.
        temp_context: Context handed to the models.
        index_PTV: Current position in the PTV.
        importantpts: Important points handed to the models.
        path_out: Output directory handed to predictPTV and RLPTV.
        stats: Optional dict; ``stats['nb_rel']`` is incremented each time
            the relecture is used.
        relecture: Whether the relecture step may be used.

    Returns:
        The 5-tuple ``(l, temp_newPTV, temp_history, index_PTV,
        temp_context)`` of the step that was kept.
    """
    from predictPTV import main as predict_main
    l1, temp_newPTV1, temp_history1, index_PTV1, temp_context1 = predict_main([
        str(c),
        str(f), i, newPTV.iloc[newPTV.shape[0] - 1], temp_context, index_PTV,
        importantpts, path_out
    ])
    if not (l1 > 0 and relecture):
        return l1, temp_newPTV1, temp_history1, index_PTV1, temp_context1

    if stats is not None:
        stats['nb_rel'] += 1
    def_context.Report("Utilisation de la relecture " + str(i) + ' ' +
                       str(c) + ' ' + str(f))
    from RLPTV import main as RL
    l2, temp_newPTV2, temp_history2, index_PTV2, temp_context2 = RL([
        str(c),
        str(f), i, newPTV.iloc[newPTV.shape[0] - 1], temp_context, index_PTV,
        importantpts, path_out
    ])
    relecture_result = (l2, temp_newPTV2, temp_history2, index_PTV2,
                        temp_context2)
    if not (l2 > 5):
        return relecture_result

    # NOTE(review): Report is called with several positional arguments here
    # but with a single string everywhere else - confirm it accepts this.
    def_context.Report("Utilisation de l'arbre de décision", f, c, i)
    # NOTE(review): `chaine` is not bound in this function nor in main();
    # presumably a module-level global - confirm.
    if (chaine == 'TF1'):
        from PTVTF1 import main as arbre
    elif (chaine == 'M6'):
        from PTVM6 import main as arbre
    else:
        # No decision tree for this channel.  The original evaluated the
        # no-op `l3 > 5` and then called an unbound `arbre`; keep the
        # relecture result instead.
        def_context.Report("AUCUNE DÉCISION NE CONVIENT", f, c)
        return relecture_result
    l3, temp_newPTV3, temp_history3, index_PTV3, temp_context3 = arbre([
        str(c),
        str(f), i, newPTV.loc[newPTV.shape[0] - 1], temp_context, index_PTV,
        importantpts
    ])
    if (l3 > 0):
        def_context.Report("AUCUNE DÉCISION NE CONVIENT", f, c)
        return relecture_result
    return l3, temp_newPTV3, temp_history3, index_PTV3, temp_context3


def _ptv_save_outputs(newPTV, historyofpoints, f, c, path_out=None):
    """Add the 'Heure' column to both tables and write them as HTML and CSV.

    Files go under ``PATH_IN + 'new_ptv/'`` and ``PATH_IN + 'hop/'``; when
    ``path_out`` is given the schedule is also written under
    ``path_out + 'new_ptv/'``.
    """

    def to_clock(x):
        return str(int(x / 60)) + ':' + str(x % 60)

    newPTV['Heure'] = newPTV['minute'].apply(to_clock)
    historyofpoints['Heure'] = historyofpoints['minute'].apply(to_clock)
    ptv_name = 'new_ptv/new_PTV_' + str(f) + '_' + str(c)
    hop_name = 'hop/historyofpoints_' + str(f) + '_' + str(c)
    newPTV.to_html(PATH_IN + ptv_name + '.html')
    newPTV.to_csv(PATH_IN + ptv_name + '.csv', index=False)
    historyofpoints.to_html(PATH_IN + hop_name + '.html')
    historyofpoints.to_csv(PATH_IN + hop_name + '.csv', index=False)
    if path_out is not None:
        newPTV.to_html(path_out + ptv_name + '.html')
        newPTV.to_csv(path_out + ptv_name + '.csv', index=False)


def main(argv):
    """Entry point: run training rounds, score one month, or process one day.

    Modes, selected by ``argv``:

    * ``['start']`` or ``['start', n]``: run rounds ``n`` (default 0) to 29.
      Each round updates the temporary path, empties ``res.txt``, runs the
      monthly script for 2017-12, 2018-02 and 2018-03 in parallel
      (``PTVall.py`` for the very first round of a fresh run,
      ``makenewPTV.py`` otherwise) and then ``MLforPTV.py``.  A fresh run
      (round 0) also resets ``scores.csv``.
    * ``[month]`` (``YYYY-MM``): process every matching file of
      ``PATH_IN + 'PTV/'``, write the resulting tables, append the per-pass
      status of each file to ``res.txt`` and append the scores of the month
      to ``scores.csv``.  An empty ``argv`` is treated as ``['2015']``.
    * ``[channel, date]``: process that single day and write its tables.

    Args:
        argv: Command line arguments as described above.

    Returns:
        "Fichier manquant" in the ``[channel, date]`` mode when the PTV is
        empty, otherwise None.
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = get_path()
    import pandas as pd
    end = 30
    t = time.time()
    if (len(argv) == 0):
        argv = ['2015']
    if (argv[0] == 'start'):
        start = 0 if len(argv) == 1 else int(argv[1])
        # Starting from round 0 means starting over: reset the scores file.
        createfile = (start == 0)
        if (createfile):
            df = pd.DataFrame()
            df['score TF1'] = [0]
            df['score M6'] = 0
            df['score France 2'] = 0
            df['score France 3'] = 0
            df['score Total'] = 0
            df['score sur la matinée'] = 0
            df["score sur l'après midi"] = 0
            df['score sur la soirée'] = 0
            df['part de relecture'] = 0
            df['temps de calcul'] = 0
            df['mois'] = '55-55'

            df.to_csv('scores.csv', index=False)
            time.sleep(10)
        for i in range(start, end):
            update_temp_path(i)
            try:
                open(PATH_OUT + 'res.txt', 'w').close()
                def_context.Report('file cleaned')
            except Exception:
                # Best effort: the round still runs if res.txt cannot be
                # emptied.
                pass
            if (createfile and i == 0):
                script = 'PTVall.py'
            else:
                script = 'makenewPTV.py'
            # The three months run in parallel; wait for all of them.
            workers = [
                Popen(['python', PATH_SCRIPT + script, month])
                for month in ('2017-12', '2018-02', '2018-03')
            ]
            for worker in workers:
                worker.wait()
            time.sleep(60)
            os.system('python ' + PATH_SCRIPT + 'MLforPTV.py')
            time.sleep(60)
            def_context.Report("fin du tour " + str(i))

    elif (len(argv) == 1):
        PATH_OUT = get_temp_path()
        EPSILON = 1e-15
        err = 0
        m = 0
        channel_err = {'TF1': 0, 'M6': 0, 'France 2': 0, 'France 3': 0}
        channel_m = {'TF1': 0, 'M6': 0, 'France 2': 0, 'France 3': 0}
        err_by_pass = [0, 0, 0]
        stats = {'nb_rel': 0}

        files = os.listdir(PATH_IN + 'PTV/')
        nb_files = len(files)
        for file in files:
            def_context.Report('Il reste encore %s fichiers à traiter' %
                               (nb_files))
            nb_files -= 1
            try:
                name_fields = (file.split('.'))[0].split('_')
                f = name_fields[2]
                c = name_fields[-1]
                if (f == '2017-12-20'
                        or (f in ['2017-12-09', '2017-12-06', '2018-02-22']
                            and c == 'TF1')
                        or (f in ['2018-02-22'] and c == 'M6')
                        or (f.split('-')[0] != str(argv[0].split('-')[0]))
                        or f.split('-')[1] != argv[0].split('-')[1]):
                    # Excluded day, or not in the requested year/month.
                    pass
                elif (c == ''):
                    pass
                else:
                    PTV, proba = def_context.load_file(str(f), str(c))
                    if (len(PTV) == 0):
                        continue
                    # First programme that starts at or before 03:00 and is
                    # still on air after 03:05.
                    index_PTV = PTV.index[(PTV['debut'] <= 3 * 60) & (
                        PTV['debut'] + PTV['DUREE'] > 3 * 60 + 5)].tolist()[0]
                    def_context.Report('Starting with: %s' %
                                       (PTV['TITRE'].iloc[index_PTV]))
                    lastend = PTV['debut'].loc[index_PTV]
                    currentduree = PTV['DUREE'].loc[index_PTV]
                    newPTV = def_context.init_newPTV(PTV, str(c))
                    historyofpoints = def_context.init_history(
                        str(c), PTV, lastend, currentduree)
                    temp_context = historyofpoints.iloc[0]
                    importantpts = def_context.get_important_points(
                        c, PTV, index_PTV)
                    # `with` closes res.txt even when a pass raises; the
                    # original leaked the handle in that case.
                    with open(PATH_OUT + 'res.txt', 'a+') as file_:
                        file_.write(str(f + ' ' + c + ':').rstrip('\n'))
                        for i in range(3):
                            def_context.Report(
                                str(i) + ' ' + str(c) + ' ' + str(f))
                            l, temp_newPTV, temp_history, index_PTV, temp_context = _ptv_run_slot(
                                c, f, i, newPTV, temp_context, index_PTV,
                                importantpts, PATH_OUT, stats=stats)
                            if (l == 4):
                                # Status 4: the pass is not counted.
                                continue
                            newPTV = pd.concat([newPTV, temp_newPTV.iloc[1:]])
                            historyofpoints = pd.concat(
                                [historyofpoints, temp_history])
                            err += l
                            err_by_pass[i] += l
                            m += 1
                            if c in channel_err:
                                channel_err[c] += l
                                channel_m[c] += 1
                            file_.write(str(l).rstrip('\n'))
                            file_.write(" ")

                        _ptv_save_outputs(newPTV, historyofpoints, f, c,
                                          path_out=PATH_OUT)
                        file_.write("\n")

                def_context.Report(err)
            except Exception as e:
                # One broken file must not stop the whole month.
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                def_context.Report(
                    "Failed to process {0} at line {2} in {3}: {1}".format(
                        str(file), str(e),
                        sys.exc_info()[-1].tb_lineno, fname))

        # EPSILON keeps the divisions defined when nothing was processed.
        score_total = 1 - (err / (m + EPSILON))
        score_TF1 = 1 - (channel_err['TF1'] / (channel_m['TF1'] + EPSILON))
        score_M6 = 1 - (channel_err['M6'] / (channel_m['M6'] + EPSILON))
        score_F2 = 1 - (channel_err['France 2'] /
                        (channel_m['France 2'] + EPSILON))
        score_F3 = 1 - (channel_err['France 3'] /
                        (channel_m['France 3'] + EPSILON))
        score_matin = 1 - ((err_by_pass[0] * 3) / (m + EPSILON))
        score_aprem = 1 - ((err_by_pass[1] * 3) / (m + EPSILON))
        score_soir = 1 - ((err_by_pass[2] * 3) / (m + EPSILON))

        def_context.Report(m)
        def_context.Report("score Total:" + str(score_total))
        def_context.Report("score TF1:" + str(score_TF1))
        def_context.Report("score M6:" + str(score_M6))
        def_context.Report("score France 2:" + str(score_F2))
        def_context.Report("score France 3:" + str(score_F3))
        def_context.Report("score sur la matinée:" + str(score_matin))
        def_context.Report("score sur l'après midi:" + str(score_aprem))
        def_context.Report("score sur la soirée:" + str(score_soir))
        def_context.Report("temps de calcul:" + str(time.time() - t))
        df = pd.read_csv('scores.csv')
        # Same column order as the scores.csv created in the 'start' mode.
        df.loc[df.shape[0]] = [
            score_TF1, score_M6, score_F2, score_F3, score_total,
            score_matin, score_aprem, score_soir,
            stats['nb_rel'] / (m + EPSILON),
            (time.time() - t) * 3 / (m + EPSILON), argv[0]
        ]
        df.to_csv('scores.csv', index=False)

    elif (len(argv) == 2):
        PATH_OUT = get_temp_path()
        c = argv[0]
        f = argv[1]
        PTV, proba = def_context.load_file(str(f), str(c))
        # Checked before any indexing: the original indexed PTV first, so an
        # empty PTV raised IndexError and this message was never returned.
        if (len(PTV) == 0):
            return ("Fichier manquant")
        index_PTV = PTV.index[(PTV['debut'] <= 3 * 60) & (
            PTV['debut'] + PTV['DUREE'] > 3 * 60 + 5)].tolist()[0]
        def_context.Report('Starting with: %s' %
                           (PTV['TITRE'].iloc[index_PTV]))
        lastend = PTV['debut'].loc[index_PTV]
        currentduree = PTV['DUREE'].loc[index_PTV]
        newPTV = def_context.init_newPTV(PTV, str(c))
        historyofpoints = def_context.init_history(str(c), PTV, lastend,
                                                   currentduree)
        # The starting index is recomputed without the 5 minute margin, as
        # in the original; this second value is the one used from here on.
        index_PTV = PTV.index[(PTV['debut'] <= 3 * 60) & (
            PTV['debut'] + PTV['DUREE'] > 3 * 60)].tolist()[0]
        temp_context = historyofpoints.iloc[0]
        importantpts = def_context.get_important_points(c, PTV, index_PTV)
        help_info = def_context.get_help(c, PTV)
        print(help_info)
        for i in range(3):
            def_context.Report(str(i) + ' ' + str(c) + ' ' + str(f))
            l, temp_newPTV, temp_history, index_PTV, temp_context = _ptv_run_slot(
                c, f, i, newPTV, temp_context, index_PTV, importantpts,
                PATH_OUT)
            if (l == 4):
                continue
            newPTV = pd.concat([newPTV, temp_newPTV.iloc[1:]])
            historyofpoints = pd.concat([historyofpoints, temp_history])
        _ptv_save_outputs(newPTV, historyofpoints, f, c)
Beispiel #19
0
# Column names, in positional order, of one `context` row as passed to
# def_context.process(); context[k] below refers to the k-th name here.
_NEWPTV_CONTEXT_COLUMNS = [
    'minute', 'partie de la journée', 'Change Point',
    'pourcentage', 'partie du programme', 'programme',
    'duree', 'nombre de pub potentiel', 'lastCP',
    'lastPub', 'lastend', 'currentduree', 'Pubinhour',
    'probability of CP', 'nb de pubs encore possible',
    'chaine', 'CLE-FORMAT', 'CLE-GENRE', 'day', 'part'
]


def _report_period_error(index_ipts):
    """Log which checkpoint (0, 1, or any later one) was reached in error."""
    if index_ipts == 0:
        def_context.Report("erreur sur la matinée")
    elif index_ipts == 1:
        def_context.Report("erreur sur l'après midi")
    else:
        def_context.Report("erreur sur la soirée")


def _predict_change_point_class(context, models, PTV, index_PTV):
    """Return the stacked-model class for one context row.

    The return value is 0 (nothing), 1 (commercial break) or 2 (end of the
    programme), after the hand-written vetoes at the bottom were applied.
    `models` is the 6-tuple (XGB, CatBoost, rf, dt, gb, logistic).
    """
    XGB, CatBoost, rf, dt, gb, logistic = models
    X = def_context.process(
        pd.DataFrame([context], index=[0],
                     columns=_NEWPTV_CONTEXT_COLUMNS)).values
    res1 = CatBoost[0].predict_proba(X)
    res2 = CatBoost[1].predict_proba(X)
    # NOTE(review): `ntree_limit` / `best_ntree_limit` are deprecated in
    # recent xgboost releases -- confirm the pinned version before upgrading.
    res3 = XGB[0].predict(xgb.DMatrix(X),
                          ntree_limit=XGB[0].best_ntree_limit)
    res4 = XGB[1].predict(xgb.DMatrix(X),
                          ntree_limit=XGB[1].best_ntree_limit)
    res5 = rf.predict_proba(X)
    res6 = gb.predict_proba(X)
    res7 = dt.predict_proba(X)
    # Average each pair of CatBoost / XGB models class by class.
    y_pred = [(res1[0][k] + res2[0][k]) * 0.5 for k in range(3)]
    y_pred2 = [(res3[0][k] + res4[0][k]) * 0.5 for k in range(3)]
    # First-level outputs become the features of the logistic meta-model.
    stacked = pd.concat([
        pd.DataFrame(y_pred).T,
        pd.DataFrame(y_pred2).T,
        pd.DataFrame(res7),
        pd.DataFrame(res6),
        pd.DataFrame(res5)
    ], axis=1)
    stacked = stacked.replace([np.inf, -np.inf], np.nan)
    stacked = stacked.fillna(1)
    cla = np.argmax(logistic.predict_proba(stacked.values))

    # Vetoes: override predictions that contradict the schedule.
    if cla == 1 and context[14] == 0:
        cla = 0
    if cla == 2 and context[3] < 0.5 and context[11] > 30:
        cla = 0
    if cla == 2 and context[3] < 0.9 and context[11] >= 180:
        cla = 0
    # Positional lookup, consistent with every other use of index_PTV.
    if (cla == 2 and PTV['TITRE'].iloc[index_PTV] == 'Programmes de la nuit'
            and context[3] < 1):
        cla = 0
    return cla


def _overrun_allowance(chaine, i, context, title):
    """Return the Predictiontimer value to arm when context[3] hits 1.

    The value depends on the channel, the minute `i`, the duration class
    context[6], the programme kind context[5] and the current title. The
    table used for 'TF1' is also the one used for every channel but 'M6'.
    """
    length = context[6]
    if i < 20 * 60 + 30:
        if chaine == 'M6':
            if i < 8 * 60 + 56:
                return 0
            if 13 * 60 < i < 14 * 60:
                return 5
            if title in ['M6 boutique']:
                return 0
            if length == "très court":
                return 0
            if length == "court":
                return 2
            if length == 'long':
                return 15
            return 5
        if 11.5 * 60 <= i <= 14 * 60 or 19.5 * 60 < i < 21 * 60:
            return 1
        if length == "très court":
            return 0
        if title == 'Téléshopping':
            return 5
        if length == "très long" or length == "long":
            return 15
        return 5
    if chaine == 'M6':
        if length in ("très court", "court", "moyen", "très long", 'long'):
            return 15
        return 5
    if context[5] == 'Journal':
        return 10 if i < 20 * 60 else 0
    if length == "très court":
        return 4
    if length == 'long':
        return 15
    return 5


def make_newPTV(PTV, proba, chaine, index, lastPTV, lastcontext, index_PTV,
                importantpts, date, path):
    """Rebuild the programme schedule minute by minute up to a checkpoint.

    Starting at the minute after ``lastcontext[0]``, the function walks
    through every minute up to (but excluding)
    ``min(importantpts[index][0] + 5, 1620)``. For each minute it builds a
    context row with ``def_context.get_context``, re-synchronises against
    the checkpoints in ``importantpts``, asks the stacked classifiers whether
    a commercial break or an end of programme occurs, and appends the
    resulting events to a new schedule.

    Args:
        PTV: DataFrame of the planned schedule; the columns 'TITRE',
            'DUREE', 'GENRESIMPLE' and 'debut' are read.
        proba: forwarded unchanged to ``def_context.get_context``.
        chaine: channel name; 'M6' gets specific rules.
        index: position in ``importantpts`` of the checkpoint bounding the
            run; also the first checkpoint that is verified.
        lastPTV: unused, kept for interface compatibility.
        lastcontext: context row to resume from; positions 0, 8, 9, 10, 11
            and 12 are read (see ``_NEWPTV_CONTEXT_COLUMNS``).
        index_PTV: position in ``PTV`` of the programme currently on air.
        importantpts: checkpoints, each indexable as (minute, title, ...).
        date: forwarded unchanged to ``def_context.get_context``.
        path: forwarded to ``def_context.load_models``.

    Returns:
        Tuple ``(newPTV, historyofpoints, labels, error, index_PTV,
        context)``: the rebuilt schedule, the context rows that were
        examined, the class recorded for each of them, the number of hard
        resets, the final programme position and the last context (which is
        ``lastcontext`` when no minute was processed).
    """
    Predictiontimer = 200
    Pubinhour = lastcontext[12]
    lastCP = lastcontext[8]
    lastPub = lastcontext[9]
    lastend = lastcontext[10]
    currentduree = lastcontext[11]
    planifiedend = lastcontext[10] + lastcontext[11]
    nbpub = 0
    Recall = 1
    error = 0
    per = 1
    index_ipts = index
    help_points = def_context.get_help(chaine, PTV)
    newPTV = def_context.init_newPTV(PTV, chaine)
    historyofpoints = def_context.init_history(chaine, PTV, lastend,
                                               currentduree)
    historyofpoints.loc[0] = lastcontext
    labels = [0]
    start = lastcontext[0] + 1
    end = importantpts[index][0]
    XGB, CatBoost, rf, dt, gb, logistic = def_context.load_models(path)
    models = (XGB, CatBoost, rf, dt, gb, logistic)
    # Keeps `context` bound for the return statement when the range below
    # is empty (it used to raise UnboundLocalError in that case).
    context = lastcontext

    for i in tqdm(range(start, min(end + 5, 1620))):
        # NOTE(review): this guard can never be true because the range stops
        # before end + 5; kept as-is until the intended bound is confirmed.
        if (i == end + 5 and index == 2):
            newPTV.loc[newPTV.shape[0]] = [(i + currentduree) % 1440,
                                           PTV['TITRE'].iloc[index_PTV], 'non',
                                           1, "fin d'un programme"]
        # The per-hour commercial counter restarts on every full hour.
        if i % 60 == 0:
            Pubinhour = 0
        lastPub += 1
        lastCP += 1
        # Once every checkpoint is consumed, keep pointing at the last one.
        if index_ipts == len(importantpts):
            index_ipts -= 1
        context = def_context.get_context(i, PTV.iloc[index_PTV], lastCP,
                                          lastPub, lastend, currentduree,
                                          planifiedend, Pubinhour, proba,
                                          nbpub, chaine, per, PTV, index_PTV,
                                          date)

        genre = PTV['GENRESIMPLE'].iloc[index_PTV].split(' ')[0]
        previous_genre = PTV['GENRESIMPLE'].iloc[index_PTV - 1].split(' ')[0]
        m6_gap = ((i - lastend) < 2 and Recall > 0 and per < 0.97
                  and chaine == 'M6')
        if m6_gap and ((genre == previous_genre and genre == 'Téléfilm')
                       or 15 * 60 < i < 16 * 60):
            # M6 airs a long commercial block between two TV films (and in
            # the 15h-16h slot): postpone the start and the ad timer.
            lastend = i + 5
            lastPub = -25
            Recall -= 0.5
        elif i == importantpts[index_ipts][0]:
            # Checkpoint reached: compare the reconstruction with the
            # programme expected here and reset the state if it drifted.
            # Tolerances (minutes) are tighter between 3h and 22h.
            if 3 * 60 < i < 22 * 60:
                late_start_tol, early_end_tol = 13, 10
            else:
                late_start_tol, early_end_tol = 20, 20
            expected_title = importantpts[index_ipts][1]

            if PTV['TITRE'].iloc[index_PTV] == expected_title:
                # Right programme; was its start detected too early?
                # Must be evaluated before lastend is overwritten below.
                started_too_early = i - lastend > late_start_tol
                Predictiontimer = 200
                Pubinhour = 0
                lastCP = 0
                lastPub = 0
                lastend = i
                currentduree = PTV['DUREE'].iloc[index_PTV]
                planifiedend = (lastend + currentduree)
                nbpub = 0
                if started_too_early:
                    _report_period_error(index_ipts)
                    error += 1
                    comment = "--HARD RESET OF ALGORITHM--(in programme)"
                else:
                    comment = "--soft reset to avoid any error--"
                newPTV.loc[newPTV.shape[0]] = [
                    i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                    context[3], comment
                ]
            elif (PTV['TITRE'].iloc[(index_PTV + 1) % PTV.shape[0]] ==
                  expected_title):
                # The expected programme is the next one: close the current
                # programme now; it is an error only if its planned end was
                # still far away.
                near_planned_end = planifiedend - i < early_end_tol
                if near_planned_end:
                    comment = "fin d'un programme"
                else:
                    comment = "--HARD RESET OF ALGORITHM--(Oups)"
                newPTV.loc[newPTV.shape[0]] = [
                    i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                    context[3], comment
                ]
                lastend = i
                lastCP = 0
                index_PTV = (index_PTV + 1) % (PTV.shape[0])
                currentduree = PTV['DUREE'].iloc[index_PTV]
                planifiedend = (lastend + currentduree)
                Predictiontimer = 200
                nbpub = 0
                if not near_planned_end:
                    _report_period_error(index_ipts)
                    error += 1
            else:
                # Neither the current nor the next programme matches. Jump
                # only if the expected programme is really scheduled to
                # start at this minute; otherwise ignore the checkpoint.
                matches = PTV.index[
                    (PTV['TITRE'] == expected_title)
                    & (PTV['debut'] == i)].tolist()
                if len(matches) > 0:
                    index_PTV = matches[0]
                    Predictiontimer = 200
                    Pubinhour = 0
                    lastCP = 0
                    lastPub = 0
                    lastend = i
                    currentduree = PTV['DUREE'].iloc[index_PTV]
                    planifiedend = (lastend + currentduree)
                    nbpub = 0
                    _report_period_error(index_ipts)
                    error += 1
                    newPTV.loc[newPTV.shape[0]] = [
                        i % 1440, PTV['TITRE'].iloc[index_PTV], 'non',
                        context[3],
                        "--HARD RESET OF ALGORITHM--(out of programme)"
                    ]
            index_ipts += 1

        if context[2] or i in help_points:
            # Detected change point, or a minute listed by get_help().
            historyofpoints.loc[historyofpoints.shape[0]] = context
            if lastCP < min(4, currentduree):
                # Too close to the previous change point to be a new event.
                labels.append(0)
            else:
                cla = _predict_change_point_class(context, models, PTV,
                                                  index_PTV)
                if cla == 1:
                    newPTV.loc[newPTV.shape[0]] = [
                        i % 1440, "publicité", 'oui', context[3],
                        "publicité dans un programme"
                    ]
                    lastCP = 0
                    lastPub = 0
                    Pubinhour += 4
                    nbpub += 1
                    labels.append(1)
                elif cla == 2:
                    newPTV.loc[newPTV.shape[0]] = [
                        i % 1440, PTV['TITRE'].iloc[index_PTV], 'oui',
                        context[3], "fin d'un programme"
                    ]
                    lastend = i
                    lastCP = 0
                    index_PTV = (index_PTV + 1) % (PTV.shape[0])
                    currentduree = PTV['DUREE'].iloc[index_PTV]
                    planifiedend = (lastend + currentduree)
                    Predictiontimer = 200
                    nbpub = 0
                    per = context[3]
                    labels.append(2)
                else:
                    labels.append(0)
        elif Predictiontimer <= 0:
            # The allowed overrun is exhausted without any detected end:
            # force the switch to the next programme.
            historyofpoints.loc[historyofpoints.shape[0]] = context
            labels.append(2)
            newPTV.loc[newPTV.shape[0]] = [
                i % 1440, PTV['TITRE'].iloc[index_PTV], 'non', context[3],
                "fin non détectée d'un programme"
            ]
            lastend = i
            lastCP = 0
            index_PTV = (index_PTV + 1) % (PTV.shape[0])
            currentduree = PTV['DUREE'].iloc[index_PTV]
            planifiedend = (lastend + currentduree)
            Predictiontimer = 200
            nbpub = 0
            per = context[3]
        elif context[3] == 1:
            # Planned end reached exactly: arm the overrun countdown. Larger
            # values mean more trust in the classifiers to find the end.
            Predictiontimer = _overrun_allowance(
                chaine, i, context, PTV['TITRE'].iloc[index_PTV])
        elif context[3] > 1:
            Predictiontimer -= 1
    return newPTV, historyofpoints, labels, error, index_PTV, context
Beispiel #20
0
def _reap_finished(processes):
    """Remove, in place, every child process that has already exited.

    ``poll()`` returns ``None`` while a child is still running, so only
    those entries are kept.
    """
    processes[:] = [proc for proc in processes if proc.poll() is None]


def _wait_for_slot(processes, limit, delay=5):
    """Block until fewer than *limit* children in *processes* are running.

    Finished children are reaped on every pass; *delay* is the number of
    seconds slept between passes while the pool is still full.
    """
    while len(processes) >= limit:
        _reap_finished(processes)
        if len(processes) >= limit:
            time.sleep(delay)


def _launch_cost_job(processes, chaine, date):
    """Start ``cost.py`` for one (chaine, date) pair and track the child."""
    processes.append(Popen(['python', 'cost.py', str(chaine), str(date)]))
    def_context.Report(
        'calcul des coûts pour la journée du %s sur la chaîne %s' %
        (date, chaine))


def main(argv):
    """Compute costs for one channel/day, or dispatch one job per PTV file.

    The behaviour depends on ``len(argv)``:

    * 2 -- ``argv`` is ``[chaine, date]``: call ``find_cost`` once per entry
      of ``PATH_OUT`` and store the four components of each result as new
      columns of ``cout.csv``.
    * 1 -- ``argv[0]`` is compared with the date and the channel parsed from
      each file name in ``PATH_IN + 'PTV/'``; a ``cost.py`` child process is
      started for every match, with at most ``MAX_PROCESSES`` running.
    * anything else -- a ``cost.py`` child process is started for every file
      in ``PATH_IN + 'PTV/'`` (at most 5 at a time), then all children are
      awaited and the file count and elapsed time are reported.

    Args:
        argv: list of command-line arguments (without the program name).
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = def_context.get_path()

    if len(argv) == 2:
        chaine, date = argv[0], argv[1]
        numero, nom_chaine = def_context.get_tuple(chaine)
        # One cost evaluation per entry found in the output directory.
        res = [
            find_cost(date, numero, nom_chaine, index)
            for index in range(len(os.listdir(PATH_OUT)))
        ]
        def_context.Report(res)

        # NOTE(review): LOCK presumably serialises access to cout.csv --
        # confirm. It is now released even if reading or writing fails.
        LOCK.acquire()
        try:
            try:
                couts = pd.read_csv('cout.csv')
            except Exception:
                # Best effort: start from an empty table when the file is
                # missing or unreadable.
                couts = pd.DataFrame()
            try:
                prefix = date + '_' + nom_chaine
                suffixes = ('_tout', '_matinee', '_apresmidi', '_soiree')
                for position, suffix in enumerate(suffixes):
                    couts[prefix + suffix] = [cost[position] for cost in res]
            except Exception as e:
                def_context.Report('humm: ' + str(e))
            couts.to_csv('cout.csv', index=False)
        finally:
            LOCK.release()

    elif len(argv) == 1:
        Processes = []
        for file in os.listdir(PATH_IN + 'PTV/'):
            date = file.split('_')[2]
            chaine = file.split('_')[-1].split('.')[0]
            print(type(date), type(chaine), date, chaine)
            if str(argv[0]) not in [str(date), str(chaine)]:
                continue
            print(argv[0])
            # The original indexed the list with an undefined name (`enp`)
            # here, which raised NameError as soon as the pool was full.
            _wait_for_slot(Processes, MAX_PROCESSES)
            _launch_cost_job(Processes, chaine, date)

    else:
        t = time.time()
        Processes = []
        files = os.listdir(PATH_IN + 'PTV/')
        for file in files:
            print(len(Processes))
            date = file.split('_')[2]
            chaine = file.split('_')[-1].split('.')[0]
            # This branch historically caps the pool at 5 rather than
            # MAX_PROCESSES; the value is kept as is.
            _wait_for_slot(Processes, 5)
            _launch_cost_job(Processes, chaine, date)
            time.sleep(2)

        # Drain: wait for every remaining child before reporting.
        while Processes:
            _reap_finished(Processes)
            if Processes:
                time.sleep(5)
        def_context.Report(len(files))
        def_context.Report(time.time() - t)