Example #1
0
def create_report_prediction(report_dict, event_target, ruta_relativa_datos_auxiliares,
                             ruta_directorio_informes, enco):
    '''Render the prediction report of one event/target pair and save it as a PDF.

    The Jinja2 template is rendered into a temporary HTML file placed in
    ruta_relativa_datos_auxiliares, converted with pisa into
    "Prediction_report_for_<event>.pdf" inside ruta_directorio_informes, and the
    temporary file is removed afterwards (also when the generation fails).

    Parameters:
        report_dict: dictionary indexed by event name; it also holds the path of
            the logo under glod.get_logo_key().
        event_target: sequence whose item 0 is the event name and whose item 1 is
            the target to predict.
        ruta_relativa_datos_auxiliares: directory that contains the template and
            receives the temporary HTML file.
        ruta_directorio_informes: directory where the PDF is written.
        enco: encoding used to write and read back the temporary HTML file.

    Raises:
        KeyError: if the event or the logo key is missing in report_dict.
    '''

    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares, 'temp_html.html')

    # Jinja2 template names always use '/', so the path is not built with os.path.join.
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' + glod.get_prediction_template_name())

    event = event_target[0]
    target_to_predict = event_target[1]

    dic_info_event = report_dict[event]
    summary_target_to_predict = glod.get_empty_string()

    template_vars = {glod.get_title_key(): "Prediction report for " + event,
                     glod.get_logo_key():
                     encode_image(report_dict[glod.get_logo_key()].replace('\'', glod.get_empty_string())),
                    }

    if target_to_predict in dic_info_event:
        target_info = dic_info_event[target_to_predict]
        # Keep only the model name, dropping the "(...)" parameter list of its repr.
        model = str(target_info[glod.get_best_model_key()]).split("(")[0]
        # NOTE(review): the markup below (unclosed <strong>, '&nbsp' without ';')
        # is kept exactly as it was so the rendered report does not change.
        summary_target_to_predict = "<p><strong>Target: <strong>" + '&nbsp' +\
        target_to_predict + "</br></br>"
        summary_target_to_predict += "<p><strong>Model: <strong>" + '&nbsp' + model + "</br>"
        summary_target_to_predict += "<p><strong>Accuracy: <strong>" + '&nbsp' +\
        str(target_info[glod.get_accuracy_parameter_name()]) + "</br>"
        summary_target_to_predict += "<strong>Correct classifications: <strong>" + '&nbsp' +\
        str(target_info['Correct']) + "</br>"
        summary_target_to_predict += "<strong>Total number of observations: <strong>" + '&nbsp' +\
        str(target_info['Total']) + "</br>"
        summary_target_to_predict += "<strong>Total number of unknown observations classified: <strong>" + '&nbsp' + str(target_info['Predicted']) + "</br>"
        template_vars['target_to_predict_cm'] = encode_image(
            target_info['target_to_predict_cm'].replace('\'', glod.get_empty_string()))

    template_vars['target'] = summary_target_to_predict

    # Attach the null handler before converting and only once per process;
    # adding a new handler on every call makes the logger's handler list grow.
    pisa_logger = logging.getLogger("xhtml2pdf")
    if not any(isinstance(handler, PisaNullHandler) for handler in pisa_logger.handlers):
        pisa_logger.addHandler(PisaNullHandler())

    try:
        with codecs.open(ruta_plantilla_temporal, glod.get_write_mode(), encoding=enco) as output_file:
            output_file.write(template.render(template_vars))

        with codecs.open(ruta_plantilla_temporal, mode=glod.get_read_mode(), encoding=enco) as read_html:
            pdf_resultante = os.path.join(ruta_directorio_informes,
                                          "Prediction_report_for_"+ event +".pdf")
            with open(pdf_resultante, mode=glod.get_writebyte_mode()) as pdf_gen:
                pisa.CreatePDF(read_html.read(), pdf_gen)
    finally:
        # The temporary HTML must not outlive the call, even if the generation failed.
        if os.path.exists(ruta_plantilla_temporal):
            os.remove(ruta_plantilla_temporal)
Example #2
0
def create_report_current_dictionary_models(dictionary_of_models,ruta_relativa_datos_auxiliares,ruta_directorio_resultados,list_of_parameters_models_events_dict,logo_path,enco):
    '''Write a PDF that lists, per event and target, the stored model information.

    For every event/target of dictionary_of_models the report shows the entries
    named by items 0, 3 and 1 of list_of_parameters_models_events_dict (in that
    order). List values are printed sorted, one element per line. After the
    entry named by item 0 the model name (taken from the file name stored under
    item 1) is added, plus the entry named by item 2 when the item-0 value equals
    glod.get_unsupervised_name().

    Parameters:
        dictionary_of_models: nested dictionary event -> target -> model data.
        ruta_relativa_datos_auxiliares: directory with the template; it also
            receives the temporary HTML file.
        ruta_directorio_resultados: directory where
            "Current_status_dictionary_events_and_models.pdf" is written.
        list_of_parameters_models_events_dict: sequence with the key names used
            inside each model entry (items 0 to 3 are read).
        logo_path: path of the logo image (quotes are stripped before encoding).
        enco: target encoding; characters it cannot represent are dropped from
            the HTML handed to pisa.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares,'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'dictionary_models_template.html')

    tabulacion = "&nbsp;&nbsp;&nbsp;&nbsp;"
    doble_tabulacion = tabulacion + tabulacion

    template_vars = {glod.get_title_key(): "Report of the information of the Dictionary of models",
                     glod.get_logo_key(): encode_image(logo_path.replace('\'',''))
                     }

    learning_key = list_of_parameters_models_events_dict[0]
    model_path_key = list_of_parameters_models_events_dict[1]
    list_elements = [learning_key, list_of_parameters_models_events_dict[3], model_path_key]

    informacion = ""
    for event in dictionary_of_models:
        informacion += "<strong><u>"+ event +"</u></strong></br></br>"
        for target in dictionary_of_models[event]:
            model_data = dictionary_of_models[event][target]
            informacion += doble_tabulacion + "<strong><i>Target:</i></strong>" + "&nbsp;&nbsp;" + target + "</br>"
            for key in list_elements:
                informacion += doble_tabulacion + "<strong><i>" + key + ": </i></strong>"
                value = model_data[key]
                if isinstance(value, list):
                    informacion += "<br>"
                    for element in sorted(value):
                        informacion += doble_tabulacion + doble_tabulacion + element + "</br>"
                else:
                    informacion += value + "</br>"
                    if key == learning_key:
                        # The model name is the last '_'-separated piece of the
                        # stored file name, without its extension.
                        model_name = model_data[model_path_key].split('_')[-1].split('.')[0]
                        informacion += doble_tabulacion + "<strong><i>best model: </i></strong>&nbsp;&nbsp;" + model_name + "</br>"
                        if value == glod.get_unsupervised_name():
                            informacion += doble_tabulacion + "<strong><i>dic_reassingment: </i></strong>&nbsp;&nbsp;" + str(model_data[list_of_parameters_models_events_dict[2]]) + "</br>"
            informacion += "</br>"

    if informacion == "":
        informacion = "No models were created yet"
    template_vars[glod.get_info_key()] = informacion

    try:
        # html
        with codecs.open(ruta_plantilla_temporal,'w',encoding='utf-8') as output_file:
            output_file.write(template.render(template_vars))

        # pdf: read the file back with the encoding it was written in (reading
        # UTF-8 bytes as `enco` garbles non-ASCII text), then drop the characters
        # that `enco` cannot represent.
        with codecs.open(ruta_plantilla_temporal, mode='r',encoding='utf-8') as read_html:
            pdf_resultante = os.path.join(ruta_directorio_resultados,"Current_status_dictionary_events_and_models.pdf")
            with open(pdf_resultante, mode='wb') as pdf_gen:
                pisa.CreatePDF(read_html.read().encode(enco, 'ignore').decode(enco),pdf_gen)
    finally:
        # Remove the temporary HTML even when the generation failed.
        if os.path.exists(ruta_plantilla_temporal):
            os.remove(ruta_plantilla_temporal)
Example #3
0
def create_basic_report_data_dict(umbral,target,main_metric,feature_selection_method,penalize_falses,lista_variables_descartadas,ruta_logo):
    '''Return the base dictionary used by the general execution report.

    umbral, main_metric, feature_selection_method and penalize_falses are stored
    as strings; target, ruta_logo and lista_variables_descartadas are stored
    unchanged. The general execution information starts as an empty string.
    '''
    report_data = {}
    report_data[glod.get_title_key()] = "Overview With Execution Information"
    report_data[glod.get_logo_key()] = ruta_logo
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_main_metric_key()] = str(main_metric)
    report_data[glod.get_feature_selection_key()] = str(feature_selection_method)
    report_data[glod.get_penalization_name()] = str(penalize_falses)
    report_data[glod.get_objective_target_key()] = target

    # Variables discarded by the user live in their own sub-dictionary.
    discarded_variables = {}
    discarded_variables[glod.get_deleted_by_user_key()] = lista_variables_descartadas
    report_data[glod.get_variables_key()] = discarded_variables

    report_data[glod.get_general_info_execution_key()] = ''
    return report_data
Example #4
0
def create_report_data_dict(evento,umbral,target,lista_variables_descartadas,ruta_logo):
    '''Return the initial report dictionary for one event.

    The general-info section starts with empty target/training/test
    sub-dictionaries and a variables sub-dictionary that holds the variables
    discarded by the user plus two empty lists. umbral is stored as a string and
    the warning text starts empty.
    '''
    variables_info = {}
    variables_info[glod.get_deleted_by_user_key()] = lista_variables_descartadas
    variables_info[glod.get_empty_or_constant_key()] = []
    variables_info[glod.get_score_relevant_key()] = []

    general_info = {}
    general_info[glod.get_report_generic_target_key()] = {}
    general_info[glod.get_variables_key()] = variables_info
    general_info[glod.get_training_division_key()] = {}
    general_info[glod.get_test_division_key()] = {}

    report_data = {}
    report_data[glod.get_objective_target_key()] = target
    report_data[glod.get_event_key()] = evento
    report_data[glod.get_logo_key()] = ruta_logo
    report_data[glod.get_report_general_info_key()] = general_info
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_warning_key()] = ''
    return report_data
Example #5
0
def create_report_data_dict(evento, umbral, target, lista_variables_descartadas, ruta_logo):
    '''Create the skeleton of the report data dictionary for the current event.

    The returned dictionary stores the target, the event, the logo path, the
    threshold (as a string), an empty warning text and a general-info section
    whose target/training/test entries start as empty dictionaries.
    '''

    # Variables section: the user's discarded variables plus two empty lists.
    variables_info = dict((
        (glod.get_deleted_by_user_key(), lista_variables_descartadas),
        (glod.get_empty_or_constant_key(), []),
        (glod.get_score_relevant_key(), []),
    ))

    general_info = dict((
        (glod.get_report_generic_target_key(), {}),
        (glod.get_variables_key(), variables_info),
        (glod.get_training_division_key(), {}),
        (glod.get_test_division_key(), {}),
    ))

    return dict((
        (glod.get_objective_target_key(), target),
        (glod.get_event_key(), evento),
        (glod.get_logo_key(), ruta_logo),
        (glod.get_report_general_info_key(), general_info),
        (glod.get_umbral_key(), str(umbral)),
        (glod.get_warning_key(), glod.get_empty_string()),
    ))
Example #6
0
def create_basic_report_data_dict(basic_parameters, lista_variables_descartadas, ruta_logo):
    '''Create the skeleton of the report data dictionary for the whole execution.

    basic_parameters is indexed from 0 to 4 and provides, in this order: the
    threshold, the target, the main metric, the feature selection method and the
    penalization flag. All of them except the target are stored as strings.
    '''

    # Read the five positional settings by index, in the documented order.
    (umbral, target, main_metric,
     feature_selection_method, penalize_falses) = (basic_parameters[posicion]
                                                   for posicion in range(5))

    report_data = {}
    report_data[glod.get_title_key()] = "Overview With Execution Information"
    report_data[glod.get_logo_key()] = ruta_logo
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_main_metric_key()] = str(main_metric)
    report_data[glod.get_feature_selection_key()] = str(feature_selection_method)
    report_data[glod.get_penalization_name()] = str(penalize_falses)
    report_data[glod.get_objective_target_key()] = target

    discarded_variables = {}
    discarded_variables[glod.get_deleted_by_user_key()] = lista_variables_descartadas
    report_data[glod.get_variables_key()] = discarded_variables

    report_data[glod.get_general_info_execution_key()] = glod.get_empty_string()
    return report_data
Example #7
0
def _render_template_to_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco):
    '''Render template into the temporary HTML file and convert that file to a PDF.'''
    # Attach the null handler before converting and only once per process;
    # adding a new handler for every report makes the handler list grow.
    pisa_logger = logging.getLogger("xhtml2pdf")
    if not any(isinstance(handler, PisaNullHandler) for handler in pisa_logger.handlers):
        pisa_logger.addHandler(PisaNullHandler())

    with codecs.open(ruta_plantilla_temporal,'w',encoding=enco) as output_file:
        output_file.write(template.render(template_vars))

    # The HTML is read back with the same encoding that was used to write it.
    with codecs.open(ruta_plantilla_temporal, mode='r',encoding=enco) as read_html:
        with open(pdf_resultante, mode='wb') as pdf_gen:
            pisa.CreatePDF(read_html.read(),pdf_gen)


def _split_distribution_summary(heading_html, division):
    '''Return the HTML paragraph and the null accuracy of one data split.

    division maps every target value to its number of observations. The null
    accuracy is the largest share of the split that belongs to a single target,
    rounded to 4 decimals; it is 0.0 when the split has no observations.
    '''
    html = heading_html
    counts = []
    for split_target in auxf.natsorted(division.keys()):
        html += "&nbsp;&nbsp;&nbsp;&nbsp;"+ "With target " + str(split_target) + " :"+ str(division[split_target]) + "</br>"
        counts.append(float(division[split_target]))
    html += "</p>"

    total = sum(counts, 0.0)
    if not counts or total == 0:
        # An empty split must not break the whole report.
        return html, 0.0
    return html, max(round(count / total, 4) for count in counts)


def _variables_summary_html(variables_info):
    '''Build the "Summary of variables" paragraph from the variables section of the report.'''
    indent = "&nbsp;&nbsp;&nbsp;&nbsp;"
    summary = "<p><strong>Summary of variables</strong></br>"

    discarded_for_event = variables_info[glod.get_user_discarded_key()]
    summary += "<br><i><u>Deleted by the user at the begining:</i></u></br>"
    for deleted_var in variables_info[glod.get_deleted_by_user_key()]:
        if deleted_var in discarded_for_event:
            shown_var = "<strong>" + deleted_var + "</strong>"
        else:
            shown_var = deleted_var
        summary += indent + shown_var + "</br>"
    summary += "&nbsp;&nbsp;&nbsp;&nbsp;<i>*variables in bold were specified by the user to be discarded specifically for this event<i></br>"
    summary += "</br>"

    summary += "<br><i><u>Deleted in execution time(Empty or Constant):</i></u></br>"
    for emp_con_var in variables_info[glod.get_empty_or_constant_key()]:
        summary += indent + emp_con_var + "</br>"
    summary += "</br>"

    summary += "<br><i><u>Requested for the event by the user:</i></u></br>"
    for req_var in variables_info[glod.get_user_requested_key()]:
        summary += indent + req_var + "</br>"
    summary += "</br>"

    summary += "<br><i><u>Used during the process:</i></u></br>"
    # Scored variables go first (highest score first) and are shown in bold.
    relevant_scores = variables_info[glod.get_score_relevant_key()]
    for name, score in sorted(relevant_scores.items(), key=operator.itemgetter(1), reverse=True):
        summary += indent + "<strong>" + name +'&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'+ str(score) +"</strong>" + "</br>"
    # Then the variables used in the process that have no score.
    for used_var in variables_info[glod.get_used_in_process()]:
        if used_var not in relevant_scores:
            summary += indent + used_var + "</br>"
    summary += "&nbsp;&nbsp;&nbsp;&nbsp;<i>*variables in bold were used to train the models<i></br>"
    summary += "</p>"
    return summary


def _metrics_by_label_table(metrics):
    '''Build the HTML table with one row per label and one column per metric.'''
    table = "<table width='100%' border='1' cellspacing='0' cellpadding='5'>"
    columns = None
    for label in auxf.natsorted(metrics.keys()):
        if columns is None:
            # The header row is taken from the metrics of the first label.
            columns = metrics[label].keys()
            table += "<tr><td align='center' class='black'>"+ glod.get_report_generic_target_key() +"</td>"
            for column in columns:
                table += "<td align='center' class='black'>" + column +"</td>"
            table += "</tr>"
        table += "<tr><td>" + label.replace('target_','') + "</td>"
        for column in columns:
            table += "<td>"+str(metrics[label][column])+"</td>"
        table += "</tr>"
    table += "</table>"
    return table


def create_report_current_model(report_dict,lista_modelos,ruta_relativa_datos_auxiliares,ruta_directorio_informes,enco):
    '''Generate the PDF report(s) of the current event.

    When lista_modelos is an empty list an "incomplete execution" report is
    written as "report_<event>_incomplete.pdf". Otherwise one
    "<model>_report_for_<event>.pdf" is written for every model of lista_modelos
    that has an entry in report_dict. The temporary HTML file used for the
    conversion is always removed at the end.

    Parameters:
        report_dict: report dictionary of the event (general information plus
            one entry per trained model).
        lista_modelos: names of the models to report.
        ruta_relativa_datos_auxiliares: directory with the templates; it also
            receives the temporary HTML file.
        ruta_directorio_informes: directory where the PDF files are written.
        enco: encoding used to write and read back the temporary HTML file.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares,'temp_html.html')

    try:
        if(lista_modelos == []): #if process not completed
            # Use the template for incomplete reports.
            template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'incomplete_event_report_template.html')

            template_vars = {glod.get_title_key(): "Incomplete Execution Report",
                             glod.get_logo_key(): encode_image(report_dict[glod.get_logo_key()].replace('\'','')),
                             glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                             glod.get_event_key(): report_dict[glod.get_event_key()],
                             glod.get_info_key(): "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + report_dict['Warning_info']
                             }

            pdf_resultante = os.path.join(ruta_directorio_informes,"report_"+report_dict[glod.get_event_key()]+"_incomplete.pdf")
            _render_template_to_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco)

        else:
            lista_pares_modelo_indice = auxf.order_models_by_score_and_time(report_dict,lista_modelos)
            template = env.get_template(ruta_relativa_datos_auxiliares + '/' +'report_template.html') #using standard template
            for modelo in lista_modelos:
                if modelo not in report_dict:
                    continue

                general_info = report_dict[glod.get_report_general_info_key()]
                model_data = report_dict[modelo]

                target_counts = general_info[glod.get_report_generic_target_key()]
                observations_targets = "<p><strong>Target distribution of observations</strong></br>"
                for ob_target in auxf.natsorted(target_counts.keys()):
                    observations_targets += "&nbsp;&nbsp;&nbsp;&nbsp;"+ "With target " + str(ob_target) + " :"+ str(target_counts[ob_target]) + "</br>"
                observations_targets += "</p>"

                variables_summary = _variables_summary_html(general_info[glod.get_variables_key()])

                #Information about the model
                accuracy = "</br></br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<strong>Accuracy: "+ str(float(round(model_data[glod.get_accuracy_parameter_name()],5)))+"</strong>"

                ranking = get_string_with_ranking_of_models(lista_pares_modelo_indice,modelo)

                model_parameters = model_data[glod.get_parameters_key()]
                model_info = "<p><strong>Parameters used to configure the model</strong></br>"
                for param in model_parameters:
                    model_info += "&nbsp;&nbsp;&nbsp;&nbsp;<i>"+ param + "</i>: " + str(model_parameters[param]) + "</br>"
                model_info += "</p>"

                time_parameters = model_data[glod.get_time_parameters_key()]
                tiempo_seleccion_parametros = time_parameters[glod.get_time_sel_finish_key()] - time_parameters[glod.get_time_sel_init_key()]
                tiempo_entrenamiento = time_parameters[glod.get_time_train_finish_key()] - time_parameters[glod.get_time_train_init_key()]
                time_info = "<p><strong>Time elapsed</strong></br>"
                time_info += "&nbsp;&nbsp;&nbsp;&nbsp;"+ "Parameters selection time: "+ str(tiempo_seleccion_parametros) + "</br>"
                time_info += "&nbsp;&nbsp;&nbsp;&nbsp;"+ "Training time: "+ str(tiempo_entrenamiento) + "</br>"
                time_info += "</p>"

                train_distribution_info, null_train_accuracy = _split_distribution_summary(
                    "<p></br><strong>Training Data Distribution</strong></br>",
                    general_info[glod.get_training_division_key()])
                test_distribution_info, null_test_accuracy = _split_distribution_summary(
                    "<p><strong>Test Data Distribution</strong></br>",
                    general_info[glod.get_test_division_key()])

                event = report_dict[glod.get_event_key()]
                template_vars = {glod.get_title_key(): "Execution Report",
                                 glod.get_logo_key():encode_image(report_dict[glod.get_logo_key()].replace('\'','')),
                                 glod.get_model_key(): modelo,
                                 glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                                 glod.get_event_key(): event,
                                 glod.get_accuracy_parameter_name(): str(accuracy)+"<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<strong>Null train acc: "+ str(null_train_accuracy)+"</strong>"+"<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<strong>Null test acc: "+ str(null_test_accuracy)+ "</strong></p>",
                                 glod.get_models_ranking_key(): ranking,
                                 glod.get_observations_targets_key(): observations_targets,
                                 glod.get_variables_summary_key(): variables_summary,
                                 glod.get_models_info_key(): model_info,
                                 glod.get_time_info_key(): time_info,
                                 glod.get_train_distribution_info_key(): train_distribution_info,
                                 glod.get_test_distribution_info_key(): test_distribution_info
                                 }

                micro_avg_metrics = model_data[glod.get_metrics_micro_avg_key()]
                metrics_info = ""
                for metric in micro_avg_metrics:
                    metrics_info += "<p>"+"<strong>"+metric+"</strong>: " + micro_avg_metrics[metric] +"</br>"
                metrics_info += "</p>"
                template_vars[glod.get_metrics_info_key()] = metrics_info

                # Optional images: each one is embedded only when its path is available.
                if glod.get_model_parameters_plot_name() in model_data:
                    template_vars[glod.get_image_parameters_accuracy_key()] = encode_image(model_data[glod.get_model_parameters_plot_name()].replace('\'',''))

                if glod.get_confussion_matrix_train_path_key() in model_data:
                    template_vars[glod.get_conf_train_img_key()] = encode_image(model_data[glod.get_confussion_matrix_train_path_key()].replace('\'',''))

                if glod.get_confussion_matrix_test_path_key() in model_data:
                    template_vars[glod.get_conf_test_img_key()] = encode_image(model_data[glod.get_confussion_matrix_test_path_key()].replace('\'',''))

                if glod.get_learning_curve_key() in model_data:
                    template_vars[glod.get_learning_curve_key()] = encode_image(model_data[glod.get_learning_curve_key()].replace('\'',''))

                template_vars[glod.get_metrics_by_label_key()] = _metrics_by_label_table(model_data[glod.get_metrics_key()])

                # Generate the HTML and then the PDF.
                pdf_resultante = os.path.join(ruta_directorio_informes,modelo + "_report_for_"+ event +".pdf")
                _render_template_to_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco)
    finally:
        # The temporary HTML must not outlive the call, even if a report failed.
        if(os.path.exists(ruta_plantilla_temporal)):
            os.remove(ruta_plantilla_temporal)
Example #8
0
def create_report_current_execution(report_dict,lista_eventos,lista_variables_usuario,lista_listas_variables_descartadas,lista_aprendizajes,lista_modelos, diccionario_aprendizajes, ruta_relativa_datos_auxiliares, ruta_directorio_resultados,enco):
    '''Write the general execution report ("General_execution_report_<target>.pdf").

    The report lists the common parameters stored in report_dict and, for every
    event, the features requested by the user, the variables discarded by the
    user and the learnings/models to apply. The per-event lists are read by the
    position of the event in lista_eventos.

    Parameters:
        report_dict: basic report dictionary (title, logo, target, threshold,
            main metric, feature selection method, penalization, discarded
            variables).
        lista_eventos: names of the events to process.
        lista_variables_usuario: per event, the features requested by the user.
        lista_listas_variables_descartadas: per event, the discarded variables.
        lista_aprendizajes: per event, the learning mode.
        lista_modelos: per event, the models; when the learning mode equals
            glod.get_all_learning_modes_name() item 0 and item 1 are two model
            lists (presumably supervised and unsupervised, titled with
            diccionario_aprendizajes[1] and [2]).
        diccionario_aprendizajes: names of the learning modes.
        ruta_relativa_datos_auxiliares: directory with the template; it also
            receives the temporary HTML file.
        ruta_directorio_resultados: directory where the PDF is written.
        enco: encoding used to write and read back the temporary HTML file.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares,'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'general_execution_template.html')

    template_vars = {glod.get_title_key(): report_dict[glod.get_title_key()],
                     glod.get_logo_key():encode_image(report_dict[glod.get_logo_key()].replace('\'','')),
                     glod.get_general_info_execution_key():''
                     }

    #General parameters (target,umbral,variables_descartadas)
    target = report_dict[glod.get_objective_target_key()]
    umbral = report_dict[glod.get_umbral_key()]
    main_metric = report_dict[glod.get_main_metric_key()]
    feature_selection_method = report_dict[glod.get_feature_selection_key()]
    penalize_falses = report_dict[glod.get_penalization_name()]
    lista_variables_descartadas = report_dict[glod.get_variables_key()][glod.get_deleted_by_user_key()]

    tabulacion = "&nbsp;&nbsp;&nbsp;&nbsp;"
    nivel_2 = tabulacion + tabulacion
    nivel_3 = nivel_2 + tabulacion
    nivel_4 = nivel_3 + tabulacion

    # NOTE(review): the markup is kept exactly as it was (including the '</p>'
    # right after the first heading) so the rendered report does not change.
    informacion = "<h3>Common Parameters </h3></p>"
    informacion += nivel_2 + "<i>Objective Target: </i>" + target + "</br></br>"
    informacion += nivel_2 + "<i>Percentil for Scoring Function: </i>" + umbral + "</br></br>"
    informacion += nivel_2 + "<i>Main metric: </i>" + main_metric + "</br></br>"
    informacion += nivel_2 + "<i>Feature selection method: </i>" + feature_selection_method + "</br></br>"
    informacion += nivel_2 + "<i>Penalize falses: </i>" + penalize_falses + "</br></br>"
    informacion += nivel_2 + "<i>Common Discarded Variables:</i></br>"
    for variable_descartada in lista_variables_descartadas:
        informacion += nivel_3 + variable_descartada + "</br>"
    if(lista_variables_descartadas == []):
        informacion += tabulacion+"No variables were selected to be discarded</br>"
    informacion += "</p>"

    informacion += "<h3>Events to be processed: </h3><p>"
    for indice, evento in enumerate(lista_eventos):
        informacion += tabulacion+"<strong>"+ evento + "</strong></br>"
        informacion += nivel_2+"<i>Important features for the user:</i> </br>"
        if(lista_variables_usuario[indice]):
            for variable in lista_variables_usuario[indice]:
                informacion += nivel_3+variable + "</br>"
        else:
            informacion += nivel_3+"No important features were specified</br>"
        informacion += "</br>"

        informacion += nivel_2+"<i>Discarded variables by the user:</i> </br>"
        if(lista_listas_variables_descartadas[indice]):
            for variable in lista_listas_variables_descartadas[indice]:
                informacion += nivel_3+variable + "</br>"
        else:
            informacion += nivel_3+"No variables were discarded</br>"
        informacion += "</br>"

        informacion += nivel_2+"<i>Learnings to be applied: </i></br>"
        aprendizaje = lista_aprendizajes[indice]
        modelos = lista_modelos[indice]
        if aprendizaje == glod.get_all_learning_modes_name():
            # Both learning modes requested: modelos holds one model list per mode.
            for nombre_aprendizaje, modelos_del_modo in ((diccionario_aprendizajes[1], modelos[0]),
                                                         (diccionario_aprendizajes[2], modelos[1])):
                informacion += nivel_3+"<u>" + str(nombre_aprendizaje) + "</u>:</br>"
                for modelo_act in modelos_del_modo:
                    informacion += nivel_4 + modelo_act + "</br>"
                informacion += "</br>"
        else:
            informacion += nivel_3+"<u>"+aprendizaje + "</u>:</br>"
            for modelo_act in modelos:
                informacion += nivel_4 + modelo_act + "</br>"

        informacion += "</p>"

    # Stored after the loop so the common parameters are reported even when
    # there are no events (inside the loop they were lost in that case).
    template_vars[glod.get_general_info_execution_key()] = informacion

    # Attach the null handler before converting and only once per process;
    # adding a new handler on every call makes the logger's handler list grow.
    pisa_logger = logging.getLogger("xhtml2pdf")
    if not any(isinstance(handler, PisaNullHandler) for handler in pisa_logger.handlers):
        pisa_logger.addHandler(PisaNullHandler())

    try:
        with codecs.open(ruta_plantilla_temporal,'w',encoding=enco) as output_file:
            output_file.write(template.render(template_vars))

        # Read the HTML back with the encoding it was written in.
        with codecs.open(ruta_plantilla_temporal, 'r', encoding=enco) as html_leido:
            pdf_resultante = os.path.join(ruta_directorio_resultados,"General_execution_report_"+ target +".pdf")
            with open(pdf_resultante, "wb") as gen_report:
                pisa.CreatePDF(html_leido.read(),gen_report)
    finally:
        # Remove the temporary HTML even when the generation failed.
        if(os.path.exists(ruta_plantilla_temporal)):
            os.remove(ruta_plantilla_temporal)
Example #9
0
def create_report_current_dictionary_models(dictionary_of_models, basic_paths,
                                            list_of_parameters_models_events_dict, logo_path, enco):
    '''Generate the PDF report with the current status of the dictionary of models.

    For every event and every target stored in ``dictionary_of_models`` an HTML
    fragment is built, rendered through the dictionary-of-models template into a
    temporary HTML file and finally converted into
    ``Current_status_dictionary_events_and_models.pdf``.

    Args:
        dictionary_of_models: nested mapping ``event -> target -> attributes``.
            The attributes are looked up with the key names contained in
            ``list_of_parameters_models_events_dict``.
        basic_paths: indexable with at least two items. Item 0 is the
            (relative) auxiliary-data directory, which holds the template and
            receives the temporary HTML file; item 1 is the directory where the
            resulting PDF is written.
        list_of_parameters_models_events_dict: indexable with the attribute
            key names. Items 0, 3 and 1 are reported (in that order) for each
            target. The value under item 0 is compared against
            ``glod.get_unsupervised_name()``; the value under item 1 is a
            string from which the best-model name is extracted; item 2 is only
            read for unsupervised entries (reassignment dictionary).
            NOTE(review): item 0 looks like the learning type, item 1 like the
            model file path and item 3 like the feature list -- confirm
            against the caller.
        logo_path: path of the logo image; single quotes are stripped before
            it is handed to ``encode_image``.
        enco: encoding used to write and read back the temporary HTML file.

    Returns:
        None. The result is the PDF file written to ``basic_paths[1]``.
    '''

    ruta_relativa_datos_auxiliares = basic_paths[0]
    ruta_directorio_resultados = basic_paths[1]
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares, 'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' +\
                                glod.get_dictionary_models_template_name())

    tabulacion = "&nbsp;&nbsp;&nbsp;&nbsp;"
    double_indent = tabulacion + tabulacion
    quadruple_indent = double_indent + double_indent

    template_vars = {glod.get_title_key(): "Report of the information of the Dictionary of models",
                     glod.get_logo_key(): encode_image(logo_path.replace('\'', glod.get_empty_string()))
                    }

    learning_key = list_of_parameters_models_events_dict[0]
    model_path_key = list_of_parameters_models_events_dict[1]
    list_elements = [learning_key,
                     list_of_parameters_models_events_dict[3],
                     model_path_key]

    # Collect the HTML pieces in a list and join once, instead of growing a
    # single string with repeated "+=".
    fragments = []
    for event in dictionary_of_models:
        fragments.append("<strong><u>"+ event +"</u></strong></br></br>")
        for target in dictionary_of_models[event]:
            model_info = dictionary_of_models[event][target]
            fragments.append(double_indent + "<strong><i>Target:</i></strong>" + "&nbsp;&nbsp;" + target + "</br>")
            for key in list_elements:
                fragments.append(double_indent + "<strong><i>" + key + ": </i></strong>")
                value = model_info[key]
                # isinstance also accepts list subclasses, which the former
                # exact-type comparison sent to the string branch (TypeError).
                if isinstance(value, list):
                    fragments.append("<br>")
                    for element in sorted(value):
                        fragments.append(quadruple_indent + element + "</br>")
                else:
                    fragments.append(value + "</br>")
                    if key == learning_key:
                        # Model name: last '_'-separated token of the stored
                        # string, cut at its first '.'.
                        best_model_name = model_info[model_path_key].split('_')[-1].split('.')[0]
                        fragments.append(double_indent + "<strong><i>best model: </i></strong>&nbsp;&nbsp;" + best_model_name + "</br>")
                        if value == glod.get_unsupervised_name():
                            fragments.append(double_indent + "<strong><i>dic_reassingment: </i></strong>&nbsp;&nbsp;" + str(model_info[list_of_parameters_models_events_dict[2]]) + "</br>")
            fragments.append("</br>")

    informacion = glod.get_empty_string() + "".join(fragments)
    if informacion == glod.get_empty_string():
        informacion = "No models were created yet"
    template_vars[glod.get_info_key()] = informacion

    try:
        #html
        with codecs.open(ruta_plantilla_temporal, glod.get_write_mode(), encoding=enco) as output_file:
            renderizado = template.render(template_vars)
            output_file.write(renderizado)

        #pdf
        with codecs.open(ruta_plantilla_temporal, mode=glod.get_read_mode(), encoding=enco) as read_html:
            pdf_resultante = os.path.join(ruta_directorio_resultados, "Current_status_dictionary_events_and_models.pdf")
            with open(pdf_resultante, mode=glod.get_writebyte_mode()) as pdf_gen:
                # The encode/decode round trip with 'ignore' drops characters
                # that cannot be represented in the target encoding.
                pisa.CreatePDF(read_html.read().encode(enco, 'ignore').decode(enco), pdf_gen)
    finally:
        # Always clean up the temporary HTML file, even when rendering or the
        # PDF conversion fails.
        if os.path.exists(ruta_plantilla_temporal):
            os.remove(ruta_plantilla_temporal)
Example #10
0
def prediction_function(BASE_PATH):
    ''' Step 0: Reading configuration parameters and creating log files'''
    '''Creating variable that parses the configuration file. If the file is
    not found, an exception is thrown and finishes the execution'''

    path_to_configuration_file = os.path.join(BASE_PATH,
                                              glod.get_config_parser_name())
    config_parser = conp.ConfigParser()
    config_parser.optionxform = str
    enco = glod.get_encoding()

    if (os.path.exists(path_to_configuration_file)):
        config_parser_file = open(path_to_configuration_file, encoding=enco)
        config_parser.readfp(config_parser_file)
    else:
        raise Exception('Configuration file (conf.ini) was not found')

    logs_section = glod.get_log_section_name()
    auxiliary_data_section = glod.get_auxiliary_section_name()
    input_data_section = glod.get_input_section_name()
    prediction_section = glod.get_prediction_section_name()
    '''Creating log files'''
    log_path = os.path.join(
        BASE_PATH,
        config_parser.get(logs_section, glod.get_log_directory_name()))
    execution_log_path = os.path.join(
        log_path,
        config_parser.get(logs_section,
                          glod.get_prediction_log_execution_name()) + '.' +
        glod.get_log_files_extension())
    time_log_path = os.path.join(
        log_path,
        config_parser.get(logs_section,
                          glod.get_prediction_log_time_execution_name()) +
        '.' + glod.get_log_files_extension())
    ruta_modelos_prediccion = config_parser.get(
        prediction_section, glod.get_path_to_prediction_models_name())
    auxf.create_directory(log_path)

    step_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([execution_log_path],
                      '>>>>>>Prediction Phase <<<<<<<<   \n' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", 0,
                      enco)
    repg.register_log([execution_log_path],
                      '>>>> Step 0 - Reading parameters from conf.ini \n' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                      enco)
    repg.register_log([time_log_path], '>>>>Step 0 starts:\n', 0, enco)
    '''Reading from conf.ini necessary variables for the prediction phase'''
    extension = glod.get_input_files_extension()
    name = config_parser.get(input_data_section,
                             glod.get_event_name_feature_name())

    observation_number = config_parser.get(input_data_section,
                                           glod.get_obsnumber_parameter_name())
    input_files_delimiter_not_catalogued_data = config_parser.get(
        prediction_section, glod.get_delimiter_non_catalogued_data_name())
    input_files_delimiter_not_catalogued_data = input_files_delimiter_not_catalogued_data.replace(
        '\'', '')
    label_non_catalogued_data = int(
        config_parser.get(input_data_section,
                          glod.get_non_catalogued_label_name()))

    maximum_number_of_files_to_catalogue = int(
        config_parser.get(prediction_section,
                          glod.get_number_of_files_parameter_name()))
    path_to_directory_with_input_files_to_catalogue = os.path.join(
        BASE_PATH,
        config_parser.get(prediction_section,
                          glod.get_name_directory_to_input_files_catalogue()))

    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([execution_log_path], '>>>> Step 0 ends \n' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                      enco)
    repg.register_log(
        [time_log_path],
        '>>>>Step 0 - Reading parameters from conf.ini total elapsed time :' +
        str(step_finish_time - step_init_time) + '\n', '', enco)
    ''' Step 1: Reading observations from files and concatenating them '''
    step_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log(
        [execution_log_path],
        '>>>>Step 1 Loading observations from files into dataframes \n' +
        step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log([time_log_path], '>>>>Step 1 starts:\n', '', enco)
    vector_fullpaths_to_input_files_with_observations_to_catalogue = auxf.get_all_files_in_dir_with_extension(
        path_to_directory_with_input_files_to_catalogue,
        maximum_number_of_files_to_catalogue, extension)
    ''' Path to file with relevant variables '''
    auxiliary_directory_filename = config_parser.get(
        auxiliary_data_section, glod.get_auxiliary_directory_parameter_name())
    path_to_directory_auxiliary_files = os.path.join(
        BASE_PATH,
        config_parser.get(auxiliary_data_section,
                          glod.get_auxiliary_directory_parameter_name()))

    report_dict = {
        glod.get_logo_key():
        "'" +
        os.path.join(path_to_directory_auxiliary_files, glod.get_logo_name()) +
        "'"
    }
    ''' Substep 1.1 - Reading input files '''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    list_registers_to_catalogue = []
    repg.register_log([execution_log_path], '>>>>Step 1.1 \n', '', enco)
    for i in range(
            len(vector_fullpaths_to_input_files_with_observations_to_catalogue)
    ):
        repg.register_log(
            [execution_log_path],
            '>>Reading Csv to predict number ' + str(i) + ': ' +
            vector_fullpaths_to_input_files_with_observations_to_catalogue[i] +
            '\n', '', enco)
        print(
            "To catalogue : ",
            vector_fullpaths_to_input_files_with_observations_to_catalogue[i])
        print("\n")
        original_data = pd.read_csv(
            vector_fullpaths_to_input_files_with_observations_to_catalogue[i],
            sep=input_files_delimiter_not_catalogued_data)
        list_registers_to_catalogue.append(original_data)

    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([time_log_path],
                      '>>>>Subtep 1.1 - reading csv total elapsed time: ' +
                      str(substep_finish_time - substep_init_time) + '\n', '',
                      enco)
    repg.register_log([execution_log_path],
                      '>>>>Subtep 1.1 - reading csv total elapsed time: ' +
                      str(substep_finish_time - substep_init_time) + '\n', '',
                      enco)

    if (list_registers_to_catalogue == list()):
        repg.register_log(
            [time_log_path],
            '>>>> Prediction process finished: Observations were not found ' +
            str(substep_finish_time - substep_init_time) + '\n', '', enco)
        repg.register_log(
            [execution_log_path],
            '>>>> Prediction process finished: Obervations were not found ' +
            str(substep_finish_time - substep_init_time) + '\n', '', enco)
        print('>>>> Prediction process finished: Obervations were not found ')
    ''' Substep 1.2 - Concatenating read csv'''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    df_data_to_catalogue = pd.concat(list_registers_to_catalogue)
    reco_pandas_features = []
    for feature in df_data_to_catalogue.columns:
        reco_pandas_features.append(feature)
    df_data_to_catalogue.columns = reco_pandas_features

    try:
        df_data_to_catalogue[name]
    except Exception as e:
        repg.register_log([
            execution_log_path
        ], '>> An Eception has happened: Incorrect name of feature with events '
                          + str(e) + ' ' + datetime.datetime.fromtimestamp(
                              time.time()).strftime('%Y-%m-%d %H:%M:%S') +
                          '\n', '', enco)
        print(
            '>> An Exception has happened, check configuration file: Incorrect name of feature with events "'
            + str(e) + '"')
        error_trace = "Full trace:\n" + str(traceback.format_exc())
        repg.register_log([execution_log_path], error_trace, '', enco)
        raise Exception(e)
    ''' Erasing indexes introduced by pandas, if any '''
    if 'index' in df_data_to_catalogue.columns:
        df_data_to_catalogue = df_data_to_catalogue.drop('index', axis=1)
    if 'Unnamed: 0' in df_data_to_catalogue.columns:
        df_data_to_catalogue = df_data_to_catalogue.drop('Unnamed: 0', axis=1)

    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    total_time_step_1 = step_finish_time - step_init_time
    repg.register_log([
        time_log_path
    ], '>>>> Substep 1.2 - Loading observations from files into dataframes total elapsed time: '
                      + str(substep_finish_time - substep_init_time) + "\n",
                      '', enco)
    repg.register_log([execution_log_path], '>>>>Substep 1.2 ends ' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                      enco)
    repg.register_log([
        time_log_path
    ], '>>>> Step 1 - Reading and concatenating csv into dataframe total elapsed time: '
                      + str(total_time_step_1) + "\n", '', enco)
    repg.register_log([execution_log_path], '>>>>Step 1 ends ' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                      enco)
    ''' Step 2: Reading prediction models dictionary and preloading best pkl models'''
    step_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log(
        [execution_log_path],
        '>>>>Step 2 Reading models dict and preload best pkl models \n' +
        step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log([time_log_path], '>>>>Step 2 starts:\n', '', enco)
    '''Getting dictionary features in order to recodify and the events to catalogue'''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    dic_event_model, handler = auxf.open_dictionary_pickle_format_for_reading(
        ruta_modelos_prediccion)

    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([
        time_log_path
    ], '>>>> Substep 2.1 - Reading dictionary with models total elapsed time: '
                      + str(substep_finish_time - substep_init_time) + "\n",
                      '', enco)
    repg.register_log([execution_log_path],
                      '>>>>Substep 2.1 Reading dictionary with models ends ' +
                      substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
                      '', enco)
    '''Preloading models in memory'''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([execution_log_path],
                      '>>>>Substep 2.2 - Preloading best pkl models \n' +
                      substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
                      '', enco)
    list_features_to_catalogue = []
    dictionary_of_events_preloaded_models = {}
    print('''
                                          Events, target and predictions models
             ##############################################################################################
             ##                                                                                          ##'''
          )
    for event in dic_event_model.keys():
        dictionary_of_events_preloaded_models[event] = {}
        try:
            for target_trained in dic_event_model[event]:
                dictionary_of_events_preloaded_models[event][
                    target_trained] = {}
                best_model = joblib.load(dic_event_model[event][target_trained]
                                         [glod.get_model_path_key()])
                print(
                    "\t\t\t\t", dic_event_model[event][target_trained][
                        glod.get_model_path_key()])
                dictionary_of_events_preloaded_models[event][target_trained][
                    glod.get_best_model_key()] = best_model
                list_features_to_catalogue += dic_event_model[event][
                    target_trained][glod.get_current_features_key()]
        except Exception as e:
            print('''
             ##                                                                                          ##
             ##############################################################################################'''
                  )
            print(
                'The pkl neccesary for the prediction of the observations of '
                + event + ' was not found ')
            repg.register_log(
                [execution_log_path],
                'The pkl neccesary for the prediction of the observations of '
                + event + ' was not found ' +
                substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                enco)
            raise Exception(e)
    print('''
             ##                                                                                          ##
             ##############################################################################################'''
          )

    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log(
        [time_log_path],
        '>>>>Substep 2.2 - Preloading best pkl models total elapsed time: ' +
        str(substep_finish_time - substep_init_time) + '\n', '', enco)
    repg.register_log([execution_log_path],
                      '>>>>Substep 2.2 - Preloading best pkl models ends \n' +
                      substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
                      '', enco)
    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    total_time_step_2 = step_finish_time - step_init_time
    repg.register_log([
        time_log_path
    ], '>>>> Step 2 - Reading models dict and preload best pkl models total elapsed time: '
                      + str(total_time_step_2) + "\n", '', enco)
    repg.register_log([execution_log_path], '>>>>Step 2 ends \n' +
                      step_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
                      '', enco)
    ''' Step 3: Classifying observations usin preloaded models '''

    maximum_number_of_observations_to_catalogue = len(df_data_to_catalogue)

    step_init_time = datetime.datetime.fromtimestamp(time.time())
    substep_init_time = datetime.datetime.fromtimestamp(time.time())

    repg.register_log([time_log_path], '>>>> Step 3 starts \n', '', enco)
    repg.register_log([execution_log_path],
                      '>>>>Step 3 - Predicting targets using best models \n' +
                      substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
                      '', enco)
    repg.register_log(
        [execution_log_path],
        '>>>>Substep 3.1 - Preparing global dataframe of results \n' +
        substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    observations = df_data_to_catalogue.iloc[
        0:maximum_number_of_observations_to_catalogue]
    events_to_predict = list(set(observations[name].values))

    #target to predict
    target_to_predict = config_parser.get(prediction_section,
                                          glod.get_target_parameter_name())

    #column for predictions
    prediction_column = target_to_predict + '_pred'

    df_global_predictions = pd.DataFrame(
        data=[], columns=[observation_number, prediction_column])

    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([
        time_log_path
    ], '>>>>Subtep 3.1 - Preparing global dataframe of results total elapsed time: '
                      + str(substep_finish_time - substep_init_time) + "\n",
                      '', enco)
    repg.register_log([execution_log_path], '>>>>Substep 3.1 ends \n' +
                      substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
                      '', enco)
    total_number_predicted_observations = 0
    final_list_events_to_predict = []
    for event in events_to_predict:
        substep_init_time = datetime.datetime.fromtimestamp(time.time())
        repg.register_log([time_log_path],
                          '>>>>Subtep 3.2 - Predicting targets for event ' +
                          event + ' \n', '', enco)
        repg.register_log(
            [execution_log_path],
            '>>>>Substep 3.2 - Predicting targets for event ' + event +
            substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
        df_event = observations[observations[name] == event]
        df_event_obsnumber = pd.DataFrame(
            data=df_event[observation_number].values,
            columns=[observation_number])

        try:
            dic_event = dictionary_of_events_preloaded_models[event]
            total_number_predicted_observations += len(df_event)

            if target_to_predict not in df_event.columns:
                repg.register_log(
                    [execution_log_path],
                    '>> ###Error: The target ' + target_to_predict +
                    ' does not exist in the dataset of the event ' + event +
                    '\n\n', '', enco)
                raise Exception(
                    '>> ###Error: The target ' + target_to_predict +
                    ' does not exist in the dataset of the event ' + event)

            if target_to_predict in dic_event:
                repg.register_log(
                    [execution_log_path],
                    '>> The event ' + event + ' (with ' + str(len(df_event)) +
                    ' observations), has a model for predict target labels \n',
                    '', enco)
                features_event = dic_event_model[event][target_to_predict][
                    glod.get_current_features_key()]

                model_event = dictionary_of_events_preloaded_models[event][
                    target_to_predict][
                        glod.get_best_model_key()]  #se referencia al modelo
                predictions = model_event.predict(df_event[features_event])
                df_event_obsnumber[prediction_column] = predictions

                recatalogued_predictions = []
                if dic_event_model[event][target_trained][
                        glod.get_learning_key()] == glod.get_unsupervised_name(
                        ):
                    for pred in predictions:
                        recatalogued_predictions.append(
                            dic_event_model[event][target_trained][
                                glod.get_reasignment_dict_key()][pred])
                    predictions = recatalogued_predictions
                df_event_obsnumber[prediction_column] = predictions
                df_event_obsnumber[name] = event

            else:
                repg.register_log(
                    [execution_log_path],
                    '>> The event ' + event + ' (with ' + str(len(df_event)) +
                    ' observations), has not models for predicting target (' +
                    target_to_predict + '). Taking original prediction \n', '',
                    enco)
                total_number_predicted_observations += len(df_event)
                df_event_obsnumber[prediction_column] = df_event[
                    target_to_predict].values

            final_list_events_to_predict.append(event)

        except Exception as excep:  #no predictions models
            repg.register_log([execution_log_path],
                              '>> The prediction process has been aborted ' +
                              str(excep) + '\n', '', enco)
            #raise Exception(e)

        df_global_predictions = pd.concat(
            [df_global_predictions, df_event_obsnumber])
        df_global_predictions[observation_number] = df_global_predictions[
            observation_number].apply(int)

        substep_finish_time = datetime.datetime.fromtimestamp(time.time())
        repg.register_log([time_log_path],
                          '>>>>Substep 3.2 - Predicting targets for event ' +
                          event + ' total elapsed time: ' +
                          str(substep_finish_time - substep_init_time) + "\n",
                          '', enco)
        repg.register_log([
            time_log_path
        ], '>>>>Substep 3.2 - Estimated elapsed time predicting one observation for event '
                          + event + ': ' + str(
                              float((substep_finish_time -
                                     substep_init_time).total_seconds()) /
                              float(len(df_event))) + "\n", '', enco)
        repg.register_log(
            [execution_log_path],
            '>>>>Substep 3.2 - Predicting targets for event ' + event +
            ' ends ' + substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
            '', enco)

    type_observation_number = df_global_predictions[observation_number].dtypes
    observations[observation_number] = observations[observation_number].astype(
        type_observation_number)
    observations = pd.merge(observations,
                            df_global_predictions,
                            on=[observation_number, name])

    for not_proc_event in set.difference(set(events_to_predict),
                                         set(final_list_events_to_predict)):
        repg.register_log(
            [execution_log_path], '>> WARNING: Event ' + not_proc_event +
            ' has not models, but validation/unkown samples dataset was provided\n',
            '', enco)
        print(
            "**WARNING**: Event " + not_proc_event +
            " has not models, but validation/unkown samples dataset was provided"
        )

    for event in events_to_predict:
        print('\n-> Event: ', event)
        df_event = observations[observations[name] == event]
        path_to_predicted_data_root = 'Prediction_models'
        path_to_predicted_data = config_parser.get(
            prediction_section, glod.get_path_predicted_data_key())

        #Accuracy print
        if (event in dic_event_model
                and target_to_predict in dic_event_model[event]):
            report_dict[event] = {target_to_predict: {}}
            print(
                '\t\tObservations with known target ',
                len(df_event[
                    df_event[target_to_predict] != label_non_catalogued_data]))
            print(
                '\t\tObservations with unknown target ',
                len(df_event[df_event[target_to_predict] ==
                             label_non_catalogued_data]))
            df_observaciones = df_event[
                df_event[target_to_predict] != label_non_catalogued_data]
            total_obs = len(df_observaciones)

            #computing confusion matrix
            pred_labels = list(df_observaciones[prediction_column].values)
            true_labels = list(df_observaciones[target_to_predict].values)

            if (pred_labels != [] and true_labels != []):
                df_observaciones_temp = df_observaciones[
                    df_observaciones[target_to_predict] ==
                    df_observaciones[prediction_column]]
                total_aciertos = len(df_observaciones_temp)

                confusion_matrix = metr.get_confusion_matrix(
                    true_labels, pred_labels,
                    sorted(
                        list(set(df_observaciones[target_to_predict].values))))
                confusion_matrix_name = 'confusion_matrix_' + event + '_' + target_to_predict
                metr.save_confusion_matrix(
                    confusion_matrix,
                    sorted(
                        list(set(df_observaciones[target_to_predict].values))),
                    os.path.join(path_to_predicted_data_root,
                                 confusion_matrix_name), 'png')

                report_dict[event][target_to_predict][
                    glod.get_best_model_key()] = str(
                        dictionary_of_events_preloaded_models[event]
                        [target_to_predict][glod.get_best_model_key()])
                report_dict[event][target_to_predict][
                    'Correct'] = total_aciertos
                report_dict[event][target_to_predict]['Total'] = len(df_event[
                    df_event[target_to_predict] != label_non_catalogued_data])
                report_dict[event][target_to_predict][
                    glod.get_accuracy_parameter_name()] = float(
                        float(total_aciertos) / float(
                            len(df_event[df_event[target_to_predict] !=
                                         label_non_catalogued_data])))
                report_dict[event][target_to_predict][
                    'target_to_predict_cm'] = os.path.join(
                        path_to_predicted_data_root,
                        confusion_matrix_name) + '.png'
                report_dict[event][target_to_predict]['Predicted'] = len(
                    df_event[df_event[target_to_predict] ==
                             label_non_catalogued_data])

        else:
            total_obs = 0

        if (total_obs != 0):
            repg.register_log(
                [time_log_path],
                '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                event + ' and target ' + target_to_predict + '(' +
                str(float(total_aciertos)) + '/' + str(float(total_obs)) +
                '): ' + str(float(total_aciertos) / float(total_obs)) + "\n",
                '', enco)
            repg.register_log(
                [execution_log_path],
                '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                event + ' and target ' + target_to_predict + '(' +
                str(float(total_aciertos)) + '/' + str(float(total_obs)) +
                '): ' + str(float(total_aciertos) / float(total_obs)) + "\n",
                '', enco)
        else:
            repg.register_log(
                [time_log_path],
                '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                event + ' and target ' + target_to_predict +
                ': not calculated (no observations found) \n', '', enco)
            repg.register_log(
                [execution_log_path],
                '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                event + ' and target ' + target_to_predict +
                ': not calculated (no observations found) \n', '', enco)

        print('''
                                            Clasification for known targets results
             ##############################################################################################'
             ##                                                                                          ##'''
              )
        if (total_obs != 0):
            print('\t\t\t\tCorrect predictions performed for ' +
                  target_to_predict + ' of event ' + event + ': ' +
                  str(total_aciertos) + '/' + str(total_obs))
        else:
            print('\t\t\t\tNo predictions performed for severity of event ' +
                  event)

        print('\t\t\t\tCheck output data at: ', path_to_predicted_data)
        print(
            '\t\t\t\tCheck predictions log for accuracy summary and more information'
        )
        print('\n\t\t\t\t\t\tThanks for using RADSSo')
        print('''             
             ##                                                                                          ##
             ##############################################################################################'''
              )

        #Determinamos cuantas predicciones sobre datos desconocidos se han realizado
        if (event in dic_event_model
                and target_to_predict in dic_event_model[event]):
            df_observaciones = df_event[df_event[target_to_predict] ==
                                        label_non_catalogued_data]
            total_obs = len(df_observaciones)

        print('''
                                                Prediction of unknown target results
             ##############################################################################################'
             ##                                                                                          ##'''
              )
        print(
            '\t\t\t\tTotal of predictions performed for ' + target_to_predict +
            ': ', str(total_obs))
        print('\t\t\t\tCheck output data at: ', path_to_predicted_data)
        print(
            '\t\t\t\tCheck predictions log for accuracy summary and more information'
        )
        print('\n\t\t\t\t\t\tThanks for using RADSSo')
        print('''             
             ##                                                                                          ##
             ##############################################################################################\n'''
              )
        try:
            if (event in report_dict):
                if (pred_labels != [] and true_labels != []):
                    repg.create_report_prediction(
                        report_dict, [event, target_to_predict],
                        auxiliary_directory_filename, 'Prediction_models',
                        enco)
        except Exception as e:
            print(''''**********************
                   ****Critical Exception****
                   **************************''')
            print(e)

    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    total_time_step_3 = step_finish_time - step_init_time
    print('\n\n--- Total Elapsed time --- ' + str(total_time_step_3))

    repg.register_log(
        [time_log_path],
        '>>>>Step 3 - Predicting using best models total elapsed time: ' +
        str(total_time_step_3) + "\n", '', enco)

    repg.register_log([time_log_path],
                      '>>>> Number of observations processed by second ' + str(
                          float(total_number_predicted_observations) /
                          float(total_time_step_3.total_seconds())) + "\n", '',
                      enco)
    repg.register_log([time_log_path],
                      '>>>> Number of seconds by prediction ' + str(
                          float(total_time_step_3.total_seconds()) /
                          float(total_number_predicted_observations)) + "\n",
                      '', enco)

    repg.register_log(
        [time_log_path],
        '>>>> Recodification and Prediction Phase - total elapsed time: ' +
        str(total_time_step_1 + total_time_step_2 + total_time_step_3) + "\n",
        '', enco)

    observations.to_csv(path_to_predicted_data,
                        sep=input_files_delimiter_not_catalogued_data,
                        encoding=enco)
    return ()