def update_report_user_discarded_features(report_dict, lista_descartadas):
    '''Store lista_descartadas as the user-discarded features of the report.

    The value is written in place inside the variables section of the general
    information of report_dict, and the same report_dict object is returned.'''
    variables_section = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()]
    discarded_key = glod.get_user_discarded_key()
    variables_section[discarded_key] = lista_descartadas
    return report_dict
def update_report_training_models_features(report_dict, diccionario_variables_scores):
    '''Store diccionario_variables_scores as the scored relevant variables of the report.

    The value is written in place inside the variables section of the general
    information of report_dict, and the same report_dict object is returned.'''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_score_relevant_key()] = diccionario_variables_scores
    return report_dict
def update_report_empty_constant_features(report_dict, lista_vacias_constantes):
    '''Store lista_vacias_constantes as the empty-or-constant features of the report.

    The value is written in place inside the variables section of the general
    information of report_dict, and the same report_dict object is returned.'''
    variables_section = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()]
    empty_or_constant_key = glod.get_empty_or_constant_key()
    variables_section[empty_or_constant_key] = lista_vacias_constantes
    return report_dict
def update_report_relevant_user_features(report_dict, lista_importantes):
    '''Store lista_importantes as the user-requested features of the report.

    The value is written in place inside the variables section of the general
    information of report_dict, and the same report_dict object is returned.'''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_user_requested_key()] = lista_importantes
    return report_dict
Exemple #5
0
_NBSP = "&nbsp;"
_HTML_INDENT = _NBSP * 4  # indentation prefix used by every detail line of the report

# NOTE: several HTML fragments below contain a single literal space between runs of
# "&nbsp;" or between words. Those positions used to hold a run of source indentation
# leaked into the string by a backslash continuation; it is kept as one space because
# HTML collapses consecutive whitespace (presumably rendered the same by xhtml2pdf --
# TODO confirm on a generated report).
# NOTE(review): "</br>" is not a valid closing tag; it is left untouched because the
# layout of the existing reports depends on how the converter interprets it.


def _silence_pisa_logger():
    '''Attach a PisaNullHandler to the "xhtml2pdf" logger unless it already has one'''
    pisa_logger = logging.getLogger("xhtml2pdf")
    if not any(isinstance(handler, PisaNullHandler) for handler in pisa_logger.handlers):
        pisa_logger.addHandler(PisaNullHandler())


def _encode_report_image(ruta_imagen):
    '''Remove the single quotes of a stored image path and pass it to encode_image'''
    return encode_image(ruta_imagen.replace('\'', glod.get_empty_string()))


def _render_pdf(template, template_vars, ruta_plantilla_temporal, ruta_pdf, enco):
    '''Render the template into the temporary HTML file and convert that file to ruta_pdf'''
    with codecs.open(ruta_plantilla_temporal, glod.get_write_mode(), encoding=enco) as output_file:
        output_file.write(template.render(template_vars))

    with codecs.open(ruta_plantilla_temporal, glod.get_read_mode(), encoding=enco) as html_leido:
        with open(ruta_pdf, glod.get_writebyte_mode()) as pdf_file:
            pisa.CreatePDF(html_leido.read(), pdf_file)


def _sorted_target_counts(counts_by_target):
    '''Return the (target, count) pairs of the dictionary ordered with auxf.natsorted'''
    return [(target, counts_by_target[target])
            for target in auxf.natsorted(list(counts_by_target.keys()))]


def _target_counts_html(header_html, sorted_counts):
    '''Return header_html followed by one "With target ..." line per pair and a closing </p>'''
    lines = [header_html]
    for target, count in sorted_counts:
        lines.append(_HTML_INDENT + "With target " + str(target) + " :" + str(count) + "</br>")
    lines.append("</p>")
    return "".join(lines)


def _null_accuracy(sorted_counts):
    '''Return the largest share of observations held by a single target, rounded to 4 decimals.

    Returns 0.0 when there are no targets or no observations, instead of failing.'''
    values = [float(count) for _, count in sorted_counts]
    total = sum(values)
    if total == 0:
        return 0.0
    return max(round(value / total, 4) for value in values)


def _variables_summary_html(variables_info):
    '''Return the "Summary of variables" paragraph built from the variables section of the report'''
    discarded_for_event = variables_info[glod.get_user_discarded_key()]
    parts = ["<p><strong>Summary of variables</strong></br>"]

    parts.append("<br><i><u>Deleted by the user at the begining:</u></i></br>")
    for deleted_var in variables_info[glod.get_deleted_by_user_key()]:
        # variables discarded specifically for this event are highlighted in bold
        if deleted_var in discarded_for_event:
            deleted_var = "<strong>" + deleted_var + "</strong>"
        parts.append(_HTML_INDENT + deleted_var + "</br>")
    parts.append(_HTML_INDENT + "<i>*variables in bold were specified by the user to be "
                 "discarded specifically for this event</i></br>")
    parts.append("</br>")

    parts.append("<br><i><u>Deleted in execution time(Empty or Constant) :</u></i></br>")
    for emp_con_var in variables_info[glod.get_empty_or_constant_key()]:
        parts.append(_HTML_INDENT + emp_con_var + "</br>")
    parts.append("</br>")

    parts.append("<br><i><u>Requested for the event by the user:</u></i></br>")
    for req_var in variables_info[glod.get_user_requested_key()]:
        parts.append(_HTML_INDENT + req_var + "</br>")
    parts.append("</br>")

    parts.append("<br><i><u>Used during the process:</u></i></br>")
    diccionario_relevantes_mif = variables_info[glod.get_score_relevant_key()]
    # scored variables first, highest score on top, each one in bold with its score
    for variable, score in sorted(diccionario_relevantes_mif.items(),
                                  key=operator.itemgetter(1),
                                  reverse=True):
        parts.append(_HTML_INDENT + "<strong>" + variable + _NBSP * 5 + " " + _NBSP * 7 +
                     str(score) + "</strong>" + "</br>")
    # then the remaining variables used in the process that have no score
    for used_var in variables_info[glod.get_used_in_process()]:
        if used_var not in diccionario_relevantes_mif:
            parts.append(_HTML_INDENT + used_var + "</br>")
    parts.append(_HTML_INDENT + "<i>*variables in bold were used to train the models</i></br>")
    parts.append("</p>")
    return "".join(parts)


def _metrics_by_label_html(metrics_by_target):
    '''Return an HTML table with one row per target and one column per metric'''
    rows = ["<table width='100%' border='1' cellspacing='0' cellpadding='5'>"]
    column_names = None
    for elemento in auxf.natsorted(list(metrics_by_target.keys())):
        if column_names is None:
            # the header row is taken from the metrics of the first target
            column_names = list(metrics_by_target[elemento].keys())
            rows.append("<tr><td align='center' class='black'>" +
                        glod.get_report_generic_target_key() + "</td>")
            for cabecera in column_names:
                rows.append("<td align='center' class='black'>" + cabecera + "</td>")
            rows.append("</tr>")
        rows.append("<tr><td>" + elemento.replace('target_', glod.get_empty_string()) + "</td>")
        for column in column_names:
            rows.append("<td>" + str(metrics_by_target[elemento][column]) + "</td>")
        rows.append("</tr>")
    rows.append("</table>")
    return "".join(rows)


def _build_shared_template_vars(report_dict):
    '''Return (template_vars, null_accuracy_html) with the content shared by every model report'''
    general_info = report_dict[glod.get_report_general_info_key()]

    observations_targets = _target_counts_html(
        "<p><strong>Target distribution of observations </strong></br>",
        _sorted_target_counts(general_info[glod.get_report_generic_target_key()]))
    variables_summary = _variables_summary_html(general_info[glod.get_variables_key()])

    train_counts = _sorted_target_counts(general_info[glod.get_training_division_key()])
    train_distribution_info = _target_counts_html(
        "<p></br><strong>Training Data Distribution </strong></br>", train_counts)
    test_counts = _sorted_target_counts(general_info[glod.get_test_division_key()])
    test_distribution_info = _target_counts_html(
        "<p><strong>Test Data Distribution</strong></br>", test_counts)

    null_accuracy_html = ("<br>" + _NBSP * 7 + " " + _NBSP * 4 + "<strong>Null train acc: " +
                          str(_null_accuracy(train_counts)) + "</strong>" +
                          "<br>" + _NBSP * 3 + " " + _NBSP * 8 + " " + "<strong>Null test acc: " +
                          str(_null_accuracy(test_counts)) + "</strong></p>")

    template_vars = {
        glod.get_title_key(): "Execution Report",
        glod.get_logo_key(): _encode_report_image(report_dict[glod.get_logo_key()]),
        glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
        glod.get_event_key(): report_dict[glod.get_event_key()],
        glod.get_observations_targets_key(): observations_targets,
        glod.get_variables_summary_key(): variables_summary,
        glod.get_train_distribution_info_key(): train_distribution_info,
        glod.get_test_distribution_info_key(): test_distribution_info,
    }
    return template_vars, null_accuracy_html


def _build_model_template_vars(model_data, modelo, ranking, null_accuracy_html):
    '''Return the template variables that depend on the model being reported'''
    accuracy = ("</br></br>" + _NBSP * 10 + " " + _NBSP + "<strong>Accuracy: " +
                str(float(round(model_data[glod.get_accuracy_parameter_name()], 5))) +
                "</strong>")

    parameters = model_data[glod.get_parameters_key()]
    model_info = ["<p><strong>Parameters used to configure the model</strong></br>"]
    for param in parameters:
        model_info.append(_HTML_INDENT + "<i>" + param + "</i>: " +
                          str(parameters[param]) + "</br>")
    model_info.append("</p>")

    times = model_data[glod.get_time_parameters_key()]
    tiempo_seleccion_parametros = (times[glod.get_time_sel_finish_key()] -
                                   times[glod.get_time_sel_init_key()])
    tiempo_entrenamiento = (times[glod.get_time_train_finish_key()] -
                            times[glod.get_time_train_init_key()])
    time_info = ("<p><strong>Time elapsed</strong></br>" +
                 _HTML_INDENT + "Parameters selection time: " +
                 str(tiempo_seleccion_parametros) + "</br>" +
                 _HTML_INDENT + "Training time: " + str(tiempo_entrenamiento) + "</br>" +
                 "</p>")

    micro_avg_metrics = model_data[glod.get_metrics_micro_avg_key()]
    # str() keeps the concatenation valid when a metric is stored as a number
    metrics_info = glod.get_empty_string() + "".join(
        ["<p><strong>" + metric + "</strong>: " + str(micro_avg_metrics[metric]) + "</br>"
         for metric in micro_avg_metrics]) + "</p>"

    template_vars = {
        glod.get_model_key(): modelo,
        glod.get_accuracy_parameter_name(): accuracy + null_accuracy_html,
        glod.get_models_ranking_key(): ranking,
        glod.get_models_info_key(): "".join(model_info),
        glod.get_time_info_key(): time_info,
        glod.get_metrics_info_key(): metrics_info,
        glod.get_metrics_by_label_key(): _metrics_by_label_html(model_data[glod.get_metrics_key()]),
    }

    # plots are optional: each one is embedded only when the model stored a path for it
    optional_images = (
        (glod.get_model_parameters_plot_name(), glod.get_image_parameters_accuracy_key()),
        (glod.get_confussion_matrix_train_path_key(), glod.get_conf_train_img_key()),
        (glod.get_confussion_matrix_test_path_key(), glod.get_conf_test_img_key()),
        (glod.get_learning_curve_key(), glod.get_learning_curve_key()),
    )
    for source_key, template_key in optional_images:
        if source_key in model_data:
            template_vars[template_key] = _encode_report_image(model_data[source_key])
    return template_vars


def create_report_current_model(report_dict, lista_modelos, ruta_relativa_datos_auxiliares,
                                ruta_directorio_informes, enco):
    '''Generate the PDF report(s) of the current event.

    When lista_modelos is empty the execution is treated as incomplete and a single
    "report_<event>_incomplete.pdf" is generated from the incomplete-event template,
    showing the warning stored in report_dict. Otherwise one
    "<model>_report_for_<event>.pdf" is generated for every model of lista_modelos
    that has an entry in report_dict.

    Every report is rendered to a temporary "temp_html.html" file inside
    ruta_relativa_datos_auxiliares and then converted to PDF. The temporary file is
    removed before leaving the function, also when an error interrupts the process.

    Parameters:
        report_dict: dictionary with the information collected for the report
        lista_modelos: names of the models to report; empty if the process did not finish
        ruta_relativa_datos_auxiliares: directory, relative to the current working
            directory, that contains the report templates
        ruta_directorio_informes: directory where the PDF files are written
        enco: encoding used to write and read the temporary HTML file
    '''

    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares, 'temp_html.html')
    # installed once and before any conversion, so handlers do not pile up per PDF
    _silence_pisa_logger()

    try:
        if not lista_modelos: #process not completed
            # template names are built with '/' on purpose (presumably a Jinja2 loader,
            # which expects '/'-separated names on every platform)
            template = env.get_template(ruta_relativa_datos_auxiliares + '/' +
                                        glod.get_incomplete_event_report_template_name())
            event = report_dict[glod.get_event_key()]
            template_vars = {
                glod.get_title_key(): "Incomplete Execution Report",
                glod.get_logo_key(): _encode_report_image(report_dict[glod.get_logo_key()]),
                glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                glod.get_event_key(): event,
                glod.get_info_key(): "        " + report_dict[glod.get_warning_key()],
            }
            pdf_resultante = os.path.join(ruta_directorio_informes,
                                          "report_" + event + "_incomplete.pdf")
            _render_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco)
        else:
            lista_pares_modelo_indice = auxf.order_models_by_score_and_time(report_dict,
                                                                            lista_modelos)
            template = env.get_template(ruta_relativa_datos_auxiliares + '/' +
                                        glod.get_report_template_name())
            shared = None
            for modelo in lista_modelos:
                if modelo not in report_dict:
                    continue
                if shared is None:
                    # model-independent sections are built once, and only if at least
                    # one model is going to be reported
                    shared = _build_shared_template_vars(report_dict)
                shared_vars, null_accuracy_html = shared

                ranking = get_string_with_ranking_of_models(lista_pares_modelo_indice, modelo)
                template_vars = dict(shared_vars)
                template_vars.update(_build_model_template_vars(report_dict[modelo], modelo,
                                                                ranking, null_accuracy_html))

                event = report_dict[glod.get_event_key()]
                pdf_resultante = os.path.join(ruta_directorio_informes,
                                              modelo + "_report_for_" + event + ".pdf")
                _render_pdf(template, template_vars, ruta_plantilla_temporal,
                            pdf_resultante, enco)
    finally:
        if os.path.exists(ruta_plantilla_temporal):
            os.remove(ruta_plantilla_temporal)
Exemple #6
0
def update_test_division(report_dict, key, valor):
    '''Register valor under key in the test division of the report.

    The entry is written in place inside the general information of report_dict,
    and the same report_dict object is returned.'''
    general_info = report_dict[glod.get_report_general_info_key()]
    test_division = general_info[glod.get_test_division_key()]
    test_division[key] = valor
    return report_dict
Exemple #7
0
def update_report_user_discarded_features(report_dict, lista_descartadas):
    '''Register the features discarded by the user in the report.

    The value is written in place inside the variables section of the general
    information of report_dict, and the same report_dict object is returned.'''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_user_discarded_key()] = lista_descartadas
    return report_dict
Exemple #8
0
def update_report_relevant_user_features(report_dict, lista_importantes):
    '''Register the features requested by the user in the report.

    The value is written in place inside the variables section of the general
    information of report_dict, and the same report_dict object is returned.'''
    variables_section = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()]
    requested_key = glod.get_user_requested_key()
    variables_section[requested_key] = lista_importantes
    return report_dict
Exemple #9
0
def update_report_empty_constant_features(report_dict, lista_vacias_constantes):
    '''Register the empty or constant features in the report.

    The value is written in place inside the variables section of the general
    information of report_dict, and the same report_dict object is returned.'''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_empty_or_constant_key()] = lista_vacias_constantes
    return report_dict