Example 1
 def check_path(self, datapath):
     if isinstance(datapath, str):
         try:
             self._logging.log_info('gDayF', "Workflow",
                                    self._labels["input_param"], datapath)
             pd_dataset = inputHandlerCSV().inputCSV(filename=datapath)
             return None, pd_dataset.copy()
         except (IOError, OSError, JSONDecodeError):
             self._logging.log_critical('gDayF', "Workflow",
                                        self._labels["failed_input"],
                                        datapath)
             return self._labels['failed_input'], None
     elif isinstance(datapath, DataFrame):
         self._logging.log_info('gDayF', "Controller",
                                self._labels["input_param"],
                                str(datapath.shape))
         return None, datapath
     else:
         self._logging.log_critical('gDayF', "Workflow",
                                    self._labels["failed_input"], datapath)
         return self._labels['failed_input'], None
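
A caller is expected to unpack the `(error_label, dataframe)` pair that `check_path` returns. The sketch below shows the same convention in a self-contained form; `load_frame`, the label strings, and the CSV path are hypothetical stand-ins, not gDayF names. It also demonstrates why the `except` clause above had to become a tuple: Python 3 rejects a list of exception classes with a `TypeError` the moment an exception is actually raised inside the `try`.

from json import JSONDecodeError

from pandas import DataFrame, read_csv


def load_frame(source):
    # Returns (error_label, dataframe); exactly one of the two is None.
    if isinstance(source, str):
        try:
            # `except [IOError, ...]` would raise TypeError here in
            # Python 3; multiple exception classes must form a tuple.
            return None, read_csv(source)
        except (IOError, OSError, JSONDecodeError):
            return 'failed_input', None
    if isinstance(source, DataFrame):
        return None, source
    return 'failed_input', None


error, frame = load_frame('dataset.csv')  # hypothetical path
if error is not None:
    print(error)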
Example 2
    def exec_sanalysis(self,
                       datapath,
                       list_ar_metadata,
                       metric='combined_accuracy',
                       deep_impact=1,
                       **kwargs):

        self._logging.log_info('gDayF', "Controller", self._labels["start"])
        self._logging.log_info('gDayF', "Controller",
                               self._labels["ana_param"], metric)
        self._logging.log_info('gDayF', "Controller",
                               self._labels["dep_param"], deep_impact)

        if isinstance(datapath, str):
            try:
                self._logging.log_info('gDayF', "Controller",
                                       self._labels["input_param"], datapath)
                pd_dataset = inputHandlerCSV().inputCSV(filename=datapath)
                id_datapath = Path(datapath).name
                hash_dataframe = hash_key('MD5', datapath)
            except (IOError, OSError, JSONDecodeError):
                self._logging.log_critical('gDayF', "Controller",
                                           self._labels["failed_input"],
                                           datapath)
                return self._labels['failed_input'], None
        elif isinstance(datapath, DataFrame):
            hash_dataframe = None
            self._logging.log_info('gDayF', "Controller",
                                   self._labels["input_param"],
                                   str(datapath.shape))
            pd_dataset = datapath
            id_datapath = 'Dataframe_{0}_{1}_{2}'.format(
                pd_dataset.size, pd_dataset.shape[0], pd_dataset.shape[1])
        else:
            self._logging.log_critical('gDayF', "Controller",
                                       self._labels["failed_input"], datapath)
            return self._labels['failed_input'], None

        pd_test_dataset = None
        if self._config['common']['minimal_test_split'] <= len(pd_dataset.index) \
                and (metric in ACCURACY_METRICS or metric in REGRESSION_METRICS):
            pd_dataset, pd_test_dataset = pandas_split_data(
                pd_dataset,
                train_perc=self._config['common']['test_frame_ratio'])

        df = DFMetada().getDataFrameMetadata(pd_dataset, 'pandas')
        self._ec.set_id_analysis(self._ec.get_id_user() + '_' + id_datapath +
                                 '_' + str(time()))
        adviser = self.adviser.AdviserAStar(e_c=self._ec,
                                            metric=metric,
                                            deep_impact=deep_impact,
                                            dataframe_name=id_datapath,
                                            hash_dataframe=hash_dataframe)

        adviser.analysis_specific(dataframe_metadata=df,
                                  list_ar_metadata=list_ar_metadata)

        while adviser.next_analysis_list is not None:

            for each_model in adviser.next_analysis_list:
                fw = get_model_fw(each_model)

                self.init_handler(fw)

                if pd_test_dataset is not None:
                    _, analyzed_model = self.model_handler[fw][
                        'handler'].order_training(training_pframe=pd_dataset,
                                                  base_ar=each_model,
                                                  test_frame=pd_test_dataset,
                                                  filtering='NONE')
                else:
                    _, analyzed_model = self.model_handler[fw][
                        'handler'].order_training(training_pframe=pd_dataset,
                                                  base_ar=each_model,
                                                  filtering='NONE')
                if analyzed_model is not None:
                    adviser.analysis_recommendation_order.append(
                        analyzed_model)

            adviser.next_analysis_list.clear()
            adviser.analysis_recommendation_order = adviser.priorize_models(
                model_list=adviser.analysis_recommendation_order)
            adviser.analysis_specific(
                dataframe_metadata=df,
                list_ar_metadata=adviser.analysis_recommendation_order)

        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["ana_models"],
                               str(len(adviser.analyzed_models)))
        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["exc_models"],
                               str(len(adviser.excluded_models)))

        self.log_model_list(adviser.analysis_recommendation_order, metric)

        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["end"])

        self.clean_handlers()

        adviser.analysis_recommendation_order = adviser.priorize_models(
            model_list=adviser.analysis_recommendation_order)

        return self._labels[
            'success_op'], adviser.analysis_recommendation_order
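
Both `exec_sanalysis` and `exec_analysis` below drive the same loop: train every candidate the adviser proposes, re-rank the accumulated results, hand them back, and stop once the adviser proposes nothing further. A stripped-down sketch of that control flow, where `adviser` and `train_model` are hypothetical stand-ins for `AdviserAStar` and the framework handler's `order_training` call:

def run_rounds(adviser, train_model):
    # Train candidates in rounds until the adviser stops proposing any.
    results = []
    while adviser.next_analysis_list is not None:
        for candidate in adviser.next_analysis_list:
            trained = train_model(candidate)
            if trained is not None:
                results.append(trained)
        adviser.next_analysis_list.clear()
        # Re-rank everything trained so far, then let the adviser decide
        # whether another, deeper round is worth scheduling.
        results = adviser.priorize_models(model_list=results)
        # Hypothetical hook: refills next_analysis_list or sets it to
        # None to stop (analysis_specific / set_recommendations in gDayF).
        adviser.plan_next_round(results)
    return results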
Example 3
    def exec_analysis(self,
                      datapath,
                      objective_column,
                      amode=POC,
                      metric='test_accuracy',
                      deep_impact=3,
                      **kwargs):
        # Clustering variables
        k = None
        estimate_k = False

        # Force analysis variable
        atype = None

        hash_dataframe = ''

        for pname, pvalue in kwargs.items():
            if pname == 'k':
                assert isinstance(pvalue, int)
                k = pvalue
            elif pname == 'estimate_k':
                assert isinstance(pvalue, bool)
                estimate_k = pvalue
            elif pname == 'atype':
                assert pvalue in atypes
                atype = pvalue

        supervised = True
        if objective_column is None:
            supervised = False

        self._logging.log_info('gDayF', "Controller", self._labels["start"])
        self._logging.log_info('gDayF', "Controller",
                               self._labels["ana_param"], metric)
        self._logging.log_info('gDayF', "Controller",
                               self._labels["dep_param"], deep_impact)
        self._logging.log_info('gDayF', "Controller", self._labels["ana_mode"],
                               amode)

        if isinstance(datapath, str):
            try:
                self._logging.log_info('gDayF', "Controller",
                                       self._labels["input_param"], datapath)
                pd_dataset = inputHandlerCSV().inputCSV(filename=datapath)
                id_datapath = Path(datapath).name
                hash_dataframe = hash_key('MD5', datapath)
            except (IOError, OSError, JSONDecodeError):
                self._logging.log_critical('gDayF', "Controller",
                                           self._labels["failed_input"],
                                           datapath)
                return self._labels['failed_input'], None
        elif isinstance(datapath, DataFrame):
            self._logging.log_info('gDayF', "Controller",
                                   self._labels["input_param"],
                                   str(datapath.shape))
            pd_dataset = datapath
            id_datapath = 'Dataframe_{0}_{1}_{2}'.format(
                pd_dataset.size, pd_dataset.shape[0], pd_dataset.shape[1])
            hash_dataframe = md5(
                datapath.to_json().encode('utf-8')).hexdigest()
        else:
            self._logging.log_critical('gDayF', "Controller",
                                       self._labels["failed_input"], datapath)
            return self._labels['failed_input'], None

        pd_test_dataset = None
        if self._config['common']['minimal_test_split'] <= len(pd_dataset.index) \
                and (metric in ACCURACY_METRICS or metric in REGRESSION_METRICS):
            pd_dataset, pd_test_dataset = pandas_split_data(
                pd_dataset,
                train_perc=self._config['common']['test_frame_ratio'])

        df = DFMetada().getDataFrameMetadata(pd_dataset, 'pandas')

        self._ec.set_id_analysis(self._ec.get_id_user() + '_' + id_datapath +
                                 '_' + str(time()))
        adviser = self.adviser.AdviserAStar(e_c=self._ec,
                                            metric=metric,
                                            deep_impact=deep_impact,
                                            dataframe_name=id_datapath,
                                            hash_dataframe=hash_dataframe)

        adviser.set_recommendations(dataframe_metadata=df,
                                    objective_column=objective_column,
                                    amode=amode,
                                    atype=atype)

        while adviser.next_analysis_list is not None:
            for each_model in adviser.next_analysis_list:
                fw = get_model_fw(each_model)

                if k is not None:
                    try:
                        each_model["model_parameters"][fw]["parameters"]["k"][
                            "value"] = k
                        each_model["model_parameters"][fw]["parameters"]["k"][
                            "seleccionable"] = True
                        each_model["model_parameters"][fw]["parameters"][
                            "estimate_k"]["value"] = estimate_k
                        each_model["model_parameters"][fw]["parameters"][
                            "estimate_k"]["seleccionable"] = True
                    except KeyError:
                        pass

                self.init_handler(fw)
                if pd_test_dataset is not None:
                    _, analyzed_model = self.model_handler[fw][
                        'handler'].order_training(training_pframe=pd_dataset,
                                                  base_ar=each_model,
                                                  test_frame=pd_test_dataset,
                                                  filtering='STANDARDIZE')
                else:
                    _, analyzed_model = self.model_handler[fw][
                        'handler'].order_training(training_pframe=pd_dataset,
                                                  base_ar=each_model,
                                                  test_frame=pd_dataset,
                                                  filtering='STANDARDIZE')

                if analyzed_model is not None:
                    adviser.analysis_recommendation_order.append(
                        analyzed_model)
            adviser.next_analysis_list.clear()
            adviser.analysis_recommendation_order = adviser.priorize_models(
                model_list=adviser.analysis_recommendation_order)
            adviser.set_recommendations(dataframe_metadata=df,
                                        objective_column=objective_column,
                                        amode=amode)

        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["ana_models"],
                               str(len(adviser.analyzed_models)))
        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["exc_models"],
                               str(len(adviser.excluded_models)))

        self._logging.log_exec(self._ec.get_id_analysis(), 'controller',
                               self._labels["end"])

        self.clean_handlers()

        adviser.analysis_recommendation_order = adviser.priorize_models(
            model_list=adviser.analysis_recommendation_order)

        return self._labels[
            'success_op'], adviser.analysis_recommendation_order
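
The `kwargs` handling at the top of `exec_analysis` validates `k`, `estimate_k`, and `atype` with bare `assert` statements, which are stripped when Python runs with `-O`. A hedged alternative that fails loudly in every mode; `parse_cluster_kwargs` is an illustrative name, and `atypes` is passed in here rather than imported from `gdayf.common.constants`:

def parse_cluster_kwargs(atypes, **kwargs):
    # Validate optional clustering parameters; ValueError survives -O.
    k = kwargs.get('k')
    if k is not None and not isinstance(k, int):
        raise ValueError('k must be an int, got %r' % (k,))
    estimate_k = kwargs.get('estimate_k', False)
    if not isinstance(estimate_k, bool):
        raise ValueError('estimate_k must be a bool')
    atype = kwargs.get('atype')
    if atype is not None and atype not in atypes:
        raise ValueError('atype must be one of %s' % sorted(atypes))
    return k, estimate_k, atype


k, estimate_k, atype = parse_cluster_kwargs({'clustering'}, k=3)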
Example 4
    def exec_prediction(self, datapath, armetadata=None, model_file=None):

        self._logging.log_info('gDayF', "Controller", self._labels["ana_mode"],
                               'prediction')
        if armetadata is None and model_file is None:
            self._logging.log_critical('gDayF', "Controller",
                                       self._labels["failed_model"], datapath)
            return self._labels["failed_model"]
        elif armetadata is not None:
            try:
                assert isinstance(armetadata, ArMetadata)
                base_ar = deep_ordered_copy(armetadata)
            except AssertionError:
                self._logging.log_critical('gDayF', "Controller",
                                           self._labels["failed_model"],
                                           armetadata)
                return self._labels["failed_model"]
        elif model_file is not None:
            try:
                persistence = PersistenceHandler(self._ec)
                invalid, base_ar = persistence.get_ar_from_engine(model_file)
                del persistence

                if invalid:
                    self._logging.log_critical('gDayF', "Controller",
                                               self._labels["failed_model"],
                                               model_file)
                    return self._labels["failed_model"]
            except IOError as iexecution_error:
                print(repr(iexecution_error))
                self._logging.log_critical('gDayF', "Controller",
                                           self._labels["failed_model"],
                                           model_file)
                return self._labels["failed_model"]
            except OSError as oexecution_error:
                print(repr(oexecution_error))
                self._logging.log_critical('gDayF', "Controller",
                                           self._labels["failed_model"],
                                           model_file)
                return self._labels["failed_model"]

        if isinstance(datapath, str):
            try:
                self._logging.log_info('gDayF', "Controller",
                                       self._labels["input_param"], datapath)
                pd_dataset = inputHandlerCSV().inputCSV(filename=datapath)
            except (IOError, OSError, JSONDecodeError):
                self._logging.log_critical('gDayF', "Controller",
                                           self._labels["failed_input"],
                                           datapath)
                return self._labels['failed_input']
        elif isinstance(datapath, DataFrame):
            pd_dataset = datapath
            self._logging.log_info('gDayF', "Controller",
                                   self._labels["input_param"],
                                   str(datapath.shape))
        else:
            self._logging.log_critical('gDayF', "Controller",
                                       self._labels["failed_input"], datapath)
            return self._labels['failed_input']

        fw = get_model_fw(base_ar)

        self.init_handler(fw)

        prediction_frame = None
        try:
            prediction_frame, _ = self.model_handler[fw]['handler'].predict(
                predict_frame=pd_dataset, base_ar=base_ar)
        except TypeError:
            self._logging.log_critical('gDayF', "Controller",
                                       self._labels["failed_model"],
                                       model_file)

        self.clean_handler(fw)

        self._logging.log_info('gDayF', 'controller', self._labels["pred_end"])

        return prediction_frame
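
`exec_prediction` resolves its model source in a fixed order: an in-memory `ArMetadata` takes precedence, a stored `model_file` is loaded through `PersistenceHandler` otherwise, and passing neither is rejected up front. A minimal sketch of that precedence, with `resolve_model` and `loader` as hypothetical stand-ins:

def resolve_model(armetadata=None, model_file=None, loader=None):
    # Mirrors exec_prediction's precedence rules.
    if armetadata is None and model_file is None:
        raise ValueError('either armetadata or model_file is required')
    if armetadata is not None:
        return armetadata  # in-memory metadata wins over the file path
    # `loader` stands in for PersistenceHandler.get_ar_from_engine.
    invalid, base_ar = loader(model_file)
    if invalid:
        raise IOError('could not load model from %r' % model_file)
    return base_ar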
Example 5
    else:
        ddict1 = dumps(OrderedDict(dict1))
        ddict2 = dumps(OrderedDict(dict2))
        return (md5(ddict1.encode('utf-8')).hexdigest()
                == md5(ddict2.encode('utf-8')).hexdigest())
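
Two `hashlib` objects never compare equal (they define no `__eq__`, so the comparison above must be between digests, as fixed), and because plain dicts preserve insertion order in Python 3.7+, serializing with `dumps(OrderedDict(...))` makes the comparison order-sensitive. If only the contents should matter, a key-order-insensitive variant looks like the sketch below; whether gDayF's OrderedDict-based metadata should instead treat order as significant is an assumption to check.

from hashlib import md5
from json import dumps


def dicts_equal(dict1, dict2):
    # Hash a canonical serialization: sort_keys=True removes any
    # dependence on insertion order before the MD5 comparison.
    d1 = dumps(dict1, sort_keys=True).encode('utf-8')
    d2 = dumps(dict2, sort_keys=True).encode('utf-8')
    return md5(d1).hexdigest() == md5(d2).hexdigest()


assert dicts_equal({'a': 1, 'b': 2}, {'b': 2, 'a': 1})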


if __name__ == "__main__":
    from gdayf.handlers.inputhandler import inputHandlerCSV
    from pandas import concat
    import operator
    from gdayf.core.experiment_context import Experiment_Context
    from os import path
    from gdayf.common.constants import *

    e_c = Experiment_Context(user_id='Crulogic')

    source_data = list()
    source_data.append(
        path.join(
            path.dirname(__file__),
            '../../../../../source data/Transformados-PDI/Crulogic-2017/'))
    source_data.append("Crulogic-17-18.csv")

    pd_train_dataset = inputHandlerCSV().inputCSV(''.join(source_data))

    m = DFMetada()
    print(OrderedDict(m.getDataFrameMetadata(pd_train_dataset, 'pandas')))
    print(dumps(m.getDataFrameMetadata(pd_train_dataset, 'pandas'), indent=4))
if __name__ == "__main__":

    from gdayf.core.controller import Controller
    from gdayf.common.utils import get_model_fw
    from gdayf.common.constants import *
    from pandas import set_option, DataFrame, read_excel, concat
    from os import path
    from collections import OrderedDict
    from gdayf.handlers.inputhandler import inputHandlerCSV

    source_data = list()
    source_data.append(
        path.join(path.dirname(__file__),
                  '../../../../source data/Transformados-PDI/Crulogic-2017/'))
    source_data.append("Crulogic-17-18.csv")
    model_data = inputHandlerCSV().inputCSV(filename=''.join(source_data))

    source_1_data = list()
    source_1_data.append(
        '/Data/gdayf-v1/experiments/Crulogic-r2/CRULOGIC-avg-fuel-speed-predicted_1537911231.396293/'
    )
    source_1_data.append('summary/predict/')
    source_1_data.append('Avg-speed_a2_p_prediction.xls')
    model_1_data = read_excel(io=''.join(source_1_data))

    source_2_data = list()
    source_2_data.append(
        '/Data/gdayf-v1/experiments/Crulogic-r2/CRULOGIC-avg-fuel-speed-predicted_1537911231.396293/'
    )
    source_2_data.append('summary/predict/')
    source_2_data.append('Avg-speed_a4_p_prediction.xls')