Example #1
0
    from pandas import set_option
    from gdayf.common.dataload import DataLoad

    # --- Analysis phase: regression on the DM dataset ---
    controller = Controller()
    if controller.config_checks():
        train_frame, holdout_frame = DataLoad().dm()
        status, analysis_reports = controller.exec_analysis(
            datapath=train_frame,
            objective_column='Weather_Temperature',
            amode=POC,
            metric='test_rmse',
            deep_impact=5)

        controller.reconstruct_execution_tree(arlist=None,
                                              metric='test_rmse',
                                              store=True)
        controller.remove_models(analysis_reports, mode=EACH_BEST)

        # Widen the pandas console limits so result frames print in full.
        display_options = {
            'display.max_rows': 500,
            'display.max_columns': 50,
            'display.max_colwidth': 100,
            'display.precision': 4,
            'display.width': 1024,
        }
        for option_name, option_value in display_options.items():
            set_option(option_name, option_value)

        # --- Prediction phase: score the held-out split with the best model ---
        print('Starting Prediction\'s Phase')

        best_model_path = analysis_reports[0]['json_path'][0]['value']
        prediction_frame = controller.exec_prediction(
            datapath=holdout_frame,
            model_file=best_model_path)
Example #2
0
    from gdayf.common.constants import *
    from pandas import set_option
    from gdayf.common.dataload import DataLoad

    # --- Analysis phase: classification on the football dataset ---
    controller = Controller()
    if controller.config_checks():
        train_set, holdout_set = DataLoad().footset()
        status, analysis_reports = controller.exec_analysis(
            datapath=train_set,
            objective_column='HomeWin',
            amode=FAST_PARANOIAC,
            metric='combined_accuracy',
            deep_impact=3)

        controller.reconstruct_execution_tree(metric='test_accuracy',
                                              store=True)
        controller.remove_models(arlist=analysis_reports, mode=EACH_BEST)

        # Widen the pandas console limits so result frames print in full.
        for display_key, display_value in (
                ('display.max_rows', 500),
                ('display.max_columns', 50),
                ('display.max_colwidth', 100),
                ('display.precision', 4),
                ('display.width', 1024)):
            set_option(display_key, display_value)

        # --- Prediction phase: score the held-out split with the best model ---
        print('Starting Prediction\'s Phase')
        print(analysis_reports[0]['load_path'][0]['value'])
        best_model_path = analysis_reports[0]['json_path'][0]['value']
        prediction_frame = controller.exec_prediction(
            datapath=holdout_set,
            model_file=best_model_path)
Example #3
0
    from pandas import set_option
    from gdayf.common.dataload import DataLoad

    # Analysis: unsupervised anomaly detection (no objective column).

    controller = Controller()
    if controller.config_checks():
        data_train, data_test = DataLoad().dm()
        status, recomendations = controller.exec_analysis(
            datapath=data_train,
            objective_column=None,
            amode=ANOMALIES,
            metric='train_rmse',
            deep_impact=5)

        # BUG FIX: metric was 'train-rmse' (hyphen). exec_analysis above was
        # invoked with 'train_rmse' and every other metric id in this file
        # uses underscores, so the hyphenated name could never match.
        controller.reconstruct_execution_tree(recomendations,
                                              metric='train_rmse')
        controller.remove_models(recomendations, mode=EACH_BEST)

        # Widen the pandas console limits so result frames print in full.
        set_option('display.max_rows', 500)
        set_option('display.max_columns', 50)
        set_option('display.max_colwidth', 100)
        set_option('display.precision', 4)
        set_option('display.width', 1024)

        # Prediction: score the held-out split with the best model found.
        print('Starting Prediction\'s Phase')
        prediction_frame = controller.exec_prediction(
            datapath=data_test,
            model_file=recomendations[0]['json_path'][0]['value'])
        print(prediction_frame)
        '''
    from gdayf.common.dataload import DataLoad

    #Analysis
    controller = Controller()
    if controller.config_checks():
        data_train, data_test = DataLoad().dm()
        status, recomendations = controller.exec_analysis(
            datapath=data_train,
            objective_column=None,
            amode=CLUSTERING,
            metric='cdistance',
            deep_impact=4,
            k=8,
            estimate_k=False)

        controller.reconstruct_execution_tree(recomendations,
                                              metric='cdistance')
        controller.remove_models(recomendations, mode=EACH_BEST)

        set_option('display.max_rows', 500)
        set_option('display.max_columns', 50)
        set_option('display.max_colwidth', 100)
        set_option('display.precision', 4)
        set_option('display.width', 1024)

        #Prediction
        print('Starting Prediction\'s Phase')
        prediction_frame = controller.exec_prediction(
            datapath=data_test,
            model_file=recomendations[0]['json_path'][0]['value'])
        print(prediction_frame)
        '''# Save Pojo
Example #5
0
    def train_workflow(self,
                       datapath,
                       wkey,
                       workflow,
                       prefix='main',
                       remove_models=EACH_BEST):
        """Execute the training stage of a workflow definition.

        Loads ``datapath``, runs ``controller.exec_analysis`` driven by the
        ``workflow`` dict (once, or once per distinct value of the optional
        ``for_each`` column), logs/stores performance summaries, scores the
        data with the best recommended model and recursively chains into the
        workflow's ``Next`` step.

        :param datapath: path (or frame) accepted by ``self.check_path``
        :param wkey: workflow key; the model's prediction column is renamed
            to it before chaining
        :param workflow: workflow description dict with ``data``,
            ``parameters`` and ``Next`` sections
        :param prefix: prefix used when composing storage file names
        :param remove_models: model-removal policy forwarded to the controller
        :return: the ``check_path`` error when the dataset cannot be loaded,
            otherwise ``None``
        """
        # Widen pandas display limits so logged frames are not truncated.
        set_option('display.max_rows', 500)
        set_option('display.max_columns', 500)
        set_option('display.width', 1000)

        wf = workflow
        pfix = prefix

        error, dataset = self.check_path(datapath)
        if dataset is None:
            return error

        controller = Controller(e_c=self._ec)
        if controller.config_checks():
            variables = dataset.columns.tolist()

            #for wkey, wvalue in wf.items():
            # Drop the columns the workflow excludes from modelling; a miss
            # is logged but not fatal.
            if wf["data"]["filtered_columns"] is not None:
                for delete in wf["data"]["filtered_columns"]:
                    try:
                        variables.remove(delete)
                    except Exception:
                        self._logging.log_info('gDayF', "Workflow",
                                               self._labels["failed_var"],
                                               delete)
            self._logging.log_info('gDayF', "Workflow",
                                   self._labels["variables_desc"], variables)
            if wf["data"]["for_each"] is not None:
                fe_column = wf["data"]["for_each"]
                fe_data_exclusions = wf["data"]["for_each_exclusions"]
                fe_filtered_data = wf["data"]["filtered_data"]
                fe_parameters = wf["parameters"]
                fe_next = wf["Next"]

                # SECURITY NOTE: eval() composes code from workflow-supplied
                # column names/values; only run trusted workflow definitions.
                for each in eval('dataset.' + fe_column + '.unique()'):
                    if fe_data_exclusions is None or each not in fe_data_exclusions:
                        aux_dataset = eval('dataset[dataset.' + fe_column +
                                           '== each]')
                        pfix = xstr(prefix + '_' + str(each))

                        # Optionally keep only rows at or below the requested
                        # quantile of the filter column.
                        if fe_filtered_data is not None:
                            qcolumn = fe_filtered_data["column"]
                            quantile = aux_dataset[qcolumn].quantile(
                                q=fe_filtered_data["quantile"])
                            aux_dataset = eval('aux_dataset.loc[aux_dataset.' +
                                               qcolumn + '<= ' +
                                               str(quantile) + ']')
                            pfix = xstr(pfix + '_' +
                                        str(fe_filtered_data["quantile"]))

                        if fe_parameters is not None:
                            # Build the exec_analysis call as source text so
                            # arbitrary workflow parameters can be forwarded.
                            source_parameters = list()
                            source_parameters.append(
                                'controller.exec_analysis(')
                            source_parameters.append(
                                'datapath=aux_dataset.loc[:, variables]')
                            for ikey, ivalue in fe_parameters.items():
                                source_parameters.append(',')
                                source_parameters.append(ikey)
                                source_parameters.append('=')
                                # Strings are quoted except "amode", whose
                                # value must stay a bare constant name.
                                if isinstance(ivalue, str) and ikey != "amode":
                                    source_parameters.append('\'')
                                    source_parameters.append(ivalue)
                                    source_parameters.append('\'')
                                else:
                                    source_parameters.append(str(ivalue))
                            source_parameters.append(')')

                            self._logging.log_info(
                                'gDayF', "Workflow",
                                self._labels["desc_operation"],
                                ''.join(source_parameters))
                            status, recomendations = eval(
                                ''.join(source_parameters))
                            controller.remove_models(recomendations,
                                                     mode=remove_models)
                            controller.reconstruct_execution_tree(
                                recomendations,
                                metric=fe_parameters['metric'],
                                store=True)

                            #model_id = recomendations[0]['model_id']
                            # NOTE(review): metric is resolved via eval —
                            # presumably a constant from
                            # gdayf.common.constants; confirm.
                            table_model_list = controller.table_model_list(
                                ar_list=recomendations,
                                metric=eval(fe_parameters['metric']))
                            self._logging.log_info(
                                'gDayF', 'workflow',
                                self._labels["results"] + '\n',
                                table_model_list.to_string(justify="left"))

                            #filename = self.storage_path('train', wkey + '_' + str(pfix) + '_' + 'train_performance'
                            if self._config['common'][
                                    'workflow_summary_enabled']:
                                filename = self.storage_path(
                                    'train',
                                    str(pfix) + '_' + 'train_performance',
                                    'xls')
                                table_model_list.to_excel(
                                    filename,
                                    index=False,
                                    sheet_name='performance')
                                self.replicate_file('train', filename=filename)

                            prediction_frame = controller.exec_prediction(
                                datapath=aux_dataset,
                                model_file=recomendations[0]['json_path'][0]
                                ['value'])
                            try:
                                # Normalize the model's output column to the
                                # workflow key so chained steps can use it.
                                if 'predict' in prediction_frame.columns.values:
                                    prediction_frame.rename(
                                        columns={"predict": wkey},
                                        inplace=True)
                                elif 'prediction' in prediction_frame.columns.values:
                                    prediction_frame.rename(
                                        columns={"prediction": wkey},
                                        inplace=True)

                                self._logging.log_info(
                                    'gDayF', 'workflow',
                                    self._labels["results"] + '\n',
                                    prediction_frame.to_string(
                                        index_names=False, justify="left"))
                                '''filename = self.storage_path('train', wkey + '_'
                                                             + str(pfix) + '_' + 'prediction', 'xls')'''
                                if self._config['common'][
                                        'workflow_summary_enabled']:
                                    filename = self.storage_path(
                                        'train',
                                        str(pfix) + '_' + 'prediction', 'xls')
                                    prediction_frame.to_excel(
                                        filename,
                                        index=False,
                                        sheet_name='train_prediction')
                                    self.replicate_file('train',
                                                        filename=filename)

                            except AttributeError as oexecution_error:
                                # prediction_frame may not be a DataFrame on
                                # failure; log and continue with the chain.
                                self._logging.log_info(
                                    'gDayF', "Workflow",
                                    self._labels["failed_model"],
                                    str(repr(oexecution_error)))

                            try:
                                if fe_next is not None and prediction_frame is not None:
                                    self.workflow(prediction_frame,
                                                  fe_next,
                                                  pfix,
                                                  remove_models=remove_models)
                            except Exception as oexecution_error:
                                self._logging.log_critical(
                                    'gDayF', "Workflow",
                                    self._labels["failed_wf"], str(fe_next))
                                # CONSISTENCY FIX: also log the exception
                                # itself, as the non-for_each branch does.
                                self._logging.log_critical(
                                    'gDayF', "Workflow",
                                    self._labels["failed_wf"],
                                    repr(oexecution_error))
            else:
                aux_dataset = dataset

                if wf["data"]["filtered_data"] is not None:
                    qcolumn = wf["data"]["filtered_data"]["column"]
                    # BUG FIX: '.quatile' raised AttributeError (no such
                    # pandas method); use the scalar Series.quantile form —
                    # as in the for_each branch above — so the query string
                    # interpolates a number rather than a DataFrame.
                    quantile = aux_dataset[qcolumn].quantile(
                        q=wf["data"]["filtered_data"]["quantile"])
                    aux_dataset = aux_dataset.query('%s <= %s' %
                                                    (qcolumn, quantile))

                if wf['parameters'] is not None:
                    # Build the exec_analysis call as source text so
                    # arbitrary workflow parameters can be forwarded.
                    # SECURITY NOTE: eval() on workflow-supplied text;
                    # only run trusted workflow definitions.
                    source_parameters = list()
                    source_parameters.append('controller.exec_analysis(')
                    source_parameters.append(
                        'datapath=aux_dataset.loc[:, variables]')
                    for ikey, ivalue in wf['parameters'].items():
                        source_parameters.append(',')
                        source_parameters.append(ikey)
                        source_parameters.append('=')
                        if isinstance(ivalue, str) and ikey != "amode":
                            source_parameters.append('\'')
                            source_parameters.append(ivalue)
                            source_parameters.append('\'')
                        else:
                            source_parameters.append(str(ivalue))
                    source_parameters.append(')')
                    self._logging.log_info('gDayF', "Workflow",
                                           self._labels["desc_operation"],
                                           ''.join(source_parameters))
                    status, recomendations = eval(''.join(source_parameters))
                    controller.remove_models(recomendations,
                                             mode=remove_models)
                    controller.reconstruct_execution_tree(
                        recomendations,
                        metric=wf['parameters']['metric'],
                        store=True)

                    # (removed unused local 'model_id' — it was never read
                    # and is commented out in the for_each branch too)
                    table_model_list = controller.table_model_list(
                        ar_list=recomendations,
                        metric=eval(wf['parameters']['metric']))
                    self._logging.log_info(
                        'gDayF', 'workflow', self._labels["results"] + '\n',
                        table_model_list.to_string(justify="left"))

                    if self._config['common']['workflow_summary_enabled']:
                        '''filename = self.storage_path('train', wkey + '_' + str(pfix) + '_'
                                                     + 'train_performance', 'xls')'''
                        filename = self.storage_path(
                            'train',
                            str(pfix) + '_' + 'train_performance', 'xls')
                        # TYPO FIX: sheet name was "performace"; use the same
                        # 'performance' sheet name as the for_each branch.
                        table_model_list.to_excel(filename,
                                                  index=False,
                                                  sheet_name="performance")
                        self.replicate_file('train', filename=filename)

                    prediction_frame = controller.exec_prediction(
                        datapath=aux_dataset,
                        model_file=recomendations[0]['json_path'][0]['value'])
                    try:
                        # Normalize the model's output column to the workflow
                        # key so chained steps can use it.
                        if 'predict' in prediction_frame.columns.values:
                            prediction_frame.rename(columns={"predict": wkey},
                                                    inplace=True)
                        elif 'prediction' in prediction_frame.columns.values:
                            prediction_frame.rename(
                                columns={"prediction": wkey}, inplace=True)

                        self._logging.log_info(
                            'gDayF', 'workflow',
                            self._labels["results"] + '\n',
                            prediction_frame.to_string(index_names=False,
                                                       justify="left"))
                        '''filename = self.storage_path('train', wkey + '_' + str(pfix) + '_'
                                                     + 'prediction', 'xls')'''
                        if self._config['common']['workflow_summary_enabled']:
                            filename = self.storage_path(
                                'train',
                                str(pfix) + '_' + 'prediction', 'xls')
                            prediction_frame.to_excel(
                                filename,
                                index=False,
                                sheet_name="train_prediction")
                            self.replicate_file('train', filename=filename)

                    except AttributeError as oexecution_error:
                        self._logging.log_info('gDayF', "Workflow",
                                               self._labels["failed_model"],
                                               str(repr(oexecution_error)))

                    if wf['Next'] is not None and prediction_frame is not None:
                        try:
                            self.workflow(datapath=prediction_frame,
                                          workflow=wf['Next'],
                                          prefix=pfix,
                                          remove_models=remove_models)
                        except Exception as oexecution_error:
                            self._logging.log_critical(
                                'gDayF', "Workflow", self._labels["failed_wf"],
                                str(wf['Next']))
                            self._logging.log_critical(
                                'gDayF', "Workflow", self._labels["failed_wf"],
                                repr(oexecution_error))

        controller.clean_handlers()
        del controller