Ejemplo n.º 1
0
    def exec(self) -> None:
        """
        Read the preprocessed csv, split it and export both partitions.

        The split method and its criteria are taken from the execution plan.
        The two partitions are written with the csv exporter to the plan's
        target directory under the names 'training' and 'testing'.
        :return: None
        """
        plan = self._exec_plan

        # ----------
        # load the first item the csv importer returns for the source dir
        # ----------
        csv_reader = ImporterManager(ImporterFactory('csv').generate())
        dataset = csv_reader.exec(plan['source_dir'])[0]

        # ----------
        # partition it with the splitter named in the plan
        # ----------
        split_manager = SplitManager(SplitFactory(plan['method']).generate())
        training, testing = split_manager.exec(dataset, plan['criteria'])

        # ----------
        # write each partition with a fresh 0..n-1 index
        # ----------
        saver = ExportManager(ExportFactory('csv').generate())
        for partition, file_name in ((training, 'training'),
                                     (testing, 'testing')):
            saver.exec(partition.reset_index(drop=True),
                       plan['target']['dir'], file_name)
        print('=== Finished split ===')
Ejemplo n.º 2
0
    def exec(self):
        """
        Compute the RMSE of every model listed in the models summary.

        Reads from the execution plan:
          - 'source_dir': csv source of the testing data
          - 'models_summary': csv source of the model summary table, which
            must provide 'model_name', 'response' and 'predictors' columns
          - 'models_dir': directory the saved models are imported from
          - 'summary_target': 'dir' and 'name' for the exported result

        The summary table is exported again as csv with an extra 'rmse'
        column appended.
        :return: None
        """
        # Fetch the testing csv file
        importer = ImporterFactory('csv').generate()
        importer_manager = ImporterManager(importer)
        testing = importer_manager.exec(self._exec_plan['source_dir'])[0]

        # Get model info data and convert it to {column: {row: value}}
        data = importer_manager.exec(self._exec_plan['models_summary'])[0]
        data_dict = data.to_dict()

        # Import the models named in the summary, in summary order
        model_files = [{
            'dir': self._exec_plan['models_dir'],
            'files': list(data_dict['model_name'].values())
        }]
        model_importer = ImporterFactory('model').generate()
        model_manager = ImporterManager(model_importer)
        models = model_manager.exec(model_files)

        # Get a list of RMSE, one per (response, predictors, model) row
        rmse_list = []
        for response, predictors, model in zip(
                data_dict['response'].values(),
                data_dict['predictors'].values(), models):
            # predictors are stored joined with '/'
            predictor_names = predictors.split('/')
            # response is stored as the text of a list, e.g. "['a', 'b']";
            # drop the brackets first, then the quotes around every name so
            # that inner names do not keep a stray quote
            response_names = [
                name.strip("'\"")
                for name in response.strip('[]').split(', ')
            ]
            rmse_list.append(RMSE().get_rmse(testing, predictor_names,
                                             response_names, model))

        # Concat model info dataframe and rmse dataframe
        rmse_df = pd.DataFrame(rmse_list, columns=['rmse'])
        result_df = pd.concat([data, rmse_df], axis=1)

        # Export Result Dataframe
        csv_exporter = ExportFactory('csv').generate()
        exporter = ExportManager(csv_exporter)
        exporter.exec(result_df, self._exec_plan['summary_target']['dir'],
                      self._exec_plan['summary_target']['name'])
        print('=== Finish estimate ===')
Ejemplo n.º 3
0
Archivo: RMSE.py Proyecto: 10yung/Mobot
        rmse = cal_rmse(y_true, y_pred)
        return rmse


if __name__ == '__main__':
    print("### RMSE ###")

    # get source
    source_dir = [{
        'dir': '../../../data/split/',
        'files': ['training.csv']
    }, {
        'dir': '../../../data/split/',
        'files': ['testing.csv']
    }]
    importer = ImporterFactory('csv').generate()
    importer_manager = ImporterManager(importer)
    training, testing = importer_manager.exec(source_dir)

    # Import Model
    model_files = [{
        'dir': '../../../data/model/models/',
        'files': ["SimpleLm"]
    }]
    fetcher_object = ImporterFactory('model').generate()
    model_object = ImporterManager(fetcher_object)
    model_obj = model_object.exec(model_files)[0]

    # Test Parameters
    predictors_list = [
        'Health.expenditures....of.GDP.', 'Literacy....',
Ejemplo n.º 4
0
        return (model, predictor, result_df)


if __name__ == '__main__':
    print('### Simple Linear Model')

    # get source: load the preprocessed csv once through the csv importer
    # (an earlier duplicate load and split, whose results were overwritten
    # before being used, has been removed)
    importer_object = ImporterFactory('csv').generate()
    importer_manager = ImporterManager(importer_object)

    files = [{
        'dir': '../../../data/preprocessed/',
        'files': ['covid19_preprocessed.csv']
    }]
    data = importer_manager.exec(files)[0]

    # initialize split object and split with the 'ratio' method, criteria 0.8
    ratio_splitter = SplitFactory('ratio').generate()
    training, testing = SplitManager(ratio_splitter).exec(data, 0.8)

    predictor_name = ["Health.expenditures....of.GDP.", "Literacy...."]
    response_name = ['Recovery Rate']

    # NOTE(review): the model receives the full `data`, not the `training`
    # split computed above -- confirm this is intended.
    model = SimpleLm(predictor_name, response_name)
    result = model.exec(data)
Ejemplo n.º 5
0
    def exec(self):
        """
        Run every experiment of the execution plan and export the results.

        Each experiment is registered in a ModelRegister and executed on the
        imported csv data. The first item of its result is exported with the
        'model' exporter. A summary table with the columns model_name,
        response, predictors and criteria is then exported as csv.
        :return:
        """
        plan = self._exec_plan

        # get source
        csv_manager = ImporterManager(ImporterFactory('csv').generate())
        dataset = csv_manager.exec(plan['source_dir'])[0]

        # -----
        # register the class of every experiment in the command container
        #  TODO: import module dynamically
        # -----
        available_models = {
            'AIC': AIC,
            'StepWise': StepWise,
            'SimpleLm': SimpleLm,
        }
        register = ModelRegister()
        for experiment in plan['experiments']:
            name = experiment['name']
            register.register(name, available_models[name],
                              plan['predictor_name'], plan['response_name'])

        # -----
        # run each registered experiment and export what it produced
        # -----
        summary_rows = {}
        for experiment in plan['experiments']:
            outcome = register.exec(experiment['name'], dataset,
                                    experiment['criteria'])

            # only the first criteria entry contributes to the export name
            criteria_pairs = list(experiment['criteria'].items())
            if criteria_pairs:
                key, value = criteria_pairs[0]
                criteria_label = key + ' ' + str(value)
                export_name = experiment['name'] + '_' + str(value)
            else:
                criteria_label = ''
                export_name = experiment['name']

            model_writer = ExportManager(ExportFactory('model').generate())
            model_writer.exec(outcome[0], plan['models_target']['dir'],
                              export_name)

            # outcome[1] is joined with '/' for the predictors column
            summary_rows[export_name] = [
                plan['response_name'], '/'.join(outcome[1]), criteria_label
            ]

        # Build the summary table; the dict keys become the first column
        summary = pd.DataFrame.from_dict(
            summary_rows,
            orient='index',
            columns=['response', 'predictors', 'criteria']).reset_index()
        summary.columns = ['model_name', 'response', 'predictors', 'criteria']

        # export the result to model folder
        summary_writer = ExportManager(ExportFactory('csv').generate())
        summary_writer.exec(summary, plan['summary_target']['dir'],
                            plan['summary_target']['name'])
        print('=== Finish modeling ===')