def test_process_prediction(self):
    model_path = 'tests/fixtures/test_predict_by_model/iris'
    options = fsclient.read_json_file(
        os.path.join(model_path, "options.json"))
    target_categories = ["setosa", "versicolor", "virginica"]

    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))
    ds.drop([options['targetFeature']])
    results = [
        "setosa", "versicolor", "virginica",
        "setosa", "versicolor", "virginica"
    ]
    results_proba = None
    proba_classes = None

    ModelHelper.process_prediction(
        ds, results, results_proba, proba_classes,
        None, options.get('minority_target_class'),
        options['targetFeature'], target_categories)

    ds_test = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    self.assertEqual(ds.dtypes, ds_test.dtypes)
    self.assertEqual(ds.df.values.tolist(), ds_test.df.values.tolist())
def test_process_prediction_proba(self):
    model_path = 'tests/fixtures/test_predict_by_model/iris'
    options = fsclient.read_json_file(
        os.path.join(model_path, "options.json"))
    target_categories = ["setosa", "versicolor", "virginica"]

    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))
    ds.drop([options['targetFeature']])
    results = None  # [0, 1, 2, 0, 1, 2]
    results_proba = [
        [0.8, 0.1, 0.1], [0.4, 0.6, 0.1], [0.1, 0.2, 0.7],
        [0.7, 0.2, 0.1], [0.3, 0.7, 0.1], [0.1, 0.3, 0.6]
    ]
    results_proba = np.array(results_proba)
    proba_classes = [0, 1, 2]

    ModelHelper.process_prediction(
        ds, results, results_proba, proba_classes,
        0.5, None, options['targetFeature'], target_categories)

    ds_test = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    self.assertEqual(
        ds.columns,
        ds_test.columns + ["proba_setosa", "proba_versicolor", "proba_virginica"])
    self.assertEqual(
        ds.df[options['targetFeature']].values.tolist(),
        ds_test.df[options['targetFeature']].values.tolist())
def _predict_locally(self, filename_arg, model_id, threshold, data, columns, output):
    model_deploy = ModelDeploy(self.ctx, None)
    is_model_loaded, model_path, model_name = \
        model_deploy.verify_local_model(model_id)

    if not is_model_loaded:
        raise AugerException(
            'Model isn\'t loaded locally. '
            'Please use a2ml deploy command to download model.')

    model_path, model_existed = self._extract_model(model_name)
    model_options = fsclient.read_json_file(
        os.path.join(model_path, "model", "options.json"))

    filename = filename_arg
    if not filename:
        # data was passed in memory: persist it to a temporary CSV
        # so the dockerized predictor can read it
        ds = DataFrame.create_dataframe(filename, data, columns)
        filename = os.path.join(
            self.ctx.config.get_path(), '.augerml', 'predict_data.csv')
        ds.saveToCsvFile(filename, compression=None)

    try:
        predicted = \
            self._docker_run_predict(filename, threshold, model_path)
    finally:
        # clean up unzipped model
        # if it wasn't unzipped before
        if not model_existed:
            shutil.rmtree(model_path, ignore_errors=True)
            model_path = None

    if not filename_arg:
        # data was passed in memory: return records instead of a file path
        ds_result = DataFrame.create_dataframe(predicted)
        ds_result.options['data_path'] = None
        ds_result.loaded_columns = columns

        return ModelHelper.save_prediction_result(
            ds_result,
            prediction_id=None,
            support_review_model=model_options.get("support_review_model") if model_path else False,
            json_result=False,
            count_in_result=False,
            prediction_date=None,
            model_path=model_path,
            model_id=model_id,
            output=output)
    elif output:
        fsclient.move_file(predicted, output)
        predicted = output

    return predicted
def build_review_data(self, data_path=None, output=None):
    if not data_path:
        data_path = self.options['data_path']

    ds_train = DataFrame.create_dataframe(data_path)

    all_files = fsclient.list_folder(
        os.path.join(self.model_path, "predictions/*_actuals.feather.zstd"),
        wild=True, remove_folder_name=False, meta_info=True)
    all_files.sort(key=lambda f: f['last_modified'], reverse=True)

    for (file, ds_actuals) in DataFrame.load_from_files(all_files):
        if not ds_actuals.df.empty:
            ds_actuals.drop(['prediction_id', 'prediction_group_id'])

            ds_train.df = pd.concat(
                [ds_train.df, ds_actuals.df[ds_train.columns]],
                ignore_index=True)

    ds_train.drop_duplicates()

    if not output:
        output = os.path.splitext(data_path)[0] + "_review_%s.feather.zstd" % (get_uid())

    ds_train.saveToFile(output)
    return output
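# Hypothetical usage sketch (names are illustrative, not from the project's docs):
# `review` stands for an instance of the class that defines build_review_data()
# above, and the data path is a placeholder.
#
#   output_path = review.build_review_data(data_path='data/iris_train.csv')
#   # output_path points at "<data_path>_review_<uid>.feather.zstd", containing
#   # the training data with stored actuals appended and duplicates dropped.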
def preprocess_target(model_path, data_path=None, records=None, features=None):
    ds = DataFrame.create_dataframe(data_path, records, features)

    return ModelHelper.preprocess_target_ds(model_path, ds)
def predict(self, filename, model_id, threshold=None, locally=False, data=None, columns=None,
        output=None, json_result=False, count_in_result=False,
        prediction_date=None, prediction_id=None):
    ds = DataFrame.create_dataframe(filename, data, columns)
    model_path = self.ctx.config.get_model_path(model_id)
    options = fsclient.read_json_file(os.path.join(model_path, "options.json"))

    results, results_proba, proba_classes, target_categories = \
        self._predict_locally(ds.df, model_id, threshold) if locally else \
        self._predict_remotely(ds.df, model_id, threshold)

    if target_categories and len(target_categories) == 2:
        # binary targets serialized as strings: convert "True"/"False" back to booleans
        for idx, item in enumerate(target_categories):
            if item == "False":
                target_categories[idx] = False
            if item == "True":
                target_categories[idx] = True

    ModelHelper.process_prediction(
        ds, results, results_proba, proba_classes, threshold,
        options.get('minority_target_class', self.ctx.config.get('minority_target_class')),
        options.get('targetFeature', self.ctx.config.get('target', None)),
        target_categories)

    predicted = ModelHelper.save_prediction(
        ds, prediction_id,
        options.get('support_review_model', True), json_result, count_in_result,
        prediction_date, model_path, model_id, output)

    if filename:
        self.ctx.log('Predictions stored in %s' % predicted)

    return {'predicted': predicted}
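# Hypothetical usage sketch: `model` stands for an instance of the class that
# defines predict() above; the file name, model id, and threshold are placeholders.
#
#   result = model.predict('data/iris_test.csv', model_id='A1B2C3',
#       threshold=0.5, locally=True)
#   # predict() returns {'predicted': ...}; when a filename is passed, the value
#   # is the path to the stored predictions and that path is also logged.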
def _predict_on_cloud(self, filename, model_id, threshold, data, columns, output):
    ds = DataFrame.create_dataframe(filename, data, columns)
    pipeline_api = AugerPipelineApi(self.ctx, None, model_id)
    predictions = pipeline_api.predict(ds.get_records(), ds.columns, threshold)

    ds_result = DataFrame.create_dataframe(
        None, records=predictions['data'], features=predictions['columns'])
    ds_result.options['data_path'] = filename

    return ModelHelper.save_prediction_result(
        ds_result,
        prediction_id=None,
        support_review_model=False,
        json_result=False,
        count_in_result=False,
        prediction_date=None,
        model_path=None,
        model_id=model_id,
        output=output)
def add_actuals(self, actuals_path=None, actual_records=None, prediction_group_id=None,
        primary_prediction_group_id=None, primary_model_path=None,
        actual_date=None, actuals_id=None, calc_score=True):
    features = None
    if actuals_path or (actual_records and type(actual_records[0]) == list):
        features = ['prediction_id', 'actual']

    ds_actuals = DataFrame.create_dataframe(
        actuals_path, actual_records, features=features)

    result = self._process_actuals(
        ds_actuals, prediction_group_id, primary_prediction_group_id,
        primary_model_path, actual_date, actuals_id, calc_score,
        raise_not_found=True)

    ds_actuals.drop(self.target_feature)
    ds_actuals.df = ds_actuals.df.rename(
        columns={'a2ml_actual': self.target_feature})

    if not actuals_id:
        actuals_id = get_uid()

    file_name = str(
        actual_date or datetime.date.today()) + '_' + actuals_id + "_actuals.feather.zstd"
    ds_actuals.saveToFeatherFile(
        os.path.join(self.model_path, "predictions", file_name))

    return result
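# Hypothetical usage sketch: `review` stands for an instance of the class that
# defines add_actuals() above; the prediction ids and labels are placeholders.
# With a plain list of records the features default to ['prediction_id', 'actual'],
# as handled at the top of the method.
#
#   review.add_actuals(actual_records=[
#       ['prediction-uuid-1', 'setosa'],
#       ['prediction-uuid-2', 'virginica'],
#   ])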
def test_save_prediction(self):
    model_path = 'tests/fixtures/test_predict_by_model/iris'
    options = fsclient.read_json_file(
        os.path.join(model_path, "options.json"))

    prediction_id = "123"
    prediction_date = "today"
    results_file_path = os.path.join(
        model_path, "predictions",
        prediction_date + '_' + prediction_id + "_results.feather.zstd")
    predicted_file_path = os.path.join(
        model_path, "predictions",
        "iris_test_" + prediction_id + "_" + options.get('uid') + "_predicted.csv")

    # 1. data_path is set: predictions are saved to a *_predicted.csv file
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=True, json_result=False, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    self.assertEqual(res, predicted_file_path)
    self.assertTrue(fsclient.is_file_exists(predicted_file_path))
    self.assertTrue(fsclient.is_file_exists(results_file_path))

    # 2. json_result=True: predictions are returned as a JSON string
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=True, json_result=True, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    res = json.loads(res)
    self.assertEqual(res['columns'], ds.columns)
    self.assertEqual(len(res['data']), 6)

    # 3. no data_path and no loaded_columns: predictions are returned as a list of dicts
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    ds.options['data_path'] = None
    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=False, json_result=False, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    self.assertEqual(type(res[0]), dict)
    self.assertEqual(res[0][options['targetFeature']], 'setosa')

    # 4. no data_path but loaded_columns set: predictions are returned as columns/data lists
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    ds.options['data_path'] = None
    ds.loaded_columns = ds.columns
    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=False, json_result=False, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    self.assertEqual(res['columns'], ds.columns)
    self.assertEqual(len(res['data']), 6)
    self.assertEqual(type(res['data'][0]), list)