def train(self, df, targets, conditions):
    """Fit the multiple regression on `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data from which the training set is extracted.
    targets : list of (name, stattype) pairs
        Must contain exactly one column, and it must be NUMERICAL.
    conditions : list of (name, stattype) pairs
        At least one column; CATEGORICAL conditions are integer-coded
        via a value map, all others are treated as numerical.

    Raises
    ------
    BLE (wrapping ValueError)
        If targets or conditions violate the constraints above.
    """
    # Obtain the targets column.
    if len(targets) != 1:
        # BUG FIX: the check demands exactly one target, but the old
        # message said 'at least one' (cf. RandomForest.train, which
        # words the identical check correctly).
        raise BLE(ValueError(
            'MultipleRegression requires exactly one column '
            'in targets. Received {}'.format(targets)))
    if targets[0][1].lower() != 'numerical':
        raise BLE(ValueError(
            'MultipleRegression can only regress NUMERICAL '
            'columns. Received {}'.format(targets)))
    self.targets = [targets[0][0]]
    # Obtain the condition columns.
    if len(conditions) < 1:
        raise BLE(ValueError(
            'MultipleRegression requires at least one '
            'column in conditions. Received {}'.format(conditions)))
    self.conditions_categorical = []
    self.conditions_numerical = []
    for c in conditions:
        if c[1].lower() == 'categorical':
            self.conditions_categorical.append(c[0])
        else:
            self.conditions_numerical.append(c[0])
    # Numerical conditions first, then categorical (feature order
    # downstream relies on this concatenation order).
    self.conditions = self.conditions_numerical + \
        self.conditions_categorical
    # The dataset.
    self.dataset = pd.DataFrame()
    # Lookup for categoricals to code.
    self.categories_to_val_map = dict()
    # Training set (regressors and labels).
    self.X_numerical = np.ndarray(0)
    self.X_categorical = np.ndarray(0)
    self.Y = np.ndarray(0)
    # Linear regressors: one over numerical conditions only (partial),
    # one over all conditions (full).
    self.mr_partial = LinearRegression()
    self.mr_full = LinearRegression()
    # Preprocess the data.
    self.dataset = utils.extract_sklearn_dataset(
        self.conditions, self.targets, df)
    self.categories_to_val_map = utils.build_categorical_to_value_map(
        self.conditions_categorical, self.dataset)
    self.X_categorical = utils.extract_sklearn_features_categorical(
        self.conditions_categorical, self.categories_to_val_map,
        self.dataset)
    self.X_numerical = utils.extract_sklearn_features_numerical(
        self.conditions_numerical, self.dataset)
    self.Y = utils.extract_sklearn_univariate_target(
        self.targets, self.dataset)
    # Train the multiple regression.
    self._train_mr()
def train(self, df, targets, conditions):
    """Fit the multiple regression on `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data from which the training set is extracted.
    targets : list of (name, stattype) pairs
        Must contain exactly one column, and it must be NUMERICAL.
    conditions : list of (name, stattype) pairs
        At least one column; CATEGORICAL conditions are integer-coded
        via a value map, all others are treated as numerical.

    Raises
    ------
    BLE (wrapping ValueError)
        If targets or conditions violate the constraints above.
    """
    # Obtain the targets column.
    if len(targets) != 1:
        # BUG FIX: the check demands exactly one target, but the old
        # message said 'at least one' (cf. RandomForest.train, which
        # words the identical check correctly).
        raise BLE(ValueError(
            'MultipleRegression requires exactly one column '
            'in targets. Received {}'.format(targets)))
    if targets[0][1].lower() != 'numerical':
        raise BLE(ValueError(
            'MultipleRegression can only regress NUMERICAL '
            'columns. Received {}'.format(targets)))
    self.targets = [targets[0][0]]
    # Obtain the condition columns.
    if len(conditions) < 1:
        raise BLE(ValueError(
            'MultipleRegression requires at least one '
            'column in conditions. Received {}'.format(conditions)))
    self.conditions_categorical = []
    self.conditions_numerical = []
    for c in conditions:
        if c[1].lower() == 'categorical':
            self.conditions_categorical.append(c[0])
        else:
            self.conditions_numerical.append(c[0])
    # Numerical conditions first, then categorical (feature order
    # downstream relies on this concatenation order).
    self.conditions = self.conditions_numerical + \
        self.conditions_categorical
    # The dataset.
    self.dataset = pd.DataFrame()
    # Lookup for categoricals to code.
    self.categories_to_val_map = dict()
    # Training set (regressors and labels).
    self.X_numerical = np.ndarray(0)
    self.X_categorical = np.ndarray(0)
    self.Y = np.ndarray(0)
    # Linear regressors: one over numerical conditions only (partial),
    # one over all conditions (full).
    self.mr_partial = LinearRegression()
    self.mr_full = LinearRegression()
    # Preprocess the data.
    self.dataset = utils.extract_sklearn_dataset(
        self.conditions, self.targets, df)
    self.categories_to_val_map = utils.build_categorical_to_value_map(
        self.conditions_categorical, self.dataset)
    self.X_categorical = utils.extract_sklearn_features_categorical(
        self.conditions_categorical, self.categories_to_val_map,
        self.dataset)
    self.X_numerical = utils.extract_sklearn_features_numerical(
        self.conditions_numerical, self.dataset)
    self.Y = utils.extract_sklearn_univariate_target(
        self.targets, self.dataset)
    # Train the multiple regression.
    self._train_mr()
def test_extract_sklearn_dataset():
    """extract_sklearn_dataset keeps only condition/target columns and
    drops rows whose target is null."""
    frame = pd.DataFrame({
        'A': [1.1, 2.1, 3.9, 4.5, 5.1],
        'B': [5.1, 4.1, 3.9, 2.5, 1.1],
        'C': ['1', '2', '3', '4', '5'],
        'D': [1, None, 3, 4, 5],
    })
    conditions = ['A', 'B']
    targets = ['D']
    extracted = sku.extract_sklearn_dataset(conditions, targets, frame)
    # Column 'C' is neither a condition nor a target, so it must be gone.
    assert set(extracted.columns) == set(conditions + targets)
    # Row index 1 has a None target ('D'), so it must have been dropped.
    assert len(extracted) == 4
def test_extract_sklearn_dataset():
    """Check column projection and null-target row filtering."""
    raw = pd.DataFrame({
        'A': [1.1, 2.1, 3.9, 4.5, 5.1],
        'B': [5.1, 4.1, 3.9, 2.5, 1.1],
        'C': ['1', '2', '3', '4', '5'],
        'D': [1, None, 3, 4, 5],
    })
    conditions, targets = ['A', 'B'], ['D']
    result = sku.extract_sklearn_dataset(conditions, targets, raw)
    # Only the requested condition/target columns survive ('C' dropped).
    expected_columns = set(conditions + targets)
    assert set(result.columns) == expected_columns
    # The row with a missing target value (second row) is removed.
    assert len(result) == 4
def train(self, df, targets, conditions):
    """Fit the random forest classifier on `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data from which the training set is extracted.
    targets : list of (name, stattype) pairs
        Must contain exactly one column, and it must be CATEGORICAL.
    conditions : list of (name, stattype) pairs
        At least one column; CATEGORICAL conditions are integer-coded
        via a value map, all others are treated as numerical.

    Raises
    ------
    BLE (wrapping ValueError)
        If targets or conditions violate the constraints above.
    """
    # Validate and record the single categorical target column.
    if len(targets) != 1:
        raise BLE(ValueError(
            'RandomForest requires exactly one column in '
            'targets. Received {}'.format(targets)))
    if targets[0][1].lower() != 'categorical':
        raise BLE(ValueError(
            'RandomForest can only classify CATEGORICAL '
            'columns. Received {}'.format(targets)))
    self.targets = [targets[0][0]]
    # Validate and partition the condition columns by statistical type.
    if len(conditions) < 1:
        raise BLE(ValueError(
            'RandomForest requires at least one column in '
            'conditions. Received {}'.format(conditions)))
    self.conditions_categorical = [
        c[0] for c in conditions if c[1].lower() == 'categorical']
    self.conditions_numerical = [
        c[0] for c in conditions if c[1].lower() != 'categorical']
    # Numerical conditions first, then categorical (feature order
    # downstream relies on this concatenation order).
    self.conditions = (
        self.conditions_numerical + self.conditions_categorical)
    # Placeholders, overwritten by the preprocessing below.
    self.dataset = pd.DataFrame()
    self.categories_to_val_map = dict()
    self.X_numerical = np.ndarray(0)
    self.X_categorical = np.ndarray(0)
    self.Y = np.ndarray(0)
    # Two forests: one over numerical conditions only (partial), one
    # over all conditions (full).
    self.rf_partial = RandomForestClassifier(n_estimators=100)
    self.rf_full = RandomForestClassifier(n_estimators=100)
    # Build the training matrices from the raw frame.
    self.dataset = utils.extract_sklearn_dataset(
        self.conditions, self.targets, df)
    self.categories_to_val_map = utils.build_categorical_to_value_map(
        self.conditions_categorical, self.dataset)
    self.X_categorical = utils.extract_sklearn_features_categorical(
        self.conditions_categorical, self.categories_to_val_map,
        self.dataset)
    self.X_numerical = utils.extract_sklearn_features_numerical(
        self.conditions_numerical, self.dataset)
    self.Y = utils.extract_sklearn_univariate_target(
        self.targets, self.dataset)
    # Fit both forests.
    self._train_rf()
def train(self, df, targets, conditions):
    """Fit the random forest classifier on `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data from which the training set is extracted.
    targets : list of (name, stattype) pairs
        Must contain exactly one column, and it must be CATEGORICAL.
    conditions : list of (name, stattype) pairs
        At least one column; CATEGORICAL conditions are integer-coded
        via a value map, all others are treated as numerical.

    Raises
    ------
    BLE (wrapping ValueError)
        If targets or conditions violate the constraints above.
    """
    # Exactly one target column, and it must be categorical.
    if len(targets) != 1:
        raise BLE(ValueError(
            'RandomForest requires exactly one column in '
            'targets. Received {}'.format(targets)))
    target_name, target_stattype = targets[0][0], targets[0][1]
    if target_stattype.lower() != 'categorical':
        raise BLE(ValueError(
            'RandomForest can only classify CATEGORICAL '
            'columns. Received {}'.format(targets)))
    self.targets = [target_name]
    # At least one condition column is required.
    if len(conditions) < 1:
        raise BLE(ValueError(
            'RandomForest requires at least one column in '
            'conditions. Received {}'.format(conditions)))
    # Partition conditions into categorical vs. everything else,
    # preserving their relative order within each group.
    self.conditions_categorical = []
    self.conditions_numerical = []
    for cond in conditions:
        bucket = (self.conditions_categorical
            if cond[1].lower() == 'categorical'
            else self.conditions_numerical)
        bucket.append(cond[0])
    # Downstream feature extraction depends on numerical-then-
    # categorical ordering.
    self.conditions = (
        self.conditions_numerical + self.conditions_categorical)
    # Initialize state that preprocessing fills in below.
    self.dataset = pd.DataFrame()
    self.categories_to_val_map = dict()
    self.X_numerical = np.ndarray(0)
    self.X_categorical = np.ndarray(0)
    self.Y = np.ndarray(0)
    # Partial forest sees numerical conditions only; full forest sees
    # every condition.
    self.rf_partial = RandomForestClassifier(n_estimators=100)
    self.rf_full = RandomForestClassifier(n_estimators=100)
    # Preprocess the raw frame into training matrices.
    self.dataset = utils.extract_sklearn_dataset(
        self.conditions, self.targets, df)
    self.categories_to_val_map = utils.build_categorical_to_value_map(
        self.conditions_categorical, self.dataset)
    self.X_categorical = utils.extract_sklearn_features_categorical(
        self.conditions_categorical, self.categories_to_val_map,
        self.dataset)
    self.X_numerical = utils.extract_sklearn_features_numerical(
        self.conditions_numerical, self.dataset)
    self.Y = utils.extract_sklearn_univariate_target(
        self.targets, self.dataset)
    # Fit both forests.
    self._train_rf()