Python build_categorical_to_value_mapの例、bdbcontrib.predictors.sklearn_utils.build_categorical_to_value_map Pythonの例

コード例 #1

0

ファイルを表示

ファイル: multiple_regression.py プロジェクト: vishalbelsare/bdbcontrib

    def train(self, df, targets, conditions):
        """Prepare the training data from ``df`` and train the regression.

        Args:
            df: Source data; passed to ``utils.extract_sklearn_dataset``
                along with the selected condition and target names.
            targets: Sequence that must contain exactly one entry. The
                entry is indexed as ``entry[0]`` (used as the target name)
                and ``entry[1]`` (a string that must equal ``'numerical'``,
                compared case-insensitively).
            conditions: Sequence with at least one entry, indexed the same
                way as ``targets``. Entries whose ``entry[1]`` is
                ``'categorical'`` (case-insensitive) are treated as
                categorical; every other entry is treated as numerical.

        Raises:
            BLE: Wrapping a ``ValueError`` when ``targets`` does not hold
                exactly one entry, when the target is not numerical, or
                when ``conditions`` is empty.
        """
        # Obtain the targets column.
        if len(targets) != 1:
            # The check is for *exactly* one target, so the message says so
            # (it previously claimed "at least one", which was misleading
            # when several targets were supplied).
            raise BLE(
                ValueError('MultipleRegression requires exactly one column '
                           'in targets. Received {}'.format(targets)))
        if targets[0][1].lower() != 'numerical':
            raise BLE(
                ValueError('MultipleRegression can only regress NUMERICAL '
                           'columns. Received {}'.format(targets)))
        self.targets = [targets[0][0]]

        # Obtain the condition columns.
        if len(conditions) < 1:
            raise BLE(
                ValueError(
                    'MultipleRegression requires at least one '
                    'column in conditions. Received {}'.format(conditions)))
        self.conditions_categorical = []
        self.conditions_numerical = []
        for c in conditions:
            if c[1].lower() == 'categorical':
                self.conditions_categorical.append(c[0])
            else:
                self.conditions_numerical.append(c[0])
        # Numerical names come first, then categorical ones.
        self.conditions = self.conditions_numerical + \
            self.conditions_categorical

        # Start from empty placeholders; each one is overwritten by the
        # preprocessing step below.
        # The dataset.
        self.dataset = pd.DataFrame()
        # Lookup for categoricals to code.
        self.categories_to_val_map = dict()
        # Training set (regressors and labels)
        self.X_numerical = np.ndarray(0)
        self.X_categorical = np.ndarray(0)
        self.Y = np.ndarray(0)
        # Linear regressors.
        self.mr_partial = LinearRegression()
        self.mr_full = LinearRegression()

        # Preprocess the data.
        self.dataset = utils.extract_sklearn_dataset(self.conditions,
                                                     self.targets, df)
        self.categories_to_val_map = utils.build_categorical_to_value_map(
            self.conditions_categorical, self.dataset)
        self.X_categorical = utils.extract_sklearn_features_categorical(
            self.conditions_categorical, self.categories_to_val_map,
            self.dataset)
        self.X_numerical = utils.extract_sklearn_features_numerical(
            self.conditions_numerical, self.dataset)
        self.Y = utils.extract_sklearn_univariate_target(
            self.targets, self.dataset)
        # Train the multiple regression.
        self._train_mr()

コード例 #2

0

ファイルを表示

ファイル: multiple_regression.py プロジェクト: jayelm/bdbcontrib

    def train(self, df, targets, conditions):
        """Prepare the training data from ``df`` and train the regression.

        Args:
            df: Source data; passed to ``utils.extract_sklearn_dataset``
                along with the selected condition and target names.
            targets: Sequence that must contain exactly one entry. The
                entry is indexed as ``entry[0]`` (used as the target name)
                and ``entry[1]`` (a string that must equal ``'numerical'``,
                compared case-insensitively).
            conditions: Sequence with at least one entry, indexed the same
                way as ``targets``. Entries whose ``entry[1]`` is
                ``'categorical'`` (case-insensitive) are treated as
                categorical; every other entry is treated as numerical.

        Raises:
            BLE: Wrapping a ``ValueError`` when ``targets`` does not hold
                exactly one entry, when the target is not numerical, or
                when ``conditions`` is empty.
        """
        # Obtain the targets column.
        if len(targets) != 1:
            # The guard demands *exactly* one target; the message used to
            # say "at least one", which contradicted the check.
            raise BLE(ValueError(
                'MultipleRegression requires exactly one column '
                'in targets. Received {}'.format(targets)))
        if targets[0][1].lower() != 'numerical':
            raise BLE(ValueError(
                'MultipleRegression can only regress NUMERICAL '
                'columns. Received {}'.format(targets)))
        self.targets = [targets[0][0]]

        # Obtain the condition columns.
        if len(conditions) < 1:
            raise BLE(ValueError('MultipleRegression requires at least one '
                'column in conditions. Received {}'.format(conditions)))
        self.conditions_categorical = []
        self.conditions_numerical = []
        for c in conditions:
            if c[1].lower() == 'categorical':
                self.conditions_categorical.append(c[0])
            else:
                self.conditions_numerical.append(c[0])
        # Numerical names come first, then categorical ones.
        self.conditions = self.conditions_numerical + \
            self.conditions_categorical

        # Start from empty placeholders; each one is overwritten by the
        # preprocessing step below.
        # The dataset.
        self.dataset = pd.DataFrame()
        # Lookup for categoricals to code.
        self.categories_to_val_map = dict()
        # Training set (regressors and labels)
        self.X_numerical = np.ndarray(0)
        self.X_categorical = np.ndarray(0)
        self.Y = np.ndarray(0)
        # Linear regressors.
        self.mr_partial = LinearRegression()
        self.mr_full = LinearRegression()

        # Preprocess the data.
        self.dataset = utils.extract_sklearn_dataset(self.conditions,
            self.targets, df)
        self.categories_to_val_map = utils.build_categorical_to_value_map(
            self.conditions_categorical, self.dataset)
        self.X_categorical = utils.extract_sklearn_features_categorical(
            self.conditions_categorical, self.categories_to_val_map,
            self.dataset)
        self.X_numerical = utils.extract_sklearn_features_numerical(
            self.conditions_numerical, self.dataset)
        self.Y = utils.extract_sklearn_univariate_target(self.targets,
            self.dataset)
        # Train the multiple regression.
        self._train_mr()

コード例 #3

0

ファイルを表示

 def train(self, df, targets, conditions):
     """Validate the inputs, build the training arrays and train the forest.

     ``targets`` must hold exactly one entry whose second item is
     ``'categorical'`` (case-insensitive); ``conditions`` must hold at
     least one entry. Each entry is read as ``entry[0]`` (name) and
     ``entry[1]`` (type string). A ``BLE`` wrapping a ``ValueError`` is
     raised when any of these requirements is not met.
     """
     # --- Target validation -------------------------------------------
     if len(targets) != 1:
         message = ('RandomForest requires exactly one column in '
                    'targets. Received {}'.format(targets))
         raise BLE(ValueError(message))
     target = targets[0]
     if target[1].lower() != 'categorical':
         message = ('RandomForest can only classify CATEGORICAL '
                    'columns. Received {}'.format(targets))
         raise BLE(ValueError(message))
     self.targets = [target[0]]

     # --- Condition validation and partitioning -----------------------
     if len(conditions) == 0:
         message = ('RandomForest requires at least one column in '
                    'conditions. Received {}'.format(conditions))
         raise BLE(ValueError(message))
     categorical_names = []
     numerical_names = []
     self.conditions_categorical = categorical_names
     self.conditions_numerical = numerical_names
     for condition in conditions:
         # Anything not tagged 'categorical' goes to the numerical bucket.
         is_categorical = condition[1].lower() == 'categorical'
         bucket = categorical_names if is_categorical else numerical_names
         bucket.append(condition[0])
     # Numerical names are listed ahead of categorical ones.
     self.conditions = numerical_names + categorical_names

     # --- Empty placeholders, replaced during preprocessing -----------
     self.dataset = pd.DataFrame()
     self.categories_to_val_map = dict()
     self.X_numerical = np.ndarray(0)
     self.X_categorical = np.ndarray(0)
     self.Y = np.ndarray(0)

     # --- Classifiers ---------------------------------------------------
     self.rf_partial = RandomForestClassifier(n_estimators=100)
     self.rf_full = RandomForestClassifier(n_estimators=100)

     # --- Preprocessing -------------------------------------------------
     self.dataset = utils.extract_sklearn_dataset(
         self.conditions, self.targets, df)
     self.categories_to_val_map = utils.build_categorical_to_value_map(
         self.conditions_categorical, self.dataset)
     self.X_categorical = utils.extract_sklearn_features_categorical(
         self.conditions_categorical,
         self.categories_to_val_map,
         self.dataset)
     self.X_numerical = utils.extract_sklearn_features_numerical(
         self.conditions_numerical, self.dataset)
     self.Y = utils.extract_sklearn_univariate_target(
         self.targets, self.dataset)

     # Hand over to the actual training routine.
     self._train_rf()

コード例 #4

0

ファイルを表示

ファイル: random_forest.py プロジェクト: jayelm/bdbcontrib

 def train(self, df, targets, conditions):
     """Check the inputs, extract the training arrays and train the forest.

     Exactly one target entry is accepted and its type string
     (``entry[1]``) must be ``'categorical'``, ignoring case. At least one
     condition entry is required. Violations raise a ``BLE`` wrapping a
     ``ValueError``.
     """
     # Exactly one categorical target is supported.
     if len(targets) != 1:
         problem = ValueError(
             'RandomForest requires exactly one column in '
             'targets. Received {}'.format(targets))
         raise BLE(problem)
     only_target = targets[0]
     if only_target[1].lower() != 'categorical':
         problem = ValueError(
             'RandomForest can only classify CATEGORICAL '
             'columns. Received {}'.format(targets))
         raise BLE(problem)
     self.targets = [only_target[0]]

     # There must be something to condition on.
     if len(conditions) == 0:
         problem = ValueError(
             'RandomForest requires at least one column in '
             'conditions. Received {}'.format(conditions))
         raise BLE(problem)

     # Split condition names by type; non-categorical means numerical.
     self.conditions_categorical = []
     self.conditions_numerical = []
     for entry in conditions:
         if entry[1].lower() != 'categorical':
             self.conditions_numerical.append(entry[0])
             continue
         self.conditions_categorical.append(entry[0])
     # Numerical names precede categorical names in the combined list.
     self.conditions = (
         self.conditions_numerical + self.conditions_categorical)

     # Empty placeholders; all are overwritten by the preprocessing below.
     self.dataset = pd.DataFrame()
     self.categories_to_val_map = dict()
     self.X_numerical = np.ndarray(0)
     self.X_categorical = np.ndarray(0)
     self.Y = np.ndarray(0)

     # Two classifiers with identical settings.
     self.rf_partial = RandomForestClassifier(n_estimators=100)
     self.rf_full = RandomForestClassifier(n_estimators=100)

     # Preprocess the data.
     self.dataset = utils.extract_sklearn_dataset(
         self.conditions, self.targets, df)
     self.categories_to_val_map = utils.build_categorical_to_value_map(
         self.conditions_categorical, self.dataset)
     self.X_categorical = utils.extract_sklearn_features_categorical(
         self.conditions_categorical,
         self.categories_to_val_map,
         self.dataset)
     self.X_numerical = utils.extract_sklearn_features_numerical(
         self.conditions_numerical, self.dataset)
     self.Y = utils.extract_sklearn_univariate_target(
         self.targets, self.dataset)

     # Train the random forest.
     self._train_rf()

コード例 #5

0

ファイルを表示

ファイル: test_sklearn_utils.py プロジェクト: alxempirical/bdbcontrib

def test_build_categorical_to_value_map():
    """Check the mapping built by ``sku.build_categorical_to_value_map``.

    For the requested columns, the result must have one entry per column,
    each entry must have a key for every distinct value in that column,
    and the codes assigned within a column must be pairwise distinct.
    """
    dataset = pd.DataFrame({
        'Nationality':['USA', 'USA', 'France', 'Germany', 'Bengal'],
        'Gender':['M', 'F', 'M', 'M', 'T'],
        'C':['1', '2', '3', '4', '5'],
        'D':[1, None, 3, 4, 5],
        })
    columns = ['Nationality', 'Gender']
    categories_to_val_map = sku.build_categorical_to_value_map(columns, dataset)
    # Assert all the 'columns' have a codemap.
    assert set(categories_to_val_map.keys()) == set(columns)
    # Use .items()/.values() rather than .iteritems(): the latter exists
    # only on Python 2 dicts, while these work on both Python 2 and 3.
    for col, valmap in categories_to_val_map.items():
        # Assert each unique val in the column has codes for all its values.
        unique_vals = set(dataset[col].unique())
        assert unique_vals == set(valmap.keys())
        # Assert that all codes are unique.
        assert len(set(valmap.values())) == len(unique_vals)

コード例 #6

0

ファイルを表示

ファイル: test_sklearn_utils.py プロジェクト: vishalbelsare/bdbcontrib

def test_build_categorical_to_value_map():
    """Check the mapping built by ``sku.build_categorical_to_value_map``.

    For the requested columns, the result must have one entry per column,
    each entry must have a key for every distinct value in that column,
    and the codes assigned within a column must be pairwise distinct.
    """
    dataset = pd.DataFrame({
        'Nationality': ['USA', 'USA', 'France', 'Germany', 'Bengal'],
        'Gender': ['M', 'F', 'M', 'M', 'T'],
        'C': ['1', '2', '3', '4', '5'],
        'D': [1, None, 3, 4, 5],
    })
    columns = ['Nationality', 'Gender']
    categories_to_val_map = sku.build_categorical_to_value_map(
        columns, dataset)
    # Assert all the 'columns' have a codemap.
    assert set(categories_to_val_map.keys()) == set(columns)
    # Use .items()/.values() rather than .iteritems(): the latter exists
    # only on Python 2 dicts, while these work on both Python 2 and 3.
    for col, valmap in categories_to_val_map.items():
        # Assert each unique val in the column has codes for all its values.
        unique_vals = set(dataset[col].unique())
        assert unique_vals == set(valmap.keys())
        # Assert that all codes are unique.
        assert len(set(valmap.values())) == len(unique_vals)