def test_BayesianRuleList2():
    """Smoke-test BayesianRuleList end-to-end on the breast-cancer dataset.

    Fits an MDLP discretizer on the training split, trains a rule list on
    the discretized features, prints the learned rules, and reports test
    accuracy on the held-out split.
    """
    dataset = load_breast_cancer()
    features, labels = dataset['data'], dataset['target']
    feature_names = dataset['feature_names']

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.33, random_state=42)

    # Supervised discretization: fit on train only, then reuse everywhere.
    discretizer = MDLP(random_state=42).fit(x_train, y_train)
    train_categorical = discretizer.transform(x_train)
    category_names = compute_intervals(discretizer)

    rule_list = BayesianRuleList(seed=1,
                                 feature_names=feature_names,
                                 category_names=category_names,
                                 verbose=2)
    rule_list.fit(train_categorical, y_train)
    print(rule_list)

    test_categorical = discretizer.transform(x_test)
    print('acc: %.4f' % rule_list.score(test_categorical, y_test))
def bayesian_rule_list(X_train, y_train, X_test, y_test):
    """Train a Bayesian Rule List and score how well it recreates labels.

    Pipeline: one-hot encode -> integer-encode labels -> MDLP-discretize
    features -> fit pysbrl.BayesianRuleList.

    Returns:
        (train_recreation_pct, test_recreation_pct, complexity) where
        complexity counts rule conditions parsed from the printed model.
    """
    from mdlp.discretization import MDLP
    from sklearn import preprocessing

    # One-hot encode the categorical columns first.
    X_train, X_test = apply_one_hot_encoding(X_train, X_test)

    # Classes must be integers for the rule-list learner.
    label_encoder = preprocessing.LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_test = label_encoder.transform(y_test)

    # Supervised discretization of the (encoded) features.
    discretizer = MDLP()
    X_train = discretizer.fit_transform(X_train, y_train)
    X_test = discretizer.transform(X_test)

    brl = pysbrl.BayesianRuleList()
    brl.fit(X_train, y_train)
    print(brl)

    # Complexity is the number of split points plus the number of extra
    # conditions (i.e. "if x1 > 0 and x2 = 1 then .." counts as 2, not 1),
    # so we count "IF"/"AND" tokens in the printed model instead of using
    # brl.n_rules.
    rule_text = str(brl)
    brl_complexity = rule_text.count("IF") + rule_text.count("AND")

    train_predictions = brl.predict(X_train)
    brl_training_recreating_pct = scorer(train_predictions, y_train) * 100
    test_predictions = brl.predict(X_test)
    brl_testing_recreating_pct = scorer(test_predictions, y_test) * 100

    return brl_training_recreating_pct, brl_testing_recreating_pct, brl_complexity
class SupervisedDiscretizationStrategy(object):
    """A class used for supervised data discretization."""

    def __init__(self):
        # Shared MDLP transformer: fit on training data, reused for validation.
        self.transformer = MDLP()

    def discretize(self, data_set, validation_size, nb_bins=None):
        """Discretize continuous attributes using the MDLP method.

        Args:
            data_set: The data set containing continuous data.
            validation_size: The validation size of the newly created
                discretized data set.
            nb_bins: Unused; kept for interface compatibility.

        Returns:
            discretized_dataset: A DataSet object containing discretized data.
        """
        # Strategy object used to assemble the final discretized data set.
        feature_strategy = GalaxyDataSetFeatureStrategy()

        # Supervised discretization (MDLP) fitted on the training portion.
        train_features = data_set.train.get_features
        train_labels = data_set.train.get_labels
        train_discretized = self.transformer.fit_transform(X=train_features,
                                                           y=train_labels)

        # Validation portion is transformed with the already-fitted cut points.
        valid_features = data_set.valid.get_features
        valid_labels = data_set.valid.get_labels
        valid_discretized = self.transformer.transform(X=valid_features)

        # Recombine both portions and rebuild a DataSet with the requested split.
        all_features = np.append(train_discretized, valid_discretized, axis=0)
        all_labels = np.append(train_labels, valid_labels, axis=0)

        discretized_dataset = feature_strategy.create_datasets(all_features,
                                                               all_labels,
                                                               validation_size)
        return discretized_dataset
plt.figure()
plot_confusion_matrix(cm_galaxy_test_k3u, classes=['Smooth', 'Spiral'],
                      normalize=True,
                      title='Confusion matrix, with normalization with k:3')
plt.show()

# In[33]:

print(" Bayes Naif models with hold-out set ")

# Scale data for train, validation (hold out), and test.
# First method of discretization: supervised, using MDLP.
from mdlp.discretization import MDLP

mdlp = MDLP()
Xtrain_galaxy_MDLP = mdlp.fit_transform(X_train_galaxy, Y_train_galaxy)
# BUG FIX: transform() takes only X (sklearn transformer contract); the
# original passed the test/validation labels, which is an API misuse and
# would leak label information into preprocessing if honored.
Xtest_galaxy_MDLP = mdlp.transform(X_test_galaxy)
Xvalid_galaxy_MDLP = mdlp.transform(X_valid_galaxy)

# In[33]:

# Second method of discretization: unsupervised, using MinMaxScaler.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
Xtrain_galaxy_unsupervised = scaler.fit_transform(X_train_galaxy)
Xtest_galaxy_unsupervised = scaler.transform(X_test_galaxy)
Xvalid_galaxy_unsupervised = scaler.transform(X_valid_galaxy)

# In[33]:

# Bayes naïf gaussien with 2 different parameters i.e.
# 1. priors = probability of each class

# In[33]: