Esempi in Python per ClassificationTree, esempi in Python per ClassTree.ClassificationTree

Esempio n. 1

0

Mostra file

File: ClassTreeBagging.py Progetto: metjush/decision_tree

    def train(self, X, y):
        """Grow ``self.n_trees`` classification trees on bootstrap samples.

        Every tree is trained on ``int(len(X) * self.fraction)`` rows drawn
        with replacement from (X, y) and stored in ``self.trees`` under its
        index. A model that is already trained is reset first.

        :param X: samples, one row per observation; input that is not a
            numpy array is passed through ``self.__numpify``
        :param y: labels, one per row of X; converted the same way
        :raises IndexError: if X and y differ in length
        :raises TypeError: if X or y cannot be turned into a numpy array
        """
        # check dimensions
        if len(X) != len(y):
            raise IndexError("The number of samples in X and y do not match")
        # convert inputs that are not numpy arrays yet
        # NOTE(review): assumes __numpify returns an ndarray on success and a
        # non-array sentinel otherwise -- confirm. Testing `not X` is not an
        # option: the truth value of a multi-element array raises ValueError.
        if type(X) is not np.ndarray:
            X = self.__numpify(X)
            if not isinstance(X, np.ndarray):
                raise TypeError("input dataset X is not a valid numeric array")
        if type(y) is not np.ndarray:
            y = self.__numpify(y)
            if not isinstance(y, np.ndarray):
                raise TypeError("input label vector y is not a valid numeric array")

        # drop the trees of a previous run before growing new ones
        if self.trained:
            self.__untrain()

        indices = np.arange(len(X))
        # size of each bootstrap sample; the builtin int replaces the np.int
        # alias, which was removed in NumPy 1.24
        strapsize = int(len(X) * self.fraction)
        # range instead of the Python 2-only xrange
        for t in range(self.n_trees):
            # create a new classification tree
            tree = ClassificationTree(depth_limit=self.depth_limit, impurity=self.impurity)
            # bootstrap a sample (rows are drawn with replacement)
            bootstrap = np.random.choice(indices, strapsize)
            Xstrap = X[bootstrap, :]
            ystrap = y[bootstrap]
            # train the t-th tree with the bootstrapped sample
            tree.train(Xstrap, ystrap)
            self.trees[t] = tree
        self.trained = True
        print("%d trees grown" % self.n_trees)

Esempio n. 2

0

Mostra file

File: ClassTreeBagging.py Progetto: prashant-r/CS57300

    def train(self, X, y):
        """Train the bagged ensemble on bootstrap samples of (X, y).

        ``self.n_trees`` trees are grown, each on ``int(len(X) *
        self.fraction)`` rows sampled with replacement, and saved in
        ``self.trees`` keyed by their index. An already trained model is
        reset before training starts.

        :param X: samples, one row per observation; anything that is not a
            numpy array goes through ``self.__numpify``
        :param y: labels, one per row of X; converted the same way
        :raises IndexError: if X and y differ in length
        :raises TypeError: if X or y cannot be turned into a numpy array
        """
        # check dimensions
        if len(X) != len(y):
            raise IndexError("The number of samples in X and y do not match")
        # make sure X and y are numpy arrays
        # NOTE(review): assumes __numpify hands back an ndarray on success and
        # something that is not an ndarray on failure -- confirm. `not X`
        # would raise ValueError for any array with more than one element.
        if type(X) is not np.ndarray:
            X = self.__numpify(X)
            if not isinstance(X, np.ndarray):
                raise TypeError("input dataset X is not a valid numeric array")
        if type(y) is not np.ndarray:
            y = self.__numpify(y)
            if not isinstance(y, np.ndarray):
                raise TypeError(
                    "input label vector y is not a valid numeric array")

        # reset a previously trained model
        if self.trained:
            self.__untrain()

        indices = np.arange(len(X))
        # determine the size of the bootstrap sample; np.int no longer exists
        # in NumPy >= 1.24, the builtin int does the same job
        strapsize = int(len(X) * self.fraction)
        # range works on Python 2 and 3, xrange only on Python 2
        for t in range(self.n_trees):
            # create a new classification tree
            tree = ClassificationTree(depth_limit=self.depth_limit,
                                      impurity=self.impurity)
            # bootstrap a sample (with replacement)
            bootstrap = np.random.choice(indices, strapsize)
            Xstrap = X[bootstrap, :]
            ystrap = y[bootstrap]
            # train the t-th tree with the bootstrapped sample
            tree.train(Xstrap, ystrap)
            self.trees[t] = tree
        self.trained = True
        print("%d trees grown" % self.n_trees)

Esempio n. 3

0

Mostra file

    def train(self, X, y):
        """Grow ``self.n_trees`` trees, each on a row bootstrap and a column subset.

        Each tree sees ``int(len(X) * self.fraction)`` rows sampled with
        replacement and a random subset of the feature columns sampled
        without replacement. ``self.trees[t]`` holds ``[tree, subfeature]``
        so the columns used by a tree are kept next to it. An already
        trained model is reset first.

        :param X: samples, one row per observation and one column per
            feature; non-ndarray input goes through ``self.__numpify``
        :param y: labels, one per row of X; converted the same way
        :raises IndexError: if X and y differ in length
        :raises TypeError: if X or y cannot be turned into a numpy array
        """
        # check dimensions
        if len(X) != len(y):
            raise IndexError("The number of samples in X and y do not match")
        # make sure X and y are numpy arrays
        # NOTE(review): assumes __numpify returns an ndarray on success and a
        # non-array sentinel otherwise -- confirm. `not X` cannot be used:
        # the truth value of a multi-element array raises ValueError.
        if type(X) is not np.ndarray:
            X = self.__numpify(X)
            if not isinstance(X, np.ndarray):
                raise TypeError("input dataset X is not a valid numeric array")
        if type(y) is not np.ndarray:
            y = self.__numpify(y)
            if not isinstance(y, np.ndarray):
                raise TypeError(
                    "input label vector y is not a valid numeric array")

        # reset a previously trained model
        if self.trained:
            self.__untrain()

        indices = np.arange(len(X))
        # size of each bootstrap sample; builtin int replaces the np.int alias
        # removed in NumPy 1.24
        strapsize = int(len(X) * self.fraction)
        n_features = X.shape[1]
        features = np.arange(n_features)
        # number of features given to each tree: ceil(sqrt(n)) once there are
        # at least 9 features, otherwise all of them
        subsize = (int(np.ceil(np.sqrt(n_features)))
                   if n_features >= 9 else n_features)

        # start growing the trees
        for t in range(self.n_trees):
            # create a new classification tree
            tree = ClassificationTree(depth_limit=self.depth_limit,
                                      impurity=self.impurity)
            # rows are bootstrapped with replacement ...
            bootstrap = np.random.choice(indices, strapsize)
            # ... features are sampled without replacement
            subfeature = np.random.choice(features, subsize, replace=False)
            Xstrap = X[bootstrap, :][:, subfeature]
            ystrap = y[bootstrap]
            # train the t-th tree with the bootstrapped sample
            tree.train(Xstrap, ystrap)
            # keep the feature subset with the tree that was trained on it
            self.trees[t] = [tree, subfeature]
        self.trained = True
        print("%d trees grown" % self.n_trees)

Esempio n. 4

0

Mostra file

File: ClassForest.py Progetto: metjush/decision_tree

    def train(self, X, y):
        """Train the forest: one tree per row bootstrap and feature subset.

        ``self.n_trees`` trees are grown. Each is trained on ``int(len(X) *
        self.fraction)`` rows sampled with replacement, restricted to a
        random subset of feature columns sampled without replacement.
        ``self.trees[t]`` stores ``[tree, subfeature]``. A model that is
        already trained is reset first.

        :param X: samples, one row per observation and one column per
            feature; non-ndarray input goes through ``self.__numpify``
        :param y: labels, one per row of X; converted the same way
        :raises IndexError: if X and y differ in length
        :raises TypeError: if X or y cannot be turned into a numpy array
        """
        # check dimensions
        if len(X) != len(y):
            raise IndexError("The number of samples in X and y do not match")
        # convert inputs that are not numpy arrays yet
        # NOTE(review): assumes __numpify hands back an ndarray on success and
        # a non-array value on failure -- confirm. `not X` would raise
        # ValueError for every array holding more than one element.
        if type(X) is not np.ndarray:
            X = self.__numpify(X)
            if not isinstance(X, np.ndarray):
                raise TypeError("input dataset X is not a valid numeric array")
        if type(y) is not np.ndarray:
            y = self.__numpify(y)
            if not isinstance(y, np.ndarray):
                raise TypeError("input label vector y is not a valid numeric array")

        # drop the trees of a previous run
        if self.trained:
            self.__untrain()

        indices = np.arange(len(X))
        # determine the size of the bootstrap sample; np.int was removed in
        # NumPy 1.24, so the builtin int is used
        strapsize = int(len(X) * self.fraction)
        n_features = X.shape[1]
        features = np.arange(n_features)
        # features per tree: ceil(sqrt(n)) when there are at least 9 features,
        # otherwise every feature is used
        subsize = int(np.ceil(np.sqrt(n_features))) if n_features >= 9 else n_features

        # start growing the trees (range also runs on Python 3, xrange does not)
        for t in range(self.n_trees):
            # create a new classification tree
            tree = ClassificationTree(depth_limit=self.depth_limit, impurity=self.impurity)
            # bootstrap the rows with replacement
            bootstrap = np.random.choice(indices, strapsize)
            # features are not sampled with replacement
            subfeature = np.random.choice(features, subsize, replace=False)
            Xstrap = X[bootstrap, :][:, subfeature]
            ystrap = y[bootstrap]
            # train the t-th tree with the bootstrapped sample
            tree.train(Xstrap, ystrap)
            # for each tree, save which features it was trained on
            self.trees[t] = [tree, subfeature]
        self.trained = True
        print("%d trees grown" % self.n_trees)

Esempio n. 5

0

Mostra file

File: Examples.py Progetto: metjush/decision_tree

__author__ = "metjush"

# An example file for the decision_tree repository, using datasets from scikit-learn
# to demonstrate classification with a single tree, bagged forest and random forest.
# If you just want to see if the package works, run this file.

# Importing all requirements

import numpy as np
from ClassTree import ClassificationTree
from ClassTreeBagging import TreeBagger
from ClassForest import RandomForest

# Build one classifier of each kind; both ensembles are given 50 trees
tree = ClassificationTree()
bag, forest = TreeBagger(n_trees=50), RandomForest(n_trees=50)

# The iris classification dataset is taken from scikit-learn
from sklearn.datasets import load_iris

# Load the dataset and keep its data and target arrays under separate names
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# Train the single tree on the iris data
tree.train(X_iris, y_iris)

Esempio n. 6

0

Mostra file

File: TreeWebRequest.py Progetto: metjush/decision_tree

# Store the uploaded CSV under a hashed name, train a tree on it and export
# the tree as JSON.
if fileitem.filename:

    # strip leading path
    fn = os.path.basename(fileitem.filename)
    # NOTE(review): if filehash is a hashlib object, Python 3 needs bytes here
    # -- confirm the type of fn for the targeted interpreter
    filehash.update(fn)
    name = filehash.hexdigest()
    # write through a context manager so the upload is flushed and closed
    # before the same path is reopened for reading
    with open(UPLOAD_DIR + name + ".csv", 'wb') as upload:
        upload.write(fileitem.file.read())
    # left open on purpose: nothing in this excerpt shows who closes it
    savedfile = open(UPLOAD_DIR + name + ".csv", 'r')
else:
    raise IOError("Upload of file failed")

# we will be returning a json file, so set header
message_header = "header('Content-type: application/json');"

# Import ClassificationTree class
from ClassTree import ClassificationTree
import numpy as np




# read the saved file as a numpy array
data = np.loadtxt(savedfile, delimiter=",")
# every column except the label column forms X; the two slices have to be
# joined side by side (axis=1), the default axis=0 would stack them as rows
X = np.concatenate((data[:,0:label_column], data[:,(label_column+1):]), axis=1)
y = data[:,label_column]

train_tree = ClassificationTree(depth_limit=depth)
# train() takes the samples and the labels; calling it without arguments fails
train_tree.train(X, y)
train_json = train_tree.to_json(JSON_DIR + name + ".json")

Esempio n. 7

0

Mostra file

# Read the request parameters, store the uploaded CSV under a hashed name,
# train a tree on it and export the tree as JSON.
# NOTE(review): depth is forwarded to ClassificationTree unchanged; if form
# yields field objects or strings it may need converting -- confirm.
depth = form['depth']
# label_column is used as a slice bound below, so it has to be an integer.
# NOTE(review): form looks like a CGI field container whose items expose the
# submitted text as .value -- confirm; plain values are converted directly.
label_field = form['label']
label_column = int(getattr(label_field, 'value', label_field))

# checking
if fileitem.filename:

    # strip leading path
    fn = os.path.basename(fileitem.filename)
    # NOTE(review): if filehash is a hashlib object, Python 3 needs bytes here
    # -- confirm the type of fn for the targeted interpreter
    filehash.update(fn)
    name = filehash.hexdigest()
    # write through a context manager so the upload is flushed and closed
    # before the same path is reopened for reading
    with open(UPLOAD_DIR + name + ".csv", 'wb') as upload:
        upload.write(fileitem.file.read())
    # left open on purpose: nothing in this excerpt shows who closes it
    savedfile = open(UPLOAD_DIR + name + ".csv", 'r')
else:
    raise IOError("Upload of file failed")

# we will be returning a json file, so set header
message_header = "header('Content-type: application/json');"

# Import ClassificationTree class
from ClassTree import ClassificationTree
import numpy as np

# read the saved file as a numpy array
data = np.loadtxt(savedfile, delimiter=",")
# every column except the label column forms X; the two slices have to be
# joined side by side (axis=1), the default axis=0 would stack them as rows
X = np.concatenate((data[:, 0:label_column], data[:, (label_column + 1):]),
                   axis=1)
y = data[:, label_column]

train_tree = ClassificationTree(depth_limit=depth)
# train() takes the samples and the labels; calling it without arguments fails
train_tree.train(X, y)
train_json = train_tree.to_json(JSON_DIR + name + ".json")

Esempio n. 8

0

Mostra file

__author__ = "metjush"

# An example file for the decision_tree repository, using datasets from scikit-learn
# to demonstrate classification with a single tree, bagged forest and random forest.
# If you just want to see if the package works, run this file.

# Importing all requirements

import numpy as np
from ClassTree import ClassificationTree
from ClassTreeBagging import TreeBagger
from ClassForest import RandomForest

# Instantiate the three classifiers; the two ensembles get 50 trees each
tree = ClassificationTree()
bag, forest = TreeBagger(n_trees=50), RandomForest(n_trees=50)

# scikit-learn supplies the iris classification dataset
from sklearn.datasets import load_iris

# Load it and keep the data and target arrays under their own names
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# Fit the single tree on the iris data
tree.train(X_iris, y_iris)

Esempio n. 9

0

Mostra file

import numpy as np
from ClassTree import ClassificationTree
from ClassTreeBagging import TreeBagger
from ClassForest import RandomForest
import sys
import string
import copy
from collections import Counter
from operator import itemgetter

# Instantiate the three classifiers; the two ensembles get 50 trees each
tree = ClassificationTree()
bag, forest = TreeBagger(n_trees=50), RandomForest(n_trees=50)

# scikit-learn supplies the iris classification dataset
from sklearn.datasets import load_iris


def process_str(s):
    """Return the lower-cased, whitespace-separated words of *s* without punctuation."""
    # translation table that maps every ASCII punctuation character to None,
    # i.e. deletes it
    drop_punctuation = str.maketrans(dict.fromkeys(string.punctuation))
    cleaned = s.translate(drop_punctuation)
    lowered = cleaned.lower()
    return lowered.split()

def read_dataset(file_name):
    dataset = []
    with open(file_name) as f:
        for line in f:
            index, class_label, text = line.strip().split('\t')
            words = process_str(text)
            dataset.append( (int(class_label), words) )