Code Example #1
  def __init__(self, int_dm, real_dm, cat, tree_count = 128, option_count = 3, minimum_size = 1, weight = None, index = None, callback = None, compress = False):
    """Constructs and trains the stochastic wood - basically all its doing is constructing lots of trees, each with a different bootstrap sample of the input and calculating the out-of-bound error estimates. The parameters are as follows: int_dm & real_dm - the data matrices, one for discrete attributes and one for continuous; you can set one to None if there are none of that kind. cat - The category vector, aligned with the data matrices, where each category is represented by an integer. tree_count - The number of decision trees to create. option_count - The number of attributes to consider at each level of the decision trees - maps to the rand parameter of the DecTree class. minimum_size - Nodes in the trees do not suffer further splits once they are this size or smaller. weight - Optionally allows you to weight the trainning examples, aligned with data matrices. index - Using this you can optionally tell it which examples to use from the other matrices/vectors, and/or duplicate examples. callback - An optional function of the form (steps done,steps overall) used to report progress during construction. compress - if True trees are stored pickled and compressed, in a bid to make them consume less memory - this will obviously destroy classification performance unless multi_classify is used with suitably large blocks. Allows the algorithm to be run with larger quantities of data, but only use as a last resort."""
    
    # Generate weight/index vectors if not provided, and also put in a dummy callback if needed to avoid if statements...
    if weight is None: weight = numpy.ones(cat.shape[0], dtype=numpy.float32)
    if index is None: index = numpy.arange(cat.shape[0], dtype=numpy.int32)
    if callback is None: callback = lambda a, b: None

    # Create data structure to calculate the oob error rate...
    oob_success = numpy.zeros(cat.shape[0], dtype=numpy.float32)
    oob_total = numpy.zeros(cat.shape[0], dtype=numpy.int32)

    # Iterate and create all the trees...
    self.trees = []
    for itr in xrange(tree_count):
      callback(itr, tree_count)

      # Select the bootstrap sample...
      b_ind = numpy.random.randint(index.shape[0], size=index.shape[0])
      b_ind.sort() # Should improve cache coherence slightly.
      bootstrap = index[b_ind]

      # Train the classifier...
      dt = DecTree(int_dm, real_dm, cat, weight, bootstrap, option_count, minimum_size)
      if compress: self.trees.append(bz2.compress(pickle.dumps(dt)))
      else: self.trees.append(dt)

      # Get the indices of the oob set...
      oob_set = numpy.ones(index.shape[0], dtype=numpy.bool_)
      oob_set[b_ind] = False
      oob_set = index[oob_set]

      # Store the oob info...
      for ind in oob_set:
        dist = dt.classify(int_dm[ind,:], real_dm[ind,:])
        if cat[ind] in dist:
          oob_success[ind] += float(dist[cat[ind]]) / float(sum(dist.itervalues()))
        oob_total[ind] += 1

    # Combine the oob info to calculate the error rate, including being robust to a sample never being a member of the oob set...
    oob_total[oob_total==0] = 1
    self.success = (oob_success[index] / oob_total[index]).mean()

    del callback # Should not need this, but apparently I do.
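
The constructor above is the entire training entry point, so a minimal usage sketch follows. It is only a sketch: the enclosing class name SWood and module swood are assumptions (the snippet does not show the class statement), and the progress callback is purely illustrative - only the argument order, the keyword names and the self.success attribute come from the code above. The training data follows the generate_train pattern of Code Example #5.

from swood import SWood  # Assumed module/class name - the snippet above omits the class statement.
import test_model as mod

# Training data, as produced in Code Example #5...
int_dm, real_dm, cats, weights = mod.generate_train()

# Illustrative progress callback, matching the (steps done, steps overall) form...
def report(done, total):
  print 'Trained %i of %i trees'%(done, total)

# Train a forest of 64 trees, considering 3 attributes per decision;
# compress is left False - only set it if memory is genuinely tight...
sw = SWood(int_dm, real_dm, cats, tree_count=64, option_count=3, minimum_size=1, weight=weights, callback=report)

# The out-of-bag estimate of the classification success rate...
print 'Out-of-bag success rate = %.3f'%sw.success
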
Code Example #2
for i in xrange(total_count):
  if i<politician_count:
    dm[i,:] = make_politician()
    cat[i] = 0
  elif i<(politician_count+marketing_count):
    dm[i,:] = make_marketing()
    cat[i] = 1
  else:
    dm[i,:] = make_tele_sales()
    cat[i] = 2



# Train the model...
dt = DecTree(None, dm, cat)
print 'Generated a tree with %i nodes'%dt.size()



# Test...
politician_test = 256
politician_success = 0
politician_unsure = 0
for i in xrange(politician_test):
  t = make_politician()
  dist = dt.classify(None,t)
  if 0 in dist: politician_success += 1
  if len(dist)>1: politician_unsure += 1

print 'Of %i politicians %i (%.1f%%) were correctly detected, with %i uncertain.'%(politician_test, politician_success, 100.0*politician_success/float(politician_test), politician_unsure)
Code Example #3
dm = numpy.empty((total_count, len(attributes)), dtype=numpy.int32)
cat = numpy.empty(total_count, dtype=numpy.int32)

for i in xrange(total_count):
  if i<zombie_count:
    dm[i,:] = make_zombie()
    cat[i] = 0
  else:
    dm[i,:] = make_human()
    cat[i] = 1



# Train the model...
dt = DecTree(dm, None, cat)
print 'Generated a tree with %i nodes'%dt.size()



# Test...
zombie_test = 256
zombie_success = 0
zombie_unsure = 0
for i in xrange(zombie_test):
  z = make_zombie()
  dist = dt.classify(z,None)
  if 0 in dist: zombie_success += 1
  if len(dist)>1: zombie_unsure += 1

print 'Of %i zombies %i (%.1f%%) were correctly detected, with %i uncertain.'%(zombie_test, zombie_success, 100.0*zombie_success/float(zombie_test), zombie_unsure)
Code Example #4
File: test_tree_continuous.py  Project: zoginni/helit
dm = numpy.empty((total_count, feat_length), dtype=numpy.float32)
cat = numpy.empty(total_count, dtype=numpy.int32)

for i in xrange(total_count):
    if i < politician_count:
        dm[i, :] = make_politician()
        cat[i] = 0
    elif i < (politician_count + marketing_count):
        dm[i, :] = make_marketing()
        cat[i] = 1
    else:
        dm[i, :] = make_tele_sales()
        cat[i] = 2

# Train the model...
dt = DecTree(None, dm, cat)
print 'Generated a tree with %i nodes' % dt.size()

# Test...
politician_test = 256
politician_success = 0
politician_unsure = 0
for i in xrange(politician_test):
    t = make_politician()
    dist = dt.classify(None, t)
    if 0 in dist: politician_success += 1
    if len(dist) > 1: politician_unsure += 1

print 'Of %i politicians %i (%.1f%%) were correctly detected, with %i uncertain.' % (
    politician_test, politician_success,
    100.0 * politician_success / float(politician_test), politician_unsure)
Code Example #5
#! /usr/bin/env python

# Copyright 2011 Tom SF Haines

# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

#   http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

from dec_tree import DecTree
import test_model as mod

# Same as test_tree_model, but includes randomisation of attribute selection...

# Get training data...
int_dm, real_dm, cats, weights = mod.generate_train()

# Train...
dt = DecTree(int_dm, real_dm, cats, weights, rand=3)

# Test...
mod.test(dt.classify)
Code Example #6
File: test_tree_discrete.py  Project: zoginni/helit
human_count = 16
total_count = zombie_count + human_count

dm = numpy.empty((total_count, len(attributes)), dtype=numpy.int32)
cat = numpy.empty(total_count, dtype=numpy.int32)

for i in xrange(total_count):
    if i < zombie_count:
        dm[i, :] = make_zombie()
        cat[i] = 0
    else:
        dm[i, :] = make_human()
        cat[i] = 1

# Train the model...
dt = DecTree(dm, None, cat)
print 'Generated a tree with %i nodes' % dt.size()

# Test...
zombie_test = 256
zombie_success = 0
zombie_unsure = 0
for i in xrange(zombie_test):
    z = make_zombie()
    dist = dt.classify(z, None)
    if 0 in dist: zombie_success += 1
    if len(dist) > 1: zombie_unsure += 1

print 'Of %i zombies %i (%.1f%%) were correctly detected, with %i uncertain.' % (
    zombie_test, zombie_success, 100.0 * zombie_success / float(zombie_test),
    zombie_unsure)