Beispiel #1
0
 def test_tc(self):
     """Run five greedy learners through an IPython1 controller and merge them.

     The merged outcome must be a ``result.LearnerResult``.
     """
     dataset = data.fromfile(testfile("testdata5.txt"))
     dataset.discretize()

     learners = []
     for _ in range(5):
         learners.append(greedy.GreedyLearner(dataset))

     controller = ipy1.IPython1Controller("127.0.0.1:10113")
     merged = result.merge(controller.run(learners))
     assert isinstance(merged, result.LearnerResult)
Beispiel #2
0
 def setUp(self):
     """Learn a network greedily and render its HTML report into a temp dir.

     Stores the learner's result on ``self.result`` and the freshly created
     directory on ``self.tempdir``.
     """
     dataset = data.fromfile(testfile("testdata5.txt"))
     dataset.discretize()

     learner = greedy.GreedyLearner(dataset, max_iterations=100)
     learner.run()

     # The result is read back from the learner rather than from run()'s
     # return value.
     self.result = learner.result
     self.tempdir = tempfile.mkdtemp()
     self.result.tohtml(self.tempdir)
Beispiel #3
0
def test_pebl(numvars, numsamples, greedy_iters, cachesize):
    """Run a greedy learner on random data and return the learner.

    Args:
        numvars: number of columns in the random data matrix.
        numsamples: number of rows in the random data matrix.
        greedy_iters: passed to GreedyLearner as ``max_iterations``.
        cachesize: value stored under ``localscore_cache.maxsize``.

    Returns:
        The GreedyLearner instance after ``run()`` has been called.
    """
    banner = "Testing with #vars=%d, #samples=%d, iters=%d, cachesize=%d"
    # Single parenthesised argument: prints the same text whether ``print``
    # is a statement or a function.
    print(banner % (numvars, numsamples, greedy_iters, cachesize))

    config.set('localscore_cache.maxsize', cachesize)

    samples = N.random.rand(numsamples, numvars)
    dataset = data.Dataset(samples)
    dataset.discretize()

    learner = greedy.GreedyLearner(dataset, max_iterations=greedy_iters)
    learner.run()
    return learner
Beispiel #4
0
    def test_example1(self):
        """Learn from the first tutorial dataset and check an HTML index is written."""
        report_dir = os.path.join(self.tmpdir, "example1-result")
        index_page = os.path.join(report_dir, 'index.html')

        tutorial_data = data.fromfile(testfile("pebl-tutorial-data1.txt"))
        tutorial_data.discretize()

        # run() hands back the result object, which renders the report.
        greedy.GreedyLearner(tutorial_data).run().tohtml(report_dir)

        assert os.path.exists(index_page)
Beispiel #5
0
from pebl import data
from pebl import prior
from pebl.learner import greedy

# Load the data and discretize it with numbins=3.
dataset = data.fromfile("output.txt")
dataset.discretize(numbins=3)

# Build the prohibited (source, destination) pairs: each of the source nodes
# 32, 3, 30 and 31 (in that order) is paired with every node 0..31.
node_src = [32] * 32 + [3] * 32 + [30] * 32 + [31] * 32
node_dest = list(range(32)) * 4

# NOTE: this rebinds the name `prior` from the imported module to the Prior
# instance; the module is no longer reachable under that name afterwards.
prior = prior.Prior(33, prohibited_edges=zip(node_src, node_dest))

learner = greedy.GreedyLearner(dataset, prior, max_iterations=120000)
result = learner.run()
result.tohtml()
Beispiel #6
0
#!/usr/bin/env python
# Bayesian network learning with five greedy and five simulated annealing learners, run on a two-process pool
# http://pythonhosted.org/pebl/tutorial.html

from pebl import data, result
from pebl.learner import greedy, simanneal
from pebl.taskcontroller import multiprocess
dataset = data.fromfile("pebl-tutorial-data2.txt")

# Task list: five greedy learners first, then five simulated-annealing ones.
learners = [greedy.GreedyLearner(dataset, max_iterations=1000000) for i in range(5)]
learners.extend(simanneal.SimulatedAnnealingLearner(dataset) for i in range(5))

# Run all ten tasks through a controller created with poolsize=2.
tc = multiprocess.MultiProcessController(poolsize=2)
results = tc.run(learners)

# Combine the per-learner results and save them to a file.
merged_result = result.merge(results)
merged_result.tofile("example4-result")
Beispiel #7
0
from pebl.learner import greedy
from pebl.taskcontroller import ec2
from pebl.test import testfile

# Usage text shown when no config file is given on the command line.
help = """Test the EC2 TaskController.

USAGE: test_ec2.py configfile

You need to provide the configfile for use with EC2Controller.

###############################################################################
    WARNING for pebl devs: 
        Do NOT put your configfile under svn. 
        It contains sensitve information.
###############################################################################
"""

# The config file path is a required first argument.
if not sys.argv[1:]:
    print(help)
    sys.exit(1)

d = data.fromfile(testfile("testdata5.txt"))
d.discretize()

# Ten greedy learners, each created with max_time=10, run via the EC2
# controller built from the supplied config file.
tc = ec2.EC2Controller(config=sys.argv[1], min_count=3)
tasks = [greedy.GreedyLearner(d, max_time=10) for i in xrange(10)]
results = result.merge(tc.run(tasks))

# Show the merged result, then the host recorded on each run.
print(results)
print([r.host for r in results.runs])
Beispiel #8
0
# Benchmark section: each stanza below records wall-clock time around one
# model and prints the elapsed seconds. The classifyUsing* helpers are
# defined outside this excerpt.

# Gaussian Naive Bayes (per the label printed below)
start_time = time.time()
classifyUsingNB(trainList, classVariable, testList, testClassVariable)
elapsed_time = time.time() - start_time
print("Time elapsed Gaussian Naive Bayes : ", elapsed_time)

# Decision tree
start_time = time.time()
classifyUsingDecisionTree(trainList, classVariable, testList,
                          testClassVariable)
elapsed_time = time.time() - start_time
print("Time elapsed Decision : ", elapsed_time)

# Logistic regression; solver and multiclass are forwarded to the helper.
# NOTE(review): 'sag' / 'ovr' look like scikit-learn LogisticRegression
# options -- confirm against classifyUsingLogisticRegression.
solver = 'sag'
multiclass = 'ovr'
start_time = time.time()
classifyUsingLogisticRegression(trainList, classVariable, testList,
                                testClassVariable, solver, multiclass)
elapsed_time = time.time() - start_time
print("Time elapsed Logistic Regression : ", elapsed_time)

# Bayesian network learned with PEBL's greedy learner. The timed span covers
# loading the file, discretizing, running the learner, and writing the HTML
# report to the "dataset" output path.
start_time = time.time()
dataset = data.fromfile(trainFileNameForBayesian)
dataset.discretize()
learner = greedy.GreedyLearner(dataset)
ex1result = learner.run()
ex1result.tohtml("dataset")
elapsed_time = time.time() - start_time
print("Time elapsed Bayesian Network : ", elapsed_time)
Beispiel #9
0
 def setUp(self):
     """Prepare a 5-sample subset of testdata13 and a 10-iteration greedy learner.

     The missing-data evaluator is configured as 'exact' before the data is
     loaded.
     """
     config.set('evaluator.missingdata_evaluator', 'exact')

     full_data = data.fromfile(testfile('testdata13.txt'))
     self.data = full_data.subset(samples=range(5))
     self.learner = greedy.GreedyLearner(self.data, max_iterations=10)
import datasets

# Defaults used when no command-line arguments are supplied.
DEFAULT_DATASET = datasets.load("greedytest")
DEFAULT_REPORT_DIR = "./report/"

ds = DEFAULT_DATASET
report_dir = DEFAULT_REPORT_DIR

# Optional argv[1]: name of the dataset to load instead of the default.
if len(sys.argv) > 1:
    ds = datasets.load(sys.argv[1])
    if ds is None:
        # Report the name that was requested. The previous message formatted
        # `dataset`, which is not bound until further down, so a missing
        # dataset surfaced as a NameError instead of this exception.
        raise Exception("Could not find dataset '%s'" % sys.argv[1])

# Optional argv[2]: directory the HTML report is written to.
if len(sys.argv) > 2:
    report_dir = sys.argv[2]

dataset = ds.dataset
prior = ds.prior

# Only the greedy learner is run here; a SimulatedAnnealingLearner run was
# previously sketched as something to merge in via result.merge.
greedy_lrn = greedy.GreedyLearner(dataset, prior, max_iterations=1000)
results = greedy_lrn.run()

# Best-effort removal of any previous report; a missing directory is fine.
# ignore_errors keeps that behaviour without a bare `except:` that would also
# swallow KeyboardInterrupt/SystemExit.
shutil.rmtree(report_dir, ignore_errors=True)

results.tohtml(report_dir)
Beispiel #11
0
 def setUp(self):
     """Create the task controller under test plus six greedy-learner tasks.

     The controller is built from ``self.tctype`` and ``self.args``; all six
     tasks share one discretized dataset.
     """
     dataset = data.fromfile(testfile("testdata5.txt"))
     dataset.discretize()

     self.tc = self.tctype(*self.args)

     tasks = []
     for _ in xrange(6):
         tasks.append(greedy.GreedyLearner(dataset, max_iterations=100))
     self.tasks = tasks
Beispiel #12
0
#!/usr/bin/env python
# http://pythonhosted.org/pebl/tutorial.html

from pebl import data, result
from pebl.learner import greedy
dataset = data.fromfile("pebl-tutorial-data2.txt")

# Two greedy learners with different stopping criteria: an iteration budget
# for the first, a time budget (max_time, in seconds) for the second.
learner1 = greedy.GreedyLearner(dataset, max_iterations=1000000)
learner2 = greedy.GreedyLearner(dataset, max_time=120)
result1, result2 = learner1.run(), learner2.run()

# Merge both runs and save the combined result to a file.
merged_result = result.merge(result1, result2)
merged_result.tofile("example2-result")