def test_tc(self):
    d = data.fromfile(testfile("testdata5.txt"))
    d.discretize()

    tasks = [greedy.GreedyLearner(d) for x in range(5)]
    tc = ipy1.IPython1Controller("127.0.0.1:10113")
    results = tc.run(tasks)
    results = result.merge(results)

    assert isinstance(results, result.LearnerResult)
def setUp(self):
    dat = data.fromfile(testfile("testdata5.txt"))
    dat.discretize()

    g = greedy.GreedyLearner(dat, max_iterations=100)
    g.run()
    self.result = g.result

    self.tempdir = tempfile.mkdtemp()
    self.result.tohtml(self.tempdir)
import numpy as N

from pebl import config, data
from pebl.learner import greedy


def test_pebl(numvars, numsamples, greedy_iters, cachesize):
    print "Testing with #vars=%d, #samples=%d, iters=%d, cachesize=%d" % (
        numvars, numsamples, greedy_iters, cachesize)

    config.set('localscore_cache.maxsize', cachesize)

    d = data.Dataset(N.random.rand(numsamples, numvars))
    d.discretize()

    g = greedy.GreedyLearner(d, max_iterations=greedy_iters)
    g.run()
    return g
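# Hypothetical invocation of the benchmark above; the parameter values are
# illustrative only and not taken from the original script.
if __name__ == '__main__':
    learner = test_pebl(numvars=20, numsamples=50, greedy_iters=1000,
                        cachesize=1000)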
def test_example1(self):
    outdir = os.path.join(self.tmpdir, "example1-result")

    dataset = data.fromfile(testfile("pebl-tutorial-data1.txt"))
    dataset.discretize()

    learner = greedy.GreedyLearner(dataset)
    ex1result = learner.run()
    ex1result.tohtml(outdir)

    assert os.path.exists(os.path.join(outdir, 'index.html'))
from pebl import data
from pebl import prior
from pebl.learner import greedy

dataset = data.fromfile("output.txt")
dataset.discretize(numbins=3)

node_src = [32 for ii in range(32)] + [3 for ii in range(32)] + \
           [30 for ii in range(32)] + [31 for ii in range(32)]
node_dest = range(32) + range(32) + range(32) + range(32)
prior = prior.Prior(33, prohibited_edges=zip(node_src, node_dest))

learner = greedy.GreedyLearner(dataset, prior, max_iterations=120000)
result = learner.run()
result.tohtml()
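# Note (added sketch): the zip(node_src, node_dest) call above enumerates
# every edge from nodes 32, 3, 30 and 31 into nodes 0..31. The same
# prohibited-edge list can be written as a single comprehension:
prohibited = [(src, dest) for src in (32, 3, 30, 31) for dest in range(32)]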
#!/usr/bin/env python
# Bayesian network learning with five greedy and five simulated annealing
# learners, run through a multiprocess task controller.
# http://pythonhosted.org/pebl/tutorial.html

from pebl import data, result
from pebl.learner import greedy, simanneal
from pebl.taskcontroller import multiprocess

dataset = data.fromfile("pebl-tutorial-data2.txt")

learners = [greedy.GreedyLearner(dataset, max_iterations=1000000) for i in range(5)] + \
           [simanneal.SimulatedAnnealingLearner(dataset) for i in range(5)]

tc = multiprocess.MultiProcessController(poolsize=2)
results = tc.run(learners)

merged_result = result.merge(results)
merged_result.tofile("example4-result")
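# Alternative sketch (not part of the tutorial script above): the same
# learners can be run serially, without a task controller, and their
# results merged the same way. The output filename here is illustrative.
serial_results = [learner.run() for learner in learners]
serial_merged = result.merge(serial_results)
serial_merged.tofile("example4-serial-result")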
import sys

from pebl import data, result
from pebl.learner import greedy
from pebl.taskcontroller import ec2
from pebl.test import testfile

help = """Test the EC2 TaskController.

USAGE: test_ec2.py configfile

You need to provide the configfile for use with EC2Controller.

###############################################################################
WARNING for pebl devs: Do NOT put your configfile under svn. It contains
sensitive information.
###############################################################################
"""

if len(sys.argv) < 2:
    print help
    sys.exit(1)

d = data.fromfile(testfile("testdata5.txt"))
d.discretize()

tc = ec2.EC2Controller(config=sys.argv[1], min_count=3)
results = tc.run([greedy.GreedyLearner(d, max_time=10) for i in xrange(10)])
results = result.merge(results)

print results
print [r.host for r in results.runs]
# Gaussian Naive Bayes
start_time = time.time()
classifyUsingNB(trainList, classVariable, testList, testClassVariable)
elapsed_time = time.time() - start_time
print("Time elapsed Gaussian Naive Bayes : ", elapsed_time)

# Decision tree
start_time = time.time()
classifyUsingDecisionTree(trainList, classVariable, testList, testClassVariable)
elapsed_time = time.time() - start_time
print("Time elapsed Decision Tree : ", elapsed_time)

# Logistic regression
solver = 'sag'
multiclass = 'ovr'
start_time = time.time()
classifyUsingLogisticRegression(trainList, classVariable, testList,
                                testClassVariable, solver, multiclass)
elapsed_time = time.time() - start_time
print("Time elapsed Logistic Regression : ", elapsed_time)

# Bayesian network using PEBL for the data
start_time = time.time()
dataset = data.fromfile(trainFileNameForBayesian)
dataset.discretize()
learner = greedy.GreedyLearner(dataset)
ex1result = learner.run()
ex1result.tohtml("dataset")
elapsed_time = time.time() - start_time
print("Time elapsed Bayesian Network : ", elapsed_time)
def setUp(self):
    config.set('evaluator.missingdata_evaluator', 'exact')
    self.data = data.fromfile(testfile('testdata13.txt')).subset(samples=range(5))
    self.learner = greedy.GreedyLearner(self.data, max_iterations=10)
import shutil
import sys

import datasets
from pebl.learner import greedy

DEFAULT_DATASET = datasets.load("greedytest")
DEFAULT_REPORT_DIR = "./report/"

ds = DEFAULT_DATASET
report_dir = DEFAULT_REPORT_DIR

if len(sys.argv) > 1:
    ds = datasets.load(sys.argv[1])
    if ds is None:
        raise Exception("Could not find dataset '%s'" % sys.argv[1])
if len(sys.argv) > 2:
    report_dir = sys.argv[2]

dataset = ds.dataset
prior = ds.prior

greedy_lrn = greedy.GreedyLearner(dataset, prior, max_iterations=1000)
#anneal_lrn = simanneal.SimulatedAnnealingLearner(dataset, prior)
#results = result.merge(*[ greedy_lrn.run(), anneal_lrn.run() ])
results = greedy_lrn.run()

# Remove any existing report directory before writing the new report.
try:
    shutil.rmtree(report_dir)
except OSError:
    pass
results.tohtml(report_dir)
def setUp(self):
    d = data.fromfile(testfile("testdata5.txt"))
    d.discretize()

    self.tc = self.tctype(*self.args)
    self.tasks = [greedy.GreedyLearner(d, max_iterations=100) for i in xrange(6)]
#!/usr/bin/env python
# http://pythonhosted.org/pebl/tutorial.html

from pebl import data, result
from pebl.learner import greedy

dataset = data.fromfile("pebl-tutorial-data2.txt")

learner1 = greedy.GreedyLearner(dataset, max_iterations=1000000)
learner2 = greedy.GreedyLearner(dataset, max_time=120)  # in seconds

result1 = learner1.run()
result2 = learner2.run()

merged_result = result.merge(result1, result2)
merged_result.tofile("example2-result")
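# Optional follow-up (sketch): the merged result can also be rendered as an
# HTML report, as the other examples do; the directory name is illustrative.
merged_result.tohtml("example2-report")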