def main():
    """Create and configure an HTTP app server through booster.

    Command-line arguments (positional):
        1. server name, e.g. http://localhost:8001
        2. documents database name
        3. modules database name
        4. app server port

    The app server is named ``http-<port>`` and is created in the
    "Default" group. Afterwards its authentication is set to
    application-level and its default user to "admin". Neither database
    is created here; both must already exist.

    Prints a usage message and exits with status 1 when fewer than four
    arguments are supplied.
    """
    argv = sys.argv
    if len(argv) < 5:
        usage_lines = (
            "Usage: ./create-app-server.py <server_name (http://localhost:8001)> <documents_database_name> <modules_database_name> <app_server_port>",
            "Documents and Modules databases will not be created - you need to have done that already",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    server_name, documents_db, modules_db, port = argv[1:5]
    appserver_name = "http-%s" % port

    # Authenticate against the server before issuing any booster action.
    booster.configureAuthHttpProcess(server_name, "admin", "admin")

    booster.booster(server_name, {
        "action": "appserver-create-http",
        "group-name": "Default",
        "root": "/",
        "database-name": documents_db,
        "modules-name": modules_db,
        "appserver-name": appserver_name,
        "port": port,
    })

    # Post-creation settings, applied in this order.
    post_create_settings = (
        ("authentication", "application-level"),
        ("default-user", "admin"),
    )
    for setting, value in post_create_settings:
        booster.booster(server_name, {
            "action": "appserver-set",
            "appserver-name": appserver_name,
            "group-name": "Default",
            "setting": setting,
            "value": value,
        })
def main():
    """Train a boosted-decision-tree classifier on the red-wine data and report accuracy.

    Reads ``Data/format_winequality-red.dat``, splits it 50/50 into a
    training and a test set, trains a ``booster`` on the training half and
    prints the percentage of test entries whose class equals the first
    element returned by ``classifier.disc(row)``.

    If the test split contains no entries, a short notice is printed
    instead of the percentages (previously this raised ZeroDivisionError).

    NOTE: a hand-written boosting loop used to follow an unconditional
    ``return`` at the end of this function. It could never execute and
    relied on names (``n_trees``, ``maxnodes``, ``iris``) that were only
    assigned in commented-out lines, so it has been dropped.
    """
    # ------------------------------------------------------------
    # Prepare data-sets
    full_data = data_struct()
    full_data.readTable("Data/format_winequality-red.dat", separator=",", ignore="@")
    train_data, test_data = full_data.splitTestTrain(splitfactor=0.50)

    # Only the training set is normalised; the test data should not have
    # 'class knowledge', so it is left untouched.
    train_data.setEqualNorm()

    # ------------------------------------------------------------
    # Setup BDT classifier
    classifier = booster()
    classifier.setData(train_data)
    classifier.setAttributes(maxnodes=8, maxtrees=100, maxpur=0.5, ncuts=10, minnode=0.1)
    classifier.train()
    print(classifier)

    # ------------------------------------------------------------
    # Evaluate the classifier on the held-out test set
    right, wrong = 0, 0
    for icl, _weight, irow in zip(test_data.cl, test_data.weight, test_data.data):
        # disc() presumably returns (predicted class, ...); only the first
        # element is compared against the true class -- confirm in booster.
        if icl == classifier.disc(irow)[0]:
            right += 1
        else:
            wrong += 1

    print("")
    total = right + wrong
    if total == 0:
        # Nothing to score: avoid dividing by zero below.
        print("No test entries to evaluate")
        return

    print("%7.3f/100 good, %7.4f/100 bad" % (float(100 * right) / total,
                                             float(100 * wrong) / total))
def main():
    """Create a modules database with one attached forest through booster.

    Command-line arguments (positional):
        1. server name, e.g. http://localhost:8001
        2. modules database name
        3. forest data directory

    The database is created with "Security" and "Schemas" as its security
    and schema databases. A forest named ``<database>-1`` is created on
    host "localhost" in the given data directory and attached to the
    database. Finally a fixed list of search-related database settings is
    switched off.

    Prints a usage message and exits with status 1 when fewer than three
    arguments are supplied.
    """
    argv = sys.argv
    if len(argv) < 4:
        print("Usage: ./create-modules-database.py <server_name (http://localhost:8001)> <modules_database_name> <forest_data_directory>")
        sys.exit(1)

    server_name, modules_db, data_dir = argv[1:4]
    forest_name = "%s-1" % modules_db

    # Authenticate against the server before issuing any booster action.
    booster.configureAuthHttpProcess(server_name, "admin", "admin")

    # Database, then forest, then attach -- order matters.
    booster.booster(server_name, {
        "action": "database-create",
        "database-name": modules_db,
        "security-db-name": "Security",
        "schema-db-name": "Schemas",
    })
    booster.booster(server_name, {
        "action": "forest-create",
        "forest-name": forest_name,
        "host-name": "localhost",
        "data-directory": data_dir,
    })
    booster.booster(server_name, {
        "action": "database-attach-forest",
        "database-name": modules_db,
        "forest-name": forest_name,
    })

    # Search settings to disable, applied one request at a time in this order.
    disabled_settings = (
        ("stemmed-searches", "off"),
        ("fast-phrase-searches", "false"),
        ("fast-case-sensitive-searches", "false"),
        ("fast-diacritic-sensitive-searches", "false"),
        ("fast-element-word-searches", "false"),
        ("fast-element-phrase-searches", "false"),
    )
    for setting, value in disabled_settings:
        booster.booster(server_name, {
            "action": "database-set",
            "database-name": modules_db,
            "setting": setting,
            "value": value,
        })