def randomForestBin(trainD, testD, args):
    """Train a random forest on data augmented with binarized attributes.

    Columns chosen via ``infoGain.getGains`` are thresholded, the resulting
    0/1 columns are appended to both the train and the test matrix, the data
    is converted back to Orange tables and handed to ``getProb`` together
    with a ``RandomForestLearner``.

    Args:
        trainD: training data; must provide ``to_numpy()`` returning a
            3-tuple whose first two items are the attribute matrix and the
            class vector.
        testD: test data with the same interface as ``trainD``.
        args: optional dict of settings. Anything that is not a dict (for
            example ``None``) selects all defaults. Recognized keys:
            ``"trees"`` (default 100), ``"permutations"`` (default 1000),
            ``"nonzero"`` (default 20) and ``"duplicateCount"``
            (default 500). A float ``duplicateCount`` is interpreted as a
            fraction of the number of training columns.

    Returns:
        Whatever ``getProb(rf, trainD, testD)`` returns.
    """
    # Non-dict ``args`` falls back to defaults for every option.
    options = args if isinstance(args, dict) else {}
    trees = options.get("trees", 100)
    permutations = options.get("permutations", 1000)
    nonzero = options.get("nonzero", 20)
    duplicateCount = options.get("duplicateCount", 500)

    trainX, trainy, _ = trainD.to_numpy()
    testX, testy, _ = testD.to_numpy()

    # A float count is a fraction of the available columns.
    if isinstance(duplicateCount, float):
        duplicateCount = int(trainX.shape[1] * duplicateCount)

    # NOTE(review): presumably ``binVal`` holds one split threshold per
    # column and ``gains[0]`` is a ranked list of tuples whose item 1 is a
    # count compared against ``nonzero`` and item 2 a column index --
    # confirm against infoGain.getGains.
    binVal, gains = infoGain.getGains(trainX, trainy, permutations, nonzero)
    columns = [g[2] for g in gains[0] if g[1] > nonzero][:duplicateCount]
    meje = [binVal[c] for c in columns]  # thresholds of the chosen columns

    # Append a 0.0/1.0 indicator column (value > threshold) for every chosen
    # column, using the same thresholds for train and test.
    trainBits = (trainX.T[columns].T > meje).astype(float)
    testBits = (testX.T[columns].T > meje).astype(float)
    trainX = np.concatenate((trainX, trainBits), axis=1)
    testX = np.concatenate((testX, testBits), axis=1)

    # Build one table from the stacked rows, then split it back: rows
    # flagged 0 are the training part, rows flagged 1 the test part.
    X = np.concatenate((trainX, testX), axis=0)
    y = np.concatenate((trainy, testy), axis=0)
    data = functions.listToOrangeSingleClass(X, y.astype(int))
    membership = [0] * trainy.size + [1] * testy.size
    trainD = data.select(membership, 0)
    testD = data.select(membership, 1)

    rf = Orange.ensemble.forest.RandomForestLearner(trees=trees, name="forest")
    return getProb(rf, trainD, testD)
import cPickle

import numpy as np

import functions

# NOTE(review): ``Orange`` is referenced below but is not imported in this
# section; presumably it is imported earlier in the file -- confirm.

# Threshold for continuity: a column is kept only when it has more than this
# many distinct values.
uniques = 10

data = Orange.data.Table("data/train.tab")
trainD, y, _ = data.to_numpy()
m, n = trainD.shape
y = y.astype(int)

# Number of distinct values in each column.
razlicnih = np.asarray([np.unique(trainD[:, i]).size for i in range(n)])

# Indices of the columns that pass the distinct-value threshold.
zvezni = [i for i, x in enumerate(razlicnih) if x > uniques]
trainD = trainD[:, zvezni]

# (index in the filtered matrix, index in the original matrix) pairs.
preslikavaIndex = list(enumerate(zvezni))

# Rebuild an Orange table that contains only the retained columns.
data = functions.listToOrangeSingleClass(trainD, y)
trainD, y, _ = data.to_numpy()
m, n = trainD.shape
razlicnih = np.asarray([np.unique(trainD[:, i]).size for i in range(n)])

# Relief score of every retained attribute, echoed as it is computed.
reliefScore = {}
for attr in data.domain.attributes:
    reliefScore[attr] = Orange.feature.scoring.Relief(attr, data)
    print("%s %s" % (attr, reliefScore[attr]))

# Binary mode keeps the pickle byte-exact on every platform, and the context
# manager guarantees the file is flushed and closed.
with open("relief_score_continous_filter_uniques_%d.pkl" % uniques, "wb") as out:
    cPickle.dump({"ind": preslikavaIndex, "rel": reliefScore}, out)