Esempio n. 1
0
def randomForestBin(trainD, testD, args):
    """Fit a random forest on data extended with binarized feature copies.

    Selected columns are thresholded into 0/1 indicator columns, which are
    appended to the original columns of both the training and the test
    matrix before the forest is trained.

    Args:
        trainD: Training data; must provide ``to_numpy()`` returning a
            3-tuple whose first two items are the feature matrix and the
            class vector.
        testD: Test data with the same ``to_numpy()`` interface.
        args: Optional settings. Only a dict is consulted; any other value
            (e.g. ``None``) selects all defaults. Recognized keys:
            ``"trees"`` (default 100), ``"permutations"`` (default 1000),
            ``"nonzero"`` (default 20) and ``"duplicateCount"``
            (default 500). A float ``duplicateCount`` is interpreted as a
            fraction of the number of feature columns.

    Returns:
        Whatever ``getProb(rf, trainD, testD)`` returns for the forest
        learner and the rebuilt train/test tables.
    """
    # `in`/`get` replace the deprecated dict.has_key (removed in Python 3);
    # isinstance also accepts dict subclasses such as OrderedDict.
    opts = args if isinstance(args, dict) else {}
    trees = opts.get("trees", 100)
    permutations = opts.get("permutations", 1000)
    nonzero = opts.get("nonzero", 20)
    duplicateCount = opts.get("duplicateCount", 500)

    trainX, trainy, _ = trainD.to_numpy()
    testX, testy, _ = testD.to_numpy()

    # A float is a fraction of the column count, not an absolute count.
    if isinstance(duplicateCount, float):
        duplicateCount = int(trainX.shape[1] * duplicateCount)

    binVal, gains = infoGain.getGains(trainX, trainy, permutations, nonzero)
    # Keep at most `duplicateCount` entries of gains[0] whose second field
    # exceeds `nonzero`; the third field is used as the column index.
    # NOTE(review): presumably gains[0] is already ordered by gain - confirm
    # against infoGain.getGains.
    ind = [x[2] for x in gains[0] if x[1] > nonzero][:duplicateCount]
    # Per-column thresholds ("meje") for the selected columns.
    meje = [binVal[i] for i in ind]

    # Append one 0.0/1.0 indicator column per selected feature; the same
    # thresholds (derived from the training data) are applied to the test set.
    trainX = np.concatenate(
        (trainX, (trainX[:, ind] > meje).astype(float)), axis=1)
    testX = np.concatenate(
        (testX, (testX[:, ind] > meje).astype(float)), axis=1)

    # Build a single table from train + test rows, then split it again by
    # row membership (0 = train row, 1 = test row).
    X = np.concatenate((trainX, testX), axis=0)
    y = np.concatenate((trainy, testy), axis=0)
    data = functions.listToOrangeSingleClass(X, y.astype(int))
    ind = [0] * trainy.size + [1] * testy.size
    trainD = data.select(ind, 0)
    testD = data.select(ind, 1)

    rf = Orange.ensemble.forest.RandomForestLearner(trees=trees, name="forest")
    return getProb(rf, trainD, testD)
Esempio n. 2
0
import cPickle
import numpy as np
import functions

# Continuity threshold: a column counts as continuous only when it has more
# than this many distinct values.
uniques = 10
# NOTE(review): `Orange` is used below but no `import Orange` is visible in
# this snippet - confirm it is imported before this point.
data = Orange.data.Table("data/train.tab")
trainD, y, _ = data.to_numpy()
m, n = trainD.shape
y = y.astype(int)

# Number of distinct values in every column.
razlicnih = np.asarray([np.unique(trainD[:, i]).size for i in xrange(n)])
# Indices of the columns that pass the continuity threshold.
zvezni = [i for i, x in enumerate(razlicnih) if x > uniques]

trainD = trainD[:, zvezni]
# (index in the filtered matrix, index in the original matrix) pairs.
preslikavaIndex = list(enumerate(zvezni))

# Rebuild an Orange table that contains only the kept columns.
data = functions.listToOrangeSingleClass(trainD, y)
trainD, y, _ = data.to_numpy()
m, n = trainD.shape
razlicnih = np.asarray([np.unique(trainD[:, i]).size for i in xrange(n)])

# Relief score of every remaining attribute, keyed by the attribute itself.
reliefScore = {}
for attr in data.domain.attributes:
    reliefScore[attr] = Orange.feature.scoring.Relief(attr, data)
    # Same output as `print attr, score`, but also valid Python 3 syntax.
    print("%s %s" % (attr, reliefScore[attr]))

# Use a context manager so the file is flushed and closed deterministically,
# and binary mode so the pickle bytes are never newline-translated.
with open("relief_score_continous_filter_uniques_%d.pkl" % uniques, "wb") as pklFile:
    cPickle.dump({"ind": preslikavaIndex, "rel": reliefScore}, pklFile)