def createDiagram(source, quality):
    """Render and save an information-gain diagram for one Laban quality.

    source  -- name of the CMA whose recordings to load (e.g. 'Rachelle')
    quality -- Laban quality name; must appear in the list returned by
               cp.getCombinations()

    Side effect: delegates to ig.createDiagram, which writes the figure
    under a file name built as source + quality.
    """
    quality_names, _combinations = cp.getCombinations()
    dataset, feature_labels = labanUtil.getPybrainDataSet(source)
    inputs, targets = labanUtil.fromDStoXY(dataset)
    # Pick the label vector matching the requested quality.
    target = targets[quality_names.index(quality)]
    ig.createDiagram(inputs, target, feature_labels, source + quality)
# Script: rank features by recursive information gain on one CMA's data.
# NOTE: Python 2 (bare `print` statements).
import LabanUtils.util as labanUtil
import LabanUtils.informationGain as ig
import mocapUtils.utils as utils
import matplotlib.pyplot as plt
import LabanUtils.combinationsParser as cp

# Certified Movement Analysts whose recordings form the two corpora.
CMAs = ['Rachelle', 'Karen']
trainSource = CMAs[0]
testSource = CMAs[1]
tstdata, featuresNames = labanUtil.getPybrainDataSet(testSource)
print 'Data was read'
X2, Y2 = labanUtil.fromDStoXY(tstdata)
# Rank against the first quality's label vector only.
y=Y2[0]
igs, ps = ig.recursiveRanking(X2, y)
print igs
print max(igs)
"""
trndata, featuresNames = labanUtil.getPybrainDataSet(trainSource)
X1, Y1 = labanUtil.fromDStoXY(trndata)
cors = []
for y1, y2 in zip(Y1, Y2):
    im1 = ig.infoGain(X1, y1)
    print im1
    ind = [i for i, e in enumerate(im1) if e != 0]
    print ind
    im2 = ig.infoGain(X2, y2)
    print im2
    ind = [i for i, e in enumerate(im2) if e != 0]
    print ind
    cor = mocapUtils.corr(im1, im2)
    print cor
# Script: set up a PyBrain backprop network over the Laban dataset and
# build a human-readable description string of the hyper-parameters.
# (Training presumably happens in code past this chunk — not visible here.)
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.utilities import percentError
import numpy as np
import matplotlib.pyplot as plt
import LabanUtils.util as labanUtil
import LabanUtils.combinationsParser as cp
from sklearn import metrics

qualities, combinations = cp.getCombinations()
ds, featuresNames = labanUtil.getPybrainDataSet()
# Network layer sizes are taken from the first sample's input/target widths.
inLayerSize = len(ds.getSample(0)[0])
outLayerSize = len(ds.getSample(0)[1])
# --- hyper-parameters ---------------------------------------------------
splitProportion = 0.2
decay= 0.999995
myWeightdecay = 0.00015#0.99999
initialLearningrate= 0.01
hiddenSize = 150
epochs=3000
momentum=0#.25
#ds.nClasses = len(qualities)
# splitWithProportion returns (test, train) in this order.
tstdata, trndata = ds.splitWithProportion( splitProportion )
res = labanUtil.constructNet(inLayerSize, hiddenSize, outLayerSize)
# constructNet returns a tuple; element 0 is the network itself.
n=res[0]
trainer = BackpropTrainer(n, trndata, learningrate=initialLearningrate,\
    lrdecay=decay, verbose=True, weightdecay=myWeightdecay, momentum=momentum)
# Description string used (presumably) for plot titles / logging downstream.
description = 'ds '+ str(len(ds))+\
    ', h: '+ str(hiddenSize)+ \
    ', lr ' + str(initialLearningrate)
description = description+ ', decay ' + str(decay) + ' sp: ' + str(splitProportion) + \
    ' wd ' + str(myWeightdecay)+ ' momentum ' + str(momentum)
# Script: compare univariate F-test feature scores with linear-SVM weights
# on the Laban dataset (modelled on the sklearn feature-selection example).
import numpy as np
import pylab as pl
from sklearn import datasets, svm
from sklearn.feature_selection import SelectPercentile, f_classif
import LabanUtils.util as labanUtil
import LabanUtils.combinationsParser as cp
from multiprocessing import Pool

ds = labanUtil.getPybrainDataSet()
X, Y = labanUtil.fromDStoXY(ds)
X, Y = np.array(X), np.array(Y)
# One bar position per feature column.
X_indices = np.arange(X.shape[-1])

###############################################################################
# Univariate feature selection with F-test for feature scoring
# We use the default selection function: the 10% most significant features
selector = SelectPercentile(f_classif, percentile=10)
# FIX: removed a stray dangling "selector." fragment that preceded this call
# (it made the statement parse as selector.selector.fit(...) and fail).
selector.fit(X, Y[0])
scores = -np.log10(selector.pvalues_)
#scores /= scores.max()
pl.bar(X_indices - .45, scores, width=.2,
       label=r'Univariate score ($-Log(p_{value})$)', color='g')

###############################################################################
# Compare to the weights of an SVM
clf = svm.SVC(kernel='linear')
# FIX: `y` was never defined in this script; train on the same label vector
# (first quality) that the univariate selector was fitted against.
clf.fit(X, Y[0])
            np.mean(allFeaturesRecall), np.mean(featureSelctedRecall), \
            np.mean(allFeaturesPrecision), np.mean(featureSelctedPrecision), \
            name
# NOTE(review): the lines above are the tail of a statement that starts
# before this chunk (apparently returning mean recall/precision for the
# all-features vs. feature-selected runs plus a classifier name) — the
# opening of that statement is not visible here.
"""
ds = labanUtil.getPybrainDataSet('Rachelle')
print ds.getLength()
print len(ds.getSample(0)[0])
print eval(ds, 20)
"""
if __name__ == '__main__':
    # Entry point: load one CMA's dataset and prepare accumulators for an
    # evaluation sweep (loop body continues past this chunk).
    qualities, combinations = cp.getCombinations()
    pool = Pool(6)
    source = 'Rachelle'
    ds = labanUtil.getPybrainDataSet(source)
    #print 'input diamention: ', len(ds.getSample(0)[0])
    inLayerSize = len(ds.getSample(0)[0])
    outLayerSize = len(ds.getSample(0)[1])
    # Per-quality metric accumulators: all features vs. selected features.
    allFeaturesF1=[]
    allFeaturesRecall=[]
    allFeaturesPrecision=[]
    featureSelctedF1=[]
    featureSelctedRecall = []
    featureSelctedPrecision = []
    #params = np.linspace(0.1, 1, 10)
    # Candidate parameter values for the sweep — semantics depend on the
    # unseen loop below; presumably selection percentiles or C values.
    params = [1,3,4,5,6,99]
    m = {}
        # NOTE(review): tail of a function whose signature starts before this
        # chunk; per loop iteration it fits a percentile-based feature
        # selector, trains clf on the reduced features, and records
        # F1/precision/recall against a held-out test set.
        selector = SelectPercentile(chooser, percentile=p)
        selector.fit(X, y)
        # Derive a short classifier name from its repr, e.g. "AdaBoostClassifier".
        name = str(clf).split()[0].split('(')[0]
        clf.fit(selector.transform(X), y)
        pred = clf.predict(selector.transform(X_test))
        f1 = metrics.f1_score(y_test, pred)
        f1s.append(f1)
        ps.append(metrics.precision_score(y_test, pred))
        rs.append(metrics.recall_score(y_test, pred))
    return f1s, ps, rs
if __name__ == '__main__':
    p = Pool(7)
    qualities, combinations = cp.getCombinations()
    source = 'Rachelle'
    # Pool both CMAs' recordings into a single dataset (Karen + Rachelle).
    ds = labanUtil.getPybrainDataSet('Karen')
    second = labanUtil.getPybrainDataSet('Rachelle')
    for inp, target in second:
        ds.addSample(inp, target)
    inLayerSize = len(ds.getSample(0)[0])
    outLayerSize = len(ds.getSample(0)[1])
    # One metric list per quality, filled by the (unseen) evaluation loop.
    f1s = []
    ps=[]
    rs=[]
    testNum=70
    for _ in qualities:
        f1s.append([])
        ps.append([])
        rs.append([])
    m = {}
    clf = AdaBoostClassifier()
# Script: load the training CMA's data and select the label vector for one
# Laban quality; a commented-out SVM + feature-selection pipeline follows.
import LabanUtils.util as labanUtil
import matplotlib.pyplot as plt
import pylab as pl
from sklearn.linear_model import SGDClassifier
import numpy as np
from sklearn import svm
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
import LabanUtils.combinationsParser as cp
from sklearn import manifold, datasets, decomposition, ensemble, lda, random_projection

quality = 'Advance'
trainSource = 'Karen'
testSource = 'Rachelle'
trndata, featuresNames = labanUtil.getPybrainDataSet(trainSource)
#tstdata, featuresNames = labanUtil.getPybrainDataSet(trainSource)
#X_test, Y_test = labanUtil.fromDStoXY(tstdata)
X, Y = labanUtil.fromDStoXY(trndata)
qualities, combinations = cp.getCombinations()
# Label vector for the chosen quality.
y=Y[qualities.index(quality)]
"""
c=80
selectedFeaturesNum = 25
ratio ='auto'
clf = svm.LinearSVC(C=c, loss='LR', penalty='L1', dual=False, class_weight='auto')#{1: ratio})
chooser=f_classif#ig.infoGain#ig.recursiveRanking
anova_filter = SelectKBest(chooser, k=selectedFeaturesNum)
pipe = Pipeline([
    ('feature_selection', anova_filter),
def getXYforMultiSet(source):
    """Load *source*'s dataset and return (X, Y transposed).

    The transpose turns the per-quality label lists into per-sample rows,
    suitable for multi-label estimators.
    """
    dataset, _feature_names = labanUtil.getPybrainDataSet(source)
    inputs, per_quality_labels = labanUtil.fromDStoXY(dataset)
    return inputs, np.transpose(per_quality_labels)