def _writeGrade(featureSpaceIndices, grade):
    # Append one "<featureSpaceIndices> <grade>" line to the shared outFile.
    with open(outFile, 'a') as out:
        out.write(str(featureSpaceIndices) + " " + str(grade) + '\n')


def evalSensor(sensorIndex, featureSpaceIndices):
    """Train one classifier per joint and grade their weighted sign-vote.

    For each joint in featureSpaceIndices a classifier is fitted on the
    normalized angle series against the normalized output; the held-out
    30% of the timeline is then classified by a grade-weighted vote of
    all classifiers.  The grade (fraction of predictions whose sign
    matches the expected output) is appended to outFile and returned.

    NOTE(review): a later definition of evalSensor in this file shadows
    this one -- confirm which variant is intended to be live.

    sensorIndex         -- sensor column passed through to angleExtraction
    featureSpaceIndices -- iterable of joint/input index keys
    Returns the grade, or 0 when no weighted prediction could be made.
    """
    # Reset data structure: one (initially empty) record per time stamp.
    allByTime = {}
    with open(fileName, 'r') as f:
        headers = f.readline().split()  # header row, intentionally skipped
        for line in f:
            splited = line.split()
            timeStamp = int(splited[0])
            allByTime[timeStamp] = {}
    allByJoint = {}
    for inputIndices in featureSpaceIndices:
        allByJoint[inputIndices] = {}
    clfs = {}
    grades = {}
    for inputIndices in featureSpaceIndices:
        allByTime, allByJoint = angleExtraction.prepareAnglesFromInput(
            fileName, inputIndices, sensorIndex, True, allByTime, allByJoint)
    # Normalize each joint's angle series independently.  The two passes
    # over allByTime.keys() see the same (unmodified) dict, so the order
    # of vec matches the write-back order.
    timeSet = Set([])
    for inputIndices in featureSpaceIndices:
        vec = []
        for timeStamp in allByTime.keys():
            if timeStamp in allByJoint[inputIndices]:
                timeSet.add(timeStamp)
                vec.append(allByJoint[inputIndices][timeStamp])
        if len(vec) > 0:
            vec = angleExtraction.normelizeVector(vec)
            i = 0
            for timeStamp in allByTime.keys():
                if timeStamp in allByJoint[inputIndices]:
                    allByJoint[inputIndices][timeStamp] = vec[i]
                    i = i + 1
    # Time set to sorted list; build a time-ordered output vector with the
    # zero outputs dropped.
    time = sorted(timeSet)
    allOutput = []
    tmpTime = []
    for timeStamp in time:
        out = allByTime[timeStamp]['output']
        if out != 0:
            tmpTime.append(timeStamp)
            allOutput.append(out)
    time = tmpTime
    # Normalize the target outputs and index them by time stamp.
    allOutput = normalByPercentile(allOutput)
    allOutputMap = {}
    for i, timeStamp in enumerate(time):
        allOutputMap[timeStamp] = allOutput[i]
    # Train a classifier (plus a quality grade) for each joint.
    for inputIndices in featureSpaceIndices:
        x = []
        y = []
        for timeStamp, angle in allByJoint[inputIndices].items():
            if timeStamp in allOutputMap:
                x.append(angle)
                y.append(allOutputMap[timeStamp])
        if len(y) == 0:
            continue
        gradeFactor, clf = getEstimationFactored(x, y)
        clfs[inputIndices] = clf
        grades[inputIndices] = gradeFactor
    # Classify the held-out 30% of the timeline: each classifier casts a
    # +/-1 vote weighted by its training grade.
    trainSize = int(0.7 * len(time))
    time_test = time[trainSize:]
    expectedOutputMap = {}
    predictionMap = {}
    weights = {}
    for timeStamp in time_test:
        expectedOutputMap[timeStamp] = allOutputMap[timeStamp]
        for inputIndices in featureSpaceIndices:
            if timeStamp in allByJoint[inputIndices]:
                x = allByJoint[inputIndices][timeStamp]
                p = clfs[inputIndices].predict(x)
                y = 1 if p > 0 else -1
                predictionMap[timeStamp] = (predictionMap.get(timeStamp, 0)
                                            + y * grades[inputIndices])
                weights[timeStamp] = (weights.get(timeStamp, 0)
                                      + grades[inputIndices])
    # Normalize votes by their total weight; drop zero-weight entries.
    # Iterate an explicit list copy so popping during iteration is safe.
    for timeStamp in list(predictionMap.keys()):
        if weights[timeStamp] != 0:
            predictionMap[timeStamp] = predictionMap[timeStamp] / weights[timeStamp]
        else:
            predictionMap.pop(timeStamp)
    if len(predictionMap) == 0:
        grade = 0
        _writeGrade(featureSpaceIndices, grade)
        return grade
    # Convert the maps to lists aligned on the tested time stamps.
    expectedOutput = []
    prediction = []
    for timeStamp in time_test:
        if timeStamp in predictionMap:
            prediction.append(predictionMap[timeStamp])
            expectedOutput.append(expectedOutputMap[timeStamp])
    # Grade = fraction of predictions whose sign matches the expectation.
    hits = 0.0
    for i in range(len(prediction)):
        if prediction[i] * expectedOutput[i] > 0:
            hits += 1
    grade = hits / float(len(prediction))
    _writeGrade(featureSpaceIndices, grade)
    return grade
def evalSensor(sensorIndex, featureSpaceIndices): #reset data structure allByTime = {} f = open(fileName, 'r') headers = f.readline().split() for line in f: splited = line.split() timeStamp = int(splited[0]) allByTime[timeStamp] = {} f.close() allByJoint = {} for inputIndices in featureSpaceIndices: allByJoint[inputIndices] = {} clfs = {} grades = {} for inputIndices in featureSpaceIndices: allByTime, allByJoint = angleExtraction.prepareAnglesFromInput(fileName, inputIndices, sensorIndex, True, allByTime, allByJoint) #normalizing allByJoint timeSet = Set([]) for inputIndices in featureSpaceIndices: vec = [] for timeStamp in allByTime.keys(): if(timeStamp in allByJoint[inputIndices].keys()): timeSet.add(timeStamp) x = allByJoint[inputIndices][timeStamp] vec.append(x) if(len(vec) > 0): vec = angleExtraction.normelizeVector(vec) i=0 for timeStamp in allByTime.keys(): if(timeStamp in allByJoint[inputIndices].keys()): allByJoint[inputIndices][timeStamp] = vec[i] i = i + 1 #time set to list, output dict to list time = [] for timeStamp in timeSet: time.append(timeStamp) time.sort() allOutput = [] tmpTime = [] #clean zeros, create time ordered output vector for timeStamp in time: out = allByTime[timeStamp]['output'] if(out != 0 and len(allByTime[timeStamp]) == featureNum + 1): tmpTime.append(timeStamp) allOutput.append(out) time = tmpTime #normalize allOutput allOutput = normalByPercentile(allOutput) #create a net hiddenSize = (featureNum + 2)/2 net = buildNetwork(featureNum, hiddenSize, 1, hiddenclass=LSTMLayer, outclass=SigmoidLayer, recurrent=True, bias=True) #build dataset ds = SequentialDataSet(featureNum, 1) i=0 lastTimeStamp = time[0] for timeStamp in time: if(len(allByTime[timeStamp]) == featureNum+1):#it is a full vector if(timeStamp - lastTimeStamp > 100): ds.newSequence() sample = [] for inputIndices in featureSpaceIndices: sample.append(allByTime[timeStamp][inputIndices]) ds.appendLinked(sample, allOutput[i]) i = i + 1 lastTimeStamp = timeStamp #train 
net.randomize() tstdata, trndata = ds.splitWithProportion( 0.25 ) trainer = BackpropTrainer(net, trndata) print len(ds) min = 100 trainNum = 100 bestTrainer = None for i in range(trainNum): res = trainer.train() if(res < min): min = res bestTrainer = trainer net.randomize() print min """ res = 100 while(res > min): net.randomize() res = trainer.train() """ trainer = bestTrainer for i in range(trainNum): res = trainer.train() if(i % (trainNum/10) == 0): print res print 'trndata.evaluateModuleMSE ' + str(trndata.evaluateModuleMSE(net)) print 'tstdata.evaluateModuleMSE ' + str(tstdata.evaluateModuleMSE(net)) #print net.activateOnDataset(tstdata) hits = 0.0 total = 0.0 #res = net.activate(tstdata) for i in range(trndata.getNumSequences()): for input, target in trndata.getSequenceIterator(i): res = net.activate(input) total += 1 if(res[0]*target[0] > 0): hits+=1 grade = hits / total print 'grade ' + str(grade) print 'total ' + str(total)