def _fp_fn_ratio(num_tp, num_fp, sensitivity):
    """Return #FP divided by the estimated #FN, or ``inf`` if that estimate is 0.

    The number of false negatives is estimated from the number of true
    positives as ``(1 - sensitivity) * (num_tp / sensitivity)``.  With no
    true positives the estimate is zero, and dividing by it would raise
    ZeroDivisionError, so ``inf`` is returned instead.
    """
    num_fn = (1 - sensitivity) * (num_tp / sensitivity)
    if num_fn == 0:
        return float("inf")
    return num_fp / num_fn


def Cascade(trn, max_level):
    """Train a cascade of thresholded AdaBoost (Rank1_Metric) stages.

    Stage 0 thresholds the raw distance matrix of the samples; every later
    stage trains an AdaBoost classifier on the pairs that survived the
    previous stage and thresholds its (negated) predictions.

    Parameters:
        trn: either a 2-item ``(X, Y)`` pair, or a sequence of more than two
            ``(x, y)`` samples.  NOTE(review): a sequence of exactly two
            ``(x, y)`` samples is indistinguishable from ``(X, Y)`` and is
            unpacked as the latter.
        max_level: maximum number of AdaBoost stages trained after stage 0.

    Returns:
        A list whose first item is ``(thres, (TP, FP))`` for stage 0, followed
        by one ``(classifier, thres, (TP, FP))`` per trained stage.  TP/FP are
        whatever ``metric_learning.search_threshold`` returns as ``o[1]``
        (presumably true-positive and false-positive pairs -- confirm there).

    Training stops early when a stage leaves no TP, no FP, or an #FP/#FN
    ratio below 1.2.  An empty TP used to raise ZeroDivisionError (the ratio
    was computed before the emptiness check); the stages trained so far are
    now returned instead.
    """
    if len(trn) > 2:
        X = array([x for x, y in trn])
        Y = array([y for x, y in trn])
    else:
        X, Y = trn

    T = 512  # first argument handed to classifier.train()
    sensitivity = 0.99
    MAX_NUM_POS = 2000
    MAX_NUM_NEG = 4000
    MIN_FP_FN_RATIO = 1.2

    classifiers = []

    # ---- stage 0: threshold the plain distance matrix ----
    td = training_utils.TrainingData(X, Y)
    SD = distance_utils.calcDistanceMatrix2([X])
    trunk = td.trunk
    o = metric_learning.search_threshold(SD, trunk, sensitivity)
    thres = o[0]
    TP, FP = o[1]

    # Per-stage training-set sizes are fixed from the stage-0 counts.
    num_pos = min(len(TP), MAX_NUM_POS)
    num_neg = min(len(FP), MAX_NUM_NEG)

    # Single-argument print(...) emits the same text under Python 2's print
    # statement and under Python 3's print function.
    print("TP vs FP: %d vs %d" % (len(TP), len(FP)))
    trunk = o[2]
    classifiers.append((thres, (TP, FP)))
    FPvsFN = _fp_fn_ratio(len(TP), len(FP), sensitivity)
    print("%s %s" % ("#FP/#FN = ", FPvsFN))

    if len(TP) == 0:
        # No positive pairs to train on; this case used to crash on the
        # division above.
        return classifiers

    for level in range(1, max_level + 1):
        print("level = %d" % level)

        # Shuffle in place, then take up to num_pos / num_neg pairs.
        random.shuffle(TP)
        random.shuffle(FP)
        pos = TP[-1:-num_pos - 1:-1]
        neg = FP[0:num_neg]
        print("%d %d" % (len(pos), len(neg)))

        XX, yy = training_utils.create_training_sample(X, (pos, neg))
        classifier = adaboost.AdaBoost(rank1_metric.Rank1_Metric)
        classifier.set_training_sample(XX, yy)
        classifier.train(T, 1)

        # Validate on all surviving pairs, not only the sampled subset.
        VD, _ = training_utils.create_training_sample(X, (TP, FP))
        # Predictions are negated before thresholding (original note:
        # "be careful about the sign").
        SD = -classifier.predict(VD)
        o = metric_learning.search_threshold(SD, trunk, sensitivity)
        thres = o[0]
        TP, FP = o[1]
        print("TP vs FP: %d vs %d" % (len(TP), len(FP)))
        trunk = o[2]
        classifiers.append((classifier, thres, (TP, FP)))

        FPvsFN = _fp_fn_ratio(len(TP), len(FP), sensitivity)
        print("%s %s" % ("#FP/#FN = ", FPvsFN))
        if len(TP) == 0 or len(FP) == 0 or FPvsFN < MIN_FP_FN_RATIO:
            break

    return classifiers
def _fp_fn_ratio(num_tp, num_fp, sensitivity):
    """Return #FP divided by the estimated #FN, or ``inf`` if that estimate is 0.

    The number of false negatives is estimated from the number of true
    positives as ``(1 - sensitivity) * (num_tp / sensitivity)``.  With no
    true positives the estimate is zero, and dividing by it would raise
    ZeroDivisionError, so ``inf`` is returned instead.
    """
    num_fn = (1 - sensitivity) * (num_tp / sensitivity)
    if num_fn == 0:
        return float("inf")
    return num_fp / num_fn


def Cascade(trn, max_level):
    """Train a cascade of thresholded AdaBoost (Rank1_Metric) stages.

    Stage 0 thresholds the raw distance matrix of the samples; every later
    stage trains an AdaBoost classifier on the pairs that survived the
    previous stage and thresholds its (negated) predictions.

    Parameters:
        trn: either a 2-item ``(X, Y)`` pair, or a sequence of more than two
            ``(x, y)`` samples.  NOTE(review): a sequence of exactly two
            ``(x, y)`` samples is indistinguishable from ``(X, Y)`` and is
            unpacked as the latter.
        max_level: maximum number of AdaBoost stages trained after stage 0.

    Returns:
        A list whose first item is ``(thres, (TP, FP))`` for stage 0, followed
        by one ``(classifier, thres, (TP, FP))`` per trained stage.  TP/FP are
        whatever ``metric_learning.search_threshold`` returns as ``o[1]``
        (presumably true-positive and false-positive pairs -- confirm there).

    Training stops early when a stage leaves no TP, no FP, or an #FP/#FN
    ratio below 1.2.  An empty TP used to raise ZeroDivisionError (the ratio
    was computed before the emptiness check); the stages trained so far are
    now returned instead.
    """
    if len(trn) > 2:
        X = array([x for x, y in trn])
        Y = array([y for x, y in trn])
    else:
        X, Y = trn

    T = 512  # first argument handed to classifier.train()
    sensitivity = 0.99
    MAX_NUM_POS = 2000
    MAX_NUM_NEG = 4000
    MIN_FP_FN_RATIO = 1.2

    classifiers = []

    # ---- stage 0: threshold the plain distance matrix ----
    td = training_utils.TrainingData(X, Y)
    SD = distance_utils.calcDistanceMatrix2([X])
    trunk = td.trunk
    o = metric_learning.search_threshold(SD, trunk, sensitivity)
    thres = o[0]
    TP, FP = o[1]

    # Per-stage training-set sizes are fixed from the stage-0 counts.
    num_pos = min(len(TP), MAX_NUM_POS)
    num_neg = min(len(FP), MAX_NUM_NEG)

    # Single-argument print(...) emits the same text under Python 2's print
    # statement and under Python 3's print function.
    print("TP vs FP: %d vs %d" % (len(TP), len(FP)))
    trunk = o[2]
    classifiers.append((thres, (TP, FP)))
    FPvsFN = _fp_fn_ratio(len(TP), len(FP), sensitivity)
    print("%s %s" % ("#FP/#FN = ", FPvsFN))

    if len(TP) == 0:
        # No positive pairs to train on; this case used to crash on the
        # division above.
        return classifiers

    for level in range(1, max_level + 1):
        print("level = %d" % level)

        # Shuffle in place, then take up to num_pos / num_neg pairs.
        random.shuffle(TP)
        random.shuffle(FP)
        pos = TP[-1:-num_pos - 1:-1]
        neg = FP[0:num_neg]
        print("%d %d" % (len(pos), len(neg)))

        XX, yy = training_utils.create_training_sample(X, (pos, neg))
        classifier = adaboost.AdaBoost(rank1_metric.Rank1_Metric)
        classifier.set_training_sample(XX, yy)
        classifier.train(T, 1)

        # Validate on all surviving pairs, not only the sampled subset.
        VD, _ = training_utils.create_training_sample(X, (TP, FP))
        # Predictions are negated before thresholding (original note:
        # "be careful about the sign").
        SD = -classifier.predict(VD)
        o = metric_learning.search_threshold(SD, trunk, sensitivity)
        thres = o[0]
        TP, FP = o[1]
        print("TP vs FP: %d vs %d" % (len(TP), len(FP)))
        trunk = o[2]
        classifiers.append((classifier, thres, (TP, FP)))

        FPvsFN = _fp_fn_ratio(len(TP), len(FP), sensitivity)
        print("%s %s" % ("#FP/#FN = ", FPvsFN))
        if len(TP) == 0 or len(FP) == 0 or FPvsFN < MIN_FP_FN_RATIO:
            break

    return classifiers
def compute_distance(self, X):
    """Rank the stored training samples by distance to each row of X.

    Computes distances between X and ``self.training_X`` with
    ``self.dist_func``, then stores two results on the instance:

    * ``self.sorted_dist``   -- the distance array sorted along its last axis
    * ``self.sorted_labels`` -- the label field read back from that sorted
      array

    Nothing is returned.
    """
    metric = self.dist_func
    queries = numpy.array(X)
    references = self.training_X

    # Distances from the query samples to the training samples.  Per the
    # original note the result is laid out so that
    # pairwise[i, j] = d(X[i], training_X[j]).
    pairwise = distance_utils.calcDistanceMatrix2(
        (queries, references), distFunc=metric)

    # One copy of the training labels per query row.
    label_grid = numpy.matlib.repmat(self.training_y, len(queries), 1)
    label_dtype = label_grid.dtype

    # Write each label into the storage of the matching distance entry
    # (setfield at offset 0) so the labels move together with the distances
    # when the rows are sorted.
    # NOTE(review): this overwrites the leading bytes of every entry; it
    # relies on the dtype returned by calcDistanceMatrix2 -- confirm there.
    pairwise.setfield(label_grid, dtype=label_dtype)

    # For every query, order the training samples by distance (last axis).
    ranked = numpy.sort(pairwise)
    self.sorted_dist = ranked
    self.sorted_labels = ranked.getfield(label_dtype)
def compute_distance(self, X):
    """Rank the stored training samples by distance to each row of X.

    Computes distances between X and ``self.training_X`` with
    ``self.dist_func``, then stores two results on the instance:

    * ``self.sorted_dist``   -- the distance array sorted along its last axis
    * ``self.sorted_labels`` -- the label field read back from that sorted
      array

    Nothing is returned.
    """
    metric = self.dist_func
    queries = numpy.array(X)
    references = self.training_X

    # Distances from the query samples to the training samples.  Per the
    # original note the result is laid out so that
    # pairwise[i, j] = d(X[i], training_X[j]).
    pairwise = distance_utils.calcDistanceMatrix2(
        (queries, references), distFunc=metric)

    # One copy of the training labels per query row.
    label_grid = numpy.matlib.repmat(self.training_y, len(queries), 1)
    label_dtype = label_grid.dtype

    # Write each label into the storage of the matching distance entry
    # (setfield at offset 0) so the labels move together with the distances
    # when the rows are sorted.
    # NOTE(review): this overwrites the leading bytes of every entry; it
    # relies on the dtype returned by calcDistanceMatrix2 -- confirm there.
    pairwise.setfield(label_grid, dtype=label_dtype)

    # For every query, order the training samples by distance (last axis).
    ranked = numpy.sort(pairwise)
    self.sorted_dist = ranked
    self.sorted_labels = ranked.getfield(label_dtype)