Esempio n. 1
0
def Cascade(trn, max_level):
    """Train a cascade of pair classifiers, one thresholded stage per level.

    Stage 0 thresholds the distance matrix returned by
    ``distance_utils.calcDistanceMatrix2([X])``.  Each later level trains an
    ``adaboost.AdaBoost(rank1_metric.Rank1_Metric)`` classifier on a random
    subset of the pairs accepted by the previous stage (its TP and FP lists),
    scores all of those pairs, and searches a new threshold on the negated
    scores.

    Args:
        trn: Either a sequence of more than two ``(x, y)`` items, or a
            two-element ``(X, Y)`` pair that is unpacked as-is.  The two forms
            are distinguished only by ``len(trn) > 2``.
        max_level: Maximum number of boosted levels trained after stage 0.

    Returns:
        A list whose first entry is ``(thres, (TP, FP))`` for stage 0,
        followed by one ``(classifier, thres, (TP, FP))`` entry per trained
        level.  Training stops early once a stage yields no TP, no FP, or an
        FP/FN ratio below 1.2.  If stage 0 already yields no TP, only the
        stage-0 entry is returned.
    """
    T = 512  # first argument handed to classifier.train()
    sensitivity = 0.99
    MAX_NUM_POS = 2000
    MAX_NUM_NEG = 4000

    if len(trn) > 2:
        X = array([x for x, _ in trn])
        Y = array([y for _, y in trn])
    else:
        X, Y = trn
    classifiers = []

    # ---- stage 0: threshold the plain distance matrix ----
    td = training_utils.TrainingData(X, Y)
    SD = distance_utils.calcDistanceMatrix2([X])
    trunk = td.trunk
    o = metric_learning.search_threshold(SD, trunk, sensitivity)
    thres = o[0]
    TP, FP = o[1]
    trunk = o[2]
    # Per-level training-set sizes are fixed from the stage-0 pair counts.
    num_pos = min(len(TP), MAX_NUM_POS)
    num_neg = min(len(FP), MAX_NUM_NEG)
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("TP vs FP: %d vs %d" % (len(TP), len(FP)))
    classifiers.append((thres, (TP, FP)))
    FPvsFN = _fp_fn_ratio(len(TP), len(FP), sensitivity)
    print("%s %s" % ("#FP/#FN = ", FPvsFN))
    if len(TP) == 0:
        # No positive pairs to train on; the loop below would stop at its
        # first check for the same reason.
        return classifiers

    for level in range(1, max_level + 1):
        print("level = %d" % level)
        # In-place shuffles: the slices below then form a random subset.
        random.shuffle(TP)
        random.shuffle(FP)
        pos = TP[-1:-num_pos - 1:-1]  # last num_pos items, reversed
        neg = FP[0:num_neg]
        print("%d %d" % (len(pos), len(neg)))
        XX, yy = training_utils.create_training_sample(X, (pos, neg))
        classifier = adaboost.AdaBoost(rank1_metric.Rank1_Metric)
        classifier.set_training_sample(XX, yy)
        classifier.train(T, 1)

        # Validate on every pair that survived the previous stage.
        VD, vy = training_utils.create_training_sample(X, (TP, FP))
        SD = -classifier.predict(VD)  # be careful about the sign!!!

        o = metric_learning.search_threshold(SD, trunk, sensitivity)
        thres = o[0]
        TP, FP = o[1]
        trunk = o[2]
        print("TP vs FP: %d vs %d" % (len(TP), len(FP)))
        classifiers.append((classifier, thres, (TP, FP)))
        FPvsFN = _fp_fn_ratio(len(TP), len(FP), sensitivity)
        print("%s %s" % ("#FP/#FN = ", FPvsFN))
        if len(TP) == 0 or len(FP) == 0 or FPvsFN < 1.2:
            break
    return classifiers


def _fp_fn_ratio(num_tp, num_fp, sensitivity):
    """Return #FP over the #FN estimated from the target sensitivity.

    The FN estimate is ``(1 - sensitivity) * num_tp / sensitivity``; it is
    zero when there are no true positives, in which case the ratio is
    reported as ``inf`` (or ``0.0`` when there are no false positives either)
    instead of raising ZeroDivisionError.
    """
    num_fn = (1 - sensitivity) * (num_tp / sensitivity)
    if num_fn == 0:
        return float("inf") if num_fp else 0.0
    return num_fp / num_fn
Esempio n. 2
0
def Cascade(trn, max_level):
    """Build a multi-stage cascade: a distance threshold plus boosted levels.

    The first stage searches a threshold directly on the distance matrix from
    ``distance_utils.calcDistanceMatrix2([X])``.  Every following level fits
    an ``adaboost.AdaBoost(rank1_metric.Rank1_Metric)`` classifier on a random
    sample of the TP/FP pairs kept by the preceding stage and re-thresholds
    the negated classifier scores of all kept pairs.

    Args:
        trn: A sequence of more than two ``(x, y)`` items, or a two-element
            ``(X, Y)`` pair used directly.  ``len(trn) > 2`` selects the
            first interpretation.
        max_level: Upper bound on the number of boosted levels.

    Returns:
        ``[(thres, (TP, FP)), (classifier, thres, (TP, FP)), ...]`` -- the
        stage-0 entry followed by one entry per trained level.  Levels stop
        being added when a stage has no TP, no FP, or an FP/FN ratio under
        1.2; a stage 0 without any TP returns just the stage-0 entry.
    """
    T = 512  # first argument of classifier.train()
    sensitivity = 0.99
    MAX_NUM_POS = 2000
    MAX_NUM_NEG = 4000

    def report(true_pos, false_pos):
        """Print the stage statistics and return the guarded FP/FN ratio."""
        # Single-argument print(...) gives identical output on Python 2 and 3.
        print("TP vs FP: %d vs %d" % (len(true_pos), len(false_pos)))
        # FN count estimated from the target sensitivity; 0.0 when TP is empty.
        num_FN = (1 - sensitivity) * (len(true_pos) / sensitivity)
        if num_FN == 0:
            # Avoid ZeroDivisionError when no true positives were found.
            ratio = float("inf") if len(false_pos) else 0.0
        else:
            ratio = len(false_pos) / num_FN
        print("%s %s" % ("#FP/#FN = ", ratio))
        return ratio

    if len(trn) > 2:
        X = array([x for x, _ in trn])
        Y = array([y for _, y in trn])
    else:
        X, Y = trn

    # Stage 0: threshold on the raw distance matrix.
    td = training_utils.TrainingData(X, Y)
    SD = distance_utils.calcDistanceMatrix2([X])
    o = metric_learning.search_threshold(SD, td.trunk, sensitivity)
    thres, trunk = o[0], o[2]
    TP, FP = o[1]
    # Sample sizes for all levels are capped using the stage-0 counts.
    num_pos = min(len(TP), MAX_NUM_POS)
    num_neg = min(len(FP), MAX_NUM_NEG)
    classifiers = [(thres, (TP, FP))]
    report(TP, FP)
    if len(TP) == 0:
        # Nothing positive to learn from; same stop rule as inside the loop.
        return classifiers

    for level in range(1, max_level + 1):
        print("level = %d" % level)
        # Shuffle in place, then slice, to draw a random training subset.
        random.shuffle(TP)
        random.shuffle(FP)
        pos = TP[-1:-num_pos - 1:-1]  # the last num_pos items, back to front
        neg = FP[0:num_neg]
        print("%d %d" % (len(pos), len(neg)))
        XX, yy = training_utils.create_training_sample(X, (pos, neg))
        classifier = adaboost.AdaBoost(rank1_metric.Rank1_Metric)
        classifier.set_training_sample(XX, yy)
        classifier.train(T, 1)

        VD, vy = training_utils.create_training_sample(X, (TP, FP))  # for validation
        SD = -classifier.predict(VD)  # be careful about the sign!!!

        o = metric_learning.search_threshold(SD, trunk, sensitivity)
        thres, trunk = o[0], o[2]
        TP, FP = o[1]
        # Record the stage before reporting, so a stopping stage is still kept.
        classifiers.append((classifier, thres, (TP, FP)))
        FPvsFN = report(TP, FP)
        if len(TP) == 0 or len(FP) == 0 or FPvsFN < 1.2:
            break
    return classifiers
Esempio n. 3
0
 def compute_distance(self, X):
     """Rank the training samples by distance for every row of X.

     Results are stored on the instance rather than returned:

     * ``self.sorted_dist``   -- each row of the distance matrix in
       ascending order;
     * ``self.sorted_labels`` -- ``self.training_y`` rearranged with the
       same per-row permutation, so ``sorted_labels[i, k]`` is the label of
       the k-th closest training sample to ``X[i]``.

     Args:
         X: Query samples; converted with ``numpy.array``.
     """
     f = self.dist_func
     X = numpy.array(X)
     # dist[i, j] = d(X[i], training_X[j])
     dist = distance_utils.calcDistanceMatrix2(
         (X, self.training_X), distFunc=f)
     # Carry the labels through the sort with an explicit permutation.
     # Packing them into the distance bytes (ndarray.setfield) would
     # overwrite part of every distance and depend on dtype widths and
     # byte order.  The stable sort keeps tied distances in training order.
     order = numpy.argsort(dist, axis=-1, kind="mergesort")
     rows = numpy.arange(dist.shape[0])[:, numpy.newaxis]
     self.sorted_dist = dist[rows, order]
     self.sorted_labels = numpy.ravel(self.training_y)[order]
Esempio n. 4
0
 def compute_distance(self, X):
     """Sort training samples by their distance to each query in X.

     Side effects (nothing is returned):

     * ``self.sorted_dist[i]`` holds the distances from ``X[i]`` to all
       training samples, smallest first;
     * ``self.sorted_labels[i]`` holds the matching entries of
       ``self.training_y`` in that same order.

     Args:
         X: Query samples; passed through ``numpy.array`` before use.
     """
     queries = numpy.array(X)
     # Distance matrix from the queries to training_X:
     # dist[i, j] = d(X[i], training_X[j]).
     dist = distance_utils.calcDistanceMatrix2(
         (queries, self.training_X), distFunc=self.dist_func)
     labels = numpy.ravel(self.training_y)
     # One permutation per row, applied to both distances and labels.
     # This leaves the distance values untouched, unlike overlaying the
     # label bytes on them with setfield/getfield.  "mergesort" is stable,
     # so equal distances keep their training-set order.
     ranking = numpy.argsort(dist, axis=-1, kind="mergesort")
     row_index = numpy.arange(dist.shape[0]).reshape(-1, 1)
     self.sorted_dist = dist[row_index, ranking]
     self.sorted_labels = labels[ranking]