Ejemplo n.º 1
0
def train_test(Xtrain, Ytrain, Xtest, la, R):
    """Train the learner named by `la` and return raw scores for Xtest.

    Parameters:
        Xtrain, Ytrain: training feature matrix and +1/-1 labels.
        Xtest: feature matrix to score.
        la: learning algorithm — "logistic", "ridge", "svm" or "boosting".
        R: regularization / rounds parameter (logistic uses 1/R as its
           inverse-regularization argument).

    Returns:
        Array of real-valued scores, one per row of Xtest.

    Raises:
        ValueError: if `la` names no known algorithm.  (The previous
        version fell through every branch and died with a confusing
        NameError on the undefined `wvec`.)
    """
    if la == "logistic":
        lr = LogisticRegression(Xtrain, Ytrain, Xtest, 1.0 / R)
        lr.train()
        # test_predictions() is in [0, 1]; map to [-1, 1] to match the
        # linear learners below.
        return 2 * lr.test_predictions() - 1
    if la == "ridge":
        wvec = ridge_regression(Xtrain, Ytrain, R)
    elif la == "svm":
        wvec = simple_SVM(Xtrain, Ytrain, R)
    elif la == "boosting":
        wvec = adaboost_feats(Xtrain, Ytrain, R)
    else:
        raise ValueError("unknown learning algorithm: %r" % (la,))
    return dot(wvec, Xtest.T)
Ejemplo n.º 2
0
def train_test(Xtrain, Ytrain, Xtest, la, R):
    """Fit the learner selected by `la` on (Xtrain, Ytrain) and score Xtest.

    "logistic" trains a LogisticRegression (with inverse regularization
    1/R) and returns its predictions rescaled from [0, 1] to [-1, 1];
    the remaining learners produce a weight vector that is dotted with
    each test row.
    """
    if la == "logistic":
        clf = LogisticRegression(Xtrain, Ytrain, Xtest, 1.0 / R)
        clf.train()
        return 2 * clf.test_predictions() - 1

    if la == "ridge":
        weights = ridge_regression(Xtrain, Ytrain, R)
    elif la == "svm":
        weights = simple_SVM(Xtrain, Ytrain, R)
    elif la == "boosting":
        weights = adaboost_feats(Xtrain, Ytrain, R)
    return dot(weights, Xtest.T)
Ejemplo n.º 3
0
 def help_learn( self, train, test ):
     """Train the GUI-selected learner on rows `train` of self.X and
     return real-valued scores `u` for rows `test`.

     Labels are +1 for indices in self.data.po, -1 otherwise.  When the
     human/machine feature toggles agree (or X lacks the expected 1256
     columns) all columns are used; otherwise only the human columns
     (0..254 plus the all-ones column 1255) or the machine columns
     (255..1255) are used.  Python 2 code (print statements).
     """
     data = self.data
     # hf/mf/la/R come from GUI controls — presumably Tk variables; verify.
     hf = self.hf.get()
     mf = self.mf.get()
     la = self.learning_alg.get()
     #print (mf,hf),"FEATURES",self.X.shape
     # +1/-1 training labels: +1 iff the row index is in data.po.
     tmpY = [(i in data.po)*2-1. for i in train]
     if (mf == hf) or self.X.shape[1]!=1256:
         # --- Use the full feature matrix ---
         if data.tags!=[]:
             print "Learning with tags!"
             # Prepend tag features, rescaled from {0,1} to {-1,+1}.
             tmpX = hstack([2*data.Mtag[train]-1.,self.X[train]])
         else:
             tmpX = self.X[train]
         self.R=self.scale.get()
         print "Learning, R=",self.R, la
         if la=="Logistic regression":
             # Logistic needs the test matrix up front (transductive API).
             if data.tags!=[]:
                 tstX = hstack([2*data.Mtag[test]-1.,self.X[test]])
             else:
                 tstX = self.X[test]
             print "Training:",tmpX.shape[0]
             lr = LogisticRegression(tmpX,tmpY,tstX,1.0/self.R)
             lr.train()
             print "Testing:",tstX.shape[0]
             u=lr.test_predictions()
         else:
             # Linear learners: fit a weight vector, then dot with test rows.
             if la=="Linear SVM":
                 wvec = simple_SVM(tmpX,tmpY,self.R)
             else:
                 assert la=="Boosting"
                 wvec = adaboost_feats(tmpX,tmpY,self.R)
             # Show the ten largest-magnitude weights for inspection.
             print sorted(abs(wvec),reverse=True)[:10]
             if data.tags!=[]:
                 tmpX = hstack([2*data.Mtag[test]-1.,self.X[test]])
             else:
                 tmpX = self.X[test]
             u= dot(tmpX,wvec)
         # NOTE(review): for numpy float arrays `nan not in u` is always
         # True (nan != nan), so this assert can never fire; isnan(u).any()
         # was probably intended — confirm before relying on it.
         assert nan not in u
     else:
         # --- Restrict to the human or machine feature columns ---
         a = 255
         b = 1255
         if hf==1:
             print "Human Features"
             assert mf==0
             rang = range(0,a)
             rang.append(b) # the all 1's col
         else:
             print "Machine Features"
             assert mf==1
             rang = range(a,b+1)
         if data.tags!=[]:
             print "Learning with tags"
             tmpX = hstack([2*data.Mtag[train]-1.,self.X[train,:][:,rang]])
         else:
             tmpX = self.X[train,:][:,rang]
         self.R=self.scale.get()
         print "Learning, R=",self.R,la
         if la=="Logistic regression":
             if data.tags!=[]:
                 tstX = hstack([2*data.Mtag[test]-1.,self.X[test,:][:,rang]])
             else:
                 tstX = self.X[test,:][:,rang]
             print "Training:",tmpX.shape[0]
             lr = LogisticRegression(tmpX,tmpY,tstX,1.0/self.R)
             lr.train()
             print "Testing:",tstX.shape[0]
             u=lr.test_predictions()
         else:
             if la=="Linear SVM":
                 wvec = simple_SVM(tmpX,tmpY,self.R)
             else:
                 assert la=="Boosting"
                 wvec = adaboost_feats(tmpX,tmpY,self.R)
             print sorted(abs(wvec),reverse=True)[:10]
             if data.tags!=[]:
                 tmpX = hstack([2*data.Mtag[test]-1.,self.X[test,:][:,rang]])
             else:
                 tmpX = self.X[test][:,rang]
             u= dot(tmpX,wvec)
     return u
Ejemplo n.º 4
0
def learn(pos,neg,X,la="boosting",R=20):
    """Score every example with a leave-one-out scheme (Python 2 code).

    pos/neg: ids of positive/negative examples; le2n(id) maps an id to a
    row index of feature matrix X.  la selects the learner ("boosting",
    "svm" or "logistic"); R is its regularization/rounds parameter.

    Returns (y, err): y maps each id (labelled and unlabelled) to a real
    score, err is the leave-one-out 0/1 error rate over pos+neg.
    """
    assert len(pos)>1 and len(neg)>1
    n = X.shape[0]  # NOTE(review): unused
    lpos = [le2n(i) for i in pos]
    lneg = [le2n(i) for i in neg]
    # Label vector: +1 entries for positives first, then -1 for negatives.
    Y = zeros(len(lpos)+len(lneg))
    for i in range(len(lpos)):
        Y[i]=1.
    for i in range(len(lneg)):
        Y[i+len(lpos)]=-1.
    y = {}
    # Unlabelled ids: everything alpha() yields that is neither pos nor neg.
    unl = set(alpha())-set(pos)-set(neg)
    if len(unl)>0:
        # Train once on all labelled rows, score the unlabelled ids.
        if la=="svm":
            wvec = simple_SVM(X[lpos+lneg],Y,R)
            for i in unl:
                y[i]=dot(wvec,X[le2n(i)])
        if la=="boosting":
            wvec = adaboost_feats(X[lpos+lneg],Y,R)
            for i in unl:
                y[i]=dot(wvec,X[le2n(i)])
        if la=="logistic":
            lr = LogisticRegression(X[lpos+lneg],Y,X[[le2n(i) for i in unl]],1.0/R)
            lr.train()
            u=lr.test_predictions()
            i=0
            for a in unl:
                # Map [0,1] predictions to [-1,1].
                y[a] = 2*u[i]-1
                i+=1

    # Leave-one-out over positives: drop one positive row, retrain, score it.
    # Y[1:] removes one +1 label — which one is irrelevant since all
    # positive labels are identical.
    for i in pos:
        lpos2 = lpos[:]
        lpos2.remove(le2n(i))
        if la=="boosting":
            wvec = adaboost_feats(X[lpos2+lneg],Y[1:],R)
            y[i]=dot(wvec,X[le2n(i)])
        if la=="svm":
            wvec = simple_SVM(X[lpos2+lneg],Y[1:],R)
            y[i]=dot(wvec,X[le2n(i)])
        if la=="logistic":
            lr = LogisticRegression(X[lpos2+lneg],Y[1:],X[[le2n(i)]],1.0/R)
            lr.train()
            y[i]=2*lr.test_predictions()[0]-1

    # Leave-one-out over negatives; Y[:-1] likewise drops one -1 label.
    for i in neg:
        lneg2 = lneg[:]
        lneg2.remove(le2n(i))
        if la=="svm":
            wvec = simple_SVM(X[lpos+lneg2],Y[:-1],R)
            y[i]=dot(wvec,X[le2n(i)])
        if la=="boosting":
            wvec = adaboost_feats(X[lpos+lneg2],Y[:-1],R)
            y[i]=dot(wvec,X[le2n(i)])
        if la=="logistic":
            lr = LogisticRegression(X[lpos+lneg2],Y[:-1],X[[le2n(i)]],1.0/R)
            lr.train()
            y[i]=2*lr.test_predictions()[0]-1

    # 0/1 error: a positive is wrong if its score is negative, a negative
    # if its score is >= 0.  sqerr is printed but not returned.
    errs = 0
    sqerr  = 0.0
    for i in pos:
        sqerr+=(y[i]-1)**2
        if y[i]<0:
            errs+=1.
    for i in neg:
        sqerr+=(y[i]+1)**2
        if y[i]>=0:
            errs+=1.
    m = len(pos)+len(neg)
    print "Err: ",errs/m,"\tSq err:",sqerr/m
    return (y,errs/m)
Ejemplo n.º 5
0
def learn(pos, neg, X, la="boosting", R=20):
    """Leave-one-out scoring of labelled examples plus scoring of the
    unlabelled rest (Python 2 code).

    pos/neg: ids of positive/negative examples; le2n(id) gives the row of
    X for an id.  la is "boosting", "svm" or "logistic"; R is the
    learner's regularization/rounds parameter.

    Returns (y, err): y maps each id to a real score; err is the
    leave-one-out 0/1 error rate over the labelled ids.
    """
    assert len(pos) > 1 and len(neg) > 1
    n = X.shape[0]  # NOTE(review): unused
    lpos = [le2n(i) for i in pos]
    lneg = [le2n(i) for i in neg]
    # Labels: the first len(lpos) entries are +1, the rest -1.
    Y = zeros(len(lpos) + len(lneg))
    for i in range(len(lpos)):
        Y[i] = 1.
    for i in range(len(lneg)):
        Y[i + len(lpos)] = -1.
    y = {}
    # Ids from alpha() with no label yet.
    unl = set(alpha()) - set(pos) - set(neg)
    if len(unl) > 0:
        # Fit on all labelled rows once; score each unlabelled id.
        if la == "svm":
            wvec = simple_SVM(X[lpos + lneg], Y, R)
            for i in unl:
                y[i] = dot(wvec, X[le2n(i)])
        if la == "boosting":
            wvec = adaboost_feats(X[lpos + lneg], Y, R)
            for i in unl:
                y[i] = dot(wvec, X[le2n(i)])
        if la == "logistic":
            lr = LogisticRegression(X[lpos + lneg], Y,
                                    X[[le2n(i) for i in unl]], 1.0 / R)
            lr.train()
            u = lr.test_predictions()
            i = 0
            for a in unl:
                # Rescale [0,1] predictions to [-1,1].
                y[a] = 2 * u[i] - 1
                i += 1

    # Leave-one-out over positives.  Y[1:] drops one +1 label; since every
    # positive label is identical it does not matter which.
    for i in pos:
        lpos2 = lpos[:]
        lpos2.remove(le2n(i))
        if la == "boosting":
            wvec = adaboost_feats(X[lpos2 + lneg], Y[1:], R)
            y[i] = dot(wvec, X[le2n(i)])
        if la == "svm":
            wvec = simple_SVM(X[lpos2 + lneg], Y[1:], R)
            y[i] = dot(wvec, X[le2n(i)])
        if la == "logistic":
            lr = LogisticRegression(X[lpos2 + lneg], Y[1:], X[[le2n(i)]],
                                    1.0 / R)
            lr.train()
            y[i] = 2 * lr.test_predictions()[0] - 1

    # Leave-one-out over negatives; Y[:-1] drops one -1 label.
    for i in neg:
        lneg2 = lneg[:]
        lneg2.remove(le2n(i))
        if la == "svm":
            wvec = simple_SVM(X[lpos + lneg2], Y[:-1], R)
            y[i] = dot(wvec, X[le2n(i)])
        if la == "boosting":
            wvec = adaboost_feats(X[lpos + lneg2], Y[:-1], R)
            y[i] = dot(wvec, X[le2n(i)])
        if la == "logistic":
            lr = LogisticRegression(X[lpos + lneg2], Y[:-1], X[[le2n(i)]],
                                    1.0 / R)
            lr.train()
            y[i] = 2 * lr.test_predictions()[0] - 1

    # Error accounting: positives should score >= 0, negatives < 0.
    # sqerr is only printed, never returned.
    errs = 0
    sqerr = 0.0
    for i in pos:
        sqerr += (y[i] - 1)**2
        if y[i] < 0:
            errs += 1.
    for i in neg:
        sqerr += (y[i] + 1)**2
        if y[i] >= 0:
            errs += 1.
    m = len(pos) + len(neg)
    print "Err: ", errs / m, "\tSq err:", sqerr / m
    return (y, errs / m)
Ejemplo n.º 6
0
 def help_learn(self, train, test):
     """Train the learner chosen in the GUI on rows `train` of self.X and
     return real-valued scores `u` for rows `test`.

     A row gets label +1 iff its index is in self.data.po, else -1.
     When the human/machine toggles agree (or self.X does not have the
     expected 1256 columns) every column is used; otherwise only the
     human columns (0..254 plus all-ones column 1255) or the machine
     columns (255..1255).  Python 2 code (print statements).
     """
     data = self.data
     # Values read from GUI controls — presumably Tk variables; verify.
     hf = self.hf.get()
     mf = self.mf.get()
     la = self.learning_alg.get()
     # print (mf,hf),"FEATURES",self.X.shape
     # +1/-1 labels: positive iff row index is in data.po.
     tmpY = [(i in data.po) * 2 - 1.0 for i in train]
     if (mf == hf) or self.X.shape[1] != 1256:
         # --- Full feature matrix ---
         if data.tags != []:
             print "Learning with tags!"
             # Prepend tag features rescaled from {0,1} to {-1,+1}.
             tmpX = hstack([2 * data.Mtag[train] - 1.0, self.X[train]])
         else:
             tmpX = self.X[train]
         self.R = self.scale.get()
         print "Learning, R=", self.R, la
         if la == "Logistic regression":
             # Logistic takes the test matrix at construction time.
             if data.tags != []:
                 tstX = hstack([2 * data.Mtag[test] - 1.0, self.X[test]])
             else:
                 tstX = self.X[test]
             print "Training:", tmpX.shape[0]
             lr = LogisticRegression(tmpX, tmpY, tstX, 1.0 / self.R)
             lr.train()
             print "Testing:", tstX.shape[0]
             u = lr.test_predictions()
         else:
             # Linear learners: weight vector dotted with test rows.
             if la == "Linear SVM":
                 wvec = simple_SVM(tmpX, tmpY, self.R)
             else:
                 assert la == "Boosting"
                 wvec = adaboost_feats(tmpX, tmpY, self.R)
             # Print the ten largest-magnitude weights for inspection.
             print sorted(abs(wvec), reverse=True)[:10]
             if data.tags != []:
                 tmpX = hstack([2 * data.Mtag[test] - 1.0, self.X[test]])
             else:
                 tmpX = self.X[test]
             u = dot(tmpX, wvec)
         # NOTE(review): `nan not in u` is always True for float arrays
         # (nan != nan), so this assert can never fire; isnan(u).any()
         # was likely intended — confirm.
         assert nan not in u
     else:
         # --- Human-only or machine-only feature columns ---
         a = 255
         b = 1255
         if hf == 1:
             print "Human Features"
             assert mf == 0
             rang = range(0, a)
             rang.append(b)  # the all 1's col
         else:
             print "Machine Features"
             assert mf == 1
             rang = range(a, b + 1)
         if data.tags != []:
             print "Learning with tags"
             tmpX = hstack([2 * data.Mtag[train] - 1.0, self.X[train, :][:, rang]])
         else:
             tmpX = self.X[train, :][:, rang]
         self.R = self.scale.get()
         print "Learning, R=", self.R, la
         if la == "Logistic regression":
             if data.tags != []:
                 tstX = hstack([2 * data.Mtag[test] - 1.0, self.X[test, :][:, rang]])
             else:
                 tstX = self.X[test, :][:, rang]
             print "Training:", tmpX.shape[0]
             lr = LogisticRegression(tmpX, tmpY, tstX, 1.0 / self.R)
             lr.train()
             print "Testing:", tstX.shape[0]
             u = lr.test_predictions()
         else:
             if la == "Linear SVM":
                 wvec = simple_SVM(tmpX, tmpY, self.R)
             else:
                 assert la == "Boosting"
                 wvec = adaboost_feats(tmpX, tmpY, self.R)
             print sorted(abs(wvec), reverse=True)[:10]
             if data.tags != []:
                 tmpX = hstack([2 * data.Mtag[test] - 1.0, self.X[test, :][:, rang]])
             else:
                 tmpX = self.X[test][:, rang]
             u = dot(tmpX, wvec)
     return u