Esempio n. 1
0
def make_SVM_1(sw, dssp, x, dataset):
    """Train an SVM classifier on the first ``x`` records of a dataset file.

    Each record is read as: one description line whose second
    ``#``-separated field is the number of primary-sequence lines, then that
    many sequence lines, then one secondary-structure line.

    Args:
        sw: Forwarded unchanged to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- confirm against helper).
        dssp: Forwarded unchanged to ``inOutFunctions.prepare_input_forX``
            (presumably the structure class to recognise -- confirm).
        x: Number of records to read from the start of the file.
        dataset: Path of the dataset file to read.

    Returns:
        The ``svm.SVC`` instance (C=2.5, gamma=0.05) after ``fit`` has been
        called on the inputs/outputs collected from all ``x`` records.

    Raises:
        IndexError, ValueError: If a description line has no ``#`` field or
            the field is not an integer (e.g. the file has fewer than ``x``
            records, so an empty line is read).
    """
    inputs_train = []
    outputs_train = []

    # The context manager closes the file even when a malformed record
    # makes parsing raise part-way through the loop.
    with open(dataset, 'r') as f:
        for _record in range(x):
            desc = f.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [f.readline().strip() for _line in range(primlen)]
            sec = f.readline().strip()

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)

            inputs_train.extend(ins)
            outputs_train.extend(outs)

    clf = svm.SVC(C=2.5, gamma=0.05)
    clf.fit(inputs_train, outputs_train)

    return clf
Esempio n. 2
0
def test_SMV_1(sw, dssp, w, clfx, z):
    """Evaluate a classifier on ``w`` records of ``rs126.fa``, skipping ``z``.

    Records are read sequentially from ``rs126.fa`` in the current working
    directory. Each record is one description line whose second
    ``#``-separated field is the number of primary-sequence lines, then that
    many sequence lines, then one secondary-structure line. The first ``z``
    records are parsed only to advance the file position; the following
    ``w`` records are predicted, printed and scored.

    Args:
        sw: Forwarded unchanged to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- confirm against helper).
        dssp: Forwarded to ``prepare_input_forX``, used as the label for
            class ``1`` in ``display_result``, and passed to the scoring
            functions.
        w: Number of records to evaluate; also the divisor of the averages.
        clfx: Object whose ``predict`` method is called on the encoded
            inputs.
        z: Number of leading records to skip.

    Returns:
        A tuple ``(cq / w, cqp / w)``: the per-record averages of the values
        returned by ``measurePrediction.calcQ`` and
        ``measurePrediction.calcQpred``.

    Raises:
        ZeroDivisionError: If ``w`` is 0.
    """
    cq = 0
    cqp = 0

    # The original never closed this handle; the context manager releases
    # it on both the normal and the exceptional path.
    with open('rs126.fa', 'r') as f:
        for i in range(z + w):
            desc = f.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [f.readline().strip() for _line in range(primlen)]
            sec = f.readline().strip()

            # Skipped records still have to be read so the file position
            # lands on the first record to evaluate.
            if i < z:
                continue

            primx = inOutFunctions.merge_sequences(prim)
            ins, _outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)

            pred = inOutFunctions.display_result(
                clfx.predict(np.array(ins, np.float32)), {0: 'X', 1: dssp})
            # Single-argument print(...) emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(sec)
            print(pred)
            print("\n")
            cq += measurePrediction.calcQ(pred, sec, dssp)
            cqp += measurePrediction.calcQpred(pred, sec, dssp)

    return (cq/w, cqp/w)
Esempio n. 3
0
def make_SVM_2(sw, dataset, groups, without, struct):
    """Train an SVM classifier on every group except the one at ``without``.

    Args:
        sw: Forwarded unchanged to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- confirm against helper).
        dataset: Mapping from protein code to a record exposing ``'sec'``
            and ``'prim'`` entries.
        groups: Indexable collection of groups, each an iterable of protein
            codes that are keys of ``dataset``.
        without: Index of the group to leave out of training.
        struct: Forwarded unchanged to ``inOutFunctions.prepare_input_forX``.

    Returns:
        The ``svm.SVC`` instance (C=1.5, gamma=0.1) after ``fit`` has been
        called on the inputs/outputs collected from the selected proteins.
    """
    # Collect the protein codes of all groups but the held-out one.
    train_codes = []
    for idx in range(len(groups)):
        if idx != without:
            train_codes.extend(groups[idx])

    features = []
    labels = []
    for code in train_codes:
        secondary = dataset[code]['sec']
        primary = dataset[code]['prim']

        merged = inOutFunctions.merge_sequences(primary)
        ins, outs = inOutFunctions.prepare_input_forX(secondary, merged, sw, struct)

        features.extend(ins)
        labels.extend(outs)

    classifier = svm.SVC(C=1.5, gamma=0.1)
    classifier.fit(features, labels)
    return classifier
Esempio n. 4
0
def test_SMV_2(clf, sw, dataset, groups, without, dssp):
    """Score a classifier on the proteins of the held-out group.

    Args:
        clf: Object whose ``predict`` method is called on the encoded inputs.
        sw: Forwarded unchanged to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- confirm against helper).
        dataset: Mapping from protein code to a record exposing ``'sec'``
            and ``'prim'`` entries.
        groups: Indexable collection of groups, each an iterable of protein
            codes that are keys of ``dataset``.
        without: Index of the group to evaluate.
        dssp: Forwarded to ``prepare_input_forX``, used as the label for
            class ``1`` in ``display_result``, and passed to the scoring
            functions.

    Returns:
        A tuple ``(q, qpred, c, sov)``. The first three are the values of
        ``measurePrediction.calcQ``, ``calcQpred`` and ``calcC`` averaged
        over all evaluated proteins. ``sov`` is the value of ``calcSOV``
        averaged over only those proteins for which it did not return
        ``None``.

    Raises:
        ZeroDivisionError: If the group is empty, or if ``calcSOV`` returned
            ``None`` for every protein in it.
    """
    cq = 0
    cqp = 0
    cc = 0
    sov = 0
    sov_count = 0

    protCodes = list(groups[without])
    # Average over the proteins actually evaluated. The previous hard-coded
    # divisor of 18 was only correct for groups of exactly 18 proteins.
    n_proteins = len(protCodes)

    for p in protCodes:
        sec = dataset[p]['sec']
        prim = dataset[p]['prim']

        primx = inOutFunctions.merge_sequences(prim)
        ins, _outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)

        pred = inOutFunctions.display_result(
            clf.predict(np.array(ins, np.float32)), {0: 'X', 1: dssp})

        cq += measurePrediction.calcQ(pred, sec, dssp)
        cqp += measurePrediction.calcQpred(pred, sec, dssp)
        cc += measurePrediction.calcC(pred, sec, dssp)
        sovx = measurePrediction.calcSOV(pred, sec, dssp)

        # calcSOV may return None; such proteins are excluded from both the
        # SOV sum and its divisor.
        if sovx is not None:
            sov += sovx
            sov_count += 1

    return (cq/n_proteins, cqp/n_proteins, cc/n_proteins, sov/sov_count)