Ejemplo n.º 1
0
def make_NN(sw, dataset, groups, without):
    """Create a model with create_ann and train it on all groups but one.

    Args:
        sw: Value forwarded to create_ann and to
            inOutFunctions.convert_inputNN (presumably a sliding-window
            size -- TODO confirm).
        dataset: Mapping from protein code to a record exposing the keys
            'sec' and 'prim'.
        groups: Indexable collection of groups of protein codes.
        without: Index of the group that is left out of training.

    Returns:
        Whatever train_ann returns for the created model and the
        collected training inputs/outputs.
    """
    network = create_ann(2, sw, len(codes.aac))

    # Codes of every group except the held-out one, in group order.
    train_codes = [
        code
        for idx in range(len(groups))
        if idx != without
        for code in groups[idx]
    ]

    inputs_train, outputs_train = [], []
    for code in train_codes:
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']

        merged = inOutFunctions.merge_sequences(prim)
        sample_ins, sample_outs = inOutFunctions.convert_inputNN(sec, merged, sw)

        inputs_train.extend(sample_ins)
        outputs_train.extend(sample_outs)

    return train_ann(network, inputs_train, outputs_train)
Ejemplo n.º 2
0
def make_NN2(ann, sw, dataset, groups, without):
    """Train a second model on the predictions of an existing one.

    For every protein code in groups[without], the inputs built by
    inOutFunctions.convert_inputNN are fed to ann.predict, and the
    predictions are converted by inOutFunctions.convert_inputNN2 into
    the training samples of the new model.

    Args:
        ann: Already trained model exposing predict().
        sw: Value forwarded to create_ann and the conversion helpers
            (presumably a sliding-window size -- TODO confirm).
        dataset: Mapping from protein code to a record exposing the keys
            'sec' and 'prim'.
        groups: Indexable collection of groups of protein codes.
        without: Index of the group whose proteins are used here.

    Returns:
        Whatever train_ann returns for the newly created model.
    """
    second_stage = create_ann(2, sw, 3)

    inputs_train, outputs_train = [], []

    for code in list(groups[without]):
        sec = dataset[code]['sec']
        merged = inOutFunctions.merge_sequences(dataset[code]['prim'])

        # First-stage inputs; the matching outputs are not needed here.
        first_ins, _unused_outs = inOutFunctions.convert_inputNN(sec, merged, sw)
        first_pred = ann.predict(np.array(first_ins, np.float32))

        stage_ins, stage_outs = inOutFunctions.convert_inputNN2(sec, first_pred, sw)
        inputs_train.extend(stage_ins)
        outputs_train.extend(stage_outs)

    return train_ann(second_stage, inputs_train, outputs_train)
Ejemplo n.º 3
0
def make_SVM_1(sw, dssp, x, dataset):
    """Fit an svm.SVC on the first x records of a dataset file.

    Each record in the file is laid out as:
      * a header line whose second '#'-separated field is the number of
        primary-sequence lines that follow,
      * that many primary-sequence lines,
      * one secondary-structure line.

    Args:
        sw: Value forwarded to inOutFunctions.prepare_input_forX
            (presumably a sliding-window size -- TODO confirm).
        dssp: Value forwarded to inOutFunctions.prepare_input_forX
            (presumably the structure class to detect -- TODO confirm).
        x: Number of records to read from the start of the file.
        dataset: Path of the dataset file.

    Returns:
        The fitted svm.SVC(C=2.5, gamma=0.05) classifier.

    Raises:
        IOError: If the file cannot be opened.
        IndexError, ValueError: If a header line is malformed.
    """
    inputs_train = []
    outputs_train = []

    # The context manager closes the file even when parsing fails.
    with open(dataset, 'r') as f:
        for _record in range(x):
            desc = f.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [f.readline().strip() for _line in range(primlen)]
            sec = f.readline().strip()

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)

            inputs_train.extend(ins)
            outputs_train.extend(outs)

    clf = svm.SVC(C=2.5, gamma=0.05)
    clf.fit(inputs_train, outputs_train)

    return clf
Ejemplo n.º 4
0
def test_NN(ann, sw, dataset, groups, without):
    """Evaluate a trained model on the held-out group.

    Args:
        ann: Trained model exposing predict().
        sw: Value forwarded to inOutFunctions.convert_inputNN
            (presumably a sliding-window size -- TODO confirm).
        dataset: Mapping from protein code to a record exposing the keys
            'sec' and 'prim'.
        groups: Indexable collection of groups of protein codes.
        without: Index of the group to evaluate on.

    Returns:
        Tuple (q3, qh, qhp, qe, qep, qc, qcp, sovh, sove, sovc). The
        first seven entries are the per-protein measurePrediction values
        summed and divided by the number of proteins in the group; the
        SOV entries are divided by the number of proteins for which
        calcSOV returned a value.
    """
    q3 = 0
    qh = 0
    qhp = 0
    qe = 0
    qep = 0
    qc = 0
    qcp = 0
    # Counters start at 0.01 so the final division never hits zero when
    # calcSOV returns None for every protein.
    sovh, zh = 0, 0.01
    sove, ze = 0, 0.01
    sovc, zc = 0, 0.01

    protCodes = list(groups[without])

    # Average over the proteins actually evaluated instead of the former
    # hard-coded 126/7. The floor of 1 keeps an empty group returning
    # zeros, as the constant divisor did.
    z = max(len(protCodes), 1)

    for code in protCodes:
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']

        prim = inOutFunctions.merge_sequences(prim)
        ins, outs = inOutFunctions.convert_inputNN(sec, prim, sw)
        raw_pred = ann.predict(np.array(ins, np.float32))

        predx = [winner(row) for row in raw_pred]
        pred = inOutFunctions.display_result(predx, codes.alphabeth)

        # The result used to be accumulated in a local named `sum` that
        # was never returned. The call is kept in case it has side
        # effects -- TODO confirm and remove.
        measurePrediction.compare(pred, sec)

        q3 += measurePrediction.calcQ3(pred, sec)

        qh += measurePrediction.calcQ(pred, sec, 'H')
        qhp += measurePrediction.calcQpred(pred, sec, 'H')
        qe += measurePrediction.calcQ(pred, sec, 'E')
        qep += measurePrediction.calcQpred(pred, sec, 'E')
        qc += measurePrediction.calcQ(pred, sec, 'C')
        qcp += measurePrediction.calcQpred(pred, sec, 'C')

        _sovh = measurePrediction.calcSOV(pred, sec, 'H')
        _sove = measurePrediction.calcSOV(pred, sec, 'E')
        _sovc = measurePrediction.calcSOV(pred, sec, 'C')

        if _sovh is not None:
            sovh += _sovh
            zh += 1
        if _sove is not None:
            sove += _sove
            ze += 1
        if _sovc is not None:
            sovc += _sovc
            zc += 1

    return (q3/z, qh/z, qhp/z, qe/z, qep/z, qc/z, qcp/z, sovh/zh, sove/ze, sovc/zc)
Ejemplo n.º 5
0
def test_SMV_1(sw, dssp, w, clfx, z, filename='rs126.fa'):
    """Evaluate a binary classifier on records z .. z+w-1 of a dataset file.

    The first z records are read and skipped; the following w records
    are predicted, printed (true structure, prediction, blank line) and
    scored.

    Each record in the file is laid out as:
      * a header line whose second '#'-separated field is the number of
        primary-sequence lines that follow,
      * that many primary-sequence lines,
      * one secondary-structure line.

    Args:
        sw: Value forwarded to inOutFunctions.prepare_input_forX
            (presumably a sliding-window size -- TODO confirm).
        dssp: Structure symbol used as the positive label; predictions
            of 0 are displayed as 'X' and predictions of 1 as dssp.
        w: Number of records to evaluate; must be non-zero because the
            results are divided by it.
        clfx: Trained classifier exposing predict().
        z: Number of leading records to skip.
        filename: Path of the dataset file. Defaults to 'rs126.fa', the
            path that was previously hard-coded.

    Returns:
        Tuple (cq / w, cqp / w) of the summed calcQ and calcQpred values
        divided by w.
    """
    cq = 0
    cqp = 0

    # The context manager closes the file; it was previously left open.
    with open(filename, 'r') as f:
        for i in range(z + w):
            desc = f.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [f.readline().strip() for _line in range(primlen)]
            sec = f.readline().strip()

            # Skipped records must still be consumed from the file.
            if i < z:
                continue

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)

            pred = inOutFunctions.display_result(clfx.predict(np.array(ins, np.float32)), {0:'X', 1:dssp})
            # Single-argument print(...) behaves the same on Python 2
            # and Python 3.
            print(sec)
            print(pred)
            print("\n")
            cq += measurePrediction.calcQ(pred, sec, dssp)
            cqp += measurePrediction.calcQpred(pred, sec, dssp)

    return (cq/w, cqp/w)
Ejemplo n.º 6
0
def make_SVM_3X(sw, dataset, groups, without):
    """Fit an svm.SVC on every group of proteins except one.

    Only the last two entries of each protein's 'prim' record are passed
    to inOutFunctions.merge_sequences.

    Args:
        sw: Value forwarded to inOutFunctions.convert_inputX
            (presumably a sliding-window size -- TODO confirm).
        dataset: Mapping from protein code to a record exposing the keys
            'sec' and 'prim'.
        groups: Indexable collection of groups of protein codes.
        without: Index of the group that is left out of training.

    Returns:
        The fitted svm.SVC(C=1.5, gamma=0.1) classifier.
    """
    # Codes of every group except the held-out one, in group order.
    train_codes = [
        code
        for idx in range(len(groups))
        if idx != without
        for code in groups[idx]
    ]

    inputs_train, outputs_train = [], []
    for code in train_codes:
        sec = dataset[code]['sec']
        last_two = dataset[code]['prim'][-2:]

        merged = inOutFunctions.merge_sequences(last_two)
        sample_ins, sample_outs = inOutFunctions.convert_inputX(sec, merged, sw)

        inputs_train.extend(sample_ins)
        outputs_train.extend(sample_outs)

    classifier = svm.SVC(C=1.5, gamma=0.1)
    classifier.fit(inputs_train, outputs_train)
    return classifier
Ejemplo n.º 7
0
def test_SVM_3(clf, z, x, sw, filename):
    """Evaluate a classifier on records x .. x+z-1 of a dataset file.

    The first x records are read and skipped; the following z records
    are predicted and scored.

    Each record in the file is laid out as:
      * a header line whose second '#'-separated field is the number of
        primary-sequence lines that follow,
      * that many primary-sequence lines,
      * one secondary-structure line.

    Args:
        clf: Trained classifier exposing predict().
        z: Number of records to evaluate; must be non-zero because the
            Q scores are divided by it.
        x: Number of leading records to skip.
        sw: Value forwarded to inOutFunctions.convert_inputX
            (presumably a sliding-window size -- TODO confirm).
        filename: Path of the dataset file.

    Returns:
        Tuple (q3, qh, qhp, qe, qep, qc, qcp, sovh, sove, sovc). The
        first seven entries are summed measurePrediction values divided
        by z; the SOV entries are divided by the number of records for
        which calcSOV returned a value.
    """
    q3 = 0
    qh = 0
    qhp = 0
    qe = 0
    qep = 0
    qc = 0
    qcp = 0
    # Counters start at 0.01 so the final division never hits zero when
    # calcSOV returns None for every record.
    sovh, zh = 0, 0.01
    sove, ze = 0, 0.01
    sovc, zc = 0, 0.01

    # The context manager closes the file even when parsing or scoring
    # raises.
    with open(filename, 'r') as f:
        for i in range(x + z):
            desc = f.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [f.readline().strip() for _line in range(primlen)]
            sec = f.readline().strip()

            # Skipped records must still be consumed from the file.
            if i < x:
                continue

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.convert_inputX(sec, primx, sw)
            pred = inOutFunctions.display_result(clf.predict(np.array(ins, np.float32)), codes.alphabeth)

            q3 += measurePrediction.calcQ3(pred, sec)

            qh += measurePrediction.calcQ(pred, sec, 'H')
            qhp += measurePrediction.calcQpred(pred, sec, 'H')
            qe += measurePrediction.calcQ(pred, sec, 'E')
            qep += measurePrediction.calcQpred(pred, sec, 'E')
            qc += measurePrediction.calcQ(pred, sec, 'C')
            qcp += measurePrediction.calcQpred(pred, sec, 'C')

            _sovh = measurePrediction.calcSOV(pred, sec, 'H')
            _sove = measurePrediction.calcSOV(pred, sec, 'E')
            _sovc = measurePrediction.calcSOV(pred, sec, 'C')

            if _sovh is not None:
                sovh += _sovh
                zh += 1
            if _sove is not None:
                sove += _sove
                ze += 1
            if _sovc is not None:
                sovc += _sovc
                zc += 1

    return (q3/z, qh/z, qhp/z, qe/z, qep/z, qc/z, qcp/z, sovh/zh, sove/ze, sovc/zc)
Ejemplo n.º 8
0
def test_SVM_3X(clf, sw, dataset, groups, without):
    """Evaluate a classifier on the held-out group of proteins.

    Args:
        clf: Trained classifier exposing predict().
        sw: Value forwarded to inOutFunctions.convert_inputX
            (presumably a sliding-window size -- TODO confirm).
        dataset: Mapping from protein code to a record exposing the keys
            'sec' and 'prim'.
        groups: Indexable collection of groups of protein codes.
        without: Index of the group to evaluate on; the group must be
            non-empty because the Q scores are divided by its size.

    Returns:
        Tuple (q3, qh, qhp, qe, qep, qc, qcp, sovh, sove, sovc). The
        first seven entries are summed measurePrediction values divided
        by the group size; the SOV entries are divided by the number of
        proteins for which calcSOV returned a value.
    """
    q3 = 0
    qh = 0
    qhp = 0
    qe = 0
    qep = 0
    qc = 0
    qcp = 0
    # Counters start at 0.01 so the final division never hits zero when
    # calcSOV returns None for every protein.
    sovh, zh = 0, 0.01
    sove, ze = 0, 0.01
    sovc, zc = 0, 0.01

    protCodes = list(groups[without])

    for code in protCodes:
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']

        primx = inOutFunctions.merge_sequences(prim)
        ins, outs = inOutFunctions.convert_inputX(sec, primx, sw)
        pred = inOutFunctions.display_result(clf.predict(np.array(ins, np.float32)), codes.alphabeth)

        q3 += measurePrediction.calcQ3(pred, sec)

        qh += measurePrediction.calcQ(pred, sec, 'H')
        qhp += measurePrediction.calcQpred(pred, sec, 'H')
        qe += measurePrediction.calcQ(pred, sec, 'E')
        qep += measurePrediction.calcQpred(pred, sec, 'E')
        qc += measurePrediction.calcQ(pred, sec, 'C')
        qcp += measurePrediction.calcQpred(pred, sec, 'C')

        _sovh = measurePrediction.calcSOV(pred, sec, 'H')
        _sove = measurePrediction.calcSOV(pred, sec, 'E')
        _sovc = measurePrediction.calcSOV(pred, sec, 'C')

        # Identity test: only a missing value (None) is skipped.
        if _sovh is not None:
            sovh += _sovh
            zh += 1
        if _sove is not None:
            sove += _sove
            ze += 1
        if _sovc is not None:
            sovc += _sovc
            zc += 1

    z = len(groups[without])

    return (q3/z, qh/z, qhp/z, qe/z, qep/z, qc/z, qcp/z, sovh/zh, sove/ze, sovc/zc)
Ejemplo n.º 9
0
def test_SMV_2(clf, sw, dataset, groups, without, dssp):
    """Evaluate a binary classifier on the held-out group of proteins.

    Args:
        clf: Trained classifier exposing predict().
        sw: Value forwarded to inOutFunctions.prepare_input_forX
            (presumably a sliding-window size -- TODO confirm).
        dataset: Mapping from protein code to a record exposing the keys
            'sec' and 'prim'.
        groups: Indexable collection of groups of protein codes.
        without: Index of the group to evaluate on.
        dssp: Structure symbol used as the positive label; predictions
            of 0 are displayed as 'X' and predictions of 1 as dssp.

    Returns:
        Tuple (cq, cqp, cc, sov). The first three are the summed calcQ,
        calcQpred and calcC values divided by the number of proteins in
        the group. sov is the summed calcSOV value divided by the number
        of proteins for which calcSOV returned a value, or 0.0 when it
        returned None for all of them.
    """
    cq = 0
    cqp = 0
    cc = 0
    sov = 0
    qq = 0

    protCodes = list(groups[without])

    # Average over the proteins actually evaluated instead of the former
    # hard-coded 18. The floor of 1 keeps an empty group returning zeros,
    # as the constant divisor did.
    q = max(len(protCodes), 1)

    for code in protCodes:
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']

        primx = inOutFunctions.merge_sequences(prim)

        ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)

        pred = inOutFunctions.display_result(clf.predict(np.array(ins, np.float32)), {0:'X', 1:dssp})

        cq += measurePrediction.calcQ(pred, sec, dssp)
        cqp += measurePrediction.calcQpred(pred, sec, dssp)
        cc += measurePrediction.calcC(pred, sec, dssp)
        sovx = measurePrediction.calcSOV(pred, sec, dssp)

        if sovx is not None:
            sov += sovx
            qq += 1

    # Previously raised ZeroDivisionError when no protein had a defined
    # SOV value.
    sov_mean = sov/qq if qq else 0.0

    return (cq/q, cqp/q, cc/q, sov_mean)