Ejemplo n.º 1
0
def main():
    """Print one encoded row per selected PDB id and record the ids written.

    For every id returned by ``read_datafile(args.t)`` that also has an entry
    in the table returned by ``read_dihedrals()``, a single comma-separated
    line is printed to stdout:

    * ``args.label == 'x'``: the output of ``oneHotEncoding`` for the sequence,
    * ``args.label == 'y'``: the dihedral entry for that id.

    Any other ``args.label`` value prints nothing.  Each printed row's id is
    also written, one per line and in the same order, to the file named by
    ``args.pdbids``.

    When ``args.pep`` is truthy only the peptide sequence is encoded;
    otherwise the universal groove sequence is prepended to the peptide.
    """
    args = parse_args()
    dihedrals = read_dihedrals()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides, mhcSeq, _ = totalNineMers(fasta)
    universalGrooves = universalGroove(args.grooves, mhcSeq, peptides)
    # The results of the next two calls are not used below; the calls are kept
    # because the helpers may validate their inputs or have other side effects.
    IntersectionGroove(args.grooves, mhcSeq, peptides)
    read_rmsd_file(args.rms)
    pdbids = read_datafile(args.t)

    # The context manager guarantees the id file is flushed and closed even if
    # a lookup or the encoding raises part-way through the loop.
    with open(args.pdbids, 'w') as outputfilehandler:
        for pdbid in pdbids:
            if pdbid not in dihedrals:
                continue

            if args.pep:
                sequence = peptides[pdbid]
            else:
                sequence = universalGrooves[pdbid] + peptides[pdbid]

            # Both values are computed regardless of the requested label so
            # that a failure in either lookup surfaces as it did before.
            finalSeqCode = oneHotEncoding(sequence)
            finalLabelCode = dihedrals[pdbid]

            if args.label == 'x':
                row = finalSeqCode
            elif args.label == 'y':
                row = finalLabelCode
            else:
                continue

            print(', '.join(row))
            outputfilehandler.write(pdbid + '\n')
Ejemplo n.º 2
0
def read_fasta(args):
    """Collect chain ids and sequences of the nine-residue FASTA entries.

    The file named by ``args.fasta`` is loaded through ``FASTA``.  Each header
    is split on ``'|'``; the first field is used as the PDB id and the second
    as the chain id.  Only entries whose sequence has exactly nine characters
    are kept.  If several such entries share a PDB id, the last one wins.

    Returns:
        A ``(pep_chain, pep_seq)`` tuple of dicts, both keyed by PDB id, that
        map to the chain id and to the sequence respectively.
    """
    reader = FASTA(args.fasta)
    reader.read()

    pep_chain = {}
    pep_seq = {}
    for header in reader.get_headers():
        parts = header.split('|')
        pdbid, chainid = parts[0], parts[1]
        sequence = reader.get_sequence(header)

        # Skip everything that is not a nine-residue sequence.
        if len(sequence) != 9:
            continue

        pep_chain[pdbid] = chainid
        pep_seq[pdbid] = sequence

    return (pep_chain, pep_seq)
Ejemplo n.º 3
0
def main():
    """Print encoded rows for labelled pairs of PDB ids.

    The keys of the table returned by ``read_rmsd_file(args.rms)`` are split
    on ``'_'`` into two PDB ids.  A pair is processed when at least one of the
    two ids is in the collection returned by ``read_datafile(args.t)``.

    For each processed pair the two sequences are joined with ``'|'`` and
    passed to ``oneHotEncoding`` together with the pair's label and two
    AAindex tables (entries ``FASG890101`` and ``KLEP840101``).  With
    ``args.pep`` truthy each sequence is the peptide alone; otherwise it is
    the universal groove followed by the peptide.

    ``args.label == 'x'`` prints the encoded sequence, ``'y'`` prints the
    encoded label; any other value prints nothing.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides, mhcSeq, mhcAllele = totalNineMers(fasta)
    universalGrooves = universalGroove(args.grooves, mhcSeq, peptides)
    # Result unused below; the call itself is retained.
    intersectGrooves = IntersectionGroove(args.grooves, mhcSeq, peptides)
    labels = read_rmsd_file(args.rms)
    pdbids = read_datafile(args.t)

    # Per-residue property tables handed to the encoder.
    # NOTE(review): KLEP840101 is treated as a charge table here - confirm.
    aaindex = Aaindex()
    index_data = aaindex.get('FASG890101').index_data
    charge_data = aaindex.get('KLEP840101').index_data

    for pair_key in labels:
        first_id, second_id = pair_key.split('_')
        if first_id not in pdbids and second_id not in pdbids:
            continue

        if args.pep:
            paired = peptides[first_id] + '|' + peptides[second_id]
        else:
            paired = (universalGrooves[first_id] + peptides[first_id] + '|' +
                      universalGrooves[second_id] + peptides[second_id])

        finalSeqCode, finalLabelCode = oneHotEncoding(
            paired, labels[pair_key], index_data, charge_data)

        if args.label == 'x':
            print(', '.join(finalSeqCode))
        elif args.label == 'y':
            print(', '.join(finalLabelCode))
Ejemplo n.º 4
0
def main():
    """Randomly split the PDB ids of the FASTA file into train and test lists.

    The ids are the keys of the mapping returned by ``totalNineMers``.  The
    test set is capped at ``int(args.percent * number_of_ids)`` entries.  An
    id goes to the test set only while the cap has not been reached and its
    random draw is below 0.5; every other id goes to the training set.  The
    test set can therefore end up smaller than the cap.

    The two lists are written to ``train/90_10/train.txt`` and
    ``test/90_10/test.txt``.  These paths are fixed and do not follow
    ``args.percent``.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides = totalNineMers(fasta)
    # Materialise the keys: on Python 3 ``dict.keys()`` is a view that cannot
    # be indexed, which made the former ``pdbids[i]`` raise TypeError.
    pdbids = list(peptides.keys())
    testsetlen = int(args.percent * len(pdbids))

    trainset = []
    testset = []
    for pdbid in pdbids:
        # One draw per id, whether or not the test set is already full.
        r = random()
        if len(testset) < testsetlen and r < 0.5:
            testset.append(pdbid)
        else:
            trainset.append(pdbid)

    write_to_file('train/90_10/train.txt', trainset)
    write_to_file('test/90_10/test.txt', testset)
Ejemplo n.º 5
0
def main():
    """Split PDB ids into train and test lists, testing only on 'A0201'.

    ``totalNineMers`` supplies two mappings keyed by PDB id: the peptides and
    the alleles.  The test set is capped at
    ``int(args.percent * number_of_ids)`` entries.  An id joins the test set
    only while the cap has not been reached, its random draw is below 0.5 and
    its allele equals ``'A0201'``; all remaining ids go to the training set.

    The lists are written to ``train.txt`` and ``test.txt``.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides, alleles = totalNineMers(fasta)
    pdbids = peptides.keys()
    testsetlen = int(args.percent * len(pdbids))

    trainset, testset = [], []
    for pdbid in pdbids:
        # One draw per id, whether or not the test set is already full.
        draw = random()
        goes_to_test = (len(testset) < testsetlen
                        and draw < 0.5
                        and alleles[pdbid] == 'A0201')
        target = testset if goes_to_test else trainset
        target.append(pdbid)

    write_to_file('train.txt', trainset)
    write_to_file('test.txt', testset)
Ejemplo n.º 6
0
def main():
    """Print encoded rows for labelled pairs of PDB ids.

    The keys of the table returned by ``read_rmsd_file(args.rms)`` are split
    on ``'_'`` into two PDB ids.  A pair is processed when at least one of the
    two ids is in the collection returned by ``read_datafile(args.t)``.

    For each processed pair the two sequences are joined with ``'|'`` and
    passed to ``oneHotEncoding`` together with the pair's label.  With
    ``args.pep`` truthy each sequence is the peptide alone; otherwise it is
    the universal groove followed by the peptide.

    ``args.label == 'x'`` prints the encoded sequence, ``'y'`` prints the
    encoded label; any other value prints nothing.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides, mhcSeq, mhcAllele = totalNineMers(fasta)
    universalGrooves = universalGroove(args.grooves, mhcSeq, peptides)
    # Result unused below; the call itself is retained.
    intersectGrooves = IntersectionGroove(args.grooves, mhcSeq, peptides)

    labels = read_rmsd_file(args.rms)
    pdbids = read_datafile(args.t)

    for pair_key in labels:
        first_id, second_id = pair_key.split('_')
        if first_id not in pdbids and second_id not in pdbids:
            continue

        if args.pep:
            paired = peptides[first_id] + '|' + peptides[second_id]
        else:
            paired = (universalGrooves[first_id] + peptides[first_id] + '|' +
                      universalGrooves[second_id] + peptides[second_id])

        finalSeqCode, finalLabelCode = oneHotEncoding(paired,
                                                      labels[pair_key])

        if args.label == 'x':
            print(', '.join(finalSeqCode))
        elif args.label == 'y':
            print(', '.join(finalLabelCode))