def main():
    """Print one encoded row per selected PDB id and record the ids written.

    Options come from ``parse_args()``. Every id returned by
    ``read_datafile(args.t)`` that also has an entry in the mapping returned
    by ``read_dihedrals()`` is processed:

    * ``args.label == 'x'`` prints the ``oneHotEncoding`` result,
    * ``args.label == 'y'`` prints the dihedral entry for that id,

    each joined with ``', '``. The id of every printed row is written, one
    per line, to the file named by ``args.pdbids``. Any other ``args.label``
    value prints and records nothing.

    The value passed to ``oneHotEncoding`` is the peptide entry alone when
    ``args.pep`` is truthy, otherwise the ``universalGroove`` entry
    concatenated with the peptide entry.
    """
    args = parse_args()
    dihedrals = read_dihedrals()

    fasta = FASTA(args.fasta)
    fasta.read()
    (peptides, mhcSeq, mhcAllele) = totalNineMers(fasta)

    universalGrooves = universalGroove(args.grooves, mhcSeq, peptides)

    # NOTE(review): the results of the next two calls are never used in this
    # function. The calls are retained in case they have side effects (or are
    # relied on to fail early on bad input) -- confirm and remove if not.
    IntersectionGroove(args.grooves, mhcSeq, peptides)
    read_rmsd_file(args.rms)

    pdbids = read_datafile(args.t)

    # The context manager guarantees the id file is flushed and closed even
    # if a lookup or the encoder raises part-way through the loop.
    with open(args.pdbids, 'w') as outputfilehandler:
        for pdbid in pdbids:
            if pdbid not in dihedrals:
                continue

            if args.pep:
                sequence = peptides[pdbid]
            else:
                sequence = universalGrooves[pdbid] + peptides[pdbid]

            # The encoder runs for every selected id, regardless of which
            # label is requested, exactly as before.
            finalSeqCode = oneHotEncoding(sequence)
            finalLabelCode = dihedrals[pdbid]

            if args.label == 'x':
                row = finalSeqCode
            elif args.label == 'y':
                row = finalLabelCode
            else:
                # Unknown label: nothing is printed or recorded.
                continue

            print(', '.join(row))
            outputfilehandler.write(pdbid + '\n')
def read_fasta(args):
    """Collect chain ids and sequences of the length-9 records in a FASTA file.

    Loads ``args.fasta`` through ``FASTA`` and inspects every header. Each
    header is split on ``'|'``; its first field is used as the PDB id and its
    second field as the chain id. Only records whose sequence has length 9
    are kept; a later record with the same PDB id overwrites an earlier one.

    Returns:
        A ``(pep_chain, pep_seq)`` tuple of dicts keyed by PDB id, holding
        the chain id and the sequence respectively.
    """
    reader = FASTA(args.fasta)
    reader.read()

    chain_by_id = {}
    seq_by_id = {}
    for header in reader.get_headers():
        parts = header.split('|')
        pdbid, chainid = parts[0], parts[1]
        sequence = reader.get_sequence(header)

        # Skip anything that is not exactly nine characters long.
        if len(sequence) != 9:
            continue

        chain_by_id[pdbid] = chainid
        seq_by_id[pdbid] = sequence

    return (chain_by_id, seq_by_id)
def main():
    """Print encoded sequence pairs or their labels for selected PDB id pairs.

    Every key of the mapping returned by ``read_rmsd_file(args.rms)`` is
    split on ``'_'`` into two PDB ids. A pair is processed when at least one
    of the two ids is contained in ``read_datafile(args.t)``.

    The two entries are joined with ``'|'`` (peptides only when ``args.pep``
    is truthy, otherwise the ``universalGroove`` entry followed by the
    peptide for each id) and passed to ``oneHotEncoding`` together with the
    pair's label and the ``index_data`` of the Aaindex records
    ``'FASG890101'`` and ``'KLEP840101'``.

    ``args.label == 'x'`` prints the sequence code, ``'y'`` prints the label
    code, each joined with ``', '``; any other value prints nothing.
    """
    options = parse_args()

    reader = FASTA(options.fasta)
    reader.read()
    peptides, mhc_seq, mhc_allele = totalNineMers(reader)

    grooves = universalGroove(options.grooves, mhc_seq, peptides)
    # The result of this call is not used below.
    IntersectionGroove(options.grooves, mhc_seq, peptides)

    rmsd_labels = read_rmsd_file(options.rms)
    selected_ids = read_datafile(options.t)

    index_db = Aaindex()
    index_data = index_db.get('FASG890101').index_data
    charge_data = index_db.get('KLEP840101').index_data

    for pair_key in rmsd_labels:
        first_id, second_id = pair_key.split('_')

        # Skip pairs in which neither member was selected.
        if first_id not in selected_ids and second_id not in selected_ids:
            continue

        if options.pep:
            pair_sequence = peptides[first_id] + '|' + peptides[second_id]
        else:
            pair_sequence = (
                grooves[first_id] + peptides[first_id]
                + '|'
                + grooves[second_id] + peptides[second_id]
            )

        seq_code, label_code = oneHotEncoding(
            pair_sequence, rmsd_labels[pair_key], index_data, charge_data)

        if options.label == 'x':
            print(', '.join(seq_code))
        elif options.label == 'y':
            print(', '.join(label_code))
def main():
    """Randomly split the PDB ids of a FASTA file into train and test lists.

    The ids are the keys of the mapping returned by ``totalNineMers``. The
    test list is capped at ``int(args.percent * number_of_ids)`` entries.
    Ids are visited in key order; each one goes to the test list when the cap
    has not been reached and a fresh ``random()`` draw is below 0.5, and to
    the train list otherwise. The test list can therefore end up shorter
    than the cap.

    The train ids are passed to ``write_to_file`` with
    ``'train/90_10/train.txt'`` and the test ids with
    ``'test/90_10/test.txt'``.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides = totalNineMers(fasta)

    # Materialise the keys: on Python 3 ``dict.keys()`` is a view that cannot
    # be indexed, so the previous ``pdbids[i]`` raised TypeError.
    pdbids = list(peptides.keys())
    testsetlen = int(args.percent * len(pdbids))

    trainset = []
    testset = []
    for pdbid in pdbids:
        # One draw per id, even once the test list is full, so the sequence
        # of random numbers consumed is unchanged.
        r = random()
        if len(testset) < testsetlen and r < 0.5:
            testset.append(pdbid)
        else:
            trainset.append(pdbid)

    write_to_file('train/90_10/train.txt', trainset)
    write_to_file('test/90_10/test.txt', testset)
def main():
    """Split PDB ids into train and test lists, testing only on ``'A0201'``.

    ``totalNineMers`` supplies a peptide mapping and an allele mapping. The
    test list is capped at ``int(args.percent * number_of_ids)`` entries. An
    id is placed in the test list only when the cap has not been reached, a
    fresh ``random()`` draw is below 0.5, and its allele entry equals
    ``'A0201'``; every other id goes to the train list.

    The train ids are passed to ``write_to_file`` with ``'train.txt'`` and
    the test ids with ``'test.txt'``.
    """
    options = parse_args()

    reader = FASTA(options.fasta)
    reader.read()
    peptides, alleles = totalNineMers(reader)

    ids = peptides.keys()
    quota = int(options.percent * len(ids))

    training, held_out = [], []
    for pdbid in ids:
        draw = random()
        # Short-circuit order matters: the allele lookup only happens when
        # the quota and the draw both allow a test assignment.
        take_for_test = (
            len(held_out) < quota
            and draw < 0.5
            and alleles[pdbid] == 'A0201'
        )
        bucket = held_out if take_for_test else training
        bucket.append(pdbid)

    write_to_file('train.txt', training)
    write_to_file('test.txt', held_out)
def main():
    """Print encoded sequence pairs or their labels for selected PDB id pairs.

    Every key of the mapping returned by ``read_rmsd_file(args.rms)`` is
    split on ``'_'`` into two PDB ids. A pair is processed when at least one
    of the two ids is contained in ``read_datafile(args.t)``.

    The two entries are joined with ``'|'`` (peptides only when ``args.pep``
    is truthy, otherwise the ``universalGroove`` entry followed by the
    peptide for each id) and passed to ``oneHotEncoding`` together with the
    pair's label.

    ``args.label == 'x'`` prints the sequence code, ``'y'`` prints the label
    code, each joined with ``', '``; any other value prints nothing.
    """
    options = parse_args()

    reader = FASTA(options.fasta)
    reader.read()
    peptides, mhc_seq, mhc_allele = totalNineMers(reader)

    grooves = universalGroove(options.grooves, mhc_seq, peptides)
    # The result of this call is not used below.
    IntersectionGroove(options.grooves, mhc_seq, peptides)

    rmsd_labels = read_rmsd_file(options.rms)
    selected_ids = read_datafile(options.t)

    for pair_key in rmsd_labels:
        first_id, second_id = pair_key.split('_')

        # Skip pairs in which neither member was selected.
        if first_id not in selected_ids and second_id not in selected_ids:
            continue

        if options.pep:
            pair_sequence = peptides[first_id] + '|' + peptides[second_id]
        else:
            pair_sequence = (
                grooves[first_id] + peptides[first_id]
                + '|'
                + grooves[second_id] + peptides[second_id]
            )

        seq_code, label_code = oneHotEncoding(
            pair_sequence, rmsd_labels[pair_key])

        if options.label == 'x':
            print(', '.join(seq_code))
        elif options.label == 'y':
            print(', '.join(label_code))