# Cut a sequence window of up to 41 residues (site +/- 20) around each
# modification site listed in a dbPTM table, using sequences looked up in a
# UniProt FASTA file by accession.
inputfile1 = open(sys.argv[1], 'r')  # Database from dbPTM (tab-separated text)
inputfile2 = open(sys.argv[2], 'r')  # UniProt DB (FASTA)
outputfile = open(sys.argv[3], 'w')  # output

# Build accession -> sequence lookup from the FASTA file.  The accession is
# the second "|"-separated field of the record id.
dic = {}
for y in SeqIO.parse(inputfile2, 'fasta'):
    acc1 = y.id.split("|")[1].strip()
    dic[acc1] = str(y.seq)

# NOTE(review): progress_counter.linenum receives the open handle that the
# loop below iterates; presumably it counts lines and rewinds -- confirm,
# otherwise the loop would start on an exhausted file.
oneper = progress_counter.linenum(inputfile1, 'nonfasta')

for num1, x in enumerate(inputfile1):
    progress_counter.progress(num1, oneper)
    if num1 > 0:  # line 0 is skipped (presumably the column header)
        x_ls = x.split('\t')
        acc = x_ls[1].strip()
        pos = int(x_ls[2].strip())  # used as a 1-based position in the sequence
        residue = x_ls[6].strip()
        pro_seq = dic.get(acc)
        if pro_seq is not None:  # accessions missing from the FASTA are ignored
            # Window = 20 residues before the site, the site, 20 after.
            # Slicing clamps at both ends, so one expression covers sites
            # near the N- and C-terminus.  The previous C-terminal branch
            # sliced to len(pro_seq) - 1 and thereby dropped the last
            # residue (the site itself when pos == len(pro_seq)).
            pep40 = pro_seq[max(0, (pos - 1) - 20):(pos - 1) + 21]
#!/usr/bin/python
"""Copy the Saccharomyces cerevisiae records of a FASTA file to a new file.

Usage: <script> <input fasta> <output fasta>

A record is kept when the bracketed part of its description line contains
"Saccharomyces cerevisiae".  Kept records are written as a ">" header line
followed by the whole sequence on a single line.
"""
import sys
from Bio import SeqIO
import progress_counter
import re

# Captures everything between the first "[" and the last "]" of a header.
# Raw string: "\[" in a normal literal is an invalid escape sequence.
BRACKETED = re.compile(r"\[(.*)\]")

# The with-block guarantees both files are closed (and the output flushed)
# even if parsing fails part-way through.
with open(sys.argv[1], 'r') as inputfile1, open(sys.argv[2], 'w') as outputfile:
    for num1, x in enumerate(SeqIO.parse(inputfile1, "fasta")):  # RefSeq fasta
        # inputfile handle, record number, fasta or nonfasta
        progress_counter.progress(inputfile1, num1, "fasta")
        header = x.description
        found = BRACKETED.search(header)
        if found is None:
            # No "[...]" in the header: nothing to compare against, so the
            # record is skipped (indexing the empty findall result used to
            # raise IndexError here).
            continue
        if "Saccharomyces cerevisiae" in found.group(1):  # Define strain name
            outputfile.write('>' + header + "\n" + str(x.seq) + "\n")