def searchAndSave(searchString, filename): """ convenince function to search database and save results to a FASTA file. If a file of that name already exists then the whole process is skipped searchString - this is what is being searched for filename - name of the file where to save the results """ if not(path.isfile(filename)): sequences = [] """ set of ids returned as part of the protein """ ids = seq.searchSequences(searchString) print "Processing ", len(ids), " sequences ..." """ iterate over the ids and for each fetch the record from the database and append it to ex5_sequences """ for seq_id in ids: print "Fetching sequence: ", seq_id sequences.append(seq.getSequence(seq_id)) """ save the completed list of sequences to a FASTA file """ seq.writeFastaFile(filename, sequences) else: print filename, " exists. skipping."
seq2 = seq.Sequence('AAAAAGGGUG') print seq2.alphabet seq3 = seq.Sequence('AWAAAAAAGGVG') print seq3.alphabet #seq4 = seq.Sequence('Z') #print seq3.alphabet rns1 = seq.getSequence('RNS1_ARATH', 'uniprot') print rns1.count('S') # ex5_ids = seq.searchSequences("signal+peptide+AND+organism:Arabidopsis+thaliana[3702]+AND+length:[100+TO+*]") id6 = seq.searchSequences("Lipid+metabolism+AND+organism:3702+AND+fragment:no+AND+length:[100+TO+*]") print "ID5: ", ex5_ids.__len__(), " ID6: ", id6.__len__() to_be_written = [] ids = set(ex5_ids).intersection(set(id6)) print ids.__len__() for i in ids: pass #s = seq.getSequence(i) #to_be_written.append(s) #print s # iteritems??? #seq.writeFastaFile("results.fasta", to_be_written)