'''
Created on 30/03/2014

@author: jacekrad
'''
import sequence as seq
import util

q6b_filename = "q6b.fasta"

# Run the query through util.searchAndSave, passing the FASTA file name as
# the destination, then load that same file back as a list of sequences.
# (presumably searchAndSave writes the hits to the given file - TODO confirm)
util.searchAndSave("surface+protein+AND+organism:1280", q6b_filename)
sequences = seq.readFastaFile(q6b_filename)

# The count and its label are joined by a single space, so the label's own
# leading space yields two spaces in the printed line.
print("%s %s" % (len(sequences), " total sequences"))

# Keep only the entries whose `sequence` attribute, rendered as a string,
# contains the motif RAFKPS.
matched_sequences = [entry for entry in sequences
                     if "RAFKPS" in str(entry.sequence)]

# print the final results
print("%s %s" % (len(matched_sequences), " matched sequences:"))
for entry in matched_sequences:
    print(entry)
'''
Created on 08/04/2014

@author: s4361277
'''
from sequence import *
from alignments import AlignmentCollection, AlignmentThread
import util

# NOTE(review): the first file name reads "p08884" while the accession
# queried below is P08684 - this looks like a typo. It is left unchanged
# because the same path is used for both saving and reading.
p08684_filename = "p08884.fasta"
q16802_filename = "q16802.fasta"

# question 8
util.searchAndSave("P08684", p08684_filename)
util.searchAndSave("Q16802", q16802_filename)

# list of matrices through which we shall iterate to obtain various alignments
matrix_names = ["blosum62.matrix", "p450.matrix"]

# Load each substitution matrix over the protein alphabet, echoing it as we go.
matrices = []
for matrix_name in matrix_names:
    matrix = readSubstMatrix(matrix_name, Protein_Alphabet)
    # The header line ends with a newline and the matrix text follows
    # directly, with no separating space, on the next line.
    print("Adding matrix: " + matrix_name + "\n" + str(matrix))
    matrices.append(matrix)

# read our residue sequences from FASTAs (first record of each file)
P08684 = readFastaFile(p08684_filename)[0]
Q16802 = readFastaFile(q16802_filename)[0]

local_alignments = AlignmentCollection("question 10 local")
'''
Created on 29/03/2014

@author: jacekrad
'''
import sequence as seq
import os.path as path
import util

# File names where the results for questions 5 and 6, respectively, are saved.
ex5_filename = "sigpep_at.fa"
ex6_filename = "lipmet_at.fa"

# Intent, per the original author's note: for both questions the search
# should only be run when the FASTA file does not already exist, so the
# program can be rerun quickly without rebuilding the file every time.
# NOTE(review): no existence check is visible in this script; presumably
# util.searchAndSave performs it - TODO confirm.

# Question 5 query, split over two literals to keep the lines short.
ex5_query = ("signal+peptide+AND+organism:Arabidopsis+thaliana[3702]+"
             "AND+length:[100+TO+*]")
util.searchAndSave(ex5_query, ex5_filename)

# Question 6 query, split the same way.
ex6_query = ("Lipid+metabolism+AND+organism:3702+AND+fragment:no+"
             "AND+length:[100+TO+*]")
util.searchAndSave(ex6_query, ex6_filename)