def main():
    """Run two demonstration alignments and print their results.

    The first aligns two short hard-coded sequences. The second reads two
    entries from ``PEPCarboxylase.fasta``, aligns the first 150 elements of
    each, and prints the result in fixed-width rows.

    Prints use the single-argument parenthesised form so the output is
    identical under Python 2 while the block also parses under Python 3.
    """
    # create a substitution matrix
    sub_matrix = SubstitutionMatrix('blosum50')

    # set up for alignment
    aligner = NWAlign(sub_matrix)

    print("Testing a simple alignment...")
    seq1 = "HEAGAWGHEE"
    seq2 = "PAWHEAE"

    aligner.align(seq1, seq2)
    align1, align2 = aligner.get_optimal_alignment()
    score = aligner.get_optimal_score()

    print("Alignment Score: %s" % (score,))
    print(align1.data)
    print(align2.data)

    print("Testing a more complex alignment...")
    test_file = "PEPCarboxylase.fasta"

    print("Getting sequences from the file PEPCarboxylase.fasta...")
    seq_list = []
    scanner = Fasta._Scanner()
    handler = FASTAHandler(seq_list)

    # The scanner is fed twice so that two entries end up in seq_list.
    # The handle is closed as soon as both feeds are done, even on error.
    handle = open(test_file, 'r')
    try:
        scanner.feed(handle, handler)
        scanner.feed(handle, handler)
    finally:
        handle.close()

    print("Aligning sequences...")
    aligner = NWAlign(sub_matrix)
    aligner.align(seq_list[0][0:150], seq_list[1][0:150])
    align1, align2 = aligner.get_optimal_alignment()
    score = aligner.get_optimal_score()

    print("Alignment Score: %s" % (score,))

    line_width = 25
    # current_position marks the END of the row about to be printed.
    current_position = line_width

    # pretty print the alignment
    while current_position < len(align1):
        row_start = current_position - line_width
        print("")
        print(align1.data[row_start:current_position])
        print(align2.data[row_start:current_position])
        current_position = current_position + line_width

    # print whatever is left; the slice runs to the full length so the
    # final column is included (ending at len - 1 dropped the last character)
    row_start = current_position - line_width
    print("")
    print(align1.data[row_start:len(align1)])
    print(align2.data[row_start:len(align2)])
def extract_organisms(file, num_records):
    """Feed records from a file to a SpeciesExtractor and return its list.

    Args:
        file: Path of the file to open for reading.
        num_records: Number of times the scanner is fed from the handle
            (presumably one FASTA record per feed -- confirm against
            Fasta._Scanner).

    Returns:
        The ``species_list`` attribute of the consumer after all feeds.
    """
    scanner = Fasta._Scanner()
    consumer = SpeciesExtractor()

    file_to_parse = UndoHandle(open(file, "r"))
    # Close the handle even if the scanner raises part-way through.
    try:
        for _ in range(num_records):
            scanner.feed(file_to_parse, consumer)
    finally:
        file_to_parse.close()

    return consumer.species_list