예제 #1
0
def main():
    """Run two demonstration alignments with NWAlign and print the results.

    The first alignment uses two short hard-coded sequences.  The second
    reads sequences from PEPCarboxylase.fasta, aligns the first 150
    positions of the first two entries of the resulting list, and prints
    the alignment wrapped at 25 columns per line.
    """
    # NOTE: every print below is a single parenthesised argument, which
    # produces identical output under the Python 2 print statement and the
    # Python 3 print function.

    # create a substitution matrix
    sub_matrix = SubstitutionMatrix('blosum50')

    # set up for alignment
    aligner = NWAlign(sub_matrix)
    print("Testing a simple alignment...")
    seq1 = "HEAGAWGHEE"
    seq2 = "PAWHEAE"

    aligner.align(seq1, seq2)

    align1, align2 = aligner.get_optimal_alignment()
    score = aligner.get_optimal_score()

    print("Alignment Score: %s" % (score,))
    print(align1.data)
    print(align2.data)

    print("Testing a more complex alignment...")
    test_file = "PEPCarboxylase.fasta"

    print("Getting sequences from the file PEPCarboxylase.fasta...")
    seq_list = []
    scanner = Fasta._Scanner()
    handler = FASTAHandler(seq_list)
    fasta_handle = open(test_file, 'r')
    try:
        # Two sequences are needed below, so feed is called twice
        # (presumably one FASTA record is consumed per call -- confirm
        # against the scanner implementation).
        scanner.feed(fasta_handle, handler)
        scanner.feed(fasta_handle, handler)
    finally:
        # Close the file even if parsing fails.
        fasta_handle.close()

    print("Aligning sequences...")
    aligner = NWAlign(sub_matrix)
    aligner.align(seq_list[0][0:150], seq_list[1][0:150])

    align1, align2 = aligner.get_optimal_alignment()
    score = aligner.get_optimal_score()

    print("Alignment Score: %s" % (score,))
    _print_wrapped_alignment(align1, align2, 25)


def _print_wrapped_alignment(align1, align2, line_width):
    """Print two aligned sequences in consecutive chunks of line_width.

    Each chunk is preceded by a blank line.  The final chunk runs to the
    end of the data, so the last alignment column is included.

    Args:
        align1: first aligned sequence; must support len() and expose the
            sequence text as a sliceable ``data`` attribute.
        align2: second aligned sequence, same requirements as align1.
        line_width: maximum number of columns printed per line.
    """
    # Use the longer of the two so nothing is dropped if lengths differ.
    total = max(len(align1), len(align2))
    for start in range(0, total, line_width):
        stop = start + line_width
        print("")
        # Slicing past the end is safe: it just yields a shorter string.
        print(align1.data[start:stop])
        print(align2.data[start:stop])
예제 #2
0
def extract_organisms(file, num_records):
    """Feed records from a file to a SpeciesExtractor and return its list.

    Args:
        file: path of the file to open for reading.
        num_records: number of times the scanner is fed from the file
            (presumably one FASTA record per feed -- confirm against the
            scanner implementation).

    Returns:
        The ``species_list`` attribute of the SpeciesExtractor consumer
        after all feeds have completed.
    """
    scanner = Fasta._Scanner()
    consumer = SpeciesExtractor()

    file_to_parse = UndoHandle(open(file, "r"))
    try:
        for _ in range(num_records):
            scanner.feed(file_to_parse, consumer)
    finally:
        # Close the handle even if the scanner raises part-way through.
        file_to_parse.close()

    return consumer.species_list