def _get_aln_diff(self, blast_result_files):
    """Compute the query/subject start-position offset for each result file.

    Each file is read with ``self._open_results_file``; every line is split
    on single spaces, passed through ``FileHandlers.clean`` and stripped of
    empty fields. The first line that has exactly four fields and begins
    with ``'Query'`` supplies the query start (its second field, as an
    int); the first such line beginning with ``'Sbjct'`` supplies the
    subject start.
    NOTE(review): these look like the alignment rows of BLAST pairwise
    text output, with the query being the gene and the subject the PDB
    sequence -- confirm against the files actually produced.

    Args:
        blast_result_files: iterable of result file names. Each name is
            expected to contain at least five ``'_'``-separated parts;
            parts 0, 3 and 4 form the sequence name.

    Returns:
        A list of ``(seq_name, start_site_difference)`` tuples, one per
        input file and in input order, where ``start_site_difference`` is
        the query start minus the subject start.

    Raises:
        ValueError: if a file contains no four-field ``'Query'`` or
            ``'Sbjct'`` line (this case used to loop forever), or if the
            start field is not an integer.
        IndexError: if a file name has fewer than five ``'_'``-separated
            parts.
    """

    def first_start(rows, label, source):
        # Return the start coordinate from the first 4-field row tagged
        # with `label`; fail loudly instead of re-scanning forever.
        for row in rows:
            if len(row) == 4 and row[0] == label:
                return int(row[1])
        raise ValueError(
            "no '%s' alignment line found in %s" % (label, source)
        )

    file_handlers = FileHandlers()
    aln_diff = []
    for result_file in blast_result_files:
        data = self._open_results_file(result_file)

        cleaned_data = []
        for raw_line in data:
            cleaned = file_handlers.clean(raw_line.split(' '))
            # Splitting on a single space leaves '' entries wherever the
            # line had runs of spaces; drop them in one pass.
            cleaned_data.append([field for field in cleaned if field != ''])

        gene_start = first_start(cleaned_data, 'Query', result_file)
        pdb_start = first_start(cleaned_data, 'Sbjct', result_file)

        name_parts = result_file.split('_')
        seq_name = '_'.join((name_parts[0], name_parts[3], name_parts[4]))
        aln_diff.append((seq_name, gene_start - pdb_start))
    return aln_diff