def check_range(start, end, protein):
    """Print a per-sequence comparison of the bbtm and gonnet alignments.

    For every sequence name in ``bbtm_alignments[protein]`` this prints:

    1. the sequence name,
    2. ``matrices.compare`` of that sequence against the pdb sequence
       (the ``'chaina_<protein>'`` entry) in the bbtm alignment,
    3. the same comparison in the gonnet alignment,
    4. the ``[start:end]`` slice of the sequence in the bbtm alignment,
    5. the ``[start:end]`` slice of the sequence in the gonnet alignment,

    followed by a blank line.

    NOTE(review): the intent is to see which positions of each sequence
    correspond to a given range of the pdb sequence, with positions
    corresponding to gaps in the pdb sequence not shown.  This function
    itself only slices with ``start``/``end`` directly, so that presumably
    relies on how the alignments were prepared -- confirm against the code
    that builds ``bbtm_alignments`` and ``gonnet_alignments``.

    Args:
        start: First index of the slice (inclusive).
        end: Last index of the slice (exclusive).
        protein: Key into ``bbtm_alignments`` / ``gonnet_alignments``; its
            lower-cased form is used to build the pdb sequence name.
    """
    print('name then seq distance then bbtm segment then gonnet segment')
    reference_name = 'chaina_' + protein.lower()

    # bbtm first, then gonnet: this fixes the order of the printed lines.
    alignment_sets = (bbtm_alignments, gonnet_alignments)

    for seq_name in bbtm_alignments[protein].keys():
        print(seq_name)

        # Comparison of this sequence with the pdb sequence, per alignment.
        for alignment_set in alignment_sets:
            family = alignment_set[protein]
            print(matrices.compare(family[seq_name],
                                   family[reference_name],
                                   id_matrix))

        # The requested segment of this sequence, per alignment.
        for alignment_set in alignment_sets:
            print(alignment_set[protein][seq_name][start:end])

        print()
# Calculate the family moments, that is, the moments for all # sequences in the alignments family_moments = CIDict((pdbid, list()) for pdbid in alignments.keys()) for pdbid in family_moments.keys(): for seq_index in range(len(oracles[pdbid].get_alignment())): # Calculate the moment family_moment = moment(structure_dict[pdbid], resi_lists[pdbid], centers[pdbid], partial(calculator_adapter, calc), oracles[pdbid].sequence(seq_index)) # Calculate the %identity with the pdb sequence pdb_sequence = oracles[pdbid].get_pdb_seq_record().seq sequence = oracles[pdbid].get_alignment()[seq_index].seq normalized_distance = matrices.compare(pdb_sequence, sequence, identity) seq_id = oracles[pdbid].get_alignment()[seq_index].id family_moments[pdbid].append((seq_id, normalized_distance, family_moment)) # If this is the pdb sequence, then calculating the moment using # only structural information, without using the MSA, should give # the same result. If it doesn't, something went wrong! if 'chaina' in seq_id: no_seq_moment = moment(structure_dict[pdbid], resi_lists[pdbid], centers[pdbid], partial(no_seq_calculator_adapter, calc), None) assert (no_seq_moment == family_moment).all(), \