Beispiel #1
0
def test_pairwise_identity(sequences, mode):
    """
    Test correct calculation of `get_pairwise_sequence_identity()` via
    pairwise calls of `get_sequence_identity()`.

    The given sequences are aligned with `align_multiple()` using the
    standard protein substitution matrix.
    A reference matrix is then filled entry by entry with
    `get_sequence_identity()` on the two-column sub-alignment of each
    sequence pair and compared with the matrix returned by
    `get_pairwise_sequence_identity()` for the complete alignment.

    Parameters
    ----------
    sequences
        The sequences to be aligned.
        Supplied from outside this function (presumably a pytest
        fixture; its definition is not visible here).
    mode
        The identity mode, passed unchanged to both identity functions.
        Supplied from outside this function (presumably via pytest
        parametrization; not visible here).
    """
    msa, _, _, _ = align.align_multiple(
        sequences,
        matrix=align.SubstitutionMatrix.std_protein_matrix()
    )

    # Reference: compute the identity separately for every ordered pair
    # of sequences, including each sequence paired with itself
    n_seq = len(sequences)
    ref_identity_matrix = np.zeros((n_seq, n_seq))
    for i in range(n_seq):
        for j in range(n_seq):
            ref_identity_matrix[i, j] = align.get_sequence_identity(
                msa[:, [i, j]], mode=mode
            )

    test_identity_matrix = align.get_pairwise_sequence_identity(msa, mode=mode)

    # Identity of two equal sequences should be 1, if only the length of
    # the sequence is counted
    if mode == "shortest":
        assert (np.diag(test_identity_matrix) == 1).all()
    # Identity must be between 0 and 1
    assert ((test_identity_matrix <= 1) & (test_identity_matrix >= 0)).all()
    # Identity matrix is symmetric
    assert (test_identity_matrix == test_identity_matrix.T).all()
    # Pairwise identity must be equal in the two functions
    assert (test_identity_matrix == ref_identity_matrix).all()
Beispiel #2
0
            ids.append(ncbi_id)

# Download the sequences for the collected IDs into a file-like object
# and read the FASTA entries from it
fasta_stream = entrez.fetch_single_file(
    ids, file_name=None, db_name="protein", ret_type="fasta"
)
fasta_file = fasta.FastaFile.read(fasta_stream)
sequences = list(map(seq.ProteinSequence, fasta_file.values()))
# Create multiple sequence alignment with Clustal Omega
alignment = clustalo.ClustalOmegaApp.align(sequences)

# The distance measure required for the tree calculation is the
# fraction of non-identical amino acids in the respective two
# sequences, i.e. one minus the pairwise sequence identity
identities = align.get_pairwise_sequence_identity(alignment, mode="shortest")
distances = 1 - identities
# Create tree via neighbor joining
tree = phylo.neighbor_joining(distances)
# Convert to NetworkX graph
# For the graph visualization, the edge directions are unnecessary
directed_graph = tree.as_graph()
graph = directed_graph.to_undirected()

fig = plt.figure(figsize=(8.0, 8.0))
ax = fig.gca()
ax.axis("off")
# Calculate position of nodes in the plot
pos = nx.kamada_kawai_layout(graph)
# Assign the gene names to the nodes that represent a reference index:
# the node key is the position of the gene name in `genes`
node_labels = dict(enumerate(genes))
nx.draw_networkx(
    graph,