from Bio.Phylo import CDAO from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri import os class CDAOError(Exception): """Exception raised when CDAO object construction cannot continue (DEPRECATED).""" pass try: import rdflib rdfver = rdflib.__version__ if rdfver[0] in ["1", "2"] or (rdfver in ["3.0.0", "3.1.0", "3.2.0"]): raise MissingPythonDependencyError( 'Support for CDAO tree format requires RDFlib v3.2.1 or later.') except ImportError: raise MissingPythonDependencyError( 'Support for CDAO tree format requires RDFlib.') RDF_NAMESPACES = { 'owl': 'http://www.w3.org/2002/07/owl#', 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#', } RDF_NAMESPACES.update(cdao_namespaces) # pad node ids with zeroes until they're at least this length ZEROES = 8 def qUri(x):
# Copyright 2019 by Robert T. Miller. All rights reserved. # This file is part of the Biopython distribution and governed by your # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". # Please see the LICENSE file that should have been included as part of this # package. """PICIO: read and write Protein Internal Coordinate (.pic) data files.""" import re try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy to build proteins from internal coordinates.") from Bio.File import as_handle from Bio.PDB.StructureBuilder import StructureBuilder from Bio.PDB.parse_pdb_header import _parse_pdb_header_list from Bio.PDB.PDBExceptions import PDBException from Bio.PDB.internal_coords import IC_Residue, IC_Chain, Edron, AtomKey from typing import Dict, TextIO from Bio.PDB.Structure import Structure def read_PIC(file: TextIO, verbose: bool = False) -> Structure: """Load Protein Internal Coordinate (.pic) data from file.
The Bio.Cluster provides commonly used clustering algorithms and was designed with the application to gene expression data in mind. However, this module can also be used for cluster analysis of other types of data. Bio.Cluster and the underlying C Clustering Library is described in M. de Hoon et al. (2004) https://doi.org/10.1093/bioinformatics/bth078 """ import numbers try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Please install numpy if you want to use Bio.Cluster. " "See http://www.numpy.org/") from . import _cluster __all__ = ('Node', 'Tree', 'kcluster', 'kmedoids', 'treecluster', 'somcluster', 'clusterdistance', 'clustercentroids', 'distancematrix', 'pca', 'Record', 'read') __version__ = _cluster.version() class Node(_cluster.Node): __doc__ = _cluster.Node.__doc__
Functions: - train - Train a new naive Bayes classifier. - calculate - Calculate the probabilities of each class, given an observation. - classify - Classify an observation into a class. """ try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.MaxEntropy." ) def _contents(items): """Return a dictionary where the key is the item and the value is the probablity associated (PRIVATE).""" term = 1.0 / len(items) counts = {} for item in items: counts[item] = counts.get(item, 0) + term return counts class NaiveBayes: """Hold information for a NaiveBayes classifier.
import unittest import math from Bio._py3k import zip from Bio._py3k import range # Do we have ReportLab? Raise error if not present. from Bio import MissingPythonDependencyError try: from reportlab.lib import colors from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.ttfonts import TTFont from reportlab.lib.units import cm except ImportError: raise MissingPythonDependencyError( "Install reportlab if you want to use Bio.Graphics.") try: import Image from reportlab.graphics import renderPM except ImportError: #This is an optional part of ReportLab, so may not be installed. #We'll raise a missing dependency error if rendering to a #bitmap format is attempted. renderPM=None # Biopython core from Bio import SeqIO from Bio.SeqFeature import SeqFeature, FeatureLocation from Bio import SeqUtils
# Copyright 2017 by Maximilian Greil. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. import unittest try: from numpy import array from numpy import dot # missing in old PyPy's micronumpy from numpy import around from numpy import array_equal except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.QCPSuperimposer.") try: from Bio.PDB.QCPSuperimposer import QCPSuperimposer except ImportError: from Bio import MissingExternalDependencyError raise MissingExternalDependencyError( "C module in Bio.QCPSuperimposer not compiled") class QCPSuperimposerTest(unittest.TestCase): def setUp(self): self.x = array([[51.65, -1.90, 50.07], [50.40, -1.23, 50.65], [50.68, -0.04, 51.54], [50.22, -0.02, 52.85]]) self.y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58],
... ''' ... >>> CreatePeople(connection=db) CreatePeople(message=Success) """ from __future__ import print_function import sys from Bio import MissingPythonDependencyError try: import MySQLdb except: raise MissingPythonDependencyError("Install MySQLdb if you want to use " "Bio.DocSQL.") connection = None class NoInsertionError(Exception): pass def _check_is_public(name): if name[:6] == "_names": raise AttributeError class QueryRow(list): def __init__(self, cursor):
from Bio import NaiveBayes # Importing NaiveBayes will itself raise MissingPythonDependencyError # if NumPy is unavailable. import numpy try: hash(numpy.float64(123.456)) except TypeError: # Due to a bug in NumPy 1.12.1, this is unhashable under # PyPy3.5 v5.7 beta - it has been fixed in NumPy from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Please update NumPy if you want to use Bio.NaiveBayes " "(under this version numpy.float64 is unhashable).") from None del numpy class CarTest(unittest.TestCase): def test_car_data(self): """Simple example using car data.""" # Car data from example 'Naive Bayes Classifier example' # by Eric Meisner November 22, 2003 # http://www.inf.u-szeged.hu/~ormandi/teaching/mi2/02-naiveBayes-example.pdf xcar = [ ["Red", "Sports", "Domestic"], ["Red", "Sports", "Domestic"], ["Red", "Sports", "Domestic"], ["Yellow", "Sports", "Domestic"],
def draw_graphviz(tree, label_func=str, prog="twopi", args="",
                  node_color="#c0deff", **kwargs):
    """Display a tree or clade as a graph, using the graphviz engine.

    Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot.

    The third and fourth parameters apply to Graphviz, and the remaining
    arbitrary keyword arguments are passed directly to networkx.draw(), which
    in turn mostly wraps matplotlib/pylab.  See the documentation for Graphviz
    and networkx for detailed explanations.

    The NetworkX/matplotlib parameters are described in the docstrings for
    networkx.draw() and pylab.scatter(), but the most reasonable options to try
    are: *alpha, node_color, node_size, node_shape, edge_color, style,
    font_size, font_color, font_weight, font_family*

    :Parameters:

        label_func : callable
            A function to extract a label from a node. By default this is str(),
            but you can use a different function to select another string
            associated with each node. If this function returns None for a node,
            no label will be shown for that node.

            The label will also be silently skipped if the function throws an
            exception related to ordinary attribute access (LookupError,
            AttributeError, ValueError); all other exception types will still be
            raised. This means you can use a lambda expression that simply
            attempts to look up the desired value without checking if the
            intermediate attributes are available::

                from Bio import Phylo, AlignIO
                from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
                constructor = DistanceTreeConstructor()
                aln = AlignIO.read(open('TreeConstruction/msa.phy'), 'phylip')
                calculator = DistanceCalculator('identity')
                dm = calculator.get_distance(aln)
                tree = constructor.upgma(dm)
                Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code)

        prog : string
            The Graphviz program to use when rendering the graph. 'twopi'
            behaves the best for large graphs, reliably avoiding crossing edges,
            but for moderate graphs 'neato' looks a bit nicer.  For small
            directed graphs, 'dot' may produce a normal-looking cladogram, but
            will cross and distort edges in larger graphs. (The programs 'circo'
            and 'fdp' are not recommended.)
        args : string
            Options passed to the external graphviz program.  Normally not
            needed, but offered here for completeness.

    :Raises:
        MissingPythonDependencyError if NetworkX cannot be imported, or if the
        graphviz layout backend raises ImportError.

    Examples
    --------
    Load a PhyloXML format tree, and draw a PNG using GraphViz::

        import pylab
        from Bio import Phylo
        tree = Phylo.read('PhyloXML/apaf.xml', 'phyloxml')
        Phylo.draw_graphviz(tree)
        pylab.show()
        pylab.savefig('apaf.png')

    """
    # Deprecated in Biopython 1.70 (#1247)
    import warnings
    from Bio import BiopythonDeprecationWarning
    warnings.warn("draw_graphviz is deprecated; use Bio.Phylo.draw instead",
                  BiopythonDeprecationWarning)

    try:
        import networkx
    except ImportError:
        raise MissingPythonDependencyError(
            "Install NetworkX if you want to use to_networkx.")

    G = to_networkx(tree)

    # Relabel the nodes with integers and build a mapping from each original
    # node to its integer id, so layout positions can be looked up afterwards.
    # Only the call whose TypeError selects the legacy path is guarded.
    try:
        # NetworkX version 1.8 or later (2013-01-20)
        Gi = networkx.convert_node_labels_to_integers(
            G, label_attribute="label")
    except TypeError:
        # Older NetworkX versions (before 1.8)
        Gi = networkx.convert_node_labels_to_integers(
            G, discard_old_labels=False)
        int_labels = Gi.node_labels
    else:
        # Graph.nodes(data=True) yields (node, attribute-dict) pairs on both
        # NetworkX 1.x and 2.x; the Graph.node attribute used previously was
        # removed in NetworkX 2.4.
        int_labels = {
            nodeattrs["label"]: integer
            for integer, nodeattrs in Gi.nodes(data=True)
        }

    try:
        if hasattr(networkx, "graphviz_layout"):
            # networkx versions before 1.11 (#1247)
            graphviz_layout = networkx.graphviz_layout
        else:
            # networkx version 1.11
            graphviz_layout = networkx.drawing.nx_agraph.graphviz_layout
        posi = graphviz_layout(Gi, prog, args=args)
    except ImportError:
        raise MissingPythonDependencyError(
            "Install PyGraphviz or pydot if you want to use draw_graphviz.")

    def get_label_mapping(G, selection):
        """Apply the user-specified node relabeling."""
        for node in G.nodes():
            if (selection is None) or (node in selection):
                try:
                    label = label_func(node)
                    if label not in (None, node.__class__.__name__):
                        yield (node, label)
                except (LookupError, AttributeError, ValueError):
                    # Deliberate best-effort: nodes lacking the requested
                    # attribute are simply left unlabeled.
                    pass

    if "nodelist" in kwargs:
        labels = dict(get_label_mapping(G, set(kwargs["nodelist"])))
    else:
        labels = dict(get_label_mapping(G, None))
        # With no explicit nodelist, only the labeled nodes are drawn.
        kwargs["nodelist"] = list(labels.keys())

    # Per-edge styling falls back to black / width 1.0 whenever the edge data
    # is missing, is not a dict, or holds a falsy value.
    if "edge_color" not in kwargs:
        kwargs["edge_color"] = [
            (e[2].get("color") or "k") if isinstance(e[2], dict) else "k"
            for e in G.edges(data=True)
        ]
    if "width" not in kwargs:
        kwargs["width"] = [
            (e[2].get("width") or 1.0) if isinstance(e[2], dict) else 1.0
            for e in G.edges(data=True)
        ]

    # Translate the layout (keyed by integer id) back to the original nodes.
    posn = {n: posi[int_labels[n]] for n in G}
    networkx.draw(G, posn, labels=labels, with_labels=True,
                  node_color=node_color, **kwargs)
(http://www.kegg.jp/kegg/xml/docs/) Classes: """ import os import tempfile from io import BytesIO try: from reportlab.lib import colors from reportlab.pdfgen import canvas except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install reportlab if you want to use KGML_vis.") from None try: from PIL import Image except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install pillow if you want to use KGML_vis.") from None from urllib.request import urlopen from Bio.KEGG.KGML.KGML_pathway import Pathway def darken(color, factor=0.7):
# This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Tests for the Alignment class in Bio.Align.""" import os import unittest try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install numpy if you want to use Bio.Align.") from None from Bio import Align, SeqIO from Bio.Seq import Seq, reverse_complement from Bio.SeqUtils import GC class TestPairwiseAlignment(unittest.TestCase): def check_indexing_slicing(self, alignment, msg): self.assertEqual( repr(alignment), "<Bio.Align.Alignment object (2 rows x 12 columns) at 0x%x>" % id(alignment), ) self.assertEqual( str(alignment), """\
# This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. import unittest # We require NumPy as a build dependency and runtime dependency, # so check this first: try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.Cluster") # Given NumPy is installed, if we can't import Cluster this is # most likely due to Biopython being installed without NumPy try: from Bio import Cluster except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError("If you want to use Bio.Cluster, " "install NumPy first and then " "reinstall Biopython") class TestCluster(unittest.TestCase): module = 'Bio.Cluster' def test_median_mean(self):
gompertz Gompertz growth model. richards Richards growth model. guess_plateau Guess the plateau point to improve sigmoid fitting. guess_lag Guess the lag point to improve sigmoid fitting. fit Sigmoid functions fit. get_area Calculate the area under the PM curve. """ import numpy as np try: from scipy.optimize.minpack import curve_fit from scipy.integrate import trapz except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( 'Install scipy to extract curve parameters.') def logistic(x, A, u, d, v, y0): """Logistic growth model Proposed in Zwietering et al., 1990 (PMID: 16348228) """ y = (A / (1 + np.exp((((4 * u) / A) * (d - x)) + 2))) + y0 return y def gompertz(x, A, u, d, v, y0): """Gompertz growth model Proposed in Zwietering et al., 1990 (PMID: 16348228)
"""Tests for MarkovModel module.""" import warnings import unittest from io import StringIO try: from numpy import array from numpy import random # missing in PyPy's micronumpy from numpy import array_equal from numpy import around from numpy import log except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.MarkovModel.") from None with warnings.catch_warnings(): # Silence this warning: # For optimal speed, please update to Numpy version 1.3 or later warnings.simplefilter("ignore", UserWarning) from Bio import MarkovModel class TestMarkovModel(unittest.TestCase): def test_train_visible(self): states = ["0", "1", "2", "3"] alphabet = ["A", "C", "G", "T"] training_data = [ ("AACCCGGGTTTTTTT", "001112223333333"),
def draw(tree, label_func=str, do_show=True, show_confidence=True,
         # For power users
         axes=None, branch_labels=None, label_colors=None,
         *args, **kwargs):
    """Plot the given tree using matplotlib (or pylab).

    The graphic is a rooted tree, drawn with roughly the same algorithm as
    draw_ascii.

    Additional keyword arguments passed into this function are used as pyplot
    options. The input format should be in the form of:
    pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict), or
    pyplot_option_name=(dict).

    Example using the pyplot options 'axhspan' and 'axvline':

    >>> Phylo.draw(tree, axhspan=((0.25, 7.75), {'facecolor':'0.5'}),
    ...     axvline={'x':'0', 'ymin':'0', 'ymax':'1'})

    Visual aspects of the plot can also be modified using pyplot's own functions
    and objects (via pylab or matplotlib). In particular, the pyplot.rcParams
    object can be used to scale the font size (rcParams["font.size"]) and line
    width (rcParams["lines.linewidth"]).

    :Parameters:
        label_func : callable
            A function to extract a label from a node. By default this is str(),
            but you can use a different function to select another string
            associated with each node. If this function returns None for a node,
            no label will be shown for that node.
        do_show : bool
            Whether to show() the plot automatically.
        show_confidence : bool
            Whether to display confidence values, if present on the tree.
        axes : matplotlib/pylab axes
            If a valid matplotlib.axes.Axes instance, the phylogram is plotted
            in that Axes. By default (None), a new figure is created.
        branch_labels : dict or callable
            A mapping of each clade to the label that will be shown along the
            branch leading to it. By default this is the confidence value(s) of
            the clade, taken from the ``confidence`` attribute, and can be
            easily toggled off with this function's ``show_confidence`` option.
            But if you would like to alter the formatting of confidence values,
            or label the branches with something other than confidence, then use
            this option.
        label_colors : dict or callable
            A function or a dictionary specifying the color of the tip label.
            If the tip label can't be found in the dict or label_colors is
            None, the label will be shown in black.

    :Returns:
        None. The tree is drawn onto ``axes`` (or a newly created figure).

    :Raises:
        MissingPythonDependencyError if neither matplotlib.pyplot nor pylab
        can be imported; ValueError if ``axes`` is not an Axes instance or if
        an extra keyword argument's value is not iterable.

    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        try:
            import pylab as plt
        except ImportError:
            raise MissingPythonDependencyError(
                "Install matplotlib or pylab if you want to use draw.")

    import matplotlib.collections as mpcollections

    # Arrays that store lines for the plot of clades
    horizontal_linecollections = []
    vertical_linecollections = []

    # Options for displaying branch labels / confidence
    def conf2str(conf):
        """Format a confidence value, dropping a redundant '.0'."""
        if int(conf) == conf:
            return str(int(conf))
        return str(conf)

    if not branch_labels:
        if show_confidence:
            def format_branch_label(clade):
                if hasattr(clade, 'confidences'):
                    # phyloXML supports multiple confidences
                    return '/'.join(
                        conf2str(cnf.value) for cnf in clade.confidences)
                # Compare against None rather than testing truthiness, so a
                # legitimate confidence of 0 is still displayed.
                if clade.confidence is not None:
                    return conf2str(clade.confidence)
                return None
        else:
            def format_branch_label(clade):
                return None
    elif isinstance(branch_labels, dict):
        def format_branch_label(clade):
            return branch_labels.get(clade)
    else:
        assert callable(branch_labels), \
            "branch_labels must be either a dict or a callable (function)"
        format_branch_label = branch_labels

    # options for displaying label colors.
    if label_colors:
        if callable(label_colors):
            def get_label_color(label):
                return label_colors(label)
        else:
            # label_colors is presumed to be a dict
            def get_label_color(label):
                return label_colors.get(label, 'black')
    else:
        def get_label_color(label):
            # if label_colors is not specified, use black
            return 'black'

    # Layout

    def get_x_positions(tree):
        """Create a mapping of each clade to its horizontal position.

        Dict of {clade: x-coord}
        """
        depths = tree.depths()
        # If there are no branch lengths, assume unit branch lengths
        if not max(depths.values()):
            depths = tree.depths(unit_branch_lengths=True)
        return depths

    def get_y_positions(tree):
        """Create a mapping of each clade to its vertical position.

        Dict of {clade: y-coord}.
        Coordinates are negative, and integers for tips.
        """
        maxheight = tree.count_terminals()
        # Rows are defined by the tips
        heights = dict((tip, maxheight - i)
                       for i, tip in enumerate(reversed(tree.get_terminals())))

        # Internal nodes: place at midpoint of children
        def calc_row(clade):
            for subclade in clade:
                if subclade not in heights:
                    calc_row(subclade)
            # Closure over heights
            heights[clade] = (heights[clade.clades[0]] +
                              heights[clade.clades[-1]]) / 2.0

        if tree.root.clades:
            calc_row(tree.root)
        return heights

    x_posns = get_x_positions(tree)
    y_posns = get_y_positions(tree)
    # The function draw_clade closes over the axes object
    if axes is None:
        fig = plt.figure()
        axes = fig.add_subplot(1, 1, 1)
    elif not isinstance(axes, plt.matplotlib.axes.Axes):
        raise ValueError("Invalid argument for axes: %s" % axes)

    def draw_clade_lines(use_linecollection=False, orientation='horizontal',
                         y_here=0, x_start=0, x_here=0, y_bot=0, y_top=0,
                         color='black', lw='.1'):
        """Create a line with or without a line collection object.

        Graphical formatting of the lines representing clades in the plot can be
        customized by altering this function.
        """
        if orientation == 'horizontal':
            if use_linecollection:
                horizontal_linecollections.append(
                    mpcollections.LineCollection(
                        [[(x_start, y_here), (x_here, y_here)]],
                        color=color, lw=lw),
                )
            else:
                axes.hlines(y_here, x_start, x_here, color=color, lw=lw)
        elif orientation == 'vertical':
            if use_linecollection:
                vertical_linecollections.append(
                    mpcollections.LineCollection(
                        [[(x_here, y_bot), (x_here, y_top)]],
                        color=color, lw=lw),
                )
            else:
                axes.vlines(x_here, y_bot, y_top, color=color)

    def draw_clade(clade, x_start, color, lw):
        """Recursively draw a tree, down from the given clade."""
        x_here = x_posns[clade]
        y_here = y_posns[clade]
        # phyloXML-only graphics annotations
        if hasattr(clade, 'color') and clade.color is not None:
            color = clade.color.to_hex()
        if hasattr(clade, 'width') and clade.width is not None:
            lw = clade.width * plt.rcParams['lines.linewidth']
        # Draw a horizontal line from start to here
        draw_clade_lines(use_linecollection=True, orientation='horizontal',
                         y_here=y_here, x_start=x_start, x_here=x_here,
                         color=color, lw=lw)
        # Add node/taxon labels
        label = label_func(clade)
        if label not in (None, clade.__class__.__name__):
            axes.text(x_here, y_here, ' %s' % label,
                      verticalalignment='center',
                      color=get_label_color(label))
        # Add label above the branch (optional)
        conf_label = format_branch_label(clade)
        if conf_label:
            axes.text(0.5 * (x_start + x_here), y_here, conf_label,
                      fontsize='small', horizontalalignment='center')
        if clade.clades:
            # Draw a vertical line connecting all children
            y_top = y_posns[clade.clades[0]]
            y_bot = y_posns[clade.clades[-1]]
            # Only apply widths to horizontal lines, like Archaeopteryx
            draw_clade_lines(use_linecollection=True, orientation='vertical',
                             x_here=x_here, y_bot=y_bot, y_top=y_top,
                             color=color, lw=lw)
            # Draw descendents
            for child in clade:
                draw_clade(child, x_here, color, lw)

    draw_clade(tree.root, 0, 'k', plt.rcParams['lines.linewidth'])

    # If line collections were used to create clade lines, here they are added
    # to the pyplot plot.
    for i in horizontal_linecollections:
        axes.add_collection(i)
    for i in vertical_linecollections:
        axes.add_collection(i)

    # Aesthetics

    if hasattr(tree, 'name') and tree.name:
        axes.set_title(tree.name)
    axes.set_xlabel('branch length')
    axes.set_ylabel('taxa')
    # Add margins around the tree to prevent overlapping the axes
    xmax = max(x_posns.values())
    axes.set_xlim(-0.05 * xmax, 1.25 * xmax)
    # Also invert the y-axis (origin at the top)
    # Add a small vertical margin, but avoid including 0 and N+1 on the y axis
    axes.set_ylim(max(y_posns.values()) + 0.8, 0.2)

    # Parse and process key word arguments as pyplot options
    for key, value in kwargs.items():
        try:
            # Check that the pyplot option input is iterable, as required
            iter(value)
        except TypeError:
            raise ValueError(
                'Keyword argument "%s=%s" is not in the format '
                'pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict),'
                ' or pyplot_option_name=(dict) ' % (key, value))
        if isinstance(value, dict):
            # pyplot_option_name=(dict): keyword arguments only
            getattr(plt, str(key))(**dict(value))
        elif isinstance(value[0], tuple):
            # pyplot_option_name=(tuple, dict): positional then keyword args
            getattr(plt, str(key))(*value[0], **dict(value[1]))
        else:
            # pyplot_option_name=(tuple): positional arguments only
            getattr(plt, str(key))(*value)

    if do_show:
        plt.show()
Functions: read Read a cel file and store its contents in a Record Classes: Record Contains the information from a cel file """ # We use print in the doctests from __future__ import print_function try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.Affy.CelFile") class Record(object): """Stores the information in a cel file Example usage: >>> from Bio.Affy import CelFile >>> with open('Affy/affy_v3_example.CEL') as handle: ... c = CelFile.read(handle) ... >>> print(c.ncols, c.nrows) 5 5 >>> print(c.intensities) [[ 234. 170. 22177. 164. 22104.]
def draw_graphviz(tree, label_func=str, prog='twopi', args='',
                  node_color='#c0deff', **kwargs):
    """Display a tree or clade as a graph, using the graphviz engine.

    Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot.

    The third and fourth parameters apply to Graphviz, and the remaining
    arbitrary keyword arguments are passed directly to networkx.draw(), which
    in turn mostly wraps matplotlib/pylab.  See the documentation for Graphviz
    and networkx for detailed explanations.

    The NetworkX/matplotlib parameters are described in the docstrings for
    networkx.draw() and pylab.scatter(), but the most reasonable options to try
    are: *alpha, node_color, node_size, node_shape, edge_color, style,
    font_size, font_color, font_weight, font_family*

    :Parameters:

        label_func : callable
            A function to extract a label from a node. By default this is str(),
            but you can use a different function to select another string
            associated with each node. If this function returns None for a node,
            no label will be shown for that node.

            The label will also be silently skipped if the function throws an
            exception related to ordinary attribute access (LookupError,
            AttributeError, ValueError); all other exception types will still be
            raised. This means you can use a lambda expression that simply
            attempts to look up the desired value without checking if the
            intermediate attributes are available:

                >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code)

        prog : string
            The Graphviz program to use when rendering the graph. 'twopi'
            behaves the best for large graphs, reliably avoiding crossing edges,
            but for moderate graphs 'neato' looks a bit nicer.  For small
            directed graphs, 'dot' may produce a normal-looking cladogram, but
            will cross and distort edges in larger graphs. (The programs 'circo'
            and 'fdp' are not recommended.)
        args : string
            Options passed to the external graphviz program.  Normally not
            needed, but offered here for completeness.

    :Raises:
        MissingPythonDependencyError if NetworkX cannot be imported, or if the
        graphviz layout backend raises ImportError.

    Example
    -------

    >>> import pylab
    >>> from Bio import Phylo
    >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml')
    >>> Phylo.draw_graphviz(tree)
    >>> pylab.show()
    >>> pylab.savefig('apaf.png')
    """
    try:
        import networkx
    except ImportError:
        raise MissingPythonDependencyError(
            "Install NetworkX if you want to use to_networkx.")

    G = to_networkx(tree)

    # Relabel the nodes with integers and build a mapping from each original
    # node to its integer id, so layout positions can be looked up afterwards.
    # Only the call whose TypeError selects the legacy path is guarded.
    try:
        # NetworkX version 1.8 or later (2013-01-20)
        Gi = networkx.convert_node_labels_to_integers(
            G, label_attribute='label')
    except TypeError:
        # Older NetworkX versions (before 1.8)
        Gi = networkx.convert_node_labels_to_integers(
            G, discard_old_labels=False)
        int_labels = Gi.node_labels
    else:
        # Graph.nodes(data=True) yields (node, attribute-dict) pairs on both
        # NetworkX 1.x and 2.x; the Graph.node attribute used previously was
        # removed in NetworkX 2.4.
        int_labels = dict((nodeattrs['label'], integer)
                          for integer, nodeattrs in Gi.nodes(data=True))

    try:
        if hasattr(networkx, 'graphviz_layout'):
            # networkx versions before 1.11 expose the layout at top level
            graphviz_layout = networkx.graphviz_layout
        else:
            # networkx 1.11 and later: use the PyGraphviz-backed layout
            graphviz_layout = networkx.drawing.nx_agraph.graphviz_layout
        posi = graphviz_layout(Gi, prog, args=args)
    except ImportError:
        raise MissingPythonDependencyError(
            "Install PyGraphviz or pydot if you want to use draw_graphviz.")

    def get_label_mapping(G, selection):
        """Apply the user-specified node relabeling."""
        for node in G.nodes():
            if (selection is None) or (node in selection):
                try:
                    label = label_func(node)
                    if label not in (None, node.__class__.__name__):
                        yield (node, label)
                except (LookupError, AttributeError, ValueError):
                    # Deliberate best-effort: nodes lacking the requested
                    # attribute are simply left unlabeled.
                    pass

    if 'nodelist' in kwargs:
        labels = dict(get_label_mapping(G, set(kwargs['nodelist'])))
    else:
        labels = dict(get_label_mapping(G, None))
        # With no explicit nodelist, only the labeled nodes are drawn.
        kwargs['nodelist'] = list(labels.keys())

    # Per-edge styling falls back to black / width 1.0 whenever the edge data
    # is missing, is not a dict, or holds a falsy value.
    if 'edge_color' not in kwargs:
        kwargs['edge_color'] = [
            (e[2].get('color') or 'k') if isinstance(e[2], dict) else 'k'
            for e in G.edges(data=True)
        ]
    if 'width' not in kwargs:
        kwargs['width'] = [
            (e[2].get('width') or 1.0) if isinstance(e[2], dict) else 1.0
            for e in G.edges(data=True)
        ]

    # Translate the layout (keyed by integer id) back to the original nodes.
    posn = dict((n, posi[int_labels[n]]) for n in G)
    networkx.draw(G, posn, labels=labels, with_labels=True,
                  node_color=node_color, **kwargs)
import warnings from Bio import BiopythonDeprecationWarning with warnings.catch_warnings(): warnings.simplefilter('ignore', BiopythonDeprecationWarning) warnings.simplefilter('ignore', RuntimeWarning) # for the trie module try: from Bio import trie from Bio import triefind except ImportError: import os from Bio import MissingPythonDependencyError if os.name == "java": message = "Not available on Jython, Bio.trie requires compiled C code." else: message = "Could not import Bio.trie, check C code was compiled." raise MissingPythonDependencyError(message) class TestTrie(unittest.TestCase): def test_get_set(self): trieobj = trie.trie() trieobj["hello world"] = "s1" trieobj["bye"] = "s2" trieobj["hell sucks"] = "s3" trieobj["hebee"] = "s4" self.assertEqual(trieobj["hello world"], "s1") self.assertEqual(trieobj["bye"], "s2") self.assertEqual(trieobj["hell sucks"], "s3") self.assertEqual(trieobj["hebee"], "s4") trieobj["blah"] = "s5" self.assertEqual(trieobj["blah"], "s5")
def __init__(self, index_filename, filenames, proxy_factory, format,
             key_function, repr, max_open=10):
    """Open an existing SQLite index, or build a new one, for several files.

    If ``index_filename`` already exists as a file it is opened and its
    contents are checked against the arguments; otherwise a new database
    is created there and filled by iterating over every file.

    Arguments:
     - index_filename - path of the SQLite database file.
     - filenames - iterable of file names (copied into a list); may be
       None or empty when an existing index is being reused.
     - proxy_factory - callable used in two ways: ``proxy_factory(format)``
       must be truthy for a supported format, and
       ``proxy_factory(format, filename)`` must return an object that
       iterates as ``(key, offset, length)`` tuples and has a ``_handle``
       attribute with a ``close()`` method.
     - format - format name; when reusing an index it may be falsy, in
       which case the format recorded in the index is used.
     - key_function - optional callable applied to every key before it
       is stored; falsy means keys are stored unchanged.
     - repr - stored as ``self._repr`` (not used further in this method).
     - max_open - at most this many proxies are kept open after indexing.

    Raises MissingPythonDependencyError if ``_sqlite`` is falsy, and
    ValueError if an existing index is unfinished, inconsistent with the
    arguments or not readable as an index, if the format is unsupported,
    if format/filenames are missing for a new index, or if the unique key
    index cannot be created.
    """
    self._proxy_factory = proxy_factory
    self._repr = repr
    # Only populated when a new index is built; stays empty on reuse.
    random_access_proxies = {}
    # TODO? - Don't keep filename list in memory (just in DB)?
    # Should save a chunk of memory if dealing with 1000s of files.
    # Furthermore could compare a generator to the DB on reloading
    # (no need to turn it into a list)
    if not _sqlite:
        # Hack for Jython (or if Python is compiled without it)
        from Bio import MissingPythonDependencyError
        raise MissingPythonDependencyError("Requires sqlite3, which is "
                                           "included Python 2.5+")
    if filenames is not None:
        filenames = list(filenames)  # In case it was a generator
    if os.path.isfile(index_filename):
        # Reuse the index.
        con = _sqlite.connect(index_filename)
        self._con = con
        # Check the count...
        # The connection is closed explicitly before every ValueError below.
        try:
            count, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("count", )).fetchone()
            self._length = int(count)
            # -1 is the placeholder written when index creation starts and
            # is only replaced with the real count once it has finished.
            if self._length == -1:
                con.close()
                raise ValueError("Unfinished/partial database")
            # Cross-check the recorded count against the rows actually stored.
            count, = con.execute(
                "SELECT COUNT(key) FROM offset_data;").fetchone()
            if self._length != int(count):
                con.close()
                raise ValueError("Corrupt database? %i entries not %i" %
                                 (int(count), self._length))
            self._format, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("format", )).fetchone()
            # A caller-supplied format must agree with the stored one.
            if format and format != self._format:
                con.close()
                raise ValueError("Index file says format %s, not %s" %
                                 (self._format, format))
            self._filenames = [
                row[0]
                for row in con.execute("SELECT name FROM file_data "
                                       "ORDER BY file_number;").fetchall()
            ]
            # Caller-supplied filenames must match in number and in order.
            if filenames and len(filenames) != len(self._filenames):
                con.close()
                raise ValueError("Index file says %i files, not %i" %
                                 (len(self._filenames), len(filenames)))
            if filenames and filenames != self._filenames:
                con.close()
                raise ValueError("Index file has different filenames")
        except _OperationalError as err:
            # SQLite could not run the queries above against this file.
            con.close()
            raise ValueError("Not a Biopython index database? %s" % err)
        # Now we have the format (from the DB if not given to us),
        if not proxy_factory(self._format):
            con.close()
            raise ValueError("Unsupported format '%s'" % self._format)
    else:
        self._filenames = filenames
        self._format = format
        # Validate the arguments before the database file is created.
        if not format or not filenames:
            raise ValueError("Filenames to index and format required")
        if not proxy_factory(format):
            raise ValueError("Unsupported format '%s'" % format)
        # Create the index
        con = _sqlite.connect(index_filename)
        self._con = con
        #print("Creating index")
        # Sqlite PRAGMA settings for speed
        con.execute("PRAGMA synchronous=OFF")
        con.execute("PRAGMA locking_mode=EXCLUSIVE")
        # Don't index the key column until the end (faster)
        #con.execute("CREATE TABLE offset_data (key TEXT PRIMARY KEY, "
        #            "offset INTEGER);")
        con.execute("CREATE TABLE meta_data (key TEXT, value TEXT);")
        # Placeholder count of -1 marks the index as unfinished.
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("count", -1))
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("format", format))
        # TODO - Record the alphabet?
        # TODO - Record the file size and modified date?
        con.execute(
            "CREATE TABLE file_data (file_number INTEGER, name TEXT);")
        con.execute(
            "CREATE TABLE offset_data (key TEXT, file_number INTEGER, offset INTEGER, length INTEGER);"
        )
        count = 0
        for i, filename in enumerate(filenames):
            con.execute(
                "INSERT INTO file_data (file_number, name) VALUES (?,?);",
                (i, filename))
            random_access_proxy = proxy_factory(format, filename)
            # Lazily turn each (key, offset, length) from the proxy into a
            # (key, file_number, offset, length) row for offset_data.
            if key_function:
                offset_iter = ((key_function(k), i, o, l)
                               for (k, o, l) in random_access_proxy)
            else:
                offset_iter = ((k, i, o, l)
                               for (k, o, l) in random_access_proxy)
            # Insert in batches of up to 100 rows, committing each batch.
            while True:
                batch = list(itertools.islice(offset_iter, 100))
                if not batch:
                    break
                #print("Inserting batch of %i offsets, %s ... %s" \
                #      % (len(batch), batch[0][0], batch[-1][0]))
                con.executemany(
                    "INSERT INTO offset_data (key,file_number,offset,length) VALUES (?,?,?,?);",
                    batch)
                con.commit()
                count += len(batch)
            # Keep the first max_open proxies open; close the handle of
            # any proxy beyond that limit.
            if len(random_access_proxies) < max_open:
                random_access_proxies[i] = random_access_proxy
            else:
                random_access_proxy._handle.close()
        self._length = count
        #print("About to index %i entries" % count)
        # Building the unique index last is what detects duplicate keys.
        try:
            con.execute("CREATE UNIQUE INDEX IF NOT EXISTS "
                        "key_index ON offset_data(key);")
        except _IntegrityError as err:
            # NOTE(review): self._proxies is set before self.close(),
            # presumably because close() releases those proxies - confirm.
            self._proxies = random_access_proxies
            self.close()
            con.close()
            raise ValueError("Duplicate key? %s" % err)
        con.execute("PRAGMA locking_mode=NORMAL")
        # Replace the -1 placeholder with the real number of entries.
        con.execute("UPDATE meta_data SET value = ? WHERE key = ?;",
                    (count, "count"))
        con.commit()
        #print("Index created")
    self._proxies = random_access_proxies
    self._max_open = max_open
    self._index_filename = index_filename
    self._key_function = key_function
#!/usr/bin/env python # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. # Remove unittest2 import after dropping support for Python2.6 import sys if sys.version_info < (2, 7): try: import unittest2 as unittest except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Under Python 2.6 this test needs the unittest2 library") else: import unittest from Bio import pairwise2 from Bio.SubsMat.MatrixInfo import blosum62 class TestPairwiseErrorConditions(unittest.TestCase): """Test several error conditions""" def test_function_name(self): """Test for wrong function names""" # Function name pattern must be globalXX or localXX self.assertRaises(AttributeError, lambda: pairwise2.align.globalxxx) self.assertRaises(AttributeError, lambda: pairwise2.align.localxxx) self.assertRaises(AttributeError, lambda: pairwise2.align.glocalxx) # First X must be from (x, m, d, c), second from (x, s, d, c)
# This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Unit tests for the MMCIF portion of the Bio.PDB module.""" import unittest try: import numpy from numpy import dot # Missing on old PyPy's micronumpy del dot from numpy.linalg import svd, det # Missing in PyPy 2.0 numpypy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.PDB.") from Bio.Seq import Seq from Bio.Alphabet import generic_protein from Bio.PDB.PDBExceptions import PDBConstructionException, PDBConstructionWarning from Bio.PDB import PPBuilder, CaPPBuilder from Bio.PDB.MMCIFParser import MMCIFParser class ParseReal(unittest.TestCase): """Testing with real CIF file(s).""" def test_parser(self): """Extract polypeptides from 1A80."""
#!/usr/bin/env python """Test for the Uniprot parser on Uniprot XML files. """ from __future__ import with_statement import os import unittest from Bio import SeqIO from Bio.SeqRecord import SeqRecord #Left as None if the import within UniProtIO fails if SeqIO.UniprotIO.ElementTree is None: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "No ElementTree module was found. " "Use Python 2.5+, lxml or elementtree if you " "want to use Bio.SeqIO.UniprotIO.") from seq_tests_common import compare_reference, compare_record class TestUniprot(unittest.TestCase): def test_uni001(self): "Parsing Uniprot file uni001" filename = 'uni001' # test the record parser datafile = os.path.join('SwissProt', filename) with open(datafile) as test_handle: seq_record = SeqIO.read(test_handle, "uniprot-xml")
# as part of this package. """Tests for SeqIO PdbIO module.""" import unittest import warnings try: import numpy from numpy import dot # Missing on PyPy's micronumpy del dot # We don't need this (?) but Bio.PDB imports it automatically :( from numpy.linalg import svd, det # Missing in PyPy 2.0 numpypy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use PDB formats with SeqIO.") from None from Bio import SeqIO from Bio import BiopythonParserWarning from Bio.PDB.PDBExceptions import PDBConstructionWarning def SeqresTestGenerator(extension, parser): """Test factory for tests reading SEQRES (or similar) records. This is a factory returning a parameterised superclass for tests reading sequences from the sequence records of structure files. Arguments: extension: The extension of the files to read from the ``PDB`` directory (e.g.
The example itself is essentially a repeat from test_GraphicsGeneral.py. """ import os import random import unittest from Bio import MissingExternalDependencyError from Bio import MissingPythonDependencyError try: # Skip the test if reportlab is not installed import reportlab as r del r except: raise MissingPythonDependencyError( "Install ReportLab if you want to use Bio.Graphics.") try: # Skip the test if reportlab is not installed from reportlab.graphics import renderPM except: raise MissingPythonDependencyError( "Install ReportLab's renderPM module if you want to create " "bitmaps with Bio.Graphics.") try: # Skip the test if PIL is not installed import Image as i del i except: raise MissingPythonDependencyError( "Install PIL (Python Imaging Library) if you want to create " "bitmaps with Bio.Graphics.")
import requires_internet requires_internet.check() from Bio import Entrez from Bio import Medline from Bio import SeqIO from Bio.SeqRecord import SeqRecord if os.name == 'java': try: from xml.parsers.expat import XML_PARAM_ENTITY_PARSING_ALWAYS del XML_PARAM_ENTITY_PARSING_ALWAYS except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "The Bio.Entrez XML parser fails on " "Jython, see http://bugs.jython.org/issue1447") # This lets us set the email address to be sent to NCBI Entrez: Entrez.email = "*****@*****.**" URL_HEAD = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" URL_TOOL = "tool=biopython" URL_EMAIL = "email=biopython-dev%40biopython.org" class EntrezOnlineCase(unittest.TestCase): def test_read_from_url(self): """Test Entrez.read from URL""" handle = Entrez.einfo() self.assertTrue(handle.url.startswith(URL_HEAD + "einfo.fcgi?"),
from io import BytesIO h = gzip.GzipFile(fileobj=BytesIO(bgzf._bgzf_eof)) try: data = h.read() h.close() assert not data, "Should be zero length, not %i" % len(data) return False except TypeError as err: # TypeError: integer argument expected, got 'tuple' return True if _have_bug17666(): from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Your Python has a broken gzip library, see " "http://bugs.python.org/issue17666 for details") class BgzfTests(unittest.TestCase): def setUp(self): self.temp_file = "temp.bgzf" if os.path.isfile(self.temp_file): os.remove(self.temp_file) def tearDown(self): if os.path.isfile(self.temp_file): os.remove(self.temp_file) def rewrite(self, compressed_input_file, output_file): h = gzip.open(compressed_input_file, "rb")
>>> >>> for motif in motifs: ... pass # do something with the motif """ from __future__ import print_function import warnings from Bio import BiopythonWarning from Bio import MissingPythonDependencyError try: import MySQLdb as mdb except: raise MissingPythonDependencyError("Install MySQLdb if you want to use " "Bio.motifs.jaspar.db") from Bio.Alphabet.IUPAC import unambiguous_dna as dna from Bio.motifs import jaspar, matrix JASPAR_DFLT_COLLECTION = 'CORE' class JASPAR5(object): """ Class representing a JASPAR5 DB. The methods within are loosely based on the perl TFBS::DB::JASPAR5 module. Note: We will only implement reading of JASPAR motifs from the DB. Unlike the perl module, we will not attempt to implement any methods to store JASPAR motifs or create a new DB at this time.
def to_networkx(tree):
    """Return a networkx graph with the same topology as a Tree.

    A ``DiGraph`` is built when ``tree.rooted`` is true, otherwise a plain
    ``Graph``.  Every clade becomes a node and every parent/child pair an
    edge.  How much data an edge carries depends on the installed
    NetworkX version:

     - 1.0 or later: ``weight`` (the child's branch length, or 1.0 when
       that is falsy) plus ``color`` (as hex) and ``width`` when either
       the child or its parent defines them.  A child lacking a value
       inherits its parent's, and the inherited value is also written
       back onto the child clade, so the tree itself is modified.
     - 0.99: the weight only, passed positionally.
     - earlier: bare edges with no data.

    Raises MissingPythonDependencyError if NetworkX cannot be imported.
    """
    try:
        import networkx
    except ImportError:
        raise MissingPythonDependencyError(
            "Install NetworkX if you want to use to_networkx.")

    def _is_set(node, attr):
        """True if the node has the attribute and it is not None."""
        return hasattr(node, attr) and getattr(node, attr) is not None

    # (attribute name, converter for the value stored on the edge)
    cascading = (
        ('color', lambda shade: shade.to_hex()),
        ('width', lambda thickness: thickness),
    )

    def _attributed_edge(graph, parent, child):
        """NetworkX 1.0+: float weight plus cascading color/width."""
        graph.add_edge(parent, child, weight=child.branch_length or 1.0)
        for attr, encode in cascading:
            if _is_set(child, attr):
                graph[parent][child][attr] = encode(getattr(child, attr))
            elif _is_set(parent, attr):
                # Cascade: the child takes over its parent's value
                inherited = getattr(parent, attr)
                graph[parent][child][attr] = encode(inherited)
                setattr(child, attr, inherited)

    def _weighted_edge(graph, parent, child):
        """NetworkX 0.99: the weight is the only extra edge data."""
        graph.add_edge(parent, child, (child.branch_length or 1.0))

    def _bare_edge(graph, parent, child):
        """Pre-0.99 NetworkX: edges accept no additional data."""
        graph.add_edge(parent, child)

    # NB (1/2010): the networkx API stabilized at v.1.0; Ubuntu Lucid LTS
    # shipped v0.99, so all three generations are catered for.
    installed = networkx.__version__
    if installed >= '1.0':
        add_edge = _attributed_edge
    elif installed >= '0.99':
        add_edge = _weighted_edge
    else:
        add_edge = _bare_edge

    graph_type = networkx.DiGraph if tree.rooted else networkx.Graph
    result = graph_type()
    result.add_node(tree.root)

    def _descend(parent):
        """Add each child of parent, then recurse (pre-order walk)."""
        for child in parent:
            result.add_node(child.root)
            add_edge(result, parent.root, child.root)
            _descend(child)

    _descend(tree.root)
    return result
# This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Parser for PDB files.""" import warnings try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use the PDB parser." ) from None from Bio.File import as_handle from Bio.PDB.PDBExceptions import PDBConstructionException from Bio.PDB.PDBExceptions import PDBConstructionWarning from Bio.PDB.StructureBuilder import StructureBuilder from Bio.PDB.parse_pdb_header import _parse_pdb_header_list # If PDB spec says "COLUMNS 18-20" this means line[17:20] class PDBParser:
def __init__(self, index_filename, filenames, format, alphabet, key_function, max_open=10): random_access_proxies = {} #TODO? - Don't keep filename list in memory (just in DB)? #Should save a chunk of memory if dealing with 1000s of files. #Furthermore could compare a generator to the DB on reloading #(no need to turn it into a list) if not _sqlite: #Hack for Python 2.4 (of if Python is compiled without it) from Bio import MissingPythonDependencyError raise MissingPythonDependencyError("Requires sqlite3, which is " "included Python 2.5+") if filenames is not None: filenames = list(filenames) #In case it was a generator if os.path.isfile(index_filename): #Reuse the index. con = _sqlite.connect(index_filename) self._con = con #Check the count... try: count, = con.execute( "SELECT value FROM meta_data WHERE key=?;", ("count", )).fetchone() self._length = int(count) if self._length == -1: con.close() raise ValueError("Unfinished/partial database") count, = con.execute( "SELECT COUNT(key) FROM offset_data;").fetchone() if self._length <> int(count): con.close() raise ValueError("Corrupt database? %i entries not %i" \ % (int(count), self._length)) self._format, = con.execute( "SELECT value FROM meta_data WHERE key=?;", ("format", )).fetchone() if format and format != self._format: con.close() raise ValueError("Index file says format %s, not %s" \ % (self._format, format)) self._filenames = [row[0] for row in \ con.execute("SELECT name FROM file_data " "ORDER BY file_number;").fetchall()] if filenames and len(filenames) != len(self._filenames): con.close() raise ValueError("Index file says %i files, not %i" \ % (len(self.filenames) != len(filenames))) if filenames and filenames != self._filenames: con.close() raise ValueError("Index file has different filenames") except _OperationalError, err: con.close() raise ValueError("Not a Biopython index database? 
%s" % err) #Now we have the format (from the DB if not given to us), try: proxy_class = _FormatToRandomAccess[self._format] except KeyError: con.close() raise ValueError("Unsupported format '%s'" % self._format)