def _normalize_matrix(df):
    """
    Normalizes a matrix df to a probability matrix prob_df by dividing
    every row by its sum.

    parameters
    ----------
    df: (dataframe)
        Matrix to normalize. After validation by validate_matrix(), all
        entries must be >= 0 and no row may sum to (nearly) zero.

    returns
    -------
    prob_df: (dataframe)
        A normalized copy of df that has been passed through
        validate_matrix(..., matrix_type='probability').
    """

    # Validate matrix
    df = validate_matrix(df)

    # Make sure all df values are greater than or equal to zero
    check(bool((df.values >= 0).all()),
          'Some data frame entries are negative.')

    # Row sums: axis=1 adds up the columns within each row
    sums = df.sum(axis=1).values

    # If any row sums are close to zero, abort (division would blow up)
    check(not np.isclose(sums, 0.0).any(),
          'Some rows in df sum to nearly zero.')

    # Create normalized version of input matrix; writing through .loc
    # keeps the index and columns of df
    prob_df = df.copy()
    prob_df.loc[:, :] = df.values / sums[:, np.newaxis]

    # Validate and return probability matrix
    prob_df = validate_matrix(prob_df, matrix_type='probability')
    return prob_df
def highlight_position(self, p, **kwargs):
    """
    Highlights one position of the logo with a rectangular box.

    parameters
    ----------
    p: (int)
        The single position to highlight.

    **kwargs:
        Forwarded unchanged to highlight_position_range().

    returns
    -------
    None
    """

    # p must be an int; note that p is NOT checked against the range of
    # positions present in the logo
    p_is_int = isinstance(p, int)
    check(p_is_int, 'type(p) = %s must be of type int' % type(p))

    # a single position is just a range whose two ends coincide
    self.highlight_position_range(pmin=p, pmax=p, **kwargs)
def fade_glyphs_in_probability_logo(self, v_alpha0=0.0, v_alpha1=1.0):
    """
    Sets the opacity of every glyph from its value in the (probability)
    matrix self.df.

    parameters
    ----------
    v_alpha0, v_alpha1: (number in [0,1])
        Matrix values that map to alpha=0 and alpha=1, respectively.
        Values in between are mapped linearly. Must satisfy
        v_alpha0 < v_alpha1.

    returns
    -------
    None
    """

    number_types = (float, int)

    # v_alpha0: must be a number in [0, 1]
    check(isinstance(v_alpha0, number_types),
          'type(v_alpha0) = %s must be a number' % type(v_alpha0))
    check(0.0 <= v_alpha0 <= 1.0,
          'v_alpha0 must be between 0 and 1; value is %f.' % v_alpha0)

    # v_alpha1: must be a number in [0, 1]
    check(isinstance(v_alpha1, number_types),
          'type(v_alpha1) = %s must be a number' % type(v_alpha1))
    check(0.0 <= v_alpha1 <= 1.0,
          'v_alpha1 must be between 0 and 1; value is %f' % v_alpha1)

    # the two thresholds must be strictly ordered
    check(v_alpha0 < v_alpha1,
          'must have v_alpha0 < v_alpha1;'
          'here, v_alpha0 = %f and v_alpha1 = %f' % (v_alpha0, v_alpha1))

    # the logo's matrix has to be a probability matrix
    self.df = validate_matrix(self.df, matrix_type='probability')

    # visit every (position, character) cell
    for pos in self.ps:
        for char in self.cs:

            # matrix value and the glyph drawn for it
            value = self.df.loc[pos, char]
            glyph = self.glyph_df.loc[pos, char]

            # clamp below v_alpha0 / above v_alpha1, interpolate between
            if value <= v_alpha0:
                new_alpha = 0
            elif value >= v_alpha1:
                new_alpha = 1
            else:
                new_alpha = (value - v_alpha0) / (v_alpha1 - v_alpha0)

            glyph.set_attributes(alpha=new_alpha)
def open_example_datafile(name=None, print_description=True):
    """
    Returns a file handle to an example dataset

    parameters
    ----------
    name: (None or str)
        Name of example datafile. Must be one of the names returned by
        list_example_datafiles(); the default None is therefore rejected.

    print_description: (bool)
        If true, the '#'-prefixed header lines of the datafile are
        printed as a description.

    returns
    -------
    f: (file handle)
        A handle to the requested file. Files ending in '.gz' are opened
        with gzip.open(file_name, 'r'); all others with
        open(file_name, 'r'). The caller is responsible for closing it.
    """

    # get list of valid data files
    valid_datafiles = list_example_datafiles()

    # check that specified datafile is valid
    check(name in valid_datafiles,
          'Matrix "%s" not recognized. Please choose from: \n%s'
          % (name, '\n'.join([repr(x) for x in valid_datafiles])))

    # check that print_description is boolean
    check(isinstance(print_description, bool),
          'type(print_description) = %s; must be of type bool '
          % type(print_description))

    # set datafile file name
    file_name = '%s/%s' % (data_dir, name)

    # internal sanity check: a listed datafile should exist on disk
    assert os.path.isfile(file_name), 'File %s does not exist!' % file_name

    is_gzipped = file_name.endswith('.gz')

    # if user wants a description of the datafile, provide it
    if print_description:
        print('Description of example matrix "%s":' % name)

        # a gzipped file must be decompressed to read its header lines;
        # reading it with plain open() would yield compressed bytes
        opener = gzip.open if is_gzipped else open
        with opener(file_name, 'rt') as f:
            description = "".join(
                line for line in f if line.startswith('#'))
        print(description)

    # return file handle to user (modes kept as before for compatibility)
    if is_gzipped:
        return gzip.open(file_name, 'r')
    return open(file_name, 'r')
def get_example_matrix(name=None, print_description=True):
    """
    Loads one of the bundled example matrices as a data frame.

    parameters
    ----------
    name: (None or str)
        Name of the example matrix. Must be one of the names returned
        by list_example_matrices().

    print_description: (bool)
        If True, the '#'-prefixed header lines of the matrix file are
        printed before the matrix is loaded.

    returns
    -------
    df: (data frame)
        The matrix read from '<matrix_dir>/<name>.txt' (tab-separated,
        first column used as index, '#' lines treated as comments).
    """

    # the requested name has to be one of the known matrices
    known_names = list_example_matrices()
    choices = '\n'.join([repr(x) for x in known_names])
    check(name in known_names,
          'Matrix "%s" not recognized. Please choose from: \n%s'
          % (name, choices))

    # print_description has to be a bool
    check(isinstance(print_description, bool),
          'type(print_description) = %s; must be of type bool '
          % type(print_description))

    # locate the matrix file
    file_name = '%s/%s.txt' % (matrix_dir, name)
    assert os.path.isfile(file_name), 'File %s does not exist!' % file_name

    # optionally echo the header comment lines
    if print_description:
        print('Description of example matrix "%s":' % name)
        with open(file_name, 'r') as handle:
            header = [row for row in handle.readlines() if row[:1] == '#']
        print("".join(header))

    # parse and return the matrix
    return pd.read_csv(file_name, sep='\t', index_col=0, comment='#')
def style_single_glyph(self, p, c, **kwargs):
    """
    Changes the attributes of the one glyph at position p, character c.

    parameters
    ----------
    p: (int)
        Position of the glyph; must be in the index of self.glyph_df.

    c: (str of length 1)
        Character of the glyph; must be a column of self.glyph_df.

    **kwargs:
        Keyword arguments handed to Glyph.set_attributes()

    returns
    -------
    None
    """

    # p: integer type, and an existing position
    p_type_ok = isinstance(p, (int, np.int64))
    check(p_type_ok,
          'type(p) = %s must be of type int or numpy.int64' % type(p))
    check(p in self.glyph_df.index,
          'p=%s is not a valid position' % p)

    # c: a one-character string that names an existing column
    check(isinstance(c, str),
          'type(c) = %s must be of type str' % type(c))
    check(len(c) == 1,
          'c = %s; must have length 1.' % repr(c))
    check(c in self.glyph_df.columns,
          'c=%s is not a valid character' % c)

    # look the glyph up and restyle it
    glyph = self.glyph_df.loc[p, c]
    glyph.set_attributes(**kwargs)
def draw(self, clear=False):
    """
    Renders all glyphs and the baseline on self.ax and fits the axis
    limits to the glyphs.

    parameters
    ----------
    clear: (bool)
        If True, self.ax is cleared before anything is drawn.

    returns
    -------
    None
    """

    # clear must be a bool
    check(isinstance(clear, bool),
          'type(clear) = %s; must be of type bool ' % type(clear))

    # start from an empty Axes if requested
    if clear:
        self.ax.clear()

    # render every glyph, then the baseline
    for glyph in self.glyph_list:
        glyph.draw()
    self.draw_baseline(linewidth=self.baseline_width)

    # x-limits: each glyph spans half its width on either side of p
    left_edges = [glyph.p - .5*glyph.width for glyph in self.glyph_list]
    right_edges = [glyph.p + .5*glyph.width for glyph in self.glyph_list]
    self.ax.set_xlim([min(left_edges), max(right_edges)])

    # y-limits: lowest floor to highest ceiling over all glyphs
    floors = [glyph.floor for glyph in self.glyph_list]
    ceilings = [glyph.ceiling for glyph in self.glyph_list]
    self.ax.set_ylim([min(floors), max(ceilings)])

    # apply the stored spine visibility, unless it has been cleared
    if self.show_spines is not None:
        self.style_spines(visible=self.show_spines)
def demo(name='fig1b'):
    """
    Performs a demonstration of the Logomaker software by printing and
    then executing one of the demo scripts in the 'examples' directory
    next to this module.

    parameters
    -----------
    name: (str)
        Name of the demo. Valid names are derived from the files named
        'demo_<name>.py' found in the examples directory, e.g.
        {'fig1b', 'fig1c', 'fig1d', 'fig1e', 'fig1f', 'logo'}.

    returns
    -------
    fig: (matplotlib Figure)
        The current matplotlib Figure (plt.gcf()) after the demo script
        has run.
    """

    # build dict of demo names and corresponding file names
    example_dir = '%s/examples' % os.path.dirname(__file__)
    examples_dict = {}
    for base_name in os.listdir(example_dir):
        # anchored so that e.g. 'demo_x.pyc' or 'demo_x.py~' is skipped
        if re.match(r'demo_.*\.py$', base_name):
            # 'demo_fig1b.py' -> 'fig1b': last '_' segment minus '.py'
            key = base_name.split('_')[-1][:-3]
            examples_dict[key] = '%s/%s' % (example_dir, base_name)

    # check that name is valid
    check(name in examples_dict.keys(),
          'name = %s is not valid. Must be one of %s'
          % (repr(name), examples_dict.keys()))

    # read the example file once; the same text is printed and executed
    file_name = examples_dict[name]
    with open(file_name, 'r') as f:
        content = f.read()
    line = '-------------------------------------------------------------'
    print('Running %s:\n%s\n%s\n%s' %
          (file_name, line, content, line))

    # NOTE: exec runs code shipped in the package's own examples
    # directory; never point this at untrusted files
    exec(content)

    # return the current matplotlib Figure object
    return plt.gcf()
def _counts_mat_to_probability_mat(counts_df, pseudocount=1.0):
    """
    Turns a counts matrix into a probability matrix: pseudocount is
    added to every entry and each row is divided by its sum.

    parameters
    ----------
    counts_df: (dataframe)
        The counts matrix.

    pseudocount: (number >= 0)
        Value added to every count before normalizing.

    returns
    -------
    prob_df: (dataframe)
        The resulting matrix, validated as a probability matrix.
    """

    # validate inputs
    counts_df = validate_matrix(counts_df)
    check(pseudocount >= 0, "pseudocount must be >= 0.")

    # add the pseudocount and compute the per-row totals
    padded = counts_df.values + pseudocount
    row_totals = padded.sum(axis=1)

    # fill a copy of counts_df (same index/columns) with the ratios
    prob_df = counts_df.copy()
    prob_df.loc[:, :] = padded / row_totals[:, np.newaxis]
    prob_df = _normalize_matrix(prob_df)

    # validate as a probability matrix and return
    return validate_matrix(prob_df, matrix_type='probability')
def validate_probability_mat(df):
    """
    Verifies that the input dataframe df indeed represents a
    probability matrix. Renormalizes df with a text warning if it is not
    already normalized. Fails a check() if df cannot be reliably
    normalized (negative entries, or a row summing to nearly zero).

    parameters
    ----------
    df: (dataframe)
        A pandas dataframe where each row represents an (integer)
        position and each column represents a (single) character.

    returns
    -------
    prob_df: (dataframe)
        A cleaned-up and normalized version of df (if possible).
    """

    # Validate as a matrix. Make sure this contains no NaN values
    prob_df = validate_matrix(df, allow_nan=False)

    # Make sure all values are non-negative
    check(all(prob_df.values.ravel() >= 0),
          'not all values in df are >=0.')

    # Row sums (axis=1 adds across the columns of each row)
    sums = prob_df.sum(axis=1).values

    # If any row sums are close to zero, abort
    check(not any(np.isclose(sums, 0.0)),
          'some rows in prob_df sum to nearly zero.')

    # If any sums are not close to one, renormalize all rows
    if not all(np.isclose(sums, 1.0)):
        print('in validate_probability_mat(): '
              'Row sums in df are not close to 1. '
              'Renormalizing rows...')
        prob_df.loc[:, :] = prob_df.values / sums[:, np.newaxis]

    # Return validated probability matrix to user
    return prob_df
def style_glyphs_in_sequence(self, sequence, **kwargs):
    """
    Restyles, at every position of the logo, the glyph of the character
    that sequence has at that position.

    parameters
    ----------
    sequence: (str)
        A string of length self.L giving the character to restyle at
        each position. Characters not present in self.cs are skipped.

    **kwargs:
        Keyword arguments handed to Glyph.set_attributes()

    returns
    -------
    None
    """

    # sequence must be a str ...
    check(isinstance(sequence, str),
          'type(sequence) = %s must be of type str' % type(sequence))

    # ... with exactly one character per logo position
    check(len(sequence) == self.L,
          'sequence to restyle (length %d) ' % len(sequence) +
          'must have same length as logo (length %d).' % self.L)

    # walk the logo positions in index order
    for offset, pos in enumerate(self.glyph_df.index):
        char = sequence[offset]

        # characters the logo does not know about are ignored
        if char not in self.cs:
            continue

        self.style_single_glyph(pos, char, **kwargs)
def get_rgb(color_spec):
    """
    Safely returns an RGB np.ndarray given a valid color specification

    parameters
    ----------
    color_spec: (str, list, tuple, or np.ndarray)
        Either a color string understood by matplotlib's to_rgb(), or a
        length-3 array-like of numbers between 0 and 1 (inclusive).

    returns
    -------
    rgb: (np.ndarray)
        The RGB values of color_spec. Invalid specifications are
        reported through check().
    """

    # TODO: the following code should be reviewed for edge-cases:
    # initializing rgb to avoid a referenced-before-assignment error
    rgb = None

    # If color specification is a string
    if isinstance(color_spec, str):
        try:
            rgb = np.array(to_rgb(color_spec))

        # to_rgb signals an unrecognized color string with ValueError;
        # only that is translated, so unrelated errors (and
        # KeyboardInterrupt) are no longer swallowed
        except ValueError:
            check(False, 'invalid choice: color_spec=%s' % color_spec)

    # Otherwise, if color_spec is array-like, it should
    # specify RGB values; convert to np.ndarray
    elif isinstance(color_spec, (list, tuple, np.ndarray)):

        # color_spec must have length 3 to be RGB
        check(len(color_spec) == 3,
              'color_spec, if array, must be of length 3.')

        # color_spec must only contain numbers between 0 and 1
        check(all(0 <= x <= 1 for x in color_spec),
              'Values of color_spec must be between 0 and 1 inclusive.')

        # Cast color_spec as RGB
        rgb = np.array(color_spec)

    # Otherwise, report the unsupported type
    else:
        check(False, 'type(color_spec) = %s is invalid.' % type(color_spec))

    # Return RGB as an np.ndarray
    return rgb
def draw_baseline(self, zorder=-1, color='black', linewidth=0.5, **kwargs):
    """
    Adds a horizontal line to self.ax via ax.axhline().

    parameters
    ----------
    zorder: (number)
        Drawing order of the baseline relative to other objects on ax.

    color: (matplotlib color)
        Baseline color; a named matplotlib color or an RGB array.

    linewidth: (number >= 0)
        Width of the baseline.

    **kwargs:
        Additional keyword arguments handed to ax.axhline()

    returns
    -------
    None
    """

    number_types = (float, int)

    # zorder has to be a number
    check(isinstance(zorder, number_types),
          'type(zorder) = %s; must a float or int.' % type(zorder))

    # get_rgb both validates the color and converts it to RGB
    rgb = get_rgb(color)

    # linewidth has to be a non-negative number
    check(isinstance(linewidth, number_types),
          'type(linewidth) = %s; must be a number ' % type(linewidth))
    check(linewidth >= 0, 'linewidth must be >= 0')

    # draw the line
    self.ax.axhline(zorder=zorder,
                    color=rgb,
                    linewidth=linewidth,
                    **kwargs)
def _get_background_mat(df, background):
    """
    Creates a background matrix given a background specification. There
    are three possibilities:

    1. background is None => out_df represents a uniform background

    2. background is a vector => this vector is normalized then used as
       the entries of the rows of out_df. Vector must be the same length
       as the number of columns in df

    3. background is a dataframe => it is then normalized and used as
       out_df. In this case, background must have the same rows and cols
       as df

    parameters
    ----------
    df: (dataframe)
        Matrix used as the template (shape, index, columns).

    background: (None, array-like, or dataframe)
        The background specification described above.

    returns
    -------
    bg_df: (dataframe)
        Background matrix, validated as a probability matrix.
    """

    # Get dimensions of df
    num_pos, num_cols = df.shape

    # Create background using df as template
    bg_df = df.copy()

    # If background is not specified, use uniform background
    if background is None:
        bg_df.loc[:, :] = 1 / num_cols

    # If background is array-like: broadcast it to every row
    elif isinstance(background, (np.ndarray, list, tuple)):
        background = np.array(background)
        check(len(background) == num_cols,
              'df and background have mismatched dimensions.')
        bg_df.loc[:, :] = background[np.newaxis, :]
        bg_df = _normalize_matrix(bg_df)

    # If background is a dataframe
    elif isinstance(background, pd.core.frame.DataFrame):
        bg_df = validate_matrix(background)

        # Compare lengths first: an elementwise == between indexes of
        # different lengths raises instead of evaluating to False
        same_index = (len(df.index) == len(bg_df.index)
                      and all(df.index == bg_df.index))
        check(same_index,
              'Error: df and bg_mat have different indexes.')

        same_columns = (len(df.columns) == len(bg_df.columns)
                        and all(df.columns == bg_df.columns))
        check(same_columns,
              'Error: df and bg_mat have different columns.')

        bg_df = _normalize_matrix(bg_df)

    # validate as probability matrix
    bg_df = validate_matrix(bg_df, matrix_type='probability')
    return bg_df
def transform_matrix(df, center_values=False, normalize_values=False,
                     from_type=None, to_type=None, background=None,
                     pseudocount=1):
    """
    Performs transformations on a matrix. There are three types of
    transformations that can be performed:

    1. Center values:
        Subtracts the mean from each row in df. This is common for weight
        matrices or energy matrices. To do this, set center_values=True.

    2. Normalize values:
        Divides each row by the sum of the row. This is needed for
        probability matrices. To do this, set normalize_values=True.

    3. From/To transformations:
        Transforms from one type of matrix (e.g. 'counts') to another
        type of matrix (e.g. 'information'). To do this, set from_type
        and to_type arguments.

    Here are the mathematical formulas invoked by From/To transformations:

        from_type='counts' -> to_type='probability':
            P_ic = (N_ic + l)/(N_i + C*l), N_i = sum_c(N_ic)

        from_type='probability' -> to_type='weight':
            W_ic = log_2(P_ic / Q_ic)

        from_type='weight' -> to_type='probability':
            P_ic = Q_ic * 2^(W_ic)

        from_type='probability' -> to_type='information':
            I_ic = P_ic * sum_d(P_id * log2(P_id / Q_id))

        from_type='information' -> to_type='probability':
            P_ic = I_ic / sum_d(I_id)

        notation:
            i = position
            c, d = character
            l = pseudocount
            C = number of characters
            N_ic = counts matrix element
            P_ic = probability matrix element
            Q_ic = background probability matrix element
            W_ic = weight matrix element
            I_ic = information matrix element

    Using these five 1-step transformations, 2-step transformations
    are also enabled, e.g., from_type='counts' -> to_type='information'.

    parameters
    ----------

    df: (dataframe)
        The matrix to be transformed.

    center_values: (bool)
        Whether to center matrix values, i.e., subtract the mean from
        each row. Requires from_type and to_type to be None.

    normalize_values: (bool)
        Whether to normalize each row, i.e., divide each row by
        the sum of that row. Requires from_type and to_type to be None.
        Ignored if center_values is True.

    from_type: (str)
        Type of input matrix. Must be one of 'counts', 'probability',
        'weight', or 'information'.

    to_type: (str)
        Type of output matrix. Must be one of 'probability', 'weight', or
        'information'. Can be 'counts' ONLY if from_type is 'counts' too.

    background: (array, or df)
        Specification of background probabilities. If array, should be
        the same length as df.columns and correspond to the probability
        of each column's character. If df, should be a probability matrix
        the same shape as df.

    pseudocount: (number >= 0)
        Pseudocount to use when transforming from a counts matrix to a
        probability matrix.

    returns
    -------
    out_df: (dataframe)
        Transformed matrix
    """

    # validate matrix dataframe
    df = validate_matrix(df)

    # validate center_values
    check(isinstance(center_values, bool),
          'type(center_values) = %s must be of type bool'
          % type(center_values))

    # validate normalize_values
    check(isinstance(normalize_values, bool),
          'type(normalize_values) = %s must be of type bool'
          % type(normalize_values))

    # validate from_type
    check((from_type in MATRIX_TYPES) or (from_type is None),
          'from_type = %s must be None or in %s'
          % (from_type, MATRIX_TYPES))

    # validate to_type
    check((to_type in MATRIX_TYPES) or (to_type is None),
          'to_type = %s must be None or in %s'
          % (to_type, MATRIX_TYPES))

    # validate background
    check(isinstance(background, (list, np.ndarray, pd.DataFrame))
          or (background is None),
          'type(background) = %s must be None or array-like or a dataframe.'
          % type(background))

    # validate pseudocount
    check(isinstance(pseudocount, (int, float)),
          'type(pseudocount) = %s must be a number' % type(pseudocount))
    check(pseudocount >= 0,
          'pseudocount=%s must be >= 0' % pseudocount)

    # If centering values, do that
    if center_values:
        check((from_type is None) and (to_type is None),
              "If center_values is True, both from_type and to_type "
              "must be None. Here, from_type=%s, to_type=%s"
              % (from_type, to_type))

        # Do centering
        out_df = _center_matrix(df)

    # Otherwise, if normalizing values, do that
    elif normalize_values:
        check((from_type is None) and (to_type is None),
              "If normalize_values is True, both from_type and to_type "
              "must be None. Here, from_type=%s, to_type=%s"
              % (from_type, to_type))

        # Do normalizing
        out_df = _normalize_matrix(df)

    # otherwise, if to_type == from_type, just return matrix
    # Note, this is the only way that to_type='counts' is valid
    elif from_type == to_type:
        out_df = df.copy()

    # Otherwise, we're converting from one type of matrix to another.
    else:
        # Check that from_type and to_type are not None
        check((from_type is not None) and (to_type is not None),
              'Unless center_values is True or normalize_values is True, '
              'Neither from_type (=%s) nor to_type (=%s) can be None.'
              % (from_type, to_type))

        # Check that to_type != 'counts'
        check(to_type != 'counts',
              "Can only have to_type='counts' if "
              "from_type='counts'. Here, however, "
              "from_type='%s'" % from_type)

        # If converting from a probability matrix
        if from_type == 'probability':

            # ... to a weight matrix
            if to_type == 'weight':
                out_df = _probability_mat_to_weight_mat(df, background)

            # ... to an information matrix
            elif to_type == 'information':
                out_df = _probability_mat_to_information_mat(df, background)

            # This should never execute
            else:
                assert False, 'THIS SHOULD NEVER EXECUTE'

        # Otherwise, convert to probability matrix, then call function again
        else:

            # If converting from a counts matrix,
            # convert to probability matrix first
            if from_type == 'counts':
                prob_df = _counts_mat_to_probability_mat(df, pseudocount)

            # If converting from a weight matrix,
            # convert to probability matrix first
            elif from_type == 'weight':
                prob_df = _weight_mat_to_probability_mat(df, background)

            # If converting from an information matrix,
            # convert to probability matrix first
            elif from_type == 'information':
                prob_df = _information_mat_to_probability_mat(df, background)

            # This should never execute
            else:
                assert False, 'THIS SHOULD NEVER EXECUTE'

            # Now that we have the probability matrix,
            # convert to user-specified to_type. If to_type is
            # 'probability' the recursive call just returns a copy.
            out_df = transform_matrix(prob_df,
                                      from_type='probability',
                                      to_type=to_type,
                                      background=background)

    # Validate and return
    out_df = validate_matrix(out_df)
    return out_df
def style_spines(self, spines=('top', 'bottom', 'left', 'right'),
                 visible=True, color='black', linewidth=1.0, bounds=None):
    """
    Restyles spines (the edges of the bounding box) of self.ax.

    parameters
    ----------
    spines: (tuple of str)
        Which of the four spines ('top', 'bottom', 'left', 'right') to
        modify; a list or set is accepted as well. Defaults to all four.

    visible: (bool)
        Whether the listed spines are shown.

    color: (matplotlib color)
        Spine color; a named matplotlib color or an RGB array.

    linewidth: (float >= 0)
        Width of the spine lines.

    bounds: (None or [float, float])
        If not None, the two values between which the listed spines are
        drawn; must satisfy bounds[0] < bounds[1].

    returns
    -------
    None
    """

    # an explicit call overrides the stored show_spines setting
    self.show_spines = None

    # spines: a tuple/list/set whose entries are all valid spine names
    check(isinstance(spines, (tuple, list, set)),
          'type(spines) = %s; must be of type (tuple, list, set) '
          % type(spines))
    spines = set(spines)
    valid_spines = {'top', 'bottom', 'left', 'right'}
    check(spines.issubset(valid_spines),
          'spines has invalid entry; valid entries are: %s'
          % repr(valid_spines))

    # visible: bool
    check(isinstance(visible, bool),
          'type(visible) = %s; must be of type bool ' % type(visible))

    # linewidth: non-negative number
    check(isinstance(linewidth, (float, int)),
          'type(linewidth) = %s; must be a number ' % type(linewidth))
    check(linewidth >= 0, 'linewidth must be >= 0')

    # color: validated and converted to RGB by get_rgb
    color = get_rgb(color)

    # bounds: only inspected when given
    has_bounds = bounds is not None
    if has_bounds:

        # container type
        bounds_types = (list, tuple, np.ndarray)
        check(isinstance(bounds, bounds_types),
              'type(bounds) = %s; must be one of %s'
              % (type(bounds), bounds_types))

        # exactly two entries
        check(len(bounds) == 2,
              'len(bounds) = %d; must be %d' % (len(bounds), 2))

        # both entries are numbers
        check(all([isinstance(bound, (float, int)) for bound in bounds]),
              'bounds = %s; all entries must be numbers' % repr(bounds))

        # entries are strictly increasing
        check(bounds[0] < bounds[1],
              'bounds = %s; must have bounds[0] < bounds[1]' % repr(bounds))

    # apply the style to every requested spine
    for spine_name, spine in self.ax.spines.items():
        if spine_name not in spines:
            continue

        spine.set_visible(visible)
        spine.set_color(color)
        spine.set_linewidth(linewidth)
        if has_bounds:
            spine.set_bounds(bounds[0], bounds[1])
def style_xticks(self, anchor=0, spacing=1, fmt='%d', rotation=0.0,
                 **kwargs):
    """
    Sets the x-axis tick positions and tick labels of self.ax.

    parameters
    ----------
    anchor: (int)
        Ticks are placed at positions p with (p - anchor) divisible by
        spacing; anchor itself need not lie within the logo's positions.

    spacing: (int > 0)
        Distance between adjacent ticks.

    fmt: (str)
        %-format string applied to each tick position to get its label.

    rotation: (number)
        Rotation of the tick labels, in degrees.

    **kwargs:
        Additional keyword arguments handed to ax.set_xticklabels()

    returns
    -------
    None
    """

    # anchor: int
    check(isinstance(anchor, int),
          'type(anchor) = %s must be of type int' % type(anchor))

    # spacing: positive int
    check(isinstance(spacing, int) and spacing > 0,
          'spacing = %s must be an int > 0' % repr(spacing))

    # fmt: str
    check(isinstance(fmt, str),
          'type(fmt) = %s must be of type str' % type(fmt))

    # rotation: number
    check(isinstance(rotation, (float, int)),
          'type(rotation) = %s; must be of type float or int '
          % type(rotation))

    # every integer from the smallest to the largest logo position
    first = min(self.ps)
    last = max(self.ps)
    candidates = np.arange(first, last + 1)

    # keep the candidates that sit on the anchor/spacing grid
    on_grid = (candidates - anchor) % spacing == 0
    xticks = candidates[on_grid]
    self.ax.set_xticks(xticks)

    # label each tick by formatting its position
    labels = [fmt % tick for tick in xticks]
    self.ax.set_xticklabels(labels, rotation=rotation, **kwargs)
def highlight_position_range(self, pmin, pmax, padding=0.0, color='yellow',
                             edgecolor=None, floor=None, ceiling=None,
                             zorder=-2, **kwargs):
    """
    Adds a rectangle to self.ax that highlights positions pmin..pmax.

    parameters
    ----------
    pmin: (int)
        Lowest position to highlight.

    pmax: (int)
        Highest position to highlight.

    padding: (number >= -0.5)
        Extra width added on both the left and the right side.

    color: (None or matplotlib color)
        Fill color; a named matplotlib color or an RGB array.

    edgecolor: (None or matplotlib color)
        Edge color; a named matplotlib color or an RGB array.

    floor: (None or number)
        Lower y extent of the box. If None, the current lower y-limit
        of self.ax is used.

    ceiling: (None or number)
        Upper y extent of the box. If None, the current upper y-limit
        of self.ax is used.

    zorder: (number)
        Drawing order of the box relative to other objects on ax.

    **kwargs:
        Additional keyword arguments handed to Rectangle().

    returns
    -------
    None
    """

    number_types = (float, int)

    # current y-limits serve as defaults for floor and ceiling
    ymin, ymax = self.ax.get_ylim()

    # pmin and pmax: numbers with pmin <= pmax
    check(isinstance(pmin, number_types),
          'type(pmin) = %s must be a number' % type(pmin))
    check(isinstance(pmax, number_types),
          'type(pmax) = %s must be a number' % type(pmax))
    check(pmin <= pmax,
          'pmin <= pmax not satisfied.')

    # padding: number >= -0.5
    check(isinstance(padding, number_types) and padding >= -0.5,
          'padding = %s must be a number >= -0.5' % repr(padding))

    # colors: validated/converted by get_rgb unless None
    if color is not None:
        color = get_rgb(color)
    if edgecolor is not None:
        edgecolor = get_rgb(edgecolor)

    # floor: given value must be a number, otherwise fall back to ymin
    if floor is not None:
        check(isinstance(floor, number_types),
              'type(floor) = %s must be a number' % type(floor))
    else:
        floor = ymin

    # ceiling: given value must be a number, otherwise fall back to ymax
    if ceiling is not None:
        check(isinstance(ceiling, number_types),
              'type(ceiling) = %s must be a number' % type(ceiling))
    else:
        ceiling = ymax

    # the box must not be upside down
    check(floor <= ceiling,
          'must have floor <= ceiling; as is, floor = %f, ceiling = %s'
          % (floor, ceiling))

    # zorder: number
    check(isinstance(zorder, number_types),
          'type(zorder) = %s; must a float or int.' % type(zorder))

    # each position p occupies [p - .5, p + .5]; padding widens both sides
    left = pmin - .5 - padding
    box_width = pmax - pmin + 1 + 2 * padding
    box_height = ceiling - floor

    # build the rectangle and attach it to the Axes
    highlight = Rectangle(xy=(left, floor),
                          width=box_width,
                          height=box_height,
                          facecolor=color,
                          edgecolor=edgecolor,
                          zorder=zorder,
                          **kwargs)
    self.ax.add_patch(highlight)
def style_glyphs_below(self, color=None, alpha=None, shade=0.0, fade=0.0,
                       flip=None, **kwargs):
    """
    Restyles every glyph whose matrix value in self.df is negative.

    parameters
    ----------
    color: (color specification)
        Base color, before shade is applied. If None, each glyph's own
        color is used.

    alpha: (number in [0,1])
        Base opacity, before fade is applied. If None, each glyph's own
        alpha is used.

    shade: (number in [0,1])
        The base color is multiplied by (1 - shade).

    fade: (number in [0,1])
        The base opacity is multiplied by (1 - fade).

    flip: (bool)
        Passed to Glyph.set_attributes() as flip.

    **kwargs:
        Further keyword arguments for Glyph.set_attributes(), applied
        only to the glyphs with negative values.

    returns
    -------
    None
    """

    number_types = (float, int)

    # color: validated/converted to RGB unless None
    if color is not None:
        color = get_rgb(color)

    # alpha: number in [0, 1] unless None
    if alpha is not None:
        check(isinstance(alpha, number_types),
              'type(alpha) = %s must be a float or int' % type(alpha))

        # NOTE(review): this stores alpha on the logo itself, and does so
        # before the range check below -- confirm that this is intended
        self.alpha = float(alpha)

        check(0 <= alpha <= 1.0,
              'alpha must be between 0.0 and 1.0 (inclusive)')

    # shade: number in [0, 1]
    check(isinstance(shade, number_types),
          'type(shade) = %s must be a number' % type(shade))
    check(0.0 <= shade <= 1.0,
          'shade must be between 0 and 1; value is %f.' % shade)

    # fade: number in [0, 1]
    check(isinstance(fade, number_types),
          'type(fade) = %s must be a number' % type(fade))
    check(0.0 <= fade <= 1.0,
          'fade must be between 0 and 1; value is %f' % fade)

    # flip: bool unless None
    if flip is not None:
        check(isinstance(flip, bool),
              'type(flip) = %s; must be of type bool ' % type(flip))

    # visit every (position, character) cell
    for pos in self.ps:
        for char in self.cs:

            # only glyphs with a negative matrix value are touched
            value = self.df.loc[pos, char]
            if value < 0:
                glyph = self.glyph_df.loc[pos, char]

                # explicit arguments win over the glyph's current style
                base_color = get_rgb(glyph.color) if color is None else color
                base_alpha = glyph.alpha if alpha is None else alpha

                # darken and fade, then apply
                glyph.set_attributes(color=base_color * (1.0 - shade),
                                     alpha=base_alpha * (1.0 - fade),
                                     flip=flip,
                                     **kwargs)
def alignment_to_matrix(sequences, counts=None, to_type='counts',
                        background=None, characters_to_ignore='.-',
                        center_weights=False, pseudocount=1.0):
    """
    Generates matrix from a sequence alignment

    parameters
    ----------
    sequences: (list of strings)
        A list of sequences, all of which must be the same length

    counts: (None or list of numbers)
        If not None, must be a list of numbers the same length as
        sequences, containing the (nonnegative) number of times that
        each sequence was observed. If None, defaults to 1.

    to_type: (str)
        The type of matrix to output. Must be 'counts', 'probability',
        'weight', or 'information'

    background: (array, or df)
        Specification of background probabilities. If array, should be
        the same length as df.columns and correspond to the probability
        of each column's character. If df, should be a probability matrix
        the same shape as df.

    characters_to_ignore: (str)
        Characters to ignore within sequences. This is often needed when
        creating matrices from gapped alignments.

    center_weights: (bool)
        Whether to subtract the mean of each row, but only if
        to_type=='weight'.

    pseudocount: (number >= 0.0)
        Pseudocount to use when converting from counts to probabilities.

    returns
    -------
    out_df: (dataframe)
        A matrix of the requested type.
    """

    # validate inputs

    # Make sure sequences is list-like
    check(isinstance(sequences, (list, tuple, np.ndarray, pd.Series)),
          'sequences must be a list, tuple, np.ndarray, or pd.Series.')
    sequences = list(sequences)

    # Make sure sequences has at least 1 element
    check(len(sequences) > 0, 'sequences must have length > 0.')

    # Make sure all elements are sequences
    check(all(isinstance(seq, str) for seq in sequences),
          'sequences must all be of type string')

    # validate characters_to_ignore
    check(isinstance(characters_to_ignore, str),
          'type(characters_to_ignore) = %s must be of type str'
          % type(characters_to_ignore))

    # validate center_weights
    check(isinstance(center_weights, bool),
          'type(center_weights) = %s; must be bool.' % type(center_weights))

    # Get sequence length
    L = len(sequences[0])

    # Make sure all sequences are the same length
    check(all([len(s) == L for s in sequences]),
          'all elements of sequences must have the same length.')

    # validate counts as list-like
    check(isinstance(counts, (list, tuple, np.ndarray, pd.Series)) or
          (counts is None),
          'counts must be None or a list, tuple, np.ndarray, or pd.Series.')

    # make sure counts has the same length as sequences
    if counts is None:
        counts = np.ones(len(sequences))
    else:
        check(len(counts) == len(sequences),
              'counts must be the same length as sequences; '
              'len(counts) = %d; len(sequences) = %d' %
              (len(counts), len(sequences)))

        # lists, tuples and Series do not support the [:, np.newaxis]
        # indexing used below, so work with a float ndarray throughout
        counts = np.asarray(counts, dtype=float)

    # validate background
    check(isinstance(background, (list, np.ndarray, pd.DataFrame))
          or (background is None),
          'type(background) = %s must be None or array-like or a dataframe.'
          % type(background))

    # Define valid types
    valid_types = MATRIX_TYPES.copy()

    # Check that to_type is valid
    check(to_type in valid_types,
          'to_type=%s; must be in %s' % (to_type, valid_types))

    # Create a 2D array of characters (one row per sequence)
    char_array = np.array([np.array(list(seq)) for seq in sequences])

    # Get sorted list of unique characters (np.unique returns them sorted)
    unique_characters = np.unique(char_array.ravel())

    # Remove characters to ignore
    columns = [c for c in unique_characters if c not in characters_to_ignore]
    index = list(range(L))

    # Float from the start: the per-position sums below are floats
    counts_df = pd.DataFrame(data=0.0, columns=columns, index=index)

    # Sum of the (weighted) number of occurrences of each character at
    # each position
    for c in columns:
        tmp_mat = (char_array == c).astype(float) * counts[:, np.newaxis]
        counts_df.loc[:, c] = tmp_mat.sum(axis=0).T

    # Convert counts matrix to matrix of requested type
    out_df = transform_matrix(counts_df,
                              from_type='counts',
                              to_type=to_type,
                              pseudocount=pseudocount,
                              background=background)

    # Center values only if center_weights is True and to_type is 'weight'
    if center_weights and to_type == 'weight':
        out_df = transform_matrix(out_df, center_values=True)

    return out_df
def sequence_to_matrix(seq,
                       cols=None,
                       alphabet=None,
                       is_iupac=False,
                       to_type='probability',
                       center_weights=False):
    """
    Generates a matrix from a sequence. With default keyword arguments,
    this is a one-hot-encoded version of the sequence provided. Alternatively,
    is_iupac=True allows users to get matrix models based in IUPAC motifs.

    parameters
    ----------

    seq: (str)
        Sequence from which to construct matrix.

    cols: (str or array-like or None)
        The characters to use for the matrix columns. If None, cols is
        constructed from the unique characters in seq. Overridden by alphabet
        and is_iupac.

    alphabet: (str or None)
        The alphabet used to determine the columns of the matrix.
        Options are: 'dna', 'rna', 'protein'. Ignored if None. Overrides cols.

    is_iupac: (bool)
        If True, it is assumed that the sequence represents an IUPAC DNA
        string. In this case, cols is overridden, and alphabet must be None.

    to_type: (str)
        The type of matrix to output. Must be 'probability', 'weight',
        or 'information'

    center_weights: (bool)
        Whether to subtract the mean of each row, but only if to_type='weight'.

    returns
    -------
    seq_df: (dataframe)
        the matrix returned to the user.
    """

    # Define valid types; a single sequence cannot yield a 'counts' matrix
    valid_types = MATRIX_TYPES.copy()
    valid_types.remove('counts')

    # validate seq
    check(isinstance(seq, str),
          'type(seq) = %s must be of type str' % type(seq))

    # validate center_weights
    check(isinstance(center_weights, bool),
          'type(center_weights) = %s; must be bool.' % type(center_weights))

    # If cols is None, set to sorted list of unique characters in sequence
    if cols is None:
        cols = sorted(set(seq))

    # Otherwise, validate cols
    else:
        cols_types = (str, list, set, np.ndarray)
        check(isinstance(cols, cols_types),
              'cols = %s must be None or a string, set, list, or np.ndarray'
              % repr(cols))

        # Normalise to a list so that cols can be used as DataFrame columns;
        # a bare str or a set is not accepted by pandas for that purpose.
        cols = sorted(cols) if isinstance(cols, set) else list(cols)

    # If alphabet is specified, override cols
    if alphabet is not None:

        # Validate alphabet
        valid_alphabets = list(ALPHABET_DICT.keys())
        check(alphabet in valid_alphabets,
              'alphabet = %s; must be in %s.' % (alphabet, valid_alphabets))

        # Set cols
        cols = list(ALPHABET_DICT[alphabet])

    # validate to_type
    check(to_type in valid_types,
          'invalid to_type=%s; to_type must be in %s' % (to_type, valid_types))

    # validate is_iupac
    check(isinstance(is_iupac, bool),
          'type(is_iupac) = %s; must be bool.' % type(is_iupac))

    # If is_iupac, override alphabet and cols
    if is_iupac:

        # Check that alphabet has not been specified
        check(alphabet is None, 'must have alphabet=None if is_iupac=True')
        cols = list(ALPHABET_DICT['dna'])

    # Initialize counts dataframe
    L = len(seq)
    index = list(range(L))
    counts_df = pd.DataFrame(data=0.0, columns=cols, index=index)

    # If is_iupac, fill counts_df:
    if is_iupac:

        # Get list of valid IUPAC characters
        iupac_characters = list(IUPAC_DICT.keys())

        # Iterate over sequence positions
        for i, c in enumerate(seq):

            # Check that c is in the set of valid IUPAC characters
            check(
                c in iupac_characters,
                'character %s at position %d is not a valid IUPAC character;'
                'must be one of %s' % (c, i, iupac_characters))

            # Fill in a count for each possible base
            for b in IUPAC_DICT[c]:
                counts_df.loc[i, b] = 1.0

    # Otherwise, fill counts the normal way
    else:

        # Iterate over sequence positions
        for i, c in enumerate(seq):

            # Check that c is in columns
            check(
                c in cols,
                'character %s at position %d is not in cols=%s'
                % (c, i, cols))

            # Mark the observed character at this position
            counts_df.loc[i, c] = 1.0

    # Convert to requested type
    out_df = transform_matrix(counts_df,
                              pseudocount=0,
                              from_type='counts',
                              to_type=to_type)

    # Center values only if center_weights is True and to_type is 'weight'
    if center_weights and to_type == 'weight':
        out_df = transform_matrix(out_df, center_values=True)

    return out_df
def saliency_to_matrix(seq, values, cols=None, alphabet=None):
    """
    Takes a sequence string and an array of values and outputs a
    values dataframe. The returned dataframe is a L by C matrix where C is
    the number of characters and L is sequence length. If matrix is denoted
    as S, i indexes positions and c indexes characters, then S_ic will be
    non-zero (equal to the value in the values array at position i) only if
    character c occurs at position i in sequence. All other elements of S
    are zero.

    example usage:

    saliency_mat = logomaker.saliency_to_matrix(sequence,values)
    logomaker.Logo(saliency_mat)

    parameters
    ----------

    seq: (str or array-like list of single characters)
        sequence for which values matrix is constructed

    values: (array-like list of numbers)
        array of values for each character in sequence

    cols: (str or array-like or None)
        The characters to use for the matrix columns. If None, cols is
        constructed from the unique characters in seq. If given, its set of
        characters must equal the set of characters in seq. Overridden by
        alphabet.

    alphabet: (str or None)
        The alphabet used to determine the columns of the matrix.
        Options are: 'dna', 'rna', 'protein'. Ignored if None. Overrides cols.

    returns
    -------
    saliency_df: (dataframe)
        values matrix in the form of a dataframe
    """

    # try to convert seq to str; report failure through check()
    if isinstance(seq, (list, np.ndarray, pd.Series)):
        try:
            seq = ''.join([str(x) for x in seq])
        except Exception:
            check(False, 'could not convert %s to type str' % type(seq))
    else:
        try:
            seq = str(seq)
        except Exception:
            check(False, 'could not convert %s to type str' % type(seq))

    # validate seq
    check(isinstance(seq, str),
          'type(seq) = %s must be of type str' % type(seq))

    # validate values: check that it is a list or array
    check(isinstance(values, (type([]), np.ndarray, pd.Series)),
          'type(values) = %s must be of type list' % type(values))

    # cast values as a list just to be sure what we're working with
    values = list(values)

    # check length of seq and values are equal
    check(len(seq) == len(values),
          'length of seq and values list must be equal.')

    # If cols is None, set to sorted list of unique characters in sequence
    if cols is None:
        cols = sorted(set(seq))

    # Otherwise, validate cols
    else:
        cols_types = (str, list, set, np.ndarray)
        check(isinstance(cols, cols_types),
              'cols = %s must be None or a string, set, list, or np.ndarray'
              % repr(cols))

        # perform additional checks to validate cols
        check(
            len(set(cols)) == len(set(seq)),
            'length of set of unique characters must be equal for "cols " and "seq"'
        )
        check(set(cols) == set(seq),
              'unique characters for "cols" and "seq" must be equal.')

        # Normalise to a list so cols can serve as DataFrame columns
        cols = sorted(cols) if isinstance(cols, set) else list(cols)

    # If alphabet is specified, override cols
    if alphabet is not None:

        # Validate alphabet
        valid_alphabets = list(ALPHABET_DICT.keys())
        check(alphabet in valid_alphabets,
              'alphabet = %s; must be in %s.' % (alphabet, valid_alphabets))

        # Set cols
        cols = list(ALPHABET_DICT[alphabet])

    # turn seq into binary one-hot encoded matrix.
    ohe_sequence = sequence_to_matrix(seq, cols=cols)

    # multiply values list with one-hot encoded seq (row-wise broadcast)
    # to get values matrix or dataframe
    saliency_df = ohe_sequence.copy()
    saliency_df.loc[:, :] = ohe_sequence.values * \
        np.array(values)[:, np.newaxis]

    return saliency_df
def validate_matrix(df, matrix_type=None, allow_nan=False):
    """
    Checks to make sure that the input dataframe, df, represents a valid
    matrix, i.e., an object that can be displayed as a logo.

    The input dataframe is never modified; all clean-up (column sorting,
    index naming, row renormalization) is applied to a copy.

    parameters
    ----------

    df: (dataframe)
        A pandas dataframe where each row represents an (integer) position
        and each column represents to a (single) character.

    matrix_type: (None or str)
        If 'probability', validates df as a probability matrix, i.e., all
        elements are in [0,1] and rows are normalized). If 'information',
        validates df as an information matrix, i.e., all elements >= 0.

    allow_nan: (bool)
        Whether to allow NaN entries in the matrix.

    returns
    -------
    out_df: (dataframe)
        A cleaned-up version of df (if possible).
    """

    # check that df is a dataframe
    check(
        isinstance(df, pd.DataFrame),
        'out_df needs to be a valid pandas out_df, '
        'out_df entered: %s' % type(df))

    # create copy of df so we don't overwrite the user's data
    out_df = df.copy()

    # check that type is valid
    check(
        matrix_type in {None, 'probability', 'information'},
        'matrix_type = %s; must be None, "probability", or "information"'
        % matrix_type)

    # check that allow_nan is boolean
    check(isinstance(allow_nan, bool),
          'allow_nan must be of type bool; is type %s.' % type(allow_nan))

    if not allow_nan:
        # make sure all entries are finite numbers
        check(
            np.isfinite(out_df.values).all(),
            'some matrix elements are not finite. '
            'Set allow_nan=True to allow this.')

    # make sure the matrix has at least one row and one column
    check(out_df.shape[0] >= 1, 'df has zero rows. Needs multiple rows.')
    check(out_df.shape[1] >= 1,
          'df has zero columns. Needs multiple columns.')

    # check that all column names, rendered as strings, have length 1
    for i, col in enumerate(out_df.columns):
        col = str(col)
        check(
            len(col) == 1,
            'column %d is %s and has length %d; ' % (i, repr(col), len(col))
            + 'must have length 1.')

    # sort columns alphabetically
    char_cols = sorted(out_df.columns)
    out_df = out_df[char_cols]

    # name out_df.index as 'pos'
    out_df.index.name = 'pos'

    # try casting df.index as type int; non-numeric labels raise ValueError,
    # other unsupported index types raise TypeError
    try:
        int_index = out_df.index.astype(int)
    except (TypeError, ValueError):
        check(
            False, 'could not convert df.index to type int. Check that '
            'all positions have integer numerical values.')

    # make sure that df.index values have not changed
    check(
        all(int_index == out_df.index),
        'could not convert df.index values to int without changing '
        'some values. Make sure that df.index values are integers.')

    # check that all index values are unique
    check(
        len(set(out_df.index)) == len(out_df.index),
        'not all values of df.index are unique. Make sure all are unique.')

    # if type is 'information', make sure elements are nonnegative
    if matrix_type == 'information':

        # make sure all elements are nonnegative
        check(all(out_df.values.ravel() >= 0),
              'not all values in df are >=0.')

    # if type is 'probability', make sure elements are valid probabilities
    elif matrix_type == 'probability':

        # make sure all values are non-negative
        check(all(out_df.values.ravel() >= 0),
              'not all values in df are >=0.')

        # compute the sum of each row
        sums = out_df.sum(axis=1).values

        # if any sums are close to zero, abort
        check(not any(np.isclose(sums, 0.0)),
              'some rows in df sum to nearly zero.')

        # if any sums are not close to one, renormalize all rows; operate on
        # out_df so the caller's df is untouched and the clean-up is kept
        if not all(np.isclose(sums, 1.0)):
            print('in validate_matrix(): Row sums in df are not close to 1. '
                  'Renormalizing rows...')
            out_df = out_df.div(sums, axis=0)

    # nothing more to check if matrix_type is None

    # return cleaned-up out_df
    return out_df
def _input_checks(self):
    """
    Validate the attributes stored on self by the constructor.

    Each attribute is checked for type and, where applicable, range via
    check(). Several attributes are also normalized in place: floor,
    ceiling, edgewidth and alpha are cast to float; flip and mirror to
    bool; figsize to tuple; color and edgecolor are replaced by the result
    of get_rgb().

    returns
    -------
    None
    """

    from numbers import Number

    # validate p; Number is used (rather than int/float) so that a failing
    # value is reported through check() instead of raising from int()
    # NOTE(review): assumes numpy scalar positions are registered with the
    # numbers ABCs -- confirm against callers that pass index values.
    check(isinstance(self.p, Number),
          'type(p) = %s must be a number' % type(self.p))

    # check c is of type str
    check(isinstance(self.c, str),
          'type(c) = %s; must be of type str ' % type(self.c))

    # validate floor
    check(isinstance(self.floor, (float, int)),
          'type(floor) = %s must be a number' % type(self.floor))
    self.floor = float(self.floor)

    # validate ceiling
    check(isinstance(self.ceiling, (float, int)),
          'type(ceiling) = %s must be a number' % type(self.ceiling))
    self.ceiling = float(self.ceiling)

    # check floor <= ceiling
    check(
        self.floor <= self.ceiling,
        'must have floor <= ceiling. Currently, '
        'floor=%f, ceiling=%f' % (self.floor, self.ceiling))

    # check ax
    check((self.ax is None) or isinstance(self.ax, Axes),
          'ax must be either a matplotlib Axes object or None.')

    # validate width
    check(
        isinstance(self.width, (float, int)),
        'type(width) = %s; must be of type float or int ' % type(self.width))
    # %s rather than %d so fractional widths are not truncated in the message
    check(self.width > 0, "width = %s must be > 0 " % self.width)

    # validate vpad
    check(
        isinstance(self.vpad, (float, int)),
        'type(vpad) = %s; must be of type float or int ' % type(self.vpad))
    check(0 <= self.vpad < 1,
          "vpad = %s must be >= 0 and < 1 " % self.vpad)

    # validate font_name
    check(
        isinstance(self.font_name, str),
        'type(font_name) = %s must be of type str' % type(self.font_name))

    # check font_weight
    check(
        isinstance(self.font_weight, (str, int)),
        'type(font_weight) = %s should either be a string or an int'
        % (type(self.font_weight)))
    if isinstance(self.font_weight, str):
        check(self.font_weight in VALID_FONT_WEIGHT_STRINGS,
              'font_weight must be one of %s' % VALID_FONT_WEIGHT_STRINGS)
    elif isinstance(self.font_weight, int):
        check(0 <= self.font_weight <= 1000,
              'font_weight must be in range [0,1000]')

    # check color safely
    self.color = get_rgb(self.color)

    # validate edgecolor safely
    self.edgecolor = get_rgb(self.edgecolor)

    # Check that edgewidth is a number
    check(isinstance(self.edgewidth, (float, int)),
          'type(edgewidth) = %s must be a number' % type(self.edgewidth))
    self.edgewidth = float(self.edgewidth)

    # Check that edgewidth is nonnegative
    check(self.edgewidth >= 0,
          ' edgewidth must be >= 0; is %f' % self.edgewidth)

    # check dont_stretch_more_than is of type str
    check(
        isinstance(self.dont_stretch_more_than, str),
        'type(dont_stretch_more_than) = %s; must be of type str '
        % type(self.dont_stretch_more_than))

    # check that dont_stretch_more_than is a single character
    check(
        len(self.dont_stretch_more_than) == 1,
        'dont_stretch_more_than must have length 1; '
        'currently len(dont_stretch_more_than)=%d'
        % len(self.dont_stretch_more_than))

    # check that flip is a boolean
    check(isinstance(self.flip, (bool, np.bool_)),
          'type(flip) = %s; must be of type bool ' % type(self.flip))
    self.flip = bool(self.flip)

    # check that mirror is a boolean
    check(isinstance(self.mirror, (bool, np.bool_)),
          'type(mirror) = %s; must be of type bool ' % type(self.mirror))
    self.mirror = bool(self.mirror)

    # validate zorder (None is allowed)
    if self.zorder is not None:
        check(
            isinstance(self.zorder, (float, int)),
            'type(zorder) = %s; must be of type float or int '
            % type(self.zorder))

    # Check alpha is a number
    check(isinstance(self.alpha, (float, int)),
          'type(alpha) = %s must be a float or int' % type(self.alpha))
    self.alpha = float(self.alpha)

    # Check 0 <= alpha <= 1.0
    check(0 <= self.alpha <= 1.0,
          'alpha must be between 0.0 and 1.0 (inclusive)')

    # validate that figsize is array-like
    check(
        isinstance(self.figsize, (tuple, list, np.ndarray)),
        'type(figsize) = %s; figsize must be array-like.'
        % type(self.figsize))
    self.figsize = tuple(self.figsize)  # Just to pin down variable type.

    # validate length of figsize
    check(len(self.figsize) == 2, 'figsize must have length two.')

    # validate that each element of figsize is a number
    check(
        all(isinstance(n, (int, float)) and n > 0 for n in self.figsize),
        'all elements of figsize array must be numbers > 0.')
def get_color_dict(color_scheme, chars):
    """
    Return a color_dict constructed from a user-specified color_scheme and
    a list of characters

    parameters
    ----------

    color_scheme: (None, dict, str, or array-like of length 3)
        If None, a default scheme is looked up in CHARS_TO_COLORS_DICT using
        the sorted characters, falling back to 'gray'. If dict, it is
        expanded with _expand_color_dict() and its values converted with
        to_rgb(). If str, it must be a key of COLOR_SCHEME_DICT or a color
        that to_rgb() recognizes. If array-like, it is treated as a single
        RGB value applied to every character.

    chars: (str or array-like of single characters)
        Characters that must be present as keys in the returned dictionary.

    returns
    -------
    color_dict: (dict)
        Dictionary mapping characters to color values. Characters in chars
        that the scheme does not cover are assigned black (with a warning).
    """

    # Check that chars is a list
    check(isinstance(chars, (str, list, tuple, np.ndarray)),
          "chars must be a str or be array-like")

    # Check that chars has length of at least 1
    check(len(chars) >= 1, 'chars must have length >= 1')

    # Sort characters
    chars = sorted(chars)

    # Check that all entries in chars are strings of length 1
    for i, c in enumerate(chars):
        c = str(c)
        check(
            isinstance(c, str) and len(c) == 1,
            'entry number %d in chars is %s; ' % (i, repr(c))
            + 'must instead be a single character')

    # if color_scheme is None, choose default based on chars
    if color_scheme is None:
        key = tuple(chars)
        color_scheme = CHARS_TO_COLORS_DICT.get(key, 'gray')
        color_dict = get_color_dict(color_scheme, chars)

    # otherwise, if color_scheme is a dictionary
    elif isinstance(color_scheme, dict):

        # make sure all the keys are strings
        for key in color_scheme:
            check(
                isinstance(key, str),
                'color_scheme dict contains a key (%s) ' % repr(key)
                + 'that is not of type str.')

        # expand the dictionary
        color_dict = _expand_color_dict(color_scheme)

        # set all values to rgb
        for key in color_dict.keys():
            color_dict[key] = to_rgb(color_dict[key])

    # otherwise, if color_scheme is a string, it must either be a valid key in
    # COLOR_SCHEME_DICT or a named matplotlib color
    elif isinstance(color_scheme, str):

        # If a valid key, get the color scheme dict and expand
        if color_scheme in COLOR_SCHEME_DICT:
            tmp_dict = COLOR_SCHEME_DICT[color_scheme]
            color_dict = _expand_color_dict(tmp_dict)

            # Force each color to rgb
            for c in color_dict.keys():
                color_dict[c] = np.array(to_rgb(color_dict[c]))

        # Otherwise, try to convert color_scheme to RGB value, then create
        # color_dict using keys from chars and setting all values to RGB value.
        else:
            try:
                rgb = to_rgb(color_scheme)
                color_dict = {c: rgb for c in chars}

            # This will trigger if to_rgb does not recognize color_scheme.
            # In this case, raise an error to user.
            except Exception:
                check(False, 'invalid choice: color_scheme=%s' % color_scheme)

    # Otherwise, if color_scheme is array-like, it should be an RGB value
    elif isinstance(color_scheme, (list, tuple, np.ndarray)):

        # color_scheme must have length 3 to be RGB
        check(
            len(color_scheme) == 3,
            'color_scheme, if array, must be of length 3.')

        # Cast color_scheme as an RGB array. np.array copies the values;
        # the np.ndarray constructor would instead interpret them as a shape.
        rgb = np.array(color_scheme)

        # Construct color_dict with rgb as value for every character in chars
        color_dict = {c: rgb for c in chars}

    # Otherwise, raise error
    else:
        check(False,
              'Error: color_scheme has invalid type %s' % type(color_scheme))

    # If all the characters in chars are not also within the keys of
    # color_dict, add them with color 'black'
    if not set(chars) <= set(color_dict.keys()):
        for c in chars:
            if c not in color_dict:
                warnings.warn(
                    " Warning: Character '%s' is not in color_dict. " % c
                    + "Using black.")
                color_dict[c] = to_rgb('black')

    return color_dict
def _input_checks(self):
    """
    Validate parameters passed to the Logo constructor EXCEPT for
    color_scheme; that is validated in the Logo constructor

    Each attribute on self is checked for type and, where applicable, range
    via check(). self.df is replaced by the output of validate_matrix() and
    self.figsize is cast to a tuple.

    returns
    -------
    None
    """

    # validate dataframe
    self.df = validate_matrix(self.df)

    # CANNOT validate color_scheme here; this is done in Logo constructor.

    # validate that font_name is a str
    check(
        isinstance(self.font_name, str),
        'type(font_name) = %s must be of type str' % type(self.font_name))

    # validate stack_order
    valid_stack_orders = {'big_on_top', 'small_on_top', 'fixed'}
    check(
        self.stack_order in valid_stack_orders,
        'stack_order = %s; must be in %s.'
        % (self.stack_order, valid_stack_orders))

    # check that center_values is a boolean
    check(
        isinstance(self.center_values, bool),
        'type(center_values) = %s; must be of type bool.'
        % type(self.center_values))

    # check baseline_width is a number
    check(
        isinstance(self.baseline_width, (int, float)),
        'type(baseline_width) = %s must be of type number'
        % (type(self.baseline_width)))

    # check baseline_width >= 0.0
    check(self.baseline_width >= 0.0,
          'baseline_width = %s must be >= 0.0' % self.baseline_width)

    # check that flip_below is boolean
    check(
        isinstance(self.flip_below, bool),
        'type(flip_below) = %s; must be of type bool '
        % type(self.flip_below))

    # validate that shade_below is a number
    check(
        isinstance(self.shade_below, (float, int)),
        'type(shade_below) = %s must be of type float'
        % type(self.shade_below))

    # validate that shade_below is between 0 and 1
    check(0.0 <= self.shade_below <= 1.0,
          'shade_below must be between 0 and 1')

    # validate that fade_below is a number
    check(
        isinstance(self.fade_below, (float, int)),
        'type(fade_below) = %s must be of type float'
        % type(self.fade_below))

    # validate that fade_below is between 0 and 1
    check(0.0 <= self.fade_below <= 1.0,
          'fade_below must be between 0 and 1')

    # validate that fade_probabilities is boolean
    check(
        isinstance(self.fade_probabilities, bool),
        'type(fade_probabilities) = %s; must be of type bool '
        % type(self.fade_probabilities))

    # validate that vpad is a number
    check(isinstance(self.vpad, (float, int)),
          'type(vpad) = %s must be of type float' % type(self.vpad))

    # validate that vpad is between 0 and 1
    # NOTE(review): the other _input_checks in this file requires vpad < 1
    # (strict); confirm whether vpad == 1.0 should be accepted here.
    check(0.0 <= self.vpad <= 1.0, 'vpad must be between 0 and 1')

    # validate that vsep is a number
    check(
        isinstance(self.vsep, (float, int)),
        'type(vsep) = %s; must be of type float or int ' % type(self.vsep))

    # validate that vsep is >= 0; %s keeps fractional values intact in the
    # message, and the wording matches the condition actually enforced
    check(self.vsep >= 0, "vsep = %s must be >= 0 " % self.vsep)

    # validate that alpha is a number
    check(isinstance(self.alpha, (float, int)),
          'type(alpha) = %s must be of type float' % type(self.alpha))

    # validate that alpha is between 0 and 1
    check(0.0 <= self.alpha <= 1.0, 'alpha must be between 0 and 1')

    # validate show_spines is None or boolean
    check(
        isinstance(self.show_spines, bool) or (self.show_spines is None),
        'show_spines = %s; show_spines must be None or boolean.'
        % repr(self.show_spines))

    # validate ax
    check(
        isinstance(self.ax, Axes) or (self.ax is None),
        'ax = %s; ax must be None or a matplotlib.Axes object.'
        % repr(self.ax))

    # validate zorder
    check(
        isinstance(self.zorder, (float, int)),
        'type(zorder) = %s; zorder must be a number.' % type(self.zorder))

    # validate that figsize is array-like
    check(
        isinstance(self.figsize, (tuple, list, np.ndarray)),
        'type(figsize) = %s; figsize must be array-like.'
        % type(self.figsize))
    self.figsize = tuple(self.figsize)  # Just to pin down variable type.

    # validate length of figsize
    check(len(self.figsize) == 2, 'figsize must have length two.')

    # validate that each element of figsize is a number
    check(
        all(isinstance(n, (int, float)) and n > 0 for n in self.figsize),
        'all elements of figsize array must be numbers > 0.')