def find(self, direction='+', frame=0, minlen=0, includeseq=False):
    """-----------------------------------------------------------------------------------------
    Find the open reading frames in one frame and direction and append them to self.rflist.
    For the reverse complement, the coordinates are in terms of the reversed sequence.

    Each saved reading frame is a dict with keys 'direction', 'frame', 'begin', 'end' (end is
    the position of the terminating stop codon, or the end of the scanned sequence) and,
    when includeseq is true, 'seq'.

    :param direction: string, '+' or '-'
    :param frame: int, 0 - 2
    :param minlen: int, only save the orf if its length (end - begin) is at least minlen
    :param includeseq: boolean, if true, include the sequence in the identified reading frames
    :return: int, number of rfs added to self.rflist
    -----------------------------------------------------------------------------------------"""
    seq = self.sequence
    if direction == '-':
        seq = Fasta.reverseComplement(self.sequence)

    def save(begin, end):
        """Append seq[begin:end] to self.rflist if it passes minlen; return the number saved."""
        if end - begin < minlen:
            return 0
        rf = {'direction': direction, 'frame': frame, 'begin': begin, 'end': end}
        if includeseq:
            rf['seq'] = seq[begin:end]
        self.rflist.append(rf)
        return 1

    nrf = 0
    begin = frame
    pos = frame
    last = len(seq) - 2
    while pos < last:
        if seq[pos:pos + 3] in Orf.stop:
            # end of an ORF; pos - begin is always a multiple of three, so > 3 means the
            # reading frame holds at least two codons before the stop
            if pos - begin > 3:
                # count only what is actually stored so the return value can be used to
                # index self.rflist
                nrf += save(begin, pos)
            begin = pos + 3
        pos += 3

    # reading frame that runs off the end of the sequence without a stop codon
    # NOTE(review): this accepts a single trailing codon (> 2) while the in-loop test requires
    # two (> 3); the thresholds are kept as they were - confirm which one is intended
    if pos - begin > 2:
        nrf += save(begin, pos)

    return nrf
def findall(self, minlen=0, includeseq=False):
    """-----------------------------------------------------------------------------------------
    Find orfs in all six reading frames (three frames in each direction) by calling find()
    once per direction/frame combination. find() selects the strand itself, so no sequence
    manipulation is needed here.

    :param minlen: int, passed to find(); minimum length for an orf to be saved
    :param includeseq: boolean, if true, include the sequence in the identified reading frames
    :return: int, sum of the counts returned by find()
    -----------------------------------------------------------------------------------------"""
    nrf = 0
    for direction in ('+', '-'):
        for frame in range(3):
            nrf += self.find(direction=direction, frame=frame, minlen=minlen,
                             includeseq=includeseq)

    return nrf
class Diagonal(Score, Fasta):
    """=============================================================================================
    Dot plot calculation and Bokeh plotting for a pair of sequences.

    Window scores are calculated one diagonal at a time by diagonalScore() and handed to
    callback functions (windowThreshold, histogramScore, histogramRun) that fill the
    column-oriented data frames in self.frame. The b*() methods plot those frames in the
    figures built by setupBokeh().

    NOTE(review): self.a2i and self.table are read but not defined in this class; presumably
    they are provided by Score - confirm.
    ============================================================================================="""

    def __init__(self):
        """-----------------------------------------------------------------------------------------
        Diagonal class constructor.
        Subclass of Score
        Delegates to Fasta via self.s1 and self.s2
        Delegates to Bokeh figures via self.figure

        diagonal: one diagonal of scores
        yinc: direction of y axis, 1 or -1 means forward or reverse respectively
        window: length of window for calculation
        threshold: minimum value for window to be plotted
        -----------------------------------------------------------------------------------------"""
        Score.__init__(self)

        self.diagonal = []
        self.single = False
        self.yinc = 1
        self.threshold = 0
        self.window = 0
        self.nscore = 0
        self.nrun = 0

        self.frame = {}  # data frames
        self.function = {}  # functions for populating data frames

        # Plotting variables
        # sizes of panels are defined in setupBokeh()
        self.title = ''
        self.figure = {}
        self.grid = None
        self.palette = None
        self.cmap = None
        self.alpha = 0.5
        self.mindotsize = 2
        self.maxdotsize = 10

        # sequences, s1 is horizontal, s2 is vertical
        self.s1 = Fasta()
        self.s2 = Fasta()
        self.i1 = None  # integer array representation of sequences
        self.i2 = None
        self.l1 = 0
        self.l2 = 0
        self.seqreverse = False  # only applies to s2

    def setupCalculation(self, seq1, seq2, window=5, threshold=3, resetstat=True):
        """-----------------------------------------------------------------------------------------
        Load the sequences and do some basic setup for score calculations. Sequences are passed
        as Fasta objects to make it easier to use multi fasta files. The longer sequence is
        always stored as s1. When self.seqreverse is set, the seq attribute of s2 is replaced
        by its reverse complement (this modifies the Fasta object that was passed in).

        :param seq1: Fasta object
        :param seq2: Fasta object
        :param window: int, length of window for calculation
        :param threshold: float, minimum score in window to plot
        :param resetstat: boolean, if True, reset score and run counts to zero and empty the
                          data frames
        :return: True
        -----------------------------------------------------------------------------------------"""
        # sequence setup
        self.s1 = seq1
        self.s2 = seq2
        self.l1 = len(seq1.seq)
        self.l2 = len(seq2.seq)

        # move shorter sequence to s2 if necessary
        if self.l1 < self.l2:
            # shorter sequence is always s2
            self.s1, self.s2 = self.s2, self.s1
            self.l1, self.l2 = self.l2, self.l1

        # reverse sequence 2 if necessary; the direction is always reset first so that a
        # forward calculation after a reverse one does not keep yinc == -1
        self.yinc = 1
        if self.seqreverse:
            self.s2.seq = self.s2.reverseComplement()
            self.yinc = -1

        # setup integer array version of sequence
        self.seqToInt()

        self.diagonal = [0 for _ in range(min(self.l1, self.l2))]
        self.window = window
        self.threshold = threshold

        # stat() histograms. nrun is always positive
        # NOTE(review): the data frames are emptied together with the counters; confirm that
        # frames should be kept when resetstat is False
        if resetstat:
            self.nscore = 0
            self.nrun = 0
            for frame in self.frame:
                self.resetFrame(frame)

        return True

    def setupBokeh(self, cbase=None, clevels=None, creverse=None):
        """-----------------------------------------------------------------------------------------
        Set up four plots in a 2 x 2 grid, but with differing sizes
            main is the dotplot itself,
            legend (upper right) shows the colorbar
            scoredist shows the window score distribution
            rundist shows the run length distribution on a log axis

        The figures are stored in self.figure and the layout in self.grid. setupCalculation()
        must be called first because the panel height is scaled by l2 / l1.

        :param cbase: string, e.g. Greys, Blues, Reds, Viridis, etc
        :param clevels: int, usually 0-9 or 256
        :param creverse: boolean, if True highest color is dark
        :return: True
        -----------------------------------------------------------------------------------------"""
        # turn off MISSING_RENDERERS warning caused by plotting colorbars in empty plot
        silence(MISSING_RENDERERS, True)

        self.palette = self.setupPalette(cbase=cbase, clevels=clevels, creverse=creverse)

        if self.title:
            titlestr = self.title
        else:
            now = date.today()
            titlestr = 'Dotplot of {} and {} - {}'.format(self.s1.id, self.s2.id, now)

        xlabel = '\n'.join([self.s1.id, self.s1.doc])
        ylabel = '\n'.join([self.s2.doc, self.s2.id])

        # account for sequence length difference, ylen scaling affects main and legend panels
        xlen = 800
        ylen = xlen * self.l2 / self.l1

        # define each panel as a figure
        label = '({}, {}, score)'.format(self.s1.id, self.s2.id)
        TIPS = [(label, '($x{0}, $y{0}, @score)')]
        self.figure['main'] = figure(title=titlestr, x_axis_label=xlabel, y_axis_label=ylabel,
                                     height=int(ylen), width=int(xlen), align='center',
                                     tooltips=TIPS)

        self.figure['legend'] = figure(height=int(ylen), width=200)

        TIPS = [('score, number', '$x{0}, $y{0.00}')]
        self.figure['scoredist'] = figure(height=300, width=500, tooltips=TIPS)

        TIPS = [('length,count', '$x{0}, $y{0}')]
        self.figure['rundist'] = figure(height=300, width=500, y_axis_type='log', tooltips=TIPS)

        # grid layout
        self.grid = layout([[self.figure['main'], self.figure['legend']],
                            [self.figure['scoredist'], self.figure['rundist']]])

        return True

    def setupFrame(self, defs):
        """-----------------------------------------------------------------------------------------
        Setup data frames for the defined analyses with empty data fields. Each def in defs is
        a dict with the keys
            data - name of data frame
            fn   - a callback function used to construct the data from a diagonal of scores
            var  - list of variables (columns) that will be populated

        As used here, a dataframe is stored in the object as
            self.frame[name] = {var1: [], var2: [], var3: [], ...}
        and its callback as self.function[name]

        :param defs: list, see above
        :return: int, number of frames defined
        -----------------------------------------------------------------------------------------"""
        n = 0
        for defin in defs:
            n += 1
            name = defin['data']
            self.frame[name] = {}
            self.function[name] = defin['fn']
            for v in defin['var']:
                self.frame[name][v] = []

        return n

    def resetFrame(self, framename):
        """-----------------------------------------------------------------------------------------
        Reset the data in one frame to empty lists. Needed for reverse plots

        :param framename: string, name of a dataframe in self.frame
        :return: True
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[framename]
        for var in frame:
            frame[var] = []

        return True

    def setupPalette(self, cbase, clevels, creverse):
        """-----------------------------------------------------------------------------------------
        Colormaps are used in multiple methods so this utility provides a unified safe method
        for setup. Bokeh handles colormaps a little differently than other plotting programs.
        If the requested palette does not exist, a message is written to stderr and the default
        (reversed Greys256) is used.

        :param cbase: string, e.g. Greys, Blues, Reds, Viridis, etc
        :param clevels: int, usually 0-9 or 256
        :param creverse: boolean, if True highest color is dark
        :return: the selected palette (sequence of colors), reversed if creverse
        -----------------------------------------------------------------------------------------"""
        from bokeh.palettes import all_palettes

        # the defaults are here instead of in definition so that they never change
        default_base = 'Greys'
        default_levels = 256
        default_reverse = True

        try:
            palette = all_palettes[cbase][clevels]

        except (KeyError, IndexError) as error:
            # if lookup fails, use default
            palette = all_palettes[default_base][default_levels]
            creverse = default_reverse
            sys.stderr.write(
                'Diagonal::setupPalettes - {}, color {} levels {} is undefined.\n'
                .format(error, cbase, clevels))
            sys.stderr.write('\tUsing default {}{}\n'.format(default_base, default_levels))

        if creverse:
            # reverse the order of colors
            palette = palette[::-1]

        return palette

    def seqToInt(self):
        """-----------------------------------------------------------------------------------------
        Convert the sequence strings to integer arrays and store them in the object (self.i1,
        self.i2). An integer array is more convenient for direct lookups in the scoring table
        than a string. Every character of both sequences must be a key of self.a2i.

        :return: int, int length of sequence lists
        -----------------------------------------------------------------------------------------"""
        a2i = self.a2i

        self.i1 = [a2i[c] for c in self.s1.seq]
        self.i2 = [a2i[c] for c in self.s2.seq]

        return len(self.i1), len(self.i2)

    def rle2coord(self):
        """-----------------------------------------------------------------------------------------
        Return a list of beginning and ending positions of each run. Each entry of
        self.diagonal is expected to be an iterable of (offset, length) pairs rather than a
        list of window scores.

        :return: list of [s1begin, s1end, s2begin, s2end], one per run
        -----------------------------------------------------------------------------------------"""
        coord = []
        l2 = self.l2
        for diag in range(len(self.diagonal)):
            for offset, length in self.diagonal[diag]:
                end1 = max(diag - l2 + 1, 0) + offset
                end2 = max(l2 - diag - 1, 0) + offset
                beg1 = end1 - length + 1
                beg2 = end2 - length + 1
                coord.append([beg1, end1, beg2, end2])

        return coord

    def diagLenBegin(self, diag):
        """-----------------------------------------------------------------------------------------
        Calculates the length of diagonal diag and the beginning position of the diagonal in
        each sequence

        :param diag: int, diagonal number
        :return: int (diagonal length), int (seq1 begin), int (seq2 begin)
        -----------------------------------------------------------------------------------------"""
        pos1 = max(diag - self.l2 + 1, 0)
        pos2 = max(self.l2 - diag - 1, 0)
        diaglen = min(self.l1 - pos1, self.l2 - pos2)

        return diaglen, pos1, pos2

    def diagonalScore(self, d):
        """-----------------------------------------------------------------------------------------
        Calculate the moving window sum of comparison score along one diagonal and store in the
        object. self.diagonal is overwritten in place: the first diaglen - window + 1 entries
        hold the window scores, the rest are zero.

        :param d: int, diagonal number
        :return: list, scores along diagonal (self.diagonal); empty list if the diagonal is
                 shorter than the window
        -----------------------------------------------------------------------------------------"""
        diaglen, pos1, pos2 = self.diagLenBegin(d)

        i1 = self.i1
        i2 = self.i2
        window = self.window
        cmp = self.table
        diagonal = self.diagonal

        # old1/old2 trail pos1/pos2 by one window; they mark the position leaving the window
        old1 = pos1
        old2 = pos2

        if diaglen < window:
            # skip diagonals shorter than window length
            return []

        # set all values to zero in place so other references to the list stay valid
        diagonal[:] = [0] * len(diagonal)

        score = 0

        # first window
        for offset in range(window):
            score += cmp[i1[pos1]][i2[pos2]]
            pos1 += 1
            pos2 += 1

        dpos = 0
        diagonal[dpos] = score

        # rest of diagonal
        for offset in range(window, diaglen):
            score -= cmp[i1[old1]][i2[old2]]
            score += cmp[i1[pos1]][i2[pos2]]

            dpos += 1
            diagonal[dpos] = score

            old1 += 1
            old2 += 1
            pos1 += 1
            pos2 += 1

        return diagonal

    def random(self, n=10000):
        """-----------------------------------------------------------------------------------------
        Calculate random score distribution using current scoring table and window. Residues
        are drawn independently from i1 and i2 and scored in a sliding window. The result
        replaces self.diagonal. Use stat() to get distributions and run lengths. Use n = number
        of windows calculated for actual sequences.

        :param n: int, number of residue pairs to draw; 0 means l1 * l2
        :return: list of n - window window scores
        -----------------------------------------------------------------------------------------"""
        window = self.window
        cmp = self.table
        i1 = self.i1
        i2 = self.i2

        if n == 0:
            n = self.l1 * self.l2

        self.diagonal = [0 for _ in range(n - window)]
        dist = self.diagonal

        # win is a circular buffer holding the scores currently inside the window
        win = [0 for _ in range(window)]
        wsum = 0
        for i in range(window):
            a = choice(i1)
            b = choice(i2)
            score = cmp[a][b]
            win[i] = score
            wsum += score

        newpos = 0
        pos = 0
        for i in range(n - window):
            dist[pos] = wsum
            wsum -= win[newpos]

            a = choice(i1)
            b = choice(i2)
            score = cmp[a][b]
            wsum += score
            win[newpos] = score
            newpos = (newpos + 1) % window
            pos += 1

        return dist

    def allDiagonals(self, select):
        """-----------------------------------------------------------------------------------------
        Iterate over all diagonals and apply the selected callbacks to each diagonal. The
        callbacks are the functions registered by setupFrame(); each is called with the frame
        name and the diagonal number and populates its data frame from self.diagonal. The
        frames are usable as Bokeh sources for plotting.

        :param select: list, names of dataframes to calculate from each diagonal
        :return: True
        -----------------------------------------------------------------------------------------"""
        function = self.function

        for d in range(self.l1 + self.l2 - 1):
            dscore = self.diagonalScore(d)
            if not dscore:
                # diagonal shorter than the window
                continue

            for data in select:
                # apply each selected function to this diagonal of scores to populate the
                # dataframes
                fxn = function[data]
                fxn(data, d)

        return True

    def windowThreshold(self, framename, d):
        """-----------------------------------------------------------------------------------------
        Callback function for allDiagonals. Saves windows with score >= threshold in dataframe
        framename (columns x, y, score). Positions are the centers of the windows. Works on the
        internally stored diagonal of scores calculated by diagonalScore(). self.nscore is
        incremented once per window examined.

        :param framename: string, name of a dataframe in self.frame
        :param d: int, diagonal number
        :return: True, or False if the diagonal is shorter than the window
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[framename]
        dscore = self.diagonal
        window = self.window
        halfwindow = (window - 1) / 2.0
        threshold = self.threshold
        yinc = self.yinc

        diaglen, xpos, ypos = self.diagLenBegin(d)
        if diaglen < window:
            return False

        xpos += halfwindow
        if self.yinc < 0:
            # reversed sequence 2: count y down from the far end
            ypos = self.l2 - ypos - halfwindow - 1
        else:
            ypos += halfwindow

        for pos in range(diaglen - window + 1):
            if dscore[pos] >= threshold:
                frame['x'].append(xpos)
                frame['y'].append(ypos)
                frame['score'].append(dscore[pos])

            xpos += 1
            ypos += yinc
            self.nscore += 1

        return True

    def scaleColumn(self, framename, column_source, column_dest, value, scale):
        """-----------------------------------------------------------------------------------------
        Performs a simple linear scaling on a column. If the input range is empty
        (value[0] == value[1]) every scaled value is scale[0].

        :param framename: string, a data frame in self.frame
        :param column_source: string, the column in frame to be scaled
        :param column_dest: string, name for the scaled column (in frame)
        :param value: tuple, low and high value for the input data
        :param scale: tuple, low and high value for the scaled data
        :return: None
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[framename]
        values = frame[column_source]

        rangeval = value[1] - value[0]
        rangesize = scale[1] - scale[0]
        # guard against a zero-width input range instead of raising ZeroDivisionError
        m = rangesize / rangeval if rangeval else 0.0

        frame[column_dest] = [scale[0] + (v - value[0]) * m for v in values]

        return

    def histogramScore(self, scoreframe, d):
        """-----------------------------------------------------------------------------------------
        Callback function for allDiagonals. Accumulates the score distribution in a dataframe
        with columns score and count. Works on the internally stored diagonal of scores
        calculated by diagonalScore(). When self.single is set the whole of self.diagonal is
        used, otherwise only the windows of diagonal d.

        :param scoreframe: string, name of dataframe in self.frame
        :param d: int, diagonal number
        :return: int, number of values in columns of dataframe
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[scoreframe]
        diagonal = self.diagonal
        window = self.window

        if self.single:
            diaglen = len(diagonal)
        else:
            diaglen, xpos, ypos = self.diagLenBegin(d)
            # number of complete windows on the diagonal
            diaglen -= window - 1

        score = {}
        for s in diagonal[:diaglen]:
            score[s] = score.get(s, 0) + 1

        # insert into data frame, the dataframe is randomly ordered
        for s in score:
            try:
                i = frame['score'].index(s)
                frame['count'][i] += score[s]
            except ValueError:
                frame['score'].append(s)
                frame['count'].append(score[s])

        return len(frame['score'])

    def histogramRun(self, runframe, d):
        """-----------------------------------------------------------------------------------------
        Callback function for allDiagonals. Accumulates the run length distribution in a
        dataframe with columns len and count, applying the threshold stored in self.threshold.
        A run is a series of consecutive windows with score >= threshold; every window below
        the threshold closes the current run, so consecutive low windows are recorded as runs
        of length zero. Works on the internally stored diagonal of scores calculated by
        diagonalScore().

        :param runframe: string, name of dataframe in self.frame
        :param d: int, diagonal number
        :return: int, number of values in columns of dataframe
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[runframe]
        diagonal = self.diagonal
        window = self.window
        threshold = self.threshold

        if self.single:
            diaglen = len(diagonal)
        else:
            diaglen, xpos, ypos = self.diagLenBegin(d)
            # number of complete windows on the diagonal
            diaglen -= window - 1

        run = {}
        runlen = 0
        for offset in range(diaglen):
            if diagonal[offset] >= threshold:
                runlen += 1
            else:
                run[runlen] = run.get(runlen, 0) + 1
                runlen = 0

        if runlen:
            # run that extends to the end of the diagonal
            run[runlen] = run.get(runlen, 0) + 1

        # insert into data frame, the dataframe is randomly ordered
        for r in run:
            try:
                i = frame['len'].index(r)
                frame['count'][i] += run[r]
            except ValueError:
                frame['len'].append(r)
                frame['count'].append(run[r])

        return len(frame['len'])

    def sortFrame(self, frame, keyvar):
        """-----------------------------------------------------------------------------------------
        Sort all the variables in the dataframe according to the order of keyvar. The frame in
        self.frame is replaced by a new sorted dict.
        TODO should this return the min and max values?

        :param frame: string, name of a dataframe in self.frame
        :param keyvar: string, name of the column that defines the order
        :return: True
        -----------------------------------------------------------------------------------------"""
        unsorted = self.frame[frame]

        # save the order so it can be applied to all variables in the dataframe
        order = sorted(range(len(unsorted[keyvar])), key=lambda x: unsorted[keyvar][x])

        sorted_frame = {}
        for column in unsorted:
            sorted_frame[column] = [unsorted[column][i] for i in order]

        self.frame[frame] = sorted_frame

        return True

    def bdot(self, dataname, figurename, width=1, color=1, mode='dot', set_colormap=True):
        """-----------------------------------------------------------------------------------------
        Bokeh plot of dots in the main panel, and colorbar in the legend panel. Adds a size
        column to the dataframe; when color is off, the score column is overwritten with the
        maximum score so that all markers get the same color.

        :param dataname: string, name of a dataframe in self.frame
        :param figurename: string, a figure defined in setupBokeh and stored in self.figure
        :param width: boolean, scale size of markers by the score
        :param color: boolean, scale the color of the markers by the score
        :param mode: string, if dot use the circle renderer, otherwise segment renderer
        :param set_colormap: boolean, set the colormap based on score range, turn off for
                             second plot to use the same scale
        :return: True
        -----------------------------------------------------------------------------------------"""
        data = self.frame[dataname]
        figure = self.figure[figurename]
        legend = self.figure['legend']
        threshold = self.threshold
        alpha = self.alpha

        scoremin, scoremax = self.valueMinMax(data['score'])
        if width == 1:
            # scale the frame that is being plotted (not a fixed frame name)
            self.scaleColumn(dataname, 'score', 'size', (threshold - 1, scoremax),
                             (self.mindotsize, self.maxdotsize))
        else:
            data['size'] = [self.mindotsize for _ in range(len(data['score']))]

        if color != 1:
            data['score'] = [scoremax for _ in range(len(data['score']))]

        if set_colormap:
            if color == 1:
                cmap = LinearColorMapper(self.palette,
                                         low=max(threshold - 1.0, scoremin - 1),
                                         high=scoremax)
            else:
                cmap = LinearColorMapper(self.palette,
                                         low=threshold - 0.1,
                                         high=threshold)
            # remember the mapper so a later plot can reuse the same scale
            self.cmap = cmap
        else:
            cmap = self.cmap

        source = ColumnDataSource(data)
        if mode == 'dot':
            figure.circle(source=source, x='x', y='y', size='size',
                          line_color=transform('score', cmap), line_alpha=alpha,
                          fill_color=transform('score', cmap), fill_alpha=alpha)

        else:
            # line mode
            figure.segment(source=source, x0='x', x1='x1', y0='y', y1='y1',
                           line_width='size', line_color=transform('score', cmap),
                           alpha=alpha)

        # color bar is in a separate window, self.legend, so it doesn't disturb the
        # aspect ratio
        if color:
            color_bar = ColorBar(color_mapper=cmap, label_standoff=3, bar_line_color='black',
                                 scale_alpha=alpha, width=20, margin=0, location=(0, 0),
                                 major_tick_in=20, major_tick_out=5,
                                 major_tick_line_color='black')

            legend.add_layout(color_bar, 'left')

        return True

    def bscoreDist(self, figurename, dataname, color):
        """-----------------------------------------------------------------------------------------
        Bokeh bar plot of the score distribution. The y range of the figure is set to 110% of
        the largest count.

        :param figurename: string, name of figures (stored in self.figure)
        :param dataname: string, name of data frame (stored in self.frame)
        :param color: string, any valid Bokeh color, used to fill bars
        :return: True
        -----------------------------------------------------------------------------------------"""
        data = self.frame[dataname]
        figure = self.figure[figurename]

        _, maxp = self.valueMinMax(data['count'])

        source = ColumnDataSource(data)

        # observed score density
        figure.vbar(source=source, x='score', top='count', width=0.8, color=color,
                    line_color='black', alpha=self.alpha, bottom=0.0)
        figure.y_range = Range1d(0.0, maxp * 1.1)

        return True

    def brunDist(self, figurename, dataname, color):
        """-----------------------------------------------------------------------------------------
        Bokeh bar plot of run length distribution

        :param figurename: string, name of figures (stored in self.figure)
        :param dataname: string, name of data frame (stored in self.frame)
        :param color: string, any valid Bokeh color, used to fill bars
        :return: True
        -----------------------------------------------------------------------------------------"""
        run = self.frame[dataname]
        figure = self.figure[figurename]
        source = ColumnDataSource(run)

        # observed and simulated run lengths, bars start at bottom=0.1 rather than zero
        # (the comment in the original relates this to the log axis)
        figure.vbar(source=source, x='len', top='count', width=0.8, color=color,
                    line_color='black', alpha=self.alpha, line_width=0.5, bottom=0.1)

        return True

    def bscoreCumulative(self, figurename, dataname):
        """-----------------------------------------------------------------------------------------
        Bokeh plot of cumulative distribution as a line on right hand axis. The dataframe must
        have the columns score and cumulative (see addCumulative).

        :param figurename: string, name of figures (stored in self.figure)
        :param dataname: string, name of data frame (stored in self.frame)
        :return: True
        -----------------------------------------------------------------------------------------"""
        data = self.frame[dataname]
        figure = self.figure[figurename]
        source = ColumnDataSource(data)

        figure.extra_y_ranges = {"cumulative": Range1d(start=0.0, end=1.0)}
        axis2 = LinearAxis(y_range_name="cumulative")
        axis2.ticker.num_minor_ticks = 10
        figure.add_layout(axis2, 'right')

        figure.line(source=source, x='score', y='cumulative', y_range_name='cumulative',
                    line_width=2, color='#1122cc')

        # shaded box showing 95% level
        box = BoxAnnotation(bottom=0.95, top=1.0, y_range_name='cumulative',
                            fill_color='#FFBBBB', line_width=3, line_dash='dashed')
        figure.add_layout(box)

        return True

    def writeFrame(self, framename, key='x', out=sys.stdout):
        """-----------------------------------------------------------------------------------------
        Write the dataframe out as a tab delimited table to the specified output file. Output
        file should be opened for writing in advance.
        TODO figure out how to format values more nicely

        :param framename: string, name of a dataframe in self.frame
        :param key: string, name of column to use as key (first column in table)
        :param out: open output file
        :return: True
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[framename]

        out.write('\n{} dataframe\n'.format(framename))

        # header line, key column first
        out.write('\t{}'.format(key))
        for column in frame:
            if column == key:
                continue
            out.write('\t{}'.format(column))
        out.write('\n')

        n = len(frame[key])
        for i in range(n):
            out.write('\t{}'.format(frame[key][i]))
            for column in frame:
                if column == key:
                    continue
                out.write('\t{}'.format(frame[column][i]))
            out.write('\n')

        return True

    def show(self, *args, **kwargs):
        """-----------------------------------------------------------------------------------------
        Delegate to the module level show() function with self.grid as the object to display
        (presumably the Bokeh show function - confirm against the imports). Makes syntax a
        little easier in application since the object is used instead of the plotting class

        :param args: arguments to pass to show()
        :param kwargs: arguments to pass to show()
        :return: True
        -----------------------------------------------------------------------------------------"""
        show(self.grid, *args, **kwargs)

        return True

    @staticmethod
    def cumulative(score, total):
        """-----------------------------------------------------------------------------------------
        Return cumulative score probability distribution as a list.

        :param score: list of counts
        :param total: int, number of observations, must not be zero
        :return: list, running sum of score[i] / total
        -----------------------------------------------------------------------------------------"""
        cumulative = []
        wsum = 0
        for s in score:
            wsum += s / total
            cumulative.append(wsum)

        return cumulative

    def addCumulative(self, data, sourcecol, destcol):
        """-----------------------------------------------------------------------------------------
        Add cumulative distribution to dataframe data, based on column sourcecol and stored in
        a new column named destcol. The cumulative sums are divided by the column total; if the
        total is zero the sums are stored without normalization.

        :param data: string (dataframe in self.frame)
        :param sourcecol: string, column name in self.frame[data]
        :param destcol: string, new column name for cumulative distribution
        :return: True
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[data]

        cum = []
        total = 0
        for v in frame[sourcecol]:
            total += v
            cum.append(total)

        # avoid ZeroDivisionError when every value in the source column is zero
        if total:
            cum = [c / total for c in cum]

        frame[destcol] = cum

        return True

    def addSegment(self, framename, xcol='x', ycol='y', xnew='x1', ynew='y1'):
        """-----------------------------------------------------------------------------------------
        Convert x, y dot positions to line segments; the segment renderer requires beginning
        and ending points for each segment. The existing x and y are modified to be the
        beginning and new variables (xnew and ynew) are added for the end points. Each segment
        extends half a position on either side of the dot.

        :param framename: string, name of a dataframe in self.frame
        :param xcol: string, name of x column in data frame
        :param ycol: string, name of y column in data frame
        :param xnew: string, name of new x column in data frame (end of segment)
        :param ynew: string, name of new y column in data frame (end of segment)
        :return: True
        -----------------------------------------------------------------------------------------"""
        frame = self.frame[framename]
        frame[xnew] = []
        frame[ynew] = []

        # correct the direction when sequence 2 is reversed
        yinc = self.yinc
        dither = 0.5
        ydither = [-dither * yinc, dither * yinc]

        for pos in range(len(frame[xcol])):
            frame[xnew].append(frame[xcol][pos] + dither)
            frame[ynew].append(frame[ycol][pos] + ydither[1])
            frame[xcol][pos] -= dither
            frame[ycol][pos] += ydither[0]

        return True

    @staticmethod
    def density(score, total):
        """-----------------------------------------------------------------------------------------
        Convert a list representing the score distribution to a density by dividing by total.
        The list is modified in place.

        :param score: list of int or float
        :param total: total number of scores (sum(score)), must not be zero
        :return: float, the largest density value (0.0 for an empty list)
        -----------------------------------------------------------------------------------------"""
        maxp = 0.0
        for i in range(len(score)):
            score[i] /= total
            maxp = max(maxp, score[i])

        return maxp

    @staticmethod
    def scoreMinMax(score):
        """-----------------------------------------------------------------------------------------
        Returns the first and last positions with a value greater than zero in a list of
        scores. Use to get ranges for score histograms

        :param score: list
        :return: int, int (None, None if no value is greater than zero)
        -----------------------------------------------------------------------------------------"""
        scoremin = None
        scoremax = None
        for i, s in enumerate(score):
            if s > 0:
                if scoremin is None:
                    scoremin = i
                scoremax = i

        return scoremin, scoremax

    @staticmethod
    def valueMinMax(score):
        """-----------------------------------------------------------------------------------------
        Returns the minimum and maximum value in a list of values. Raises IndexError if the
        list is empty.

        :param score: list
        :return: float, float
        -----------------------------------------------------------------------------------------"""
        scoremin = score[0]
        scoremax = score[0]
        for s in score:
            scoremin = min(scoremin, s)
            scoremax = max(scoremax, s)

        return scoremin, scoremax
# NOTE(review): the two statements below repeat the last statements of findall() and do not
# belong to any definition visible here - confirm whether they are a leftover duplicate before
# relying on this part of the file.
nrf += self.find(direction=direction, frame=frame, minlen=minlen, includeseq=includeseq)
return nrf

# --------------------------------------------------------------------------------------------------
# Testing
# --------------------------------------------------------------------------------------------------
if __name__ == '__main__':
    # Self-test: search a short hard-coded sequence with findall() and print one line per
    # reading frame stored in orf.rflist.
    orf = Orf()
    orf.sequence = 'TAAATGATGTGACCCTCACCGTGA'
    print(orf.sequence)

    # includeseq=True is required because the report below prints rf["seq"]
    nrf = orf.findall(includeseq=True)
    print(f'{nrf} reading frames found')

    # assumes the count returned by findall() equals the number of entries in orf.rflist
    for i in range(nrf):
        rf = orf.rflist[i]

        # s is only needed by the commented-out print below, which slices the strand directly
        s = orf.sequence
        if rf['direction'] == '-':
            s = Fasta.reverseComplement(orf.sequence)

        begin = rf["begin"]
        end = rf["end"]
        # print(f'f:{rf["frame"]}{rf["direction"]}\tbegin:{begin:4d}\tend:{end:4d}\t{s[begin:end]}')
        print(f'f:{rf["frame"]}{rf["direction"]}\tbegin:{begin:4d}\tend:{end:4d}\t{rf["seq"]}')

    exit(0)