def plot(self, plot_type):
    """Draw the chart(s) selected by ``plot_type``.

    ``plot_type`` may be ``"stacked"``, ``"line"``, ``"pie"`` or ``"all"``;
    ``"all"`` draws the three charts in that order.  Any other value is
    ignored and nothing is drawn.

    Inside this body the name ``plot`` is looked up as a global, so it
    refers to whatever module-level object is bound to ``plot`` (presumably
    the plotting module -- confirm against the file's imports), not to this
    method.
    """
    if plot_type == "all":
        plot.stacked(self)
        plot.line(self)
        plot.pie(self)
    elif plot_type == "stacked":
        plot.stacked(self)
    elif plot_type == "line":
        plot.line(self)
    elif plot_type == "pie":
        plot.pie(self)
def plot_step_function(v, vertical_lines=True, **kwds):
    r"""
    Plot the step function `f` described by a list of pairs.

    Each pair `(a,b)` in `v` means `f(a) = b`.  The pairs may be given in
    any order: a sorted copy is made and the caller's list is left alone.
    With fewer than two pairs an empty line is returned.

    INPUT:

    - ``v`` -- list of pairs (a,b)

    - ``vertical_lines`` -- bool (default: True) if True, draw
      vertical risers at each step of this step function.
      Technically these vertical lines are not part of the graph
      of this function, but they look very nice in the plot so we
      include them by default

    EXAMPLES:

    We plot the prime counting function::

        sage: plot_step_function([(i,prime_pi(i)) for i in range(20)])
        Graphics object consisting of 1 graphics primitive

        sage: plot_step_function([(i,sin(i)) for i in range(5,20)])
        Graphics object consisting of 1 graphics primitive

    We pass in many options and get something that looks like "Space Invaders"::

        sage: v = [(i,sin(i)) for i in range(5,20)]
        sage: plot_step_function(v, vertical_lines=False, thickness=30, rgbcolor='purple', axes=False)
        Graphics object consisting of 14 graphics primitives
    """
    from plot import line

    # Work on a sorted copy so the caller's list is never reordered.
    points = sorted(v)
    if len(points) < 2:
        return line([])  # nothing to step between

    # Consecutive (current, following) pairs of the sorted points.
    neighbours = zip(points, points[1:])

    if not vertical_lines:
        # One separate horizontal segment per step, added together.
        return sum(line([current, (following[0], current[1])], **kwds)
                   for current, following in neighbours)

    # One connected path: each point, then the corner directly below or
    # above the next point, and finally the last point itself.
    staircase = []
    for current, following in neighbours:
        staircase.append(current)
        staircase.append((following[0], current[1]))
    staircase.append(points[-1])
    return line(staircase, **kwds)
def plot_step_function(v, vertical_lines=True, **kwds):
    r"""
    Return the line graphics object plotting the step function `f`
    given by the list `v` of pairs `(a,b)`, where `(a,b)` in `v` means
    `f(a) = b`.  The input does not need to be sorted; it is sorted into
    a fresh list here and never modified in place.

    INPUT:

    - ``v`` -- list of pairs (a,b)

    - ``vertical_lines`` -- bool (default: True) if True, draw
      vertical risers at each step of this step function.
      Technically these vertical lines are not part of the graph
      of this function, but they look very nice in the plot so we
      include them by default

    EXAMPLES:

    We plot the prime counting function::

        sage: plot_step_function([(i,prime_pi(i)) for i in range(20)])
        Graphics object consisting of 1 graphics primitive

        sage: plot_step_function([(i,sin(i)) for i in range(5,20)])
        Graphics object consisting of 1 graphics primitive

    We pass in many options and get something that looks like "Space Invaders"::

        sage: v = [(i,sin(i)) for i in range(5,20)]
        sage: plot_step_function(v, vertical_lines=False, thickness=30, rgbcolor='purple', axes=False)
        Graphics object consisting of 14 graphics primitives
    """
    from plot import line

    ordered = sorted(v)  # sorted copy; the argument itself is untouched
    if len(ordered) <= 1:
        return line([])  # empty line

    # Each tread is the horizontal piece from one point to the x-position
    # of the next point, at the height of the first.
    treads = [(left, (right[0], left[1]))
              for left, right in zip(ordered, ordered[1:])]

    if vertical_lines:
        # Chaining the treads end to end (plus the final point) makes the
        # risers appear as part of a single connected line.
        path = []
        for start, end in treads:
            path.extend((start, end))
        path.append(ordered[-1])
        return line(path, **kwds)

    return sum(line([start, end], **kwds) for start, end in treads)
def render_ln(self, output_port, Nmax=None, Lvals=None, title=None):
    """
    Create and print L-N figures
    (for fixed L, how many configurations are L-close to an N-good, as N increases?)

    One figure is drawn per value in ``Lvals``; the LaTeX for all figures,
    separated by ``latex.FIGSKIP``, is printed to ``output_port``.
    ``Nmax`` and ``Lvals`` fall back to ``self.Nmax`` / ``self.Lvals`` when
    they are not given (or are otherwise falsy).
    """
    if not Nmax:
        Nmax = self.Nmax
    if not Lvals:
        Lvals = self.Lvals

    # Six evenly spaced y ticks from 0 to the number of configurations.
    # Ticks 1, 2 and 4 are rounded (to hundreds for large counts); the
    # others are only truncated to int.
    yround = 0 if self.num_configs < 1000 else -2
    rounded_ticks = (1, 2, 4)
    yticks = [
        int(round(y, yround)) if idx in rounded_ticks else int(y)
        for idx, y in enumerate(np.linspace(0, self.num_configs, 6))
    ]

    # x ticks at 1, 5, 10, ... up to Nmax, labelled "1x", "5x", ...
    xposns = [1]
    xposns.extend(range(5, 1 + Nmax, 5))
    xlbls = ["%sx" % n for n in xposns]

    figs = []
    for L in Lvals:
        file_stem = "%s%s" % (self.project_name.split("-", 1)[0], L)
        fig = plot.line(
            [1, Nmax],
            [lambda N_float: self.countLM_continuous(L, N_float)],
            title=title,  # no title by default
            xlabel="Overhead (vs. untyped)",
            ylabel="Count",
            xticks=(xposns, xlbls),
            yticks=(yticks, yticks),
            samples=self.num_samples,
            output="%s/%s" % (self.output_dir, file_stem),
            hlines=[self.RED_HLINE],
            vlines=[self.DELIVERABLE_VLINE, self.USABLE_VLINE],
            ymax=self.num_configs,
        )
        figs.append(fig)

    separator = "\n%s" % latex.FIGSKIP
    rendered = [latex.figure(fg) for fg in figs]
    print(separator.join(rendered), file=output_port)
class StatisWorker(Worker):
    """Worker that computes sequence/repeat statistics and draws their plots.

    For the whole sequence set and then for each repeat category
    (``ssr``, ``issr``, ``cssr``, ``vntr``) it stores the JSON-encoded
    statistics as a database option and renders the matching plots through
    the ``plot`` module.  Categories whose table is empty get the option
    value ``'[]'`` and no plots.
    """

    def __init__(self, unit='Mb', letter='ATGC', dpi=300):
        """Remember the statistics settings.

        ``unit`` and ``letter`` are passed straight to ``Statistics``;
        ``dpi`` is passed to every plotting call.
        """
        super(StatisWorker, self).__init__()
        self.unit = unit
        self.letter = letter
        self.dpi = dpi

    def process(self):
        """Run all statistics, store them, and generate the plots.

        If the sequence statistics raise, the error text is reported via
        ``emit_finish`` and nothing else is done.  Errors in the later
        per-category steps are not caught here.
        """
        self.emit_message("Doing sequence statistics...")

        try:
            seq_statis = Statistics(self.unit, self.letter).results()
        # ``except X as e`` works on Python 2.6+ and Python 3; the old
        # ``except X, e`` spelling is a SyntaxError on Python 3.
        except Exception as e:
            self.emit_finish(str(e))
            return

        self.db.set_option('seq_statis', json.dumps(seq_statis))

        if not self.db.is_empty('ssr'):
            self.emit_message("Doing perfect SSR statistics...")
            ssr_statis = SSRStatistics().results()
            self.db.set_option('ssr_statis', json.dumps(ssr_statis))

            # generate ssr type distribution pie plot
            # (first row of the table is skipped -- presumably a header)
            x = [row[1] for row in ssr_statis.type[1:]]
            labels = [row[0] for row in ssr_statis.type[1:]]
            plot.pie(x, labels, "ssr_type", self.dpi)

            # generate most abundant ssr motif distribution bar plot:
            # bucket the motifs by motif length (1..6), then take up to
            # ten from each bucket
            motifs = [[], [], [], [], [], []]
            for row in ssr_statis.category[1:]:
                motifs[len(row[0]) - 1].append((row[0], row[1]))

            x = []
            motif_labels = []
            for bucket in motifs:
                # NOTE(review): this orders by motif text first, so the ten
                # kept per bucket are the alphabetically first ones, not
                # necessarily the most abundant.  If "most abundant" is the
                # intent the key should probably be (-count, motif) --
                # confirm before changing the generated figure.
                bucket = sorted(bucket, key=lambda item: (item[0], -item[1]))
                for motif, count in bucket[:10]:
                    x.append(count)
                    motif_labels.append(motif)

            plot.bar(motif_labels, x, "SSR motif category", "SSR counts", "ssr_motif", self.dpi)

            # generate ssr repeat distribution box plot
            # (reuses the SSR type labels from the pie plot above)
            x = ssr_statis.repeat
            plot.box(x, labels, "SSR repeats", "ssr_repeat", self.dpi)

            # generate ssr length distribution box plot
            x = ssr_statis.ssrlen
            plot.box(x, labels, "SSR length (bp)", "ssr_length", self.dpi)

            # generate ssr distribution in different regions pie plot
            if ssr_statis.region:
                x = [row[1] for row in ssr_statis.region]
                labels = [row[0] for row in ssr_statis.region]
                plot.pie(x, labels, "ssr_region", self.dpi)
        else:
            self.db.set_option('ssr_statis', '[]')

        if not self.db.is_empty('issr'):
            self.emit_message("Doing imperfect SSR statistics...")
            issr_statis = ISSRStatistics().results()
            self.db.set_option('issr_statis', json.dumps(issr_statis))

            # generate issr type distribution pie plot
            x = [row[1] for row in issr_statis.type[1:]]
            labels = [row[0] for row in issr_statis.type[1:]]
            plot.pie(x, labels, "issr_type", self.dpi)

            # generate issr score distribution box plot
            x = issr_statis.score
            plot.box(x, labels, "iSSR score", "issr_score", self.dpi)

            # generate issr length distribution box plot
            x = issr_statis.issrlen
            plot.box(x, labels, "iSSR length (bp)", "issr_length", self.dpi)

            # generate issr distribution in different regions pie plot
            if issr_statis.region:
                x = [row[1] for row in issr_statis.region]
                labels = [row[0] for row in issr_statis.region]
                plot.pie(x, labels, "issr_region", self.dpi)
        else:
            self.db.set_option('issr_statis', '[]')

        if not self.db.is_empty('cssr'):
            self.emit_message("Doing compound SSR statistics...")
            cssr_statis = CSSRStatistics().results()
            self.db.set_option('cssr_statis', json.dumps(cssr_statis))

            # generate cssr complexity distribution
            x = [row[0] for row in cssr_statis.complexity[1:]]
            y = [row[1] for row in cssr_statis.complexity[1:]]
            plot.line(x, y, 'cSSR complexity', 'cSSR Counts', 'cssr_complexity', self.dpi)

            # generate cssr length distribution
            x = [row[0] for row in cssr_statis.cssrlen[1:]]
            y = [row[1] for row in cssr_statis.cssrlen[1:]]
            plot.line(x, y, 'cSSR length (bp)', 'cSSR Counts', 'cssr_length', self.dpi)

            # generate cssr gap distribution
            x = [row[0] for row in cssr_statis.gap[1:]]
            y = [row[1] for row in cssr_statis.gap[1:]]
            plot.line(x, y, 'Gap length (bp)', 'cSSR Counts', 'cssr_gap', self.dpi)

            # generate cssr distribution in different regions pie plot
            if cssr_statis.region:
                x = [row[1] for row in cssr_statis.region]
                labels = [row[0] for row in cssr_statis.region]
                plot.pie(x, labels, "cssr_region", self.dpi)
        else:
            self.db.set_option('cssr_statis', '[]')

        if not self.db.is_empty('vntr'):
            self.emit_message("Doing VNTR statistics...")
            vntr_statis = VNTRStatistics().results()
            self.db.set_option('vntr_statis', json.dumps(vntr_statis))

            # generate vntr type distribution
            # (unlike the tables above, all rows are used here)
            x = [row[0] for row in vntr_statis.type]
            y = [row[1] for row in vntr_statis.type]
            plot.line(x, y, 'VNTR motif length (bp)', 'VNTR Counts', 'vntr_type', self.dpi)

            # generate vntr length distribution
            x = [row[0] for row in vntr_statis.vntrlen]
            y = [row[1] for row in vntr_statis.vntrlen]
            plot.line(x, y, 'VNTR length (bp)', 'VNTR Counts', 'vntr_length', self.dpi)

            # generate vntr repeat distribution
            x = [row[0] for row in vntr_statis.repeat]
            y = [row[1] for row in vntr_statis.repeat]
            plot.line(x, y, 'VNTR repeats', 'VNTR Counts', 'vntr_repeat', self.dpi)

            # generate vntr distribution in different regions pie plot
            if vntr_statis.region:
                x = [row[1] for row in vntr_statis.region]
                labels = [row[0] for row in vntr_statis.region]
                plot.pie(x, labels, "vntr_region", self.dpi)
        else:
            self.db.set_option('vntr_statis', '[]')

        self.emit_finish("Statistics was successfully completed")
def centroid_bootstrap(catalog):
    """Iteratively refine RMSD centroids and transition thresholds (thetas).

    Starting from the centroids stored in ``settings.RMSD_CENTROID_FILE``,
    the jobs listed in the catalog's ``anl_sequence`` hash are processed in
    fixed-size batches.  For every batch:

    1. each frame is labelled ``(A, sub_state)`` where ``A`` is the nearest
       centroid and ``sub_state`` is the second nearest when the two RMSD
       values differ by less than ``thetas[A][B]`` (otherwise ``A``);
    2. frames labelled ``(A, A)`` are averaged and folded into centroid
       ``A`` as a running weighted mean;
    3. the thetas are re-estimated from the accumulated RMSD differences.

    When all batches are done the per-batch centroid/theta changes, the
    label histogram and one scatter plot per centroid pair are drawn via
    the ``P`` plotting helpers.

    Returns ``None``.  If the catalog has no ``name`` entry a message is
    logged and the function returns before doing any work.

    NOTE(review): the statement nesting was reconstructed from a source
    whose indentation had been lost; in particular the theta update is
    assumed to run once per batch -- confirm against the original file.
    """
    centfile = settings.RMSD_CENTROID_FILE
    centroid = np.load(centfile)
    numLabels = len(centroid)
    # Running point count per centroid, used as the weight of the existing
    # centroid when new observations are merged in.  Sized from numLabels
    # (the original hard-coded five entries); initial weights are still TBD.
    cent_npts = [1] * numLabels
    binlist = [(a, b) for a in range(numLabels) for b in range(numLabels)]
    logging.info("Loaded Starting Centroids from %s", centfile)

    name = catalog.get('name')
    if name is None:
        logging.info('Name not configured in this catalog. Set it and try again')
        return

    # Load/Set initial (current) Configs from Catalog
    if catalog.exists('thetas'):
        thetas = catalog.loadNPArray('thetas')
    else:
        thetas = np.zeros(shape=(numLabels, numLabels))
        thetas[:] = 0.25

    if catalog.exists('transition_sensitivity'):
        trans_factor = catalog.loadNPArray('transition_sensitivity')
    else:
        trans_factor = 0.2

    use_gradient = True
    obs_count = {ab: 0 for ab in binlist}
    C_delta = []   # mean centroid displacement, one entry per batch
    T_delta = []   # mean theta change, one entry per batch

    # Configure Noise Filter
    # NOTE(review): dcdfreq and noisefilt are not used below (the only use
    # of noisefilt is commented out); the catalog reads are kept because
    # they still fail loudly on a missing or non-numeric entry.
    noise = int(catalog.get('obs_noise'))
    dcdfreq = int(catalog.get('dcdfreq'))
    stepsize = int(catalog.get('sim_step_size'))
    nwidth = noise//(2*stepsize)
    noisefilt = lambda x, i: np.mean(x[max(0,i-nwidth):min(i+nwidth, len(x))], axis=0)

    # Get previously Labeled data (or label data IAW current settings)
    # NOTE(review): obslist is queried but never read afterwards.
    eid = db.get_expid(name)
    obslist = [i[0] for i in db.runquery('SELECT obs FROM obs WHERE expid=%d' % eid)]
    # Job ids ordered by their value in the 'anl_sequence' hash.
    jobs = [i[0] for i in sorted(catalog.hgetall('anl_sequence').items(), key=lambda x: x[1])]
    shape = None

    # Initialize lists for pair-wise distances (top 2 nearest centroids)
    diffList = {}
    transList = {}
    scatPlot = {}
    for A in range(0, numLabels-1):
        for B in range(A+1, numLabels):
            diffList[(A, B)] = []
            transList[(A, B)] = []
            scatPlot[(A, B)] = []
    allScat = []

    # Load trajectories & filter
    obs_global = []

    # Process learning in batches (static batch size to start)
    batch_size = 25
    max_obs = 150
    batch = 0
    while batch <= max_obs:
        logging.info("Procssing Jobs %d - %d", batch, batch+batch_size)
        exec_sim = []
        obs_list = []
        # Slice by batch_size so the window always matches the step and
        # the log line above (the original used a literal 25 here).
        for job in jobs[batch:batch+batch_size]:
            conf = catalog.hgetall('jc_' + job)
            traj = md.load(conf['dcd'], top=conf['pdb'])
            alpha = datareduce.filter_alpha(traj)
            conf['alpha'] = alpha.xyz
            exec_sim.append(conf)
            if shape is None:
                # Per-frame coordinate shape, taken from the first trajectory.
                shape = conf['alpha'].shape[1:]

            # xyz_filtered = np.array([noisefilt(alpha.xyz, i) for i in range(alpha.n_frames)])
            rmslist = calc_rmsd(alpha, centroid)
            labels = []
            for rms in rmslist:
                # [cw[i]*LA.norm(pt - centroid[i]) for i in range(5)]
                # A, B: indices of the nearest and second-nearest centroid.
                A, B = np.argsort(rms)[:2]
                delta = np.abs(rms[B] - rms[A])
                if delta < thetas[A][B]:
                    sub_state = B
                else:
                    sub_state = A
                classify = (A, sub_state)
                labels.append(classify)
                obs_count[classify] += 1

                # For globally updating Thetas
                obs_global.append(classify)
                # Store the signed difference under the ordered pair key so
                # negative values mean "closer to the lower-numbered state".
                if A < B:
                    diffList[(A, B)].append(rms[A] - rms[B])
                else:
                    diffList[(B, A)].append(rms[B] - rms[A])

                for a in range(0, numLabels-1):
                    for b in range(a+1, numLabels):
                        transList[(a, b)].append(rms[a] - rms[b])
                        # Colour code of this frame relative to pair (a, b);
                        # the meanings are given by `collab` further down.
                        if (a, a) == classify or (b, b) == classify:
                            c = 'b'
                        elif (a, b) == classify or (b, a) == classify:
                            c = 'g'
                        elif a == A or b == A:
                            c = 'r'
                        else:
                            c = 'black'
                        scatPlot[(a, b)].append((rms[a] - rms[b], c))
            obs_list.append(labels)

        logging.info('Bin Distribution:')
        grpby = {}
        for llist in obs_list:
            for l in llist:
                if l not in grpby:
                    grpby[l] = 0
                grpby[l] += 1
        for k in sorted(grpby.keys()):
            logging.info('%s: %5d', k, grpby[k])

        for A in range(0, numLabels-1):
            for B in range(A+1, numLabels):
                d = diffList[(A, B)]
                if d:
                    logging.info('Diff list for %d,%d: %d, %5.2f, %5.2f', A, B, len(d), min(d), max(d))
                else:
                    # min()/max() raise ValueError on an empty list; a pair
                    # with no observations yet must not abort the run (the
                    # theta update below already skips such pairs).
                    logging.info('Diff list for %d,%d: %d (no observations)', A, B, 0)

        # # 6. Apply Heuristics Labeling
        # # logging.debug('Applying Labeling Heuristic. Origin: %d, %d', srcA, srcB)
        # rmslabel = []
        # # label_count = {ab: 0 for ab in binlist}
        # groupbystate = [[] for i in range(numLabels)]
        # groupbybin = {ab: [] for ab in binlist}

        # For each frame in each traj: ID labeled well pts & build avg op
        logging.info('Selecting observed Well States')
        coor_sum = {i: np.zeros(shape=shape) for i in range(numLabels)}
        coor_tot = {i: 0 for i in range(numLabels)}
        for job, job_labels in zip(exec_sim, obs_list):
            # offset = int(job['xid:start'])
            # for i, frame in enumerate(job['alpha']):
            for frame, label in zip(job['alpha'], job_labels):
                # A, B = eval(obslist[offset+i])
                A, B = label
                # Only frames labelled (A, A) contribute to the average.
                if A != B:
                    continue
                coor_sum[A] += frame
                coor_tot[A] += 1

        logging.info('Calculating Avg from following stats:')
        logging.info(' Total Frames: %d', sum([len(sim['alpha']) for sim in exec_sim]))

        # Calculate New Centroids (w/deltas)
        delta = []
        for S in range(numLabels):
            if coor_tot[S] == 0:
                logging.info(" State: %d --- NO OBSERVATIONS IN THIS WELL STATE", S)
                continue
            cent_local = coor_sum[S] / coor_tot[S]
            diff_local = LA.norm(centroid[S] - cent_local)
            # Weighted mean of the existing centroid and this batch's mean.
            update = ((centroid[S] * cent_npts[S]) + (cent_local * coor_tot[S])) / (cent_npts[S] + coor_tot[S])
            delta.append(LA.norm(update - centroid[S]))
            logging.info(' State %d: NewPts=%5d Delta=%5.2f LocalDiff=%5.2f', S, coor_tot[S], delta[-1], diff_local)
            centroid[S] = update
            cent_npts[S] += coor_tot[S]

        # NOTE(review): if no state had observations `delta` is empty and
        # np.mean returns nan; left as in the original.
        centroid_change = np.mean(delta)
        if len(C_delta) > 1:
            rel_change = np.abs((centroid_change - C_delta[-1]) / C_delta[-1])
            logging.info('Centroid Change: %5.2f (%5.2f%%)', centroid_change, 100*rel_change)
        C_delta.append(centroid_change)
        batch += batch_size

        # Update Thetas (using global data ?????)
        delta = []
        for A in range(0, numLabels-1):
            for B in range(A+1, numLabels):
                X = sorted(diffList[(A, B)])
                if len(X) < 100:
                    logging.info('Lacking data on %d, %d', A, B)
                    continue
                # logging.info(' Total # Obs: %d', len(X))

                # Index of the first positive difference in the sorted list.
                crossover = 0
                for i, x in enumerate(X):
                    if x > 0:
                        crossover = i
                        break
                # logging.info(' Crossover at Index: %d', crossover)
                if crossover < 50 or (len(X)-crossover) < 50:
                    logging.info(' Lacking local data skipping.')
                    continue

                # Window around the crossover whose width on each side is
                # the trans_factor fraction of the points on that side.
                zoneA = int((1-trans_factor) * crossover)
                zoneB = crossover + int(trans_factor * (len(X) - crossover))
                if use_gradient:
                    # Find local max gradient inside the window on each side
                    idxA = zoneA + np.argmax(np.gradient(X[zoneA:crossover]))
                    idxB = crossover + np.argmax(np.gradient(X[crossover:zoneB]))
                else:
                    # Classify Fixed Percent of observations as Transitional
                    idxA = zoneA
                    idxB = zoneB
                thetaA = X[idxA]
                thetaB = X[idxB]

                # Both branches shared this tail verbatim in the original.
                thetas_updated = np.copy(thetas)
                thetas_updated[A][B] = np.abs(thetaA)
                thetas_updated[B][A] = np.abs(thetaB)
                tdeltA = np.abs(thetas_updated[A][B] - thetas[A][B])
                tdeltB = np.abs(thetas_updated[B][A] - thetas[B][A])
                delta.append(tdeltA)
                delta.append(tdeltB)
                logging.info(' Theta Change (%d,%d): %4.2f (%4.1f)', A, B, tdeltA, (100*tdeltA/thetas[A][B]))
                logging.info(' Theta Change (%d,%d): %4.2f (%4.1f)', B, A, tdeltB, (100*tdeltB/thetas[B][A]))
                thetas[A][B] = thetas_updated[A][B]
                thetas[B][A] = thetas_updated[B][A]

        T_delta.append(np.mean(delta))

    P.line(np.array(C_delta), 'Avg_CHANGE_Centroid_Pos_%s' % name)
    P.line(np.array(T_delta), 'Avg_CHANGE_Theta_Val_%s' % name)
    P.bargraph_simple(obs_count, 'Final_Histogram_%s' % name)

    # for k, X in diffList.items():
    #     A, B = k
    #     P.transition_line(sorted(X), A, B, title='-X', trans_factor=.5)
    # for k, X in transList.items():
    #     A, B = k
    #     P.transition_line(sorted(X), A, B, title='-ALL', trans_factor=.5)

    for k, X in scatPlot.items():
        collab = {'b': 'Well', 'g': 'Trans', 'r': 'Primary', 'brown': 'Secondary', 'black': 'None'}
        # One point list per colour; a distinct loop name keeps the pair
        # key `k` from being shadowed.
        ptmap = {color: [] for color in collab.keys()}
        ordpts = sorted(X, key=lambda x: x[0])
        for i, tup in enumerate(ordpts):
            y, c = tup
            ptmap[c].append((i, y))
            # if c == 'b' or c == 'g':
            #     ptmap[c].append((i, y))
            # else:
            #     ptmap[c].append((i, 0))
        A, B = k
        P.scat_Transtions(ptmap, title='-%d_%d'%(A,B), size=1, labels=collab)
if not ylabel: warning("no label for the y axis. Data field will be used.") ylabel = field if plot_type == 'b' or plot_type == 'B': if plot_type == 'b': input.sort(key=lambda x:x.b) input = [bar.file for bar in input] ylist,error = generate_data(input, field, conf) plot.bars(ylist, error, output, title, xlabel, ylabel) elif plot_type == 'L': ylist,error = generate_data(input, field, conf) plot.line(ylist, error, output, title, xlabel, ylabel) elif plot_type == 'l' or plot_type == 'lp': ylist = [] error = [] aux = [] flist = [] input.sort(key=lambda x:x.l) if plot_type == 'l': i=1 while True: aux = [line.files for line in input if line.l==i] if not len(aux):