Beispiel #1
0
 def plot(self, plot_type):
     """Dispatch to the plotting routine(s) selected by ``plot_type``.

     ``"stacked"``, ``"line"`` and ``"pie"`` each trigger the matching
     ``plot.<kind>(self)`` call; ``"all"`` triggers the three of them in
     that order.  Any other value results in no call at all.
     """
     # Inside the body, ``plot`` resolves to the module-level name of the
     # same spelling, not to this method.
     want_all = plot_type == "all"
     if want_all or plot_type == "stacked":
         plot.stacked(self)
     if want_all or plot_type == "line":
         plot.line(self)
     if want_all or plot_type == "pie":
         plot.pie(self)
Beispiel #2
0
def plot_step_function(v, vertical_lines=True, **kwds):
    r"""
    Plot the step function `f` described by the pairs `(a,b)` in `v`,
    where each pair means `f(a) = b`.  The pairs may be given in any
    order; a sorted copy is used internally.

    INPUT:

    - ``v`` -- list of pairs (a,b)

    - ``vertical_lines`` -- bool (default: True) if True, the steps are
      joined by vertical risers and the whole plot is a single line.
      The risers are not really part of the graph of the function,
      but they look nice, so they are drawn by default.  If False,
      every step is drawn as its own horizontal line.

    Extra keyword arguments are forwarded to ``line``.  With fewer than
    two pairs an empty line is returned (and the keywords are not used).

    EXAMPLES:

    We plot the prime counting function::

        sage: plot_step_function([(i,prime_pi(i)) for i in range(20)])
        Graphics object consisting of 1 graphics primitive

        sage: plot_step_function([(i,sin(i)) for i in range(5,20)])
        Graphics object consisting of 1 graphics primitive

    We pass in many options and get something that looks like "Space Invaders"::

        sage: v = [(i,sin(i)) for i in range(5,20)]
        sage: plot_step_function(v, vertical_lines=False, thickness=30, rgbcolor='purple', axes=False)
        Graphics object consisting of 14 graphics primitives
    """
    from plot import line

    # Sort into a new list so the caller's list is never modified.
    pts = sorted(v)
    npts = len(pts)
    if npts < 2:
        return line([])  # nothing to connect

    if not vertical_lines:
        # One independent horizontal segment per step, added together.
        return sum(
            line([pts[k], (pts[k + 1][0], pts[k][1])], **kwds)
            for k in range(npts - 1))

    # Single polyline: each point is followed by the corner lying below or
    # above the next point, which yields the vertical risers for free.
    path = []
    for k in range(npts - 1):
        path.append(pts[k])
        path.append((pts[k + 1][0], pts[k][1]))
    path.append(pts[-1])
    return line(path, **kwds)
Beispiel #3
0
def plot_step_function(v, vertical_lines=True, **kwds):
    r"""
    Return the graphics object plotting the step function `f` defined
    by the list `v` of pairs `(a,b)`, i.e. `f(a) = b` for every pair
    in `v`.  The input does not need to be sorted.

    INPUT:

    - ``v`` -- list of pairs (a,b)

    - ``vertical_lines`` -- bool (default: True) whether to draw the
      vertical risers between consecutive steps.  Strictly speaking
      they do not belong to the graph of the function, but the plot
      looks nicer with them, hence the default.

    Any further keyword arguments are handed to ``line``.  If `v` holds
    fewer than two pairs, an empty line is returned.

    EXAMPLES:

    We plot the prime counting function::

        sage: plot_step_function([(i,prime_pi(i)) for i in range(20)])
        Graphics object consisting of 1 graphics primitive

        sage: plot_step_function([(i,sin(i)) for i in range(5,20)])
        Graphics object consisting of 1 graphics primitive

    We pass in many options and get something that looks like "Space Invaders"::

        sage: v = [(i,sin(i)) for i in range(5,20)]
        sage: plot_step_function(v, vertical_lines=False, thickness=30, rgbcolor='purple', axes=False)
        Graphics object consisting of 14 graphics primitives
    """
    from plot import line

    # sorted() builds a new list, so the argument itself stays untouched.
    ordered = sorted(v)
    # Every point paired with its successor; empty when there are < 2 points.
    consecutive = list(zip(ordered, ordered[1:]))
    if not consecutive:
        return line([])  # empty line

    if vertical_lines:
        # One connected path: point, corner at the next abscissa, next point, ...
        corners = []
        for left, right in consecutive:
            corners.append(left)
            corners.append((right[0], left[1]))
        corners.append(ordered[-1])
        return line(corners, **kwds)

    # No risers: a separate horizontal segment for each step.
    return sum(line([left, (right[0], left[1])], **kwds) for left, right in consecutive)
 def render_ln(self, output_port, Nmax=None, Lvals=None, title=None):
     """
     Create the L-N figures and print them to ``output_port``.

     (For fixed L, how many configurations are L-close to an N-good,
     as N increases?)

     One ``plot.line`` call is made per value in ``Lvals``; the results
     are wrapped with ``latex.figure`` and printed joined by a newline
     followed by ``latex.FIGSKIP``.

     Arguments:
     - output_port : file-like object handed to ``print(..., file=...)``
     - Nmax        : upper end of the x range; falls back to ``self.Nmax``
                     when falsy
     - Lvals       : values of L to plot; falls back to ``self.Lvals``
                     when falsy
     - title       : title for every figure (no title by default)
     """
     if not Nmax:
         Nmax = self.Nmax
     if not Lvals:
         Lvals = self.Lvals

     # Six evenly spaced y ticks from 0 to num_configs.  The ticks at
     # positions 1, 2 and 4 are rounded (to hundreds once there are at
     # least 1000 configurations); the others are simply truncated.
     yspace = np.linspace(0, self.num_configs, 6)
     yround = -2 if self.num_configs >= 1000 else 0
     rounded_slots = (1, 2, 4)
     yticks = []
     for idx in range(6):
         tick = yspace[idx]
         if idx in rounded_slots:
             tick = round(tick, yround)
         yticks.append(int(tick))

     # x ticks at 1 and at every multiple of 5 up to Nmax, labelled "<n>x".
     xposns = [1] + list(range(5, 1+Nmax, 5))
     xlbls = ["%sx" % n for n in xposns]

     figs = []
     for L in Lvals:
         # Output name: text of project_name before the first "-", then L.
         out_name = "%s%s" % (self.project_name.split("-", 1)[0], L)
         fig = plot.line(
             [1, Nmax],
             [lambda N_float: self.countLM_continuous(L, N_float)],
             title=title,
             xlabel="Overhead  (vs. untyped)",
             ylabel="Count",
             xticks=(xposns, xlbls),
             yticks=(yticks, yticks),
             samples=self.num_samples,
             output="%s/%s" % (self.output_dir, out_name),
             hlines=[self.RED_HLINE],
             vlines=[self.DELIVERABLE_VLINE, self.USABLE_VLINE],
             ymax=self.num_configs)
         figs.append(fig)

     separator = "\n%s" % latex.FIGSKIP
     rendered = [latex.figure(fg) for fg in figs]
     print(separator.join(rendered), file=output_port)
Beispiel #5
0
class StatisWorker(Worker):
    """
    Worker that computes sequence and repeat statistics, stores each result
    as a JSON string option in ``self.db`` and draws the matching plots.

    :param unit: first argument handed to ``Statistics``
    :param letter: second argument handed to ``Statistics``
    :param dpi: passed as the last argument to every ``plot.*`` call
    """

    def __init__(self, unit='Mb', letter='ATGC', dpi=300):
        super(StatisWorker, self).__init__()
        self.unit = unit
        self.letter = letter
        self.dpi = dpi

    def process(self):
        """
        Run all statistics in order: sequence, SSR, iSSR, cSSR, VNTR.

        If the sequence statistics raise, the error text is reported through
        ``emit_finish`` and nothing else is done.  For each repeat category
        whose table is empty, the option ``<category>_statis`` is set to
        ``'[]'`` instead of being computed.
        """
        self.emit_message("Doing sequence statistics...")

        try:
            seq_statis = Statistics(self.unit, self.letter).results()
        except Exception as e:
            # ``except X as e`` is valid on Python 2.6+ and Python 3; the
            # old ``except X, e`` spelling is a SyntaxError on Python 3.
            self.emit_finish(str(e))
            return

        self.db.set_option('seq_statis', json.dumps(seq_statis))

        if not self.db.is_empty('ssr'):
            self._ssr_statistics()
        else:
            self.db.set_option('ssr_statis', '[]')

        if not self.db.is_empty('issr'):
            self._issr_statistics()
        else:
            self.db.set_option('issr_statis', '[]')

        if not self.db.is_empty('cssr'):
            self._cssr_statistics()
        else:
            self.db.set_option('cssr_statis', '[]')

        if not self.db.is_empty('vntr'):
            self._vntr_statistics()
        else:
            self.db.set_option('vntr_statis', '[]')

        self.emit_finish("Statistics was successfully completed")

    def _plot_region_pie(self, region, name):
        """Draw a pie plot from (label, value) rows; do nothing if ``region`` is empty."""
        if region:
            values = [row[1] for row in region]
            labels = [row[0] for row in region]
            plot.pie(values, labels, name, self.dpi)

    def _plot_count_line(self, rows, xlabel, ylabel, name):
        """Draw a line plot using column 0 of ``rows`` as x and column 1 as y."""
        xs = [row[0] for row in rows]
        ys = [row[1] for row in rows]
        plot.line(xs, ys, xlabel, ylabel, name, self.dpi)

    def _ssr_statistics(self):
        """Store the perfect SSR statistics and draw their plots."""
        self.emit_message("Doing perfect SSR statistics...")
        ssr_statis = SSRStatistics().results()
        self.db.set_option('ssr_statis', json.dumps(ssr_statis))

        # SSR type distribution pie plot.  The first row of ``type`` is
        # skipped -- presumably a header row; confirm against SSRStatistics.
        type_rows = ssr_statis.type[1:]
        type_counts = [row[1] for row in type_rows]
        type_labels = [row[0] for row in type_rows]
        plot.pie(type_counts, type_labels, "ssr_type", self.dpi)

        # Bar plot of up to ten motifs per motif length.  Motifs are bucketed
        # by the length of their name (six buckets, lengths 1 to 6).
        motifs = [[], [], [], [], [], []]
        for row in ssr_statis.category[1:]:
            motifs[len(row[0]) - 1].append((row[0], row[1]))

        motif_counts = []
        motif_labels = []
        for bucket in motifs:
            # NOTE(review): the key orders by motif name first, so the ten
            # kept entries are the first ten by name, not the ten with the
            # highest counts -- confirm this is intended.
            ranked = sorted(bucket, key=lambda item: (item[0], -item[1]))
            for motif, count in ranked[:10]:
                motif_counts.append(count)
                motif_labels.append(motif)

        plot.bar(motif_labels, motif_counts, "SSR motif category",
                 "SSR counts", "ssr_motif", self.dpi)

        # Box plots of repeats and lengths, labelled with the SSR types.
        plot.box(ssr_statis.repeat, type_labels, "SSR repeats", "ssr_repeat",
                 self.dpi)
        plot.box(ssr_statis.ssrlen, type_labels, "SSR length (bp)",
                 "ssr_length", self.dpi)

        # SSR distribution over regions, only when region data exists.
        self._plot_region_pie(ssr_statis.region, "ssr_region")

    def _issr_statistics(self):
        """Store the imperfect SSR statistics and draw their plots."""
        self.emit_message("Doing imperfect SSR statistics...")
        issr_statis = ISSRStatistics().results()
        self.db.set_option('issr_statis', json.dumps(issr_statis))

        # iSSR type distribution pie plot (first row of ``type`` skipped).
        type_rows = issr_statis.type[1:]
        type_counts = [row[1] for row in type_rows]
        type_labels = [row[0] for row in type_rows]
        plot.pie(type_counts, type_labels, "issr_type", self.dpi)

        # Box plots of scores and lengths, labelled with the iSSR types.
        plot.box(issr_statis.score, type_labels, "iSSR score", "issr_score",
                 self.dpi)
        plot.box(issr_statis.issrlen, type_labels, "iSSR length (bp)",
                 "issr_length", self.dpi)

        # iSSR distribution over regions, only when region data exists.
        self._plot_region_pie(issr_statis.region, "issr_region")

    def _cssr_statistics(self):
        """Store the compound SSR statistics and draw their plots."""
        self.emit_message("Doing compound SSR statistics...")
        cssr_statis = CSSRStatistics().results()
        self.db.set_option('cssr_statis', json.dumps(cssr_statis))

        # Complexity, length and gap distributions; the first row of each
        # table is skipped, as in the SSR tables.
        self._plot_count_line(cssr_statis.complexity[1:], 'cSSR complexity',
                              'cSSR Counts', 'cssr_complexity')
        self._plot_count_line(cssr_statis.cssrlen[1:], 'cSSR length (bp)',
                              'cSSR Counts', 'cssr_length')
        self._plot_count_line(cssr_statis.gap[1:], 'Gap length (bp)',
                              'cSSR Counts', 'cssr_gap')

        # cSSR distribution over regions, only when region data exists.
        self._plot_region_pie(cssr_statis.region, "cssr_region")

    def _vntr_statistics(self):
        """Store the VNTR statistics and draw their plots."""
        self.emit_message("Doing VNTR statistics...")
        vntr_statis = VNTRStatistics().results()
        self.db.set_option('vntr_statis', json.dumps(vntr_statis))

        # Motif length, total length and repeat distributions.  Unlike the
        # other categories, these tables are used from their first row on.
        self._plot_count_line(vntr_statis.type, 'VNTR motif length (bp)',
                              'VNTR Counts', 'vntr_type')
        self._plot_count_line(vntr_statis.vntrlen, 'VNTR length (bp)',
                              'VNTR Counts', 'vntr_length')
        self._plot_count_line(vntr_statis.repeat, 'VNTR repeats',
                              'VNTR Counts', 'vntr_repeat')

        # VNTR distribution over regions, only when region data exists.
        self._plot_region_pie(vntr_statis.region, "vntr_region")
Beispiel #6
0
def centroid_bootstrap(catalog):
  """
  Iteratively refine RMSD centroids and transition thresholds (thetas)
  from the trajectories listed in the catalog, then plot the progress.

  Starting centroids are loaded from ``settings.RMSD_CENTROID_FILE``.  Jobs
  are taken from the catalog hash ``anl_sequence`` (ordered by value) and
  processed in fixed-size batches.  Within a batch every frame is labeled
  with its nearest centroid A and second nearest centroid B: the label is
  (A, B) when the two RMSD values differ by less than ``thetas[A][B]`` and
  (A, A) otherwise.  After each batch:

    * every centroid is replaced by the point-count weighted mean of its old
      value and the mean of the batch frames labeled (A, A);
    * for every centroid pair with enough data, both thetas are re-estimated
      from the sorted RMSD differences collected so far.

  Finally the per-batch mean centroid change, the per-batch mean theta
  change, the label histogram and one scatter plot per centroid pair are
  sent to the plotting module ``P``.

  Nothing is written back to the catalog here; the function returns None.
  It returns early (after logging) when the catalog has no ``name`` or no
  jobs.

  Parameters:
    catalog: store queried with ``get``, ``exists``, ``loadNPArray`` and
      ``hgetall``.
  """
  centfile = settings.RMSD_CENTROID_FILE
  centroid = np.load(centfile)
  numLabels = len(centroid)
  # Number of points already folded into each centroid.  One entry per
  # centroid, so that any number of centroids works (not just five).
  cent_npts = [1] * numLabels  # TBD
  binlist = [(a, b) for a in range(numLabels) for b in range(numLabels)]
  logging.info("Loaded Starting Centroids from %s", centfile)

  name = catalog.get('name')
  if name is None:
    logging.info('Name not configured in this catalog. Set it and try again')
    return

  # Load/Set initial (current) Configs from Catalog
  if catalog.exists('thetas'):
    thetas = catalog.loadNPArray('thetas')
  else:
    thetas = np.zeros(shape=(numLabels, numLabels))
    thetas[:] = 0.25

  if catalog.exists('transition_sensitivity'):
    trans_factor = catalog.loadNPArray('transition_sensitivity')
  else:
    trans_factor = 0.2

  use_gradient = True
  obs_count = {ab: 0 for ab in binlist}
  C_delta = []
  T_delta = []

  # Configure Noise Filter
  # NOTE(review): the filter is built but never applied below; the reads are
  # kept so that a missing catalog setting still fails here as before.
  noise = int(catalog.get('obs_noise'))
  dcdfreq = int(catalog.get('dcdfreq'))
  stepsize = int(catalog.get('sim_step_size'))
  nwidth = noise//(2*stepsize)
  noisefilt = lambda x, i: np.mean(x[max(0,i-nwidth):min(i+nwidth, len(x))], axis=0)

  # Get previously Labeled data (or label data IAW current settings)
  # NOTE(review): obslist is fetched but not used afterwards.
  eid = db.get_expid(name)
  obslist = [i[0] for i in db.runquery('SELECT obs FROM obs WHERE expid=%d' % eid)]
  jobs = [i[0] for i in sorted(catalog.hgetall('anl_sequence').items(), key=lambda x: x[1])]
  if not jobs:
    # Without any job there is no frame shape and no data to learn from.
    logging.info('No jobs found in anl_sequence. Nothing to bootstrap')
    return
  shape = None

  # Initialize lists for pair-wise distances (top 2 nearest centroids)
  diffList  = {}
  transList = {}
  scatPlot  = {}
  for A in range(0, numLabels-1):
    for B in range(A+1, numLabels):
      diffList[(A, B)]  = []
      transList[(A, B)] = []
      scatPlot[(A, B)]  = []
  # Labels of all frames seen so far, across batches
  obs_global = []

  # Process learning in batches (static batch size to start)
  batch_size = 25
  max_obs = 150
  batch = 0
  while batch <= max_obs:
    logging.info("Procssing Jobs %d - %d", batch, batch+batch_size)
    exec_sim = []
    obs_list = []
    # Slice by batch_size so the window always matches the loop increment.
    for job in jobs[batch:batch+batch_size]:
      conf = catalog.hgetall('jc_' + job)
      traj = md.load(conf['dcd'], top=conf['pdb'])
      alpha = datareduce.filter_alpha(traj)
      conf['alpha'] = alpha.xyz
      exec_sim.append(conf)
      if shape is None:
        # Per-frame coordinate shape, taken from the first trajectory loaded
        shape = conf['alpha'].shape[1:]

      rmslist = calc_rmsd(alpha, centroid)
      labels = []
      for rms in rmslist:
        # A = nearest centroid, B = second nearest
        A, B = np.argsort(rms)[:2]
        gap = np.abs(rms[B] - rms[A])
        if gap < thetas[A][B]:
          sub_state = B
        else:
          sub_state = A
        classify = (A, sub_state)
        labels.append(classify)
        obs_count[classify] += 1

        # For globally updating Thetas
        obs_global.append(classify)
        # Keys are stored with the smaller index first; the sign of the
        # stored difference tells which of the two centroids is nearer.
        if A < B:
          diffList[(A, B)].append(rms[A] - rms[B])
        else:
          diffList[(B, A)].append(rms[B] - rms[A])

        # Record the difference for every pair, colored by how the pair
        # relates to this frame's label.
        for a in range(0, numLabels-1):
          for b in range(a+1, numLabels):
            transList[(a, b)].append(rms[a] - rms[b])
            if (a, a) == classify or (b, b) == classify:
              c = 'b'
            elif (a, b) == classify or (b, a) == classify:
              c = 'g'
            elif a == A or b == A:
              c = 'r'
            else:
              c = 'black'
            scatPlot[(a, b)].append((rms[a] - rms[b], c))
      obs_list.append(labels)

    logging.info('Bin Distribution:')
    grpby = {}
    for llist in obs_list:
      for l in llist:
        if l not in grpby:
          grpby[l] = 0
        grpby[l] += 1
    for k in sorted(grpby.keys()):
      logging.info('%s:  %5d', k, grpby[k])
    for A in range(0, numLabels-1):
      for B in range(A+1, numLabels):
        d = diffList[(A, B)]
        if d:
          logging.info('Diff list for %d,%d:  %d, %5.2f, %5.2f', A, B, len(d), min(d), max(d))
        else:
          # min()/max() raise ValueError on an empty list
          logging.info('Diff list for %d,%d:  no observations yet', A, B)

    # For each frame in each traj: ID labeled well pts & build avg op
    logging.info('Selecting observed Well States')
    coor_sum = {i: np.zeros(shape=shape) for i in range(numLabels)}
    coor_tot = {i: 0 for i in range(numLabels)}
    for sim, sim_labels in zip(exec_sim, obs_list):
      for frame, label in zip(sim['alpha'], sim_labels):
        A, B = label
        # Only frames labeled (A, A) contribute to the centroid of A
        if A != B:
          continue
        coor_sum[A] += frame
        coor_tot[A] += 1

    logging.info('Calculating Avg from following stats:')
    logging.info('   Total Frames: %d', sum([len(sim['alpha']) for sim in exec_sim]))

    # Calculate New Centroids (w/deltas)
    delta = []
    for S in range(numLabels):
      if coor_tot[S] == 0:
        logging.info("   State: %d --- NO OBSERVATIONS IN THIS WELL STATE", S)
        continue
      cent_local = coor_sum[S] / coor_tot[S]
      diff_local = LA.norm(centroid[S] - cent_local)
      # Mean of old centroid and batch mean, weighted by their point counts
      update = ((centroid[S] * cent_npts[S]) + (cent_local * coor_tot[S])) / (cent_npts[S] + coor_tot[S])
      delta.append(LA.norm(update - centroid[S]))
      logging.info('   State %d:  NewPts=%5d   Delta=%5.2f   LocalDiff=%5.2f', 
        S, coor_tot[S], delta[-1], diff_local)
      centroid[S] = update
      cent_npts[S] += coor_tot[S]
    centroid_change = np.mean(delta)
    if len(C_delta) > 1:
      rel_change = np.abs((centroid_change - C_delta[-1]) / C_delta[-1])
      logging.info('Centroid Change:  %5.2f   (%5.2f%%)', centroid_change, 100*rel_change)
    C_delta.append(centroid_change)
    batch += batch_size

    # Update Thetas (using the differences accumulated over all batches)
    delta = []
    for A in range(0, numLabels-1):
      for B in range(A+1, numLabels):
        X = sorted(diffList[(A, B)])
        if len(X) < 100:
          logging.info('Lacking data on %d, %d', A, B)
          continue
        # Index of the first positive difference in the sorted list
        crossover = 0
        for i, x in enumerate(X):
          if x > 0:
            crossover = i
            break
        if crossover < 50 or (len(X)-crossover) < 50:
          logging.info('  Lacking local data skipping.')
          continue

        # Window around the crossover, sized by trans_factor on each side
        zoneA = int((1-trans_factor) * crossover)
        zoneB = crossover + int(trans_factor * (len(X) - crossover))
        if use_gradient:
          # Take the point of steepest change inside each side of the window
          gradA = zoneA + np.argmax(np.gradient(X[zoneA:crossover]))
          gradB = crossover + np.argmax(np.gradient(X[crossover:zoneB]))
          thetaA = X[gradA]
          thetaB = X[gradB]
        else:
          # Classify Fixed Percent of observations as Transitional
          thetaA = X[zoneA]
          thetaB = X[zoneB]

        thetas_updated = np.copy(thetas)
        thetas_updated[A][B] = np.abs(thetaA)
        thetas_updated[B][A] = np.abs(thetaB)
        tdeltA = np.abs(thetas_updated[A][B] - thetas[A][B])
        tdeltB = np.abs(thetas_updated[B][A] - thetas[B][A])
        delta.append(tdeltA)
        delta.append(tdeltB)
        # Percentages are relative to the old thetas, so log before storing
        logging.info('  Theta Change (%d,%d):  %4.2f  (%4.1f)', A, B, tdeltA, (100*tdeltA/thetas[A][B]))
        logging.info('  Theta Change (%d,%d):  %4.2f  (%4.1f)', B, A, tdeltB, (100*tdeltB/thetas[B][A]))
        thetas[A][B] = thetas_updated[A][B]
        thetas[B][A] = thetas_updated[B][A]

    T_delta.append(np.mean(delta))
  P.line(np.array(C_delta), 'Avg_CHANGE_Centroid_Pos_%s' % name)
  P.line(np.array(T_delta), 'Avg_CHANGE_Theta_Val_%s' % name)
  P.bargraph_simple(obs_count, 'Final_Histogram_%s' % name)
  # for k, X in diffList.items():
  #   A, B = k
  #   P.transition_line(sorted(X), A, B, title='-X', trans_factor=.5)
  # for k, X in transList.items():
  #   A, B = k
  #   P.transition_line(sorted(X), A, B, title='-ALL', trans_factor=.5)
  for (A, B), X in scatPlot.items():
    collab = {'b': 'Well', 'g': 'Trans', 'r': 'Primary', 'brown': 'Secondary', 'black': 'None'}
    ptmap = {color: [] for color in collab.keys()}
    # Rank the points by difference; the rank becomes the x coordinate
    ordpts = sorted(X, key = lambda x : x[0])
    for i, tup in enumerate(ordpts):
      y, c = tup
      ptmap[c].append((i, y))
    P.scat_Transtions(ptmap, title='-%d_%d'%(A,B), size=1, labels=collab)
Beispiel #7
0
if not ylabel:
	warning("no label for the y axis. Data field will be used.")	
	ylabel = field


if plot_type == 'b' or plot_type == 'B':
	if plot_type == 'b':
		input.sort(key=lambda x:x.b)
		input = [bar.file for bar in input]
	ylist,error = generate_data(input, field, conf)
	plot.bars(ylist, error, output, title, xlabel, ylabel)

elif plot_type == 'L':
	ylist,error = generate_data(input, field, conf)
	plot.line(ylist, error, output, title, xlabel, ylabel)

elif plot_type == 'l' or plot_type == 'lp':
	
	ylist = []
	error = []
	aux = []
	flist = []

	input.sort(key=lambda x:x.l)

	if plot_type == 'l':
		i=1
		while True:
			aux = [line.files for line in input if line.l==i]
			if not len(aux):