コード例 #1
0
ファイル: convergence.py プロジェクト: DaMSL/ddc
def total_time(name):
  """Return how long the named experiment ran, measured to its last-ending `sw` row.

  Reads every (start, time) row of the `sw` table for the experiment,
  ordered by start.  Each row's end is its parsed start timestamp plus its
  `time` value, expressed relative to the first row's start; the largest of
  those relative ends is returned.

  Raises IndexError when the experiment has no `sw` rows.
  """
  expid = db.get_expid(name)
  rows = db.runquery('select start,time from sw where expid=%d order by start'%expid)
  origin = du.parse(rows[0][0]).timestamp()
  # The latest end is not necessarily on the latest-started row, so scan all of them.
  return max(du.parse(begin).timestamp() + duration - origin for begin, duration in rows)
コード例 #2
0
ファイル: convergence.py プロジェクト: DaMSL/ddc
def conv_over_time(name, step=10000, tw=False):
  """Build one (hours, ratio) series per distinct observation value.

  Args:
    name: experiment name, resolved through `db.get_expid` and `getobs`.
    step: number of observations between successive bootstrap evaluations;
      also passed to `bootstrap_iter` as `size`.
    tw: when true, the observations are first passed through `obs2tw`.

  Returns:
    dict mapping every value from `getdistinct(obs)` to a list of
    `(t, r)` tuples, where `t` is the end of the simulation that crossed the
    current threshold (hours since the first `sw` start) and `r` is
    `c[v][1]/c[v][0]` from the bootstrap result, capped at 1.0.  Values
    missing from a bootstrap result get 1.0.

  Raises IndexError when the experiment has no `sw` rows.
  """
  expid = db.get_expid(name)
  observations = getobs(name)
  if tw:
    observations = obs2tw(observations)

  values = getdistinct(observations)
  total = len(observations)
  curves = {v: [] for v in values}

  rows = db.runquery('select start,time,numobs from sw where expid=%d order by start'%expid)
  origin = du.parse(rows[0][0]).timestamp()
  # (relative end, observation count) per simulation, replayed in order of completion.
  sims = sorted(
      [(du.parse(r[0]).timestamp() + r[1] - origin, r[2]) for r in rows],
      key=lambda s: s[0])

  seen = 0
  threshold = step
  for end, numobs in sims:
    if seen >= total:
      break
    seen += numobs
    if seen <= threshold:
      continue
    hours = end / 3600.
    result = bootstrap_iter(observations[:min(seen, total)], size=step)
    for v in values:
      if v in result.keys():
        ratio = min(result[v][1]/result[v][0], 1.)
      else:
        ratio = 1.
      curves[v].append((hours, ratio))
    # The threshold moves one step at a time even if `seen` jumped further.
    threshold += step
  return curves
コード例 #3
0
def traj_results(name):
  """Return `(bin, observations)` for every `jc` row of the named experiment.

  The rows are read ordered by `start`; each row's `start`/`end` columns are
  used as slice bounds into the experiment's observation list from `getobs`.
  """
  eid = db.get_expid(name)
  observations = getobs(name)
  rows = db.runquery('select bin, start, end from jc where expid=%d order by start'%eid)
  return [(src_bin, observations[first:last]) for src_bin, first, last in rows]
コード例 #4
0
ファイル: convergence.py プロジェクト: DaMSL/ddc
def all_elas():
  """Load seven `F.ExprAnl` analyses, bootstrap each one and scatter-plot the results.

  One analysis is created per entry of the hard-coded size list, on
  consecutive ports starting at 6391, and each loads at most 750 items
  (`min(750, len(conf))`).  `C.elas_boot` is then run on every analysis and
  the first five entries of each result are plotted as `Elas_0` .. `Elas_4`
  through `P.scats`.  Returns nothing.
  """
  sizes = [5, 10, 25, 50, 75, 100, 200]
  analyses = {}
  for offset, size in enumerate(sizes):
    analyses[size] = F.ExprAnl(port=6391+offset)

  for size, anl in analyses.items():
    print('Loading', size)
    anl.load(min(750, len(anl.conf)))

  # NOTE(review): `size` is whatever the loading loop left behind (the last
  # key, 200), so only that one experiment is queried here - confirm this
  # ad hoc dump was meant to sit outside the loop.
  name = 'rtime250_%d' % size
  expid = db.get_expid(name)
  db.adhoc('select swname, start, time, cpu from sw where expid=%d' % expid)

  boots = {}
  for size, anl in analyses.items():
    boots[size] = C.elas_boot(anl, 1000, 'dsw', limit=151000)

  # Collect all five groups before plotting, so an indexing error surfaces
  # before any plot is produced.
  groups = []
  for idx in range(5):
    groups.append({size: curve[idx] for size, curve in boots.items()})
  for idx, group in enumerate(groups):
    P.scats(group, 'Elas_%d'%idx)
コード例 #5
0
def by_src_bin(name):
  """Count observed labels per source bin for the named experiment.

  Every `jc` row contributes the observation slice `[start:end]` to its
  `bin`.  The result maps each source bin to a dict keyed by
  `str((a, b))` for every `(a, b)` in the module-level `binlist`, holding
  how many of that bin's observations carry that label.

  Raises KeyError if an observation is not one of those keys.
  """
  expid = db.get_expid(name)
  observations = getobs(name)
  grouped = {}
  for src, first, last in db.runquery('select bin, start, end from jc where expid=%d'%expid):
    grouped.setdefault(src, []).extend(observations[first:last])

  counts = {src: {str((a,b)): 0 for a, b in binlist} for src in grouped}
  for src, labels in grouped.items():
    tally = counts[src]
    for label in labels:
      tally[label] += 1
  return counts
コード例 #6
0
def by_src_hcube(name):
  """Count observed labels per source bin and hypercube for the named experiment.

  Every `jc` row contributes the observation slice `[start:end]` to its
  `(bin, hcube)` pair.  The result is a nested dict
  `bin -> hcube -> label -> count`, with one zero-initialised entry per
  element of the module-level `sbinlist`.  Observations equal to `'0-D'`
  are not counted.

  Raises KeyError if any other observation is not in `sbinlist`.
  """
  expid = db.get_expid(name)
  observations = getobs(name)
  grouped = {}
  for src, cube, first, last in db.runquery('select bin, hcube, start, end from jc where expid=%d'%expid):
    grouped.setdefault(src, {}).setdefault(cube, []).extend(observations[first:last])

  counts = {
      src: {cube: {s: 0 for s in sbinlist} for cube in cubes}
      for src, cubes in grouped.items()
  }
  for src, cubes in grouped.items():
    for cube, labels in cubes.items():
      tally = counts[src][cube]
      for label in labels:
        if label != '0-D':
          tally[label] += 1
  return counts
コード例 #7
0
ファイル: convergence.py プロジェクト: DaMSL/ddc
def elas_feal(name, feal_list, max_obs, step=2000):
  """Return `(hours, value)` points from block-bootstrapping a growing prefix of `feal_list`.

  Args:
    name: experiment name, resolved through `db.get_expid`.
    feal_list: sequence whose first `n` items are handed to
      `op.bootstrap_block` once `n` observations have accumulated.
    max_obs: stop consuming simulations once this many observations are seen.
    step: observations between evaluations; also the second argument of
      `op.bootstrap_block`.

  Returns:
    list of `(t, v)` tuples, where `t` is the end of the simulation that
    crossed the current threshold (hours since the first `sw` start) and
    `v` is `np.max` of element 1 of the bootstrap result, capped at 1.0.

  Raises IndexError when the experiment has no `sw` rows.
  """
  expid = db.get_expid(name)
  rows = db.runquery('select start,time,numobs from sw where expid=%d order by start'%expid)
  origin = du.parse(rows[0][0]).timestamp()
  # (relative end, observation count) per simulation, replayed in order of completion.
  sims = sorted(
      [(du.parse(r[0]).timestamp() + r[1] - origin, r[2]) for r in rows],
      key=lambda s: s[0])

  points = []
  seen = 0
  threshold = step
  for end, numobs in sims:
    if seen >= max_obs:
      break
    seen += numobs
    if seen > threshold:
      result = op.bootstrap_block(feal_list[:seen], step)
      points.append((end / 3600., min(np.max(result[1]), 1.)))
      threshold += step
  return points
コード例 #8
0
def centroid_bootstrap(catalog):
  """Re-estimate RMSD centroids and pairwise thetas batch by batch, then plot the changes.

  Starting centroids are loaded from `settings.RMSD_CENTROID_FILE`.  The
  catalog supplies `name`, optional `thetas` and `transition_sensitivity`
  arrays (defaults: 0.25 everywhere and 0.2), the noise settings, the job
  order (`anl_sequence`) and one `jc_<job>` hash per job holding the `dcd`
  and `pdb` paths.

  For every batch of jobs:
    1. Each trajectory is loaded, reduced with `datareduce.filter_alpha`,
       and each frame is labelled from its RMSD to every centroid: with A
       and B the two nearest centroids, the label is (A, B) when the RMSD
       gap is below `thetas[A][B]`, otherwise (A, A).
    2. Frames labelled (S, S) are averaged and merged into centroid S as a
       point-count-weighted running mean.
    3. Thetas are re-derived from the sorted RMSD differences gathered so
       far for each centroid pair.

  Finally the per-batch mean centroid and theta changes, the label
  histogram and one scatter per centroid pair are handed to the plotting
  module `P`.

  Args:
    catalog: project catalog object; must provide `get`, `exists`,
      `loadNPArray` and `hgetall`.

  Returns:
    None.  Returns early (after logging) when the catalog has no `name`.
  """
  centfile = settings.RMSD_CENTROID_FILE
  centroid = np.load(centfile)
  numLabels = len(centroid)
  # One prior point per centroid, sized from the loaded centroids so the
  # weighting below works for any number of states.
  cent_npts = [1] * numLabels  # TBD
  binlist = [(a, b) for a in range(numLabels) for b in range(numLabels)]
  logging.info("Loaded Starting Centroids from %s", centfile)

  name = catalog.get('name')
  if name is None:
    logging.info('Name not configured in this catalog. Set it and try again')
    return

  # Load/Set initial (current) Configs from Catalog
  if catalog.exists('thetas'):
    thetas = catalog.loadNPArray('thetas')
  else:
    thetas = np.zeros(shape=(numLabels, numLabels))
    thetas[:] = 0.25

  if catalog.exists('transition_sensitivity'):
    trans_factor = catalog.loadNPArray('transition_sensitivity')
  else:
    trans_factor = 0.2

  # True: place each theta at the steepest point of the sorted differences
  # inside the transition zone.  False: place it at the zone boundary.
  use_gradient = True
  obs_count = {ab: 0 for ab in binlist}
  C_delta = []   # mean centroid displacement per batch
  T_delta = []   # mean theta change per batch

  # Configure Noise Filter
  # NOTE(review): the filter is only referenced by disabled code; the
  # catalog reads are kept so missing settings still fail here.
  noise = int(catalog.get('obs_noise'))
  dcdfreq = int(catalog.get('dcdfreq'))
  stepsize = int(catalog.get('sim_step_size'))
  nwidth = noise//(2*stepsize)
  noisefilt = lambda x, i: np.mean(x[max(0,i-nwidth):min(i+nwidth, len(x))], axis=0)

  # Get previously Labeled data (or label data IAW current settings)
  # NOTE(review): `obslist` is never read below - confirm whether this
  # query can be dropped.
  eid = db.get_expid(name)
  obslist = [i[0] for i in db.runquery('SELECT obs FROM obs WHERE expid=%d' % eid)]
  jobs = [i[0] for i in sorted(catalog.hgetall('anl_sequence').items(), key=lambda x: x[1])]
  shape = None

  # Initialize lists for pair-wise distances (top 2 nearest centroids)
  diffList  = {}   # differences only for frames whose two nearest centroids are the pair
  transList = {}   # differences for every frame, for every pair
  scatPlot  = {}   # (difference, colour) for every frame, for every pair
  for A in range(0, numLabels-1):
    for B in range(A+1, numLabels):
      diffList[(A, B)]  = []
      transList[(A, B)] = []
      scatPlot[(A, B)]  = []
  obs_global = []

  # Process learning in batches (static batch size to start)
  batch_size = 25
  max_obs = 150
  batch = 0
  # NOTE(review): `<=` also processes the batch starting at max_obs itself
  # (jobs 150-174) - confirm that is intended.
  while batch <= max_obs:
    logging.info("Procssing Jobs %d - %d", batch, batch+batch_size)
    exec_sim = []
    obs_list = []
    for job in jobs[batch:batch+batch_size]:
      conf = catalog.hgetall('jc_' + job)
      traj = md.load(conf['dcd'], top=conf['pdb'])
      alpha = datareduce.filter_alpha(traj)
      conf['alpha'] = alpha.xyz
      exec_sim.append(conf)
      if shape is None:
        # Per-frame coordinate shape, taken from the first trajectory loaded.
        shape = conf['alpha'].shape[1:]

      rmslist = calc_rmsd(alpha, centroid)
      labels = []
      for rms in rmslist:
        # A is the nearest centroid, B the second nearest.
        A, B = np.argsort(rms)[:2]
        delta = np.abs(rms[B] - rms[A])
        if delta < thetas[A][B]:
          sub_state = B
        else:
          sub_state = A
        classify = (A, sub_state)
        labels.append(classify)
        obs_count[classify] += 1

        # For globally updating Thetas
        obs_global.append(classify)
        # Stored under the ordered pair key, signed as (lower index - higher index).
        if A < B:
          diffList[(A, B)].append(rms[A] - rms[B])
        else:
          diffList[(B, A)].append(rms[B] - rms[A])

        for a in range(0, numLabels-1):
          for b in range(a+1, numLabels):
            transList[(a, b)].append(rms[a] - rms[b])
            if (a, a) == classify or (b, b) == classify:
              c = 'b'
            elif (a, b) == classify or (b, a) == classify:
              c = 'g'
            elif a == A or b == A:
              c = 'r'
            else:
              c = 'black'
            scatPlot[(a, b)].append((rms[a] - rms[b], c))
      obs_list.append(labels)

    if not exec_sim:
      # The job list is exhausted: later slices are empty too, and running
      # the updates on no data would only append NaN/zero points to the
      # plotted series (or fail on an unknown frame shape).
      logging.info('No jobs in range %d - %d; stopping.', batch, batch+batch_size)
      break

    logging.info('Bin Distribution:')
    grpby = {}
    for llist in obs_list:
      for l in llist:
        grpby[l] = grpby.get(l, 0) + 1
    for k in sorted(grpby.keys()):
      logging.info('%s:  %5d', k, grpby[k])
    for A in range(0, numLabels-1):
      for B in range(A+1, numLabels):
        d = diffList[(A, B)]
        if d:
          logging.info('Diff list for %d,%d:  %d, %5.2f, %5.2f', A, B, len(d), min(d), max(d))
        else:
          # min()/max() raise ValueError on an empty list.
          logging.info('Diff list for %d,%d:  %d (no observations)', A, B, 0)

    # For each frame in each traj: ID labeled well pts & build avg op
    logging.info('Selecting observed Well States')
    coor_sum = {i: np.zeros(shape=shape) for i in range(numLabels)}
    coor_tot = {i: 0 for i in range(numLabels)}
    for sim, sim_labels in zip(exec_sim, obs_list):
      for frame, label in zip(sim['alpha'], sim_labels):
        A, B = label
        if A != B:
          continue
        coor_sum[A] += frame
        coor_tot[A] += 1

    logging.info('Calculating Avg from following stats:')
    logging.info('   Total Frames: %d', sum([len(sim['alpha']) for sim in exec_sim]))

    # Calculate New Centroids (w/deltas)
    centroid_deltas = []
    for S in range(numLabels):
      if coor_tot[S] == 0:
        logging.info("   State: %d --- NO OBSERVATIONS IN THIS WELL STATE", S)
        continue
      cent_local = coor_sum[S] / coor_tot[S]
      diff_local = LA.norm(centroid[S] - cent_local)
      # Weighted mean of the running centroid and this batch's average.
      update = ((centroid[S] * cent_npts[S]) + (cent_local * coor_tot[S])) / (cent_npts[S] + coor_tot[S])
      centroid_deltas.append(LA.norm(update - centroid[S]))
      logging.info('   State %d:  NewPts=%5d   Delta=%5.2f   LocalDiff=%5.2f',
        S, coor_tot[S], centroid_deltas[-1], diff_local)
      centroid[S] = update
      cent_npts[S] += coor_tot[S]
    # NaN, without numpy's empty-slice warning, when no state was updated.
    centroid_change = np.mean(centroid_deltas) if centroid_deltas else np.nan
    # NOTE(review): `> 1` skips the relative change for the second batch
    # although one previous value already exists - confirm.
    if len(C_delta) > 1:
      rel_change = np.abs((centroid_change - C_delta[-1]) / C_delta[-1])
      logging.info('Centroid Change:  %5.2f   (%5.2f%%)', centroid_change, 100*rel_change)
    C_delta.append(centroid_change)

    # Update Thetas (using global data ?????)
    theta_deltas = []
    for A in range(0, numLabels-1):
      for B in range(A+1, numLabels):
        X = sorted(diffList[(A, B)])
        if len(X) < 100:
          logging.info('Lacking data on %d, %d', A, B)
          continue
        # Index of the first positive difference (stays 0 if there is none).
        crossover = 0
        for i, x in enumerate(X):
          if x > 0:
            crossover = i
            break
        if crossover < 50 or (len(X)-crossover) < 50:
          logging.info('  Lacking local data skipping.')
          continue

        # Transition zone: the last / first trans_factor share of the points
        # on either side of the crossover.
        zoneA = int((1-trans_factor) * crossover)
        zoneB = crossover + int(trans_factor * (len(X) - crossover))
        if use_gradient:
          # Find local max gradient inside each zone
          idxA = zoneA + np.argmax(np.gradient(X[zoneA:crossover]))
          idxB = crossover + np.argmax(np.gradient(X[crossover:zoneB]))
        else:
          # Classify Fixed Percent of observations as Transitional
          idxA = zoneA
          idxB = zoneB

        thetas_updated = np.copy(thetas)
        thetas_updated[A][B] = np.abs(X[idxA])
        thetas_updated[B][A] = np.abs(X[idxB])
        tdeltA = np.abs(thetas_updated[A][B] - thetas[A][B])
        tdeltB = np.abs(thetas_updated[B][A] - thetas[B][A])
        theta_deltas.append(tdeltA)
        theta_deltas.append(tdeltB)
        # Percentages are relative to the old thetas, so log before overwriting.
        logging.info('  Theta Change (%d,%d):  %4.2f  (%4.1f)', A, B, tdeltA, (100*tdeltA/thetas[A][B]))
        logging.info('  Theta Change (%d,%d):  %4.2f  (%4.1f)', B, A, tdeltB, (100*tdeltB/thetas[B][A]))
        thetas[A][B] = thetas_updated[A][B]
        thetas[B][A] = thetas_updated[B][A]

    T_delta.append(np.mean(theta_deltas) if theta_deltas else np.nan)
    batch += batch_size

  P.line(np.array(C_delta), 'Avg_CHANGE_Centroid_Pos_%s' % name)
  P.line(np.array(T_delta), 'Avg_CHANGE_Theta_Val_%s' % name)
  P.bargraph_simple(obs_count, 'Final_Histogram_%s' % name)
  for k, X in scatPlot.items():
    collab = {'b': 'Well', 'g': 'Trans', 'r': 'Primary', 'brown': 'Secondary', 'black': 'None'}
    ptmap = {colour: [] for colour in collab.keys()}
    # The x coordinate is the rank of the point within the sorted differences.
    ordpts = sorted(X, key = lambda x : x[0])
    for i, (y, c) in enumerate(ordpts):
      ptmap[c].append((i, y))
    A, B = k
    P.scat_Transtions(ptmap, title='-%d_%d'%(A,B), size=1, labels=collab)
コード例 #9
0
def getobs(name):
  """Return the `obs` column of every observation row of the named experiment, ordered by `idx`."""
  expid = db.get_expid(name)
  rows = db.runquery('select idx, obs from obs where expid=%d order by idx'%expid)
  return [obs for _, obs in rows]
コード例 #10
0
ファイル: convergence.py プロジェクト: DaMSL/ddc
def elas_boot(ex, size, method=None, limit=375000, state=None):
  """Return `(hours, convergence)` points from bootstrapping the feature landscape in batches.

  Args:
    ex: analysis object; `ex.r.get('name')` gives the experiment name and
      `ex.all_feal(...)` the feature landscape rows.
    size: number of observations per bootstrap batch.
    method: when given, the landscape is fetched as `ex.all_feal(True, method)`;
      otherwise as `ex.all_feal()`.
    limit: maximum number of landscape rows / observations to use.
    state: not used by this function.

  Returns:
    list of `(t, conv)` tuples.  `t` is the gap-adjusted end of the
    simulation that crossed the current threshold, in hours.  `conv` is the
    mean over ten features of `bootstrap_std(...)[1] / bootstrap_std(...)[0]`
    computed across all batch results so far (NaNs replaced via
    `np.nan_to_num`); a result of exactly 0 is replaced by the previous
    value (initially 1.0).

  Raises IndexError when the experiment has no `sw` rows.
  """
  name = ex.r.get('name')
  print(name, '-', end=' ')
  expid = db.get_expid(name)

  # Get feature landscape (for convergence)
  feal = ex.all_feal()[:limit] if method is None else ex.all_feal(True, method)[:limit]

  # Get list of all simulations
  rows = db.runquery('select start,time,numobs from sw where expid=%d order by start'%expid)
  print('Simulations read: ', len(rows))

  # Account for any gaps in execution & adjust (ensure real time is logically grouped)
  ts_0 = du.parse(rows[0][0]).timestamp()
  # NOTE(review): seeded with the absolute timestamp although later values
  # are relative to ts_0; the first comparison is therefore strongly
  # negative and never counts as a gap - confirm 0 was intended.
  prev = ts_0
  gap = 0
  cutoff = 30*60  # 30 min gap is bad
  timeline = []
  for begin, duration, numobs in sorted(rows, key=lambda r: r[0]):
    rel_start = du.parse(begin).timestamp() - ts_0
    idle = rel_start - prev
    if idle > cutoff:
      print('FOUND GAP:', int(idle))
      # NOTE(review): the shift is the idle time PLUS the cutoff - confirm.
      gap += idle + cutoff
    shifted = rel_start - gap
    timeline.append({'start': shifted, 'end': shifted + duration, 'numobs': numobs})
    prev = rel_start

  maxobs = sum([sim['numobs'] for sim in timeline])

  # Sort by end time (for convergence)
  timeline.sort(key=lambda sim: sim['end'])
  N = min(limit, maxobs, len(feal))

  plotlist = []
  ci = []          # one row of per-feature ratios for every batch processed
  dnum = 0         # Data item # (as in stream)
  lastcalc = 0
  nextcalc = size
  last_conv = 1.

  # Process each simulation's observations, batch into step-sizes and calc bootstrap
  for sim in timeline:
    if dnum >= N:
      break
    dnum += sim['numobs']
    if dnum <= nextcalc:
      continue
    hours = sim['end'] / 3600.  # get time
    arr = np.array(feal[lastcalc:dnum]).T
    # NOTE(review): `straps` row j holds feature j+5, so rows 5..14 read
    # below are features 10..19 - confirm that is the intended set.
    straps = np.array([bootstrap_std(arr[feat]) for feat in range(5, 20)])
    ci.append([straps[feat][1]/straps[feat][0] for feat in range(5, 15)])
    ratios = []
    for feat in range(10):
      calc = bootstrap_std([batch_ci[feat] for batch_ci in ci])
      ratios.append(calc[1]/calc[0])
    conv = np.mean(np.nan_to_num(ratios))
    if conv == 0:
      conv = last_conv
    plotlist.append((hours, conv))
    lastcalc = dnum
    nextcalc = min(nextcalc + size, N)
    last_conv = conv
  return plotlist
コード例 #11
0
ファイル: convergence.py プロジェクト: DaMSL/ddc
def getobs(name):
  """Return up to 510000 observations of the named experiment, after its burn-in rows.

  Rows are read ordered by `idx`.  The number of leading rows skipped comes
  from the built-in burn-in table keyed by experiment name; names not in the
  table skip nothing.
  """
  burnin = {'serial':0, 'parallel':0,'uniform3':25000,'biased4':50000, 'feal1':25000, 'reweight4':50000}
  expid = db.get_expid(name)
  rows = db.runquery('select idx, obs from obs where expid=%d order by idx'%expid)
  skip = burnin.get(name, 0)
  return [obs for _, obs in rows[skip:skip+510000]]