Example #1
0
# Greedy cover: visit the entries of `restrack` in order of increasing size
# and select pairs until every index that owns a non-empty entry has appeared
# in at least one selected pair.
# NOTE(review): presumably restrack[i] lists the (a, b) residue pairs that
# involve residue i, and `prs` is the ordered list of all pairs -- confirm.
remres = {i for i, r in enumerate(restrack) if len(r) > 0}
rev_pr_idx = {k: i for i, k in enumerate(prs)}
# No entry is longer than this, so larger sizes can never select anything.
maxsize = max((len(r) for r in restrack), default=0)
minsize = 1
F3 = set()
# Bounded by maxsize: the unbounded form spins forever when some index in
# remres is never covered by any pair.
while remres and minsize <= maxsize:
  selprs = [p for p in restrack if len(p) == minsize]
  for sp in selprs:
    for a, b in sp:
      # Skip pairs that would not cover any still-uncovered index.
      if a not in remres and b not in remres:
        continue
      remres.discard(a)
      remres.discard(b)
      F3.add(rev_pr_idx[(a,b)])
  minsize += 1
if remres:
  logging.warning('%d indices could not be covered by any pair', len(remres))

# Reduce both matrices to the selected feature columns.
Kr = sorted(F3)
CMr, Dr = CM[:,Kr], D[:,Kr]

logging.info('FINAL Input Matrix:  %s', Dr.shape)
U = lat.unique_events(CMr)
logging.info('\n MAX-MINER running')
# NOTE(review): other call sites unpack two values from lat.maxminer --
# confirm which version of the API this script targets.
MFIS = lat.maxminer(CMr, 100)
with open(home + '/work/mfis.p', 'wb') as fout:
  pickle.dump(MFIS, fout)
logging.info('\n Max Miner Complete. Constructing derived lattice')
dlat, Ik = lat.dlattice_mm(MFIS, Dr, CMr, 100)
logging.info('\n ALL DONE! Pickling Out')
# Context managers guarantee the pickle files are flushed and closed.
with open(home + '/work/iset.p', 'wb') as fout:
  pickle.dump(Ik, fout)
with open(home + '/work/dlat.p', 'wb') as fout:
  pickle.dump(dlat, fout)
Example #2
0
def bootstrap_lattice(catalog, num=10, build_new=False):
  ''' Bootstrap After TimeScape has run on source trajectory

  Ensures per-basin data is in the catalog, builds or loads the lattice
  artifacts and stores them in the catalog, then queues one simulation job
  for each of the `num` basins with the largest residue RMS delta.

  Args:
    catalog: redis-style client (lrange/get/set/rpush/hmset/hgetall/lindex
      and pipeline are used).
    num: number of top-ranked basins to seed new jobs from.
    build_new: if True, mine a new lattice from the per-basin distance space
      and pickle it under $HOME/work/data; otherwise load the pickles
      written by a previous build.
  '''
  home = os.getenv("HOME")
  cutoff  = 8

  start_coord = ['de2586_315', 'de531_20', 'de3765_63', 'de3305_668', 'de1732_139']

  def outloc(x):
    # Path prefix shared by all output files of one source job.
    return home+'/work/jc/denovouniform1/{0}/{0}'.format(x)

  def lattice_file(name):
    # Location of one pickled lattice artifact.
    return home + '/work/data/denovo_%s.p' % name

  # Per-basin mean distance vectors, one row per basin pushed to
  # 'basin:list'; this is the input to a new lattice build.
  all_dist_space = []

  basin_list = catalog.lrange('basin:list', 0, -1)
  if len(basin_list) == 134:
    logging.info('Basin Data already loaded!')
    # pickle.loads on catalog values: assumes the catalog only holds data
    # written by this pipeline (never untrusted input).
    # NOTE(review): this sums the whole rmsdelta vector, while the load path
    # below sums only the FEATURE_SET entries -- confirm that is intended.
    rms_delta_list = [(i, np.sum(pickle.loads(catalog.get('basin:rmsdelta:'+b)))) for i, b in enumerate(basin_list)]
    if build_new:
      # Recover the distance space that the load path stored per basin.
      all_dist_space = [pickle.loads(catalog.get('basin:dmu:'+b)) for b in basin_list]
  else:
    logging.info('Loading all bootstrap data to initialize...')
    basin_list = []
    rms_delta_list = []
    pdb_file = home+'/work/data/alpha.pdb'
    # NOTE(review): topo is never read below; the load is kept so a missing
    # alpha.pdb still fails here -- confirm it can be dropped.
    topo = md.load(pdb_file)
    ref_alpha = md.load(home+'/work/' + catalog.get('pdb:ref:0'))
    ref_alpha.atom_slice(ref_alpha.top.select_atom_indices('alpha'), inplace=True)
    res_rms_Kr = FEATURE_SET

    for sc in start_coord:
      # Reset per trajectory: indices into dist_space match window order.
      dist_space = []
      srcfile = outloc(sc) + '.dcd'
      pdbfile = srcfile.replace('dcd', 'pdb')
      logging.debug('LOADING TRAJ:  %s', srcfile)
      traj = md.load(srcfile, top = pdbfile)
      alpha = traj.atom_slice(traj.top.select_atom_indices('alpha'))

      logging.info('Grabbing TS data...')
      W = TS.TimeScape.windows(outloc(sc) + '_transitions.log')
      ts_traj = TS.TimeScapeParser(pdbfile, outloc(sc), sc, dcd=srcfile, traj=traj)
      basins = ts_traj.load_basins()

      logging.info("Processing distance space and residue RMS")
      dsa = DR.distance_space(alpha)
      # Per-frame, per-atom distance to the reference frame, scaled by 10.
      resrmsd = 10*np.array([LA.norm(i-ref_alpha.xyz[0], axis=1) for i in alpha.xyz])
      basin_res_rms = np.zeros(shape=(len(ts_traj.basins), alpha.n_atoms))
      for i, (a,b) in enumerate(W):
        dist_space.append(dsa[a:b].mean(0))
        basin_res_rms[i] = np.median(resrmsd[a:b], axis=0)

      basin_res_rms_delta = np.array([rms_delta(i) for i in basin_res_rms.T]).T
      logging.debug('RMS LEN CHECK:  %d =?= %d    -- Updating RMS Delta',len(basins), len(basin_res_rms_delta))

      for i, basin in enumerate(basins):
        bid = basin.id

        # Store on Disk and in redis
        jc_filename = os.path.join(settings.datadir, 'basin_%s.pdb' % bid)
        logging.info('MIN for %s:   Idx# %d  to %s', bid, basin.mindex, jc_filename)
        minima_frame = traj.slice(basin.mindex)
        minima_frame.save_pdb(jc_filename)

        basin_hash = basin.kv()
        basin_hash['pdbfile'] = jc_filename
        logging.info('  Basin: %(id)s  %(start)d - %(end)d   Minima: %(mindex)d    size=%(len)d' % basin_hash)

        # All writes for one basin go out in a single pipeline round trip.
        with catalog.pipeline() as pipe:
          pipe.rpush('basin:list', bid)
          pipe.hmset('basin:%s'%bid, basin_hash)
          pipe.set('basin:dmu:'+bid, pickle.dumps(dist_space[i]))
          pipe.set('minima:%s'%bid, pickle.dumps(minima_frame))

          # FOR RESIDUE RMSD
          resrms_d = np.sum(basin_res_rms_delta[i][res_rms_Kr])
          basin_hash['resrms_delta'] = resrms_d
          # NOTE(review): the index assumes 'basin:list' was empty when this
          # load started -- confirm for partially loaded catalogs.
          rms_delta_list.append((len(basin_list), resrms_d))
          basin_list.append(basin_hash)
          pipe.set('basin:rmsdelta:'+bid, pickle.dumps(basin_res_rms_delta[i]))

          pipe.execute()

        # Keep the vector for every basin of every trajectory.
        all_dist_space.append(dist_space[i])

  # Re-Construct the Lattice from the per-basin distance space
  if build_new:
    ds = 10*np.array(all_dist_space)
    cm = ds<cutoff
    fs = lat.reduced_feature_set(cm,.115)
    dr, cr = ds[:,fs], cm[:,fs]

    mfis,lfis = lat.maxminer(cr, 1)
    dlat, ik = lat.derived_lattice(mfis, dr, cr)
    for name, obj in (('mfis', mfis), ('lfis', lfis), ('iset', ik), ('dlat', dlat)):
      with open(lattice_file(name), 'wb') as fout:
        pickle.dump(obj, fout)

  else:

    logging.info('Loading Pre-Constructed Lattice Data')
    with open(lattice_file('dlat'), 'rb') as fin:
      dlat = pickle.load(fin)
    with open(lattice_file('mfis'), 'rb') as fin:
      mfis = pickle.load(fin)
    with open(lattice_file('lfis'), 'rb') as fin:
      lfis = pickle.load(fin)
    with open(lattice_file('iset'), 'rb') as fin:
      ik = pickle.load(fin)

  with catalog.pipeline() as pipe:
    pipe.set('lattice:max_fis', pickle.dumps(mfis))
    pipe.set('lattice:low_fis', pickle.dumps(lfis))
    pipe.set('lattice:dlat', pickle.dumps(dlat))
    pipe.set('lattice:iset', pickle.dumps(ik))
    pipe.execute()

  # Disabled alternative: sample seeds from lattice clusters
  # (lat.clusterlattice) instead of ranking by RMS delta.
  # TODO: Check if fan out > single item clusters

  # Seed from the basins with the largest RMS delta first.
  rms_delta_ranked = [x[0] for x in sorted(rms_delta_list, key=lambda i: i[1], reverse=True)]
  start_indices = rms_delta_ranked[:num]

  seedlist = [catalog.lindex('basin:list', i) for i in start_indices]
  sim_init = {key: catalog.get(key) for key in settings.sim_params.keys()}
  global_params = getSimParameters(sim_init, 'seed')
  global_params['psf'] = home+'/work/jc/serial2/de0_0/de0_0.psf'

  for seed in seedlist:
    logging.debug('\nSeeding Job: %s ', seed)
    basin = catalog.hgetall('basin:%s'%seed)
    catalog.rpush('executed', seed)

    # Generate new set of params/coords
    jcID, config = generateFromBasin(basin)

    # Update Additional JC Params and Decision History, as needed
    config.update(global_params)

    # Push to catalog
    logging.info("New Simulation Job Created: %s", jcID)
    for k, v in config.items():
      logging.debug("   %s:  %s", k, str(v))
    catalog.rpush('jcqueue', jcID)
    catalog.hmset(wrapKey('jc', jcID), config)
Example #3
0
# Mirror log records to the console at INFO level.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logging.getLogger("").addHandler(console)

logging.info("SUPPORT   :  %d", support)

cutoff = 8.0

# Scaled distance matrix and its boolean "closer than cutoff" matrix.
DS = 10 * np.load("../data/de_ds_mu.npy")
CM = DS < cutoff
# Hand-picked feature (column) indices used for the reduced input.
Kr = [2, 52, 56, 60, 116, 258, 311, 460, 505, 507, 547, 595, 640, 642, 665, 683, 728, 767, 851, 1244, 1485, 1629, 1636]
CMr, Dr = CM[:, Kr], DS[:, Kr]
logging.info("FINAL Input Matrix:  %s", Dr.shape)
# Fraction of all cells of CM that are set within the reduced columns.
logging.info("Reduction Rate:  %7.4f", CMr.sum() / CM.size)

MFIS, low_fis = lat.maxminer(CMr, support)
dlat, Ik = lat.derived_lattice(MFIS, Dr, CMr)
logging.info("LATTICE,mfis,%d", len(MFIS))
logging.info("LATTICE,lfis,%d", len(low_fis))
logging.info("LATTICE,dlat,%d", len(dlat))
logging.info("LATTICE,iset,%d", len(Ik))
logging.info("LATTICE,edges,%d", sum(len(v) for v in Ik.values()))
# NOTE: sys.getsizeof is shallow; it does not count contained objects.
logging.info("SIZE,mfis,%d", sys.getsizeof(MFIS))
logging.info("SIZE,lfis,%d", sys.getsizeof(low_fis))
logging.info("SIZE,dlat,%d", sys.getsizeof(dlat))
logging.info("SIZE,iset,%d", sys.getsizeof(Ik))
logging.info("\n ALL DONE! Pickling Out")

# The context manager closes the file even if pickling raises.
with open(home + "/work/latt_intrinsics/iset_%d.p" % support, "wb") as outfile:
    pickle.dump(Ik, outfile)
Example #4
0
# Greedy cover: visit the entries of `restrack` in order of increasing size
# and select pairs until every index that owns a non-empty entry has appeared
# in at least one selected pair.
# NOTE(review): presumably restrack[i] lists the (a, b) residue pairs that
# involve residue i, and `prs` is the ordered list of all pairs -- confirm.
remres = {i for i, r in enumerate(restrack) if len(r) > 0}
rev_pr_idx = {k: i for i, k in enumerate(prs)}
# No entry is longer than this, so larger sizes can never select anything.
maxsize = max((len(r) for r in restrack), default=0)
minsize = 1
F3 = set()
# Bounded by maxsize: the unbounded form spins forever when some index in
# remres is never covered by any pair.
while remres and minsize <= maxsize:
    selprs = [p for p in restrack if len(p) == minsize]
    for sp in selprs:
        for a, b in sp:
            # Skip pairs that would not cover any still-uncovered index.
            if a not in remres and b not in remres:
                continue
            remres.discard(a)
            remres.discard(b)
            F3.add(rev_pr_idx[(a, b)])
    minsize += 1
if remres:
    logging.warning('%d indices could not be covered by any pair', len(remres))

# Reduce both matrices to the selected feature columns.
Kr = sorted(F3)
CMr, Dr = CM[:, Kr], D[:, Kr]

logging.info('FINAL Input Matrix:  %s', Dr.shape)
U = lat.unique_events(CMr)
logging.info('\n MAX-MINER running')
# NOTE(review): other call sites unpack two values from lat.maxminer --
# confirm which version of the API this script targets.
MFIS = lat.maxminer(CMr, 100)
with open(home + '/work/mfis.p', 'wb') as fout:
    pickle.dump(MFIS, fout)
logging.info('\n Max Miner Complete. Constructing derived lattice')
dlat, Ik = lat.dlattice_mm(MFIS, Dr, CMr, 100)
logging.info('\n ALL DONE! Pickling Out')
# Context managers guarantee the pickle files are flushed and closed.
with open(home + '/work/iset.p', 'wb') as fout:
    pickle.dump(Ik, fout)
with open(home + '/work/dlat.p', 'wb') as fout:
    pickle.dump(dlat, fout)
Example #5
0
# Boolean "closer than cutoff" matrix and its two column-reduced views.
CM = DS<cutoff
CMr, Dr = CM[:,Kr], DS[:,Kr]
CMm, Dm = CM[:,Km], DS[:,Km]

delabel = np.load(home+'/work/results/DE_label_full.npy')
# Collect the (start, end) windows of all 42 transition logs, shifting each
# log's windows by 100000 per log index so they index into `delabel`.
# NOTE(review): presumably each log covers 100000 frames -- confirm.
DW = []
for i in range(42):
  for a,b in TS.TimeScape.windows(home+'/work/timescape/desh_%02d_transitions.log'%i):
    DW.append((a+i*100000, b+i*100000))

# Label slice per window, then one derived label per window.
dL = [delabel[a:b] for a,b in DW]
DE_LABEL = [LABEL10(i,.9) for i in dL]

SPT = [i[0] for i in db.runquery('select distinct support from latt order by support')]
NC  = [i[0] for i in db.runquery('select distinct numclu from latt order by numclu')]

# Result tables keyed by support value.
mf, lf = {}, {}
dl, ik  = {}, {}
key, clu, cent, var, Gm = {}, {}, {}, {}, {}

s=support
mf[s], lf[s] = lat.maxminer(CMr, s)
dl[s], ik[s] = lat.derived_lattice(mf[s], Dr, CMr)
# The context manager closes the file even if pickling raises.
with open(home + '/work/latt_intrinsics/dlat2_%d.p' % support, 'wb') as fout:
  pickle.dump(dl[s], fout)
# Re-cluster and score once per cluster count; each pass overwrites the
# entries for support `s`.
for num_clu in NC:
  key[s], clu[s], cent[s], var[s], Gm[s] = lat.cluster_harch(dl[s], CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False)
  w, t = lat.score_clusters(clu[s], Dr, cent[s], var[s], Gm[s], sigma, DE_LABEL)
  for k in TBIN10:
    logging.info('SCORE,W,%d,%d,%s,%.5f', support, num_clu, k, w[k])
  for k in TBIN10:
    logging.info('SCORE,T,%d,%d,%s,%.5f', support, num_clu, k, t[k])
Example #6
0
# Mirror log records to the console at INFO level.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logging.getLogger('').addHandler(console)

logging.info("SUPPORT   :  %d", support)

cutoff = 8.

# Scaled distance matrix and its boolean "closer than cutoff" matrix.
DS = 10*np.load('../data/de_ds_mu.npy')
CM = (DS<cutoff)
# Hand-picked feature (column) indices used for the reduced input.
Kr = [2, 52, 56, 60, 116, 258, 311, 460, 505, 507, 547, 595, 640, 642, 665, 683, 728, 767, 851, 1244, 1485, 1629, 1636]
CMr, Dr = CM[:,Kr], DS[:,Kr]
logging.info('FINAL Input Matrix:  %s', Dr.shape)
# Fraction of all cells of CM that are set within the reduced columns.
logging.info('Reduction Rate:  %7.4f', CMr.sum()/CM.size)

MFIS, low_fis = lat.maxminer(CMr, support)
dlat, Ik = lat.derived_lattice(MFIS, Dr, CMr)
logging.info('LATTICE,mfis,%d', len(MFIS))
logging.info('LATTICE,lfis,%d', len(low_fis))
logging.info('LATTICE,dlat,%d', len(dlat))
logging.info('LATTICE,iset,%d', len(Ik))
logging.info('LATTICE,edges,%d', sum(len(v) for v in Ik.values()))
# NOTE: sys.getsizeof is shallow; it does not count contained objects.
logging.info('SIZE,mfis,%d', sys.getsizeof(MFIS))
logging.info('SIZE,lfis,%d', sys.getsizeof(low_fis))
logging.info('SIZE,dlat,%d', sys.getsizeof(dlat))
logging.info('SIZE,iset,%d', sys.getsizeof(Ik))
logging.info('\n ALL DONE! Pickling Out')

# The context manager closes the file even if pickling raises.
with open(home + '/work/latt_intrinsics/iset_%d.p'%support, 'wb') as outfile:
  pickle.dump(Ik, outfile)
Example #7
0
# Distinct support values and cluster counts recorded in the `latt` table.
SPT = [
    i[0]
    for i in db.runquery('select distinct support from latt order by support')
]
NC = [
    i[0]
    for i in db.runquery('select distinct numclu from latt order by numclu')
]

# Result tables keyed by support value.
mf, lf = {}, {}
dl, ik = {}, {}
key, clu, cent, var, Gm = {}, {}, {}, {}, {}

s = support
mf[s], lf[s] = lat.maxminer(CMr, s)
dl[s], ik[s] = lat.derived_lattice(mf[s], Dr, CMr)
# The context manager closes the file even if pickling raises.
with open(home + '/work/latt_intrinsics/dlat2_%d.p' % support, 'wb') as fout:
    pickle.dump(dl[s], fout)
# Re-cluster and score once per cluster count; each pass overwrites the
# entries for support `s`.
for num_clu in NC:
    key[s], clu[s], cent[s], var[s], Gm[s] = lat.cluster_harch(
        dl[s],
        CMr,
        Dr,
        theta=.5,
        num_k=num_clu,
        dL=None,
        verbose=False,
    )
    w, t = lat.score_clusters(clu[s], Dr, cent[s], var[s], Gm[s], sigma,
                              DE_LABEL)
    for k in TBIN10:
        logging.info('SCORE,W,%d,%d,%s,%.5f', support, num_clu, k, w[k])
Example #8
0
def bootstrap_lattice(catalog, num=10, build_new=False):
    ''' Bootstrap After TimeScape has run on source trajectory

    Ensures per-basin data is in the catalog, builds or loads the lattice
    artifacts and stores them in the catalog, then queues one simulation job
    for each of the `num` basins with the largest residue RMS delta.

    Args:
        catalog: redis-style client (lrange/get/set/rpush/hmset/hgetall/
            lindex and pipeline are used).
        num: number of top-ranked basins to seed new jobs from.
        build_new: if True, mine a new lattice from the per-basin distance
            space and pickle it under $HOME/work/data; otherwise load the
            pickles written by a previous build.
    '''
    home = os.getenv("HOME")
    cutoff = 8

    start_coord = [
        'de2586_315', 'de531_20', 'de3765_63', 'de3305_668', 'de1732_139'
    ]

    def outloc(x):
        # Path prefix shared by all output files of one source job.
        return home + '/work/jc/denovouniform1/{0}/{0}'.format(x)

    def lattice_file(name):
        # Location of one pickled lattice artifact.
        return home + '/work/data/denovo_%s.p' % name

    # Per-basin mean distance vectors, one row per basin pushed to
    # 'basin:list'; this is the input to a new lattice build.
    all_dist_space = []

    basin_list = catalog.lrange('basin:list', 0, -1)
    if len(basin_list) == 134:
        logging.info('Basin Data already loaded!')
        # pickle.loads on catalog values: assumes the catalog only holds
        # data written by this pipeline (never untrusted input).
        # NOTE(review): this sums the whole rmsdelta vector, while the load
        # path below sums only the FEATURE_SET entries -- confirm intent.
        rms_delta_list = [
            (i, np.sum(pickle.loads(catalog.get('basin:rmsdelta:' + b))))
            for i, b in enumerate(basin_list)
        ]
        if build_new:
            # Recover the distance space that the load path stored per basin.
            all_dist_space = [
                pickle.loads(catalog.get('basin:dmu:' + b))
                for b in basin_list
            ]
    else:
        logging.info('Loading all bootstrap data to initialize...')
        basin_list = []
        rms_delta_list = []
        pdb_file = home + '/work/data/alpha.pdb'
        # NOTE(review): topo is never read below; the load is kept so a
        # missing alpha.pdb still fails here -- confirm it can be dropped.
        topo = md.load(pdb_file)
        ref_alpha = md.load(home + '/work/' + catalog.get('pdb:ref:0'))
        ref_alpha.atom_slice(ref_alpha.top.select_atom_indices('alpha'),
                             inplace=True)
        res_rms_Kr = FEATURE_SET

        for sc in start_coord:
            # Reset per trajectory: indices into dist_space match window
            # order.
            dist_space = []
            srcfile = outloc(sc) + '.dcd'
            pdbfile = srcfile.replace('dcd', 'pdb')
            logging.debug('LOADING TRAJ:  %s', srcfile)
            traj = md.load(srcfile, top=pdbfile)
            alpha = traj.atom_slice(traj.top.select_atom_indices('alpha'))

            logging.info('Grabbing TS data...')
            W = TS.TimeScape.windows(outloc(sc) + '_transitions.log')
            ts_traj = TS.TimeScapeParser(pdbfile,
                                         outloc(sc),
                                         sc,
                                         dcd=srcfile,
                                         traj=traj)
            basins = ts_traj.load_basins()

            logging.info("Processing distance space and residue RMS")
            dsa = DR.distance_space(alpha)
            # Per-frame, per-atom distance to the reference frame, scaled
            # by 10.
            resrmsd = 10 * np.array(
                [LA.norm(i - ref_alpha.xyz[0], axis=1) for i in alpha.xyz])
            basin_res_rms = np.zeros(shape=(len(ts_traj.basins),
                                            alpha.n_atoms))
            for i, (a, b) in enumerate(W):
                dist_space.append(dsa[a:b].mean(0))
                basin_res_rms[i] = np.median(resrmsd[a:b], axis=0)

            basin_res_rms_delta = np.array(
                [rms_delta(i) for i in basin_res_rms.T]).T
            logging.debug('RMS LEN CHECK:  %d =?= %d    -- Updating RMS Delta',
                          len(basins), len(basin_res_rms_delta))

            for i, basin in enumerate(basins):
                bid = basin.id

                # Store on Disk and in redis
                jc_filename = os.path.join(settings.datadir,
                                           'basin_%s.pdb' % bid)
                logging.info('MIN for %s:   Idx# %d  to %s', bid, basin.mindex,
                             jc_filename)
                minima_frame = traj.slice(basin.mindex)
                minima_frame.save_pdb(jc_filename)

                basin_hash = basin.kv()
                basin_hash['pdbfile'] = jc_filename
                logging.info(
                    '  Basin: %(id)s  %(start)d - %(end)d   Minima: %(mindex)d    size=%(len)d'
                    % basin_hash)

                # All writes for one basin go out in a single pipeline
                # round trip.
                with catalog.pipeline() as pipe:
                    pipe.rpush('basin:list', bid)
                    pipe.hmset('basin:%s' % bid, basin_hash)
                    pipe.set('basin:dmu:' + bid, pickle.dumps(dist_space[i]))
                    pipe.set('minima:%s' % bid, pickle.dumps(minima_frame))

                    # FOR RESIDUE RMSD
                    resrms_d = np.sum(basin_res_rms_delta[i][res_rms_Kr])
                    basin_hash['resrms_delta'] = resrms_d
                    # NOTE(review): the index assumes 'basin:list' was empty
                    # when this load started -- confirm for partially
                    # loaded catalogs.
                    rms_delta_list.append((len(basin_list), resrms_d))
                    basin_list.append(basin_hash)
                    pipe.set('basin:rmsdelta:' + bid,
                             pickle.dumps(basin_res_rms_delta[i]))

                    pipe.execute()

                # Keep the vector for every basin of every trajectory.
                all_dist_space.append(dist_space[i])

    # Re-Construct the Lattice from the per-basin distance space
    if build_new:
        ds = 10 * np.array(all_dist_space)
        cm = ds < cutoff
        fs = lat.reduced_feature_set(cm, .115)
        dr, cr = ds[:, fs], cm[:, fs]

        mfis, lfis = lat.maxminer(cr, 1)
        dlat, ik = lat.derived_lattice(mfis, dr, cr)
        artifacts = (('mfis', mfis), ('lfis', lfis), ('iset', ik),
                     ('dlat', dlat))
        for name, obj in artifacts:
            with open(lattice_file(name), 'wb') as fout:
                pickle.dump(obj, fout)

    else:

        logging.info('Loading Pre-Constructed Lattice Data')
        with open(lattice_file('dlat'), 'rb') as fin:
            dlat = pickle.load(fin)
        with open(lattice_file('mfis'), 'rb') as fin:
            mfis = pickle.load(fin)
        with open(lattice_file('lfis'), 'rb') as fin:
            lfis = pickle.load(fin)
        with open(lattice_file('iset'), 'rb') as fin:
            ik = pickle.load(fin)

    with catalog.pipeline() as pipe:
        pipe.set('lattice:max_fis', pickle.dumps(mfis))
        pipe.set('lattice:low_fis', pickle.dumps(lfis))
        pipe.set('lattice:dlat', pickle.dumps(dlat))
        pipe.set('lattice:iset', pickle.dumps(ik))
        pipe.execute()

    # Disabled alternative: sample seeds from lattice clusters
    # (lat.clusterlattice) instead of ranking by RMS delta.
    # TODO: Check if fan out > single item clusters

    # Seed from the basins with the largest RMS delta first.
    rms_delta_ranked = [
        x[0] for x in sorted(rms_delta_list, key=lambda i: i[1], reverse=True)
    ]
    start_indices = rms_delta_ranked[:num]

    seedlist = [catalog.lindex('basin:list', i) for i in start_indices]
    sim_init = {key: catalog.get(key) for key in settings.sim_params.keys()}
    global_params = getSimParameters(sim_init, 'seed')
    global_params['psf'] = home + '/work/jc/serial2/de0_0/de0_0.psf'

    for seed in seedlist:
        logging.debug('\nSeeding Job: %s ', seed)
        basin = catalog.hgetall('basin:%s' % seed)
        catalog.rpush('executed', seed)

        # Generate new set of params/coords
        jcID, config = generateFromBasin(basin)

        # Update Additional JC Params and Decision History, as needed
        config.update(global_params)

        # Push to catalog
        logging.info("New Simulation Job Created: %s", jcID)
        for k, v in config.items():
            logging.debug("   %s:  %s", k, str(v))
        catalog.rpush('jcqueue', jcID)
        catalog.hmset(wrapKey('jc', jcID), config)