remres = set([i for i, r in enumerate(restrack) if len(r) > 0])
rev_pr_idx = {k: i for i, k in enumerate(prs)}
minsize = 1
F3 = set()
while len(remres) > 0:
    selprs = [p for p in restrack if len(p) == minsize]
    for sp in selprs:
        for a, b in sp:
            if a not in remres and b not in remres:
                continue
            if a in remres:
                remres.remove(a)
            if b in remres:
                remres.remove(b)
            F3.add(rev_pr_idx[(a, b)])
    minsize += 1
Kr = sorted(F3)

CMr, Dr = CM[:, Kr], D[:, Kr]
logging.info('FINAL Input Matrix: %s', Dr.shape)
U = lat.unique_events(CMr)

logging.info('\n MAX-MINER running')
MFIS = lat.maxminer(CMr, 100)
pickle.dump(MFIS, open(home + '/work/mfis.p', 'wb'))

logging.info('\n Max Miner Complete. Constructing derived lattice')
dlat, Ik = lat.dlattice_mm(MFIS, Dr, CMr, 100)

logging.info('\n ALL DONE! Pickling Out')
pickle.dump(Ik, open(home + '/work/iset.p', 'wb'))
pickle.dump(dlat, open(home + '/work/dlat.p', 'wb'))
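# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the greedy pass above (hypothetical
# helper, not part of the original script): buckets of residue pairs are
# scanned in increasing size, and a pair is kept (via its index in `prs`)
# whenever it covers a residue no smaller bucket has covered yet. Like the
# loop above, it assumes every tracked residue eventually appears in a bucket.
def greedy_pair_cover(restrack, prs):
    remres = set(i for i, r in enumerate(restrack) if len(r) > 0)
    rev_pr_idx = {k: i for i, k in enumerate(prs)}
    keep, minsize = set(), 1
    while len(remres) > 0:
        for bucket in (p for p in restrack if len(p) == minsize):
            for a, b in bucket:
                if a in remres or b in remres:
                    remres.discard(a)
                    remres.discard(b)
                    keep.add(rev_pr_idx[(a, b)])
        minsize += 1
    return sorted(keep)

# e.g. greedy_pair_cover([[(0, 1)], [(0, 2), (1, 2)]], [(0, 1), (0, 2), (1, 2)])
# returns [0]: the single size-1 bucket already covers residues 0 and 1.
# ---------------------------------------------------------------------------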
def bootstrap_lattice(catalog, num=10, build_new=False):
    ''' Bootstrap After TimeScape has run on source trajectory '''
    home = os.getenv("HOME")
    support = 1
    cutoff = 8

    start_coord = ['de2586_315', 'de531_20', 'de3765_63', 'de3305_668', 'de1732_139']
    dcdfile = lambda x: home + '/work/data/{0}.dcd'.format(x)
    outloc = lambda x: home + '/work/jc/denovouniform1/{0}/{0}'.format(x)

    traj_list = {}
    basin_list = catalog.lrange('basin:list', 0, -1)
    if len(basin_list) == 134:
        logging.info('Basin Data already loaded!')
        rms_delta_list = [(i, np.sum(pickle.loads(catalog.get('basin:rmsdelta:' + b))))
                          for i, b in enumerate(basin_list)]
    else:
        logging.info('Loading all bootstrap data to initialize...')
        basin_list = []
        rms_delta_list = []
        pdb_file = home + '/work/data/alpha.pdb'
        topo = md.load(pdb_file)
        ref_alpha = md.load(home + '/work/' + catalog.get('pdb:ref:0'))
        ref_alpha.atom_slice(ref_alpha.top.select_atom_indices('alpha'), inplace=True)
        res_rms_Kr = FEATURE_SET

        for sc in start_coord:
            dist_space = []
            srcfile = outloc(sc) + '.dcd'
            pdbfile = srcfile.replace('dcd', 'pdb')
            logging.debug('LOADING TRAJ:  %s', srcfile)
            traj = md.load(srcfile, top=pdbfile)
            traj_list[sc] = traj
            alpha = traj.atom_slice(traj.top.select_atom_indices('alpha'))

            logging.info('Grabbing TS data...')
            W = TS.TimeScape.windows(outloc(sc) + '_transitions.log')
            ts_traj = TS.TimeScapeParser(pdbfile, outloc(sc), sc, dcd=srcfile, traj=traj)
            basins = ts_traj.load_basins()

            logging.info("Processing distance space and residue RMS")
            dsa = DR.distance_space(alpha)
            resrmsd = 10 * np.array([LA.norm(i - ref_alpha.xyz[0], axis=1) for i in alpha.xyz])
            basin_res_rms = np.zeros(shape=(len(ts_traj.basins), alpha.n_atoms))
            for i, (a, b) in enumerate(W):
                dist_space.append(dsa[a:b].mean(0))
                basin_res_rms[i] = np.median(resrmsd[a:b], axis=0)
            basin_res_rms_delta = np.array([rms_delta(i) for i in basin_res_rms.T]).T
            logging.debug('RMS LEN CHECK:  %d =?= %d  -- Updating RMS Delta',
                          len(basins), len(basin_res_rms_delta))

            for i, basin in enumerate(basins):
                pipe = catalog.pipeline()
                bid = basin.id

                # Store on Disk and in redis
                jc_filename = os.path.join(settings.datadir, 'basin_%s.pdb' % bid)
                logging.info('MIN for %s:  Idx# %d  to %s', bid, basin.mindex, jc_filename)
                minima_frame = traj.slice(basin.mindex)  # md.load_frame(src_traj, basin.mindex, top=src_traj.replace('dcd', 'pdb'))
                minima_frame.save_pdb(jc_filename)

                basin_hash = basin.kv()
                basin_hash['pdbfile'] = jc_filename
                logging.info('  Basin: %(id)s  %(start)d - %(end)d   Minima: %(mindex)d   size=%(len)d' % basin_hash)

                pipe.rpush('basin:list', bid)
                pipe.hmset('basin:%s' % bid, basin_hash)
                pipe.set('basin:dmu:' + bid, pickle.dumps(dist_space[i]))
                pipe.set('minima:%s' % bid, pickle.dumps(minima_frame))

                # FOR RESIDUE RMSD
                resrms_d = np.sum(basin_res_rms_delta[i][res_rms_Kr])
                basin_hash['resrms_delta'] = resrms_d
                rms_delta_list.append((len(basin_list), resrms_d))
                basin_list.append(basin_hash)
                pipe.set('basin:rmsdelta:' + bid, pickle.dumps(basin_res_rms_delta[i]))

                pipe.execute()

    # FOR SEED SAMPLING USING RMS_DELTA
    # Note: skip the first basin

    # Re-Construct the Lattice
    if build_new:
        ds = 10 * np.array(dist_space)  # per-basin mean distance space, in Angstroms
        cm = ds < cutoff
        fs = lat.reduced_feature_set(cm, .115)
        logging.info('Reduced feature set size: %d', len(fs))
        dr, cr = ds[:, fs], cm[:, fs]
        mfis, lfis = lat.maxminer(cr, 1)
        dlat, ik = lat.derived_lattice(mfis, dr, cr)
        pickle.dump(mfis, open(home + '/work/data/denovo_mfis.p', 'wb'))
        pickle.dump(lfis, open(home + '/work/data/denovo_lfis.p', 'wb'))
        pickle.dump(ik, open(home + '/work/data/denovo_iset.p', 'wb'))
        pickle.dump(dlat, open(home + '/work/data/denovo_dlat.p', 'wb'))
    else:
        logging.info('Loading Pre-Constructed Lattice Data')
        dlat = pickle.load(open(home + '/work/data/denovo_dlat.p', 'rb'))
        mfis = pickle.load(open(home + '/work/data/denovo_mfis.p', 'rb'))
        lfis = pickle.load(open(home + '/work/data/denovo_lfis.p', 'rb'))
        ik = pickle.load(open(home + '/work/data/denovo_iset.p', 'rb'))
        with catalog.pipeline() as pipe:
            pipe.set('lattice:max_fis', pickle.dumps(mfis))
            pipe.set('lattice:low_fis', pickle.dumps(lfis))
            pipe.set('lattice:dlat', pickle.dumps(dlat))
            pipe.set('lattice:iset', pickle.dumps(ik))
            pipe.execute()

    # logging.info('Building Existing lattice object')
    # lattice = lat.Lattice(ds, fs, cutoff, support)
    # lattice.set_fis(max_fis, low_fis)
    # lattice.set_dlat(dlat, Ik)
    # sampler = LatticeSampler(lattice)

    # Sample -- FOR USING LATTICE TO BOOTSTRAP
    # cl, sc, el = lat.clusterlattice(dlat, cr, dr, ik, num_k=8, invert=True)
    # cl_list = sorted(el, key=lambda x: len(x))
    # TODO: Check if fan out > single item clusters
    # start_indices = [clu[0][0] for clu in cl_list[:num]]

    rms_delta_ranked = [x[0] for x in sorted(rms_delta_list, key=lambda i: i[1], reverse=True)]
    start_indices = rms_delta_ranked[:num]

    seedlist = [catalog.lindex('basin:list', i) for i in start_indices]
    sim_init = {key: catalog.get(key) for key in settings.sim_params.keys()}
    global_params = getSimParameters(sim_init, 'seed')
    global_params['psf'] = home + '/work/jc/serial2/de0_0/de0_0.psf'

    for seed in seedlist:
        logging.debug('\nSeeding Job: %s ', seed)
        basin = catalog.hgetall('basin:%s' % seed)
        catalog.rpush('executed', seed)

        # Generate new set of params/coords
        jcID, config = generateFromBasin(basin)

        # Update Additional JC Params and Decision History, as needed
        config.update(global_params)

        # Push to catalog
        logging.info("New Simulation Job Created: %s", jcID)
        for k, v in config.items():
            logging.debug("   %s:  %s", k, str(v))
        catalog.rpush('jcqueue', jcID)
        catalog.hmset(wrapKey('jc', jcID), config)
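# ---------------------------------------------------------------------------
# Hedged usage sketch for bootstrap_lattice. The catalog argument is the
# project's redis-backed store (lrange / hgetall / pipeline / ...); a raw
# redis-py client is shown here only as a stand-in with a compatible method
# set, and is an assumption, not the repo's actual catalog class.
#
#   import redis
#   catalog = redis.StrictRedis(host='localhost', port=6379)
#   bootstrap_lattice(catalog, num=5, build_new=False)
#
# With build_new=True the de novo lattice is mined from scratch and pickled
# to ~/work/data/denovo_*.p; with build_new=False those pickles are loaded
# and pushed into the catalog under the lattice:* keys.
# ---------------------------------------------------------------------------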
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logging.getLogger("").addHandler(console)
logging.info("SUPPORT  :  %d", support)

cutoff = 8.0
DS = 10 * np.load("../data/de_ds_mu.npy")
CM = DS < cutoff
Kr = [2, 52, 56, 60, 116, 258, 311, 460, 505, 507, 547, 595, 640, 642, 665,
      683, 728, 767, 851, 1244, 1485, 1629, 1636]
CMr, Dr = CM[:, Kr], DS[:, Kr]
logging.info("FINAL Input Matrix:  %s", Dr.shape)
logging.info("Reduction Rate:  %7.4f", CMr.sum() / np.multiply(*CM.shape))

MFIS, low_fis = lat.maxminer(CMr, support)
dlat, Ik = lat.derived_lattice(MFIS, Dr, CMr)

logging.info("LATTICE,mfis,%d", len(MFIS))
logging.info("LATTICE,lfis,%d", len(low_fis))
logging.info("LATTICE,dlat,%d", len(dlat))
logging.info("LATTICE,iset,%d", len(Ik))
logging.info("LATTICE,edges,%d", sum([len(v) for v in Ik.values()]))
logging.info("SIZE,mfis,%d", sys.getsizeof(MFIS))
logging.info("SIZE,lfis,%d", sys.getsizeof(low_fis))
logging.info("SIZE,dlat,%d", sys.getsizeof(dlat))
logging.info("SIZE,iset,%d", sys.getsizeof(Ik))

logging.info("\n ALL DONE! Pickling Out")
outfile = open(home + "/work/latt_intrinsics/iset_%d.p" % support, "wb")
pickle.dump(Ik, outfile)
outfile.close()
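# ---------------------------------------------------------------------------
# Hedged companion sketch (not in the original script): reload the itemset
# index pickled above and recompute the edge total that the 'LATTICE,edges'
# log line reports. The path layout mirrors the script's defaults; the helper
# name is hypothetical.
import os
import pickle

def load_iset(support, home=os.getenv('HOME')):
    """Return (iset, total_edges) for the given support level."""
    with open(home + '/work/latt_intrinsics/iset_%d.p' % support, 'rb') as f:
        ik = pickle.load(f)
    return ik, sum(len(v) for v in ik.values())
# ---------------------------------------------------------------------------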
CM = DS < cutoff
CMr, Dr = CM[:, Kr], DS[:, Kr]
CMm, Dm = CM[:, Km], DS[:, Km]

delabel = np.load(home + '/work/results/DE_label_full.npy')
DW = []
for i in range(42):
    for a, b in TS.TimeScape.windows(home + '/work/timescape/desh_%02d_transitions.log' % i):
        DW.append((a + i * 100000, b + i * 100000))
dL = [delabel[a:b] for a, b in DW]
DE_LABEL = [LABEL10(i, .9) for i in dL]

SPT = [i[0] for i in db.runquery('select distinct support from latt order by support')]
NC = [i[0] for i in db.runquery('select distinct numclu from latt order by numclu')]

mf, lf = {}, {}
dl, ik = {}, {}
key, clu, cent, var, Gm = {}, {}, {}, {}, {}

s = support
mf[s], lf[s] = lat.maxminer(CMr, s)
dl[s], ik[s] = lat.derived_lattice(mf[s], Dr, CMr)
pickle.dump(dl[s], open(home + '/work/latt_intrinsics/dlat2_%d.p' % support, 'wb'))

for num_clu in NC:
    key[s], clu[s], cent[s], var[s], Gm[s] = lat.cluster_harch(
        dl[s], CMr, Dr, theta=.5, num_k=num_clu, dL=None, verbose=False)
    w, t = lat.score_clusters(clu[s], Dr, cent[s], var[s], Gm[s], sigma, DE_LABEL)
    for k in TBIN10:
        logging.info('SCORE,W,%d,%d,%s,%.5f', support, num_clu, k, w[k])
    for k in TBIN10:
        logging.info('SCORE,T,%d,%d,%s,%.5f', support, num_clu, k, t[k])
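# ---------------------------------------------------------------------------
# A small sketch of the window-offset bookkeeping above (hypothetical helper,
# not in the original script): TimeScape reports windows per segment, and each
# of the 42 segments is 100000 frames long, so every (a, b) is shifted into
# global frame coordinates before slicing the full-length label array.
def to_global_windows(per_segment_windows, frames_per_segment=100000):
    """per_segment_windows: list (one entry per segment) of [(a, b), ...]."""
    out = []
    for seg, windows in enumerate(per_segment_windows):
        out.extend((a + seg * frames_per_segment, b + seg * frames_per_segment)
                   for a, b in windows)
    return out

# e.g. to_global_windows([[(0, 40)], [(5, 60)]]) -> [(0, 40), (100005, 100060)]
# ---------------------------------------------------------------------------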