def execute(self, new_basin_list):
    """Execute the controller algorithm for one timestep.

    Loads the previously processed distance space from the catalog, merges
    in the distance vectors and bin labels of the newly analyzed basins,
    runs the sampler selected by ``self.experiment_number`` to pick source
    basins, generates one job candidate per selected basin and writes the
    updated state back to the catalog.

    Sampler selection by experiment number:
      * 12 -- uniform sampler over the five start states
      * 13 -- biased (umbrella) sampler over the 10-bin labels
      * 14 -- lattice sampler merged with historical (DEShaw) data
      * 15 -- lattice sampler built de novo, with RMSD-based exploration
      * 18 -- correlation (k-means) sampler

    Args:
        new_basin_list: ids of basins analyzed since the last run; each id
            must have a ``'basin:<id>'`` entry in ``self.data``. Ignored
            (treated as empty) when ``self.force_decision`` is set.

    Returns:
        list: ids of the job candidates that were created, in creation
        order (the keys of the new job-candidate queue).

    Raises:
        SystemExit: if the catalog holds no distance vector for one of the
            new basins.

    Side effects visible in this method: increments
    ``self.data['timestep']`` and ``self.data['basin:processed']``, sets
    ``self.cacheclient`` and ``self.data['jcqueue']``, pushes to / updates
    several catalog keys, flags the previous queue's jobs for garbage
    collection and calls ``self.notify('sim')``.
    """
    logging.debug('CTL MT')

    # PRE-PROCESSING ---------------------------------------------------------
    logging.debug("============================ <PRE-PROCESS> =============================")
    np.set_printoptions(precision=4, linewidth=150)

    self.data['timestep'] += 1
    logging.info('TIMESTEP: %d', self.data['timestep'])

    settings = systemsettings()
    bench = microbench('ctl_%s' % settings.name, self.seqNumFromID())
    stat = StatCollector('ctl_%s' % settings.name, self.seqNumFromID())

    # A forced decision re-samples from existing data only.
    if self.force_decision:
        new_basin_list = []

    # Connect to the cache
    self.cacheclient = CacheClient(settings.APPL_LABEL)

    # create the "binlist":
    numresources = self.data['numresources']
    explore_factor = float(self.data['sampler:explore'])

    # Load new RMS Labels -- load all for now
    bench.start()
    logging.debug('Loading RMS Labels')
    start_index = max(0, self.data['ctl_index_head'])
    logging.debug(" Start_index=%d, batch_size=%d", start_index, len(new_basin_list))

    # Calculate variable PDF estimations for each subspace via bootstrapping:
    logging.debug("======================= <SUBSPACE CONVERGENCE> (skip) ===================")

    # IMPLEMENT USER QUERY with REWEIGHTING:
    logging.debug("======================= <QUERY PROCESSING> =========================")

    stat.collect('new_basin', len(new_basin_list))

    ##### BARRIER
    self.wait_catalog()

    # QUERY PROCESSING & SAMPLING BELOW to select indices.
    EXPERIMENT_NUMBER = self.experiment_number
    logging.info("RUNNING EXPER CONFIGURATION #%d", EXPERIMENT_NUMBER)

    # TODO: ABSTRACT FEATURE DIMENSIONALITY
    n_features = 1653

    # Basin List will be the list of basin representing the new Job Candidates
    selected_basin_list = []
    all_basins = self.data['basin:list']
    num_prev_basins = int(self.data['basin:processed'])

    # Load Previous Distance Space (historical data).
    # LRANGE treats its stop index as inclusive, so the last previously
    # processed row is num_prev_basins - 1. With no previous basins the
    # call is skipped entirely (a stop of -1 would mean "to the end").
    # NOTE: pickle.loads trusts whatever is stored in the catalog; the
    # catalog must not be writable by untrusted parties.
    if num_prev_basins > 0:
        prev_ds_raw = self.catalog.lrange('dspace', 0, num_prev_basins - 1)
    else:
        prev_ds_raw = []
    local_basins = {}
    if len(prev_ds_raw) > 0:
        ds_prev = np.zeros(shape=(len(prev_ds_raw), n_features))
        logging.info("Unpickling distance space to array: %s", ds_prev.shape)
        for i, elm in enumerate(prev_ds_raw):
            ds_prev[i] = pickle.loads(elm)
        logging.info('Prev DS loaded. Size = %d', len(ds_prev))
    else:
        logging.info('NO Prev DS')
        ds_prev = []

    # FOR Supervised Classification (included for all for post-partem processing)
    bin_labels_10 = ['T0', 'T1', 'T2', 'T3', 'T4', 'W0', 'W1', 'W2', 'W3', 'W4']
    bin_labels_25 = [(a, b) for a in range(5) for b in range(5)]
    bin_list_10 = {
        k: [int(i) for i in self.catalog.lrange('bin:10:%s' % k, 0, -1)]
        for k in bin_labels_10
    }
    bin_list_25 = {
        k: [int(i) for i in self.catalog.lrange('bin:25:%d_%d' % k, 0, -1)]
        for k in bin_labels_25
    }

    # Merge locally analyzed distance space
    delta_ds = np.zeros(shape=(len(new_basin_list), n_features))
    logging.info('Collecting new data from basins: %s', new_basin_list)
    for i, bid in enumerate(new_basin_list):
        basin = self.data['basin:' + bid]
        global_basin_index = int(basin['dsidx'])
        local_basins[global_basin_index] = basin

        # Update Distance Space
        dmu_ = self.catalog.lindex('dspace', global_basin_index)
        if dmu_ is None:
            print("ERROR!!!!")
            print(global_basin_index, i)
            for k, v in basin.items():
                print(k, v)
            # NOTE(review): this aborts with exit status 0 even though it is
            # an error path -- confirm whether a non-zero status is wanted.
            sys.exit(0)

        delta_ds[i] = pickle.loads(dmu_)

        # Update Supervise Classification
        label_seq = [int(x) for x in self.catalog.lrange('basin:labelseq:' + bid, 0, -1)]
        label10 = bin_label_10(label_seq)
        label25 = bin_label_25(label_seq)
        basin['label:10'] = label10
        basin['label:25'] = label25
        bin_list_10[label10].append(global_basin_index)
        bin_list_25[label25].append(global_basin_index)
        self.data['basin:processed'] += 1

    if len(new_basin_list) > 0 and len(prev_ds_raw) > 0:
        dist_space = np.vstack((ds_prev, delta_ds))
    elif len(delta_ds) == 0:
        dist_space = np.array(ds_prev)
    elif len(ds_prev) == 0:
        logging.info('First Set of Distance Coord laoded')
        dist_space = delta_ds
    else:
        logging.error("ERROR! NO DISTANCE SPACE IN THE CATALOG")

    def _basin_for_index(idx):
        """Return the basin record for a global distance-space index.

        Indices below ``num_prev_basins`` are read from the catalog; any
        other index must belong to a basin collected in this run.
        """
        if idx < num_prev_basins:
            logging.info("Select index: %s (Retrieve from Catalog)", idx)
            prev_bid = self.data['basin:list'][idx]
            return self.catalog.hgetall('basin:%s' % prev_bid)
        logging.info("Select index: %s (New locally built basin in mem)", idx)
        return local_basins[idx]

    # UNIFORM SAMPLER (BASIC)
    if EXPERIMENT_NUMBER == 12:
        basin_idx_list = []
        # Fold the 10 bins into 5 start states using the digit in the label.
        candidate_list = [[] for _ in range(5)]
        for k, v in bin_list_10.items():
            candidate_list[int(k[1])].extend(v)

        # UNIFORM SAMPLING
        # Only states that actually hold candidates can be drawn from:
        # np.random.choice(0) raises on an empty state.
        populated = [s for s, members in enumerate(candidate_list) if members]
        if not populated:
            logging.warning('UNIFORM SAMPLER: no candidate basins in any start state')
        else:
            for _ in range(numresources):
                # Select Random start state
                start_state = populated[np.random.randint(len(populated))]
                # Select random index
                members = candidate_list[start_state]
                rand_index = np.random.choice(len(members))
                basin_idx_list.append(members[rand_index])

        for i in basin_idx_list:
            basin = _basin_for_index(i)
            logging.debug(' BASIN: %s', basin['id'])
            selected_basin_list.append(basin)

    # BIASED SAMPLER (UPDATED)
    if EXPERIMENT_NUMBER == 13:
        # USING 10-BIN LABELS
        distro = [len(bin_list_10[i]) for i in bin_labels_10]

        # Create and invoke the sampler
        logging.info('Running the biased (umbrella) samplers')
        sampler = BiasSampler(distro)
        samplecount = np.zeros(len(bin_labels_10), dtype=np.int16)

        # Find the first index for each bin. The population of each bin is
        # what matters here, so look the bin up by label rather than taking
        # the length of the label string itself.
        # (samplecount / explore_direction are only consumed by the
        # explore/exploit code that is still disabled below.)
        explore_direction = 1 if explore_factor < .5 else -1
        for i, label in enumerate(bin_labels_10):
            members = bin_list_10[label]
            if len(members) == 0:
                idx = 0
            else:
                idx = np.floor(explore_factor * (len(members) - 1))
            samplecount[i] = idx

        sel_bins = sampler.execute(numresources)

        logging.info('Processing selected bins to find starting candidates')
        candidate_list = {}
        basin_idx_list = []
        for b in sel_bins:
            target_bin = bin_labels_10[b]
            if target_bin not in candidate_list:
                candidate_list[target_bin] = bin_list_10[target_bin]
                # TODO: FULLY IMPLEMENT EXPLORE/EXPLOIT BUT INCL HISTORY/PROVONANCE
                # Lazy Update to centroid -- push to catalog immediately
                # vals = dist_space[bin_list_10[target_bin]]
                # logging.info('Updating Centroid for bin %s, bindata: %s', target_bin, vals.shape)
                # centroid = np.mean(vals, axis=0)
                # self.catalog.set('bin:10:centroid:%s' % target_bin, pickle.dumps(centroid))
                # dist_center = [LA.norm(centroid - dist_space[i]) for i in bin_list_10[target_bin]]
                # candidate_list[target_bin] = sorted(zip(bin_list_10[target_bin], dist_center), key=lambda x: x[1])
            # basin_idx, basin_diff = candidate_list[target_bin][samplecount[b]]
            # samplecount[b] += explore_direction
            # # Wrap
            # if samplecount[b] == 0:
            #     samplecount = len(candidate_list[target_bin]) - 1
            # if samplecount[b] == len(candidate_list[target_bin]):
            #     samplecount = 0

            members = candidate_list[target_bin]
            if not members:
                # Nothing to start from in this bin; drawing from it would
                # make np.random.choice raise.
                logging.warning('BIAS SAMPLER: selected bin %s is empty; skipping', target_bin)
                continue

            # FOR NOW PICK A RANDOM CANDIDATE
            rand_index = np.random.choice(len(members))
            basin_idx = members[rand_index]
            logging.info('BIAS SAMPLER:\n Bin: %s\n basin: %d Delta from Center: %6.3f (note: dist not factored in)',
                         target_bin, basin_idx, 0.)
            basin_idx_list.append(basin_idx)

        for i in basin_idx_list:
            basin = _basin_for_index(i)
            logging.debug(' BASIN: %s', basin['id'])
            selected_basin_list.append(basin)

    # LATTICE SAMPLER (WITH HISTORICAL DATA)
    if EXPERIMENT_NUMBER == 14:
        # Merge Existing delta with DEShaw Pre-Processed data:
        logging.info('Merging DEShaw with existing generated data')

        # Set parameters for lattice
        Kr = [int(i) for i in self.catalog.lrange('lattice:features', 0, -1)]
        support = int(self.data['lattice:support'])
        dspt = self.catalog.get('lattice:delta_support')
        delta_support = 5 if dspt is None else int(dspt)
        cutoff = float(self.data['lattice:cutoff'])
        logging.info('PARAMS Kr:%s\n support:%d dspt:%d cutoff:%f', Kr, support, delta_support, cutoff)

        # Load existing (base) lattice data
        logging.info("Unpickling max/low FIS and derived lattice EMD values")
        max_fis = pickle.loads(self.catalog.get('lattice:max_fis'))
        low_fis = pickle.loads(self.catalog.get('lattice:low_fis'))
        dlat = pickle.loads(self.catalog.get('lattice:dlat'))

        # TODO: MOVE iset completely into catalog
        # Ik = pickle.loads(self.catalog.get('lattice:iset'))

        # FOR NOW: Full DEShaw Index is saved on disk (TODO: MOVE TO CATALOG)
        logging.info("Loading full Itemset from disk (TODO: Det optimization on mem/time)")
        iset_path = settings.datadir + '/iset.p'
        iset_delta_path = settings.datadir + '/iset_delta.p'
        with open(iset_path, 'rb') as iset_file:
            Ik = pickle.load(iset_file)

        # Item_set Keys (Ik) are only saved as a delta for space conservation
        if os.path.exists(iset_delta_path):
            with open(iset_delta_path, 'rb') as delta_file:
                Ik_delta = pickle.load(delta_file)
        else:
            Ik_delta = {}

        # Merge previous item set delta with DEShaw index
        logging.info("Merging DEShaw Ik with Delta IK")
        for k, v in Ik_delta.items():
            Ik[k] = np.concatenate((Ik[k], v)) if k in Ik else v

        # Build Base Lattice Object
        base_lattice = lat.Lattice(ds_prev, Kr, cutoff, support)
        base_lattice.set_fis(max_fis, low_fis)
        base_lattice.set_dlat(dlat, Ik)

        if not self.force_decision and len(delta_ds) > 0:
            # Build Delta Lattice Object
            logging.info('Building Delta lattice. Num new items: %d', len(delta_ds))
            # NOTE(review): `invert_vals` is not assigned anywhere in this
            # method -- confirm it is defined at module scope.
            delta_lattice = lat.Lattice(delta_ds, Kr, cutoff, delta_support, invert=invert_vals)
            delta_lattice.maxminer()
            delta_lattice.derive_lattice()

            # Update non-DEShaw delta itemset key index
            logging.info("Updating Itemsets and Distance Space Matrix")
            for k, v in delta_lattice.Ik.items():
                Ik_delta[k] = np.concatenate((Ik_delta[k], v)) if k in Ik_delta else v

            # Save Ik delta to disk
            logging.info("Saving Delta Itemset (to disk)")
            with open(iset_delta_path, 'wb') as delta_file:
                pickle.dump(Ik_delta, delta_file)

            # Perform incremental maintenance
            logging.info('Merging Delta lattice with Base Lattice')
            base_lattice.merge(delta_lattice)

        # Create the Sampler object (also make clusters)
        logging.info('Invoking the Lattice Sampler')

        sampler = LatticeSampler(base_lattice)
        basin_id_list = sampler.execute(numresources)
        for index in basin_id_list:
            bid = all_basins[index]
            selected_basin_list.append(self.catalog.hgetall('basin:%s' % bid))

    # LATTICE SAMPLER (DE NOVO)
    if EXPERIMENT_NUMBER == 15:
        # PREPROCESS
        old_basin_ids = all_basins[:num_prev_basins]

        # DENOVO Exploratory Bootstrapping (RMSD)
        explore_factor = float(self.data['sampler:explore'])

        # TODO: Better transtion plan from explore to exploit
        # Decay from the already-converted float so this also works when the
        # stored value is a numeric string.
        self.data['sampler:explore'] = explore_factor * .75
        # Set for O(1) membership tests in the selection loop below.
        executed_basins = set(self.catalog.lrange('executed', 0, -1))

        if explore_factor > 0:
            logging.info("EXPLORING Most active basins....")
            # Basin hashes are stored under 'basin:<id>' (same key form used
            # by every other lookup in this method).
            basindata = [self.catalog.hgetall('basin:%s' % bid) for bid in old_basin_ids]
            for bid in new_basin_list:
                basindata.append(self.data['basin:' + bid])

            basins_with_rms = [b for b in basindata if 'resrms_delta' in b]
            basin_by_rmsd = sorted(basins_with_rms,
                                   key=lambda x: float(x['resrms_delta']),
                                   reverse=True)
            explore_samples = int(np.floor(numresources * explore_factor))
            logging.info('Num to explore: %d out of %d', explore_samples, len(basin_by_rmsd))
            idx, num_sampled = 0, 0

            # Walk basins from largest to smallest resrms_delta, skipping
            # any that were already executed, until the quota is filled.
            while idx < len(basin_by_rmsd) and num_sampled < explore_samples:
                selb = basin_by_rmsd[idx]
                if selb['id'] in executed_basins:
                    logging.info('Selected %s, but it has been executed. Selecting next option', selb['id'])
                else:
                    selected_basin_list.append(selb)
                    # The leading %d is the running sample count; the basin
                    # id is a string and cannot be formatted with %d.
                    logging.info(' (%d) EXPLORE BASIN: %s %f', num_sampled, selb['id'], float(selb['resrms_delta']))
                    # Each explored basin consumes one resource slot.
                    numresources -= 1
                    num_sampled += 1
                idx += 1

        # TODO: Reduced Feature Sets
        # Using Reduced Feature Set Alg #2 HERE
        support = int(.01 * len(dist_space))
        cutoff = 8

        # RE-Calc the whole lattice:
        logging.info("Building the new lattice")
        BUILD_NEW = not self.catalog.exists('lattice:bootstrapped')
        # TODO: Decision to go from build new to incr maint
        if BUILD_NEW:
            tval = .05
            Kr = lat.reduced_feature_set2(dist_space, cutoff, theta=tval, maxk=25)
            retry = 5
            # Halve theta (at most 5 times) until at least 12 features remain.
            while len(Kr) < 12 and retry > 0:
                tval /= 2
                retry -= 1
                Kr = lat.reduced_feature_set2(dist_space, cutoff, theta=tval, maxk=25)

            base_lattice = lat.Lattice(dist_space, Kr, cutoff, support)
            base_lattice.maxminer()
            base_lattice.derive_lattice()
            with self.catalog.pipeline() as pipe:
                pipe.delete('lattice:kr')
                for i in sorted(Kr):
                    pipe.rpush('lattice:kr', i)
                pipe.execute()

        else:
            # Load existing (base) lattice data
            max_fis = pickle.loads(self.catalog.get('lattice:max_fis'))
            low_fis = pickle.loads(self.catalog.get('lattice:low_fis'))
            dlat = pickle.loads(self.catalog.get('lattice:dlat'))
            Ik = pickle.loads(self.catalog.get('lattice:iset'))
            Kr = [int(i) for i in self.catalog.lrange('lattice:kr', 0, -1)]

            # Build Lattice Object
            logging.info('Building Existing lattice object')
            base_lattice = lat.Lattice(ds_prev, Kr, cutoff, support)
            base_lattice.set_fis(max_fis, low_fis)
            base_lattice.set_dlat(dlat, Ik)

            # Build Delta Lattice Object
            logging.info('Building Delta lattice. Num new items: %d', len(delta_ds))
            delta_lattice = lat.Lattice(delta_ds, Kr, cutoff, 1)
            delta_lattice.maxminer()
            delta_lattice.derive_lattice()

            # Perform incremental maintenance
            logging.info('Merging Delta lattice with Base Lattice')
            base_lattice.merge(delta_lattice)

        # Whatever was not consumed by exploration goes to the lattice sampler.
        if numresources > 0:
            logging.info('Invoking the Lattice Sampler')
            sampler = LatticeSampler(base_lattice)
            basin_id_list = sampler.execute(numresources)

            # For now retrieve immediately from catalog
            self.wait_catalog()
            for index in basin_id_list:
                bid = all_basins[index]
                key = 'basin:%s' % bid
                # Check to ensure its not a new basin and that it exists in the DB
                if self.catalog.exists(key):
                    logging.debug('KEY EXISTS: %s', key)
                    selbasin = self.catalog.hgetall(key)
                else:
                    logging.debug('NO KEY: %s\n%s', key, self.data[key])
                    selbasin = self.data[key]
                selected_basin_list.append(selbasin)

    # CORRELATION SAMPLER (KMEANS)
    if EXPERIMENT_NUMBER == 18:
        # NOTE(review): `cm_all` is not assigned anywhere in this method --
        # confirm it is defined at module scope.
        sampler = CorrelationSampler(cm_all, mu=dist_space)
        basin_id_list = sampler.execute(numresources)
        for bid in basin_id_list:
            selected_basin_list.append(self.catalog.hgetall('basin:%s' % bid))

    bench.mark('GlobalAnalysis')

    # Generate new starting positions
    jcqueue = OrderedDict()
    src_traj_list = []
    for basin in selected_basin_list:
        src_traj_list.append(basin['traj'])
        if basin['traj'].startswith('desh'):
            global_params = getSimParameters(self.data, 'deshaw')
            # fileno = int(basin['traj'][-4:])
            # frame = int(basin['mindex'])
            # jcID, config = generateDEShawJC(fileno, frame)
        else:
            global_params = getSimParameters(self.data, 'gen')
            # Generated trajectories reuse the psf of their source job.
            src_psf = self.catalog.hget('jc_' + basin['traj'], 'psf')
            global_params.update({'psf': src_psf})
            # jcID, config = generateExplJC(basin, jcid=None)

        jcID, config = generateFromBasin(basin)
        config.update(global_params)
        config['name'] = jcID
        logging.info("New Simulation Job Created: %s", jcID)
        for k, v in config.items():
            logging.debug(" %s: %s", k, str(v))

        # Add to the output queue & save config info
        jcqueue[jcID] = config
        logging.info("New Job Candidate Completed: %s #%d on the Queue", jcID, len(jcqueue))

    stat.collect('src_traj_list', src_traj_list)
    bench.mark('GenInputParams')

    # POST-PROCESSING -------------------------------------
    logging.debug("============================ <POST-PROCESSING & OUTPUT> =============================")
    self.wait_catalog()

    # Record every selected basin as executed.
    with self.catalog.pipeline() as pipe:
        for basin in selected_basin_list:
            pipe.rpush('executed', basin['id'])
        pipe.execute()

    # Append new distance values
    if EXPERIMENT_NUMBER == 14:
        # Save Ik delta to disk
        logging.info("Saving Delta Itemset (to disk)")
        with open(settings.datadir + '/iset_delta.p', 'wb') as delta_file:
            pickle.dump(Ik_delta, delta_file)

        with self.catalog.pipeline() as pipe:
            pipe.set('lattice:max_fis', pickle.dumps(base_lattice.max_fis))
            pipe.set('lattice:low_fis', pickle.dumps(base_lattice.low_fis))
            pipe.set('lattice:dlat', pickle.dumps(base_lattice.dlat))
            pipe.execute()

    if EXPERIMENT_NUMBER == 15:
        with self.catalog.pipeline() as pipe:
            pipe.set('lattice:max_fis', pickle.dumps(base_lattice.max_fis))
            pipe.set('lattice:low_fis', pickle.dumps(base_lattice.low_fis))
            pipe.set('lattice:dlat', pickle.dumps(base_lattice.dlat))
            pipe.execute()
        self.catalog.set('lattice:iset', pickle.dumps(base_lattice.Ik))

    # Persist the labels and bin membership of the basins added in this run.
    for basin_idx, basin in local_basins.items():
        key = 'basin:' + basin['id']
        with self.catalog.pipeline() as pipe:
            pipe.hset(key, 'label:10', basin['label:10'])
            pipe.hset(key, 'label:25', basin['label:25'])
            pipe.rpush('bin:10:%s' % basin['label:10'], basin_idx)
            pipe.rpush('bin:25:%d_%d' % basin['label:25'], basin_idx)
            pipe.execute()
    self.catalog.set('basin:processed', num_prev_basins + len(local_basins))

    # Clear current queue, mark previously queues jobs for GC, push new queue
    qlen = self.catalog.llen('jcqueue')
    logging.debug('Current queue len; %s', str(qlen))
    if qlen > 0:
        curqueue = self.catalog.lrange('jcqueue', 0, -1)
        logging.info("Marking %d obsolete jobs for garbage collection", len(curqueue))
        for jc_key in curqueue:
            key = wrapKey('jc', jc_key)
            config = self.catalog.hgetall(key)
            config['gc'] = 0
            # Add gc jobs it to the state to write back to catalog (flags it for gc)
            self.addMut(key, config)
        self.catalog.delete('jcqueue')

    # Update cache hit/miss
    # hit = self.cache_hit
    # miss = self.cache_miss
    # logging.info('##CACHE_HIT_MISS %d %d %.3f', hit, miss, (hit)/(hit+miss))
    # self.catalog.rpush('cache:hit', self.cache_hit)
    # self.catalog.rpush('cache:miss', self.cache_miss)

    self.data['jcqueue'] = list(jcqueue.keys())

    logging.debug(" JCQUEUE: %s", str(self.data['jcqueue']))
    # Update Each new job with latest convergence score and save to catalog(TODO: save may not be nec'y)
    logging.debug("Updated Job Queue length: %d", len(self.data['jcqueue']))
    for jcid, config in jcqueue.items():
        # config['converge'] = self.data['converge']
        self.addMut(wrapKey('jc', jcid), config)

    self.notify('sim')

    bench.mark('PostProcessing')
    print('## TS=%d' % self.data['timestep'])
    bench.show()
    stat.show()

    return list(jcqueue.keys())
def execute(self, new_basin_list): """Executing the Controler Algorithm. Load pre-analyzed lower dimensional subspaces, process user query and identify the sampling space with corresponding distribution function for each user query. Calculate convergence rates, run sampler, and then execute fairness policy to distribute resources among users' sampled values. """ logging.debug('CTL MT') # PRE-PROCESSING --------------------------------------------------------------------------------- logging.debug( "============================ <PRE-PROCESS> =============================" ) np.set_printoptions(precision=4, linewidth=150) self.data['timestep'] += 1 logging.info('TIMESTEP: %d', self.data['timestep']) settings = systemsettings() bench = microbench('ctl_%s' % settings.name, self.seqNumFromID()) stat = StatCollector('ctl_%s' % settings.name, self.seqNumFromID()) if self.force_decision: new_basin_list = [] # Connect to the cache self.cacheclient = CacheClient(settings.APPL_LABEL) # create the "binlist": numresources = self.data['numresources'] explore_factor = float(self.data['sampler:explore']) topo = self.protein.top # Load new RMS Labels -- load all for now bench.start() logging.debug('Loading RMS Labels') start_index = max(0, self.data['ctl_index_head']) logging.debug(" Start_index=%d, batch_size=%d", start_index, len(new_basin_list)) # Calculate variable PDF estimations for each subspace via bootstrapping: logging.debug( "======================= <SUBSPACE CONVERGENCE> (skip) ===================" ) # IMPLEMENT USER QUERY with REWEIGHTING: logging.debug( "======================= <QUERY PROCESSING> =========================" ) stat.collect('new_basin', len(new_basin_list)) ##### BARRIER self.wait_catalog() selected_index_list = [] # QUERY PROCESSING & SAMPLING BELOW to select indices. 
EXPERIMENT_NUMBER = self.experiment_number logging.info("RUNNING EXPER CONFIGURATION #%d", EXPERIMENT_NUMBER) # TODO: ABSTRACT FEATURE DIMENSIONALITY n_features = 1653 # Basin List will be the list of basin representing the new Job Candidates selected_basin_list = [] all_basins = self.data['basin:list'] num_prev_basins = int(self.data['basin:processed']) # Load Previous Distance Space (historical data) prev_ds_raw = self.catalog.lrange('dspace', 0, num_prev_basins) local_basins = {} if len(prev_ds_raw) > 0: ds_prev = np.zeros(shape=(len(prev_ds_raw), n_features)) logging.info("Unpickling distance space to array: %s", ds_prev.shape) for i, elm in enumerate(prev_ds_raw): ds_prev[i] = pickle.loads(elm) logging.info('Prev DS loaded. Size = %d', len(ds_prev)) else: logging.info('NO Prev DS') ds_prev = [] # FOR Supervised Classification (included for all for post-partem processing) bin_labels_10 = [ 'T0', 'T1', 'T2', 'T3', 'T4', 'W0', 'W1', 'W2', 'W3', 'W4' ] bin_labels_25 = [(a, b) for a in range(5) for b in range(5)] bin_list_10 = { k: [int(i) for i in self.catalog.lrange('bin:10:%s' % k, 0, -1)] for k in bin_labels_10 } bin_list_25 = { k: [int(i) for i in self.catalog.lrange('bin:25:%d_%d' % k, 0, -1)] for k in bin_labels_25 } # Merge locally analyzed distance space delta_ds = np.zeros(shape=(len(new_basin_list), n_features)) logging.info('Collecting new data from basins: %s', new_basin_list) for i, bid in enumerate(new_basin_list): basin = self.data['basin:' + bid] global_basin_index = int(basin['dsidx']) local_basins[global_basin_index] = basin # Update Distance Space dmu_ = self.catalog.lindex('dspace', global_basin_index) if dmu_ is None: print("ERROR!!!!") print(global_basin_index, i) for k, v in basin.items(): print(k, v) sys.exit(0) delta_ds[i] = pickle.loads(dmu_) # Update Supervise Classification label_seq = [ int(i) for i in self.catalog.lrange('basin:labelseq:' + bid, 0, -1) ] local_basins[global_basin_index][ 'label:10'] = label10 = bin_label_10(label_seq) 
local_basins[global_basin_index][ 'label:25'] = label25 = bin_label_25(label_seq) bin_list_10[label10].append(global_basin_index) bin_list_25[label25].append(global_basin_index) self.data['basin:processed'] += 1 if len(new_basin_list) > 0 and len(prev_ds_raw) > 0: dist_space = np.vstack((ds_prev, delta_ds)) elif len(delta_ds) == 0: dist_space = np.array(ds_prev) elif len(ds_prev) == 0: logging.info('First Set of Distance Coord laoded') dist_space = delta_ds else: logging.error("ERROR! NO DISTANCE SPACE IN THE CATALOG") # UNIFORM SAMPLER (BASIC) if EXPERIMENT_NUMBER == 12: basin_idx_list = [] candidate_list = [[] for i in range(5)] for k, v in bin_list_10.items(): candidate_list[int(k[1])].extend(v) # UNIFORM SAMPLING for sel_num in range(numresources): # Select Random start state start_state = np.random.randint(5) # Select random index rand_index = np.random.choice(len(candidate_list[start_state])) basin_idx = candidate_list[start_state][rand_index] basin_idx_list.append(basin_idx) for i in basin_idx_list: if i < num_prev_basins: logging.info("Select index: %s (Retrieve from Catalog)", i) bid = self.data['basin:list'][i] basin = self.catalog.hgetall('basin:%s' % bid) else: logging.info( "Select index: %s (New locally built basin in mem)", i) basin = local_basins[i] logging.debug(' BASIN: %s', basin['id']) selected_basin_list.append(basin) # BIASED SAMPLER (UPDATED) if EXPERIMENT_NUMBER == 13: # USING 10-BIN LABELS distro = [len(bin_list_10[i]) for i in bin_labels_10] # Create and invoke the sampler logging.info('Running the biased (umbrella) samplers') sampler = BiasSampler(distro) samplecount = np.zeros(len(bin_labels_10), dtype=np.int16) # Find the first index for each bin: explore_direction = 1 if explore_factor < .5 else -1 for i, b in enumerate(bin_list_10): if len(b) == 0: idx = 0 else: idx = np.floor(explore_factor * (len(b) - 1)) samplecount[i] = idx sel_bins = sampler.execute(numresources) logging.info( 'Processing selected bins to find starting 
candidates') candidate_list = {} basin_idx_list = [] for b in sel_bins: target_bin = bin_labels_10[b] if target_bin not in candidate_list: candidate_list[target_bin] = bin_list_10[target_bin] # TODO: FULLY IMPLEMENT EXPLORE/EXPLOIT BUT INCL HISTORY/PROVONANCE # Lazy Update to centroid -- push to catalog immediately # vals = dist_space[bin_list_10[target_bin]] # logging.info('Updating Centroid for bin %s, bindata: %s', target_bin, vals.shape) # centroid = np.mean(vals, axis=0) # self.catalog.set('bin:10:centroid:%s' % target_bin, pickle.dumps(centroid)) # dist_center = [LA.norm(centroid - dist_space[i]) for i in bin_list_10[target_bin]] # candidate_list[target_bin] = sorted(zip(bin_list_10[target_bin], dist_center), key=lambda x: x[1]) # basin_idx, basin_diff = candidate_list[target_bin][samplecount[b]] # samplecount[b] += explore_direction # # Wrap # if samplecount[b] == 0: # samplecount = len(candidate_list[target_bin]) - 1 # if samplecount[b] == len(candidate_list[target_bin]): # samplecount = 0 # FOR NOW PICK A RANDOM CANDIDATE rand_index = np.random.choice(len(candidate_list[target_bin])) basin_idx = candidate_list[target_bin][rand_index] logging.info('BIAS SAMPLER:\n Bin: %s\n basin: %d Delta from Center: %6.3f (note: dist not factored in)', \ target_bin, basin_idx, 0.) 
basin_idx_list.append(basin_idx) for i in basin_idx_list: if i < num_prev_basins: logging.info("Select index: %s (Retrieve from Catalog)", i) bid = self.data['basin:list'][i] basin = self.catalog.hgetall('basin:%s' % bid) else: logging.info( "Select index: %s (New locally built basin in mem)", i) basin = local_basins[i] logging.debug(' BASIN: %s', basin['id']) selected_basin_list.append(basin) # LATTICE SAMPLER (WITH HISTORICAL DATA) if EXPERIMENT_NUMBER == 14: # Merge Existing delta with DEShaw Pre-Processed data: logging.info('Merging DEShaw with existing generated data') # Set parameters for lattice Kr = [ int(i) for i in self.catalog.lrange('lattice:features', 0, -1) ] support = int(self.data['lattice:support']) dspt = self.catalog.get('lattice:delta_support') delta_support = 5 if dspt is None else int(dspt) cutoff = float(self.data['lattice:cutoff']) logging.info('PARAMS Kr:%s\n support:%d dspt:%d cutoff:%f', Kr, support, delta_support, cutoff) # Load existing (base) lattice data logging.info( "Unpickling max/low FIS and derived lattice EMD values") max_fis = pickle.loads(self.catalog.get('lattice:max_fis')) low_fis = pickle.loads(self.catalog.get('lattice:low_fis')) dlat = pickle.loads(self.catalog.get('lattice:dlat')) # TODO: MOVE iset completely into catalog # Ik = pickle.loads(self.catalog.get('lattice:iset')) # FOR NOW: Full DEShaw Index is saved on disk (TODO: MOVE TO CATALOG) logging.info( "Loading full Itemset from disk (TODO: Det optimization on mem/time)" ) Ik = pickle.load(open(settings.datadir + '/iset.p', 'rb')) # Item_set Keys (Ik) are only saved as a delta for space conservation if os.path.exists(settings.datadir + '/iset_delta.p'): Ik_delta = pickle.load( open(settings.datadir + '/iset_delta.p', 'rb')) else: Ik_delta = {} # Merge previous item set delta with DEShaw index logging.info("Merging DEShaw Ik with Delta IK") for k, v in Ik_delta.items(): Ik[k] = np.concatenate((Ik[k], v)) if k in Ik else v # Build Base Lattice Object base_lattice = 
lat.Lattice(ds_prev, Kr, cutoff, support) base_lattice.set_fis(max_fis, low_fis) base_lattice.set_dlat(dlat, Ik) if not self.force_decision and len(delta_ds) > 0: # Build Delta Lattice Object logging.info('Building Delta lattice. Num new items: %d', len(delta_ds)) delta_lattice = lat.Lattice(delta_ds, Kr, cutoff, delta_support, invert=invert_vals) delta_lattice.maxminer() delta_lattice.derive_lattice() # Update non-DEShaw delta itemset key index logging.info("Updating Itemsets and Distance Space Matrix") for k, v in delta_lattice.Ik.items(): Ik_delta[k] = np.concatenate( (Ik_delta[k], v)) if k in Ik_delta else v # Save Ik delta to disk logging.info("Saving Delta Itemset (to disk)") pickle.dump(Ik_delta, open(settings.datadir + '/iset_delta.p', 'wb')) # Perform incremental maintenance logging.info('Merging Delta lattice with Base Lattice') base_lattice.merge(delta_lattice) # Create the Sampler object (also make clusters) logging.info('Invoking the Lattice Sampler') sampler = LatticeSampler(base_lattice) basin_id_list = sampler.execute(numresources) for index in basin_id_list: bid = all_basins[index] selected_basin_list.append( self.catalog.hgetall('basin:%s' % bid)) # LATTICE SAMPLER (DE NOVO) if EXPERIMENT_NUMBER == 15: # PREPROCESS N_features_src = topo.n_residues N_features_corr = (N_features_src**2 - N_features_src) // 2 upt = np.triu_indices(N_features_src, 1) old_basin_ids = all_basind[:num_prev_basins] # DENOVO Exploratory Bootstrapping (RMSD) explore_factor = float(self.data['sampler:explore']) # TODO: Better transtion plan from explore to exploit self.data['sampler:explore'] *= .75 executed_basins = self.catalog.lrange('executed', 0, -1) if explore_factor > 0: logging.info("EXPLORING Most active basins....") basindata = [ self.catalog.hgetall(bid) for bid in old_basin_ids ] for bid in new_basin_list: basindata.append(self.data['basin:' + bid]) basins_with_rms = [b for b in basindata if 'resrms_delta' in b] basin_by_rmsd = sorted(basins_with_rms, key=lambda 
x: float(x['resrms_delta']), reverse=True) explore_samples = int(np.floor(numresources * explore_factor)) logging.info('Num to explore: %d out of %d', explore_samples, len(basin_by_rmsd)) idx, num_sampled = 0, 0 while idx < len( basin_by_rmsd) and num_sampled < explore_samples: selb = basin_by_rmsd[idx] if selb['id'] in executed_basins: logging.info( 'Selected %s, but it has been executed. Selecting next option', selb['id']) else: selected_basin_list.append(selb) logging.info(' (%d) EXPLORE BASIN: %s %f', selb['id'], selb['id'], float(selb['resrms_delta'])) numresources -= 1 num_sampled += 1 idx += 1 # TODO: Reduced Feature Sets # Using Reduced Feature Set Alg #2 HERE support = int(.01 * len(dist_space)) cutoff = 8 # RE-Calc the whole lattice: logging.info("Building the new lattice") BUILD_NEW = not self.catalog.exists('lattice:bootstrapped') # TODO: Decision to go from build new to incr maint if BUILD_NEW: tval = .05 Kr = lat.reduced_feature_set2(dist_space, cutoff, theta=tval, maxk=25) retry = 5 while len(Kr) < 12 and retry > 0: tval /= 2 retry -= 1 Kr = lat.reduced_feature_set2(dist_space, cutoff, theta=tval, maxk=25) base_lattice = lat.Lattice(dist_space, Kr, cutoff, support) base_lattice.maxminer() base_lattice.derive_lattice() with self.catalog.pipeline() as pipe: pipe.delete('lattice:kr') for i in sorted(Kr): pipe.rpush('lattice:kr', i) pipe.execute() else: # Load existing (base) lattice data max_fis = pickle.loads(self.catalog.get('lattice:max_fis')) low_fis = pickle.loads(self.catalog.get('lattice:low_fis')) dlat = pickle.loads(self.catalog.get('lattice:dlat')) Ik = pickle.loads(self.catalog.get('lattice:iset')) num_k = self.catalog.get('lattice:num_k') Kr = [int(i) for i in self.catalog.lrange('lattice:kr', 0, -1)] if num_k is None: num_k = max(8, min(15, numresources * 2)) # Build Lattice Object logging.info('Building Existing lattice object') base_lattice = lat.Lattice(ds_prev, Kr, cutoff, support) base_lattice.set_fis(max_fis, low_fis) 
base_lattice.set_dlat(dlat, Ik) # Build Delta Lattice Object logging.info('Building Delta lattice. Num new items: %d', len(delta_ds)) delta_lattice = lat.Lattice(delta_ds, Kr, cutoff, 1) delta_lattice.maxminer() delta_lattice.derive_lattice() # Perform incremental maintenance logging.info('Merging Delta lattice with Base Lattice') base_lattice.merge(delta_lattice) if numresources > 0: logging.info('Invoking the Lattice Sampler') sampler = LatticeSampler(base_lattice) basin_id_list = sampler.execute(numresources) # For now retrieve immediately from catalog self.wait_catalog() for index in basin_id_list: bid = all_basins[index] key = 'basin:%s' % bid # Check to ensure its not a new basin and that it exists in the DB if self.catalog.exists(key): logging.debug('KEY EXISTS: %s', key) selbasin = self.catalog.hgetall(key) else: logging.debug('NO KEY: %s\n%s', key, self.data[key]) selbasin = self.data[key] selected_basin_list.append(selbasin) # CORRELATION SAMPLER (KMEANS) if EXPERIMENT_NUMBER == 18: sampler = CorrelationSampler(cm_all, mu=dist_space) basin_id_list = sampler.execute(numresources) for bid in basin_id_list: selected_basin_list.append( self.catalog.hgetall('basin:%s' % bid)) bench.mark('GlobalAnalysis') # Generate new starting positions jcqueue = OrderedDict() src_traj_list = [] for basin in selected_basin_list: src_traj_list.append(basin['traj']) if basin['traj'].startswith('desh'): global_params = getSimParameters(self.data, 'deshaw') # fileno = int(basin['traj'][-4:]) # frame = int(basin['mindex']) # jcID, config = generateDEShawJC(fileno, frame) else: global_params = getSimParameters(self.data, 'gen') src_psf = self.catalog.hget('jc_' + basin['traj'], 'psf') global_params.update({'psf': src_psf}) # jcID, config = generateExplJC(basin, jcid=None) jcID, config = generateFromBasin(basin) config.update(global_params) config['name'] = jcID logging.info("New Simulation Job Created: %s", jcID) for k, v in config.items(): logging.debug(" %s: %s", k, str(v)) # Add 
to the output queue & save config info jcqueue[jcID] = config logging.info("New Job Candidate Completed: %s #%d on the Queue", jcID, len(jcqueue)) stat.collect('src_traj_list', src_traj_list) bench.mark('GenInputParams') # POST-PROCESSING ------------------------------------- logging.debug( "============================ <POST-PROCESSING & OUTPUT> =============================" ) self.wait_catalog() with self.catalog.pipeline() as pipe: for basin in selected_basin_list: pipe.rpush('executed', basin['id']) pipe.execute() # Append new distance values if EXPERIMENT_NUMBER == 14: # Save Ik delta to disk logging.info("Saving Delta Itemset (to disk)") pickle.dump(Ik_delta, open(settings.datadir + '/iset_delta.p', 'wb')) with self.catalog.pipeline() as pipe: pipe.set('lattice:max_fis', pickle.dumps(base_lattice.max_fis)) pipe.set('lattice:low_fis', pickle.dumps(base_lattice.low_fis)) pipe.set('lattice:dlat', pickle.dumps(base_lattice.dlat)) pipe.execute() if EXPERIMENT_NUMBER == 15: with self.catalog.pipeline() as pipe: pipe.set('lattice:max_fis', pickle.dumps(base_lattice.max_fis)) pipe.set('lattice:low_fis', pickle.dumps(base_lattice.low_fis)) pipe.set('lattice:dlat', pickle.dumps(base_lattice.dlat)) pipe.execute() self.catalog.set('lattice:iset', pickle.dumps(base_lattice.Ik)) for basin_idx, basin in local_basins.items(): key = 'basin:' + basin['id'] with self.catalog.pipeline() as pipe: pipe.hset(key, 'label:10', basin['label:10']) pipe.hset(key, 'label:25', basin['label:25']) pipe.rpush('bin:10:%s' % basin['label:10'], basin_idx) pipe.rpush('bin:25:%d_%d' % basin['label:25'], basin_idx) pipe.execute() self.catalog.set('basin:processed', num_prev_basins + len(local_basins)) # Clear current queue, mark previously queues jobs for GC, push new queue qlen = self.catalog.llen('jcqueue') logging.debug('Current queue len; %s', str(qlen)) if qlen > 0: curqueue = self.catalog.lrange('jcqueue', 0, -1) logging.info("Marking %d obsolete jobs for garbage collection", len(curqueue)) 
for jc_key in curqueue: key = wrapKey('jc', jc_key) config = self.catalog.hgetall(key) config['gc'] = 0 # Add gc jobs it to the state to write back to catalog (flags it for gc) self.addMut(key, config) self.catalog.delete('jcqueue') # Update cache hit/miss # hit = self.cache_hit # miss = self.cache_miss # logging.info('##CACHE_HIT_MISS %d %d %.3f', hit, miss, (hit)/(hit+miss)) # self.catalog.rpush('cache:hit', self.cache_hit) # self.catalog.rpush('cache:miss', self.cache_miss) self.data['jcqueue'] = list(jcqueue.keys()) logging.debug(" JCQUEUE: %s", str(self.data['jcqueue'])) # Update Each new job with latest convergence score and save to catalog(TODO: save may not be nec'y) logging.debug("Updated Job Queue length: %d", len(self.data['jcqueue'])) for jcid, config in jcqueue.items(): # config['converge'] = self.data['converge'] self.addMut(wrapKey('jc', jcid), config) self.notify('sim') bench.mark('PostProcessing') print('## TS=%d' % self.data['timestep']) bench.show() stat.show() return list(jcqueue.keys())
def execute(self, thru_index):
    """Run one controller step and build the next set of job candidates.

    Steps, in order:

    1. Load every feature landscape stored under the catalog key
       'subspace:feal', compute the block-bootstrap convergence and the
       mean ("global") landscape, and record both in the stat collector.
    2. Select source indices with the sampler chosen by
       ``self.experiment_number`` (5: biased/umbrella sampling over bins,
       10: KD-tree nearest-neighbour sampling, 11: knapsack selection).
       Any other experiment number selects nothing.
    3. Back-project each selected index to a trajectory and build one job
       configuration per sample.
    4. Flag every job currently listed in the catalog 'jcqueue' with
       ``gc = 0``, delete that list, store the new IDs in
       ``self.data['jcqueue']`` and register each new config via ``addMut``.

    Args:
        thru_index: Stored as the new ``self.data['ctlIndexHead']``; apart
            from that it is only logged.

    Returns:
        list: IDs of the newly created job candidates, in creation order.
    """
    logging.debug('CTL MT')

    # PRE-PROCESSING ---------------------------------------------------------
    logging.debug("============================ <PRE-PROCESS> =============================")
    np.set_printoptions(precision=4, linewidth=150)
    self.data['timestep'] += 1
    logging.info('TIMESTEP: %d', self.data['timestep'])

    settings = systemsettings()
    bench = microbench('ctl_%s' % settings.name, self.seqNumFromID())
    stat = StatCollector('ctl_%s' % settings.name, self.seqNumFromID())

    # Connect to the cache
    self.cacheclient = CacheClient(settings.APPL_LABEL)

    numLabels = self.data['numLabels']
    numresources = self.data['numresources']

    ##### BARRIER
    self.wait_catalog()

    # Load new RMS Labels -- load all for now
    bench.start()
    logging.debug('Loading RMS Labels')
    start_index = max(0, self.data['ctlIndexHead'])
    logging.debug(" Start_index=%d, thru_index=%d, ctlIndexHead=%d",
                  start_index, thru_index, self.data['ctlIndexHead'])

    # NOTE(review): np.fromstring in binary mode is deprecated in NumPy;
    # np.frombuffer is the documented replacement, but confirm the type the
    # catalog returns (bytes vs str) before switching.
    feallist = [np.fromstring(i) for i in self.catalog.lrange('subspace:feal', 0, -1)]
    num_pts = len(feallist)
    self.data['ctlIndexHead'] = thru_index
    thru_count = self.data['observe:count']

    logging.debug('##NUM_RMS_THIS_ROUND: %d', num_pts)
    stat.collect('numpts', len(feallist))

    # Calculate variable PDF estimations for each subspace via bootstrapping:
    logging.debug("======================= <SUBSPACE CONVERGENCE> =========================")

    # Bootstrap current sample for RMS
    logging.info("Feature Landscapes for %d points loaded. Calculating PDF.....", len(feallist))

    # Static for now
    blocksize = 5000
    mv_convergence = op.bootstrap_block(feallist, blocksize)
    global_landscape = np.mean(feallist, axis=0)
    stat.collect('convergence', mv_convergence)
    stat.collect('globalfeal', global_landscape)
    logging.info('MV Convergence values:\nCONV,%s', str(mv_convergence[-1]))
    logging.info('Global Feature Landscape:\n%s', feal.tostring(global_landscape))

    # IMPLEMENT USER QUERY with REWEIGHTING:
    logging.debug("======================= <QUERY PROCESSING> =========================")

    ##### BARRIER
    self.wait_catalog()
    selected_index_list = []
    # One (origin, index, bin/state, hcube) tuple per selected index; stays
    # empty when no sampler runs.
    coord_origin = []

    # QUERY PROCESSING & SAMPLING BELOW to select indices.
    EXPERIMENT_NUMBER = self.experiment_number
    logging.info("RUNNING EXPER CONFIGURATION #%d", EXPERIMENT_NUMBER)

    ###### EXPERIMENT #5: BIASED (Umbrella) SAMPLER
    # NOTE(review): binlist, convergence_rms and bincounts are not defined
    # anywhere in this method; unless they exist at module level this branch
    # raises NameError. Confirm before enabling experiment 5.
    if EXPERIMENT_NUMBER == 5:
        if self.catalog.exists('label:deshaw'):
            logging.info("Loading DEShaw historical points.... From Catalog")
            # NOTE(review): eval() on catalog content executes whatever is
            # stored there; only safe while the catalog is fully trusted.
            rmslabel = [eval(x) for x in self.catalog.lrange('label:deshaw', 0, -1)]
        else:
            logging.info("Loading DEShaw historical points.... From File (and recalculating)")
            rmslabel = deshaw.labelDEShaw_rmsd()

        deshaw_samples = {b: [] for b in binlist}
        for i, b in enumerate(rmslabel):
            deshaw_samples[b].append(i)

        coord_origin = []
        conv_vals = np.array([v for k, v in sorted(convergence_rms.items())])
        norm_pdf_conv = conv_vals / sum(conv_vals)
        logging.info("Umbrella Samping PDF (Bootstrapping):")
        sampled_distro_perbin = {b: 0 for b in binlist}

        # NOTE(review): a bin without any candidate only logs and does not
        # consume a resource, so this loop never ends if every bin that can
        # be drawn is empty.
        while numresources > 0:
            # First sampling is BIASED
            selected_bin = np.random.choice(len(binlist), p=norm_pdf_conv)
            A, B = binlist[selected_bin]
            sampled_distro_perbin[binlist[selected_bin]] += 1
            if bincounts[selected_bin] is not None and bincounts[selected_bin] > 0:
                # Secondary Sampling is Uniform
                sample_num = np.random.randint(bincounts[selected_bin])
                logging.debug('SAMPLER: selecting sample #%d from bin %s',
                              sample_num, str(binlist[selected_bin]))
                index = self.catalog.lindex('varbin:rms:%d_%d' % binlist[selected_bin], sample_num)
                selected_index_list.append(index)
                coord_origin.append(('sim', index, binlist[selected_bin], '%d-D' % A))
                numresources -= 1
            elif len(deshaw_samples[binlist[selected_bin]]) > 0:
                index = np.random.choice(deshaw_samples[binlist[selected_bin]])
                logging.debug('SAMPLER: selecting DEShaw frame #%d from bin %s',
                              index, str(binlist[selected_bin]))
                # Negation indicates an historical index number
                selected_index_list.append(-index)
                coord_origin.append(('deshaw', index, binlist[selected_bin], '%d-D' % A))
                numresources -= 1
            else:
                logging.info("NO Candidates for bin: %s", binlist[selected_bin])

    ###### EXPERIMENT #10: MutiVariate Nearest Neighbor (MVNN) SAMPLER
    if EXPERIMENT_NUMBER == 10:
        # Create the KD Tree from all feature landscapes (ignore first 5 features)
        kd = KDTree(100, 15, np.array(feallist), 'median')

        # Collect hypercubes
        hc = kd.getleaves()
        logging.info('KD Tree Stats')
        logging.info(' # HCubes : %5d', len(hc))
        logging.info(' Largest HC: %5d', max([v['count'] for k, v in hc.items()]))
        logging.info(' Smallest HC: %5d', min([v['count'] for k, v in hc.items()]))

        # Mean feature landscape of each hypercube
        for key, hcube in hc.items():
            hc_feal = [feallist[i] for i in hcube['elm']]
            hc[key]['feal'] = np.mean(hc_feal, axis=0)

        # Det scale and/or sep scales for each feature set
        desired = 10 - global_landscape
        logging.info('Desired Landscape:\n%s', feal.tostring(desired))

        # Calc euclidean dist to each mean HC's feal
        nn = {k: LA.norm(desired[5:] - v['feal'][5:]) for k, v in hc.items()}

        # Grab top N Neighbors (10 for now)
        neighbors = sorted(nn.items(), key=lambda x: x[1])[:10]
        logging.info('BestFit Landscape:\n%s', feal.tostring(hc[neighbors[0][0]]['feal']))

        ## DATA SAMPLER
        nn_keys = [i for i, w in neighbors]
        nn_wgts = np.array([w for i, w in neighbors])
        nn_wgts /= np.sum(nn_wgts)  # normalize

        coord_origin = []
        while numresources > 0:
            # First sampling is BIASED
            selected_hc = np.random.choice(nn_keys, p=nn_wgts)
            # Second is UNIFORM (within the HCube)
            index = np.random.choice(hc[selected_hc]['elm'])
            selected_index_list.append(index)
            src_state = np.argmax(feallist[index][:5])
            coord_origin.append(('sim', index, src_state, selected_hc))
            logging.info('Sampled Landscape [hc=%s]:\n%s',
                         selected_hc, feal.tostring(feallist[index]))
            numresources -= 1

    elif EXPERIMENT_NUMBER == 11:
        # Use only right most 10 features (non-normalized ones)
        inventory = np.array([f[10:] for f in feallist])
        desired = 10 - global_landscape
        logging.info('Desired Landscape (NOTE Only Including A-B values:\n%s',
                     feal.tostring(desired))
        selected_index_list = mvkp.knapsack(desired[10:], inventory, numresources, 2000000)
        coord_origin = [('sim', index, np.argmax(feallist[index][:5]), 'D')
                        for index in selected_index_list]
        logging.info("KNAPSACK Completed:")
        logging.info('Target Distribution:\n%s', str(desired[10:]))
        logging.info('Target Distribution:\n%s',
                     '\n'.join(['%s' % feallist[i] for i in selected_index_list]))

    # Back Project to get new starting Coords for each sample
    logging.debug("======================= <INPUT PARAM GENERATION> =================")
    logging.info('All Indices sampled. Back projecting to high dim coords')
    sampled_set = []
    for i in selected_index_list:
        traj = self.backProjection([i])
        sampled_set.append(traj)
    bench.mark('Sampler')

    # Generate new starting positions
    runtime = self.data['runtime']
    jcqueue = OrderedDict()
    for i, start_traj in enumerate(sampled_set):
        jcID, params = generateNewJC(start_traj)

        # TODO: Update/check adaptive runtime, starting state
        jcConfig = dict(
            params,
            name=jcID,
            runtime=runtime,  # In timesteps
            dcdfreq=self.data['dcdfreq'],  # Frame save rate
            interval=self.data['dcdfreq'] * self.data['sim_step_size'],
            temp=310,
            timestep=self.data['timestep'],
            gc=1,
            origin=coord_origin[i][0],
            src_index=coord_origin[i][1],
            src_bin=coord_origin[i][2],
            src_hcube=coord_origin[i][3],
            application=settings.APPL_LABEL)

        logging.info("New Simulation Job Created: %s", jcID)
        for k, v in jcConfig.items():
            logging.debug(" %s: %s", k, str(v))

        # Add to the output queue & save config info
        jcqueue[jcID] = jcConfig
        logging.info("New Job Candidate Completed: %s #%d on the Queue", jcID, len(jcqueue))

    bench.mark('GenInputParams')

    # POST-PROCESSING --------------------------------------------------------
    logging.debug("============================ <POST-PROCESSING & OUTPUT> =============================")
    self.wait_catalog()

    # Clear current queue, mark previously queues jobs for GC, push new queue
    qlen = self.catalog.llen('jcqueue')
    logging.debug('Current queue len; %s', str(qlen))
    if qlen > 0:
        curqueue = self.catalog.lrange('jcqueue', 0, -1)
        logging.info("Marking %d obsolete jobs for garbage collection", len(curqueue))
        for jc_key in curqueue:
            key = wrapKey('jc', jc_key)
            config = self.catalog.hgetall(key)
            config['gc'] = 0
            # Add gc jobs it to the state to write back to catalog (flags it for gc)
            self.addMut(key, config)
    self.catalog.delete('jcqueue')

    # CATALOG UPDATES
    self.catalog.rpush('datacount', len(feallist))

    # EXPR 7 Update:
    if EXPERIMENT_NUMBER > 5 and EXPERIMENT_NUMBER < 10:
        # NOTE(review): covar_pts is not defined in this method; this line
        # raises NameError for experiments 6-9 unless it exists elsewhere.
        self.catalog.rpush('subspace:covar:thruindex', len(covar_pts))

    # Update cache hit/miss. When no back projection touched the cache both
    # counters can be zero, so guard the ratio instead of dividing by zero.
    hit = self.cache_hit
    miss = self.cache_miss
    lookups = hit + miss
    hit_ratio = hit / lookups if lookups else 0.0
    logging.info('##CACHE_HIT_MISS %d %d %.3f', hit, miss, hit_ratio)
    self.catalog.rpush('cache:hit', self.cache_hit)
    self.catalog.rpush('cache:miss', self.cache_miss)

    self.data['jcqueue'] = list(jcqueue.keys())
    logging.debug(" JCQUEUE: %s", str(self.data['jcqueue']))

    # Update Each new job with latest convergence score and save to catalog
    # (TODO: save may not be nec'y)
    logging.debug("Updated Job Queue length: %d", len(self.data['jcqueue']))
    for jcid, config in jcqueue.items():
        self.addMut(wrapKey('jc', jcid), config)

    bench.mark('PostProcessing')
    print('## TS=%d' % self.data['timestep'])
    bench.show()
    stat.show()
    return list(jcqueue.keys())
def backProjection(self, index_list):
    """Back-project low-dimensional indices to high-dimensional frames.

    Each index is dereferenced to a (file, frame) pair: a negative index is
    treated as a historical (DEShaw) frame and resolved through
    ``deshaw.refFromIndex``; a non-negative index is looked up in the
    'xid:reference' catalog list, which is loaded once and kept on
    ``self.xidreference``. Frames are grouped per file, requested from the
    cache client, and every file that misses the cache is loaded from disk.
    ``self.cache_hit`` / ``self.cache_miss`` are incremented once per file.

    Args:
        index_list: Iterable of indices; every element must be accepted by
            ``int()``.

    Returns:
        A single trajectory built on the ``deshaw.topo_prot`` topology that
        holds the cached frames followed by the frames loaded from file, or
        None when there were no cache misses and no cached points. Frames
        are grouped by source file, so their order does not necessarily
        follow ``index_list``.
    """
    logging.debug('-------- BACK PROJECTION: %d POINTS ---', len(index_list))
    bench = microbench('bkproj', self.seqNumFromID())

    source_points = []
    cache_miss = []

    self.trajlist_async = deque()

    # DEShaw topology is assumed here
    bench.start()

    # Dereference indices to (file, frame) tuples:
    historical_framelist = []
    generated_framelist = []

    if self.xidreference is None:
        self.xidreference = self.catalog.lrange('xid:reference', 0, -1)

    logging.debug('Select Index List size = %d', len(index_list))
    for idx in index_list:
        # Negation indicates historical index:
        index = int(idx)
        if index < 0:
            # Negate the converted integer, not the raw element, so the
            # lookup always receives a plain positive int.
            file_index, frame = deshaw.refFromIndex(-index)
            historical_framelist.append((file_index, frame))
        else:
            generated_framelist.append(self.xidreference[index])

    bench.mark('BP:LD:Redis:xidlist')

    ref = deshaw.topo_prot  # Hard coded for now

    # Group all Historical indices by file number and add to frame Mask
    logging.debug('Group By file idx (DEshaw)')
    historical_frameMask = {}
    for file_index, frame in historical_framelist:
        historical_frameMask.setdefault(file_index, []).append(frame)

    for k, v in historical_frameMask.items():
        logging.debug('[BP] Deshaw lookups: %d, %s', k, str(v))

    # Group all Generated indices by file index
    logging.debug('Group By file idx (Gen data)')
    groupbyFileIdx = {}
    for entry in generated_framelist:
        # NOTE(review): eval() executes whatever is stored in the catalog's
        # 'xid:reference' list; only safe while the catalog is fully trusted.
        file_index, frame = eval(entry)
        groupbyFileIdx.setdefault(file_index, []).append(frame)

    # Dereference File index to filenames
    logging.debug('Deref fileidx -> file names')
    generated_frameMask = {}
    generated_filemap = {}
    for file_index, frames in groupbyFileIdx.items():
        filename = self.catalog.lindex('xid:filelist', file_index)
        if filename is None:
            # Report the index that failed to resolve (filename is None here).
            # The frames of that file are skipped.
            logging.error('Error file not found in catalog: %s', file_index)
        else:
            key = os.path.splitext(os.path.basename(filename))[0]
            generated_frameMask[key] = frames
            generated_filemap[key] = filename
    bench.mark('BP:GroupBy:Files')

    # Ensure the cache is alive an connected
    logging.debug('Check Cache client')
    self.cacheclient.connect()

    # Check cache for historical data points
    logging.debug('Checking cache for %d DEShaw points to back-project',
                  len(historical_frameMask.keys()))
    for fileno, frames in historical_frameMask.items():
        # handle 1 frame case (to allow follow on multi-frame, mix cache hit/miss)
        if len(frames) == 1:
            datapt = self.cacheclient.get(fileno, frames[0], 'deshaw')
            dataptlist = [datapt] if datapt is not None else None
        else:
            dataptlist = self.cacheclient.get_many(fileno, frames, 'deshaw')
        if dataptlist is None:
            self.cache_miss += 1
            cache_miss.append(('deshaw', fileno, frames))
        else:
            self.cache_hit += 1
            source_points.extend(dataptlist)

    # Check cache for generated data points
    logging.debug('Checking cache for %d Generated points to back-project',
                  len(generated_frameMask.keys()))
    for filename, frames in generated_frameMask.items():
        # handle 1 frame case (to allow follow on multi-frame, mix cache hit/miss)
        if len(frames) == 1:
            datapt = self.cacheclient.get(filename, frames[0], 'sim')
            dataptlist = [datapt] if datapt is not None else None
        else:
            dataptlist = self.cacheclient.get_many(filename, frames, 'sim')
        if dataptlist is None:
            self.cache_miss += 1
            # A miss is recorded with the full file name so it can be loaded.
            cache_miss.append(('sim', generated_filemap[filename], frames))
        else:
            self.cache_hit += 1
            source_points.extend(dataptlist)

    # Package all cached points into one trajectory
    logging.debug('Cache hits: %d points.', len(source_points))
    if len(source_points) > 0:
        source_traj_cached = md.Trajectory(source_points, ref.top)
    else:
        source_traj_cached = None

    # All files were cached. Return back-projected points
    if len(cache_miss) == 0:
        return source_traj_cached

    # Add high-dim points to list of source points in a trajectory
    source_points_uncached = []
    logging.debug('Sequentially Loading all trajectories')
    for ftype, fileno, framelist in cache_miss:
        if ftype == 'deshaw':
            pdb, dcd = deshaw.getHistoricalTrajectory_prot(fileno)
            traj = md.load(dcd, top=pdb)
        elif ftype == 'sim':
            traj = datareduce.load_trajectory(fileno)
        selected_frames = traj.slice(framelist)
        source_points_uncached.extend(selected_frames.xyz)
    bench.mark('BP:LD:File')

    logging.debug('All Uncached Data collected Total # points = %d',
                  len(source_points_uncached))
    source_traj_uncached = md.Trajectory(np.array(source_points_uncached), ref.top)
    bench.mark('BP:Build:Traj')

    logging.info('-------- Back Projection Complete ---------------')
    if source_traj_cached is None:
        return source_traj_uncached
    return source_traj_cached.join(source_traj_uncached)
def execute(self, job):
    """Run one simulation job, analyse its trajectory and publish the basins.

    Steps, in order:

    1. Unless ``self.skip_simulation`` is set, write the NAMD config from
       the template named by ``self.data['sim_conf_template']``, run
       ``namd2`` with the DCD written under the system temp directory, and
       copy that DCD next to the config file.
    2. Load the trajectory, compute the alpha-carbon distance space, and
       (unless ``self.skip_timescape`` is set) run TimeScapes
       ``terrain.py`` on a down-sampled heavy-atom trajectory.
    3. Parse the TimeScapes basins; per basin save the minimum frame as a
       PDB, compute distance-space mean/std, RMSD, residue RMSD and a label
       sequence against the stored centroids.
    4. Write basins, distance-space rows, frame references and label
       sequences to the catalog.
    5. Count the remaining simulation jobs in the scheduler queue and
       notify / force the controller when this is (nearly) the last one.

    Args:
        job: Mapping with the job configuration. Keys read here include
            'workdir', 'name', 'interval', 'runtime', 'dcdfreq', 'pdb',
            'src_basin' and 'dcd'. 'outputloc' (and, when the simulation is
            skipped, 'dcd') is written into it.

    Returns:
        list: IDs of the basins found in this trajectory, in the order
        returned by the TimeScapes parser.
    """
    # PRE-PREOCESS ----------------------------------------------------------
    settings = systemsettings()
    bench = microbench("sim_%s" % settings.name, self.seqNumFromID())
    bench.start()
    stat = StatCollector("sim_%s" % settings.name, self.seqNumFromID())
    mylogical_seqnum = str(self.seqNumFromID())

    # Prepare working directory, input/output files
    conFile = os.path.join(job["workdir"], job["name"] + ".conf")
    logFile = conFile.replace("conf", "log")  # log in same place as config file
    dcdFile = conFile.replace("conf", "dcd")  # dcd in same place as config file
    USE_SHM = True

    SIMULATE_RATIO = settings.SIMULATE_RATIO
    if SIMULATE_RATIO > 1:
        logging.warning(" USING SIMULATION RATIO OF %d -- THis is ONLY for debugging",
                        SIMULATE_RATIO)
    frame_size = (SIMULATE_RATIO * int(job["interval"])) / (1000)
    logging.info("Frame Size is %f Using Sim Ratio of 1:%d", frame_size, SIMULATE_RATIO)

    EXPERIMENT_NUMBER = settings.EXPERIMENT_NUMBER
    logging.info("Running Experiment Configuration #%d", EXPERIMENT_NUMBER)

    traj = None

    # EXECUTE SIMULATION ------------------------------------------------------
    if self.skip_simulation:
        logging.info("1. SKIPPING SIMULATION.....")
        USE_SHM = False

        job["dcd"] = dcdFile
        key = wrapKey("jc", job["name"])
        self.data[key]["dcd"] = dcdFile

    else:
        logging.info("1. Run Simulation")

        # Prepare & source to config file
        with open(self.data["sim_conf_template"], "r") as template:
            source = template.read()

        # >>>>Storing DCD into shared memory on this node
        if USE_SHM:
            ramdisk = gettempdir()
            job["outputloc"] = ramdisk
            dcd_ramfile = os.path.join(ramdisk, job["name"] + ".dcd")
        else:
            job["outputloc"] = ""

        with open(conFile, "w") as sysconfig:
            sysconfig.write(source % job)
            logging.info("Config written to: " + conFile)

        check = executecmd("module list")
        logging.debug("%s", check)

        cmd = "namd2 +p%d %s > %s" % (PARALLELISM, conFile, logFile)

        max_expected_obs = int(job["runtime"]) // int(job["dcdfreq"])
        # Retry upto 3 attempts if the sim fails
        MAX_TRY = 3
        for i in range(MAX_TRY, 0, -1):
            # The acceptance threshold drops with every attempt: 2/3 of the
            # expected frames, then 1/3, then 0.
            min_required_obs = int(max_expected_obs * ((i - 1) / (MAX_TRY)))
            logging.debug("Executing Simulation:\n %s\n", cmd)
            logging.debug("# Obs Expected to see: %d", max_expected_obs)
            stdout = executecmd(cmd)
            logging.info("SIMULATION Complete! STDOUT/ERR Follows:")
            # Check file for expected data
            if USE_SHM:
                traj = md.load(dcd_ramfile, top=job["pdb"])
            else:
                traj = md.load(dcdFile, top=job["pdb"])
            logging.info("Obs Threshold = %4d", min_required_obs)
            logging.info("#Obs This Traj = %4d", traj.n_frames)
            if traj.n_frames >= min_required_obs:
                logging.info("Full (enough) Sim Completed")
                break
            logging.info("Detected a failed Simulation. Retrying the same sim.")
            # NOTE(review): this unconditional break means the simulation is
            # never actually re-run despite the message above. Kept as-is;
            # presumably retries were disabled on purpose -- confirm.
            break

        bench.mark("simulation")

        # Internal stats
        sim_length = self.data["sim_step_size"] * int(job["runtime"])
        sim_realtime = bench.delta_last()
        sim_run_ratio = (sim_realtime / 60) / (sim_length / 1000000)
        logging.info("##SIM_RATIO %6.3f min-per-ns-sim", sim_run_ratio)
        stat.collect("sim_ratio", sim_run_ratio)

        if USE_SHM:
            shm_contents = os.listdir(ramdisk)
            logging.debug("Ramdisk contents (should have files) : %s", str(shm_contents))

            if not os.path.exists(dcd_ramfile):
                logging.warning("DCD FILE NOT FOUND!!!! Wait 10 seconds for sim to close it (???)")
                time.sleep(10)

            if not os.path.exists(dcd_ramfile):
                logging.warning("DCD STIILL FILE NOT FOUND!!!!")
            else:
                logging.info("DCD File was found")

        # And copy to Lustre (usng zero-copy):
        if USE_SHM:
            # ALT: X-Mit to Cache Service and let cache write to disk lazily
            # Both handles are closed on exit, including when sendfile raises.
            with open(dcd_ramfile, "rb") as src, open(dcdFile, "w+b") as dest:
                offset = 0
                dcdfilesize = os.path.getsize(dcd_ramfile)
                while True:
                    sent = sendfile(dest.fileno(), src.fileno(), offset, dcdfilesize)
                    if sent == 0:
                        break
                    offset += sent
            logging.info("Copy Complete to Lustre.")
            bench.mark("CopyLustre:%s" % job["name"])

        # TODO: Update job's metadata
        key = wrapKey("jc", job["name"])
        self.data[key]["dcd"] = dcdFile

    # ANALYSIS ----------------------------------------------------------------
    # ANALYSIS ALGORITHM
    # 1. With combined Sim-analysis: file is loaded locally from shared mem
    logging.debug("2. Load DCD")

    # Load topology and define filters
    topo = self.protein.pdb
    pdb = md.load(job["pdb"])
    hfilt = pdb.top.select_atom_indices("heavy")
    pfilt = pdb.top.select("protein")
    afilt = pdb.top.select_atom_indices("alpha")

    # Load full higher dim trajectory (only needed when the simulation step
    # did not already load it)
    if traj is None:
        if USE_SHM:
            traj = md.load(dcd_ramfile, top=pdb)
        else:
            traj = md.load(dcdFile, top=pdb)

    logging.debug("Trajectory Loaded: %s (%s)", job["name"], str(traj))
    traj_prot = traj.atom_slice(pfilt)
    traj_heavy = traj.atom_slice(hfilt)
    traj_alpha = traj.atom_slice(afilt)

    # Superpose Coordinates to Common Reference
    traj_prot.superpose(topo)

    # Calculate output Distance Space
    # Use of the second side chain atom is discussed in the ref paper on Timescape
    # The atom pairs and selected atoms are in the timescape module
    sc_pairs = side_chain_pairs(traj_prot)
    n_features = len(sc_pairs)

    # Use the CA atoms to calculate distance space
    # NOTE THE CONVERSION FROM NM TO ANG!!!!!!
    dist_space = 10 * DR.distance_space(traj_alpha)

    # Get the frame rate to for conversion between source and timescape
    # NOTE: Timescae is run at a more coarse level
    logging.debug("Preprocessing output for TimeScapes: terrain")
    traj_frame_per_ps = SIMULATE_RATIO * int(job["interval"]) / 1000.0  # jc interval is in fs
    ts_frame_per_ps = int(self.data["timescape:rate"])  # this value is in ps
    frame_rate = int(ts_frame_per_ps / traj_frame_per_ps)
    logging.debug(
        "%5.2f fr/ps (Traj) %5.2f fr/ps (TS) FrameRate= %4.1f",
        traj_frame_per_ps,
        ts_frame_per_ps,
        frame_rate,
    )

    # Execute Timescapes agility program to detect spatial-temporal basins
    output_prefix = os.path.join(job["workdir"], job["name"])
    if not self.skip_timescape:
        # Prep file and save locally in shm. gettempdir() has no trailing
        # separator, so the path must be joined, not concatenated.
        tmploc = gettempdir()
        ts_out = os.path.join(tmploc, "traj_ts")
        ts_dcd = ts_out + ".dcd"
        ts_pdb = ts_out + ".pdb"
        heavy = traj_heavy.slice(range(0, traj.n_frames, frame_rate))
        heavy.slice(0).save_pdb(ts_pdb)
        heavy.save_dcd(ts_dcd)

        # Gaussuan Full Width at Half-Max value affects sliding window size
        # ref: http://timescapes.biomachina.org/guide.pdf
        gmd_cut1 = int(self.data["timescape:gmd:low"])
        gmd_cut2 = int(self.data["timescape:gmd:hi"])
        gauss_wght_delta = int(self.data["timescape:delta"])

        # Execute timescapes' terrain.py on the pre-processed trajectory
        cmd = "terrain.py %s %s %d %d %d GMD %s" % (
            ts_pdb,
            ts_dcd,
            gmd_cut1,
            gmd_cut2,
            gauss_wght_delta,
            output_prefix,
        )
        logging.info("Running Timescapes:\n %s", cmd)
        stdout = executecmd(cmd)
        logging.info("TimeScapes COMPLETE:\n%s", stdout)

    # Collect and parse Timescape output
    logging.debug("Parsing Timescapes output")
    ts_parse = TimeScapeParser(job["pdb"], output_prefix, job["name"],
                               dcd=dcdFile, traj=traj, uniqueid=False)
    basin_list = ts_parse.load_basins(frame_ratio=frame_rate)
    n_basins = len(basin_list)

    minima_coords = {}
    basin_rms = {}
    basins = {}

    new_dmu = {}
    new_dsig = {}
    resid_rms_delta = {}

    stat.collect("num_basin", n_basins)
    downstream_list = []

    # FOR BOOTSTRAPPING USING RMSD
    ref_file = os.path.join(settings.workdir, self.data["pdb:ref:0"])
    logging.info("Loading RMSD reference frame from %s", self.data["pdb:ref:0"])
    refframe = md.load(ref_file)
    ref_alpha = refframe.atom_slice(refframe.top.select_atom_indices("alpha"))
    rmsd = 10 * md.rmsd(traj_alpha, ref_alpha)

    # FOR RESIDUE RMSD
    res_rms_Kr = FEATURE_SET_RESID
    resrmsd = 10 * np.array([LA.norm(i - ref_alpha.xyz[0], axis=1) for i in traj_alpha.xyz])
    basin_res_rms = np.zeros(shape=(len(basin_list), traj_alpha.n_atoms))

    # LOAD CENROIDS -- todo move to immut
    centroid = pickle.loads(self.catalog.get("centroid:ds"))
    basin_label_list = {}

    # Process each basin
    for i, basin in enumerate(basin_list):
        logging.info(" Processing basin #%2d", i)
        bid = basin.id
        downstream_list.append(bid)

        # Slice out minima coord & save to disk (for now)
        # TODO: Store in memory in cache
        minima_coords[bid] = traj.slice(basin.mindex)
        jc_filename = os.path.join(settings.datadir, "basin_%s.pdb" % bid)
        minima_coords[bid].save_pdb(jc_filename)

        # METRIC CALCULATION
        a, b = basin.start, basin.end
        new_dmu[bid] = np.mean(dist_space[a:b], axis=0)
        new_dsig[bid] = np.std(dist_space[a:b], axis=0)

        # Collect Basin metadata
        basin_hash = basin.kv()
        basin_hash["pdbfile"] = jc_filename
        basin_hash["rmsd"] = np.median(rmsd[a:b])
        basin_res_rms[i] = np.median(resrmsd[a:b], axis=0)

        # FOR EXP #16 -- Residue RMSD
        basins[bid] = basin_hash

        # LABEL ATEMPORAL DATA
        # Take every 4th frame
        stride = [dist_space[i] for i in range(a, b, 4)]
        rms = [LA.norm(centroid - i, axis=1) for i in stride]
        label_seq = [np.argmin(i) for i in rms]
        basin_label_list[bid] = label_seq
        logging.info(" Basin Processed: #%s, %d - %d",
                     basin_hash["traj"], basin_hash["start"], basin_hash["end"])

    # RMSD DELTA (RESIDUE)
    basin_res_rms_delta = np.array([rms_delta(i) for i in basin_res_rms.T]).T
    basin_rms_delta_bykey = {basin.id: basin_res_rms_delta[i]
                             for i, basin in enumerate(basin_list)}
    for k in basins.keys():
        basins[k]["resrms_delta"] = np.sum(basin_rms_delta_bykey[k][res_rms_Kr])

    # TODO: Use Min Index as snapshot, median (or mean) DistSpace vals for each basin?????

    bench.mark("analysis")

    # BARRIER: WRITE TO CATALOG HERE -- Ensure Catalog is available
    self.wait_catalog()

    # FOR EXPERIMENT METRICS
    src_basin = self.catalog.hgetall("basin:" + job["src_basin"])
    with open("/home-1/[email protected]/ddc/results/{0}_prov.log".format(settings.name), "a") as metric_out:
        for i, basin in enumerate(basin_list):
            bid = basin.id
            label_seq = basin_label_list[bid]
            basin_metric_label = LABEL10(label_seq)
            metric_out.write(
                "BASIN,%s,%s,%s,%s\n"
                % (bid, src_basin["label:10"], basin_metric_label,
                   "".join([str(i) for i in label_seq]))
            )

    # Update Index Synchronized data lists
    basin_list_sorted = sorted(basins.keys(),
                               key=lambda x: (x.split("_")[0], int(x.split("_")[1])))
    for bid in basin_list_sorted:
        with self.catalog.pipeline() as pipe:
            while True:
                try:
                    logging.debug("Updating %s basin indeces and distance space", len(basins))
                    pipe.rpush("basin:list", bid)
                    pipe.rpush("dspace", pickle.dumps(new_dmu[bid]))
                    basin_index, _ = pipe.execute()
                    break
                except redis.WatchError as e:
                    logging.debug("WATCH ERROR. Someone else is writing to the catalog. Retrying...")
                    continue
        # rpush returns the new list length; the basin's own slot is one less.
        basins[bid]["dsidx"] = basin_index - 1

    # Update Catalog with 1 Long Atomic Transaction
    with self.catalog.pipeline() as pipe:
        while True:
            try:
                logging.debug("Update Filelist")
                pipe.watch(wrapKey("jc", job["name"]))
                file_idx = pipe.rpush("xid:filelist", job["dcd"]) - 1

                # HD Points
                logging.debug("Update HD Points")
                start_index = pipe.llen("xid:reference")
                pipe.multi()
                pipe.rpush("xid:reference", *[(file_idx, x) for x in range(traj.n_frames)])
                pipe.set("resrms:" + job["name"], resrmsd)

                # Store all basin data
                logging.debug("Updating %s basins", len(basins))
                for bid in basin_list_sorted:
                    pipe.hmset("basin:" + bid, basins[bid])
                    pipe.set("minima:%s" % bid, pickle.dumps(minima_coords[bid]))
                    for i in basin_label_list[bid]:
                        pipe.rpush("basin:labelseq:" + bid, i)

                pipe.hset("anl_sequence", job["name"], mylogical_seqnum)

                logging.debug("Executing")
                pipe.execute()
                break

            except redis.WatchError as e:
                logging.debug("WATCH ERROR. Someone else is writing to the catalog. Retrying...")
                continue

    self.data[key]["xid:start"] = start_index
    self.data[key]["xid:end"] = start_index + traj.n_frames
    bench.mark("catalog")

    # ---- POST PROCESSING
    # Check for total data in downstream queue & Notify control manager to run
    # 1. Det. what's still running
    job_queue = slurm.currentqueue()
    n_simjobs = -1  # -1 to exclude self
    for j in job_queue:
        if j["name"].startswith("sw"):
            n_simjobs += 1

    # Report the number of *other simulation* jobs, not the whole queue.
    logging.info("SIM WORKER has detected %d other simulations running/pending.", n_simjobs)
    remain_percent = n_simjobs / self.data["numresources"]

    # Fault Tolerance: ensure the pipeline persists
    if n_simjobs == 0:
        logging.info("I am the last simulation. Ensuring the controler executes.")
        self.catalog.set("ctl:force", 1)

    # Send downstream notification when less than 10% of jobs are still running
    if not self.skip_notify and remain_percent < 0.1:
        # Notify the control manager 'CM'
        self.notify("ctl")

    if USE_SHM:
        # ramdisk is the shared system temp directory (gettempdir()), so
        # removing the whole tree would delete unrelated files. Remove only
        # the DCD this job wrote there.
        # NOTE(review): anything else the simulation writes under
        # job['outputloc'] is left in place -- clean it up explicitly if the
        # config template puts more than the DCD there.
        if os.path.exists(dcd_ramfile):
            os.remove(dcd_ramfile)
        shm_contents = os.listdir(ramdisk)
        logging.debug("Ramdisk contents (should be empty of DDC) : %s", str(shm_contents))

    # For benchmarching:
    bench.show()
    stat.show()

    # Return the IDs of the basins produced by this job
    return downstream_list
def execute(self, job):
    """Run one simulation job, analyze its trajectory and update the catalog.

    Steps performed (all visible in the body below):
      1. Unless ``self.skip_simulation`` is set, write the simulation config
         from the template, run ``namd2`` and copy the DCD output from the
         ramdisk to ``job['workdir']``.
      2. Load the trajectory, slice it to the alpha-atom filter and compute
         RMSD against the catalog centroids.
      3. Label every frame with a ``(state, sub_state)`` bin and compute the
         feature landscape of every frame.
      4. Optionally (``EXPERIMENT_NUMBER > 5``) compute covariance windows
         and (``5 < EXPERIMENT_NUMBER < 10``) per-bin kernel PCA.
      5. Push indices, labels and subspaces to the catalog.

    Args:
        job: mapping describing the job.  Keys read here: ``workdir``,
            ``name``, ``interval``, ``runtime``, ``dcdfreq``, ``pdb`` and
            ``dcd``.  Keys written here: ``outputloc`` and, when the
            simulation is skipped, ``dcd``.

    Returns:
        A one-element list holding the number of frames in the trajectory.
    """
    # PRE-PROCESS ----------------------------------------------------------
    settings = systemsettings()
    bench = microbench('sim_%s' % settings.name, self.seqNumFromID())
    bench.start()
    stat = StatCollector('sim_%s' % settings.name, self.seqNumFromID())
    mylogical_seqnum = str(self.seqNumFromID())

    # Prepare working directory, input/output files.
    # The log and dcd files live next to the config file.
    conFile = os.path.join(job['workdir'], job['name'] + '.conf')
    logFile = conFile.replace('conf', 'log')
    dcdFile = conFile.replace('conf', 'dcd')

    USE_SHM = True
    ADAPTIVE_CENTROID = False

    SIMULATE_RATIO = settings.SIMULATE_RATIO
    if SIMULATE_RATIO > 1:
        logging.warning(" USING SIMULATION RATIO OF %d -- THis is ONLY for debugging", SIMULATE_RATIO)
    frame_size = (SIMULATE_RATIO * int(job['interval'])) / (1000)
    logging.info('Frame Size is %f Using Sim Ratio of 1:%d',
                 frame_size, SIMULATE_RATIO)

    EXPERIMENT_NUMBER = settings.EXPERIMENT_NUMBER
    logging.info('Running Experiment Configuration #%d', EXPERIMENT_NUMBER)

    traj = None

    # EXECUTE SIMULATION ---------------------------------------------------
    if self.skip_simulation:
        logging.info('1. SKIPPING SIMULATION.....')
        USE_SHM = False
        job['dcd'] = dcdFile
        key = wrapKey('jc', job['name'])
        self.data[key]['dcd'] = dcdFile
    else:
        logging.info('1. Run Simulation')

        # Prepare & source to config file
        with open(self.data['sim_conf_template'], 'r') as template:
            source = template.read()

        # Store the DCD on a node-local ramdisk while the simulation runs.
        if USE_SHM:
            ramdisk = '/tmp/ddc/'
            # exist_ok avoids a crash when another worker on this node
            # creates the directory between an existence check and mkdir.
            os.makedirs(ramdisk, exist_ok=True)
            job['outputloc'] = ramdisk
            dcd_ramfile = os.path.join(ramdisk, job['name'] + '.dcd')
        else:
            job['outputloc'] = ''

        with open(conFile, 'w') as sysconfig:
            sysconfig.write(source % job)
        logging.info("Config written to: %s", conFile)

        check = executecmd('module list')
        logging.debug('%s', check)
        cmd = 'namd2 +p%d %s > %s' % (PARALLELISM, conFile, logFile)

        max_expected_obs = int(job['runtime']) // int(job['dcdfreq'])

        # Retry up to MAX_TRY attempts.  The acceptance threshold drops on
        # every attempt (2/3, 1/3, 0 of the expected frame count), so the
        # last attempt is always accepted.
        MAX_TRY = 3
        for i in range(MAX_TRY, 0, -1):
            min_required_obs = int(max_expected_obs * ((i - 1) / (MAX_TRY)))
            logging.debug("Executing Simulation:\n %s\n", cmd)
            logging.debug('# Obs Expected to see: %d', max_expected_obs)
            stdout = executecmd(cmd)
            logging.info("SIMULATION Complete! STDOUT/ERR Follows:")

            # Check file for expected data
            if USE_SHM:
                traj = md.load(dcd_ramfile, top=job['pdb'])
            else:
                traj = md.load(dcdFile, top=job['pdb'])
            logging.info("Obs Threshold = %4d", min_required_obs)
            logging.info("#Obs This Traj = %4d", traj.n_frames)
            if traj.n_frames >= min_required_obs:
                logging.info('Full (enough) Sim Completed')
                break
            logging.info('Detected a failed Simulation. Retrying the same sim.')

        bench.mark('SimExec:%s' % job['name'])

        # Internal stats
        sim_length = self.data['sim_step_size'] * int(job['runtime'])
        sim_realtime = bench.delta_last()
        sim_run_ratio = (sim_realtime / 60) / (sim_length / 1000000)
        logging.info('##SIM_RATIO %6.3f min-per-ns-sim', sim_run_ratio)
        stat.collect('sim_ratio', sim_run_ratio)

        if USE_SHM:
            shm_contents = os.listdir(ramdisk)
            logging.debug('Ramdisk contents (should have files) : %s', str(shm_contents))
            if not os.path.exists(dcd_ramfile):
                logging.warning("DCD FILE NOT FOUND!!!! Wait 10 seconds for sim to close it (???)")
                time.sleep(10)
            if not os.path.exists(dcd_ramfile):
                logging.warning("DCD STIILL FILE NOT FOUND!!!!")
            else:
                logging.info("DCD File was found")

        # Copy to Lustre using sendfile.  The context manager guarantees
        # both descriptors are closed even if the copy raises.
        if USE_SHM:
            with open(dcd_ramfile, 'rb') as src, open(dcdFile, 'w+b') as dest:
                offset = 0
                dcdfilesize = os.path.getsize(dcd_ramfile)
                while True:
                    sent = sendfile(dest.fileno(), src.fileno(), offset, dcdfilesize)
                    if sent == 0:
                        break
                    offset += sent
            logging.info("Copy Complete to Lustre.")
            bench.mark('CopyLustre:%s' % job['name'])

        # TODO: Update job's metadata
        key = wrapKey('jc', job['name'])
        self.data[key]['dcd'] = dcdFile

    # ANALYSIS -------------------------------------------------------------
    # 1. With combined sim-analysis the file is loaded from the ramdisk.
    logging.debug("2. Load DCD")

    # Load full higher dim trajectory (only when no simulation ran above)
    if traj is None:
        if USE_SHM:
            traj = md.load(dcd_ramfile, top=job['pdb'])
        else:
            traj = md.load(dcdFile, top=job['pdb'])

    # Center Coordinates
    traj.center_coordinates()
    bench.mark('File_Load')
    logging.debug('Trajectory Loaded: %s (%s)', job['name'], str(traj))

    # DIMENSIONALITY REDUCTION ---------------------------------------------
    # 4-A. Subspace calculation: RMS using the alpha filter
    logging.info('---- RMSD Calculation against pre-defined centroids ----')

    # 1. Filter to alpha atoms
    alpha = traj.atom_slice(deshaw.FILTER['alpha'])

    # Centroids are pulled from the catalog (and updated below when the
    # adaptive mode is switched on).
    logging.info('CENTROID Retrieval & Updating')
    self.wait_catalog()

    if ADAPTIVE_CENTROID:
        # Each state's centroid is the point-count-weighted mean of every
        # local centroid pushed so far.  The state count is taken from
        # self.data here because numLabels is only assigned further down.
        centroids = []
        for state in range(len(self.data['centroid'])):
            cent_raw = self.catalog.lrange('centroid:xyz:%d' % state, 0, -1)
            cent_xyz = [pickle.loads(i) for i in cent_raw]
            cent_npts = [int(i) for i in self.catalog.lrange('centroid:npts:%d' % state, 0, -1)]
            c_sum = np.zeros(shape=cent_xyz[0].shape)
            c_tot = 0
            for x, n in zip(cent_xyz, cent_npts):
                c_sum += x * n
                c_tot += n
            centroids.append(c_sum / c_tot)
    else:
        centroids = self.catalog.loadNPArray('centroid')

    # 5. Calculate the RMSD for each filtered point to the centroids
    cw = [.94, .95, .97, .99, .99]
    numLabels = len(self.data['centroid'])
    numConf = len(traj.xyz)
    stat.collect('numpts', numConf)

    # 4. Account for noise: spatial mean filter over a small window whose
    #    half-width is derived from the configured noise and the step size.
    noise = self.data['obs_noise']
    stepsize = 500 if 'interval' not in job else int(job['interval'])
    nwidth = noise // (2 * stepsize)
    coords = alpha.xyz
    rms_filtered = np.array([
        np.mean(coords[max(0, i - nwidth):min(i + nwidth, len(coords))], axis=0)
        for i in range(numConf)])

    # rmslist_sv (filtered + weighted) drives the labels below;
    # rmslist (raw) feeds the feature landscape.
    rmslist_sv = calc_rmsd(rms_filtered, centroids, weights=cw)
    rmslist = calc_rmsd(alpha, centroids)
    numConf = traj.n_frames
    numLabels = len(centroids)

    logging.debug(' RMS: %d points projected to %d centroid-distances',
                  numConf, numLabels)

    # 6. Apply heuristic labeling -- single variate
    rmslabel = []
    binlist = [(a, b) for a in range(numLabels) for b in range(numLabels)]
    label_count = {ab: 0 for ab in binlist}
    groupbystate = [[] for _ in range(numLabels)]
    groupbybin = {ab: [] for ab in binlist}
    for i, rms in enumerate(rmslist_sv):
        # A is the nearest state's centroid, B the second nearest.
        A, B = np.argsort(rms)[:2]

        # A frame is labeled as a transition (A, B) when the two nearest
        # centroids are within a fixed absolute distance of each other.
        delta = np.abs(rms[B] - rms[A])
        if delta < 0.33:
            sub_state = B
        else:
            sub_state = A
        rmslabel.append((A, sub_state))

        # Add this index to the groups for its label
        label_count[(A, sub_state)] += 1
        groupbystate[A].append(i)
        groupbybin[(A, sub_state)].append(i)

    bench.mark('RMS')
    logging.info('Labeled the following:')
    for A in range(numLabels):
        if len(groupbystate[A]) > 0:
            logging.info('label,state,%d,num,%d', A, len(groupbystate[A]))
    for ab in binlist:
        if len(groupbybin[ab]) > 0:
            A, B = ab
            logging.info('label,bin,%d,%d,num,%d', A, B, len(groupbybin[ab]))

    # FEATURE LANDSCAPE -- multi-variate, one vector per frame
    feal_list = [feal.atemporal(rms) for rms in rmslist]
    logging.info('Calculated Feature Landscape. Aggregate for this traj')

    # For logging purposes
    agg_feal = np.mean(feal_list, axis=0)
    logging.info('CountsMax [C]: %s', str(agg_feal[:5]))
    logging.info('StateDist [S]: %s', str(agg_feal[5:10]))
    logging.info('RelDist [A-B]: %s', str(agg_feal[10:]))

    # ADAPTIVE CENTROID UPDATE
    if ADAPTIVE_CENTROID:
        pipe = self.catalog.pipeline()
        for state in range(numLabels):
            n_pts = len(groupbybin[(state, state)])
            if n_pts == 0:
                logging.info('Skipping State %d Centroid -- Well not visited on this trajectory', state)
                continue
            c_sum = np.zeros(shape=alpha.xyz[0].shape)
            for idx in groupbybin[(state, state)]:
                c_sum += alpha.xyz[idx]
            centroid_local = c_sum / n_pts
            # Distance between the current centroid and this trajectory's
            # local centroid for the same state.
            centroid_delta = LA.norm(centroids[state] - centroid_local)
            pipe.rpush('centroid:xyz:%d' % state, pickle.dumps(centroid_local))
            pipe.rpush('centroid:npts:%d' % state, n_pts)
            pipe.rpush('centroid:delta:%d' % state, centroid_delta)
        pipe.execute()

    # 4-B. Subspace calculation: covariance matrices over sliding windows
    if EXPERIMENT_NUMBER > 5:
        logging.info('---- Covariance Calculation on 200ns windows (Full Protein, cartesian Space) ----')

        # Window size and slide amount (these could be user influenced)
        WIN_SIZE_NS = .2
        SLIDE_AMT_NS = .1
        logging.debug("Calculating Covariance over trajectory. frame_size = %.1f, WINSIZE = %dps, Slide = %dps",
                      frame_size, WIN_SIZE_NS * 1000, SLIDE_AMT_NS * 1000)
        covar = dr.calc_covar(alpha.xyz, WIN_SIZE_NS, frame_size, slide=SLIDE_AMT_NS)
        bench.mark('CalcCovar')
        stat.collect('numcovar', len(covar))
        logging.debug("Calcualted %d covariance matrices. Storing variances", len(covar))

    # BARRIER: WRITE TO CATALOG HERE -- ensure the catalog is available
    self.wait_catalog()

    # Update catalog with one long atomic transaction
    with self.catalog.pipeline() as pipe:
        while True:
            try:
                # Reset per attempt so a WATCH retry does not leave the
                # indices of the failed attempt at the front of the list.
                global_index = []

                logging.debug('Update Filelist')
                pipe.watch(wrapKey('jc', job['name']))
                file_idx = pipe.rpush('xid:filelist', job['dcd']) - 1

                # HD Points
                logging.debug('Update HD Points')
                for x in range(traj.n_frames):
                    # Pipelined insertions "should" return a contiguous
                    # set of index points.
                    index = pipe.rpush('xid:reference', (file_idx, x)) - 1
                    # NOTE(review): index is already 0-based here, so the
                    # second "- 1" looks like an off-by-one.  Kept because
                    # readers of these indices are not visible from here.
                    global_index.append(index - 1)

                pipe.multi()

                logging.debug('Update RMS Subspace')
                for x in range(traj.n_frames):
                    A, B = rmslabel[x]
                    index = global_index[x]
                    # Labeled observation (from RMSD)
                    pipe.rpush('label:rms', rmslabel[x])
                    pipe.rpush('varbin:rms:%d_%d' % (A, B), index)
                    pipe.rpush('subspace:rms', bytes(rmslist_sv[x]))
                    pipe.rpush('subspace:feal', bytes(feal_list[x]))

                logging.debug('Update OBS Counts')
                for b in binlist:
                    pipe.rpush('observe:rms:%d_%d' % b, label_count[b])
                pipe.incr('observe:count')
                pipe.hset('anl_sequence', job['name'], mylogical_seqnum)

                if EXPERIMENT_NUMBER > 5:
                    logging.debug('Update Covar Subspace')
                    for i, si in enumerate(covar):
                        logging.debug('Update COVAR Pt #%d', i)
                        local_index = int(i * frame_size * SLIDE_AMT_NS)
                        pipe.rpush('subspace:covar:pts', bytes(si))
                        pipe.rpush('subspace:covar:xid', global_index[local_index])
                        pipe.rpush('subspace:covar:fidx', (file_idx, local_index))

                logging.debug('Executing')
                pipe.execute()
                break
            except redis.WatchError:
                logging.debug('WATCH ERROR')
                continue

    self.data[key]['xid:start'] = global_index[0]
    self.data[key]['xid:end'] = global_index[-1]
    bench.mark('Indx_Update')

    # 4-C. Subspace calculation: PCA by strata (per bin) using alpha filter
    if EXPERIMENT_NUMBER > 5 and EXPERIMENT_NUMBER < 10:
        logging.info('---- PCA per BIN over Alpha Filter in cartesian Space ----')
        # TODO: This will eventually get moved into a user process
        #       macrothread sitting between analysis and controller.

        # Connect to reservoir samples
        reservoir = ReservoirSample('rms', self.catalog)

        num_inserted = {ab: 0 for ab in binlist}
        num_params = np.prod(alpha.xyz.shape[1:])

        for A, B in binlist:
            num_observations = len(groupbybin[(A, B)])
            if num_observations == 0:
                logging.info('No data received for bin (%d,%d). Not processing this bin here.', A, B)
                continue

            res_label = '%d_%d' % (A, B)
            kpca_key = 'subspace:pca:kernel:%d_%d' % (A, B)
            kpca = PCAnalyzer.load(self.catalog, kpca_key)
            newkpca = False
            if kpca is None:
                kpca = PCAKernel(6, 'rbf')
                newkpca = True

            logging.info('PCA: Checking if current vectors for state %d are out of date', A)
            rsize = reservoir.getsize(res_label)
            tsize = kpca.trainsize

            # The kernel is retrained when it is new or when the reservoir
            # has grown past 1.5x the size it was last trained on.
            if newkpca or rsize > (tsize * 1.5):
                traindata = reservoir.get(res_label)
                if newkpca:
                    logging.info('New PCA Kernel. Trained on data set of size %d. Current '
                                 'reservoir is %d pts.', tsize, rsize)
                    logging.info('Projecting %d points on Kernel PCA for bin (%d,%d)',
                                 num_observations, A, B)
                    # A new kernel is trained on this trajectory's points.
                    traindata = np.zeros(shape=((num_observations,) + alpha.xyz.shape[1:]),
                                         dtype=np.float32)
                    for i, index in enumerate(groupbybin[(A, B)]):
                        np.copyto(traindata[i], alpha.xyz[index])
                else:
                    logging.info('PCA Kernel is old (Updating it). Trained on data set of '
                                 'size %d. Current reservoir is %d pts.', tsize, rsize)

                if len(traindata) <= num_params:
                    logging.info("Not enough data to calculate PC's (Need at least %d "
                                 "observations). Skipping PCA for Bin (%d,%d)", num_params, A, B)
                    hd_pts = np.zeros(shape=((num_observations,) + alpha.xyz.shape[1:]),
                                      dtype=np.float32)
                    for i, index in enumerate(groupbybin[(A, B)]):
                        np.copyto(hd_pts[i], alpha.xyz[index])
                    num_inserted[(A, B)] = reservoir.insert(res_label, hd_pts)
                    logging.debug('Updating reservoir Sample for Bin (%d, %d)', A, B)
                    continue

                logging.info(' Performing Kernel PCA (Gaussian) for bin (%d,%d) using traindata of size %d',
                             A, B, len(traindata))
                kpca.solve(traindata)
                bench.mark('CalcKPCA_%d_%d' % (A, B))

                lock = self.catalog.lock_acquire(kpca_key)
                if lock is None:
                    logging.info('Could not lock the PC Kernel for Bin (%d,%d). Not updating', A, B)
                else:
                    kpca.store(self.catalog, kpca_key)
                    lock = self.catalog.lock_release(kpca_key, lock)
                bench.mark('ConcurrPCAWrite_%d_%d' % (A, B))

                # A retrained (not new) kernel invalidates the stored
                # projections, so they are replaced with a fresh
                # projection of the training data.
                if not newkpca:
                    logging.info('Clearing and Re-Projecting the entire reservoir of %d points for Bin (%d,%d).',
                                 rsize, A, B)
                    rsamp_lowdim = kpca.project(traindata)
                    pipe = self.catalog.pipeline()
                    pipe.delete('subspace:pca:%d_%d' % (A, B))
                    for si in rsamp_lowdim:
                        pipe.rpush('subspace:pca:%d_%d' % (A, B), bytes(si))
                    pipe.execute()
            else:
                logging.info('PCA Kernel is good -- no need to change them')

            bench.mark('start_ProjPCA')
            logging.info('Projecting %d points on Kernel PCA for Bin (%d,%d)',
                         num_observations, A, B)
            hd_pts = np.zeros(shape=((num_observations,) + alpha.xyz.shape[1:]),
                              dtype=np.float32)
            for i, index in enumerate(groupbybin[(A, B)]):
                np.copyto(hd_pts[i], alpha.xyz[index])
            pc_proj = kpca.project(hd_pts)
            bench.mark('ProjPCA_%d_%d' % (A, B))

            # 2. Append subspace in catalog
            pipe = self.catalog.pipeline()
            for si in pc_proj:
                pipe.rpush('subspace:pca:%d_%d' % (A, B), bytes(si))
            pipe.execute()

            logging.debug('Updating reservoir Sample')
            num_inserted[(A, B)] = reservoir.insert(res_label, hd_pts)

        bench.mark('PCA')
        pipe = self.catalog.pipeline()
        for ab, num in num_inserted.items():
            if num > 0:
                # Key on the bin being iterated, not on whatever (A, B)
                # the loop above happened to finish with.
                pipe.rpush('subspace:pca:updates:%d_%d' % ab, num)
        pipe.execute()

    # ---- POST PROCESSING
    if USE_SHM:
        shutil.rmtree(ramdisk)
        shm_contents = os.listdir('/tmp')
        logging.debug('Ramdisk contents (should be empty of DDC) : %s', str(shm_contents))

    # For benchmarking:
    bench.show()
    stat.show()

    # Return # of observations (frames) processed
    return [numConf]
def execute(self, job):
    """Run one simulation job, detect basins and publish them to the catalog.

    Steps performed (all visible in the body below):
      1. Unless ``self.skip_simulation`` is set, write the simulation config
         from the template, run ``namd2`` and copy the DCD output to
         ``job['workdir']``.
      2. Load the trajectory, build protein / heavy / alpha slices and the
         alpha-atom distance space.
      3. Unless ``self.skip_timescape`` is set, run ``terrain.py`` on a
         sub-sampled heavy-atom trajectory.
      4. Parse the resulting basins, compute per-basin metrics and labels.
      5. Push basin data and indices to the catalog, then decide whether
         to notify the controller.

    Args:
        job: mapping describing the job.  Keys read here: ``workdir``,
            ``name``, ``interval``, ``runtime``, ``dcdfreq``, ``pdb``,
            ``dcd`` and ``src_basin``.  Keys written here: ``outputloc``
            and, when the simulation is skipped, ``dcd``.

    Returns:
        List of the ids of every basin found in this trajectory.
    """
    # PRE-PROCESS ----------------------------------------------------------
    settings = systemsettings()
    bench = microbench('sim_%s' % settings.name, self.seqNumFromID())
    bench.start()
    stat = StatCollector('sim_%s' % settings.name, self.seqNumFromID())
    mylogical_seqnum = str(self.seqNumFromID())

    # Prepare working directory, input/output files.
    # The log and dcd files live next to the config file.
    conFile = os.path.join(job['workdir'], job['name'] + '.conf')
    logFile = conFile.replace('conf', 'log')
    dcdFile = conFile.replace('conf', 'dcd')

    USE_SHM = True

    SIMULATE_RATIO = settings.SIMULATE_RATIO
    if SIMULATE_RATIO > 1:
        logging.warning(" USING SIMULATION RATIO OF %d -- THis is ONLY for debugging", SIMULATE_RATIO)
    frame_size = (SIMULATE_RATIO * int(job['interval'])) / (1000)
    logging.info('Frame Size is %f Using Sim Ratio of 1:%d',
                 frame_size, SIMULATE_RATIO)

    EXPERIMENT_NUMBER = settings.EXPERIMENT_NUMBER
    logging.info('Running Experiment Configuration #%d', EXPERIMENT_NUMBER)

    traj = None

    # EXECUTE SIMULATION ---------------------------------------------------
    if self.skip_simulation:
        logging.info('1. SKIPPING SIMULATION.....')
        USE_SHM = False
        job['dcd'] = dcdFile
        key = wrapKey('jc', job['name'])
        self.data[key]['dcd'] = dcdFile
    else:
        logging.info('1. Run Simulation')

        # Prepare & source to config file
        with open(self.data['sim_conf_template'], 'r') as template:
            source = template.read()

        # Store the DCD in node-local scratch while the simulation runs.
        if USE_SHM:
            # NOTE(review): this directory is removed with shutil.rmtree at
            # the end of the method, so gettempdir is presumably a project
            # helper returning a private scratch dir -- confirm it is not
            # tempfile.gettempdir.
            ramdisk = gettempdir()
            job['outputloc'] = ramdisk
            dcd_ramfile = os.path.join(ramdisk, job['name'] + '.dcd')
        else:
            job['outputloc'] = ''

        with open(conFile, 'w') as sysconfig:
            sysconfig.write(source % job)
        logging.info("Config written to: %s", conFile)

        check = executecmd('module list')
        logging.debug('%s', check)
        cmd = 'namd2 +p%d %s > %s' % (PARALLELISM, conFile, logFile)

        max_expected_obs = int(job['runtime']) // int(job['dcdfreq'])

        MAX_TRY = 3
        for i in range(MAX_TRY, 0, -1):
            min_required_obs = int(max_expected_obs * ((i - 1) / (MAX_TRY)))
            logging.debug("Executing Simulation:\n %s\n", cmd)
            logging.debug('# Obs Expected to see: %d', max_expected_obs)
            stdout = executecmd(cmd)
            logging.info("SIMULATION Complete! STDOUT/ERR Follows:")

            # Check file for expected data
            if USE_SHM:
                traj = md.load(dcd_ramfile, top=job['pdb'])
            else:
                traj = md.load(dcdFile, top=job['pdb'])
            logging.info("Obs Threshold = %4d", min_required_obs)
            logging.info("#Obs This Traj = %4d", traj.n_frames)
            if traj.n_frames >= min_required_obs:
                logging.info('Full (enough) Sim Completed')
                break
            logging.info('Detected a failed Simulation. Retrying the same sim.')
            # NOTE(review): this unconditional break means the simulation
            # is never actually re-run despite the message above.  Kept
            # as-is because it may be a deliberate switch-off of retries.
            break

        bench.mark('simulation')

        # Internal stats
        sim_length = self.data['sim_step_size'] * int(job['runtime'])
        sim_realtime = bench.delta_last()
        sim_run_ratio = (sim_realtime / 60) / (sim_length / 1000000)
        logging.info('##SIM_RATIO %6.3f min-per-ns-sim', sim_run_ratio)
        stat.collect('sim_ratio', sim_run_ratio)

        if USE_SHM:
            shm_contents = os.listdir(ramdisk)
            logging.debug('Ramdisk contents (should have files) : %s', str(shm_contents))
            if not os.path.exists(dcd_ramfile):
                logging.warning("DCD FILE NOT FOUND!!!! Wait 10 seconds for sim to close it (???)")
                time.sleep(10)
            if not os.path.exists(dcd_ramfile):
                logging.warning("DCD STIILL FILE NOT FOUND!!!!")
            else:
                logging.info("DCD File was found")

        # Copy to Lustre using sendfile.  The context manager guarantees
        # both descriptors are closed even if the copy raises.
        if USE_SHM:
            with open(dcd_ramfile, 'rb') as src, open(dcdFile, 'w+b') as dest:
                offset = 0
                dcdfilesize = os.path.getsize(dcd_ramfile)
                while True:
                    sent = sendfile(dest.fileno(), src.fileno(), offset, dcdfilesize)
                    if sent == 0:
                        break
                    offset += sent
            logging.info("Copy Complete to Lustre.")
            bench.mark('CopyLustre:%s' % job['name'])

        # TODO: Update job's metadata
        key = wrapKey('jc', job['name'])
        self.data[key]['dcd'] = dcdFile

    # ANALYSIS -------------------------------------------------------------
    logging.debug("2. Load DCD")

    # Load topology and define filters
    topo = self.protein.pdb
    pdb = md.load(job['pdb'])
    hfilt = pdb.top.select_atom_indices('heavy')
    pfilt = pdb.top.select('protein')
    afilt = pdb.top.select_atom_indices('alpha')

    # Load full higher dim trajectory (only when no simulation ran above)
    if traj is None:
        if USE_SHM:
            traj = md.load(dcd_ramfile, top=pdb)
        else:
            traj = md.load(dcdFile, top=pdb)

    logging.debug('Trajectory Loaded: %s (%s)', job['name'], str(traj))
    traj_prot = traj.atom_slice(pfilt)
    traj_heavy = traj.atom_slice(hfilt)
    traj_alpha = traj.atom_slice(afilt)

    # Superpose coordinates to common reference
    traj_prot.superpose(topo)

    # The side-chain pair count is computed but not used further down.
    sc_pairs = side_chain_pairs(traj_prot)
    n_features = len(sc_pairs)

    # Use the CA atoms to calculate distance space.
    # The factor of 10 converts nm to Angstrom.
    dist_space = 10 * DR.distance_space(traj_alpha)

    # Frame rate for conversion between the source trajectory and the
    # (coarser) TimeScapes input.  job['interval'] is in fs.
    logging.debug('Preprocessing output for TimeScapes: terrain')
    traj_frame_per_ps = SIMULATE_RATIO * int(job['interval']) / 1000.
    ts_frame_per_ps = int(self.data['timescape:rate'])  # this value is in ps
    frame_rate = int(ts_frame_per_ps / traj_frame_per_ps)
    logging.debug('%5.2f fr/ps (Traj) %5.2f fr/ps (TS) FrameRate= %4.1f',
                  traj_frame_per_ps, ts_frame_per_ps, frame_rate)

    # Execute TimeScapes to detect spatial-temporal basins
    output_prefix = os.path.join(job['workdir'], job['name'])
    if not self.skip_timescape:
        # Prep file and save locally.  os.path.join gives the same path as
        # plain concatenation when the temp dir ends with a separator and
        # a correct one when it does not.
        tmploc = gettempdir()
        ts_out = os.path.join(tmploc, 'traj_ts')
        ts_dcd = ts_out + '.dcd'
        ts_pdb = ts_out + '.pdb'
        heavy = traj_heavy.slice(range(0, traj.n_frames, frame_rate))
        heavy.slice(0).save_pdb(ts_pdb)
        heavy.save_dcd(ts_dcd)

        # Gaussian full width at half-max value affects sliding window size
        # ref: http://timescapes.biomachina.org/guide.pdf
        gmd_cut1 = int(self.data['timescape:gmd:low'])
        gmd_cut2 = int(self.data['timescape:gmd:hi'])
        gauss_wght_delta = int(self.data['timescape:delta'])

        # Execute terrain.py on the pre-processed trajectory
        cmd = 'terrain.py %s %s %d %d %d GMD %s' % \
            (ts_pdb, ts_dcd, gmd_cut1, gmd_cut2, gauss_wght_delta, output_prefix)
        logging.info('Running Timescapes:\n %s', cmd)
        stdout = executecmd(cmd)
        logging.info('TimeScapes COMPLETE:\n%s', stdout)

    # Collect and parse TimeScapes output
    logging.debug('Parsing Timescapes output')
    ts_parse = TimeScapeParser(job['pdb'], output_prefix, job['name'],
                               dcd=dcdFile, traj=traj, uniqueid=False)
    basin_list = ts_parse.load_basins(frame_ratio=frame_rate)
    n_basins = len(basin_list)

    minima_coords = {}
    basins = {}
    new_dmu = {}
    new_dsig = {}
    stat.collect('num_basin', n_basins)
    downstream_list = []

    # FOR BOOTSTRAPPING USING RMSD
    ref_file = os.path.join(settings.workdir, self.data['pdb:ref:0'])
    logging.info('Loading RMSD reference frame from %s', self.data['pdb:ref:0'])
    refframe = md.load(ref_file)
    ref_alpha = refframe.atom_slice(refframe.top.select_atom_indices('alpha'))
    rmsd = 10 * md.rmsd(traj_alpha, ref_alpha)

    # FOR RESIDUE RMSD
    res_rms_Kr = FEATURE_SET_RESID
    resrmsd = 10 * np.array([LA.norm(i - ref_alpha.xyz[0], axis=1) for i in traj_alpha.xyz])
    basin_res_rms = np.zeros(shape=(len(basin_list), traj_alpha.n_atoms))

    # LOAD CENTROIDS -- todo move to immut
    centroid = pickle.loads(self.catalog.get('centroid:ds'))
    basin_label_list = {}

    # Process each basin
    for i, basin in enumerate(basin_list):
        logging.info(' Processing basin #%2d', i)
        bid = basin.id
        downstream_list.append(bid)

        # Slice out minima coord & save to disk (for now)
        # TODO: Store in memory in cache
        minima_coords[bid] = traj.slice(basin.mindex)
        jc_filename = os.path.join(settings.datadir, 'basin_%s.pdb' % bid)
        minima_coords[bid].save_pdb(jc_filename)

        # METRIC CALCULATION
        a, b = basin.start, basin.end
        new_dmu[bid] = np.mean(dist_space[a:b], axis=0)
        new_dsig[bid] = np.std(dist_space[a:b], axis=0)

        # Collect basin metadata
        basin_hash = basin.kv()
        basin_hash['pdbfile'] = jc_filename
        basin_hash['rmsd'] = np.median(rmsd[a:b])
        basin_res_rms[i] = np.median(resrmsd[a:b], axis=0)
        basins[bid] = basin_hash

        # LABEL ATEMPORAL DATA: every 4th frame gets the index of its
        # nearest centroid in distance space.
        stride = [dist_space[f] for f in range(a, b, 4)]
        rms = [LA.norm(centroid - pt, axis=1) for pt in stride]
        label_seq = [np.argmin(r) for r in rms]
        basin_label_list[bid] = label_seq
        logging.info(' Basin Processed: #%s, %d - %d', basin_hash['traj'],
                     basin_hash['start'], basin_hash['end'])

    # RMSD DELTA (RESIDUE)
    basin_res_rms_delta = np.array([rms_delta(i) for i in basin_res_rms.T]).T
    basin_rms_delta_bykey = {basin.id: basin_res_rms_delta[i]
                             for i, basin in enumerate(basin_list)}
    for k in basins.keys():
        basins[k]['resrms_delta'] = np.sum(basin_rms_delta_bykey[k][res_rms_Kr])

    bench.mark('analysis')

    # BARRIER: WRITE TO CATALOG HERE -- ensure the catalog is available
    self.wait_catalog()

    # FOR EXPERIMENT METRICS
    src_basin = self.catalog.hgetall("basin:" + job['src_basin'])
    with open('/home-1/[email protected]/ddc/results/{0}_prov.log'.format(settings.name), 'a') as metric_out:
        for i, basin in enumerate(basin_list):
            bid = basin.id
            label_seq = basin_label_list[bid]
            basin_metric_label = LABEL10(label_seq)
            metric_out.write('BASIN,%s,%s,%s,%s\n' %
                             (bid, src_basin['label:10'], basin_metric_label,
                              ''.join([str(i) for i in label_seq])))

    # Update index-synchronized data lists.  Basin ids are sorted by their
    # prefix and then numerically by the part after the underscore.
    basin_list_sorted = sorted(
        basins.keys(), key=lambda x: (x.split('_')[0], int(x.split('_')[1])))
    for bid in basin_list_sorted:
        with self.catalog.pipeline() as pipe:
            while True:
                try:
                    logging.debug('Updating %s basin indeces and distance space', len(basins))
                    pipe.rpush('basin:list', bid)
                    pipe.rpush('dspace', pickle.dumps(new_dmu[bid]))
                    basin_index, _ = pipe.execute()
                    break
                except redis.WatchError:
                    logging.debug('WATCH ERROR. Someone else is writing to the catalog. Retrying...')
                    continue
        # rpush returns the new list length, so the basin sits at len - 1.
        basins[bid]['dsidx'] = basin_index - 1

    # Update catalog with one long atomic transaction
    with self.catalog.pipeline() as pipe:
        while True:
            try:
                logging.debug('Update Filelist')
                pipe.watch(wrapKey('jc', job['name']))
                file_idx = pipe.rpush('xid:filelist', job['dcd']) - 1

                # HD Points
                logging.debug('Update HD Points')
                start_index = pipe.llen('xid:reference')
                pipe.multi()
                pipe.rpush('xid:reference', *[(file_idx, x) for x in range(traj.n_frames)])
                pipe.set('resrms:' + job['name'], resrmsd)

                # Store all basin data
                logging.debug('Updating %s basins', len(basins))
                for bid in basin_list_sorted:
                    pipe.hmset('basin:' + bid, basins[bid])
                    pipe.set('minima:%s' % bid, pickle.dumps(minima_coords[bid]))
                    for label in basin_label_list[bid]:
                        pipe.rpush('basin:labelseq:' + bid, label)

                pipe.hset('anl_sequence', job['name'], mylogical_seqnum)

                logging.debug('Executing')
                pipe.execute()
                break
            except redis.WatchError:
                logging.debug('WATCH ERROR. Someone else is writing to the catalog. Retrying...')
                continue

    self.data[key]['xid:start'] = start_index
    self.data[key]['xid:end'] = start_index + traj.n_frames
    bench.mark('catalog')

    # ---- POST PROCESSING
    # Count the other simulation jobs (names starting with 'sw') still in
    # the queue; start at -1 to exclude this job itself.
    job_queue = slurm.currentqueue()
    n_simjobs = sum(1 for j in job_queue if j['name'].startswith('sw')) - 1

    # Report the number of other simulations, not the whole queue length.
    logging.info('SIM WORKER has detected %d other simulations running/pending.', n_simjobs)
    remain_percent = n_simjobs / self.data['numresources']

    # Fault tolerance: ensure the pipeline persists
    if n_simjobs == 0:
        logging.info('I am the last simulation. Ensuring the controler executes.')
        self.catalog.set('ctl:force', 1)

    # Send downstream notification when less than 10% of jobs are still running
    if not self.skip_notify and remain_percent < .1:
        # Notify the control manager 'CM'
        self.notify('ctl')

    if USE_SHM:
        shutil.rmtree(ramdisk)
        shm_contents = os.listdir('/dev/shm')
        logging.debug('Ramdisk contents (should be empty of DDC) : %s', str(shm_contents))

    # For benchmarking:
    bench.show()
    stat.show()

    # Return the ids of the basins found in this trajectory
    return downstream_list