Example #1
import logging
import os

import numpy as np
# datareduce is a project module from DaMSL/ddc (trajectory load/filter helpers)

def backProjection(db, index_list):
    """Perform the OFFLINE back-projection for a list of indices using the
    given DB. Return a list of high-dimensional points (one per index).
    Assumes NO CACHE or DESHAW.
    """
    logging.debug('--------  BACK PROJECTION:  %d POINTS ---', len(index_list))
    # Dereference indices to (file, frame) tuples:
    pipe = db.pipeline()
    for idx in index_list:
        pipe.lindex('xid:reference', int(idx))
    generated_framelist = pipe.execute()
    # Group all generated indices by file index
    groupbyFileIdx = {}
    for idx in generated_framelist:
        try:
            # Catalog entries are stored as "(file_index, frame)" strings
            file_index, frame = eval(idx)
        except TypeError:
            logging.warning('Bad index: %s', str(idx))
            continue
        if file_index not in groupbyFileIdx:
            groupbyFileIdx[file_index] = []
        groupbyFileIdx[file_index].append(frame)
    # Dereference File index to filenames
    generated_frameMask = {}
    generated_filemap = {}
    for file_index in groupbyFileIdx:
        filename = db.lindex('xid:filelist', file_index)
        if filename is None:
            logging.warning('File index %d not found in catalog', file_index)
            continue
        if not os.path.exists(filename):
            logging.warning('DCD file not found: %s', filename)
        else:
            key = os.path.splitext(os.path.basename(filename))[0]
            generated_frameMask[key] = groupbyFileIdx[file_index]
            generated_filemap[key] = filename
    # Add high-dim points to the list of source points, one trajectory (file)
    # at a time, so file I/O stays consolidated
    logging.debug('Sequentially loading all trajectories')
    source_points = []
    for key, framelist in generated_frameMask.items():
        traj = datareduce.load_trajectory(generated_filemap[key])
        traj = datareduce.filter_alpha(traj)
        selected_frames = traj.slice(framelist)
        source_points.extend(selected_frames.xyz)
    return np.array(source_points)
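A minimal usage sketch for the variant above. The function only relies on pipeline() and lindex(), which redis-py provides, and assumes the xid:reference catalog list stores "(file_index, frame)" strings; the connection settings below are hypothetical:

import redis

# Hypothetical connection settings; decode_responses=True makes lindex()
# return str rather than bytes, which keeps the string handling simple.
db = redis.StrictRedis(host='localhost', port=6379, decode_responses=True)

# Back-project three generated-frame indices to high-dimensional coordinates.
points = backProjection(db, [14, 92, 1047])
print(points.shape)  # (n_points, n_atoms, 3) xyz array

Since the catalog entries are plain literal tuples, ast.literal_eval(idx) would be a safer drop-in for eval(idx) in each of these variants.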
Example #2
File: ctl_mv.py Project: DaMSL/ddc
    def backProjection(self, index_list):
      """Perform back projection function for a list of indices. Return a list 
      of high dimensional points (one per index). Check cache for each point and
      condolidate file I/O for all cache misses.
      """

      logging.debug('--------  BACK PROJECTION:  %d POINTS ---', len(index_list))
      bench = microbench('bkproj', self.seqNumFromID())

      # reverse_index = {index_list[i]: i for i in range(len(index_list))}

      source_points = []
      cache_miss = []

      self.trajlist_async = deque()
      
      # DEShaw topology is assumed here
      bench.start()

      # Dereference indices to (file, frame) tuples:
      historical_framelist = []
      generated_framelist = []

      if self.xidreference is None:
        self.xidreference = self.catalog.lrange('xid:reference', 0, -1)

      # pipe = self.catalog.pipeline()
      logging.debug('Select Index List size = %d', len(index_list))
      for idx in index_list:
        # Negation indicates a historical (DEShaw) index:
        index = int(idx)
        if index < 0:
          file_index, frame = deshaw.refFromIndex(-idx)
          historical_framelist.append((file_index, frame))
          # logging.debug('[BP] DEShaw:  file #%d,   frame#%d', file_index, frame)
        else:
          generated_framelist.append(self.xidreference[index])
          # pipe.lindex('xid:reference', index)

      # Load higher-dim point indices from catalog
      # logging.debug('Executing...')
      # start = dt.datetime.now()
      # generated_framelist = pipe.execute()
      # logging.debug('...Executed in %4.1f sec', ((dt.datetime.now()-start).total_seconds()))

      # start = dt.datetime.now()
      # all_idx = self.catalog.lrange('xid:reference', 0, -1)
      # logging.debug('Got ALL pts in %4.1f sec', ((dt.datetime.now()-start).total_seconds()))  


      bench.mark('BP:LD:Redis:xidlist')


      ref = deshaw.topo_prot  # Hard coded for now

      # Group all historical indices by file number and add to frame mask
      logging.debug('Group By file idx (DEshaw)')
      historical_frameMask = {}
      for file_index, frame in historical_framelist:
        if file_index not in historical_frameMask:
          historical_frameMask[file_index] = []
        historical_frameMask[file_index].append(frame)

      for k, v in historical_frameMask.items():
        logging.debug('[BP] Deshaw lookups: %d, %s', k, str(v))


      # Group all generated indices by file index
      logging.debug('Group By file idx (Gen data)')
      groupbyFileIdx = {}
      for idx in generated_framelist:
        file_index, frame = eval(idx)  # entries are "(file_index, frame)" strings
        if file_index not in groupbyFileIdx:
          groupbyFileIdx[file_index] = []
        groupbyFileIdx[file_index].append(frame)

      # Dereference File index to filenames
      logging.debug('Deref fileidx -> file names')
      generated_frameMask = {}
      generated_filemap = {}
      for file_index in groupbyFileIdx:
        filename = self.catalog.lindex('xid:filelist', file_index)
        if filename is None:
          logging.error('File index %d not found in catalog', file_index)
        else:
          key = os.path.splitext(os.path.basename(filename))[0]
          generated_frameMask[key] = groupbyFileIdx[file_index]
          generated_filemap[key] = filename
      bench.mark('BP:GroupBy:Files')

      #  Ensure the cache is alive and connected
      logging.debug('Check Cache client')
      self.cacheclient.connect()

      # Check cache for historical data points
      logging.debug('Checking cache for %d DEShaw files to back-project', len(historical_frameMask))
      for fileno, frames in historical_frameMask.items():
        # Handle the single-frame case separately (get_many covers multi-frame, mixed hit/miss)
        if len(frames) == 1:
          datapt = self.cacheclient.get(fileno, frames[0], 'deshaw')
          dataptlist = [datapt] if datapt is not None else None
        else:
          dataptlist = self.cacheclient.get_many(fileno, frames, 'deshaw')
        if dataptlist is None:
          self.cache_miss += 1
          # logging.debug('[BP] Cache MISS on: %d', fileno)
          cache_miss.append(('deshaw', fileno, frames))
        else:
          self.cache_hit += 1
          # logging.debug('[BP] Cache HIT on: %d', fileno)
          source_points.extend(dataptlist)

      # Check cache for generated data points
      logging.debug('Checking cache for %d generated files to back-project', len(generated_frameMask))
      for filename, frames in generated_frameMask.items():
        # Handle the single-frame case separately (get_many covers multi-frame, mixed hit/miss)
        if len(frames) == 1:
          datapt = self.cacheclient.get(filename, frames[0], 'sim')
          dataptlist = [datapt] if datapt is not None else None
        else:
          dataptlist = self.cacheclient.get_many(filename, frames, 'sim')
        if dataptlist is None:
          self.cache_miss += 1
          # logging.debug('[BP] Cache MISS on: %s', filename)
          cache_miss.append(('sim', generated_filemap[filename], frames))
        else:
          self.cache_hit += 1
          # logging.debug('[BP] Cache HIT on: %s', filename)
          source_points.extend(dataptlist)


      # Package all cached points into one trajectory
      logging.debug('Cache hits: %d points.', len(source_points))
      if len(source_points) > 0:
        source_traj_cached = md.Trajectory(source_points, ref.top)
      else:
        source_traj_cached = None

      # All files were cached. Return back-projected points
      if len(cache_miss) == 0:
        return source_traj_cached
        
      # Add high-dim points for the cache misses, one trajectory (file) at a
      # time, so file I/O stays consolidated
      source_points_uncached = []
      logging.debug('Sequentially loading all trajectories')
      for miss in cache_miss:
        ftype, fileno, framelist = miss
        if ftype == 'deshaw':
          pdb, dcd = deshaw.getHistoricalTrajectory_prot(fileno)
          traj = md.load(dcd, top=pdb)
        elif ftype == 'sim':
          traj = datareduce.load_trajectory(fileno)
        selected_frames = traj.slice(framelist)
        source_points_uncached.extend(selected_frames.xyz)
        bench.mark('BP:LD:File')

      logging.debug('All uncached data collected. Total # points = %d', len(source_points_uncached))
      source_traj_uncached = md.Trajectory(np.array(source_points_uncached), ref.top)
      bench.mark('BP:Build:Traj')
      # bench.show()

      logging.info('--------  Back Projection Complete ---------------')
      if source_traj_cached is None:
        return source_traj_uncached
      else:
        return source_traj_cached.join(source_traj_uncached)
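The two cache-check loops above share one bookkeeping pattern: try a bulk fetch per file, collect the hits, and queue the misses so their file I/O can be consolidated. A generic sketch of that pattern; the cache object and its get/get_many semantics are assumptions modeled on the code above, not a documented API:

def partition_by_cache(cache, frame_mask, ftype):
    """Split {file_key: frames} requests into cached points and misses."""
    hits, misses = [], []
    for key, frames in frame_mask.items():
        if len(frames) == 1:
            pt = cache.get(key, frames[0], ftype)
            pts = [pt] if pt is not None else None
        else:
            pts = cache.get_many(key, frames, ftype)  # None on any miss
        if pts is None:
            misses.append((ftype, key, frames))  # defer to consolidated file load
        else:
            hits.extend(pts)
    return hits, misses

Keeping misses grouped by file means each DCD is opened at most once in the fallback loop, which is the point of the consolidation.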
Example #3
def backProjection(r, index_list):
        """Perform back projection function for a list of indices. Return a list 
        of high dimensional points (one per index). Check cache for each point and
        condolidate file I/O for all cache misses.
        """

        logging.debug('--------  BACK PROJECTION:  %d POINTS ---', len(index_list))

        # reverse_index = {index_list[i]: i for i in range(len(index_list))}

        pipe = r.pipeline()
        for idx in index_list:
          # Negation indicates a historical (DEShaw) index; those are skipped here:
          index = int(idx)
          if index < 0:
            continue
          pipe.lindex('xid:reference', index)

        # Load higher dim point indices from catalog
        generated_framelist = [i for i in pipe.execute() if i is not None]

        ref = deshaw.topo_prot  # Hard coded for now

        # Group all generated indices by file index
        groupbyFileIdx = {}
        for idx in generated_framelist:
          file_index, frame = eval(idx)  # entries are "(file_index, frame)" strings
          if file_index not in groupbyFileIdx:
            groupbyFileIdx[file_index] = []
          groupbyFileIdx[file_index].append(frame)

        # Dereference File index to filenames
        generated_frameMask = {}
        generated_filemap = {}
        for file_index in groupbyFileIdx:
          filename = r.lindex('xid:filelist', file_index)
          if filename is None:
            logging.error('File index %d not found in catalog', file_index)
          else:
            key = os.path.splitext(os.path.basename(filename))[0]
            generated_frameMask[key] = groupbyFileIdx[file_index]
            generated_filemap[key] = filename

        # No cache check in this variant: queue every (file, frames) group for loading
        bplist = []
        for filename, frames in generated_frameMask.items():
          bplist.append(('sim', generated_filemap[filename], frames))

        source_points = []
        logging.debug('Sequentially loading %d trajectories', len(bplist))
        for ftype, fileno, framelist in bplist:
          traj = datareduce.load_trajectory(fileno)
          selected_frames = traj.slice(framelist)
          source_points.extend(selected_frames.xyz)

        logging.debug('All uncached data collected. Total # points = %d', len(source_points))
        source_traj = md.Trajectory(np.array(source_points), ref.top)

        logging.info('--------  Back Projection Complete ---------------')
        return source_traj
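All three variants end the same way: load a trajectory, slice out the requested frames, and stack their xyz coordinates. A standalone mdtraj sketch of that final step, with hypothetical file names:

import mdtraj as md
import numpy as np

traj = md.load('sim_0001.dcd', top='protein.pdb')  # hypothetical DCD + topology
selected = traj.slice([0, 10, 25])                 # keep only the requested frames
points = np.array(selected.xyz)                    # shape: (3, n_atoms, 3)
print(points.shape)

traj.slice returns a new Trajectory restricted to the given frame indices, so selected.xyz is already the (n_frames, n_atoms, 3) array the examples accumulate.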