def index(self, persist=True):
    """(Re)build the interval tree index over the stored interval list.

    Only references whose index has no tree yet, or whose index is flagged
    dirty, are rebuilt; every other index is left untouched.

    .. note::
        Interval tree performance will not be obtained unless the user
        *explicitly* calls this method.  This is typically called after a
        set of intervals has been added.

    :param persist: permanently store the index on disk
    :type persist: bool
    """
    tbl = self.hdf_group._f_getChild(INTERVAL_TABLE)
    tree_group = self.hdf_group._f_getChild(INTERVAL_INDEX_GROUP)
    # per-reference interval counts; fetched lazily so nothing is computed
    # when every index is already up to date
    refcountdict = None
    for rname, idx in self.indexes.iteritems():
        # skip indexes that are already built and clean
        if idx.tree is not None and not idx.dirty:
            continue
        if refcountdict is None:
            refcountdict = self._get_ref_count_dict()
        # the tree is constructed with the number of intervals on this
        # reference
        tree = IntervalTree(refcountdict[rname])
        # Pass the query variable explicitly instead of relying on PyTables
        # looking up ``rname`` in the caller's namespace; column names such
        # as ``ref`` are still resolved against the table by default.
        matching_rows = tbl.where('ref == rname', condvars={'rname': rname})
        # interval ids are 1-based and follow the order rows are returned
        for cur_id, row in enumerate(matching_rows, 1):
            tree.insert(cur_id, row[START_COL_NAME], row[END_COL_NAME],
                        row.nrow)
        # update index
        idx.tree = tree
        idx.dirty = False
        # save for future use
        if persist:
            tree.tohdf(tree_group, rname)
def _init_index(self):
    """Prepare ``self.indexes``, restoring any trees already stored.

    Creates the index group under ``self.hdf_group`` when it is missing,
    then registers one ``_Index`` entry per name returned by
    ``get_rnames()``.  When the index group contains a child node with
    that name, the entry's tree is loaded from it with
    ``IntervalTree.fromhdf``.
    """
    # make sure the group holding the stored trees exists
    if INTERVAL_INDEX_GROUP not in self.hdf_group:
        self._get_hdf_file().createGroup(self.hdf_group,
                                         INTERVAL_INDEX_GROUP)
    index_group = self.hdf_group._f_getChild(INTERVAL_INDEX_GROUP)
    # reset the mapping before enumerating reference names
    self.indexes = {}
    for rname in self.get_rnames():
        entry = self._Index()
        # load index if it exists
        if rname in index_group:
            stored_node = index_group._f_getChild(rname)
            entry.tree = IntervalTree.fromhdf(stored_node)
        self.indexes[rname] = entry