def _plant_trees(self):
    """
    Count the trees held by every entry of ``self._lhtfiles``, allocate
    ``self._trees`` to hold them, and create one root TreeNode per tree.

    Each root node remembers the file object it came from (``_lht``) and
    its position within that file (``_index_in_lht``).
    """
    # total number of trees over all files
    ntrees_tot = sum(lht.ntrees for lht in self._lhtfiles)

    self._trees = np.empty(ntrees_tot, dtype=object)
    pbar = get_pbar("Loading tree roots", ntrees_tot)

    planted = 0
    for lht in self._lhtfiles:
        ntrees = lht.ntrees
        root_uids = lht.all_uids[lht.nhalos_before_tree]
        for index_in_file in range(ntrees):
            # the uid (unique id) of the root comes from the file
            root = TreeNode(root_uids[index_in_file], arbor=self, root=True)
            # remember where this tree lives so it can be found later
            root._lht = lht
            root._index_in_lht = index_in_file
            self._trees[planted] = root
            planted += 1
        pbar.update(planted)
    pbar.finish()
def _generate_tree_node(self, root_node, node_link):
    """
    Return the node that ``node_link`` points at inside the tree
    rooted at ``root_node``.

    A link whose ``tree_id`` is 0 yields ``root_node`` itself. Any other
    ``tree_id`` yields a new non-root TreeNode whose uid is
    ``root_node.uids[tree_id]``, with ``root`` and ``_link`` set.
    """
    position = node_link.tree_id
    if position == 0:
        # the link refers to the root, which already exists
        return root_node

    new_node = TreeNode(root_node.uids[position], arbor=self, root=False)
    new_node.root = root_node
    new_node._link = node_link
    return new_node
def _grow_tree(self, tree_node, **kwargs):
    """
    Create an array of TreeNodes hanging off the root node and
    assemble the tree structure.

    Returns immediately when ``self.is_grown(tree_node)`` is true.
    Otherwise ``self._setup_tree(tree_node, **kwargs)`` is called, one
    TreeNode is created for every entry of ``tree_node.uids`` after the
    first (the first slot is ``tree_node`` itself), and the resulting
    object array is stored as ``tree_node._nodes``. Every node gets
    ``treeid`` (its position in the array) and ``root`` set, and nodes
    are linked to their descendents using ``tree_node.desc_uids``.
    """
    if self.is_grown(tree_node):
        return

    self._setup_tree(tree_node, **kwargs)

    uids = tree_node.uids
    nhalos = uids.size
    # Use the builtin ``object``: the ``np.object`` alias was deprecated
    # in NumPy 1.20 and removed in 1.24.
    nodes = np.empty(nhalos, dtype=object)
    nodes[0] = tree_node
    for i in range(1, nhalos):
        nodes[i] = TreeNode(uids[i], arbor=self)
    tree_node._nodes = nodes

    # Add tree information to nodes and remember where each uid lives.
    uidmap = {}
    for i, node in enumerate(nodes):
        node.treeid = i
        node.root = tree_node
        uidmap[uids[i]] = i

    # Link ancestor/descendents.
    # This is a separate loop because a descendent may appear later in
    # the array than its ancestor, so uidmap must be complete first.
    for i, node in enumerate(nodes):
        descid = tree_node.desc_uids[i]
        if descid != -1:
            desc = nodes[uidmap[descid]]
            desc.add_ancestor(node)
            node.descendent = desc
def _plant_trees(self):
    """
    Read the root uids and tree index arrays from the HDF5 file at
    ``self.filename`` and create one root TreeNode per tree.

    Side effects:
      * ``self._node_io._si`` / ``self._node_io._ei`` are set from the
        "index/tree_start_index" and "index/tree_end_index" datasets.
      * ``self._node_io.data_files`` gets one YTreeDataFile per entry
        of ``_si``, named ``<prefix>_<4-digit index><suffix>``.
      * ``self._trees`` becomes an object array of root nodes, each
        carrying its array position in ``_ai``.
    """
    # The context manager closes the file even if a read fails.
    with h5py.File(self.filename, "r") as fh:
        ntrees = fh.attrs["total_trees"]
        uids = fh["data"]["uid"][()].astype(np.int64)
        self._node_io._si = fh["index"]["tree_start_index"][()]
        self._node_io._ei = fh["index"]["tree_end_index"][()]

    self._node_io.data_files = [
        YTreeDataFile("%s_%04d%s" % (self._prefix, i, self._suffix))
        for i in range(self._node_io._si.size)
    ]

    # Use the builtin ``object``: the ``np.object`` alias was deprecated
    # in NumPy 1.20 and removed in 1.24.
    self._trees = np.empty(ntrees, dtype=object)
    for i in range(ntrees):
        my_node = TreeNode(uids[i], arbor=self, root=True)
        my_node._ai = i
        self._trees[i] = my_node
def _generate_root_node(self, index):
    """
    Build and return the root TreeNode stored at position ``index``
    of the arrays in ``self._node_info``.

    The attributes named in ``self._node_con_attrs`` are passed, in
    order, as positional arguments to the TreeNode constructor. The
    attributes in ``self._node_io_attrs`` are always copied onto the
    node; those in ``self._node_too_attrs`` are copied only when their
    value is not -1.
    """
    info = self._node_info

    constructor_args = tuple(
        info[name][index] for name in self._node_con_attrs
    )
    root = TreeNode(*constructor_args, arbor=self, root=True)
    root._arbor_index = index

    for name in self._node_io_attrs:
        setattr(root, name, info[name][index])

    for name in self._node_too_attrs:
        # -1 marks a value that should not be set on the node
        if info[name][index] != -1:
            setattr(root, name, info[name][index])

    return root
def _plant_trees(self):
    """
    Read the "uid", "desc_id" and "tree_id" datasets from the HDF5 file
    at ``self.filename`` and create a root TreeNode for every entry
    whose desc_id is -1.

    Side effects:
      * ``self._trees`` becomes an object array of root nodes. Each
        root stores in ``_fi`` the indices where ``tree_id`` equals its
        uid, and in ``_tree_size`` how many such indices there are.
      * ``self._field_cache`` is reset to hold the "uid" and "desc_id"
        arrays.
      * ``self._ri`` holds the indices of the root entries.
    """
    # The context manager closes the file even if a read fails.
    with h5py.File(self.filename, "r") as fh:
        uids = fh["data"]["uid"][()].astype(np.int64)
        descids = fh["data"]["desc_id"][()].astype(np.int64)
        treeids = fh["data"]["tree_id"][()].astype(np.int64)

    root_filter = descids == -1
    roots = uids[root_filter]
    ntrees = roots.size

    # Use the builtin ``object``: the ``np.object`` alias was deprecated
    # in NumPy 1.20 and removed in 1.24.
    self._trees = np.empty(ntrees, dtype=object)
    for i, root in enumerate(roots):
        my_node = TreeNode(root, arbor=self, root=True)
        my_node._fi = np.where(root == treeids)[0]
        my_node._tree_size = my_node._fi.size
        self._trees[i] = my_node

    self._field_cache = {}
    self._field_cache["uid"] = uids
    self._field_cache["desc_id"] = descids
    self._ri = np.where(root_filter)[0]
def _plant_trees(self):
    """
    Scan the data file of ``self._node_io`` for tree headers and create
    one root TreeNode per header.

    Starting at ``self._hoffset``, the file is read in blocks of 32768
    characters. Every "#" found starts a header; the text between the
    header key and the end of that line is parsed as the integer uid.
    Each root node records ``_si`` (the offset just past its header
    line) and ``_ei`` (the offset just before the next header, or the
    final read position for the last tree).

    ``self._trees`` is allocated with ``self._ntrees`` slots.
    """
    # number of characters between "#" and the start of the uid
    lkey = len("tree ") + 1
    block_size = 32768

    data_file = self._node_io.data_file
    data_file.open()
    try:
        fh = data_file.fh
        fh.seek(0, 2)
        file_size = fh.tell()
        pbar = get_pbar("Loading tree roots", file_size)
        fh.seek(self._hoffset)

        # Use the builtin ``object``: the ``np.object`` alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        self._trees = np.empty(self._ntrees, dtype=object)

        offset = self._hoffset
        itree = 0
        nblocks = np.ceil(
            float(file_size - self._hoffset) / block_size
        ).astype(np.int64)
        for _ in range(nblocks):
            my_block = min(block_size, file_size - offset)
            if my_block <= 0:
                break
            buff = fh.read(my_block)

            lihash = -1
            for _ in range(buff.count("#")):
                ihash = buff.find("#", lihash + 1)
                inl = buff.find("\n", ihash + 1)
                if inl < 0:
                    # the header line was cut by the block boundary;
                    # pull in the rest of the line
                    buff += fh.readline()
                    inl = len(buff)
                uid = int(buff[ihash + lkey:inl])
                lihash = ihash

                my_node = TreeNode(uid, arbor=self, root=True)
                my_node._si = offset + inl + 1
                self._trees[itree] = my_node
                if itree > 0:
                    # the previous tree ends where this header begins
                    self._trees[itree - 1]._ei = offset + ihash - 1
                itree += 1

            offset = fh.tell()
            pbar.update(offset)

        # An empty tree array has no last element to close off.
        if self._trees.size > 0:
            self._trees[-1]._ei = offset
    finally:
        # release the file handle even if parsing fails
        data_file.close()
    pbar.finish()
def _plant_trees(self):
    """
    Construct all trees.

    Nodes are spread out over multiple files, so every node is created
    here and all ancestor/descendent links are made while walking
    ``self.data_files`` in order. The links are held by the nodes
    themselves; nodes are not stored in a per-tree array until
    _setup_tree is called.

    Does nothing when ``self.is_planted`` is already true. On
    completion ``self._trees`` holds the root nodes and ``self._size``
    their count.
    """
    if self.is_planted:
        return

    # this can be called once with the list, but fields are
    # not guaranteed to be returned in order.
    if self._has_uids:
        id_fields = ["uid", "desc_uid"]
    else:
        id_fields = ["halo_id", "desc_id"]
    fields = [
        self.field_info.resolve_field_dependencies([name])[0][0]
        for name in id_fields
    ]
    halo_id_f, desc_id_f = fields
    dtypes = {field: np.int64 for field in fields}

    uid = 0
    trees = []
    nfiles = len(self.data_files)
    # nodes and halo ids of the previously processed entry
    descs = lastids = None

    pbar = get_pbar("Planting trees", nfiles)
    for i, dfl in enumerate(self.data_files):
        if not isinstance(dfl, list):
            dfl = [dfl]

        batches = []
        bsize = []
        hids = []
        # maps a descendent id to the nodes that name it as descendent
        ancs = defaultdict(list)
        for data_file in dfl:
            data = data_file._read_fields(fields, dtypes=dtypes)
            nhalos = len(data[halo_id_f])
            batch = np.empty(nhalos, dtype=object)

            for it in range(nhalos):
                descid = data[desc_id_f][it]
                my_uid = data[halo_id_f][it] if self._has_uids else uid

                root = i == 0 or descid == -1
                # The data says a descendent exists, but it's not there.
                # This shouldn't happen, but it does sometimes.
                if not root and descid not in lastids:
                    root = True
                    descid = -1
                    data[desc_id_f][it] = -1

                node = TreeNode(my_uid, arbor=self, root=root)
                node._fi = it
                node.data_file = data_file
                batch[it] = node

                if root:
                    trees.append(node)
                else:
                    ancs[descid].append(node)
                uid += 1

            data_file.trees = batch
            batches.append(batch)
            bsize.append(batch.size)
            hids.append(data[halo_id_f])

        if i > 0:
            for descid, ancestors in ancs.items():
                # this will not be fast
                descendent = descs[descid == lastids][0]
                descendent._ancestors = ancestors
                for ancestor in ancestors:
                    ancestor._descendent = descendent

        if i < nfiles - 1:
            # flatten this entry's nodes and ids for the next iteration
            descs = np.empty(sum(bsize), dtype=object)
            lastids = np.empty(descs.size, dtype=np.int64)
            start = 0
            for batch, hid, count in zip(batches, hids, bsize):
                stop = start + count
                descs[start:stop] = batch
                lastids[start:stop] = hid
                start = stop

        pbar.update(i + 1)
    pbar.finish()

    self._trees = np.array(trees)
    self._size = self._trees.size