Beispiel #1
0
    def _read_mtree(self):
        """
        Read map of progenitors to descendents.
        This is the ".AHF_mtree" file.
        """
        if self.mtree_filename is None:
            return None

        data = defaultdict(list)
        descid = descpart = None

        f = open(self.mtree_filename, "r")
        for line, offset in f_text_block(f):
            if line.startswith("#"):
                continue
            if line[0].isdigit():
                oline = line.split()
                descid = int(oline[0])
                descpart = int(oline[1])
            else:
                oline = line.split()
                data["shared"].append(int(oline[0]))
                data["prog_id"].append(int(oline[1]))
                data["prog_part"].append(int(oline[2]))
                data["desc_id"].append(descid)
                data["desc_part"].append(descpart)
        f.close()

        if not data:
            return None

        for field in data:
            data[field] = np.array(data[field])
        return data
Beispiel #2
0
    def _read_data_default(self, rfields, dtypes):
        if not rfields:
            return {}

        fi = self.arbor.field_info
        field_data = \
          self._create_field_arrays(rfields, dtypes)
        offsets = []

        self.open()
        f = self.fh
        f.seek(self._hoffset)
        file_size = self.file_size - self._hoffset
        for line, offset in f_text_block(f, file_size=file_size):
            offsets.append(offset)
            sline = line.split()
            for field in rfields:
                field_data[field].append(sline[fi[field]["column"]])
        self.close()

        for field in rfields:
            field_data[field] = \
              np.array(field_data[field], dtype=dtypes[field])

        if self.offsets is None:
            self.offsets = np.array(offsets)

        return field_data
Beispiel #3
0
    def _read_fields(self, fields, tree_nodes=None, dtypes=None):
        if dtypes is None:
            dtypes = {}

        fi = self.arbor.field_info
        hfields = [
            field for field in fields if fi[field]["column"] == "header"
        ]
        rfields = set(fields).difference(hfields)

        hfield_values = dict(
            (field, getattr(self, field)) for field in hfields)

        if tree_nodes is None:
            field_data = dict((field, []) for field in fields)
            offsets = []
            self.open()
            f = self.fh
            f.seek(self._hoffset)
            file_size = self.file_size - self._hoffset
            for line, offset in f_text_block(f, file_size=file_size):
                offsets.append(offset)
                sline = line.split()
                for field in hfields:
                    field_data[field].append(hfield_values[field])
                for field in rfields:
                    dtype = dtypes.get(field, self._default_dtype)
                    field_data[field].append(dtype(sline[fi[field]["column"]]))
            self.close()
            if self.offsets is None:
                self.offsets = np.array(offsets)

        else:
            ntrees = len(tree_nodes)
            field_data = \
              dict((field,
                    np.empty(ntrees,
                             dtype=dtypes.get(field, self._default_dtype)))
                    for field in fields)

            # fields from the file header
            for field in hfields:
                field_data[field][:] = hfield_values[field]

            # fields from the actual data
            self.open()
            f = self.fh
            for i in range(ntrees):
                f.seek(self.offsets[tree_nodes[i]._fi])
                line = f.readline()
                sline = line.split()
                for field in rfields:
                    dtype = dtypes.get(field, self._default_dtype)
                    field_data[field][i] = dtype(sline[fi[field]["column"]])
            self.close()

        return field_data
Beispiel #4
0
    def _parse_data_header(self):
        """
        Measure the file and locate the end of its comment header.

        Sets ``self.file_size`` to the size of the file and
        ``self._hoffset`` to the offset of the first line that does not
        start with "#". If every line is a comment, ``_hoffset`` points
        just past the last line; for an empty file it is 0.
        """

        self.open()
        try:
            fh = self.fh
            fh.seek(0, 2)
            self.file_size = fh.tell()
            fh.seek(0)

            # Default covers an empty file, where the loop never runs.
            hoffset = 0
            for line, loc in f_text_block(fh):
                if not line.startswith("#"):
                    # First data line: the header ends where it begins.
                    hoffset = loc
                    break
                # Still in the header: skip this line and its newline.
                hoffset = loc + len(line) + 1
            self._hoffset = hoffset
        finally:
            self.close()
Beispiel #5
0
def test_f_text_block():
    """
    Check that the offsets yielded by f_text_block point at their lines.

    For each block size, every (line, offset) pair is collected, then the
    file is re-read by seeking to each offset; the stripped result must
    equal the line that f_text_block yielded.
    """
    for block_size in [40, 32768]:
        lines = []
        locs = []
        # The context manager closes the file on every iteration,
        # including when an assertion fails.
        with open(R63, "r") as f:
            for line, loc in f_text_block(f, block_size=block_size):
                lines.append(line)
                locs.append(loc)

            lines2 = []
            for loc in locs:
                f.seek(loc)
                lines2.append(f.readline().strip())

        for l1, l2 in zip(lines, lines2):
            assert l1 == l2
Beispiel #6
0
    def _read_fields(self, fields, tree_nodes=None, dtypes=None):
        if dtypes is None:
            dtypes = {}

        fi = self.arbor.field_info
        # Separate fields into one that come from the file header,
        # the mtree file, and the halos file.
        data_fields = defaultdict(list)
        for field in fields:
            source = fi[field]["file"]
            data_fields[source].append(field)

        hfields = data_fields.pop("header", [])
        hfield_values = dict((field, getattr(self, field))
                             for field in hfields)
        rfields = data_fields.pop("halos", [])
        tfields = data_fields.pop("mtree", [])
        # If we needs desc_ids, make sure to get IDs so
        # we can link them.
        if tfields:
            if "ID" not in rfields:
                rfields.append("ID")

        if tree_nodes is None:
            field_data = dict((field, []) for field in rfields + hfields)
            offsets = []
            self.open()
            f = self.fh
            f.seek(self._hoffset)
            file_size = self.file_size - self._hoffset
            for line, offset in f_text_block(f, file_size=file_size):
                offsets.append(offset)
                sline = line.split()
                for field in hfields:
                    field_data[field].append(hfield_values[field])
                for field in rfields:
                    dtype = dtypes.get(field, self._default_dtype)
                    field_data[field].append(dtype(sline[fi[field]["column"]]))
            self.close()
            if self.offsets is None:
                self.offsets = np.array(offsets)

        else:
            ntrees = len(tree_nodes)
            field_data = \
              dict((field, np.empty(
                  ntrees, dtype=dtypes.get(field, self._default_dtype)))
                  for field in rfields + hfields)

            # fields from the file header
            for field in hfields:
                field_data[field][:] = hfield_values[field]

            # fields from the actual data
            self.open()
            f = self.fh
            for i in range(ntrees):
                f.seek(self.offsets[tree_nodes[i]._fi])
                line = f.readline()
                sline = line.split()
                for field in rfields:
                    dtype = dtypes.get(field, self._default_dtype)
                    field_data[field][i] = dtype(sline[fi[field]["column"]])
            self.close()

        # use data from the mtree file to get descendant ids
        if tfields:
            links = self.links
            descids = np.empty(
                len(field_data["ID"]),
                dtype=dtypes.get(field, self._default_dtype))
            if self.links == -1:
                descids[:] = -1
            else:
                for i, hid in enumerate(field_data["ID"]):
                    inlink = hid == links["prog_id"]
                    if not inlink.any():
                        descids[i] = -1
                    else:
                        descids[i] = links["desc_id"][np.where(inlink)[0][0]]
            field_data["desc_id"] = descids

        for field in field_data:
            if isinstance(field_data[field], np.ndarray):
                continue
            field_data[field] = \
              np.array(field_data[field],
                       dtype=dtypes.get(field, self._default_dtype))

        return field_data
Beispiel #7
0
    def _plant_trees(self):
        if self.is_planted:
            return

        f = open(self.filename, 'r')
        f.seek(self._hoffset)
        ldata = list(
            map(lambda x: [int(x[0]),
                           int(x[1]),
                           int(x[2]), x[3],
                           len(x[0])], [
                               line.split() for line, _ in f_text_block(
                                   f, pbar_string='Reading locations')
                           ]))
        f.close()

        self._size = len(ldata)

        # It's faster to create and sort arrays and then sort ldata
        # for some reason.
        dfns = np.unique([datum[3] for datum in ldata])
        dfns.sort()
        fids = np.array([datum[1] for datum in ldata])
        fids.sort()
        ufids = np.unique(fids)
        ufids.sort()

        # Some data files may be empty and so unlisted.
        # Make sure file ids and names line up.
        data_files = [None] * (ufids.max() + 1)
        for i, fid in enumerate(ufids):
            data_files[fid] = dfns[i]
        self.data_files = \
          [None if fn is None
           else ConsistentTreesDataFile(os.path.join(self.directory, fn))
           for fn in data_files]

        ldata.sort(key=operator.itemgetter(1, 2))
        pbar = get_pbar("Loading tree roots", self._size)

        # Set end offsets for each tree.
        # We don't get them from the location file.
        lkey = len("tree ") + 3  # length of the separation line between trees
        same_file = np.diff(fids, append=fids[-1] + 1) == 0

        for i, tdata in enumerate(ldata):
            self._node_info['uid'][i] = tdata[0]
            self._node_info['_fi'][i] = tdata[1]
            self._node_info['_si'][i] = tdata[2]
            # Get end index from next tree.
            if same_file[i]:
                self._node_info['_ei'][i] = ldata[i + 1][2] - lkey - tdata[4]
            pbar.update(i + 1)
        pbar.finish()

        # Get end index for last trees in files.
        for i in np.where(~same_file)[0]:
            data_file = self.data_files[fids[i]]
            data_file.open()
            data_file.fh.seek(0, 2)
            self._node_info['_ei'][i] = data_file.fh.tell()
            data_file.close()