Ejemplo n.º 1
0
    def readall(self):
        """
        Read all available data, returning a dictionary

        This provides ``Position`` and optionally ``Velocity`` columns,
        alongside every other field of the structured array built from
        the text file.

        Returns
        -------
        dict
            maps each field name of the (optionally selected) data to a
            copy of that column

        Raises
        ------
        ValueError
            if ``rsd`` is set while ``velcols`` is None; the
            redshift-space shift cannot be computed without velocities
        """
        from nbodykit.ndarray import extend_dtype

        # velocities are needed when requested explicitly or when RSD
        # is applied; fail early with a clear message instead of a
        # TypeError from ``len(None)`` further down
        need_velocity = self.velcols is not None or self.rsd is not None
        if need_velocity and self.velcols is None:
            raise ValueError("'velcols' must be specified in order to "
                             "apply 'rsd'")

        # read in the plain text file as a recarray; this spells out what
        # ``numpy.recfromtxt`` did (``dtype=None`` + recarray view), since
        # that shortcut is no longer in the top-level NumPy 2.0 namespace
        kwargs = {}
        kwargs['comments'] = '#'
        kwargs['names'] = self.names
        kwargs['usecols'] = self.usecols
        kwargs['dtype'] = None
        data = numpy.genfromtxt(self.path, **kwargs).view(numpy.recarray)
        nobj = len(data)

        # copy the data into an array that has room for the new fields
        new_dtypes = [('Position', ('f4', len(self.poscols)))]
        if need_velocity:
            new_dtypes += [('Velocity', ('f4', len(self.velcols)))]
        data = extend_dtype(data, new_dtypes)

        # get position and velocity, if we have it
        # (``vstack`` needs a real sequence, not a generator)
        pos = numpy.vstack([data[k] for k in self.poscols]).T.astype('f4')
        pos *= self.posf
        if need_velocity:
            vel = numpy.vstack([data[k] for k in self.velcols]).T.astype('f4')
            vel *= self.velf
            data['Velocity'] = vel

        # do RSD: shift along the chosen axis and wrap into the box
        if self.rsd is not None:
            axis = "xyz".index(self.rsd)
            pos[:, axis] += vel[:, axis]
            pos[:, axis] %= self.BoxSize[axis]
        data['Position'] = pos

        # select based on input conditions
        if self.select is not None:
            mask = self.select.get_mask(data)
            data = data[mask]
        self.logger.info("total number of objects selected is %d / %d" %
                         (len(data), nobj))

        # hand back independent copies of every column
        return dict((name, data[name].copy()) for name in data.dtype.names)
Ejemplo n.º 2
0
    def readall(self):
        """
        Read all available data, returning a dictionary

        This provides ``Position`` and optionally ``Velocity`` columns,
        alongside every other field of the structured array built from
        the text file.

        Returns
        -------
        dict
            maps each field name of the (optionally selected) data to a
            copy of that column

        Raises
        ------
        ValueError
            if ``rsd`` is set while ``velcols`` is None; the
            redshift-space shift cannot be computed without velocities
        """
        from nbodykit.ndarray import extend_dtype

        # velocities are needed when requested explicitly or when RSD
        # is applied; fail early with a clear message instead of a
        # TypeError from ``len(None)`` further down
        need_velocity = self.velcols is not None or self.rsd is not None
        if need_velocity and self.velcols is None:
            raise ValueError("'velcols' must be specified in order to apply 'rsd'")

        # read in the plain text file as a recarray; this spells out what
        # ``numpy.recfromtxt`` did (``dtype=None`` + recarray view), since
        # that shortcut is no longer in the top-level NumPy 2.0 namespace
        kwargs = {}
        kwargs["comments"] = "#"
        kwargs["names"] = self.names
        kwargs["usecols"] = self.usecols
        kwargs["dtype"] = None
        data = numpy.genfromtxt(self.path, **kwargs).view(numpy.recarray)
        nobj = len(data)

        # copy the data into an array that has room for the new fields
        new_dtypes = [("Position", ("f4", len(self.poscols)))]
        if need_velocity:
            new_dtypes += [("Velocity", ("f4", len(self.velcols)))]
        data = extend_dtype(data, new_dtypes)

        # get position and velocity, if we have it
        # (``vstack`` needs a real sequence, not a generator)
        pos = numpy.vstack([data[k] for k in self.poscols]).T.astype("f4")
        pos *= self.posf
        if need_velocity:
            vel = numpy.vstack([data[k] for k in self.velcols]).T.astype("f4")
            vel *= self.velf
            data["Velocity"] = vel

        # do RSD: shift along the chosen axis and wrap into the box
        if self.rsd is not None:
            axis = "xyz".index(self.rsd)
            pos[:, axis] += vel[:, axis]
            pos[:, axis] %= self.BoxSize[axis]
        data["Position"] = pos

        # select based on input conditions
        if self.select is not None:
            mask = self.select.get_mask(data)
            data = data[mask]
        self.logger.info("total number of objects selected is %d / %d" % (len(data), nobj))

        # hand back independent copies of every column
        return dict((name, data[name].copy()) for name in data.dtype.names)
Ejemplo n.º 3
0
    def readall(self):
        """
        Read all available data, returning a dictionary

        This provides ``Position`` and optionally ``Velocity`` columns,
        as well as any columns listed in ``names``

        When ``ftype`` is 'auto' it is resolved (and stored back on the
        instance) to 'hdf5' if ``path`` ends in '.hdf5', else to 'text'.

        Returns
        -------
        dict
            maps each field name of the (optionally selected) data to a
            copy of that column

        Raises
        ------
        ImportError
            if pandas is not installed
        ValueError
            if ``ftype`` is not one of 'auto', 'hdf5' or 'text'; if
            ``rsd`` is set while ``velcols`` is None; or if the input
            already has a 'Position' or 'Velocity' column
        """
        from nbodykit.ndarray import extend_dtype
        try:
            import pandas as pd
        except ImportError:
            # only a missing pandas is translated; anything else propagates
            name = self.__class__.__name__
            raise ImportError("pandas must be installed to use %s" % name)

        # velocities are needed when requested explicitly or when RSD
        # is applied; fail early with a clear message instead of a
        # TypeError from ``len(None)`` further down
        need_velocity = self.velcols is not None or self.rsd is not None
        if need_velocity and self.velcols is None:
            raise ValueError(
                "'velcols' must be specified in order to apply 'rsd'")

        if self.ftype == 'auto':
            if self.path.endswith('.hdf5'):
                self.ftype = 'hdf5'
            else:
                self.ftype = 'text'

        # read in the hdf5 file using pandas
        if self.ftype == 'hdf5':
            data = pd.read_hdf(self.path, self.names[0], columns=self.usecols)
        # read in the plain text file using pandas
        elif self.ftype == 'text':
            kwargs = {}
            kwargs['comment'] = '#'
            kwargs['names'] = self.names
            kwargs['header'] = None
            kwargs['engine'] = 'c'
            # whitespace-delimited; ``delim_whitespace=True`` is deprecated
            # in recent pandas in favour of this equivalent separator
            kwargs['sep'] = r'\s+'
            kwargs['usecols'] = self.usecols
            data = pd.read_csv(self.path, **kwargs)
        else:
            # previously fell through to a NameError on ``data``
            raise ValueError(
                "unrecognized ftype '%s'; should be 'auto', 'hdf5' or 'text'"
                % self.ftype)

        # make sure 'Position' or 'Velocity' aren't columns already
        if 'Position' in data.columns:
            raise ValueError(
                "'Position' should not be a named column in input data")
        if 'Velocity' in data.columns:
            raise ValueError(
                "'Velocity' should not be a named column in input data")

        # objects read initially
        nobj = len(data)

        # copy the data into an array that has room for the new fields
        # NOTE: ``to_records()`` keeps the DataFrame index as an extra
        # field by default, so it ends up in the returned dictionary too
        new_dtypes = [('Position', ('f4', len(self.poscols)))]
        if need_velocity:
            new_dtypes += [('Velocity', ('f4', len(self.velcols)))]
        toret = extend_dtype(data.to_records(), new_dtypes)

        # get position and velocity, if we have it
        pos = data[self.poscols].values.astype('f4')
        pos *= self.posf
        if need_velocity:
            vel = data[self.velcols].values.astype('f4')
            vel *= self.velf
            toret['Velocity'] = vel

        # the DataFrame is no longer needed; drop our reference to it
        del data

        # shift position by RSD along the chosen axis, wrapping into the box
        if self.rsd is not None:
            axis = "xyz".index(self.rsd)
            pos[:, axis] += vel[:, axis]
            pos[:, axis] %= self.BoxSize[axis]
        toret['Position'] = pos

        # select based on input conditions
        if self.select is not None:
            mask = self.select.get_mask(toret)
            toret = toret[mask]
        self.logger.info("total number of objects selected is %d / %d" %
                         (len(toret), nobj))

        # hand back independent copies of every column
        return dict((name, toret[name].copy()) for name in toret.dtype.names)
Ejemplo n.º 4
0
    def readall(self):
        """
        Read all available data, returning a dictionary

        This provides ``Position`` and optionally ``Velocity`` columns,
        as well as any columns listed in ``names``

        When ``ftype`` is 'auto' it is resolved (and stored back on the
        instance) to 'hdf5' if ``path`` ends in '.hdf5', else to 'text'.

        Returns
        -------
        dict
            maps each field name of the (optionally selected) data to a
            copy of that column

        Raises
        ------
        ImportError
            if pandas is not installed
        ValueError
            if ``ftype`` is not one of 'auto', 'hdf5' or 'text'; if
            ``rsd`` is set while ``velcols`` is None; or if the input
            already has a 'Position' or 'Velocity' column
        """
        from nbodykit.ndarray import extend_dtype
        try:
            import pandas as pd
        except ImportError:
            # only a missing pandas is translated; anything else propagates
            name = self.__class__.__name__
            raise ImportError("pandas must be installed to use %s" %name)

        # velocities are needed when requested explicitly or when RSD
        # is applied; fail early with a clear message instead of a
        # TypeError from ``len(None)`` further down
        need_velocity = self.velcols is not None or self.rsd is not None
        if need_velocity and self.velcols is None:
            raise ValueError("'velcols' must be specified in order to apply 'rsd'")

        if self.ftype == 'auto':
            if self.path.endswith('.hdf5'):
                self.ftype = 'hdf5'
            else:
                self.ftype = 'text'

        # read in the hdf5 file using pandas
        if self.ftype == 'hdf5':
            data = pd.read_hdf(self.path, self.names[0], columns=self.usecols)
        # read in the plain text file using pandas
        elif self.ftype == 'text':
            kwargs = {}
            kwargs['comment'] = '#'
            kwargs['names'] = self.names
            kwargs['header'] = None
            kwargs['engine'] = 'c'
            # whitespace-delimited; ``delim_whitespace=True`` is deprecated
            # in recent pandas in favour of this equivalent separator
            kwargs['sep'] = r'\s+'
            kwargs['usecols'] = self.usecols
            data = pd.read_csv(self.path, **kwargs)
        else:
            # previously fell through to a NameError on ``data``
            raise ValueError("unrecognized ftype '%s'; should be 'auto', 'hdf5' or 'text'" %self.ftype)

        # make sure 'Position' or 'Velocity' aren't columns already
        if 'Position' in data.columns:
            raise ValueError("'Position' should not be a named column in input data")
        if 'Velocity' in data.columns:
            raise ValueError("'Velocity' should not be a named column in input data")

        # objects read initially
        nobj = len(data)

        # copy the data into an array that has room for the new fields
        # NOTE: ``to_records()`` keeps the DataFrame index as an extra
        # field by default, so it ends up in the returned dictionary too
        new_dtypes = [('Position', ('f4', len(self.poscols)))]
        if need_velocity:
            new_dtypes += [('Velocity', ('f4', len(self.velcols)))]
        toret = extend_dtype(data.to_records(), new_dtypes)

        # get position and velocity, if we have it
        pos = data[self.poscols].values.astype('f4')
        pos *= self.posf
        if need_velocity:
            vel = data[self.velcols].values.astype('f4')
            vel *= self.velf
            toret['Velocity'] = vel

        # the DataFrame is no longer needed; drop our reference to it
        del data

        # shift position by RSD along the chosen axis, wrapping into the box
        if self.rsd is not None:
            axis = "xyz".index(self.rsd)
            pos[:, axis] += vel[:, axis]
            pos[:, axis] %= self.BoxSize[axis]
        toret['Position'] = pos

        # select based on input conditions
        if self.select is not None:
            mask = self.select.get_mask(toret)
            toret = toret[mask]
        self.logger.info("total number of objects selected is %d / %d" % (len(toret), nobj))

        # hand back independent copies of every column
        return dict((name, toret[name].copy()) for name in toret.dtype.names)