Example #1
    def getPositions(self, first=None, last=None, save=True):

        #Get data pointer
        data = bigfile.BigData(self.fp)

        #Read in positions in Mpc/h
        if (first is None) or (last is None):
            positions = data["Position"][:] * self.Mpc_over_h
            aemit = data["Aemit"][:]
        else:
            positions = data["Position"][first:last] * self.Mpc_over_h
            aemit = data["Aemit"][first:last]

        #Enforce periodic boundary conditions
        for n in (0, 1):
            positions[:, n][positions[:, n] < 0] += self.header["box_size"]
            positions[:, n][positions[:, n] >
                            self.header["box_size"]] -= self.header["box_size"]

        #Maybe save
        if save:
            self.positions = positions
            self.aemit = aemit

        #Initialize useless attributes to None
        self.weights = None
        self.virial_radius = None
        self.concentration = None

        #Return
        return positions
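The masked assignments above wrap coordinates that land at most one box length outside the volume. For comparison, here is a minimal sketch of the same wrap using numpy.mod, which also handles points farther than one box length away; box_size stands in for self.header["box_size"]:

    import numpy

    box_size = 240.0  # stand-in for self.header["box_size"]
    positions = numpy.array([[-3.0, 250.0, 100.0]])

    # wrap the two transverse axes into [0, box_size)
    for n in (0, 1):
        positions[:, n] = numpy.mod(positions[:, n], box_size)

    print(positions)  # [[237.  10. 100.]]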
Example #2
    def __init__(self, cachedir, aliases):
        import bigfile
        self.cachedir = cachedir

        # open (or create) the cache and record its total size and dtype
        with bigfile.BigFile(cachedir, create=True) as bf:
            bd = bigfile.BigData(bf)

            self._size = bd.size
            self._dtype = bd.dtype

        # map each alias to its source column and transform function
        self.aliases = {new: (old, transform)
                        for old, new, transform in aliases}
        ColumnStore.__init__(self)
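For context, BigData aggregates the blocks of an open file into a single column store with a common row count and a structured dtype. A minimal sketch of inspecting those, assuming bigfile's block create/write API and an illustrative path and column name:

    import numpy
    import bigfile

    with bigfile.BigFile('/tmp/toy-cache', create=True) as bf:
        # write one small column (path and name are illustrative only)
        with bf.create('Value', dtype='f8', size=4) as block:
            block.write(0, numpy.arange(4, dtype='f8'))

        bd = bigfile.BigData(bf)
        print(bd.size)   # 4, the common row count across columns
        print(bd.dtype)  # structured dtype with one field, 'Value'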
Example #3
    def read(self, columns, start, stop, step=1):
        """
        Read the specified column(s) over the given range,
        as a dictionary

        'start' and 'stop' should be between 0 and :attr:`size`,
        which is the total size of the binary file (in particles)
        """
        import bigfile
        if isinstance(columns, string_types): columns = [columns]

        with bigfile.BigFile(filename=self.path)[self.dataset] as f:
            ds = bigfile.BigData(f, columns)
            # read the requested range, then apply the stride in memory
            return ds[start:stop][::step]
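A hypothetical call, with store standing in for an instance of the class above:

    # read two columns over the first thousand rows, keeping every other row
    data = store.read(['Position', 'Velocity'], start=0, stop=1000, step=2)
    pos = data['Position']  # the result is indexable by column name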
Example #4
    def setLimits(self):

        if self.pool is None:
            self._first = None
            self._last = None
        else:

            #Divide equally between tasks
            Nt, Np = self.pool.size + 1, bigfile.BigData(self.fp).size
            part_per_task = Np // Nt
            self._first = part_per_task * self.pool.rank
            self._last = part_per_task * (self.pool.rank + 1)

            #Add the remainder to the last task
            if (Np % Nt) and (self.pool.rank == Nt - 1):
                self._last += Np % Nt
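The arithmetic gives every task Np // Nt particles and hands the remainder to the last task. A worked example of the same split, outside of MPI:

    # 10 particles split over 3 tasks
    Nt, Np = 3, 10
    part_per_task = Np // Nt  # 3
    bounds = [(part_per_task * r, part_per_task * (r + 1)) for r in range(Nt)]
    if Np % Nt:
        # the last task absorbs the remainder: (6, 9) becomes (6, 10)
        first, last = bounds[-1]
        bounds[-1] = (first, last + Np % Nt)
    print(bounds)  # [(0, 3), (3, 6), (6, 10)]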
Example #5
    def __init__(self, path, exclude=None, header=Automatic, dataset='./'):

        if not dataset.endswith('/'): dataset = dataset + '/'

        import bigfile

        self.dataset = dataset
        self.path = path

        # store the attributes
        self.attrs = {}

        # the file path
        with bigfile.BigFile(filename=path) as ff:
            columns = ff[self.dataset].blocks
            if header is Automatic:
                # probe common header block names; fall back to the root
                for header in ['Header', 'header', './']:
                    if header in columns: break

            if exclude is None:
                exclude = [header]

            columns = list(set(columns) - set(exclude))

            ds = bigfile.BigData(ff[self.dataset], columns)

            # set the data type and size
            self.dtype = ds.dtype
            self.size = ds.size

            header = ff[header]
            attrs = header.attrs

            # copy over the attrs
            for k in attrs.keys():

                # load a JSON representation if the string starts with json://
                if isinstance(attrs[k],
                              string_types) and attrs[k].startswith('json://'):
                    self.attrs[k] = json.loads(attrs[k][7:], cls=JSONDecoder)
                # copy over an array
                else:
                    self.attrs[k] = numpy.array(attrs[k], copy=True)
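The json:// convention lets structured values ride along in the file attributes. A minimal sketch of the round trip with the standard json module (the loader above uses a custom JSONDecoder, which this sketch omits):

    import json

    # encoding side of the convention, as implied by the loader above
    value = {'BoxSize': [1000.0, 1000.0, 1000.0]}
    encoded = 'json://' + json.dumps(value)

    # decoding mirrors the snippet: strip the 7-character prefix, then parse
    assert json.loads(encoded[7:]) == value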
Example #6
    def parallel_read(self, columns, full=False):
        f = bigfile.BigFileMPI(self.comm, self.path)
        readcolumns = set(columns)

        # remove columns not in the file (None will be returned)
        for col in list(readcolumns):
            if col not in f:
                readcolumns.remove(col)

        done = False
        i = 0
        # every rank keeps entering the loop (and its collectives) until all are done
        while not numpy.all(self.comm.allgather(done)):
            dataset = bigfile.BigData(f, readcolumns)

            Ntot = dataset.size
            # partition the particles evenly across ranks
            start = self.comm.rank * Ntot // self.comm.size
            end = (self.comm.rank + 1) * Ntot // self.comm.size

            if not full:
                # advance through this rank's slice one bunch per iteration
                bunchstart = start + i * self.bunchsize
                bunchend = start + (i + 1) * self.bunchsize
                if bunchend > end: bunchend = end
                if bunchstart > end: bunchstart = end
            else:
                # read this rank's whole slice in one go
                bunchstart = start
                bunchend = end

            if bunchend == end:
                done = True

            P = {}

            for column in readcolumns:
                data = dataset[column][bunchstart:bunchend]
                P[column] = data

            i = i + 1
            yield [P.get(column, None) for column in columns]
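The allgather in the loop condition is what keeps ranks with fewer bunches from abandoning the collectives early. A minimal standalone sketch of the same termination pattern, assuming mpi4py:

    from mpi4py import MPI
    import numpy

    comm = MPI.COMM_WORLD
    nbunches = comm.rank + 1  # pretend each rank has a different workload
    i, done = 0, False
    while not numpy.all(comm.allgather(done)):
        if i < nbunches:
            pass  # a real reader would pull bunch i here
        i += 1
        done = i >= nbunches
    # all ranks exit together, so no rank is left waiting in a collective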
Example #7
    def parallel_read(self, columns, full=False):
        f = bigfile.BigFileMPI(self.comm, self.path)
        try:
            header = f['.']
        except Exception:
            header = f['header']
        boxsize = header.attrs['BoxSize'][0]
        RSD = header.attrs['RSDFactor'][0]
        if boxsize != self.BoxSize[0]:
            raise ValueError("Box size mismatch, expecting %g" % boxsize)

        readcolumns = set(columns)
        if self.rsd is not None:
            readcolumns = set(columns + ['Velocity'])
        if 'InitialPosition' in columns:
            readcolumns.add('ID')
            readcolumns.remove('InitialPosition')

        if 'Mass' in readcolumns:
            readcolumns.remove('Mass')

        # remove columns not in the file (None will be returned)
        for col in list(readcolumns):
            if col not in f:
                readcolumns.remove(col)

        done = False
        i = 0
        # every rank keeps entering the loop (and its collectives) until all are done
        while not numpy.all(self.comm.allgather(done)):
            dataset = bigfile.BigData(f, readcolumns)

            Ntot = dataset.size
            start = self.comm.rank * Ntot // self.comm.size
            end = (self.comm.rank + 1) * Ntot // self.comm.size

            if not full:
                bunchstart = start + i * self.bunchsize
                bunchend = start + (i + 1) * self.bunchsize
                if bunchend > end: bunchend = end
                if bunchstart > end: bunchstart = end
            else:
                bunchstart = start
                bunchend = end

            if bunchend == end:
                done = True

            P = {}

            for column in readcolumns:
                data = dataset[column][bunchstart:bunchend]
                P[column] = data

            if 'Velocity' in P:
                P['Velocity'] *= RSD

            if 'Mass' in columns:
                P['Mass'] = numpy.ones(bunchend - bunchstart,
                                       dtype='u1') * self.M0

            if self.rsd is not None:
                # apply the redshift-space shift along the chosen axis
                axis = "xyz".index(self.rsd)
                P['Position'][:, axis] += P['Velocity'][:, axis]
                P['Position'][:, axis] %= self.BoxSize[axis]
            if 'InitialPosition' in columns:
                P['InitialPosition'] = numpy.empty((len(P['ID']), 3), 'f4')
                # search near the estimated cube root for the exact grid size
                nc = int(self.size**(1. / 3) + 0.5)
                pid = P['ID'].copy()
                for nc in range(nc - 10, nc + 10):
                    if nc**3 == self.size: break
                # peel lattice coordinates off the ID, z varying fastest
                for d in [2, 1, 0]:
                    P['InitialPosition'][:, d] = pid % nc
                    pid[:] //= nc
                cellsize = self.BoxSize[0] / nc
                # place each particle at its cell center, in box units
                P['InitialPosition'][:] += 0.5
                P['InitialPosition'][:] *= cellsize

            i = i + 1
            yield [P.get(column, None) for column in columns]
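The InitialPosition block reconstructs the pre-displacement lattice site from the particle ID, assuming IDs enumerate an nc³ grid with z varying fastest. A self-contained round-trip check of that decoding:

    import numpy

    nc = 4  # a tiny 4^3 grid for the check
    ii, jj, kk = numpy.meshgrid(*[numpy.arange(nc)] * 3, indexing='ij')
    ids = ((ii * nc + jj) * nc + kk).ravel().copy()  # z varies fastest

    # same peeling as above: d = 2, 1, 0 recovers k, j, i in turn
    coords = numpy.empty((ids.size, 3), dtype='i8')
    for d in [2, 1, 0]:
        coords[:, d] = ids % nc
        ids //= nc

    assert numpy.array_equal(coords[:, 0], ii.ravel())
    assert numpy.array_equal(coords[:, 2], kk.ravel())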