Example #1
import glob
import os

import numpy

import oamap.inference
import oamap.proxy
import oamap.schema

try:                                   # Python 2/3 compatibility
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse

try:                                   # basestring only exists in Python 2
    basestring
except NameError:
    basestring = str


def open(path, mode="r", prefix="object", delimiter="-"):
    # Note: shadows the built-in open() within this module.
    # Expand a plain path or file:// URL (with ~ and glob patterns) into a
    # sorted list of matching filenames.
    def explode(x):
        parsed = urlparse(x)
        if parsed.scheme == "file" or len(parsed.scheme) == 0:
            return sorted(
                glob.glob(os.path.expanduser(parsed.netloc + parsed.path)))
        else:
            raise ValueError("URL scheme '{0}' not recognized".format(
                parsed.scheme))

    if isinstance(path, basestring):
        paths = explode(path)
    else:
        paths = [y for x in path for y in explode(x)]

    if len(paths) == 0:
        raise ValueError("no matching filenames")

    # Try to read a stored schema from the first file; if it is missing or
    # malformed, fall back to inferring a schema from the array names.
    npzfile = numpy.load(paths[0])
    try:
        datasetarray = npzfile[prefix]
        assert (datasetarray.dtype == numpy.dtype(numpy.uint8)
                and len(datasetarray.shape) == 1)
        dataset = oamap.schema.Dataset.fromjsonstring(datasetarray.tostring())
    except Exception:      # any failure here means "no stored schema"
        schema = oamap.inference.fromnames(npzfile.keys(),
                                           prefix=prefix,
                                           delimiter=delimiter)
    else:
        schema = dataset.schema

    generator = schema.generator()
    # NumpyFileArrays is defined elsewhere in this module; the first file's
    # already-open handle is reused, the rest are opened lazily.
    listofarrays = ([NumpyFileArrays(paths[0], npzfile)] +
                    [NumpyFileArrays(x, None) for x in paths[1:]])
    return oamap.proxy.PartitionedListProxy(generator, listofarrays)
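A minimal usage sketch for the open() above, assuming it is importable from
the module that defines it and that "events.npz" was written by the savez()
shown in Example #7 (the file name is illustrative):

dataset = open("events.npz")          # reads the stored schema, or infers one
for entry in dataset:                 # entries materialize lazily on access
    print(entry)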
Example #2
import oamap.backend.root
import oamap.extension.common
import oamap.proxy

# _schema is defined elsewhere in this module.


def _proxy(tree, namespace=None, extension=oamap.extension.common):
    if namespace is None:
        # Default namespace derived from the tree's source file.
        namespace = "root.cmsnano({0})".format(repr(tree._context.sourcepath))

    schema = _schema(tree, namespace=namespace)
    generator = schema.generator(extension=extension)

    # Wrap the ROOT TTree as a list-like view over entries 0..numentries.
    return oamap.proxy.ListProxy(
        generator,
        oamap.backend.root.ROOTArrays(
            tree,
            oamap.backend.root.ROOTBackend([tree._context.sourcepath],
                                           tree._context.treename, namespace)),
        generator._newcache(), 0, 1, tree.numentries)
Example #3
def _proxy(tree, namespace="", extension=oamap.extension.common):
    schema = _schema(tree, namespace=namespace)
    generator = schema.generator(extension=extension)
    return oamap.proxy.ListProxy(generator, ROOTArrays(tree),
                                 generator._newcache(), 0, 1, tree.numentries)
Example #4
def _proxy(tree, namespace="", extension=oamap.extension.common):
    schema = _schema(tree, namespace=namespace)
    generator = schema.generator(extension=extension)
    return oamap.proxy.ListProxy(generator, ROOTArrays(tree, ROOTBackend([tree._context.sourcepath], tree._context.treename, namespace)), generator._newcache(), 0, 1, tree.numentries)
Example #5
    def fromdata(self, key, value, schema=None, inferencelimit=None, partitionlimit=None, pointer_fromequal=False):
        if schema is None:
            schema = oamap.inference.fromdata(value, limit=inferencelimit)
        if partitionlimit is not None:
            if not (isinstance(schema, oamap.schema.List) and not schema.nullable):
                raise TypeError("if limit is not None, the schema must be a partitionable List")
            if not callable(partitionlimit):
                raise TypeError("partitionlimit must be None or a callable function")

        if isinstance(schema, oamap.schema.Dataset):
            dataset = schema
            schema = dataset.schema
        else:
            dataset = oamap.schema.Dataset(schema, prefix=key)

        if dataset.prefix is None:
            prefix = key
        else:
            prefix = dataset.prefix

        if dataset.delimiter is None:
            delimiter = "-"
        else:
            delimiter = dataset.delimiter

        if dataset.extension is None:
            extension = import_module("oamap.extension.common")
        elif isinstance(dataset.extension, basestring):
            extension = import_module(dataset.extension)
        else:
            extension = [import_module(x) for x in dataset.extension]

        generator = schema.generator(prefix=prefix, delimiter=delimiter, extension=extension)

        if partitionlimit is None:
            # Unpartitioned: fill everything in a single pass.
            arrays = oamap.fill.fromdata(value, generator=generator, pointer_fromequal=pointer_fromequal)

            if key in self:
                del self[key]

            if dataset.partitioning is None:
                for n, x in arrays.items():
                    self.dbm[_asbytes(self.ARRAY + n)] = x.tostring()

            else:
                partitionlookup = dataset.partitioning.empty_partitionlookup(delimiter)
                partitionlookup.append(arrays[generator.stops][0] - arrays[generator.starts][0], arrays.keys())

                for n, x in arrays.items():
                    self.dbm[_asbytes(self.ARRAY + partitionlookup.id2name(n, 0))] = x.tostring()
                self.dbm[_asbytes(self.ARRAY + dataset.partitioning.key)] = numpy.array(partitionlookup).tostring()

            self.dbm[_asbytes(self.DATASET + key)] = dataset.tojsonstring()

        else:
            # Partitioned: attach a partitioning scheme to the dataset and
            # record each partition in a lookup table as it is written.
            dataset = dataset.copy(partitioning=dataset._get_partitioning(prefix, delimiter))

            partitionlookup = dataset.partitioning.empty_partitionlookup(delimiter)

            values = iter(value)
            if key in self:
                del self[key]

            self.dbm[_asbytes(self.ARRAY + key)] = numpy.array(partitionlookup).tostring()
            self.dbm[_asbytes(self.DATASET + key)] = dataset.tojsonstring()

            for partitionid, (numentries, arrays) in enumerate(oamap.fill.fromiterdata(values, generator=generator, limit=partitionlimit, pointer_fromequal=pointer_fromequal)):
                partitionlookup.append(numentries, arrays.keys())

                for n, x in arrays.items():
                    self.dbm[_asbytes(self.ARRAY + partitionlookup.id2name(n, partitionid))] = x.tostring()
                # Re-save the lookup after each partition so it stays current.
                self.dbm[_asbytes(self.ARRAY + dataset.partitioning.key)] = numpy.array(partitionlookup).tostring()
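A hypothetical usage sketch for the dbm-backed fromdata() above; the
container class and its constructor are illustrative, only the method call
comes from the snippet:

db = OAMapDBM("events.db")                         # hypothetical wrapper class
db.fromdata("points", [{"x": 1.1}, {"x": 2.2}])    # schema inferred from data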
Example #6
    def fromdata(self, name, schema, *partitions, **opts):
        # Optional keyword arguments, spelled out because Python 2 has no
        # keyword-only argument syntax; pop() with a default never raises.
        pointer_fromequal = opts.pop("pointer_fromequal", False)
        namespace = opts.pop("namespace", self._namespace)
        extension = opts.pop("extension", None)
        packing = opts.pop("packing", None)
        doc = opts.pop("doc", None)
        metadata = opts.pop("metadata", None)
        if len(opts) > 0:
            raise TypeError("unrecognized options: {0}".format(" ".join(opts)))

        if namespace not in self._backends:
            self[namespace] = DictBackend()
        backend = self[namespace]

        def setnamespace(node):
            node.namespace = namespace
            return node

        schema = schema.replace(setnamespace)

        generator = schema.generator(prefix=backend.prefix(name),
                                     delimiter=backend.delimiter(),
                                     packing=packing)
        generator._requireall()
        roles = generator._togetall({}, generator._newcache(), True, set())

        if isinstance(schema, (oamap.schema.Record, oamap.schema.Tuple)):
            if len(partitions) != 1:
                raise TypeError(
                    "records and tuples take exactly one partition; only "
                    "lists may have any other number")
            data = generator.fromdata(partitions[0])
            roles2arrays = dict((x, data._arrays[str(x)]) for x in roles)

            active = backend.instantiate(0)
            if hasattr(active, "putall"):
                active.putall(roles2arrays)
            else:
                for n, x in roles2arrays.items():
                    active[str(n)] = x

            out = oamap.dataset.Data(name,
                                     generator.namedschema(),
                                     self._backends,
                                     self._executor,
                                     extension=extension,
                                     packing=packing,
                                     doc=doc,
                                     metadata=metadata)

        elif isinstance(schema, oamap.schema.List):
            offsets = [0]
            for partitionid, partition in enumerate(partitions):
                data = generator.fromdata(partition)
                roles2arrays = dict((x, data._arrays[str(x)]) for x in roles)
                startsrole = oamap.generator.StartsRole(
                    generator.starts, generator.namespace, None)
                stopsrole = oamap.generator.StopsRole(generator.stops,
                                                      generator.namespace,
                                                      None)
                startsrole.stops = stopsrole
                stopsrole.starts = startsrole
                if schema.nullable:
                    maskrole = oamap.generator.MaskRole(
                        generator.mask, generator.namespace, {
                            startsrole: roles2arrays[startsrole],
                            stopsrole: roles2arrays[stopsrole]
                        })
                # The outermost list's starts/stops (and mask, if nullable)
                # are not written to the backend; partition lengths go into
                # "offsets" below instead.
                del roles2arrays[startsrole]
                del roles2arrays[stopsrole]
                if schema.nullable:
                    del roles2arrays[maskrole]

                active = backend.instantiate(partitionid)
                if hasattr(active, "putall"):
                    active.putall(roles2arrays)
                else:
                    for n, x in roles2arrays.items():
                        active[str(n)] = x

                offsets.append(offsets[-1] + len(data))

            out = oamap.dataset.Dataset(name,
                                        generator.namedschema(),
                                        self._backends,
                                        self._executor,
                                        offsets,
                                        extension=extension,
                                        packing=packing,
                                        doc=doc,
                                        metadata=metadata)

        else:
            raise TypeError(
                "can only create datasets from proxy types (lists, records, "
                "tuples)")

        self.put(name, out, namespace=namespace)
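A hypothetical usage sketch; the database object "db" and its get() accessor
are illustrative, only fromdata() and the schema classes come from oamap:

import oamap.schema

sch = oamap.schema.List(
    oamap.schema.Record({"x": oamap.schema.Primitive("f8")}))
db.fromdata("points", sch, [{"x": 1.1}], [{"x": 2.2}, {"x": 3.3}])  # 2 partitions
points = db.get("points")                  # hypothetical reader side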
Example #7
from importlib import import_module

import numpy

import oamap.fill
import oamap.inference
import oamap.proxy
import oamap.schema

try:                                   # basestring only exists in Python 2
    basestring
except NameError:
    basestring = str


def savez(file,
          value,
          schema=None,
          prefix="object",
          delimiter=None,
          extension=None,
          saveschema=True,
          compressed=False,
          inferencelimit=None,
          pointer_fromequal=False):
    if schema is None:
        if isinstance(value, oamap.proxy.Proxy):
            schema = value._generator.schema
        else:
            schema = oamap.inference.fromdata(value, limit=inferencelimit)

    if isinstance(schema, oamap.schema.Dataset):
        dataset = schema
        schema = dataset.schema
    else:
        dataset = oamap.schema.Dataset(schema,
                                       prefix=prefix,
                                       delimiter="-",
                                       extension=extension)

    if dataset.partitioning is not None:
        raise ValueError("npz files do not support partitioning")

    if delimiter is None:
        if dataset.delimiter is None:
            delimiter = "-"
        else:
            delimiter = dataset.delimiter

    if extension is None:
        if dataset.extension is None:
            extension = import_module("oamap.extension.common")
        elif isinstance(dataset.extension, basestring):
            extension = import_module(dataset.extension)
        else:
            extension = [import_module(x) for x in dataset.extension]

    generator = schema.generator(prefix=prefix,
                                 delimiter=delimiter,
                                 extension=extension)

    # Reuse the proxy's existing arrays when possible; otherwise fill fresh
    # arrays from the raw data.
    if isinstance(value, oamap.proxy.Proxy) and hasattr(
            value._arrays, "items"):
        arrays = dict(value._arrays.items())
    elif isinstance(value, oamap.proxy.Proxy) and hasattr(
            value._arrays, "keys"):
        arrays = dict((n, value._arrays[n]) for n in value._arrays.keys())
    elif isinstance(value, oamap.proxy.Proxy) and hasattr(
            value._arrays, "__iter__"):
        arrays = dict((n, value._arrays[n]) for n in value._arrays)
    else:
        arrays = oamap.fill.fromdata(value,
                                     generator=generator,
                                     pointer_fromequal=pointer_fromequal)

    if saveschema and prefix not in arrays:
        # Store the schema's JSON form as a uint8 array so that a later
        # open() can recover it (see Example #1).
        arrays[prefix] = numpy.frombuffer(dataset.tojsonstring(),
                                          dtype=numpy.uint8)

    if compressed:
        numpy.savez_compressed(file, **arrays)
    else:
        numpy.savez(file, **arrays)
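A hypothetical round trip pairing this savez() with the open() from
Example #1 (the file name is illustrative):

data = [{"x": 1.1, "y": [1, 2]}, {"x": 2.2, "y": []}]
savez("events.npz", data)            # schema inferred from data and stored
events = open("events.npz")          # schema read back from the npz file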
Example #8
        def fromdata(self,
                     key,
                     value,
                     schema=None,
                     inferencelimit=None,
                     partitionlimit=None,
                     pointer_fromequal=False):
            if schema is None:
                schema = oamap.inference.fromdata(value, limit=inferencelimit)
            if partitionlimit is not None:
                if not (isinstance(schema, oamap.schema.List)
                        and not schema.nullable):
                    raise TypeError(
                        "if partitionlimit is not None, the schema must be "
                        "a partitionable List")
                if not callable(partitionlimit):
                    raise TypeError(
                        "partitionlimit must be None or a callable function")

            if isinstance(schema, oamap.schema.Dataset):
                dataset = schema
                schema = dataset.schema
            else:
                dataset = oamap.schema.Dataset(schema, prefix=key)

            if dataset.prefix is None:
                prefix = key
            else:
                prefix = dataset.prefix

            if dataset.delimiter is None:
                delimiter = "-"
            else:
                delimiter = dataset.delimiter

            if dataset.extension is None:
                extension = import_module("oamap.extension.common")
            elif isinstance(dataset.extension, basestring):
                extension = import_module(dataset.extension)
            else:
                extension = [import_module(x) for x in dataset.extension]

            generator = schema.generator(prefix=prefix,
                                         delimiter=delimiter,
                                         extension=extension)

            if partitionlimit is None:
                arrays = oamap.fill.fromdata(
                    value,
                    generator=generator,
                    pointer_fromequal=pointer_fromequal)
                for n in arrays:
                    if super(OAMapGroup, self).__contains__(n):
                        raise RuntimeError(
                            "cannot assign to {0} (dataset exists)".format(
                                repr(n)))

                if dataset.partitioning is None:
                    for n, x in arrays.items():
                        super(OAMapGroup, self).__setitem__(n, x)

                else:
                    partitionlookup = dataset.partitioning.empty_partitionlookup(
                        delimiter)
                    partitionlookup.append(
                        arrays[generator.stops][0] -
                        arrays[generator.starts][0], arrays.keys())

                    for n, x in arrays.items():
                        super(OAMapGroup,
                              self).__setitem__(partitionlookup.id2name(n, 0),
                                                x)
                    super(OAMapGroup,
                          self).__setitem__(dataset.partitioning.key,
                                            numpy.array(partitionlookup))

                self.attrs[key] = dataset.tojsonstring()

            else:
                dataset = dataset.copy(
                    partitioning=dataset._get_partitioning(prefix, delimiter))

                partitionlookup = dataset.partitioning.empty_partitionlookup(
                    delimiter)

                super(OAMapGroup,
                      self).__setitem__(dataset.partitioning.key,
                                        numpy.array(partitionlookup))
                self.attrs[key] = dataset.tojsonstring()

                for partitionid, (numentries, arrays) in enumerate(
                        oamap.fill.fromiterdata(
                            value,
                            generator=generator,
                            limit=partitionlimit,
                            pointer_fromequal=pointer_fromequal)):
                    partitionlookup.append(numentries, arrays.keys())

                    for n in arrays:
                        if super(OAMapGroup, self).__contains__(n):
                            raise RuntimeError(
                                "cannot assign to {0} (dataset exists)".format(
                                    repr(n)))
                    for n, x in arrays.items():
                        super(OAMapGroup, self).__setitem__(
                            partitionlookup.id2name(n, partitionid), x)
                    # The stored lookup cannot be assigned in place, so it is
                    # deleted and re-created after each partition to stay
                    # current.
                    super(OAMapGroup,
                          self).__delitem__(dataset.partitioning.key)
                    super(OAMapGroup,
                          self).__setitem__(dataset.partitioning.key,
                                            numpy.array(partitionlookup))
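A hypothetical usage sketch, assuming OAMapGroup wraps an HDF5-style group
(the constructor shown is illustrative, only fromdata() comes from the
snippet):

import h5py

f = h5py.File("events.h5", "w")
group = OAMapGroup(f["/"])                         # hypothetical wrapper
group.fromdata("events", [{"x": 1.1}, {"x": 2.2}])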