Example #1
0
    def initExisting(self, allocation, values, copy=True, stringToCategory=True, stringToBuffer=True, loadFields=None):
        """Populate this object's columns from already-filled, equal-length arrays.

        Both ``self.allocation`` and ``self.length`` are set to ``allocation``.
        When ``self`` has no ``data`` attribute yet, or ``loadFields`` is None,
        ``self.data``, ``self.categories`` and ``self.buffers`` are reset to one
        ``None`` slot per entry of ``self.fields`` before loading.

        Parameters:
            allocation: number of items every supplied array must contain.
            values: mapping from field name to a sized sequence of items.
            copy: if true, "object" columns are copied into a new list and
                plain columns into a new numpy array; if false, the supplied
                container is reused when it already has the right kind.
            stringToCategory: if true, "category" columns are replaced by the
                inverse indices from ``numpy.unique`` plus a pair of dicts
                (value -> index, index -> value); if false they are stored
                like a plain (non-string, non-object) column.
            stringToBuffer: if true, "string" columns are written back-to-back
                into a ``BytesIO`` (UTF-8) and the data array records the
                buffer position after each item; if false, the supplied array
                is stored unchanged and no buffer is kept.
            loadFields: collection of field names to load; None loads every
                field in ``self.fields``.

        Raises:
            ValueError: if any array in ``values`` does not have ``allocation`` items.
            KeyError: propagated from ``values[field]`` when a field to be
                loaded is absent (assuming ``values`` is a dict).
            TypeError: if ``numpy.unique`` rejects a category column and this
                numpy has no legacy ``unique1d`` to fall back on.
        """
        for name, arr in values.items():
            if len(arr) != allocation:
                raise ValueError("pre-existing array \"%s\" must have allocation %d, not %d" % (name, allocation, len(arr)))

        self.allocation = self.length = allocation

        if not hasattr(self, "data") or loadFields is None:
            if loadFields is None:
                loadFields = self.fields

            self.data = [None] * len(self.fields)
            self.categories = [None] * len(self.fields)
            self.buffers = [None] * len(self.fields)

        for i, field in enumerate(self.fields):
            if field in loadFields:
                arr = values[field]
                if len(arr) != allocation:
                    raise ValueError("length of field \"%s\" is %d, but allocating %d" % (field, len(arr), allocation))

                if stringToCategory and self.types[i] == "category":
                    try:
                        uniqueValues, indicies = numpy.unique(arr, return_inverse=True)
                    except TypeError:
                        # The fallback only makes sense on old numpy releases
                        # that still ship unique1d.  Without it the TypeError
                        # is a genuine problem with the data (for example
                        # unorderable values), so let it through unchanged.
                        legacyUnique = getattr(numpy, "unique1d", None)
                        if legacyUnique is None:
                            raise

                        # Parse the version into a list of ints so the
                        # comparison works on Python 3 (where map() is an
                        # iterator) and tolerates suffixes such as "0rc1".
                        versionParts = []
                        for part in numpy.__version__.split(".")[:3]:
                            digits = ""
                            for ch in part:
                                if not ch.isdigit():
                                    break
                                digits += ch
                            versionParts.append(int(digits) if digits else 0)

                        # Releases before 1.3.0 are treated as returning the
                        # pair in the opposite order.
                        if versionParts < [1, 3, 0]:
                            indicies, uniqueValues = legacyUnique(arr, return_inverse=True)
                        else:
                            uniqueValues, indicies = legacyUnique(arr, return_inverse=True)

                    v_to_n = dict((v, n) for n, v in enumerate(uniqueValues))
                    n_to_v = dict((n, v) for n, v in enumerate(uniqueValues))

                    self.data[i] = indicies
                    self.categories[i] = (v_to_n, n_to_v)

                elif self.types[i] == "string":
                    if stringToBuffer:
                        buf = BytesIO()
                        arr2 = numpy.empty(allocation, dtype=typeToDtype.get(self.types[i], self.types[i]))
                        for j, v in enumerate(arr):
                            buf.write(str(v).encode("utf-8"))
                            # Record where this item ends inside the buffer.
                            arr2[j] = buf.tell()
                    else:
                        arr2 = arr
                        buf = None

                    self.data[i] = arr2
                    self.buffers[i] = buf

                elif self.types[i] == "object":
                    # Object columns are kept as Python lists, so numpy input
                    # is always converted even when copy is false.
                    if copy or isinstance(arr, numpy.ndarray):
                        arr = list(arr)

                    self.data[i] = arr

                else:
                    # Plain columns are kept as numpy arrays, so non-numpy
                    # input is always converted even when copy is false.
                    if copy or not isinstance(arr, numpy.ndarray):
                        arr = numpy.array(arr, dtype=typeToDtype.get(self.types[i], self.types[i]))
                    self.data[i] = arr
Example #2
0
 def _write_NAB_header(self, fileName):
     """Create ``fileName`` and write the single-line NAB text header to it.

     The header has the form ``RecArray names=<fields> formats=<formats>``
     followed by a newline, with items joined by ``self.comma`` and the
     whole line encoded as UTF-8.

     "category" and "string" fields are given a fixed width taken from
     ``self.stringLength(field)``, with 256 substituted when that is 0.
     They appear as ``a<width>`` in the header and ``|S<width>`` in the
     returned list. Every other type is looked up in ``typeToDtype``,
     falling back to the type name itself, and is spelled the same way
     in both places.

     Parameters:
         fileName: path of the file to create (opened in "wb" mode).

     Returns:
         (file, dtypes): the still-open binary file object, positioned
         just after the header, and the per-field dtype list. The caller
         owns the file object and must close it.
     """
     formats = []
     dtypes = []
     for i, typeName in enumerate(self.types):
         if typeName in ("category", "string"):
             stringLength = self.stringLength(self.fields[i])
             if stringLength == 0:
                 stringLength = 256
             formats.append("a%d" % stringLength)
             dtypes.append("|S%d" % stringLength)
         else:
             dtype = typeToDtype.get(typeName, typeName)
             formats.append(dtype)
             dtypes.append(dtype)

     # Assemble the whole header before touching the filesystem, so that a
     # failure here cannot leave behind a truncated file or an open handle.
     header = (
         "RecArray names=" + self.comma.join(self.fields)
         + " formats=" + self.comma.join(formats)
         + "\n"
     ).encode("utf-8")

     output = open(fileName, "wb")
     try:
         output.write(header)
     except BaseException:
         # The handle is only handed to the caller on success.
         output.close()
         raise
     return output, dtypes
Example #3
0
 def _write_NAB_header(self, fileName):
     """Open ``fileName`` for binary writing and emit the NAB header line.

     The line written is ``RecArray names=<fields> formats=<formats>`` plus
     a trailing newline, UTF-8 encoded, with list items joined by
     ``self.comma``.

     Fields typed "category" or "string" use a fixed width obtained from
     ``self.stringLength(field)`` (256 when that returns 0). The header
     lists them as ``a<width>`` and the returned list as ``|S<width>``.
     Any other type name is mapped through ``typeToDtype``, or kept as-is
     when it has no entry, and is used unchanged in both places.

     Parameters:
         fileName: path of the file to create (opened in "wb" mode).

     Returns:
         A ``(file, dtypes)`` pair: the open file object, left positioned
         after the header for the caller to keep writing to and eventually
         close, and the list of per-field dtype specifications.
     """
     headerFormats = []
     recordDtypes = []
     for position, name in enumerate(self.types):
         if name in ("category", "string"):
             width = self.stringLength(self.fields[position])
             if width == 0:
                 width = 256
             headerFormats.append("a%d" % width)
             recordDtypes.append("|S%d" % width)
         else:
             resolved = typeToDtype.get(name, name)
             headerFormats.append(resolved)
             recordDtypes.append(resolved)

     # Everything that can fail without I/O happens before open(), so an
     # error here cannot leak a handle or create a half-written file.
     pieces = [
         "RecArray names=",
         self.comma.join(self.fields),
         " formats=",
         self.comma.join(headerFormats),
         "\n",
     ]
     payload = "".join(pieces).encode("utf-8")

     stream = open(fileName, "wb")
     try:
         stream.write(payload)
     except BaseException:
         # Do not hand a broken handle to the caller; release it here.
         stream.close()
         raise
     return stream, recordDtypes
Example #4
0
 def initMemory(self, allocation):
     """Allocate empty storage for ``allocation`` items in every field.

     Sets, with one entry per field in ``self.fields``:

     * ``self.data``: a list of ``allocation`` ``None`` values for "object"
       fields, otherwise an uninitialised numpy array whose dtype comes
       from ``typeToDtype`` (falling back to the type name itself).
     * ``self.categories``: a pair of empty dicts for "category" fields,
       ``None`` otherwise.
     * ``self.buffers``: a fresh ``BytesIO`` for "string" fields, ``None``
       otherwise.

     ``self.allocation`` records the capacity and ``self.length`` is reset
     to 0.

     Parameters:
         allocation: number of items to reserve per field.
     """
     self.allocation = allocation

     # Resolve each field's type once, by position.  Plain range() is used
     # rather than the Python-2-only xrange so this runs on both Python 2
     # and Python 3.
     fieldTypes = [self.types[i] for i in range(len(self.fields))]

     self.data = [
         [None] * allocation
         if typeName == "object"
         else numpy.empty(allocation, dtype=typeToDtype.get(typeName, typeName))
         for typeName in fieldTypes
     ]
     self.categories = [({}, {}) if typeName == "category" else None for typeName in fieldTypes]
     self.buffers = [BytesIO() if typeName == "string" else None for typeName in fieldTypes]
     self.length = 0
Example #5
0
    def getcolumn(self, field, categoryToString=True):
        """Return the complete column ``field`` gathered from every page.

        "string" columns, and "category" columns when ``categoryToString``
        is true, are returned as a Python list built by concatenating each
        page's column in page order.  Every other column is returned as a
        numpy array of ``self.length`` items, with each page's column
        copied in at that page's entry in ``self.starts``.

        Parameters:
            field: name of the column; must be in ``self.fields``.
            categoryToString: forwarded unchanged to each page's
                ``getcolumn``.

        Raises:
            KeyError: if ``field`` is not one of ``self.fields``.
        """
        if field not in self.fields:
            raise KeyError("unrecognized UniTable field \"%s\"" % field)

        typeName = self._types[field]

        if typeName == "string" or (categoryToString and typeName == "category"):
            # Collect every page's column first, then flatten into one list.
            perPage = [page.getcolumn(field, categoryToString=categoryToString) for page in self.pages]
            merged = []
            for chunk in perPage:
                merged.extend(chunk)
            return merged

        merged = numpy.empty(self.length, dtype=typeToDtype.get(typeName, typeName))
        for offset, page in zip(self.starts, self.pages):
            chunk = page.getcolumn(field, categoryToString=categoryToString)
            merged[offset:offset + page.length] = chunk
        return merged
Example #6
0
    def getcolumn(self, field, categoryToString=True):
        """Assemble one column across all pages of the table.

        For a "string" column, or a "category" column requested with
        ``categoryToString`` true, the result is a Python list formed by
        chaining each page's column in order.  For anything else the result
        is a numpy array of ``self.length`` items (dtype looked up in
        ``typeToDtype``, falling back to the type name) into which each
        page's column is copied starting at its entry in ``self.starts``.

        Parameters:
            field: column name; must appear in ``self.fields``.
            categoryToString: passed through to every page's ``getcolumn``.

        Raises:
            KeyError: when ``field`` is not in ``self.fields``.
        """
        if field not in self.fields:
            raise KeyError("unrecognized UniTable field \"%s\"" % field)

        columnType = self._types[field]
        asList = columnType == "string" or (categoryToString and columnType == "category")

        if not asList:
            result = numpy.empty(self.length, dtype=typeToDtype.get(columnType, columnType))
            for begin, page in zip(self.starts, self.pages):
                values = page.getcolumn(field, categoryToString=categoryToString)
                result[begin:begin + page.length] = values
            return result

        segments = [
            page.getcolumn(field, categoryToString=categoryToString)
            for page in self.pages
        ]
        return list(itertools.chain.from_iterable(segments))
Example #7
0
 def initMemory(self, allocation):
     """Reserve blank storage for ``allocation`` items per field.

     After the call, for each field in ``self.fields`` (by position):

     * ``self.data`` holds a list of ``allocation`` ``None`` values when
       the field's type is "object", otherwise an uninitialised numpy
       array whose dtype is looked up in ``typeToDtype`` (the type name
       itself is used when there is no entry).
     * ``self.categories`` holds a pair of empty dicts for "category"
       fields and ``None`` for the rest.
     * ``self.buffers`` holds a new ``BytesIO`` for "string" fields and
       ``None`` for the rest.

     ``self.allocation`` is set to ``allocation`` and ``self.length`` to 0.

     Parameters:
         allocation: capacity, in items, to reserve for every field.
     """
     self.allocation = allocation

     # Look the types up once.  range() replaces the Python-2-only xrange
     # so the method also runs on Python 3.
     kinds = [self.types[index] for index in range(len(self.fields))]

     columns = []
     for kind in kinds:
         if kind != "object":
             columns.append(numpy.empty(allocation, dtype=typeToDtype.get(kind, kind)))
         else:
             columns.append([None] * allocation)
     self.data = columns

     self.categories = [({}, {}) if kind == "category" else None for kind in kinds]
     self.buffers = [BytesIO() if kind == "string" else None for kind in kinds]
     self.length = 0
Example #8
0
    def initExisting(self, allocation, values, copy=True, stringToCategory=True, stringToBuffer=True, loadFields=None):
        """Load columns from pre-filled arrays that all share one length.

        ``self.allocation`` and ``self.length`` both become ``allocation``.
        If ``self`` has no ``data`` attribute yet, or ``loadFields`` is None,
        ``self.data``, ``self.categories`` and ``self.buffers`` are first
        reset to lists of ``None`` with one slot per entry of ``self.fields``.

        Parameters:
            allocation: number of items each supplied array must contain.
            values: mapping from field name to a sized sequence of items.
            copy: when true, "object" columns are copied to a new list and
                plain columns to a new numpy array; when false, the supplied
                container is kept if it is already of the right kind.
            stringToCategory: when true, "category" columns are stored as the
                inverse indices from ``numpy.unique`` together with a
                (value -> index, index -> value) pair of dicts; when false
                they are handled like a plain column.
            stringToBuffer: when true, "string" columns are concatenated
                (UTF-8) into a ``BytesIO`` and the data array stores the
                buffer position after each item; when false the supplied
                array is stored as-is with no buffer.
            loadFields: collection of field names to load; None means every
                field in ``self.fields``.

        Raises:
            ValueError: if an array in ``values`` does not hold exactly
                ``allocation`` items.
            KeyError: propagated from ``values[field]`` when a field to be
                loaded is missing (assuming ``values`` is a dict).
            TypeError: if ``numpy.unique`` cannot process a category column
                and no legacy ``numpy.unique1d`` is available.
        """
        for name, arr in values.items():
            if len(arr) != allocation:
                raise ValueError(
                    'pre-existing array "%s" must have allocation %d, not %d' % (name, allocation, len(arr))
                )

        self.allocation = self.length = allocation

        if not hasattr(self, "data") or loadFields is None:
            if loadFields is None:
                loadFields = self.fields

            self.data = [None] * len(self.fields)
            self.categories = [None] * len(self.fields)
            self.buffers = [None] * len(self.fields)

        for i, field in enumerate(self.fields):
            if field in loadFields:
                arr = values[field]
                if len(arr) != allocation:
                    raise ValueError('length of field "%s" is %d, but allocating %d' % (field, len(arr), allocation))

                if stringToCategory and self.types[i] == "category":
                    try:
                        uniqueValues, indicies = numpy.unique(arr, return_inverse=True)
                    except TypeError:
                        # Only old numpy releases provide unique1d.  When it
                        # is missing, the TypeError describes a real problem
                        # with the data, so re-raise it instead of masking it
                        # with an AttributeError.
                        legacyUnique = getattr(numpy, "unique1d", None)
                        if legacyUnique is None:
                            raise

                        # Build a list of ints from the leading digits of
                        # each component: list-vs-list comparison works on
                        # Python 3 (map() would be an iterator there) and
                        # suffixes such as "0rc1" do not break int().
                        versionParts = []
                        for part in numpy.__version__.split(".")[:3]:
                            digits = ""
                            for ch in part:
                                if not ch.isdigit():
                                    break
                                digits += ch
                            versionParts.append(int(digits) if digits else 0)

                        # Releases before 1.3.0 are treated as returning the
                        # pair in the opposite order.
                        if versionParts < [1, 3, 0]:
                            indicies, uniqueValues = legacyUnique(arr, return_inverse=True)
                        else:
                            uniqueValues, indicies = legacyUnique(arr, return_inverse=True)

                    v_to_n = dict((v, n) for n, v in enumerate(uniqueValues))
                    n_to_v = dict((n, v) for n, v in enumerate(uniqueValues))

                    self.data[i] = indicies
                    self.categories[i] = (v_to_n, n_to_v)

                elif self.types[i] == "string":
                    if stringToBuffer:
                        buf = BytesIO()
                        arr2 = numpy.empty(allocation, dtype=typeToDtype.get(self.types[i], self.types[i]))
                        for j, v in enumerate(arr):
                            buf.write(str(v).encode("utf-8"))
                            # Store the end position of this item in the buffer.
                            arr2[j] = buf.tell()
                    else:
                        arr2 = arr
                        buf = None

                    self.data[i] = arr2
                    self.buffers[i] = buf

                elif self.types[i] == "object":
                    # Object columns live in Python lists, so numpy input is
                    # converted even when copy is false.
                    if copy or isinstance(arr, numpy.ndarray):
                        arr = list(arr)

                    self.data[i] = arr

                else:
                    # Plain columns live in numpy arrays, so other input is
                    # converted even when copy is false.
                    if copy or not isinstance(arr, numpy.ndarray):
                        arr = numpy.array(arr, dtype=typeToDtype.get(self.types[i], self.types[i]))
                    self.data[i] = arr