def initExisting(self, allocation, values, copy=True, stringToCategory=True, stringToBuffer=True, loadFields=None):
    """Adopt pre-existing column arrays as this object's storage.

    Parameters:
        allocation: required length of every array in ``values``; stored as
            both ``self.allocation`` and ``self.length``.
        values: mapping of field name to a sized sequence of column values.
        copy: when true, "object" columns are copied into a new list and
            other plain columns are copied into a new numpy array; when
            false, the input is reused where its container type allows.
        stringToCategory: when true, "category" columns are factorized into
            an index array plus a pair of lookup dicts
            ``(value -> index, index -> value)``.
        stringToBuffer: when true, "string" columns are written into a
            ``BytesIO`` and the column array holds, per row, the buffer
            position reached after writing that row.
        loadFields: fields to load; ``None`` means all of ``self.fields``.

    Raises:
        ValueError: if any array in ``values`` does not have length
            ``allocation``.
        KeyError: if a field to be loaded is missing from ``values``.
        TypeError: if ``numpy.unique`` rejects a category column and no
            legacy ``numpy.unique1d`` is available to fall back on.
    """
    # Every supplied array is validated here, so no per-field length check
    # is needed again further down.
    for name, arr in values.items():
        if len(arr) != allocation:
            raise ValueError("pre-existing array \"%s\" must have allocation %d, not %d" % (name, allocation, len(arr)))

    self.allocation = self.length = allocation

    # Start from empty slots on first use or when everything is reloaded;
    # otherwise only the requested fields are overwritten.
    if not hasattr(self, "data") or loadFields is None:
        if loadFields is None:
            loadFields = self.fields
        self.data = [None] * len(self.fields)
        self.categories = [None] * len(self.fields)
        self.buffers = [None] * len(self.fields)

    for i, field in enumerate(self.fields):
        if field not in loadFields:
            continue

        arr = values[field]
        fieldType = self.types[i]

        if stringToCategory and fieldType == "category":
            try:
                uniqueValues, indices = numpy.unique(arr, return_inverse=True)
            except TypeError:
                # Very old numpy releases only offered unique1d.  Current
                # releases do not have it, in which case the TypeError is
                # the real problem and is re-raised unchanged.
                legacyUnique = getattr(numpy, "unique1d", None)
                if legacyUnique is None:
                    raise

                # Parse the leading digits of each version component so
                # that suffixes such as "0rc1" do not break the comparison,
                # and compare lists (map() is an iterator on Python 3).
                version = []
                for part in numpy.__version__.split(".")[:3]:
                    digits = ""
                    for ch in part:
                        if not ch.isdigit():
                            break
                        digits += ch
                    version.append(int(digits) if digits else 0)

                # The return order of unique1d differed before numpy 1.3.
                if version < [1, 3, 0]:
                    indices, uniqueValues = legacyUnique(arr, return_inverse=True)
                else:
                    uniqueValues, indices = legacyUnique(arr, return_inverse=True)

            v_to_n = dict((v, n) for n, v in enumerate(uniqueValues))
            n_to_v = dict((n, v) for n, v in enumerate(uniqueValues))
            self.data[i] = indices
            self.categories[i] = (v_to_n, n_to_v)

        elif fieldType == "string":
            if stringToBuffer:
                buf = BytesIO()
                arr2 = numpy.empty(allocation, dtype=typeToDtype.get(fieldType, fieldType))
                for j, v in enumerate(arr):
                    buf.write(str(v).encode("utf-8"))
                    # Record the end position of row j inside the buffer.
                    arr2[j] = buf.tell()
            else:
                arr2 = arr
                buf = None
            self.data[i] = arr2
            self.buffers[i] = buf

        elif fieldType == "object":
            # Object columns are always kept as plain Python lists.
            if copy or isinstance(arr, numpy.ndarray):
                arr = list(arr)
            self.data[i] = arr

        else:
            # Everything else is kept as a numpy array of the mapped dtype.
            if copy or not isinstance(arr, numpy.ndarray):
                arr = numpy.array(arr, dtype=typeToDtype.get(fieldType, fieldType))
            self.data[i] = arr
def _write_NAB_header(self, fileName):
    """Create ``fileName`` and write the one-line NAB header into it.

    The header has the form
    ``RecArray names=<fields> formats=<formats>\\n`` where both lists are
    joined with ``self.comma``.  "category" and "string" columns are written
    as fixed-width strings: ``a<N>`` in the header and ``|S<N>`` in the
    returned list, with ``N`` taken from ``self.stringLength(field)`` and
    256 substituted when that length is zero.  Other types are mapped
    through ``typeToDtype`` (unmapped names pass through unchanged).

    Returns:
        (handle, dtypes): the file, still open in binary write mode and
        positioned just after the header, and the per-field dtype strings.
        The caller is responsible for closing the handle.
    """
    headerFormats = []
    dtypes = []
    # range(len(self.types)) keeps the original iteration domain while
    # avoiding the Python 2-only xrange builtin.
    for i in range(len(self.types)):
        fieldType = self.types[i]
        if fieldType in ("category", "string"):
            stringLength = self.stringLength(self.fields[i])
            if stringLength == 0:
                # Fall back to a fixed width when no length is known.
                stringLength = 256
            headerFormats.append("a%d" % stringLength)
            dtypes.append("|S%d" % stringLength)
        else:
            mapped = typeToDtype.get(fieldType, fieldType)
            headerFormats.append(mapped)
            dtypes.append(mapped)

    handle = open(fileName, "wb")
    try:
        handle.write("RecArray names=".encode("utf-8"))
        handle.write(self.comma.join(self.fields).encode("utf-8"))
        handle.write(" formats=".encode("utf-8"))
        handle.write(self.comma.join(headerFormats).encode("utf-8"))
        handle.write("\n".encode("utf-8"))
    except BaseException:
        # Do not leak the descriptor when the header cannot be written;
        # on success the handle is handed to the caller still open.
        handle.close()
        raise
    return handle, dtypes
def initMemory(self, allocation):
    """Allocate empty storage for ``allocation`` rows and reset the length.

    For each field (by position in ``self.fields``/``self.types``):
      * ``self.data`` gets a list of ``None`` for "object" columns and an
        uninitialized numpy array of the mapped dtype for everything else;
      * ``self.categories`` gets a pair of empty dicts for "category"
        columns and ``None`` otherwise;
      * ``self.buffers`` gets a fresh ``BytesIO`` for "string" columns and
        ``None`` otherwise.

    ``self.length`` is set to 0.
    """
    self.allocation = allocation

    # range() instead of the Python 2-only xrange; the number of fields is
    # small, so the list built on Python 2 is of no consequence.
    fieldTypes = [self.types[i] for i in range(len(self.fields))]

    # Build all three lists before assigning any attribute so that a
    # failure part-way through leaves the previous storage untouched.
    data = []
    for fieldType in fieldTypes:
        if fieldType == "object":
            data.append([None] * allocation)
        else:
            data.append(numpy.empty(allocation, dtype=typeToDtype.get(fieldType, fieldType)))
    categories = [({}, {}) if fieldType == "category" else None for fieldType in fieldTypes]
    buffers = [BytesIO() if fieldType == "string" else None for fieldType in fieldTypes]

    self.data = data
    self.categories = categories
    self.buffers = buffers
    self.length = 0
def getcolumn(self, field, categoryToString=True):
    """Return one full column, concatenated across every page.

    "string" columns, and "category" columns when ``categoryToString`` is
    true, come back as a Python list.  Every other column comes back as a
    numpy array of length ``self.length`` whose dtype is looked up in
    ``typeToDtype`` (falling back to the type name itself).

    Raises:
        KeyError: if ``field`` is not one of ``self.fields``.
    """
    if field not in self.fields:
        raise KeyError("unrecognized UniTable field \"%s\"" % field)

    kind = self._types[field]

    if kind == "string" or (categoryToString and kind == "category"):
        # Textual values: append each page's values in page order.
        merged = []
        for page in self.pages:
            merged.extend(page.getcolumn(field, categoryToString=categoryToString))
        return merged

    # Numeric values: copy each page into its slice of one big array.
    merged = numpy.empty(self.length, dtype=typeToDtype.get(kind, kind))
    for offset, page in zip(self.starts, self.pages):
        stop = offset + page.length
        merged[offset:stop] = page.getcolumn(field, categoryToString=categoryToString)
    return merged
def getcolumn(self, field, categoryToString=True):
    """Return one full column, concatenated across every page.

    "string" columns, and "category" columns when ``categoryToString`` is
    true, come back as a Python list.  Every other column comes back as a
    numpy array of length ``self.length`` whose dtype is looked up in
    ``typeToDtype`` (falling back to the type name itself).

    Raises:
        KeyError: if ``field`` is not one of ``self.fields``.
    """
    if field not in self.fields:
        raise KeyError("unrecognized UniTable field \"%s\"" % field)

    kind = self._types[field]

    if kind == "string" or (categoryToString and kind == "category"):
        # Textual values: append each page's values in page order.
        merged = []
        for page in self.pages:
            merged.extend(page.getcolumn(field, categoryToString=categoryToString))
        return merged

    # Numeric values: copy each page into its slice of one big array.
    merged = numpy.empty(self.length, dtype=typeToDtype.get(kind, kind))
    for offset, page in zip(self.starts, self.pages):
        stop = offset + page.length
        merged[offset:stop] = page.getcolumn(field, categoryToString=categoryToString)
    return merged
def initMemory(self, allocation):
    """Allocate empty storage for ``allocation`` rows and reset the length.

    For each field (by position in ``self.fields``/``self.types``):
      * ``self.data`` gets a list of ``None`` for "object" columns and an
        uninitialized numpy array of the mapped dtype for everything else;
      * ``self.categories`` gets a pair of empty dicts for "category"
        columns and ``None`` otherwise;
      * ``self.buffers`` gets a fresh ``BytesIO`` for "string" columns and
        ``None`` otherwise.

    ``self.length`` is set to 0.
    """
    self.allocation = allocation

    # range() instead of the Python 2-only xrange; the number of fields is
    # small, so the list built on Python 2 is of no consequence.
    fieldTypes = [self.types[i] for i in range(len(self.fields))]

    # Build all three lists before assigning any attribute so that a
    # failure part-way through leaves the previous storage untouched.
    data = []
    for fieldType in fieldTypes:
        if fieldType == "object":
            data.append([None] * allocation)
        else:
            data.append(numpy.empty(allocation, dtype=typeToDtype.get(fieldType, fieldType)))
    categories = [({}, {}) if fieldType == "category" else None for fieldType in fieldTypes]
    buffers = [BytesIO() if fieldType == "string" else None for fieldType in fieldTypes]

    self.data = data
    self.categories = categories
    self.buffers = buffers
    self.length = 0
def initExisting(self, allocation, values, copy=True, stringToCategory=True, stringToBuffer=True, loadFields=None):
    """Adopt pre-existing column arrays as this object's storage.

    Parameters:
        allocation: required length of every array in ``values``; stored as
            both ``self.allocation`` and ``self.length``.
        values: mapping of field name to a sized sequence of column values.
        copy: when true, "object" columns are copied into a new list and
            other plain columns are copied into a new numpy array; when
            false, the input is reused where its container type allows.
        stringToCategory: when true, "category" columns are factorized into
            an index array plus a pair of lookup dicts
            ``(value -> index, index -> value)``.
        stringToBuffer: when true, "string" columns are written into a
            ``BytesIO`` and the column array holds, per row, the buffer
            position reached after writing that row.
        loadFields: fields to load; ``None`` means all of ``self.fields``.

    Raises:
        ValueError: if any array in ``values`` does not have length
            ``allocation``.
        KeyError: if a field to be loaded is missing from ``values``.
        TypeError: if ``numpy.unique`` rejects a category column and no
            legacy ``numpy.unique1d`` is available to fall back on.
    """
    # Every supplied array is validated here, so no per-field length check
    # is needed again further down.
    for name, arr in values.items():
        if len(arr) != allocation:
            raise ValueError('pre-existing array "%s" must have allocation %d, not %d' % (name, allocation, len(arr)))

    self.allocation = self.length = allocation

    # Start from empty slots on first use or when everything is reloaded;
    # otherwise only the requested fields are overwritten.
    if not hasattr(self, "data") or loadFields is None:
        if loadFields is None:
            loadFields = self.fields
        self.data = [None] * len(self.fields)
        self.categories = [None] * len(self.fields)
        self.buffers = [None] * len(self.fields)

    for i, field in enumerate(self.fields):
        if field not in loadFields:
            continue

        arr = values[field]
        fieldType = self.types[i]

        if stringToCategory and fieldType == "category":
            try:
                uniqueValues, indices = numpy.unique(arr, return_inverse=True)
            except TypeError:
                # Very old numpy releases only offered unique1d.  Current
                # releases do not have it, in which case the TypeError is
                # the real problem and is re-raised unchanged.
                legacyUnique = getattr(numpy, "unique1d", None)
                if legacyUnique is None:
                    raise

                # Parse the leading digits of each version component so
                # that suffixes such as "0rc1" do not break the comparison,
                # and compare lists (map() is an iterator on Python 3).
                version = []
                for part in numpy.__version__.split(".")[:3]:
                    digits = ""
                    for ch in part:
                        if not ch.isdigit():
                            break
                        digits += ch
                    version.append(int(digits) if digits else 0)

                # The return order of unique1d differed before numpy 1.3.
                if version < [1, 3, 0]:
                    indices, uniqueValues = legacyUnique(arr, return_inverse=True)
                else:
                    uniqueValues, indices = legacyUnique(arr, return_inverse=True)

            v_to_n = dict((v, n) for n, v in enumerate(uniqueValues))
            n_to_v = dict((n, v) for n, v in enumerate(uniqueValues))
            self.data[i] = indices
            self.categories[i] = (v_to_n, n_to_v)

        elif fieldType == "string":
            if stringToBuffer:
                buf = BytesIO()
                arr2 = numpy.empty(allocation, dtype=typeToDtype.get(fieldType, fieldType))
                for j, v in enumerate(arr):
                    buf.write(str(v).encode("utf-8"))
                    # Record the end position of row j inside the buffer.
                    arr2[j] = buf.tell()
            else:
                arr2 = arr
                buf = None
            self.data[i] = arr2
            self.buffers[i] = buf

        elif fieldType == "object":
            # Object columns are always kept as plain Python lists.
            if copy or isinstance(arr, numpy.ndarray):
                arr = list(arr)
            self.data[i] = arr

        else:
            # Everything else is kept as a numpy array of the mapped dtype.
            if copy or not isinstance(arr, numpy.ndarray):
                arr = numpy.array(arr, dtype=typeToDtype.get(fieldType, fieldType))
            self.data[i] = arr