def read_array(self, typecode, length):
    """Read ``length`` items of ``typecode`` from this file and return them
    as an ``array.array``, converted from on-disk (big-endian) byte order
    to native order.
    """
    result = array(typecode)
    if self.is_real:
        # Backed by a real file object: let array read from it directly.
        result.fromfile(self.file, length)
    else:
        # Otherwise pull the raw bytes and unpack them into the array.
        result_bytes = self.file.read(length * _SIZEMAP[typecode])
        array_frombytes(result, result_bytes)
    # On-disk data is big-endian; swap on little-endian machines.
    if IS_LITTLE:
        result.byteswap()
    return result
def read_array(self, typecode, length):
    """Read ``length`` items of ``typecode`` and return them as an
    ``array.array`` in native byte order (on-disk order is big-endian).
    """
    out = array(typecode)
    if self.is_real:
        # A real file object: array can consume it directly.
        out.fromfile(self.file, length)
    else:
        # Read the exact byte count through this object's own read()
        # method and unpack into the array.
        nbytes = length * _SIZEMAP[typecode]
        array_frombytes(out, self.read(nbytes))
    # Stored big-endian; convert for little-endian hosts.
    if IS_LITTLE:
        out.byteswap()
    return out
def deminimize_weights(count, string, compression=0):
    """Expand a minimized weights string back into an array of ``count``
    floats.

    An empty ``string`` means every weight was 1.0, so a default array is
    synthesized. If ``compression`` is true the string is decompressed
    first.
    """
    if not string:
        # All weights were 1.0 and were elided during minimization.
        return array("f", (1.0 for _ in xrange(count)))
    if compression:
        string = decompress(string)
    weights = array("f")
    array_frombytes(weights, string)
    # Minimized data is stored little-endian (note: opposite of the
    # postings-file big-endian convention elsewhere in this module).
    if not IS_LITTLE:
        weights.byteswap()
    return weights
def deminimize_ids(typecode, count, string, compression=0):
    """Expand a minimized IDs string back into its original form.

    If ``typecode`` is the empty string the IDs were serialized with
    ``loads`` (pickle) rather than packed into an array; otherwise they are
    unpacked into an ``array.array`` of ``typecode``. ``count`` is accepted
    for signature symmetry with ``deminimize_weights``.
    """
    if compression:
        string = decompress(string)
    if typecode == '':
        # NOTE(review): loads() here implies unpickling — only safe on
        # trusted index data.
        return loads(string)
    ids = array(typecode)
    array_frombytes(ids, string)
    # Minimized data is stored little-endian; swap on big-endian hosts.
    if not IS_LITTLE:
        ids.byteswap()
    return ids
def read_values(self):
    """Read and slice the value strings for this block, storing the list
    in ``self.values``.

    ``self.postingsize`` selects the layout: 0 means the format stores no
    values, a positive value means fixed-size values, and a negative value
    means variable-size values prefixed by an array of lengths.
    """
    postcount = self.count
    postingsize = self.postingsize

    if postingsize == 0:
        # Format does not store values (i.e. Existence): fake them.
        self.values = (None,) * postcount
        return

    postfile = self.postfile
    start = self.values_offset
    postfile.seek(start)
    values_string = postfile.read(self.nextoffset - start)
    if self.wtslen:
        # Values string is compressed
        values_string = decompress(values_string)

    if postingsize > 0:
        # Fixed posting size: chop the block into equal pieces.
        values = [values_string[i * postingsize:(i + 1) * postingsize]
                  for i in xrange(postcount)]
    else:
        # Variable posting size: a length array sits at the front of the
        # string; strip it off, then use it to chop up the remainder.
        header = _INT_SIZE * postcount
        lengths = array("i")
        array_frombytes(lengths, values_string[:header])
        values_string = values_string[header:]
        values = []
        pos = 0
        for length in lengths:
            values.append(values_string[pos:pos + length])
            pos += length

    self.values = values
def read_values(self):
    """Load the per-posting value strings for this block into
    ``self.values``.

    Layout is chosen by ``self.postingsize``: zero → no stored values
    (``None`` placeholders), positive → fixed-size values, negative →
    variable-size values preceded by an ``array("i")`` of lengths.
    """
    postcount = self.count
    postingsize = self.postingsize
    if postingsize != 0:
        postfile = self.postfile
        startoffset = self.values_offset
        postfile.seek(startoffset)
        raw = postfile.read(self.nextoffset - startoffset)
        if self.wtslen:
            # Values string is compressed
            raw = decompress(raw)
        if postingsize < 0:
            # Pull the array of value lengths off the front of the string
            headersize = _INT_SIZE * postcount
            lengths = array("i")
            array_frombytes(lengths, raw[:headersize])
            raw = raw[headersize:]
            # Walk the lengths to slice out each value string.
            values = []
            offset = 0
            for size in lengths:
                values.append(raw[offset:offset + size])
                offset += size
        else:
            # Fixed posting size: equal-width slices.
            values = [raw[n * postingsize:n * postingsize + postingsize]
                      for n in xrange(postcount)]
    else:
        # Format does not store values (i.e. Existence): fake them.
        values = (None,) * postcount
    self.values = values
def read_weights(self):
    """Read this block's weights, store them in ``self.weights``, record
    where the values begin in ``self.values_offset``, and return the
    weights.

    ``self.wtslen`` encodes the storage: 1 → no weights stored (result is
    ``None``), other nonzero → compressed float array, 0 → uncompressed
    float array read straight from the file.
    """
    postfile = self.postfile
    offset = self.weights_offset
    postfile.seek(offset)
    wtslen = self.wtslen
    postcount = self.count

    if wtslen == 1:
        # wtslen == 1 is the "no stored weights" marker.
        weights = None
        nextoffset = offset
    elif wtslen:
        # Compressed: inflate then unpack; stored big-endian.
        weights = array("f")
        array_frombytes(weights, decompress(postfile.read(wtslen)))
        if IS_LITTLE:
            weights.byteswap()
        nextoffset = offset + wtslen
    else:
        # Uncompressed: read the float array directly from the file.
        weights = postfile.get_array(offset, "f", postcount)
        nextoffset = offset + _FLOAT_SIZE * postcount

    self.weights = weights
    self.values_offset = nextoffset
    return weights
def read_ids(self):
    """Read this block's posting IDs, store them in ``self.ids``, record
    where the weights begin in ``self.weights_offset``, and return the
    IDs.

    Three storage forms: ``self.stringids`` → UTF-8 strings read one at a
    time; ``self.idslen`` nonzero → compressed unsigned-int array; else →
    uncompressed unsigned-int array.
    """
    postfile = self.postfile
    offset = self.dataoffset
    postcount = self.count
    postfile.seek(offset)

    if self.stringids:
        # String IDs: decode each stored string; utf8decode returns a
        # (text, length) pair, keep only the text.
        read_string = postfile.read_string
        ids = [utf8decode(read_string())[0] for _ in xrange(postcount)]
        nextoffset = postfile.tell()
    elif self.idslen:
        # Compressed numeric IDs: inflate, unpack, fix byte order.
        ids = array("I")
        array_frombytes(ids, decompress(postfile.read(self.idslen)))
        if IS_LITTLE:
            ids.byteswap()
        nextoffset = offset + self.idslen
    else:
        # Uncompressed numeric IDs straight from the file.
        ids = postfile.read_array("I", postcount)
        nextoffset = offset + _INT_SIZE * postcount

    self.ids = ids
    self.weights_offset = nextoffset
    return ids
def get_array(self, position, typecode, length):
    """Return ``length`` items of ``typecode`` read from ``position`` as
    an ``array.array`` in native byte order (stored big-endian).
    """
    nbytes = length * _SIZEMAP[typecode]
    result = array(typecode)
    array_frombytes(result, self.get(position, nbytes))
    # Swap to native order on little-endian machines.
    if IS_LITTLE:
        result.byteswap()
    return result