def encode(self, posns_chars):
    """Serialize (position, startchar, endchar) triples into a byte string.

    The result is the number of entries packed with ``"!I"`` followed, for
    each entry, by three ``varint`` codes:

    * the position minus the previous entry's position,
    * the start character minus the previous entry's end character,
    * the end character minus the start character.

    The running bases both start at 0.

    :param posns_chars: a sized sequence of ``(pos, startchar, endchar)``
        tuples.
    :returns: the packed header concatenated with the joined codes.
    """
    # posns_chars = [(pos, startchar, endchar), ...]
    # NOTE(review): presumably entries are in ascending order so every delta
    # is non-negative -- confirm against what varint() accepts.
    codes = []
    posbase = 0
    charbase = 0
    for pos, startchar, endchar in posns_chars:
        codes.append(varint(pos - posbase))
        posbase = pos
        codes.extend((varint(startchar - charbase),
                      varint(endchar - startchar)))
        charbase = endchar
    # pack() yields bytes, so the codes must be joined with a bytes separator;
    # a str separator makes the concatenation fail on Python 3.
    return pack("!I", len(posns_chars)) + b"".join(codes)
def encode(self, posns_chars):
    """Encode a list of (pos, startchar, endchar) tuples as bytes.

    Output layout: an entry count packed with ``"!I"``, then per entry the
    ``varint`` of the position delta (relative to the previous position),
    the ``varint`` of the start character relative to the previous entry's
    end character, and the ``varint`` of ``endchar - startchar``. Both
    running bases begin at 0.

    :param posns_chars: a sized sequence of ``(pos, startchar, endchar)``
        tuples.
    :returns: header plus the concatenated per-entry codes.
    """
    # posns_chars = [(pos, startchar, endchar), ...]
    # NOTE(review): assumes ascending positions/offsets so the deltas handed
    # to varint() are non-negative -- TODO confirm.
    codes = []
    posbase = 0
    charbase = 0
    for pos, startchar, endchar in posns_chars:
        codes.append(varint(pos - posbase))
        posbase = pos
        codes.extend(
            (varint(startchar - charbase), varint(endchar - startchar)))
        charbase = endchar
    # Join with b"" (not ""): the header from pack() is bytes, and mixing it
    # with a str payload raises TypeError on Python 3.
    return pack("!I", len(posns_chars)) + b"".join(codes)
def encode(self, posns_chars_boosts):
    """Serialize (pos, startchar, endchar, boost) entries into bytes.

    The header is packed with ``"!If"`` and holds the number of entries and
    the sum of all boosts. Each entry then contributes four codes:

    * ``varint`` of the position minus the previous position,
    * ``varint`` of the start character minus the previous end character,
    * ``varint`` of ``endchar - startchar``,
    * ``float_to_byte(boost)``.

    The running bases both start at 0.

    :param posns_chars_boosts: a sized sequence of
        ``(pos, startchar, endchar, boost)`` tuples.
    :returns: the packed header followed by the joined codes.
    """
    # posns_chars_boosts = [(pos, startchar, endchar, boost), ...]
    # NOTE(review): float_to_byte() presumably returns a one-byte encoding of
    # the boost -- confirm against the helper.
    codes = []
    posbase = 0
    charbase = 0
    summedboost = 0
    for pos, startchar, endchar, boost in posns_chars_boosts:
        summedboost += boost
        codes.append(varint(pos - posbase))
        posbase = pos
        codes.extend((varint(startchar - charbase),
                      varint(endchar - startchar),
                      float_to_byte(boost)))
        charbase = endchar

    b = pack("!If", len(posns_chars_boosts), summedboost)
    # The header is bytes, so the payload must be joined as bytes too; a str
    # separator breaks the concatenation on Python 3.
    return b + b"".join(codes)
def encode(self, positions):
    """Serialize a list of positions into a byte string.

    The result is the number of positions packed with ``"!I"`` followed by
    one ``varint`` per position, each holding the difference from the
    previous position (the first is relative to 0).

    :param positions: a sized sequence of integer positions.
    :returns: the packed count followed by the joined delta codes.
    """
    # positions = [pos1, pos2, ...]
    # NOTE(review): presumably positions are ascending so each delta is
    # non-negative -- confirm against what varint() accepts.
    codes = []
    base = 0
    for pos in positions:
        codes.append(varint(pos - base))
        base = pos
    # pack() returns bytes; joining with a str separator would make the
    # concatenation raise TypeError on Python 3.
    return pack("!I", len(positions)) + b"".join(codes)
def encode(self, posns_chars_boosts):
    """Encode (pos, startchar, endchar, boost) tuples as a byte string.

    Output layout: a header packed with ``"!If"`` (entry count, total of all
    boosts), then for every entry the ``varint`` of the position delta, the
    ``varint`` of the start character relative to the previous entry's end
    character, the ``varint`` of ``endchar - startchar``, and
    ``float_to_byte(boost)``. Position and character bases start at 0.

    :param posns_chars_boosts: a sized sequence of
        ``(pos, startchar, endchar, boost)`` tuples.
    :returns: header plus the concatenated per-entry codes.
    """
    # posns_chars_boosts = [(pos, startchar, endchar, boost), ...]
    # NOTE(review): assumes float_to_byte() returns the same string type as
    # varint() so the codes can be joined together -- TODO confirm.
    codes = []
    posbase = 0
    charbase = 0
    summedboost = 0
    for pos, startchar, endchar, boost in posns_chars_boosts:
        summedboost += boost
        codes.append(varint(pos - posbase))
        posbase = pos
        codes.extend((varint(startchar - charbase),
                      varint(endchar - startchar),
                      float_to_byte(boost)))
        charbase = endchar
    # Use a bytes separator: the packed header is bytes, and bytes + str is a
    # TypeError on Python 3.
    return (pack("!If", len(posns_chars_boosts), summedboost)
            + b"".join(codes))
def encode(self, posns_boosts):
    """Serialize (position, boost) pairs into a byte string.

    The header is packed with ``"!If"`` and holds the number of pairs and
    the sum of all boosts. Each pair then contributes the ``varint`` of the
    position minus the previous position (the first is relative to 0) and
    ``float_to_byte(boost)``.

    :param posns_boosts: a sized sequence of ``(pos, boost)`` tuples.
    :returns: the packed header followed by the joined codes.
    """
    # posns_boosts = [(pos, boost), ...]
    # NOTE(review): float_to_byte() presumably yields a one-byte encoding of
    # the boost -- confirm against the helper.
    codes = []
    base = 0
    summedboost = 0
    for pos, boost in posns_boosts:
        summedboost += boost
        codes.extend((varint(pos - base), float_to_byte(boost)))
        base = pos
    # pack() returns bytes, so join the codes as bytes; a str separator makes
    # the concatenation fail on Python 3.
    return pack("!If", len(posns_boosts), summedboost) + b"".join(codes)
def write_varint(self, i):
    """Encode ``i`` as a variable-length unsigned integer and write it to
    the wrapped file.
    """
    encoded = varint(i)
    self.file.write(encoded)
def write_varint(self, i):
    """Encode ``i`` as a variable-length integer and write the result to the
    wrapped file.
    """
    out = self.file
    out.write(varint(i))
def _write_node(self, uncnode):
    """Serialize the arcs of ``uncnode`` and append them to ``self.dbfile``.

    Every arc is first written to an in-memory buffer as: a flags byte, the
    label (preceded by a varint length when the label is longer than one
    item), the target as an unsigned int when there is one, then the arc's
    value and accept value when they are not ``None``. If all arcs turned
    out to have the same encoded size, a marker byte (255) plus the arc size
    and arc count are written to ``dbfile`` ahead of the buffered arcs.

    Updates ``self.node_count``, ``self.arc_count`` and ``self.fixed_count``.

    :param uncnode: node object providing ``arcs`` and ``accept``; each arc
        provides ``label``, ``target``, ``accept``, ``value`` and
        ``acceptval``.
    :returns: the ``dbfile`` offset where the node begins, or ``None`` when
        the node has no arcs and is accepting (nothing is written).
    :raises Exception: if the node has no arcs and is not accepting.
    """
    vtype = self.vtype
    dbfile = self.dbfile
    arcs = uncnode.arcs
    numarcs = len(arcs)

    if not numarcs:
        if uncnode.accept:
            return None
        # What does it mean for an arc to stop but not be accepted?
        raise Exception("Cannot write a node that has no arcs and is not "
                        "accepting")

    self.node_count += 1
    buf = StructFile(BytesIO())
    nodestart = dbfile.tell()

    # fixedsize tracks whether every arc encodes to the same number of bytes:
    # -1 = no arc measured yet, 0 = sizes differ, > 0 = the common size.
    fixedsize = -1
    arcstart = buf.tell()
    for i, arc in enumerate(arcs):
        self.arc_count += 1
        target = arc.target
        label = arc.label

        flags = 0
        if len(label) > 1:
            flags += MULTIBYTE_LABEL
        if i == numarcs - 1:
            flags += ARC_LAST
        if arc.accept:
            flags += ARC_ACCEPT
        if target is None:
            flags += ARC_STOP
        if arc.value is not None:
            flags += ARC_HAS_VAL
        if arc.acceptval is not None:
            flags += ARC_HAS_ACCEPT_VAL

        buf.write(pack_byte(flags))
        # Only multi-item labels carry an explicit length prefix.
        if len(label) > 1:
            buf.write(varint(len(label)))
        buf.write(label)
        if target is not None:
            buf.write(pack_uint(target))
        if arc.value is not None:
            vtype.write(buf, arc.value)
        if arc.acceptval is not None:
            vtype.write(buf, arc.acceptval)

        # Measure how many bytes this arc took in the buffer.
        here = buf.tell()
        thissize = here - arcstart
        arcstart = here
        if fixedsize == -1:
            fixedsize = thissize
        elif fixedsize > 0 and thissize != fixedsize:
            fixedsize = 0

    if fixedsize > 0:
        # Write a fake arc containing the fixed size and number of arcs
        dbfile.write_byte(255)  # FIXED_SIZE
        dbfile.write_int(fixedsize)
        dbfile.write_int(numarcs)
        self.fixed_count += 1
    dbfile.write(buf.file.getvalue())

    return nodestart