def deserialize(self, stream: Stream) -> bool:
    """Restore this object's state from a binary stream.

    The data is consumed in this order: an optional two-byte header (the
    marker byte ``0xAA`` followed by a format-version byte), the source
    of analysis, the base-language value, the entity count, one type name
    per entity, each entity's own payload, and finally the token chain.

    Args:
        stream: Stream positioned at the start of the serialized data.

    Returns:
        Always ``True``.
    """
    # Data that does not start with the 0xAA marker has no header: treat
    # it as version 0 and put the probed byte back.
    version = 0
    if stream.readbyte() == 0xAA:
        version = stream.readbyte()
    else:
        stream.position -= 1

    self.__m_sofa = SourceOfAnalysis(None)
    self.__m_sofa.deserialize(stream)
    self.base_language = MorphLang._new56(
        SerializerHelper.deserialize_int(stream))

    # Pass 1: create an (empty) entity for every stored type name.
    self.__m_entities = list()
    entity_count = SerializerHelper.deserialize_int(stream)
    for _ in range(entity_count):
        type_name = SerializerHelper.deserialize_string(stream)
        entity = ProcessorService.create_referent(type_name)
        if entity is None:
            # No factory produced an instance for this type name.
            entity = Referent("UNDEFINED")
        self.__m_entities.append(entity)

    # Pass 2: fill the entities in. Each one receives the complete list,
    # which is why all of them have to exist before any payload is read.
    for index in range(entity_count):
        self.__m_entities[index].deserialize(
            stream, self.__m_entities, self.__m_sofa)

    self.first_token = SerializerHelper.deserialize_tokens(
        stream, self, version)
    self.__create_statistics()
    return True
def deserialize_string(stream : Stream) -> str:
    """Read a length-prefixed UTF-8 string from ``stream``.

    An integer prefix gives the payload size in bytes. A negative prefix
    stands for ``None`` and a zero prefix for the empty string; in both
    cases no payload bytes are read.

    Args:
        stream: Stream positioned at the integer length prefix.

    Returns:
        The decoded text, ``""`` for a zero prefix, or ``None`` for a
        negative prefix. Undecodable byte sequences are dropped.
    """
    byte_count = SerializerHelper.deserialize_int(stream)
    if byte_count <= 0:
        return None if byte_count < 0 else ""

    payload = Utils.newArrayOfBytes(byte_count, 0)
    stream.read(payload, 0, len(payload))
    return payload.decode("UTF-8", 'ignore')
def serialize_string(stream : Stream, val : str) -> None:
    """Write ``val`` to ``stream`` as a length-prefixed UTF-8 string.

    ``None`` is written as the single integer ``-1`` and an empty string
    as the single integer ``0``. Any other value is written as its
    encoded byte length followed by the encoded bytes.

    Args:
        stream: Destination stream.
        val: Text to write; may be ``None``.
    """
    if val is None:
        SerializerHelper.serialize_int(stream, -1)
    elif Utils.isNullOrEmpty(val):
        SerializerHelper.serialize_int(stream, 0)
    else:
        encoded = val.encode("UTF-8", 'ignore')
        SerializerHelper.serialize_int(stream, len(encoded))
        stream.write(encoded, 0, len(encoded))
def _deserialize(self, stream: Stream, kit_: 'AnalysisKit', vers: int) -> None:
    """Read this token's fields from ``stream``, after the base-class part.

    Args:
        stream: Stream positioned at this token's serialized data.
        kit_: Forwarded unchanged to the base-class implementation.
        vers: Format version. Version 0 stores the value as eight raw
            bytes; every other version stores it as a string.
    """
    from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
    super()._deserialize(stream, kit_, vers)

    if vers != 0:
        self.value = SerializerHelper.deserialize_string(stream)
    else:
        # Legacy layout: eight little-endian bytes, kept as decimal text.
        # NOTE(review): the bytes are decoded as an unsigned integer;
        # confirm that the version-0 writer never stored negative values.
        raw = Utils.newArrayOfBytes(8, 0)
        stream.read(raw, 0, 8)
        self.value = str(int.from_bytes(raw[0:8], byteorder="little"))

    spelling_code = SerializerHelper.deserialize_int(stream)
    self.typ = Utils.valToEnum(spelling_code, NumberSpellingType)
def __deserialize_item(self, stream: Stream) -> 'MorphBaseInfo':
    """Read one morphological item from ``stream``.

    A leading tag byte selects the concrete type: ``0`` produces a plain
    ``MorphBaseInfo``, any other value a ``MorphWordForm``. Five short
    values follow (class, case, gender, number, language). A word form
    additionally carries two strings (normal case, normal full), a short
    (undefined coefficient), and a counted list of misc attribute strings.

    Args:
        stream: Stream positioned at the item's tag byte.

    Returns:
        The populated item.
    """
    from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
    kind = stream.readbyte()
    if kind == 0:
        item = MorphBaseInfo()
    else:
        item = MorphWordForm()

    # Fields shared by both item kinds, in stream order.
    item.class0_ = MorphClass._new53(
        SerializerHelper.deserialize_short(stream))
    item.case_ = MorphCase._new29(
        SerializerHelper.deserialize_short(stream))
    item.gender = Utils.valToEnum(
        SerializerHelper.deserialize_short(stream), MorphGender)
    item.number = Utils.valToEnum(
        SerializerHelper.deserialize_short(stream), MorphNumber)
    item.language = MorphLang._new56(
        SerializerHelper.deserialize_short(stream))
    if kind == 0:
        return item

    # Word-form-only tail.
    word_form = Utils.asObjectOrNull(item, MorphWordForm)
    word_form.normal_case = SerializerHelper.deserialize_string(stream)
    word_form.normal_full = SerializerHelper.deserialize_string(stream)
    word_form.undef_coef = SerializerHelper.deserialize_short(stream)

    attr_count = SerializerHelper.deserialize_int(stream)
    for _ in range(attr_count):
        # The misc container is only created once an attribute is present.
        if word_form.misc is None:
            word_form.misc = MorphMiscInfo()
        word_form.misc.attrs.append(
            SerializerHelper.deserialize_string(stream))
    return item
def deserialize(self, stream: Stream, all0_: typing.List['Referent'], sofa: 'SourceOfAnalysis') -> None:
    """Read this referent's slots and occurrences from ``stream``.

    Args:
        stream: Stream positioned at this referent's serialized data.
        all0_: List used to resolve slot values that are stored as
            references; may be ``None``, in which case such values are
            left as ``None``.
        sofa: Source of analysis handed to every created annotation.
    """
    # A leading string is stored first; it is read and not used here.
    SerializerHelper.deserialize_string(stream)

    slot_count = SerializerHelper.deserialize_int(stream)
    for _ in range(slot_count):
        slot_name = SerializerHelper.deserialize_string(stream)
        slot_count_value = SerializerHelper.deserialize_int(stream)
        marker = SerializerHelper.deserialize_int(stream)
        slot_value = None
        if marker < 0 and all0_ is not None:
            # Negative marker: reference to entry (-marker - 1) of all0_;
            # an out-of-range index leaves the value as None.
            ref_index = -marker - 1
            if ref_index < len(all0_):
                slot_value = all0_[ref_index]
        elif marker > 0:
            # Positive marker: the integer just read is re-read as part
            # of a string, so step back four bytes (presumably the size
            # of the serialized integer) and read the string.
            stream.position -= 4
            slot_value = SerializerHelper.deserialize_string(stream)
        self.add_slot(slot_name, slot_value, False, slot_count_value)

    occurrence_count = SerializerHelper.deserialize_int(stream)
    self.__m_occurrence = list()
    for _ in range(occurrence_count):
        annotation = TextAnnotation._new2863(sofa, self)
        self.__m_occurrence.append(annotation)
        annotation.begin_char = SerializerHelper.deserialize_int(stream)
        annotation.end_char = SerializerHelper.deserialize_int(stream)
        flags = SerializerHelper.deserialize_int(stream)
        if flags & 1:
            # Lowest bit marks the annotation as essential.
            annotation.essential_for_occurence = True
def serialize(self, stream: Stream) -> None:
    """Write this object's state to ``stream``.

    Output order: the marker byte ``0xAA``, the version byte ``1``, the
    source of analysis, the base-language value, the entity count, one
    type name per entity, each entity's payload, and the token chain.

    Side effects: when the entity list is empty it is first filled from
    the referents of every stored data container, and every entity's
    ``tag`` is set to its 1-based position in the list.

    Args:
        stream: Destination stream.
    """
    stream.writebyte(0xAA)
    stream.writebyte(1)
    self.__m_sofa.serialize(stream)
    SerializerHelper.serialize_int(stream, self.base_language.value)

    if len(self.__m_entities) == 0:
        for _, data in self.__m_datas.items():
            self.__m_entities.extend(data.referents)

    SerializerHelper.serialize_int(stream, len(self.__m_entities))
    for position, entity in enumerate(self.__m_entities, start=1):
        entity.tag = position
        SerializerHelper.serialize_string(stream, entity.type_name)

    for entity in self.__m_entities:
        entity.serialize(stream)

    SerializerHelper.serialize_tokens(stream, self.first_token, 0)
def __serialize_item(self, stream: Stream, bi: 'MorphBaseInfo') -> None:
    """Write one morphological item to ``stream``.

    A tag byte comes first (``1`` for a ``MorphWordForm``, ``0``
    otherwise), followed by five short values (class, case, gender,
    number, language). A word form is followed by its two normal-form
    strings, its undefined coefficient as a short, and a counted list of
    misc attribute strings (count ``0`` when there is no misc object).

    Args:
        stream: Destination stream.
        bi: Item to write.
    """
    from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
    tag = 1 if isinstance(bi, MorphWordForm) else 0
    stream.writebyte(tag)

    # Fields shared by both item kinds, in stream order.
    SerializerHelper.serialize_short(stream, bi.class0_.value)
    SerializerHelper.serialize_short(stream, bi.case_.value)
    SerializerHelper.serialize_short(stream, bi.gender)
    SerializerHelper.serialize_short(stream, bi.number)
    SerializerHelper.serialize_short(stream, bi.language.value)

    word_form = Utils.asObjectOrNull(bi, MorphWordForm)
    if word_form is None:
        return

    # Word-form-only tail.
    SerializerHelper.serialize_string(stream, word_form.normal_case)
    SerializerHelper.serialize_string(stream, word_form.normal_full)
    SerializerHelper.serialize_short(stream, word_form.undef_coef)

    misc = word_form.misc
    attrs = misc.attrs if misc is not None else ()
    SerializerHelper.serialize_int(stream, len(attrs))
    for attr in attrs:
        SerializerHelper.serialize_string(stream, attr)
def deflate_gzip(str0_: Stream, res: Stream) -> None:
    """Decompress the gzip data in ``str0_`` and write the result to ``res``.

    The data is processed in chunks of up to 100000 bytes. Decompression
    is best-effort: if a read raises, whatever the failed read left in the
    buffer (up to its last non-zero byte) is still written to ``res`` and
    the function returns without re-raising.

    Args:
        str0_: Stream holding gzip-compressed data.
        res: Stream that receives the decompressed bytes.
    """
    with Stream(gzip.GzipFile(fileobj=str0_.getstream(), mode='r')) as deflate:
        buf = Utils.newArrayOfBytes(100000, 0)
        len0_ = len(buf)
        zeros = bytes(len0_)
        while True:
            # Clear the buffer before every read so that, if the read
            # fails, the salvage step below can tell freshly written bytes
            # from leftovers. One slice assignment replaces a
            # 100000-iteration Python loop per chunk.
            buf[:] = zeros
            try:
                count = deflate.read(buf, 0, len0_)
            except Exception:
                # Deliberate best-effort: keep everything up to the last
                # non-zero byte of the partially filled buffer, then stop.
                salvaged = len(bytes(buf).rstrip(b"\x00"))
                if salvaged > 0:
                    res.write(buf, 0, salvaged)
                break
            if count < 1:
                # Nothing more to read.
                break
            res.write(buf, 0, count)
def deserialize_short(stream : Stream) -> int:
    """Read two bytes from ``stream`` as a little-endian unsigned integer.

    Args:
        stream: Stream positioned at the two-byte value.

    Returns:
        The decoded value, in the range 0..65535.
    """
    raw = Utils.newArrayOfBytes(2, 0)
    stream.read(raw, 0, 2)
    return int.from_bytes(raw[:2], byteorder="little")
def serialize_short(stream : Stream, val : int) -> None:
    """Write ``val`` to ``stream`` as two little-endian bytes.

    Args:
        stream: Destination stream.
        val: Value to write; must fit in two unsigned bytes.

    Raises:
        OverflowError: If ``val`` is negative or larger than 65535.
    """
    encoded = val.to_bytes(2, byteorder="little")
    stream.write(encoded, 0, 2)