def _tokenize(self, full_text):
    """Split full_text into paragraphs, each one a list of doc tokens."""
    paragraphs = []
    if not full_text:
        return paragraphs

    tok = _Eolian_Doc_Token_Struct()
    for paragraph in full_text.split('\n\n'):
        tokens = []
        c_paragraph = _str_to_bytes(paragraph)  # keep c_paragraph alive!

        lib.eolian_doc_token_init(byref(tok))
        next_chunk = lib.eolian_documentation_tokenize(c_paragraph,
                                                       byref(tok))
        while next_chunk:
            typ = lib.eolian_doc_token_type_get(byref(tok))
            txt = lib.eolian_doc_token_text_get(byref(tok))
            # resolve the (optional) reference carried by this token
            ref_obj = c_void_p(0)
            ref_attr = c_void_p(0)
            ref_type = lib.eolian_doc_token_ref_resolve(
                byref(tok), self.unit.state,
                byref(ref_obj), byref(ref_attr))
            tokens.append(
                Documentation_Token(typ, txt, ref_type, ref_obj, ref_attr))
            lib.free(c_void_p(txt))  # the token text is malloc'd on the C side
            next_chunk = lib.eolian_documentation_tokenize(
                c_char_p(next_chunk), byref(tok))

        paragraphs.append(tokens)

    return paragraphs
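# Usage sketch (hypothetical, not part of the bindings): iterate the
# nested lists returned by _tokenize(). The names `doc`, `doc.text` and
# the Documentation_Token attributes `type`/`text` are assumptions for
# illustration, chosen to mirror the constructor arguments above.
#
#   for paragraph in doc._tokenize(doc.text):
#       for token in paragraph:
#           print(token.type, token.text)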