def parse(self, file_path):
    """Parse a TempEval-3 annotated file into a Document object.

    It reads the <TEXT> node of the XML file, runs its plain text
    through the Stanford CoreNLP pipeline, and packs the resulting
    sentences, words and the gold annotations into a Document, which
    is our internal representation.

    :param file_path: path to a TempEval-3 annotated XML file.
    :return: a populated Document instance.
    :raises AssertionError: if file_path is not an existing file.
    """
    assert os.path.isfile(file_path), 'File path does not exist!'
    logging.info('Document {}: parsing...'.format(
        os.path.relpath(file_path)))
    xml = etree.parse(file_path)
    text_node = xml.findall(".//TEXT")[0]
    # Serialize twice: plain text (fed to CoreNLP) and raw XML (used
    # only to measure how much trailing tail text follows </TEXT>).
    text_string = etree.tostring(text_node, method='text', encoding='utf8')
    text_xml = etree.tostring(text_node, method='xml', encoding='utf8')
    text_string = unicode(text_string, 'UTF-8')
    text_xml = unicode(text_xml, 'UTF-8')
    # Any tail text after the closing </TEXT> tag is also included in
    # the method='text' serialization, so trim the same amount from
    # the right of text_string.
    right_chars = len(text_xml.split('</TEXT>')[1])
    # BUG FIX: when nothing follows </TEXT>, right_chars is 0 and the
    # original unconditional text_string[:-0] slice would erase the
    # ENTIRE text. Only slice when there is something to trim.
    if right_chars:
        text_string = text_string[:-right_chars]
    # StanfordParser strips the text internally, so remember how many
    # leading whitespace characters it will drop in order to re-align
    # the token character offsets afterwards.
    left_chars = len(text_string) - len(text_string.lstrip())
    with Mute_stderr():
        stanford_tree = CORENLP.parse(text_string)
    document = Document(file_path)
    document.text_offset = left_chars
    document.file_path = os.path.abspath(file_path)
    document.doc_id = os.path.basename(file_path)
    document.sec_times = self.get_dct(file_path)
    # Document creation time comes from the section times; dct_text is
    # its compact form with the dashes removed (e.g. 2014-01-02 -> 20140102).
    document.dct = document.sec_times.admission_date
    document.dct_text = document.dct.replace('-', '')
    document.title = os.path.basename(file_path)
    document.text = text_string
    document._coref = stanford_tree.get('coref', [])
    for num_sen, stanford_sentence in \
            enumerate(stanford_tree['sentences']):
        collp_deps = stanford_sentence.get('collapsed_dependencies', None)
        basic_deps = stanford_sentence.get('basic_dependencies', None)
        parsetree = stanford_sentence.get('parsetree', u'')
        sentence_text = stanford_sentence.get('text', u'')
        sentence = Sentence(id_sentence=num_sen,
                            basic_dependencies=basic_deps,
                            collapsed_dependencies=collp_deps,
                            parsetree=parsetree,
                            text=sentence_text)
        for num_word, (word_form, attr) in \
                enumerate(stanford_sentence['words']):
            # Shift CoreNLP offsets back into the un-stripped text.
            offset_begin = int(attr['CharacterOffsetBegin']) - left_chars
            offset_end = int(attr['CharacterOffsetEnd']) - left_chars
            word = Word(word_form=word_form,
                        char_offset_begin=offset_begin,
                        char_offset_end=offset_end,
                        lemma=attr['Lemma'],
                        named_entity_tag=attr['NamedEntityTag'],
                        part_of_speech=attr['PartOfSpeech'],
                        id_token=num_word,
                        id_sentence=num_sen)
            sentence.words.append(word)
        document.sentences.append(sentence)
    document.gold_annotations = self._get_annotations(xml, document)
    document.store_gold_annotations()
    document.complete_structure()
    logging.info('Document {}: parsed.'.format(os.path.relpath(file_path)))
    return document
def parse(self, file_path):
    """Parse a TempEval-3 annotated file and return a Document.

    The <TEXT> node of the XML file is extracted, its plain text is
    analyzed with the Stanford CoreNLP pipeline, and the sentences,
    words and gold annotations are packed into a Document object (our
    internal representation).

    :param file_path: path to a TempEval-3 annotated XML file.
    :return: a populated Document instance.
    :raises AssertionError: if file_path is not an existing file.
    """
    assert os.path.isfile(file_path), 'File path does not exist!'
    logging.info('Document {}: parsing...'.format(
        os.path.relpath(file_path)))
    xml = etree.parse(file_path)
    text_node = xml.findall(".//TEXT")[0]
    # Plain-text serialization is what CoreNLP consumes; the XML
    # serialization is only used to measure the tail after </TEXT>.
    text_string = etree.tostring(text_node, method='text', encoding='utf8')
    text_xml = etree.tostring(text_node, method='xml', encoding='utf8')
    text_string = unicode(text_string, 'UTF-8')
    text_xml = unicode(text_xml, 'UTF-8')
    # Tail text after </TEXT> is also present in the plain-text
    # serialization; trim that many characters from the right.
    right_chars = len(text_xml.split('</TEXT>')[1])
    # BUG FIX: guard against right_chars == 0 — the unconditional
    # text_string[:-0] would return '' and wipe the whole text.
    if right_chars:
        text_string = text_string[:-right_chars]
    # StanfordParser strips internally the text, so record the number
    # of stripped leading characters to re-align token offsets.
    left_chars = len(text_string) - len(text_string.lstrip())
    with Mute_stderr():
        stanford_tree = CORENLP.parse(text_string)
    document = Document(file_path)
    document.text_offset = left_chars
    document.file_path = os.path.abspath(file_path)
    document.doc_id = os.path.basename(file_path)
    document.sec_times = self.get_dct(file_path)
    # dct_text is the compact (dash-free) form of the admission date.
    document.dct = document.sec_times.admission_date
    document.dct_text = document.dct.replace('-', '')
    document.title = os.path.basename(file_path)
    document.text = text_string
    document._coref = stanford_tree.get('coref', [])
    for num_sen, stanford_sentence in \
            enumerate(stanford_tree['sentences']):
        collp_deps = stanford_sentence.get('collapsed_dependencies', None)
        basic_deps = stanford_sentence.get('basic_dependencies', None)
        parsetree = stanford_sentence.get('parsetree', u'')
        sentence_text = stanford_sentence.get('text', u'')
        sentence = Sentence(id_sentence=num_sen,
                            basic_dependencies=basic_deps,
                            collapsed_dependencies=collp_deps,
                            parsetree=parsetree,
                            text=sentence_text)
        for num_word, (word_form, attr) in \
                enumerate(stanford_sentence['words']):
            # Re-align CoreNLP offsets with the un-stripped text.
            offset_begin = int(attr['CharacterOffsetBegin']) - left_chars
            offset_end = int(attr['CharacterOffsetEnd']) - left_chars
            word = Word(word_form=word_form,
                        char_offset_begin=offset_begin,
                        char_offset_end=offset_end,
                        lemma=attr['Lemma'],
                        named_entity_tag=attr['NamedEntityTag'],
                        part_of_speech=attr['PartOfSpeech'],
                        id_token=num_word,
                        id_sentence=num_sen)
            sentence.words.append(word)
        document.sentences.append(sentence)
    document.gold_annotations = self._get_annotations(xml, document)
    document.store_gold_annotations()
    document.complete_structure()
    logging.info('Document {}: parsed.'.format(os.path.relpath(file_path)))
    return document