Example #1
 def read(self, text: str):
     # Sentences are separated by an empty line, i.e. two consecutive
     # line separators; skip empty or single-character (stray separator) chunks.
     sentences = []
     for sent in text.split(self.linesep + self.linesep):
         if len(sent) > 1:
             sentences.append(self.sentence_parser.read(sent))
     # Wrap all parsed sentences into a single Paragraph inside a Document.
     paragraph = Paragraph(sentences)
     document = Document()
     document.append(paragraph)
     return document
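The splitting step above treats an empty line (two consecutive line separators) as a sentence boundary. A minimal, self-contained sketch of the same idea, assuming os.linesep stands in for the reader's self.linesep:

 import os

 # An empty line, i.e. two consecutive separators, ends a sentence block.
 text = "The first sentence." + os.linesep + os.linesep + "The second one."
 chunks = [c for c in text.split(os.linesep + os.linesep) if len(c) > 1]
 print(chunks)  # ['The first sentence.', 'The second one.']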
Example #2
 def read(self, text: str):
     # Note: this parses the whole(!) analysed corpus at once,
     # one sentence per non-empty line.
     sentences = []
     for line in text.split(self.linesep):
         if len(line) > 0:
             sentences.append(self.sentence_parser.read(line))
     # The entire corpus becomes one Paragraph inside one Document.
     paragraph = Paragraph(sentences)
     document = Document()
     document.append(paragraph)
     return document
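Unlike Example #1, which splits on empty lines, this reader treats every non-empty line as one analysed sentence. A hedged sketch of driving such a reader, where EchoSentenceParser is a hypothetical stand-in for the real sentence parser:

 import os

 class EchoSentenceParser:
     # Hypothetical stand-in for self.sentence_parser: splits a line into tokens.
     def read(self, line: str):
         return line.split()

 parser = EchoSentenceParser()
 corpus = "the/DT cat/NN" + os.linesep + "sat/VBD down/RP" + os.linesep
 sentences = [parser.read(line) for line in corpus.split(os.linesep) if len(line) > 0]
 print(sentences)  # [['the/DT', 'cat/NN'], ['sat/VBD', 'down/RP']]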
Example #3
 def train(self, document: Document):
     # TODO: read the input line by line. See the issue:
     # https://github.com/ppke-nlpg/purepos-python3/issues/5
     # Register the end-of-sentence tag in the tag vocabulary.
     self.raw_model_data.eos_tag = self.data.tag_vocabulary.add_element(ModelData.EOS_TAG)
     for sentence in document.sentences():
         # Copy each sentence, pad it with boundary markers, then feed it
         # to the n-gram and lexicon models.
         my_sentence = Sentence(sentence)
         self.add_sentence_markers(my_sentence)
         self.add_sentence(my_sentence)
     self.build_suffix_trees()
     # Estimate the interpolation weights of the combined lemma model.
     self.raw_model_data.combiner.calculate_params(document, self.raw_model_data, self.data)
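Sentence markers pad each training sentence with boundary symbols so the tag n-gram model can learn transitions at sentence edges. An illustrative sketch; the marker values and the exact padding PurePOS uses may differ:

 BOS_TAG = "<S>"    # assumed begin-of-sentence marker
 EOS_TAG = "</S>"   # assumed value; ModelData.EOS_TAG is the real constant

 def add_sentence_markers(tags, order=3):
     # order-1 leading markers give the first real tag a full n-gram context.
     return [BOS_TAG] * (order - 1) + list(tags) + [EOS_TAG]

 print(add_sentence_markers(["DT", "NN", "VBD"]))
 # ['<S>', '<S>', 'DT', 'NN', 'VBD', '</S>']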
Example #4
 def calculate_params(self, doc: Document,
                      raw_modeldata: RawModelData,
                      modeldata: ModelData):
     # Estimate the interpolation weights (lambdas) of the combined lemma
     # model: for every token, check which of the three estimators (lemma
     # unigram model, suffix guesser, lemma frequency model) ranks the gold
     # lemma closest to its own best guess, and update that estimator's weight.
     apriori_probs = raw_modeldata.tag_ngram_model.word_apriori_probs()
     theta = HashSuffixTree.calculate_theta(apriori_probs)
     lemma_suffix_guesser = raw_modeldata.lemma_suffix_tree.create_guesser(theta)
     lemma_prob = raw_modeldata.lemma_freq_tree.create_guesser(theta)
     lemma_unigram_model = raw_modeldata.lemma_unigram_model
     lambda_s = 1.0
     lambda_u = 1.0
     lambda_l = 1.0
     for sentence in doc.sentences():
         for tok in sentence:
             # Candidate (lemma, tag) pairs with suffix-guesser log probabilities.
             suffix_probs = lemma.batch_convert(lemma_suffix_guesser.tag_log_probabilities(
                 tok.token), tok.token, modeldata.tag_vocabulary)
             # Score the same candidates with the unigram and the
             # frequency-based lemma models.
             uni_probs = {t: lemma_unigram_model.log_prob(t.stem)
                          for t in suffix_probs.keys()}
             lemma_probs = {t: lemma_prob.tag_log_probability(t.stem,
                                                              lemma.main_pos_tag(t.tag))
                            for t in suffix_probs.keys()}
             # Best candidate according to each model.
             uni_max = max(uni_probs.items(), key=lambda e: e[1])
             suffix_item = max(suffix_probs.items(), key=lambda e: e[1][1])
             suffix_max = (suffix_item[0], suffix_item[1][1])
             lemma_max = max(lemma_probs.items(), key=lambda e: e[1])
             # Log probability of the gold-standard lemma under each model.
             act_uni_prob = lemma_unigram_model.log_prob(tok.stem)
             act_lemma_prob = lemma_prob.tag_log_probability(tok.stem,
                                                             lemma.main_pos_tag(tok.tag))
             if tok in suffix_probs:
                 act_suff_prob = suffix_probs[tok][1]
             else:
                 act_suff_prob = UNKOWN_VALUE  # sentinel log prob for unseen items
             # Gap between the gold lemma's score and the model's own best
             # score (log space; zero means the model already prefers it).
             uni_prop = act_uni_prob - uni_max[1]
             suff_prop = act_suff_prob - suffix_max[1]
             lemma_prop = act_lemma_prob - lemma_max[1]
             # The model with the strictly largest gap (closest to its own
             # optimum on the gold lemma) has that gap added to its weight.
             if uni_prop > suff_prop and uni_prop > lemma_prop:
                 lambda_u += uni_prop
             elif suff_prop > uni_prop and suff_prop > lemma_prop:
                 lambda_s += suff_prop
             elif lemma_prop > uni_prop and lemma_prop > suff_prop:
                 lambda_l += lemma_prop
     # Normalize the three weights so they sum to one.
     s = lambda_u + lambda_s + lambda_l
     lambda_u /= s
     lambda_s /= s
     lambda_l /= s
     self.lambdas.append(lambda_u)
     self.lambdas.append(lambda_s)
     self.lambdas.append(lambda_l)
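The three normalized weights stored in self.lambdas are there to interpolate the unigram, suffix, and lemma-frequency scores when candidate lemmas are ranked later. A sketch of such a linear interpolation in log space; the combine helper is hypothetical, not PurePOS's actual API:

 import math

 def combine(log_uni, log_suff, log_lemma, lambdas):
     # Weighted sum in probability space, result taken back to log space.
     lambda_u, lambda_s, lambda_l = lambdas
     prob = (lambda_u * math.exp(log_uni)
             + lambda_s * math.exp(log_suff)
             + lambda_l * math.exp(log_lemma))
     return math.log(prob) if prob > 0.0 else float("-inf")

 print(combine(math.log(0.2), math.log(0.05), math.log(0.1), [0.5, 0.2, 0.3]))
 # log(0.5*0.2 + 0.2*0.05 + 0.3*0.1) = log(0.14)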