def process_tnode(self, tnode):
    """\
    Decide whether the a-node belonging to the given t-node is dropped.

    Only t-nodes whose formeme ends in ':1' or 'drop' and which are not
    coordination/apposition members (`is_member`) are considered. Within
    those:

    - 'ten' is dropped unless its parent is 'být' or 'znamenat';
    - anything else that is not '#PersPron' is left untouched;
    - a '#PersPron' under 'být' whose nominal predicate carries a
      relative clause is rewritten to 'ten' and moved after its a-level
      parent ("On byl muž, který" -> "Byl to muž, který ..");
    - every other '#PersPron' is dropped.
    """
    # nothing to do unless the formeme qualifies and the node is no member
    eligible = re.search(r'(:1|drop)$', tnode.formeme) and not tnode.is_member
    if not eligible:
        return
    lemma = tnode.t_lemma
    parent = tnode.parent
    # "to" is dropped under all verbs except "být" and "znamenat"
    if lemma == 'ten' and parent.t_lemma not in ['být', 'znamenat']:
        self.drop_anode(tnode)
        return
    # from here on, only personal pronouns are of interest
    if lemma != '#PersPron':
        return
    if parent.t_lemma == 'být':
        # nominal predicate: first following sibling with a ':1' formeme
        nominal = first(lambda n: n.formeme.endswith(':1'),
                        parent.get_children(following_only=True))
        # the rewrite applies only if a relative clause hangs on it
        has_relclause = nominal and first(lambda n: n.formeme == 'v:rc',
                                          nominal.get_children())
        if has_relclause:
            pronoun = tnode.lex_anode
            pronoun.lemma = 'ten'
            pronoun.morphcat_gender = 'N'
            pronoun.morphcat_subpos = 'D'
            pronoun.morphcat_person = '-'
            pronoun.shift_after_node(pronoun.parent)
            return
    # default: the personal pronoun is simply dropped
    self.drop_anode(tnode)
 def process_zone(self, zone):
     """\
     Capitalize the first real word of the sentence and of every
     direct-speech segment found in the zone.

     "Real word" means a node whose `morphcat_pos` is not 'Z' and whose
     form (or lemma, if there is no form) does not match
     `self.OPEN_PUNCT`.
     """
     atree, ttree = zone.atree, zone.ttree

     def is_word(node):
         # neither tagged 'Z' nor matching the opening-punctuation pattern
         return node.morphcat_pos != 'Z' and not re.match(
             self.OPEN_PUNCT, node.form or node.lemma or '')

     # only the first child of the technical root is used as the sentence
     # root (further ones are assumed to be parsing errors)
     roots = atree.get_children(ordered=True)
     if roots:
         roots = roots[0:1]
     # direct speech roots that have a lexical a-node are added as well
     for tnode in ttree.get_descendants():
         if tnode.is_dsp_root and tnode.lex_anode:
             roots.append(tnode.lex_anode)

     for root in roots:
         initial = first(is_word,
                         root.get_descendants(ordered=True, add_self=True))
         # nothing to capitalize in an empty sentence
         if not initial or not initial.form:
             continue
         # guard against wrong direct-speech parses: the word must either
         # have no predecessor or follow punctuation
         preceding = initial.get_prev_node()
         if preceding and is_word(preceding):
             continue
         initial.form = initial.form[0].upper() + initial.form[1:]
Beispiel #3
0
 def process_zone(self, zone):
     """\
     Capitalize the first real word of the sentence and of every
     direct-speech segment found in the zone.

     "Real word" means a node whose `morphcat_pos` is not "Z" and whose
     form (or lemma, if there is no form) does not match
     `self.OPEN_PUNCT`.
     """
     atree, ttree = zone.atree, zone.ttree

     def is_word(node):
         # neither tagged "Z" nor matching the opening-punctuation pattern
         text = node.form or node.lemma or ""
         return node.morphcat_pos != "Z" and not re.match(self.OPEN_PUNCT, text)

     # only the first child of the technical root is used as the sentence
     # root (further ones are assumed to be parsing errors)
     roots = atree.get_children(ordered=True)
     if roots:
         roots = roots[0:1]
     # direct speech roots that have a lexical a-node are added as well
     for tnode in ttree.get_descendants():
         if tnode.is_dsp_root and tnode.lex_anode:
             roots.append(tnode.lex_anode)

     for root in roots:
         subtree = root.get_descendants(ordered=True, add_self=True)
         initial = first(is_word, subtree)
         # nothing to capitalize in an empty sentence
         if not initial or not initial.form:
             continue
         # guard against wrong direct-speech parses: the word must either
         # have no predecessor or follow punctuation
         preceding = initial.get_prev_node()
         if preceding and is_word(preceding):
             continue
         initial.form = initial.form[0].upper() + initial.form[1:]
Beispiel #4
0
 def should_agree(self, tnode):
     """\
     Pick out finite verbs and look up their subject.

     Returns False if the t-node's formeme does not match
     ``v.+(fin|rc)$``. Otherwise returns a pair
     ``(verb a-node, subject a-node)``, where the subject is the first
     node with afun 'Sb' among ``get_echildren()`` of the verb (or
     whatever `first` yields when there is none).
     """
     is_finite_verb = re.match(r'v.+(fin|rc)$', tnode.formeme)
     if is_finite_verb:
         averb = tnode.lex_anode
         subject = first(lambda child: child.afun == 'Sb',
                         averb.get_echildren())
         return (averb, subject)
     # anything but a finite verb is not subject to agreement
     return False
Beispiel #5
0
 def should_agree(self, tnode):
     """\
     Pick out finite verbs and look up their subject.

     Returns False if the t-node's formeme does not match
     ``v.+(fin|rc)$``. Otherwise returns a pair
     ``(verb a-node, subject a-node)``, where the subject is the first
     node with afun 'Sb' among ``get_echildren()`` of the verb (or
     whatever `first` yields when there is none).
     """
     is_finite_verb = re.match(r'v.+(fin|rc)$', tnode.formeme)
     if is_finite_verb:
         averb = tnode.lex_anode
         subject = first(lambda child: child.afun == 'Sb',
                         averb.get_echildren())
         return (averb, subject)
     # anything but a finite verb is not subject to agreement
     return False
Beispiel #6
0
    def __inflect(self, anode, inflection):
        """\
        Set the anode's form according to the given inflection pattern.

        The pattern is one of:

        - an empty string: the form equals the lemma;
        - '*' followed by a full (irregular) form, used verbatim;
        - a comma-separated list of changes applied to the lemma:
          a back change starting with '>' (parsed by `self.BACK_REGEX`
          into the number of characters to chop off the end and the
          string to append), a mid change 'orig-changed', and a front
          change starting with '<' (the rest is prepended).

        Supports front, back and mid changes (front changes currently
        unsupported by the model, there must be a different model to do
        them).

        A mid change whose `orig` part does not occur in the (chopped)
        lemma is skipped.
        """
        # start from lemma
        form = anode.lemma
        # replace irregular
        if inflection.startswith('*'):
            form = inflection[1:]
        # if there are changes, perform them
        elif inflection != '':
            # find out the front, mid, back changes
            diffs = inflection.split(",")
            front = first(lambda x: x.startswith('<'), diffs)
            back = first(lambda x: x.startswith('>'), diffs)
            mid = first(lambda x: '-' in x, diffs)
            # perform the changes
            add_back = ''
            # chop off the things from the back
            if back is not None:
                chop, add_back = self.BACK_REGEX.match(back).groups()
                chop = int(chop)
                if chop != 0:
                    form = form[0:-chop]
            # change mid vowel
            if mid is not None:
                orig, changed = mid.split('-')
                if len(orig) > 0:
                    # last occurrence, not counting the final character;
                    # rfind yields -1 if `orig` is absent, in which case
                    # the change must not be applied (slicing with -1
                    # would splice garbage into the form)
                    pos = form.lower().rfind(orig, 0, -1)
                    applicable = pos != -1
                else:
                    # pure insertion: goes before the last character
                    pos = len(form) - 1
                    applicable = True
                if applicable:
                    form = form[0:pos] + changed + form[pos + len(orig):]
            # add things to beginning and end
            if front is not None:
                form = front[1:] + form
            form = form + add_back
        # set the resulting form to the anode
        anode.form = form
    def get_target_zone(self, gen_doc):
        """Create the target zone (given by `self.language` and
        `self.selector`) in the first bundle of the document that does not
        have it yet, and return the new zone. If no such bundle is found,
        a new bundle is created in the document first.

        @rtype: Zone
        """
        def lacks_target_zone(bundle):
            return not bundle.has_zone(self.language, self.selector)

        target_bundle = first(lacks_target_zone, gen_doc.bundles)
        if not target_bundle:
            target_bundle = gen_doc.create_bundle()
        return target_bundle.create_zone(self.language, self.selector)
    def __inflect(self, anode, inflection):
        """\
        Set the anode's form according to the given inflection pattern.

        The pattern is one of:

        - an empty string: the form equals the lemma;
        - '*' followed by a full (irregular) form, used verbatim;
        - a comma-separated list of changes applied to the lemma:
          a back change starting with '>' (parsed by `self.BACK_REGEX`
          into the number of characters to chop off the end and the
          string to append), a mid change 'orig-changed', and a front
          change starting with '<' (the rest is prepended).

        Supports front, back and mid changes (front changes currently
        unsupported by the model, there must be a different model to do
        them).

        A mid change whose `orig` part does not occur in the (chopped)
        lemma is skipped.
        """
        # start from lemma
        form = anode.lemma
        # replace irregular
        if inflection.startswith('*'):
            form = inflection[1:]
        # if there are changes, perform them
        elif inflection != '':
            # find out the front, mid, back changes
            diffs = inflection.split(",")
            front = first(lambda x: x.startswith('<'), diffs)
            back = first(lambda x: x.startswith('>'), diffs)
            mid = first(lambda x: '-' in x, diffs)
            # perform the changes
            add_back = ''
            # chop off the things from the back
            if back is not None:
                chop, add_back = self.BACK_REGEX.match(back).groups()
                chop = int(chop)
                if chop != 0:
                    form = form[0:-chop]
            # change mid vowel
            if mid is not None:
                orig, changed = mid.split('-')
                if len(orig) > 0:
                    # last occurrence, not counting the final character;
                    # rfind yields -1 if `orig` is absent, in which case
                    # the change must not be applied (slicing with -1
                    # would splice garbage into the form)
                    pos = form.lower().rfind(orig, 0, -1)
                    applicable = pos != -1
                else:
                    # pure insertion: goes before the last character
                    pos = len(form) - 1
                    applicable = True
                if applicable:
                    form = form[0:pos] + changed + form[pos + len(orig):]
            # add things to beginning and end
            if front is not None:
                form = front[1:] + form
            form = form + add_back
        # set the resulting form to the anode
        anode.form = form
 def find_eo1st_pos(self, clause_root, clause_1st):
     """\
     Find the last word before the Wackernagel position.

     If the clause root is itself the leftmost node of the clause and has
     no 'AuxC' child, the root is returned. Otherwise the result is the
     first node among the root and its children (in order) that
     `self.should_ignore` does not reject for the root's clause number;
     the root is passed to `first` as its third argument (presumably the
     fallback value).
     """
     # root in the leftmost position -- typical for subordinating
     # conjunctions; multi-word ones (with an AuxC child) are excluded
     if clause_root == clause_1st:
         has_auxc_child = any(child.afun == 'AuxC'
                              for child in clause_root.get_children())
         if not has_auxc_child:
             return clause_root
     # otherwise search the root together with its children
     clause_num = clause_root.clause_number
     candidates = clause_root.get_children(ordered=True, add_self=True)
     return first(lambda node: not self.should_ignore(node, clause_num),
                  candidates,
                  clause_root)
Beispiel #10
0
 def __get_job_state(self):
     """\
     Parse the qstat command and try to retrieve the current job
     state and the machine it is running on.

     Returns a pair ``(state, host)``. If no line of the qstat output
     matches the job ID, ``(self.FINISH, None)`` is returned. Otherwise
     the state is the 5th and the host the 8th whitespace-separated
     column of the matching line; the host is reduced to the part
     between '@' and the first following '.', if the column has that
     shape.

     Raises IndexError if the matching line has fewer than eight columns.
     """
     # get state of job assuming it is in the queue
     output = self.__try_command('qstat')
     # get the relevant line of the qstat output
     # NOTE(review): self.jobid is used as a regular expression and may
     # match anywhere in the line, not just in the job ID column
     output = first(lambda line: re.search(self.jobid, line),
                    output.split("\n"))
     # job does not exist anymore
     if output is None:
         return self.FINISH, None
     # parse the correct line; str.split() ignores leading whitespace, so
     # an indented line does not shift the column indices
     fields = output.split()
     state, host = fields[4], fields[7]
     host = re.sub(r'.*@([^.]+)\..*', r'\1', host)
     return state, host
Beispiel #11
0
 def __get_job_state(self):
     """\
     Parse the qstat command and try to retrieve the current job
     state and the machine it is running on.

     Returns a pair ``(state, host)``. If no line of the qstat output
     matches the job ID, ``(self.FINISH, None)`` is returned. Otherwise
     the state is the 5th and the host the 8th whitespace-separated
     column of the matching line; the host is reduced to the part
     between '@' and the first following '.', if the column has that
     shape.

     Raises IndexError if the matching line has fewer than eight columns.
     """
     # get state of job assuming it is in the queue
     output = self.__try_command('qstat')
     # get the relevant line of the qstat output
     # NOTE(review): self.jobid is used as a regular expression and may
     # match anywhere in the line, not just in the job ID column
     output = first(lambda line: re.search(self.jobid, line),
                    output.split("\n"))
     # job does not exist anymore
     if output is None:
         return self.FINISH, None
     # parse the correct line; str.split() ignores leading whitespace, so
     # an indented line does not shift the column indices
     fields = output.split()
     state, host = fields[4], fields[7]
     host = re.sub(r'.*@([^.]+)\..*', r'\1', host)
     return state, host
Beispiel #12
0
 def remove_dependency(self, dependency):
     """\
     Removes the given Job(s) from the dependencies list.

     `dependency` may be a single Job, a job ID given as a string or an
     int (ints are converted to strings before the lookup), or an
     iterable of any of these, which is processed element by element.

     Raises ValueError if a single dependency is not in the list or if
     the argument is of an unsupported type.
     """
     if not isinstance(dependency, (Job, basestring, int)):
         # not a single element: must be a collection of them
         if not isinstance(dependency, collections.Iterable):
             raise ValueError('Unknown dependency type!')
         for item in dependency:
             self.remove_dependency(item)
         return
     # single element: dependencies are compared against string IDs / Jobs
     jobid = str(dependency) if isinstance(dependency, int) else dependency
     rem = first(lambda d: d == jobid, self.__dependencies)
     if rem is None:
         raise ValueError('Cannot find dependency!')
     self.__dependencies.remove(rem)
Beispiel #13
0
 def remove_dependency(self, dependency):
     """\
     Removes the given Job(s) from the dependencies list.

     `dependency` may be a single Job, a job ID given as a string or an
     int (ints are converted to strings before the lookup), or an
     iterable of any of these, which is processed element by element.

     Raises ValueError if a single dependency is not in the list or if
     the argument is of an unsupported type.
     """
     if not isinstance(dependency, (Job, basestring, int)):
         # not a single element: must be a collection of them
         if not isinstance(dependency, collections.Iterable):
             raise ValueError('Unknown dependency type!')
         for item in dependency:
             self.remove_dependency(item)
         return
     # single element: dependencies are compared against string IDs / Jobs
     jobid = str(dependency) if isinstance(dependency, int) else dependency
     rem = first(lambda d: d == jobid, self.__dependencies)
     if rem is None:
         raise ValueError('Cannot find dependency!')
     self.__dependencies.remove(rem)