def process_tnode(self, tnode):
    """\
    Check if the a-node corresponding to the given t-node should be dropped,
    and do so where appropriate.

    Only t-nodes whose formeme ends in ``:1`` or ``drop`` and whose
    ``is_member`` flag is not set are considered. Of those:

    * a node with t-lemma ``ten`` is dropped unless its parent's t-lemma is
      ``být`` or ``znamenat``;
    * a ``#PersPron`` node under ``být`` is turned into the demonstrative
      ``ten`` and shifted after its a-level parent, provided a following
      sibling with a ``:1`` formeme governs a ``v:rc`` child;
    * any other ``#PersPron`` node is dropped;
    * everything else is left untouched.

    Dropping is delegated to ``self.drop_anode``. Nothing is returned.
    """
    # skip nodes to which this should not apply
    if not re.search(r'(:1|drop)$', tnode.formeme) or tnode.is_member:
        return

    # special case: drop "to" under other verbs than "být" or "znamenat"
    if tnode.t_lemma == 'ten' and tnode.parent.t_lemma not in ('být', 'znamenat'):
        self.drop_anode(tnode)
        return

    # skip everything except personal pronouns
    if tnode.t_lemma != '#PersPron':
        return

    # special case: "On byl muž, který" -> "Byl to muž, který .."
    if tnode.parent.t_lemma == 'být':
        # find the nominal predicate among the siblings that follow
        tpnom = first(lambda n: n.formeme.endswith(':1'),
                      tnode.parent.get_children(following_only=True))
        # if found, detect a relative clause below it; if detected,
        # keep the pronoun but rewrite it as "to" placed after the verb
        if tpnom and first(lambda n: n.formeme == 'v:rc', tpnom.get_children()):
            anode = tnode.lex_anode
            anode.lemma = 'ten'
            anode.morphcat_gender = 'N'
            anode.morphcat_subpos = 'D'
            anode.morphcat_person = '-'
            anode.shift_after_node(anode.parent)
            return

    # otherwise just drop the personal pronoun
    self.drop_anode(tnode)
def process_zone(self, zone):
    """\
    Capitalize the first valid word of the sentence and of every direct
    speech found in the zone.

    The starting points are the first child of the a-tree root plus the
    lexical a-nodes of all t-nodes flagged ``is_dsp_root``. Under each of
    them, the first node (in tree order, the starting node included) that
    is neither tagged ``Z`` nor matches ``self.OPEN_PUNCT`` gets the first
    character of its form upper-cased in place.
    """
    def is_word(node):
        # neither tagged 'Z' nor looking like opening punctuation
        return (node.morphcat_pos != 'Z' and
                not re.match(self.OPEN_PUNCT, node.form or node.lemma or ''))

    a_root, t_root = zone.atree, zone.ttree

    # take the first non-technical root (assume others to be parsing errors)
    starts = a_root.get_children(ordered=True)
    if starts:
        starts = starts[0:1]
    # add all direct speech roots that have a lexical a-node
    starts.extend(tnode.lex_anode
                  for tnode in t_root.get_descendants()
                  if tnode.is_dsp_root and tnode.lex_anode)

    # capitalize the 1st words under the selected roots
    for start in starts:
        candidate = first(is_word,
                          start.get_descendants(ordered=True, add_self=True))
        # skip empty sentences
        if not (candidate and candidate.form):
            continue
        # compensate wrong parses in direct speech: the word must either
        # have no predecessor or follow a punctuation node
        preceding = candidate.get_prev_node()
        if preceding and is_word(preceding):
            continue
        # make it uppercase
        candidate.form = candidate.form[0].upper() + candidate.form[1:]
def process_zone(self, zone):
    """\
    Upper-case the initial letter of the first valid word in the sentence
    and in each direct speech.

    Candidates are looked up below the first child of the a-tree root and
    below the lexical a-node of every t-node flagged ``is_dsp_root``. A
    node counts as a word when its ``morphcat_pos`` is not ``Z`` and its
    form (or lemma) does not match ``self.OPEN_PUNCT``. The form is
    modified in place; nothing is returned.
    """
    def counts_as_word(anode):
        # punctuation tag or opening-punctuation form disqualifies the node
        text = anode.form or anode.lemma or ''
        return anode.morphcat_pos != 'Z' and not re.match(self.OPEN_PUNCT, text)

    atree_root, ttree_root = zone.atree, zone.ttree

    # keep only the first non-technical root (others: assumed parse errors)
    roots = atree_root.get_children(ordered=True)
    if roots:
        roots = roots[0:1]
    # direct speech roots start a capitalized stretch of their own
    roots.extend(t.lex_anode
                 for t in ttree_root.get_descendants()
                 if t.is_dsp_root and t.lex_anode)

    for root in roots:
        # find the first word under this root
        head_word = first(counts_as_word,
                          root.get_descendants(ordered=True, add_self=True))
        # skip empty sentences
        if not (head_word and head_word.form):
            continue
        # wrong parses in direct speech: only capitalize a word that
        # has no predecessor or whose predecessor is punctuation
        before = head_word.get_prev_node()
        if before and counts_as_word(before):
            continue
        head_word.form = head_word.form[0].upper() + head_word.form[1:]
def should_agree(self, tnode):
    """\
    Find finite verbs, with or without a subject.

    Returns ``False`` unless the t-node's formeme matches
    ``v.+(fin|rc)$``. For a matching node, returns the pair
    ``(anode, asubj)``: the lexical a-node and the result of searching its
    effective children for one with afun ``Sb`` (presumably ``None`` when
    there is no such child -- depends on the ``first`` helper).
    """
    if re.match(r'v.+(fin|rc)$', tnode.formeme):
        verb = tnode.lex_anode
        subject = first(lambda kid: kid.afun == 'Sb', verb.get_echildren())
        return (verb, subject)
    # avoid everything except finite verbs
    return False
def __inflect(self, anode, inflection):
    """\
    Set the anode's form according to the given inflection pattern.

    Supports front, back and mid changes (front changes currently
    unsupported by the model, there must be a different model to do them).

    The pattern is applied to ``anode.lemma``:

    * ``''`` -- the form is the lemma itself;
    * ``'*xyz'`` -- irregular: the form is ``xyz``;
    * otherwise a comma-separated list of changes, each kind used at most
      once (the first item of each kind wins):

      - ``'>…'`` is parsed by ``self.BACK_REGEX`` into a number of
        characters to chop off the end and a string to append;
      - ``'orig-changed'`` replaces the last occurrence of ``orig``
        (searched in the lower-cased form, final character excluded) by
        ``changed``; an empty ``orig`` inserts ``changed`` before the
        last character;
      - ``'<…'`` prepends the rest of the item.

    A mid change whose ``orig`` does not occur in the form is skipped.
    The result is stored in ``anode.form``; nothing is returned.
    """
    # start from lemma
    form = anode.lemma
    # replace irregular
    if inflection.startswith('*'):
        form = inflection[1:]
    # if there are changes, perform them
    elif inflection != '':
        # find out the front, mid, back changes
        diffs = inflection.split(",")
        front = first(lambda x: x.startswith('<'), diffs)
        back = first(lambda x: x.startswith('>'), diffs)
        mid = first(lambda x: '-' in x, diffs)
        # perform the changes
        add_back = ''
        # chop off the things from the back
        if back is not None:
            chop, add_back = self.BACK_REGEX.match(back).groups()
            chop = int(chop)
            if chop != 0:
                form = form[0:-chop]
        # change mid vowel
        if mid is not None:
            orig, changed = mid.split('-')
            if orig:
                pos = form.lower().rfind(orig, 0, -1)
                # rfind() signals "not found" with -1; splicing at that
                # index would duplicate most of the form, so leave the
                # form alone when the pattern does not apply
                if pos >= 0:
                    form = form[0:pos] + changed + form[pos + len(orig):]
            else:
                # pure insertion: one character back from the end
                pos = len(form) - 1
                form = form[0:pos] + changed + form[pos:]
        # add things to beginning and end
        if front is not None:
            form = front[1:] + form
        form = form + add_back
    # set the resulting form to the anode
    anode.form = form
def get_target_zone(self, gen_doc):
    """Create the target zone in the first bundle of the given document
    that does not have it yet and return the new zone.

    The zone is identified by ``self.language`` and ``self.selector``.
    When the search over ``gen_doc.bundles`` yields nothing, a new bundle
    is created via ``gen_doc.create_bundle()`` and used instead.

    @rtype: Zone
    """
    def lacks_target_zone(candidate):
        return not candidate.has_zone(self.language, self.selector)

    target_bundle = first(lacks_target_zone, gen_doc.bundles)
    if not target_bundle:
        # every existing bundle already has the zone (or there are none)
        target_bundle = gen_doc.create_bundle()
    return target_bundle.create_zone(self.language, self.selector)
def find_eo1st_pos(self, clause_root, clause_1st):
    """\
    Find the last word before the Wackernagel position.

    Returns ``clause_root`` itself when it is also the leftmost node of
    the clause and has no child with afun ``AuxC``. Otherwise returns the
    first node among the clause root and its children (in order) that
    ``self.should_ignore`` does not reject for the root's clause number,
    with ``clause_root`` passed to ``first`` as the fallback.
    """
    # leftmost node is the root -- typical for subordinating
    # conjunctions (leave out the multi-word ones)
    if clause_root == clause_1st:
        has_auxc_child = any(kid.afun == 'AuxC'
                             for kid in clause_root.get_children())
        if not has_auxc_child:
            return clause_root

    # otherwise return one of the clause root's children
    clause_num = clause_root.clause_number
    return first(lambda cand: not self.should_ignore(cand, clause_num),
                 clause_root.get_children(ordered=True, add_self=True),
                 clause_root)
def find_eo1st_pos(self, clause_root, clause_1st):
    """\
    Find the last word before the Wackernagel position.

    If the clause root is the leftmost node of the clause and none of its
    children has afun ``AuxC``, the root itself is returned. In all other
    cases the root and its children are scanned in order and the first
    node not rejected by ``self.should_ignore`` (called with the root's
    clause number) is returned; ``clause_root`` is handed to ``first`` as
    the default.
    """
    if clause_root == clause_1st:
        # leftmost node is the root -- typical for subordinating
        # conjunctions; multi-word ones (with an AuxC child) are left out
        multiword = any(child.afun == 'AuxC'
                        for child in clause_root.get_children())
        if not multiword:
            return clause_root

    # otherwise pick among the clause root and its children
    number = clause_root.clause_number
    return first(lambda item: not self.should_ignore(item, number),
                 clause_root.get_children(ordered=True, add_self=True),
                 clause_root)
def __get_job_state(self):
    """\
    Parse the qstat output and try to retrieve the current job state and
    the machine the job is running on.

    The listing returned by ``self.__try_command('qstat')`` is searched
    for the row whose first whitespace-separated column equals
    ``self.jobid``. Matching on that column only (instead of searching the
    whole line) keeps other jobs whose ID merely contains this one, or a
    timestamp with the same digits, from being picked up; tokenising with
    ``split()`` keeps the column indices stable when the row starts with
    padding spaces.

    Returns a ``(state, host)`` pair. ``state`` is the 5th column; ``host``
    is the 8th column reduced to the part between ``@`` and the first
    following dot when it has that shape, and ``''`` when the row has fewer
    than 8 columns. If no row belongs to the job, ``(self.FINISH, None)``
    is returned.
    """
    # get state of job assuming it is in the queue
    listing = self.__try_command('qstat')
    job_id = str(self.jobid)

    # get the relevant line of the qstat output
    for line in listing.splitlines():
        fields = line.split()
        if fields and fields[0] == job_id:
            break
    else:
        # job does not exist anymore
        return self.FINISH, None

    # parse the correct line
    state = fields[4]
    # NOTE(review): for jobs that are not running yet the 8th column is
    # presumably not a queue@host value; it is passed through unchanged
    host = fields[7] if len(fields) > 7 else ''
    host = re.sub(r'.*@([^.]+)\..*', r'\1', host)
    return state, host
def remove_dependency(self, dependency):
    """\
    Remove the given Job(s) from the dependencies list.

    ``dependency`` may be a ``Job``, a string or an int (ints are compared
    by their string form), or an iterable of such values, which is
    processed element by element.

    Raises ``ValueError`` when a single dependency is not in the list, or
    when ``dependency`` is neither of the accepted single types nor an
    iterable.
    """
    if isinstance(dependency, (Job, basestring, int)):
        # single element removed
        wanted = str(dependency) if isinstance(dependency, int) else dependency
        found = first(lambda dep: dep == wanted, self.__dependencies)
        if found is None:
            raise ValueError('Cannot find dependency!')
        self.__dependencies.remove(found)
        return

    if isinstance(dependency, collections.Iterable):
        # a collection: remove its members one by one
        for item in dependency:
            self.remove_dependency(item)
        return

    raise ValueError('Unknown dependency type!')