def __init__(self, form, msd=None, v_insts=None):
    """Build the matching regexes for one form pattern.

    ``form`` is split on ``'+'``. Every piece that consists only of
    digits becomes the capture group ``(.+)``; every other piece is
    copied into the pattern verbatim.

    Args:
        form: ``'+'``-separated pattern string, e.g. ``'1+en+2'``.
        msd: stored unchanged on ``self.msd``; a fresh empty list is
            used when omitted.
        v_insts: iterable of iterables of ``(index, value)`` pairs.
            Values are grouped by index and each group is turned into
            one compiled regex through ``genregex``. Treated as empty
            when omitted.

    Attributes set:
        form: list of the ``'+'``-separated pieces.
        msd: see ``msd`` above.
        scount: total length of the non-digit pieces.
        regex: the assembled pattern string.
        cregex: ``regex`` compiled with ``re.compile``.
        v_regex: list of compiled regexes, one per distinct index
            found in ``v_insts``.
    """
    # None sentinels replace the former ``[]`` defaults: the default
    # list was stored on ``self.msd`` and so shared by every instance
    # created without an explicit ``msd``.
    self.form = form.split('+')
    self.msd = [] if msd is None else msd
    if v_insts is None:
        v_insts = ()

    pieces = []
    self.scount = 0
    for part in self.form:
        if part.isdigit():
            pieces.append('(.+)')
        else:
            # NOTE(review): literal parts are not passed through
            # re.escape(), so regex metacharacters in a form stay
            # active - confirm this is intended.
            pieces.append(part)
            self.scount += len(part)
    self.regex = ''.join(pieces)
    self.cregex = re.compile(self.regex)

    # Group the observed values by their index.
    collect_vars = defaultdict(set)
    for vs in v_insts:
        for (i, v) in vs:
            collect_vars[i].add(v)

    # Only the value sets are needed; .values() exists on both
    # Python 2 and Python 3, unlike .iteritems().
    self.v_regex = [
        re.compile(genregex.genregex(ss, pvalue=0.05).pyregex())
        for ss in collect_vars.values()
    ]
# Example #2
# 0
def paradigms_to_foma(paradigms, grammarname, pval):
    """Converts iterable of paradigms to foma-script (as a string).

    The returned script contains, in this order:

    * ``def Alph ...;`` - the union of the quoted symbols returned by
      ``paradigms_to_alphabet(paradigms)``.
    * One ``def <paradigm>=var<idx> ...;`` per variable slot, taken from
      ``genregex(...).fomaregex()`` with every ``?`` replaced by
      ``Alph``. A definition is emitted only the first time a given
      name is seen.
    * One ``def <paradigm> ...;`` per paradigm: its forms joined by
      ``|``. Each form is the sequence of its slots - ``[<var name>]``
      for a variable slot, ``[<this form>:<first form>]`` for a fixed
      slot - followed by ``0:["[" <tags> "]"]``, where each tag is
      ``"feature" = "value"``.
    * ``def <grammarname> ...;`` - the union of all paradigm names.

    Args:
        paradigms: iterable of paradigm objects. It is iterated more
            than once (here and by ``paradigms_to_alphabet``), so it
            must not be a one-shot iterator. Each paradigm is read
            through ``.name``, ``.forms`` (each form exposing ``.msd``
            as ``(feature, value)`` pairs) and ``.slots`` (pairs of
            ``(is_var, slot)``).
        grammarname: name given to the final, top-level definition.
        pval: passed to ``genregex.genregex`` as ``pvalue``.

    Returns:
        The complete foma script as one string.
    """
    # Sorted so the generated script is reproducible: iterating a set
    # of strings has no guaranteed order.
    alphabet = sorted('"' + a + '"' for a in paradigms_to_alphabet(paradigms))
    alphstring = 'def Alph ' + u'|'.join(alphabet) + ';\n'

    defined_vars = set()   # variable-slot names that already have a def
    var_defs = []          # 'def <paradigm>=var<idx> ...;' lines
    paradigm_defs = []     # 'def <paradigm> ...;' blocks
    parnames = []          # paradigm names, in input order

    for paradigm in paradigms:
        parname = nospace(paradigm.name)
        parnames.append(parname)

        parstrings = []
        for formnumber, form in enumerate(paradigm.forms):
            # A comprehension instead of ``lambda (feature, value): ...``:
            # tuple parameters are a SyntaxError on Python 3.
            tagstrings = [
                u'"' + feature + u'"' + u' = ' + u'"' + value + u'"'
                for (feature, value) in form.msd
            ]

            parts = []
            for idx, (is_var, slot) in enumerate(paradigm.slots):
                if is_var:
                    parvarname = parname + '=var' + str(idx)
                    if parvarname not in defined_vars:
                        defined_vars.add(parvarname)
                        r = genregex.genregex(slot, pvalue=pval, length=False)
                        var_defs.append(
                            'def ' + parvarname + ' '
                            + r.fomaregex().replace('?', 'Alph') + ';\n')
                    parts.append(' [' + parvarname + '] ')
                else:
                    # Fixed slot: pair this form's string with the
                    # string of the first form (index 0).
                    thisslot = escape_fixed_string(slot[formnumber])
                    baseformslot = escape_fixed_string(slot[0])
                    parts.append(
                        u' [' + thisslot + u':' + baseformslot + u'] ')
            parts.append(u'0:["[" ' + u' " " '.join(tagstrings) + u' "]"]')
            parstrings.append(u''.join(parts))

        # Explicit space after the name, so the definition stays
        # well-formed even when a form string does not begin with
        # whitespace (e.g. a paradigm without slots).
        paradigm_defs.append(
            u'def ' + parname + u' ' + u'|\n'.join(parstrings) + u';\n')

    grammar_def = u'def ' + grammarname + u' ' + u' | '.join(parnames) + u';'

    return (alphstring + u''.join(var_defs) + u''.join(paradigm_defs)
            + grammar_def)
# Example #3
# 0
 def __init__(self, form, msd=None, v_insts=None):
     """Build the matching regexes for one form pattern.

     ``form`` is split on ``'+'``. Every piece that consists only of
     digits becomes the capture group ``(.+)``; every other piece is
     copied into the pattern verbatim.

     Args:
         form: ``'+'``-separated pattern string, e.g. ``'1+en+2'``.
         msd: stored unchanged on ``self.msd``; a fresh empty list is
             used when omitted.
         v_insts: iterable of iterables of ``(index, value)`` pairs.
             Values are grouped by index and each group is turned into
             one compiled regex through ``genregex``. Treated as empty
             when omitted.

     Attributes set:
         form: list of the ``'+'``-separated pieces.
         msd: see ``msd`` above.
         scount: total length of the non-digit pieces.
         regex: the assembled pattern string.
         cregex: ``regex`` compiled with ``re.compile``.
         v_regex: list of compiled regexes, one per distinct index
             found in ``v_insts``.
     """
     # None sentinels replace the former ``[]`` defaults: the default
     # list was stored on ``self.msd`` and so shared by every instance
     # created without an explicit ``msd``.
     self.form = form.split('+')
     self.msd = [] if msd is None else msd
     if v_insts is None:
         v_insts = ()

     pieces = []
     self.scount = 0
     for part in self.form:
         if part.isdigit():
             pieces.append('(.+)')
         else:
             # NOTE(review): literal parts are not passed through
             # re.escape(), so regex metacharacters in a form stay
             # active - confirm this is intended.
             pieces.append(part)
             self.scount += len(part)
     self.regex = ''.join(pieces)
     self.cregex = re.compile(self.regex)

     # Group the observed values by their index.
     collect_vars = defaultdict(set)
     for vs in v_insts:
         for (i, v) in vs:
             collect_vars[i].add(v)

     # Only the value sets are needed; .values() exists on both
     # Python 2 and Python 3, unlike .iteritems().
     self.v_regex = [
         re.compile(genregex.genregex(ss, pvalue=0.05).pyregex())
         for ss in collect_vars.values()
     ]