def setUp(self):
     """Build the shared fixtures used by the test methods.

     Creates several GF abstract-syntax expressions (a plain one, ones
     containing ``wildcard_*`` placeholder functions, one with a ``?``
     metavariable, and a full sentence), wraps them in ExtendedExpr,
     and prepares ParallelMWE / BilingualExpr instances plus a synonym
     dictionary shared by all ParallelMWE objects.
     """
     unittest.TestCase.setUp(self)
     # Plain expression and variants with wildcard_* placeholders;
     # "?" in myexprq is a GF metavariable.
     self.myexpr=pgf.readExpr("AdjCN (PositA crucial_A) (UseN item_N)")
     self.myexprw=pgf.readExpr("AdjCN (PositA crucial_A) (UseN wildcard_1)")
     self.myexprq=pgf.readExpr("CompoundCN ? wildcard_3 (AdjCN (PositA wildcard_1) (UseN wildcard_2))")
     self.myexprs=pgf.readExpr('(PredVP (DetCN (DetQuant IndefArt NumSg) (PossNP (AdjCN (PositA complete_A) (UseN collapse_N)) (UseQuantPN DefArt (SymbPN (MkSymb "U"))))) (UseComp (CompNP (MassNP (UseN dollar_N)))))')
     # ExtendedExpr wrappers (second argument is the parent; None = root).
     self.extExpr=ExtendedExpr(self.myexpr,None)
     self.extExprW=ExtendedExpr(self.myexprw,None)
     self.extExprQ=ExtendedExpr(self.myexprq,None)
     self.extExprS=ExtendedExpr(self.myexprs,None)
     # Moses-style phrase-table entry: "src ||| tgt ||| alignments".
     self.bilingualPhraseSet=BilingualPhraseSet()
     self.bilingualPhraseSet.add("NATO ||| la OTAN ||| 0-0 0-1")

     # Parallel MWEs: "source-tree | target-tree" pairs; wildcard_IGNORE
     # marks subtrees that must be present but are not matched.
     self.mwe1=ParallelMWE()
     self.mwe1.parse("( MassNP ( UseN safety_N ) ) | ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( UseN security_N ) )")

     self.mwe2=ParallelMWE()
     self.mwe2.parse("( PossNP ( UseN wildcard_1 ) ( MassNP ( AdjCN ( PositA wildcard_2 ) ( UseN politics_N ) ) ) ) | ( PossNP ( UseN wildcard_1 ) ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( AdjCN ( PositA wildcard_2 ) ( UseN policy_N ) ) ) )")

     self.bilphrase=BilingualExpr()
     self.bilphrase.parse("( MassNP ( AdjCN ( PositA wildcard_2 ) ( UseN politics_N ) ) )  | ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( AdjCN ( PositA wildcard_2 ) ( UseN policy_N ) ) )", ignoreFreq=True)

     # Class-level synonym table: politics_N may match policy_N.
     synDict=dict()
     synDict["politics_N"]=set(["policy_N"])
     ParallelMWE.synonymDict=synDict
Exemple #2
0
def _condition_subtree(api_response):
    """Build the GF expression naming the weather condition in *api_response*.

    The OpenWeatherMap-style response stores the condition under
    ``api_response["weather"][0]["description"]``; GF function names use
    underscores where the API uses spaces.

    Returns the ``UnknownCondition`` expression when the field is absent.
    """
    try:
        condition = api_response["weather"][0]["description"]
    except (KeyError, IndexError):
        # Bug fix: an empty "weather" list raises IndexError, which the
        # original code did not catch; treat it like a missing key.
        return pgf.readExpr("UnknownCondition")
    function_name = condition.replace(" ", "_")
    return pgf.readExpr(function_name)
 def testPrint(self):
     """Round-trip check: str(ExtendedExpr) must be readable by pgf.readExpr
     and linearize back to the same string as the original expression."""
     for original, extended in ((self.myexpr, self.extExpr),
                                (self.myexprs, self.extExprS)):
         reparsed = pgf.readExpr(str(extended))
         self.assertEqual(str(original), str(reparsed))
 def parse(self, rawstr, ignoreFreq=False):
     """Parse a '" | "'-separated record into this object.

     Expected format is ``freq | source-tree | target-tree``; with
     ``ignoreFreq=True`` the leading frequency field is absent and only
     the two trees are read.
     """
     fields = rawstr.split(" | ")
     if ignoreFreq:
         sl_raw, tl_raw = fields[0], fields[1]
     else:
         self.freq = int(fields[0])
         sl_raw, tl_raw = fields[1], fields[2]
     self.slexpr = ExtendedExpr(pgf.readExpr(sl_raw), None)
     self.slexpr.compute_leaf_functions_recursively()
     self.tlexpr = ExtendedExpr(pgf.readExpr(tl_raw), None)
     self.tlexpr.compute_leaf_functions_recursively()
Exemple #5
0
def test_linearize_rus(rus, linearized, parsed, only):
    """Check that linearizing *parsed* in Russian yields *linearized*.

    Skipped unless linearization tests ("l") are selected in *only*.
    Sentence-final " ." is normalised to "." on both sides before
    comparison.  (Python 2: `unicode` decodes the linearizer's bytes.)
    """
    if only and "l" not in only:
        return
    expr = pgf.readExpr(parsed)
    actual = unicode(rus.linearize(expr), "utf-8").replace(" .", ".")
    expected = linearized.replace(" .", ".")
    assert actual == expected
Exemple #6
0
def _temperature_subtree(api_response):
    """Return a GF number tree for the temperature in *api_response*.

    Kelvin (``api_response["main"]["temp"]``) is converted to Celsius;
    if the field is missing the ``UnknownNum`` expression is returned.
    """
    result = pgf.readExpr("UnknownNum")
    try:
        result = float_tree(api_response["main"]["temp"] - 273.15)
    except KeyError:
        pass  # keep the UnknownNum fallback
    return result
Exemple #7
0
def _float_subtree(api_response, keys):
    """Return a GF number tree for the value found under *keys*.

    Falls back to the ``UnknownNum`` expression when any key along the
    path is missing.
    """
    result = pgf.readExpr("UnknownNum")
    try:
        result = float_tree(lookup_value(api_response, keys))
    except KeyError:
        pass  # keep the UnknownNum fallback
    return result
Exemple #8
0
 def parse_line(line):
   """Parse one tab-separated parser-output line.

   Format: ``sentid \\t parsetime \\t [parseprob \\t abstree]``.
   Returns ``(sentid, parsetime, parseprob, expr-or-None)``, or None when
   the line is malformed (after reporting it on stderr).
   """
   try:
     sentid, parsetime, parserepr = line.strip('\n').split('\t', 2)
   except ValueError:
     # Bug fix: the original fell through after printing and then hit the
     # unbound `parserepr`, raising NameError; bail out explicitly.
     print("Line not in proper format: %s" % (line), file=stderr)
     return None
   parseprob, abstree = parserepr.split('\t') if parserepr.strip() \
       else (0, '')
   return (int(sentid), float(parsetime), float(parseprob),
           pgf.readExpr(abstree) if abstree else None)
Exemple #9
0
 def parse_line(line):
     """Parse one tab-separated parser-output line.

     Format: ``sentid \\t parsetime \\t [parseprob \\t abstree]``.
     Returns ``(sentid, parsetime, parseprob, expr-or-None)``, or None
     when the line is malformed (after reporting it on stderr).
     """
     try:
         sentid, parsetime, parserepr = line.strip('\n').split('\t', 2)
     except ValueError:
         # Bug fix: the original fell through after printing and then used
         # the unbound `parserepr`, raising NameError; bail out explicitly.
         print("Line not in proper format: %s" % (line), file=stderr)
         return None
     parseprob, abstree = parserepr.split('\t') if parserepr.strip() \
         else (0, '')
     return (int(sentid), float(parsetime), float(parseprob),
             pgf.readExpr(abstree) if abstree else None)
def regression_test(grammar, treelin_pairs, lang_code):
    """Linearize every tree in *treelin_pairs* and collect mismatches.

    Returns a list of ``(tree, expected, actual)`` triples for pairs whose
    linearization in the concrete language ``abstractName + lang_code``
    does not match the expected string; empty list means all passed.
    """
    conc = grammar.languages[grammar.abstractName + lang_code]
    mismatches = []
    for tree, expected in treelin_pairs:
        actual = conc.linearize(pgf.readExpr(tree))
        if not actual == expected:
            mismatches.append((tree, expected, actual))
    return mismatches
Exemple #11
0
def country_texts_embedded(factsys, data):
    """Build one multi-sentence G-grammar document per country record.

    Each record in *data* is indexed both positionally (``row[0]`` is the
    country name) and by attribute (capital, continent, currency,
    population, area) — presumably a namedtuple-like row; TODO confirm
    against the caller.  Returns the list of document expressions.
    """
    factsys.grammar.embed("G")
    import G  # module created by the embed() call above

    # NOTE(review): `fields` is never used below; kept only in case the
    # fieldnames access matters to factsys — confirm and drop.
    fields = factsys.fieldnames.split()

    facts = []

    for row in data:  # renamed from `tuple`, which shadowed the builtin

        countr = factsys.str2exp("Name", row[0])
        cap = factsys.str2exp('Name', row.capital)
        cont = factsys.str2exp('Name', row.continent).unpack()[1][0]  #CDNAME
        curr = factsys.str2exp('Name', row.currency)
        pop = pgf.readExpr(str(row.population))
        are = pgf.readExpr(str(row.area))

        # Sentence 1: "<country> is a <continent> country with <pop> inhabitants."
        doc = G.OneSentenceDoc(
            G.FactSentence(
                G.KindFact(
                    G.NameObject(countr),
                    G.ModifierKind(
                        G.PropertyKind(G.cdProperty(cont), G.country_Kind),
                        G.NumericKindModifier(G.IntNumeric(pop),
                                              G.inhabitant_Kind)))))
        # Sentence 2: area attribute.
        doc = G.AddSentenceDoc(
            doc,
            G.FactSentence(
                G.AttributeFact(G.area_Attribute, G.PronObject(countr),
                                G.NumericValue(G.IntNumeric(are)))))
        # Sentence 3: capital and currency, conjoined.
        doc = G.AddSentenceDoc(
            doc,
            G.ConjSentence(
                G.FactSentence(
                    G.AttributeFact(G.capital_Attribute, G.NameObject(countr),
                                    G.NameValue(cap))),
                G.FactSentence(
                    G.AttributeFact(G.currency_Attribute, G.PronObject(countr),
                                    G.NameValue(curr)))))
        facts.append(doc)
    return facts
Exemple #12
0
def _winddirection_subtree(api_response):
    """Map the wind bearing in *api_response* to a compass-point GF expression.

    Degrees are bucketed into eight 45-degree sectors centred on the
    compass points; a missing field yields ``UnknownDir``.
    """
    try:
        degrees = api_response["wind"]["deg"]
    except KeyError:
        return pgf.readExpr("UnknownDir")
    # Upper bound (exclusive) of each sector, in ascending order.
    sectors = (
        (22.5, "North"),
        (67.5, "NorthEast"),
        (112.5, "East"),
        (157.5, "SouthEast"),
        (202.5, "South"),
        (247.5, "SouthWest"),
        (292.5, "West"),
        (337.5, "NorthWest"),
    )
    for upper, name in sectors:
        if degrees < upper:
            return pgf.readExpr(name)
    # 337.5 and above wraps around to North.
    return pgf.readExpr("North")
def main():
    """Read a number from stdin (or fall back to a demo list) and print
    its English numeral linearization."""
    gr = pgf.readPGF("Numeral.pgf")
    eng = list(gr.languages.values())[0]
    raw = input()
    if raw:
        numbers = [int(raw)]
    else:
        # Demo values covering teens, tens, hundreds, thousands, etc.
        numbers = [
            1, 2, 8, 10, 11, 20, 21, 40, 95, 100, 101, 234, 1000, 1001, 2021,
            630511
        ]
    for num in numbers:
        tree = int2numeral_tree(pgf.readExpr(str(num)))
        print(num, eng.linearize(tree))
Exemple #14
0
def listen():
    """Handle one chat message, driving the order-filling state machine.

    State lives in the Flask ``session`` (cur_state, unpacked_order,
    cur_tid, cur_type).  Flow: IDLE parses the initial order; RECEIVING
    grafts the user's answer into the partially filled tree; FILLING
    looks for the next hole to ask about; DONE linearizes the
    confirmation.  NOTE(review): ``cgr``/``gr`` and the STATE_* constants
    are module globals defined elsewhere — confirm their contracts there.
    """
    msg = request.args.get('msg', '')
    # "START" resets the conversation regardless of current state.
    if msg == 'START':
        session['cur_state'] = STATE_IDLE
        return cgr.printName('Order')

    if session['cur_state'] == STATE_IDLE:
        # Parse the whole order; .next()[1] takes the best parse's tree.
        org_order = cgr.parse(msg).next()[1]
        session['unpacked_order'] = unpack(org_order)
        session['cur_state'] = STATE_FILLING

    if session['cur_state'] == STATE_RECEIVING:
        # Parse the answer restricted to the category we asked about,
        # then splice it into the slot identified by cur_tid.
        parsed_msg = cgr.parse(msg, cat=pgf.Type(session['cur_type'])).next()[1]
        cur_t = traverse(session['unpacked_order'], session['cur_tid'][:-1])
        cur_t[1][session['cur_tid'][-1]] = unpack(parsed_msg)
        session['cur_state'] = STATE_FILLING

    if session['cur_state'] == STATE_FILLING:
        # Find the next unfilled slot; None means the order is complete.
        session['cur_tid'] = fill_missing(session['unpacked_order'])
        print(session['cur_tid'])
        if session['cur_tid'] is None:
            session['cur_state'] = STATE_DONE
        else:
            cur_t = traverse(session['unpacked_order'], session['cur_tid'][:-1])
            session['cur_state'] = STATE_RECEIVING
            # Category of the missing argument, from the function's type.
            session['cur_type'] = str(gr.functionType(cur_t[0]).unpack()[0][session['cur_tid'][-1]][2])
            print(session['cur_type'])
            if session['cur_type'] == 'ListItem':
                return cgr.printName(session['cur_type'])
            else:
                # Show the partially linearized context ("? " marks holes).
                pre_str = cgr.linearize(pgf.readExpr(repack(cur_t))).replace('? ', '')
                return 'For ' + pre_str + '<br/>' + cgr.printName(session['cur_type'])

    if session['cur_state'] == STATE_DONE:
        confirm_order = ('confirm', session['unpacked_order'][1])
        confirm = cgr.linearize(pgf.readExpr(repack(confirm_order)))
        session['cur_state'] = STATE_IDLE
        return confirm
Exemple #15
0
def translateWord(grammar, language, tgtlanguages, word):
    """Translate *word* into each target language, best-effort.

    Strategy (Python 2 code — `filter` returns a list here):
    1. chunk-level translation via translateWordsAsChunks;
    2. parse the word as a 'Chunk' and linearize the first parse;
    3. on parse failure, morphological lookup: pick an analysis whose
       lemma is linearizable in more than half of the target languages;
    4. fall back to the untranslated word.
    Returns a list of (language, translation) pairs.
    """
    possible_translations = translateWordsAsChunks(grammar, language, tgtlanguages, word);
    if len(possible_translations):
	return possible_translations;

    lowerword = word.lower();
    try:
	partialExprList = grammar.languages[language].parse(word, cat='Chunk');
	# Only the first (most probable) parse is used.
	for expr in partialExprList:
	    return [(lang, gf_utils.gf_postprocessor( grammar.languages[lang].linearize(expr[1]) )) for lang in tgtlanguages];
    except pgf.ParseError:
	# Try both original and lowercased forms in the morphology.
	morphAnalysis = grammar.languages[language].lookupMorpho(word) + grammar.languages[language].lookupMorpho(lowerword);
	for morph in morphAnalysis:
	    # Count target languages that can linearize this lemma.
	    countPositiveLanguages = filter(None, [grammar.languages[lang].hasLinearization(morph[0]) for lang in tgtlanguages]);
	    if len(countPositiveLanguages) > 0.5*len(tgtlanguages):
		return [(lang, gf_utils.gf_postprocessor( grammar.languages[lang].linearize( pgf.readExpr(morph[0]) ) )) for lang in tgtlanguages];
    return [(lang, word) for lang in tgtlanguages];
Exemple #16
0
def pgf_linearize(args):
    """Linearize parser output (Python 2 code).

    Reads tab-separated lines ``sentid \t parsetime \t [prob \t tree]``
    from args.inputstream, then writes each tree's linearization in
    args.tgtlang (post-processed) to args.outputstream, or an empty
    line when there is no tree.
    """
    grammar = pgf.readPGF(args.pgfgrammar);
    outputPrinter = postprocessor;
    inputSet = [];
    for line in args.inputstream:
	try:
	    sentid, parsetime, parserepr = line.strip('\n').split('\t', 2);
	except ValueError:
	    print line.strip();
	# NOTE(review): after a ValueError above, `parserepr` is unbound
	# and the next line raises NameError — malformed input crashes.
	parseprob, abstree = parserepr.split('\t') if parserepr.strip() else (0, '');
	inputSet.append( (int(sentid), float(parsetime), float(parseprob), pgf.readExpr(abstree) if abstree else None) );
    linearizer = grammar.languages[args.tgtlang].linearize;
    for sentid, _, _, abstree in inputSet:
	if abstree:
	    print >>args.outputstream, str(outputPrinter(linearizer(abstree)));
	else:
	    print >>args.outputstream, "";
    return;
Exemple #17
0
def pgf_linearize(args):
    """Linearize parser output (Python 2 code; duplicate definition —
    shadows any earlier `pgf_linearize` in this module).

    Reads tab-separated lines ``sentid \t parsetime \t [prob \t tree]``
    from args.inputstream and writes each tree's linearization in
    args.tgtlang to args.outputstream, or an empty line when absent.
    """
    grammar = pgf.readPGF(args.pgfgrammar);
    outputPrinter = postprocessor;
    inputSet = [];
    for line in args.inputstream:
	try:
	    sentid, parsetime, parserepr = line.strip('\n').split('\t', 2);
	except ValueError:
	    print line.strip();
	# NOTE(review): after a ValueError above, `parserepr` is unbound
	# and the next line raises NameError — malformed input crashes.
	parseprob, abstree = parserepr.split('\t') if parserepr.strip() else (0, '');
	inputSet.append( (int(sentid), float(parsetime), float(parseprob), pgf.readExpr(abstree) if abstree else None) );
    linearizer = grammar.languages[args.tgtlang].linearize;
    for sentid, _, _, abstree in inputSet:
	if abstree:
	    print >>args.outputstream, str(outputPrinter(linearizer(abstree)));
	else:
	    print >>args.outputstream, "";
    return;
def generate_image(tree, grammar, filenamebase, outdirpath):
    """Render the abstract tree *tree* as a PNG under <outdirpath>/images.

    Writes a Graphviz .dot file and runs ``dot`` to produce the PNG.
    Returns the PNG filename (not the full path).
    """
    imagepath = os.path.join(outdirpath, "images")
    # exist_ok replaces the old try/except FileExistsError dance.
    os.makedirs(imagepath, exist_ok=True)

    expr = pgf.readExpr(tree)
    dotfilepath = os.path.join(imagepath, filenamebase + '.dot')
    pngfilename = filenamebase + '.png'
    pngfilepath = os.path.join(imagepath, pngfilename)
    # Bug fix: the file handle leaked if graphvizAbstractTree raised
    # between open() and close(); the context manager always closes it.
    with open(dotfilepath, 'w') as dotfile:
        dotfile.write(grammar.graphvizAbstractTree(expr))

    # NOTE: dot failures are silently ignored (no check=True), matching
    # the original behavior.
    subprocess.run(["dot", "-Tpng", dotfilepath, "-o", pngfilepath])

    return pngfilename
Exemple #19
0
def readJohnsonRerankerTrees(inputStream):
    """Yield (sentidx, parsesBlock) from Johnson-reranker output (Python 2).

    Each block starts with a "<count> <sentidx>" header followed by
    alternating probability / tree lines; a blank probability line ends
    the block early.  parsesBlock items are (float prob, pgf.Expr) pairs.
    """
    endOfParse = False;
    while True:
	sentheader = inputStream.next();
	if sentheader == '':
	    break;
	parsescount, sentidx = map(int, sentheader.strip().split());
	parsesBlock = [];
	for i in xrange(parsescount):
	    parseprob = inputStream.next();
	    # Blank probability line: block ended before `parsescount` parses.
	    if parseprob.strip() == '':
		endOfParse = True;
		break;
	    parse = inputStream.next();
	    parsesBlock.append( (float(parseprob.strip()), pgf.readExpr(parse.strip())) );
	yield sentidx, parsesBlock;
	# Consume the blank separator line unless we already read it above.
	if not endOfParse:
	    _ = inputStream.next();
	endOfParse = False;
Exemple #20
0
def readJohnsonRerankerTrees(inputStream):
    """Yield (sentidx, parsesBlock) from Johnson-reranker output (Python 2;
    duplicate definition — shadows any earlier one in this module).

    Each block starts with a "<count> <sentidx>" header followed by
    alternating probability / tree lines; a blank probability line ends
    the block early.  parsesBlock items are (float prob, pgf.Expr) pairs.
    """
    endOfParse = False;
    while True:
	sentheader = inputStream.next();
	if sentheader == '':
	    break;
	parsescount, sentidx = map(int, sentheader.strip().split());
	parsesBlock = [];
	for i in xrange(parsescount):
	    parseprob = inputStream.next();
	    # Blank probability line: block ended before `parsescount` parses.
	    if parseprob.strip() == '':
		endOfParse = True;
		break;
	    parse = inputStream.next();
	    parsesBlock.append( (float(parseprob.strip()), pgf.readExpr(parse.strip())) );
	yield sentidx, parsesBlock;
	# Consume the blank separator line unless we already read it above.
	if not endOfParse:
	    _ = inputStream.next();
	endOfParse = False;
Exemple #21
0
def gen_translations(args, lang_source, lang_target, line):
    """Yield (source, target, prob, tree) translations for one input line.

    In 'gold' mode the line is a 4-field TSV record whose tree (field 3)
    is relinearized unless field 2 is '-1'.  Otherwise the line is
    (optionally tokenized and) parsed, and the n-best trees are
    linearized into the target language.
    """
    if args.type == 'gold':
        fields = line.split('\t')
        if fields[2] == '-1':
            # Sentinel probability: pass the record through unchanged.
            yield fields[0], fields[1], fields[2], fields[3]
        else:
            tree = pgf.readExpr(fields[3])
            utt_target = lang_target.linearize(tree).decode('utf8')
            yield fields[0], utt_target, fields[2], fields[3]
        return

    utt_source = line
    if args.tokenize == 'simple':
        utt_source = tokenize_simple(line)
    elif args.tokenize == 'nltk':
        utt_source = tokenize_nltk(line)

    parse_kwargs = {'n': args.n_best}
    if args.cat:
        parse_kwargs['cat'] = args.cat
    for prob, tree in lang_source.parse(utt_source, **parse_kwargs):
        yield utt_source, lang_target.linearize(tree).decode('utf8'), prob, tree
#!/usr/bin/env python
# coding=utf-8
# -*- encoding: utf-8 -*-

import sys,pgf,argparse
from lib.abstractLearningLib import ExtendedExpr



if __name__ == "__main__":
    # Filter: reads "sl | tl" (optionally preceded by extra fields skipped
    # via --offset) pairs of GF trees from stdin and reprints them with
    # each tree's root function annotated with its children's functions.
    # (Python 2 script — note the print statement below.)

    parser = argparse.ArgumentParser(description='prints only fun name.')
    parser.add_argument('--offset',default='0')
    args = parser.parse_args(sys.argv[1:])

    # Number of leading "|"-separated fields to pass through untouched.
    offset=int(args.offset)

    for line in sys.stdin:

        parts = line.split("|")
        sl=parts[0+offset].strip()
        tl=parts[1+offset].strip()
        slrawexpr=pgf.readExpr(sl)
        tlrawexpr=pgf.readExpr(tl)
        slexpr=ExtendedExpr(slrawexpr,None)
        tlexpr=ExtendedExpr(tlrawexpr,None)
        # Re-emit skipped fields followed by the annotated trees.
        print " | ".join( [ parts[i].strip() for i in range(offset)] + [slexpr.str_with_children_fun(),tlexpr.str_with_children_fun()])
        out = []
        for a in argx:
            out.extend(get_terminals(a))
        return out

def get_type(gr, fun):
    """Return the result category of function *fun* in grammar *gr*.

    Returns None when the grammar does not know the function (the
    lookup raises KeyError).
    """
    try:
        fun_type = gr.functionType(fun)
        return fun_type.cat
    except KeyError:
        return None

if __name__ == "__main__": 
    gr = pgf.readPGF('../data/translate-pgfs/TranslateEng.pgf')
    eng = gr.languages['TranslateEng']

    lin = lambda fun: eng.linearize(pgf.readExpr(fun))

    with open('../data/treebanks/rgl-api-trees.txt') as f:
        trees = [l.strip() for l in f]

    alts = []
    for tree in trees:
        exp = pgf.readExpr(tree)
        terms = get_terminals(exp)
        lins = [eng.linearize(pgf.readExpr(w)) for w in terms]
        alts.append({
            s: {
                x for x,_,_ in eng.lookupMorpho(l) 
                if get_type(gr, x) == get_type(gr, s)
                if lin(x) == lin(s)
            }
Exemple #24
0
 treebank = json.loads(treebank_str)
 
 import pgf
 gr = pgf.readPGF(args.grammar)
 conc = gr.languages[args.concrete]
 
 results = []
 times = []
 
 import time
 for o in treebank:
     tree = o["Abs"]
     gold = o["Afr"]
     
     try:
         e = pgf.readExpr(tree)
         start = time.clock()
         l = conc.linearize(e)
         end = time.clock()
         if unicode(l,'utf8') == gold:
             results.append((o,l,"s",end - start))
         else:
             results.append((o,l,"f",end - start))
     except pgf.PGFError:
         pass
 
 summary_str = print_summary(args.grammar, args.concrete, results)
 result_file = codecs.open(args.result, 'w', 'utf8')
 result_file.write(summary_str)
 
 for (o,l,r,t) in results:
Exemple #25
0
def string_year(s):
    """Wrap the year prefix (first four characters of *s*, e.g. an ISO
    date string) in an `inYearDate` expression."""
    year_expr = pgf.readExpr(s[:4])
    return pgf.Expr('inYearDate', [year_expr])
 
 if DEBUG:
     print >> sys.stderr, "source trees:"
     for t in sourceTreesRaw:
         print >> sys.stderr, t
     print >> sys.stderr, "target trees:"
     for t in targetTreesRaw:
         print >> sys.stderr, t
 
 #ignore trees with non-ascii characters, which make the GF bindigs crash
 sourceExprs=[]
 for rawTree in sourceTreesRaw:
     try:
         ### ignore non ascii data ####
         rawTree.decode('ascii')
         expr=pgf.readExpr(rawTree)
         sourceExprs.append(expr)
     except (pgf.PGFError,UnicodeDecodeError):
         print >> sys.stderr, "Could not parse SL expr: "+rawTree
 
 targetExprs=[]
 for rawTree in targetTreesRaw:
     try:
         ### ignore non ascii data ####
         rawTree.decode('ascii')
         expr=pgf.readExpr(rawTree)
         targetExprs.append(expr)
     except (pgf.PGFError,UnicodeDecodeError):
         print >> sys.stderr, "Could not parse TL expr: "+rawTree
 
 #ignore trees which cannot be linearized, since we won't be able to align them
Exemple #27
0
    gr = read_gf(PORTABLE_GRAMMAR_FILE)

    print("Languages: {}".format(", ".join(gr.languages.keys())))

    eng = gr.languages["CrudEng"]
    fin = gr.languages["CrudFin"]
    rus = gr.languages["CrudRus"]

    PARSE_EXAMPLE = u"штука получилась ."
    print(u"Parse: {}".format(PARSE_EXAMPLE))
    parse_iter = rus.parse(PARSE_EXAMPLE.encode("utf-8"))
    expr = [k for k in parse_iter][0][1]

    expr_str = str(expr)
    e = pgf.readExpr(expr_str)

    print(u"Linearized: {}".format(unicode(eng.linearize(e), "utf-8")))

    PARSE_EXAMPLE_2 = u"штука получилась"
    print(u"Parse: {}".format(PARSE_EXAMPLE_2))
    try:
        parse_iter = rus.parse(PARSE_EXAMPLE_2.encode("utf-8"))
    except pgf.ParseError as x:
        print("expected ERROR: {}".format(x))

    print("Lexicons")
    for lang in eng, fin, rus:
        print("\n{}\n".format(lang.name))
        print(", ".join(entry[0] for entry in lang.fullFormLexicon()))
 def replace_subtree_with_MWE_ref(self, subtreeindex, mweid=0):
     """Replace the child at *subtreeindex* with a wildcard-subtree
     reference to MWE number *mweid*."""
     ref_name = ExtendedExpr.WILDCARD_SUBTREE_PREFIX + str(mweid)
     self.children[subtreeindex] = ExtendedExpr(pgf.readExpr(ref_name), None)
Exemple #29
0
import pgf
import sys
import sets
import readline
import locale

# Interactive demo (Python 2): load the Penn Treebank PGF grammar and
# set up English-to-Bulgarian translation with readline completion.
sys.stdout.write("loading...")
sys.stdout.flush()
gr = pgf.readPGF("../../../treebanks/PennTreebank/ParseEngAbs.pgf")
sys.stdout.write("\n")

source_lang = gr.languages["ParseEng"]
target_lang = gr.languages["ParseBul"]

# Smoke test: linearize a fixed imperative sentence ("try !").
we = pgf.readExpr("UttImpSg PPos (ImpVP (UseV try_V))")
print source_lang.linearize(we)

sys.stdout.write("start cat: " + gr.startCat + "\n\n")


class Completer():
    def __init__(self, lang):
        self.gr = lang

    def complete(self, prefix, state):
        if state == 0:
            line = readline.get_line_buffer()
            line = line[0:readline.get_begidx()]
            self.i = source_lang.complete(line, prefix=prefix)
            self.tokens = sets.Set()
Exemple #30
0
import pgf
import sys
import sets
import readline
import locale

# Interactive demo (Python 2): load the Penn Treebank PGF grammar and
# set up English-to-Bulgarian translation with readline completion.
sys.stdout.write("loading...")
sys.stdout.flush();
gr = pgf.readPGF("../../../treebanks/PennTreebank/ParseEngAbs.pgf")
sys.stdout.write("\n")

source_lang = gr.languages["ParseEng"]
target_lang = gr.languages["ParseBul"]

# Smoke test: linearize a fixed imperative sentence ("try !").
we = pgf.readExpr("UttImpSg PPos (ImpVP (UseV try_V))")
print source_lang.linearize(we)

sys.stdout.write("start cat: "+gr.startCat+"\n\n")

class Completer():
	def __init__(self, lang):
		self.gr = lang
		
	def complete(self, prefix, state):
		if state == 0:
			line = readline.get_line_buffer()
			line = line[0:readline.get_begidx()]
			self.i = source_lang.complete(line, prefix=prefix)
			self.tokens = sets.Set()

		if len(self.tokens) > 50:
Exemple #31
0
def round_int(s, r):
    """Truncate the integer written in string *s* to its *r* leading
    digits (zeroing the rest) and return it as a GF expression."""
    drop = len(s) - r       # how many trailing digits to zero out
    unit = 10 ** drop
    truncated = (int(s) // unit) * unit
    return pgf.readExpr(str(truncated))
Exemple #32
0
# Tutorial-style demo of the pgf API: each step echoes the code it is
# about to run (REPL style), then runs it against the Facts grammar.
print(">>> gr = pgf.readPGF('Facts.pgf')")
gr = pgf.readPGF('Facts.pgf')

print('>>> print(list(gr.languages.keys()))')
print(list(gr.languages.keys()))

print(">>> eng = gr.languages['FactsEng']")
eng = gr.languages['FactsEng']

# Build an expression programmatically (constructor form)...
print(">>> attr = pgf.Expr('area_Attribute',[])")
attr = pgf.Expr('area_Attribute', [])

print(">>> eng.linearize(attr)")
print(eng.linearize(attr))

# ...and by parsing its string representation.
print(">>> obj = pgf.readExpr('NameObject (StringName \"France\")')")
obj = pgf.readExpr('NameObject (StringName "France")')

print(">>> val = pgf.readExpr('123')")
val = pgf.readExpr('123')

# Combine the pieces into a full fact and linearize it.
print(">>> fact = pgf.Expr('AttributeFact',[attr,obj,val])")
fact = pgf.Expr('AttributeFact', [attr, obj, val])

print(">>> print(fact)")
print(fact)

print(">>> print(eng.linearize(fact))")
print(eng.linearize(fact))
Exemple #33
0
def string_expr(s):
    """Return *s* wrapped in double quotes as a GF string-literal expression."""
    quoted = str('"' + s + '"')
    return pgf.readExpr(quoted)
Exemple #34
0
def string_value(s):
    """Return *s* as a `StringValue` GF expression (the string literal
    wrapped in the StringValue constructor)."""
    literal = pgf.readExpr(str('"' + s + '"'))
    return pgf.Expr('StringValue', [literal])
Exemple #35
0
def mkInt(s):
    """Return *s* (anything str() accepts, e.g. an int) as a GF
    integer-literal expression."""
    literal = str(s)
    return pgf.readExpr(literal)