def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None): meaning_probs = {} # parse sentence with charniak and apply surgeries print 'parsing ...' modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() num_ancestors = count_lmk_phrases(t) - 1 for _ in xrange(iterations): (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors) meaning = m2s(lmk,rel) if meaning not in meaning_probs: ps = get_tree_probs(t, lmk, rel)[0] # print "Tree probs: ", zip(ps,rls) meaning_probs[meaning] = np.prod(ps) print '.' if extra_meaning: meaning = m2s(*extra_meaning) if meaning not in meaning_probs: ps = get_tree_probs(t, lmk, rel)[0] # print "Tree prob: ", zip(ps,rls) meaning_probs[meaning] = np.prod(ps) print '.' summ = sum(meaning_probs.values()) for key in meaning_probs: meaning_probs[key] /= summ return meaning_probs.items()
def get_modparse(sentence): """returns the modified parse tree for a sentence""" sp_db = SentenceParse.get_sentence_parse(sentence) try: res = sp_db.all()[0] parsetree = res.original_parse modparsetree = res.modified_parse except: print "parse.py: 103: " + sentence parses = parse_sentences([sentence]) if len(parses) == 0: raise ParseError(printcolors.WARNING + ('ParseError: a sentence was empty')) modparses = modify_parses(parses) for i,chunk in enumerate(modparses[:]): for j,modparse in enumerate(chunk): if 'LANDMARK-PHRASE' in modparse: modparses[i] = modparse parses[i] = parses[i][j] break if isinstance(modparses[i],list): modparses[i] = modparses[i][0] parses[i] = parses[i][0] parsetree = parses[0] modparsetree = modparses[0] try: SentenceParse.add_sentence_parse(sentence, parsetree, modparsetree) except Exception as e: print e if count_lmk_phrases(ParentedTree.parse(modparsetree)) < 1: raise ParseError(printcolors.WARNING + ('ParseError: Parse contained no Landmark phrase.\nSentence: %s\nParse: %s\nModparse: %s' % (sentence,parsetree,modparsetree))) return parsetree, modparsetree
def get_all_sentence_posteriors(sentence, meanings, golden=False, printing=True): print 'parsing ...' _, modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() num_ancestors = count_lmk_phrases(t) - 1 # logger(len(meanings)) lmks, rels = zip(*meanings) lmks = set(lmks) rels = set(rels) # logger('num things ' + str(len(lmks))+' '+str(len(rels))) syms = ['\\', '|', '/', '-'] sys.stdout.write('processing...\\') sys.stdout.flush() posteriors = {} for i,lmk in enumerate(lmks): if lmk.get_ancestor_count() != num_ancestors: p = 0 else: ps = get_tree_probs(t[1], lmk, golden=golden, printing=printing)[0] p = np.prod(ps) posteriors[lmk] = p sys.stdout.write("\b%s" % syms[i % len(syms)]) sys.stdout.flush() for i,rel in enumerate(rels): ps = get_tree_probs(t[0], rel=rel, golden=golden, printing=printing)[0] posteriors[rel] = np.prod(ps) sys.stdout.write("\b%s" % syms[i % len(syms)]) sys.stdout.flush() for j in range(50): sys.stdout.write("\b.%s" % syms[(i+j) % len(syms)]) sys.stdout.flush() print # for meaning in meanings: # lmk,rel = meaning # if lmk.get_ancestor_count() != num_ancestors: # p = 0 # else: # ps = get_tree_probs(t, lmk, rel, printing=False)[0] # p = np.prod(ps) # posteriors.append(p) # print p, lmk, lmk.ori_relations, rel, (rel.distance, rel.measurement.best_degree_class, rel.measurement.best_distance_class ) if hasattr(rel,'measurement') else 'No measurement' return posteriors
def get_all_sentence_posteriors(sentence, meanings): print 'parsing ...' modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() num_ancestors = count_lmk_phrases(t) - 1 posteriors = [] for meaning in meanings: lmk,rel = meaning if lmk.get_ancestor_count() != num_ancestors: p = 0 else: ps = get_tree_probs(t, lmk, rel)[0] p = np.prod(ps) posteriors.append(p) return posteriors
def get_sentence_posteriors(sentence, iterations=1): probs = [] meanings = [] # parse sentence with charniak and apply surgeries print 'parsing ...' modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() num_ancestors = count_lmk_phrases(t) - 1 for _ in xrange(iterations): meaning = get_meaning(num_ancestors=num_ancestors) lmk, rel = meaning probs.append(get_tree_prob(t, *meaning)) meanings.append(m2s(lmk,rel)) print '.' probs = np.array(probs) / sum(probs) return uniquify_distribution(meanings, probs)
for i, row in enumerate(reader, start=1): print "sentence", i # unpack row xloc, yloc, sentence, parse, modparse = row # convert variables to the right types xloc = float(xloc) yloc = float(yloc) loc = (xloc, yloc) parse = ParentedTree.parse(parse) modparse = ParentedTree.parse(modparse) # how many ancestors should the sampled landmark have? num_ancestors = count_lmk_phrases(modparse) - 1 # sample `args.iterations` times for each sentence for _ in xrange(args.iterations): lmk, rel = get_meaning(loc, num_ancestors) if args.verbose: print "utterance:", repr(sentence) print "location: %s" % repr(loc) print "landmark: %s (%s)" % (lmk, lmk_id(lmk)) print "relation: %s" % rel_type(rel) print "parse:" print parse.pprint() print "modparse:" print modparse.pprint() print "-" * 70
for i,row in enumerate(reader, start=1): print 'sentence', i # unpack row xloc, yloc, sentence, parse, modparse = row unique_sentences[sentence] = (parse, modparse) # convert variables to the right types xloc = float(xloc) yloc = float(yloc) loc = (xloc, yloc) parse = ParentedTree.parse(parse) modparse = ParentedTree.parse(modparse) # how many ancestors should the sampled landmark have? num_ancestors = count_lmk_phrases(modparse) - 1 if num_ancestors == -1: print 'Failed to parse %d [%s] [%s] [%s]' % (i, sentence, parse, modparse) continue # sample `args.iterations` times for each sentence for _ in xrange(args.iterations): lmk, rel = get_meaning(loc, num_ancestors) lmk, _, _ = lmk rel, _, _ = rel assert(not isinstance(lmk, tuple)) assert(not isinstance(rel, tuple)) if args.verbose: