def makeVerb(topic, parents, num, w2v, jux=False):
    """Return up to ``num`` verbs related to ``parents`` within ``topic``.

    Candidate verbs are gathered for every parent from Datamuse (the 'trg'
    relation, restricted to the tag-stripped topic) and from ConceptNet
    outgoing 'CapableOf', 'UsedFor' and 'Desires' edges; every source is
    filtered through ``h.pos(..., 'v')``.  With ``jux`` set, the candidates
    are replaced by their antonyms (Datamuse 'ant' plus ConceptNet
    'Antonym').  The survivors are conjugated with ``pen.conjugate``, tagged
    ``_VB`` and handed to ``h.w2vWeightsListNew`` together with the parents
    (tagged ``_NN``) and the topic.  When more than 20 entries come back, the
    last fifth (rounded up) is dropped -- presumably the weakest matches,
    assuming the helper returns them best-first.  The resulting list is
    memoised in the module-level ``cache``.

    Args:
        topic: Topic word; concatenated with the parents to form the cache
            key, so it is expected to be a string.
        parents: Iterable of parent words (strings).
        num: Number of verbs wanted.
        w2v: Word2vec model, passed through to ``h.w2vWeightsListNew``.
        jux: When true, use antonyms of the candidate verbs instead.

    Returns:
        If the weighted list holds at most ``num`` entries, the first element
        of each entry; otherwise a list of ``num`` separate ``pickOne`` draws
        from the weighted list.
    """
    key = topic + "".join(parents)
    if jux:
        # Juxtaposed results differ from the plain ones, so they get their own
        # slot; plain lookups keep the original key format.
        key += "|jux"

    # Bound before the branch so the diagnostic below also works on a cache
    # hit (where no candidates are collected).
    choices = set()

    if key in cache:
        final = cache[key]
    else:
        stripped_topic = h.strip_tag(topic)
        for p in parents:
            for r in ['trg']:  # bga is terrible
                hits = dm.query(dm.related(r, p), dm.topics(stripped_topic))
                addAll(choices, h.pos([x[0] for x in hits], 'v'))
            for r in ['CapableOf', 'UsedFor', 'Desires']:
                for phrase in [x[1] for x in cn.getOutgoing(p, r)]:
                    addAll(choices, h.pos(phrase.split(), 'v'))

        verbs = list(choices)
        if jux:
            opposites = set()
            for w in verbs:
                hits = dm.query(dm.related('ant', w),
                                dm.topics(stripped_topic))
                addAll(opposites, h.pos([x[0] for x in hits], 'v'))
                # ConceptNet antonyms must land in the same set as the
                # Datamuse ones: only this set feeds the result.
                for phrase in [x[1] for x in cn.getOutgoing(w, 'Antonym')]:
                    addAll(opposites, h.pos(phrase.split(), 'v'))
            verbs = list(opposites)

        # word2vec sort: weigh against the noun-tagged parents and the topic.
        # A separate name keeps the caller-visible ``parents`` intact for the
        # diagnostic below.
        tagged_parents = [x + '_NN' for x in parents]
        relations = tagged_parents + [topic]

        # word2vec threshold
        verbs = [pen.conjugate(x) + '_VB' for x in verbs]
        final = h.w2vWeightsListNew(verbs, relations, w2v)
        if len(final) > 20:
            # Floor division of the negated length rounds away from zero, so
            # ceil(len / 5) trailing entries are removed.
            final = final[:-len(final) // 5]
        cache[key] = final

    if not len(final):
        # just in case
        print("%s %s %s %s %s" % ("VERB FINAL IS EMPTY:", topic, parents, num,
                                  list(choices)))

    if num >= len(final):
        return [x[0] for x in final]
    return [pickOne(final) for _ in range(num)]
def doit(topic, parent, pos, w2v=None):
    """Collect candidate words for ``parent`` and rate each one via ``ask``.

    Candidates are pulled from Datamuse (one query per relation in the
    module-level ``dmrs``), from ConceptNet outgoing edges (one lookup per
    relation in ``cnrs``) and, when ``w2v`` is given, from
    ``h.relation(parent, custom_rels, w2v)``.  Every batch is filtered with
    ``h.pos(..., pos)`` and registered through ``addAllDict`` under a source
    label ('dm:<rel>', 'cn:<rel>' or 'custom').

    Each candidate is then passed to ``ask``; the answer minus 5 is added to
    the candidate's 'score' and reported to ``addScore`` for each of its
    'sources'.  The function prints the candidate count, the candidates
    ordered by descending score, and the per-source score table.

    Args:
        topic: Topic used to restrict the Datamuse queries and shown to ``ask``.
        parent: Word whose related words are collected.
        pos: Part-of-speech selector forwarded to ``h.pos``.
        w2v: Optional word2vec model; ``None`` skips the custom-relation step.

    Returns:
        None.  Results are only printed.
    """
    choices = {}

    # Datamuse relations.
    for relation in dmrs:
        hits = dm.query(dm.related(relation, parent), dm.topics(topic))
        matching = h.pos([hit[0] for hit in hits], pos)
        addAllDict(choices, matching, 'dm:' + relation)

    # ConceptNet relations: each edge carries a phrase in position 1.
    for relation in cnrs:
        phrases = [edge[1] for edge in cn.getOutgoing(parent, relation)]
        for phrase in phrases:
            matching = h.pos(phrase.split(), pos)
            addAllDict(choices, matching, 'cn:' + relation)

    # add w2v "custom relation" thing
    if w2v is not None:
        related = h.relation(parent, custom_rels, w2v)
        addAllDict(choices, h.pos(related, pos), 'custom')

    print(len(choices))

    scores = {}
    for candidate in list(choices):
        rating = ask(topic, parent, candidate)
        rating -= 5  # (-5 to 5 scale)
        entry = choices[candidate]
        entry['score'] += rating
        for src in entry['sources']:
            addScore(scores, src, rating)

    ranked = sorted(choices, key=lambda w: choices[w]['score'], reverse=True)
    print("%s %s" % ("Best words", ranked))
    print("%s %s" % ("Best sources", scores))
def genrec(node, parent, prev, force, w2v, fillin, w2vmax, w2vmin, verbgen):
    """Recursively choose a word for ``node`` and all of its descendants.

    The chosen word for each node is written to ``fillin[node['index']]``.

    * If ``force`` is false and ``fillin`` already holds a word for this
      node, that word is kept.  When the node has a parent, ``force`` is
      switched on so that everything below it is regenerated.
    * Otherwise a word is generated: candidates come from ``w2vChoices``
      (cached in ``choiceCache`` only when the parent's 'dep' is 'root') plus
      ConceptNet outgoing edges of ``prev`` for every relation returned by
      ``cn.getRels(parent word, node word)`` (cached in ``relsCache``).
      Candidates equal to the node's current word are skipped; the rest must
      have the node's POS, or a POS that overlaps it and can be converted by
      ``h.tryPOS``.  One survivor is picked at random; with no survivors the
      word bag ``wb`` is consulted, and if that yields nothing the node keeps
      its original word.

    Args:
        node: Dict with at least 'index' and 'children'; the generation path
            also reads 'pos' and 'word'.
        parent: Parent node dict, or a falsy value for the root.  The
            generation path reads 'replace', 'pos', 'word' and 'dep' from it,
            so that path requires a real parent.
        prev: Word chosen for the parent position.
        force: When true, ignore any existing entry in ``fillin``.
        w2v: Word2vec model, forwarded to ``w2vChoices``.
        fillin: Mutable sequence/mapping indexed by node index; updated in
            place.
        w2vmax, w2vmin: Forwarded unchanged to ``w2vChoices``.
        verbgen: When true, print the candidate list for each generated word.

    Returns:
        None.
    """
    i = node['index']

    if not force and fillin[i]:
        word = fillin[i]
        if parent:
            # only force regen if it's not the root (i.e. it has a parent)
            force = True
    else:
        if parent['replace']:
            parent = parent['replace']
        nodep = node['pos']
        startTag = '_' + parent['pos']
        endTag = '_' + nodep
        # maybe just have a set list to draw from for some restricted POS
        # like IN, etc?

        # only cache choices from root (more likely to be used, caching all
        # is too much data for too little overlap)
        from_root = parent['dep'] == 'root'
        cacheK = (prev, parent['word'], node['word'])
        if from_root and cacheK in choiceCache:
            base = choiceCache[cacheK]
        else:
            base = w2vChoices(prev, parent['word'], startTag, node['word'],
                              endTag, w2v, w2vmax, w2vmin)
            if from_root:
                choiceCache[cacheK] = base

        # Work on a copy: extending the cached list in place would append the
        # ConceptNet words to it again on every later cache hit.
        choices = list(base)

        relsK = (parent['word'], node['word'])
        if relsK not in relsCache:
            relsCache[relsK] = cn.getRels(parent['word'], node['word'])
        for rel in relsCache[relsK]:
            choices.extend(
                cn.stripPre(t[0]) for t in cn.getOutgoing(prev, rel))

        final = []
        for c in choices:
            if c == node['word']:
                continue  # try to find a different word
            p = h.getPOS(c)
            if p == nodep:
                final.append(c)
            elif p in nodep or nodep in p or ('VB' in p and 'VB' in nodep):
                # Overlapping tags (or two verb forms): try to convert.
                newc = h.tryPOS(c, p, nodep)
                if newc:
                    final.append(newc)

        if final:
            if verbgen:
                print("%s %s %s %s %s %s%s" % (
                    parent['word'], ":", node['word'], "::", prev, ":\n",
                    final))
                print("")
            word = random.choice(final)  # Can this be smarter?
        else:
            # grab from wordbag instead of using node['word']
            word = wb.get(nodep)

        if not word:
            word = node['word']

    fillin[i] = word
    for child in node['children']:
        genrec(child, node, word, force, w2v, fillin, w2vmax, w2vmin, verbgen)
def get_words(topic, parents, cn_relations, w2v_relations, w2v, isIncoming=True):
    """Gather noun candidates related to ``parents`` and tag them ``_NN``.

    For every parent (with its tag stripped) words are collected from:

    * ConceptNet, once per relation in ``cn_relations`` -- incoming edges
      when ``isIncoming`` is true, outgoing edges otherwise (element 1 of
      each edge is taken);
    * Datamuse 'trg' results restricted to the stripped topic, only when
      ``isIncoming`` is true, keeping results whose metadata has a 'tags'
      entry starting with 'n';
    * ``h.get_nouns_from_verb`` with ``w2v_relations`` and ``w2v``.

    The combined list goes through ``oldFilterNoun`` and ``removeMatch``
    (called with the topic and the original parents) before tagging.

    Args:
        topic: Topic word (tag is stripped for the Datamuse query).
        parents: Iterable of parent words.
        cn_relations: ConceptNet relation names to look up.
        w2v_relations: Forwarded to ``h.get_nouns_from_verb``.
        w2v: Word2vec model, forwarded to ``h.get_nouns_from_verb``.
        isIncoming: Selects incoming (default) or outgoing ConceptNet edges
            and enables the Datamuse lookup.

    Returns:
        List of the remaining words, each suffixed with '_NN'.
    """
    stripped_topic = h.strip_tag(topic)
    edges_of = cn.getIncoming if isIncoming else cn.getOutgoing

    gathered = []
    for parent in parents:
        bare_parent = h.strip_tag(parent)

        for relation in cn_relations:
            gathered.extend(edge[1] for edge in edges_of(bare_parent, relation))

        # TODO: find good datamuse relationships for outgoing edges.
        if isIncoming:
            hits = dm.query(dm.related('trg', bare_parent),
                            dm.topics(stripped_topic),
                            dm.metadata('p'))
            for hit in hits:
                meta = hit[1]
                # get only nouns
                if 'tags' in meta and meta['tags'][0] == 'n':
                    gathered.append(hit[0])

        gathered.extend(
            h.get_nouns_from_verb(bare_parent, w2v_relations, w2v))

    # oldFilterNoun is the filter in use (filterNoun is the alternative).
    nouns = oldFilterNoun(gathered)
    removeMatch(nouns, topic, parents)

    # should hopefully all be nouns at this point...
    return [noun + '_NN' for noun in nouns]
def comesBefore(word):
    """Return the ConceptNet edges that place something before ``word``.

    The result is the concatenation, in this order, of the incoming 'Causes'
    edges, the incoming 'HasSubevent' edges and the outgoing
    'HasPrerequisite' edges of ``word``, exactly as returned by ``cn``.
    """
    causes = cn.getIncoming(word, "Causes")
    enclosing_events = cn.getIncoming(word, "HasSubevent")
    prerequisites = cn.getOutgoing(word, "HasPrerequisite")
    return causes + enclosing_events + prerequisites