예제 #1
0
def makeVerb(topic, parents, num, w2v, jux=False):
    key = topic + "".join(parents)
    if key in cache:
        final = cache[key]
    else:
        choices = set()
        stripped_topic = h.strip_tag(topic)
        for p in parents:
            for r in ['trg']:  #bga is terrible
                addAll(
                    choices,
                    h.pos([
                        x[0] for x in dm.query(dm.related(r, p),
                                               dm.topics(stripped_topic))
                    ], 'v'))
            for r in ['CapableOf', 'UsedFor', 'Desires']:
                for phrase in [x[1] for x in cn.getOutgoing(p, r)]:
                    addAll(choices, h.pos(phrase.split(), 'v'))
        final = list(choices)

        if jux:
            temp = set()
            for w in final:
                addAll(
                    temp,
                    h.pos([
                        x[0] for x in dm.query(dm.related('ant', w),
                                               dm.topics(stripped_topic))
                    ], 'v'))
                for phrase in [x[1] for x in cn.getOutgoing(w, 'Antonym')]:
                    addAll(choices, h.pos(phrase.split(), 'v'))

            final = list(temp)

        #word2vec sort
        parents = [x + '_NN' for x in parents]
        relations = parents + [topic]

        #word2vec threshold

        final = [pen.conjugate(x) + '_VB' for x in final]
        final = h.w2vWeightsListNew(final, relations, w2v)
        if len(final) > 20:
            final = final[:-len(final) / 5]

        cache[key] = final

    if len(final) < 1:
        print "VERB FINAL IS EMPTY:", topic, parents, num, list(choices)
    #just in case
    if len(final) <= num:
        return [x[0] for x in final]
    return [pickOne(final) for x in range(num)]
예제 #2
0
def doit(topic, parent, pos, w2v=None):
    choices = {}
    for r in dmrs:
        addAllDict(
            choices,
            h.pos([
                x[0] for x in dm.query(dm.related(r, parent), dm.topics(topic))
            ], pos), 'dm:' + r)
    for r in cnrs:
        for phrase in [x[1] for x in cn.getOutgoing(parent, r)]:
            addAllDict(choices, h.pos(phrase.split(), pos), 'cn:' + r)
    #add w2v "custom relation" thing
    if w2v is not None:
        addAllDict(choices, h.pos(h.relation(parent, custom_rels, w2v), pos),
                   'custom')

    print len(choices.keys())

    scores = {}
    for k in choices.keys():
        s = ask(topic, parent, k)
        s -= 5  #(-5 to 5 scale)
        choices[k]['score'] += s
        for src in choices[k]['sources']:
            addScore(scores, src, s)
    print "Best words", sorted(choices.keys(),
                               key=lambda x: choices[x]['score'],
                               reverse=True)
    print "Best sources", scores
예제 #3
0
def genrec(node, parent, prev, force, w2v, fillin, w2vmax, w2vmin, verbgen):
    i = node['index']
    if not force and fillin[i]:
        word = fillin[i]
        if parent:
            force = True  #only force regen if its not the root (i.e. it has a parent)
    else:
        if parent['replace']:
            parent = parent['replace']
        nodep = node['pos']
        startTag = '_' + parent['pos']
        endTag = '_' + nodep
        #maybe just have a set list to draw from for some restricted POS like IN, etc?
        cacheK = (prev, parent['word'], node['word'])
        if parent['dep'] == 'root' and cacheK in choiceCache:
            choices = choiceCache[cacheK]
        else:

            choices = w2vChoices(prev, parent['word'], startTag, node['word'],
                                 endTag, w2v, w2vmax, w2vmin)
            if parent[
                    'dep'] == 'root':  #only cache choices from root (more likely to be used, caching all is too much data for too little overlap)
                choiceCache[cacheK] = choices

        cacheK = (parent['word'], node['word'])
        if cacheK not in relsCache:
            relsCache[cacheK] = cn.getRels(parent['word'], node['word'])
        cnRels = relsCache[cacheK]
        for rel in cnRels:
            choices += [cn.stripPre(t[0]) for t in cn.getOutgoing(prev, rel)]

        final = []
        for c in choices:
            if c == node['word']:  #try to find a different word
                continue
            p = h.getPOS(c)
            if p == nodep:
                final.append(c)
            elif p in nodep or nodep in p or ('VB' in p and 'VB' in nodep):
                newc = h.tryPOS(c, p, nodep)
                if newc:
                    final.append(newc)

        if not final:
            word = wb.get(
                nodep)  #grab from wordbag insteadof using node['word']
        else:
            if verbgen:
                print parent['word'], ":", node[
                    'word'], "::", prev, ":\n", final
                print
            word = random.choice(final)  #Can this be smarter?
        if not word:
            word = node['word']

        fillin[i] = word
    if len(node['children']):
        for child in node['children']:
            genrec(child, node, word, force, w2v, fillin, w2vmax, w2vmin,
                   verbgen)
예제 #4
0
def get_words(topic, parents, cn_relations, w2v_relations, w2v, isIncoming=True):
    """Collect noun candidates related to ``parents``, tagged with ``_NN``.

    For every parent (tag stripped) the function gathers:
      * the second element of each ConceptNet edge for every relation in
        ``cn_relations`` - incoming edges when ``isIncoming`` is true,
        outgoing ones otherwise;
      * for incoming lookups only, Datamuse 'trg' results for the stripped
        topic whose first tag is 'n';
      * the words returned by ``h.get_nouns_from_verb``.

    The combined list goes through ``oldFilterNoun`` and ``removeMatch``
    (called with the topic and parents) before ``_NN`` is appended to every
    word.
    """
    bare_topic = h.strip_tag(topic)
    # The edge direction is the same for every parent and relation.
    fetch_edges = cn.getIncoming if isIncoming else cn.getOutgoing

    collected = []
    for parent in parents:
        bare_parent = h.strip_tag(parent)

        for relation in cn_relations:
            collected.extend(
                edge[1] for edge in fetch_edges(bare_parent, relation))

        # TODO: find good datamuse relationships for outgoing edges.
        if isIncoming:
            hits = dm.query(dm.related('trg', bare_parent),
                            dm.topics(bare_topic), dm.metadata('p'))
            for hit in hits:
                # keep only entries whose first tag marks a noun
                if 'tags' in hit[1] and hit[1]['tags'][0] == 'n':
                    collected.append(hit[0])

        collected.extend(
            h.get_nouns_from_verb(bare_parent, w2v_relations, w2v))

    nouns = oldFilterNoun(collected)
    removeMatch(nouns, topic, parents)
    # should hopefully all be nouns at this point...
    return [noun + '_NN' for noun in nouns]
예제 #5
0
def comesBefore(word):
    """Return ConceptNet edges for things that come before ``word``.

    Concatenates, in this order, the incoming 'Causes' edges, the incoming
    'HasSubevent' edges and the outgoing 'HasPrerequisite' edges of ``word``.
    """
    causes = cn.getIncoming(word, "Causes")
    parent_events = cn.getIncoming(word, "HasSubevent")
    prerequisites = cn.getOutgoing(word, "HasPrerequisite")
    return causes + parent_events + prerequisites