Example #1
0
def generate_noun_metaphors(sentence, parse, place):
    """Swap one word of ``sentence`` for the lemma of a metaphorically mapped synset.

    Candidate synsets come from ``Mapping().map(noun)``. Each candidate's text
    (``wordnet.get_text``) is compared with the sentence's non-stop-words using
    ``jaccard``; the best-scoring candidate supplies the replacement lemma.

    Args:
        sentence: Input sentence; tokens are obtained by splitting on single
            spaces, stripping non-word characters and lower-casing.
        parse: Not used by this function; kept so the signature is unchanged.
        place: Indexable pair of token positions: ``place[0]`` is the word to
            metaphorize, ``place[1]`` is the word to replace.

    Returns:
        ``(True, new_sentence)`` when some candidate has non-zero similarity
        with the context, otherwise ``(False, sentence)`` with the sentence
        untouched.

    Raises:
        IndexError: If an index in ``place`` is outside the token list.
    """
    wordnet = wn("wordnet.db")
    stop_words = get_stop_words()

    # Normalise every token: drop non-word characters and lower-case.
    words = [re.sub(r'\W+', '', word).lower() for word in sentence.split(" ")]

    noun = words[place[0]]  # word to metaphorize
    replace = words[place[1]]  # word to replace

    if not replace:
        # The token was pure punctuation. Substituting an empty pattern would
        # insert the lemma at every position of the sentence.
        return False, sentence

    metmap = Mapping()
    context = remove_stopwords(words, stop_words)

    # Find metaphors: keep only candidates with non-zero similarity.
    have_overlap = {}
    overlap = {}
    for sid in metmap.map(noun):
        text = remove_stopwords(wordnet.get_text(sid), stop_words)
        sim = jaccard(text, context)
        if sim > 0.0:
            have_overlap[sid] = text
            overlap[sid] = sim

    # Limit number of synsets by overlap: shortlist the MAX best scores.
    # .items() (rather than .iteritems()) works on both Python 2 and 3.
    MAX = 5
    ranked = sorted(overlap.items(), key=operator.itemgetter(1))
    keys = [sid for sid, _ in ranked[-MAX:]]

    # Re-score the shortlist and keep the best candidate.
    # TODO - CAN USE JACCARD OR SAP: SAP_distance(text, context, wordnet)
    max_similarity = -1.0
    max_sid = None
    for sid in keys:
        sim = jaccard(have_overlap[sid], context)
        if sim > max_similarity:
            max_sid = sid
            max_similarity = sim

    if max_sid is None:
        return False, sentence

    lemma = wordnet.get_lemma(max_sid)

    # Replace whole words only, so longer words that merely contain the target
    # are left alone. The target was lower-cased above, hence IGNORECASE to
    # still hit a capitalised occurrence. A callable replacement keeps any
    # backslashes in the lemma literal.
    target = re.compile(r'\b' + re.escape(replace) + r'\b', re.IGNORECASE)
    return True, target.sub(lambda match: lemma, sentence)
Example #2
0
def generate_noun_metaphors(sentence, parse, place):
    """Replace a word in ``sentence`` with the lemma of its best metaphor candidate.

    The word at ``place[0]`` is mapped to candidate synsets through
    ``Mapping().map``. Candidates are scored with ``jaccard`` between their
    stop-word-filtered text and the stop-word-filtered sentence tokens, and
    the lemma of the highest-scoring one replaces the word at ``place[1]``.

    Args:
        sentence: Sentence to rewrite. It is tokenised by splitting on single
            spaces; tokens are stripped of non-word characters and lower-cased.
        parse: Not referenced here; present only to keep the signature stable.
        place: Indexable pair ``(metaphorize_index, replace_index)`` of token
            positions.

    Returns:
        ``(True, rewritten_sentence)`` if a candidate with non-zero similarity
        exists, else ``(False, sentence)``.

    Raises:
        IndexError: If ``place`` points outside the token list.
    """
    wordnet = wn("wordnet.db")
    stop_words = get_stop_words()

    # Clean each token: remove non-word characters, then lower-case.
    words = [re.sub(r'\W+', '', token).lower() for token in sentence.split(" ")]

    noun = words[place[0]]  # word to metaphorize
    replace = words[place[1]]  # word to replace

    # A punctuation-only token cleans to "", and replacing "" would splice
    # the lemma between every character of the sentence.
    if not replace:
        return False, sentence

    metmap = Mapping()
    context = remove_stopwords(words, stop_words)

    # Find metaphors: remember text and score of every overlapping candidate.
    have_overlap = {}
    overlap = {}
    for sid in metmap.map(noun):
        text = remove_stopwords(wordnet.get_text(sid), stop_words)
        sim = jaccard(text, context)
        if sim > 0.0:
            have_overlap[sid] = text
            overlap[sid] = sim

    # Limit number of synsets by overlap (ascending sort, keep the last MAX).
    # dict.items() is used because iteritems() does not exist on Python 3.
    MAX = 5
    by_score = sorted(overlap.items(), key=operator.itemgetter(1))
    keys = [pair[0] for pair in by_score[-MAX:]]

    # Second pass over the shortlist.
    # TODO - CAN USE JACCARD OR SAP: SAP_distance(text, context, wordnet)
    max_similarity = -1.0
    max_sid = None
    for sid in keys:
        sim = jaccard(have_overlap[sid], context)
        if sim > max_similarity:
            max_sid = sid
            max_similarity = sim

    if max_sid is None:
        return False, sentence

    lemma = wordnet.get_lemma(max_sid)

    # Whole-word, case-insensitive substitution: a plain str.replace would
    # also rewrite longer words containing the target and would miss a
    # capitalised occurrence (the target is lower-cased). The lambda prevents
    # backslashes in the lemma from being read as escapes.
    whole_word = re.compile(r'\b' + re.escape(replace) + r'\b', re.IGNORECASE)
    return True, whole_word.sub(lambda match: lemma, sentence)
Example #3
0
def adj_test():
    """Interactively dump the synsets of a word and their related adjectives.

    Reads one line from standard input, looks it up in ``wordnet.db`` and
    prints ``synset_info`` for every synset found, each followed by one
    tab-indented line per synset returned by ``get_related_adjs``.
    """
    database = wn("wordnet.db")
    query = raw_input()
    synset_ids = database.get_synset_ids(database.get_word_ids(query))
    for synset_id in synset_ids:
        # Parenthesised single-argument print behaves the same on Python 2.
        print(database.synset_info(synset_id))

        for related_id in database.get_related_adjs(synset_id):
            print('\t' + database.synset_info(related_id))
Example #4
0
def adj_test():
    """Print synset info, plus related adjectives, for a word typed on stdin.

    One line is read from standard input and resolved to synset ids through
    ``get_word_ids`` / ``get_synset_ids``. For each synset its ``synset_info``
    is printed, then the ``synset_info`` of every id returned by
    ``get_related_adjs`` on a tab-indented line.
    """
    lookup = wn("wordnet.db")
    typed = raw_input()
    word_ids = lookup.get_word_ids(typed)
    for current in lookup.get_synset_ids(word_ids):
        # Single-argument print(...) is identical under the print statement.
        print(lookup.synset_info(current))

        related = lookup.get_related_adjs(current)
        for neighbour in related:
            print('\t' + lookup.synset_info(neighbour))
Example #5
0
from wn import *
import re

# Build "A <lemma> is <gloss>." sentences for frequently tagged synsets and
# write them, one per line, to input.txt.
w = wn("wordnet.db")

# A gloss is kept only if its first word is one of these determiners.
dets = ["the", "an", "a"]

# The context manager guarantees the file is flushed and closed, including
# when a database lookup raises part-way through.
with open("input.txt", 'w') as out:

    all_sids = w.get_all_synsets()

    for sid in all_sids:

        # Only synsets with a tag count above 5 are considered.
        tc = w.get_tagcount(sid)
        if tc > 5:

            gloss = w.get_gloss(sid).lower().strip()
            lemma = w.get_lemma(sid).lower().strip()

            words = gloss.split(" ")
            first = words[0]

            # Skip glosses that do not start with a determiner.
            if first not in dets:
                continue

            sentence = "A %s is %s.\n" % (lemma, gloss)

            out.write(sentence)
Example #6
0
from wn import *

# NOTE: this rebinds the imported name `wn` to the opened database instance.
wn = wn('wordnet.db')

# Interactive lookup: one lemma per input line, until input ends.
while True:

    try:
        lemma = raw_input()
    except EOFError:
        # stdin was closed (Ctrl-D or end of piped input): leave the loop
        # cleanly instead of terminating with a traceback.
        break

    wid = wn.get_word_ids(lemma)
    sids = wn.get_synset_ids(wid)

    for sid in sids:
        info = wn.synset_more_info(sid)
        print(info)

    # Blank separator line; print('') emits just a newline.
    print('')
Example #7
0
from wn import *

# NOTE: from here on `wn` refers to the opened database, not the imported name.
wn = wn('wordnet.db')


# Prompt-less read/print loop: each input line is treated as a lemma.
while True:

    try:
        lemma = raw_input()
    except EOFError:
        # End of input (Ctrl-D or exhausted pipe) ends the session quietly
        # rather than raising out of the script.
        break

    wid = wn.get_word_ids(lemma)
    sids = wn.get_synset_ids(wid)

    for sid in sids:
        info = wn.synset_more_info(sid)
        print(info)

    # Empty line between queries; print('') writes only the newline.
    print('')
Example #8
0
        ]  # lemmatization in English
        print("Fraza lemmatizata engleza", lemmatized_sentence_en
              )  # could not get it to work for Romanian as well

        # Stem the lemmatized English tokens with the Snowball English stemmer.
        stemmer_en = snowballstemmer.stemmer('english')
        stemmer_sentence_en = stemmer_en.stemWords(lemmatized_sentence_en)
        print("Fraza dupa stemmer in en ", stemmer_sentence_en)

else:  # tests:
    # possible alternative for Romanian lemmatization: https://github.com/dumitrescustefan/RoWordNet
    wn = rwn.RoWordNet()
    cuvant_initial = 'carte'
    #stemmer_ro = snowballstemmer.stemmer('romanian');
    # stemmer_sentence_ro = stemmer_ro.stemWords([cuvant_initial])
    # print(stemmer_sentence_ro)
    # synset_ids = wn.synsets(literal=stemmer_sentence_ro[0])
    # Look up the synsets for the unstemmed word directly.
    synset_ids = wn.synsets(literal=cuvant_initial)
    if len(synset_ids) >= 1:
        # Print the literals and part of speech of every synset found.
        for synset_id in synset_ids:
            print("Posibila lematizare pt ", cuvant_initial, ": literals=",
                  wn(synset_id).literals, " tip=",
                  wn(synset_id).pos)
    else:
        print("NU are lematizare in acest modul: ", cuvant_initial)

    #wn.download('ronwn')
    #w = wn.words('arbusti')[0]
    #print(w.lemma())
    #nltk.download()
    #print("NLTK wordnet languages:",  sorted(wn_nltk_test.langs()))
Example #9
0
from wn import *
import re

# Open the WordNet database used for every lookup below.
w = wn("wordnet.db")

# Output file, opened for writing.
# NOTE(review): it is never written to or closed in the visible span.
out = open("input.txt", 'w')


# A gloss is kept only if its first word is one of these determiners.
dets = ["the", "an", "a"]

all_sids = w.get_all_synsets()

for sid in all_sids:

    # Only synsets with a tag count above 5 are considered.
    tc = w.get_tagcount(sid)
    if tc > 5:




        gloss = w.get_gloss(sid).lower().strip()
        lemma = w.get_lemma(sid).lower().strip()

        # First space-separated token of the gloss.
        words = gloss.split(" ")
        first = words[0]

        # Skip glosses that do not start with a determiner.
        if first not in dets:
            continue

        # NOTE(review): the sentence is built but nothing visible here writes
        # it to `out` -- the snippet appears to be truncated at this point.
        sentence = "A %s is %s.\n" % (lemma, gloss)
    def __init__(self):
        """Open the WordNet database and build the metaphor mapping tables.

        Sets ``self.wn`` to ``wn("wordnet.db")`` and stores two dicts,
        ``self.NounToNoun`` and ``self.NounToAdj``. Each maps an integer id
        (presumably a WordNet synset id -- confirm against the database) to
        the list of ids it may be mapped to. The trailing comments name the
        conceptual metaphor behind each entry.

        Entries listed after the main ``nounToNoun`` literal are merged into
        it. Plain assignment used to discard the literal's entry for
        107543288 and the first of the two entries for 105770926.
        """
        self.wn = wn("wordnet.db")

        nounToAdj = {
            105611302: [300708498],  # mind is brittle
            107480068: [302091020],  # emotion is blinding->concealing?
            107541053: [300269989],  # hope is light
            107511733: [300269989],  # (specific) hope is light
            104846770: [300417413, 302314584],  # morality is clean
            104849241: [400096333, 300269989, 300393105],  # good is up, white
            105144079: [400095320, 300273082, 300392812],  # bad is down, black
        }

        nounToNoun = {
            107480068: [
                302261386,
                111458624,
                105194578,
                111466043,
                100027167,
                107309781,
                201206218,
                114395018,
            ],  # emotions are liquids (in a person, in the eyes), forces
            100007846: [102810471, 100015388, 100017222, 102913152, 103699975],  # people are batteries
            107516354: [111466043, 114686186],  # anger is heat
            201188485: [111466043],  # lust is heat
            107544647: [105725527],  # affection is warmth
            100759335: [114395018],  # lust is madness
            107543288: [114395018, 113742573, 100306426, 105967977],  # love is madness
            114379501: [202743020, 104738641],  # emotional stability->sanity is balance
            107541053: [100032613, 109918248, 111473954],  # hope is a possession
            107511733: [100032613, 109918248, 111473954],  # (specific) hope is a possession
            113928388: [100015388, 104194289, 104468005, 102958343],  # relationship is an animal
            104655442: [105085572],  # emotional intimacy is physical closeness
            113781820: [100148653],  # emotional bonding is physical bonding
            109622928: [100032613],  # loved one is a possession
            201776727: [301251128],  # dislike is cold
            107484265: [111458624, 105194578, 114039534],  # desires are physical forces
            107519253: [301251128],  # fear is cold
            104713118: [107027180],  # emotional harmony is musical harmony
            107503260: [114359952],  # disgust is nausea
            104887129: [100367280],  # conceit is inflation
            107508486: [113501548],  # pride is swelling
            100027807: [107309781, 103094503],  # form is motion
            109387222: [110151570],  # paths are guides
            104673965: [104151940],  # appearance is a cover
            101072402: [100019613],  # laughter is a substance
            100658082: [100973077],  # treating illness is fighting a war
            103740161: [104565375],  # medicine is a weapon
            114018567: [107334490],  # intoxication is destruction
            111473954: [114939900, 302261386],  # light is a fluid
            113983515: [115046900],  # darkness is a solid
            103699975: [100007846],  # machines are people
            113384557: [302261386],  # money is a liquid
            113333237: [103094503],  # investments are containers
            104846770: [104896161],  # morality is cleanliness
            106784003: [108630039, 109225146],  # problems are regions
            101129920: [100032613, 103679986, 103094503],  # obligation is a possession
            103094503: [105194578],  # obligation is force
            107966140: [105216365],  # society is a body
        }

        # Later additions, kept in their original order. Plain integer
        # literals replace the Python-2-only long(...) calls; they compare
        # and hash identically as dict keys.
        additions = [
            (107543288, [100306426]),  # love is a journey
            (100282613, [100306426]),  # career is a journey
            (104524313, [100007846]),  # vehicle and person
            (108436288, [108653314]),  # body and air
            (107420770, [114285662]),  # harm is physical injury
            (101168569, [107472657]),  # competition is a race
            (105770926, [101170962]),
            (105805475, [100803617, 113465809, 105710860]),  # understanding
            (105770926, [100243918, 100278810]),
            (106252138, [101057759]),
            (100636921, [100310063]),
            (105611302, [103094503, 103699975]),  # mind
            (105833840, [106362953, 109917593, 109918248, 105750657, 105154676]),  # idea
            (105941423, [103051540, 109622302, 110151570, 114070360]),  # belief
            (106283764, [104565375]),
        ]

        # Merge instead of assign so a repeated key extends the existing
        # target list (skipping duplicates) rather than replacing it.
        for source, targets in additions:
            merged = nounToNoun.setdefault(source, [])
            for target in targets:
                if target not in merged:
                    merged.append(target)

        self.NounToNoun = nounToNoun
        self.NounToAdj = nounToAdj
Example #11
0
    def __init__(self):
        """Load ``wordnet.db`` and populate the noun metaphor tables.

        After construction the instance exposes:

        * ``self.wn`` -- the object returned by ``wn("wordnet.db")``;
        * ``self.NounToAdj`` and ``self.NounToNoun`` -- dicts from an integer
          id to a list of integer ids (presumably WordNet synset ids; verify
          against the database). Inline comments give the conceptual metaphor
          each entry encodes.

        Mappings declared after the main ``nounToNoun`` literal are merged
        with any existing entry for the same key. Previously they were
        assigned, which silently dropped the literal's targets for 107543288
        and the first target list given for 105770926.
        """
        self.wn = wn("wordnet.db")

        nounToAdj = {
            105611302: [300708498],  # mind is brittle
            107480068: [302091020],  # emotion is blinding->concealing?
            107541053: [300269989],  # hope is light
            107511733: [300269989],  # (specific) hope is light
            104846770: [300417413, 302314584],  # morality is clean
            104849241: [400096333, 300269989, 300393105],  # good is up, white
            105144079: [400095320, 300273082, 300392812],  # bad is down, black
        }

        nounToNoun = {
            107480068: [302261386, 111458624, 105194578, 111466043, 100027167, 107309781, 201206218, 114395018],  # emotions are liquids (in a person, in the eyes), forces
            100007846: [102810471, 100015388, 100017222, 102913152, 103699975],  # people are batteries
            107516354: [111466043, 114686186],  # anger is heat
            201188485: [111466043],  # lust is heat
            107544647: [105725527],  # affection is warmth
            100759335: [114395018],  # lust is madness
            107543288: [114395018, 113742573, 100306426, 105967977],  # love is madness
            114379501: [202743020, 104738641],  # emotional stability->sanity is balance
            107541053: [100032613, 109918248, 111473954],  # hope is a possession
            107511733: [100032613, 109918248, 111473954],  # (specific) hope is a possession
            113928388: [100015388, 104194289, 104468005, 102958343],  # relationship is an animal
            104655442: [105085572],  # emotional intimacy is physical closeness
            113781820: [100148653],  # emotional bonding is physical bonding
            109622928: [100032613],  # loved one is a possession
            201776727: [301251128],  # dislike is cold
            107484265: [111458624, 105194578, 114039534],  # desires are physical forces
            107519253: [301251128],  # fear is cold
            104713118: [107027180],  # emotional harmony is musical harmony
            107503260: [114359952],  # disgust is nausea
            104887129: [100367280],  # conceit is inflation
            107508486: [113501548],  # pride is swelling
            100027807: [107309781, 103094503],  # form is motion
            109387222: [110151570],  # paths are guides
            104673965: [104151940],  # appearance is a cover
            101072402: [100019613],  # laughter is a substance
            100658082: [100973077],  # treating illness is fighting a war
            103740161: [104565375],  # medicine is a weapon
            114018567: [107334490],  # intoxication is destruction
            111473954: [114939900, 302261386],  # light is a fluid
            113983515: [115046900],  # darkness is a solid
            103699975: [100007846],  # machines are people
            113384557: [302261386],  # money is a liquid
            113333237: [103094503],  # investments are containers
            104846770: [104896161],  # morality is cleanliness
            106784003: [108630039, 109225146],  # problems are regions
            101129920: [100032613, 103679986, 103094503],  # obligation is a possession
            103094503: [105194578],  # obligation is force
            107966140: [105216365],  # society is a body
        }

        def add_targets(source, targets):
            # Extend the entry for `source`, creating it if needed and
            # skipping ids it already lists, so nothing is overwritten.
            existing = nounToNoun.setdefault(source, [])
            for target in targets:
                if target not in existing:
                    existing.append(target)

        add_targets(107543288, [100306426])  # love is a journey
        add_targets(100282613, [100306426])  # career is a journey
        add_targets(104524313, [100007846])  # vehicle and person
        add_targets(108436288, [108653314])  # body and air
        add_targets(107420770, [114285662])  # harm is physical injury
        add_targets(101168569, [107472657])  # competition is a race

        # Plain integer literals replace the Python-2-only long(...) calls;
        # they compare and hash identically as dict keys.
        add_targets(105770926, [101170962])
        add_targets(105805475, [100803617, 113465809, 105710860])  # understanding
        add_targets(105770926, [100243918, 100278810])
        add_targets(106252138, [101057759])
        add_targets(100636921, [100310063])
        add_targets(105611302, [103094503, 103699975])  # mind
        add_targets(105833840, [106362953, 109917593, 109918248, 105750657, 105154676])  # idea
        add_targets(105941423, [103051540, 109622302, 110151570, 114070360])  # belief
        add_targets(106283764, [104565375])

        self.NounToNoun = nounToNoun
        self.NounToAdj = nounToAdj