def generate_noun_metaphors(sentence, parse, place):
    """Swap one word of ``sentence`` for the lemma of a metaphor synset.

    The noun at ``place[0]`` is mapped to candidate synset ids through
    ``Mapping().map``.  The text of each candidate (stop words removed) is
    scored against the stop-word-free sentence with ``jaccard``; the lemma
    of the best-scoring candidate replaces the token at ``place[1]``.

    Args:
        sentence: Input sentence; it is tokenized on single spaces.
        parse: Not used by this function; kept for interface compatibility.
        place: Indexable pair of token positions. ``place[0]`` is the noun
            to metaphorize, ``place[1]`` is the word to replace.

    Returns:
        ``(True, new_sentence)`` when some candidate has non-zero overlap
        with the sentence, otherwise ``(False, sentence)`` unchanged.

    Raises:
        IndexError: If a position in ``place`` is outside the token list.
    """
    wordnet = wn("wordnet.db")
    stop_words = get_stop_words()

    tokens = sentence.split(" ")
    # Normalized view of the tokens: non-word characters stripped, lowercased.
    words = [re.sub(r'\W+', '', token).lower() for token in tokens]
    noun = words[place[0]]  # word to metaphorize
    target_index = place[1]  # position of the word to replace
    target_token = tokens[target_index]  # fail early on a bad index

    metmap = Mapping()
    context = remove_stopwords(words, stop_words)

    # Find metaphors: keep every candidate whose text overlaps the context.
    have_overlap = {}
    overlap = {}
    for sid in metmap.map(noun):
        text = remove_stopwords(wordnet.get_text(sid), stop_words)
        sim = jaccard(text, context)
        if sim > 0.0:
            have_overlap[sid] = text
            overlap[sid] = sim

    # Limit the number of synsets by overlap: keep the highest-scoring few.
    max_candidates = 5
    ranked = sorted(overlap.items(), key=operator.itemgetter(1))
    keys = [sid for sid, _ in ranked[-max_candidates:]]

    # Re-score the shortlist and keep the best candidate.
    # TODO - CAN USE JACCARD OR SAP here, e.g.
    #        SAP_distance(text, context, wordnet) instead of jaccard(...)
    max_similarity = -1.0
    max_sid = None
    for sid in keys:
        sim = jaccard(have_overlap[sid], context)
        if sim > max_similarity:
            max_sid = sid
            max_similarity = sim

    if max_sid is None:
        return False, sentence

    lemma = wordnet.get_lemma(max_sid)

    # Replace only the targeted token, keeping its surrounding punctuation.
    # Searching the raw sentence for the normalized word would miss
    # capitalized words, rewrite unrelated substrings, and (for an empty
    # normalized word) insert the lemma between every character.
    lead, _, trail = re.match(
        r'(\W*)(.*?)(\W*)\Z', target_token, re.DOTALL).groups()
    tokens[target_index] = lead + lemma + trail
    return True, " ".join(tokens)
def _swap_token_core(token, replacement):
    """Return ``token`` with its core replaced, keeping edge punctuation."""
    prefix, _core, suffix = re.match(
        r'(\W*)(.*?)(\W*)\Z', token, re.DOTALL).groups()
    return prefix + replacement + suffix


def generate_noun_metaphors(sentence, parse, place):
    """Replace a word in ``sentence`` with a metaphorical lemma for a noun.

    Candidate synset ids for the noun at ``place[0]`` come from
    ``Mapping().map``.  Candidates are ranked by the ``jaccard`` score
    between their stop-word-free text and the stop-word-free sentence, and
    the lemma of the winner is substituted for the token at ``place[1]``.

    Args:
        sentence: Sentence to rewrite; split on single spaces.
        parse: Accepted for interface compatibility; not read here.
        place: Pair of token indices ``(noun_index, replace_index)``.

    Returns:
        ``(True, rewritten_sentence)`` if a candidate overlaps the sentence,
        else ``(False, sentence)`` with the input returned untouched.

    Raises:
        IndexError: If an index in ``place`` does not address a token.
    """
    wordnet = wn("wordnet.db")
    stop_words = get_stop_words()

    raw_tokens = sentence.split(" ")
    # Lowercased tokens with every run of non-word characters removed.
    words = []
    for token in raw_tokens:
        words.append(re.sub(r'\W+', '', token).lower())

    noun = words[place[0]]  # word to metaphorize
    replace_at = place[1]  # index of the word to replace
    raw_tokens[replace_at]  # raises IndexError up front for a bad index

    metmap = Mapping()
    context = remove_stopwords(words, stop_words)

    # Find metaphors: score every candidate, keep those with any overlap.
    have_overlap = {}
    overlap = {}
    possible = metmap.map(noun)
    for sid in possible:
        text = wordnet.get_text(sid)
        text = remove_stopwords(text, stop_words)
        sim = jaccard(text, context)
        if sim > 0.0:
            have_overlap[sid] = text
            overlap[sid] = sim

    # Limit number of synsets by overlap (ascending sort, take the tail).
    limit = 5
    by_score = sorted(overlap.items(), key=operator.itemgetter(1))
    shortlist = [pair[0] for pair in by_score[-limit:]]

    # Second pass over the shortlist.
    # TODO - CAN USE JACCARD OR SAP: SAP_distance(text, context, wordnet)
    best_sid = None
    best_similarity = -1.0
    for sid in shortlist:
        sim = jaccard(have_overlap[sid], context)
        if sim > best_similarity:
            best_sid, best_similarity = sid, sim

    if best_sid is None:
        return False, sentence

    lemma = wordnet.get_lemma(best_sid)

    # Substitute by position rather than by text search: a text search for
    # the normalized word fails on capitalized tokens and also rewrites
    # every other place the same characters occur in the sentence.
    raw_tokens[replace_at] = _swap_token_core(raw_tokens[replace_at], lemma)
    return True, " ".join(raw_tokens)
def adj_test():
    """Read a word from stdin and print its synsets with related entries.

    For every synset id found for the word, prints ``synset_info`` of the
    synset followed by one tab-indented ``synset_info`` line for each id
    returned by ``get_related_adjs``.
    """
    db = wn("wordnet.db")
    query = raw_input()
    synset_ids = db.get_synset_ids(db.get_word_ids(query))
    for synset_id in synset_ids:
        print(db.synset_info(synset_id))
        for related_id in db.get_related_adjs(synset_id):
            print('\t' + db.synset_info(related_id))
def adj_test():
    """Interactive check of the related-adjective lookup.

    Takes one line of input, resolves it to word ids and then synset ids,
    and prints each synset's info with its related synsets listed
    underneath, each prefixed by a tab.
    """
    wordnet = wn("wordnet.db")
    word_ids = wordnet.get_word_ids(raw_input())

    for sid in wordnet.get_synset_ids(word_ids):
        print(wordnet.synset_info(sid))
        related = wordnet.get_related_adjs(sid)
        indented = ['\t' + wordnet.synset_info(rid) for rid in related]
        for line in indented:
            print(line)
from wn import *
import re

# Writes one "A <lemma> is <gloss>." line to input.txt for every synset
# whose tag count is above 5 and whose gloss starts with a determiner.

w = wn("wordnet.db")
dets = ["the", "an", "a"]

# The context manager guarantees input.txt is flushed and closed even if a
# lookup raises part-way through the loop.
with open("input.txt", 'w') as out:
    all_sids = w.get_all_synsets()
    for sid in all_sids:
        tc = w.get_tagcount(sid)
        if tc > 5:
            gloss = w.get_gloss(sid).lower().strip()
            lemma = w.get_lemma(sid).lower().strip()
            words = gloss.split(" ")
            first = words[0]
            # Only glosses that open with a determiner read naturally
            # after "is".
            if first not in dets:
                continue
            sentence = "A %s is %s.\n" % (lemma, gloss)
            out.write(sentence)
from wn import *

# Interactive lookup loop: each input line is resolved to synset ids and
# the detailed info of every synset is printed.

# Rebinds the imported name `wn` to an opened database handle.
wn = wn('wordnet.db')

while True:
    lemma = str(raw_input())
    for sid in wn.get_synset_ids(wn.get_word_ids(lemma)):
        print(wn.synset_more_info(sid))
        # NOTE(review): the blank line is assumed to follow every synset;
        # confirm against the original indentation.
        print("")
] #lematizare in engleza print("Fraza lemmatizata engleza", lemmatized_sentence_en ) #nu am reusit sa il fac sa mearga si in romana stemmer_en = snowballstemmer.stemmer('english') stemmer_sentence_en = stemmer_en.stemWords(lemmatized_sentence_en) print("Fraza dupa stemmer in en ", stemmer_sentence_en) else: #teste: #posibila alternativa lematizare in romana : https://github.com/dumitrescustefan/RoWordNet wn = rwn.RoWordNet() cuvant_initial = 'carte' #stemmer_ro = snowballstemmer.stemmer('romanian'); # stemmer_sentence_ro = stemmer_ro.stemWords([cuvant_initial]) # print(stemmer_sentence_ro) # synset_ids = wn.synsets(literal=stemmer_sentence_ro[0]) synset_ids = wn.synsets(literal=cuvant_initial) if len(synset_ids) >= 1: for synset_id in synset_ids: print("Posibila lematizare pt ", cuvant_initial, ": literals=", wn(synset_id).literals, " tip=", wn(synset_id).pos) else: print("NU are lematizare in acest modul: ", cuvant_initial) #wn.download('ronwn') #w = wn.words('arbusti')[0] #print(w.lemma()) #nltk.download() #print("NLTK wordnet languages:", sorted(wn_nltk_test.langs()))
from wn import *
import re

# Builds "A <lemma> is <gloss>." sentences for synsets whose tag count is
# above 5 and whose gloss starts with a determiner.
# NOTE(review): in the visible code the sentence is built but never written
# to `out`, and `out` is never closed -- confirm whether the script
# continues beyond this point.

w = wn("wordnet.db")
out = open("input.txt", 'w')
dets = ["the", "an", "a"]
all_sids = w.get_all_synsets()

for sid in all_sids:
    tc = w.get_tagcount(sid)
    if not tc > 5:
        continue

    gloss = w.get_gloss(sid).lower().strip()
    lemma = w.get_lemma(sid).lower().strip()
    words = gloss.split(" ")
    first = words[0]
    if first in dets:
        sentence = "A %s is %s.\n" % (lemma, gloss)
def __init__(self):
    """Open the WordNet database and build the metaphor lookup tables.

    Sets three attributes:
        wn: handle returned by ``wn("wordnet.db")``.
        NounToNoun: dict mapping a source id to a list of target ids.
        NounToAdj: dict mapping a source id to a list of target ids.

    Keys and values are integer ids (presumably WordNet synset ids --
    confirm against the database).  The trailing comments name the
    conceptual metaphor each entry encodes.  Plain int literals are used
    throughout; equal integers are the same dict key regardless of how
    they were spelled.
    """
    self.wn = wn("wordnet.db")

    nounToAdj = {
        105611302: [300708498],  # mind is brittle
        107480068: [302091020],  # emotion is blinding->concealing?
        107541053: [300269989],  # hope is light
        107511733: [300269989],  # (specific) hope is light
        104846770: [300417413, 302314584],  # morality is clean
        104849241: [400096333, 300269989, 300393105],  # good is up, white
        105144079: [400095320, 300273082, 300392812],  # bad is down, black
    }

    nounToNoun = {
        107480068: [
            302261386,
            111458624,
            105194578,
            111466043,
            100027167,
            107309781,
            201206218,
            114395018,
        ],  # emotions are liquids (in a person, in the eyes), forces
        100007846: [102810471, 100015388, 100017222, 102913152, 103699975],  # people are batteries
        107516354: [111466043, 114686186],  # anger is heat
        201188485: [111466043],  # lust is heat
        107544647: [105725527],  # affection is warmth
        100759335: [114395018],  # lust is madness
        107543288: [114395018, 113742573, 100306426, 105967977],  # love is madness
        114379501: [202743020, 104738641],  # emotional stability->sanity is balance
        107541053: [100032613, 109918248, 111473954],  # hope is a possession
        107511733: [100032613, 109918248, 111473954],  # (specific) hope is a possession
        113928388: [100015388, 104194289, 104468005, 102958343],  # relationship is an animal
        104655442: [105085572],  # emotional intimacy is physical closeness
        113781820: [100148653],  # emotional bonding is physical bonding
        109622928: [100032613],  # loved one is a possession
        201776727: [301251128],  # dislike is cold
        107484265: [111458624, 105194578, 114039534],  # desires are physical forces
        107519253: [301251128],  # fear is cold
        104713118: [107027180],  # emotional harmony is musical harmony
        107503260: [114359952],  # disgust is nausea
        104887129: [100367280],  # conceit is inflation
        107508486: [113501548],  # pride is swelling
        100027807: [107309781, 103094503],  # form is motion
        109387222: [110151570],  # paths are guides
        104673965: [104151940],  # appearance is a cover
        101072402: [100019613],  # laughter is a substance
        100658082: [100973077],  # treating illness is fighting a war
        103740161: [104565375],  # medicine is a weapon
        114018567: [107334490],  # intoxication is destruction
        111473954: [114939900, 302261386],  # light is a fluid
        113983515: [115046900],  # darkness is a solid
        103699975: [100007846],  # machines are people
        113384557: [302261386],  # money is a liquid
        113333237: [103094503],  # investments are containers
        104846770: [104896161],  # morality is cleanliness
        106784003: [108630039, 109225146],  # problems are regions
        101129920: [100032613, 103679986, 103094503],  # obligation is a possession
        103094503: [105194578],  # obligation is force
        107966140: [105216365],  # society is a body
    }

    # love is a journey
    # NOTE(review): this replaces (does not extend) the "love is madness"
    # list in the literal above -- confirm the narrowing is intended.
    nounToNoun[107543288] = [100306426]
    # career is a journey
    nounToNoun[100282613] = [100306426]
    # vehicle and person
    nounToNoun[104524313] = [100007846]
    # body and air
    nounToNoun[108436288] = [108653314]
    # harm is physical injury
    nounToNoun[107420770] = [114285662]
    # competition is a race
    nounToNoun[101168569] = [107472657]

    # NOTE(review): key 105770926 is assigned again two statements below,
    # so this first value never survives -- confirm which one is wanted.
    nounToNoun[105770926] = [101170962]
    nounToNoun[105805475] = [100803617, 113465809, 105710860]  # understanding
    nounToNoun[105770926] = [100243918, 100278810]
    nounToNoun[106252138] = [101057759]
    nounToNoun[100636921] = [100310063]
    nounToNoun[105611302] = [103094503, 103699975]  # mind
    nounToNoun[105833840] = [
        106362953,
        109917593,
        109918248,
        105750657,
        105154676,
    ]  # idea
    nounToNoun[105941423] = [103051540, 109622302, 110151570, 114070360]  # belief
    nounToNoun[106283764] = [104565375]

    self.NounToNoun = nounToNoun
    self.NounToAdj = nounToAdj
def __init__(self):
    """Create the WordNet handle and the two metaphor mapping tables.

    Attributes set on the instance:
        wn: result of ``wn("wordnet.db")``.
        NounToNoun: dict of source id -> list of target ids.
        NounToAdj: dict of source id -> list of target ids.

    All ids are plain integers (presumably WordNet synset ids -- confirm
    against the database); an int key matches any equal integer used for
    lookup.  Each entry's comment names the metaphor it stands for.
    """
    self.wn = wn("wordnet.db")

    nounToAdj = {
        105611302: [300708498],  # mind is brittle
        107480068: [302091020],  # emotion is blinding->concealing?
        107541053: [300269989],  # hope is light
        107511733: [300269989],  # (specific) hope is light
        104846770: [300417413, 302314584],  # morality is clean
        104849241: [400096333, 300269989, 300393105],  # good is up, white
        105144079: [400095320, 300273082, 300392812],  # bad is down, black
    }

    nounToNoun = {
        # emotions are liquids (in a person, in the eyes), forces
        107480068: [302261386, 111458624, 105194578, 111466043,
                    100027167, 107309781, 201206218, 114395018],
        100007846: [102810471, 100015388, 100017222, 102913152, 103699975],  # people are batteries
        107516354: [111466043, 114686186],  # anger is heat
        201188485: [111466043],  # lust is heat
        107544647: [105725527],  # affection is warmth
        100759335: [114395018],  # lust is madness
        107543288: [114395018, 113742573, 100306426, 105967977],  # love is madness
        114379501: [202743020, 104738641],  # emotional stability->sanity is balance
        107541053: [100032613, 109918248, 111473954],  # hope is a possession
        107511733: [100032613, 109918248, 111473954],  # (specific) hope is a possession
        113928388: [100015388, 104194289, 104468005, 102958343],  # relationship is an animal
        104655442: [105085572],  # emotional intimacy is physical closeness
        113781820: [100148653],  # emotional bonding is physical bonding
        109622928: [100032613],  # loved one is a possession
        201776727: [301251128],  # dislike is cold
        107484265: [111458624, 105194578, 114039534],  # desires are physical forces
        107519253: [301251128],  # fear is cold
        104713118: [107027180],  # emotional harmony is musical harmony
        107503260: [114359952],  # disgust is nausea
        104887129: [100367280],  # conceit is inflation
        107508486: [113501548],  # pride is swelling
        100027807: [107309781, 103094503],  # form is motion
        109387222: [110151570],  # paths are guides
        104673965: [104151940],  # appearance is a cover
        101072402: [100019613],  # laughter is a substance
        100658082: [100973077],  # treating illness is fighting a war
        103740161: [104565375],  # medicine is a weapon
        114018567: [107334490],  # intoxication is destruction
        111473954: [114939900, 302261386],  # light is a fluid
        113983515: [115046900],  # darkness is a solid
        103699975: [100007846],  # machines are people
        113384557: [302261386],  # money is a liquid
        113333237: [103094503],  # investments are containers
        104846770: [104896161],  # morality is cleanliness
        106784003: [108630039, 109225146],  # problems are regions
        101129920: [100032613, 103679986, 103094503],  # obligation is a possession
        103094503: [105194578],  # obligation is force
        107966140: [105216365],  # society is a body
    }

    # Later additions.  Each assignment REPLACES whatever the key held.

    # love is a journey
    # NOTE(review): overrides the four-target "love is madness" entry
    # defined in the literal above -- confirm this is deliberate.
    nounToNoun[107543288] = [100306426]
    # career is a journey
    nounToNoun[100282613] = [100306426]
    # vehicle and person
    nounToNoun[104524313] = [100007846]
    # body and air
    nounToNoun[108436288] = [108653314]
    # harm is physical injury
    nounToNoun[107420770] = [114285662]
    # competition is a race
    nounToNoun[101168569] = [107472657]

    # NOTE(review): 105770926 is set twice; only the second value
    # ([100243918, 100278810]) is kept -- confirm which is intended.
    nounToNoun[105770926] = [101170962]
    nounToNoun[105805475] = [100803617, 113465809, 105710860]  # understanding
    nounToNoun[105770926] = [100243918, 100278810]
    nounToNoun[106252138] = [101057759]
    nounToNoun[100636921] = [100310063]
    nounToNoun[105611302] = [103094503, 103699975]  # mind
    nounToNoun[105833840] = [106362953, 109917593, 109918248,
                             105750657, 105154676]  # idea
    nounToNoun[105941423] = [103051540, 109622302, 110151570, 114070360]  # belief
    nounToNoun[106283764] = [104565375]

    self.NounToNoun = nounToNoun
    self.NounToAdj = nounToAdj