Example #1
File: geo.py Project: magul/asm3
def parse_nominatim(dbo, jr, j, q):
    """ Parses the decoded Nominatim JSON list j for address query q, returning "lat,lon,na" or None if no geocode was found. """
    if len(j) == 0:
        al.debug("no response from nominatim for %s (response %s)" % (q, str(jr)), "geo.parse_nominatim", dbo)
        return None
    try:
        latlon = "%s,%s,%s" % (str(utils.strip_non_ascii(j[0]["lat"])), str(utils.strip_non_ascii(j[0]["lon"])), "na")
        al.debug("contacted nominatim to get geocode for %s = %s" % (q, latlon), "geo.parse_nominatim", dbo)
        return latlon
    except Exception as err:
        al.error("couldn't find geocode in nominatim response: %s, %s" % (str(err), jr), "geo.parse_nominatim", dbo)
        return None
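
Every example in this listing relies on a strip_non_ascii helper whose implementation is not included. As a rough, assumed sketch only (the real asm3/utils version may differ), such a helper can be as small as:

def strip_non_ascii(s):
    # Sketch only: keep the characters whose code point fits in 7-bit ASCII.
    return "".join(c for c in s if ord(c) < 128)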
Example #2
File: geo.py Project: rutaq/asm3
def parse(self):
    """ Parses the stored Nominatim JSON response, returning "lat,lon,addresshash",
        or "0,0,addresshash" if no geocode could be extracted. """
    h = self.address_hash()
    j = self.json_response
    if len(j) == 0:
        al.debug(
            "no response from nominatim for %s (response %s)" %
            (self.url, str(self.response)), "geo.parse_nominatim",
            self.dbo)
        return "0,0,%s" % h
    try:
        latlon = "%s,%s,%s" % (str(utils.strip_non_ascii(
            j[0]["lat"])), str(utils.strip_non_ascii(j[0]["lon"])), h)
        al.debug(
            "contacted nominatim to get geocode for %s = %s" %
            (self.url, latlon), "geo.parse_nominatim", self.dbo)
        return latlon
    except Exception as err:
        al.error(
            "couldn't find geocode in nominatim response: %s, %s" %
            (str(err), self.response), "geo.parse_nominatim", self.dbo)
        return "0,0,%s" % h
Example #3
# markovify is a third-party Markov chain text library; choice is assumed to be
# random.choice, and utils/dictionary are project modules not shown in this snippet.
import markovify
from random import choice


def generate_tweet_text(mood):
    filename = "emotions/{}.txt".format(mood)
    with open(filename, encoding='utf-8') as f:
        text = f.read()

    text = utils.strip_non_ascii(text)

    text_model = markovify.Text(text)

    # make_short_sentence can return None if no sentence of <= 120 characters can be built
    sentence = text_model.make_short_sentence(120)  # generate short tweet

    synonymset = dictionary.synonym(mood)
    synonym = choice(synonymset)

    sentence += " #{}".format(synonym)  # generate hashtag

    return sentence.encode('utf-8')
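
Because generate_tweet_text returns UTF-8 encoded bytes, a caller normally decodes them before displaying the tweet. A hypothetical call (the "happy" mood file is an assumption, not part of the original snippet):

tweet = generate_tweet_text("happy")   # bytes, e.g. b"Some sentence #joyful"
print(tweet.decode('utf-8'))           # back to text for display or posting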
Example #4
def gks(m, f):
    """ reads field f from map m, returning a string. 
        string is empty if key not present """
    if f not in m: return ""
    return str(utils.strip_non_ascii(m[f]))
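
A quick illustration of gks on a made-up record (the map below is purely hypothetical):

row = {"NAME": "Fido", "NOTES": "café"}
print(gks(row, "NAME"))    # -> "Fido"
print(gks(row, "NOTES"))   # -> "caf" once the non-ASCII character is stripped
print(gks(row, "BREED"))   # -> "" because the key is missing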
Example #5
# Missing setup reconstructed from the second script further down: MongoClient
# comes from pymongo, while transliterate, DEVANAGARI, HK and strip_non_ascii
# are assumed to come from project modules that this snippet does not show.
import pickle

import fuzzy
from pymongo import MongoClient

conn = MongoClient()
db = conn.sentiment_analysis_db

path = '../files/hindi/'
with open(path + "WordSynsetDict.pk", "rb") as f:
    word2Synset = pickle.load(f)

# dmetaphone = fuzzy.DMetaphone()
soundex = fuzzy.Soundex(4)

# start from an empty collection (remove() is the legacy pymongo call;
# newer drivers use delete_many)
print(db.hindi_dictionary.drop_indexes())
print(db.hindi_dictionary.remove({}))

words = []

for word in word2Synset.keys():
    # romanise the Devanagari headword and keep only its ASCII characters
    transliterated = strip_non_ascii(transliterate(word, DEVANAGARI, HK))
    synsets = []
    for vv in word2Synset[word].values():
        synsets.extend(vv)

    lower = transliterated.lower()
    sound = soundex(lower)  # already ASCII-only after strip_non_ascii
    words.append({
        'word': word,
        'synsets': synsets,
        'transliteration': lower,
        'sound': sound
    })
    # flush to MongoDB in batches of ~1000 documents
    if len(words) > 1000:
        db.hindi_dictionary.insert_many(words)
        words = []

# insert whatever is left after the last full batch
if words:
    db.hindi_dictionary.insert_many(words)
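
For reference, each document that the script above inserts has the shape below; the concrete values are illustrative only, since the contents of word2Synset are not shown:

{
    'word': 'नमस्ते',               # Devanagari headword
    'synsets': [...],               # all synset entries collected for the word
    'transliteration': 'namaste',   # lower-cased ASCII romanisation
    'sound': 'N523'                 # 4-character Soundex code used for fuzzy matching
}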
# Second script from the same example: looks up words from a Hindi frequency
# list in the dictionary built above. distance is assumed to be an edit-distance
# function (e.g. Levenshtein) imported from a module the snippet does not show.
import fuzzy
from pymongo import MongoClient

conn = MongoClient()
db = conn.sentiment_analysis_db

soundex = fuzzy.Soundex(4)

for line in open('../../resources/word-frequency-hindi.txt', encoding='utf-8'):
    line = line.strip()
    word, freq = line.split('\t')
    # 1) exact match on the Devanagari headword
    found = db.hindi_dictionary.find_one({'word': word})
    if not found:
        # 2) match on the ASCII romanisation (stored lower-cased under 'transliteration')
        transliterated = transliterate(word, DEVANAGARI, HK)
        transliterated = strip_non_ascii(transliterated)
        found = db.hindi_dictionary.find_one(
            {'transliteration': transliterated.lower()})
        if not found:
            # 3) fall back to entries with the same Soundex code and pick the
            #    one closest to the query word by edit distance
            sound = soundex(transliterated)
            sounding_same = list(db.hindi_dictionary.find({'sound': sound}))
            if len(sounding_same) > 0:
                found = sorted([(i['word'], distance(word, i['word']))
                                for i in sounding_same],
                               key=lambda x: x[1])[0][0]
        else:
            found = found['word']
    else:
        found = found['word']
    print(word, found)