def parse_nominatim(dbo, jr, j, q):
    # jr is the raw response text, j the decoded JSON body, q the query string
    if len(j) == 0:
        al.debug("no response from nominatim for %s (response %s)" % (q, str(jr)), "geo.parse_nominatim", dbo)
        return None
    try:
        latlon = "%s,%s,%s" % (str(utils.strip_non_ascii(j[0]["lat"])), str(utils.strip_non_ascii(j[0]["lon"])), "na")
        al.debug("contacted nominatim to get geocode for %s = %s" % (q, latlon), "geo.parse_nominatim", dbo)
        return latlon
    except Exception as err:
        al.error("couldn't find geocode in nominatim response: %s, %s" % (str(err), jr), "geo.parse_nominatim", dbo)
        return None
def parse(self):
    h = self.address_hash()
    j = self.json_response
    if len(j) == 0:
        al.debug("no response from nominatim for %s (response %s)" % (self.url, str(self.response)), "geo.parse_nominatim", self.dbo)
        return "0,0,%s" % h
    try:
        latlon = "%s,%s,%s" % (str(utils.strip_non_ascii(j[0]["lat"])), str(utils.strip_non_ascii(j[0]["lon"])), h)
        al.debug("contacted nominatim to get geocode for %s = %s" % (self.url, latlon), "geo.parse_nominatim", self.dbo)
        return latlon
    except Exception as err:
        al.error("couldn't find geocode in nominatim response: %s, %s" % (str(err), self.response), "geo.parse_nominatim", self.dbo)
        return "0,0,%s" % h
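# A minimal sketch of the input these two parsers expect, assuming the Nominatim
# search endpoint has already been called and its JSON body decoded; the response
# text, query string and dbo handle below are illustrative only.
import json

jr = '[{"lat": "51.5074", "lon": "-0.1278"}]'  # raw response text (hypothetical)
j = json.loads(jr)                             # decoded JSON list
# parse_nominatim(dbo, jr, j, "10 Downing Street, London")
# would return "51.5074,-0.1278,na", or None if the lookup failed.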
def generate_tweet_text(mood):
    filename = "emotions/{}.txt".format(mood)
    with open(filename, encoding='utf-8') as f:
        text = f.read()
    text = utils.strip_non_ascii(text)
    text_model = markovify.Text(text)
    sentence = text_model.make_short_sentence(120)  # generate short tweet
    synonymset = dictionary.synonym(mood)
    synonym = choice(synonymset)
    sentence += " #{}".format(synonym)  # generate hashtag
    return sentence.encode('utf-8')
def gks(m, f):
    """ reads field f from map m, returning a string. string is empty if key not present """
    if f not in m:
        return ""
    return str(utils.strip_non_ascii(m[f]))
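# Every snippet here leans on a strip_non_ascii helper that is not shown; a
# minimal sketch of what such a helper typically does (an assumption, not the
# actual implementation from these codebases):
def strip_non_ascii(s):
    """Return s with every character outside the 7-bit ASCII range removed."""
    return "".join(c for c in s if ord(c) < 128)

# strip_non_ascii("Ångström 10μm") == "ngstrm 10m"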
import pickle

import fuzzy
from pymongo import MongoClient

# Python 2 script: strip_non_ascii and the transliterate/DEVANAGARI/HK helpers
# are assumed to be imported from the project's transliteration module.
conn = MongoClient()
db = conn.sentiment_analysis_db

path = '../files/hindi/'
word2Synset = pickle.load(open(path + "WordSynsetDict.pk"))

# dmetaphone = fuzzy.DMetaphone()
soundex = fuzzy.Soundex(4)

# start from a clean collection
print db.hindi_dictionary.drop_indexes()
print db.hindi_dictionary.remove({})

words = []
for word in word2Synset.keys():
    transliterated = strip_non_ascii(transliterate(word, DEVANAGARI, HK))
    synsets = []
    for vv in word2Synset[word].values():
        synsets.extend(vv)
    lower = transliterated.lower()
    sound = soundex(lower.decode('ascii', errors='ignore'))
    words.append({
        'word': word,
        'synsets': synsets,
        'transliteration': lower,
        'sound': sound
    })
    # insert in batches of roughly a thousand documents
    if len(words) > 1000:
        db.hindi_dictionary.insert_many(words)
        words = []

# flush whatever is left of the final partial batch
if words:
    db.hindi_dictionary.insert_many(words)
import fuzzy
import sys

from pymongo import MongoClient

# transliterate/DEVANAGARI/HK, strip_non_ascii and the edit-distance helper
# distance() are assumed to be imported elsewhere in this script.
conn = MongoClient()
db = conn.sentiment_analysis_db

soundex = fuzzy.Soundex(4)

for line in open('../../resources/word-frequency-hindi.txt'):
    line = line.strip()
    word, freq = line.split('\t')
    word = word.decode('utf-8')  # .replace('\0xef', '')
    found = db.hindi_dictionary.find_one({'word': word})
    if not found:
        transliterated = transliterate(word, DEVANAGARI, HK)
        transliterated = strip_non_ascii(transliterated)
        # the field is named 'transliteration' where the dictionary is built above
        found = db.hindi_dictionary.find_one({'transliteration': transliterated})
        if not found:
            # fall back to a phonetic match, then rank candidates by edit distance
            sound = soundex(transliterated)
            sounding_same = list(db.hindi_dictionary.find({'sound': sound}))
            if len(sounding_same) > 0:
                found = sorted([(i['word'], distance(word, i['word']))
                                for i in sounding_same], key=lambda x: x[1])[0][0]
        else:
            found = found['word']
    else:
        found = found['word']
    print word, found
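# The fallback above matches words purely by their Soundex code and then picks
# the candidate with the smallest edit distance. A tiny self-contained sketch
# of the phonetic part, using the same fuzzy package (sample words made up):
import fuzzy

soundex = fuzzy.Soundex(4)
print(soundex('namaste') == soundex('namastay'))  # True: spelling variants share a code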