def scrape_movie_data_to_dic(self, request):
    """Parse a movie page (a requests.get() response) into a dict.

    Extracts the title, every row of the ``movie_finances`` table and the
    "key: value" rows of the second summary table, ASCII-folding all text
    via NFKD normalization.

    :param request: response object whose ``.text`` is the page HTML
    :return: dict mapping section labels to extracted values
    """
    soup = BeautifulSoup(request.text)
    dic = {}
    # Movie title, folded to plain ASCII.
    value = soup.find("h1", {"itemprop": "name"}).text
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
    dic['Movie Title'] = value
    # Finance table: each data cell's label sits two siblings back.
    finances = soup.find("table", {"id": "movie_finances"})
    for item in finances.findAll("td", {"class": "data"}):
        key = item.previousSibling.previousSibling.text
        key = unicodedata.normalize('NFKD', key).encode('ascii', 'ignore')
        # BUG FIX: the ASCII-folded value was computed but then discarded
        # (the raw item.text was stored instead). Store the folded value.
        value = unicodedata.normalize('NFKD', item.text).encode('ascii', 'ignore')
        dic[key] = value
    # Summary section: "key: value" rows in the second table.
    data = soup.find("div", {"id": "summary"}).findAll('table')[1]
    for item in data.findAll('tr'):
        # Fold to ASCII, drop newlines, then split into key and value.
        s = unicodedata.normalize('NFKD', item.text).encode('ascii', 'ignore')
        s = s.replace("\n", '').split(':')
        dic[s[0]] = [s[1]]
    return dic
def parseFileName(name):
    """Clean a raw file/release name into a lower-case ASCII string.

    Bracketed sections ([..], {..}, (..)) are dropped, stray punctuation
    trimmed, tabs replaced, and the text ASCII-folded via NFKD. Returns
    "unknown" when encoding fails entirely, and " " for an empty result.
    """
    # Drop any content enclosed in brackets/braces/parens (e.g. tags).
    nameString = dropInsideContent(name,"[","]" )
    nameString = dropInsideContent(nameString,"{","}" )
    nameString = dropInsideContent(nameString,"(",")" )
    # Trim leftover punctuation and surrounding blanks.
    nameString = nameString.strip('()_{}[]!@#$^&*+=|\\/"\'?<>~`')
    nameString = nameString.lstrip(' ')
    nameString = nameString.rstrip(' ')
    # NOTE(review): this second {} pass looks redundant after the first
    # dropInsideContent above — confirm whether it is intentional.
    nameString = dropInsideContent(nameString,"{","}" )
    nameString = nameString.lower()
    # Python 2 string-module calls: replace tabs / collapse spaces.
    nameString = string.replace(nameString,"\t"," ")
    nameString = string.replace(nameString," "," ")
    try:
        # ASCII-fold accents; the bare .encode() assumes the normalized
        # text is ASCII-safe and raises otherwise, triggering the fallback.
        nameString = unicodedata.normalize('NFKD',nameString).encode()
        nameString = nameString.encode()
    except:
        try:
            # Fallback: force latin-1, then retry the NFKD/ASCII fold.
            nameString = nameString.encode('latin-1', 'ignore')
            nameString = unicodedata.normalize('NFKD',nameString).encode("ascii")
            nameString = str(nameString)
        except:
            # Give up: the caller receives a sentinel name.
            nameString = "unknown"
    if len(nameString)==0:
        nameString=" "
    return nameString
def get_completions(self, document, complete_event):
    """Yield prompt_toolkit ``Completion`` objects from the IPython completer.

    Each match is NFC-normalized; when a completion starts with a
    zero-width (decomposed combining) character, it is re-composed with
    the character preceding the cursor so it renders correctly.
    """
    # Nothing to complete on a blank line.
    if not document.current_line.strip():
        return
    used, matches = self.ipy_completer.complete(
        line_buffer=document.current_line,
        cursor_pos=document.cursor_position_col
    )
    # Completions replace the text the completer reported as "used".
    start_pos = -len(used)
    for m in matches:
        m = unicodedata.normalize('NFC', m)
        # When the first character of the completion has a zero length,
        # then it's probably a decomposed unicode character. E.g. caused by
        # the "\dot" completion. Try to compose again with the previous
        # character.
        if wcwidth(m[0]) == 0:
            if document.cursor_position + start_pos > 0:
                char_before = document.text[document.cursor_position + start_pos - 1]
                m = unicodedata.normalize('NFC', char_before + m)
                # Yield the modified completion instead, if this worked.
                if wcwidth(m[0:1]) == 1:
                    yield Completion(m, start_position=start_pos - 1)
                    continue
        yield Completion(m, start_position=start_pos)
def get_cast_crew(self, url):
    """Scrape the cast/crew tables of a movie page into a dict.

    Returns a dict keyed by the section headings (initially u'Cast' and
    u'Production and Technical Credits', replaced by each section's own
    <h1> text when found). Each value is a JSON string mapping a position
    to the list of names holding it, or np.nan when the section is absent.
    Python 2 code (xrange).
    """
    request = get_file(url)
    soup = BeautifulSoup(request.text)
    main_dic = {}
    lst = [u'Cast', u'Production and Technical Credits']
    for i in xrange(len(lst)):
        # Default to NaN so a missing section still produces a key.
        main_dic[lst[i]] = np.nan
        dic = {}
        try:
            # Use the section's own heading as the output key.
            lst[i] = soup.findAll('div', {'id': 'cast'})[i].find('h1').text
            for row in soup.findAll('div', {'id': 'cast'})[i].findAll('tr'):
                # Each row is (position, spacer, name); ASCII-fold both ends.
                position, filler, name = row.findAll('td')
                position = unicodedata.normalize('NFKD', position.text).encode('ascii', 'ignore')
                name = unicodedata.normalize('NFKD', name.text).encode('ascii', 'ignore')
                # Accumulate every name seen for the same position.
                if position in dic:
                    dic[position] += [name]
                else:
                    dic[position] = [name]
            dic = json.dumps(dic)
        except:
            # NOTE(review): bare except hides all scraping errors — a
            # section silently becomes NaN on any failure.
            dic = np.nan
        main_dic[lst[i]] = dic
    return main_dic
def find_all_translations(soup):
    """Scrape Tagalog/English pairs from a word-list page, append them to
    translations.txt, then recurse into the next pagination page.

    Each entry is written as: the Tagalog word, then the list of English
    glosses, then a blank line (all ASCII-folded). Only single-word
    Tagalog entries whose gloss node differs are kept.
    """
    file_string = ''
    for word_data in soup.find_all("td", class_="list-title"):
        part_link = word_data.find("a")['href']
        full_link = domain + part_link
        soup2 = getSoup(full_link)
        translations = soup2.find("article", class_="item-page").find_all(style="text-align: center;")
        for translation in translations:
            tagalog = translation.find(['b', 'strong'])
            new_line = translation.find('br')
            # The English gloss follows the first <br>, when present.
            if new_line:
                english = new_line.next_sibling
            else:
                english = None
            if tagalog and english and tagalog.string and english.string is not None:
                # Keep single-word entries whose gloss is a distinct node.
                if ' ' not in tagalog.string.strip() and tagalog.string is not english.string:
                    file_string += unicodedata.normalize('NFD', tagalog.string.strip()).encode('ascii', 'ignore').decode("utf-8") + "\n"
                    file_string += unicodedata.normalize('NFD', str([word.strip() for word in english.string.strip().split(',')])).encode('ascii', 'ignore').decode("utf-8") + "\n"
                    file_string += "\n"
    # FIX: use a context manager so the handle is closed even if the
    # write raises (the original relied on an unprotected f.close()).
    with open('translations.txt', 'a') as f:
        f.write(file_string)
    next_page_link = soup.find('li', class_='pagination-next').find('a')['href']
    print('Parsing %s...' % (domain + next_page_link))
    # NOTE: recursion depth grows with the page count; an iterative loop
    # would avoid the interpreter recursion limit on very long lists.
    find_all_translations(getSoup(domain + next_page_link))
def getPerson(num, file):
    """Interactively look up a person in the semicolon-separated `file`.

    Prompts for a name until the accent-folded keywords match at least one
    line: every input word must appear in the line's name field, and the
    birth field must exist without a '?'. When several lines match (up to
    SEARCH_LIMIT), a curses selector lets the user pick one. Returns the
    selected line, stripped.
    """
    res1 = list()
    os.system('clear')
    while len(res1) == 0:
        x = input(str(num) + '. name : ')
        # NOTE(review): without file.seek(0, 0) a second loop iteration
        # reads from EOF and finds nothing — confirm the caller rewinds.
        # file.seek(0, 0)
        for line in file.readlines():
            # ASCII-fold both the query and the name field for matching.
            input_normalized = unicodedata.normalize('NFKD', x.strip().lower()).encode('ascii','ignore').decode('ascii')
            line_normalized = unicodedata.normalize('NFKD', line.split(';')[0].lower()).encode('ascii','ignore').decode('ascii')
            birth = line.split(';')[1]
            # Every word of the query must occur in the name field.
            full_match = True
            for input_word in input_normalized.split(' '):
                if input_word not in line_normalized:
                    full_match = False
            if full_match and input_normalized.strip() and birth and '?' not in birth:
                res1.append(line.strip())
        os.system('clear')
        if len(res1) > SEARCH_LIMIT:
            # limit number of found entries for easier item selection
            res1 = list()
            print('Please enter more specific keyword')
        elif len(res1) == 0:
            print('No matching entry found')
    sel = 0
    while len(res1) > 1:
        c = getsel(res1, sel)  # get user action - selection or cursor move
        if c == curses.KEY_DOWN and sel < (len(res1)-1):
            sel += 1
        elif c == curses.KEY_UP and sel > 0:
            sel -= 1
        elif c == curses.KEY_ENTER or c == 13:
            break
    return res1[sel]  # return selected item
def crawler():
    """Crawl IMDb from one seed title page, writing up to 200 movies rated
    in (6.5, 8.5] to data.csv and enqueuing each page's "rec-title"
    recommendation links as further pages to visit.
    """
    arr=["http://www.imdb.com/title/tt0111161/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=07XG6QFJZEE6BBVY6J2Z&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_1"]
    fp = open('data.csv', "w")
    a = csv.writer(fp, delimiter=',', quotechar="$")
    visited = []
    c = 0
    while c < 200:
        # NOTE(review): pop() raises IndexError if the frontier empties
        # before 200 qualifying movies are found — confirm acceptable.
        page = arr.pop()
        if page not in visited:
            r = requests.get(page)
            soup = bs4.BeautifulSoup(r.text)
            # ASCII-fold the rating text before parsing it as a float.
            rate = unicodedata.normalize('NFKD', soup.find("span", attrs={"itemprop": "ratingValue"}).string).encode('ascii', 'ignore')
            n = float(rate)
            if n > 6.5 and n <= 8.5:
                c = c+1
                name = unicodedata.normalize('NFKD', soup.find("h1", attrs={"itemprop": "name"}).text).encode('ascii', 'ignore')
                year = soup.find(attrs={"id": "titleYear"}).text
                # NOTE(review): the FIRST span[itemprop=name] on the page
                # may not be the director — verify this selector.
                director = unicodedata.normalize('NFKD', soup.find("span", attrs={"itemprop": "name"}).string).encode('ascii', 'ignore')
                print([c, name, year, director, n])
                a.writerow([c, name, year, director, n])
            # Enqueue recommendation links (absolutized, de-duplicated).
            divs = soup.find_all('div', attrs={"class": "rec-title"})
            links = [div.find('a')['href'] for div in divs]
            links = [urljoin(page, link) for link in links]
            arr = list(set(arr) | set(links))
            visited.append(page)
    fp.close()
def mnemonic_to_seed(self, mnemonic, passphrase):
    """Derive a 64-byte BIP-39 seed from a mnemonic and passphrase.

    Both inputs are NFKD-normalized and extra whitespace in the mnemonic
    is collapsed, then the seed is
    PBKDF2-HMAC-SHA512(mnemonic, "mnemonic" + passphrase, 2048 rounds).
    """
    # trezor uses bip39
    import pbkdf2, hashlib, hmac
    PBKDF2_ROUNDS = 2048
    # Collapse runs of whitespace, then NFKD-normalize, as BIP-39 requires.
    mnemonic = unicodedata.normalize('NFKD', ' '.join(mnemonic.split()))
    passphrase = unicodedata.normalize('NFKD', passphrase)
    # Salt is the literal string "mnemonic" plus the (normalized) passphrase.
    return pbkdf2.PBKDF2(mnemonic, 'mnemonic' + passphrase, iterations = PBKDF2_ROUNDS, macmodule = hmac, digestmodule = hashlib.sha512).read(64)
def test_greek_print_ipa(self):
    """Test the Word class's `_print_ipa` in Greek."""
    w = grc.Word("élipe", grc.GREEK["Attic"]["Probert"])
    # _print_ipa(True) and _print_ipa(False) differ in syllable separators.
    output = [w._print_ipa(True), w._print_ipa(False)]
    # Compare NFC-normalized so composed/decomposed accents match.
    target = [unicodedata.normalize('NFC', "é.li.pe"), unicodedata.normalize('NFC', "élipe")]
    self.assertEqual(output, target)
def output(index): i = 0 totalList = [] while i < index: totalList.append(addr[i].total) i += 1 totalList.sort() maxTotal = totalList[index-1] line = 0 i = 0 while i < index: if addr[i].total == maxTotal: line += 1 i += 1 i = 0 l = 0 print "\"", while i < index: if addr[i].total == maxTotal: if l < line-1: print u"\b%s, 最高成交價:%d, 最低成交價:%d" %(unicodedata.normalize('NFKD', addr[i].road), addr[i].maxP, addr[i].minP) l += 1 else: print u"%s, 最高成交價:%d, 最低成交價:%d\"" %(unicodedata.normalize('NFKD', addr[i].road), addr[i].maxP, addr[i].minP) i += 1
def ok_to_send(day_start, day_end):
    """Return True when the current time falls inside the daytime window.

    :param day_start: "HH:MM" window start (unicode, ASCII-folded here)
    :param day_end:   "HH:MM" window end
    check_time() classifies `now` against the window; sending is allowed
    exactly when the classification matched and the period is DAY.
    """
    now = datetime.datetime.now().time()
    # ASCII-fold the unicode "HH:MM" strings before splitting
    # (Python 2: encode() yields a str that str.split accepts).
    dstart = str.split(
        unicodedata.normalize('NFKD', day_start).encode('ascii', 'ignore'), ":")
    dend = str.split(
        unicodedata.normalize('NFKD', day_end).encode('ascii', 'ignore'), ":")
    on_time = datetime.time(int(dstart[0]), int(dstart[1]))
    off_time = datetime.time(int(dend[0]), int(dend[1]))
    when, matching = check_time(now, on_time, off_time)
    # FIX: dropped the dead `should_I_send` variable and collapsed the
    # nested conditionals — every path other than (matching and DAY)
    # returned False in the original.
    return bool(matching and when == DAY)
def test_names(self, data, time_locale):
    """Verify Timestamp.day_name()/month_name() honour `time_locale`,
    that the deprecated .weekday_name still works, and that NaT returns
    NaN from both accessors."""
    # GH 17354
    # Test .weekday_name, .day_name(), .month_name
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        # .weekday_name is deprecated; expect the FutureWarning.
        assert data.weekday_name == 'Monday'
    if time_locale is None:
        expected_day = 'Monday'
        expected_month = 'August'
    else:
        # Read the locale's own names for Monday / August.
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_day = calendar.day_name[0].capitalize()
            expected_month = calendar.month_name[8].capitalize()
    result_day = data.day_name(time_locale)
    result_month = data.month_name(time_locale)
    # Work around https://github.com/pandas-dev/pandas/issues/22342
    # different normalizations
    if not PY2:
        expected_day = unicodedata.normalize("NFD", expected_day)
        expected_month = unicodedata.normalize("NFD", expected_month)
        result_day = unicodedata.normalize("NFD", result_day,)
        result_month = unicodedata.normalize("NFD", result_month)
    assert result_day == expected_day
    assert result_month == expected_month
    # Test NaT
    nan_ts = Timestamp(NaT)
    assert np.isnan(nan_ts.day_name(time_locale))
    assert np.isnan(nan_ts.month_name(time_locale))
def test_listdir2_returns_name_stat_pairs(self):
    """listdir2('.') must return one (utf-8 name, stat-like) pair for a
    file with a non-ASCII name, exposing st_mtime_sec/st_mtime_nsec
    instead of st_mtime."""
    funny_unicode = u'M\u00E4kel\u00E4'
    funny_utf8 = funny_unicode.encode('utf-8')
    self.fs.write_file(funny_utf8, 'data')
    pairs = self.fs.listdir2('.')
    self.assertEqual(len(pairs), 1)
    self.assertEqual(len(pairs[0]), 2)
    name_utf8, st = pairs[0]
    # Names come back as utf-8 byte strings.
    self.assertEqual(type(name_utf8), str)
    name_unicode = name_utf8.decode('utf-8')
    # See https://en.wikipedia.org/wiki/Unicode_equivalence for
    # background. The NFKD normalisation seems to be the best way
    # to ensure things work across Linux and Mac OS X both (their
    # default normalisation for filenames is different).
    self.assertEqual(
        unicodedata.normalize('NFKD', name_unicode),
        unicodedata.normalize('NFKD', funny_unicode))
    # The stat result exposes split-resolution mtime fields only.
    self.assertTrue(hasattr(st, 'st_mode'))
    self.assertFalse(hasattr(st, 'st_mtime'))
    self.assertTrue(hasattr(st, 'st_mtime_sec'))
    self.assertTrue(hasattr(st, 'st_mtime_nsec'))
def clean_song_data(self, artist, title):
    """Normalize an (artist, title) pair for lyrics lookup.

    Lower-cases both, strips accents (NFKD + combining-mark removal),
    applies the LYRICS_ARTIST_REPLACE / LYRICS_TITLE_REPLACE
    substitutions, removes LYRICS_TITLE_STRIP patterns (plus bracketed
    text when self.ignore_brackets is set) and trims whitespace.

    :return: the cleaned (artist, title) tuple
    """
    # convert to lowercase
    artist = artist.lower()
    title = title.lower()
    # remove accents: decompose, then drop the combining marks
    artist = unicodedata.normalize('NFKD', artist)
    artist = "".join([c for c in artist if not unicodedata.combining(c)])
    title = unicodedata.normalize('NFKD', title)
    title = "".join([c for c in title if not unicodedata.combining(c)])
    # BUG FIX: the original appended to the module-level
    # LYRICS_TITLE_STRIP list on EVERY call, so the bracket pattern
    # accumulated duplicates for the process lifetime. Work on a local
    # copy instead.
    title_strip = list(LYRICS_TITLE_STRIP)
    if self.ignore_brackets:
        title_strip.append("\(.*\)")
    # replace ampersands and the like
    for exp in LYRICS_ARTIST_REPLACE:
        artist = re.sub(exp[0], exp[1], artist)
    for exp in LYRICS_TITLE_REPLACE:
        title = re.sub(exp[0], exp[1], title)
    # strip things like "(live at Somewhere)", "(acoustic)", etc
    for exp in title_strip:
        title = re.sub(exp, '', title)
    # compress spaces
    title = title.strip()
    artist = artist.strip()
    return (artist, title)
def freeze(self):
    """Clean the destination and build all URLs from generators."""
    remove_extra = self.app.config['FREEZER_REMOVE_EXTRA_FILES']
    if not os.path.isdir(self.root):
        os.makedirs(self.root)
    if remove_extra:
        ignore = self.app.config['FREEZER_DESTINATION_IGNORE']
        # Snapshot what is already on disk so stale files can be removed
        # after the build.
        previous_files = set(
            # See https://github.com/SimonSapin/Frozen-Flask/issues/5
            normalize('NFC', os.path.join(self.root, *name.split('/')))
            for name in walk_directory(self.root, ignore=ignore))
    seen_urls = set()
    seen_endpoints = set()
    built_files = set()
    for url, endpoint in self._generate_all_urls():
        seen_endpoints.add(endpoint)
        if url in seen_urls:
            # Don't build the same URL more than once
            continue
        seen_urls.add(url)
        new_filename = self._build_one(url)
        # NFC-normalize paths so the comparison against previous_files is
        # stable across filesystems with different unicode normalization.
        built_files.add(normalize('NFC', new_filename))
    self._check_endpoints(seen_endpoints)
    if remove_extra:
        # Remove files from the previous build that are not here anymore.
        for extra_file in previous_files - built_files:
            os.remove(extra_file)
            parent = os.path.dirname(extra_file)
            if not os.listdir(parent):
                # The directory is now empty, remove it.
                os.removedirs(parent)
    return seen_urls
def test_list_notebooks(self):
    """Exercise the notebook list API across the root, spaced, unicode
    and nested directories, plus name-set and ordering checks."""
    nbs = notebooks_only(self.nb_api.list().json())
    self.assertEqual(len(nbs), 1)
    self.assertEqual(nbs[0]['name'], 'inroot.ipynb')
    # Directory whose name contains spaces.
    nbs = notebooks_only(
        self.nb_api.list('/Directory with spaces in/').json())
    self.assertEqual(len(nbs), 1)
    self.assertEqual(nbs[0]['name'], 'inspace.ipynb')
    # Directory whose name contains non-ASCII characters.
    nbs = notebooks_only(self.nb_api.list(u'/unicodé/').json())
    self.assertEqual(len(nbs), 1)
    self.assertEqual(nbs[0]['name'], 'innonascii.ipynb')
    self.assertEqual(nbs[0]['path'], u'unicodé')
    # Nested directory.
    nbs = notebooks_only(self.nb_api.list('/foo/bar/').json())
    self.assertEqual(len(nbs), 1)
    self.assertEqual(nbs[0]['name'], 'baz.ipynb')
    self.assertEqual(nbs[0]['path'], 'foo/bar')
    nbs = notebooks_only(self.nb_api.list('foo').json())
    self.assertEqual(len(nbs), 4)
    # Compare names NFC-normalized: the filesystem may return decomposed
    # unicode (e.g. on OS X) for the same logical name.
    nbnames = {normalize('NFC', n['name']) for n in nbs}
    expected = [u'a.ipynb', u'b.ipynb', u'name with spaces.ipynb', u'unicodé.ipynb']
    expected = {normalize('NFC', name) for name in expected}
    self.assertEqual(nbnames, expected)
    # NOTE(review): expected order appears to be case-insensitive
    # alphabetical — confirm the server's ordering contract.
    nbs = notebooks_only(self.nb_api.list('ordering').json())
    nbnames = [n['name'] for n in nbs]
    expected = ['A.ipynb', 'b.ipynb', 'C.ipynb']
    self.assertEqual(nbnames, expected)
def tokenizeComparison(self, given, correct):
    """Diff `given` against `correct` and return two aligned token lists.

    Each returned list contains (ok, fragment) tuples — one list for each
    input string — where ok is True for matching runs and False for
    mismatched runs. Characters missing from `given` are padded with '-'
    so the two sides stay visually aligned.
    """
    # compare in NFC form so accents appear correct
    given = ucd.normalize("NFC", given)
    correct = ucd.normalize("NFC", correct)
    s = difflib.SequenceMatcher(None, given, correct, autojunk=False)
    givenElems = []
    correctElems = []
    givenPoint = 0
    correctPoint = 0
    offby = 0

    # FIX: renamed the `str` parameter of both helpers, which shadowed
    # the builtin.
    def logBad(old, new, text, array):
        # Record a mismatched span [old:new), if non-empty.
        if old != new:
            array.append((False, text[old:new]))

    def logGood(start, cnt, text, array):
        # Record a matching span of length cnt, if non-empty.
        if cnt:
            array.append((True, text[start:start + cnt]))

    for x, y, cnt in s.get_matching_blocks():
        # if anything was missed in correct, pad given
        if cnt and y - offby > x:
            givenElems.append((False, "-" * (y - x - offby)))
            offby = y - x
        # log any proceeding bad elems
        logBad(givenPoint, x, given, givenElems)
        logBad(correctPoint, y, correct, correctElems)
        givenPoint = x + cnt
        correctPoint = y + cnt
        # log the match
        logGood(x, cnt, given, givenElems)
        logGood(y, cnt, correct, correctElems)
    return givenElems, correctElems
def create_fake_user():
    """Create an active User with a faker-generated name, an ASCII-folded
    email, and one Address set as both default billing and shipping.
    Returns the saved user."""
    first_name = fake.first_name()
    last_name = fake.last_name()
    # ASCII-fold the names for use inside the email address.
    _first = unicodedata.normalize('NFD', first_name).encode('ascii', 'ignore')
    _last = unicodedata.normalize('NFD', last_name).encode('ascii', 'ignore')
    # NOTE(review): the email template and password literals below appear
    # redacted ("*****") in this copy of the source — restore before use.
    email = u'*****@*****.**' % (_first.lower(), _last.lower())
    user = User.objects.create_user(email=email, password='******')
    address = Address.objects.create(
        first_name=first_name,
        last_name=last_name,
        street_address_1=fake.street_address(),
        city=fake.city(),
        postal_code=fake.postcode(),
        country=fake.country_code())
    user.addresses.add(address)
    user.default_billing_address = address
    user.default_shipping_address = address
    user.is_active = True
    user.save()
    return user
def HandleSqlite(SFile):
    """Dump every table of the SQLite file `SFile` as pipe-separated text.

    For each table: a header row built from PRAGMA table_info, then all
    rows, with unicode cells NFKD-folded to ASCII. Returns the dump
    string, or None on error. Python 2 only (print statement, `unicode`).
    """
    print "\n[INFO] SQLite DB Extraction"
    try:
        data = ''
        con = sq.connect(SFile)
        cur = con.cursor()
        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cur.fetchall()
        for table in tables:
            data += "\nTABLE: " + str(table[0]).decode('utf8', 'ignore') + \
                " \n=====================================================\n"
            # NOTE(review): table names come from sqlite_master (not user
            # input), but the %-interpolated quoting below is fragile for
            # names containing quotes.
            cur.execute("PRAGMA table_info('%s')" % table)
            rows = cur.fetchall()
            head = ''
            for r in rows:
                # r[1] is the column name; ASCII-fold unicode names.
                z = r[1]
                if type(z) is unicode:
                    z = unicodedata.normalize(
                        'NFKD', z).encode('ascii', 'ignore')
                head += str(z).decode('utf8', 'ignore') + " | "
            data += head + " \n=====================================================================\n"
            # Body: every row, cells ASCII-folded and pipe-separated.
            cur.execute("SELECT * FROM '%s'" % table)
            rows = cur.fetchall()
            for r in rows:
                dat = ''
                for x in r:
                    if type(x) is unicode:
                        x = unicodedata.normalize(
                            'NFKD', x).encode('ascii', 'ignore')
                    dat += str(x).decode('utf8', 'ignore') + " | "
                data += dat + "\n"
        return data
    except:
        # Logs the failure and falls through, returning None implicitly.
        PrintException("[ERROR] SQLite DB Extraction")
        pass
def _getPDFText(self, filename, d): logger.debug(u"filename: %s" % filename) newparatextlist = list() try: pdfDoc = PdfFileReader(file(filename, u"rb")) pdfDict = pdfDoc.getDocumentInfo() for x in pdfDict.keys(): d.addConceptKeyType(x[1:], pdfDict[x]) # c.logConcepts() for page in pdfDoc.pages: text = page.extractText() if not isinstance(text, str): unicodedata.normalize(u'NFKD', text).encode(u'ascii', u'ignore') logger.debug(u"PDF : %s" % text) newparatextlist.append(text + u". ") return newparatextlist except Exception, msg: logger.error(u"%s" % msg)
def strings_equal(s1, s2):
    """
    Timing-attack resistant string comparison.

    Normal comparison using == will short-circuit on the first
    mismatching character. This avoids that by scanning the whole
    string, though we still reveal to a timing attack whether the
    strings are the same length.
    """
    s1 = unicodedata.normalize('NFKC', s1)
    s2 = unicodedata.normalize('NFKC', s2)
    try:
        # Python 3.3+ and 2.7.7+ ship a timing-attack-resistant
        # comparison; prefer it over the hand-rolled fallback below.
        from hmac import compare_digest
    except ImportError:
        pass
    else:
        return compare_digest(s1, s2)
    if len(s1) != len(s2):
        return False
    # Accumulate XOR differences across every character pair so the scan
    # never short-circuits on an early mismatch.
    diff_bits = 0
    for a, b in zip(s1, s2):
        diff_bits |= ord(a) ^ ord(b)
    return diff_bits == 0
def artist_search(results, media, lang, artist_name):
    """Search the local Vevo service for `artist_name` and append a
    SearchResult for every artist whose normalized name matches exactly,
    scoring downward from 100 in result order."""
    # Precompose.
    try:
        artist_name = unicodedata.normalize('NFKD', artist_name.decode('utf-8'))
    except UnicodeError:
        # Input was already unicode (or not UTF-8); normalize as-is.
        artist_name = unicodedata.normalize('NFKD', artist_name)
    # Strip diacritics.
    stripped = u''
    for i in range(len(artist_name)):
        point = artist_name[i]
        if not unicodedata.combining(point):
            stripped += point
    artist_name = stripped
    json_obj = JSON.ObjectFromURL('http://127.0.0.1:32400/services/vevo/search?q=%s&artistsLimit=6&videosLimit=1' % (String.Quote(artist_name)))
    score = 100
    # Normalize via the plexmusic agent so both sides use the same rules.
    normalized_artist_name = Core.messaging.call_external_function('com.plexapp.agents.plexmusic', 'MessageKit:NormalizeArtist', kwargs = dict(artist=artist_name))
    for artist in json_obj['artists']:
        # Require a perfect match after normalization to avoid false positives.
        normalized_artist_result = Core.messaging.call_external_function('com.plexapp.agents.plexmusic', 'MessageKit:NormalizeArtist', kwargs = dict(artist=artist['name']))
        Log('Sanity checking normalized artist: %s against Vevo result: %s' % (normalized_artist_name, normalized_artist_result))
        if normalized_artist_name == normalized_artist_result:
            results.add(SearchResult(
                id = artist['urlSafeName'],
                score = score
            ))
            # Later matches rank slightly lower.
            score = score - 1
def CrearPedidoCertificado(self, cuit="", empresa="", nombre="pyafipws", filename="empresa.csr"):
    "Create a certificate signing request (X509 CSR) and write it as PEM to `filename`"
    from M2Crypto import RSA, EVP, X509
    # create the certificate signing request (CSR):
    self.x509_req = X509.Request ()
    # normalize encoding (replace accents, enie, etc.) so the subject is ASCII
    if isinstance(empresa, unicode):
        empresa = unicodedata.normalize('NFKD', empresa).encode('ASCII', 'ignore')
    if isinstance(nombre, unicode):
        nombre = unicodedata.normalize('NFKD', nombre).encode('ASCII', 'ignore')
    # subject: C=AR/O=[empresa]/CN=[nombre]/serialNumber=CUIT [nro_cuit]
    x509name = X509.X509_Name ()
    # default OpenSSL parameters:
    kwargs = {"type": 0x1000 | 1, "len": -1, "loc": -1, "set": 0}
    x509name.add_entry_by_txt(field='C', entry='AR', **kwargs)
    x509name.add_entry_by_txt(field='O', entry=empresa, **kwargs)
    x509name.add_entry_by_txt(field='CN', entry=nombre, **kwargs)
    x509name.add_entry_by_txt(field='serialNumber', entry="CUIT %s" % str(cuit), **kwargs)
    self.x509_req.set_subject_name(x509name)
    # sign the request with the previously created key (CrearClavePrivada)
    self.x509_req.set_pubkey (pkey=self.pkey)
    self.x509_req.sign(pkey=self.pkey, md='sha256')
    # save the CSR result to a file:
    f = open(filename, "w")
    f.write(self.x509_req.as_pem())
    f.close()
    return True
def toRSSItem(self):
    """Render this commit as an RSSItem.

    Title = repo tagname plus a truncated commit message and keywords;
    description = the pprint dump wrapped in <pre>; link built from the
    repo's viewlink template; guid from the configured root URL.
    """
    title = self.repo.tagname
    # Append the commit message, truncated to 50 chars with an ellipsis.
    if self.message and len(self.message) > 50:
        title += " - " + self.message[:50] + "..."
    elif self.message:
        title += " - " + self.message
    if self.dbkeywords:
        title += " - " + ",".join(self.dbkeywords)
    description = "<pre>"
    description += self.getpprint()
    description += "</pre>"
    # ASCII-fold both fields (Python 2: str -> unicode -> ascii str).
    title = unicodedata.normalize('NFKD', unicode(title, 'utf-8')).encode('ascii', 'ignore')
    description = unicodedata.normalize('NFKD', unicode(description, 'utf-8')).encode('ascii', 'ignore')
    link = ''
    if self.repo.viewlink:
        # The template's %ID placeholder is replaced by the commit id.
        link = self.repo.viewlink.replace('%ID', self.uniqueid)
    item = RSSItem(
        title = title,
        link = link,
        description = description,
        guid = Config.rooturl + "/commit/" + self.repo.tagname + "/" + self.uniqueid,
        pubDate = unixToDatetime(self.date)
    )
    return item
def normalize_token(data):
    """Fold a token for comparison.

    Strips accents (NFD decompose + drop combining marks), lower-cases,
    re-composes to NFC, and removes straight and curly apostrophes.
    """
    # credit: http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
    data = unicodedata.normalize(
        "NFC",
        "".join((c for c in unicodedata.normalize("NFD", data)
                 if unicodedata.category(c) != "Mn")).lower()
    )
    # FIX: the original used a ur"..." literal, which is a SyntaxError on
    # Python 3; a plain u"..." literal (no escapes in the pattern, so the
    # raw prefix was unnecessary) is equivalent and valid on both 2 and 3.
    data = re.sub(u"['’]", "", data)
    return data
def redirect_if_needed(self, i):
    """Normalize the query parameters in `i` (NFC, stripped values,
    plural keys renamed via `plurals`) and raise a redirect to the
    cleaned query string when anything actually changed."""
    params = {}
    need_redirect = False
    for k, v in i.items():
        if k in plurals:
            # Old plural parameter: blank it out in the redirect and
            # continue under the mapped (singular) name.
            params[k] = None
            k = plurals[k]
            need_redirect = True
        if isinstance(v, list):
            if v == []:
                continue
            clean = [normalize('NFC', b.strip()) for b in v]
            if clean != v:
                need_redirect = True
            # A lone empty string means "no value".
            if len(clean) == 1 and clean[0] == u'':
                clean = None
        else:
            clean = normalize('NFC', v.strip())
            if clean == '':
                need_redirect = True
                clean = None
            if clean != v:
                need_redirect = True
        params[k] = clean
    if need_redirect:
        # web.seeother is raised to abort processing with a 303 redirect.
        raise web.seeother(web.changequery(**params))
def __init__(self):
    """Locate RssFeeds.xml (via xbmc when available), parse it, and load
    the current feed list; offer to regenerate a minimal default file
    when parsing fails."""
    if xbmc:
        self.RssFeedsPath = xbmc.translatePath('special://userdata/RssFeeds.xml').decode("utf-8")
    else:
        # Fallback path for running outside XBMC.
        self.RssFeedsPath = r'C:\Documents and Settings\Xerox\Application Data\XBMC\userdata\RssFeeds.xml'
    sane = self.checkRssFeedPathSanity()
    if sane:
        try:
            self.feedsTree = parse(self.RssFeedsPath)
        except:
            # Parse failed: log the (ASCII-folded) path and ask whether
            # to regenerate a default file.
            log('[script] RSS Editor --> Failed to parse ' + unicodedata.normalize( 'NFKD', self.RssFeedsPath ).encode( 'ascii', 'ignore' ))
            regen = xbmcgui.Dialog().yesno(getLS(40), getLS(51), getLS(52), getLS(53))
            if regen:
                log('[script] RSS Editor --> Attempting to Regenerate RssFeeds.xml')
                xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<rssfeeds>\n\
<!-- RSS feeds. To have multiple feeds, just add a feed to the set. You can also have multiple sets. !-->\n\
<!-- To use different sets in your skin, each must be called from skin with a unique id. !-->\n\
<set id="1">\n <feed updateinterval="30">http://feeds.feedburner.com/xbmc</feed>\n </set>\n</rssfeeds>'
                f = open(self.RssFeedsPath, 'w')
                f.write(xml)
                f.close()
                # Re-run init now that a fresh file exists on disk.
                self.__init__()
            else:
                log('[script] RSS Editor --> User opted to not regenerate RssFeeds.xml. Script Exiting')
                self.feedsTree = False
        if self.feedsTree:
            self.feedsList = self.getCurrentRssFeeds()
    else:
        # Path sanity check failed: mark everything unavailable.
        self.feedsTree = False
        self.feedsList = False
        log('[SCRIPT] RSS Editor --> Could not open ' + unicodedata.normalize( 'NFKD', self.RssFeedsPath ).encode( 'ascii', 'ignore' ) +'. Either the file does not exist, or its size is zero.')
def add_other_bank_account(request): """ function to add a receiver of another bank to which user wants to transfer the money. It fills in all the details of the receiver and also validates them. """ try: cust_id=request.session.get('user_id') name=request.POST["name"] connected_acc_no1=request.POST["account_no"] confirm_acc_no=request.POST["account_no_2"] addressline1=request.POST["line1"] addressline2=request.POST["line2"] addressline3=request.POST["line3"] IFSC_code1=request.POST["IFSC"] limit1=request.POST["limit"] error1="Account Confirmation Failed" error2="Please Enter Valid numbers in fields" error3="Please Enter numeral entries in fields" error4="Sorry The account you wish to connect does not exist" error6="Account Already Added" error7="IFSC code does no exists" if(connected_acc_no1!=confirm_acc_no): return render_to_response("add_other_bank_account.html",{'error':error1,'STATIC_URL':"/static/"}) limit=unicodedata.normalize('NFKD', limit1).encode('ascii','ignore') connected_acc_no=unicodedata.normalize('NFKD', connected_acc_no1).encode('ascii','ignore') IFSC_code=unicodedata.normalize('NFKD', IFSC_code1).encode('ascii','ignore') try: i = float(limit) except ValueError, TypeError: return render_to_response("add_other_bank_account.html",{'error':error3,'STATIC_URL':"/static/"}) else:
def noDiacritics(s):
    """Removes any diacritics.

    Tries to decode the input as UTF-8 (Python 2 ``unicode``), drops the
    ordinal-indicator signs (º, ª), then NFKD-decomposes and ASCII-folds.
    Returns None for None input and the original value when both decode
    attempts fail.
    """
    # sanity check
    if s is None:
        return None
    # try the right way first
    try:
        strAux = unicode(s, 'utf-8')
        # remove some chars
        strAux = strAux.replace(unichr(0xba), "")  # 4o
        strAux = strAux.replace(unichr(0xaa), "")  # 4a
        # normalization
        ret = unicodedata.normalize('NFKD', strAux)
        ret = ret.encode('ascii', 'ignore')
    except:
        ret = None
    # try as a unicode encoded string
    if ret is None:
        try:
            # BUG FIX: the original called s.decode(s, 'utf-8'), passing
            # the string itself as the codec name, so this branch could
            # never succeed; decode takes the encoding as its first arg.
            strAux = s.decode('utf-8')
            # remove some chars
            strAux = strAux.replace(unichr(0xba), "")  # 4o
            strAux = strAux.replace(unichr(0xaa), "")  # 4a
            # normalization
            ret = unicodedata.normalize('NFKD', strAux)
            ret = ret.encode('ascii', 'ignore')
        except:
            ret = s
    # return as received
    return ret
def fromUser(self, screen_name, tweets_number=10, is_bot=False):
    """Fetch up to `tweets_number` recent tweets for `screen_name` and
    persist each one as a Tweet row (text/date ASCII-folded, mentioned
    screen names joined with commas), attached to the created user."""
    user = self.createUser(screen_name, is_bot)
    tweets = self.twitter_client.user_timeline(screen_name=screen_name, count=tweets_number)
    for i, status in enumerate(tweets):
        tweet = status._json
        text = tweet['text']
        date = tweet['created_at']
        entities = tweet['entities']
        user_mentions = entities['user_mentions']
        # Collect the screen names of every mentioned account.
        mentions_list = []
        if len(user_mentions) > 0:
            for mention in user_mentions:
                mentions_list.append(mention['screen_name'])
        # ASCII-fold text and date for storage.
        text_string = unicodedata.normalize('NFKD', text).encode('ascii','ignore')
        date_string = unicodedata.normalize('NFKD', date).encode('ascii','ignore')
        name_mentions_string = ",".join(mentions_list)
        Tweet.create(
            user = user,
            text = text_string,
            date = date_string,
            source = status.source,
            mentions = name_mentions_string
        )
def remove_accents(s: str) -> str:
    """Return *s* with all combining marks (accents) stripped."""
    decomposed = unicodedata.normalize("NFD", s)
    kept = [ch for ch in decomposed if not unicodedata.combining(ch)]
    return "".join(kept)
def main(): os_v = os.uname()[2].split(".")[0] if os_v == "10": source1 = "/System/Library/Input Methods/CharacterPalette.app/Contents/Frameworks/CharacterPaletteFramework.framework/Resources/kanji.db" elif (os_v > "10" and os_v < "18"): source1 = "/System/Library/Input Methods/CharacterPalette.app/Contents/Resources/CharacterDB.sqlite3" else: source1 = "/System/Library/Components/CharacterPalette.component/Contents/SharedSupport/\ CharPaletteServer.app/Contents/Frameworks/CharacterPaletteFramework.framework/Versions/A/Resources/kanji.db" bundleLibPath = os.environ["TM_BUNDLE_SUPPORT"] + "/lib/" source2 = bundleLibPath + "allHanForRadical.txt.zip" def lastCharInUCSdec(s): isPaneB = False if s: if u"\udc00" <= s[-1] <= u"\udfff" and len(s) >= 2 and u"\ud800" <= s[-2] <= u"\udbff": isPaneB = True return (((ord(s[-2])&0x3ff)<<10 | (ord(s[-1])&0x3ff)) + 0x10000, isPaneB) return (ord(s[-1]), isPaneB) return (-1, isPaneB) if "TM_SELECTED_TEXT" in os.environ: sys.exit(200) if os.environ["DIALOG"][-1] == '2': dialog2 = True else: dialog2 = False outDict = SeqDict() if "TM_CURRENT_LINE" in os.environ and "TM_LINE_INDEX" in os.environ and int(os.environ["TM_LINE_INDEX"]): line, x = os.environ["TM_CURRENT_LINE"], int(os.environ["TM_LINE_INDEX"]) else: sys.exit(206) (lastCharDecCode, charIsPaneB) = lastCharInUCSdec(unicode(line[:x], "UTF-8")) char = wunichr(lastCharDecCode) lastCharUCShexCode = "%04X" % lastCharDecCode UnicodeData = os.popen("zgrep '^" + lastCharUCShexCode + ";' '" + bundleLibPath + "UnicodeData.txt.gz'").read().decode("utf-8") name = "" if not UnicodeData: name = getNameForRange(lastCharDecCode) else: (dummy1, name, category, combiningclass, bididir, decomposition, numtype1, numtype2, numtype3, bidimirror, oldname, comment, upcase, lowcase, titlecase) = UnicodeData.strip().split(';') if name[0] == '<': name = getNameForRange(lastCharDecCode) block = getBlockName(lastCharDecCode) outDict['Character'] = char outDict['Name'] = name outDict['Block'] = block # 
look for related chars frel = open(bundleLibPath + "relatedChars.txt", "rb") reldata = frel.read().decode("UTF-8") frel.close() for part in reldata.split('\n'): if char in part: break if part: outDict["Related to"] = part if "CJK" in name and ("IDEO" in name or "Ideo" in name): cmd = "zgrep -F '" + char + ",' '" + source2 + "'" gdata = os.popen(cmd.encode("UTF-8")).read().decode("UTF-8") if len(gdata) > 0: RadNum, RadStrokeCnt, RadName, Rad, ExtStrokeCnt, Dummy = gdata.split('\t') outDict['Radical (trad.)'] = [Rad, RadStrokeCnt, u"画", RadName, RadNum, ExtStrokeCnt] outDict['Strokes (trad.)'] = str(int(RadStrokeCnt) + int(ExtStrokeCnt)) # get all data from Apple's internal UniDict cmd = "sqlite3 '" + source1 + "' 'select * from unihan_dict where uchr=\"" + char + "\";' 2>/dev/null" udata = os.popen(cmd.encode("UTF-8")).read().decode("UTF-8") if udata: (uChar, a1, readings, hangul_name_sound, pinyin, zhWubiXing, zhWubiHua, zhBianhao, a2, zhCangjieCh, zhDayi, pinyin1, Bopomofo, jaKun, jaOn, pinyin, zhCangjie) = udata.split('|') zhCangjie = zhCangjie.strip() if readings: japDict = SeqDict() kunon = readings.split('/') if kunon[0]: japDict['Kun'] = kunon[0] if kunon[1]: japDict['On'] = kunon[1] outDict['Japanese'] = japDict # get Chinese simplified/traditional equivalent cmd = "egrep '^" + char + "' '" + bundleLibPath + "zhSimTradHanzi.txt'" simtrad = os.popen(cmd.encode("UTF-8")).read().decode("UTF-8") data = "" if simtrad: c1, st, data = simtrad.split('\t') if pinyin1 or Bopomofo or data or zhWubiXing or zhWubiHua or \ zhBianhao or zhCangjie or zhCangjieCh or zhDayi: zhDict = SeqDict() if data: if st == 'T': zhDict['Traditional'] = data.rstrip() elif st == 'S': zhDict['Simplified'] = data.rstrip() if pinyin1: zhDict['Pinyin'] = pinyin1 if Bopomofo: zhDict['Zhuyin'] = Bopomofo if zhWubiXing: zhDict['Wubi Xing'] = zhWubiXing if zhWubiHua: zhDict['Wubi Hua'] = zhWubiHua if zhBianhao: zhDict['Bishu Bianhao'] = zhBianhao if zhCangjie: zhDict['Cangjie'] = zhCangjie + " " + 
zhCangjieCh if zhDayi: zhDict['Dayi'] = zhDayi outDict['Chinese'] = zhDict if hangul_name_sound: korDict = SeqDict() korDict['Hangul'] = hangul_name_sound outDict['Korean'] = korDict else: if 'HANGUL' in name and not 'Jamo' in block: outDict['Decomposition'] = " ".join(unicodedata.normalize("NFKD", char)) if UnicodeData: if category: outDict['Category'] = expandUniCategories(category) if oldname: outDict['Old Name'] = oldname if bididir: outDict['Bidirectional'] = expandUniDirectionClass(bididir) if combiningclass: outDict['Combining Class'] = expandUniCombiningClass(combiningclass) if bidimirror: outDict['Mirrored'] = bidimirror if upcase: outDict['Upper Case'] = wunichr(int(upcase,16)) + " (U+" + upcase + ")" if lowcase: outDict['Lower Case'] = wunichr(int(lowcase,16)) + " (U+" + lowcase + ")" if titlecase: outDict['Title Case'] = wunichr(int(titlecase,16)) + " (U+" + titlecase + ")" if numtype1: outDict['Numeral Type'] = (numtype1 + " " + numtype2 + " " + numtype3).strip() if decomposition and not charIsPaneB: decompDict = SeqDict() if decomposition[0] == '<': dc = decomposition.split(' ') decompDict['Class'] = expandUniDecompositionClass(dc[0]) decomposition = " ".join(dc[1:]) decomp = decomposition def cDec(x): return unichr(int(x,16)) def rDec(x): return "U+%04X" % ord(x) clist = decomp.split(' ') decomp = " ".join(map(cDec, clist)) + " (U+" + " U+".join(clist) + ")" cflist = unicodedata.normalize("NFKD", char) if len(clist) != len(cflist): decompDict['into'] = decomp + "; " + " ".join(cflist) + "(" + " ".join(map(rDec, cflist)) + ")" else: decompDict['into'] = decomp outDict['Decomposition'] = decompDict cpDict = SeqDict() cpDict['UCS dec/hex'] = "%s / U+%s" % (str(lastCharDecCode), lastCharUCShexCode) cpDict['UTF-8'] = " ".join([hex(ord(c))[2:].upper() for c in char.encode("utf-8")]) utf16be = hexlify(char.encode("utf-16-be")).upper() if len(utf16be)>4: cpDict['UTF-16BE'] = utf16be[:4] + "+" + utf16be[4:] outDict['Codepoints'] = cpDict if dialog2: dlgout = 
"<table style=\"border-collapse:collapse;\">" plh = "" if outDict.has_key('Category') and "Nonspacing" in outDict['Category']: plh = u"o" dlgout += "<tr><td rowspan=2 style=\"border:1px dotted silver;font-size:20pt;text-align:center;\"><font color=#CCCCCC>%s</font>%s</td><td> </td><td style=\"color:grey;\">Name</td><td>%s</td></tr>" % (plh, outDict['Character'], outDict['Name']) dlgout += "<tr><td> </td><td style=\"color:grey;\">Block</td><td>%s</td></tr>" % outDict['Block'] dlgout += "</table><table style=\"border-collapse:collapse;width:200px;\">" del outDict['Character'] del outDict['Name'] del outDict['Block'] for k, v in outDict.items(): if "Radical" in k: dlgout += "<tr><td align=right style=\"color:grey;\">%s</td><td> </td><td style=\"white-space:nowrap;\">%s (%s%s - %s) %s.%s" % (k, v[0], v[1], v[2], v[3], v[4], v[5]) elif "Related" in k: # and len(v) > 60 dlgout += "<tr><td align=right style=\"color:grey;\">%s</td><td> </td><td>%s</td></tr>" % (k, v) else: try: v.items() dlgout += "<tr><td colspan=2 align=right style=\"color:grey;\"><b><i>%s</i></b></td></tr>" % k for ku, vu in v.items(): dlgout += "<tr><td align=right style=\"color:grey;white-space:nowrap;\">%s</td><td> </td><td style=\"white-space:nowrap;\">%s</td></tr>" % (ku, vu) except AttributeError: dlgout += "<tr><td align=right style=\"color:grey;white-space:nowrap;\">%s</td><td> </td><td style=\"white-space:nowrap;\">%s</td></tr>" % (k, v) cmd = "'%s' tooltip --html '%s'" % (os.environ["DIALOG"], dlgout.replace("'", u"'")) os.popen(cmd.encode("UTF-8")) sys.exit(206) else: sep = u"┊" for k, v in outDict.items(): if "Radical" in k: print "%-15s %s %s (%s%s - %s) %s.%s" % (k, sep, v[0], v[1], v[2], v[3], v[4], v[5]) else: try: v.items() print "%-15s" % k for ku, vu in v.items(): print "%15s %s %s" % (ku, sep, vu) except AttributeError: print "%-15s %s %s" % (k, sep, v) sys.exit(206)
file.write(fstrip) with open(srcfile, 'r') as file: for fileline in file: matchesv = re.match(r'\\\\v\*\*\\\\v \d+==[^#]', fileline) matchesmt = re.match(r'\\\\mt\*\*\\\\mt \d+==[^#]', fileline) #print ('#### verse or mt is ####: ',fileline) if matchesv or matchesmt: linesplit = fileline.split('==####') line2 = linesplit[0] line1 = line2.replace(' ', '##').replace('\\\\', '\\').replace('##-', '## ') #print ('39 #### working line sfm file ####: ',fileline) #print ('40 #### verse spaces removed ####: ',line1) line = unicodedata.normalize('NFC', line1) versetext1 = linesplit[1] versetext2 = versetext1.rstrip('\n') versetext = unicodedata.normalize('NFC', versetext2) #print () #print ('44 #### bare verse normalized ####: ',versetext) #sanitize variable - https://stackoverflow.com/questions/8237647/clear-variable-in-python cars = None #cars = dict(x.split('**') for x in line.split('==')) - old try, but makes values into str, needs to be list so I an append suffixes later #https://stackoverflow.com/questions/4627981/creating-a-dictionary-from-a-string - key thing here is the [v] which turns it into a list cars = dict( (k, [v]) for k, v in (e.split('**') for e in line.split('=='))) #only for debugging log #print ('51 #### annots as dict keys ####: ', cars.keys()) newlist = [] for key in cars:
def unicodeToAscii(s):
    """Strip diacritics from *s*: NFD-decompose, then drop combining marks (Mn)."""
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
def TTYstr(ustr):
    """NFKD-normalize *ustr*, drop non-ASCII characters, and upper-case.

    Returns an ASCII-only bytes object suitable for a dumb terminal.
    """
    ascii_bytes = unicodedata.normalize('NFKD', ustr).encode('ascii', 'ignore')
    return ascii_bytes.upper()
def unicode_normalize_string(self, text):
    """Normalize *text* to an upper-case, accent-free ASCII string without hyphens.

    Fix: the original called the Python-2-only ``unicode(text, 'utf-8')``
    builtin, which raises NameError on Python 3.  Accept either str or
    UTF-8-encoded bytes, NFD-decompose, drop non-ASCII (the combining
    accent marks), upper-case, and remove '-' characters.

    :param text: str, or UTF-8 encoded bytes
    :return: upper-cased ASCII str
    """
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    ascii_text = unicodedata.normalize('NFD', text).encode(
        'ascii', 'ignore').decode('ascii')
    return ascii_text.upper().replace("-", "")
def to_python(self, value):
    """Return the parent field's cleaned value, NFKC-normalized.

    NFKC folds visually-equivalent Unicode forms (e.g. full-width letters)
    so that equivalent usernames compare equal.
    """
    cleaned = super(UsernameField, self).to_python(value)
    return unicodedata.normalize('NFKC', cleaned)
def strip_accents(string):
    """Return *string* with all combining accent marks removed.

    Fix: the original called the Python-2-only ``unicode()`` builtin,
    which raises NameError on Python 3.  Coerce to str instead, then
    NFD-decompose and drop every combining (Mn) character.
    """
    return u''.join(c for c in unicodedata.normalize('NFD', str(string))
                    if unicodedata.category(c) != 'Mn')
def processAlgorithm(self, parameters, context, feedback):
    """Load one cadastre 'LIEUDIT' WMS layer per commune in the input layer.

    Reads the INSEE code and commune name fields from the source vector
    layer, de-duplicates and sorts the (insee, name) pairs, then builds
    one WMS raster layer per commune against inspire.cadastre.gouv.fr and
    schedules valid layers to be added to the project on completion.

    Fixes vs. the original:
    - the EPSG check compared against ' 32631' (leading space), so
      EPSG:32631 could never match; the 26-way ``or`` chain is replaced
      with a set-membership test and the typo corrected;
    - the unused ``output_layers`` local is removed.
    """
    source = self.parameterAsVectorLayer(parameters, self.INPUT, context)
    field_insee = self.parameterAsString(parameters, self.INSEE_CODE, context)
    field_commune = self.parameterAsString(parameters, self.COMMUNE_NAME, context)
    value_epsg = self.parameterAsString(parameters, self.EPSG_CODE, context)

    # Supported CRS codes (metropolitan + overseas France, plus common web CRS).
    valid_epsg = {
        '2154', '3942', '3943', '3944', '3945', '3946', '3947', '3948',
        '3949', '3950', '32630', '32631', '32632', '3857', '4326', '4258',
        '32620', '2970', '2972', '2973', '2975', '32622', '32740', '32738',
        '4471', '32621',
    }
    if value_epsg in valid_epsg:
        feedback.pushInfo('EPSG code' + value_epsg)
        tab = []
        for f in source.getFeatures():
            # (INSEE code, accent-stripped commune name) for each feature.
            commune_ascii = ''.join(
                c for c in unicodedata.normalize('NFD', f[field_commune])
                if unicodedata.category(c) != 'Mn')
            tab.append((f[field_insee], commune_ascii))
        # Drop duplicates and sort so layers load in a stable order.
        Lt = sorted(set(tab))
        print(Lt)
        for c_insee, n_couche in Lt:
            urlWithParams = ("url=http://inspire.cadastre.gouv.fr/scpc/" + c_insee +
                             ".wms?contextualWMSLegend=0&crs=EPSG:" + value_epsg +
                             "&dpiMode=7&featureCount=10&format=image/png"
                             "&layers=LIEUDIT&styles=&maxHeight=1024&maxWidth=1280")
            rlayer = QgsRasterLayer(urlWithParams,
                                    'Lieu_dit_' + n_couche + '_' + c_insee, 'wms')
            feedback.pushInfo('Category :' + n_couche + ' - ' + c_insee)
            feedback.pushInfo('Validity of WMS : %s' % rlayer.isValid())
            if not rlayer.isValid():
                print('Lieu_dit_' + n_couche + '_' + c_insee + ' failed to load!')
                feedback.pushInfo('WMS INVALID : \nCadastre_' + n_couche + '_' + c_insee)
            else:
                # Source: https://gis.stackexchange.com/questions/342802/loading-openstreetmap-in-pyqgis
                context.temporaryLayerStore().addMapLayer(rlayer)
                context.addLayerToLoadOnCompletion(
                    rlayer.id(),
                    QgsProcessingContext.LayerDetails(
                        'Lieu_dit_' + n_couche + '_' + c_insee,
                        context.project(),
                        self.OUTPUT_LAYERS))
    else:
        feedback.pushInfo('Error EPSG code')
    # No feature-sink output: layers are attached to the project via
    # addLayerToLoadOnCompletion, so the result dictionary is empty.
    return {}
def strip_accents(s):
    """Remove accents (and backticks/apostrophes) from a string."""
    # Drop backticks and apostrophes before normalizing.
    s = s.replace('`', '').replace("'", '')
    decomposed = unicodedata.normalize('NFD', s)
    return ''.join(ch for ch in decomposed if unicodedata.category(ch) != 'Mn')
def normalize(y):
    """NFKC-normalize *y* (e.g. full-width digits to half-width); pass missing values through."""
    return y if pd.isnull(y) else ud.normalize('NFKC', y)
def remove_words_accents(word: str):
    """Return *word* with accented characters reduced to their ASCII base form."""
    stripped = unicodedata.normalize('NFD', word).encode('ascii', 'ignore')
    return stripped.decode("utf-8")
def normalize(s):
    """Coerce *s* to str, NFKD-decompose it, and drop combining marks.

    Fix: the original called the Python-2-only ``unicode()`` builtin,
    which raises NameError on Python 3; ``str()`` is used instead.
    """
    return ''.join(c for c in unicodedata.normalize('NFKD', str(s))
                   if unicodedata.category(c) != 'Mn')
def parse_store(self, response):
    """Scrapy callback: scrape a single store page and yield a GeojsonPointItem.

    Pulls coordinates, address parts, phone and website from the page via
    XPath, then reconstructs the opening-hours string from the sidebar.
    """
    lat = response.xpath('//*[@id="location-lat"]/@value').extract_first()
    lon = response.xpath('//*[@id="location-lng"]/@value').extract_first()
    # NOTE(review): `name` is extracted but never used below.
    name = response.xpath(
        '//div[@class="title-wrap"]/h2/text()').extract_first()
    phone = response.xpath(
        '//div[@class="title-wrap"]/div/text()').extract_first()
    street = response.xpath(
        '//li[@itemprop="streetAddress"]/text()').extract_first().strip()
    city = response.xpath(
        '//span[@itemprop="addressLocality"]/text()').extract_first()
    state = response.xpath(
        '//span[@itemprop="addressRegion"]/text()').extract_first()
    postcode = response.xpath(
        '//span[@itemprop="postalCode"]/text()').extract_first()
    website = response.xpath(
        '//*[@id="my_location_url"]/@value').extract_first()
    # Assumes the street text already ends with a separator -- TODO confirm.
    address = "{}{} {} {}".format(street, city, state, postcode)
    # Some pages post notices such as "No longer accepting checks"
    # in the day/hours open section
    hour = response.xpath(
        '//*[@class="location-sidebar-item"][2]/descendant::*[contains('
        '., "am") or contains(., "pm") or contains('
        '., "Closed")]/text()').extract()
    day = self.convert_days(
        response.xpath(
            '//*[@class="location-sidebar-item"][2]/descendant::*[contains('
            '., "Sunday") or contains(., "Monday") or contains('
            '., "Tuesday") or contains(., "Wednesday") or contains('
            '., "Thursday") or contains(., "Friday") or contains('
            '., "Saturday")]/text()').extract())
    # Normalize each hours string so \xa0 (non-breaking space) strips cleanly.
    for i in range(len(hour)):
        hour[i] = unicodedata.normalize("NFKD", hour[i])  # handle \xa0
        hour[i] = hour[i].strip()
    # Drop entries that were only whitespace, then convert to a uniform format.
    hour = [x for x in hour if x]
    hour = self.convert_hours(hour)
    # Pair each day with its hours: "Monday : 9am - 5pm, ..."
    opening_hours = ', '.join('{} : {}'.format(*t) for t in zip(day, hour))
    yield GeojsonPointItem(
        lat=lat,
        lon=lon,
        addr_full=address,
        street=street,
        city=city,
        state=state,
        postcode=postcode,
        phone=phone,
        website=website,
        opening_hours=opening_hours,
        ref=response.url,
    )
def lat2asc(self, title):
    """Decode a latin-1 byte string and transliterate it to ASCII-only bytes."""
    decoded = title.decode('iso8859-1')
    normalized = unicodedata.normalize('NFKD', decoded)
    return normalized.encode('ascii', 'ignore')
def normalize_ascii(value):
    """NFKD-normalize *value* and return its ASCII-only byte representation."""
    normalized = unicodedata.normalize('NFKD', value)
    return normalized.encode('ascii', 'ignore')
def unicodeToAscii(series):
    """NFKC-normalize every element of a pandas Series (after coercing to str).

    Note: despite the name, NFKC does not strip accents -- it folds
    compatibility forms such as full-width characters.
    """
    def _nfkc(value):
        return unicodedata.normalize('NFKC', str(value))
    return series.apply(_nfkc)
def execute(self, commande):
    """Recognize and carry out the order typed by the user.

    The first word selects the verb: "prendre" (take) and "poser" (drop)
    move an object between the current location and the inventory;
    "aller" (go) changes the current location; "parler" (talk),
    "utiliser" (use) and "inventaire" (inventory) are also handled.
    """
    # Strip accents so matching is accent-insensitive, then split on spaces.
    commande = unicodedata.normalize('NFD', commande).encode(
        'ascii', 'ignore').decode('utf8')
    commande = commande.replace("'", " ")
    words = commande.strip(" ").split(" ")
    mots_reconnus = 0  # number of recognized words for the current verb
    if words[0] == "prendre":
        for mot in words[1:]:
            # NOTE(review): removes from the list being iterated; fine for a
            # single match, but elements after a removal may be skipped.
            for obj in self.lieu[self.lieu_actuel].contenu:
                if mot == obj.raccourci:
                    mots_reconnus += 1
                    self.personnage.inventaire.append(obj)
                    self.lieu[self.lieu_actuel].contenu.remove(obj)
                    print("Vous avez obtenu : " + obj.nom)
        if mots_reconnus == 0:
            print("Impossible de prendre cet objet.")
    # As it stands, the "poser" (drop) command causes problems.
    # elif words[0] == "poser":
    #     for mot in words[1:]:
    #         for obj in self.personnage.inventaire:
    #             if mot == obj.raccourci:
    #                 self.lieu[self.lieu_actuel].contenu.append(obj)
    #                 self.personnage.inventaire.remove(obj)
    elif words[0] == "aller":
        for mot in words[1:]:
            if mot in self.lieu[self.lieu_actuel].adjacence:
                # Follow the adjacency edge and flag a scene transition.
                self.lieu_actuel = self.lieu[
                    self.lieu_actuel].adjacence[mot]
                self.transition = 1
                mots_reconnus += 1
        if mots_reconnus == 0:
            print(
                "La destination n'a pas été reconnue, ou est inaccessible depuis ce lieu."
            )
        if mots_reconnus > 1:
            print(
                "Attention, plusieurs lieux ont été reconnus. \nVous arrivez dans le dernier possible"
            )
    elif words[0] == "parler":
        for mot in words[1:]:
            if mot in self.lieu[self.lieu_actuel].dialogues:
                mots_reconnus += 1
                # Bold the speaker's name with ANSI escapes.
                print("\n\033[1m" + mot.capitalize() + "\033[0m : " +
                      self.lieu[self.lieu_actuel].dialogues[mot])
        if mots_reconnus == 0:
            print("Impossible de parler à cette personne.")
    elif words[0] == "utiliser":
        for mot in words[1:]:
            # Only objects present in the inventory can be used here.
            if (mot in self.lieu[self.lieu_actuel].utilisation):
                for obj in self.personnage.inventaire:
                    if mot == obj.raccourci:
                        self.declencher(
                            self.lieu_actuel,
                            self.lieu[self.lieu_actuel].utilisation[mot])
                        mots_reconnus += 1
        if not mots_reconnus:
            print("Utilisation impossible.")
    elif words[0] == "inventaire":
        self.personnage.afficher_inventaire()
    else:
        print("Verbe non reconnu.")
def parse_hours(self, item):
    """Transliterate *item* to ASCII and strip markup/whitespace noise.

    NFKD-normalizes, drops non-ASCII characters, trims surrounding
    whitespace, then removes line breaks and a fixed set of HTML tags.

    Fix: the original left the value as bytes after ``.encode()``, so the
    subsequent str ``.replace()`` calls raised TypeError on Python 3;
    decode back to str before the replacements.
    """
    item = unicodedata.normalize('NFKD', item).encode(
        'ascii', 'ignore').decode('ascii').strip()
    symbols = ['\r', '\n', '<br>', '<br/>', '<p>', '<ul>', '</ul>',
               '</li>', '<h3>', '</h3>', '<li style="list-style: initial;">']
    for s in symbols:
        item = item.replace(s, '')
    return item
# NOTE(review): this reads like a REPL transcript (bare expressions such as
# `a`, `b`, `x` below print their value interactively).  `s` is assumed to
# be a str defined earlier in the session -- not visible here.
remap = {
    ord('\t') : ' ',   # tab -> single space
    ord('\f') : ' ',   # form feed -> single space
    ord('\r') : None   # Deleted
}
a = s.translate(remap)
a
import unicodedata
import sys
sys.maxunicode
# Map every combining code point to None so translate() deletes accents.
cmb_chrs = dict.fromkeys(c for c in range(sys.maxunicode)
                         if unicodedata.combining(chr(c)))
b = unicodedata.normalize('NFD', a)
b
b.translate(cmb_chrs)
# Map every Unicode decimal digit (category Nd) to its ASCII equivalent.
digitmap = { c: ord('0') + unicodedata.digit(chr(c))
             for c in range(sys.maxunicode)
             if unicodedata.category(chr(c)) == 'Nd'}
len(digitmap)
# Arabic digits
x = '\u0661\u0662\u0663'
x
x.translate(digitmap)
def interpreter(data_path, model_path):
    """Interactive REPL for talking to a trained seq2seq model.

    Type a sentence and the model replies; type "exit" to quit.

    :param data_path: path of the corpus the model was trained on
    :param model_path: path of the trained model weights (HDF5)
    :return: None (loops until the user types "exit")
    """
    # call dictionary class
    if args.lang == 'en':
        corpus = ConvCorpus(file_path=None)
        corpus.load(load_dir=data_path)
    elif args.lang == 'ja':
        corpus = JaConvCorpus(file_path=None)
        corpus.load(load_dir=data_path)
    else:
        print('You gave wrong argument to this system. Check out your argument about languages.')
        raise ValueError
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id), feature_num=args.feature_num,
                    hidden_num=args.hidden_num, batch_size=1, gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # load word2vec model; sim_th = how many neighbours to try for OOV words
    sim_th = 50
    w2v_model = gensim.models.KeyedVectors.load_word2vec_format(W2V_MODEL_PATH, binary=False)

    # run conversation system
    print('The system is ready to run, please talk to me!')
    print('( If you want to end a talk, please type "exit". \n)')
    print('')
    while True:
        print('>> ', end='')
        sentence = input()
        if sentence == 'exit':
            print('See you again!')
            break

        # Tokenize: lowercase + NFKC for English, morphological parse for Japanese.
        if args.lang == 'en':
            input_vocab = [unicodedata.normalize('NFKC', word.lower()) for word in word_tokenize(sentence)]
        elif args.lang == 'ja':
            input_vocab = parse_ja_text(sentence)
        # The model consumes the sentence reversed, prefixed with <eos>.
        input_vocab.reverse()
        input_vocab.insert(0, "<eos>")

        # convert word into ID
        input_sentence = []
        for word in input_vocab:
            if corpus.dic.token2id.get(word) is not None:
                input_sentence.append(corpus.dic.token2id.get(word))
            else:
                # Out-of-vocabulary: fall back to the nearest word2vec
                # neighbour that exists in the dictionary, else <unk>.
                try:
                    sim_words = w2v_model.most_similar(positive=[word], topn=sim_th)
                    for index, candidate_tuple in enumerate(sim_words):
                        if corpus.dic.token2id.get(candidate_tuple[0]) is not None:
                            input_sentence.append(corpus.dic.token2id.get(candidate_tuple[0]))
                            break
                        if index == sim_th - 1:
                            input_sentence.append(corpus.dic.token2id['<unk>'])
                except KeyError:
                    # word absent from the word2vec vocabulary
                    input_sentence.append(corpus.dic.token2id['<unk>'])

        # input a sentence into model
        model.initialize()  # initialize cell
        sentence = model.generate(input_sentence, sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id, id2word=corpus.dic)
        print("-> ", sentence)
        print('')
def ganadores_por_artista(req):
    """Dialogflow webhook handler: list the awards won by a given artist.

    Reads the 'artista' parameter from the request, turns it into a
    WordPress category slug, fetches the winning posts for that category
    and, for each post, its tags (award categories), and assembles a
    Spanish-language answer string.
    """
    try:
        artista = req.get('queryResult').get('parameters').get('artista')
    except AttributeError:
        return '¿Podrías especificar el nombre de un artista?'
    if str(artista)=='':
        return 'Estas seguro de que esa persona esta compitiendo?. Lucas acá me dice que no.'
    print(str(artista), file=sys.stderr)
    # Build the category slug: NFKD-normalized, lower-cased, spaces -> dashes.
    slug = str(unicodedata.normalize('NFKD', artista)).lower().replace(" ", "-")
    print(slug, file=sys.stderr)
    #tag = translate_tags[cat]
    data = {}
    response = requests.get(
        url_win+'categories?slug='+slug,
        params=data
    )
    rjson = response.json()
    print(str(rjson), file=sys.stderr)
    cat_id = rjson[0].get('id')  # need cat id to get posts/videos
    print(cat_id, file=sys.stderr)
    r = 'posts?categories='+str(cat_id)
    response = requests.get(
        url_win+r,
        params=data
    )
    rjson = response.json()
    answer = 'Este artista ganó '
    i=0
    for video in rjson:
        slug = video.get('slug')
        data = {}
        response = requests.get(
            url_win+'posts?slug='+slug,
            params=data
        )
        _rjson = response.json()[0]
        tags = _rjson.get('tags')
        # Fetch all of this post's tags in a single include= request;
        # str(list) formatting is stripped of brackets to build the CSV.
        r = 'tags?include='
        r = r + str(tags).replace('[','').replace(']', '')
        response = requests.get(
            url_win+r,
            params=data
        )
        _rjson = response.json()
        # An error payload is a dict, not a list -- skip this video.
        if type(_rjson)!=list:
            continue
        print('tags----' + str(_rjson), file=sys.stderr)
        video_cats = []
        answer = answer + 'por el video "'+ video.get('title').get('rendered') + '" en '
        for categoria in _rjson:
            #print(str(video[0]), file=sys.stderr)
            print(str(categoria), file=sys.stderr)
            video_cats.append(categoria.get('description'))
        answer = answer + and_last_comma(str(video_cats).replace('[','').replace(']', '').replace('\'','')) + ''
        if i != len(rjson) - 1:
            answer = answer + ', por '
        i = i+1
    # No video produced any award text -> artist won nothing.
    if answer=="Este artista ganó ":
        return "Este artista no obtuvo premios"
    return html.unescape(answer)
def normalize_unicode(text):
    """Return *text* in NFKD (compatibility-decomposed) Unicode form."""
    return unicodedata.normalize("NFKD", text)
def validate(self, item):
    """Clean *item* to ASCII: strip accents, whitespace, and semicolons.

    Returns '' when *item* cannot be processed (e.g. None or a non-str).

    Fix: the original left the value as bytes after ``.encode()``, so
    ``.replace(';','')`` with a str argument raised TypeError on Python 3
    and the bare ``except:`` silently turned EVERY input into '';
    decode back to str and catch only Exception (not KeyboardInterrupt).
    """
    try:
        return unicodedata.normalize('NFKD', item).encode(
            'ascii', 'ignore').decode('ascii').strip().replace(';', '')
    except Exception:
        # Best-effort contract: any failure yields the empty string.
        return ''
def getNormalizedFilesList(dir):
    """List the entries of *dir* with each filename NFD-normalized."""
    return [unicodedata.normalize("NFD", entry) for entry in os.listdir(dir)]
def remove_accents(input_str):
    """NFKD-decompose *input_str* and return the ASCII-only bytes (accents dropped)."""
    decomposed = unicodedata.normalize('NFKD', input_str)
    return decomposed.encode('ASCII', 'ignore')
def tag2sentences(tag):
    """NFKC-normalize the tag's text and wrap it in a single-item list."""
    normalized = unicodedata.normalize("NFKC", tag.text)
    return [normalized]
def getNormalizedString(fname):
    """Return *fname* in NFD (canonically decomposed) Unicode form."""
    return unicodedata.normalize("NFD", fname)
def remove_accents(input_str):
    # Borrowed from https://stackoverflow.com/a/517974/1509718
    """NFKD-decompose *input_str* and drop combining marks, returning an accent-free str."""
    decomposed = unicodedata.normalize('NFKD', input_str)
    return u"".join(ch for ch in decomposed if not unicodedata.combining(ch))
def to_python(self, value):
    """NFKC-normalize the value produced by the parent field's to_python()."""
    cleaned = super().to_python(value)
    return unicodedata.normalize('NFKC', cleaned)