def validate(self, value, model_instance):
    """Validate an uploaded picture field value.

    Checks, in order: pixel dimensions against ``self.min_size`` /
    ``self.max_size``, byte size against ``MAX_UPLOAD_SIZE``, and the image
    format against ``ALLOWED_FORMATS``.  Raises ``ValidationError`` with a
    localized message on the first failed check.  All checks are skipped for
    values without a ``file`` attribute (e.g. unchanged stored values).
    """
    super(PictureField, self).validate(value, model_instance)
    if hasattr(value, 'file'):
        # format file name to ascii
        value.name = unidecode(value.name)
        # check size
        if value.width < self.min_size['width'] or value.height < self.min_size['height']:
            raise ValidationError(_(u'Size of the file is smaller than {}x{} px.').format(
                self.min_size['width'], self.min_size['height']))
        elif value.width > self.max_size['width'] or value.height > self.max_size['height']:
            raise ValidationError(_(u'Size of the file is bigger than {}x{} px.').format(
                self.max_size['width'], self.max_size['height']))
        # check file size
        if value.size > MAX_UPLOAD_SIZE:
            raise ValidationError(_(u'Maximal size of file is {}.').format(filesize(MAX_UPLOAD_SIZE)))
        # check format
        try:
            im = Image.open(value)
        except ImportError:
            raise
        except Exception:
            # Python Imaging Library doesn't recognize it as an image
            raise ValidationError(self.error_messages['invalid_image'])
        # BUGFIX: this check used to live inside the try block above, so the
        # ValidationError it raised was caught by `except Exception` and
        # replaced with the generic 'invalid_image' message.  Moving it out
        # lets the specific "unsupported extension" message reach the user.
        if im.format not in ALLOWED_FORMATS:
            raise ValidationError(_(u'Unsupported file extension. You can upload {}.').format(
                ', '.join(ALLOWED_FORMATS)))
def genius(artist, title, return_url=False):
    """Fetch lyrics for (artist, title) from genius.com.

    Builds the canonical genius.com lyrics URL from the artist and title,
    then scrapes the ``<div class="lyrics">`` element and strips section
    headers (lines starting with "[").

    If ``return_url`` is true, returns the URL without fetching.
    Returns ``None`` when the lyrics container is not found (previously this
    crashed with AttributeError on ``None.get_text()``).
    """
    line = f"{artist}-{title}"
    line = re.sub("[,._@!#%^*+:;'()]", "", line)
    line = line.replace("]", "")
    line = line.replace("[", "")
    line = line.replace(" ", "-")
    line = line.replace("/", "-")
    line = line.replace("-&-", "-and-")
    line = line.replace("&", "-and-")
    # BUGFIX: unidecode() returns str on Python 3 (this function uses
    # f-strings, so it is py3-only); the original `.decode()` call raised
    # AttributeError for every input.
    line = unidecode(line)
    line = urllib.parse.quote(line)
    line = f"https://genius.com/{line}-lyrics"
    if return_url:
        return line
    page = requests.get(line)
    html = BeautifulSoup(page.text, 'html.parser')
    container = html.find('div', class_='lyrics')
    if container is None:
        # Song page missing or layout changed -- don't crash the caller.
        return None
    lyrics = container.get_text()
    # Drop "[Verse]"/"[Chorus]"-style section headers.
    kept = [ln for ln in lyrics.splitlines() if not ln.startswith("[")]
    return "\n".join(kept).strip("\n")
def cleaner(name):
    """Return an ASCII, lower-case, underscore-separated version of *name*.

    Transliterates to ASCII, replaces whitespace and punctuation with '_',
    collapses runs of '_' and prefixes '_' when the result starts with a
    digit (so it can serve as an identifier-like name).
    """
    # Raw strings fix the invalid escape sequences ('\s', '\*', '\d', ...)
    # the original non-raw literals relied on: those are a DeprecationWarning
    # today and a SyntaxError in future Python versions.  The compiled
    # patterns are unchanged.
    name = re.sub(r'[\s!@#$%^&\*(\)\.:";/?\\|<>\[\]~\-=]', '_',
                  unidecode(name).strip().lower())
    # Collapse consecutive underscores left by adjacent punctuation.
    while '__' in name:
        name = name.replace('__', '_')
    # Identifiers must not start with a digit.
    if re.match(r'\d', name):
        name = '_' + name
    return name
def genusername(name):
    """Generate a unique "first.last"-style username from a full name.

    Transliterates to ASCII and lower-cases; when the base name is taken,
    appends 2, 3, ... until a free one is found.  Relies on module globals
    ``users`` (set of taken usernames, updated in place) and ``count``
    (``itertools.count``) -- TODO confirm against the rest of the module.
    """
    # py3 fix: the removed `unicode()` wrapper was a NameError on Python 3;
    # unidecode() already returns str there.
    base = candidate = unidecode('.'.join(name.strip().lower().split()))
    suffix = count(2)
    while candidate in users:
        # py3 fix: next(it) replaces the removed it.next() method.
        candidate = base + str(next(suffix))
    users.add(candidate)
    return candidate
def parse_AIRBASE_xml(xml_files):
    """Parse AIRBASE per-country metadata XML files into a time-zone Series.

    Parameters
    ----------
    xml_files : iterable of str
        Paths to AIRBASE country metadata XML files.

    Returns
    -------
    pd.Series
        Indexed by 'AB' + station_european_code, values are the station's
        time-zone string (network_time_reference_basis), 'UTC' when absent.
    """
    import untangle
    timezone_meta = {}
    # loop over countries
    for fi in xml_files:
        print(fi)  # py3 fix: print() function replaces the py2 print statement
        cmeta = untangle.parse(fi)
        # loop over stations in each file
        for i in range(len(cmeta.airbase.country.station)):  # py3 fix: range, not xrange
            node = cmeta.airbase.country.station[i]
            # get station name
            station = 'AB' + unidecode(node.station_european_code.cdata)
            # get time zone information. If there is none, assume UTC
            try:
                TZ = unidecode(node.network_info.network_time_reference_basis.cdata)
            except Exception:  # was a bare except; kept best-effort but no longer traps SystemExit etc.
                TZ = 'UTC'
            timezone_meta[station] = TZ
    tz_meta = pd.Series(timezone_meta)
    return tz_meta
def _flatten(s): """pytables only stores plain ascii. For precision data (like mailings), the original data must be retrieved (tbd)""" if not isinstance(s, basestring): return s try: return s.encode('ascii') except: try: return _isounidecode.unidecode(s) except: return ''
def generate_username(first_name, last_name):
    """Build a lower-case ASCII username from first and last name.

    '.' inside names becomes a separator, the parts are joined with
    '******', and every character outside [a-zA-Z0-9.-_] is replaced by '_'.
    """
    username = '******'.join(' '.join(filter(None, (first_name, last_name))).replace(
        '.', ' ').split())
    try:
        # BUGFIX: unidecode() takes just the string.  The original passed a
        # bogus 'ascii' second argument and then called .decode('ascii') on
        # the already-str result, so this always raised and transliteration
        # was silently skipped via the except below.
        username = unidecode(username)
    except Exception:  # was a bare except; keep the logged best-effort fallback
        logging.warning('Failed to unidecode username: %s', username,
                        exc_info=True)
    return ''.join([
        c if re.match(r'[a-zA-Z0-9.\-_]', c) else '_' for c in username
    ]).lower()
def parse_AIRBASE_xml(xml_files):
    """Parse AIRBASE per-country metadata XML files into a time-zone Series.

    Duplicate of the other ``parse_AIRBASE_xml`` in this file -- fixed
    consistently with it; consider deleting one copy.

    Parameters
    ----------
    xml_files : iterable of str
        Paths to AIRBASE country metadata XML files.

    Returns
    -------
    pd.Series
        Indexed by 'AB' + station_european_code, values are the station's
        time-zone string (network_time_reference_basis), 'UTC' when absent.
    """
    import untangle
    timezone_meta = {}
    # loop over countries
    for fi in xml_files:
        print(fi)  # py3 fix: print() function replaces the py2 print statement
        cmeta = untangle.parse(fi)
        # loop over stations in each file
        for i in range(len(cmeta.airbase.country.station)):  # py3 fix: range, not xrange
            node = cmeta.airbase.country.station[i]
            # get station name
            station = 'AB' + unidecode(node.station_european_code.cdata)
            # get time zone information. If there is none, assume UTC
            try:
                TZ = unidecode(node.network_info.network_time_reference_basis.cdata)
            except Exception:  # was a bare except; kept best-effort but no longer traps SystemExit etc.
                TZ = 'UTC'
            timezone_meta[station] = TZ
    tz_meta = pd.Series(timezone_meta)
    return tz_meta
def genusername(self, firstnames, lastname):
    """Generates a username based on the given names.

    Tries, in order: "first.last", "first.m.last" (middle initial),
    "first.middle.last", and finally "first.last2", "first.last3", ...
    The chosen name is added to ``self.usernames`` and returned.
    """
    # Generate a list of normalized first names.
    # py3 fix throughout: unicode()/u''-wrappers removed (unidecode returns
    # str), print>>sys.stderr -> print(file=...), it.next() -> next(it).
    names = [unidecode(n) for n in firstnames.strip().lower().split()]
    # In case the lastname consists of multiple parts we join them with
    # a period.
    lastname = unidecode('.'.join(lastname.strip().lower().split()))
    # Try the "firstname.lastname" option first.
    candidate = '{0}.{1}'.format(names[0], lastname)
    if candidate not in self.usernames:
        self.usernames.add(candidate)
        return candidate
    # If a second name exists, try using the first letter.
    if len(names) > 1 and len(names[1]) > 0:
        candidate = '{0}.{1}.{2}'.format(names[0], names[1][0], lastname)
        if candidate not in self.usernames:
            self.usernames.add(candidate)
            print("-!- Using middle initial for {0}.".format(candidate), file=sys.stderr)
            return candidate
        else:
            # Try with the whole second name.
            candidate = '{0}.{1}.{2}'.format(names[0], names[1], lastname)
            if candidate not in self.usernames:
                self.usernames.add(candidate)
                print("-!- Using middle name for {0}.".format(candidate), file=sys.stderr)
                return candidate
    # We've exhausted our options of readable usernames, start using a suffix.
    suffix = count(2)
    candidate = base = '{0}.{1}'.format(names[0], lastname)
    while candidate in self.usernames:
        candidate = '{0}{1}'.format(base, next(suffix))
    self.usernames.add(candidate)
    print("-!- Using suffix for {0}.".format(candidate), file=sys.stderr)
    return candidate
def genius(artist, title, return_url=False):
    """Scrape lyrics for (artist, title) from genius.com.

    Builds the canonical genius.com lyrics URL, then parses either the old
    single ``div.lyrics`` layout or the newer ``Lyrics__Container`` layout.
    Bracketed section headers ("[Chorus]", ...) are stripped in both cases.

    If ``return_url`` is true, returns the URL without fetching.
    """
    line = f"{artist}-{title}"
    line = re.sub("[,._@!#%^*+:;'()]", "", line)
    line = line.replace("]", "")
    line = line.replace("[", "")
    line = line.replace("?", "")
    line = line.replace(" ", "-")
    line = line.replace("/", "-")
    line = line.replace("-&-", "-and-")
    line = line.replace("&", "-and-")
    # BUGFIX: unidecode() returns str on Python 3 (f-strings above make this
    # py3-only code); the original `.decode()` call raised AttributeError
    # for every input.
    line = unidecode(line)
    line = urllib.parse.quote(line)
    line = f"https://genius.com/{line}-lyrics"
    if return_url:
        return line
    page = requests.get(line)
    html = BeautifulSoup(page.text, 'html.parser')
    result = html.find('div', class_='lyrics')
    if result is not None:
        # Old layout: one <div class="lyrics"> holding plain text.
        lyrics = result.get_text()
        lyrics2 = []
        for line in lyrics.splitlines():
            if line.startswith("["):
                pass
            else:
                lyrics2.append(line)
        lyrics = "\n".join(lyrics2)
        lyrics = lyrics.strip("\n")
        return lyrics
    # New layout type: several Lyrics__Container divs.
    else:
        results = html.findAll(
            "div", {"class": lambda l: l and "Lyrics__Container" in l})
        lyrics = "".join([r.get_text("\n") for r in results])
        # Remove bracketed sections character-by-character (handles headers
        # that span line breaks; `level` tracks bracket nesting depth).
        level = 0
        new = ""
        for cha in lyrics:
            if level <= 0:
                new += cha
            if cha == "[":
                level += 1
            if cha == "]":
                level -= 1
        lyrics = new
        lines = lyrics.splitlines()
        new_lines = []
        for line in lines:
            # Drop any trailing "[..." remnant on the line; keep a blank
            # separator where a header used to be.
            if "[" in line:
                line = line.split("[", 1)[0]
                if line:
                    line += "\n"
            new_lines.append(line.lstrip().rstrip(" ") + "\n")
        lyrics = "".join(new_lines)
        lyrics = lyrics.replace("(\n", "(")
        lyrics = lyrics.replace("\n)", ")")
        lyrics = lyrics.lstrip("\n")
        lyrics = lyrics.lstrip()
        return lyrics
def searchify(self, str):
    """Prepare *str* for search matching: transliterate to ASCII,
    lower-case, drop apostrophes, and collapse every run of non-word
    characters into a single space."""
    folded = unidecode(str).lower()
    without_quotes = folded.replace("'", "")
    return re.sub(r'\W+', ' ', without_quotes)
def slugify(self, str):
    """Turn *str* into a URL slug: transliterated to ASCII, lower-cased,
    with every run of non-word characters collapsed to a single hyphen."""
    folded = unidecode(str).lower()
    return re.sub(r'\W+', '-', folded)
def normalize(value):
    """Normalizes a value by first changing all non-ascii characters to
    their 7-bit representative values and then removing any invalid
    characters (per the module-level RE_INVALID_CHARS pattern).
    """
    # py3 fix: the removed `unicode()` wrapper was a NameError on Python 3;
    # unidecode() already returns str there, so the result is unchanged.
    return RE_INVALID_CHARS.sub('', unidecode(value).lower())