def validate(self, value, model_instance):
    """Validate an uploaded picture field value.

    Checks, in order: pixel dimensions against ``self.min_size`` /
    ``self.max_size``, byte size against ``MAX_UPLOAD_SIZE``, and the image
    format against ``ALLOWED_FORMATS``.  Raises ``ValidationError`` with a
    localized message on the first failed check.  All checks are skipped for
    values without a ``file`` attribute (e.g. unchanged stored values).
    """
    super(PictureField, self).validate(value, model_instance)
    if hasattr(value, 'file'):
        # format file name to ascii
        value.name = unidecode(value.name)
        # check size
        if value.width < self.min_size['width'] or value.height < self.min_size['height']:
            raise ValidationError(_(u'Size of the file is smaller than {}x{} px.').format(
                self.min_size['width'], self.min_size['height']))
        elif value.width > self.max_size['width'] or value.height > self.max_size['height']:
            raise ValidationError(_(u'Size of the file is bigger than {}x{} px.').format(
                self.max_size['width'], self.max_size['height']))
        # check file size
        if value.size > MAX_UPLOAD_SIZE:
            raise ValidationError(_(u'Maximal size of file is {}.').format(filesize(MAX_UPLOAD_SIZE)))
        # check format
        try:
            im = Image.open(value)
        except ImportError:
            raise
        except Exception:
            # Python Imaging Library doesn't recognize it as an image
            raise ValidationError(self.error_messages['invalid_image'])
        # BUGFIX: this check used to live inside the try block above, so the
        # ValidationError it raised was caught by `except Exception` and
        # replaced with the generic 'invalid_image' message.  Moving it out
        # lets the specific "unsupported extension" message reach the user.
        if im.format not in ALLOWED_FORMATS:
            raise ValidationError(_(u'Unsupported file extension. You can upload {}.').format(
                ', '.join(ALLOWED_FORMATS)))
def genius(artist, title, return_url=False):
    """Fetch lyrics for (artist, title) from genius.com.

    Builds the canonical genius.com lyrics URL from the artist and title,
    then scrapes the ``<div class="lyrics">`` element and strips section
    headers (lines starting with "[").

    If ``return_url`` is true, returns the URL without fetching.
    Returns ``None`` when the lyrics container is not found (previously this
    crashed with AttributeError on ``None.get_text()``).
    """
    line = f"{artist}-{title}"
    line = re.sub("[,._@!#%^*+:;'()]", "", line)
    line = line.replace("]", "")
    line = line.replace("[", "")
    line = line.replace(" ", "-")
    line = line.replace("/", "-")
    line = line.replace("-&-", "-and-")
    line = line.replace("&", "-and-")
    # BUGFIX: unidecode() returns str on Python 3 (this function uses
    # f-strings, so it is py3-only); the original `.decode()` call raised
    # AttributeError for every input.
    line = unidecode(line)
    line = urllib.parse.quote(line)
    line = f"https://genius.com/{line}-lyrics"
    if return_url:
        return line
    page = requests.get(line)
    html = BeautifulSoup(page.text, 'html.parser')
    container = html.find('div', class_='lyrics')
    if container is None:
        # Song page missing or layout changed -- don't crash the caller.
        return None
    lyrics = container.get_text()
    # Drop "[Verse]"/"[Chorus]"-style section headers.
    kept = [ln for ln in lyrics.splitlines() if not ln.startswith("[")]
    return "\n".join(kept).strip("\n")
def cleaner(name):
    """Return an ASCII, lower-case, underscore-separated version of *name*.

    Transliterates to ASCII, replaces whitespace and punctuation with '_',
    collapses runs of '_' and prefixes '_' when the result starts with a
    digit (so it can serve as an identifier-like name).
    """
    # Raw strings fix the invalid escape sequences ('\s', '\*', '\d', ...)
    # the original non-raw literals relied on: those are a DeprecationWarning
    # today and a SyntaxError in future Python versions.  The compiled
    # patterns are unchanged.
    name = re.sub(r'[\s!@#$%^&\*(\)\.:";/?\\|<>\[\]~\-=]', '_',
                  unidecode(name).strip().lower())
    # Collapse consecutive underscores left by adjacent punctuation.
    while '__' in name:
        name = name.replace('__', '_')
    # Identifiers must not start with a digit.
    if re.match(r'\d', name):
        name = '_' + name
    return name
def genusername(name):
    """Generate a unique "first.last"-style username from a full name.

    Transliterates to ASCII and lower-cases; when the base name is taken,
    appends 2, 3, ... until a free one is found.  Relies on module globals
    ``users`` (set of taken usernames, updated in place) and ``count``
    (``itertools.count``) -- TODO confirm against the rest of the module.
    """
    # py3 fix: the removed `unicode()` wrapper was a NameError on Python 3;
    # unidecode() already returns str there.
    base = candidate = unidecode('.'.join(name.strip().lower().split()))
    suffix = count(2)
    while candidate in users:
        # py3 fix: next(it) replaces the removed it.next() method.
        candidate = base + str(next(suffix))
    users.add(candidate)
    return candidate
def parse_AIRBASE_xml(xml_files):
    """Parse AIRBASE per-country metadata XML files into a time-zone Series.

    Parameters
    ----------
    xml_files : iterable of str
        Paths to AIRBASE country metadata XML files.

    Returns
    -------
    pd.Series
        Indexed by 'AB' + station_european_code, values are the station's
        time-zone string (network_time_reference_basis), 'UTC' when absent.
    """
    import untangle
    timezone_meta = {}
    # loop over countries
    for fi in xml_files:
        print(fi)  # py3 fix: print() function replaces the py2 print statement
        cmeta = untangle.parse(fi)
        # loop over stations in each file
        for i in range(len(cmeta.airbase.country.station)):  # py3 fix: range, not xrange
            node = cmeta.airbase.country.station[i]
            # get station name
            station = 'AB' + unidecode(node.station_european_code.cdata)
            # get time zone information. If there is none, assume UTC
            try:
                TZ = unidecode(node.network_info.network_time_reference_basis.cdata)
            except Exception:  # was a bare except; kept best-effort but no longer traps SystemExit etc.
                TZ = 'UTC'
            timezone_meta[station] = TZ
    tz_meta = pd.Series(timezone_meta)
    return tz_meta
def _flatten(s): """pytables only stores plain ascii. For precision data (like mailings), the original data must be retrieved (tbd)""" if not isinstance(s, basestring): return s try: return s.encode('ascii') except: try: return _isounidecode.unidecode(s) except: return ''
def generate_username(first_name, last_name):
    """Build a lower-case ASCII username from first and last name.

    '.' inside names becomes a separator, the parts are joined with
    '******', and every character outside [a-zA-Z0-9.-_] is replaced by '_'.
    """
    username = '******'.join(' '.join(filter(None, (first_name, last_name))).replace(
        '.', ' ').split())
    try:
        # BUGFIX: unidecode() takes just the string.  The original passed a
        # bogus 'ascii' second argument and then called .decode('ascii') on
        # the already-str result, so this always raised and transliteration
        # was silently skipped via the except below.
        username = unidecode(username)
    except Exception:  # was a bare except; keep the logged best-effort fallback
        logging.warning('Failed to unidecode username: %s', username,
                        exc_info=True)
    return ''.join([
        c if re.match(r'[a-zA-Z0-9.\-_]', c) else '_' for c in username
    ]).lower()
def parse_AIRBASE_xml(xml_files):
    """Parse AIRBASE per-country metadata XML files into a time-zone Series.

    Duplicate of the other ``parse_AIRBASE_xml`` in this file -- fixed
    consistently with it; consider deleting one copy.

    Parameters
    ----------
    xml_files : iterable of str
        Paths to AIRBASE country metadata XML files.

    Returns
    -------
    pd.Series
        Indexed by 'AB' + station_european_code, values are the station's
        time-zone string (network_time_reference_basis), 'UTC' when absent.
    """
    import untangle
    timezone_meta = {}
    # loop over countries
    for fi in xml_files:
        print(fi)  # py3 fix: print() function replaces the py2 print statement
        cmeta = untangle.parse(fi)
        # loop over stations in each file
        for i in range(len(cmeta.airbase.country.station)):  # py3 fix: range, not xrange
            node = cmeta.airbase.country.station[i]
            # get station name
            station = 'AB' + unidecode(node.station_european_code.cdata)
            # get time zone information. If there is none, assume UTC
            try:
                TZ = unidecode(node.network_info.network_time_reference_basis.cdata)
            except Exception:  # was a bare except; kept best-effort but no longer traps SystemExit etc.
                TZ = 'UTC'
            timezone_meta[station] = TZ
    tz_meta = pd.Series(timezone_meta)
    return tz_meta
def genusername(self, firstnames, lastname):
    """Generates a username based on the given names.

    Tries, in order: "first.last", "first.m.last" (middle initial),
    "first.middle.last", and finally "first.last2", "first.last3", ...
    The chosen name is added to ``self.usernames`` and returned.
    """
    # Generate a list of normalized first names.
    # py3 fix throughout: unicode()/u''-wrappers removed (unidecode returns
    # str), print>>sys.stderr -> print(file=...), it.next() -> next(it).
    names = [unidecode(n) for n in firstnames.strip().lower().split()]
    # In case the lastname consists of multiple parts we join them with
    # a period.
    lastname = unidecode('.'.join(lastname.strip().lower().split()))
    # Try the "firstname.lastname" option first.
    candidate = '{0}.{1}'.format(names[0], lastname)
    if candidate not in self.usernames:
        self.usernames.add(candidate)
        return candidate
    # If a second name exists, try using the first letter.
    if len(names) > 1 and len(names[1]) > 0:
        candidate = '{0}.{1}.{2}'.format(names[0], names[1][0], lastname)
        if candidate not in self.usernames:
            self.usernames.add(candidate)
            print("-!- Using middle initial for {0}.".format(candidate), file=sys.stderr)
            return candidate
        else:
            # Try with the whole second name.
            candidate = '{0}.{1}.{2}'.format(names[0], names[1], lastname)
            if candidate not in self.usernames:
                self.usernames.add(candidate)
                print("-!- Using middle name for {0}.".format(candidate), file=sys.stderr)
                return candidate
    # We've exhausted our options of readable usernames, start using a suffix.
    suffix = count(2)
    candidate = base = '{0}.{1}'.format(names[0], lastname)
    while candidate in self.usernames:
        candidate = '{0}{1}'.format(base, next(suffix))
    self.usernames.add(candidate)
    print("-!- Using suffix for {0}.".format(candidate), file=sys.stderr)
    return candidate
def genius(artist, title, return_url=False):
    """Scrape lyrics for (artist, title) from genius.com.

    Builds the canonical genius.com lyrics URL, then parses either the old
    single ``div.lyrics`` layout or the newer ``Lyrics__Container`` layout.
    Bracketed section headers ("[Chorus]", ...) are stripped in both cases.

    If ``return_url`` is true, returns the URL without fetching.
    """
    line = f"{artist}-{title}"
    line = re.sub("[,._@!#%^*+:;'()]", "", line)
    line = line.replace("]", "")
    line = line.replace("[", "")
    line = line.replace("?", "")
    line = line.replace(" ", "-")
    line = line.replace("/", "-")
    line = line.replace("-&-", "-and-")
    line = line.replace("&", "-and-")
    # BUGFIX: unidecode() returns str on Python 3 (f-strings above make this
    # py3-only code); the original `.decode()` call raised AttributeError
    # for every input.
    line = unidecode(line)
    line = urllib.parse.quote(line)
    line = f"https://genius.com/{line}-lyrics"
    if return_url:
        return line
    page = requests.get(line)
    html = BeautifulSoup(page.text, 'html.parser')
    result = html.find('div', class_='lyrics')
    if result is not None:
        # Old layout: one <div class="lyrics"> holding plain text.
        lyrics = result.get_text()
        lyrics2 = []
        for line in lyrics.splitlines():
            if line.startswith("["):
                pass
            else:
                lyrics2.append(line)
        lyrics = "\n".join(lyrics2)
        lyrics = lyrics.strip("\n")
        return lyrics
    # New layout type: several Lyrics__Container divs.
    else:
        results = html.findAll(
            "div", {"class": lambda l: l and "Lyrics__Container" in l})
        lyrics = "".join([r.get_text("\n") for r in results])
        # Remove bracketed sections character-by-character (handles headers
        # that span line breaks; `level` tracks bracket nesting depth).
        level = 0
        new = ""
        for cha in lyrics:
            if level <= 0:
                new += cha
            if cha == "[":
                level += 1
            if cha == "]":
                level -= 1
        lyrics = new
        lines = lyrics.splitlines()
        new_lines = []
        for line in lines:
            # Drop any trailing "[..." remnant on the line; keep a blank
            # separator where a header used to be.
            if "[" in line:
                line = line.split("[", 1)[0]
                if line:
                    line += "\n"
            new_lines.append(line.lstrip().rstrip(" ") + "\n")
        lyrics = "".join(new_lines)
        lyrics = lyrics.replace("(\n", "(")
        lyrics = lyrics.replace("\n)", ")")
        lyrics = lyrics.lstrip("\n")
        lyrics = lyrics.lstrip()
        return lyrics
def searchify(self, str):
    """Prepare *str* for search matching: transliterate to ASCII,
    lower-case, drop apostrophes, and collapse every run of non-word
    characters into a single space."""
    folded = unidecode(str).lower()
    without_quotes = folded.replace("'", "")
    return re.sub(r'\W+', ' ', without_quotes)
def slugify(self, str):
    """Turn *str* into a URL slug: transliterated to ASCII, lower-cased,
    with every run of non-word characters collapsed to a single hyphen."""
    folded = unidecode(str).lower()
    return re.sub(r'\W+', '-', folded)
def normalize(value):
    """Normalizes a value by first changing all non-ascii characters to
    their 7-bit representative values and then removing any invalid
    characters (per the module-level RE_INVALID_CHARS pattern).
    """
    # py3 fix: the removed `unicode()` wrapper was a NameError on Python 3;
    # unidecode() already returns str there, so the result is unchanged.
    return RE_INVALID_CHARS.sub('', unidecode(value).lower())