def clean(self):
    """returns a new ID3v1Comment object that's been cleaned of problems

    the result is an (ID3v1Comment, fixes_performed) tuple
    where fixes_performed is a list of messages
    formatted from the CLEAN_* templates, one per fix applied

    leading and trailing whitespace is stripped from the
    track_name, artist_name, album_name, year and comment fields;
    fields whose value is None are omitted from the
    new object's keyword arguments
    while the track number and genre are carried over unchanged
    """

    from audiotools.text import (CLEAN_REMOVE_TRAILING_WHITESPACE,
                                 CLEAN_REMOVE_LEADING_WHITESPACE)

    fixes_performed = []
    fields = {}
    for (attr, name) in [("track_name", u"title"),
                         ("artist_name", u"artist"),
                         ("album_name", u"album"),
                         ("year", u"year"),
                         ("comment", u"comment")]:
        initial_value = getattr(self, attr)
        if initial_value is None:
            # unset fields are simply not passed to the constructor
            continue

        # strip trailing whitespace first so that each kind of fix
        # is reported separately
        fix1 = initial_value.rstrip()
        if fix1 != initial_value:
            fixes_performed.append(
                CLEAN_REMOVE_TRAILING_WHITESPACE.format(name))

        # then strip leading whitespace
        fix2 = fix1.lstrip()
        if fix2 != fix1:
            fixes_performed.append(
                CLEAN_REMOVE_LEADING_WHITESPACE.format(name))

        fields[attr] = fix2

    # copy non-text fields as-is
    return (ID3v1Comment(track_number=self.__track_number__,
                         genre=self.__genre__,
                         **fields),
            fixes_performed)
def clean(self):
    """returns a new MetaData object that's been cleaned of problems

    the result is a (metadata, fixes_performed) tuple
    where metadata is a new object of this same class
    built from the cleaned comment strings and this object's
    vendor string, and fixes_performed is a list of messages
    formatted from the CLEAN_* templates

    comment strings without a "=", or whose upper-cased key
    is not among the ATTRIBUTE_MAP names and their ALIASES,
    are copied as-is;
    recognized fields that are empty or only whitespace are dropped,
    the rest have surrounding whitespace stripped
    and the track/album number and total fields
    also lose their leading zeroes
    """

    # imported locally so this method doesn't depend on
    # a module-level "re" import being present
    import re
    from audiotools.text import (CLEAN_REMOVE_TRAILING_WHITESPACE,
                                 CLEAN_REMOVE_LEADING_WHITESPACE,
                                 CLEAN_REMOVE_EMPTY_TAG,
                                 CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES,
                                 CLEAN_REMOVE_LEADING_ZEROES)

    def strip_leading_zeroes(text):
        """returns text without leading zeroes, keeping a lone zero"""

        stripped = text.lstrip(u"0")
        # a value made up solely of zeroes is a literal zero;
        # stripping it to nothing would emit an empty "KEY=" field
        # which this same method treats as a problem to be removed
        if (len(text) > 0) and (len(stripped) == 0):
            return u"0"
        return stripped

    fixes_performed = []

    # map each field name, and any alias of it, back to its attribute
    reverse_attr_map = {}
    for (attr, key) in self.ATTRIBUTE_MAP.items():
        reverse_attr_map[key] = attr
        if key in self.ALIASES:
            for alias in self.ALIASES[key]:
                reverse_attr_map[alias] = attr

    cleaned_fields = []

    for comment_string in self.comment_strings:
        if u"=" not in comment_string:
            # not a KEY=value pair, so leave it alone
            cleaned_fields.append(comment_string)
            continue

        (key, value) = comment_string.split(u"=", 1)
        if key.upper() not in reverse_attr_map:
            # unrecognized fields are passed through untouched
            cleaned_fields.append(comment_string)
            continue

        attr = reverse_attr_map[key.upper()]

        # handle all text fields by stripping whitespace
        if len(value.strip()) == 0:
            # empty fields are dropped from the output entirely
            fixes_performed.append(CLEAN_REMOVE_EMPTY_TAG.format(key))
            continue

        fix1 = value.rstrip()
        if fix1 != value:
            fixes_performed.append(
                CLEAN_REMOVE_TRAILING_WHITESPACE.format(key))

        fix2 = fix1.lstrip()
        if fix2 != fix1:
            fixes_performed.append(
                CLEAN_REMOVE_LEADING_WHITESPACE.format(key))

        # integer fields also strip leading zeroes
        if attr in ("track_number", "album_number"):
            match = re.match(r'(.*?)\s*/\s*(.*)', fix2)
            if match is not None:
                # fix whitespace/zeroes on either side of slash
                fix3 = u"{}/{}".format(
                    strip_leading_zeroes(match.group(1)),
                    strip_leading_zeroes(match.group(2)))
                if fix3 != fix2:
                    fixes_performed.append(
                        CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES.format(key))
            else:
                # fix zeroes only
                fix3 = strip_leading_zeroes(fix2)
                if fix3 != fix2:
                    fixes_performed.append(
                        CLEAN_REMOVE_LEADING_ZEROES.format(key))
        elif attr in ("track_total", "album_total"):
            fix3 = strip_leading_zeroes(fix2)
            if fix3 != fix2:
                fixes_performed.append(
                    CLEAN_REMOVE_LEADING_ZEROES.format(key))
        else:
            fix3 = fix2

        cleaned_fields.append(u"{}={}".format(key, fix3))

    return (self.__class__(cleaned_fields, self.vendor_string),
            fixes_performed)
def clean(self):
    """returns a new MetaData object that's been cleaned of problems

    the result is a (metadata, fixes_performed) tuple
    where metadata is a new object of this same class
    built from the cleaned comment strings and this object's
    vendor string, and fixes_performed is a list of messages
    formatted from the CLEAN_* templates

    comment strings without a "=", or whose upper-cased key
    is not among the ATTRIBUTE_MAP names and their ALIASES,
    are copied as-is;
    recognized fields that are empty or only whitespace are dropped,
    the rest have surrounding whitespace stripped
    and the track/album number and total fields
    also lose their leading zeroes
    """

    # imported locally so this method doesn't depend on
    # a module-level "re" import being present
    import re
    from audiotools.text import (CLEAN_REMOVE_TRAILING_WHITESPACE,
                                 CLEAN_REMOVE_LEADING_WHITESPACE,
                                 CLEAN_REMOVE_EMPTY_TAG,
                                 CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES,
                                 CLEAN_REMOVE_LEADING_ZEROES)

    def strip_leading_zeroes(text):
        """returns text without leading zeroes, keeping a lone zero"""

        stripped = text.lstrip(u"0")
        # an all-zero value such as "0" or "00" must stay "0";
        # reducing it to nothing would produce an empty "KEY=" field
        # of the very sort this method removes
        if (len(text) > 0) and (len(stripped) == 0):
            return u"0"
        return stripped

    fixes_performed = []

    # build a lookup from field name (and each alias) to attribute
    reverse_attr_map = {}
    for (attr, key) in self.ATTRIBUTE_MAP.items():
        reverse_attr_map[key] = attr
        if key in self.ALIASES:
            for alias in self.ALIASES[key]:
                reverse_attr_map[alias] = attr

    cleaned_fields = []

    for comment_string in self.comment_strings:
        if u"=" not in comment_string:
            # no key/value separator, so there's nothing to clean
            cleaned_fields.append(comment_string)
            continue

        (key, value) = comment_string.split(u"=", 1)
        if key.upper() not in reverse_attr_map:
            # fields with no matching attribute are kept verbatim
            cleaned_fields.append(comment_string)
            continue

        attr = reverse_attr_map[key.upper()]

        # handle all text fields by stripping whitespace
        if len(value.strip()) == 0:
            # whitespace-only values are omitted from the result
            fixes_performed.append(CLEAN_REMOVE_EMPTY_TAG.format(key))
            continue

        fix1 = value.rstrip()
        if fix1 != value:
            fixes_performed.append(
                CLEAN_REMOVE_TRAILING_WHITESPACE.format(key))

        fix2 = fix1.lstrip()
        if fix2 != fix1:
            fixes_performed.append(
                CLEAN_REMOVE_LEADING_WHITESPACE.format(key))

        # integer fields also strip leading zeroes
        if attr in ("track_number", "album_number"):
            match = re.match(r'(.*?)\s*/\s*(.*)', fix2)
            if match is not None:
                # fix whitespace/zeroes on either side of slash
                fix3 = u"{}/{}".format(
                    strip_leading_zeroes(match.group(1)),
                    strip_leading_zeroes(match.group(2)))
                if fix3 != fix2:
                    fixes_performed.append(
                        CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES.format(key))
            else:
                # fix zeroes only
                fix3 = strip_leading_zeroes(fix2)
                if fix3 != fix2:
                    fixes_performed.append(
                        CLEAN_REMOVE_LEADING_ZEROES.format(key))
        elif attr in ("track_total", "album_total"):
            fix3 = strip_leading_zeroes(fix2)
            if fix3 != fix2:
                fixes_performed.append(
                    CLEAN_REMOVE_LEADING_ZEROES.format(key))
        else:
            fix3 = fix2

        cleaned_fields.append(u"{}={}".format(key, fix3))

    return (self.__class__(cleaned_fields, self.vendor_string),
            fixes_performed)
def clean(self):
    """returns a new tag object that's been cleaned of problems

    the result is a (tag, fixes_performed) tuple
    where tag is a new object of this same class built from the
    surviving items and this object's header/footer flags,
    and fixes_performed is a list of messages
    formatted from the CLEAN_* templates

    items whose upper-cased key was already seen are dropped;
    type 0 items have their text stripped of surrounding whitespace,
    are reformatted if their key is in INTEGER_ITEMS
    and are dropped if nothing is left;
    items of any other type are kept unchanged
    """

    import re
    from audiotools.text import (CLEAN_REMOVE_DUPLICATE_TAG,
                                 CLEAN_REMOVE_TRAILING_WHITESPACE,
                                 CLEAN_REMOVE_LEADING_WHITESPACE,
                                 CLEAN_FIX_TAG_FORMATTING,
                                 CLEAN_REMOVE_EMPTY_TAG)

    def normalize_integer(text):
        """returns text rewritten as "N" or "N/M" when digits are found"""

        if u"/" not in text:
            # item contains no slash
            lone = re.search(r'\d+', text)
            if lone is None:
                # no integer value at all, so leave the text alone
                # (some integer items may legitimately hold strings)
                return text
            return u"{:d}".format(int(lone.group(0)))

        # item is a slashed field of some sort
        (left, right) = text.split(u"/", 1)
        left_digits = re.search(r'\d+', left)
        right_digits = re.search(r'\d+', right)

        if left_digits is None:
            if right_digits is None:
                # neither side contains an integer value
                return text
            # only a total is present, so the current value becomes 0
            return u"{:d}/{:d}".format(0, int(right_digits.group(0)))

        if right_digits is None:
            # only a current value is present, so the slash is dropped
            return u"{:d}".format(int(left_digits.group(0)))

        # both sides contain an int
        return u"{:d}/{:d}".format(int(left_digits.group(0)),
                                   int(right_digits.group(0)))

    fixes_performed = []
    seen_keys = set()
    kept_items = []

    def report(template, item):
        # the key is only decoded when a fix is actually reported
        fixes_performed.append(template.format(item.key.decode('ascii')))

    for tag in self.tags:
        if tag.key.upper() in seen_keys:
            # only the first item with a given key is kept
            report(CLEAN_REMOVE_DUPLICATE_TAG, tag)
            continue

        seen_keys.add(tag.key.upper())

        if tag.type != 0:
            # items of other types are passed through untouched
            kept_items.append(tag)
            continue

        original_text = tag.__unicode__()

        # check trailing whitespace
        without_trailing = original_text.rstrip()
        if without_trailing != original_text:
            report(CLEAN_REMOVE_TRAILING_WHITESPACE, tag)

        # check leading whitespace
        stripped = without_trailing.lstrip()
        if stripped != without_trailing:
            report(CLEAN_REMOVE_LEADING_WHITESPACE, tag)

        if tag.key in self.INTEGER_ITEMS:
            final_text = normalize_integer(stripped)
            if final_text != stripped:
                report(CLEAN_FIX_TAG_FORMATTING, tag)
        else:
            final_text = stripped

        if len(final_text) > 0:
            kept_items.append(ApeTagItem.string(tag.key, final_text))
        else:
            report(CLEAN_REMOVE_EMPTY_TAG, tag)

    return (self.__class__(kept_items,
                           self.contains_header,
                           self.contains_footer),
            fixes_performed)