def match_text(self, text, artist, song):
    """
    Filter search engine content against the original search terms.

    Evaluates a single search result for accuracy. All comparisons are
    case-insensitive substring checks. A result is accepted only when it
    mentions both the artist and the song and carries none of the unwanted
    markers (blocklisted terms, remix/mix, "vs" mashups, bootlegs). A marker
    is tolerated when it is part of the requested artist or song itself,
    so titles such as "Undercover" are not rejected by the 'cover' flag.

    Args:
        text: Search result text to evaluate.
        artist: Artist name that was searched for.
        song: Song title that was searched for.

    Returns:
        True when the result is accepted, otherwise None.
    """
    artist = artist.lower()
    song = song.lower()
    text = normalize_parens(text.lower())

    def requested(term):
        # A term the caller explicitly asked for must not disqualify a result.
        return term in song or term in artist

    def unwanted(term):
        # Present in the result although the caller never asked for it.
        return term in text and not requested(term)

    do_not_download = (
        'preview',
        '(rip)',
        'ringtone',
        '(cut)',
        'edit',
        '(clean)',
        '(censored)',
        'cover',
    )

    if artist not in text or song not in text:
        self.logger.debug('Not a match.')
        return None

    if any(unwanted(flag) for flag in do_not_download):
        self.logger.debug('Not a match.')
        return None

    if unwanted('remix'):
        self.logger.debug('Passing on a bullshit remix')
        return None

    # "original mix" is the unmodified track, so it is not treated as a remix.
    if 'original mix' not in text and unwanted('mix'):
        self.logger.debug('Passing on a bullshit remix')
        return None

    # Both spellings count as a mashup marker, and either spelling in the
    # requested artist/song exempts the result.
    text_has_vs = 'vs.' in text or ' vs ' in text
    if text_has_vs and not (requested('vs.') or requested(' vs ')):
        self.logger.debug('Passing on bullshit mashup')
        return None

    if unwanted('bootleg'):
        self.logger.debug('Passing on bootleg')
        return None

    self.logger.debug('Match found!')
    return True
def scrub(value):
    """
    Scrub a value of trashy data.

    For every entry of ``s.ILLEGALS`` that occurs in the value, the entry is
    removed together with the run of letters, digits, dots and parentheses
    directly in front of it. Afterwards underscores become spaces, runs of
    spaces are collapsed to one, and a space in front of a dot is dropped.

    Args:
        value: The string to clean; may be empty or None.

    Returns:
        The cleaned, stripped string, or None when there is nothing to scrub.
    """
    # Bail out before normalizing so None never reaches the helper.
    if not value:
        return None
    value = normalize_parens(value)
    if not value:
        return None

    capture = r'[\.A-Za-z0-9\(\)]*'
    for ill in s.ILLEGALS:
        if ill in value:
            # The entry is matched as literal text: escaping keeps characters
            # such as '.', '(' or '+' from being read as regex syntax, which
            # could raise re.error or produce no match at all.
            match = re.search(capture + re.escape(ill), value, re.IGNORECASE)
            value = value.replace(match.group(0), '').strip()

    value = value.replace('_', ' ')
    # Removals and underscore replacement can leave doubled spaces behind.
    value = re.sub(r' {2,}', ' ', value)
    value = value.replace(' .', '.')
    return value.strip()