def __init__(self, quality='all', **kwargs):
    """Create a GenreTreeProvider with a given quality level.

    The provider normalizes a genre by matching it against a tree of 705
    single genres. The result is a list of Paths; a Path is a tuple of
    indices describing one possible way through the tree. Use
    ``GenreTreeProvider.resolve_path()`` on a path to recover the full
    genre string for debugging.

    Quality levels:

    - ``all``: find every possible path through the tree, sorted by the
      first index (useful for comparing). This is the default.
    - ``single``: take the first path found. Fastest.
    - ``best_two``: like ``single``, but retries with the reversed word
      list; may beat ``single`` at some speed cost.

    This provider is reversible.

    :param quality: One of ``all``, ``best_two``, ``single``
                    [*default:* ``all``]
    :type quality: String
    """
    Provider.__init__(self, **kwargs)
    self._root = load_genre_tree(get_cache_path('genre_tree.dump'))

    # Dispatch table: unknown quality values fall back to 'all'.
    quality_to_builder = {
        'all': build_genre_path_all,
        'best_two': build_genre_path_best_of_two,
        'single': build_genre_path_single,
    }
    self._build_func = quality_to_builder.get(quality, build_genre_path_all)
def __init__(self, provider_list, **kwargs):
    """Create a provider that applies its subproviders in order to the input.

    :param provider_list: An ordered list of provider objects.
    """
    # Remember the chain before delegating to the base class.
    self._provider_list = provider_list
    Provider.__init__(self, **kwargs)
def __init__(self, **kwargs):
    """Initialize the provider's regex tables for artist-name normalization.

    Compiles the punctuation pattern, the set of words that indicate a
    split point (``feat`` etc.), and the prefix/infix strip patterns.
    """
    Provider.__init__(self, **kwargs)
    # Raw string: "\W" in a plain string literal is an invalid escape
    # sequence (DeprecationWarning; SyntaxWarning on Python 3.12+).
    self._punctuation = re.compile(r'\W|_')
    self._split_reasons = frozenset(['feat', 'featuring', 'and'])
    self._strip_patterns = [re.compile(pattern) for pattern in [
        r'^the\s*', r'^a\s*', r'\s*of\s*'
    ]]
def __init__(self, **kwargs):
    """Initialize the provider's regex tables for album-title normalization.

    Compiles the punctuation pattern and the patterns that strip
    bracketed text and "CD <n>" / "Disc <n>" suffixes.
    """
    Provider.__init__(self, **kwargs)
    # Raw string: "\W" in a plain string literal is an invalid escape
    # sequence (DeprecationWarning; SyntaxWarning on Python 3.12+).
    self._punctuation = re.compile(r'\W|_')
    self._strip_patterns = [re.compile(pattern) for pattern in [
        r'\s*[\(\[{].*?[}\)\]]',  # Strip everything in brackets ([{
        r'\s*(cd|disc)\s*\d+'     # remove CD <X> stuff.
    ]]
def __init__(self, **kwargs):
    """Initialize the lyrics provider backed by libglyr (plyr).

    :raises LookupError: if the optional ``plyr`` module is not available.
    """
    if not HAS_PLYR:
        # Fixed message: previously read "could be imported", which
        # stated the opposite of the actual failure.
        raise LookupError('Plyr could not be imported, which is needed for lyrics')
    # Whether failed lookups are cached too; popped before passing
    # the remaining kwargs on to Provider.__init__.
    self._cache_failures = kwargs.pop('cache_failures', True)
    self.database = plyr.Database(get_cache_path(None))
    Provider.__init__(self, **kwargs)
def __init__(self, **kwargs):
    """Initialize the provider's regex tables for artist-name normalization.

    Compiles the punctuation pattern, the set of split-indicator words
    (``feat`` etc.), and the prefix/infix strip patterns.
    """
    Provider.__init__(self, **kwargs)
    # Raw string: "\W" in a plain string literal is an invalid escape
    # sequence (DeprecationWarning; SyntaxWarning on Python 3.12+).
    self._punctuation = re.compile(r'\W|_')
    self._split_reasons = frozenset(['feat', 'featuring', 'and'])
    self._strip_patterns = [
        re.compile(pattern)
        for pattern in [r'^the\s*', r'^a\s*', r'\s*of\s*']
    ]
def __init__(self, use_cache=True, cache_fails=True, **kwargs):
    """
    :param use_cache: Cache found results?
    :param cache_fails: Also cache missed results?
    """
    Provider.__init__(self, **kwargs)
    self._use_cache = use_cache
    self._cache_fails = cache_fails
    # Persistent cache of genre lookups; writeback keeps mutated
    # entries in memory until sync/close.
    self._shelve = shelve.open(
        get_cache_path('discogs_genre.dump'),
        writeback=True,
    )
def __init__(self, **kwargs):
    """Initialize the provider's regex tables for album-title normalization.

    Compiles the punctuation pattern and the patterns that strip
    bracketed text and "CD <n>" / "Disc <n>" suffixes.
    """
    Provider.__init__(self, **kwargs)
    # Raw string: "\W" in a plain string literal is an invalid escape
    # sequence (DeprecationWarning; SyntaxWarning on Python 3.12+).
    self._punctuation = re.compile(r'\W|_')
    self._strip_patterns = [
        re.compile(pattern) for pattern in [
            r'\s*[\(\[{].*?[}\)\]]',  # Strip everything in brackets ([{
            r'\s*(cd|disc)\s*\d+'     # remove CD <X> stuff.
        ]
    ]
def __init__(self, use_cache=True, cache_fails=True, **kwargs):
    """
    :param use_cache: Cache found results?
    :param cache_fails: Also cache missed results?
    """
    Provider.__init__(self, **kwargs)
    self._use_cache = use_cache
    self._cache_fails = cache_fails
    # Persistent on-disk cache; writeback=True defers writes of
    # mutated entries until sync/close.
    cache_file = get_cache_path('discogs_genre.dump')
    self._shelve = shelve.open(cache_file, writeback=True)
def __init__(self, language='english', **kwargs):
    """Create a stemming provider for the given language.

    See here for a full list of languages:

        http://nltk.org/_modules/nltk/stem/snowball.html

    .. note::

        This does not depend on nltk, it depends on the ``pystemmer``
        package.

    :param language: language to use during stemming, defaults to english.
    """
    Provider.__init__(self, **kwargs)
    self._stemmer = Stemmer(language)
def test_apply(self):
    # A compressing provider maps equal (case-normalized) inputs to
    # the same compact id.
    prov = Provider(compress=True)
    dfunc = DistanceFunction(provider=prov)

    first = prov.process('Akrea')
    second = prov.process('Berta')
    third = prov.process('akrea'.capitalize())

    for actual, expected in [(first, (1, )), (second, (2, )), (third, (1, ))]:
        self.assertEqual(actual, expected)

    # Distinct ids are maximally distant, identical ids minimally so.
    self.assertAlmostEqual(dfunc.compute(first, second), 1.0)
    self.assertAlmostEqual(dfunc.compute(first, third), 0.0)
    self.assertAlmostEqual(dfunc.compute([], []), 1.0)
    self.assertAlmostEqual(dfunc.compute(first + second, third), 0.5)
def __init__(self, name, mask, config=DEFAULT_CONFIG):
    """Create a new session:

    :param name: The name of the session. Used to load it again from disk.
    :param mask: The mask. See :term:`Mask`
    :param config: A dictionary with config values. See :class:`DefaultConfig`
                   for available keys.
    """
    self._config = config
    self._name = name

    # Make access to the mask more efficient
    self._mask = copy(mask)
    self._attribute_list = sorted(mask)
    # NOTE(review): despite the name, this maps key -> list index.
    self._listidx_to_key = {k: i for i, k in enumerate(self._attribute_list)}

    # Lookup tables for those attributes (fast access is crucial here)
    def make_index(idx, default_func):
        # Build key -> descr[idx] map, filling missing entries via
        # default_func(key).
        index = {}
        for key, descr in self._mask.items():
            if descr[idx] is not None:
                index[key] = descr[idx]
            else:
                index[key] = default_func(key)
        return index

    # Import this locally, since we might get a circular import otherwise:
    from munin.distance import DistanceFunction
    from munin.provider import Provider

    # Build indices and set default values:
    self._key_to_providers = make_index(0,
            lambda key: Provider()
    )
    self._key_to_distfuncs = make_index(1,
            lambda key: DistanceFunction(self._key_to_providers[key])
    )
    self._key_to_weighting = make_index(2,
            lambda key: 1.0
    )

    # Sum of the individual weights, pre-calculated once.
    self._weight_sum = sum((descr[2] for descr in mask.values()))

    # Create the associated database.
    self._database = Database(self)

    # Filtering related:
    self._filtering_enabled = config['recom_history_sieving']
    self._recom_history = RecommendationHistory(
        penalty_map=config['recom_history_penalty']
    )

    # Publicly readable attribute. (The earlier dead assignment of a
    # plain dict to self.mapping was removed — this bidict is the one
    # that survives initialization.)
    self.mapping = bidict()
def test_process(self):
    from munin.provider import Provider
    from munin.provider.genre import GenreTreeProvider

    genre_provider = GenreTreeProvider()
    passthrough = Provider()

    # Chaining with a plain Provider must not alter the result.
    chained = genre_provider | passthrough
    result = chained.process('metalcore')
    self.assertEqual(result, genre_provider.process('metalcore'))
def test_apply(self):
    # Compressing provider: equal normalized inputs share one id.
    compressor = Provider(compress=True)
    distance = DistanceFunction(provider=compressor)

    id_akrea = compressor.process('Akrea')
    id_berta = compressor.process('Berta')
    id_akrea_again = compressor.process('akrea'.capitalize())

    self.assertEqual(id_akrea, (1, ))
    self.assertEqual(id_berta, (2, ))
    self.assertEqual(id_akrea_again, (1, ))

    # Different ids -> max distance; same ids -> min distance.
    self.assertAlmostEqual(distance.compute(id_akrea, id_berta), 1.0)
    self.assertAlmostEqual(distance.compute(id_akrea, id_akrea_again), 0.0)
    self.assertAlmostEqual(distance.compute([], []), 1.0)
    self.assertAlmostEqual(distance.compute(id_akrea + id_berta, id_akrea_again), 0.5)
def __init__(self, cache_invalid=False, **kwargs):
    """
    :param cache_invalid: Also cache invalid results of failed calculations?
    """
    # Store the flag before delegating the rest to the base class.
    self._cache_invalid = cache_invalid
    Provider.__init__(self, **kwargs)