Beispiel #1
0
 def move_files(self, files, target, move_to_multi_tracks=True):
     """Move ``files`` onto ``target``, dispatching on the target's type.

     * ``Cluster``: every file is moved to the cluster.
     * ``Track``: every file is moved to the current target. When
       ``move_to_multi_tracks`` is true, the target advances after each
       file to ``album.get_next_track(target)``, falling back to
       ``album.unmatched_files`` when that returns a falsy value.
     * ``File``: every file is moved to ``target.parent``.
     * ``Album``: delegated to ``self.move_files_to_album``.
     * ``ClusterList``: files whose parent is a ``Track`` are removed from
       that track, then all files are passed to ``self.cluster``.

     A ``None`` target aborts with a debug log entry. Any other target type
     moves nothing.

     Sorting of the main window is switched off for the duration of the
     move and is always switched back on, even if a move raises.
     """
     if target is None:
         log.debug("Aborting move since target is invalid")
         return
     self.window.set_sorting(False)
     try:
         if isinstance(target, Cluster):
             for file in process_events_iter(files):
                 file.move(target)
         elif isinstance(target, Track):
             album = target.album
             for file in process_events_iter(files):
                 file.move(target)
                 if move_to_multi_tracks:  # Assign next file to following track
                     target = album.get_next_track(
                         target) or album.unmatched_files
         elif isinstance(target, File):
             for file in process_events_iter(files):
                 file.move(target.parent)
         elif isinstance(target, Album):
             self.move_files_to_album(files, album=target)
         elif isinstance(target, ClusterList):
             for file in process_events_iter(files):
                 if isinstance(file.parent, Track):
                     file.parent.remove_file(file)
             self.cluster(files)
     finally:
         # Re-enable sorting on every exit path so a failing move cannot
         # leave the view permanently unsorted.
         self.window.set_sorting(True)
Beispiel #2
0
    def cluster(self, objs):
        """Group files with similar metadata to 'clusters'.

        When ``objs`` holds at most one object, or contains
        ``self.unclustered_files``, all files of ``self.unclustered_files``
        are clustered; otherwise the files are resolved through
        ``self.get_files_from_objects(objs)``.

        Each ``(name, artist, files)`` triple produced by
        ``Cluster.cluster`` is mapped to a cluster via ``self.load_cluster``
        and the files are collected per cluster before being added in one
        ``add_files`` call per cluster.

        Sorting of the main window is switched off while clusters are built
        and is always switched back on, even if clustering raises.
        """
        log.debug("Clustering %r", objs)
        if len(objs) <= 1 or self.unclustered_files in objs:
            files = list(self.unclustered_files.files)
        else:
            files = self.get_files_from_objects(objs)

        self.window.set_sorting(False)
        try:
            cluster_files = defaultdict(list)
            # Distinct loop names keep the input list ``files`` from being
            # shadowed by the per-cluster file groups.
            for name, artist, grouped_files in Cluster.cluster(files, 1.0, self):
                cluster = self.load_cluster(name, artist)
                cluster_files[cluster].extend(grouped_files)
            for cluster, members in process_events_iter(cluster_files.items()):
                cluster.add_files(members)
        finally:
            # Restore sorting on every exit path.
            self.window.set_sorting(True)
Beispiel #3
0
    def _clustering_finished(self, callback, result=None, error=None):
        """Apply the outcome of a clustering run, then invoke ``callback``.

        If ``error`` is truthy it is logged and nothing else happens; in
        particular ``callback`` is not called.

        Otherwise each entry of ``result`` is turned into a set of files.
        Sets with more than one file go to the cluster returned by
        ``self.load_cluster(title, artist)``; smaller sets go to
        ``self.unclustered_files``.

        The work runs inside ``self.window.ignore_selection_changes`` with
        sorting switched off; sorting is always switched back on, even if
        adding files raises. ``callback`` (if truthy) is called afterwards
        with no arguments.
        """
        if error:
            log.error('Error while clustering: %r', error)
            return

        with self.window.ignore_selection_changes:
            self.window.set_sorting(False)
            try:
                for file_cluster in process_events_iter(result):
                    files = set(file_cluster.files)
                    if len(files) > 1:
                        cluster = self.load_cluster(file_cluster.title, file_cluster.artist)
                    else:
                        cluster = self.unclustered_files
                    cluster.add_files(files)
            finally:
                # Restore sorting on every exit path.
                self.window.set_sorting(True)

        if callback:
            callback()
Beispiel #4
0
 def candidates():
     """Yield one ``SimMatchAlbum`` per track of ``self.tracks``.

     ``self`` and ``file`` are taken from the enclosing scope. The
     similarity is the result of comparing the track's metadata with
     ``file.orig_metadata``.
     """
     for candidate_track in process_events_iter(self.tracks):
         score = candidate_track.metadata.compare(file.orig_metadata)
         yield SimMatchAlbum(similarity=score, track=candidate_track)
Beispiel #5
0
    def cluster(self, threshold, tagger=None):
        """Group the entries of ``self.cluster_dict`` into bins of similar tokens.

        Every pair of entries whose lower-cased tokens have a ``similarity``
        of at least ``threshold`` is recorded, and the pairs are then
        processed from most to least similar to build or merge bins in
        ``self.cluster_bins`` / ``self.index_id_cluster``. An entry whose
        ``get_word_and_count`` reports a truthy word with a count above 1
        additionally starts a bin of its own.

        Args:
            threshold: minimum similarity for two tokens to be paired.
            tagger: optional object whose ``window.set_statusbar_message``
                is called at progress checkpoints; no progress is reported
                when it is falsy.
        """
        # Keep the matches sorted in a heap
        heap = []
        num_files = self.cluster_dict.get_size()

        # 20 evenly spaced indexes of files being clustered, used as checkpoints for every 5% progress
        status_update_steps = ProgressCheckpoints(num_files, 20)

        # Lower-case every token once instead of once per compared pair.
        # NOTE(review): assumes get_token() returns the same value on
        # repeated calls for the same index.
        tokens = [
            self.cluster_dict.get_token(index).lower()
            for index in range(num_files)
        ]

        for y in process_events_iter(range(num_files)):
            token_y = tokens[y]
            # range(y) never contains y, so no self-comparison check is needed.
            for x in range(y):
                c = similarity(tokens[x], token_y)
                if c >= threshold:
                    # heapq is a min-heap: store the distance so that the
                    # most similar pair is popped first.
                    heappush(heap, ((1.0 - c), [x, y]))

            word, count = self.cluster_dict.get_word_and_count(y)
            if word and count > 1:
                self.cluster_bins[self.cluster_count] = [y]
                self.index_id_cluster[y] = self.cluster_count
                self.cluster_count = self.cluster_count + 1

            if tagger and status_update_steps.is_checkpoint(y):
                statusmsg = N_(
                    "Clustering - step %(step)d/3: %(cluster_type)s (%(update)d%%)"
                )
                mparams = {
                    'step': self.cluster_type.value,
                    'cluster_type': _(self._cluster_type_label()),
                    'update': status_update_steps.progress(y),
                }
                tagger.window.set_statusbar_message(statusmsg, mparams)

        while heap:
            # Only the pair is needed; the distance merely ordered the heap.
            # (Do not unpack into ``_``: that name is the translation
            # function used above.)
            pair = heappop(heap)[1]

            # -1 marks "not assigned to any bin yet". Only lookup failures
            # are treated that way, so interrupts and unrelated errors
            # propagate.
            try:
                match0 = self.index_id_cluster[pair[0]]
            except LookupError:
                match0 = -1

            try:
                match1 = self.index_id_cluster[pair[1]]
            except LookupError:
                match1 = -1

            # if neither item is in a cluster, make a new cluster
            if match0 == -1 and match1 == -1:
                self.cluster_bins[self.cluster_count] = [pair[0], pair[1]]
                self.index_id_cluster[pair[0]] = self.cluster_count
                self.index_id_cluster[pair[1]] = self.cluster_count
                self.cluster_count = self.cluster_count + 1
                continue

            # If cluster0 is in a bin, stick the other match into that bin
            if match0 >= 0 and match1 < 0:
                self.cluster_bins[match0].append(pair[1])
                self.index_id_cluster[pair[1]] = match0
                continue

            # If cluster1 is in a bin, stick the other match into that bin
            if match1 >= 0 and match0 < 0:
                self.cluster_bins[match1].append(pair[0])
                self.index_id_cluster[pair[0]] = match1
                continue

            # If both matches are already in two different clusters, merge the clusters
            if match1 != match0:
                self.cluster_bins[match0].extend(self.cluster_bins[match1])
                for match in self.cluster_bins[match1]:
                    self.index_id_cluster[match] = match0
                del self.cluster_bins[match1]
Beispiel #6
0
    def cluster(files, threshold, tagger=None):
        """Yield ``(album_name, artist_name, files)`` groups for ``files``.

        Each file contributes an artist (``albumartist`` tag, else
        ``artist``) and an album, both passed through
        ``album_artist_from_path`` together with the file name. Artists and
        albums are clustered separately with ``ClusterEngine`` using
        ``threshold``. Files are then grouped by album cluster, and each
        group is labelled with the artist cluster that occurs most often in
        it, or "Various Artists" when no artist cluster was found.

        The third element of every yielded tuple is a generator over the
        matching entries of ``files``. When ``tagger`` is truthy, progress
        is reported through ``tagger.window.set_statusbar_message``.
        """
        settings = get_config().setting
        strip_drive = settings["windows_compatibility"] or IS_WIN
        artist_dict = ClusterDict()
        album_dict = ClusterDict()
        tracks = []

        # 10 evenly spaced indexes of files being clustered, used as checkpoints for every 10% progress
        status_update_steps = ProgressCheckpoints(len(files), 10)

        for position, item in process_events_iter(enumerate(files)):
            artist = item.metadata["albumartist"] or item.metadata["artist"]
            album = item.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            path = ntpath.splitdrive(item.filename)[1] if strip_drive else item.filename
            album, artist = album_artist_from_path(path, album, artist)
            # For each track, record the index of the artist and album within the clusters
            artist_index = artist_dict.add(artist)
            album_index = album_dict.add(album)
            tracks.append((artist_index, album_index))

            if not (tagger and status_update_steps.is_checkpoint(position)):
                continue
            statusmsg = N_(
                "Clustering - step %(step)d/3: %(cluster_type)s (%(update)d%%)"
            )
            mparams = {
                'step': ClusterType.METADATA.value,
                'cluster_type': _(ClusterEngine.cluster_type_label(ClusterType.METADATA)),
                'update': status_update_steps.progress(position),
            }
            tagger.window.set_statusbar_message(statusmsg, mparams)

        artist_cluster_engine = ClusterEngine(artist_dict, ClusterType.ARTIST)
        artist_cluster_engine.cluster(threshold, tagger)

        album_cluster_engine = ClusterEngine(album_dict, ClusterType.ALBUM)
        album_cluster_engine.cluster(threshold, tagger)

        # Arrange tracks into albums: album cluster id -> indexes into ``files``
        albums = {}
        for position, track in enumerate(tracks):
            album_cluster = album_cluster_engine.get_cluster_from_id(track[1])
            if album_cluster is None:
                continue
            if album_cluster not in albums:
                albums[album_cluster] = []
            albums[album_cluster].append(position)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, member_indexes in albums.items():
            album_name = album_cluster_engine.get_cluster_title(album_id)

            # The first artist cluster to reach the highest count wins.
            top_count = 0
            top_artist = None
            tallies = {}
            for member in member_indexes:
                artist_cluster = artist_cluster_engine.get_cluster_from_id(
                    tracks[member][0])
                if artist_cluster is None:
                    continue
                tally = tallies.get(artist_cluster, 0) + 1
                if tally > top_count:
                    top_count, top_artist = tally, artist_cluster
                tallies[artist_cluster] = tally

            if top_artist is not None:
                artist_name = artist_cluster_engine.get_cluster_title(top_artist)
            else:
                artist_name = "Various Artists"

            yield album_name, artist_name, (files[member] for member in member_indexes)