コード例 #1
0
def run_queries(discs, result_list, language, fingerprint, mixed,
                do_quick_match):
    """Query all discs, retrying with the opposite fingerprint mode if needed.

    The first pass uses ``fingerprint`` as given and writes into
    ``result_list``. When that pass yields more albums than discs, a match
    score below 75, or zero for the third value returned by
    ``run_query_on_discs``, a second pass runs with ``not fingerprint`` into
    a scratch list. The second pass wins when its score is higher, or when
    the scores tie and it has fewer albums or a larger third value; in that
    case ``result_list`` is overwritten in place.

    Returns the match score of whichever pass was kept.
    """
    first_score, first_albums, first_arts = run_query_on_discs(
        discs, result_list, language, fingerprint, mixed, do_quick_match)

    needs_retry = (first_albums > len(discs) or first_score < 75
                   or first_arts == 0)
    if not needs_retry:
        return first_score

    # The first pass looked weak, so flip the fingerprinting mode and compare.
    Log('Not impressed, trying the other way (fingerprinting: %s)' %
        (not fingerprint))
    retry_results = []
    second_score, second_albums, second_arts = run_query_on_discs(
        discs, retry_results, language, not fingerprint, mixed,
        do_quick_match)

    tie_but_tidier = (second_score == first_score and
                      (second_albums < first_albums
                       or second_arts > first_arts))
    if second_score > first_score or tie_but_tidier:
        Log('This way gave a better match, keeping.')
        # Slice-assign so the caller's list object receives the new results.
        result_list[:] = retry_results
        return second_score

    return first_score
コード例 #2
0
def group_tracks_by_disc(query_list):
    """Partition tracks into per-disc groups.

    Two strategies are tried in order:

    1. If the tracks carry more than one distinct non-None ``disc`` value,
       group by that value (tracks whose ``disc`` is None form their own
       group).
    2. Otherwise sort by the first part's path and start a new disc every
       time the track index drops below the previous one. This is abandoned
       if any such new disc does not begin at index 1.

    Returns the groups (the dict's ``values()``) when more than one disc was
    found, otherwise ``[query_list]``.
    """
    # Strategy 1: trust the disc tags when they name several discs.
    tagged_discs = set(t.disc for t in query_list if t.disc is not None)
    if len(tagged_discs) > 1:
        by_tag = defaultdict(list)
        for track in query_list:
            by_tag[track.disc].append(track)
        return by_tag.values()

    # Strategy 2: order by filename and look for the index restarting.
    by_position = defaultdict(list)
    current_disc = 1
    previous_index = 0
    for track in sorted(query_list, key=lambda item: item.parts[0]):
        if track.index < previous_index:
            current_disc += 1
            if track.index != 1:
                Log('Disc %d didn\'t start with first track, we won\'t use this method.'
                    % current_disc)
                # Throw away everything gathered so far; clustering is unreliable.
                by_position = defaultdict(list)
                break
        by_position[current_disc].append(track)
        previous_index = track.index

    if len(by_position) > 1:
        return by_position.values()

    # Neither strategy found several discs: treat the input as one disc.
    return [query_list]
コード例 #3
0
def improve_from_tag(existing, file, tag):
    """Return ``existing``, replaced by the file's ``tag`` value if readable.

    The file is opened with ``mutagen.File(file, easy=True)``. If that
    succeeds and ``tag`` is present with at least one value, the first value
    replaces ``existing``. Any failure while reading tags is logged and the
    original value is kept. The result is passed through ``toBytes``.
    """
    tags = None
    try:
        tags = mutagen.File(file, easy=True)
    except Exception:
        # Best effort only: keep `existing`. `Exception` (rather than a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        Log('There was an exception thrown reading tags.')

    if tags and tag in tags:
        values = tags[tag]
        # An empty value list carries no replacement; keep what we had.
        if values:
            existing = values[0]

    return toBytes(existing)
コード例 #4
0
def run_query_on_discs(discs, result_list, language, fingerprint, mixed,
                       do_quick_match):
    """Run ``lookup`` once per disc and combine the match scores.

    ``discs`` is an iterable of track lists. Each list is passed to
    ``lookup`` together with the shared ``result_list``; the per-disc match
    scores are averaged, weighted by the number of tracks on each disc.

    Returns ``(match, albums, arts)``. ``albums`` and ``arts`` are the values
    reported by the *last* ``lookup`` call only.
    NOTE(review): if those two are meant to be totals over all discs they
    would need accumulating -- confirm against ``lookup``'s contract.

    When there are no tracks at all (no discs, or only empty discs) the
    match is 0 instead of raising ``ZeroDivisionError``.
    """
    # arts1 is initialised too so that an empty `discs` cannot leave it unbound.
    match1 = albums1 = arts1 = total_tracks = 0
    for tracks in discs:
        (match, albums1, arts1) = lookup(tracks,
                                         result_list,
                                         language=language,
                                         fingerprint=fingerprint,
                                         mixed=mixed,
                                         do_quick_match=do_quick_match)
        total_tracks += len(tracks)
        match1 += match * len(tracks)

    # Nothing was queried: there is no weighted average to compute.
    if total_tracks == 0:
        return (0, albums1, arts1)

    match1 = match1 / float(total_tracks)
    Log('Querying all discs generated %d albums and a total match of %d' %
        (albums1, match1))

    return (match1, albums1, arts1)
コード例 #5
0
def lookup(query_list,
           result_list,
           language=None,
           fingerprint=False,
           mixed=False,
           multiple=False,
           do_quick_match=False):
    """Send a list of tracks to the local Gracenote search endpoint.

    One request is built for the whole of ``query_list``. Every track
    contributes its first part's path and a numeric identifier, plus
    whichever of title, artist, album artist, album, index and disc are set
    on it. ``fingerprint``, ``mixed``, ``multiple`` and ``language`` are
    passed through as query parameters, and the response is parsed as XML.

    Returns ``(0, 0, 0)`` when ``query_list`` is empty or when the request or
    the XML parsing raises.

    NOTE(review): this function looks truncated. After a successful parse it
    falls off the end (returning None) and never uses ``result_list``,
    ``do_quick_match``, ``parts`` or the sanity values, although
    ``run_query_on_discs`` unpacks a 3-tuple from it. Confirm against the
    full implementation.
    """

    # This shouldn't happen, but be safe.
    if not query_list:
        return (0, 0, 0)

    # See if input looks like a sane album
    (sane_input_tracks, unique_input_albums,
     input_discs) = compute_input_sanity(query_list)

    # Build up the query with the contents of the query list.
    args = ''
    parts = {}

    Log('Running Gracenote match on %d tracks with fingerprinting: %d and mixedContent: %d and multiple: %d'
        % (len(query_list), fingerprint, mixed, multiple))
    for i, track in enumerate(query_list):

        # We need to pass at least a path and an identifier for each track that we know about.
        args += '&tracks[%d].path=%s' % (i, quote(track.parts[0], ''))
        args += '&tracks[%d].userData=%d' % (i, i)

        # Keep track of the identifier -> part mapping so we can reassemble later.
        parts[i] = track.parts[0]

        if track.name:
            args += '&tracks[%d].title=%s' % (
                i, quote(toBytes(track.title or track.name), ''))

        # 'Various Artists' is a placeholder, not a real artist to search for.
        has_real_artist = track.artist and track.artist != 'Various Artists'
        if has_real_artist:
            args += '&tracks[%d].artist=%s' % (
                i, quote(toBytes(track.artist), ''))

        # Fall back to the track artist when no album artist is tagged.
        if track.album_artist:
            args += '&tracks[%d].albumArtist=%s' % (
                i, quote(toBytes(track.album_artist), ''))
        elif has_real_artist:
            args += '&tracks[%d].albumArtist=%s' % (
                i, quote(toBytes(track.artist), ''))

        if track.album and track.album != '[Unknown Album]':
            args += '&tracks[%d].album=%s' % (i, quote(toBytes(track.album),
                                                       ''))
        if track.index:
            args += '&tracks[%d].index=%s' % (i, track.index)
        if track.disc:
            args += '&tracks[%d].parentIndex=%s' % (i, track.disc)
        Log(' - %s/%s - %s/%s - %s' %
            (toBytes(track.artist), toBytes(track.album), toBytes(
                track.disc), toBytes(track.index), toBytes(track.name)))

    url = 'http://127.0.0.1:32400/services/gracenote/search?fingerprint=%d&mixedContent=%d&multiple=%d%s&lang=%s' % (
        fingerprint, mixed, multiple, args, language)
    try:
        response = urlopen(url)
        try:
            res = minidom.parse(response)
        finally:
            # Release the connection whether or not parsing succeeded.
            response.close()
    except Exception as e:
        Log('Error parsing Gracenote response: ' + str(e))
        return (0, 0, 0)
コード例 #6
0
def Scan(path,
         files,
         media_list,
         subdirs,
         language=None,
         root=None,
         respect_tags=False):
    """Scan a directory for audio files and match them via Gracenote.

    Steps, all of which operate on ``media_list`` in place (nothing is
    returned):

    1. ``AudioFiles.Scan`` performs the basic audio file scan.
    2. If there are files, decide whether a "quick match" is possible: no
       subdirectories, files not directly in ``root``, a unique 1-2 digit
       track number prefix on every filename, the first file not on a disc
       greater than 1 and not a 'Various Artists' album, and agreement on
       artist/album across the tags of up to the first three files.
    3. Build the query list, either from filenames (quick match) or from
       ``AudioFiles.Process`` (full tag reading).
    4. Query with ``run_queries``; if the score is below 75, retry split
       into per-disc queries and keep whichever scored higher.
    5. Discard results scoring below 50 and fall back to tag data.
    6. With ``respect_tags``, keep the tag-derived tracks but copy the
       Gracenote GUIDs and thumb URLs onto them, and fill in an unknown
       album/artist from Gracenote.

    Args:
        path: Directory path, joined onto ``root`` for logging.
        files: Audio file paths found in the directory.
        media_list: Output list, rewritten in place.
        subdirs: Subdirectories below this one.
        language: Passed to tag reading, ``Unicodize`` and the queries.
        root: Section root; files sitting directly in it are treated as
            mixed content.
        respect_tags: Prefer tag metadata over Gracenote metadata.
    """

    # Scan for audio files.
    AudioFiles.Scan(path, files, media_list, subdirs, root)

    root_str = root or ''
    loc_str = os.path.join(root_str, path)
    Log('Scanning: ' + loc_str)
    Log('Files: ' + str(files))
    Log('Subdirs: ' + str(subdirs))

    # Look at the files and determine whether we can do a quick match (minimal tag parsing).
    do_quick_match = True
    mixed = False

    # Make sure we're looking at a leaf directory (no audio files below here).
    if len(subdirs) > 0:
        Log('Found directories below this one; won\'t attempt quick matching.')
        do_quick_match = False

    if files:

        # Make sure we're not sitting in the section root.
        parent_path = os.path.split(files[0])[0]
        if parent_path == root:
            Log('File(s) are in section root; doing expensive matching with mixed content.'
                )
            do_quick_match = False
            mixed = True

        # Make sure we have reliable track indices for all files and there are no dupes.
        tracks = {}
        for f in files:
            try:
                # re.search returns None for a filename without a numeric
                # prefix, so the attribute access raises and lands below.
                index = re.search(r'^([0-9]{1,2})[^0-9].*',
                                  os.path.split(f)[-1]).groups(0)[0]
            except Exception:
                do_quick_match = False
                Log('Couldn\'t find track indices in all filenames; doing expensive matching.'
                    )
                break
            if tracks.get(index):
                do_quick_match = False
                mixed = True
                Log('Found duplicate track index: %s; doing expensive matching with mixed content.'
                    % index)
                break
            else:
                tracks[index] = True

        # Read the first track's tags to check for multi-disc and VA.
        if do_quick_match:
            disc = album_artist = None
            try:
                (artist, album, title, track, disc, album_artist,
                 compil) = AudioFiles.getInfoFromTag(files[0], language)
            except Exception:
                Log('Exception reading tags from first file; doing expensive matching.'
                    )
                do_quick_match = False

            # Make sure we are on the first disc.
            if disc is not None and disc > 1:
                Log('Skipping quick match because of non-first disc.')
                do_quick_match = False

            # We want to read all the tags for VA albums to pick up track artists.
            if album_artist is not None and album_artist == 'Various Artists':
                Log('Skipping quick match for Various Artists album.')
                do_quick_match = False

        # The consensus check below starts from scratch; values read from the
        # first file above are deliberately not carried over.
        artist = None
        album = None

        if do_quick_match:
            Log('Doing quick match')

            # See if we have some consensus on artist/album by reading a few tags.
            for i in range(3):
                if i < len(files):
                    this_artist = this_album = tags = None
                    try:
                        tags = mutagen.File(files[i], easy=True)
                    except Exception:
                        Log('There was an exception thrown reading tags.')

                    if tags:
                        # See if there's an album artist tag.
                        album_artist_tags = [
                            t for t in ['albumartist', 'TPE2', 'performer']
                            if t in tags
                        ]
                        album_artist_tag = album_artist_tags[0] if len(
                            album_artist_tags) else None

                        # Prefer the album artist, then the track artist.
                        if album_artist_tag:
                            this_artist = tags[album_artist_tag][0]
                        elif 'artist' in tags:
                            this_artist = tags['artist'][0]

                        if 'album' in tags:
                            this_album = tags['album'][0]

                    if artist and artist != this_artist:
                        Log('Found different artists in tags (%s vs. %s); doing expensive matching.'
                            % (artist, this_artist))
                        do_quick_match = False
                        break

                    if album and album != this_album:
                        Log('Found different albums in tags (%s vs. %s); doing expensive matching.'
                            % (album, this_album))
                        do_quick_match = False
                        break

                    artist = this_artist
                    album = this_album

            if not artist or not album:
                Log('Couldn\'t determine unique artist or album from tags; doing expensive matching.'
                    )
                do_quick_match = False

        query_list = []
        result_list = []
        fingerprint = False

        # Directory looks clean, let's build a query list directly from info gleaned from file names.
        if do_quick_match:
            Log('Building query list for quickmatch with artist: %s, album: %s'
                % (artist, album))

            # Determine if the artist and/or album appears in all filenames, since we'll want to strip these out for clean titles.
            artist_lower = artist.lower()
            album_lower = album.lower()
            strip_artist = len([
                f for f in files if artist_lower in Unicodize(
                    os.path.basename(f), language).lower()
            ]) == len(files)
            strip_album = len([
                f for f in files if album_lower in Unicodize(
                    os.path.basename(f), language).lower()
            ]) == len(files)

            # Titles get runs of underscores/dots/spaces collapsed to a single
            # space below, so normalize the names the same way, then escape
            # them so that regex metacharacters in an artist or album name
            # (parentheses, '+', '*', ...) are matched literally instead of
            # raising or acting as wildcards.
            artist_pattern = r'(?i)' + re.escape(
                re.sub(r'[_\. ]+', ' ', artist))
            album_pattern = r'(?i)' + re.escape(
                re.sub(r'[_\. ]+', ' ', album))

            for f in files:
                try:
                    filename = os.path.splitext(os.path.split(f)[1])[0]
                    (head, index, title) = re.split(r'^([0-9]{1,2})', filename)

                    # Replace underscores and dots with spaces.
                    title = re.sub(r'[_\. ]+', ' ', title)

                    # Things in parens seem to confuse Gracenote, so let's strip them out.
                    title = re.sub(r' ?\(.*\)', '', title)

                    # Remove artist name from title if it appears in all of them.
                    if strip_artist and len(files) > 2:
                        title = re.sub(artist_pattern, '', title)

                    # Remove album title from title if it appears in all of them.
                    if strip_album and len(files) > 2:
                        title = re.sub(album_pattern, '', title)

                    # Remove any remaining index-, artist-, and album-related cruft from the head of the track title.
                    title = re.sub(r'^[\W\-]+', '', title).strip()

                    # Last chance for artist or album prefix.
                    if not strip_artist and Unicodize(
                            title, language).lower().find(artist_lower) == 0:
                        title = title[len(artist):]

                    if not strip_album and Unicodize(
                            title, language).lower().find(album_lower) == 0:
                        title = title[len(album):]

                    t = Media.Track(artist=toBytes(artist),
                                    album=toBytes(album),
                                    title=toBytes(title),
                                    index=int(index))
                    t.parts.append(f)

                    Log(' - Adding: %s - %s' % (index, title))
                    query_list.append(t)

                except Exception as e:
                    # A track that cannot be prepared is left out of the query.
                    Log('Error preparing tracks for quick matching: ' + str(e))

        # Otherwise, let's do old school directory crawling and tag reading.
        else:
            AudioFiles.Process(path, files, media_list, subdirs, root)
            query_list = list(media_list)

        # Try as-is first (ask for everything at once).
        discs = [query_list]
        final_match = run_queries(discs, result_list, language, fingerprint,
                                  mixed, do_quick_match)

        # If the match was still poor, and it looks like we have multiple discs, try splitting.
        if final_match < 75:
            discs = group_tracks_by_disc(query_list)
            if len(discs) > 1:
                Log('Result still looked bad, we will try splitting into separate per-disc queries.'
                    )
                other_result_list = []
                other_match = run_queries(discs, other_result_list, language,
                                          fingerprint, mixed, do_quick_match)

                if other_match > final_match:
                    Log('The split result was best, we will use it.')
                    result_list = other_result_list
                    final_match = other_match

        # If we have a poor match, don't use it.
        if final_match < 50.0:
            Log('That was terrible, let us not use it.')
            result_list = []

        # Finalize the results.
        used_tags = False
        del media_list[:]
        if len(result_list) > 0:
            # Gracenote results.
            for result in result_list:
                media_list.append(result)
        else:
            # We bailed during the GN lookup, fall back to tags.
            used_tags = True
            AudioFiles.Process(path, files, media_list, subdirs, root)

        # If we wanted to respect tags, then make sure we used tags.
        if not used_tags and respect_tags:

            # Let's grab tag results, and then set GUIDs we found.
            tag_media_list = []
            AudioFiles.Process(path, files, tag_media_list, subdirs, root)

            # Index the Gracenote tracks by the path of their first part.
            path_map = {}
            for track in media_list:
                path_map[track.parts[0]] = track

            for track in tag_media_list:
                if track.parts[0] in path_map:
                    gn_track = path_map[track.parts[0]]
                    track.guid = gn_track.guid
                    track.album_guid = gn_track.album_guid
                    track.artist_guid = gn_track.artist_guid
                    track.album_thumb_url = gn_track.album_thumb_url
                    track.artist_thumb_url = gn_track.artist_thumb_url

                    # If the tags failed, fill in key data from Gracenote.
                    if track.album == '[Unknown Album]':
                        track.album = gn_track.album

                    if track.artist == '[Unknown Artist]':
                        track.artist = gn_track.artist

            media_list[:] = tag_media_list
コード例 #7
0
    }

    # Figure out the unique artists/albums/indexes.
    unique_artists = len(
        set([
            t[1].getAttribute('grandparentTitle')
            for t in matched_tracks.items()
        ]))
    unique_albums = len(
        set([t[1].getAttribute('parentTitle')
             for t in matched_tracks.items()]))
    unique_indices = len(
        set([t[1].getAttribute('index') for t in matched_tracks.items()]))

    if DEBUG:
        Log('Raw track matches:')
        for track in [match[1] for match in matched_tracks.items()]:
            Log('  - %s / %s - %s/%s - %s' %
                (track.getAttribute('grandparentTitle'),
                 track.getAttribute('parentTitle'),
                 track.getAttribute('parentIndex'),
                 track.getAttribute('index'), track.getAttribute('title')))

    # Look through the results and determine some consensus metadata so we can do a better job of keeping rogue and
    # unmatched tracks together. We're going to weight matches in the first third of the tracks twice as high, for
    # cases in which matches come through for the last half of tracks.
    #
    sorted_items = sorted(
        matched_tracks.items(),
        key=lambda t: int(t[1].getAttribute('parentIndex') or 1) * 100 + int(t[
            1].getAttribute('index') or -1))