Example #1
0
def merge_hints(query_track, consensus_track, part, do_quick_match):
    """Build a Media.Track combining a query track with consensus metadata.

    The title and index come from ``query_track``; album, artist, disc, year,
    thumbnail URLs and GUIDs are copied from ``consensus_track``.

    Args:
        query_track: Track being matched; supplies ``name`` and ``index``.
        consensus_track: Track carrying the album-level metadata to copy.
        part: Appended to the merged track's ``parts``; also handed to
            ``improve_from_tag`` when quick matching.
        do_quick_match: When true, the title is refreshed through
            ``improve_from_tag`` because the tags may be better.

    Returns:
        The newly created ``Media.Track``.
    """
    # If we did a quick match, read tags, as it may have much better tags.
    track_title = query_track.name
    if do_quick_match:
        track_title = improve_from_tag(track_title, part, 'title')

    # A missing (None) or non-numeric index maps to -1 instead of letting
    # int() raise and abort the merge.
    try:
        track_index = int(query_track.index)
    except (TypeError, ValueError):
        track_index = -1

    merged_track = Media.Track(
        index=track_index,
        album=toBytes(consensus_track.album),
        artist=toBytes(consensus_track.artist),
        title=toBytes(track_title),
        disc=toBytes(consensus_track.disc),
        album_thumb_url=toBytes(consensus_track.album_thumb_url),
        artist_thumb_url=toBytes(consensus_track.artist_thumb_url),
        year=toBytes(consensus_track.year),
        album_guid=toBytes(consensus_track.album_guid),
        artist_guid=toBytes(consensus_track.artist_guid))

    merged_track.parts.append(part)

    return merged_track
def merge_hints(query_track, consensus_track, part, do_quick_match):
  """Build a Media.Track combining a query track with consensus metadata.

  The title and index come from ``query_track``; album, artist, disc, year,
  thumbnail URLs and GUIDs are copied from ``consensus_track``.

  Args:
    query_track: Track being matched; supplies ``name`` and ``index``.
    consensus_track: Track carrying the album-level metadata to copy.
    part: Appended to the merged track's ``parts``; also handed to
      ``improve_from_tag`` when quick matching.
    do_quick_match: When true, the title is refreshed through
      ``improve_from_tag`` because the tags may be better.

  Returns:
    The newly created ``Media.Track``.
  """
  # If we did a quick match, read tags, as it may have much better tags.
  track_title = query_track.name
  if do_quick_match:
    track_title = improve_from_tag(track_title, part, 'title')

  # A missing (None) or non-numeric index maps to -1 instead of letting
  # int() raise and abort the merge.
  try:
    track_index = int(query_track.index)
  except (TypeError, ValueError):
    track_index = -1

  merged_track = Media.Track(
    index=track_index,
    album=toBytes(consensus_track.album),
    artist=toBytes(consensus_track.artist),
    title=toBytes(track_title),
    disc=toBytes(consensus_track.disc),
    album_thumb_url=toBytes(consensus_track.album_thumb_url),
    artist_thumb_url=toBytes(consensus_track.artist_thumb_url),
    year=toBytes(consensus_track.year),
    album_guid=toBytes(consensus_track.album_guid),
    artist_guid=toBytes(consensus_track.artist_guid))

  merged_track.parts.append(part)

  return merged_track
def merge_hints(query_track, consensus_track, part, do_quick_match):
  """Build a Media.Track combining a query track with consensus metadata.

  The title and index come from ``query_track``; the disc number is read from
  the 'discnumber' tag via ``improve_from_tag``; album, artist, year,
  thumbnail URLs and GUIDs are copied from ``consensus_track``.

  Args:
    query_track: Track being matched; supplies ``name`` and ``index``.
    consensus_track: Track carrying the album-level metadata to copy.
    part: Appended to the merged track's ``parts``; also handed to
      ``improve_from_tag`` for the tag lookups.
    do_quick_match: When true, the title is refreshed through
      ``improve_from_tag`` because the tags may be better.

  Returns:
    The newly created ``Media.Track``.
  """
  # If we did a quick match, read tags, as it may have much better tags.
  track_title = query_track.name
  if do_quick_match:
    track_title = improve_from_tag(track_title, part, 'title')

  # We don't want to use consensus disc numbers, since tags are more reliable. It's common for bonus discs, etc. to get "split".
  # Keep only the part before any '/' or 'of' (e.g. '1/2', '1 of 2' -> '1').
  try:
    disc = improve_from_tag('1', part, 'discnumber').split('/')[0].split('of')[0].strip()
  except Exception:
    # Best effort: any failure reading or parsing the tag means disc 1.
    disc = '1'

  # A tag such as '/2' or an empty value leaves nothing usable; use the default.
  if not disc:
    disc = '1'

  # Only a non-None, all-digit index is converted; anything else becomes -1.
  index_is_numeric = query_track.index is not None and str(query_track.index).isdigit()

  merged_track = Media.Track(
    index=int(query_track.index) if index_is_numeric else -1,
    album=toBytes(consensus_track.album),
    artist=toBytes(consensus_track.artist),
    title=toBytes(track_title),
    disc=disc,
    album_thumb_url=toBytes(consensus_track.album_thumb_url),
    artist_thumb_url=toBytes(consensus_track.artist_thumb_url),
    year=toBytes(consensus_track.year),
    album_guid=toBytes(consensus_track.album_guid),
    artist_guid=toBytes(consensus_track.artist_guid))

  merged_track.parts.append(part)

  return merged_track
Example #4
0
def merge_hints(query_track, consensus_track, part, do_quick_match):
    """Build a Media.Track combining a query track with consensus metadata.

    The title and index come from ``query_track``; the disc number is read
    from the 'discnumber' tag via ``improve_from_tag``; album, artist, year,
    thumbnail URLs and GUIDs are copied from ``consensus_track``.

    Args:
        query_track: Track being matched; supplies ``name`` and ``index``.
        consensus_track: Track carrying the album-level metadata to copy.
        part: Appended to the merged track's ``parts``; also handed to
            ``improve_from_tag`` for the tag lookups.
        do_quick_match: When true, the title is refreshed through
            ``improve_from_tag`` because the tags may be better.

    Returns:
        The newly created ``Media.Track``.
    """
    # If we did a quick match, read tags, as it may have much better tags.
    track_title = query_track.name
    if do_quick_match:
        track_title = improve_from_tag(track_title, part, 'title')

    # We don't want to use consensus disc numbers, since tags are more reliable. It's common for bonus discs, etc. to get "split".
    # Keep only the part before any '/' or 'of' (e.g. '1/2', '1 of 2' -> '1').
    try:
        disc = improve_from_tag(
            '1', part, 'discnumber').split('/')[0].split('of')[0].strip()
    except Exception:
        # Best effort: any failure reading or parsing the tag means disc 1.
        disc = '1'

    # A tag such as '/2' or an empty value leaves nothing usable; use the default.
    if not disc:
        disc = '1'

    # Only a non-None, all-digit index is converted; anything else becomes -1.
    index_is_numeric = (query_track.index is not None
                        and str(query_track.index).isdigit())

    merged_track = Media.Track(
        index=int(query_track.index) if index_is_numeric else -1,
        album=toBytes(consensus_track.album),
        artist=toBytes(consensus_track.artist),
        title=toBytes(track_title),
        disc=disc,
        album_thumb_url=toBytes(consensus_track.album_thumb_url),
        artist_thumb_url=toBytes(consensus_track.artist_thumb_url),
        year=toBytes(consensus_track.year),
        album_guid=toBytes(consensus_track.album_guid),
        artist_guid=toBytes(consensus_track.artist_guid))

    merged_track.parts.append(part)

    return merged_track
Example #5
0
def improve_from_tag(existing, file, tag):
    """Return the first value of ``tag`` read from ``file``, else ``existing``.

    Args:
        existing: Fallback value used when the tag cannot be read.
        file: Passed to ``mutagen.File`` to load the tags.
        tag: Key looked up in the loaded tags.

    Returns:
        ``toBytes`` applied to the tag's first value, or to ``existing`` when
        reading fails, the tag is absent, or the tag has no values.
    """
    tags = None
    try:
        tags = mutagen.File(file, easy=True)
    except Exception:
        # Best effort: an unreadable file just means we keep ``existing``.
        Log('There was an exception thrown reading tags.')

    if tags and tag in tags:
        values = tags[tag]
        # Guard against a present-but-empty tag so [0] cannot raise.
        if values:
            existing = values[0]

    return toBytes(existing)
def lookup(query_list, result_list, language=None, fingerprint=False, mixed=False, multiple=False, do_quick_match=False):
  """Send the tracks in ``query_list`` to the local Gracenote search service.

  Builds one query string describing every track (path, an index-based
  identifier, and whichever of title/artist/album artist/album/index/disc are
  set), requests it from the service on 127.0.0.1:32400 and parses the XML
  response.

  Args:
    query_list: Tracks to look up; each needs ``parts[0]`` plus the optional
      ``name``/``title``/``artist``/``album_artist``/``album``/``index``/``disc``.
    result_list: Not referenced by the statements in this block.
    language: Sent as the ``lang`` query parameter.
    fingerprint: Sent as the ``fingerprint`` flag (formatted with %d).
    mixed: Sent as the ``mixedContent`` flag (formatted with %d).
    multiple: Sent as the ``multiple`` flag (formatted with %d).
    do_quick_match: Not referenced by the statements in this block.

  Returns:
    ``(0, 0, 0)`` when ``query_list`` is empty or the request/parse fails.
    The statements in this block return nothing otherwise.
  """
  # This shouldn't happen, but be safe.
  if not query_list:
    return (0, 0, 0)

  # See if input looks like a sane album
  (sane_input_tracks, unique_input_albums, input_discs) = compute_input_sanity(query_list)

  # Build up the query with the contents of the query list. Fragments are
  # collected in a list and joined once rather than concatenated repeatedly.
  params = []
  parts = {}

  Log('Running Gracenote match on %d tracks with fingerprinting: %d and mixedContent: %d and multiple: %d' % (len(query_list), fingerprint, mixed, multiple))
  for i, track in enumerate(query_list):

    # We need to pass at least a path and an identifier for each track that we know about.
    params.append('&tracks[%d].path=%s' % (i, quote(track.parts[0], '')))
    params.append('&tracks[%d].userData=%d' % (i, i))

    # Keep track of the identifier -> part mapping so we can reassemble later.
    parts[i] = track.parts[0]

    if track.name:
      params.append('&tracks[%d].title=%s' % (i, quote(toBytes(track.title or track.name), '')))
    if track.artist and track.artist != 'Various Artists':
      params.append('&tracks[%d].artist=%s' % (i, quote(toBytes(track.artist), '')))
    # Prefer an explicit album artist; otherwise reuse the track artist.
    if track.album_artist:
      params.append('&tracks[%d].albumArtist=%s' % (i, quote(toBytes(track.album_artist), '')))
    elif track.artist and track.artist != 'Various Artists':
      params.append('&tracks[%d].albumArtist=%s' % (i, quote(toBytes(track.artist), '')))
    if track.album and track.album != '[Unknown Album]':
      params.append('&tracks[%d].album=%s' % (i, quote(toBytes(track.album), '')))
    if track.index:
      params.append('&tracks[%d].index=%s' % (i, track.index))
    if track.disc:
      params.append('&tracks[%d].parentIndex=%s' % (i, track.disc))
    Log(' - %s/%s - %s/%s - %s' % (toBytes(track.artist), toBytes(track.album), toBytes(track.disc), toBytes(track.index), toBytes(track.name)))

  args = ''.join(params)
  url = 'http://127.0.0.1:32400/services/gracenote/search?fingerprint=%d&mixedContent=%d&multiple=%d%s&lang=%s' % (fingerprint, mixed, multiple, args, language)
  try:
    res = minidom.parse(urlopen(url))
  except Exception as e:
    # Network and XML errors are both treated as "no match".
    Log('Error parsing Gracenote response: ' + str(e))
    return (0, 0, 0)
def improve_from_tag(existing, file, tag):
  """Return the first value of ``tag`` read from ``file``, else ``existing``.

  Args:
    existing: Fallback value used when the tag cannot be read.
    file: Passed to ``mutagen.File`` to load the tags.
    tag: Key looked up in the loaded tags.

  Returns:
    ``toBytes`` applied to the tag's first value, or to ``existing`` when
    reading fails or the tag is absent.
  """
  tags = None
  try:
    tags = mutagen.File(file, easy=True)
  except Exception:
    # Best effort: an unreadable file must not abort the caller; keep
    # ``existing`` instead.
    Log('There was an exception thrown reading tags.')

  if tags and tag in tags:
    existing = tags[tag][0]

  return toBytes(existing)
def Scan(path, files, media_list, subdirs, language=None, root=None, respect_tags=False):
  """Scan a directory for audio files and match them through Gracenote.

  After ``AudioFiles.Scan`` has run, and only when ``files`` is non-empty,
  the function:

  1. Decides whether a "quick match" is possible: no subdirectories, files not
     in the section root, every filename starting with a unique 1-2 digit
     track index, the first file's tags not indicating a later disc or a
     'Various Artists' album, and the first (up to) three files agreeing on
     artist and album.
  2. Builds the query list from filenames (quick match) or from
     ``AudioFiles.Process`` (expensive match).
  3. Runs the queries, retrying per disc when the score is below 75, and
     discarding the result when it is below 50.
  4. Replaces the contents of ``media_list`` with the results, or with the
     tag-based tracks when there are no results.
  5. When ``respect_tags`` is set and Gracenote results were used, re-reads the
     tags and copies GUIDs and thumbnail URLs from the Gracenote tracks onto
     them.

  Args:
    path: Directory being scanned, joined onto ``root`` for logging.
    files: File paths in this directory.
    media_list: List that is emptied and refilled in place with the tracks.
    subdirs: Subdirectories below ``path``.
    language: Passed on to tag reading, ``Unicodize`` and the queries.
    root: Section root; files directly inside it force mixed-content matching.
    respect_tags: Prefer tag metadata, using Gracenote only for GUIDs, thumbs
      and unknown album/artist names.

  Returns:
    None; results are delivered through ``media_list``.
  """
  # Scan for audio files.
  AudioFiles.Scan(path, files, media_list, subdirs, root)

  root_str = root or ''
  loc_str = os.path.join(root_str, path)
  Log('Scanning: ' + loc_str)
  Log('Files: ' + str(files))
  Log('Subdirs: ' + str(subdirs))

  # Look at the files and determine whether we can do a quick match (minimal tag parsing).
  do_quick_match = True
  mixed = False

  # Make sure we're looking at a leaf directory (no audio files below here).
  if len(subdirs) > 0:
    Log('Found directories below this one; won\'t attempt quick matching.')
    do_quick_match = False

  if files:

    # Make sure we're not sitting in the section root.
    parent_path = os.path.split(files[0])[0]
    if parent_path == root:
      Log('File(s) are in section root; doing expensive matching with mixed content.')
      do_quick_match = False
      mixed = True

    # Make sure we have reliable track indices for all files and there are no dupes.
    tracks = {}
    for f in files:
      try:
        index = re.search(r'^([0-9]{1,2})[^0-9].*', os.path.split(f)[-1]).groups(0)[0]
      except:
        # No leading index: re.search returned None and .groups raised.
        do_quick_match = False
        Log('Couldn\'t find track indices in all filenames; doing expensive matching.')
        break
      if tracks.get(index):
        do_quick_match = False
        mixed = True
        Log('Found duplicate track index: %s; doing expensive matching with mixed content.' % index)
        break
      else:
        tracks[index] = True

    # Read the first track's tags to check for multi-disc and VA.
    if do_quick_match:
      disc = album_artist = None
      try:
        (artist, album, title, track, disc, album_artist, compil) = AudioFiles.getInfoFromTag(files[0], language)
      except:
        Log('Exception reading tags from first file; doing expensive matching.')
        do_quick_match = False

      # Make sure we are on the first disc.
      if disc is not None and disc > 1:
        Log('Skipping quick match because of non-first disc.')
        do_quick_match = False

      # We want to read all the tags for VA albums to pick up track artists.
      if album_artist is not None and album_artist == 'Various Artists':
        Log('Skipping quick match for Various Artists album.')
        do_quick_match = False

    # Start the consensus check from scratch; the values unpacked from the
    # first file's tags above are not reused.
    artist = None
    album = None

    if do_quick_match:
      Log('Doing quick match')

      # See if we have some consensus on artist/album by reading a few tags.
      for i in range(3):
        if i < len(files):
          this_artist = this_album = tags = None
          try: tags = mutagen.File(files[i], easy=True)
          except: Log('There was an exception thrown reading tags.')

          if tags:
            # See if there's an album artist tag.
            album_artist_tags = [t for t in ['albumartist', 'TPE2', 'performer'] if t in tags]
            album_artist_tag = album_artist_tags[0] if len(album_artist_tags) else None

            # Album artist wins; otherwise the track artist; otherwise None.
            this_artist = tags[album_artist_tag][0] if album_artist_tag else tags['artist'][0] if 'artist' in tags else None
            this_album = tags['album'][0] if 'album' in tags else None

          if artist and artist != this_artist:
            Log('Found different artists in tags (%s vs. %s); doing expensive matching.' % (artist, this_artist))
            do_quick_match = False
            break

          if album and album != this_album:
            # Report the albums that differ (this used to log the artists).
            Log('Found different albums in tags (%s vs. %s); doing expensive matching.' % (album, this_album))
            do_quick_match = False
            break

          artist = this_artist
          album = this_album

      if not artist or not album:
        Log('Couldn\'t determine unique artist or album from tags; doing expensive matching.')
        do_quick_match = False

    query_list = []
    result_list = []
    fingerprint = False

    # Directory looks clean, let's build a query list directly from info gleaned from file names.
    if do_quick_match:
      Log('Building query list for quickmatch with artist: %s, album: %s' % (artist, album))

      # Determine if the artist and/or album appears in all filenames, since we'll want to strip these out for clean titles.
      strip_artist = True if len([f for f in files if artist.lower() in Unicodize(os.path.basename(f), language).lower()]) == len(files) else False
      strip_album = True if len([f for f in files if album.lower() in Unicodize(os.path.basename(f), language).lower()]) == len(files) else False

      for f in files:
        try:
          filename = os.path.splitext(os.path.split(f)[1])[0]
          # The capturing group makes re.split return [head, index, rest].
          (head, index, title) = re.split(r'^([0-9]{1,2})', filename)

          # Replace underscores and dots with spaces.
          title = re.sub(r'[_\. ]+', ' ', title)

          # Things in parens seem to confuse Gracenote, so let's strip them out.
          title = re.sub(r' ?\(.*\)', '', title)

          # Remove artist name from title if it appears in all of them.
          # The name is used as a regex pattern, not a literal; a pattern
          # error lands in the except below and the file is left out.
          if strip_artist and len(files) > 2:
            title = re.sub(r'(?i)' + artist, '', title)

          # Remove album title from title if it appears in all of them.
          if strip_album and len(files) > 2:
            title = re.sub(r'(?i)' + album, '', title)

          # Remove any remaining index-, artist-, and album-related cruft from the head of the track title.
          title = re.sub(r'^[\W\-]+', '', title).strip()

          # Last chance for artist or album prefix.
          if not strip_artist and Unicodize(title, language).lower().find(artist.lower()) == 0:
            title = title[len(artist):]

          if not strip_album and Unicodize(title, language).lower().find(album.lower()) == 0:
            title = title[len(album):]

          t = Media.Track(artist=toBytes(artist), album=toBytes(album), title=toBytes(title), index=int(index))
          t.parts.append(f)

          Log(' - Adding: %s - %s' % (index, title))
          query_list.append(t)

        except Exception as e:
          Log('Error preparing tracks for quick matching: ' + str(e))

    # Otherwise, let's do old school directory crawling and tag reading.
    else:
      AudioFiles.Process(path, files, media_list, subdirs, root)
      query_list = list(media_list)

    # Try as-is first (ask for everything at once).
    discs = [query_list]
    final_match = run_queries(discs, result_list, language, fingerprint, mixed, do_quick_match)

    # If the match was still shitty, and it looks like we have multiple discs, try splitting.
    if final_match < 75:
      discs = group_tracks_by_disc(query_list)
      if len(discs) > 1:
        Log('Result still looked bad, we will try splitting into separate per-disc queries.')
        other_result_list = []
        other_match = run_queries(discs, other_result_list, language, fingerprint, mixed, do_quick_match)

        if other_match > final_match:
          Log('The split result was best, we will use it.')
          result_list = other_result_list
          final_match = other_match

    # If we have a crappy match, don't use it.
    if final_match < 50.0:
      Log('That was terrible, let us not use it.')
      result_list = []

    # Finalize the results.
    used_tags = False
    # Empty the caller's list in place so the results are visible to it.
    del media_list[:]
    if len(result_list) > 0:
      # Gracenote results.
      for result in result_list:
        media_list.append(result)
    else:
      # We bailed during the GN lookup, fall back to tags.
      used_tags = True
      AudioFiles.Process(path, files, media_list, subdirs, root)

    # If we wanted to respect tags, then make sure we used tags.
    if not used_tags and respect_tags:

      # Let's grab tag results, and then set GUIDs we found.
      tag_media_list = []
      AudioFiles.Process(path, files, tag_media_list, subdirs, root)

      # Now suck GN data out, keyed by each track's first part.
      path_map = {}
      for track in media_list:
        path_map[track.parts[0]] = track

      for track in tag_media_list:
        if track.parts[0] in path_map:
          gn_track = path_map[track.parts[0]]
          track.guid = gn_track.guid
          track.album_guid = gn_track.album_guid
          track.artist_guid = gn_track.artist_guid
          track.album_thumb_url = gn_track.album_thumb_url
          track.artist_thumb_url = gn_track.artist_thumb_url

          # If the tags failed, fill in key data from Gracenote.
          if track.album == '[Unknown Album]':
            track.album = gn_track.album

          if track.artist == '[Unknown Artist]':
            track.artist = gn_track.artist

      media_list[:] = tag_media_list
      average_album_ratio = total_album_ratio / len(query_list)
      
      # If we've got really excellent track matches on a good number of tracks, then it's likely
      # that the GN match is just calling the artist different (VA vs artist, etc.) Prefer the name
      # in the tag if we have one and it's consistent.
      #
      track_min_ratio = 0.88
      if average_album_ratio > 0.90:
        track_min_ratio = 0.75
      if average_album_ratio > 0.98 and number_of_matched_tracks == len(query_list):
        track_min_ratio = 0.50
      
      Log('Track average lev ratio %f, album lev ratio %f, required track ratio: %f' % (average_track_ratio, average_album_ratio, track_min_ratio))
      if len(query_list) >= 4 and average_track_ratio > track_min_ratio:
        if number_of_artists == 1:
          Log('Using override artist of %s' % toBytes(query_list[0].artist))
          artist_override = query_list[0].artist
      elif len(query_list) < 4 or average_track_ratio < 0.75 or ratio < 0.20:
        return (0, 0, 0)

  # Check for Various Artists albums which come back matching to an artist, or movie name.
  number_of_album_artists = len(set([q.album_artist for q in query_list if q.album_artist]))
  if number_of_artists > 1 and number_of_album_artists == 1 and query_list[0].album_artist and LevenshteinRatio(query_list[0].album_artist, 'Various Artists') > 0.9:
    Log('Using override artist of Various Artists')
    artist_override = 'Various Artists'

    # Restore track artists from tags if necessary.
    for i, query_track in enumerate(query_list):
      if str(i) in matched_tracks:
        track = matched_tracks[str(i)]
        if query_track.artist and not track.getAttribute('originalTitle'):
Example #10
0
def lookup(query_list,
           result_list,
           language=None,
           fingerprint=False,
           mixed=False,
           multiple=False,
           do_quick_match=False):
    """Send the tracks in ``query_list`` to the local Gracenote search service.

    Builds one query string describing every track (path, an index-based
    identifier, and whichever of title/artist/album artist/album/index/disc
    are set), requests it from the service on 127.0.0.1:32400 and parses the
    XML response.

    Args:
        query_list: Tracks to look up; each needs ``parts[0]`` plus the
            optional ``name``/``title``/``artist``/``album_artist``/``album``/
            ``index``/``disc``.
        result_list: Not referenced by the statements in this block.
        language: Sent as the ``lang`` query parameter.
        fingerprint: Sent as the ``fingerprint`` flag (formatted with %d).
        mixed: Sent as the ``mixedContent`` flag (formatted with %d).
        multiple: Sent as the ``multiple`` flag (formatted with %d).
        do_quick_match: Not referenced by the statements in this block.

    Returns:
        ``(0, 0, 0)`` when ``query_list`` is empty or the request/parse fails.
        The statements in this block return nothing otherwise.
    """
    # This shouldn't happen, but be safe.
    if not query_list:
        return (0, 0, 0)

    # See if input looks like a sane album
    (sane_input_tracks, unique_input_albums,
     input_discs) = compute_input_sanity(query_list)

    # Build up the query with the contents of the query list. Fragments are
    # collected in a list and joined once rather than concatenated repeatedly.
    params = []
    parts = {}

    Log('Running Gracenote match on %d tracks with fingerprinting: %d and mixedContent: %d and multiple: %d'
        % (len(query_list), fingerprint, mixed, multiple))
    for i, track in enumerate(query_list):

        # We need to pass at least a path and an identifier for each track that we know about.
        params.append('&tracks[%d].path=%s' % (i, quote(track.parts[0], '')))
        params.append('&tracks[%d].userData=%d' % (i, i))

        # Keep track of the identifier -> part mapping so we can reassemble later.
        parts[i] = track.parts[0]

        if track.name:
            params.append('&tracks[%d].title=%s' % (
                i, quote(toBytes(track.title or track.name), '')))
        if track.artist and track.artist != 'Various Artists':
            params.append('&tracks[%d].artist=%s' % (
                i, quote(toBytes(track.artist), '')))
        # Prefer an explicit album artist; otherwise reuse the track artist.
        if track.album_artist:
            params.append('&tracks[%d].albumArtist=%s' % (
                i, quote(toBytes(track.album_artist), '')))
        elif track.artist and track.artist != 'Various Artists':
            params.append('&tracks[%d].albumArtist=%s' % (
                i, quote(toBytes(track.artist), '')))
        if track.album and track.album != '[Unknown Album]':
            params.append('&tracks[%d].album=%s' % (
                i, quote(toBytes(track.album), '')))
        if track.index:
            params.append('&tracks[%d].index=%s' % (i, track.index))
        if track.disc:
            params.append('&tracks[%d].parentIndex=%s' % (i, track.disc))
        Log(' - %s/%s - %s/%s - %s' %
            (toBytes(track.artist), toBytes(track.album), toBytes(
                track.disc), toBytes(track.index), toBytes(track.name)))

    args = ''.join(params)
    url = 'http://127.0.0.1:32400/services/gracenote/search?fingerprint=%d&mixedContent=%d&multiple=%d%s&lang=%s' % (
        fingerprint, mixed, multiple, args, language)
    try:
        res = minidom.parse(urlopen(url))
    except Exception as e:
        # Network and XML errors are both treated as "no match".
        Log('Error parsing Gracenote response: ' + str(e))
        return (0, 0, 0)
Example #11
0
def Scan(path,
         files,
         media_list,
         subdirs,
         language=None,
         root=None,
         respect_tags=False):
    """Scan a directory for audio files and match them through Gracenote.

    After ``AudioFiles.Scan`` has run, and only when ``files`` is non-empty,
    the function:

    1. Decides whether a "quick match" is possible: no subdirectories, files
       not in the section root, every filename starting with a unique 1-2
       digit track index, the first file's tags not indicating a later disc
       or a 'Various Artists' album, and the first (up to) three files
       agreeing on artist and album.
    2. Builds the query list from filenames (quick match) or from
       ``AudioFiles.Process`` (expensive match).
    3. Runs the queries, retrying per disc when the score is below 75, and
       discarding the result when it is below 50.
    4. Replaces the contents of ``media_list`` with the results, or with the
       tag-based tracks when there are no results.
    5. When ``respect_tags`` is set and Gracenote results were used, re-reads
       the tags and copies GUIDs and thumbnail URLs from the Gracenote tracks
       onto them.

    Args:
        path: Directory being scanned, joined onto ``root`` for logging.
        files: File paths in this directory.
        media_list: List that is emptied and refilled in place with tracks.
        subdirs: Subdirectories below ``path``.
        language: Passed on to tag reading, ``Unicodize`` and the queries.
        root: Section root; files directly inside it force mixed-content
            matching.
        respect_tags: Prefer tag metadata, using Gracenote only for GUIDs,
            thumbs and unknown album/artist names.

    Returns:
        None; results are delivered through ``media_list``.
    """

    # Scan for audio files.
    AudioFiles.Scan(path, files, media_list, subdirs, root)

    root_str = root or ''
    loc_str = os.path.join(root_str, path)
    Log('Scanning: ' + loc_str)
    Log('Files: ' + str(files))
    Log('Subdirs: ' + str(subdirs))

    # Look at the files and determine whether we can do a quick match (minimal tag parsing).
    do_quick_match = True
    mixed = False

    # Make sure we're looking at a leaf directory (no audio files below here).
    if len(subdirs) > 0:
        Log('Found directories below this one; won\'t attempt quick matching.')
        do_quick_match = False

    if files:

        # Make sure we're not sitting in the section root.
        parent_path = os.path.split(files[0])[0]
        if parent_path == root:
            Log('File(s) are in section root; doing expensive matching with mixed content.'
                )
            do_quick_match = False
            mixed = True

        # Make sure we have reliable track indices for all files and there are no dupes.
        tracks = {}
        for f in files:
            try:
                index = re.search(r'^([0-9]{1,2})[^0-9].*',
                                  os.path.split(f)[-1]).groups(0)[0]
            except:
                # No leading index: re.search returned None and .groups raised.
                do_quick_match = False
                Log('Couldn\'t find track indices in all filenames; doing expensive matching.'
                    )
                break
            if tracks.get(index):
                do_quick_match = False
                mixed = True
                Log('Found duplicate track index: %s; doing expensive matching with mixed content.'
                    % index)
                break
            else:
                tracks[index] = True

        # Read the first track's tags to check for multi-disc and VA.
        if do_quick_match:
            disc = album_artist = None
            try:
                (artist, album, title, track, disc, album_artist,
                 compil) = AudioFiles.getInfoFromTag(files[0], language)
            except:
                Log('Exception reading tags from first file; doing expensive matching.'
                    )
                do_quick_match = False

            # Make sure we are on the first disc.
            if disc is not None and disc > 1:
                Log('Skipping quick match because of non-first disc.')
                do_quick_match = False

            # We want to read all the tags for VA albums to pick up track artists.
            if album_artist is not None and album_artist == 'Various Artists':
                Log('Skipping quick match for Various Artists album.')
                do_quick_match = False

        # Start the consensus check from scratch; the values unpacked from
        # the first file's tags above are not reused.
        artist = None
        album = None

        if do_quick_match:
            Log('Doing quick match')

            # See if we have some consensus on artist/album by reading a few tags.
            for i in range(3):
                if i < len(files):
                    this_artist = this_album = tags = None
                    try:
                        tags = mutagen.File(files[i], easy=True)
                    except:
                        Log('There was an exception thrown reading tags.')

                    if tags:
                        # See if there's an album artist tag.
                        album_artist_tags = [
                            t for t in ['albumartist', 'TPE2', 'performer']
                            if t in tags
                        ]
                        album_artist_tag = album_artist_tags[0] if len(
                            album_artist_tags) else None

                        # Album artist wins; otherwise the track artist;
                        # otherwise None.
                        this_artist = tags[album_artist_tag][
                            0] if album_artist_tag else tags['artist'][
                                0] if 'artist' in tags else None
                        this_album = tags['album'][
                            0] if 'album' in tags else None

                    if artist and artist != this_artist:
                        Log('Found different artists in tags (%s vs. %s); doing expensive matching.'
                            % (artist, this_artist))
                        do_quick_match = False
                        break

                    if album and album != this_album:
                        Log('Found different albums in tags (%s vs. %s); doing expensive matching.'
                            % (album, this_album))
                        do_quick_match = False
                        break

                    artist = this_artist
                    album = this_album

            if not artist or not album:
                Log('Couldn\'t determine unique artist or album from tags; doing expensive matching.'
                    )
                do_quick_match = False

        query_list = []
        result_list = []
        fingerprint = False

        # Directory looks clean, let's build a query list directly from info gleaned from file names.
        if do_quick_match:
            Log('Building query list for quickmatch with artist: %s, album: %s'
                % (artist, album))

            # Determine if the artist and/or album appears in all filenames, since we'll want to strip these out for clean titles.
            strip_artist = True if len([
                f for f in files if artist.lower() in Unicodize(
                    os.path.basename(f), language).lower()
            ]) == len(files) else False
            strip_album = True if len([
                f for f in files if album.lower() in Unicodize(
                    os.path.basename(f), language).lower()
            ]) == len(files) else False

            for f in files:
                try:
                    filename = os.path.splitext(os.path.split(f)[1])[0]
                    # The capturing group makes re.split return
                    # [head, index, rest].
                    (head, index, title) = re.split(r'^([0-9]{1,2})', filename)

                    # Replace underscores and dots with spaces.
                    title = re.sub(r'[_\. ]+', ' ', title)

                    # Things in parens seem to confuse Gracenote, so let's strip them out.
                    title = re.sub(r' ?\(.*\)', '', title)

                    # Remove artist name from title if it appears in all of them.
                    # The name is used as a regex pattern, not a literal; a
                    # pattern error lands in the except below and the file is
                    # left out of the query list.
                    if strip_artist and len(files) > 2:
                        title = re.sub(r'(?i)' + artist, '', title)

                    # Remove album title from title if it appears in all of them.
                    if strip_album and len(files) > 2:
                        title = re.sub(r'(?i)' + album, '', title)

                    # Remove any remaining index-, artist-, and album-related cruft from the head of the track title.
                    title = re.sub(r'^[\W\-]+', '', title).strip()

                    # Last chance for artist or album prefix.
                    if not strip_artist and Unicodize(
                            title, language).lower().find(artist.lower()) == 0:
                        title = title[len(artist):]

                    if not strip_album and Unicodize(
                            title, language).lower().find(album.lower()) == 0:
                        title = title[len(album):]

                    t = Media.Track(artist=toBytes(artist),
                                    album=toBytes(album),
                                    title=toBytes(title),
                                    index=int(index))
                    t.parts.append(f)

                    Log(' - Adding: %s - %s' % (index, title))
                    query_list.append(t)

                except Exception as e:
                    Log('Error preparing tracks for quick matching: ' + str(e))

        # Otherwise, let's do old school directory crawling and tag reading.
        else:
            AudioFiles.Process(path, files, media_list, subdirs, root)
            query_list = list(media_list)

        # Try as-is first (ask for everything at once).
        discs = [query_list]
        final_match = run_queries(discs, result_list, language, fingerprint,
                                  mixed, do_quick_match)

        # If the match was still shitty, and it looks like we have multiple discs, try splitting.
        if final_match < 75:
            discs = group_tracks_by_disc(query_list)
            if len(discs) > 1:
                Log('Result still looked bad, we will try splitting into separate per-disc queries.'
                    )
                other_result_list = []
                other_match = run_queries(discs, other_result_list, language,
                                          fingerprint, mixed, do_quick_match)

                if other_match > final_match:
                    Log('The split result was best, we will use it.')
                    result_list = other_result_list
                    final_match = other_match

        # If we have a crappy match, don't use it.
        if final_match < 50.0:
            Log('That was terrible, let us not use it.')
            result_list = []

        # Finalize the results.
        used_tags = False
        # Empty the caller's list in place so the results are visible to it.
        del media_list[:]
        if len(result_list) > 0:
            # Gracenote results.
            for result in result_list:
                media_list.append(result)
        else:
            # We bailed during the GN lookup, fall back to tags.
            used_tags = True
            AudioFiles.Process(path, files, media_list, subdirs, root)

        # If we wanted to respect tags, then make sure we used tags.
        if not used_tags and respect_tags:

            # Let's grab tag results, and then set GUIDs we found.
            tag_media_list = []
            AudioFiles.Process(path, files, tag_media_list, subdirs, root)

            # Now suck GN data out, keyed by each track's first part.
            path_map = {}
            for track in media_list:
                path_map[track.parts[0]] = track

            for track in tag_media_list:
                if track.parts[0] in path_map:
                    gn_track = path_map[track.parts[0]]
                    track.guid = gn_track.guid
                    track.album_guid = gn_track.album_guid
                    track.artist_guid = gn_track.artist_guid
                    track.album_thumb_url = gn_track.album_thumb_url
                    track.artist_thumb_url = gn_track.artist_thumb_url

                    # If the tags failed, fill in key data from Gracenote.
                    if track.album == '[Unknown Album]':
                        track.album = gn_track.album

                    if track.artist == '[Unknown Artist]':
                        track.artist = gn_track.artist

            media_list[:] = tag_media_list
Example #12
0
            # that the GN match is just calling the artist different (VA vs artist, etc.) Prefer the name
            # in the tag if we have one and it's consistent.
            #
            track_min_ratio = 0.88
            if average_album_ratio > 0.90:
                track_min_ratio = 0.75
            if average_album_ratio > 0.98 and number_of_matched_tracks == len(
                    query_list):
                track_min_ratio = 0.50

            Log('Track average lev ratio %f, album lev ratio %f, required track ratio: %f'
                % (average_track_ratio, average_album_ratio, track_min_ratio))
            if len(query_list) >= 4 and average_track_ratio > track_min_ratio:
                if number_of_artists == 1:
                    Log('Using override artist of %s' %
                        toBytes(query_list[0].artist))
                    artist_override = query_list[0].artist
            elif len(query_list
                     ) < 4 or average_track_ratio < 0.75 or ratio < 0.20:
                return (0, 0, 0)

    # Check for Various Artists albums which come back matching to an artist, or movie name.
    number_of_album_artists = len(
        set([q.album_artist for q in query_list if q.album_artist]))
    if number_of_artists > 1 and number_of_album_artists == 1 and query_list[
            0].album_artist and LevenshteinRatio(query_list[0].album_artist,
                                                 'Various Artists') > 0.9:
        Log('Using override artist of Various Artists')
        artist_override = 'Various Artists'

        # Restore track artists from tags if necessary.
def improve_from_tag(existing, file, tag):
    """Return the first value of `tag` read from `file`, else `existing`.

    The file is opened with mutagen's "easy" interface. If the file has
    tags and `tag` is among them with at least one value, the first value
    replaces `existing`; otherwise `existing` is kept. The result is
    always passed through toBytes() before being returned.

    existing -- fallback value used when the tag cannot be read.
    file     -- path handed to mutagen.File().
    tag      -- easy-tag key to look up (e.g. 'title').
    """
    tags = mutagen.File(file, easy=True)
    if tags and tag in tags:
        values = tags[tag]
        # A key can be present with an empty value list; indexing [0]
        # would raise IndexError, so keep the existing value instead.
        if values:
            existing = values[0]

    return toBytes(existing)
Example #14
0
def Scan(path, files, mediaList, subdirs, exts, root=None):
  """Prune `files` and `subdirs` in place, dropping entries that should not be scanned.

  A file is removed when any of the following holds: its name cannot be
  decoded as UTF-8 (only on platforms with unicode filenames), its
  extension is not in `exts`, it does not exist or is empty, it cannot be
  read, it is hidden (basename starts with '.'), it matches a .plexignore
  file pattern, or its MP4 codec marks it as DRM-protected.

  A sub-directory is removed when its basename matches one of the
  IGNORE_DIRS patterns (plus ROOT_IGNORE_DIRS when `path` is empty) or it
  is selected by a .plexignore glob.

  path      -- directory being scanned, relative to `root`; empty at the root.
  files     -- list of file paths; modified in place.
  mediaList -- not used by this function.
  subdirs   -- list of directory paths; modified in place.
  exts      -- container of accepted extensions, lower-case, without the dot.
  root      -- root of the source, or None to skip .plexignore handling.

  Returns None.
  """
  files_to_whack = []
  plexignore_files = []
  plexignore_dirs = []
  use_unicode = os.path.supports_unicode_filenames

  # Build a list of things to ignore based on a .plexignore file in this dir.
  if root and Utils.ContainsFile(files, '.plexignore'):
    ParsePlexIgnore(os.path.join(root, path, '.plexignore'), plexignore_files, plexignore_dirs)

  # Also look for a .plexignore in the 'root' for this source.
  if root and files and root != os.path.dirname(files[0]):
    if Utils.ContainsFile(os.listdir(root), '.plexignore'):
      ParsePlexIgnore(os.path.join(root, '.plexignore'), plexignore_files, plexignore_dirs)

  for f in files:
    # Only use unicode if it's supported, which it is on Windows and OS X,
    # but not Linux. This allows things to work with non-ASCII characters
    # without having to go through a bunch of work to ensure the Linux
    # filesystem is UTF-8 "clean".
    #
    if use_unicode:
      try:
        filename = unicode(f.decode('utf-8'))
      except Exception:
        # The name cannot be decoded, so there is no usable `filename` for
        # the checks below. The file is dropped; skip straight to the next
        # one rather than reusing a stale (or unbound) `filename`.
        files_to_whack.append(f)
        continue
    else:
      filename = f

    (basename, ext) = os.path.splitext(f)
    basename = os.path.basename(basename)

    # If extension is wrong, don't include.
    if ext.lower()[1:] not in exts:
      files_to_whack.append(f)

    # Broken symlinks and zero byte files need not apply.
    if not os.path.exists(filename) or os.path.getsize(filename) == 0:
      files_to_whack.append(f)

    # Remove unreadable files.
    if not os.access(filename, os.R_OK):
      # If access() claims the file is unreadable, try to read a byte just to be sure.
      try:
        read_file = open(f, 'rb')
        try:
          read_file.read(1)
        finally:
          read_file.close()
      except Exception:
        files_to_whack.append(f)

    # Remove hidden files.
    if len(basename) == 0 or basename[0] == '.':
      files_to_whack.append(f)

    # Remove .plexignore file regex matches.
    for rx in plexignore_files:
      if re.match(rx, os.path.basename(f), re.IGNORECASE):
        files_to_whack.append(f)

    # Remove files that look DRM'd. This is best-effort: any failure to open
    # or parse the file as MP4 simply means no DRM verdict for it.
    try:
      mp4_file = open(f, 'rb')
      try:
        codec = mp4.MP4Info(mp4.Atoms(mp4_file), mp4_file).codec
      finally:
        # Always release the handle; one is opened for every scanned file.
        mp4_file.close()
      if codec in ('drms', 'enca', 'encv'):
        Utils.Log('Skipping file %s because it looks DRM-protected (has codec: %s)' % (toBytes(f), codec))
        files_to_whack.append(f)
    except Exception:
      pass

  # See what directories to ignore. Work on a copy so the module-level
  # IGNORE_DIRS is not extended with the root-only patterns.
  ignore_dirs_total = list(IGNORE_DIRS)
  if len(path) == 0:
    ignore_dirs_total += ROOT_IGNORE_DIRS

  dirs_to_whack = []
  for subdir in subdirs:
    # See which directories to get rid of.
    base_dir = os.path.basename(subdir)
    for rx in ignore_dirs_total:
      if re.match(rx, base_dir, re.IGNORECASE):
        dirs_to_whack.append(subdir)
        break

  # Add glob matches from .plexignore before whacking.
  for pattern in plexignore_dirs:
    for match in glob.glob(pattern):
      if os.path.isdir(match):
        # NOTE(review): dirname() is kept from the original; presumably the
        # patterns end in a path separator so this just strips it - confirm
        # against ParsePlexIgnore.
        dirs_to_whack.append(os.path.dirname(match))
      else:
        files_to_whack.append(match)

  # Whack files.
  for f in set(files_to_whack):
    if f in files:
      files.remove(f)

  # Remove the directories.
  for d in set(dirs_to_whack):
    if d in subdirs:
      subdirs.remove(d)
Example #15
0
            # that the GN match is just calling the artist different (VA vs artist, etc.) Prefer the name
            # in the tag if we have one and it's consistent.
            #
            track_min_ratio = 0.88
            if average_album_ratio > 0.90:
                track_min_ratio = 0.75
            if average_album_ratio > 0.98 and number_of_matched_tracks == len(
                    query_list):
                track_min_ratio = 0.50

            Log('Track average lev ratio %f, album lev ratio %f, required track ratio: %f'
                % (average_track_ratio, average_album_ratio, track_min_ratio))
            if len(query_list) >= 4 and average_track_ratio > track_min_ratio:
                if number_of_artists == 1:
                    Log('Using override artist of %s' %
                        toBytes(query_list[0].artist))
                    artist_override = query_list[0].artist
            elif len(query_list
                     ) < 4 or average_track_ratio < 0.75 or ratio < 0.20:
                return (0, 0, 0)

    # Check for Various Artists albums which come back matching to an artist, or movie name.
    number_of_album_artists = len(
        set([q.album_artist for q in query_list if q.album_artist]))
    if number_of_artists > 1 and number_of_album_artists == 1 and query_list[
            0].album_artist and LevenshteinRatio(query_list[0].album_artist,
                                                 'Various Artists') > 0.9:
        Log('Using override artist of Various Artists')
        artist_override = 'Various Artists'

        # Restore track artists from tags if necessary.
      average_album_ratio = total_album_ratio / len(query_list)
      
      # If we've got really excellent track matches on a good number of tracks, then it's likely
      # that the GN match is just calling the artist different (VA vs artist, etc.) Prefer the name
      # in the tag if we have one and it's consistent.
      #
      track_min_ratio = 0.88
      if average_album_ratio > 0.90:
        track_min_ratio = 0.75
      if average_album_ratio > 0.98 and number_of_matched_tracks == len(query_list):
        track_min_ratio = 0.50
      
      Log('Track average lev ratio %f, album lev ratio %f, required track ratio: %f' % (average_track_ratio, average_album_ratio, track_min_ratio))
      if len(query_list) >= 4 and average_track_ratio > track_min_ratio:
        if number_of_artists == 1:
          Log('Using override artist of %s' % toBytes(query_list[0].artist))
          artist_override = query_list[0].artist
      elif len(query_list) < 4 or average_track_ratio < 0.75 or ratio < 0.20:
        return (0, 0, 0)

  # Check for Various Artists albums which come back matching to an artist, or movie name.
  number_of_album_artists = len(set([q.album_artist for q in query_list if q.album_artist]))
  if number_of_artists > 1 and number_of_album_artists == 1 and query_list[0].album_artist and LevenshteinRatio(query_list[0].album_artist, 'Various Artists') > 0.9:
    Log('Using override artist of Various Artists')
    artist_override = 'Various Artists'

    # Restore track artists from tags if necessary.
    for i, query_track in enumerate(query_list):
      if str(i) in matched_tracks:
        track = matched_tracks[str(i)]
        if query_track.artist and not track.getAttribute('originalTitle'):