class ReleaseFactory(object):
    """
    Finds Releases for an Artist in the database and, when they are missing (or a refresh is
    forced), looks them up through the ArtistSearcher and stores the results.
    """

    def __init__(self, dbConn, dbSession):
        """
        :param dbConn: database connection, kept on the instance as ``conn``
        :param dbSession: database session used for every query, add, delete and commit
        """
        self.conn = dbConn
        self.session = dbSession
        self.logger = Logger()
        self.searcher = ArtistSearcher()

    def getAllForArtist(self, artist, forceRefresh=False):
        """
        Query Database for all releases of the given artist; if none are found (or a refresh
        is forced) search for them and, if found, save and return the results

        :param artist: Artist
        :param forceRefresh: bool When True search even if the database already has releases
        :return: list of Release, or None when no artist is given or the search finds nothing
        """
        if not artist:
            return None
        printableArtistName = artist.name.encode('ascii', 'ignore').decode('utf-8')
        releases = self._getAllFromDatabaseForArtist(artist)
        if releases and not forceRefresh:
            return releases
        if not releases:
            self.logger.info("Releases For Artist [" + printableArtistName + "] Not Found")
        else:
            self.logger.info("Refreshing Releases For Artist [" + printableArtistName + "]")
        srList = self.searcher.searchForArtistReleases(artist, [])
        if not srList:
            self.logger.info("Releases For Artist [" + printableArtistName + "] Not Found")
            return None
        releases = []
        for sr in srList:
            release = self._createDatabaseModelFromSearchModel(artist, sr.title, sr)
            self.session.add(release)
            releases.append(release)
        self.session.commit()
        return releases

    def get(self, artist, title, doFindIfNotInDB=True, forceRefresh=False):
        """
        Query Database for a release with the given title, if not found search and if found
        save and return results

        :param artist: Artist
        :param title: str
        :param doFindIfNotInDB: bool When True search the engines if the database has no match
        :param forceRefresh: bool When True search the engines even if the database has a match
        :rtype : Release
        :return: the Release, or None when it cannot be found or an error occurs (errors are logged)
        """
        try:
            if not title or not artist:
                return None
            startTime = arrow.utcnow().datetime
            printableTitle = title.encode('ascii', 'ignore').decode('utf-8')
            printableArtistName = artist.name.encode('ascii', 'ignore').decode('utf-8')
            release = self._getFromDatabaseByTitle(artist, title)
            if (not release and doFindIfNotInDB) or forceRefresh:
                if not release:
                    self.logger.info("Release For Artist [" + printableArtistName +
                                     "] Not Found By Title [" + printableTitle + "]")
                else:
                    self.logger.info("Refreshing Release [" + printableTitle + "] For Artist [" +
                                     printableArtistName + "]")
                # Empty placeholder; it is only returned if the first search result is falsy
                release = Release()
                imageRows = self.session.query(Image) \
                    .add_column(Image.signature) \
                    .join(Release) \
                    .filter(Release.artistId == artist.id).all()
                # Each row is (Image, signature); the searcher compares against bare signatures
                artistReleaseImages = [row[1] for row in imageRows if row[1]]
                srList = self.searcher.searchForArtistReleases(artist, artistReleaseImages, title)
                if not srList:
                    self.logger.info("Release For Artist [" + printableArtistName +
                                     "] Not Found By Title [" + printableTitle + "]")
                    return None
                sr = srList[0]
                if sr:
                    release = self._createDatabaseModelFromSearchModel(artist, title, sr)
                    self.session.add(release)
                    self.session.commit()
            elapsedTime = arrow.utcnow().datetime - startTime
            self.logger.info(": ReleaseFactory get elapsed time [" + str(elapsedTime) + "]")
            return release
        except Exception:
            self.logger.exception("ReleaseFactory: Error In get()")
        return None

    def _createDatabaseModelFromSearchModel(self, artist, title, sr):
        """
        Take the given SearchResult Release Model and create a Database Model

        If a Release already exists with one of the search result's external ids that Release is
        returned instead (with the title added to its alternate names when missing).

        :type artist: Artist
        :type title: str
        :type sr: searchEngines.models.Release.Release
        :return: the existing Release matched by external id, or a new unsaved Release
        """
        printableTitle = title.encode('ascii', 'ignore').decode('utf-8')
        releaseByExternalIds = self._getFromDatabaseByExternalIds(sr.musicBrainzId,
                                                                  sr.iTunesId,
                                                                  sr.lastFMId,
                                                                  sr.amgId,
                                                                  sr.spotifyId)
        if releaseByExternalIds:
            if not releaseByExternalIds.alternateNames:
                releaseByExternalIds.alternateNames = []
            if title not in releaseByExternalIds.alternateNames:
                self.logger.debug("Found Title By External Ids [" +
                                  releaseByExternalIds.title.encode('ascii', 'ignore')
                                  .decode('utf-8') + "] Added [" + printableTitle + "] To AlternateNames")
                releaseByExternalIds.alternateNames.append(title)
                releaseByExternalIds.lastUpdated = arrow.utcnow().datetime
                self.session.commit()
            return releaseByExternalIds

        release = Release()
        release.artist = artist
        release.roadieId = sr.roadieId
        release.title = title
        release.releaseDate = parseDate(sr.releaseDate)
        release.trackCount = sr.trackCount
        release.mediaCount = sr.mediaCount
        release.thumbnail = sr.thumbnail
        release.profile = sr.profile
        # Any other search release type leaves releaseType unset
        if sr.releaseType == SearchReleaseType.Album:
            release.releaseType = 'Album'
        elif sr.releaseType == SearchReleaseType.EP:
            release.releaseType = 'EP'
        elif sr.releaseType == SearchReleaseType.Single:
            release.releaseType = 'Single'
        release.iTunesId = sr.iTunesId
        release.amgId = sr.amgId
        release.lastFMId = sr.lastFMId
        release.lastFMSummary = sr.lastFMSummary
        release.musicBrainzId = sr.musicBrainzId
        release.spotifyId = sr.spotifyId
        release.tags = sr.tags
        release.alternateNames = sr.alternateNames
        release.urls = sr.urls

        if sr.images:
            # Only images that actually carry bytes are stored
            releaseImages = []
            for image in sr.images:
                if image.image:
                    i = Image()
                    i.roadieId = image.roadieId
                    i.url = image.url
                    i.caption = image.caption
                    i.image = image.image
                    i.signature = image.signature
                    releaseImages.append(i)
            release.images = releaseImages
            self.logger.debug("= Added [" + str(len(release.images)) + "] Images to Release")

        # TODO See if a cover file (cover.jpg, then front.jpg) is found in the Release Folder and
        # use its thumbnail bytes; if no bytes are found see if MusicBrainz has cover art for the
        # release and thumbnail that instead.

        if sr.genres:
            release.genres = []
            for genre in sr.genres:
                # Reuse the stored Genre with the same name when there is one
                dbGenre = self.session.query(Genre).filter(Genre.name == genre.name).first()
                if not dbGenre:
                    g = Genre()
                    g.name = genre.name
                    g.roadieId = genre.roadieId
                    release.genres.append(g)
                else:
                    release.genres.append(dbGenre)

        if sr.releaseLabels:
            release.releaseLabels = []
            for srReleaseLabel in sr.releaseLabels:
                srLabel = srReleaseLabel.label
                label = self._getLabelFromDatabase(srLabel.name)
                if not label:
                    label = Label()
                    label.roadieId = srLabel.roadieId
                    label.musicBrainzId = srLabel.musicBrainzId
                    label.beginDate = srLabel.beginDate
                    # NOTE(review): attribute is 'end' here but 'endDate' on ReleaseLabel below;
                    # confirm against the Label model
                    label.end = srLabel.endDate
                    label.imageUrl = srLabel.imageUrl
                    label.tags = srLabel.tags
                    if srLabel.alternateNames:
                        # "|" is the separator the alternateNames queries look for, so it cannot
                        # appear inside a single name
                        label.alternateNames = [an.replace("|", ",") for an in srLabel.alternateNames]
                    label.sortName = srLabel.sortName
                    label.name = srLabel.name
                if label:
                    rl = ReleaseLabel()
                    rl.roadieId = srReleaseLabel.roadieId
                    rl.catalogNumber = srReleaseLabel.catalogNumber
                    rl.beginDate = parseDate(srReleaseLabel.beginDate)
                    rl.endDate = parseDate(srReleaseLabel.endDate)
                    rl.label = label
                    if rl not in release.releaseLabels:
                        release.releaseLabels.append(rl)

        if sr.media:
            release.media = []
            for srMedia in sr.media:
                media = ReleaseMedia()
                media.roadieId = srMedia.roadieId
                # A missing media number is treated as 0 and then bumped to 1 below
                media.releaseMediaNumber = int(srMedia.releaseMediaNumber or 0)
                # The first media is release 1 not release 0
                if media.releaseMediaNumber < 1:
                    media.releaseMediaNumber = 1
                media.releaseSubTitle = srMedia.releaseSubTitle
                media.trackCount = srMedia.trackCount
                if srMedia.tracks:
                    media.tracks = []
                    for srTrack in srMedia.tracks:
                        track = Track()
                        track.roadieId = srTrack.roadieId
                        track.partTitles = srTrack.partTitles
                        track.musicBrainzId = srTrack.musicBrainzId
                        track.amgId = srTrack.amgId
                        track.spotifyId = srTrack.spotifyId
                        track.title = srTrack.title
                        track.trackNumber = srTrack.trackNumber
                        track.duration = srTrack.duration
                        track.tags = srTrack.tags
                        track.alternateNames = []
                        cleanedTitle = createCleanedName(srTrack.title)
                        if cleanedTitle != srTrack.title.lower().strip():
                            track.alternateNames.append(cleanedTitle)
                        media.tracks.append(track)
                release.media.append(media)
            release.mediaCount = len(release.media)
        return release

    def _getAllFromDatabaseForArtist(self, artist):
        """
        :param artist: Artist
        :return: list of the artist's Releases ordered by release date, or None when no artist given
        """
        if not artist:
            return None
        return self.session.query(Release) \
            .filter(Release.artistId == artist.id) \
            .order_by(Release.releaseDate).all()

    @staticmethod
    def _alternateNameParams(prefix, value):
        """
        Build the bound parameter values used to find ``value`` inside a "|" separated
        alternateNames column: exact match, first entry, middle entry and last entry.

        :param prefix: str Name prefix keeping the parameters unique within a statement
        :param value: str Name to look for; it is bound as-is, never spliced into the SQL
        :return: dict of parameter name to value
        """
        return {
            prefix: value,
            prefix + "First": value + "|%",
            prefix + "Middle": "%|" + value + "|%",
            prefix + "Last": "%|" + value,
        }

    def _alternateNamesClause(self, prefix, value, lowerExact):
        """
        Build a text clause matching ``value`` against the alternateNames column using bound
        parameters, so quotes or backslashes in the value cannot change the SQL.

        :param prefix: str Name prefix for the bound parameters
        :param value: str Name to look for
        :param lowerExact: bool When True the exact comparison is made on lower(alternateNames)
        """
        exactColumn = "lower(alternateNames)" if lowerExact else "alternateNames"
        sql = ("(" + exactColumn + " = :" + prefix +
               " OR alternateNames like :" + prefix + "First" +
               " OR alternateNames like :" + prefix + "Middle" +
               " OR alternateNames like :" + prefix + "Last)")
        return text(sql).bindparams(**self._alternateNameParams(prefix, value))

    def _getFromDatabaseByTitle(self, artist, title):
        """
        Find the artist's Release whose title, or one of whose alternate names, matches the
        given title (lower-cased and stripped) or its cleaned form.

        :param artist: Artist
        :param title: str
        :return: the first matching Release or None
        """
        if not title:
            return None
        title = title.lower().strip()
        cleanedTitle = createCleanedName(title)
        criteria = [func.lower(Release.title) == title,
                    self._alternateNamesClause("relTitle", title, True)]
        if cleanedTitle:
            criteria.append(self._alternateNamesClause("relCleanedTitle", cleanedTitle, False))
        return self.session.query(Release) \
            .filter(Release.artistId == artist.id) \
            .filter(or_(*criteria)).first()

    def _getLabelFromDatabase(self, name):
        """
        Find the Label whose name, or one of whose alternate names, matches the given name
        (lower-cased and stripped).

        :param name: str
        :return: the first matching Label or None
        """
        if not name:
            return None
        name = name.lower().strip()
        stmt = or_(func.lower(Label.name) == name,
                   self._alternateNamesClause("labelName", name, True))
        return self.session.query(Label).filter(stmt).first()

    def _getFromDatabaseByExternalIds(self, musicBrainzId, iTunesId, lastFMId, amgId, spotifyId):
        """
        Find a Release matching any of the given external ids; ids that are None are ignored.

        :return: the first matching Release, or None (without querying) when every id is None
        """
        candidates = ((musicBrainzId, lambda: Release.musicBrainzId == musicBrainzId),
                      (iTunesId, lambda: Release.iTunesId == iTunesId),
                      (lastFMId, lambda: Release.lastFMId == lastFMId),
                      (amgId, lambda: Release.amgId == amgId),
                      (spotifyId, lambda: Release.spotifyId == spotifyId))
        criteria = [makeCriterion() for value, makeCriterion in candidates if value is not None]
        if not criteria:
            return None
        return self.session.query(Release).filter(or_(*criteria)).first()

    def _getFromDatabaseByRoadieId(self, roadieId):
        """
        :return: the first Release with the given roadieId or None
        """
        return self.session.query(Release).filter(Release.roadieId == roadieId).first()

    def create(self, artist, title, trackCount, releaseDate):
        """
        Build a new Release for the artist; the Release is not added to the session.

        :param artist: Artist
        :param title: str
        :param trackCount: int
        :param releaseDate: value understood by parseDate
        :return: the new Release, or None when any argument is missing or falsy
        """
        if not artist or not title or not trackCount or not releaseDate:
            return None
        release = Release()
        release.title = title
        release.releaseDate = parseDate(releaseDate)
        release.trackCount = trackCount
        release.artistId = artist.id
        release.createdDate = arrow.utcnow().datetime
        release.roadieId = str(uuid.uuid4())
        release.alternateNames = []
        cleanedTitle = createCleanedName(title)
        if cleanedTitle != title.lower().strip():
            release.alternateNames.append(cleanedTitle)
        return release

    def add(self, release):
        """
        Add the given Release to the session and commit.
        """
        self.session.add(release)
        self.session.commit()

    def _deleteReleaseFiles(self, release, pathToTrack):
        """
        Remove the file of every track of the release and any folder left empty by that.
        A file that cannot be removed is logged and does not stop the remaining tracks.
        """
        for releaseMedia in release.media:
            for track in releaseMedia.tracks:
                try:
                    trackPath = pathToTrack(track)
                    trackFolder = os.path.dirname(trackPath)
                    os.remove(trackPath)
                    # if the folder is empty then delete the folder as well
                    if trackFolder and not os.listdir(trackFolder):
                        os.rmdir(trackFolder)
                except OSError:
                    self.logger.exception("Error Deleting Release Track File")

    def delete(self, release, pathToTrack, deleteFiles=False):
        """
        Performs all necessary steps to delete a Release and optionally Release Tracks

        :param release: Release to delete
        :param pathToTrack: Method to generate Full Path for Release Media Tracks
        :type deleteFiles: bool
        :return: True when the Release was deleted, False when none was given or an error occurred
        """
        if not release:
            return False
        try:
            if deleteFiles:
                self._deleteReleaseFiles(release, pathToTrack)
            # Clear the genres and commit before the Release itself is deleted
            release.genres = []
            self.session.commit()
            self.session.delete(release)
            self.session.commit()
            return True
        except Exception:
            self.session.rollback()
            self.logger.exception("Error Deleting Release")
            return False
class CollectionImporter(ProcessorBase):
    """
    Imports a CSV list of (position, artist, release) rows into a Collection, recording the
    rows whose artist or release cannot be found in the database.
    """
    format = None
    positions = None
    filename = None
    collectionId = None
    collection = None

    def __init__(self, dbConn, dbSession, readOnly):
        """
        :param dbConn: database connection
        :param dbSession: database session used for queries and commits
        :param readOnly: bool Stored on the instance
        """
        self.logger = Logger()
        self.dbConn = dbConn
        self.dbSession = dbSession
        self.artistFactory = ArtistFactory(dbConn, dbSession)
        self.releaseFactory = ReleaseFactory(dbConn, dbSession)
        # One dict per CSV row that could not be matched to an artist/release
        self.notFoundEntryInfo = []
        # NOTE(review): readOnly is stored but nothing in this class consults it before committing
        self.readOnly = readOnly

    def _findColumns(self):
        """
        Work out the CSV column index of the position, release (or album) and artist columns
        from ``self.positions``; names are matched case-insensitively, ignoring surrounding spaces.

        :return: bool False when any of the three required columns is missing
        """
        self.position = -1
        self.release = -1
        self.artist = -1
        for i, position in enumerate(self.positions):
            columnName = position.lower().strip()
            if columnName == "position":
                self.position = i
            elif columnName in ("release", "album"):
                self.release = i
            elif columnName == "artist":
                self.artist = i
        if self.position < 0 or self.release < 0 or self.artist < 0:
            self.logger.critical("Unable To Find Required Positions")
            return False
        return True

    def importFile(self, collectionId, fileFormat, filename):
        """
        Import the given CSV file into the Collection with the given id.

        :param collectionId: id of the Collection to fill
        :param fileFormat: str Comma separated column names describing the CSV columns
        :param filename: str Path of the CSV file
        :return: bool True when the import was committed, False otherwise
        """
        self.collectionId = collectionId
        self.collection = self.dbSession.query(Collection).filter(Collection.id == collectionId).first()
        self.format = fileFormat
        self.positions = self.format.split(',')
        self.filename = filename
        if not os.path.exists(self.filename):
            self.logger.critical("Unable to Find CSV File [" + self.filename + "]")
            return False
        self.logger.debug("Importing [" + self.filename + "]")
        # The rows are read inside importCsvData, so the file can be closed as soon as it returns
        with open(self.filename) as csvFile:
            return self.importCsvData(csvFile)

    def importCollection(self, collection):
        """
        Import the CSV text held on the Collection itself (``listInCSV``, with its columns
        described by ``listInCSVFormat``).

        :param collection: Collection
        :return: bool True when the import was committed, False otherwise
        """
        self.collectionId = collection.id
        self.collection = collection
        self.positions = collection.listInCSVFormat.split(',')
        return self.importCsvData(io.StringIO(collection.listInCSV))

    def importCsvData(self, csvData):
        """
        Replace the Collection's releases with the rows read from the given CSV data.

        Rows whose artist or release is not in the database are skipped and recorded in
        ``notFoundEntryInfo``; blank rows are ignored. Any error rolls the session back.

        :param csvData: iterable of CSV lines (an open file or StringIO)
        :return: bool True when the import was committed, False otherwise
        """
        try:
            if not self.collection:
                self.logger.critical("Unable to Find Collection Id [" + str(self.collectionId) + "]")
                return False
            if not self._findColumns():
                # Without all three columns the -1 indexes would silently read the last column
                return False
            reader = csv.reader(csvData)
            self.collection.collectionReleases = []
            for row in reader:
                if not row:
                    # csv.reader yields an empty list for a blank line
                    continue
                csvPosition = int(row[self.position].strip())
                csvArtist = row[self.artist].strip()
                csvRelease = row[self.release].strip()
                artist = self.artistFactory.get(csvArtist, False)
                if not artist:
                    self.logger.warn(("Artist [" + csvArtist + "] Not Found In Database").encode('utf-8'))
                    self.notFoundEntryInfo.append(
                        {'col': self.collection.name, 'position': csvPosition, 'artist': csvArtist,
                         'release': csvRelease})
                    continue
                release = self.releaseFactory.get(artist, csvRelease, False)
                if not release:
                    self.logger.warn(
                        ("Not able to find Release [" + csvRelease + "], Artist [" + csvArtist + "]").encode(
                            'utf-8'))
                    self.notFoundEntryInfo.append(
                        {'col': self.collection.name, 'position': csvPosition, 'artist': csvArtist,
                         'release': csvRelease})
                    continue
                colRelease = CollectionRelease()
                colRelease.releaseId = release.id
                colRelease.listNumber = csvPosition
                colRelease.createdDate = arrow.utcnow().datetime
                colRelease.roadieId = str(uuid.uuid4())
                self.collection.collectionReleases.append(colRelease)
                self.logger.info(
                    "Added Position [" + str(csvPosition) + "] Release [" + str(release) + "] To Collection")
            self.collection.lastUpdated = arrow.utcnow().datetime
            self.dbSession.commit()
            return True
        except Exception:
            # The collection may be missing here, so do not assume it has a name
            collectionName = str(self.collection.name) if self.collection else str(self.collectionId)
            self.logger.exception("Error Importing Collection [" + collectionName + "]")
            self.dbSession.rollback()
            return False
class ArtistSearcher(object):
    """
    Query Enabled Search Engines and Find Artist Information and aggregate results.
    """
    allMusicSearcher = None
    spotifySearcher = None
    mbSearcher = None
    lastFMSearcher = None
    imageSearcher = None
    iTunesSearcher = None

    # (width, height) bounds used when thumbnailing / normalising images
    artistThumbnailSize = 160, 160
    releaseThumbnailSize = 80, 80
    imageMaximumSize = 500, 500

    # Class-level dicts, so every instance shares the same artist and image caches
    cache = dict()
    imageCache = dict()

    def __init__(self, referer=None):
        """
        :param referer: str Referer handed to the search engines; a missing or localhost
                        referer is replaced by the project URL
        """
        self.referer = referer
        if not self.referer or self.referer.startswith("http://localhost"):
            self.referer = "http://github.com/sphildreth/roadie"
        self.logger = Logger()
        self.allMusicSearcher = AllMusicGuide(self.referer)
        self.spotifySearcher = Spotify(self.referer)
        self.mbSearcher = MusicBrainz(self.referer)
        self.lastFMSearcher = LastFM(self.referer)
        self.iTunesSearcher = iTunes(self.referer)
        self.imageSearcher = ImageSearcher(self.referer)

    @staticmethod
    def _printable(value):
        """
        Return the value as text with every non-ASCII character dropped, for log messages.
        """
        return str(value or "").encode('ascii', 'ignore').decode('utf-8')

    @staticmethod
    def _toJpegBytes(imageBytes, maximumSize):
        """
        Re-encode the given image bytes as an RGB JPEG that fits within maximumSize.

        ``thumbnail`` shrinks the image in place and keeps its aspect ratio; ``resize`` would
        return a new image (leaving this one untouched) and stretch it to the exact size.

        :param imageBytes: bytes Encoded image
        :param maximumSize: tuple (width, height) bound
        :return: bytes JPEG encoded image
        """
        img = Image.open(io.BytesIO(imageBytes)).convert('RGB')
        img.thumbnail(maximumSize)
        b = io.BytesIO()
        img.save(b, "JPEG")
        return b.getvalue()

    def searchForArtist(self, name):
        """
        Perform a search in all enabled search engines and return an aggregate Artist for the
        given Artist name

        :param name: String Name of the Artist to find
        :return: Artist Populated Artist or None if error or not found
        """
        if not name:
            return None
        if name in self.cache:
            return self.cache[name]
        try:
            startTime = arrow.utcnow().datetime
            artist = Artist(name=name)
            artist.roadieId = str(uuid.uuid4())
            if self.iTunesSearcher.IsActive:
                artist = artist.mergeWithArtist(self.iTunesSearcher.lookupArtist(name))
            if self.mbSearcher.IsActive:
                artist = artist.mergeWithArtist(self.mbSearcher.lookupArtist(name))
            if self.lastFMSearcher.IsActive:
                artist = artist.mergeWithArtist(self.lastFMSearcher.lookupArtist(name))
            if self.spotifySearcher.IsActive:
                artist = artist.mergeWithArtist(self.spotifySearcher.lookupArtist(name))
            if self.allMusicSearcher.IsActive:
                artist = artist.mergeWithArtist(self.allMusicSearcher.lookupArtist(name))
            if artist:
                # Fetch images with only urls, remove any with neither URL or BLOB
                if artist.images:
                    images = []
                    firstImageInImages = None
                    for image in artist.images:
                        if not image.image and image.url:
                            image.image = self.getImageForUrl(image.url)
                        if image.image:
                            # Shrink to maximum image size and convert to JPEG
                            image.image = self._toJpegBytes(image.image, self.imageMaximumSize)
                            firstImageInImages = firstImageInImages or image.image
                            image.signature = image.averageHash()
                            images.append(image)
                    if images:
                        # Keep only the first image for each signature
                        dedupedImages = []
                        imageSignatures = []
                        for image in images:
                            if image.signature not in imageSignatures:
                                imageSignatures.append(image.signature)
                                dedupedImages.append(image)
                        artist.images = dedupedImages
                    if not artist.thumbnail and firstImageInImages:
                        try:
                            artist.thumbnail = self._toJpegBytes(firstImageInImages, self.artistThumbnailSize)
                        except Exception:
                            # The thumbnail is optional; keep the artist without one
                            self.logger.exception("Error Creating Artist Thumbnail")
                # Add special search names to alternate names
                if not artist.alternateNames:
                    artist.alternateNames = []
                if artist.name not in artist.alternateNames:
                    cleanedArtistName = createCleanedName(artist.name)
                    if cleanedArtistName != artist.name.lower().strip() and \
                            cleanedArtistName not in artist.alternateNames:
                        artist.alternateNames.append(cleanedArtistName)
                if not artist.bioContext:
                    try:
                        artist.bioContext = wikipedia.summary(artist.name)
                    except Exception:
                        # The summary is optional; continue without it
                        self.logger.debug("No Wikipedia Summary For [" + self._printable(artist.name) + "]")
                self.cache[name] = artist
            elapsedTime = arrow.utcnow().datetime - startTime
            self.logger.debug("searchForArtist Elapsed Time [" + str(elapsedTime) + "] Name [" +
                              self._printable(name) + "] Found [" +
                              self._printable(artist.name if artist else "") +
                              "] MusicBrainzId [" + str(getattr(artist, "musicBrainzId", None)) + "] " +
                              " iTunesId [" + str(getattr(artist, "iTunesId", None)) + "] " +
                              " amgId [" + str(getattr(artist, "amgId", None)) + "]" +
                              " spotifyId [" + str(getattr(artist, "spotifyId", None)) + "]")
            return artist
        except Exception:
            self.logger.exception("Error In searchForArtist")
        return None

    def _mergeReleaseLists(self, left, right):
        """
        Merge two release lists: a right release equal to one already merged is merged into it
        (via mergeWithRelease), any other right release is appended.

        :param left: list of Release or None
        :param right: list of Release or None
        :return: list The one non-empty input when the other is empty, [] when both are empty,
                 otherwise a new list (the given lists themselves are not modified)
        """
        if not left:
            return right or []
        if not right:
            return left
        mergeReleaseListsStart = arrow.utcnow()
        mergedReleases = list(left)
        # Merge the right to the result
        for rRelease in right:
            for mRelease in mergedReleases:
                if mRelease == rRelease:
                    mRelease.mergeWithRelease(rRelease)
                    break
            else:
                mergedReleases.append(rRelease)
        mergedReleaseElapsed = arrow.utcnow() - mergeReleaseListsStart
        self.logger.debug("= MergeReleaseLists left size [" + str(len(left)) + "], right size [" + str(
            len(right)) + "] Elapsed Time [" + str(mergedReleaseElapsed) + "]")
        return mergedReleases

    def searchForArtistReleases(self, artist, artistReleaseImages, titleFilter=None):
        """
        Using the given populated Artist find all releases, with an optional filter

        :param artist: Artist Artist to find releases for
        :param artistReleaseImages: list Collection of image signatures for Artist for deduping
        :param titleFilter: String Optional filter of release Title to only include in results
        :return: iterable Release Collection of releases found for artist, None on error
        """
        if not artist:
            return None
        try:
            startTime = arrow.utcnow().datetime
            releases = []
            if self.iTunesSearcher.IsActive:
                releases = self._mergeReleaseLists(releases,
                                                   self.iTunesSearcher.searchForRelease(artist, titleFilter))
            if self.mbSearcher.IsActive:
                releases = self._mergeReleaseLists(releases,
                                                   self.mbSearcher.searchForRelease(artist, titleFilter))
            if self.lastFMSearcher.IsActive and releases:
                # Last.fm is looked up by the MusicBrainz ids found so far
                mbIdList = []
                if not titleFilter:
                    mbIdList = [x.musicBrainzId for x in releases if x.musicBrainzId]
                else:
                    for x in releases:
                        if x.musicBrainzId and isEqual(x.title, titleFilter):
                            mbIdList.append(x.musicBrainzId)
                            break
                if mbIdList:
                    releases = self._mergeReleaseLists(
                        releases,
                        self.lastFMSearcher.lookupReleasesForMusicBrainzIdList(artist, mbIdList))
            if self.spotifySearcher.IsActive:
                releases = self._mergeReleaseLists(releases,
                                                   self.spotifySearcher.searchForRelease(artist, titleFilter))
            if releases:
                self.logger.debug(
                    "searchForArtistReleases Found [" + str(len(releases)) + "] For title [" + str(
                        titleFilter) + "]")
                for release in releases:
                    self._prepareReleaseImages(release, artistReleaseImages)
            if titleFilter and releases:
                cleanedTitleFilter = createCleanedName(titleFilter)
                releases = [release for release in releases
                            if isEqual(release.title, titleFilter) or
                            cleanedTitleFilter in (release.alternateNames or [])]
            elapsedTime = arrow.utcnow().datetime - startTime
            self.logger.debug("searchForArtistReleases ElapseTime [" + str(elapsedTime) + "]")
            return releases
        except Exception:
            self.logger.exception("Error In searchForArtistReleases")
        return None

    def _prepareReleaseImages(self, release, artistReleaseImages):
        """
        Download, normalise and dedupe the images of one search release and give the release a
        thumbnail when it has none.

        :param release: search Release whose images and thumbnail are updated in place
        :param artistReleaseImages: list Known image signatures; new signatures are appended to it
        """
        if release.coverUrl:
            if release.images is None:
                release.images = []
            release.images.append(ArtistImage(release.coverUrl))
        # Fetch images with only urls, remove any with neither URL or BLOB
        if not release.images:
            return
        images = []
        for image in release.images:
            if not image.image and image.url:
                image.image = self.getImageForUrl(image.url)
            if image.image:
                # Shrink to maximum image size and convert to JPEG
                image.image = self._toJpegBytes(image.image, self.imageMaximumSize)
                # Hash image for deduping
                image.signature = image.averageHash()
                if image.signature:
                    images.append(image)
        if not images:
            release.images = []
        else:
            dedupedImages = []
            # NOTE(review): a non-empty list given by the caller is shared (and extended) across
            # all releases, while an empty one gives each release its own list - confirm intent
            imageSignatures = artistReleaseImages or []
            for image in images:
                if image.signature not in imageSignatures:
                    imageSignatures.append(image.signature)
                    dedupedImages.append(image)
            release.images = dedupedImages
        if not release.thumbnail:
            firstImageInImages = next((image.image for image in release.images if image.image), None)
            if firstImageInImages:
                try:
                    release.thumbnail = self._toJpegBytes(firstImageInImages, self.releaseThumbnailSize)
                except Exception:
                    # The thumbnail is optional; keep the release without one
                    self.logger.exception("Error Creating Release Thumbnail")

    def getImageForUrl(self, url):
        """
        Return the image bytes for the url, downloading them only the first time a url is seen.

        :param url: str
        :return: whatever the image searcher returned for the url (cached per url)
        """
        if url not in self.imageCache:
            self.imageCache[url] = self.imageSearcher.getImageBytesForUrl(url)
            self.logger.debug("= Downloading Image [" + str(url) + "]")
        return self.imageCache[url]