def __init__(self, record_type, file_path, track_id, session):
    """Initialize the factory for a single audio file.

    Args:
        record_type: Name of a record entity class defined in the
            tag_records module (resolved dynamically via getattr).
        file_path: Path to the audio file to build a record for.
        track_id: Primary key of the associated track row.
        session: DB session used for subsequent inserts/queries.
    """
    self.file_path = file_path
    self.track_id = track_id
    self.session = session
    # Populated later when the record is actually created
    self.tag_record = None
    # Resolve the concrete record entity class from the tag_records module
    self.TagRecordEntity = getattr(tag_records, record_type)
    self.audio_file = AudioFile(self.file_path)
    # Seed the column dict from the file's tags (see create_row)
    self.row = self.create_row()
def update_track_table(self, tag_records):
    """Re-home processed files and sync their DB rows and ID3 tags.

    For each (track file name -> tag record) pair: read metadata from the
    file under self.target_dir, copy the file to PROCESSED_MUSIC_DIR under
    a title-derived name, update the matching Track row's columns, and
    write the refreshed metadata as tags on the copied file. Failures are
    logged per file and processing continues with the next entry.

    Args:
        tag_records: Mapping of track file name -> tag record row
            (each record must expose a track_id attribute).
    """
    for track_file, tag_record in tag_records.items():
        try:
            old_path = join(self.target_dir, track_file)
            _, ext = splitext(old_path)
            audio_file = AudioFile(old_path)
            metadata = audio_file.get_metadata()

            # New canonical location: formatted title + original extension
            formatted_title = metadata[TrackDBCols.TITLE.value] + ext
            new_path = join(PROCESSED_MUSIC_DIR, formatted_title)
            metadata[TrackDBCols.FILE_PATH.value] = new_path

            track = self.session.query(Track).filter_by(id=tag_record.track_id).first()
            # Preserve the row's original date_added, then regenerate the
            # serialized comment from the (now updated) metadata
            metadata[TrackDBCols.DATE_ADDED.value] = track.date_added
            metadata[TrackDBCols.COMMENT.value] = audio_file.generate_comment(metadata)

            # Mirror every metadata value onto the Track row's columns
            for col, val in metadata.items():
                setattr(track, col, val)

            # Copy the file, then write the tags onto the new copy
            copyfile(old_path, new_path)
            audio_file = AudioFile(new_path)
            audio_file.write_tags(metadata)
        except Exception as e:
            handle(e, 'Exception occurred processing %s:' % track_file)
            continue
def write_tags(self, tag_records):
    """Copy each track from the source to the target directory and stamp
    the BPM and key from its tag record onto the copied file.

    Args:
        tag_records: Mapping of track file name -> tag record row
            (each record must expose bpm and key attributes).

    Failures are logged per file; processing continues with the next entry.
    """
    for file_name, record in tag_records.items():
        try:
            source_path = join(self.source_dir, file_name)
            destination_path = join(self.target_dir, file_name)
            copyfile(source_path, destination_path)

            # Only BPM and key are written at this stage
            tag_values = {
                TrackDBCols.BPM.value: float(record.bpm),
                TrackDBCols.KEY.value: record.key
            }
            AudioFile(destination_path).write_tags(tag_values)
        except Exception as e:
            handle(e, 'Exception occurred processing %s:' % file_name)
            continue
def ingest_tracks(input_dir, target_dir=PROCESSED_MUSIC_DIR):
    """ Ingest new tracks - extract tags, format fields, and create track table entries. """
    session = database.create_session()
    # NOTE(review): this session is created and closed but never used in the
    # body - presumably DataManager.insert_tracks manages its own; confirm.

    try:
        input_files = get_audio_files(input_dir)
        tracks_to_save = {}
        for f in input_files:
            old_path = join(input_dir, f)

            # Load track and read ID3 tags
            try:
                track = AudioFile(old_path)
            except Exception as e:
                handle(e, 'Couldn\'t read ID3 tags for %s' % old_path)
                continue

            # Verify requisite ID3 tags exist; skip files that can't be ingested
            id3_data = track.get_tags()
            if not REQUIRED_ID3_TAGS.issubset(set(id3_data.keys())):
                print('Can\'t ingest %s due to missing requisite ID3 tags' % old_path)
                continue

            # Copy to target directory
            new_path = join(target_dir, f)
            try:
                print('\nCopying:\t%s\nto:\t\t%s' % (old_path, new_path))
                copyfile(old_path, new_path)
            except Exception as e:
                handle(e, 'Couldn\'t copy %s to target directory' % new_path)
                continue

            # Index the loaded AudioFile by its new (post-copy) path
            tracks_to_save[new_path] = track

        # Update database with all successfully copied tracks
        DataManager.insert_tracks(tracks_to_save)
    except Exception as e:
        handle(e)
    finally:
        session.close()
def find_artist_disparities():
    """Report tracks whose comment-embedded artist names differ from the DB.

    For every Track row, the artist/remixer names stored in the serialized
    comment are compared against the Artist rows linked via ArtistTrack;
    any names present in the comment but missing from the DB are printed.
    """
    session = database.create_session()
    try:
        for track in session.query(Track).all():
            # Generate comment: prefer the DB column; fall back to the
            # file's metadata, and finally to an empty JSON object
            track_comment = track.comment
            if track_comment is None:
                try:
                    track_model = AudioFile(track.file_path)
                    track_metadata = track_model.get_metadata()
                    track_comment = track_metadata.get(TrackDBCols.COMMENT.value, '{}')
                except Exception:
                    track_comment = '{}'
            track_comment = load_comment(track_comment)

            # Extract artist names from comment (artists + remixers,
            # comma-separated, whitespace-trimmed, empties dropped)
            artist_str = track_comment.get(ArtistFields.ARTISTS.value, '')
            remixer_str = track_comment.get(ArtistFields.REMIXERS.value, '')
            comment_artists = set([
                ca for ca in
                [a.strip() for a in artist_str.split(',')] +
                [r.strip() for r in remixer_str.split(',')]
                if not is_empty(ca)
            ])

            # Get artist names in DB via the ArtistTrack join rows
            artist_tracks = session.query(ArtistTrack).filter_by(track_id=track.id).all()
            artist_rows = set()
            for artist_track in artist_tracks:
                artist_row = session.query(Artist).filter_by(id=artist_track.artist_id).first()
                artist_rows.add(artist_row.name)

            # Find diff between comment and DB entries; only names missing
            # from the DB side are treated as a disparity
            if len(comment_artists.difference(artist_rows)) > 0:
                print('Artist disparity for track %s' % track.title)
                print('Comment artist entry: %s' % str(comment_artists))
                print('DB artist entries: %s' % str(artist_rows))
                print('-------\n')
    except Exception as e:
        handle(e, 'Top-level exception occurred while syncing track fields')
        session.rollback()
    finally:
        session.close()
def sync_track_tags(tracks):
    """Write ID3 tags on each track's file to match its DB state.

    For every field in ID3_COMMENT_FIELDS, the desired value is taken from
    the Track column first, then the serialized comment blob as a fallback;
    the tag on disk is rewritten only when it differs. Per-track failures
    are logged and the remaining tracks are still processed.

    Args:
        tracks: Iterable of Track rows with file_path, comment, and the
            ID3-mapped column attributes.
    """
    for track in tracks:
        af = AudioFile(track.file_path)
        track_pk = track.get_id_title_identifier()
        try:
            comment = load_comment(track.comment, '{}')
            tags_to_update = {}
            for field in ID3_COMMENT_FIELDS:
                id3_tag = METADATA_KEY_TO_ID3.get(field)
                col_value = normalize_tag_text(getattr(track, field, None))
                comment_value = normalize_tag_text(comment.get(field, None))
                old_value = af.get_tag(id3_tag)

                # DB column takes precedence over the comment blob
                new_value = col_value or comment_value
                # Compare as strings to sidestep type mismatches (int vs str)
                if str(new_value) != str(old_value):
                    tags_to_update[field] = new_value

            if len(tags_to_update) > 0:
                af.write_tags(tags_to_update)

                # Print a banner plus the field -> value pairs that changed
                progress_msg = 'Tags saved for %s' % track_pk
                banner = get_banner(progress_msg)
                print('\n%s' % banner)
                print(progress_msg)
                print('%s' % banner)
                print('\n'.join([
                    '%s: %s' % (k, v) for k, v in tags_to_update.items()
                ]))
        except Exception as e:
            handle(e, 'Unexpected exception syncing tags for %s' % track_pk)
            continue
class TagRecordFactory:
    """ Create an ID3 tag record for use in the ingestion pipeline. """

    def __init__(self, record_type, file_path, track_id, session):
        """Initialize the factory for a single audio file.

        Args:
            record_type: Name of a record entity class defined in the
                tag_records module (resolved dynamically via getattr).
            file_path: Path to the audio file to build a record for.
            track_id: Primary key of the associated track row.
            session: DB session used for the duplicate check and insert.
        """
        self.file_path = file_path
        self.track_id = track_id
        self.session = session
        # Populated by update_database once the record is created
        self.tag_record = None
        # Resolve the concrete record entity class from the tag_records module
        self.TagRecordEntity = getattr(tag_records, record_type)
        self.audio_file = AudioFile(self.file_path)
        self.row = self.create_row()

    def create_tag_record(self):
        """Create and persist the tag record, returning it (or None on error).

        Raises:
            Exception: If a record for this track_id already exists in the
                entity's table.
        """
        if self.session.query(self.TagRecordEntity).filter_by(
                track_id=self.track_id).first() is not None:
            # BUGFIX: TagRecordEntity is itself a class, so __class__.__name__
            # would report the metaclass ('type'); use __name__ for the
            # actual record type name in the error message.
            raise Exception(
                '%s already exists in table for %s record types' % (
                    self.track_id, self.TagRecordEntity.__name__))

        try:
            self.update_row()
        except Exception as e:
            handle(e)
            return

        self.update_database()
        return self.tag_record

    def create_row(self):
        """Build the column dict for the record from the file's tags."""
        row = {k.name.lower(): self.audio_file.get_tag(k) for k in TAG_COLUMNS}
        row['track_id'] = self.track_id
        return row

    def update_row(self):
        """Hook for subclasses to amend self.row before persistence."""
        pass

    def update_database(self):
        """Instantiate the record entity from self.row and add it guarded."""
        self.tag_record = self.TagRecordEntity(**self.row)
        self.session.guarded_add(self.tag_record)
def generate_camelot_map(tracks):
    """ Generate and return map of camelot code -> BPM -> set of tracks, along with collection metadata. """
    collection_md = {
        CollectionStat.NEWEST: -1,
        CollectionStat.OLDEST: float('inf'),
        CollectionStat.SMMS_MAX: get_max_smms()
    }
    label_counts = defaultdict(int)
    artist_counts = defaultdict(int)
    # camelot code -> bpm -> list of track metadata dicts
    camelot_map = defaultdict(lambda: defaultdict(list))
    track_mds = []

    # First pass: tally artist/label counts and build per-track metadata
    for track in tracks:
        file_path = track.file_path
        comment = track.comment
        # Fall back to the file's metadata when the DB comment is empty
        track_comment = load_comment(
            comment or AudioFile(file_path).get_metadata().get(TrackDBCols.COMMENT.value))

        # Increment artist/remixer counts
        artists = split_artist_string(track_comment.get(ArtistFields.ARTISTS.value, ''))
        remixers = split_artist_string(track_comment.get(ArtistFields.REMIXERS.value, ''))
        for artist in artists + remixers:
            artist_counts[artist] += 1

        # Increment label count
        if not is_empty(track.label):
            label_counts[track.label] += 1

        # Create track metadata dict and add to index (empty values dropped;
        # artist/remixer counts are placeholders filled in the second pass)
        track_mds.append({k: v for k, v in {
            TrackDBCols.ID: track.id,
            TrackDBCols.FILE_PATH: file_path,
            TrackDBCols.TITLE: track.title,
            TrackDBCols.BPM: get_or_default(track, 'bpm', float_transform),
            TrackDBCols.KEY: track.key,
            TrackDBCols.CAMELOT_CODE: track.camelot_code,
            TrackDBCols.LABEL: track.label,
            TrackDBCols.GENRE: track.genre,
            TrackDBCols.ENERGY: get_or_default(track, 'energy', int_transform),
            TrackDBCols.DATE_ADDED: get_or_default(track, 'date_added', datetime_transform),
            ArtistFields.ARTISTS: {artist: 0 for artist in artists},
            ArtistFields.REMIXERS: {remixer: 0 for remixer in remixers}
        }.items() if not is_empty(v)})

    # Add sum of counts to collection metadata counter
    collection_md[CollectionStat.LABEL_COUNTS] = sum(label_counts.values())
    collection_md[CollectionStat.ARTIST_COUNTS] = sum(artist_counts.values())

    # Second pass: resolve counts, track timestamp extrema, fill the map
    for track_md in track_mds:
        # Update artist, remixer, and label counts for track
        if ArtistFields.ARTISTS in track_md:
            track_md[ArtistFields.ARTISTS] = generate_artist_counts(
                artist_counts, track_md[ArtistFields.ARTISTS])
        if ArtistFields.REMIXERS in track_md:
            track_md[ArtistFields.REMIXERS] = generate_artist_counts(
                artist_counts, track_md[ArtistFields.REMIXERS])
        if TrackDBCols.LABEL in track_md:
            label = track_md[TrackDBCols.LABEL]
            track_md[TrackDBCols.LABEL] = (label, label_counts[label])

        # Update global timestamp extrema
        if TrackDBCols.DATE_ADDED in track_md:
            date_added = track_md[TrackDBCols.DATE_ADDED]
            if date_added > collection_md[CollectionStat.NEWEST]:
                collection_md[CollectionStat.NEWEST] = date_added
            if date_added < collection_md[CollectionStat.OLDEST]:
                collection_md[CollectionStat.OLDEST] = date_added

        # Add track metadata to Camelot map
        camelot_code = track_md[TrackDBCols.CAMELOT_CODE]
        bpm = track_md[TrackDBCols.BPM]
        camelot_map[camelot_code][bpm].append(track_md)

    time_range = collection_md[CollectionStat.NEWEST] - collection_md[CollectionStat.OLDEST]
    collection_md[CollectionStat.TIME_RANGE] = time_range

    return camelot_map, collection_md
def sync_track_fields(tracks):
    """Reconcile each track's DB columns, comment blob, and file tags.

    For every field in COMMENT_FIELDS, the DB column value wins over the
    comment value; a file tag is used only when both are null. Titles are
    additionally deduped. Changed fields are written back to the DB row,
    the comment blob, and (for ID3_COMMENT_FIELDS) the file's tags.

    Args:
        tracks: Iterable of Track rows.

    Returns:
        Dict of track id -> DBUpdateType value (UPDATE, NOOP, or FAILURE).
    """
    sync_statuses = {}
    update_msg = 'Updating %s field \'%s\' using %s value: %s -> %s'

    for track in tracks:
        af = AudioFile(track.file_path)
        track_pk = track.get_id_title_identifier()
        log_buffer = []
        try:
            comment = load_comment(track.comment, '{}')
            tags_to_update = {}
            for field in COMMENT_FIELDS:
                col_value = normalize_tag_text(getattr(track, field, None))
                comment_value = normalize_tag_text(comment.get(field, None))
                tag_value = af.get_tag(METADATA_KEY_TO_ID3.get(field, None))

                # File tag is the last-resort source when DB and comment are null
                if (col_value is None and comment_value is None) and tag_value is not None:
                    setattr(track, field, tag_value)
                    col_value = tag_value

                # Numeric fields are compared as ints
                if field == TrackDBCols.BPM.value or field == TrackDBCols.ENERGY.value:
                    col_value = None if col_value is None else int(col_value)
                    comment_value = None if comment_value is None else int(comment_value)

                # Skip any fields without values in either DB or comment
                if col_value is None and comment_value is None:
                    log_buffer.append('%s is null in DB and comment' % field)
                    continue

                # Dedupe titles
                if field == TrackDBCols.TITLE.value:
                    updated_col_title = dedupe_title(col_value)
                    updated_comment_title = dedupe_title(comment_value)
                    title = updated_col_title or updated_comment_title

                    if title != col_value or title != comment_value:
                        log_buffer.append(update_msg % (
                            'comment', field, 'deduped', str(comment_value), str(title)))
                        log_buffer.append(update_msg % (
                            'column', field, 'deduped', str(col_value), str(title)))

                        comment[field] = title
                        setattr(track, field, title)
                        tags_to_update[field] = title
                    continue

                if col_value == comment_value:
                    continue

                # Prefer column value over comment value
                if col_value is not None:
                    log_buffer.append(update_msg % (
                        'comment', field, 'column', str(comment_value), str(col_value)))
                    comment[field] = col_value
                    tags_to_update[field] = col_value
                elif col_value is None and comment_value is not None:
                    log_buffer.append(update_msg % (
                        'column', field, 'comment', str(None), str(comment_value)))
                    setattr(track, field, comment_value)
                    tags_to_update[field] = comment_value

            if len(log_buffer) > 0:
                # Print the sync log for this track under a banner
                progress_msg = 'Sync log for %s' % track_pk
                banner = get_banner(progress_msg)
                print('\n%s' % banner)
                print(progress_msg)
                print('%s' % banner)
                print('\n'.join(log_buffer))

                # Only ID3-backed fields are written to the file's tags
                tags_to_update = {
                    k: v for k, v in tags_to_update.items()
                    if k in ID3_COMMENT_FIELDS
                }
                af.write_tags(tags_to_update)

                track.comment = str(comment)
                sync_statuses[track.id] = DBUpdateType.UPDATE.value
            else:
                sync_statuses[track.id] = DBUpdateType.NOOP.value
        except Exception as e:
            handle(e, 'Unexpected exception syncing fields for %s' % track_pk)
            sync_statuses[track.id] = DBUpdateType.FAILURE.value
            continue

    return sync_statuses