def update_track_table(self, tag_records):
    """Relocate processed tracks to the final music directory and sync their DB rows.

    For each (track file, tag record) pair: re-reads metadata from the staged
    file, computes the new canonical path from the formatted title, mirrors the
    metadata onto the track's DB row, copies the file, and writes tags at the
    new location. Per-file failures are logged and skipped.

    tag_records: dict mapping track file name -> tag record (with .track_id).
    """
    for track_file, tag_record in tag_records.items():
        try:
            # Staged source file lives in this stage's target directory
            old_path = join(self.target_dir, track_file)
            _, ext = splitext(old_path)
            audio_file = AudioFile(old_path)
            metadata = audio_file.get_metadata()

            # New canonical path is derived from the formatted title + original extension
            formatted_title = metadata[TrackDBCols.TITLE.value] + ext
            new_path = join(PROCESSED_MUSIC_DIR, formatted_title)
            metadata[TrackDBCols.FILE_PATH.value] = new_path

            track = self.session.query(Track).filter_by(id=tag_record.track_id).first()
            # date_added is preserved from the existing row, not re-derived
            metadata[TrackDBCols.DATE_ADDED.value] = track.date_added
            metadata[TrackDBCols.COMMENT.value] = audio_file.generate_comment(metadata)

            # Mirror every metadata value onto the ORM row's columns
            for col, val in metadata.items():
                setattr(track, col, val)

            # Copy to the final location, then write tags on the new copy
            copyfile(old_path, new_path)
            audio_file = AudioFile(new_path)
            audio_file.write_tags(metadata)
        except Exception as e:
            handle(e, 'Exception occurred processing %s:' % track_file)
            continue
def create_tag_records(self):
    """Build a tag record for each of this stage's track files.

    Looks up the factory class registered for this stage's record type,
    instantiates it per file (stage defaults taking precedence over
    self.cmd_overrides), and collects the created records.

    Returns:
        dict mapping track file name -> created tag record. Files whose
        factory returns None, or which raise, are omitted.

    Raises:
        Exception: if no factory is registered for this record type.
    """
    factory_name = TAG_RECORD_FACTORIES.get(self.record_type, None)
    if factory_name is None:
        raise Exception('Did not find a factory for record type %s' % self.record_type)

    records = {}
    for track_file in self.track_files:
        try:
            file_path = join(PROCESSING_DIR, track_file)
            track = self.session.query(Track).filter_by(file_path=file_path).first()

            # Stage-supplied args win over any caller overrides
            stage_args = {
                'record_type': self.record_type,
                'file_path': file_path,
                'track_id': track.id,
                'session': self.session
            }
            factory_args = {**self.cmd_overrides, **stage_args}

            factory_class = getattr(tag_record_factories, factory_name)
            record = factory_class(**factory_args).create_tag_record()
            if record is not None:
                records[track_file] = record
        except Exception as e:
            handle(e, 'Exception occurred processing %s:' % track_file)
            continue

    return records
def update_artists(session, track_comment_metadata):
    """Upsert artist rows for every artist/remixer named in a track's comment.

    New artists are inserted with track_count=1; existing artists have their
    track_count incremented.

    Args:
        session: active DB session.
        track_comment_metadata: parsed comment dict with ARTISTS/REMIXERS keys.

    Returns:
        dict mapping artist id (or artist name on insert failure) ->
        DBUpdateType status value.
    """
    artists = track_comment_metadata.get(ArtistFields.ARTISTS.value)
    remixers = track_comment_metadata.get(ArtistFields.REMIXERS.value)
    all_artists = split_artist_string(artists) + split_artist_string(remixers)

    artist_updates = {}
    for a in all_artists:
        artist_row = session.query(Artist).filter_by(name=a).first()
        if artist_row is None:
            try:
                session.add(Artist(**{'name': a, 'track_count': 1}))
                session.commit()
                artist_row = session.query(Artist).filter_by(name=a).first()
                artist_updates[artist_row.id] = DBUpdateType.INSERT.value
            except Exception as e:
                handle(e)
                # Fix: roll back the failed transaction so the session isn't
                # left in an invalid state for subsequent commits (matches the
                # error-handling pattern used by insert_tracks).
                session.rollback()
                # No id exists for a failed insert, so key by name
                artist_updates[a] = DBUpdateType.FAILURE.value
                continue
        else:
            artist_row.track_count += 1
            artist_updates[artist_row.id] = DBUpdateType.UPDATE.value

    return artist_updates
def insert_tracks(tracks):
    """Insert ingested tracks into the track table and update artist bookkeeping.

    For each track: builds a DB row from its metadata, persists it, upserts the
    artists named in its comment, and links artist_track rows. Per-track insert
    failures are rolled back and skipped; a top-level failure rolls back and
    re-raises.

    tracks: dict mapping new file path -> ingested AudioFile object.
    """
    session = database.create_session()
    try:
        artist_updates = {}
        artist_track_updates = {}
        for new_track_path, track in tracks.items():
            # Create new row
            track_metadata = track.get_metadata()
            # Only columns known to the track table are persisted
            db_row = {
                k: v
                for k, v in track_metadata.items() if k in ALL_TRACK_DB_COLS
            }
            db_row[TrackDBCols.FILE_PATH.value] = new_track_path
            title = extract_unformatted_title(db_row[TrackDBCols.TITLE.value])
            db_row[TrackDBCols.TITLE.value] = title

            try:
                # Persist row to DB
                session.add(Track(**db_row))
                session.commit()
            except Exception as e:
                handle(e)
                session.rollback()
                continue

            # Update artists
            comment = load_comment(
                track_metadata.get(TrackDBCols.COMMENT.value), '{}')
            artist_updates_result = DataManager.update_artists(session, comment)
            artist_updates[title] = artist_updates_result

            # Add artist tracks (only for artists that were upserted successfully)
            track_id = session.query(Track).filter_by(
                file_path=new_track_path).first().id
            successful_artist_ids = [
                a for a, s in artist_updates_result.items()
                if s != DBUpdateType.FAILURE.value
            ]
            artist_track_updates[title] = DataManager.insert_artist_tracks(
                session, track_id, successful_artist_ids)

        DataManager.print_database_operation_statuses(
            'Artist updates', artist_updates)
        DataManager.print_database_operation_statuses(
            'Artist track updates', artist_track_updates)
    except Exception as e:
        handle(e)
        session.rollback()
        raise e
    finally:
        session.close()
def execute(self):
    """Run this pipeline stage: create tag records and commit, rolling back on error."""
    session = self.session
    try:
        self.create_tag_records()
        session.commit()
    except Exception as e:
        session.rollback()
        handle(e)
    finally:
        session.close()
def execute(self):
    """Load Rekordbox tag overrides, then create and commit tag records for this stage."""
    session = self.session
    try:
        self.cmd_overrides = {'rb_overrides': PostRBPipelineStage.load_rb_tags()}
        self.create_tag_records()
        session.commit()
    except Exception as e:
        session.rollback()
        handle(e)
    finally:
        session.close()
def execute(self):
    """Create tag records, write tags to the staged files, sync the track table, and commit."""
    session = self.session
    try:
        records = self.create_tag_records()
        self.write_tags(records)
        self.update_track_table(records)
        session.commit()
    except Exception as e:
        session.rollback()
        handle(e)
    finally:
        session.close()
def sync_fields():
    """Reconcile DB columns, comment JSON, and ID3 tags for every track in the library."""
    session = database.create_session()
    try:
        all_tracks = session.query(Track).all()
        DataManager.sync_track_fields(all_tracks)
        session.commit()
    except Exception as e:
        handle(e, 'Top-level exception occurred while syncing track fields')
        session.rollback()
    finally:
        session.close()
def execute(self):
    """Execute this stage: seed the track table, create tag records, and commit."""
    session = self.session
    try:
        self.initialize_tracks_in_database()
        self.create_tag_records()
        session.commit()
    except Exception as e:
        session.rollback()
        handle(e)
    finally:
        session.close()
def guarded_add(self, entity):
    """Add and commit a single entity, returning True on success.

    On failure the entity's column values are logged, the session is rolled
    back, and False is returned instead of raising.
    """
    try:
        self.add(entity)
        self.commit()
    except Exception as e:
        column_values = {
            c.key: getattr(entity, c.key)
            for c in inspect(entity).mapper.column_attrs
        }
        handle(e, 'Failed to add %s to DB' % str(column_values), print, False)
        self.rollback()
        return False
    return True
def restore_backup(latest_date):
    """Download the newest backup revision (at or before latest_date) of every backed-up file.

    Lists files in the Drive backup folder, resolves each file's target
    revision, downloads each revision into the restore directory, and records
    restore progress as JSON. Per-file download failures are logged and skipped.

    Args:
        latest_date: datetime cutoff passed to get_target_revisions.
    """
    drive = GoogleDrive()
    backup_files_args = {
        'q': '\'%s\' in parents' % drive.backup_dir_id,
        'fields': 'nextPageToken, files(id, name)',
        'pageSize': 1000
    }
    backup_files = drive.get_files(backup_files_args)

    # Attaches a 'revision' entry to each backup file in place
    get_revisions_args = {'fields': 'revisions(id, modifiedTime)'}
    drive.get_target_revisions(backup_files, get_revisions_args, latest_date)

    os.makedirs(drive.restore_dir, exist_ok=True)

    restored = []
    for file in backup_files:
        file_name = file.get('name')
        rev = file['revision']
        try:
            media_request = drive.drive.revisions().get_media(
                fileId=file.get('id'), revisionId=rev.get('id'))

            byte_descriptor = io.BytesIO()
            downloader = MediaIoBaseDownload(byte_descriptor, media_request,
                                             chunksize=pow(10, 8))
            done = False
            print_and_log('Downloading %s' % file_name, info)
            while done is False:
                status, done = downloader.next_chunk(num_retries=5)
                if done:
                    print_and_log('  Done!', info)
                else:
                    print_and_log('  %d%%' % int(status.progress() * 100), info)

            with open('%s/%s' % (drive.restore_dir, file_name), 'wb') as f:
                f.write(byte_descriptor.getbuffer())
        except Exception as e:
            msg = 'Error occurred while downloading %s' % file_name
            handle(e, msg, print_and_log)
            continue

        restored.append(file)

    # Fix: key each entry by ITS OWN revision id. The original keyed on the
    # leftover loop variable `rev`, collapsing the dict to a single entry
    # (and raising NameError when no files were listed).
    restored_dict = {rf['revision'].get('id'): rf.to_json() for rf in restored}
    with open('%s/backup_progress.json' % get_config_value(['DATA', 'ROOT']),
              'w') as f:
        json.dump(restored_dict, f, indent=2)
def create_transition_match_smms_rows(sesh, compute_missing):
    """Score SMMS (mel spectrogram) similarity for each pending track pair and persist rows.

    Iterates the module-level `pairs_to_create`, computes the Euclidean
    distance between the two tracks' SMMS features, and inserts a
    TransitionMatchRow per pair. Every 100 pairs, progress is logged and the
    session is conditionally recreated.

    sesh: DB session to start from (may be replaced during the run).
    compute_missing: forwarded to get_feature; presumably controls whether a
        missing SMMS value is computed on demand — TODO confirm.
    """
    db_session = sesh
    num_to_create = len(pairs_to_create)
    rows_created = 0
    try:
        for i, (on_deck_id, candidate_id, relative_key) in enumerate(pairs_to_create):
            try:
                on_deck_smms = get_smms_value(on_deck_id).get_feature(compute_missing)
                if on_deck_smms is None:
                    continue

                match_smms = get_smms_value(candidate_id).get_feature(compute_missing)
                if match_smms is None:
                    continue

                # Lower distance between spectrograms => closer match
                mel_score = np.linalg.norm(on_deck_smms - match_smms)
                match_row = {
                    'on_deck_id': on_deck_id,
                    'candidate_id': candidate_id,
                    'match_factors': {
                        Feature.SMMS.value: mel_score
                    },
                    'relative_key': relative_key
                }

                # Periodic progress report; session is recreated here to bound
                # resource growth over a long run
                if i % 100 == 0:
                    print('%d of %d pairs processed' % (i, num_to_create))
                    print('%d rows created' % rows_created)
                    print('Cache info: %s\n' % str(get_smms_value.cache_info()))
                    # noinspection PyShadowingNames, PyUnboundLocalVariable
                    db_session = database.recreate_session_contingent(db_session)

                db_session.guarded_add(TransitionMatchRow(**match_row))
                rows_created += 1
            except Exception as e:
                handle(e)
                continue
    except Exception as e:
        handle(e)
    finally:
        db_session.close()
        # cache_session is module-level; closed alongside the working session
        cache_session.close()
def run_assistant():
    """Run the assistant's read-eval loop, accepting commands until the process is killed."""
    assistant = Assistant()
    print('Assistant is now online.')

    while True:
        print('\n$ ', end='')
        try:
            command = input()
            assistant.execute(command)
        except CommandParsingException as e:
            handle(e, 'Failed to parse command:', print_error)
        except Exception as e:
            handle(e, 'An unexpected exception occurred:', print_error)
def get_target_revisions(self, gd_resources, query_args, latest_date):
    """Attach to each Drive resource its most recent revision at or before latest_date.

    gd_resources: Drive resources; mutated in place via .set('revision', ...) —
        assumes these are project GDResource-like objects, not plain dicts;
        TODO confirm.
    query_args: base kwargs for the revisions().list call (merged with fileId).
    latest_date: datetime cutoff; newer revisions are ignored.
    """
    for gd_resource in gd_resources:
        try:
            args = dict(ChainMap(query_args, {'fileId': gd_resource.get('id')}))
            results = self.drive.revisions().list(**args).execute().get('revisions', [])
            revs = [{'id': r['id'], 'modifiedTime': datetime.strptime(
                r['modifiedTime'], TS_FORMAT)} for r in results]
            # Keep only revisions at/before the cutoff, sorted oldest -> newest
            revs = sorted([r for r in revs if r['modifiedTime'] <= latest_date],
                          key=lambda x: x['modifiedTime'])
            # revs[-1] raises IndexError when no revision qualifies; that falls
            # through to the handler below like any other per-resource failure
            gd_resource.set('revision', GDResource(revs[-1]))
        except Exception as e:
            msg = 'Error occurred getting revision for %s' % gd_resource.get('name')
            handle(e, msg, print_and_log)
            continue
def write_tags(self, tag_records):
    """Copy each track from the source to the target directory and stamp BPM/key tags.

    tag_records: dict mapping track file name -> tag record (with .bpm, .key).
    Per-file failures are logged and skipped.
    """
    for track_file, tag_record in tag_records.items():
        try:
            source_path = join(self.source_dir, track_file)
            destination_path = join(self.target_dir, track_file)
            copyfile(source_path, destination_path)

            tags = {
                TrackDBCols.BPM.value: float(tag_record.bpm),
                TrackDBCols.KEY.value: tag_record.key
            }
            AudioFile(destination_path).write_tags(tags)
        except Exception as e:
            handle(e, 'Exception occurred processing %s:' % track_file)
            continue
def create_tag_record(self):
    """Create and persist this factory's tag record, returning it on success.

    Returns None (implicitly) if update_row fails; the record is then not
    written to the database.

    Raises:
        Exception: if a record for this track id already exists in the table.
    """
    existing = self.session.query(self.TagRecordEntity).filter_by(
        track_id=self.track_id).first()
    if existing is not None:
        # Fix: TagRecordEntity is itself a class, so its name is __name__.
        # The original used __class__.__name__, which names the metaclass
        # (e.g. 'DeclarativeMeta'), not the record type.
        raise Exception(
            '%s already exists in table for %s record types' %
            (self.track_id, self.TagRecordEntity.__name__))

    try:
        self.update_row()
    except Exception as e:
        handle(e)
        # Best-effort: skip persistence when the row update fails
        return

    self.update_database()
    return self.tag_record
def find_artist_disparities():
    """Print tracks whose comment artist/remixer names disagree with their DB artist rows.

    For every track, parses artist and remixer names out of the comment JSON
    (falling back to file metadata, then to an empty comment) and compares them
    against the Artist rows linked via artist_track.
    """
    session = database.create_session()
    try:
        for track in session.query(Track).all():
            # Generate comment (prefer DB, fall back to file metadata)
            track_comment = track.comment
            if track_comment is None:
                try:
                    track_model = AudioFile(track.file_path)
                    track_metadata = track_model.get_metadata()
                    track_comment = track_metadata.get(
                        TrackDBCols.COMMENT.value, '{}')
                except Exception:
                    track_comment = '{}'
            track_comment = load_comment(track_comment)

            # Extract artist names from comment
            artist_str = track_comment.get(ArtistFields.ARTISTS.value, '')
            remixer_str = track_comment.get(ArtistFields.REMIXERS.value, '')
            comment_artists = set([
                ca for ca in [a.strip() for a in artist_str.split(',')] +
                [r.strip() for r in remixer_str.split(',')]
                if not is_empty(ca)
            ])

            # Get artist names in DB
            artist_tracks = session.query(ArtistTrack).filter_by(
                track_id=track.id).all()
            artist_rows = set()
            for artist_track in artist_tracks:
                artist_row = session.query(Artist).filter_by(
                    id=artist_track.artist_id).first()
                artist_rows.add(artist_row.name)

            # Find diff between comment and DB entries
            if len(comment_artists.difference(artist_rows)) > 0:
                print('Artist disparity for track %s' % track.title)
                print('Comment artist entry: %s' % str(comment_artists))
                print('DB artist entries: %s' % str(artist_rows))
                print('-------\n')
    except Exception as e:
        # Fix: the original message was copy-pasted from sync_fields and
        # misreported this function's purpose.
        handle(e, 'Top-level exception occurred while finding artist disparities')
        session.rollback()
    finally:
        session.close()
def compute_spectrograms(chunk, transmitter):
    """Worker entry point: compute SMMS spectrograms for a chunk of tracks.

    Failed tracks are logged and skipped; the list of computed spectrograms is
    sent back to the parent process through the pipe.
    """
    stage_tracks(chunk)

    results = []
    for track in chunk:
        try:
            print('Computing spectrograms for track %s' % str(track.id))
            spectrogram = SegmentedMeanMelSpectrogram(track)
            spectrogram.compute()
            results.append(spectrogram)
        except Exception as e:
            handle(e)
            continue

    print('Process %d thread done' % getpid())
    transmitter.send(results)
def get_transition_matches(self, track, sort_results=True):
    """Gets transition matches for the given track.

    track may be a Track row or a title string; a title miss falls back to a
    file-path substring match (with a warning in the returned message).

    Returns:
        ((same_key, higher_key, lower_key), title_mismatch_message), or None
        when an error was handled.
    """
    try:
        db_row = track if isinstance(track, Track) else self.session.query(Track).filter_by(title=track).first()

        title_mismatch_message = ''
        if db_row is None:
            db_row = self.session.query(Track).filter(Track.file_path.like('%{}%'.format(track))).first()
            if db_row is not None:
                path = db_row.file_path
                title_mismatch_message = '\n\nWarning: found %s in path %s (but not title)' % (track, path)
            else:
                raise Exception('%s not found in database.' % track)

        # Validate BPM and Camelot code exist and are well-formatted
        title = db_row.title
        # Fix: check for a missing BPM BEFORE converting — the original called
        # float(db_row.bpm) first, so a null BPM raised TypeError and the
        # intended error message was unreachable.
        if db_row.bpm is None:
            raise Exception('Did not find a BPM for %s.' % title)
        bpm = float(db_row.bpm)
        camelot_code = db_row.camelot_code
        if camelot_code is None:
            raise Exception('Did not find a Camelot code for %s.' % title)

        camelot_map_entry = self.camelot_map[camelot_code][bpm]
        # Fix: compare against the enum's .value — metadata dicts are keyed by
        # the string value everywhere else in this codebase, so comparing the
        # enum member itself could never match.
        cur_track_md = [md for md in camelot_map_entry if md.get(TrackDBCols.TITLE.value) == title]
        if len(cur_track_md) == 0:
            raise Exception('%s metadata not found in Camelot map.' % title)
        cur_track_md = cur_track_md[0]

        # Generate and rank matches
        harmonic_codes = TransitionMatchFinder._get_all_harmonic_codes(cur_track_md)
        same_key, higher_key, lower_key = self._get_matches_for_code(harmonic_codes, cur_track_md, sort_results)

        return (same_key, higher_key, lower_key), title_mismatch_message
    except Exception as e:
        handle(e)
def update_artist_counts(session, artist_ids_to_update):
    """Decrement artists' track counts, deleting artists with no tracks left.

    Args:
        session: active DB session.
        artist_ids_to_update: dict mapping artist id -> number of track
            references removed for that artist.

    Returns:
        dict mapping artist id -> DBUpdateType status value.
    """
    update_statuses = {}
    for aid, update_count in artist_ids_to_update.items():
        try:
            artist = session.query(Artist).filter_by(id=aid).first()
            artist.track_count -= update_count
            # Fix: use <= 0 so an artist whose count has drifted negative
            # (e.g. from a prior double-decrement) is still cleaned up; the
            # original == 0 would keep such rows forever.
            if artist.track_count <= 0:
                session.delete(artist)
                update_statuses[aid] = DBUpdateType.DELETE.value
            else:
                update_statuses[aid] = DBUpdateType.UPDATE.value
        except Exception as e:
            handle(e)
            update_statuses[aid] = DBUpdateType.FAILURE.value
            continue

    return update_statuses
def insert_artist_tracks(session, track_id, artist_ids):
    """Link a track to each of its artists via artist_track rows.

    Args:
        session: active DB session.
        track_id: id of the track being linked.
        artist_ids: iterable of artist ids to link.

    Returns:
        dict mapping artist_track row id (or artist id on failure) ->
        DBUpdateType status value.
    """
    artist_track_updates = {}
    for artist_id in artist_ids:
        try:
            session.add(
                ArtistTrack(**{
                    'track_id': track_id,
                    'artist_id': artist_id
                }))
            session.commit()
            # Fix: filter on BOTH track_id and artist_id so we fetch the row
            # just inserted; filtering on artist_id alone returned the
            # artist's first link row, which may belong to another track.
            artist_track_row = session.query(ArtistTrack).filter_by(
                track_id=track_id, artist_id=artist_id).first()
            artist_track_updates[artist_track_row.id] = DBUpdateType.INSERT.value
        except Exception as e:
            handle(e)
            # Fix: roll back the failed transaction so the session remains
            # usable for the remaining artist ids.
            session.rollback()
            artist_track_updates[artist_id] = DBUpdateType.FAILURE.value
            continue

    return artist_track_updates
def run_pipeline(step_args):
    """ Runs the track ingestion pipeline. """
    print(
        'Running ingestion pipeline. Type \'next\' to proceed to the next step.'
    )

    step_index = 0
    # An empty step_args means "run everything"
    to_run = set(STEPS.keys() if len(step_args) == 0 else step_args)

    while True:
        print('\n$ ', end='')
        try:
            cmd = input().lower()

            if cmd not in PIPELINE_CMDS:
                print(
                    'Type \'next\' to proceed to the next step or \'cancel\' to abort.'
                )
                continue

            if cmd == 'cancel':
                print('Aborting.')
                break

            if cmd == 'next':
                if step_index in to_run:
                    (step, arg) = STEPS[step_index]
                    if arg is None:
                        step().execute()
                    else:
                        step(arg).execute()
                else:
                    print('Skipping step %d' % step_index)

                step_index += 1
                if step_index == NUM_STEPS:
                    print('Pipeline ran successfully.')
                    break
        except Exception as e:
            handle(e, 'An unexpected exception occurred:', print_error)
            break
def delete_artist_tracks(session, track_ids):
    """Delete the artist_track rows linking the given tracks to their artists.

    Returns:
        (deletion_statuses, artist_ids_to_update): per-link status dict keyed
        by str((track_id, artist_id)), and a count of removed links per artist
        id (for later track_count decrements).
    """
    deletion_statuses = {}
    artist_ids_to_update = defaultdict(int)

    for track_id in track_ids:
        link_rows = session.query(ArtistTrack).filter_by(track_id=track_id).all()
        for link_row in link_rows:
            artist_id = link_row.artist_id
            status_key = str((track_id, artist_id))
            try:
                session.delete(link_row)
                artist_ids_to_update[artist_id] += 1
                deletion_statuses[status_key] = DBUpdateType.DELETE.value
            except Exception as e:
                handle(e)
                deletion_statuses[status_key] = DBUpdateType.FAILURE.value
                continue

    return deletion_statuses, artist_ids_to_update
def sync_track_tags(tracks):
    """Write ID3 tags for each track where the file's tag differs from the DB/comment value.

    The DB column value is preferred; the comment value is the fallback. A
    banner summarizing the written tags is printed per updated track.
    Per-track failures are logged and skipped.
    """
    for track in tracks:
        af = AudioFile(track.file_path)
        track_pk = track.get_id_title_identifier()
        try:
            comment = load_comment(track.comment, '{}')

            candidate_tags = {}
            for field in ID3_COMMENT_FIELDS:
                db_value = normalize_tag_text(getattr(track, field, None))
                comment_value = normalize_tag_text(comment.get(field, None))
                desired = db_value or comment_value
                current = af.get_tag(METADATA_KEY_TO_ID3.get(field))
                # Compare as strings so formatting differences don't matter
                if str(desired) != str(current):
                    candidate_tags[field] = desired

            if len(candidate_tags) > 0:
                af.write_tags(candidate_tags)

                progress_msg = 'Tags saved for %s' % track_pk
                banner = get_banner(progress_msg)
                print('\n%s' % banner)
                print(progress_msg)
                print('%s' % banner)
                print('\n'.join(
                    ['%s: %s' % (k, v) for k, v in candidate_tags.items()]))
        except Exception as e:
            handle(e, 'Unexpected exception syncing tags for %s' % track_pk)
            continue
def ingest_tracks(input_dir, target_dir=PROCESSED_MUSIC_DIR):
    """ Ingest new tracks - extract tags, format fields, and create track table entries. """
    session = database.create_session()

    try:
        tracks_to_save = {}
        for f in get_audio_files(input_dir):
            old_path = join(input_dir, f)

            # Load track and read ID3 tags
            try:
                track = AudioFile(old_path)
            except Exception as e:
                handle(e, 'Couldn\'t read ID3 tags for %s' % old_path)
                continue

            # Verify requisite ID3 tags exist
            if not REQUIRED_ID3_TAGS.issubset(set(track.get_tags().keys())):
                print(
                    'Can\'t ingest %s due to missing requisite ID3 tags' %
                    old_path)
                continue

            # Copy to target directory
            new_path = join(target_dir, f)
            try:
                print('\nCopying:\t%s\nto:\t\t%s' % (old_path, new_path))
                copyfile(old_path, new_path)
            except Exception as e:
                handle(e, 'Couldn\'t copy %s to target directory' % new_path)
                continue

            tracks_to_save[new_path] = track

        # Update database
        DataManager.insert_tracks(tracks_to_save)
    except Exception as e:
        handle(e)
    finally:
        session.close()
def delete_tracks(track_ids):
    """Delete tracks and all rows that reference them, in dependency order.

    Order: artist_track links, artist track_count decrements (deleting
    orphaned artists), the four pipeline tag-record tables, then the track
    rows themselves. Every sub-step records per-id statuses, which are
    printed; the session commits only if the whole sequence succeeds.
    """
    session = database.create_session()

    try:
        # Delete entries from artist_track tables first
        deletion_statuses, artist_ids_to_update = DataManager.delete_artist_tracks(
            session, track_ids)
        DataManager.print_database_operation_statuses(
            'Artist track deletion statuses', deletion_statuses)

        # Then, update artist track count column
        update_statuses = DataManager.update_artist_counts(
            session, artist_ids_to_update)
        DataManager.print_database_operation_statuses(
            'Artist track count update statuses', update_statuses)

        # Then, remove references from the ingestion pipeline tables.
        # Fix (DRY): the four tag-record tables shared four byte-identical
        # copy-pasted try blocks; drive them from a table instead.
        tag_record_entities = [
            (InitialTagRecord, 'Initial Record'),
            (PostMIKTagRecord, 'Post-MIK Record'),
            (PostRekordboxTagRecord, 'Post-RB Record'),
            (FinalTagRecord, 'Final Record'),
        ]
        tag_record_deletion_statuses = defaultdict(lambda: {})
        for track_id in track_ids:
            for entity, record_name in tag_record_entities:
                try:
                    record = session.query(entity).filter_by(
                        track_id=track_id).first()
                    # NOTE: session.delete(None) raises when no record exists;
                    # that is caught below and recorded as a failure, matching
                    # the original behavior for missing rows.
                    session.delete(record)
                    tag_record_deletion_statuses[record_name][
                        track_id] = DBUpdateType.DELETE.value
                except Exception as e:
                    handle(e)
                    tag_record_deletion_statuses[record_name][
                        track_id] = DBUpdateType.FAILURE.value
                    continue

        DataManager.print_database_operation_statuses(
            'Tag record update statuses', tag_record_deletion_statuses)

        # Finally, delete the tracks themselves
        track_deletion_statuses = {}
        for track_id in track_ids:
            try:
                track = session.query(Track).filter_by(id=track_id).first()
                session.delete(track)
                track_deletion_statuses[track_id] = DBUpdateType.DELETE.value
            except Exception as e:
                handle(e)
                track_deletion_statuses[track_id] = DBUpdateType.FAILURE.value
                continue

        DataManager.print_database_operation_statuses(
            'Track deletion statuses', track_deletion_statuses)

        print('Committing session')
        session.commit()
    except Exception as e:
        handle(e)
        print('Session not committed')
    finally:
        session.close()
def sync_track_fields(tracks):
    """Reconcile each track's DB columns, comment JSON, and ID3 tags.

    The DB column is the source of truth when present; otherwise the comment
    value backfills the column, and the ID3 tag backfills both. Titles are
    deduped on both sides. Returns a dict mapping track id -> DBUpdateType
    status value (UPDATE, NOOP, or FAILURE).
    """
    sync_statuses = {}
    update_msg = 'Updating %s field \'%s\' using %s value: %s -> %s'
    for track in tracks:
        af = AudioFile(track.file_path)
        track_pk = track.get_id_title_identifier()
        log_buffer = []
        try:
            comment = load_comment(track.comment, '{}')
            tags_to_update = {}
            for field in COMMENT_FIELDS:
                col_value = normalize_tag_text(getattr(track, field, None))
                comment_value = normalize_tag_text(comment.get(field, None))
                tag_value = af.get_tag(METADATA_KEY_TO_ID3.get(field, None))

                # Backfill the DB column from the ID3 tag when both the column
                # and the comment are empty
                if (col_value is None and comment_value is None) and tag_value is not None:
                    setattr(track, field, tag_value)
                    col_value = tag_value

                # Numeric fields are compared as ints to ignore formatting
                if field == TrackDBCols.BPM.value or field == TrackDBCols.ENERGY.value:
                    col_value = None if col_value is None else int(col_value)
                    comment_value = None if comment_value is None else int(comment_value)

                # Skip any fields without values in either DB or comment
                if col_value is None and comment_value is None:
                    log_buffer.append('%s is null in DB and comment' % field)
                    continue

                # Dedupe titles
                if field == TrackDBCols.TITLE.value:
                    updated_col_title = dedupe_title(col_value)
                    updated_comment_title = dedupe_title(comment_value)
                    title = updated_col_title or updated_comment_title
                    if title != col_value or title != comment_value:
                        log_buffer.append(update_msg % ('comment', field, 'deduped', str(comment_value), str(title)))
                        log_buffer.append(update_msg % ('column', field, 'deduped', str(col_value), str(title)))
                        comment[field] = title
                        setattr(track, field, title)
                        tags_to_update[field] = title
                    # Titles never fall through to the generic comparison below
                    continue

                if col_value == comment_value:
                    continue

                # Prefer column value over comment value
                if col_value is not None:
                    log_buffer.append(update_msg % ('comment', field, 'column', str(comment_value), str(col_value)))
                    comment[field] = col_value
                    tags_to_update[field] = col_value
                elif col_value is None and comment_value is not None:
                    log_buffer.append(update_msg % ('column', field, 'comment', str(None), str(comment_value)))
                    setattr(track, field, comment_value)
                    tags_to_update[field] = comment_value

            # A non-empty log means at least one field changed
            if len(log_buffer) > 0:
                progress_msg = 'Sync log for %s' % track_pk
                banner = get_banner(progress_msg)
                print('\n%s' % banner)
                print(progress_msg)
                print('%s' % banner)
                print('\n'.join(log_buffer))

                # Only ID3-relevant fields are written back to the file
                tags_to_update = {
                    k: v
                    for k, v in tags_to_update.items() if k in ID3_COMMENT_FIELDS
                }
                af.write_tags(tags_to_update)

                track.comment = str(comment)
                sync_statuses[track.id] = DBUpdateType.UPDATE.value
            else:
                sync_statuses[track.id] = DBUpdateType.NOOP.value
        except Exception as e:
            handle(e, 'Unexpected exception syncing fields for %s' % track_pk)
            sync_statuses[track.id] = DBUpdateType.FAILURE.value
            continue

    return sync_statuses
def run(track_ids):
    """Compute and persist the SMMS feature for tracks, fanned out across processes.

    track_ids: explicit ids to process; when empty, processes every track that
    has no FeatureValue row yet. Relies on module-level `session`, `tracks`,
    NUM_CORES, and compute_spectrograms.
    """
    try:
        if len(track_ids) > 0:
            tracks_to_process = [
                track for track in tracks if track.id in track_ids
            ]
        else:
            # Default: only tracks without an existing feature value row
            fv_track_ids = set(
                [fv.track_id for fv in session.query(FeatureValue).all()])
            tracks_to_process = [
                track for track in tracks if track.id not in fv_track_ids
            ]

        print('Computing SMMS feature for %d tracks\n' % len(tracks_to_process))
        chunks = np.array_split(tracks_to_process, NUM_CORES)

        # One daemon worker process per chunk; results return over pipes
        workers = []
        smms_aggregator = []
        for chunk in chunks:
            receiver, transmitter = Pipe()
            smms_aggregator.append(receiver)

            worker = Process(target=compute_spectrograms, args=(
                chunk,
                transmitter,
            ))
            worker.daemon = True
            workers.append(worker)
            worker.start()

        # recv() blocks until each worker sends; flatten per-worker lists
        smms_results = [
            smms for result in [result.recv() for result in smms_aggregator]
            for smms in result
        ]

        for smms in smms_results:
            track_id = smms.track.id
            print('Saving feature for track %s to DB' % str(track_id))
            try:
                feature_value = smms.get_feature()
                if feature_value is None:
                    continue

                fv_row = {
                    'track_id': track_id,
                    'features': {
                        smms.feature_name: smms.preprocess(feature_value)
                    }
                }
                session.guarded_add(FeatureValue(**fv_row))
            except Exception as e:
                handle(e)
                continue
    except Exception as e:
        handle(e)
        session.rollback()
        return
    finally:
        session.close()