def __init__(self, record_type, file_path, track_id, session):
    """ Resolve the record entity, open the audio file and build the initial row.

    :param record_type: name of the entity attribute to look up on `tag_records`.
    :param file_path: path handed to AudioFile.
    :param track_id: ID stored on the instance for use when building the row.
    :param session: session object stored on the instance.
    """
    # The entity is looked up by name, so `record_type` must match an attribute of `tag_records`
    self.TagRecordEntity = getattr(tag_records, record_type)
    self.session = session
    self.track_id = track_id
    self.file_path = file_path
    self.audio_file = AudioFile(file_path)
    self.tag_record = None

    # Built last: every other attribute is already in place when create_row() runs
    self.row = self.create_row()
Exemple #2
0
    def update_track_table(self, tag_records):
        """ Copy each tagged file to the processed music directory and sync its track row.

        For every entry, metadata is read from the file in `self.target_dir`, the file is copied to
        PROCESSED_MUSIC_DIR under its title plus the original extension, the matching Track row is
        updated with the metadata and the metadata is written to the copy as tags. Any failure is
        passed to `handle` and only skips the entry that raised.

        :param tag_records: mapping of track file name -> tag record exposing `track_id`.
        """
        for track_file, tag_record in tag_records.items():
            try:
                old_path = join(self.target_dir, track_file)
                _, ext = splitext(old_path)

                audio_file = AudioFile(old_path)
                metadata = audio_file.get_metadata()

                formatted_title = metadata[TrackDBCols.TITLE.value] + ext
                new_path = join(PROCESSED_MUSIC_DIR, formatted_title)
                metadata[TrackDBCols.FILE_PATH.value] = new_path

                track = self.session.query(Track).filter_by(id=tag_record.track_id).first()
                if track is None:
                    # Fail with a meaningful message instead of an AttributeError on None
                    raise LookupError('No track found with ID %s' % tag_record.track_id)
                metadata[TrackDBCols.DATE_ADDED.value] = track.date_added

                metadata[TrackDBCols.COMMENT.value] = audio_file.generate_comment(metadata)

                # Copy before touching the row: if the copy fails, the track must not end up
                # pointing at a file path that was never created
                copyfile(old_path, new_path)

                for col, val in metadata.items():
                    setattr(track, col, val)

                AudioFile(new_path).write_tags(metadata)

            except Exception as e:
                handle(e, 'Exception occurred processing %s:' % track_file)
Exemple #3
0
    def write_tags(self, tag_records):
        """ Copy each track from the source to the target directory and tag the copy with BPM and key.

        A failure for one file is passed to `handle` and the remaining files are still processed.

        :param tag_records: mapping of track file name -> record exposing `bpm` and `key`.
        """
        for file_name, record in tag_records.items():
            try:
                source = join(self.source_dir, file_name)
                destination = join(self.target_dir, file_name)
                copyfile(source, destination)

                # Tags are written to the copy; the source file is left untouched
                tagged_copy = AudioFile(destination)
                bpm_and_key = {
                    TrackDBCols.BPM.value: float(record.bpm),
                    TrackDBCols.KEY.value: record.key,
                }
                tagged_copy.write_tags(bpm_and_key)

            except Exception as error:
                handle(error, 'Exception occurred processing %s:' % file_name)
Exemple #4
0
    def ingest_tracks(input_dir, target_dir=PROCESSED_MUSIC_DIR):
        """ Ingest new tracks: validate ID3 tags, copy files to the target directory and insert them.

        Files that cannot be loaded, lack a required ID3 tag or cannot be copied are reported and
        skipped. Everything that was copied is handed to DataManager.insert_tracks in a single call.

        :param input_dir: directory scanned for audio files.
        :param target_dir: directory the accepted files are copied to.
        """

        # NOTE(review): the session is only opened and closed here; nothing in this block uses it
        session = database.create_session()

        try:
            tracks_to_save = {}

            for file_name in get_audio_files(input_dir):
                source_path = join(input_dir, file_name)

                # Load track and read ID3 tags
                try:
                    audio_file = AudioFile(source_path)
                except Exception as e:
                    handle(e, "Couldn't read ID3 tags for %s" % source_path)
                    continue

                # Reject files that lack any of the required ID3 tags
                present_tags = set(audio_file.get_tags().keys())
                if not REQUIRED_ID3_TAGS.issubset(present_tags):
                    print("Can't ingest %s due to missing requisite ID3 tags" % source_path)
                    continue

                # Copy to target directory; only successfully copied files are saved
                destination_path = join(target_dir, file_name)
                try:
                    print('\nCopying:\t%s\nto:\t\t%s' % (source_path, destination_path))
                    copyfile(source_path, destination_path)
                except Exception as e:
                    handle(e, "Couldn't copy %s to target directory" % destination_path)
                else:
                    tracks_to_save[destination_path] = audio_file

            # Update database
            DataManager.insert_tracks(tracks_to_save)

        except Exception as e:
            handle(e)

        finally:
            session.close()
def find_artist_disparities():
    """ Print every track whose comment names artists that have no matching artist row.

    The artist and remixer names in each track's comment (the column value, or the comment read
    from the audio file when the column is null) are compared with the names of the Artist rows
    linked to the track through ArtistTrack. Only names found in the comment but missing from the
    DB are reported; the reverse direction is not checked.
    """
    session = database.create_session()

    try:
        for track in session.query(Track).all():
            # Generate comment, falling back to the file's metadata when the column is null
            track_comment = track.comment
            if track_comment is None:
                try:
                    track_metadata = AudioFile(track.file_path).get_metadata()
                    track_comment = track_metadata.get(TrackDBCols.COMMENT.value, '{}')
                except Exception:
                    track_comment = '{}'
            track_comment = load_comment(track_comment)

            # Extract artist names from comment
            artist_str = track_comment.get(ArtistFields.ARTISTS.value, '')
            remixer_str = track_comment.get(ArtistFields.REMIXERS.value, '')
            names = [name.strip() for name in artist_str.split(',') + remixer_str.split(',')]
            comment_artists = {name for name in names if not is_empty(name)}

            # Get artist names in DB
            artist_tracks = session.query(ArtistTrack).filter_by(track_id=track.id).all()
            artist_rows = set()
            for artist_track in artist_tracks:
                artist_row = session.query(Artist).filter_by(id=artist_track.artist_id).first()
                if artist_row is None:
                    # A dangling artist reference must not abort the scan of the remaining tracks
                    print('Track %s references missing artist ID %s' %
                          (track.title, artist_track.artist_id))
                    continue
                artist_rows.add(artist_row.name)

            # Find diff between comment and DB entries
            if comment_artists - artist_rows:
                print('Artist disparity for track %s' % track.title)
                print('Comment artist entry: %s' % str(comment_artists))
                print('DB artist entries: %s' % str(artist_rows))
                print('-------\n')

    except Exception as e:
        handle(e, 'Top-level exception occurred while finding artist disparities')
        session.rollback()
    finally:
        session.close()
Exemple #6
0
    def sync_track_tags(tracks):
        """ Write column/comment values to the ID3 tags of each track's file where they differ.

        For every field in ID3_COMMENT_FIELDS the column value (or, when that is falsy, the value
        from the track comment) is compared as a string with the tag currently stored in the file.
        Differing fields are written in one call and printed. A failure for one track is passed to
        `handle` and does not stop the remaining tracks.

        :param tracks: iterable of track rows exposing `file_path`, `comment` and the synced fields.
        """
        for track in tracks:
            track_pk = track.get_id_title_identifier()

            try:
                # Opened inside the try so an unreadable file only skips this track
                af = AudioFile(track.file_path)
                comment = load_comment(track.comment, '{}')
                tags_to_update = {}

                for field in ID3_COMMENT_FIELDS:
                    id3_tag = METADATA_KEY_TO_ID3.get(field)

                    col_value = normalize_tag_text(getattr(track, field, None))
                    comment_value = normalize_tag_text(comment.get(field, None))
                    old_value = af.get_tag(id3_tag)
                    # Column value wins; the comment value is only the fallback
                    new_value = col_value or comment_value

                    if str(new_value) != str(old_value):
                        tags_to_update[field] = new_value

                if tags_to_update:
                    af.write_tags(tags_to_update)

                    progress_msg = 'Tags saved for %s' % track_pk
                    banner = get_banner(progress_msg)
                    print('\n%s' % banner)
                    print(progress_msg)
                    print('%s' % banner)
                    print('\n'.join([
                        '%s: %s' % (k, v) for k, v in tags_to_update.items()
                    ]))

            except Exception as e:
                handle(e,
                       'Unexpected exception syncing tags for %s' % track_pk)
class TagRecordFactory:
    """ Create an ID3 tag record for use in the ingestion pipeline. """
    def __init__(self, record_type, file_path, track_id, session):
        """ Resolve the record entity, open the audio file and build the initial row.

        :param record_type: name of the entity attribute to look up on `tag_records`.
        :param file_path: path handed to AudioFile.
        :param track_id: ID of the track the record belongs to.
        :param session: session used to query for and add the record.
        """
        self.file_path = file_path
        self.track_id = track_id
        self.session = session
        self.tag_record = None
        self.TagRecordEntity = getattr(tag_records, record_type)
        self.audio_file = AudioFile(self.file_path)
        self.row = self.create_row()

    def create_tag_record(self):
        """ Create the tag record for this track and add it to the session.

        :return: the created record, or None when update_row() raised (the error goes to `handle`).
        :raises Exception: if a record of this type already exists for the track.
        """
        existing = self.session.query(self.TagRecordEntity).filter_by(
            track_id=self.track_id).first()
        if existing is not None:
            # TagRecordEntity is itself a class, so its own __name__ is the entity name;
            # __class__.__name__ would report the metaclass instead
            raise Exception(
                '%s already exists in table for %s record types' %
                (self.track_id, self.TagRecordEntity.__name__))

        try:
            self.update_row()
        except Exception as e:
            handle(e)
            return

        self.update_database()

        return self.tag_record

    def create_row(self):
        """ Build the row dict from the file's tags (one key per TAG_COLUMNS entry) plus the track ID. """
        row = {k.name.lower(): self.audio_file.get_tag(k) for k in TAG_COLUMNS}
        row['track_id'] = self.track_id
        return row

    def update_row(self):
        """ Hook called before the row is persisted; does nothing in this class. """
        pass

    def update_database(self):
        """ Instantiate the entity from the row and add it to the session via guarded_add. """
        self.tag_record = self.TagRecordEntity(**self.row)
        self.session.guarded_add(self.tag_record)
def generate_camelot_map(tracks):
    """ Build the Camelot code -> BPM -> list of track metadata index plus collection-wide statistics.

    :param tracks: iterable of track rows.
    :return: tuple of (camelot map, collection metadata dict).
    """

    collection_md = {
        CollectionStat.NEWEST: -1,
        CollectionStat.OLDEST: float('inf'),
        CollectionStat.SMMS_MAX: get_max_smms()
    }
    artist_counts, label_counts = defaultdict(int), defaultdict(int)
    camelot_map = defaultdict(lambda: defaultdict(list))
    track_mds = []

    # First pass: tally artists/labels and collect the non-empty metadata of every track
    for track in tracks:
        file_path = track.file_path
        raw_comment = track.comment
        if not raw_comment:
            # No usable comment on the row, so read it from the audio file's metadata
            raw_comment = AudioFile(file_path).get_metadata().get(TrackDBCols.COMMENT.value)
        track_comment = load_comment(raw_comment)

        artists = split_artist_string(track_comment.get(ArtistFields.ARTISTS.value, ''))
        remixers = split_artist_string(track_comment.get(ArtistFields.REMIXERS.value, ''))
        for name in artists + remixers:
            artist_counts[name] += 1

        if not is_empty(track.label):
            label_counts[track.label] += 1

        candidate_md = {
            TrackDBCols.ID: track.id,
            TrackDBCols.FILE_PATH: file_path,
            TrackDBCols.TITLE: track.title,
            TrackDBCols.BPM: get_or_default(track, 'bpm', float_transform),
            TrackDBCols.KEY: track.key,
            TrackDBCols.CAMELOT_CODE: track.camelot_code,
            TrackDBCols.LABEL: track.label,
            TrackDBCols.GENRE: track.genre,
            TrackDBCols.ENERGY: get_or_default(track, 'energy', int_transform),
            TrackDBCols.DATE_ADDED: get_or_default(track, 'date_added', datetime_transform),
            ArtistFields.ARTISTS: {artist: 0 for artist in artists},
            ArtistFields.REMIXERS: {remixer: 0 for remixer in remixers}
        }
        # Empty values are dropped, so later lookups test for key presence
        track_mds.append({col: val for col, val in candidate_md.items() if not is_empty(val)})

    # Totals across the whole collection
    collection_md[CollectionStat.LABEL_COUNTS] = sum(label_counts.values())
    collection_md[CollectionStat.ARTIST_COUNTS] = sum(artist_counts.values())

    # Second pass: the counts are complete now, so attach them and index each track
    for track_md in track_mds:
        for artist_field in (ArtistFields.ARTISTS, ArtistFields.REMIXERS):
            if artist_field in track_md:
                track_md[artist_field] = generate_artist_counts(artist_counts, track_md[artist_field])

        if TrackDBCols.LABEL in track_md:
            label_name = track_md[TrackDBCols.LABEL]
            track_md[TrackDBCols.LABEL] = (label_name, label_counts[label_name])

        # Track the newest and oldest timestamps seen so far
        if TrackDBCols.DATE_ADDED in track_md:
            added = track_md[TrackDBCols.DATE_ADDED]
            if added > collection_md[CollectionStat.NEWEST]:
                collection_md[CollectionStat.NEWEST] = added
            if added < collection_md[CollectionStat.OLDEST]:
                collection_md[CollectionStat.OLDEST] = added

        code = track_md[TrackDBCols.CAMELOT_CODE]
        tempo = track_md[TrackDBCols.BPM]
        camelot_map[code][tempo].append(track_md)

    collection_md[CollectionStat.TIME_RANGE] = (
        collection_md[CollectionStat.NEWEST] - collection_md[CollectionStat.OLDEST])

    return camelot_map, collection_md
Exemple #9
0
    def sync_track_fields(tracks):
        """ Reconcile each track's columns, comment and ID3 tags for every field in COMMENT_FIELDS.

        Per field: a value found only in the file's tag is copied to the column; titles are deduped;
        otherwise the column value overrides the comment, and the comment value fills a null column.
        When anything was logged for a track, the changed ID3_COMMENT_FIELDS are written to the file
        and the comment column is rewritten.

        :param tracks: iterable of track rows to sync.
        :return: dict of track ID -> DBUpdateType value (UPDATE, NOOP or FAILURE).
        """
        sync_statuses = {}
        update_msg = 'Updating %s field \'%s\' using %s value: %s -> %s'

        for track in tracks:
            track_pk = track.get_id_title_identifier()
            log_buffer = []

            try:
                # Opened inside the try so an unreadable file is recorded as a FAILURE for this
                # track instead of aborting the sync of all remaining tracks
                af = AudioFile(track.file_path)
                comment = load_comment(track.comment, '{}')
                tags_to_update = {}

                for field in COMMENT_FIELDS:
                    col_value = normalize_tag_text(getattr(track, field, None))
                    comment_value = normalize_tag_text(comment.get(
                        field, None))
                    tag_value = af.get_tag(METADATA_KEY_TO_ID3.get(
                        field, None))

                    # Fall back to the file's tag when neither DB nor comment has a value
                    if (col_value is None and
                            comment_value is None) and tag_value is not None:
                        setattr(track, field, tag_value)
                        col_value = tag_value

                    # BPM and energy are compared as integers
                    if field in (TrackDBCols.BPM.value, TrackDBCols.ENERGY.value):
                        col_value = None if col_value is None else int(
                            col_value)
                        comment_value = None if comment_value is None else int(
                            comment_value)

                    # Skip any fields without values in either DB or comment
                    if col_value is None and comment_value is None:
                        log_buffer.append('%s is null in DB and comment' %
                                          field)
                        continue

                    # Dedupe titles
                    if field == TrackDBCols.TITLE.value:
                        updated_col_title = dedupe_title(col_value)
                        updated_comment_title = dedupe_title(comment_value)
                        title = updated_col_title or updated_comment_title

                        if title != col_value or title != comment_value:
                            log_buffer.append(update_msg %
                                              ('comment', field, 'deduped',
                                               str(comment_value), str(title)))
                            log_buffer.append(update_msg %
                                              ('column', field, 'deduped',
                                               str(col_value), str(title)))

                            comment[field] = title
                            setattr(track, field, title)
                            tags_to_update[field] = title

                            continue

                    if col_value == comment_value:
                        continue

                    # Prefer column value over comment value
                    if col_value is not None:
                        log_buffer.append(update_msg %
                                          ('comment', field, 'column',
                                           str(comment_value), str(col_value)))
                        comment[field] = col_value
                        tags_to_update[field] = col_value

                    else:
                        # The column is null here, and the both-null case was skipped above,
                        # so the comment value is guaranteed to be set
                        log_buffer.append(update_msg %
                                          ('column', field, 'comment',
                                           str(None), str(comment_value)))
                        setattr(track, field, comment_value)
                        tags_to_update[field] = comment_value

                # NOTE(review): "null in DB and comment" entries also count as log output, so a
                # track with only such entries is still reported as UPDATE - confirm this is intended
                if log_buffer:
                    progress_msg = 'Sync log for %s' % track_pk
                    banner = get_banner(progress_msg)
                    print('\n%s' % banner)
                    print(progress_msg)
                    print('%s' % banner)
                    print('\n'.join(log_buffer))

                    # Only fields listed in ID3_COMMENT_FIELDS are written to the file
                    tags_to_update = {
                        k: v
                        for k, v in tags_to_update.items()
                        if k in ID3_COMMENT_FIELDS
                    }
                    af.write_tags(tags_to_update)
                    track.comment = str(comment)

                    sync_statuses[track.id] = DBUpdateType.UPDATE.value
                else:
                    sync_statuses[track.id] = DBUpdateType.NOOP.value

            except Exception as e:
                handle(e,
                       'Unexpected exception syncing fields for %s' % track_pk)
                sync_statuses[track.id] = DBUpdateType.FAILURE.value

        return sync_statuses