Example 1
def fill_annotation_metadata(ann, row):
    """Fills the annotation metadata."""
    ann.annotation_metadata.annotation_tools = 'Sonic Visualiser'
    ann.annotation_metadata.curator = (jams.Curator(
        name='Justin Salamon', email='*****@*****.**'))

    annotators = {
        'annotators': [{
            'name': 'Justin Salamon',
            'email': '*****@*****.**'
        }, {
            'name': 'Christopher Jacoby',
            'email': '*****@*****.**'
        }]
    }
    ann.annotation_metadata.annotator = jams.Sandbox(**annotators)

    ann.annotation_metadata.version = '1.0'
    ann.annotation_metadata.corpus = 'UrbanSound8K'
    ann.annotation_metadata.annotation_rules = (
        'See: J. Salamon, C. Jacoby and J. P. Bello, "A Dataset and Taxonomy '
        'for Urban Sound Research", in Proc. 22nd ACM International Conference '
        'on Multimedia, Orlando, USA, Nov. 2014.')
    ann.annotation_metadata.data_source = (
        'https://serv.cusp.nyu.edu/projects/urbansounddataset/')
    ann.annotation_metadata.validation = ''

    # Store all metadata in sandbox too
    ann.sandbox.update(**row)
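A minimal usage sketch (not part of the original script): the row dictionary and the 'tag_open' namespace below are assumptions used only for illustration.

# Hedged sketch: attach UrbanSound8K metadata to a fresh tag annotation.
import jams

row = {'fold': 5, 'classID': 3}  # hypothetical metadata record
ann = jams.Annotation(namespace='tag_open')
fill_annotation_metadata(ann, row)
print(ann.annotation_metadata.corpus)  # 'UrbanSound8K'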
Example 2
def process_track(input_dir, output_dir, metadata, tags, compress):

    # Construct track metadata
    duration = get_track_duration(
        os.path.join(input_dir, 'audio', metadata['filename']))

    file_meta = jams.FileMetadata(title=metadata['title'],
                                  artist=metadata['artist'],
                                  duration=duration,
                                  identifiers=jams.Sandbox(id=metadata.name))

    # Get the tag annotation
    amd = jams.AnnotationMetadata(curator=jams.Curator(**__curator__),
                                  corpus=__corpus__)

    ann = jams.Annotation('tag_cal10k', annotation_metadata=amd)

    for tag in tags:
        ann.append(time=0, duration=duration, value=tag)

    jam = jams.JAMS(file_metadata=file_meta)
    jam.annotations.append(ann)
    jam.sandbox.content_path = metadata['filename']

    save_jam(output_dir, jam, metadata.name, compress)
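The save_jam helper is referenced but not shown here. A plausible sketch, assuming the output name is built from the track id and that the '.jamz' extension (gzip-compressed JAMS) is used when compress is set:

def save_jam(output_dir, jam, track_id, compress):
    # Hypothetical helper, not the original implementation.
    ext = 'jamz' if compress else 'jams'  # 'jamz' => gzip-compressed output
    jam.save(os.path.join(output_dir, os.path.extsep.join([str(track_id), ext])))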
Example 3
def smc_tags(tag_file, duration):
    '''Get the tag data for this track as a JAMS annotation'''

    annotation = jams.Annotation('tag_open')

    data = []
    for value in list(pd.read_table(tag_file, header=None, squeeze=True)):
        if len(value) == 2:
            ann_id, ann_conf = tuple(value)
        else:
            data.append(value)

    curator = jams.Curator(**__curator__)

    metadata = jams.AnnotationMetadata(curator=curator,
                                       corpus=__corpus__,
                                       annotator={
                                           'id': ann_id,
                                           'confidence': int(ann_conf)
                                       })

    annotation.annotation_metadata = metadata

    for tag in data:
        annotation.data.add_observation(time=0,
                                        duration=duration,
                                        value=tag,
                                        confidence=None)

    return annotation
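Note that data.add_observation belongs to the older pandas-backed jams API; in recent jams releases the same loop can be written with Annotation.append (a sketch, assuming jams >= 0.3):

for tag in data:
    annotation.append(time=0, duration=duration, value=tag, confidence=None)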
Example 4
def test_annotation_metadata():
    def __test(data, curator, annotator):

        md = jams.AnnotationMetadata(curator=curator,
                                     annotator=annotator,
                                     **data)

        if curator is not None:
            eq_(dict(md.curator), dict(curator))

        if annotator is not None:
            eq_(dict(md.annotator), dict(annotator))

        real_data = dict(md)
        real_data.pop('curator')
        real_data.pop('annotator')
        eq_(real_data, data)

    dummies = dict(version='0',
                   corpus='test',
                   annotation_tools='nose',
                   annotation_rules='brains',
                   validation='unnecessary',
                   data_source='null')

    real_curator = jams.Curator(name='nobody', email='*****@*****.**')

    real_annotator = jams.Sandbox(description='none')

    for curator in [None, real_curator]:
        for annotator in [None, real_annotator]:
            yield __test, dummies, curator, annotator
Example 5
def process(in_dir, out_dir):
    """Converts the original Isophonic files into the JAMS format, and saves
    them in the out_dir folder."""
    all_jams = dict()
    output_paths = dict()
    all_labs = jams.util.find_with_extension(in_dir, 'lab', 5)
    all_labs += jams.util.find_with_extension(in_dir, 'txt', 4)

    for lab_file in all_labs:
        title = jams.util.filebase(lab_file)
        if title not in all_jams:
            all_jams[title] = jams.JAMS()
            parts = lab_file.replace(in_dir, '').strip('/').split('/')
            fill_file_metadata(all_jams[title], artist=parts[1], title=title)
            output_paths[title] = os.path.join(
                out_dir, *parts[1:]).replace(".lab", ".jams")
            logging.info("%s -> %s" % (title, output_paths[title]))

        jam = all_jams[title]
        if ISO_ATTRS['beat'] in lab_file:
            try:
                tmp_jam, annot = jams.util.import_lab(NS_DICT['beat'], lab_file,
                                                      jam=jam)
            except TypeError:
                tmp_jam, annot = jams.util.import_lab(NS_DICT['beat'], lab_file,
                                                      jam=jam, sep="\t+")
            fix_beats_values(annot)
        elif ISO_ATTRS['chord'] in lab_file:
            tmp_jam, annot = jams.util.import_lab(NS_DICT['chord'], lab_file,
                                                  jam=jam)
            fix_chord_labels(jam.annotations[-1])
            fix_ranges(jam.annotations[-1])
            jam.file_metadata.duration = get_duration_from_annot(annot)
        elif ISO_ATTRS['key'] in lab_file:
            tmp_jam, annot = jams.util.import_lab(NS_DICT['key'], lab_file,
                                                  jam=jam)
            fix_key_labels(jam.annotations[-1])
            fix_ranges(jam.annotations[-1])
            fix_silence(jam.annotations[-1])
        elif ISO_ATTRS['segment'] in lab_file:
            tmp_jam, annot = jams.util.import_lab(NS_DICT['segment'], lab_file,
                                                  jam=jam)
            fix_ranges(jam.annotations[-1])
            jam.file_metadata.duration = get_duration_from_annot(annot)

        # Add Metadata
        curator = jams.Curator(name="Matthias Mauch",
                               email="*****@*****.**")
        ann_meta = jams.AnnotationMetadata(curator=curator,
                                           version="1.0",
                                           corpus="Isophonics",
                                           annotator=None)
        jam.annotations[-1].annotation_metadata = ann_meta

    logging.info("Saving and validating JAMS...")
    for title in all_jams:
        out_file = output_paths[title]
        jams.util.smkdirs(os.path.split(out_file)[0])
        all_jams[title].save(out_file)
Example 6
def run():

    # Collect the folder names
    names_folders = glob.glob(path_database + '*')

    for names_ in names_folders:

        # Collect the full paths of the audio files
        path_audios = glob.glob(names_ + '/*.' + audio_format)

        for path_ in path_audios:

            # Extract the audio (wav) file name
            match_obj = re.sub(names_, "", path_)
            audio_name = re.sub(r'/', "", match_obj)

            # Extract the full folder path
            fold_path = re.sub(audio_name, "", path_)

            os.chdir(fold_path)
            # Get the signal duration
            y, sr = librosa.load(audio_name, sr=None)
            duration = librosa.get_duration(y=y, sr=sr)

            # Remove audio files with a duration below 2.3 s
            if duration >= 2.3:

                # Create the JAMS annotation file
                jam = jams.JAMS()

                # Set the annotation file parameters
                jam.file_metadata.duration = duration
                ann = jams.Annotation(namespace='beat',
                                      time=0,
                                      duration=jam.file_metadata.duration)
                ann.annotation_metadata = jams.AnnotationMetadata(
                    data_source=data_source, validation="")
                ann.annotation_metadata.curator = jams.Curator(name=name,
                                                               email=email)
                jam.annotations.append(ann)

                # Save the annotation file for each audio clip
                jam_name = re.sub(r'\.wav', ".jams", audio_name)
                jam.save(jam_name)

            else:
                os.remove(path_)  # avoid shelling out to 'rm'
Example 7
def process_track(input_dir, output_dir, metadata, tags, compress):

    # Construct track metadata
    duration = get_track_duration(
        os.path.join(input_dir, 'mp3',
                     os.path.extsep.join([metadata['track'], 'mp3'])))

    artist, _ = metadata['track'].split('-', 1)

    artist = ARTIST_MAP.get(artist, artist)

    title = metadata['track'].replace('{:s}-'.format(artist), '')

    artist = artist.replace('_', ' ')
    title = title.replace('_', ' ')

    file_meta = jams.FileMetadata(title=title,
                                  artist=artist,
                                  duration=duration)

    # Get the tag annotation
    amd = jams.AnnotationMetadata(curator=jams.Curator(**__curator__),
                                  corpus=__corpus__)

    ann = jams.Annotation('tag_cal500', annotation_metadata=amd)

    for value, confidence in tags.items():  # .items() for Python 3 compatibility
        ann.append(time=0,
                   duration=duration,
                   value=value,
                   confidence=confidence)

    jam = jams.JAMS(file_metadata=file_meta)
    jam.annotations.append(ann)
    jam.sandbox.content_path = metadata['track']

    save_jam(output_dir, jam, metadata.name, compress)
Example 8
def smc_annotation(ann_file):
    '''Create a JAM file and annotation object for beats'''

    # Add metadata + curator
    match = re.match(r'.*SMC_\d+_(?P<meter>\d_\d_\d)_(?P<annotator>.+)\.txt',
                     ann_file)

    if not match:
        raise RuntimeError('Could not parse filename {:s}'.format(ann_file))

    curator = jams.Curator(**__curator__)

    metadata = jams.AnnotationMetadata(
        curator=curator,
        corpus=__corpus__,
        annotator={'id': match.group('annotator')})

    # Sandbox the following info:
    #   annotator id
    #   metrical interpretation

    annotation = jams.Annotation(
        'beat',
        annotation_metadata=metadata,
        sandbox={'metrical_interpretation': match.group('meter')})

    # Now load the data

    data = pd.read_csv(ann_file, header=None, squeeze=True)

    for beat_time in data:
        annotation.data.add_observation(time=beat_time,
                                        duration=0,
                                        value=1,
                                        confidence=None)

    return annotation
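A hedged sketch of how this beat annotation and the tag annotation from Example 3 might be combined into a single JAMS file; the surrounding loop, the duration computation, and out_file are assumptions:

jam = jams.JAMS()
jam.file_metadata.duration = duration  # e.g. obtained via librosa.get_duration
jam.annotations.append(smc_annotation(ann_file))
jam.annotations.append(smc_tags(tag_file, duration))
jam.save(out_file)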
Example 9
def parse_patterns(csv_file, kern_file, patterns, out_file):
    """Parses the set of patterns and saves the results into the output file.

    Parameters
    ----------
    csv_file : str
        Path to the main csv file from which the pattern is extracted.
    kern_file : str
        Path to the main kern file from which to extract the metadata.
    patterns: list of list of strings (files)
        Set of all the patterns with the occurrences of a given piece.
    out_file: string (path)
        Path to the output file to save the set of patterns in the MIREX
        format.
    """
    # Create JAMS and add some metadata
    jam = jams.JAMS()
    curator = jams.Curator(name="Tom Collins", email="*****@*****.**")
    fill_file_metadata(jam, kern_file, csv_file)
    ann_meta = jams.AnnotationMetadata(curator=curator,
                                       version="August2013",
                                       corpus="JKU Development Dataset")

    # Create actual annotation
    annot = jams.Annotation(namespace="pattern_jku",
                            annotation_metadata=ann_meta)

    # Get bpm and first and last onsets
    bpm = get_bpm(kern_file)
    first_onset, last_onset = get_first_last_onset(csv_file)

    pattern_n = 1
    for pattern in patterns:
        occ_n = 1
        for occ_file in pattern:
            start, end = find_in_csv(csv_file, occ_file)
            with open(csv_file, "r") as f:
                file_reader = list(csv.reader(f))
                for i in range(start, end):
                    value = {
                        "midi_pitch": float(file_reader[i][1]),
                        "morph_pitch": float(file_reader[i][2]),
                        "staff": int(float(
                            file_reader[i]
                            [4])),  # Hack to convert 0.000000000 into an int
                        "pattern_id": pattern_n,
                        "occurrence_id": occ_n
                    }
                    # Transform onset to time
                    time = onset_to_seconds(float(file_reader[i][0]),
                                            first_onset, bpm)
                    dur = onset_to_seconds(float(file_reader[i][3]), 0, bpm)
                    annot.data.add_observation(time=time,
                                               duration=dur,
                                               value=value)
            occ_n += 1
        pattern_n += 1

    # Annotation to the jams
    jam.annotations.append(annot)

    # Save file
    jam.save(out_file)
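onset_to_seconds is referenced but not defined here. A minimal sketch, under the assumption that onsets in the JKU csv files are expressed in quarter-note beats:

def onset_to_seconds(onset, first_onset, bpm):
    # Hypothetical helper: beats-to-seconds conversion, offset by the first onset.
    return (onset - first_onset) * 60.0 / float(bpm)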
Example 10
def extract_ground_truth(diagnostics_group):
    """
    Extract ground-truth information from one or more MIDI files about a single
    MIDI file based on the results in one or more diagnostics files and return
    a JAMS object with all of the annotations compiled.

    Parameters
    ----------
    - diagnostics_group : list of dict
        List of dicts of diagnostics, each about a successful alignment of a
        different MIDI file to a single audio file.
    """
    # Construct the JAMS object
    jam = jams.JAMS()
    # Load in the first diagnostics (doesn't matter which as they all
    # should correspond the same audio file)
    diagnostics = diagnostics_group[0]
    # Load in the audio file to get its duration for the JAMS file
    audio, fs = librosa.load(diagnostics['audio_filename'],
                             sr=feature_extraction.AUDIO_FS)
    jam.file_metadata.duration = librosa.get_duration(y=audio, sr=fs)
    # Also store metadata about the audio file, retrieved from the MSD
    jam.file_metadata.identifiers = {'track_id': diagnostics['audio_id']}
    jam.file_metadata.artist = MSD_LIST[diagnostics['audio_id']]['artist']
    jam.file_metadata.title = MSD_LIST[diagnostics['audio_id']]['title']

    # Iterate over the diagnostics files supplied
    for diagnostics in diagnostics_group:

        # Create annotation metadata object, shared across annotations
        # check_output returns bytes on Python 3, so decode before concatenating
        commit = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD']).decode().strip()
        commit_url = "http://github.com/craffel/midi-dataset/tree/" + commit
        annotator = {
            'midi_md5': diagnostics['midi_md5'],
            'commit_url': commit_url,
            'confidence': diagnostics['score']
        }
        annotation_metadata = jams.AnnotationMetadata(
            curator=jams.Curator('Colin Raffel', '*****@*****.**'),
            version='0.0.1b',
            corpus='Million Song Dataset MIDI Matches',
            annotator=annotator,
            annotation_tools=(
                'MIDI files were matched and aligned to audio files using the '
                'code at http://github.com/craffel/midi-dataset.  Information '
                'was extracted from MIDI files using pretty_midi '
                'https://github.com/craffel/pretty-midi.'),
            annotation_rules=(
                'Beat locations and key change times were linearly '
                'interpolated according to an audio-to-MIDI alignment.'),
            validation=(
                'Only MIDI files with alignment confidence scores >= .5 were '
                'considered "correct".  The confidence score can be used as a '
                'rough guide to the potential correctness of the annotation.'),
            data_source='Inferred from a MIDI file.')

        # Load the extracted features
        midi_features = deepdish.io.load(diagnostics['midi_features_filename'])
        audio_features = deepdish.io.load(
            diagnostics['audio_features_filename'])
        # Load in the original MIDI file
        midi_object = pretty_midi.PrettyMIDI(diagnostics['midi_filename'])
        # Compute the times of the frames (will be used for interpolation)
        midi_frame_times = feature_extraction.frame_times(
            midi_features['gram'])[diagnostics['aligned_midi_indices']]
        audio_frame_times = feature_extraction.frame_times(
            audio_features['gram'])[diagnostics['aligned_audio_indices']]

        # Get the interpolated beat locations and add them to the JAM
        adjusted_beats = interpolate_times(midi_object.get_beats(),
                                           midi_frame_times, audio_frame_times)
        # Create annotation record for the beats
        beat_a = jams.Annotation(namespace='beat')
        beat_a.annotation_metadata = annotation_metadata
        # Add beat timings to the annotation record
        for t in adjusted_beats:
            beat_a.append(time=t, duration=0.0)
        # Add beat annotation record to the JAMS file
        jam.annotations.append(beat_a)

        # Get key signature times and their string names
        key_change_times = [c.time for c in midi_object.key_signature_changes]
        key_names = [
            pretty_midi.key_number_to_key_name(c.key_number)
            for c in midi_object.key_signature_changes
        ]
        # JAMS requires that the key name be supplied in the form e.g.
        # "C:major" but pretty_midi returns things in the format "C Major",
        # so the following code converts to JAMS format
        key_names = [
            name.replace(' ', ':').replace('M', 'm') for name in key_names
        ]
        # Compute interpolated event times
        adjusted_key_change_times, adjusted_key_names = interpolate_times(
            key_change_times, midi_frame_times, audio_frame_times, key_names,
            True)
        # Create JAMS annotation for the key changes
        if len(adjusted_key_change_times) > 0:
            key_a = jams.Annotation(namespace='key_mode')
            key_a.annotation_metadata = annotation_metadata
            # We only have key start times from the MIDI file, but JAMS wants
            # durations too, so create a list of "end times"
            end_times = np.append(adjusted_key_change_times[1:],
                                  jam.file_metadata.duration)
            # Add key labels into the JAMS file
            for start, end, key in zip(adjusted_key_change_times, end_times,
                                       adjusted_key_names):
                key_a.append(time=start, duration=end - start, value=key)
            jam.annotations.append(key_a)

    return jam
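interpolate_times is not shown in this example. A minimal sketch of how such a time mapping could be written with np.interp; the name, signature, and optional out-of-range filtering are assumptions, not the author's code:

def interpolate_times(event_times, midi_frame_times, audio_frame_times,
                      labels=None, filter_out_of_range=False):
    # Map event times from MIDI time to audio time via the aligned frame grids.
    event_times = np.asarray(event_times)
    keep = np.ones(len(event_times), dtype=bool)
    if filter_out_of_range:
        # Drop events that fall outside the aligned region.
        keep = ((event_times >= midi_frame_times.min()) &
                (event_times <= midi_frame_times.max()))
    adjusted = np.interp(event_times[keep], midi_frame_times, audio_frame_times)
    if labels is None:
        return adjusted
    return adjusted, [l for l, k in zip(labels, keep) if k]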
Example 11
def test_curator():

    c = jams.Curator(name='myself', email='*****@*****.**')

    assert c.name == 'myself'
    assert c.email == '*****@*****.**'
Example 12
    assert c.name == 'myself'
    assert c.email == '*****@*****.**'


# AnnotationMetadata
@pytest.fixture
def ann_meta_dummy():
    return dict(version='0',
                corpus='test',
                annotation_tools='nose',
                annotation_rules='brains',
                validation='unnecessary',
                data_source='null')


@parametrize('curator', [None, jams.Curator(name='nobody',
                                            email='*****@*****.**')])
@parametrize('annotator', [None, jams.Sandbox(description='desc')])
def test_annotation_metadata(ann_meta_dummy, curator, annotator):

    md = jams.AnnotationMetadata(curator=curator, annotator=annotator,
                                 **ann_meta_dummy)

    if curator is not None:
        assert dict(md.curator) == dict(curator)

    if annotator is not None:
        assert dict(md.annotator) == dict(annotator)

    real_data = dict(md)
    real_data.pop('curator')
    real_data.pop('annotator')
    assert real_data == ann_meta_dummy
Example 13
    def to_jams(self):
        # Initialize top-level JAMS container
        jam = jams.JAMS()

        # Encode title, artist, and release
        jam.file_metadata.title = os.path.split(self.audio_filename)[1]
        jam.file_metadata.artist = self.drummer
        jam.file_metadata.release = os.path.split(self.session)[1]

        # Encode duration in seconds
        jam.file_metadata.duration = self.duration

        # Encode JAMS curator
        curator = jams.Curator(name="Jon Gillick",
                               email="*****@*****.**")

        # Store mirdata metadata as JAMS identifiers
        jam.file_metadata.identifiers = jams.Sandbox(**self.__dict__)

        # Encode annotation metadata
        ann_meta = jams.AnnotationMetadata(
            annotator={
                "mirdata version": mirdata.__version__,
                "pretty_midi version": pretty_midi.__version__,
            },
            version=GMD_VERSION,
            corpus=DATASET_DIR,
            annotation_tools="Roland TD-11 electronic drum kit",
            annotation_rules=ANNOTATION_RULES,
            validation=mirdata.groove_midi.AUDIO_MIDI_REMOTE,
            data_source="Google Magenta",
            curator=curator,
        )

        # Encode beat annotation
        beat_ann = jams.Annotation(
            namespace="beat_position",
            time=0,
            duration=self.duration,
            annotation_metadata=ann_meta,
        )
        beat_times = self.midi.get_beats()
        meter = self.midi.time_signature_changes[0]
        n_beats_per_bar = meter.numerator
        beat_durations = np.diff(list(beat_times) + [self.duration])
        beat_enum = enumerate(zip(beat_times, beat_durations))
        for beat_id, (beat_time, beat_duration) in beat_enum:
            beat_value = {
                "position": 1 + (beat_id % n_beats_per_bar),
                "measure": 1 + (beat_id // n_beats_per_bar),
                "num_beats": n_beats_per_bar,
                "beat_units": meter.denominator,
            }
            beat_ann.append(time=beat_time,
                            duration=beat_duration,
                            confidence=1,
                            value=beat_value)
        jam.annotations.append(beat_ann)

        # Encode tempo annotation
        tempo_ann = jams.Annotation(
            namespace="tempo",
            time=0,
            duration=self.duration,
            annotation_metadata=ann_meta,
        )
        tempo_ann.append(time=0,
                         duration=self.duration,
                         confidence=1,
                         value=self.bpm)
        jam.annotations.append(tempo_ann)

        # Encode event annotation. We support three drum mappings:
        # Roland, General MIDI (GM), and Simplified.
        mapping_keys = ["Roland", "General MIDI", "Simplified"]
        for mapping_key in mapping_keys:
            mapping_namespace = "drum stroke ({} mapping)".format(mapping_key)
            event_ann = jams.Annotation(
                namespace=mapping_namespace,
                time=0,
                duration=self.duration,
                annotation_metadata=ann_meta,
            )
            for note in self.midi.instruments[0].notes:
                event_value = DRUM_MAPPING[note.pitch][mapping_key]
                event_ann.append(
                    time=note.start,
                    duration=note.end - note.start,
                    value=event_value,
                    confidence=1,
                )
            jam.annotations.append(event_ann)

        return jam
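A brief usage sketch, assuming track is a loaded mirdata-style Track object exposing this method; the output path is arbitrary:

jam = track.to_jams()          # track: hypothetical, loaded elsewhere via mirdata
jam.save('groove_midi_example.jams')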
Example 14
def test_curator():

    c = jams.Curator(name='myself', email='*****@*****.**')

    eq_(c.name, 'myself')
    eq_(c.email, '*****@*****.**')
Example 15
    assert c.email == '*****@*****.**'


# AnnotationMetadata
@pytest.fixture
def ann_meta_dummy():
    return dict(version='0',
                corpus='test',
                annotation_tools='nose',
                annotation_rules='brains',
                validation='unnecessary',
                data_source='null')


@parametrize('curator',
             [None, jams.Curator(name='nobody', email='*****@*****.**')])
@parametrize('annotator', [None, jams.Sandbox(description='desc')])
def test_annotation_metadata(ann_meta_dummy, curator, annotator):

    md = jams.AnnotationMetadata(curator=curator,
                                 annotator=annotator,
                                 **ann_meta_dummy)

    if curator is not None:
        assert dict(md.curator) == dict(curator)

    if annotator is not None:
        assert dict(md.annotator) == dict(annotator)

    real_data = dict(md)
    real_data.pop('curator')
    real_data.pop('annotator')
    assert real_data == ann_meta_dummy
Example 16
def parse_annotation(jam, path, annotation_id, level, metadata):
    """Parses one annotation for the given level

    Parameters
    ----------
    jam: object
        The top-level JAMS object.
    path: str
        path to the track in the SALAMI dataset.
    annotation_id: int
        Whether to use the first or the second annotation.
    level: str
        Level of annotation.
    metadata: list
        List containing the information of the CSV file for the current track.
    """
    level_dict = {
        "function": "_functions",
        "large_scale": "_uppercase",
        "small_scale": "_lowercase"
    }
    namespace_dict = {
        "function": "segment_salami_function",
        "large_scale": "segment_salami_upper",
        "small_scale": "segment_salami_lower"
    }

    # File to open
    file_path = os.path.join(
        path, "parsed",
        "textfile" + str(annotation_id) + level_dict[level] + ".txt")

    # Open file
    try:
        f = open(file_path, "r")
    except IOError:
        logging.warning("Annotation missing in %s", file_path)
        return

    # Annotation Metadata
    curator = jams.Curator(name="Jordan Smith", email="*****@*****.**")
    annotator = {
        "name": metadata[annotation_id + 1],
        "submission_date": metadata[annotation_id + 15]
    }
    ann_meta = jams.AnnotationMetadata(curator=curator,
                                       version="2.0",
                                       corpus="SALAMI",
                                       annotator=annotator,
                                       data_source=metadata[1],
                                       annotation_tools="Sonic Visualizer")

    # Create Annotation
    annot = jams.Annotation(namespace=namespace_dict[level],
                            annotation_metadata=ann_meta)

    # Actual Data
    lines = f.readlines()
    for i, line in enumerate(lines[:-1]):
        start_time, label = line.strip("\n").split("\t")
        end_time = lines[i + 1].split("\t")[0]
        start_time = float(start_time)
        end_time = float(end_time)
        dur = end_time - start_time
        if dur == 0:
            continue

        if level == "function":
            label = fix_label(label)

        annot.data.add_observation(time=start_time, duration=dur, value=label)
    f.close()

    # Add annotation to the jams
    jam.annotations.append(annot)
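A hedged sketch of how this parser might be driven for both annotators and every level of a SALAMI track; track_path and metadata are assumptions loaded elsewhere:

jam = jams.JAMS()
for annotation_id in (1, 2):
    for level in ("function", "large_scale", "small_scale"):
        parse_annotation(jam, track_path, annotation_id, level, metadata)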