def fill_annotation_metadata(ann, row):
    """Fills the annotation metadata."""
    ann.annotation_metadata.annotation_tools = 'Sonic Visualiser'
    ann.annotation_metadata.curator = jams.Curator(
        name='Justin Salamon', email='*****@*****.**')
    annotators = {
        'annotators': [{
            'name': 'Justin Salamon',
            'email': '*****@*****.**'
        }, {
            'name': 'Christopher Jacoby',
            'email': '*****@*****.**'
        }]
    }
    ann.annotation_metadata.annotator = jams.Sandbox(**annotators)
    ann.annotation_metadata.version = '1.0'
    ann.annotation_metadata.corpus = 'UrbanSound8K'
    ann.annotation_metadata.annotation_rules = (
        'See: J. Salamon, C. Jacoby and J. P. Bello, "A Dataset and Taxonomy '
        'for Urban Sound Research", in Proc. 22nd ACM International Conference '
        'on Multimedia, Orlando, USA, Nov. 2014.')
    ann.annotation_metadata.data_source = (
        'https://serv.cusp.nyu.edu/projects/urbansounddataset/')
    ann.annotation_metadata.validation = ''

    # Store all metadata in the sandbox too
    ann.sandbox.update(**row)
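# Usage sketch (not part of the original script): how fill_annotation_metadata
# might be called on a fresh tag annotation. The `row` dict below is a
# hypothetical UrbanSound8K metadata record; its field names are illustrative.
row = {'slice_file_name': '100032-3-0-0.wav', 'fold': 5, 'class': 'dog_bark'}
ann = jams.Annotation(namespace='tag_open')
ann.append(time=0.0, duration=4.0, value=row['class'])
fill_annotation_metadata(ann, row)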
def process_track(input_dir, output_dir, metadata, tags, compress):
    # Construct track metadata
    duration = get_track_duration(
        os.path.join(input_dir, 'audio', metadata['filename']))

    file_meta = jams.FileMetadata(title=metadata['title'],
                                  artist=metadata['artist'],
                                  duration=duration,
                                  identifiers=jams.Sandbox(id=metadata.name))

    # Build the tag annotation
    amd = jams.AnnotationMetadata(curator=jams.Curator(**__curator__),
                                  corpus=__corpus__)
    ann = jams.Annotation('tag_cal10k', annotation_metadata=amd)
    for tag in tags:
        ann.append(time=0, duration=duration, value=tag)

    jam = jams.JAMS(file_metadata=file_meta)
    jam.annotations.append(ann)
    jam.sandbox.content_path = metadata['filename']

    save_jam(output_dir, jam, metadata.name, compress)
def smc_tags(tag_file, duration):
    '''Get the tag data for this track as a JAMS annotation'''
    annotation = jams.Annotation('tag_open')

    data = []
    for value in list(pd.read_table(tag_file, header=None, squeeze=True)):
        if len(value) == 2:
            ann_id, ann_conf = tuple(value)
        else:
            data.append(value)

    curator = jams.Curator(**__curator__)
    metadata = jams.AnnotationMetadata(curator=curator,
                                       corpus=__corpus__,
                                       annotator={'id': ann_id,
                                                  'confidence': int(ann_conf)})
    annotation.annotation_metadata = metadata

    for tag in data:
        annotation.data.add_observation(time=0,
                                        duration=duration,
                                        value=tag,
                                        confidence=None)

    return annotation
def test_annotation_metadata():

    def __test(data, curator, annotator):
        md = jams.AnnotationMetadata(curator=curator, annotator=annotator,
                                     **data)

        if curator is not None:
            eq_(dict(md.curator), dict(curator))

        if annotator is not None:
            eq_(dict(md.annotator), dict(annotator))

        real_data = dict(md)
        real_data.pop('curator')
        real_data.pop('annotator')
        eq_(real_data, data)

    dummies = dict(version='0',
                   corpus='test',
                   annotation_tools='nose',
                   annotation_rules='brains',
                   validation='unnecessary',
                   data_source='null')

    real_curator = jams.Curator(name='nobody', email='*****@*****.**')
    real_annotator = jams.Sandbox(description='none')

    for curator in [None, real_curator]:
        for annotator in [None, real_annotator]:
            yield __test, dummies, curator, annotator
def process(in_dir, out_dir):
    """Converts the original Isophonics files into the JAMS format, and saves
    them in the out_dir folder."""
    all_jams = dict()
    output_paths = dict()
    all_labs = jams.util.find_with_extension(in_dir, 'lab', 5)
    all_labs += jams.util.find_with_extension(in_dir, 'txt', 4)

    for lab_file in all_labs:
        title = jams.util.filebase(lab_file)
        if title not in all_jams:
            all_jams[title] = jams.JAMS()
            parts = lab_file.replace(in_dir, '').strip('/').split('/')
            fill_file_metadata(all_jams[title], artist=parts[1], title=title)
            output_paths[title] = os.path.join(
                out_dir, *parts[1:]).replace(".lab", ".jams")
            logging.info("%s -> %s" % (title, output_paths[title]))

        jam = all_jams[title]
        if ISO_ATTRS['beat'] in lab_file:
            try:
                tmp_jam, annot = jams.util.import_lab(NS_DICT['beat'],
                                                      lab_file, jam=jam)
            except TypeError:
                tmp_jam, annot = jams.util.import_lab(NS_DICT['beat'],
                                                      lab_file, jam=jam,
                                                      sep="\t+")
            fix_beats_values(annot)
        elif ISO_ATTRS['chord'] in lab_file:
            tmp_jam, annot = jams.util.import_lab(NS_DICT['chord'],
                                                  lab_file, jam=jam)
            fix_chord_labels(jam.annotations[-1])
            fix_ranges(jam.annotations[-1])
            jam.file_metadata.duration = get_duration_from_annot(annot)
        elif ISO_ATTRS['key'] in lab_file:
            tmp_jam, annot = jams.util.import_lab(NS_DICT['key'],
                                                  lab_file, jam=jam)
            fix_key_labels(jam.annotations[-1])
            fix_ranges(jam.annotations[-1])
            fix_silence(jam.annotations[-1])
        elif ISO_ATTRS['segment'] in lab_file:
            tmp_jam, annot = jams.util.import_lab(NS_DICT['segment'],
                                                  lab_file, jam=jam)
            fix_ranges(jam.annotations[-1])
            jam.file_metadata.duration = get_duration_from_annot(annot)

        # Add Metadata
        curator = jams.Curator(name="Matthias Mauch", email="*****@*****.**")
        ann_meta = jams.AnnotationMetadata(curator=curator,
                                           version=1.0,
                                           corpus="Isophonics",
                                           annotator=None)
        jam.annotations[-1].annotation_metadata = ann_meta

    logging.info("Saving and validating JAMS...")
    for title in all_jams:
        out_file = output_paths[title]
        jams.util.smkdirs(os.path.split(out_file)[0])
        all_jams[title].save(out_file)
def run():
    # Grab the folder names
    names_folders = glob.glob(path_database + '*')
    for names_ in names_folders:
        # Grab the full path of every audio file
        path_audios = glob.glob(names_ + '/*.' + audio_format)
        for path_ in path_audios:
            # Extract the audio (wav) file name
            match_obj = re.sub(names_, "", path_)
            audio_name = re.sub(r'/', "", match_obj)
            # Extract the full path of the containing folder
            fold_path = re.sub(audio_name, "", path_)
            os.chdir(fold_path)

            # Compute the signal duration
            y, sr = librosa.load(audio_name, sr=None)
            duration = librosa.get_duration(y=y, sr=sr)

            # Remove audio files shorter than 2.3 s
            if duration >= 2.3:
                # Generate the JAMS annotation file
                jam = jams.JAMS()

                # Set the annotation parameters. Build the metadata in a
                # single object: assigning annotation_metadata twice would
                # overwrite the first value.
                jam.file_metadata.duration = duration
                ann = jams.Annotation(namespace='beat', time=0,
                                      duration=jam.file_metadata.duration)
                ann.annotation_metadata = jams.AnnotationMetadata(
                    data_source=data_source, validation="")
                ann.annotation_metadata.curator = jams.Curator(name=name,
                                                               email=email)
                jam.annotations.append(ann)

                # Save one annotation file per audio file
                jam_name = re.sub(r'\.wav', ".jams", audio_name)
                jam.save(jam_name)
            else:
                os.system('rm ' + path_)
def process_track(input_dir, output_dir, metadata, tags, compress):
    # Construct track metadata
    duration = get_track_duration(
        os.path.join(input_dir, 'mp3',
                     os.path.extsep.join([metadata['track'], 'mp3'])))

    artist, _ = metadata['track'].split('-', 1)
    artist = ARTIST_MAP.get(artist, artist)
    title = metadata['track'].replace('{:s}-'.format(artist), '')

    artist = artist.replace('_', ' ')
    title = title.replace('_', ' ')

    file_meta = jams.FileMetadata(title=title, artist=artist,
                                  duration=duration)

    # Build the tag annotation
    amd = jams.AnnotationMetadata(curator=jams.Curator(**__curator__),
                                  corpus=__corpus__)
    ann = jams.Annotation('tag_cal500', annotation_metadata=amd)

    # items() here; the original Python 2 code used iteritems()
    for value, confidence in tags.items():
        ann.append(time=0, duration=duration, value=value,
                   confidence=confidence)

    jam = jams.JAMS(file_metadata=file_meta)
    jam.annotations.append(ann)
    jam.sandbox.content_path = metadata['track']

    save_jam(output_dir, jam, metadata.name, compress)
def smc_annotation(ann_file):
    '''Create a JAMS beat annotation object from an SMC annotation file'''
    # Add metadata + curator
    match = re.match(r'.*SMC_\d+_(?P<meter>\d_\d_\d)_(?P<annotator>.+)\.txt',
                     ann_file)
    if not match:
        raise RuntimeError('Could not parse filename {:s}'.format(ann_file))

    curator = jams.Curator(**__curator__)
    metadata = jams.AnnotationMetadata(
        curator=curator,
        corpus=__corpus__,
        annotator={'id': match.group('annotator')})

    # Sandbox the following info:
    #   annotator id
    #   metrical interpretation
    annotation = jams.Annotation(
        'beat',
        annotation_metadata=metadata,
        sandbox={'metrical_interpretation': match.group('meter')})

    # Now load the data
    data = pd.read_csv(ann_file, header=None, squeeze=True)

    for beat_time in data:
        annotation.data.add_observation(time=beat_time,
                                        duration=0,
                                        value=1,
                                        confidence=None)

    return annotation
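# Usage sketch (an assumption, not from the original script): wrapping the
# annotation returned by smc_annotation() in a JAMS file. This presumes the
# module-level __curator__/__corpus__ dicts and an existing SMC ground-truth
# file; the filename and duration below are hypothetical.
jam = jams.JAMS()
jam.file_metadata.duration = 40.0
jam.annotations.append(smc_annotation('SMC_001_1_2_3_JS.txt'))
jam.save('SMC_001.jams')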
def parse_patterns(csv_file, kern_file, patterns, out_file):
    """Parses the set of patterns and saves the results into the output file.

    Parameters
    ----------
    csv_file : str
        Path to the main csv file from which the pattern is extracted.
    kern_file : str
        Path to the main kern file from which to extract the metadata.
    patterns : list of list of str (files)
        Set of all the patterns with the occurrences of a given piece.
    out_file : str (path)
        Path to the output file to save the set of patterns in the MIREX
        format.
    """
    # Create JAMS and add some metadata
    jam = jams.JAMS()
    curator = jams.Curator(name="Tom Collins", email="*****@*****.**")
    fill_file_metadata(jam, kern_file, csv_file)
    ann_meta = jams.AnnotationMetadata(curator=curator,
                                       version="August2013",
                                       corpus="JKU Development Dataset")

    # Create the actual annotation
    annot = jams.Annotation(namespace="pattern_jku",
                            annotation_metadata=ann_meta)

    # Get the bpm and the first and last onsets
    bpm = get_bpm(kern_file)
    first_onset, last_onset = get_first_last_onset(csv_file)

    pattern_n = 1
    for pattern in patterns:
        occ_n = 1
        for occ_file in pattern:
            start, end = find_in_csv(csv_file, occ_file)
            with open(csv_file, "r") as f:
                file_reader = list(csv.reader(f))
                for i in range(start, end):
                    value = {
                        "midi_pitch": float(file_reader[i][1]),
                        "morph_pitch": float(file_reader[i][2]),
                        # Hack to convert values like "0.000000000" into an int
                        "staff": int(float(file_reader[i][4])),
                        "pattern_id": pattern_n,
                        "occurrence_id": occ_n
                    }
                    # Transform onsets to time (seconds)
                    time = onset_to_seconds(float(file_reader[i][0]),
                                            first_onset, bpm)
                    dur = onset_to_seconds(float(file_reader[i][3]), 0, bpm)
                    annot.data.add_observation(time=time, duration=dur,
                                               value=value)
            occ_n += 1
        pattern_n += 1

    # Add the annotation to the JAMS object
    jam.annotations.append(annot)

    # Save file
    jam.save(out_file)
def extract_ground_truth(diagnostics_group):
    """
    Extract ground-truth information from one or more MIDI files about a
    single audio file based on the results in one or more diagnostics files,
    and return a JAMS object with all of the annotations compiled.

    Parameters
    ----------
    diagnostics_group : list of dict
        List of dicts of diagnostics, each about a successful alignment of a
        different MIDI file to a single audio file.
    """
    # Construct the JAMS object
    jam = jams.JAMS()
    # Load in the first diagnostics (doesn't matter which, as they should all
    # correspond to the same audio file)
    diagnostics = diagnostics_group[0]
    # Load in the audio file to get its duration for the JAMS file
    audio, fs = librosa.load(diagnostics['audio_filename'],
                             feature_extraction.AUDIO_FS)
    jam.file_metadata.duration = librosa.get_duration(y=audio, sr=fs)
    # Also store metadata about the audio file, retrieved from the MSD
    jam.file_metadata.identifiers = {'track_id': diagnostics['audio_id']}
    jam.file_metadata.artist = MSD_LIST[diagnostics['audio_id']]['artist']
    jam.file_metadata.title = MSD_LIST[diagnostics['audio_id']]['title']

    # Iterate over the diagnostics files supplied
    for diagnostics in diagnostics_group:
        # Create annotation metadata object, shared across annotations
        commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()
        commit_url = "http://github.com/craffel/midi-dataset/tree/" + commit
        annotator = {'midi_md5': diagnostics['midi_md5'],
                     'commit_url': commit_url,
                     'confidence': diagnostics['score']}
        annotation_metadata = jams.AnnotationMetadata(
            curator=jams.Curator('Colin Raffel', '*****@*****.**'),
            version='0.0.1b',
            corpus='Million Song Dataset MIDI Matches',
            annotator=annotator,
            annotation_tools=(
                'MIDI files were matched and aligned to audio files using the '
                'code at http://github.com/craffel/midi-dataset. Information '
                'was extracted from MIDI files using pretty_midi '
                'https://github.com/craffel/pretty-midi.'),
            annotation_rules=(
                'Beat locations and key change times were linearly '
                'interpolated according to an audio-to-MIDI alignment.'),
            validation=(
                'Only MIDI files with alignment confidence scores >= .5 were '
                'considered "correct". The confidence score can be used as a '
                'rough guide to the potential correctness of the annotation.'),
            data_source='Inferred from a MIDI file.')

        # Load the extracted features
        midi_features = deepdish.io.load(diagnostics['midi_features_filename'])
        audio_features = deepdish.io.load(
            diagnostics['audio_features_filename'])
        # Load in the original MIDI file
        midi_object = pretty_midi.PrettyMIDI(diagnostics['midi_filename'])
        # Compute the times of the frames (will be used for interpolation)
        midi_frame_times = feature_extraction.frame_times(
            midi_features['gram'])[diagnostics['aligned_midi_indices']]
        audio_frame_times = feature_extraction.frame_times(
            audio_features['gram'])[diagnostics['aligned_audio_indices']]

        # Get the interpolated beat locations and add them to the JAMS object
        adjusted_beats = interpolate_times(midi_object.get_beats(),
                                           midi_frame_times,
                                           audio_frame_times)
        # Create an annotation record for the beats
        beat_a = jams.Annotation(namespace='beat')
        beat_a.annotation_metadata = annotation_metadata
        # Add beat timings to the annotation record
        for t in adjusted_beats:
            beat_a.append(time=t, duration=0.0)
        # Add the beat annotation record to the JAMS file
        jam.annotations.append(beat_a)

        # Get key signature times and their string names
        key_change_times = [c.time for c in midi_object.key_signature_changes]
        key_names = [pretty_midi.key_number_to_key_name(c.key_number)
                     for c in midi_object.key_signature_changes]
        # JAMS requires that the key name be supplied in the form e.g.
        # "C:major", but pretty_midi returns names in the format "C Major",
        # so the following converts to the JAMS format
        key_names = [name.replace(' ', ':').replace('M', 'm')
                     for name in key_names]
        # Compute interpolated event times
        adjusted_key_change_times, adjusted_key_names = interpolate_times(
            key_change_times, midi_frame_times, audio_frame_times, key_names,
            True)
        # Create a JAMS annotation for the key changes
        if len(adjusted_key_change_times) > 0:
            key_a = jams.Annotation(namespace='key_mode')
            key_a.annotation_metadata = annotation_metadata
            # We only have key start times from the MIDI file, but JAMS wants
            # durations too, so create a list of "end times"
            end_times = np.append(adjusted_key_change_times[1:],
                                  jam.file_metadata.duration)
            # Add key labels into the JAMS file
            for start, end, key in zip(adjusted_key_change_times, end_times,
                                       adjusted_key_names):
                key_a.append(time=start, duration=end - start, value=key)
            jam.annotations.append(key_a)

    return jam
def test_curator():
    c = jams.Curator(name='myself', email='*****@*****.**')

    assert c.name == 'myself'
    assert c.email == '*****@*****.**'
# AnnotationMetadata
@pytest.fixture
def ann_meta_dummy():
    return dict(version='0',
                corpus='test',
                annotation_tools='nose',
                annotation_rules='brains',
                validation='unnecessary',
                data_source='null')


@parametrize('curator',
             [None, jams.Curator(name='nobody', email='*****@*****.**')])
@parametrize('annotator', [None, jams.Sandbox(description='desc')])
def test_annotation_metadata(ann_meta_dummy, curator, annotator):
    md = jams.AnnotationMetadata(curator=curator, annotator=annotator,
                                 **ann_meta_dummy)

    if curator is not None:
        assert dict(md.curator) == dict(curator)

    if annotator is not None:
        assert dict(md.annotator) == dict(annotator)

    real_data = dict(md)
    real_data.pop('curator')
    real_data.pop('annotator')
    assert real_data == ann_meta_dummy
def to_jams(self):
    # Initialize top-level JAMS container
    jam = jams.JAMS()

    # Encode title, artist, and release
    jam.file_metadata.title = os.path.split(self.audio_filename)[1]
    jam.file_metadata.artist = self.drummer
    jam.file_metadata.release = os.path.split(self.session)[1]

    # Encode duration in seconds
    jam.file_metadata.duration = self.duration

    # Encode JAMS curator
    curator = jams.Curator(name="Jon Gillick", email="*****@*****.**")

    # Store mirdata metadata as JAMS identifiers
    jam.file_metadata.identifiers = jams.Sandbox(**self.__dict__)

    # Encode annotation metadata
    ann_meta = jams.AnnotationMetadata(
        annotator={
            "mirdata version": mirdata.__version__,
            "pretty_midi version": pretty_midi.__version__,
        },
        version=GMD_VERSION,
        corpus=DATASET_DIR,
        annotation_tools="Roland TD-11 electronic drum kit",
        annotation_rules=ANNOTATION_RULES,
        validation=mirdata.groove_midi.AUDIO_MIDI_REMOTE,
        data_source="Google Magenta",
        curator=curator,
    )

    # Encode beat annotation
    beat_ann = jams.Annotation(
        namespace="beat_position",
        time=0,
        duration=self.duration,
        annotation_metadata=ann_meta,
    )
    beat_times = self.midi.get_beats()
    meter = self.midi.time_signature_changes[0]
    n_beats_per_bar = meter.numerator
    beat_durations = np.diff(list(beat_times) + [self.duration])
    beat_enum = enumerate(zip(beat_times, beat_durations))
    for beat_id, (beat_time, beat_duration) in beat_enum:
        beat_value = {
            "position": 1 + (beat_id % n_beats_per_bar),
            "measure": 1 + (beat_id // n_beats_per_bar),
            "num_beats": n_beats_per_bar,
            "beat_units": meter.denominator,
        }
        beat_ann.append(time=beat_time,
                        duration=beat_duration,
                        confidence=1,
                        value=beat_value)
    jam.annotations.append(beat_ann)

    # Encode tempo annotation
    tempo_ann = jams.Annotation(
        namespace="tempo",
        time=0,
        duration=self.duration,
        annotation_metadata=ann_meta,
    )
    tempo_ann.append(time=0,
                     duration=self.duration,
                     confidence=1,
                     value=self.bpm)
    jam.annotations.append(tempo_ann)

    # Encode event annotations. Three drum mappings are supported:
    # Roland, General MIDI (GM), and Simplified.
    mapping_keys = ["Roland", "General MIDI", "Simplified"]
    for mapping_key in mapping_keys:
        mapping_namespace = "drum stroke ({} mapping)".format(mapping_key)
        event_ann = jams.Annotation(
            namespace=mapping_namespace,
            time=0,
            duration=self.duration,
            annotation_metadata=ann_meta,
        )
        for note in self.midi.instruments[0].notes:
            # Look up the current note's pitch (the original referenced an
            # undefined `drum` variable here)
            event_value = DRUM_MAPPING[note.pitch][mapping_key]
            event_ann.append(
                time=note.start,
                duration=note.end - note.start,
                value=event_value,
                confidence=1,
            )
        jam.annotations.append(event_ann)

    return jam
def test_curator():
    c = jams.Curator(name='myself', email='*****@*****.**')

    eq_(c.name, 'myself')
    eq_(c.email, '*****@*****.**')
def parse_annotation(jam, path, annotation_id, level, metadata):
    """Parses one annotation for the given level.

    Parameters
    ----------
    jam : object
        The top-level JAMS object.
    path : str
        Path to the track in the SALAMI dataset.
    annotation_id : int
        Whether to use the first or the second annotation.
    level : str
        Level of annotation.
    metadata : list
        List containing the information of the CSV file for the current track.
    """
    level_dict = {
        "function": "_functions",
        "large_scale": "_uppercase",
        "small_scale": "_lowercase"
    }
    namespace_dict = {
        "function": "segment_salami_function",
        "large_scale": "segment_salami_upper",
        "small_scale": "segment_salami_lower"
    }

    # File to open
    file_path = os.path.join(
        path, "parsed",
        "textfile" + str(annotation_id) + level_dict[level] + ".txt")

    # Open file
    try:
        f = open(file_path, "r")
    except IOError:
        logging.warning("Annotation missing in %s", file_path)
        return

    # Annotation Metadata
    curator = jams.Curator(name="Jordan Smith", email="*****@*****.**")
    annotator = {
        "name": metadata[annotation_id + 1],
        "submission_date": metadata[annotation_id + 15]
    }
    ann_meta = jams.AnnotationMetadata(curator=curator,
                                       version="2.0",
                                       corpus="SALAMI",
                                       annotator=annotator,
                                       data_source=metadata[1],
                                       annotation_tools="Sonic Visualizer")

    # Create Annotation
    annot = jams.Annotation(namespace=namespace_dict[level],
                            annotation_metadata=ann_meta)

    # Actual Data
    lines = f.readlines()
    for i, line in enumerate(lines[:-1]):
        start_time, label = line.strip("\n").split("\t")
        end_time = lines[i + 1].split("\t")[0]
        start_time = float(start_time)
        end_time = float(end_time)
        dur = end_time - start_time
        if start_time - end_time == 0:
            continue
        if level == "function":
            label = fix_label(label)
        annot.data.add_observation(time=start_time,
                                   duration=dur,
                                   value=label)
    f.close()

    # Add the annotation to the JAMS object
    jam.annotations.append(annot)
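# Usage sketch (hypothetical paths and metadata, not from the original
# script): parsing every SALAMI level for one track into a single JAMS
# object. `csv_row` stands for one row of the SALAMI index CSV.
jam = jams.JAMS()
for level in ("function", "large_scale", "small_scale"):
    parse_annotation(jam, "SALAMI/data/2", annotation_id=1, level=level,
                     metadata=csv_row)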