def create_annotation(root, annotator_id, jam_file, context):
    """Creates an annotation from the given root of an XML svl file.

    The section annotation whose annotator name equals ``annotator_id`` is
    reused when ``jam_file`` already contains one; otherwise a new section
    annotation is created.  Every pair of consecutive ``point`` elements of
    the XML tree becomes one datapoint (start, end, label) and the JAMS file
    is then rewritten in place.

    If the reused annotation already holds data for ``context`` the function
    returns without modifying anything.

    Parameters
    ----------
    root : XML element
        Root of the parsed svl file.  Its ``iter`` method must yield a
        ``model`` element carrying a ``sampleRate`` attribute and ``point``
        elements carrying ``frame`` and ``label`` attributes.
    annotator_id : str
        Annotator name to look for; also the key used to look the annotator
        up in the module-level ``annotators``.
    jam_file : str
        Path of the JAMS file to read and overwrite.
    context : str
        Context stored on every created label.  Labels are lower-cased for
        ``"small_scale"`` and upper-cased for ``"large_scale"``; for any other
        value the label value is left untouched.

    Raises
    ------
    StopIteration
        If the XML tree has no ``model`` element or no ``point`` element.
    """
    # Load jam file
    jam = jams2.load(jam_file)

    # Look for an existing annotation made by this annotator
    annot = None
    for candidate in jam.sections:
        if candidate.annotation_metadata.annotator.name == annotator_id:
            annot = candidate
            break

    if annot is None:
        # Create Annotation if needed
        annot = jam.sections.create_annotation()
    elif any(data.label.context == context for data in annot.data):
        # If this context already exists, do nothing
        return

    # Create Metadata
    annot.annotation_metadata.annotator = annotators[annotator_id]
    # TODO: More metadata

    # Get sampling rate from XML root.  The builtin next() is used instead of
    # the iterator's .next() method, which only exists on Python 2.
    sr = float(next(root.iter("model")).attrib["sampleRate"])

    # Create datapoints from the XML root: each point closes the segment
    # opened by the previous one, so N points produce N - 1 datapoints.
    points = root.iter("point")
    first = next(points)
    start = float(first.attrib["frame"]) / sr
    label = first.attrib["label"]
    for point in points:
        segment = annot.create_datapoint()
        end = float(point.attrib["frame"]) / sr
        segment.start.value = start
        segment.end.value = end
        # Make sure upper and lower case are consistent
        if context == "small_scale":
            segment.label.value = label.lower()
        elif context == "large_scale":
            segment.label.value = label.upper()
        segment.label.context = context
        # The current point opens the next segment
        start = end
        label = point.attrib["label"]

    # Save file
    with open(jam_file, "w") as f:
        json.dump(jam, f, indent=2)
def process_track(in_path, audio_file, jam_file, annot_beats, feature, ds_name,
                  annot_bounds, rank, h, R):
    """Runs the C-NMF segmenter on one audio file and saves its estimations.

    When ``annot_beats`` is set, the track is skipped (returning ``None``)
    unless ``jam_file`` contains a non-empty first beat annotation.  The
    estimations are written to
    ``<in_path>/estimations/<audio file name minus its last 4 chars>.json``:
    labels are always saved, boundaries only when ``annot_bounds`` is false.
    ``rank``, ``h`` and ``R`` are forwarded to the segmenter only when
    ``rank`` is not ``None``.  ``ds_name`` is not used.

    Returns an empty list once the estimations have been saved.
    """
    # Only analyze files with annotated beats
    if annot_beats:
        jam = jams2.load(jam_file)
        if jam.beats == [] or jam.beats[0].data == []:
            return

    logging.info("Segmenting %s" % audio_file)

    # Collect the segmenter options; the C-NMF specific ones are only passed
    # along when a rank was given, otherwise the segmenter defaults apply.
    segmenter_args = dict(feature=feature, annot_beats=annot_beats,
                          annot_bounds=annot_bounds)
    if rank is not None:
        segmenter_args.update(rank=rank, h=h, R=R)

    # C-NMF segmenter call
    est_times, est_labels = S.process(audio_file, **segmenter_args)

    # Save
    track_name = os.path.basename(audio_file)[:-4]
    out_file = os.path.join(in_path, "estimations", track_name + ".json")
    if not annot_bounds:
        MSAF.save_estimations(out_file, est_times, annot_beats, "cnmf2",
                              bounds=True, feature=feature)
    MSAF.save_estimations(out_file, est_labels, annot_beats, "cnmf2",
                          bounds=False, annot_bounds=annot_bounds,
                          feature=feature)

    return []
def compute_gt_results(est_file, trim, annot_beats, jam_files, alg_id,
                       beatles=False, annotator=0, bins=10, **params):
    """Computes the results by using the ground truth dataset.

    Parameters
    ----------
    est_file : str
        Path of the estimation file.  The part of its base name before the
        first ``"_"`` is used as the dataset prefix.
    trim : passed through to ``compute_results``.
    annot_beats : passed through to ``MSAF.read_estimations``.
    jam_files : passed to ``get_annotation`` to find the matching JAMS file.
    alg_id : passed through to ``MSAF.read_estimations``.
    beatles : bool
        When true, files whose JAMS artist is not ``"The Beatles"`` are
        skipped.
    annotator : int or str
        ``0`` or ``"GT"`` loads annotator 0 with the context taken from
        ``MSAF.prefix_dict``; any other value is used as an annotator name
        and the ``"large_scale"`` context is loaded.
    bins : passed through to ``compute_results``.
    **params : extra keyword arguments for ``MSAF.read_estimations``.

    Returns
    -------
    The value returned by ``compute_results``, or an empty list when the file
    is skipped (non-Beatles track, annotations that cannot be loaded, or
    empty estimations).
    """
    # Get the ds_prefix
    ds_prefix = os.path.basename(est_file).split("_")[0]

    # Get corresponding annotation file
    jam_file = get_annotation(est_file, jam_files)

    if beatles:
        jam = jams2.load(jam_file)
        if jam.metadata.artist != "The Beatles":
            return []

    # Any failure while loading the reference annotations (including an
    # unknown dataset prefix) makes the file be skipped.  Only ``Exception``
    # is caught so that KeyboardInterrupt / SystemExit still propagate
    # instead of being reported as a missing annotation.
    try:
        if annotator == "GT" or annotator == 0:
            ann_inter, _ = jams2.converters.load_jams_range(
                jam_file, "sections", annotator=0,
                context=MSAF.prefix_dict[ds_prefix])
        else:
            ann_inter, _ = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=annotator,
                context="large_scale")
    except Exception:
        logging.warning("No annotations for file: %s" % jam_file)
        return []

    est_inter = MSAF.read_estimations(est_file, alg_id, annot_beats, **params)
    if est_inter == []:
        return []

    # Compute the results and return
    return compute_results(ann_inter, est_inter, trim, bins, est_file)
def enrich_jam(audio_file, jam_file):
    """Enriches the JAMS file with the audio file.

    The duration that ``soxi -D`` reports for ``audio_file`` is stored in
    the ``metadata.duration`` field of the JAMS object, and the JAMS file is
    rewritten in place.  For audio files whose base name starts with
    ``"Cerulean_"``, ``add_last_segment`` is called before saving.

    Parameters
    ----------
    audio_file : str
        Path of the audio file to inspect.
    jam_file : str
        Path of the JAMS file to read and overwrite.

    Raises
    ------
    subprocess.CalledProcessError
        If ``soxi`` exits with a non-zero status.
    OSError
        If the ``soxi`` executable cannot be run.
    ValueError
        If the output of ``soxi`` cannot be parsed as a float.
    """
    # Read JAMS and annotation
    jam = jams2.load(jam_file)

    # Use sox (instead of the former Echo Nest lookup) to get the duration.
    # The command is given as an argument list and run without a shell, so
    # the file name reaches soxi verbatim whatever characters it contains;
    # no manual escaping of spaces, quotes, parentheses, etc. is needed.
    dur = float(subprocess.check_output(["soxi", "-D", audio_file]))
    jam.metadata.duration = dur

    # Add the last segment
    if os.path.basename(audio_file)[:9] == "Cerulean_":
        add_last_segment(jam, jam_file, dur)

    # Save JAMS
    with open(jam_file, "w") as f:
        json.dump(jam, f, indent=2)