def main():
    """Segment an audio file into speech/music regions and write Audacity labels.

    Pipeline: resample the input to 22050 Hz mono PCM with ffmpeg, extract
    features with yaafe, detect segment boundaries from combined feature
    peaks, classify frames with a pre-trained pickled model, then merge and
    de-noise the labels.  Writes ``annotated-segments.txt`` next to the input
    file and removes the temporary wav and yaafe CSV outputs.
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--input-file', dest='input_file', required=True)
    args = parser.parse_args()

    input_dir = os.path.split(args.input_file)[0]
    temp_file = input_dir + "/temp.wav"

    # Resample to the rate the feature plan and the trained model expect.
    cmd = ["/usr/bin/ffmpeg", "-i", args.input_file, "-ar", "22050",
           "-ac", "1", "-acodec", "pcm_s16le", temp_file, "-y"]
    subprocess.check_call(cmd)

    cmd = ["yaafe", "-c", FEATURE_PLAN, "-r", "22050", temp_file]
    subprocess.check_output(cmd)

    # NOTE: "spectral_rollof" is (mis)spelled this way in the yaafe feature
    # plan output names — it must stay byte-identical to match the CSVs.
    features1 = ["zcr", "flux", "spectral_rollof", "energy_stats"]
    features2 = ["mfcc_stats"]
    features3 = ["spectral_flatness_per_band"]
    features4 = features1 + features2 + features3
    FEATURE_GROUPS = [features1, features2, features3, features4]

    peaks, convolution_values, timestamps = feat.get_combined_peaks(
        temp_file, FEATURE_GROUPS, kernel_type="gaussian")
    detected_segments = kernel.calculate_segment_start_end_times_from_peak_positions(
        peaks, timestamps)

    timestamps, feature_vectors = feat.read_features(features4, temp_file, scale=True)

    # BUG FIX: pickle data is binary; opening in text mode ('r') corrupts the
    # stream on Python 3 and on Windows.  'rb' is correct on every platform
    # and on Python 2 as well.
    with open("/opt/speech-music-discrimination/pickled/model.pickle", 'rb') as f:
        trained_model = pickle.load(f)

    frame_level_predictions = trained_model.predict(feature_vectors)
    annotated_segments = Util.get_annotated_labels_from_predictions_and_sm_segments(
        frame_level_predictions, detected_segments, timestamps)
    # Merge -> drop noisy labels -> merge again, since filtering can create
    # new adjacent runs of the same class.
    annotated_segments = Util.combine_adjacent_labels_of_the_same_class(
        annotated_segments)
    annotated_segments = feat.filter_noisy_labels(annotated_segments)
    annotated_segments = Util.combine_adjacent_labels_of_the_same_class(
        annotated_segments)
    Util.write_audacity_labels(annotated_segments,
                               input_dir + "/annotated-segments.txt")

    # Clean up yaafe CSV outputs and the temporary wav.
    for f in glob.glob(input_dir + "/*.csv"):
        os.remove(f)
    os.remove(temp_file)
def main():
    """Build the MIREX muspeak speech/music benchmark as one concatenated wav.

    Unzips the muspeak examples, converts mp3s to wav, cuts each file into
    labelled speech/music segments, shuffles the segments with a fixed seed,
    concatenates them with sox into ``mirex_combined.wav`` and writes the
    matching timeline to ``mirex_combined.txt``.  Intermediate directories
    are removed afterwards.
    """
    subprocess.check_output(["unzip", "muspeak-mirex2015-detection-examples.zip",
                             "-d", "muspeak-mirex2015-detection-examples"])
    mp3_to_wav()
    wav_files = glob.glob("./muspeak-mirex2015-detection-examples/*.wav")
    for wav_file in wav_files:
        # FIX: was a Python-2-only print statement; the call form behaves
        # identically for a single argument on Python 2 and 3.
        print(wav_file)
        label_file = wav_file.replace(".mp3.wav", ".csv")
        # Some examples ship their ground-truth labels under a "_v2" suffix.
        if not os.path.isfile(label_file):
            label_file = label_file.replace(".csv", "_v2.csv")
        WavEditor.create_audio_segments(label_file, wav_file, "segments", True,
                                        ",", "f2", remove_overlapping=True)

    speech_wavs = glob.glob("./segments/*_s.wav")
    music_wavs = glob.glob("./segments/*_m.wav")
    all_files_dict = {}
    for f in speech_wavs:
        all_files_dict[f] = "s"
    for f in music_wavs:
        all_files_dict[f] = "m"

    # Fixed seed -> reproducible shuffle.  FIX: list() is required on
    # Python 3, where dict.keys() is a view and random.sample needs a
    # sequence; on Python 2 it is a no-op (keys() already returns a list).
    random.seed(2222)
    all_files_random_keys = random.sample(list(all_files_dict.keys()),
                                          len(all_files_dict))

    # Walk the shuffled segments, accumulating a running timeline so each
    # one gets absolute start/end times in the concatenated output.
    last_seconds = 0
    files_to_concatenate = []
    labels = []
    for v in all_files_random_keys:
        duration = float(subprocess.check_output(["soxi", "-D", v]).strip())
        segment_start_time = last_seconds
        segment_end_time = last_seconds + duration
        last_seconds += duration
        labels.append(AudacityLabel(segment_start_time, segment_end_time,
                                    all_files_dict[v]))
        files_to_concatenate.append(v)

    audacity_labels = Util.combine_adjacent_labels_of_the_same_class(labels)
    Util.write_audacity_labels(audacity_labels, "mirex_combined.txt")

    command = ["sox"]
    command.extend(files_to_concatenate)
    command.append("mirex_combined.wav")
    subprocess.check_output(command)

    shutil.rmtree("./segments")
    shutil.rmtree("./muspeak-mirex2015-detection-examples")
def test_combine_adjacent_labels_of_the_same_class(self):
    """Adjacent labels of one class collapse into a single spanning label."""
    # (class, start, end) triples: two "m" runs separated by three "s" labels.
    segments = [
        ("m", 0, 10), ("m", 10, 20),
        ("s", 20, 21), ("s", 21, 22), ("s", 22, 23),
        ("m", 23, 30),
    ]
    labels_in = [AudacityLabel(start, end, cls) for cls, start, end in segments]

    merged = Util.combine_adjacent_labels_of_the_same_class(labels_in)

    self.assertListEqual(
        [
            AudacityLabel(0, 20, "m"),
            AudacityLabel(20, 23, "s"),
            AudacityLabel(23, 30, "m"),
        ],
        merged)
# NOTE(review): this chunk begins mid-definition — the enclosing function
# header and the loop that binds `f` (presumably `for f in music_wavs:`,
# mirroring the parallel builder elsewhere in this file — confirm) are
# outside this view, so only comments are added here.
all_files_dict[f] = "m"
# Fixed seed -> reproducible shuffle of the segment files.
random.seed(1111)
all_files_random_keys = random.sample(all_files_dict.keys(), len(all_files_dict.keys()))
# Walk the shuffled files, accumulating a running timeline so each segment
# gets absolute start/end times in the concatenated output.
last_seconds = 0
files_to_concatenate = []
labels = []
for v in all_files_random_keys:
    # soxi -D prints the duration of the wav in seconds.
    duration = float(subprocess.check_output(["soxi", "-D", v]).strip())
    segment_start_time = last_seconds
    segment_end_time = last_seconds + duration
    last_seconds += duration
    labels.append(
        AudacityLabel(segment_start_time, segment_end_time, all_files_dict[v]))
    files_to_concatenate.append(v)
audacity_labels = Util.combine_adjacent_labels_of_the_same_class(labels)
Util.write_audacity_labels(audacity_labels, "gtzan_combined.txt")
# Concatenate all segments into a single wav via sox.
command = []
command.append("sox")
command.extend(files_to_concatenate)
command.append("gtzan_combined.wav")
subprocess.check_output(command)
# chmod before rmtree — presumably the extracted dataset is read-only; verify.
subprocess.call(['chmod', '-R', '777', './music_speech'])
shutil.rmtree("./music_speech")
# NOTE(review): chunk begins mid-definition — `music_wavs`, `all_files_dict`
# and the enclosing function header are defined above this view, so only
# comments are added here.
for f in music_wavs:
    # tag music files "m" (speech files are presumably tagged "s" above — confirm)
    all_files_dict[f] = "m"
# Fixed seed -> reproducible shuffle of the segment files.
random.seed(1111)
all_files_random_keys = random.sample(all_files_dict.keys(), len(all_files_dict.keys()))
# Walk the shuffled files, accumulating a running timeline so each segment
# gets absolute start/end times in the concatenated output.
last_seconds = 0
files_to_concatenate = []
labels = []
for v in all_files_random_keys:
    # soxi -D prints the duration of the wav in seconds.
    duration = float(subprocess.check_output(["soxi", "-D", v]).strip())
    segment_start_time = last_seconds
    segment_end_time = last_seconds + duration
    last_seconds += duration
    labels.append(AudacityLabel(segment_start_time, segment_end_time, all_files_dict[v]))
    files_to_concatenate.append(v)
audacity_labels = Util.combine_adjacent_labels_of_the_same_class(labels)
Util.write_audacity_labels(audacity_labels, "gtzan_combined.txt")
# Concatenate all segments into a single wav via sox.
command = []
command.append("sox")
command.extend(files_to_concatenate)
command.append("gtzan_combined.wav")
subprocess.check_output(command)
# chmod before rmtree — presumably the extracted dataset is read-only; verify.
subprocess.call(['chmod', '-R', '777', './music_speech'])
shutil.rmtree("./music_speech")