def main():
    """Annotate the speech/music segments of one audio file.

    Reads ``--input-file`` from the command line, converts it with ffmpeg to
    a temporary 22050 Hz, mono, ``pcm_s16le`` WAV stored next to the input,
    runs ``yaafe`` on that WAV with ``FEATURE_PLAN``, derives segments from
    the peaks returned by ``feat.get_combined_peaks``, classifies the feature
    frames with the pickled model and writes the merged labels to
    ``annotated-segments.txt`` in the input file's directory.

    On success the temporary WAV and every ``*.csv`` file found in the input
    directory are deleted.

    Raises:
        subprocess.CalledProcessError: if ffmpeg or yaafe exits non-zero.
    """
    parser = argparse.ArgumentParser(
        description='Annotate speech and music segments in an audio file.')
    parser.add_argument('--input-file', dest='input_file', required=True)
    args = parser.parse_args()

    # dirname is "" for a bare file name; fall back to "." so the working
    # files stay in the current directory instead of resolving to "/".
    input_dir = os.path.dirname(args.input_file) or "."
    temp_file = os.path.join(input_dir, "temp.wav")

    # Resample to the fixed format the rest of the pipeline is run with;
    # "-y" lets ffmpeg overwrite a temp.wav left over from an earlier run.
    subprocess.check_call([
        "/usr/bin/ffmpeg", "-i", args.input_file, "-ar", "22050", "-ac", "1",
        "-acodec", "pcm_s16le", temp_file, "-y"
    ])

    # check_output is used only to capture (and discard) yaafe's stdout.
    subprocess.check_output(
        ["yaafe", "-c", FEATURE_PLAN, "-r", "22050", temp_file])

    features1 = ["zcr", "flux", "spectral_rollof", "energy_stats"]
    features2 = ["mfcc_stats"]
    features3 = ["spectral_flatness_per_band"]
    features4 = features1 + features2 + features3
    feature_groups = [features1, features2, features3, features4]

    peaks, convolution_values, timestamps = feat.get_combined_peaks(
        temp_file, feature_groups, kernel_type="gaussian")
    detected_segments = (
        kernel.calculate_segment_start_end_times_from_peak_positions(
            peaks, timestamps))

    # The timestamps used for labelling are the ones read together with the
    # full feature set, replacing those returned alongside the peaks.
    timestamps, feature_vectors = feat.read_features(
        features4, temp_file, scale=True)

    # Pickles are binary data, so the file must be opened in "rb" mode.
    # NOTE: pickle.load can execute arbitrary code; this fixed path must only
    # ever contain a trusted model file.
    with open("/opt/speech-music-discrimination/pickled/model.pickle",
              'rb') as f:
        trained_model = pickle.load(f)

    frame_level_predictions = trained_model.predict(feature_vectors)

    annotated_segments = (
        Util.get_annotated_labels_from_predictions_and_sm_segments(
            frame_level_predictions, detected_segments, timestamps))

    # Merge, filter, then merge again: filtering can leave neighbouring
    # labels of the same class that need to be combined once more.
    annotated_segments = Util.combine_adjacent_labels_of_the_same_class(
        annotated_segments)
    annotated_segments = feat.filter_noisy_labels(annotated_segments)
    annotated_segments = Util.combine_adjacent_labels_of_the_same_class(
        annotated_segments)

    Util.write_audacity_labels(
        annotated_segments,
        os.path.join(input_dir, "annotated-segments.txt"))

    # NOTE(review): this removes every CSV in the input directory, not only
    # the ones presumably written by yaafe -- confirm that is intended.
    for csv_file in glob.glob(os.path.join(input_dir, "*.csv")):
        os.remove(csv_file)

    os.remove(temp_file)
def main():
    """Build one shuffled speech/music recording from the MIREX examples.

    Steps, in order:

    1. Unzip ``muspeak-mirex2015-detection-examples.zip`` into a directory of
       the same name and call ``mp3_to_wav()`` (defined elsewhere).
    2. For every extracted ``*.wav``, locate its label CSV (falling back to
       the ``_v2.csv`` name) and pass both to
       ``WavEditor.create_audio_segments`` with ``"segments"`` as target.
    3. Collect ``./segments/*_s.wav`` as class ``"s"`` and
       ``./segments/*_m.wav`` as class ``"m"``, shuffle them with a fixed
       seed and lay them out back to back, using ``soxi -D`` for durations.
    4. Write the merged labels to ``mirex_combined.txt`` and concatenate the
       files with ``sox`` into ``mirex_combined.wav``.
    5. Delete ``./segments`` and the extracted archive directory.

    Raises:
        subprocess.CalledProcessError: if unzip, soxi or sox exits non-zero.
    """
    subprocess.check_output(["unzip", "muspeak-mirex2015-detection-examples.zip", "-d",
                             "muspeak-mirex2015-detection-examples"])

    mp3_to_wav()

    for wav_file in glob.glob("./muspeak-mirex2015-detection-examples/*.wav"):
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print(wav_file)
        label_file = wav_file.replace(".mp3.wav", ".csv")
        if not os.path.isfile(label_file):
            # Some label files only exist under the "_v2" name.
            label_file = label_file.replace(".csv", "_v2.csv")
        WavEditor.create_audio_segments(label_file, wav_file, "segments", True, ",", "f2", remove_overlapping=True)

    speech_wavs = glob.glob("./segments/*_s.wav")
    music_wavs = glob.glob("./segments/*_m.wav")

    # Map every segment file to its class label.
    all_files_dict = {}
    for f in speech_wavs:
        all_files_dict[f] = "s"
    for f in music_wavs:
        all_files_dict[f] = "m"

    # Fixed seed so repeated runs shuffle the same way. random.sample needs a
    # real sequence (a dict view raises TypeError on Python 3.11+), hence the
    # explicit list; on Python 2 this is the same list .keys() returned.
    random.seed(2222)
    all_files_random_keys = random.sample(list(all_files_dict), len(all_files_dict))

    last_seconds = 0
    files_to_concatenate = []
    labels = []
    for v in all_files_random_keys:
        # "soxi -D" prints the duration of the file in seconds.
        duration = float(subprocess.check_output(["soxi", "-D", v]).strip())
        segment_start_time = last_seconds
        segment_end_time = last_seconds + duration
        last_seconds += duration
        labels.append(AudacityLabel(segment_start_time, segment_end_time, all_files_dict[v]))
        files_to_concatenate.append(v)

    audacity_labels = Util.combine_adjacent_labels_of_the_same_class(labels)
    Util.write_audacity_labels(audacity_labels, "mirex_combined.txt")

    # sox concatenates all inputs, in order, into the last file named.
    command = ["sox"] + files_to_concatenate + ["mirex_combined.wav"]
    subprocess.check_output(command)

    shutil.rmtree("./segments")
    shutil.rmtree("./muspeak-mirex2015-detection-examples")
# Example no. 3
# 0
    def test_combine_adjacent_labels_of_the_same_class(self):
        """Neighbouring labels with the same class collapse into one label."""
        # (start, end, class) triples: two "m", three "s", then one "m".
        given_spans = [
            (0, 10, "m"),
            (10, 20, "m"),
            (20, 21, "s"),
            (21, 22, "s"),
            (22, 23, "s"),
            (23, 30, "m"),
        ]
        # Each run of equal classes should become a single span.
        wanted_spans = [
            (0, 20, "m"),
            (20, 23, "s"),
            (23, 30, "m"),
        ]

        given = [AudacityLabel(*span) for span in given_spans]
        wanted = [AudacityLabel(*span) for span in wanted_spans]

        merged = Util.combine_adjacent_labels_of_the_same_class(given)

        self.assertListEqual(wanted, merged)
    all_files_dict[f] = "m"

# Shuffle the collected files with a fixed seed so repeated runs use the same
# order. random.sample needs a real sequence (a dict view raises TypeError on
# Python 3.11+), hence the explicit list; on Python 2 this equals .keys().
random.seed(1111)
all_files_random_keys = random.sample(list(all_files_dict),
                                      len(all_files_dict))

# Lay the files out back to back, recording one label per file.
last_seconds = 0
files_to_concatenate = []

labels = []
for v in all_files_random_keys:
    # "soxi -D" prints the duration of the file in seconds.
    duration = float(subprocess.check_output(["soxi", "-D", v]).strip())
    segment_start_time = last_seconds
    segment_end_time = last_seconds + duration
    last_seconds += duration
    labels.append(
        AudacityLabel(segment_start_time, segment_end_time, all_files_dict[v]))
    files_to_concatenate.append(v)

audacity_labels = Util.combine_adjacent_labels_of_the_same_class(labels)
Util.write_audacity_labels(audacity_labels, "gtzan_combined.txt")

# sox concatenates all inputs, in order, into the last file named.
command = ["sox"] + files_to_concatenate + ["gtzan_combined.wav"]
subprocess.check_output(command)

# NOTE(review): presumably the tree contains entries that rmtree could not
# delete without the permission change -- confirm before removing the chmod.
subprocess.call(['chmod', '-R', '777', './music_speech'])
shutil.rmtree("./music_speech")
# Register every music file under the class label "m".
for f in music_wavs:
    all_files_dict[f] = "m"

# Shuffle the collected files with a fixed seed so repeated runs use the same
# order. random.sample needs a real sequence (a dict view raises TypeError on
# Python 3.11+), hence the explicit list; on Python 2 this equals .keys().
random.seed(1111)
all_files_random_keys = random.sample(list(all_files_dict), len(all_files_dict))

# Lay the files out back to back, recording one label per file.
last_seconds = 0
files_to_concatenate = []

labels = []
for v in all_files_random_keys:
    # "soxi -D" prints the duration of the file in seconds.
    duration = float(subprocess.check_output(["soxi", "-D", v]).strip())
    segment_start_time = last_seconds
    segment_end_time = last_seconds + duration
    last_seconds += duration
    labels.append(AudacityLabel(segment_start_time, segment_end_time, all_files_dict[v]))
    files_to_concatenate.append(v)

audacity_labels = Util.combine_adjacent_labels_of_the_same_class(labels)
Util.write_audacity_labels(audacity_labels, "gtzan_combined.txt")

# sox concatenates all inputs, in order, into the last file named.
command = ["sox"] + files_to_concatenate + ["gtzan_combined.wav"]
subprocess.check_output(command)

# NOTE(review): presumably the tree contains entries that rmtree could not
# delete without the permission change -- confirm before removing the chmod.
subprocess.call(['chmod', '-R', '777', './music_speech'])
shutil.rmtree("./music_speech")