Code Example #1
 def recognise(self, mode='sphinx', marg=0.2):
     # use the audio file as the audio source
     r = srec.Recognizer()
     
     if mode=='sphinx':
         recogniser = r.recognize_sphinx
     if mode=='google':
         recogniser = r.recognize_google
     
     for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
         tstart = ts-marg
         tend = te+marg
         wo = self.x[int(tstart*self.sr):int(tend*self.sr)]

         wavwrite('speech_sample.wav', self.sr, wo)
 
         with srec.AudioFile('speech_sample.wav') as source:
             audio = r.record(source)  # read the entire audio file
 
         try:
             # for testing purposes, we're just using the default API key
             # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
             # instead of `r.recognize_google(audio)`
             utt = recogniser(audio)
             #utt = r.recognize_sphinx(audio)
             self.label[ii] = utt
         except srec.UnknownValueError:
             print("Speech Recognition could not understand audio")
         except srec.RequestError as e:
             print("Could not request results {}".format(e))
Code Example #2
 def recognise(self, mode='sphinx', marg=0.2):
     import sys
     import speech_recognition as srec
     # use the audio file as the audio source
     r = srec.Recognizer()
     
     if mode=='sphinx':
         recogniser = r.recognize_sphinx
         sys.stderr.write('Doing speech recognition with sphinx\n')
     if mode=='google':
         sys.stderr.write('Doing speech recognition with google\n')
         recogniser = r.recognize_google
     
     for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
         tstart = ts-marg
         tend = te+marg
         wo=self.x[int(tstart*self.sr):int(tend*self.sr)]
 
         wavwrite('speech_sample.wav', self.sr, wo.astype('int16'))
 
         with srec.AudioFile('speech_sample.wav') as source:
             audio = r.record(source)  # read the entire audio file
 
         try:
             # for testing purposes, we're just using the default API key
             # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
             # instead of `r.recognize_google(audio)`
             utt = recogniser(audio)
             #utt = r.recognize_sphinx(audio)
             self.label[ii] = utt
             sys.stderr.write('{}\n'.format(utt))
         except srec.UnknownValueError:
             sys.stderr.write("Speech Recognition could not understand audio\n")
         except srec.RequestError as e:
             sys.stderr.write("Could not request results {}\n".format(e))
Code Example #3
def main():
	try:
		out_dir, in_dir = sys.argv[1], sys.argv[2]
	except IndexError:
		in_dir = '../../data/sc09'
		out_dir = '../../data/sc09_wav'

	if not os.path.isdir(out_dir):
		os.makedirs(out_dir)
		
	tfrecord_fps = glob.glob(os.path.join(in_dir, '*.tfrecord'))

	dataset = tf.data.TFRecordDataset(tfrecord_fps)
	dataset = dataset.map(_mapper)
	dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(1))
	x, y = dataset.make_one_shot_iterator().get_next()
	x, y = x[0], y[0]

	with tf.Session() as sess:
		i = 0
		while True:
			try:
				_x, _y = sess.run([x, y])
			except tf.errors.OutOfRangeError:
				break

			_x *= 32767.
			_x = np.clip(_x, -32767., 32767.)
			_x = _x.astype(np.int16)
			wavwrite(os.path.join(out_dir, '{}_{}.wav'.format(_y, str(i).zfill(5))), 16000, _x)
			i += 1
Code Example #4
def main(argv):
    if len(argv) > 0 and str(argv[0]) == "help":
        print("python genwave.py FREQ AMP DURATION FS NUMCH BITS f/i OUTPATH")
        return

    freq = float(argv[0]) if len(argv) > 0 else 440
    amp = float(argv[1]) if len(argv) > 1 else 0.7
    duration = float(argv[2]) if len(argv) > 2 else 30
    fs = int(argv[3]) if len(argv) > 3 else 44100
    nch = int(argv[4]) if len(argv) > 4 else 2
    bits = int(argv[5]) if len(argv) > 5 else 16
    f_or_i = argv[6] if len(argv) > 6 else "i"
    outpath = str(argv[7]) if len(argv) > 7 else "out.wav"

    if bits == 16:
        sig = np.zeros([int(fs * duration), nch], dtype=np.int16)
    elif bits == 32:
        if f_or_i == "i":
            sig = np.zeros([int(fs * duration), nch], dtype=np.int32)
        else:
            sig = np.zeros([int(fs * duration), nch], dtype=np.float32)
    else:
        print("invalid bit-width: 16/32 required")
        return

    sinewave = amp * np.sin(
        np.arange(sig.shape[0]) * freq * 1.0 / fs * 2 * np.pi)

    for ich in range(sig.shape[1]):
        if f_or_i == "i":
            sig[:, ich] = np.round(sinewave * (2**(bits - 1) - 1))
        else:
            sig[:, ich] = sinewave[:]

    wavwrite(outpath, fs, sig)
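
A hedged usage sketch, calling main() directly with the argument list a command line would provide (a 1 kHz, 3-second, mono, 16-bit tone; the file name tone.wav is arbitrary):

main(["1000", "0.5", "3", "44100", "1", "16", "i", "tone.wav"])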
Code Example #5
def recover_samples_from_spectrum(logspectrum_stft, spectrum_phase, save_to):
    abs_spectrum = np.exp(logspectrum_stft)
    spectrum = abs_spectrum * (np.exp(1j * spectrum_phase))

    istft_graph = tf.Graph()
    with istft_graph.as_default():
        num_fea = int(FLAGS.Fs * 0.025) // 2 + 1
        frame_length = int(FLAGS.Fs * 0.025)
        frame_step = int(FLAGS.Fs * 0.010)
        stft_ph = tf.placeholder(tf.complex64, shape=(None, num_fea))
        samples = tf.signal.inverse_stft(
            stft_ph,
            frame_length,
            frame_step,
            frame_length,
            window_fn=tf.signal.inverse_stft_window_fn(
                frame_step,
                forward_window_fn=functools.partial(tf.signal.hann_window,
                                                    periodic=True)))
        istft_sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True))
        samples_ = istft_sess.run(samples, feed_dict={stft_ph: spectrum})
        wavwrite(save_to, FLAGS.Fs, samples_)

    return samples_
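
For reference, a sketch of the same reconstruction in TensorFlow 2 eager mode, without graphs or sessions. FLAGS.Fs is replaced by an explicit fs argument (the 16000 default is an assumption); the tf.signal calls mirror the ones above.

import functools
import numpy as np
import tensorflow as tf
from scipy.io.wavfile import write as wavwrite

def recover_samples_tf2(logspectrum_stft, spectrum_phase, save_to, fs=16000):
    spectrum = np.exp(logspectrum_stft) * np.exp(1j * spectrum_phase)
    frame_length = int(fs * 0.025)
    frame_step = int(fs * 0.010)
    samples = tf.signal.inverse_stft(
        tf.convert_to_tensor(spectrum, dtype=tf.complex64),
        frame_length, frame_step, frame_length,
        window_fn=tf.signal.inverse_stft_window_fn(
            frame_step,
            forward_window_fn=functools.partial(tf.signal.hann_window,
                                                periodic=True))).numpy()
    wavwrite(save_to, fs, samples)
    return samples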
Code Example #6
File: generateWavs.py Project: ViktorVaczi90/MIDIML
def getFilteredDataList(inputFileName, load = True, save = True):
    frequencyFilterWidthMap = getNs()
    result = []
    [originalSampleRate, original] = wavread(inputFileName)
    for cutOffFrequency in frequencyFilterWidthMap:
        outputFileName = inputFileName + '_noisedfiltered_' + str(cutOffFrequency) + '.wav'
        if os.path.isfile(outputFileName) and load:
            print("Loading file ", outputFileName, " from disk." )
            [sampleRate, x] = wavread(outputFileName)
            if sampleRate != originalSampleRate:
                raise ValueError("Sample rate of file ", outputFileName, " does not eaqual the sample rate of",
                                                                         " the original file " , inputFileName)
        else:
            windowSize =  frequencyFilterWidthMap[cutOffFrequency]
            print('Generating noisedfiltered ', cutOffFrequency, ' data, with windowSize ', windowSize)
            x = running_mean(original, windowSize).astype('int16')
            x = x + np.random.normal(0, 10, len(x)).astype('int16') # to add noise.
            if save:
                wavwrite(outputFileName, originalSampleRate, x)
                print("saved: ", outputFileName)
        if len(x) != len(original):
            raise ValueError("Filtering the wav file failed. Original input is ", len(original), " long",
                             "but the filtered data is " , len(x) , " long.")
        result.append(x)
    return (result, originalSampleRate)
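
This function assumes a running_mean helper defined elsewhere in the project. A plausible, length-preserving sketch (the original implementation may handle edges differently):

import numpy as np

def running_mean(x, window_size):
    # Moving average; mode='same' keeps len(output) == len(x),
    # which the length check above relies on.
    kernel = np.ones(window_size) / window_size
    return np.convolve(x, kernel, mode='same')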
Code Example #7
def test_signal_normalize(tmp_path):
    sig = np.sin(2*np.pi*np.linspace(0,1,50))
    sig1 = sig * .8
    sig2 = sig * .4

    rms_out1 = 0.700 # Should be very close to full amplitude
    rms_out2 = 0.350 # Should be very close to half amplitude

    with tempfile.TemporaryDirectory() as tmpdirin:
        infile1 = os.path.join(tmpdirin, 'file1.wav')
        infile2 = os.path.join(tmpdirin, 'file2.wav')
        wavwrite(infile1, 50, sig1)
        wavwrite(infile2, 50, sig2)

        with tempfile.TemporaryDirectory() as tmpdirout:
            psylab.signal.normalize(tmpdirin, ext='wav', outdir=tmpdirout, relative=False)

            fs,out1 = wavread(os.path.join(tmpdirout, 'file1.wav'))
            fs,out2 = wavread(os.path.join(tmpdirout, 'file2.wav'))

            #print("sig: {:}".format(np.sqrt(np.mean(np.square(sig)))))
            #print("sig1: {:}".format(np.sqrt(np.mean(np.square(sig1)))))
            #print("sig2: {:}".format(np.sqrt(np.mean(np.square(sig2)))))
            #print("out1: {:}".format(np.sqrt(np.mean(np.square(out1)))))
            #print("out2: {:}".format(np.sqrt(np.mean(np.square(out2)))))

            np.testing.assert_allclose(rms_out1, np.sqrt(np.mean(np.square(out1))), rtol=1e-3)
            np.testing.assert_allclose(rms_out2, np.sqrt(np.mean(np.square(out2))), rtol=1e-3)
Code Example #8
def gen(path, endian="little"):
    endian_tag = "<" if endian == "little" else ">"
    print("parse the file \"{}\"".format(path))
    tags = path.split("_")
    tags = [s.replace(".pcm", "") for s in tags]
    fs_tag = [s for s in tags if s.startswith("sr")]
    if len(fs_tag) > 0:
        fs = int(fs_tag[0][2:])
    else:
        print("filename \"{}\" does not match the format.".format(path))
        return

    ch_tag = [s for s in tags if s.startswith("ch")]
    if len(ch_tag) > 0:
        num_ch = int(ch_tag[0][2:])
    else:
        print("filename \"{}\" does not match the format.".format(path))
        return

    format_tag = [s for s in tags if s.startswith("format")]
    if len(format_tag) > 0:
        proc_format = int(format_tag[0][6:])
    else:
        print("filename \"{}\" does not match the format.".format(path))
        return

    if proc_format not in (1, 5, 6):
        print("the format must be pcm16/pcm24/pcmfloat.")
        return

    with open(path, "rb") as f:
        raw_data = f.read()

    if proc_format == 1:
        signal = np.array(struct.unpack(
            "{}{}h".format(endian_tag,
                           len(raw_data) // 2), raw_data),
                          dtype=np.int16)
    elif proc_format == 5:
        signal = np.array(struct.unpack(
            "{}{}f".format(endian_tag,
                           len(raw_data) // 4), raw_data),
                          dtype=np.float32)
    elif proc_format == 6:
        # keep the upper 16 bits of each little-endian 24-bit sample
        data_len = len(raw_data) // 3
        data = b""
        for x in range(data_len):
            data += raw_data[x * 3 + 1:x * 3 + 3]
        signal = np.array(struct.unpack(
            "{}{}h".format(endian_tag,
                           len(data) // 2), data),
                          dtype=np.int16)

    if num_ch > 1:
        signal = np.reshape(signal, (len(signal) // num_ch, num_ch))

    output_path = "{}.wav".format(path)
    wavwrite(filename=output_path, rate=fs, data=signal)
    print("generate the file \"{}\"".format(output_path))
Code Example #9
File: beepgenerator.py Project: aviralg/Moogli-Old
def main():
    sound_filename  = sys.argv[1]
    frequency       = float(sys.argv[2])
    duration        = float(sys.argv[3])
    amplitude       = float(sys.argv[4])
    sampling_rate   = int(sys.argv[5])
    data            = sine(frequency, duration, amplitude, sampling_rate)
    wavwrite(sound_filename, sampling_rate, data)
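
The sine() helper is defined elsewhere in the project; a plausible sketch returning 16-bit samples (the original may scale or window differently):

import numpy as np

def sine(frequency, duration, amplitude, sampling_rate):
    t = np.arange(int(duration * sampling_rate)) / sampling_rate
    return (amplitude * np.sin(2 * np.pi * frequency * t) * 32767).astype(np.int16)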
Code Example #10
def _wav_write(wav_fp, fs, wav_f, normalize=False):
    if normalize:
        wav_f_max = wav_f.max()
        if wav_f_max != 0.0:
            wav_f /= wav_f.max()
    wav_f = np.clip(wav_f, -1.0, 1.0)
    wav = (wav_f * 32767.0).astype(np.int16)
    wavwrite(wav_fp, fs, wav)
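
A brief usage sketch: write a quarter-amplitude 440 Hz tone and let the helper scale it to full scale (file name and parameters are illustrative):

import numpy as np

t = np.arange(44100) / 44100.0
_wav_write('tone.wav', 44100, 0.25 * np.sin(2 * np.pi * 440 * t), normalize=True)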
Code Example #11
File: app.py Project: ykhorzon/omnizart
    def transcribe(self, input_audio, model_path=None, output="./"):
        """Transcribe frame-level fundamental frequency of vocal from the given audio.

        Parameters
        ----------
        input_audio: Path
            Path to the wav audio file.
        model_path: Path
            Path to the trained model or the transcription mode. If given a path, should be
            the folder that contains `arch.yaml`, `weights.h5`, and `configuration.yaml`.
        output: Path (optional)
            Path for writing out the extracted vocal f0. Default to current path.

        Returns
        -------
        f0: txt
            The transcribed f0 of the vocal contour in Hz.

        See Also
        --------
        omnizart.cli.vocal_contour.transcribe: The corresponding command line entry.
        """
        if not os.path.isfile(input_audio):
            raise FileNotFoundError(
                f"The given audio path does not exist. Path: {input_audio}")

        logger.info("Loading model...")
        model, model_settings = self._load_model(model_path)

        logger.info("Extracting feature...")
        feature = extract_cfp_feature(
            input_audio,
            hop=model_settings.feature.hop_size,
            win_size=model_settings.feature.window_size,
            down_fs=model_settings.feature.sampling_rate)

        logger.info("Predicting...")
        f0 = inference(feature[:, :, 0],
                       model,
                       timestep=model_settings.training.timesteps)
        agg_f0 = aggregate_f0_info(f0, t_unit=model_settings.feature.hop_size)

        timestamp = np.arange(len(f0)) * model_settings.feature.hop_size
        wav = sonify.pitch_contour(timestamp,
                                   f0,
                                   model_settings.feature.sampling_rate,
                                   amplitudes=0.5 * np.ones(len(f0)))

        output = self._output_midi(output, input_audio, verbose=False)
        if output is not None:
            write_agg_f0_results(agg_f0, f"{output}_f0.csv")
            wavwrite(f"{output}_trans.wav",
                     model_settings.feature.sampling_rate, wav)
            logger.info("Text and Wav files have been written to %s",
                        os.path.abspath(os.path.dirname(output)))

        logger.info("Transcription finished")
        return agg_f0
Code Example #12
File: filterwav.py Project: alexzywiak/vocalize
def main(**kwargs):
  outfile = kwargs['outfile'][0]
  infile = kwargs['infile']
  # print "Filtering %s to %s" % (infile, outfile)

  data, rate = ffmpeg_load_audio(infile, 44100, True, dtype=np.float32)
  wavwrite('test.wav', 44100, data)
  filtered_data = butter_bandpass_filter(data, 100.0, 3000.0, 44100)
  wavwrite(outfile, 44100, filtered_data)
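
butter_bandpass_filter and ffmpeg_load_audio are project helpers that are not shown. A plausible sketch of the band-pass filter built on scipy.signal (the project's version may use a different order or filtfilt):

from scipy.signal import butter, lfilter

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    b, a = butter(order, [lowcut / nyq, highcut / nyq], btype='band')
    return lfilter(b, a, data)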
Code Example #13
 def process_filtering(self,
                       sig_float_filtered,
                       write=False,
                       output_file_name=None):
     self.filtered = True
     self.sig_int = float2pcm(sig_float_filtered)
     self.sig_float = sig_float_filtered
     if write:
         wavwrite(output_file_name, self.sr, self.sig_int)
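
float2pcm is assumed to convert a float signal in [-1, 1] to integer PCM; a plausible sketch (the project's version may support other bit depths):

import numpy as np

def float2pcm(sig, dtype='int16'):
    info = np.iinfo(dtype)
    return (np.asarray(sig) * info.max).clip(info.min, info.max).astype(dtype)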
Code Example #14
 def generate_audio(self, fake_r, fake_i, order):
     audio_dir = './audio/' + str(order) + '/'
     if not os.path.isdir(audio_dir):
         os.makedirs(audio_dir)
     for i in range(self.batch_size):
         audio_fp = os.path.join(audio_dir, '{}.wav'.format(str(i)))
         fake = fake_r[i, 0] + 1j * fake_i[i, 0]
         audio = spec_to_wav(fake)
         wavwrite(audio_fp, self.fs, audio)
     print("Done generating audio :)")
Code Example #15
def resample(path_in, path_out=None, sampling_rate=16000):
    # load and resampling
    y, sr = librosa.load(path_in, sr=sampling_rate)
    # save  add '-16k' as suffix if path_out is None
    if path_out is None:
        root, ext = os.path.splitext(path_in)
        path_out = root + '-16k' + ext

    wavwrite(path_out, sr, (y * 2 ** 15).astype(np.int16))
    print('save wav file', path_out)
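
Usage sketch (file names are illustrative): with no path_out the result is written next to the input with a '-16k' suffix.

resample('input.wav')                            # writes input-16k.wav
resample('input.wav', path_out='voice_16k.wav')  # explicit output path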
Code Example #16
File: sound.py Project: idiosyncraticee/ACOio
    def get_wav(self, *, resample=True):
        '''Retrieves io.BytesIO() packed with `.wav` contents'''
        result = self.resample_fs(self.BULLSHITWAVNUMBER) if resample \
            else self.copy()
        data = result.normdata(dtype=np.int16)

        bytes_io = io.BytesIO()
        wavwrite(bytes_io, result._fs, data)

        return bytes_io
Code Example #17
def tx1_to_wav(tx1_fp, out_fp, midi_downsample_rate=None):
  if midi_downsample_rate == 0:
    midi_downsample_rate = None

  print('(Rate {}) {}->{}'.format(midi_downsample_rate, tx1_fp, out_fp))
  with open(tx1_fp, 'r') as f:
    tx1 = f.read()
  midi = tx1_to_midi(tx1)
  wav = nesmdb.convert.midi_to_wav(midi, midi_downsample_rate)
  wavwrite(out_fp, 44100, wav)
  print('Done: {}'.format(wav.shape))
  return True
Code Example #18
def main(argv):
    del argv
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        tpu_config=tf.contrib.tpu.TPUConfig(
            num_shards=FLAGS.num_shards,
            iterations_per_loop=FLAGS.iterations_per_loop))

    # Set module-level global variable so that model_fn and input_fn can be
    # identical for each different kind of dataset and model
    global dataset, model
    dataset = bias_input
    model = bias_model

    # TPU-based estimator used for TRAIN and EVAL
    est = tf.contrib.tpu.TPUEstimator(model_fn=model_fn,
                                      use_tpu=FLAGS.use_tpu,
                                      config=config,
                                      train_batch_size=FLAGS.batch_size,
                                      eval_batch_size=FLAGS.batch_size)

    # CPU-based estimator used for PREDICT (generating images)
    cpu_est = tf.contrib.tpu.TPUEstimator(model_fn=model_fn,
                                          use_tpu=False,
                                          config=config,
                                          predict_batch_size=_NUM_VIZ_AUDIO)

    current_step = estimator._load_global_step_from_checkpoint_dir(
        FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long
    tf.logging.info('Starting training for %d steps, current step: %d' %
                    (FLAGS.train_steps, current_step))

    # Render some generated images
    G_z = cpu_est.predict(input_fn=noise_input_fn)
    G_z = [p['generated_audio'][:, :] for p in G_z]
    G_z = np.array(G_z)
    preview_dir = './preview'
    if not os.path.isdir(preview_dir):
        os.makedirs(preview_dir)

    for i in range(len(G_z)):
        audio = np.int16(G_z[i] / np.max(np.abs(G_z[i])) * 32767)
        preview_fp = os.path.join(
            preview_dir, '{}_{}_{}.wav'.format(str(i % 10), str(current_step),
                                               str(i)))
        wavwrite(preview_fp, _FS, audio)

    tf.logging.info('Finished generating images')
Code Example #19
def recover_samples_from_spectrum(logspectrum_stft, spectrum_phase, save_to=None):
    abs_spectrum = np.exp(logspectrum_stft)
    spectrum = abs_spectrum * (np.exp(1j * spectrum_phase))

    istft_graph = tf.Graph()
    with istft_graph.as_default():
        stft_ph = tf.placeholder(tf.complex64, shape=(None, 201))
        samples = tf.contrib.signal.inverse_stft(stft_ph, 400, 160, 400)
        istft_sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        samples_ = istft_sess.run(samples, feed_dict={stft_ph: spectrum})
        if save_to:
            wavwrite(save_to, 16000, samples_)

    return samples_
Code Example #20
def save_wav(path0, data, sr=44100):
    #
    print('file ', path0)

    amplitude = np.iinfo(np.int16).max
    max_data = np.amax(np.abs(data))  # normalize, max level is 16bit full bit
    if max_data < (1.0 / amplitude):
        max_data = 1.0

    try:
        wavwrite(path0, sr,
                 np.array((amplitude / max_data) * data, dtype=np.int16))
    except Exception:
        print('error: wavwrite ', path0)
        sys.exit()
Code Example #21
    def record_audio(self, out_path, time_allowance):
        self.ids.record_button.disabled = True
        self.ids.reading_speed_slider.disabled = True
        self.ids.goto_text_input.disabled = True

        recording = self.mic.record(samplerate=self.fs,
                                    numframes=int(time_allowance * self.fs),
                                    channels=1)
        recording = recording / numpy.max(numpy.abs(recording))

        wavwrite(out_path, self.fs, recording)  # Save as WAV file
        self.recording_indicator = ""
        self.ids.record_button.disabled = False
        self.ids.reading_speed_slider.disabled = False
        self.ids.goto_text_input.disabled = False
        self.load_next_sentence()
Code Example #22
def test_signal_equate(tmp_path):
    sig1 = np.sin(2*np.pi*np.linspace(0,1,50))
    sig2 = sig1 * .8
    with tempfile.TemporaryDirectory() as tmpdirin:
        infile1 = os.path.join(tmpdirin, 'file1.wav')
        infile2 = os.path.join(tmpdirin, 'file2.wav')
        wavwrite(infile1, 50, sig1)
        wavwrite(infile2, 50, sig2)

        with tempfile.TemporaryDirectory() as tmpdirout:
            psylab.signal.equate(tmpdirin, ext='wav', outdir=tmpdirout, relative=False)

            fs,out1 = wavread(os.path.join(tmpdirout, 'file1.wav'))
            fs,out2 = wavread(os.path.join(tmpdirout, 'file2.wav'))

            np.testing.assert_allclose(sig2, out1)
            np.testing.assert_allclose(sig2, out2)
            np.testing.assert_allclose(np.sqrt(np.mean(np.square(sig1)))*.8, np.sqrt(np.mean(np.square(out2))))
Code Example #23
def filter(input_path, output_path, lowcut=100.0, highcut=3000.0, rate=44100):
    """
    Filters a wav file to get rid of frequencies not present in human speech. Band pass filter.

    :param input_path: the input wav path
    :param output_path: the output wav path
    :param lowcut: the lowest frequency to accept
    :param highcut: the highest frequency to accept
    :param rate: the sampling frequency
    :type rate: Number
    :param force: call recursively if the input_path and match_path lengths vary greatly (not between 0.5 and 2.0)
    :returns: -1 if a file does not exist
    """
    if check_file_paths([input_path]) == -1:
        return -1
    data, rate = ffmpeg_load_audio(input_path, 44100, True, dtype=np.float32)
    filtered_data = butter_bandpass_filter(data, lowcut, highcut, rate)
    wavwrite(output_path, 44100, filtered_data)
Code Example #24
File: image_loader.py Project: sircelj/dcgan-imgldr
    def epoch_save(self, samples, dir_name, epoch):
        """
        Makes a mag/phase figure using _get_image and plotimage from
        samples and saves it as a .png file. Samples are also transformed
        to audio signals and saved as a .wav file. All files are saved
        into a subdirectory with the epoch number as its name.

        Parameters
        ----------
        samples : ndarray
            Array of mag/phase tensors you want to save. Needs to hold at
            least 64 tensors.
        dir_name : string
            Name of the target directory.
        epoch : int
            Specifies the name of the directory containing the saved figure
            and .wav files. It should be at most 3 digits long, since the
            directory name is the epoch number zero-padded to 3 digits.

        """

        # Create new epoch directory
        ep_dir_name = dir_name + '/%s' % str(epoch).zfill(3) + '/'
        if not os.path.exists(ep_dir_name):
            os.makedirs(ep_dir_name)

        # Create an 8x8 grid of mag/phase images
        batch_size, _, width, _ = samples.shape
        samples_magphase = np.zeros((batch_size, width, width))
        # samples_magphase[:, :width // 2, :] = np.squeeze(samples[:, :, :, 0])
        samples_magphase[:, :width // 2, :] = samples[:, :, :,
                                                      0]  # Spectrogram
        samples_magphase[:, width // 2:, :] = samples[:, :, :, 1]  # Phase

        # Save the image
        fig = self.plotimage(samples_magphase)
        plt.savefig(ep_dir_name + 'mag_phase.png', bbox_inches='tight')
        plt.close(fig)

        # Save sound samples
        for i, magphase in enumerate(samples):
            audio = self._image_to_audio(magphase)
            wavwrite(ep_dir_name + "%s.wav" % str(i).zfill(3), int(self.rate),
                     audio / self.rate)
Code Example #25
def handle_signals(mixedpath, noisepospath, noisenegpath):
  try:
    # Read Wavs
    mixedsamples = read_wav(mixedpath)
    noisepossamples = read_wav(noisepospath)
    noisenegsamples = read_wav(noisenegpath)

    # Normalize
    max_scale = max(abs(mixedsamples)+0.000001)
    mixedsamples = mixedsamples / max_scale
    wavwrite('/home/user/Desktop/N_HANS_Github/N_HANS___Selective_Noise/audio_examples/mixed_normalised', 16000, mixedsamples)
    noisepossamples = noisepossamples / max_scale
    noisenegsamples = noisenegsamples / max_scale
    mixedsamples = mixedsamples.astype(np.float32)
    noisepossamples = noisepossamples.astype(np.float32)
    noisenegsamples = noisenegsamples.astype(np.float32)

    # Cut the end to have an exact number of frames
    if (len(mixedsamples) - 400) % 160 != 0:
        mixedsamples = mixedsamples[:-((len(mixedsamples) - 400) % 160)]

    nsepos = noisepossamples
    nseneg = noisenegsamples
    while len(mixedsamples) - len(nsepos) > 0:  # Make noise longer
        diff = len(mixedsamples) - len(nsepos)
        nsepos = np.concatenate([nsepos, noisepossamples[:diff]], axis=0)

    while len(mixedsamples) - len(nseneg) > 0:  # Make noise longer
        diff = len(mixedsamples) - len(nseneg)
        nseneg = np.concatenate([nseneg, noisenegsamples[:diff]], axis=0)

    if len(mixedsamples) - len(noisepossamples) < 0:  # Make noise shorter
        nsepos = noisepossamples[:len(mixedsamples)]
    if len(mixedsamples) - len(noisenegsamples) < 0:  # Make noise shorter
        nseneg = noisenegsamples[:len(mixedsamples)]

    noisepossamples = nsepos
    noisenegsamples = nseneg

    return noisepossamples, noisenegsamples, mixedsamples

  except Exception:
    print('error in threads')
    print(mixedpath, noisepospath, noisenegpath)
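
read_wav is a project helper that is not shown; a plausible sketch returning float samples (the original may also resample or enforce a fixed rate):

import numpy as np
from scipy.io.wavfile import read as wavread

def read_wav(path):
    _, samples = wavread(path)
    if samples.dtype == np.int16:
        samples = samples / 32768.0
    return samples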
Code Example #26
def filter(input_path, output_path, lowcut=100.0, highcut=3000.0, rate=44100):
    """
    Filters a wav file to get rid of frequencies not present in human speech. Band pass filter.

    :param input_path: the input wav path
    :param output_path: the output wav path
    :param lowcut: the lowest frequency to accept
    :param highcut: the highest frequency to accept
    :param rate: the sampling frequency
    :type rate: Number
    :param force: call recursively if the input_path and match_path lengths vary greatly (not between 0.5 and 2.0)
    :returns: -1 if a file does not exist
    """
    if check_file_paths([input_path]) == -1:
        return -1
    data, rate = ffmpeg_load_audio(input_path, 44100, True, dtype=np.float32)
    filtered_data = butter_bandpass_filter(data, lowcut, highcut, rate)
    # return filtered_data
    wavwrite(output_path, 44100, filtered_data)
Code Example #27
File: main.py Project: Tama-github/PrjAudVid
def runTest():

    dir = 'ressources/'
    files = [
        dir + 'test1.mpg', dir + 'test2.mpg', dir + 'test3.mpg',
        dir + 'test4.mpg', dir + 'test5.mpg'
    ]

    sg = SoundGenerator(sound)
    j = 0
    fileList = []
    for i in files:
        objs = k.kanadeHarris(i, sound)
        data, fs = sg.soundGenerationForVideoPurpose(objs)
        wavwrite("soundTest" + str(j) + ".wav", fs, data)
        genNewVideo('ressources/test' + str(j + 1) + '.mpg_out.avi',
                    "soundTest" + str(j) + ".wav", j)
        fileList.append('res/output' + str(j) + '.avi')
        j += 1
    concatRes(fileList)
Code Example #28
File: reconstruct.py Project: lngka/mllangid-dec
def reconstructFromSTFT(spectrum_phase, logspectrum_stft, save_to):
    abs_spectrum = np.exp(logspectrum_stft)
    spectrum_phase = np.array(spectrum_phase)
    spectrum = abs_spectrum * (np.exp(1j * spectrum_phase))

    istft_graph = tf.Graph()
    with istft_graph.as_default():
        num_fea = FFT_LENGTH // 2 + 1
        frame_length = WIN_SAMPLES
        frame_step = HOP_SAMPLES

        stft_ph = tf.placeholder(tf.complex64, shape=(None, num_fea))
        samples = tf.signal.inverse_stft(stft_ph, frame_length, frame_step,
                                         FFT_LENGTH)
        istft_sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True))
        samples_ = istft_sess.run(samples, feed_dict={stft_ph: spectrum})
        wavwrite(save_to, SAMPLING_RATE, samples_)

    return samples_
Code Example #29
def average(*args):
    """
    Averages multiple wav files together. Accomplishes this by performing fast fourier transforms on the data, averaging those arrays, and then performing an inverse fast fourier transform.

    :param args: array of wav paths with the output being the first path and the rest being inputs
    :returns: -1 if it fails or if it cannot find the paths

    :Example:

    >>> import speechprocessing
    >>> speechprocessing.average('output.wav', 'input_one.wav', 'input_two.wav')
    """
    if len(args) < 2:
        print('Invalid number of arguments')
        return -1

    output_path = args[0]
    input_paths = args[1:]
    processed_wav_data = []
    if check_file_paths(input_paths) == -1:
        return -1
    for path in input_paths:
        data, rate = ffmpeg_load_audio(path, 44100, True, dtype=np.float32)
        filtered_data = butter_bandpass_filter(data, 100.0, 3000.0, 44100)
        processed_wav_data.append(filtered_data)

    fft_data = []

    for data in processed_wav_data:
        fft_data.append(np.fft.rfft(data))

    # zip(*fft_data) pairs the i-th FFT bin from every file,
    # truncating to the length of the shortest transform.
    zipped_data = zip(*fft_data)

    mean_data = [np.mean(bins) for bins in zipped_data]

    # Reverse real fft
    averaged = np.fft.irfft(mean_data)
    wavwrite(output_path, 44100, averaged)
Code Example #30
File: log_audio.py Project: etzinis/fedenhance
    def log_batch(self,
                  pr_batch,
                  t_batch,
                  mix_batch,
                  mixture_rec=None):
        """!
        :param pr_batch: Reconstructed wavs: Torch Tensor of size:
                         batch_size x num_sources x length_of_wavs
        :param t_batch: Target wavs: Torch Tensor of size:
                        batch_size x num_sources x length_of_wavs
        :param mix_batch: Batch of the mixtures: Torch Tensor of size:
                          batch_size x 1 x length_of_wavs
        :param mixture_rec: Batch of the reconstructed mixtures: Torch Tensor of
                            size: batch_size x 1 x length_of_wavs
        """


        mixture = mix_batch.detach().cpu().numpy()
        true_sources = t_batch.detach().cpu().numpy()
        pred_sources = pr_batch.detach().cpu().numpy()

        for b_ind in range(self.bs):
            self.log_example(pred_sources[b_ind],
                             true_sources[b_ind],
                             mixture[b_ind],
                             title='bind_{}'.format(b_ind))

        if mixture_rec is not None:
            mixture_rec_np = mixture_rec.detach().cpu().numpy()
            for b_ind in range(self.bs):
                rec_mix_name = "bind_{}_rec_mix.wav".format(b_ind)
                rec_mix_wav = (mixture_rec_np[b_ind][0] /
                               (np.max(np.abs(mixture_rec_np[b_ind][0])) +
                                10e-8))
                wavwrite(os.path.join(self.dirpath, rec_mix_name),
                         self.fs,
                         rec_mix_wav)
Code Example #31
File: processwav.py Project: alexzywiak/vocalize
def main(**kwargs):
  outfile = kwargs['outfile'][0]
  infile = kwargs['infile']
  print "Filtering %s to %s" % (infile, outfile)
  rate, sound_samples = wavread(infile)
  mono = True
  if 'ndarray' in str(type(sound_samples[0])):
    mono = False
  # data,r = ffmpeg_load_audio('32but.wav', 44100, True, dtype=np.float32)
  # ffmpeg_load_audio returns (data, rate), so unpack in that order
  sound_samples, rate = ffmpeg_load_audio(infile, rate, mono, dtype=np.float32)

  fs = 44100.0
  lowcut = 100.0
  highcut = 3000.0

  # b,a = butter_bandpass(lowcut, highcut, fs, 5)

  # filtered = lfilter(b, a, sound_samples)

  # filtered = butter_bandpass_filter(sound_samples, lowcut, highcut, fs, 5)

  # filtered = butter_bandpass_filter_two(sound_samples, lowcut, highcut, fs, 5)

  wavwrite(outfile, rate, sound_samples)
Code Example #32
def main():
    sig = 3
    offset = 0

    rate, data, nrate, ndata = get_all_data()

    timelength = data.shape[0] / rate
    times = np.linspace(0., timelength, data.shape[0])

    kernel = signal.gaussian(data.size, sig)
    f, pxx, nf, npxx, convolved, nconvolved = get_power_spectra(
        kernel, sig, PLOT_FLAG)

    # Test smoothing
    gaussian_smoothing_test(PLOT_FLAG)

    # Apply custom low-pass filter
    filtered = low_pass(convolved, nconvolved, f, times, PLOT_FLAG)

    # Recover the final signal
    recovered_signal = recover(filtered, data, times, offset, PLOT_FLAG)
    # Save this signal into a wav file
    recovered_signal = recovered_signal.real.astype(np.int16)
    wavwrite("reconstructed.wav", rate, recovered_signal)
Code Example #33
File: log_audio.py Project: etzinis/fedenhance
 def log_example(self,
                 pred_sources,
                 true_sources,
                 mixture,
                 title=''):
     mix_name = "{}_mix.wav".format(title)
     mix_wav = (mixture[0] /
                (np.max(np.abs(mixture[0])) + 10e-8))
     wavwrite(os.path.join(self.dirpath, mix_name),
              self.fs,
              mix_wav)
     for s_ind in range(self.n_sources):
         true_s_name = "{}_true_s{}.wav".format(title, s_ind)
         rec_s_name = "{}_rec_s{}.wav".format(title, s_ind)
         rec_wav = (pred_sources[s_ind] /
                    (np.max(np.abs(pred_sources[s_ind])) + 10e-8))
         true_wav = (true_sources[s_ind] /
                     (np.max(np.abs(true_sources[s_ind])) + 10e-8))
         wavwrite(os.path.join(self.dirpath, true_s_name),
                  self.fs,
                  true_wav)
         wavwrite(os.path.join(self.dirpath, rec_s_name),
                  self.fs,
                  rec_wav)
Code Example #34
    recons_vt_psd = lpc.lpc2psd(lpc_coef, g, fft_size)
    recons_psd *= recons_vt_psd
    recons_psds.append(recons_psd)
    recons_vt_psds.append(recons_vt_psd)
recons_psds = np.array(recons_psds)
recons_vt_psds = np.array(recons_vt_psds)

df = pysptk.synthesis.AllZeroDF(ORDER)
synthesizer = pysptk.synthesis.Synthesizer(df, int(fs * 0.005))
x_glottal_res_zerodf = synthesizer.synthesis(x, lpcs / gains)
df = pysptk.synthesis.AllZeroDF(3)
synthesizer = pysptk.synthesis.Synthesizer(df, int(fs * 0.005))
x_res_zerodf = synthesizer.synthesis(x_glottal_res_zerodf,
                                     glottal_lpcs / glottal_gains)

wavwrite('x_glottal_res_zerodf.wav', fs,
         (x_glottal_res_zerodf * 2**15).astype(np.int16))
wavwrite('x_res_zerodf.wav', fs, (x_res_zerodf * 2**15).astype(np.int16))

y = synthesisRequiem.get_waveform(x_res_zerodf,
                                  np.transpose(recons_psds, [1, 0]),
                                  dat['temporal_positions'], dat['f0'],
                                  dat['fs'])

y_from_glottal = synthesisRequiem.get_waveform(
    x_glottal_res_zerodf, np.transpose(recons_vt_psds, [1, 0]),
    dat['temporal_positions'], dat['f0'], dat['fs'])

wavwrite('x_recons_zerodf.wav', fs, (y * 2**15).astype(np.int16))
wavwrite('x_recons_glottal_zerodf.wav', fs,
         (y_from_glottal * 2**15).astype(np.int16))
Code Example #35
File: average.py Project: alexzywiak/vocalize
    output_filepath = sys.argv[1]
    input_filepaths = sys.argv[2:]

    fs = 44100.0

    lowcut = 100.0 # Low pass cutoff
    highcut = 3000.0 # High pass cutoff

    processed_wav_data = []

    for path in input_filepaths:
        data, rate = ffmpeg_load_audio(path, 44100, True, dtype=np.float32)
        filtered_data = butter_bandpass_filter(data, 100.0, 3000.0, 44100)
        processed_wav_data.append(filtered_data)

    fft_data = []

    for data in processed_wav_data:
        fft_data.append(np.fft.rfft(data))

    # zip(*fft_data) pairs the i-th FFT bin from every file,
    # truncating to the length of the shortest transform.
    zipped_data = zip(*fft_data)

    mean_data = [np.mean(bins) for bins in zipped_data]

    # Reverse real fft
    f = np.fft.irfft(mean_data)

    wavwrite(output_filepath, int(fs), f)
Code Example #36
File: vgm_to_wav.py Project: Jhird/nesmdb
def save_vgmwav(wav_fp, wav):
  wav *= 32767.
  wav = np.clip(wav, -32768., 32767.)
  wav = wav.astype(np.int16)
  wavwrite(wav_fp, 44100, wav)
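
Usage sketch: the function expects a float waveform roughly in [-1, 1] and scales it to 16-bit (tone parameters are illustrative):

import numpy as np

t = np.arange(44100) / 44100.0
save_vgmwav('beep.wav', 0.5 * np.sin(2 * np.pi * 880 * t))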
Code Example #37
    isIce = (allsicRS > thresh).astype(float)
    isIce[nanMask] = np.nan
    probIce=np.nanmean(isIce, axis=-1)
    
    p25=[]
    p75=[]
    for yit in range(len(boxPos)):
        p25.append(np.percentile(nSIF[yit,:], 25))
        p75.append(np.percentile(nSIF[yit,:], 75))
    
    p25_1850= np.percentile(nSIF1850, 25)
    p75_1850= np.percentile(nSIF1850, 75)     
         
    data1=(sic[:,0]>15)*1.
    data2=(sic1850>15)*1.
    wavwrite('/Volumes/Pitcairn/seaicePPF/northernHemisphere/figures/OWnoise1850-2100.'+key+'.wav', 365*8, data1)
    wavwrite('/Volumes/Pitcairn/seaicePPF/northernHemisphere/figures/OWnoise1850BG'+key+'.wav', 365*8, data2)

    fig8, ax8 = plt.subplots(1, 2, sharey=True, sharex=False, num=None, figsize=(7,3.5), dpi=300, facecolor='w', edgecolor='w')
    fig8.sca(ax8[0])
    plt.tick_params(axis='both', which='major', labelsize=6)
    plt.tick_params(axis='both', which='minor', labelsize=6)
    fig8.sca(ax8[1])
    plt.tick_params(axis='both', which='major', labelsize=6)
    plt.tick_params(axis='both', which='minor', labelsize=6)
    fig8.patch.set_alpha(0.0)
    
    #ax5.set_aspect('equal', 'datalim')
    fig8.sca(ax8[1])
    plt.imshow(meanAllsic.T, vmin=0, vmax=100, cmap=cmap)
    #plt.colorbar()
Code Example #38
File: genfft.py Project: alexzywiak/vocalize
    # sound_time = f.nframes*1.0/f.samplerate
    # sound_data = f.read_frames(f.nframes)
    # samples_to_take = int(math.floor(sound_time * display_sample_rate))
    # time_step_for_samples = f.samplerate*1.0/display_sample_rate

    # wave = []

    # for i in xrange(samples_to_take):
    #     frame_offset = i * time_step_for_samples
    #     if num_channels == 1:
    #         wave.append(sound_data[frame_offset])
    #     else:
    #         wave.append(sound_data[frame_offset][0])
    rate, wave = wavread(infile)

    wavwrite('test.wav', rate, wave)
    (freq, amp) = get_component_frequencies(wave)

            # print type(s)
    # with open('data.txt', 'a') as textOutputFile:
    #     for line in amp:
    #         textOutputFile.write(str(line))
    #         textOutputFile.write(',')

    # Only plot first 4000 Hz

    hz=4000
    freq = freq[0:hz]
    amp = amp[0:hz]

    fig = pylab.figure()
Code Example #39
def sound(var):
    from scipy.io.wavfile import write as wavwrite
    scaled = np.int16(var / np.max(np.abs(var)) * 32767)
    stmp = np.asarray(scaled, dtype=np.int16)
    wavwrite('stmp.wav', 8820, stmp)
    sysfileopen("stmp.wav")
Code Example #40
File: downsample_wav.py Project: kraftp/CS-281-final
import numpy as np
import wave
from scipy.io.wavfile import write as wavwrite
import struct

LENWAV = 20000  # Must be <= 40000 / SAMPLEFACTOR for one-second wav files
SAMPLEFACTOR = 4
img = np.zeros((1, 1, LENWAV, 1), dtype=int)

waveFile = wave.open("../data/pitbull.wav", "rb")
for i in range(0, LENWAV * SAMPLEFACTOR):
    waveData = waveFile.readframes(1)
    if i % SAMPLEFACTOR == 0:
        sound = struct.unpack("<h", waveData)
        img[0, 0, i // SAMPLEFACTOR, 0] = sound[0]

print "NEW SAMPLE RATE", waveFile.getframerate() / SAMPLEFACTOR

wavwrite(
    "../output/test.wav",
    waveFile.getframerate() // SAMPLEFACTOR,
    img.flatten() / float(np.max(np.abs(img.flatten()), axis=0)),
)
Code Example #41
File: pocoder.py Project: bastibe/pocoder
def deemphasis(signal):
    return lfilter([1, 0.70], 1, signal)

def rms(signal):
    return sqrt(mean(power(signal, 2)))

if __name__ == "__main__":
    fs, data = wavread('Mann.wav')
    data = array(data, dtype=double)
    data /= amax(absolute(data))
    data = decimate(data, 4)
    fs = round(fs/4)

    block_len = 0.032
    overlap = 0.5
    order = 16

    out = vocode(data, fs, block_len, overlap, order)

    wavwrite('vocoded.wav', fs, array(out/amax(absolute(out)) * (2**15-1), dtype=int16))

    figure()
    plot(data)
    figure()
    plot(out)
    show()

# ideas:
# use reduce(ola, map(process, array))
# http://stackoverflow.com/questions/6657820/python-convert-an-iterable-to-a-stream
Code Example #42
 def process_filtering(self, sig_float_filtered, write=False, output_file_name = None):
     self.filtered = True
     self.sig_int = float2pcm(sig_float_filtered)
     self.sig_float = sig_float_filtered
     if write:
         wavwrite(output_file_name, self.sr, self.sig_int)
Code Example #43
File: convert.py Project: Jhird/nesmdb
def main():
  import argparse
  import os
  import sys
  import traceback

  from tqdm import tqdm

  parser = argparse.ArgumentParser()

  conversion_to_types = {
      # VGM simplifiers
      'vgm_simplify': ('.vgm', '.simp.vgm'),
      'vgm_shorten': ('.vgm', '.short.vgm'),

      # NES disassembly raw
      'vgm_to_ndr': ('.vgm', '.ndr.pkl'),
      'ndr_to_txt': ('.ndr.pkl', '.ndr.txt'),
      'txt_to_ndr': ('.ndr.txt', '.ndr.pkl'),
      'ndr_to_vgm': ('.ndr.pkl', '.ndr.vgm'),

      # NES disassembly functional
      'vgm_to_ndf': ('.vgm', '.ndf.pkl'),
      'ndf_to_txt': ('.ndf.pkl', '.ndf.txt'),
      'txt_to_ndf': ('.ndf.txt', '.ndf.pkl'),
      'ndf_to_vgm': ('.ndf.pkl', '.ndf.vgm'),

      # NES language modeling format
      'vgm_to_nlm': ('.vgm', '.nlm.pkl'),
      'nlm_to_vgm': ('.nlm.pkl', '.nlm.vgm'),

      # NES-MDB score formats
      'ndf_to_exprsco': ('.ndf.pkl', '.exprsco.pkl'),
      'ndf_to_midi': ('.ndf.pkl', '.mid'),
      'exprsco_to_seprsco': ('.exprsco.pkl', '.seprsco.pkl'),
      'exprsco_to_blndsco': ('.exprsco.pkl', '.blndsco.pkl'),

      # WAV converters
      'vgm_to_wav': ('.vgm', '.wav'),
      'ndr_to_wav': ('.ndr.pkl', '.wav'),
      'ndf_to_wav': ('.ndf.pkl', '.wav'),
      'nlm_to_wav': ('.nlm.pkl', '.wav'),
      'midi_to_wav': ('.mid', '.mid.wav'),
      'exprsco_to_wav': ('.exprsco.pkl', '.exprsco.wav'),
      'seprsco_to_wav': ('.seprsco.pkl', '.seprsco.wav'),
      'blndsco_to_wav': ('.blndsco.pkl', '.blndsco.wav'),
  }

  conversion_to_kwargs = {
      'vgm_simplify': ['vgm_simplify_nop1', 'vgm_simplify_nop2', 'vgm_simplify_notr', 'vgm_simplify_nono'],
      'vgm_shorten': ['vgm_shorten_start', 'vgm_shorten_nmax'],
      'ndf_to_exprsco': ['ndf_to_exprsco_rate'],
      'midi_to_wav': ['midi_to_wav_rate'],
  }

  parser.add_argument('conversion', type=str, choices=conversion_to_types.keys())
  parser.add_argument('fps', type=str, nargs='+')
  parser.add_argument('--out_dir', type=str)
  parser.add_argument('--skip_verify', action='store_true', dest='skip_verify')
  parser.add_argument('--vgm_shorten_start', type=int)
  parser.add_argument('--vgm_shorten_nmax', type=int)
  parser.add_argument('--vgm_simplify_nop1', action='store_true', dest='vgm_simplify_nop1')
  parser.add_argument('--vgm_simplify_nop2', action='store_true', dest='vgm_simplify_nop2')
  parser.add_argument('--vgm_simplify_notr', action='store_true', dest='vgm_simplify_notr')
  parser.add_argument('--vgm_simplify_nono', action='store_true', dest='vgm_simplify_nono')
  parser.add_argument('--ndf_to_exprsco_rate', type=float)
  parser.add_argument('--midi_to_wav_rate', type=float)

  parser.set_defaults(
      conversion=None,
      fps=None,
      out_dir=None,
      skip_verify=False,
      vgm_shorten_start=None,
      vgm_shorten_nmax=1024,
      vgm_simplify_nop1=False,
      vgm_simplify_nop2=False,
      vgm_simplify_notr=False,
      vgm_simplify_nono=False,
      ndf_to_exprsco_rate=None,
      midi_to_wav_rate=None)

  args = parser.parse_args()

  in_type, out_type = conversion_to_types[args.conversion]
  fps = args.fps

  if len(fps) > 1 and args.out_dir is None:
    raise Exception('Must specify output directory for batch mode')

  if len(fps) == 1 and args.out_dir is None:
    out_fps = [fps[0].replace(in_type, out_type)]
  else:
    out_fns = [os.path.basename(fp).replace(in_type, out_type) for fp in fps]
    out_fps = [os.path.join(args.out_dir, fn) for fn in out_fns]

    if os.path.exists(args.out_dir):
      print('WARNING: Output directory {} already exists'.format(args.out_dir))
    else:
      os.makedirs(args.out_dir)

  for in_fp, out_fp in tqdm(zip(fps, out_fps)):
    if not args.skip_verify:
      _verify_type(in_fp, in_type)
      _verify_type(out_fp, out_type)

    # Load input file
    in_ext = in_type.split('.')[-1]
    if in_ext == 'pkl':
      with open(in_fp, 'rb') as f:
        in_file = pickle.load(f)
    elif in_ext in ['mid', 'vgm']:
      with open(in_fp, 'rb') as f:
        in_file = f.read()
    elif in_ext == 'txt':
      with open(in_fp, 'r') as f:
        in_file = f.read()
    else:
      raise NotImplementedError('Input extension .{} not recognized'.format(in_ext))

    kwargs = {}
    if args.conversion in conversion_to_kwargs:
      kwargs = {kw:getattr(args, kw) for kw in conversion_to_kwargs[args.conversion]}

    try:
      out_file = globals()[args.conversion](in_file, **kwargs)
    except Exception:
      print('-' * 80)
      print(in_fp)
      traceback.print_exc()
      continue

    # Save output file
    out_ext = out_type.split('.')[-1]
    if out_ext == 'pkl':
      with open(out_fp, 'wb') as f:
        pickle.dump(out_file, f)
    elif out_ext in ['mid', 'vgm']:
      with open(out_fp, 'wb') as f:
        f.write(out_file)
    elif out_ext == 'txt':
      with open(out_fp, 'w') as f:
        f.write(out_file)
    elif out_ext == 'wav':
      wav = out_file
      wav *= 32767.
      wav = np.clip(wav, -32767., 32767.)
      wav = wav.astype(np.int16)
      wavwrite(out_fp, 44100, wav)
    else:
      raise NotImplementedError('Output extension .{} not recognized'.format(out_ext))
Code Example #44
File: ica.py Project: sduc/urlnn
        sigs = mixsounds_ica(g,dg)
        sigs = np.array(sigs,dtype = 'uint8')
        plot(sigs[0],len(sigs[0]))
        plot(sigs[1],len(sigs[1]))
        plot(sigs[2],len(sigs[2]))
        plot(sigs[3],len(sigs[3]))
        plot(sigs[4],len(sigs[4]))
        plot(sigs[5],len(sigs[5]))
        plot(sigs[6],len(sigs[6]))
        plot(sigs[7],len(sigs[7]))
        plot(sigs[8],len(sigs[8]))
        # write in a file
        if 'output' not in os.listdir('.'):
            os.mkdir('output')
        for i in range(sigs.shape[0]):
            wavwrite('output/unmixedsound' + str(i) + '.wav', 8000, sigs[i])

    # used to test the quality of the functions (part 1)
    elif sys.argv[1] == 'convergence':
        mean = 0
        N = 100
        N_n = N
        for i in range(N):
            s1,s2,n = square_cos_ica(g,dg)
            if n >= 0:
                mean += n
            #if algo diverged then don't count it
            else:
                N_n = N_n - 1
            print(mean)
        print("The mean of the number of steps untill convergence is " +`mean/N_n`)