Esempio n. 1
0
def synthesize(input_filename, output_filename):
    '''Synthesize the control data from input_filename into output_filename.

    The control data is read with parse_input_file, a tube model is created
    from its inputParameters, the synthesizer is run on the parsed frames and
    the output of the model's sample rate converter is written to
    output_filename.

    The tube model is released before returning, including when synthesis or
    writing the output raises.
    '''
    frames = parse_input_file(input_filename)
    model = gnuspeech.TRMTubeModelCreate(frames.inputParameters)
    try:
        logging.info('Calculating floating point samples...')
        gnuspeech.synthesize(model, frames)
        gnuspeech.writeOutputToFile(
            model.sampleRateConverter, frames, output_filename)
        logging.info('Wrote scaled samples to file: %s', output_filename)
    finally:
        # The model is allocated by the extension module, so it has to be
        # freed explicitly; do it on the error path too.
        gnuspeech.TRMTubeModelFree(model)
Esempio n. 2
0
    def synthesize(self, *controls):
        '''Synthesize a sound from the given control variables.

        Each element of controls is expected to be a list or numpy array
        containing controls for each frame of the sound synthesis. If it is a
        numpy array, frames are read from the 0 axis (the "rows") of the array.

        The variables for each frame, in order, are:

        glotPitch - glottal pitch, 0 == middle C
        glotVol - glottal volume, dB
        aspVol - aspirate volume, dB
        fricVol - fricative volume, dB
        fricPos - fricative position, cm
        fricCF - fricative filter center frequency, Hz
        fricBW - fricative filter bandwidth, Hz
        radius[0] - radius of vocal tract, region 0, cm
        ...
        radius[7] - radius of vocal tract, region 7, cm
        velum - radius of velar opening, cm

        Any values after these 16 in a frame are ignored.

        Returns an array.array of typecode 'd' filled from the contents of
        the sample rate converter's temporary file.

        Raises ValueError if a frame holds fewer than 16 values.
        '''
        # 7 scalar controls + 8 tract radii + velum
        frame_size = 16

        # convert control frames into TRM linked list structure
        data = gnuspeech.TRMData()
        data.inputParameters = self.parameters._params

        radii = gnuspeech.new_double_array(gnuspeech.TOTAL_REGIONS)
        try:
            for frame in itertools.chain.from_iterable(controls):
                # The radii buffer is reused between frames, so a short frame
                # would otherwise silently inherit radii from the previous
                # one. Reject it up front instead.
                if len(frame) < frame_size:
                    raise ValueError(
                        'control frame has %d values, expected at least %d'
                        % (len(frame), frame_size))
                (glot_pitch, glot_vol, asp_vol,
                 fric_vol, fric_pos, fric_cf, fric_bw) = frame[:7]
                for i, v in enumerate(frame[7:15]):
                    gnuspeech.double_array_setitem(radii, i, v)
                velum = frame[15]
                gnuspeech.addInput(data, glot_pitch, glot_vol, asp_vol,
                                   fric_vol, fric_pos, fric_cf, fric_bw,
                                   radii, velum)
        finally:
            # release the scratch buffer even if a frame was rejected
            gnuspeech.delete_double_array(radii)

        # run the synthesizer
        gnuspeech.synthesize(self._model, data)

        # now return the synthesized sound data as an array of doubles
        converter = self._model.sampleRateConverter
        converter.tempFilePtr.seek(0)
        logging.debug('number of samples: %d', converter.numberSamples)
        logging.debug('maximum sample value: %.4f',
                      converter.maximumSampleValue)
        raw = converter.tempFilePtr.read()
        arr = array.array('d')
        # array.fromstring was removed in Python 3.9; frombytes is its
        # replacement. Fall back to fromstring only on interpreters that
        # predate frombytes.
        if hasattr(arr, 'frombytes'):
            arr.frombytes(raw)
        else:
            arr.fromstring(raw)
        return arr