Example #1
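# The snippets on this page are excerpts from larger modules; their module-level imports
# are not shown. A plausible set, inferred from the names used below (not part of the
# original code), would be:
#
#   import os, sys, time
#   import cPickle
#   import numpy as np
#   import theano
#   import theano.tensor as T
#   import matplotlib.pyplot as plt
#   from scipy.io import wavfile
#   from code.utils.load_data import load_data_timit_seq
#
# `_std` and `_mean` are assumed to be module-level normalization statistics of the
# training data; they are never defined in any of the excerpts.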
def gen_wav(savename):
    start= 1
    stop = 101
    
    slice = 'N'
    wavtype='timit'
    postfix='g0.1'

    #mdl = cPickle.load(open('_'.join(savename.split('_')[:-1])+'.pkl'))#[0]
    mdl,z_tot, _ = cPickle.load(open(savename))#[1]
    image_w = mdl.layers[0].input_shape[-1]
    
    z_in = T.tensor4()
    decode_out = theano.function([z_in], mdl.decode(z_in))
    x_tot = load_data_timit_seq('test', start, stop, image_w, wavtype,slice).get_value()
    z_tot = z_tot.get_value()
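    # Perturb the learned codes with a small amount of Gaussian noise before decoding.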
    z_tot = z_tot+0.01*np.asarray(np.random.normal(size=z_tot.shape),dtype=theano.config.floatX)
    z_dec_tot = decode_out(z_tot)
    
    while True:
        #index = raw_input('input_index : ')
        index = 1
        if index == 'q':
            break
        else:
            index = int(index)
        x = x_tot[index].flatten()
        z = z_tot[index].flatten()
        z_dec = z_dec_tot[index].flatten()

        f, (ax1, ax2) = plt.subplots(2,1)

        ax1.plot(np.asarray([z_dec,x]).T, linewidth=2.0)
        ax1.legend(['z_dec','x'])
        
        ax2.plot(np.asarray([z]).T)
        ax2.legend(['z'])
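        # Dashed vertical lines mark the boundaries between the flattened feature maps
        # of the code z (each map spans z_tot.shape[3] samples).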
        for i in xrange(z_tot.shape[1]):
            ax2.axvline(x=i*z_tot.shape[3],color='k', linestyle='--')
         
        #plt.show()
        plt.savefig(os.path.splitext(savename)[0]+'_'+str(index)+postfix+'.png')
        plt.close()
        
        for ind,save_wav in enumerate([z_dec, x_tot[index]]):
            x_dec_sav = save_wav*_std+_mean
            x_dec_sav = np.asarray(x_dec_sav, dtype=np.int16)
            wavfile.write(os.path.splitext(savename)[0]+'_'+str(index)+'_'+str(ind)+postfix+'.wav',16000, x_dec_sav)
        break
Example #2
def gen_wav(savename):
    mdl, _ = cPickle.load(open(savename))#[1]
    
    image_w = mdl.input_shape[0][-1] # image_w of the first layer
    batch_size = mdl.batch_size
    channel = 1
    start= 1
    stop = start + batch_size
    slice = 'N'
    wavtype='timit'

    x_in = T.matrix()
    #x_re = x_in.reshape((image_w, batch_size, channel))
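    # Reshape each waveform into the (batch, channel, height=1, width) 4-D layout
    # that mdl.recon is called with below.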
    x_re = x_in.reshape((batch_size,channel, 1, image_w))
    recon = theano.function([x_in], mdl.recon(x_re))
    x_tot = load_data_timit_seq('test', start, stop, image_w, wavtype,slice,rand='Y').get_value()
    x_tot_rec = recon(x_tot)
    
    while True:
        index = raw_input('input_index : ')
        #index = 1
        if index == 'q':
            break
        else:
            index = int(index)
        x = x_tot[index].flatten()
        x_rec = x_tot_rec[index].flatten()

        plt.plot(np.asarray([x_rec,x]).T, linewidth=2.0)
        plt.legend(['x_rec','x'])
        
        plt.show()
        #plt.savefig(os.path.splitext(savename)[0]+'_'+str(index)+'.png')
        plt.close()
       # break
    '''
def evaluate_rclayer():
    dataset = 'timit'
    gridx = 10
    gridy = 10
    channel = 1
    n_hids = 300
    wavtype = 'timit'
    learning_rate = 0.01
    batch_size = 10
    start = 0
    stop = None
    valid_stop = None
    segment = 'Y'
    image_w = 256
    filter_w = 128
    stride = 64
    learning_rule = 'mom'
    threshold = np.float32(1.)
    mom = 0.96
    mom_an = 0
    st_an = 100
    dec_hid = 'std'
    postfix = ''
    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rclayer/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = time.strftime("%m%d")+'_recconv_pure_tdnn_' + wavtype + \
        '_' + str(stop) + \
        '_' + str(image_w) + \
        postfix
    
    savename = savepath + filename
    print savename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return 
    
    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(1073741824))
    
    if dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype, segment, normtype=dec_hid)
        valid_set_x = load_data_timit_seq('valid', start, valid_stop, image_w, wavtype, segment, normtype=dec_hid)
        test_set_x = load_data_timit_seq('test', start, valid_stop, image_w, wavtype, segment, normtype=dec_hid)
    else:
        raise ValueError('invalid dataset')
    
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    
    if batch_size > min(n_train_batches0,n_valid_batches0,n_test_batches0):
        print 'Maximum batch size is %d' % min(n_train_batches0,n_valid_batches0, n_test_batches0)
        batch_size = min(n_train_batches0,n_test_batches0,n_valid_batches0)
    
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size
    
    index = T.lscalar()
    x = T.matrix('x')
    #theano.config.compute_test_value = 'warn'
    #x.tag.test_value = np.asarray(np.random.rand(batch_size,image_w),dtype='float32')
    print '... building the model'
    x_re = x.reshape((batch_size, channel, 1, image_w))
    
    # shape is for [(enc_proj),(dec_proj)]
    input_shape  = [(batch_size, channel, 1, image_w),(batch_size, n_hids, 1, image_w/stride)]
    filter_shape = [(n_hids,channel,1,filter_w),(channel,n_hids,1,filter_w)]
    
    mdl = model(
        rng=nrng,
        n_hids=n_hids,
        filter_shape=filter_shape, 
        input_shape=input_shape,
        channel =channel, 
        stride=stride,) 
    cost = mdl.cost(x_re)
    params = mdl.params
    grads = T.grad(cost, params)
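    # Global gradient-norm clipping: if the combined L2 norm of all gradients exceeds
    # `threshold`, every gradient is rescaled by threshold / ||g||_2 (the T.switch below).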

    norm2 = 0.
    for g in grads:
        norm2 += (g**2.).sum()
    grads = [T.switch(T.sqrt(norm2) > threshold, threshold * g/T.sqrt(norm2), g) for g in grads]
    gradsdic = dict(zip(params,grads))
    
    if learning_rule == 'con':
        updates = []
        for (param_i, grad_i,) in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'mom':
        mm = Momentum(mom)
        updates = mm.get_updates(learning_rate, gradsdic)
    else:
        raise ValueError('invalid learning_rule')
    
    train_model = theano.function(
        inputs=[index], 
        outputs=cost,
        updates=updates, 
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)]})
    
    validate_model = theano.function(
        inputs=[index], 
        outputs=cost,
        givens={x: valid_set_x[(index * batch_size):((index + 1) * batch_size)]})
    
    test_model = theano.function(
        inputs=[index], 
        outputs=cost,
        givens={x: test_set_x[(index * batch_size):((index + 1) * batch_size)]})
    
    print '... training'
    first_lr = learning_rate
    best_validation_loss = np.inf
    start_time = time.clock()
    last_epoch_start_time = time.clock()
    train_score = []
    valid_score = []
    epoch = 0
    training_check_freq = np.inf
    valid_check_freq = np.inf#np.ceil(n_train_batches/10)
    improvement_threshold = 0.9
    done_looping = False
    n_epochs = 1000
    
    while (epoch < n_epochs) and (not done_looping):
        sum_cost = 0.
        epoch_start_time = time.clock()
        start_time = epoch_start_time
        valid_time = epoch_start_time
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - epoch)
                mm.momentum = mom
        
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(minibatch_index)
            sum_cost += cost_ij
            if (iter+1) % training_check_freq == 0:
                print 'training @ iter = %i, time = %3.2fs, train cost/update = %f, %s'%  \
                    (iter+1, time.clock() - start_time, cost_ij, filename)
                start_time = time.clock()

            if (iter+1) % valid_check_freq == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print 'training @ iter = %i, time = %3.2fs, valid cost %f, %s ' % (
                    iter+1, time.clock() - valid_time, this_validation_loss, filename)
                valid_time = time.clock()
            
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    #with open(savename+'.pkl', 'wb') as f:
                    #    pickle.dump(mdl,f)
        
        validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
        this_validation_loss = np.mean(validation_losses)
        valid_score.append(this_validation_loss)
        train_score.append(sum_cost/n_train_batches)
        monitor = [train_score,valid_score]
        plt.plot(np.asarray([valid_score, train_score]).T)
        plt.legend(['valid','train'])
        plt.savefig(savename+'.png')
        plt.close()
        print ' %3i epoch, train error %f, valid error %f, takes %3.2fs, %s' % (
            epoch, sum_cost/n_train_batches, this_validation_loss, time.clock() - epoch_start_time, filename)
                
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            best_validation_loss = this_validation_loss
            best_iter = iter
            with open(savename+'.pkl', 'wb') as f:
                pickle.dump([mdl,monitor],f)
    
    end_time = time.clock()
    print 'Optimization complete. total time is %3.2f' % (end_time - start_time)
    print savename
def gen_wav(savename=None):
    mdl = cPickle.load(open(savename))
    if type(mdl)==list:
        mdl, cost = mdl
    
    dataset = 'timit'
    batch_size = 100
    
    start = 100
    stop = start+batch_size
    image_h = 1
    image_w = 320 
    using_noise = False
    savefreq = 10
    eps_decay = np.inf
    #eps_decay = 100.
    eps0 = 1.
    wavtype= 'noise'
    which_set = 'test'
    index = int(raw_input('Input index [0,%s): '%(batch_size)))
    noise_lvl = None # How about annealing the noise?

    mdl.set_image_size(image_w)
    fname0 = filename(wavtype,which_set,index,image_w,eps_decay,using_noise)
    
    from code.utils.load_data import load_data_timit_seq
    set_x = load_data_timit_seq(which_set, start, stop, lend=image_w, wavtype=wavtype)
    #image_w = set_x.shape[1]
    
    x = T.matrix('x')
    epsilon = T.scalar('e')
    x_reshape = x.reshape((1, 1, image_h, image_w))
    if using_noise == True:
        x_reshape = mdl.corruptor(x_reshape, noise_lvl)
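        # Each step blends the model's reconstruction with the current input:
        # epsilon = 1 takes a full reconstruction step, smaller epsilon only a
        # partial ("climbing") step toward the reconstruction.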
    y = epsilon * mdl.reconstruction(x_reshape) + (1-epsilon)* x_reshape# For climbing 
    predict = theano.function([x,epsilon],[x_reshape,y]) # For climbing
    x_in = set_x.get_value()[index,:]
    x_rec = x_in 
    cntiter = 0
    eps = eps0

    # Iteration for denoising 
    while True:
        niter = raw_input('Input niter (cumulative, 0 for initialization): ')
        if niter == '0':
            index = int(raw_input('Input index [0,%s), prev was %i: '%(batch_size,index)))
            # Initialize values
            fname0 = filename(wavtype,which_set,index,image_w,eps_decay,using_noise)
            eps=eps0
            cntiter = 0
            x_in = set_x.get_value()[index,:]
            x_rec = x_in 
            continue

        elif niter =='decay':
            if eps_decay == np.inf:
                eps_decay = 100.
            elif eps_decay== 100.:
                eps_decay = np.inf
            fname0 = filename(wavtype,which_set,index,image_w,eps_decay,using_noise)
            print 'decay is changed to %s' %str(eps_decay)
            continue
        
        elif not niter.isdigit():
            print 'Input is not an integer. Try again.'
            continue
        
        niter = int(niter)
        for i in range(niter):
            cntiter += 1
            if cntiter >= 100:
                eps =  eps0 - cntiter/eps_decay
                if eps < 0.:
                    eps=0.

            x_rec = np.asmatrix(x_rec,dtype=theano.config.floatX)
            x_cor, x_rec = predict(x_rec,eps)
            if cntiter%savefreq==0:
                f, (ax1, ax2, ax3) = plt.subplots(3,1,sharex=True,sharey=True)
                x_in = np.array(x_in).flatten()
                ax1.plot(x_in)
                x_cor = np.array(x_cor).flatten()
                ax2.plot(x_cor)
                x_rec = np.array(x_rec).flatten()
                ax3.plot(x_rec)
        
                ax1.set_title('initial test input')
                ax2.set_title('%dth corrupted' % cntiter)
                ax3.set_title('%dth reconstructed' % cntiter)
                
                ylim = max(x_cor)
                ax1.axis([0,x_in.shape[0],-ylim,ylim])
                ax2.axis([0,x_in.shape[0],-ylim,ylim])
                ax3.axis([0,x_in.shape[0],-ylim,ylim])
        
                plt.show()
                fname = fname0 + '_iter'+str(cntiter)
                fpath=os.path.dirname(savename)+'/wavpng/'+os.path.splitext(os.path.basename(savename))[0]
                if not os.path.exists(fpath):
                    os.makedirs(fpath)
                print os.path.join(fpath,fname) 
                plt.savefig(os.path.join(fpath,fname)+'.png')
                plt.close()
def evaluate_rclayer():
    dataset = 'timit'
    gridx = 10
    gridy = 10
    channel = 1
    n_hids = 100
    wavtype = 'timit'
    learning_rate = 0.001
    batch_size = 256 
    start = 0
    stop = 50
    segment = 'Y'
    image_w = 256 
    learning_rule = 'ada'
    threshold = np.float32(1.)
    mom = 0.96
    mom_an = 0
    st_an = 100
    dec_hid = 'std'
    postfix = '_trial'
    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rclayer/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = 'recconv_' + wavtype + \
        '_' + str(stop) + \
        '_' + str(image_w) + \
        postfix
    savename = savepath + filename
    print savename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return 
    
    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(1073741824))
    
    if dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype, segment,normtype=dec_hid)
        stop_ = int(np.ceil(stop / 3)) if stop is not None else None
        valid_set_x = load_data_timit_seq('valid', start, stop_, image_w, wavtype, segment,normtype=dec_hid)
        test_set_x = load_data_timit_seq('test', start, stop_, image_w, wavtype, segment,normtype=dec_hid)
    else:
        raise ValueError('invalid dataset')
    
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    
    if batch_size > min(n_train_batches0,n_valid_batches0,n_test_batches0):
        print 'Maximum batch size is %d' % min(n_train_batches0,n_valid_batches0, n_test_batches0)
        batch_size = min(n_train_batches0,n_test_batches0,n_valid_batches0)
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size
    
    index = T.lscalar()
    x = T.matrix('x')
    print '... building the model'
    x_re = x.reshape((image_w, batch_size, channel))

    mdl = model(rng=nrng,n_hids=n_hids)
    cost = mdl.cost(x_re)
    params = mdl.params
    grads = T.grad(cost, params)

    norm2 = 0.
    for g in grads:
        norm2 += (g**2.).sum()
    grads = [T.switch(T.sqrt(norm2) > threshold, threshold * g/T.sqrt(norm2), g) for g in grads]
    gradsdic = dict(zip(params,grads))
    
    if learning_rule == 'con':
        updates = []
        for (param_i, grad_i,) in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'mom':
        mm = Momentum(mom)
        updates = mm.get_updates(learning_rate, gradsdic)
    else:
        raise ValueError('invalid learning_rule')
    
    train_model = theano.function(
        inputs=[index], 
        outputs=cost,
        updates=updates, 
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)].T})
    
    validate_model = theano.function(
        inputs=[index], 
        outputs=cost,
        givens={x: valid_set_x[(index * batch_size):((index + 1) * batch_size)].T})
    
    test_model = theano.function(
        inputs=[index], 
        outputs=cost,
        givens={x: test_set_x[(index * batch_size):((index + 1) * batch_size)].T})
    
    print '... training'
    first_lr = learning_rate
    best_validation_loss = np.inf
    start_time = time.clock()
    last_epoch_start_time = time.clock()
    score = []
    epoch = 0
    patience = 100000
    patience_increase = 1.001
    training_check_freq = 10
    validation_frequency = min(n_train_batches, patience / 2)
    improvement_threshold = 0.9
    done_looping = False
    n_epochs = 100
    
    while (epoch < n_epochs) and (not done_looping):
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - epoch)
                mm.momentum = mom
        
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(minibatch_index)
            if iter % training_check_freq == 0:
                print 'training @ iter = %5i, time = %3.2fs, training cost = %f, %s'%  \
                    (iter, time.clock() - start_time, cost_ij, filename)
                sys.stdout.flush()
                start_time = time.clock()

            if (iter + 1) % validation_frequency == 0:
                valid_time = time.clock()
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print '        %3i, valid error %f, %.2fs, %s ' % (
                    epoch, this_validation_loss, time.clock() - start_time, filename)
                score.append(this_validation_loss)
                plt.plot(xrange(len(score)),score)
                plt.savefig(savename+'.png')
                plt.close()

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    with open(savename+'.pkl', 'wb') as f:
                        pickle.dump([mdl,score],f)
                    # test it on the test set 
                    test_losses = [test_model(i) for i in xrange(n_test_batches)] 
                    test_score = np.mean(test_losses) 
                    print(('       test error %f') % (test_score))
            if patience <= iter:
                done_looping = True
                break
        print ' %3i epoch, takes %3.2fs' % (epoch, time.clock() - epoch_start_time)

    '''
    while True:
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - epoch)
                mm.momentum = mom
        cost_ij = 0
        for minibatch_index in xrange(n_train_batches):
            cost_it = train_model(minibatch_index)
            cost_ij += cost_it
        cost_ij /= 2 * n_train_batches
        print '%3i, test error %f, %.2fs, %s ' % (
            epoch, cost_ij, time.clock() - epoch_start_time, filename)
        last_epoch_start_time = time.clock()
        score.append(cost_ij)
        if epoch % 10 == 0:
            if cost_ij < best_validation_loss:
                best_validation_loss = cost_ij
                with open(savename + '.pkl', 'wb') as f:
                    pickle.dump([mdl, score], f)
    '''
    end_time = time.clock()
    print 'Optimization complete. total time is %3.2f' % (end_time - start_time)
    print savename
def gen_wav_rtdnn(savename):
    osrtdnn = cPickle.load(open(savename))
    
    learning_rate = 0.1
    n_epochs = 10000000
    dataset = 'timit'
    batch_size, _, _, image_w = osrtdnn.input_shape[0]
    gridx = 10
    gridy = 10
    start = 1
    stop = start + batch_size
    channel = 1
    wavtype = 'timit'
    learning_rule = 'mom'
    slice = 'N'
    mom = 0.96
    postfix = '_z'
    
    savename = os.path.splitext(savename)[0]+postfix
    train_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype,slice)
    
    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    osrtdnn_input = x.reshape(osrtdnn.input_shape[0])
    cost = osrtdnn.cost(osrtdnn_input)
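    # Only the code parameters (osrtdnn.zparams) receive gradient updates here; the
    # trained network weights stay fixed, so this script infers codes for the test
    # waveforms by gradient descent on the reconstruction cost.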
    zparams = osrtdnn.zparams
    zgrads = T.grad(cost, zparams)
    zgradsdic = dict(zip(zparams,zgrads))
    
    if learning_rule == 'ada':
        ad = AdaDelta()
        zupdates = ad.get_updates(learning_rate, zgradsdic)
    elif learning_rule == 'con':
        zupdates = []
        for param_i, grad_i in zip(zparams, zgrads):
            zupdates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        zupdates = mm.get_updates(learning_rate, zgradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_z_model = theano.function(
        inputs = [index], 
        outputs = cost,
        updates = zupdates,
        givens = {x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    
    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    first_lr = learning_rate
    st_an = 200
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    test_score = 0.
    start_time = time.clock()
    epoch_start_time=0
    score_cum=[]
    epoch = 0
    done_looping = False
    
    while (epoch < n_epochs) and (not done_looping):
        last_epoch_start_time = epoch_start_time
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ['con','mom']:
            learning_rate = first_lr/(epoch-st_an)
        
        cost_ij=0
        for minibatch_index in xrange(n_train_batches):
            cost_ij += train_z_model(minibatch_index)
        cost_ij /= (2*(n_train_batches))

        # compute loss on validation set
        this_validation_loss = cost_ij
        print('%3i, training error %f, %.2fs/%.2fs, %s ' % \
              (epoch, this_validation_loss,
              (time.clock() - epoch_start_time), (epoch_start_time-last_epoch_start_time), savename))
        score_cum.append(this_validation_loss)
        
        # if we got the best validation score until now
        if epoch%100==0 and this_validation_loss < best_validation_loss:
            #plot score
            plt.plot(xrange(len(score_cum)),score_cum)
            plt.savefig(savename+'.png')
            plt.close()

            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            
            osrtdnn.set_cost(best_validation_loss)
            with open(savename+'.pkl', 'wb') as f:
                cPickle.dump(osrtdnn,f)

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f, with test performance %f' %
          (best_validation_loss, test_score))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                        ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print savename
from evaluate import model
from rconv_layers import RecursiveConvolutionalLayer
from code.utils.load_data import load_data_timit_seq

assert len(sys.argv) == 2, "Expected exactly one argument: path to the pickled model."

start = 0
stop = None
image_w = 256
channel = 1
wavtype = " timit"
segment = "Y"
dec_hid = "std"
batch_size = 16

valid_set_x = load_data_timit_seq("valid", start, stop, image_w, wavtype, segment, normtype=dec_hid)
n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
n_valid_batches = n_valid_batches0 / batch_size

index = T.lscalar()
x = T.matrix("x")
x_re = x.reshape((image_w, batch_size, channel))

mdl = cPickle.load(open(sys.argv[1]))[0]
cost = mdl.cost(x_re)

validate_model = theano.function(
    inputs=[index], outputs=cost, givens={x: valid_set_x[(index * batch_size) : ((index + 1) * batch_size)].T}
)

validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
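# The excerpt stops after collecting the per-batch validation costs; a typical next
# step (not part of the original snippet) would be to report their mean, e.g.:
#     print 'validation cost %f' % np.mean(validation_losses)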
Example #8
def evaluate_rtdnn():
    learning_rate = 0.1
    n_epochs = 10000
    dataset = 'timit'
    batch_size = 10
    start = 0
    stop = start+batch_size 
    channel = 1
    image_h = 1
    image_w = 256
    filter_h = 1
    filter_w = 4 
    nkerns = [4]
    wavtype = 'timit'
    learning_rule = 'mom'
    mom = 0.96
    dechid = 'tanh'
    postfix = ''
    
    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rtdnn/result/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    
    filename =  'osrtdnn'+\
                '_toy'+wavtype+\
                '_w'+str(filter_w)+\
                '_'+learning_rule+\
                '_'+dechid+\
                postfix
    savename = savepath + filename
    
    if os.path.exists(savename+'.pkl'):
        ans=raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(2 ** 30))
    if dataset == 'mnist.pkl.gz':
        from code.utils.load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
    elif dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype)
        valid_set_x = load_data_timit_seq('valid', start, stop, image_w, wavtype)
        test_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype)
    
    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size

    assert min(n_train_batches, n_valid_batches, n_test_batches)>0,\
        'Maximum batch size is %d' % min(n_train_batches0, n_valid_batches0, n_test_batches0)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    osrtdnn_input = x.reshape((channel, image_h, image_w, batch_size))
    input_shape = []
    filter_shape = []
    for i in xrange(len(nkerns)):
        if i == 0:
            input_shape.append((channel, image_h, image_w/(2 ** len(nkerns)), batch_size))
            filter_shape.append((channel, filter_h, filter_w, nkerns[0]))
        else:
            input_shape.append((nkerns[i-1], image_h, image_w/(2 ** (len(nkerns)-i)), batch_size))
            filter_shape.append((nkerns[i-1], filter_h, filter_w, nkerns[i]))
    
    osrtdnn = rtdnn(
            nrng = nrng, trng=trng,
            input_shape = input_shape,
            filter_shape = filter_shape,
            dec_hid=dechid,
            )
    
    cost = osrtdnn.cost(osrtdnn_input)
    params = osrtdnn.params
    grads = T.grad(cost, params)
    gradsdic = dict(zip(params,grads))
    
    if learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'con':
        updates = []
        for param_i, grad_i in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        updates = mm.get_updates(learning_rate, gradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_model = theano.function(
        inputs = [index], 
        outputs = cost, 
        updates = updates,
        givens = {x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    
    validate_model = theano.function(
        inputs = [index], 
        outputs = cost,
        givens = {x: valid_set_x[index * batch_size: (index + 1) * batch_size]})
    
    test_model = theano.function(
        inputs = [index],
        outputs = cost,
        givens = {x: test_set_x[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    first_lr = learning_rate
    st_an = 800
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    
    score_cum=[]
    epoch = 0
    done_looping = False
    
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ['con','mom']:
            learning_rate = first_lr/(epoch-st_an)
        #if epoch >=  st_an and epoch < en_an:
        #    learning_rate -= first_lr/(en_an-st_an)
        #elif epoch >=en_an:
        #    learning_rate = 0.
        
        for minibatch_index in xrange(n_train_batches):
            
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 1000 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('    %3i, validation error %f, %s ' % \
                      (epoch, this_validation_loss, filename))
                score_cum.append(this_validation_loss)
                plt.plot(xrange(len(score_cum)),score_cum)
                plt.savefig(savename+'.png')
                plt.close()

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    
                    osrtdnn.set_cost(best_validation_loss)
                    with open(savename+'.pkl', 'wb') as f:
                        pickle.dump(osrtdnn,f)

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print(('       test error %f') % (test_score))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f obtained at iteration %i, with test performance %f' %
          (best_validation_loss, best_iter + 1, test_score))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                        ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print savename
Example #9
def evaluate_rtdnn():
    # Not so important 
    n_epochs = 500
    dataset = 'timit'
    channel = 1
    image_h = 1
    filter_h = 1
    
    # Data type : 'timit', 'sin', 'rect', ...
    wavtype = 'timit'
   
    # Control Data size and batch size
    start = 0
    stop = None
    valid_stop = None
    segment = 'Y'
    batch_size = 256
    
    #Learning rule and rate
    learning_rule = 'ada'
    learning_rate = 0.1
    mom = 0.96
    mom_an = 0
    st_an = 100

    # Control Layer's characteristics 
    image_w = 256
    dechid = ['lin','tanh','tanh']
    nkerns = [40,40,4]
    stride = [2,2,2]
    filter_w = 25
    postfix = ''
    if not len(nkerns) == len(dechid) or not len(nkerns) == len(stride):
        raise ValueError('nkerns, dechid, stride should have same length')
    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rtdnn/result/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = time.strftime("%m%d") + '_tdnn_' + wavtype + \
        '_' + str(batch_size) + \
        '_' + str(stop) + \
        '_' + str(image_w) + \
        '_' + str(filter_w) + \
        '_' + str(nkerns[-1]) + ':' + str(np.prod(stride)) + \
        '_' + learning_rule + \
        '_' + str(learning_rate) + \
        '_' + dechid[-1] \
        + postfix
    savename = savepath + filename
    print savename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return 
    nrng = np.random.RandomState(23455)
    if dataset == 'mnist.pkl.gz':
        from code.utils.load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        (train_set_x, train_set_y,) = datasets[0]
        (valid_set_x, valid_set_y,) = datasets[1]
        (test_set_x, test_set_y,) = datasets[2]
    elif dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype, segment)
        valid_set_x = load_data_timit_seq('valid', start, valid_stop, image_w, wavtype, segment)
    
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    if batch_size > min(n_train_batches0, n_valid_batches0):
        print 'Maximum batch size is %d' % min(n_train_batches0, n_valid_batches0)
        batch_size = min(n_train_batches0,n_valid_batches0)
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    
    index = T.lscalar()
    x = T.matrix('x')
    print '... building the model'
    x_re = x.reshape((batch_size, channel, image_h, image_w))
    input_shape = []
    filter_shape = []
    for i in xrange(len(nkerns)):
        if i == 0:
            filter_shape.append((nkerns[0], channel, filter_h, filter_w))
            input_shape.append((batch_size, channel, image_h, image_w))
        else:
            filter_shape.append((nkerns[i], nkerns[(i - 1)], filter_h, filter_w))
            input_shape.append((batch_size, nkerns[(i - 1)], image_h, image_w / 2 ** i)) # stride should be changed

    osrtdnn = rtdnn(nrng=nrng, 
        input_shape=input_shape, 
        filter_shape=filter_shape, 
        stride=stride, 
        dec_hid=dechid)
    
    # Initialization of hidden representation
    osrtdnn.set_batch_size(n_train_batches0)
    x_tot_shape = x.reshape((n_train_batches0, channel, image_h, image_w))
    z_val = osrtdnn.encode(x_tot_shape)
    z_init = theano.function([x], z_val)
    z_tot = theano.shared(value=z_init(train_set_x.get_value()), borrow=True)
    osrtdnn.set_batch_size()
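    # z_tot holds one code per training example as a shared variable, initialized by a
    # single encoding pass over the whole training set. It is then optimized jointly with
    # the filters: train_z_model (compiled below) updates only the codes, train_f_model
    # only the filter parameters, and each minibatch takes one step of each in alternation.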
    
    (cost, cost_dec, cost_rec,) = osrtdnn.cost(x_re, z_tot[(index * batch_size):((index + 1) * batch_size)])
    
    fparams = osrtdnn.fparams
    fgrads = T.grad(cost, fparams)
    fgradsdic = dict(zip(fparams, fgrads))
    zgrads = T.grad(cost, z_tot)
    zgradsdic = {z_tot: zgrads}
    if learning_rule == 'ada':
        ad = AdaDelta()
        fupdates = ad.get_updates(learning_rate, fgradsdic)
        zupdates = ad.get_updates(learning_rate, zgradsdic)
    elif learning_rule == 'con':
        fupdates = []
        for (param_i, grad_i,) in zip(fparams, fgrads):
            fupdates.append((param_i, param_i - learning_rate * grad_i))

        zupdates = {z_tot: z_tot - zgrads}
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        fupdates = mm.get_updates(learning_rate, fgradsdic)
        zupdates = mm.get_updates(learning_rate, zgradsdic)
    else:
        raise ValueError('invalid learning_rule')
    
    train_z_model = theano.function(
        inputs=[index], 
        outputs=[cost, cost_dec, cost_rec], 
        updates=zupdates, 
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)]})
    train_f_model = theano.function(
        inputs=[index], 
        outputs=[cost, cost_dec, cost_rec], 
        updates=fupdates, 
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)]})
    valid_model = theano.function(
        inputs=[index], 
        outputs=[cost, cost_dec, cost_rec], 
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)]})
    
    print '... training'
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    train_check_freq = np.inf
    valid_check_freq = np.inf #min(n_train_batches, patience / 2)
    first_lr = learning_rate
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    test_score = 0.0
    start_time = time.clock()
    valid_time = time.clock()
    score = []
    score_dec = []
    score_rec = []
    monitor = []
    epoch = 0
    done_looping = False
    while epoch < n_epochs and not done_looping:
        epoch_start_time = time.clock()
        epoch = epoch + 1
        sum_cost = 0
        sum_cost_dec = 0
        sum_cost_rec = 0
        
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - epoch)
                mm.momentum = mom
        
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            (cost_z, cost_dec_z, cost_rec_z) = train_z_model(minibatch_index)
            (cost_f, cost_dec_f, cost_rec_f) = train_f_model(minibatch_index)
            sum_cost += cost_f + cost_z
            sum_cost_dec += cost_dec_f + cost_dec_z 
            sum_cost_rec += cost_rec_f + cost_rec_z
            if (iter+1) % train_check_freq == 0:
                print 'training @ iter = %i, time = %3.2fs, training cost = %f, %f, %s'%  \
                    (iter+1, time.clock() - start_time, cost_f, cost_z, filename)
                start_time = time.clock()

            if (iter+1) % valid_check_freq == 0:
                validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print 'training @ iter = %i, time = %3.2fs, valid cost %f, %s ' % (
                    iter+1, time.clock() - valid_time, this_validation_loss, filename)
                valid_time = time.clock()
            
                # if we got the best validation score until now
                #if this_validation_loss < best_validation_loss:
                #    best_validation_loss = this_validation_loss
                #    best_iter = iter
                #    with open(savename+'.pkl', 'wb') as f:
                #        pickle.dump([osrtdnn, z_tot, monitor],f)

        validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
        this_validation_loss = np.mean(validation_losses)
        
        sum_cost /= n_train_batches
        sum_cost_dec /= n_train_batches
        sum_cost_rec /= n_train_batches
                
        print ' %3i epoch, train error %f, valid error %f, takes %3.2fs, %s' % (epoch, 
            sum_cost, this_validation_loss, time.clock() - epoch_start_time, filename)

        score.append(sum_cost)
        score_dec.append(sum_cost_dec)
        score_rec.append(sum_cost_rec)
        monitor = [score, score_dec, score_rec]
        # Save the model whenever the average training cost improves.
        if sum_cost < best_validation_loss:
            best_validation_loss = sum_cost
            osrtdnn.set_cost(best_validation_loss, sum_cost_dec, sum_cost_rec)
            with open(savename + '.pkl', 'wb') as f:
                pickle.dump([osrtdnn, z_tot, monitor], f)

    end_time = time.clock()
    print 'Optimization complete.'
    print 'Best validation score of %f, with test performance %f' % (best_validation_loss, test_score)
    print >> sys.stderr, 'The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.0)
    print savename
Example #10
def fine_tune(savename):
    osrtdnn = cPickle.load(open(savename))[0]
    learning_rate = 0.1
    n_epochs = 10000000
    dataset = 'timit'
    image_w = 2048 
    batch_size = 100
    gridx = 10
    gridy = 10
    start = 1
    stop = start + batch_size
    channel = 1
    wavtype = 'timit'
    learning_rule = 'ada'
    slice = 'N'
    mom = 0.96
    postfix = '_z'+str(image_w)
    ind = 1

    savename = os.path.splitext(savename)[0]+postfix
    train_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype,slice)
    
    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    
    #  Z initialization
    osrtdnn.set_batch_size(n_train_batches0)
    osrtdnn.set_image_w(image_w)
    x_tot_shape = x.reshape((n_train_batches0, channel, 1, image_w))
    z_val = osrtdnn.encode(x_tot_shape)
    z_init = theano.function([x],z_val)
    z_tot = theano.shared(value=z_init(train_set_x.get_value()), borrow=True)
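    # z_tot stores one code per test waveform as a shared variable; only these codes
    # (not the pretrained network weights) are refined by the updates below.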
    
    x_re = x.reshape((batch_size, channel, 1, osrtdnn.layers[0].input_shape[3])) 
    cost,cost_dec,cost_rec = osrtdnn.cost(x_re,z_tot[index*batch_size:(index+1)*batch_size])

    zgrads = T.grad(cost, z_tot)
    zgradsdic = {z_tot:zgrads}

    if learning_rule == 'ada':
        ad = AdaDelta()
        zupdates = ad.get_updates(learning_rate, zgradsdic)
    elif learning_rule == 'con':
        # Plain gradient descent on the shared code tensor (the only optimized variable here).
        zupdates = [(z_tot, z_tot - learning_rate * zgrads)]
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        zupdates = mm.get_updates(learning_rate, zgradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_z_model = theano.function(
        inputs = [index], 
        outputs = [cost, cost_dec,cost_rec],
        updates = zupdates,
        givens = {x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    
    z_in = T.tensor4()
    decode_out = theano.function([z_in], osrtdnn.decode(z_in))

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    pat_time = np.inf
    first_lr = learning_rate
    st_an = 200
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    test_score = 0.
    start_time = time.clock()
    epoch_start_time=0
    score_cum=[]
    score_dec_cum=[]
    score_rec_cum=[]
    epoch = 0
    done_looping = False
    
    while (epoch < n_epochs) and (not done_looping):
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ['con','mom']:
            learning_rate = first_lr/(epoch-st_an)
 
        cost_ij=0
        cost_dec_ij=0
        cost_rec_ij=0
        for minibatch_index in xrange(n_train_batches):
            # One update per minibatch; unpack the three returned cost terms.
            c, c_dec, c_rec = train_z_model(minibatch_index)
            cost_ij += c
            cost_dec_ij += c_dec
            cost_rec_ij += c_rec
        cost_ij /= (2*(n_train_batches))
        cost_dec_ij /= (2*(n_train_batches))
        cost_rec_ij /= (2*(n_train_batches))
        
        score_cum.append(cost_ij)
        score_dec_cum.append(cost_dec_ij)
        score_rec_cum.append(cost_rec_ij)
        
        # compute loss on validation set
        print('%3i, training error %.2f, %.2f, %.2f, %.2fs, %s ' % \
              (epoch, cost_ij, cost_dec_ij, cost_rec_ij,
              (time.clock() - epoch_start_time), savename))
        
        # if we got the best validation score until now
        if (epoch%50==0 and cost_ij < best_validation_loss) or time.clock()-start_time > pat_time:
            best_validation_loss = cost_ij
            z_dec = decode_out(z_tot.get_value())
            grid_plot.grid_plot((train_set_x.get_value(), z_dec))
            #plt.legend('test','decoded')
            plt.savefig(savename+'.png')
            plt.close()
            with open(savename+'.pkl','wb') as f:
                cPickle.dump([osrtdnn, z_tot, [score_cum, score_dec_cum, score_rec_cum]],f)
            '''
            for i,save_wav in enumerate([z_dec[ind], train_set_x.get_value()[ind]]):
                x_dec_sav = save_wav*_std+_mean
                x_dec_sav = np.asarray(x_dec_sav, dtype=np.int16)
                wavfile.write(os.path.splitext(savename)[0]+'_'+str(ind)+'_'+str(i)+'.wav',16000, x_dec_sav)
            '''
    end_time = time.clock()
    print('Optimization complete.')
    print savename