Example #1
def load_river_network(nnet_param='neural_network/river_network_params', nnet_cfg='neural_network/river_network_cfg'):
    cfg = cPickle.load(smart_open(nnet_cfg, 'r'))
    cfg.init_activation()
    numpy_rng = numpy.random.RandomState(89677)  # random number generator for layer initialisation
    model = DNN(numpy_rng=numpy_rng, cfg=cfg)
    _file2nnet(model.layers, filename=nnet_param)
    get_river_probs = model.build_extract_feat_function(-1)
    return get_river_probs
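
A minimal usage sketch (not part of the original source): it assumes the default parameter/config files exist on disk and that the returned callable takes a 2-D float32 feature matrix and returns one output row per input row.

import numpy

get_river_probs = load_river_network()            # load params from the default paths
features = numpy.zeros((4, 50), dtype='float32')  # 50 is a placeholder input dimension
river_probs = get_river_probs(features)           # one row of final-layer outputs per input frame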
Example #2
def init_dnn(dnn_cfg, numpy_rng=None, rho=0.0, base_dnn=None):
    """ Initialize a DNN given a DNN config """
    if numpy_rng is None:
        numpy_rng = numpy.random.RandomState()
    if rho != 0:
        return KLDNN(numpy_rng, base_dnn, rho=rho, cfg=dnn_cfg)
    return DNN(numpy_rng, cfg=dnn_cfg)
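
A small hedged usage sketch: when rho > 0 the KL term is computed against the outputs of an already-trained base model, so base_dnn must be supplied; with rho == 0 a plain DNN is returned. The teacher_dnn name below is hypothetical.

plain_model = init_dnn(dnn_cfg)                               # rho == 0 -> ordinary DNN
kld_model = init_dnn(dnn_cfg, rho=0.3, base_dnn=teacher_dnn)  # regularize towards teacher_dnn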
Example #3
    def __init__(self, numpy_rng, base_dnn, rho=0.5, **kwargs):
        """
        Modify the objective function so it has this form:
            L = (1 - rho) * L' + rho * KL(y, y')
        Here L' is the original objective function and rho is the regularization
        weight. The larger rho is, the more weight we place on the base model.
        """
        DNN.__init__(self, numpy_rng, **kwargs)
        self.base_dnn = base_dnn
        self.rho = rho
        self.finetune_cost = (1 - rho) * self.finetune_cost
        self.kld_cost = -T.mean(T.sum(
            T.log(self.logLayer.p_y_given_x) * self.base_dnn.logLayer.p_y_given_x,
            axis=1
        ))
        self.finetune_cost += rho * self.kld_cost
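
For clarity, a minimal numpy sketch (not part of the original class) of the regularizer above: the "KL" cost as implemented is the cross-entropy between the base model's output distribution and the current model's, averaged over samples.

import numpy as np

def kld_cost(p_model, p_base):
    # p_model, p_base: arrays of shape (n_samples, n_classes) with rows summing to 1
    return -np.mean(np.sum(p_base * np.log(p_model), axis=1))

def combined_cost(original_cost, p_model, p_base, rho=0.5):
    # L = (1 - rho) * L' + rho * KL(y, y'), as in the docstring above
    return (1.0 - rho) * original_cost + rho * kld_cost(p_model, p_base)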
Example #4
def train():
    if cfg.train_model == 'dnn':
        model = DNN()

    inputs = model.input_data()
    avg_cost, auc_var = model.net(inputs)

    optimizer = fluid.optimizer.Adam(cfg.learning_rate)
    optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if cfg.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    dataset, file_list = get_dataset(inputs)

    logger.info("Training Begin")
    for epoch in range(cfg.epoches):
        random.shuffle(file_list)
        dataset.set_filelist(file_list)

        start_time = time.time()
        exe.train_from_dataset(
            program=fluid.default_main_program(),
            dataset=dataset,
            fetch_list=[avg_cost, auc_var],
            fetch_info=['Epoch {} cost: '.format(epoch + 1), ' - auc: '],
            print_period=cfg.log_interval,
            debug=False)
        end_time = time.time()
        logger.info("epoch %d finished, use time = %ds \n" %
                    ((epoch + 1), end_time - start_time))

        if (epoch + 1) % cfg.save_interval == 0:
            model_path = os.path.join(str(cfg.save_path), model.name,
                                      model.name + "_epoch_" + str(epoch + 1))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            logger.info("saving model to %s \n" % (model_path))
            fluid.save(fluid.default_main_program(),
                       os.path.join(model_path, "checkpoint"))
    logger.info("Done.")
Example #5
def init_srbm(rbm_cfg, numpy_rng=None):
    """ Initialize a SRBM given a RBM config """
    if numpy_rng is None:
        numpy_rng = numpy.random.RandomState()
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    # Following pdnn, initialize a parallel DNN and use it to initialize SRBM
    dnn_cfg = NetworkConfig()
    dnn_cfg.n_ins = rbm_cfg.n_ins
    dnn_cfg.hidden_layers_sizes = rbm_cfg.hidden_layers_sizes
    dnn_cfg.n_outs = rbm_cfg.n_outs
    dnn = DNN(numpy_rng, theano_rng=theano_rng, cfg=dnn_cfg)
    return SRBM(numpy_rng, theano_rng=theano_rng, cfg=rbm_cfg, dnn=dnn)
Example #6
    def _set_MLPs(self):
        '''
        Load the learned MLP model parameters and build the MLP network.
        '''
        nnet_param = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  os.pardir, os.pardir, 'models_makam',
                                  'dampB.mdl')
        nnet_cfg = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                os.pardir, os.pardir, 'models_makam',
                                'dampB.cfg')

        numpy_rng = numpy.random.RandomState(89677)
        theano_rng = RandomStreams(numpy_rng.randint(2**30))
        cfg = cPickle.load(smart_open(nnet_cfg, 'r'))
        cfg.init_activation()
        self.cfg = cfg

        model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

        # load model parameters
        _file2nnet(model.layers, filename=nnet_param)  # this is very slow
        self.model = model
Example #7
train_data_spec = arguments['train_data']
wdir = arguments['wdir']

path = "/home/piero/Documents/Experiments/Real_Test/Spectral Coef/Noise vs BG_voice+Conversation vs Shout+Scream/fft coef/"
os.chdir(path)
filename = "rbm.cfg"
train_data = "train.pickle.gz"
test_data = "test.pickle.gz"
batch_size = 128

log('> ... setting up the model and loading parameters')
numpy_rng = np.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2**30))
cfg_dnn = cPickle.load(open(filename, 'r'))
cfg_dnn.init_activation()
model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn)

# load model parameters
_file2nnet(model.layers, filename=wdir + '/rbm.param')

# initialize data reading
cfg_dnn.init_data_reading_test(train_data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(-1)

output_mat = None  # store the features for all the data in memory
log('> ... generating features from the specified layer')
while (not cfg_dnn.test_sets.is_finish()):  # loop over the data
    cfg_dnn.test_sets.load_next_partition(cfg_dnn.test_xy)
Example #8
def train_DNN(train_xy_file_list, valid_xy_file_list, \
              nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False):
    # get loggers for this function
    # this one writes to both console and file
    logger = logging.getLogger("main.train_DNN")
    logger.debug('Starting train_DNN')

    if plot:
        # this one takes care of plotting duties
        plotlogger = logging.getLogger("plotting")
        # create an (empty) plot of training convergence, ready to receive data points
        logger.create_plot('training convergence', MultipleSeriesPlot)

    try:
        assert numpy.sum(ms_outs) == n_outs
    except AssertionError:
        logger.critical('the summation of multi-stream outputs does not equal %d' % (n_outs))
        raise

    ####parameters#####
    finetune_lr = numpy.asarray(hyper_params['learning_rate'], dtype='float32')
    training_epochs = int(hyper_params['training_epochs'])
    batch_size = int(hyper_params['batch_size'])
    l1_reg = float(hyper_params['l1_reg'])
    l2_reg = float(hyper_params['l2_reg'])
    #     private_l2_reg  = float(hyper_params['private_l2_reg'])
    warmup_epoch = int(hyper_params['warmup_epoch'])
    momentum = float(hyper_params['momentum'])
    warmup_momentum = float(hyper_params['warmup_momentum'])

    use_rprop = int(hyper_params['use_rprop'])

    hidden_layers_sizes = hyper_params['hidden_layer_size']

    #     stream_weights       = hyper_params['stream_weights']
    #     private_hidden_sizes = hyper_params['private_hidden_sizes']

    buffer_utt_size = buffer_size
    early_stop_epoch = int(hyper_params['early_stop_epochs'])

    hidden_activation = hyper_params['hidden_activation']
    output_activation = hyper_params['output_activation']

    #     stream_lr_weights = hyper_params['stream_lr_weights']
    #     use_private_hidden = hyper_params['use_private_hidden']

    model_type = hyper_params['model_type']

    ## use a switch to turn on pretraining
    ## pretraining may not help much; if that is the case, we turn it off to save time
    do_pretraining = hyper_params['do_pretraining']
    pretraining_epochs = int(hyper_params['pretraining_epochs'])
    pretraining_lr = float(hyper_params['pretraining_lr'])

    buffer_size = int(buffer_size / batch_size) * batch_size

    ###################
    (train_x_file_list, train_y_file_list) = train_xy_file_list
    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list

    logger.debug('Creating training   data provider')
    train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, n_ins=n_ins,
                                         n_outs=n_outs, buffer_size=buffer_size, shuffle=True)

    logger.debug('Creating validation data provider')
    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, n_ins=n_ins,
                                         n_outs=n_outs, buffer_size=buffer_size, shuffle=False)

    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition()
    train_set_x, train_set_y = shared_train_set_xy
    shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_next_partition()
    valid_set_x, valid_set_y = shared_valid_set_xy
    train_data_reader.reset()
    valid_data_reader.reset()

    ## temporarily we use the training set as pretrain_set_x.
    ## we need to support arbitrary data for pretraining
    pretrain_set_x = train_set_x

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    logger.info('building the model')

    dnn_model = None
    pretrain_fn = None  ## not all models support pretraining right now
    train_fn = None
    valid_fn = None
    valid_model = None  ## valid_fn and valid_model are the same; reserved to compute multi-stream distortion
    if model_type == 'DNN':
        dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
                        l1_reg=l1_reg, l2_reg=l2_reg,
                        hidden_layers_sizes=hidden_layers_sizes,
                        hidden_activation=hidden_activation,
                        output_activation=output_activation,
                        use_rprop=use_rprop, rprop_init_update=finetune_lr)
        train_fn, valid_fn = dnn_model.build_finetune_functions(
            (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size)

    else:
        logger.critical('%s type NN model is not supported!' % (model_type))
        raise

    logger.info('fine-tuning the %s model' % (model_type))

    start_time = time.clock()

    best_dnn_model = dnn_model
    best_validation_loss = sys.float_info.max
    previous_loss = sys.float_info.max

    early_stop = 0
    epoch = 0
    previous_finetune_lr = finetune_lr

    while (epoch < training_epochs):
        epoch = epoch + 1

        current_momentum = momentum
        current_finetune_lr = finetune_lr
        if epoch <= warmup_epoch:
            current_finetune_lr = finetune_lr
            current_momentum = warmup_momentum
        else:
            current_finetune_lr = previous_finetune_lr * 0.5

        previous_finetune_lr = current_finetune_lr

        train_error = []
        sub_start_time = time.clock()

        while (not train_data_reader.is_finish()):
            shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition()
            train_set_x.set_value(numpy.asarray(temp_train_set_x, dtype=theano.config.floatX), borrow=True)
            train_set_y.set_value(numpy.asarray(temp_train_set_y, dtype=theano.config.floatX), borrow=True)

            n_train_batches = train_set_x.get_value().shape[0] // batch_size

            logger.debug('this partition: %d frames (divided into %d batches of size %d)' % (
            train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size))

            for minibatch_index in range(n_train_batches):
                this_train_error = train_fn(minibatch_index, current_finetune_lr, current_momentum)
                train_error.append(this_train_error)

                if numpy.isnan(this_train_error):
                    logger.warning('training error over minibatch %d of %d was %s' % (
                    minibatch_index + 1, n_train_batches, this_train_error))

        train_data_reader.reset()

        logger.debug('calculating validation loss')
        validation_losses = valid_fn()
        this_validation_loss = numpy.mean(validation_losses)

        # this has a possible bias if the minibatches were not all of identical size
        # but it should not be significant if minibatches are small
        this_train_valid_loss = numpy.mean(train_error)

        sub_end_time = time.clock()

        loss_difference = this_validation_loss - previous_loss

        logger.info('epoch %i, validation error %f, train error %f  time spent %.2f' % (
        epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
        if plot:
            plotlogger.add_plot_point('training convergence', 'validation set', (epoch, this_validation_loss))
            plotlogger.add_plot_point('training convergence', 'training set', (epoch, this_train_valid_loss))
            plotlogger.save_plot('training convergence', title='Progress of training and validation error',
                                 xlabel='epochs', ylabel='error')

        if this_validation_loss < best_validation_loss:
            best_dnn_model = dnn_model
            best_validation_loss = this_validation_loss
            logger.debug('validation loss decreased, so saving model')
            early_stop = 0
        else:
            logger.debug('validation loss did not improve')
            dbn = best_dnn_model
            early_stop += 1

        if early_stop >= early_stop_epoch:
            # too many consecutive epochs without surpassing the best model
            logger.debug('stopping early')
            break

        if math.isnan(this_validation_loss):
            break

        previous_loss = this_validation_loss

    end_time = time.clock()
    pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))

    logger.info(
        'overall  training time: %.2fm validation error %f' % ((end_time - start_time) / 60., best_validation_loss))

    if plot:
        plotlogger.save_plot('training convergence', title='Final training and validation error', xlabel='epochs',
                             ylabel='error')

    return best_validation_loss
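
A hedged sketch of the hyper_params dict that train_DNN() expects; the keys are exactly the ones read in the function body, while the values here are illustrative only.

hyper_params = {
    'learning_rate': 0.002,
    'training_epochs': 25,
    'batch_size': 256,
    'l1_reg': 0.0,
    'l2_reg': 0.0,
    'warmup_epoch': 10,
    'momentum': 0.9,
    'warmup_momentum': 0.3,
    'use_rprop': 0,
    'hidden_layer_size': [1024, 1024, 1024],
    'early_stop_epochs': 5,
    'hidden_activation': 'tanh',
    'output_activation': 'linear',
    'model_type': 'DNN',
    'do_pretraining': False,
    'pretraining_epochs': 10,
    'pretraining_lr': 0.0001,
}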
Example #9
from uci_adult import ADULT
from models.dnn import DNN, train_dnn

if __name__ == '__main__':
    adult = ADULT(path='../adult', n_users=10, user_id=0)

    dnn_model = DNN(input_size=adult.n_features,
                    output_size=adult.n_labels,
                    architecture={
                        'h1': 128,
                        'h2': 128
                    },
                    learning_rate=0.001)
    train_dnn(dnn_model, [adult],
              batch_size=128,
              epochs=10000,
              display_step=10)
Example #10
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    log('> ... initializing the model')

    # parse network configuration from arguments, and initialize data reading
    cfg = RBMConfig()
    cfg.parse_config_common(arguments)
    cfg.init_data_reading(train_data_spec)

    # we also need to set up a DNN model, whose parameters are shared with RBM, for 2 reasons:
    # first, we can use DNN's model reading and writing functions, instead of designing these functions for RBM specifically
    # second, DNN generates
    cfg_dnn = NetworkConfig()
    cfg_dnn.n_ins = cfg.n_ins
    cfg_dnn.hidden_layers_sizes = cfg.hidden_layers_sizes
    cfg_dnn.n_outs = cfg.n_outs
    dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn)

    # now set up the RBM model with dnn as an argument
    srbm = SRBM(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg, dnn=dnn)
    # get the pre-training function
    log('> ... getting the pre-training functions')
    pretraining_fns = srbm.pretraining_functions(train_set_x=cfg.train_x,
                                                 batch_size=cfg.batch_size,
                                                 k=1,
                                                 weight_cost=0.0002)

    start_layer_index = 0
    start_epoch_index = 0
    if os.path.exists(wdir + '/nnet.tmp') and \
       os.path.exists(wdir + '/training_state.tmp'):
Example #11
    nnet_cfg = arguments['nnet_cfg']
    layer_index = int(arguments['layer_index'])

    # load network configuration
    cfg = cPickle.load(open(nnet_cfg, 'r'))
    cfg.init_activation()

    # set up the model with model config
    log('> ... setting up the model and loading parameters')
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    model = None
    log('> ... model type: %s' % cfg.model_type)
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'DNNV':
        model = DNNV(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNNV':
        model = CNNV(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # load model parameters
    _file2nnet(model.layers, filename=nnet_param)

    # get the function for feature extraction
    log('> ... getting the feat-extraction function')
    extract_func = model.build_extract_feat_function(layer_index)

    kaldiread = KaldiReadIn(in_scp_file)
Example #12
    )
    train_dnn(dnn_model_1, user_list, ae_list, user_id=0, batch_size=128, epochs=1000, display_step=10)

    dnn_model_2 = DNN(
        input_size=hidden_size,
        output_size=user_list[0].n_labels,
        architecture={'h1': 128, 'h2': 128},
        learning_rate=0.001
    )
    train_dnn(dnn_model_2, user_list, ae_list, user_id=1, batch_size=128, epochs=1000, display_step=10)
    '''

    dnn_model = DNN(input_size=hidden_size,
                    output_size=user_list[0].n_labels,
                    architecture={
                        'h1': 128,
                        'h2': 128
                    },
                    learning_rate=0.001)
    train_dnn(dnn_model,
              user_list,
              ae_list,
              batch_size=128,
              epochs=10000,
              display_step=100)

    batch_xs, batch_ys = user_list[0].next_batch(user_list[0].n_samples_test,
                                                 is_train=False)
    for i in range(n_users):
        acc_test = cal_acc(dnn_model.predict(ae_list[i].transform(batch_xs)),
                           batch_ys)
Example #13
# -*- coding: utf-8 -*-

import os
import sys
import numpy as np
import tensorflow as tf
from models.dnn import DNN
from data_generate import *
from data_process import get_node2id


os.environ['CUDA_VISIBLE_DEVICES'] = '0'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True


if __name__ == '__main__':
    if sys.argv[1] == 'deepwalk':
        embeddings_file = 'deepwalk.embeddings'
    elif sys.argv[1] == 'hin2vec':
        embeddings_file = 'node_vectors.txt'
    node2id = get_node2id()
    node_embeddings = get_embeddings(embeddings_file, node2id)
    train_dataset, test_dataset = train_test_split('./data/all_data.csv', train_size=0.7)
    model = DNN(config=config, batch_size=2048, node_embeddings=node_embeddings, optimizer='adam', learning_rate=0.001,
                epoch_num=5)
    model.train(train_dataset=train_dataset, test_dataset=test_dataset)
Example #15
    layer_index = int(arguments['layer_index'])
    batch_size = float(arguments['batch_size'])
    argmax = 'argmax' in arguments and string2bool(arguments['argmax'])
    log("Extracting in batches with size="+str(batch_size))
    if batch_size == -1:
      log("Extracting all features per partition at once")

    # load network configuration and set up the model
    log('> ... setting up the model and loading parameters')
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    cfg = pickle.load(smart_open(nnet_cfg,'rb'))
    cfg.init_activation()
    model = None
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg, testing = True)

    # load model parameters
    _file2nnet(model.layers, path = nnet_param)

    # initialize data reading
    cfg.init_data_reading_test(data_spec)

    model.dumpLayerSize()

    # get the function for feature extraction
    log('> ... getting the feat-extraction function for layer='+str(layer_index))
    extract_func = model.build_extract_feat_function(layer_index)
Example #16
def dnn_run(arguments):

    required_arguments = ['train_data', 'valid_data', 'nnet_spec', 'wdir']
    for arg in required_arguments:
        if arg not in arguments:
            print "Error: the argument %s has to be specified" % (arg)
            exit(1)
    train_data_spec = arguments['train_data']
    valid_data_spec = arguments['valid_data']
    nnet_spec = arguments['nnet_spec']
    wdir = arguments['wdir']
    cfg = NetworkConfig()
    cfg.parse_config_dnn(arguments, nnet_spec)
    cfg.init_data_reading(train_data_spec, valid_data_spec)

    # parse pre-training options
    # pre-training files and layer number (how many layers are set to the pre-training parameters)
    ptr_layer_number = 0
    ptr_file = ''
    if 'ptr_file' in arguments and 'ptr_layer_number' in arguments:
        ptr_file = arguments['ptr_file']
        ptr_layer_number = int(arguments['ptr_layer_number'])

    # check working dir to see whether it's resuming training
    resume_training = False
    if os.path.exists(wdir + '/nnet.tmp') and \
       os.path.exists(wdir + '/training_state.tmp'):
        resume_training = True
        cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
        log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch '
            + str(cfg.lrate.epoch))

    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    log('> ... building the model')
    # setup model
    if cfg.do_dropout:
        dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    else:
        dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # initialize model parameters
    # if not resuming training, initialized from the specified pre-training file
    # if resuming training, initialized from the tmp model file
    if (ptr_layer_number > 0) and (resume_training is False):
        _file2nnet(dnn.layers,
                   set_layer_num=ptr_layer_number,
                   filename=ptr_file)
    if resume_training:
        _file2nnet(dnn.layers, filename=wdir + '/nnet.tmp')

    # get the training, validation and testing function for the model
    log('> ... getting the finetuning functions')
    train_fn, valid_fn = dnn.build_finetune_functions(
        (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
        batch_size=cfg.batch_size)

    log('> ... finetuning the model')
    while (cfg.lrate.get_rate() != 0):
        # one epoch of sgd training
        train_error = train_sgd(train_fn, cfg)
        log('> epoch %d, training error %f ' %
            (cfg.lrate.epoch, 100 * numpy.mean(train_error)) + '(%)')
        # validation
        valid_error = validate_by_minibatch(valid_fn, cfg)
        log('> epoch %d, lrate %f, validation error %f ' %
            (cfg.lrate.epoch, cfg.lrate.get_rate(),
             100 * numpy.mean(valid_error)) + '(%)')
        cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
        # output nnet parameters and lrate, for training resume
        if cfg.lrate.epoch % cfg.model_save_step == 0:
            _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
            _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

    # save the model and network configuration
    if cfg.param_output_file != '':
        _nnet2file(dnn.layers,
                   filename=cfg.param_output_file,
                   input_factor=cfg.input_dropout_factor,
                   factor=cfg.dropout_factor)
        log('> ... the final PDNN model parameter is ' + cfg.param_output_file)
    if cfg.cfg_output_file != '':
        _cfg2file(dnn.cfg, filename=cfg.cfg_output_file)
        log('> ... the final PDNN model config is ' + cfg.cfg_output_file)
Example #17
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    resume_training = False; resume_tasks = []  # if we are resuming training, then MLT only operates on the terminated tasks
    for n in xrange(task_number):
        log('> ... building the model for task %d' % (n))
        cfg = config_array[n]
        # set up the model
        dnn_shared = None; shared_layers = []
        if n > 0:
            dnn_shared = dnn_array[0]; shared_layers = [m for m in xrange(shared_layers_num)]
        if cfg.do_dropout:
            dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg,
                              dnn_shared = dnn_shared, shared_layers = shared_layers)
        else:
            dnn = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg,
                      dnn_shared = dnn_shared, shared_layers = shared_layers)

        # get the training, validation and testing function for the model
        log('> ... getting the finetuning functions for task %d' % (n))
        train_fn, valid_fn = dnn.build_finetune_functions((cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y), batch_size=cfg.batch_size)
        # add dnn and the functions to the list   
        dnn_array.append(dnn)
        train_fn_array.append(train_fn); valid_fn_array.append(valid_fn)
        # check the working dir to decide whether it's resuming training; if yes, load the tmp network files for initialization
        if os.path.exists(wdir + '/nnet.tmp.task' + str(n)) and os.path.exists(wdir + '/training_state.tmp.task' + str(n)):
            resume_training = True; resume_tasks.append(n)
            cfg.lrate = _file2lrate(wdir + '/training_state.tmp.task' + str(n))
            log('> ... found nnet.tmp.task%d and training_state.tmp.task%d, now resume task%d training from epoch %d' % (n, n, n, cfg.lrate.epoch))
            _file2nnet(dnn.layers, filename = wdir + '/nnet.tmp.task' + str(n))

    # pre-training works only if we are NOT resuming training
Example #18
def main(arg_elements):

    # check the arguments
    arguments = parse_arguments(arg_elements)
    required_arguments = [
        'data', 'nnet_param', 'nnet_cfg', 'output_file', 'layer_index',
        'batch_size'
    ]
    for arg in required_arguments:
        if arg not in arguments:
            print "Error: the argument %s has to be specified" % (arg)
            exit(1)

    # mandatory arguments
    data_spec = arguments['data']
    nnet_param = arguments['nnet_param']
    nnet_cfg = arguments['nnet_cfg']
    output_file = arguments['output_file']
    layer_index = int(arguments['layer_index'])
    batch_size = int(arguments['batch_size'])
    argmax = 'argmax' in arguments and string2bool(arguments['argmax'])

    # load network configuration and set up the model
    log('> ... setting up the model and loading parameters')
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    cfg = cPickle.load(smart_open(nnet_cfg, 'r'))
    cfg.init_activation()
    model = None
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng,
                    theano_rng=theano_rng,
                    cfg=cfg,
                    testing=True)

    # load model parameters
    _file2nnet(model.layers, filename=nnet_param)

    # initialize data reading
    cfg.init_data_reading_test(data_spec)

    # get the function for feature extraction
    log('> ... getting the feat-extraction function')
    extract_func = model.build_extract_feat_function(layer_index)

    output_mats = []  # store the features for all the data in memory. TODO: output the features in a streaming mode
    log('> ... generating features from the specified layer')
    while (not cfg.test_sets.is_finish()):  # loop over the data
        cfg.test_sets.load_next_partition(cfg.test_xy)
        batch_num = int(
            math.ceil(1.0 * cfg.test_sets.cur_frame_num / batch_size))

        for batch_index in xrange(batch_num):  # loop over mini-batches
            start_index = batch_index * batch_size
            end_index = min((batch_index + 1) * batch_size,
                            cfg.test_sets.cur_frame_num
                            )  # the residue may be smaller than a mini-batch
            output = extract_func(
                cfg.test_x.get_value()[start_index:end_index])
            output_mats.append(output)

    output_mat = numpy.concatenate(output_mats)
    if argmax:
        output_mat = output_mat.argmax(axis=1)

    # output the feature representations using pickle
    f = smart_open(output_file, 'wb')
    cPickle.dump(output_mat, f, cPickle.HIGHEST_PROTOCOL)

    log('> ... the features are stored in ' + output_file)
Example #19
    cfg = pickle.load(smart_open(nnet_cfg,'rb'))
    layerNr = cfg.totalNumerOfLayers() 
    log('Total number of layers '+str(layerNr))
    for i in range(0,layerNr):
        count = 0        
        log("Going to output layer="+str(i))
        files = []
        layer_index = i

        numpy_rng = numpy.random.RandomState(89677)
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        cfg = pickle.load(smart_open(nnet_cfg,'rb'))
        cfg.init_activation()
        model = None
        if cfg.model_type == 'DNN':
            model = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg)
        elif cfg.model_type == 'CNN':
            model = CNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg, testing = True)

        # load model parameters
        _file2nnet(model.layers, path = nnet_param)

        # initialize data reading
        cfg.init_data_reading_test(data_spec)

        model.dumpLayerSize()

        initialDim = model.getNeuronsForLayer(i)
        for p in perplexity:
            files.append(createDataFile(output_path,'Layer'+str(layer_index),countItems,2,initialDim,p))
Example #20
def evaluate():
    place = fluid.CUDAPlace(0) if cfg.use_cuda else fluid.CPUPlace()
    inference_scope = fluid.Scope()
    test_files = [
        os.path.join(cfg.evaluate_file_path, x)
        for x in os.listdir(cfg.evaluate_file_path)
    ]
    dataset = CriteoDataset()
    test_reader = paddle.batch(dataset.test(test_files),
                               batch_size=cfg.batch_size)

    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    model = DNN()
    model_path = os.path.join(cfg.save_path,
                              model.name + "_epoch_" + str(cfg.test_epoch),
                              "checkpoint")

    with fluid.framework.program_guard(test_program, startup_program):
        with fluid.unique_name.guard():
            inputs = model.input_data()
            loss, auc_var = model.net(inputs)

            exe = fluid.Executor(place)
            feeder = fluid.DataFeeder(feed_list=inputs, place=place)

            fluid.load(fluid.default_main_program(), model_path, exe)

            auc_states_names = [
                '_generated_var_0', '_generated_var_1', '_generated_var_2',
                '_generated_var_3'
            ]
            for var in auc_states_names:
                set_zero(var, scope=inference_scope, place=place)

            run_index = 0
            infer_auc = 0
            L = []
            for batch_id, data_test in enumerate(test_reader()):
                loss_val, auc_val = exe.run(test_program,
                                            feed=feeder.feed(data_test),
                                            fetch_list=[loss, auc_var])
                run_index += 1
                infer_auc = auc_val
                L.append(loss_val / cfg.batch_size)
                if batch_id % cfg.log_interval == 0:
                    logger.info("TEST --> batch: {} loss: {} auc: {}".format(
                        batch_id, loss_val / cfg.batch_size, auc_val))

            infer_loss = np.mean(L)
            infer_result = {}
            infer_result['loss'] = infer_loss
            infer_result['auc'] = infer_auc
            if not os.path.isdir(cfg.log_dir):
                os.makedirs(cfg.log_dir)
            log_path = os.path.join(cfg.log_dir,
                                    model.name + '_infer_result.log')

            logger.info(str(infer_result))
            with open(log_path, 'w+') as f:
                f.write(str(infer_result))
            logger.info("Done.")
    return infer_result
Example #21
    # check working dir to see whether it's resuming training
    resume_training = False
    if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
        resume_training = True
        cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
        log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch ' + str(cfg.lrate.epoch))

    numpy_rng = numpy.random.RandomState()
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    log('> ... building the model')
    # setup model
    if cfg.do_dropout:
        dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg)
    else:
        dnn = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg)

    # initialize model parameters
    # if not resuming training, initialized from the specified pre-training file
    # if resuming training, initialized from the tmp model file
    if (ptr_layer_number > 0) and (resume_training is False):
        _file2nnet(dnn.layers, set_layer_num = ptr_layer_number, filename = ptr_file)
    if resume_training:
        _file2nnet(dnn.layers, filename = wdir + '/nnet.tmp')

    # get the training, validation and testing function for the model
    log('> ... getting the finetuning functions')
    train_fn, valid_fn = dnn.build_finetune_functions(
                (cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y),
                batch_size=cfg.batch_size)
Example #22
    data_spec = arguments['data']
    nnet_param = arguments['nnet_param']
    nnet_cfg = arguments['nnet_cfg']
    output_file = arguments['output_file']
    layer_index = int(arguments['layer_index'])
    batch_size = float(arguments['batch_size'])

    # load network configuration and set up the model
    log('> ... setting up the model and loading parameters')
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    cfg = cPickle.load(smart_open(nnet_cfg,'r'))
    cfg.init_activation()
    model = None
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg, testing = True)

    # load model parameters
    _file2nnet(model.layers, filename = nnet_param)

    # initialize data reading
    cfg.init_data_reading_test(data_spec)

    # get the function for feature extraction
    log('> ... getting the feat-extraction function')
    extract_func = model.build_extract_feat_function(layer_index)

    output_mats = []    # store the features for all the data in memory. TODO: output the features in a streaming mode
    log('> ... generating features from the specified layer')
Example #23
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    log('> ... initializing the model')

    # parse network configuration from arguments, and initialize data reading
    cfg = RBMConfig()
    cfg.parse_config_common(arguments)
    cfg.init_data_reading(train_data_spec)

    # we also need to set up a DNN model, whose parameters are shared with RBM, for 2 reasons:
    # first, we can use DNN's model reading and writing functions, instead of designing these functions for RBM specifically
    # second, DNN generates 
    cfg_dnn = NetworkConfig()
    cfg_dnn.n_ins = cfg.n_ins; cfg_dnn.hidden_layers_sizes = cfg.hidden_layers_sizes; cfg_dnn.n_outs = cfg.n_outs
    dnn = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg_dnn)

    # now set up the RBM model with dnn as an argument
    srbm = SRBM(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg, dnn = dnn)
    # get the pre-training function
    log('> ... getting the pre-training functions')
    pretraining_fns = srbm.pretraining_functions(train_set_x=cfg.train_x, batch_size=cfg.batch_size,
                                                 k = 1, weight_cost = 0.0002)
 
    start_layer_index = 0
    start_epoch_index = 0
    if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
        start_layer_index, start_epoch_index = read_two_integers(wdir + '/training_state.tmp')
        log('> ... found nnet.tmp and training_state.tmp, now resume training from layer #' + str(start_layer_index) + ' epoch #' + str(start_epoch_index))
        _file2nnet(dnn.layers, filename = wdir + '/nnet.tmp')