Example #1
def create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers):
    ''' Create Trainer '''
    print('Creating the trainer.')
    # Differential Learning rate scheduler
    lr_schedule = C.learning_rate_schedule([2.5], unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.001

    # Create the Adam learners
    learner = C.adam(network['output'].parameters,
                     lr_schedule,
                     mm_schedule,
                     l2_regularization_weight=l2_reg_weight,
                     unit_gain=False)

    # Compute the number of workers
    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))
    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']), learner, progress_writers)

    return trainer
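
# Added sketch (not part of the original example): C.momentum_schedule also
# accepts a list plus an epoch size, the same form used in the logging tests
# further down this page, so momentum can be ramped per epoch instead of
# staying fixed at 0.9.
import cntk as C

epoch_size_example = 50000  # placeholder; create_trainer() receives epoch_size as an argument
mm_ramp = C.momentum_schedule([0.5, 0.5, 0.9], epoch_size_example)  # 0.5 for two epochs, then 0.9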
Example #2
 def __call__(self, parameters, opt_learning_rate=0.001, **kwargs):
     lr_per_minibatch = cntk.learning_rate_schedule(
         lr=opt_learning_rate, unit=cntk.UnitType.minibatch)
     momentum = cntk.momentum_schedule(momentum=0.99)
     return cntk.adam_sgd(parameters=parameters,
                          lr=lr_per_minibatch,
                          momentum=momentum)
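
# Added sketch (not part of the original snippet): the __call__ above uses the
# older CNTK 2.x names (cntk.adam_sgd, cntk.learning_rate_schedule with
# UnitType). A hedged equivalent with the newer names that most other examples
# on this page use; make_learner is a hypothetical stand-alone wrapper.
import cntk

def make_learner(parameters, opt_learning_rate=0.001):
    lr = cntk.learning_parameter_schedule(opt_learning_rate)
    momentum = cntk.momentum_schedule(0.99)
    return cntk.adam(parameters, lr=lr, momentum=momentum)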
Example #3
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})
    
    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
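
# Added note (not part of the original test): the expected log is simply the
# two schedules interleaved (a new entry whenever the learning rate or the
# momentum changes), which is what the list comprehension above builds:
lr_values = [0.3, 0.2, 0.1, 0]
m_values = [0.6, 0.7, 0.8]
values = [j for i in zip(lr_values, m_values) for j in i] + [0]
print(values)  # [0.3, 0.6, 0.2, 0.7, 0.1, 0.8, 0] -- 7 entries, len(lr_values + m_values)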
Example #4
def create_trainer(network, epoch_size, num_quantization_bits, warm_up,
                   progress_writers):
    print('Creating the trainer.')
    # Train only the last layers
    lr_schedule = C.learning_rate_schedule([0.01] * 10 + [0.001] * 20 +
                                           [0.0001] * 30,
                                           unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.0001

    learner = C.adam(network['output'].parameters,
                     lr_schedule,
                     mm_schedule,
                     l2_regularization_weight=l2_reg_weight,
                     unit_gain=False)

    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))
    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            learner, progress_writers)

    return trainer
Example #5
    def __init__(self, n_in, n_out, init_lr, momentum):

        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in,))
        self.label = C.sequence.input_variable(shape=(self.n_out,))

        self.three_dnn = C.layers.Sequential([
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_1'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_2'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_3')])
        self.final_dnn = C.layers.Dense(self.n_out, name='dnn_final')
        self.dnn_1 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_1')
        self.dnn_2 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_2')
        self.dnn_3 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_3')
        self.dnn_4 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_4')
        self.list_bias = []
        for i in xrange(16):
            self.list_bias.append(C.parameter(shape=(self.param2, ), name='bias_' + str(i)))

        self.output = self.model(self.input)

        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters, lr=self.lr_s, momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err), [self.learner])
Example #6
def create_network(para, verbose=False):
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.ops.relu):
        # In order to accelerate the debugging step, we choose a simple structure with only 2 parameters

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[0],
                                      strides=(1, 1), pad=True, name='C1')(network_input / 255.0)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2), )(h)

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[1],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)

        h = cntk.layers.Convolution2D(filter_shape=(3, 3), num_filters=para[2],
                                      strides=(1, 1), pad=True, name='C2')(h)

        h = cntk.layers.Dense(para[3])(h)

        h = cntk.layers.Dropout(0.25)(h)

        z = cntk.layers.Dense(10, activation=None, name='R')(h)
    loss = cntk.cross_entropy_with_softmax(z, network_label)
    label_error = cntk.classification_error(z, network_label)
    lr_schedule = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    learner = cntk.momentum_sgd(z.parameters, lr_schedule, cntk.momentum_schedule(0.9))
    trainer = cntk.Trainer(z, (loss, label_error), [learner])
    if verbose: log = cntk.logging.ProgressPrinter(100)
    for _ in xrange(20000):
        data = train_reader.next_minibatch(100, input_map=mapping(train_reader))
        trainer.train_minibatch(data)
        if verbose: log.update_with_trainer(trainer)
    return trainer
Example #7
def init_model(m):
    progress_writers = [
        cntk.logging.ProgressPrinter(
            freq=int(BATCHSIZE / 2),
            rank=cntk.train.distributed.Communicator.rank(),
            num_epochs=EPOCHS)
    ]

    # Loss (dense labels); check whether sparse labels are supported
    loss = cntk.cross_entropy_with_softmax(m, labels)
    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # if unit_gain=True then ...(1-momentum)*gradient
    local_learner = cntk.momentum_sgd(
        m.parameters,
        lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),
        momentum=cntk.momentum_schedule(MOMENTUM),
        unit_gain=False)

    distributed_learner = cntk.train.distributed.data_parallel_distributed_learner(
        local_learner)

    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)),
                           [distributed_learner], progress_writers)

    return trainer, distributed_learner
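
# Added sketch (not part of the original function): the unit_gain comment above
# in numbers, using plain Python with illustrative values.
momentum, old_direction, gradient = 0.9, 1.0, 0.5
plain_momentum_dir = momentum * old_direction + gradient                       # unit_gain=False -> 1.4
unit_gain_momentum_dir = momentum * old_direction + (1 - momentum) * gradient  # unit_gain=True  -> 0.95
print(plain_momentum_dir, unit_gain_momentum_dir)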
Example #8
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Example #9
def test_noise_injection_with_checkpointing():
    from cntk import initializer
    shape = (100,100)
    
    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    
    lr=learning_rate_schedule(0.5, UnitType.sample)
    m=C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
        checkpoint = learner1.create_checkpoint()

        v =  np.float32(np.random.rand(100,100))
    
        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
Example #10
    def train(self, report_freq = 500, as_policy=True):        
        #loss = C.ops.minus(0, C.ops.argmin(self.model) -  C.ops.argmin(self.model) + C.ops.minus(self.label_var, 0))
        loss = C.squared_error(self.model, self.label_var)
        evaluation = C.squared_error(self.model, self.label_var)
        schedule = C.momentum_schedule(self.hp.learning_rate)
        progress_printer = C.logging.ProgressPrinter(num_epochs=self.hp.epochs/self.hp.minibatch_size)
        learner = C.adam(self.model.parameters, 
                     C.learning_rate_schedule(self.hp.learning_rate, C.UnitType.minibatch), 
                     momentum=schedule, 
                     l1_regularization_weight=self.hp.l1reg,
                     l2_regularization_weight=self.hp.l2reg
                     )
        trainer = C.Trainer(self.model, (loss, evaluation), learner, progress_printer)
        self.plotdata = {"loss":[]}
        for epoch in range(self.hp.epochs):             
             indata, label, total_reward = self.get_next_data(self.hp.minibatch_size, as_policy)
             data = {self.input_var: indata, self.label_var: label}
             trainer.train_minibatch(data)
             loss = trainer.previous_minibatch_loss_average
             if not (loss == "NA"):
                self.plotdata["loss"].append(loss)
             if epoch % report_freq == 0:
                 print()
                 print("last epoch total reward: {}".format(total_reward))
                 trainer.summarize_training_progress()
                 print()
#             if self.hp.stop_loss > loss:
#                 break
        print()
        trainer.summarize_training_progress()
Example #11
def test_noise_injection_with_checkpointing():
    from cntk import initializer
    shape = (100,100)

    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))

    lr=learning_rate_schedule(0.5, UnitType.sample)
    m=C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
        checkpoint = learner1.create_checkpoint()

        v =  np.float32(np.random.rand(100,100))

        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
Example #12
def main(params):
    # Create output and log directories if they don't exist
    if not os.path.isdir(params['output_folder']):
        os.makedirs(params['output_folder'])

    if not os.path.isdir(params['log_folder']):
        os.makedirs(params['log_folder'])

    # Create the network
    network = create_network()

    # Create readers
    train_reader = cbf_reader(os.path.join(params['input_folder'], 'train{}.cbf'.format(params['prefix'])), is_training=True,
                              max_samples=cntk.io.INFINITELY_REPEAT)
    cv_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])), is_training=False,
                           max_samples=cntk.io.FULL_DATA_SWEEP)
    test_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])), is_training=False,
                             max_samples=cntk.io.FULL_DATA_SWEEP)

    input_map = {
        network['input']: train_reader.streams.front,
        network['target']: train_reader.streams.label
    }

    # Create learner
    mm_schedule = momentum_schedule(0.90)
    lr_schedule = learning_parameter_schedule([(40, 0.1), (40, 0.01)], minibatch_size=params['minibatch_size'])
    learner = cntk.adam(network['model'].parameters, lr_schedule, mm_schedule, l2_regularization_weight=0.0005,
                        epoch_size=params['epoch_size'], minibatch_size=params['minibatch_size'])

    # Use TensorBoard for visual logging
    log_file = os.path.join(params['log_folder'], 'log.txt')
    pp_writer = cntk.logging.ProgressPrinter(freq=10, tag='Training', num_epochs=params['max_epochs'], log_to_file=log_file)
    tb_writer = cntk.logging.TensorBoardProgressWriter(freq=10, log_dir=params['log_folder'], model=network['model'])

    # Create trainer and training session
    trainer = Trainer(network['model'], (network['loss'], network['metric']), [learner], [pp_writer, tb_writer])
    test_config = TestConfig(minibatch_source=test_reader, minibatch_size=params['minibatch_size'], model_inputs_to_streams=input_map)
    cv_config = CrossValidationConfig(minibatch_source=cv_reader, frequency=(1, DataUnit.sweep),
                                      minibatch_size=params['minibatch_size'], model_inputs_to_streams=input_map)
    checkpoint_config = CheckpointConfig(os.path.join(params['output_folder'], model_name), frequency=(10, DataUnit.sweep), restore=params['restore'])

    session = training_session(trainer=trainer,
                               mb_source=train_reader,
                               mb_size=params['minibatch_size'],
                               model_inputs_to_streams=input_map,
                               max_samples=params['epoch_size'] * params['max_epochs'],
                               progress_frequency=(1, DataUnit.sweep),
                               checkpoint_config=checkpoint_config,
                               cv_config=cv_config,
                               test_config=test_config)

    cntk.logging.log_number_of_parameters(network['model'])
    session.train()

    # Save the trained model
    path = os.path.join(params['output_folder'], 'final_model.dnn')
    network['model'].save(path)
    print('Saved final model to', path)
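
# Added usage sketch (hypothetical values): the keys below are exactly the ones
# main() reads; everything else about the run comes from create_network() and
# the CBF files in input_folder.
params = {
    'input_folder': './data',
    'output_folder': './output',
    'log_folder': './logs',
    'prefix': '',
    'minibatch_size': 64,
    'epoch_size': 50000,
    'max_epochs': 100,
    'restore': False,
}
main(params)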
Example #13
 def set_optimizer(self, opt_type, opt_conf):
     if opt_type == 'SGD':
         self.lr_schedule = C.learning_rate_schedule(
             opt_conf['lr'], C.UnitType.minibatch)
         self.m_schedule = C.momentum_schedule(
             opt_conf['momentum'], C.UnitType.minibatch)
     else:
         raise NotImplementedError
Example #14
def train(reader, model_func, max_epochs=10, task='slot_tagging'):
    
    # Create the containers for input feature (x) and the label (y)
    x = C.sequence.input_variable(vocab_size)
    y = C.sequence.input_variable(num_labels)
    # Instantiate the model function; x is the input (feature) variable 
    model = model_func(x)
    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000        # 18000 samples is half the dataset size 
    minibatch_size = 70
    
    # LR schedule over epochs 
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    lr_per_sample = [3e-4]*4+[1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)
    
    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=minibatch_size)
    
    # We use the Adam optimizer, which is known to work well on this dataset
    # Feel free to try other optimizers from 
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15, 
                     gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    
    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) 

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)
    
    # Assign the data fields to be read from the input
    if task == 'slot_tagging':
        data_map={x: reader.streams.query, y: reader.streams.slot_labels}
    else:
        data_map={x: reader.streams.query, y: reader.streams.intent} 
    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                # loop over minibatches on the epoch
            data = reader.next_minibatch(minibatch_size, input_map= data_map)  # fetch minibatch
            trainer.train_minibatch(data)               # update model with it
            t += data[y].num_samples                    # samples so far
        trainer.summarize_training_progress()
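
# Added note (not part of the original function): the per-sample to
# per-minibatch conversion above works out to these numbers.
minibatch_size = 70
lr_per_sample = [3e-4] * 4 + [1.5e-4]
lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
print(lr_per_minibatch)  # roughly [0.021, 0.021, 0.021, 0.021, 0.0105]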
Example #15
 def __init__(self, dim_x, dim_y):
     self.dim_x = int(dim_x)
     self.dim_y = int(dim_y)
     self.input = cntk.sequence.input_variable(shape=(self.dim_x, ))
     self.label = cntk.sequence.input_variable(shape=(self.dim_y, ))
     self.output = self.model(self.input)
     self.loss = loss_fun(self.output, self.label)
     self.eval = loss_fun(self.output, self.label)
     self.learner = cntk.momentum_sgd(parameters=self.output.parameters,
                                      momentum=cntk.momentum_schedule(0.5),
                                      lr=cntk.learning_rate_schedule(0.006, cntk.UnitType.sample))
     self.trainer = cntk.Trainer(self.output, (self.loss, self.eval), [self.learner])
Example #16
    def train(self):
        tmp_d = {"x": [], "y": []}
        num_list = []
        count = 0
        for idx, value in enumerate(self.series):
            if idx % self.h_dims == 0:
                num_list = []
                count += 1
                if (self.h_dims * count) > len(self.series):
                    break
            num_list.append(np.float32(value))
            increment_list = []
            for num in num_list:
                increment_list.append(num)
                tmp_d["x"].append(np.array(increment_list))
                tmp_d["y"].append(
                    np.array([np.float32(self.series[self.h_dims * count])]))

        x = {"train": tmp_d["x"]}
        y = {"train": np.array(tmp_d["y"])}

        z = self.create_model(self.input_node, self.h_dims)
        var_l = cntk.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
        learning_rate = 0.005
        lr_schedule = cntk.learning_parameter_schedule(learning_rate)
        loss = cntk.squared_error(z, var_l)
        error = cntk.squared_error(z, var_l)
        momentum_schedule = cntk.momentum_schedule(
            0.9, minibatch_size=self.batch_size)
        learner = cntk.fsadagrad(z.parameters,
                                 lr=lr_schedule,
                                 momentum=momentum_schedule)
        trainer = cntk.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, self.epochs):
            for x_batch, l_batch in self.next_batch(x, y, "train",
                                                    self.batch_size):
                trainer.train_minibatch({
                    self.input_node: x_batch,
                    var_l: l_batch
                })
            if epoch % (self.epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                    epoch, training_loss,
                    time.time() - start))
        return z
Example #17
def lstm_basic(x, y, epochs=1000, batch_size=100, input_dim=5):

    x_axes = [C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()]
    C.input_variable(1, dynamic_axes=x_axes)

    # input sequences
    input_seq = C.sequence.input_variable(1)

    # create the model
    z = create_model(input_seq, input_dim)

    # expected output (label); the label input reuses the dynamic axes
    # of the model output
    lb = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, lb)

    # use squared error to determine error for now
    error = C.squared_error(z, lb)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)

    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(x, y, "train", batch_size):
            trainer.train_minibatch({input_seq: x1, lb: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss,
                time.time() - start))
    print("training took {0:.1f} sec".format(time.time() - start))

    return z, trainer, input_seq
Example #18
    def train (self, train_file, output_resources_pickle_file, \
        network_type = 'unidirectional', \
        num_epochs = 1, batch_size = 50, \
        dropout = 0.2, reg_alpha = 0.0, \
        num_hidden_units = 150, num_layers = 1):
        
        train_X, train_Y = self.reader.read_and_parse_training_data(train_file, output_resources_pickle_file) 

        print("Data Shape: ")
        print(train_X.shape) # (15380, 613)
        print(train_Y.shape) # (15380, 613, 8)      
        #self.wordvecs.shape (66962, 50)
        
        print("Hyper parameters:")
        print("output_resources_pickle_file = {}".format(output_resources_pickle_file))
        print("network_type = {}".format(network_type))
        print("num_epochs = {}".format(num_epochs))
        print("batch_size = {}".format(batch_size))
        print("dropout = {}".format(dropout))
        print("reg_alpha = {}".format(reg_alpha))
        print("num_hidden_units = {}".format(num_hidden_units))
        print("num_layers = {}".format(num_layers))

        # Instantiate the model function;
        features = C.sequence.input_variable(self.wordvecs.shape[0])
        labels = C.input_variable(train_Y.shape[2], dynamic_axes=[C.Axis.default_batch_axis()])
        self.model = self.__create_model(features, train_Y.shape[2], num_hidden_units, dropout)

        plot_path = "./lstm_model.png"
        plot(self.model, plot_path)        
        
        # Instantiate the loss and error function
        loss = C.cross_entropy_with_softmax(self.model, labels)
        error = C.classification_error(self.model, labels)

        # LR schedule
        learning_rate = 0.02
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(self.model.parameters, lr = lr_schedule, momentum = momentum_schedule, unit_gain = True)        

        # Setup the progress updater
        progress_printer = C.logging.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=num_epochs)

        # Instantiate the trainer. We have all data in memory. https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_feed_data.ipynb
        print('Start training')       
        train_summary = loss.train((train_X.astype('float32'), train_Y.astype('float32')), parameter_learners=[learner], callbacks=[progress_printer])
Example #19
def train(model, reader):
    y_pre = model(x)
    loss, label_error = create_criterion_function(model, y_pre, y, True)
    lr_per_minibatch = [lr] + [lr / 2] + [lr / 4]
    # lr_per_minibatch = [lr * batch_size for lr in lr_per_sample]

    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch,
                                                epoch_size=epoch_size)

    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595,
                                    minibatch_size=batch_size)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epoch)
    # learner = C.sgd(model.parameters, lr_schedule)
    learner = C.adam(y_pre.parameters,
                     lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15)
    trainer = C.Trainer(y_pre, (loss, label_error), learner,
                        progress_printer)  # []

    C.logging.log_number_of_parameters(
        y_pre)  # print # parameters and # tensor

    loss_summary = []
    step = 0
    data_map = {x: reader.streams.query, y: reader.streams.intent}

    t = 0
    for epoch in range(max_epoch):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            data = reader.next_minibatch(batch_size,
                                         input_map=data_map)  # fetch minibatch
            # print(data)
            trainer.train_minibatch(data)  # update model with it
            t += data[y].num_samples
            if t % 6000 == 0:
                training_loss = trainer.previous_minibatch_loss_average
                error = trainer.previous_minibatch_evaluation_average
                print("epoch: {}, step: {}, loss: {:.5f}, error {:.5f}".format(
                    epoch, t, training_loss, error))
        trainer.summarize_training_progress()
Example #20
def train(create_model, X, Y, epochs=500, batch_size=10, N=1):
    dim = Y.shape[1]

    # input sequences
    x = C.sequence.input_variable(dim)
    # create the model
    z = create_model(x, N=N, outputs=dim)

    # expected output (label); the label input reuses the dynamic axes
    # of the model output
    l = C.input_variable(dim, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, l)
    # use squared error to determine error for now
    error = C.squared_error(z, l)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(X, Y, batch_size):
            trainer.train_minibatch({x: x1, l: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.5f}".format(epoch, training_loss))

    print("training took {0:.1f} sec".format(time.time() - start))

    return z
Example #21
def build_SRResNet_graph(lr_image_shape, hr_image_shape, net):
    inp_dynamic_axes = [C.Axis.default_batch_axis()]
    real_X = C.input(
        lr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_X")
    real_Y = C.input(
        hr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_Y")

    real_X_scaled = real_X/255
    real_Y_scaled = real_Y/255

    genG = net(real_X_scaled)

    G_loss = C.reduce_mean(C.square(real_Y_scaled - genG))

    G_optim = C.adam(G_loss.parameters,
                     lr=C.learning_rate_schedule(
                         [(1, 0.01), (1, 0.001), (98, 0.0001)], C.UnitType.minibatch, 10000),
                     momentum=C.momentum_schedule(0.9), gradient_clipping_threshold_per_sample=1.0)

    G_G_trainer = C.Trainer(genG, (G_loss, None), G_optim)

    return (real_X, real_Y, genG, real_X_scaled, real_Y_scaled, G_optim, G_G_trainer)
Example #22
    def _create_model(self, input_dim, output_dim, hidden_dims):
        c_in = C.input_variable(input_dim, name='state')
        model = c_in

        for h in hidden_dims:
            model = C.layers.Dense(h, activation=C.relu)(model)
        model = C.layers.Dense(output_dim, activation=C.softmax)(model)

        c_action_prob = model
        c_action_onehot = C.input_variable(output_dim, name='action_onehot')
        c_reward = C.input_variable(1, name='reward')
        action_prob = C.reduce_sum(c_action_prob * c_action_onehot)
        log_action_prog = C.log(action_prob)
        loss = -log_action_prog * c_reward
        loss = C.reduce_mean(loss)

        lr = 1e-2
        lr_schedule = C.learning_parameter_schedule(lr)
        learner = C.adam(model.parameters, lr_schedule,
                         C.momentum_schedule(0.9))
        trainer = C.Trainer(model, (loss, None), learner)

        return model, loss, trainer
Example #23
    def __init__(self, n_in, n_out, init_lr, momentum):

        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in, ))
        self.label = C.sequence.input_variable(shape=(self.n_out, ))

        self.three_dnn = Sequential([
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh)
        ])
        self.rnn_layer1 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.rnn_layer2 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.final_dnn = Dense(self.n_out)

        self.output = self.model(self.input)

        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters,
                                      lr=self.lr_s,
                                      momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err),
                                 [self.learner])
Example #24
def train_and_test(reader_train, reader_test, model_func):

    ###############################
    # Training the model
    ###############################

    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    model = model_func(input)

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    epoch_size = 30000
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    lr_per_sample = [3e-4]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from the training data
    #############################################################################

    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size
    test_error = (metric_numer * 100) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
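
# Added sanity check (not part of the original function): the hand-written loss
# above is the element-wise binary cross-entropy; illustrative numbers only.
import numpy as np

target_v, model_v = 0.8, 0.7
bce = -(target_v * np.log(model_v) + (1 - target_v) * np.log(1 - model_v))
print(round(float(bce), 4))  # 0.5261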
Example #25
def train(train_x, train_y, seed, model_dir, loss_dir):
    input_dim = 600
    output_dim = 3631
    num_epochs = 100
    hidden_layer_type = ['TANH', 'TANH']
    hidden_layer_size = [1024, 1024]
    momentum = 0.9
    finetune_lr = 0.01
    l2_regularization_weight = 0.00001
    C.cntk_py.set_fixed_random_seed(seed)
    print('Creating DNN model...')
    input = C.input_variable(input_dim)
    output = C.input_variable(output_dim)
    dnn_model = create_dnn_model(input, hidden_layer_type, hidden_layer_size,
                                 output_dim)
    epoch_num = 0
    current_finetune_lr = finetune_lr
    current_momentum = momentum
    train_loss_output = []
    print('Learning...')
    while (epoch_num < num_epochs):
        print('started epoch %i' % epoch_num)
        epoch_num += 1
        sub_start_time = time.time()
        lr_schedule = C.learning_rate_schedule(current_finetune_lr,
                                               C.UnitType.minibatch)
        momentum_schedule = C.momentum_schedule(current_momentum)
        learner = C.momentum_sgd(
            dnn_model.parameters,
            lr_schedule,
            momentum_schedule,
            unit_gain=False,
            l1_regularization_weight=0,
            l2_regularization_weight=l2_regularization_weight)
        #learner = C.adadelta(dnn_model.parameters, lr_schedule, rho=0.95, epsilon=1e-8, l1_regularization_weight=0,
        #                    l2_regularization_weight= 0.00001 )
        loss = C.cross_entropy_with_softmax(dnn_model, output)
        error = loss
        trainer = C.Trainer(dnn_model, (loss, error), [learner])
        train_error = []
        for i in range(len(train_x)):
            temp_train_x = np.float32(train_x[i])
            temp_train_y = np.float32(train_y[i])
            trainer.train_minibatch({
                input: temp_train_x,
                output: temp_train_y
            })
            train_error.append(trainer.previous_minibatch_loss_average)
        this_train_loss = np.mean(train_error)
        sub_end_time = time.time()
        print('time for 1 epoch is %.1f' % (sub_end_time - sub_start_time))
        train_loss_output.append(this_train_loss)
        print('loss is %.4f' % this_train_loss)
        if np.remainder(epoch_num, 10) == 0:
            nnets_file_name = 'dnn_model_ep' + str(epoch_num) + '.model'
            if not os.path.isdir(model_dir):
                os.makedirs(model_dir)
            dnn_model.save(os.path.join(model_dir, nnets_file_name))
            if not os.path.isdir(loss_dir):
                os.makedirs(loss_dir)
            np.savetxt(
                os.path.join(loss_dir,
                             'loss_curve_ep' + str(epoch_num) + '.csv'),
                train_loss_output)
    nnets_file_name = 'dnn_model_final.model'
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    dnn_model.save(os.path.join(model_dir, nnets_file_name))
    if not os.path.isdir(loss_dir):
        os.makedirs(loss_dir)
    np.savetxt(
        os.path.join(loss_dir,
                     'loss_curve_final' + str(epoch_num) + '.csv'),
        train_loss_output)
Example #26
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)
    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_test(s2smodel)
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005
    learner = C.fsadagrad(model_train.parameters,
                          #apply the learning rate as if it is a minibatch of size 1
                          lr = C.learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size),
                          momentum = C.momentum_schedule(0.9366416204111472, minibatch_size=minibatch_size),
                          gradient_clipping_threshold_per_sample=2.3,
                          gradient_clipping_with_truncation=True)
    trainer = C.Trainer(None, criterion, learner)
    
    # records
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    C.logging.log_number_of_parameters(model_train) ; print()
    progress_printer = C.logging.ProgressPrinter(freq=30, tag='Training')

    # a hack to allow us to print sparse vectors
    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)

            # do the training
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # visualizing attention window
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    model_path = "model_%d.cmf" % epoch
    print("Saving final model to '%s'" % model_path)
    s2smodel.save(model_path)
    print("%d epochs complete." % max_epochs)

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2, ), [0.2]),
    ((0.2, ), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4),
       (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]
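
# Added sketch (assumes the per-sample __getitem__ behaviour these expectations
# describe): reading the last (spec, expected) pair above explicitly.
import cntk as C

m = C.momentum_schedule([(3, 0.2), (2, 0.4), (1, 0.8)], 5)
assert m[0] == 0.2    # 0.2 for the first 3 * 5 = 15 samples
assert m[15] == 0.4   # 0.4 for the next 2 * 5 = 10 samples
assert m[25] == 0.8   # 0.8 from sample 25 onwards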

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params), lambda params: C.adagrad(
        params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params,
                          lr=learning_rate_schedule(1, UnitType.minibatch),
                          momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params,
                               lr=learning_rate_schedule(
                                   1, UnitType.minibatch),
                               momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params,
                              lr=learning_rate_schedule(1, UnitType.minibatch),
                              momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params,
                             lr=learning_rate_schedule(1, UnitType.minibatch),
                             gamma=0.1,
                             inc=3.0,
                             dec=0.1,
                             max=np.inf,
                             min=1e-8),
    lambda params: C.sgd(params,
# expected output (label); the label input reuses the dynamic axes
# of the model output
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

# the learning rate
learning_rate = 0.005
lr_schedule = C.learning_parameter_schedule(learning_rate)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use adam optimizer
momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
trainer = C.Trainer(z, (loss, error), [learner])

# training
loss_summary = []

# time to start training
start = time.time()
for epoch in range(0, EPOCHS):
    for x_batch, l_batch in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x_batch, l: l_batch})

    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
        loss_summary.append(training_loss)
    def __train_cntk(self, path_to_folder: str, model_definition, epochs: int,
                     output_model_path: str, classes, minibatch_size: int):
        import cntk
        from cntk.learners import learning_parameter_schedule
        from cntk.ops import input_variable
        from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef, MinibatchData, UserDeserializer
        import cntk.io.transforms as xforms
        from cntk.layers import default_options, Dense, Sequential, Activation, Embedding, Convolution2D, MaxPooling, Stabilizer, Convolution, Dropout, BatchNormalization
        from cntk.ops.functions import CloneMethod
        from cntk.logging import ProgressPrinter
        from cntk.losses import cross_entropy_with_softmax
        from cntk import classification_error, softmax, relu, ModelFormat, element_times, momentum_schedule, momentum_sgd
        import pandas as pd

        path_to_folder = path_to_folder.rstrip('/')

        map_file_train = path_to_folder + "/train_map.txt"
        map_file_test = path_to_folder + "/test_map.txt"
        classes_set = set()
        num_train = 0
        num_test = 0
        num_channels = 3

        class TrackDataset(UserDeserializer):
            def __init__(self, map_file, streams, chunksize=100):
                super(TrackDataset, self).__init__()
                self._batch_size = chunksize
                self.dataframes = pd.read_csv(map_file,
                                              sep='\t',
                                              dtype=str,
                                              header=None,
                                              names=["features", "labels"])
                self._streams = [
                    cntk.io.StreamInformation(s['name'], i, 'dense',
                                              np.float32, s['shape'])
                    for i, s in enumerate(streams)
                ]

                self._num_chunks = int(
                    math.ceil(len(self.dataframes) / chunksize))

            def _scale_image(self, image, width=224, height=168):
                try:
                    return image.resize((width, height), Image.LINEAR)
                except:
                    raise Exception('scale_image error')

            def stream_infos(self):
                return self._streams

            def num_chunks(self):
                return self._num_chunks

            def get_chunk(self, chunk_id):
                images = []
                labels = []
                maximum = (chunk_id + 1) * self._batch_size
                if (maximum > len(self.dataframes)):
                    maximum = len(self.dataframes)
                for i in range(chunk_id * self._batch_size, maximum):
                    img_name = self.dataframes.iloc[i, 0]
                    image = Image.open(img_name)
                    cl = self.dataframes.iloc[i, 1:].values[0]
                    image = self._scale_image(image)
                    image = np.moveaxis((np.array(image).astype('float32')),
                                        -1, 0)
                    image -= np.mean(image, keepdims=True)
                    image /= (np.std(image, keepdims=True) + 1e-6)
                    images.append(image)
                    yv = np.zeros(num_classes)
                    yv[classes.index(cl)] = 1
                    labels.append(yv)

                result = {}
                features = np.array(images)
                lab = np.array(labels).astype('float32')
                result[self._streams[0].m_name] = features
                result[self._streams[1].m_name] = lab
                return result

        try:
            with open(map_file_train) as f:
                csv_reader = csv.reader(f, delimiter='\t')
                for row in csv_reader:
                    cmd = row[1]
                    classes_set.add(cmd)
                    num_train = num_train + 1
        except Exception as e:
            raise Exception(
                "No train_map.txt file found in path " + path_to_folder +
                ". Did you create a dataset using create_balanced_dataset()?")

        num_classes = len(classes)

        with open(map_file_test) as f:
            for num_test, l in enumerate(f):
                pass

        # transforms = [
        #     xforms.scale(width=self.__image_width, height=self.__image_height, channels=num_channels, interpolations='linear'),
        #     xforms.mean(mean_file)
        # ]

        dataset_train = TrackDataset(map_file=map_file_train,
                                     streams=[
                                         dict(name='features',
                                              shape=(num_channels,
                                                     self.__image_height,
                                                     self.__image_width)),
                                         dict(name='labels',
                                              shape=(num_classes, ))
                                     ])
        reader_train = MinibatchSource([dataset_train], randomize=True)

        # a = dataset_train.num_chunks()

        dataset_test = TrackDataset(map_file=map_file_test,
                                    streams=[
                                        dict(name='features',
                                             shape=(num_channels,
                                                    self.__image_height,
                                                    self.__image_width)),
                                        dict(name='labels',
                                             shape=(num_classes, ))
                                    ])
        reader_test = MinibatchSource([dataset_test], randomize=True)

        # ImageDeserializer loads images in the BGR format, not RGB
        # reader_train = MinibatchSource(ImageDeserializer(map_file_train, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # reader_test = MinibatchSource(ImageDeserializer(map_file_test, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # mb = reader_train.next_minibatch(10)

        input_var = input_variable(
            (num_channels, self.__image_height, self.__image_width))
        label_var = input_variable((num_classes))

        model = model_definition(input_var)

        ce = cross_entropy_with_softmax(model, label_var)
        pe = classification_error(model, label_var)

        epoch_size = num_train

        lr_per_minibatch = learning_parameter_schedule([0.01] * 10 +
                                                       [0.003] * 10 + [0.001],
                                                       epoch_size=epoch_size)
        momentums = momentum_schedule(0.9, minibatch_size=minibatch_size)
        l2_reg_weight = 0.001

        learner = momentum_sgd(model.parameters,
                               lr=lr_per_minibatch,
                               momentum=momentums,
                               l2_regularization_weight=l2_reg_weight)
        progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs)
        trainer = cntk.train.Trainer(model, (ce, pe), [learner],
                                     [progress_printer])

        input_map = {
            input_var: reader_train.streams.features,
            label_var: reader_train.streams.labels
        }

        print("Training started")
        batch_index = 0
        plot_data = {'batchindex': [], 'loss': [], 'error': []}
        for epoch in range(epochs):
            sample_count = 0
            while sample_count < epoch_size:
                data: MinibatchSource = reader_train.next_minibatch(
                    min(minibatch_size, epoch_size - sample_count),
                    input_map=input_map)

                trainer.train_minibatch(data)
                sample_count += data[label_var].num_samples

                batch_index += 1
                plot_data['batchindex'].append(batch_index)
                plot_data['loss'].append(
                    trainer.previous_minibatch_loss_average)
                plot_data['error'].append(
                    trainer.previous_minibatch_evaluation_average)

            trainer.summarize_training_progress()

        metric_numer = 0
        metric_denom = 0
        sample_count = 0
        minibatch_index = 0
        epoch_size = num_test

        while sample_count < epoch_size:
            current_minibatch = min(minibatch_size, epoch_size - sample_count)

            data = reader_test.next_minibatch(current_minibatch,
                                              input_map=input_map)

            metric_numer += trainer.test_minibatch(data) * current_minibatch
            metric_denom += current_minibatch

            sample_count += data[label_var].num_samples
            minibatch_index += 1

        print("")
        print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
            minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
            metric_denom))
        print("")

        model.save(output_model_path, format=ModelFormat.ONNX)
                  init=np.float32(np.random.normal(
                      0, 0.1, [HIDDEN_DIM, 4])))  #normal(0.1))
out_num = times(cur_h, W_out)
score = softmax(out_num)

loss = cross_entropy_with_softmax(score, ys)
#eval_error = cross_entropy_with_softmax(score, ys)
eval_error = classification_error(score, ys)

learning_rate = 1e-3
lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
#lr_schedule = learning_rate_schedule([(5000*minibatch_size,learning_rate), (1,learning_rate/100)], UnitType.minibatch)

#learner = sgd(score.parameters, lr_schedule)

mom_schedule = momentum_schedule(0.9)
#var_mom_schedule = momentum_schedule(0.999)
learner = adam_sgd(score.parameters,
                   lr_schedule,
                   mom_schedule,
                   l2_regularization_weight=0)
#learner = momentum_sgd(score.parameters, lr_schedule, mom_schedule)

#lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
#learner = adagrad(score.parameters, lr_schedule)

trainer = Trainer(score, loss, eval_error, [learner])

TcmpE = datetime.datetime.now()
# ## Training
Example #31
# loss = (mkld)

# _q_prime = C.tanh(q)
# _mu = C.reduce_mean(_q_prime, axis=C.Axis.default_batch_axis())
# _sigma = C.reduce_mean(C.square(_q_prime-_mu), axis=C.Axis.default_batch_axis())
# loss += C.reduce_mean(C.square(_mu)) + C.reduce_mean(C.square(_sigma-0.615))

# # _log_mu = C.reduce_mean(C.log(C.abs(q)), axis=C.Axis.default_batch_axis())
# # loss += C.reduce_mean(C.square(_log_mu+0.57))

from IPython import embed;embed()
exit()


lr_rate = 1e-3
learner = C.adam(loss.parameters, C.learning_parameter_schedule_per_sample(lr_rate), C.momentum_schedule(0.99))
trainer = C.Trainer(loss, (loss, None), [learner])

for i in tqdm(range(10000)):
    # v = np.random.uniform(size=(1,2))
    v = datasets.make_moons(n_samples=1000, noise=.05)[0].astype(np.float32)
    trainer.train_minibatch({loss.arguments[0]:v})

    # from IPython import embed;embed()
    if i%100 == 0:
        print('\n',trainer.previous_minibatch_loss_average)

    if len(bn) > 0: # batch norm
        result = C.combine(bn).eval({loss.arguments[0]:v})
        result = list(result.values())
        momentum = C.Constant(0.9)
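
The fragment above omits the definitions of `loss`, `q` and `bn`, so it cannot run as shown. Purely as a self-contained stand-in (not the original model), the same `train_minibatch` loop can be exercised with a tiny reconstruction loss on the make_moons data:

import numpy as np
import cntk as C
from sklearn import datasets

x_in = C.input_variable(2)
hidden = C.layers.Dense(16, activation=C.tanh)(x_in)
recon = C.layers.Dense(2)(hidden)
demo_loss = C.squared_error(recon, x_in)      # stand-in loss, not the original mkld loss

learner = C.adam(demo_loss.parameters,
                 C.learning_parameter_schedule_per_sample(1e-3),
                 C.momentum_schedule(0.99))
trainer = C.Trainer(recon, (demo_loss, None), [learner])

for i in range(1000):
    v = datasets.make_moons(n_samples=256, noise=.05)[0].astype(np.float32)
    trainer.train_minibatch({x_in: v})
    if i % 100 == 0:
        print(trainer.previous_minibatch_loss_average)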
Example #32
0
def test_learner_init():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    #test new API: learning_parameter_schedule

    # explicitly specify the reference minibatch size; the learning rate is a plain number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    # with a plain-number learning rate, the schedule gets its reference minibatch size from the learner:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    # without an explicit reference minibatch size, the schedule defaults to IGNORE:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    # the schedule keeps its own reference minibatch size (20) rather than the learner's (25):
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    # the schedule keeps its explicitly set reference minibatch size:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    # without an explicit reference minibatch size, the schedule defaults to IGNORE:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # reference minibatch size explicitly set to IGNORE (compatible mode); the learning rate is a schedule:
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    # both the learner and the schedule end up with IGNORE (compatible mode):
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    # the schedule keeps its own reference minibatch size even in compatible mode:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # reference minibatch size explicitly set to IGNORE (compatible mode); the learning rate is a schedule:
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    # both the learner and the schedule end up with IGNORE (compatible mode):
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=0.4,
                                momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=0.4,
                            minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters,
                    lr=0.4,
                    momentum=0.9,
                    variance_momentum=0.9,
                    minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=0.4,
                              momentum=0.9,
                              variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=0.4,
                            momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=0.4,
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters,
                  lr=[0.4, 0.1, 0.001],
                  minibatch_size=32,
                  epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=[0.4, 0.1, 0.001],
                                momentum=[0.9],
                                minibatch_size=32,
                                epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            minibatch_size=32,
                            epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters,
                    lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001],
                    variance_momentum=[0.9],
                    minibatch_size=32,
                    epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters,
                          lr=[0.4, 0.1, 0.001],
                          minibatch_size=32,
                          epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=[0.4, 0.1, 0.001],
                              momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32,
                              epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            momentum=[0.9],
                            minibatch_size=32,
                            epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=[0.4, 0.1, 0.001],
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32,
                         epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters,
               lr=lr_per_sample,
               momentum=momentum,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3],
                                                minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                                minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2],
                                                minibatch_size=1,
                                                epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
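
The test above boils down to three rules for where a schedule's reference minibatch size comes from. The compact sketch below restates them, using the same private `_learning_rate_schedule` attribute and `is_compatible_mode()` check the test itself relies on:

import cntk as C

p = C.parameter(shape=(1,))

# 1. plain-number lr: the schedule inherits the learner's reference minibatch size
l1 = C.sgd([p], lr=0.1, minibatch_size=25)
assert l1._learning_rate_schedule.minibatch_size == 25

# 2. a schedule with its own reference minibatch size keeps it
l2 = C.sgd([p], lr=C.learning_parameter_schedule(0.1, minibatch_size=20), minibatch_size=25)
assert l2._learning_rate_schedule.minibatch_size == 20

# 3. minibatch_size=C.learners.IGNORE puts the learner into compatible mode
l3 = C.sgd([p], lr=C.learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
assert l3.is_compatible_mode()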
Example #33
0
def test_momentum_schedule_per_sample(params, expectation):
    l = C.momentum_schedule(*params)
    assert [l[i] for i in range(len(expectation))] == expectation
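
The parametrize decorator for this test is not shown in the snippet; based on the MOMENTUM_SCHEDULE_PARAMS table further down this page, one concrete case expands as follows:

import cntk as C

# mirrors the last MOMENTUM_SCHEDULE_PARAMS case below: three (count, value)
# pairs with a unit of 5, giving 15 x 0.2, then 10 x 0.4, then 0.8 onwards
m = C.momentum_schedule([(3, 0.2), (2, 0.4), (1, 0.8)], 5)
expected = [0.2] * 15 + [0.4] * 10 + [0.8] * 20
assert [m[i] for i in range(len(expected))] == expected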
Example #34
0
def test_momentum_schedule_per_sample(params, expectation):
    l = C.momentum_schedule(*params)
    assert [l[i] for i in range(len(expectation))] == expectation

def test_lattice_deserializer(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_dir = ''
    if 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ:
        data_dir = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
    else:
        print('CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY environment variable is not defined')

    print(data_dir)
    data_dir = os.path.join(data_dir, "Speech", "AN4Corpus", "v0")
    os.chdir(data_dir)
    feature_dimension = 33
    feature = C.sequence.input_variable(feature_dimension)

    label_dimension = 133
    label = C.sequence.input_variable(label_dimension)

    axis_lattice = C.Axis.new_unique_dynamic_axis('lattice_axis')
    lattice = C.sequence.input_variable(1, sequence_axis=axis_lattice)

    train_feature_filepath = os.path.join(data_dir,"glob_0000.scp")
    train_label_filepath = os.path.join(data_dir,"glob_0000.mlf")
    train_lattice_index_path = os.path.join(data_dir,"latticeIndex.txt")
    mapping_filepath = os.path.join(data_dir,"state.list")
    train_feature_stream = C.io.HTKFeatureDeserializer(
        C.io.StreamDefs(speech_feature=C.io.StreamDef(shape=feature_dimension, scp=train_feature_filepath)))
    train_label_stream = C.io.HTKMLFDeserializer(
        mapping_filepath, C.io.StreamDefs(speech_label=C.io.StreamDef(shape=label_dimension, mlf=train_label_filepath)), True)
    train_lattice_stream = C.io.LatticeDeserializer(train_lattice_index_path,
        C.io.StreamDefs(speech_lattice=C.io.StreamDef()))
    train_data_reader = C.io.MinibatchSource([train_feature_stream, train_label_stream, train_lattice_stream],
        frame_mode=False)
    train_input_map = {feature: train_data_reader.streams.speech_feature,
                       label: train_data_reader.streams.speech_label,
                       lattice: train_data_reader.streams.speech_lattice}

    feature_mean = np.fromfile(os.path.join("GlobalStats", "mean.363"), dtype=float, count=feature_dimension)
    feature_inverse_stddev = np.fromfile(os.path.join("GlobalStats", "var.363"), dtype=float, count=feature_dimension)

    feature_normalized = (feature - feature_mean) * feature_inverse_stddev

    with C.default_options(activation=C.sigmoid):
        z = C.layers.Sequential([
            C.layers.For(range(3), lambda: C.layers.Recurrence(C.layers.LSTM(1024))),
            C.layers.Dense(label_dimension)
        ])(feature_normalized)
    mbsize = 1024
    mbs_per_epoch = 10
    max_epochs = 2

    symListPath = os.path.join(data_dir,"CY2SCH010061231_1369712653.numden.lats.symlist")
    phonePath = os.path.join(data_dir,"model.overalltying")
    stateListPath = os.path.join(data_dir,"state.list")
    transProbPath = os.path.join(data_dir,"model.transprob")

    criteria = C.lattice_sequence_with_softmax(label, z, z, lattice, symListPath, phonePath, stateListPath, transProbPath)
    err = C.classification_error(label,z)
    lr = C.learning_parameter_schedule_per_sample([(3, .01), (1,.001)])
    mm = C.momentum_schedule([(1000, 0.9), (0, 0.99)], mbsize)
    learner = C.momentum_sgd(z.parameters, lr, mm)
    trainer = C.Trainer(z, (criteria, err), learner)

    C.logging.log_number_of_parameters(z)
    progress_printer = C.logging.progress_print.ProgressPrinter(tag='Training', num_epochs = max_epochs)


    for epoch in range(max_epochs):
        for mb in range(mbs_per_epoch):
            minibatch = train_data_reader.next_minibatch(mbsize, input_map = train_input_map)
            trainer.train_minibatch(minibatch)
            progress_printer.update_with_trainer(trainer, with_metric = True)

        progress_printer.epoch_summary(with_metric = True)

    assert np.allclose(trainer.previous_minibatch_evaluation_average, 0.15064, atol=TOLERANCE_ABSOLUTE)
    assert np.allclose(trainer.previous_minibatch_loss_average, 0.035923, atol=TOLERANCE_ABSOLUTE)
    assert (trainer.previous_minibatch_sample_count == 218)
    assert (trainer.total_number_of_samples_seen == 5750)
    print("Completed successfully.")
Example #36
0
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    #test new API: learning_parameter_schedule

    # explicitly specify the reference minibatch size; the learning rate is a plain number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    # with a plain-number learning rate, the schedule gets its reference minibatch size from the learner:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    # without an explicit reference minibatch size, the schedule defaults to IGNORE:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    # the schedule keeps its own reference minibatch size (20) rather than the learner's (25):
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    # the schedule keeps its explicitly set reference minibatch size:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    # without an explicit reference minibatch size, the schedule defaults to IGNORE:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    # reference minibatch size explicitly set to IGNORE (compatible mode); the learning rate is a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    # both the learner and the schedule end up with IGNORE (compatible mode):
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    # the schedule keeps its own reference minibatch size even in compatible mode:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # reference minibatch size explicitly set to IGNORE (compatible mode); the learning rate is a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    # both the learner and the schedule end up with IGNORE (compatible mode):
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                                minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001


    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                            minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10,
                         min=1e-8,
                         minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size = 1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 +[0.2]*2 +[0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3,0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1, epoch_size = 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
Example #37
0
def train_and_test(reader_train, reader_test, model_func):

    ###############################################
    # Training the model
    ###############################################

    # Instantiate the input and the label variables
    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    # Create the model function
    model = model_func(input)

    # The label for this network is the same as the input MNIST image.
    # Note: inside the model the input is scaled to the 0-1 range,
    # so we rescale the label to the same range below.
    # We also show how to use a custom loss function:
    # loss = -(y * log(p) + (1 - y) * log(1 - p)), where p = model output and y = target

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    # training config
    epoch_size = 30000  # 30000 samples is half the dataset size
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    # Instantiate the trainer object to drive the model training
    lr_per_sample = [0.00003]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    # Momentum which is applied on every minibatch_size = 64 samples
    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    # We use a variant of the Adam optimizer which is known to work well on this dataset
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    # Instantiate the trainer
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # Map the data streams to the input and labels.
    # Note: for autoencoders input == label
    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        # Read a mini batch from the training data file
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        # Run the trainer on and perform model training
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100.0) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from the training data
    #############################################################################

    # Test data for trained model
    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):

        # We are loading test data in batches specified by test_minibatch_size
        # Each data point in the minibatch is a MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)

        # Evaluate the test minibatch and accumulate the error metric
        eval_error = trainer.test_minibatch(data)

        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    # Average of evaluation errors of all test minibatches
    test_error = (metric_numer * 100.0) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
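
train_and_test expects readers that expose a `features` stream and relies on the globals `input_dim` and `isFast`. A possible caller, assuming the standard MNIST CTF text files (the paths below are placeholders) and a small stand-in autoencoder as the model function, could look like this sketch:

import cntk as C

input_dim = 784          # globals assumed by train_and_test
isFast = True

def create_reader(path, is_training):
    # assumed CTF layout: |features 784 values  |labels 10 values
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='features', shape=input_dim),
            labels=C.io.StreamDef(field='labels', shape=10))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)

def simple_autoencoder(x):
    # stand-in model: scales pixels to [0, 1] and reconstructs them
    encode = C.layers.Dense(32, activation=C.relu)(x / 255.0)
    return C.layers.Dense(input_dim, activation=C.sigmoid)(encode)

reader_train = create_reader('Train-28x28_cntk_text.txt', True)   # placeholder paths
reader_test = create_reader('Test-28x28_cntk_text.txt', False)
model, train_err, test_err = train_and_test(reader_train, reader_test, simple_autoencoder)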
Example #38
0
        ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]
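
LEARNER_LAMBDAS is not exercised in the part of the test module shown here; presumably it feeds a parametrized test elsewhere. A hedged sketch of such a test (the name test_learner_update_smoke and its body are assumptions, relying on the module-level imports of cntk as C and pytest) could be:

@pytest.mark.parametrize("learner_factory", LEARNER_LAMBDAS)
def test_learner_update_smoke(learner_factory):
    # tiny model: one weight, squared-error loss
    x = C.input_variable(shape=(1,), needs_gradient=True)
    y = C.input_variable(shape=(1,))
    w = C.parameter(shape=(1,), init=1.0)
    z = x * w
    loss = C.squared_error(z, y)

    learner = learner_factory(z.parameters)
    trainer = C.Trainer(z, (loss, None), [learner])
    trainer.train_minibatch({x: [[1.0]], y: [[2.0]]})  # one update should not raise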

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
Example #39
0
def main():
    # We keep up to 14 inputs from a day
    TIMESTEPS = int(input("TIMESTEPS: "))

    # 20000 is the maximum total output in our dataset. We normalize all values by
    # this so our inputs fall in the 0.0 to 1.0 range.
    NORMALIZE = int(input("NORMALIZE: "))

    # process batches of 10 days
    BATCH_SIZE = int(input("BATCH_SIZE: "))
    BATCH_SIZE_TEST = int(input("BATCH_SIZE_TEST: "))

    # Specify the internal-state dimensions of the LSTM cell
    H_DIMS = int(input("H_DIMS: "))

    data_source = input("Source(1=solar,2=local,3=sin,4=my): ")
    if data_source == "1" or data_source == "":
        X, Y = get_solar_old(TIMESTEPS, NORMALIZE)
    elif data_source == "2":
        X, Y = get_solar(TIMESTEPS, NORMALIZE)
    elif data_source == "3":
        X, Y = get_sin(5, 5, int(input("Data length: ")))
    else:
        X, Y = get_my_data(H_DIMS, H_DIMS)

    epochs = input("Epochs: ")
    if epochs == "":
        EPOCHS = 100
    else:
        EPOCHS = int(epochs)

    start_time = time.time()

    # input sequences
    x = C.sequence.input_variable(1)

    model_file = "{}_epochs.model".format(EPOCHS)

    if not os.path.exists(model_file):
        print("Training model {}...".format(model_file))

        # create the model
        z = create_model(x, H_DIMS)

        # expected output (label); the dynamic axes of the model output
        # are used as the dynamic axes of the label input
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

        # the learning rate
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)

        # loss function
        loss = C.squared_error(z, var_l)

        # use squared error to determine error for now
        error = C.squared_error(z, var_l)

        # use the fsadagrad learner (an Adam-style optimizer with momentum)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
        learner = C.fsadagrad(z.parameters,
                              lr=lr_schedule,
                              momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, EPOCHS):
            for x_batch, l_batch in next_batch(X, Y, "train", BATCH_SIZE):
                trainer.train_minibatch({x: x_batch, var_l: l_batch})

            if epoch % (EPOCHS / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))

        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for labeltxt in ["train", "val", "test"]:
            print("mse for {}: {:.6f}".format(
                labeltxt, get_mse(trainer, x, X, Y, BATCH_SIZE, var_l,
                                  labeltxt)))

        z.save(model_file)

    else:
        z = C.load_model(model_file)
        x = cntk.logging.find_all_with_name(z, "")[-1]

    # Print out all layers in the model
    print("Loading {} and printing all nodes:".format(model_file))
    node_outputs = cntk.logging.find_all_with_name(z, "")
    for n in node_outputs:
        print("  {}".format(n))

    # predict
    # f, a = plt.subplots(2, 1, figsize=(12, 8))
    for j, ds in enumerate(["val", "test"]):
        fig = plt.figure()
        a = fig.add_subplot(2, 1, 1)
        results = []
        for x_batch, y_batch in next_batch(X, Y, ds, BATCH_SIZE_TEST):
            pred = z.eval({x: x_batch})
            results.extend(pred[:, 0])
        # because we normalized the input data we need to multiply the prediction
        # by NORMALIZE to get back the real values.
        a.plot((Y[ds] * NORMALIZE).flatten(), label=ds + " raw")
        a.plot(np.array(results) * NORMALIZE, label=ds + " pred")
        a.legend()

        fig.savefig("{}_chart_{}_epochs.jpg".format(ds, EPOCHS))

    print("Delta: ", time.time() - start_time)
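
create_model is called above but its definition is not part of this snippet. Based on CNTK's time-series tutorials, a compatible single-layer LSTM regressor might look like the following sketch (an assumption, not the original code):

def create_model(x, h_dims):
    # read the input sequence with an LSTM, keep the last state, regress one value
    with C.layers.default_options(initial_state=0.1):
        m = C.layers.Recurrence(C.layers.LSTM(h_dims))(x)
        m = C.sequence.last(m)
        m = C.layers.Dropout(0.2)(m)
        return C.layers.Dense(1)(m)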
Example #40
0
        ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)