Example #1
0
def create_learner(model):
    '''Build the CNTK learner for ``model`` selected by the global ``opt.optim``.'''
    # One minibatch-unit learning-rate schedule shared by every optimizer;
    # momentum (adam only) is expressed as a time constant of 1100 samples.
    schedule = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_tc = C.momentum_as_time_constant_schedule(1100)
    choice = opt.optim
    if choice == 'sgd':
        return C.sgd(model.parameters, lr=schedule)
    if choice == 'adam':
        return C.adam(model.parameters, lr=schedule, momentum=momentum_tc)
    if choice == 'adagrad':
        return C.adagrad(model.parameters, lr=schedule)
    raise RuntimeError("Invalid optim method: " + opt.optim)
Example #2
0
def create_learner(model):
    '''Build the CNTK learner for ``model`` selected by the global ``opt.optim``.

    Supported values of ``opt.optim`` are ``'sgd'``, ``'adam'`` and
    ``'adagrad'``; any other value raises ``RuntimeError``.
    '''
    # Fix: the schedule is built with UnitType.minibatch, so name the local
    # accordingly (it was misleadingly called ``lr_per_sample`` before).
    lr_per_minibatch = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    # Momentum (used by adam only) expressed as a time constant of 1100 samples.
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Example #3
0
def create_learner(model):
    '''Build the CNTK learner for ``model`` selected by the global ``opt.optim``.'''
    # Shared learning-rate schedule; the per-sample momentum constant is
    # presumably exp(-1/1100) -- TODO confirm against the paired examples
    # that use momentum_as_time_constant_schedule(1100).
    lr_schedule = C.learning_parameter_schedule(opt.lr)
    per_sample_momentum = C.momentum_schedule_per_sample(0.9990913221888589)
    chosen = opt.optim
    if chosen == 'sgd':
        return C.sgd(model.parameters, lr=lr_schedule)
    if chosen == 'adam':
        return C.adam(model.parameters, lr=lr_schedule,
                      momentum=per_sample_momentum)
    if chosen == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_schedule)
    raise RuntimeError("Invalid optim method: " + opt.optim)
def create_learner(model):
    '''Build the CNTK learner for ``model`` selected by the global ``opt.optim``.'''
    rate = C.learning_parameter_schedule(opt.lr)
    momentum = C.momentum_schedule_per_sample(0.9990913221888589)
    # Table-driven dispatch: each entry lazily constructs its learner so
    # only the selected optimizer is instantiated.
    builders = {
        'sgd': lambda: C.sgd(model.parameters, lr=rate),
        'adam': lambda: C.adam(model.parameters,
                               lr=rate,
                               momentum=momentum),
        'adagrad': lambda: C.adagrad(model.parameters, lr=rate),
    }
    build = builders.get(opt.optim)
    if build is None:
        raise RuntimeError("Invalid optim method: " + opt.optim)
    return build()
Example #5
0
def test_learner_init():
    """Smoke-test learner construction with the legacy schedule API.

    Builds a one-parameter graph, then constructs sgd, momentum_sgd,
    nesterov, adagrad, fsadagrad, rmsprop and adadelta learners via
    ``learning_rate_schedule``/``UnitType``, while toggling the
    process-wide defaults for unit-gain momentum and mean-gradient use.
    Statement order matters: the ``set_default_*`` calls change global
    state that the subsequent constructions pick up.
    """
    # Minimal differentiable graph: one input times one scalar parameter,
    # so the learners have a non-empty parameter list to own.
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # Per-sample constant schedule; learning_rate() reports the current value.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner.learning_rate() == 0.1

    # Resetting the schedule takes effect immediately (first entry is 1).
    learner.reset_learning_rate(learning_rate_schedule([1,2,3], UnitType.minibatch));
    assert learner.learning_rate() == 1.0

    # The learner exposes the parameters it was constructed with.
    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    # Unit-gain momentum defaults to enabled.
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # momentum_sgd accepts unit_gain positionally or as a keyword.
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    # Flip the process-wide unit-gain default off and verify it sticks.
    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    # List schedule: successive epoch values 0.1 then 0.2.
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.nesterov(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    lr_per_sample = learning_rate_schedule([0.1]*3 +[0.2]*2 +[0.3], UnitType.sample)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    # Restore the unit-gain default before the fsadagrad checks.
    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # (count, value) pair schedule: 0.1 for 3 epochs, 0.2 for 2, then 0.3.
    lr_per_sample = learning_rate_schedule([(3,0.1), (2, 0.2), (1, 0.3)], UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    # rmsprop takes gamma/inc/dec/max/min smoothing controls positionally.
    gamma, inc, dec, max, min = [0.1]*5
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample, 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    # Toggle the mean-gradient default off, build adadelta, then back on.
    C.set_default_use_mean_gradient_value(False)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert not use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)

    C.set_default_use_mean_gradient_value(True)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
Example #6
0
def TrainAndValidate(trainfile):
    """Train the CNN passage-ranking model on ``trainfile`` and return it.

    Runs ``total_epochs`` epochs of minibatch training with adagrad, saves a
    checkpoint per epoch, and scores the validation set after each epoch.
    Relies on module-level helpers/globals defined elsewhere in the file:
    ``create_reader``, ``cnn_network``, ``validation_query_vectors``,
    ``validation_passage_vectors``, ``validation_labels`` and
    ``precision_recall_fscore_support`` -- TODO confirm their definitions.
    """

    #*****Hyper-Parameters******
    q_max_words= 12
    p_max_words = 50
    emb_dim = 50
    num_classes = 2
    minibatch_size = 32
    epoch_size = 500000 #No.of samples in training set
    total_epochs = 5 #Total number of epochs to run
    query_total_dim = q_max_words*emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words*emb_dim


    #****** Create placeholders for reading Training Data  ***********
    # Dense (1, words, emb_dim) inputs for query and passage; one-hot label.
    query_input_var =  C.ops.input_variable((1,q_max_words,emb_dim),np.float32,is_sparse=False)
    passage_input_var =  C.ops.input_variable((1,p_max_words,emb_dim),np.float32,is_sparse=False)
    output_var = C.input_variable(num_classes,np.float32,is_sparse = False)
    train_reader = create_reader(trainfile, True, query_total_dim, passage_total_dim, label_total_dim)
    input_map = { query_input_var : train_reader.streams.queryfeatures, passage_input_var:train_reader.streams.passagefeatures, output_var : train_reader.streams.labels}

    # ********* Model configuration *******
    # Binary cross-entropy loss with classification error as the eval metric.
    model_output = cnn_network(query_input_var, passage_input_var, num_classes)
    loss = C.binary_cross_entropy(model_output, output_var)
    pe = C.classification_error(model_output, output_var)
    lr_per_minibatch = C.learning_rate_schedule(0.03, C.UnitType.minibatch)
    learner = C.adagrad(model_output.parameters, lr=lr_per_minibatch)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=total_epochs)

    #************Create Trainer with model_output object, learner and loss parameters*************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output) ; print()

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):       # loop over epochs
        print("Epoch : ",epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)        # training step
            sample_count += data[output_var].num_samples   # count samples processed so far

        trainer.summarize_training_progress()

        model_output.save("data/models/CNN_{}.dnn".format(epoch)) # Save the model for every epoch

        #*** Find metrics on validation set after every epoch ******#  (Note : you can skip doing this for every epoch instead to optimize the time, do it after every k epochs)
        # Label 1 wins ties; metrics computed but only the commented print uses them.
        predicted_labels=[]
        for i in range(len(validation_query_vectors)):
            queryVec   = np.array(validation_query_vectors[i],dtype="float32").reshape(1,q_max_words,emb_dim)
            passageVec = np.array(validation_passage_vectors[i],dtype="float32").reshape(1,p_max_words,emb_dim)
            scores = model_output(queryVec,passageVec)[0]   # do forward-prop on model to get score
            predictLabel = 1 if scores[1]>=scores[0] else 0
            predicted_labels.append(predictLabel)
        metrics = precision_recall_fscore_support(np.array(validation_labels), np.array(predicted_labels), average='binary')
        #print("precision : "+str(metrics[0])+" recall : "+str(metrics[1])+" f1 : "+str(metrics[2])+"\n")



    return model_output
Example #7
0
    ((0.2, UnitType.sample), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], UnitType.sample, 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4),
       (1, 0.8)], UnitType.sample, 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

# (constructor-args, expected-values) pairs for momentum-schedule tests:
# a plain constant, a constant queried repeatedly, a list schedule with
# epoch_size 5, and (count, value) pairs with epoch_size 5. The second
# element presumably lists the values expected at successive sample
# offsets -- TODO confirm against the consuming test.
MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2, ), [0.2]),
    ((0.2, ), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4),
       (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params), lambda params: C.adagrad(
        params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params,
                          lr=learning_rate_schedule(1, UnitType.minibatch),
                          momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params,
                               lr=learning_rate_schedule(
                                   1, UnitType.minibatch),
                               momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params,
                              lr=learning_rate_schedule(1, UnitType.minibatch),
                              momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params,
                             lr=learning_rate_schedule(1, UnitType.minibatch),
                             gamma=0.1,
                             inc=3.0,
                             dec=0.1,
Example #8
0
def test_learner_init():
    """Smoke-test learner construction with the new schedule API.

    Builds a one-parameter graph, then checks ``learning_parameter_schedule``
    reference-minibatch semantics (explicit size, default IGNORE, and
    compatible mode), constructs every learner type with scalar and
    piecewise learning rates, and finally re-exercises the momentum-based
    learners with per-sample schedules while toggling the process-wide
    unit-gain default.

    Fix over the original: the assertions after the piecewise-``fsadagrad``
    construction re-checked ``myadagrad`` (copy-paste bug), so the
    fsadagrad learner was never actually verified; they now check
    ``myfsadagrad``.
    """
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    #test new API: learning_parameter_schedule

    #explictly specify reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explictly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    #the schedule's own minibatch size (20) wins over the learner's (25):
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explictly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    #minibatch_size=IGNORE puts the learner into compatible mode:
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explictly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    #every learner type accepts a scalar lr plus explicit minibatch_size:
    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=0.4,
                                momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=0.4,
                            minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters,
                    lr=0.4,
                    momentum=0.9,
                    variance_momentum=0.9,
                    minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=0.4,
                              momentum=0.9,
                              variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=0.4,
                            momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=0.4,
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    #piecewise lr lists advance by epoch_size samples per entry:
    mysgd = C.sgd(parameters=res.parameters,
                  lr=[0.4, 0.1, 0.001],
                  minibatch_size=32,
                  epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=[0.4, 0.1, 0.001],
                                momentum=[0.9],
                                minibatch_size=32,
                                epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            minibatch_size=32,
                            epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters,
                    lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001],
                    variance_momentum=[0.9],
                    minibatch_size=32,
                    epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters,
                          lr=[0.4, 0.1, 0.001],
                          minibatch_size=32,
                          epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=[0.4, 0.1, 0.001],
                              momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32,
                              epoch_size=512)
    # BUGFIX: these five assertions used to re-check myadagrad, leaving
    # the fsadagrad learner unverified.
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            momentum=[0.9],
                            minibatch_size=32,
                            epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=[0.4, 0.1, 0.001],
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32,
                         epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    #per-sample schedules (minibatch_size=1) for the momentum learners:
    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters,
               lr=lr_per_sample,
               momentum=momentum,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3],
                                                minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                                minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2],
                                                minibatch_size=1,
                                                epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
Example #9
0
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    # rmsprop with a scalar lr: both the learner and its internal schedule
    # report the minibatch_size they were constructed with.
    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    # With a list lr and epoch_size=512, the rate steps down every 512 samples.
    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                                minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001


    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    # BUG FIX: these five asserts previously re-checked myadagrad (copy-paste
    # from the block above), leaving the fsadagrad learner unverified.
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                            minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10,
                         min=1e-8,
                         minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    # Learner exposes its parameters as cntk Parameter objects.
    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # Positional, mixed and keyword unit_gain forms must all be accepted.
    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size = 1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    # Flipping the process-wide default must be observable.
    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 +[0.2]*2 +[0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # (count, value) pair form of the schedule list.
    lr_per_sample = learning_parameter_schedule([(3,0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1, epoch_size = 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
Example #10
0
        ((0.2, 0), [0.2], 0),
        ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

# (constructor_args, expected_per_index_values) cases for momentum schedules.
MOMENTUM_SCHEDULE_PARAMS = [
    # constant momentum, probed at one and at several indices
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    # list form with epoch_size=5: each entry lasts 5 samples, last repeats
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    # (count, value) pair form with epoch_size=5
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

def _legacy_lr():
    # Fresh legacy minibatch-unit lr schedule (rate 1) per learner construction.
    return learning_rate_schedule(1, UnitType.minibatch)


def _legacy_mom():
    # Constant momentum schedule of 0.9.
    return C.momentum_schedule(0.9)


# One factory per learner flavour; each maps a parameter list to a learner.
LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=_legacy_lr()),
    lambda params: C.adam(params, lr=_legacy_lr(), momentum=_legacy_mom()),
    lambda params: C.fsadagrad(params, lr=_legacy_lr(), momentum=_legacy_mom()),
    lambda params: C.nesterov(params, lr=_legacy_lr(), momentum=_legacy_mom()),
    lambda params: C.rmsprop(params, lr=_legacy_lr(), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=_legacy_lr()),
    lambda params: C.momentum_sgd(params, lr=_legacy_lr(), momentum=_legacy_mom()),
]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    """Legacy schedule: check the reported minibatch_size and indexed values."""
    schedule = learning_rate_schedule(*params)
    assert schedule.minibatch_size == minibatch_size
    for idx, expected in enumerate(expectation):
        assert schedule[idx] == expected

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
Example #11
0
import cntk

import numpy as np

# Toy regression example: fit a two-layer dense net to a single (x, y) pair
# with the adagrad learner.
x_input = np.array([[1, 2, 3, 4, 5]], np.float32)
y_input = np.array([[10]], np.float32)

# BUG FIX: the CNTK API function is cntk.input_variable (singular);
# cntk.input_variables does not exist and raised AttributeError.
X = cntk.input_variable(5, np.float32)
y = cntk.input_variable(1, np.float32)

from cntk.layers import Dense, Sequential
model = Sequential([Dense(32), Dense(1)])(X)

loss = cntk.squared_error(model, y)

learning_rate = 0.001
trainer = cntk.Trainer(model, (loss),
                       cntk.adagrad(model.parameters, learning_rate))

# Repeatedly train on the single minibatch.
for epoch in range(10):
    trainer.train_minibatch({X: x_input, y: y_input})
        ((0.2, 0), [0.2], 0),
        ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

# (constructor_args, expected_per_index_values) cases for momentum schedules.
MOMENTUM_SCHEDULE_PARAMS = [
    # constant momentum, probed at one and at several indices
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    # list form with epoch_size=5: each entry lasts 5 samples, last repeats
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    # (count, value) pair form with epoch_size=5
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

def _unit_lr():
    # Fresh learning-parameter schedule (rate 1) per learner construction.
    return learning_parameter_schedule(1)


def _mom_09():
    # Constant momentum schedule of 0.9.
    return C.momentum_schedule(0.9)


# One factory per learner flavour; each maps a parameter list to a learner.
LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=_unit_lr()),
    lambda params: C.adam(params, lr=_unit_lr(), momentum=_mom_09()),
    lambda params: C.fsadagrad(params, lr=_unit_lr(), momentum=_mom_09()),
    lambda params: C.nesterov(params, lr=_unit_lr(), momentum=_mom_09()),
    lambda params: C.rmsprop(params, lr=_unit_lr(), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=_unit_lr()),
    lambda params: C.momentum_sgd(params, lr=_unit_lr(), momentum=_mom_09()),
]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    """Legacy schedule: check the reported minibatch_size and indexed values."""
    schedule = learning_rate_schedule(*params)
    assert schedule.minibatch_size == minibatch_size
    for idx, expected in enumerate(expectation):
        assert schedule[idx] == expected

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):