def test_epochsize_wrn_for_momentum_time_constant():
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")

        C.momentum_as_time_constant_schedule(1100, epoch_size=1000)

        assert len(w) == 1
        assert issubclass(w[-1].category, RuntimeWarning)
        assert "epoch_size" in str(w[-1].message)
Example #2
def test_momentum_schedule():
    m = 2500
    ms = C.momentum_as_time_constant_schedule([m])
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    ms = C.momentum_as_time_constant_schedule(m)
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    mlist = [980, 520]
    msl = C.momentum_as_time_constant_schedule(mlist)
    expected = np.exp(-1.0 / np.asarray(mlist))
    assert all(mi == ei for mi, ei in zip(msl, expected))
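A minimal sketch, not CNTK API, of the conversion these assertions rely on: a momentum time constant tc corresponds to a per-sample momentum of exp(-1 / tc).
import numpy as np

def momentum_from_time_constant(tc):
    # per-sample momentum implied by a time constant tc
    return np.exp(-1.0 / tc)

def time_constant_from_momentum(m):
    # inverse mapping: tc = -1 / ln(m)
    return -1.0 / np.log(m)

assert np.isclose(momentum_from_time_constant(2500), np.exp(-1.0 / 2500))
assert np.isclose(time_constant_from_momentum(momentum_from_time_constant(2500)), 2500)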
Example #3
    def create_trainer(self):
        try:
            p = self.output.parameters
            # Three of four parameters are learned by block_momentum_distributed_learner.
            bmd_learner = cntk.block_momentum_distributed_learner(
                cntk.momentum_sgd(
                    [p[0], p[1], p[2]],
                    cntk.learning_parameter_schedule(0.0001),
                    cntk.momentum_as_time_constant_schedule(1000)),
                block_size=1000,
                block_learning_rate=0.01,
                block_momentum_as_time_constant=1000)

            # New API to mark which learner is to be used for metric aggregation.
            bmd_learner.set_as_metric_aggregator()

            # The last parameter is learned by the data_parallel_distributed_learner.
            momentum_schedule = cntk.momentum_schedule_per_sample(
                0.9990913221888589)
            lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
            dpd_learner = cntk.data_parallel_distributed_learner(
                cntk.momentum_sgd([p[3]], lr_per_sample, momentum_schedule,
                                  True))

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [bmd_learner, dpd_learner], [
                    cntk.logging.ProgressPrinter(
                        freq=progress_freq, tag="Training", rank=comm_rank)
                ])
        except RuntimeError:
            self.trainer = None
        return
Example #4
def train(streamf):
    global net
    minibatch_size = 1024
    max_epochs = 2000
    epoch_size = 50000
    net = nn(input_var)
    loss = cntk.losses.binary_cross_entropy(net, label_var)
    error = cntk.classification_error(net, label_var)
    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch,
                                              cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.adam(net.parameters,
                        lr_schedule,
                        momentum=momentum_as_time_constant,
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner],
                           progress_writers=progres)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
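A hypothetical way to build the streamf reader that the train() functions above expect; the CTF file name, field names, and dimensions are assumptions, as are the global input_var/label_var (a sketch, not part of the original examples).
import numpy as np
import cntk

input_dim, num_labels = 45, 2          # assumed dimensions
input_var = cntk.input_variable(input_dim, np.float32, name='features')
label_var = cntk.input_variable(num_labels, np.float32, name='labels')

# CTF-format source whose stream names match streamf.streams.features / .labels
streamf = cntk.io.MinibatchSource(
    cntk.io.CTFDeserializer('train.ctf', cntk.io.StreamDefs(
        features=cntk.io.StreamDef(field='x', shape=input_dim),
        labels=cntk.io.StreamDef(field='y', shape=num_labels))),
    randomize=True)
# streamf can then be passed to train(streamf) above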
Example #5
def train(streamf):
    global net
    minibatch_size = 512
    max_epochs = 2000
    epoch_size = 48985
    net = nn(input_s, input_h, input_l, input_v)
    loss = cntk.losses.cross_entropy_with_softmax(net, label_var)
    error = cntk.classification_error(net, label_var)
    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch,
                                              cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.fsadagrad(net.parameters, lr_schedule,
                             momentum_as_time_constant)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner],
                           progress_writers=progres)
    input_map = {
        input_s: streamf.streams.spread,
        input_h: streamf.streams.high,
        input_l: streamf.streams.low,
        input_v: streamf.streams.volume,
        label_var: streamf.streams.labels
    }
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
Example #6
def train(streamf):
    input_var = cntk.input_variable(45, np.float32, name='features',
                                    dynamic_axes=cntk.axis.Axis.default_input_variable_dynamic_axes())
    label_var = cntk.input_variable(3, np.float32, name='labels')
    net = nn(input_var)
    loss = cntk.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)
    learning_rate = 0.02
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(5000 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    minibatch_size = 5000
    num_samples_per_sweep = 2000
    for i in range(0, num_samples_per_sweep):
        dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(dat1)
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if training_loss < 0.002:
            break
    return trainer
Example #7
def test_trainer(tmpdir, no_eval_function):
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size =1)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state":math.pi, "nested dict":{"a":"b"}, "list":[1,2,3]}
    trainer.save_checkpoint(p, external_state)
    restored_state = trainer.restore_from_checkpoint(p)

    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
Example #8
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = print_freq
    lr_per_sample = [2e-3] * 2 * schedule_step + [1e-3] * 2 * schedule_step + [
        5e-4
    ]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    if restore:
        trainer.restore_from_checkpoint("model-5.cntk")
    C.logging.log_number_of_parameters(dec)
    return trainer
Example #9
def train(streamf):
    global net
    net = nn(input_var)
    loss = cntk.losses.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)
    learning_rate = 0.01
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(140 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    minibatch_size = 1024
    max_epochs = 500
    epoch_size = 48985
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
Example #10
def test_momentum_schedule():
    m = 2500
    ms = C.momentum_as_time_constant_schedule([m])
    # all time-constant schedules are specified per sample
    assert ms.minibatch_size == 1
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    ms = C.momentum_as_time_constant_schedule(m)
    assert ms.minibatch_size == 1
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    mlist = [980, 520]
    msl = C.momentum_as_time_constant_schedule(mlist)
    assert msl.minibatch_size == 1
    expected = np.exp(-1.0 / np.asarray(mlist))
    assert all(mi == ei for mi, ei in zip(msl, expected))
Example #11
def train(reader, model_func, max_epochs=10):

    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000  # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    # (we don't run this many epochs, but if we did, these are good values)
    lr_per_sample = [0.003] * 4 + [0.0015] * 24 + [0.0003]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    # Momentum schedule
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(700)

    # The Adam optimizer is known to work well on this dataset; here plain SGD
    # is used and the momentum schedule above is left unused.
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.sgd(
        parameters=model.parameters,
        lr=lr_schedule,
        #momentum=momentum_as_time_constant,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)

    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    t = 0
    for epoch in range(max_epochs):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            data = reader.next_minibatch(
                minibatch_size,
                input_map={  # fetch minibatch
                    x: reader.streams.query,
                    y: reader.streams.slot_labels
                })
            trainer.train_minibatch(data)  # update model with it
            t += data[y].num_samples  # samples so far
        trainer.summarize_training_progress()
Example #12
def test_momentum_schedule():
    m = 2500
    ms = C.momentum_as_time_constant_schedule([m])
    # all time-constant schedules are specified per sample
    assert ms.minibatch_size == 1
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    ms = C.momentum_as_time_constant_schedule(m)
    assert ms.minibatch_size == 1
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    mlist = [980, 520]
    msl = C.momentum_as_time_constant_schedule(mlist)
    assert msl.minibatch_size == 1
    expected = np.exp(-1.0 / np.asarray(mlist))
    assert all(mi == ei for mi, ei in zip(msl, expected))
Example #13
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                     epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example #14
def fineTuneModel(folder_with_data, path_to_label_csv="label.csv",
                  original_model_path="../vgg13.model", max_epochs=10):

    trainingValues = getData(folder_with_data, path_to_label_csv)

    input_var = ct.input((1, height, width), np.float32)
    label_var = ct.input((num_classes), np.float32)
    print("cloning old model")
    z = clone_model(original_model_path, input_var)
    loss = ct.cross_entropy_with_softmax(z, label_var)
    metric = ct.classification_error(z, label_var)

    minibatch_size = 32
    epoch_size = trainingValues.getLengthOfData()

    lr_per_minibatch = [learning_rate] * 10 + [learning_rate / 2.0]
    mm_time_constant = -minibatch_size / np.log(0.9)
    lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
        unit=ct.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (loss, metric), learner)
    print("created trainer and learner")

    print("training started")
    while epoch < max_epochs :

        trainingValues.reset() 
        # Training 
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0

        #mini-batch learning
        while trainingValues.hasMoreMinibatches():
            #while there is data for a mini batch:
            x, y, currBatchSize = trainingValues.getNextMinibatch(minibatch_size)
            # x - images, y - labels/emotions
            trainer.train_minibatch({input_var: x, label_var: y})

            #maintain stats:
            training_loss += trainer.previous_minibatch_loss_average * currBatchSize
            training_accuracy += trainer.previous_minibatch_evaluation_average * currBatchSize
            
        training_accuracy /= trainingValues.getLengthOfData()
        training_accuracy = 1.0 - training_accuracy

        print("Epoch took:", time.time() - start_time, "seconds")
        print("training accuracy:\t\t{:.2f}%".format(training_accuracy*100))

        epoch += 1

    #SAVE MODEL
    z.save("../vgg13.model")
Example #15
def create_learner(model):
    '''Create the optimization method'''
    lr_per_sample = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_sample)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_sample)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Example #16
def create_learner(model):
    '''Create the optimization method'''
    lr_per_minibatch = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Example #17
    def _create_learner(self):
        lr_per_sample = [3e-5] * 10 + [1.5e-5] * 20 + [1e-5]
        lr_per_minibatch = [lr * self.minibatch_size for lr in lr_per_sample]
        lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                               C.UnitType.minibatch,
                                               self.epoch_size)
        momentum_as_time_constant = C.momentum_as_time_constant_schedule(20)
        # note: lr_schedule is built above but not passed to the learner;
        # a plain 3e-4 learning rate is used instead
        learner = C.adam(parameters=self.model.parameters,
                         lr=3e-4,
                         momentum=momentum_as_time_constant,
                         gradient_clipping_threshold_per_sample=0.21,
                         gradient_clipping_with_truncation=True)
        return learner
Example #18
    def modelInit(self):

        #create output model folder:
        self.output_model_folder = os.path.join(self.base_folder, R'models')
        if not os.path.exists(self.output_model_folder):
            os.makedirs(self.output_model_folder)

        self.model = VGG13(self.num_classes)
        self.input_var = ct.input(
            (1, self.model.input_height, self.model.input_width), np.float32)
        self.label_var = ct.input((self.num_classes), np.float32)

        print("initialized model")

        self.genData()
        # ct.input_variable takes the number of dimensions and automatically
        # creates one-hot encoded inputs; ct.input doesn't.

        # criterion of the model: loss and metric
        # loss = cross_entropy_with_softmax
        # metric = classification_error
        self.z = self.model.model(self.input_var)
        loss = ct.cross_entropy_with_softmax(self.z, self.label_var)
        metric = ct.classification_error(self.z, self.label_var)
        """
        pred = ct.softmax(z)
        loss = ct.negate(ct.reduce_sum(ct.element_times(label_var, ct.log(pred)), axis=-1)) 
        """
        minibatch_size = 32
        epoch_size = self.trainingValues.getLengthOfData()

        #THROW MOMENTUM:
        lr_per_minibatch = [self.model.learning_rate
                            ] * 20 + [self.model.learning_rate / 2.0] * 20 + [
                                self.model.learning_rate / 10.0
                            ]
        # use eta for the first 20 epochs, eta/2 for the next 20, then eta/10 for the rest
        mm_time_constant = -minibatch_size / np.log(0.9)
        lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
                                                unit=ct.UnitType.minibatch,
                                                epoch_size=epoch_size)
        mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

        # construct the trainer
        #learner performs model updates. can be adam() or sgd()
        learner = ct.momentum_sgd(self.z.parameters, lr_schedule, mm_schedule)
        # The Trainer optimizes the loss by SGD, and logs the metric
        self.trainer = ct.Trainer(self.z, (loss, metric), learner)

        print("created trainer and learner")
Example #19
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    features = C.input_variable(((2*context+1)*feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                    lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                    momentum=C.momentum_as_time_constant_schedule(1000),
                    low_memory=True,
                    gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example #20
    def create_trainer(self):
        learner = cntk.block_momentum_distributed_learner(
            cntk.momentum_sgd(self.output.parameters,
                              cntk.learning_parameter_schedule(0.0001),
                              cntk.momentum_as_time_constant_schedule(1000)),
            block_size=1000,
            block_learning_rate=0.01,
            block_momentum_as_time_constant=1000)

        comm_rank = cntk.distributed.Communicator.rank()
        self.trainer = cntk.Trainer(
            self.output, (self.ce, self.err), [learner], [
                cntk.logging.ProgressPrinter(
                    freq=progress_freq, tag="Training", rank=comm_rank)
            ])
Example #21
def test_output_to_retain():
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size =1)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
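    # p has two entries initialized to 10, so reduce_sum(p) == 20 and z == in1 + 20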
    assert np.allclose(var_map[z_output], np.asarray(in1_value)+20)
Example #22
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim, ), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters,
                       lr_per_sample,
                       momentum_time_constant,
                       True,
                       minibatch_size=0)
    ])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction; it cannot be downcast to MyPlus directly,
    # so serialize() is used as a workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Example #23
def create_trainer():
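    # clip(argmax(y), 0, 1) is 0 where the label index is 0 and 1 elsewhere, so the
    # decoder output and the loss are masked at positions whose label is class 0
    # (presumably a padding/blank class)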
    masked_dec = dec * C.ops.clip(C.ops.argmax(y), 0, 1)
    loss, label_error = criterion(masked_dec, y)
    loss *= C.ops.clip(C.ops.argmax(y), 0, 1)

    lr_schedule = C.learning_parameter_schedule_per_sample([1e-3] * 2 + [5e-4] * 2 + [1e-4], epoch_size=int(epoch_size))
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    C.logging.log_number_of_parameters(dec)
    return trainer
Example #24
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size = 1)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Example #25
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Example #26
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
Example #27
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = 1 * print_freq
    lr_per_sample = [1e-3]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(0)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    trainer = C.Trainer(dec, (loss, label_error), learner)
    trainer.restore_from_checkpoint(model_file)
    return trainer
Example #28
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                                        True)])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction; it cannot be downcast to MyPlus directly,
    # so serialize() is used as a workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Example #29
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    
    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
Example #30
def create_trainer(network, minibatch_size, epoch_size, progress_printer):
    """ Create trainer 
    """

    # Set learning parameters
    lr_per_sample = [0.0015625] * 10 + [0.00046875] * 10 + [0.00015625]
    momentum_time_constant = [0] * 20 + [-minibatch_size / np.log(0.9)]
    l2_reg_weight = 0.002

    lr_schedule = learning_rate_schedule(lr_per_sample,
                                         epoch_size=epoch_size,
                                         unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    learner = momentum_sgd(network['output'].parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)

    return Trainer(network['output'], (network['ce'], network['pe']), learner,
                   progress_printer)
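A quick numeric check, not part of the example above: with the time constant -minibatch_size / ln(0.9) used after the first 20 epochs, the effective per-minibatch momentum is exactly 0.9.
import numpy as np

minibatch_size = 64                 # assumed value; the function above takes it as a parameter
tc = -minibatch_size / np.log(0.9)  # last entry of momentum_time_constant above
assert np.isclose(np.exp(-minibatch_size / tc), 0.9)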
Example #31
def test_learner_init_legacy():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: this is a logically invalid combination, but it was the only way to use mean gradient and set learning rate in the past.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0


    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
Example #32
    def create_trainer(self):
        try:
            learner = cntk.block_momentum_distributed_learner(
                cntk.momentum_sgd(self.output.parameters,
                                  cntk.learning_parameter_schedule(0.0001),
                                  cntk.momentum_as_time_constant_schedule(1000)),
                block_size=1000, block_learning_rate=0.01,
                block_momentum_as_time_constant=1000)

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [learner],
                [cntk.logging.ProgressPrinter(freq=progress_freq,
                                              tag="Training", rank=comm_rank)])
        except RuntimeError:
            self.trainer = None
        return
Example #33
    results = re.findall("Completed successfully.", str_out)
    if len(results) != 2:
        print(str_out)
        assert False

if __name__ == '__main__':
    in1 = C.input_variable(shape=1)
    labels = C.input_variable(shape=1)
    p1 = parameter(shape=1)
    p2 = parameter(shape=1)
    n = plus(in1, p1, name='n')
    z = plus(n, p2, name='z')
    ce = squared_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learners = [
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_time_constant, True)),
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_time_constant, True))
    ]

    trainer = C.Trainer(z, ce, dist_learners)
    in1_value = [[1]]
    label_value = [[0]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output

    def check_samples(learners, expected_number_of_samples):
        for learner in learners:
            if learner.total_number_of_samples_seen != expected_number_of_samples:
Example #34
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner.learning_rate() == 0.1
    
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.nesterov(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    lr_per_sample = learning_rate_schedule([0.1] * 3 + [0.2] * 2 + [0.3], UnitType.sample)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3,0.1), (2, 0.2), (1, 0.3)], UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.1]*5
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample, 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.set_default_use_mean_gradient_value(False)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert not use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
    
    C.set_default_use_mean_gradient_value(True)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
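For comparison with the deprecated UnitType-based schedules exercised above, a minimal sketch (the trivial model is only for illustration) using the newer schedule helpers that appear elsewhere on this page:
import numpy as np
import cntk as C

w = C.parameter(shape=(1,))
res = C.input_variable(shape=(1,), needs_gradient=True, name='a') * w

# per-sample learning rate expressed with learning_parameter_schedule
lr_per_sample = C.learning_parameter_schedule(0.1, minibatch_size=1)
# momentum given directly as a per-sample value instead of a time constant of 1100
momentum = C.momentum_schedule_per_sample(np.exp(-1.0 / 1100))
learner = C.momentum_sgd(res.parameters, lr_per_sample, momentum)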
Example #35
#
# Training action
#

# loss and metric
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error(z, label_var)

# training config
epoch_size = 6600  #12000 #15000
minibatch_size = 64

# Set training parameters
lr_per_minibatch = learning_rate_schedule([0.01] * 10 + [0.003] * 10 + [0.001],
                                          UnitType.minibatch, epoch_size)
momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size /
                                                            np.log(0.9))
l2_reg_weight = 0.001

# trainer object
progress_printer = ProgressPrinter(0)

learner = momentum_sgd(z.parameters,
                       lr=lr_per_minibatch,
                       momentum=momentum_time_constant,
                       l2_regularization_weight=l2_reg_weight)

# =============================================================================
# Create or RESTORE trainer
# =============================================================================
trainer = Trainer(z, (ce, pe), [learner], [progress_printer])
# trainer.restore_from_checkpoint(model_temp_file)
Example #36
# expected output (label); the label input shares the dynamic axes of the
# model output z
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
print(l)
# the learning rate
learning_rate = 0.02
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use fsadagrad optimizer
momentum_time_constant = C.momentum_as_time_constant_schedule(BATCH_SIZE /
                                                              -math.log(0.9))
learner = C.fsadagrad(z.parameters,
                      lr=lr_schedule,
                      momentum=momentum_time_constant,
                      unit_gain=True)

trainer = C.Trainer(z, (loss, error), [learner])

# train
loss_summary = []
start = time.time()
for epoch in range(0, EPOCHS):
    for x1, y1 in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x1, l: y1})
    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
Example #37
def test_learner_init_legacy():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(
        learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: this is a logically invalid combination, but it was the only way to use mean gradient and set learning rate in the past.
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(
        learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant,
                   unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum_time_constant,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                           unit=UnitType.sample)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant,
                unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2],
                                           unit=UnitType.sample,
                                           epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
Example #38
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height       = train_reader.height
    image_width        = train_reader.width
    num_channels       = train_reader.channel_count
    sequence_length    = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network 
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options (activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3,3,3), 64, pad=True),
            C.layers.MaxPooling((1,2,2), (1,2,2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2,2,2), (2,2,2))
            ]),
            C.layers.For(range(2), lambda : [
                C.layers.Dense(1024), 
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)
    
    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size     = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample          = [0.01]*10+[0.001]*10+[0.0001]
    lr_schedule            = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule            = C.momentum_as_time_constant_schedule([momentum_time_constant])
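    # Each lr_per_sample entry is held for one epoch (epoch_size samples): 0.01 for the first 10
    # epochs, 0.001 for the next 10, then 0.0001 thereafter. A momentum time constant of 4096
    # corresponds to a per-sample momentum of exp(-1/4096) ~= 0.99976.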

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var : videos, label_var : labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size     = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    minibatch_index = 0

    test_reader.reset()    
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # Evaluate the trained model on this minibatch; weight the error by the minibatch size.
        metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of minibatches processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
Example #39
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # Normalize the input
    feature_scale = 1.0 / 256.0
    input_var_norm = element_times(feature_scale, input_var)
    
    # apply model to input
    z = model_func(input_var_norm, out_dims=num_classes)

    #
    # Training action
    #

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size     = 20000
    minibatch_size = 64

    # Set training parameters
    lr_per_minibatch       = learning_rate_schedule([0.01]*10 + [0.003]*10 + [0.001],  UnitType.minibatch, epoch_size)
    momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size/np.log(0.9))
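    # -minibatch_size / ln(0.9) is the time constant tc solving exp(-minibatch_size / tc) = 0.9,
    # i.e. an effective per-minibatch momentum of 0.9 (tc ~= 607 samples for minibatch_size = 64).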
    l2_reg_weight          = 0.001
    
    # trainer object
    progress_printer = ProgressPrinter(0)

    learner     = momentum_sgd(z.parameters, 
                               lr = lr_per_minibatch, momentum = momentum_time_constant, 
                               l2_regularization_weight=l2_reg_weight)
    trainer     = Trainer(z, (ce, pe), [learner], [progress_printer])

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()
    #progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    stop_run=False
    batch_index = 0
    plot_data = {'batchindex':[], 'loss':[], 'error':[]}
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it

            sample_count += data[label_var].num_samples                     # count samples processed so far
            
            # For visualization...            
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)
            
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
            batch_index += 1
            if trainer.previous_minibatch_evaluation_average < 0.025:
                stop_run=True
                break
        if stop_run:
            break
        progress_printer.epoch_summary(with_metric=True)
        #trainer.save_checkpoint(model_temp_file)
        
    #
    # Evaluation action
    #
    epoch_size     = 6600
    minibatch_size = 32

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Evaluate the trained model on this minibatch; weight the error by the minibatch size.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")
    
    # Visualize training result:
    window_width            = 32
    loss_cumsum             = np.cumsum(np.insert(plot_data['loss'], 0, 0)) 
    error_cumsum            = np.cumsum(np.insert(plot_data['error'], 0, 0)) 

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss']   = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error']  = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width
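    # The cumulative-sum difference (cumsum[i + w] - cumsum[i]) / w yields a sliding-window mean
    # of width window_width over the raw per-minibatch loss and error curves.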
    
    # Draw both subplots before calling plt.show() once, so they appear in the same figure.
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss')

    plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error')
    plt.show()
    
    return softmax(z)
Example #40
def main(base_folder, training_mode='majority', model_name='VGG13', max_epochs = 100):

    # create needed folders.
    output_model_path   = os.path.join(base_folder, R'models')
    output_model_folder = os.path.join(output_model_path, model_name + '_' + training_mode)
    if not os.path.exists(output_model_folder):
        os.makedirs(output_model_folder)

    # create the logging file
    logging.basicConfig(filename = os.path.join(output_model_folder, "train.log"), filemode = 'w', level = logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler())

    logging.info("Starting with training mode {} using {} model and max epochs {}.".format(training_mode, model_name, max_epochs))

    # create the model
    num_classes = len(emotion_table)
    model       = build_model(num_classes, model_name)

    # set the input variables.
    input_var = ct.input((1, model.input_height, model.input_width), np.float32)
    label_var = ct.input((num_classes), np.float32)
    
    # read FER+ dataset.
    logging.info("Loading data...")
    train_params        = FERPlusParameters(num_classes, model.input_height, model.input_width, training_mode, False)
    test_and_val_params = FERPlusParameters(num_classes, model.input_height, model.input_width, "majority", True)

    train_data_reader   = FERPlusReader.create(base_folder, train_folders, "label.csv", train_params)
    val_data_reader     = FERPlusReader.create(base_folder, valid_folders, "label.csv", test_and_val_params)
    test_data_reader    = FERPlusReader.create(base_folder, test_folders, "label.csv", test_and_val_params)
    
    # print summary of the data.
    display_summary(train_data_reader, val_data_reader, test_data_reader)
    
    # get the probabilistic output of the model.
    z    = model.model((input_var - 127.5)/127.5)
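    # (the (x - 127.5) / 127.5 transform scales 8-bit pixel values from [0, 255] to [-1, 1])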
    pred = ct.softmax(z)
    
    epoch_size     = train_data_reader.size()
    minibatch_size = 32

    # Training config
    lr_per_minibatch       = [model.learning_rate]*20 + [model.learning_rate / 2.0]*20 + [model.learning_rate / 10.0]
    mm_time_constant       = -minibatch_size/np.log(0.9)
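    # -minibatch_size / np.log(0.9) is the time constant whose effective per-minibatch momentum is 0.9.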
    lr_schedule            = ct.learning_rate_schedule(lr_per_minibatch, unit=ct.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule            = ct.momentum_as_time_constant_schedule(mm_time_constant)

    # loss and error cost
    train_loss = cost_func(training_mode, pred, label_var)
    pe         = ct.classification_error(z, label_var)

    # construct the trainer
    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (train_loss, pe), learner)

    # Get minibatches of images to train with and perform model training
    max_val_accuracy    = 0.0
    final_test_accuracy = 0.0
    best_test_accuracy  = 0.0

    logging.info("Start training...")
    epoch      = 0
    best_epoch = 0
    while epoch < max_epochs: 
        train_data_reader.reset()
        val_data_reader.reset()
        test_data_reader.reset()
        
        # Training 
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0
        while train_data_reader.has_more():
            images, labels, current_batch_size = train_data_reader.next_minibatch(minibatch_size)

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            trainer.train_minibatch({input_var : images, label_var : labels})

            # keep track of statistics.
            training_loss     += trainer.previous_minibatch_loss_average * current_batch_size
            training_accuracy += trainer.previous_minibatch_evaluation_average * current_batch_size
                
        training_accuracy /= train_data_reader.size()
        training_accuracy = 1.0 - training_accuracy
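        # previous_minibatch_evaluation_average and test_minibatch report classification *error*
        # rates here, so accuracy is computed as 1 - error.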
        
        # Validation
        val_accuracy = 0
        while val_data_reader.has_more():
            images, labels, current_batch_size = val_data_reader.next_minibatch(minibatch_size)
            val_accuracy += trainer.test_minibatch({input_var : images, label_var : labels}) * current_batch_size
            
        val_accuracy /= val_data_reader.size()
        val_accuracy = 1.0 - val_accuracy
        
        # if validation accuracy goes higher, we compute test accuracy
        test_run = False
        if val_accuracy > max_val_accuracy:
            best_epoch = epoch
            max_val_accuracy = val_accuracy

            trainer.save_checkpoint(os.path.join(output_model_folder, "model_{}".format(best_epoch)))

            test_run = True
            test_accuracy = 0
            while test_data_reader.has_more():
                images, labels, current_batch_size = test_data_reader.next_minibatch(minibatch_size)
                test_accuracy += trainer.test_minibatch({input_var : images, label_var : labels}) * current_batch_size
            
            test_accuracy /= test_data_reader.size()
            test_accuracy = 1.0 - test_accuracy
            final_test_accuracy = test_accuracy
            if final_test_accuracy > best_test_accuracy: 
                best_test_accuracy = final_test_accuracy
 
        logging.info("Epoch {}: took {:.3f}s".format(epoch, time.time() - start_time))
        logging.info("  training loss:\t{:e}".format(training_loss))
        logging.info("  training accuracy:\t\t{:.2f} %".format(training_accuracy * 100))
        logging.info("  validation accuracy:\t\t{:.2f} %".format(val_accuracy * 100))
        if test_run:
            logging.info("  test accuracy:\t\t{:.2f} %".format(test_accuracy * 100))
            
        epoch += 1

    logging.info("")
    logging.info("Best validation accuracy:\t\t{:.2f} %, epoch {}".format(max_val_accuracy * 100, best_epoch))
    logging.info("Test accuracy corresponding to best validation:\t\t{:.2f} %".format(final_test_accuracy * 100))
    logging.info("Best test accuracy:\t\t{:.2f} %".format(best_test_accuracy * 100))

    pred.save('ferplus.onnx', ct.ModelFormat.ONNX)