Code example #1
def learning_word_embeddings_with_the_embedding_layer_cntk():
    x_train, y_train, x_test, y_test = load_from_files()

    max_features = 10000
    maxlen = 20
    embedding_dim = 8

    x = cntk.input_variable(shape=(maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x, num_classes=max_features, sparse_output=True)
    model = cntk.layers.Embedding(embedding_dim)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 30
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
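
The `cntk_train` helper called here (and in examples #4, #8 and #27) is not part of this listing. Below is a minimal sketch of what such a loop could look like, assuming the features and labels arrive as NumPy arrays shaped to match the `x` and `y` input variables; the batching details are assumptions, not the original author's implementation. For the sequence inputs of example #8 the minibatch would instead be a list of per-sequence arrays.

import numpy as np

def cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator=None):
    # Plain NumPy minibatch loop: one full sweep over the training data per epoch.
    num_samples = len(x_train)
    for epoch in range(max_epochs):
        for start in range(0, num_samples, batch_size):
            x_batch = x_train[start:start + batch_size].astype(np.float32)
            y_batch = y_train[start:start + batch_size].astype(np.float32).reshape(-1, 1)
            trainer.train_minibatch({x: x_batch, y: y_batch})
        trainer.summarize_training_progress()
    # A held-out evaluation pass with the optional evaluator is omitted in this sketch.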
Code example #2
def train(streamf):
    global net
    minibatch_size = 1024
    max_epochs = 2000
    epoch_size = 50000
    net = nn(input_var)
    loss = cntk.losses.binary_cross_entropy(net, label_var)
    error = cntk.classification_error(net, label_var)
    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch,
                                              cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.adam(net.parameters,
                        lr_schedule,
                        momentum=momentum_as_time_constant,
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)
    progress = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner],
                           progress_writers=progress)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
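
This function relies on module-level `input_var`, `label_var` and a network factory `nn` that are not shown in the listing. A hedged sketch of what those globals might look like for a binary-classification setup follows; the input dimension and the single Dense layer are placeholders, not the original definitions.

import cntk

input_dim = 128  # placeholder feature dimension
input_var = cntk.input_variable(input_dim)
label_var = cntk.input_variable(1)

def nn(features):
    # Placeholder network factory: the real `nn` is defined elsewhere.
    return cntk.layers.Dense(1, activation=cntk.sigmoid)(features)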
Code example #3
def create_trainer(network, epoch_size, num_quantization_bits, warm_up,
                   progress_writers):
    print('Creating the trainer.')
    # Train only the last layers
    lr_schedule = C.learning_rate_schedule([0.01] * 10 + [0.001] * 20 +
                                           [0.0001] * 30,
                                           unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.0001

    learner = C.adam(network['output'].parameters,
                     lr_schedule,
                     mm_schedule,
                     l2_regularization_weight=l2_reg_weight,
                     unit_gain=False)

    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))
    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            learner, progress_writers)

    return trainer
Code example #4
def implementing_1d_convnet_cntk():
    max_features = 10000  # number of words to consider as features
    max_len = 500  # cut texts after this number of words (among top max_features most common words)
    x_train, y_train, x_test, y_test = load_data(max_features, max_len)

    model = build_model_cntk(max_features, max_len)
    x = cntk.input_variable(shape=(max_len, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
Code example #5
    def train(self, report_freq = 500, as_policy=True):        
        #loss = C.ops.minus(0, C.ops.argmin(self.model) -  C.ops.argmin(self.model) + C.ops.minus(self.label_var, 0))
        loss = C.squared_error(self.model, self.label_var)
        evaluation = C.squared_error(self.model, self.label_var)
        schedule = C.momentum_schedule(self.hp.learning_rate)
        progress_printer = C.logging.ProgressPrinter(num_epochs=self.hp.epochs/self.hp.minibatch_size)
        learner = C.adam(self.model.parameters, 
                     C.learning_rate_schedule(self.hp.learning_rate, C.UnitType.minibatch), 
                     momentum=schedule, 
                     l1_regularization_weight=self.hp.l1reg,
                     l2_regularization_weight=self.hp.l2reg
                     )
        trainer = C.Trainer(self.model, (loss, evaluation), learner, progress_printer)
        self.plotdata = {"loss": []}
        for epoch in range(self.hp.epochs):
            indata, label, total_reward = self.get_next_data(self.hp.minibatch_size, as_policy)
            data = {self.input_var: indata, self.label_var: label}
            trainer.train_minibatch(data)
            loss = trainer.previous_minibatch_loss_average
            if not (loss == "NA"):
                self.plotdata["loss"].append(loss)
            if epoch % report_freq == 0:
                print()
                print("last epoch total reward: {}".format(total_reward))
                trainer.summarize_training_progress()
                print()
            # if self.hp.stop_loss > loss:
            #     break
        print()
        trainer.summarize_training_progress()
Code example #6
File: 02_model.py Project: srmsoumya/batch-inat
def create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers):
    ''' Create Trainer '''
    print('Creating the trainer.')
    # Differential Learning rate scheduler
    lr_schedule = C.learning_rate_schedule([2.5], unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.001

    # Create the Adam learners
    learner = C.adam(network['output'].parameters,
                     lr_schedule,
                     mm_schedule,
                     l2_regularization_weight=l2_reg_weight,
                     unit_gain=False)

    # Compute the number of workers
    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))
    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']), learner, progress_writers)

    return trainer
Code example #7
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = print_freq
    lr_per_sample = [2e-3] * 2 * schedule_step + [1e-3] * 2 * schedule_step + [
        5e-4
    ]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    if restore:
        trainer.restore_from_checkpoint("model-5.cntk")
    C.logging.log_number_of_parameters(dec)
    return trainer
Code example #8
def run_cntk():
    text, chars, char_indices, x_train, y_train = get_data(one_hot_encode_features=False)
    alphabet_size = len(chars)
    print('alphabet_size=', alphabet_size)
    model = build_model_cntk(alphabet_size=alphabet_size)
    model_filename = 'ch8-1_cntk.model'
    model.save(model_filename)
    model = None
    model = cntk.load_model(model_filename)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    y_oneHot = cntk.one_hot(y, num_classes=alphabet_size)
    loss_function = cntk.cross_entropy_with_softmax(model.output, y_oneHot)
    learner = cntk.adam(model.parameters, cntk.learning_parameter_schedule_per_sample(0.001), cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, loss_function), [learner],)

    for epoch in range(1, 60):
        print('epoch', epoch)
        cntk_train(x, y, x_train, y_train, max_epochs=32, batch_size=128, trainer=trainer)
        model_filename = 'final_ch8-1_cntk.model'
        model.save(model_filename)
        generate_text_cntk(char_indices, chars, model, text)
Code example #9
File: train.py Project: frankibem/cntk-issue
def main(params):
    # Create output and log directories if they don't exist
    if not os.path.isdir(params['output_folder']):
        os.makedirs(params['output_folder'])

    if not os.path.isdir(params['log_folder']):
        os.makedirs(params['log_folder'])

    # Create the network
    network = create_network()

    # Create readers
    train_reader = cbf_reader(os.path.join(params['input_folder'], 'train{}.cbf'.format(params['prefix'])), is_training=True,
                              max_samples=cntk.io.INFINITELY_REPEAT)
    cv_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])), is_training=False,
                           max_samples=cntk.io.FULL_DATA_SWEEP)
    test_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])), is_training=False,
                             max_samples=cntk.io.FULL_DATA_SWEEP)

    input_map = {
        network['input']: train_reader.streams.front,
        network['target']: train_reader.streams.label
    }

    # Create learner
    mm_schedule = momentum_schedule(0.90)
    lr_schedule = learning_parameter_schedule([(40, 0.1), (40, 0.01)], minibatch_size=params['minibatch_size'])
    learner = cntk.adam(network['model'].parameters, lr_schedule, mm_schedule, l2_regularization_weight=0.0005,
                        epoch_size=params['epoch_size'], minibatch_size=params['minibatch_size'])

    # Use TensorBoard for visual logging
    log_file = os.path.join(params['log_folder'], 'log.txt')
    pp_writer = cntk.logging.ProgressPrinter(freq=10, tag='Training', num_epochs=params['max_epochs'], log_to_file=log_file)
    tb_writer = cntk.logging.TensorBoardProgressWriter(freq=10, log_dir=params['log_folder'], model=network['model'])

    # Create trainer and training session
    trainer = Trainer(network['model'], (network['loss'], network['metric']), [learner], [pp_writer, tb_writer])
    test_config = TestConfig(minibatch_source=test_reader, minibatch_size=params['minibatch_size'], model_inputs_to_streams=input_map)
    cv_config = CrossValidationConfig(minibatch_source=cv_reader, frequency=(1, DataUnit.sweep),
                                      minibatch_size=params['minibatch_size'], model_inputs_to_streams=input_map)
    checkpoint_config = CheckpointConfig(os.path.join(params['output_folder'], model_name), frequency=(10, DataUnit.sweep), restore=params['restore'])

    session = training_session(trainer=trainer,
                               mb_source=train_reader,
                               mb_size=params['minibatch_size'],
                               model_inputs_to_streams=input_map,
                               max_samples=params['epoch_size'] * params['max_epochs'],
                               progress_frequency=(1, DataUnit.sweep),
                               checkpoint_config=checkpoint_config,
                               cv_config=cv_config,
                               test_config=test_config)

    cntk.logging.log_number_of_parameters(network['model'])
    session.train()

    # Save the trained model
    path = os.path.join(params['output_folder'], 'final_model.dnn')
    network['model'].save(path)
    print('Saved final model to', path)
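
The `cbf_reader` helper above is not included in this listing. A rough sketch of a comparable reader follows; it keeps the `front` and `label` stream names but substitutes the text-format `CTFDeserializer` for whatever deserializer the original uses for its binary `.cbf` files, and the stream shapes are placeholders.

import cntk

def cbf_reader(path, is_training, max_samples, front_dim=512, label_dim=10):
    # max_samples is accepted only to match the call sites above; this sketch
    # drives the sweep behaviour from is_training instead.
    streams = cntk.io.StreamDefs(
        front=cntk.io.StreamDef(field='front', shape=front_dim, is_sparse=False),
        label=cntk.io.StreamDef(field='label', shape=label_dim, is_sparse=False))
    return cntk.io.MinibatchSource(
        cntk.io.CTFDeserializer(path, streams),
        randomize=is_training,
        max_sweeps=cntk.io.INFINITELY_REPEAT if is_training else 1)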
Code example #10
File: lab153.py Project: say543/LSTM_tagger
def train(reader, model_func, max_epochs=10):

    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000  # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    # (we don't run this many epochs, but if we did, these are good values)
    lr_per_sample = [0.003] * 4 + [0.0015] * 24 + [0.0003]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    # Momentum schedule
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(700)

    # We use the Adam optimizer, which is known to work well on this dataset
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)

    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    t = 0
    for epoch in range(max_epochs):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            data = reader.next_minibatch(
                minibatch_size,
                input_map={  # fetch minibatch
                    x: reader.streams.query,
                    y: reader.streams.slot_labels
                })
            trainer.train_minibatch(data)  # update model with it
            t += data[y].num_samples  # samples so far
        trainer.summarize_training_progress()
Code example #11
File: cntk202.py Project: MintYiqingchen/tfStudy
def train(reader, model_func, max_epochs=10, task='slot_tagging'):
    
    # Create the containers for input feature (x) and the label (y)
    x = C.sequence.input_variable(vocab_size)
    y = C.sequence.input_variable(num_labels)
    # Instantiate the model function; x is the input (feature) variable 
    model = model_func(x)
    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000        # 18000 samples is half the dataset size 
    minibatch_size = 70
    
    # LR schedule over epochs 
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    lr_per_sample = [3e-4]*4+[1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)
    
    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=minibatch_size)
    
    # We use the Adam optimizer, which is known to work well on this dataset
    # Feel free to try other optimizers from 
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15, 
                     gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    
    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) 

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)
    
    # Assign the data fields to be read from the input
    if task == 'slot_tagging':
        data_map={x: reader.streams.query, y: reader.streams.slot_labels}
    else:
        data_map={x: reader.streams.query, y: reader.streams.intent} 
    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                # loop over minibatches on the epoch
            data = reader.next_minibatch(minibatch_size, input_map= data_map)  # fetch minibatch
            trainer.train_minibatch(data)               # update model with it
            t += data[y].num_samples                    # samples so far
        trainer.summarize_training_progress()
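
Examples #10 and #11 both expect a `reader` exposing `query`, `slot_labels` and `intent` streams (the ATIS slot-tagging setup used in the CNTK 202 tutorial). A minimal sketch of such a reader is given below, assuming CTF-format files with sparse one-hot fields named S0/S1/S2; the field names and dimensions are assumptions.

import cntk as C

def create_reader(path, is_training, vocab_size, num_intents, num_labels):
    # One sparse stream per CTF field (assumed layout: S0=query, S1=intent, S2=slot labels).
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            query=C.io.StreamDef(field='S0', shape=vocab_size, is_sparse=True),
            intent=C.io.StreamDef(field='S1', shape=num_intents, is_sparse=True),
            slot_labels=C.io.StreamDef(field='S2', shape=num_labels, is_sparse=True))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)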
Code example #12
def test_ctc_encoder_train_and_network_output_to_labels():
    # test CTC encoder in training loop and CTCEncoder.network_output_to_labels

    a = C.sequence.input_variable(10)
    labels = ['a', 'b', 'c']
    encoder = CTCEncoder(labels)

    labels_tensor = C.sequence.input_variable(len(
        encoder.classes_))  # number of classes = 4
    input_tensor = C.sequence.input_variable(100)

    prediction_tensor = Dense(4)(Recurrence(LSTM(100))(
        C.ones_like(input_tensor)))

    labels_graph = C.labels_to_graph(labels_tensor)

    fb = C.forward_backward(labels_graph,
                            prediction_tensor,
                            blankTokenId=encoder.blankTokenId)

    ground_truth = ['a', 'b', 'b', 'b', 'c']
    seq_length = 10  # must be the same length as the sequence length in network_out

    pred = np.array([
        [0., 2., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 0., 2.],
        [0., 0., 0., 2.],
    ]).astype(np.float32)

    n = np.random.random((10, 100)).astype(np.float32)

    # result = fb.eval({labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
    #                   input_tensor: [n]})

    # print(result)

    adam = C.adam(prediction_tensor.parameters, 0.01, 0.912)
    trainer = C.Trainer(prediction_tensor, (fb, ), [adam])

    for i in range(300):
        trainer.train_minibatch({
            labels_tensor:
            [encoder.transform(ground_truth, seq_length=seq_length)],
            input_tensor: [n]
        })

        # print(trainer.previous_minibatch_loss_average)

    result = prediction_tensor.eval({input_tensor: [n]})
    assert encoder.network_output_to_labels(result[0],
                                            squash_repeat=True) == ground_truth
Code example #13
    def train(self,
              X1_train,
              X2_train,
              Y_train,
              X1_val,
              X2_val,
              Y_val,
              batch_size=128,
              epochs=10):
        assert X1_train.shape == X2_train.shape
        assert len(X1_train) == len(Y_train)
        assert X1_val.shape == X2_val.shape
        assert len(X1_val) == len(Y_val)

        if cntk.try_set_default_device(cntk.gpu(0)):
            print("GPU Training enabled")
        else:
            print("CPU Training :(")

        input_shape = (X1_train.shape[1], X1_train.shape[2], X1_train.shape[3])
        self.siamese_net = self.build_network(input_shape)

        lr_per_minibatch = cntk.learning_rate_schedule(0.1,
                                                       cntk.UnitType.minibatch)
        pp = cntk.logging.ProgressPrinter()

        out = cntk.input_variable(1)
        loss = cntk.binary_cross_entropy(self.out, out)

        learner = cntk.adam(self.out.parameters,
                            lr=lr_per_minibatch,
                            momentum=0.9)
        trainer = cntk.Trainer(self.out, (loss, loss), [learner], [pp])

        cntk.logging.log_number_of_parameters(self.out)

        for epoch in range(epochs):
            # perm = np.random.permutation(len(Y_train))
            for i in range(0, len(Y_train), batch_size):
                max_n = min(i + batch_size, len(Y_train))
                # x1 = X1_train[perm[i:max_n]]
                # x2 = X2_train[perm[i:max_n]]
                # y = Y_train[perm[i:max_n]]
                x1 = X1_train[i:max_n]
                x2 = X2_train[i:max_n]
                y = Y_train[i:max_n]
                trainer.train_minibatch({
                    self.left_input: x1,
                    self.right_input: x2,
                    out: y
                })
                pp.update_with_trainer(trainer, with_metric=True)
                print('.')
            pp.epoch_summary(with_metric=False)
Code example #14
File: train.py Project: AllanYiin/CNTK
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_schedule)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Code example #15
File: train.py Project: gzt200361/CNTK
def create_learner(model):
    '''Create the optimized method'''
    lr_per_sample = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_sample)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_sample)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Code example #16
File: train.py Project: zwlshine/CNTK
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
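
The three `create_learner` variants above differ mainly in which schedule API they use: example #14 uses the newer `learning_parameter_schedule` / `momentum_schedule_per_sample` functions, while #15 and #16 use the older `learning_rate_schedule(..., C.UnitType.*)` / `momentum_as_time_constant_schedule` style. A small side-by-side sketch follows, with an arbitrary 0.001 rate and a throwaway parameter just so the learners have something to own.

import cntk as C

p = C.parameter(shape=(1,))
f = C.plus(p, 1)  # trivial function owning one parameter

# Newer API: the schedule carries an explicit reference minibatch size.
lr_new = C.learning_parameter_schedule(0.001, minibatch_size=32)
adam_new = C.adam(f.parameters, lr=lr_new, momentum=C.momentum_schedule(0.9))

# Legacy API: the unit is expressed through UnitType instead.
lr_old = C.learning_rate_schedule(0.001, C.UnitType.minibatch)
adam_old = C.adam(f.parameters, lr=lr_old, momentum=C.momentum_as_time_constant_schedule(1100))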
Code example #17
    def _create_learner(self):
        lr_per_sample = [3e-5] * 10 + [1.5e-5] * 20 + [1e-5]
        lr_per_minibatch = [lr * self.minibatch_size for lr in lr_per_sample]
        lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                               C.UnitType.minibatch,
                                               self.epoch_size)
        momentum_as_time_constant = C.momentum_as_time_constant_schedule(20)
        learner = C.adam(parameters=self.model.parameters,
                         lr=3e-4,
                         momentum=momentum_as_time_constant,
                         gradient_clipping_threshold_per_sample=0.21,
                         gradient_clipping_with_truncation=True)
        return learner
Code example #18
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters,
                      lr=lr_per_minibatch,
                      momentum=momentum_schedule)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Code example #19
File: inter_infer.py Project: FeldiPat/IntraTyper
def create_trainer():
    masked_dec = dec * C.ops.clip(C.ops.argmax(y), 0, 1)
    loss, label_error = criterion(masked_dec, y)
    loss *= C.ops.clip(C.ops.argmax(y), 0, 1)

    lr_schedule = C.learning_parameter_schedule_per_sample([1e-3] * 2 + [5e-4] * 2 + [1e-4], epoch_size=int(epoch_size))
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    C.logging.log_number_of_parameters(dec)
    return trainer
Code example #20
def train(model, reader):
    y_pre = model(x)
    loss, label_error = create_criterion_function(model, y_pre, y, True)
    lr_per_minibatch = [lr] + [lr / 2] + [lr / 4]
    # lr_per_minibatch = [lr * batch_size for lr in lr_per_sample]

    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch,
                                                epoch_size=epoch_size)

    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595,
                                    minibatch_size=batch_size)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epoch)
    # learner = C.sgd(model.parameters, lr_schedule)
    learner = C.adam(y_pre.parameters,
                     lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15)
    trainer = C.Trainer(y_pre, (loss, label_error), learner,
                        progress_printer)  # []

    C.logging.log_number_of_parameters(
        y_pre)  # print # parameters and # tensor

    loss_summary = []
    step = 0
    data_map = {x: reader.streams.query, y: reader.streams.intent}

    t = 0
    for epoch in range(max_epoch):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            data = reader.next_minibatch(batch_size,
                                         input_map=data_map)  # fetch minibatch
            # print(data)
            trainer.train_minibatch(data)  # update model with it
            t += data[y].num_samples
            if t % 6000 == 0:
                training_loss = trainer.previous_minibatch_loss_average
                error = trainer.previous_minibatch_evaluation_average
                print("epoch: {}, step: {}, loss: {:.5f}, error {:.5f}".format(
                    epoch, t, training_loss, error))
        trainer.summarize_training_progress()
Code example #21
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = 1 * print_freq
    lr_per_sample = [1e-3]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(0)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    trainer = C.Trainer(dec, (loss, label_error), learner)
    trainer.restore_from_checkpoint(model_file)
    return trainer
Code example #22
def train (train_reader, model_func, num_sweeps_to_train_with=10):
       
    # Instantiate the model function; x is the input (feature) variable 
    # We will scale the input image pixels to the 0-1 range by dividing all input values by 255.
    model = model_func(x/255)
    
    # Instantiate the loss and error function
    loss, label_error = create_criterion_function(model, y)
    
    # Instantiate the trainer object to drive the model training
    learning_rate = 0.001
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    learner = C.adam(model.parameters, lr_schedule, momentum=0.9)
    trainer = C.Trainer(model, (loss, label_error), [learner])
    
    # Initialize the parameters for the trainer
    minibatch_size = 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    
    # Map the data streams to the input and labels.
    input_map={
        y  : train_reader.streams.labels,
        x  : train_reader.streams.features
    } 
    
    # Uncomment below for more detailed logging
    #training_progress_output_freq = 500
     
    # Start a timer
    
    start = time.time()

    for i in range(0, int(num_minibatches_to_train)):
        # Read a mini batch from the training data file
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        #print_training_progress(trainer, i, training_progress_output_freq, verbose=1)

    # Print training time
    end = time.time()
    print(f'{end-start:.6f}')
    return trainer
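
The `create_criterion_function` helper used above (and the `create_criterion_function_preferred` variants in earlier examples) is not shown in this listing. A common definition for a classification model is sketched below as an assumption; the original may differ.

import cntk as C

def create_criterion_function(model, labels):
    # Standard classification criterion: softmax cross-entropy loss and
    # classification error as the evaluation metric.
    loss = C.cross_entropy_with_softmax(model, labels)
    error = C.classification_error(model, labels)
    return loss, error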
Code example #23
def train_mse_cntk(x, y, model, train_gen, val_gen, epochs, val_steps):
    loss_function = cntk.squared_error(model, y)
    accuracy_function = loss_function
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.001),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner])
    evaluator = cntk.Evaluator(accuracy_function)

    history = fit_generator(x,
                            y,
                            model=model,
                            trainer=trainer,
                            evaluator=evaluator,
                            train_gen=train_gen,
                            steps_per_epoch=500,
                            epochs=epochs,
                            val_gen=val_gen,
                            validation_steps=val_steps)

    plot_results(history)
Code example #24
def build_SRResNet_graph(lr_image_shape, hr_image_shape, net):
    inp_dynamic_axes = [C.Axis.default_batch_axis()]
    real_X = C.input(
        lr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_X")
    real_Y = C.input(
        hr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_Y")

    real_X_scaled = real_X/255
    real_Y_scaled = real_Y/255

    genG = net(real_X_scaled)

    G_loss = C.reduce_mean(C.square(real_Y_scaled - genG))

    G_optim = C.adam(G_loss.parameters,
                     lr=C.learning_rate_schedule(
                         [(1, 0.01), (1, 0.001), (98, 0.0001)], C.UnitType.minibatch, 10000),
                     momentum=C.momentum_schedule(0.9), gradient_clipping_threshold_per_sample=1.0)

    G_G_trainer = C.Trainer(genG, (G_loss, None), G_optim)

    return (real_X, real_Y, genG, real_X_scaled, real_Y_scaled, G_optim, G_G_trainer)
Code example #25
    def Loss(self):
        # Evaluating old actions and values :
        logprobs, state_value, dist_entropy = self.policy.evaluate()

        # Finding the ratio (pi_theta / pi_theta__old): # (importance sampling)
        c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
        ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

        c_rewards = C.input_variable(1, name='rewards')
        advantages = c_rewards - C.stop_gradient(state_value)

        # Finding Surrogate Loss:
        surr1 = ratios * advantages
        surr2 = C.clip(ratios, 1 - self.eps_clip,
                       1 + self.eps_clip) * advantages
        neglog_loss = -C.element_min(surr1, surr2)
        entropy_loss = -0.01 * dist_entropy
        actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
        critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
        loss = actor_loss + critic_loss

        chunk = {
            'neglog_loss': neglog_loss,
            'entropy_loss': entropy_loss,
            'actor_loss': actor_loss,
            'critic_loss': critic_loss
        }

        trainer = C.Trainer(
            loss, (loss, None),
            C.adam(loss.parameters,
                   C.learning_parameter_schedule_per_sample(self.lr),
                   C.momentum_schedule_per_sample(self.betas[0]),
                   variance_momentum=C.momentum_schedule_per_sample(
                       self.betas[1])))
        # trainer = C.Trainer(loss, (loss, None), C.adam(loss.parameters, C.learning_parameter_schedule(10), C.momentum_schedule(0.9), variance_momentum=C.momentum_schedule(0.999))) # higher learning rate

        return loss, chunk, trainer
Code example #26
    def _create_model(self, input_dim, output_dim, hidden_dims):
        c_in = C.input_variable(input_dim, name='state')
        model = c_in

        for h in hidden_dims:
            model = C.layers.Dense(h, activation=C.relu)(model)
        model = C.layers.Dense(output_dim, activation=C.softmax)(model)

        c_action_prob = model
        c_action_onehot = C.input_variable(output_dim, name='action_onehot')
        c_reward = C.input_variable(1, name='reward')
        action_prob = C.reduce_sum(c_action_prob * c_action_onehot)
        log_action_prob = C.log(action_prob)
        loss = -log_action_prob * c_reward
        loss = C.reduce_mean(loss)

        lr = 1e-2
        lr_schedule = C.learning_parameter_schedule(lr)
        learner = C.adam(model.parameters, lr_schedule,
                         C.momentum_schedule(0.9))
        trainer = C.Trainer(model, (loss, None), learner)

        return model, loss, trainer
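
The trainer returned above can be driven by mapping the three named input variables ('state', 'action_onehot', 'reward') to NumPy batches. A small hedged usage sketch follows; the dimensions, the `agent` instance and the random data are illustrative only.

import numpy as np

# Assuming: model, loss, trainer = agent._create_model(input_dim=4, output_dim=2, hidden_dims=[16])
state_var = next(a for a in loss.arguments if a.name == 'state')
action_var = next(a for a in loss.arguments if a.name == 'action_onehot')
reward_var = next(a for a in loss.arguments if a.name == 'reward')

states = np.random.rand(8, 4).astype(np.float32)                   # batch of states
actions = np.eye(2, dtype=np.float32)[np.random.randint(0, 2, 8)]  # one-hot actions
rewards = np.random.rand(8, 1).astype(np.float32)                  # discounted returns

trainer.train_minibatch({state_var: states, action_var: actions, reward_var: rewards})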
Code example #27
def use_glove_word_embeddings_cntk(preload_weights=False):
    tokenizer, x_train, y_train, x_val, y_val = from_raw_text_to_word_embeddings(
    )

    x = cntk.input_variable(shape=(Constants.maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x,
                         num_classes=Constants.max_words,
                         sparse_output=True)
    if preload_weights is True:
        embedding_matrix = compute_embedding_matrix(tokenizer)
        assert (Constants.embedding_dim
                == embedding_matrix.shape[0]) or (Constants.embedding_dim
                                                  == embedding_matrix.shape[1])
        model = cntk.layers.Embedding(weights=embedding_matrix)(model)
    else:
        model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Dense(32, activation=cntk.relu)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
Code example #28
File: learner_test.py Project: AllanYiin/CNTK
        ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
Code example #29
    D_real = dcgan_discriminator(x_real)
    D_fake = D_real.clone(method="share",
                          substitutions={x_real.output: G_fake.output})

    #
    # loss function
    #
    G_loss = -C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    #
    # optimizer
    #
    G_learner = C.adam(G_fake.parameters,
                       lr=C.learning_parameter_schedule_per_sample(1e-4),
                       momentum=0.5,
                       gradient_clipping_threshold_per_sample=minibatch_size,
                       gradient_clipping_with_truncation=True)
    D_learner = C.adam(D_real.parameters,
                       lr=C.learning_parameter_schedule_per_sample(1e-4),
                       momentum=0.5,
                       gradient_clipping_threshold_per_sample=minibatch_size,
                       gradient_clipping_with_truncation=True)
    G_progress_printer = C.logging.ProgressPrinter(tag="Generator")
    D_progress_printer = C.logging.ProgressPrinter(tag="Discriminator")

    if not os.path.exists("./dcgan_image"):
        os.mkdir("./dcgan_image")

    G_trainer = C.Trainer(G_fake, (G_loss, None), [G_learner],
                          [G_progress_printer])
Code example #30
File: learner_test.py Project: junaidnaseer/CNTK
        ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
Code example #31
    result = np.zeros((3, 256, 256))
    result[0, :, :] = x * 100
    result[1, :, :] = out[0, :, :]
    result[2, :, :] = out[1, :, :]
    result = np.transpose(result, (2, 0, 1))
    result = np.transpose(result, (2, 0, 1))
    imsave("img_result.png", lab2rgb(result))
    imsave("img_gray_version.png", rgb2gray(lab2rgb(result)))


if __name__ == '__main__':
    features, labels = image_processing()
    input_var = input_variable((1, image_size, image_size))
    label_var = input_variable((2, image_size, image_size))

    z = create_model(input_var)

    loss = mse(z, label_var)
    ev = mse(z, label_var)

    lr_rate = [0.001]
    lr_per_minibatch = c.learning_parameter_schedule(lr_rate, epoch_size=1)
    progress_printer = c.logging.ProgressPrinter()
    learner = c.adam(z.parameters, lr_per_minibatch, momentum=0.75)
    trainer = c.Trainer(z, (loss, ev), [learner], progress_printer)

    c.logging.log_number_of_parameters(z)

    learn()
    colorize('test.jpg')
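
The `mse` helper used for the loss above is not in this listing. A plausible sketch is given below, assuming it is a plain mean-squared error over all pixels of the two-channel output; the name and the exact reduction are assumptions.

import cntk as c

def mse(prediction, target):
    # Mean squared error over every static axis (channels and spatial dimensions).
    return c.reduce_mean(c.square(prediction - target), axis=c.Axis.all_static_axes())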
Code example #32
File: learner_test.py Project: junaidnaseer/CNTK
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                                minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001


    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                            minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10,
                         min=1e-8,
                         minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size = 1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 +[0.2]*2 +[0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3,0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1, epoch_size = 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
Code example #33
    psi = h_prime(C.dot(w, z_prev)+b) * w
    det_jacob = C.abs(1 + C.dot(u, psi))

    sum_log_det_jacob += C.log(EPS + det_jacob)
    z_prev = z_prev + u * h(C.dot(w, z_prev)+b)

z_k = z_prev
log_q_k = C.log(base_dist.pdf(z_0)) - sum_log_det_jacob
log_p = C.log(EPS + true_density(z_k))

kl = C.reduce_mean(log_q_k - log_p)
#%%
lr = 1
lr_schedule = C.learning_parameter_schedule(lr)
learner = C.adam(kl.parameters, lr_schedule, 0.9)
trainer = C.Trainer(kl, (kl, None), learner)

#%%
for i in range(1, 2000 + 1):
    s = base_dist.sample(500).astype(np.float32)
    trainer.train_minibatch({kl.arguments[0]:s})
    if i % 100 == 0:
        print(trainer.previous_minibatch_loss_average)
    # if i % 500 == 0:
    #     v = z_k.eval({z_k.arguments[0]:s})
    #     plt.scatter(v[:, 0], v[:, 1], alpha=0.7)
    #     plt.show()

# visualize the transformed samples z_k from the last minibatch
v = z_k.eval({z_k.arguments[0]:s})
plt.scatter(v[:, 0], v[:, 1], alpha=0.5, c='green')
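
# Hedged sketch of helpers the flow above relies on. They are defined earlier
# in the original script and are not shown in this snippet, so the definitions
# below are assumptions: a tanh planar flow and a 2-D standard-normal base
# distribution whose pdf is built from CNTK ops so the objective stays
# differentiable. true_density (the target density being matched) is
# problem-specific and is likewise not shown.
import numpy as np
import cntk as C

EPS = 1e-7

def h(x):
    return C.tanh(x)

def h_prime(x):
    # derivative of tanh: 1 - tanh(x)^2
    return 1 - C.square(C.tanh(x))

class StandardNormal2D(object):
    """Isotropic 2-D Gaussian with a CNTK-expressible pdf and numpy sampling."""
    def pdf(self, z):
        return C.exp(-0.5 * C.reduce_sum(C.square(z))) / (2 * np.pi)

    def sample(self, n):
        return np.random.randn(n, 2)

base_dist = StandardNormal2D()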
Code example #34
       (1, 0.8)], UnitType.sample, 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2, ), [0.2]),
    ((0.2, ), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4),
       (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]
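
# Hedged illustration, not part of the original test data: how one
# MOMENTUM_SCHEDULE_PARAMS entry is expected to expand. ([0.2, 0.4], 5)
# means 0.2 for the first 5 samples and 0.4 from then on.
# (m_sketch is a hypothetical name used only for this illustration.)
m_sketch = C.momentum_schedule([0.2, 0.4], epoch_size=5)
assert [m_sketch[i] for i in range(25)] == [0.2] * 5 + [0.4] * 20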

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params), lambda params: C.adagrad(
        params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params,
                          lr=learning_rate_schedule(1, UnitType.minibatch),
                          momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params,
                               lr=learning_rate_schedule(
                                   1, UnitType.minibatch),
                               momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params,
                              lr=learning_rate_schedule(1, UnitType.minibatch),
                              momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params,
                             lr=learning_rate_schedule(1, UnitType.minibatch),
                             gamma=0.1,
                             inc=3.0,
                             dec=0.1,
                             max=np.inf,
                             min=1e-8),
Code example #35
    def driver(self):
        np.random.seed(0)
        # Define the data dimensions
        image_shape = (1, 28, 28)
        input_dim = int(np.prod(image_shape, dtype=int))
        output_dim = 10
        num_train_samples = 60000
        num_test_samples = 10000
        # The local path where the training and test data might be found or will be downloaded to.
        training_data_path = os.path.join(os.getcwd(), "MNIST_data",
                                          "Train-28x28_cntk_text.txt")
        testing_data_path = os.path.join(os.getcwd(), "MNIST_data",
                                         "Test-28x28_cntk_text.txt")
        # Names of the raw MNIST archives to download if they don't already
        # exist locally.
        url_train_image = "train-images-idx3-ubyte.gz"
        url_train_labels = "train-labels-idx1-ubyte.gz"
        print("Loading training data")
        saved_data_dir = os.path.join(os.getcwd(), "MNIST_data")
        train = self.load_mnist_data(url_train_image,
                                     url_train_labels,
                                     num_train_samples,
                                     local_data_dir=saved_data_dir)
        print("Writing training data text file...")
        self.save_as_txt(training_data_path, train)
        print("[Done]")
        # Names of the raw MNIST test archives to download if they don't
        # already exist locally.
        url_test_image = "t10k-images-idx3-ubyte.gz"
        url_test_labels = "t10k-labels-idx1-ubyte.gz"
        print("Loading testing data")
        saved_data_dir = os.path.join(os.getcwd(), "MNIST_data2")
        test = self.load_mnist_data(url_test_image, url_test_labels,
                                    num_test_samples, saved_data_dir)
        print("Writing testing data text file...")
        self.save_as_txt(testing_data_path, test)
        print("[Done]")

        feature_stream_name = 'features'
        labels_stream_name = 'labels'

        # Convert to CNTK MinibatchSource
        # original as below deprecated------------
        #train_minibatch_source = cntk.text_format_minibatch_source(training_data_path, [
        #cntk.StreamConfiguration(feature_stream_name, input_dim),
        #cntk.StreamConfiguration(labels_stream_name, output_dim)])
        #------------------------------------------------------------------
        train_minibatch_source = MinibatchSource(
            CTFDeserializer(
                training_data_path,
                StreamDefs(features=StreamDef(field='features',
                                              shape=input_dim,
                                              is_sparse=False),
                           labels=StreamDef(field='labels',
                                            shape=output_dim,
                                            is_sparse=False))))
        training_features = train_minibatch_source[feature_stream_name]
        training_labels = train_minibatch_source[labels_stream_name]
        print("Training data from file %s successfully read." %
              training_data_path)

        #test_minibatch_source = cntk.text_format_minibatch_source(testing_data_path, [
        #cntk.StreamConfiguration(feature_stream_name, input_dim),
        #cntk.StreamConfiguration(labels_stream_name, output_dim)])
        test_minibatch_source = MinibatchSource(
            CTFDeserializer(
                testing_data_path,
                StreamDefs(features=StreamDef(field='features',
                                              shape=input_dim,
                                              is_sparse=False),
                           labels=StreamDef(field='labels',
                                            shape=output_dim,
                                            is_sparse=False))))
        test_features = test_minibatch_source[feature_stream_name]
        test_labels = test_minibatch_source[labels_stream_name]
        print("Test data from file %s successfully read." % testing_data_path)

        # Define the input to the neural network
        input_vars = cntk.ops.input_variable(image_shape, np.float32)
        # Create the convolutional neural network
        output = self.create_convolutional_neural_network(input_vars,
                                                          output_dim,
                                                          dropout_prob=0.5)
        #'''
        #----------------------
        #Setting up the trainer
        #----------------------
        #'''
        # Define the label as the other input parameter of the trainer
        labels = cntk.ops.input_variable(output_dim, np.float32)
        # Initialize the parameters for the trainer
        train_minibatch_size = 50
        learning_rate = 1e-4
        momentum = 0.9
        # Define the loss function
        #loss = cntk.ops.cross_entropy_with_softmax(output, labels)
        loss = cntk.cross_entropy_with_softmax(output, labels)
        # Define the function that calculates classification error
        #label_error = cntk.ops.classification_error(output, labels)
        label_error = cntk.classification_error(output, labels)
        # Instantiate the trainer object to drive the model training
        #learner = cntk.adam_sgd(output.parameters, learning_rate, momentum)
        learner = cntk.adam(
            output.parameters,
            learning_rate_schedule(learning_rate, UnitType.sample),
            momentum_schedule(momentum))
        trainer = cntk.Trainer(output, (loss, label_error), [learner])
        #'''
        #-----------------------------------------
        #Training the Convolutional Neural Network
        #-----------------------------------------
        #'''
        num_training_epoch = 1
        training_progress_output_freq = 100

        for epoch in range(num_training_epoch):
            sample_count = 0
            num_minibatch = 0
            # loop over minibatches in the epoch
            while sample_count < num_train_samples:
                minibatch = train_minibatch_source.next_minibatch(
                    min(train_minibatch_size,
                        num_train_samples - sample_count))
                # Specify the mapping of input variables in the model to actual minibatch data to be trained with
                data = {
                    input_vars: minibatch[training_features],
                    labels: minibatch[training_labels]
                }
                trainer.train_minibatch(data)
                sample_count += data[labels].num_samples
                num_minibatch += 1
                #Print the training progress data
                if num_minibatch % training_progress_output_freq == 0:
                    #training_loss = cntk.get_train_loss(trainer)
                    training_loss = trainer.previous_minibatch_loss_average
                    #eval_error = cntk.get_train_eval_criterion(trainer)
                    eval_error = trainer.previous_minibatch_evaluation_average
                    print(
                        "Epoch %d | # of Samples: %6d | Loss: %.6f | Error: %.6f"
                        % (epoch, sample_count, training_loss, eval_error))

        print("Training Completed.", end="\n\n")

        #'''
        #-------------------
        #Classification Test
        #--------------------
        #'''

        test_minibatch_size = 1000
        sample_count = 0
        test_results = []

        while sample_count < num_test_samples:
            minibatch = test_minibatch_source.next_minibatch(
                min(test_minibatch_size, num_test_samples - sample_count))
            # Specify the mapping of input variables in the model to actual minibatch data to be tested with
            data = {
                input_vars: minibatch[test_features],
                labels: minibatch[test_labels]
            }
            eval_error = trainer.test_minibatch(data)
            test_results.append(eval_error)
            sample_count += data[labels].num_samples
        # Printing the average of evaluation errors of all test minibatches
        print("Average errors of all test minibatches: %.3f%%" %
              (float(np.mean(test_results, dtype=float)) * 100))
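
    # Hedged sketch, assumption only: the original class defines
    # create_convolutional_neural_network elsewhere and it is not shown in
    # this snippet. A small LeNet-style stack that matches the driver's
    # (1, 28, 28) input and 10-way output might look like this.
    def create_convolutional_neural_network(self, input_vars, out_dims,
                                            dropout_prob=0.0):
        with cntk.layers.default_options(activation=cntk.relu, pad=True):
            model = cntk.layers.Sequential([
                cntk.layers.Convolution2D((5, 5), 32),
                cntk.layers.MaxPooling((2, 2), strides=(2, 2)),
                cntk.layers.Convolution2D((5, 5), 64),
                cntk.layers.MaxPooling((2, 2), strides=(2, 2)),
                cntk.layers.Dense(1024),
                cntk.layers.Dropout(dropout_prob),
                cntk.layers.Dense(out_dims, activation=None),
            ])
        # scale raw pixel values to [0, 1] before the convolutional stack
        return model(input_vars / 255.0)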
Code example #36
# loss = (mkld)

# _q_prime = C.tanh(q)
# _mu = C.reduce_mean(_q_prime, axis=C.Axis.default_batch_axis())
# _sigma = C.reduce_mean(C.square(_q_prime-_mu), axis=C.Axis.default_batch_axis())
# loss += C.reduce_mean(C.square(_mu)) + C.reduce_mean(C.square(_sigma-0.615))

# # _log_mu = C.reduce_mean(C.log(C.abs(q)), axis=C.Axis.default_batch_axis())
# # loss += C.reduce_mean(C.square(_log_mu+0.57))

# Debug hook left in the original script; commented out here so the training
# code below is reachable.
# from IPython import embed; embed()
# exit()


lr_rate = 1e-3
learner = C.adam(loss.parameters, C.learning_parameter_schedule_per_sample(lr_rate), C.momentum_schedule(0.99))
trainer = C.Trainer(loss, (loss, None), [learner])
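
# Assumed to be imported earlier in the original script (not shown in this
# snippet): `from sklearn import datasets` for the make_moons toy data and
# `from tqdm import tqdm` for the progress bar in the loop below.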

for i in tqdm(range(10000)):
    # v = np.random.uniform(size=(1,2))
    v = datasets.make_moons(n_samples=1000, noise=.05)[0].astype(np.float32)
    trainer.train_minibatch({loss.arguments[0]:v})

    # from IPython import embed;embed()
    if i%100 == 0:
        print('\n',trainer.previous_minibatch_loss_average)

    if len(bn) > 0: # batch norm
        result = C.combine(bn).eval({loss.arguments[0]:v})
        result = list(result.values())
        momentum = C.Constant(0.9)