Example No. 1
def create_criterion(network):
    '''Create the criterion for model'''
    model, label1, label2 = network['model'], network['row_label'], network['col_label']
    label1_ce = C.cross_entropy_with_softmax(model.outputs[0], label1)
    label2_ce = C.cross_entropy_with_softmax(model.outputs[1], label2)
    label1_pe = C.classification_error(model.outputs[0], label1)
    label2_pe = C.classification_error(model.outputs[1], label2)
    label_ce = label1_ce + label2_ce
    label_pe = label1_pe + label2_pe
    return (label_ce, label_pe)
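A minimal usage sketch for the criterion above. The toy two-headed model, dimensions, data and learner settings below are assumptions for illustration, not part of the original example:

# Assumed illustration: wire create_criterion() into a Trainer with a toy
# two-output model built with C.combine.
import numpy as np
import cntk as C

features = C.input_variable(4)
row_label = C.input_variable(3)
col_label = C.input_variable(3)

hidden = C.layers.Dense(8, activation=C.relu)(features)
model = C.combine([C.layers.Dense(3)(hidden), C.layers.Dense(3)(hidden)])

network = {'model': model, 'row_label': row_label, 'col_label': col_label}
label_ce, label_pe = create_criterion(network)

learner = C.sgd(model.parameters, lr=C.learning_parameter_schedule(0.1))
trainer = C.Trainer(model, (label_ce, label_pe), [learner])
trainer.train_minibatch({features: np.random.rand(2, 4).astype(np.float32),
                         row_label: np.eye(3, dtype=np.float32)[[0, 1]],
                         col_label: np.eye(3, dtype=np.float32)[[2, 0]]})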
def create_resnet_network(network_name, fp16):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        stride1x1 = (1, 1)
        stride3x3 = (2, 2)

        # create model, and configure learning parameters
        if network_name == 'resnet18':
            z = create_imagenet_model_basic(graph_input, [2, 1, 1, 2], num_classes)
        elif network_name == 'resnet34':
            z = create_imagenet_model_basic(graph_input, [3, 3, 5, 2], num_classes)
        elif network_name == 'resnet50':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet101':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet152':
            z = create_imagenet_model_bottleneck(graph_input, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        errs = classification_error(z, graph_label, topN=1)
        top5Errs = classification_error(z, graph_label, topN=5)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        errs = C.cast(errs, dtype=np.float32)
        top5Errs = C.cast(top5Errs, dtype=np.float32)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'errs' : errs,
        'top5Errs' : top5Errs,
        'output': z
    }
Example No. 3
 def criterion(input, labels):
     # criterion function must drop the <s> from the labels
     postprocessed_labels = sequence.slice(labels, 1, 0) # <s> A B C </s> --> A B C </s>
     z = model(input, postprocessed_labels)
     ce   = cross_entropy_with_softmax(z, postprocessed_labels)
     errs = classification_error      (z, postprocessed_labels)
     return (ce, errs)
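For reference, a small self-contained illustration (an assumption, not from the original) of what `sequence.slice(labels, 1, 0)` does: it drops the first step of every sequence, so the criterion compares the model output against the labels with the sentence-start token removed.

# Assumed illustration of sequence.slice dropping the leading token.
import numpy as np
import cntk as C

seq = C.sequence.input_variable(1)
dropped = C.sequence.slice(seq, 1, 0)   # <s> A B C </s> --> A B C </s>
one_sequence = [np.array([[0.], [1.], [2.], [3.]], dtype=np.float32)]
print(dropped.eval({seq: one_sequence}))  # prints the sequence without its first step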
Example No. 4
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Example No. 5
 def criterion(input:InputSequence[C.layers.Tensor[input_vocab_dim]]
         ,labels:LabelSequence[C.layers.Tensor[label_vocab_dim]]):
     postprocessed_labels = C.sequence.slice(labels, 1, 0) # <s> A B C </s> --> A B C </s>
     z = model(input, postprocessed_labels)
     ce = C.cross_entropy_with_softmax(z, postprocessed_labels)
     errs = C.classification_error(z, postprocessed_labels)
     return (ce, errs)
def create_resnet_network(network_name):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters 
    if network_name == 'resnet20': 
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110': 
        z = create_cifar10_model(input_var, 18, num_classes)
    else: 
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'output': z
    }
Example No. 7
def classification_error(output_vector, target_vector, name=''):
    '''
    This operation computes the prediction error. It finds the index of the highest 
    value in the output_vector and compares it to the actual ground truth label
    (the index of the hot bit in the target vector). The result is a scalar 
    (i.e., one by one matrix). This is often used as an evaluation criterion. 
    It cannot be used as a training criterion though since the gradient is not
    defined for it.
    
    Example:
        >>> C.eval(C.classification_error([1., 2., 3., 4.], [0., 0., 0., 1.]))
        #[0.]
        
        >>> C.eval(C.classification_error([1., 2., 3., 4.], [0., 0., 1., 0.]))
        #[1.]
    
    Args:
        output_vector: the output values from the network
        target_vector: a one-hot vector where the hot bit corresponds to the label index
        name (str): the name of the node in the network            
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import classification_error
    output_vector = sanitize_input(output_vector, get_data_type(target_vector))
    target_vector = sanitize_input(target_vector, get_data_type(output_vector))
    return classification_error(output_vector, target_vector, name).output()
Example No. 8
def test_factor_dense_for_prediction():

    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25
    learning_rate = 0.5

    input = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    z = _create_model_dense(input, input_dim, hidden_layer_dim, num_output_classes)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training

    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])


    # Run the trainer and perform model training
    training_progress_output_freq = 20
    plotdata = {"batchsize":[], "loss":[], "error":[]}


    for i in range(0, int(num_minibatches_to_train)):
        features, labels = _generate_random_data_sample(minibatch_size, input_dim, num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch data for training
        trainer.train_minibatch({input : features, label : labels})
    
    # generate some data to predict
    features, labels = _generate_random_data_sample(10, 2, 2)

    # factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size, filter_function = _filter)
    original_out = C.softmax(z)
    factored_out = C.softmax(newz)

    original_labels_probs = original_out.eval({input : features})
    predicted_label_probs = factored_out.eval({input : features})
    
    original_prediction_percentage = _percentage_match(labels, original_labels_probs) 

    # reduced model should have at least 50% match compared to the original
    # For the test, we reduced the training minibatches, thus the match is lower.
    assert(original_prediction_percentage * 0.5 <= _percentage_match(labels, predicted_label_probs))
Example No. 9
def test_debug_multi_output():
    input_dim = 2
    num_output_classes = 2

    f_input = input_variable(input_dim, np.float32,
                             needs_gradient=True, name='features')

    p = parameter(shape=(input_dim,), init=10, name='p')

    comb = combine([f_input, p])

    ins = InStream(['n', 'n', 'n', 'n', 'n'])
    outs = OutStream()

    z = times(comb.outputs[0], comb.outputs[1], name='z')
    z = debug_model(z, ins, outs)

    l_input = input_variable(num_output_classes, np.float32, name='labels')
    loss = cross_entropy_with_softmax(z, l_input)
    eval_error = classification_error(z, l_input)

    _train(z, loss, eval_error,
           loss.find_by_name('features'),
           loss.find_by_name('labels'),
           num_output_classes, 1)

    # outs.written contains something like
    # =================================== forward  ===================================
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # =================================== backward ===================================
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Parameter('p', [], [2]) with uid 'Parameter4'

    assert len(outs.written) == 8

    v_p = "Parameter('p', "
    v_i = "Input('features'"
    v_t = 'Times: '

    assert outs.written[0].startswith('=') and 'forward' in outs.written[0]
    line_1, line_2, line_3 = outs.written[1:4]

    assert outs.written[4].startswith('=') and 'backward' in outs.written[4]
    line_5, line_6, line_7 = outs.written[5:8]
    assert line_5.startswith(v_t)
    assert line_6.startswith(v_p) and line_7.startswith(v_i) or \
           line_6.startswith(v_i) and line_7.startswith(v_p)
Example No. 10
def train(nonlinearity, num_hidden_layers, device_id,
          minibatch_size=10, num_samples=1000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors
Example No. 11
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    features = C.input_variable(((2*context+1)*feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error    (z, labels)

    learner = C.adam_sgd(z.parameters,
                    lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                    momentum=C.momentum_as_time_constant_schedule(1000),
                    low_memory=True,
                    gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example No. 12
def create_resnet_network(network_name):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    stride1x1 = (1, 1)
    stride3x3 = (2, 2)

    # create model, and configure learning parameters
    if network_name == 'resnet18':
        z = create_imagenet_model_basic(input_var, [2, 1, 1, 2], num_classes)
    elif network_name == 'resnet34':
        z = create_imagenet_model_basic(input_var, [3, 3, 5, 2], num_classes)
    elif network_name == 'resnet50':
        z = create_imagenet_model_bottleneck(input_var, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
    elif network_name == 'resnet101':
        z = create_imagenet_model_bottleneck(input_var, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
    elif network_name == 'resnet152':
        z = create_imagenet_model_bottleneck(input_var, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    errs = classification_error(z, label_var, topN=1)
    top5Errs = classification_error(z, label_var, topN=5)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'errs' : errs,
        'top5Errs' : top5Errs,
        'output': z
    }
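A sketch of how the returned dictionary is typically consumed (an assumption mirroring the training pattern used elsewhere in these examples; the learner settings are placeholders and the data feed is elided):

# Assumed consumption sketch; relies on the globals used by create_resnet_network.
network = create_resnet_network('resnet50')
learner = C.momentum_sgd(network['output'].parameters,
                         lr=C.learning_parameter_schedule(0.1),
                         momentum=C.momentum_schedule(0.9))
trainer = C.Trainer(network['output'], (network['ce'], network['errs']), [learner])
# trainer.train_minibatch({network['feature']: <image batch>, network['label']: <one-hot batch>})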
Example No. 13
 def create_model(self, frame_mode=False):
     if frame_mode:
         self.feat = cntk.input_variable(shape=(feat_dim,))
         self.label = cntk.input_variable((label_dim,))
         
         net = cntk.layers.Sequential([cntk.layers.Dense(cell_dim), cntk.layers.Dense(label_dim)])
         self.output = net(self.feat)
     else:    
         #sequence mode
         self.feat = cntk.sequence.input_variable(shape=(feat_dim,))
         self.label = cntk.sequence.input_variable((label_dim,))
         
         net = cntk.layers.Sequential([cntk.layers.Recurrence(cntk.layers.LSTM(shape=label_dim, cell_shape=(cell_dim,)))])
         self.output = net(self.feat)
     
     self.ce = cntk.cross_entropy_with_softmax(self.output, self.label)
     self.err = cntk.classification_error(self.output, self.label)
Example No. 14
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
            features: reader.streams.features,
            label:    reader.streams.labels
    }

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
Example No. 15
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
            training_session, times

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # use a large learning rate to prevent the model from converging early, before all the intended samples have been fed;
    # note that the training session can end early if there are no updates
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
Example No. 16
def ffnet(learner, trainer=None):
    inputs = 5
    outputs = 3
    layers = 2
    hidden_dimension = 3

    if trainer is None:
        # input variables denoting the features and label data
        features = C.input_variable((inputs), np.float32)
        label = C.input_variable((outputs), np.float32)

        # Instantiate the feedforward classification model
        my_model = Sequential ([
                        Dense(hidden_dimension, activation=C.sigmoid, init=C.glorot_uniform(seed=98052)),
                        Dense(outputs, init=C.glorot_uniform(seed=98052))])
        z = my_model(features)

        ce = C.cross_entropy_with_softmax(z, label)
        pe = C.classification_error(z, label)

        # Instantiate the trainer object to drive the model training
        progress_printer = ProgressPrinter(0)
        trainer = C.Trainer(z, (ce, pe), [learner(z.parameters)], [progress_printer])
    else:
        features = trainer.loss_function.arguments[0]
        label = trainer.loss_function.arguments[1]

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error, trainer
Example No. 17
def create_binary_convolution_model():

    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1,1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
Example No. 18
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input_variable(((2*context+1)*feature_dim))
    labels = sequence.input_variable((num_classes))

    # create network
    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error    (z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce' : ce,
        'errs' : errs,
        'output': z
    }
Example No. 19
def ffnet(optimizer,  num_minibatches_to_train, learning_rate_func, lr_args, learner_kwargs):
    inputs = 2
    outputs = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid,
              init=C.glorot_uniform(seed=SEED)),
        Dense(outputs, init=C.glorot_uniform(seed=SEED))])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr= learning_rate_func(0.125, *lr_args)
    progress_printer = ProgressPrinter(0)
    learner = optimizer(z.parameters, lr) if optimizer != sgd else sgd(z.parameters, lr, **learner_kwargs)

    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(
            minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})

    test_features, test_labels = generate_random_data(
        minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch(
        {features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential ([
                    Dense(hidden_dimension, activation=C.sigmoid),
                    Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = C.learning_parameter_schedule(0.125)
    progress_printer = ProgressPrinter(0)
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
Example No. 21
def test_udf_checkpointing(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    label = C.constant(np.asarray([[1, 2], [3, 4]]).astype(np.float32))

    loss = C.cross_entropy_with_softmax(op, label)
    eval_error = C.classification_error(op, label)

    lr_schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    learner = C.sgd(op.parameters, lr_schedule)
    trainer = C.Trainer(op, (loss, eval_error), [learner])

    trainer.train_minibatch({op.arguments[0]: np.random.random((2, 2)).astype(np.float32)}, device=dev)

    filepath = str(tmpdir / 'test_checkpointing.out')

    trainer.save_checkpoint(filepath, external_state={'test': 'test'})

    d = C.cntk_py.Dictionary.load(filepath)
    assert len(d.keys()) != 0
Example No. 22
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential ([
                    Dense(hidden_dimension, activation=C.sigmoid),
                    Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    pp = ProgressPrinter(0)
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        pp.update_with_trainer(trainer)

    last_avg_error = pp.avg_loss_since_start()

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
def create_resnet_network(network_name, fp16):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        # create model, and configure learning parameters
        if network_name == 'resnet20':
            z = create_cifar10_model(graph_input, 3, num_classes)
        elif network_name == 'resnet110':
            z = create_cifar10_model(graph_input, 18, num_classes)
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        pe = classification_error(z, graph_label)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        pe = C.cast(pe, dtype=np.float32)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'output': z
    }
def create_conv_network():
    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    z = create_convnet_cifar10_model(num_classes)(scaled_input)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    C.logging.log_number_of_parameters(z) ; print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'output': z
    }
Example No. 25
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # Normalize the input
    feature_scale = 1.0 / 256.0
    input_var_norm = C.element_times(feature_scale, input_var)
    
    # apply model to input
    z = model_func(input_var_norm, out_dims=10)

    #
    # Training action
    #

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    epoch_size     = 50000
    minibatch_size = 64

    # Set training parameters
    lr_per_minibatch       = C.learning_parameter_schedule([0.01]*10 + [0.003]*10 + [0.001], 
                                                       epoch_size = epoch_size)
    momentums              = C.momentum_schedule(0.9, minibatch_size = minibatch_size)
    l2_reg_weight          = 0.001
    
    # trainer object
    learner = C.momentum_sgd(z.parameters, 
                             lr = lr_per_minibatch, 
                             momentum = momentums, 
                             l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z) ; print()

    # perform model training
    batch_index = 0
    plot_data = {'batchindex':[], 'loss':[], 'error':[]}
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), 
                                               input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it

            sample_count += data[label_var].num_samples                     # count samples processed so far
            
            # For visualization...            
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)
            
            batch_index += 1
        trainer.summarize_training_progress()
        
    #
    # Evaluation action
    #
    epoch_size     = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")
    
    # Visualize training result:
    window_width            = 32
    loss_cumsum             = np.cumsum(np.insert(plot_data['loss'], 0, 0)) 
    error_cumsum            = np.cumsum(np.insert(plot_data['error'], 0, 0)) 

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss']   = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error']  = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width
    
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')

    plt.show()

    plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    plt.show()
    
    return C.softmax(z)
Example No. 26
 def criterion(x, y):
     z = model(normalize(x))
     ce   = cross_entropy_with_softmax(z, y)
     errs = classification_error      (z, y)
     return (ce, errs)
Example No. 27
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height       = train_reader.height
    image_width        = train_reader.width
    num_channels       = train_reader.channel_count
    sequence_length    = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network 
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options (activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3,3,3), 64, pad=True),
            C.layers.MaxPooling((1,2,2), (1,2,2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2,2,2), (2,2,2))
            ]),
            C.layers.For(range(2), lambda : [
                C.layers.Dense(1024), 
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)
    
    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size     = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample          = [0.01]*10+[0.001]*10+[0.0001]
    lr_schedule            = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule            = C.momentum_as_time_constant_schedule([momentum_time_constant])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var : videos, label_var : labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size     = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    minibatch_index = 0

    test_reader.reset()    
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
Example No. 28
input = cntk.input_variable(inp_dim)
labels = cntk.input_variable(output_classes)


def net_model(feature):
    with default_options(init=cntk.glorot_uniform()):
        layers = Dense(output_classes, activation=None)(feature)
        return layers


input_layer = input / 255.0
net = net_model(input_layer)

loss = cntk.cross_entropy_with_softmax(net, labels)
error = cntk.classification_error(net, labels)

learning_rate = 0.2
learning_schedule = cntk.learning_rate_schedule(learning_rate,
                                                cntk.UnitType.minibatch)
learner = cntk.sgd(net.parameters, learning_schedule)
trainer = cntk.Trainer(net, (loss, error), [learner])


def cumulative_avg(arr, diff=5):
    if len(arr) < diff:
        return arr
    return [
        val if ids < diff else np.cumsum(arr, axis=None) / 5
        for ids, val in enumerate(arr)
    ]
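A minimal training-loop sketch for the trainer defined above (an assumption for illustration: random one-hot batches stand in for real data, and the loop length is arbitrary):

# Assumed usage sketch: drive the trainer above with random one-hot batches,
# reusing the `input`, `labels` and `trainer` objects defined earlier.
import numpy as np

minibatch_size = 64
for i in range(10):
    X = (np.random.rand(minibatch_size, *input.shape) * 255).astype(np.float32)
    Y = np.eye(labels.shape[0], dtype=np.float32)[
        np.random.randint(0, labels.shape[0], minibatch_size)]
    trainer.train_minibatch({input: X, labels: Y})
    print(trainer.previous_minibatch_loss_average)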
Example No. 29
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    mem = np.zeros(num_minibatches_to_train)

    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This is required because the
    # pytest process involves some memory fluctuations.
    MEM_INCREASE_TOLERANCE = 1024*1024

    dev = cntk_device(device_id)
    i = 0
    while i < num_minibatches_to_train:
        mem[i] = mem_used()

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100),
                             nonlinearity, mem_changes))
Example No. 30
def train_and_evaluate(reader_train,
                       reader_test,
                       network_name,
                       epoch_size,
                       max_epochs,
                       profiler_dir=None,
                       model_dir=None,
                       log_dir=None,
                       tensorboard_logdir=None,
                       gen_heartbeat=False,
                       fp16=False):

    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width),
                                 name='features')
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        # create model, and configure learning parameters
        if network_name == 'resnet20':
            z = create_cifar10_model(graph_input, 3, num_classes)
            lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
        elif network_name == 'resnet110':
            z = create_cifar10_model(graph_input, 18, num_classes)
            lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        pe = classification_error(z, graph_label)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        pe = C.cast(pe, dtype=np.float32)

    # shared training parameters
    minibatch_size = 128
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample,
                                                         epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)

    # progress writers
    progress_writers = [
        ProgressPrinter(tag='Training',
                        log_to_file=log_dir,
                        num_epochs=max_epochs,
                        gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(),
                                               epoch)

        if model_dir:
            z.save(
                os.path.join(model_dir,
                             network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 9312
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer / metric_denom
Example No. 31
def test_sweep_based_schedule(tmpdir, device_id):
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    ctf_data = '''\
0   |S0 3:1   |S1 3:1 |# <s>
0   |S0 4:1 |# A    |S1 32:1 |# ~AH
0   |S0 5:1 |# B    |S1 36:1 |# ~B
0   |S0 4:1 |# A    |S1 31:1 |# ~AE
0   |S0 7:1 |# D    |S1 38:1 |# ~D
0   |S0 12:1 |# I   |S1 47:1 |# ~IY
0   |S0 1:1 |# </s> |S1 1:1 |# </s>
2   |S0 60:1 |# <s> |S1 3:1 |# <s>
2   |S0 61:1 |# A   |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features  = StreamDef(field='S0', shape=input_dim,  is_sparse=True),
        labels    = StreamDef(field='S1', shape=input_dim,  is_sparse=True)
    )), randomize=False)

    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {
        in1       : mbs.streams.features,
        labels : mbs.streams.labels
    }

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map) 
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps)
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0
Example No. 32
def trainNet(args):

    # Crash doesn't seem to occur with this flag;
    # unfortunately, it reduces training speed by about 35%
    #os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

    # Instantiate generators for both training and
    # validation datasets. Grab their generator functions
    # TODO: Command line args
    # TODO: Better system for selecting testing/validation files than ranges?
    tFileShp = (1, 598)
    vFileShp = (0, 1)
    gen = Generator(featurePath, labelPath, tFileShp, batchSize, loadSize=3)
    valGen = Generator(featurePath, labelPath, vFileShp, batchSize, loadSize=1)
    g = gen.generator()
    vg = valGen.generator()

    inputVar = cntk.ops.input_variable((BoardDepth, BoardLength, BoardLength),
                                       name='features')
    policyVar = cntk.ops.input_variable((BoardSize))
    valueVar = cntk.ops.input_variable((2))

    if args.fp16:
        cntk.cast(inputVar, dtype=np.float16)
        cntk.cast(policyVar, dtype=np.float16)
        cntk.cast(valueVar, dtype=np.float16)

    net, epochOffset = loadModel(args, inputVar, netFilters, resBlockCount)

    # Show a heatmap of network outputs
    # over an input board state
    if args.heatMap:
        hmap = NetHeatMap(net, g)
        hmap.genHeatmap(args.heatMap)

    # Loss and accuracy
    policyLoss = cntk.cross_entropy_with_softmax(net.outputs[0], policyVar)
    valueLoss = cntk.cross_entropy_with_softmax(net.outputs[1], valueVar)
    loss = policyLoss + valueLoss

    # TODO: Figure out how to display/report both errors
    policyError = cntk.element_not(
        cntk.classification_error(net.outputs[0], policyVar))
    valueError = cntk.element_not(
        cntk.classification_error(net.outputs[1], valueVar))
    #error      = (valueError + policyError) / 2
    #error       = valueError
    error = policyError

    if args.fp16:
        loss = cntk.cast(loss, dtype=np.float32)
        error = cntk.cast(error, dtype=np.float32)

    lrc = args.lr
    if args.cycleLr[0]:
        lrc = learningRateCycles(*args.cycleLr, gen.stepsPerEpoch,
                                 args.cycleMax)
        lrc = lrc * maxEpochs
    elif args.optLr:
        lrc = findOptLr(maxEpochs, *args.optLr, gen.stepsPerEpoch)

    lrc = cntk.learners.learning_parameter_schedule(lrc, batchSize, batchSize)
    learner = cntk.adam(net.parameters,
                        lrc,
                        momentum=0.9,
                        minibatch_size=batchSize,
                        l2_regularization_weight=0.0001)
    #learner = cntk.adadelta(net.parameters, lrc, l2_regularization_weight=0.0001) # Test adelta out!

    # TODO: Figure out how to write multiple 'metrics'
    tbWriter = cntk.logging.TensorBoardProgressWriter(freq=1,
                                                      log_dir='./TensorBoard/',
                                                      model=net)
    progressPrinter = cntk.logging.ProgressPrinter(tag='Training',
                                                   num_epochs=maxEpochs)
    trainer = cntk.Trainer(net, (loss, error), learner,
                           [progressPrinter, tbWriter])

    # TODO: Replace model load with loading/saving checkpoints!
    # So we can store learners state et al
    #trainer.restore_from_checkpoint(findLatestModel('latest'))
    #checkpointFreq = gen.stepsPerEpoch // checkpointFreq

    ls = []
    losses = []
    #valueAccs   = []
    #policyAccs  = []

    for epoch in range(maxEpochs):

        miniBatches = 0
        while miniBatches < gen.stepsPerEpoch:
            X, Y, W = next(g)
            miniBatches += 1
            trainer.train_minibatch({
                net.arguments[0]: X,
                policyVar: Y,
                valueVar: W
            })
            ls.append(trainer.previous_minibatch_loss_average)

        trainer.summarize_training_progress()
        policyAcc, valueAcc = printAccuracy(net, 'Validation Acc %', vg,
                                            valGen.stepsPerEpoch)

        losses.append([epoch, sum(ls) / gen.stepsPerEpoch])
        ls.clear()
        #policyAccs.append([epoch, policyAcc])
        #valueAccs.append([epoch, valueAcc])

        net.save(saveDir + netName + '_{}_{}_{}_{:.3f}.dnn'.format(
            epoch + 1 + epochOffset, policyAcc, valueAcc, losses[epoch][1]))
Example No. 33
def create_network(input_vocab_dim, label_vocab_dim):
    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')
    raw_input = sequence.input(shape=(input_vocab_dim),
                               sequence_axis=input_seq_axis,
                               name='raw_input')
    raw_labels = sequence.input(shape=(label_vocab_dim),
                                sequence_axis=label_seq_axis,
                                name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1,
                                    0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start,
                                                      is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH,
         encoder_outputC) = LSTMP_component_with_self_stabilization(
             encoder_outputH.output, hidden_dim, hidden_dim, future_value,
             future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH,
                                                      label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC,
                                                      label_sequence)

    # Decoder
    decoder_history_hook = alias(
        label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label,
                                   label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH,
         decoder_outputC) = LSTMP_component_with_self_stabilization(
             decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH,
             recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share,
                 {decoder_history_hook.output: net_output.output})

    return {
        'raw_input': raw_input,
        'raw_labels': raw_labels,
        'ce': ce,
        'pe': errs,
        'ng': ng,
        'output': z
    }
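
A minimal, hedged usage sketch for this factory: the vocabulary sizes and learning rate are assumptions, and Trainer, sgd and learning_parameter_schedule are assumed to be imported from cntk alongside the symbols already used above.

net = create_network(input_vocab_dim=69, label_vocab_dim=69)
lr = learning_parameter_schedule(0.005)
trainer = Trainer(net['output'], (net['ce'], net['pe']), [sgd(net['output'].parameters, lr)])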
Ejemplo n.º 34
0
def train():

    # TODO: Need to add a method that reads exact sample size when
    # we're loading data that's already been converted
    #convertData(dataPath, 'intel', threshold, timeSteps, timeShift, seqDist)

    input = cntk.sequence.input_variable((numFeatures), name='features')
    label = cntk.input_variable((numClasses), name='label')

    trainReader = createReader('./data/intel_train.ctf', True, numFeatures,
                               numClasses)
    validReader = createReader('./data/intel_valid.ctf', False, numFeatures,
                               numClasses)

    trainInputMap = {
        input: trainReader.streams.features,
        label: trainReader.streams.labels
    }

    validInputMap = {
        input: validReader.streams.features,
        label: validReader.streams.labels
    }

    model = createModel(input, numClasses, lstmLayers, lstmSize)
    z = model(input)

    loss = cntk.cross_entropy_with_softmax(z, label)
    accy = cntk.element_not(cntk.classification_error(
        z, label))  # Print accuracy %, not error!

    lr = cntk.learning_parameter_schedule(0.05, batchSize)
    learner = cntk.adam(
        z.parameters, lr, 0.9
    )  #, l2_regularization_weight=0.00001, gradient_clipping_threshold_per_sample=5.0
    #tbWriter    = cntk.logging.TensorBoardProgressWriter(1, './Tensorboard/', model=model)
    printer = cntk.logging.ProgressPrinter(100, tag='Training')
    trainer = cntk.Trainer(z, (loss, accy), learner, [printer])

    # TODO: These should be automatically detected!
    samplesPerSeq = timeSteps
    sequences = 8709
    validSeqs = 968

    minibatchSize = batchSize * samplesPerSeq
    minibatches = sequences // batchSize
    validBatches = validSeqs // batchSize

    cntk.logging.log_number_of_parameters(z)
    print(
        "Input days: {}; Looking for +- {:.1f}% change {} days ahead;".format(
            samplesPerSeq, threshold * 100.0, timeShift))
    print("Total Sequences: {}; {} epochs; {} minibatches per epoch;".format(
        sequences + validSeqs, numEpochs, minibatches + validBatches))

    # Testing out custom data reader
    reader = DataReader('./data/intel_train.ctf', numFeatures, numClasses,
                        batchSize, timeSteps, False)
    testReader = DataReader('./data/intel_valid.ctf', numFeatures, numClasses,
                            batchSize, timeSteps, False)

    for e in range(numEpochs):
        # Train network
        for b in range(minibatches):
            X, Y = next(reader)
            trainer.train_minibatch({z.arguments[0]: X, label: Y})
        trainer.summarize_training_progress()

        # Look at data we've not trained on (validation)
        for b in range(validBatches):
            X, Y = next(testReader)
            trainer.test_minibatch({z.arguments[0]: X, label: Y})
        trainer.summarize_test_progress()
Ejemplo n.º 35
0
                                   C.sigmoid)


def create_model(features):
    with C.layers.default_options(init=C.layers.glorot_uniform(),
                                  activation=C.sigmoid):
        h = features
        for _ in range(num_hidden_layers):
            h = C.layers.Dense(hidden_layers_dim)(h)
        last_layer = C.layers.Dense(num_output_classes, activation=None)
        return last_layer(h)


z = create_model(input)
loss = C.cross_entropy_with_softmax(z, label)
eval_error = C.classification_error(z, label)
# Instantiate the trainer object to drive the model training
learning_rate = 0.5
lr_schedule = C.learning_parameter_schedule(learning_rate)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, eval_error), [learner])


##################################################################################################################
# Define a utility function to compute the moving average sum.
# A more efficient implementation is possible with the np.cumsum() function
def moving_average(a, w=10):
    if len(a) < w:
        return a[:]  # Need to send a copy of the array
    return [
        val if idx < w else sum(a[(idx - w):idx]) / w
        for idx, val in enumerate(a)
    ]
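
A tiny usage sketch with made-up loss values, just to show how the helper smooths a list of per-minibatch losses:

raw_losses = [0.9, 0.85, 0.8, 0.7, 0.72, 0.65, 0.6]
print(moving_average(raw_losses, w=3))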
Ejemplo n.º 36
0
def convnetlrn_cifar10_dataaug(reader_train,
                               reader_test,
                               epoch_size=50000,
                               max_epochs=80):
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = cntk.input((num_channels, image_height, image_width))
    label_var = cntk.input((num_classes))

    # apply model to input
    scaled_input = cntk.element_times(cntk.constant(0.00390625), input_var)

    with cntk.layers.default_options(activation=cntk.relu, pad=True):
        z = cntk.layers.Sequential([
            cntk.layers.For(
                range(2), lambda: [
                    cntk.layers.Convolution2D((3, 3), 64),
                    cntk.layers.Convolution2D((3, 3), 64),
                    LocalResponseNormalization(1.0, 4, 0.001, 0.75),
                    cntk.layers.MaxPooling((3, 3), (2, 2))
                ]),
            cntk.layers.For(
                range(2), lambda i:
                [cntk.layers.Dense([256, 128][i]),
                 cntk.layers.Dropout(0.5)]),
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.cross_entropy_with_softmax(z, label_var)
    pe = cntk.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = cntk.learning_rate_schedule(
        lr_per_sample,
        unit=cntk.learners.UnitType.sample,
        epoch_size=epoch_size)
    mm_time_constant = [0] * 20 + [600] * 20 + [1200]
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = cntk.learners.momentum_sgd(
        z.parameters,
        lr_schedule,
        mm_schedule,
        unit_gain=True,
        l2_regularization_weight=l2_reg_weight)
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    cntk.logging.log_number_of_parameters(z)
    print()

    # perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()
        z.save(
            os.path.join(model_path,
                         "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
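
A hedged invocation sketch; constructing reader_train and reader_test (typically a MinibatchSource over the CIFAR-10 map files, as in the CNTK examples) is assumed and not shown here.

test_error = convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs=80)
print("Final test error: {:.2f}%".format(test_error * 100))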
def main():
    print("\nBegin binary classification (two-node technique) \n")
    print("Using CNTK version = " + str(C.__version__) + "\n")

    dirname = os.path.dirname(__file__)

    input_dim = 12
    hidden_dim = 20
    output_dim = 2
    onnx_path = os.path.join(dirname, r"..\HeartDiseasePrediction\Assets")
    train_file = os.path.join(dirname, r"input\TrainingData.txt")
    test_file  = os.path.join(dirname, r"input\TestData.txt")
    
    # 1. create network
    X = C.ops.input_variable(input_dim, np.float32)
    Y = C.ops.input_variable(output_dim, np.float32)
    print("Creating a 12-20-2 tanh-softmax NN ")
    
    with C.layers.default_options(init=C.initializer.uniform(scale=0.01, seed=1)):
        hLayer = C.layers.Dense(hidden_dim, activation=C.ops.tanh, name='hidLayer')(X) 
        oLayer = C.layers.Dense(output_dim, activation=None, name='outLayer')(hLayer)
    
    nnet = oLayer
    model = C.ops.softmax(nnet)

    # 2. create learner and trainer
    print("Creating a cross entropy batch=10 SGD LR=0.005 Trainer ")
    tr_loss = C.cross_entropy_with_softmax(nnet, Y)
    tr_clas = C.classification_error(nnet, Y)
    max_iter = 5000
    batch_size = 10
    learn_rate = 0.005
    learner = C.sgd(nnet.parameters, learn_rate)
    trainer = C.Trainer(nnet, (tr_loss, tr_clas), [learner])
  
    # 3. create reader for train data
    rdr = create_reader(train_file, input_dim, output_dim, rnd_order=True, sweeps=C.io.INFINITELY_REPEAT)
    heart_input_map = {
        X : rdr.streams.x_src,
        Y : rdr.streams.y_src
    }
  
    # 4. train
    print("\nStarting training \n")
    for i in range(0, max_iter):
        curr_batch = rdr.next_minibatch(batch_size, input_map=heart_input_map)
        trainer.train_minibatch(curr_batch)
        if i % int(max_iter/10) == 0:
            mcee = trainer.previous_minibatch_loss_average
            macc = (1.0 - trainer.previous_minibatch_evaluation_average) * 100
            print("batch %4d: mean loss = %0.4f, accuracy = %0.2f%% " % (i, mcee, macc))
            trainer.summarize_training_progress()
    
    print("\nTraining complete")

    # Export as ONNX
    model.save(os.path.join(onnx_path, "Heart.onnx"), format=C.ModelFormat.ONNX)
  
    # 5. evaluate model using all data
    print("\nEvaluating accuracy using built-in test_minibatch() \n")
    rdr = create_reader(test_file, input_dim, output_dim, rnd_order=False, sweeps=1)
    heart_input_map = {
        X : rdr.streams.x_src,
        Y : rdr.streams.y_src
    }
    num_test = 91
    all_test = rdr.next_minibatch(num_test, input_map=heart_input_map)
    acc = (1.0 - trainer.test_minibatch(all_test)) * 100
    print("Classification accuracy on the %d data items = %0.2f%%" % (num_test,acc))
   
    unknown = np.array([1, 0, 0, 0, 1, 2, 0.0370370373, 0, 0.832061052, 0, 1, 0.6458333], dtype=np.float32)
    predicted = model.eval(unknown)
    print(predicted)

    # (use trained model to make prediction)
    print("\nEnd Cleveland Heart Disease classification ")
Ejemplo n.º 38
0
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = input_variable(
        (num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with default_options(activation=relu):
        z = Sequential([
            Convolution3D((3, 3, 3), 64, pad=True),
            MaxPooling((1, 2, 2), (1, 2, 2)),
            For(
                range(3), lambda i: [
                    Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                    Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                    MaxPooling((2, 2, 2), (2, 2, 2))
                ]),
            For(range(2), lambda: [Dense(1024), Dropout(0.5)]),
            Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 1322  # for now we manually specify epoch size
    minibatch_size = 4

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = learning_rate_schedule(lr_per_sample,
                                         epoch_size=epoch_size,
                                         unit=UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant],
                                                     epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(
                minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = 332
    minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(
            minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({
            input_var: videos,
            label_var: labels
        }) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
Ejemplo n.º 39
0
def create_criterion_function(model, labels):
    loss = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return loss, errs
Ejemplo n.º 40
0
def create_criterion_function_preferred(model, labels):
    ce = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return ce, errs  # (model, labels) -> (loss, error metric)
Ejemplo n.º 41
0
# Set up NN
input_dim = 4
hidden_dim = 50
num_output_classes = 3
input = cntk.input_variable(input_dim)
label = cntk.input_variable(num_output_classes)

# create a reader to read from the file
reader_train = create_reader(
    "D:/Users/Sachit/source/repos/SamplesRepo/IrisData/IrisData/iris-data/trainData_cntk.txt",
    True, input_dim, num_output_classes)

# Create the model
z = create_model(input, hidden_dim, num_output_classes)
loss = cntk.cross_entropy_with_softmax(z, label)
label_error = cntk.classification_error(z, label)

learning_rate = 0.2
lr_schedule = cntk.learning_parameter_schedule(learning_rate)
learner = cntk.sgd(z.parameters, lr_schedule)
trainer = cntk.Trainer(z, (loss, label_error), [learner])

#Init the params for trainer
minibatch_size = 120
num_iterations = 20

# Map the data streams to input and labels
input_map = {
    label: reader_train.streams.labels,
    input: reader_train.streams.features
}
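
The training loop that would typically follow this setup, sketched with the variables defined above (the logging format is illustrative only):

for i in range(num_iterations):
    data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
    trainer.train_minibatch(data)
    print("iteration {}: loss = {:.4f}, error = {:.2f}%".format(
        i, trainer.previous_minibatch_loss_average,
        trainer.previous_minibatch_evaluation_average * 100))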
def create_vgg16():

    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input
    # remove mean value
    input = minus(feature_var,
                  constant([[[104]], [[117]], [[124]]]),
                  name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 64, name='conv1_{}'.format(i)),
                    Activation(activation=relu, name='relu1_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool1'),
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 128, name='conv2_{}'.format(i)),
                    Activation(activation=relu, name='relu2_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool2'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 256, name='conv3_{}'.format(i)),
                    Activation(activation=relu, name='relu3_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool3'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 512, name='conv4_{}'.format(i)),
                    Activation(activation=relu, name='relu4_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool4'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 512, name='conv5_{}'.format(i)),
                    Activation(activation=relu, name='relu5_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool5'),
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8')
        ])(input)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)
    pe5 = C.classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
Ejemplo n.º 43
0
def create_criterion_function(model):
    labels = C.placeholder(name='labels')
    ce = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return C.combine([ce, errs])  # (features, labels) -> (loss, metric)
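
A hedged sketch of how a placeholder-based criterion like this is usually bound before training; model is assumed to be the network Function and num_labels the label dimension, neither of which is defined above.

criterion = create_criterion_function(model)
criterion.replace_placeholders({criterion.placeholders[0]: C.input_variable(num_labels, name='labels')})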
Ejemplo n.º 44
0
    def create_model(self):
        modeli = C.layers.Sequential([
            # Convolution layers
            C.layers.Convolution2D((1, 3),
                                   num_filters=8,
                                   pad=True,
                                   reduction_rank=0,
                                   activation=C.ops.tanh,
                                   name='conv_a'),
            C.layers.Convolution2D((1, 3),
                                   num_filters=16,
                                   pad=True,
                                   reduction_rank=1,
                                   activation=C.ops.tanh,
                                   name='conv2_a'),
            C.layers.Convolution2D((1, 3),
                                   num_filters=32,
                                   pad=False,
                                   reduction_rank=1,
                                   activation=C.ops.tanh,
                                   name='conv3_a'),
            ######
            # Dense layers
            #C.layers.Dense(128, activation=C.ops.relu,name='dense1_a'),
            #C.layers.Dense(64, activation=C.ops.relu,name='dense2_a'),
            C.layers.Dense(361, activation=C.ops.relu, name='dense3_a')
        ])(self._input)
        ### target
        modelt = C.layers.Sequential(
            [C.layers.Dense(360, activation=C.ops.relu,
                            name='dense4_a')])(self._target)
        ### concatenate both processed target and observations
        inputs = C.ops.splice(modeli, modelt)
        ### Use input to predict next hidden state, and generate
        ### next observation
        model = C.layers.Sequential([
            ######
            C.layers.Dense(720, activation=C.ops.relu, name='dense5_a'),
            # Recurrence
            C.layers.Recurrence(C.layers.LSTM(2048, init=C.glorot_uniform()),
                                name='lstm_a'),
            C.layers.Dense(1024, activation=None)
        ])(inputs)
        ######
        # Prediction
        direction = C.layers.Sequential([
            C.layers.Dense(720, activation=None, name='dense6_a'),
            C.layers.Dense(360, activation=C.ops.softmax, name='dense7_a')
        ])(model)
        velocity = C.layers.Sequential([
            C.layers.Dense(128, activation=C.ops.relu),
            C.layers.Dense(64, activation=None),
            C.layers.Dense(1, activation=None)
        ])(model)
        model = C.ops.splice(direction, velocity)

        if self._load_model:
            model = C.load_model('dnns/action_predicter_f.dnn')
            direction = model[0:360]
            velocity = model[360]

        print(model)
        loss = C.squared_error(direction, self._output) + C.squared_error(
            velocity, self._output_velocity)
        error = C.classification_error(direction,
                                       self._output) + C.squared_error(
                                           velocity, self._output_velocity)

        learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
        progress_printer = C.logging.ProgressPrinter(tag='Training')
        trainer = C.Trainer(model, (loss, error), learner, progress_printer)
        return model, loss, learner, trainer
Ejemplo n.º 45
0
 def criterion(x, y):
     z = model(normalize(x))
     ce = cross_entropy_with_softmax(z, y)
     errs = classification_error(z, y)
     return (Function.NamedOutput(loss=ce),
             Function.NamedOutput(metric=errs))
Ejemplo n.º 46
0
def criterion(data, label_one_hot):
    z = model(data)  # apply model. Computes a non-normalized log probability for every output class.
    loss   = C.cross_entropy_with_softmax(z, label_one_hot) # this applies softmax to z under the hood
    metric = C.classification_error(z, label_one_hot)
    return loss, metric
Ejemplo n.º 47
0
def create_criterion_function_preferred(model, labels):
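    # Note: unlike cross_entropy_with_softmax, this hand-written form assumes `model`
    # already outputs normalized probabilities (e.g. ends in a softmax); otherwise
    # log(model) is not a valid cross-entropy term.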
    ce = -C.reduce_sum(labels * C.ops.log(model))
    errs = C.classification_error(model, labels)
    return ce, errs
Ejemplo n.º 48
0
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
                       model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False):

    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight = l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler() # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size     = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer/metric_denom
Ejemplo n.º 49
0

inputs = C.input_variable(shape=(num_features), dtype=np.float32, name="features")
# Z is the model; a composition of operation. Maps [(input_dim) -> (num_classes)]
num_hidden_layers = 2
hidden_layers_dim = 10

# Choose your model
z = create_model(inputs, num_hidden_layers, hidden_layers_dim)
z = fully_connected_classifier_net(inputs, num_classes, hidden_layers_dim, num_hidden_layers, C.sigmoid)

print(z.parameters)
label = C.input_variable(1, dtype=np.float32, name="label")
onehot = C.one_hot(label, num_classes)
loss = C.cross_entropy_with_softmax(z, onehot)
eval_error = C.classification_error(z, onehot)

# Instantiate the trainer object to drive the model training
learning_rate = 0.5
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, eval_error), [learner])

# Define a utility that prints the training progress
def print_training_progress(trainer, mb, frequency, verbose=1):
    training_loss, eval_error = "NA", "NA"

    if mb % frequency == 0:
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if verbose:
            print("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(
                mb, training_loss, eval_error * 100))

    return mb, training_loss, eval_error
Ejemplo n.º 50
0
def TrainAndValidate(trainfile):

    #*****Hyper-Parameters******
    q_max_words = 12
    p_max_words = 50
    emb_dim = 50
    num_classes = 3
    minibatch_size = 250
    epoch_size = 500000  #No.of samples in training set
    total_epochs = 20  #Total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    #****** Create placeholders for reading Training Data  ***********
    query_input_var = C.ops.input_variable((1, q_max_words, emb_dim),
                                           np.float32,
                                           is_sparse=False)
    passage_input_var = C.ops.input_variable((1, p_max_words, emb_dim),
                                             np.float32,
                                             is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim,
                                 passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = cnn_network(query_input_var, passage_input_var, num_classes)
    loss = C.binary_cross_entropy(model_output, output_var)
    pe = C.classification_error(model_output, output_var)
    lr_per_minibatch = C.learning_rate_schedule(0.03, C.UnitType.minibatch)
    learner = C.adagrad(model_output.parameters, lr=lr_per_minibatch)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=total_epochs)

    #************Create Trainer with model_output object, learner and loss parameters*************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)
    print()

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):  # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # training step
            sample_count += data[
                output_var].num_samples  # count samples processed so far

        trainer.summarize_training_progress()

        model_output.save(
            "CNN_{}.dnn".format(epoch))  # Save the model for every epoch

        #*** Find metrics on validation set after every epoch ******#  (Note : you can skip doing this for every epoch instead to optimize the time, do it after every k epochs)
        predicted_labels = []
        for i in range(len(validation_query_vectors)):
            queryVec = np.array(validation_query_vectors[i],
                                dtype="float32").reshape(
                                    1, q_max_words, emb_dim)
            passageVec = np.array(validation_passage_vectors[i],
                                  dtype="float32").reshape(
                                      1, p_max_words, emb_dim)
            scores = model_output(
                queryVec,
                passageVec)[0]  # do forward-prop on model to get score
            if scores[0] > scores[1] and scores[0] > scores[2]:
                predictLabel = 1
            elif scores[1] > scores[2]:
                predictLabel = 2
            else:
                predictLabel = 3
            # predictLabel = 1 if scores[1]>=scores[0] else 0
            predicted_labels.append(predictLabel)
        metrics = precision_recall_fscore_support(np.array(validation_labels),
                                                  np.array(predicted_labels),
                                                  average='weighted')
        #print("precision : "+str(metrics[0])+" recall : "+str(metrics[1])+" f1 : "+str(metrics[2])+"\n")

    return model_output
Ejemplo n.º 51
0
 def criterion(x, y):
     z = model(normalize(x))
     ce = cross_entropy_with_softmax(z, y)
     errs = classification_error(z, y)
     return (ce, errs)
Ejemplo n.º 52
0
def convnetlrn_cifar10_dataaug(reader_train,
                               reader_test,
                               epoch_size=50000,
                               max_epochs=80):
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # input normalization: 1/256 = 0.00390625
    scaled_input = C.element_times(C.constant(0.00390625), input_var)
    f = GlobalAveragePooling()
    f.update_signature((1, 8, 8))
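    # Note: `f` is defined here but not referenced by the Sequential model below.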

    with C.layers.default_options():
        z = C.layers.Sequential([
            C.layers.For(
                range(1), lambda: [
                    C.layers.Convolution2D(
                        (3, 3), 32, strides=(1, 1), pad=True),
                    C.layers.Activation(activation=C.relu),
                    C.layers.Convolution2D(
                        (1, 1), 64, strides=(1, 1), pad=False),
                    C.layers.MaxPooling((3, 3), strides=(2, 2), pad=True)
                ]),
            C.layers.For(
                range(1), lambda: [
                    C.layers.Convolution2D(
                        (3, 3), 128, strides=(1, 1), pad=True),
                    C.layers.Activation(activation=C.relu),
                    C.layers.Convolution2D(
                        (1, 1), 256, strides=(1, 1), pad=False),
                    C.layers.Activation(activation=C.relu),
                    C.layers.MaxPooling((3, 3), strides=(2, 2), pad=True)
                ]),
            C.layers.For(
                range(1), lambda: [
                    C.layers.Convolution2D(
                        (3, 3), 256, strides=(1, 1), pad=True),
                    C.layers.Activation(activation=C.relu),
                    C.layers.Convolution2D(
                        (1, 1), 256, strides=(1, 1), pad=False),
                    C.layers.Activation(activation=C.relu),
                    C.layers.AveragePooling((8, 8), strides=(1, 1), pad=False)
                ]),
            C.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    minibatch_size = 64
    # Set learning parameters
    # learning rate
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=epoch_size)
    # momentum
    mms = [0] * 20 + [0.9983347214509387] * 20 + [0.9991670137924583]
    mm_schedule = C.learners.momentum_schedule_per_sample(
        mms, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = C.learners.momentum_sgd(z.parameters,
                                      lr_schedule,
                                      mm_schedule,
                                      unit_gain=True,
                                      l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z)
    print()
    # perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0

        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

    # save model
    modelname = "NIN_test4.dnn"
    z.save(os.path.join(model_path, modelname))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16
    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        sample_count += current_minibatch
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
Ejemplo n.º 53
0
def train_model(base_model_file,
                train_map_file,
                test_map_file,
                input_resolution,
                num_epochs,
                mb_size,
                max_train_images,
                lr_per_mb,
                momentum_per_mb,
                l2_reg_weight,
                dropout_rate,
                freeze_weights,
                num_channels=3):

    #init
    image_width = input_resolution
    image_height = input_resolution
    epoch_size_test = len(readTable(test_map_file))
    epoch_size_train = len(readTable(train_map_file))
    epoch_size_train = min(epoch_size_train, max_train_images)
    num_classes = max(
        ToIntegers(getColumn(readTable(train_map_file), 1)) +
        ToIntegers(getColumn(readTable(test_map_file), 1))) + 1

    # Create the minibatch source
    minibatch_source_train = create_mb_source(train_map_file, image_width,
                                              image_height, num_channels,
                                              num_classes, True)
    minibatch_source_test = create_mb_source(test_map_file, image_width,
                                             image_height, num_channels,
                                             num_classes, False)

    # Define mapping from reader streams to network inputs
    label_input = input_variable(num_classes)
    image_input = input_variable((num_channels, image_height, image_width),
                                 name="input")
    input_map = {
        image_input: minibatch_source_train['features'],
        label_input: minibatch_source_train['labels']
    }

    # Instantiate the transfer learning model and loss function
    cntkModel = create_model(base_model_file, image_input, num_classes,
                             dropout_rate, freeze_weights)
    ce = cross_entropy_with_softmax(cntkModel, label_input)
    pe = classification_error(cntkModel, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(cntkModel.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    trainer = Trainer(cntkModel, (ce, pe), learner, progress_writers)

    # Run training epochs
    print(
        "Training transfer learning model for {0} epochs (epoch_size_train = {1})."
        .format(num_epochs, epoch_size_train))
    errsTest = []
    errsTrain = []
    log_number_of_parameters(cntkModel)

    for epoch in range(num_epochs):
        # Train model
        err_numer = 0
        sample_counts = 0
        while sample_counts < epoch_size_train:  # Loop over minibatches in the epoch
            sample_count = min(mb_size, epoch_size_train - sample_counts)
            data = minibatch_source_train.next_minibatch(sample_count,
                                                         input_map=input_map)
            trainer.train_minibatch(data)  # Update model with it
            sample_counts += sample_count  # Count samples processed so far
            err_numer += trainer.previous_minibatch_evaluation_average * sample_count

            if sample_counts % (100 * mb_size) == 0:
                print("Training: processed {0} samples".format(sample_counts))

            # Visualize training images
            # img_data = data[image_input].asarray()
            # for i in range(len(img_data)):
            #     debugImg = img_data[i].squeeze().swapaxes(0, 1).swapaxes(1, 2) / 255.0
            #     imshow(debugImg)

        # Compute accuracy on training and test sets
        errsTrain.append(err_numer / float(sample_counts))
        trainer.summarize_training_progress()
        errsTest.append(
            cntkComputeTestError(trainer, minibatch_source_test, mb_size,
                                 epoch_size_test, input_map))
        trainer.summarize_test_progress()

        # Plot training progress
        plt.plot(errsTrain, 'b-', errsTest, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), test error (green)')
        plt.draw()
    return cntkModel
Ejemplo n.º 54
0
def create_criterion_function(model, labels):
    loss = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return loss, errs 
Ejemplo n.º 55
0
import numpy
from numpy import float32  # the bare float32 references below rely on this import
import cntk

training_set = numpy.array([[0,0,1,0],[0,1,0,1],[1,0,0,1],[1,1,1,0]], dtype=float32)

x = cntk.input_variable(2)
y = cntk.input_variable(2)

def getNetwork(_x):
    with cntk.layers.default_options(init=cntk.layers.glorot_uniform(), activation=cntk.relu):
        res = _x
        res = cntk.layers.Dense(4, name="l1")(res)
        res = cntk.layers.Dense(4, name="l2")(res)
        res = cntk.layers.Dense(2, name="lo", activation=None)(res)
        return res

fnn = getNetwork(x)
loss = cntk.cross_entropy_with_softmax(fnn, y)
errs = cntk.classification_error(fnn, y)
trainer = cntk.Trainer(fnn, (loss, errs), [cntk.sgd(fnn.parameters, cntk.learning_rate_schedule(0.03, cntk.UnitType.minibatch))])

for times in range(1000):
    for data in training_set:
        batch = {x: numpy.array(data[:2],dtype=float32).reshape(2), y:numpy.array(data[2:],dtype = float32).reshape(2)}
        trainer.train_minibatch(batch)
        print("\r"+str(times), end="")
print("")

#print(fnn.lo.b.value)

out = cntk.softmax(fnn)
print(numpy.argmax(out.eval({x: numpy.array([[0,0]],dtype=float32).reshape(2)})))
print(numpy.argmax(out.eval({x: numpy.array([[0,1]],dtype=float32).reshape(2)})))
print(numpy.argmax(out.eval({x: numpy.array([[1,0]],dtype=float32).reshape(2)})))
Ejemplo n.º 56
0
data = cntk.input_variable(input_dim)
W = cntk.Parameter((input_dim, num_classes),
                   init=cntk.glorot_uniform(),
                   name='W')
b = cntk.Parameter((num_classes, ), init=0, name='b')
model = cntk.times(data, W) + b

# Define the CNTK criterion function. A criterion function maps
# (input vectors, labels) to a loss function and an optional additional
# metric. The loss function is used to train the model parameters.
# We use cross entropy as a loss function.
label_one_hot = cntk.input_variable(num_classes, is_sparse=True)
loss = cntk.cross_entropy_with_softmax(
    model,
    label_one_hot)  # this applies softmax to model's output under the hood
metric = cntk.classification_error(model, label_one_hot)
criterion = cntk.combine(
    [loss, metric])  # criterion is a tuple-valued function (loss, metric)

# Learner object. The learner implements the update algorithm, in this case plain SGD.
learning_rate = 0.1
learner = cntk.sgd(model.parameters,
                   cntk.learning_parameter_schedule(learning_rate))

# Trainer.
minibatch_size = 32
progress_writer = cntk.logging.ProgressPrinter(
    50)  # helper for logging progress; log every 50 minibatches
trainer = cntk.Trainer(None, criterion, [learner], [progress_writer])

# Train!
        for _ in range(num_hidden_layers):
            input = cntk.layers.Dense(hidden_layers_dim)(input)
        r = cntk.layers.Dense(num_output_classes, activation = None)(input)
        return r

# Scale the input to 0-1 range by dividing each pixel by 255.
input_s_normalized = input/255.0
input_s_squared = cntk.square(input_s_normalized)
input_s_sqrt = cntk.sqrt(input_s_normalized)
z_model = create_model(input_s_normalized)

# Define the loss function for is_training
loss = cntk.cross_entropy_with_softmax(z_model, label)

# Classification error evaluation
label_error = cntk.classification_error(z_model, label)

# Configure training parameters
# Instantiate the trainer object to drive the model training
learning_rate = 0.2
lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)

# Stochastic Gradient Descent learner
learner = cntk.sgd(z_model.parameters, lr_schedule)
trainer = cntk.Trainer(z_model, (loss, label_error), [learner])

# Define a utility function to compute the moving average sum.
# A more efficient implementation is possible with the np.cumsum() function
def moving_average(a, w=5):
    if len(a) < w:
        return a[:]    # Need to send a copy of the array