def build_graph(noise_shape, image_shape, G_progress_printer, D_progress_printer):
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2*(X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method = 'share',
        substitutions = {X_real_scaled.output: X_fake.output}
    )

    # Create loss functions and configure optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    G_learner = C.fsadagrad(
        parameters = X_fake.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters = D_real.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )

    DistG_learner = C.train.distributed.data_parallel_distributed_learner(G_learner)
    
    # The following API marks a learner as the metric aggregator, which is used by
    # the trainer to determine the training progress.
    # It is required only when more than one learner is provided to a *single* trainer.
    # In this example, we use two trainers, each with a single learner, so it
    # is not required and is set automatically by CNTK for each single learner. However, if you
    # plan to use both learners with a single trainer, it needs to be called before
    # creating the trainer.
    #DistG_learner.set_as_metric_aggregator()

    DistD_learner = C.train.distributed.data_parallel_distributed_learner(D_learner)

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        DistG_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        DistD_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
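
A minimal usage sketch, not part of the scraped example, showing only how the two trainers returned by build_graph might be driven. The noise dimension, the shapes, and the image_batches iterator of NumPy arrays are assumptions, and noise_sample is a hypothetical helper.

import numpy as np
import cntk as C

def noise_sample(num_samples, noise_dim=100):
    # hypothetical helper: uniform noise in [-1, 1], a common choice for GAN inputs
    return np.random.uniform(-1.0, 1.0, size=(num_samples, noise_dim)).astype(np.float32)

X_real, X_fake, Z, G_trainer, D_trainer = build_graph(
    noise_shape=(100,), image_shape=(784,),
    G_progress_printer=C.logging.ProgressPrinter(50),
    D_progress_printer=C.logging.ProgressPrinter(50))

minibatch_size = 128
for x_batch in image_batches:  # assumed iterator yielding (minibatch_size, 784) float32 arrays
    # alternate one discriminator update with one generator update
    D_trainer.train_minibatch({X_real: x_batch, Z: noise_sample(minibatch_size)})
    G_trainer.train_minibatch({Z: noise_sample(minibatch_size)})

# distributed learners require an explicit shutdown of the communicator
C.train.distributed.Communicator.finalize()
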
Example 2
def train(streamf):
    global net
    net = nn(input_var)
    loss = cntk.losses.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)
    learning_rate = 0.01
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(140 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    minibatch_size = 1024
    max_epochs = 500
    epoch_size = 48985
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
Example 3
def train(streamf):
    global net
    minibatch_size = 1024
    max_epochs = 2000
    epoch_size = 48985
    net = nn(input_var)
    loss = cntk.losses.binary_cross_entropy(net, label_var)
    error = cntk.classification_error(net, label_var)
    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch,
                                              cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(200)
    learner = cntk.fsadagrad(net.parameters, lr_schedule,
                             momentum_as_time_constant)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner],
                           progress_writers=progres)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
Example 4
def train(streamf):
    input_var = cntk.input_variable(45, np.float32, name='features',
                                    dynamic_axes=cntk.axis.Axis.default_input_variable_dynamic_axes())
    label_var = cntk.input_variable(3, np.float32, name='labels')
    net = nn(input_var)
    loss = cntk.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)
    learning_rate = 0.02
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(5000 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    minibatch_size = 5000
    num_samples_per_sweep = 2000
    for i in range(0, num_samples_per_sweep):
        dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(dat1)
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if training_loss < 0.002:
            break
    return trainer
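
The two train(streamf) variants above assume a MinibatchSource whose streams are named features and labels. A minimal sketch of building such a source from a CTF-format file, assuming the field names |x and |y, the 45/3 dimensions of the last example, a hypothetical file name, and the nn model factory being defined elsewhere:

import cntk

def create_streamf(path="train.ctf", is_training=True):
    # hypothetical reader matching streamf.streams.features / streamf.streams.labels above
    deserializer = cntk.io.CTFDeserializer(path, cntk.io.StreamDefs(
        features=cntk.io.StreamDef(field='x', shape=45, is_sparse=False),
        labels=cntk.io.StreamDef(field='y', shape=3, is_sparse=False)))
    return cntk.io.MinibatchSource(
        deserializer, randomize=is_training,
        max_sweeps=cntk.io.INFINITELY_REPEAT if is_training else 1)

trainer = train(create_streamf())
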
Example 5
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.sequence.input_variable(((2 * context + 1) * feature_dim))
    labels = C.sequence.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.fsadagrad(z.parameters,
                          lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                      epoch_size),
                          momentum=C.momentum_as_time_constant_schedule(1000),
                          gradient_clipping_threshold_per_sample=15,
                          gradient_clipping_with_truncation=True)
    progress_printer = C.logging.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
Example 6
    def train(self):
        tmp_d = {"x": [], "y": []}
        num_list = []
        count = 0
        for idx, value in enumerate(self.series):
            if idx % self.h_dims == 0:
                num_list = []
                count += 1
                if (self.h_dims * count) > len(self.series):
                    break
            num_list.append(np.float32(value))
            increment_list = []
            for num in num_list:
                increment_list.append(num)
                tmp_d["x"].append(np.array(increment_list))
                tmp_d["y"].append(
                    np.array([np.float32(self.series[self.h_dims * count])]))

        x = {"train": tmp_d["x"]}
        y = {"train": np.array(tmp_d["y"])}

        z = self.create_model(self.input_node, self.h_dims)
        var_l = cntk.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
        learning_rate = 0.005
        lr_schedule = cntk.learning_parameter_schedule(learning_rate)
        loss = cntk.squared_error(z, var_l)
        error = cntk.squared_error(z, var_l)
        momentum_schedule = cntk.momentum_schedule(
            0.9, minibatch_size=self.batch_size)
        learner = cntk.fsadagrad(z.parameters,
                                 lr=lr_schedule,
                                 momentum=momentum_schedule)
        trainer = cntk.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, self.epochs):
            for x_batch, l_batch in self.next_batch(x, y, "train",
                                                    self.batch_size):
                trainer.train_minibatch({
                    self.input_node: x_batch,
                    var_l: l_batch
                })
            if epoch % (self.epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                    epoch, training_loss,
                    time.time() - start))
        return z
Example 7
def lstm_basic(x, y, epochs=1000, batch_size=100, input_dim=5):

    x_axes = [C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()]
    C.input_variable(1, dynamic_axes=x_axes)

    # input sequences
    input_seq = C.sequence.input_variable(1)

    # create the model
    z = create_model(input_seq, input_dim)

    # expected output (label), also the dynamic axes of the model output
    # is specified as the model of the label input
    lb = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, lb)

    # use squared error to determine error for now
    error = C.squared_error(z, lb)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)

    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(x, y, "train", batch_size):
            trainer.train_minibatch({input_seq: x1, lb: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss,
                time.time() - start))
    print("training took {0:.1f} sec".format(time.time() - start))

    return z, trainer, input_seq
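
Several of these sequence examples iterate with a next_batch(x, y, ds, batch_size) helper that is not shown (Example 10 uses a variant without the split argument). A plausible sketch, modeled on the CNTK time-series tutorials, assuming x[ds] is a list of variable-length input sequences and y[ds] the matching targets:

def next_batch(x, y, ds, batch_size):
    """Yield successive (sequence list, label array) pairs from the split `ds`."""
    def as_batch(data, start, count):
        return data[start:start + count]
    for i in range(0, len(x[ds]) - batch_size, batch_size):
        yield as_batch(x[ds], i, batch_size), as_batch(y[ds], i, batch_size)
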
Example 8
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    features = C.sequence.input_variable(((2*context+1)*feature_dim))
    labels = C.sequence.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.fsadagrad(z.parameters,
                          lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                          momentum=C.momentum_as_time_constant_schedule(1000),
                          gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    progress_printer = C.logging.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
Example 9
    def train (self, train_file, output_resources_pickle_file, \
        network_type = 'unidirectional', \
        num_epochs = 1, batch_size = 50, \
        dropout = 0.2, reg_alpha = 0.0, \
        num_hidden_units = 150, num_layers = 1):
        
        train_X, train_Y = self.reader.read_and_parse_training_data(train_file, output_resources_pickle_file) 

        print("Data Shape: ")
        print(train_X.shape) # (15380, 613)
        print(train_Y.shape) # (15380, 613, 8)      
        #self.wordvecs.shape (66962, 50)
        
        print("Hyper parameters:")
        print("output_resources_pickle_file = {}".format(output_resources_pickle_file))
        print("network_type = {}".format(network_type))
        print("num_epochs= {}".format(num_epochs ))
        print("batch_size = {}".format(batch_size ))
        print("dropout = ".format(dropout ))
        print("reg_alpha = {}".format(reg_alpha ))
        print("num_hidden_units = {}".format(num_hidden_units))
        print("num_layers = {}".format(num_layers ))

        # Instantiate the model function;
        features = C.sequence.input_variable(self.wordvecs.shape[0])
        labels = C.input_variable(train_Y.shape[2], dynamic_axes=[C.Axis.default_batch_axis()])
        self.model = self.__create_model(features, train_Y.shape[2], num_hidden_units, dropout)

        plot_path = "./lstm_model.png"
        plot(self.model, plot_path)        
        
        # Instantiate the loss and error function
        loss = C.cross_entropy_with_softmax(self.model, labels)
        error = C.classification_error(self.model, labels)

        # LR schedule
        learning_rate = 0.02
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(self.model.parameters, lr = lr_schedule, momentum = momentum_schedule, unit_gain = True)        

        # Setup the progress updater
        progress_printer = C.logging.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=num_epochs)

        # Instantiate the trainer. We have all data in memory. https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_feed_data.ipynb
        print('Start training')       
        train_summary = loss.train((train_X.astype('float32'), train_Y.astype('float32')), parameter_learners=[learner], callbacks=[progress_printer])
Example 10
def train(create_model, X, Y, epochs=500, batch_size=10, N=1):
    dim = Y.shape[1]

    # input sequences
    x = C.sequence.input_variable(dim)
    # create the model
    z = create_model(x, N=N, outputs=dim)

    # expected output (label), also the dynamic axes of the model output
    # is specified as the model of the label input
    l = C.input_variable(dim, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, l)
    # use squared error to determine error for now
    error = C.squared_error(z, l)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(X, Y, batch_size):
            trainer.train_minibatch({x: x1, l: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.5f}".format(epoch, training_loss))

    print("training took {0:.1f} sec".format(time.time() - start))

    return z
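
The create_model factory used by these sequence examples is not included in the snippets. A minimal sketch in the spirit of the CNTK LSTM regression tutorials, with an outputs parameter added to match the call above; the single-layer architecture and the dropout rate are assumptions:

import cntk as C

def create_model(x, N=5, outputs=1):
    """Hypothetical regressor: LSTM over the sequence, last step, dropout, dense readout."""
    with C.layers.default_options(initial_state=0.1):
        m = C.layers.Recurrence(C.layers.LSTM(N))(x)
        m = C.sequence.last(m)
        m = C.layers.Dropout(0.2)(m)
        m = C.layers.Dense(outputs)(m)
        return m
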
Example 11
def main():
    window_len = int(input("window_len: "))
    word_len = int(input("word_len: "))
    alphabet_len = int(input("alphabet_len: "))

    alpha_to_num_step = float(1 / alphabet_len)
    alpha_to_num_map = float(alpha_to_num_step / 2)

    source = "weather_JAN.csv"
    ts_data = pd.read_csv(source,
                          index_col="date",
                          parse_dates=["date"],
                          dtype=np.float32)
    sax_ret = sax_via_window(ts_data["temp"].values,
                             window_len,
                             word_len,
                             alphabet_size=alphabet_len,
                             nr_strategy="none",
                             z_threshold=0.01)

    my_sax = dict()
    for k, v in sax_ret.items():
        for i in v:
            my_sax[i] = k

    tmp_d = {"x": [], "y": []}
    for k, v in my_sax.items():
        num_list = [
            np.float32(((ord(char) - 96) * alpha_to_num_step) -
                       alpha_to_num_map) for char in v[:-1]
        ]
        increment_list = []
        for num in num_list:
            increment_list.append(num)
            tmp_d["x"].append(np.array(increment_list))
            tmp_d["y"].append(
                np.array([
                    np.float32("".join([
                        str(((ord(char) - 96) * alpha_to_num_step) -
                            alpha_to_num_map) for char in v[-1]
                    ]))
                ]))

    # FORMAT:
    # result_x[0] = [1]         result_y[0] = 3
    # result_x[1] = [1,4]       result_y[1] = 3
    # result_x[2] = [1,4,2]     result_y[2] = 3
    # result_x[3] = [1,4,2,2]   result_y[3] = 3
    # result_x[4] = [1,4,2,2,4] result_y[4] = 3
    #####

    result_x = dict()
    result_x["train"] = tmp_d["x"][:len(tmp_d["x"]) - 2000]
    result_x["test"] = tmp_d["x"][len(tmp_d["x"]) - 2000:len(tmp_d["x"]) -
                                  1000]
    result_x["val"] = tmp_d["x"][len(tmp_d["x"]) - 1000:len(tmp_d["x"])]

    result_y = dict()
    result_y["train"] = np.array(tmp_d["y"][:len(tmp_d["y"]) - 2000])
    result_y["test"] = np.array(tmp_d["y"][len(tmp_d["y"]) -
                                           2000:len(tmp_d["y"]) - 1000])
    result_y["val"] = np.array(tmp_d["y"][len(tmp_d["y"]) -
                                          1000:len(tmp_d["y"])])

    batch_size = window_len * (word_len - 1)
    h_dims = word_len

    epochs = input("Epochs: ")
    if not epochs == "":
        epochs = int(epochs)
    else:
        epochs = 100

    start_time = time.time()

    model_file = "{}_epochs.model".format(epochs)

    if not os.path.exists(model_file):
        x = C.sequence.input_variable(1)
        z = create_model(x, h_dims)
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        loss = C.squared_error(z, var_l)
        error = C.squared_error(z, var_l)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(z.parameters,
                              lr=lr_schedule,
                              momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, epochs):
            for x_batch, l_batch in next_batch(result_x, result_y, "train",
                                               batch_size):
                trainer.train_minibatch({x: x_batch, var_l: l_batch})

            if epoch % (epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))

        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for label_txt in ["train", "val", "test"]:
            print("mse for {}: {:.6f}".format(
                label_txt,
                get_mse(trainer, x, result_x, result_y, batch_size, var_l,
                        label_txt)))

        z.save(model_file)

    else:
        z = C.load_model(model_file)
        x = C.logging.find_all_with_name(z, "")[-1]

    # Print out all layers in the model
    print("Loading {} and printing all nodes:".format(model_file))
    node_outputs = C.logging.find_all_with_name(z, "")
    for n in node_outputs:
        print("  {}".format(n))

    results = []
    # predict
    # f, a = plt.subplots(2, 1, figsize=(12, 8))
    for j, ds in enumerate(["val", "test"]):
        fig = plt.figure()
        a = fig.add_subplot(2, 1, 1)
        results = []
        for x_batch, y_batch in next_batch(result_x, result_y, ds, batch_size):
            pred = z.eval({x: x_batch})
            results.extend(pred[:, 0])
        # because we normalized the input data we need to multiply the prediction
        # with SCALER to get the real values.
        a.plot((result_y[ds]).flatten(), label=ds + " raw")
        a.plot(np.array(results), label=ds + " pred")
        a.legend()

        fig.savefig("{}_chart_{}_epochs.jpg".format(ds, epochs))

    print("Delta: ", time.time() - start_time)

    return result_x, result_y, results
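
The get_mse helper called above is not part of the snippet. A sketch consistent with its call signature (trainer, input variable, data dicts, batch size, label variable, split name), assuming trainer.test_minibatch returns the average evaluation criterion of each minibatch and next_batch is the helper sketched earlier:

def get_mse(trainer, x, result_x, result_y, batch_size, var_l, label_txt):
    """Hypothetical helper: mean per-minibatch error over one data split."""
    total, batches = 0.0, 0
    for x_batch, y_batch in next_batch(result_x, result_y, label_txt, batch_size):
        total += trainer.test_minibatch({x: x_batch, var_l: y_batch})
        batches += 1
    return total / max(batches, 1)
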
Example 12
def test_learner_init():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify the reference minibatch size; the learning rate is given as a number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicit reference minibatch size is specified; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicit reference minibatch size is specified; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    #the learner's reference minibatch size is explicitly set to IGNORE (compatible mode):
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #the learner's reference minibatch size is explicitly set to IGNORE (compatible mode):
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=0.4,
                                momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=0.4,
                            minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters,
                    lr=0.4,
                    momentum=0.9,
                    variance_momentum=0.9,
                    minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=0.4,
                              momentum=0.9,
                              variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=0.4,
                            momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=0.4,
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters,
                  lr=[0.4, 0.1, 0.001],
                  minibatch_size=32,
                  epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=[0.4, 0.1, 0.001],
                                momentum=[0.9],
                                minibatch_size=32,
                                epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            minibatch_size=32,
                            epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters,
                    lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001],
                    variance_momentum=[0.9],
                    minibatch_size=32,
                    epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters,
                          lr=[0.4, 0.1, 0.001],
                          minibatch_size=32,
                          epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=[0.4, 0.1, 0.001],
                              momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32,
                              epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            momentum=[0.9],
                            minibatch_size=32,
                            epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=[0.4, 0.1, 0.001],
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32,
                         epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters,
               lr=lr_per_sample,
               momentum=momentum,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3],
                                                minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                                minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2],
                                                minibatch_size=1,
                                                epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
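
The assertions above all circle one idea: a learning rate or momentum value is only meaningful relative to a reference minibatch size, and the learner either rescales it to the minibatch actually fed or applies it as-is. A short sketch restating that contrast for fsadagrad; the model and the rate values are arbitrary placeholders:

import cntk as C

p = C.parameter(shape=(1,))
q = C.input_variable(1, needs_gradient=True) * p

# rate stated per sample (reference minibatch of 1); CNTK rescales it to the
# minibatch size that is actually fed at each step
per_sample = C.fsadagrad(q.parameters,
                         lr=C.learning_parameter_schedule(0.001, minibatch_size=1),
                         momentum=C.momentum_schedule(0.9, minibatch_size=1))

# rate applied to whatever minibatch arrives, with no rescaling ("compatible mode")
as_is = C.fsadagrad(q.parameters,
                    lr=C.learning_parameter_schedule(0.001),
                    momentum=C.momentum_schedule(0.9),
                    minibatch_size=C.learners.IGNORE)
assert as_is.is_compatible_mode()
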
Example 13
def test_learner_init_legacy():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test: when resetting the learning rate, the learner won't receive the reference minibatch size from the schedule;
    # the user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(
        learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: this is a logically invalid combination, but it was the only way to use the mean gradient and set the learning rate in the past.
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test: when resetting the learning rate, the learner won't receive the reference minibatch size from the schedule;
    # the user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(
        learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant,
                   unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum_time_constant,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                           unit=UnitType.sample)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant,
                unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2],
                                           unit=UnitType.sample,
                                           epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
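
The legacy schedules exercised above map roughly onto the newer API. A sketch of the correspondences implied by these tests, including the usual conversion from a momentum time constant tc to a per-sample momentum of exp(-1/tc); both forms are shown only for illustration:

import math
import cntk as C

# per-sample learning rate: legacy UnitType.sample vs. the newer per-sample schedule
lr_old = C.learning_rate_schedule(0.1, C.UnitType.sample)
lr_new = C.learning_parameter_schedule_per_sample(0.1)

# momentum given as a time constant vs. the equivalent per-sample momentum
tc = 1100
m_old = C.momentum_as_time_constant_schedule(tc)
m_new = C.momentum_schedule_per_sample(math.exp(-1.0 / tc))
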
Example 14
def train(config, model, args, enable_eval=False):
    max_epoch = config['max_epoch']
    batchsize = config['batchsize']
    epoch_size = config['epoch_size']
    lr = config['lr']
    save_freq = config['save_freq']

    # create models
    embed_layer = GloveEmbed()

    inp_ph, train_model, loss_errs = create_train_model(model, embed_layer)
    train_reader, input_map = create_reader('aq_train.ctf', inp_ph, config)

    if enable_eval:
        inp_ph2, pred_sym, gt_sym = create_eval_model(model, embed_layer)
        eval_reader, input_map2 = create_reader('aq_dev.ctf', inp_ph2, config,
                                                True)
        i2w = get_i2w(vocabs)

    # create loggers
    progress_printer = C.logging.ProgressPrinter(freq=500, tag="Train")
    tensorboard_writer = C.logging.TensorBoardProgressWriter(
        500, 'tensorlog/{}'.format(args.tensorboard), model=train_model)

    lrs = [(1, lr), (5000, lr * 0.1), (10000, lr * 0.01), (30000, lr * 0.001)]
    learner = C.fsadagrad(
        train_model.parameters,
        #apply the learning rate as if it is a minibatch of size 1
        lr=C.learning_parameter_schedule(lrs),
        momentum=C.momentum_schedule(0.9, minibatch_size=batchsize),
        gradient_clipping_threshold_per_sample=2,
        gradient_clipping_with_truncation=True)

    trainer = C.Trainer(train_model, loss_errs, [learner],
                        [progress_printer, tensorboard_writer])

    total_samples = 0
    for epoch in range(max_epoch):
        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(batchsize,
                                                   input_map=input_map)
            # do the training
            trainer.train_minibatch(mb_train)
            total_samples += mb_train[list(mb_train.keys())[0]].num_sequences

        trainer.summarize_training_progress()

        if (epoch + 1) % save_freq == 0:
            save_name = '{}_{}.model'.format(config['save_name'], epoch + 1)
            print('save {} in {}'.format(save_name, config['output_dir']))
            trainer.save_checkpoint('output/{}/{}'.format(
                config['output_dir'], save_name))

        if enable_eval:
            vis_mb = eval_reader.next_minibatch(1, input_map=input_map2)
            pred = pred_sym.eval(vis_mb)[0]
            gt = gt_sym.eval(vis_mb)[0]
            print(pred.shape, gt.shape)
            res = visualize(pred, i2w)
            print("predict res: {}".format(res))
            res = visualize(gt, i2w)
            print("ground truth: {}".format(res))
            pres = 0.0
            count = 1
            while count < 5:
                mb_eval = eval_reader.next_minibatch(512, input_map=input_map2)
                pred = pred_sym.eval(mb_eval)
                gt = gt_sym.eval(mb_eval)
                pres += report_classification_info(pred, gt)
                count += 1
                if not mb_eval:
                    break
            print('average precision:{}'.format(pres / count))
Example 15
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)
    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_test(s2smodel)
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005
    learner = C.fsadagrad(model_train.parameters,
                          #apply the learning rate as if it is a minibatch of size 1
                          lr = C.learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size),
                          momentum = C.momentum_schedule(0.9366416204111472, minibatch_size=minibatch_size),
                          gradient_clipping_threshold_per_sample=2.3,
                          gradient_clipping_with_truncation=True)
    trainer = C.Trainer(None, criterion, learner)
    
    # records
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    C.logging.log_number_of_parameters(model_train) ; print()
    progress_printer = C.logging.ProgressPrinter(freq=30, tag='Training')

    # a hack to allow us to print sparse vectors
    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)

            # do the training
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # visualizing attention window
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    model_path = "model_%d.cmf" % epoch
    print("Saving final model to '%s'" % model_path)
    s2smodel.save(model_path)
    print("%d epochs complete." % max_epochs)
Example 16
print(l)
# the learning rate
learning_rate = 0.02
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use fsadagrad optimizer
momentum_time_constant = C.momentum_as_time_constant_schedule(BATCH_SIZE /
                                                              -math.log(0.9))
learner = C.fsadagrad(z.parameters,
                      lr=lr_schedule,
                      momentum=momentum_time_constant,
                      unit_gain=True)

trainer = C.Trainer(z, (loss, error), [learner])

# train
loss_summary = []
start = time.time()
for epoch in range(0, EPOCHS):
    for x1, y1 in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x1, l: y1})
    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
        loss_summary.append(training_loss)
        print("epoch: {}, loss: {:.5f}".format(epoch, training_loss))
Example 17
MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2, ), [0.2]),
    ((0.2, ), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4),
       (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params), lambda params: C.adagrad(
        params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params,
                          lr=learning_rate_schedule(1, UnitType.minibatch),
                          momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params,
                               lr=learning_rate_schedule(
                                   1, UnitType.minibatch),
                               momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params,
                              lr=learning_rate_schedule(1, UnitType.minibatch),
                              momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params,
                             lr=learning_rate_schedule(1, UnitType.minibatch),
                             gamma=0.1,
                             inc=3.0,
                             dec=0.1,
                             max=np.inf,
                             min=1e-8),
    lambda params: C.sgd(params,
                         lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params,
                                  lr=learning_rate_schedule(
Example 18
def train_and_test(reader_train, reader_test, model_func):

    ###############################
    # Training the model
    ###############################

    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    model = model_func(input)

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    epoch_size = 30000
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    lr_per_sample = [3e-4]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from a training data
    #############################################################################

    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size
    test_error = (metric_numer * 100) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
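
The reconstruction loss above is written out by hand. With the built-in helper the same criterion can be stated in one line; a sketch, assuming the model and target variables defined in train_and_test, and equivalent up to the summation over pixels that the trainer performs during aggregation anyway:

import cntk as C

# element-wise binary cross-entropy between the reconstruction and the scaled target
loss = C.binary_cross_entropy(model, target)
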
Example 19
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner.learning_rate() == 0.1
    
    learner.reset_learning_rate(learning_rate_schedule([1,2,3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.nesterov(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    lr_per_sample = learning_rate_schedule([0.1]*3 +[0.2]*2 +[0.3], UnitType.sample)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3,0.1), (2, 0.2), (1, 0.3)], UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.1]*5
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample, 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.set_default_use_mean_gradient_value(False)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert not use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
    
    C.set_default_use_mean_gradient_value(True)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
Example n. 20
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]
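
A minimal sketch (not part of the original test file) of how a list like LEARNER_LAMBDAS might be exercised in a parametrized test; the test name and the single dummy parameter are illustrative assumptions:

@pytest.mark.parametrize("learner_factory", LEARNER_LAMBDAS)
def test_learner_lambdas_create_learners(learner_factory):
    # every factory should accept a parameter list and return a usable learner
    w = C.parameter(shape=(1,))
    learner = learner_factory([w])
    assert len(learner.parameters) == 1
    assert learner.parameters[0].shape == (1,)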

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
Example n. 21
    def train(self, save_intermediates=False, save_prefix='', num_epochs=None):
        print("Setting up model for training...")
        x = self.x_seq
        labels = C.input(shape=(self.batch_size, 3), name="y")
        #labels = C.input_variable(3)

        # the learning rate
        learning_rate = 0.001
        lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

        # loss function
        loss = C.squared_error(self._model, labels)

        # use squared error to determine error for now
        error = C.squared_error(self._model, labels)

        # use the fsadagrad learner (an Adam-style optimizer)
        momentum_time_constant = C.momentum_as_time_constant_schedule(
                LSTMRegressionModel.BATCH_SIZE_DEFAULT / -math.log(0.9)) 
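        # (the time constant above, BATCH_SIZE_DEFAULT / -log(0.9), corresponds to a
        # per-sample momentum of 0.9 ** (1 / BATCH_SIZE_DEFAULT), i.e. an effective
        # momentum of roughly 0.9 over one default-size minibatch)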
        learner = C.fsadagrad(self._model.parameters,
                              lr = lr_schedule,
                              momentum = momentum_time_constant,
                              unit_gain = True)
        trainer = C.Trainer(self._model, (loss, error), [learner])
        
        # train
        loss_summary = []
        start = time.time()
        f, a = plt.subplots(3, 2, figsize=(100, 12))
        x_axis = list(range(len(self.train_x)))

        if num_epochs is None:
            epoch_list = list(range(0, LSTMRegressionModel.EPOCHS_DEFAULT))
        else:
            epoch_list = list(range(0, num_epochs))
        train_loss_epochs = [0.0] * len(epoch_list)
        val_loss_epochs = [0.0] * len(epoch_list)
        test_loss_epochs = [0.0] * len(epoch_list)
        print("done. Starting training.") 
        for epoch in epoch_list:
            print("Training on epoch {}".format(epoch))
            for x1, y1 in self._next_batch():
                trainer.train_minibatch({x: x1, labels: y1})

            val_loss = 0.0
            for idx, example in enumerate(self.val_x):
                y = np.array([self.val_y[idx]])
                example = np.array([example])
                loss_amt = loss.eval({x: example, labels: y})[0]
                val_loss += loss_amt
            #print("val loss is {}".format(val_loss))
            val_loss_epochs[epoch] = val_loss

            test_loss = 0.0
            for idx, example in enumerate(self.test_x):
                y = np.array([self.test_y[idx]])
                example = np.array([example])
                loss_amt = loss.eval({x: example, labels: y})[0]
                test_loss += loss_amt
            #print("test loss is {}".format(test_loss))
            test_loss_epochs[epoch] = test_loss
            training_loss = trainer.previous_minibatch_loss_average
            train_loss_epochs[epoch] = training_loss
            if epoch % (100) == 0:
                if save_intermediates:
                    self.save(save_prefix + 'WIP-training-epoch-{}.dat'.format(epoch))
                loss_summary.append(training_loss)
                evaluation = []
                for example in self.train_x:
                    prediction = self._model.eval({x: example})[0].tolist()
                    evaluation.append(prediction)

                evaluation = np.array(evaluation)
                a[0][0].cla()
                a[1][0].cla()
                a[2][0].cla()
                a[0][1].cla()
                a[1][1].cla()
                a[2][1].cla()
                a[0][0].plot(x_axis, evaluation[:, 0], label='approval')
                a[0][0].plot(x_axis, self.train_y[:, 0], label='approval actual')
                a[0][0].set_title("Approval rating prediction over training set")
                a[0][0].legend()
                a[1][0].plot(x_axis, evaluation[:, 1], label='disapproval')
                a[1][0].plot(x_axis, self.train_y[:, 1], label='disapproval actual')
                a[1][0].set_title("Dispproval rating prediction over training set")
                a[1][0].legend()
                a[2][0].plot(x_axis, evaluation[:, 2], label='neutral')
                a[2][0].plot(x_axis, self.train_y[:, 2], label='neutral actual')
                a[2][0].set_title("Neutral rating prediction over training set")
                a[2][0].legend()

                a[0][1].plot(epoch_list, train_loss_epochs)
                a[0][1].set_title("Training loss vs. epochs")
                a[1][1].plot(epoch_list, val_loss_epochs)
                a[1][1].set_title("Validation loss vs. epochs")
                a[2][1].plot(epoch_list, test_loss_epochs)
                a[2][1].set_title("Test loss vs. epochs")

                for axes in a:
                    axes[0].set_xlabel('training example number/idx')
                    axes[0].set_ylabel('rating (% represented as fraction)')
                    axes[1].set_xlabel('epoch number')
                    axes[1].set_ylabel('MSE loss')

                display.clear_output(wait=True)
                display.display(plt.gcf())

        print("training took {0:.1f} sec".format(time.time() - start))
Example n. 22
def build_graph(noise_shape, image_shape,
                G_progress_printer, D_progress_printer):
    '''
    The rest of the computational graph is mostly responsible for 
    coordinating the training algorithms and parameter updates, 
    which is particularly tricky with GANs for a couple of reasons.

    First, the discriminator must be used on both the real MNIST 
    images and fake images generated by the generator function. 
    One way to represent this in the computational graph is to 
    create a clone of the output of the discriminator function, 
    but with substituted inputs. Setting method=share in the 
    clone function ensures that both paths through the 
    discriminator model use the same set of parameters.

    Second, we need to update the parameters for the generator 
    and discriminator model separately using the gradients from 
    different loss functions. We can get the parameters for a 
    Function in the graph with the parameters attribute. However, 
    when updating the model parameters, we update only the parameters
    of the respective model while keeping the other parameters
    unchanged. In other words, when updating the generator we will
    update only the parameters of the G function while keeping
    the parameters of the D function fixed, and vice versa.
    '''
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2*(X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method = 'share',
        substitutions = {X_real_scaled.output: X_fake.output}
    )

    # Create loss functions and configure optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))
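    # Minimizing G_loss drives D_fake toward 1 (the generator tries to fool the
    # discriminator), while D_loss rewards the discriminator for scoring real
    # images near 1 and generated images near 0.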

    G_learner = C.fsadagrad(
        parameters = X_fake.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters = D_real.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        G_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        D_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
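
For orientation, a minimal sketch (not part of the original snippet) of how the returned variables and trainers might drive an alternating update loop; the reader (a MinibatchSource with a 'features' stream), the noise_sample helper, and the loop sizes below are assumptions for illustration only:

def train_gan_sketch(reader, noise_sample, noise_shape, image_shape,
                     num_minibatches=500, minibatch_size=128):
    # hypothetical progress printers; any ProgressPrinter works here
    pp_G = C.logging.ProgressPrinter(freq=50, tag='Generator')
    pp_D = C.logging.ProgressPrinter(freq=50, tag='Discriminator')
    X_real, X_fake, Z, G_trainer, D_trainer = build_graph(
        noise_shape, image_shape, pp_G, pp_D)

    for _ in range(num_minibatches):
        # discriminator step: real images from the reader plus fresh fakes
        x_data = reader.next_minibatch(minibatch_size)[reader.streams.features]
        z_data = noise_sample(minibatch_size)
        D_trainer.train_minibatch({X_real: x_data, Z: z_data})

        # generator step on new noise; the discriminator parameters stay fixed
        # because G_trainer only owns the generator's learner
        z_data = noise_sample(minibatch_size)
        G_trainer.train_minibatch({Z: z_data})

    return X_fake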
Example n. 23
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs,
          epoch_size):
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)
    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_test(s2smodel)
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005
    learner = C.fsadagrad(
        model_train.parameters,
        #apply the learning rate as if it is a minibatch of size 1
        lr=C.learning_parameter_schedule_per_sample([lr] * 2 + [lr / 2] * 3 +
                                                    [lr / 4], epoch_size),
        momentum=C.momentum_schedule(0.9366416204111472,
                                     minibatch_size=minibatch_size),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = C.Trainer(None, criterion, learner)

    # records
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    C.logging.log_number_of_parameters(model_train)
    print()
    progress_printer = C.logging.ProgressPrinter(freq=30, tag='Training')

    # a hack to allow us to print sparse vectors
    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)

            # do the training
            trainer.train_minibatch({
                criterion.arguments[0]:
                mb_train[train_reader.streams.features],
                criterion.arguments[1]:
                mb_train[train_reader.streams.labels]
            })

            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(
                    format_sequences(
                        sparse_to_dense(
                            mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # visualizing attention window
                if use_attention:
                    debug_attention(model_greedy,
                                    mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    model_path = "model_%d.cmf" % epoch
    print("Saving final model to '%s'" % model_path)
    s2smodel.save(model_path)
    print("%d epochs complete." % max_epochs)
Example n. 24
# expected output (label), also the dynamic axes of the model output
# is specified as the model of the label input
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

# the learning rate
learning_rate = 0.005
lr_schedule = C.learning_parameter_schedule(learning_rate)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use the fsadagrad learner (an Adam-style optimizer)
momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
trainer = C.Trainer(z, (loss, error), [learner])

# training
loss_summary = []

# time to start training
start = time.time()
for epoch in range(0, EPOCHS):
    for x_batch, l_batch in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x_batch, l: l_batch})

    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
        loss_summary.append(training_loss)
        print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))
Example n. 25
net = create_model(input/255.0)

# loss and error calculations.
target_normalized = target/255.0
loss = -(target_normalized * C.log(net) + (1 - target_normalized) * C.log(1 - net))
label_error  = C.classification_error(net, target_normalized)

# Instantiate the trainer object to drive the model training
lr_per_sample = [0.0001]
learning_rate_schedule = C.learning_rate_schedule(lr_per_sample, C.UnitType.sample, epoch_size=int(num_training_samples/2.0))

# Momentum
momentum_as_time_constant = C.momentum_as_time_constant_schedule(200)

# Define the learner
learner = C.fsadagrad(net.parameters, lr=learning_rate_schedule, momentum=momentum_as_time_constant)

# Instantiate the trainer
progress_printer = C.logging.ProgressPrinter(0)
train_op = C.Trainer(net, (loss, label_error), learner, progress_printer)


###############################
########## Training ###########
###############################

# Plot data dictionary.
plotdata = {"iteration":[], "loss":[], "error":[]}

# Initialize the parameters for the trainer
num_iterations = (num_training_samples * num_epochs) / batch_size
Example n. 26
def train(i2w,
          data_path,
          model_path,
          log_file,
          config_file,
          restore=True,
          profiling=False,
          gen_heartbeat=False):
    polymath = PolyMath(config_file)
    z, loss = polymath.model()
    training_config = importlib.import_module(config_file).training_config
    max_epochs = training_config['max_epochs']
    log_freq = training_config['log_freq']

    progress_writers = [
        C.logging.ProgressPrinter(num_epochs=max_epochs,
                                  freq=log_freq,
                                  tag='Training',
                                  log_to_file=log_file,
                                  metric_is_pct=False,
                                  rank=C.Communicator.rank(),
                                  gen_heartbeat=gen_heartbeat)
    ]

    lr = C.learning_parameter_schedule(training_config['lr'],
                                       minibatch_size=None,
                                       epoch_size=None)

    ema = {}
    dummies = []
    for p in z.parameters:
        ema_p = C.constant(0,
                           shape=p.shape,
                           dtype=p.dtype,
                           name='ema_%s' % p.uid)
        ema[p.uid] = ema_p
        dummies.append(C.reduce_sum(C.assign(ema_p,
                                             0.999 * ema_p + 0.001 * p)))
    dummy = C.combine(dummies)
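    # Note: each C.assign above only writes 0.999*ema_p + 0.001*p into its
    # 'ema_' constant when the node is evaluated; the training loops below call
    # dummy.eval() after every minibatch, which is what actually refreshes this
    # exponential moving average of the parameters.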

    #    learner = C.adadelta(z.parameters, lr)
    learner = C.fsadagrad(
        z.parameters,
        #apply the learning rate as if it is a minibatch of size 1
        lr,
        momentum=C.momentum_schedule(
            0.9366416204111472,
            minibatch_size=training_config['minibatch_size']),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    if C.Communicator.num_workers() > 1:
        learner = C.data_parallel_distributed_learner(learner)

    trainer = C.Trainer(z, loss, learner, progress_writers)

    if profiling:
        C.debugging.start_profiler(sync_gpu=True)

    train_data_file = os.path.join(data_path, training_config['train_data'])
    train_data_ext = os.path.splitext(train_data_file)[-1].lower()

    model_file = os.path.join(model_path, model_name)
    model = C.combine(z.outputs + loss.outputs)  #this is for validation only

    epoch_stat = {'best_val_err': 1000, 'best_since': 0, 'val_since': 0}
    print(restore, os.path.isfile(model_file))
    #    if restore and os.path.isfile(model_file):
    if restore and os.path.isfile(model_file):
        z.restore(model_file)
        #after restore always re-evaluate
        #TODO replace with rougel with external script(possibly)
        #epoch_stat['best_val_err'] = validate_model(i2w, os.path.join(data_path, training_config['val_data']), model, polymath)

    def post_epoch_work(epoch_stat):
        trainer.summarize_training_progress()
        epoch_stat['val_since'] += 1

        if epoch_stat['val_since'] == training_config['val_interval']:
            epoch_stat['val_since'] = 0
            temp = dict((p.uid, p.value) for p in z.parameters)
            for p in trainer.model.parameters:
                p.value = ema[p.uid].value
            #TODO replace with rougel with external script(possibly)
            val_err = validate_model(
                i2w, os.path.join(data_path, training_config['val_data']),
                model, polymath)
            #if epoch_stat['best_val_err'] > val_err:
            #    epoch_stat['best_val_err'] = val_err
            #    epoch_stat['best_since'] = 0
            #    trainer.save_checkpoint(model_file)
            #    for p in trainer.model.parameters:
            #        p.value = temp[p.uid]
            #else:
            #    epoch_stat['best_since'] += 1
            #    if epoch_stat['best_since'] > training_config['stop_after']:
            #        return False
            z.save(model_file)
            epoch_stat['best_since'] += 1
            if epoch_stat['best_since'] > training_config['stop_after']:
                return False
        if profiling:
            C.debugging.enable_profiler()

        return True

    init_pointer_importance = polymath.pointer_importance
    if train_data_ext == '.ctf':
        mb_source, input_map = create_mb_and_map(loss, train_data_file,
                                                 polymath)

        minibatch_size = training_config['minibatch_size']  # number of samples
        epoch_size = training_config['epoch_size']

        for epoch in range(max_epochs):
            num_seq = 0
            while True:
                if trainer.total_number_of_samples_seen >= training_config[
                        'distributed_after']:
                    data = mb_source.next_minibatch(
                        minibatch_size * C.Communicator.num_workers(),
                        input_map=input_map,
                        num_data_partitions=C.Communicator.num_workers(),
                        partition_index=C.Communicator.rank())
                else:
                    data = mb_source.next_minibatch(minibatch_size,
                                                    input_map=input_map)
                trainer.train_minibatch(data)
                num_seq += trainer.previous_minibatch_sample_count
                dummy.eval()
                if num_seq >= epoch_size:
                    break
            if not post_epoch_work(epoch_stat):
                break
            print('Before Pointer_importance:', polymath.pointer_importance)
            if polymath.pointer_importance > 0.1 * init_pointer_importance:
                polymath.pointer_importance = polymath.pointer_importance * 0.9
                print('Pointer_importance:', polymath.pointer_importance)
    else:
        if train_data_ext != '.tsv':
            raise Exception("Unsupported format")

        minibatch_seqs = training_config[
            'minibatch_seqs']  # number of sequences

        for epoch in range(max_epochs):  # loop over epochs
            tsv_reader = create_tsv_reader(loss, train_data_file, polymath,
                                           minibatch_seqs,
                                           C.Communicator.num_workers())
            minibatch_count = 0
            for data in tsv_reader:
                if (minibatch_count %
                        C.Communicator.num_workers()) == C.Communicator.rank():
                    trainer.train_minibatch(data)  # update model with it
                    dummy.eval()
                minibatch_count += 1
            if not post_epoch_work(epoch_stat):
                break

    if profiling:
        C.debugging.stop_profiler()
Example n. 27
    def forecast(self):

        # Mapping each letter to a number between 0 and 1
        alpha_to_num_step = float(1 / self.alphabet_size)
        alpha_to_num_shift = float(alpha_to_num_step / 2)

        # Dict = [floor, point, ceiling]
        alpha_to_num = dict()
        for i in range(self.alphabet_size):
            step = (alpha_to_num_step * i)
            alpha_to_num[chr(97 + i)] = [
                step, step + alpha_to_num_shift, step + alpha_to_num_step
            ]
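        # Illustration (assuming alphabet_size == 4): step = 0.25, shift = 0.125,
        # so 'a' -> [0.00, 0.125, 0.25], 'b' -> [0.25, 0.375, 0.50],
        #    'c' -> [0.50, 0.625, 0.75], 'd' -> [0.75, 0.875, 1.00]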

        x, y, last_sax_word = self._prepare_data(alpha_to_num)

        response = {
            "last_sax_word": "Fail",
            "forecast_sax_letter": "Fail",
            "position_in_sax_interval": -1
        }

        if not x or not y or not last_sax_word:
            error_msg = "Error while preparing data!"
            log.error(error_msg)
            response["error"] = error_msg
            return response

        # Trying to optimize settings for training.
        # Forbidding heavy training (max 200k input rows from CSV or financial data)
        batch_size = self.window_len * self.word_len
        if batch_size != 1152:
            batch_size = 1152
        h_dims = self.word_len + 1

        epochs = 100
        if len(x["train"]) > 200000:
            error_msg = "Configured data set too large (max: 200k): {}".format(
                len(x["train"]))
            log.error(error_msg)
            response["error"] = error_msg
            return response
        if len(x["train"]) < 100000:
            epochs = 250
        if len(x["train"]) < 40000:
            epochs = 500
        if len(x["train"]) < 20000:
            epochs = 1000

        log.debug("Training Info:")
        log.debug("len(x[train]): {}".format(len(x["train"])))
        log.debug("Epochs       : {}".format(epochs))
        log.debug("Batch Size   : {}".format(batch_size))

        if x and y:
            input_node = C.sequence.input_variable(1)
            z = self._create_model(input_node, h_dims)
            var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
            learning_rate = 0.005
            lr_schedule = C.learning_parameter_schedule(learning_rate)
            loss = C.squared_error(z, var_l)
            error = C.squared_error(z, var_l)
            momentum_schedule = C.momentum_schedule(0.9,
                                                    minibatch_size=batch_size)
            learner = C.fsadagrad(z.parameters,
                                  lr=lr_schedule,
                                  momentum=momentum_schedule)
            trainer = C.Trainer(z, (loss, error), [learner])

            # training
            loss_summary = []

            start = time.time()
            for epoch in range(0, epochs):
                for x_batch, l_batch in self._next_batch(
                        x, y, "train", batch_size):
                    trainer.train_minibatch({
                        input_node: x_batch,
                        var_l: l_batch
                    })

                if epoch % (epochs / 10) == 0:
                    training_loss = trainer.previous_minibatch_loss_average
                    loss_summary.append(training_loss)
                    log.debug("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                        epoch, training_loss,
                        time.time() - start))

            pred, position_in_sax_interval = self._get_pred(
                z, input_node, last_sax_word, alpha_to_num)
            forecast_sax_letter = self._get_letter(pred, alpha_to_num)
            log.debug("================= PRED =====================")
            log.debug("last_sax_word           : {}".format(last_sax_word))
            log.debug("pred                    : {}".format(pred))
            log.debug(
                "forecast_sax_letter     : {}".format(forecast_sax_letter))
            log.debug("position_in_sax_interval: {}".format(
                position_in_sax_interval))
            log.debug("============================================")

            response["last_sax_word"] = last_sax_word
            response["forecast_sax_letter"] = forecast_sax_letter
            response["position_in_sax_interval"] = position_in_sax_interval
        else:
            error_msg = "X and/or Y with no length: {} and {}".format(
                len(x), len(y))
            log.error(error_msg)
            response["error"] = error_msg
        return response
Example n. 28
def test_learner_init_legacy():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType provides the per-minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: this is a logically invalid combination, but it was the only way to use the mean gradient and set the learning rate in the past.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0


    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType provides the per-minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
Example n. 29
def main():
    window_len = int(input("window_len: "))
    word_len = int(input("word_len: "))
    alphabet_len = int(input("alphabet_len: "))

    epochs = input("Epochs: ")
    if not epochs == "":
        epochs = int(epochs)
    else:
        epochs = 100

    # batch_size = window_len * (word_len - 1)
    batch_size = int(input("Batch size: "))
    h_dims = word_len + 1

    alpha_to_num_step = float(1 / alphabet_len)
    alpha_to_num_shift = float(alpha_to_num_step / 2)

    # Dict = [floor, point, ceiling]
    alpha_to_num = dict()
    for i in range(alphabet_len):
        step = (alpha_to_num_step * i)
        alpha_to_num[chr(97 + i)] = [
            step, step + alpha_to_num_shift, step + alpha_to_num_step
        ]

    model_file = "{}_{}_{}_{}.model".format(window_len, word_len, alphabet_len,
                                            epochs)
    opt_model_file = input("Change model name [{}]? ".format(model_file))
    if opt_model_file != "" and opt_model_file != "n":
        model_file = opt_model_file

    x, y = prepare_data(window_len, word_len, alphabet_len, alpha_to_num)

    if input("Training? ") == "y":
        input_node = C.sequence.input_variable(1)
        z = create_model(input_node, h_dims)
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        loss = C.squared_error(z, var_l)
        error = C.squared_error(z, var_l)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(z.parameters,
                              lr=lr_schedule,
                              momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, epochs):
            for x_batch, l_batch in next_batch(x, y, "train", batch_size):
                trainer.train_minibatch({input_node: x_batch, var_l: l_batch})

            if epoch % (epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                    epoch, training_loss,
                    time.time() - start))

        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for label_txt in ["train", "val", "test"]:
            if label_txt in x:
                print("mse for {}: {:.6f}".format(
                    label_txt,
                    get_mse(trainer, input_node, x, y, batch_size, var_l,
                            label_txt)))
        z.save(model_file)
    else:
        z = C.load_model(model_file)
        input_node = C.logging.find_all_with_name(z, "")[-1]

    # Print out all layers in the model
    print("Loading {} and printing all nodes:".format(model_file))
    node_outputs = C.logging.find_all_with_name(z, "")
    for n in node_outputs:
        print("  {}".format(n))

    # predict
    last_p_result = []
    last_p_y = []
    for j, ds in enumerate(["val", "test"]):
        if ds in x:
            fig = plt.figure()
            chart = fig.add_subplot(2, 1, 1)
            results = []
            for x_batch, _ in next_batch(x, y, ds, batch_size):
                pred = z.eval({input_node: x_batch})
                results.extend(pred[:, 0])

            if results:
                print("LAST_PRED({}): {}".format(ds, results[-1]))

            last_p_y = []
            for idx, i in enumerate(y[ds]):
                if (idx + 1) % (word_len + 1) == 0:
                    last_p_y.append(i)

            chart.plot(np.array(last_p_y).flatten(), label=ds + " raw")

            last_p_result = []
            for idx, i in enumerate(results):
                if (idx + 1) % (word_len + 1) == 0:
                    alpha_list = sorted(alpha_to_num)
                    a = "a"
                    for a in alpha_list[::-1]:
                        if i >= alpha_to_num[a][0]:
                            break
                    last_p_result.append(alpha_to_num[a][1])

            chart.plot(np.array(last_p_result), label=ds + " pred")
            chart.legend()

            fig.savefig("{}_chart_{}_epochs.jpg".format(ds, epochs))

            correct_pred = dict()
            for idx, _ in enumerate(last_p_y):
                print("{}: {} == {} ({})".format(
                    idx, last_p_result[idx], float(last_p_y[idx][0]),
                    last_p_result[idx] - float(last_p_y[idx][0])))
                alpha_list = sorted(alpha_to_num)
                for pred_a in alpha_list[::-1]:
                    if last_p_result[idx] >= alpha_to_num[pred_a][0]:
                        pred_l_num = ord(pred_a)
                        for y_a in alpha_list[::-1]:
                            if float(last_p_y[idx][0]) >= alpha_to_num[y_a][0]:
                                stp = abs(ord(y_a) - pred_l_num)
                                print("stp: ", stp)
                                if stp not in correct_pred:
                                    correct_pred[stp] = 1
                                else:
                                    correct_pred[stp] += 1
                                break
                        break

            for k, v in correct_pred.items():
                print("Set({}) Delta[{}]: {}/{} = {:.4f}".format(
                    ds, k, v, len(last_p_y), float(v / len(last_p_y))))
            print("len(last_p_y): ", len(last_p_y))
            print("len(last_p_result): ", len(last_p_result))

    word_pred = input("Word to get pred (cedcaadc): ")
    if word_pred == "":
        word_pred = "cedcaadc"
    r, perc = get_pred(z, input_node, word_pred, window_len, alpha_to_num)
    print("================= PRED =====================")
    print("r = ", r)
    print("perc = ", perc)
    print("============================================")

    for k, v in alpha_to_num.items():
        print(k, v)

    return x, y, last_p_result, last_p_y
Example n. 30
def train_and_test(reader_train, reader_test, model_func):

    ###############################################
    # Training the model
    ###############################################

    # Instantiate the input and the label variables
    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    # Create the model function
    model = model_func(input)

    # The labels for this network are the same as the input MNIST image.
    # Note: Inside the model we are scaling the input to 0-1 range
    # Hence we rescale the label to the same range
    # We show how one can use a custom loss function
    # loss = -(y* log(p)+ (1-y) * log(1-p)) where p = model output and y = target
    # We have normalized the input between 0-1. Hence we scale the target to the same range
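    # For example, a pixel with target t = 1.0 and model output p = 0.9 contributes
    # -ln(0.9) ~= 0.105 to the loss; p = 0.5 would contribute -ln(0.5) ~= 0.693.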

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    # training config
    epoch_size = 30000  # 30000 samples is half the dataset size
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    # Instantiate the trainer object to drive the model training
    lr_per_sample = [0.00003]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    # Momentum which is applied on every minibatch_size = 64 samples
    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    # We use a variant of the Adam optimizer which is known to work well on this dataset
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    # Instantiate the trainer
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # Map the data streams to the input and labels.
    # Note: for autoencoders input == label
    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        # Read a mini batch from the training data file
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        # Run the trainer to perform model training on this minibatch
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100.0) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from the training data
    #############################################################################

    # Test data for trained model
    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):

        # We are loading test data in batches specified by test_minibatch_size
        # Each data point in the minibatch is a MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        eval_error = trainer.test_minibatch(data)

        # Accumulate the evaluation error over all test minibatches
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    # Average of evaluation errors of all test minibatches
    test_error = (metric_numer * 100.0) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
Example n. 31
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify the reference minibatch size; the learning rate is given as a plain number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch size
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicit specification of the reference minibatch size; the learning rate is given as a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch size
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch size
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicit specification of the reference minibatch size; the learning rate is given as a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch size
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    #no explicit specification of the reference minibatch size; the learning rate is given as a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch size
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch size
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicit specification of the reference minibatch size; the learning rate is given as a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch size
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                                minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001


    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                            minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10,
                         min=1e-8,
                         minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size = 1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 +[0.2]*2 +[0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3,0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1, epoch_size = 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
Example n. 32
def main():
    # We keep up to 14 inputs from a day
    TIMESTEPS = int(input("TIMESTEPS: "))

    # 20000 is the maximum total output in our dataset. We normalize all values by
    # this so our inputs fall in the 0.0 to 1.0 range.
    NORMALIZE = int(input("NORMALIZE: "))

    # process batches of 10 days
    BATCH_SIZE = int(input("BATCH_SIZE: "))
    BATCH_SIZE_TEST = int(input("BATCH_SIZE_TEST: "))

    # Specify the internal-state dimensions of the LSTM cell
    H_DIMS = int(input("H_DIMS: "))

    data_source = input("Source(1=solar,2=local,3=sin,4=my): ")
    if data_source == "1" or data_source == "":
        X, Y = get_solar_old(TIMESTEPS, NORMALIZE)
    elif data_source == "2":
        X, Y = get_solar(TIMESTEPS, NORMALIZE)
    elif data_source == "3":
        X, Y = get_sin(5, 5, input("Data length: "))
    else:
        X, Y = get_my_data(H_DIMS, H_DIMS)

    epochs = input("Epochs: ")
    if epochs == "":
        EPOCHS = 100
    else:
        EPOCHS = int(epochs)

    start_time = time.time()

    # input sequences
    x = C.sequence.input_variable(1)

    model_file = "{}_epochs.model".format(EPOCHS)

    if not os.path.exists(model_file):
        print("Training model {}...".format(model_file))

        # create the model
        z = create_model(x, H_DIMS)

        # expected output (label), also the dynamic axes of the model output
        # is specified as the model of the label input
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

        # the learning rate
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)

        # loss function
        loss = C.squared_error(z, var_l)

        # use squared error to determine error for now
        error = C.squared_error(z, var_l)

        # use the fsadagrad learner (an Adam-style optimizer)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
        learner = C.fsadagrad(z.parameters,
                              lr=lr_schedule,
                              momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, EPOCHS):
            for x_batch, l_batch in next_batch(X, Y, "train", BATCH_SIZE):
                trainer.train_minibatch({x: x_batch, var_l: l_batch})

            if epoch % (EPOCHS / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))

        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for labeltxt in ["train", "val", "test"]:
            print("mse for {}: {:.6f}".format(
                labeltxt, get_mse(trainer, x, X, Y, BATCH_SIZE, var_l,
                                  labeltxt)))

        z.save(model_file)

    else:
        z = C.load_model(model_file)
        x = cntk.logging.find_all_with_name(z, "")[-1]

    # Print out all layers in the model
    print("Loading {} and printing all nodes:".format(model_file))
    node_outputs = cntk.logging.find_all_with_name(z, "")
    for n in node_outputs:
        print("  {}".format(n))

    # predict
    # f, a = plt.subplots(2, 1, figsize=(12, 8))
    for j, ds in enumerate(["val", "test"]):
        fig = plt.figure()
        a = fig.add_subplot(2, 1, 1)
        results = []
        for x_batch, y_batch in next_batch(X, Y, ds, BATCH_SIZE_TEST):
            pred = z.eval({x: x_batch})
            results.extend(pred[:, 0])
        # because we normalized the input data we need to multiply the prediction
        # by NORMALIZE to get back the real values.
        a.plot((Y[ds] * NORMALIZE).flatten(), label=ds + " raw")
        a.plot(np.array(results) * NORMALIZE, label=ds + " pred")
        a.legend()

        fig.savefig("{}_chart_{}_epochs.jpg".format(ds, EPOCHS))

    print("Delta: ", time.time() - start_time)
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size