Code Example #1
File: nn.py Project: liocsm/dlef
def train_nn(

    # Hyper-Parameters
    dim_token=100,  # word embedding dimension
    lstm_layer_0_n=50,
    lstm_layer_1_n=50,
    ydim0=5,
    ydim1=6,

    #win_size = 3,

    #n_cueTypes = 4,
    n_vocb_words=15489,  # Vocabulary size
    #n_locDiffs = 111,  # Location difference size
    patience=10,  # Number of epochs to wait before early stop if no progress
    max_epochs=100,  # The maximum number of epochs to run
    #dispFreq=10,  # Display to stdout the training progress every N updates
    #decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.01,  # Learning rate for sgd (not used for adadelta and rmsprop)
    dropout_p=1.0,
    adv_epsilon=0.001,
    optimizer=momentum,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).

    #maxlen=1000,  # Sequence longer then this get ignored
    batch_size=10,  # The batch size during training.
    #inter_cost_margin = 0.001,

    # Parameter for extra option
    #noise_std=0.,
    #use_dropout=True,  # if False slightly faster, but worst test error
    # This frequently need a bigger model.
    #reload_model=None,  # Path to a saved model we want to start from.
    #test_size=-1
):

    # Model options
    model_options = locals().copy()
    print('-------------------------------------------------------------')
    print("model options", model_options)
    print('-------------------------------------------------------------')

    #load_data, prepare_data = get_dataset(dataset)

    print('Loading data ... ... ...')
    train, valid, test = data.load_data(path='../mydata.pkl.gz',
                                        n_words=n_vocb_words)

    print('Building model ... ... ...')

    params = init_params(model_options,
                         Wemb_value=data.read_gz_file("../../matrix.pkl.gz"))

    tparams = init_tparams(params)

    (x, masks, y, f_pred_prob, f_pred, f_adv_pred_prob, f_adv_pred, cost,
     adv_cost) = build_model(tparams, model_options)

    #f_cost = theano.function([x[0], x[1], masks[0], masks[1], y], cost, name='f_cost')

    grads = tensor.grad(cost, wrt=list(tparams.values()))
    #f_grad = theano.function([x[0], x[1], masks[0], masks[1], y], grads, name='f_grad')

    adv_grads = tensor.grad(cost, wrt=tparams['Wemb'])
    f_adv_grad = theano.function([x[0], x[1], masks[0], masks[1], y],
                                 adv_grads,
                                 name='f_adv_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, x, masks, y, cost)

    print('training ... ... ...')

    kf_valid = my_get_minibatches_idx(len(valid[0]))
    kf_test = my_get_minibatches_idx(len(test[0]))

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    #history_errs = []
    best_p = None
    bad_counter = 0
    stop_counter = 0

    #if validFreq == -1:
    #validFreq = len(train[0]) // batch_size
    #if saveFreq == -1:
    #saveFreq = len(train[0]) // batch_size

    last_ave_of_train_costs = numpy.inf
    costs_list = []

    uidx = 0  # the number of updates done
    estop = False  # early stop
    #start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = my_get_minibatches_idx(len(train[0]), shuffle=True)

            #training_sum_costs = 0

            #ave_of_g_costs_sum = 0
            #ave_of_d_costs_sum = 0

            for train_batch_idx, train_index in kf:
                #uidx += 1
                #use_noise.set_value(1.)

                # Select the random examples for this minibatch
                x_0 = train[0][train_index]
                x_1 = train[1][train_index]

                y_0 = train[2][train_index]

                x_0, mask_0, _ = data.prepare_data(x_0)
                x_1, mask_1, _ = data.prepare_data(x_1)

                y_0 = numpy.asarray(y_0, dtype='int32')

                #print(y_0)
                #print(type(y_0))
                #print(y_0.ndim)

                cost = f_grad_shared(x_0, x_1, mask_0, mask_1, y_0)
                costs_list.append(cost)
                f_update(lrate)

                cur_adv_grad = f_adv_grad(x_0, x_1, mask_0, mask_1, y_0)
                # Normalize the adversarial gradient with numpy (it is a plain
                # ndarray returned by f_adv_grad, not a symbolic tensor).
                tparams['p_Wemb'] = adv_epsilon * cur_adv_grad / (
                    numpy.sqrt(cur_adv_grad**2 + 1e-4))

                if train_batch_idx % 100 == 0 or train_batch_idx == len(
                        kf) - 1:
                    print("---Now %d/%d training bacthes @ epoch = %d" %
                          (train_batch_idx, len(kf), eidx))

            cur_ave_of_train_costs = sum(costs_list) / len(costs_list)
            print("cur_ave_of_train_costs = ", cur_ave_of_train_costs,
                  "@ epoch = ", eidx)

            if numpy.isnan(cur_ave_of_train_costs) or numpy.isinf(
                    cur_ave_of_train_costs):
                print('bad cost detected: ', cur_ave_of_train_costs)
                print('End of Program')
                break

            print('outputting predicted labels of test set ... ... ...')
            output_pred_labels(model_options,
                               f_pred,
                               f_pred_prob,
                               data.prepare_data,
                               test,
                               kf_test,
                               verbose=False,
                               path="test_pred_labels.txt")

            if cur_ave_of_train_costs >= last_ave_of_train_costs * 0.9:
                stop_counter += 1

            last_ave_of_train_costs = cur_ave_of_train_costs

            print('counter for early stopping : %d/%d' %
                  (stop_counter, patience))
            print('learning rate in this epoch = ', lrate)
            print('--------------------------------------------------')

            del costs_list[:]

            if stop_counter >= patience:
                print('Early Stop!')
                estop = True
                break

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interupted")
Code Example #2
File: nn.py Project: qz011/event_factuality
def train_cnn(

    # Hyper-Parameters
    dim_token=100,  # word embedding dimension
    dim_locDiff=10,  # location difference dimension
    dim_cueType=10,  #
    cnn_n1=50,
    n2=10 + 10 + 100,
    ydim0=3,
    ydim1=3,
    #win_size = 3,

    #maxTokens1 = 60, # maximum tokens in sentence 1
    n_cueTypes=5,
    n_words=4000,  # Vocabulary size
    n_locDiffs=108,  # Location difference size
    patience=10,  # Number of epochs to wait before early stop if no progress
    max_epochs=300,  # The maximum number of epochs to run
    #dispFreq=10,  # Display to stdout the training progress every N updates
    #decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.01,  # Learning rate for sgd (not used for adadelta and rmsprop)
    optimizer=momentum,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).

    #maxlen=1000,  # Sequence longer then this get ignored
    batch_size=16,  # The batch size during training.

    # Parameter for extra option
    noise_std=0.,
    use_dropout=True,  # if False, slightly faster but worse test error.
    # This frequently needs a bigger model.
    #reload_model=None,  # Path to a saved model we want to start from.
    test_size=-1):

    # Model options
    model_options = locals().copy()
    print('----------------------------------------------')
    print("model options", model_options)
    print('----------------------------------------------')

    #load_data, prepare_data = get_dataset(dataset)

    print('Loading data ... ... ...')
    train, valid, test = data.load_data(path='../mydata.pkl.gz',
                                        n_words=n_words,
                                        valid_portion=0.)
    '''if test_size > 0:
        # The test set is sorted by size, but we want to keep random
        # size example.  So we must select a random selection of the
        # examples.
        idx = numpy.arange(len(test[0]))
        numpy.random.shuffle(idx)
        idx = idx[:test_size]
        test = ([test[0][n] for n in idx], [test[1][n] for n in idx])'''

    print('Building model ... ... ...')
    # This creates the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options,
                         Wemb_value=data.read_gz_file("../matrix.pkl.gz"))
    '''if reload_model:
        load_params('cnn_model.npz', params)'''

    # This creates Theano shared variables from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams hold different copies of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, masks, y, f_pred_prob, f_pred, cost, f_pred_prob_test,
     f_pred_test) = build_model(tparams, model_options)
    '''if decay_c > 0.:
        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U'] ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay'''

    f_cost = theano.function([
        x[0], x[1], x[2], x[3], masks[0], masks[1], masks[2], masks[3], y[0],
        y[1]
    ],
                             cost,
                             name='f_cost')

    grads = tensor.grad(cost, wrt=list(tparams.values()))
    f_grad = theano.function([
        x[0], x[1], x[2], x[3], masks[0], masks[1], masks[2], masks[3], y[0],
        y[1]
    ],
                             grads,
                             name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, x, masks, y, cost)

    #print('Optimization')
    print('training ... ... ...')

    kf_valid = get_minibatches_idx(len(valid[0]), batch_size)
    kf_test = get_minibatches_idx(len(test[0]), batch_size)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    #history_errs = []
    best_p = None
    bad_counter = 0
    '''if validFreq == -1:
        validFreq = len(train[0]) // batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) // batch_size'''

    last_training_sum_costs = numpy.inf

    uidx = 0  # the number of updates done
    estop = False  # early stop
    #start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            training_sum_costs = 0

            for train_batch_idx, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                x_0 = [train[0][t] for t in train_index]
                x_1 = [train[1][t] for t in train_index]
                x_2 = [train[2][t] for t in train_index]
                x_3 = [train[3][t] for t in train_index]
                y_0 = [train[4][t] for t in train_index]
                y_1 = [train[5][t] for t in train_index]

                # Get the data in numpy.ndarray format
                #
                # Return something of shape (minibatch maxlen, n samples)
                x_0, mask_0 = data.prepare_data(x_0)
                x_1, mask_1 = data.prepare_data(x_1)
                x_2, mask_2 = data.prepare_data(x_2)
                x_3, mask_3 = data.prepare_data(x_3)
                y_0 = numpy.asarray(y_0, dtype='int32')
                y_1 = numpy.asarray(y_1, dtype='int32')

                n_samples += x_0.shape[1]

                if train_batch_idx % 100 == 0 or train_batch_idx == len(
                        kf) - 1:
                    print("%d/%d training bacthes @ epoch = %d" %
                          (train_batch_idx, len(kf), eidx))

                cost = f_grad_shared(x_0, x_1, x_2, x_3, mask_0, mask_1,
                                     mask_2, mask_3, y_0, y_1)
                f_update(lrate)

                training_sum_costs += cost

            print("sum of costs of all the training samples = ",
                  training_sum_costs, "@ epoch = ", eidx)

            if numpy.isnan(training_sum_costs) or numpy.isinf(
                    training_sum_costs):
                print('bad cost detected: ', training_sum_costs)
                print('End of Program')
                break

            print('outputting predicted labels of test set ... ... ...')

            output_pred_labels(f_pred_test,
                               f_pred_prob_test,
                               data.prepare_data,
                               test,
                               kf_test,
                               verbose=False,
                               path="test_pred_labels.txt")

            if training_sum_costs >= last_training_sum_costs * 0.99:
                bad_counter += 1
                if bad_counter == patience / 2:
                    lrate /= 4.

            last_training_sum_costs = training_sum_costs

            print('bad counter for early stopping : %d/%d' %
                  (bad_counter, patience))
            print('learning rate = ', lrate)
            print('--------------------------------------------------')

            if bad_counter >= patience:
                print('Early Stop!')
                estop = True
                break

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interupted")
Code Example #3
File: nn.py Project: cxjcjj/fackbank
def train_nn(
        # Hyper-Parameters
        dim_token=100,  # word embedding dimension
        dim_locDiff=10,  # location difference dimension
        dim_cueType=10,  #
        dim_ESP_label=10,
        dim_latent=100,

        lstm_layer_n=50,
        lstm_decoder_layer_n=50,

        n2=50 + 10 + 10,
        ydim0=3,
        ydim1=3,
        # win_size = 2,

        # maxTokens1 = 60, # maximum tokens in sentence 1

        # n_ESP_labels = 3,
        n_cueTypes=4,
        n_vocb_words=4396,  # Vocabulary size
        n_locDiffs=111,  # Location difference size

        end_idx=3194,

        patience=10,  # Number of epochs to wait before early stop if no progress
        max_epochs=100,  # The maximum number of epochs to run
        # dispFreq=10,  # Display to stdout the training progress every N updates
        # decay_c=0.,  # Weight decay for the classifier applied to the U weights.
        lrate=0.01,  # Learning rate for sgd (not used for adadelta and rmsprop)
        dropout_p=1.0,

        optimizer=momentum,
        # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).

        # maxlen=1000,  # Sequence longer then this get ignored
        batch_size=10,  # The batch size during training.
        inter_cost_margin=0.001,

        # Parameter for extra option
        # noise_std=0.,
        # use_dropout=True,  # if False slightly faster, but worst test error
        # This frequently need a bigger model.
        # reload_model=None,  # Path to a saved model we want to start from.
        # test_size=-1
):
    # Model options
    model_options = locals().copy()
    print('-------------------------------------------------------------')
    print("model options", model_options)
    print('-------------------------------------------------------------')

    # load_data, prepare_data = get_dataset(dataset)

    print('Loading data ... ... ...')
    train, valid, test = data.load_data(path='mydata.pkl', n_words=n_vocb_words)

    print('Building model ... ... ...')

    params_all = init_params(model_options, Wemb_value=data.read_gz_file("word_emb.pkl"))

    # tparams = init_tparams(params)
    tparams_d = init_tparams(params_all[0])
    tparams_g = init_tparams(params_all[1])
    tparams_c = OrderedDict()
    for kk, pp in tparams_d.items():
        tparams_c[kk] = tparams_d[kk]

    for kk, pp in tparams_g.items():
        tparams_c[kk] = tparams_g[kk]

    (x,
     masks,
     x_d_y_fake,
     y,
     x_noises,
     x_maxlens,
     f_D_pred_prob,
     f_D_pred,
     f_G_produce,
     dropouts,
     d_cost,
     g_cost) = Build_Model([tparams_d, tparams_g], model_options)

    d_grads = tensor.grad(d_cost, wrt=list(tparams_d.values()))
    # print(tparams_c)
    g_grads = tensor.grad(g_cost, wrt=list(tparams_c.values()), consider_constant=list(tparams_d.values()),
                          disconnected_inputs='ignore')

    lr = tensor.scalar(name='lr')

    # f_grad_shared, f_update = optimizer(lr, tparams, grads, x, masks, y, cost)

    f_D_grad_shared, f_D_update = optimizer(lr, tparams_d, d_grads,
                                            x + dropouts, masks, x_d_y_fake + y, d_cost)
    # f_G_grad_shared, f_G_update = optimizer(lr, tparams_c, g_grads,
    # x_noise + x_maxlen + x_d_ps + dropouts_g, [], x_g_y_fake + yg, g_cost)
    f_G_grad_shared, f_G_update = optimizer(lr, tparams_c, g_grads,
                                            x + x_noises + x_maxlens, masks, y, g_cost)

    print('training ... ... ...')

    kf_valid = get_minibatches_idx(len(valid[0]), batch_size)
    kf_test = get_minibatches_idx(len(test[0]), batch_size)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    # history_errs = []
    best_p = None
    bad_counter = 0
    stop_counter = 0

    # if validFreq == -1:
    # validFreq = len(train[0]) // batch_size
    # if saveFreq == -1:
    # saveFreq = len(train[0]) // batch_size

    # last_training_sum_costs = numpy.inf
    last_ave_of_g_costs = numpy.inf
    last_ave_of_d_costs = numpy.inf

    g_costs_list = []
    d_costs_list = []

    uidx = 0  # the number of updates done
    estop = False  # early stop
    # start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
            # kf = get_minibatches_idx(99, batch_size, shuffle=True)

            # training_sum_costs = 0

            # ave_of_g_costs_sum = 0
            # ave_of_d_costs_sum = 0

            for train_batch_idx, train_index in kf:
                # uidx += 1
                # use_noise.set_value(1.)

                cur_batch_size = len(train_index)

                # Select the random examples for this minibatch
                x_0 = [train[0][t] for t in train_index]
                x_1 = [train[1][t] for t in train_index]

                x_3 = [train[2][t] for t in train_index]

                y_0 = [train[3][t] for t in train_index]
                y_1 = [train[4][t] for t in train_index]

                y_one_out = [train[5][t] for t in train_index]

                x_0, mask_0, maxlen_0 = data.prepare_data(x_0)
                x_1, mask_1, maxlen_1 = data.prepare_data(x_1)
                x_3, mask_3, maxlen_3 = data.prepare_data(x_3, addIdxNum=2)

                y_0 = numpy.asarray(y_0, dtype='int32')
                y_1 = numpy.asarray(y_1, dtype='int32')
                y_one_out = numpy.asarray(y_one_out, dtype='int32')

                # NOTE: re-seeding here makes the Gaussian noise identical for
                # every minibatch; seeding once outside the loop is likely intended.
                rng = numpy.random.RandomState(9998)
                x0_noise_0 = rng.normal(scale=0.01, size=(cur_batch_size, dim_latent)).astype(config.floatX)
                x1_noise_1 = rng.normal(scale=0.01, size=(cur_batch_size, dim_latent)).astype(config.floatX)
                x3_noise_3 = rng.normal(scale=0.01, size=(cur_batch_size, dim_latent)).astype(config.floatX)

                generated_xs = f_G_produce(x0_noise_0, x1_noise_1, x3_noise_3,
                                           maxlen_0, maxlen_1, maxlen_3,
                                           y_0, y_1)

                # numpy.asarray([3] * cur_batch_size, dtype='int32')#
                generated_x_0 = generated_xs[0]
                generated_x_1 = generated_xs[1]
                generated_x_3 = numpy.concatenate(
                    (  # numpy.random.randint(0, n_cueTypes, (cur_batch_size,)).astype('int32')[None,:],
                        # numpy.random.randint(0, n_locDiffs, (cur_batch_size,)).astype('int32')[None,:],
                        x_3[0:2, :],
                        generated_xs[2]), axis=0)
                generated_m_0 = generated_xs[3]
                generated_m_1 = generated_xs[4]
                generated_m_3 = generated_xs[5]
                generated_y_0 = numpy.random.randint(0, ydim0 - 1, (cur_batch_size,)).astype('int32')
                generated_y_1 = numpy.random.randint(0, ydim1, (cur_batch_size,)).astype('int32')

                x_d_0 = numpy.concatenate((x_0, generated_x_0), axis=1)
                x_d_1 = numpy.concatenate((x_1, generated_x_1), axis=1)

                x_d_3 = numpy.concatenate((x_3, generated_x_3), axis=1)
                y_d_0_fake = numpy.asarray([1] * cur_batch_size + [0] * cur_batch_size, dtype='int32')
                y_d_1_fake = numpy.asarray([1] * cur_batch_size + [0] * cur_batch_size, dtype='int32')
                y_d_3_fake = numpy.asarray([1] * cur_batch_size + [0] * cur_batch_size, dtype='int32')
                # mask_ones_0 = numpy.ones_like(mask_0)
                # mask_ones_1 = numpy.ones_like(mask_1)
                # mask_ones_3 = numpy.ones_like(mask_3)
                mask_d_0 = numpy.concatenate((mask_0, generated_m_0), axis=1)
                mask_d_1 = numpy.concatenate((mask_1, generated_m_1), axis=1)
                mask_d_3 = numpy.concatenate((mask_3, generated_m_3), axis=1)
                y_d_0 = numpy.concatenate((y_0, generated_y_0), axis=0)
                y_d_1 = numpy.concatenate((y_1, generated_y_1), axis=0)

                d_cost = f_D_grad_shared(x_d_0, x_d_1, x_d_3,
                                         dropout_p, 1.0,
                                         mask_d_0, mask_d_1, mask_d_3,
                                         y_d_0_fake, y_d_1_fake, y_d_3_fake,
                                         y_d_0, y_d_1)

                g_cost = f_G_grad_shared(x_0, x_1, x_3,
                                         x0_noise_0, x1_noise_1, x3_noise_3,
                                         16, 16, 12,
                                         mask_0, mask_1, mask_3,
                                         generated_y_0, generated_y_1)

                # print(y_g_0.shape)

                print('\rd_cost = %f   g_cost = %f  @  %d' % (d_cost, g_cost, train_batch_idx), end='')
                # print(cur_batch_size)

                # ave_of_g_costs_sum += g_cost
                # ave_of_d_costs_sum += d_cost

                g_costs_list.append(g_cost)
                d_costs_list.append(d_cost)

                if d_cost < g_cost * 0.8:
                    for i in range(10):
                        f_G_update(0.01)
                        g_cost = f_G_grad_shared(x_0, x_1, x_3,
                                                 x0_noise_0, x1_noise_1, x3_noise_3,
                                                 16, 16, 12,
                                                 mask_0, mask_1, mask_3,
                                                 generated_y_0, generated_y_1)
                        if d_cost / g_cost >= 0.8 and d_cost / g_cost <= 1.0 / 0.8:
                            break
                elif g_cost < d_cost * 0.8:
                    for i in range(10):
                        f_D_update(0.01)
                        d_cost = f_D_grad_shared(x_d_0, x_d_1, x_d_3,
                                                 dropout_p, 1.0,
                                                 mask_d_0, mask_d_1, mask_d_3,
                                                 y_d_0_fake, y_d_1_fake, y_d_3_fake,
                                                 y_d_0, y_d_1)
                        if g_cost / d_cost >= 0.8 and g_cost / d_cost <= 1.0 / 0.8:
                            break
                else:
                    f_D_update(0.01)
                    f_G_update(0.01)

                if train_batch_idx % 100 == 0 or train_batch_idx == len(kf) - 1:
                    print("---Now %d/%d training bacthes @ epoch = %d" % (train_batch_idx, len(kf), eidx))

                if train_batch_idx > 0 and \
                        (train_batch_idx % 500 == 0 or train_batch_idx == len(kf) - 1):

                    cur_ave_of_d_costs = sum(d_costs_list) / len(d_costs_list)
                    cur_ave_of_g_costs = sum(g_costs_list) / len(g_costs_list)

                    print('ave_of_d_costs_sum = %f\tave_of_g_costs_sum = %f' % (cur_ave_of_d_costs, cur_ave_of_g_costs))

                    # print('outputing predicted labels of test set ... ... ...')

                    output_pred_labels(model_options,
                                       f_D_pred, f_D_pred_prob,
                                       data.prepare_data, test, kf_test,
                                       verbose=False, path="test_pred_labels.txt")

                    if cur_ave_of_d_costs >= last_ave_of_d_costs * 0.99 and \
                            cur_ave_of_g_costs >= last_ave_of_g_costs * 0.99:
                        stop_counter += 1

                    last_ave_of_d_costs = cur_ave_of_d_costs
                    last_ave_of_g_costs = cur_ave_of_g_costs

                    print('counter for early stopping : %d/%d' % (stop_counter, patience))

                    del d_costs_list[:]
                    del g_costs_list[:]

                    if stop_counter >= patience:
                        print('Early Stop!')
                        estop = True
                        break

                # end for

            if stop_counter >= patience:
                print('Early Stop!')
                estop = True
                break

            if estop:
                break


    except KeyboardInterrupt:
        print("Training interupted")