import random
from time import gmtime, strftime

import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

# Project-local helpers used below (AttentionDynamicModel, set_decode_type,
# generate_data_onfly, get_cur_time, get_results) are assumed to be imported
# from the accompanying modules of this repo.


def validate(dataset, model, batch_size=1000):
    """Validates model on given dataset in greedy mode
    """
    val_costs = rollout(model, dataset, batch_size=batch_size)
    set_decode_type(model, "sampling")  # restore training-time decode type after the greedy rollout
    mean_cost = tf.reduce_mean(val_costs)
    print(f"Validation score: {np.round(mean_cost, 4)}")
    return mean_cost
def rollout(model, dataset, batch_size=1000, disable_tqdm=False):
    # Evaluate model in greedy mode
    set_decode_type(model, "greedy")
    costs_list = []
    for batch in tqdm(dataset.batch(batch_size), disable=disable_tqdm, desc="Rollout greedy execution"):
        cost, _ = model(batch)
        costs_list.append(cost)
    return tf.concat(costs_list, axis=0)
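# Usage sketch (an assumption, not part of the original module): validating a model on a
# freshly generated dataset. Sizes are illustrative; generate_data_onfly,
# AttentionDynamicModel and set_decode_type come from the accompanying modules.
def _demo_validation(graph_size=20):
    val_data = generate_data_onfly(num_samples=2000, graph_size=graph_size)
    model = AttentionDynamicModel(128)
    set_decode_type(model, "sampling")  # validate() restores this after the greedy rollout
    return validate(val_data, model, batch_size=500)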
def copy_of_tf_model(model, embedding_dim=128, graph_size=20):
    """Copy model weights to new model
    """
    # https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}

    # Sample random time windows for a dummy batch of two instances; the inner loop
    # resamples the window end until it is strictly greater than the window start
    list_time_windows_min = [0, 5]
    list_time_windows_max = [5, 10]
    time_windows_min = np.random.choice(list_time_windows_min, size=(2, graph_size), replace=True)
    time_windows_max = np.zeros(shape=(2, graph_size))
    for n in range(2):
        for m in range(graph_size):
            a = 0
            while a <= time_windows_min[n][m]:
                a = random.choice(list_time_windows_max)
            time_windows_max[n][m] = a

    service_time = np.array([1, 2, 0.5])
    service_time_array = np.random.choice(service_time, size=(2, graph_size), replace=True)

    # Dummy inputs: depot coordinates, node coordinates, normalized demands,
    # time-window starts/ends and service times. One forward pass on these
    # creates the new model's variables so the weights can be assigned.
    data_random = [tf.random.uniform((2, 2), minval=0, maxval=1, dtype=tf.float32),
                   tf.random.uniform((2, graph_size, 2), minval=0, maxval=1, dtype=tf.float32),
                   tf.cast(tf.random.uniform(minval=1, maxval=10, shape=(2, graph_size),
                                             dtype=tf.int32), tf.float32)
                   / tf.cast(CAPACITIES[graph_size], tf.float32),
                   tf.convert_to_tensor(time_windows_min, dtype=tf.float32),
                   tf.convert_to_tensor(time_windows_max, dtype=tf.float32),
                   tf.convert_to_tensor(service_time_array, dtype=tf.float32)]

    new_model = AttentionDynamicModel(embedding_dim)
    set_decode_type(new_model, "sampling")
    _, _ = new_model(data_random)

    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)

    return new_model
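# Usage sketch (an assumption, not from the original source): a rollout-style baseline
# typically snapshots the current policy with copy_of_tf_model and evaluates the frozen
# copy greedily while the live model keeps training with sampling.
def _demo_copy_model(model, graph_size=20):
    frozen = copy_of_tf_model(model, embedding_dim=128, graph_size=graph_size)
    set_decode_type(frozen, "greedy")  # baseline copies are evaluated greedily
    return frozen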
def load_tf_model(path, embedding_dim=128, graph_size=20, n_encode_layers=2):
    """Load model weights from an h5 (HDF5) file
    """
    # https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}

    # Build a dummy batch identical in structure to the one in copy_of_tf_model above,
    # so that calling the model once creates all of its variables before loading weights
    list_time_windows_min = [0, 5]
    list_time_windows_max = [5, 10]
    time_windows_min = np.random.choice(list_time_windows_min, size=(2, graph_size), replace=True)
    time_windows_max = np.zeros(shape=(2, graph_size))
    for n in range(2):
        for m in range(graph_size):
            a = 0
            while a <= time_windows_min[n][m]:  # resample until the window closes after it opens
                a = random.choice(list_time_windows_max)
            time_windows_max[n][m] = a

    service_time = np.array([1, 2, 0.5])
    service_time_array = np.random.choice(service_time, size=(2, graph_size), replace=True)

    data_random = [tf.random.uniform((2, 2), minval=0, maxval=1, dtype=tf.float32),
                   tf.random.uniform((2, graph_size, 2), minval=0, maxval=1, dtype=tf.float32),
                   tf.cast(tf.random.uniform(minval=1, maxval=10, shape=(2, graph_size),
                                             dtype=tf.int32), tf.float32)
                   / tf.cast(CAPACITIES[graph_size], tf.float32),
                   tf.convert_to_tensor(time_windows_min, dtype=tf.float32),
                   tf.convert_to_tensor(time_windows_max, dtype=tf.float32),
                   tf.convert_to_tensor(service_time_array, dtype=tf.float32)]

    model_loaded = AttentionDynamicModel(embedding_dim, n_encode_layers=n_encode_layers)
    set_decode_type(model_loaded, "greedy")
    _, _ = model_loaded(data_random)

    model_loaded.load_weights(path)

    return model_loaded
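# Usage sketch (an assumption): round-tripping weights through an HDF5 checkpoint.
# The path is hypothetical; train_model below writes checkpoints with a similar pattern.
def _demo_save_and_reload(trained_model, graph_size=20):
    path = 'model_checkpoint_demo.h5'
    trained_model.save_weights(path, save_format='h5')
    return load_tf_model(path, embedding_dim=128, graph_size=graph_size)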
def train_model(optimizer,
                model_tf,
                baseline,
                validation_dataset,
                samples=1280000,
                batch=128,
                val_batch_size=1000,
                start_epoch=0,
                end_epoch=5,
                from_checkpoint=False,
                grad_norm_clipping=1.0,
                batch_verbose=1000,
                graph_size=20,
                filename=None):

    if filename is None:
        filename = 'VRP_{}_{}'.format(graph_size, strftime("%Y-%m-%d", gmtime()))

    def rein_loss(model, inputs, baseline, num_batch):
        """Calculate loss for REINFORCE algorithm
        """
        # Evaluate model, get costs and log probabilities
        cost, log_likelihood = model(inputs)

        # Evaluate baseline: for the first wp_n_epochs we take a combination of the baseline
        # and an EMA over previous batches; after that we take a slice of precomputed baseline values
        bl_val = bl_vals[num_batch] if bl_vals is not None else baseline.eval(inputs, cost)
        bl_val = tf.stop_gradient(bl_val)

        # Calculate loss
        reinforce_loss = tf.reduce_mean((cost - bl_val) * log_likelihood)
        return reinforce_loss, tf.reduce_mean(cost)

    def grad(model, inputs, baseline, num_batch):
        """Calculate gradients
        """
        with tf.GradientTape() as tape:
            loss, cost = rein_loss(model, inputs, baseline, num_batch)
        return loss, cost, tape.gradient(loss, model.trainable_variables)

    # For plotting
    train_loss_results = []
    train_cost_results = []
    val_cost_avg = []

    # Training loop
    for epoch in range(start_epoch, end_epoch):

        # Create dataset on current epoch
        data = generate_data_onfly(num_samples=samples, graph_size=graph_size)

        epoch_loss_avg = tf.keras.metrics.Mean()
        epoch_cost_avg = tf.keras.metrics.Mean()

        # Skip warm-up stage when we continue training from checkpoint
        if from_checkpoint and baseline.alpha != 1.0:
            print('Skipping warm-up mode')
            baseline.alpha = 1.0

        # If epoch > wp_n_epochs, precompute baseline values for the whole dataset; otherwise None
        bl_vals = baseline.eval_all(data)  # (samples, ) or None
        bl_vals = tf.reshape(bl_vals, (-1, batch)) if bl_vals is not None else None  # (n_batches, batch) or None

        print("Current decode type: {}".format(model_tf.decode_type))

        for num_batch, x_batch in tqdm(enumerate(data.batch(batch)),
                                       desc="batch calculation at epoch {}".format(epoch)):

            # Optimize the model
            loss_value, cost_val, grads = grad(model_tf, x_batch, baseline, num_batch)

            # Clip gradients by grad_norm_clipping
            init_global_norm = tf.linalg.global_norm(grads)
            grads, _ = tf.clip_by_global_norm(grads, grad_norm_clipping)
            global_norm = tf.linalg.global_norm(grads)

            if num_batch % batch_verbose == 0:
                print("grad_global_norm = {}, clipped_norm = {}".format(init_global_norm.numpy(),
                                                                        global_norm.numpy()))

            optimizer.apply_gradients(zip(grads, model_tf.trainable_variables))

            # Track progress
            epoch_loss_avg.update_state(loss_value)
            epoch_cost_avg.update_state(cost_val)

            if num_batch % batch_verbose == 0:
                print("Epoch {} (batch = {}): Loss: {}: Cost: {}".format(epoch, num_batch,
                                                                         epoch_loss_avg.result(),
                                                                         epoch_cost_avg.result()))

        # Update baseline if the candidate model is good enough; in this case also
        # create a new baseline dataset
        baseline.epoch_callback(model_tf, epoch)
        set_decode_type(model_tf, "sampling")

        # Save model weights
        model_tf.save_weights('model_checkpoint_epoch_{}_{}.h5'.format(epoch, filename),
                              save_format='h5')

        # Validate current model
        val_cost = validate(validation_dataset, model_tf, val_batch_size)
        val_cost_avg.append(val_cost)

        train_loss_results.append(epoch_loss_avg.result())
        train_cost_results.append(epoch_cost_avg.result())

        pd.DataFrame(data={'epochs': list(range(start_epoch, epoch + 1)),
                           'train_loss': [x.numpy() for x in train_loss_results],
                           'train_cost': [x.numpy() for x in train_cost_results],
                           'val_cost': [x.numpy() for x in val_cost_avg]
                           }).to_csv('backup_results_' + filename + '.csv', index=False)

        print(get_cur_time(),
              "Epoch {}: Loss: {}: Cost: {}".format(epoch,
                                                    epoch_loss_avg.result(),
                                                    epoch_cost_avg.result()))

    # Make plots and save results
    filename_for_results = filename + '_start={}, end={}'.format(start_epoch, end_epoch)
    get_results([x.numpy() for x in train_loss_results],
                [x.numpy() for x in train_cost_results],
                [x.numpy() for x in val_cost_avg],
                save_results=True,
                filename=filename_for_results,
                plots=True)
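# Usage sketch (an assumption, not from the original source): typical wiring of
# train_model. `RolloutBaseline` stands for whatever baseline class the repo provides;
# it must expose .alpha, .eval, .eval_all and .epoch_callback as used above, and its
# constructor arguments are repo-specific, so that line is illustrative only.
def _demo_training_run(graph_size=20):
    model = AttentionDynamicModel(128)
    set_decode_type(model, "sampling")
    baseline = RolloutBaseline(model)  # hypothetical constructor signature
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    val_data = generate_data_onfly(num_samples=10000, graph_size=graph_size)
    train_model(optimizer, model, baseline, val_data,
                samples=128000, batch=128, val_batch_size=1000,
                start_epoch=0, end_epoch=2, graph_size=graph_size)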