def create_model(params, data_idx=None):
    # Avoid a mutable default argument; default to the first data file.
    if data_idx is None:
        data_idx = [0]

    data_folder_path = params["data_folder_path"]
    model_name = params["model_name"]
    data_set_name = params["data_set_name"]

    # Make the parent directory importable so that iodata can be found.
    import os, sys
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    import iodata

    # Load one sample and run a single forward pass so the model builds
    # its weights before it is returned.
    p, E, _ = iodata.load_data_set(data_folder_path, data_set_name, data_idx)
    inputs = (p, E)  # renamed from `input` to avoid shadowing the builtin
    model = discrete_10000_ec_1(params)
    model(inputs)
    return model
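# Usage sketch for create_model. The params keys are inferred from the
# function body above; the folder path and names below are placeholder
# assumptions, and discrete_10000_ec_1 must be defined in this module.
example_params = {
    "data_folder_path": "data/",
    "model_name": "my_model",
    "data_set_name": "nve_data",
}
example_model = create_model(example_params, data_idx=[0])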
Example #2
# Module-level imports needed by train(); NVEGenerator and load_data_set
# are assumed to be defined elsewhere in this module.
import os

import mlflow
import tensorflow as tf


def train(model, params):
    # mlflow logging
    # Start a tracking server with, for example:
    # mlflow server --backend-store-uri mlruns/ --default-artifact-root mlruns/ --host 0.0.0.0
    remote_server_uri = params["server_uri"]  # set to your server URI
    # Alternatively, set the MLFLOW_TRACKING_URI environment variable.
    mlflow.set_tracking_uri(remote_server_uri)

    exp_name = params["model_name"]
    mlflow.set_experiment(exp_name)

    # Auto-log TensorFlow metrics every `every_n_iter` iterations.
    mlflow.tensorflow.autolog(every_n_iter=params['every_n_iter'])

    print("# mlflow initialized")

    # Load model weights; if no checkpoint exists yet, save the fresh model
    # as epoch 0 so there is always a checkpoint to resume from.
    ckpt_path = os.path.join(params["model_folder_path"], params["model_name"],
                             params["ckpt_filename"])
    ckpt_dir = os.path.dirname(ckpt_path)
    latest = tf.train.latest_checkpoint(ckpt_dir)
    if not latest:
        model.save_weights(ckpt_path.format(epoch=0, val_loss=0))
        latest = tf.train.latest_checkpoint(ckpt_dir)
    else:
        model.load_weights(latest)

    print("# Model loaded from: {}".format(latest))

    # Initialize the training and validation data set paths
    train_ds_path = os.path.join(params['ds_folder'], params['ds_name'],
                                 "train")
    val_ds_path = os.path.join(params['ds_folder'], params['ds_name'],
                               "validation")

    # Decide between loading the full data set into memory or streaming it
    # in pieces via the NVEGenerator API (to avoid running out of memory).
    if params["full_load"]:
        train_ds_prefix = os.path.join(train_ds_path, params['ds_name'])
        train_ds = tf.data.Dataset.from_tensor_slices(
            load_data_set(train_ds_prefix, params['train_idx'], shuffle=False))
        train_ds = train_ds.batch(params['train_batch_size'])

        val_ds_prefix = os.path.join(val_ds_path, params['ds_name'])
        val_ds = tf.data.Dataset.from_tensor_slices(
            load_data_set(val_ds_prefix, params['val_idx'], shuffle=False))
        val_ds = val_ds.batch(params['val_batch_size'])
    else:
        train_ds = NVEGenerator(train_ds_path,
                                params['ds_name'],
                                params['train_idx'],
                                meta_batch_size=params['train_batch_size'],
                                shuffle=params['shuffle'])
        val_ds = NVEGenerator(val_ds_path,
                              params['ds_name'],
                              params['val_idx'],
                              meta_batch_size=params['val_batch_size'],
                              shuffle=params['shuffle'])

    print("# Training and validation generator initialized")

    # Compiling model
    model.compile(
        optimizer=params['optimizer'],
        loss=params['loss'],
        #metrics=params['metrics']
    )
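    # (Assumption: params['optimizer'] and params['loss'] are standard Keras
    # identifiers or objects, e.g. 'adam' and 'mse'.)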

    print("# Model compiled")

    # Callbacks
    # Checkpointing
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        ckpt_path,
        verbose=1,
        save_weights_only=True,
        mode='min',
        # Saves weights every `save_freq` calls to the data set generator.
        save_freq=params['save_freq'],
        monitor='val_loss',
        save_best_only=params['save_best_only'])
    callbacks_list = [cp_callback]
    # Optional learning-rate schedule callback.
    if 'lr_schedule' in params:
        callbacks_list.append(
            tf.keras.callbacks.LearningRateScheduler(params['lr_schedule'],
                                                     verbose=0))
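    # Sketch (assumption): params['lr_schedule'] is a schedule function with
    # the Keras LearningRateScheduler signature (epoch, lr) -> new lr, e.g.:
    #     def lr_schedule(epoch, lr):
    #         return lr * 0.9 if epoch >= 10 else lr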

    print("# Checkpoint callback initialized")

    # Resume from the epoch after the latest checkpoint. This assumes the
    # checkpoint file name contains a four-digit epoch tag such as 'cp-0012'.
    latest_epoch = latest.find('cp-')
    latest_epoch = int(latest[latest_epoch + 3:latest_epoch + 7]) + 1

    print("# continue training from epoch", latest_epoch)

    # Training
    # Notes:
    # - Do not specify batch_size when the data is a generator or a tf.data.Dataset.
    # - shuffle is ignored when the data is a generator; the generator itself
    #   reshuffles at the end of each epoch.
    history = model.fit(
        x=train_ds,
        epochs=params['epochs'],
        verbose=params["verbose"],
        callbacks=callbacks_list,
        validation_data=val_ds,
        shuffle=True,
        initial_epoch=latest_epoch,
    )

    return model, history
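# A value for params["ckpt_filename"] consistent with the code above (an
# assumption, not taken from the original): it must be a format string with
# `epoch` and `val_loss` placeholders (see ckpt_path.format(...)) whose
# four-digit 'cp-' epoch tag the latest-epoch parsing can recover, e.g.:
#     "cp-{epoch:04d}-{val_loss:.4f}.ckpt"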
Example #3
def __getitem__(self, idx):
    """Return the idx-th meta-batch of the data set."""
    # Slice out the file indices belonging to this meta-batch and load them.
    idx = self.data_idx[idx * self.meta_batch_size:
                        (idx + 1) * self.meta_batch_size]
    return load_data_set(self.file_name_prefix, idx, shuffle=self.shuffle)
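# A minimal sketch of the class around __getitem__ above, ASSUMING that
# NVEGenerator is a tf.keras.utils.Sequence. The attribute names come from
# __getitem__, the constructor mirrors the NVEGenerator(...) calls in
# train(), and __len__ / on_epoch_end are reconstructions, not the original.
import math
import os
import random

import tensorflow as tf


class NVEGenerator(tf.keras.utils.Sequence):
    # The __getitem__ shown above completes this class.

    def __init__(self, ds_path, ds_name, data_idx,
                 meta_batch_size=1, shuffle=False):
        self.file_name_prefix = os.path.join(ds_path, ds_name)
        self.data_idx = list(data_idx)
        self.meta_batch_size = meta_batch_size
        self.shuffle = shuffle

    def __len__(self):
        # Number of meta-batches served per epoch.
        return math.ceil(len(self.data_idx) / self.meta_batch_size)

    def on_epoch_end(self):
        # Reshuffle file indices between epochs (see the note in train()).
        if self.shuffle:
            random.shuffle(self.data_idx)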