Example 1
def load_test_config(model_path, tta=False):
    """
    Parameters
    ----------
    model_path : path to specific model

    Returns
    -------
    keras model, test ids for the model, params object with model config
    """

    # load the model configuration
    params = Params(os.path.join(model_path, "config.json"))

    # cast all numeric types to float and save to json
    cast_params_types(params, model_path)

    params = Params(os.path.join(model_path, "params.json"))

    # read the data-split ids saved with the trained model
    test_ids = pd.read_csv(os.path.join(model_path,
                                        "test_ids.csv"))["0"].tolist()
    validation_ids = pd.read_csv(os.path.join(
        model_path, "validation_ids.csv"))["0"].tolist()
    train_ids = pd.read_csv(os.path.join(model_path,
                                         "train_ids.csv"))["0"].tolist()

    save_model_path = os.path.join(model_path, "weights.hdf5")
    model = load_model(
        save_model_path,
        custom_objects={'AttentionAugmentation2D': AttentionAugmentation2D})

    # model = load_model(save_model_path)
    return model, test_ids, validation_ids, train_ids, params
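
A minimal usage sketch for load_test_config() above, assuming a model directory that follows the layout the function reads (config.json, the *_ids.csv files and weights.hdf5); the directory path is a placeholder:

# hypothetical call site; load_test_config() from above is assumed to be in scope
model_dir = "./logs/experiment_01"
model, test_ids, validation_ids, train_ids, params = load_test_config(model_dir)
print(f"{len(train_ids)} train / {len(validation_ids)} validation / {len(test_ids)} test ids")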
Example 2
for child_dir in [p for p in path.glob("**/*") if p.is_dir()]:
    sys.path.append(str(child_dir))

from custom_metrics import ModelMetrics
from model_logging import ModelCheckpointCustom
from print_stats import PrintStats
from tensorboard_callback import TensorboardCallback
from losses import get_loss
from optimizers import get_optimizer

from models.model import get_model
from config import TRAIN_DATA_PATH
from utils.utils import Params, TrainOps, Logging, data_split
from generator_2d import DataGenerator

params = Params("params.json")
params.data_path = TRAIN_DATA_PATH

logging = Logging("./logs", params)

ids = os.listdir(os.path.join(params.data_path, "images"))
train_ids, validation_ids, test_ids = data_split(ids, params)

logging.create_model_directory()
params.model_directory = logging.model_directory

# saving model config file to model output dir
logging.save_dict_to_json(logging.model_directory + "/config.json")

# Generators
train_generator = DataGenerator(train_ids, params=params, is_training=True)
Example 3
def main(flags):
    params = Params("params.json")
    params.data_path = TRAIN_DATA_PATH

    params.cv_iteration = flags.cfs_cv_iteration

    logging = Logging(flags.save_model_dir, params)

    ids = os.listdir(os.path.join(params.data_path, "images"))
    train_ids, validation_ids, test_ids = data_split(ids, params)

    test_id = [test_ids[params.cv_iteration]]

    # log test id
    params.test_id = test_id[0]

    print("Test records is: ", test_id[0])

    test_ids = [id_ for id_ in test_ids if id_ not in test_id]
    extra_ids = test_ids
    random.shuffle(extra_ids)

    train_ids = train_ids + extra_ids[0:int(len(extra_ids) * 0.75)]
    validation_ids = validation_ids + extra_ids[int(len(extra_ids) * 0.75):]

    print(f"Number of training samples: {len(train_ids)}, "
          f"number of validation samples: {len(validation_ids)}, "
          f"number of test sample: {len(test_id)}")

    logging.create_model_directory(
        model_dir=f"{flags.save_model_dir}/{test_id[0].replace('.png', '')}")
    params.model_directory = logging.model_directory

    # saving model config file to model output dir
    logging.save_dict_to_json(logging.model_directory + "/config.json")

    # Generators
    train_generator = DataGenerator(train_ids, params=params, is_training=True)
    validation_generator = DataGenerator(validation_ids,
                                         params=params,
                                         is_training=False)

    trainops = TrainOps(params, num_records=len(train_generator))

    optimizer = get_optimizer(params, trainops)
    loss_fn = get_loss(params)

    model_metrics = ModelMetrics(params)
    tb_callback = TensorboardCallback(model_dir=params.model_directory)
    model_checkpoint = ModelCheckpointCustom(monitor="val_acc",
                                             model_dir=params.model_directory,
                                             mode="max")
    print_stats = PrintStats(params=params)

    # get model
    model = get_model(params)

    for epoch in range(params.num_epochs):
        # Iterate over the batches of the dataset.
        for step, (x_batch_train,
                   y_batch_train) in tqdm(enumerate(train_generator)):
            with tf.GradientTape() as tape:
                logits = model(x_batch_train, training=True)
                loss = loss_fn(y_batch_train, logits)

            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            current_lr = optimizer._decayed_lr(tf.float32).numpy()
            print(
                f"\nOpt Iteration: {optimizer.iterations.numpy()} "
                f"learning rate: {current_lr} loss: {loss.numpy():.2f}"
            )

            # Update training metric.
            model_metrics.update_metric_states(y_batch_train,
                                               logits,
                                               mode="train")

        # Display metrics at the end of each epoch.
        train_result_dict = model_metrics.result_metrics(mode="train")

        tb_callback.on_epoch_end(epoch=epoch,
                                 logging_dict=train_result_dict,
                                 lr=current_lr)

        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in validation_generator:
            val_logits = model(x_batch_val, training=False)
            val_loss = loss_fn(y_batch_val, val_logits)

            # Update val metrics
            model_metrics.update_metric_states(y_batch_val,
                                               val_logits,
                                               mode="val")

        print(f"validation loss is: f'{val_loss.numpy():.2f}'")

        val_result_dict = model_metrics.result_metrics(mode="val")

        tb_callback.on_epoch_end(epoch=epoch, logging_dict=val_result_dict)
        model_checkpoint.on_epoch_end(epoch,
                                      model,
                                      logging_dict=val_result_dict)
        print_stats.on_epoch_end(epoch,
                                 train_dict=train_result_dict,
                                 validation_dict=val_result_dict,
                                 lr=current_lr)

        # Reset training metrics at the end of each epoch
        model_metrics.reset_metric_states(mode="train")
        model_metrics.reset_metric_states(mode="val")
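
For completeness, a sketch of the command-line interface main() above appears to expect; the flag names cfs_cv_iteration and save_model_dir are taken from the code, while the types, defaults and help texts are assumptions:

# hypothetical entry point; only the flag names come from the code above
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cfs_cv_iteration", type=int, default=0,
                        help="index of the record held out as the test sample")
    parser.add_argument("--save_model_dir", type=str, default="./logs",
                        help="directory in which the per-record model directory is created")
    main(parser.parse_args())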
Example 4
    parser.add_argument('--datapath',
                        '-d',
                        type=str,
                        default='./data/bible.txt')
    FLAGS = parser.parse_args()

    if not os.path.exists(FLAGS.folder):
        make_directory(FLAGS.folder)

    set_logger(os.path.join(FLAGS.folder, 'train.log'))
    if FLAGS.config is None:
        FLAGS.config = os.path.join(FLAGS.folder, 'config.json')
        if not os.path.isfile(FLAGS.config):
            raise FileNotFoundError('config.json is not found!')
    params = Params(jsonpath=FLAGS.config)

    logging.info('Start word2vec training pipeline! Params:')
    logging.info(json.dumps(params.__dict__, indent=True))

    if params.model not in ['hier_softmax', 'neg_sampling']:
        raise NotImplementedError(f"{params.model} model is not supported!")

    # load data:
    logging.info('Loading data:')

    processed_datapath = os.path.join(FLAGS.folder,
                                      f'{params.model}_processed_data.pkl')

    processing_params = dict(threshold_count=params.threshold_count,
                             pipeline=params.model,
Example 5
def predict(inp, target, params, restore_from, config=None,
            model_dir='./ie590_project/experiments/ex1',
            model_save_dir='./ie590_project/experiments/ex1/model_save/1'):
    """Predict target values given input file paths.

    Args:
        inp: (list) a list of image file paths; 2D -> [sample_size, number_of_channels]
        target: (list) a float list of target values
        params: (Params or str) Params object or path to params.json
        restore_from: (str) checkpoint name or directory containing checkpoints to restore from
        config: (tf.compat.v1.ConfigProto) optional session configuration
        model_dir: (str) directory where the training log is written
        model_save_dir: (str) directory where model checkpoints are saved

    Returns:
        out: (np.ndarray) predicted target values; has exactly the same shape as target
    """

    assert len(inp) == len(target)

    if isinstance(params, str):
        assert os.path.isfile(
            params), "params.json does not exist at {}".format(params)
        params = Params(params)
        params.load(params.update)  # load parameters
    params.inp_size = len(inp)

    set_logger(os.path.join(model_dir, 'train.log'))

    logging.info("Creating the dataset...")
    inputs = input_fn(False, inp, target, params)

    logging.info("Creating the model...")
    model_spec = model_fn(False, inputs, params)

    logging.info("Calculating predictions...")
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(model_spec['variable_init_op'])

        save_path = os.path.join(model_save_dir, restore_from)
        if os.path.isdir(save_path):
            save_path = tf.train.latest_checkpoint(
                save_path
            )  # If restore_from is a directory, get the latest ckpt
        saver.restore(sess, save_path)

        num_steps = (params.inp_size + params.batch_size -
                     1) // params.batch_size

        sess.run([iterator_init_op, metrics_init_op])

        if len(np.shape(target)) == 1:
            out = np.empty(np.shape(target))[:, np.newaxis]
        else:
            out = np.empty(np.shape(target))
        for i in tqdm(range(num_steps)):
            _, predictions_eval = sess.run([update_metrics_op, predictions])
            if i < num_steps - 1:
                out[i * params.batch_size:(i + 1) *
                    params.batch_size, :] = predictions_eval
            else:
                out[i * params.batch_size:, :] = predictions_eval

    return out
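
A hedged usage sketch for predict(); the image paths, target values and checkpoint name are placeholders, and the params.json path simply mirrors the default experiment directory from the signature:

# hypothetical call site for predict(); every path and value here is a placeholder
image_paths = [["img_000_ch0.png", "img_000_ch1.png"],
               ["img_001_ch0.png", "img_001_ch1.png"]]  # [sample_size, number_of_channels]
targets = [0.37, 1.25]
preds = predict(image_paths, targets,
                params="./ie590_project/experiments/ex1/params.json",
                restore_from="best_weights")
print(np.squeeze(preds))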
Example 6
    for _ in range(num_steps):
        _, predictions_eval = sess.run([update_metrics_op, predictions])


if __name__ == '__main__':
    start_time = time.time()

    #for reproducibility
    tf.compat.v1.set_random_seed(123)

    args = parser.parse_args()
    params_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        params_path), "params.json does not exist at {}".format(params_path)
    params = Params(params_path)
    params.load(params.update)

    #TODO: check and load if there's the best weights so far
    #     model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))

    #set logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    #train/test split
    train_fpaths, test_fpaths, train_targets, test_targets = \
        get_train_test_split(args.json_path, args.data_dir, train_size=args.train_size)

    params.train_size = len(train_fpaths)
    params.test_size = len(test_fpaths)
Example 7
                    default='./data_3',
                    help="Where to write the new data")

parser.add_argument('-v',
                    dest='verbose',
                    action='store_true',
                    help='verbose mode')

if __name__ == '__main__':

    # Load the parameters from json file
    args = parser.parse_args()
    json_path = './params.json'
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    assert os.path.isdir(
        args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)

    # Define the data directories
    train_data_dir = os.path.join(args.output_dir, 'train')
    test_data_dir = os.path.join(args.output_dir, 'test')
    eval_data_dir = os.path.join(args.output_dir, 'eval')
    dataset_dir = args.data_dir

    filenames = {
        'train': train_data_dir,
        'eval': eval_data_dir,
        'test': test_data_dir,
        'data': dataset_dir
Example 8
    u8 = Conv2DTranspose(params.n_filters * 8, (3, 3),
                         strides=(2, 2),
                         padding='same')(c8)
    c9 = conv2d_block(u8, n_filters=params.n_filters * 8, kernel_size=3)

    u9 = Conv2DTranspose(params.n_filters * 4, (3, 3),
                         strides=(2, 2),
                         padding='same')(c9)
    c10 = conv2d_block(u9, n_filters=params.n_filters * 4, kernel_size=3)

    u10 = Conv2DTranspose(params.n_filters * 3, (3, 3),
                          strides=(2, 2),
                          padding='same')(c10)
    c11 = conv2d_block(u10, n_filters=params.n_filters * 3, kernel_size=3)

    u11 = Conv2DTranspose(params.n_filters * 2, (3, 3),
                          strides=(2, 2),
                          padding='same')(c11)
    c12 = conv2d_block(u11, n_filters=params.n_filters * 2, kernel_size=3)

    outputs = Conv2D(params.num_classes, (1, 1), activation='softmax')(c12)

    model = Model(inputs=inputs, outputs=[outputs])
    return model


if __name__ == "__main__":
    params = Params(os.path.join(PROJ_DIR, "params.json"))
    unet(params)
Example 9
        stream_handler.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(stream_handler)


if __name__ == "__main__":
    start_time = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--params",
        type=str,
        required=True,
        help="load training parameters for BERT model",
    )
    args = parser.parse_args()

    params = Params(args.params)

    print("Num GPUs Available: ",
          len(tf.config.experimental.list_physical_devices("XLA_CPU")))
    tf.random.set_random_seed(params.SEED)
    USE_GPU = True
    if USE_GPU:
        device = '/device:XLA_CPU:0'
    else:
        device = "/CPU:0"
    # Set Logger
    if not os.path.exists(params.LOG_DIR):
        os.makedirs(params.LOG_DIR)
    set_logger(os.path.join(params.LOG_DIR, params.NAME + ".log"))

    # Initialize session