def load_test_config(model_path, tta=False):
    """Load a trained model and its data-split ids from a model directory.

    Parameters
    ----------
    model_path : str
        Path to the directory of a specific trained model.
    tta : bool, optional
        Test-time augmentation flag. Unused in this function; kept for
        backward compatibility with existing callers.

    Returns
    -------
    tuple
        ``(model, test_ids, validation_ids, train_ids, params)`` — the
        loaded keras model, the id lists for each data split, and the
        Params object with the model configuration.
    """
    # Load the original config, cast all numeric types to float and save
    # the result, then re-read the casted copy so downstream code sees
    # uniform numeric types.
    params = Params(os.path.join(model_path, "config.json"))
    cast_params_types(params, model_path)
    params = Params(os.path.join(model_path, "params.json"))

    # The split ids were saved at train time as single-column ("0") CSVs.
    test_ids = pd.read_csv(os.path.join(model_path, "test_ids.csv"))["0"].tolist()
    validation_ids = pd.read_csv(os.path.join(
        model_path, "validation_ids.csv"))["0"].tolist()
    train_ids = pd.read_csv(os.path.join(model_path, "train_ids.csv"))["0"].tolist()

    # The custom attention layer must be registered so Keras can
    # deserialize the saved weights file.
    save_model_path = os.path.join(model_path, "weights.hdf5")
    model = load_model(
        save_model_path,
        custom_objects={'AttentionAugmentation2D': AttentionAugmentation2D})

    return model, test_ids, validation_ids, train_ids, params
# Make every subdirectory under `path` importable so the project-local
# modules below resolve without package-qualified names.
# NOTE(review): `path` is presumably a pathlib.Path defined above this
# chunk — confirm.
for child_dir in [p for p in path.glob("**/*") if p.is_dir()]:
    sys.path.append(str(child_dir))

from custom_metrics import ModelMetrics
from model_logging import ModelCheckpointCustom
from print_stats import PrintStats
from tensorboard_callback import TensorboardCallback
from losses import get_loss
from optimizers import get_optimizer
from models.model import get_model
from config import TRAIN_DATA_PATH
from utils.utils import Params, TrainOps, Logging, data_split
from generator_2d import DataGenerator

# Training configuration comes from the repo-level params.json; the data
# path is forced to the configured training-data location.
params = Params("params.json")
params.data_path = TRAIN_DATA_PATH

logging = Logging("./logs", params)

# Sample ids are simply the filenames in the images directory; the split
# into train/validation/test is delegated to the project helper.
ids = os.listdir(os.path.join(params.data_path, "images"))
train_ids, validation_ids, test_ids = data_split(ids, params)

logging.create_model_directory()
params.model_directory = logging.model_directory

# saving model config file to model output dir
logging.save_dict_to_json(logging.model_directory + "/config.json")

# Generators
train_generator = DataGenerator(train_ids, params=params, is_training=True)
def main(flags):
    """Run one cross-validation iteration of custom-loop training.

    Holds out a single test record (selected by ``flags.cfs_cv_iteration``),
    redistributes the remaining test ids 75/25 into the train/validation
    splits, then trains ``params.num_epochs`` epochs with a manual
    ``tf.GradientTape`` loop, logging metrics to TensorBoard and
    checkpointing on best ``val_acc``.

    Parameters
    ----------
    flags : argparse.Namespace
        Must provide ``cfs_cv_iteration`` (int) and ``save_model_dir`` (str).
    """
    params = Params("params.json")
    params.data_path = TRAIN_DATA_PATH
    params.cv_iteration = flags.cfs_cv_iteration

    logging = Logging(flags.save_model_dir, params)

    ids = os.listdir(os.path.join(params.data_path, "images"))
    train_ids, validation_ids, test_ids = data_split(ids, params)

    # Single held-out test record for this CV iteration.
    test_id = [test_ids[params.cv_iteration]]

    # log test id
    params.test_id = test_id[0]
    print("Test records is: ", test_id[0])

    # Every other would-be test record is recycled: 75% into training,
    # 25% into validation.
    test_ids = [id_ for id_ in test_ids if id_ not in test_id]
    extra_ids = test_ids
    random.shuffle(extra_ids)
    train_ids = train_ids + extra_ids[0:int(len(extra_ids) * 0.75)]
    validation_ids = validation_ids + extra_ids[int(len(extra_ids) * 0.75):]

    print(f"Number of training samples: {len(train_ids)}, "
          f"number of validation samples: {len(validation_ids)}, "
          f"number of test sample: {len(test_id)}")

    # One model directory per held-out record, named after the record.
    logging.create_model_directory(
        model_dir=f"{flags.save_model_dir}/{test_id[0].replace('.png', '')}")
    params.model_directory = logging.model_directory

    # saving model config file to model output dir
    logging.save_dict_to_json(logging.model_directory + "/config.json")

    # Generators
    train_generator = DataGenerator(train_ids, params=params, is_training=True)
    validation_generator = DataGenerator(validation_ids, params=params,
                                         is_training=False)

    trainops = TrainOps(params, num_records=len(train_generator))
    optimizer = get_optimizer(params, trainops)
    loss_fn = get_loss(params)
    model_metrics = ModelMetrics(params)
    tb_callback = TensorboardCallback(model_dir=params.model_directory)
    model_checkpoint = ModelCheckpointCustom(monitor="val_acc",
                                             model_dir=params.model_directory,
                                             mode="max")
    print_stats = PrintStats(params=params)

    # get model
    model = get_model(params)

    for epoch in range(params.num_epochs):
        # Iterate over the batches of the dataset.
        for step, (x_batch_train, y_batch_train) in tqdm(enumerate(train_generator)):
            with tf.GradientTape() as tape:
                logits = model(x_batch_train, training=True)
                loss = loss_fn(y_batch_train, logits)
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            # NOTE: _decayed_lr is a private Keras-optimizer API; there is
            # no public one-call equivalent for schedule-aware LR here.
            current_lr = optimizer._decayed_lr(tf.float32).numpy()
            # BUG FIX: use the public `optimizer.iterations` instead of
            # poking optimizer.__dict__['_iterations'], and drop the
            # redundant np.round before the :.2f format.
            print(
                f"\nOpt Iteration: {optimizer.iterations.numpy()} "
                f"learning rate: {current_lr} loss: {loss.numpy():.2f}"
            )

            # Update training metric.
            model_metrics.update_metric_states(y_batch_train, logits, mode="train")

        # Display metrics at the end of each epoch.
        train_result_dict = model_metrics.result_metrics(mode="train")
        tb_callback.on_epoch_end(epoch=epoch, logging_dict=train_result_dict,
                                 lr=current_lr)

        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in validation_generator:
            val_logits = model(x_batch_val, training=False)
            val_loss = loss_fn(y_batch_val, val_logits)
            # Update val metrics
            model_metrics.update_metric_states(y_batch_val, val_logits, mode="val")
            # BUG FIX: the original nested-f-string printed the literal
            # text f'...' around the value.
            print(f"validation loss is: {val_loss.numpy():.2f}")

        val_result_dict = model_metrics.result_metrics(mode="val")
        tb_callback.on_epoch_end(epoch=epoch, logging_dict=val_result_dict)
        model_checkpoint.on_epoch_end(epoch, model, logging_dict=val_result_dict)
        print_stats.on_epoch_end(epoch, train_dict=train_result_dict,
                                 validation_dict=val_result_dict, lr=current_lr)

        # Reset training metrics at the end of each epoch
        model_metrics.reset_metric_states(mode="train")
        model_metrics.reset_metric_states(mode="val")
parser.add_argument('--datapath', '-d', type=str, default='./data/bible.txt') FLAGS = parser.parse_args() if not os.path.exists(FLAGS.folder): make_directory(FLAGS.folder) set_logger(os.path.join(FLAGS.folder, 'train.log')) if FLAGS.config is None: try: FLAGS.config = os.path.join(FLAGS.folder, 'config.json') except FileNotFoundError: raise FileNotFoundError('config.json is not found!') params = Params(jsonpath=FLAGS.config) logging.info('Start word2vec training pipeline! Params:') logging.info(json.dumps(params.__dict__, indent=True)) if params.model not in ['hier_softmax', 'neg_sampling']: raise NotImplementedError(f"{params.model} model is not supported!") # load data: logging.info('Loading data:') processed_datapath = os.path.join(FLAGS.folder, f'{params.model}_processed_data.pkl') processing_params = dict(threshold_count=params.threshold_count, pipeline=params.model,
def predict(inp, target, params, restore_from, config=None,
            model_dir='./ie590_project/experiments/ex1',
            model_save_dir='./ie590_project/experiments/ex1/model_save/1'):
    """predict target values given input file paths

    Args:
        inp: (list) a string list of image files paths; 2D -> [sample_size, number_of_channels]
        target: (list) a float list of target values
        params: (Params or str) Params object or params.json path
        restore_from: (str) ckpt or directory name where ckpts are located for restoring
        config: (tf.compat.v1.ConfigProto) optional Session configuration
        model_dir: (str) directory used for the train.log file
        model_save_dir: (str) directory holding saved checkpoints
    Return:
        out: (np.ndarray) predicted target values; same leading dimension as target
    """
    assert len(inp) == len(target)

    # Accept either a ready Params object or a path to params.json.
    if type(params) is str:
        assert os.path.isfile(
            params), "params.json does not exits at {}".format(params)
        params = Params(params)
        # NOTE(review): confirm Params.load's expected argument — passing
        # the bound method `params.update` mirrors the project's other
        # scripts but looks suspicious.
        params.load(params.update)  # load parameters
    params.inp_size = len(inp)

    set_logger(os.path.join(model_dir, 'train.log'))

    logging.info("Creating the dataset...")
    inputs = input_fn(False, inp, target, params)
    logging.info("Creating the model...")
    model_spec = model_fn(False, inputs, params)

    # BUG FIX: these lookups originally appeared before `model_spec` was
    # assigned (UnboundLocalError), and the Saver was constructed before
    # the graph variables existed. Both must follow model_fn().
    iterator_init_op = model_spec['iterator_init_op']
    update_metrics_op = model_spec['update_metrics_op']
    metrics = model_spec['metrics']
    metrics_init_op = model_spec['metrics_init_op']
    predictions = model_spec['predictions']
    saver = tf.compat.v1.train.Saver()

    logging.info("Calculating predictions...")
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(model_spec['variable_init_op'])

        save_path = os.path.join(model_save_dir, restore_from)
        if os.path.isdir(save_path):
            # If restore_from is a directory, get the latest ckpt
            save_path = tf.train.latest_checkpoint(save_path)
        saver.restore(sess, save_path)

        # Ceiling division: the last batch may be partial.
        num_steps = (params.inp_size + params.batch_size - 1) // params.batch_size

        sess.run([iterator_init_op, metrics_init_op])

        # Keep `out` 2-D so the batch-slice assignments below work for
        # both 1-D and 2-D targets.
        if len(np.shape(target)) == 1:
            out = np.empty(np.shape(target))[:, np.newaxis]
        else:
            out = np.empty(np.shape(target))

        for i in tqdm(range(num_steps)):
            _, predictions_eval = sess.run([update_metrics_op, predictions])
            if i < num_steps - 1:
                out[i * params.batch_size:(i + 1) * params.batch_size, :] = predictions_eval
            else:
                # Last (possibly partial) batch fills the remainder.
                out[i * params.batch_size:, :] = predictions_eval

    return out
# NOTE(review): this loop appears to be the tail of an evaluation helper
# defined above this chunk — `sess`, `num_steps`, `update_metrics_op` and
# `predictions` come from that enclosing scope.
for _ in range(num_steps):
    _, predictions_eval = sess.run([update_metrics_op, predictions])


if __name__ == '__main__':
    start_time = time.time()

    #for reproducibility
    tf.compat.v1.set_random_seed(123)

    args = parser.parse_args()
    params_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        params_path), "params.json does not exits at {}".format(params_path)
    params = Params(params_path)
    # NOTE(review): confirm Params.load's expected argument — passing the
    # bound method `params.update` looks suspicious.
    params.load(params.update)

    #TODO: check and load if there's the best weights so far
    # model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))

    #set logger
    set_logger(os.path.join(args.model_dir, 'train.log'))

    #train/test split
    train_fpaths, test_fpaths, train_targets, test_targets = \
        get_train_test_split(args.json_path, args.data_dir,
                             train_size=args.train_size)
    # Record split sizes on params for downstream step-count computation.
    params.train_size = len(train_fpaths)
    params.test_size = len(test_fpaths)
# NOTE(review): the first fragment below completes a parser.add_argument
# call whose opening is above this chunk.
                    default='./data_3', help="Where to write the new data")
parser.add_argument('-v', dest='verbose', action='store_true',
                    help='verbose mode')

if __name__ == '__main__':
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = './params.json'
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    assert os.path.isdir(
        args.data_dir), "Couldn't find the dataset at {}".format(args.data_dir)

    # Define the data directories
    train_data_dir = os.path.join(args.output_dir, 'train')
    test_data_dir = os.path.join(args.output_dir, 'test')
    eval_data_dir = os.path.join(args.output_dir, 'eval')
    dataset_dir = args.data_dir

    # Map each split name to its target directory; 'data' is the raw
    # source directory. NOTE(review): this dict literal continues past
    # this chunk.
    filenames = {
        'train': train_data_dir,
        'eval': eval_data_dir,
        'test': test_data_dir,
        'data': dataset_dir
    # NOTE(review): these lines continue a U-Net-style builder
    # (`unet(params)`) whose `def` and encoder half are above this chunk —
    # only the decoder is visible here.
    # Decoder: each stage upsamples 2x with a transposed conv, then
    # refines with a conv block; filter counts taper 8x -> 4x -> 3x -> 2x.
    u8 = Conv2DTranspose(params.n_filters * 8, (3, 3), strides=(2, 2),
                         padding='same')(c8)
    c9 = conv2d_block(u8, n_filters=params.n_filters * 8, kernel_size=3)

    u9 = Conv2DTranspose(params.n_filters * 4, (3, 3), strides=(2, 2),
                         padding='same')(c9)
    c10 = conv2d_block(u9, n_filters=params.n_filters * 4, kernel_size=3)

    u10 = Conv2DTranspose(params.n_filters * 3, (3, 3), strides=(2, 2),
                          padding='same')(c10)
    c11 = conv2d_block(u10, n_filters=params.n_filters * 3, kernel_size=3)

    u11 = Conv2DTranspose(params.n_filters * 2, (3, 3), strides=(2, 2),
                          padding='same')(c11)
    c12 = conv2d_block(u11, n_filters=params.n_filters * 2, kernel_size=3)

    # Per-pixel class scores via 1x1 conv with softmax activation.
    outputs = Conv2D(params.num_classes, (1, 1), activation='softmax')(c12)

    model = Model(inputs=inputs, outputs=[outputs])
    return model


if __name__ == "__main__":
    # Smoke-build the model from the project-level params.json.
    params = Params(os.path.join(PROJ_DIR, "params.json"))
    unet(params)
# NOTE(review): these two lines are the tail of a logger-setup helper
# defined above this chunk; `logger` and `stream_handler` come from there.
stream_handler.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(stream_handler)


if __name__ == "__main__":
    start_time = time.time()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--params",
        type=str,
        required=True,
        help="load training parameters for BERT model",
    )
    args = parser.parse_args()
    params = Params(args.params)

    # NOTE(review): this prints a count of XLA_CPU devices under a
    # "Num GPUs Available" label — the device query and the message
    # disagree; confirm which is intended.
    print("Num GPUs Available: ",
          len(tf.config.experimental.list_physical_devices("XLA_CPU")))

    tf.random.set_random_seed(params.SEED)

    # Device selection; both branches currently target CPU-class devices.
    USE_GPU = True
    if USE_GPU:
        device = '/device:XLA_CPU:0'
    else:
        device = "/CPU:0"

    # Set Logger
    if not os.path.exists(params.LOG_DIR):
        os.makedirs(params.LOG_DIR)
    set_logger(os.path.join(".", params.LOG_DIR + params.NAME + ".log"))

    # Initialize session