def __init__(self, params, output_dir):
        self.strategy = tf.distribute.MirroredStrategy()
        self.params = params
        
        # Datasets
        tf_records = [
            os.path.join(params.data_dir, file)
            for file in os.listdir(params.data_dir)
            if file.endswith('.tfrecords')
        ]

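        # File-level split: the first 30 tfrecord shards feed training; the rest
        # are held out for validation.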
        self.train_dataset = self.strategy.experimental_distribute_dataset(input_fn(tf_records[:30]))
        self.val_dataset = self.strategy.experimental_distribute_dataset(input_fn(tf_records[30:]))
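        # NOTE: this counts training tfrecord shards, not individual examples, so
        # total_iteration is only an estimate unless each shard holds one sample.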
        num_samples = len(tf_records[:30])
        self.total_iteration = (num_samples // params.batch_size) * params.epochs
        
        with self.strategy.scope():
            # Models
            self.models = {}
            self.models['disparity'] = DisparityNet(input_shape=(params.input_h, params.input_w, 3))

            self.models['pose'] = PoseNet(input_shape=(params.input_h, params.input_w, 3 * params.num_input_frames),
                                          num_input_frames=params.num_input_frames)
        
        
            # Optimizer
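            # Square-root polynomial decay (power=0.5) from 2e-4 down to 1e-6 over the full run.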
            learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(0.0002, end_learning_rate=0.000001,
                                                                             decay_steps=self.total_iteration,
                                                                             power=0.5)
            self.optimizer = tf.keras.optimizers.Adam(learning_rate_fn)

            # Tensorboard & Meters
            train_log_dir = os.path.join(output_dir, 'train_logs')
            val_log_dir = os.path.join(output_dir, 'val_logs')
            self.train_summary_writer = tf.summary.create_file_writer(train_log_dir)
            self.test_summary_writer = tf.summary.create_file_writer(val_log_dir)

            self.train_meter = {
                'ssim': tf.keras.metrics.Mean(name='ssim'),
                'l1': tf.keras.metrics.Mean(name='l1'),
                'smooth': tf.keras.metrics.Mean(name='smooth'),
            }

            self.val_meter = {
                'ssim': tf.keras.metrics.Mean(name='ssim'),
                'l1': tf.keras.metrics.Mean(name='l1'),
                'smooth': tf.keras.metrics.Mean(name='smooth'),
            }
            

        self.step = 0
        # Load states from optimiser and model if available
        self.ckpt_disp, self.manager_disp = self.setup_logger(self.models['disparity'],
                                                              os.path.join(output_dir, 'disparity_model'))
        self.ckpt_pose, self.manager_pose = self.setup_logger(self.models['pose'],
                                                              os.path.join(output_dir, 'pose_model'))
        if self.manager_disp.latest_checkpoint:
            self.start_epoch = int(self.ckpt_disp.step) + 1
        else:
            self.start_epoch = int(self.ckpt_disp.step)
        
        # Helpers
        self.pix_coords = pixel_coord(params.batch_size, params.input_h, params.input_w, True)  # [b, 3, npoints]

        print("Starting training step {}".format(self.ckpt_disp.step.numpy()))
def train(num_layers,
          embedding_size,
          num_heads,
          dff,
          max_seq_len,
          vocab_size,
          optimizer="adam",
          batch_size=16,
          learning_rate=1e-3,
          distributed=False):
    tf_records = glob.glob(_ROOT + "/data/tf_records/*.tfrecord")
    if distributed:
        dist_dataset = input_fn(tf_records, batch_size=batch_size)
        mirrored_strategy = tf.distribute.MirroredStrategy(
            devices=["/gpu:0", "/gpu:1"])
        dist_dataset = mirrored_strategy.experimental_distribute_dataset(
            dist_dataset)
        with mirrored_strategy.scope():

            model = Gpt2(num_layers,
                         embedding_size,
                         num_heads,
                         dff,
                         max_seq_len,
                         vocab_size,
                         optimizer=optimizer,
                         learning_rate=learning_rate)
            model.create_optimizer()
            model.create_checkpoint_manager(MODEL_DIR)
            model.create_summary_writer(LOG_DIR)

        model.mirrored_strategy = mirrored_strategy
        model.fit(dist_dataset)
    else:
        dataset = input_fn(tf_records, batch_size=batch_size)
        model = Gpt2(num_layers,
                     embedding_size,
                     num_heads,
                     dff,
                     max_seq_len,
                     vocab_size,
                     optimizer=optimizer,
                     learning_rate=learning_rate)
        model.create_optimizer()
        model.create_checkpoint_manager(MODEL_DIR)
        model.create_summary_writer(LOG_DIR)
        model.fit(dataset)
        print("Training Done................")
def train(model_dir,
          data_dir,
          batch_size=16,
          learning_rate=0.001,
          distributed=False,
          mxp=False,
          epochs=5):
    data_dir = os.path.abspath(data_dir)
    model_dir = os.path.abspath(model_dir)
    tf_records = glob.glob(data_dir + "/tf_records/*.tfrecord")
    dataset = input_fn(tf_records, batch_size=batch_size, epoch=epochs)
    if distributed:
        mirrored_strategy = tf.distribute.MirroredStrategy(
            devices=["/gpu:0", "/gpu:1"])
        dataset = mirrored_strategy.experimental_distribute_dataset(dataset)
        with mirrored_strategy.scope():
            model = Gpt2.create_from_params(model_dir)
            model.create_optimizer(learning_rate=learning_rate,
                                   mixed_precission=mxp)
            model.create_checkpoint_manager(model_dir)
            model.create_summary_writer(LOG_DIR)

        model.mirrored_strategy = mirrored_strategy
    else:
        model = Gpt2.create_from_params(model_dir)
        model.create_optimizer(learning_rate=learning_rate,
                               mixed_precission=mxp)
        model.create_checkpoint_manager(model_dir)
        model.create_summary_writer(LOG_DIR)
    print("Trainign Model...............")
    model.print_params()
    model.fit(dataset)
    print("Training Done................")
    def __init__(self, params, output_dir):
        self.dataset_dir = params.dataset_dir
        self.demo_set = params.demo_set
        self.output_dir = output_dir
        self.params = params
        self.models = {
            'disparity':
            DisparityNet(input_shape=(params.input_h, params.input_w, 3))
        }
        self.load_checkpoint(self.models['disparity'],
                             os.path.join(output_dir, 'disparity_model'))

        # Datasets
        tf_records = [
            os.path.join(params.dataset_dir, file)
            for file in os.listdir(params.dataset_dir)
            if file.endswith('.tfrecords')
        ]
        self.val_dataset = input_fn(tf_records[30:], load_option='val')
        self.images = []
        self.gt_depths = []
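        # Cache one frame per validation example together with its ground-truth
        # depth; the [0][0] indexing assumes batch size 1 with the frame axis
        # first (an assumption, not stated in the snippet).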
        for data in self.val_dataset:
            image = data['images'][0][0]
            self.images.append(image)
            gt_depth = data['depths'].numpy()[0][0]
            self.gt_depths.append(gt_depth)

        print(f'Total Images: {len(self.images)}')
def train(num_layers,
          embedding_size,
          num_heads,
          dff,
          max_seq_len,
          vocab_size,
          optimizer="adam",
          batch_size=16,
          learning_rate=1e-3,
          distributed=False):

    par_map = {
        "num_layers": num_layers,
        "d_model": embedding_size,
        "num_heads": num_heads,
        "dff": dff,
        "max_seq_len": max_seq_len,
        "vocab_size": vocab_size
    }

    exp_name = "_".join(['{}_{}'.format(k, v) for k, v in par_map.items()])

    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)

    with open(MODEL_DIR + '/model_par.json', 'w') as f:
        json.dump(par_map, f)
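    # Persisting the hyper-parameters as JSON allows a later run to rebuild the
    # model from MODEL_DIR (presumably what Gpt2.create_from_params() reads in
    # the variant above).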

    tf_records = glob.glob(_ROOT + "/data/tf_records/*.tfrecord")
    if distributed:
        dist_dataset = input_fn(tf_records, batch_size=batch_size)
        mirrored_strategy = tf.distribute.MirroredStrategy(
            devices=["/gpu:0", "/gpu:1"])
        dist_dataset = mirrored_strategy.experimental_distribute_dataset(
            dist_dataset)
        with mirrored_strategy.scope():

            model = Gpt2(num_layers,
                         embedding_size,
                         num_heads,
                         dff,
                         max_seq_len,
                         vocab_size,
                         optimizer=optimizer,
                         learning_rate=learning_rate)
            model.create_optimizer()
            model.create_checkpoint_manager(MODEL_DIR)
            model.create_summary_writer(LOG_DIR)

        model.mirrored_strategy = mirrored_strategy
        model.fit(dist_dataset)
    else:
        dataset = input_fn(tf_records, batch_size=batch_size)
        model = Gpt2(num_layers,
                     embedding_size,
                     num_heads,
                     dff,
                     max_seq_len,
                     vocab_size,
                     optimizer=optimizer,
                     learning_rate=learning_rate)
        model.create_optimizer()
        model.create_checkpoint_manager(MODEL_DIR)
        model.create_summary_writer(LOG_DIR)
        model.fit(dataset)
        print("Training Done................")
def train(num_layers, embedding_size, num_heads, dff, max_seq_len, vocab_size,
          optimizer, batch_size, learning_rate, graph_mode, distributed):
    par_map = {
        "num_layers": num_layers,
        "d_model": embedding_size,
        "num_heads": num_heads,
        "dff": dff,
        "max_seq_len": max_seq_len,
        "vocab_size": vocab_size
    }

    # exp_name = "_".join(['{}_{}'.format(k, v) for k, v in par_map.items()])

    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)

    with open(MODEL_DIR + '/model_par.json', 'w') as f:
        json.dump(par_map, f)

    tf_records = glob.glob(DATA_DIR + "/tf_records/*.tfrecord")
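    # File-level split: roughly 85% of the shards for training, 15% for evaluation.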
    num_train = int(len(tf_records) * 0.85)

    print("No. of tf records: ", len(tf_records))
    train_tf_records = tf_records[:num_train]
    test_tf_records = tf_records[num_train:]

    train_dataset = input_fn(train_tf_records, batch_size=batch_size)
    test_dataset = input_fn(test_tf_records, batch_size=batch_size)

    if distributed:
        mirrored_strategy = tf.distribute.MirroredStrategy()
        train_dataset = mirrored_strategy.experimental_distribute_dataset(
            train_dataset)
        test_dataset = mirrored_strategy.experimental_distribute_dataset(
            test_dataset)

        with mirrored_strategy.scope():

            model = Gpt2(num_layers,
                         embedding_size,
                         num_heads,
                         dff,
                         max_seq_len,
                         vocab_size,
                         optimizer=optimizer,
                         learning_rate=learning_rate)
            model.create_optimizer()
            model.create_checkpoint_manager(MODEL_DIR)
            model.create_summary_writer(LOG_DIR)

        model.mirrored_strategy = mirrored_strategy
        model.global_batch_size = tf.cast(batch_size, tf.float32)
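        # fit() presumably divides per-replica losses by this global batch size
        # when reducing across replicas (assumption about Gpt2.fit, not shown here).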
    else:
        model = Gpt2(num_layers,
                     embedding_size,
                     num_heads,
                     dff,
                     max_seq_len,
                     vocab_size,
                     optimizer=optimizer,
                     learning_rate=learning_rate)
        model.create_optimizer()
        model.create_checkpoint_manager(MODEL_DIR)
        model.create_summary_writer(LOG_DIR)

    model.fit([train_dataset, test_dataset], graph_mode)
    print("Training Done................")