def test_plot_curves_param(tmpdir):
    """ Check that plotting runs without errors. """
    output_dir = tmpdir.mkdir('output')

    logger = Logger()
    logger.log(train_score=1, val_score=2)
    logger.log(train_score=2)
    logger.log(train_score=3)
    logger.log(train_score=1, val_score=3)
    logger.log(train_score=2)
    logger.log(train_score=3)

    filename = os.path.join(output_dir, 'plot.png')
    plot(
        logger=logger,
        curves=[
            {
                'key': 'train_score',
            }, {
                'key': 'val_score',
            }
        ],
        filename=filename,
    )

    assert os.path.isfile(filename)
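# A minimal sketch of a plot() helper consistent with the call in the test
# above.  The signature is taken from the call site; the use of matplotlib and
# of a get_xy_data() helper (exercised by tests further below) is an assumption
# about the implementation, not the project's actual code.
import matplotlib
matplotlib.use('Agg')  # headless backend so saving to a file needs no display
import matplotlib.pyplot as plt

def plot_sketch(logger, curves, filename):
    fig, ax = plt.subplots()
    for curve in curves:
        key = curve['key']
        x, y = get_xy_data(logger, key)
        ax.plot(x, y, label=curve.get('label', key))
    ax.legend()
    fig.savefig(filename)
    plt.close(fig)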
class DummyExp(Experiment):
    def setup(self, config, output_directory=None):
        print(config)
        self.logger = Logger()
        self.rng = np.random.default_rng(seed=config.get('seed'))
        self.output_directory = output_directory
        self._interrupt_at = config.get('interrupt_at')
        self._run_step_callback = config.get('run_step_callback', lambda _: None)

    def run_step(self, iteration):
        print('`run_step(%d)`' % iteration)
        self._run_step_callback(iteration)
        self.logger.log(val=self.rng.random())
        print(self.logger.data)
        if self._interrupt_at is not None and iteration == self._interrupt_at:
            raise KeyboardInterrupt()

    def state_dict(self):
        return {
            'rng': self.rng.bit_generator.state,
            'logger': self.logger.state_dict(),
        }

    def load_state_dict(self, state):
        self.rng.bit_generator.state = state['rng']
        self.logger.load_state_dict(state['logger'])
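# Hedged usage sketch for DummyExp's checkpointing hooks, assuming Experiment
# can be instantiated without constructor arguments; the framework presumably
# drives setup()/run_step() itself, so the manual loop is illustration only.
exp = DummyExp()
exp.setup({'seed': 0})
for i in range(3):
    exp.run_step(i)
snapshot = exp.state_dict()          # RNG state plus logged values

resumed = DummyExp()
resumed.setup({'seed': 0})
resumed.load_state_dict(snapshot)    # continue from the captured state
for i in range(3, 6):
    resumed.run_step(i)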
def test_log_list_data():
    logger = Logger(key_name='time')
    assert len(logger) == 0

    logger.append(time=0, score=1, result=2)
    assert logger[-1] == {'time': 0, 'score': [1], 'result': [2]}
    assert len(logger) == 1

    logger.append(time=0, score=1.2, result=5)
    assert logger[-1] == {'time': 0, 'score': [1, 1.2], 'result': [2, 5]}
    assert len(logger) == 1

    logger.append(time=0, score=1.3, result=3)
    assert logger[-1] == {
        'time': 0,
        'score': [1, 1.2, 1.3],
        'result': [2, 5, 3],
    }
    assert len(logger) == 1

    logger.append(time=1, score=1.3, result=3)
    assert logger[-1] == {'time': 1, 'score': [1.3], 'result': [3]}
    assert logger[-2] == {
        'time': 0,
        'score': [1, 1.2, 1.3],
        'result': [2, 5, 3],
    }
    assert len(logger) == 2
def main(_):
    # create global configuration object
    model_config = Configuration(FLAGS.config)
    model = create_model(FLAGS, model_config)

    placeholders = {
        'l': tf.placeholder(tf.float32, (1, None, None, 3)),
        'r': tf.placeholder(tf.float32, (1, None, None, 3)),
        'd': tf.placeholder(tf.float32, (1, None, None, 1)),
    }
    x = {
        'l': tf.placeholder(tf.float32, (1, None, None, 3)),
        'r': tf.placeholder(tf.float32, (1, None, None, 3)),
        'd': tf.placeholder(tf.float32, (1, None, None, 1)),
    }
    p = namedtuple('Placeholders', placeholders.keys())(**placeholders)
    px = namedtuple('Placeholders', x.keys())(**x)
    model.build(px, True, None, build_loss=False)
    model.build(p, False, True, build_loss=False)

    session = tf.Session()
    saver = tf.train.Saver()

    # init variables
    session.run(tf.local_variables_initializer())
    session.run(tf.global_variables_initializer())

    # restore model if provided a checkpoint
    if model_config.checkpoint is not None:
        print("Restoring model from {}".format(model_config.checkpoint))
        saver.restore(session, model_config.checkpoint)

    # init dataset
    paths = get_paths_for_dataset(FLAGS.dataset)
    ratios = {
        'train_ratio': FLAGS.train_ratio,
        'train_valid_ratio': FLAGS.train_valid_ratio,
        'valid_ratio': FLAGS.valid_ratio,
        'test_ratio': FLAGS.test_ratio,
    }
    paths = split_dataset_paths(paths, **ratios)
    dataset = Dataset(get_example_class(FLAGS.dataset), paths, FLAGS.dataset)

    results = {}
    fd = lambda x: {p.l: x.left, p.r: x.right}

    phases = ['valid', 'train', 'train_valid']
    reconstructions = os.path.join(model_config.directory, 'results')
    directories = [os.path.join(reconstructions, phase) for phase in phases]
    for dirname in directories:
        os.makedirs(dirname, exist_ok=True)

    f = open(os.path.join(model_config.directory, 'results.txt'), 'w')
    sys.stdout = Logger(sys.stdout, f)

    subset_iterator = zip(phases,
                          [dataset.valid, dataset.train, dataset.train_valid],
                          directories)
    for phase, subset, store_dir in subset_iterator:
        for example in subset:
            gt = example.disparity.squeeze()
            start = time()
            d = session.run(model.outputs[p], fd(example)).squeeze()
            print("Time: {}".format(1000 * (time() - start)), file=sys.stderr)
            hits, total = disp_precision(
                gt, d, model_config.get('max_disp', FLAGS.max_disp), 3)
            all_hits, all_total = results.get(phase, (0, 0))
            results[phase] = (hits + all_hits, total + all_total)
            store_disparity(
                d, os.path.join(store_dir, '{}.png'.format(example.name)))
            print('{} {} {}%'.format(phase, example.name, 100 * hits / total))

    for phase in results:
        print('Total {} {}'.format(
            phase, 100 * results[phase][0] / results[phase][1]))
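# Hedged sketch of the disp_precision() metric called above: on pixels with a
# valid ground-truth disparity (assumed here to mean 0 < gt < max_disp) it
# counts predictions within `threshold` pixels of the ground truth and returns
# (hits, total).  The project's actual validity mask and threshold convention
# may differ.
import numpy as np

def disp_precision_sketch(gt, pred, max_disp, threshold):
    valid = (gt > 0) & (gt < max_disp)
    hits = int(np.count_nonzero(np.abs(pred[valid] - gt[valid]) <= threshold))
    total = int(np.count_nonzero(valid))
    return hits, total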
def test_log_data_plot():
    logger = Logger()
    logger.log(train_score=1, val_score=2)
    logger.log(train_score=2)
    logger.log(train_score=3)
    logger.log(train_score=1, val_score=3)
    logger.log(train_score=2)
    logger.log(train_score=3)

    x, y = get_xy_data(logger, 'train_score')
    assert y == [1, 2, 3, 1, 2, 3]
    assert x == [0, 1, 2, 3, 4, 5]

    x, y = get_xy_data(logger, 'val_score')
    assert y == [2, 3]
    assert x == [0, 3]
def test_log_data_with_key():
    logger = Logger(key_name='iteration')
    logger.log(iteration=0, train_score=1, val_score=2)
    logger.log(iteration=1, train_score=2)
    logger.log(iteration=2, train_score=3)
    logger.log(iteration=3, train_score=1, val_score=3)
    logger.log(iteration=4, train_score=2)
    logger.log(iteration=5, train_score=3)

    x, y = get_xy_data(logger, 'train_score')
    assert y == [1, 2, 3, 1, 2, 3]
    assert x == [0, 1, 2, 3, 4, 5]

    x, y = get_xy_data(logger, 'val_score')
    assert y == [2, 3]
    assert x == [0, 3]
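# Hedged sketch of get_xy_data() consistent with the two tests above: it walks
# the logged entries, keeps those containing the requested key, and uses either
# the entry index (no key_name) or the key column as the x value.  The `data`
# and `key_name` attributes are assumptions about the Logger's internals.
def get_xy_data_sketch(logger, key):
    x, y = [], []
    for index, entry in enumerate(logger.data):
        if key not in entry:
            continue
        x.append(entry[logger.key_name] if logger.key_name is not None else index)
        y.append(entry[key])
    return x, y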
def test_repeat_key_with_overwrite():
    logger = Logger(key_name='time', overwrite=True)

    logger.log(time=0, score=1, result=2)
    assert logger[-1] == {'time': 0, 'score': 1, 'result': 2}
    assert len(logger) == 1

    logger.log(time=0, score=1, result=3)
    assert logger[-1] == {'time': 0, 'score': 1, 'result': 3}
    assert len(logger) == 1
def test_log_data():
    logger = Logger()
    assert len(logger) == 0

    logger.log(score=1)
    assert logger[-1] == {'score': 1}
    assert len(logger) == 1

    logger.log(score=1.2)
    assert logger[-1] == {'score': 1.2}
    assert logger[-2] == {'score': 1}
    assert len(logger) == 2

    logger.log(score=1.3)
    assert logger[-1] == {'score': 1.3}
    assert logger[-2] == {'score': 1.2}
    assert logger[-3] == {'score': 1}
    assert len(logger) == 3
def test_log_data_two_values():
    logger = Logger()
    assert len(logger) == 0

    logger.log(score=1, result=2)
    assert logger[-1] == {'score': 1, 'result': 2}
    assert len(logger) == 1

    logger.log(score=1.2, result=5)
    assert logger[-1] == {'score': 1.2, 'result': 5}
    assert logger[-2] == {'score': 1, 'result': 2}
    assert len(logger) == 2

    logger.log(score=1.3, result=3)
    assert logger[-1] == {'score': 1.3, 'result': 3}
    assert logger[-2] == {'score': 1.2, 'result': 5}
    assert logger[-3] == {'score': 1, 'result': 2}
    assert len(logger) == 3
config = conf.Configuration(config_file)
unary = models.unary.SeLuResnetUnary(**config.config)
volume = models.volume.CostVolumeDot(**config.config)
regression = models.regression.SeLuResnetRegression(**config.config)
classification = models.classification.SoftArgminOclussion(**config.config)
model = models.stereo.SeLuConsistencyStereoRegression(unary, volume, regression,
                                                      classification)
if config.checkpoint is not None:
    model.load_state_dict(
        torch.load(os.path.join(model_dir, config.checkpoint))['state_dict'])

dataset_splits = dfac.get_dataset_eval(config)

f = open(os.path.join(directory, 'valid.txt'), 'w')
sys.stdout = Logger(sys.stdout, f)

model.cuda()
model.eval()

dataloaders = {}
for split in dataset_splits.keys():
    if len(dataset_splits[split]) > 0:
        dataloaders[split] = DataLoader(dataset_splits[split], batch_size=1,
                                        shuffle=False, pin_memory=True)

saver_pool = Pool(processes=1)
for split in dataloaders:
    rec_dir = os.path.join(directory, split)
    os.makedirs(rec_dir, exist_ok=True)
    hit_total, total = 0, 0
def test_repeat_key_error():
    logger = Logger(key_name='time')
    logger.log(time=0, score=1, result=2)
    with pytest.raises(Exception):
        logger.log(time=0, score=1, result=3)
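# A minimal sketch of a Logger that satisfies the behaviour pinned down by the
# Logger tests above (log/append, key_name, overwrite, len and negative
# indexing).  The project's real class is assumed to do more (state_dict,
# plotting support); this only illustrates the semantics the tests exercise.
class LoggerSketch:
    def __init__(self, key_name=None, overwrite=False):
        self.key_name = key_name
        self.overwrite = overwrite
        self.data = []

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def _same_key(self, kwargs):
        # True when the incoming entry repeats the key value of the last entry
        return (self.key_name is not None and len(self.data) > 0
                and kwargs.get(self.key_name) == self.data[-1][self.key_name])

    def log(self, **kwargs):
        # scalar logging: one dict per call, unless the key repeats
        if self._same_key(kwargs):
            if not self.overwrite:
                raise Exception('repeated key %s' % kwargs[self.key_name])
            self.data[-1].update(kwargs)
        else:
            self.data.append(dict(kwargs))

    def append(self, **kwargs):
        # list logging: values for a repeated key are accumulated into lists
        if self._same_key(kwargs):
            entry = self.data[-1]
            for k, v in kwargs.items():
                if k != self.key_name:
                    entry[k].append(v)
        else:
            entry = {self.key_name: kwargs.get(self.key_name)}
            entry.update({k: [v] for k, v in kwargs.items() if k != self.key_name})
            self.data.append(entry)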
def main(_):
    # create global configuration object
    model_config = Configuration(FLAGS.config)

    # calculate number of steps in an epoch for each subset
    train_epoch_steps = int(
        round(FLAGS.examples * FLAGS.train_ratio / FLAGS.batch_size))
    train_valid_epoch_steps = int(
        round(FLAGS.examples * FLAGS.train_valid_ratio / FLAGS.batch_size))
    valid_epoch_steps = int(
        round(FLAGS.examples * FLAGS.valid_ratio / FLAGS.batch_size))
    test_epoch_steps = int(
        round(FLAGS.examples * FLAGS.test_ratio / FLAGS.batch_size))
    split_sizes = SplitSizes(train_epoch_steps, train_valid_epoch_steps,
                             valid_epoch_steps, test_epoch_steps)

    # create placeholders for queue runners
    configs = get_decoder_configurations(FLAGS, model_config, split_sizes)
    decoder_class = get_decoder_class(FLAGS.dataset)
    with tf.variable_scope('placeholders'):
        placeholders = {}
        for config in configs:
            with tf.variable_scope('input_{}'.format(config.name)):
                placeholders[config.name] = read_and_decode(
                    tf.train.string_input_producer(config.queues,
                                                   shuffle=config.is_training,
                                                   capacity=FLAGS.capacity),
                    decoder_class(config))

    # create model and create graphs for each input
    model = create_model(FLAGS, model_config)
    model.build(placeholders['train'], True, None)
    print(placeholders.keys(), split_sizes)
    for split, steps in zip(
            ['train_valid', 'valid', 'test'],
            [split_sizes.train_valid, split_sizes.valid, split_sizes.test]):
        if steps > 0:
            model.build(placeholders[split], False, True)

    saver = tf.train.Saver()
    session = tf.Session()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coord)

    # create train method
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizers = {
            'adam': tf.train.AdamOptimizer,
            'sgd': tf.train.GradientDescentOptimizer,
            'rms_prop': tf.train.RMSPropOptimizer,
        }
        optimizer = optimizers[model_config.get('optimizer', 'adam')]
        train_step = optimizer(FLAGS.lr).minimize(
            model.losses[placeholders['train']])

    # init variables
    session.run(tf.local_variables_initializer())
    session.run(tf.global_variables_initializer())

    # restore model if provided a checkpoint
    if model_config.checkpoint is not None:
        saver.restore(session, model_config.checkpoint)

    # redirect stdout to file keeping stdout unchanged
    f = open(os.path.join(model_config.directory, 'log.txt'), 'w')
    sys.stdout = Logger(sys.stdout, f)

    # prepare directory for checkpoint storing
    checkpoints = os.path.join(model_config.directory, 'checkpoints')
    os.makedirs(checkpoints, exist_ok=True)

    try:
        for epoch in range(FLAGS.epochs):
            # calculate train losses and perform train steps
            for _ in range(split_sizes.train):
                _, train_loss = session.run(
                    [train_step, model.losses[placeholders['train']]])
                print("train: epoch {} loss {}".format(epoch, train_loss))

            # calculate valid losses
            for _ in range(split_sizes.valid):
                valid_loss = session.run(model.losses[placeholders['valid']])
                print("valid: epoch {} loss {}".format(epoch, valid_loss))

            # calculate losses used for early stopping and save a checkpoint
            # if the best parameters so far were found
            if split_sizes.train_valid > 0:
                train_valid_losses = []
                for _ in range(split_sizes.train_valid):
                    train_valid_losses.append(
                        session.run(model.losses[placeholders['train_valid']]))
                    print("train_valid: epoch {} loss {}".format(
                        epoch, train_valid_losses[-1]))
                try:
                    current = sum(train_valid_losses) / len(train_valid_losses)
                    if epoch == 0:
                        best = current
                    if current <= best:
                        # track the best mean loss seen so far before saving
                        best = current
                        saver.save(
                            session,
                            os.path.join(checkpoints, '{}.cpkt'.format(epoch)))
                except ZeroDivisionError:
                    pass
    except Exception as e:
        print(e)
    finally:
        # in case of an exception, store model checkpoint and stop queue runners
        checkpoint_file = os.path.join(checkpoints, 'final.cpkt')
        saver.save(session, checkpoint_file)
        print("Model saved to {}".format(checkpoint_file), file=sys.stderr)

        coord.request_stop()
        coord.join(threads)
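# The Logger assigned as `sys.stdout = Logger(sys.stdout, f)` in the scripts
# above appears to be a different class from the metric Logger in the tests:
# a tee that duplicates writes to several streams.  A minimal sketch, assuming
# that duplicating write()/flush() is all it does:
class TeeLogger:
    def __init__(self, *streams):
        self.streams = streams

    def write(self, text):
        for stream in self.streams:
            stream.write(text)

    def flush(self):
        for stream in self.streams:
            stream.flush()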