# Imports assumed by these tests; DummyModel and DummyDataset are small test
# helpers defined elsewhere in this module.
import numpy as np
import theano.tensor as T

from pylearn2.monitor import Monitor


def channel_scaling_checker(num_examples, mode, num_batches, batch_size):
    num_features = 2
    monitor = Monitor(DummyModel(num_features))
    dataset = DummyDataset(num_examples, num_features)
    monitor.add_dataset(dataset=dataset, mode=mode,
                        num_batches=num_batches, batch_size=batch_size)
    vis_batch = T.matrix()
    mean = vis_batch.mean()
    data_specs = (monitor.model.get_input_space(),
                  monitor.model.get_input_source())
    monitor.add_channel(name='mean', ipt=vis_batch, val=mean,
                        dataset=dataset, data_specs=data_specs)
    monitor()
    assert 'mean' in monitor.channels
    mean = monitor.channels['mean']
    assert len(mean.val_record) == 1
    actual = mean.val_record[0]
    X = dataset.get_design_matrix()
    # Only the examples the iterator actually visits contribute to the mean.
    if batch_size is not None and num_batches is not None:
        total = min(num_examples, num_batches * batch_size)
    else:
        total = num_examples
    expected = X[:total].mean()
    if not np.allclose(expected, actual):
        raise AssertionError("Expected monitor to contain %f but it has "
                             "%f" % (expected, actual))
# Variant of the same checker that tolerates the unimplemented uneven-batch
# case in add_dataset by skipping the test instead of failing.
from nose.plugins.skip import SkipTest


def channel_scaling_checker(num_examples, mode, num_batches, batch_size):
    num_features = 2
    monitor = Monitor(DummyModel(num_features))
    dataset = DummyDataset(num_examples, num_features)
    try:
        monitor.add_dataset(dataset=dataset, mode=mode,
                            num_batches=num_batches, batch_size=batch_size)
    except NotImplementedError:
        # Make sure this was due to the unimplemented batch_size case.
        if num_batches is None:
            assert num_examples % batch_size != 0
        else:
            assert num_examples % num_batches != 0
        raise SkipTest()
    vis_batch = T.matrix()
    mean = vis_batch.mean()
    monitor.add_channel(name='mean', ipt=vis_batch, val=mean, dataset=dataset)
    monitor()
    assert 'mean' in monitor.channels
    mean = monitor.channels['mean']
    assert len(mean.val_record) == 1
    actual = mean.val_record[0]
    X = dataset.get_design_matrix()
    if batch_size is not None and num_batches is not None:
        total = min(num_examples, num_batches * batch_size)
    else:
        total = num_examples
    expected = X[:total].mean()
    if not np.allclose(expected, actual):
        raise AssertionError("Expected monitor to contain %f but it has "
                             "%f" % (expected, actual))
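# A minimal sketch of how the checker might be driven. The mode name and the
# argument combinations below are assumptions modeled on pylearn2's iterator
# modes, not necessarily this suite's actual test cases.
def test_channel_scaling_sequential():
    # 5 batches of 2 cover all 10 examples exactly.
    channel_scaling_checker(10, 'sequential', 5, 2)


def test_channel_scaling_sequential_batch_size_None():
    # With batch_size unspecified, the mean should cover every example.
    channel_scaling_checker(10, 'sequential', 5, None)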
import os
import tempfile

from pylearn2.monitor import Monitor
from pylearn2.train_extensions.best_params import MonitorBasedSaveBest


def test_tagging():
    """Test the tagging functionality of this extension."""
    try:
        # TODO: serial.save should be able to take an open file-like object
        # so we can direct its output to a StringIO or something and not need
        # to screw around like this in tests that don't actually need to
        # touch the filesystem. /dev/null would work but the test would fail
        # on Windows.
        fd, fn = tempfile.mkstemp(suffix='.pkl')
        os.close(fd)

        # Test that the default key gets created.
        def_model = MockModel()
        def_model.monitor = Monitor(def_model)
        def_ext = MonitorBasedSaveBest(channel_name='foobar', save_path=fn)
        def_ext.setup(def_model, None, None)
        assert 'MonitorBasedSaveBest' in def_model.tag

        # Test with a custom key.
        model = MockModel()
        model.monitor = Monitor(model)
        model.monitor.channels['foobar'] = MockChannel()
        ext = MonitorBasedSaveBest(channel_name='foobar', tag_key='test123',
                                   save_path=fn)
        # Best cost is initially infinity.
        ext.setup(model, None, None)
        assert model.tag['test123']['best_cost'] == float("inf")
        # Best cost after one iteration.
        model.monitor.channels['foobar'].val_record.append(5.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0
        # Best cost after a second, worse iteration.
        model.monitor.channels['foobar'].val_record.append(7.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0
        # Best cost after a third iteration better than 2 but worse than 1.
        model.monitor.channels['foobar'].val_record.append(6.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0
        # Best cost after a fourth, better iteration.
        model.monitor.channels['foobar'].val_record.append(3.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 3.0

        # Test setting the starting epoch for saving models: costs seen
        # before start_epoch must be ignored.
        model = MockModel()
        model.monitor = Monitor(model)
        model.monitor.channels['foobar'] = MockChannel()
        ext = MonitorBasedSaveBest(channel_name='foobar', tag_key='test12',
                                   start_epoch=4, save_path=fn)
        ext.setup(model, None, None)
        assert model.tag['test12']['best_cost'] == float("inf")
        # Cost is ignored after one iteration (before start_epoch).
        model.monitor.channels['foobar'].val_record.append(5.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test12']['best_cost'] == float("inf")
        # Still ignored after a second, better iteration.
        model.monitor.channels['foobar'].val_record.append(3.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test12']['best_cost'] == float("inf")
        # Still ignored after a third, worse iteration.
        model.monitor.channels['foobar'].val_record.append(7.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test12']['best_cost'] == float("inf")
        # The fourth iteration reaches start_epoch, so its cost is recorded.
        model.monitor.channels['foobar'].val_record.append(7.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test12']['best_cost'] == 7.0
        # Best cost after a fifth, worse iteration.
        model.monitor.channels['foobar'].val_record.append(10.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test12']['best_cost'] == 7.0
        # Best cost after a sixth, better iteration.
        model.monitor.channels['foobar'].val_record.append(1.0)
        model.monitor.report_epoch()
        ext.on_monitor(model, None, None)
        assert model.tag['test12']['best_cost'] == 1.0
    finally:
        os.remove(fn)
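# Hedged sketch of the mock objects test_tagging relies on; the real
# definitions live elsewhere in the test module and may differ. MockChannel
# only needs a val_record list for the extension to read, and MockModel only
# needs to be taggable (here a plain defaultdict(dict), mirroring the tag
# attribute pylearn2 Models carry) and pickleable so MonitorBasedSaveBest can
# save it.
from collections import defaultdict


class MockChannel(object):
    def __init__(self):
        self.val_record = []


class MockModel(object):
    def __init__(self):
        self.tag = defaultdict(dict)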
    return output


if __name__ == '__main__':
    trainset, testset = GalaxyZoo.gzdeepdata.get_data()

    # build and train classifiers for submodels
    if SUBMODEL == 1:
        iters = 100
        if os.path.exists(DATA_DIR + 'model1saved_conv.pkl'):
            model = serial.load(DATA_DIR + 'model1saved_conv.pkl')
            iters = 1
            # reset monitor, can't be re-used
            model.monitor = Monitor(model)
        else:
            model = get_conv2([64, 64, 3])
        get_trainer1(model, trainset, iters).main_loop()
    elif SUBMODEL == 2:
        iters = 50
        if os.path.exists(DATA_DIR + 'model2saved_conv.pkl'):
            model = serial.load(DATA_DIR + 'model2saved_conv.pkl')
            iters = 1
            # reset monitor, can't be re-used
            model.monitor = Monitor(model)
        else:
            model = get_conv2([64, 64, 3])
        get_trainer2(model, trainset, iters).main_loop()

    outtrainset = get_output(model, trainset)
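# The load-or-train branches above reset model.monitor because a Monitor
# deserialized along with a model is tied to the finished training run and
# can't be re-used. A minimal sketch of that pattern in isolation, assuming
# the pylearn2 serial/Monitor API; build_model and path are placeholders, not
# names from this project.
import os

from pylearn2.monitor import Monitor
from pylearn2.utils import serial


def load_or_build(path, build_model):
    if os.path.exists(path):
        model = serial.load(path)
        # A stale monitor can't be re-used; attach a fresh one.
        model.monitor = Monitor(model)
    else:
        model = build_model()
    return model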
# pretrainer = get_ae_pretrainer(layer, utraindata, bsize, epochs=30)
# pretrainer.main_loop()
# serial.save(DATA_DIR+'daex_pretrained.pkl', stack)
stack = serial.load(DATA_DIR + 'daex_pretrained.pkl')

# construct DBN from the pretrained autoencoder stack
dbn = construct_dbn_from_stack(stack)

# finetune the softmax layer a bit
finetuner = get_finetuner(dbn, trainset, bsize, epochs=15)
finetuner.main_loop()

# Now finetune layer-by-layer, boosting the learning rate of earlier layers
# more strongly.
lrs = [8., 6., 4., 2., 1.]
for ii, lr in zip(range(len(structure) - 1), lrs):
    # a Monitor can't be re-used across training runs
    dbn.monitor = Monitor(dbn)
    # set lr scale to the boosted value for the current layer
    dbn.layers[ii].W_lr_scale = lr
    finetuner = get_finetuner(dbn, trainset, bsize, epochs=30)
    finetuner.main_loop()
    # return to the original lr scale
    dbn.layers[ii].W_lr_scale = 1.

# final finetuning pass over the whole network
# dbn = serial.load(DATA_DIR + 'model' + str(SUBMODEL) + 'saved_daex.pkl')
dbn.monitor = Monitor(dbn)
finetuner = get_finetuner(dbn, trainset, bsize, epochs=150)
finetuner.main_loop()
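# Hedged sketch of the sort of object get_finetuner might return, assuming
# the standard pylearn2 Train/SGD API. The learning rate and cost below are
# placeholders; the project's actual hyperparameters are not shown in this
# excerpt. The per-layer boosting above works because SGD multiplies its
# global learning rate by each layer's W_lr_scale.
from pylearn2.costs.mlp import Default
from pylearn2.termination_criteria import EpochCounter
from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD


def get_finetuner(model, trainset, batch_size=64, epochs=10):
    algorithm = SGD(learning_rate=0.1,
                    batch_size=batch_size,
                    cost=Default(),
                    monitoring_dataset=trainset,
                    termination_criterion=EpochCounter(epochs))
    return Train(model=model, dataset=trainset, algorithm=algorithm)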