from pylearn2.corruption import (BinomialSampler, DropoutCorruptor,
                                 GaussianCorruptor, MultinomialSampler,
                                 SaltPepperCorruptor, SmoothOneHotCorruptor)
from pylearn2.costs.autoencoder import MeanBinaryCrossEntropy
from pylearn2.costs.gsn import GSNCost
from pylearn2.datasets.mnist import MNIST
from pylearn2.expr.activations import rescaled_softmax
from pylearn2.models.gsn import GSN
from pylearn2.termination_criteria import EpochCounter
from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD, MonitorBasedLRAdjuster

# Shared hyperparameters. These are example values (assumed); the original
# script defines the constants elsewhere, so tune them to your setup.
HIDDEN_SIZE = 1000
SALT_PEPPER_NOISE = 0.4
GAUSSIAN_NOISE = 0.5
WALKBACK = 0
LEARNING_RATE = 0.25
MOMENTUM = 0.75
MAX_EPOCHS = 100
BATCHES_PER_EPOCH = None   # None: use the whole dataset each epoch
BATCH_SIZE = 32
MONITORING_BATCHES = 10
ALL_LABELLED = True        # flags for a modified MNIST wrapper (assumed values)
SUPERVISED = False

# Dataset shared by the examples below.
ds = MNIST(which_set='train', one_hot=True)


def test_train_ae():
    """Train a three-layer GSN as a denoising autoencoder on MNIST."""
    # This variant uses an MNIST wrapper that accepts `all_labelled` and
    # `supervised` flags; those keywords are not part of stock pylearn2.
    ds = MNIST(which_set='train', one_hot=True,
               all_labelled=ALL_LABELLED, supervised=SUPERVISED)

    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], HIDDEN_SIZE, ds.X.shape[1]],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        pre_corruptors=[GaussianCorruptor(GAUSSIAN_NOISE)] * 3,
        post_corruptors=[SaltPepperCorruptor(SALT_PEPPER_NOISE), None,
                         SmoothOneHotCorruptor(GAUSSIAN_NOISE)],
        layer_samplers=[BinomialSampler(), None, MultinomialSampler()],
        tied=False
    )

    # Average binary cross-entropy over each example rather than summing it.
    _mbce = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _mbce.cost(a, b) / ds.X.shape[1]
    c = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=MONITORING_BATCHES
    )

    trainer = Train(ds, gsn, algorithm=alg,
                    save_path="./results/gsn_ae_trained.pkl", save_freq=5,
                    extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()
    print("done training")
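# Sketch: drawing samples from the trained autoencoder by reloading the
# pickle that Train saves. This assumes pylearn2's GSN.get_samples API
# (seed the chain at layer 0, run `walkback` sampling steps); the path and
# step count here are illustrative, not from the original script.
def sample_ae_sketch():
    from pylearn2.utils import serial
    gsn = serial.load("./results/gsn_ae_trained.pkl")
    seed = MNIST(which_set='test').X[0:1, :]  # one test digit seeds the chain
    history = gsn.get_samples([(0, seed)], walkback=100,
                              symbolic=False, include_first=True)
    return history  # per-step snapshots of the requested layer activations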
def test_train_ae_simple():
    """Train a two-layer GSN autoencoder variant on MNIST."""
    GC = GaussianCorruptor
    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000],
        activation_funcs=["sigmoid", "tanh"],
        pre_corruptors=[None, GC(1.0)],
        post_corruptors=[SaltPepperCorruptor(0.5), GC(1.0)],
        layer_samplers=[BinomialSampler(), None],
        tied=False
    )

    # Average MBCE over each example rather than summing it.
    _mbce = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _mbce.cost(a, b) / ds.X.shape[1]
    c = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10
    )

    trainer = Train(ds, gsn, algorithm=alg,
                    save_path="gsn_ae_example.pkl", save_freq=5)
    trainer.main_loop()
    print("done training")
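# Sketch: inspecting a saved model after (or during) training. serial.load
# and the Monitor attached to every pylearn2 model are standard; each channel
# records one monitored quantity per monitoring step.
def inspect_saved_model_sketch():
    from pylearn2.utils import serial
    model = serial.load("gsn_ae_example.pkl")
    for name, channel in model.monitor.channels.items():
        # print the most recent recorded value of each channel
        print("%s: %s" % (name, channel.val_record[-1]))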
def train():
    """Train a GSN on the 10-dimensional funnel distribution."""
    LEARNING_RATE = 1e-4
    MOMENTUM = 0.25
    MAX_EPOCHS = 500
    BATCHES_PER_EPOCH = 100
    BATCH_SIZE = 1000

    # FunnelDistribution, FunnelGSNCost, and MSR are project-local helpers;
    # they are not part of stock pylearn2 and their import paths are not
    # shown in the original.
    dataset = FunnelDistribution()
    cost = FunnelGSNCost([(0, 1.0, MSR())], walkback=1)

    # Note: these corruptors are created but not wired into the network
    # below, whose corruptor and sampler lists are all None.
    gc = GaussianCorruptor(0.75)
    dc = DropoutCorruptor(.5)

    gsn = GSN.new([10, 200, 10],
                  [None, "tanh", "tanh"],  # activations
                  [None] * 3,              # pre corruption
                  [None] * 3,              # post corruption
                  [None] * 3,              # layer samplers
                  tied=False)
    gsn._bias_switch = False

    alg = SGD(LEARNING_RATE, init_momentum=MOMENTUM, cost=cost,
              termination_criterion=EpochCounter(MAX_EPOCHS),
              batches_per_iter=BATCHES_PER_EPOCH, batch_size=BATCH_SIZE,
              monitoring_batches=100, monitoring_dataset=dataset)

    trainer = Train(dataset, gsn, algorithm=alg, save_path="funnel_gsn.pkl",
                    extensions=[MonitorBasedLRAdjuster()], save_freq=50)
    trainer.main_loop()
    print("done training")
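# Sketch of the target density: Neal's funnel in one common parameterization
# (v ~ N(0, 3^2), x_i ~ N(0, e^v)). The project-local FunnelDistribution may
# use different constants; this only illustrates the shape of the data the
# GSN above is asked to model.
def sample_funnel_sketch(n, dim=10, rng=None):
    import numpy as np
    rng = rng or np.random.RandomState(0)
    v = rng.normal(0.0, 3.0, size=(n, 1))   # the funnel "neck" variable
    # remaining coordinates have standard deviation exp(v / 2), i.e.
    # variance e^v, so the funnel widens as v grows
    x = rng.normal(0.0, 1.0, size=(n, dim - 1)) * np.exp(v / 2.0)
    return np.hstack([v, x])                 # shape (n, dim)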
def test_train_supervised(): """ Train a supervised GSN. """ # initialize the GSN gsn = GSN.new( layer_sizes=[ds.X.shape[1], 1000, ds.y.shape[1]], activation_funcs=["sigmoid", "tanh", rescaled_softmax], pre_corruptors=[GaussianCorruptor(0.5)] * 3, post_corruptors=[ SaltPepperCorruptor(.3), None, SmoothOneHotCorruptor(.5) ], layer_samplers=[BinomialSampler(), None, MultinomialSampler()], tied=False) # average over costs rather than summing _rcost = MeanBinaryCrossEntropy() reconstruction_cost = lambda a, b: _rcost.cost(a, b) / ds.X.shape[1] _ccost = MeanBinaryCrossEntropy() classification_cost = lambda a, b: _ccost.cost(a, b) / ds.y.shape[1] # combine costs into GSNCost object c = GSNCost( [ # reconstruction on layer 0 with weight 1.0 (0, 1.0, reconstruction_cost), # classification on layer 2 with weight 2.0 (2, 2.0, classification_cost) ], walkback=WALKBACK, mode="supervised") alg = SGD( LEARNING_RATE, init_momentum=MOMENTUM, cost=c, termination_criterion=EpochCounter(MAX_EPOCHS), batches_per_iter=BATCHES_PER_EPOCH, batch_size=BATCH_SIZE, monitoring_dataset=ds, monitoring_batches=10, ) trainer = Train(ds, gsn, algorithm=alg, save_path="gsn_sup_example.pkl", save_freq=10, extensions=[MonitorBasedLRAdjuster()]) trainer.main_loop() print("done training")
def test_train_supervised(): """ Train a supervised GSN. """ # initialize the GSN gsn = GSN.new( layer_sizes=[ds.X.shape[1], 1000, ds.y.shape[1]], activation_funcs=["sigmoid", "tanh", rescaled_softmax], pre_corruptors=[GaussianCorruptor(0.5)] * 3, post_corruptors=[SaltPepperCorruptor(.3), None, SmoothOneHotCorruptor(.5)], layer_samplers=[BinomialSampler(), None, MultinomialSampler()], tied=False ) # average over costs rather than summing _rcost = MeanBinaryCrossEntropy() reconstruction_cost = lambda a, b: _rcost.cost(a, b) / ds.X.shape[1] _ccost = MeanBinaryCrossEntropy() classification_cost = lambda a, b: _ccost.cost(a, b) / ds.y.shape[1] # combine costs into GSNCost object c = GSNCost( [ # reconstruction on layer 0 with weight 1.0 (0, 1.0, reconstruction_cost), # classification on layer 2 with weight 2.0 (2, 2.0, classification_cost) ], walkback=WALKBACK, mode="supervised" ) alg = SGD( LEARNING_RATE, init_momentum=MOMENTUM, cost=c, termination_criterion=EpochCounter(MAX_EPOCHS), batches_per_iter=BATCHES_PER_EPOCH, batch_size=BATCH_SIZE, monitoring_dataset=ds, monitoring_batches=10, ) trainer = Train(ds, gsn, algorithm=alg, save_path="gsn_sup_example.pkl", save_freq=10, extensions=[MonitorBasedLRAdjuster()]) trainer.main_loop() print "done training"