def get_test_kid(data_in, brain):
    """
    Return a default kid given a source and a brain.
    """
    if isinstance(data_in, OldSource):
        return Kid(
            FeedSensor(source_in=data_in, name='data'),
            brain,
            MomentumKongFu(),
            # debug=True,
            max_steps=900)
    else:
        # data_in is a sensor now.
        return Kid(
            data_in,
            brain,
            MomentumKongFu(),
            # debug=True,
            max_steps=900)
def get_test_sensor(simple=False):
    if A.backend() == A.TF:
        return FeedSensor(source_in=TestFactory.get_test_feed_source(),
                          val_batch_size=100,
                          name='data')
    elif A.backend() == A.TORCH:
        s = MNISTSource(work_dir=AKID_DATA_PATH + '/mnist', name='mnist')
        s.setup()
        if simple:
            return SimpleSensor(
                source_in=s,
                # Do not shuffle the training set, for reproducible tests.
                sampler="sequence",
                name='mnist')
        else:
            return ParallelSensor(
                source_in=s,
                # Do not shuffle the training set, for reproducible tests.
                sampler="sequence",
                name='mnist')
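# Hedged smoke-test sketch combining the two helpers above. OneLayerBrain
# (also used in the distributed example below) stands in for whatever brain
# a test actually exercises.
def _smoke_test():
    sensor = get_test_sensor(simple=True)
    kid = get_test_kid(sensor, OneLayerBrain(name='brain'))
    kid.setup()
    kid.practice()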
def setup():
    from akid import AKID_DATA_PATH
    from akid import Brain, Cifar10FeedSource, FeedSensor, Kid
    from akid import MomentumKongFu
    from akid.layers import (ConvolutionLayer, PoolingLayer, MaxoutLayer,
                             InnerProductLayer, DropoutLayer,
                             SoftmaxWithLossLayer)

    # Set up brain
    # #########################################################################
    brain = Brain(name='maxout-zca-cifar10')

    brain.attach(DropoutLayer(keep_prob=0.8, name='dropout1'))
    brain.attach(
        ConvolutionLayer([8, 8],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "uniform",
                             "range": 0.005},
                         max_norm=0.9,
                         out_channel_num=192,
                         name='conv1'))
    brain.attach(PoolingLayer([1, 4, 4, 1], [1, 2, 2, 1], 'SAME', name='pool1'))
    brain.attach(MaxoutLayer(name='maxout1'))

    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout2'))
    brain.attach(
        ConvolutionLayer([8, 8],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "uniform",
                             "range": 0.005},
                         max_norm=1.9365,
                         out_channel_num=384,
                         name='conv2'))
    brain.attach(PoolingLayer([1, 4, 4, 1], [1, 2, 2, 1], 'SAME', name='pool2'))
    brain.attach(MaxoutLayer(name='maxout2'))

    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout3'))
    brain.attach(
        ConvolutionLayer([5, 5],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "uniform",
                             "range": 0.005},
                         max_norm=1.9365,
                         out_channel_num=384,
                         name='conv3'))
    brain.attach(PoolingLayer([1, 2, 2, 1], [1, 2, 2, 1], 'SAME', name='pool3'))
    brain.attach(MaxoutLayer(name='maxout3'))

    # Layer names must be unique within a brain.
    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout4'))
    brain.attach(
        InnerProductLayer(init_para={
                              "name": "uniform",
                              "range": 0.005},
                          max_norm=1.9,
                          out_channel_num=2500,
                          name='ip1'))
    brain.attach(MaxoutLayer(group_size=5, name='maxout4'))

    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout5'))
    brain.attach(
        InnerProductLayer(init_para={
                              "name": "uniform",
                              "range": 0.005},
                          max_norm=1.9365,
                          out_channel_num=10,
                          name='softmax_linear'))

    brain.attach(SoftmaxWithLossLayer(class_num=10, name='loss'))

    # Set up a sensor.
    # #########################################################################
    cifar_source = Cifar10FeedSource(
        name="CIFAR10",
        url='http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
        work_dir=AKID_DATA_PATH + '/cifar10',
        use_zca=True,
        num_train=50000,
        num_val=10000)
    sensor = FeedSensor(source_in=cifar_source, batch_size=128, name='data')

    # Summon a survivor.
    # #########################################################################
    survivor = Kid(sensor,
                   brain,
                   MomentumKongFu(base_lr=0.025,
                                  momentum=0.5,
                                  decay_rate=0.1,
                                  decay_epoch_num=50),
                   max_steps=200000)
    survivor.setup()

    return survivor
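# Hedged usage: setup() already calls Kid.setup(), so training is just
# practice(), the same call the LeNet script below uses.
kid = setup()
kid.practice()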
def setup(graph=None):
    from akid import GraphBrain
    from akid.sugar import cnn_block
    from akid import AKID_DATA_PATH
    from akid import Cifar10FeedSource, FeedSensor, Kid
    from akid import MomentumKongFu

    brain = GraphBrain(name="spcnn")

    out_channel_num_list = [64, 128, 256, 512]
    group_size_list = [2, 4, 8, 16]

    for i in range(4):
        brain.attach(cnn_block(
            ksize=[3, 3],
            init_para={
                "name": "uniform",
                "range": 0.005},
            wd={"type": "l2", "scale": 0.005},
            out_channel_num=out_channel_num_list[i],
            pool_size=[2, 2],
            pool_stride=[2, 2],
            activation={"type": "linearize",
                        "group_size": group_size_list[i]},
            keep_prob=0.5,
            bn={"gamma_init": 1, "fix_gamma": True}))

    brain.attach(cnn_block(
        init_para={
            "name": "uniform",
            "range": 0.005},
        wd={"type": "l2", "scale": 0.005},
        out_channel_num=10,
        activation={"type": "softmax"},
        bn={"gamma_init": 1, "fix_gamma": True}))

    # Set up a sensor.
    # #########################################################################
    cifar_source = Cifar10FeedSource(
        name="CIFAR10",
        url='http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
        work_dir=AKID_DATA_PATH + '/cifar10',
        use_zca=True,
        num_train=50000,
        num_val=10000)
    sensor = FeedSensor(source_in=cifar_source, batch_size=128, name='data')

    # Summon a survivor.
    # #########################################################################
    survivor = Kid(
        sensor,
        brain,
        MomentumKongFu(base_lr=1.0,
                       momentum=0.5,
                       decay_rate=0.1,
                       decay_epoch_num=25),
        max_steps=60000,
        graph=graph,
    )
    survivor.setup()

    return survivor
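# Hedged usage sketch: the optional `graph` argument lets the Kid build its
# ops into a caller-supplied tf.Graph (this assumes the TensorFlow backend
# is active).
import tensorflow as tf

graph = tf.Graph()
kid = setup(graph=graph)
kid.practice()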
def setup():
    from akid import AKID_DATA_PATH
    from akid import Brain, MNISTFeedSource, FeedSensor, Kid
    from akid import MomentumKongFu
    from akid.layers import DropoutLayer
    from akid.sugar import cnn_block

    brain = Brain(name="maxout_mnist")

    bn = {"gamma_init": 1, "fix_gamma": True}

    brain.attach(DropoutLayer(keep_prob=0.8, name='dropout0'))
    brain.attach(
        cnn_block(ksize=[8, 8],
                  initial_bias_value=0.,
                  init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  out_channel_num=48 * 2,
                  pool_size=[4, 4],
                  pool_stride=[2, 2],
                  activation={
                      "type": "maxout",
                      "group_size": 2},
                  keep_prob=0.5,
                  bn=bn))
    brain.attach(
        cnn_block(ksize=[8, 8],
                  initial_bias_value=0.,
                  init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  out_channel_num=48 * 2,
                  pool_size=[4, 4],
                  pool_stride=[2, 2],
                  activation={
                      "type": "maxout",
                      "group_size": 2},
                  keep_prob=0.5,
                  bn=bn))
    brain.attach(
        cnn_block(ksize=[5, 5],
                  initial_bias_value=0,
                  init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  out_channel_num=24 * 4,
                  pool_size=[2, 2],
                  pool_stride=[2, 2],
                  activation={
                      "type": "maxout",
                      "group_size": 4},
                  bn=bn,
                  keep_prob=0.5))
    brain.attach(
        cnn_block(ksize=None,
                  initial_bias_value=0,
                  init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  out_channel_num=10,
                  bn=bn,
                  activation={"type": "softmax"}))

    source = MNISTFeedSource(name="MNIST",
                             url='http://yann.lecun.com/exdb/mnist/',
                             work_dir=AKID_DATA_PATH + '/mnist',
                             num_train=60000,
                             num_val=10000,
                             center=True,
                             scale=True)

    kid = Kid(FeedSensor(name='data',
                         source_in=source,
                         batch_size=128,
                         val_batch_size=100),
              brain,
              MomentumKongFu(momentum=0.9,
                             base_lr=1,
                             decay_rate=0.95,
                             decay_epoch_num=1),
              max_steps=20000)
    kid.setup()

    return kid
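# The KongFu above decays exponentially: with decay_epoch_num=1 the rate is
# multiplied by decay_rate once per epoch, i.e. lr(e) = base_lr *
# decay_rate ** e (a sketch of the assumed schedule, not akid API):
base_lr, decay_rate = 1.0, 0.95
for epoch in (0, 10, 50):
    print(epoch, base_lr * decay_rate ** epoch)
# -> 1.0 at epoch 0, ~0.599 at epoch 10, ~0.077 at epoch 50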
def setup(graph):
    from akid import AKID_DATA_PATH
    from akid import GraphBrain, Cifar10FeedSource, FeedSensor, Kid
    from akid import MomentumKongFu
    from akid.layers import DropoutLayer
    from akid.sugar import cnn_block

    brain = GraphBrain(name="maxout")

    brain.attach(DropoutLayer(keep_prob=0.8, name='dropout0'))
    brain.attach(
        cnn_block(ksize=[8, 8],
                  init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 0.0005},
                  out_channel_num=192,
                  pool_size=[4, 4],
                  pool_stride=[2, 2],
                  activation={{net_paras["activation"][0]}},
                  keep_prob=0.5,
                  bn={{net_paras["bn"]}}))
    brain.attach(
        cnn_block(ksize=[8, 8],
                  init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 0.0005},
                  out_channel_num=384,
                  pool_size=[4, 4],
                  pool_stride=[2, 2],
                  activation={{net_paras["activation"][1]}},
                  keep_prob=0.5,
                  bn={{net_paras["bn"]}}))
    brain.attach(
        cnn_block(ksize=[5, 5],
                  init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 0.0005},
                  out_channel_num=384,
                  pool_size=[2, 2],
                  pool_stride=[2, 2],
                  activation={{net_paras["activation"][2]}},
                  keep_prob=0.5,
                  bn={{net_paras["bn"]}}))
    brain.attach(
        cnn_block(init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 0.004},
                  out_channel_num=2500,
                  activation={{net_paras["activation"][3]}},
                  keep_prob=0.5,
                  bn={{net_paras["bn"]}}))
    brain.attach(
        cnn_block(init_para={
                      "name": "uniform",
                      "range": 0.005},
                  wd={
                      "type": "l2",
                      "scale": 0.},
                  out_channel_num=10,
                  activation={"type": "softmax"},
                  bn={{net_paras["bn"]}}))

    # Set up a sensor.
    # #########################################################################
    cifar_source = Cifar10FeedSource(
        name="CIFAR10",
        url='http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
        work_dir=AKID_DATA_PATH + '/cifar10',
        use_zca=True,
        num_train=50000,
        num_val=10000)
    sensor = FeedSensor(source_in=cifar_source, batch_size=128, name='data')

    # Summon a survivor.
    # #########################################################################
    survivor = Kid(
        sensor,
        brain,
        MomentumKongFu(base_lr={{opt_paras["lr"]}},
                       momentum=0.5,
                       decay_rate=0.1,
                       decay_epoch_num=50),
        max_steps=200000,
        graph=graph,
    )
    survivor.setup()

    return survivor
def setup():
    from akid import AKID_DATA_PATH
    from akid import GraphBrain, MNISTFeedSource, FeedSensor, Kid
    from akid import MomentumKongFu
    from akid.layers import DropoutLayer, SoftmaxWithLossLayer
    from akid.sugar import cnn_block
    from akid import LearningRateScheme

    brain = GraphBrain(name="one-layer-mnist")

    brain.attach(DropoutLayer(keep_prob=0.8, name='dropout0'))
    brain.attach(
        cnn_block(
            ksize=[5, 5],
            init_para={
                "name": "truncated_normal",
                "stddev": 0.1},
            wd={
                "type": "l2",
                "scale": 0.0005},
            in_channel_num=1,
            out_channel_num=32,
            pool_size=[5, 5],
            pool_stride=[5, 5],
            activation={{net_paras["activation"][0]}},
            keep_prob=0.5,
        ))
    brain.attach(
        cnn_block(
            init_para={
                "name": "truncated_normal",
                "stddev": 0.1},
            wd={
                "type": "l2",
                "scale": 0.0005},
            in_channel_num=1152,
            out_channel_num=10,
            activation=None,
        ))
    brain.attach(
        SoftmaxWithLossLayer(
            class_num=10,
            inputs=[
                {"name": brain.get_last_layer_name()},
                {"name": "system_in", "idxs": [1]}],
            name="softmax"))

    # Set up a sensor.
    # #########################################################################
    source = MNISTFeedSource(name="MNIST",
                             url='http://yann.lecun.com/exdb/mnist/',
                             work_dir=AKID_DATA_PATH + '/mnist',
                             num_train=50000,
                             num_val=5000,
                             center=True,
                             scale=True)
    sensor = FeedSensor(name='data',
                        source_in=source,
                        batch_size=64,
                        val_batch_size=100)

    kid = Kid(sensor,
              brain,
              MomentumKongFu(momentum=0.9,
                             lr_scheme={
                                 "name": LearningRateScheme.exp_decay,
                                 "base_lr": {{opt_paras["lr"]}},
                                 "decay_rate": 0.95,
                                 "num_batches_per_epoch":
                                     sensor.num_batches_per_epoch,
                                 "decay_epoch_num": 1}),
              engine={{opt_paras["engine"]}},
              max_steps=1000)
    kid.setup()

    return kid
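# The {{...}} expressions in the two setup functions above are not valid
# Python: they are template placeholders that a hyper-parameter search
# driver substitutes before the code is executed. A minimal rendering
# sketch with jinja2 -- an assumption, the actual driver may differ --
# where `template_path` and the parameter values are hypothetical:
from jinja2 import Template

net_paras = {"activation": [{"type": "relu"}] * 4,
             "bn": {"gamma_init": 1, "fix_gamma": True}}
# Values must render to valid Python literals, hence the quoted string.
opt_paras = {"lr": 0.025, "engine": "'single'"}

with open(template_path) as f:
    rendered = Template(f.read()).render(net_paras=net_paras,
                                         opt_paras=opt_paras)
exec(rendered)   # defines a concrete setup()
kid = setup()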
from __future__ import absolute_import

from akid import AKID_DATA_PATH
from akid import FeedSensor
from akid import Kid
from akid import MomentumKongFu
from akid import MNISTFeedSource
from akid.models.brains import LeNet

brain = LeNet(name="LeNet")

source = MNISTFeedSource(name="MNIST",
                         url='http://yann.lecun.com/exdb/mnist/',
                         work_dir=AKID_DATA_PATH + '/mnist',
                         center=True,
                         scale=True,
                         num_train=60000,
                         num_val=10000)

s = Kid(FeedSensor(name='data', source_in=source),
        brain,
        MomentumKongFu(),
        max_steps=1000)
s.setup()
s.practice()
def setup(bn=None, activation_before_pooling=False):
    from akid import AKID_DATA_PATH
    from akid import GraphBrain, MNISTFeedSource, FeedSensor, Kid
    from akid import MomentumKongFu, LearningRateScheme
    from akid.layers import SoftmaxWithLossLayer
    from akid.sugar import cnn_block

    brain = GraphBrain(name="sugar_mnist")

    brain.attach(
        cnn_block(ksize=[5, 5],
                  initial_bias_value=0.,
                  init_para={
                      "name": "truncated_normal",
                      "stddev": 0.1},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  in_channel_num=1,
                  out_channel_num=32,
                  pool_size=[2, 2],
                  pool_stride=[2, 2],
                  activation={"type": "relu"},
                  activation_before_pooling=activation_before_pooling,
                  bn=bn))
    brain.attach(
        cnn_block(ksize=[5, 5],
                  initial_bias_value=0.,
                  init_para={
                      "name": "truncated_normal",
                      "stddev": 0.1},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  in_channel_num=32,
                  out_channel_num=64,
                  pool_size=[5, 5],
                  pool_stride=[2, 2],
                  activation={"type": "relu"},
                  activation_before_pooling=activation_before_pooling,
                  bn=bn))
    brain.attach(
        cnn_block(ksize=None,
                  initial_bias_value=0.1,
                  init_para={
                      "name": "truncated_normal",
                      "stddev": 0.1},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  in_channel_num=3136,
                  out_channel_num=512,
                  activation={"type": "relu"},
                  bn=bn,
                  keep_prob=0.5))
    brain.attach(
        cnn_block(ksize=None,
                  initial_bias_value=0.1,
                  init_para={
                      "name": "truncated_normal",
                      "stddev": 0.1},
                  wd={
                      "type": "l2",
                      "scale": 5e-4},
                  in_channel_num=512,
                  out_channel_num=10,
                  bn=bn,
                  activation=None))
    brain.attach(
        SoftmaxWithLossLayer(
            class_num=10,
            inputs=[
                {"name": "ip4", "idxs": [0]},
                {"name": "system_in", "idxs": [1]}],
            name="loss"))

    source = MNISTFeedSource(name="MNIST",
                             url='http://yann.lecun.com/exdb/mnist/',
                             work_dir=AKID_DATA_PATH + '/mnist',
                             num_train=50000,
                             num_val=5000,
                             center=True,
                             scale=True)
    sensor = FeedSensor(name='data',
                        source_in=source,
                        batch_size=64,
                        val_batch_size=100)

    kid = Kid(sensor,
              brain,
              MomentumKongFu(lr_scheme={
                                 "name": LearningRateScheme.exp_decay,
                                 "base_lr": 0.01,
                                 "decay_rate": 0.95,
                                 "num_batches_per_epoch": 468,
                                 "decay_epoch_num": 1},
                             momentum=0.9),
              max_steps=4000)
    kid.setup()

    return kid
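# Hedged usage: `bn` takes the same dict format the other examples use, and
# `activation_before_pooling` flips the ReLU/pool order inside each block.
kid = setup(bn={"gamma_init": 1, "fix_gamma": True},
            activation_before_pooling=True)
kid.practice()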
import tensorflow as tf

from akid import AKID_DATA_PATH
from akid import MNISTFeedSource, FeedSensor
from akid.models.brains import OneLayerBrain


def main(_):
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        # Assigns ops to the local worker by default.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):

            # Build model...
            source = MNISTFeedSource(name="MNIST",
                                     url='http://yann.lecun.com/exdb/mnist/',
                                     work_dir=AKID_DATA_PATH + '/mnist',
                                     center=True,
                                     scale=True,
                                     num_train=50000,
                                     num_val=10000)
            sensor = FeedSensor(name='data', source_in=source)
            sensor.forward()
            brain = OneLayerBrain(name="brain")
            # input = [sensor.data()]
            # input.extend(sensor.labels())
            input = [sensor.data(), sensor.labels()]
            brain.forward(input)
            loss = brain.loss

            # Keep the step counter out of the trainable variable set.
            global_step = tf.Variable(0, trainable=False, name='global_step')

            train_op = tf.train.AdagradOptimizer(0.01).minimize(
                loss, global_step=global_step)

            saver = tf.train.Saver()
            summary_op = tf.summary.merge_all()
            init_op = tf.global_variables_initializer()

        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/tmp/train_logs",
                                 init_op=init_op,
                                 summary_op=None,
                                 saver=saver,
                                 global_step=global_step,
                                 save_model_secs=600)

        # The supervisor takes care of session initialization, restoring
        # from a checkpoint, and closing when done or an error occurs.
        with sv.managed_session(server.target) as sess:
            # Loop until the supervisor shuts down or 1000000 steps have
            # completed.
            step = 0
            while not sv.should_stop() and step < 1000000:
                # Run a training step asynchronously.
                # See `tf.train.SyncReplicasOptimizer` for additional
                # details on how to perform *synchronous* training.
                loss_value, _, step = sess.run(
                    [loss, train_op, global_step],
                    feed_dict=sensor.fill_feed_dict())
                print(loss_value)

        # Ask for all the services to stop.
        sv.stop()
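# The script above follows the stock TF 1.x distributed-training pattern and
# assumes FLAGS plus an entry point like the sketch below (the flag set
# matches what main() reads; host lists in the launch example are
# hypothetical):
flags = tf.app.flags
flags.DEFINE_string("ps_hosts", "", "Comma-separated list of host:port pairs")
flags.DEFINE_string("worker_hosts", "", "Comma-separated list of host:port pairs")
flags.DEFINE_string("job_name", "", "One of 'ps' or 'worker'")
flags.DEFINE_integer("task_index", 0, "Index of the task within its job")
FLAGS = flags.FLAGS

if __name__ == "__main__":
    tf.app.run(main=main)

# Launched once per process, e.g.:
#   python dist_mnist.py --ps_hosts=h0:2222 --worker_hosts=h1:2222,h2:2222 \
#       --job_name=ps --task_index=0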