def __init__(self, config):

    self.verbose = config['verbose']
    self.rank = config['rank']
    self.size = config['size']

    self.no_paraload = False
    try:
        self.no_paraload = config['no_paraload']
    except:
        pass

    import theano
    theano.config.on_unused_input = 'warn'

    self.name = 'VGG16'

    # data
    from theanompi.models.data import ImageNet_data
    self.data = ImageNet_data(verbose=False)
    self.data.rawdata[4] = image_mean

    # hyperparameters referenced below (input_width, batch_size, file_batch_size,
    # n_epochs, momentum, weight_decay, learning_rate, ...) are module-level
    # constants defined alongside this class
    self.channels = self.data.channels  # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
    self.input_width = input_width      # '0' single scale training 224
    self.input_height = input_height    # '1' single scale training 224
    # if self.size>1: # only use avg
    #     self.batch_size = batch_size/self.size
    # else: # TODO find out if this works better
    self.batch_size = batch_size        # 'b'
    self.file_batch_size = file_batch_size
    self.n_softmax_out = self.data.n_class

    # mini batching and other data parallel common routine
    self.data.batch_data(file_batch_size)
    self.data.extend_data(rank=self.rank, size=self.size)
    self.data.shuffle_data(mode='train', common_seed=1234)
    self.data.shuffle_data(mode='val')
    self.data.shard_data(mode='train', rank=self.rank, size=self.size)  # to update data.n_batch_train
    self.data.shard_data(mode='val', rank=self.rank, size=self.size)    # to update data.n_batch_val

    # training related
    self.n_epochs = n_epochs
    self.epoch = 0
    self.step_idx = 0
    self.mu = momentum  # def: 0.9 # momentum
    self.use_momentum = use_momentum
    self.use_nesterov_momentum = use_nesterov_momentum
    self.eta = weight_decay  # 0.0002 # weight decay
    self.monitor_grad = monitor_grad

    self.base_lr = np.float32(learning_rate)
    self.shared_lr = theano.shared(self.base_lr)
    self.shared_x = theano.shared(np.zeros((3,
                                            self.input_width,
                                            self.input_height,
                                            self.file_batch_size),
                                           dtype=theano.config.floatX),
                                  borrow=True)
    self.shared_y = theano.shared(np.zeros((self.file_batch_size,), dtype=int),
                                  borrow=True)

    # slice batch if needed
    import theano.tensor as T
    subb_ind = T.iscalar('subb')  # sub batch index
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]

    # build model
    self.build_model()  # bc01

    from lasagne.layers import get_all_params
    self.params = get_all_params(self.output_layer, trainable=True)
    from theanompi.models.layers2 import count_params, extract_weight_types
    self.weight_types = extract_weight_types(self.params)
    if self.verbose:
        count_params(self.params, self.verbose)
    self.grads = T.grad(self.cost, self.params)

    # To be compiled
    self.compiled_train_fn_list = []
    self.train_iter_fn = None
    self.val_iter_fn = None

    # iter related
    self.n_subb = file_batch_size // batch_size
    self.current_t = 0       # current filename pointer in the filename list
    self.last_one_t = False  # if pointer is pointing to the last filename in the list
    self.subb_t = 0          # sub-batch index

    self.current_v = 0
    self.last_one_v = False
    self.subb_v = 0

    # redefine the slices with a c01b -> bc01 dimshuffle (overrides the earlier definitions)
    subb_ind = T.iscalar('subb')  # sub batch index
    # print(self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval())
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size].dimshuffle(3, 0, 1, 2)  # c01b to bc01
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]

    if self.data.para_load and not self.no_paraload:
        self.data.spawn_load()
        self.data.para_load_init(self.shared_x, input_width, input_height,
                                 rand_crop, batch_crop_mirror)

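
# Illustration (not part of the model class): a minimal, self-contained sketch
# of how the c01b sub-batch slices defined above are typically consumed when
# compiling a Theano function -- subb_ind is the only explicit input and
# `givens` substitutes the slices for the symbolic variables. The shapes, the
# names (x, y) and the toy cost below are assumptions for illustration only,
# not this class's actual cost or compilation code.
def _subbatch_slicing_demo():
    import numpy as np
    import theano
    import theano.tensor as T

    file_batch_size, batch_size = 8, 4
    shared_x = theano.shared(np.zeros((3, 32, 32, file_batch_size),
                                      dtype=theano.config.floatX), borrow=True)
    shared_y = theano.shared(np.zeros((file_batch_size,), dtype=int), borrow=True)

    subb_ind = T.iscalar('subb')  # sub-batch index
    x_slice = shared_x[:, :, :, subb_ind * batch_size:(subb_ind + 1) * batch_size].dimshuffle(3, 0, 1, 2)  # c01b -> bc01
    y_slice = shared_y[subb_ind * batch_size:(subb_ind + 1) * batch_size]

    x = T.tensor4('x')   # bc01 input, dtype = floatX
    y = T.lvector('y')   # integer labels
    dummy_cost = x.mean() + y.sum()  # stand-in for the real softmax cross-entropy

    fn = theano.function([subb_ind], dummy_cost,
                         givens={x: x_slice, y: y_slice})
    return fn(0), fn(1)  # evaluate the first two sub-batches
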
def __init__(self, config):

    self.verbose = config['verbose']
    self.rank = config['rank']  # will be used in sharding and distinguish rng
    self.size = config['size']

    self.no_paraload = False
    try:
        self.no_paraload = config['no_paraload']
    except:
        pass

    import theano
    theano.config.on_unused_input = 'warn'

    self.name = 'GoogLeNet'

    # data
    from theanompi.models.data import ImageNet_data
    self.data = ImageNet_data(verbose=False)

    self.channels = self.data.channels  # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
    self.input_width = input_width      # '0' single scale training 224
    self.input_height = input_height    # '1' single scale training 224
    # if self.size>1: # only use avg
    #     self.batch_size = batch_size/self.size
    # else: # TODO find out if this works better
    self.batch_size = batch_size        # 'b'
    self.file_batch_size = file_batch_size
    self.n_softmax_out = self.data.n_class

    # mini batching and other data parallel common routine
    self.data.batch_data(file_batch_size)
    self.data.extend_data(rank=self.rank, size=self.size)
    self.data.shuffle_data(mode='train', common_seed=1234)
    self.data.shuffle_data(mode='val')
    self.data.shard_data(mode='train', rank=self.rank, size=self.size)  # to update data.n_batch_train
    self.data.shard_data(mode='val', rank=self.rank, size=self.size)    # to update data.n_batch_val

    # training related
    self.n_epochs = n_epochs
    self.epoch = 0
    self.step_idx = 0
    self.mu = momentum  # def: 0.9 # momentum
    self.use_momentum = use_momentum
    self.use_nesterov_momentum = use_nesterov_momentum
    self.eta = weight_decay  # 0.0002 # weight decay
    self.monitor_grad = monitor_grad

    self.base_lr = np.float32(learning_rate)
    self.shared_lr = theano.shared(self.base_lr)
    self.shared_x = theano.shared(np.zeros((3,
                                            self.input_width,   # self.data.width
                                            self.input_height,  # self.data.height
                                            file_batch_size),
                                           dtype=theano.config.floatX),
                                  borrow=True)
    self.shared_y = theano.shared(np.zeros((file_batch_size,), dtype=int),
                                  borrow=True)

    # slice batch if needed
    import theano.tensor as T
    subb_ind = T.iscalar('subb')  # sub batch index
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    self.build_model()
    self.output = self.output_layer.output
    from theanompi.models.layers2 import get_params, get_layers, count_params
    # self.layers = get_layers(lastlayer=self.output_layer)
    self.params, self.weight_types = get_params(self.layers)
    count_params(self.params, verbose=self.verbose)
    self.grads = T.grad(self.cost, self.params)

    # To be compiled
    self.compiled_train_fn_list = []
    self.train_iter_fn = None
    self.val_iter_fn = None

    # iter related
    self.n_subb = file_batch_size // batch_size
    self.current_t = 0       # current filename pointer in the filename list
    self.last_one_t = False  # if pointer is pointing to the last filename in the list
    self.subb_t = 0          # sub-batch index

    self.current_v = 0
    self.last_one_v = False
    self.subb_v = 0

    # preprocessing
    self.batch_crop_mirror = batch_crop_mirror
    self.input_width = input_width

    if self.data.para_load and not self.no_paraload:
        self.data.spawn_load()
        self.data.para_load_init(self.shared_x, input_width, input_height,
                                 rand_crop, batch_crop_mirror)

def __init__(self, config):

    self.verbose = config['verbose']
    self.rank = config['rank']
    self.size = config['size']

    self.no_paraload = False
    try:
        self.no_paraload = config['no_paraload']
    except:
        pass

    import theano
    theano.config.on_unused_input = 'warn'

    self.name = 'ResNet152'

    # data
    from theanompi.models.data import ImageNet_data
    self.data = ImageNet_data(verbose=False)

    self.channels = self.data.channels  # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
    self.input_width = input_width      # '0' single scale training 224
    self.input_height = input_height    # '1' single scale training 224
    # if self.size>1: # only use avg
    #     self.batch_size = batch_size/self.size
    # else: # TODO find out if this works better
    self.batch_size = batch_size        # 'b'
    self.file_batch_size = file_batch_size
    self.n_softmax_out = self.data.n_class

    # mini batching and other data parallel common routine
    self.data.batch_data(file_batch_size)
    self.data.extend_data(rank=self.rank, size=self.size)
    self.data.shuffle_data(mode='train', common_seed=1234)
    self.data.shuffle_data(mode='val')
    self.data.shard_data(mode='train', rank=self.rank, size=self.size)  # to update data.n_batch_train
    self.data.shard_data(mode='val', rank=self.rank, size=self.size)    # to update data.n_batch_val

    # training related
    self.n_epochs = n_epochs
    self.epoch = 0
    self.step_idx = 0
    self.mu = momentum  # def: 0.9 # momentum
    self.use_momentum = use_momentum
    self.use_nesterov_momentum = use_nesterov_momentum
    self.eta = weight_decay  # 0.0002 # weight decay
    self.monitor_grad = monitor_grad

    self.base_lr = np.float32(learning_rate)
    self.shared_lr = theano.shared(self.base_lr)
    self.shared_x = theano.shared(np.zeros((3,
                                            self.input_width,
                                            self.input_height,
                                            self.file_batch_size),
                                           dtype=theano.config.floatX),
                                  borrow=True)
    self.shared_y = theano.shared(np.zeros((self.file_batch_size,), dtype=int),
                                  borrow=True)

    # slice batch if needed
    import theano.tensor as T
    subb_ind = T.iscalar('subb')  # sub batch index
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]

    # build model
    self.build_model()  # bc01

    from lasagne.layers import get_all_params
    self.params = get_all_params(self.output_layer, trainable=True)
    from theanompi.models.layers2 import count_params, extract_weight_types
    self.weight_types = extract_weight_types(self.params)
    if self.verbose:
        count_params(self.params, self.verbose)
    self.grads = T.grad(self.cost, self.params)

    # To be compiled
    self.compiled_train_fn_list = []
    self.train_iter_fn = None
    self.val_iter_fn = None

    # iter related
    self.n_subb = file_batch_size // batch_size
    self.current_t = 0       # current filename pointer in the filename list
    self.last_one_t = False  # if pointer is pointing to the last filename in the list
    self.subb_t = 0          # sub-batch index

    self.current_v = 0
    self.last_one_v = False
    self.subb_v = 0

    # redefine the slices with a c01b -> bc01 dimshuffle (overrides the earlier definitions)
    subb_ind = T.iscalar('subb')  # sub batch index
    # print(self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval())
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size].dimshuffle(3, 0, 1, 2)  # c01b to bc01
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]

    if self.data.para_load and not self.no_paraload:
        self.data.spawn_load()
        self.data.para_load_init(self.shared_x, input_width, input_height,
                                 rand_crop, batch_crop_mirror)

def __init__(self, config):

    self.verbose = config['verbose']
    self.rank = config['rank']  # will be used in sharding and distinguish rng
    self.size = config['size']

    self.no_paraload = False
    try:
        self.no_paraload = config['no_paraload']
    except:
        pass

    import theano
    theano.config.on_unused_input = 'warn'

    self.name = 'AlexNet'

    # data
    import theanompi.models.data.imagenet as imagenet
    from theanompi.models.data import ImageNet_data
    imagenet.sc = True
    self.data = ImageNet_data(verbose=False)

    self.channels = self.data.channels  # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
    self.input_width = input_width      # '0' single scale training 224
    self.input_height = input_height    # '1' single scale training 224
    # if self.size>1: # only use avg
    #     self.batch_size = batch_size/self.size
    # else: # TODO find out if this works better
    self.batch_size = batch_size        # 'b'
    self.file_batch_size = file_batch_size
    self.n_softmax_out = self.data.n_class

    # mini batching
    self.data.batch_data(file_batch_size)
    # self.data.shuffle_data()

    # training related
    self.n_epochs = n_epochs
    self.epoch = 0
    self.step_idx = 0
    self.mu = momentum  # def: 0.9 # momentum
    self.use_momentum = use_momentum
    self.use_nesterov_momentum = use_nesterov_momentum
    self.eta = weight_decay  # 0.0002 # weight decay
    self.monitor_grad = monitor_grad

    self.base_lr = np.float32(learning_rate)
    self.shared_lr = theano.shared(self.base_lr)
    self.shared_x = theano.shared(np.zeros((3,
                                            self.data.width,
                                            self.data.height,
                                            file_batch_size),
                                           dtype=theano.config.floatX),
                                  borrow=True)
    self.shared_y = theano.shared(np.zeros((file_batch_size,), dtype=int),
                                  borrow=True)

    # slice batch if needed
    import theano.tensor as T
    subb_ind = T.iscalar('subb')  # sub batch index
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    self.build_model()
    self.output = self.output_layer.output
    from theanompi.models.layers2 import get_params, get_layers, count_params
    self.layers = get_layers(lastlayer=self.output_layer)
    self.params, self.weight_types = get_params(self.layers)
    count_params(self.params, verbose=self.verbose)
    self.grads = T.grad(self.cost, self.params)

    # To be compiled
    self.compiled_train_fn_list = []
    self.train_iter_fn = None
    self.val_iter_fn = None

    # iter related
    self.n_subb = file_batch_size // batch_size
    self.current_t = 0       # current filename pointer in the filename list
    self.last_one_t = False  # if pointer is pointing to the last filename in the list
    self.subb_t = 0          # sub-batch index

    self.current_v = 0
    self.last_one_v = False
    self.subb_v = 0

    # preprocessing
    self.batch_crop_mirror = batch_crop_mirror
    self.input_width = input_width

    if self.data.para_load and not self.no_paraload:
        self.data.spawn_load()
        self.data.para_load_init(self.shared_x)

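
# Schematic sketch (plain Python, not theanompi's actual API) of how the
# iteration counters initialised above typically advance during one epoch:
# current_t walks the list of file batches, subb_t walks the n_subb sub-batches
# inside each loaded file batch, and last_one_t marks the final file batch.
# The filenames and the print stand-in for the compiled train function are
# illustrative assumptions.
def _iteration_counters_demo(filenames=('batch_0000.npy', 'batch_0001.npy'), n_subb=2):
    current_t = 0
    while True:
        last_one_t = (current_t == len(filenames) - 1)
        # here the real code would copy filenames[current_t] into shared_x,
        # possibly via the parallel-loading process started by data.spawn_load()
        for subb_t in range(n_subb):
            print('file batch %d, sub-batch %d' % (current_t, subb_t))
        if last_one_t:
            break
        current_t += 1
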
def __init__(self, config):

    self.verbose = config['verbose']
    self.rank = config['rank']  # will be used in sharding and distinguish rng
    self.size = config['size']

    import theano

    self.name = 'Cifar10_model'

    # data
    from theanompi.models.data import Cifar10_data
    self.data = Cifar10_data(verbose=False)

    self.channels = self.data.channels  # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
    self.input_width = input_width      # '0' single scale training 224
    self.input_height = input_height    # '1' single scale training 224
    # if self.size>1: # only use avg
    #     self.batch_size = batch_size/self.size
    # else:
    self.batch_size = batch_size        # 'b'
    self.file_batch_size = file_batch_size
    self.n_softmax_out = self.data.n_class

    # mini batching and other data parallel common routine
    self.data.batch_data(file_batch_size)
    self.data.extend_data(rank=self.rank, size=self.size)
    self.data.shuffle_data(mode='train', common_seed=1234)
    self.data.shuffle_data(mode='val')
    self.data.shard_data(mode='train', rank=self.rank, size=self.size)  # to update data.n_batch_train
    self.data.shard_data(mode='val', rank=self.rank, size=self.size)    # to update data.n_batch_val

    # preprocessing
    self.batch_crop_mirror = batch_crop_mirror
    self.input_width = input_width

    # training related
    self.n_epochs = n_epochs
    self.epoch = 0
    self.step_idx = 0
    self.mu = momentum  # def: 0.9 # momentum
    self.use_momentum = use_momentum
    self.use_nesterov_momentum = use_nesterov_momentum
    self.eta = weight_decay  # 0.0002 # weight decay
    self.monitor_grad = monitor_grad

    self.base_lr = np.float32(learning_rate)
    self.shared_lr = theano.shared(self.base_lr)
    self.shared_x = theano.shared(np.zeros((3,
                                            self.data.width,
                                            self.data.height,
                                            file_batch_size),
                                           dtype=theano.config.floatX),
                                  borrow=True)
    self.shared_y = theano.shared(np.zeros((file_batch_size,), dtype=int),
                                  borrow=True)

    # slice batch if needed
    import theano.tensor as T
    subb_ind = T.iscalar('subb')  # sub batch index
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:(subb_ind + 1) * self.batch_size]

    # build model
    self.build_model()
    self.output = self.output_layer.output
    from theanompi.models.layers2 import get_params, get_layers, count_params
    self.layers = get_layers(lastlayer=self.output_layer)
    self.params, self.weight_types = get_params(self.layers)
    count_params(self.params, self.verbose)
    self.grads = T.grad(self.cost, self.params)

    # To be compiled
    self.compiled_train_fn_list = []
    self.train_iter_fn = None
    self.val_iter_fn = None

    # iter related
    self.n_subb = file_batch_size // batch_size
    self.current_t = 0       # current filename pointer in the filename list
    self.last_one_t = False  # if pointer is pointing to the last filename in the list
    self.subb_t = 0          # sub-batch index

    self.current_v = 0
    self.last_one_v = False
    self.subb_v = 0

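
# Hypothetical single-process instantiation of the class this constructor
# belongs to (taken to be Cifar10_model, per self.name). The import path is an
# assumption; in practice theanompi models are driven by its MPI launchers, so
# treat this only as a sketch of what the config dict must contain.
if __name__ == '__main__':
    from theanompi.models.cifar10 import Cifar10_model  # import path assumed

    config = {'verbose': True,  # print parameter counts and progress
              'rank': 0,        # this worker's index, used to shard/shuffle data
              'size': 1}        # total number of data-parallel workers

    model = Cifar10_model(config)
    print(model.name, model.batch_size, model.n_subb)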