def compile_time_operation(self, learning_option, cluster):
    """
    define fully-connected(FC) operation for input tensor.
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # get attr
    # required field
    num_output = self.get_attr('num_output', default=None)
    if num_output is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('num_output', self.name))

    # optional field
    bias_term = self.get_attr('bias_term', default=True)
    initializer = self.get_attr('initializer', default={'weight': {}, 'bias': {}})  # default will be set later
    regularizer = self.get_attr('regularizer', default={})  # default will be set later

    # get weight for fully-connected layer
    weight_init = get_initializer(initializer.get('weight'), is_bias=False)
    weight_reg, weight_reg_type = get_regularizer(regularizer, is_bias=False)
    decay_mul = [weight_reg]

    # if bias_term is True, add bias term to fc output
    if bias_term:
        bias_init = get_initializer(initializer.get('bias'), is_bias=True)
        bias_reg, bias_reg_type = get_regularizer(regularizer, is_bias=True)
        decay_mul.append(bias_reg)
    else:
        bias_init = {}

    # check regularizer type
    tmp_reg = learning_option.get('caffe_reg_type')
    if tmp_reg is None:
        learning_option['caffe_reg_type'] = weight_reg_type
    else:
        if tmp_reg != weight_reg_type:
            raise Exception('[DLMDL ERROR]: In caffe, regularizer type of all layers must be equal')

    fc = L.InnerProduct(input_, name=self.name, num_output=num_output,
                        weight_filler=weight_init, bias_filler=bias_init,
                        param=decay_mul)
    outdim = [indim[0], num_output]

    self.set_output('output', fc)
    self.set_dimension('output', outdim)
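# --- usage sketch (not part of the layer class) ---
# A minimal, hypothetical example of the L.InnerProduct call emitted above, built with
# plain pycaffe NetSpec so the generated prototxt can be inspected. The filler/param
# dicts below are illustrative stand-ins for what get_initializer()/get_regularizer()
# would return; they are assumptions, not DLMDL output.
import caffe
from caffe import layers as L

n = caffe.NetSpec()
n.data = L.Input(shape=dict(dim=[32, 64]))                      # batch of 32, 64 features
n.fc1 = L.InnerProduct(n.data, name='fc1', num_output=10,
                       weight_filler=dict(type='xavier'),       # example weight filler
                       bias_filler=dict(type='constant', value=0.0),
                       param=[dict(decay_mult=1), dict(decay_mult=0)])  # weight/bias decay
print(n.to_proto())                                             # emits the prototxt text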
def apiConstructor():
    # get weight and bias initializers for the GRU cell
    weight_init = get_initializer(initializer.get('weight'), is_bias=False)
    bias_init = get_initializer(initializer.get('bias'), is_bias=True)

    # TODO: GRU cuDNN cell (cuDNN implementation of the GRU layer)
    """
    TODO: add conditional branch later.
    TF v1.1 cannot add the branch since the name attribute does not exist in the BasicRNNCell API.
    def f1():
        return tf.contrib.rnn.GRUCell(hidden_size, activation=activation,
                                      kernel_initializer=weight_init,
                                      bias_initializer=bias_init)  # for training procedure
    def f2():
        return tf.contrib.rnn.GRUCell(hidden_size, activation=activation, reuse=True,
                                      kernel_initializer=weight_init,
                                      bias_initializer=bias_init)  # for test procedure
    gru_cell = tf.cond(is_train, f1, f2, name=self.name)
    """
    gru_cell = tf.contrib.rnn.GRUCell(hidden_size,
                                      kernel_initializer=weight_init,
                                      activation=activation,
                                      bias_initializer=bias_init)  # for training procedure

    # set output
    self.set_output('output', gru_cell)
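# --- usage sketch (not part of apiConstructor) ---
# A minimal TF 1.x example, under the assumption that the GRUCell produced above is
# unrolled elsewhere in DLMDL. Here the cell is simply wrapped with tf.nn.dynamic_rnn
# over a placeholder sequence; all names and shapes are illustrative.
import tensorflow as tf

hidden_size_example = 128
inputs = tf.placeholder(tf.float32, [None, 20, 50])          # (batch, time, features)
cell = tf.contrib.rnn.GRUCell(hidden_size_example)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
# outputs: (batch, time, hidden_size); state: (batch, hidden_size)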
def compile_time_operation(self, learning_option, cluster):
    """
    define parametric rectified-linear unit (PReLU) operation for input tensor
    It follows:
    f(x) = alpha * x for x < 0, f(x) = x for x >= 0,
    where alpha is a learned array with the same shape as x.
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # get attr
    # optional field
    initializer = self.get_attr('initializer', default={'weight': {}, 'bias': {}})  # default will be set later
    regularizer = self.get_attr('regularizer', default={})  # default will be set later
    ch_shared = self.get_attr('channel_shared', default=False)

    # get alpha (slope) parameter for PReLU
    alpha_init = get_initializer(initializer.get('weight'), is_bias=False)
    alpha_reg, alpha_reg_type = get_regularizer(regularizer, is_bias=False)

    # check regularizer type
    tmp_reg = learning_option.get('caffe_reg_type')
    if tmp_reg is None:
        learning_option['caffe_reg_type'] = alpha_reg_type
    else:
        if tmp_reg != alpha_reg_type:
            raise Exception('[DLMDL ERROR]: In caffe, regularizer type of all layers must be equal')

    prelu = L.PReLU(input_, name=self.name, weight_filler=alpha_init,
                    channel_shared=ch_shared, param=[alpha_reg])

    # set output dimension
    outdim = indim
    self.set_output('output', prelu)
    self.set_dimension('output', outdim)
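# --- reference sketch (not part of the layer class) ---
# A small NumPy check of the PReLU formula in the docstring above:
# f(x) = x for x >= 0 and f(x) = alpha * x for x < 0. Independent of DLMDL/Caffe;
# the scalar alpha here corresponds to the channel_shared=True case.
import numpy as np

def prelu_reference(x, alpha):
    # element-wise PReLU; alpha may be a scalar or a per-channel array
    return np.where(x >= 0, x, alpha * x)

x = np.array([-2.0, -0.5, 0.0, 1.5])
print(prelu_reference(x, 0.25))   # -> [-0.5   -0.125  0.     1.5  ]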
def apiConstructor():
    # get alpha (slope) initializer and regularizer for PReLU
    alpha_init = get_initializer(initializer.get('weight'), is_bias=False)
    alpha_reg = get_regularizer(regularizer, scope, is_bias=False)

    # WARNING: constraint of the alpha weight is always None
    # construct the Keras PReLU layer, then apply it to the input tensor
    prelu = tf.keras.layers.PReLU(alpha_initializer=alpha_init,
                                  alpha_regularizer=alpha_reg,
                                  alpha_constraint=None,
                                  shared_axes=ch_shared)(input_)

    # get output dimension
    outdim = indim

    # set output
    self.set_dimension('output', outdim)
    self.set_output('output', prelu)

    # set tf summary
    tf.summary.histogram(self.name, prelu)
def run_time_operation(self, learning_option, cluster):
    """
    define convolution operation for input tensor
    outputs:
        output: convolution output
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # get attr
    # required field
    kernel_size = self.get_attr('kernel_size', default=None)
    if kernel_size is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('kernel_size', self.name))
    num_output = self.get_attr('num_output', default=None)
    if num_output is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('num_output', self.name))

    # optional field
    padding = self.get_attr('padding', default='VALID')
    stride = self.get_attr('stride', default=1)
    bias_term = self.get_attr('bias_term', default=True)
    initializer = self.get_attr('initializer', default={'weight': {}, 'bias': {}})  # default will be set later
    regularizer = self.get_attr('regularizer', default={})  # default will be set later
    dilate = self.get_attr('dilate', default=None)
    scope = self.get_attr('scope', default=self.name)

    # get worker info: worker num, device type, device num
    device = self.get_attr('device')
    num = re.sub('[^0-9]', '', cluster.get('types')[device])
    type = cluster.get('types')[device].replace(str(num), '')

    # get shape array
    stride_shape = [stride, stride]
    weight_shape = [kernel_size[0], kernel_size[1], indim[3], num_output]
    dilate_shape = [dilate, dilate] if dilate is not None else None
    bias_shape = [num_output]

    with tf.variable_scope(self.name):
        # get weight for convolution
        with tf.variable_scope(scope):
            weight_init = get_initializer(initializer.get('weight'), is_bias=False)
            weight_reg = get_regularizer(regularizer, is_bias=False)
            weights = tf.get_variable('weights', shape=weight_shape, dtype=tf.float32,
                                      initializer=weight_init, regularizer=weight_reg,
                                      trainable=True)
            # tf.add_to_collection(scope, weights)
            if bias_term:
                bias_init = get_initializer(initializer.get('bias'), is_bias=True)
                bias_reg = get_regularizer(regularizer, is_bias=True)
                biases = tf.get_variable('biases', shape=bias_shape, dtype=tf.float32,
                                         initializer=bias_init, regularizer=bias_reg,
                                         trainable=True)
                # tf.add_to_collection(scope, biases)

    # construct API
    def apiConstructor():
        conv = tf.nn.convolution(input_, weights, padding,
                                 strides=stride_shape,
                                 dilation_rate=dilate_shape,
                                 data_format='NHWC')

        # if bias_term is True, add bias term to convolution output
        if bias_term:
            conv = tf.nn.bias_add(conv, biases, data_format='NHWC')

        # get output dimension
        outdim = list(conv.get_shape()[i].value for i in xrange(len(conv.get_shape())))

        # set output
        self.set_dimension('output', outdim)
        self.set_output('output', conv)

        # set tf summary
        tf.summary.histogram(self.name, conv)

    with tf.variable_scope(self.name):
        # single node, model parallelism: explicit worker mapping
        # data parallelism: equally duplicate model
        if learning_option.get("parallel", None) != "DP":
            with tf.device('/job:worker/task:{0}/{1}:{2}'.format(device, type, num)):
                apiConstructor()
        else:
            apiConstructor()
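# --- usage sketch (not part of run_time_operation) ---
# A minimal TF 1.x example of the tf.nn.convolution + tf.nn.bias_add pair used above,
# without the DLMDL scoping and device-placement logic; variable names, shapes and
# initializers are illustrative assumptions.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])                      # NHWC input
w = tf.get_variable('w_example', shape=[5, 5, 1, 32], dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=0.1))
b = tf.get_variable('b_example', shape=[32], dtype=tf.float32,
                    initializer=tf.zeros_initializer())

conv = tf.nn.convolution(x, w, 'SAME', strides=[1, 1], data_format='NHWC')
conv = tf.nn.bias_add(conv, b, data_format='NHWC')                     # -> (?, 28, 28, 32)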
def compile_time_operation(self, learning_option, cluster):
    """
    define convolution operation for input blob
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # get attr
    # required field
    kernel_size = self.get_attr('kernel_size', default=None)
    if kernel_size is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('kernel_size', self.name))
    num_output = self.get_attr('num_output', default=None)
    if num_output is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('num_output', self.name))

    # optional field
    padding = self.get_attr('padding', default='VALID')
    stride = self.get_attr('stride', default=1)
    bias_term = self.get_attr('bias_term', default=True)
    initializer = self.get_attr('initializer', default={'weight': {}, 'bias': {}})  # default will be set later
    regularizer = self.get_attr('regularizer', default={})  # default will be set later
    group = self.get_attr('group', default=1)

    # get weight for convolution
    weight_init = get_initializer(initializer.get('weight'), is_bias=False)
    weight_reg, weight_reg_type = get_regularizer(regularizer, is_bias=False)

    # if bias_term is True, add bias term to convolution output
    if bias_term:
        bias_init = get_initializer(initializer.get('bias'), is_bias=True)
        bias_reg, bias_reg_type = get_regularizer(regularizer, is_bias=True)
    else:
        bias_init = None
        bias_reg = None
        bias_reg_type = None

    # check regularizer type
    # only compare the bias regularizer type when a bias term exists
    tmp_reg = learning_option.get('caffe_reg_type')
    if tmp_reg is None:
        learning_option['caffe_reg_type'] = weight_reg_type
    else:
        if tmp_reg != weight_reg_type or (bias_reg_type is not None and tmp_reg != bias_reg_type):
            raise Exception('[DLMDL ERROR]: In caffe, regularizer type of all layers must be equal')

    # padding: compute output dimension and symmetric pad size (NCHW order)
    if padding == 'SAME':
        outdim = [np.ceil(float(indim[i + 2]) / float(stride)) for i in xrange(2)]
        outdim.insert(0, indim[0])
        outdim.insert(1, num_output)
        p = [int(((outdim[i + 2] - 1) * stride + kernel_size[i] - indim[i + 2]) / 2) for i in xrange(2)]
    else:
        outdim = [np.ceil(float(indim[i + 2] - kernel_size[i] + 1) / float(stride)) for i in xrange(2)]
        outdim.insert(0, indim[0])
        outdim.insert(1, num_output)
        p = [0, 0]

    conv = L.Convolution(input_, name=self.name,
                         kernel_h=kernel_size[0], kernel_w=kernel_size[1],
                         num_output=num_output, stride=stride, group=group,
                         pad_h=p[0], pad_w=p[1],
                         weight_filler=weight_init, bias_filler=bias_init,
                         param=[weight_reg, bias_reg])

    self.set_output('output', conv)
    self.set_dimension('output', outdim)
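# --- arithmetic sketch (hypothetical helper, not part of the layer class) ---
# The SAME/VALID output-size and pad computation above, extracted into a standalone
# function for clarity; dimensions follow Caffe's NCHW order [batch, channel, h, w]
# and a square stride, matching the assumptions of the code above.
import math

def conv_output_shape(indim, kernel_size, num_output, stride, padding):
    if padding == 'SAME':
        spatial = [int(math.ceil(float(indim[i + 2]) / stride)) for i in range(2)]
        pad = [int(((spatial[i] - 1) * stride + kernel_size[i] - indim[i + 2]) / 2)
               for i in range(2)]
    else:  # 'VALID'
        spatial = [int(math.ceil(float(indim[i + 2] - kernel_size[i] + 1) / stride))
                   for i in range(2)]
        pad = [0, 0]
    return [indim[0], num_output] + spatial, pad

print(conv_output_shape([32, 3, 224, 224], [7, 7], 64, 2, 'SAME'))
# -> ([32, 64, 112, 112], [2, 2])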
def main():
    params = argparse.ArgumentParser(description='CLI to train sockeye sequence-to-sequence models.')
    arguments.add_train_cli_args(params)
    args = params.parse_args()

    utils.seedRNGs(args)

    check_arg_compatibility(args)
    output_folder = os.path.abspath(args.output)
    resume_training, training_state_dir = check_resume(args, output_folder)

    global logger
    logger = setup_main_logger(__name__,
                               file_logging=True,
                               console=not args.quiet,
                               path=os.path.join(output_folder, C.LOG_NAME))
    utils.log_basic_info(args)
    with open(os.path.join(output_folder, C.ARGS_STATE_NAME), "w") as fp:
        json.dump(vars(args), fp)

    with ExitStack() as exit_stack:
        context = determine_context(args, exit_stack)

        vocab_source, vocab_target = load_or_create_vocabs(args, resume_training, output_folder)
        vocab_source_size = len(vocab_source)
        vocab_target_size = len(vocab_target)
        logger.info("Vocabulary sizes: source=%d target=%d", vocab_source_size, vocab_target_size)

        train_iter, eval_iter, config_data = create_data_iters(args, vocab_source, vocab_target)

        lr_scheduler_instance = create_lr_scheduler(args, resume_training, training_state_dir)

        model_config = create_model_config(args, vocab_source_size, vocab_target_size, config_data)
        model_config.freeze()

        training_model = create_training_model(model_config, args, context, train_iter,
                                               lr_scheduler_instance, resume_training,
                                               training_state_dir)

        weight_initializer = initializer.get_initializer(
            default_init_type=args.weight_init,
            default_init_scale=args.weight_init_scale,
            default_init_xavier_rand_type=args.weight_init_xavier_rand_type,
            default_init_xavier_factor_type=args.weight_init_xavier_factor_type,
            embed_init_type=args.embed_weight_init,
            embed_init_sigma=vocab_source_size ** -0.5,  # TODO
            rnn_init_type=args.rnn_h2h_init)

        optimizer, optimizer_params, kvstore, gradient_clipping_type, gradient_clipping_threshold = define_optimizer(args, lr_scheduler_instance)

        # Handle options that override training settings
        max_updates = args.max_updates
        max_num_checkpoint_not_improved = args.max_num_checkpoint_not_improved
        min_num_epochs = args.min_num_epochs
        max_num_epochs = args.max_num_epochs
        if min_num_epochs is not None and max_num_epochs is not None:
            check_condition(min_num_epochs <= max_num_epochs,
                            "Minimum number of epochs must be smaller than maximum number of epochs")
        # Fixed training schedule always runs for a set number of updates
        if args.learning_rate_schedule:
            max_updates = sum(num_updates for (_, num_updates) in args.learning_rate_schedule)
            max_num_checkpoint_not_improved = -1
            min_num_epochs = None
            max_num_epochs = None

        decode_and_evaluate, decode_and_evaluate_context = determine_decode_and_evaluate_context(args,
                                                                                                 exit_stack,
                                                                                                 context)

        training_model.fit(train_iter,
                           eval_iter,
                           output_folder=output_folder,
                           max_params_files_to_keep=args.keep_last_params,
                           metrics=args.metrics,
                           initializer=weight_initializer,
                           allow_missing_params=args.allow_missing_params,
                           max_updates=max_updates,
                           checkpoint_frequency=args.checkpoint_frequency,
                           optimizer=optimizer,
                           optimizer_params=optimizer_params,
                           optimized_metric=args.optimized_metric,
                           gradient_clipping_type=gradient_clipping_type,
                           clip_gradient_threshold=gradient_clipping_threshold,
                           kvstore=kvstore,
                           max_num_not_improved=max_num_checkpoint_not_improved,
                           min_num_epochs=min_num_epochs,
                           max_num_epochs=max_num_epochs,
                           decode_and_evaluate=decode_and_evaluate,
                           decode_and_evaluate_context=decode_and_evaluate_context,
                           use_tensorboard=args.use_tensorboard,
                           mxmonitor_pattern=args.monitor_pattern,
                           mxmonitor_stat_func=args.monitor_stat_func,
                           lr_decay_param_reset=args.learning_rate_decay_param_reset,
                           lr_decay_opt_states_reset=args.learning_rate_decay_optimizer_states_reset)
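# --- arithmetic sketch (illustrative only, not sockeye API) ---
# When a fixed learning-rate schedule is given, main() sets the total number of updates
# to the sum of the per-phase update counts and disables early stopping and epoch limits.
# The schedule below is a made-up example of (learning_rate, num_updates) pairs.
learning_rate_schedule = [(0.0003, 10000), (0.0001, 5000), (0.00003, 5000)]
max_updates = sum(num_updates for (_, num_updates) in learning_rate_schedule)
print(max_updates)   # -> 20000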
def apiConstructor():
    if learning_option.get('num_steps') is None:  # DNN/CNN case
        # if this layer is the first fc, flatten the input
        if len(indim) == 2:
            weight_shape = [indim[1], num_output]
            flatten = input_
        else:
            flatten = tf.reshape(input_, [-1, indim[1] * indim[2] * indim[3]])
            weight_shape = [flatten.get_shape()[1].value, num_output]

        with tf.variable_scope(scope):
            # get weight for fc
            weight_init = get_initializer(initializer.get('weight'), is_bias=False)
            weight_reg = get_regularizer(regularizer, scope, is_bias=False)
            if learning_option.get("parallel", None) == "DP_mb":
                with tf.device('/job:worker/task:{0}/mb:0'.format(device)):
                    weights = tf.get_variable('weights', shape=weight_shape, dtype=tf.float32,
                                              initializer=weight_init, regularizer=weight_reg,
                                              trainable=True)
            else:
                weights = tf.get_variable('weights', shape=weight_shape, dtype=tf.float32,
                                          initializer=weight_init, regularizer=weight_reg,
                                          trainable=True)
            tf.add_to_collection(scope, weights)

        fc = tf.matmul(flatten, weights)

        # if bias_term is True, add bias term to fc output
        if bias_term:
            with tf.variable_scope(scope):
                bias_shape = [num_output]
                bias_init = get_initializer(initializer.get('bias'), is_bias=True)
                bias_reg = get_regularizer(regularizer, scope, is_bias=True)
                if learning_option.get("parallel", None) == "DP_mb":
                    with tf.device('/job:worker/task:{0}/mb:0'.format(device)):
                        biases = tf.get_variable('biases', shape=bias_shape, dtype=tf.float32,
                                                 initializer=bias_init, regularizer=bias_reg,
                                                 trainable=True)
                else:
                    biases = tf.get_variable('biases', shape=bias_shape, dtype=tf.float32,
                                             initializer=bias_init, regularizer=bias_reg,
                                             trainable=True)
                tf.add_to_collection(scope, biases)
            fc = tf.nn.bias_add(fc, biases, data_format='NHWC')

    # WARNING: in a recurrent neural network, there is only one fully-connected layer
    else:  # RNN/LSTM/GRU case
        hidden_size = learning_option.get('hidden_size')
        weight_shape = [hidden_size, num_output]

        # get weight for fc
        with tf.variable_scope(scope):
            weight_init = get_initializer(initializer.get('weight'), is_bias=False)
            weight_reg = get_regularizer(regularizer, scope, is_bias=False)
            if learning_option.get("parallel", None) == "DP_mb":
                with tf.device('/job:worker/task:{0}/mb:0'.format(device)):
                    weights = tf.get_variable('weights', shape=weight_shape, dtype=tf.float32,
                                              initializer=weight_init, regularizer=weight_reg,
                                              trainable=True)
            else:
                weights = tf.get_variable('weights', shape=weight_shape, dtype=tf.float32,
                                          initializer=weight_init, regularizer=weight_reg,
                                          trainable=True)
            tf.add_to_collection(scope, weights)

        if learning_option.get('is_image'):  # MNIST rnn
            fc = tf.matmul(input_[-1], weights)
        else:
            reshape_input_ = tf.reshape(tf.stack(axis=1, values=input_), [-1, hidden_size])
            fc = tf.matmul(reshape_input_, weights)

        # if bias_term is True, add bias term to fc output
        if bias_term:
            with tf.variable_scope(scope):
                bias_shape = [num_output]
                bias_init = get_initializer(initializer.get('bias'), is_bias=True)
                bias_reg = get_regularizer(regularizer, scope, is_bias=True)
                if learning_option.get("parallel", None) == "DP_mb":
                    with tf.device('/job:worker/task:{0}/mb:0'.format(device)):
                        biases = tf.get_variable('biases', shape=bias_shape, dtype=tf.float32,
                                                 initializer=bias_init, regularizer=bias_reg,
                                                 trainable=True)
                else:
                    biases = tf.get_variable('biases', shape=bias_shape, dtype=tf.float32,
                                             initializer=bias_init, regularizer=bias_reg,
                                             trainable=True)
                tf.add_to_collection(scope, biases)
            fc = tf.nn.bias_add(fc, biases, data_format='NHWC')

    # get output dimension
    outdim = list(fc.get_shape()[i].value for i in xrange(len(fc.get_shape())))

    # set output
    self.set_dimension('output', outdim)
    self.set_output('output', fc)
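# --- usage sketch (not part of apiConstructor) ---
# A minimal TF 1.x version of the DNN/CNN path above (flatten -> matmul -> bias_add),
# stripped of the DLMDL scoping, RNN branch and device-placement logic; all names,
# shapes and initializers are illustrative assumptions.
import numpy as np
import tensorflow as tf

def dense_example(input_, num_output, scope='fc_example'):
    in_size = int(np.prod(input_.get_shape().as_list()[1:]))   # flatten all but batch dim
    flatten = tf.reshape(input_, [-1, in_size])
    with tf.variable_scope(scope):
        weights = tf.get_variable('weights', shape=[in_size, num_output], dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        biases = tf.get_variable('biases', shape=[num_output], dtype=tf.float32,
                                 initializer=tf.zeros_initializer())
    return tf.nn.bias_add(tf.matmul(flatten, weights), biases)

x = tf.placeholder(tf.float32, [None, 7, 7, 64])
y = dense_example(x, 10)    # -> shape (?, 10)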