Example #1
    def compile_time_operation(self, learning_option, cluster):
        """
        define fully-connected (FC) operation for the input tensor.
        """
        # get input
        input_ = self.get_input('input')
        indim = self.get_dimension('input')

        # get attr
        # required field
        num_output = self.get_attr('num_output', default=None)
        if num_output is None:
            raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('num_output', self.name))

        # optional field
        bias_term = self.get_attr('bias_term', default=True)
        initializer = self.get_attr('initializer', default={'weight': {}, 'bias': {}})  # default will be set later
        regularizer = self.get_attr('regularizer', default={})  # default will be set later

        # get weight for fully-connected layer
        weight_init = get_initializer(initializer.get('weight'), is_bias=False)
        weight_reg, weight_reg_type = get_regularizer(regularizer, is_bias=False)
        decay_mul = [weight_reg]
        # if bias_term is True, add bias term to fc output
        if bias_term:
            bias_init = get_initializer(initializer.get('bias'), is_bias=True)
            bias_reg, bias_reg_type = get_regularizer(regularizer, is_bias=True)
            decay_mul.append(bias_reg)
        else:
            bias_init = {}

        # check regularizer type
        tmp_reg = learning_option.get('caffe_reg_type')
        if tmp_reg is None:
            learning_option['caffe_reg_type'] = weight_reg_type
        else:
            if tmp_reg != weight_reg_type:
                raise Exception('[DLMDL ERROR]: In caffe, regularizer type of all layers must be equal')

        fc = L.InnerProduct(input_, name=self.name, num_output=num_output,
                            weight_filler=weight_init, bias_filler=bias_init,
                            param=decay_mul)

        outdim = [indim[0], num_output]

        self.set_output('output', fc)
        self.set_dimension('output', outdim)
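
Since get_initializer and get_regularizer are DLMDL helpers not shown here, the following is a minimal sketch of the filler and param dictionaries they are assumed to return for the pycaffe NetSpec API; the keys and values are illustrative, not taken from the helpers.

from caffe import layers as L

# Assumed helper outputs: one filler dict per parameter, plus a list of
# per-parameter multipliers for the 'param' field.
weight_init = {'type': 'xavier'}                  # assumed weight filler
bias_init = {'type': 'constant', 'value': 0.0}    # assumed bias filler
decay_mul = [{'lr_mult': 1, 'decay_mult': 1},     # weights: decayed
             {'lr_mult': 2, 'decay_mult': 0}]     # bias: no weight decay

data = L.Input(shape=dict(dim=[1, 64]))
fc = L.InnerProduct(data, name='fc1', num_output=128,
                    weight_filler=weight_init, bias_filler=bias_init,
                    param=decay_mul)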
Example #2
        def apiConstructor():
            # get weight for gru cell
            weight_init = get_initializer(initializer.get('weight'),
                                          is_bias=False)
            bias_init = get_initializer(initializer.get('bias'), is_bias=True)
            # TODO: GRU cuDNN cell (cuDNN implementation of GRU layer)
            """
            TODO: add conditional branch later. TF v1.1 cannot add a branch since the name attribute does not exist in the BasicRNNCell API.
            def f1(): return tf.contrib.rnn.GRUCell(hidden_size, activation=activation, kernel_initializer=weight_init, bias_initializer=bias_init) # for training procedure
            def f2(): return tf.contrib.rnn.GRUCell(hidden_size, activation=activation,
                                               reuse=True, kernel_initializer=weight_init, bias_initializer=bias_init) # for test procedure
            gru_cell = tf.cond(is_train, f1, f2, name=self.name)
            """

            gru_cell = tf.contrib.rnn.GRUCell(
                hidden_size,
                kernel_initializer=weight_init,
                activation=activation,
                bias_initializer=bias_init)  # for training procedure

            # set output
            self.set_output('output', gru_cell)
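
The cell built above still has to be unrolled over time before it produces activations. A hedged TF 1.x usage sketch; the input shape and hidden size are illustrative, not taken from DLMDL:

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 28, 128])   # [batch, time, features]
cell = tf.contrib.rnn.GRUCell(64)                      # hidden_size = 64
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)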
Example #3
    def compile_time_operation(self, learning_option, cluster):
        """
        define parametric rectified linear unit (PReLU) operation for the input tensor.
        It follows f(x) = alpha * x for x < 0 and f(x) = x for x >= 0, where alpha is a learned array with the same shape as x.
        """
        # get input
        input_ = self.get_input('input')
        indim = self.get_dimension('input')

        # get attr
        # optional field
        initializer = self.get_attr('initializer',
                                    default={
                                        'weight': {},
                                        'bias': {}
                                    })  # default will be set later
        regularizer = self.get_attr('regularizer',
                                    default={})  # default will be set later
        ch_shared = self.get_attr('channel_shared', default=False)

        # get weight (alpha) for PReLU
        alpha_init = get_initializer(initializer.get('weight'), is_bias=False)
        alpha_reg, alpha_reg_type = get_regularizer(regularizer, is_bias=False)

        # check regularizer type
        tmp_reg = learning_option.get('caffe_reg_type')
        if tmp_reg is None:
            learning_option['caffe_reg_type'] = alpha_reg_type
        else:
            if tmp_reg != alpha_reg_type:
                raise Exception(
                    '[DLMDL ERROR]: In caffe, regularizer type of all layers must be equal'
                )

        prelu = L.PReLU(input_,
                        name=self.name,
                        weight_filler=alpha_init,
                        channel_shared=ch_shared,
                        param=[alpha_reg])

        # set output dimension
        outdim = indim

        self.set_output('output', prelu)
        self.set_dimension('output', outdim)
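
A quick NumPy check of the PReLU definition from the docstring; the slope 0.25 is an illustrative constant, whereas the layer above learns alpha:

import numpy as np

alpha = 0.25
x = np.array([-2.0, -0.5, 0.0, 1.5])
y = np.where(x < 0, alpha * x, x)
# y -> [-0.5, -0.125, 0.0, 1.5]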
Example #4
        def apiConstructor():
            # get weight for prelu
            alpha_init = get_initializer(initializer.get('weight'), is_bias=False)
            alpha_reg = get_regularizer(regularizer, scope, is_bias=False)

            # WARNING: constraint of the alpha weight is always None
            # tf.keras.layers.PReLU is a layer class: construct it first,
            # then apply it to the input tensor
            prelu = tf.keras.layers.PReLU(alpha_initializer=alpha_init,
                                          alpha_regularizer=alpha_reg,
                                          alpha_constraint=None,
                                          shared_axes=ch_shared)(input_)

            # get output dimension
            outdim = indim

            # set output
            self.set_dimension('output', outdim)
            self.set_output('output', prelu)

            # set tf summary
            tf.summary.histogram(self.name, prelu)
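
For reference, a hedged sketch of how shared_axes expresses channel sharing in tf.keras: sharing the slope across the spatial axes of an NHWC tensor leaves one learned alpha per channel. The shapes are illustrative:

import tensorflow as tf

x = tf.keras.layers.Input(shape=(32, 32, 16))
y = tf.keras.layers.PReLU(shared_axes=[1, 2])(x)  # one alpha per channel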
Example #5
    def run_time_operation(self, learning_option, cluster):
        """
        define convolution operation for input tensor
        outputs:
            output: convolution output
        """
        # get input
        input_ = self.get_input('input')
        indim = self.get_dimension('input')

        # get attr
        # required field
        kernel_size = self.get_attr('kernel_size', default=None)
        if kernel_size is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'kernel_size', self.name))
        num_output = self.get_attr('num_output', default=None)
        if num_output is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'num_output', self.name))

        # optional field
        padding = self.get_attr('padding', default='VALID')
        stride = self.get_attr('stride', default=1)
        bias_term = self.get_attr('bias_term', default=True)
        initializer = self.get_attr('initializer',
                                    default={
                                        'weight': {},
                                        'bias': {}
                                    })  # default will be set later
        regularizer = self.get_attr('regularizer',
                                    default={})  # default will be set later
        dilate = self.get_attr('dilate', default=None)
        scope = self.get_attr('scope', default=self.name)

        # get worker info: worker num, device type, device num
        device = self.get_attr('device')
        num = re.sub('[^0-9]', '', cluster.get('types')[device])
        dev_type = cluster.get('types')[device].replace(str(num), '')  # avoid shadowing built-in 'type'

        # get shape array
        stride_shape = [stride, stride]
        weight_shape = [kernel_size[0], kernel_size[1], indim[3], num_output]
        dilate_shape = [dilate, dilate] if dilate is not None else None
        bias_shape = [num_output]

        with tf.variable_scope(self.name):
            # get weight for convolution
            with tf.variable_scope(scope):
                weight_init = get_initializer(initializer.get('weight'),
                                              is_bias=False)
                weight_reg = get_regularizer(regularizer, is_bias=False)
                weights = tf.get_variable('weights',
                                          shape=weight_shape,
                                          dtype=tf.float32,
                                          initializer=weight_init,
                                          regularizer=weight_reg,
                                          trainable=True)
                #tf.add_to_collection(scope, weights)

                if bias_term:
                    bias_init = get_initializer(initializer.get('bias'),
                                                is_bias=True)
                    bias_reg = get_regularizer(regularizer, is_bias=True)
                    biases = tf.get_variable('biases',
                                             shape=bias_shape,
                                             dtype=tf.float32,
                                             initializer=bias_init,
                                             regularizer=bias_reg,
                                             trainable=True)
                    #tf.add_to_collection(scope, biases)

        # construct API
        def apiConstructor():

            conv = tf.nn.convolution(input_,
                                     weights,
                                     padding,
                                     strides=stride_shape,
                                     dilation_rate=dilate_shape,
                                     data_format='NHWC')

            # if bias_term is True, add bias term to convolution output
            if bias_term:
                conv = tf.nn.bias_add(conv, biases, data_format='NHWC')

            # get output dimension
            outdim = list(conv.get_shape()[i].value
                          for i in range(len(conv.get_shape())))

            # set output
            self.set_dimension('output', outdim)
            self.set_output('output', conv)

            # set tf summary
            tf.summary.histogram(self.name, conv)

        with tf.variable_scope(self.name):
            # single node, model parallelism: explicit worker mapping
            # data parallelism: equally duplicate model
            if learning_option.get("parallel", None) != "DP":
                with tf.device('/job:worker/task:{0}/{1}:{2}'.format(
                        device, dev_type, num)):
                    apiConstructor()
            else:
                apiConstructor()
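
The worker-info parsing at the top of this example is easy to check in isolation; a small sketch assuming a cluster whose 'types' entry is the string 'gpu0':

import re

device = 0
types = {0: 'gpu0'}                              # assumed cluster layout
num = re.sub('[^0-9]', '', types[device])        # '0'
dev_type = types[device].replace(str(num), '')   # 'gpu'
print('/job:worker/task:{0}/{1}:{2}'.format(device, dev_type, num))
# -> /job:worker/task:0/gpu:0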
Example #6
    def compile_time_operation(self, learning_option, cluster):
        """
        define convolution operation for input blob
        """
        # get input
        input_ = self.get_input('input')
        indim = self.get_dimension('input')

        # get attr
        # required field
        kernel_size = self.get_attr('kernel_size', default=None)
        if kernel_size is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'kernel_size', self.name))
        num_output = self.get_attr('num_output', default=None)
        if num_output is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'num_output', self.name))

        # optional field
        padding = self.get_attr('padding', default='VALID')
        stride = self.get_attr('stride', default=1)
        bias_term = self.get_attr('bias_term', default=True)
        initializer = self.get_attr('initializer',
                                    default={
                                        'weight': {},
                                        'bias': {}
                                    })  # default will be set later
        regularizer = self.get_attr('regularizer',
                                    default={})  # default will be set later
        group = self.get_attr('group', default=1)

        # get weight for convolution
        weight_init = get_initializer(initializer.get('weight'), is_bias=False)
        weight_reg, weight_reg_type = get_regularizer(regularizer,
                                                      is_bias=False)

        # if bias_term is True, add bias term to convolution output
        if bias_term:
            bias_init = get_initializer(initializer.get('bias'), is_bias=True)
            bias_reg, bias_reg_type = get_regularizer(regularizer,
                                                      is_bias=True)
        else:
            bias_init = None
            bias_reg = None
            bias_reg_type = None

        # check regularizer type
        tmp_reg = learning_option.get('caffe_reg_type')
        if tmp_reg is None:
            learning_option['caffe_reg_type'] = weight_reg_type
        else:
            if (tmp_reg != weight_reg_type
                    or (bias_term and tmp_reg != bias_reg_type)):
                raise Exception(
                    '[DLMDL ERROR]: In caffe, regularizer type of all layers must be equal'
                )

        # padding
        if padding == 'SAME':
            outdim = [
                np.ceil(float(indim[i + 2]) / float(stride)) for i in range(2)
            ]
            outdim.insert(0, indim[0])
            outdim.insert(1, num_output)
            p = [
                int(((outdim[i + 2] - 1) * stride + kernel_size[i] -
                     indim[i + 2]) / 2) for i in range(2)
            ]
        else:
            outdim = [
                np.ceil(
                    float(indim[i + 2] - kernel_size[i] + 1) / float(stride))
                for i in range(2)
            ]
            outdim.insert(0, indim[0])
            outdim.insert(1, num_output)
            p = [0, 0]

        conv = L.Convolution(input_,
                             name=self.name,
                             kernel_h=kernel_size[0],
                             kernel_w=kernel_size[1],
                             num_output=num_output,
                             stride=stride,
                             group=group,
                             pad_h=p[0],
                             pad_w=p[1],
                             weight_filler=weight_init,
                             bias_filler=bias_init,
                             param=[weight_reg, bias_reg])

        self.set_output('output', conv)
        self.set_dimension('output', outdim)
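
A worked instance of the SAME-padding arithmetic above, assuming an NCHW input of 32x32, a 3x3 kernel, and stride 1:

import numpy as np

indim, kernel_size, stride = [1, 3, 32, 32], [3, 3], 1
out = [int(np.ceil(float(indim[i + 2]) / stride)) for i in range(2)]
# out -> [32, 32]
p = [((out[i] - 1) * stride + kernel_size[i] - indim[i + 2]) // 2
     for i in range(2)]
# p -> [1, 1], i.e. one pixel of zero padding on each side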
Example #7
def main():
    params = argparse.ArgumentParser(
        description='CLI to train sockeye sequence-to-sequence models.')
    arguments.add_train_cli_args(params)
    args = params.parse_args()

    utils.seedRNGs(args)

    check_arg_compatibility(args)
    output_folder = os.path.abspath(args.output)
    resume_training, training_state_dir = check_resume(args, output_folder)

    global logger
    logger = setup_main_logger(__name__,
                               file_logging=True,
                               console=not args.quiet,
                               path=os.path.join(output_folder, C.LOG_NAME))
    utils.log_basic_info(args)
    with open(os.path.join(output_folder, C.ARGS_STATE_NAME), "w") as fp:
        json.dump(vars(args), fp)

    with ExitStack() as exit_stack:
        context = determine_context(args, exit_stack)
        vocab_source, vocab_target = load_or_create_vocabs(
            args, resume_training, output_folder)
        vocab_source_size = len(vocab_source)
        vocab_target_size = len(vocab_target)
        logger.info("Vocabulary sizes: source=%d target=%d", vocab_source_size,
                    vocab_target_size)
        train_iter, eval_iter, config_data = create_data_iters(
            args, vocab_source, vocab_target)
        lr_scheduler_instance = create_lr_scheduler(args, resume_training,
                                                    training_state_dir)

        model_config = create_model_config(args, vocab_source_size,
                                           vocab_target_size, config_data)
        model_config.freeze()

        training_model = create_training_model(model_config, args, context,
                                               train_iter,
                                               lr_scheduler_instance,
                                               resume_training,
                                               training_state_dir)

        weight_initializer = initializer.get_initializer(
            default_init_type=args.weight_init,
            default_init_scale=args.weight_init_scale,
            default_init_xavier_rand_type=args.weight_init_xavier_rand_type,
            default_init_xavier_factor_type=args.weight_init_xavier_factor_type,
            embed_init_type=args.embed_weight_init,
            embed_init_sigma=vocab_source_size**-0.5,  # TODO
            rnn_init_type=args.rnn_h2h_init)

        optimizer, optimizer_params, kvstore, gradient_clipping_type, gradient_clipping_threshold = define_optimizer(
            args, lr_scheduler_instance)

        # Handle options that override training settings
        max_updates = args.max_updates
        max_num_checkpoint_not_improved = args.max_num_checkpoint_not_improved
        min_num_epochs = args.min_num_epochs
        max_num_epochs = args.max_num_epochs
        if min_num_epochs is not None and max_num_epochs is not None:
            check_condition(
                min_num_epochs <= max_num_epochs,
                "Minimum number of epochs must be smaller than maximum number of epochs"
            )
        # Fixed training schedule always runs for a set number of updates
        if args.learning_rate_schedule:
            max_updates = sum(num_updates
                              for (_,
                                   num_updates) in args.learning_rate_schedule)
            max_num_checkpoint_not_improved = -1
            min_num_epochs = None
            max_num_epochs = None

        decode_and_evaluate, decode_and_evaluate_context = determine_decode_and_evaluate_context(
            args, exit_stack, context)

        training_model.fit(
            train_iter,
            eval_iter,
            output_folder=output_folder,
            max_params_files_to_keep=args.keep_last_params,
            metrics=args.metrics,
            initializer=weight_initializer,
            allow_missing_params=args.allow_missing_params,
            max_updates=max_updates,
            checkpoint_frequency=args.checkpoint_frequency,
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            optimized_metric=args.optimized_metric,
            gradient_clipping_type=gradient_clipping_type,
            clip_gradient_threshold=gradient_clipping_threshold,
            kvstore=kvstore,
            max_num_not_improved=max_num_checkpoint_not_improved,
            min_num_epochs=min_num_epochs,
            max_num_epochs=max_num_epochs,
            decode_and_evaluate=decode_and_evaluate,
            decode_and_evaluate_context=decode_and_evaluate_context,
            use_tensorboard=args.use_tensorboard,
            mxmonitor_pattern=args.monitor_pattern,
            mxmonitor_stat_func=args.monitor_stat_func,
            lr_decay_param_reset=args.learning_rate_decay_param_reset,
            lr_decay_opt_states_reset=args.learning_rate_decay_optimizer_states_reset)
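
Assumed entry point when the module is executed directly; sockeye's released console-script wiring may differ:

if __name__ == "__main__":
    main()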
Example #8
        def apiConstructor():
            if learning_option.get('num_steps') is None:  # DNN/CNN case
                # if this layer is first fc, flatten input
                if len(indim) == 2:
                    weight_shape = [indim[1], num_output]
                    flatten = input_
                else:
                    flatten = tf.reshape(input_,
                                         [-1, indim[1] * indim[2] * indim[3]])
                    weight_shape = [flatten.get_shape()[1].value, num_output]
                with tf.variable_scope(scope):
                    # get weight for fc
                    weight_init = get_initializer(initializer.get('weight'),
                                                  is_bias=False)
                    weight_reg = get_regularizer(regularizer,
                                                 scope,
                                                 is_bias=False)
                    if learning_option.get("parallel", None) == "DP_mb":
                        with tf.device(
                                '/job:worker/task:{0}/mb:0'.format(device)):
                            weights = tf.get_variable('weights',
                                                      shape=weight_shape,
                                                      dtype=tf.float32,
                                                      initializer=weight_init,
                                                      regularizer=weight_reg,
                                                      trainable=True)
                    else:
                        weights = tf.get_variable('weights',
                                                  shape=weight_shape,
                                                  dtype=tf.float32,
                                                  initializer=weight_init,
                                                  regularizer=weight_reg,
                                                  trainable=True)
                    tf.add_to_collection(scope, weights)

                fc = tf.matmul(flatten, weights)

                # if bias_term is True, add bias term to fc output
                if bias_term:
                    with tf.variable_scope(scope):
                        bias_shape = [num_output]
                        bias_init = get_initializer(initializer.get('bias'),
                                                    is_bias=True)
                        bias_reg = get_regularizer(regularizer,
                                                   scope,
                                                   is_bias=True)
                        if learning_option.get("parallel", None) == "DP_mb":
                            with tf.device('/job:worker/task:{0}/mb:0'.format(
                                    device)):
                                biases = tf.get_variable('biases',
                                                         shape=bias_shape,
                                                         dtype=tf.float32,
                                                         initializer=bias_init,
                                                         regularizer=bias_reg,
                                                         trainable=True)
                        else:
                            biases = tf.get_variable('biases',
                                                     shape=bias_shape,
                                                     dtype=tf.float32,
                                                     initializer=bias_init,
                                                     regularizer=bias_reg,
                                                     trainable=True)
                        tf.add_to_collection(scope, biases)

                    fc = tf.nn.bias_add(fc, biases, data_format='NHWC')

            # WARNING: in a recurrent neural network, there is only one fully-connected layer
            else:  # RNN/LSTM/GRU case
                hidden_size = learning_option.get('hidden_size')
                weight_shape = [hidden_size, num_output]

                # get weight for fc
                with tf.variable_scope(scope):
                    weight_init = get_initializer(initializer.get('weight'),
                                                  is_bias=False)
                    weight_reg = get_regularizer(regularizer,
                                                 scope,
                                                 is_bias=False)
                    if learning_option.get("parallel", None) == "DP_mb":
                        with tf.device(
                                '/job:worker/task:{0}/mb:0'.format(device)):
                            weights = tf.get_variable('weights',
                                                      shape=weight_shape,
                                                      dtype=tf.float32,
                                                      initializer=weight_init,
                                                      regularizer=weight_reg,
                                                      trainable=True)
                    else:
                        weights = tf.get_variable('weights',
                                                  shape=weight_shape,
                                                  dtype=tf.float32,
                                                  initializer=weight_init,
                                                  regularizer=weight_reg,
                                                  trainable=True)
                    tf.add_to_collection(scope, weights)

                if learning_option.get('is_image'):  # MNIST rnn
                    fc = tf.matmul(input_[-1], weights)
                else:
                    reshape_input_ = tf.reshape(
                        tf.stack(axis=1, values=input_), [-1, hidden_size])
                    fc = tf.matmul(reshape_input_, weights)

                # if bias_term is True, add bias term to fc output
                if bias_term:
                    with tf.variable_scope(scope):
                        bias_shape = [num_output]
                        bias_init = get_initializer(initializer.get('bias'),
                                                    is_bias=True)
                        bias_reg = get_regularizer(regularizer,
                                                   scope,
                                                   is_bias=True)
                        if learning_option.get("parallel", None) == "DP_mb":
                            with tf.device('/job:worker/task:{0}/mb:0'.format(
                                    device)):
                                biases = tf.get_variable('biases',
                                                         shape=bias_shape,
                                                         dtype=tf.float32,
                                                         initializer=bias_init,
                                                         regularizer=bias_reg,
                                                         trainable=True)
                        else:
                            biases = tf.get_variable('biases',
                                                     shape=bias_shape,
                                                     dtype=tf.float32,
                                                     initializer=bias_init,
                                                     regularizer=bias_reg,
                                                     trainable=True)
                        tf.add_to_collection(scope, biases)
                    fc = tf.nn.bias_add(fc, biases, data_format='NHWC')

            # get output dimension
            outdim = list(fc.get_shape()[i].value
                          for i in range(len(fc.get_shape())))

            # set output
            self.set_dimension('output', outdim)
            self.set_output('output', fc)
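
A worked check of the flatten branch above (TF 1.x): an assumed [batch, 4, 4, 8] input reshapes to [batch, 128], so weight_shape becomes [128, num_output]:

import tensorflow as tf

input_ = tf.placeholder(tf.float32, [None, 4, 4, 8])
flatten = tf.reshape(input_, [-1, 4 * 4 * 8])     # [batch, 128]
weights = tf.get_variable('w', shape=[128, 64])   # num_output = 64
fc = tf.matmul(flatten, weights)                  # [batch, 64]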