Example #1
def makeProto(self, GLOBAL):
    return Props.proto + (
        PD('i', '', integer(), equalto('i', GLOBAL)),
        PD('m3', '', integer(), equalto('m2', GLOBAL)),
        PD('D3', '', integer(), equalto('D2', GLOBAL)),
        PD('j', '', integerOrNone(), 2),
        PD('k', '', integerOrNone(), 2),
        PD('l', '', integerOrNone(), 2),
    )
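
The snippet above relies on helpers from the surrounding codebase (Props, PD, integer, integerOrNone, equalto). As a rough sketch of the idea - an assumption about this codebase, not its actual implementation - equalto can be read as a deferred default that mirrors a property of another params object:

# Hypothetical sketch only; the real equalto/LambdaVal live in the dlc module.
def equalto(name, other_props):
    # Return a deferred default that resolves to other_props.<name> at the
    # time the property is read, not when the proto tuple is defined.
    return LambdaVal(lambda _name, _props: getattr(other_props, name))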
Example #2
def makeProto(self, GLOBAL):
    return Props.proto + (
        PD('i', '', integer(),
           LambdaVal(lambda _, __: GLOBAL.m + GLOBAL.D)),
        PD('m2', '', integer(), equalto('m', GLOBAL)),
        PD('D2', '', integer(), equalto('D', GLOBAL)),
        PD('j', '', integerOrNone(), None),
        PD('k', '', integerOrNone(), 1),
    )
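
Here the default for 'i' is computed lazily as GLOBAL.m + GLOBAL.D. Assuming (again, a sketch, not the actual dlc implementation) that the framework invokes the wrapper with the property name and the params object, LambdaVal could look like this:

class LambdaVal:
    """Hypothetical sketch: a deferred default, evaluated against the
    fully-populated params object instead of at proto-definition time."""
    def __init__(self, fn):
        self._fn = fn

    def resolve(self, name, params):
        # The two-argument signature matches how the lambdas above are
        # written: lambda <name>, <params>: ...
        return self._fn(name, params)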
Example #3
class GlobalParams(dlc.HyperParams):
    """ Common Properties to trickle down. """
    proto = (
        ## Data-set Properties ##
        PD(
            'raw_data_dir',
            'Filesystem path of raw_data_folder from where the pre-processed data is stored.',
            dlc.instanceof(str)),
        PD(
            'image_shape_unframed',
            'Shape of input images. Should be a python sequence. '
            'This is superseded by image_shape, which optionally includes an extra padding frame around the input image. '
            'Value is loaded from the dataset and is not configurable.',
            issequenceof(int),
            # Set dynamically based on dataset.
        ),
        PD(
            'MaxSeqLen',
            "Max sequence length including the end-of-sequence marker token. Is used to "
            "limit the number of decoding steps. Value is loaded from the dataset and is not configurable.",
            integer(151),
            # Set dynamically based on dataset.
        ),
        PD(
            'K',
            'Vocabulary size including zero. Value is loaded from the dataset and is not configurable.',
            (358, 557, 339),
            # Set dynamically based on dataset.
            # LambdaVal(lambda _, d: 557+1 if d.use_ctc_loss else 557) #get_vocab_size(data_folder) + 1 for Blank-Token
        ),
        PD(
            'CTCBlankTokenID',
            'ID of the space/blank token. Per tf.nn.ctc requirements, the blank token must be == K-1. '
            'Value is loaded from the dataset and is not configurable.',
            integerOrNone(),
            # Set dynamically based on dataset.
        ),
        PD(
            'SpaceTokenID',
            'Space Token ID if present in the dataset.',
            integerOrNone(),
            # Set dynamically based on dataset.
        ),
        PD(
            'NullTokenID',
            'ID of the EOS token == Null Token. Must be zero. Its value is loaded from the dataset and is not configurable.',
            (0, ),
            # Set dynamically based on dataset.
        ),
        PD(
            'StartTokenID',
            'ID of the begin-sequence token. The value is loaded from the dataset and is not configurable.',
            (1, ),
            # Set dynamically based on dataset.
        ),
        ###############################
        PD(
            'build_image_context', """
            (enum): Type of decoder conv-net model to use:
            0 => Do not build decoder conv-net. Use pre-generated image features instead.
            1 => Use VGG16 Conv-Net model (imported from Keras).
            2 => Use a custom conv-net (defined in make_hyper)
            """, (0, 1, 2)),
        PD(
            'build_scanning_RNN',
            '(boolean): Whether to build a regular RNN or a scanning RNN',
            boolean,
        ),
        PD(
            'B',
            '(integer): Size of mini-batch for training, validation and testing graphs/towers. '
            'NOTE: Batch-size for the data-reader is different and set under property "data_reader_B"',
            integer(1),
        ),
        PD(
            'n',
            "The variable n in the paper. The number of units in the decoder_lstm cell(s). "
            "The paper uses a value of 1000.", (1000, 1500), 1500),
        PD(
            'm',
            '(integer): dimensionality of the embedded input vector (Ex). '
            'Note: For a stacked CALSTM, the upper layers are fed the output of the previous '
            'CALSTM, so their input dimensionality equals the output_size of the previous '
            'CALSTM rather than the embedding dimensionality. This value therefore needs to be '
            'adjusted appropriately for upper CALSTM layers.', (64, 3),
            LambdaVal(lambda _, p: 3 if p.build_scanning_RNN else 64)),
        PD(
            'REGROUP_IMAGE',
            """
            Specifies how the image feature vectors should be grouped together 
            along Height and Width axes. For e.g. if the original dimension of the context feature map was (3,33,512) 
            - i.e. original H=3, original W=33 and D=512- and if REGROUP_IMAGE was (3,3) then the new 
            context-map would have shape (1, 11, 512*3*3) resulting in H=1, W=33, D=4608 and L=33.
            A None value implies no regrouping.
            """,
            issequenceofOrNone(int),
        ),
        PD('image_size', 'Image size: older datasets used "small"; newer ones use "big".',
           ('small', 'big'), 'big'),
        PD('H0',
           'Height of feature-map produced by conv-net. Specific to the dataset image size.',
           integer(1),
           LambdaVal(lambda _, p: 4 if (p.image_size == 'big') else 3)
           # LambdaVal(lambda _, p: 8 if (p.build_image_context == 2) else (4 if p.dataset == 3 else 3))
           ),
        PD('W0',
           'Width of feature-map produced by conv-net. Specific to the dataset image size.',
           integer(1),
           LambdaVal(lambda _, p: 34 if (p.image_size == 'big') else 33)
           # LambdaVal(lambda _, p: 68 if (p.build_image_context == 2) else (34 if p.dataset == 3 else 33))
           ),
        PD(
            'L0',
            '(integer): number of pixels in an image feature-map coming out of conv-net = H0xW0 (see paper or model description)',
            integer(1), LambdaVal(lambda _, p: p.H0 * p.W0)),
        PD(
            'D0',
            '(integer): number of features coming out of the conv-net. Depth/channels of the last conv-net layer. '
            'See paper or model description.', integer(1), 512),
        PD(
            'H', 'Height of feature-map fed to the decoder.',
            integer(1),
            LambdaVal(lambda _, p: p.H0 if (p.REGROUP_IMAGE is None) else p.H0
                      // p.REGROUP_IMAGE[0])),
        PD(
            'W', 'Width of feature-map fed to the decoder.', integer(1),
            LambdaVal(lambda _, p: p.W0 if (p.REGROUP_IMAGE is None) else p.W0
                      // p.REGROUP_IMAGE[1])),
        PD(
            'L',
            '(integer): number of pixels in an image feature-map fed to the decoder = HxW (see paper or model description)',
            integer(1), LambdaVal(lambda _, p: p.H * p.W)),
        PD(
            'D',
            '(integer): number of image-features fed to the decoder. Depth/channels of the last conv-net layer. '
            'See paper or model description.', integer(1),
            LambdaVal(lambda _, p: p.D0 if (p.REGROUP_IMAGE is None) else p.D0
                      * p.REGROUP_IMAGE[0] * p.REGROUP_IMAGE[1])),
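        # Worked example (cf. the REGROUP_IMAGE docstring): with H0=3, W0=33,
        # D0=512 and REGROUP_IMAGE=(3, 3), the formulas above give
        # H = 3 // 3 = 1, W = 33 // 3 = 11, D = 512 * 3 * 3 = 4608 and L = 1 * 11 = 11.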
        PD(
            'tb',
            "Tensorboard Params.",
            instanceof(TensorboardParams),
        ),
        PD(
            'dropout',
            'Global dropout parameters, if any. Absence of this property '
            'signals no dropout. If this is non-None, then weights_regularizer should be None.',
            instanceofOrNone(DropoutParams)),
        PD('dtype', 'tensorflow float type for the entire model.',
           (tf.float32, tf.float64), tf.float32),
        PD('dtype_np', 'dtype for the entire model.', (np.float32, np.float64),
           np.float32),
        PD('int_type', 'tensorflow int type for the entire model.',
           (tf.int32, tf.int64), tf.int32),
        PD('int_type_np', 'numpy int type for the entire model.',
           (np.int32, np.int64), np.int32),
        PD(
            'weights_initializer',
            'Tensorflow weights initializer function',
            iscallable(),
            tf.contrib.layers.xavier_initializer(
                uniform=True, dtype=tf.float32)  ## = glorot_uniform
            # tf.contrib.layers.variance_scaling_initializer()
        ),
        PD(
            'biases_initializer',
            'Tensorflow biases initializer function, e.g. tf.zeros_initializer().',
            iscallable(), tf.zeros_initializer()),
        PD(
            'rLambda',
            'Lambda value (scale) for regularizer.',
            decimal(),
        ),
        PD(
            'weights_regularizer',
            'L1 / L2 norm regularization. If this is non-None then dropout should be None.',
            iscallableOrNone(),
            # tf.contrib.layers.l2_regularizer(scale=1.0, scope='L2_Regularizer')
            # tf.contrib.layers.l1_regularizer(scale=1.0, scope="L1_Regularizer")
        ),
        PD(
            'use_ctc_loss',
            "Whether to train using ctc_loss or cross-entropy/log-loss/log-likelihood. In either case "
            "ctc_loss will be logged. Also, use_ctc_loss must be turned on if building scanning-RNN.",
            boolean, LambdaVal(lambda _, p: p.build_scanning_RNN)),
        PD('biases_regularizer', 'L1 / L2 norm regularization',
           iscallable(noneokay=True), None),
        PD(
            'use_peephole',
            '(boolean): whether to employ peephole connections in the decoder LSTM',
            (True, False), True),
        PD('logger', 'Python logger object for logging.',
           instanceof(logging.Logger)),
    )

    def __init__(self, initVals=None):
        dlc.HyperParams.__init__(self, self.proto, initVals)
        self._trickledown()

    def _trickledown(self):
        with open(os.path.join(self.raw_data_dir, 'data_props.pkl'),
                  'rb') as pickle_file:
            data_props = np.load(pickle_file,
                                 encoding="latin1",
                                 allow_pickle=True)
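        # data_props is a pickled dict of dataset metadata; np.load with
        # allow_pickle=True simply unpickles the file here.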
        num_channels = 1 if (self.build_image_context == 2) else 3
        self.image_shape_unframed = (data_props['padded_image_dim']['height'],
                                     data_props['padded_image_dim']['width'],
                                     num_channels)

        self.SpaceTokenID = data_props['SpaceTokenID']
        self.NullTokenID = data_props['NullTokenID']
        self.StartTokenID = data_props['StartTokenID']
        self.MaxSeqLen = int(data_props['MaxSeqLen'])
        if self.SpaceTokenID is not None:
            if False:  # Disabled branch; originally `if self.use_ctc_loss:`.
                self.K = int(data_props['K']) + 1
                self.CTCBlankTokenID = self.K - 1
            else:
                self.K = int(data_props['K'])
                self.CTCBlankTokenID = None
        else:
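            # No space token in the dataset: append a CTC blank token to the
            # vocabulary (== K-1, as tf.nn.ctc_loss requires).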
            self.K = int(data_props['K']) + 1
            self.CTCBlankTokenID = self.K - 1

    def __copy__(self):
        ## Shallow copy
        return self.__class__(self)

    def copy(self, override_vals=None):
        ## Shallow copy; avoids a shared mutable default argument.
        return self.__class__(self).updated(override_vals or {})
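
A hypothetical usage sketch, assuming dlc.HyperParams accepts a dict of initial values (the property names come from the proto above; the path, logger name and values are placeholders):

import logging

params = GlobalParams({
    'raw_data_dir': '/path/to/raw_data_folder',  # placeholder path
    'build_image_context': 1,   # use the VGG16 conv-net
    'build_scanning_RNN': False,
    'B': 64,
    'logger': logging.getLogger('model'),
})
# Shallow-copy with overrides, e.g. batch-size 1 for an inference tower.
infer_params = params.copy({'B': 1})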