def makeProto(self, GLOBAL):
    return Props.proto + (
        PD('i', '', integer(), equalto('i', GLOBAL)),
        PD('m3', '', integer(), equalto('m2', GLOBAL)),
        PD('D3', '', integer(), equalto('D2', GLOBAL)),
        PD('j', '', integerOrNone(), 2),
        PD('k', '', integerOrNone(), 2),
        PD('l', '', integerOrNone(), 2),
    )
def makeProto(self, GLOBAL):
    return Props.proto + (
        PD('i', '', integer(), LambdaVal(lambda _, __: GLOBAL.m + GLOBAL.D)),
        PD('m2', '', integer(), equalto('m', GLOBAL)),
        PD('D2', '', integer(), equalto('D', GLOBAL)),
        PD('j', '', integerOrNone(), None),
        PD('k', '', integerOrNone(), 1),
    )
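# --- Hedged sketch (added for illustration; not part of the original source) --
# In the second makeProto() variant above, the default for 'i' (the layer's
# input size) is GLOBAL.m + GLOBAL.D, presumably because the embedded token
# vector (size m) is concatenated with the image-context vector (size D) before
# being fed to the first decoder layer. A plain restatement of that arithmetic,
# independent of the dlc helpers (the function name is hypothetical):
def _first_layer_input_size_sketch(m, D):
    """Input width of the first decoder layer: embedding dim plus context dim."""
    return m + D

# e.g. with the GlobalParams defaults below (m=64, D=512 with no regrouping),
# _first_layer_input_size_sketch(64, 512) == 576.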
class GlobalParams(dlc.HyperParams):
    """ Common Properties to trickle down. """
    proto = (
        ## Data-set Properties ##
        PD('raw_data_dir',
           'Filesystem path of the raw_data folder where the pre-processed data is stored.',
           dlc.instanceof(str)),
        PD('image_shape_unframed',
           'Shape of input images. Should be a python sequence. '
           'This is superseded by image_shape, which optionally includes an extra padding frame around the input image. '
           'Value is loaded from the dataset and is not configurable.',
           issequenceof(int),
           # Set dynamically based on dataset.
           ),
        PD('MaxSeqLen',
           "Max sequence length including the end-of-sequence marker token. Used to "
           "limit the number of decoding steps. Value is loaded from the dataset and is not configurable.",
           integer(151),
           # Set dynamically based on dataset.
           ),
        PD('K',
           'Vocabulary size including zero. Value is loaded from the dataset and is not configurable.',
           (358, 557, 339),
           # Set dynamically based on dataset.
           # LambdaVal(lambda _, d: 557+1 if d.use_ctc_loss else 557)
           # get_vocab_size(data_folder) + 1 for Blank-Token
           ),
        PD('CTCBlankTokenID',
           'ID of the CTC blank token. Per tf.nn.ctc requirements, the blank token must be == K-1. '
           'Value is loaded from the dataset and is not configurable.',
           integerOrNone(),
           # Set dynamically based on dataset.
           ),
        PD('SpaceTokenID',
           'Space Token ID if present in the dataset.',
           integerOrNone(),
           # Set dynamically based on dataset.
           ),
        PD('NullTokenID',
           'ID of the EOS token == Null token. Must be zero. Its value is loaded from the dataset and is not configurable.',
           (0,),
           # Set dynamically based on dataset.
           ),
        PD('StartTokenID',
           'ID of the begin-sequence token. The value is loaded from the dataset and is not configurable.',
           (1,),
           # Set dynamically based on dataset.
           ),
        ###############################
        PD('build_image_context',
           """
           (enum): Type of image-context conv-net to build:
               0 => Do not build the conv-net. Use pre-generated image features instead.
               1 => Use the VGG16 conv-net model (imported from Keras).
               2 => Use a custom conv-net (defined in make_hyper).
           """,
           (0, 1, 2)),
        PD('build_scanning_RNN',
           '(boolean): Whether to build a regular RNN or a scanning RNN.',
           boolean,
           ),
        PD('B',
           '(integer): Size of mini-batch for training, validation and testing graphs/towers. '
           'NOTE: Batch-size for the data-reader is different and set under property "data_reader_B".',
           integer(1),
           ),
        PD('n',
           "The variable n in the paper: the number of units in the decoder_lstm cell(s). "
           "The paper uses a value of 1000.",
           (1000, 1500),
           1500),
        PD('m',
           '(integer): dimensionality of the embedded input vector (Ex). '
           "Note: For a stacked CALSTM, the upper layers are fed the output of the previous CALSTM, "
           "therefore their input dimensionality will not equal the embedding dimensionality; rather "
           "it will equal the output_size of the previous CALSTM. That's why this value needs to be "
           "appropriately adjusted for upper CALSTM layers.",
           (64, 3),
           LambdaVal(lambda _, p: 3 if p.build_scanning_RNN else 64)),
        PD('REGROUP_IMAGE',
           """
           Specifies how the image feature vectors should be grouped together along the Height and Width axes.
           For example, if the original context feature-map had dimensions (3,33,512) - i.e. H=3, W=33 and D=512 -
           and REGROUP_IMAGE was (3,3), then the new context-map would have shape (1, 11, 512*3*3), resulting in
           H=1, W=11, D=4608 and L=11 (the arithmetic is spelled out in the sketch after this class).
           A None value implies no regrouping.
           """,
           issequenceofOrNone(int),
           ),
        PD('image_size',
           'Older image-size was "small". '
Newer one is "big"', ('small', 'big'), 'big'), PD('H0', 'Height of feature-map produced by conv-net. Specific to the dataset image size.', integer(1), LambdaVal(lambda _, p: 4 if (p.image_size == 'big') else 3) # LambdaVal(lambda _, p: 8 if (p.build_image_context == 2) else (4 if p.dataset == 3 else 3)) ), PD('W0', 'Width of feature-map produced by conv-net. Specific to the dataset image size.', integer(1), LambdaVal(lambda _, p: 34 if (p.image_size == 'big') else 33) # LambdaVal(lambda _, p: 68 if (p.build_image_context == 2) else (34 if p.dataset == 3 else 33)) ), PD( 'L0', '(integer): number of pixels in an image feature-map coming out of conv-net = H0xW0 (see paper or model description)', integer(1), LambdaVal(lambda _, p: p.H0 * p.W0)), PD( 'D0', '(integer): number of features coming out of the conv-net. Depth/channels of the last conv-net layer.' 'See paper or model description.', integer(1), 512), PD( 'H', 'Height of feature-map produced fed to the decoder.', integer(1), LambdaVal(lambda _, p: p.H0 if (p.REGROUP_IMAGE is None) else p.H0 // p.REGROUP_IMAGE[0])), PD( 'W', 'Width of feature-map fed to the decoder.', integer(1), LambdaVal(lambda _, p: p.W0 if (p.REGROUP_IMAGE is None) else p.W0 // p.REGROUP_IMAGE[1])), PD( 'L', '(integer): number of pixels in an image feature-map fed to the decoder = HxW (see paper or model description)', integer(1), LambdaVal(lambda _, p: p.H * p.W)), PD( 'D', '(integer): number of image-features fed to the decoder. Depth/channels of the last conv-net layer.' 'See paper or model description.', integer(1), LambdaVal(lambda _, p: p.D0 if (p.REGROUP_IMAGE is None) else p.D0 * p.REGROUP_IMAGE[0] * p.REGROUP_IMAGE[1])), PD( 'tb', "Tensorboard Params.", instanceof(TensorboardParams), ), PD( 'dropout', 'Dropout parameters if any - global. Absence of this property ' 'signals no dropouts. If this is non-None, then weights regularizer should be None.', instanceofOrNone(DropoutParams)), PD('dtype', 'tensorflow float type for the entire model.', (tf.float32, tf.float64), tf.float32), PD('dtype_np', 'dtype for the entire model.', (np.float32, np.float64), np.float32), PD('int_type', 'tensorflow int type for the entire model.', (tf.int32, tf.int64), tf.int32), PD('int_type_np', 'numpy inttype for the entire model.', (np.int32, np.int64), np.int32), PD( 'weights_initializer', 'Tensorflow weights initializer function', iscallable(), tf.contrib.layers.xavier_initializer( uniform=True, dtype=tf.float32) ## = glorot_uniform # tf.contrib.layers.variance_scaling_initializer() ), PD( 'biases_initializer', 'Tensorflow biases initializer function, e.g. tf.zeros_initializer(). ', iscallable(), tf.zeros_initializer()), PD( 'rLambda', 'Lambda value (scale) for regularizer.', decimal(), ), PD( 'weights_regularizer', 'L1 / L2 norm regularization. If this is non-None then dropout should be None.', iscallableOrNone(), # tf.contrib.layers.l2_regularizer(scale=1.0, scope='L2_Regularizer') # tf.contrib.layers.l1_regularizer(scale=1.0, scope="L1_Regularizer") ), PD( 'use_ctc_loss', "Whether to train using ctc_loss or cross-entropy/log-loss/log-likelihood. In either case " "ctc_loss will be logged. 
           "Also, use_ctc_loss must be turned on if building a scanning-RNN.",
           boolean,
           LambdaVal(lambda _, p: p.build_scanning_RNN)),
        PD('biases_regularizer',
           'L1 / L2 norm regularization',
           iscallable(noneokay=True),
           None),
        PD('use_peephole',
           '(boolean): whether to employ peephole connections in the decoder LSTM',
           (True, False),
           True),
        PD('logger',
           'Python logger object for logging.',
           instanceof(logging.Logger)),
    )

    def __init__(self, initVals=None):
        dlc.HyperParams.__init__(self, self.proto, initVals)
        self._trickledown()

    def _trickledown(self):
        ## Load dataset properties and trickle them down into the dependent hyper-params.
        with open(os.path.join(self.raw_data_dir, 'data_props.pkl'), 'rb') as pickle_file:
            data_props = np.load(pickle_file, encoding="latin1", allow_pickle=True)
        num_channels = 1 if (self.build_image_context == 2) else 3
        self.image_shape_unframed = (data_props['padded_image_dim']['height'],
                                     data_props['padded_image_dim']['width'],
                                     num_channels)
        self.SpaceTokenID = data_props['SpaceTokenID']
        self.NullTokenID = data_props['NullTokenID']
        self.StartTokenID = data_props['StartTokenID']
        self.MaxSeqLen = int(data_props['MaxSeqLen'])
        if self.SpaceTokenID is not None:
            if False:  # self.use_ctc_loss:
                self.K = int(data_props['K']) + 1
                self.CTCBlankTokenID = self.K - 1
            else:
                self.K = int(data_props['K'])
                self.CTCBlankTokenID = None
        else:
            self.K = int(data_props['K']) + 1
            self.CTCBlankTokenID = self.K - 1

    def __copy__(self):
        ## Shallow copy
        return self.__class__(self)

    def copy(self, override_vals={}):
        ## Shallow copy
        return self.__class__(self).updated(override_vals)
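# --- Hedged sketch (added for illustration; not part of the original source) --
# The REGROUP_IMAGE arithmetic that the LambdaVal defaults of 'H', 'W', 'D' and
# 'L' above compute, written out as a plain helper so the shape math is easy to
# verify in isolation. The function name is hypothetical; the model itself
# derives these values through the GlobalParams property defaults.
def _regrouped_shape_sketch(H0, W0, D0, regroup):
    """Return (H, W, D, L) of the decoder's feature-map, optionally regrouped."""
    if regroup is None:
        H, W, D = H0, W0, D0
    else:
        H = H0 // regroup[0]
        W = W0 // regroup[1]
        D = D0 * regroup[0] * regroup[1]
    return H, W, D, H * W

# e.g. _regrouped_shape_sketch(3, 33, 512, (3, 3)) == (1, 11, 4608, 11), which
# matches the example in the REGROUP_IMAGE description above.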