Ejemplo n.º 1
0
    def _generator_wrapper():
        """
        Wrap data_generator, precomputing labels in Python before everything
        becomes tensors.

        NOTE: Local to get_dataset so the generator arguments can be read
        from the enclosing `args` instead of being passed in explicitly.

        Yields:
        caption : ground truth string
        image   : raw mat object image [32, ?, 1]
        label   : list of indices corresponding to out_charset, each
                  temporarily incremented by one; length=len( caption )
        """

        # Extract args (only the first two entries of `args` are used)
        config_path, num_producers = args[0:2]

        # TODO/NOTE currently using 0 to get true single threaded synthesis
        gen = data_generator( config_path, num_producers )

        # Iterate with `for` instead of calling next() inside `while True`:
        # since Python 3.7 (PEP 479) a StopIteration escaping from next()
        # inside a generator body is converted into a RuntimeError, so an
        # exhausted `gen` would crash the consumer rather than simply ending
        # this wrapper.
        for caption, image in gen:
            # Transform string text to sequence of indices using charset dict
            label = charset.string_to_label( caption )

            # Temporarily increment all labels so that zero can be the EOS
            # token during post-batch dense-to-sparse conversion
            label = [ index + 1 for index in label ]

            yield caption, image, label
Ejemplo n.º 2
0
def get_text_and_labels(filename):
    """
    Recover the ground-truth text and its label sequence from an image path.

    The text is the second underscore-delimited field of the file's base
    name, e.g. 2697/6/466_MONIKER_49537.jpg --> MONIKER. A base name that
    contains no underscore raises IndexError.

    Returns the tuple (text, labels), where labels is whatever
    charset.string_to_label produces for the text, e.g.
    MONIKER -> [12, 14, 13, 8, 10, 4, 17]
    """
    # Drop the directory part; only the base name carries the ground truth
    base_name = os.path.basename(filename)

    # At most two splits: leading field, the text, and the remaining tail
    fields = base_name.split('_', 2)
    text = fields[1]

    # Map the string to its sequence of charset indices
    return text, charset.string_to_label(text)