def _build(self, features, labels, params, mode, config=None):

        input_images = features['images']
        input_images = tf.convert_to_tensor(input_images)

        print_error(input_images)

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # Build inference graph
        with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope(),
                                         reuse=False):
            f_score, f_geometry = model(input_images, is_training=is_training)

        loss = None
        optimizer = None
        predictions = {"f_score": f_score, "f_geometry": f_geometry}

        if mode != tf.estimator.ModeKeys.PREDICT:
            input_score_maps = features['score_maps']
            input_geo_maps = features['geo_maps']
            # input_training_masks = features['training_masks']

            # training masks are currently disabled; re-enable by passing
            # input_training_masks as the last argument to get_loss
            model_loss = get_loss(input_score_maps, f_score, input_geo_maps,
                                  f_geometry)
            loss = tf.add_n([model_loss] + tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES))

            # add summary
            logging.info(input_images)

            tf.compat.v1.summary.image('input', input_images)
            tf.compat.v1.summary.image('score_map', input_score_maps)
            tf.compat.v1.summary.image('score_map_pred', f_score * 255)
            tf.compat.v1.summary.image('geo_map_0', input_geo_maps[:, :, :, 0:1])
            tf.compat.v1.summary.image('geo_map_0_pred', f_geometry[:, :, :, 0:1])
            # tf.compat.v1.summary.image('training_masks', input_training_masks)
            tf.compat.v1.summary.scalar('model_loss', model_loss)
            tf.compat.v1.summary.scalar('total_loss', loss)

            # _get_optimizer is expected to return the training op that minimizes this loss
            optimizer = self._get_optimizer(loss=loss)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'predict': tf.estimator.export.PredictOutput(predictions)
            },
            loss=loss,
            train_op=optimizer,
            eval_metric_ops=None)
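
The predictions dict above exposes "f_score" and "f_geometry", so once this model_fn is wrapped in a tf.estimator.Estimator (as in the later example), inference output can be consumed roughly as follows; `predict_input_fn` is a hypothetical name for whatever input function feeds the images:

# minimal usage sketch, assuming `estimator` wraps this model_fn and that
# `predict_input_fn` (hypothetical name) yields {"images": ...} batches
for prediction in estimator.predict(input_fn=predict_input_fn):
    score_map = prediction["f_score"]       # per-pixel text score
    geo_map = prediction["f_geometry"]      # RBOX geometry channels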
Example #2
    def __init__(self, character):
        # character (str): set of the possible characters.
        # [GO] for the start token of the attention decoder. [s] for end-of-sentence token.
        list_token = ['[GO]', '[s]']  # ['[s]','[UNK]','[PAD]','[GO]']
        list_character = list(character)
        self.character = list_token + list_character

        print_error("===================================")
        print_error(self.character)
        print_error("===================================")

        self.dict = {}
        for i, char in enumerate(self.character):
            # print(i, char)
            self.dict[char] = i
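
With self.dict mapping each character (plus the [GO] and [s] tokens) to an index, encoding a ground-truth string is just a lookup per character. A minimal sketch of such an encode step (the method name and scheme are assumptions, not part of the original class) could be:

    def encode(self, text):
        # hypothetical helper: prepend the [GO] start token, append the
        # [s] end-of-sentence token, and map every character to its index
        return ([self.dict['[GO]']]
                + [self.dict[char] for char in text]
                + [self.dict['[s]']])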
Example #3

    def __init__(self,
                 data_dir=gin.REQUIRED,
                 out_dir=gin.REQUIRED,
                 max_image_large_side=1280,
                 max_text_size=800,
                 min_text_size=5,
                 min_crop_side_ratio=0.1,
                 geometry="RBOX",
                 number_images_per_tfrecords=8,
                 num_cores=4,
                 batch_size=4,
                 prefetch_size=16):
        """

        :param data_dir:
        :param out_dir:
        :param max_image_large_side:
        :param max_text_size:
        :param min_text_size:
        :param min_crop_side_ratio:
        :param geometry:
        :param number_images_per_tfrecords:
        :param num_cores: Not used as of now
        :param batch_size:
        :param prefetch_size:
        """

        TensorFlowDataset.__init__(self,
                                   data_dir=data_dir,
                                   batch_size=batch_size,
                                   num_cores=num_cores)

        self._data_dir = data_dir

        self._train_out_dir = out_dir + "/train/"
        self._val_out_dir = out_dir + "/val/"
        self._test_out_dir = out_dir + "/test/"

        make_dirs(self._train_out_dir)
        make_dirs(self._val_out_dir)
        make_dirs(self._test_out_dir)

        self._geometry = geometry
        self._min_text_size = min_text_size
        self._max_image_large_side = max_image_large_side
        self._max_text_size = max_text_size
        self._min_crop_side_ratio = min_crop_side_ratio
        self._number_images_per_tfrecords = number_images_per_tfrecords

        # self.preprocess()

        self._num_cores = num_cores
        self._batch_size = batch_size
        self._prefetch_size = prefetch_size

        self._num_train_examples = 0

        # TODO find a right way to get this
        path = os.path.join(self._train_out_dir, "*.tfrecords")
        path = path.replace("//", "/")
        print_error(path)
        files = glob.glob(pathname=path)
        self._num_train_examples = get_tf_records_count(files=files)

        self.get_number_steps_per_epcoh(self._num_train_examples)

        self._train_dataset = None
        self._val_dataset = None
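
_train_dataset and _val_dataset are left as None here and presumably built lazily elsewhere. A minimal sketch of the kind of pipeline this class would build from its tfrecord files (the method name and parse function are assumptions, since the real feature spec lives in the serialization code) might look like:

    def _make_dataset(self, file_pattern, parse_fn):
        # hypothetical helper: read the tfrecord shards matching file_pattern,
        # decode each record with parse_fn, then batch and prefetch
        files = glob.glob(pathname=file_pattern)
        dataset = tf.data.TFRecordDataset(files)
        dataset = dataset.map(parse_fn, num_parallel_calls=self._num_cores)
        dataset = dataset.batch(self._batch_size)
        dataset = dataset.prefetch(self._prefetch_size)
        return dataset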
Example #4
def main(args):

    memory_used = []
    process = psutil.Process(os.getpid())

    # TODO: expose this as an argparse option
    IS_EAST_IMAGE_TEST = True

    NUM_ARRAYS_PER_FILE = 10000

    # TODO: the decode function needs this value inside the dataset map function, hence it is hardcoded for now.
    # If needed, change it manually in `numpy_array_decode` in dummy_dataset.py as well.
    NUM_FEATURES = 250

    NUM_IMAGES_PER_FILE = 8

    BATCH_SIZE = 4
    TRAIN_DATA = os.getcwd() + "/data/train_data_img"
    VAL_DATA = os.getcwd() + "/data/val_data_img"
    MODEL_DIR = os.getcwd() + "/data/" + "east_net"
    EXPORT_DIR = MODEL_DIR + "/" + "export"
    NUM_EPOCHS = 3
    NUM_SAMPLES_PER_FILE = NUM_IMAGES_PER_FILE

    if args["dataset"] == "numpy":
        IS_EAST_IMAGE_TEST = False
        BATCH_SIZE = 128
        TRAIN_DATA = os.getcwd() + "/data/train_data"
        VAL_DATA = os.getcwd() + "/data/val_data"
        MODEL_DIR = os.getcwd() + "/" + "data/fwd_nnet"
        EXPORT_DIR = MODEL_DIR + "/" + "export"
        NUM_EPOCHS = 3
        NUM_SAMPLES_PER_FILE = NUM_ARRAYS_PER_FILE
    elif args["dataset"] == "east":
        pass
    else:
        print_error("Invalid dataset")

    TOTAL_STEPS_PER_FILE = NUM_SAMPLES_PER_FILE // BATCH_SIZE

    if args["delete"] == True:
        print_info("Deleting old data files")
        shutil.rmtree(TRAIN_DATA)
        shutil.rmtree(VAL_DATA)

    gen_data(IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST,
             TRAIN_DATA=TRAIN_DATA,
             VAL_DATA=VAL_DATA,
             NUM_SAMPLES_PER_FILE=NUM_SAMPLES_PER_FILE,
             NUM_FEATURES=NUM_FEATURES,
             number_files=int(args["num_tfrecord_files"]))

    if args["mode"] == "test_iterator":
        print('objgraph growth list start')
        objgraph.show_growth(limit=50)
        print('objgraph growth list end')

        test_dataset(data_path=TRAIN_DATA,
                     BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)
        test_dataset(data_path=TRAIN_DATA,
                     BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)
        test_dataset(data_path=VAL_DATA,
                     BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)
        print('objgraph growth list start')
        objgraph.show_growth(limit=50)
        print('objgraph growth list end')

        return

    # print(dataset_to_iterator(data_path=TRAIN_DATA))

    if IS_EAST_IMAGE_TEST:
        model = EASTTFModel(model_root_directory="store")
    else:
        model = NNet()

    estimator = tf.estimator.Estimator(
        model_fn=model,
        config=_init_tf_config(TOTAL_STEPS_PER_FILE=TOTAL_STEPS_PER_FILE,
                               MODEL_DIR=MODEL_DIR),
        params=None)
    memory_usage_psutil()
    print('objgraph growth list start')
    objgraph.show_growth(limit=50)
    print('objgraph growth list end')

    # print(objgraph.get_leaking_objects())

    # for epoch in tqdm(range(NUM_EPOCHS)):

    print("\n\n\n\n\n\n")
    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> New Epoch")
    memory_usage_psutil()
    # memory_used.append(process.memory_info()[0] / float(2 ** 20))
    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Training")
    # train(estimator=estimator,
    #       TRAIN_DATA=TRAIN_DATA,
    #       BATCH_SIZE=BATCH_SIZE,
    #       IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)
    # print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Evaluating")
    # evaluate(estimator=estimator,
    #          VAL_DATA=VAL_DATA,
    #          BATCH_SIZE=BATCH_SIZE,
    #          IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)

    train_n_evaluate(estimator=estimator,
                     TRAIN_DATA=TRAIN_DATA,
                     VAL_DATA=VAL_DATA,
                     BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST,
                     max_steps=None,
                     NUM_EPOCHS=NUM_EPOCHS)

    print('objgraph growth list start')
    objgraph.show_growth(limit=50)
    print('objgraph growth list end')
    memory_usage_psutil()

    # plt.plot(memory_used)
    # plt.title('Evolution of memory')
    # plt.xlabel('iteration')
    # plt.ylabel('memory used (MB)')
    # plt.savefig("logs/" + args["dataset"] + "_dataset_memory_usage.png")
    # plt.show()

    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> New Epoch")
    export_model(estimator=estimator,
                 model_export_path=EXPORT_DIR,
                 IS_EAST_MODEL=IS_EAST_IMAGE_TEST)

    # objgraph.get_leaking_objects()  # result was previously discarded; kept for optional debugging
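
_init_tf_config is not shown in this snippet; a plausible sketch, assuming it only sets the model directory and a checkpointing cadence tied to TOTAL_STEPS_PER_FILE, would be:

def _init_tf_config(TOTAL_STEPS_PER_FILE, MODEL_DIR):
    # assumed configuration: checkpoint (and therefore evaluate) roughly once
    # per tfrecord file; the exact settings are a guess, not the original code
    return tf.estimator.RunConfig(
        model_dir=MODEL_DIR,
        save_checkpoints_steps=int(TOTAL_STEPS_PER_FILE),
        keep_checkpoint_max=3)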
Example #5
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Testing TF Dataset Memory usage : ')

    parser.add_argument('-d',
                        "--delete",
                        action='store_true',
                        default=False,
                        help="Delete old data files")
    parser.add_argument('-m',
                        "--mode",
                        default="",
                        help="'test_iterator' to only exercise the dataset iterator; "
                             "leave empty to train, evaluate and export")
    parser.add_argument('-ds',
                        "--dataset",
                        default="east",
                        help="[east/numpy]")
    parser.add_argument('-nf',
                        "--num_tfrecord_files",
                        type=int,
                        default=5,
                        help="number of train tfrecord files to generate")

    parsed_args = vars(parser.parse_args())

    print_error(parsed_args)
    tracemalloc.start()
    main(parsed_args)
    snapshot = tracemalloc.take_snapshot()
    display_top(snapshot)
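
display_top is not defined in this snippet; the Python docs ship a "pretty top" helper for tracemalloc snapshots, and a trimmed sketch along those lines (an assumption about what the project's helper actually does) would be:

def display_top(snapshot, key_type='lineno', limit=10):
    # summarise which source lines allocated the most memory in the snapshot
    top_stats = snapshot.statistics(key_type)
    print("Top %s lines" % limit)
    for index, stat in enumerate(top_stats[:limit], 1):
        frame = stat.traceback[0]
        print("#%s: %s:%s: %.1f KiB"
              % (index, frame.filename, frame.lineno, stat.size / 1024))
    total = sum(stat.size for stat in top_stats)
    print("Total allocated size: %.1f KiB" % (total / 1024))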
"""
References:
- https://medium.com/mostly-ai/tensorflow-records-what-they-are-and-how-to-use-them-c46bc4bbb564
"""