def _build(self, features, labels, params, mode, config=None):
    input_images = features['images']
    input_images = tf.convert_to_tensor(input_images)
    print_error(input_images)
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Build the inference graph: score map and geometry map predictions.
    with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope(), reuse=False):
        f_score, f_geometry = model(input_images, is_training=is_training)

    loss = None
    train_op = None
    predictions = {"f_score": f_score, "f_geometry": f_geometry}

    if mode != tf.estimator.ModeKeys.PREDICT:
        input_score_maps = features['score_maps']
        input_geo_maps = features['geo_maps']
        # input_training_masks = features['training_masks']

        model_loss = get_loss(input_score_maps, f_score,
                              input_geo_maps, f_geometry)  # , input_training_masks)
        # Total loss = model loss + any regularization losses collected in the graph.
        loss = tf.add_n([model_loss] + tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES))

        # Add summaries
        logging.info(input_images)
        tf.compat.v1.summary.image('input', input_images)
        tf.compat.v1.summary.image('score_map', input_score_maps)
        tf.compat.v1.summary.image('score_map_pred', f_score * 255)
        tf.compat.v1.summary.image('geo_map_0', input_geo_maps[:, :, :, 0:1])
        tf.compat.v1.summary.image('geo_map_0_pred', f_geometry[:, :, :, 0:1])
        # tf.compat.v1.summary.image('training_masks', input_training_masks)
        tf.compat.v1.summary.scalar('model_loss', model_loss)
        tf.compat.v1.summary.scalar('total_loss', loss)

        train_op = self._get_optimizer(loss=loss)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'predict': tf.estimator.export.PredictOutput(predictions)
        },
        loss=loss,
        train_op=train_op,
        eval_metric_ops=None)
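
# For export-time context: the `export_outputs` above pairs with a serving
# input receiver when the estimator is exported as a SavedModel. The repo's
# actual export helper (`export_model`) lives elsewhere; this is a minimal
# sketch under the assumption that serving feeds float image batches. The
# placeholder shape is a guess; adjust it to the real input pipeline.
def example_serving_input_receiver_fn():
    images = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[None, None, None, 3], name="images")
    return tf.estimator.export.ServingInputReceiver(
        features={"images": images},
        receiver_tensors={"images": images})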
def __init__(self, character):
    # character (str): set of the possible characters.
    # [GO] is the start token of the attention decoder; [s] is the end-of-sentence token.
    list_token = ['[GO]', '[s]']  # ['[s]', '[UNK]', '[PAD]', '[GO]']
    list_character = list(character)
    self.character = list_token + list_character

    print_error("===================================")
    print_error(self.character)
    print_error("===================================")

    # Map each token/character to its index in the vocabulary.
    self.dict = {}
    for i, char in enumerate(self.character):
        self.dict[char] = i
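
# For illustration: how the character-to-index map built above is typically
# consumed when a transcript is encoded for the attention decoder. This is a
# hedged sketch, not necessarily this repo's encode logic; `example_encode`
# is a hypothetical name, and the [GO]/[s] bracketing is an assumption based
# on the token list defined in __init__. Characters outside the vocabulary
# would raise KeyError here.
def example_encode(converter, text):
    tokens = ['[GO]'] + list(text) + ['[s]']
    return [converter.dict[char] for char in tokens]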
def __init__(self,
             data_dir=gin.REQUIRED,
             out_dir=gin.REQUIRED,
             max_image_large_side=1280,
             max_text_size=800,
             min_text_size=5,
             min_crop_side_ratio=0.1,
             geometry="RBOX",
             number_images_per_tfrecords=8,
             num_cores=4,
             batch_size=4,
             prefetch_size=16):
    """
    :param data_dir: Root directory of the raw input data.
    :param out_dir: Directory under which train/val/test TFRecords are written.
    :param max_image_large_side: Maximum size of the larger image side.
    :param max_text_size: Maximum text size to consider.
    :param min_text_size: Minimum text size; smaller instances are ignored.
    :param min_crop_side_ratio: Minimum ratio of a random crop's side to the image side.
    :param geometry: Geometry type of the ground truth, e.g. "RBOX".
    :param number_images_per_tfrecords: Number of images stored per TFRecord file.
    :param num_cores: Not used as of now.
    :param batch_size: Batch size for the input pipeline.
    :param prefetch_size: Number of batches to prefetch.
    """
    TensorFlowDataset.__init__(self,
                               data_dir=data_dir,
                               batch_size=batch_size,
                               num_cores=num_cores)

    self._data_dir = data_dir
    self._train_out_dir = out_dir + "/train/"
    self._val_out_dir = out_dir + "/val/"
    self._test_out_dir = out_dir + "/test/"

    make_dirs(self._train_out_dir)
    make_dirs(self._val_out_dir)
    make_dirs(self._test_out_dir)

    self._geometry = geometry
    self._min_text_size = min_text_size
    self._max_image_large_side = max_image_large_side
    self._max_text_size = max_text_size
    self._min_crop_side_ratio = min_crop_side_ratio
    self._number_images_per_tfrecords = number_images_per_tfrecords
    # self.preprocess()

    self._num_cores = num_cores
    self._batch_size = batch_size
    self._prefetch_size = prefetch_size

    # TODO find a right way to get the number of training examples
    path = os.path.join(self._train_out_dir, "*.tfrecords").replace("//", "/")
    print_error(path)
    files = glob.glob(pathname=path)
    self._num_train_examples = get_tf_records_count(files=files)
    self.get_number_steps_per_epcoh(self._num_train_examples)

    self._train_dataset = None
    self._val_dataset = None
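
# `get_tf_records_count` is a repo helper defined elsewhere; below is a minimal
# sketch of one way such a counter can work (an assumption, not the repo's
# implementation). It relies on eager execution (the TF2 default) to iterate
# the serialized records without parsing them.
def example_tf_records_count(files):
    count = 0
    for file_path in files:
        for _ in tf.data.TFRecordDataset(file_path):
            count += 1
    return count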
def main(args):
    memory_used = []
    process = psutil.Process(os.getpid())

    # TODO add these into argparser
    IS_EAST_IMAGE_TEST = True
    NUM_ARRAYS_PER_FILE = 10000
    # TODO the decode function needs this value as part of the dataset map function,
    # hence it is hard-coded for now. If needed, change it manually in
    # `numpy_array_decode` in dummy_dataset.py as well.
    NUM_FEATURES = 250
    NUM_IMAGES_PER_FILE = 8
    BATCH_SIZE = 4
    TRAIN_DATA = os.getcwd() + "/data/train_data_img"
    VAL_DATA = os.getcwd() + "/data/val_data_img"
    MODEL_DIR = os.getcwd() + "/data/east_net"
    EXPORT_DIR = MODEL_DIR + "/export"
    NUM_EPOCHS = 3
    NUM_SAMPLES_PER_FILE = NUM_IMAGES_PER_FILE

    if args["dataset"] == "numpy":
        IS_EAST_IMAGE_TEST = False
        BATCH_SIZE = 128
        TRAIN_DATA = os.getcwd() + "/data/train_data"
        VAL_DATA = os.getcwd() + "/data/val_data"
        MODEL_DIR = os.getcwd() + "/data/fwd_nnet"
        EXPORT_DIR = MODEL_DIR + "/export"
        NUM_EPOCHS = 3
        NUM_SAMPLES_PER_FILE = NUM_ARRAYS_PER_FILE
    elif args["dataset"] == "east":
        pass
    else:
        print_error("Invalid dataset")

    # Integer division: this value feeds RunConfig step counts, which must be ints.
    TOTAL_STEPS_PER_FILE = NUM_SAMPLES_PER_FILE // BATCH_SIZE

    if args["delete"]:
        print_info("Deleting old data files")
        shutil.rmtree(TRAIN_DATA, ignore_errors=True)
        shutil.rmtree(VAL_DATA, ignore_errors=True)

    gen_data(IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST,
             TRAIN_DATA=TRAIN_DATA,
             VAL_DATA=VAL_DATA,
             NUM_SAMPLES_PER_FILE=NUM_SAMPLES_PER_FILE,
             NUM_FEATURES=NUM_FEATURES,
             number_files=int(args["num_tfrecord_files"]))

    if args["mode"] == "test_iterator":
        print('objgraph growth list start')
        objgraph.show_growth(limit=50)
        print('objgraph growth list end')

        test_dataset(data_path=TRAIN_DATA, BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)
        test_dataset(data_path=TRAIN_DATA, BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)
        test_dataset(data_path=VAL_DATA, BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)

        print('objgraph growth list start')
        objgraph.show_growth(limit=50)
        print('objgraph growth list end')
        return

    if IS_EAST_IMAGE_TEST:
        model = EASTTFModel(model_root_directory="store")
    else:
        model = NNet()

    # A plausible sketch of `_init_tf_config` appears after main() below.
    estimator = tf.estimator.Estimator(
        model_fn=model,
        config=_init_tf_config(TOTAL_STEPS_PER_FILE=TOTAL_STEPS_PER_FILE,
                               MODEL_DIR=MODEL_DIR),
        params=None)

    memory_usage_psutil()
    print('objgraph growth list start')
    objgraph.show_growth(limit=50)
    print('objgraph growth list end')
    # print(objgraph.get_leaking_objects())

    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> New Epoch")
    memory_usage_psutil()
    # memory_used.append(process.memory_info()[0] / float(2 ** 20))
    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Training")

    train_n_evaluate(estimator=estimator,
                     TRAIN_DATA=TRAIN_DATA,
                     VAL_DATA=VAL_DATA,
                     BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST,
                     max_steps=None,
                     NUM_EPOCHS=NUM_EPOCHS)

    print('objgraph growth list start')
    objgraph.show_growth(limit=50)
    print('objgraph growth list end')
    memory_usage_psutil()

    # plt.plot(memory_used)
    # plt.title('Evolution of memory')
    # plt.xlabel('iteration')
    # plt.ylabel('memory used (MB)')
    # plt.savefig("logs/" + args["dataset"] + "_dataset_memory_usage.png")
    # plt.show()
print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> New Epoch") export_model(estimator=estimator, model_export_path=EXPORT_DIR, IS_EAST_MODEL=IS_EAST_IMAGE_TEST) (objgraph.get_leaking_objects())
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Testing TF Dataset memory usage')
    # NOTE: argparse's `type=bool` treats any non-empty string as True,
    # so a store_true flag is used instead.
    parser.add_argument('-d', "--delete",
                        action="store_true",
                        help="Delete old data files")
    parser.add_argument('-m', "--mode",
                        default="",
                        help="[test_iterator]")
    parser.add_argument('-ds', "--dataset",
                        default="east",
                        help="[east/numpy]")
    parser.add_argument('-nf', "--num_tfrecord_files",
                        type=int,
                        default=5,
                        help="Number of train TFRecord files to generate")

    parsed_args = vars(parser.parse_args())
    print_error(parsed_args)

    tracemalloc.start()
    main(parsed_args)
    snapshot = tracemalloc.take_snapshot()
    display_top(snapshot)

"""
References:
- https://medium.com/mostly-ai/tensorflow-records-what-they-are-and-how-to-use-them-c46bc4bbb564
"""
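
# `display_top` is the repo's tracemalloc reporting helper, defined elsewhere.
# Below is a condensed sketch of such a helper, following the standard recipe
# from the Python docs (https://docs.python.org/3/library/tracemalloc.html);
# the name and formatting here are assumptions, not the repo's code.
def example_display_top(snapshot, limit=10):
    snapshot = snapshot.filter_traces((
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
    ))
    top_stats = snapshot.statistics('lineno')
    for index, stat in enumerate(top_stats[:limit], 1):
        frame = stat.traceback[0]
        print("#%s: %s:%s: %.1f KiB"
              % (index, frame.filename, frame.lineno, stat.size / 1024))
    total = sum(stat.size for stat in top_stats)
    print("Total allocated size: %.1f KiB" % (total / 1024))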