def load_model(doc_path="inference_data", is_savedmodel=False):
    params = inference_input(doc_path)
    data_loader = DataLoader(params, params.classes, update_dict=False,
                             load_dictionary=True,
                             data_split=0.0)  # 0.0: treat the whole doc_path as test data
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')

    if is_savedmodel:
        sess = load_savedmodel(params.savedmodel_dir)
    else:
        # evaluation: restore weights from the training checkpoint
        ckpt_saver = tf.train.Saver()
        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())

        ckpt_path = os.path.join(params.e_ckpt_path, params.save_prefix,
                                 params.ckpt_file)
        try:
            print('Restoring from {}...'.format(ckpt_path))
            ckpt_saver.restore(sess, ckpt_path)
            print('{} restored'.format(ckpt_path))
        except Exception:
            raise Exception('Check your pretrained checkpoint {:s}'.format(ckpt_path))
    return network, model_output, sess
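
# load_savedmodel() is referenced above but not defined in this excerpt. The
# following is a minimal TF1-style sketch of what such a loader could look
# like, assuming the model was exported with the standard 'serve' tag; the
# name load_savedmodel_sketch marks it as an assumption, not the project's
# actual implementation.
def load_savedmodel_sketch(savedmodel_dir):
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    # tf.saved_model.loader.load restores both the graph and the variables
    # from the SavedModel directory into the given session
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING],
                               savedmodel_dir)
    return sess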
        if dim is None:
            non_static_indexes.append(index)

    if not non_static_indexes:
        return shape

    dyn_shape = tf.shape(tensor)
    for index in non_static_indexes:
        shape[index] = dyn_shape[index]
    return shape


if __name__ == '__main__':
    # data
    data_loader = DataLoader(params, update_dict=False,
                             load_dictionary=params.load_dict, data_split=0.75)

    # save bert dictionary
    with open(params.bert_dict_file, encoding='utf-8') as f:
        vocabs = f.read().split('\n')
    num_words = len(vocabs)
    dictionary = {vocab: 0 for vocab in vocabs}
    word_to_index = dict(zip(dictionary.keys(), range(num_words)))
    index_to_word = dict(zip(range(num_words), dictionary.keys()))
    np.save(params.dict_path + '_dictionary.npy', dictionary)
    np.save(params.dict_path + '_word_to_index.npy', word_to_index)
    np.save(params.dict_path + '_index_to_word.npy', index_to_word)

    # model
    bert = BertEmbedding()
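
    # Round-trip sketch (not in the original script): the mappings above are
    # plain Python dicts pickled by np.save(), so reading them back needs
    # allow_pickle=True plus .item() to unwrap the 0-d object array.
    # 'dict/bert' stands in for params.dict_path and is hypothetical.
    word_to_index = np.load('dict/bert' + '_word_to_index.npy',
                            allow_pickle=True).item()
    index_to_word = np.load('dict/bert' + '_index_to_word.npy',
                            allow_pickle=True).item()
    # sanity check: the two mappings should be mutual inverses
    assert all(index_to_word[i] == w for w, i in word_to_index.items())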
parser.add_argument('--load_dict_from_path', type=str, default='dict/SROIEnc')  # 40000 or table or 20000TC
# note: argparse's type=bool treats any non-empty string as True
parser.add_argument('--tokenize', type=bool, default=True)  # tokenize input text
parser.add_argument('--text_case', type=bool, default=False)  # case sensitive
parser.add_argument('--dict_path', type=str, default='dict/---')  # not used if load_dict is True
parser.add_argument('--restore_ckpt', type=bool, default=True)

parser.add_argument('--embedding_size', type=int, default=128)
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--c_threshold', type=float, default=0.5)
params = parser.parse_args()


if __name__ == '__main__':
    # data
    # data_loader = DataLoader(params, True, True)  # True to use 25% training data
    data_loader = DataLoader(params, update_dict=False, load_dictionary=True,
                             data_split=0.75)  # 0.75 train/validation split; use 0.0 for a path with only test data
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')

    # evaluation
    ckpt_saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
# not used if load_dict is True
parser.add_argument('--dict_path', type=str, default='dict/---')
parser.add_argument('--restore_ckpt', type=bool, default=True)

parser.add_argument('--embedding_size', type=int, default=128)
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--c_threshold', type=float, default=0.5)
params = parser.parse_args()


if __name__ == '__main__':
    # data
    # data_loader = DataLoader(params, True, True)  # True to use 25% training data
    data_loader = DataLoader(params, update_dict=False, load_dictionary=True,
                             data_split=0.0)  # 0.0 to provide a path with only test data
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')

    # evaluation
    ckpt_saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    filename = os.path.join(
        ckpt_path,
        network.name + '_d{:d}c{:d}(r{:d}c{:d})_iter_{:d}'.format(
            num_words, num_classes, data_loader.rows_ulimit,
            data_loader.cols_ulimit, iter) + '.ckpt')
    ckpt_saver.save(sess, filename)
    print('\nCheckpoint saved to: {:s}\n'.format(filename))


if __name__ == '__main__':
    pprint(params)

    # data
    data_loader = DataLoader(params, update_dict=params.update_dict,
                             load_dictionary=params.load_dict, data_split=0.99)
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    for _ in range(2000):
        a = data_loader.next_batch()
        b = data_loader.fetch_validation_data()
        # c = data_loader.fetch_test_data()

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_loss, regularization_loss, total_loss, model_logits, model_output = \
        network.build_loss()
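
    # Restore sketch (hedged; mirrors the save call above): TF1 checkpoints
    # are restored from the same path prefix that was passed to
    # ckpt_saver.save(). The network name, iteration count and grid shape in
    # this filename are hypothetical examples of the pattern built above.
    restore_saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as restore_sess:
        restore_saver.restore(restore_sess, os.path.join(
            ckpt_path, 'CUTIE_d20000c9(r64c64)_iter_40000.ckpt'))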
def infer(doc_path, network=network, model_output=model_output,
          sess=sess) -> Tuple[List[List[Prediction]], List[str]]:
    # the default arguments assume load_model() was called at module level,
    # so that network, model_output and sess already exist
    params = inference_input(doc_path)
    data_loader = DataLoader(params, params.classes, update_dict=False,
                             load_dictionary=True,
                             data_split=0.0)  # 0.0: treat the whole doc_path as test data
    # graph construction and checkpoint restoration now live in load_model();
    # the equivalent inline setup is kept below for reference
    '''
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')

    # evaluation
    ckpt_saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        ckpt_path = os.path.join(params.e_ckpt_path, params.save_prefix,
                                 params.ckpt_file)
        try:
            print('Restoring from {}...'.format(ckpt_path))
            ckpt_saver.restore(sess, ckpt_path)
            print('{} restored'.format(ckpt_path))
        except Exception:
            raise Exception('Check your pretrained {:s}'.format(ckpt_path))
    '''
    num_test = len(data_loader.validation_docs)
    results = []
    result_files = []
    for i in range(num_test):
        predictions = []
        data = data_loader.fetch_validation_data()
        print('{:d} samples left to be tested'.format(num_test - i))

        # grid_table = data['grid_table']
        # gt_classes = data['gt_classes']
        feed_dict = {network.data_grid: data['grid_table']}
        if params.use_cutie2:
            feed_dict = {
                network.data_grid: data['grid_table'],
                network.data_image: data['data_image'],
                network.ps_1d_indices: data['ps_1d_indices'],
            }
        fetches = [model_output]

        print(data['file_name'][0])
        print(data['grid_table'].shape, data['data_image'].shape,
              data['ps_1d_indices'].shape)
        timer_start = timeit.default_timer()
        [model_output_val] = sess.run(fetches=fetches, feed_dict=feed_dict)
        timer_stop = timeit.default_timer()
        print('\t >> time per step: %.2fs <<' % (timer_stop - timer_start))

        # visualize result
        shape = data['shape']
        file_name = data['file_name'][0]  # use one single file_name
        bboxes = data['bboxes'][file_name]
        if not params.is_table:
            predictions = get_predicted_bboxes(
                data_loader, params.doc_path,
                np.array(data['grid_table'])[0],
                np.array(data['gt_classes'])[0],
                np.array(model_output_val)[0],
                file_name, np.array(bboxes), shape)
        results.append(predictions)
        result_files.append(file_name)
    return results, result_files
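
# End-to-end usage sketch (not from the original sources): build the graph
# once with load_model(), then reuse the returned session for each call to
# infer(). 'inference_data' is the default path, and the printed summary is
# illustrative only.
if __name__ == '__main__':
    network, model_output, sess = load_model(doc_path='inference_data')
    results, result_files = infer('inference_data', network=network,
                                  model_output=model_output, sess=sess)
    for file_name, predictions in zip(result_files, results):
        print('{}: {} predicted fields'.format(file_name, len(predictions)))
    sess.close()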