Code example #1
def load_model(doc_path="inference_data", is_savedmodel=False):
    params = inference_input(doc_path)

    data_loader = DataLoader(
        params,
        params.classes,
        update_dict=False,
        load_dictionary=True,
        data_split=0.0)  # 0.0: no training split; doc_path holds only test data
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes
    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')

    if is_savedmodel:
        sess = load_savedmodel(params.savedmodel_dir)
    else:
        # evaluation
        ckpt_saver = tf.train.Saver()
        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())
        try:
            ckpt_path = os.path.join(params.e_ckpt_path, params.save_prefix,
                                     params.ckpt_file)
            ckpt = tf.train.get_checkpoint_state(ckpt_path)  # note: result unused; see the restore sketch after this example
            print('Restoring from {}...'.format(ckpt_path))
            ckpt_saver.restore(sess, ckpt_path)
            print('{} restored'.format(ckpt_path))
        except Exception as e:
            raise Exception('Check your pretrained {:s}'.format(ckpt_path)) from e

    return network, model_output, sess
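A caveat in the restore block above: tf.train.get_checkpoint_state expects a checkpoint directory, and here its return value is discarded, so the call is effectively dead code; the restore succeeds only because ckpt_path already names a checkpoint prefix. A minimal sketch of the conventional pattern, assuming ckpt_dir is the directory containing the 'checkpoint' index file (helper name is hypothetical):

import tensorflow as tf

def restore_latest(sess, saver, ckpt_dir):
    # get_checkpoint_state parses the 'checkpoint' index file in ckpt_dir
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        return ckpt.model_checkpoint_path
    raise FileNotFoundError('no checkpoint found in {}'.format(ckpt_dir))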
Code example #2
    def get_shape_list(self, tensor):
        # head reconstructed for completeness (method name and signature assumed;
        # this follows the standard BERT-style shape helper): prefer static
        # dimensions, then fall back to tf.shape() for the dynamic ones
        shape = tensor.shape.as_list()
        non_static_indexes = []
        for index, dim in enumerate(shape):
            if dim is None:
                non_static_indexes.append(index)

        if not non_static_indexes:
            return shape

        dyn_shape = tf.shape(tensor)
        for index in non_static_indexes:
            shape[index] = dyn_shape[index]
        return shape


if __name__ == '__main__':
    # data
    data_loader = DataLoader(params,
                             update_dict=False,
                             load_dictionary=params.load_dict,
                             data_split=0.75)

    # save bert dictionary
    with open(params.bert_dict_file, encoding='utf-8') as f:
        vocabs = f.read().split('\n')
    num_words = len(vocabs)
    dictionary = {vocab: 0 for vocab in vocabs}
    word_to_index = {vocab: index for index, vocab in enumerate(vocabs)}
    index_to_word = {index: vocab for index, vocab in enumerate(vocabs)}
    np.save(params.dict_path + '_dictionary.npy', dictionary)
    np.save(params.dict_path + '_word_to_index.npy', word_to_index)
    np.save(params.dict_path + '_index_to_word.npy', index_to_word)

    # model
    bert = BertEmbedding()
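np.save serializes each dict above by wrapping it in a 0-d object array, so reading the files back needs allow_pickle=True plus .item() to unwrap. A minimal round-trip sketch; the dict_path value is assumed (the save code uses params.dict_path):

import numpy as np

dict_path = 'dict/SROIE'  # assumed value of params.dict_path
# .item() unwraps the 0-d object array back into a plain Python dict
word_to_index = np.load(dict_path + '_word_to_index.npy', allow_pickle=True).item()
index_to_word = np.load(dict_path + '_index_to_word.npy', allow_pickle=True).item()
first_token = index_to_word[0]
assert word_to_index[first_token] == 0  # indices survive the round trip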
Code example #3
parser.add_argument('--load_dict_from_path', type=str, default='dict/SROIEnc') # 40000 or table or 20000TC
parser.add_argument('--tokenize', type=bool, default=True) # tokenize input text
parser.add_argument('--text_case', type=bool, default=False) # case sensitive
parser.add_argument('--dict_path', type=str, default='dict/---') # not used if load_dict is True

parser.add_argument('--restore_ckpt', type=bool, default=True)

parser.add_argument('--embedding_size', type=int, default=128)
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--c_threshold', type=float, default=0.5)
params = parser.parse_args()

if __name__ == '__main__':
    # data
    #data_loader = DataLoader(params, True, True) # True to use 25% training data
    data_loader = DataLoader(params, update_dict=False, load_dictionary=True, data_split=0.75)  # 0.75: 75/25 train/validation split (data_split=False would mean the path holds only test data)
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')
    
    # evaluation
    ckpt_saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
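A side note on the type=bool arguments used throughout these scripts: argparse applies bool() to the raw command-line string, and any non-empty string is truthy, so --restore_ckpt False silently parses as True. A common workaround (a sketch, not part of the CUTIE repo) is an explicit converter:

import argparse

def str2bool(value):
    # map explicit strings to booleans instead of relying on bool('False') == True
    if value.lower() in ('true', '1', 'yes'):
        return True
    if value.lower() in ('false', '0', 'no'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got {!r}'.format(value))

parser = argparse.ArgumentParser()
parser.add_argument('--restore_ckpt', type=str2bool, default=True)
print(parser.parse_args(['--restore_ckpt', 'False']).restore_ckpt)  # prints False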
Code example #4
File: test.py  Project: oleg131/CUTIE
# not used if load_dict is True
parser.add_argument('--dict_path', type=str, default='dict/---')

parser.add_argument('--restore_ckpt', type=bool, default=True)

parser.add_argument('--embedding_size', type=int, default=128)
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--c_threshold', type=float, default=0.5)
params = parser.parse_args()

if __name__ == '__main__':
    # data
    # data_loader = DataLoader(params, True, True) # True to use 25% training data
    # False to provide a path with only test data
    data_loader = DataLoader(params,
                             update_dict=False,
                             load_dictionary=True,
                             data_split=False)
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')

    # evaluation
    ckpt_saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
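The excerpt is cut off at the with tf.Session(...) line. Judging from example #1, which wraps the same setup in load_model, the body presumably continues with variable initialization and a checkpoint restore, roughly:

        sess.run(tf.global_variables_initializer())
        # continuation sketch based on example #1; path pieces assumed identical
        ckpt_path = os.path.join(params.e_ckpt_path, params.save_prefix,
                                 params.ckpt_file)
        print('Restoring from {}...'.format(ckpt_path))
        ckpt_saver.restore(sess, ckpt_path)
        # ...followed by the evaluation loop over the test documents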
Code example #5
File: main_train_json.py  Project: LJQCN101/CUTIE
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    filename = os.path.join(
        ckpt_path, network.name + '_d{:d}c{:d}(r{:d}c{:d})_iter_{:d}'.format(
            num_words, num_classes, data_loader.rows_ulimit,
            data_loader.cols_ulimit, iter) + '.ckpt')
    ckpt_saver.save(sess, filename)
    print('\nCheckpoint saved to: {:s}\n'.format(filename))


if __name__ == '__main__':
    pprint(params)
    # data

    data_loader = DataLoader(params,
                             update_dict=params.update_dict,
                             load_dictionary=params.load_dict,
                             data_split=0.99)
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes
    # exercise the data pipeline: pull training and validation batches repeatedly
    for _ in range(2000):
        a = data_loader.next_batch()
        b = data_loader.fetch_validation_data()
    #    c = data_loader.fetch_test_data()

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_loss, regularization_loss, total_loss, model_logits, model_output = \
        network.build_loss()
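The excerpt stops right after build_loss(), before any optimizer is wired up. A hypothetical continuation in the same TF1 style; the AdamOptimizer choice, the learning_rate flag, and the gt_classes placeholder name are all assumptions, while data_grid and the batch keys follow example #7:

    # hypothetical training wiring (optimizer, flag, and placeholder names assumed)
    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    train_op = optimizer.minimize(total_loss)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(10000):  # iteration count assumed
            batch = data_loader.next_batch()
            _, loss_val = sess.run(
                [train_op, total_loss],
                feed_dict={network.data_grid: batch['grid_table'],
                           network.gt_classes: batch['gt_classes']})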
Code example #6
# written by Xiaohui Zhao
# 2018-01
# [email protected]
import tensorflow as tf
import argparse

from data_loader_json import DataLoader

parser = argparse.ArgumentParser(description='CUTIE parameters')
parser.add_argument('--dict_path', type=str, default='dict/SROIE')
parser.add_argument('--doc_path', type=str, default='data/SROIE')
parser.add_argument('--test_path', type=str,
                    default='')  # leave empty if no test data provided
parser.add_argument('--text_case', type=bool, default=True)  # case sensitive
parser.add_argument('--tokenize', type=bool,
                    default=True)  # tokenize input text
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--use_cutie2', type=bool, default=False)
params = parser.parse_args()

if __name__ == '__main__':
    ## run this script once before training to build the basic dictionary
    data_loader = DataLoader(params, update_dict=True, load_dictionary=False)
Code example #7
def infer(doc_path,
          network=network,
          model_output=model_output,
          sess=sess) -> Tuple[List[List[Prediction]], List[str]]:
    params = inference_input(doc_path)

    data_loader = DataLoader(
        params,
        params.classes,
        update_dict=False,
        load_dictionary=True,
        data_split=0.0)  # 0.0: no training split; doc_path holds only test data
    '''
    num_words = max(20000, data_loader.num_words)
    num_classes = data_loader.num_classes

    # model
    if params.use_cutie2:
        network = CUTIEv2(num_words, num_classes, params)
    else:
        network = CUTIEv1(num_words, num_classes, params)
    model_output = network.get_output('softmax')
    
    # evaluation
    ckpt_saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        try:
            ckpt_path = os.path.join(params.e_ckpt_path, params.save_prefix, params.ckpt_file)
            ckpt = tf.train.get_checkpoint_state(ckpt_path)
            print('Restoring from {}...'.format(ckpt_path))
            ckpt_saver.restore(sess, ckpt_path)
            print('{} restored'.format(ckpt_path))
        except:
            raise Exception('Check your pretrained {:s}'.format(ckpt_path))
    '''
    num_test = len(data_loader.validation_docs)
    results = []
    result_files = []
    for i in range(num_test):
        predictions = []
        data = data_loader.fetch_validation_data()
        print('{:d} samples left to be tested'.format(num_test - i))

        # grid_table = data['grid_table']
        # gt_classes = data['gt_classes']
        if params.use_cutie2:
            feed_dict = {
                network.data_grid: data['grid_table'],
                network.data_image: data['data_image'],
                network.ps_1d_indices: data['ps_1d_indices']
            }
        else:
            feed_dict = {network.data_grid: data['grid_table']}
        fetches = [model_output]

        print(data['file_name'][0])
        print(data['grid_table'].shape, data['data_image'].shape,
              data['ps_1d_indices'].shape)

        timer_start = timeit.default_timer()
        [model_output_val] = sess.run(fetches=fetches, feed_dict=feed_dict)
        timer_stop = timeit.default_timer()
        print('\t >>time per step: %.2fs <<' % (timer_stop - timer_start))

        # visualize result
        shape = data['shape']
        file_name = data['file_name'][0]  # use one single file_name
        bboxes = data['bboxes'][file_name]
        if not params.is_table:
            predictions = get_predicted_bboxes(data_loader, params.doc_path,
                                               np.array(data['grid_table'])[0],
                                               np.array(data['gt_classes'])[0],
                                               np.array(model_output_val)[0],
                                               file_name, np.array(bboxes),
                                               shape)
            results.append(predictions)
            result_files.append(file_name)
    return results, result_files
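infer binds network, model_output, and sess as default arguments, which are evaluated once at import time; example #1's load_model exists to produce exactly those three objects. A hypothetical end-to-end call tying the two together (paths assumed):

# hypothetical usage combining example #1's load_model with infer
network, model_output, sess = load_model(doc_path='inference_data')
results, result_files = infer('inference_data',
                              network=network,
                              model_output=model_output,
                              sess=sess)
for file_name, predictions in zip(result_files, results):
    print('{}: {} fields predicted'.format(file_name, len(predictions)))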