Example 1
    def __init__(self,
                 batch_size=32,
                 max_seq_len=32,
                 graph_file="albert_tiny/graph",
                 vocab_file="albert_tiny/vocab.txt",
                 output_dir="albert_tiny/",
                 config_name="albert_tiny/albert_config_tiny.json",
                 checkpoint_name="albert_tiny/albert_model.ckpt"):
        """
        init BertVector
        :param batch_size:     Depending on your memory default is 32
        """
        self.max_seq_length = max_seq_len
        self.gpu_memory_fraction = 1
        # Reuse a previously optimized graph if one exists on disk;
        # otherwise build it from the checkpoint and config.
        if os.path.exists(graph_file):
            self.graph_path = graph_file
        else:
            self.graph_path = graph.optimize_graph(output_dir, config_name,
                                                   max_seq_len,
                                                   checkpoint_name, graph_file)

        self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                                    do_lower_case=True)
        self.batch_size = batch_size
        self.estimator = self.get_estimator()
        self.input_queue = Queue(maxsize=1)
        self.output_queue = Queue(maxsize=1)
        # Daemon thread that serves predictions from input_queue to
        # output_queue for as long as the process lives.
        self.predict_thread = Thread(target=self.predict_from_queue,
                                     daemon=True)
        self.predict_thread.start()
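
The constructor only wires the pipeline up; getting vectors out is not shown in these examples. A minimal usage sketch, assuming the class also exposes an encode method that feeds input_queue and blocks on output_queue (the method name and return shape are assumptions, not part of the example above):

# Hypothetical usage: encode() is assumed to push sentences to input_queue
# and wait on output_queue for the embeddings.
bert = BertVector(batch_size=32, max_seq_len=32)
vectors = bert.encode(["the first sentence", "the second sentence"])
print(vectors.shape)  # e.g. (2, hidden_size), depending on the model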
Example 2
    def __init__(self,
                 batch_size=2,
                 max_seq_len=200,
                 graph_file="chinese_bert/graph",
                 vocab_file="chinese_bert/vocab.txt",
                 output_dir="chinese_bert/",
                 config_name="chinese_bert/bert_config.json",
                 checkpoint_name="chinese_bert/bert_model.ckpt"):

        self.max_seq_length = max_seq_len
        self.gpu_memory_fraction = 1
        if os.path.exists(graph_file):
            self.graph_path = graph_file
        else:
            self.graph_path = graph.optimize_graph(output_dir, config_name,
                                                   max_seq_len,
                                                   checkpoint_name, graph_file)

        self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                                    do_lower_case=True)
        self.batch_size = batch_size
        self.estimator = self.get_estimator()
        self.input_queue = Queue(maxsize=10)
        self.output_queue = Queue(maxsize=10)
        self.predict_thread = Thread(target=self.predict_from_queue,
                                     daemon=True)
        self.predict_thread.start()
Example 3
    def __init__(self, batch_size=32):
        """
        Initialize BertVector.
        :param batch_size: batch size; choose according to available memory (default 32)
        """
        # Unlike Examples 1 and 2, this variant reads its settings from
        # module-level command-line args rather than constructor parameters.
        self.max_seq_length = args.max_seq_len
        self.layer_indexes = args.layer_indexes
        self.gpu_memory_fraction = args.gpu_memory_fraction
        self.graph_path = optimize_graph()
        self.tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file,
                                                    do_lower_case=True)
        self.batch_size = batch_size
        self.estimator = self.get_estimator()
        self.input_queue = Queue(maxsize=1)
        self.output_queue = Queue(maxsize=1)
        self.predict_thread = Thread(target=self.predict_from_queue,
                                     daemon=True)
        self.predict_thread.start()
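
All three constructors start a predict_from_queue worker thread, but none of the examples show its body. A plausible minimal sketch of that method, assuming the estimator's input_fn streams feature batches from input_queue (the input_fn_builder name and the yield_single_examples choice here are assumptions):

    def predict_from_queue(self):
        # estimator.predict pulls feature batches from input_queue through a
        # generator-backed input_fn, so results can be forwarded one by one.
        for result in self.estimator.predict(input_fn=self.input_fn_builder(),
                                             yield_single_examples=False):
            self.output_queue.put(result)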
Example 4
# NOTE: only the tail of this builder (the output_shapes dict) survives in
# the original example; the enclosing from_generator call and the generator
# name below are reconstructed assumptions.
def input_fn_builder():
    def input_fn():
        return (tf.data.Dataset.from_generator(
            generate_from_queue,
            output_types={
                "unique_ids": tf.int32,
                "input_ids": tf.int32,
                "input_mask": tf.int32,
                "input_type_ids": tf.int32,
            },
            output_shapes={
                "unique_ids": [None],
                "input_ids": [None, None],
                "input_mask": [None, None],
                "input_type_ids": [None, None],
            }))

    return input_fn
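
The generator named in the builder above does not appear in the snippet; a minimal sketch under the same assumption, with input_queue as an assumed module-level queue:

from queue import Queue

# Assumed names (see the NOTE in input_fn_builder): a module-level queue
# and a generator that streams feature dicts from it forever.
input_queue = Queue()

def generate_from_queue():
    while True:
        yield input_queue.get()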


args = get_run_args()

logger = set_logger(colored('VENTILATOR', 'magenta'), args.verbose)
graph_path, bert_config = optimize_graph(args=args)

if graph_path:
    logger.info('optimized graph is stored at: %s' % graph_path)

logger.info('use device %s, load graph from %s' % ('cpu', graph_path))

# device_id=-1 imports TensorFlow configured for CPU-only inference.
tf = import_tf(device_id=-1, verbose=args.verbose, use_fp16=args.fp16)
estimator = get_estimator(args=args, tf=tf, graph_path=graph_path)

# Write a checkpoint every second so the serving state can be resumed.
save_hook = tf.train.CheckpointSaverHook(checkpoint_dir=args.checkpoint_dir,
                                         save_secs=1)
predicts = estimator.predict(input_fn=input_fn_builder(), hooks=[save_hook])

for predict in predicts:
    print(predict)
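
Nothing in the snippet feeds the generator, so the loop above blocks until another thread enqueues features. A hypothetical feeder matching the dtypes and shapes assumed in input_fn_builder, here a batch of one zero-padded sequence of length 32:

import numpy as np

# Hypothetical feeder (run from another thread): shapes follow the
# output_shapes assumed above -- [None] ids, [None, None] token tensors.
input_queue.put({
    "unique_ids": np.array([0], dtype=np.int32),
    "input_ids": np.zeros((1, 32), dtype=np.int32),
    "input_mask": np.zeros((1, 32), dtype=np.int32),
    "input_type_ids": np.zeros((1, 32), dtype=np.int32),
})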