def __init__(self, batch_size=32, pooling_strategy="REDUCE_MEAN", max_seq_len=40):
    """Initialize BertVector: build the optimized graph, tokenizer, estimator
    and a background prediction thread.

    :param batch_size: inference batch size; tune to available memory (default 32)
    :param pooling_strategy: one of "NONE", "REDUCE_MAX", "REDUCE_MEAN",
        "REDUCE_MEAN_MAX", mapped to the matching ``args.PoolingStrategy`` member
    :param max_seq_len: maximum token sequence length (default 40)
    :raises ValueError: if ``pooling_strategy`` is not a recognized name
    """
    self.max_seq_length = max_seq_len
    self.layer_indexes = args.layer_indexes
    self.gpu_memory_fraction = 1
    # Map the strategy name to its enum member via a lookup table. The
    # original if/elif chain silently passed unknown strings through to
    # optimize_graph, which would fail later with an obscure error —
    # fail fast here instead.
    strategies = {
        "NONE": args.PoolingStrategy.NONE,
        "REDUCE_MAX": args.PoolingStrategy.REDUCE_MAX,
        "REDUCE_MEAN": args.PoolingStrategy.REDUCE_MEAN,
        "REDUCE_MEAN_MAX": args.PoolingStrategy.REDUCE_MEAN_MAX,
    }
    try:
        pooling_strategy = strategies[pooling_strategy]
    except KeyError:
        raise ValueError(
            "unknown pooling_strategy %r; expected one of %s"
            % (pooling_strategy, sorted(strategies))
        ) from None
    # Freeze/optimize the TF graph for the chosen pooling and sequence length.
    self.graph_path = optimize_graph(pooling_strategy=pooling_strategy,
                                     max_seq_len=self.max_seq_length)
    self.tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file,
                                                do_lower_case=True)
    self.batch_size = batch_size
    self.estimator = self.get_estimator()
    # Single-slot queues: the producer blocks until the predictor thread
    # drains the previous batch, keeping exactly one batch in flight.
    self.input_queue = Queue(maxsize=1)
    self.output_queue = Queue(maxsize=1)
    self.predict_thread = Thread(target=self.predict_from_queue, daemon=True)
    self.predict_thread.start()
def __init__(self, batch_size=32):
    """Initialize BertVector: build the optimized graph, tokenizer, estimator
    and a background prediction thread.

    :param batch_size: inference batch size; tune to available memory (default 32)
    """
    # Sequence-length / layer configuration comes from the global args namespace.
    self.max_seq_length, self.layer_indexes = args.max_seq_len, args.layer_indexes
    self.gpu_memory_fraction = 1
    # Produce the frozen, optimized inference graph with default settings.
    self.graph_path = optimize_graph()
    self.tokenizer = tokenization.FullTokenizer(
        vocab_file=args.vocab_file, do_lower_case=True)
    self.batch_size = batch_size
    self.estimator = self.get_estimator()
    # Size-1 queues keep exactly one batch in flight between the caller
    # and the daemon prediction worker started below.
    self.input_queue = Queue(maxsize=1)
    self.output_queue = Queue(maxsize=1)
    self.predict_thread = Thread(target=self.predict_from_queue, daemon=True)
    self.predict_thread.start()
    self.sentence_len = 0