def create_data(file, file_tf):
    # Load the dataset and write it out as a TFRecord file.
    DATA_train = pd.read_csv(file, sep='\t')
    print('Dataset size: ', len(DATA_train))

    if os.path.exists(file_tf):
        print('FILE ALREADY EXISTS {}'.format(file_tf))
        return

    # Drop rows with missing values.
    DATA_train.dropna(axis=0, inplace=True)

    # Extract sentences and labels.
    X = DATA_train['document'].values
    Y = DATA_train['label'].values

    # Preprocess and tokenize the sentences.
    from src.data.preprocessor import PreProcessor
    prep = PreProcessor()

    # Step 1: cleaning.
    X = list(map(lambda x: prep.clean(x)[0], X))

    # Step 2: tokenization into InputFeatures objects.
    X = list(
        map(lambda x: prep.create_InputFeature(x),
            tqdm(X, desc='create_InputFeature')))

    # Write the TFRecord dataset.
    with tf.python_io.TFRecordWriter(file_tf) as writer:

        def _int64List_feature(value):
            """Returns an int64_list feature from a list of bool / enum / int / uint."""
            return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

        def _int64_feature(value):
            """Returns an int64_list feature from a single bool / enum / int / uint."""
            return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

        for i in tqdm(range(len(X)), desc='Writing to {}'.format(file_tf)):
            feature = {
                'input_ids': _int64List_feature(X[i].input_ids),
                'segment_ids': _int64List_feature(X[i].segment_ids),
                'input_masks': _int64List_feature(X[i].input_masks),
                'label': _int64_feature(Y[i])
            }
            features = tf.train.Features(feature=feature)
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())
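# A minimal sketch of reading the TFRecord back with the TF 1.x tf.data API.
# The default max_seq_length=128 is an assumption; it must match whatever
# PreProcessor.create_InputFeature pads input_ids/segment_ids/input_masks to.
def read_data(file_tf, max_seq_length=128):
    feature_spec = {
        'input_ids': tf.FixedLenFeature([max_seq_length], tf.int64),
        'segment_ids': tf.FixedLenFeature([max_seq_length], tf.int64),
        'input_masks': tf.FixedLenFeature([max_seq_length], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64),
    }

    def _parse(record):
        # Inverse of the Example serialization in create_data above.
        return tf.parse_single_example(record, feature_spec)

    return tf.data.TFRecordDataset(file_tf).map(_parse)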
class Handler(metaclass=Singleton):
    def __init__(self):
        self.CONFIG = config.HANDLER
        self.query_maker = QueryMaker()
        self.preprocessor = PreProcessor()
        self._model_wrapper = TensorServer()

    @staticmethod
    def get_response(answer, morphs, distance, measurement, text, sentiment):
        return {
            "morphs": morphs,  # morphological analysis result
            "measurement": measurement,  # similarity measure used: [jaccard, manhattan]
            "with": text,
            "distance": distance,  # distance under that measure
            "answer": answer,
            'sentiment': sentiment
        }

    def handle(self, chat, added_time=None):
        chat, _ = self.preprocessor.clean(chat)
        query = self.query_maker.make_query(chat=chat, added_time=added_time)
        if query is None:  # make_query returns None for an empty chat
            return None
        if query.manhattan_similarity:
            distance = query.manhattan_similarity
        else:
            distance = query.jaccard_similarity
        queries.insert(query)
        sentiment_score = self._model_wrapper.sentiment(chat=chat)[0]
        return self.get_response(answer=query.answer,
                                 morphs=query.morphs,
                                 distance=distance,
                                 measurement=query.measurement,
                                 text=query.matched_question,
                                 sentiment=sentiment_score)
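# A minimal usage sketch of the handler (hypothetical chat text; the web
# entry point that actually calls handle() is not part of this module):
#
#     handler = Handler()
#     response = handler.handle('셔틀버스 언제 와?')  # "When does the shuttle come?"
#     print(response['answer'], response['measurement'], response['distance'])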
from collections import Counter, OrderedDict

import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances

import config
from src.data.preprocessor import PreProcessor
from src.data.query import QueryMaker
from src.db.queries import index as _query
from src.db.questions import index as _questions
from src.model.serving import TensorServer

_tensor_server = TensorServer()
_query_maker = QueryMaker()
_preprocessor = PreProcessor()

CONFIG = config.ANALYSIS

# plt.rcParams["font.family"] = 'NanumGothic'
# plt.rcParams["font.size"] = 5
# plt.rcParams['figure.figsize'] = (15, 15)


def get_Morphs(query):
    query, removed = _preprocessor.clean(query)
    output = _preprocessor.get_morphs(query)
    output['removed'] = removed
    return output
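# A minimal sketch of how the imports above can be used to project stored
# question feature vectors into 2-D with t-SNE. Averaging token-level
# matrices into one sentence vector and the perplexity value are assumptions
# to tune, not part of the original analysis code.
def plot_question_vectors(perplexity=30):
    question_list = list(_questions.find_all())
    vectors = []
    for q in question_list:
        v = np.asarray(q.feature_vector)
        # Token-level (tokens x hidden) matrices become one sentence vector.
        vectors.append(np.mean(v, axis=0) if v.ndim == 2 else v)
    embedded = TSNE(n_components=2, perplexity=perplexity).fit_transform(
        np.asarray(vectors))
    plt.scatter(embedded[:, 0], embedded[:, 1])
    for (x, y), q in zip(embedded, question_list):
        plt.annotate(q.text, (x, y))
    plt.show()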
class Search(metaclass=Singleton):
    def __init__(self):
        self.tfidf_matrix = None
        self.contexts_list = None
        self.CONFIG = config.SEARCH
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words=None, sublinear_tf=self.CONFIG['sublinear_tf'])
        self.preprocessor = PreProcessor()
        self.set_context()
        self.set_tfidf_matrix()
        self.tensor_server = TensorServer()

    def response(self, chat):
        # Find the best-matching context via TF-IDF.
        output = self.find_context(chat)
        context = output['context-1']
        score = output['score-1']
        if score == 0:
            return None, None
        answer = self.tensor_server.search(chat, context)
        return answer, output

    def response_with_subject(self, _chat, _subject):
        context = contexts.find_by_subject(_subject=_subject)
        context = context['text']
        answer = self.tensor_server.search(chat=_chat, context=context)
        return answer, context

    def response_with_context(self, _chat, _context):
        answer = self.tensor_server.search(chat=_chat, context=_context)
        return answer

    def response_with_id(self, _chat, _id):
        context = contexts.find_by_id(_id=_id)['text']
        return self.tensor_server.search(chat=_chat, context=context), context

    def set_tfidf_matrix(self):
        text_list = list(
            map(lambda x: ' '.join(self.preprocessor.get_keywords(x['text'])),
                self.contexts_list))
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(
            text_list).todense().tolist()

    def set_context(self):
        self.contexts_list = list(contexts.find_all())

    def find_context(self, chat):
        chat = ' '.join(self.preprocessor.get_keywords(chat))
        chat_tfidf = self.tfidf_vectorizer.transform([chat]).todense().tolist()[0]
        num_context = len(self.tfidf_matrix)
        score = 0
        ordered_list = []
        output = {
            'context_subject-1': None,
            'context_subject-2': None,
            'context_subject-3': None,
            'context-1': None,
            'context-2': None,
            'context-3': None,
            'score-1': None,
            'score-2': None,
            'score-3': None
        }
        # Dot product between the chat vector and every context row.
        for i in range(num_context):
            context_tfidf = self.tfidf_matrix[i]
            num_context_voca = len(context_tfidf)
            for j in range(num_context_voca):
                score += chat_tfidf[j] * context_tfidf[j]
            ordered_list.append((i, score))
            score = 0
        ordered_list = sorted(ordered_list, key=lambda x: x[1], reverse=True)
        for i in range(self.CONFIG['max_context_num']):
            output['context_subject-{}'.format(i + 1)] = self.get_context(
                ordered_list[i][0])['subject']
            output['score-{}'.format(i + 1)] = ordered_list[i][1]
            output['context-{}'.format(i + 1)] = self.get_context(
                ordered_list[i][0])['text']
        return output

    def get_context(self, idx):
        return self.contexts_list[idx]
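# find_context above scores each context with two nested Python loops. A
# vectorized sketch of the same dot product (assumes numpy as np;
# tfidf_matrix is the dense list-of-lists built in set_tfidf_matrix):
def rank_contexts(tfidf_matrix, chat_tfidf):
    # One matrix-vector product replaces the O(num_context * vocab) loop.
    scores = np.asarray(tfidf_matrix) @ np.asarray(chat_tfidf)
    return sorted(enumerate(scores.tolist()), key=lambda x: x[1], reverse=True)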
class QueryMaker(object):
    def __init__(self):
        self.preprocessor = PreProcessor()
        self.modelWrapper = TensorServer()
        self._question_maker = QuestionMaker()
        self._service_shuttle = ShuttleBus()
        self._service_search = Search()
        self.CONFIG = config.QUERY

    def by_category(self, chat, category, matched_question=None):
        if category == 'shuttle_bus':
            return self._service_shuttle.response()
        elif category == 'talk' or category == 'prepared':
            return {"mode": category, "answer": matched_question.answer}
        elif category == 'food':
            return {'mode': 'food', 'answer': '학식 보여주기'}  # "show cafeteria menu"
        elif category == 'book':
            return {'mode': 'book', 'answer': '도서관 모드 진입'}  # "enter library mode"
        elif category == 'search':
            answer, output = self._service_search.response(chat)
            if not answer:  # no answer came back, so the search failed
                return {'mode': 'unknown',
                        'answer': '무슨 말인지 모르겠다냥~ 다시 해달라냥'}  # "I don't understand, please try again"
            return {'mode': 'search', 'answer': answer, 'output': output}

    def make_query(self, chat, added_time=None, analysis=False):
        chat, removed = self.preprocessor.clean(chat)

        if not chat:  # empty or None after cleaning
            return None

        if not added_time:
            added_time = datetime.utcnow().astimezone(UTC)
        added_time = added_time.astimezone(UTC)

        def get_top(distances, measure='jaccard'):
            if not distances:
                return None
            assert type(distances) is OrderedDict
            output = {}
            for n, each in enumerate(list(distances.items())):
                item = each[0]
                distance = each[1]
                if distance >= self.CONFIG['jaccard_threshold'] \
                        and measure == 'jaccard':
                    question_matched = questions.find_by_text(item)
                    output[n] = (question_matched, distance)
                if distance >= self.CONFIG['cosine_threshold'] \
                        and measure == 'cosine':
                    question_matched = questions.find_by_text(item)
                    output[n] = (question_matched, distance)
                # question_matched = questions.find_by_text(item)
                # output[n] = (question_matched, distance)
            if len(output) == 0:
                return None
            return output

        feature_vector = self.modelWrapper.similarity(chat)
        jaccard_similarity = None
        top_feature_distance = None
        category = None
        keywords = self.preprocessor.get_keywords(chat)
        morphs = self.preprocessor.get_morphs(chat)

        # First look for the top matches by Jaccard similarity.
        jaccard_top_distances = get_top(self.get_jaccard(chat),
                                        measure='jaccard')

        if jaccard_top_distances and not analysis:
            measurement = '자카드 유사도'  # "Jaccard similarity"
            matched_question, jaccard_similarity = \
                jaccard_top_distances[0][0], jaccard_top_distances[0][1]
            category = matched_question.category
        else:
            # No Jaccard match: fall back to a feature-vector distance
            # comparison (Euclidean, Manhattan, or cosine).
            feature_top_distances = get_top(
                self.get_similarity(chat, keywords, analysis),
                measure='cosine')
            if analysis:
                return feature_top_distances
            measurement = self.CONFIG['distance']
            if feature_top_distances is None:
                category = 'search'
                matched_question = None
                top_feature_distance = None
            else:
                matched_question = feature_top_distances[0][0]
                top_feature_distance = feature_top_distances[0][1]
                category = matched_question.category

        answer = self.by_category(chat, category, matched_question)

        query = Query(chat=chat,
                      feature_vector=feature_vector,
                      keywords=keywords,
                      matched_question=matched_question,
                      manhattan_similarity=top_feature_distance,
                      jaccard_similarity=jaccard_similarity,
                      added_time=added_time,
                      answer=answer,
                      morphs=morphs,
                      measurement=measurement,
                      category=category)

        return query

    def get_jaccard(self, chat):
        assert chat is not None
        question_list = questions.find_all()
        assert question_list is not None
        distance_dict = {}

        def _calc_jaccard(A, B):
            VISITED = []
            num_union = len(A) + len(B) - 2  # exclude the two 'text' keys
            num_joint = 0
            for key_a, tag_a in A.items():
                for key_b, tag_b in B.items():
                    if key_a == 'text' or key_b == 'text':
                        continue
                    if key_a == key_b and tag_a == tag_b \
                            and key_a not in VISITED:
                        num_joint += 1
                        VISITED.append(key_a)
            return num_joint / (num_union - num_joint)

        chat_morphs = self.preprocessor.get_morphs(chat)
        for each in question_list:
            question_morphs = self.preprocessor.get_morphs(each.text)
            distance_dict[each.text] = _calc_jaccard(chat_morphs,
                                                     question_morphs)
        return OrderedDict(
            sorted(distance_dict.items(), key=lambda t: t[1], reverse=True))

    def get_similarity(self, chat, keywords, analysis=False):
        assert chat is not None
        feature_vector = self.modelWrapper.similarity(chat)
        question_list = questions.find_by_keywords(keywords=keywords)
        if not question_list:
            # No keyword hit: comparing against every question is possible,
            # but falling through to search mode performs better.
            # question_list = questions.find_all()
            return None

        distances = {}
        a_vector = self.get_weighted_average_vector(chat, feature_vector)
        if type(a_vector) != np.ndarray:
            return None
        for question in question_list:
            b_vector = self.get_weighted_average_vector(
                question.text, question.feature_vector)
            if self.CONFIG['distance'] == 'manhattan':
                distance = manhattan_distance(a_vector, b_vector)
            elif self.CONFIG['distance'] == 'euclidean':
                distance = euclidean_distance(a_vector, b_vector)
            elif self.CONFIG['distance'] == 'cosine':
                distance = cosine_similarity(a_vector, b_vector)
            else:
                raise Exception('CONFIG distance measurement Error!')
            distances[question.text] = distance

        # NOTE: reverse=True sorts by similarity (largest first); this must
        # change if a true distance such as Euclidean is used.
        return OrderedDict(
            sorted(distances.items(), key=lambda t: t[1], reverse=True))

    def get_weighted_average_vector(self, text, vector):
        if len(vector.shape) == 1:
            return vector
        assert len(vector.shape) == 2
        text, _ = self.preprocessor.clean(text)
        tokens = self.preprocessor.str_to_tokens(text)
        idf_ = self._question_maker.idf_
        vocabulary_ = self._question_maker.vocabulary_
        output_vector = []
        for i, token in enumerate(tokens):
            idx = vocabulary_[token]
            idf = idf_[idx]
            # if token == '[UNK]':
            #     continue
            # elif idf == 1.0:
            #     output_vector.append(vector[i])
            #     continue
            # else:
            vector[i] += vector[i] * idf * self.CONFIG['idf_weight']
            output_vector.append(vector[i])
        if output_vector:
            return np.sum(output_vector, axis=0)
        else:
            return np.array([0.0] * 768)  # BERT hidden size
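# A tiny worked example of _calc_jaccard above (hypothetical morph dicts of
# token -> POS tag; the 'text' key is excluded from the set arithmetic):
#
#     A = {'text': '셔틀 언제 와', '셔틀': 'Noun', '언제': 'Adverb'}
#     B = {'text': '셔틀 도착 시간', '셔틀': 'Noun', '도착': 'Noun'}
#
#     num_union = 3 + 3 - 2 = 4        # raw key count without the 'text' keys
#     num_joint = 1                    # only ('셔틀', 'Noun') is shared
#     similarity = 1 / (4 - 1) = 1/3   # |A∩B| / |A∪B|, with |A∪B| = 4 - 1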
class QuestionMaker(object):
    def __init__(self):
        self.CONFIG = config.QUESTION
        self.model_wrapper = TensorServer()
        self.preprocessor = PreProcessor()
        vocab = self.preprocessor.vocab[:-1]
        self.tfidf_vectorizer = TfidfVectorizer(
            smooth_idf=True,
            token_pattern=self.CONFIG['tfidf_token_pattern'],
            stop_words=None,
            vocabulary=vocab)
        self.idf_, self.vocabulary_ = self.set_idf()

    def create_question(self, text, answer=None, category=None):
        text, removed = self.preprocessor.clean(text)
        if category not in self.CONFIG['categories']:
            raise Exception('category must be ', self.CONFIG['categories'])
        keywords = self.preprocessor.get_keywords(text=text)
        morphs = self.preprocessor.get_morphs(text=text)
        vector = self.model_wrapper.similarity(text)  # ELMo-like feature vector
        return Question(text,
                        category,
                        answer,
                        vector,
                        morphs,
                        keywords=keywords)

    def set_idf(self):
        question_list = _questions.find_all()
        raw_documents = []
        for question in question_list:
            text = ' '.join(self.preprocessor.str_to_tokens(question.text))
            raw_documents.append(text)
        self.tfidf_vectorizer.fit_transform(raw_documents=raw_documents)
        idf_ = self.tfidf_vectorizer.idf_
        # idf_ /= max(self.tfidf_vectorizer.idf_)  # normalize by the maximum
        return idf_, self.tfidf_vectorizer.vocabulary_

    def insert_text(self, text, answer=None, category=None):
        question = self.create_question(text, answer, category)
        return _questions.insert(question)

    def rebase(self):
        questions = _questions.find_all()
        for question in questions:
            backup = None
            orig_text = question.text
            try:
                backup = question
                question = self.create_question(text=question.text,
                                                category=question.category,
                                                answer=question.answer)
                _questions.delete_by_text(orig_text)
                _questions.insert(question)
                print('rebase: {}'.format(question.text))
            except Exception as err:
                print('rebase: ', str(err))
                if backup:
                    _questions.insert(backup)
        return

    def check_idf(self, word):
        return self.idf_[self.vocabulary_[word]]
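# A minimal usage sketch (hypothetical text/answer; the category must be one
# of config.QUESTION['categories']):
#
#     maker = QuestionMaker()
#     maker.insert_text('셔틀버스 언제 와?', answer='10분 간격으로 옵니다',
#                       category='prepared')
#     print(maker.check_idf('셔틀'))  # IDF weight of a vocabulary token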
class Model(object):
    def __init__(self, mode):
        '''
        :param mode: 0: search, 1: similarity
        '''
        self.mode = mode
        self.CONFIG = config.BERT
        self.preprocessor = PreProcessor()
        # placeholders
        self.input_ids = None
        self.input_masks = None
        self.segment_ids = None
        # pred indexes
        self.start_logits = None
        self.end_logits = None
        self.start_pred = None
        self.end_pred = None
        # tf.Session()
        self.sess = None
        # feature vectors
        self.all_encoder_layers = None
        self.pooled_output = None
        self.feature_vector = None
        self.similarity_output = None

        self.build_model()

    def build_model(self):
        if self.mode == 0:
            bert_json = self.CONFIG['bert_json']
            max_seq_length = self.CONFIG['max_seq_length-search']
        elif self.mode == 1:
            bert_json = self.CONFIG['bert_json']
            max_seq_length = self.CONFIG['max_seq_length-similarity']

        bert_config = BertConfig()
        bert_config.read_from_json_file(bert_json)

        self.input_ids = tf.placeholder(dtype=tf.int32,
                                        shape=[None, max_seq_length])
        self.input_masks = tf.placeholder(dtype=tf.int32,
                                          shape=[None, max_seq_length])
        self.segment_ids = tf.placeholder(dtype=tf.int32,
                                          shape=[None, max_seq_length])

        embedding_output = None  # sum of token, segment, position embeddings
        embedding_table = None  # id embedding table
        self.all_encoder_layers = None  # transformer model
        self.similarity_output = None  # output layer
        self.elmo_output = None  # layer for extracting ELMo-style features

        with tf.variable_scope(name_or_scope=None, default_name='bert'):
            with tf.variable_scope(name_or_scope='embeddings'):
                embedding_output, embedding_table = embedding_lookup(
                    self.input_ids,
                    bert_config.vocab_size,
                    bert_config.hidden_size,
                    bert_config.initializer_range,
                    word_embedding_name='word_embeddings')
                embedding_output = embedding_postprocessor(
                    embedding_output,
                    use_token_type=True,
                    token_type_ids=self.segment_ids,
                    token_type_vocab_size=bert_config.type_vocab_size,
                    use_position_embeddings=True,
                    token_type_embedding_name='token_type_embeddings',
                    position_embedding_name='position_embeddings',
                    initializer_range=bert_config.initializer_range,
                    max_position_embeddings=bert_config.max_position_embeddings,
                    dropout_prob=bert_config.hidden_dropout_prob)
            with tf.variable_scope(name_or_scope='encoder'):
                attention_mask = create_attention_mask_from_input_mask(
                    self.input_ids, self.input_masks)
                self.all_encoder_layers = tranformer_model(
                    input_tensor=embedding_output,
                    attention_mask=attention_mask,
                    hidden_size=bert_config.hidden_size,
                    num_hidden_layers=bert_config.num_hidden_layers,
                    num_attention_heads=bert_config.num_attention_heads,
                    intermediate_size=bert_config.intermediate_size,
                    intermediate_act_fn=gelu,  # TODO gelu -> .
                    hidden_dropout_prob=bert_config.hidden_dropout_prob,
                    attention_probs_dropout_prob=bert_config.
                    attention_probs_dropout_prob,
                    initializer_range=bert_config.initializer_range,
                    do_return_all_layers=True)

            self.similarity_output = self.all_encoder_layers[
                self.CONFIG['similarity_layer']]
            self.elmo_output = self.all_encoder_layers[-1]

            with tf.variable_scope('pooler'):
                first_token_tensor = tf.squeeze(
                    self.similarity_output[:, 0:1, :], axis=1)
                self.pooled_output = tf.layers.dense(
                    inputs=first_token_tensor,
                    units=bert_config.hidden_size,
                    activation=tf.nn.tanh,
                    kernel_initializer=tf.truncated_normal_initializer(
                        bert_config.initializer_range))

        final_layer = self.similarity_output

        output_weights = tf.get_variable(
            'cls/squad/output_weights',
            shape=[2, bert_config.hidden_size],
            initializer=tf.truncated_normal_initializer(
                bert_config.initializer_range))
        output_bias = tf.get_variable(
            'cls/squad/output_bias',
            shape=[2],
            initializer=tf.truncated_normal_initializer(
                bert_config.initializer_range))

        final_layer = tf.reshape(final_layer,
                                 shape=[-1, bert_config.hidden_size])
        logits = tf.matmul(final_layer, output_weights,
                           transpose_b=True) + output_bias
        logits = tf.reshape(logits, shape=[1, -1, 2])  # assumes one question at a time
        logits = tf.transpose(logits, perm=[2, 0, 1])
        unstacked_logits = tf.unstack(logits, axis=0)
        self.start_logits = unstacked_logits[0]
        self.end_logits = unstacked_logits[1]
        self.start_pred = tf.argmax(self.start_logits, axis=-1)
        self.end_pred = tf.argmax(self.end_logits, axis=-1)

    def load_checkpoint(self):
        if self.mode == 0:
            model_path = self.CONFIG['model_path-search']
        elif self.mode == 1:
            model_path = self.CONFIG['model_path-similarity']

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        tvars = tf.trainable_variables()
        assignment_map, initialized_variable_names = \
            get_assignment_map_from_checkpoint(tvars, model_path)  # 201
        tf.train.init_from_checkpoint(model_path, assignment_map)

        self.sess = tf.Session()  # TODO: fix the bug that requires loading twice
        self.sess.run(tf.global_variables_initializer())
        tvars = tf.trainable_variables()
        assignment_map, initialized_variable_names = \
            get_assignment_map_from_checkpoint(tvars, model_path)  # 201
        tf.train.init_from_checkpoint(model_path, assignment_map)

        for var in tvars:
            if var.name in initialized_variable_names:
                print(var.name, ' - INIT FROM CKPT')

    def _convert_to_feature(self, chat, context):
        return self.preprocessor.create_InputFeature(chat, context=context)

    def predict(self, chat, text):
        input_feature = self._convert_to_feature(chat, text)
        feed_dict = {
            self.input_ids: np.array(input_feature.input_ids).reshape((1, -1)),
            self.input_masks: np.array(input_feature.input_masks).reshape(1, -1),
            self.segment_ids: np.array(input_feature.segment_ids).reshape(1, -1)
        }
        start, end = self.sess.run([self.start_pred, self.end_pred],
                                   feed_dict)
        # start_n, end_n = sess.run([start_n_best, end_n_best], feed_dict)
        # TODO n best answers
        return self.preprocessor.idx_to_orig(start, end, input_feature)

    def extract_feature_vector(self, input_feature):
        tic = time.time()
        length = np.sum(input_feature.input_masks)
        feed_dict = {
            self.input_ids: np.array(input_feature.input_ids).reshape((1, -1)),
            self.input_masks: np.array(input_feature.input_masks).reshape(1, -1),
            self.segment_ids: np.array(input_feature.segment_ids).reshape(1, -1)
        }
        sequence_output = self.sess.run(self.similarity_output, feed_dict)
        # Average the word vectors, excluding [CLS] and [SEP].
        feature_vector = np.mean(sequence_output[:, 1:length - 1], axis=1)
        toc = time.time()
        print('*** Vectorizing Done: %5.3f ***' % (toc - tic))
        return np.reshape(feature_vector, newshape=(-1))

    # def extract_elmo_feature_vector(self, input_feature):
    #     tic = time.time()
    #     feed_dict = {
    #         self.input_ids: np.array(input_feature.input_ids).reshape((1, -1)),
    #         self.input_masks: np.array(input_feature.input_masks).reshape(1, -1),
    #         self.segment_ids: np.array(input_feature.segment_ids).reshape(1, -1)
    #     }
    #     elmo_output = self.sess.run(self.elmo_output, feed_dict)

    def search_to_saved_model(self):
        MODEL_DIR = self.CONFIG['MODEL_DIR']
        version = self.CONFIG['version-search']
        export_path = os.path.join(MODEL_DIR, 'search', str(version))
        print('export_path = {}\n'.format(export_path))
        if os.path.isdir(export_path):
            print('\nModel already exported at this path, skipping\n')
            return
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        input_ids = tf.saved_model.utils.build_tensor_info(self.input_ids)
        input_masks = tf.saved_model.utils.build_tensor_info(self.input_masks)
        segment_ids = tf.saved_model.utils.build_tensor_info(self.segment_ids)
        start_pred = tf.saved_model.utils.build_tensor_info(self.start_logits)
        end_pred = tf.saved_model.utils.build_tensor_info(self.end_logits)

        prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={
                    'input_ids': input_ids,
                    'input_masks': input_masks,
                    'segment_ids': segment_ids
                },
                outputs={
                    'start_pred': start_pred,
                    'end_pred': end_pred
                },
                method_name=tf.saved_model.signature_constants.
                PREDICT_METHOD_NAME))

        signature_def_map = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            prediction_signature
        }

        builder.add_meta_graph_and_variables(
            self.sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            signature_def_map=signature_def_map)
        builder.save()
        print('GENERATED SAVED MODEL')

    def ef_to_saved_model(self):
        MODEL_DIR = self.CONFIG['MODEL_DIR']
        version = self.CONFIG['version-similarity']
        export_path = os.path.join(MODEL_DIR, 'similarity', str(version))
        print('export_path = {}\n'.format(export_path))
        if os.path.isdir(export_path):
            print('\nModel already exported at this path, skipping\n')
            return
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        input_ids = tf.saved_model.utils.build_tensor_info(self.input_ids)
        input_masks = tf.saved_model.utils.build_tensor_info(self.input_masks)
        segment_ids = tf.saved_model.utils.build_tensor_info(self.segment_ids)
        similarity_output = tf.saved_model.utils.build_tensor_info(
            self.similarity_output)

        prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={
                    'input_ids': input_ids,
                    'input_masks': input_masks,
                    'segment_ids': segment_ids
                },
                outputs={'similarity_output': similarity_output},
                method_name=tf.saved_model.signature_constants.
                PREDICT_METHOD_NAME))

        signature_def_map = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            prediction_signature
        }

        builder.add_meta_graph_and_variables(
            self.sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            signature_def_map=signature_def_map)
        builder.save()
        print('GENERATED SAVED MODEL')
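# A minimal export sketch, assuming the checkpoint and path entries in
# config.BERT are populated: build the graph, restore the weights (twice,
# per the TODO in load_checkpoint), then write the SavedModel for TF Serving.
#
#     model = Model(mode=0)          # 0: search (QA), 1: similarity
#     model.load_checkpoint()
#     model.search_to_saved_model()  # ef_to_saved_model() for mode 1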
class TensorServer(metaclass=Singleton):
    def __init__(self):
        self.preprocessor = PreProcessor()
        self.CONFIG = config.TENSOR_SERVING
        search_v = json.loads(requests.get(self.CONFIG['url-search-v']).text)
        sentiment_v = json.loads(
            requests.get(self.CONFIG['url-sentiment-v']).text)
        similarity_v = json.loads(
            requests.get(self.CONFIG['url-similarity-v']).text)
        print('TensorServer Running')
        print('QA - {}'.format(search_v))
        print('Sentiment - {}'.format(sentiment_v))
        print('Similarity - {}'.format(similarity_v))

    @staticmethod
    def create_request(features):
        request_json = {
            'instances': [{
                'input_ids': features.input_ids,
                'input_masks': features.input_masks,
                'segment_ids': features.segment_ids
            }]
        }
        return request_json

    def sentiment(self, chat):
        chat, _ = self.preprocessor.clean(chat=chat)
        features = self.preprocessor.create_InputFeature(query_text=chat)
        response = requests.post(self.CONFIG['url-sentiment'],
                                 json=self.create_request(features))
        predict = json.loads(response.text)['predictions'][0]
        return predict

    def similarity(self, chat):
        chat, _ = self.preprocessor.clean(chat=chat)
        features = self.preprocessor.create_InputFeature(query_text=chat)
        _length = np.sum(features.input_masks)
        response = requests.post(self.CONFIG['url-similarity'],
                                 json=self.create_request(features))
        response = json.loads(response.text)
        similarity_vector = response['predictions'][0]
        # similarity_vector = np.mean(np.array(similarity_vector), axis=0)
        # similarity_vector = np.mean(np.array(similarity_vector)[:_length, :], axis=0)
        # similarity_vector = np.mean(np.array(similarity_vector)[1: _length - 1, :], axis=0)
        similarity_vector = np.array(similarity_vector)[1:_length - 1]
        # similarity_vector = np.array(similarity_vector)[0]
        return similarity_vector

    def search(self, chat, context):
        chat, _ = self.preprocessor.clean(chat=chat)
        features = self.preprocessor.create_InputFeature(chat, context)
        response = requests.post(self.CONFIG['url-search'],
                                 json=self.create_request(features))
        response = json.loads(response.text)
        start = response['predictions'][0]['start_pred']
        end = response['predictions'][0]['end_pred']
        start = np.argmax(start, axis=-1)
        end = np.argmax(end, axis=-1)
        return self.preprocessor.idx_to_orig(start, end, features)
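# For reference, create_request above builds the standard TF Serving REST
# payload; a round trip looks roughly like this (values abbreviated, URLs
# taken from config.TENSOR_SERVING):
#
#     POST {url-similarity}
#     {"instances": [{"input_ids": [101, ...],
#                     "input_masks": [1, ...],
#                     "segment_ids": [0, ...]}]}
#
#     -> {"predictions": [[[0.12, ...], ...]]}  # one seq_length x hidden matrix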