def load_model(self, path):
    device = torch.device('cpu')
    V = len(self.vocab.char2id)
    d_model = 64
    d_ff = 256
    h = 4
    n_encoders = 4
    self_attn = MultiHeadedAttention(h=h, d_model=d_model,
                                     d_k=d_model // h, d_v=d_model // h,
                                     dropout=0.1)
    feed_forward = FullyConnectedFeedForward(d_model=d_model, d_ff=d_ff)
    position = PositionalEncoding(d_model, dropout=0.1)
    embedding = nn.Sequential(Embeddings(d_model=d_model, vocab=V), position)
    encoder = Encoder(self_attn=self_attn, feed_forward=feed_forward,
                      size=d_model, dropout=0.1)
    generator = Generator(d_model=d_model, vocab_size=V)
    model = Bert(encoder=encoder, embedding=embedding, generator=generator,
                 n_layers=n_encoders)
    model = model.to(device)
    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    return model
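# Hedged counterpart sketch: load_model() above reads its weights from
# checkpoint['model_state_dict'], so the training side presumably saves the
# checkpoint like this. The helper name and the absence of other keys
# (epoch, optimizer state, ...) are assumptions, not confirmed by this file.
import torch

def save_checkpoint(model, path):
    torch.save({'model_state_dict': model.state_dict()}, path)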
def main():
    BERT_MODEL_PATH = '../../models/bert_jp/'

    # start bert server
    commands = ['bert-serving-start', '-model_dir', BERT_MODEL_PATH,
                '-num_worker=1', '-cpu']
    p = subprocess.Popen(commands, shell=False,
                         stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    # start bert client
    bert = Bert(bert_model_path=BERT_MODEL_PATH, client_ip='0.0.0.0')

    # build train features
    train_dataset = pd.read_csv('../../data/processed/train_dataset.csv')
    train_vectors, train_targets = build_features(train_dataset, bert)
    np.save('../../data/features/train_vectors', train_vectors)
    np.save('../../data/features/train_targets', train_targets)

    # build test features
    test_dataset = pd.read_csv('../../data/processed/test_dataset.csv')
    test_vectors, test_targets = build_features(test_dataset, bert)
    np.save('../../data/features/test_vectors', test_vectors)
    np.save('../../data/features/test_targets', test_targets)

    p.terminate()
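# Hedged sketch of the build_features() helper used in main() above. It
# assumes the CSVs carry 'text' and 'label' columns and that the Bert client
# wrapper exposes an encode() method (as bert-serving-client does); both the
# column names and the method are assumptions, not confirmed by this file.
import numpy as np

def build_features(dataset, bert):
    vectors = np.asarray(bert.encode(dataset['text'].tolist()))  # (n, emb_dim)
    targets = dataset['label'].to_numpy()
    return vectors, targets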
def train():
    config = BertConfig()
    logger = get_logger(config.log_path)
    model = Bert(config)
    device = config.device

    train_dataset = BertDataSet(config.base_config.train_data_path)
    dev_dataset = BertDataSet(config.base_config.dev_data_path)
    train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    dev_dataloader = DataLoader(dev_dataset, batch_size=config.batch_size, shuffle=False)

    optimizer = AdamW(model.parameters(), lr=config.lr)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    model.train()

    best_acc = 0.
    for epoch in range(config.epochs):
        for i, batch in enumerate(train_dataloader):
            optimizer.zero_grad()
            input_ids, token_type_ids, attention_mask, labels = (
                t.to(device) for t in batch)
            logits = model(input_ids, token_type_ids, attention_mask)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                preds = torch.argmax(logits, dim=1)
                acc = torch.sum(preds == labels) * 1. / len(labels)
                logger.info("TRAIN: epoch: {} step: {} acc: {}, loss: {}".format(
                    epoch, i, acc, loss.item()))
        acc, cls_report = dev(model, dev_dataloader, config)
        logger.info("DEV: epoch: {} acc: {}".format(epoch, acc))
        logger.info("DEV classification report:\n{}".format(cls_report))
        if acc > best_acc:
            torch.save(model.state_dict(), config.model_path)
            best_acc = acc

    test_dataset = BertDataSet(config.base_config.test_data_path)
    test_dataloader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)
    best_model = Bert(config)
    best_model.load_state_dict(torch.load(config.model_path))
    best_model.to(device)  # keep the reloaded model on the same device as the data
    acc, cls_report = dev(best_model, test_dataloader, config)
    logger.info("TEST: ACC:{}".format(acc))
    logger.info("TEST classification report:\n{}".format(cls_report))
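# Hedged sketch of the dev() helper called by train() above: it evaluates a
# model over a dataloader and returns accuracy plus a classification report.
# The signature mirrors the call sites; the internals (sklearn report,
# eval/train mode toggling) are assumptions about a conventional implementation.
import torch
from sklearn.metrics import classification_report

def dev(model, dataloader, config):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in dataloader:
            input_ids, token_type_ids, attention_mask, labels = (
                t.to(config.device) for t in batch)
            logits = model(input_ids, token_type_ids, attention_mask)
            all_preds.extend(torch.argmax(logits, dim=1).cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
    model.train()
    acc = sum(p == l for p, l in zip(all_preds, all_labels)) / len(all_labels)
    return acc, classification_report(all_labels, all_preds)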
def test_bert(self, user_account, threshold, test_df, bert):
    if not bert:
        # fall back to loading a fresh model (model_path and class_names
        # are assumed to be module-level globals here)
        bert = Bert(model_path, class_names)
        bert.load_model()
    prediction = bert.predict(user_account, threshold, test_df)
    return prediction
            # (fragment: the opening of this early-stopping condition is
            # truncated in the source)
                    or metrics['f1'] < f1):
                break
            if metrics['f1'] > f1:
                f1 = metrics['f1']
                torch.save(model.state_dict(), config.model_save_path)


def inference(config, model):
    res, inputs_tags = evaluate(config, model)
    print(res)
    for i in inputs_tags:
        print(i)


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    torch.set_num_threads(10)
    set_seed()
    config = Config()
    model = Bert(config)
    if config.inference or config.resume_train:
        checkpoint = torch.load(config.model_save_base)
        model.load_state_dict(checkpoint)
    if config.inference:
        inference(config, model)
    else:
        train(config, model)
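# Hedged sketch of the set_seed() helper invoked in __main__ above; a
# conventional implementation that seeds Python, NumPy and PyTorch. The
# default seed value is an assumption.
import random
import numpy as np
import torch

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)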
def run(self):
    global bert
    bert = Bert(self.model_path, self.class_names)
    bert.load_model()
    print("BERT LOADED")
    self.signals.result.emit(True)
def train():
    config = KDConfig()
    logger = get_logger(config.log_path, "train_KD")
    device = config.device

    # Load the BERT model to act as the teacher
    logger.info("load bert .....")
    bert = Bert(config.bert_config)
    bert.load_state_dict(torch.load(config.bert_config.model_path))
    bert.to(device)
    bert.eval()

    # Freeze the BERT parameters
    for name, p in bert.named_parameters():
        p.requires_grad = False

    # Load the TextCNN model to act as the student
    textcnn = TextCNN(config.textcnn_config)
    textcnn.to(device)
    textcnn.train()

    # Load the datasets
    logger.info("load train/dev data .....")
    train_loader = DataLoader(KDdataset(config.base_config.train_data_path),
                              batch_size=config.batch_size, shuffle=True)
    dev_loader = DataLoader(KDdataset(config.base_config.dev_data_path),
                            batch_size=config.batch_size, shuffle=False)

    optimizer = Adam(textcnn.parameters(), lr=config.lr)

    # Start training
    logger.info("start training .....")
    best_acc = 0.
    for epoch in range(config.epochs):
        for i, batch in enumerate(train_loader):
            cnn_ids, labels, input_ids, token_type_ids, attention_mask = (
                t.to(device) for t in batch)
            optimizer.zero_grad()
            students_output = textcnn(cnn_ids)
            teacher_output = bert(input_ids, token_type_ids, attention_mask)
            loss = loss_fn_kd(students_output, labels, teacher_output,
                              config.T, config.alpha)
            loss.backward()
            optimizer.step()

            # Log progress
            if i % 100 == 0:
                labels = labels.data.cpu().numpy()
                preds = torch.argmax(students_output, dim=1)
                preds = preds.data.cpu().numpy()
                acc = np.sum(preds == labels) * 1. / len(preds)
                logger.info("TRAIN: epoch: {} step: {} acc: {} loss: {} ".format(
                    epoch + 1, i, acc, loss.item()))
        acc, table = dev(textcnn, dev_loader, config)
        logger.info("DEV: acc: {} ".format(acc))
        logger.info("DEV classification report: \n{}".format(table))
        if acc > best_acc:
            torch.save(textcnn.state_dict(), config.model_path)
            best_acc = acc

    logger.info("start testing ......")
    test_loader = DataLoader(KDdataset(config.base_config.test_data_path),
                             batch_size=config.batch_size, shuffle=False)
    best_model = TextCNN(config.textcnn_config)
    best_model.load_state_dict(torch.load(config.model_path))
    best_model.to(device)  # keep the reloaded model on the same device as the data
    acc, table = dev(best_model, test_loader, config)
    logger.info("TEST acc: {}".format(acc))
    logger.info("TEST classification report:\n{}".format(table))
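# Hedged sketch of the loss_fn_kd() used above, assuming the standard
# knowledge-distillation loss of Hinton et al. (2015): a temperature-scaled
# KL term against the teacher blended with ordinary cross-entropy against
# the gold labels. The repo's actual implementation may differ in details.
import torch
import torch.nn.functional as F

def loss_fn_kd(student_logits, labels, teacher_logits, T, alpha):
    # Soft targets: KL divergence between temperature-softened distributions,
    # rescaled by T^2 to keep gradient magnitudes comparable.
    soft_loss = F.kl_div(
        F.log_softmax(student_logits / T, dim=1),
        F.softmax(teacher_logits / T, dim=1),
        reduction="batchmean",
    ) * (T * T)
    # Hard targets: standard cross-entropy on the true labels.
    hard_loss = F.cross_entropy(student_logits, labels)
    return alpha * soft_loss + (1.0 - alpha) * hard_loss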
from bert import Bert

bert = Bert()

verb, probability = bert.getBestPredicateAndProbability("She", "the ball")
print("'" + verb + "' " + str(probability))

item, probability = bert.combineTo("iron", "hammer")
print("'" + item + "' " + str(probability))
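# Hedged sketch of what getBestPredicateAndProbability() plausibly does under
# the hood: fill a [MASK] slot between subject and object with a masked
# language model and return the top candidate with its probability. The
# HuggingFace backend and model name below are assumptions; the Bert class
# in this repo may be implemented differently.
from transformers import pipeline

fill = pipeline("fill-mask", model="bert-base-uncased")
best = fill("She [MASK] the ball.")[0]
print("'" + best["token_str"] + "' " + str(best["score"]))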
class FeatureExtractor(object):
    def __init__(self, model, options, vocab, nnvecs=1):
        self.word_counts, words, chars, pos, cpos, rels, treebanks, langs = vocab
        self.model = model
        self.nnvecs = nnvecs
        self.device = 'cuda' if options.enable_gpu else ''

        # Load ELMo if the option is set
        if options.elmo is not None:
            from elmo import ELMo
            self.elmo = ELMo(options.elmo, options.elmo_gamma,
                             options.elmo_learn_gamma)
            self.elmo.init_weights(model)
        else:
            self.elmo = None

        # Load ALBERT if the option is set
        if options.albert:
            from albert import Albert
            self.albert = Albert(
                pretrained_model=options.albert_pretrained_model)
        else:
            self.albert = None

        # Load BERT if the option is set
        if options.bert:
            from bert import Bert
            self.bert = Bert(options.bert_pretrained_model,
                             options.bert_pretrained_config,
                             options.bert_tokenizer)
        else:
            self.bert = None

        extra_words = 2  # MLP padding vector and OOV vector
        self.words = {word: ind for ind, word in enumerate(words, extra_words)}
        self.word_lookup = self.model.add_lookup_parameters(
            (len(self.words) + extra_words, options.word_emb_size))

        extra_pos = 2  # MLP padding vector and OOV vector
        self.pos = {pos: ind for ind, pos in enumerate(cpos, extra_pos)}
        self.pos_lookup = self.model.add_lookup_parameters(
            (len(cpos) + extra_pos, options.pos_emb_size))

        self.irels = rels
        self.rels = {rel: ind for ind, rel in enumerate(rels)}

        extra_chars = 1  # OOV vector
        self.chars = {char: ind for ind, char in enumerate(chars, extra_chars)}
        self.char_lookup = self.model.add_lookup_parameters(
            (len(chars) + extra_chars, options.char_emb_size))

        extra_treebanks = 1  # Padding vector
        self.treebanks = {
            treebank: ind
            for ind, treebank in enumerate(treebanks, extra_treebanks)
        }
        self.treebank_lookup = self.model.add_lookup_parameters(
            (len(treebanks) + extra_treebanks, options.tbank_emb_size))

        # initialise word vectors with external embeddings where they exist
        # This part got ugly - TODO: refactor
        if not options.predict:
            self.external_embedding = defaultdict(lambda: {})
            if options.ext_word_emb_file and options.word_emb_size > 0:
                # Load pre-trained word embeddings
                for lang in langs:
                    embeddings = utils.get_external_embeddings(
                        options,
                        emb_file=options.ext_word_emb_file,
                        lang=lang,
                        words=self.words.keys())
                    self.external_embedding["words"].update(embeddings)
            if options.ext_char_emb_file and options.char_emb_size > 0:
                # Load pre-trained character embeddings
                for lang in langs:
                    embeddings = utils.get_external_embeddings(
                        options,
                        emb_file=options.ext_char_emb_file,
                        lang=lang,
                        words=self.chars,
                        chars=True)
                    self.external_embedding["chars"].update(embeddings)
            if options.ext_emb_dir:
                # For every language, load the word and character
                # embeddings from a directory.
                for lang in langs:
                    if options.word_emb_size > 0:
                        embeddings = utils.get_external_embeddings(
                            options,
                            emb_dir=options.ext_emb_dir,
                            lang=lang,
                            words=self.words.keys())
                        self.external_embedding["words"].update(embeddings)
                    if options.char_emb_size > 0:
                        embeddings = utils.get_external_embeddings(
                            options,
                            emb_dir=options.ext_emb_dir,
                            lang=lang,
                            words=self.chars,
                            chars=True)
                        self.external_embedding["chars"].update(embeddings)
            self.init_lookups(options)

        # Sartiano
        elmo_emb_size = self.elmo.emb_dim if self.elmo else 0
        albert_emb_size = self.albert.emb_dim if self.albert else 0
        bert_emb_size = self.bert.emb_dim if self.bert else 0
        self.pretrained_embeddings_size = (elmo_emb_size + albert_emb_size +
                                           bert_emb_size)

        self.lstm_input_size = (
            options.word_emb_size + self.pretrained_embeddings_size +
            options.pos_emb_size + options.tbank_emb_size +
            2 * (options.char_lstm_output_size
                 if options.char_emb_size > 0 else 0))
        print("Word-level LSTM input size: " + str(self.lstm_input_size),
              file=sys.stderr)

        self.bilstms = []
        if options.no_bilstms > 0:
            self.bilstms.append(
                BiLSTM(self.lstm_input_size,
                       options.lstm_output_size,
                       self.model,
                       dropout_rate=0.33))
            for i in range(1, options.no_bilstms):
                self.bilstms.append(
                    BiLSTM(2 * options.lstm_output_size,
                           options.lstm_output_size,
                           self.model,
                           dropout_rate=0.33))
            # used in the PaddingVec
            self.word2lstm = self.model.add_parameters(
                (options.lstm_output_size * 2, self.lstm_input_size))
            self.word2lstmbias = self.model.add_parameters(
                (options.lstm_output_size * 2))
        else:
            self.word2lstm = self.model.add_parameters(
                (self.lstm_input_size, self.lstm_input_size))
            self.word2lstmbias = self.model.add_parameters(
                (self.lstm_input_size))

        self.char_bilstm = BiLSTM(options.char_emb_size,
                                  options.char_lstm_output_size,
                                  self.model,
                                  dropout_rate=0.33)
        self.charPadding = self.model.add_parameters(
            (options.char_lstm_output_size * 2))

    def Init(self, options):
        paddingWordVec = self.word_lookup[1] if options.word_emb_size > 0 else None
        paddingElmoVec = dy.inputTensor(np.zeros(
            (self.elmo.emb_dim, 1)), self.device) if self.elmo else None
        paddingAlbertVec = dy.inputTensor(np.zeros(
            (self.albert.emb_dim, 1)), self.device) if self.albert else None
        paddingBertVec = dy.inputTensor(np.zeros(
            (self.bert.emb_dim, 1)), self.device) if self.bert else None
        paddingPosVec = self.pos_lookup[1] if options.pos_emb_size > 0 else None
        paddingCharVec = self.charPadding.expr() if options.char_emb_size > 0 else None
        paddingTbankVec = self.treebank_lookup[0] if options.tbank_emb_size > 0 else None

        paddings = dy.concatenate(
            list(
                filter(None, [
                    paddingWordVec, paddingElmoVec, paddingAlbertVec,
                    paddingBertVec, paddingPosVec, paddingCharVec,
                    paddingTbankVec
                ])))
        self.paddingVec = dy.tanh(self.word2lstm.expr() * paddings +
                                  self.word2lstmbias.expr())
        self.empty = (self.paddingVec if self.nnvecs == 1 else
                      dy.concatenate([self.paddingVec
                                      for _ in range(self.nnvecs)]))

    def getWordEmbeddings(self,
                          sentence,
                          train,
                          options,
                          test_embeddings=defaultdict(lambda: {})):
        """
        Fills root.vec of each token in :param sentence: with the
        corresponding embedding.
        :param train: boolean, whether training or predicting.
        :return: the pretrained sentence representation, or None.
        """
        sentence_representation = None
        if self.elmo:
            # Get the full text of the sentence, excluding the root, which is
            # positioned differently for transition- and graph-based parsers.
            if options.graph_based:
                sentence_text = " ".join(
                    [entry.form for entry in sentence[1:]])
            else:
                sentence_text = " ".join(
                    [entry.form for entry in sentence[:-1]])
            sentence_representation = \
                self.elmo.get_sentence_representation(sentence_text)

        if self.albert:
            # Get the full text of the sentence, excluding the root (see above).
            if options.graph_based:
                sentence_text = " ".join(
                    [entry.form for entry in sentence[1:]])
            else:
                sentence_text = " ".join(
                    [entry.form for entry in sentence[:-1]])
            sentence_representation = \
                self.albert.get_sentence_representation(sentence_text)

        if self.bert:
            # Get the full text of the sentence, excluding the root (see above).
            if options.graph_based:
                sentence_text = " ".join(
                    [entry.form for entry in sentence[1:]])
            else:
                sentence_text = " ".join(
                    [entry.form for entry in sentence[:-1]])
            sentence_representation = \
                self.bert.get_sentence_representation(sentence_text)

        for i, root in enumerate(sentence):
            root.vecs = defaultdict(
                lambda: None
            )  # all vecs are None by default (possibly a little risky?)
            if options.word_emb_size > 0:
                if train:
                    word_count = float(self.word_counts.get(root.norm, 0))
                    # drop a word with probability 0.25 / (count + 0.25),
                    # so rare words fall back to the OOV vector more often
                    dropFlag = random.random() > word_count / (0.25 + word_count)
                    root.vecs["word"] = self.word_lookup[
                        self.words.get(root.norm, 0) if not dropFlag else 0]
                else:  # need to check in test_embeddings at prediction time
                    if root.norm in self.words:
                        root.vecs["word"] = self.word_lookup[self.words[root.norm]]
                    elif root.norm in test_embeddings["words"]:
                        root.vecs["word"] = dy.inputVector(
                            test_embeddings["words"][root.norm], self.device)
                    else:
                        root.vecs["word"] = self.word_lookup[0]
            if options.pos_emb_size > 0:
                root.vecs["pos"] = self.pos_lookup[self.pos.get(root.cpos, 0)]
            if options.char_emb_size > 0:
                root.vecs["char"] = self.get_char_vector(
                    root, train, test_embeddings["chars"])
            if options.tbank_emb_size > 0:
                if options.forced_tbank_emb:
                    treebank_id = options.forced_tbank_emb
                elif root.proxy_tbank:
                    treebank_id = root.proxy_tbank
                else:
                    treebank_id = root.treebank_id
                # this is a bit of a hack for models trained on an old version
                # of the code that used treebank name rather than id as the lookup
                if treebank_id not in self.treebanks and \
                        treebank_id in utils.reverse_iso_dict and \
                        utils.reverse_iso_dict[treebank_id] in self.treebanks:
                    treebank_id = utils.reverse_iso_dict[treebank_id]
                if treebank_id is not None:
                    root.vecs["treebank"] = self.treebank_lookup[
                        self.treebanks[treebank_id]]
            if self.elmo:
                if i < len(sentence) - 1:
                    # Don't look up the 'root' word
                    root.vecs["elmo"] = sentence_representation[i]
                else:
                    # TODO
                    root.vecs["elmo"] = dy.inputTensor(
                        np.zeros((self.elmo.emb_dim, 1)), self.device)
            if self.albert:
                if i < len(sentence) - 1:
                    # Don't look up the 'root' word
                    root.vecs["albert"] = sentence_representation[i]
                else:
                    # TODO
                    root.vecs["albert"] = dy.inputTensor(
                        np.zeros((self.albert.emb_dim, 1)), self.device)
            if self.bert:
                if i < len(sentence) - 1:
                    # Don't look up the 'root' word
                    root.vecs["bert"] = sentence_representation[i]
                else:
                    # TODO: dy.zeros() doesn't take a device='cuda' parameter,
                    # so build the zero tensor via numpy instead
                    root.vecs["bert"] = dy.inputTensor(
                        np.zeros((self.bert.emb_dim, 1)), self.device)
            root.vec = dy.concatenate(
                list(
                    filter(None, [
                        root.vecs["word"],
root.vecs["elmo"], root.vecs["albert"], root.vecs["bert"], root.vecs["pos"], root.vecs["char"], root.vecs["treebank"] ]))) for bilstm in self.bilstms: bilstm.set_token_vecs(sentence, train) return sentence_representation def get_char_vector(self, root, train, test_embeddings_chars={}): if root.char_rep == "*root*": # no point running a character analysis over this placeholder token return self.charPadding.expr( ) # use the padding vector if it's the root token else: char_vecs = [] for char in root.char_rep: if char in self.chars: char_vecs.append(self.char_lookup[self.chars[char]]) elif char in test_embeddings_chars: char_vecs.append( dy.inputVector(test_embeddings_chars[char], self.device)) else: char_vecs.append(self.char_lookup[0]) return self.char_bilstm.get_sequence_vector(char_vecs, train) def init_lookups(self, options): if self.external_embedding["words"]: print('Initialising %i word vectors with external embeddings' % len(self.external_embedding["words"]), file=sys.stderr) for word in self.external_embedding["words"]: if len(self.external_embedding["words"] [word]) != options.word_emb_size: raise Exception( "Size of external embedding does not match specified word embedding size of %s" % (options.word_emb_size)) self.word_lookup.init_row( self.words[word], self.external_embedding["words"][word]) elif options.word_emb_size > 0: print( 'No word external embeddings found: all vectors initialised randomly', file=sys.stderr) if self.external_embedding["chars"]: print('Initialising %i char vectors with external embeddings' % len(self.external_embedding["chars"]), file=sys.stderr) for char in self.external_embedding["chars"]: if len(self.external_embedding["chars"] [char]) != options.char_emb_size: raise Exception( "Size of external embedding does not match specified char embedding size of %s" % (options.char_emb_size)) self.char_lookup.init_row( self.chars[char], self.external_embedding["chars"][char]) elif options.char_emb_size > 0: print( 'No character external embeddings found: all vectors initialised randomly', file=sys.stderr)
h = 8
self_attn = MultiHeadedAttention(h=h, d_model=d_model,
                                 d_k=d_model // h, d_v=d_model // h,
                                 dropout=0.)
feed_forward = FullyConnectedFeedForward(d_model=d_model, d_ff=1024)
embedding = Embeddings(d_model=d_model, vocab=V)
encoder = Encoder(self_attn=self_attn, feed_forward=feed_forward,
                  size=d_model, dropout=0.)
generator = Generator(d_model=d_model, vocab_size=V)
model = Bert(encoder=encoder, embedding=embedding, generator=generator,
             n_layers=4)

data_iter = create_batch(30, 5)
for i, batch in enumerate(data_iter):
    x = embedding(batch.src)
    y = self_attn(x, x, x, batch.src_mask)

    masked_src = batch.src.masked_fill(
        batch.src_mask.squeeze(-2) == 0, mask_token)
    x2 = embedding(masked_src)
    y2 = self_attn(x2, x2, x2, batch.src_mask)
    print(y)
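# Hedged scaffolding for the fragment above, which uses V, d_model,
# mask_token and create_batch() without defining them. Everything below is
# an assumption chosen so the fragment type-checks (a toy random-batch
# generator); the real definitions may differ.
from collections import namedtuple
import torch

V = 11            # toy vocabulary size (assumption)
d_model = 64      # model width (assumption)
mask_token = 0    # id written into masked positions (assumption)

Batch = namedtuple("Batch", ["src", "src_mask"])

def create_batch(n_batches, batch_size, seq_len=10):
    # Random token ids plus a mask hiding roughly 15% of positions;
    # src_mask has shape (batch, 1, seq_len) so it broadcasts over heads
    # and squeeze(-2) aligns it with src for masked_fill.
    for _ in range(n_batches):
        src = torch.randint(1, V, (batch_size, seq_len))
        src_mask = (torch.rand(batch_size, 1, seq_len) > 0.15).long()
        yield Batch(src, src_mask)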
import os
from os.path import join, dirname

import spacy
import neuralcoref
from dotenv import load_dotenv
from bert import Bert
from chatbot import Chatbot
from controller import Controller
from flask import Flask, request, abort, jsonify, render_template
from elasticsearch import Elasticsearch
from elasticsearch_dsl import MultiSearch, Search
from spacy.lang.en.stop_words import STOP_WORDS
from datetime import datetime

app = Flask(__name__)

# Load all environment variables
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

bert = Bert()
chatbot = Chatbot(os.getenv('ChatbotModelName'),
                  os.getenv('ChatbotDataFile'),
                  int(os.getenv('ChatbotNbIterations')))
controller = Controller()

nlp = spacy.load('en_core_web_lg')
neuralcoref.add_to_pipe(nlp)

ES_HOST = os.getenv('Host')
ES_PORT = os.getenv('Port')
ES_INDEX = os.getenv('Index')

sessions = []


@app.route('/', methods=['POST'])
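# Hedged sketch: the snippet ends at the route decorator, so the handler
# itself is missing. A minimal placeholder body so the module imports; the
# payload fields and the chatbot call are assumptions, not this project's
# actual API.
def handle_message():
    payload = request.get_json(force=True)
    if not payload or 'message' not in payload:
        abort(400)
    answer = chatbot.get_response(payload['message'])  # hypothetical method
    return jsonify({'answer': answer})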