def __init__(self,
             data_path: Optional[str] = None,
             x_col_name: Optional[str] = None,
             y_col_name: Optional[str] = None,
             save_load_path: Optional[str] = './similarity_matching',
             edit_dict: Optional[dict] = None,
             train: bool = True):
    """Configure the ``tfidf_autofaq`` config and train or build the model.

    Args:
        data_path: Dataset location. If it resolves to an existing path it is
            stored as ``data_path``; otherwise it is stored as ``data_url``.
            The other of the two keys is removed from the reader config.
        x_col_name: Overrides ``dataset_reader.x_col_name`` when not None.
        y_col_name: Overrides ``dataset_reader.y_col_name`` when not None.
        save_load_path: Written to the ``ROOT_PATH`` metadata variable.
        edit_dict: Extra overrides merged into the config with
            ``update_dict_recursive`` after all other edits.
        train: When True the model is trained, otherwise it is only built.
    """
    model_config = read_json(configs.faq.tfidf_autofaq)
    reader_config = model_config['dataset_reader']

    # Column overrides are applied only when explicitly provided.
    for key, value in (('x_col_name', x_col_name), ('y_col_name', y_col_name)):
        if value is not None:
            reader_config[key] = value

    model_config['metadata']['variables']['ROOT_PATH'] = save_load_path

    if data_path is not None:
        # Keep exactly one of data_path / data_url in the reader config.
        if expand_path(data_path).exists():
            stale_key, active_key = 'data_url', 'data_path'
        else:
            stale_key, active_key = 'data_path', 'data_url'
        reader_config.pop(stale_key, None)
        reader_config[active_key] = data_path

    if edit_dict is not None:
        update_dict_recursive(model_config, edit_dict)

    if not train:
        self.model = build_model(model_config)
        return

    self.model = train_model(model_config)
    log.info('Your model was saved at: \'' + save_load_path + '\'')
def __init__(self,
             data_path: Optional[str] = None,
             config_type: Optional[str] = 'tfidf_autofaq',
             x_col_name: Optional[str] = 'Question',
             y_col_name: Optional[str] = 'Answer',
             save_load_path: Optional[str] = './similarity_matching',
             edit_dict: Optional[dict] = None,
             train: Optional[bool] = True):
    """Configure one of the FAQ configs and train or build the model.

    Args:
        data_path: Dataset location. If it resolves to an existing path it is
            stored as ``data_path``; otherwise it is stored as ``data_url``.
            The other of the two keys is removed from the reader config.
        config_type: Name of the config to load from ``configs.faq``.
        x_col_name: Overrides ``dataset_reader.x_col_name`` when not None.
        y_col_name: Overrides ``dataset_reader.y_col_name`` when not None.
        save_load_path: Written to the ``MODELS_PATH`` metadata variable.
        edit_dict: Extra overrides merged into the config with
            ``update_dict_recursive`` after all other edits.
        train: When truthy the model is trained (``download=True``),
            otherwise it is built (``download=False``).

    Raises:
        ValueError: If ``config_type`` is not present in ``configs.faq``.
    """
    if config_type not in configs.faq:
        known_configs = ", ".join(configs.faq.keys())
        raise ValueError("There is no config named '{0}'. Possible options are: {1}"
                         .format(config_type, known_configs))

    model_config = read_json(configs.faq[config_type])
    reader_config = model_config['dataset_reader']

    # Column overrides are applied only when explicitly provided.
    for key, value in (('x_col_name', x_col_name), ('y_col_name', y_col_name)):
        if value is not None:
            reader_config[key] = value

    model_config['metadata']['variables']['MODELS_PATH'] = save_load_path

    if data_path is not None:
        # Keep exactly one of data_path / data_url in the reader config.
        is_local = expand_path(data_path).exists()
        stale_key = 'data_url' if is_local else 'data_path'
        active_key = 'data_path' if is_local else 'data_url'
        reader_config.pop(stale_key, None)
        reader_config[active_key] = data_path

    if edit_dict is not None:
        update_dict_recursive(model_config, edit_dict)

    if not train:
        self.model = build_model(model_config, download=False)
        return

    self.model = train_model(model_config, download=True)
    log.info('Your model was saved at: \'' + save_load_path + '\'')
def train(data='wiki_train.csv', save_path='model_config.json'):
    """Train the ``tfidf_logreg_en_faq`` config on ``data`` and save the config.

    Args:
        data: Value stored as ``dataset_reader.data_path``.
        save_path: Destination handed to ``save`` together with the edited
            config (``save`` is defined outside this block).
    """
    # NOTE: the GPU index is hard-coded; this overrides any value already set
    # in the environment for the rest of the process.
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"

    model_config = read_json(configs.faq.tfidf_logreg_en_faq)
    # Point the reader at the local data and null out the URL entry.
    model_config['dataset_reader'].update(data_path=data, data_url=None)

    # The trained model object itself is not used here.
    train_model(model_config)
    save(model_config, save_path)
def fit_model(self):
    """Train the document ranker and build the SQuAD and ODQA models.

    Stores the edited ranker config on ``self.model_config`` and the three
    resulting models on ``self.doc_retrieval``, ``self.squad`` and
    ``self.odqa``.
    """
    ranker_config = read_json(configs.doc_retrieval.ru_ranker_tfidf_wiki)
    self.model_config = ranker_config

    reader_config = ranker_config["dataset_reader"]
    # Documents are read from "<current working directory>/Resourses".
    reader_config["data_path"] = os.path.abspath(os.getcwd()) + "/Resourses"
    reader_config["dataset_format"] = "txt"
    ranker_config["train"]["batch_size"] = 100

    print("work!")
    self.doc_retrieval = train_model(ranker_config)

    # SQuAD is built with download=True, the ODQA pipeline with download=False.
    self.squad = build_model(configs.squad.squad_ru_rubert_infer,
                             download=True)
    self.odqa = build_model(configs.odqa.ru_odqa_infer_wiki_rubert,
                            download=False)
def train_custom_model(data_path, x_col_name, y_col_name, save_load_path):
    """Train the ``tfidf_logreg_en_faq`` config on a custom dataset.

    Args:
        data_path: Stored as ``dataset_reader.data_path``.
        x_col_name: Stored as ``dataset_reader.x_col_name``.
        y_col_name: Stored as ``dataset_reader.y_col_name``.
        save_load_path: Written to the ``MODELS_PATH`` metadata variable.

    Returns:
        The model object returned by ``train_model``.
    """
    model_config = read_json("./configs/tfidf_logreg_en_faq.json")

    reader_config = model_config['dataset_reader']
    reader_config['x_col_name'] = x_col_name
    reader_config['y_col_name'] = y_col_name
    reader_config['data_path'] = data_path
    # Null out any URL shipped with the stock config so the local file is the
    # only data source left in the config.
    reader_config['data_url'] = None

    model_config['metadata']['variables']['MODELS_PATH'] = save_load_path

    # The previous version additionally tested
    # `data_path in model_config['dataset_reader']`, i.e. it looked up the
    # *value* of the path among the config *keys*. That condition was
    # effectively dead code and has been removed.
    return train_model(model_config)
def __init__(self,
             config_path: str = "src/core/configs/tfidf_logreg_autofaq.json",
             data_path: str = None,
             train: bool = True,
             fallback: FallbackMessage = FallbackMessage(0.7)):
    """Load the FAQ config and train or build the model.

    Args:
        config_path: Path of the JSON config to read.
        data_path: When truthy, stored as ``dataset_reader.data_path`` and
            ``dataset_reader.data_url`` is set to None.
        train: When True the model is trained (with warm start and the
            extended Russian stop-word list); otherwise it is only built.
        fallback: Stored on ``self._fallback``. NOTE: the default instance is
            created once at definition time and shared by every call that
            relies on the default.
    """
    self._fallback = fallback

    # NLTK's Russian stop words plus a few project-specific additions.
    stop_words = [
        *stopwords.words('russian'),
        'что', 'это', 'так', 'вот', 'быть', 'как', 'в', '—', '–', 'к', 'на', '...',
    ]

    model_config = read_json(config_path)
    if data_path:
        reader_config = model_config["dataset_reader"]
        reader_config["data_path"] = data_path
        reader_config["data_url"] = None

    if not train:
        self._faq = build_model(model_config)
        return

    # The pipe components are addressed by position (4 and 0), so these edits
    # depend on the layout of the config file being loaded.
    pipe = model_config["chainer"]["pipe"]
    pipe[4]["warm_start"] = True
    pipe[0]["stopwords"] = stop_words
    self._faq = train_model(model_config)
def new_question_answer(self, question, answer):
    """Add a new question-answer pair and retrain the FAQ model.

    The pair is appended to the FAQ DataFrame in ``self.data['faq']['df']``,
    the frame is written as CSV to ``self.data['faq']['path']``, and the model
    stored at ``self.qa_models['faq']['tfidf']`` is re-trained by calling
    ``deeppavlov.train_model`` with ``self.data['faq']['config']``.

    Args:
        question: Text for the ``Question`` column of the new row.
        answer: Text for the ``Answer`` column of the new row.
    """
    _faq = self.data['faq']
    new_faq = pd.DataFrame({'Question': [question], 'Answer': [answer]})

    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with default arguments produces the same frame (original
    # index labels are kept, exactly as append did).
    _faq['df'] = pd.concat([_faq['df'], new_faq])
    _faq['df'].to_csv(_faq['path'], index=False)

    self.qa_models['faq']['tfidf'] = deeppavlov.train_model(_faq['config'],
                                                            download=False)
    self.question, self.answer = question, answer
    logging.info('FAQ dataset and model updated..')
# Script: train a TF-IDF ranker from a local config, then answer one question
# with an ODQA pipeline built from a second local config.
import tensorflow as tf

# Create the TensorFlow session with GPU memory growth enabled before the
# DeepPavlov imports below. `session` is not referenced again in this script.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

from deeppavlov import configs
from deeppavlov.core.common.file import read_json
from deeppavlov import train_model
from deeppavlov.core.commands.infer import build_model

# Train the ranker described by the local JSON config and smoke-test it.
model_config = read_json('en_ranker_tfidf_data.json')
ranker = train_model(model_config)
print('==========RANKER======', ranker(['accidents']))

# Download all the SQuAD models
# squad = build_model(configs.squad.multi_squad_noans_infer, download = True)

# Do not download the ODQA models, we've just trained it
odqa = build_model('en_odqa_infer_data.json', download=False)

val_q = "Do you like people?"
answer1 = odqa([val_q])  # provide answer based on trained data
print(answer1)
from django.http import HttpResponse
from django.http import JsonResponse
from django.core import serializers
from django.shortcuts import render
import json
import uuid
from uuid import UUID
import pysolr
from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json

# The FAQ model is trained once, at import time, on a local CSV file.
# NOTE: the dataset path is machine-specific.
model_config = read_json(configs.faq.tfidf_logreg_en_faq)
model_config["dataset_reader"][
    "data_path"] = "C:/Users/DELL/Downloads/faq_school_en.csv"
model_config["dataset_reader"]["data_url"] = None
faq = train_model(model_config)


def index(request):
    """Render the landing page."""
    # return HttpResponse('Hello World!')
    # `render` was previously used without being imported, which raised
    # NameError on the first request to this view.
    return render(request, 'index.html')


def query(request):
    """Answer the ``query`` GET parameter with the FAQ model.

    Returns:
        A JsonResponse whose body is the JSON-encoded string produced by
        ``json.dumps({'query': <answer>})``, or a 400 response when the
        ``query`` parameter is absent.
    """
    print("entered query mode")
    # All values supplied for `query`, as a list (empty when absent).
    questions = request.GET.getlist('query')
    if not questions:
        # Previously None was passed straight into the model in this case.
        return JsonResponse({'error': "missing 'query' parameter"}, status=400)

    a = faq(questions)
    print(a[0][0])
    # NOTE(review): the payload is serialised twice (json.dumps, then
    # JsonResponse), so clients receive a JSON string that itself contains
    # JSON. Kept as-is because existing clients may rely on it.
    dict1 = json.dumps({'query': a[0][0]})
    return JsonResponse(dict1, safe=False)
def train():
    """Train the model described by ``../data/faq.json``.

    The model object returned by ``train_model`` is not kept.
    """
    train_model("../data/faq.json")
def train():
    """Train the NER model and keep the trained instance on ``NER.ner_model``.

    ``train_model`` already returns the trained model, so its result is
    stored directly. The previous version discarded it and rebuilt the model
    with ``build_model(..., download=True)``, which triggers the config's
    downloads a second time after training and may replace the freshly
    trained files with the pretrained ones.
    """
    NER.ner_model = train_model(NER.config, download=True)
def train_model(self, model_name: str):
    """Train the model registered under ``model_name``.

    Args:
        model_name: Name used to check existence and to look up the config
            path through ``self.file_util``.

    Raises:
        ModelNotFoundException: If the existence check for ``model_name``
            fails.
    """
    if not self.__model_is_exist(model_name):
        raise ModelNotFoundException("model {} not found".format(model_name))

    config_path = self.file_util.get_config_model_path(model_name)
    # Inside the method body `train_model` resolves to the module-level
    # function, not to this method.
    train_model(config_path)
import os
from deeppavlov.core.commands.utils import parse_config, expand_path
from deeppavlov import train_model

# Config to use, taken from the environment.
# NOTE(review): when CONFIG_NAME is unset, None is passed to parse_config —
# confirm that this is handled or set the variable before running.
CONFIG_NAME = os.environ.get("CONFIG_NAME", None)
parsed = parse_config(CONFIG_NAME)

# The model counts as trained when its MODEL_PATH directory already exists.
model_dir = expand_path(parsed["metadata"]["variables"]["MODEL_PATH"])
if not model_dir.exists():
    print("Model is NOT trained.\nLet's train the model!\n\n")
    model = train_model(CONFIG_NAME)
    print("Model is trained.")
else:
    # model folder exists, so it is already trained
    print("Model is already trained.")
'{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt'
        ],
        'labels': {
            'telegram_utils': 'GoalOrientedBot',
            'server_utils': 'GoalOrientedBot'
        },
        # Dataset archive and the sub-directory it is associated with.
        'download': [{
            'url': 'http://files.deeppavlov.ai/datasets/dstc2_v2.tar.gz',
            'subdir': '{DOWNLOADS_PATH}/dstc2'
        }]
    }
}

if __name__ == '__main__':
    # Train the bot from the config above (its beginning is outside this
    # excerpt), then run a short scripted dialogue and log each reply.
    time_start = time()
    logger = logging.getLogger(__name__)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    basic_bot = train_model(basic_config, download=True)

    for question in [
            'hello', 'I want some chinese food', 'on the south side?',
            'i want cheap food in chinese restaurant in the south of town',
            'bye'
    ]:
        # The model is called with a one-element batch.
        answer = basic_bot([question])
        logger.info('Q: {} A: {}'.format(question, answer))

    # Wall-clock time since the start of the main block, training included.
    logger.info('total time: {:5.2f}s'.format(time() - time_start))
import json

# `pd` is used below but was not imported in the visible import block.
import pandas as pd
from deeppavlov import configs, build_model, train_model

# One text per row; the CSV has no header line.
data = pd.read_csv('pristavki.csv', header=None, names=['text'])

with configs.ner.ner_ontonotes_bert_mult.open(encoding='utf8') as f:
    ner_config = json.load(f)

ner_config['dataset_reader'][
    'data_path'] = './'  # directory with train.txt, valid.txt and test.txt files
ner_config['metadata']['variables']['NER_PATH'] = './'
ner_config['metadata']['download'] = [
    ner_config['metadata']['download'][-1]
]  # do not download the pretrained ontonotes model

ner_model = train_model(ner_config, download=True)

# ner_model(['Playstation 4', 'Xbox 360 продам', 'Продам PS 3'])
#
# marked = []
#
# for text in data.text.values[:1000]:
#     # BERT has a limit of 512 words on text length; use an even smaller cap
#     if len(text.split()) > 100:
#         continue
#     pred = ner_model([text])
#     sent, tags = pred[0][0], pred[1][0]
#
#     # keep only the texts that contain entities
#     if len(set(tags[0])) > 1:
#         marked.append(list(zip(sent, tags)))
}],
        "out": ["y_pred_labels"]
    },
    # Training settings passed along with the rest of the config.
    "train": {
        "epochs": 10,
        "batch_size": 64,
        "metrics": [
            "sets_accuracy", "f1_macro", {
                # roc_auc is the only metric with explicitly named inputs.
                "name": "roc_auc",
                "inputs": ["y_onehot", "y_pred_probas"]
            }
        ],
        "validation_patience": 5,
        "val_every_n_epochs": 1,
        "log_every_n_epochs": 1,
        "show_examples": True,
        "validate_best": True,
        "test_best": False
    }
}

# Train the model described by the config dict assembled above (its
# beginning is outside this excerpt).
m = train_model(cnn_config)
from flask import Flask, render_template, request
from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json

app = Flask(__name__)

# The FAQ model is trained once, at import time, from a CSV fetched by URL.
model_config = read_json(configs.faq.tfidf_logreg_en_faq)
model_config["dataset_reader"][
    "data_url"] = "https://raw.githubusercontent.com/harrislam1/covid19_chatbot/master/cdc_covid19_faq.csv"
bot = train_model(model_config)


@app.route("/")
def home():
    """Serve the chat page."""
    return render_template("index.html")


@app.route("/get")
def get_bot_response():
    """Return the model's answer for the ``msg`` query parameter as text.

    Responds with HTTP 400 when ``msg`` is absent.
    """
    userText = request.args.get('msg')
    if userText is None:
        # Previously None was handed to the model as if it were a question.
        return "Missing 'msg' query parameter.", 400
    # The model is called with a one-element batch; [0][0] picks the first
    # element of the first output.
    return str(bot([userText])[0][0])


if __name__ == "__main__":
    app.run()
"load_path": "assistant_bot/small.txt" } gobot_config['chainer']['pipe'][-1]['nlg_manager']['template_path'] = 'assistant_data/assistant-templates.txt' gobot_config['chainer']['pipe'][-1]['nlg_manager']['api_call_action'] = None gobot_config['dataset_reader']['class_name'] = '__main__:AssistantDatasetReader' gobot_config['metadata']['variables']['DATA_PATH'] = 'assistant_data' gobot_config['metadata']['variables']['MODEL_PATH'] = 'assistant_bot' gobot_config['train']['batch_size'] = 4 # set batch size gobot_config['train']['max_batches'] = 30 # maximum number of training batches gobot_config['train']['val_every_n_batches'] = 30 # evaluate on full 'valid' split every 30 epochs gobot_config['train']['log_every_n_batches'] = 5 # evaluate on full 'train' split every 5 batches train_model(gobot_config) bot_model = build_model(gobot_config) def get_answer(messege): print(bot_model([[{"text": messege}]])) if __name__ == '__main__': text = get_answer("золотые") # from deeppavlov.utils.telegram import interact_model_by_telegram # interact_model_by_telegram(model_config=gobot_config, token='1153548935:AAFIZkbBaYKjzlpum6wVM6oTHviL4VYlPY8')
from deeppavlov import train_model
from deeppavlov.deprecated.agents.default_agent import DefaultAgent
from deeppavlov.deprecated.agents.processors import HighestConfidenceSelector
from deeppavlov.deprecated.skills.pattern_matching_skill import PatternMatchingSkill
from telebot.types import Update
from telegram.ext import Updater, CallbackContext

from Application.controls.misc.user_commands import register, indicates, account
from Application.models.models import *

# This file contains the callback functions for the commands the bot accepts.
# Additional functionality (not invoked by commands) lives
# in the ./misc folder.

# NOTE(review): `os` and `sys` are not imported explicitly in the visible
# code — presumably they arrive through the star import above; confirm.
QAModel = train_model(
    os.path.dirname(sys.argv[0]) + "/Application/config.json")
# The config path is hard-coded; an environment variable could be used instead.

agent = DefaultAgent([QAModel], skills_selector=HighestConfidenceSelector())
# The agent is purely for convenience; it can be dropped if skills are not used.


def send_help(update: Update, context: CallbackContext):
    """
    If needed (when debugging), this function can be changed
    by adding an associative array of the available commands
    (a convenient solution).
    """
    context.bot.send_message(chat_id=update.effective_chat.id,
                             text="Доступные команды:"
                                  "\n - /help для вызова списка команд."
                                  "\n - /indicates для подачи показаний счётчика."
                                  "\n - /register для регистрации."
from deeppavlov import configs, build_model, train_model
from deeppavlov.core.commands.utils import parse_config

# Parse the stock multilingual NER config to see where its data is expected.
config_dict = parse_config(configs.ner.ner_ontonotes_bert_mult)
reader = config_dict['dataset_reader']
print(reader['data_path'])

# Training uses the stock config itself, not the parsed copy above, and
# nothing is downloaded beforehand.
ner_model = train_model(configs.ner.ner_ontonotes_bert_mult,
                        download=False)
from deeppavlov import train_model

# Train the NER model described by the local `ner.json` config; nothing is
# downloaded beforehand (download=False).
ner_model = train_model('ner.json', download=False)
def Train(self):
    """Train and save the chatbot model, then load the insults classifier.

    The trained chatbot is stored on ``self.chatbotModel`` and the classifier
    on ``self.bertInsultsModel``. ``config`` is defined outside this method.
    """
    trained_bot = train_model(config, download=True)
    self.chatbotModel = trained_bot
    trained_bot.save()

    # Load the bert database
    self.bertInsultsModel = build_model(
        configs.classifiers.insults_kaggle_bert, download=True)
################# Universal Import ################################################### import sys import os SELF_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(os.path.dirname(SELF_DIR)) print(ROOT_DIR) sys.path.append(ROOT_DIR) os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ruler_bot.settings") # ##################################################### import django django.setup() from deeppavlov import configs, train_model from deeppavlov import build_model if __name__ == "__main__": #ner_model = train_model(configs.ner.ner_few_shot_ru, download=True) #ner_model = train_model("/home/alx/Cloud/DeepPavlov/dj_bot/ruler_bot/translator_skill/translator_intents_receptor/translator_intents_dataset/translator_intents_receptor_config.json") # path_to_config = "/home/alx/Cloud/DeepPavlov/dj_bot/ruler_bot/translator_skill/translator_intents_receptor/translator_intents_dataset/translator_translation_intents_receptor_config.json" path_to_config = "/home/alx/Workspace/dj_bot/ruler_bot/translator_skill/translator_intents_receptor/translator_intents_dataset/translator_translation_intents_receptor_config.json" ner_model = train_model(path_to_config) import ipdb ipdb.set_trace() print(ner_model(['Переведи на испанский слово мальчик'])) ner_model.save() def pa(): ner_model = build_model(path_to_config)
def __init__(self, model_config):
    """Train the model from a JSON config and build the SQuAD model.

    Args:
        model_config: Path of the JSON config to read and train.

    Neither resulting model object is stored on the instance.
    """
    loaded_config = read_json(model_config)
    train_model(loaded_config)
    build_model(configs.squad.multi_squad_noans_infer, download=True)
# Script: train a TF-IDF document ranker on a folder of text files, then
# build an ODQA pipeline with download disabled.
time_start = time()
logger = logging.getLogger(__name__)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
logger.info('started')

# Select which corpus folder to index.
case = 'plos'  # 'arxiv'
cases = {
    'arxiv': './SentenceCorpus/unlabeled_articles/arxiv_unlabeled',
    'plos': './SentenceCorpus/unlabeled_articles/plos_unlabeled',
}

# Re-point the stock wiki ranker config at the chosen folder, read as txt.
model_config = read_json(configs.doc_retrieval.en_ranker_tfidf_wiki)
model_config['dataset_reader']['data_path'] = cases[case]
model_config['dataset_reader']['dataset_format'] = 'txt'
doc_retrieval = train_model(model_config)
# Smoke-test the ranker with a single query.
logger.info(doc_retrieval(['cerebellum']))

do_squad = False
# Download all the SQuAD models
if do_squad:
    squad = build_model(configs.squad.multi_squad_noans_infer, download=True)

# Do not download the ODQA models, we've just trained it
model = build_model(configs.odqa.en_odqa_infer_wiki, download=False)

plos_questions = sorted([
    'What is rubella?',
    'What is whooping cough?',
    'What are yaws?',