def __init__(self,
                 data_path: Optional[str] = None,
                 x_col_name: Optional[str] = None,
                 y_col_name: Optional[str] = None,
                 save_load_path: Optional[str] = './similarity_matching',
                 edit_dict: Optional[dict] = None,
                 train: bool = True):

        model_config = read_json(configs.faq.tfidf_autofaq)
        if x_col_name is not None:
            model_config['dataset_reader']['x_col_name'] = x_col_name
        if y_col_name is not None:
            model_config['dataset_reader']['y_col_name'] = y_col_name

        model_config['metadata']['variables']['ROOT_PATH'] = save_load_path

        if data_path is not None:
            if expand_path(data_path).exists():
                if 'data_url' in model_config['dataset_reader']:
                    del model_config['dataset_reader']['data_url']
                model_config['dataset_reader']['data_path'] = data_path
            else:
                if 'data_path' in model_config['dataset_reader']:
                    del model_config['dataset_reader']['data_path']
                model_config['dataset_reader']['data_url'] = data_path

        if edit_dict is not None:
            update_dict_recursive(model_config, edit_dict)

        if train:
            self.model = train_model(model_config)
            log.info('Your model was saved at: \'' + save_load_path + '\'')
        else:
            self.model = build_model(model_config)
Example #2
    def __init__(self, data_path: Optional[str] = None, config_type: Optional[str] = 'tfidf_autofaq',
                 x_col_name: Optional[str] = 'Question', y_col_name: Optional[str] = 'Answer',
                 save_load_path: Optional[str] = './similarity_matching',
                 edit_dict: Optional[dict] = None, train: Optional[bool] = True):

        if config_type not in configs.faq:
            raise ValueError("There is no config named '{0}'. Possible options are: {1}"
                             .format(config_type, ", ".join(configs.faq.keys())))
        model_config = read_json(configs.faq[config_type])

        if x_col_name is not None:
            model_config['dataset_reader']['x_col_name'] = x_col_name
        if y_col_name is not None:
            model_config['dataset_reader']['y_col_name'] = y_col_name

        model_config['metadata']['variables']['MODELS_PATH'] = save_load_path

        if data_path is not None:
            if expand_path(data_path).exists():
                if 'data_url' in model_config['dataset_reader']:
                    del model_config['dataset_reader']['data_url']
                model_config['dataset_reader']['data_path'] = data_path
            else:
                if 'data_path' in model_config['dataset_reader']:
                    del model_config['dataset_reader']['data_path']
                model_config['dataset_reader']['data_url'] = data_path

        if edit_dict is not None:
            update_dict_recursive(model_config, edit_dict)

        if train:
            self.model = train_model(model_config, download=True)
            log.info('Your model was saved at: \'' + save_load_path + '\'')
        else:
            self.model = build_model(model_config, download=False)
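# A hedged usage sketch (not in the original source): it assumes the __init__ above
# belongs to a FAQ skill class, called FaqSkill here purely for illustration, and that
# the trained pipeline maps a batch of questions to answers.
# skill = FaqSkill(data_path='faq.csv', config_type='tfidf_autofaq',
#                  save_load_path='./similarity_matching', train=True)
# print(skill.model(['How do I enroll my child in school?']))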
Example #3
def train(data='wiki_train.csv', save_path='model_config.json'):
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    model_config = read_json(configs.faq.tfidf_logreg_en_faq)
    model_config['dataset_reader']['data_path'] = data
    model_config['dataset_reader']['data_url'] = None

    model = train_model(model_config)
    save(model_config, save_path)
Example #4
    def fit_model(self):
        self.model_config = read_json(
            configs.doc_retrieval.ru_ranker_tfidf_wiki)
        self.model_config["dataset_reader"]["data_path"] = os.path.abspath(
            os.getcwd()) + "/Resourses"
        self.model_config["dataset_reader"]["dataset_format"] = "txt"
        self.model_config["train"]["batch_size"] = 100
        print("work!")
        self.doc_retrieval = train_model(self.model_config)
        self.squad = build_model(configs.squad.squad_ru_rubert_infer,
                                 download=True)
        self.odqa = build_model(configs.odqa.ru_odqa_infer_wiki_rubert,
                                download=False)
Example #5
def train_custom_model(data_path, x_col_name, y_col_name, save_load_path):
    model_config = read_json("./configs/tfidf_logreg_en_faq.json")
    model_config['dataset_reader']['x_col_name'] = x_col_name
    model_config['dataset_reader']['y_col_name'] = y_col_name
    model_config['dataset_reader']['data_path'] = data_path
    model_config['metadata']['variables']['MODELS_PATH'] = save_load_path
    # Use the local file for training: drop the remote data_url if the config has one
    if 'data_url' in model_config['dataset_reader']:
        del model_config['dataset_reader']['data_url']

    custom_model = train_model(model_config)
    return custom_model
Example #6
    def __init__(self, config_path: str = "src/core/configs/tfidf_logreg_autofaq.json",
                 data_path: str = None, train: bool = True,
                 fallback: FallbackMessage = FallbackMessage(0.7)):
        self._fallback = fallback

        stop_words = stopwords.words('russian')
        stop_words.extend(['что', 'это', 'так', 'вот', 'быть', 'как', 'в', '—', '–', 'к', 'на', '...'])

        model_config = read_json(config_path)
        if data_path:
            model_config["dataset_reader"]["data_path"] = data_path
            model_config["dataset_reader"]["data_url"] = None
        if train:
            model_config["chainer"]["pipe"][4]["warm_start"] = True
            model_config["chainer"]["pipe"][0]["stopwords"] = stop_words
            self._faq = train_model(model_config)
        else:
            self._faq = build_model(model_config)
Example #7
    def new_question_answer(self, question, answer):
        '''Adds a new question-answer pair.

        INPUT:
        - question
        - answer

        The new question-answer pair is stored at *self.data['faq']['path']*,
        and the model in *qa_models['faq']* is re-trained by calling
        `deeppavlov.train_model`.
        '''
        _faq = self.data['faq']
        new_faq = pd.DataFrame({'Question': [question], 'Answer': [answer]})
        _faq['df'] = pd.concat([_faq['df'], new_faq], ignore_index=True)
        _faq['df'].to_csv(_faq['path'], index=False)
        self.qa_models['faq']['tfidf'] = deeppavlov.train_model(_faq['config'],
                                                                download=False)
        self.question, self.answer = question, answer
        logging.info('FAQ dataset and model updated.')
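    # A hedged usage sketch (not in the original source): it assumes `bot` is an
    # instance of the class that owns new_question_answer, with self.data['faq']
    # and self.qa_models['faq'] already initialised.
    # bot.new_question_answer('What are the opening hours?',
    #                         'We are open 9am-5pm on weekdays.')
    # print(bot.qa_models['faq']['tfidf'](['When are you open?']))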
Example #8
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

from deeppavlov import configs
from deeppavlov.core.common.file import read_json
from deeppavlov import train_model
from deeppavlov.core.commands.infer import build_model

model_config = read_json('en_ranker_tfidf_data.json')
ranker = train_model(model_config)

print('==========RANKER======', ranker(['accidents']))

# Download all the SQuAD models
# squad = build_model(configs.squad.multi_squad_noans_infer, download = True)

# Do not download the ODQA models, we've just trained it
odqa = build_model('en_odqa_infer_data.json', download=False)

val_q = "Do you like people?"
answer1 = odqa([val_q])  #  provide answer based on trained data

print(answer1)
Example #9
File: views.py Project: bprash/chat
from django.http import HttpResponse
from django.http import JsonResponse
from django.shortcuts import render  # render() is used by index() below
from django.core import serializers
import json
import uuid
from uuid import UUID

import pysolr

from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json
model_config = read_json(configs.faq.tfidf_logreg_en_faq)
model_config["dataset_reader"][
    "data_path"] = "C:/Users/DELL/Downloads/faq_school_en.csv"
model_config["dataset_reader"]["data_url"] = None
faq = train_model(model_config)


def index(request):
    # return HttpResponse('Hello World!')
    return render(request, 'index.html')


def query(request):
    print("entered query mode")
    myDict = dict(request.GET)
    a = faq(myDict.get('query'))
    print(a[0][0])
    return JsonResponse({'query': a[0][0]})
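# Example request (assumption: the project's URLconf maps this view to /query):
#   GET /query?query=When%20does%20school%20start
# which returns a JSON body such as {"query": "<best matching answer>"}.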
Example #10
def train():
    faq = train_model("../data/faq.json")
Example #11
    def train():
        train_model(NER.config, download=True)
        NER.ner_model = build_model(NER.config, download=True)
Example #12
File: faqbot.py Project: kvadro1/deepfaq
    def train_model(self, model_name: str):
        if self.__model_is_exist(model_name):
            train_model(self.file_util.get_config_model_path(model_name))
        else:
            raise ModelNotFoundException("model {} not found".format(model_name))
Example #13
import os

from deeppavlov.core.commands.utils import parse_config, expand_path
from deeppavlov import train_model

CONFIG_NAME = os.environ.get("CONFIG_NAME", None)
parsed = parse_config(CONFIG_NAME)

if expand_path(parsed["metadata"]["variables"]["MODEL_PATH"]).exists():
    # model folder exists, so it is already trained
    print("Model is already trained.")
else:
    print("Model is NOT trained.\nLet's train the model!\n\n")
    model = train_model(CONFIG_NAME)
    print("Model is trained.")
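# Typical invocation (the script name train_if_needed.py and the config name are
# hypothetical examples, not from the original source):
#   CONFIG_NAME=ner_ontonotes_bert python train_if_needed.py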
Example #14
            '{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt'
        ],
        'labels': {
            'telegram_utils': 'GoalOrientedBot',
            'server_utils': 'GoalOrientedBot'
        },
        'download': [{
            'url': 'http://files.deeppavlov.ai/datasets/dstc2_v2.tar.gz',
            'subdir': '{DOWNLOADS_PATH}/dstc2'
        }]
    }
}

if __name__ == '__main__':
    time_start = time()
    logger = logging.getLogger(__name__)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    basic_bot = train_model(basic_config, download=True)

    for question in [
            'hello', 'I want some chinese food', 'on the south side?',
            'i want cheap food in chinese restaurant in the south of town',
            'bye'
    ]:
        answer = basic_bot([question])
        logger.info('Q: {} A: {}'.format(question, answer))

    logger.info('total time: {:5.2f}s'.format(time() - time_start))
Example #15
import json
import pandas as pd
from deeppavlov import configs, build_model, train_model

data = pd.read_csv('pristavki.csv', header=None, names=['text'])

with configs.ner.ner_ontonotes_bert_mult.open(encoding='utf8') as f:
    ner_config = json.load(f)

ner_config['dataset_reader'][
    'data_path'] = './'  # directory with train.txt, valid.txt and test.txt files
ner_config['metadata']['variables']['NER_PATH'] = './'
ner_config['metadata']['download'] = [
    ner_config['metadata']['download'][-1]
]  # do not download the pretrained ontonotes model
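# A hedged sketch (assumption: the config's dataset_reader is a CoNLL-style reader) of
# what ./train.txt, ./valid.txt and ./test.txt are expected to contain: one
# "token tag" pair per line, with a blank line between sentences, e.g.
#
#   Продам      O
#   Playstation B-PRODUCT
#   4           I-PRODUCT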

ner_model = train_model(ner_config, download=True)

# ner_model(['Playstation 4', 'Xbox 360 продам', 'Продам PS 3'])
#
# marked = []
#
# for text in data.text.values[:1000]:
#     # BERT has a limit of 512 tokens per text, so use an even smaller cap
#     if len(text.split()) > 100:
#         continue
#     pred = ner_model([text])
#     sent, tags = pred[0][0], pred[1][0]
#
#     # keep only the texts that contain entities
#     if len(set(tags[0])) > 1:
#         marked.append(list(zip(sent, tags)))
Example #16
        }],
        "out": ["y_pred_labels"]
    },
    "train": {
        "epochs": 10,
        "batch_size": 64,
        "metrics": [
            "sets_accuracy", "f1_macro", {
                "name": "roc_auc",
                "inputs": ["y_onehot", "y_pred_probas"]
            }
        ],
        "validation_patience": 5,
        "val_every_n_epochs": 1,
        "log_every_n_epochs": 1,
        "show_examples": True,
        "validate_best": True,
        "test_best": False
    }
}

m = train_model(cnn_config)
Example #17
from flask import Flask, render_template, request
from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json

app = Flask(__name__)

model_config = read_json(configs.faq.tfidf_logreg_en_faq)
model_config["dataset_reader"][
    "data_url"] = "https://raw.githubusercontent.com/harrislam1/covid19_chatbot/master/cdc_covid19_faq.csv"
bot = train_model(model_config)


@app.route("/")
def home():
    return render_template("index.html")


@app.route("/get")
def get_bot_response():
    userText = request.args.get('msg')
    return str(bot([userText])[0][0])


if __name__ == "__main__":
    app.run()
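# Example request once the app is running (assumption: Flask's default port 5000):
#   http://127.0.0.1:5000/get?msg=What%20are%20the%20symptoms%20of%20COVID-19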
Example #18
    "load_path": "assistant_bot/small.txt"
}

gobot_config['chainer']['pipe'][-1]['nlg_manager']['template_path'] = 'assistant_data/assistant-templates.txt'
gobot_config['chainer']['pipe'][-1]['nlg_manager']['api_call_action'] = None

gobot_config['dataset_reader']['class_name'] = '__main__:AssistantDatasetReader'
gobot_config['metadata']['variables']['DATA_PATH'] = 'assistant_data'

gobot_config['metadata']['variables']['MODEL_PATH'] = 'assistant_bot'

gobot_config['train']['batch_size'] = 4  # set batch size
gobot_config['train']['max_batches'] = 30  # maximum number of training batches
gobot_config['train']['val_every_n_batches'] = 30  # evaluate on the full 'valid' split every 30 batches
gobot_config['train']['log_every_n_batches'] = 5  # log metrics on the 'train' split every 5 batches

train_model(gobot_config)

bot_model = build_model(gobot_config)

def get_answer(message):
    answer = bot_model([[{"text": message}]])
    print(answer)
    return answer

if __name__ == '__main__':

    text = get_answer("золотые")

# from deeppavlov.utils.telegram import interact_model_by_telegram

# interact_model_by_telegram(model_config=gobot_config, token='1153548935:AAFIZkbBaYKjzlpum6wVM6oTHviL4VYlPY8')
Example #19
import os
import sys

from deeppavlov import train_model
from deeppavlov.deprecated.agents.default_agent import DefaultAgent
from deeppavlov.deprecated.agents.processors import HighestConfidenceSelector
from deeppavlov.deprecated.skills.pattern_matching_skill import PatternMatchingSkill
from telebot.types import Update
from telegram.ext import Updater, CallbackContext

from Application.controls.misc.user_commands import register, indicates, account
from Application.models.models import *

# This file contains the callback functions for the commands the bot accepts.
# Additional functionality (not invoked by commands) has been moved
# to the ./misc folder

QAModel = train_model(
    os.path.dirname(sys.argv[0]) + "/Application/config.json")
# The config path is hardcoded; an environment variable could be used instead
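# A possible variant of the note above (the environment-variable name CONFIG_PATH is a
# hypothetical example, not taken from the original project):
# config_path = os.environ.get("CONFIG_PATH",
#                              os.path.dirname(sys.argv[0]) + "/Application/config.json")
# QAModel = train_model(config_path)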
agent = DefaultAgent([QAModel], skills_selector=HighestConfidenceSelector())
# The agent is purely for convenience; it can be dropped if skills are not used


def send_help(update: Update, context: CallbackContext):
    """
    If needed (for debugging), this function can be modified by adding
    an associative array of the available commands (a convenient solution).
    """
    context.bot.send_message(chat_id=update.effective_chat.id,
                             text="Доступные команды:"
                             "\n - /help для вызова списка команд."
                             "\n - /indicates для подачи показаний счётчика."
                             "\n - /register для регистрации."
Example #20
from deeppavlov import configs, build_model, train_model
from deeppavlov.core.commands.utils import parse_config

config_dict = parse_config(configs.ner.ner_ontonotes_bert_mult)
reader = config_dict['dataset_reader']
print(config_dict['dataset_reader']['data_path'])
ner_model = train_model(configs.ner.ner_ontonotes_bert_mult, download=False)
Example #21
from deeppavlov import train_model

ner_model = train_model('ner.json', download=False)
Example #22
    def Train(self):
        self.chatbotModel = train_model(config, download=True)
        self.chatbotModel.save()

        # Load the bert database
        self.bertInsultsModel = build_model(configs.classifiers.insults_kaggle_bert, download=True)
Example #23
################# Universal Import ###################################################
import sys
import os
SELF_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(os.path.dirname(SELF_DIR))
print(ROOT_DIR)
sys.path.append(ROOT_DIR)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ruler_bot.settings")
# #####################################################
import django
django.setup()

from deeppavlov import configs, train_model
from deeppavlov import build_model

if __name__ == "__main__":
    #ner_model = train_model(configs.ner.ner_few_shot_ru, download=True)
    #ner_model = train_model("/home/alx/Cloud/DeepPavlov/dj_bot/ruler_bot/translator_skill/translator_intents_receptor/translator_intents_dataset/translator_intents_receptor_config.json")
    # path_to_config = "/home/alx/Cloud/DeepPavlov/dj_bot/ruler_bot/translator_skill/translator_intents_receptor/translator_intents_dataset/translator_translation_intents_receptor_config.json"
    path_to_config = "/home/alx/Workspace/dj_bot/ruler_bot/translator_skill/translator_intents_receptor/translator_intents_dataset/translator_translation_intents_receptor_config.json"
    ner_model = train_model(path_to_config)
    import ipdb
    ipdb.set_trace()
    print(ner_model(['Переведи на испанский слово мальчик']))
    ner_model.save()


def pa():

    ner_model = build_model(path_to_config)
Example #24
    def __init__(self, model_config):
        model_config = read_json(model_config)
        train_model(model_config)
        build_model(configs.squad.multi_squad_noans_infer, download=True)
Example #25
    time_start = time()
    logger = logging.getLogger(__name__)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    logger.info('started')

    case = 'plos'  # 'arxiv'
    cases = {
        'arxiv': './SentenceCorpus/unlabeled_articles/arxiv_unlabeled',
        'plos': './SentenceCorpus/unlabeled_articles/plos_unlabeled',
    }
    model_config = read_json(configs.doc_retrieval.en_ranker_tfidf_wiki)
    model_config['dataset_reader']['data_path'] = cases[case]
    model_config['dataset_reader']['dataset_format'] = 'txt'
    doc_retrieval = train_model(model_config)

    logger.info(doc_retrieval(['cerebellum']))

    do_squad = False

    # Download all the SQuAD models
    if do_squad:
        squad = build_model(configs.squad.multi_squad_noans_infer,
                            download=True)
    # Do not download the ODQA models, we've just trained it
    model = build_model(configs.odqa.en_odqa_infer_wiki, download=False)
    plos_questions = sorted([
        'What is rubella?',
        'What is whooping cough?',
        'What are yaws?',