def start_model_server(model_config, https=False, ssl_key=None, ssl_cert=None, port=None):
    server_config_path = get_settings_path() / SERVER_CONFIG_FILENAME
    server_params = get_server_params(server_config_path, model_config)

    host = server_params['host']
    port = port or server_params['port']
    model_endpoint = server_params['model_endpoint']
    model_args_names = server_params['model_args_names']

    https = https or server_params['https']

    if https:
        ssh_key_path = Path(ssl_key or server_params['https_key_path']).resolve()
        if not ssh_key_path.is_file():
            e = FileNotFoundError('Ssh key file not found: please provide correct path in --key param or '
                                  'https_key_path param in server configuration file')
            log.error(e)
            raise e

        ssh_cert_path = Path(ssl_cert or server_params['https_cert_path']).resolve()
        if not ssh_cert_path.is_file():
            e = FileNotFoundError('Ssh certificate file not found: please provide correct path in --cert param or '
                                  'https_cert_path param in server configuration file')
            log.error(e)
            raise e

        ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        ssl_context.load_cert_chain(ssh_cert_path, ssh_key_path)
    else:
        ssl_context = None

    model = build_model(model_config)

    @app.route('/')
    def index():
        return redirect('/apidocs/')

    endpoint_description = {
        'description': 'A model endpoint',
        'parameters': [
            {
                'name': 'data',
                'in': 'body',
                'required': 'true',
                'example': {arg: ['value'] for arg in model_args_names}
            }
        ],
        'responses': {
            "200": {
                "description": "A model response"
            }
        }
    }

    @app.route(model_endpoint, methods=['POST'])
    @swag_from(endpoint_description)
    def answer():
        return interact(model, model_args_names)

    app.run(host=host, port=port, threaded=False, ssl_context=ssl_context)
def __init__(self, model_config: Union[str, Path], service_name: str, agent_namespace: str, batch_size: int,
             utterance_lifetime_sec: int, rabbit_host: str, rabbit_port: int, rabbit_login: str,
             rabbit_password: str, rabbit_virtualhost: str, loop: asyncio.AbstractEventLoop) -> None:
    self._add_to_buffer_lock = asyncio.Lock()
    self._infer_lock = asyncio.Lock()

    server_params = get_server_params(model_config)
    self._model_args_names = server_params['model_args_names']
    self._model = build_model(model_config)

    self._in_queue = None
    self._utterance_lifetime_sec = utterance_lifetime_sec
    self._batch_size = batch_size
    self._incoming_messages_buffer = []

    loop.run_until_complete(self._connect(loop=loop, host=rabbit_host, port=rabbit_port, login=rabbit_login,
                                          password=rabbit_password, virtualhost=rabbit_virtualhost,
                                          agent_namespace=agent_namespace))
    loop.run_until_complete(self._setup_queues(service_name, agent_namespace))
    loop.run_until_complete(self._in_queue.consume(callback=self._on_message_callback))

    log.info(f'Service in queue started consuming')
def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwargs) -> Component:
    """Builds and returns the Component from corresponding dictionary of parameters."""
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            component = _refs[config_params['ref']]
            if serialized is not None:
                component.deserialize(serialized)
            return component
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model
        refs = _refs.copy()
        _refs.clear()
        config = parse_config(expand_path(config_params['config_path']))
        model = build_model(config, serialized=serialized)
        _refs.clear()
        _refs.update(refs)
        return model

    cls_name = config_params.pop('class_name', None)
    if not cls_name:
        e = ConfigError('Component config has no `class_name` nor `ref` fields')
        log.exception(e)
        raise e
    cls = get_model(cls_name)

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    if serialized is not None:
        component.deserialize(serialized)
    return component
def make_agent() -> EcommerceAgent:
    """Make an agent

    Returns:
        agent: created Ecommerce agent
    """
    config_path = find_config('tfidf_retrieve')
    skill = build_model(config_path)
    agent = EcommerceAgent(skills=[skill])
    return agent
def from_params(params: Dict, mode: str = 'infer', **kwargs) -> Component:
    """Builds and returns the Component from corresponding dictionary of parameters."""
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model
        refs = _refs.copy()
        _refs.clear()
        config = parse_config(expand_path(config_params['config_path']))
        model = build_model(config)
        _refs.clear()
        _refs.update(refs)
        return model

    cls_name = config_params.pop('class_name', None)
    if not cls_name:
        e = ConfigError('Component config has no `class_name` nor `ref` fields')
        log.exception(e)
        raise e
    cls = get_model(cls_name)

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
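# A minimal sketch (with hypothetical component names) of the parameter dicts that
# from_params above consumes: 'class_name'/'id' build and register a component,
# 'ref' re-uses an already initialized one. Only the key names come from the code;
# the values are purely illustrative.
tokenizer_params = {
    'class_name': 'split_tokenizer',  # resolved to a class via get_model()
    'id': 'my_tokenizer',             # stored in _refs for later 'ref' lookups
}
reuse_params = {
    'ref': 'my_tokenizer',            # returns the component initialized above
}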
def __init__(self, model_config: Union[str, Path, dict], input_queue: Queue) -> None:
    """Builds DeepPavlov model, initiates class attributes.

    Args:
        model_config: Path to DeepPavlov model config file.
        input_queue: Queue for incoming requests from channel.
    """
    super(BaseBot, self).__init__()
    self.input_queue = input_queue
    self._run_flag = True
    self._model = build_model(model_config)
    self._conversations = dict()
    log.info('Bot initiated')
def interact_model_by_telegram(config, token=None):
    server_config_path = Path(get_settings_path(), SERVER_CONFIG_FILENAME)
    server_config = read_json(server_config_path)
    token = token if token else server_config['telegram_defaults']['token']
    if not token:
        e = ValueError('Telegram token required: initiate -t param or telegram_defaults/token '
                       'in server configuration file')
        log.error(e)
        raise e

    model = build_model(config)
    model_name = type(model.get_main_component()).__name__
    skill = DefaultStatelessSkill(model)
    agent = DefaultAgent([skill], skills_processor=DefaultRichContentWrapper())
    init_bot_for_model(agent, token, model_name)
def start_model_server(model_config: Path,
                       https: Optional[bool] = None,
                       ssl_key: Optional[str] = None,
                       ssl_cert: Optional[str] = None,
                       port: Optional[int] = None) -> None:
    server_params = get_server_params(model_config)

    host = server_params['host']
    port = port or server_params['port']
    model_endpoint = server_params['model_endpoint']
    model_args_names = server_params['model_args_names']

    ssl_config = get_ssl_params(server_params, https, ssl_key=ssl_key, ssl_cert=ssl_cert)

    model = build_model(model_config)

    def batch_decorator(cls: MetaModel) -> MetaModel:
        cls.__annotations__ = {arg_name: list for arg_name in model_args_names}
        cls.__fields__ = {arg_name: Field(name=arg_name, type_=list, class_validators=None,
                                          model_config=BaseConfig, required=False, schema=Schema(None))
                          for arg_name in model_args_names}
        return cls

    @batch_decorator
    class Batch(BaseModel):
        pass

    redirect_root_to_docs(app, 'answer', model_endpoint, 'post')

    model_endpoint_post_example = {arg_name: ['string'] for arg_name in model_args_names}

    @app.post(model_endpoint, summary='A model endpoint')
    async def answer(item: Batch = Body(..., example=model_endpoint_post_example)) -> List:
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(None, interact, model, item.dict())

    @app.post('/probe', include_in_schema=False)
    async def probe(item: Batch) -> List[str]:
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(None, test_interact, model, item.dict())

    @app.get('/api', summary='Model argument names')
    async def api() -> List[str]:
        return model_args_names

    uvicorn.run(app, host=host, port=port, logger=uvicorn_log,
                ssl_version=ssl_config.version, ssl_keyfile=ssl_config.keyfile,
                ssl_certfile=ssl_config.certfile, timeout_keep_alive=20)
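# A minimal client sketch for the 'answer' endpoint started above. The host, port,
# endpoint path and the argument name 'x' are assumptions standing in for the values
# taken from the server config and model_args_names; the {arg: [values]} batch format
# is the one shown in model_endpoint_post_example.
import requests

response = requests.post('http://0.0.0.0:5000/model',
                         json={'x': ['What is the capital of France?']})
print(response.json())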
def predict_with_model(config_path: [Path, str]) -> List[Optional[List[str]]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to config

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CONLL-U descriptions.
    """
    config = parse_config(config_path)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['class_name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {k: v for k, v in reader_config.items() if k not in ['class_name', 'data_path']}
    data: Dict = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator = from_params(iterator_config, data=data)

    model = build_model(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(batch_size=batch_size, data_type="test",
                                                shuffle=False, return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem

    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
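# A sketch of the 'predict' section that predict_with_model above reads from its config.
# The keys mirror the config['predict'] lookups in the code; the values are illustrative.
predict_section = {
    'batch_size': 32,                        # -1 processes the whole test set in one batch
    'outfile': 'predictions/test_tags.txt',  # optional; omit to skip writing results to a file
}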
def __init__(self, model_config: Path, socket_type: str, port: Optional[int] = None,
             socket_file: Optional[Union[str, Path]] = None) -> None:
    """Initialize socket server.

    Args:
        model_config: Path to the config file.
        socket_type: Socket family. "TCP" for the AF_INET socket, "UNIX" for the AF_UNIX.
        port: Port number for the AF_INET address family. If parameter is not defined,
            the port number from the model_config is used.
        socket_file: Path to the file to which server of the AF_UNIX address family connects.
            If parameter is not defined, the path from the model_config is used.
    """
    socket_config_path = get_settings_path() / SOCKET_CONFIG_FILENAME
    self._params = get_server_params(socket_config_path, model_config)
    self._socket_type = socket_type or self._params['socket_type']

    if self._socket_type == 'TCP':
        host = self._params['host']
        port = port or self._params['port']
        self._address_family = socket.AF_INET
        self._launch_msg = f'{self._params["binding_message"]} http://{host}:{port}'
        self._bind_address = (host, port)
    elif self._socket_type == 'UNIX':
        self._address_family = socket.AF_UNIX
        bind_address = socket_file or self._params['unix_socket_file']
        bind_address = Path(bind_address).resolve()
        if bind_address.exists():
            bind_address.unlink()
        self._bind_address = str(bind_address)
        self._launch_msg = f'{self._params["binding_message"]} {self._bind_address}'
    else:
        raise ValueError(f'socket type "{self._socket_type}" is not supported')

    self._dialog_logger = DialogLogger(agent_name='dp_api')
    self._log = getLogger(__name__)
    self._loop = asyncio.get_event_loop()
    self._model = build_model(model_config)
    self._socket = socket.socket(self._address_family, socket.SOCK_STREAM)
    self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    self._socket.setblocking(False)
def __init__(self, model_config: Path, socket_type: str, port: Optional[int] = None,
             socket_file: Optional[Union[str, Path]] = None) -> None:
    """Initializes socket server.

    Args:
        model_config: Path to the config file.
        socket_type: Socket family. "TCP" for the AF_INET socket server, "UNIX" for UNIX Domain Socket server.
        port: Port number for the AF_INET address family. If parameter is not defined,
            the port number from the utils/settings/server_config.json is used.
        socket_file: Path to the file to which UNIX Domain Socket server connects. If parameter
            is not defined, the path from the utils/settings/server_config.json is used.

    Raises:
        ValueError: If ``socket_type`` parameter is neither "TCP" nor "UNIX".
    """
    server_params = get_server_params(model_config)
    socket_type = socket_type or server_params['socket_type']
    self._loop = asyncio.get_event_loop()

    if socket_type == 'TCP':
        host = server_params['host']
        port = port or server_params['port']
        self._launch_msg = f'{server_params["socket_launch_message"]} http://{host}:{port}'
        self._loop.create_task(asyncio.start_server(self._handle_client, host, port))
    elif socket_type == 'UNIX':
        socket_file = socket_file or server_params['unix_socket_file']
        socket_path = Path(socket_file).resolve()
        if socket_path.exists():
            socket_path.unlink()
        self._launch_msg = f'{server_params["socket_launch_message"]} {socket_file}'
        self._loop.create_task(asyncio.start_unix_server(self._handle_client, socket_file))
    else:
        raise ValueError(f'socket type "{socket_type}" is not supported')

    self._model = build_model(model_config)
    self._model_args_names = server_params['model_args_names']
def predict_with_model(config_path: [Path, str], infile: Optional[Union[Path, str]] = None,
                       input_format: str = "ud", batch_size: [int] = 16,
                       output_format: str = "basic") -> List[Optional[List[str]]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to config

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CONLL-U descriptions.
    """
    config = parse_config(config_path)

    if infile is None:
        if sys.stdin.isatty():
            raise RuntimeError('To process data from terminal please use interact mode')
        infile = sys.stdin
    else:
        infile = expand_path(infile)
    if input_format in ["ud", "conllu", "vertical"]:
        from_words = (input_format == "vertical")
        data: List[tuple] = read_infile(infile, from_words=from_words)
        # keeping only sentences
        data = [elem[0] for elem in data]
    else:
        if infile is not sys.stdin:
            with open(infile, "r", encoding="utf8") as fin:
                data = fin.readlines()
        else:
            data = sys.stdin.readlines()
    model = build_model(config, load_trained=True)
    model.pipe[-1][-1].set_format_mode(output_format)
    answers = model.batched_call(data, batch_size=batch_size)
    for elem in answers:
        print(elem)
    return answers
def main():
    # Read data
    args = parser.parse_args()
    dataset = args.dataset_path
    dataset_name = args.dataset_name

    if dataset_name == 'sddata_pull':
        questions, true_answers = parse_sddata_pull(dataset, args.answer_key)
    elif dataset_name == 'ru_squad':
        questions, true_answers = parse_ru_squad(dataset)
    else:
        raise RuntimeError(f'Unknown dataset type "{dataset_name}"! Select from "sddata_pull" or "ru_squad"')

    formatted_true_answers = normalize_strings(true_answers)

    # Build models
    ranker = build_model(configs.doc_retrieval.ru_ranker_tfidf_wiki, download=True)
    reader = build_model(configs.squad.multi_squad_ru_retr_noans_rubert_infer, download=True)

    db_path = str(ranker.pipe[0][2].vectorizer.load_path).split("models", 1)[0] + "downloads/odqa/ruwiki.db"
    vocab = WikiSQLiteVocab(db_path, join_docs=False)
    logit_ranker = LogitRanker(reader)
    chunker = DocumentChunker(paragraphs=True, flatten_result=True)
    str_multiplier = StringMultiplier()

    start_time = time.time()
    try:
        # Get ranker results
        ranker.pipe[0][2].top_n = args.top_n[1]
        doc_indices = ranker(questions)
        docs = [vocab([indices]) for indices in doc_indices]
        formatted_docs = []
        for dd in docs:
            formatted_docs.append(normalize_strings(dd))
        del ranker
        del vocab

        for n in range(args.top_n[0], args.top_n[1] + 1):
            # Counting ranker metrics
            logger.info(f"Counting metrics for top {n} retrieved docs.")
            top_docs = [i[:n] for d in docs for i in d]
            formatted_top_docs = [i[:n] for d in formatted_docs for i in d]
            recall = ranker_em_recall(top_docs, true_answers)
            logger.info(f"Ranker em_recall {recall:.3f}")
            soft_recall = ranker_em_recall(formatted_top_docs, formatted_true_answers)
            logger.info(f"Ranker soft_recall {soft_recall:.3f}")

            # Counting ODQA metrics
            chunks = chunker(top_docs)
            mult_questions = str_multiplier(questions, chunks)
            if dataset_name == 'ru_squad':
                logit_ranker.sort_noans = True
            pred_answers, pred_scores = logit_ranker(chunks, mult_questions)
            f1 = squad_v1_f1(true_answers, pred_answers)
            em = squad_v1_exact_match(true_answers, pred_answers)
            logger.info(f"ODQA f1 v1 {f1:.3f}")
            logger.info(f"ODQA em v1 {em:.3f}")

            # Write model answers to a file
            with open(f'answers_top_{n}.csv', mode='w') as csv_file:
                writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                for pr_a, pr_s in zip(pred_answers, pred_scores):
                    writer.writerow([pr_a, round(pr_s, 3)])

        t = time.time() - start_time
        logger.info(f"Completed successfully in {t:.3f} seconds.")

    except Exception as e:
        logger.exception(e)
        t = time.time() - start_time
        logger.info(f"Completed with exception in {t:.3f} seconds.")
        raise
import tensorflow as tf

# Let the TensorFlow session allocate GPU memory on demand instead of grabbing it all at once
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

from deeppavlov import configs
from deeppavlov.core.common.file import read_json
from deeppavlov import train_model
from deeppavlov.core.commands.infer import build_model

model_config = read_json('en_ranker_tfidf_data.json')
ranker = train_model(model_config)
print('==========RANKER======', ranker(['accidents']))

# Download all the SQuAD models
# squad = build_model(configs.squad.multi_squad_noans_infer, download=True)

# Do not download the ODQA model, we've just trained it
odqa = build_model('en_odqa_infer_data.json', download=False)

val_q = "Do you like people?"
answer1 = odqa([val_q])  # provide answer based on trained data
print(answer1)
import asyncio
import websockets
import sys
import json

import deeppavlov
from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json
from deeppavlov.core.commands.infer import build_model

# FEEDER_SOCKET_HOST = '127.0.0.1'
# FEEDER_SOCKET_PORT = 9007

connections = set()

model_config = read_json(configs.faq.fasttext_avg_autofaq)
intents = build_model(model_config)


class ActiveSpeaker:
    lastActiveSpeaker = None
    agent_speech = []
    caller_speech = []


async def run_feeding_process(websocket, path):
    if path == '/sub':
        connections.add(websocket)
        print('subscriber #' + str(len(connections)) + ' got connected')
        try:
            async for msg in websocket:
def get_default_agent() -> DefaultAgent:
    model = build_model(model_config)
    skill = DefaultStatelessSkill(model)
    agent = DefaultAgent([skill], skills_processor=DefaultRichContentWrapper())
    return agent
# -*- coding: utf-8 -*-
import telebot as telebot
from telebot import apihelper

from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json
from deeppavlov.core.commands.infer import build_model
from deeppavlov.core.commands.train import train_evaluate_model_from_config

print("import successful")

far = train_evaluate_model_from_config("./config.json")
faq = build_model("./config.json", download=True)

model_config = read_json("./config.json")
model_config["dataset_reader"]["data_path"] = "./faq_school_en.csv"
model_config["dataset_reader"]["data_url"] = None
faq = train_model(model_config)
print("train model")

bot = telebot.TeleBot('301914397:AAEmR8WlfzyxQT53zdpqHrSwR8iwaKEr-h8')


def GetAnswer(question):
    print("get question")
    return faq([question])[0][0][0]


@bot.message_handler(content_types=['text'])
def get_text_messages(message):
    print("text handler")
    if message.text == "Привет":
        bot.send_message(message.from_user.id, "Привет, чем я могу тебе помочь?")
    elif message.text == "/help":
from deeppavlov import configs
from deeppavlov.core.commands.infer import build_model

odqa = build_model('model_config.json', download=False)

# for testing
a = odqa(["what is tuberculosis?"])
print(a)
def __init__(self, model_config):
    model_config = read_json(model_config)
    train_model(model_config)
    build_model(configs.squad.multi_squad_noans_infer, download=True)
def start_model_server(model_config: Path, https: bool = False, ssl_key: Optional[str] = None,
                       ssl_cert: Optional[str] = None, port: Optional[int] = None) -> None:
    server_config_path = get_settings_path() / SERVER_CONFIG_FILENAME
    server_params = get_server_params(server_config_path, model_config)

    host = server_params['host']
    port = port or server_params['port']
    model_endpoint = server_params['model_endpoint']
    docs_endpoint = server_params['docs_endpoint']
    model_args_names = server_params['model_args_names']

    https = https or server_params['https']

    if https:
        ssh_key_path = Path(ssl_key or server_params['https_key_path']).resolve()
        if not ssh_key_path.is_file():
            e = FileNotFoundError('Ssh key file not found: please provide correct path in --key param or '
                                  'https_key_path param in server configuration file')
            log.error(e)
            raise e

        ssh_cert_path = Path(ssl_cert or server_params['https_cert_path']).resolve()
        if not ssh_cert_path.is_file():
            e = FileNotFoundError('Ssh certificate file not found: please provide correct path in --cert param or '
                                  'https_cert_path param in server configuration file')
            log.error(e)
            raise e

        ssl_version = PROTOCOL_TLSv1_2
        ssl_keyfile = str(ssh_key_path)
        ssl_certfile = str(ssh_cert_path)
    else:
        ssl_version = None
        ssl_keyfile = None
        ssl_certfile = None

    model = build_model(model_config)

    def batch_decorator(cls: MetaModel) -> MetaModel:
        cls.__annotations__ = {arg_name: List[str] for arg_name in model_args_names}
        cls.__fields__ = {arg_name: Field(name=arg_name, type_=List[str], class_validators=None,
                                          model_config=BaseConfig, required=False, schema=Schema(None))
                          for arg_name in model_args_names}
        return cls

    @batch_decorator
    class Batch(BaseModel):
        pass

    @app.get('/', include_in_schema=False)
    async def redirect_to_docs() -> RedirectResponse:
        operation_id = generate_operation_id_for_path(name='answer', path=model_endpoint, method='post')
        response = RedirectResponse(url=f'{docs_endpoint}#/default/{operation_id}')
        return response

    @app.post(model_endpoint, status_code=200, summary='A model endpoint')
    async def answer(item: Batch) -> JSONResponse:
        return interact(model, item.dict())

    @app.post('/probe', status_code=200, include_in_schema=False)
    async def probe(item: Batch) -> JSONResponse:
        return test_interact(model, item.dict())

    @app.get('/api', status_code=200, summary='Model argument names')
    async def api() -> JSONResponse:
        return JSONResponse(model_args_names)

    uvicorn.run(app, host=host, port=port, logger=uvicorn_log, ssl_version=ssl_version,
                ssl_keyfile=ssl_keyfile, ssl_certfile=ssl_certfile)
Original file is located at
    https://colab.research.google.com/drive/1OL-h0sB7xf3po7v9FYqSKcfIad4cUEC5
"""

!pip install -q deeppavlov

# Commented out IPython magic to ensure Python compatibility.
# %load https://raw.githubusercontent.com/deepmipt/DeepPavlov/master/deeppavlov/configs/faq/tfidf_logreg_en_faq.json

!python -m deeppavlov install tfidf_logreg_en_faq

!python -m deeppavlov interact tfidf_logreg_en_faq -d

"""Alternatively, you can **build_model** from the Python code as in the example below. In addition,
please make sure that you can navigate the configuration files by using Autocomplete (Tab key)
with the **configs** module."""

!pip install deeppavlov

from deeppavlov import configs
from deeppavlov.core.common.file import read_json
from deeppavlov.core.commands.infer import build_model

faq = build_model(configs.squad.squad_ru_bert, download=True)
a = faq(["I need help"])
a

!pip install git+https://github.com/deepmipt/bert.git@feat/multi_gpu

from deeppavlov import build_model, configs

model = build_model(configs.squad.squad_ru_bert, download=True)
model(['Борис видит как люди видят красоту ходят направо налево прямо назад. '],
      ['Что видит Борис?'])
def get_default_agent() -> DefaultAgent:
    model = build_model(model_config)
    skill = DefaultStatelessSkill(model) if default_skill_wrap else model
    agent = DefaultAgent([skill], skills_processor=DefaultRichContentWrapper())
    return agent
#!/usr/bin/env python
from deeppavlov import configs
from deeppavlov.core.common.file import read_json
from deeppavlov.core.commands.infer import build_model

# Run python -m spacy download en_core_web_sm
faq = build_model(configs.faq.tfidf_logreg_en_faq, download=True)
a = faq(["I need help"])
print(a)
from deeppavlov import configs
from deeppavlov.core.commands.infer import build_model
from deeppavlov.core.common.file import read_json

if __name__ == '__main__':
    configuration = read_json(configs.faq.tfidf_logreg_en_faq)
    # Point the dataset reader at the local FAQ file instead of the default remote one
    configuration['dataset_reader']['data_path'] = './data/faq_school_en.csv'
    configuration['dataset_reader']['data_url'] = None

    faq = build_model(config=configuration)  # , download=True, )

    question = 'I need help'
    answer = faq([question])[0][0]
    print('Q: {} A: {}'.format(question, answer))
def train_evaluate_model_from_config(config: [str, Path, dict], iterator=None, *,
                                     to_train=True, to_validate=True, download=False,
                                     start_epoch_num=0, recursive=False) -> Dict[str, Dict[str, float]]:
    """Make training and evaluation of the model described in corresponding configuration file."""
    config = parse_config(config)

    if download:
        deep_download(config)

    if to_train and recursive:
        for subconfig in get_all_elems_from_json(config['chainer'], 'config_path'):
            log.info(f'Training "{subconfig}"')
            train_evaluate_model_from_config(subconfig, download=False, recursive=True)

    import_packages(config.get('metadata', {}).get('imports', []))

    if iterator is None:
        try:
            data = read_data_by_config(config)
        except ConfigError as e:
            to_train = False
            log.warning(f'Skipping training. {e.message}')
        else:
            iterator = get_iterator_from_config(config, data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    in_y = config['chainer'].get('in_y', ['y'])
    if isinstance(in_y, str):
        in_y = [in_y]
    if isinstance(config['chainer']['out'], str):
        config['chainer']['out'] = [config['chainer']['out']]
    metrics_functions = _parse_metrics(train_config['metrics'], in_y, config['chainer']['out'])

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions, start_epoch_num=start_epoch_num)

        model.destroy()

    res = {}

    if iterator is not None and (train_config['validate_best'] or train_config['test_best']):
        model = build_model(config, load_trained=to_train)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }
            res['valid'] = report['valid']['metrics']
            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test',
                                    show_examples=train_config['show_examples'])
            }
            res['test'] = report['test']['metrics']
            print(json.dumps(report, ensure_ascii=False))

        model.destroy()

    return res
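# An illustrative 'train' section that would override the defaults populated by
# train_evaluate_model_from_config above; only keys referenced in that code are shown,
# and the values are examples rather than recommended settings.
train_section = {
    'metrics': ['accuracy'],
    'batch_size': 64,
    'validate_best': True,
    'test_best': True,
    'show_examples': False,
}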
def skill_server(config: Union[dict, str, Path], https=False, ssl_key=None, ssl_cert=None, *,
                 host: Optional[str] = None, port: Optional[int] = None, endpoint: Optional[str] = None,
                 download: bool = False, batch_size: Optional[int] = None,
                 env: Optional[Dict[str, str]] = None):
    if env:
        os.environ.update(env)
    host = host or '0.0.0.0'
    port = port or 80
    endpoint = endpoint or '/skill'
    if batch_size is not None and batch_size < 1:
        log.warning(f'batch_size of {batch_size} is less than 1 and is interpreted as unlimited')
        batch_size = None

    ssl_context = _get_ssl_context(ssl_key, ssl_cert) if https else None

    model = build_model(config, download=download)

    endpoint_description = {
        'description': 'A skill endpoint',
        'parameters': [
            {
                'name': 'data',
                'in': 'body',
                'required': 'true',
                'example': {
                    'version': '0.9.3',
                    'dialogs': [
                        {
                            'id': '5c65706b0110b377e17eba41',
                            'location': None,
                            'utterances': [
                                {
                                    "id": "5c62f7330110b36bdd1dc5d7",
                                    "text": "Привет!",
                                    "user_id": "5c62f7330110b36bdd1dc5d5",
                                    "annotations": {
                                        "ner": {},
                                        "coref": {},
                                        "sentiment": {},
                                        "obscenity": {}
                                    },
                                    "date": "2019-02-12 16:41:23.142000"
                                },
                                {
                                    "id": "5c62f7330110b36bdd1dc5d8",
                                    "active_skill": "chitchat",
                                    "confidence": 0.85,
                                    "text": "Привет, я бот!",
                                    "user_id": "5c62f7330110b36bdd1dc5d6",
                                    "annotations": {
                                        "ner": {},
                                        "coref": {},
                                        "sentiment": {},
                                        "obscenity": {}
                                    },
                                    "date": "2019-02-12 16:41:23.142000"
                                },
                                {
                                    "id": "5c62f7330110b36bdd1dc5d9",
                                    "text": "Как дела?",
                                    "user_id": "5c62f7330110b36bdd1dc5d5",
                                    "annotations": {
                                        "ner": {},
                                        "coref": {},
                                        "sentiment": {},
                                        "obscenity": {}
                                    },
                                    "date": "2019-02-12 16:41:23.142000"
                                }
                            ],
                            'user': {
                                'id': '5c62f7330110b36bdd1dc5d5',
                                'user_telegram_id': '44d279ea-62ab-4c71-9adb-ed69143c12eb',
                                'user_type': 'human',
                                'device_type': None,
                                'persona': None
                            },
                            'bot': {
                                'id': '5c62f7330110b36bdd1dc5d6',
                                'user_telegram_id': '56f1d5b2-db1a-4128-993d-6cd1bc1b938f',
                                'user_type': 'bot',
                                'device_type': None,
                                'personality': None
                            },
                            'channel_type': 'telegram'
                        }
                    ]
                }
            }
        ],
        'responses': {
            "200": {
                "description": "A skill response",
                'example': {
                    'responses': [{name: 'sample-answer' for name in model.out_params}]
                }
            }
        }
    }

    @app.route('/')
    def index():
        return redirect('/apidocs/')

    @app.route(endpoint, methods=['POST'])
    @swag_from(endpoint_description)
    def answer():
        return interact_skill(model, batch_size)

    app.run(host=host, port=port, threaded=False, ssl_context=ssl_context)
def _load(self) -> None:
    if not self._loaded:
        self._chainer.destroy()
        self._chainer = build_model({'chainer': self.chainer_config}, load_trained=self._saved)
        self._loaded = True
# coding: utf-8

# Import UTF-8 support.
from __future__ import unicode_literals

from deeppavlov import configs
from deeppavlov.core.commands.infer import build_model
import numpy as np

ecommerce = build_model(configs.ecommerce_skill.tfidf_retrieve1, load_trained=True)
result = ecommerce(['обои'], [[]], [{}])
market_id2yandex_id = dict()
print(result)

# Import modules for working with JSON and logs.
import json
import logging

# Import Flask submodules to run the web service.
from flask import Flask, request

app = Flask(__name__)
logging.basicConfig(level=logging.DEBUG)

# Storage for session data.
sessionStorage = {}
search_result = []
from deeppavlov.core.commands.infer import build_model

if __name__ == '__main__':
    config_ = './tfidf_logreg_en_faq.json'
    model = build_model(
        config=config_,
        download=True,
        load_trained=True,
    )
    for question in [
        'What time is it now?',
        'What are your open hours?',
    ]:
        result = model([question])
        print('Q: {} A: {}'.format(question, result[0][0]))
from deeppavlov import configs
from deeppavlov.core.commands.infer import build_model
from deeppavlov.core.commands.train import train_evaluate_model_from_config

if __name__ == '__main__':
    train_evaluate_model_from_config(configs.doc_retrieval.en_ranker_tfidf_wiki, download=True)
    train_evaluate_model_from_config(configs.squad.multi_squad_noans, download=True)
    odqa = build_model(configs.odqa.en_odqa_infer_wiki, load_trained=True)
    result = odqa(['What is the name of Darth Vader\'s son?'])
    print(result)
def odqa_deeppavlov(questions):
    odqa = build_model(configs.odqa.en_odqa_infer_wiki, download=True)
    results = odqa(questions)
    return results
cases = {
    'arxiv': './SentenceCorpus/unlabeled_articles/arxiv_unlabeled',
    'plos': './SentenceCorpus/unlabeled_articles/plos_unlabeled',
}

model_config = read_json(configs.doc_retrieval.en_ranker_tfidf_wiki)
model_config['dataset_reader']['data_path'] = cases[case]
model_config['dataset_reader']['dataset_format'] = 'txt'
doc_retrieval = train_model(model_config)
logger.info(doc_retrieval(['cerebellum']))

do_squad = False
# Download all the SQuAD models
if do_squad:
    squad = build_model(configs.squad.multi_squad_noans_infer, download=True)

# Do not download the ODQA models, we've just trained it
model = build_model(configs.odqa.en_odqa_infer_wiki, download=False)

plos_questions = sorted([
    'What is rubella?',
    'What is whooping cough?',
    'What are yaws?',
    'What is influenza?',
    'What is measles?',
    'What is marginalization?',
    'Who was Bernoulli?',
    'Who is or was Bayes?',
    'What is phylogeny?',
    'What is phylogenetic?',
    'What is evolution?',
    'What is protein?',