def init_bot_for_model(token, model):
    bot = telebot.TeleBot(token)

    model_name = type(model).__name__
    models_info = read_json('../telegram_utils/models_info.json')
    model_info = models_info[model_name] if model_name in models_info else models_info['@default']

    @bot.message_handler(commands=['start'])
    def send_start_message(message):
        chat_id = message.chat.id
        out_message = model_info['start_message']
        if hasattr(model, 'reset'):
            model.reset()
        bot.send_message(chat_id, out_message)

    @bot.message_handler(commands=['help'])
    def send_help_message(message):
        chat_id = message.chat.id
        out_message = model_info['help_message']
        bot.send_message(chat_id, out_message)

    @bot.message_handler()
    def handle_inference(message):
        chat_id = message.chat.id
        context = message.text

        pred = model.infer(context)
        reply_message = str(pred)
        bot.send_message(chat_id, reply_message)

    bot.polling()
def build_model_from_config(config: Union[str, Path, dict], mode: str = 'infer',
                            load_trained: bool = False, as_component: bool = False) -> Chainer:
    """Build and return the model described in the corresponding configuration file."""
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'),
                    as_component=as_component)

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning('No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                            .format(component_config.get('name', component_config.get('ref', 'UNKNOWN'))))

        component = from_params(component_config, mode=mode)

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
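# A minimal usage sketch for build_model_from_config above. The config path
# ('my_model_config.json') and the input sentence are hypothetical; any config
# with a 'chainer' section should work. The returned Chainer is callable on a
# batch of inputs, as the Telegram handlers elsewhere in this file show.
from deeppavlov.core.common.file import read_json

config = read_json('my_model_config.json')  # hypothetical path
model = build_model_from_config(config, mode='infer')
predictions = model(['What is the weather like today?'])
print(predictions[0])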
def train_model_from_config(config_path: str, mode='train'):
    usr_dir = paths.USR_PATH
    config = read_json(config_path)

    reader_config = config['dataset_reader']
    # NOTE: dataset readers take no params here because they define no __init__()
    reader = from_params(REGISTRY[reader_config['name']], {})
    data = reader.read(reader_config.get('data_path', usr_dir))

    dataset_config = config['dataset']
    dataset_name = dataset_config['name']
    dataset = from_params(REGISTRY[dataset_name], dataset_config, data=data)

    vocabs = {}
    if 'vocabs' in config:
        for vocab_param_name, vocab_config in config['vocabs'].items():
            vocab_name = vocab_config['name']
            v = from_params(REGISTRY[vocab_name], vocab_config, mode=mode)
            v.train(dataset.iter_all('train'))
            vocabs[vocab_param_name] = v

    model_config = config['model']
    model_name = model_config['name']
    model = from_params(REGISTRY[model_name], model_config, vocabs=vocabs, mode=mode)

    model.train(dataset)
def init_bot_for_model(agent: Agent, token: str, model_name: str):
    bot = telebot.TeleBot(token)

    models_info_path = Path(get_settings_path(), TELEGRAM_MODELS_INFO_FILENAME).resolve()
    models_info = read_json(str(models_info_path))
    model_info = models_info[model_name] if model_name in models_info else models_info['@default']

    @bot.message_handler(commands=['start'])
    def send_start_message(message):
        chat_id = message.chat.id
        out_message = model_info['start_message']
        bot.send_message(chat_id, out_message)

    @bot.message_handler(commands=['help'])
    def send_help_message(message):
        chat_id = message.chat.id
        out_message = model_info['help_message']
        bot.send_message(chat_id, out_message)

    @bot.message_handler()
    def handle_inference(message):
        chat_id = message.chat.id
        context = message.text

        response: RichMessage = agent([context], [chat_id])[0]
        for message in response.json():
            message_text = message['content']
            bot.send_message(chat_id, message_text)

    bot.polling()
def get_config_downloads(config_path):
    dp_root_back = get_deeppavlov_root()
    config = read_json(config_path)
    set_deeppavlov_root(config)

    downloads = set()
    if 'metadata' in config and 'download' in config['metadata']:
        for resource in config['metadata']['download']:
            if isinstance(resource, str):
                resource = {'url': resource}

            url = resource['url']
            dest = expand_path(resource.get('subdir', ''))

            downloads.add((url, dest))

    config_references = [expand_path(config_ref)
                         for config_ref in get_all_elems_from_json(config, 'config_path')]

    downloads |= {(url, dest) for config in config_references for url, dest in get_config_downloads(config)}

    set_deeppavlov_root({'deeppavlov_root': dp_root_back})

    return downloads
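# A usage sketch for get_config_downloads above: each element of the returned
# set is a (url, destination_path) pair, including resources of any nested
# configs referenced via 'config_path'. The config path here is hypothetical.
for url, dest in get_config_downloads('my_model_config.json'):
    print(url, '->', dest)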
def predict_on_stream(config_path: str, batch_size: int = 1, file_path: Optional[str] = None) -> None:
    """Make a prediction with the component described in the corresponding configuration file."""
    import sys
    import json
    from itertools import islice

    if file_path is None or file_path == '-':
        if sys.stdin.isatty():
            raise RuntimeError('To process data from terminal please use interact mode')
        f = sys.stdin
    else:
        f = open(file_path, encoding='utf8')

    config = read_json(config_path)
    model: Chainer = build_model_from_config(config)

    args_count = len(model.in_x)
    while True:
        batch = (l.strip() for l in islice(f, batch_size * args_count))

        if args_count > 1:
            batch = zip(*[batch] * args_count)

        batch = list(batch)
        if not batch:
            break

        for res in model(batch):
            if type(res).__module__ == 'numpy':
                res = res.tolist()
            if not isinstance(res, str):
                res = json.dumps(res, ensure_ascii=False)
            print(res, flush=True)

    if f is not sys.stdin:
        f.close()
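# A usage sketch for predict_on_stream, assuming a plain-text file with one
# input per line ('questions.txt' is hypothetical). With batch_size=2 the model
# is fed two stripped lines at a time; passing '-' (or nothing) would read from
# stdin instead, as the function above shows.
predict_on_stream('my_model_config.json', batch_size=2, file_path='questions.txt')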
def init_bot_for_model(token, model):
    bot = telebot.TeleBot(token)

    config_dir = Path(__file__).resolve().parent
    config_path = Path(config_dir, TELEGRAM_UI_CONFIG_FILENAME).resolve()
    models_info = read_json(str(config_path))

    model_name = type(model.get_main_component()).__name__
    model_info = models_info[model_name] if model_name in models_info else models_info['@default']

    buffer = {}
    expect = []

    @bot.message_handler(commands=['start'])
    def send_start_message(message):
        chat_id = message.chat.id
        out_message = model_info['start_message']
        if hasattr(model, 'reset'):
            model.reset()
        bot.send_message(chat_id, out_message)
        if len(model.in_x) > 1:
            buffer[chat_id] = []
            expect[:] = list(model.in_x)
            bot.send_message(chat_id, f'Please, send {expect.pop(0)}')

    @bot.message_handler(commands=['help'])
    def send_help_message(message):
        chat_id = message.chat.id
        out_message = model_info['help_message']
        bot.send_message(chat_id, out_message)

    @bot.message_handler()
    def handle_inference(message):
        chat_id = message.chat.id
        context = message.text

        if len(model.in_x) > 1:
            if chat_id not in buffer:
                send_start_message(message)
            else:
                buffer[chat_id].append(context)
                if expect:
                    bot.send_message(chat_id, f'Please, send {expect.pop(0)}')
                else:
                    pred = model([tuple(buffer[chat_id])])
                    reply_message = str(pred[0])
                    bot.send_message(chat_id, reply_message)

                    buffer[chat_id] = []
                    expect[:] = list(model.in_x)
                    bot.send_message(chat_id, f'Please, send {expect.pop(0)}')
        else:
            pred = model([context])
            reply_message = str(pred[0])
            bot.send_message(chat_id, reply_message)

    bot.polling()
def __init__(self, enabled: bool = False, agent_name: Optional[str] = None) -> None:
    self.config: dict = read_json(get_settings_path() / LOGGER_CONFIG_FILENAME)
    self.enabled: bool = enabled or self.config['enabled']

    if self.enabled:
        self.agent_name: str = agent_name or self.config['agent_name']
        self.log_max_size: int = self.config['logfile_max_size_kb']
        self.log_file = self._get_log_file()
        self.log_file.writelines('"Agent initiated"\n')
def from_params(params: Dict, mode: str = 'infer', **kwargs) -> Component:
    """Build and return the Component from the corresponding dictionary of parameters."""
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e
    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model_from_config
        deeppavlov_root = get_deeppavlov_root()
        refs = _refs.copy()
        _refs.clear()
        config = read_json(expand_path(config_params['config_path']))
        model = build_model_from_config(config, as_component=True)
        set_deeppavlov_root({'deeppavlov_root': deeppavlov_root})
        _refs.clear()
        _refs.update(refs)
        return model
    elif 'class' in config_params:
        cls = cls_from_str(config_params.pop('class'))
    else:
        cls_name = config_params.pop('name', None)
        if not cls_name:
            e = ConfigError('Component config has no `name`, `ref` or `class` fields')
            log.exception(e)
            raise e
        cls = get_model(cls_name)

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
def __init__(self, pop_dict_path: str, load_path: str, top_n: int = 3,
             active: bool = True, **kwargs) -> None:
    pop_dict_path = expand_path(pop_dict_path)
    logger.info(f"Reading popularity dictionary from {pop_dict_path}")
    self.pop_dict = read_json(pop_dict_path)
    self.mean_pop = np.mean(list(self.pop_dict.values()))
    load_path = expand_path(load_path)
    logger.info(f"Loading popularity ranker from {load_path}")
    self.clf = joblib.load(load_path)
    self.top_n = top_n
    self.active = active
def get_server_params(server_config_path, model_config_path):
    server_config = read_json(server_config_path)
    model_config = read_json(model_config_path)

    server_params = server_config['common_defaults']

    if check_nested_dict_keys(model_config, ['metadata', 'labels', 'server_utils']):
        model_tag = model_config['metadata']['labels']['server_utils']
        if model_tag in server_config['model_defaults']:
            model_defaults = server_config['model_defaults'][model_tag]
            for param_name in model_defaults.keys():
                if model_defaults[param_name]:
                    server_params[param_name] = model_defaults[param_name]

    for param_name in server_params.keys():
        if not server_params[param_name]:
            log.error('"{}" parameter should be set either in common_defaults '
                      'or in model_defaults section of {}'.format(param_name, SERVER_CONFIG_FILENAME))
            sys.exit(1)

    return server_params
def set_usr_dir(config_path: str, usr_dir_name='download'):
    """Create the user directory for serialization."""
    config = read_json(config_path)
    try:
        usr_dir = Path(config['usr_dir'])
    except KeyError:
        root_dir = (Path(__file__) / ".." / ".." / ".." / "..").resolve()
        usr_dir = root_dir / usr_dir_name

    usr_dir.mkdir(exist_ok=True)

    paths.USR_PATH = usr_dir
def run_ms_bot_framework_server(agent_generator: callable, app_id: str, app_secret: str,
                                multi_instance: bool = False, stateful: bool = False,
                                port: Optional[int] = None):
    server_config_path = Path(get_settings_path(), SERVER_CONFIG_FILENAME).resolve()
    server_params = read_json(server_config_path)

    host = server_params['common_defaults']['host']
    port = port or server_params['common_defaults']['port']

    ms_bf_server_params = server_params['ms_bot_framework_defaults']

    ms_bf_server_params['multi_instance'] = multi_instance or server_params['common_defaults']['multi_instance']
    ms_bf_server_params['stateful'] = stateful or server_params['common_defaults']['stateful']

    ms_bf_server_params['auth_url'] = AUTH_URL
    ms_bf_server_params['auth_host'] = AUTH_HOST
    ms_bf_server_params['auth_content_type'] = AUTH_CONTENT_TYPE
    ms_bf_server_params['auth_grant_type'] = AUTH_GRANT_TYPE
    ms_bf_server_params['auth_scope'] = AUTH_SCOPE

    ms_bf_server_params['auth_app_id'] = app_id or ms_bf_server_params['auth_app_id']
    if not ms_bf_server_params['auth_app_id']:
        e = ValueError('Microsoft Bot Framework app id required: specify the -i param '
                       'or the auth_app_id param in the server configuration file')
        log.error(e)
        raise e

    ms_bf_server_params['auth_app_secret'] = app_secret or ms_bf_server_params['auth_app_secret']
    if not ms_bf_server_params['auth_app_secret']:
        e = ValueError('Microsoft Bot Framework app secret required: specify the -s param '
                       'or the auth_app_secret param in the server configuration file')
        log.error(e)
        raise e

    input_q = Queue()
    bot = Bot(agent_generator, ms_bf_server_params, input_q)
    bot.start()

    @app.route('/')
    def index():
        return redirect('/apidocs/')

    @app.route('/v3/conversations', methods=['POST'])
    def handle_activity():
        activity = request.get_json()
        bot.input_queue.put(activity)
        return jsonify({}), 200

    app.run(host=host, port=port, threaded=True)
def interact_model_by_telegram(config, token=None):
    server_config_path = Path(get_settings_path(), SERVER_CONFIG_FILENAME)
    server_config = read_json(server_config_path)
    token = token if token else server_config['telegram_defaults']['token']
    if not token:
        e = ValueError('Telegram token required: specify the -t param or telegram_defaults/token '
                       'in the server configuration file')
        log.error(e)
        raise e

    model = build_model(config)
    model_name = type(model.get_main_component()).__name__
    skill = DefaultStatelessSkill(model)
    agent = DefaultAgent([skill], skills_processor=DefaultRichContentWrapper())
    init_bot_for_model(agent, token, model_name)
def get_server_params(server_config_path, model_config):
    server_config = read_json(server_config_path)
    model_config = parse_config(model_config)

    server_params = server_config['common_defaults']

    if check_nested_dict_keys(model_config, ['metadata', 'labels', 'server_utils']):
        model_tag = model_config['metadata']['labels']['server_utils']
        if model_tag in server_config['model_defaults']:
            model_defaults = server_config['model_defaults'][model_tag]
            for param_name in model_defaults.keys():
                if model_defaults[param_name]:
                    server_params[param_name] = model_defaults[param_name]

    return server_params
def parse_config(config: Union[str, Path, dict]) -> dict:
    """Read the config's variables and apply their values to all its properties."""
    if isinstance(config, (str, Path)):
        config = read_json(find_config(config))

    variables = {
        'DEEPPAVLOV_PATH': os.getenv('DP_DEEPPAVLOV_PATH', Path(__file__).parent.parent.parent)
    }
    for name, value in config.get('metadata', {}).get('variables', {}).items():
        env_name = f'DP_{name}'
        if env_name in os.environ:
            value = os.getenv(env_name)
        variables[name] = value.format(**variables)

    return _parse_config_property(config, variables)
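# A small sketch of the variable substitution that parse_config performs,
# assuming this hypothetical in-memory config. 'MODELS_PATH' is declared under
# metadata.variables and referenced as '{MODELS_PATH}' elsewhere; an
# environment variable DP_MODELS_PATH, if set, would override its value.
sample_config = {
    'metadata': {'variables': {'MODELS_PATH': '{DEEPPAVLOV_PATH}/models'}},
    'chainer': {'pipe': [{'load_path': '{MODELS_PATH}/classifier.pkl'}]}
}
parsed = parse_config(sample_config)
print(parsed['chainer']['pipe'][0]['load_path'])  # placeholders expanded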
def interact_model(config_path):
    config = read_json(config_path)
    model = build_model_from_config(config)

    while True:
        # get input from user
        context = input(':: ')

        # check for exit command
        if context in ('exit', 'stop', 'quit', 'q'):
            return

        pred = model.infer(context)
        print('>>', pred)
def __init__(self, ner_network: NerNetwork, save_path, load_path=None,
             num_epochs=10, train_now=False, **kwargs):
    super().__init__(save_path=save_path, load_path=load_path,
                     train_now=train_now, mode=kwargs['mode'])

    # Check existence of the file with slots, slot values, and corrupted (misspelled) slot values
    if not self.load_path.is_file():
        self.load()

    print("[ loading slot values from `{}` ]".format(str(self.load_path)))
    self._slot_vals = read_json(self.load_path)
    self._ner_network = ner_network
    self._ner_network.load()
def interact_model(config_path: str) -> None:
    """Start interaction with the model described in the corresponding configuration file."""
    config = read_json(config_path)
    model = build_model_from_config(config)

    while True:
        args = []
        for in_x in model.in_x:
            args.append(input('{}::'.format(in_x)))

        # check for exit command
        if args[-1] in ('exit', 'stop', 'quit', 'q'):
            return

        if len(args) == 1:
            pred = model(args)
        else:
            pred = model([args])

        print('>>', *pred)
def load(self, model_name: str) -> None:
    """
    Initialize an uncompiled model from saved params and weights.

    Args:
        model_name: name of the model function described as a method of this class

    Returns:
        None; sets ``self.model`` to a model with weights and network
        parameters loaded from files
    """
    if self.load_path:
        if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
            raise ConfigError("Provided load path is incorrect!")

        opt_path = Path("{}_opt.json".format(str(self.load_path.resolve())))
        weights_path = Path("{}.h5".format(str(self.load_path.resolve())))

        if opt_path.exists() and weights_path.exists():
            log.info("[initializing `{}` from saved]".format(self.__class__.__name__))

            self.opt = read_json(opt_path)

            model_func = getattr(self, model_name, None)
            if callable(model_func):
                model = model_func(**self.opt)
            else:
                raise AttributeError("Model {} is not defined".format(model_name))

            log.info("[loading weights from {}]".format(weights_path.name))
            model.load_weights(str(weights_path))

            self.model = model
        else:
            self.model = self.init_model_from_scratch(model_name)
    else:
        log.warning("No `load_path` is provided for {}".format(self.__class__.__name__))
        self.model = self.init_model_from_scratch(model_name)
def run_alexa_server(agent_generator: callable,
                     multi_instance: bool = False,
                     stateful: bool = False,
                     port: Optional[int] = None,
                     https: bool = False,
                     ssl_key: str = None,
                     ssl_cert: str = None) -> None:
    """Initiates Flask web service with Alexa skill.

    Args:
        agent_generator: Callback Alexa agents factory.
        multi_instance: Multi instance mode flag.
        stateful: Stateful mode flag.
        port: Flask web service port.
        https: Flag for running Alexa skill service in https mode.
        ssl_key: SSL key file path.
        ssl_cert: SSL certificate file path.
    """
    server_config_path = Path(get_settings_path(), SERVER_CONFIG_FILENAME).resolve()
    server_params = read_json(server_config_path)

    host = server_params['common_defaults']['host']
    port = port or server_params['common_defaults']['port']

    alexa_server_params = server_params['alexa_defaults']

    alexa_server_params['multi_instance'] = multi_instance or server_params['common_defaults']['multi_instance']
    alexa_server_params['stateful'] = stateful or server_params['common_defaults']['stateful']
    alexa_server_params['amazon_cert_lifetime'] = AMAZON_CERTIFICATE_LIFETIME

    if https:
        ssl_key_path = Path(ssl_key or server_params['https_key_path']).resolve()
        if not ssl_key_path.is_file():
            e = FileNotFoundError('SSL key file not found: please provide a correct path in the --key param or '
                                  'the https_key_path param in the server configuration file')
            log.error(e)
            raise e

        ssl_cert_path = Path(ssl_cert or server_params['https_cert_path']).resolve()
        if not ssl_cert_path.is_file():
            e = FileNotFoundError('SSL certificate file not found: please provide a correct path in the --cert '
                                  'param or the https_cert_path param in the server configuration file')
            log.error(e)
            raise e

        ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        ssl_context.load_cert_chain(ssl_cert_path, ssl_key_path)
    else:
        ssl_context = None

    input_q = Queue()
    output_q = Queue()

    bot = Bot(agent_generator, alexa_server_params, input_q, output_q)
    bot.start()

    endpoint_description = {
        'description': 'Amazon Alexa custom service endpoint',
        'parameters': [
            {
                'name': 'Signature',
                'in': 'header',
                'required': 'true',
                'type': 'string',
                'example': 'Z5H5wqd06ExFVPNfJiqhKvAFjkf+cTVodOUirucHGcEVAMO1LfvgqWUkZ/X1ITDZbI0w+SMwVkEQZlkeThbVS/54M22StNDUtfz4Ua20xNDpIPwcWIACAmZ38XxbbTEFJI5WwqrbilNcfzqiGrIPfdO5rl+/xUjHFUdcJdUY/QzBxXsceytVYfEiR9MzOCN2m4C0XnpThUavAu159KrLj8AkuzN0JF87iXv+zOEeZRgEuwmsAnJrRUwkJ4yWokEPnSVdjF0D6f6CscfyvRe9nsWShq7/zRTa41meweh+n006zvf58MbzRdXPB22RI4AN0ksWW7hSC8/QLAKQE+lvaw==',
            },
            {
                'name': 'Signaturecertchainurl',
                'in': 'header',
                'required': 'true',
                'type': 'string',
                'example': 'https://s3.amazonaws.com/echo.api/echo-api-cert-6-ats.pem',
            },
            {
                'name': 'data',
                'in': 'body',
                'required': 'true',
                'example': {
                    'version': '1.0',
                    'session': {
                        'new': False,
                        'sessionId': 'amzn1.echo-api.session.3c6ebffd-55b9-4e1a-bf3c-c921c1801b63',
                        'application': {
                            'applicationId': 'amzn1.ask.skill.8b17a5de-3749-4919-aa1f-e0bbaf8a46a6'
                        },
                        'attributes': {
                            'sessionId': 'amzn1.echo-api.session.3c6ebffd-55b9-4e1a-bf3c-c921c1801b63'
                        },
                        'user': {
                            'userId': 'amzn1.ask.account.AGR4R2LOVHMNMNOGROBVNLU7CL4C57X465XJF2T2F55OUXNTLCXDQP3I55UXZIALEKKZJ6Q2MA5MEFSMZVPEL5NVZS6FZLEU444BVOLPB5WVH5CHYTQAKGD7VFLGPRFZVHHH2NIB4HKNHHGX6HM6S6QDWCKXWOIZL7ONNQSBUCVPMZQKMCYXRG5BA2POYEXFDXRXCGEVDWVSMPQ'
                        }
                    },
                    'context': {
                        'System': {
                            'application': {
                                'applicationId': 'amzn1.ask.skill.8b17a5de-3749-4919-aa1f-e0bbaf8a46a6'
                            },
                            'user': {
                                'userId': 'amzn1.ask.account.AGR4R2LOVHMNMNOGROBVNLU7CL4C57X465XJF2T2F55OUXNTLCXDQP3I55UXZIALEKKZJ6Q2MA5MEFSMZVPEL5NVZS6FZLEU444BVOLPB5WVH5CHYTQAKGD7VFLGPRFZVHHH2NIB4HKNHHGX6HM6S6QDWCKXWOIZL7ONNQSBUCVPMZQKMCYXRG5BA2POYEXFDXRXCGEVDWVSMPQ'
                            },
                            'device': {
                                'deviceId': 'amzn1.ask.device.AFQAMLYOYQUUACSE7HFVYS4ZI2KUB35JPHQRUPKTDCAU3A47WESP5L57KSWT5L6RT3FVXWH4OA2DNPJRMZ2VGEIACF3PJEIDCOUWUBC4W5RPJNUB3ZVT22J4UJN5UL3T2UBP36RVHFJ5P4IPT2HUY3P2YOY33IOU4O33HUAG7R2BUNROEH4T2',
                                'supportedInterfaces': {}
                            },
                            'apiEndpoint': 'https://api.amazonalexa.com',
                            'apiAccessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6IjEifQ.eyJhdWQiOiJodHRwczovL2FwaS5hbWF6b25hbGV4YS5jb20iLCJpc3MiOiJBbGV4YVNraWxsS2l0Iiwic3ViIjoiYW16bjEuYXNrLnNraWxsLjhiMTdhNWRlLTM3NDktNDkxOS1hYTFmLWUwYmJhZjhhNDZhNiIsImV4cCI6MTU0NTIyMzY1OCwiaWF0IjoxNTQ1MjIwMDU4LCJuYmYiOjE1NDUyMjAwNTgsInByaXZhdGVDbGFpbXMiOnsiY29uc2VudFRva2VuIjpudWxsLCJkZXZpY2VJZCI6ImFtem4xLmFzay5kZXZpY2UuQUZRQU1MWU9ZUVVVQUNTRTdIRlZZUzRaSTJLVUIzNUpQSFFSVVBLVERDQVUzQTQ3V0VTUDVMNTdLU1dUNUw2UlQzRlZYV0g0T0EyRE5QSlJNWjJWR0VJQUNGM1BKRUlEQ09VV1VCQzRXNVJQSk5VQjNaVlQyMko0VUpONVVMM1QyVUJQMzZSVkhGSjVQNElQVDJIVVkzUDJZT1kzM0lPVTRPMzNIVUFHN1IyQlVOUk9FSDRUMiIsInVzZXJJZCI6ImFtem4xLmFzay5hY2NvdW50LkFHUjRSMkxPVkhNTk1OT0dST0JWTkxVN0NMNEM1N1g0NjVYSkYyVDJGNTVPVVhOVExDWERRUDNJNTVVWFpJQUxFS0taSjZRMk1BNU1FRlNNWlZQRUw1TlZaUzZGWkxFVTQ0NEJWT0xQQjVXVkg1Q0hZVFFBS0dEN1ZGTEdQUkZaVkhISDJOSUI0SEtOSEhHWDZITTZTNlFEV0NLWFdPSVpMN09OTlFTQlVDVlBNWlFLTUNZWFJHNUJBMlBPWUVYRkRYUlhDR0VWRFdWU01QUSJ9fQ.jcomYhBhU485T4uoe2NyhWnL-kZHoPQKpcycFqa-1sy_lSIitfFGup9DKrf2NkN-I9lZ3xwq9llqx9WRN78fVJjN6GLcDhBDH0irPwt3n9_V7_5bfB6KARv5ZG-JKOmZlLBqQbnln0DAJ10D8HNiytMARNEwduMBVDNK0A5z6YxtRcLYYFD2-Ieg_V8Qx90eE2pd2U5xOuIEL0pXfSoiJ8vpxb8BKwaMO47tdE4qhg_k7v8ClwyXg3EMEhZFjixYNqdW1tCrwDGj58IWMXDyzZhIlRMh6uudMOT6scSzcNVD0v42IOTZ3S_X6rG01B7xhUDlZXMqkrCuzOyqctGaPw'
                        },
                        'Viewport': {
                            'experiences': [
                                {
                                    'arcMinuteWidth': 246,
                                    'arcMinuteHeight': 144,
                                    'canRotate': False,
                                    'canResize': False
                                }
                            ],
                            'shape': 'RECTANGLE',
                            'pixelWidth': 1024,
                            'pixelHeight': 600,
                            'dpi': 160,
                            'currentPixelWidth': 1024,
                            'currentPixelHeight': 600,
                            'touch': ['SINGLE']
                        }
                    },
                    'request': {
                        'type': 'IntentRequest',
                        'requestId': 'amzn1.echo-api.request.388d0f6e-04b9-4450-a687-b9abaa73ac6a',
                        'timestamp': '2018-12-19T11:47:38Z',
                        'locale': 'en-US',
                        'intent': {
                            'name': 'AskDeepPavlov',
                            'confirmationStatus': 'NONE',
                            'slots': {
                                'raw_input': {
                                    'name': 'raw_input',
                                    'value': 'my beautiful sandbox skill',
                                    'resolutions': {
                                        'resolutionsPerAuthority': [
                                            {
                                                'authority': 'amzn1.er-authority.echo-sdk.amzn1.ask.skill.8b17a5de-3749-4919-aa1f-e0bbaf8a46a6.GetInput',
                                                'status': {'code': 'ER_SUCCESS_NO_MATCH'}
                                            }
                                        ]
                                    },
                                    'confirmationStatus': 'NONE',
                                    'source': 'USER'
                                }
                            }
                        }
                    }
                }
            }
        ],
        'responses': {
            "200": {
                "description": "A model response"
            }
        }
    }

    @app.route('/')
    def index():
        return redirect('/apidocs/')

    @app.route('/interact', methods=['POST'])
    @swag_from(endpoint_description)
    def handle_request():
        request_body: bytes = request.get_data()
        signature_chain_url: str = request.headers.get('Signaturecertchainurl')
        signature: str = request.headers.get('Signature')
        alexa_request: dict = request.get_json()

        request_dict = {
            'request_body': request_body,
            'signature_chain_url': signature_chain_url,
            'signature': signature,
            'alexa_request': alexa_request
        }

        bot.input_queue.put(request_dict)
        response: dict = bot.output_queue.get()
        response_code = 400 if 'error' in response.keys() else 200

        return jsonify(response), response_code

    app.run(host=host, port=port, threaded=True, ssl_context=ssl_context)
def train_evaluate_model_from_config(config: Union[str, Path, dict], to_train=True, to_validate=True) -> None:
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    data = []
    reader_config = config.get('dataset_reader', None)

    if reader_config:
        reader_config = config['dataset_reader']

        if 'class' in reader_config:
            c = reader_config.pop('class')
            try:
                module_name, cls_name = c.split(':')
                reader = getattr(importlib.import_module(module_name), cls_name)()
            except ValueError:
                e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                                .format(c))
                log.exception(e)
                raise e
        else:
            reader = get_model(reader_config.pop('name'))()

        data_path = expand_path(reader_config.pop('data_path', ''))
        data = reader.read(data_path, **reader_config)
    else:
        log.warning("No dataset reader is provided in the JSON config.")

    iterator_config = config['dataset_iterator']
    iterator: Union[DataLearningIterator, DataFittingIterator] = from_params(iterator_config, data=data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics'])))

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions)
        elif callable(getattr(model, 'fit_batches', None)):
            _fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }
            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test',
                                    show_examples=train_config['show_examples'])
            }
            print(json.dumps(report, ensure_ascii=False))
def from_params(params: Dict, **kwargs) -> Component:
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e
    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model_from_config
        deeppavlov_root = get_deeppavlov_root()
        config = read_json(expand_path(config_params['config_path']))
        model = build_model_from_config(config, as_component=True)
        set_deeppavlov_root({'deeppavlov_root': deeppavlov_root})
        return model
    elif 'class' in config_params:
        c = config_params.pop('class')
        try:
            module_name, cls_name = c.split(':')
            cls = getattr(importlib.import_module(module_name), cls_name)
        except ValueError:
            e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                            .format(c))
            log.exception(e)
            raise e
    else:
        cls_name = config_params.pop('name', None)
        if not cls_name:
            e = ConfigError('Component config has no `name`, `ref` or `class` fields')
            log.exception(e)
            raise e
        try:
            cls = REGISTRY[cls_name]
        except KeyError:
            e = ConfigError('Class {} is not registered.'.format(cls_name))
            log.exception(e)
            raise e

    # find the submodels params recursively
    for param_name, subcls_params in config_params.items():
        if isinstance(subcls_params, dict):
            if not {'ref', 'name', 'class', 'config_path'}.intersection(subcls_params):
                # This parameter is passed as a dict to the class constructor;
                # the user didn't intend it to be a component.
                for k, v in subcls_params.items():
                    subcls_params[k] = _resolve(v)
                continue

            config_params[param_name] = from_params(subcls_params,
                                                    vocabs=kwargs['vocabs'],
                                                    mode=kwargs['mode'])

    try:
        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
import gc

# gc.set_threshold(200, 10, 10)
gc.enable()

# import spacy
# spacy.prefer_gpu()

from deeppavlov.core.common.file import read_json
from deeppavlov import configs, train_evaluate_model_from_config

# train_evaluate_model_from_config(
#     read_json("/ai/jupyter/.deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki_custom.json"),
#     download=False)
train_evaluate_model_from_config(
    read_json("/ai/jupyter/.deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki_custom.json"),
    download=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May 10 16:28:38 2018

@author: lsm
"""
from deeppavlov.core.commands.train import train_evaluate_model_from_config
from deeppavlov.core.commands.utils import expand_path, set_deeppavlov_root
from deeppavlov.core.common.file import read_json
from model.pipeline.text_normalizer import *
from model.pipeline.embedder import *
from model.pipeline.CNN_model import *

config = read_json('model/subs/pay/pay_config.json')
set_deeppavlov_root(config)
train_evaluate_model_from_config('model/subs/pay/pay_config.json')
from pathlib import Path

from deeppavlov.utils import settings
from deeppavlov.core.common.file import read_json, save_json

settings_path = Path(settings.__path__[0]) / 'server_config.json'
settings = read_json(settings_path)

settings['model_defaults']['Chitchat'] = {
    "host": "",
    "port": "",
    "model_endpoint": "/model",
    "model_args_names": ["utterances", "annotations", "u_histories", "dialogs"]
}

save_json(settings, settings_path)
def train_evaluate_model_from_config(config: Union[str, Path, dict], iterator=None,
                                     to_train=True, to_validate=True) -> Dict[str, Dict[str, float]]:
    """Train and evaluate the model described in the corresponding configuration file."""
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    if iterator is None:
        data = read_data_by_config(config)
        iterator = get_iterator_from_config(config, data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics'])))

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions)
        elif callable(getattr(model, 'fit_batches', None)):
            _fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

        model.destroy()

    res = {}

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }
            res['valid'] = report['valid']['metrics']
            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test',
                                    show_examples=train_config['show_examples'])
            }
            res['test'] = report['test']['metrics']
            print(json.dumps(report, ensure_ascii=False))

        model.destroy()

    return res
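# A usage sketch for train_evaluate_model_from_config above: the returned dict
# maps 'valid'/'test' to their metric values, so the configured metrics
# (accuracy by default) can be read back directly. The config path is
# hypothetical.
res = train_evaluate_model_from_config('my_model_config.json')
print('validation metrics:', res.get('valid'))
print('test metrics:', res.get('test'))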
import asyncio
import websockets
import sys
import json

import deeppavlov
from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json
from deeppavlov.core.commands.infer import build_model

# FEEDER_SOCKET_HOST = '127.0.0.1'
# FEEDER_SOCKET_PORT = 9007

connections = set()

model_config = read_json(configs.faq.fasttext_avg_autofaq)
intents = build_model(model_config)


class ActiveSpeaker:
    lastActiveSpeaker = None
    agent_speech = []
    caller_speech = []


async def run_feeding_process(websocket, path):
    if path == '/sub':
        connections.add(websocket)
        print('subscriber #' + str(len(connections)) + ' got connected')
        try:
from deeppavlov.core.commands.infer import build_model_from_config
from deeppavlov.core.common.file import read_json

CONFIG_PATH = '/home/DeepPavlov/deeppavlov/configs/intents/intents_dstc2.json'

model = build_model_from_config(read_json(CONFIG_PATH))

for i in range(10):
    var = input("::")
    print(">> " + str(var))
    resp = model([var])[0]
    print('>>', resp)
def setup(self):
    self.skill_config = read_json(configs.skills.dsl_skill)
    install_from_config(self.skill_config)
from flask import Flask, render_template, request

from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json

app = Flask(__name__)

model_config = read_json(configs.faq.tfidf_logreg_en_faq)
model_config["dataset_reader"]["data_url"] = \
    "https://raw.githubusercontent.com/harrislam1/covid19_chatbot/master/cdc_covid19_faq.csv"
bot = train_model(model_config)


@app.route("/")
def home():
    return render_template("index.html")


@app.route("/get")
def get_bot_response():
    userText = request.args.get('msg')
    return str(bot([userText])[0][0])


if __name__ == "__main__":
    app.run()
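# A client-side sketch for the Flask app above, assuming it is running locally
# on Flask's default port 5000: the /get route reads the 'msg' query parameter
# and returns the model's first answer as plain text.
import requests

reply = requests.get('http://127.0.0.1:5000/get', params={'msg': 'What is COVID-19?'})
print(reply.text)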
def train_model_from_config(config_path: str) -> None:
    config = read_json(config_path)
    set_deeppavlov_root(config)

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    kwargs = {k: v for k, v in reader_config.items() if k not in ['name', 'data_path']}
    data = reader.read(data_path, **kwargs)

    iterator_config = config['dataset_iterator']
    iterator: BasicDatasetIterator = from_params(iterator_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, iterator)
    else:
        vocabs = config.get('vocabs', {})
        for vocab_param_name, vocab_config in vocabs.items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, iterator)

        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics'])))

    if callable(getattr(model, 'train_on_batch', None)):
        _train_batches(model, iterator, train_config, metrics_functions)
    elif callable(getattr(model, 'fit', None)):
        _fit(model, iterator, train_config)
    elif not isinstance(model, Chainer):
        log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid')
            }
            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test')
            }
            print(json.dumps(report, ensure_ascii=False))
def build_agent_from_config(config_path: str) -> Agent:
    """Build and return the agent described in the corresponding configuration file."""
    config = read_json(config_path)
    skill_configs = config['skills']
    commutator_config = config['commutator']
    return Agent(skill_configs, commutator_config)
for dialog in iterator.gen_batches(batch_size=1, data_type='train'):
    turns_x, turns_y = dialog

    print("User utterances:\n----------------\n")
    pprint(turns_x[0], indent=4)
    print("\nSystem responses:\n-----------------\n")
    pprint(turns_y[0], indent=4)

    break

print("\n-----------------")
print(f"{len(iterator.get_instances('train')[0])} dialog(s) in train.")
print(f"{len(iterator.get_instances('valid')[0])} dialog(s) in valid.")
print(f"{len(iterator.get_instances('test')[0])} dialog(s) in test.")

gobot_config = read_json(configs.go_bot.gobot_dstc2_minimal)

gobot_config['chainer']['pipe'][-1]['embedder'] = {
    "class_name": "glove",
    "load_path": "assistant_bot/small.txt"
}

gobot_config['chainer']['pipe'][-1]['nlg_manager']['template_path'] = 'assistant_data/assistant-templates.txt'
gobot_config['chainer']['pipe'][-1]['nlg_manager']['api_call_action'] = None

gobot_config['dataset_reader']['class_name'] = '__main__:AssistantDatasetReader'
gobot_config['metadata']['variables']['DATA_PATH'] = 'assistant_data'
gobot_config['metadata']['variables']['MODEL_PATH'] = 'assistant_bot'

gobot_config['train']['batch_size'] = 4  # set batch size
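# A plausible next step for the gobot_config prepared above (a hedged sketch:
# DeepPavlov's train_model accepts an in-memory config dict and returns the
# trained, callable model).
from deeppavlov import train_model

assistant_bot = train_model(gobot_config)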
def main():
    args = parser.parse_args()

    pipeline_config_path = find_config(args.config_path)
    key_main_model = args.key_main_model
    population_size = args.p_size
    gpus = [int(gpu) for gpu in args.gpus.split(",")]
    train_partition = int(args.train_partition)
    start_from_population = int(args.start_from_population)
    path_to_population = args.path_to_population
    elitism_with_weights = args.elitism_with_weights
    iterations = int(args.iterations)

    p_crossover = args.p_cross
    pow_crossover = args.pow_cross
    p_mutation = args.p_mut
    pow_mutation = args.pow_mut

    if os.environ.get("CUDA_VISIBLE_DEVICES") is not None:
        cvd = [int(gpu) for gpu in os.environ.get("CUDA_VISIBLE_DEVICES").split(",")]
        if gpus == [-1]:
            gpus = cvd
        else:
            try:
                gpus = [cvd[gpu] for gpu in gpus]
            except IndexError:
                raise ConfigError("Can not use gpus `{}` with CUDA_VISIBLE_DEVICES='{}'"
                                  .format(",".join(map(str, gpus)), ",".join(map(str, cvd))))

    basic_params = read_json(pipeline_config_path)
    log.info("Given basic params: {}\n".format(json.dumps(basic_params, indent=2)))

    # Initialize evolution
    evolution = ParamsEvolution(population_size=population_size,
                                p_crossover=p_crossover, crossover_power=pow_crossover,
                                p_mutation=p_mutation, mutation_power=pow_mutation,
                                key_main_model=key_main_model,
                                seed=42,
                                train_partition=train_partition,
                                elitism_with_weights=elitism_with_weights,
                                **basic_params)

    considered_metrics = evolution.get_value_from_config(
        evolution.basic_config,
        list(evolution.find_model_path(evolution.basic_config, "metrics"))[0] + ["metrics"])
    considered_metrics = [metric['name'] if isinstance(metric, dict) else metric
                          for metric in considered_metrics]

    log.info(considered_metrics)
    evolve_metric = considered_metrics[0]

    # Create table variable for gathering results
    abs_path_to_main_models = expand_path(
        str(evolution.models_path).format(**evolution.basic_config['metadata']['variables']))
    abs_path_to_main_models.mkdir(parents=True, exist_ok=True)

    result_file = abs_path_to_main_models / "result_table.tsv"
    print(result_file)

    result_table_columns = []
    result_table_dict = {}
    for el in considered_metrics:
        result_table_dict[el + "_valid"] = []
        result_table_dict[el + "_test"] = []
        result_table_columns.extend([el + "_valid", el + "_test"])

    result_table_dict["params"] = []
    result_table_columns.append("params")

    if start_from_population == 0:
        # if starting evolution from scratch
        iters = 0
        result_table = pd.DataFrame(result_table_dict)
        # write down result table file
        result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t')

        log.info("Iteration #{} starts".format(iters))
        # randomly generate the first population
        population = evolution.first_generation()
    else:
        # if starting evolution from an already existing population
        iters = start_from_population
        log.info("Iteration #{} starts".format(iters))

        population = []
        for i in range(population_size):
            config = read_json(expand_path(path_to_population) / f"model_{i}" / "config.json")

            evolution.insert_value_or_dict_into_config(
                config, evolution.path_to_models_save_path,
                str(evolution.main_model_path / f"population_{start_from_population}" / f"model_{i}"))

            population.append(config)

    run_population(population, evolution, gpus)
    population_scores = results_to_table(population, evolution, considered_metrics,
                                         result_file, result_table_columns)[evolve_metric]
    log.info("Population scores: {}".format(population_scores))
    log.info("Iteration #{} was done".format(iters))
    iters += 1

    while True:
        if iterations != -1 and start_from_population + iterations == iters:
            log.info("End of evolution on iteration #{}".format(iters))
            break

        log.info("Iteration #{} starts".format(iters))
        population = evolution.next_generation(population, population_scores, iters)
        run_population(population, evolution, gpus)
        population_scores = results_to_table(population, evolution, considered_metrics,
                                             result_file, result_table_columns)[evolve_metric]
        log.info("Population scores: {}".format(population_scores))
        log.info("Iteration #{} was done".format(iters))
        iters += 1
def interact_model_by_telegram(config_path, token):
    config = read_json(config_path)
    model = build_model_from_config(config)
    init_bot_for_model(token, model)
def main():
    params_helper = ParamsSearch()

    args = parser.parse_args()

    is_loo = False
    n_folds = None

    if args.folds == 'loo':
        is_loo = True
    elif args.folds is None:
        n_folds = None
    elif args.folds.isdigit():
        n_folds = int(args.folds)
    else:
        raise NotImplementedError('Not implemented this type of CV')

    # read config
    pipeline_config_path = find_config(args.config_path)
    config_init = read_json(pipeline_config_path)
    config = parse_config(config_init)
    data = read_data_by_config(config)
    target_metric = parse_config(config_init)['train']['metrics'][0]
    if isinstance(target_metric, dict):
        target_metric = target_metric['name']

    # get all params for search
    param_paths = list(params_helper.find_model_path(config, 'search_choice'))
    param_values = []
    param_names = []
    for path in param_paths:
        value = params_helper.get_value_from_config(config, path)
        param_name = path[-1]
        param_value_search = value['search_choice']
        param_names.append(param_name)
        param_values.append(param_value_search)

    # find optimal params
    if args.search_type == 'grid':
        # generate all params combinations for grid search
        combinations = list(product(*param_values))

        # calculate cv scores
        scores = []
        for comb in combinations:
            config = deepcopy(config_init)
            for param_path, param_value in zip(param_paths, comb):
                params_helper.insert_value_or_dict_into_config(config, param_path, param_value)
            config = parse_config(config)

            if (n_folds is not None) or is_loo:
                # CV for model evaluation
                score_dict = calc_cv_score(config, data=data, n_folds=n_folds, is_loo=is_loo)
                score = score_dict[next(iter(score_dict))]
            else:
                # train/valid split for model evaluation
                data_to_evaluate = data.copy()
                if len(data_to_evaluate['valid']) == 0:
                    data_to_evaluate['train'], data_to_evaluate['valid'] = \
                        train_test_split(data_to_evaluate['train'], test_size=0.2)
                iterator = get_iterator_from_config(config, data_to_evaluate)
                score = train_evaluate_model_from_config(config, iterator=iterator)['valid'][target_metric]

            scores.append(score)

        # get model with the best score
        best_params_dict = get_best_params(combinations, scores, param_names, target_metric)
        log.info('Best model params: {}'.format(best_params_dict))
    else:
        raise NotImplementedError('Not implemented this type of search')

    # save config
    best_config = config_init
    for i, param_name in enumerate(best_params_dict.keys()):
        if param_name != target_metric:
            params_helper.insert_value_or_dict_into_config(best_config, param_paths[i],
                                                           best_params_dict[param_name])

    best_model_filename = pipeline_config_path.with_suffix('.cvbest.json')
    save_json(best_config, best_model_filename)
    log.info('Best model saved in json-file: {}'.format(best_model_filename))
def init_model(model_config_path):
    model_config = read_json(model_config_path)
    model = build_model_from_config(model_config)
    return model
def load(self, model_name, optimizer_name, loss_name, lear_rate=None, lear_rate_decay=None):
    """
    Initialize model from saved params and weights

    Args:
        model_name: name of model function described as a method of this class
        optimizer_name: name of optimizer from keras.optimizers
        loss_name: loss function name (from keras.losses)
        lear_rate: learning rate
        lear_rate_decay: learning rate decay

    Returns:
        model with loaded weights and network parameters from files
        but compiled with given learning parameters
    """
    if self.load_path:
        if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
            raise ConfigError("Provided load path is incorrect!")

        opt_path = Path("{}_opt.json".format(str(self.load_path.resolve())))
        weights_path = Path("{}.h5".format(str(self.load_path.resolve())))

        if opt_path.exists() and weights_path.exists():
            log.info("[initializing `{}` from saved]".format(self.__class__.__name__))

            self.opt = read_json(opt_path)

            model_func = getattr(self, model_name, None)
            if callable(model_func):
                model = model_func(params=self.opt)
            else:
                raise AttributeError("Model {} is not defined".format(model_name))

            log.info("[loading weights from {}]".format(weights_path.name))
            model.load_weights(str(weights_path))

            optimizer_func = getattr(keras.optimizers, optimizer_name, None)
            if callable(optimizer_func):
                if lear_rate is not None:
                    if lear_rate_decay is not None:
                        self.optimizer = optimizer_func(lr=lear_rate, decay=lear_rate_decay)
                    else:
                        self.optimizer = optimizer_func(lr=lear_rate)
                elif lear_rate_decay is not None:
                    self.optimizer = optimizer_func(decay=lear_rate_decay)
                else:
                    self.optimizer = optimizer_func()
            else:
                raise AttributeError("Optimizer {} is not defined in `keras.optimizers`".format(optimizer_name))

            loss_func = getattr(keras.losses, loss_name, None)
            if callable(loss_func):
                loss = loss_func
            else:
                raise AttributeError("Loss {} is not defined".format(loss_name))

            model.compile(optimizer=self.optimizer, loss=loss)
            return model
        else:
            return self.init_model_from_scratch(model_name, optimizer_name, loss_name,
                                                lear_rate, lear_rate_decay)
    else:
        log.warning("No `load_path` is provided for {}".format(self.__class__.__name__))
        return self.init_model_from_scratch(model_name, optimizer_name, loss_name,
                                            lear_rate, lear_rate_decay)
query_map = {}
lem_query_map = {}
with open(queries_filename) as query_f:
    stem = Mystem()
    for line in tqdm(query_f, total=6311):
        t = line[:-1].split('\t')
        query_id = t[0]
        query = t[1].strip().lower()
        lem_query = ''.join(stem.lemmatize(query)[:-1])
        query_map[query_id] = query
        lem_query_map[query_id] = lem_query

doc_num = 582167

bert_config_rus = read_json(configs.embedder.bert_embedder)
bert_config_rus['metadata']['variables']['BERT_PATH'] = model_dp_rus_dirname
model_dp_rus = build_model(bert_config_rus)

bert_config_mult = read_json(configs.embedder.bert_embedder)
bert_config_mult['metadata']['variables']['BERT_PATH'] = model_dp_mult_dirname
model_dp_mult = build_model(bert_config_mult)

model_sent_trans = SentenceTransformer('distiluse-base-multilingual-cased')

with open(titles_filename) as docs_f, \
        open(lem_titles_filename) as lem_doc_f, \
        open(features_filename, 'w') as features_f:
    for line in tqdm(docs_f, total=doc_num):
        t = line[:-1].split('\t')
        lem_t = lem_doc_f.readline()[:-1].split('\t')
def from_params(params: Dict, mode='infer', **kwargs) -> Component:
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e
    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model_from_config
        deeppavlov_root = get_deeppavlov_root()
        refs = _refs.copy()
        _refs.clear()
        config = read_json(expand_path(config_params['config_path']))
        model = build_model_from_config(config, as_component=True)
        set_deeppavlov_root({'deeppavlov_root': deeppavlov_root})
        _refs.clear()
        _refs.update(refs)
        return model
    elif 'class' in config_params:
        c = config_params.pop('class')
        try:
            module_name, cls_name = c.split(':')
            cls = getattr(importlib.import_module(module_name), cls_name)
        except ValueError:
            e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                            .format(c))
            log.exception(e)
            raise e
    else:
        cls_name = config_params.pop('name', None)
        if not cls_name:
            e = ConfigError('Component config has no `name`, `ref` or `class` fields')
            log.exception(e)
            raise e
        try:
            cls = REGISTRY[cls_name]
        except KeyError:
            e = ConfigError('Class {} is not registered.'.format(cls_name))
            log.exception(e)
            raise e

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
def next_generation(self, generation: List[dict], scores: List[float], iteration: int) -> List[dict]:
    """
    Provide replacement

    Args:
        generation: current generation (set of self.population_size configs)
        scores: corresponding scores that should be maximized
        iteration: iteration number

    Returns:
        the next generation according to the given scores of the current generation
    """
    next_population = self.selection_of_best_with_weights(generation, scores)
    log.info("Saved with weights: {} models".format(self.n_saved_best_pretrained))
    offsprings = self.crossover(generation, scores)

    changable_next = self.mutation(offsprings)

    next_population.extend(changable_next)

    for i in range(self.n_saved_best_pretrained):
        # if several train files:
        if self.train_partition != 1:
            file_ext = str(Path(next_population[i]["dataset_reader"]["train"]).suffix)
            next_population[i]["dataset_reader"]["train"] = "_".join(
                [str(p) for p in Path(next_population[i]["dataset_reader"]["train"]).stem.split("_")[:-1]]) \
                + "_" + str(iteration % self.train_partition) + file_ext
        try:
            # re-init learning rate with the final one (works for KerasModel)
            next_population[i] = self.insert_value_or_dict_into_config(
                next_population[i],
                self.main_model_path + ["learning_rate"],
                read_json(str(Path(self.get_value_from_config(next_population[i],
                                                              self.main_model_path + ["save_path"])
                                   ).parent.joinpath("model_opt.json")))["final_learning_rate"])
        except Exception:
            pass

        # load_paths
        if self.elitism_with_weights:
            # if elite models are saved with weights
            next_population[i] = self.insert_value_or_dict_into_config(
                next_population[i],
                self.main_model_path + ["load_path"],
                str(Path(self.get_value_from_config(next_population[i],
                                                    self.main_model_path + ["save_path"]))))

            for path_id, path_ in enumerate(self.paths_to_fiton_dicts):
                next_population[i] = self.insert_value_or_dict_into_config(
                    next_population[i],
                    path_ + ["load_path"],
                    str(Path(self.get_value_from_config(next_population[i], path_ + ["save_path"]))))
        else:
            # if elite models are saved only as configurations and trained again
            next_population[i] = self.insert_value_or_dict_into_config(
                next_population[i],
                self.main_model_path + ["load_path"],
                str(Path(self.get_value_from_config(self.basic_config,
                                                    self.main_model_path + ["load_path"])
                         ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath("model")))

            for path_id, path_ in enumerate(self.paths_to_fiton_dicts):
                suffix = Path(self.get_value_from_config(self.basic_config, path_ + ["load_path"])).suffix
                next_population[i] = self.insert_value_or_dict_into_config(
                    next_population[i],
                    path_ + ["load_path"],
                    str(Path(self.get_value_from_config(self.basic_config,
                                                        self.main_model_path + ["load_path"])
                             ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath(
                        "fitted_model_" + str(path_id)).with_suffix(suffix)))

        # save_paths
        next_population[i] = self.insert_value_or_dict_into_config(
            next_population[i],
            self.main_model_path + ["save_path"],
            str(Path(self.get_value_from_config(self.basic_config,
                                                self.main_model_path + ["save_path"])
                     ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath("model")))

        for path_id, path_ in enumerate(self.paths_to_fiton_dicts):
            suffix = Path(self.get_value_from_config(self.basic_config, path_ + ["save_path"])).suffix
            next_population[i] = self.insert_value_or_dict_into_config(
                next_population[i],
                path_ + ["save_path"],
                str(Path(self.get_value_from_config(self.basic_config,
                                                    self.main_model_path + ["save_path"])
                         ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath(
                    "fitted_model_" + str(path_id)).with_suffix(suffix)))

    for i in range(self.n_saved_best_pretrained, self.population_size):
        # if several train files
        if self.train_partition != 1:
            file_ext = str(Path(next_population[i]["dataset_reader"]["train"]).suffix)
            next_population[i]["dataset_reader"]["train"] = "_".join(
                [str(p) for p in Path(next_population[i]["dataset_reader"]["train"]).stem.split("_")[:-1]]) \
                + "_" + str(iteration % self.train_partition) + file_ext

        for which_path in ["save_path", "load_path"]:
            next_population[i] = self.insert_value_or_dict_into_config(
                next_population[i],
                self.main_model_path + [which_path],
                str(Path(self.get_value_from_config(self.basic_config,
                                                    self.main_model_path + [which_path])
                         ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath("model")))

        for path_id, path_ in enumerate(self.paths_to_fiton_dicts):
            suffix = Path(self.get_value_from_config(self.basic_config, path_ + ["save_path"])).suffix
            for which_path in ["save_path", "load_path"]:
                next_population[i] = self.insert_value_or_dict_into_config(
                    next_population[i],
                    path_ + [which_path],
                    str(Path(self.get_value_from_config(self.basic_config,
                                                        self.main_model_path + [which_path])
                             ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath(
                        "fitted_model_" + str(path_id)).with_suffix(suffix)))

        next_population[i]["evolution_model_id"] = self.evolution_model_id
        self.evolution_model_id += 1

    return next_population
def __init__(self, faq_config_path):
    faq_config = read_json(f"{faq_config_path}/faq_config.json")
    self.faq = build_model(faq_config, download=True)
def load(self, model_name, optimizer_name, lr, decay, loss_name, metrics_names=None,
         add_metrics_file=None, loss_weights=None, sample_weight_mode=None,
         weighted_metrics=None, target_tensors=None):
    """
    Initialize model from saved params and weights

    Args:
        model_name: name of model function described as a method of this class
        optimizer_name: name of optimizer from keras.optimizers
        lr: learning rate
        decay: learning rate decay
        loss_name: loss function name (from keras.losses)
        metrics_names: names of metrics (from keras.metrics)
        add_metrics_file: file with additional metrics functions
        loss_weights: optional parameter as in keras.model.compile
        sample_weight_mode: optional parameter as in keras.model.compile
        weighted_metrics: optional parameter as in keras.model.compile
        target_tensors: optional parameter as in keras.model.compile

    Returns:
        model with loaded weights and network parameters from files
        but compiled with given learning parameters
    """
    if self.load_path:
        if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
            raise ConfigError("Provided load path is incorrect!")

        opt_path = Path("{}_opt.json".format(str(self.load_path.resolve())))
        weights_path = Path("{}.h5".format(str(self.load_path.resolve())))

        if opt_path.exists() and weights_path.exists():
            print("\n:: initializing `{}` from saved\n".format(self.__class__.__name__))

            self.opt = read_json(opt_path)

            model_func = getattr(self, model_name, None)
            if callable(model_func):
                model = model_func(params=self.opt)
            else:
                raise AttributeError("Model {} is not defined".format(model_name))

            print("[ loading weights from `{}` ]".format(weights_path.name))
            model.load_weights(str(weights_path))

            optimizer_func = getattr(keras.optimizers, optimizer_name, None)
            if callable(optimizer_func):
                optimizer_ = optimizer_func(lr=lr, decay=decay)
            else:
                raise AttributeError("Optimizer {} is not callable".format(optimizer_name))

            loss_func = getattr(keras.losses, loss_name, None)
            if callable(loss_func):
                loss = loss_func
            else:
                raise AttributeError("Loss {} is not defined".format(loss_name))

            metrics_funcs = []
            for i in range(len(metrics_names)):
                metrics_func = getattr(keras.metrics, metrics_names[i], None)
                if callable(metrics_func):
                    metrics_funcs.append(metrics_func)
                else:
                    metrics_func = getattr(add_metrics_file, metrics_names[i], None)
                    if callable(metrics_func):
                        metrics_funcs.append(metrics_func)
                    else:
                        raise AttributeError("Metric {} is not defined".format(metrics_names[i]))

            model.compile(optimizer=optimizer_,
                          loss=loss,
                          metrics=metrics_funcs,
                          loss_weights=loss_weights,
                          sample_weight_mode=sample_weight_mode,
                          weighted_metrics=weighted_metrics,
                          target_tensors=target_tensors)
            return model
        else:
            return self.init_model_from_scratch(model_name, optimizer_name, lr, decay, loss_name,
                                                metrics_names=metrics_names,
                                                add_metrics_file=add_metrics_file,
                                                loss_weights=loss_weights,
                                                sample_weight_mode=sample_weight_mode,
                                                weighted_metrics=weighted_metrics,
                                                target_tensors=target_tensors)
    else:
        warn("No `load_path` is provided for {}".format(self.__class__.__name__))
        return self.init_model_from_scratch(model_name, optimizer_name, lr, decay, loss_name,
                                            metrics_names=metrics_names,
                                            add_metrics_file=add_metrics_file,
                                            loss_weights=loss_weights,
                                            sample_weight_mode=sample_weight_mode,
                                            weighted_metrics=weighted_metrics,
                                            target_tensors=target_tensors)
# -*- coding: utf-8 -*-
import os

import telebot

from deeppavlov import train_model
from deeppavlov.core.common.file import read_json

print("import successful")

# Train the FAQ model on a local dataset instead of the default one.
model_config = read_json("./config.json")
model_config["dataset_reader"]["data_path"] = "./faq_school_en.csv"
model_config["dataset_reader"]["data_url"] = None
faq = train_model(model_config)
print("train model")

# Never commit a real bot token to source code; read it from the environment.
bot = telebot.TeleBot(os.environ["TELEGRAM_BOT_TOKEN"])


def get_answer(question):
    print("get question")
    return faq([question])[0][0][0]


@bot.message_handler(content_types=['text'])
def get_text_messages(message):
    print("text handler")
    if message.text == "Привет":  # "Hi"
        bot.send_message(message.from_user.id, "Привет, чем я могу тебе помочь?")  # "Hi, how can I help you?"
    elif message.text == "/help":
        # Assumed completion: the original snippet breaks off at this branch.
        bot.send_message(message.from_user.id, "Задайте мне вопрос, и я отвечу.")  # "Ask me a question and I will answer."
    else:
        bot.send_message(message.from_user.id, get_answer(message.text))


bot.polling()
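For long-running bots, the polling call can be made more resilient to transient network errors. A small optional variant (the none_stop flag is the spelling used in pyTelegramBotAPI releases contemporary with this snippet; newer releases spell it non_stop):

# Optional hardening: keep polling through transient network errors.
bot.polling(none_stop=True, interval=0)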
def train_evaluate_model_from_config(config: [str, Path, dict], to_train: bool = True, to_validate: bool = True) -> None: """Make training and evaluation of the model described in corresponding configuration file.""" if isinstance(config, (str, Path)): config = read_json(config) set_deeppavlov_root(config) import_packages(config.get('metadata', {}).get('imports', [])) dataset_config = config.get('dataset', None) if dataset_config: config.pop('dataset') ds_type = dataset_config['type'] if ds_type == 'classification': reader = {'name': 'basic_classification_reader'} iterator = {'name': 'basic_classification_iterator'} config['dataset_reader'] = {**dataset_config, **reader} config['dataset_iterator'] = {**dataset_config, **iterator} else: raise Exception("Unsupported dataset type: {}".format(ds_type)) data = [] reader_config = config.get('dataset_reader', None) if reader_config: reader_config = config['dataset_reader'] if 'class' in reader_config: c = reader_config.pop('class') try: module_name, cls_name = c.split(':') reader = getattr(importlib.import_module(module_name), cls_name)() except ValueError: e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`' .format(c)) log.exception(e) raise e else: reader = get_model(reader_config.pop('name'))() data_path = reader_config.pop('data_path', '') if isinstance(data_path, list): data_path = [expand_path(x) for x in data_path] else: data_path = expand_path(data_path) data = reader.read(data_path, **reader_config) else: log.warning("No dataset reader is provided in the JSON config.") iterator_config = config['dataset_iterator'] iterator: Union[DataLearningIterator, DataFittingIterator] = from_params(iterator_config, data=data) train_config = { 'metrics': ['accuracy'], 'validate_best': to_validate, 'test_best': True, 'show_examples': False } try: train_config.update(config['train']) except KeyError: log.warning('Train config is missing. Populating with default values') metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics']))) if to_train: model = fit_chainer(config, iterator) if callable(getattr(model, 'train_on_batch', None)): _train_batches(model, iterator, train_config, metrics_functions) elif callable(getattr(model, 'fit_batches', None)): _fit_batches(model, iterator, train_config) elif callable(getattr(model, 'fit', None)): _fit(model, iterator, train_config) elif not isinstance(model, Chainer): log.warning('Nothing to train') if train_config['validate_best'] or train_config['test_best']: # try: # model_config['load_path'] = model_config['save_path'] # except KeyError: # log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed') model = build_model_from_config(config, load_trained=True) log.info('Testing the best saved model') if train_config['validate_best']: report = { 'valid': _test_model(model, metrics_functions, iterator, train_config.get('batch_size', -1), 'valid', show_examples=train_config['show_examples']) } print(json.dumps(report, ensure_ascii=False)) if train_config['test_best']: report = { 'test': _test_model(model, metrics_functions, iterator, train_config.get('batch_size', -1), 'test', show_examples=train_config['show_examples']) } print(json.dumps(report, ensure_ascii=False))
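A hedged invocation sketch for the trainer above; the config path is illustrative and must point at a JSON file with the dataset/train sections this function reads:

# Illustrative path; any valid DeepPavlov JSON config works here.
train_evaluate_model_from_config("configs/intents/intents_snips.json",
                                 to_train=True,
                                 to_validate=True)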
def run_ms_bot_framework_server(agent_generator: callable, app_id: str, app_secret: str, multi_instance: bool = False, stateful: bool = False, port: Optional[int] = None, https: bool = False, ssl_key: str = None, ssl_cert: str = None): server_config_path = Path(get_settings_path(), SERVER_CONFIG_FILENAME).resolve() server_params = read_json(server_config_path) host = server_params['common_defaults']['host'] port = port or server_params['common_defaults']['port'] ms_bf_server_params = server_params['ms_bot_framework_defaults'] ms_bf_server_params['multi_instance'] = multi_instance or server_params['common_defaults']['multi_instance'] ms_bf_server_params['stateful'] = stateful or server_params['common_defaults']['stateful'] ms_bf_server_params['auth_url'] = AUTH_URL ms_bf_server_params['auth_host'] = AUTH_HOST ms_bf_server_params['auth_content_type'] = AUTH_CONTENT_TYPE ms_bf_server_params['auth_grant_type'] = AUTH_GRANT_TYPE ms_bf_server_params['auth_scope'] = AUTH_SCOPE ms_bf_server_params['auth_app_id'] = app_id or ms_bf_server_params['auth_app_id'] if not ms_bf_server_params['auth_app_id']: e = ValueError('Microsoft Bot Framework app id required: initiate -i param ' 'or auth_app_id param in server configuration file') log.error(e) raise e ms_bf_server_params['auth_app_secret'] = app_secret or ms_bf_server_params['auth_app_secret'] if not ms_bf_server_params['auth_app_secret']: e = ValueError('Microsoft Bot Framework app secret required: initiate -s param ' 'or auth_app_secret param in server configuration file') log.error(e) raise e if https: ssh_key_path = Path(ssl_key or server_params['https_key_path']).resolve() if not ssh_key_path.is_file(): e = FileNotFoundError('Ssh key file not found: please provide correct path in --key param or ' 'https_key_path param in server configuration file') log.error(e) raise e ssh_cert_path = Path(ssl_cert or server_params['https_cert_path']).resolve() if not ssh_cert_path.is_file(): e = FileNotFoundError('Ssh certificate file not found: please provide correct path in --cert param or ' 'https_cert_path param in server configuration file') log.error(e) raise e ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) ssl_context.load_cert_chain(ssh_cert_path, ssh_key_path) else: ssl_context = None input_q = Queue() bot = Bot(agent_generator, ms_bf_server_params, input_q) bot.start() @app.route('/') def index(): return redirect('/apidocs/') @app.route('/v3/conversations', methods=['POST']) def handle_activity(): activity = request.get_json() bot.input_queue.put(activity) return jsonify({}), 200 app.run(host=host, port=port, threaded=True, ssl_context=ssl_context)
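A hedged sketch of wiring this server up. The agent factory below is hypothetical (only its callable shape is implied by the code above), and the app id/secret are placeholders that would come from a Bot Framework registration:

# Placeholders only: supply your own registered app credentials.
def make_agent():
    return build_agent_from_config('my_agent_config.json')  # hypothetical factory

run_ms_bot_framework_server(agent_generator=make_agent,
                            app_id='<MS_APP_ID>',
                            app_secret='<MS_APP_SECRET>',
                            port=5000)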
def next_generation(self, generation: List[dict], scores: List[float], iteration: int) -> List[dict]: """ Provide replacement Args: generation: current generation (set of self.population_size configs scores: corresponding scores that should be maximized iteration: iteration number Returns: the next generation according to the given scores of current generation """ next_population = self.selection_of_best_with_weights(generation, scores) log.info("Saved with weights: {} models".format(self.n_saved_best_pretrained)) offsprings = self.crossover(generation, scores) changable_next = self.mutation(offsprings) next_population.extend(changable_next) for i in range(self.n_saved_best_pretrained): # if several train files: if self.train_partition != 1: file_ext = str(Path(next_population[i]["dataset_reader"]["train"]).suffix) next_population[i]["dataset_reader"]["train"] = "_".join( [str(p) for p in Path(next_population[i]["dataset_reader"]["train"]).stem.split("_")[:-1]])\ + "_" + str(iteration % self.train_partition) + file_ext try: # re-init learning rate with the final one (works for KerasModel) next_population[i] = self.insert_value_or_dict_into_config( next_population[i], self.main_model_path + ["lear_rate"], read_json(str(Path(self.get_value_from_config(next_population[i], self.main_model_path + ["save_path"]) ).parent.joinpath("model_opt.json")))["final_lear_rate"]) except: pass # load_paths if self.elitism_with_weights: # if elite models are saved with weights next_population[i] = self.insert_value_or_dict_into_config( next_population[i], self.main_model_path + ["load_path"], str(Path(self.get_value_from_config(next_population[i], self.main_model_path + ["save_path"])))) for path_id, path_ in enumerate(self.paths_to_fiton_dicts): next_population[i] = self.insert_value_or_dict_into_config( next_population[i], path_ + ["load_path"], str(Path(self.get_value_from_config(next_population[i], path_ + ["save_path"])))) else: # if elite models are saved only as configurations and trained again next_population[i] = self.insert_value_or_dict_into_config( next_population[i], self.main_model_path + ["load_path"], str(Path(self.get_value_from_config(self.basic_config, self.main_model_path + ["load_path"]) ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath("model"))) for path_id, path_ in enumerate(self.paths_to_fiton_dicts): suffix = Path(self.get_value_from_config(self.basic_config, path_ + ["load_path"])).suffix next_population[i] = self.insert_value_or_dict_into_config( next_population[i], path_ + ["load_path"], str(Path(self.get_value_from_config(self.basic_config, self.main_model_path + ["load_path"]) ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath( "fitted_model_" + str(path_id)).with_suffix(suffix))) # save_paths next_population[i] = self.insert_value_or_dict_into_config( next_population[i], self.main_model_path + ["save_path"], str(Path(self.get_value_from_config(self.basic_config, self.main_model_path + ["save_path"]) ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath("model"))) for path_id, path_ in enumerate(self.paths_to_fiton_dicts): suffix = Path(self.get_value_from_config(self.basic_config, path_ + ["save_path"])).suffix next_population[i] = self.insert_value_or_dict_into_config( next_population[i], path_ + ["save_path"], str(Path(self.get_value_from_config(self.basic_config, self.main_model_path + ["save_path"]) ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath( 
"fitted_model_" + str(path_id)).with_suffix(suffix))) for i in range(self.n_saved_best_pretrained, self.population_size): # if several train files if self.train_partition != 1: file_ext = str(Path(next_population[i]["dataset_reader"]["train"]).suffix) next_population[i]["dataset_reader"]["train"] = "_".join( [str(p) for p in Path(next_population[i]["dataset_reader"]["train"]).stem.split("_")[:-1]])\ + "_" + str(iteration % self.train_partition) + file_ext for which_path in ["save_path", "load_path"]: next_population[i] = self.insert_value_or_dict_into_config( next_population[i], self.main_model_path + [which_path], str(Path(self.get_value_from_config(self.basic_config, self.main_model_path + [which_path]) ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath("model"))) for path_id, path_ in enumerate(self.paths_to_fiton_dicts): suffix = Path(self.get_value_from_config(self.basic_config, path_ + ["save_path"])).suffix for which_path in ["save_path", "load_path"]: next_population[i] = self.insert_value_or_dict_into_config( next_population[i], path_ + [which_path], str(Path(self.get_value_from_config(self.basic_config, self.main_model_path + [which_path]) ).joinpath("population_" + str(iteration)).joinpath("model_" + str(i)).joinpath( "fitted_model_" + str(path_id)).with_suffix(suffix))) next_population[i]["evolution_model_id"] = self.evolution_model_id self.evolution_model_id += 1 return next_population
from deeppavlov import build_model, configs
from deeppavlov.core.common.file import read_json

bert_config = read_json(configs.embedder.bert_embedder)
bert_config['metadata']['variables']['BERT_PATH'] = \
    '/projappl/project_2002016/gramcor/bert-pretraned/rubert_cased_L-12_H-768_A-12_v2'

m = build_model(bert_config)

texts = ['Скоро рождество!', 'А это значит, что все будет хорошо.']  # "Christmas is coming!", "And that means everything will be fine."
tokens, token_embs, subtokens, subtoken_embs, sent_max_embs, sent_mean_embs, bert_pooler_outputs = m(texts)
print(token_embs.shape)
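The embeddings are aligned with the returned tokens, so individual token vectors can be pulled out directly. A hedged follow-up (the 768-dimensional size assumes the RuBERT base checkpoint loaded above):

# Illustrative indexing: first token of the first sentence and its vector.
first_token = tokens[0][0]
first_vec = token_embs[0][0]          # 768-dimensional with RuBERT base
print(first_token, first_vec.shape)   # e.g. Скоро (768,)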
def __init__(self, rel2id_path: str, rel2label_path: str, **kwargs):
    self.rel2id_path = rel2id_path
    self.rel2label_path = rel2label_path
    self.rel2id = read_json(str(expand_path(self.rel2id_path)))
    self.id2rel = {rel_id: rel for rel, rel_id in self.rel2id.items()}
    self.rel2label = read_json(str(expand_path(self.rel2label_path)))
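A hedged sketch of the JSON shapes this constructor expects: rel2id maps relation names to integer ids, rel2label maps the same names to human-readable labels, and id2rel inverts the first mapping. The file contents below are assumptions for illustration:

rel2id = {"P19": 0, "P569": 1}                 # assumed rel2id.json contents
rel2label = {"P19": "place of birth",
             "P569": "date of birth"}          # assumed rel2label.json contents
id2rel = {rel_id: rel for rel, rel_id in rel2id.items()}
print(id2rel[0], "->", rel2label[id2rel[0]])   # P19 -> place of birth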
def run_alexa_server(agent_generator: callable, multi_instance: bool = False, stateful: bool = False, port: Optional[int] = None, https: bool = False, ssl_key: str = None, ssl_cert: str = None) -> None: """Initiates Flask web service with Alexa skill. Args: agent_generator: Callback Alexa agents factory. multi_instance: Multi instance mode flag. stateful: Stateful mode flag. port: Flask web service port. https: Flag for running Alexa skill service in https mode. ssl_key: SSL key file path. ssl_cert: SSL certificate file path. """ server_config_path = Path(get_settings_path(), SERVER_CONFIG_FILENAME).resolve() server_params = read_json(server_config_path) host = server_params['common_defaults']['host'] port = port or server_params['common_defaults']['port'] docs_endpoint = server_params['common_defaults']['docs_endpoint'] Swagger.DEFAULT_CONFIG['specs_route'] = docs_endpoint Swagger(app) alexa_server_params = server_params['alexa_defaults'] alexa_server_params['multi_instance'] = multi_instance or server_params[ 'common_defaults']['multi_instance'] alexa_server_params[ 'stateful'] = stateful or server_params['common_defaults']['stateful'] alexa_server_params['amazon_cert_lifetime'] = AMAZON_CERTIFICATE_LIFETIME if https: ssh_key_path = Path(ssl_key or server_params['https_key_path']).resolve() if not ssh_key_path.is_file(): e = FileNotFoundError( 'Ssh key file not found: please provide correct path in --key param or ' 'https_key_path param in server configuration file') log.error(e) raise e ssh_cert_path = Path(ssl_cert or server_params['https_cert_path']).resolve() if not ssh_cert_path.is_file(): e = FileNotFoundError( 'Ssh certificate file not found: please provide correct path in --cert param or ' 'https_cert_path param in server configuration file') log.error(e) raise e ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) ssl_context.load_cert_chain(ssh_cert_path, ssh_key_path) else: ssl_context = None input_q = Queue() output_q = Queue() bot = Bot(agent_generator, alexa_server_params, input_q, output_q) bot.start() endpoint_description = { 'description': 'Amazon Alexa custom service endpoint', 'parameters': [{ 'name': 'Signature', 'in': 'header', 'required': 'true', 'type': 'string', 'example': 'Z5H5wqd06ExFVPNfJiqhKvAFjkf+cTVodOUirucHGcEVAMO1LfvgqWUkZ/X1ITDZbI0w+SMwVkEQZlkeThbVS/54M22StNDUtfz4Ua20xNDpIPwcWIACAmZ38XxbbTEFJI5WwqrbilNcfzqiGrIPfdO5rl+/xUjHFUdcJdUY/QzBxXsceytVYfEiR9MzOCN2m4C0XnpThUavAu159KrLj8AkuzN0JF87iXv+zOEeZRgEuwmsAnJrRUwkJ4yWokEPnSVdjF0D6f6CscfyvRe9nsWShq7/zRTa41meweh+n006zvf58MbzRdXPB22RI4AN0ksWW7hSC8/QLAKQE+lvaw==', }, { 'name': 'Signaturecertchainurl', 'in': 'header', 'required': 'true', 'type': 'string', 'example': 'https://s3.amazonaws.com/echo.api/echo-api-cert-6-ats.pem', }, { 'name': 'data', 'in': 'body', 'required': 'true', 'example': { 'version': '1.0', 'session': { 'new': False, 'sessionId': 'amzn1.echo-api.session.3c6ebffd-55b9-4e1a-bf3c-c921c1801b63', 'application': { 'applicationId': 'amzn1.ask.skill.8b17a5de-3749-4919-aa1f-e0bbaf8a46a6' }, 'attributes': { 'sessionId': 'amzn1.echo-api.session.3c6ebffd-55b9-4e1a-bf3c-c921c1801b63' }, 'user': { 'userId': 'amzn1.ask.account.AGR4R2LOVHMNMNOGROBVNLU7CL4C57X465XJF2T2F55OUXNTLCXDQP3I55UXZIALEKKZJ6Q2MA5MEFSMZVPEL5NVZS6FZLEU444BVOLPB5WVH5CHYTQAKGD7VFLGPRFZVHHH2NIB4HKNHHGX6HM6S6QDWCKXWOIZL7ONNQSBUCVPMZQKMCYXRG5BA2POYEXFDXRXCGEVDWVSMPQ' } }, 'context': { 'System': { 'application': { 'applicationId': 'amzn1.ask.skill.8b17a5de-3749-4919-aa1f-e0bbaf8a46a6' }, 'user': { 'userId': 
'amzn1.ask.account.AGR4R2LOVHMNMNOGROBVNLU7CL4C57X465XJF2T2F55OUXNTLCXDQP3I55UXZIALEKKZJ6Q2MA5MEFSMZVPEL5NVZS6FZLEU444BVOLPB5WVH5CHYTQAKGD7VFLGPRFZVHHH2NIB4HKNHHGX6HM6S6QDWCKXWOIZL7ONNQSBUCVPMZQKMCYXRG5BA2POYEXFDXRXCGEVDWVSMPQ' }, 'device': { 'deviceId': 'amzn1.ask.device.AFQAMLYOYQUUACSE7HFVYS4ZI2KUB35JPHQRUPKTDCAU3A47WESP5L57KSWT5L6RT3FVXWH4OA2DNPJRMZ2VGEIACF3PJEIDCOUWUBC4W5RPJNUB3ZVT22J4UJN5UL3T2UBP36RVHFJ5P4IPT2HUY3P2YOY33IOU4O33HUAG7R2BUNROEH4T2', 'supportedInterfaces': {} }, 'apiEndpoint': 'https://api.amazonalexa.com', 'apiAccessToken': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6IjEifQ.eyJhdWQiOiJodHRwczovL2FwaS5hbWF6b25hbGV4YS5jb20iLCJpc3MiOiJBbGV4YVNraWxsS2l0Iiwic3ViIjoiYW16bjEuYXNrLnNraWxsLjhiMTdhNWRlLTM3NDktNDkxOS1hYTFmLWUwYmJhZjhhNDZhNiIsImV4cCI6MTU0NTIyMzY1OCwiaWF0IjoxNTQ1MjIwMDU4LCJuYmYiOjE1NDUyMjAwNTgsInByaXZhdGVDbGFpbXMiOnsiY29uc2VudFRva2VuIjpudWxsLCJkZXZpY2VJZCI6ImFtem4xLmFzay5kZXZpY2UuQUZRQU1MWU9ZUVVVQUNTRTdIRlZZUzRaSTJLVUIzNUpQSFFSVVBLVERDQVUzQTQ3V0VTUDVMNTdLU1dUNUw2UlQzRlZYV0g0T0EyRE5QSlJNWjJWR0VJQUNGM1BKRUlEQ09VV1VCQzRXNVJQSk5VQjNaVlQyMko0VUpONVVMM1QyVUJQMzZSVkhGSjVQNElQVDJIVVkzUDJZT1kzM0lPVTRPMzNIVUFHN1IyQlVOUk9FSDRUMiIsInVzZXJJZCI6ImFtem4xLmFzay5hY2NvdW50LkFHUjRSMkxPVkhNTk1OT0dST0JWTkxVN0NMNEM1N1g0NjVYSkYyVDJGNTVPVVhOVExDWERRUDNJNTVVWFpJQUxFS0taSjZRMk1BNU1FRlNNWlZQRUw1TlZaUzZGWkxFVTQ0NEJWT0xQQjVXVkg1Q0hZVFFBS0dEN1ZGTEdQUkZaVkhISDJOSUI0SEtOSEhHWDZITTZTNlFEV0NLWFdPSVpMN09OTlFTQlVDVlBNWlFLTUNZWFJHNUJBMlBPWUVYRkRYUlhDR0VWRFdWU01QUSJ9fQ.jcomYhBhU485T4uoe2NyhWnL-kZHoPQKpcycFqa-1sy_lSIitfFGup9DKrf2NkN-I9lZ3xwq9llqx9WRN78fVJjN6GLcDhBDH0irPwt3n9_V7_5bfB6KARv5ZG-JKOmZlLBqQbnln0DAJ10D8HNiytMARNEwduMBVDNK0A5z6YxtRcLYYFD2-Ieg_V8Qx90eE2pd2U5xOuIEL0pXfSoiJ8vpxb8BKwaMO47tdE4qhg_k7v8ClwyXg3EMEhZFjixYNqdW1tCrwDGj58IWMXDyzZhIlRMh6uudMOT6scSzcNVD0v42IOTZ3S_X6rG01B7xhUDlZXMqkrCuzOyqctGaPw' }, 'Viewport': { 'experiences': [{ 'arcMinuteWidth': 246, 'arcMinuteHeight': 144, 'canRotate': False, 'canResize': False }], 'shape': 'RECTANGLE', 'pixelWidth': 1024, 'pixelHeight': 600, 'dpi': 160, 'currentPixelWidth': 1024, 'currentPixelHeight': 600, 'touch': ['SINGLE'] } }, 'request': { 'type': 'IntentRequest', 'requestId': 'amzn1.echo-api.request.388d0f6e-04b9-4450-a687-b9abaa73ac6a', 'timestamp': '2018-12-19T11:47:38Z', 'locale': 'en-US', 'intent': { 'name': 'AskDeepPavlov', 'confirmationStatus': 'NONE', 'slots': { 'raw_input': { 'name': 'raw_input', 'value': 'my beautiful sandbox skill', 'resolutions': { 'resolutionsPerAuthority': [{ 'authority': 'amzn1.er-authority.echo-sdk.amzn1.ask.skill.8b17a5de-3749-4919-aa1f-e0bbaf8a46a6.GetInput', 'status': { 'code': 'ER_SUCCESS_NO_MATCH' } }] }, 'confirmationStatus': 'NONE', 'source': 'USER' } } } } } }], 'responses': { "200": { "description": "A model response" } } } @app.route('/') def index(): return redirect(docs_endpoint) @app.route('/interact', methods=['POST']) @swag_from(endpoint_description) def handle_request(): request_body: bytes = request.get_data() signature_chain_url: str = request.headers.get('Signaturecertchainurl') signature: str = request.headers.get('Signature') alexa_request: dict = request.get_json() request_dict = { 'request_body': request_body, 'signature_chain_url': signature_chain_url, 'signature': signature, 'alexa_request': alexa_request } bot.input_queue.put(request_dict) response: dict = bot.output_queue.get() response_code = 400 if 'error' in response.keys() else 200 return jsonify(response), response_code app.run(host=host, port=port, threaded=True, ssl_context=ssl_context)
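Requests to /interact must carry the two Amazon signature headers that the handler reads. A hedged local-testing sketch with the requests library (all values are placeholders; a real Alexa request carries a valid signature, which the bot verifies before responding):

import requests

resp = requests.post(
    "http://localhost:5000/interact",
    json={"version": "1.0", "request": {"type": "IntentRequest"}},  # placeholder body
    headers={
        "Signature": "<base64-signature>",
        "Signaturecertchainurl": "https://s3.amazonaws.com/echo.api/echo-api-cert-6-ats.pem",
    },
)
print(resp.status_code, resp.json())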
def load(self, model_name: str, optimizer_name: str, loss_name: str,
         lear_rate: float = 0.01, lear_rate_decay: float = 0.) -> Model:
    """
    Initialize model from saved params and weights

    Args:
        model_name: name of model function described as a method of this class
        optimizer_name: name of optimizer from keras.optimizers
        loss_name: loss function name (from keras.losses)
        lear_rate: learning rate.
        lear_rate_decay: learning rate decay.

    Returns:
        model with loaded weights and network parameters from files
        but compiled with given learning parameters
    """
    if self.load_path:
        if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
            raise ConfigError("Provided load path is incorrect!")

        opt_path = Path("{}_opt.json".format(str(self.load_path.resolve())))
        weights_path = Path("{}.h5".format(str(self.load_path.resolve())))

        if opt_path.exists() and weights_path.exists():
            log.info("[initializing `{}` from saved]".format(self.__class__.__name__))

            self.opt = read_json(opt_path)

            model_func = getattr(self, model_name, None)
            if callable(model_func):
                model = model_func(**self.opt)
            else:
                raise AttributeError("Model {} is not defined".format(model_name))

            log.info("[loading weights from {}]".format(weights_path.name))
            model.load_weights(str(weights_path))

            optimizer_func = getattr(keras.optimizers, optimizer_name, None)
            if callable(optimizer_func):
                # pass only the learning parameters that were actually given
                optimizer_params = {}
                if lear_rate is not None:
                    optimizer_params['lr'] = lear_rate
                if lear_rate_decay is not None:
                    optimizer_params['decay'] = lear_rate_decay
                self.optimizer = optimizer_func(**optimizer_params)
            else:
                raise AttributeError("Optimizer {} is not defined in `keras.optimizers`".format(optimizer_name))

            loss_func = getattr(keras.losses, loss_name, None)
            if callable(loss_func):
                loss = loss_func
            else:
                raise AttributeError("Loss {} is not defined".format(loss_name))

            model.compile(optimizer=self.optimizer, loss=loss)
            return model
        else:
            return self.init_model_from_scratch(model_name, optimizer_name, loss_name,
                                                lear_rate, lear_rate_decay)
    else:
        log.warning("No `load_path` is provided for {}".format(self.__class__.__name__))
        return self.init_model_from_scratch(model_name, optimizer_name, loss_name,
                                            lear_rate, lear_rate_decay)
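The dynamic-lookup pattern used above (resolve a name against a module with getattr, then check callability) works standalone. A minimal sketch, assuming the older keras.optimizers signature with lr/decay keyword arguments that this codebase targets:

import keras

# Resolve "Adam" the same way load() does.
optimizer_func = getattr(keras.optimizers, "Adam", None)
if callable(optimizer_func):
    optimizer = optimizer_func(lr=0.01, decay=0.0)
else:
    raise AttributeError("Optimizer Adam is not defined in `keras.optimizers`")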
def train_model_from_config(config_path: str): config = read_json(config_path) set_deeppavlov_root(config) reader_config = config['dataset_reader'] reader = get_model(reader_config['name'])() data_path = expand_path(reader_config.get('data_path', '')) data = reader.read(data_path) dataset_config = config['dataset'] dataset: Dataset = from_params(dataset_config, data=data) if 'chainer' in config: model = fit_chainer(config, dataset) else: vocabs = {} for vocab_param_name, vocab_config in config.get('vocabs', {}).items(): v: Estimator = from_params(vocab_config, mode='train') vocabs[vocab_param_name] = _fit(v, dataset) model_config = config['model'] model = from_params(model_config, vocabs=vocabs, mode='train') train_config = { 'metrics': ['accuracy'], 'validate_best': True, 'test_best': True } try: train_config.update(config['train']) except KeyError: log.warning('Train config is missing. Populating with default values') metrics_functions = list( zip(train_config['metrics'], get_metrics_by_names(train_config['metrics']))) if callable(getattr(model, 'train_on_batch', None)): _train_batches(model, dataset, train_config, metrics_functions) elif callable(getattr(model, 'fit', None)): _fit(model, dataset, train_config) elif not isinstance(model, Chainer): log.warning('Nothing to train') if train_config['validate_best'] or train_config['test_best']: # try: # model_config['load_path'] = model_config['save_path'] # except KeyError: # log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed') model = build_model_from_config(config, load_trained=True) log.info('Testing the best saved model') if train_config['validate_best']: report = { 'valid': _test_model(model, metrics_functions, dataset, train_config.get('batch_size', -1), 'valid') } print(json.dumps(report, ensure_ascii=False)) if train_config['test_best']: report = { 'test': _test_model(model, metrics_functions, dataset, train_config.get('batch_size', -1), 'test') } print(json.dumps(report, ensure_ascii=False))
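A hedged sketch of the minimal config skeleton this function consumes; the top-level keys mirror what the code reads (dataset_reader, dataset, optional vocabs, model, optional train), while the component names below are illustrative and must be registered models:

# Illustrative skeleton only; "name" values must match registered components,
# and extra keys are passed through as component parameters.
config = {
    "dataset_reader": {"name": "basic_classification_reader", "data_path": "data/"},
    "dataset": {"name": "basic_classification_iterator"},
    "vocabs": {
        "token_vocab": {"name": "default_vocab"}
    },
    "model": {"name": "intent_model"},
    "train": {"metrics": ["accuracy"], "validate_best": True, "test_best": True}
}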
def main(): args = parser.parse_args() pipeline_config_path = find_config(args.config_path) key_main_model = args.key_main_model population_size = args.p_size gpus = [int(gpu) for gpu in args.gpus.split(",")] train_partition = int(args.train_partition) start_from_population = int(args.start_from_population) path_to_population = args.path_to_population elitism_with_weights = args.elitism_with_weights iterations = int(args.iterations) p_crossover = args.p_cross pow_crossover = args.pow_cross p_mutation = args.p_mut pow_mutation = args.pow_mut if os.environ.get("CUDA_VISIBLE_DEVICES") is None: pass else: cvd = [int(gpu) for gpu in os.environ.get("CUDA_VISIBLE_DEVICES").split(",")] if gpus == [-1]: gpus = cvd else: try: gpus = [cvd[gpu] for gpu in gpus] except IndexError: raise ConfigError("Can not use gpus `{}` with CUDA_VISIBLE_DEVICES='{}'".format( ",".join(map(str, gpus)), ",".join(map(str, cvd)) )) basic_params = read_json(pipeline_config_path) log.info("Given basic params: {}\n".format(json.dumps(basic_params, indent=2))) # Initialize evolution evolution = ParamsEvolution(population_size=population_size, p_crossover=p_crossover, crossover_power=pow_crossover, p_mutation=p_mutation, mutation_power=pow_mutation, key_main_model=key_main_model, seed=42, train_partition=train_partition, elitism_with_weights=elitism_with_weights, **basic_params) considered_metrics = evolution.get_value_from_config(evolution.basic_config, list(evolution.find_model_path( evolution.basic_config, "metrics"))[0] + ["metrics"]) considered_metrics = [metric['name'] if isinstance(metric, dict) else metric for metric in considered_metrics] log.info(considered_metrics) evolve_metric = considered_metrics[0] # Create table variable for gathering results abs_path_to_main_models = expand_path(str(evolution.models_path).format( **evolution.basic_config['metadata']['variables'])) abs_path_to_main_models.mkdir(parents=True, exist_ok=True) result_file = abs_path_to_main_models / "result_table.tsv" print(result_file) result_table_columns = [] result_table_dict = {} for el in considered_metrics: result_table_dict[el + "_valid"] = [] result_table_dict[el + "_test"] = [] result_table_columns.extend([el + "_valid", el + "_test"]) result_table_dict["params"] = [] result_table_columns.append("params") if start_from_population == 0: # if starting evolution from scratch iters = 0 result_table = pd.DataFrame(result_table_dict) # write down result table file result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t') log.info("Iteration #{} starts".format(iters)) # randomly generate the first population population = evolution.first_generation() else: # if starting evolution from already existing population iters = start_from_population log.info("Iteration #{} starts".format(iters)) population = [] for i in range(population_size): config = read_json(expand_path(path_to_population) / f"model_{i}" / "config.json") evolution.insert_value_or_dict_into_config( config, evolution.path_to_models_save_path, str(evolution.main_model_path / f"population_{start_from_population}" / f"model_{i}")) population.append(config) run_population(population, evolution, gpus) population_scores = results_to_table(population, evolution, considered_metrics, result_file, result_table_columns)[evolve_metric] log.info("Population scores: {}".format(population_scores)) log.info("Iteration #{} was done".format(iters)) iters += 1 while True: if iterations != -1 and start_from_population + iterations == iters: log.info("End of evolution on 
iteration #{}".format(iters)) break log.info("Iteration #{} starts".format(iters)) population = evolution.next_generation(population, population_scores, iters) run_population(population, evolution, gpus) population_scores = results_to_table(population, evolution, considered_metrics, result_file, result_table_columns)[evolve_metric] log.info("Population scores: {}".format(population_scores)) log.info("Iteration #{} was done".format(iters)) iters += 1
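The GPU bookkeeping at the top of main() remaps the user-requested indices through CUDA_VISIBLE_DEVICES. A small standalone illustration of that remapping (values are illustrative):

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "2,5,7"   # illustrative value

cvd = [int(gpu) for gpu in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]
requested = [0, 2]                              # user asks for visible devices 0 and 2
physical = [cvd[gpu] for gpu in requested]
print(physical)                                 # [2, 7]: the physical GPU ids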
from deeppavlov import train_model
from deeppavlov.core.common.file import read_json

model_config = read_json('train_config.json')
ranker = train_model(model_config)

docs = ranker(['cerebellum'])
print(docs)