def parse_config(config: Union[str, Path, dict]) -> dict:
    """Read config's variables and apply their values to all its properties.

    Args:
        config: an already loaded config dictionary, or a ``str``/``Path``
            that is passed through ``find_config`` and then loaded with
            ``read_json``.

    Returns:
        The value returned by ``_parse_config_property`` for the config and
        the collected variables.
    """
    if isinstance(config, (str, Path)):
        # Load the path returned by find_config; previously that result was
        # discarded and the raw argument was read instead.
        config = read_json(find_config(config))

    variables = {'DEEPPAVLOV_PATH': Path(__file__).parent.parent.parent}
    # Variables are expanded in declaration order, so each one may refer to
    # DEEPPAVLOV_PATH and to any variable declared before it.
    for name, value in config.get('metadata', {}).get('variables', {}).items():
        variables[name] = value.format(**variables)

    return _parse_config_property(config, variables)
def main():
    """Parse the command line and run the action selected by ``args.mode``.

    Models are downloaded first when ``--download`` is set or the mode itself
    is ``download``; after that exactly one mode branch (if any) is executed.
    """
    args = parser.parse_args()
    config_path = find_config(args.config_path)
    use_https, key_file, cert_file = args.https, args.key, args.cert
    mode = args.mode

    if args.download or mode == 'download':
        deep_download(config_path)

    is_multi_instance = args.multi_instance
    is_stateful = args.stateful
    first_epoch = args.start_epoch_num

    if mode == 'train':
        train_evaluate_model_from_config(config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=first_epoch)
    elif mode == 'evaluate':
        train_evaluate_model_from_config(config_path,
                                         to_train=False,
                                         to_validate=False,
                                         start_epoch_num=first_epoch)
    elif mode == 'interact':
        interact_model(config_path)
    elif mode == 'interactbot':
        interact_model_by_telegram(config_path, args.token)
    elif mode == 'interactmsbot':
        bot_id = args.ms_id
        bot_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=config_path,
                                app_id=bot_id,
                                app_secret=bot_secret,
                                multi_instance=is_multi_instance,
                                stateful=is_stateful,
                                port=args.port)
    elif mode == 'alexa':
        run_alexa_default_agent(model_config=config_path,
                                multi_instance=is_multi_instance,
                                stateful=is_stateful,
                                port=args.port,
                                https=use_https,
                                ssl_key=key_file,
                                ssl_cert=cert_file)
    elif mode == 'riseapi':
        # Both servers take the same arguments; only the entry point differs.
        if args.api_mode == 'alice':
            start_server = start_alice_server
        else:
            start_server = start_model_server
        start_server(config_path, use_https, key_file, cert_file, port=args.port)
    elif mode == 'predict':
        predict_on_stream(config_path, args.batch_size, args.file_path)
    elif mode == 'install':
        install_from_config(config_path)
    elif mode == 'crossval':
        if args.folds >= 2:
            calc_cv_score(config_path, n_folds=args.folds, is_loo=False)
        else:
            log.error('Minimum number of Folds is 2')
def parse_config(config: Union[str, Path, dict]) -> dict:
    """Apply the values of the config's variables to all of its properties.

    Args:
        config: a loaded config dictionary, or a ``str``/``Path`` that is
            resolved with ``find_config`` and loaded with ``read_json``.

    Returns:
        The value returned by ``_parse_config_property``.
    """
    if isinstance(config, (str, Path)):
        resolved_path = find_config(config)
        config = read_json(resolved_path)

    # The helper yields two mappings; both are forwarded to the property parser.
    variables, variables_exact = _get_variables_from_config(config)
    parsed = _parse_config_property(config, variables, variables_exact)
    return parsed
def main():
    """Parse the command line, pin the GPU and run the action for ``args.mode``.

    ``CUDA_VISIBLE_DEVICES`` is overwritten with ``args.gpu_id`` before any
    other work. Models are downloaded when ``--download`` is set or the mode
    is ``download``; then at most one mode branch is executed.
    """
    args = parser.parse_args()
    config_path = find_config(args.config_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    log.info("use gpu id:" + args.gpu_id)

    mode = args.mode
    if args.download or mode == 'download':
        deep_download(config_path)

    is_multi_instance = args.multi_instance
    is_stateful = args.stateful
    first_epoch = args.start_epoch_num

    if mode == 'train':
        train_evaluate_model_from_config(config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=first_epoch)
    elif mode == 'evaluate':
        train_evaluate_model_from_config(config_path,
                                         to_train=False,
                                         to_validate=False,
                                         start_epoch_num=first_epoch)
    elif mode == 'interact':
        interact_model(config_path)
    elif mode == 'interactbot':
        interact_model_by_telegram(config_path, args.token)
    elif mode == 'interactmsbot':
        bot_id = args.ms_id
        bot_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=config_path,
                                app_id=bot_id,
                                app_secret=bot_secret,
                                multi_instance=is_multi_instance,
                                stateful=is_stateful)
    elif mode == 'riseapi':
        is_alice = args.api_mode == 'alice'
        use_https, key_file, cert_file = args.https, args.key, args.cert
        # Both servers take the same arguments; only the entry point differs.
        start_server = start_alice_server if is_alice else start_model_server
        start_server(config_path, use_https, key_file, cert_file)
    elif mode == 'predict':
        predict_on_stream(config_path, args.batch_size, args.file_path)
    elif mode == 'install':
        install_from_config(config_path)
    elif mode == 'crossval':
        if args.folds >= 2:
            calc_cv_score(config_path, n_folds=args.folds, is_loo=False)
        else:
            log.error('Minimum number of Folds is 2')
def parse_config(config: Union[str, Path, dict]) -> dict:
    """Read config's variables and apply their values to all its properties.

    Every variable ``NAME`` from ``config['metadata']['variables']`` can be
    overridden by the environment variable ``DP_NAME``; ``DEEPPAVLOV_PATH``
    itself can be overridden by ``DP_DEEPPAVLOV_PATH``.

    Args:
        config: a loaded config dictionary, or a ``str``/``Path`` that is
            resolved with ``find_config`` and loaded with ``read_json``.

    Returns:
        The value returned by ``_parse_config_property`` for the config and
        the collected variables.
    """
    if isinstance(config, (str, Path)):
        config = read_json(find_config(config))

    variables = {
        'DEEPPAVLOV_PATH': os.getenv('DP_DEEPPAVLOV_PATH',
                                     Path(__file__).parent.parent.parent)
    }
    for name, value in config.get('metadata', {}).get('variables', {}).items():
        # An environment override wins over the value stored in the config.
        value = os.environ.get(f'DP_{name}', value)
        # Only strings can contain references to earlier variables; other
        # values (numbers, booleans, ...) are stored unchanged instead of
        # failing on a missing ``format`` method.
        if isinstance(value, str):
            value = value.format(**variables)
        variables[name] = value

    return _parse_config_property(config, variables)
def upload(config_in_file):
    """Pack a model into a tarball under ``/tmp`` and print upload instructions.

    The directory named by the config's ``MODEL_PATH`` variable is archived to
    ``/tmp/<class_name>/<model_name>.tar.gz``, the archive path is handed to
    ``main``, and the ``scp`` command plus the download URL are printed. The
    command is only printed, not executed.

    Args:
        config_in_file: config name or path accepted by ``parse_config`` and
            ``find_config``.
    """
    parsed_config = parse_config(config_in_file)
    config_file = find_config(config_in_file)

    model_dir = Path(parsed_config['metadata']['variables']['MODEL_PATH']).expanduser()
    model_name = config_file.stem
    class_name = config_file.parent.name

    staging_dir = f'/tmp/{class_name}'
    archive_path = f'/tmp/{class_name}/{model_name}.tar.gz'

    # Start from an empty staging directory on every run.
    shutil.rmtree(staging_dir, ignore_errors=True)
    os.mkdir(staging_dir)

    with tarfile.open(archive_path, "w:gz") as tar:
        tar.add(model_dir, arcname=model_name)

    main(archive_path)

    scp_command = f'scp -r {staging_dir} share.ipavlov.mipt.ru:/home/export/v1/'
    download_url = f'http://files.deeppavlov.ai/v1/{class_name}/{model_name}.tar.gz'
    print(scp_command, download_url, sep='\n')
def _get_variables_from_config(config: Union[str, Path, dict]):
    """Read config's variables.

    Every variable ``NAME`` from ``config['metadata']['variables']`` can be
    overridden by the environment variable ``DP_NAME``. A string value that
    is exactly ``"{OTHER}"`` is replaced by the value of ``OTHER`` itself
    (keeping its type); any other string is expanded with ``str.format``.
    Non-string values are stored unchanged.

    Args:
        config: a loaded config dictionary, or a ``str``/``Path`` that is
            resolved with ``find_config`` and loaded with ``read_json``.

    Returns:
        A tuple ``(variables, variables_exact)`` where ``variables`` maps
        ``NAME`` to its value and ``variables_exact`` maps the literal
        placeholder ``"{NAME}"`` to the same value.
    """
    if isinstance(config, (str, Path)):
        config = read_json(find_config(config))

    variables = {
        'DEEPPAVLOV_PATH': os.getenv('DP_DEEPPAVLOV_PATH',
                                     Path(__file__).parent.parent.parent)
    }
    variables_exact = {f'{{{k}}}': v for k, v in variables.items()}

    for name, value in config.get('metadata', {}).get('variables', {}).items():
        env_name = f'DP_{name}'
        if env_name in os.environ:
            value = os.getenv(env_name)

        # Check the type first: only a string can be a placeholder, and a
        # membership test with an unhashable value (list, dict) would raise
        # TypeError.
        if isinstance(value, str):
            if value in variables_exact:
                value = variables_exact[value]
            else:
                value = value.format(**variables)

        variables[name] = value
        variables_exact[f'{{{name}}}'] = value

    return variables, variables_exact
def upload(config_in_file):
    """Pack a model into ``/tmp/<model_name>.tar.gz`` and print upload instructions.

    The directory named by the config's ``MODEL_PATH`` variable is archived
    (stored under the part of its path that follows ``models/``), the archive
    path is handed to ``main``, and the ``scp`` command plus the download URL
    are printed. The command is only printed, not executed.

    Args:
        config_in_file: config name or path accepted by ``parse_config`` and
            ``find_config``.

    Raises:
        ValueError: if the config file's stem does not occur in ``MODEL_PATH``.
        IndexError: if ``MODEL_PATH`` contains no ``models/`` component.
    """
    print(config_in_file)
    config_in = parse_config(config_in_file)
    config_in_file = find_config(config_in_file)

    model_path = Path(config_in['metadata']['variables']['MODEL_PATH']).expanduser()
    model_name, class_name = config_in_file.stem, config_in_file.parent.name

    if str(model_name) not in str(model_path):
        # A bare string cannot be raised; wrap the message in an exception.
        raise ValueError(f'{model_name} is not the path of the {model_path}')

    arcname = str(model_path).split("models/")[1]
    tmp_folder = '/tmp/'
    tmp_tar = tmp_folder + f'{model_name}.tar.gz'

    print("model_path", model_path)
    print("class_name", class_name)
    print("model_name", model_name)

    print("Start tarring")
    # The context manager closes the archive even when adding files fails.
    with tarfile.open(tmp_tar, "w|gz") as archive:
        archive.add(model_path, arcname=arcname)
    print("Stop tarring")

    print("Calculating hash")
    main(tmp_tar)
    print("tmp_tar", tmp_tar)

    command = f'scp -r {tmp_folder}{model_name}* share.ipavlov.mipt.ru:/home/export/v1/{class_name}'
    download_url = f'http://files.deeppavlov.ai/v1/{class_name}/{model_name}.tar.gz'
    print(command, download_url, sep='\n')
def main():
    """Run the evolutionary parameter search configured on the command line.

    The first population is either generated from scratch or loaded from
    ``path_to_population``; every population is trained with
    ``run_population`` and scored through ``results_to_table``. The loop ends
    after ``iterations`` generations, or never when ``iterations`` is ``-1``.
    """
    args = parser.parse_args()

    pipeline_config_path = find_config(args.config_path)
    key_main_model = args.key_main_model
    population_size = args.p_size
    gpus = [int(gpu) for gpu in args.gpus.split(",")]
    train_partition = int(args.train_partition)
    start_from_population = int(args.start_from_population)
    path_to_population = args.path_to_population
    elitism_with_weights = args.elitism_with_weights
    iterations = int(args.iterations)

    p_crossover = args.p_cross
    pow_crossover = args.pow_cross
    p_mutation = args.p_mut
    pow_mutation = args.pow_mut

    # When CUDA_VISIBLE_DEVICES is set, the requested gpus are indices into
    # that list; [-1] selects every visible device.
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible_devices is not None:
        cvd = [int(gpu) for gpu in visible_devices.split(",")]
        if gpus == [-1]:
            gpus = cvd
        else:
            try:
                gpus = [cvd[gpu] for gpu in gpus]
            except IndexError:
                raise ConfigError(
                    "Can not use gpus `{}` with CUDA_VISIBLE_DEVICES='{}'".format(
                        ",".join(map(str, gpus)), ",".join(map(str, cvd))))

    basic_params = read_json(pipeline_config_path)
    log.info("Given basic params: {}\n".format(json.dumps(basic_params, indent=2)))

    # Initialize evolution
    evolution = ParamsEvolution(population_size=population_size,
                                p_crossover=p_crossover,
                                crossover_power=pow_crossover,
                                p_mutation=p_mutation,
                                mutation_power=pow_mutation,
                                key_main_model=key_main_model,
                                seed=42,
                                train_partition=train_partition,
                                elitism_with_weights=elitism_with_weights,
                                **basic_params)

    metrics_path = list(evolution.find_model_path(evolution.basic_config, "metrics"))[0] + ["metrics"]
    raw_metrics = evolution.get_value_from_config(evolution.basic_config, metrics_path)
    considered_metrics = [entry['name'] if isinstance(entry, dict) else entry
                          for entry in raw_metrics]
    log.info(considered_metrics)

    # The first listed metric drives the evolution.
    evolve_metric = considered_metrics[0]

    # Create table variable for gathering results
    abs_path_to_main_models = expand_path(
        parse_value_with_config(evolution.models_path, evolution.basic_config))
    abs_path_to_main_models.mkdir(parents=True, exist_ok=True)
    result_file = abs_path_to_main_models / "result_table.tsv"
    print(result_file)

    result_table_columns = []
    result_table_dict = {}
    for metric_name in considered_metrics:
        for suffix in ("_valid", "_test"):
            column = metric_name + suffix
            result_table_dict[column] = []
            result_table_columns.append(column)
    result_table_dict["params"] = []
    result_table_columns.append("params")

    def score_population(current_population):
        """Train the population and return its scores for the evolve metric."""
        run_population(current_population, evolution, gpus)
        table = results_to_table(current_population, evolution, considered_metrics,
                                 result_file, result_table_columns)
        return table[evolve_metric]

    if start_from_population != 0:
        # if starting evolution from already existing population
        iters = start_from_population
        log.info("Iteration #{} starts".format(iters))

        population = []
        for i in range(population_size):
            config = read_json(expand_path(path_to_population) / f"model_{i}" / "config.json")
            evolution.insert_value_or_dict_into_config(
                config,
                evolution.path_to_models_save_path,
                str(evolution.main_model_path / f"population_{start_from_population}" / f"model_{i}"))
            population.append(config)
    else:
        # if starting evolution from scratch
        iters = 0
        result_table = pd.DataFrame(result_table_dict)
        # write down result table file
        result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t')

        log.info("Iteration #{} starts".format(iters))
        # randomly generate the first population
        population = evolution.first_generation()

    population_scores = score_population(population)
    log.info("Population scores: {}".format(population_scores))
    log.info("Iteration #{} was done".format(iters))
    iters += 1

    # Runs forever when iterations == -1.
    while iterations == -1 or start_from_population + iterations != iters:
        log.info("Iteration #{} starts".format(iters))
        population = evolution.next_generation(population, population_scores, iters)
        population_scores = score_population(population)
        log.info("Population scores: {}".format(population_scores))
        log.info("Iteration #{} was done".format(iters))
        iters += 1

    log.info("End of evolution on iteration #{}".format(iters))
def main():
    """Parse the command line and run the action selected by ``args.mode``.

    Models are downloaded first when ``--download`` is set or the mode itself
    is ``download``; after that at most one mode branch is executed.
    """
    args = parser.parse_args()
    config_path = find_config(args.config_path)
    use_https, key_file, cert_file = args.https, args.key, args.cert
    mode = args.mode

    if args.download or mode == 'download':
        deep_download(config_path)

    is_multi_instance = args.multi_instance
    is_stateful = args.stateful

    if mode == 'train':
        train_evaluate_model_from_config(config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=args.start_epoch_num)
    elif mode == 'evaluate':
        train_evaluate_model_from_config(config_path,
                                         to_train=False,
                                         start_epoch_num=args.start_epoch_num)
    elif mode == 'interact':
        interact_model(config_path)
    elif mode == 'interactbot':
        bot_token = args.token
        interact_model_by_telegram(model_config=config_path,
                                   token=bot_token,
                                   default_skill_wrap=not args.no_default_skill)
    elif mode == 'interactmsbot':
        bot_id = args.ms_id
        bot_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=config_path,
                                app_id=bot_id,
                                app_secret=bot_secret,
                                multi_instance=is_multi_instance,
                                stateful=is_stateful,
                                port=args.port,
                                https=use_https,
                                ssl_key=key_file,
                                ssl_cert=cert_file,
                                default_skill_wrap=not args.no_default_skill)
    elif mode == 'alexa':
        run_alexa_default_agent(model_config=config_path,
                                multi_instance=is_multi_instance,
                                stateful=is_stateful,
                                port=args.port,
                                https=use_https,
                                ssl_key=key_file,
                                ssl_cert=cert_file,
                                default_skill_wrap=not args.no_default_skill)
    elif mode == 'riseapi':
        # Both servers take the same arguments; only the entry point differs.
        if args.api_mode == 'alice':
            start_server = start_alice_server
        else:
            start_server = start_model_server
        start_server(config_path, use_https, key_file, cert_file, port=args.port)
    elif mode == 'predict':
        predict_on_stream(config_path, args.batch_size, args.file_path)
    elif mode == 'install':
        install_from_config(config_path)
    elif mode == 'crossval':
        if args.folds >= 2:
            calc_cv_score(config_path, n_folds=args.folds, is_loo=False)
        else:
            log.error('Minimum number of Folds is 2')
# Command-line script: optionally downloads model components and then calls
# predict_with_model for the given configuration.
import argparse

from deeppavlov.core.common.file import find_config
from deeppavlov.download import deep_download
from deeppavlov.models.morpho_tagger.common import predict_with_model

# The parser lives at module level, so it is built on import as well as on
# direct execution.
parser = argparse.ArgumentParser()
parser.add_argument("config_path", help="path to file with prediction configuration")
parser.add_argument("-d", "--download", action="store_true", help="download model components")

if __name__ == "__main__":
    args = parser.parse_args()
    # Every later call works with the path returned by find_config.
    config_path = find_config(args.config_path)
    if args.download:
        deep_download(config_path)
    predict_with_model(config_path)
def main():
    """Grid-search the ``search_choice`` parameters of a config and save the best one.

    Every combination of the ``search_choice`` values found in the config is
    scored, either with cross-validation (``--folds`` is a number or ``loo``)
    or with a single train/valid run. The initial config with the best values
    inserted is written next to the original file with the ``.cvbest.json``
    suffix.

    Raises:
        NotImplementedError: for an unsupported ``--folds`` value or a search
            type other than ``grid``.
    """
    params_helper = ParamsSearch()
    args = parser.parse_args()

    # --folds is either 'loo', missing, or a string of digits.
    folds_arg = args.folds
    is_loo = folds_arg == 'loo'
    n_folds = None
    if not is_loo and folds_arg is not None:
        if not folds_arg.isdigit():
            raise NotImplementedError('Not implemented this type of CV')
        n_folds = int(folds_arg)

    # read config
    pipeline_config_path = find_config(args.config_path)
    config_init = read_json(pipeline_config_path)
    config = parse_config(config_init)
    data = read_data_by_config(config)

    target_metric = parse_config(config_init)['train']['metrics'][0]
    if isinstance(target_metric, dict):
        target_metric = target_metric['name']

    # get all params for search
    param_paths = list(params_helper.find_model_path(config, 'search_choice'))
    param_names = []
    param_values = []
    for path in param_paths:
        found = params_helper.get_value_from_config(config, path)
        param_names.append(path[-1])
        param_values.append(found['search_choice'])

    # find optimal params
    if args.search_type != 'grid':
        raise NotImplementedError('Not implemented this type of search')

    # generate params combnations for grid search
    combinations = list(product(*param_values))
    use_cross_validation = n_folds is not None or is_loo

    # calculate cv scores
    scores = []
    for comb in combinations:
        config = deepcopy(config_init)
        for param_path, param_value in zip(param_paths, comb):
            params_helper.insert_value_or_dict_into_config(config, param_path, param_value)
        config = parse_config(config)

        if use_cross_validation:
            # CV for model evaluation
            score_dict = calc_cv_score(config, data=data, n_folds=n_folds, is_loo=is_loo)
            score = score_dict[next(iter(score_dict))]
        else:
            # train/valid for model evaluation
            data_to_evaluate = data.copy()
            if len(data_to_evaluate['valid']) == 0:
                # No validation data: carve 20% out of the training data.
                train_part, valid_part = train_test_split(data_to_evaluate['train'], test_size=0.2)
                data_to_evaluate['train'] = train_part
                data_to_evaluate['valid'] = valid_part
            iterator = get_iterator_from_config(config, data_to_evaluate)
            report = train_evaluate_model_from_config(config, iterator=iterator)
            score = report['valid'][target_metric]

        scores.append(score)

    # get model with best score
    best_params_dict = get_best_params(combinations, scores, param_names, target_metric)
    log.info('Best model params: {}'.format(best_params_dict))

    # save config
    # best_config aliases config_init, so the values are inserted in place.
    best_config = config_init
    for i, param_name in enumerate(best_params_dict):
        if param_name == target_metric:
            continue
        params_helper.insert_value_or_dict_into_config(
            best_config, param_paths[i], best_params_dict[param_name])

    best_model_filename = pipeline_config_path.with_suffix('.cvbest.json')
    save_json(best_config, best_model_filename)
    log.info('Best model saved in json-file: {}'.format(best_model_filename))
def main():
    """Run the evolutionary parameter search configured on the command line.

    The first population is either generated from scratch or loaded from
    ``path_to_population``; every population is trained with
    ``run_population`` and scored through ``results_to_table``. The loop ends
    after ``iterations`` generations, or never when ``iterations`` is ``-1``.
    """
    args = parser.parse_args()

    pipeline_config_path = find_config(args.config_path)
    key_main_model = args.key_main_model
    population_size = args.p_size
    gpus = [int(gpu) for gpu in args.gpus.split(",")]
    train_partition = int(args.train_partition)
    start_from_population = int(args.start_from_population)
    path_to_population = args.path_to_population
    elitism_with_weights = args.elitism_with_weights
    iterations = int(args.iterations)

    p_crossover = args.p_cross
    pow_crossover = args.pow_cross
    p_mutation = args.p_mut
    pow_mutation = args.pow_mut

    # When CUDA_VISIBLE_DEVICES is set, the requested gpus are indices into
    # that list; [-1] selects every visible device.
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible_devices is not None:
        cvd = [int(gpu) for gpu in visible_devices.split(",")]
        if gpus == [-1]:
            gpus = cvd
        else:
            try:
                gpus = [cvd[gpu] for gpu in gpus]
            except IndexError:
                raise ConfigError(
                    "Can not use gpus `{}` with CUDA_VISIBLE_DEVICES='{}'".format(
                        ",".join(map(str, gpus)), ",".join(map(str, cvd))))

    basic_params = read_json(pipeline_config_path)
    log.info("Given basic params: {}\n".format(json.dumps(basic_params, indent=2)))

    # Initialize evolution
    evolution = ParamsEvolution(population_size=population_size,
                                p_crossover=p_crossover,
                                crossover_power=pow_crossover,
                                p_mutation=p_mutation,
                                mutation_power=pow_mutation,
                                key_main_model=key_main_model,
                                seed=42,
                                train_partition=train_partition,
                                elitism_with_weights=elitism_with_weights,
                                **basic_params)

    metrics_path = list(evolution.find_model_path(evolution.basic_config, "metrics"))[0] + ["metrics"]
    raw_metrics = evolution.get_value_from_config(evolution.basic_config, metrics_path)
    considered_metrics = [entry['name'] if isinstance(entry, dict) else entry
                          for entry in raw_metrics]
    log.info(considered_metrics)

    # The first listed metric drives the evolution.
    evolve_metric = considered_metrics[0]

    # Create table variable for gathering results
    config_variables = evolution.basic_config['metadata']['variables']
    abs_path_to_main_models = expand_path(str(evolution.models_path).format(**config_variables))
    abs_path_to_main_models.mkdir(parents=True, exist_ok=True)
    result_file = abs_path_to_main_models / "result_table.tsv"
    print(result_file)

    result_table_columns = []
    result_table_dict = {}
    for metric_name in considered_metrics:
        for suffix in ("_valid", "_test"):
            column = metric_name + suffix
            result_table_dict[column] = []
            result_table_columns.append(column)
    result_table_dict["params"] = []
    result_table_columns.append("params")

    def score_population(current_population):
        """Train the population and return its scores for the evolve metric."""
        run_population(current_population, evolution, gpus)
        table = results_to_table(current_population, evolution, considered_metrics,
                                 result_file, result_table_columns)
        return table[evolve_metric]

    if start_from_population != 0:
        # if starting evolution from already existing population
        iters = start_from_population
        log.info("Iteration #{} starts".format(iters))

        population = []
        for i in range(population_size):
            config = read_json(expand_path(path_to_population) / f"model_{i}" / "config.json")
            evolution.insert_value_or_dict_into_config(
                config,
                evolution.path_to_models_save_path,
                str(evolution.main_model_path / f"population_{start_from_population}" / f"model_{i}"))
            population.append(config)
    else:
        # if starting evolution from scratch
        iters = 0
        result_table = pd.DataFrame(result_table_dict)
        # write down result table file
        result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t')

        log.info("Iteration #{} starts".format(iters))
        # randomly generate the first population
        population = evolution.first_generation()

    population_scores = score_population(population)
    log.info("Population scores: {}".format(population_scores))
    log.info("Iteration #{} was done".format(iters))
    iters += 1

    # Runs forever when iterations == -1.
    while iterations == -1 or start_from_population + iterations != iters:
        log.info("Iteration #{} starts".format(iters))
        population = evolution.next_generation(population, population_scores, iters)
        population_scores = score_population(population)
        log.info("Population scores: {}".format(population_scores))
        log.info("Iteration #{} was done".format(iters))
        iters += 1

    log.info("End of evolution on iteration #{}".format(iters))
def main():
    """Parse the command line and run the action selected by ``args.mode``.

    Models are downloaded first when ``--download`` is set or the mode itself
    is ``download``; after that at most one mode branch is executed.
    """
    args = parser.parse_args()
    config_path = find_config(args.config_path)
    mode = args.mode

    if args.download or mode == 'download':
        deep_download(config_path)

    def web_options():
        """Collect the port and TLS settings shared by the bot servers."""
        return {
            'port': args.port,
            'https': args.https,
            'ssl_key': args.key,
            'ssl_cert': args.cert,
        }

    if mode == 'train':
        train_evaluate_model_from_config(config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=args.start_epoch_num)
    elif mode == 'evaluate':
        train_evaluate_model_from_config(config_path,
                                         to_train=False,
                                         start_epoch_num=args.start_epoch_num)
    elif mode == 'interact':
        interact_model(config_path)
    elif mode == 'telegram':
        interact_model_by_telegram(model_config=config_path, token=args.token)
    elif mode == 'msbot':
        start_ms_bf_server(model_config=config_path,
                           app_id=args.ms_id,
                           app_secret=args.ms_secret,
                           **web_options())
    elif mode == 'alexa':
        start_alexa_server(model_config=config_path, **web_options())
    elif mode == 'alice':
        start_alice_server(model_config=config_path, **web_options())
    elif mode == 'riseapi':
        start_model_server(config_path, args.https, args.key, args.cert, port=args.port)
    elif mode == 'risesocket':
        start_socket_server(config_path,
                            args.socket_type,
                            port=args.port,
                            socket_file=args.socket_file)
    elif mode == 'agent-rabbit':
        start_rabbit_service(model_config=config_path,
                             service_name=args.service_name,
                             agent_namespace=args.agent_namespace,
                             batch_size=args.batch_size,
                             utterance_lifetime_sec=args.utterance_lifetime,
                             rabbit_host=args.rabbit_host,
                             rabbit_port=args.rabbit_port,
                             rabbit_login=args.rabbit_login,
                             rabbit_password=args.rabbit_password,
                             rabbit_virtualhost=args.rabbit_virtualhost)
    elif mode == 'predict':
        predict_on_stream(config_path, args.batch_size, args.file_path)
    elif mode == 'install':
        install_from_config(config_path)
    elif mode == 'crossval':
        if args.folds >= 2:
            calc_cv_score(config_path, n_folds=args.folds, is_loo=False)
        else:
            log.error('Minimum number of Folds is 2')