Ejemplo n.º 1
0
def main():
    """CLI entry point: parse arguments and run the selected DeepPavlov mode.

    Supported modes: download, train, evaluate, interact, interactbot
    (Telegram), riseapi (REST server) and predict.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)
    # Fetch model files either as a standalone mode or before another mode.
    if args.download or args.mode == 'download':
        deep_download(['-c', pipeline_config_path])
    # CLI flag takes precedence over the environment variable.
    token = args.token or os.getenv('TELEGRAM_TOKEN')

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path)
    elif args.mode == 'evaluate':
        # Evaluation on the test split only: skip both training and validation.
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         to_validate=False)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        if not token:
            # Fixed message: the code above reads the TELEGRAM_TOKEN env var,
            # not TELEGRAM_BOT as the old message claimed.
            log.error(
                'Token required: initiate -t param or TELEGRAM_TOKEN env var with Telegram bot token'
            )
        else:
            interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'riseapi':
        start_model_server(pipeline_config_path)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size,
                          args.file_path)
Ejemplo n.º 2
0
def main():
    """CLI entry point: parse arguments and run the selected DeepPavlov mode.

    Supported modes: download, train, evaluate, interact, interactbot
    (Telegram), interactmsbot (MS Bot Framework), alexa, riseapi
    (REST server or Alice), predict, install and crossval.
    """
    args = parser.parse_args()

    pipeline_config_path = find_config(args.config_path)
    # HTTPS serving options shared by the server-like modes.
    https = args.https
    ssl_key = args.key
    ssl_cert = args.cert

    # Fetch model files either as a standalone mode or before another mode.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)

    # Agent options shared by the bot modes.
    multi_instance = args.multi_instance
    stateful = args.stateful

    # Epoch to resume training from (forwarded to train/evaluate).
    start_epoch_num = args.start_epoch_num

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path, recursive=args.recursive, 
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'evaluate':
        # Evaluation only: skip both training and validation.
        train_evaluate_model_from_config(pipeline_config_path, to_train=False, to_validate=False,
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        token = args.token
        interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'interactmsbot':
        ms_id = args.ms_id
        ms_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=pipeline_config_path,
                                app_id=ms_id,
                                app_secret=ms_secret,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port)
    elif args.mode == 'alexa':
        run_alexa_default_agent(model_config=pipeline_config_path,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port,
                                https=https,
                                ssl_key=ssl_key,
                                ssl_cert=ssl_cert)
    elif args.mode == 'riseapi':
        # riseapi serves either the Yandex.Alice endpoint or the plain REST API.
        alice = args.api_mode == 'alice'
        if alice:
            start_alice_server(pipeline_config_path, https, ssl_key, ssl_cert, port=args.port)
        else:
            start_model_server(pipeline_config_path, https, ssl_key, ssl_cert, port=args.port)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least two folds to make sense.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path, n_folds=n_folds, is_loo=False)
Ejemplo n.º 3
0
def main():
    """CLI entry point: parse arguments and run the selected DeepPavlov mode.

    Like the other variants, but additionally pins computation to the GPU
    given by the --gpu_id argument before dispatching.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)

    # Restrict visible CUDA devices to the requested GPU id (a string like "0").
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    log.info("use gpu id:" + args.gpu_id)

    # Fetch model files either as a standalone mode or before another mode.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)

    # Agent options shared by the bot modes.
    multi_instance = args.multi_instance
    stateful = args.stateful

    # Epoch to resume training from (forwarded to train/evaluate).
    start_epoch_num = args.start_epoch_num

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'evaluate':
        # Evaluation only: skip both training and validation.
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         to_validate=False,
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        token = args.token
        interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'interactmsbot':
        ms_id = args.ms_id
        ms_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=pipeline_config_path,
                                app_id=ms_id,
                                app_secret=ms_secret,
                                multi_instance=multi_instance,
                                stateful=stateful)
    elif args.mode == 'riseapi':
        # riseapi serves either the Yandex.Alice endpoint or the plain REST API.
        alice = args.api_mode == 'alice'
        https = args.https
        ssl_key = args.key
        ssl_cert = args.cert
        if alice:
            start_alice_server(pipeline_config_path, https, ssl_key, ssl_cert)
        else:
            start_model_server(pipeline_config_path, https, ssl_key, ssl_cert)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size,
                          args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least two folds to make sense.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path, n_folds=n_folds, is_loo=False)
Ejemplo n.º 4
0
def calc_cv_score(config=None,
                  pipeline_config_path=None,
                  data=None,
                  n_folds=5,
                  is_loo=False):
    """Cross-validate the model described by *config* and return mean scores.

    Exactly one of ``config`` / ``pipeline_config_path`` must be provided.
    Returns an ``OrderedDict`` mapping each metric declared in
    ``config['train']['metrics']`` to its mean validation score over folds.
    """
    # Resolve the config: load it from disk when only a path was given.
    if config is None:
        if pipeline_config_path is None:
            raise ValueError(
                'Both "config" and "pipeline_config_path" are None')
        config = read_json(pipeline_config_path)

    if data is None:
        data = read_data_by_config(config)

    # Redirect model save paths so CV runs don't clobber real checkpoints.
    config, dirs_for_saved_models = change_savepath_for_model(config)

    metric_names = config['train']['metrics']
    cv_score = OrderedDict((name, []) for name in metric_names)
    for fold_data in generate_train_valid(data, n_folds=n_folds, is_loo=is_loo):
        iterator = get_iterator_from_config(config, fold_data)
        create_dirs_to_save_models(dirs_for_saved_models)
        fold_score = train_evaluate_model_from_config(config, iterator=iterator)
        # Throw away the per-fold checkpoints; only the scores are kept.
        delete_dir_for_saved_models(dirs_for_saved_models)
        for name, value in fold_score['valid'].items():
            cv_score[name].append(value)

    # Replace each per-fold list with its mean and log it.
    for name, values in cv_score.items():
        cv_score[name] = np.mean(values)
        log.info('Cross-Validation "{}" is: {}'.format(name, cv_score[name]))

    return cv_score
Ejemplo n.º 5
0
def main():
    """CLI entry point: parse arguments and run the selected DeepPavlov mode.

    Supported modes: download, train, evaluate, interact, interactbot
    (Telegram), riseapi (REST server), predict and install.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)
    # Fetch model files either as a standalone mode or before another mode.
    if args.download or args.mode == 'download':
        deep_download(['-c', pipeline_config_path])
    # CLI flag takes precedence over the environment variable.
    token = args.token or os.getenv('TELEGRAM_TOKEN')

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path)
    elif args.mode == 'evaluate':
        # Evaluation only: skip both training and validation.
        train_evaluate_model_from_config(pipeline_config_path, to_train=False, to_validate=False)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        if not token:
            # Fixed message: the code above reads the TELEGRAM_TOKEN env var,
            # not TELEGRAM_BOT as the old message claimed.
            log.error('Token required: initiate -t param or TELEGRAM_TOKEN env var with Telegram bot token')
        else:
            interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'riseapi':
        start_model_server(pipeline_config_path)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
Ejemplo n.º 6
0
def calc_cv_score(config, data=None, n_folds=5, is_loo=False):
    """Cross-validate the model in *config* and return mean validation scores.

    Returns an ``OrderedDict`` mapping metric name -> mean score over folds.
    """
    config = parse_config(config)

    data = read_data_by_config(config) if data is None else data

    # Redirect model save paths so CV runs don't clobber real checkpoints.
    config, dirs_for_saved_models = change_savepath_for_model(config)

    cv_score = OrderedDict()
    for fold_data in generate_train_valid(data, n_folds=n_folds, is_loo=is_loo):
        iterator = get_iterator_from_config(config, fold_data)
        create_dirs_to_save_models(dirs_for_saved_models)
        fold_score = train_evaluate_model_from_config(config, iterator=iterator)
        # Throw away the per-fold checkpoints; only the scores are kept.
        delete_dir_for_saved_models(dirs_for_saved_models)
        for metric, value in fold_score['valid'].items():
            cv_score.setdefault(metric, []).append(value)

    # Replace each per-fold list with its mean and log it.
    for metric, values in cv_score.items():
        cv_score[metric] = np.mean(values)
        log.info('Cross-Validation "{}" is: {}'.format(metric, cv_score[metric]))

    return cv_score
Ejemplo n.º 7
0
def calc_cv_score(config, data=None, n_folds=5, is_loo=False):
    """Run k-fold (or leave-one-out) cross-validation for *config*.

    Returns an ``OrderedDict`` mapping metric name -> mean validation score.
    """
    config = parse_config(config)

    if data is None:
        data = read_data_by_config(config)

    # Redirect model save paths so CV runs don't clobber real checkpoints.
    config, dirs_for_saved_models = change_savepath_for_model(config)

    # First pass: train/evaluate each fold and collect its validation scores.
    fold_results = []
    for fold_data in generate_train_valid(data, n_folds=n_folds, is_loo=is_loo):
        iterator = get_iterator_from_config(config, fold_data)
        create_dirs_to_save_models(dirs_for_saved_models)
        score = train_evaluate_model_from_config(config, iterator=iterator)
        # Per-fold checkpoints are discarded; only the scores are kept.
        delete_dir_for_saved_models(dirs_for_saved_models)
        fold_results.append(score['valid'])

    # Second pass: group the per-fold values by metric name.
    cv_score = OrderedDict()
    for valid_scores in fold_results:
        for key, value in valid_scores.items():
            if key not in cv_score:
                cv_score[key] = []
            cv_score[key].append(value)

    # Replace each per-fold list with its mean and log it.
    for key in cv_score:
        cv_score[key] = np.mean(cv_score[key])
        log.info('Cross-Validation "{}" is: {}'.format(key, cv_score[key]))

    return cv_score
Ejemplo n.º 8
0
def main():
    """CLI entry point: parse arguments and run the selected DeepPavlov mode.

    Supported modes: download, train, evaluate, interact, interactbot
    (Telegram), interactmsbot (MS Bot Framework), alexa, riseapi
    (REST server or Alice), predict, install and crossval.
    """
    args = parser.parse_args()

    pipeline_config_path = find_config(args.config_path)
    # HTTPS serving options shared by the server-like modes.
    https = args.https
    ssl_key = args.key
    ssl_cert = args.cert

    # Fetch model files either as a standalone mode or before another mode.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)

    # Agent options shared by the bot modes.
    multi_instance = args.multi_instance
    stateful = args.stateful

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'evaluate':
        # Evaluation without training (validation is not disabled here).
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        token = args.token
        # default_skill_wrap is on unless --no-default-skill was passed.
        interact_model_by_telegram(
            model_config=pipeline_config_path,
            token=token,
            default_skill_wrap=not args.no_default_skill)
    elif args.mode == 'interactmsbot':
        ms_id = args.ms_id
        ms_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=pipeline_config_path,
                                app_id=ms_id,
                                app_secret=ms_secret,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port,
                                https=https,
                                ssl_key=ssl_key,
                                ssl_cert=ssl_cert,
                                default_skill_wrap=not args.no_default_skill)
    elif args.mode == 'alexa':
        run_alexa_default_agent(model_config=pipeline_config_path,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port,
                                https=https,
                                ssl_key=ssl_key,
                                ssl_cert=ssl_cert,
                                default_skill_wrap=not args.no_default_skill)
    elif args.mode == 'riseapi':
        # riseapi serves either the Yandex.Alice endpoint or the plain REST API.
        alice = args.api_mode == 'alice'
        if alice:
            start_alice_server(pipeline_config_path,
                               https,
                               ssl_key,
                               ssl_cert,
                               port=args.port)
        else:
            start_model_server(pipeline_config_path,
                               https,
                               ssl_key,
                               ssl_cert,
                               port=args.port)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size,
                          args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least two folds to make sense.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path, n_folds=n_folds, is_loo=False)
Ejemplo n.º 9
0
      {
        "ref": "vocab",
        "in": ["y_predicted_tokens_ids"],
        "out": ["y_predicted_tokens"]
      },
      {
        "name": "postprocessing",
        "in": ["y_predicted_tokens"],
        "out": ["y_predicted_tokens"]
      }
    ],
    "out": ["y_predicted_tokens"]
  },
  "train": {
    "log_every_n_batches": 100,
    "val_every_n_epochs":0,
    "batch_size": 64,
    "validation_patience": 0,
    "epochs": 20,
    "metrics": ["bleu"],
  }
}

print("Before building from config")
# Build the model from the in-memory `config` dict and smoke-test inference.
model = build_model_from_config(config)
model(['Hi, how are you?', 'Any ideas my dear friend?'])
# Persist the config to disk so training can be launched from the file.
json.dump(config, open('seq2seq.json', 'w'))
train_evaluate_model_from_config('seq2seq.json')

# Rebuild after training and query with a few more utterances.
model = build_model_from_config(config)
model(['hi, how are you?', 'any ideas my dear friend?', 'okay, i agree with you', 'good bye!'])
Ejemplo n.º 10
0
# -*- coding: utf-8 -*-
# FAQ Telegram bot: trains/builds a DeepPavlov FAQ model and serves answers.
import telebot  # simplified: `import telebot as telebot` was a redundant alias
from telebot import apihelper
from deeppavlov import configs, train_model
from deeppavlov.core.common.file import read_json
from deeppavlov.core.commands.infer import build_model
from deeppavlov.core.commands.train import train_evaluate_model_from_config

print("import successful")
far = train_evaluate_model_from_config("./config.json")
faq = build_model("./config.json", download=True)
# Retrain on a local CSV instead of the remote dataset.
model_config = read_json("./config.json")
model_config["dataset_reader"]["data_path"] = "./faq_school_en.csv"
model_config["dataset_reader"]["data_url"] = None
faq = train_model(model_config)
print("train model")
# SECURITY: a bot token is hard-coded in source. This credential is leaked the
# moment the file is shared — revoke it and load the token from an environment
# variable (e.g. os.getenv('TELEGRAM_TOKEN')) instead.
bot = telebot.TeleBot('301914397:AAEmR8WlfzyxQT53zdpqHrSwR8iwaKEr-h8')


def GetAnswer(question):
    """Return the top-ranked FAQ answer for *question* via the global `faq` model."""
    print("get question")
    predictions = faq([question])
    # The model returns nested lists; the first answer sits at [0][0][0].
    return predictions[0][0][0]


@bot.message_handler(content_types=['text'])
def get_text_messages(message):
    print("text handler")
    if message.text == "Привет":
        bot.send_message(message.from_user.id,
                         "Привет, чем я могу тебе помочь?")
    elif message.text == "/help":
Ejemplo n.º 11
0
from deeppavlov import configs
from deeppavlov.core.commands.infer import build_model
from deeppavlov.core.commands.train import train_evaluate_model_from_config

if __name__ == '__main__':
    # Train the TF-IDF document ranker, then the SQuAD reader;
    # download=True fetches the required data/model files first.
    train_evaluate_model_from_config(
        configs.doc_retrieval.en_ranker_tfidf_wiki, download=True)
    train_evaluate_model_from_config(configs.squad.multi_squad_noans,
                                     download=True)
    # load_trained=True makes the ODQA pipeline use the freshly trained weights.
    odqa = build_model(configs.odqa.en_odqa_infer_wiki, load_trained=True)

    result = odqa(['What is the name of Darth Vader\'s son?'])
    print(result)
Ejemplo n.º 12
0
def main():
    """Grid-search hyperparameters marked with 'search_choice' in a config.

    For every combination of the declared choices the model is scored either
    by cross-validation (when --folds is given) or by a train/valid split,
    using the first metric from ``config['train']['metrics']``. The best
    combination is written next to the original config as ``*.cvbest.json``.
    """
    params_helper = ParamsSearch()

    args = parser.parse_args()
    # Resolve the CV strategy from --folds: 'loo', an integer, or absent.
    is_loo = False
    n_folds = None
    if args.folds == 'loo':
        is_loo = True
    elif args.folds is None:
        n_folds = None
    elif args.folds.isdigit():
        n_folds = int(args.folds)
    else:
        raise NotImplementedError('Not implemented this type of CV')

    # Read config; the first declared train metric drives model selection.
    pipeline_config_path = find_config(args.config_path)
    config_init = read_json(pipeline_config_path)
    config = parse_config(config_init)
    data = read_data_by_config(config)
    target_metric = parse_config(config_init)['train']['metrics'][0]
    if isinstance(target_metric, dict):
        target_metric = target_metric['name']

    # Collect every parameter marked with a 'search_choice' in the config.
    param_paths = list(params_helper.find_model_path(config, 'search_choice'))
    param_values = []
    param_names = []
    for path in param_paths:
        value = params_helper.get_value_from_config(config, path)
        param_name = path[-1]
        param_value_search = value['search_choice']
        param_names.append(param_name)
        param_values.append(param_value_search)

    # Find optimal params.
    if args.search_type == 'grid':
        # Generate all parameter combinations for the grid search.
        combinations = list(product(*param_values))

        # Score every combination.
        scores = []
        for comb in combinations:
            config = deepcopy(config_init)
            for param_path, param_value in zip(param_paths, comb):
                params_helper.insert_value_or_dict_into_config(
                    config, param_path, param_value)
            config = parse_config(config)

            # Fixed: use logical `or` instead of bitwise `|` for the boolean test.
            if (n_folds is not None) or is_loo:
                # Cross-validation for model evaluation.
                score_dict = calc_cv_score(config,
                                           data=data,
                                           n_folds=n_folds,
                                           is_loo=is_loo)
                score = score_dict[next(iter(score_dict))]
            else:
                # Train/valid split for model evaluation; carve a validation
                # set out of train when none is provided.
                data_to_evaluate = data.copy()
                if len(data_to_evaluate['valid']) == 0:
                    data_to_evaluate['train'], data_to_evaluate[
                        'valid'] = train_test_split(data_to_evaluate['train'],
                                                    test_size=0.2)
                iterator = get_iterator_from_config(config, data_to_evaluate)
                score = train_evaluate_model_from_config(
                    config, iterator=iterator)['valid'][target_metric]

            scores.append(score)

        # Pick the combination with the best target-metric score.
        best_params_dict = get_best_params(combinations, scores, param_names,
                                           target_metric)
        log.info('Best model params: {}'.format(best_params_dict))
    else:
        raise NotImplementedError('Not implemented this type of search')

    # Write the winning values back into the original config and save it.
    # NOTE(review): this pairs best_params_dict keys with param_paths by index,
    # which assumes the dict preserves the param_names order (minus the metric
    # entry) — confirm against get_best_params.
    best_config = config_init
    for i, param_name in enumerate(best_params_dict.keys()):
        if param_name != target_metric:
            params_helper.insert_value_or_dict_into_config(
                best_config, param_paths[i], best_params_dict[param_name])

    best_model_filename = pipeline_config_path.with_suffix('.cvbest.json')
    save_json(best_config, best_model_filename)
    log.info('Best model saved in json-file: {}'.format(best_model_filename))
Ejemplo n.º 13
0
# from deeppavlov.core.commands.train import train_evaluate_model_from_config

# import importlib.util
# deeppavlov_spec = importlib.util.spec_from_file_location("deeppavlov", "/home/anton/DeepPavlov/deeppavlov/__init__.py")
# deeppavlov = importlib.util.module_from_spec(deeppavlov_spec)
# deeppavlov_spec.loader.exec_module(deeppavlov)
#
# train_spec = importlib.util.spec_from_file_location(
#     "deeppavlov", "/home/anton/DeepPavlov/deeppavlov/core/commands/train.py")
# train = importlib.util.module_from_spec(train_spec)
# train_spec.loader.exec_module(train)

import sys

# Prefer the local DeepPavlov checkout over the copy installed in the virtualenv.
sys.path.append('/home/anton/DeepPavlov')
if '/home/anton/dpenv/src/deeppavlov' in sys.path:
    sys.path.remove('/home/anton/dpenv/src/deeppavlov')

# Importing these modules registers the custom components referenced by the
# config — the imports are needed for their side effects, not their names.
import one_str_lm_reader, one_str_lm_iterator, char_lm_vocab, lstm
from deeppavlov.core.commands.train import train_evaluate_model_from_config

train_evaluate_model_from_config('lstm_config.json')
Ejemplo n.º 14
0
def main():
    """CLI entry point: parse arguments and run the selected DeepPavlov mode.

    Supported modes: download, train, evaluate, interact, telegram, msbot,
    alexa, alice, riseapi, risesocket, agent-rabbit, predict, install and
    crossval.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)

    # Fetch model files either as a standalone mode or before another mode.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'evaluate':
        # Evaluation without training (validation is not disabled here).
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'telegram':
        interact_model_by_telegram(model_config=pipeline_config_path,
                                   token=args.token)
    elif args.mode == 'msbot':
        start_ms_bf_server(model_config=pipeline_config_path,
                           app_id=args.ms_id,
                           app_secret=args.ms_secret,
                           port=args.port,
                           https=args.https,
                           ssl_key=args.key,
                           ssl_cert=args.cert)
    elif args.mode == 'alexa':
        start_alexa_server(model_config=pipeline_config_path,
                           port=args.port,
                           https=args.https,
                           ssl_key=args.key,
                           ssl_cert=args.cert)
    elif args.mode == 'alice':
        start_alice_server(model_config=pipeline_config_path,
                           port=args.port,
                           https=args.https,
                           ssl_key=args.key,
                           ssl_cert=args.cert)
    elif args.mode == 'riseapi':
        start_model_server(pipeline_config_path,
                           args.https,
                           args.key,
                           args.cert,
                           port=args.port)
    elif args.mode == 'risesocket':
        # Serve the model over a raw socket (TCP port or unix socket file).
        start_socket_server(pipeline_config_path,
                            args.socket_type,
                            port=args.port,
                            socket_file=args.socket_file)
    elif args.mode == 'agent-rabbit':
        # Run as a DeepPavlov Agent service connected through RabbitMQ.
        start_rabbit_service(model_config=pipeline_config_path,
                             service_name=args.service_name,
                             agent_namespace=args.agent_namespace,
                             batch_size=args.batch_size,
                             utterance_lifetime_sec=args.utterance_lifetime,
                             rabbit_host=args.rabbit_host,
                             rabbit_port=args.rabbit_port,
                             rabbit_login=args.rabbit_login,
                             rabbit_password=args.rabbit_password,
                             rabbit_virtualhost=args.rabbit_virtualhost)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size,
                          args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least two folds to make sense.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            calc_cv_score(pipeline_config_path,
                          n_folds=args.folds,
                          is_loo=False)
Ejemplo n.º 15
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May 10 16:28:38 2018

@author: lsm
"""

from deeppavlov.core.commands.train import train_evaluate_model_from_config
from deeppavlov.core.commands.utils import expand_path, set_deeppavlov_root
from deeppavlov.core.common.file import read_json
# Star-imports register the custom pipeline components referenced by the config.
from model.pipeline.text_normalizer import *
from model.pipeline.embedder import *
from model.pipeline.CNN_model import *

config = read_json('model/subs/pay/pay_config.json')
set_deeppavlov_root(config)
# NOTE(review): the path (not the parsed `config`) is passed here, so the file
# is read again inside train_evaluate_model_from_config — presumably intentional.
train_evaluate_model_from_config('model/subs/pay/pay_config.json')
Ejemplo n.º 16
0
from deeppavlov.deep import find_config
from deeppavlov.core.commands.train import train_evaluate_model_from_config
from deeppavlov.core.commands.infer import interact_model

# PIPELINE_CONFIG_PATH = 'configs/intents/intents_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/intents/intents_snips.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/ner_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/ner_rus.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/slotfill_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/error_model/brillmoore_wikitypos_en.json'
# PIPELINE_CONFIG_PATH = 'configs/error_model/brillmoore_kartaslov_ru.json'
# PIPELINE_CONFIG_PATH = 'configs/error_model/levenshtein_searcher.json'
# PIPELINE_CONFIG_PATH = 'configs/go_bot/config.json'
# PIPELINE_CONFIG_PATH = 'configs/go_bot/config_minimal.json'
# PIPELINE_CONFIG_PATH = 'configs/go_bot/config_all.json'
# PIPELINE_CONFIG_PATH = 'configs/squad/squad.json'
# PIPELINE_CONFIG_PATH = 'configs/ranking/ranking_insurance.json'
# PIPELINE_CONFIG_PATH = 'configs/seq2seq_go_bot/bot_kvret.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/en_ranker_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ru_ranker_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/en_odqa_infer_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ru_odqa_infer_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ranker_test.json'
# PIPELINE_CONFIG_PATH = find_config('morpho_ru_syntagrus_train')
# Currently selected config; the commented alternatives above list other pipelines.
PIPELINE_CONFIG_PATH = find_config('morpho_ru_syntagrus_train_pymorphy')

if __name__ == '__main__':
    train_evaluate_model_from_config(PIPELINE_CONFIG_PATH)
    # interact_model(PIPELINE_CONFIG_PATH)
Ejemplo n.º 17
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May 10 16:28:38 2018

@author: lsm
"""

from deeppavlov.core.commands.train import train_evaluate_model_from_config
from deeppavlov.core.commands.utils import expand_path, set_deeppavlov_root
from deeppavlov.core.common.file import read_json
import sys
sys.path.insert(0, '../..')
from model.pipeline.text_normalizer import *
from model.pipeline.embedder import *
from model.pipeline.CNN_model import *

# NOTE(review): the config is read from '/subs/deliver/deliver_config.json'
# (absolute path) but training uses 'model/subs/deliver/deliver_config.json'
# — these paths differ; confirm which file is the intended one.
config = read_json('/subs/deliver/deliver_config.json')
set_deeppavlov_root(config)
train_evaluate_model_from_config('model/subs/deliver/deliver_config.json')
Ejemplo n.º 18
0
def main():
    """CLI entry point: parse arguments and dispatch on ``args.mode``.

    Supported modes: download, train, evaluate, interact, interactbot
    (Telegram), interactmsbot (Microsoft Bot Framework), riseapi, predict,
    install and crossval.  Bot credentials may come either from CLI flags
    or from the TELEGRAM_TOKEN / MS_APP_ID / MS_APP_SECRET environment
    variables.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)

    # Either the download flag or the dedicated 'download' mode fetches the
    # data files required by the config before doing anything else.
    if args.download or args.mode == 'download':
        deep_download(['-c', pipeline_config_path])

    # CLI flags take precedence over environment variables for credentials.
    token = args.token or os.getenv('TELEGRAM_TOKEN')

    ms_id = args.ms_id or os.getenv('MS_APP_ID')
    ms_secret = args.ms_secret or os.getenv('MS_APP_SECRET')

    multi_instance = args.multi_instance
    stateful = args.stateful

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path)
    elif args.mode == 'evaluate':
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         to_validate=False)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        if not token:
            # BUG FIX: the message previously named a non-existent
            # TELEGRAM_BOT env var while the code reads TELEGRAM_TOKEN above.
            log.error(
                'Token required: initiate -t param or TELEGRAM_TOKEN env var with Telegram bot token'
            )
        else:
            interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'interactmsbot':
        if not ms_id:
            log.error(
                'Microsoft Bot Framework app id required: initiate -i param '
                'or MS_APP_ID env var with Microsoft app id')
        elif not ms_secret:
            log.error(
                'Microsoft Bot Framework app secret required: initiate -s param '
                'or MS_APP_SECRET env var with Microsoft app secret')
        else:
            run_ms_bf_default_agent(model_config_path=pipeline_config_path,
                                    app_id=ms_id,
                                    app_secret=ms_secret,
                                    multi_instance=multi_instance,
                                    stateful=stateful)
    elif args.mode == 'riseapi':
        # 'alice' api_mode serves the Yandex.Alice-compatible endpoint.
        alice = args.api_mode == 'alice'
        https = args.https
        ssl_key = args.key
        ssl_cert = args.cert
        start_model_server(pipeline_config_path, alice, https, ssl_key,
                           ssl_cert)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size,
                          args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least 2 folds to be meaningful.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path=pipeline_config_path,
                          n_folds=n_folds,
                          is_loo=False)
    def train(self,
              model_level,
              model_name,
              path_to_data,
              path_to_config,
              path_to_global_embeddings,
              test_size=0.15,
              aug_method='word_dropout',
              samples_per_class=None,
              class_names=None,
              path_to_save_file=None,
              path_to_resulting_file=None):
        """Train an intents classifier from a CSV of labelled texts.

        Args:
            model_level: 'root' or 'subs' -- level of the model hierarchy.
            model_name: human-readable name used in status messages.
            path_to_data: CSV file with mandatory 'text' and 'labels' columns.
            path_to_config: DeepPavlov pipeline config (JSON).
            path_to_global_embeddings: source embeddings to compress.
            test_size: fraction held out for the test split (and again for
                the validation split of the remaining training data).
            aug_method: 'word_dropout' or 'duplicate' class-balancing method.
            samples_per_class: target samples per class, or None.
            class_names: explicit class list to persist; inferred from the
                training labels when None.
            path_to_save_file: directory for the intermediate weights file.
            path_to_resulting_file: directory to copy the final weights into.

        Raises:
            InvalidDataFormatError: required columns are missing.
            InvalidModelLevelError: bad ``model_level``.
            InvalidDataAugmentationMethodError: bad ``aug_method``.
            InvalidConfig: the patched config failed validation.
        """
        # preparing training/testing data
        df_raw = pd.read_csv(path_to_data)
        # preparing config
        config = read_json(path_to_config)

        if 'labels' not in df_raw or 'text' not in df_raw:
            raise InvalidDataFormatError(
                '\'labels\' and \'text\' columns must be in the dataframe')

        if model_level not in ['root', 'subs']:
            raise InvalidModelLevelError(
                'model level should be either \'root\' or \'subs\'')

        # Two-stage split: raw -> (train+val, test), then -> (train, val).
        __df_train, df_test, _, _ = train_test_split(df_raw,
                                                     df_raw,
                                                     test_size=test_size)
        df_train, df_val, _, _ = train_test_split(__df_train,
                                                  __df_train,
                                                  test_size=test_size)

        if aug_method not in ['word_dropout', 'duplicate']:
            raise InvalidDataAugmentationMethodError(
                '\'aug_method\' should be  \'word_dropout\' or \'duplicate\'')

        df_train_equalized = self.__data_equalizer.equalize_classes(
            df_train, samples_per_class, aug_method)

        # NOTE(review): model_path is concatenated directly with file names
        # below, so it is assumed to end with a path separator -- confirm.
        model_path = config['model_path']

        if not os.path.isdir(model_path):
            os.mkdir(model_path)
        if not os.path.isdir(model_path + 'data/'):
            os.mkdir(model_path + 'data/')
        df_train_equalized.to_csv(model_path + 'data/train.csv')
        # Shuffle validation/test rows (sample(frac=1)) before persisting.
        df_val[['text',
                'labels']].sample(frac=1).to_csv(model_path + 'data/valid.csv')
        df_test[['text',
                 'labels']].sample(frac=1).to_csv(model_path + 'df_test.csv')

        # making embeddings
        emb_len = IntentsClassifier.get_config_element_by_name(
            config=config['chainer']['pipe'], name='embedder')['emb_len']
        eb = EmbeddingsBuilder(
            resulting_dim=emb_len,
            path_to_original_embeddings=path_to_global_embeddings)
        tc = TextCorrector()
        corpus_cleaned = tc.tn.transform(df_raw.text.tolist())
        # Compressed embeddings are cached on disk; build them only once.
        if not os.path.isfile(model_path + 'ft_compressed.pkl'):
            eb.compress_embeddings(corpus_cleaned,
                                   model_path + 'ft_compressed.pkl', 'pca',
                                   eb.path_to_original_embeddings)
        gc.collect()
        if not os.path.isfile(model_path + 'ft_compressed_local.pkl'):
            eb.build_local_embeddings(corpus_cleaned,
                                      model_path + 'ft_compressed_local.pkl')
        # dealing with class_names
        # FIX: isinstance() instead of `type(...) == list`, and the pickle
        # file handle is now closed via a context manager (it leaked before).
        if isinstance(class_names, list):
            names_to_save = class_names
        else:
            names_to_save = df_train['labels'].value_counts().index.tolist()
        with open(model_path + 'class_names.pkl', 'wb') as names_file:
            pickle.dump(names_to_save, names_file)
        # setting up saving and loading
        # BUG FIX: the original evaluated os.path.isdir(path_to_save_file)
        # BEFORE checking for None, so the default None arguments raised
        # TypeError; test `is not None` first.
        if path_to_save_file is not None:
            config['chainer']['pipe'][-1][
                'save_path'] = path_to_save_file + '/' + 'weights.hdf5'
            if not os.path.isdir(path_to_save_file):
                os.mkdir(path_to_save_file)

        if path_to_resulting_file is not None and not os.path.isdir(
                path_to_resulting_file):
            os.mkdir(path_to_resulting_file)
        # Prefix the relative paths inside the config with model_path so all
        # artifacts (embedder weights, class list, data, tensorboard logs)
        # live in the model directory.
        emb_config = IntentsClassifier.get_config_element_by_name(
            config['chainer']['pipe'], 'embedder')
        cnn_config = IntentsClassifier.get_config_element_by_name(
            config['chainer']['pipe'], 'cnn_model')
        pipe = config['chainer']['pipe']
        emb_idx = pipe.index(emb_config)
        cnn_idx = pipe.index(cnn_config)
        pipe[emb_idx]['load_path'][0] = model_path + pipe[emb_idx][
            'load_path'][0]
        pipe[emb_idx]['load_path'][1] = model_path + pipe[emb_idx][
            'load_path'][1]
        pipe[cnn_idx]['classes'] = model_path + pipe[cnn_idx]['classes']
        config['dataset_reader'][
            'data_path'] = model_path + config['dataset_reader']['data_path']
        config['train']['tensorboard_log_dir'] = model_path + config['train'][
            'tensorboard_log_dir']
        # Remember the relative load_path; it is restored after training.
        load_path_bckp = config['chainer']['pipe'][-1]['load_path']
        check_results = self.check_config(config)
        if len(check_results) > 0:
            raise InvalidConfig(check_results, model_path,
                                'Config file is invalid')

        # training
        set_deeppavlov_root(config)
        # update the status file so external observers can poll progress
        training_status = 'Classification model {} {} is currently training. Total number of epochs is set to {}'.format(
            model_level, model_name, config['train']['epochs'])
        with open(model_path + 'status.txt', 'w') as f:
            f.writelines(training_status)
        train_evaluate_model_from_config(config)
        # updating status with the final metric
        perf = IntentsClassifier.get_latest_accuracy(
            config)  #self.get_performance(config, model_path + 'df_test.csv')
        training_status = 'Classification model {} {} is trained \nf1_score (macro avg): {}'.format(
            model_level, model_name, perf)
        with open(model_path + 'status.txt', 'w') as f:
            f.writelines(training_status)
        # restore the relative load_path before copying trained weights
        config['chainer']['pipe'][-1]['load_path'] = load_path_bckp
        # BUG FIX: guard the copies -- with the default None paths the string
        # concatenations below raised TypeError.
        if path_to_save_file is not None:
            weights_src = path_to_save_file + '/' + 'weights.hdf5'
            if path_to_resulting_file is not None:
                copy(weights_src, path_to_resulting_file + '/' +
                     config['chainer']['pipe'][-1]['load_path'])
            copy(weights_src,
                 model_path + config['chainer']['pipe'][-1]['load_path'])
Ejemplo n.º 20
0
    },
    'main': True
}
# chainer.pipe: a list of consequently run components
vocab_config['chainer']['pipe'] = [vocab_comp_config]

# Persist the vocabulary-building config so deep_download and the trainer can
# read it from disk.
# NOTE(review): json.dump(..., open(...)) leaves the file handle open;
# consider wrapping the open() in a `with` block.
json.dump(vocab_config, open("gobot/vocab_config.json", 'wt'))
""" Download "dstc2_v2" dataset, need to do only once """
deep_download(['--config', 'gobot/vocab_config.json'])
# deeppavlov.__path__[0] is the installed package directory; downloads land
# in its sibling `download` folder.
dstc2_path = deeppavlov.__path__[
    0] + '/../download/dstc2'  # Data was downloaded to dstc2_path
"""
Step 3: Vocabulary Building
"""

# "Training" the vocab-only pipeline builds and saves the token vocabulary.
train_evaluate_model_from_config("gobot/vocab_config.json")

vocabs_path = deeppavlov.__path__[0] + '/../download/vocabs'
# Re-wire the vocab component to map raw utterances to token indices.
vocab_comp_config['in'] = ['utterance']
vocab_comp_config['out'] = ['utterance_token_indices']

vocab_config['chainer']['pipe'] = [vocab_comp_config]
vocab_config['chainer']['out'] = ['utterance_token_indices']
"""
Step 4: Gobot Configurations
"""

# The go-bot config reuses the same reader/iterator as the vocab stage.
db_config = {}
"""dataset_reader, dataset_iterator and metadata will be the same as for vocabulary only"""
db_config['dataset_reader'] = dstc2_reader_comp_config
db_config['dataset_iterator'] = dialog_iterator_comp_config