model2.save_weights(weights_path)

# For convenience, also save the model configuration.
with open(config_path, 'w') as f:
    model_config = {
        'word2vector_path': word2vector_path,
        'word_dims': word_dims,
        'max_phrase_len': max_phrase_len,
        'encoder_dim': encoder_dim
    }
    json.dump(model_config, f)

# Now load the data for the evaluation task.
eval_data = EvaluationDataset(max_phrase_len, tokenizer)
eval_data.load(data_folder)

nb_good = 0  # premise retrieved in top-1
nb_good5 = 0  # ... in top-5
nb_good10 = 0  # ... in top-10
nb_total = 0

for irecord, phrases in eval_data.generate_groups():
    nb_samples = len(phrases)
    # Two rows per (premise, question) pair: the premise goes to the even
    # index, the question to the odd one.
    X_data = np.zeros((nb_samples * 2, max_phrase_len, word_dims), dtype='float32')
    for irow, (premise_words, question_words) in enumerate(phrases):
        for iword, word in enumerate(premise_words[:max_phrase_len]):
            if word in w2v:
                X_data[irow * 2, iword, :] = norm(w2v[word])
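The norm helper is not defined in this fragment. A minimal sketch, assuming it simply L2-normalizes each word vector before it is written into the input tensor (the function name comes from the snippet; its body here is an assumption):

import numpy as np

def norm(v):
    # Assumed behavior: rescale the embedding to unit L2 length so that
    # dot products between encoded rows behave like cosine similarities.
    return v / np.linalg.norm(v)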
model_config = {
    'model': 'lightgbm',
    'shingle2id': shingle2id,
    'model_filename': model_filename,
    'shingle_len': shingle_len,
    'nb_features': nb_features,
    'lemmatize': lemmatize
}

with open(os.path.join(tmp_folder, config_filename), 'w') as f:
    json.dump(model_config, f, indent=4)

if use_hyperopt:
    ho_model_config = model_config

    ho_eval_data = EvaluationDataset(0, tokenizer, 'none')
    ho_eval_data.load(data_folder)

    # Search space for the LightGBM hyperparameters.
    space = {
        'num_leaves': hp.quniform('num_leaves', 20, 100, 1),
        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 5, 100, 1),
        'feature_fraction': hp.uniform('feature_fraction', 0.75, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.75, 1.0),
        'learning_rate': hp.loguniform('learning_rate', -2, -1.2),
        'min_sum_hessian_in_leaf': hp.loguniform('min_sum_hessian_in_leaf', 0, 2.3),
    }
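The fragment ends at the search-space definition. A minimal sketch of how such a space is typically passed to hyperopt's fmin; the objective body below, the X_train/y_train matrices, and the evaluate_on helper are assumptions for illustration, not the author's code:

from hyperopt import fmin, tpe, Trials, STATUS_OK
import lightgbm

def objective(params):
    # hp.quniform returns floats, so integer-valued hyperparameters
    # must be cast back to int before training.
    lgb_params = {'objective': 'binary',
                  'num_leaves': int(params['num_leaves']),
                  'min_data_in_leaf': int(params['min_data_in_leaf']),
                  'feature_fraction': params['feature_fraction'],
                  'bagging_fraction': params['bagging_fraction'],
                  'learning_rate': params['learning_rate'],
                  'min_sum_hessian_in_leaf': params['min_sum_hessian_in_leaf']}
    model = lightgbm.train(lgb_params, lightgbm.Dataset(X_train, label=y_train))
    accuracy = evaluate_on(ho_eval_data, model)  # assumed evaluation helper
    return {'loss': -accuracy, 'status': STATUS_OK}  # fmin minimizes the loss

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)

Here tpe.suggest runs hyperopt's Tree-structured Parzen Estimator sampler, and best holds the best parameter values found after max_evals trials.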