예제 #1
0
model2.save_weights(weights_path)

# Persist the model configuration alongside the weights so the model
# can be reconstructed later without re-specifying these settings.
model_config = {
    'word2vector_path': word2vector_path,
    'word_dims': word_dims,
    'max_phrase_len': max_phrase_len,
    'encoder_dim': encoder_dim
}
with open(config_path, 'w') as cfg_file:
    json.dump(model_config, cfg_file)

# Load the data for the evaluation task.
eval_data = EvaluationDataset(max_phrase_len, tokenizer)
eval_data.load(data_folder)

# Retrieval-accuracy counters.
nb_good = 0  # correct premise retrieved at rank 1 (top-1)
nb_good5 = 0
nb_good10 = 0
nb_total = 0
# Score each evaluation group; `phrases` is a list of
# (premise_words, question_words) token-list pairs.
for irecord, phrases in eval_data.generate_groups():
    nb_samples = len(phrases)
    # One pair of rows per sample: row 2*i is filled with the premise
    # embeddings below; row 2*i+1 is presumably the question's — the
    # filling code is outside this view, TODO confirm.
    X_data = np.zeros((nb_samples * 2, max_phrase_len, word_dims),
                      dtype='float32')

    # Embed each premise word-by-word, truncating to max_phrase_len;
    # words missing from the w2v vocabulary are left as zero vectors.
    for irow, (premise_words, question_words) in enumerate(phrases):
        for iword, word in enumerate(premise_words[:max_phrase_len]):
            if word in w2v:
                # norm() presumably normalizes the word vector — TODO confirm
                X_data[irow * 2, iword, :] = norm(w2v[word])
예제 #2
0
        # (continuation of a model_config dict literal opened above this view)
        'model': 'lightgbm',
        'shingle2id': shingle2id,  # presumably shingle -> feature-index map; verify against builder
        'model_filename': model_filename,
        'shingle_len': shingle_len,
        'nb_features': nb_features,
        'lemmatize': lemmatize
    }

    # Persist the model configuration as human-readable (indented) JSON.
    with open(os.path.join(tmp_folder, config_filename), 'w') as f:
        json.dump(model_config, f, indent=4)

    # Optional hyperparameter search over LightGBM settings via hyperopt.
    # (The objective / fmin call is below this view.)
    if use_hyperopt:
        ho_model_config = model_config

        # Evaluation dataset used by the search objective; max_phrase_len=0
        # and mode 'none' here — semantics of these args defined elsewhere.
        ho_eval_data = EvaluationDataset(0, tokenizer, 'none')
        ho_eval_data.load(data_folder)

        # hyperopt search space:
        # - hp.quniform(lo, hi, 1): uniform over [lo, hi] rounded to step 1.
        #   NOTE(review): quniform yields floats, while LightGBM expects
        #   integers for num_leaves / min_data_in_leaf — confirm they are
        #   cast to int in the objective below.
        # - hp.loguniform(a, b): exp(uniform(a, b)); e.g. learning_rate is
        #   sampled from roughly [0.135, 0.301].
        space = {
            'num_leaves':
            hp.quniform('num_leaves', 20, 100, 1),
            'min_data_in_leaf':
            hp.quniform('min_data_in_leaf', 5, 100, 1),
            'feature_fraction':
            hp.uniform('feature_fraction', 0.75, 1.0),
            'bagging_fraction':
            hp.uniform('bagging_fraction', 0.75, 1.0),
            'learning_rate':
            hp.loguniform('learning_rate', -2, -1.2),
            'min_sum_hessian_in_leaf':
            hp.loguniform('min_sum_hessian_in_leaf', 0, 2.3),
        }