def test_qaranker_local_integration(self):
    """End-to-end QARanker flow on local data: read relations and a question
    corpus, build pair/list TextSets from them, then train a KNRM ranker
    wrapped in TimeDistributed and run the NDCG/MAP evaluations.
    """
    rels = Relations.read(self.qa_path + "/relations.txt")
    assert len(rels) == 4

    corpus = TextSet.read_csv(self.qa_path + "/question_corpus.csv")
    assert corpus.get_uris() == ["Q1", "Q2"]
    processed = corpus.tokenize().normalize().word2idx().shape_sequence(5)

    # Pairwise set: each sample stacks a positive and a negative QA pair.
    pairs = TextSet.from_relation_pairs(rels, processed, processed)
    pair_samples = pairs.get_samples()
    assert len(pair_samples) == 2
    expected_label = np.array([[1.0], [0.0]])
    for sample in pair_samples:
        assert list(sample.feature.shape) == [2, 10]
        assert np.allclose(sample.label.to_ndarray(), expected_label)

    # Listwise set: one sample per question with all its candidate answers.
    lists = TextSet.from_relation_lists(rels, processed, processed)
    list_samples = lists.get_samples()
    assert len(list_samples) == 2
    for sample in list_samples:
        assert list(sample.feature.shape) == [2, 10]
        assert list(sample.label.shape) == [2, 1]

    knrm = KNRM(5, 5, self.glove_path,
                word_index=processed.get_word_index())
    model = Sequential().add(TimeDistributed(knrm, input_shape=(2, 10)))
    model.compile("sgd", "rank_hinge")
    model.fit(pairs, batch_size=2, nb_epoch=2)
    print(knrm.evaluate_ndcg(lists, 3))
    print(knrm.evaluate_map(lists))
def test_with_keras(self):
    """Zoo KNRM forward pass should match an equivalent Keras model once
    the Keras weights (dense weight transposed) are loaded into it.
    """
    keras_model = self.keras_knrm(5, 10, 22, 50)
    data = np.random.randint(20, size=(4, 15))
    # Keras takes the two texts separately; Zoo takes them concatenated.
    expected = keras_model.predict([data[:, :5], data[:, 5:]])
    kweights = keras_model.get_weights()
    # BigDL stores the dense weight transposed relative to Keras.
    zoo_weights = [kweights[0], np.transpose(kweights[1]), kweights[2]]
    zoo_model = KNRM(5, 10, glove_path)
    zoo_model.set_weights(zoo_weights)
    actual = zoo_model.forward(data)
    self.assert_allclose(actual, expected)
def test_forward_backward(self):
    """Forward/backward sanity check for a KNRM built from an in-memory
    embedding matrix (vocab 40, dim 20) with custom kernel settings.
    """
    embed = np.random.random([40, 20])
    model = KNRM(15, 60, 40, 20, embed_weights=embed,
                 kernel_num=10, sigma=0.15, exact_sigma=1e-4)
    # 75 tokens = text1 length (15) + text2 length (60).
    data = np.random.randint(40, size=(1, 75))
    self.assert_forward_backward(model, data)
def test_save_load(self):
    """KNRM should survive a save/load round trip unchanged."""
    model = KNRM(5, 10, glove_path)
    data = np.random.randint(20, size=(3, 15))
    self.assert_zoo_model_save_load(model, data)
def test_forward_backward(self):
    """Forward/backward sanity check for a KNRM built from a GloVe file
    restricted to a tiny explicit word index.

    NOTE(review): this has the same name as the earlier
    ``test_forward_backward`` — if both end up in the same class only the
    later definition runs. Confirm they belong to different test classes,
    or rename one.
    """
    vocab = {"is": 1, "to": 2, "the": 3, "for": 4}
    model = KNRM(15, 60, glove_path, word_index=vocab,
                 kernel_num=10, sigma=0.15, exact_sigma=1e-4)
    # 75 tokens = text1 length (15) + text2 length (60).
    data = np.random.randint(5, size=(1, 75))
    self.assert_forward_backward(model, data)
sc, int(options.partition_num)).tokenize().normalize()\ .word2idx(min_freq=2).shape_sequence(int(options.question_length)) a_set = TextSet.read_csv(options.data_path+"/answer_corpus.csv", sc, int(options.partition_num)).tokenize().normalize()\ .word2idx(min_freq=2, existing_map=q_set.get_word_index())\ .shape_sequence(int(options.answer_length)) train_relations = Relations.read(options.data_path + "/relation_train.csv", sc, int(options.partition_num)) train_set = TextSet.from_relation_pairs(train_relations, q_set, a_set) validate_relations = Relations.read(options.data_path + "/relation_valid.csv", sc, int(options.partition_num)) validate_set = TextSet.from_relation_lists(validate_relations, q_set, a_set) if options.model: knrm = KNRM.load_model(options.model) else: word_index = a_set.get_word_index() knrm = KNRM(int(options.question_length), int(options.answer_length), options.embedding_file, word_index) model = Sequential().add( TimeDistributed( knrm, input_shape=(2, int(options.question_length) + int(options.answer_length)))) model.compile(optimizer=SGD(learningrate=float(options.learning_rate)), loss="rank_hinge") for i in range(0, int(options.nb_epoch)): model.fit(train_set, batch_size=int(options.batch_size), nb_epoch=1) knrm.evaluate_ndcg(validate_set, 3) knrm.evaluate_ndcg(validate_set, 5) knrm.evaluate_map(validate_set)