def test5():
    """Sanity check 5: the vectorizer returns only the min-pooling output.

    Configuration:
        1. Return only min pooling
           (average_type=None, min_pooling=True, max_pooling=False,
           concatenation=None, return_type='only_min_pooling')

    Loads the word2vec binary model from ``model.bin``, runs the vectorizer
    pipeline over three tokenised lines and compares the result with the
    pickled reference stored in ``./sanity_checks_data/vectorizer5.p``.

    Raises:
        AssertionError: if the result differs from the reference beyond the
            default tolerance of ``np.testing.assert_allclose``.
    """
    print('running vectorizer test 5 ...')

    model_file = 'model.bin'
    model = KeyedVectors.load_word2vec_format(model_file, binary=True)

    config = vectorizer.Config(
        'gensim',
        model=model,
        average_type=None,
        min_pooling=True,
        max_pooling=False,
        concatenation=None,
        return_type='only_min_pooling',
    )
    pipeline = vectorizer.Vectorizer(config)

    case = [
        ['отвергнуть_VERB', 'щедрый_ADJ', 'дар_NOUN'],
        ['покупать_VERB', 'преданность_NOUN', 'дар_NOUN', 'награда_NOUN'],
        ['яд_NOUN', 'последний_ADJ', 'дар_NOUN', 'мой_DET', 'изора_NOUN'],
    ]

    # Use a context manager so the fixture file is closed even if
    # unpickling fails (the handle used to be left open).
    # NOTE(review): pickle executes arbitrary code on load; this assumes the
    # fixture is trusted, repository-local test data.
    with open('./sanity_checks_data/vectorizer5.p', 'rb') as fixture:
        expected = pickle.load(fixture)

    res = pipeline.fit(case).transform(case)
    np.testing.assert_allclose(expected, res)

    print('vectorizer test 5 is passed')
def test6():
    """Sanity check 6: full configuration, including a one-word line.

    Configuration:
        1. Full (average_type='simple_average', min_pooling=True,
           max_pooling=True, concatenation='full'); return_type is
           passed as None
        2. One word in line (the second input line holds a single token)

    Loads the word2vec binary model from ``model.bin``, runs the vectorizer
    pipeline over the two tokenised lines and compares the result with the
    pickled reference stored in ``./sanity_checks_data/vectorizer6.p``.

    Raises:
        AssertionError: if the result differs from the reference beyond the
            default tolerance of ``np.testing.assert_allclose``.
    """
    print('running vectorizer test 6 ...')

    model_file = 'model.bin'
    model = KeyedVectors.load_word2vec_format(model_file, binary=True)

    config = vectorizer.Config(
        'gensim',
        model=model,
        average_type='simple_average',
        min_pooling=True,
        max_pooling=True,
        concatenation='full',
        return_type=None,
    )
    pipeline = vectorizer.Vectorizer(config)

    case = [['отвергнуть_VERB', 'щедрый_ADJ'], ['покупать_VERB']]

    # Use a context manager so the fixture file is closed even if
    # unpickling fails (the handle used to be left open).
    # NOTE(review): pickle executes arbitrary code on load; this assumes the
    # fixture is trusted, repository-local test data.
    with open('./sanity_checks_data/vectorizer6.p', 'rb') as fixture:
        expected = pickle.load(fixture)

    res = pipeline.fit(case).transform(case)
    np.testing.assert_allclose(expected, res)

    print('vectorizer test 6 is passed')
def test1():
    """Sanity check 1: "full house" configuration.

    Full house:
        1. Simple average
        2. Min pooling
        3. Max pooling
        4. Full concatenation
        5. Full return

    Loads the word2vec binary model from ``model.bin``, runs the vectorizer
    pipeline over three tokenised lines and compares the result with the
    pickled reference stored in ``./sanity_checks_data/vectorizer1.p``.

    Raises:
        AssertionError: if the result differs from the reference beyond the
            default tolerance of ``np.testing.assert_allclose``.
    """
    print('running vectorizer test 1 ...')

    model_file = 'model.bin'
    model = KeyedVectors.load_word2vec_format(model_file, binary=True)

    config = vectorizer.Config(
        'gensim',
        model=model,
        average_type='simple_average',
        min_pooling=True,
        max_pooling=True,
        concatenation='full',
        return_type='full',
    )
    pipeline = vectorizer.Vectorizer(config)

    case = [
        ['отвергнуть_VERB', 'щедрый_ADJ', 'дар_NOUN'],
        ['покупать_VERB', 'преданность_NOUN', 'дар_NOUN', 'награда_NOUN'],
        ['яд_NOUN', 'последний_ADJ', 'дар_NOUN', 'мой_DET', 'изора_NOUN'],
    ]

    # Use a context manager so the fixture file is closed even if
    # unpickling fails (the handle used to be left open).
    # NOTE(review): pickle executes arbitrary code on load; this assumes the
    # fixture is trusted, repository-local test data.
    with open('./sanity_checks_data/vectorizer1.p', 'rb') as fixture:
        expected = pickle.load(fixture)

    res = pipeline.fit(case).transform(case)
    np.testing.assert_allclose(expected, res)

    print('vectorizer test 1 is passed')