Beispiel #1
0
 def test5():
     '''
     ===========================================================
     Sanity check: vectorizer returning only the min-pooled vector.

     Configuration:
         1. No averaging (average_type=None)
         2. Min pooling enabled, max pooling disabled
         3. No concatenation (concatenation=None)
         4. Return only min pooling

     Loads the binary word2vec model from 'model.bin', fits and
     transforms three tokenized sentences and compares the result
     with the pickled reference stored in
     './sanity_checks_data/vectorizer5.p'.

     Raises:
         AssertionError: if the result does not match the reference
             within the default tolerance of
             np.testing.assert_allclose.
     ===========================================================
     '''
     print('running vectorizer test 5 ...')
     model_file = 'model.bin'
     model = KeyedVectors.load_word2vec_format(model_file, binary=True)
     config = vectorizer.Config('gensim',
                                model=model,
                                average_type=None,
                                min_pooling=True,
                                max_pooling=False,
                                concatenation=None,
                                return_type='only_min_pooling')
     pipeline = vectorizer.Vectorizer(config)
     # Each inner list is one tokenized sentence.
     case = [
         ['отвергнуть_VERB', 'щедрый_ADJ', 'дар_NOUN'],
         ['покупать_VERB', 'преданность_NOUN', 'дар_NOUN', 'награда_NOUN'],
         ['яд_NOUN', 'последний_ADJ', 'дар_NOUN', 'мой_DET', 'изора_NOUN']
     ]
     # Use a context manager so the fixture file is closed even if
     # unpickling fails (the bare open() call leaked the handle).
     # NOTE(review): pickle.load can execute arbitrary code; this assumes
     # the fixture is a trusted file shipped with the repository.
     with open('./sanity_checks_data/vectorizer5.p', 'rb') as fixture:
         expected = pickle.load(fixture)
     res = pipeline.fit(case).transform(case)
     np.testing.assert_allclose(expected, res)
     print('vectorizer test 5 is passed')
Beispiel #2
0
 def test6():
     '''
     ===========================================================
     Sanity check: full configuration with a one-word sentence.

     Configuration:
         1. Full (simple average, min pooling, max pooling,
            full concatenation; return_type=None)
         2. One word in line (the second sentence has a single token)

     Loads the binary word2vec model from 'model.bin', fits and
     transforms two tokenized sentences and compares the result
     with the pickled reference stored in
     './sanity_checks_data/vectorizer6.p'.

     Raises:
         AssertionError: if the result does not match the reference
             within the default tolerance of
             np.testing.assert_allclose.
     ===========================================================
     '''
     print('running vectorizer test 6 ...')
     model_file = 'model.bin'
     model = KeyedVectors.load_word2vec_format(model_file, binary=True)
     config = vectorizer.Config('gensim',
                                model=model,
                                average_type='simple_average',
                                min_pooling=True,
                                max_pooling=True,
                                concatenation='full',
                                return_type=None)
     pipeline = vectorizer.Vectorizer(config)
     # Second sentence deliberately contains only one token.
     case = [['отвергнуть_VERB', 'щедрый_ADJ'], ['покупать_VERB']]
     # Use a context manager so the fixture file is closed even if
     # unpickling fails (the bare open() call leaked the handle).
     # NOTE(review): pickle.load can execute arbitrary code; this assumes
     # the fixture is a trusted file shipped with the repository.
     with open('./sanity_checks_data/vectorizer6.p', 'rb') as fixture:
         expected = pickle.load(fixture)
     res = pipeline.fit(case).transform(case)
     np.testing.assert_allclose(expected, res)
     print('vectorizer test 6 is passed')
Beispiel #3
0
 def test1():
     '''
     ===========================================================
     Sanity check: every vectorizer option enabled.

     Full house:
         1. Simple average
         2. Min pooling
         3. Max pooling
         4. Full concatenation
         5. Full return

     Loads the binary word2vec model from 'model.bin', fits and
     transforms three tokenized sentences and compares the result
     with the pickled reference stored in
     './sanity_checks_data/vectorizer1.p'.

     Raises:
         AssertionError: if the result does not match the reference
             within the default tolerance of
             np.testing.assert_allclose.
     ===========================================================
     '''
     print('running vectorizer test 1 ...')
     model_file = 'model.bin'
     model = KeyedVectors.load_word2vec_format(model_file, binary=True)
     config = vectorizer.Config('gensim',
                                model=model,
                                average_type='simple_average',
                                min_pooling=True,
                                max_pooling=True,
                                concatenation='full',
                                return_type='full')
     pipeline = vectorizer.Vectorizer(config)
     # Each inner list is one tokenized sentence.
     case = [
         ['отвергнуть_VERB', 'щедрый_ADJ', 'дар_NOUN'],
         ['покупать_VERB', 'преданность_NOUN', 'дар_NOUN', 'награда_NOUN'],
         ['яд_NOUN', 'последний_ADJ', 'дар_NOUN', 'мой_DET', 'изора_NOUN']
     ]
     # Use a context manager so the fixture file is closed even if
     # unpickling fails (the bare open() call leaked the handle).
     # NOTE(review): pickle.load can execute arbitrary code; this assumes
     # the fixture is a trusted file shipped with the repository.
     with open('./sanity_checks_data/vectorizer1.p', 'rb') as fixture:
         expected = pickle.load(fixture)
     res = pipeline.fit(case).transform(case)
     np.testing.assert_allclose(expected, res)
     print('vectorizer test 1 is passed')