예제 #1
0
 def test_SIF_works(self):
     """
     Check that ``components_to_remove`` is accepted when sentence
     embeddings are merged into group embeddings.

     The grouped embedding is computed twice, once with ``normalize`` set
     to True and once with it set to False. Every resulting group vector
     must have as many entries as the model's ``vector_size``.
     """
     def build_grouped(normalize):
         # Each call builds its own fixtures and runs the full pipeline.
         return embedding.groupedEmbedding(
             given_tdocs(),
             given_tgroups(),
             model=given_incomplete_keyedVectors(),
             weights=given_normalized_tweights(),
             word2SentenceMerge=mergingmethods.sumMerge,
             sentence2GroupMerge=mergingmethods.avgMerge,
             sentence2GroupKwargs={
                 "components_to_remove": 1,
                 "normalize": normalize,
             },
             verbose=False)

     # The normalized run is only there to integration-test normalization.
     normalized = build_grouped(True)
     plain = build_grouped(False)
     keyed_vectors = given_incomplete_keyedVectors()
     for group in plain:
         self.assertEqual(len(plain[group]), keyed_vectors.vector_size)
         self.assertEqual(len(normalized[group]),
                          keyed_vectors.vector_size)
예제 #2
0
 def test_given_oov_applies_correctly(self):
     """
     Check that out-of-vocabulary words are handled the same way by both
     averaging routes and do not break the pipeline.

     With the incomplete keyed-vectors fixture, averaging with ``np.mean``
     over axis 0 at both merge stages must give almost the same group
     vectors as using ``mergingmethods.avgMerge`` at both stages.
     """
     docs = given_tdocs()
     groups = given_tgroups()
     keyed_vectors = given_incomplete_keyedVectors()

     # Reference result: plain numpy means at both merge stages.
     expected = embedding.groupedEmbedding(
         docs,
         groups,
         model=keyed_vectors,
         weights=None,
         word2SentenceMerge=np.mean,
         word2SentenceKwargs=dict(axis=0),
         sentence2GroupMerge=np.mean,
         sentence2GroupKwargs=dict(axis=0),
         verbose=False)

     # Result under test: the library's own averaging merge.
     actual = embedding.groupedEmbedding(
         docs,
         groups,
         model=keyed_vectors,
         weights=None,
         word2SentenceMerge=mergingmethods.avgMerge,
         sentence2GroupMerge=mergingmethods.avgMerge,
         verbose=False)

     for group in actual:
         np.testing.assert_array_almost_equal(expected[group],
                                              actual[group])
예제 #3
0
 def test_given_unique_pooling_applies_correctly(self):
     """
     Check ``mergingmethods.avgMerge`` against an equivalent numpy mean
     when the word-to-sentence merge is ``'unique'``.

     Both runs use the same documents, groups and mock keyed vectors. They
     differ only in the sentence-to-group merge (``np.mean`` over axis 0
     versus ``avgMerge``) and must give almost equal group vectors.
     """
     docs = given_tdocs()
     groups = given_tgroups()
     keyed_vectors = given_mock_keyedVectors()

     # Reference result: numpy mean for the sentence-to-group stage.
     expected = embedding.groupedEmbedding(
         docs,
         groups,
         model=keyed_vectors,
         word2SentenceMerge='unique',
         sentence2GroupMerge=np.mean,
         sentence2GroupKwargs=dict(axis=0),
         verbose=False)

     # Result under test: avgMerge for the sentence-to-group stage.
     actual = embedding.groupedEmbedding(
         docs,
         groups,
         model=keyed_vectors,
         word2SentenceMerge='unique',
         sentence2GroupMerge=mergingmethods.avgMerge,
         verbose=False)

     for group in actual:
         np.testing.assert_array_almost_equal(expected[group],
                                              actual[group])
예제 #4
0
 def test_given_list_input_input_untouched(self):
     """
     Check that list inputs are still lists after computing the grouped
     embedding, and that the result has one entry per distinct group label.
     """
     tdocs = given_tdocs()
     tgroups = given_tgroups()
     w2vec = given_mock_keyedVectors()
     gembeddings = embedding.groupedEmbedding(tdocs,
                                              tgroups,
                                              model=w2vec,
                                              verbose=False)
     # One assertion per input so a failure names the offending argument
     # and reports its actual type.
     self.assertIsInstance(tdocs, list, msg="input should be unchanged")
     self.assertIsInstance(tgroups, list, msg="input should be unchanged")
     self.assertEqual(len(gembeddings),
                      len(set(tgroups)),
                      msg="groupe dict should be # of categories")
예제 #5
0
 def test_given_weights_tautological_works(self):
     """
     Check that the pipeline runs with weights supplied.

     The grouped embedding is built with ``sumMerge`` at both merge stages
     and the normalized-weights fixture. Each resulting group vector must
     have as many entries as the model's ``vector_size``.
     """
     docs = given_tdocs()
     groups = given_tgroups()
     model = given_incomplete_keyedVectors()
     token_weights = given_normalized_tweights()
     grouped = embedding.groupedEmbedding(
         docs,
         groups,
         model=model,
         weights=token_weights,
         word2SentenceMerge=mergingmethods.sumMerge,
         sentence2GroupMerge=mergingmethods.sumMerge,
         verbose=False)

     # A fresh fixture instance supplies the expected dimensionality.
     reference = given_incomplete_keyedVectors()
     for group in grouped:
         self.assertEqual(len(grouped[group]), reference.vector_size)
예제 #6
0
 def test_given_weight_embedding_mismatch_raises(self):
     """
     Check that weights whose length does not match the embedding raise.

     The first two entries of the normalized-weights fixture are shortened
     before the pipeline runs. The call must fail with ValueError, or with
     IndexError, which can occur on the edge case of a dropped word.
     """
     bad_weights = given_normalized_tweights()
     bad_weights[0] = bad_weights[0][:-1]
     # NOTE(review): this slices the already-shortened entry 0, not entry 1.
     # Possibly a copy-paste slip for bad_weights[1][:-2]; confirm intent.
     bad_weights[1] = bad_weights[0][:-2]
     # Accept both exception types directly instead of converting
     # IndexError to ValueError inside the assertRaises block.
     with self.assertRaises((ValueError, IndexError)):
         embedding.groupedEmbedding(
             given_tdocs(),
             given_tgroups(),
             model=given_incomplete_keyedVectors(),
             weights=bad_weights,
             word2SentenceMerge=mergingmethods.sumMerge,
             sentence2GroupMerge=mergingmethods.sumMerge,
             verbose=False)