def test_cooccurrence(self):
    # Case 1: plain string input, tokenized lazily with metrics.isplit().
    # Only the word "cat" is matched; each word is lowercased and stripped
    # of surrounding punctuation before counting.
    s = "The black cat sat on the mat."
    v = metrics.cooccurrence(
        metrics.isplit(s),
        window=(-1, 1),
        match=lambda w: w in ("cat",),
        normalize=lambda w: w.lower().strip(".:;,!?()[]'\""),
    )
    # The result maps the matched word to the words counted around it.
    self.assertEqual(sorted(v.keys()), ["cat"])
    self.assertEqual(sorted(v["cat"].keys()), ["black", "cat", "sat"])
    self.assertEqual(sorted(v["cat"].values()), [1, 1, 1])
    # Case 2: (word, tag) tuples as input. Tokens tagged NN* are matched and
    # only tokens tagged JJ* are kept as co-occurring neighbours.
    # NOTE(review): this call uses the co_occurrence spelling; presumably an
    # alias of metrics.cooccurrence -- confirm.
    s = [
        ("The", "DT"),
        ("black", "JJ"),
        ("cat", "NN"),
        ("sat", "VB"),
        ("on", "IN"),
        ("the", "DT"),
        ("mat", "NN"),
    ]
    v = metrics.co_occurrence(
        s,
        window=(-2, -1),
        match=lambda token: token[1].startswith("NN"),
        filter=lambda token: token[1].startswith("JJ"),
    )
    self.assertEqual(v, {("cat", "NN"): {("black", "JJ"): 1}})
    # print() call form: valid on Python 3, same output on Python 2.
    print("pattern.metrics.cooccurrence()")
def test_cooccurrence(self):
    # Co-occurrence over a lazily split sentence: match only "cat" and
    # normalize every word (lowercase, strip surrounding punctuation).
    s = "The black cat sat on the mat."
    v = metrics.cooccurrence(
        metrics.isplit(s),
        window=(-1, 1),
        match=lambda w: w in ("cat",),
        normalize=lambda w: w.lower().strip(".:;,!?()[]'\""),
    )
    # Expect a single entry for "cat" with three neighbours counted once each.
    self.assertEqual(sorted(v.keys()), ["cat"])
    self.assertEqual(sorted(v["cat"].keys()), ["black", "cat", "sat"])
    self.assertEqual(sorted(v["cat"].values()), [1, 1, 1])
    # Co-occurrence over (word, tag) tuples: match nouns (NN*), keep only
    # adjectives (JJ*) as neighbours.
    # NOTE(review): co_occurrence is presumably an alias of cooccurrence --
    # confirm against the metrics module.
    s = [("The", "DT"), ("black", "JJ"), ("cat", "NN"), ("sat", "VB"), ("on", "IN"), ("the", "DT"), ("mat", "NN")]
    v = metrics.co_occurrence(
        s,
        window=(-2, -1),
        match=lambda token: token[1].startswith("NN"),
        filter=lambda token: token[1].startswith("JJ"),
    )
    self.assertEqual(v, {("cat", "NN"): {("black", "JJ"): 1}})
    # Use the print() call form so the module also parses on Python 3.
    print("pattern.metrics.cooccurrence()")
def test_isplit(self):
    # Assert string.split() iterator.
    v = metrics.isplit("test\nisplit")
    # Iterators expose next() on Python 2 and __next__() on Python 3;
    # accept either so the check is valid on both.
    self.assertTrue(hasattr(v, "next") or hasattr(v, "__next__"))
    # Consuming the iterator yields the two words on either side of the newline.
    self.assertEqual(list(v), ["test", "isplit"])
    print("pattern.metrics.isplit()")
def test_isplit(self):
    # metrics.isplit() must not return a plain list, but consuming its
    # result must produce the individual words of the input.
    words = metrics.isplit("test\nisplit")
    # TODO is this needed?
    self.assertFalse(isinstance(words, list))
    expected = ["test", "isplit"]
    self.assertEqual(list(words), expected)
    print("pattern.metrics.isplit()")
def test_isplit(self):
    # metrics.isplit() must return a generator; exhausting it gives the
    # words found on either side of the newline.
    words = metrics.isplit("test\nisplit")
    is_generator = isinstance(words, GeneratorType)
    self.assertTrue(is_generator)
    expected = ["test", "isplit"]
    self.assertEqual(list(words), expected)
    print("pattern.metrics.isplit()")