def test_getstrictlyminimummis(self):
    """Check getStrictlyMinimumMIS against hand-picked sequences.

    Expectations encoded by this test:
      * [] and [[1], [1]] both give sys.maxint;
      * [[1]], [[1, 2]] and [[1], [2]] each give the MS value of item 1,
        compared with an absolute tolerance of 0.0001.
    """
    inputData = {
        'T': [[[1]], [[2]], [[2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    pymsgsp = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"])

    expected_mis = 0.09600845652974467
    tolerance = 0.0001

    self.assertEqual(pymsgsp.getStrictlyMinimumMIS([]), sys.maxint)

    # assertAlmostEqual(delta=...) applies the same |a - b| <= delta rule as
    # the hand-written comparison, but reports both values when it fails.
    self.assertAlmostEqual(
        pymsgsp.getStrictlyMinimumMIS([[1]]), expected_mis, delta=tolerance)
    self.assertAlmostEqual(
        pymsgsp.getStrictlyMinimumMIS([[1, 2]]), expected_mis, delta=tolerance)
    self.assertAlmostEqual(
        pymsgsp.getStrictlyMinimumMIS([[1], [2]]), expected_mis, delta=tolerance)

    self.assertEqual(pymsgsp.getStrictlyMinimumMIS([[1], [1]]), sys.maxint)
def test_extendsequence(self):
    """Check extendSequence with the FORWARD join criterion.

    Two cases are asserted:
      * [[1]] joined with [[2]] gives [[[1], [2]]];
      * [[1], [2]] joined with [[2], [2]] gives [[[1], [2], [2]]].
    """
    inputData = {
        'T': [[[1]], [[2]], [[2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    miner = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"])

    # Each entry: (first sequence, second sequence, expected result).
    cases = [
        ([[1]], [[2]], [[[1], [2]]]),
        ([[1], [2]], [[2], [2]], [[[1], [2], [2]]]),
    ]
    for first, second, expected in cases:
        joined = miner.extendSequence(
            first, second, MSCandidateJoinCriteria.FORWARD)
        self.assertEqual(joined, expected)
def test_level2candidategenspm(self):
    """Check level2CandidateGenSPM against a hand-written candidate list.

    The unique items of the transactions are sorted by their MS value and
    paired with their support counts; that list is fed to
    level2CandidateGenSPM. Actual and expected candidates are both sorted
    with self.comparator before being compared, so the order in which the
    candidates are generated does not affect the result.
    """
    inputData = {
        'T': [[[1]], [[2], [2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    transactions = inputData["T"]
    min_supports = inputData["MS"]
    miner = pyMSGSP(transactions, min_supports, inputData["SDC"])

    items = Utils.getUniqueItems(transactions)
    items.sort(key=lambda item: min_supports[item])
    supports = Utils.genItemSupportCount(items, transactions)
    # supports is indexed by position in `items`.
    item_supports = [(item, supports[pos]) for pos, item in enumerate(items)]

    actual = miner.level2CandidateGenSPM(item_supports)
    expected = [
        [[1, 1]], [[1], [1]],
        [[1, 2]], [[1], [2]],
        [[2], [1]],
        [[2, 2]], [[2], [2]],
    ]

    actual = sorted(actual, cmp=self.comparator)
    expected = sorted(expected, cmp=self.comparator)
    self.assertEqual(actual, expected)
def test_sample(self):
    """Compare pyMSGSP with BruteForceSPM on the fixture in self.inputData.

    Both algorithms are run on the same data, every itemset in both outputs
    is sorted in place so item order cannot cause a mismatch, and each
    sequence produced by pyMSGSP must also appear in the BruteForceSPM
    output.
    """
    data = self.inputData
    algo1 = pyMSGSP(data["T"], data["MS"], data["SDC"], logging.ERROR)
    algo2 = BruteForceSPM(data["T"], data["MS"], data["SDC"])
    output1 = algo1.run()
    output2 = algo2.run()

    # Normalise the item order inside every itemset of both results.
    for output in (output1, output2):
        for sequence in output:
            for itemset in sequence:
                itemset.sort()

    # NOTE(review): only containment of output1 in output2 is checked;
    # sequences found solely by BruteForceSPM go unnoticed. Confirm whether
    # that is intended.
    for sequence in output1:
        # assertIn reports the missing sequence and the container on failure.
        self.assertIn(sequence, output2)
def test_mscandidategenspm(self):
    """Check MSCandidateGenSPM against a hand-written candidate list.

    Level-2 candidates are generated from the items sorted by MS value.
    The candidates whose relative support (support count divided by the
    number of transactions) reaches getMinMIS of the candidate form F2.
    MSCandidateGenSPM(F2) is then compared with the expected list after
    both are sorted with self.comparator.
    """
    inputData = {
        'T': [[[1]], [[2], [2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    transactions = inputData["T"]
    min_supports = inputData["MS"]
    miner = pyMSGSP(transactions, min_supports, inputData["SDC"])

    items = Utils.getUniqueItems(transactions)
    items.sort(key=lambda item: min_supports[item])
    supports = Utils.genItemSupportCount(items, transactions)
    item_supports = [(item, supports[pos]) for pos, item in enumerate(items)]

    candidates = miner.level2CandidateGenSPM(item_supports)
    candidate_counts = Utils.genSupportCount(candidates, transactions)
    total = len(transactions)
    # Keep the candidates whose relative support reaches their minimum MIS.
    frequent = [
        candidate
        for pos, candidate in enumerate(candidates)
        if float(candidate_counts[pos]) / total >= miner.getMinMIS(candidate)
    ]

    actual = miner.MSCandidateGenSPM(frequent)
    expected = [
        [[1], [1], [1]],
        [[1], [1, 2]],
        [[1], [1], [2]],
        [[1, 2], [2]],
        [[1], [2], [2]],
        [[2], [2], [2]],
    ]

    actual = sorted(actual, cmp=self.comparator)
    expected = sorted(expected, cmp=self.comparator)
    self.assertEqual(actual, expected)
def test_random(self):
    """Compare BruteForceSPM with pyMSGSP on ten generated data sets.

    For each data set from RandomDataGenerator.genData() both algorithms
    are run, every itemset in both outputs is sorted in place, and each
    sequence produced by BruteForceSPM must also appear in the pyMSGSP
    output. The generated data is printed so a failing case can be
    reproduced.
    """
    RDG = RandomDataGenerator()
    for i in range(10):
        randomData = RDG.genData()
        # A single pre-formatted argument emits the same text as the old
        # print statement and parses on both Python 2 and Python 3.
        print("randomData %d : %s" % (i + 1, randomData))

        algo1 = BruteForceSPM(
            randomData["T"], randomData["MS"], randomData["SDC"])
        algo2 = pyMSGSP(
            randomData["T"], randomData["MS"], randomData["SDC"],
            logging.ERROR)
        output1 = algo1.run()
        output2 = algo2.run()

        # Normalise the item order inside every itemset of both results.
        for output in (output1, output2):
            for sequence in output:
                for itemset in sequence:
                    itemset.sort()

        # NOTE(review): only containment of output1 in output2 is checked;
        # extra sequences produced solely by pyMSGSP go unnoticed. Confirm
        # whether that is intended.
        for sequence in output1:
            # assertIn reports the missing sequence on failure.
            self.assertIn(sequence, output2)
def test_canprune(self):
    """Check that canPrune gives a falsy result for the sequence [[1], [1]]."""
    inputData = {
        'T': [[[1]], [[2]], [[2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    transactions = inputData["T"]
    min_supports = inputData["MS"]
    sdc = inputData["SDC"]
    miner = pyMSGSP(transactions, min_supports, sdc)

    self.assertFalse(miner.canPrune([[1], [1]]))