def comparator(self, a, b):
    """Three-way comparison of two sequences for cmp-style sorting.

    Sequences are ordered first by ``len`` (number of elements); when those
    are equal, the result is the difference of their ``Utils.seqLength``
    values.

    Returns a negative number if ``a`` sorts before ``b``, a positive number
    if it sorts after, and 0 when both measures are equal.
    """
    sizeA, sizeB = len(a), len(b)
    if sizeA != sizeB:
        return -1 if sizeA < sizeB else 1
    # Same number of elements: fall back to the Utils.seqLength difference.
    return Utils.seqLength(a) - Utils.seqLength(b)
def MSCandidateGenSPM(self, F):
    """Build candidate sequences by joining every ordered pair taken from F.

    For each pair ``(s1, s2)`` exactly one join rule is considered:

    * FORWARD - when the MS value of s1's first item equals
      ``getStrictlyMinimumMIS(s1)``.  The join happens if s1 without the
      item at index 1 equals s2 without its last item, and the MS value of
      s2's last item is >= the MS value of s1's first item.
    * REVERSE - otherwise, when the MS value of s2's last item equals
      ``getStrictlyMinimumMIS(s2)``.  The join happens if s2 without its
      second-to-last item equals s1 without its first item, and the MS
      value of s1's first item is > the MS value of s2's last item.
    * APRIORI - otherwise.  The join happens if s1 without its first item
      equals s2 without its last item.

    Joined candidates come from ``extendSequence``; each one is logged at
    debug level.  Returns the candidates for which ``canPrune`` returns
    exactly ``False``, in generation order.
    """
    logging.debug('MSCandidateGenSPM: %s', F)
    joined = []
    for s1 in F:
        for s2 in F:
            # Decide which join rule (if any) applies to this pair.
            criteria = None
            if self.MS[s1[0][0]] == self.getStrictlyMinimumMIS(s1):
                sameRemainder = (Utils.removeItem(s1, 1) ==
                                 Utils.removeItem(s2, Utils.seqLength(s2) - 1))
                # TODO: need to check why >= here?
                if sameRemainder and self.MS[s2[-1][-1]] >= self.MS[s1[0][0]]:
                    criteria = MSCandidateJoinCriteria.FORWARD
            elif self.MS[s2[-1][-1]] == self.getStrictlyMinimumMIS(s2):
                sameRemainder = (Utils.removeItem(s2, Utils.seqLength(s2) - 2) ==
                                 Utils.removeItem(s1, 0))
                if sameRemainder and self.MS[s1[0][0]] > self.MS[s2[-1][-1]]:
                    criteria = MSCandidateJoinCriteria.REVERSE
            else:
                if Utils.removeItem(s1, 0) == Utils.removeItem(s2, Utils.seqLength(s2) - 1):
                    criteria = MSCandidateJoinCriteria.APRIORI

            if criteria is None:
                continue

            for candidate in self.extendSequence(s1, s2, criteria):
                joined.append(candidate)
                logging.debug('join: %s %s -> %s %d', s1, s2, candidate, criteria)

    return [candidate for candidate in joined if self.canPrune(candidate) is False]
def test_getitem(self):
    """Check Utils.getItem on an empty, a one-element and a two-element sequence."""
    # An index into an empty sequence is expected to give None.
    emptySeq = []
    self.assertEqual(Utils.getItem(emptySeq, 4), None)

    # Index 0 is the first item of the first element.
    oneElement = [[30, 70, 80]]
    self.assertEqual(Utils.getItem(oneElement, 0), 30)

    # The index seqLength - 1 addresses the last item of the last element.
    twoElements = [[70, 80], [90]]
    lastIndex = Utils.seqLength(twoElements) - 1
    self.assertEqual(Utils.getItem(twoElements, lastIndex), 90)
def extendSequence(self, s1, s2, criteria):
    """Join s1 and s2 into the candidate sequence(s) selected by *criteria*.

    * FORWARD: s1 is extended at the end with the last item of s2.
      - If s2's last element holds a single item, that element is appended
        to s1 as a new element.  Additionally, when
        ``Utils.seqLength(s1) == 2``, ``len(s1) == 2`` and s2's last item is
        greater than s1's last item, a second candidate is produced with the
        item merged into s1's last element.
      - Otherwise, when (``Utils.seqLength(s1) == 2``, ``len(s1) == 1`` and
        s2's last item is greater than s1's last item) or
        ``Utils.seqLength(s1) > 2``, the item is merged into s1's last
        element.
    * REVERSE: the mirror image - s2 is extended at the front with the
      first item of s1, using the same conditions on s2 and the comparison
      ``s1[0][0] > s2[0][0]``.
    * APRIORI: s2's last element is appended to s1 when it holds a single
      item; otherwise s2's last item is merged into s1's last element.

    Any other *criteria* value yields an empty list.

    Returns a list of new sequences.  Every candidate is fully independent
    of s1 and s2: the extended sequence is deep-copied, and the element taken
    from the other sequence is copied as well, so later in-place changes to a
    candidate cannot leak into the input sequences (or vice versa).
    """
    newCandidates = []

    if criteria == MSCandidateJoinCriteria.FORWARD:
        if len(s2[-1]) == 1:
            appended = copy.deepcopy(s1)
            # Copy the element: appending s2[-1] itself would alias s2's list.
            appended.append(copy.deepcopy(s2[-1]))
            newCandidates.append(appended)
            if Utils.seqLength(s1) == 2 and len(s1) == 2 and s2[-1][-1] > s1[-1][-1]:
                merged = copy.deepcopy(s1)
                merged[-1].append(s2[-1][-1])
                newCandidates.append(merged)
        elif (Utils.seqLength(s1) == 2 and len(s1) == 1 and s2[-1][-1] > s1[-1][-1]) \
                or Utils.seqLength(s1) > 2:
            merged = copy.deepcopy(s1)
            merged[-1].append(s2[-1][-1])
            newCandidates.append(merged)

    elif criteria == MSCandidateJoinCriteria.REVERSE:
        if len(s1[0]) == 1:
            prepended = copy.deepcopy(s2)
            # Copy the element: inserting s1[0] itself would alias s1's list.
            prepended.insert(0, copy.deepcopy(s1[0]))
            newCandidates.append(prepended)
            if Utils.seqLength(s2) == 2 and len(s2) == 2 and s1[0][0] > s2[0][0]:
                merged = copy.deepcopy(s2)
                merged[0].insert(0, s1[0][0])
                newCandidates.append(merged)
        elif (Utils.seqLength(s2) == 2 and len(s2) == 1 and s1[0][0] > s2[0][0]) \
                or Utils.seqLength(s2) > 2:
            merged = copy.deepcopy(s2)
            merged[0].insert(0, s1[0][0])
            newCandidates.append(merged)

    elif criteria == MSCandidateJoinCriteria.APRIORI:
        extended = copy.deepcopy(s1)
        if len(s2[-1]) == 1:
            # Copy the element so the candidate does not share it with s2.
            extended.append(copy.deepcopy(s2[-1]))
        else:
            extended[-1].append(s2[-1][-1])
        newCandidates.append(extended)

    return newCandidates
def test_removeitem(self):
    """Check Utils.removeItem results and that the input sequence is left untouched.

    The "before" snapshot must be an independent copy.  Binding a second
    name to the same list would make the comparison trivially true even if
    removeItem mutated its argument.
    """
    # Empty sequence: nothing to remove.
    s = []
    before = [list(element) for element in s]
    self.assertEqual(Utils.removeItem(s, 0), [])
    self.assertEqual(s, before)

    # Removing index 0 drops the first item of the only element.
    s = [[30, 70, 80]]
    before = [list(element) for element in s]
    self.assertEqual(Utils.removeItem(s, 0), [[70, 80]])
    self.assertEqual(s, before)

    # Removing the last item empties the trailing element, which disappears.
    s = [[70, 80], [90]]
    before = [list(element) for element in s]
    self.assertEqual(Utils.removeItem(s, Utils.seqLength(s)-1), [[70, 80]])
    self.assertEqual(s, before)
def canPrune(self, seq):
    """Tell whether candidate *seq* can be discarded.

    Each item position of *seq* is visited in turn.  Positions whose item
    has an MS value equal to ``getStrictlyMinimumMIS(seq)`` are skipped.
    For every other position the item is removed, and the fraction of
    sequences in ``self.T`` that contain the resulting subsequence is
    compared with ``getMinMIS`` of that subsequence.

    Returns True as soon as one such subsequence falls below its threshold,
    False if none does.
    """
    strictMinMIS = self.getStrictlyMinimumMIS(seq)
    for position in range(Utils.seqLength(seq)):
        if self.MS[Utils.getItem(seq, position)] == strictMinMIS:
            continue
        subseq = Utils.removeItem(seq, position)
        # Number of data sequences in self.T that contain the subsequence.
        occurrences = sum(1 for d in self.T if Utils.isSubsequence(subseq, d))
        if float(occurrences) / len(self.T) < self.getMinMIS(subseq):
            return True
    return False
def test_seqlength(self):
    """Check that Utils.seqLength counts items across all elements of a sequence."""
    # (sequence, expected number of items); repeated items count each time.
    expectations = [
        ([], 0),
        ([[1]], 1),
        ([[2, 3]], 2),
        ([[1], [2, 3]], 3),
        ([[1], [2, 3], [4]], 4),
        ([[1], [1, 1], [1]], 4),
    ]
    for s, expected in expectations:
        self.assertEqual(Utils.seqLength(s), expected)
data_file = sys.argv[2] para_file = sys.argv[3] result_file = sys.argv[4] DP = DataProcessor(data_file, para_file, result_file, False) inputData = DP.loadInput() startTime = datetime.now() print 'Execution started at:', startTime algo = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"], logging.INFO) # algo = BruteForceSPM(inputData["T"], inputData["MS"], inputData["SDC"]) outputData = algo.run() print 'Execution time:', datetime.now() - startTime outputDict = defaultdict(list) for seq in outputData: count = 0 for d in inputData["T"]: if Utils.isSubsequence(seq, d): count += 1 outputDict[Utils.seqLength(seq)].append((seq, count)) DP.printOutput(outputDict)