Esempio n. 1
0
    def test_issubsequence(self):
        """Check Utils.isSubsequence on a few hand-written sequence pairs.

        Each case is a (sub, sup) pair of lists of item lists; the first
        three are expected to be accepted and the last one rejected.
        """
        accepted_pairs = [
            # Empty sequence against an empty sequence.
            ([], []),
            # Empty sequence against a non-empty sequence.
            ([], [[1], [2]]),
            # Single-item elements against larger elements.
            ([[2], [3]], [[1, 2, 3], [1, 2, 3, 4]]),
        ]
        for candidate, container in accepted_pairs:
            self.assertTrue(Utils.isSubsequence(candidate, container))

        # This pair is expected to be rejected.
        rejected_sub = [[1, 3], [3, 3, 3]]
        rejected_sup = [[1, 2, 3], [1, 2, 3, 4]]
        self.assertFalse(Utils.isSubsequence(rejected_sub, rejected_sup))
Esempio n. 2
0
    def canPrune(self, seq):
        """Tell whether the candidate sequence `seq` can be discarded.

        For every position in `seq` whose item's MS value differs from
        self.getStrictlyMinimumMIS(seq), the item at that position is removed
        and the fraction of entries in self.T containing the shortened
        sequence is compared with self.getMinMIS of that shortened sequence.

        Returns True as soon as one shortened sequence falls below its
        threshold, otherwise False.
        """
        strictLowest = self.getStrictlyMinimumMIS(seq)

        for position in range(Utils.seqLength(seq)):
            # Positions holding the strictly-lowest-MIS item are never dropped.
            if self.MS[Utils.getItem(seq, position)] == strictLowest:
                continue

            shortened = Utils.removeItem(seq, position)
            hits = sum(1 for d in self.T if Utils.isSubsequence(shortened, d))

            if float(hits) / len(self.T) < self.getMinMIS(shortened):
                return True

        return False
Esempio n. 3
0
    def run(self):
        """Brute-force search over every sequence built from the items in self.T.

        A generated sequence is kept when both conditions hold:
          * the fraction of entries of self.T that contain it is at least the
            smallest self.MS value among its items, and
          * the spread between the largest and smallest item support count,
            divided by len(self.T), does not exceed self.SDC.

        Returns a list of the sequences that pass. Returns an empty list
        (after printing a notice) when there are more than 3 distinct items,
        and also when self.T is empty.
        """
        L = Utils.getUniqueItems(self.T)
        SUP = Utils.genItemSupportCount(L, self.T)
        # Item -> support count; SUP is indexed in step with L.
        lSUP = {item: SUP[idx] for idx, item in enumerate(L)}

        if len(L) > 3:
            # Single-argument call form prints the same text on Python 2 and 3.
            print("SORRY! Can't run Brute Force with these large data")
            return []

        numTransactions = len(self.T)
        if numTransactions == 0:
            # Nothing can be frequent, and the ratios below would divide by zero.
            return []

        C = Utils.generateAllSubsets(L)
        S = Utils.generateAllSequences(C)

        outputData = []

        for seq in S:
            items = [i for element in seq for i in element]
            if not items:
                # A sequence without items has no MIS threshold to satisfy.
                continue

            # Use the real extremes instead of fixed sentinels such as 999,
            # which gave a wrong spread once support counts exceeded them.
            supports = [lSUP[i] for i in items]
            minSUP = min(supports)
            maxSUP = max(supports)
            minMIS = min(self.MS[i] for i in items)

            count = sum(1 for d in self.T if Utils.isSubsequence(seq, d))

            meetsMIS = float(count) / numTransactions >= minMIS
            meetsSDC = float(maxSUP - minSUP) / numTransactions <= self.SDC
            if meetsMIS and meetsSDC:
                outputData.append(seq)

        return outputData
Esempio n. 4
0
                data_file = sys.argv[2]
                para_file = sys.argv[3]
                result_file = sys.argv[4]


    # Load the input; data_file, para_file and result_file are assigned earlier
    # in this scope (from sys.argv on at least one path).
    DP = DataProcessor(data_file, para_file, result_file, False)
    inputData = DP.loadInput()

    # Timing starts after the input is loaded, so it covers only algo.run().
    startTime = datetime.now()
    print 'Execution started at:', startTime
    algo = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"], logging.INFO)
    # Alternative miner taking the same T / MS / SDC inputs:
    # algo = BruteForceSPM(inputData["T"], inputData["MS"], inputData["SDC"])

    outputData = algo.run()
    print 'Execution time:', datetime.now() - startTime

    # Maps Utils.seqLength(seq) -> list of (seq, count) pairs.
    outputDict = defaultdict(list)

    for seq in outputData:
        # Count the entries of inputData["T"] for which Utils.isSubsequence holds.
        count = 0
        for d in inputData["T"]:
            if Utils.isSubsequence(seq, d):
                count += 1
        outputDict[Utils.seqLength(seq)].append((seq, count))

    DP.printOutput(outputDict)