Example #1
0
    def executeOrphanReduction(self):
        """Try to merge orphan symbols by clustering reduced versions of them.

        An orphan is a symbol holding exactly one message. While both
        reduction factors are below 80, each pass:

        1. splits ``self.symbols`` into orphans (one message) and symbols
           with more than one message (symbols with no message are dropped);
        2. sets a reduction factor on the single message of every orphan,
           alternating between the right side and the left side, the factor
           growing by 10 each time a side is used (the log messages report
           it as a percentage);
        3. runs ``executeClustering()`` on the orphans only;
        4. resets every reduction factor to 0 and puts the resulting symbols
           back with the multi-message symbols.

        The loop stops early as soon as at most one orphan remains. A final
        alignment is then computed for every symbol.

        Returns:
            The list ``self.symbols`` after the final alignment.
        """
        leftReductionFactor = 0
        rightReductionFactor = 0
        currentReductionIsLeft = False
        increment = 10

        while leftReductionFactor < 80 and rightReductionFactor < 80:

            # Partition the current symbols: orphans are clustered again,
            # the others are kept aside untouched until the pass is over.
            orphans = [symbol for symbol in self.symbols if len(symbol.getMessages()) == 1]
            tmp_symbols = [symbol for symbol in self.symbols if len(symbol.getMessages()) > 1]

            if len(orphans) <= 1:
                self.log.info("Number of orphan symbols : {0}. The orphan merging op. is finished !".format(len(orphans)))
                break

            # executeClustering() works on self.symbols, so expose only the orphans
            self.symbols = orphans

            # Exactly one direction per pass. Every symbol here is an orphan,
            # so its first message is its only message and all messages being
            # clustered carry the same, consistent reduction.
            if currentReductionIsLeft:
                leftReductionFactor = leftReductionFactor + increment
                for orphan in self.symbols:
                    message = orphan.getMessages()[0]
                    message.setLeftReductionFactor(leftReductionFactor)
                    message.setRightReductionFactor(0)

                self.log.info("Start to merge orphans reduced by {0}% from the left".format(str(leftReductionFactor)))
            else:
                rightReductionFactor = rightReductionFactor + increment
                for orphan in self.symbols:
                    message = orphan.getMessages()[0]
                    message.setRightReductionFactor(rightReductionFactor)
                    message.setLeftReductionFactor(0)

                self.log.info("Start to merge orphans reduced by {0}% from the right".format(str(rightReductionFactor)))

            self.executeClustering()
            # Alternate the side for the next pass
            currentReductionIsLeft = not currentReductionIsLeft

            # Restore the full messages and merge the clustered orphans back
            for orphan in self.symbols:
                for message in orphan.getMessages():
                    message.setLeftReductionFactor(0)
                    message.setRightReductionFactor(0)
                tmp_symbols.append(orphan)
            self.symbols = tmp_symbols

        self.cb_executionStatus(50.0, "Executing last alignment...")
        alignment = NeedlemanAndWunsch(self.unitSize, self.cb_status)
        # Compute the regex/alignment of each symbol
        for symbol in self.symbols:
            alignment.alignSymbol(symbol, self.doInternalSlick, self.defaultFormat)
        return self.symbols
Example #2
0
    def executeClustering(self):
        """Cluster ``self.symbols`` and align the resulting symbols.

        Three steps are performed in order:

        1. symbols whose first message has the same reduced string data are
           merged through ``mergeEffectiveRowCol()``;
        2. ``processUPGMA()`` is run on the remaining symbols;
        3. every symbol is aligned with a ``NeedlemanAndWunsch`` instance.

        Returns:
            The list ``self.symbols`` after alignment.
        """
        self.log.debug("Re-Organize the symbols (nbIteration={0}, min_equivalence={1})".format(self.nbIteration, self.minEquivalence))

        # Find equal messages. Useful for redundant protocols before doing heavy
        # computations with Needleman (complexity=O(N^2) where N is #Symbols)
        # `remaining` counts the symbols located after index `i_equ`.
        remaining = len(self.symbols) - 1
        i_equ = 0
        while remaining > 0:
            currentMess = self.symbols[i_equ].getMessages()[0].getReducedStringData()
            for offset in range(1, remaining + 1):
                j_equ = i_equ + offset
                if currentMess == self.symbols[j_equ].getMessages()[0].getReducedStringData():
                    self.mergeEffectiveRowCol(i_equ, j_equ)
                    # Report the indexes that were actually merged
                    self.log.debug("Merge the equal column/line {0} with the column/line {1}".format(str(i_equ), str(j_equ)))
                    # Cancel the increment below so that the same index is examined
                    # again on the next round.
                    # NOTE(review): presumably mergeEffectiveRowCol removes the merged
                    # entries from self.symbols, shifting the next one into i_equ - confirm.
                    i_equ -= 1
                    break
            remaining -= 1
            i_equ += 1

        # Process the UPGMA on symbols
        self.processUPGMA()

        # Retrieve the alignment of each symbol and build the associated regular expression
        self.cb_executionStatus(50.0, "Executing last alignment...")
        alignment = NeedlemanAndWunsch(self.unitSize, self.cb_status)
        for symbol in self.symbols:
            alignment.alignSymbol(symbol, self.doInternalSlick, self.defaultFormat)

        return self.symbols
Example #3
0
 def test_randomAlignmentsWithTwoCenteredMessages(self):
     """Align random message pairs that share a common centre pattern.

     For each of the 1000 rounds, two messages are built as
     random prefix + common pattern + random suffix, grouped in one symbol
     and aligned. The round succeeds when the converted common pattern is
     found in the symbol's alignment. The test requires every round to
     succeed; details of each failing round are printed.
     """
     project = self.getWorkspace().getProjects()[0]
     configuration = project.getConfiguration()

     doInternalSlick = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_DO_INTERNAL_SLICK)
     defaultFormat = project.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_GLOBAL_FORMAT)

     nb_data = 1000
     nb_failed = 0
     nb_success = 0
     for round_index in range(nb_data):
         # The pattern both messages must share
         # (arguments are presumably the min/max length - confirm in generateRandomString)
         common_pattern = self.generateRandomString(30, 40)

         # Surround the pattern with independent random data for each message
         raw_first = self.generateRandomString(5, 100) + common_pattern + self.generateRandomString(5, 100)
         data1 = TypeConvertor.stringToNetzobRaw(raw_first)
         raw_second = self.generateRandomString(5, 100) + common_pattern + self.generateRandomString(5, 100)
         data2 = TypeConvertor.stringToNetzobRaw(raw_second)

         # Wrap the data in messages and gather them in a single symbol
         message1 = RawMessage(uuid.uuid4(), str(time.time()), data1)
         message2 = RawMessage(uuid.uuid4(), str(time.time()), data2)
         symbol = Symbol(uuid.uuid4(), "test_randomAlignments#" + str(round_index), None)
         for message in (message1, message2):
             symbol.addMessage(message)

         # Run the alignment
         aligner = NeedlemanAndWunsch(self.emptyAlignmentCB)
         aligner.alignSymbol(symbol, doInternalSlick, defaultFormat)

         expected = TypeConvertor.stringToNetzobRaw(common_pattern)
         computed = symbol.getAlignment()
         if expected in computed:
             nb_success += 1
             continue

         # The pattern was lost: dump everything needed to reproduce the case
         print("Message 1 : " + str(data1))
         print("Message 2 : " + str(data2))
         print("Common pattern : " + expected)
         print("Alignment : " + computed)
         nb_failed += 1

     if nb_failed > 0:
         print("A number of " + str(nb_failed) + "/" + str(nb_data) + " alignment failed !")
     self.assertEqual(0, nb_failed)
     self.assertEqual(nb_success, nb_data)