def executeOrphanReduction(self):
    """Try to merge orphan symbols by clustering them with reduction factors.

    An orphan is a symbol holding exactly one message.  On each pass the
    orphans are isolated in ``self.symbols``, a reduction factor is set on
    their single message (from the right on the first pass, then
    alternating left/right), and ``executeClustering`` is run on them.
    The factor of the side being used grows by 10 every time that side is
    used (it is logged as a percentage).  The loop stops as soon as either
    factor reaches 80 or when at most one orphan is left.

    After each pass the reduction factors of every message of the
    processed symbols are reset to 0 and those symbols are put back with
    the symbols that already held several messages.

    Once the loop is over, every symbol of ``self.symbols`` is aligned.

    Returns:
        ``self.symbols`` after the merging passes and the final alignment.
    """
    leftReductionFactor = 0
    rightReductionFactor = 0
    currentReductionIsLeft = False
    increment = 10

    while leftReductionFactor < 80 and rightReductionFactor < 80:
        # Split the current symbols: orphans (exactly one message) are the
        # ones to cluster, symbols with several messages are set aside and
        # get the processed orphans appended at the end of the pass.
        orphans = []
        tmp_symbols = []
        for symbol in self.symbols:
            nbMessages = len(symbol.getMessages())
            if nbMessages == 1:
                orphans.append(symbol)
            elif nbMessages > 1:
                tmp_symbols.append(symbol)

        if len(orphans) <= 1:
            # Nothing left to merge; self.symbols is left untouched.
            self.log.info("Number of orphan symbols : {0}. The orphan merging op. is finished !".format(len(orphans)))
            break

        self.symbols = orphans

        # Exactly one side is reduced per pass.  The two branches must be
        # mutually exclusive: re-testing the flag after flipping it would
        # run a right-hand pass right after every left-hand pass, on the
        # symbols the left-hand pass has just merged.
        if currentReductionIsLeft:
            leftReductionFactor = leftReductionFactor + increment
            # Reduce the orphans from the left only
            for orphan in self.symbols:
                message = orphan.getMessages()[0]
                message.setLeftReductionFactor(leftReductionFactor)
                message.setRightReductionFactor(0)
            self.log.info("Start to merge orphans reduced by {0}% from the left".format(str(leftReductionFactor)))
        else:
            rightReductionFactor = rightReductionFactor + increment
            # Reduce the orphans from the right only
            for orphan in self.symbols:
                message = orphan.getMessages()[0]
                message.setRightReductionFactor(rightReductionFactor)
                message.setLeftReductionFactor(0)
            self.log.info("Start to merge orphans reduced by {0}% from the right".format(str(rightReductionFactor)))

        self.executeClustering()
        # Use the other side on the next pass
        currentReductionIsLeft = not currentReductionIsLeft

        # Clear the reduction on every message of the clustered symbols
        # and put them back with the multi-message symbols.
        for orphan in self.symbols:
            for message in orphan.getMessages():
                message.setLeftReductionFactor(0)
                message.setRightReductionFactor(0)
            tmp_symbols.append(orphan)
        self.symbols = tmp_symbols

    self.cb_executionStatus(50.0, "Executing last alignment...")
    alignment = NeedlemanAndWunsch(self.unitSize, self.cb_status)
    # Compute the regex/alignment of each symbol
    for symbol in self.symbols:
        alignment.alignSymbol(symbol, self.doInternalSlick, self.defaultFormat)
    return self.symbols
def executeClustering(self):
    """Cluster ``self.symbols`` and align the resulting symbols.

    Three steps are performed in order:

    1. symbols whose first message has the same reduced string data are
       merged through ``mergeEffectiveRowCol``;
    2. ``processUPGMA`` is run on the symbols;
    3. every symbol of ``self.symbols`` is aligned with a
       ``NeedlemanAndWunsch`` instance.

    Returns:
        ``self.symbols`` after clustering and alignment.
    """
    self.log.debug("Re-Organize the symbols (nbIteration={0}, min_equivalence={1})".format(self.nbIteration, self.minEquivalence))

    # Find equal messages. Useful for redundant protocols before doing
    # heavy computations with Needleman (complexity=O(N^2) where N is
    # #Symbols).
    # NOTE(review): the index bookkeeping below assumes that
    # mergeEffectiveRowCol removes one entry from self.symbols - confirm.
    remaining = len(self.symbols) - 1  # number of symbols after `current`
    current = 0
    while remaining > 0:
        reference = self.symbols[current].getMessages()[0].getReducedStringData()
        for offset in range(1, remaining + 1):
            candidate = current + offset
            if reference == self.symbols[candidate].getMessages()[0].getReducedStringData():
                self.mergeEffectiveRowCol(current, candidate)
                # Log the index that was really merged (current + offset),
                # not the bare offset.
                self.log.debug("Merge the equal column/line {0} with the column/line {1}".format(str(current), str(candidate)))
                # Stay on the same symbol: it is compared again against
                # the symbols that are left.
                break
        else:
            # No duplicate of the current symbol: move on to the next one.
            current += 1
        remaining -= 1

    # Process the UPGMA on symbols
    self.processUPGMA()

    # Retrieve the alignment of each symbol and build the associated
    # regular expression
    self.cb_executionStatus(50.0, "Executing last alignment...")
    alignment = NeedlemanAndWunsch(self.unitSize, self.cb_status)
    for symbol in self.symbols:
        alignment.alignSymbol(symbol, self.doInternalSlick, self.defaultFormat)
    return self.symbols
def test_randomAlignmentsWithTwoCenteredMessages(self):
    """Align 1000 random message pairs sharing a centered common pattern.

    Each pair is built as <random prefix> + <common pattern> + <random
    suffix>; both messages are put in one symbol which is then aligned.
    A pair counts as failed when the converted common pattern is not
    found in the alignment of the symbol.  The test passes only if no
    pair failed.
    """
    configuration = self.getWorkspace().getProjects()[0].getConfiguration()
    doInternalSlick = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_DO_INTERNAL_SLICK)
    defaultFormat = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_GLOBAL_FORMAT)

    # We generate 1000 random couples of data and try to align them
    nb_data = 1000
    nb_failed = 0
    nb_success = 0

    for i_test in range(nb_data):
        common_pattern = self.generateRandomString(30, 40)

        # Content of the two messages: the shared pattern surrounded by
        # random data on both sides
        content1 = self.generateRandomString(5, 100) + common_pattern + self.generateRandomString(5, 100)
        data1 = TypeConvertor.stringToNetzobRaw(content1)
        content2 = self.generateRandomString(5, 100) + common_pattern + self.generateRandomString(5, 100)
        data2 = TypeConvertor.stringToNetzobRaw(content2)

        # One symbol holding both messages
        message1 = RawMessage(uuid.uuid4(), str(time.time()), data1)
        message2 = RawMessage(uuid.uuid4(), str(time.time()), data2)
        symbol = Symbol(uuid.uuid4(), "test_randomAlignments#" + str(i_test), None)
        for message in (message1, message2):
            symbol.addMessage(message)

        # Starts the alignment process
        # NOTE(review): NeedlemanAndWunsch is built with a single argument
        # here - confirm this matches the constructor signature in use.
        alignmentProcess = NeedlemanAndWunsch(self.emptyAlignmentCB)
        alignmentProcess.alignSymbol(symbol, doInternalSlick, defaultFormat)

        expected = TypeConvertor.stringToNetzobRaw(common_pattern)
        computed = symbol.getAlignment()
        if expected in computed:
            nb_success += 1
            continue

        # The common pattern was lost: dump the inputs for diagnosis
        print("Message 1 : " + str(data1))
        print("Message 2 : " + str(data2))
        print("Common pattern : " + expected)
        print("Alignment : " + computed)
        nb_failed += 1

    if nb_failed > 0:
        print("A number of " + str(nb_failed) + "/" + str(nb_data) + " alignment failed !")

    self.assertEqual(0, nb_failed)
    self.assertEqual(nb_success, nb_data)