def test_executingClusteringWithOrphanReduction(self):
    """Check that orphan reduction never yields more symbols than clustering.

    Six symbols holding one message each are built: three whose message
    starts with "bonjour " and three whose message starts with "salut ",
    each followed by a random string. The symbols are clustered with UPGMA,
    orphan reduction is then executed, and the test asserts that the number
    of symbols after the reduction is not greater than before it.
    """
    # (symbol name, message prefix): two groups of three messages each.
    fixtures = [
        ("TestSymbol", "bonjour "),
        ("TestSymbol2", "bonjour "),
        ("TestSymbol3", "bonjour "),
        ("TestSymbol4", "salut "),
        ("TestSymbol5", "salut "),
        ("TestSymbol6", "salut "),
    ]

    symbols = []
    for name, prefix in fixtures:
        # IDs are passed as plain strings rather than UUID objects,
        # consistent with the str()-based ID comparison used in alignSymbols.
        symbol = Symbol(str(uuid.uuid4()), name, None)
        # (200, 1000) are presumably the length bounds of the random part
        # -- TODO confirm against generateRandomString.
        message = RawMessage(
            str(uuid.uuid4()),
            str(time.time()),
            TypeConvertor.stringToNetzobRaw(prefix + self.generateRandomString(200, 1000)))
        symbol.addMessage(message)
        symbols.append(symbol)

    # Start the clustering
    clusteringSolution = UPGMA(None, symbols, True, 100, 80, True, Format.ASCII)
    resultBeforeOrphan = clusteringSolution.executeClustering()
    resultAfterOrphan = clusteringSolution.executeOrphanReduction()

    # Only dump both states when the reduction actually merged something.
    # print(...) with a single parenthesised argument is valid both as a
    # Python 2 print statement and as a Python 3 call.
    if len(resultAfterOrphan) < len(resultBeforeOrphan):
        print("Before Orphan Reduction : ")
        for symbol in resultBeforeOrphan:
            print("Symbol : " + str(symbol.getName()))
            for m in symbol.getMessages():
                print(" + " + str(m.getStringData()))

        print("After Orphan Reduction : ")
        for symbol in resultAfterOrphan:
            print("Symbol : " + str(symbol.getName()))
            for m in symbol.getMessages():
                print(" + " + str(m.getStringData()))

    self.assertGreaterEqual(len(resultBeforeOrphan), len(resultAfterOrphan))
def test_executingClusteringWithOrphanReduction(self):
    """Cluster six one-message symbols, reduce orphans, compare the counts.

    Three messages start with "bonjour " and three with "salut "; each is
    completed by a random string. After UPGMA clustering and orphan
    reduction, the symbol count after reduction must not exceed the count
    before it. Both results are printed when the reduction shrank the list.
    """
    def dump(title, clusters):
        # Print a title line, then every symbol name with its messages.
        print(title)
        for cluster in clusters:
            print("Symbol: " + str(cluster.getName()))
            for msg in cluster.getMessages():
                print(" + " + str(msg.getStringData()))

    # Symbol names paired with the fixed prefix of their single message.
    names = ["TestSymbol", "TestSymbol2", "TestSymbol3",
             "TestSymbol4", "TestSymbol5", "TestSymbol6"]
    prefixes = ["bonjour "] * 3 + ["salut "] * 3

    symbols = []
    for name, prefix in zip(names, prefixes):
        current = Symbol(str(uuid.uuid4()), name, None)
        # Same evaluation order as the original call: id, timestamp, data.
        messageID = str(uuid.uuid4())
        timestamp = str(time.time())
        data = TypeConvertor.stringToNetzobRaw(prefix + self.generateRandomString(200, 1000))
        current.addMessage(RawMessage(messageID, timestamp, data))
        symbols.append(current)

    # Start the clustering
    clusteringSolution = UPGMA(None, symbols, True, 100, 80, True, Format.ASCII)
    resultBeforeOrphan = clusteringSolution.executeClustering()
    resultAfterOrphan = clusteringSolution.executeOrphanReduction()

    countBefore = len(resultBeforeOrphan)
    countAfter = len(resultAfterOrphan)
    if countAfter < countBefore:
        dump("Before Orphan Reduction: ", resultBeforeOrphan)
        dump("After Orphan Reduction: ", resultAfterOrphan)

    self.assertGreaterEqual(len(resultBeforeOrphan), len(resultAfterOrphan))
def test_executingClustering(self):
    """Run UPGMA clustering on six one-message symbols and print the result.

    Three messages follow the pattern "bonjour <random> comment vas-tu ?"
    and three follow "salut à toi <random> what's up ?". The test only
    prints the resulting symbols and their messages; it makes no assertion
    on the clustering outcome.
    """
    # (symbol name, prefix, random-string arguments, suffix) per message.
    # The two integers are presumably length bounds -- TODO confirm against
    # generateRandomString.
    fixtures = [
        ("TestSymbol", "bonjour ", (20, 30), " comment vas-tu ?"),
        ("TestSymbol2", "bonjour ", (20, 30), " comment vas-tu ?"),
        ("TestSymbol3", "bonjour ", (20, 30), " comment vas-tu ?"),
        ("TestSymbol4", "salut à toi ", (10, 15), " what's up ?"),
        ("TestSymbol5", "salut à toi ", (10, 15), " what's up ?"),
        ("TestSymbol6", "salut à toi ", (10, 15), " what's up ?"),
    ]

    symbols = []
    for name, prefix, (low, high), suffix in fixtures:
        # IDs are passed as plain strings rather than UUID objects,
        # consistent with the str()-based ID comparison used in alignSymbols.
        symbol = Symbol(str(uuid.uuid4()), name, None)
        message = RawMessage(
            str(uuid.uuid4()),
            str(time.time()),
            TypeConvertor.stringToNetzobRaw(prefix + self.generateRandomString(low, high) + suffix))
        symbol.addMessage(message)
        symbols.append(symbol)

    # Start the clustering
    clusteringSolution = UPGMA(None, symbols, True, 100, 90, True, Format.ASCII)
    result = clusteringSolution.executeClustering()

    # print(...) with a single parenthesised argument is valid both as a
    # Python 2 print statement and as a Python 3 call.
    for symbol in result:
        print("Symbol : " + str(symbol.getName()))
        for m in symbol.getMessages():
            print(" + " + str(m.getStringData()))
def test_executingClustering(self):
    """Cluster six one-message symbols with UPGMA and print what comes out.

    The first three messages read "bonjour <random> comment vas-tu ?", the
    last three "salut à toi <random> what's up ?". No assertion is made;
    the resulting symbols and their messages are only printed.
    """
    def build(name, head, low, high, tail):
        # Create one symbol carrying a single raw message.
        created = Symbol(str(uuid.uuid4()), name, None)
        # Same evaluation order as the original call: id, timestamp, data.
        messageID = str(uuid.uuid4())
        timestamp = str(time.time())
        data = TypeConvertor.stringToNetzobRaw(head + self.generateRandomString(low, high) + tail)
        created.addMessage(RawMessage(messageID, timestamp, data))
        return created

    symbols = []
    # group1
    for name in ("TestSymbol", "TestSymbol2", "TestSymbol3"):
        symbols.append(build(name, "bonjour ", 20, 30, " comment vas-tu ?"))
    # group2
    for name in ("TestSymbol4", "TestSymbol5", "TestSymbol6"):
        symbols.append(build(name, "salut à toi ", 10, 15, " what's up ?"))

    # Start the clustering
    clusteringSolution = UPGMA(None, symbols, True, 100, 90, True, Format.ASCII)
    result = clusteringSolution.executeClustering()

    for cluster in result:
        print("Symbol: " + str(cluster.getName()))
        for msg in cluster.getMessages():
            print(" + " + str(msg.getStringData()))
def alignSymbols(self, symbols, project):
    """Cluster the given symbols with UPGMA and store the outcome in self.result.

    Symbols of the project's vocabulary that are not part of ``symbols``
    (matched on the string form of their ID) are left untouched and appended
    after the clustered ones. Clustering parameters are read from the
    project configuration; orphan reduction is run only when enabled there.

    :param symbols: the symbols to cluster/align
    :param project: the project providing the vocabulary and configuration
    :return: None; the resulting symbol list is stored in ``self.result``
    """
    # Function-scope import kept from the original code (presumably to
    # avoid a circular import -- TODO confirm).
    from netzob.Inference.Vocabulary.Alignment.UPGMA import UPGMA

    self.result = []

    # First we collect the symbols which wont be aligned. The IDs of the
    # symbols to align are stringified once so each vocabulary symbol needs
    # a single set lookup instead of a full scan of `symbols`.
    alignedIDs = set(str(s.getID()) for s in symbols)
    preResults = []
    for symbol in project.getVocabulary().getSymbols():
        if str(symbol.getID()) not in alignedIDs:
            logging.debug("Symbol " + str(symbol.getName()) + "[" + str(symbol.getID()) + "]] wont be aligned")
            preResults.append(symbol)

    # Then we retrieve all the parameters of the CLUSTERING / ALIGNMENT
    configuration = project.getConfiguration()
    defaultFormat = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_GLOBAL_FORMAT)
    nbIteration = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_NB_ITERATION)
    minEquivalence = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_EQUIVALENCE_THRESHOLD)
    doInternalSlick = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_DO_INTERNAL_SLICK)
    doOrphanReduction = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_ORPHAN_REDUCTION)

    # We try to cluster each symbol
    clusteringSolution = UPGMA(project, symbols, True, nbIteration, minEquivalence, doInternalSlick, defaultFormat, self.unitSize, self.cb_status)
    t1 = time.time()
    self.result = clusteringSolution.executeClustering()

    # We optionally handle orphans
    if doOrphanReduction:
        self.result = clusteringSolution.executeOrphanReduction()

    # Re-attach the symbols that were excluded from the alignment.
    # NOTE: the previous code also extended the result with an
    # `emptySymbols` list that was never populated; that no-op was dropped
    # together with the unused `listEqu` list.
    self.result.extend(preResults)
    logging.info("Time of clustering : " + str(time.time() - t1))