def concatSymbolButton_clicked_cb(self, toolButton):
    """Merge all the checked symbols into a single new symbol named "Merged".

    The messages of every checked symbol are added to the new symbol, the
    checked symbols are removed from the vocabulary of the current project,
    the new symbol is registered instead and the left panel is refreshed.
    An error message is displayed and nothing is done if no project is
    currently selected.

    @param toolButton: the widget which triggered the callback (unused)
    """
    project = self.getCurrentProject()
    if project is None:
        NetzobErrorMessage(_("No project selected."))
        return

    # Symbols the user checked in the view
    checkedSymbols = self.view.getCheckedSymbolList()

    # The symbol which receives the messages of all the checked ones
    mergedSymbol = Symbol(str(uuid.uuid4()), "Merged", project)
    for checked in checkedSymbols:
        mergedSymbol.addMessages(checked.getMessages())

    # The message tables are emptied before the symbols are removed
    self.view.emptyMessageTableDisplayingSymbols(checkedSymbols)
    vocabulary = project.getVocabulary()
    for checked in checkedSymbols:
        vocabulary.removeSymbol(checked)

    # Register the merged symbol and refresh the view
    vocabulary.addSymbol(mergedSymbol)
    self.view.updateLeftPanel()
def __init__(self, project, symbols, explodeSymbols, nbIteration, minEquivalence, doInternalSlick, defaultFormat, unitSize, cb_status=None, scores=None):
    """Prepare the clustering of the provided symbols.

    @param project: the project given to every symbol created here
    @param symbols: the symbols to cluster
    @param explodeSymbols: if equal to False the symbols are kept as
        provided, otherwise a new symbol is created for each of their
        messages
    @param nbIteration: stored in self.nbIteration
    @param minEquivalence: stored in self.minEquivalence
    @param doInternalSlick: stored in self.doInternalSlick
    @param defaultFormat: stored in self.defaultFormat
    @param unitSize: stored in self.unitSize
    @param cb_status: stored in self.cb_status (presumably a status
        callback - confirm against the callers)
    @param scores: stored in self.scores; a new empty dict is created for
        each instance when it is omitted
    """
    # A literal {} default would be created once and shared by every
    # instance built without an explicit "scores" argument.
    if scores is None:
        scores = {}

    self.project = project
    self.nbIteration = nbIteration
    self.minEquivalence = minEquivalence
    self.doInternalSlick = doInternalSlick
    self.cb_status = cb_status
    self.defaultFormat = defaultFormat
    self.unitSize = unitSize
    self.log = logging.getLogger('netzob.Inference.Vocabulary.UPGMA.py')
    self.scores = scores
    self.path = []

    # The comparison is kept as "== False" on purpose: a None value still
    # triggers the explosion of the symbols, as it always did.
    if explodeSymbols == False:
        self.symbols = symbols
    else:
        # Create a symbol for each message
        self.symbols = []
        i_symbol = 1
        for symbol in symbols:
            for m in symbol.getMessages():
                tmpSymbol = Symbol(str(uuid.uuid4()), "Symbol " + str(i_symbol), project)
                tmpSymbol.addMessage(m)
                self.symbols.append(tmpSymbol)
                i_symbol += 1

    # NOTE(review): the logged number is the number of provided symbols,
    # not the number of symbols obtained after the explosion.
    self.log.debug("A number of {0} already aligned symbols will be clustered.".format(str(len(symbols))))
def __init__(self, project, symbols, explodeSymbols, doInternalSlick, defaultFormat, unitSize, cb_status=None):
    """Prepare the alignment of the provided symbols.

    @param project: the project given to every symbol created here
    @param symbols: the symbols to work on
    @param explodeSymbols: if it is False the symbols are kept as
        provided, otherwise a new symbol is created for each message
    @param doInternalSlick: stored in self.doInternalSlick
    @param defaultFormat: stored in self.defaultFormat
    @param unitSize: stored in self.unitSize
    @param cb_status: stored in self.cb_status
    """
    self.project = project
    self.doInternalSlick = doInternalSlick
    self.cb_status = cb_status
    self.defaultFormat = defaultFormat
    self.unitSize = unitSize
    self.log = logging.getLogger('netzob.Inference.Vocabulary.AsciiAlign.py')
    self.server = ""

    if explodeSymbols is False:
        # Nothing to explode: work directly on the provided symbols
        self.symbols = symbols
        return

    # One symbol per message; the tokens of the message pattern are all
    # reset to the "constant" type before the pattern is given to the symbol
    self.symbols = []
    for sourceSymbol in symbols:
        for message in sourceSymbol.getMessages():
            for token in message.getPattern()[1]:
                token.setType("constant")
            # One symbol is appended per message, hence the numbering
            symbolName = "Symbol " + str(len(self.symbols) + 1)
            explodedSymbol = Symbol(str(uuid.uuid4()), symbolName, project)
            explodedSymbol.setPattern(message.getPattern())
            explodedSymbol.addMessage(message)
            self.symbols.append(explodedSymbol)
def __init__(self, project, symbols, unitSize, cb_status=None, scores=None):
    """Prepare the clustering of the messages of the provided symbols.

    The parameters of the clustering and of the alignment are read from
    the configuration of the project. A new symbol is created for each
    message of the provided symbols.

    @param project: the project whose configuration is read; it is also
        given to every symbol created here
    @param symbols: the symbols whose messages will be clustered
    @param unitSize: stored in self.unitSize
    @param cb_status: stored in self.cb_status
    @param scores: stored in self.scores; a new empty dict is created for
        each instance when it is omitted
    """
    self.project = project
    self.unitSize = unitSize
    self.cb_status = cb_status
    # A literal {} default would be created once and shared by every
    # instance built without an explicit "scores" argument.
    self.scores = {} if scores is None else scores

    # Then we retrieve all the parameters of the CLUSTERING / ALIGNMENT
    configuration = self.project.getConfiguration()
    self.defaultFormat = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_GLOBAL_FORMAT)
    self.nbIteration = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_NB_ITERATION)
    self.minEquivalence = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_EQUIVALENCE_THRESHOLD)
    self.doInternalSlick = configuration.getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_DO_INTERNAL_SLICK)

    self.log = logging.getLogger('netzob.Inference.Vocabulary.UPGMA.py')
    self.path = []
    self.flagStop = False
    self.currentAlignment = None

    # Create a symbol for each message
    self.symbols = []
    i_symbol = 1
    for symbol in symbols:
        for m in symbol.getMessages():
            tmpSymbol = Symbol(str(uuid.uuid4()), "Symbol " + str(i_symbol), project)
            tmpSymbol.addMessage(m)
            self.symbols.append(tmpSymbol)
            i_symbol += 1

    # NOTE(review): the logged number is the number of provided symbols,
    # not the number of symbols created above.
    self.log.debug("A number of {0} already aligned symbols will be clustered.".format(str(len(symbols))))
def test_semanticAlignment_bug1(self):
    """test_semanticAlignment_bug1:
    Non-regression test for a bug of the semantic alignment which
    prevented the computation of a valid regex.

    Two messages made of <digit><firstname><8 characters><email> are
    tagged with the "firstname" and "email" semantic tags, then aligned.
    More than one field must be computed.

    @date 18/04/2013
    """
    # (leading digit, firstname, filler, email) of each message
    samples = [
        ("6", "antoine", "GAHFSHQS", "*****@*****.**"),
        ("3", "luc", "CVSDHISD", "*****@*****.**"),
    ]
    messages = []
    for digit, firstname, filler, email in samples:
        rawData = TypeConvertor.stringToNetzobRaw(digit + firstname + filler + email)
        messages.append(RawMessage(uuid.uuid4(), None, rawData))

    project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
    nwEngine = NeedlemanAndWunsch(8, project, False, None)
    symbol = Symbol(uuid.uuid4(), "Test", project)
    symbol.addMessages(messages)

    # Tag positions are twice the character positions: one leading
    # character (2), then the firstname, then the 8 filler characters (16)
    for message, (digit, firstname, filler, email) in zip(messages, samples):
        firstnameStart = 2
        firstnameEnd = firstnameStart + len(firstname) * 2
        emailStart = firstnameEnd + 16
        emailEnd = emailStart + len(email) * 2
        message.addSemanticTag("firstname", firstnameStart, firstnameEnd)
        message.addSemanticTag("email", emailStart, emailEnd)

    field = symbol.getField()
    nwEngine.alignField(field)
    symbol.getField().setFormat(Format.STRING)

    print("Computed Regex : {0}".format(symbol.getRegex()))
    print(symbol.getCells(True))

    computedFields = symbol.getExtendedFields()
    self.assertTrue(len(computedFields) > 1, "Only one field has been computed which tells us something went wrong.")
def traceImportInProjectAction_activate_cb(self, button):
    """Callback of the 'ImportInProjectAction'.

    Imports the current trace, or the session selected in it, in the
    currently opened project: a new symbol receives all the messages, and
    the messages, the symbol and the sessions are registered in the
    vocabulary of the project.

    FIXME: the code doesn't allow to remove duplicated messages.

    @param button: the widget which triggered the callback (unused)
    """
    project = self.mainController.getCurrentProject()

    # A trace can only be imported in an opened project.
    if project is None:
        self.view.showErrorMessage(
            _("No project is currently opened"),
            _("A project has to be opened before importing a trace."))
        return

    # Nothing to do unless the dialog returns the response 1
    if self.view.showImportInProjectDialog() != 1:
        return

    trace = self.currentTrace
    self.log.info(
        "Asked to import trace '{0}' (id={1}) in the current project".
        format(trace.name, trace.id))
    symbolName = self.view.importInProjectNameEntry.get_text()

    if self.traceSelectionIsATrace:
        # The whole trace is imported
        messages = self.currentTrace.getMessages()
        sessions = self.currentTrace.getSessions()
    else:
        # Only the selected session is imported
        selection = self.view.traceTreeview.get_selection()
        model, path = selection.get_selected_rows()
        # UI prevents from importing multiple sessions at a time.
        assert len(path) == 1
        selectedSession = self.currentTrace.getSession(model[path.pop()][0])
        sessions = [selectedSession]
        messages = selectedSession.getMessages()

    # The new symbol which receives every imported message
    symbol = Symbol(str(uuid.uuid4()), symbolName, project)
    for importedMessage in messages:
        # Each message is also registered in the vocabulary of the project
        project.getVocabulary().addMessage(importedMessage)
        symbol.addMessage(importedMessage)
    project.getVocabulary().addSymbol(symbol)

    for importedSession in sessions:
        project.getVocabulary().addSession(importedSession)
def traceImportInProjectAction_activate_cb(self, button):
    """This callback is called by the 'ImportInProjectAction'.

    It allows to import a trace or a session in the currently opened
    project: a new symbol receives all the messages, and the messages,
    the symbol and the sessions are registered in the vocabulary of the
    project.

    FIXME: the code doesn't allow to remove duplicated messages.

    @param button: the widget which triggered the callback (unused)
    """
    project = self.mainController.getCurrentProject()

    # We are only able to import a trace if a project is open.
    if project is None:
        self.view.showErrorMessage(_("No project is currently opened"),
                                   _("A project has to be opened before importing a trace."))
        return

    response = self.view.showImportInProjectDialog()
    if response != 1:
        return

    # The replacement fields are numbered from 0: the previous {1}/{2}
    # numbering raised an IndexError with only two arguments.
    self.log.info("Asked to import trace '{0}' (id={1}) in the current project".format(self.currentTrace.name, self.currentTrace.id))
    symbolName = self.view.importInProjectNameEntry.get_text()

    if self.traceSelectionIsATrace:
        messages = self.currentTrace.getMessages()
        sessions = self.currentTrace.getSessions()
    else:
        model, path = self.view.traceTreeview.get_selection().get_selected_rows()
        # UI prevents from importing multiple sessions at a time.
        assert len(path) == 1
        session = self.currentTrace.getSession(model[path.pop()][0])
        sessions = [session]
        messages = session.getMessages()

    # We add a new Symbol
    symbol = Symbol(str(uuid.uuid4()), symbolName, project)

    # We register each message in the vocabulary of the project
    for message in messages:
        project.getVocabulary().addMessage(message)
        symbol.addMessage(message)
    project.getVocabulary().addSymbol(symbol)

    for session in sessions:
        project.getVocabulary().addSession(session)
def mergeEffectiveRowCol(self, i_maximum, j_maximum):
    """Merge the symbols i and j in the "symbols" structure

    The two symbols are removed from self.symbols and replaced by a new
    symbol appended at the end of the list. The new symbol takes the name
    of the symbol removed first (the one with the highest index), the
    merge of both patterns and the messages of both symbols.

    @param i_maximum: index in self.symbols of the first symbol to merge
    @param j_maximum: index in self.symbols of the second symbol to merge
    """
    # Extract symbols i and j: the highest index is popped first so that
    # the lowest one still designates the expected symbol.
    if i_maximum > j_maximum:
        symbol1 = self.symbols.pop(i_maximum)
        symbol2 = self.symbols.pop(j_maximum)
    else:
        symbol1 = self.symbols.pop(j_maximum)
        symbol2 = self.symbols.pop(i_maximum)

    # Merge the symbols i and j
    messages = []
    messages.extend(symbol1.getMessages())
    messages.extend(symbol2.getMessages())

    newSymbol = Symbol(str(uuid.uuid4()), symbol1.getName(), self.project)
    # The pattern is merged and set only once: the same statement used to
    # be executed twice in a row with identical arguments.
    newSymbol.setPattern(self.mergePattern(symbol1.getPattern(), symbol2.getPattern()))
    for message in messages:
        newSymbol.addMessage(message)

    # Append the new symbol to the "symbols" structure
    self.symbols.append(newSymbol)
def test_deserialisationGroups(self):
    """Check that deserializeGroups returns the number of provided symbols.

    A random number of symbols (between 2 and 50) is generated, each one
    holding 6 messages which belong to 2 groups.
    """
    print("start")

    symbols = []
    for iSymbol in range(0, random.randint(2, 50)):
        originalSymbol = Symbol(str(uuid.uuid4()), "TestSymbol", None)

        # 6 messages of 2 groups: the three messages of group 1 first,
        # then the three messages of group 2
        messages = []
        for iMessage in range(3):
            messages.append(RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?")))
        for iMessage in range(3):
            messages.append(RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?")))

        for message in messages:
            originalSymbol.addMessage(message)
        symbols.append(originalSymbol)

    # Start the clustering (built with the last generated symbol only)
    clusteringSolution = UPGMA(None, [originalSymbol], True, 100, 90, True)
    result = clusteringSolution.deserializeGroups(symbols)
    self.assertEqual(result, len(symbols))
def importTraceInProject(self, trace, project):
    """Import the messages of a trace in a project.

    A symbol dedicated to the trace is created with all its messages and
    a default field, registered in the vocabulary of the project, and the
    configuration file of the project is saved. The fraction of the
    alignment progress bar is updated after each message.

    @param trace: the trace to import
    @param project: the project which receives the new symbol
    """
    messages = trace.getMessages()
    nbMessages = len(messages)
    # An empty trace has no progression to report; without this guard the
    # computation of the increment raised a ZeroDivisionError.
    inc = 1.0 / nbMessages if nbMessages else 0.0
    percent = 0.0

    # We create a symbol dedicated for the trace
    symbol = Symbol(uuid.uuid4(), trace.getType(), project)
    for message in messages:
        percent += inc
        symbol.addMessage(message)
        # The update of the progress bar is scheduled through idle_add
        gobject.idle_add(self.progressbarAlignment.set_fraction, float(percent))

    # We create a default field for the symbol
    symbol.addField(Field.createDefaultField())

    # and register the symbol in the vocabulary of the project
    project.getVocabulary().addSymbol(symbol)
    project.saveConfigFile(self.workspace)
def createSymbolButton_clicked_cb(self, toolButton):
    """Ask the user for a name and create a new symbol with it.

    The "createsymbol" dialog of dialogbox.glade is displayed. If it is
    closed with the response 0, a new symbol with the entered name is
    added to the vocabulary of the current project and the left panel is
    refreshed. The dialog is destroyed whatever the response is.

    @param toolButton: the widget which triggered the callback (unused)
    """
    if self.getCurrentProject() is None:
        NetzobErrorMessage(_("No project selected."))
        return

    builder2 = Gtk.Builder()
    builder2.add_from_file(
        os.path.join(ResourcesConfiguration.getStaticResources(), "ui",
                     "dialogbox.glade"))
    dialog = builder2.get_object("createsymbol")
    dialog.set_transient_for(self.netzob.view.mainWindow)

    # Disable apply button if no text
    applybutton = builder2.get_object("button1")
    entry = builder2.get_object("entry1")
    entry.connect("changed", self.entry_disableButtonIfEmpty_cb,
                  applybutton)

    try:
        result = dialog.run()
        if result == 0:
            newSymbolName = entry.get_text()
            newSymbolId = str(uuid.uuid4())
            self.log.debug(
                "A new symbol will be created with the given name: {0}".format(
                    newSymbolName))
            currentProject = self.netzob.getCurrentProject()
            newSymbol = Symbol(newSymbolId, newSymbolName, currentProject)
            currentProject.getVocabulary().addSymbol(newSymbol)
            self.view.updateLeftPanel()
    finally:
        # The dialog used to be destroyed only for the responses 0 and 1:
        # any other response (e.g. the window being closed) left it
        # undestroyed.
        dialog.destroy()
def loadVocabulary(xmlRoot, namespace_project, namespace_common, version, project):
    """Build a vocabulary from its XML definition.

    Only the version "0.1" is handled: the messages, then the symbols and
    finally the sessions found under xmlRoot are loaded and registered.
    An empty vocabulary is returned for any other version.

    @param xmlRoot: the XML element in which the definitions are searched
    @param namespace_project: the XML namespace of the project elements
    @param namespace_common: the XML namespace of the common elements
    @param version: the version of the XML format
    @param project: the project given to the loader of the symbols
    @return the loaded vocabulary
    """
    vocabulary = Vocabulary()
    if version != "0.1":
        return vocabulary

    def qualified(namespace, tag):
        # Name of a tag prefixed by its namespace between curly brackets
        return "{" + namespace + "}" + tag

    # Messages
    messagesPath = qualified(namespace_project, "messages") + "/" + qualified(namespace_common, "message")
    for xmlMessage in xmlRoot.findall(messagesPath):
        message = AbstractMessageFactory.loadFromXML(xmlMessage, namespace_common, version)
        if message is not None:
            vocabulary.addMessage(message)

    # Symbols
    symbolsPath = qualified(namespace_project, "symbols") + "/" + qualified(namespace_project, "symbol")
    for xmlSymbol in xmlRoot.findall(symbolsPath):
        symbol = Symbol.loadSymbol(xmlSymbol, namespace_project, namespace_common, version, project, vocabulary)
        if symbol is not None:
            vocabulary.addSymbol(symbol)

    # Sessions
    sessionsPath = qualified(namespace_project, "sessions") + "/" + qualified(namespace_common, "session")
    for xmlSession in xmlRoot.findall(sessionsPath):
        session = Session.loadFromXML(xmlSession, namespace_project, namespace_common, version, vocabulary)
        if session is not None:
            vocabulary.addSession(session)

    return vocabulary
def mergeEffectiveRowCol(self, i_maximum, j_maximum):
    """Merge the symbols i and j in the "symbols" structure

    Both symbols are removed from self.symbols and a new symbol holding
    the messages of both of them is appended at the end of the list.

    @param i_maximum: id of the first symbol to merge
    @param j_maximum: id of the second symbol to merge
    @return the ID of the newly created symbol, result of the merge
    """
    # The highest index is popped first so that the lowest one still
    # designates the expected symbol.
    if i_maximum > j_maximum:
        highest, lowest = i_maximum, j_maximum
    else:
        highest, lowest = j_maximum, i_maximum
    symbol1 = self.symbols.pop(highest)
    symbol2 = self.symbols.pop(lowest)

    # Messages of the first extracted symbol, then those of the second one
    mergedMessages = list(symbol1.getMessages())
    mergedMessages.extend(symbol2.getMessages())

    # The merged symbol takes the name of the first extracted symbol
    mergedSymbol = Symbol(str(uuid.uuid4()), symbol1.getName(), self.project)
    mergedSymbol.setMinEqu(self.minEquivalence)
    for mergedMessage in mergedMessages:
        mergedSymbol.addMessage(mergedMessage)

    # Append the new symbol to the "symbols" structure
    self.symbols.append(mergedSymbol)
    return mergedSymbol.getID()
def test_randomAlignmentsWithTwoCenteredMessages(self):
    """Align random couples of messages which share a centered pattern.

    Each message is built as <random string><common pattern><random
    string>. The test fails if the alignment computed for at least one
    couple does not contain the common pattern.
    """
    workspace = self.getWorkspace()
    currentProject = workspace.getProjects()[0]

    # Read from the configuration of the project but not used afterwards
    doInternalSlick = currentProject.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_DO_INTERNAL_SLICK)
    defaultFormat = currentProject.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_GLOBAL_FORMAT)
    defaultUnitSize = 8

    # We generate 1000 random couples of data and try to align them
    # Objectives: just test if it executes
    nb_data = 1000
    nb_failed = 0
    nb_success = 0

    for i_test in range(0, nb_data):
        common_pattern = self.generateRandomString(30, 40)

        # Content of the two messages, with the common pattern in the middle
        data1 = TypeConvertor.stringToNetzobRaw(self.generateRandomString(5, 100) + common_pattern + self.generateRandomString(5, 100))
        data2 = TypeConvertor.stringToNetzobRaw(self.generateRandomString(5, 100) + common_pattern + self.generateRandomString(5, 100))

        # A symbol which holds the two messages
        message1 = RawMessage(str(uuid.uuid4()), str(time.time()), data1)
        message2 = RawMessage(str(uuid.uuid4()), str(time.time()), data2)
        symbol = Symbol(str(uuid.uuid4()), "test_randomAlignments#" + str(i_test), currentProject)
        for message in (message1, message2):
            symbol.addMessage(message)
        field = symbol.getField()

        # Starts the alignment process
        alignmentProcess = NeedlemanAndWunsch(defaultUnitSize, currentProject, False, self.emptyAlignmentCB)
        alignmentProcess.alignField(field)

        if TypeConvertor.stringToNetzobRaw(common_pattern[:]) in field.getAlignment():
            nb_success += 1
            continue

        # The common pattern has not been found in the alignment
        if self.debug is True:
            print("Message 1: " + str(data1))
            print("Message 2: " + str(data2))
            print("Common pattern: " + TypeConvertor.stringToNetzobRaw(common_pattern))
            print("Alignment: " + field.getAlignment())
        nb_failed += 1

    if nb_failed > 0:
        print("A number of " + str(nb_failed) + "/" + str(nb_data) + " alignment failed !")
    self.assertEqual(0, nb_failed)
    self.assertEqual(nb_success, nb_data)
def test_deserialisationGroups(self):
    """Check that deserializeGroups returns the number of provided symbols.

    Between 2 and 50 symbols are generated; each of them holds 6 messages
    which belong to 2 groups.
    """
    print("start")

    # (prefix, (min, max) of the random part, suffix) of the 6 messages:
    # group 1 first, then group 2
    group1 = ("bonjour ", (20, 30), " comment vas-tu ?")
    group2 = ("salut à toi ", (10, 15), " what's up ?")
    layouts = [group1, group1, group1, group2, group2, group2]

    symbols = []
    nbSymbol = random.randint(2, 50)
    for iSymbol in range(0, nbSymbol):
        originalSymbol = Symbol(str(uuid.uuid4()), "TestSymbol", None)

        # All the messages are created before being added to the symbol
        messages = [
            RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw(prefix + self.generateRandomString(bounds[0], bounds[1]) + suffix))
            for prefix, bounds, suffix in layouts
        ]
        for message in messages:
            originalSymbol.addMessage(message)
        symbols.append(originalSymbol)

    # Start the clustering (built with the last generated symbol only)
    clusteringSolution = UPGMA(None, [originalSymbol], True, 100, 90, True)
    result = clusteringSolution.deserializeGroups(symbols)
    self.assertEqual(result, len(symbols))
def mergeEffectiveRowCol(self, i_maximum, j_maximum):
    """Merge the symbols i and j in the "symbols" structure

    Both symbols are removed from self.symbols. A new symbol, created
    with the merge of their patterns and holding the messages of both of
    them, is appended at the end of the list.

    @param i_maximum: index in self.symbols of the first symbol to merge
    @param j_maximum: index in self.symbols of the second symbol to merge
    """
    # The highest index is popped first so that the lowest one still
    # designates the expected symbol.
    if i_maximum > j_maximum:
        firstIndex, secondIndex = i_maximum, j_maximum
    else:
        firstIndex, secondIndex = j_maximum, i_maximum
    symbol1 = self.symbols.pop(firstIndex)
    symbol2 = self.symbols.pop(secondIndex)

    # Messages of the first extracted symbol, then those of the second one
    mergedMessages = list(symbol1.getMessages())
    mergedMessages.extend(symbol2.getMessages())

    # The merged symbol takes the name of the first extracted symbol
    symbolID = str(uuid.uuid4())
    symbolName = symbol1.getName()
    mergedPattern = self.mergePattern(symbol1.getPattern(), symbol2.getPattern())
    newSymbol = Symbol(symbolID, symbolName, self.project, pattern=mergedPattern)
    for mergedMessage in mergedMessages:
        newSymbol.addMessage(mergedMessage)

    # Append the new symbol to the "symbols" structure
    self.symbols.append(newSymbol)
def __init__(self, project, symbols, explodeSymbols, doInternalSlick, defaultFormat, unitSize, cb_status=None):
    """Store the parameters of the alignment and prepare its symbols.

    @param project: the project given to every symbol created here
    @param symbols: the symbols to work on
    @param explodeSymbols: if it is False the provided symbols are used
        as they are, otherwise one symbol is created per message
    @param doInternalSlick: stored in self.doInternalSlick
    @param defaultFormat: stored in self.defaultFormat
    @param unitSize: stored in self.unitSize
    @param cb_status: stored in self.cb_status
    """
    self.project = project
    self.doInternalSlick = doInternalSlick
    self.cb_status = cb_status
    self.defaultFormat = defaultFormat
    self.unitSize = unitSize
    self.log = logging.getLogger('netzob.Inference.Vocabulary.AsciiAlign.py')
    self.server = ""
    self.symbols = symbols

    if explodeSymbols is not False:
        # Create a symbol for each message and reset to constant the tokens
        self.symbols = []
        counter = 0
        for providedSymbol in symbols:
            for message in providedSymbol.getMessages():
                counter += 1
                for token in message.getPattern()[1]:
                    token.setType("constant")
                perMessageSymbol = Symbol(str(uuid.uuid4()), "Symbol " + str(counter), project)
                perMessageSymbol.setPattern(message.getPattern())
                perMessageSymbol.addMessage(message)
                self.symbols.append(perMessageSymbol)
def mergeEffectiveRowCol(self, i_maximum, j_maximum):
    """Merge the symbols i and j in the "symbols" structure

    Both symbols are removed from self.symbols and a new symbol, created
    with self.minEquivalence and holding the messages of both of them, is
    appended at the end of the list.

    @param i_maximum: index in self.symbols of the first symbol to merge
    @param j_maximum: index in self.symbols of the second symbol to merge
    @return the ID of the newly created symbol
    """
    # The highest index is popped first so that the lowest one still
    # designates the expected symbol.
    if i_maximum > j_maximum:
        popOrder = (i_maximum, j_maximum)
    else:
        popOrder = (j_maximum, i_maximum)
    symbol1 = self.symbols.pop(popOrder[0])
    symbol2 = self.symbols.pop(popOrder[1])

    # Messages of the first extracted symbol, then those of the second one
    allMessages = list(symbol1.getMessages())
    allMessages.extend(symbol2.getMessages())

    # The merged symbol takes the name of the first extracted symbol
    merged = Symbol(str(uuid.uuid4()), symbol1.getName(), self.project, minEqu=self.minEquivalence)
    for oneMessage in allMessages:
        merged.addMessage(oneMessage)

    # Append the new symbol to the "symbols" structure
    self.symbols.append(merged)
    return merged.getID()
def test_semanticAlignment_bug1(self):
    """test_semanticAlignment_bug1:
    A bug of the semantic alignment which prevented the computation of a
    valid regex has been identified. This test verifies the bug is not
    coming back.

    @date 18/04/2013
    """
    firstname1, email1 = "antoine", "*****@*****.**"
    firstname2, email2 = "luc", "*****@*****.**"

    msg1 = RawMessage(uuid.uuid4(), None, TypeConvertor.stringToNetzobRaw("6" + firstname1 + "GAHFSHQS" + email1))
    msg2 = RawMessage(uuid.uuid4(), None, TypeConvertor.stringToNetzobRaw("3" + firstname2 + "CVSDHISD" + email2))

    project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
    nwEngine = NeedlemanAndWunsch(8, project, False, None)
    symbol = Symbol(uuid.uuid4(), "Test", project)
    symbol.addMessages([msg1, msg2])

    # Tag positions are twice the character positions: the firstname
    # starts after one character (2) and the email starts 8 characters
    # (16) after the end of the firstname.
    firstnameEnd1 = 2 + len(firstname1) * 2
    msg1.addSemanticTag("firstname", 2, firstnameEnd1)
    msg1.addSemanticTag("email", firstnameEnd1 + 16, firstnameEnd1 + 16 + len(email1) * 2)

    firstnameEnd2 = 2 + len(firstname2) * 2
    msg2.addSemanticTag("firstname", 2, firstnameEnd2)
    msg2.addSemanticTag("email", firstnameEnd2 + 16, firstnameEnd2 + 16 + len(email2) * 2)

    nwEngine.alignField(symbol.getField())
    symbol.getField().setFormat(Format.STRING)

    print("Computed Regex : {0}".format(symbol.getRegex()))
    print("=======")
    print(symbol.getCells(True))

    computedFields = symbol.getExtendedFields()
    self.assertTrue(len(computedFields) > 1, "Only one field has been computed which tells us something went wrong.")
def loadVocabulary(xmlRoot, namespace_project, namespace_common, version, project):
    """Load a vocabulary from its XML definition.

    For the version "0.1", the messages, the symbols and the sessions
    found under xmlRoot are loaded, in that order, and registered in a new
    vocabulary. Nothing is loaded for any other version.

    @param xmlRoot: the XML element in which the definitions are searched
    @param namespace_project: the XML namespace of the project elements
    @param namespace_common: the XML namespace of the common elements
    @param version: the version of the XML format
    @param project: the project given to the loader of the symbols
    @return the vocabulary (empty if the version is not handled)
    """
    vocabulary = Vocabulary()

    if version == "0.1":
        # Each section: (container tag, namespace of the children, child
        # tag, loader of one XML node, registration in the vocabulary)
        sections = (
            ("messages", namespace_common, "message",
             lambda node: AbstractMessageFactory.loadFromXML(node, namespace_common, version),
             vocabulary.addMessage),
            ("symbols", namespace_project, "symbol",
             lambda node: Symbol.loadSymbol(node, namespace_project, namespace_common, version, project, vocabulary),
             vocabulary.addSymbol),
            ("sessions", namespace_common, "session",
             lambda node: Session.loadFromXML(node, namespace_project, namespace_common, version, vocabulary),
             vocabulary.addSession),
        )
        for container, childNamespace, child, load, register in sections:
            xpath = "{" + namespace_project + "}" + container + "/{" + childNamespace + "}" + child
            for node in xmlRoot.findall(xpath):
                loaded = load(node)
                # Nodes which cannot be loaded are ignored
                if loaded is not None:
                    register(loaded)

    return vocabulary
def importButton_clicked_cb(self, widget):
    """Callback executed when the user wants to import messages

    A session and a symbol (named after the text entered by the user)
    are created with the imported messages and registered in the
    vocabulary of the current project. The same messages are also stored
    in the workspace as an imported trace; the workspace is then saved,
    the view destroyed and the finish callback scheduled if there is one.

    @param widget: the widget which triggered the callback (unused)
    """
    project = self.currentProject
    if project is None:
        self.log.error("No project is open")
        return

    # The name of the symbol is mandatory and must not be already used
    symbolName = self._view.nameOfCreatedSymbolEntry.get_text()
    if symbolName is None or len(symbolName) == 0:
        self.displayErrorMessage(_("Specify the name of new symbol"))
        return
    if any(existing.getName() == symbolName for existing in project.getVocabulary().getSymbols()):
        self.displayErrorMessage(_("The provided symbol name already exists."))
        return

    # Should we consider meta datas of excluded messages
    keepProperties = (self._view.removeDuplicatedMessagesCheckButton.get_active()
                      and self._view.keepPropertiesOfDuplicatedMessagesCheckButton.get_active())
    if keepProperties:
        for excluded in self.excludedMessages:
            # First imported message which has the same string data
            twin = next((imported for imported in self.importedMessages
                         if imported.getStringData() == excluded.getStringData()), None)
            if twin is None:
                continue
            # The properties of the excluded message are given to its twin
            for messageProperty in excluded.getProperties():
                twin.addExtraProperty(messageProperty)

    # A single session holds every imported message
    session = Session(str(uuid.uuid4()), "Session 1", "")
    for imported in self.importedMessages:
        session.addMessage(imported)
    project.getVocabulary().addSession(session)

    # Each message is registered in the vocabulary of the project
    for imported in self.importedMessages:
        project.getVocabulary().addMessage(imported)
        imported.setSession(session)

    # A symbol dedicated to the imported messages
    symbol = Symbol(str(uuid.uuid4()), symbolName, project)
    for imported in self.importedMessages:
        symbol.addMessage(imported)
    project.getVocabulary().addSymbol(symbol)

    # Add the environmental dependencies to the project
    # if fetchEnv:
    #     project.getConfiguration().setVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_ENVIRONMENTAL_DEPENDENCIES,
    #                                                                self.envDeps.getEnvData())

    # The session and the messages are also saved in the workspace
    date = datetime.now()
    description = "No description (yet not implemented)"
    trace = ImportedTrace(str(uuid.uuid4()), date, self.importType, description, project.getName())
    trace.addSession(session)
    for imported in self.importedMessages:
        trace.addMessage(imported)
    self.currentWorkspace.addImportedTrace(trace)

    # Now we save the workspace
    self.currentWorkspace.saveConfigFile()

    self._view.destroy()

    if self.finish_cb is not None:
        GObject.idle_add(self.finish_cb)
def test_executingClusteringWithOrphanReduction(self):
    """Check that the orphan reduction never increases the number of symbols.

    Six symbols of one message each are clustered: the messages of the
    first three start with "bonjour ", those of the last three with
    "salut ". The results of both steps are displayed when the orphan
    reduction decreased the number of symbols.
    """
    # (name of the symbol, prefix of its message): group 1 then group 2
    definitions = [
        ("TestSymbol", "bonjour "),
        ("TestSymbol2", "bonjour "),
        ("TestSymbol3", "bonjour "),
        ("TestSymbol4", "salut "),
        ("TestSymbol5", "salut "),
        ("TestSymbol6", "salut "),
    ]
    symbols = []
    for symbolName, prefix in definitions:
        originalSymbol = Symbol(str(uuid.uuid4()), symbolName, None)
        message = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw(prefix + self.generateRandomString(200, 1000)))
        originalSymbol.addMessage(message)
        symbols.append(originalSymbol)

    # Start the clustering
    clusteringSolution = UPGMA(None, symbols, True, 100, 80, True, Format.ASCII)
    resultBeforeOrphan = clusteringSolution.executeClustering()
    resultAfterOrphan = clusteringSolution.executeOrphanReduction()

    if len(resultAfterOrphan) < len(resultBeforeOrphan):
        reports = (("Before Orphan Reduction: ", resultBeforeOrphan),
                   ("After Orphan Reduction: ", resultAfterOrphan))
        for title, clusteredSymbols in reports:
            print(title)
            for symbol in clusteredSymbols:
                print("Symbol: " + str(symbol.getName()))
                for m in symbol.getMessages():
                    print(" + " + str(m.getStringData()))

    self.assertGreaterEqual(len(resultBeforeOrphan), len(resultAfterOrphan))
def test_executingClustering(self):
    """Execute the clustering on six symbols and display its result.

    Each symbol holds one message. The messages of the first three
    symbols share the "bonjour ... comment vas-tu ?" layout, those of the
    last three the "salut à toi ... what's up ?" layout. No assertion is
    made on the result.
    """
    # (prefix, (min, max) of the random part, suffix) of each group
    group1 = ("bonjour ", (20, 30), " comment vas-tu ?")
    group2 = ("salut à toi ", (10, 15), " what's up ?")
    definitions = [
        ("TestSymbol", group1),
        ("TestSymbol2", group1),
        ("TestSymbol3", group1),
        ("TestSymbol4", group2),
        ("TestSymbol5", group2),
        ("TestSymbol6", group2),
    ]

    symbols = []
    for symbolName, (prefix, bounds, suffix) in definitions:
        originalSymbol = Symbol(str(uuid.uuid4()), symbolName, None)
        message = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw(prefix + self.generateRandomString(bounds[0], bounds[1]) + suffix))
        originalSymbol.addMessage(message)
        symbols.append(originalSymbol)

    # Start the clustering
    clusteringSolution = UPGMA(None, symbols, True, 100, 90, True, Format.ASCII)
    result = clusteringSolution.executeClustering()

    # Display the messages of each computed symbol
    for symbol in result:
        print("Symbol: " + str(symbol.getName()))
        for m in symbol.getMessages():
            print(" + " + str(m.getStringData()))
def importButton_clicked_cb(self, widget):
    """Callback executed when the user wants to import messages

    The imported messages are gathered in a new session and in a new
    symbol whose name is read from the view; both are registered in the
    vocabulary of the current project. An imported trace holding the
    session and the messages is added to the workspace, which is then
    saved. Finally the view is destroyed and the finish callback, if
    any, is scheduled.

    @param widget: the widget which triggered the callback (unused)
    """
    if self.currentProject is None:
        self.log.error("No project is open")
        return
    currentProject = self.currentProject
    view = self._view

    # retrieve symbol name
    symbolName = view.nameOfCreatedSymbolEntry.get_text()
    if symbolName is None or len(symbolName) < 1:
        self.displayErrorMessage(_("Specify the name of new symbol"))
        return

    # The name must not be used by a symbol of the project
    for knownSymbol in currentProject.getVocabulary().getSymbols():
        if knownSymbol.getName() == symbolName:
            self.displayErrorMessage(
                _("The provided symbol name already exists."))
            return

    # Should we consider meta datas of excluded messages
    if view.removeDuplicatedMessagesCheckButton.get_active() and view.keepPropertiesOfDuplicatedMessagesCheckButton.get_active():
        for excludedMessage in self.excludedMessages:
            # search for an included message to register properties
            for importedMessage in self.importedMessages:
                if importedMessage.getStringData() == excludedMessage.getStringData():
                    # Only the first matching message receives the
                    # properties of the excluded one
                    for extraProperty in excludedMessage.getProperties():
                        importedMessage.addExtraProperty(extraProperty)
                    break

    # We create a session with each message
    session = Session(str(uuid.uuid4()), "Session 1", "")
    for importedMessage in self.importedMessages:
        session.addMessage(importedMessage)

    # We register the session in the vocabulary of the project
    currentProject.getVocabulary().addSession(session)

    # We register each message in the vocabulary of the project
    for importedMessage in self.importedMessages:
        currentProject.getVocabulary().addMessage(importedMessage)
        importedMessage.setSession(session)

    # We create a default symbol dedicated for this
    newSymbol = Symbol(str(uuid.uuid4()), symbolName, currentProject)
    for importedMessage in self.importedMessages:
        newSymbol.addMessage(importedMessage)

    # We register the symbol in the vocabulary of the project
    currentProject.getVocabulary().addSymbol(newSymbol)

    # Add the environmental dependencies to the project
    # if fetchEnv:
    #     project.getConfiguration().setVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_ENVIRONMENTAL_DEPENDENCIES,
    #                                                                self.envDeps.getEnvData())

    # Computes current date
    now = datetime.now()
    traceDescription = "No description (yet not implemented)"

    # We also save the session and the messages in the workspace
    trace = ImportedTrace(str(uuid.uuid4()), now, self.importType,
                          traceDescription, currentProject.getName())
    trace.addSession(session)
    for importedMessage in self.importedMessages:
        trace.addMessage(importedMessage)
    self.currentWorkspace.addImportedTrace(trace)

    # Now we save the workspace
    self.currentWorkspace.saveConfigFile()

    view.destroy()

    if self.finish_cb is not None:
        GObject.idle_add(self.finish_cb)
def test_semanticAlignment_simple(self):
    """test_semanticAlignment_simple:
    Test that messages with embedded semantic are efficiently aligned.
    Format : <random 10 bytes><random username><random 5 ASCII><random email>

    Optimal Needleman & Wunsch Parameters :
    // Cost definitions for the alignment
    static const short int MATCH = 5;
    static const short int SEMANTIC_MATCH = 30;
    static const short int MISMATCH = -5;
    static const short int GAP = 0;
    static const short int BLEN = 10;
    // Consts for the definition of a mask
    static const unsigned char END = 2;
    static const unsigned char DIFFERENT = 1;
    static const unsigned char EQUAL = 0;
    """
    project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
    symbol = Symbol(uuid.uuid4(), "Test", project)

    nbMessage = 500
    usernames = []
    emails = []
    for iMessage in range(0, nbMessage):
        # Random username
        str_username = self.generateRandomString(4, 10)
        username = TypeConvertor.stringToNetzobRaw(str_username)
        usernames.append(str_username)

        # Random email: <prefix>@<domain>.<extension>
        email_prefix = self.generateRandomString(4, 10)
        email_domain = self.generateRandomString(4, 10)
        email_extension = self.generateRandomString(2, 3)
        str_email = "{0}@{1}.{2}".format(email_prefix, email_domain, email_extension)
        emails.append(str_email)
        email = TypeConvertor.stringToNetzobRaw(str_email)

        random10Bytes = self.generateRandomBytes(10, 10)
        random5ASCII = TypeConvertor.stringToNetzobRaw(self.generateRandomString(5, 5))
        data = "{0}{1}{2}{3}".format(random10Bytes, username, random5ASCII, email)

        # The tags cover the username and the email inside the data
        usernameStart = len(random10Bytes)
        usernameEnd = usernameStart + len(username)
        emailStart = usernameEnd + len(random5ASCII)
        emailEnd = emailStart + len(email)

        message = RawMessage(uuid.uuid4(), None, data)
        message.addSemanticTag("username", usernameStart, usernameEnd)
        message.addSemanticTag("email", emailStart, emailEnd)
        symbol.addMessage(message)

    nwEngine = NeedlemanAndWunsch(8, project, False, None)
    nwEngine.alignField(symbol.getField())
    symbol.getField().setFormat(Format.STRING)

    print("Number of computed fields : {0}".format(len(symbol.getExtendedFields())))
    self.assertEqual(4, len(symbol.getExtendedFields()))

    # The computed regex must be valid for every message
    nbValidMessages = 0
    for message in symbol.getMessages():
        isValid = symbol.getField().isRegexValidForMessage(message)
        if isValid:
            nbValidMessages += 1
        self.assertTrue(isValid)

    print(symbol.getCells())
    print("Computed regex is valid for {0}/{1} messages.".format(nbValidMessages, len(symbol.getMessages())))