def getVisibleNodes(projectName):
    """Return the ids of the nodes of a project that are considered visible.

    Connects to the database of ``projectName`` and selects every vertex whose
    ``type`` is one of the visible statement types listed below. Vertices from
    which a ``ForInit`` or ``StructUnionEnum`` vertex can be reached by
    repeatedly following incoming ``AST_EDGE`` edges are dropped.

    The closing ``quit`` query is sent even when the id query raises, so a
    failing query no longer leaves the shell unreleased.

    :param projectName: name of the project whose database is queried.
    :returns: whatever ``DBInterface.runGremlinQuery`` yields for the ids.
    """
    db = DBInterface()
    db.connectToDatabase(projectName)

    visibleStatementTypes = [
        'CustomNode',
        'ClassDef',
        'DeclByClass',
        'DeclByType',
        'FunctionDef',
        'CompoundStatement',
        'Statement',
        'DeclStmt',
        'StructUnionEnum',
        'FunctionPointerDeclare',
        'TryStatement',
        'CatchStatement',
        'IfStatement',
        'ElseStatement',
        'SwitchStatement',
        'ForStatement',
        'DoStatement',
        'WhileStatement',
        'BreakStatement',
        'ContinueStatement',
        'GotoStatement',
        'Label',
        'ReturnStatement',
        'ThrowStatement',
        'ExpressionStatement',
        'IdentifierDeclStatement',
        'PreIfStatement',
        'PreElIfStatement',
        'PreElseStatement',
        'PreEndIfStatement',
        'PreDefine',
        'PreUndef',
        'PreDiagnostic',
        'PreOther',
        'PreInclude',
        'PreIncludeNext',
        'PreLine',
        'PrePragma',
        'UsingDirective',
        'BlockCloser',
        'Comment',
        'File',
        'Directory',
    ]

    # Remove unneeded nodes (we need to exclude IdentifierDeclStatement that
    # have a ForInit or StructUnionEnum as parent).
    # The Python repr of the list is spliced into within(...) as-is.
    query = (
        "g.V().has('type', within(%s)) "
        ".not(__.repeat(__.in(AST_EDGE)).emit()"
        ".has('type', within('ForInit','StructUnionEnum'))) "
        ".id()"
    ) % (visibleStatementTypes,)

    try:
        result = db.runGremlinQuery(query)
    finally:
        # Finally close db connection and release the shell, also on failure.
        db.runGremlinQuery("quit")

    return result
class StartTool(CmdLineTool):
    """Command line tool that runs one query and hands over its result.

    Subclasses are expected to override ``_constructQuery`` and
    ``_handleResult``; ``_runImpl`` ties the two together.
    """

    def __init__(self, DESCRIPTION):
        CmdLineTool.__init__(self, DESCRIPTION)

    # @Override
    def _constructQuery(self):
        """
        Build the query that is sent to the database.

        The default implementation returns None.
        """
        return None

    # @Override
    def _handleResult(self, res):
        """
        Consume the result of the query.

        The default implementation ignores ``res``.
        """
        return None

    def _runImpl(self):
        """Build the query, connect to the database, run it, handle the result."""
        # The query is built before the connection is opened.
        gremlinQuery = self._constructQuery()

        interface = DBInterface()
        self.dbInterface = interface
        interface.connectToDatabase()

        queryResult = interface.runGremlinQuery(gremlinQuery)
        self._handleResult(queryResult)
def initialize():
    """Read ``result.txt`` and connect to the project database it names.

    The first line of ``result.txt`` is the project name; every following
    line is kept as one id (trailing newline removed).

    :returns: a two-element list ``[db, idList]`` with the connected
        ``DBInterface`` and the remaining lines of the file.
    :raises IndexError: if ``result.txt`` is empty (no project name line).
    """
    # Get the ids from the SemanticUnit (first line is the projectName).
    # The context manager closes the file, which the bare open() never did.
    with open('result.txt') as resultFile:
        idList = [line.rstrip('\n') for line in resultFile]

    # Connect to project DB
    projectName = idList.pop(0)
    db = DBInterface()
    db.connectToDatabase(projectName)

    return [db, idList]
class ChunkStartTool(CmdLineTool):
    """Command line tool that processes the result of an id query in chunks.

    ``_runImpl`` runs the id query, splits the returned ids with
    ``DBInterface.chunks`` into pieces of ``CHUNK_SIZE`` and runs one query
    per chunk. Subclasses supply the queries and the result handling through
    the hook methods below.
    """

    def __init__(self, DESCRIPTION):
        CmdLineTool.__init__(self, DESCRIPTION)

    # @Override
    def _constructIdQuery(self):
        """Return the query whose result is the list of ids to process."""
        pass

    # @Override
    def _constructQueryForChunk(self, chunk):
        """Return the query to run for one ``chunk`` of ids."""
        pass

    # @Override
    def handleChunkResult(self, res, chunk):
        """Process the result ``res`` of the query run for ``chunk``."""
        pass

    def _handleChunkResult(self, res, chunk):
        """Forward a chunk result to ``handleChunkResult``.

        ``_runImpl`` calls this name, while the overridable hook is declared
        as ``handleChunkResult``; without this method the call failed with an
        AttributeError. Subclasses that override either name keep working.
        """
        return self.handleChunkResult(res, chunk)

    # @Override
    def _start(self):
        """Hook called after connecting and before the id query is run."""
        pass

    def _stop(self):
        """Hook called after all chunks have been processed."""
        pass

    def _runImpl(self):
        """Connect to the database and process all ids chunk by chunk."""
        self.dbInterface = DBInterface()
        self.dbInterface.connectToDatabase()

        self._start()

        query = self._constructIdQuery()
        ids = self.dbInterface.runGremlinQuery(query)

        for chunk in self.dbInterface.chunks(ids, CHUNK_SIZE):
            query = self._constructQueryForChunk(chunk)
            res = self.dbInterface.runGremlinQuery(query)
            self._handleChunkResult(res, chunk)

        self._stop()
def run(self):
    """Run a Gremlin script against the project database and print the result.

    The script is read from ``self.args.file`` when it is set, otherwise from
    standard input, passed through ``_parseScript`` and joined with newlines.
    Each element of the query result is printed as ``repr`` (``--raw``),
    pretty-printed (``--pretty``) or with plain ``print``.

    The script file is closed once the query text is built, and the closing
    ``quit`` query is sent even if running the query or printing fails.
    """
    # Join inside the with-block so the script is fully read before the file
    # is closed, whatever kind of iterable _parseScript returns.
    if self.args.file is not None:
        with open(self.args.file, "r") as f:
            query = "\n".join(__class__._parseScript(f))
    else:
        query = "\n".join(__class__._parseScript(sys.stdin))

    db = DBInterface()
    if self.args.no_json:
        db.disable_json()
    db.connectToDatabase(self.args.project)

    try:
        result = db.runGremlinQuery(query)
        pp = pprint.PrettyPrinter(indent=4, compact=True)
        for x in result:
            if self.args.raw:
                print(repr(x))
            elif self.args.pretty:
                pp.pprint(x)
            else:
                print(x)
    finally:
        db.runGremlinQuery("quit")
class ProgramGraph(JoernTool):
    """Tool that emits one graph per input line.

    Every input line is converted to an int and used as a function id. The
    nodes and edges reachable from that function over the selected edge
    labels are fetched from the database, styled according to the plot
    configuration and written out as a ``pgv.AGraph``.
    """

    def __init__(self, DESCRIPTION):
        JoernTool.__init__(self, DESCRIPTION)

    # @Override
    def processLine(self, line):
        """Build and output the graph for the function id given in ``line``."""
        self.plot_configuration = PlotConfiguration()
        # The configuration is re-read for every line; the context manager
        # closes the file, which previously stayed open.
        with open(self.args.plot_config, "r") as f:
            self.plot_configuration.parse(f)

        labels = self._getLabels()
        functionId = int(line)
        nodes = self._getNodes(functionId, labels)
        edges = self._getEdges(functionId, labels)

        G = pgv.AGraph(directed=True, strict=False)
        self._addNodes(G, nodes)
        self._addEdges(G, edges)
        self._outputGraph(G, line)

    def streamStart(self):
        """Connect to the database of the project given on the command line."""
        self.dbInterface = DBInterface()
        self.dbInterface.connectToDatabase(self.args.project)

    def _addNodes(self, G, nodes):
        """Add every node to ``G`` with its configured layout and label."""
        for v in nodes:
            nr = NodeResult(v)
            label = self._createGraphElementLabel(
                self.plot_configuration.getElementDisplayItems(nr))
            plot_properties = self.plot_configuration.getElementLayout(nr)
            # Nodes only get a label attribute when the label is non-empty.
            if label:
                plot_properties['label'] = label
            G.add_node(nr.getId(), **plot_properties)

    def _addEdges(self, G, edges):
        """Add every edge to ``G`` with its configured layout and label."""
        for e in edges:
            er = EdgeResult(e)
            label = self._createGraphElementLabel(
                self.plot_configuration.getElementDisplayItems(er))
            plot_properties = self.plot_configuration.getElementLayout(er)
            # Unlike nodes, edges always get a label attribute, even if empty.
            plot_properties['label'] = label
            G.add_edge(er.getSrc(), er.getDest(), er.getId(),
                       **plot_properties)

    def _createGraphElementLabel(self, labeldata):
        """Join the items of each entry with ':' and the entries with newlines."""
        return "\n".join(
            ":".join(str(self._escape(e)) for e in d) for d in labeldata)

    def _escape(self, label):
        """Return ``label`` as a string with every backslash doubled."""
        return str(label).replace("\\", "\\\\")

    def _outputGraph(self, G, identifier):
        """Output ``G`` framed by a '//<identifier>' line and a '//###' line."""
        outputString = '//' + identifier + '\n'
        outputString += str(G) + '\n'
        outputString += '//###' + '\n'
        self.output(outputString)

    def _getLabels(self):
        """Return the edge labels selected by the command line switches."""
        labels = ["FLOWS_TO", "USE", "DEF", "IS_AST_PARENT"]
        if self.args.show_all:
            return labels
        if not self.args.show_control_flow:
            labels.remove("FLOWS_TO")
        if not self.args.show_data_flow:
            labels.remove("USE")
            labels.remove("DEF")
        if not self.args.show_ast:
            labels.remove("IS_AST_PARENT")
        return labels

    def _getStartNode(self, functionId):
        """Return the traversal prefix selecting the start nodes of a function.

        With ``--id-property`` the function is looked up through its
        ``_key`` property, otherwise ``functionId`` is used as the vertex id.
        """
        if self.args.id_property:
            startnode = "g.V().has('type', 'Function').has('_key', {})".format(
                functionId)
        else:
            startnode = "g.V({})".format(functionId)
        startnode += (".union( out('IS_FUNCTION_OF_CFG'),"
                      " out('IS_FUNCTION_OF_AST') )")
        return startnode

    def _getNodes(self, functionId, labels):
        """Return the vertices of the subgraph reachable over ``labels``."""
        query = (
            " {}.repeat(outE({}).subgraph('sg').inV().dedup().simplePath())"
            ".cap('sg').next().traversal().V() "
        ).format(self._getStartNode(functionId),
                 ','.join("'{}'".format(x) for x in labels))
        return self._runGremlinQuery(query)

    def _getEdges(self, functionId, labels):
        """Return the edges of the subgraph reachable over ``labels``."""
        query = (
            " {}.repeat(outE({}).subgraph('sg').inV().simplePath())"
            ".cap('sg').next().traversal().E() "
        ).format(self._getStartNode(functionId),
                 ','.join("'{}'".format(x) for x in labels))
        return self._runGremlinQuery(query)
class APIEmbedder(object):
    """Write the API symbols of every function to disk and embed them.

    ``run`` creates the output directory (``data/`` plus a ``TOC`` file),
    fetches ``[functionId, symbols]`` pairs from the database, writes one
    data point file per function and finally runs the embedder on the
    output directory.
    """

    def __init__(self):
        self._initializeDBConnection()

    def _initializeDBConnection(self):
        """Create the database interface (without connecting yet)."""
        self.dbInterface = DBInterface()

    def setOutputDirectory(self, directory):
        """Set the directory that ``run`` creates and writes to."""
        self.outputDirectory = directory

    def run(self):
        """Create the output directory, dump all data points and embed them.

        Returns without doing anything when the output directory cannot be
        set up, in particular when it already exists.
        """
        try:
            # Will throw error if output directory already exists
            self._initializeOutputDirectory()
        except (OSError, IOError):
            # Only file system failures mean "skip"; anything else is a
            # programming error and must not be swallowed.
            return

        try:
            self._connectToDatabase()
            functions = self._getAPISymbolsFromDatabase()
            self._writeDataPoints(functions)
        finally:
            # Close the TOC even if the database step fails.
            self._finalizeOutputDirectory()

        self._embed()

    def _embed(self):
        """Run the embedder on the output directory."""
        # self.embedder = SallyBasedEmbedder()
        self.embedder = Embedder()
        self.embedder.embed(self.outputDirectory)

    def _connectToDatabase(self):
        self.dbInterface.connectToDatabase()

    def _writeDataPoints(self, functions):
        """Append each function id to the TOC and write its data point file."""
        for (funcId, symbols) in functions:
            self.toc.write("%d\n" % (funcId))
            self._addDataPoint(symbols)

    def _addDataPoint(self, symbols):
        """Write ``symbols`` one per line to the next numbered data file."""
        datapointFilename = os.path.join(self.dataDir, str(self.curDatapoint))
        # open() instead of the Python 2-only file() builtin; the context
        # manager closes the file even if writing fails.
        with open(datapointFilename, 'w') as f:
            f.writelines([x + "\n" for x in symbols])
        self.curDatapoint += 1

    def _initializeOutputDirectory(self):
        """Create ``<output>/data`` and open ``<output>/TOC`` for writing.

        :raises OSError: if the output directory already exists or cannot
            be created.
        """
        directory = self.outputDirectory

        if os.path.exists(directory):
            # The bare ``raise`` used here before had no active exception to
            # re-raise; raise an explicit, catchable error instead.
            raise OSError("Output directory already exists: %s" % directory)

        self.dataDir = os.path.join(directory, 'data')
        self.tocFilename = os.path.join(directory, 'TOC')
        os.makedirs(self.dataDir)
        self.toc = open(self.tocFilename, 'w')

        self.curDatapoint = 0

    def _finalizeOutputDirectory(self):
        """Close the TOC file."""
        self.toc.close()

    def _getAPISymbolsFromDatabase(self):
        """Return a list of ``[functionId, symbols]`` query results.

        The function ids are fetched first and then resolved in chunks of
        ``CHUNK_SIZE`` ids per query.
        """
        CHUNK_SIZE = 1024

        query = "queryNodeIndex('type:Function').id"
        functionIds = self._runGremlinQuery(query)

        result = []
        for chunk in self.chunks(functionIds, CHUNK_SIZE):
            query = (
                " _().transform{ %s }.scatter().transform{g.v(it)} "
                ".sideEffect{funcId = it.id} "
                ".transform{ [funcId,"
                " it.functionToAPISymbolNodes().code.toList()] } "
            ) % (str(chunk))
            result.extend(self._runGremlinQuery(query))

        return result

    def chunks(self, l, n):
        """Yield successive slices of ``l`` holding at most ``n`` items."""
        # range() instead of the Python 2-only xrange().
        for i in range(0, len(l), n):
            yield l[i:i + n]

    def _runGremlinQuery(self, query):
        return self.dbInterface.runGremlinQuery(query)