class DBContentsProvider:
    """Wrapper around a JoernSteps handle exposing a few canned queries."""

    def __init__(self):
        self._initDatabaseConnection()

    def _initDatabaseConnection(self):
        """Create the JoernSteps handle, connect, then add the 'steps/' dir."""
        self.j = JoernSteps()
        handle = self.j
        handle.connectToDatabase()
        handle.addStepsDir('steps/')

    def RunGremlinQuery(self, query):
        """Run ``query`` through the JoernSteps handle and return its result."""
        return self.j.runGremlinQuery(query)

    def GetCalleesInfo(self):
        """Return whatever the ``getCalleeListInfo()`` query yields."""
        return self.j.runGremlinQuery("getCalleeListInfo()")

    def generate(self, selector):
        """
        Generate contents for a given selector, overwriting
        the contents currently held in cndToQueries memory by the server.
        """
        script = """generateTaintLearnStructures(%s.id.toList()) _()""" % selector
        # The result is only iterated to exhaustion; its items are discarded.
        for _discarded in self.j.runGremlinQuery(script):
            pass
class DBInterface:
    """Holds a JoernSteps handle and forwards queries and chunking to it."""

    def connectToDatabase(self):
        """Create the handle, add JOERN_TOOLS_STEPDIR, then connect."""
        self.j = JoernSteps()
        handle = self.j
        handle.addStepsDir(JOERN_TOOLS_STEPDIR)
        handle.connectToDatabase()

    def runGremlinQuery(self, query):
        """Forward ``query`` to the handle and return its result."""
        outcome = self.j.runGremlinQuery(query)
        return outcome

    def chunks(self, ids, chunkSize):
        """Forward to the handle's ``chunks(ids, chunkSize)``."""
        pieces = self.j.chunks(ids, chunkSize)
        return pieces
class DBContentsProvider:
    """Owns a JoernSteps handle and runs Gremlin scripts through it."""

    def __init__(self):
        self.j = JoernSteps()
        self.init_database_connection()

    def init_database_connection(self):
        """Connect the existing handle, then add the 'steps/' directory."""
        handle = self.j
        handle.connectToDatabase()
        handle.addStepsDir('steps/')

    def run_gremlin_query(self, query_script):
        """Run ``query_script`` on the handle and return its result."""
        return self.j.runGremlinQuery(query_script)
def getFunctionSimilarity():
    """Compute pairwise similarity of function names and record strong pairs.

    Function names are fetched with the ``_().getFunctions()`` query, passed
    through ``my_util.removeNamespace``, and filtered (empty names, names
    starting with ``"operator "`` and duplicates are dropped).  Every ordered
    pair of distinct entries is scored with ``computeSim``; pairs scoring above
    0.5 are written to the CSV file named by
    ``my_constant.FUNC_SIMILAIRTY_FILE_NAME`` and collected in the result.

    Returns:
        dict mapping ``(func_a, func_b)`` name tuples to their similarity.
    """
    # The output file is opened (and truncated) before anything else, as
    # before; the ``with`` block guarantees it is closed even when the
    # database query or computeSim raises.  Binary mode is what the Python 2
    # csv module expects.
    with open(my_constant.FUNC_SIMILAIRTY_FILE_NAME, 'wb') as analysis:
        analyze_writer = csv.writer(analysis)
        analyze_writer.writerow(['func_a', 'func_b', 'similarity'])

        # initialize python-joern instance and connect to database
        joern_instance = JoernSteps()
        joern_instance.addStepsDir("/data/joern-code/query/")
        joern_instance.setGraphDbURL("http://localhost:7474/db/data/")
        joern_instance.connectToDatabase()

        # fetch all function info; only the first element of the result is used
        functions_temp = joern_instance.runGremlinQuery('_().getFunctions()')[0]

        # Filter names, keeping first-seen order.  A set replaces the former
        # linear "[name] not in functions" scan.
        functions = []
        seen_names = set()
        for function in functions_temp:
            # remove namespace before ::
            function = my_util.removeNamespace(function)
            if function == '' or function.startswith("operator "):
                continue
            if function in seen_names:
                continue
            seen_names.add(function)
            # computeSim receives each entry as a one-element list.
            functions.append([function])

        # compute similarity and write back into file
        func_similarity_dic = {}
        word_list_dict = {}
        for i, func_a in enumerate(functions):
            for j, func_b in enumerate(functions):
                if i == j:
                    continue
                similarity, word_list_dict = computeSim(
                    func_a, func_b, word_list_dict)
                # NOTE(review): assumes both the CSV row and the dict entry
                # are recorded only for pairs above the threshold - confirm.
                if similarity > 0.5:
                    analyze_writer.writerow([func_a[0], func_b[0], similarity])
                    func_similarity_dic[(func_a[0], func_b[0])] = similarity

    return func_similarity_dic
class DBContentsProvider:
    """Owns a JoernSteps handle and offers user-level database queries."""

    def __init__(self):
        self.j = JoernSteps()
        self.init_database_connection()

    def init_database_connection(self):
        """Connect the existing handle, then add the 'steps/' directory."""
        handle = self.j
        handle.connectToDatabase()
        handle.addStepsDir('steps/')

    def run_gremlin_query(self, query_script):
        """Run ``query_script`` on the handle and return its result."""
        return self.j.runGremlinQuery(query_script)

    # The methods below are the user's database query procedures.

    def query_allCallee_name(self):
        """Query the database for the names of all called functions.

        The script selects nodes whose ``type`` is ``Callee`` and returns
        their de-duplicated ``code`` values.
        """
        script = """ g.V.has('type','Callee').as('x').code.dedup().back('x').code.toList() """
        return self.run_gremlin_query(script)
class ChuckyJoern(CmdLineTool):
    """Joern interface used by chucky tools.

    The JoernSteps connection is created on first access of ``joern``.
    """

    def __init__(self, description):
        super(ChuckyJoern, self).__init__(description)
        self._joern = None
        # Double underscore: name-mangled, so private to this class.
        self.__is_initialized = False

    def _init_joern_interface(self, step_dir=None):
        """Create and connect the JoernSteps handle.

        ``step_dir`` is added as a steps directory when it is truthy.
        """
        self._joern = JoernSteps()
        handle = self._joern
        if step_dir:
            handle.addStepsDir(step_dir)
        handle.connectToDatabase()
        self.__is_initialized = True

    @property
    def joern(self):
        """The JoernSteps handle, initialised with defaults if not yet done."""
        if self.__is_initialized:
            return self._joern
        self._init_joern_interface()
        return self._joern

    def run_query(self, query):
        """Run ``query`` via the ``joern`` property and return its result."""
        handle = self.joern
        return handle.runGremlinQuery(query)
#!/usr/bin/env python
#############################################################
# A template for feature extraction for functions with joern.
# Author: Fabian Yamaguchi
#############################################################

from joern.all import JoernSteps

j = JoernSteps()
j.connectToDatabase()
j.addStepsDir('steps/')

# Ids returned by the node-index lookup for 'type:Function'.
statementIds = j.runGremlinQuery("queryNodeIndex('type:Function').id")

# Process the ids in batches of 256 so each query stays bounded in size.
for chunk in j.chunks(statementIds, 256):

    query = """ idListToNodes(%s).transform{ [it.id, it.name, it.functionToFeatureVec() ] } """ % chunk
    X = j.runGremlinQuery(query)

    # Each row is indexed as [id, name, feature list].
    for x in X:
        print('===')
        print('FunctionId: %d' % x[0])
        print('FunctionName: %s' % x[1])
        print('Features (list): %s' % x[2])
        print('===')
class DBContentsProvider:
    """Query facade over a JoernSteps handle.

    Most public methods wrap one server-side step in a
    ``_().transform{ ... }.scatter()`` query and return the results as a list.
    """

    # Wrapper shared by every transform/scatter query below.
    _SCATTER_TEMPLATE = """_().transform{ %s }.scatter() """

    def __init__(self):
        self._initDatabaseConnection()

    def _initDatabaseConnection(self):
        """Create the JoernSteps handle, connect, then add the 'steps/' dir."""
        self.j = JoernSteps()
        self.j.connectToDatabase()
        self.j.addStepsDir('steps/')

    def _runAndDiscard(self, query):
        """Run ``query`` and iterate its result to exhaustion, keeping nothing."""
        for _unused in self.j.runGremlinQuery(query):
            pass

    def _scatter(self, expression):
        """Run ``expression`` inside the transform/scatter wrapper.

        Returns the query results copied into a list.
        """
        query = self._SCATTER_TEMPLATE % expression
        return list(self.j.runGremlinQuery(query))

    def generate(self, selector):
        """
        Generate contents for a given selector, overwriting
        the contents currently held in cndToQueries memory by the server.
        """
        self._runAndDiscard(
            """generateTaintLearnStructures(%s.id.toList()) _()""" % (selector))

    def generateChecksForInvocations(self, invocs):
        """Run ``generateChecksForInvocations`` for ``invocs``; returns None."""
        self._runAndDiscard(
            """generateChecksForInvocations(%s.toList()) _()""" % (invocs))

    # Source Analysis

    def getSourceAPISymbols(self):
        """Return the results of ``getSourceAPISymbols()`` as a list."""
        return self._scatter("getSourceAPISymbols()")

    def getAllDefStmtsPerArg(self):
        """Return the results of ``getAllDefStmtsPerArg()`` as a list."""
        return self._scatter("getAllDefStmtsPerArg()")

    # Condition Analysis

    def getAllChecksPerArg(self):
        """Return the results of ``getAllChecksPerArg()`` as a list."""
        return self._scatter("getAllChecksPerArg()")

    def getAllConditions(self):
        """Return the results of ``getAllConditions()`` as a list."""
        return self._scatter("getAllConditions()")

    def getAllConditionsCode(self):
        """Return the results of ``getAllConditionsCode()`` as a list."""
        return self._scatter("getAllConditionsCode()")

    def getInvocationCallSiteIds(self):
        """Return the results of ``getInvocationCallSites()`` as a list."""
        return self._scatter("getInvocationCallSites()")

    def getSubConditions(self, nodeId):
        """Return the results of ``subConditions(nodeId)`` as a list."""
        return self._scatter("subConditions(%s)" % (nodeId))

    def getAllCndFeatureVectors(self, invocs=None, argNum=None):
        """Return condition feature vectors as a list.

        Args:
            invocs: when truthy, it is formatted into a
                ``getCndFeatureVectorsForInvocs`` call; when empty or None,
                ``getAllCndFeatureVectors`` is called instead.
            argNum: optional integer passed as an extra argument (formatted
                with ``%d``).  ``0`` is a valid value, hence the explicit
                ``is not None`` checks.
        """
        if not invocs:
            if argNum is not None:
                expression = "getAllCndFeatureVectors(%d)" % (argNum)
            else:
                expression = "getAllCndFeatureVectors()"
        elif argNum is not None:
            expression = "getCndFeatureVectorsForInvocs(%s, %d)" % (
                invocs, argNum)
        else:
            expression = "getCndFeatureVectorsForInvocs(%s)" % (invocs)
        return self._scatter(expression)

    def getAllASTNodeLabels(self):
        """Return the results of ``getAllASTNodeLabels()`` as a list."""
        return self._scatter("getAllASTNodeLabels()")

    # Choosing sinks

    def getControlledSinks(self, nodeId):
        """Return the results of ``getControlledSinks(nodeId)`` as a list."""
        return self._scatter("getControlledSinks(%s)" % (nodeId))
class ManualCCSearch(object):
    """Hand-written Gremlin queries, run and timed through a JoernSteps handle.

    Results of timed queries are wrapped in ``CodeCloneData`` objects.
    """

    UNTRUSTED_DATA = """attacker_sources = [ "_GET", "_POST", "_COOKIE", "_REQUEST", "_ENV", "HTTP_ENV_VARS" ]\n"""
    SQL_QUERY_FUNCS = """sql_query_funcs = [ "mysql_query", "pg_query", "sqlite_query" ]\n"""

    # Gremlin operations
    ORDER_LN = ".order{it.a.lineno <=> it.b.lineno}"  # Order by linenumber

    def __init__(self, port):
        """Connect to the graph database listening on localhost:``port``.

        Custom steps are loaded from ``<base dir>/custom_gremlin_steps``.
        """
        self.j = JoernSteps()
        self.j.setGraphDbURL('http://localhost:%d/db/data/' % (int(port)))
        # Disabled alternative steps directory:
        # self.j.addStepsDir(
        #     Configurator.getPath(Configurator.KEY_PYTHON_JOERN) +
        #     "/joern/phpjoernsteps"
        # )
        self.j.addStepsDir(
            Configurator.getPath(Configurator.KEY_BASE_DIR) +
            "/custom_gremlin_steps"
        )
        self.j.connectToDatabase()

        # self.QUERIES_DIR = Configurator.getPath(Configurator.BASE_DIR) + \
        #     "/gremlin_queries"

    def searchCCOne(self):
        """
        Search for the first vulnerable tutorial (SQL injection from
        stackoverflow):

        $user_alcohol_permitted_selection = $_POST['alcohol_check'];
        //Value sent using jquery .load()
        $user_social_club_name_input = $_POST['name'];
        //Value sent using jquery .load()
        $query="SELECT * FROM social_clubs
        WHERE name = $user_social_club_name_input";
        if ($user_alcohol_permitted_selection != "???")
        {
            $query.= "AND WHERE alcohol_permitted =
            $user_alcohol_permitted_selection";
        }

        Currently only the placeholder query ``"g.V"`` is returned.
        """
        # construct gremlin query step by step:
        # 1. Find variable name X of "variable = $_POST[..]"
        # 2. Go to next statement list.
        # (3. Find variable name Y of "variable = $_POST[..]"
        # (4. Go to next statement list.
        # 5. Find variable name Z and string str1 of "variable = string"
        # 6. Check if str1 contains regexp "WHERE any_word=$Y".
        # (7. Go to next statement list.)
        # (8. Check for if-statement with variable $X.)
        # 9. Check if variable $Z is extended using string with regexp
        #    "and where any_word=$X"
        # (10. Check for mysql_query($Z))

        # all nodes
        # query = "g.V(NODE_TYPE, TYPE_STMT_LIST).out"
        #
        # # AST_ASSIGN nodes' right side
        # query += ".rval"

        query = "g.V"
        return query

    def sqlNewIndirect(self):
        """Return the preamble constants followed by the stored query text.

        The query text is read from ``QUERIES_DIR + "sql_new_indirect.query"``.
        """
        # NOTE(review): QUERIES_DIR is never assigned in this class (its
        # initialisation in __init__ is commented out), so this raises
        # AttributeError unless the attribute is set elsewhere - confirm.
        query = self.UNTRUSTED_DATA + self.SQL_QUERY_FUNCS
        # Context manager so the file handle is closed promptly.
        with open(self.QUERIES_DIR + "sql_new_indirect.query", 'r') as handle:
            query += handle.read()
        return query

    def runQuery(self, query):
        """Return ``query`` unchanged (identity query builder)."""
        return query

    def _toCloneData(self, node, elapsed):
        """Wrap one query output item in a CodeCloneData with its query time."""
        data = CodeCloneData()
        data.stripDataFromOutput(node)
        data.setQueryTime(elapsed)
        return data

    def runTimedQuery(self, myFunction, query=None):
        """Build a query with ``myFunction``, run it and time the execution.

        Args:
            myFunction: callable returning the Gremlin query string; it is
                called with ``query`` when ``query`` is truthy, otherwise
                with no arguments.
            query: optional argument handed to ``myFunction``.

        Returns:
            ``(result, elapsed)`` where ``result`` is a list of CodeCloneData
            objects and ``elapsed`` is the wall-clock duration in seconds.
            If building or running the query raises, the exception is printed
            and ``result`` is empty.
        """
        start = time.time()
        res = None
        try:
            if query:
                res = self.j.runGremlinQuery(myFunction(query))
            else:
                res = self.j.runGremlinQuery(myFunction())
        except Exception as err:
            # Best effort by design: report the failure and return no results.
            print("Caught exception: %s %s" % (type(err), err))
        elapsed = time.time() - start
        # print "Query done in %f seconds." % (elapsed)

        # Probe iterability up front so that the TypeError handler covers only
        # "res is not iterable" and not errors raised while processing a node.
        try:
            nodes = iter(res)
        except TypeError:
            # res is not iterable, because it is one/no node.
            nodes = None

        result = []
        if nodes is not None:
            for node in nodes:
                print(node)
                result.append(self._toCloneData(node, elapsed))
        elif res:
            # Single non-iterable result: wrap ``res`` itself.  The previous
            # code read the loop variable here, which is unbound on this path.
            result.append(self._toCloneData(res, elapsed))
            # NOTE(review): assumed to belong under the ``if res`` guard - the
            # original layout is ambiguous; confirm.
            print(res)

        return (result, elapsed)
class DBContentsProvider:
    """Query facade over a JoernSteps handle.

    Most public methods wrap one server-side step in a
    ``_().transform{ ... }.scatter()`` query and return the results as a list.
    """

    # Wrapper shared by every transform/scatter query below.
    _SCATTER_TEMPLATE = """_().transform{ %s }.scatter() """

    def __init__(self):
        self._initDatabaseConnection()

    def _initDatabaseConnection(self):
        """Create the JoernSteps handle, connect, then add the 'steps/' dir."""
        self.j = JoernSteps()
        self.j.connectToDatabase()
        self.j.addStepsDir('steps/')

    def _runAndDiscard(self, query):
        """Run ``query`` and iterate its result to exhaustion, keeping nothing."""
        for _unused in self.j.runGremlinQuery(query):
            pass

    def _scatter(self, expression):
        """Run ``expression`` inside the transform/scatter wrapper.

        Returns the query results copied into a list.
        """
        query = self._SCATTER_TEMPLATE % expression
        return list(self.j.runGremlinQuery(query))

    def generate(self, selector):
        """
        Generate contents for a given selector, overwriting
        the contents currently held in cndToQueries memory by the server.
        """
        self._runAndDiscard(
            """generateTaintLearnStructures(%s.id.toList()) _()""" % (selector))

    def generateChecksForInvocations(self, invocs):
        """Run ``generateChecksForInvocations`` for ``invocs``; returns None."""
        self._runAndDiscard(
            """generateChecksForInvocations(%s.toList()) _()""" % (invocs))

    # Source Analysis

    def getSourceAPISymbols(self):
        """Return the results of ``getSourceAPISymbols()`` as a list."""
        return self._scatter("getSourceAPISymbols()")

    def getAllDefStmtsPerArg(self):
        """Return the results of ``getAllDefStmtsPerArg()`` as a list."""
        return self._scatter("getAllDefStmtsPerArg()")

    # Condition Analysis

    def getAllChecksPerArg(self):
        """Return the results of ``getAllChecksPerArg()`` as a list."""
        return self._scatter("getAllChecksPerArg()")

    def getAllConditions(self):
        """Return the results of ``getAllConditions()`` as a list."""
        return self._scatter("getAllConditions()")

    def getAllConditionsCode(self):
        """Return the results of ``getAllConditionsCode()`` as a list."""
        return self._scatter("getAllConditionsCode()")

    def getInvocationCallSiteIds(self):
        """Return the results of ``getInvocationCallSites()`` as a list."""
        return self._scatter("getInvocationCallSites()")

    def getSubConditions(self, nodeId):
        """Return the results of ``subConditions(nodeId)`` as a list."""
        return self._scatter("subConditions(%s)" % (nodeId))

    def getAllCndFeatureVectors(self, invocs=None, argNum=None):
        """Return condition feature vectors as a list.

        Args:
            invocs: when truthy, it is formatted into a
                ``getCndFeatureVectorsForInvocs`` call; when empty or None,
                ``getAllCndFeatureVectors`` is called instead.
            argNum: optional integer passed as an extra argument (formatted
                with ``%d``).  ``0`` is a valid value, hence the explicit
                ``is not None`` checks.
        """
        if not invocs:
            if argNum is not None:
                expression = "getAllCndFeatureVectors(%d)" % (argNum)
            else:
                expression = "getAllCndFeatureVectors()"
        elif argNum is not None:
            expression = "getCndFeatureVectorsForInvocs(%s, %d)" % (
                invocs, argNum)
        else:
            expression = "getCndFeatureVectorsForInvocs(%s)" % (invocs)
        return self._scatter(expression)

    def getAllASTNodeLabels(self):
        """Return the results of ``getAllASTNodeLabels()`` as a list."""
        return self._scatter("getAllASTNodeLabels()")

    # Choosing sinks

    def getControlledSinks(self, nodeId):
        """Return the results of ``getControlledSinks(nodeId)`` as a list."""
        return self._scatter("getControlledSinks(%s)" % (nodeId))