def check(self):
    # Build the identifier using the filename and commit hash
    identifier = '%s (%s)' % (self.filename, self.commit_hashes[1])

    # comments is a list that keeps track of useful information encountered
    # while checking; right now it is only used to annotate when base64
    # content was removed
    comments = []

    # Check the number of additions; if there are too many, send a warning
    # and skip, since this may be a big data-file addition
    if self.error:
        return Result(self.filename, self.error)

    # Check whether the extension/mimetype is allowed
    if filetype.get_extension(self.filename) not in self.allowed_extensions:
        return Result(identifier, FILETYPE_NOT_ALLOWED)

    # Start applying rules...
    # First check whether the additions contain base64; if so, remove it
    has_base64, self.content = m.base64_matcher(self.content, remove=True)
    if has_base64:
        comments.append('BASE64_REMOVED')

    # Create a matcher for amazonaws.com
    amazonaws_matcher = m.create_domain_matcher('amazonaws.com')

    # Apply the matchers: passwords, IPs and AWS domains
    match, matches = m.multi_matcher(self.content,
                                     m.password_matcher,
                                     m.ip_matcher,
                                     amazonaws_matcher)

    if match:
        return Result(identifier, MATCH, matches=matches, comments=comments)
    else:
        return Result(identifier, NOT_MATCH, comments=comments)
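# The matcher helpers from the `m` module used above are not shown in this
# section. The sketch below is a hypothetical, regex-based illustration of how
# such matchers could work; it only mirrors the call shapes visible in check()
# (base64_matcher(content, remove=True) -> (bool, str), matcher(content) -> list,
# multi_matcher(content, *matchers) -> (bool, list)) and is an assumption, not
# the project's actual implementation.

import re

BASE64_RE = re.compile(r'[A-Za-z0-9+/]{40,}={0,2}')
IP_RE = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
PASSWORD_RE = re.compile(r'(?i)password\s*[:=]\s*\S+')


def base64_matcher(content, remove=False):
    """Return (found, content); long base64-looking runs are stripped when remove=True."""
    found = bool(BASE64_RE.search(content))
    if found and remove:
        content = BASE64_RE.sub('', content)
    return found, content


def password_matcher(content):
    """Return password-assignment-looking substrings found in the content."""
    return PASSWORD_RE.findall(content)


def ip_matcher(content):
    """Return IPv4-looking substrings found in the content."""
    return IP_RE.findall(content)


def create_domain_matcher(domain):
    """Build a matcher function for occurrences of the given domain."""
    domain_re = re.compile(r'[\w.-]*' + re.escape(domain))
    return lambda content: domain_re.findall(content)


def multi_matcher(content, *matchers):
    """Run every matcher over the content and collect all matches."""
    matches = []
    for matcher in matchers:
        matches.extend(matcher(content))
    return bool(matches), matches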
def check(self):
    # comments is a list that keeps track of useful information encountered
    # while checking; right now it is only used to annotate when base64
    # content was removed
    comments = []

    # Check the file size: if it is more than max_file_size_bytes (default is
    # 1MB), send just a warning and do not open the file, since pattern
    # matching would be really slow
    f_size = os.stat(self.path).st_size
    if f_size > self.max_file_size_bytes:
        return Result(self.path, BIG_FILE)

    # Check whether the extension is allowed
    if filetype.get_extension(self.path) not in self.allowed_extensions:
        return Result(self.path, FILETYPE_NOT_ALLOWED)

    # At this point only files with allowed extensions and smaller than
    # max_file_size_bytes remain: open the file and apply all the rules
    with open(self.path, 'r') as f:
        content = f.read()

    # Search for potential base64 strings and remove them; send a warning
    has_base64, content = m.base64_matcher(content, remove=True)
    if has_base64:
        comments.append('BASE64_REMOVED')

    # Create a matcher for amazonaws.com
    amazonaws_matcher = m.create_domain_matcher('amazonaws.com')

    # Apply the matchers: passwords, IPs and AWS domains
    match, matches = m.multi_matcher(content,
                                     m.password_matcher,
                                     m.ip_matcher,
                                     amazonaws_matcher)

    if match:
        return Result(self.path, MATCH, matches=matches, comments=comments)
    else:
        return Result(self.path, NOT_MATCH, comments=comments)
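# Both check() variants return a Result built from an identifier, a status
# constant (MATCH, NOT_MATCH, FILETYPE_NOT_ALLOWED, BIG_FILE, or an error) and
# optional matches and comments. The class below is a minimal sketch assuming
# only the keyword arguments seen in the calls above; the project's real Result
# class and status values may differ.

MATCH = 'MATCH'
NOT_MATCH = 'NOT_MATCH'
FILETYPE_NOT_ALLOWED = 'FILETYPE_NOT_ALLOWED'
BIG_FILE = 'BIG_FILE'


class Result(object):
    """Outcome of a single check: what was checked, what happened, and why."""

    def __init__(self, identifier, status, matches=None, comments=None):
        self.identifier = identifier
        self.status = status
        self.matches = matches or []
        self.comments = comments or []

    def __repr__(self):
        return '<Result %s: %s (%d matches) %s>' % (
            self.identifier, self.status, len(self.matches), self.comments)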