def check_a_repo_by_random_algorithm(repo, shuffle: bool = False):
    """Check one repo result using keywords drawn from its own code snippet.

    The snippet attached to ``repo`` is stripped of comments and tokenized;
    then as many tokens are selected as there are keywords in the snippet's
    description, and the repo is checked against those tokens under the
    ``RANDOM_ALGORITHM`` checking type.

    Args:
        repo: Result row exposing a ``code`` relation with ``snipped_code``
            and ``description`` attributes.
        shuffle: Forwarded to ``extract_tokens`` as ``shuffling``.

    Returns:
        ``False`` when the description yields no keywords (nothing is
        checked), ``True`` once the check has been dispatched.
    """
    snippet = repo.code

    # Steps 0-1: strip comments, then tokenize the snippet (set A).
    stripped_source = comment_remover(snippet.snipped_code)
    candidate_tokens = word_tokenize(stripped_source)

    # Step 2: keywords previously extracted from the description (set B).
    known_keywords = extract_from_description(snippet.description,
                                              with_space=True)

    # TODO step 3: subtract B from A (C = A - B). Not implemented yet, so
    # the tokens below are still drawn from A.
    keyword_count = len(known_keywords)
    if keyword_count == 0:
        return False

    # Step 4: select as many tokens as there are description keywords.
    random_keywords = extract_tokens(candidate_tokens,
                                     keyword_count,
                                     min_len=3,
                                     shuffling=shuffle)
    print(random_keywords)

    # Steps 5-6: run the check; the callee stores the outcome on ``repo``.
    check_github_repo_with_keywords(
        repo,
        checking_type=CHECKING_TYPE.RANDOM_ALGORITHM,
        keywords=random_keywords)
    return True
def _check_repos():
    """Run the snippet-token check over every unchecked KeywordMeter result.

    For each ``GHResult_KeywordMeter`` row with ``is_checked=False`` the
    attached code snippet is stripped of comments and tokenized, as many
    tokens are selected as the description has keywords, and the row is
    handed to ``_checkGHUrl`` together with those tokens. Rows whose
    description yields no keywords are skipped.

    Returns:
        The string ``"Done"`` after all rows have been visited.
    """
    pending = GHResult_KeywordMeter.objects.filter(is_checked=False)
    for entry in pending:
        snippet = entry.code

        # Steps 0-1: strip comments, then tokenize the snippet (set A).
        stripped_source = comment_remover(snippet.snipped_code)
        candidate_tokens = word_tokenize(stripped_source)

        # Step 2: keywords previously extracted from the description (B).
        known_keywords = extract_from_description(snippet.description,
                                                  with_space=True)

        # TODO step 3: subtract B from A (C = A - B). Not implemented yet.
        keyword_count = len(known_keywords)
        if keyword_count == 0:
            continue

        # Step 4: select as many tokens as there are description keywords.
        random_keywords = extract_tokens(candidate_tokens, keyword_count)
        print(random_keywords)

        # Steps 5-6: check the repo; the callee persists the result.
        _checkGHUrl(entry, random_keywords)

    return "Done"
def _checkGHUrl(gResult, keywords: list = None, timeout: float = 30.0):
    """Fetch a GitHub file and record whether it contains the keywords.

    Args:
        gResult: Result row exposing ``ghUrl``, ``code_id``, ``answer_id``
            and ``id``; it is updated and saved in place.
        keywords: Keywords to look for. When ``None`` they are extracted
            from the description of the ``Code`` row with id
            ``gResult.code_id``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a single unresponsive host would block the
            whole run indefinitely.

    Returns:
        ``True`` when the check completed and the verdict was saved,
        ``False`` when any exception occurred (the message is stored in
        ``gResult.report`` and ``gResult.is_error`` is set).
    """
    try:
        # NOTE(review): the HTTP status is not inspected, so an error page
        # (e.g. 404) is scanned like regular content - confirm intended.
        response = requests.get(gResult.ghUrl, timeout=timeout)

        # All spaces are removed before comment stripping and matching;
        # presumably the keywords are stored without spaces - TODO confirm.
        content = comment_remover(response.text.replace(" ", ""))

        if keywords is None:
            code = Code.objects.filter(id=gResult.code_id).first()
            keywords = extract_from_description(code.description)

        found = bool(contain_keywords(content, keywords))
        if found:
            print("YESS", gResult.id, gResult.code_id, gResult.answer_id)
        else:
            print("NOOO", gResult.code_id, gResult.answer_id)

        # Persist the verdict and mark the row as processed.
        gResult.is_vulnerable = found
        gResult.is_checked = True
        gResult.save()
        return True
    except Exception as e:
        # Boundary handler: record the failure on the row rather than
        # aborting the surrounding batch.
        gResult.is_error = True
        gResult.report = "{}".format(e)
        gResult.save()
        return False
def _extract_from_code(request, id, extract_type):
    """Return one extracted part of a vulnerable code's description.

    Args:
        request: Incoming request object (not read by this function).
        id: Primary key of the ``Code`` row to look up; only rows with
            ``is_vulnerable=True`` are considered.
        extract_type: Passed to ``extract_from_description`` as its second
            positional argument to select what to extract.

    Returns:
        ``HttpResponse`` with the extracted result; an empty 404 response
        when no matching vulnerable code exists; a 406 response with an
        explanatory message when the extraction returns ``None``.
    """
    vulnerable_only = Code.objects.filter(is_vulnerable=True)
    vulnerable_code = vulnerable_only.filter(id=id).first()
    if vulnerable_code is None:
        return HttpResponse("", status=404)

    result = extract_from_description(vulnerable_code.description,
                                      extract_type)
    if result is not None:
        return HttpResponse(result)

    # A ``None`` result is reported to the client as an unknown type.
    message = "'{}' type not exists. (valid types: explain, keywords, mitigation, references)".format(
        extract_type)
    return HttpResponse(message, status=406)
def check_github_repo_with_keywords(gResult,
                                    checking_type: int,
                                    keywords: list = None,
                                    timeout: float = 30.0):
    """Fetch a GitHub file and store a per-algorithm keyword verdict.

    Args:
        gResult: Result row exposing ``ghUrl``, ``code_id``, ``answer_id``
            and ``id``; it is updated and saved in place.
        checking_type: ``CHECKING_TYPE.OUR_ALGORITHM`` or
            ``CHECKING_TYPE.RANDOM_ALGORITHM``; selects which verdict field
            and status are written. Any other value leaves those fields
            untouched (the row is still saved).
        keywords: Keywords to look for. When ``None`` they are extracted
            from the description of the ``Code`` row with id
            ``gResult.code_id``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a single unresponsive host would block the
            whole run indefinitely.

    Returns:
        ``True`` when the check completed and the row was saved, ``False``
        when any exception occurred (the message is stored in
        ``gResult.report`` and ``gResult.is_error`` is set).
    """
    try:
        # NOTE(review): the HTTP status is not inspected, so an error page
        # (e.g. 404) is scanned like regular content - confirm intended.
        response = requests.get(gResult.ghUrl, timeout=timeout)

        # All spaces are removed before comment stripping and matching;
        # presumably the keywords are stored without spaces - TODO confirm.
        content = comment_remover(response.text.replace(" ", ""))

        if keywords is None:
            code = Code.objects.filter(id=gResult.code_id).first()
            keywords = extract_from_description(code.description)

        found = bool(contain_keywords(content, keywords))
        if found:
            print("YESS", gResult.id, gResult.code_id, gResult.answer_id)
        else:
            print("NOOO", gResult.code_id, gResult.answer_id)

        # Write the verdict to the field that belongs to the algorithm
        # which produced the keywords.
        if checking_type == CHECKING_TYPE.OUR_ALGORITHM:
            gResult.is_vulnerable_our_algorithm = found
            gResult.status = KeywordMeterStatus.checked_by_our_algorithm
        elif checking_type == CHECKING_TYPE.RANDOM_ALGORITHM:
            gResult.is_vulnerable_random_algorithm = found
            gResult.status = KeywordMeterStatus.checked_by_random_algorithm
        gResult.save()
        return True
    except Exception as e:
        # Boundary handler: record the failure on the row rather than
        # aborting the caller.
        gResult.is_error = True
        gResult.report = "{}".format(e)
        gResult.save()
        return False
def extract_section(self, code: Code, section):
    """Return the requested section of a code's description as one string.

    Args:
        code: ``Code`` instance whose ``description`` is parsed.
        section: Section selector forwarded to ``extract_from_description``.

    Returns:
        The extracted items joined with newline characters.
    """
    extracted_items = extract_from_description(code.description,
                                               section=section,
                                               with_space=True)
    return '\n'.join(extracted_items)