def test_adv_record_products():
    record = AdvisoryRecord(
        vulnerability_id="CVE-XXXX-YYYY", description=ADVISORY_TEXT
    )
    record.analyze()
    # print(record)
    assert "Chrysler" in record.affected_products

def test_adv_record_versions():
    record = AdvisoryRecord(
        vulnerability_id="CVE-2014-0050", description=ADVISORY_TEXT
    )
    record.analyze()
    assert "15.26.1" in record.versions
    assert "15.26" not in record.versions

def test_adv_record_code_tokens():
    record = AdvisoryRecord(
        vulnerability_id="CVE-XXXX-YYYY", description=ADVISORY_TEXT_2
    )
    record.analyze()
    assert record.code_tokens == (
        "IO",
        "2.7,",
        "FileNameUtils.normalize",
        '"//../foo",',
        '"\\..\\foo",',
        '"limited"',
    )

def test_extract_referred_to_by_pages_linked_from_advisories(repository, requests_mock):
    requests_mock.get(
        "https://for.testing.purposes/containing_commit_id_in_text_2",
        text="some text r97993e3d78e1f5389b7b172ba9f308440830ce5 blah",
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        references=["https://for.testing.purposes/containing_commit_id_in_text_2"],
    )
    commit = Commit(
        commit_id="r97993e3d78e1f5389b7b172ba9f308440830ce5",
        repository="test_repository",
    )
    assert extract_referred_to_by_pages_linked_from_advisories(
        commit, advisory_record
    ) == {
        "https://for.testing.purposes/containing_commit_id_in_text_2",
    }

    commit = Commit(
        commit_id="f4d2eabd921cbd8808b9d923ee63d44538b4154f",
        repository="test_repository",
    )
    assert (
        extract_referred_to_by_pages_linked_from_advisories(commit, advisory_record)
        == set()
    )

def test_extract_other_CVE_in_message():
    commit = Commit(
        commit_id="test_commit",
        repository="test_repository",
        cve_refs=["CVE-2021-29425", "CVE-2021-21251"],
    )
    advisory_record = AdvisoryRecord(vulnerability_id="CVE-2020-31284")
    assert extract_other_CVE_in_message(commit, advisory_record) == {
        "CVE-2021-29425",
        "CVE-2021-21251",
    }
    advisory_record = AdvisoryRecord(vulnerability_id="CVE-2021-29425")
    result = extract_other_CVE_in_message(commit, advisory_record)
    assert result == {
        "CVE-2021-21251",
    }

def test_same_path_only(paths):
    commit = Commit(
        commit_id="test_commit", repository="test_repository", changed_files=paths
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", paths=paths[:2]
    )
    assert extract_changed_relevant_paths(commit, advisory_record) == set(paths[:2])

def test_time_between_commit_and_advisory_record():
    commit = Commit(
        commit_id="test_commit", repository="test_repository", timestamp=142
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", published_timestamp=100
    )
    assert (
        extract_time_between_commit_and_advisory_record(commit, advisory_record)
        == 42
    )

def test_no_match(paths):
    commit = Commit(
        commit_id="test_commit",
        repository="test_repository",
        changed_files=paths[:1],
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", paths=paths[2:]
    )
    assert extract_changed_relevant_paths(commit, advisory_record) == set()

def report_as_json(results: "list[Commit]", advisory_record: AdvisoryRecord):
    data = {
        "advisory_record": advisory_record.dict(),
        "commits": [r.dict() for r in results],
    }
    filename = "prospector-report.json"
    _logger.info("Writing results to " + filename)
    with open(filename, "w", encoding="utf8") as json_file:
        json.dump(data, json_file, ensure_ascii=True, indent=4)
    return filename

def advisory_record():
    return AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        repository_url="https://github.com/apache/struts",
        published_timestamp=1607532756,
        references=[
            "https://reference.to/some/commit/7532d2fb0d6081a12c2a48ec854a81a8b718be62"
        ],
        code_tokens=["AutomaticWorkQueueImpl"],
        paths=["pom.xml"],
    )

def test_extract_references_vuln_id():
    commit = Commit(
        commit_id="test_commit",
        repository="test_repository",
        cve_refs=[
            "test_advisory_record",
            "another_advisory_record",
            "yet_another_advisory_record",
        ],
    )
    advisory_record = AdvisoryRecord(vulnerability_id="test_advisory_record")
    result = extract_references_vuln_id(commit, advisory_record)
    assert result is True

def test_extract_referred_to_by_pages_linked_from_advisories_wrong_url(repository):
    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        references=["https://non-existing-url.com"],
    )
    commit = Commit(
        commit_id="r97993e3d78e1f5389b7b172ba9f308440830ce5",
        repository="test_repository",
    )
    assert not extract_referred_to_by_pages_linked_from_advisories(
        commit, advisory_record
    )

def test_sub_path_matching(paths, sub_paths):
    commit = Commit(
        commit_id="test_commit", repository="test_repository", changed_files=paths
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", paths=sub_paths
    )
    matched_paths = {
        "fire-nation/zuko/lightning.png",
        "water-bending/katara/necklace.gif",
    }
    assert extract_changed_relevant_paths(commit, advisory_record) == matched_paths

def test_report_generation():
    candidates = []
    for _ in range(100):
        annotated_candidates = Commit(
            commit_id=random_commit_hash(),
            repository=random_url(4),
            message=" ".join(random_list_of_strs(100)),
            timestamp=randint(0, 100000),
            hunks=random_list_of_hunks(1000, 42),
            diff=random_list_of_strs(200),
            changed_files=random_list_of_path(4, 42),
            message_reference_content=random_list_of_strs(42),
            jira_refs=random_list_of_jira_refs(42),
            ghissue_refs=random_list_of_github_issue_ids(100000, 42),
            cve_refs=random_list_of_cve(42),
            tags=random_list_of_strs(42),
            annotations=random_dict_of_strs(16, 10),
        )
        candidates.append(annotated_candidates)

    advisory = AdvisoryRecord(
        vulnerability_id=random_list_of_cve(max_count=1, min_count=1)[0],
        repository_url=random_url(4),
        published_timestamp=randint(0, 100000),
        last_modified_timestamp=randint(0, 100000),
        references=random_list_of_strs(42),
        references_content=random_list_of_strs(42),
        advisory_references=random_list_of_cve(42),
        affected_products=random_list_of_strs(42),
        description=" ".join(random_list_of_strs(42)),
        preprocessed_vulnerability_description=" ".join(random_list_of_strs(42)),
        relevant_tags=random_list_of_strs(42),
        versions=random_list_of_version(42, 4, 42),
        from_nvd=random_bool(),
        paths=random_list_of_path(4, 42),
        code_tokens=tuple(random_list_of_strs(42)),
    )

    filename = "test_report.html"
    if os.path.isfile(filename):
        os.remove(filename)
    generated_report = report_as_html(
        candidates, advisory, filename, statistics=sample_statistics()
    )
    assert os.path.isfile(generated_report)

def test_extract_referred_to_by_nvd(repository):
    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        references=[
            "https://lists.apache.org/thread.html/r97993e3d78e1f5389b7b172ba9f308440830ce5f051ee62714a0aa34@%3Ccommits.struts.apache.org%3E",
            "https://other.com",
        ],
    )
    commit = Commit(
        commit_id="r97993e3d78e1f5389b7b172ba9f308440830ce5",
        repository="test_repository",
    )
    assert extract_referred_to_by_nvd(commit, advisory_record) == {
        "https://lists.apache.org/thread.html/r97993e3d78e1f5389b7b172ba9f308440830ce5f051ee62714a0aa34@%3Ccommits.struts.apache.org%3E",
    }

    commit = Commit(
        commit_id="f4d2eabd921cbd8808b9d923ee63d44538b4154f",
        repository="test_repository",
    )
    assert extract_referred_to_by_nvd(commit, advisory_record) == set()

def test_is_commit_reachable_from_given_tag(repository):
    repo = repository
    commit = repo.get_commit("7532d2fb0d6081a12c2a48ec854a81a8b718be62")
    test_commit = preprocess_commit(commit)

    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        repository_url="https://github.com/apache/struts",
        paths=["pom.xml"],
        published_timestamp=1000000,
        versions=["STRUTS_2_1_3", "STRUTS_2_3_9"],
    )

    assert not is_commit_reachable_from_given_tag(
        test_commit, advisory_record, advisory_record.versions[0]
    )
    assert is_commit_reachable_from_given_tag(
        preprocess_commit(repo.get_commit("2e19fc6670a70c13c08a3ed0927abc7366308bb1")),
        advisory_record,
        advisory_record.versions[1],
    )

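# The test above checks whether a candidate commit is reachable from a tag that
# corresponds to an affected/fixed version. As a hedged illustration only (this is NOT
# the project's implementation, just the standard git equivalent of the same question),
# "reachable from tag T" means "is an ancestor of T", which plain git can answer
# directly. The repo_path argument is a hypothetical local checkout.
import subprocess


def is_ancestor_of_tag(repo_path: str, commit_id: str, tag: str) -> bool:
    # `git merge-base --is-ancestor A B` exits with status 0 iff A is an ancestor of B.
    result = subprocess.run(
        ["git", "-C", repo_path, "merge-base", "--is-ancestor", commit_id, tag],
        capture_output=True,
    )
    return result.returncode == 0
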
def test_extract_path_similarities():
    code_tokens = [
        "TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato",
        "Bolin+Bumi+Ozai+Katara",
        "Jinora.Appa.Unalaq.Zaheer",
        "Naga.LinBeifong",
        "Sokka.Kya",
        "Bumi=Momo=Naga=Iroh",
        "Sokka_Unalaq",
        "Sokka.Iroh.Pabu",
        "LinBeifong=Zuko",
        "TenzinBolinSokka",
        "Korra-AsamiSato-Pabu-Iroh",
        "Mako.Naga",
        "Jinora=Bumi",
        "BolinAppaKuvira",
        "TophBeifongIroh",
        "Amon+Zuko+Unalaq",
    ]
    paths = [
        "Unalaq/Aang/Suyin Beifong",
        "Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer",
        "Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko",
        "Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi",
        "Momo",
        "Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq",
    ]
    commit = Commit(changed_files=paths)
    advisory = AdvisoryRecord(
        vulnerability_id=random_list_of_cve(max_count=1, min_count=1)[0],
        code_tokens=code_tokens,
    )
    similarities: pandas.DataFrame = extract_path_similarities(commit, advisory)
    expected = (
        ",changed file,code token,jaccard,sorensen-dice,otsuka-ochiai,levenshtein,damerau-levenshtein,length diff,inverted normalized levenshtein,inverted normalized damerau-levenshtein\n"
        "0,Unalaq/Aang/Suyin Beifong,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.09090909090909091,0.16666666666666666,0.17677669529663687,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "1,Unalaq/Aang/Suyin Beifong,Bolin+Bumi+Ozai+Katara,0.0,0.0,0.0,4,4,0,0.6,0.6\n"
        "2,Unalaq/Aang/Suyin Beifong,Jinora.Appa.Unalaq.Zaheer,0.14285714285714285,0.25,0.25,4,4,0,0.6,0.6\n"
        "3,Unalaq/Aang/Suyin Beifong,Naga.LinBeifong,0.16666666666666666,0.2857142857142857,0.2886751345948129,3,3,1,0.7,0.7\n"
        "4,Unalaq/Aang/Suyin Beifong,Sokka.Kya,0.0,0.0,0.0,4,4,2,0.6,0.6\n"
        "5,Unalaq/Aang/Suyin Beifong,Bumi=Momo=Naga=Iroh,0.0,0.0,0.0,4,4,0,0.6,0.6\n"
        "6,Unalaq/Aang/Suyin Beifong,Sokka_Unalaq,0.2,0.3333333333333333,0.35355339059327373,4,4,2,0.6,0.6\n"
        "7,Unalaq/Aang/Suyin Beifong,Sokka.Iroh.Pabu,0.0,0.0,0.0,4,4,1,0.6,0.6\n"
        "8,Unalaq/Aang/Suyin Beifong,LinBeifong=Zuko,0.16666666666666666,0.2857142857142857,0.2886751345948129,4,4,1,0.6,0.6\n"
        "9,Unalaq/Aang/Suyin Beifong,TenzinBolinSokka,0.0,0.0,0.0,4,4,1,0.6,0.6\n"
        "10,Unalaq/Aang/Suyin Beifong,Korra-AsamiSato-Pabu-Iroh,0.0,0.0,0.0,5,5,1,0.5,0.5\n"
        "11,Unalaq/Aang/Suyin Beifong,Mako.Naga,0.0,0.0,0.0,4,4,2,0.6,0.6\n"
        "12,Unalaq/Aang/Suyin Beifong,Jinora=Bumi,0.0,0.0,0.0,4,4,2,0.6,0.6\n"
        "13,Unalaq/Aang/Suyin Beifong,BolinAppaKuvira,0.0,0.0,0.0,4,4,1,0.6,0.6\n"
        "14,Unalaq/Aang/Suyin Beifong,TophBeifongIroh,0.16666666666666666,0.2857142857142857,0.2886751345948129,4,4,1,0.6,0.6\n"
        "15,Unalaq/Aang/Suyin Beifong,Amon+Zuko+Unalaq,0.16666666666666666,0.2857142857142857,0.2886751345948129,4,4,1,0.6,0.6\n"
        "16,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.25,0.4,0.4008918628686366,8,8,0,0.19999999999999996,0.19999999999999996\n"
        "17,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Bolin+Bumi+Ozai+Katara,0.1,0.18181818181818182,0.1889822365046136,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "18,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Jinora.Appa.Unalaq.Zaheer,0.1,0.18181818181818182,0.1889822365046136,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "19,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Naga.LinBeifong,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "20,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Sokka.Kya,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "21,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Bumi=Momo=Naga=Iroh,0.1,0.18181818181818182,0.1889822365046136,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "22,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Sokka_Unalaq,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "23,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Sokka.Iroh.Pabu,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "24,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,LinBeifong=Zuko,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "25,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,TenzinBolinSokka,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "26,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Korra-AsamiSato-Pabu-Iroh,0.2,0.3333333333333333,0.3380617018914066,6,6,3,0.4,0.4\n"
        "27,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Mako.Naga,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "28,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Jinora=Bumi,0.125,0.2222222222222222,0.2672612419124244,7,7,6,0.30000000000000004,0.30000000000000004\n"
        "29,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,BolinAppaKuvira,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "30,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,TophBeifongIroh,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "31,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Amon+Zuko+Unalaq,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "32,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.23076923076923078,0.375,0.375,8,8,0,0.19999999999999996,0.19999999999999996\n"
        "33,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Bolin+Bumi+Ozai+Katara,0.09090909090909091,0.16666666666666666,0.17677669529663687,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "34,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Jinora.Appa.Unalaq.Zaheer,0.0,0.0,0.0,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "35,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Naga.LinBeifong,0.1,0.18181818181818182,0.20412414523193154,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "36,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Sokka.Kya,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "37,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Bumi=Momo=Naga=Iroh,0.09090909090909091,0.16666666666666666,0.17677669529663687,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "38,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Sokka_Unalaq,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "39,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Sokka.Iroh.Pabu,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "40,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,LinBeifong=Zuko,0.1,0.18181818181818182,0.20412414523193154,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "41,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,TenzinBolinSokka,0.1,0.18181818181818182,0.20412414523193154,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "42,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Korra-AsamiSato-Pabu-Iroh,0.18181818181818182,0.3076923076923077,0.31622776601683794,7,7,3,0.30000000000000004,0.30000000000000004\n"
        "43,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Mako.Naga,0.1111111111111111,0.2,0.25,7,7,6,0.30000000000000004,0.30000000000000004\n"
        "44,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Jinora=Bumi,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "45,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,BolinAppaKuvira,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "46,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,TophBeifongIroh,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "47,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Amon+Zuko+Unalaq,0.1,0.18181818181818182,0.20412414523193154,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "48,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.3333333333333333,0.5,0.5,9,9,1,0.09999999999999998,0.09999999999999998\n"
        "49,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Bolin+Bumi+Ozai+Katara,0.2,0.3333333333333333,0.35355339059327373,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "50,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Jinora.Appa.Unalaq.Zaheer,0.0,0.0,0.0,9,9,5,0.09999999999999998,0.09999999999999998\n"
        "51,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Naga.LinBeifong,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "52,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Sokka.Kya,0.0,0.0,0.0,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "53,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Bumi=Momo=Naga=Iroh,0.09090909090909091,0.16666666666666666,0.17677669529663687,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "54,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Sokka_Unalaq,0.0,0.0,0.0,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "55,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Sokka.Iroh.Pabu,0.0,0.0,0.0,9,9,6,0.09999999999999998,0.09999999999999998\n"
        "56,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,LinBeifong=Zuko,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "57,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,TenzinBolinSokka,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "58,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Korra-AsamiSato-Pabu-Iroh,0.18181818181818182,0.3076923076923077,0.31622776601683794,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "59,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Mako.Naga,0.0,0.0,0.0,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "60,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Jinora=Bumi,0.1111111111111111,0.2,0.25,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "61,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,BolinAppaKuvira,0.2222222222222222,0.36363636363636365,0.4082482904638631,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "62,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,TophBeifongIroh,0.2222222222222222,0.36363636363636365,0.4082482904638631,7,7,6,0.30000000000000004,0.30000000000000004\n"
        "63,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Amon+Zuko+Unalaq,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "64,Momo,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.0,0.0,0.0,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "65,Momo,Bolin+Bumi+Ozai+Katara,0.0,0.0,0.0,4,4,3,0.6,0.6\n"
        "66,Momo,Jinora.Appa.Unalaq.Zaheer,0.0,0.0,0.0,4,4,3,0.6,0.6\n"
        "67,Momo,Naga.LinBeifong,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "68,Momo,Sokka.Kya,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "69,Momo,Bumi=Momo=Naga=Iroh,0.25,0.4,0.5,3,3,3,0.7,0.7\n"
        "70,Momo,Sokka_Unalaq,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "71,Momo,Sokka.Iroh.Pabu,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "72,Momo,LinBeifong=Zuko,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "73,Momo,TenzinBolinSokka,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "74,Momo,Korra-AsamiSato-Pabu-Iroh,0.0,0.0,0.0,5,5,4,0.5,0.5\n"
        "75,Momo,Mako.Naga,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "76,Momo,Jinora=Bumi,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "77,Momo,BolinAppaKuvira,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "78,Momo,TophBeifongIroh,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "79,Momo,Amon+Zuko+Unalaq,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "80,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.13333333333333333,0.23529411764705882,0.23570226039551587,9,9,2,0.09999999999999998,0.09999999999999998\n"
        "81,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Bolin+Bumi+Ozai+Katara,0.08333333333333333,0.15384615384615385,0.16666666666666666,9,9,6,0.09999999999999998,0.09999999999999998\n"
        "82,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Jinora.Appa.Unalaq.Zaheer,0.08333333333333333,0.15384615384615385,0.16666666666666666,10,10,6,0.0,0.0\n"
        "83,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Naga.LinBeifong,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "84,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Sokka.Kya,0.1,0.18181818181818182,0.23570226039551587,9,9,8,0.09999999999999998,0.09999999999999998\n"
        "85,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Bumi=Momo=Naga=Iroh,0.0,0.0,0.0,10,10,6,0.0,0.0\n"
        "86,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Sokka_Unalaq,0.2222222222222222,0.36363636363636365,0.47140452079103173,8,8,8,0.19999999999999996,0.19999999999999996\n"
        "87,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Sokka.Iroh.Pabu,0.09090909090909091,0.16666666666666666,0.19245008972987526,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "88,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,LinBeifong=Zuko,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "89,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,TenzinBolinSokka,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "90,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Korra-AsamiSato-Pabu-Iroh,0.07692307692307693,0.14285714285714285,0.14907119849998599,10,10,5,0.0,0.0\n"
        "91,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Mako.Naga,0.1,0.18181818181818182,0.23570226039551587,9,9,8,0.09999999999999998,0.09999999999999998\n"
        "92,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Jinora=Bumi,0.0,0.0,0.0,10,10,8,0.0,0.0\n"
        "93,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,BolinAppaKuvira,0.2,0.3333333333333333,0.3849001794597505,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "94,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,TophBeifongIroh,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "95,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Amon+Zuko+Unalaq,0.09090909090909091,0.16666666666666666,0.19245008972987526,9,9,7,0.09999999999999998,0.09999999999999998\n"
    )
    assert similarities.to_csv() == expected

def test_advisory_basic():
    adv_rec = AdvisoryRecord(
        vulnerability_id="CVE-2015-5612",
        repository_url="https://github.com/abc/xyz",
    )
    assert adv_rec.repository_url == "https://github.com/abc/xyz"

def prospector(  # noqa: C901
    vulnerability_id: str,
    repository_url: str,
    publication_date: str = "",
    vuln_descr: str = "",
    tag_interval: str = "",
    version_interval: str = "",
    modified_files: "list[str]" = [],
    code_tokens: "list[str]" = [],
    time_limit_before: int = TIME_LIMIT_BEFORE,
    time_limit_after: int = TIME_LIMIT_AFTER,
    use_nvd: bool = False,
    nvd_rest_endpoint: str = "",
    backend_address: str = "",
    git_cache: str = GIT_CACHE,
    limit_candidates: int = MAX_CANDIDATES,
    active_rules: "list[str]" = ["ALL"],
    model_name: str = "",
) -> "tuple[list[Commit], AdvisoryRecord]":
    _logger.info("begin main commit and CVE processing")

    # -------------------------------------------------------------------------
    # advisory record extraction
    # -------------------------------------------------------------------------
    advisory_record = AdvisoryRecord(
        vulnerability_id=vulnerability_id,
        repository_url=repository_url,
        description=vuln_descr,
        from_nvd=use_nvd,
        nvd_rest_endpoint=nvd_rest_endpoint,
    )

    _logger.pretty_log(advisory_record)
    advisory_record.analyze(use_nvd=use_nvd)
    _logger.info(f"{advisory_record.code_tokens=}")

    if publication_date != "":
        advisory_record.published_timestamp = int(
            datetime.strptime(publication_date, r"%Y-%m-%dT%H:%M%z").timestamp()
        )

    if len(code_tokens) > 0:
        advisory_record.code_tokens += tuple(code_tokens)
        # drop duplicates
        advisory_record.code_tokens = list(set(advisory_record.code_tokens))

    # FIXME this should be handled better (or '' should not end up in the
    # modified_files in the first place)
    if modified_files != [""]:
        advisory_record.paths += modified_files

    _logger.info(f"{advisory_record.code_tokens=}")
    # print(advisory_record.paths)

    # -------------------------------------------------------------------------
    # retrieval of commit candidates
    # -------------------------------------------------------------------------
    with ExecutionTimer(
        core_statistics.sub_collection(name="retrieval of commit candidates")
    ):
        _logger.info(
            "Downloading repository {} in {}..".format(repository_url, git_cache)
        )
        repository = Git(repository_url, git_cache)
        repository.clone()
        tags = repository.get_tags()
        _logger.debug(f"Found tags: {tags}")

        _logger.info("Done retrieving %s" % repository_url)

        prev_tag = None
        following_tag = None
        if tag_interval != "":
            prev_tag, following_tag = tag_interval.split(":")
        elif version_interval != "":
            vuln_version, fixed_version = version_interval.split(":")
            prev_tag = get_tag_for_version(tags, vuln_version)[0]
            following_tag = get_tag_for_version(tags, fixed_version)[0]

        since = None
        until = None
        if advisory_record.published_timestamp:
            since = advisory_record.published_timestamp - time_limit_before
            until = advisory_record.published_timestamp + time_limit_after

        candidates = repository.get_commits(
            since=since,
            until=until,
            ancestors_of=following_tag,
            exclude_ancestors_of=prev_tag,
            filter_files="*.java",
        )

        _logger.info("Found %d candidates" % len(candidates))

    # if some code_tokens were found in the advisory text, require
    # that candidate commits touch some file whose path contains those tokens
    # NOTE: this works quite well for Java, not sure how general this criterion is

    # -------------------------------------------------------------------------
    # commit filtering
    #
    # Here we apply additional criteria to discard commits from the initial
    # set extracted from the repository
    #
    # -------------------------------------------------------------------------
    # if advisory_record.code_tokens != []:
    #     _logger.info(
    #         "Detected tokens in advisory text, searching for files whose path contains those tokens"
    #     )
    #     _logger.info(advisory_record.code_tokens)
    #     if modified_files == [""]:
    #         modified_files = advisory_record.code_tokens
    #     else:
    #         modified_files.extend(advisory_record.code_tokens)
    #     candidates = filter_by_changed_files(candidates, modified_files, repository)

    with ExecutionTimer(core_statistics.sub_collection(name="commit filtering")):
        candidates = filter_commits(candidates)

        _logger.debug(f"Collected {len(candidates)} candidates")

        if len(candidates) > limit_candidates:
            _logger.error(
                "Number of candidates exceeds %d, aborting." % limit_candidates
            )
            _logger.error(
                "Possible cause: the backend might be unreachable or otherwise unable to provide details about the advisory."
            )
            sys.exit(-1)

    # -------------------------------------------------------------------------
    # commit preprocessing
    # -------------------------------------------------------------------------
    with ExecutionTimer(
        core_statistics.sub_collection(name="commit preprocessing")
    ) as timer:
        raw_commit_data = dict()
        missing = []
        try:
            # Exploit the preprocessed commits already stored in the backend
            # and only process those that are missing. Note: the endpoint
            # does not exist (yet)
            r = requests.get(
                backend_address
                + "/commits/"
                + repository_url
                + "?commit_id="
                + ",".join(candidates)
            )
            _logger.info("The backend returned status '%d'" % r.status_code)
            if r.status_code != 200:
                _logger.error("This is weird...Continuing anyway.")
                missing = candidates
            else:
                raw_commit_data = r.json()
                _logger.info(
                    "Found {} preprocessed commits".format(len(raw_commit_data))
                )
        except requests.exceptions.ConnectionError:
            _logger.error(
                "Could not reach backend, is it running? The result of commit pre-processing will not be saved.",
                exc_info=log.config.level < logging.WARNING,
            )
            missing = candidates

        preprocessed_commits: "list[Commit]" = []
        for idx, commit in enumerate(raw_commit_data):
            if (
                commit
            ):  # None results are not in the DB, collect them to missing list, they need local preprocessing
                preprocessed_commits.append(Commit.parse_obj(commit))
            else:
                missing.append(candidates[idx])

        _logger.info("Preprocessing commits...")
        first_missing = len(preprocessed_commits)
        pbar = tqdm(missing)
        with Counter(
            timer.collection.sub_collection(name="commit preprocessing")
        ) as counter:
            counter.initialize("preprocessed commits", unit="commit")
            for commit_id in pbar:
                counter.increment("preprocessed commits")
                preprocessed_commits.append(
                    preprocess_commit(repository.get_commit(commit_id))
                )

    _logger.pretty_log(advisory_record)
    _logger.debug(f"preprocessed {len(preprocessed_commits)} commits")

    payload = [c.__dict__ for c in preprocessed_commits[first_missing:]]

    # -------------------------------------------------------------------------
    # save preprocessed commits to backend
    # -------------------------------------------------------------------------
    with ExecutionTimer(
        core_statistics.sub_collection(name="save preprocessed commits to backend")
    ):
        _logger.info("Sending preprocessed commits to backend...")
        try:
            r = requests.post(backend_address + "/commits/", json=payload)
            _logger.info(
                "Saving to backend completed (status code: %d)" % r.status_code
            )
        except requests.exceptions.ConnectionError:
            _logger.error(
                "Could not reach backend, is it running?"
                "The result of commit pre-processing will not be saved."
                "Continuing anyway.....",
                exc_info=log.config.level < logging.WARNING,
            )

    # TODO compute actual rank
    # This can be done by a POST request that creates a "search" job
    # whose inputs are the AdvisoryRecord, and the repository URL
    # The API returns immediately indicating a job id. From this
    # id, a URL can be constructed to poll the results asynchronously.
    # ranked_results = [repository.get_commit(c) for c in preprocessed_commits]

    # -------------------------------------------------------------------------
    # analyze candidates by applying rules and ML predictor
    # -------------------------------------------------------------------------
    with ExecutionTimer(
        core_statistics.sub_collection(name="analyze candidates")
    ) as timer:
        _logger.info("Extracting features from commits...")

        # annotated_candidates = []
        # with Counter(timer.collection.sub_collection("commit analysing")) as counter:
        #     counter.initialize("analyzed commits", unit="commit")
        #     # TODO remove "proactive" invocation of feature extraction
        #     for commit in tqdm(preprocessed_commits):
        #         counter.increment("analyzed commits")
        #         annotated_candidates.append(extract_features(commit, advisory_record))

        annotated_candidates = apply_rules(
            preprocessed_commits, advisory_record, active_rules=active_rules
        )
        annotated_candidates = rank(annotated_candidates, model_name=model_name)

    return annotated_candidates, advisory_record
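

# A minimal sketch of how the pieces above fit together. The helper is hypothetical
# (not part of the project): the CVE id and repository URL are reused from the test
# fixtures purely as placeholders, and the backend address is an assumption; only the
# prospector() and report_as_json() signatures shown in this file are relied upon.
def example_end_to_end() -> str:
    results, advisory_record = prospector(
        vulnerability_id="CVE-2020-26258",
        repository_url="https://github.com/apache/struts",
        use_nvd=True,
        backend_address="http://localhost:8000",
    )
    # Persist the ranked candidates together with the advisory data and
    # return the name of the generated JSON report.
    return report_as_json(results, advisory_record)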