def _split_references(raw_references):
    """Split a comma-separated string into a list of references.

    Whitespace around each entry is stripped and empty entries are dropped,
    so an empty input yields ``[]`` rather than ``['']``.
    """
    return [
        reference.strip() for reference in raw_references.split(',')
        if reference.strip()
    ]


def dashboard_page(state):
    """Render the Prospector dashboard page.

    The page is drawn top to bottom on every call:

    1. Static title and explanatory text.
    2. The advisory record form (identifier, description, project name,
       repository URL, published date), pre-filled from
       ``get_vulnerability_data`` when the lookup succeeds.
    3. Once the record is confirmed: storing a not-yet-known vulnerability
       and its references, then the optional advanced settings.
    4. When "Search prospects!" is pressed: gathering candidate commits,
       computing and scaling ranking vectors, ranking them and showing the
       output together with a download link.

    Args:
        state: session-state object. This function reads and writes its
            ``vulnerability_id``, ``advisory_record_confirmed``,
            ``advanced_settings`` and the ``*_df`` dataframe attributes, and
            calls ``state.clear()`` when "CLEAR FIELDS" is pressed.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    st.title("PROSPECTOR")

    st.subheader(
        "The search engine for fix-commits for security vulnerabilities in OSS"
    )
    st.write('By SAP - Antonino SABETTA & Daan HOMMERSOM')
    st.write('''
        How to use Prospector:
        \n1) Provide a vulnerability description, (GitHub) repository URL and a release date (or pick a CVE).
        \n2) Check whether Prospector fills in the rest correctly, and provide additional information if needed.
        \n3) Find security fixes!
    ''')

    st.write('''
        The objective of Prospector is to minimize the (manual) effort needed for finding
        the fix commit of a known vulnerability in an open-source software project.
        Since these repositories can contain hundreds thousands commits, the commits are
        firstly filtered by only selecting all commits within two years before and
        one hundred days after the release date with a maximum of respectively 5215 and 100 commits.
        A study has shown that this selection has 93% recall.
        \n
        Firstly, an advisory record is created containing information on the vulnerability.
        This advisory record is used to select candidate commits. For these candidate commits,
        ranking vectors are computed. These ranking vectors consist of several components that
        can be used to predict whether a candidate commit is the fix commit we are looking for.
        These candidates are then ranked on this probability score.

        In 77.68% of the cases, the fix is in the top 5. In 84.03% in the top 10,
        and in 88.59% in the top 20.
    ''')

    st.subheader("ADVISORY RECORD")
    state.vulnerability_id = st.text_input(
        "Vulnerability identifyer:",
        value=state.vulnerability_id or '').upper()

    # Defaults used when no identifier was entered or the lookup fails.
    cve_description = ''
    cve_published_timestamp = time.time()
    preprocessed_description = None
    references = []

    if state.vulnerability_id:
        try:
            (cve_description, cve_published_timestamp,
             preprocessed_description,
             references) = get_vulnerability_data(state.vulnerability_id,
                                                  state.vulnerabilities_df,
                                                  state.db_references_df)
        except Exception:
            # Deliberate best-effort: any lookup failure falls back to manual
            # input. `Exception` (not a bare except) keeps KeyboardInterrupt
            # and SystemExit propagating.
            # The explicit key keeps this widget distinct from the second
            # fallback input below, which uses the same label.
            references = _split_references(
                st.text_input(
                    "Please provide useful references (separated by commas)",
                    key="references_lookup_fallback"))

    vulnerability_description = st.text_area("Vulnerability description",
                                             value=cve_description)

    # Suggest a project name from the proper nouns (tag 'NNP') in the
    # description.
    project_name = st.text_input(
        "Project name",
        value=' '.join(token.text
                       for token in nlp(vulnerability_description)
                       if token.tag_ == 'NNP'))

    if project_name != '':
        suggested_repo_url = map_description_to_repository_url(
            vulnerability_id=state.vulnerability_id,
            description=project_name,
            vulnerabilities_df=state.vulnerabilities_df,
            repository_url_df=state.repository_url_df)
    else:
        suggested_repo_url = ''
    repo_url = st.text_input("Repository URL", value=suggested_repo_url)

    published_date = st.date_input("Vulnerability published date",
                                   value=datetime.fromtimestamp(
                                       int(cve_published_timestamp)))
    published_timestamp = int(time.mktime(published_date.timetuple()))

    # The confirm button is only drawn until the record has been confirmed;
    # afterwards the confirmation is kept in the state.
    if state.advisory_record_confirmed:
        state.advisory_record_confirmed = True
    else:
        state.advisory_record_confirmed = st.button("CONFIRM ADVISORY RECORD")
    if not state.advisory_record_confirmed:
        return

    # option to clear the state
    if st.button("CLEAR FIELDS"):
        state.clear()

    # if it was a new vulnerability, add it to the DB
    is_new_vulnerability = (state.vulnerabilities_df is None
                            or state.vulnerability_id not in list(
                                state.vulnerabilities_df.index))
    if is_new_vulnerability:
        vulnerabilities_connection, _ = database.connect_with_vulnerabilities_database(
            vulnerabilities_db_path)
        database.add_vulnerability_to_database(vulnerabilities_connection,
                                               state.vulnerability_id,
                                               repo_url,
                                               vulnerability_description,
                                               published_timestamp)

        # if it was not an NVD CVE, or the extraction failed
        if len(references) == 0:
            try:
                # Only the references are needed here; the description and
                # timestamp were already consumed by the form above.
                _, _, nvd_references = database.extract_nvd_content(
                    state.vulnerability_id)
                references = list(nvd_references)
            except Exception:
                references = _split_references(
                    st.text_input(
                        "Please provide useful references (separated by commas)",
                        key="references_nvd_fallback"))

        database.add_vulnerability_references_to_database(
            vulnerabilities_connection,
            state.vulnerability_id,
            references,
            driver=None)
        prospector_connection, _ = connect_with_commits_database(
            commits_db_path)
        database.add_tags_to_database(prospector_connection,
                                      tags=None,
                                      git_repo=None,
                                      repo_url=repo_url,
                                      verbose=True)
        # Reload the dataframes so they include what was just stored.
        (state.vulnerabilities_df, state.db_references_df,
         state.advisory_references_df, state.tags_df,
         state.repository_url_df,
         state.fixes_df) = load_vulnerabilities()

    # gather values
    repository_tags = gather_tags(repo_url, state.tags_df)
    versions_in_description = filter.retreive_all_versions_from_description(
        vulnerability_description)
    # dict.fromkeys removes duplicate tags while keeping first-seen order.
    tags_in_description = list(
        dict.fromkeys(
            tag for version in versions_in_description
            for tag in filter.get_tag_for_version(repository_tags, version)))
    references = [
        state.db_references_df.at[index, 'url'] for index in
        state.db_references_df[state.db_references_df.vulnerability_id ==
                               state.vulnerability_id].index
    ]

    advisory_references = list(state.advisory_references_df[
        state.advisory_references_df.vulnerability_id ==
        state.vulnerability_id].url)

    # allow the user to influence the filtering
    state.advanced_settings = st.checkbox("Show advanced settings",
                                          state.advanced_settings)
    if state.advanced_settings:
        first_commit_timestamp = rank.get_first_commit_timestamp(
            repo_url
        )  #@TODO: add a column to the database containing this value
        first_commit_date = datetime.fromtimestamp(
            int(first_commit_timestamp)).date()
        today = datetime.fromtimestamp(int(time.time())).date()

        # Default interval: two years before to one hundred days after the
        # published date, clamped to [first commit, today].
        lower_bound = max(published_date - timedelta(days=730),
                          first_commit_date)
        upper_bound = min(published_date + timedelta(days=100), today)

        since, until = st.slider("Published date based interval",
                                 min_value=first_commit_date,
                                 max_value=today,
                                 value=(lower_bound, upper_bound))
        since = int(time.mktime(since.timetuple()))
        until = int(time.mktime(until.timetuple()))

        # references
        additional_references = _split_references(
            st.text_input("Additional references (separated by commas)"))
        if additional_references:
            references += additional_references
            vulnerabilities_connection, _ = database.connect_with_vulnerabilities_database(
                vulnerabilities_db_path)
            database.add_vulnerability_references_to_database(
                vulnerabilities_connection,
                state.vulnerability_id,
                references,
                driver=None)

        selected_references = st.multiselect('Advisory references',
                                             tuple(references),
                                             default=tuple(references))

        # affected versions
        relevant_tags = st.multiselect(
            'Relevant tags',
            tuple(repository_tags),
            default=tuple(tags_in_description)
            if len(tags_in_description) != 0 else None)
        # st input int k
        k = st.number_input("The number of results to show",
                            min_value=1,
                            max_value=50,
                            value=10,
                            step=1)
    else:
        selected_references = references
        relevant_tags = tags_in_description
        since, until = None, None
        k = 10

    if not st.button("Search prospects!"):
        return

    model, min_max_scaler = load_model()
    prospector_connection, prospector_cursor = connect_with_commits_database(
        commits_db_path)

    # The description may have been edited in the form, so it is preprocessed
    # again from the text area content.
    preprocessed_description = rank.simpler_filter_text(
        vulnerability_description)

    is_selected_reference = (
        (state.db_references_df.vulnerability_id == state.vulnerability_id)
        & (state.db_references_df.url.isin(selected_references)))
    references_content = tuple(
        state.db_references_df[is_selected_reference].preprocessed_content)
    references_content = rank.extract_n_most_occurring_words(
        rank.remove_forbidden_words_from_string(
            string=' '.join(references_content),
            forbidden_words=rank.reference_stopwords +
            project_name.split(' ')),
        n=20)

    st.write(references_content)

    advisory_record = rank.Advisory_record(
        state.vulnerability_id,
        published_timestamp,
        repo_url,
        selected_references,
        references_content,
        advisory_references,
        vulnerability_description,
        prospector_connection,
        preprocessed_vulnerability_description=preprocessed_description,
        relevant_tags=relevant_tags,
        verbose=True,
        since=since,
        until=until)

    print("\nGathering candidate commits and computing ranking vectors.")
    advisory_record.gather_candidate_commits()
    advisory_record.compute_ranking_vectors()

    # scaling some columns using the pretrained scaler, and some vulnerability specific
    advisory_record.ranking_vectors[
        vulnerability_specific_columns] = MinMaxScaler().fit_transform(
            advisory_record.ranking_vectors[vulnerability_specific_columns])
    advisory_record.ranking_vectors[
        universal_columns] = min_max_scaler.transform(
            advisory_record.ranking_vectors[universal_columns])
    advisory_record.ranking_vectors.drop(columns=columns_to_drop,
                                         inplace=True)

    advisory_record.ranked_candidate_commits = rank.rank_candidates(
        model, advisory_record.ranking_vectors)

    advisory_record.ranking_vectors.set_index('commit_id', inplace=True)
    output = prospector_main.advisory_record_to_output(advisory_record,
                                                       model,
                                                       prospector_cursor,
                                                       k=k)
    tmp_download_link = download_link(
        output, 'Prospector_results-{}.txt'.format(state.vulnerability_id),
        "Click here to download Prospector's results as a txt file!")

    st.header("Results")

    st.markdown(tmp_download_link, unsafe_allow_html=True)

    st.write(
        "Showing the top {} candidates from {} candidates considered".format(
            k, len(advisory_record.ranking_vectors)))
    st.write(output)
def test_get_tag_for_version_errors(version, error, tags):
    """Check that looking up `version` among `tags` raises `error`."""
    expecting_error = pytest.raises(error)
    with expecting_error:
        filter.get_tag_for_version(tags, version)
def test_get_tag_for_version_errors_2(tags=None):
    """Check that looking up 'version-1.8' raises ValueError.

    Args:
        tags: tags to search in; an empty list is used when omitted.
    """
    # A None sentinel gives every call its own fresh list instead of one
    # list object shared through a mutable default argument.
    if tags is None:
        tags = []
    with pytest.raises(ValueError):
        filter.get_tag_for_version(tags, 'version-1.8')
def test_get_tag_for_version(version, tag, tags):
    """Check that `tag` is among the tags returned for `version`."""
    # the lookup returns a list of tags that could correspond to the version
    candidate_tags = filter.get_tag_for_version(tags, version)
    assert tag in candidate_tags