def test_get_publication_id_list_wrong_status_code(mock_get):
    """An HTTP 400 answer to the id search must produce an empty id list."""
    failed_response = mock_get.return_value
    failed_response.status_code = 400

    ids = NCBIPubScrapper().get_publication_id_list(0, 2)

    assert ids == []
def test_get_publication_content_wrong_status_code(mock_get):
    """An HTTP 400 answer to a publication request must yield ``None``."""
    failed_response = mock_get.return_value
    failed_response.status_code = 400

    content = NCBIPubScrapper().get_publication_content('12345')

    assert content is None
def test_get_publication_id_list_ok(mock_get, example_search_response):
    """An HTTP 200 search response must expose the ids from the fixture."""
    ok_response = mock_get.return_value
    ok_response.status_code = 200
    ok_response.json.return_value = example_search_response

    ids = NCBIPubScrapper().get_publication_id_list(0, 2)

    # Both ids are expected to be present in the search-response fixture.
    for expected_id in ('31782494', '31782426'):
        assert expected_id in ids
def test_get_extracted_emails_no_mails():
    """Text without any e-mail address must give an empty extraction result."""
    text_without_addresses = """
    test test test
    another test string
    lorem ipsum.
    """

    found = NCBIPubScrapper().get_extracted_emails(text_without_addresses)

    assert len(found) == 0
def test_get_publication_content_ok(mock_get, example_publication_content):
    """An HTTP 200 publication response must return the decoded page content."""
    ok_response = mock_get.return_value
    ok_response.status_code = 200
    # The mocked response's content.decode() hands back the fixture text.
    ok_response.content.decode.return_value = example_publication_content

    content = NCBIPubScrapper().get_publication_content('12345')

    assert 'Baylor College of Medicine' in content
def test_get_extracted_emails_multiple_mails():
    """Text containing three addresses must yield exactly three e-mails."""
    # NOTE(review): the address literals below look masked/redacted; confirm
    # them against the original test data before relying on this test.
    text_with_addresses = """
    test test test
    [email protected]
    another test string
    [email protected], lorem ipsum.
    [email protected]
    """

    found = NCBIPubScrapper().get_extracted_emails(text_with_addresses)

    expected_addresses = (
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
    )
    for address in expected_addresses:
        assert address in found
    assert len(found) == 3
def test_create_ncbi_object():
    """``create_ncbi_object`` must fill email, publication id and PubMed URL."""
    email = '*****@*****.**'
    pub_id = '12345'

    expected = NCBIObject(
        email=email,
        publication_id=pub_id,
        publication_url=f'https://www.ncbi.nlm.nih.gov/pubmed/{pub_id}',
    )

    actual = NCBIPubScrapper().create_ncbi_object(email, pub_id)

    # Compare field by field, in the same order as the original assertions.
    for field_name in ('email', 'publication_id', 'publication_url'):
        assert getattr(actual, field_name) == getattr(expected, field_name)
def ncbi_scrapper_run_view():
    """Run the NCBI scrapper from the submitted form and store new e-mails.

    On a valid form submission the scrapper is run for the requested number
    of publications. Every scraped e-mail that has no matching ``NCBIMail``
    row yet is saved under the selected mail package, a message is flashed
    for each address, and the user is redirected back to this view. When the
    form is not (validly) submitted, the form page is rendered instead.

    Returns:
        A redirect to this view after a valid submission, otherwise the
        rendered scrapper form template.
    """
    form = NCBIScrapperForm()
    # Choices are assigned on every call, before the form is validated.
    form.mail_package.choices = get_mail_package_choices()

    if form.validate_on_submit():
        scrapper = NCBIPubScrapper()
        ncbi_objects = scrapper.run(0, form.publication_number.data)

        objects_to_add = []

        for ncbi_object in ncbi_objects:
            ncbi_mail = NCBIMail(
                publication_id=ncbi_object.publication_id,
                ncbi_publication_url=ncbi_object.publication_url,
                email=ncbi_object.email,
                package_id=form.mail_package.data,
            )

            # NOTE(review): new objects are only added to the session after
            # this loop, so this lookup presumably cannot see an address
            # scraped twice in the same run -- confirm that scrapper.run()
            # returns unique e-mails.
            exist = NCBIMail.query.filter_by(email=ncbi_mail.email).first()

            if not exist:
                objects_to_add.append(ncbi_mail)
                flash(f'Successfully added {ncbi_mail.email}', 'info')
            else:
                flash(f'E-mail {ncbi_mail.email} already exist in DB', 'danger')

        if objects_to_add:
            db.session.add_all(objects_to_add)
            db.session.commit()

            # The two literals are joined into ONE message (no comma between
            # them); the category is passed separately as 'success'.
            flash(
                f'You have successfully scrapped {len(objects_to_add)} e-mails '
                f'from {form.publication_number.data} NCBI publications',
                'success',
            )

        return redirect(url_for('userpanel.ncbi_scrapper_run_view'))

    return render_template('userpanel/ncbi_scrapper/scrapper.html', form=form)
def test_get_extracted_emails_ok(example_publication_content):
    """The publication-content fixture must yield the expected address."""
    found = NCBIPubScrapper().get_extracted_emails(example_publication_content)

    assert '*****@*****.**' in found