def test_get_publication_id_list_wrong_status_code(mock_get):
    """An id-list request answered with status 400 produces an empty list."""
    # The object returned by the mocked call reports an error status.
    mock_get.return_value.status_code = 400

    received_ids = NCBIPubScrapper().get_publication_id_list(0, 2)

    assert received_ids == []
def test_get_publication_content_wrong_status_code(mock_get):
    """A content request answered with status 400 produces ``None``."""
    # The object returned by the mocked call reports an error status.
    mock_get.return_value.status_code = 400

    received_content = NCBIPubScrapper().get_publication_content('12345')

    assert received_content is None
def test_get_publication_id_list_ok(mock_get, example_search_response):
    """With a 200 response, ids from the example search payload are returned."""
    # Configure the mocked response: JSON payload plus a success status.
    fake_response = mock_get.return_value
    fake_response.json.return_value = example_search_response
    fake_response.status_code = 200

    received_ids = NCBIPubScrapper().get_publication_id_list(0, 2)

    for expected_id in ('31782494', '31782426'):
        assert expected_id in received_ids
def test_get_extracted_emails_no_mails():
    """Text that contains no e-mail address yields zero extracted e-mails."""
    text_without_addresses = """ test test test another test string lorem ipsum. """

    found = NCBIPubScrapper().get_extracted_emails(text_without_addresses)

    assert len(found) == 0
def test_get_publication_content_ok(mock_get, example_publication_content):
    """With a 200 response, the decoded publication body is returned."""
    # Configure the mocked response: decoded body plus a success status.
    fake_response = mock_get.return_value
    fake_response.content.decode.return_value = example_publication_content
    fake_response.status_code = 200

    received_content = NCBIPubScrapper().get_publication_content('12345')

    assert 'Baylor College of Medicine' in received_content
def test_get_extracted_emails_multiple_mails():
    """Text with three addresses yields exactly three extracted e-mails."""
    # NOTE(review): the addresses in the text and in the expected values look
    # masked/redacted; confirm them against the intended test data.
    text_with_addresses = """ test test test [email protected] another test string [email protected], lorem ipsum. [email protected] """

    found = NCBIPubScrapper().get_extracted_emails(text_with_addresses)

    expected_addresses = (
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
    )
    for address in expected_addresses:
        assert address in found
    assert len(found) == 3
def test_create_ncbi_object():
    """``create_ncbi_object`` fills e-mail, publication id and publication URL."""
    test_email = '*****@*****.**'
    test_publication_id = '12345'
    # Reference object built by hand with the values the scrapper should set.
    expected = NCBIObject(
        email=test_email,
        publication_id=test_publication_id,
        publication_url=f'https://www.ncbi.nlm.nih.gov/pubmed/{test_publication_id}',
    )

    created = NCBIPubScrapper().create_ncbi_object(test_email, test_publication_id)

    # Compare field by field, in the same order as the individual checks.
    for field_name in ('email', 'publication_id', 'publication_url'):
        assert getattr(created, field_name) == getattr(expected, field_name)
def ncbi_scrapper_run_view():
    """Show the NCBI scrapper form and store newly scraped e-mails on submit.

    On a valid submission the scrapper is run for the requested number of
    publications. Each scraped e-mail that is neither stored already nor
    queued earlier in the same run is saved as an ``NCBIMail`` in the
    selected mail package, and a message is flashed per e-mail.

    Returns:
        A redirect back to this view after a valid submission, otherwise
        the rendered form template.
    """
    form = NCBIScrapperForm()
    # Choices are assigned on every request, before validation runs.
    form.mail_package.choices = get_mail_package_choices()

    if not form.validate_on_submit():
        return render_template('userpanel/ncbi_scrapper/scrapper.html', form=form)

    scrapper = NCBIPubScrapper()
    ncbi_objects = scrapper.run(0, form.publication_number.data)

    objects_to_add = []
    # New objects are added to the session only after the loop, so the
    # database lookup below cannot see e-mails queued earlier in this run.
    # Track them here so a repeated e-mail is not queued twice.
    queued_emails = set()
    for ncbi_object in ncbi_objects:
        ncbi_mail = NCBIMail(
            publication_id=ncbi_object.publication_id,
            ncbi_publication_url=ncbi_object.publication_url,
            email=ncbi_object.email,
            package_id=form.mail_package.data,
        )
        already_known = (
            ncbi_mail.email in queued_emails
            or NCBIMail.query.filter_by(email=ncbi_mail.email).first()
        )
        if already_known:
            flash(f'E-mail {ncbi_mail.email} already exist in DB', 'danger')
            continue
        queued_emails.add(ncbi_mail.email)
        objects_to_add.append(ncbi_mail)
        flash(f'Successfully added {ncbi_mail.email}', 'info')

    if objects_to_add:
        db.session.add_all(objects_to_add)
        db.session.commit()
        # One message (adjacent f-strings are concatenated) followed by the
        # category; previously the second half was passed as the category.
        flash(
            f'You have successfully scrapped {len(objects_to_add)} e-mails '
            f'from {form.publication_number.data} NCBI publications',
            'success',
        )
    return redirect(url_for('userpanel.ncbi_scrapper_run_view'))
def test_get_extracted_emails_ok(example_publication_content):
    """The expected address is extracted from the example publication content."""
    found = NCBIPubScrapper().get_extracted_emails(example_publication_content)

    assert '*****@*****.**' in found