""" This script is for unit testing of image_extractor Use pytest to run this script Command to run: /stampify$ python -m pytest """ import pytest from data_models.image import Image from extraction.content_extractors import image_extractor from tests.test_extraction import unit_test_utils as test_utils __EXTRACTOR = image_extractor.ImageExtractor() __soup = test_utils.soup('image.html') expected_output_1 = Image( 'http://www.google.com/' 'image_with_src_and_title.jpg', 100, 100, False, None, 'This is Image with src and title!', '') expected_output_2 = Image( 'http://www.google.com/' 'image_with_src_but_without_title.gif', 0, 0, True, None, None, '') expected_output_3 = Image( 'http://www.google.com/' 'image_with_src_and_title_inside_figure.jpg', 0, 0, False, None, 'This is Image with src and title inside figure!', '') expected_output_4 = Image( 'http://www.google.com/'
""" This script is for unit testing of embedded pinterest_pin extractor Use pytest to run this script Command to run: /stampify$ python -m pytest """ import pytest from data_models.embedded_pinterest_pin import EPinterestPin from extraction.content_extractors import embedded_pinterest_pin_extractor from tests.test_extraction import unit_test_utils as test_utils __EXTRACTOR = embedded_pinterest_pin_extractor.EPinterestPinExtractor() __soup = test_utils.soup('pinterest_pin.html') expected_output_1 \ = EPinterestPin('https://www.pinterest.com/pin/99360735500167749/') acceptable_test_data = [(__soup.find('a', class_='a_tag1'), expected_output_1), ] non_acceptable_test_data = [(__soup.find('a', class_='a_tag2'), None), (__soup.find('a', class_='a_tag3'), None), (__soup.find('a', class_='a_tag4'), None), (__soup.find('img'), None), ] @pytest.mark.parametrize("input_node, expected", acceptable_test_data) def test_tag_should_return_epinterestpin_object(input_node, expected):
""" This script is for unit testing of embedded_instagram_post_extractor Use pytest to run this script Command to run: /stampify$ python -m pytest """ import pytest from data_models.embedded_instagram_post import EInstagramPost from extraction.content_extractors import embedded_instagram_post_extractor from tests.test_extraction import unit_test_utils as test_utils __EXTRACTOR = embedded_instagram_post_extractor.EInstagramPostExtractor() __soup = test_utils.soup('instagram.html') expected_output_1 = EInstagramPost("post_shortcode1", '') expected_output_2 = EInstagramPost("post_shortcode2", '') expected_output_3 = EInstagramPost("short_code1", '') acceptable_test_data = [(__soup.find('blockquote', class_='node1'), expected_output_1), (__soup.find('blockquote', class_='node2'), expected_output_2), (__soup.find('iframe', class_='iframe1'), expected_output_3), ] non_acceptable_test_data = [(__soup.find('div').get_text(), None), (__soup.find('p'), None), (__soup.find('iframe', class_='iframe2'), None),
"""Unit tests for embedded_tweet_extractor.

Run with pytest from the project root:

    /stampify$ python -m pytest
"""
import pytest

from data_models.embedded_tweet import ETweet
from extraction.content_extractors import embedded_tweet_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance shared by the tests in this module.
__EXTRACTOR = embedded_tweet_extractor.ETweetExtractor()
# Fixture document; presumably parsed from 'embedded_tweet.html' by the
# test utilities -- confirm in unit_test_utils.
__soup = test_utils.soup('embedded_tweet.html')

expected_output_1 = ETweet('123456789123456789')
expected_output_2 = ETweet('987654321987654321')

# (fixture node, expected ETweet) pairs.
acceptable_test_data = [
    (
        __soup.find('blockquote', class_='twitter-tweet'),
        expected_output_1,
    ),
    (
        __soup.find('blockquote', class_='twitter-tweet-rendered'),
        expected_output_2,
    ),
]

# (fixture node, None) pairs: every entry carries None as its expected
# value, so the tuples are generated from the node lookups alone.
non_acceptable_test_data = [
    (node, None)
    for node in (
        __soup.find('p', class_='twitter-tweet'),
        __soup.find('p', class_='p1'),
    )
]
""" This script is for unit testing of quote extractor Use pytest to run this script Command to run: /stampify$ python -m pytest """ import pytest from data_models.quote import Quote from extraction.content_extractors import quote_extractor from tests.test_extraction import unit_test_utils as test_utils __EXTRACTOR = quote_extractor.QuoteExtractor() __soup = test_utils.soup('quote.html') expected_output_1 = Quote('This is a quote tag!', 'citation1') expected_output_2 = Quote('This is another quote tag!', None) acceptable_test_data = [(__soup.find('q', class_='q_tag1'), expected_output_1), (__soup.find('q', class_='q_tag2'), expected_output_2), ] non_acceptable_test_data = [(__soup.find('q', class_='q_tag3'), None), (__soup.find('img'), None), ] @pytest.mark.parametrize("input_node, expected", acceptable_test_data) def test_tag_should_return_quote_object(input_node, expected):
"""Unit tests for video_extractor.

Run with pytest from the project root:

    /stampify$ python -m pytest
"""
import pytest

from data_models.video import Video
from extraction.content_extractors import video_extractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance shared by the tests in this module.
__EXTRACTOR = video_extractor.VideoExtractor()
# Fixture document; presumably parsed from 'video.html' by the test
# utilities -- confirm in unit_test_utils.
__soup = test_utils.soup('video.html')

expected_output_1 = Video(
    ['http://www.google.com/video1.mp4'],
    100,
    100,
)
expected_output_2 = Video(
    [
        'http://www.google.com/movie1.mp4',
        'http://www.google.com/movie1.ogg',
    ],
    320,
    240,
)
expected_output_3 = Video(
    ['http://www.google.com/movie1.mp4'],
    320,
    240,
)
expected_output_4 = Video(
    ['http://www.google.com/embed_video1.mp4'],
    0,
    0,
)

# (fixture node, expected Video) pairs: three <video> nodes followed by
# one <embed> node.
acceptable_test_data = [
    (__soup.find('video', class_='video_node1'), expected_output_1),
    (__soup.find('video', class_='video_node2'), expected_output_2),
    (__soup.find('video', class_='video_node3'), expected_output_3),
    (__soup.find('embed', class_='embed1'), expected_output_4),
]
""" This script is for unit testing of embedded youtube video Use pytest to run this script Command to run: /stampify$ python -m pytest """ import pytest from data_models.embedded_youtube_video import EYouTubeVideo from extraction.content_extractors import embedded_youtube_video_extractor from tests.test_extraction import unit_test_utils as test_utils __EXTRACTOR = embedded_youtube_video_extractor.EYouTubeVideoExtractor() __soup = test_utils.soup('youtube_video.html') expected_output_1 = EYouTubeVideo("tgbNymZ7vqY", 0, 0) acceptable_test_data = [ (__soup.find('iframe', class_='iframe1'), expected_output_1), ] non_acceptable_test_data = [ (__soup.find('iframe', class_='iframe2'), None), (__soup.find('iframe', class_='iframe3'), None), (__soup.find('p'), None), ] @pytest.mark.parametrize("input_node, expected", acceptable_test_data) def test_tag_should_return_eyoutube_video_object(input_node, expected): actual_yt_video_content = __EXTRACTOR.validate_and_extract(input_node)
"""Unit tests for text_extractor.

Run with pytest from the project root:

    /stampify$ python -m pytest
"""
import pytest

from data_models.text import Text
from extraction.content_extractors.text_extractor import TextExtractor
from tests.test_extraction import unit_test_utils as test_utils

# Extractor instance shared by the tests in this module.
__EXTRACTOR = TextExtractor()
# Fixture document; presumably parsed from 'text.html' by the test
# utilities -- confirm in unit_test_utils.
__soup = test_utils.soup('text.html')

expected_output_1 = Text(
    'This is paragraph tag.',
    'p',
    is_bold=None,
)
expected_output_2 = Text(
    'Important Tag!',
    'h1',
    is_bold=None,
)
expected_output_3 = Text(
    'This is paragraph which is having strong content.',
    'p',
    is_bold=True,
)
# Not referenced by the data tables below; any use is outside this view.
expected_output_4 = Text(
    'This is Navigable String.',
    '',
    is_bold=None,
)

# (fixture node, expected Text) pairs.
acceptable_test_data = [
    (__soup.find('p', class_='p1'), expected_output_1),
    (__soup.find('h1'), expected_output_2),
    (__soup.find('p', class_='p2'), expected_output_3),
]

# (fixture node, None) pairs: every entry carries None as its expected
# value, so the tuples are generated from the node lookups alone.
non_acceptable_test_data = [
    (node, None)
    for node in (
        __soup.find('p', class_='p3'),
        __soup.find('img'),
    )
]