Example #1
0
def test_extract_sample_posts_info():
    """Verify extract_sample_posts_info against stubbed crawler collaborators.

    Four names in ``site_crawler.crawler`` are replaced with mocks.  The link
    extractor is stubbed to return two links, each link is mapped to its own
    soup object, and each soup is mapped to its own parser whose ``jsonify``
    returns a distinct string.  The test then checks how the collaborators
    were called and that the collected output is ``['json1', 'json2']``.
    """
    crawler_patches = patch.multiple('site_crawler.crawler',
                                     extract_posts_link=DEFAULT,
                                     PostPageParser=DEFAULT,
                                     get_soup_from_url=DEFAULT,
                                     extract_sample_category_from_site=DEFAULT)
    with crawler_patches as mocks:
        links_mock = mocks['extract_posts_link']
        soup_loader_mock = mocks['get_soup_from_url']
        factory_mock = mocks['PostPageParser'].parser_factory

        first_soup = mock_BeautifulSoup()
        second_soup = mock_BeautifulSoup()
        first_parser = mock_post_parser()
        second_parser = mock_post_parser()
        first_parser.jsonify.return_value = 'json1'
        second_parser.jsonify.return_value = 'json2'

        soup_by_link = {'post1': first_soup, 'post2': second_soup}

        def fake_get_soup(x):
            # Unknown links raise KeyError, which would fail the test loudly.
            return soup_by_link[x]

        def fake_parser_factory(x):
            # Anything that is not the first soup gets the second parser.
            if first_soup == x:
                return first_parser
            return second_parser

        links_mock.return_value = ['post1', 'post2']
        soup_loader_mock.side_effect = fake_get_soup
        factory_mock.side_effect = fake_parser_factory

        result = list(extract_sample_posts_info(3))

        # The requested count is forwarded unchanged to the link extractor.
        links_mock.assert_called_once_with(3)
        for link in ('post1', 'post2'):
            soup_loader_mock.assert_any_call(link)
        for soup in (first_soup, second_soup):
            factory_mock.assert_any_call(soup)

        # Exactly one fetch and one parser construction per link.
        assert soup_loader_mock.call_count == 2
        assert factory_mock.call_count == 2
        assert result == ['json1', 'json2']
def test_extract_sample_posts_info():
    """Exercise extract_sample_posts_info with every crawler dependency mocked.

    ``extract_posts_link`` is stubbed to return the links 'post1' and 'post2';
    ``get_soup_from_url`` hands back a dedicated soup per link and
    ``PostPageParser.parser_factory`` a dedicated parser per soup.  Each
    parser's ``jsonify`` returns a fixed string, and the test expects those
    strings, in link order, as the function's output.
    """
    with patch.multiple(
            'site_crawler.crawler',
            extract_posts_link=DEFAULT,
            PostPageParser=DEFAULT,
            get_soup_from_url=DEFAULT,
            extract_sample_category_from_site=DEFAULT) as patched:
        fetch_links = patched['extract_posts_link']
        fetch_soup = patched['get_soup_from_url']
        build_parser = patched['PostPageParser'].parser_factory

        soup_a, soup_b = [mock_BeautifulSoup() for _ in range(2)]
        parser_a, parser_b = [mock_post_parser() for _ in range(2)]
        parser_a.jsonify.return_value = 'json1'
        parser_b.jsonify.return_value = 'json2'

        # Wire the stubs: link -> soup -> parser.
        fetch_links.return_value = ['post1', 'post2']
        fetch_soup.side_effect = lambda x: {'post1': soup_a,
                                            'post2': soup_b}[x]
        build_parser.side_effect = (
            lambda x: parser_b if not (soup_a == x) else parser_a)

        result = list(extract_sample_posts_info(3))

        # Collaborators must have been driven with the expected arguments.
        fetch_links.assert_called_once_with(3)
        fetch_soup.assert_any_call('post1')
        fetch_soup.assert_any_call('post2')
        build_parser.assert_any_call(soup_a)
        build_parser.assert_any_call(soup_b)

        # No extra fetches or parser constructions beyond one per link.
        assert (fetch_soup.call_count, build_parser.call_count) == (2, 2)
        assert result == ['json1', 'json2']
Example #3
0
import json
from db.post import add_update_product, find_posts
from site_crawler.crawler import extract_sample_posts_info

# Walk everything extract_sample_posts_info(20) yields: parse each JSON string,
# pass the parsed object to add_update_product (presumably a DB upsert --
# confirm in db.post), and echo the post to stdout.
for post_json in extract_sample_posts_info(20):
    add_update_product(json.loads(post_json))
    # 'unicode-escape' turns \uXXXX sequences in the JSON text into the actual
    # characters so the printed output is readable.
    # NOTE(review): on Python 3 this call requires post_json to be bytes,
    # since str has no .decode() there.
    # print(...) with a single argument behaves the same as the Python 2
    # print statement and is also valid Python 3 syntax.
    print(post_json.decode('unicode-escape'))

# Show whatever find_posts returns for the query "download".
print(find_posts("download"))