def test_known_single_listing(
    listing_name,
    url,
    exception_name,
    match,
    make_response,
    headers,
    mock_details_page,
    in_memory_db,
):
    """
    Parsing known listings.
    """
    mock_response = make_response(listing_name, url)
    # Map the parametrized scenario name to the exception the scraper should
    # raise; ``None`` means the listing is expected to parse cleanly.
    known_failures = {
        "parsing_error": exceptions.ListingParsingError,
        "captcha": exceptions.Captcha,
        None: None,
    }
    expected = known_failures[exception_name]
    if exception_name is None:
        raising = nullcontext()
    else:
        raising = pytest.raises(expected, match=match)
    app = create_app()
    with HTTMock(mock_response), HTTMock(mock_details_page):
        with raising:
            with app.app_context():
                _seloger(url, headers=headers)
def test_captcha(mock_captcha, mock_proxies, in_memory_db):
    """A captcha response from the site must surface as a RuntimeError."""
    app = create_app("cli")
    with HTTMock(mock_captcha), app.app_context():
        with pytest.raises(RuntimeError, match=r"Failed to reach .*"):
            leboncoin("rent", "92130")
def test_invalid_inputs(
    inputs, exception, match, make_search_and_response, mock_proxies, in_memory_db
):
    """Invalid search parameters must raise the parametrized exception."""
    mock_response = make_search_and_response("success")["response"]
    # Start from a known-good search and overlay the invalid inputs.
    search = {"transaction": "rent", "post_codes": ["92130"], **inputs}
    app = create_app("cli")
    with HTTMock(mock_response), app.app_context():
        with pytest.raises(exception, match=match):
            leboncoin(**search)
def test_request_error(exception_name, make_error_response, mock_proxies, in_memory_db):
    """Network-level failures (proxy error, timeout) surface as RuntimeError."""
    error_types = {
        "proxy": requests.exceptions.ProxyError,
        "timeout": requests.exceptions.Timeout,
    }
    mock_response = make_error_response(error_types[exception_name])
    app = create_app("cli")
    with HTTMock(mock_response), app.app_context():
        with pytest.raises(RuntimeError, match=r"Failed to reach .*"):
            leboncoin("rent", "92130")
def test_known_query(
    name,
    overrides,
    make_search_and_response,
    mock_image,
    mock_proxies,
    in_memory_db,
    images_folder,
    caplog,
):
    """End-to-end scrape of a known query fixture, with optional overrides."""
    fixture = make_search_and_response(name)
    search = fixture["search"]
    search.update(overrides)
    app = create_app("cli")
    with HTTMock(fixture["response"]), HTTMock(mock_image), app.app_context():
        # Some fixture listings are deliberately malformed: the scraper
        # tolerates them but logs them for debugging.  Raise the capture
        # threshold so those logs don't pollute the test-suite output.
        with caplog.at_level(logging.CRITICAL):
            leboncoin(**search)
def run(event, context):
    """
    Run a given scrape and store the results in the database.

    The ``event`` must either carry a ``search`` object directly, or be an
    SNS event whose message contains one.  The ``search`` object must include
    at least ``transaction``, ``post_codes`` and ``sources``.

    When the search fans out over more than one (source, post_code) pair,
    the handler re-invokes itself asynchronously once per pair and returns
    immediately; otherwise it runs the scrapers, then publishes a summary to
    the admins SNS topic and the new listings to the notification topic.

    Raises:
        ValueError: malformed event / missing or incomplete search object.
        RuntimeError: event shape is neither direct nor SNS.
    """
    # --- extract the search object from the event ---------------------------
    if "search" in event:
        search = event.get("search", None)
    elif "Records" in event:
        records = event.get("Records", [])
        if len(records) > 1:
            # An SNS-triggered lambda receives exactly one record per invoke.
            msg = "Unexpectedly got multiple SNS events."
            raise ValueError(msg)
        sns_message = json.loads(records[0].get("Sns", {}).get("Message", ""))
        if not sns_message:
            msg = "No SNS message found."
            raise ValueError(msg)
        search = sns_message.get("search", None)
    else:
        msg = f"Unexpected event:\n{event}"
        raise RuntimeError(msg)

    if search is None:
        msg = (
            "'event' must include a 'search' object or be a "
            "SNS event with a search object in the message"
        )
        raise ValueError(msg)
    if not {"transaction", "post_codes", "sources"}.issubset(search):
        msg = (
            "The 'search' object must include at least "
            "'transaction', 'post_codes' and 'sources' objects."
        )
        raise ValueError(msg)

    # --- fan out: one async self-invocation per (source, post_code) pair ----
    sources = search.pop("sources")
    sources_postcodes = list(it.product(sources, search["post_codes"]))
    if len(sources_postcodes) > 1:
        # Client creation is loop-invariant; build it once.
        lambda_ = boto3.client("lambda")
        for source, post_code in sources_postcodes:
            search["sources"] = [source]
            search["post_codes"] = [post_code]
            data = {"search": search}
            lambda_.invoke(
                FunctionName=context.function_name,
                InvocationType="Event",
                Payload=json.dumps(data),
            )
        return

    # --- run the scrapers ----------------------------------------------------
    app = create_app()
    added_listings: List[Listing] = []
    seen_listings: List[Listing] = []
    failed_listings: List[str] = []
    for source in sources:
        scraper = getattr(scrapers, source)
        with app.app_context():
            results = scraper(**search)
            added_listings += [listing.to_dict() for listing in results["added"]]
            seen_listings += results["seen"]
            failed_listings += results["failed"]

    num_added = len(added_listings)
    num_seen = len(seen_listings)
    num_failed = len(failed_listings)
    num_total = num_added + num_seen + num_failed
    msg = (
        f"All done!✨ 🍰 ✨\n"
        f"Of the {num_total} listings visited, we added {num_added}, "
        f"had already seen {num_seen} and choked on {num_failed}."
    )
    if failed_listings:
        msg += "\nFailed Listings:\n\n • {}".format("\n • ".join(failed_listings))
    logger.info(msg)

    # --- publish result info to admins topic (best-effort) -------------------
    sns = boto3.client("sns")
    admins_topic_arn = os.getenv("ADMINS_TOPIC_ARN")
    try:
        sns.publish(TopicArn=admins_topic_arn, Message=msg)
    except ClientError as e:
        # A failed admin notification must not abort the run.
        logger.warning(f"Could not publish to admins topic:\n{e}")

    # --- publish new listings to the relevant topic --------------------------
    notify = event.get("notify", {})
    if not notify:
        logger.debug("Nobody to notify.")
        return
    new_listings_topic_arn = os.getenv("NEW_LISTINGS_TOPIC_ARN")
    # BUG FIX: the original test was inverted (`is not None`), so the
    # handler bailed out precisely when a topic WAS configured and the
    # new-listing notifications were never sent.
    if new_listings_topic_arn is None:
        logger.warning("No topic to send notifications.")
        return
    message = json.dumps(added_listings)
    message_attributes = {
        k: {"DataType": "String.Array", "StringValue": json.dumps(notify[k])}
        for k in notify
    }
    try:
        pub = sns.publish(
            TopicArn=new_listings_topic_arn,
            Message=message,
            MessageAttributes=message_attributes,
        )
        logger.debug(f"Response : {str(pub)}")
    except ClientError as e:
        logger.warning(f"Could not publish to new listing topic:\n{e}")
from alembic import context from sqlalchemy import engine_from_config, pool from pogam import create_app, db # this is the Alembic Config object, which provides # access to the values within the .ini file in use. config = context.config # Interpret the config file for Python logging. # This line sets up loggers basically. fileConfig(config.config_file_name) # add your model's MetaData object here for 'autogenerate' support app = create_app() config.set_main_option("sqlalchemy.url", app.config.get("SQLALCHEMY_DATABASE_URI")) target_metadata = db.metadata # other values from the config, defined by the needs of env.py, # can be acquired: # my_important_option = config.get_main_option("my_important_option") # ... etc. def run_migrations_offline(): """Run migrations in 'offline' mode. This configures the context with just a URL and not an Engine, though an Engine is acceptable