def test_subscriber_review_ads_drops_old_ads_for_that_city():
    """After reviewing only ``ad1``, the subscriber has had ``ad1`` but not ``ad2``."""
    # NOTE(review): another test in this file carries the same name. Python
    # keeps only the last definition of a name, so one of the two is never
    # collected by pytest -- consider renaming one of them.
    subscriber = Subscriber(4711)
    ad1 = Ad.from_dict(nettenMenschenDict)
    ad2 = Ad.from_dict(mitbewohnerinFuer21qm)

    # Without ``assert`` this comparison would be evaluated and thrown away.
    assert subscriber.review_ads({ad1}, 'ber') == set()

    assert subscriber.already_had(ad1)
    assert subscriber.already_had(ad2) == False
def test_subscriber_review_ads_returns_new_ads():
    """Each review returns only the ads that were not part of the previous one."""
    subscriber = Subscriber(4711)
    ad1 = Ad.from_dict(nettenMenschenDict)
    ad2 = Ad.from_dict(mitbewohnerinFuer21qm)

    # Start with an empty review. ``set()`` is used because ``{}`` is an
    # empty dict literal, not an empty set.
    subscriber.review_ads(set(), 'ber')

    assert subscriber.review_ads({ad1}, 'ber') == {ad1}
    assert subscriber.review_ads({ad1, ad2}, 'ber') == {ad2}
    # Repeating the same review yields nothing new.
    assert subscriber.review_ads({ad1, ad2}, 'ber') == set()
    assert subscriber.review_ads({ad1, ad2}, 'ber') == set()
def test_subscriber_review_ads_drops_old_ads():
    """An ad that was absent from one review is returned again when it reappears."""
    first_ad = Ad.from_dict(nettenMenschenDict)
    second_ad = Ad.from_dict(mitbewohnerinFuer21qm)
    subscriber = Subscriber(4711)

    subscriber.review_ads({first_ad, second_ad}, 'ber')

    # Nothing new: second_ad was already part of the previous review.
    only_second = subscriber.review_ads({second_ad}, 'ber')
    assert only_second == set()

    # This happens when an ad is updated after a day: it was not scraped
    # the last time around, so now it should be sent again.
    both_again = subscriber.review_ads({first_ad, second_ad}, 'ber')
    assert both_again == {first_ad}
def test_subscriber_review_ads_drops_old_ads_for_that_city():
    """Reviews for 'ber' and 'muc' are tracked independently of each other."""
    # NOTE(review): another test in this file carries the same name. Python
    # keeps only the last definition of a name, so one of the two is never
    # collected by pytest -- consider renaming one of them.
    subscriber = Subscriber(4711)
    ad1 = Ad.from_dict(nettenMenschenDict)
    ad2 = Ad.from_dict(mitbewohnerinFuer21qm)

    assert subscriber.review_ads({ad1}, 'ber') == set()
    assert subscriber.review_ads({ad2}, 'muc') == set()
    assert subscriber.review_ads({ad1}, 'ber') == set()

    # An empty review for 'ber'. ``set()`` is used because ``{}`` is an
    # empty dict literal, not an empty set.
    assert subscriber.review_ads(set(), 'ber') == set()
    assert subscriber.review_ads({ad1}, 'ber') == {ad1}

    # These two sets are completely independent of each other.
    assert subscriber.review_ads({ad2}, 'muc') == set()
def get_ads_from_listings(listings: List[BeautifulSoup], city: str, first_run=False) -> set:
    """Turn scraped listing elements into a set of ``Ad`` objects.

    For every listing the offer url, title, size, rent, availability,
    flat-share details and the "searching for" marker are pulled out of the
    markup, collected in a dict and passed to ``Ad.from_dict``.

    :param listings: listing elements, one per offer
    :param city: value stored under the ``'city'`` key of every ad
    :param first_run: accepted for callers, but not read by this function
    :return: set containing one ``Ad`` per parsed listing
    """
    collected = set()

    for entry in listings:
        # The first 'detailansicht' anchor holds the offer's relative url.
        detail_anchors = entry.find_all('a', class_='detailansicht')
        relative_url = detail_anchors[0].get_attribute_list('href')[0]
        offer_url = 'https://www.wg-gesucht.de/{}'.format(relative_url)

        price_box = entry.find(class_="detail-size-price-wrapper")
        price_anchor = price_box.find(class_="detailansicht")

        # The anchor's first child is split on '|' into size and rent after
        # spaces, newlines and the euro sign have been removed.
        size_and_rent = next(price_anchor.children)
        for unwanted in (' ', '\n', '€'):
            size_and_rent = size_and_rent.replace(unwanted, '')
        size, rent = size_and_rent.split('|')

        mates = price_anchor.find('span').get_attribute_list('title')[0]

        # Emojis read faster. 'Mitbwohner' (missing the first 'e') is a typo
        # that comes from the page itself, so both spellings are replaced.
        searching_for = price_anchor.find_all('img')[-1].get_attribute_list('alt')[0]
        for word, emoji in (('Mitbewohnerin', '🚺'),
                            ('Mitbwohner', '🚹'),
                            ('Mitbewohner', '🚹')):
            searching_for = searching_for.replace(word, emoji)

        headline = entry.find(class_='headline-list-view')
        title = headline.find('a').text.replace('\n', '').strip()

        # Everything from the first 'in' onwards is tokenised; the tokens
        # before 'Verfügbar:' form the location, the rest the availability.
        paragraph = entry.find('p')
        start = paragraph.text.index('in')
        tokens = paragraph.text[start:].replace('\n', '').split()
        split_at = tokens.index('Verfügbar:')
        location = ' '.join(tokens[:split_at])
        availability = ' '.join(tokens[split_at:])

        wg_details = '{} {}'.format(mates, location)

        collected.add(Ad.from_dict({
            'city': city,
            'url': offer_url,
            'title': title,
            'size': size,
            'rent': rent,
            'availability': availability,
            'wg_details': wg_details,
            'searching_for': searching_for,
        }))

    return collected
def test_filter_available_3months_is_ok_forever():
    """A subscriber with a 12-week ``FilterAvailability`` is interested in the 21qm ad."""
    flat_ad = Ad.from_dict(mitbewohnerinFuer21qm)
    subscriber = Subscriber(4711)

    twelve_weeks = datetime.timedelta(weeks=12)
    subscriber.add_filter(FilterAvailability, twelve_weeks)

    assert subscriber.is_interested_in(flat_ad)
def test_parsing_of_availability():
    """The 21qm ad is available from 2018-02-25 and has no end date."""
    # NOTE(review): another test in this file carries the same name. Python
    # keeps only the last definition of a name, so one of the two is never
    # collected by pytest -- consider renaming one of them.
    ad = Ad.from_dict(mitbewohnerinFuer21qm)

    assert ad.available_from() == datetime.datetime(day=25, month=2, year=2018)
    # ``None`` is a singleton, so it is compared by identity.
    assert ad.available_to() is None
def test_parsing_of_availability():
    """The ``nettenMenschenDict`` ad is available from 2018-03-01 to 2018-03-31."""
    # NOTE(review): another test in this file carries the same name. Python
    # keeps only the last definition of a name, so one of the two is never
    # collected by pytest -- consider renaming one of them.
    expected_start = datetime.datetime(day=1, month=3, year=2018)
    expected_end = datetime.datetime(day=31, month=3, year=2018)

    flat_ad = Ad.from_dict(nettenMenschenDict)

    assert flat_ad.available_from() == expected_start
    assert flat_ad.available_to() == expected_end
def test_ad_to_chat_message():
    """The chat message built from ``nettenMenschenDict`` equals ``nettenMenschenString``."""
    message = Ad.from_dict(nettenMenschenDict).to_chat_message()
    assert message == nettenMenschenString
def test_filter_gender_ok():
    """A subscriber with ``FilterGender`` set to 'w' is interested in the 21qm ad."""
    subscriber = Subscriber(4711)
    subscriber.add_filter(FilterGender, 'w')

    flat_ad = Ad.from_dict(mitbewohnerinFuer21qm)
    interested = subscriber.is_interested_in(flat_ad)

    assert interested
def test_filter_rent_ok():
    """A subscriber with ``FilterRent`` set to 600 is interested in the 21qm ad."""
    subscriber = Subscriber(4711)
    subscriber.add_filter(FilterRent, 600)

    flat_ad = Ad.from_dict(mitbewohnerinFuer21qm)
    interested = subscriber.is_interested_in(flat_ad)

    assert interested
def test_filter_rent_too_expensive():
    """A subscriber with ``FilterRent`` set to 500 is not interested in the 21qm ad."""
    subscriber = Subscriber(4711)
    subscriber.add_filter(FilterRent, 500)

    flat_ad = Ad.from_dict(mitbewohnerinFuer21qm)
    interested = subscriber.is_interested_in(flat_ad)

    # Compared against False explicitly so that a None result fails the test.
    assert interested == False
def test_filter_city_not_ok():
    """A subscriber whose ``FilterCity`` lists 'muc' and 'stuggi' rejects the 21qm ad."""
    wanted_cities = ['muc', 'stuggi']
    subscriber = Subscriber(4711)
    subscriber.add_filter(FilterCity, wanted_cities)

    flat_ad = Ad.from_dict(mitbewohnerinFuer21qm)
    interested = subscriber.is_interested_in(flat_ad)

    # Compared against False explicitly so that a None result fails the test.
    assert interested == False
def test_subscriber_review_ads_returns_empty_set_on_first_run():
    """The very first review of a fresh subscriber returns an empty set."""
    first_ad = Ad.from_dict(nettenMenschenDict)
    second_ad = Ad.from_dict(mitbewohnerinFuer21qm)
    subscriber = Subscriber(4711)

    # This behaviour prevents the ads that are already online from being
    # sent to the user.
    first_review = subscriber.review_ads({first_ad, second_ad}, 'ber')
    assert first_review == set()
def test_filter_available_2months_ok():
    """A subscriber with a 4-week ``FilterAvailability`` is interested in the ``nettenMenschenDict`` ad."""
    flat_ad = Ad.from_dict(nettenMenschenDict)
    subscriber = Subscriber(4711)

    four_weeks = datetime.timedelta(weeks=4)
    subscriber.add_filter(FilterAvailability, four_weeks)

    assert subscriber.is_interested_in(flat_ad)