def parse(self, response):
    """Handle one listing page: schedule its offers, then maybe the next page.

    Yields one ``response.follow`` request per offer link (handled by
    ``self.parse_offer``).  Afterwards the pagination link is followed with
    ``self.parse`` as callback, unless one of the stop conditions holds:

    * the page has no "next" arrow link carrying an ``href`` (last page),
    * the page has no ``creation-date`` element, so the age cutoff cannot
      be evaluated,
    * the last ``creation-date`` element on the page parses to something
      older than ``GumtreeRequirements(req).number_of_days`` days ago.

    :param response: page response exposing ``xpath()`` and ``follow()``.
    """
    # follow links to offer pages
    for offer in response.xpath("//a[@class='href-link tile-title-text']"):
        yield response.follow(url=offer.attrib['href'],
                              callback=self.parse_offer)

    # follow pagination link
    # The arrow anchor is matched by its exact class string, so both
    # orderings of the two icon classes are queried.
    arrow = response.xpath(
        "//a[@class='arrows icon-angle-right-gray icon-right-arrow']")
    arrow_alt = response.xpath(
        "//a[@class='arrows icon-right-arrow icon-angle-right-gray']")
    next_page = arrow if 'href' in arrow.attrib else arrow_alt

    next_href = next_page.attrib.get('href')
    if next_href is None:
        # Neither variant has an href: nothing left to follow.  Previously
        # this ended in a KeyError on the final ``attrib['href']`` lookup.
        return

    creation_dates = response.xpath("//div[@class='creation-date']")
    if not creation_dates:
        # Without a date the cutoff below cannot be checked.  The old code
        # raised IndexError here and therefore never paginated further
        # either; stop explicitly instead of crashing.
        return

    # Only the last creation-date element found on the page is checked.
    # NOTE(review): presumably that is the oldest offer on the page - confirm
    # the listing is sorted newest first.
    parsed_date = extract_date(creation_dates[-1].get())

    gumtree_req = GumtreeRequirements(req)
    cal = Calendar()
    date = cal.parse(parsed_date)
    required_date = cal.parse(
        '{} days ago'.format(gumtree_req.number_of_days))
    if date < required_date:
        # Last offer is already older than the allowed age: stop paginating.
        return

    yield response.follow(next_href, self.parse)
def test_extract_date_from_polish_from_2_days_ago():
    """A creation-date div reading '2 dni temu' must yield '2 days ago'."""
    markup = '<div class="creation-date"><span>2 dni temu</span></div>'
    expected = '2 days ago'
    assert extract_date(markup) == expected
def test_extract_date_from_polish_from_1_day_ago():
    """A creation-date div reading '1 dzień temu' must yield '1 days ago'.

    The expected string keeps the plural 'days' even for a single day.
    """
    markup = '<div class="creation-date"><span>1 dzień temu</span></div>'
    expected = '1 days ago'
    assert extract_date(markup) == expected
def test_extract_date_from_polish_since_12_hours():
    """A creation-date div reading '12 godziny temu' must yield '12 hours ago'."""
    markup = '<div class="creation-date"><span>12 godziny temu</span></div>'
    expected = '12 hours ago'
    assert extract_date(markup) == expected
def test_extract_date_from_polish_since_one_hour():
    """A creation-date div reading 'godzinę temu' must yield '1 hours ago'.

    The input carries no digit; the expected output still starts with '1'.
    """
    markup = '<div class="creation-date"><span>godzinę temu</span></div>'
    expected = '1 hours ago'
    assert extract_date(markup) == expected
def test_extract_date_from_polish_from_3_minutes_ago():
    """A creation-date div reading '3 minuty temu' must yield '3 minutes ago'."""
    markup = '<div class="creation-date"><span>3 minuty temu</span></div>'
    expected = '3 minutes ago'
    assert extract_date(markup) == expected