Beispiel #1
0
    def parseCar(self, response):
        """Extract one car listing from ``response`` and append it to a CSV file.

        Each field is located with an XPath expression keyed on an element id
        or class. Year, mileage, inspection ("TA") and price are passed through
        the matching ``Sanitizer`` helper; the other fields are stored as
        extracted. A row is appended to ``self.fileName`` only when
        ``Sanitizer.isCarValid(make, price, ta)`` is truthy.

        Args:
            response: Object exposing ``xpath(...)`` and ``request.url``
                (presumably a Scrapy response -- confirm against the caller).

        Returns:
            None. The only effect is the optional row appended to the CSV file.
        """
        DESC_XPATH = './/div[contains(@id, \"msg_div_msg\")]/text()'
        MAKE_XPATH = './/td[contains(@id, \"tdo_31\")]/b/text()|.//td[contains(@id, \"tdo_24\")]/b/text()'
        YEAR_XPATH = './/td[contains(@id, \"tdo_18\")]/text()'
        ENGINE_XPATH = './/td[contains(@id, \"tdo_15\")]/text()'
        GEARBOX_XPATH = './/td[contains(@id, \"tdo_35\")]/text()'
        MILEAGE_XPATH = './/td[contains(@id, \"tdo_16\")]/text()'
        TA_XPATH = './/td[contains(@id, \"tdo_223\")]/text()'
        PRICE_XPATH = './/td[contains(@id, \"tdo_8\")]/text()|.//span[contains(@id, \"tdo_8\")]/text()'
        BODY_XPATH = './/td[contains(@id, \"tdo_32\")]/text()'
        LOCATION_XPATH = './/td[@class=\"ads_contacts\"]/text()'

        # Description: flatten every kind of line break to a single space so
        # the text stays on one CSV line. '\r\n' is replaced first so that a
        # Windows line ending yields one space rather than two.
        desc_lines = response.xpath(DESC_XPATH).extract()
        desc = ''.join(
            str(line).replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')
            for line in desc_lines
        )

        # Make
        make = response.xpath(MAKE_XPATH).extract_first()
        # Year
        year = Sanitizer.sanitizeDate(
            response.xpath(YEAR_XPATH).extract_first())

        # Engine & engine type: first whitespace-separated token is the
        # engine, the second (if any) is the engine type.
        engine = None
        engineType = None
        engine_str = response.xpath(ENGINE_XPATH).extract_first()
        if engine_str is not None:
            parts = engine_str.split()
            # An empty or whitespace-only cell produces no tokens at all;
            # leave both values as None instead of raising IndexError.
            if parts:
                engine = parts[0]
            if len(parts) > 1:
                engineType = parts[1]

        # Gearbox
        gearbox = response.xpath(GEARBOX_XPATH).extract_first()
        # Mileage
        mileage = Sanitizer.sanitizeMileage(
            response.xpath(MILEAGE_XPATH).extract_first())
        # TA (inspection)
        ta = Sanitizer.sanitizeInspection(
            response.xpath(TA_XPATH).extract_first())
        # Price
        price = Sanitizer.sanitizePrice(
            response.xpath(PRICE_XPATH).extract_first())
        # Body
        body = response.xpath(BODY_XPATH).extract_first()

        # Location: first contact text node that is neither None nor a single
        # space. The explicit default keeps a listing without such a node from
        # raising StopIteration; its location is recorded as None.
        contacts = response.xpath(LOCATION_XPATH).extract()
        location = next(
            (contact for contact in contacts
             if contact is not None and contact != ' '),
            None,
        )

        if Sanitizer.isCarValid(make, price, ta):
            # The context manager closes the file even if writing the row fails.
            with open(self.fileName, 'a', newline='',
                      encoding='utf8') as csv_file:
                csv.writer(csv_file).writerow([
                    make, desc, year, engine, engineType, gearbox, mileage,
                    body, ta, price, location, response.request.url
                ])
Beispiel #2
0
 def testDefaultValueMiles(self):
     # A mileage string containing a space is expected to come back as the
     # integer 111222.
     raw_mileage = '111 222'
     expected_mileage = 111222
     self.assertEqual(Sanitizer.sanitizeMileage(raw_mileage),
                      expected_mileage)
Beispiel #3
0
 def testAlreadyGoodValueMiles(self):
     # A digits-only mileage string is expected to come back as the
     # integer 111222.
     raw_mileage = '111222'
     expected_mileage = 111222
     self.assertEqual(Sanitizer.sanitizeMileage(raw_mileage),
                      expected_mileage)
Beispiel #4
0
 def testReverseCaseInspection(self):
     # Year-first input ('2019.10') is expected to yield 1 October 2019
     # rendered with ``time_format``.
     actual = Sanitizer.sanitizeInspection('2019.10')
     first_of_month = datetime.datetime(2019, 10, 1)
     expected = first_of_month.strftime(time_format)
     self.assertEqual(actual, expected)
Beispiel #5
0
 def testFaultyCaseInspection(self):
     # The literal string 'None' is expected to be sanitized to None.
     raw_inspection = 'None'
     actual = Sanitizer.sanitizeInspection(raw_inspection)
     self.assertEqual(actual, None)
Beispiel #6
0
 def testAlreadyGoodValueDate(self):
     # A bare year string is expected to come back unchanged.
     raw_date = '2008'
     expected_year = '2008'
     self.assertEqual(Sanitizer.sanitizeDate(raw_date), expected_year)
Beispiel #7
0
 def testDefaultCaseInspection(self):
     # Month-first input ('10.2019') is expected to yield 1 October 2019
     # rendered with ``time_format``.
     actual = Sanitizer.sanitizeInspection('10.2019')
     first_of_month = datetime.datetime(2019, 10, 1)
     expected = first_of_month.strftime(time_format)
     self.assertEqual(actual, expected)
Beispiel #8
0
 def testEmptyDate(self):
     # A missing date (None) is expected to stay None.
     actual = Sanitizer.sanitizeDate(None)
     self.assertEqual(actual, None)
Beispiel #9
0
 def testOtherValueDate(self):
     # The input 'abc' is expected to be sanitized to None.
     raw_date = 'abc'
     actual = Sanitizer.sanitizeDate(raw_date)
     self.assertEqual(actual, None)
Beispiel #10
0
 def testDefaultCaseDate(self):
     # A year followed by a month name is expected to be reduced to the year.
     # assertEqual is required here: assertTrue(value, '2008') would treat
     # '2008' as the failure message and pass for any truthy result.
     self.assertEqual(Sanitizer.sanitizeDate('2008 aprīlis'), '2008')
Beispiel #11
0
 def testHugeValuePrice(self):
     # A price with an inner space and a trailing ' $' is expected to come
     # back as the digit string '25000'.
     raw_price = '25 000 $'
     expected_price = '25000'
     self.assertEqual(Sanitizer.sanitizePrice(raw_price), expected_price)
Beispiel #12
0
 def testAlreadyGoodValuePrice(self):
     # A digits-only price string is expected to come back unchanged.
     raw_price = '500'
     expected_price = '500'
     self.assertEqual(Sanitizer.sanitizePrice(raw_price), expected_price)
Beispiel #13
0
 def testEmptyPrice(self):
     # A missing price (None) is expected to stay None.
     actual = Sanitizer.sanitizePrice(None)
     self.assertEqual(actual, None)
Beispiel #14
0
 def testDefaultCasePrice(self):
     # A price with a trailing ' $' is expected to come back as the digit
     # string '5000'.
     raw_price = '5000 $'
     expected_price = '5000'
     self.assertEqual(Sanitizer.sanitizePrice(raw_price), expected_price)