def test_util_date_timewithtoutz(self):
    """Check ``util.date_time`` with ``util.localtime`` replaced by a gmtime wrapper.

    ``util.localtime`` is temporarily swapped for a function that returns
    ``time.gmtime(t)``, then ``util.date_time`` is called with
    ``local_time_zone=True``. The only assertion is that the result is not
    the very same object as the input timestamp.
    """
    t = time.time()

    def ablocaltime(t):
        from time import gmtime

        return gmtime(t)

    # Remember the previous state so the patch does not leak into other tests.
    missing = object()
    previous = getattr(util, "localtime", missing)
    util.localtime = ablocaltime
    try:
        res = util.date_time(t, local_time_zone=True)
    finally:
        if previous is missing:
            del util.localtime
        else:
            util.localtime = previous

    # assertTrue instead of the deprecated alias assert_ (removed in Python 3.12).
    self.assertTrue(res is not t)
def crawl(zipCodeSearchString, tr, adsGraph):
    """Fetch iPhone offers for a location and record accepted ads in ``adsGraph``.

    Sends a GET request through ``tr`` to the eBay Kleinanzeigen search page,
    walks every ``<article class="aditem">`` element of the response and adds
    four triples per accepted ad (model, zip code, price, posting time).

    An ad is accepted only when all of the following hold:

    * its zip code, time string and numeric price can be extracted,
    * the price is greater than 50,
    * the time string contains ``'Gestern'`` (German for "yesterday"),
    * the lower-cased title contains neither ``"reparatur "`` nor ``"defekt"``,
    * the lower-cased title contains the name (index 0) of an entry in the
      module-level ``modelList``.

    Ads that fail any of these checks are skipped; the remaining ads are
    still processed.

    Args:
        zipCodeSearchString: Value sent as the ``locationStr`` query parameter.
        tr: Object exposing ``get(url, params=...)`` whose return value has a
            ``text`` attribute (presumably a requests-like session; confirm).
        adsGraph: Object exposing ``add(triple)``; it is mutated in place.

    Returns:
        The same ``adsGraph`` object that was passed in.
    """
    payload = {
        'keywords': 'iPhone',
        'locationStr': zipCodeSearchString,
        'categoryId': '173',
        'adType': 'OFFER',
    }
    response = tr.get('https://www.ebay-kleinanzeigen.de/s-suchanfrage.html', params=payload)

    # Every ad found in this run shares one timestamp: today at 12:00 local time.
    # NOTE(review): ads are filtered on 'Gestern' but stamped with today's
    # date -- confirm this is intended.
    todayNoon = datetime.now().replace(hour=12, minute=0, second=0, microsecond=0)
    todayTimeStamp = time.mktime(todayNoon.timetuple())
    adTimeLiteral = Literal(date_time(todayTimeStamp))

    soup = BeautifulSoup(response.text, 'html.parser')
    for ad in soup.find_all("article", class_="aditem"):
        adId = ad.attrs['data-adid']
        title = ad.contents[3].contents[1].contents[0].contents[0]

        try:
            zipCode = ad.contents[5].contents[4].replace(" ", "").strip()
        except IndexError:
            continue

        try:
            adTimeString = ad.contents[7].contents[0].replace(" ", "").strip()
        except IndexError:
            continue

        try:
            # Raw string: "\D" in a normal literal is an invalid escape sequence.
            price = int(re.sub(r"\D", "", ad.contents[5].contents[1].contents[0]))
        except ValueError:
            # No digits in the price field (e.g. "VB"-only listings): int("") fails.
            continue

        titleLower = title.lower()
        if price <= 50 or 'Gestern' not in adTimeString:
            continue
        if "reparatur " in titleLower or "defekt" in titleLower:
            continue

        # Find out which iPhone model the ad is about.
        for model in modelList:
            if model[0].lower() in titleLower:
                foundiPhoneModelResource = model[1]
                break
        else:
            # No known model in the title (i.e. an old model): skip only this
            # ad. The previous `break` here stopped processing all remaining ads.
            continue

        adRessource = URIRef("ad:" + adId)
        zipCodeURI = URIRef("zipCode:" + zipCode)
        priceLiteral = Literal(price)

        adsGraph.add((adRessource, containsModel, foundiPhoneModelResource))
        adsGraph.add((adRessource, isInZipCode, zipCodeURI))
        adsGraph.add((adRessource, hasPrice, priceLiteral))
        adsGraph.add((adRessource, postedOn, adTimeLiteral))

    return adsGraph
def test_util_date_time_tistimewithtz(self):
    """Format the current time with ``local_time_zone=True`` and check index 4 is "-"."""
    now = time.time()
    formatted = util.date_time(now, local_time_zone=True)
    # presumably the separator after a four-digit year -- confirm against util.date_time
    fifth_char = formatted[4:5]
    self.assertTrue(fifth_char == "-")
def test_util_date_time_tisnonebuttz(self):
    """Pass ``None`` as the timestamp with ``local_time_zone=True``; index 4 must be "-"."""
    no_timestamp = None
    formatted = util.date_time(no_timestamp, local_time_zone=True)
    # presumably the separator after a four-digit year -- confirm against util.date_time
    fifth_char = formatted[4:5]
    self.assertTrue(fifth_char == "-")
def finalize(self, result):
    """Serialize ``self.graph`` with the "pretty-xml" format.

    The destination is a ``file:results-<x>.rdf`` string where ``<x>`` is
    whatever ``date_time()`` returns when called without arguments
    (presumably a timestamp for "now"; confirm). ``result`` is not used.
    """
    # TODO: add plugin options for specifying where to send
    # output.
    destination = "file:results-%s.rdf" % date_time()
    self.graph.serialize(destination, format="pretty-xml")
def test_util_date_time_tistimewithtz(self):
    """Format the current time with ``local_time_zone=True`` and check index 4 is "-"."""
    t = time.time()
    res = util.date_time(t, local_time_zone=True)
    # assertTrue instead of the deprecated alias assert_ (removed in Python 3.12).
    # presumably index 4 is the separator after a four-digit year -- confirm.
    self.assertTrue(res[4:5] == "-")
def test_util_date_time_tisnonebuttz(self):
    """Pass ``None`` as the timestamp with ``local_time_zone=True``; index 4 must be "-"."""
    t = None
    res = util.date_time(t, local_time_zone=True)
    # assertTrue instead of the deprecated alias assert_ (removed in Python 3.12).
    # presumably index 4 is the separator after a four-digit year -- confirm.
    self.assertTrue(res[4:5] == "-")
def test_util_date_time_tisnoneandnotz(self):
    """Pass ``None`` as the timestamp with ``local_time_zone=False``; index 4 must be "-"."""
    t = None
    res = util.date_time(t, local_time_zone=False)
    # assertTrue instead of the deprecated alias assert_ (removed in Python 3.12).
    # presumably index 4 is the separator after a four-digit year -- confirm.
    self.assertTrue(res[4:5] == "-")
def test_util_date_time_tistime(self):
    """Format the current time with ``local_time_zone=False`` and check index 4 is "-"."""
    t = time.time()
    res = util.date_time(t, local_time_zone=False)
    # assertTrue instead of the deprecated alias assert_ (removed in Python 3.12).
    # presumably index 4 is the separator after a four-digit year -- confirm.
    self.assertTrue(res[4:5] == "-")
def test_util_date_time_tistime(self):
    """Format the current time with ``local_time_zone=False`` and check index 4 is "-"."""
    now = time.time()
    formatted = util.date_time(now, local_time_zone=False)
    # presumably the separator after a four-digit year -- confirm against util.date_time
    fifth_char = formatted[4:5]
    self.assertTrue(fifth_char == "-")