def url(self):
    """Build the gaspedaal.nl search URL for this request.

    The brand and model are URL-quoted and placed in the path; the page
    number goes into the ``p`` query argument. Every entry of
    ``self.parameters`` is appended as an extra quoted ``key=value`` query
    argument, with the value converted to ``str`` first.

    Returns:
        The complete URL as a string.
    """
    base = 'http://www.gaspedaal.nl/{0}/{1}/?srt=df&p={2}'.format(
        q(self.brand), q(self.model), self.page)

    # Nothing extra to append: the base URL is already complete.
    if len(self.parameters) == 0:
        return base

    extra_query = '&'.join(
        '{0}={1}'.format(q(name), q(str(value)))
        for name, value in self.parameters.items()
    )
    return base + '&' + extra_query
def _test(self, arg):
    """tests a command on a different bunny1 host.  usage: _test [fully-qualified-bunny1-url] [command]"""
    # The first whitespace-separated token is the target host URL; the
    # remainder is the command to run there. Unpacking raises ValueError
    # when fewer than two tokens are supplied.
    target_url, command = arg.split(None, 1)

    # The quoted command is appended directly to the URL, so the URL must end
    # with the query-string marker.
    target_url = target_url if target_url.endswith("?") else target_url + "?"

    # Stored under "bunny1testurl" before redirecting.
    save("bunny1testurl", target_url)

    destination = target_url + q(command)
    raise HTTPRedirect(destination)
def cached_requests(url):
    """Return the response body for *url*, using an on-disk cache.

    The cache file lives under ``CACHE_DIR`` and is named after the quoted
    form of *url*. On a cache hit the file contents are returned and no
    request is made. On a miss the URL is fetched with ``requests.get``, the
    response text is written to the cache file, and the text is returned.

    Args:
        url: The URL to fetch.

    Returns:
        The response body as text.

    Raises:
        RuntimeError: On a cache miss while ``ENABLE_REQUESTS`` is ``False``.
        requests.HTTPError: When ``raise_for_status`` rejects the response.
    """
    cached_filename = os.path.join(CACHE_DIR, q(url))

    # Cache hit: serve the stored body without touching the network.
    # The encoding is pinned so cached text round-trips identically on every
    # platform instead of depending on the locale's default encoding.
    # NOTE(review): cache files written earlier under a non-UTF-8 default
    # encoding may need to be regenerated.
    if os.path.exists(cached_filename):
        with open(cached_filename, "r", encoding="utf-8") as read_file:
            return read_file.read()

    print(f"Requesting URL: {url}")
    if ENABLE_REQUESTS is False:
        raise RuntimeError("Declining request as ENABLE_REQUESTS is False")

    # Rate limit our requests
    if RATE_LIMIT_DURATION:
        print(f"Pausing for {RATE_LIMIT_DURATION} seconds as rate limiting")
        time.sleep(RATE_LIMIT_DURATION)

    # Make the request; raise_for_status() raises on error statuses.
    req = requests.get(url)
    req.raise_for_status()
    contents = req.text

    # Create the directory that will hold the cache file. makedirs with
    # exist_ok avoids the check-then-create race, handles a nested CACHE_DIR,
    # and also covers the case where the quoted URL still contains path
    # separators (which would place the file in a sub-directory).
    os.makedirs(os.path.dirname(cached_filename), exist_ok=True)
    with open(cached_filename, "w", encoding="utf-8") as write_file:
        write_file.write(contents)

    return contents
def Search(self):
    """Scrape olx.bg for ads matching ``self.keyword``.

    Determines how many result pages exist, collects the links of every
    listing whose href does not end in ``promoted``, then fetches each
    listing page and extracts its details.

    Returns:
        A list of dicts, one per listing, with the keys ``Title``,
        ``Price``, ``Address``, ``Description``, ``Name`` and ``Link``.
        A field whose element is missing from the page is an empty string;
        ``Link`` is always the listing URL.

    Raises:
        requests.RequestException: If fetching a result page or a listing
            page fails. Only the initial page-count probe is best-effort.
    """
    def first_text(locate):
        """Return the stripped first text node under ``locate()``, or ''."""
        try:
            return locate().find(text=True).strip()
        except AttributeError:
            # Some element along the lookup chain was not found (None).
            return ''

    search_url = 'https://www.olx.bg/ads/q-' + q(self.keyword)

    # Best-effort probe for the number of result pages. When the "last page"
    # link is absent or unreadable, fall back to a single page.
    page_count = 1
    try:
        first_page = requests.get(search_url)
        last_link = bs(first_page.content, "lxml").find(
            "a", {"data-cy": "page-link-last"})
        page_count = int(last_link.find('span').find(text=True))
    except (requests.RequestException, AttributeError, TypeError, ValueError):
        pass

    # Gather the listing links from every result page.
    listing_urls = []
    for number in range(1, page_count + 1):
        page = requests.get(search_url + '/?page=' + str(number))
        soup = bs(page.content, "lxml")
        for anchor in soup.find_all("a", {"data-cy": "listing-ad-title"}):
            href = anchor['href']
            if not href.endswith('promoted'):
                listing_urls.append(href)
    print(len(listing_urls))

    # Visit each listing. Every field is looked up independently, so one
    # missing element no longer blanks the fields that come after it.
    cards = []
    for link in listing_urls:
        response = requests.get(link)
        home = bs(response.content, "lxml")
        cards.append({
            "Title": first_text(lambda: home.find('h1')),
            "Price": first_text(
                lambda: home.find('strong', {"class": "pricelabel__value"})),
            "Address": first_text(lambda: home.find('address').find("p")),
            "Description": first_text(
                lambda: home.find('div', {"id": "textContent"})),
            # The attrs argument must be a dict; a set literal here would be
            # treated as a list of alternative class names.
            "Name": first_text(
                lambda: home.find('div', {"class": "offer-user__actions"})
                .find('h4').find('a')),
            "Link": link,
        })
    return cards
def model_filename(clf_class, tr_set, *features, **clf_params):
    '''Build a model filename (extension .sklmodel) from the given arguments.

    The name is a '&'-joined sequence of ``key=v1,v2,...`` groups, in this
    order: ``clf`` (the classifier class name), ``tr_set`` (the training
    set, or each of its items when it is a list/tuple), ``feats`` (the
    sorted features), then one group per classifier parameter in sorted key
    order. Parameter keys and all values are quoted with ``_safe`` as the
    set of characters left unescaped.
    '''
    def as_str_list(value):
        # A scalar becomes a one-element list; lists and tuples are kept
        # element-wise. Every element is converted to str.
        items = value if isinstance(value, (list, tuple)) else [value]
        return [str(item) for item in items]

    pars = odict()
    pars['clf'] = [str(clf_class.__name__)]
    pars['tr_set'] = as_str_list(tr_set)
    pars['feats'] = [str(feat) for feat in sorted(features)]
    for key in sorted(clf_params):
        pars[q(key, safe=_safe)] = as_str_list(clf_params[key])

    groups = []
    for name, values in pars.items():
        joined_values = ','.join(q(value, safe=_safe) for value in values)
        groups.append("%s=%s" % (name, joined_values))
    return '&'.join(groups) + '.sklmodel'
def fallback(self, raw):
    # Redirect to the fallback URL with the quoted raw input appended.
    # Always raises HTTPRedirect; never returns.
    destination = self.fallback_url + q(raw)
    raise HTTPRedirect(destination)
def _t(self, arg):
    """tests a command on the most recently used bunny1 host.  usage: _t [command]"""
    # Read back the value stored under "bunny1testurl" and append the quoted
    # command to it. Always raises HTTPRedirect; never returns.
    saved_host = load("bunny1testurl")
    destination = saved_host + q(arg)
    raise HTTPRedirect(destination)
def g(self, arg):
    """does a google search.  we could fallback to yubnub, but why do an
    unnecessary roundtrip for something as common as a google search?"""
    # The search URL is returned to the caller rather than raised as a
    # redirect.
    search_prefix = GOOGLE_SEARCH_URL
    encoded_query = q(arg)
    return search_prefix + encoded_query