def request(self):
    """Pick a random image from the directories in ``options["path"]``.

    Accepts a single path string or a list of paths in
    ``self.options["path"]`` (a lone string is normalized to a
    one-element list in place).  Only ``.jpeg``/``.jpg``/``.png`` files
    are considered.

    :return: a ``file://`` URL string for the chosen image, or the
        result of ``services.log`` (logging an error) when no image is
        found.  NOTE(review): the original docstring mentioned an
        ``Item()`` object — presumably built by the caller; confirm.
    """
    # Normalize a single path string to a list. ``unicode`` only exists
    # on Python 2; fall back to ``str`` alone on Python 3.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    if isinstance(self.options["path"], string_types):
        self.options["path"] = [self.options["path"]]
    imglist = []
    for source in self.options["path"]:
        if not os.path.isdir(source):
            services.log("Scraper", "Not a directory `%s`, skipping source." % source)
            continue
        for filename in os.listdir(source):
            ext = os.path.splitext(filename)[1]
            if ext.lower() in (".jpeg", ".jpg", ".png"):
                imglist.append(os.path.join(source, filename))
    if not imglist:
        return services.log("Scraper", "Empty sources list.")
    # Build an absolute file:// URL for the randomly chosen image.
    return "file://" + os.path.abspath(random.choice(imglist))
def _url(self): """Get url from options. """ result = "https://source.unsplash.com/random" if self.options["category"] and self.options["user"]: services.log( "Scraper", "Warning: category and user option can not be used together (ignoring category)." ) if self.options["likes"] and not self.options["user"]: services.log( "Scraper", "Warning: likes option must be used with user option (ignoring likes option)." ) if self.options["category"]: result = "https://source.unsplash.com/category/%s" % self.options[ "category"] if self.options["user"]: result = result = "https://source.unsplash.com/user/%s" % self.options[ "user"] if self.options["user"] and self.options["likes"]: result = result = "https://source.unsplash.com/user/%s/likes" % self.options[ "user"] if self.options["size"]: result += "/" + self.options["size"] return result
def _request_api(self, method, params=None):
    """thegamesdb.net API request.

    Source: https://github.com/fffilo/thegamesdbnet/
    URL: http://wiki.thegamesdb.net/index.php/API_Introduction

    :param method: API method name, appended to ``self.link + "/api/"``.
    :param params: optional dict of query parameters; entries whose
        value is ``None`` are dropped before sending.
    :return: the XML response parsed into a dict by ``xmltodict``, or
        the result of ``services.log`` (logging the error) on failure.
    """
    url = self.link + "/api/" + method
    # Copy-filter instead of the original's delete-while-iterating
    # (which raises on Python 3 and mutated the caller's dict), and use
    # None instead of a mutable {} default argument.
    if params is None:
        params = {}
    params = dict((key, value) for key, value in params.items()
                  if value is not None)
    request = requests.get(url, params, timeout=30)
    response = request.text
    request.close()
    if request.status_code != 200:
        return services.log("Scraper",
                            "Request to `%s` returns status code %d.",
                            url, request.status_code)
    result = xmltodict.parse(response, xml_attribs=True)
    if "Error" in result and result["Error"]:
        return services.log("Scraper", result["Error"])
    return result
def loop():
    """Run the application loop.

    Starts the module-level ``scraper`` (when initialized) and then
    idles until interrupted.  Returns early (via ``services.log``) when
    no scraper was initialized.  Ctrl-C is caught and logged; any other
    exception propagates with its original traceback (the original
    ``except Exception, e: raise e`` reset the traceback on Python 2).
    """
    try:
        # start scraper (if properly initialized)
        if scraper is not None:
            scraper.start(config.get("refresh"), config.get("delay"))
        if scraper is None:
            return services.log("Service", "Closing app loop...")
        # idle forever; the scraper does its work on its own schedule
        import time
        while True:
            time.sleep(0.2)
    except KeyboardInterrupt:
        services.log(None, "Break signal received, closing...")
def init_scraper():
    """Initialize the module-level ``scraper`` from the configured name.

    Resets ``scraper`` to ``None``, then imports the scraper module
    named by ``config.get("scraper")`` from the ``scrapers`` package.
    Logs and returns early when no scraper is configured or the import
    fails.
    """
    global scraper
    scraper = None
    if config.get("scraper") is None:
        return services.log(
            "Service", "Warning: scraper set to null, scraper not started.")
    # import scraper from config
    try:
        # NOTE(review): ``mod`` is unused within this chunk — presumably
        # consumed by code past this view; confirm before removing.
        mod = getattr(__import__("scrapers.%s" % config.get("scraper")),
                      config.get("scraper"))
    except ImportError as e:
        return services.log(
            "Service",
            "Error: %s. Try to install module by executing shell command: `pip install {module_name}`",
            str(e))
def request(self):
    """Send request to self._url() and parse wallpaper from HTML response.

    Execute request with stream option, so we can download html by
    chunks (no need for full page download).  Read chunks until figure
    tag closure is found, then extract the wallpaper id from the first
    ``<figure>`` element.
    """
    url = self._url()
    req = requests.get(url,
                       timeout=10,
                       allow_redirects=False,
                       headers={"User-Agent": "Magic Browser"},
                       stream=True)
    if req.status_code != 200:
        req.close()
        return services.log("Scraper",
                            "Request to `%s` returns status code %d.",
                            url, req.status_code)
    # accumulate just enough of the body to contain the first figure tag
    buff = ""
    for chunk in req.iter_content():
        buff += chunk
        if "</figure>" in buff:
            break
    req.close()
    # raw string; the original's non-raw "<\/figure>" escape was redundant
    pattern = re.compile(r"<figure(.*?)>(.*?)<\/figure>")
    match = re.search(pattern, buff)
    if not match:
        return services.log("Scraper",
                            "Unable to parse response data (figure tag).")
    try:
        doc = lxml.html.fromstring(match.group(0))
        # NOTE(review): guid is unused within this chunk — presumably
        # consumed by code past this view; confirm.
        guid = doc.xpath("//figure")[0].get("data-wallpaper-id")
    except Exception:
        return services.log("Scraper",
                            "Unable to parse response data (media data).")
def request(self):
    """Request self._url() and return the retrieved wallpaper URL.

    The request is sent without following redirects; the wallpaper URL
    is read from the ``Location`` response header.  Returns the result
    of ``services.log`` (logging the error) on an unexpected status or
    a missing header.  NOTE(review): the original docstring mentioned
    an ``Item()`` object — presumably built by the caller; confirm.
    """
    url = self._url()
    req = requests.get(url,
                       timeout=10,
                       allow_redirects=False,
                       headers={"User-Agent": "Magic Browser"})
    req.close()
    if req.status_code not in (200, 302):
        return services.log("Scraper",
                            "Request to `%s` returns status code %d.",
                            url, req.status_code)
    if "Location" in req.headers:
        return req.headers["Location"]
    return services.log("Scraper",
                        "Location header not found in `%s` request.", url)
config = None scraper = None def init_config(): global config config = None # init config object try: config = services.Config() except Exception, e: raise Exception("Can not init config (%s)" % str(e)) services.log("Service", "Config initialized.") def init_scraper(): global scraper scraper = None if config.get("scraper") is None: return services.log( "Service", "Warning: scraper set to null, scraper not started.") # import scraper from config try: mod = getattr(__import__("scrapers.%s" % config.get("scraper")), config.get("scraper")) except ImportError, e: