def find_info(self, url):
    """Build an info message for a vimeo video URL.

    The video id is taken from the URL, stored in ``self.video_id`` and
    used to query the vimeo JSON API for the video's title and duration.

    Arguments:
    url -- the URL to inspect.

    Returns None if the URL is not a vimeo URL, carries no video id or the
    API request returned no data; otherwise a ``(message, raw_title)`` tuple
    where ``message`` is the formatted "title :: duration" text and
    ``raw_title`` the unformatted title.
    """
    # get video id (raw string: "\." and "\S" are regex escapes, not
    # string escapes)
    match = re.match(URL_REGEX_PREFIX + r"vimeo\.com/(\S*)", url)
    if match is None:
        return None

    self.video_id = match.group(1)
    if not self.video_id:
        # "vimeo.com/" without an id would request ".../video/.json"
        logger.debug("vimeo URL without a video id: %s", url)
        return None
    logger.info("Found vimeo video: %s", self.video_id)

    json_url = "http://vimeo.com/api/v2/video/{0}.json".format(self.video_id)
    page = download_page(json_url)
    if page is None:
        logger.warning("No data received for vimeo video: %s", self.video_id)
        return None

    # the API answers with a list holding a single video entry
    video = json.loads(page.decode("utf-8"))[0]

    raw_title = video["title"]
    title = self.msg_formats.bold(self.msg_formats.red(raw_title))

    # integer arithmetic instead of float division followed by truncation
    minutes, seconds = divmod(int(video["duration"]), 60)
    duration = self.msg_formats.green("{0}m {1}s".format(minutes, seconds))

    message = "{0} :: {1}".format(title, duration)
    return (message, raw_title)
def search_site(self, url, resource_dict):
    """Download the URL's content and build a message from the matched data.

    Every entry of ``resource_dict["patterns"]`` is tried in order: its
    regex ``"pattern"`` first and, if that is missing or finds nothing, its
    ``"xpath"``. Each non-empty result is unescaped, re-escaped, stripped
    and formatted; the formatted pieces are joined with the site separator.
    Processing stops at the first entry that has neither key or that
    yields no data at all.

    Arguments:
    url -- address of the page to download.
    resource_dict -- contains the paths, patterns and additional data for
        the url; only its "patterns" list is read here.

    Returns None when no site data is loaded or the page could not be
    retrieved, otherwise a ``(message, title)`` tuple. ``title`` is the
    first piece of data found, without formatting. Both are None when
    nothing was found.
    """
    if self.sitedata is None:
        return None

    # retrieve content; stay best-effort like before, but without
    # swallowing KeyboardInterrupt/SystemExit as a bare except would
    try:
        raw_page = download_page(url)
        if raw_page is None:
            return None
        content = raw_page.decode(WEB_ENCODING, "replace")
    except Exception as err:
        logger.warning("Could not retrieve %s: %s", url, err)
        return None

    def info_regex(pattern):
        # try to find info using a regex pattern
        logger.debug("using regex: %s", pattern)
        match = re.search(pattern, content)
        if match is None:
            logger.warning(
                "Could not find info! (match == None) with pattern: %s",
                pattern)
            return None
        if not match.groups():
            logger.warning("Found match but no groups")
            return None
        return match.group(1)

    def info_xpath(root, xpath):
        # try to find info using xpath
        logger.debug("using xpath: %s", xpath)
        result = root.xpath(xpath)
        if isinstance(result, str):
            # string-valued expressions return one string, not a list;
            # indexing it would keep only its first character
            return result or None
        if not isinstance(result, list) or not result:
            return None
        first = result[0]
        if isinstance(first, str):
            return first
        # element nodes are not text; use their text so the string
        # handling further down does not fail
        if hasattr(first, "text_content"):
            return first.text_content()
        return None

    root = None  # parsed lazily, at most once per call
    title = None
    parts = []

    for info in resource_dict["patterns"]:
        if "pattern" not in info and "xpath" not in info:
            logger.error("siteinfo entry does not contain a path or pattern!")
            break

        infodata = None
        # try regex first because it seems to be faster
        if "pattern" in info:
            infodata = info_regex(info["pattern"])
        # try xpath if there was no pattern or regex was unsuccessful
        if infodata is None and "xpath" in info:
            if root is None:
                root = lxml.html.fromstring(content)
            infodata = info_xpath(root, info["xpath"])

        if infodata is None:
            logger.warning("infodata was None!")
            break

        logger.debug("\ninfodata:\n")
        logger.debug(infodata)
        if infodata == "":
            continue

        logger.info("found info data: %s", infodata)
        infodata = escape(unescape(infodata)).strip()
        if not infodata:
            # whitespace-only data would give an empty title/message piece
            continue

        if title is None:
            title = infodata

        # the first piece gets its own color and style
        if parts:
            color, style = REST_COLOR, REST_STYLE
        else:
            color, style = FIRST_COLOR, FIRST_STYLE
        parts.append(
            self.msg_formats.get(style, self.msg_formats.get(color, infodata)))

    # joining the collected pieces never leaves a dangling separator
    separator = " " + self.sitedata["separator"] + " "
    message = separator.join(parts) if parts else None
    return message, title
def search_site(self, url, resource_dict):
    """Download the URL's content and build a message from the matched data.

    Every entry of ``resource_dict["patterns"]`` is tried in order: its
    regex ``"pattern"`` first and, if that is missing or finds nothing, its
    ``"xpath"``. Each non-empty result is unescaped, re-escaped, stripped
    and formatted; the formatted pieces are joined with the site separator.
    Processing stops at the first entry that has neither key or that
    yields no data at all.

    Arguments:
    url -- address of the page to download.
    resource_dict -- contains the paths, patterns and additional data for
        the url; only its "patterns" list is read here.

    Returns None when no site data is loaded or the page could not be
    retrieved, otherwise a ``(message, title)`` tuple. ``title`` is the
    first piece of data found, without formatting. Both are None when
    nothing was found.
    """
    if self.sitedata is None:
        return None

    # retrieve content; stay best-effort like before, but without
    # swallowing KeyboardInterrupt/SystemExit as a bare except would
    try:
        raw_page = download_page(url)
        if raw_page is None:
            return None
        content = raw_page.decode(WEB_ENCODING, "replace")
    except Exception as err:
        logger.warning("Could not retrieve %s: %s", url, err)
        return None

    def info_regex(pattern):
        # try to find info using a regex pattern
        logger.debug("using regex: %s", pattern)
        match = re.search(pattern, content)
        if match is None:
            logger.warning(
                "Could not find info! (match == None) with pattern: %s",
                pattern)
            return None
        if not match.groups():
            logger.warning("Found match but no groups")
            return None
        return match.group(1)

    def info_xpath(root, xpath):
        # try to find info using xpath
        logger.debug("using xpath: %s", xpath)
        result = root.xpath(xpath)
        if isinstance(result, str):
            # string-valued expressions return one string, not a list;
            # indexing it would keep only its first character
            return result or None
        if not isinstance(result, list) or not result:
            return None
        first = result[0]
        if isinstance(first, str):
            return first
        # element nodes are not text; use their text so the string
        # handling further down does not fail
        if hasattr(first, "text_content"):
            return first.text_content()
        return None

    root = None  # parsed lazily, at most once per call
    title = None
    parts = []

    for info in resource_dict["patterns"]:
        if "pattern" not in info and "xpath" not in info:
            logger.error("siteinfo entry does not contain a path or pattern!")
            break

        infodata = None
        # try regex first because it seems to be faster
        if "pattern" in info:
            infodata = info_regex(info["pattern"])
        # try xpath if there was no pattern or regex was unsuccessful
        if infodata is None and "xpath" in info:
            if root is None:
                root = lxml.html.fromstring(content)
            infodata = info_xpath(root, info["xpath"])

        if infodata is None:
            logger.warning("infodata was None!")
            break

        logger.debug("\ninfodata:\n")
        logger.debug(infodata)
        if infodata == "":
            continue

        logger.info("found info data: %s", infodata)
        infodata = escape(unescape(infodata)).strip()
        if not infodata:
            # whitespace-only data would give an empty title/message piece
            continue

        if title is None:
            title = infodata

        # the first piece gets its own color and style
        if parts:
            color, style = REST_COLOR, REST_STYLE
        else:
            color, style = FIRST_COLOR, FIRST_STYLE
        parts.append(
            self.msg_formats.get(style, self.msg_formats.get(color, infodata)))

    # joining the collected pieces never leaves a dangling separator
    separator = " " + self.sitedata["separator"] + " "
    message = separator.join(parts) if parts else None
    return message, title