def get(self):
    """Fetch the page given by the ``url`` request parameter and reply with JSON.

    The reply is a JSON object that may contain:

    * ``name``  -- capture group 1 of the first ``title`` pattern match.
    * ``image`` -- ``"http:"`` followed by capture group 1 of the first
      ``image`` pattern match.

    When the upstream fetch does not answer 200, the upstream status is
    copied onto the response and an empty JSON object is written.
    """
    target = self.request.get("url")
    payload = {}
    fetched = urlfetch.fetch(target, deadline=10)

    if fetched.status_code != 200:
        # Mirror the upstream failure instead of scraping an error page.
        # NOTE(review): confirm this response class honours `status_code`.
        self.response.status_code = fetched.status_code
    else:
        page = fetched.content

        # re.search stops at the first hit, so later titles are ignored.
        found_title = re.search(title, page, flags=re.IGNORECASE)
        if found_title:
            payload["name"] = found_title.group(1)

        found_image = re.search(image, page, flags=re.IGNORECASE)
        if found_image:
            # Presumably the pattern captures a scheme-less ("//host/...")
            # address, hence the hard-coded scheme -- TODO confirm.
            payload["image"] = "http:" + found_image.group(1)

    self.response.headers['Content-Type'] = "application/json"
    body = simplejson2.dumps(payload, ensure_ascii=False)
    self.response.out.write(body)
def get(self):
    """Fetch the page given by the ``url`` request parameter and reply with JSON.

    The reply is a JSON object that always contains:

    * ``web_url`` -- the fetch result's ``final_url`` if set, otherwise the
      requested URL.
    * ``urls``    -- a one-element list holding ``web_url``.

    and, when the upstream fetch answers 200, may also contain:

    * ``name``        -- capture group 1 of the first ``title`` match.
    * ``description`` -- capture group 3 of the first ``description`` match,
      cut to at most 132 characters.
    * ``icons``       -- a dict built from the pairs that ``parseFavIcon``
      returns for every ``favicon`` match.

    For any other upstream status, that status is copied onto the response.
    """
    requested = self.request.get("url")
    fetched = urlfetch.fetch(requested, deadline=10)

    resolved = fetched.final_url or requested
    payload = {
        "web_url": resolved,
        "urls": [resolved],
    }

    # Site root ("scheme://host/") handed to parseFavIcon with each match.
    parts = urlparse(resolved)
    site_root = parts.scheme + "://" + parts.netloc + "/"

    if fetched.status_code != 200:
        # NOTE(review): confirm this response class honours `status_code`.
        self.response.status_code = fetched.status_code
    else:
        page = fetched.content

        # re.search stops at the first hit, so later titles are ignored.
        found_title = re.search(title, page, flags=re.IGNORECASE)
        if found_title:
            payload["name"] = found_title.group(1)

        found_description = re.search(description, page, flags=re.IGNORECASE)
        if found_description:
            payload["description"] = found_description.group(3)[0:132]

        # finditer only yields match objects, so no None filtering is needed.
        icon_matches = re.finditer(favicon, page, flags=re.IGNORECASE)
        payload["icons"] = dict(
            parseFavIcon(site_root, hit) for hit in icon_matches
        )
        # A fallback that pointed the "16" entry at <web_url>/favicon.ico
        # used to be sketched here; it is intentionally not active.

    self.response.headers['Content-Type'] = "application/json"
    body = simplejson2.dumps(payload, ensure_ascii=False)
    self.response.out.write(body)
def get(self):
    """Fetch the page given by the ``url`` request parameter and reply with JSON.

    The reply is a JSON object that may contain:

    * ``name``  -- capture group 1 of the first ``title`` pattern match.
    * ``image`` -- ``"http:"`` followed by capture group 1 of the first
      ``image`` pattern match.

    When the upstream fetch does not answer 200, the upstream status is
    copied onto the response and an empty JSON object is written.
    """
    target = self.request.get("url")
    payload = {}
    fetched = urlfetch.fetch(target, deadline=10)

    if fetched.status_code != 200:
        # Mirror the upstream failure instead of scraping an error page.
        # NOTE(review): confirm this response class honours `status_code`.
        self.response.status_code = fetched.status_code
    else:
        page = fetched.content

        # re.search stops at the first hit, so later titles are ignored.
        found_title = re.search(title, page, flags=re.IGNORECASE)
        if found_title:
            payload["name"] = found_title.group(1)

        found_image = re.search(image, page, flags=re.IGNORECASE)
        if found_image:
            # Presumably the pattern captures a scheme-less ("//host/...")
            # address, hence the hard-coded scheme -- TODO confirm.
            payload["image"] = "http:" + found_image.group(1)

    self.response.headers['Content-Type'] = "application/json"
    body = simplejson2.dumps(payload, ensure_ascii=False)
    self.response.out.write(body)