def searchcrawler(url, keyword=''):
    """Taobao search-results page crawler."""
    html = get_html(url)
    #print html
    if html:
        soup = BeautifulSoup(html, fromEncoding='gbk')
        items_row = soup.findAll('div', {'class': 'row item icon-datalink'})
        if items_row:
            print '=======================row search row=========================='
            #print items
            for item in items_row:
                item_info = item.find('div', {'class': 'col title'}).h3.a
                item_url = item_info['href']
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query, True)['id'][0]
                print item_url
                print item_id
                judge_site(item_url, keyword)
        items_col = soup.findAll('div', {'class': 'col item icon-datalink'})
        if items_col:
            print '=======================row search col=========================='
            #print items
            for item in items_col:
                item_info = item.find('div', {'class': 'item-box'}).h3.a
                item_url = item_info['href']
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query, True)['id'][0]
                print item_url
                print item_id
                judge_site(item_url, keyword)
def searchcrawler(url):
    """Search-results crawler that also extracts each seller's user_number_id."""
    html = get_html(url)
    # print url
    if html:
        soup = BeautifulSoup(html, fromEncoding='gbk')
        items_row = soup.findAll('div', {'class': 'item-box st-itembox'})
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
                # print item
                item_info = item.find('h3', {'class': 'summary'}).a
                item_url = item_info['href']
                # print item_url
                sid_info = item.find('div', {'class': 'col seller feature-dsi-tgr'}).a
                print sid_info
                sid_item_url = sid_info['href']
                sid_url_info = urlparse.urlparse(sid_item_url)
                sid_id = urlparse.parse_qs(sid_url_info.query, True)['user_number_id'][0]
                print sid_id
                judge_site(item_url, sid_id)
                # logging.warning(item_id)
                # download_reply_by_id(item_id)
        items_col = soup.findAll('div', {'class': 'product-item row icon-datalink'})
        if items_col:
            print '=======================row search col=========================='
            #print items
            for item in items_col:
                item_info = item.find('div', {'class': 'title'}).a
                item_url = item_info['href']
                # url_info = urlparse.urlparse(item_url)
                # item_id = urlparse.parse_qs(url_info.query,True)['id'][0]
                print item_url
                # print item_id
                sid_info = item.find('div', {'class': 'seller'}).a
                print sid_info
                sid_item_url = sid_info['href']
                sid_url_info = urlparse.urlparse(sid_item_url)
                sid_id = urlparse.parse_qs(sid_url_info.query, True)['user_number_id'][0]
                print sid_id
                judge_site(item_url, sid_id)
def judge_site(url, keyword=''):
    """Determine whether the item is a Taobao (tb) or Tmall (tm) listing."""
    url_info = urlparse.urlparse(url)
    urlkey = urlparse.parse_qs(url_info.query, True)
    iid = int(urlkey['id'][0])
    #print 'url_info:', url_info[1]
    try:
        if url_info[1] == 'detail.tmall.com':
            print 'it is a tm item'
            if check_item_update_time(iid, 'tm'):
                return
            data = getTmallItemInfo(iid, keyword)
        elif urlkey.get('cm_id'):
            print 'it is a tm item'
            if check_item_update_time(iid, 'tm'):
                return
            data = getTmallItemInfo(iid, keyword)
        else:
            print 'it is a tb item'
            if check_item_update_time(iid, 'tb'):
                return
            data = getTaobaoItemInfo(iid, keyword)
    except Exception, e:
        traceback.print_exc()
        return
def judge_site(url, sid_id):
    """Determine whether the item is a Taobao (tb) or Tmall (tm) listing."""
    url_info = urlparse.urlparse(url)
    urlkey = urlparse.parse_qs(url_info.query, True)
    iid = int(urlkey['id'][0])
    print iid
    # print 'url_info:', url_info[1]
    try:
        if url_info[1] == 'detail.tmall.com':
            print 'it is a tm item'
            # data = download_tm_reply_by_id(iid)
        elif urlkey.get('cm_id'):
            print 'it is a tm item cm_id'
            # data = download_tm_reply_by_id(iid)
        else:
            print 'it is a tb item'
            data = download_tb_reply_by_id(iid, sid_id)
    except Exception, e:
        traceback.print_exc()
        return
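# A hedged illustration of the dispatch rule used by judge_site above. The
# helper name classify_item_url and the sample URLs are hypothetical, added
# only to show how the netloc / cm_id checks classify an item without
# triggering any downloads.
import urlparse

def classify_item_url(url):
    parts = urlparse.urlparse(url)
    params = urlparse.parse_qs(parts.query, True)
    if parts.netloc == 'detail.tmall.com' or params.get('cm_id'):
        return 'tm'   # Tmall item: dedicated detail host or a cm_id parameter
    return 'tb'       # otherwise treated as a Taobao item

if __name__ == '__main__':
    print classify_item_url('http://detail.tmall.com/item.htm?id=123456')      # tm
    print classify_item_url('http://item.taobao.com/item.htm?id=1&cm_id=abc')  # tm
    print classify_item_url('http://item.taobao.com/item.htm?id=654321')       # tb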
def searchcrawler(url):
    """Search-results crawler: collects item ids and downloads each item's replies."""
    html = get_html(url)
    # print url
    if html:
        soup = BeautifulSoup(html, fromEncoding='gbk')
        items_row = soup.findAll('div', {'class': 'product-iWrap'})
        #items_row = soup.find('div',{'class':'item-box st-itembox'})
        # print items_row
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
                # print item
                try:
                    item_info = item.find('p', {'class': 'productTitle'}).a
                except:
                    item_info = item.find('div', {'class': 'productTitle productTitle-spu'}).a
                # print item_info
                item_url = item_info['href']
                # print item_url
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query, True)['id'][0]
                print item_id
                logging.warning(item_id)
                # item_id = 16862466992
                download_reply_by_id(item_id)
def do_GET(self):
    """Handle GET requests to the server.

    Serves the root page and the query responses (for both valid and
    invalid RSS feeds); any other, unsupported request gets a 404 error.
    """
    if self.path == '/':
        # Request for the server root
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(self._get_root_page())
    else:
        rss = urlparse.parse_qs(self.path[2:])  # Drop the leading /?
        if rss.has_key('p'):
            if self.es_valido(rss):
                # Fetch the RSS feed
                self.send_response(200)
                self.send_header("Content-type", "application/xml")
                self.end_headers()
                self.wfile.write(self._get_valid_feed_page(rss))
            else:
                # Invalid RSS
                self.send_response(200)
                self.send_header("Content-type", "text/html")
                self.end_headers()
                self.wfile.write(self._get_invalid_feed_page(rss))
        else:
            # Malformed request: return a 404 error
            self.send_response(404)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(self._get_error_page())
def auth(sender):
    """
    log userids and passwords from get and post requests
    NOTE this will not work with most sites
    """
    include = ["log", "login", "logon", "user", "username", "key", "name", "email",
               "password", "pass", "passwd", "pwd", "psw", "passw", "auth"]
    query = sender.data if sender.data else sender.query
    if not query:
        return
    # split into auth and noauth
    q = urlparse.parse_qs(query)
    auth = dict()
    noauth = dict()
    for k, v in q.items():
        if k in include:
            auth[k] = v
        else:
            noauth[k] = v
    # output auth
    auth = '\n'.join(["%s=%s" % (k, v) for k, v in auth.items()])
    log.info("query strings===>\n%s" % auth)
    # output noauth truncating the values
    noauth = {k: v[:15] + "..." if len(v) > 15 else v for k, v in noauth.items()}
    noauth = '\n'.join(["%s=%s" % (k, v) for k, v in noauth.items()])
    log.info("auth strings===> ***************************************\n%s" % noauth)
def getVGProductId(link):
    if 'record_id' in link:
        parsed = urlparse.urlparse(link)
        # parse_qs returns a list per key, so take the first value
        record_id = urlparse.parse_qs(parsed.query)['record_id'][0]
        return int(record_id)
    parsed = urlparse.urlparse(link)
    path = parsed.path
    return int(path.split('/')[2])
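# Usage sketch for getVGProductId (both sample links are hypothetical): the
# product id comes either from a record_id query parameter or from the second
# path segment.
if __name__ == '__main__':
    import urlparse
    print getVGProductId('http://example.com/shop?foo=1&record_id=4567')  # 4567
    print getVGProductId('http://example.com/products/8910/view')         # 8910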
def rendered_wall_posts(wall_posts):
    """Render inline images and OpenGraph embeds in each wall post's content."""
    for wall_post in wall_posts:
        title = ''
        desc = ''
        site_image = ''
        article_title = ''
        urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                          wall_post.data['post_content'])
        for url in urls:
            parse_obj = urlparse.urlparse(url)
            site = parse_obj.netloc
            path = parse_obj.path
            conn = httplib.HTTPConnection(site)
            conn.request('HEAD', path)
            response = conn.getresponse()
            conn.close()
            ctype = response.getheader('Content-Type')
            if response.status < 400 and ctype.startswith('image'):
                wall_post.data['post_content'] = wall_post.data['post_content'] + "<br/><a href='" + url + "' target='_blank'><img width=300 src='" + url + "' target = '_blank'/></a>"
            else:
                og = opengraph.OpenGraph(url)
                if not len(og.items()) == 2:
                    for x, y in og.items():
                        if x == 'type' and y == 'video':
                            for k, l in og.items():
                                if k == 'site_name' and l == 'YouTube':
                                    url_data = urlparse.urlparse(url)
                                    query = urlparse.parse_qs(url_data.query)
                                    video = query["v"][0]
                                    wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "") + "<br/><iframe width='300' height='200' src='//www.youtube.com/embed/" + video + "' frameborder='0' allowfullscreen></iframe>"
                                elif k == 'site_name' and l == 'Vimeo':
                                    url_data = urlparse.urlparse(url)
                                    video = url_data.path
                                    wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "") + "<br/><iframe src='//player.vimeo.com/video" + video + "' width='300' height='200' frameborder='0' webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe> <p></p>"
                        elif x == 'type' and y == 'article':
                            for k, l in og.items():
                                if k == 'title':
                                    article_title = l
                                elif k == 'site_name':
                                    title = l
                                elif k == 'description':
                                    desc = l
                                elif k == 'image':
                                    site_image = l
                            wall_post.data['post_content'] = wall_post.data['post_content'] + "<br/><table><tr><td><img width='50' src='" + site_image + "'</td><td><a href='" + url + "' target='_blank'/>" + article_title + "</a><br/>" + title + "</td></td></table>"
                        elif x == 'type':
                            for k, l in og.items():
                                if k == 'site_name':
                                    title = l
                                elif k == 'description':
                                    desc = l
                                elif k == 'image':
                                    site_image = l
                            wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<table><tr><td><img width='50' src='" + site_image + "'</td><td><a href='" + url + "' target='_blank'/>" + title + "</a><br/>" + desc + "</td></td></table>")
                else:
                    wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<a href='" + url + "' target='_blank'>" + url + "</a>")
    return wall_posts
def extract_query_params(url, *names):
    """
    Extract the named query parameters from url.

    @param url: URL whose query string is inspected
    @param names: parameter names to keep
    @return: dict mapping each found name to its first value
    """
    parsed_res = urlparse.urlparse(url)
    d = urlparse.parse_qs(parsed_res.query)
    return {key: value[0] for (key, value) in d.iteritems() if key in names}
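# Quick usage sketch for extract_query_params (the URL is hypothetical): only
# the requested keys are kept, and parse_qs's single-element lists are
# unwrapped to plain strings.
if __name__ == '__main__':
    import urlparse
    sample = 'http://example.com/item.htm?id=42&spm=a230r&sort=price'
    print extract_query_params(sample, 'id', 'sort')
    # expected (key order may vary): {'id': '42', 'sort': 'price'}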
def a_taleo():
    """Log the raw query string and echo it back as a JSON response."""
    # get the query string
    qsdata = request.query_string
    qs = urlparse.parse_qs(qsdata)
    # write log entry
    neclogger(qsdata, True, True)
    data = qsdata
    js = json.dumps(data)
    resp = Response(js, status=200, mimetype='application/json')
    return resp
def listsCategoriesMenu(self, url):
    query_data = {
        'url': url,
        'use_host': False,
        'use_cookie': False,
        'use_post': False,
        'return_data': True
    }
    link = self.cm.getURLRequestData(query_data)
    # how many videos are there?
    match = re.compile('<li class="active"id="mVid"><a href="#" onclick="moreVideo\(\);return false;">Video \((.*?)\)</a></li>', re.DOTALL).findall(link)
    ilejest = int(match[0])
    policz = int(ilejest / o_filmow_na_stronie) + 1
    max_stron = policz
    parsed = urlparse.urlparse(url)
    typ = urlparse.parse_qs(parsed.query)['s'][0]
    for i in range(0, policz):
        purl = 'http://www.cda.pl/video/show/ca%C5%82e_filmy_or_ca%C5%82y_film/p' + str(i + 1) + '?s=' + typ
        self.add('cdapl', 'categories-menu', 'Strona ' + str(i + 1), 'None', 'None', purl, 'None', 'None', True, False, str(i + 1))
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def searchcrawler(url):
    html = get_html(url)
    # print url
    if html:
        soup = BeautifulSoup(html, fromEncoding='gbk')
        items_row = soup.findAll('div', {'class': 'product-iWrap'})
        #items_row = soup.find('div',{'class':'item-box st-itembox'})
        # print items_row
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
                # print item
                item_info = item.find('p', {'class': 'productTitle'}).a
                item_url = item_info['href']
                # print item_url
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query, True)['id'][0]
                print item_id
                # item_id = 16862466992
                download_reply_by_id(item_id)
def do_GET(self):
    """Handle GET requests to the server."""
    if self.path == '/':
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(self._get_root_page())
    else:
        peticion = urlparse.parse_qs(self.path[2:])  # Drop the leading /?
        if peticion.has_key('consultar'):
            # If the request is well formed, pick the next backend server,
            # forward the request to it, and return its response to the client
            server = self.next_server()
            url = 'http://' + server['host'] + ':' + server['port'] + '/'
            open(LOGFILE, "a").write("Accessing %s" % url)
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(self._get_node_response(url))
        else:
            self.send_response(404)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(self._get_error_page())
def oembed(oembed_url):
    width = 400
    height = 300
    services = {
        'youtube.com': 'youtube',
        'youtu.be': 'yoube',
        'vimeo.com': 'vimeo',
        'vine.co': 'vine',
        'facebook.com': 'facebook',
        #'imgur.com': 'imgur'
    }
    parsedUrl = urlparse.urlparse(oembed_url)
    url = parsedUrl.netloc
    url = url.replace("www.", "", 1)
    if url not in services:
        # Check if url is an image
        images = ['.jpg', '.jpeg', '.gif', '.png']
        disassembled = urlparse.urlparse(oembed_url)
        filename, file_ext = splitext(basename(disassembled.path))
        print file_ext
        if file_ext in images:
            embedHtml = "<a href='{image}' ><img src='{image}' height=300 alt='{name}' /></a>".format(
                image=oembed_url, name=filename)
            return embedHtml
        else:
            return ""
    else:
        provider = services[url]
        try:
            # Youtube
            if provider == 'youtube':
                videoCode = parsedUrl.query[-11:]
                embedHtml = "<iframe width=\"{width}\" height=\"{height}\" src=\"//www.youtube.com/embed/{video}\" frameborder=\"0\" allowfullscreen></iframe>".format(
                    width=width, height=height, video=videoCode)
                embedHtml = re.sub('http', 'https', embedHtml)
                return embedHtml
            # Vimeo
            elif provider == 'vimeo':
                videoCode = parsedUrl.path[-8:]
                embedHtml = "<iframe src=\"//player.vimeo.com/video/{video}\" width=\"{width}\" height=\"{height}\" frameborder=\"0\" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>".format(
                    width=width, height=height, video=videoCode)
                embedHtml = re.sub('http', 'https', embedHtml)
                return embedHtml
            # Youtu.be
            elif provider == 'yoube':
                videoCode = parsedUrl.path[-11:]
                embedHtml = "<iframe width=\"{width}\" height=\"{height}\" src=\"//www.youtube.com/embed/{video}\" frameborder=\"0\" allowfullscreen></iframe>".format(
                    width=width, height=height, video=videoCode)
                embedHtml = re.sub('http', 'https', embedHtml)
                return embedHtml
            # Vine
            elif provider == 'vine':
                videoCode = parsedUrl.path[-11:]
                embedHtml = "<iframe class=\"vine-embed\" src=\"https://vine.co/v/{video}/embed/simple\" width=\"{width}\" height=\"{height}\" frameborder=\"0\"></iframe>".format(
                    width=width, height=height, video=videoCode)
                if parsedUrl.scheme == "http":
                    embedHtml = re.sub('http', 'https', embedHtml)
                return embedHtml
            # Facebook: the video id comes from the v query parameter
            elif provider == 'facebook':
                par = urlparse.parse_qs(parsedUrl.query)
                videoCode = par['v'][0]
                print videoCode
                embedHtml = "<object width=\"{width}\" height=\"{height}\" ><param name=\"allowfullscreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /><param name=\"movie\" value=\"http://www.facebook.com/v/{video}\" /><embed src=\"http://www.facebook.com/v/{video}\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"{width}\" height=\"{height}\"></embed></object>".format(
                    width=width, height=height, video=videoCode)
                if parsedUrl.scheme == "http":
                    embedHtml = re.sub('http', 'https', embedHtml)
                return embedHtml
            # Imgur TODO
            '''
            elif provider == 'imgur':
                # Check if url is an image
                images = ['.jpg', '.jpeg', '.gif', '.png']
                disassembled = urlparse.urlparse(oembed_url)
                filename, file_ext = splitext(basename(disassembled.path))
                print file_ext
                if file_ext in images:
                    embedHtml = "<a href='{image}' ><img src='{image}' height=300 alt='{name}' /></a>".format(image=oembed_url, name=filename)
                    return embedHtml
                else:
                    # Try to show picture anyway
            '''
        except Exception:
            return ""
#
# WSGI behavior is different from CGI behavior, because we may not want
# to return a chatty rummy for likely-deployed WSGI vs. testing CGI.
#
# if layer and layer not in self.config.layers:
#     return self._response(start_response, 404)
#
# path_info = environ.get('PATH_INFO', None)
# query_string = environ.get('QUERY_STRING', None)
# script_name = environ.get('SCRIPT_NAME', None)

logging.debug("self is %s", self)

path_info = environ.get('PATH_INFO', None)
query_string = environ.get('QUERY_STRING', None)
script_name = environ.get('SCRIPT_NAME', None)

version_str = urlparse.parse_qs(query_string, True).get('version')
if version_str is not None:
    version = long(version_str[0])
    logging.debug("layer %s version %d", layer, version)
else:
    version = None

if version is not None and layer is None:
    return self._response(start_response, 404)

layer_obj = self.config.layers[layer]
if layer_obj is None:
    return self._response(start_response, 404)
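# Hedged sketch of the version lookup above (the query strings are made up):
# parse_qs returns a list for each key, which is why the code indexes [0],
# and .get() yields None when the parameter is absent.
if __name__ == '__main__':
    import urlparse
    print urlparse.parse_qs('version=12&layer=osm', True).get('version')  # ['12']
    print urlparse.parse_qs('layer=osm', True).get('version')             # None
    print urlparse.parse_qs('version=&layer=osm', True).get('version')    # [''] kept because keep_blank_values=True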
def get_product_price(self, url):
    page = MarketBrowser.get_html(url)
    xpath = Directory.flat_xpath
    name_str = xpath(page, '//h3[@class="trade_Name"]/text()')
    intro_str = xpath(page, '//dd[@class="introduction"]/text()')
    content_origin_str = xpath(
        page,
        '//div[@class="product_content"]//tr[contains(string(), "產地")]/td[2]//text()'
    )
    content_unit_str = xpath(
        page,
        '//div[@class="product_content"]//tr[contains(string(), "數量")]/td[2]//text()'
    )
    price_str = xpath(page, '//dd[@class="list_price"]/text()')
    try:
        # 大成去骨雞腿1盒 => 大成去骨雞腿 (strip the trailing pack count from the name)
        name = GeantBrowser.NAME_RE.findall(name_str)[0]
        # try to find origin in introduction
        try:
            origin_str = GeantBrowser.ORIGIN_RE.findall(intro_str)[0]
        # try content table, could be ''
        except IndexError:
            origin_str = content_origin_str
        origin = self.get_origin(origin_str)
        # try to find count in introduction
        try:
            count_str = GeantBrowser.COUNT_RE.findall(intro_str)[0]
            count = Directory.get_count(count_str)
        # try to find count in title, or 1
        except IndexError:
            count = Directory.get_count(name_str)
        # try to find spec in introduction
        try:
            spec_str = GeantBrowser.WEIGHT_RE.findall(intro_str)[0]
            weight = self.get_weight(spec_str)
            # test weight with title weight
            test_weight = self.get_weight(name_str)
            if test_weight and weight != test_weight:
                weight = test_weight
        # try to find spec in title
        except IndexError:
            weight = self.get_weight(name_str)
        # &pid=4940444 => 4940444
        pid = urlparse.parse_qs(url)['pid'][0]
        price = int(price_str)
        # try to find unit in title, introduction, content table
        try:
            unit_str = GeantBrowser.COUNT_RE.findall(intro_str)[0]
            unit_str += name_str
            unit_str += content_unit_str
        except IndexError:
            unit_str = name_str + content_unit_str
        unit = self.get_unit(unit_str)
    except:
        log.error(Directory.ERROR_MAP[3] % (name_str, url))
        return None, None
    product = Product(source=url, name=name, origin=origin, market_id=self.market.id,
                      pid=pid, weight=weight, count=count, unit=unit)
    price = Price(price=price, date=self.date)
    return product, price
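# Note on the pid extraction above: parse_qs is applied to the whole URL rather
# than just its query string, so it only finds 'pid' when that parameter is not
# the first one after '?'. A hedged illustration with made-up URLs:
if __name__ == '__main__':
    import urlparse
    print urlparse.parse_qs('http://example.com/p?cat=1&pid=4940444').get('pid')  # ['4940444']
    print urlparse.parse_qs('http://example.com/p?pid=4940444').get('pid')        # None (the key becomes 'http://example.com/p?pid')
    # Parsing the query component explicitly avoids that edge case:
    print urlparse.parse_qs(urlparse.urlparse('http://example.com/p?pid=4940444').query)['pid']  # ['4940444']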
def a_nec_log():
    """Check the requesting domain against the Kinvey whitelist, then log the
    query string, user agent and client IP, returning JSON (or JSONP)."""
    # timer code
    if debug == True:
        start = time()
    domainMatch = None
    global kinveyInitialized
    global whitelistCache
    # check whitelist if not already initialized
    if not kinveyInitialized:
        kinveyURL = kinveyBaseURL + '/appdata/' + kinveyAppKey + '/' + 'whitelist/'
        #app.logger.info(kinveyURL)
        r = requests.get(kinveyURL, auth=(kinveyUsername, kinveyPassword))
        neclogger(r.text, debug, True)
        kinveyInitialized = True
        s = r.text
        entries = json.loads(s)
        for entry in entries:
            whitelistEntry = entry.get('url', None)
            if whitelistEntry:
                whitelistCache.append(whitelistEntry)
                neclogger("Entry = " + whitelistEntry, debug, True)
    rurl = request.url
    o = urlparse.urlparse(rurl)
    requestDomain = None
    requestDomain = o.netloc
    neclogger(requestDomain, debug, True)
    for entry in whitelistCache:
        if entry == requestDomain:
            domainMatch = True
            break
    if domainOverride:
        msg = "Overriding domain blocking"
        neclogger(msg, debug, False)
    else:
        if not domainMatch:
            msg = "No match for requesting domain: " + requestDomain
            neclogger(msg, debug, False)
            #app.logger.info(msg)
            rc = 'NYMBLE202'
            data = rc
            js = json.dumps(data)
            result = js
            resp = Response(result, status=202, mimetype='application/javascript')
            return resp
    if debug == True:
        end = time()
        t = "kinvey lookup elapsed time = " + str(end - start)
        neclogger(t, debug, True)
        #app.logger.info(t)
    # get the query string
    qsdata = request.query_string
    qs = urlparse.parse_qs(qsdata)
    # process the user-agent info
    ua = request.headers.get('User-Agent')
    neclogger("Useragent is:=== " + ua, debug, True)
    platform = None
    browser = None
    version = None
    if ua:
        useragent = UserAgent(ua)
        if useragent.platform:
            platform = useragent.platform
        if useragent.browser:
            browser = useragent.browser
        if useragent.version:
            version = useragent.version
    s_a = "platform," + platform + ",browser," + browser + ",version," + version + ","
    js_a = '"platform": "' + platform + '", "browser": "' + browser + '", "version": "' + version + '"'
    # get the client IP address
    ip = request.remote_addr
    if ip and 'X-Forwarded-For' in request.headers:
        ip_adds = request.headers['X-Forwarded-For'].split(",")
        ip = ip_adds[0]
    else:
        ip = "0.0.0.0"
    # add ip and user-agent data to logging record
    s = "ip," + ip + ","
    json_s = '"ip": "' + ip + '"'
    s = s + s_a
    json_s = json_s + ', ' + js_a
    s1 = ""
    json_s1 = ""
    cb = ""
    # process the query string, return the callback function if provided as a jsonp convenience
    if qs:
        keys = qs.keys()
        i = 0
        for k in keys:
            # print k, "..."
            # print qs.get(k)
            v = qs.get(k)
            if k == 'callback':
                # print k + " = " + v[0]
                cb = v[0]
            s1 = s1 + k + "," + v[i] + ","
            json_s1 = json_s1 + ', "' + k + '": "' + v[i] + '"'
        s = s + s1
        json_s = json_s + json_s1
    st = dt.datetime.now().strftime("date,%Y-%m-%d,time,%H:%M:%S.%f,")
    json_st = dt.datetime.now().strftime('"date": "%Y-%m-%d", "time": "%H:%M:%S.%f", ')
    s = "NECLog: " + st + s
    json_s = 'JSON_NECLog:' + ' { ' + json_st + json_s + ' }'
    # write log entry
    neclogger(s, debug, False)
    neclogger(json_s, debug, False)
    rc = 'NYMBLE200'
    data = rc
    js = json.dumps(data)
    if cb != '':
        result = cb + '(' + js + ')'
        resp = Response(result, status=200, mimetype='application/javascript')
    else:
        result = js
        resp = Response(result, status=200, mimetype='application/json')
    if debug == True:
        end = time()
        t = "final elapsed time = ", end - start
        neclogger(t, debug, True)
    return resp
def get_product_price(self, url):
    page = MarketBrowser.get_html(url)
    xpath = Directory.flat_xpath
    name_str = xpath(
        page,
        '//div[@class="pro_rightbox"]/h2[@class="product_Titlename"]/span/text()'
    )
    price_str = xpath(
        page,
        '//div[@class="product_PRICEBOX"]//span[@class="price_num"]/text()'
    )
    intro_str = xpath(page, '//table[@class="title_word"]//table/tr/td/text()')
    try:
        # 紅蘿蔔約500g => 紅蘿蔔 (strip the weight suffix from the name)
        name = RtmartBrowser.NAME_RE.findall(name_str)[0]
        # try to find spec in introduction
        try:
            spec_str = RtmartBrowser.WEIGHT_RE.findall(intro_str)[0]
            weight = self.get_weight(spec_str)
            # test spec with weight in title
            test_weight = self.get_weight(name_str)
            if test_weight and test_weight != weight:
                weight = test_weight
        # try to find spec in title
        except IndexError:
            weight = self.get_weight(name_str)
        # &prod_no=12345 => 12345
        pid = urlparse.parse_qs(url)['prod_no'][0]
        # try to find origin in introduction
        try:
            origin_str = RtmartBrowser.ORIGIN_RE.findall(intro_str)[0]
        # try to find origin in title
        except IndexError:
            origin_str = name_str
        origin = self.get_origin(origin_str)
        # try to find count in title
        count = self.get_count(name_str)
        price_str = Directory.NUM_RE.findall(price_str)[0]
        price = int(price_str)
        # try to find unit in title, introduction
        unit = self.get_unit(name_str + intro_str)
    except:
        log.error(Directory.ERROR_MAP[3] % (name_str, url))
        return None, None
    product = Product(source=url, name=name, origin=origin, market_id=self.market.id,
                      pid=pid, weight=weight, count=count, unit=unit)
    price = Price(price=price, date=self.date)
    return product, price
def rendered_content(content, request):
    """Render mentions, inline images and OpenGraph embeds in post content."""
    title = ''
    desc = ''
    site_image = ''
    article_title = ''
    urls = re.findall(
        'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        content)
    mentions = re.findall('\@\w+', content)
    r = re.compile('###uploaded_image###(.*?)##!uploaded_image!##')
    m = r.search(content)
    if m:
        content = content.replace(m.group(1), "").replace(
            "###uploaded_image###", "").replace("##!uploaded_image!##", "") \
            + "<br/><div class='row'><div class='col-sm-6 col-md-3'><a href='" + m.group(1) \
            + "' target='_blank' class='thumbnail'><img data-src='holder.js/300' src='" \
            + m.group(1) + "'/></a></div></div>"
    for mention in mentions:
        mentioned_username = mention.replace('@', '')
        mentioned_user = User.objects.get(username=mentioned_username)
        if mentioned_user:
            notify.send(request.user, recipient=mentioned_user, verb='post_mention')
        content = content.replace(
            mention,
            '<a href="/user/profile/' + mentioned_username + '">' + mention + '</a>')
    for url in urls:
        parse_obj = urlparse.urlparse(url)
        site = parse_obj.netloc
        path = parse_obj.path
        conn = httplib.HTTPConnection(site)
        conn.request('HEAD', path)
        response = conn.getresponse()
        conn.close()
        ctype = response.getheader('Content-Type')
        if response.status < 400 and ctype.startswith('image'):
            content = content + "<br/><div class='row'><div class='col-sm-6 col-md-3'><a href='" + url + "' target='_blank' class='thumbnail'><img data-src='holder.js/300' src='" + url + "'/></a></div></div>"
        else:
            og = opengraph.OpenGraph(url)
            if not len(og.items()) == 2:
                for x, y in og.items():
                    if x == 'type' and y == 'video':
                        for k, l in og.items():
                            if k == 'site_name' and l == 'YouTube':
                                url_data = urlparse.urlparse(url)
                                query = urlparse.parse_qs(url_data.query)
                                video = query["v"][0]
                                content = content.replace(
                                    url,
                                    "<a href='" + url + "' target='_blank'>" + url + "</a>"
                                ) + "<br/><br/><iframe width='300' height='200' src='//www.youtube.com/embed/" + video + "' frameborder='0' allowfullscreen></iframe>"
                            elif k == 'site_name' and l == 'Vimeo':
                                url_data = urlparse.urlparse(url)
                                video = url_data.path
                                content = content.replace(
                                    url,
                                    "<a href='" + url + "' target='_blank'>" + url + "</a>"
                                ) + "<br/><br/><iframe src='//player.vimeo.com/video" + video + "' width='300' height='200' frameborder='0' webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe> <p></p>"
                    elif x == 'type' and y == 'article':
                        for k, l in og.items():
                            if k == 'title':
                                article_title = l
                            elif k == 'site_name':
                                title = l
                            elif k == 'description':
                                desc = l
                            elif k == 'image':
                                site_image = l
                        content = content + "<br/><table><tr><td><img width='50' src='" + site_image + "'</td><td><a href='" + url + "' target='_blank'/>" + article_title + "</a><br/>" + title + "</td></td></table>"
                    elif x == 'type':
                        for k, l in og.items():
                            if k == 'site_name':
                                title = l
                            elif k == 'description':
                                desc = l
                            elif k == 'image':
                                site_image = l
                        content = content.replace(
                            url,
                            "<table><tr><td><img width='50' src='" + site_image + "'</td><td><a href='" + url + "' target='_blank'/>" + title + "</a><br/>" + desc + "</td></td></table>")
            else:
                content = content.replace(
                    url, "<a href='" + url + "' target='_blank'>" + url + "</a>")
    return content
def key_value_pairs(url):
    return dict(urlparse.parse_qs(urlparse.urlsplit(url).query))