def youdao_get_rss_records(xml_id, cookie, parent_dir='./', max_pages=2):
    """Download the Youdao Reader pages of one channel into numbered files.

    Page ``i`` of the channel is fetched and written with ``write_file`` to
    ``<parent_dir>/<i>.json``.  The text written is the server response with
    every ``true``/``false`` replaced by ``True``/``False``; this is
    unchanged from the previous behaviour so that existing consumers of
    those files keep working.

    Arguments:
        xml_id     -- channel id, substituted into the ``param`` query field.
        cookie     -- handed to ``fetch`` as its third argument.
        parent_dir -- directory that receives the page files.
        max_pages  -- number of pages to download, starting at page 1.  The
                      default of 2 keeps the cap that used to be hard-coded
                      (``end_page = 3``).  Pass ``None`` to download every
                      page up to the ``lastPage`` value reported by the
                      server.

    Returns None.  Any exception raised by ``fetch``, by parsing the first
    response, or by ``write_file`` propagates to the caller.
    """
    # The "&param=" separator had been corrupted into a pilcrow character,
    # which left the channel id outside of any query field.
    url_base = ('http://reader.youdao.com/view.do?_=%s&method=viewChannel'
                '&param=%s&pageIndex=%d&first=0&viewnew=0&viewtitle=1&shot=-1')
    start_page = 1

    # The first page is requested up front because it carries the page count.
    url = url_base % (timestamp(), xml_id, start_page)
    s = fetch(url, None, cookie)
    s = s.replace('true', 'True').replace('false', 'False')
    # SECURITY: eval() executes whatever text the server returned.  It is
    # kept because the response format is not visible from here; if the
    # payload is strict JSON, json.loads() on the raw response is the safe
    # replacement.
    info = eval(s)  # convert the string to dict
    last_page = int(info.get('page').get('lastPage'))

    if max_pages is None:
        end_page = last_page + 1
    else:
        end_page = start_page + max_pages

    for i in xrange(start_page, end_page):
        print('download page %d' % i)
        url = url_base % (timestamp(), xml_id, i)
        s = fetch(url, None, cookie)
        s = s.replace('true', 'True').replace('false', 'False')
        # The page body is stored without being evaluated: the former
        # per-page eval() only filled two unused locals and hid every
        # failure behind a bare "except: pass".
        filename = os.path.join(parent_dir, str(i) + '.json')
        write_file(filename, s)
def sina_fetch(index, template):
    """Fetch ``index`` and collect the links that ``template`` matches.

    ``template`` is a regular expression applied with ``re.findall`` to the
    fetched content.  For every match, item 0 is resolved against ``index``
    with ``urlparse.urljoin`` and item 1 is kept unchanged.

    Returns a list of two-item lists ``[absolute_url, second_item]``, in
    match order.
    """
    # NOTE(review): items 0 and 1 of each match are indexed directly, so the
    # pattern presumably defines at least two capture groups - confirm
    # against the callers.
    page = fetch(index)
    return [[urlparse.urljoin(index, match[0]), match[1]]
            for match in re.findall(template, page)]
def youdao_login_get_cookie(username="******", password="******"):
    """Submit the login form fields and return the second value from ``fetch``.

    The account used to be fixed inside the function body.  It is now taken
    from the ``username`` and ``password`` arguments, whose defaults are the
    former literals, so existing zero-argument calls behave as before.

    Arguments:
        username -- value sent in the ``username`` form field.
        password -- value sent in the ``password`` form field.

    Returns the second element of the pair produced by
    ``fetch(url, data, None, True)``.
    """
    # NOTE(review): the returned value is presumably the session cookie that
    # the other youdao_* helpers take as their ``cookie`` argument - confirm
    # against ``fetch``.
    url = 'https://reg.163.com/logins.jsp'
    input_url = "http://account.youdao.com/login?service=reader&back_url=http%3A%2F%2Freader.youdao.com%2Fview.do%3Fmethod%3DviewChannel%26pageIndex%3D249%26param%3D4134975263908880489%26first%3D0%26viewtitle%3D1%26shot%3D-1%26viewnew%3D0%26_%3D1382592916423&success=1"
    data = {
        "url": input_url,
        "product": "search",
        "type": "1",
        "username": username,
        "password": password,
    }
    # Only the second element of the pair is needed by callers.
    _response, cookie = fetch(url, data, None, True)
    return cookie
def youdao_xml_to_id(xml_url, cookie):
    """Look up the id that the subscribe page reports for ``xml_url``.

    The feed URL is percent-quoted (no characters kept safe) into an
    ``addChannel`` request, the response is fetched with ``cookie``, and the
    id is taken from the argument of the first ``YSubMgr.subFeed('...')``
    subscribe button found in it.

    Returns the id string.  Returns ``''`` when the fetch raises
    ``urllib2.HTTPError`` or ``urllib2.URLError``, or when no subscribe
    button is found; the latter case also prints an error message.
    """
    query = "http://reader.youdao.com/subscribe.do?_=%s1&method=addChannel&addChannel=%s&page=1" % (
        timestamp(), urllib.quote(xml_url, ''))
    pattern = r'''<span class="btnR" onclick="YSubMgr.subFeed\('([^']+)'\);"><span class="sprite">\+</span>订阅</span>'''

    try:
        page = fetch(query, None, cookie)
    except (urllib2.HTTPError, urllib2.URLError):
        # Network-level failures are reported to the caller as "no id".
        return ''

    matches = re.findall(pattern, page)
    if not matches:
        print('Error, get xml id fail')
        return ''
    return matches[0]
f = open('entries.json', 'w') f.write(json.dumps(entries)) f.close() """ like_weight = 1.25 comment_weight = 2.0 comment_weight_by_nonunique_user = 1.1 #unique_user_commented_weight = freq = {} #for entry in json.loads(open('entries.json').read())['data']: for entry in fetch(CONFIG['uid'], 'statuses', token)['data']: message = entry['message'] like_count = len(entry['likes']['data']) if 'likes' in entry else 0 if 'comments' in entry: comment_count = len(entry['comments']['data']) unique_users_commented = len(unique_users(entry['comments'])) else: comment_count = 0 unique_users_commented = 0 weighed_score = sum((like_count*like_weight, unique_users_commented*comment_weight, (comment_count-unique_users_commented)*comment_weight_by_nonunique_user ))