Ejemplo n.º 1
0
def youdao_get_rss_records(xml_id, cookie, parent_dir='./', max_pages=2):
    """Download the paged article listing of a Youdao Reader channel to disk.

    Page 1 is requested once up front to read the channel's page count from
    ``page.lastPage``. Every page in range is then fetched and written to
    ``<parent_dir>/<page number>.json``.

    Args:
        xml_id: channel id, substituted into the ``param`` query argument.
        cookie: cookie value handed to ``fetch`` with every request.
        parent_dir: directory that receives the ``<n>.json`` files.
        max_pages: upper bound on the number of pages saved. The default
            of 2 keeps the cap that used to be hard-coded in the body;
            pass ``None`` to save every page the server reports.

    Raises:
        ValueError or SyntaxError: if the first response is not a plain
            Python literal after the true/false rewrite.
        AttributeError: if the parsed response has no ``page`` entry.
    """
    # Function-scope import so the block stays self-contained.
    import ast

    url_base = 'http://reader.youdao.com/view.do?_=%s&method=viewChannel&param=%s&pageIndex=%d&first=0&viewnew=0&viewtitle=1&shot=-1'
    first_page = 1

    # Probe page 1 only to learn how many pages the channel has.
    s = fetch(url_base % (timestamp(), xml_id, first_page), None, cookie)
    s = s.replace('true', 'True').replace('false', 'False')
    # literal_eval accepts the same dict/list/string/number literals that
    # eval() did, but will not execute code embedded in a remote response.
    info = ast.literal_eval(s)
    last_page = int(info.get('page').get('lastPage'))
    if max_pages is not None:
        # Never request pages beyond what the server says exists.
        last_page = min(last_page, max_pages)

    for i in xrange(first_page, last_page + 1):
        print('download page %d' % i)
        url = url_base % (timestamp(), xml_id, i)
        s = fetch(url, None, cookie)
        # NOTE(review): this is a plain textual rewrite, so it also touches
        # "true"/"false" inside article text, and the rewritten text is what
        # gets saved. Kept as-is because readers of these files may rely on it.
        s = s.replace('true', 'True').replace('false', 'False')

        filename = os.path.join(parent_dir, str(i) + '.json')
        write_file(filename, s)
Ejemplo n.º 2
0
def sina_fetch (index, template):
    """Collect the links matched by ``template`` on the page at ``index``.

    ``template`` is a regular expression applied with ``re.findall``; the
    first two items of every match are used, so it is expected to define at
    least two capture groups (link target first).

    Returns a list of ``[absolute_url, second_group]`` pairs, where the link
    target has been resolved against ``index``.
    """
    page = fetch(index)
    return [
        [urlparse.urljoin(index, match[0]), match[1]]
        for match in re.findall(template, page)
    ]
Ejemplo n.º 3
0
def youdao_login_get_cookie ():
    """Submit the login form to reg.163.com and return what ``fetch`` hands back second.

    ``fetch`` is called with the form fields, no cookie and a trailing
    ``True`` flag, and must return a two-item result; the second item
    (presumably the session cookie - confirm against ``fetch``) is returned.
    """
    login_url = 'https://reg.163.com/logins.jsp'
    # Address passed in the form's "url" field.
    redirect_target = "http://account.youdao.com/login?service=reader&back_url=http%3A%2F%2Freader.youdao.com%2Fview.do%3Fmethod%3DviewChannel%26pageIndex%3D249%26param%3D4134975263908880489%26first%3D0%26viewtitle%3D1%26shot%3D-1%26viewnew%3D0%26_%3D1382592916423&success=1"
    form = {
        "url": redirect_target,
        "product": "search",
        "type": "1",
        "username": "******",
        "password": "******",
    }
    _, cookie = fetch(login_url, form, None, True)
    return cookie
Ejemplo n.º 4
0
def youdao_xml_to_id (xml_url, cookie):
    """Look up the Youdao Reader id for the feed at ``xml_url``.

    Requests the ``addChannel`` page for the feed and pulls the id out of the
    subscribe button's ``YSubMgr.subFeed('...')`` handler.

    Returns the id string, or ``''`` when the request fails with an
    ``urllib2`` HTTP/URL error or when no subscribe button is found (the
    latter also prints a message).
    """
    subscribe_button = r'''<span class="btnR" onclick="YSubMgr.subFeed\('([^']+)'\);"><span class="sprite">\+</span>订阅</span>'''
    query_args = (timestamp(), urllib.quote(xml_url, ''))
    url = "http://reader.youdao.com/subscribe.do?_=%s1&method=addChannel&addChannel=%s&page=1" % query_args

    try:
        page = fetch(url, None, cookie)
    except (urllib2.HTTPError, urllib2.URLError):
        return ''

    matches = re.findall(subscribe_button, page)
    if not matches:
        print('Error, get xml id fail')
        return ''
    return matches[0]
Ejemplo n.º 5
0
f = open('entries.json', 'w')
f.write(json.dumps(entries))
f.close()

"""

# Weights combined into each entry's weighed_score below.
like_weight = 1.25  # applied to the number of likes
comment_weight = 2.0  # applied to the number of unique commenting users
comment_weight_by_nonunique_user = 1.1  # applied to the comments left over after counting one per unique user
#unique_user_commented_weight = 

# Not read or written in the visible part of the script; presumably filled
# further down - TODO confirm.
freq = {}

# Alternative input source: replay entries previously dumped to entries.json.
#for entry in json.loads(open('entries.json').read())['data']:
for entry in fetch(CONFIG['uid'], 'statuses', token)['data']:
    message = entry['message']
    # An entry without a 'likes' key counts as zero likes.
    like_count = len(entry['likes']['data']) if 'likes' in entry else 0
    
    if 'comments' in entry:
        comment_count = len(entry['comments']['data'])
        # unique_users is defined elsewhere; presumably it returns the
        # distinct commenters of the entry - verify against its definition.
        unique_users_commented = len(unique_users(entry['comments']))
    else:
        comment_count = 0
        unique_users_commented = 0
    
    # Score = weighted likes + weighted unique commenters + weighted
    # remaining comments (total comments minus unique commenters).
    weighed_score = sum((like_count*like_weight,
            unique_users_commented*comment_weight,
            (comment_count-unique_users_commented)*comment_weight_by_nonunique_user
    ))