import os
import re
from urlparse import urlparse  # Python 2 stdlib; urllib.parse in Python 3

import fetch  # project-local HTTP helper module


def fetch_save_picture(url):
    """Download the picture at `url` and save it under images/."""
    o = urlparse(url)
    path = o.path
    # Turn "/a/b/c.jpg" into "a-b-c.jpg" so the URL path becomes a flat filename.
    fpath = re.sub('^/', '', path)
    fpath = re.sub('/', '-', fpath)
    save_path = 'images/' + fpath
    if os.path.exists(save_path):
        print save_path, 'exists'
        return
    content = fetch.get_url(o.hostname, path)
    # For offline testing the response can be read from a local cache instead:
    # ff = open('get_url.cache', 'r')
    # content = ff.read()
    # ff.close()
    # Write in binary mode so image bytes are not corrupted on Windows.
    f = open(save_path, 'wb')
    f.write(content)
    f.close()
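A minimal usage sketch for the example above, assuming the images/ directory already exists and that fetch.get_url returns the raw response body; the image URL is a hypothetical placeholder:

# Hypothetical call: saves the picture as images/photos-2013-cover.jpg
fetch_save_picture('http://example.com/photos/2013/cover.jpg')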
Example #2
import re

import db            # project-local database helper
import fetch         # project-local HTTP helper
import parser        # project-local profile-page parser (used by get_info below)
import santi_parser  # project-local parser for this forum thread


def get_save_page(page_num):
    """Fetch one page of thread 7349086 on bbs.hupu.com and store each scene."""
    print page_num
    # Page 1 is /7349086.html; later pages are /7349086-<n>.html.
    url = '/7349086'
    if page_num != 1:
        url += '-' + str(page_num)
    url += '.html'
    content = fetch.get_url('bbs.hupu.com', url, {}, 'gbk')
    scene_list = santi_parser.get_scene_list(content)
    for scene in scene_list:
        scene['text'] = scene['text'].strip()
        # Split a number like "12a" into "12" and the optional letter suffix "a".
        m = re.match(r'(\d+)([a-z]?)', scene['number'])
        scene['number'] = m.group(1)
        if m.group(2):  # the optional group matches '' when there is no suffix
            scene['number_extra'] = m.group(2)
        db.insert('scene', scene)
    print 'ok'
def get_info(uid):
    """Fetch a renren.com profile page and return the parsed info dict."""
    content = fetch.get_url('www.renren.com', '/' + str(uid) + '/profile',
                            {'v': 'info_timeline'})
    info = parser.get_info(content)
    return info
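A minimal driver sketch for the two functions above, assuming pages are numbered consecutively from 1; total_pages and the uid below are placeholders, not values from the source:

# Hypothetical driver: fetch and store every page of the thread, then one profile.
total_pages = 10  # placeholder page count; not given in the source
for page_num in range(1, total_pages + 1):
    get_save_page(page_num)

info = get_info(123456789)  # hypothetical renren uid
print info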