import os
import re
from urlparse import urlparse  # Python 2; use urllib.parse on Python 3

# Project-local modules used below: fetch wraps the HTTP requests,
# santi_parser and parser extract data from fetched pages, db wraps storage.
import db
import fetch
import parser
import santi_parser


def fetch_save_picture(url):
    """Download the picture at `url` into images/, skipping files we already have."""
    o = urlparse(url)
    path = o.path
    # Flatten the URL path into a filename: /a/b/c.jpg -> a-b-c.jpg
    fpath = re.sub('^/', '', path)
    fpath = re.sub('/', '-', fpath)
    save_path = 'images/' + fpath
    if os.path.exists(save_path):
        print save_path, 'exists'
        return
    content = fetch.get_url(o.hostname, path)
    # Debug alternative: replay a cached response instead of hitting the network.
    # ff = open('get_url.cache', 'r')
    # content = ff.read()
    # ff.close()
    f = open(save_path, 'wb')  # 'wb': image data is binary
    f.write(content)
    f.close()
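# Usage sketch (the URL is hypothetical; fetch.get_url is assumed to
# return the raw response body as a string):
#
#   fetch_save_picture('http://example.com/photos/cat.jpg')
#   # -> writes images/photos-cat.jpg, or prints "... exists" and skips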
def get_save_page(page_num):
    """Fetch one page of the thread, parse its scenes, and store them."""
    print page_num
    # Pages are /7349086.html, /7349086-2.html, /7349086-3.html, ...
    url = '/7349086'
    if page_num != 1:
        url += '-' + str(page_num)
    url += '.html'
    content = fetch.get_url('bbs.hupu.com', url, {}, 'gbk')
    scene_list = santi_parser.get_scene_list(content)
    for scene in scene_list:
        scene['text'] = scene['text'].strip()
        # Split a scene number like '12a' into '12' plus the extra letter.
        m = re.match(r'(\d+)([a-z]?)', scene['number'])
        scene['number'] = m.group(1)
        if m.group(2):  # ([a-z]?) matches '' (not None) when the letter is absent
            scene['number_extra'] = m.group(2)
        db.insert('scene', scene)
    print 'ok'
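# Driver sketch, assuming the page count is known up front (TOTAL_PAGES is
# a hypothetical placeholder; the real count would come from the thread's
# pagination):
#
#   TOTAL_PAGES = 10
#   for page_num in range(1, TOTAL_PAGES + 1):
#       get_save_page(page_num)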
def get_info(uid):
    """Fetch a renren.com user's info page and return the parsed profile."""
    content = fetch.get_url('www.renren.com', '/' + str(uid) + '/profile',
                            {'v': 'info_timeline'})
    info = parser.get_info(content)
    return info
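# Usage sketch (the uid is a hypothetical placeholder; parser.get_info is
# assumed to return a dict of profile fields):
#
#   info = get_info(123456789)
#   print info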