def fetch(name, account_url, last_num):
    """Crawl sequentially numbered pages of an account and store new essays.

    Pages ``last_num + 1`` .. ``last_num + 499`` are requested by appending
    the number to ``account_url``.  For each page answering 200, every
    ``<title>`` is inspected: a new non-empty title is stored through
    ``model.add_essay`` (once per ``2013-...</span>`` date found on the page)
    and the account position is advanced with ``model.update_account``.

    The crawl stops early (returns ``None``) after ten empty titles without
    a new essay in between.

    NOTE(review): the original indentation was lost; the nesting of the
    counter reset and of ``update_account`` is reconstructed -- confirm.
    """
    empty_titles = 0
    last_title = 'a'
    # Page numbers below this floor are never requested.
    if last_num < 10000000:
        last_num = 10000000
    for i in xrange(last_num + 1, last_num + 500):
        url = account_url + '%s' % i
        r = requests.get(url)
        if r.status_code != 200:
            continue
        for title in extract_all('<title>', '</title>', r.content):
            if not title:
                empty_titles += 1
                if empty_titles == 10:
                    return
                continue
            if title == last_title:
                # Same title as the previous hit: nothing new to store.
                continue
            last_title = title
            for date in extract_all('2013-', '</span>', r.content):
                model.add_essay(name, title, url, date)
            empty_titles = 0
            model.update_account(name, i)
def fetch(name, account_url, last_num):
    """Crawl sequentially numbered pages of an account and store new essays.

    Pages ``last_num + 1`` .. ``last_num + 499`` are requested by appending
    the number to ``account_url``.  For each page answering 200, every
    ``<title>`` is inspected: a new non-empty title is stored through
    ``model.add_essay`` (once per ``2013-...</span>`` date found on the page)
    and the account position is advanced with ``model.update_account``.

    The crawl stops early (returns ``None``) after ten empty titles without
    a new essay in between.

    NOTE(review): the original indentation was lost; the nesting of the
    counter reset and of ``update_account`` is reconstructed -- confirm.
    """
    empty_titles = 0
    last_title = 'a'
    # Page numbers below this floor are never requested.
    if last_num < 10000000:
        last_num = 10000000
    for i in xrange(last_num + 1, last_num + 500):
        url = account_url + '%s' % i
        r = requests.get(url)
        if r.status_code != 200:
            continue
        for title in extract_all('<title>', '</title>', r.content):
            if not title:
                empty_titles += 1
                if empty_titles == 10:
                    return
                continue
            if title == last_title:
                # Same title as the previous hit: nothing new to store.
                continue
            last_title = title
            for date in extract_all('2013-', '</span>', r.content):
                model.add_essay(name, title, url, date)
            empty_titles = 0
            model.update_account(name, i)
def run():
    """Fetch a range of numbered pages and write their content to a file.

    Command-line arguments: ``argv[1]`` first page number, ``argv[2]`` last
    page number (inclusive), ``argv[3]`` output file name.  For each page
    that contains a title, the first title and the first text section (with
    all whitespace removed) are passed to ``write_file``.

    The output file is always closed, even when a request or a parse step
    raises.
    """
    # write_file() is not handed the file object, so it presumably uses this
    # module-level global -- confirm against write_file.
    global outfile
    start = int(sys.argv[1])
    end = int(sys.argv[2])
    file_name = sys.argv[3]
    count = 0
    outfile = open(file_name, "w")
    try:
        for url_num in range(start, end + 1):
            url = URL + str(url_num)
            content = requests.get(url).content
            title_list = extract_all(TITLE_START, TITLE_END, content)
            if not title_list:
                print("nothig here at %s\n" % url)
                continue
            count += 1
            title = title_list[0]
            text_space = extract_all(TEXT_START, TEXT_END, content)[0]
            # split()/join drops every whitespace character from the text.
            text = "".join(text_space.split())
            print("get ok %s all:%d \n" % (url, count))
            write_file(url_num, title, text)
    finally:
        outfile.close()
def open_sub_install(url):
    """Download and unpack the OpenSubtitles addon zip listed at ``url``.

    The page at ``url`` is searched for a ``title="...zip"`` entry, whose
    value is appended to ``url`` to form the download address.  The archive
    is stored in the packages folder and extracted into the addons folder.

    Returns ``False`` when no zip entry is found or extraction raises
    ``IOError``; otherwise shows a completion dialog and returns ``None``.
    """
    path = kodi.translate_path(
        os.path.join('special://home', 'addons', 'packages'))
    dp = xbmcgui.DialogProgress()
    dp.create("Please Wait", 'Installing Official OpenSubtitles Addon')
    lib = os.path.join(path, 'opensubtitlesOfficial.zip')
    # Best-effort removal of a stale download.
    try:
        os.remove(lib)
    except OSError:
        pass
    page = kodi.open_url(url)
    xbmc.log('@#@contentcontentcontent: %s' % str(url), xbmc.LOGINFO)
    zip_match = re.search('''title="([^z]*zip)''', page or '')
    if zip_match is None:
        # Previously this crashed with AttributeError on ``None.group``.
        dp.close()
        kodi.message("Failed to open required files", "Error is: ",
                     "No zip file listed at %s" % str(url))
        return False
    url += zip_match.group(1)
    downloader.download(url, lib, dp, timeout=120)
    addonfolder = kodi.translate_path(
        os.path.join('special://', 'home', 'addons'))
    time.sleep(2)
    try:
        extract.extract_all(lib, addonfolder, '')
    except IOError as e:
        kodi.message("Failed to open required files", "Error is: ", str(e))
        return False
    dialog.ok("Installation Complete!",
              " We hope you enjoy your Kodi addon experience!",
              " Brought To You By %s " % siteTitle)
def download(url, dest, addonfolder, name):
    """Download ``url`` to ``dest`` with a progress dialog, then extract it.

    Progress callbacks from ``urlretrieve`` are forwarded to ``_pbhook``
    together with the dialog.  The finished archive is extracted into
    ``addonfolder`` without a progress dialog (``dp=None``).
    """
    kodi.log(' DOWNLOADING FILE:' + name + '.zip')
    kodi.log('From: ' + url)

    heading = "Downloading: " + name
    dp = xbmcgui.DialogProgress()
    dp.create(heading)
    dp.update(0, heading, '', 'Please Wait')

    def report(block_count, block_size, file_size):
        # Adapter: urlretrieve supplies three numbers, _pbhook also needs dp.
        return _pbhook(block_count, block_size, file_size, dp)

    urlretrieve(url, dest, report)
    kodi.log("DOWNLOAD IS DONE " + name)
    extract.extract_all(dest, addonfolder, dp=None)
def get_dir(script, source_url):
    """Download, extract and enable the newest ``script`` zip at ``source_url``.

    The listing at ``source_url`` is scanned for ``<script>[-version].zip``
    names; the greatest version suffix (by string order) is chosen, the
    archive is downloaded into ``packages_path`` and extracted into
    ``addonspath``, then the addon is enabled and Kodi is asked to rescan
    local addons.
    """
    # Escape the addon id and the dot so both are matched literally.
    pattern = re.escape(script) + r'(-.+?)?\.zip'
    versions = re.findall(pattern, kodi.open_url(source_url))
    # NOTE(review): plain string ordering, so '-1.10' sorts below '-1.9'.
    versions.sort(reverse=True)
    version = versions[0] if versions else ''
    zip_name = script + version + '.zip'
    newest_v_url = source_url + zip_name
    lib = os.path.join(packages_path, zip_name)
    if os.path.exists(lib):
        os.remove(lib)
    downloader.download(newest_v_url, lib, None, timeout=120, silent=True)
    # ``lib`` already includes packages_path; joining it again pointed at a
    # wrong location whenever packages_path was relative.
    extract.extract_all(lib, addonspath, None)
    addon_able.set_enabled(script)
    xbmc.executebuiltin("UpdateLocalAddons()")
def build(self):
    """Build the video widget from the loaded configuration.

    If ``<cfg['user']>.csv`` exists the dataset is loaded from it; otherwise
    segments are extracted first and the dataset is asked to create the csv.
    """
    cfg = utils.load_config()
    csv_exists = os.path.exists(f"{cfg['user']}.csv")
    if csv_exists:
        print('Existing csv found, loading the file')
    else:
        print('No csv found, creating one using segmented data')
        extract.extract_all(data_path=cfg['data_path'],
                            segments_path=cfg['segments_path'])
    dataset = DataSet(cfg, create_csv=not csv_exists)
    return VideoWidget(dataset, cfg)
def plim_map(filename):
    """Record which ``.plim`` templates are referenced by ``filename``.

    References are collected in three forms: double-quoted, single-quoted
    and space-delimited.  Each is reduced to a path relative to
    ``PLIM_PATH`` and used as a key in ``PLIM_MAP``; the value added is
    ``filename`` relative to ``PLIM_PATH`` with its last five characters
    removed.
    """
    with open(filename, 'r') as infile:
        html = infile.read()

    prefix_len = len(PLIM_PATH) + 1
    page_key = filename[prefix_len:-5]
    base = dirname(filename)
    references = (
        extract_all('"', '.plim"', html)
        + extract_all("'", ".plim'", html)
        + extract_all(' ', '.plim', html)
    )
    for ref in references:
        if ref.startswith('/'):
            # Rooted reference: only the leading slash is removed.
            key = ref[1:]
        else:
            # Relative reference: resolve against the including file.
            key = abspath(join(base, ref))[prefix_len:]
        PLIM_MAP[key].add(page_key)
def install_keymap(name, url):
    """Download a keymap zip from ``url`` and extract it into the Kodi home.

    Any existing ``KEYBOARD_FILE`` is removed first, the packages and
    keymaps folders are created when missing, and the downloaded archive is
    deleted again after extraction.  Returns ``False`` when extraction
    raises ``IOError``; otherwise refreshes the container, shows a
    completion dialog and returns ``None``.
    """
    def _remove_quietly(target):
        # Best-effort delete: an OSError is deliberately ignored.
        try:
            os.remove(target)
        except OSError:
            pass

    if os.path.isfile(KEYBOARD_FILE):
        _remove_quietly(KEYBOARD_FILE)

    # Check if the packages folder exists, if not create it.
    packages_dir = kodi.translate_path(
        os.path.join('special://home/addons', 'packages'))
    if not os.path.exists(packages_dir):
        os.makedirs(packages_dir)
    keymaps_dir = kodi.translate_path(
        os.path.join('special://home/userdata', 'keymaps'))
    if not os.path.exists(keymaps_dir):
        os.makedirs(keymaps_dir)

    progress = xbmcgui.DialogProgress()
    progress.create("Keymap Installer", "", "", "[B]Keymap: [/B]" + name)

    # The archive is always stored under a fixed name, not under ``name``.
    archive_name = "customkeymap"
    archive = os.path.join(packages_dir, archive_name + '.zip')
    _remove_quietly(archive)
    downloader.download(url, archive, progress, timeout=120)

    home_dir = kodi.translate_path(os.path.join('special://', 'home'))
    time.sleep(2)
    progress.update(0, "", "Installing  Please wait..", "")
    try:
        extract.extract_all(archive, home_dir, progress)
    except IOError as e:
        kodi.message("Failed to open required files", "Error is: ", str(e))
        return False

    time.sleep(1)
    _remove_quietly(archive)
    xbmc.executebuiltin("Container.Refresh")
    dialog.ok("Custom Keymap Installed!",
              " We hope you enjoy your Kodi addon experience!",
              " Brought To You By %s " % siteTitle)
def hub_install(script, script_url, silent=False, dp=None):
    """Download the newest ``script`` zip from ``script_url`` and extract it.

    ``get_url`` supplies the version suffix and download URL.  Unless
    ``silent`` is true, a new progress dialog replaces the ``dp`` argument.
    Returns ``False`` when extraction raises ``IOError``, else ``None``.
    """
    listing = kodi.open_url(script_url)
    version, newest_v_url = get_url(script, script_url, listing)
    kodi.log("Looking for : " + newest_v_url)

    if not silent:
        dp = xbmcgui.DialogProgress()
        dp.create("Starting up", "Initializing ", '', 'Please Stand By....')

    lib = os.path.join(packages_path, script + version + '.zip')
    # Drop a stale copy of the archive before downloading again.
    if os.path.exists(lib):
        os.remove(lib)
    downloader.download(newest_v_url, lib, dp, timeout=120)

    try:
        extract.extract_all(lib, addon_folder, None)
        time.sleep(2)
    except IOError as e:
        kodi.message("Failed to open required files", "Error is: ", str(e))
        return False
def parse_sharejs(url, html):
    """Turn one sharejs.com article page into a dict ready for storage.

    ``html`` is decoded as UTF-8.  The result holds the category taken from
    the URL path, the title, the article body converted with
    ``html2markdown``, the tag list, and a ``read_count`` of 0.
    """
    content_open = '<div class="post_content" id="paragraph">'

    # ``kind`` is the top-level category, as distinct from tag_list.
    kind = url.rsplit('/', 2)[1]
    html = html.decode('utf-8')  # decode here

    title_block = extract('<div class="post_title">', '</div>', html)
    title = extract('<h1>', '</h1>', title_block)

    # The body ends at the tag box, or at the share box when that fails.
    post_content = (extract(content_open, '<div class="hot_tags">', html)
                    or extract(content_open, '<div class="share">', html))
    post_content = re.sub(r'<span class="title">(.*?)</span>', '',
                          post_content)
    content = html2markdown(post_content)

    try:
        tag_box = extract('<div class="hot_tags">', '</div>', html)
        tag_list = extract_all('">', '</a>', tag_box)
    except AttributeError:
        # presumably raised when the tag box is missing -- confirm.
        tag_list = []

    return {
        'kind': kind,
        'title': title,
        'source_url': url,
        'source': 'www.sharejs.com',
        'content': content,
        'tag_list': tag_list,
        'read_count': 0,
    }
def parse_sharejs(url, html):
    """Turn one sharejs.com article page into a dict ready for storage.

    ``html`` is decoded as UTF-8.  The result holds the category taken from
    the URL path, the title, the article body converted with
    ``html2markdown``, the tag list, and a ``read_count`` of 0.
    """
    content_open = '<div class="post_content" id="paragraph">'

    # ``kind`` is the top-level category, as distinct from tag_list.
    kind = url.rsplit('/', 2)[1]
    html = html.decode('utf-8')  # decode here

    title_block = extract('<div class="post_title">', '</div>', html)
    title = extract('<h1>', '</h1>', title_block)

    # The body ends at the tag box, or at the share box when that fails.
    post_content = (extract(content_open, '<div class="hot_tags">', html)
                    or extract(content_open, '<div class="share">', html))
    post_content = re.sub(r'<span class="title">(.*?)</span>', '',
                          post_content)
    content = html2markdown(post_content)

    try:
        tag_box = extract('<div class="hot_tags">', '</div>', html)
        tag_list = extract_all('">', '</a>', tag_box)
    except AttributeError:
        # presumably raised when the tag box is missing -- confirm.
        tag_list = []

    return {
        'kind': kind,
        'title': title,
        'source_url': url,
        'source': 'www.sharejs.com',
        'content': content,
        'tag_list': tag_list,
        'read_count': 0,
    }
def all():
    """Print the title and address of every article on the archives page.

    Note: at module level this name shadows the builtin ``all``.
    """
    archives = get_response(host="opslinux.com", url="/archives.html")
    for article in extract_all('<article>', '</article>', archives):
        link = extract('<a href="', '</a>', article)
        # ``href">text`` splits into the address (0) and the title (1).
        parts = link.split('">')
        print("标题: %s \n地址: %s\n" % (parts[1], parts[0]))
def last():
    """Print the title and address of every article on the home page."""
    home = get_response(host="opslinux.com", url="/")
    for article in extract_all('<article>', '</article>', home):
        link = extract('<a href="', '</a>', article)
        # ``href">text`` splits into the address (0) and the title (1).
        parts = link.split('">')
        print("标题: %s \n地址: %s\n" % (parts[1], parts[0]))
def handle_html(self, url, html):
    """Queue the URL of every listing page reachable from this one.

    The largest ``?start=N`` offset linked on the page is used as the upper
    bound; one URL per 30-item step from 0 up to and including that offset
    is appended to ``self.results``.  Nothing is appended when the page has
    no ``?start=`` links.
    """
    html = html.decode('utf-8')
    start_list = extract_all('<a href="?start=', '"', html)
    if not start_list:
        return
    max_query = max(int(i) for i in start_list)
    # Everything up to the last '=' is the same for every page.
    prefix = url.rsplit('=', 1)[0] + '='
    # The bound is inclusive: the page at ``start=max_query`` is linked from
    # this page, and an exclusive range dropped it whenever max_query was a
    # multiple of 30.
    for index in range(0, max_query + 1, 30):
        self.results.append(prefix + str(index))
def get_all_tag_urls(url='http://www.sharejs.com/codes/'):
    """Return the first-page URL (``?start=0``) of every tag in the cloud."""
    page = requests.get(url).content.decode('utf-8')
    cloud = extract('<div class="tags_cloud">', '</ul>', page)
    base_url = 'http://www.sharejs.com%s'
    return [
        base_url % href + '?start=0'
        for href in extract_all('<a href="', '"', cloud)
    ]
def add_url_list(self):
    """Queue every ``<loc>`` URL from lagou sitemap files 1 through 531."""
    template = 'http://www.lagou.com/upload/sitemap/xml/lagou_sitemap_%d.xml'
    for index in range(1, 532):
        sitemap_url = template % index
        self.logger.info('sitemap url: %s', sitemap_url)
        xml = self.get_response(sitemap_url).text
        locations = extract_all('<loc>', '</loc>', xml)
        self.logger.info('%s', pformat(locations))
        self.add_url(locations)
def handle_html(self, url, html):
    """Collect absolute article URLs from a sharejs code-list page.

    Links inside the ``code_list`` block that contain ``'author'`` are
    skipped, the site prefix is added to the rest, the first remaining link
    is discarded, and the others are appended to ``self.results``.
    """
    html = html.decode('utf-8')
    code_list = extract('<div class="code_list">', '</ul>', html)
    base_url = 'http://www.sharejs.com'
    article_list = [
        base_url + href
        for href in extract_all('<a href="', '"', code_list)
        if 'author' not in href
    ]
    # Slicing drops the first link like the earlier ``pop(0)`` did, but does
    # not raise IndexError when the page yields no article links.
    self.results.extend(article_list[1:])
def handle_html(self, url, html):
    """Collect absolute article URLs from a sharejs code-list page.

    Links inside the ``code_list`` block that contain ``'author'`` are
    skipped, the site prefix is added to the rest, the first remaining link
    is discarded, and the others are appended to ``self.results``.
    """
    html = html.decode('utf-8')
    code_list = extract('<div class="code_list">', '</ul>', html)
    base_url = 'http://www.sharejs.com'
    article_list = [
        base_url + href
        for href in extract_all('<a href="', '"', code_list)
        if 'author' not in href
    ]
    # Slicing drops the first link like the earlier ``pop(0)`` did, but does
    # not raise IndexError when the page yields no article links.
    self.results.extend(article_list[1:])
def handle_html(self, url, html):
    """Collect tutorial links from the left column of a runoob.com page.

    Links that already contain the site address are skipped; the rest are
    joined onto it and appended to ``self.results``.
    """
    # Decode first; the codec should follow the page's declared charset.
    html = html.decode('utf-8')
    left_column_tag = extract('id="leftcolumn"', '</div>', html)
    base_url = 'http://www.runoob.com'
    absolute_urls = []
    for href in extract_all('href="', '"', left_column_tag):
        if base_url in href:
            continue
        # Turn a relative path such as /path/html-tutorial into
        # http://www.runoob.com/path/html-tutorial
        absolute_urls.append(urlparse.urljoin(base_url, href))
    # Store the result: a list of URLs.
    self.results.extend(absolute_urls)
def handle_html(self, url, html):
    """Collect tutorial links from the left column of a runoob.com page.

    Links that already contain the site address are skipped; the rest are
    joined onto it and appended to ``self.results``.
    """
    # Decode first; the codec should follow the page's declared charset.
    html = html.decode('utf-8')
    left_column_tag = extract('id="leftcolumn"', '</div>', html)
    base_url = 'http://www.runoob.com'
    absolute_urls = []
    for href in extract_all('href="', '"', left_column_tag):
        if base_url in href:
            continue
        # Turn a relative path such as /path/html-tutorial into
        # http://www.runoob.com/path/html-tutorial
        absolute_urls.append(urlparse.urljoin(base_url, href))
    # Store the result: a list of URLs.
    self.results.extend(absolute_urls)
def wechat_list():
    """Store the account names listed under iwgc.cn categories 1 to 15.

    Each category is paged through (``/p/1``, ``/p/2``, ...) until a page
    yields no names; the collected names are upserted into ``COL`` under
    the category id.
    """
    for _id in range(1, 16):
        category_url = 'http://www.iwgc.cn/%d' % _id
        names = []
        page = 1
        while True:
            html = requests.get(category_url + '/p/' + str(page)).text
            blocks = extract_all('<div class="detail">', '</div>', html)
            page_names = [extract('title="', '"', block) for block in blocks]
            if not page_names:
                # An empty page marks the end of this category.
                break
            names.extend(page_names)
            page += 1
        COL.update({'_id': _id}, {'$set': {'name_list': names}}, upsert=True)
def wechat_list():
    """Store the account names listed under iwgc.cn categories 1 to 15.

    Each category is paged through (``/p/1``, ``/p/2``, ...) until a page
    yields no names; the collected names are upserted into ``COL`` under
    the category id.
    """
    for _id in range(1, 16):
        category_url = 'http://www.iwgc.cn/%d' % _id
        names = []
        page = 1
        while True:
            html = requests.get(category_url + '/p/' + str(page)).text
            blocks = extract_all('<div class="detail">', '</div>', html)
            page_names = [extract('title="', '"', block) for block in blocks]
            if not page_names:
                # An empty page marks the end of this category.
                break
            names.extend(page_names)
            page += 1
        COL.update({'_id': _id}, {'$set': {'name_list': names}}, upsert=True)
def main_menu():
    """Run the startup checks and build the addon's main menu.

    In order: updates sources/feeds, cycles the 'notify' counter (showing a
    support notification when it reaches 1 and resetting it at 5), marks the
    addon as having run, downloads the icon pack when any of the art folders
    is missing, installs the hub repository and the log uploader when
    absent, removes old maintenance tool folders, and finally adds the menu
    entries and sets the view.

    Failures while installing the repository or the log uploader are logged
    and re-raised; the icon download and old-repo checks only log.

    NOTE(review): the original indentation was lost; block nesting below is
    reconstructed -- confirm against the upstream file.
    """
    maintool.source_change()
    maintool.feed_change()
    # ########## TRY POP ########
    # 'notify' counts menu openings: incremented on every call, the support
    # notification is shown at 1 and the counter wraps back to 0 at 5.
    if len(kodi.get_setting('notify')) > 0:
        kodi.set_setting('notify', str(int(kodi.get_setting('notify')) + 1))
    else:
        kodi.set_setting('notify', "1")
    if int(kodi.get_setting('notify')) == 1:
        xbmcgui.Dialog().notification('Need Support?', 'www.tvaddons.co', artwork + 'icon.png', 3000, False)
    elif int(kodi.get_setting('notify')) == 5:
        kodi.set_setting('notify', "0")
    # ######## END POP ###########

    if kodi.get_setting('hasran') == 'false':
        kodi.set_setting('hasran', 'true')

    # Download and unpack the icon pack when any art folder is missing.
    dp = xbmcgui.DialogProgress()
    try:
        if (not os.path.exists(ART)) or (not os.path.exists(ART2)) or (not os.path.exists(ART3)):
            dp.create(AddonTitle, 'Getting ' + AddonTitle + ' Ready......\nDownloading ' + AddonTitle + ' Icons.....')
            dp.update(0)
            icons_zip = os.path.join(packagepath, AddonTitle + '_icons.zip')
            # arts.txt holds the address of the icon archive.
            downloader.download(kodi.read_file('http://indigo.tvaddons.co/graphics/arts.txt'), icons_zip, dp)
            dp.update(0, 'Getting %s Ready........' % AddonTitle, 'Extracting %s Icons......'
                      % AddonTitle)
            extract.extract_all(icons_zip, addon_path, dp)
            dp.close()
    except Exception as e:
        kodi.log(str(e))

    # Check for old version of hubrepo and remove it
    try:
        if os.path.exists(hubpath):
            with open(hubpath + '/addon.xml', 'r') as content:
                # NOTE(review): ``content`` is the file object, so ``in``
                # compares 'AG' against whole lines rather than searching the
                # text; ``content.read()`` was presumably intended.  Left
                # unchanged because fixing it would start deleting hubpath.
                if 'AG' in content:
                    shutil.rmtree(hubpath)
    except Exception as e:
        kodi.log(str(e))

    # Check for HUBRepo and install it
    try:
        if not os.path.exists(hubpath):
            installer.hub_install('repository.xbmchub', 'http://github.com/tvaddonsco/tva-release-repo/raw/master/'
                                                        'repository.xbmchub/')
            # xbmc.executebuiltin("XBMC.InstallAddon(%s)" % 'repository.xbmchub')
            addon_able.set_enabled("repository.xbmchub")
    except Exception as e:
        kodi.log(str(e))
        traceback.print_exc(file=sys.stdout)
        raise

    # Check for Log Uploader and install it
    try:
        if not os.path.exists(uploaderpath):
            installer.hub_install('script.tvaddons.debug.log', 'http://github.com/tvaddonsco/tva-release-repo/raw/'
                                                               'master/script.tvaddons.debug.log/')
            addon_able.set_enabled('script.tvaddons.debug.log')
    except Exception as e:
        kodi.log(str(e))
        raise

    # Check for old maintenance tools and remove them
    old_maintenance = (oldinstaller, oldnotify, oldmain, oldwiz, oldfresh)
    for old_file in old_maintenance:
        if os.path.exists(old_file):
            shutil.rmtree(old_file)

    # Notification Status
    # The menu entry shows the current state and offers the opposite action.
    if kodi.get_setting("notifications-on-startup") == "false":
        note_status = '(Opt Out)'
        note_art = 'notification_optout.png'
        note_description = 'Unsubscribe'
    else:
        note_status = '(Opt In)'
        note_art = 'notification_in.png'
        note_description = 'Subscribe'

    # The Config Wizard entry is only offered until the wizard has run once.
    if kodi.get_setting('wizardran') == 'false':
        kodi.add_item("Config Wizard", '', 'call_wizard', artwork+'config_wizard.png',
                      description="Automatically configure Kodi with the best addons and goodies in seconds!")
    kodi.add_dir("Addon Installer", '', 'call_installer', artwork + 'addon_installer.png',
                 description="It’s like an App Store for Kodi addons!")
    kodi.add_dir("Maintenance Tools", '', 'call_maintool', artwork + 'maintool.png',
                 description="Keep your Kodi setup running at optimum performance!")
    # kodi.add_dir("Kodi Librtmp Files", '', 'get_libs', artwork +'librtmp_files.png')
    kodi.add_item("Rejuvenate Kodi", '', 'call_rejuv', artwork + 'rejuvinate.png',
                  description="Wipe and reconfigure Kodi with the latest Config Wizard setup!")
    kodi.add_dir("Factory Restore", '', 'call_restore', artwork + 'factory_restore.png',
                 description="Start off fresh, wipe your Kodi setup clean!")
    # The Log Uploader entry is only shown when the uploader is installed.
    if os.path.exists(uploaderpath):
        kodi.add_item("Log Uploader", '', 'log_upload', artwork + 'log_uploader.png',
                      description="Easily upload your error logs for troubleshooting!")
    kodi.add_dir("Network Speed Test", '', 'runspeedtest', artwork + 'speed_test.png',
                 description="How fast is your internet?")
    kodi.add_dir("System Information", '', 'system_info', artwork + 'system_info.png',
                 description="Useful information about your Kodi setup!")
    kodi.add_dir("Sports Listings", '', 'call_sports', artwork + 'sports_list.png',
                 description="Who’s playing what today?")
    kodi.add_dir('Backup / Restore', '', 'backup_restore', artwork + 'backup_restore.png',
                 description="Backup or restore your Kodi configuration in minutes!")
    kodi.add_item("Log Viewer", '', 'log_view', artwork + 'log_viewer.png',
                  description="Easily view your error log without leaving Kodi!")
    kodi.add_item("No-Coin Scan", '', 'nocoin', artwork + 'no_coin.png',
                  description="Scan your Kodi directory for coin mining.")
    kodi.add_item("Notifications " + note_status, '', 'toggle_notify', artwork + note_art,
                  description="%s to important TV ADDONS notifications on startup!" % note_description)
    kodi.add_item("Show Notification", '', 'show_note', artwork + 'notification.png',
                  description="Show TVA Notification. To get Important News, Tips, and Giveaways from TV ADDONS")
    viewsetter.set_view("sets")
def JUVWIZARD(filetype='main'):
    """Download and apply the configuration archive, then shut Kodi down.

    After the user confirms, the archive address is read from ``wizlink``,
    downloaded to ``rejuv.zip`` in the packages folder and extracted into
    the Kodi home (``filetype == 'main'``) or the addons folder
    (``filetype == 'addon'``).  Any other ``filetype``, or an address
    containing ``[error]``, shows an error dialog and returns.

    NOTE(review): the original indentation was lost; the extent of the
    ``filetype == 'main'`` block near the end is reconstructed -- confirm.
    """
    confirmed = xbmcgui.Dialog().yesno(
        "Please Confirm",
        " Please confirm that you wish to automatically",
        " configure Kodi with all the best addons and tweaks!",
        " ", "Cancel", "Install")
    if not confirmed:
        return

    filetype = filetype.lower()
    if filetype == 'main':
        addonfolder = kodi.translate_path('special://home')
    elif filetype == 'addon':
        addonfolder = kodi.translate_path(
            os.path.join('special://home', 'addons'))
    else:
        print({'filetype': filetype})
        dialog.ok("Error!", 'filetype: "%s"' % str(filetype))
        return

    link = kodi.read_file(wizlink).replace('\n', '').replace('\r', '').replace(
        '\a', '').strip()
    if '[error]' in link:
        print(link)
        dialog.ok("Error!", link)
        return

    path = kodi.translate_path(
        os.path.join('special://home', 'addons', 'packages'))
    lib = os.path.join(path, 'rejuv.zip')
    # Best-effort removal of a stale download.
    try:
        os.remove(lib)
    except OSError:
        pass

    dp = xbmcgui.DialogProgress()
    dp.create(AddonTitle, " ", 'Downloading and Configuring ', 'Please Wait')
    downloader.download(link, lib, dp)
    xbmc.sleep(4000)
    extract.extract_all(lib, addonfolder, dp)
    xbmc.executebuiltin("XBMC.UpdateLocalAddons()")
    addon_able.setall_enable()
    # Enabling the inputstream addons is optional: log a failure, carry on.
    try:
        addon_able.set_enabled("inputstream.adaptive")
    except Exception as e:
        kodi.log(str(e))
    xbmc.sleep(4000)
    try:
        addon_able.set_enabled("inputstream.rtmp")
    except Exception as e:
        kodi.log(str(e))
    xbmc.executebuiltin("XBMC.UpdateLocalAddons()")
    try:
        os.remove(lib)
    except OSError:
        pass

    if filetype == 'main':
        # Apply every skin shortcut listed at ``cutslink``.
        link = kodi.read_file(cutslink)
        shorts = re.compile('shortcut="(.+?)"').findall(link)
        for shortname in shorts:
            xEB('Skin.SetString(%s)' % shortname)
        enableBG16 = "UseCustomBackground,true"
        enableBG17 = "use_custom_bg,true"
        xEB('Skin.SetBool(%s)' % enableBG16)
        xEB('Skin.SetBool(%s)' % enableBG17)

    kodi.set_setting("wizardran", 'true')
    dialog.ok(AddonTitle, "Installation Complete.", "",
              "Click OK to exit Kodi and then restart to complete .")
    xbmc.executebuiltin('ShutDown')
def extract_segments(self):
    """Extract segments using the configured paths and add them to the dataset."""
    cfg = self.cfg
    data_path = cfg['data_path']
    segments_path = cfg['segments_path']
    extract.extract_all(data_path=data_path, segments_path=segments_path)
    self.dataset.add_segments(segments_path=segments_path)
def extract_all(self, begin, end):
    """Apply the module-level ``extract_all`` to this object's ``html``.

    Inside a method body the bare name resolves to the module-level
    function, not to this method, so the call is not recursive.
    """
    html = self.html
    return extract_all(begin, end, html)
def HELPWIZARD(name, url, description, filetype):
    """Download and apply a configuration archive, then shut Kodi down.

    After the user confirms, ``url`` is downloaded to ``<name>.zip`` in the
    packages folder and extracted into the Kodi home
    (``filetype == 'main'``) or the addons folder (``filetype == 'addon'``).
    The function returns early when ``url`` contains ``[error]``, when the
    download produced no file, or when ``filetype`` is anything else.

    ``description`` is accepted for interface compatibility and not used.

    NOTE(review): the original indentation was lost; the extent of the
    ``filetype == 'main'`` block near the end is reconstructed -- confirm.
    """
    filetype = filetype.lower()
    confirmed = xbmcgui.Dialog().yesno(
        "Please Confirm",
        " Please confirm that you wish to automatically\n"
        " configure Kodi with all the best addons and tweaks!",
        "Cancel", "Install")
    if not confirmed:
        return

    path = kodi.translate_path(
        os.path.join('special://home', 'addons', 'packages'))
    dp = xbmcgui.DialogProgress()
    dp.create(AddonTitle, '\nDownloading and Configuring\nPlease Wait')
    lib = os.path.join(path, name + '.zip')
    # Best-effort removal of a stale download.
    try:
        os.remove(lib)
    except OSError:
        pass

    if '[error]' in url:
        print(url)
        dialog = xbmcgui.Dialog()
        dialog.ok("Error!", url)
        return

    downloader.download(url, lib, dp)
    if not os.path.exists(lib):
        return

    if filetype == 'main':
        addonfolder = kodi.translate_path('special://home')
    elif filetype == 'addon':
        addonfolder = kodi.translate_path(
            os.path.join('special://home', 'addons'))
    else:
        print({'filetype': filetype})
        dialog = xbmcgui.Dialog()
        dialog.ok("Error!", 'filetype: "%s"' % str(filetype))
        return

    xbmc.sleep(4000)
    extract.extract_all(lib, addonfolder, dp)
    xbmc.executebuiltin("XBMC.UpdateLocalAddons()")
    addon_able.setall_enable()
    # Enabling the inputstream addons is optional: log a failure, carry on.
    try:
        addon_able.set_enabled("inputstream.adaptive")
    except Exception as e:
        kodi.log(str(e))
    xbmc.sleep(4000)
    try:
        addon_able.set_enabled("inputstream.rtmp")
    except Exception as e:
        kodi.log(str(e))
    xbmc.executebuiltin("XBMC.UpdateLocalAddons()")
    try:
        os.remove(lib)
    except OSError:
        pass

    if filetype == 'main':
        # Apply every skin shortcut listed at ``cutslink``.
        link = kodi.read_file(cutslink)
        shorts = re.compile('shortcut="(.+?)"').findall(link)
        for shortname in shorts:
            xEB('Skin.SetString(%s)' % shortname)
        enableBG16 = "UseCustomBackground,true"
        enableBG17 = "use_custom_bg,true"
        xEB('Skin.SetBool(%s)' % enableBG16)
        xEB('Skin.SetBool(%s)' % enableBG17)

    xbmc.sleep(4000)
    # Was 'XBMC_UpdateLocalAddons()' (underscore instead of dot), which does
    # not match the builtin name used by the two calls above.
    xbmc.executebuiltin('XBMC.UpdateLocalAddons()')
    addon_able.setall_enable()
    kodi.set_setting("wizardran", 'true')
    dialog = xbmcgui.Dialog()
    dialog.ok(AddonTitle, "Installation Complete!", "",
              "Click OK to exit Kodi and then restart to complete .")
    xbmc.executebuiltin('ShutDown')
def get_sub_urls(url):
    """Collect the link of every sub-category from the home page at ``url``."""
    page = requests.get(url).content
    return extract_all('<a class="item-top item-1" href="', '"', page)
def handle_html(self, url, html):
    """Store every ``<loc>`` entry of a sitemap page in ``self.col``.

    Each location is inserted as a ``{'ur': <location>}`` document.  Nothing
    is inserted when the page contains no ``<loc>`` entries.
    """
    all_loc = extract_all('<loc>', '</loc>', html)
    self.logger.info('%s', pformat(all_loc))
    if not all_loc:
        # presumably self.col is a pymongo collection, whose insert_many
        # rejects an empty document list -- confirm.
        return
    # NOTE(review): the key is 'ur', not 'url'; kept because stored
    # documents and their readers may depend on it.
    self.col.insert_many([{'ur': loc} for loc in all_loc])
#! /usr/bin/env python #coding=utf-8 import requests from os.path import dirname, abspath import extract import re RE_CN = re.compile(ur'[\u4e00-\u9fa5]+') PREFIX = dirname(abspath(__file__)) with open("%s/down.bat"%PREFIX,"w") as down: for i in xrange(1,396): for url in ( 'http://www.luoo.net/radio/radio%s/mp3.xml'%i, 'http://www.luoo.net/radio/radio%s/mp3player.xml'%i ): r = requests.get(url) print url if r.status_code == 200: for path,name in zip( extract.extract_all('path="','"',r.content), extract.extract_all('title="','"',r.content) ): if RE_CN.match(name.decode('utf-8','ignore')): down.write('wget %s -O "%s/%s.mp3"\n'%(path,PREFIX,name.decode('utf-8',"ignore").encode("gb18030","ignore"))) break
from os.path import dirname, abspath
from extract import extract, extract_all
import re
import json
import sys
# Python 2 only: force the process-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
RE_CN = re.compile(ur'[\u4e00-\u9fa5]+')
PREFIX = dirname(abspath(__file__))

# Builds highschool.sh: one wget line per area whose high-school page exists.
# NOTE(review): ``requests`` is used below but not imported in the visible
# part of this script, and the script appears to end abruptly; the original
# indentation was lost, so the nesting here is reconstructed -- confirm.
with open("%s/highschool.sh"%PREFIX, "w") as down:
    r = requests.get('http://s.xnimg.cn/js/cityArray.js')
    if r.status_code == 200:
        # First form: text between '=' and ';' is parsed as a JSON array of
        # "num:name" entries.
        for json_str in extract_all('=', ";", r.content):
            json_obj = json.loads(json_str)
            for area in json_obj:
                [num, name] = area.split(':')
                res = requests.get('http://support.renren.com/highschool/%s.html'%num)
                if res.status_code == 200:
                    print num, name
                    down.write('wget http://support.renren.com/highschool/%s.html -O %s/highschool/%s%s.html\n'%(num, PREFIX, name, num))
        # Second form: lines of quoted "num:name" entries separated by '","'.
        for json_str in extract_all(' "', '\n', r.content):
            for entry in json_str.split('","'):
                [num, name] = entry.split(':')
                name = name.replace('"', '')
                res = requests.get('http://support.renren.com/highschool/%s.html'%num)
                print num, name
                if res.status_code == 200:
                    print num, name
#! /usr/bin/env python
#-*-coding=utf-8-*-
import requests
from os.path import dirname,abspath
from extract import extract,extract_all
import re

PREFIX = dirname(abspath(__file__))
r = requests.get("http://weibo.com")

# Write every list link of the weibo.com front page as a plain anchor line.
# The output file is created (and truncated) even when the request fails.
with open("%s/huati.html"%PREFIX,"w") as huati:
    if r.status_code == 200:
        hrefs = extract_all('<li><a href="','" target="_blank">',r.content)
        labels = extract_all('" target="_blank">','</a></li>',r.content)
        for href,label in zip(hrefs,labels):
            huati.write('<a href="%s">%s</a><br />'%(href,label))
#! /usr/bin/env python
#coding=utf-8
import requests
from os.path import dirname,abspath
from extract import extract,extract_all
import re

PREFIX = dirname(abspath(__file__))
# http://b.zol-img.com.cn/desk/bizhi/image/1/960x600/1349074751592.jpg
# http://desk.zol.com.cn/bizhi/508_4569_2.html

# Write one wget line per wallpaper page that exists, naming the file after
# the image's numeric id.
# NOTE(review): the original looped over a one-element tuple of URLs and used
# ``break``; with a single URL that is equivalent to the straight-line code
# below, assuming ``break`` sat at the level of the status check -- confirm.
IMAGE_PREFIX = 'id="bigImg" src="http://b.zol-img.com.cn/desk/bizhi/image/1/960x600/'
with open("%s/down.sh"%PREFIX,"w") as down:
    for i in xrange(300,575):
        for j in xrange(1900,5000):
            url = 'http://desk.zol.com.cn/bizhi/%d_%d_2.html'%(i,j)
            r = requests.get(url)
            print(url)
            if r.status_code != 200:
                continue
            sources = extract_all('id="bigImg" src="','"',r.content)
            image_ids = extract_all(IMAGE_PREFIX,'.jpg"',r.content)
            for path,name in zip(sources,image_ids):
                down.write('wget %s -O "%s/%s.jpg"\n'%(path,PREFIX,name))
#! /usr/bin/env python #coding=utf-8 import requests from os.path import dirname, abspath import extract import re RE_CN = re.compile(ur'[\u4e00-\u9fa5]+') PREFIX = dirname(abspath(__file__)) with open("%s/down.bat" % PREFIX, "w") as down: for i in xrange(1, 396): for url in ('http://www.luoo.net/radio/radio%s/mp3.xml' % i, 'http://www.luoo.net/radio/radio%s/mp3player.xml' % i): r = requests.get(url) print url if r.status_code == 200: for path, name in zip( extract.extract_all('path="', '"', r.content), extract.extract_all('title="', '"', r.content)): if RE_CN.match(name.decode('utf-8', 'ignore')): down.write('wget %s -O "%s/%s.mp3"\n' % (path, PREFIX, name.decode( 'utf-8', "ignore").encode( "gb18030", "ignore"))) break
def _get_pars(self):
    """Return the result of ``extract.extract_all`` applied to ``self.text``."""
    text = self.text
    return extract.extract_all(text)