def DownloadUpdate(self, file):
    self.log('Downloading: %s' % file)
    dirfile = os.path.join(self.UpdateTempDir, file)
    dirname, filename = os.path.split(dirfile)
    if not os.path.isdir(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            self.log('Error creating directory: ' + dirname)
    url = self.SVNPathAddress + urllib.quote(file)
    try:
        if re.findall(".xbt", url):
            self.totalsize = int(re.findall("File length: ([0-9]*)", urllib2.urlopen(url + "?view=log").read())[0])
        urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
        self.DownloadedFiles.append(urllib.unquote(url))
        return 1
    except Exception:
        try:
            # Wait briefly and retry the download once before giving up
            time.sleep(2)
            if re.findall(".xbt", url):
                self.totalsize = int(re.findall("File length: ([0-9]*)", urllib2.urlopen(url + "?view=log").read())[0])
            urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
            self.DownloadedFiles.append(urllib.unquote(url))
            return 1
        except Exception:
            self.log("Download failed: %s" % url)
            self.DownloadFailedFiles.append(urllib.unquote(url))
            return 0
def check_proxy(self, specific={}):
    """
    Checks if proxy settings are set in the OS.

    Returns:
    -- 1 when a direct connection works fine
    -- 2 when a direct connection fails and no proxy is set in the OS
    -- 3 (plus the proxy settings) when a direct connection fails but a proxy is set

    See: https://docs.python.org/2/library/urllib.html#urllib.getproxies
    """
    os_proxies = getproxies()
    if len(os_proxies) == 0 and self.check_internet_connection:
        logging.info("No proxy needed nor set. Direct connection works.")
        return 1
    elif len(os_proxies) == 0 and not self.check_internet_connection:
        logging.error("Proxy not set in the OS. Needs to be specified")
        return 2
    else:
        env['http_proxy'] = os_proxies.get("http")
        env['https_proxy'] = os_proxies.get("https")
        proxy = ProxyHandler({
            'http': os_proxies.get("http"),
            'https': os_proxies.get("https")
        })
        opener = build_opener(proxy)
        install_opener(opener)
        urlopen('http://www.google.com')
        return 3, os_proxies
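# A minimal usage sketch (not from the original source) showing how the return
# codes of check_proxy() above could be consumed. `updater` is a hypothetical
# object exposing check_proxy(); names here are assumptions for illustration.
def ensure_connectivity(updater):
    result = updater.check_proxy()
    if result == 1:
        return {}  # direct connection works, no proxy needed
    if result == 2:
        raise RuntimeError("No internet connection and no OS proxy configured")
    # Otherwise result is the tuple (3, os_proxies) and a proxy opener is installed
    _, os_proxies = result
    return os_proxies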
def recognise_eHentai(link, path):
    url = str(link)
    page = urllib2.urlopen(url).read()
    soup = BeautifulSoup(page)
    name = soup.findAll('title')
    name = name[0].get_text().encode('utf-8')
    name = str(name)
    path = path + '\\' + name
    download_eHentai(link, path)
    pages = soup.find_all('span')
    pages = pages[1].get_text()
    pages = int(pages)
    z = 0
    while pages > z:
        z = z + 1
        # Follow the "next page" link and download it
        sopa = soup.find('div', 'sn')
        sopa = sopa.find_all('a')
        sopa = sopa[2].get('href')
        url = str(sopa)
        download_eHentai(url, path)
        page = urllib2.urlopen(url).read()
        soup = BeautifulSoup(page)
    # Download the final page reached after the loop
    sopa = soup.find('div', 'sn')
    sopa = sopa.find_all('a')
    sopa = sopa[2].get('href')
    download_eHentai(sopa, path)
def tag_to_server(scanid, tagid):
    try:
        myurl = tag_url % (scanid, tagid)
        urlopen(myurl)
    except:
        print 'error'
    print 'sent to server'
def pullPhotos(query):
    print "looking for", query
    url1 = "https://www.google.com/search?biw=1309&bih=704&sei=bsHjUbvaEILqrQeA-YCYDw&tbs=itp:lineart&tbm=isch&"
    query2 = urllib.urlencode({"q": query})
    req = urllib2.Request(url1 + query2, headers={"User-Agent": "Chrome"})
    response = urllib2.urlopen(req).read()
    parser = MyHTMLParser()
    parser.feed(response)
    print image_lib + "\\buffer\\" + query
    if not os.path.exists(image_lib + "\\buffer"):
        os.mkdir(image_lib + "\\buffer")  # make directory to put them in
    if not os.path.exists(image_lib + "\\buffer\\" + query):
        os.mkdir(image_lib + "\\buffer\\" + query)  # make directory to put them in
    for i in xrange(5):
        req_cat = urllib2.Request(cat_urls[i], headers={"User-Agent": "Chrome"})
        response_cat = urllib2.urlopen(req_cat).read()
        name = query + os.sep + query + str(i) + ".jpg"
        fd = open(image_lib + "\\buffer\\" + name, "wb")
        fd.write(response_cat)
        fd.close()
        print name, "written", "complexity is ", countComponents(image_lib + "\\buffer\\" + name)
    print "done"
def main():
    #for p in range(1, intGetMaxPage + 1):
    #soup = BeautifulSoup()
    try:
        resp = urllib2.urlopen(getUrl, timeout=10)
        soup = BeautifulSoup(resp)
        soup = soup.find('div', {'id': 'prodlist'})
        #for k in soup.findAll("div", {'class': 'p-name'}):  # grab the <div class='p-name'>...</div> blocks
        for k in soup.findAll('a', href=True):
            try:
                url = k.get('href')
                print k.text
                print url
                page_url = homeUrl + url
                print page_url
                resp_text_page = urllib2.urlopen(homeUrl + url, timeout=10)
                soup_text_page = BeautifulSoup(resp_text_page)
                contextPageUrl(soup_text_page, page_url)
            except:
                print "Unexpected error:", sys.exc_info()[0]
                print "Unexpected error:", sys.exc_info()[1]
                continue
    except:
        print "Unexpected error:", sys.exc_info()[0]
        print "Unexpected error:", sys.exc_info()[1]
        pass
def flight_search_results(sid, searchid):
    # Strip the leading '$' and the commas, and convert the number to a float
    def parse_price(p):
        return float(p[1:].replace(',', ''))

    # Poll in a loop
    while 1:
        time.sleep(2)
        # Build the URL used for polling
        url = 'http://www.kayak.com/s/basic/flight?'
        url += 'searchid=%s&c=5&apimode=1&_sid_=%s&version=1' % (searchid, sid)
        doc = xml.dom.minidom.parseString(urllib2.urlopen(url).read())
        # Find the 'morepending' tag and wait until it is no longer 'true'
        more_pending = doc.getElementsByTagName('morepending')[0].firstChild
        if more_pending is None or more_pending.data == 'false':
            break

    # Now download the full result list
    url = 'http://www.kayak.com/s/basic/flight?'
    url += 'searchid=%s&c=999&apimode=1&_sid_=%s&version=1' % (searchid, sid)
    doc = xml.dom.minidom.parseString(urllib2.urlopen(url).read())
    # Collect the lists of the different elements
    prices = doc.getElementsByTagName('price')
    departures = doc.getElementsByTagName('depart')
    arrivals = doc.getElementsByTagName('arrive')
    # Join them together with zip
    return zip([p.firstChild.data.split(' ')[1] for p in departures],
               [p.firstChild.data.split(' ')[1] for p in arrivals],
               [parse_price(p.firstChild.data) for p in prices])
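# A hedged usage sketch for flight_search_results() above. `sid` and `searchid`
# are assumed to come from an earlier Kayak session/search request that is not
# shown in the original source.
def print_cheapest_flight(sid, searchid):
    results = flight_search_results(sid, searchid)
    if results:
        depart, arrive, price = min(results, key=lambda r: r[2])
        print 'Cheapest flight: departs %s, arrives %s, $%.2f' % (depart, arrive, price)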
def parse(self, response):
    # Get an access token for Microsoft Translate
    atrequest = urllib2.Request('https://datamarket.accesscontrol.windows.net/v2/OAuth2-13')
    atrequest.add_data(atdata)
    atresponse = urllib2.urlopen(atrequest)
    access_token = json.loads(atresponse.read())['access_token']

    hxs = HtmlXPathSelector(response)
    sites = hxs.select('//span[contains(@class, "productsAzLink")]/a/text()').extract()
    items = []
    for site in sites:
        text = []
        item = IkeaItem()
        item['name'], _, item['thing'] = unicode(site).partition(' ')
        tosend = {'text': unicode(item['name']), 'from': 'sv', 'to': 'en'}
        request = urllib2.Request('http://api.microsofttranslator.com/v2/Http.svc/Translate?' + urllib.urlencode(tosend))
        request.add_header('Authorization', 'Bearer ' + access_token)
        response = urllib2.urlopen(request)
        doc = etree.fromstring(response.read())
        for elem in doc.xpath('/foo:string', namespaces={'foo': 'http://schemas.microsoft.com/2003/10/Serialization/'}):
            if elem.text:
                elem_text = ' '.join(elem.text.split())
                if len(elem_text) > 0:
                    text.append(elem_text)
        item['translation'] = ' '.join(text)
        items.append(item)
    return items
def resolve_novamov(url, guid):
    xbmc.log("Starting resolve_novamov with url: " + str(url) + " and guid: " + str(guid))
    req = urllib2.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
    response = urllib2.urlopen(req)
    link = response.read()
    response.close()
    match1 = re.compile('flashvars.file="(.+?)"').findall(link)
    match2 = re.compile('flashvars.filekey="(.+?)"').findall(link)
    if not match1 or not match2:
        return 'CONTENTREMOVED'
    # Keep the last match, as in the original loop-assignment idiom
    file = match1[-1]
    filekey = match2[-1]
    novaurl = 'http://www.novamov.com/api/player.api.php?user=undefined&key=' + filekey + '&codes=undefined&pass=undefined&file=' + file
    req = urllib2.Request(novaurl)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
    response = urllib2.urlopen(req)
    link = response.read()
    response.close()
    match3 = re.compile('url=(.+?\.flv)').findall(link)
    if match3:
        link = match3[-1]
    print ('auth url is ' + str(link))
    return link
def start(self):
    with QMutexLocker(self.mutex):
        self.stoped = False
    #for i in range(self.start_p, self.end_p):
    for i in range(1, 3):
        while self.suspended:
            self.wait()
            return
        if self.stoped:
            return
        url = "http://www.99fang.com/service/agency/a1/?p=%d" % i
        print url
        try:
            r = urllib2.urlopen(url).read()
            soup = BeautifulSoup(r)
            box = soup.find("div", {'class': 'agency-call-box'})
            lis = box("li")
            for li in lis:
                tel = li.a.string
                print tel
                r = urllib2.urlopen("http://suzhou.jjr360.com/app.php?c=spider&a=index&city=&tel=%s" % tel)
                print r.read()
        except:
            pass
        else:
            #self.emit(SIGNAL("updateTime()"))
            time.sleep(1)
def post(user, passwd):
    fp = open("Score.txt", "w")
    login_url = "http://www.dean.gxnu.edu.cn/jwxt/index.php/api/user/login"
    data = {}
    data['phone'] = "+8613512345678"
    data['username'] = user
    data['password'] = passwd
    post_data = urllib.urlencode(data)
    req = urllib2.urlopen(login_url, post_data)
    content = req.read()
    sid = content[56:82]
    data2 = {}
    data2['session_id'] = sid
    url2 = "http://www.dean.gxnu.edu.cn/jwxt/index.php/api/chengji/getyxcj"
    sessionid = "PHPSESSID=" + sid
    post_data2 = urllib.urlencode(data2)
    req2 = urllib2.Request(url2, post_data2)
    req2.add_header('Cookie', sessionid)
    resp = urllib2.urlopen(req2)
    content2 = json.loads(resp.read().encode('utf-8'))
    # Print and write the header row (course name, grade, year/term, bk, cx, grade point)
    print u"课程名称\t\t成绩\t\t年度/学期\t\tbk\t\tcx\t\t绩点"
    fp.writelines("课程名称\t\t成绩\t\t年度/学期\t\tbk\t\tcx\t\t绩点\n")
    for subject in content2['msg']:
        print subject['kcmc'] + "\t\t" + subject['cj'] + "\t\t" + subject['ndxq'][:-1] + "/" + subject['ndxq'][-1] + "\t\t" + subject['bk'] + "\t\t" + subject['cx'] + "\t\t" + subject['jd']
        # print "%-40s\t%-10s" % (subject['kcmc'], subject['cj'])
        fp.write(subject['kcmc'] + "\t\t" + subject['cj'] + "\t\t" + subject['ndxq'][:-1] + "/" + subject['ndxq'][-1] + "\t\t" + subject['bk'] + "\t\t" + subject['cx'] + "\t\t" + subject['jd'] + "\n")
    fp.close()
def urlread(url, get={}, post={}, headers={}, timeout=None):
    # Encode GET parameters into the URL query string rather than the request body
    if get:
        url += ('&' if '?' in url else '?') + urllib.urlencode(get)
    req = urllib2.Request(url, headers=headers)
    try:
        response = urllib2.urlopen(req, urllib.urlencode(post), timeout).read()
    except:
        # Retry without the timeout argument if it is not supported
        response = urllib2.urlopen(req, urllib.urlencode(post)).read()
    return response
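# A minimal usage sketch for urlread() above; the URL, parameters and headers
# are illustrative assumptions, not taken from the original source.
def example_urlread_usage():
    body = urlread('http://httpbin.org/get',
                   get={'q': 'python'},
                   headers={'User-Agent': 'Mozilla/5.0'},
                   timeout=10)
    print body[:200]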
def getmodelvendor(type, ipaddress):
    if type == "thermostat":
        modeladdress = ipaddress.replace('/sys', '/tstat/model')
        deviceModelUrl = urllib2.urlopen(modeladdress)
        if deviceModelUrl.getcode() == 200:
            deviceModel = parseJSONresponse(deviceModelUrl.read().decode("utf-8"), "model")
            deviceVendor = "RadioThermostat"
        deviceModelUrl.close()
        return {'model': deviceModel, 'vendor': deviceVendor}
    elif type == "Philips":
        deviceUrl = urllib2.urlopen(ipaddress)
        dom = minidom.parse(deviceUrl)
        deviceModel = dom.getElementsByTagName('modelName')[0].firstChild.data
        deviceVendor = dom.getElementsByTagName('manufacturer')[0].firstChild.data
        deviceUrl.close()
        return {'model': deviceModel, 'vendor': deviceVendor}
    elif type == "WeMo":
        deviceUrl = urllib2.urlopen(ipaddress)
        dom = minidom.parse(deviceUrl)
        deviceModel = dom.getElementsByTagName('modelName')[0].firstChild.data
        deviceVendor = dom.getElementsByTagName('manufacturer')[0].firstChild.data
        nickname = dom.getElementsByTagName('friendlyName')[0].firstChild.data
        if str(deviceModel).lower() == 'socket':
            deviceType = dom.getElementsByTagName('deviceType')[0].firstChild.data
            deviceType = re.search('urn:Belkin:device:([A-Za-z]*):1', deviceType).groups()[0]
            # Only a 'controllee' device keeps the 'Socket' model name
            if deviceType.lower() != 'controllee':
                deviceModel = 'Unknown'
        deviceUrl.close()
        return {'model': deviceModel, 'vendor': deviceVendor, 'nickname': nickname}
def getcommits_from_project(project):
    global access_token
    url1 = 'https://api.github.com/user'
    request1 = Request(url1)
    request1.add_header('Authorization', 'token %s' % access_token)
    response1 = urlopen(request1)
    result1 = json.load(response1)
    person = result1['login']
    repo_info = ['Fasta', 'js2839']
    owner = repo_info[1]
    repo = repo_info[0]
    url = 'https://api.github.com/repos/' + owner + '/' + repo + '/commits'
    data = []
    request = Request(url)
    request.add_header('Authorization', 'token %s' % access_token)
    response = urlopen(request)
    result = json.load(response)
    for i in range(len(result)):
        print 'result0'
        data.append([result[i]['commit']['message'],
                     result[i]['commit']['author']['name'],
                     result[i]['commit']['author']['date']])
        print data[i]
    for com in data:
        (per, sub_name) = getPercentage(com[0])
        err = save_to_db(per, sub_name, com[1], project, com[2])
    return
def login():
    # Simulated login routine
    postdata = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'userticket': '1',
        'ssosimplelogin': '******',
        'vsnf': '1',
        'vsnval': '',
        'su': '',
        'service': 'miniblog',
        'servertime': '',
        'nonce': '',
        'pwencode': 'rsa2',  #'wsse',
        'sp': '',
        'encoding': 'UTF-8',
        'prelt': '115',
        'rsakv': '',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'META'
    }
    global account
    username = '******' % (account)
    pwd = '1161895575'
    url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)'
    try:
        # Mainly to obtain the random servertime and nonce values
        servertime, nonce, pubkey, rsakv = get_servertime()
    except:
        return
    #global postdata
    postdata['servertime'] = servertime
    postdata['nonce'] = nonce
    postdata['rsakv'] = rsakv
    postdata['su'] = get_user(username)  # encrypt the username
    postdata['sp'] = get_pwd(pwd, servertime, nonce, pubkey)  # encrypt the password
    postdata = urllib.urlencode(postdata)
    # Set the POST headers; adjust for the target platform
    #headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0'}
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0'}
    #headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'}
    req = urllib2.Request(
        url=url,
        data=postdata,
        headers=headers
    )
    result = urllib2.urlopen(req)
    text = result.read()
    p = re.compile('location\.replace\(\'(.*?)\'\)')
    try:
        login_url = p.search(text).group(1)
        ###print login_url
        urllib2.urlopen(login_url)
        print "Login successful!"
    except:
        print 'Login error!'
def check_url(self, url):
    try:
        urllib2.urlopen(url).headers.getheader('Content-Length')
    except urllib2.HTTPError:
        print("404 error checking url: " + url)
        return False
    return True
def get_proportional_hash_area(period):
    """
    Takes in periods accepted by P2Pool - hour, day, week, month or year,
    then gets hash_data from the server running on localhost, parses it,
    and calculates each miner's hash power against the total during that time.
    """
    import urllib2, json
    path1 = 'http://localhost:9332/web/graph_data/miner_hash_rates/last_' + period
    result1 = json.load(urllib2.urlopen(path1))
    path2 = 'http://localhost:9332/web/graph_data/miner_dead_hash_rates/last_' + period
    result2 = json.load(urllib2.urlopen(path2))
    hash_areas = {}
    total_hash_area = 0
    for row in result1:
        for address in row[1]:
            try:
                hash_areas[address] += row[1][address] * row[2]
            except KeyError:
                hash_areas[address] = row[1][address] * row[2]
            finally:
                total_hash_area += row[1][address] * row[2]
    for row in result2:
        for address in row[1]:
            hash_areas[address] -= row[1][address] * row[2]
            total_hash_area -= row[1][address] * row[2]
    proportions = {}
    for address in hash_areas.keys():
        proportions[address] = hash_areas[address] / total_hash_area
        hash_areas[address] /= 1000000000000000
    return hash_areas, proportions
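# A minimal usage sketch for get_proportional_hash_area() above; it assumes a
# local P2Pool node on port 9332, as described in the docstring. The function
# name and output format here are illustrative only.
def print_miner_proportions(period='day'):
    hash_areas, proportions = get_proportional_hash_area(period)
    for address, share in sorted(proportions.items(), key=lambda kv: -kv[1]):
        print "%s  %.2f%%  (area: %.3f)" % (address, share * 100, hash_areas[address])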
def getSCLeg(partyDict):
    houseSoup = BeautifulSoup(urllib2.urlopen('http://www.scstatehouse.gov/member.php?chamber=H&order=D').read())
    senateSoup = BeautifulSoup(urllib2.urlopen('http://www.scstatehouse.gov/member.php?chamber=S&order=D').read())
    houseTable = houseSoup.find('div', {'class': 'mainwidepanel'}).find_all(
        'div', {'style': 'width: 325px; height: 135px; margin: 0 0 0 20px; text-align: left; float: left;'})
    senateTable = senateSoup.find('div', {'class': 'mainwidepanel'}).find_all(
        'div', {'style': 'width: 325px; height: 135px; margin: 0 0 0 20px; text-align: left; float: left;'})
    dictList = []
    for item in houseTable:
        repInfo = {}
        link = item.find('a')
        if link is not None:
            repInfo['Website'] = 'http://www.scstatehouse.gov' + link.get('href')
            repInfo['Name'] = re.sub(r'\[.*$', '', link.string.strip()).strip().replace('  ', ' ').replace('  ', ' ')
            repInfo['Party'] = partyDict[str(re.sub(r'^.*\[(.*)\].*$', r'\1', link.string.strip()))]
        else:
            repInfo['Name'] = 'VACANT'
        repInfo['District'] = 'SC State House ' + re.sub(r'^.*(District [0-9]*).*$', r'\1', item.get_text())
        dictList.append(repInfo)
    for item in senateTable:
        repInfo = {}
        link = item.find('a')
        if link is not None:
            repInfo['Website'] = 'http://www.scstatehouse.gov' + link.get('href')
            repInfo['Name'] = re.sub(r'\[.*$', '', link.string.strip()).strip().replace('  ', ' ').replace('  ', ' ')
            repInfo['Party'] = partyDict[str(re.sub(r'^.*\[(.*)\].*$', r'\1', link.string.strip()))]
        else:
            repInfo['Name'] = 'VACANT'
        repInfo['District'] = 'SC State Senate ' + re.sub(r'^.*(District [0-9]*).*$', r'\1', item.get_text())
        dictList.append(repInfo)
    return dictList
def sendMessage(subject, content, chanel, mobile):
    if content:
        content = subject + content
        subject = "时时彩计划方案"
    if chanel == "serverChan":
        key = "SCU749Tfa80c68db4805b9421f52d360f6614cb565696559f19e"
        url = "http://sc.ftqq.com/" + key + ".send"
        parameters = {
            "text": subject,
            "desp": content,
            "key": key
        }
    elif chanel == "pushBear":
        url = "http://api.pushbear.com/smart"
        parameters = {
            "sendkey": "96-d296f0cdb565bae82a833fabcd860309",
            "text": subject,
            "mobile": mobile,
            "desp": content
        }
    if chanel == "mail":
        sendMail("smtp.126.com", "*****@*****.**", ["*****@*****.**", "*****@*****.**"],
                 subject, content, "126.com", "dhysgzs*211", format='plain')
        return
    postData = urllib.urlencode(parameters)
    request = urllib2.Request(url, postData)
    urllib2.urlopen(request)
def fetchVideo_DBpedia(videoName):
    def is_person(url, response):
        try:
            for item in response[url.replace('data', 'resource')[:-5]]['http://www.w3.org/1999/02/22-rdf-syntax-ns#type']:
                if item['value'] == 'http://dbpedia.org/ontology/Person':
                    return True
            return False
        except:
            return False

    def find_disambiguates(url, response):
        ret = []
        try:
            for item in response[url.replace('data', 'resource')[:-5]]['http://dbpedia.org/ontology/wikiPageDisambiguates']:
                ret.append(item['value'])
        except:
            pass
        return ret

    try:
        url = "http://dbpedia.org/"
        videoName = '_'.join(word[0] + word[1:] for word in videoName.title().split())
        titleUrl = url + "data/" + videoName + ".json"
        response = json.loads(urllib2.urlopen(titleUrl).read())
        if is_person(titleUrl, response):
            return True
        ds = find_disambiguates(titleUrl, response)
        for d in ds:
            d = d.replace('resource', 'data') + ".json"
            if is_person(d, json.loads(urllib2.urlopen(d).read())):
                return True
    except:
        return False
    return False
def synopsis_mode_video():
    ##### Check for preview files.
    PreviewFile = "0"
    for root, dirs, files in os.walk(_Resources_Preview):
        for filename in files:
            PreviewFile = root + '\\' + filename
            Current_Window.setProperty('Synopsis_Video_Preview_Path', PreviewFile)
            Current_Window.setProperty('Synopsis_Video_Preview_Name', "Found " + filename)
    if PreviewFile == "0":
        log('| No preview video found')
        xbmc.executebuiltin('Skin.Reset(SynopsisPreviewThere)')
    else:
        if PreviewFile.endswith('.xmv'):
            Current_Window.setProperty('Player_Type', 'DVDPlayer')
            xbmc.executebuiltin('Skin.SetBool(SynopsisPreviewThere)')
        elif PreviewFile.endswith('.strm'):
            try:
                # Streamed previews need a working internet connection
                urllib2.urlopen('http://www.google.com', timeout=1)
                Current_Window.setProperty('Player_Type', 'MPlayer')
                xbmc.executebuiltin('Skin.SetBool(SynopsisPreviewThere)')
            except urllib2.URLError as err:
                xbmc.executebuiltin('Skin.Reset(SynopsisPreviewThere)')
        else:
            Current_Window.setProperty('Player_Type', 'MPlayer')
            xbmc.executebuiltin('Skin.SetBool(SynopsisPreviewThere)')
        log('| Found ' + PreviewFile)
def test_enketo_remote_server_responses(self):
    # just in case we want to shift the testing back to the main server
    testing_enketo_url = settings.ENKETO_URL
    #testing_enketo_url = 'http://enketo-dev.formhub.org'
    form_id = "test_%s" % re.sub(re.compile("\."), "_", str(time()))
    server_url = "%s/%s" % (self.base_url, self.user.username)
    enketo_url = '%slaunch/launchSurvey' % testing_enketo_url
    values = {
        'format': 'json',
        'form_id': form_id,
        'server_url': server_url
    }
    data = urllib.urlencode(values)
    req = urllib2.Request(enketo_url, data)
    try:
        response = urllib2.urlopen(req)
        response = json.loads(response.read())
        success = response['success']
        if not success and 'reason' in response:
            fail_msg = "This enketo installation is for use by "\
                       "formhub.org users only."
            if response['reason'].startswith(fail_msg):
                raise SkipTest
        return_url = response['url']
        success = response['success']
        self.assertTrue(success)
        enketo_base_url = urlparse(settings.ENKETO_URL).netloc
        return_base_url = urlparse(return_url).netloc
        self.assertIn(enketo_base_url, return_base_url)
    except urllib2.URLError:
        self.assertTrue(False)

    # second time
    req2 = urllib2.Request(enketo_url, data)
    try:
        response2 = urllib2.urlopen(req2)
        response2 = json.loads(response2.read())
        return_url_2 = response2['url']
        success2 = response2['success']
        reason2 = response2['reason']
        self.assertEqual(return_url, return_url_2)
        self.assertFalse(success2)
        self.assertEqual(reason2, "existing")
    except urllib2.URLError:
        self.assertTrue(False)

    # error message
    values['server_url'] = ""
    data = urllib.urlencode(values)
    req3 = urllib2.Request(enketo_url, data)
    try:
        response3 = urllib2.urlopen(req3)
        response3 = json.loads(response3.read())
        success3 = response3['success']
        reason3 = response3['reason']
        self.assertFalse(success3)
        self.assertEqual(reason3, "empty")
    except urllib2.URLError:
        self.assertTrue(False)
def scrap_items():
    for itemlist in ITEMLIST:
        soup = BS(urllib2.urlopen(''.join([LOLWIKI, itemlist])).read())
        item_table = soup.find('table', class_='stdt sortable')
        for tr in item_table.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) < 1:
                continue
            if tr.find('p') is None:
                continue
            item_name = tr.find('p').text.strip()
            item_url = tr.find('img')['src']
            if item_url.split(':')[0] == 'data':
                item_url = tr.find('img')['data-src']
            if not HOOKED:
                continue
            # store item in database
            d_item = Item()
            d_item.name = item_name
            t_img = NamedTemporaryFile(delete=True)
            t_img.write(urllib2.urlopen(item_url).read())
            t_img.flush()
            t_img.name = '.'.join([item_name, 'jpg'])
            d_item.picture = File(t_img)
            d_item.save()
def checkNetConnection(self):
    try:
        urllib2.urlopen('http://www.google.com', timeout=7)
        return True
    except urllib2.URLError as err:
        pass
    return False
def update_lyrics(request):
    b = open('./artistList.txt', 'r')
    bb = b.read()
    b.close()
    bbb = bb.split(chr(10))
    for ar in bbb:
        if ar.split('=')[1] == '1':
            return index(request)
        furl = "/" + ar.split('=')[1] + ".htm"
        ar = ar.split('=')[0]
        artxt = ''
        #req = urllib2.Request(u"http://mojim.com/" + ar + ".html?t1")
        #print "connected >> http://mojim.com/" + ar + ".html?t1"
        #response = urllib2.urlopen(req)
        #result = response.read()
        print '--', furl, '--'
        if len(furl) > 0:
            req2 = urllib2.Request("http://mojim.com" + furl)
            response2 = urllib2.urlopen(req2)
            result2 = response2.read()
            furl2 = re.findall('/tw[0-9x]*.htm', result2)
            iii = -1
            if len(furl2) > 0:
                for furl3 in furl2:
                    iii = iii + 1
                    if iii % 2 == 0:
                        continue
                    try:
                        req3 = urllib2.Request("http://mojim.com" + furl3)
                        response3 = urllib2.urlopen(req3)
                        result3 = response3.read()
                        lasturl = re.findall('<dl><dt><br /><br />[^^]*</div>', result3)
                        #a = raw_input()
                        artxt = lasturl[0].replace('更多更詳盡歌詞', '').replace(u'在 ', '').replace(u'Mojim.com', '').replace(u'※', '').replace('魔鏡歌詞網', '')
                        aaaaaaaa = re.findall(u'title="歌詞(.*)">', artxt)
                        bbbbbbbb = re.findall('<dd><br />(.*)</dd>', artxt)
                        bCnt = len(bbbbbbbb)
                        for bi in range(0, bCnt):
                            if len(bbbbbbbb[bi]) > 22:
                                lv = LyricsView()
                                ll = striphtml(bbbbbbbb[bi].encode('Shift_JIS').replace('<br />', '\r'))
                                ll = ll[:len(ll) - 24]
                                lv.setParams({'artist': ar, 'title': aaaaaaaa[bi], 'lyrics': ll})
                                lv.save()
                    except:
                        pass
    '''a = open(u'' + ar + '.html', 'w')
    a.write(artxt)
    a.close()'''
    return index(request)
def get_tags():
    socket_to = None
    try:
        socket_to = socket.getdefaulttimeout()
        socket.setdefaulttimeout(EC2.TIMEOUT)
    except Exception:
        pass

    try:
        iam_role = urllib2.urlopen(EC2.URL + "/iam/security-credentials").read().strip()
        iam_params = json.loads(urllib2.urlopen(EC2.URL + "/iam/security-credentials" + "/" + unicode(iam_role)).read().strip())
        from checks.libs.boto.ec2.connection import EC2Connection
        connection = EC2Connection(aws_access_key_id=iam_params['AccessKeyId'],
                                   aws_secret_access_key=iam_params['SecretAccessKey'],
                                   security_token=iam_params['Token'])
        instance_object = connection.get_only_instances([EC2.metadata['instance-id']])[0]
        EC2_tags = [u"%s:%s" % (tag_key, tag_value) for tag_key, tag_value in instance_object.tags.iteritems()]
    except Exception:
        log.exception("Problem retrieving custom EC2 tags")
        EC2_tags = []

    try:
        if socket_to is None:
            socket_to = 3
        socket.setdefaulttimeout(socket_to)
    except Exception:
        pass

    return EC2_tags
def registration(request):
    form = AuthRegistrationForm(request.POST or None)
    if request.method == 'POST' and form.is_valid():
        user_id = User.objects.all().aggregate(Max('id'))['id__max'] + 1
        username = '******' + str(user_id)
        try:
            user = User.objects.create_user(username=username,
                                            password=generate_user_password(16),
                                            email=form.cleaned_data['email'],
                                            first_name=form.cleaned_data['first_name'],
                                            last_name=form.cleaned_data['last_name'])
        except IntegrityError:
            # Retry with a freshly computed id if the username already exists
            user_id = User.objects.all().aggregate(Max('id'))['id__max'] + 1
            username = '******' + str(user_id)
            user = User.objects.create_user(username=username,
                                            password=generate_user_password(16),
                                            email=form.cleaned_data['email'],
                                            first_name=form.cleaned_data['first_name'],
                                            last_name=form.cleaned_data['last_name'])
        user_profile = UserProfile.objects.create(user=user,
                                                  phone=request.GET.get('phone'),
                                                  sms_code=random.randint(100000, 999999),
                                                  account_type=form.cleaned_data['account_type'])
        urllib2.urlopen('http://smsc.ru/sys/send.php?login=Jango.kz&psw=AcEMXtLGz042Fc1ZJUSl&phones=+' +
                        user_profile.phone + '&mes=Access code: ' + str(user_profile.sms_code))
        Client.objects.create(profile=user_profile)
        return redirect('/account/login/?phone=' + user_profile.phone)
    else:
        return render(request, 'accounts/registration.html', {'form': form})
def getUserAnswers(self, all):
    # Get the question ID and answer ID from the most recent file
    latestFile = self.getLatestAnswerFileName()
    latestQID = 0
    latestAID = 0
    if latestFile is None:
        # No file matches the expected format, so everything has to be fetched
        all = True
    else:
        # Work out the latest questionID and answerID
        pattern = re.compile('^\[\d{4}-\d{2}-\d{2}\].*-q(\d{1,50})-a(\d{1,50}).html$')
        match = pattern.findall(latestFile)
        for pp in match:
            latestQID = pp[0]
            latestAID = pp[1]
    # The first page is always fetched; it also yields the total number of answer pages
    pageContent = urllib2.urlopen("{}?page={}".format(self.answerURL, self.startPage)).read()
    d = pq(pageContent)
    pageMax = self.getMaxPageNumber(d)
    currentPage = self.startPage
    ret = False
    while True:
        self.logging("parsing page {} of {}".format(currentPage, pageMax), True)
        # If not fetching everything, check whether enough has been fetched already:
        # once an old answer is found, look back another 10 old answers and update them
        ret = self.parseAnswerAndSave(d, latestQID, latestAID, all)
        if not all and ret:
            # Not a full fetch and a duplicate was found
            return
        if currentPage >= pageMax:
            # Already on the last page
            break
        # Build the pq document for the next page
        currentPage += 1
        pageContent = urllib2.urlopen("{}?page={}".format(self.answerURL, currentPage)).read()
        d = pq(pageContent)
def download_mango(url, path):
    """
    Function: download_mango(url, path)
    Usage: download_mango('http://www.mangareader.net/poyopoyo-kansatsu-nikki/1', os.getcwd())
    Added in version: 0.1 Beta
    """
    if path != os.getcwd():
        pathchange(os.getcwd(), path)
    urlContent = urllib2.urlopen(url).read()
    imgUrls = re.findall('img .*?src="(.*?.jpg)"', urlContent)
    for imgUrl in imgUrls:
        try:
            imgData = urllib2.urlopen(imgUrl).read()
            fileName = basename(urlsplit(imgUrl)[2])
            output = open(fileName, 'wb')
            output.write(imgData)
            output.close()
        except IOError:
            print "File not found or full disk. Try again."
            sys.exit(1)
        except KeyboardInterrupt:
            print "Operation aborted manually."
            sys.exit(1)
        except:
            print "Unknown error. If this persists, contact the author or create a ticket in the bugtracker."
            sys.exit(1)
def IsRunning(self):
    """Returns whether the server is up and running."""
    try:
        urllib2.urlopen(self.GetUrl() + '/status')
        return True
    except urllib2.URLError:
        return False