def main(args):
    # print args.species, args.force_scheme_name, args.repository_url
    docfile = url.urlopen(args.repository_url)
    doc = xml.parse(docfile)
    root = doc.childNodes[0]
    found_species = []
    for species_node in root.getElementsByTagName('species'):
        info = getspeciesinfo(species_node, args.species, args.force_scheme_name)
        if info is not None:
            found_species.append(info)
    if len(found_species) == 0:
        print("No species matched your query.")
        return
    if len(found_species) > 1:
        print("The following {} species match your query, please be more specific:"
              .format(len(found_species)))
        for info in found_species:
            print(info.name)
        return  # exit(2)
    # output information for the single matching species
    assert len(found_species) == 1
    species_info = found_species[0]
    species_name_underscores = species_info.name.replace(' ', '_')
    species_name_underscores = species_name_underscores.replace('/', '_')
    species_all_fasta_filename = species_name_underscores + '.fasta'
    species_all_fasta_file = open(
        '{}/{}'.format(args.path, species_all_fasta_filename), 'w')
    log_filename = "mlst_data_download_{}_{}.log".format(
        species_name_underscores, species_info.retrieved)
    log_file = open('{}/{}'.format(args.path, log_filename), "w")
    profile_path = urlparse(species_info.profiles_url).path
    profile_filename = profile_path.split('/')[-1]
    log_file.write("definitions: {}\n".format(profile_filename))
    log_file.write("{} profiles\n".format(species_info.profiles_count))
    log_file.write("sourced from: {}\n\n".format(species_info.profiles_url))
    profile_doc = url.urlopen(species_info.profiles_url)
    profile_file = open('{}/{}'.format(args.path, profile_filename), 'w')
    profile_file.write(profile_doc.read())
    profile_file.close()
    profile_doc.close()
    for locus in species_info.loci:
        locus_path = urlparse(locus.url).path
        locus_filename = locus_path.split('/')[-1]
        log_file.write("locus {}\n".format(locus.name))
        log_file.write(locus_filename + '\n')
        log_file.write("Sourced from {}\n\n".format(locus.url))
        locus_doc = url.urlopen(locus.url)
        locus_file = open('{}/{}'.format(args.path, locus_filename), 'w')
        locus_fasta_content = locus_doc.read()
        locus_file.write(locus_fasta_content)
        species_all_fasta_file.write(locus_fasta_content)
        locus_file.close()
        locus_doc.close()
    log_file.write("all loci: {}\n".format(species_all_fasta_filename))
    log_file.close()
    species_all_fasta_file.close()
def _register_agent(self):
    register_name = self.app.config.get('TRCDASH_REGISTER_AS')
    if not register_name:
        register_name = socket.gethostname()
    url_args = {
        'name': register_name,
        'port': self.app.config.get('TRCDASH_PORT', self.DEFAULT_PORT),
    }
    register_url = '%s/register?%s' % (
        self.app.config['TRCDASH_REGISTER_TO'], urllib.parse.urlencode(url_args))
    if 'TRCDASH_AUTH_USERNAME' in self.app.config and 'TRCDASH_AUTH_PASSWORD' in self.app.config:
        auth_handler = urllib.request.HTTPBasicAuthHandler()
        auth_handler.add_password(
            realm='TRCDash login required',
            uri=register_url,
            user=self.app.config['TRCDASH_AUTH_USERNAME'],
            passwd=self.app.config['TRCDASH_AUTH_PASSWORD'])
        opener = urllib.request.build_opener(auth_handler)
        urllib.request.install_opener(opener)
    try:
        urllib.request.urlopen(register_url)
    except urllib.error.HTTPError as e:
        logger.error('Failed to register agent to "%s": %s', register_url, e)
def main():
    ### Setup access credentials
    consumer_key = "your key"
    consumer_secret = "your secret"

    ### Get the Access Token
    bearer_token = "%s:%s" % (consumer_key, consumer_secret)
    bearer_token_64 = base64.b64encode(bearer_token.encode('ascii')).decode('ascii')
    http = urllib3.PoolManager()
    token_response = http.request(
        'POST',
        "https://api.twitter.com/oauth2/token",
        headers={
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
            "Authorization": "Basic %s" % bearer_token_64,
        },
        body="grant_type=client_credentials")
    token_data = json.loads(token_response.data)
    access_token = token_data["access_token"]

    ### Use the Access Token to make an API request
    # Alternative endpoint: https://api.twitter.com/1.1/users/show.json?screen_name=@realself
    timeline_response = http.request(
        'GET',
        "https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=realself&count=2",
        headers={"Authorization": "Bearer %s" % access_token})
    timeline_data = json.loads(timeline_response.data)
    print(json.dumps(timeline_data, indent=2, sort_keys=True))
def crawlLinks(self, links, pages, file=None):
    res = []
    for link in pages:
        if shutdown_event.isSet():
            return GAME_OVER
        status_code = 0
        # This is due to an error the program will pick up
        if link != "https://www.linkedin.com/edu/school?id=17968":
            try:
                request = build_request(link)
                f = urlopen(request)
                status_code = f.code
                f.close()
            except (HTTPError, URLError):
                status_code = HTTPError
            if status_code == 200:
                request = build_request(link)
                f = urlopen(request, timeout=3)
                xml = f.read()
                youtubes = self.getYoutube(xml, link)
                l = len(youtubes['youtube'])
                for i in range(l):
                    youtubeURL = youtubes['youtube'][i][:-1]
                    if youtubeURL in res:
                        continue
                    res.append(youtubeURL)
                    print(youtubeURL + ", " + youtubes['link'])
                    file.write(youtubeURL + "," + youtubes['link'] + "\n")
                    file.flush()
    return GAME_OVER
def crawlLinks(self, links, pages, file=None):
    res = []
    for link in pages:
        if shutdown_event.isSet():
            return GAME_OVER
        status_code = 0
        # This is due to an error the program will pick up
        if link != "https://www.linkedin.com/edu/school?id=17968":
            try:
                request = build_request(link)
                f = urlopen(request)
                status_code = f.code
                f.close()
            except (HTTPError, URLError):
                status_code = HTTPError
            if status_code == 200:
                request = build_request(link)
                f = urlopen(request, timeout=3)
                xml = f.read()
                links = self.getKeyword(xml, link)
                for i in links['keyword']:
                    if "www.googletagmanager.com/ns.html?id=gtm-nmx8dc" in i:
                        # this is for the iframe; if you want to search another one, remove this if statement
                        continue
                    print(i + "," + links['link'])
                    file.write(i + "," + links['link'] + "\n")
                    file.flush()
    return GAME_OVER
def crawlLinks(self, links, pages, file=None):
    count = 0
    res = []
    for link in pages:
        if shutdown_event.isSet():
            return GAME_OVER
        status_code = 0
        # This is due to an error the program will pick up
        if link != "https://www.linkedin.com/edu/school?id=17968":
            try:
                request = build_request(link)
                f = urlopen(request)
                status_code = f.code
                f.close()
            except (HTTPError, URLError):
                status_code = HTTPError
            if status_code == 200:
                count += 1
                print(count)
                request = build_request(link)
                f = urlopen(request, timeout=3)
                xml = f.read()
                links = self.getKeyword(xml, link)
                if len(links['keyword']) != 0:
                    print(links['keyword'][0] + "," + links['link'])
                    file.write(links['keyword'][0] + "," + links['link'] + "\n")
                    file.flush()
    return GAME_OVER
def add_event():
    event_name = request.args['name']
    location_lat = float(request.args['lat'])
    if location_lat > 90 or location_lat < -90:
        return "bad latitude"
    location_lng = float(request.args['lng'])
    if location_lng > 180 or location_lng < -180:
        return "bad longitude"
    start_time = to_utc(request.args['start'])
    end_time = to_utc(request.args['end'])
    if end_time <= start_time:
        return "invalid request"
    event_type = request.args['type']
    image_url = 'https://www.originvietnam.com/file/st_uploadfont/No_Image.jpg'
    if 'image' in request.args:
        image_url = request.args['image']
    try:
        print(urllib.request.urlopen(image_url))
    except Exception:
        return image_url + " site does not exist"
    # Add to mongo here...
    try:
        event_table.insert({"startTime": start_time, "endTime": end_time,
                            "lat": location_lat, "lng": location_lng,
                            "name": event_name, "votes": 0, "comments": [],
                            "type": event_type, "imageLink": image_url})
        print(event_table.find_one())
        return "Inserting entry: " + str(event_table.count())
    except Exception:
        return "Could not insert, duplicate entry"
    return "add_event code here"
def start_server(port):
    with socketserver.TCPServer(('127.0.0.1', port), TaskServer) as httpd:
        httpd.handle_request()
        # We fire a last request at the server in order to take it out of the
        # request loop.
        try:
            urllib.request.urlopen('http://%s:%s/' % (httpd.server_address[0],
                                                      httpd.server_address[1]))
        except Exception:
            # If the server is already shut down, we receive a socket error,
            # which we ignore.
            pass
        httpd.server_close()
    return task_meta
def get_latest_price(self, target_currency, base_currency):
    url = CurrentAnalyze.base_coin_market_cap_url.format(base_currency)
    response = urllib.request.urlopen(url)
    jsonObj = simplejson.load(response)
    currency_to_usd = float(jsonObj[0]["price_usd"])
    target_url = (CurrentAnalyze.base_binance_url + CurrentAnalyze.latest_price).format(
        target_currency + CurrentAnalyze.symbol_id_map[base_currency])
    target_obj = simplejson.load(urllib.request.urlopen(target_url))
    price_usd = currency_to_usd * float(target_obj['price'])
    target_obj['price_usd'] = price_usd
    target_obj['symbol'] = target_currency
    target_obj['base_currency'] = CurrentAnalyze.symbol_id_map[base_currency]
    return target_obj
def test_suggest(request, did):
    event = Event.objects.get(id=did)
    url = ("https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=" +
           event.lat + "," + event.lng + "&radius=4000&types=restaurant&key=InsertKeyHere")
    response = urllib.request.urlopen(url).read()
    json_response = json.loads(response)
    print(type(json_response))
    print(response)
    results = json_response["results"]
    hospital_arr = []
    for result in results:
        # if 'hospital' in result["name"] or 'Hospital' in result["name"]:
        place = Copy.objects.create()
        place.name = result["name"]
        place.vicinity = result["vicinity"]
        place.place_id = result["id"]
        place.lat = result["geometry"]["location"]["lat"]
        place.lng = result["geometry"]["location"]["lng"]
        place.did = did
        # place.typeof = 'H'
        types = ''
        for keyword in result["types"]:
            types = types + ' ' + keyword
        place.types = types
        place.save()
        hospital_arr.append(place)
    police_arr = []
    url = ("https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=" +
           event.lat + "," + event.lng + "&radius=4000&types=cafe&key=InsertKeyHere")
    response = urllib.request.urlopen(url).read()
    json_response = json.loads(response)
    print(type(json_response))
    print(response)
    results = json_response["results"]
    for result in results:
        # if 'police' in result["name"] or 'Police' in result["name"]:
        place = Center.objects.create()
        place.name = result["name"]
        place.vicinity = result["vicinity"]
        place.place_id = result["id"]
        place.lat = result["geometry"]["location"]["lat"]
        place.lng = result["geometry"]["location"]["lng"]
        place.did = did
        place.typeof = 'P'
        types = ''
        for keyword in result["types"]:
            types = types + ' ' + keyword
        place.types = types
        place.save()
        police_arr.append(place)
    return HttpResponse('lol')
def gi2up(gi_accession_number):
    try:
        url = 'http://www.uniprot.org/mapping/?from=P_GI&to=ACC&query={}'.format(
            gi_accession_number)
        p = urllib.request.urlopen(url).read().decode('utf-8')
        splitter = 'xml:lang="en"><head><title>'
        ls = p.split(splitter)[1].split(' ')[0].split(':')[1]
        url2 = 'http://www.uniprot.org/uniprot/?query=yourlist:{}&sort=yourlist:{}&columns=yourlist%28{}%29,id%2Centry%20name%2Creviewed%2Cprotein%20names%2Cgenes%2Corganism%2Clength%2Cexistence%2Ccomment%28PATHWAY%29%2Cgo%2Cgo%28biological%20process%29%2Cgo%28molecular%20function%29%2Cgo%28cellular%20component%29%2Cgo-id'.format(
            ls, ls, ls)
        x = urllib.request.urlopen(url2).read().decode('utf-8')
        datum = x.split('class="addRemoveColumn mid"')[0].split(
            '</script></td></tr></thead><tbody><tr ')[1]
        datum = datum.split('class=')
        biorec = {}
        biorec['Entry (Uniprot)'] = datum[5].split(
            '"entryID"><a href="/uniprot/')[1].split('"')[0]
        biorec['Entry Name (Uniprot)'] = datum[6].split('>')[1].split('<')[0]
        biorec['Protein Names'] = datum[11].split('title="')[1].split('"')[0]
        biorec['Gene Names'] = ''.join(''.join('>'.join(
            datum[14].split('>')[1:]).split('<strong>')).split(
            '</strong>')).split('</div>')[0].strip()
        biorec['Organism'] = ''.join(
            datum[15].split('">')[2:]).split('</a><')[0]
        biorec['Length'] = datum[16].split('>')[1].split('<')[0]
        biorec['Protein Existence'] = datum[17].split('>')[1].split('<')[0]
        biorec['Pathway'] = datum[18].split('td>')[1].split('<td')[0]
        go = datum[19].split('<td style=""')[0].split('</a>')
        biorec['Gene Ontology (GO)'] = [
            i.split('>')[-1] for i in go if len(i.split('>')[-1]) > 0
        ]
        go = datum[20].split('<td style=""')[0].split('</a>')
        biorec['Gene ontology (biological process)'] = [
            i.split('>')[-1] for i in go if len(i.split('>')[-1]) > 0
        ]
        # return entryID1, entryID2, protein_name, gene_names, organisms
        go = datum[21].split('<td style=""')[0].split('</a>')
        biorec['Gene ontology (molecular function)'] = [
            i.split('>')[-1] for i in go if len(i.split('>')[-1]) > 0
        ]
        go = datum[22].split('<td style=""')[0].split('</a>')
        biorec['Gene ontology (cellular component)'] = [
            i.split('>')[-1] for i in go if len(i.split('>')[-1]) > 0
        ]
        go = datum[23].split('<td style=""')[0].split('</a>')
        biorec['Gene ontology IDs'] = [
            i.split('>')[-1] for i in go
            if ((len(i.split('>')[-1]) > 0) and (i.split('>')[-1] != '<td '))
        ]
        return biorec
    except Exception:
        return None
def _send_msg(url, access_token, body):
    posturl = url + access_token
    headers = {'Content-Type': 'application/json'}
    request = urllib.request.Request(url=posturl, headers=headers, data=body)
    response = urllib.request.urlopen(request)
    resp = response.read()
    print(resp)
def find_interests(self, tweets):
    interests = {}
    interests['links'] = []
    interests['users'] = []
    interests['hashtags'] = []
    for tweet in tweets:
        text = tweet['tweet']
        links = re.compile(r'(https.*?)|(http.*?)').findall(text)
        for link in links:
            if link[0]:
                link = link[0]
            elif link[1]:
                link = link[1]
            else:
                continue
            try:
                response = urllib.request.urlopen(link)
                full_link = response.url
                interests['links'].append(full_link)
            except Exception:
                pass
        interests['users'] += re.compile(r'(@\w+)').findall(text)
        interests['hashtags'] += re.compile(r'(#\w+)').findall(text)
    interests['users'].sort()
    interests['hashtags'].sort()
    interests['links'].sort()
    return interests
def main(client, number_of_campaigns, number_of_adgroups, number_of_keywords):
    # Initialize BatchJobHelper.
    batch_job_helper = client.GetBatchJobHelper(version='v201806')
    # Create a BatchJob.
    batch_job = AddBatchJob(client)
    # Retrieve the URL used to upload the BatchJob operations.
    upload_url = batch_job['uploadUrl']['url']
    batch_job_id = batch_job['id']
    print('Created BatchJob with ID "%d", status "%s", and upload URL "%s"' % (
        batch_job['id'], batch_job['status'], upload_url))
    # Generate operations to upload.
    budget_operations = BuildBudgetOperations(batch_job_helper)
    campaign_operations = BuildCampaignOperations(
        batch_job_helper, budget_operations, number_of_campaigns)
    campaign_criterion_operations = BuildCampaignCriterionOperations(
        campaign_operations)
    adgroup_operations = BuildAdGroupOperations(
        batch_job_helper, campaign_operations, number_of_adgroups)
    adgroup_criterion_operations = BuildAdGroupCriterionOperations(
        adgroup_operations, number_of_keywords)
    adgroup_ad_operations = BuildAdGroupAdOperations(adgroup_operations)
    # Upload operations.
    batch_job_helper.UploadOperations(
        upload_url, budget_operations, campaign_operations,
        campaign_criterion_operations, adgroup_operations,
        adgroup_criterion_operations, adgroup_ad_operations)
    # Download and display results.
    download_url = GetBatchJobDownloadUrlWhenReady(client, batch_job_id)
    response = urllib.request.urlopen(download_url).read()
    PrintResponse(batch_job_helper, response)
def use_simple_urllib3():
    response = urllib.request.urlopen(URL_IP)
    print('>>>>Response Headers:')
    print(response.info())
    print('>>>>Response body:')
    print(''.join([line.decode('utf-8') for line in response.readlines()]))
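For comparison, a minimal sketch of the same fetch using urllib3's actual API (urllib3 has no top-level urlopen); URL_IP is assumed to be defined as above:

import urllib3

def use_poolmanager_urllib3():
    # Minimal sketch with urllib3's real API; assumes URL_IP is defined elsewhere.
    http = urllib3.PoolManager()
    response = http.request('GET', URL_IP)
    print('>>>>Response Headers:')
    print(response.headers)
    print('>>>>Response body:')
    print(response.data.decode('utf-8'))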
def get(self, symbol, exchange):
    url = self.prefix + "%s:%s" % (exchange, symbol)
    u = urllib.request.urlopen(url)
    content = u.read()
    obj = json.loads(content[3:])
    return obj[0]
def testFunctionWeb():
    """Benchmarking function..."""
    # print p
    resp = urllib.request.urlopen('http://www.i3visio.com')
    html = resp.read()
    return
def download_the_av(url):
    req = urllib.request.Request(url)
    content = urllib.request.urlopen(req).read()
    content = content.decode('utf-8')
    while len(content) < 100:
        print("try again...")
        content = urllib.request.urlopen(req).read().decode('utf-8')
    print("All length:" + str(len(content)))
    title_begin = content.find("<title>")
    title_end = content.find("</title>")
    title = content[title_begin + 7:title_end - 14]
    title = title.replace('/', '_')
    title = ''.join(filter(
        lambda x: x in "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ _-",
        title))
    quality = ['720', '480', '240']
    for i in quality:
        find_position = content.find("\"quality\":\"" + i + "\"")
        if find_position > 0:
            print("Quality: " + i + "P")
            break
    to_find = content[find_position:find_position + 4000]
    pattern = re.compile(r"\"videoUrl\":\"[^\"]*\"")
    match = pattern.search(to_find)
    if match:
        the_url = match.group()
        the_url = the_url[12:-1]  # the real url
        the_url = the_url.replace("\\/", "/")
        save_file(the_url, big_path + title + ".mp4")
def github_set_commit_status(user, repo, token, sha1, state="success",
                             description="", link=""):
    # pending, success, error, failure
    description = description[0:min(len(description), 140)]  # github doesn't like too long a description
    data = js.dumps({
        'state': state,
        'context': 'default',
        'description': description,
        'target_url': link
    })
    url = "https://api.github.com/repos/{0}/{1}/statuses/{2}".format(
        github_user, github_repo, sha1)
    req = urllib.request.Request(url, data=data.encode('utf-8'))
    req.add_header("Authorization", "token {0}".format(token))
    try:
        res = urllib.request.urlopen(req)
        result = res.read()
    except urllib.error.HTTPError as e:
        print("setting github status failed: HTTP error ", e.code)
    except urllib.error.URLError as e:
        print("setting github status failed: failure ", e.reason)
def get(self, symbol, exchange):
    url = self.prefix + "%s:%s" % (exchange, symbol)
    u = urllib.request.urlopen(url)
    content = u.read()
    obj = json.loads(content[3:])
    return obj[0]
def get_first_string():
    response = urllib.request.urlopen(
        'https://gist.githubusercontent.com/jsdario/6d6c69398cb0c73111e49f1218960f79/raw/8d4fc4548d437e2a7203a5aeeace5477f598827d/el_quijote.txt'
    )
    full_text = response.read().decode('utf-8')
    text_tokenized = full_text.split(' ')
    return text_tokenized[0]
def find_interests(tweets):
    interests = defaultdict(list)
    for tweet in tweets:
        text = tweet['tweet']
        # Regexp to grab URLs might miss certain types of URL since it's hard
        # to match all possible URLs with a regexp. But, it is sufficient for
        # this program.
        links = re.compile(r'(http.*?)\Z|(http.*?) ').findall(text)
        for link in links:
            if link[0]:
                link = link[0]
            elif link[1]:
                link = link[1]
            else:
                continue
            with suppress(Exception):
                response = urllib.request.urlopen(link)
                full_link = response.url
                interests['links'].append(full_link)
        interests['users'] += re.compile(r'(@\w+)').findall(text)
        interests['hashtags'] += re.compile(r'(#\w+)').findall(text)
    interests['users'].sort()
    interests['hashtags'].sort()
    interests['links'].sort()
    return interests
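A hypothetical usage sketch of find_interests; the sample tweets below are invented for illustration:

# Hypothetical usage sketch; the sample tweets are made up for illustration.
sample_tweets = [
    {'tweet': 'Reading the docs at https://docs.python.org #python @guido'},
    {'tweet': 'More on #regex from @someone'},
]
interests = find_interests(sample_tweets)
print(interests['users'])     # ['@guido', '@someone']
print(interests['hashtags'])  # ['#python', '#regex']
print(interests['links'])     # resolved URLs (empty if the lookups fail offline)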
def analyze(url):
    with contextlib.closing(urllib.request.urlopen(url)) as urlf:
        url = urlf.geturl()
        content = urlf.read()
    doc = etree.HTML(content)
    title = doc.find('.//h1[@class="title"]').text
    votes = {}
    for o in OPTIONS:
        votes[o] = int(doc.find('.//div[@class="votingresults"]/div[@class="option-' + o + '"]').text.strip('()'))
    voteCount = sum(votes.values())
    decision = sum(OPTIONS[o]['weight'] * votes for o, votes in votes.items())
    nextUrl = 'http://besser-studieren.nrw.de' + doc.find('.//a[@class="navigate_next"]').attrib['href']
    authorNode = doc.find('.//div[@class="username"]')
    aNode = authorNode.find('./a')
    if aNode is not None:
        author = aNode.text
    else:
        author = authorNode.text
    if author.startswith('verfasst von: '):
        author = author[len('verfasst von: '):]
    author = author.strip()
    return {
        'author': author,
        'url': url,
        'title': title,
        'votes': votes,
        'nextUrl': nextUrl,
    }
def post(self):
    self.values = {"username": "******", "password": "******"}
    data = urllib.parse.urlencode(self.values).encode('utf-8')
    url = "https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn"
    request = urllib.request.Request(url, data)
    response = urllib.request.urlopen(request)
    print(response.read())
def _get_aws_meta(scene_id: str, path: int, row: int) -> list:
    meta_url = '{}/{}/{}/{}/{}_MTL.txt'.format(AWS_LS8_URL, path, row, scene_id, scene_id)
    # Returns the MTL metadata as a list of decoded text lines.
    meta_data = [line.decode('utf-8') for line in urllib.request.urlopen(meta_url).readlines()]
    return meta_data
def upcoverImage(self, url):
    image_name = 'binary'
    data_param = {
        "csrf": self.csrf_token,
        'img_name': image_name
    }  # some APIs require an explicit filename parameter
    # TODO: fetch the remote image
    image_file = io.BytesIO(urllib.request.urlopen(url).read())
    image_data = Image.open(image_file)
    output = io.BytesIO()
    image_data.save(output, format='PNG')  # format=image_data.format
    print(image_data.format)  # the source is not necessarily JPEG; it may be PNG
    image_data.close()
    data_bin = output.getvalue()
    output.close()
    file_obj = data_bin  # fixed at 2017-05-19 10:49:57
    img_file = {image_name: file_obj}
    # Very important: the key must be the filename,
    # because requests cannot guess the correct filename from the bytes array.
    data_result = requests.post(url, data_param, files=img_file, headers=self.headers)
    if isinstance(file_obj, MsgUtil):
        # load_image returns a binary stream here, not a file object.
        file_obj.close()
    data_resultJson = demjson.decode(data_result.text)
    if data_resultJson['code'] == 0:
        imageUrl = data_resultJson['data']['url']
    else:
        imageUrl = "https://i0.hdslb.com/bfs/album/1453def5c58b7c52041e4e076a5a853e358a53e1.jpg"
    return imageUrl
def query(self, address):
    lat, lng = self.address_to_latlng(address)
    query_url = ('https://en.wikipedia.org/w/api.php?action=query&list=geosearch'
                 '&gsradius=5000&gscoord={0}%7C{1}&gslimit=20&format=json').format(lat, lng)
    g = urllib.request.urlopen(query_url)
    results = g.read()
    g.close()
    data = json.loads(results)
    places = []
    for place in data['query']['geosearch']:
        name = place['title']
        meters = place['dist']
        lat = place['lat']
        lng = place['lon']
        wiki_url = self.wiki_path(name)
        walking_time = self.meters_to_walking_time(meters)
        d = {
            'name': name,
            'url': wiki_url,
            'time': walking_time,
            'lat': lat,
            'lng': lng
        }
        places.append(d)
    return places
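A hedged sketch of building the same geosearch URL with urllib.parse.urlencode, which handles escaping of the gscoord value automatically; lat and lng stand in for real coordinates:

# Sketch only: lat/lng are placeholders for real coordinates.
from urllib.parse import urlencode

params = {
    'action': 'query',
    'list': 'geosearch',
    'gsradius': 5000,
    'gscoord': '{0}|{1}'.format(lat, lng),  # urlencode escapes '|' as %7C
    'gslimit': 20,
    'format': 'json',
}
query_url = 'https://en.wikipedia.org/w/api.php?' + urlencode(params)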
def readSiteMap(self):
    pages = []
    try:
        # f = urlopen("http://www.codepool.biz/sitemap.xml")
        # Change the link when you need to crawl a different page
        url = "https://www.usfca.edu/sitemap.xml"
        maps = self.getLinks(self.getHtml(url))
        for map in maps:
            request = build_request(map)
            f = urlopen(request, timeout=3)
            xml = f.read()
            soup = BeautifulSoup(xml)
            urlTags = soup.find_all("url")
            # print(urlTags)
            print("The number of url tags in sitemap: ", str(len(urlTags)))
            for sitemap in urlTags:
                link = sitemap.findNext("loc").text
                pages.append(link)
            f.close()
    except (HTTPError, URLError) as e:
        print(e)
    return pages
def run(self):
    while True:
        item = self.queue.get()
        data = self._data_post(item)
        try:
            req = urllib.request.Request(url=self.url, data=data)
            res = urllib.request.urlopen(req)
        except urllib.error.HTTPError:
            raise
        py_data = json.loads(res.read())
        res.close()
        item['first'] = 'false'
        item['pn'] = item['pn'] + 1
        success = py_data['success']
        if success:
            print("Get success ...")
        else:
            print('Get fail')
        print('pn is : %s' % item['pn'])
        result = py_data['content']['result']
        if len(result) != 0:
            self.queue.put(item)
            print("now queue size is : %d" % self.queue.qsize())
        self.out_queue.put(py_data['content']['result'])
        self.queue.task_done()
def get_historical_data(name, number_of_days):
    data = []
    url = "https://finance.yahoo.com/quote/" + name + "/history/"
    rows = bs(urllib.request.urlopen(url).read()).findAll('table')[0].tbody.findAll('tr')
    for each_row in rows:
        divs = each_row.findAll('td')
        if divs[1].span.text != 'Dividend':  # Ignore 'Dividend' rows in the table
            # I'm only interested in the 'Open' price; for other values, play with divs[1 - 5]
            data.append({
                'Date': divs[0].span.text,
                'Open': float(divs[1].span.text.replace(',', ''))
            })
    return data[:number_of_days]

# Test
# print get_historical_data('amzn', 15)
# https://query1.finance.yahoo.com/v7/finance/download/WFC?period1=1561874153&period2=1593496553&interval=1d&events=history
# https://query1.finance.yahoo.com/v7/finance/download/WFC?period1=1561874369&period2=1593496769&interval=1d&events=history
# https://query1.finance.yahoo.com/v7/finance/download/AMZN?period1=1561874338&period2=1593496738&interval=1d&events=history
# max
# https://query1.finance.yahoo.com/v7/finance/download/WFC?period1=76204800&period2=1593388800&interval=1d&events=history
# https://query1.finance.yahoo.com/v7/finance/download/VBIV?period1=1031097600&period2=1593388800&interval=1d&events=history
def get_photo_size(url):
    width = 0
    height = 0
    if url == '':
        return width, height
    try:
        file = urllib.request.urlopen(url, timeout=URL_OPEN_TIME_OUT)
        p = ImageFile.Parser()
        while 1:
            data = file.read(1024)
            if not data:
                break
            p.feed(data)
            if p.image:
                width = p.image.size[0]
                height = p.image.size[1]
                break
        file.close()
    except Exception:
        print('get_photo_size error')
    return width, height
def google(terms):  # google <search term>
    '''Returns the link and the description of the first result from a google search
    '''
    # query = raw_input ( 'Query: ' )
    query = terms.text.strip('/wiki').lstrip(' ')
    print("going to google %s" % query)
    query = urllib.parse.urlencode({'q': query})
    response = urllib.request.urlopen(
        'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query).read()
    json = m_json.loads(response)
    results = json['responseData']['results']
    returnval = ""
    for result in results:
        title = result['title']
        url = result['url']  # was URL in the original and that threw a name error exception
        # print ( title + '; ' + url )
        title = title.translate({ord(k): None for k in u'<b>'})
        title = title.translate({ord(k): None for k in u'</b>'})
        returnval += title + ' ; ' + url + '\n'
    print("returning %s" % returnval)
    return returnval.encode('utf-8')
def httpRequest():
    # Function to send the POST request to ThingSpeak channel for bulk update.
    global messageBuffer
    data = json.dumps({'write_api_key': writeAPIkey, 'updates': messageBuffer})  # Format the json data buffer
    http = ul.PoolManager()
    requestHeaders = {
        "User-Agent": "mw.doc.bulk-update (Raspberry Pi)",
        "Content-Type": "application/json",
        "Content-Length": str(len(data))
    }
    # Make the request to ThingSpeak
    try:
        response = http.request('POST', url, body=data, headers=requestHeaders)  # Make the request
        print(response.status)  # A 202 indicates that the server has accepted the request
    except ul.exceptions.HTTPError as e:
        print(e)  # Print the error
    messageBuffer = []  # Reinitialize the message buffer
    global lastConnectionTime
    lastConnectionTime = time.time()  # Update the connection time
def del1__request(url):
    # url = "http://quote.eastmoney.com/stocklist.html#sz"
    i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5"}
    retry = 0
    MaxRetry = 3
    header = True
    while True:
        try:
            if header == True:
                req = urllib.request.Request(url, headers=i_headers)
                response = urllib.request.urlopen(req, timeout=10)
            else:
                response = urllib.request.urlopen(url, timeout=10)
            # print 'http header:\n', response.info()
            # print 'http status:\n', response.getcode()
            # print 'http url:\n', response.geturl()
            break
        except urllib.error.HTTPError as e:
            if hasattr(e, 'code'):
                print('code:{0}'.format(e.code))
            raise
        except urllib.error.URLError as e:
            if hasattr(e, 'reason'):
                print('reason:{0}'.format(e.reason))
            if hasattr(e, 'code'):
                print('code:{0}'.format(e.code))
            retry += 1
            if retry > MaxRetry:
                print('More than max %d' % MaxRetry)
                raise
            else:
                print('Try request again ...')
    return response.read()
def getPM25(cityname):
    site = 'http://www.pm25.com/' + cityname + '.html'
    html = urllib.request.urlopen(site)
    soup = BeautifulSoup(html)
    city = soup.find(class_='bi_loaction_city')  # city name
    aqi = soup.find("a", {"class": "bi_aqiarea_num"})  # AQI index
    quality = soup.select(".bi_aqiarea_right span")  # air quality level
    result = soup.find("div", class_='bi_aqiarea_bottom')  # air quality description
    print(city.text + u'AQI指数:' + aqi.text + u'\n空气质量:' + quality[0].text + result.text)
    print('*' * 20 + ctime() + '*' * 20)
def get_para(wlink):
    msg = ''
    try:
        page_request = urllib.request.Request(wlink)
        page_request.add_header('User-agent', 'Mozilla/5.0')
        page = urllib.request.urlopen(page_request)
    except IOError:
        msg = 'No hay articulos en Wikipedia, tal vez quieras buscarlo en Google!'
    else:
        msg = wlink
    return msg
def download_setuptools(
    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, delay=15
):
    """Download setuptools from a specified location and return its filename

    `version` should be a valid setuptools version number that is available
    as an egg for download under the `download_base` URL (which should end
    with a '/'). `to_dir` is the directory where the egg will be downloaded.
    `delay` is the number of seconds to pause before an actual download attempt.
    """
    import urllib.request as urllib2
    egg_name = "setuptools-%s-py%s.egg" % (version, sys.version[:3])
    url = download_base + egg_name
    saveto = os.path.join(to_dir, egg_name)
    src = dst = None
    if not os.path.exists(saveto):  # Avoid repeated downloads
        try:
            from distutils import log
            if delay:
                log.warn("""
---------------------------------------------------------------------------
This script requires setuptools version %s to run (even to display help).
I will attempt to download it for you (from %s), but you may need to
enable firewall access for this script first. I will start the download
in %d seconds.

(Note: if this machine does not have network access, please obtain the file
%s and place it in this directory before rerunning this script.)
---------------------------------------------------------------------------""",
                         version, download_base, delay, url)
                from time import sleep
                sleep(delay)
            log.warn("Downloading %s", url)
            src = urllib2.urlopen(url)
            # Read/write all in one block, so we don't create a corrupt file
            # if the download is interrupted.
            data = _validate_md5(egg_name, src.read())
            dst = open(saveto, "wb")
            dst.write(data)
        finally:
            if src:
                src.close()
            if dst:
                dst.close()
    return os.path.realpath(saveto)
def ipCheck():
    if ipConfig["ip"] is None:
        # initialize the ipcheck.yaml file
        pass
    http_pool = urllib3.connection_from_url(URL)
    try:
        response = http_pool.request('GET', URL)
    except urllib3.exceptions.HTTPError as e:
        if hasattr(e, 'reason'):
            print('We failed to reach a server.')
            print('Reason: ', e.reason)
        elif hasattr(e, 'code'):
            print("The server couldn't fulfill the request.")
            print('Error code: ', e.code)
def getURLs(url):
    try:
        fp = urllib.request.urlopen(url)
    except Exception:
        print('get url exception')
        return []
    pattern = re.compile(r'http://[\w\.]+')
    urls = []
    while True:
        s = fp.read()
        if not s:
            break
        urls = pattern.findall(s.decode('utf-8', 'ignore'))
    fp.close()
    return urls
def downURL(url, filename):
    try:
        fp = urllib.request.urlopen(url)
    except Exception:
        print('download exception')
        return False
    op = open(filename, 'wb')
    while True:
        s = fp.read()
        if not s:
            break
        op.write(s)
    fp.close()
    op.close()
    return True
def get_url_of_page(url, if_img=False):
    """
    Get all the links on a page.
    if_img: if True, collect the links of all images on the page instead.
    """
    urls = []
    try:
        f = urllib.request.urlopen(url, timeout=3).read().decode('utf-8', 'ignore')
        url_listen = URLLister()
        url_listen.feed(f)
        if if_img:
            urls.extend(url_listen.imgs)
        else:
            urls.extend(url_listen.urls)
    except urllib.error.URLError as e:
        print(e)
    return urls
def VOGetLines(self, log, source, w_range=[88000, 720000]):
    # w_range is in MHz
    c = 299792458.0
    log.write('Importing lines in range from %s to %s \n' % (w_range[0], w_range[1]))
    w_init = c / (int(w_range[0]) * 1000000.0)
    w_end = c / (int(w_range[1]) * 1000000.0)
    data = '?REQUEST=queryData&WAVELENGTH=' + \
        str(w_init) + '/' + str(w_end) + '&VERB=3'
    curl = source + data
    log.write(' -> Downloading lines via %s:\n' % source)
    log.write(' -> ' + curl + '\n')
    req = urllib.request.Request(curl)
    response = urllib.request.urlopen(req)
    votable = response.read()
    location = './votables/customVOTable.xml'
    f = open(location, 'wb')
    f.write(votable)
    f.close()
def download(fname, redownload=False):
    """download a file

    if redownload=False, the file will not be downloaded if it already exists.
    """
    dest = os.path.join(here, fname)
    if os.path.exists(dest) and not redownload:
        return
    url = 'https://raw.github.com/dpsanders/ipython_extensions/master/section_numbering/' + fname
    print("Downloading %s to %s" % (url, dest))
    filein = urllib.request.urlopen(url)
    fileout = open(dest, "wb")
    chunk = filein.read(1024)
    while chunk:
        fileout.write(chunk)
        chunk = filein.read(1024)
    filein.close()
    fileout.close()
def GetTradeArea(url):
    headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6"}
    req = urllib.request.Request(url, headers=headers)
    try:
        content = urllib.request.urlopen(req).read()
    except Exception as e:
        print(e)
        return 'error no trade'
    soup = BeautifulSoup(content)
    c = soup.findAll('div', class_='relate_stock clearfix')
    # print c
    name = soup.find('h1', class_='name').contents[1].contents[0].encode('utf-8')
    industry = c[1].findAll('li')
    industry_name = industry[0].contents[0].contents[0].encode('utf-8').strip()
    # print industry_name
    # print name
    area_name = industry[1].contents[0].contents[0].encode('utf-8').strip()
    return (industry_name, area_name)
def google(terms):  # google <search term>
    '''Returns the link and the description of the first result from a google search
    '''
    # query = raw_input ( 'Query: ' )
    query = terms.text.strip('/wiki').lstrip(' ')
    print("going to google %s" % query)
    query = urllib.parse.urlencode({'q': query})
    response = urllib.request.urlopen(
        'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query).read()
    json = m_json.loads(response)
    results = json['responseData']['results']
    returnval = ""
    for result in results:
        title = result['title']
        url = result['url']  # was URL in the original and that threw a name error exception
        # print ( title + '; ' + url )
        title = title.translate({ord(k): None for k in u'<b>'})
        title = title.translate({ord(k): None for k in u'</b>'})
        returnval += title + ' ; ' + url + '\n'
    print("returning %s" % returnval)
    return returnval.encode('utf-8')
def UrlRequest(str_symbol, start, end):
    # sym=SymbolCheck(symbol)
    mainurl = "http://quotes.money.163.com/service/chddata.html?"
    # http://quotes.money.163.com/service/chddata.html?code=1000593&start=19960312&end=20150623&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP
    options = "TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP"
    suburl = "code=%s&start=%d&end=%d&fields=%s" % (str_symbol, start, end, options)
    # print mainurl+suburl
    # header=False
    header = True
    testpost = False
    if testpost == True:
        url = mainurl
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        values = {'code': str_symbol,
                  'start': start,
                  'end': end,
                  'fields': options}
        headers = {'User-Agent': user_agent}
    else:
        url = mainurl + suburl
        i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5"}
    Debug = False
    if Debug == True:
        httpHandler = urllib.request.HTTPHandler(debuglevel=1)
        httpsHandler = urllib.request.HTTPSHandler(debuglevel=1)
        opener = urllib.request.build_opener(httpHandler, httpsHandler)
        urllib.request.install_opener(opener)
    # useipv4=True
    useipv4 = False
    retry = 0
    MaxRetry = 3
    while True:
        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:52.0) Gecko/20100101 Firefox/52.0'}
            requests.packages.urllib3.disable_warnings()
            # print(url)
            r = requests.get(url, headers=headers, verify=False)
            r.encoding = 'UTF-8'
            page = r.text
            return page
            # Unreachable fallback path using urllib directly (kept for reference).
            tout = 120
            if useipv4 == True:
                urlopen_IPv4 = urllib.request.build_opener(HTTPHandler_IPv4).open
                response = urlopen_IPv4(url, timeout=tout)
                break
            if header == True:
                if testpost == True:
                    data = urllib.parse.urlencode(values)
                    print(data)
                    req = urllib.request.Request(url, data, headers)
                else:
                    req = urllib.request.Request(url, headers=i_headers)
                response = urllib.request.urlopen(req, timeout=tout)
            else:
                response = urllib.request.urlopen(url, timeout=tout)
            break
        except urllib.error.HTTPError as e:
            if hasattr(e, 'code'):
                print('code:{0}'.format(e.code))
            # raise
        except urllib.error.URLError as e:
            if hasattr(e, 'reason'):
                print('reason:{0}'.format(e.reason))
            if hasattr(e, 'code'):
                print('code:{0}'.format(e.code))
            retry += 1
            if retry > MaxRetry:
                print('More than max %d' % MaxRetry)
                raise
            else:
                print('Try request again ...')
        else:
            pass  # print "Down data ok"
    return response
from bs4 import BeautifulSoup
import urllib.request
import re
import sys
from pymongo import MongoClient

client = MongoClient("mongodb://*****:*****@kahana.mongohq.com:10009/courier_db")
db = client.courier_db
base_url = "https://www.studential.com"
# url of subjects: https://www.studential.com/personal-statement-examples/subjects
page = urllib.request.urlopen("https://www.studential.com/personal-statement-examples/subjects")
soup = BeautifulSoup(page.read())
chunk = soup.find("div", {"id": "content"})
content = chunk.find("div", {"class": "content"})
# regex to find valid href's: ^/personal-statement-examples/.*-personal-statements$
subjects = content.find_all("a", href=re.compile(r'^/personal-statement-examples/.*-personal-statements$'))
# subjects[i]["href"] to access href
for a in subjects[:]:
    subject = re.match(r"^/personal-statement-examples/(.*)-personal-statements$", a["href"]).group(1)
    print(subject)
    subject_page = urllib.request.urlopen(base_url + a["href"])
    subject_soup = BeautifulSoup(subject_page.read())
    statement_chunk = subject_soup.find("div", {"id": "content"})
    statements = statement_chunk.find_all("p")
def __request(url):
    # urlopen_IPv4 = urllib.request.build_opener(HTTPHandler_IPv4).open
    # url = "http://quote.eastmoney.com/stocklist.html#sz"
    i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5"}
    retry = 0
    MaxRetry = 3
    # useipv4=True
    useipv4 = False
    header = True
    while True:
        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:52.0) Gecko/20100101 Firefox/52.0'}
            requests.packages.urllib3.disable_warnings()
            print(url)
            r = requests.get(url, headers=headers, verify=False)
            r.encoding = 'UTF-8'
            page = r.text
            return page
            # Unreachable fallback path using urllib directly (kept for reference).
            if useipv4 == True:
                # response = urlopen_IPv4(url, timeout=3)
                break
            if header == True:
                req = urllib.request.Request(url, headers=i_headers)
                response = urllib.request.urlopen(req, timeout=10)
            else:
                response = urllib.request.urlopen(url, timeout=10)
            # print 'http header:\n', response.info()
            # print 'http status:\n', response.getcode()
            # print 'http url:\n', response.geturl()
            break
        except urllib.error.HTTPError as e:
            if hasattr(e, 'code'):
                print('code:{0}'.format(e.code))
            # raise
        except urllib.error.URLError as e:
            if hasattr(e, 'reason'):
                print('reason:{0}'.format(e.reason))
            if hasattr(e, 'code'):
                print('code:{0}'.format(e.code))
            retry += 1
            if retry > MaxRetry:
                print('More than max %d' % MaxRetry)
                raise
            else:
                print('Try request again ...')
    return response.read()
import urllib.request

es_dir = '/Users/MarinaFomicheva/Dropbox/workspace/questplusplus/lang_resources/spanish/wmt15_baseline'
en_dir = '/Users/MarinaFomicheva/Dropbox/workspace/questplusplus/lang_resources/english/wmt15_baseline'

url = 'http://www.quest.dcs.shef.ac.uk/quest_files/truecase-model.es'
response = urllib.request.urlopen(url)

with open(es_dir + '/' + 'truecase-model.es', 'wb') as f:
    f.write(response.read())
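A minimal alternative sketch that streams the same download to disk instead of buffering the whole file in memory (uses the url and es_dir defined above):

# Sketch only: streams the response to disk with shutil.copyfileobj.
import shutil
import urllib.request

with urllib.request.urlopen(url) as response, \
        open(es_dir + '/' + 'truecase-model.es', 'wb') as f:
    shutil.copyfileobj(response, f)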
def __init__(self, filename, gdalDataset, gdalMetadata, **kwargs):
    ''' Create VRT '''
    ThreddsBase = 'http://thredds.met.no/thredds/dodsC/myocean/siw-tac/siw-metno-svalbard/'
    # First check if mapper is called with keyword syntax:
    # filename = metno_hires_seaice:YYYYmmdd
    keywordBase = 'metno_hires_seaice'
    foundDataset = False
    if filename[0:len(keywordBase)] == keywordBase:
        keywordTime = filename[len(keywordBase)+1:]
        requestedTime = datetime.strptime(keywordTime, '%Y%m%d')
        # Search for nearest available file, within the closest 3 days
        for deltaDay in [0, -1, 1, -2, 2, -3, 3]:
            validTime = (requestedTime + timedelta(days=deltaDay) +
                         timedelta(hours=15))
            filename = (ThreddsBase + validTime.strftime(
                '%Y/%m/ice_conc_svalbard_%Y%m%d1500.nc'))
            try:
                urllib.request.urlopen(filename + '.dds')
                foundDataset = True
                # Data is found for this day
                break
            except:
                # No data for this day
                pass
        if not foundDataset:
            raise WrongMapperError

    # Then check if a valid OPeNDAP URL is given
    # (or has been constructed from keyword)
    if filename[0:len(ThreddsBase)] != ThreddsBase:
        raise AttributeError("Not Met.no Svalbard-ice Thredds URL")
    else:
        timestr = filename[-15:-3]
        validTime = datetime.strptime(timestr, '%Y%m%d%H%M')

    filename = filename + '?ice_concentration[0][y][x]'
    srcProjection = osr.SpatialReference()
    srcProjection.ImportFromProj4('+proj=stere lon_0=0.0 +lat_0=90 +datum=WGS84 +ellps=WGS84 +units=km +no_defs')
    srcProjection = srcProjection.ExportToWkt()

    # From thredds web, with manual shift
    srcGeotransform = (-1243.008 - 1, 1, 0, -3190.026 - 7, 0, 1)

    # create empty VRT dataset with geolocation only
    self._init_from_dataset_params(3812, 2980, srcGeotransform, srcProjection)

    metaDict = [{'src': {'SourceFilename': filename, 'sourceBand': 1},
                 'dst': {'name': 'sea_ice_area_fraction',
                         'wkv': 'sea_ice_area_fraction'}}]

    # Add band
    self.create_bands(metaDict)

    # Set time
    self.logger.info('Valid time: %s', str(validTime))
    self.dataset.SetMetadataItem('time_coverage_start', validTime.isoformat())
__author__ = 'sereg'
#!/usr/bin/python
from urllib.request import urlopen

u = urlopen("http://python.org")
words = {}
for line in u.readlines():
    line = line.decode('utf-8').strip(" \n")
    for word in line.split(" "):
        try:
            words[word] += 1
        except KeyError:
            words[word] = 1
pairs = sorted(words.items(), key=lambda kv: kv[1], reverse=True)
for p in pairs[:10]:
    print(p[0], p[1])
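An equivalent sketch of the same word tally using collections.Counter (same page fetch as above):

# Sketch only: Counter.most_common replaces the manual dict and sort.
from collections import Counter
from urllib.request import urlopen

text = urlopen("http://python.org").read().decode('utf-8')
for word, count in Counter(text.split()).most_common(10):
    print(word, count)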