def _url_request(self, url, request_parameters, request_type='GET',
                 files=None, repeat=0, error_text="Error", raise_on_failure=True):
    """Send a new request and format the json response.
    Keyword arguments:
    url - the url of the request
    request_parameters - a dictionary containing the name of each parameter and its corresponding value
    request_type - the type of request: 'GET', 'POST'
    files - the files to be uploaded
    repeat - the number of times to repeat the request in the case of a failure
    error_text - the message to log if an error is returned
    raise_on_failure - indicates if an exception should be raised if an error is returned and repeat is 0"""
    if files is not None:
        mpf = _MultiPartForm(param_dict=request_parameters, files=files)
        req = request(url)
        body = mpf.make_result
        req.add_header('Content-type', mpf.get_content_type())
        req.add_header('Content-length', len(body))
        req.data = body
    elif request_type == 'GET':
        req = request('?'.join((url, encode(request_parameters))))
    else:
        headers = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}
        req = request(url, encode(request_parameters).encode('UTF-8'), headers)
    req.add_header('Accept-encoding', 'gzip')

    response = urlopen(req)
    if response.info().get('Content-Encoding') == 'gzip':
        buf = io.BytesIO(response.read())
        with gzip.GzipFile(fileobj=buf) as gzip_file:
            response_bytes = gzip_file.read()
    else:
        response_bytes = response.read()
    response_text = response_bytes.decode('UTF-8')
    response_json = json.loads(response_text)

    if "error" in response_json:
        if repeat == 0:
            if raise_on_failure:
                raise Exception("{0}: {1}".format(error_text, response_json))
            return response_json
        repeat -= 1
        time.sleep(2)
        response_json = self._url_request(
            url, request_parameters, request_type, files, repeat, error_text)
    return response_json
def grab_market_watch_stock_html(stock_symbol):
    url = 'https://www.marketwatch.com/investing/stock/' + stock_symbol
    request_result = request(url)
    html = request_result.read()
    request_result.close()
    return html
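# Hedged usage sketch for grab_market_watch_stock_html above: downloads the
# MarketWatch page for a ticker and reports its size. 'aapl' is an
# illustrative symbol, not something taken from the original code; the
# request/urlopen alias is assumed to be imported as elsewhere in this file.
if __name__ == '__main__':
    html = grab_market_watch_stock_html('aapl')
    print(len(html), 'bytes downloaded')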
def grab_nasdaq_stock_html(stock_symbol):
    url = 'https://www.nasdaq.com/symbol/' + stock_symbol
    request_result = request(url)
    html = request_result.read()
    request_result.close()
    return html
def news(self):
    news_lst = []
    url = "http://www.pec.edu/"
    client = request(url)
    page_html = client.read()
    client.close()
    page_soup = soup(page_html, "html.parser")
    containers = page_soup.findAll("div", {"class": "newsbox"})
    for container in containers:
        print(container.text)
        news_lst.append(container.text)
        link = container.find("a")
        if link:
            if link["href"]:
                if " " in link["href"]:
                    # replace() returns a new string; assign it back so the fix takes effect
                    link["href"] = link["href"].replace(" ", "%20")
                if "https:" in link["href"]:
                    print(link["href"])
                    print("\n")
                elif "http" in link["href"]:
                    news_lst.append(link["href"])
                    #print(link["href"])
                    #print("\n")
                else:
                    pec_link = "http://www.pec.edu/" + link["href"]
                    news_lst.append(pec_link)
                    #print(pec_link)
                    #print("\n")
        else:
            pass
    return news_lst
def url_request(self, in_url, params=None, req_type="GET", headers=None):
    if params is None:
        params = {'f': 'json'}
    elif 'f' not in params:
        params['f'] = 'json'
    if "token" not in params and self.token:
        params['token'] = self.token

    if req_type == 'GET':
        req = request('?'.join((in_url, encode(params))))
    elif req_type == 'MULTIPART':
        req = request(in_url, params)
    else:
        req = request(in_url, encode(params).encode('UTF-8'))

    req.add_header('Accept-encoding', 'gzip')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
    req.add_header('User-Agent', 'AllFunctions.py')
    if headers:
        for key, value in list(headers.items()):
            req.add_header(key, value)

    try:
        response = urlopen(req)
    except HTTPError as e:
        print("HTTP_ERROR_MSG {} -- {}".format(in_url, e.code))
        return
    except URLError as e:
        print("URL_ERROR_MSG {} -- {}".format(in_url, e.reason))
        return

    if response.info().get('Content-Encoding') == 'gzip':
        buf = BytesIO(response.read())
        with gzip.GzipFile(fileobj=buf) as gzip_file:
            response_bytes = gzip_file.read()
    else:
        response_bytes = response.read()
    response_text = response_bytes.decode('UTF-8')
    return json.loads(response_text)
def _execute_request(request):
    """Executes an API method call and returns the response object.

    Args:
      request: A remote_api_pb.Request object representing the API call e.g. a
        call to memcache.Get.

    Returns:
      A ProtocolBuffer.ProtocolMessage representing the API response e.g. a
      memcache_service_pb.MemcacheGetResponse.

    Raises:
      apiproxy_errors.CallNotFoundError: if the requested method doesn't exist.
      apiproxy_errors.ApplicationError: if the API method calls fails.
    """
    service = request.service_name().decode()
    method = request.method().decode()
    if request.has_request_id():
        request_id = request.request_id().decode()
    else:
        logging.error('Received a request without request_id: %s', request)
        request_id = None

    service_methods = (_DATASTORE_V4_METHODS if service == 'datastore_v4'
                       else remote_api_services.SERVICE_PB_MAP.get(service, {}))
    # We do this rather than making a new map that is a superset of
    # remote_api_services.SERVICE_PB_MAP because that map is not initialized
    # all in one place, so we would have to be careful about where we made
    # our new map.

    request_class, response_class = service_methods.get(method, (None, None))
    if not request_class:
        raise apiproxy_errors.CallNotFoundError('%s.%s does not exist' % (service, method))

    request_data = request_class()
    request_data.ParseFromString(request.request())
    response_data = response_class()
    service_stub = apiproxy_stub_map.apiproxy.GetStub(service)

    def make_request():
        service_stub.MakeSyncCall(service, method, request_data, response_data, request_id)

    # If the service has not declared itself as threadsafe acquire
    # GLOBAL_API_LOCK.
    if service_stub.THREADSAFE:
        make_request()
    else:
        with GLOBAL_API_LOCK:
            make_request()

    return response_data
def scrap(my_url):
    Client = request(my_url)
    page_html = Client.read()
    page_soup = soup(page_html, "html.parser")
    # find_all() does not accept an XPath expression; use a CSS selector to
    # match divs carrying both the "one-third" and "column" classes
    products = page_soup.select('div.one-third.column')
    #products = page_soup.find_all('div', href=re.compile("one-third column"))
    mylist = []
    for elements in products:
        print(elements)
    print(products)
def httpPost(site, resource, params=None):
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
    }
    argv = urlencode(params)
    url = 'https://' + site + resource
    while True:
        req = request(url, headers=headers, data=argv.encode('utf-8'))
        data = urlopen(req).read()
        if data:
            break
    return json.loads(data.decode())
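# Hedged usage sketch for httpPost above: posts a small form-encoded payload
# and prints the decoded JSON reply. The host and path point at httpbin.org,
# an illustrative echo service, not an endpoint from the original code.
if __name__ == '__main__':
    reply = httpPost('httpbin.org', '/post', {'q': 'test'})
    print(reply)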
def __fetchxml(self):
    """
    Format query to API, fetch results and return them as string.

    :return: API check results
    """
    apiuri = 'https://check.team-fortress.su/api.php?action=check&token=%s&id=%s' % (
        self.__token, self.__id)
    req = request(apiuri, data=None, headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:52.0.0) '
                      'Gecko/20100101 Firefox/52.0.0'
    })
    with urlopen(req) as xmlres:
        return xmlres.read().decode('utf-8')
def main():
    while 1:
        os.system('clear')
        print(logo)
        try:
            print("%sPILIH JENIS VIRTEX" % (g))
            print("%s%s%s" % (c, '=' * 43, a))
            print(virtex)
            v = int(input("%s>>>> %s" % (g, c)))
            if v == 99:
                menu()
                break
            elif v == 0:
                os.abort()
            elif v == 60:
                try:
                    print("%s[%s!%s] %sDownloading virtex-master.zip" % (p, y, p, y))
                    data = request(
                        "https://rahmat232.000webhostapp.com/virtex-master.zip"
                    ).read()
                    shifa = os.open("virtex-master.zip", os.O_WRONLY | os.O_CREAT)
                    os.write(shifa, data)
                    os.close(shifa)
                    zip = zipfile.ZipFile("virtex-master.zip")
                    print("%s[✓] File Name : %s" % (g, zip.filename))
                    print("%s[✓] File Path : %s" % (g, os.path.realpath(zip.filename)))
                    for file in zip.namelist():
                        zip.extract(file)
                        print("%s[✓] %s" % (g, file))
                    else:
                        print(r + "[!] Exit!")
                        break
                except SocketError:
                    print("%s[%s!%s] %sTidak Ada Koneksi%s" % (p, y, p, y, a))
                    break
            elif v in add:
                Download("v%d.txt" % (v))
            else:
                raise ValueError
        except ValueError:
            print("%s[!] Invalid Input!" % (y))
            time.sleep(1.5)
def runRequest(self, method, url, data=None, headers=None):
    request = http.client.HTTPSConnection('www.googleapis.com')
    if data and headers:
        request.request(method, url, data, headers)
    elif headers:
        # for inserting a row
        request.request(method, url, headers=headers)
    else:
        request.request(method, url)
    response = request.getresponse()
    print(response.status, response.reason)
    response = response.read()
    print(response)
    return response
def sendReq(self, URL, query_dict=None, headers=None):
    # Takes a URL and a dictionary and sends the request, returns the JSON
    # qData = parse.urlencode(qDict).encode('UTF-8') if qDict else None
    if query_dict:
        query_string = encode(query_dict).encode('UTF-8')
    else:
        query_string = encode('').encode('UTF-8')
    if headers:
        req = request(URL)
        # dict.iteritems() no longer exists in Python 3; use items() instead
        for key, value in headers.items():
            req.add_header(key, value)
    else:
        req = URL
    jsonResponse = urlopen(req, query_string)
    jsonOutput = json.loads(jsonResponse.read().decode('utf-8'))
    return jsonOutput
def getdata():
    # get the data
    url = 'https://coronavirus-tracker-api.herokuapp.com/v2/locations'
    data = request(url)
    # convert data from bytes to json
    final_data = json.loads(data.read())
    final_data = final_data['locations']
    # sort the data, using number of cases as the key
    sorted_data = sorted(final_data, key=lambda k: k['latest']['confirmed'], reverse=True)
    # convert data to dataframe
    df = json_normalize(sorted_data)
    df = df.drop(['coordinates.longitude', 'coordinates.latitude', 'last_updated',
                  'latest.recovered', 'id', 'country_code'], axis=1)
    df.rename(columns={'province': 'Province', 'latest.deaths': 'Deaths',
                       'latest.confirmed': 'Confirmed Cases', 'country': 'Country'},
              inplace=True)
    return df
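# Hedged usage sketch for getdata above: prints the ten hardest-hit countries.
# Assumes the same imports the original snippet relies on (the request/urlopen
# alias, json, and pandas' json_normalize).
if __name__ == '__main__':
    df = getdata()
    print(df.head(10))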
def ccgc(self):
    ccgc_lst = []
    url = "http://ccgc.pec.edu/"
    client = request(url)
    page_html = client.read()
    client.close()
    page_soup = soup(page_html, "html.parser")
    containers = page_soup.findAll(
        "p", {"class": ["MsoNormal", "MsoNormalCxSpMiddle"]})
    del containers[0:7]
    print("Bot:", end=" ")
    for container in containers:
        if containers.index(container) % 6 != 0:
            ccgc_lst.append(container.text)
            print(container.text, end=" ")
        else:
            ccgc_lst.append(container.text)
            print("\n")
            print(container.text)
    return ccgc_lst
def Download(path):
    total = 0
    print("%s[%s!%s] %sDownloading %s%s%s" % (p, y, p, y, c, path, a))
    while 1:
        try:
            data = request("https://rahmat232.000webhostapp.com/" + path)
            print("%s[%s✓%s] %sURL : %s" % (p, y, p, y, data.geturl()))
            print("%s[%s✓%s] %sStatus : %s" % (p, y, p, y, data.status))
            fopen = os.open(path, os.O_WRONLY | os.O_CREAT)
            os.write(fopen, data.read())
            os.close(fopen)
            print("%s[%s✓%s] %sFile Name : %s" % (p, y, p, g, os.path.basename(path)))
            byte = os.stat(path).st_size
            for b in ['B', 'KB', 'MB', 'GB', 'TB']:
                if byte < 1024.0:
                    byte = "%3.1f %s" % (byte, b)
                    break
                else:
                    byte /= 1024.0
            print("%s[%s✓%s] %sFile Size : %s" % (p, y, p, g, byte))
            print("%s[%s✓%s] %sFile Path : %s" % (p, y, p, g, os.path.realpath(path)))
            var = input('%s[%s?%s] %sLihat Hasil Download [%sY%s/%sn%s]%s ' % (p, y, p, w, g, w, r, w, P)).lower()
            if var == 'y':
                os.system("xdg-open --view " + path)
                break
            else:
                break
        except SocketError as Soc:
            total += 1
            if total == 5:
                print(
                    "%s[%s!%s] %sGagal Terhubung Ke Server\n\n\tCoba :\n\t\t• Nonaktifkan mode pesawat\n\t\t• Aktifkan data seluler atau Wi-Fi\n\t\t• Periksa sinyal di area Anda\n%s%s"
                    % (p, y, p, y, Soc, a))
                exit()
            else:
                print("%s[%s!%s] %sMencoba menghubungkan ulang ke server" % (p, y, p, y))
                time.sleep(1.5)
def _ExecuteRequest(request):
    """Executes an API method call and returns the response object.

    Args:
      request: A remote_api.Request object representing the API call e.g. a
        call to memcache.Get.

    Returns:
      A ProtocolBuffer.ProtocolMessage representing the API response e.g. a
      memcache_service_pb.MemcacheGetResponse.

    Raises:
      apiproxy_errors.CallNotFoundError: if the requested method doesn't exist.
      apiproxy_errors.ApplicationError: if the API method calls fails.
    """
    service = request.service_name()
    method = request.method()

    service_methods = remote_api_services.SERVICE_PB_MAP.get(service, {})
    request_class, response_class = service_methods.get(method, (None, None))
    if not request_class:
        raise apiproxy_errors.CallNotFoundError('%s.%s does not exist' % (service, method))

    request_data = request_class()
    request_data.ParseFromString(request.request())
    response_data = response_class()

    def MakeRequest():
        apiproxy_stub_map.MakeSyncCall(service, method, request_data, response_data)

    if service in THREAD_SAFE_SERVICES:
        MakeRequest()
    else:
        with GLOBAL_API_LOCK:
            MakeRequest()

    return response_data
        path += '&'
    else:
        path += '?'
    data = data.encode()
    global token
    ret = loads(urlopen('https://api.vk.com/method/' + path + 'v=5.101&access_token=' + token, data=data).read().decode())
    return ret

q = api('photos.getWallUploadServer?group_id=186041959')['response']
print(q)
#d=urlopen('http://192.168.43.1:9000',data='--d4cae89ef506420b88e5c9e1b8f91f28\r\nContent-Disposition: form-data; name="photo"; filename="boot.jpg"\r\n\r\n'.encode()+open('/boot.jpg','rb').read()+'\n\r\n--d4cae89ef506420b88e5c9e1b8f91f28--\r\n'.encode()).read()
ph = open('/boot.jpg', 'rb').read()
# pick a random multipart boundary that does not occur inside the image bytes
rs = ''
while rs.encode() in ph:
    rs = ''.join([choice('1234567890poiuytrewqasdfghjklmnbvcxzQWERTYUIOPLKJHGFDSAZXCVBNM') for w in range(32)])
data = ('--' + rs + '\r\n' + 'Content-Disposition: form-data; name=photo; filename=mem.jpg\r\nContent-Type: image/jpeg\r\n\r\n').encode() + ph + ('--' + rs + '--').encode()
ad = q['upload_url']
d = urlopen(request(ad, data=data, headers={'Content-type': 'multipart/form-data; boundary=' + rs})).read()
#d=urlopen(q['upload_url'],data='--'.encode()+rs+'\r\nContent-Disposition: form-data; name="photo"; filename="boot.jpg"\r\n\r\n'.encode()+ph+'\r\n--'.encode()+rs+'--\r\n'.encode()).read()
#d=post(q['upload_url'],files=dict(photo=open('/boot.jpg','rb')))
#d=post('http://192.168.43.1:9000',files=dict(photo=open('exe.py','rb')))
#d=d.text
print(d)
#exit()
d = loads(d)
f = api('photos.saveWallPhoto?group_id=186041959&photo=' + d['photo'] + '&server=' + str(d['server']) + '&hash=' + d['hash'])
print(f)
a = api('wall.post?owner_id=-186041959&from_group=1&attachments=photo225847803_' + str(f['response'][0]['id']))
print(a)
def main():
    st.title('COVID - 19')
    menuItems = ['Guidelines', 'India Statistics', 'Worldwide Statistics',
                 'Hospital Stats India', 'Symptoms', 'Helpline']
    st.sidebar.title('Menu')
    itemSelected = st.sidebar.selectbox('', menuItems)
    hide_streamlit_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        </style>
        """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
    github = '''[ Fork/Star on Github](https://github.com/abhayrpatel10/COVID-19)'''
    st.sidebar.info(github)

    if itemSelected == 'Helpline':
        st.markdown(helpline())

    if itemSelected == 'Hospital Stats India':
        hospitals()

    if itemSelected == 'India Statistics':
        indianStats()

    if itemSelected == 'Guidelines':
        langugaes = ['English', 'हिंदी', 'ગુજરાતી', 'தமிழ்', 'తెలుగు', 'ਪੰਜਾਬੀ', 'മലയാളം', 'ಕನ್ನಡ']
        lang = st.selectbox('Choose Language', langugaes)
        st.subheader('WHO Guidelines')
        if lang == 'English':
            st.markdown(english())
        elif lang == 'தமிழ்':
            st.markdown(tamil())
        elif lang == 'हिंदी':
            st.markdown(hindi())
        elif lang == 'ગુજરાતી':
            st.markdown(gujarati())
        elif lang == 'తెలుగు':
            st.markdown(telugu())
        elif lang == 'ਪੰਜਾਬੀ':
            st.markdown(punjabi())
        elif lang == 'മലയാളം':
            st.markdown(malayalam())
        elif lang == 'ಕನ್ನಡ':
            st.markdown(kannada())

    if itemSelected == 'Worldwide Statistics':
        ogstatsurl = 'https://coronavirus-tracker-api.herokuapp.com/v2/latest'
        # making get request to the API
        client = request(ogstatsurl)
        data = client.read()
        client.close()
        # bytes to json
        final = json.loads(data)
        # number of confirmed cases all around the world --------- variable name - confnum
        confnum = final['latest']['confirmed']
        confirmed = '''## Confirmed Cases  ``` %d``` ''' % (confnum)
        st.markdown(confirmed)
        # number of deaths around the world --------- variable name - deathnum
        deathnum = final['latest']['deaths']
        deaths = '''## Deaths ``` %d  ``` ''' % (deathnum)
        st.markdown(deaths)

        ## Getting recovered data
        # url='https://www.worldometers.info/coronavirus/'
        # client=request(url)
        # raw_html=client.read()
        # parsed_html=soup(raw_html,'html.parser')
        # #-----------Number of people recovered - variable name - recoverednum
        # #using beautiful soup to find div tag with given style
        # recoverednum=parsed_html.find('div',{'style':'color:#8ACA2B '}).text
        # recoverednum=recoverednum.strip().replace(",","")
        # recovered='''## Recovered  ``` %s ``` '''%(recoverednum)
        # st.markdown(recovered)

        objects = ('Deaths', 'Total Cases')  # labels for the bar chart
        y_pos = np.arange(len(objects))
        # active=int(confnum)-(int(recoverednum)+int(deathnum))  # finding number of active cases
        values = [int(deathnum), int(confnum)]  # values for the bar chart
        ax = plt.bar(y_pos, values, align='center', alpha=0.7)  # bar chart plotted using matplotlib
        plt.xticks(y_pos, objects)
        # Additional data for the graph
        plt.title('COVID-19')
        autolabel(ax)
        st.write(mpl_fig=ax)
        st.pyplot()

        df = getdata()
        # getting the list of countries
        country_list = df['Country'].tolist()
        country_list = sorted(list(set(country_list)))
        choice = st.selectbox('Choose Country', country_list)
        # finding data related to specific country and displaying
        value = df.loc[df['Country'] == choice]
        st.table(value)
        # displaying all data
        st.table(df)

    if itemSelected == 'News':
        st.subheader('News')
        image = Image.open('verified.png')
        st.image(image)
        choice = st.selectbox('Choose state or UT', list_cities())
        st.markdown(news(choice))
        st.markdown('# Central News')
        st.markdown(news())

    if itemSelected == 'Symptoms':
        st.markdown(symptoms())
        st.write('Source : WHO')
def get_content(url):
    Client = request(url)
    page_html = Client.read()
    page_soup = soup(page_html, "html.parser")
    return page_soup
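# Hedged usage sketch for get_content above: fetches a page and prints its
# <title>. The URL is illustrative; any reachable HTML page would work, and
# the snippet assumes the same request/soup aliases used throughout this file.
if __name__ == '__main__':
    page = get_content('https://www.example.com/')
    print(page.title.text if page.title else 'no <title> found')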
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as request
from tqdm import tqdm

# soup=BeautifulSoup(html_doc,'html.parser')
my_url = 'https://www.newegg.com/global/fr-en/p/pl?d=graphic+cards'
Client = request(my_url)
page_html = Client.read()
page_soup = soup(page_html, "html.parser")
# print(page_soup.h1)
# print(page_soup.p)
# print(page_soup.body.span)
# print(products)
# print(len(products))
# print(products[0])
### definition of the product => item container
# container = products[0]
# print(container)
products = page_soup.findAll("div", {"class": "item-container"})

graphic_cards_data = []
graphic_cards_brands = []
graphic_cards_names = []
graphic_cards_ratings = []
graphic_cards_prices = []
liste = []
import json
from urllib.request import urlopen as request
from bs4 import BeautifulSoup as soup

#Manga Name here
print("Enter a manga name: ")
manga_name = input()
#manga_name = 'TALES OF DEMONS AND GODS'

#Making the URL for the manga
manga_url_id = manga_name.lower().replace(" ", "_")
manga_url = 'http://mangafox.me/manga/' + manga_url_id + '/'

#Grabbing the directory page
client = request(manga_url)
page = client.read()
client.close()
page_soup = soup(page, "html.parser")

#Initialising the containers
containers_name = page_soup.find("h1")
container_facts = page_soup.findAll("td", {"valign": "top"})
container_img_link = page_soup.find("img", {"width": "200"})
container_summary = page_soup.find("div", {"id": "title"})
container_stats = page_soup.findAll("div", {"class": "data"})
container_vol_stat = page_soup.findAll("div", {"class": "slide"})
container_chap_list = page_soup.findAll("h4")

#Storage variables
details = {}
url_input = input("input fakespot review report url |OR| input" + " 'q' " + "to quit the program.\n")
if url_input.lower() == 'q':
    print("...exiting!")
    sys.exit()
while url_input.lower() != 'q':
    #url_input = input("input fakespot review report url |OR| input" + " 'q' " + "to quit the program.\n");
    if url_input.lower() == 'q':
        print("...exiting!")
        sys.exit()
    url = url_input

    # opening up connection, grabbing the page
    client = request(url)
    page = client.read()
    client.close()

    # html parser
    page_soup = soup(page, "html.parser")

    # grabs fakespot review grades based on if it has that certain grade
    try:
        empty = ''
        empty_array = []
        product_name = page_soup.findAll(
            "span", {"class": "product-link link-highlight"}, {"itemprop": "name"})
        if product_name:  # non-empty result list
            product_name = product_name[0].text.strip()
    elif l == 1:
        link = link1
    elif l == 2:
        link = link2
    elif l == 3:
        link = link3
    elif l == 4:
        link = link4
    elif l == 5:
        link = link5
    else:
        print("error in links \n\n\n")

    # scrape data into array here while iterating through
    # weekly links
    client = request(link)
    page_html = client.read()
    client.close()
    page_content = soup(page_html, "html.parser")

    # get all rows
    player_row = page_content.findAll('table')[0].find_all('tr')
    for i in range(len(player_row)):
        # grab relevant data from each row
        row_data = player_row[i].find_all('td')
        if (len(row_data) != 0):
            # make sure we are getting stats for QBs only
def stopContext(self, object):
    """
    Count and export performance results to JSON file
    """
    performanceResults = []
    for i in range(len(measurements)):
        performanceResult = {}
        performanceResult['title'] = measurements[i]['title']
        performanceResult['class'] = measurements[i]['class']
        performanceResult['invocations'] = measurements[i]['invocations']
        performanceResult['repeats'] = measurements[i]['repeats']
        performanceResult['executionTime'] = sum(measurements[i]['results'])
        performanceResult['invocations'] = len(measurements[i]['results'])
        performanceResult['min'] = min(measurements[i]['results'])
        performanceResult['max'] = max(measurements[i]['results'])
        performanceResult['average'] = sum(measurements[i]['results']) / len(measurements[i]['results'])
        performanceResult['median'] = scoreatpercentile(sorted(measurements[i]['results']), 0.5)
        performanceResult['90percentile'] = scoreatpercentile(sorted(measurements[i]['results']), 0.9)
        if performanceResult['average'] > 0:
            performanceResult['operationsPerSecond'] = measurements[i]['repeats'] / performanceResult['average']
        performanceResults.append(performanceResult)

    # Clear measurements for next module
    del measurements[:]

    if hasattr(object, '__module__'):
        resultsToSave = json.dumps(performanceResults, indent=4)
        log.debug(resultsToSave)

        # Form results to post
        performanceResultsPost = []
        # Adapt names
        testRemoveReg = re.compile(re.escape('test'), re.IGNORECASE)
        for performanceResult in performanceResults:
            tmpResult = {}
            tmpResult['name'] = testRemoveReg.sub('', performanceResult['title']).upper()
            tmpResult['class'] = testRemoveReg.sub('', performanceResult['class'])
            tmpResult['label'] = 'Python ' + str(sys.version_info[0]) + '.' + str(sys.version_info[1]) + '.' + str(sys.version_info[2])
            tmpResult['time'] = int(time.time())
            tmpResult['report'] = {}
            tmpResult['report'] = performanceResult
            performanceResultsPost.append(tmpResult)

        # TODO:
        # Get path from params
        dir = 'reports/'
        if not os.path.exists(dir):
            os.makedirs(dir)

        # Save the results
        f = open(dir + object.__module__ + '.json', 'w')
        f.write(resultsToSave)

        for performanceResultPost in performanceResultsPost:
            postData = json.dumps(performanceResultPost)
            reqHeaders = {
                'Content-type': 'application/json',
                'Content-Length': str(len(postData))
            }
            log.debug('Sending results to: ' + self.postUrl)
            # TODO:
            # Need some check here
            req = request(url=self.postUrl, data=postData.encode('UTF-8'), headers=reqHeaders)
            response = urlopen(req)
def core_engine():
    global appi, stars, app_page, app_desc_img, app_banner_img

    # Get Webpage
    print("Retrieving data from the web...")
    for count in range(5):
        try:
            headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36"}
            req = request("http://www.amazon.com/mobile-apps/b?ie=UTF8&node=2350149011", None, headers)
            html = urlopen(req).read()
            faotd_page = html.decode("utf-8", 'ignore')  # convert byte string to unicode string. The "ignore" argument makes the convertor skip over any character that it fails to decode rather than raise an exception
        except:
            if not count == 0:
                print('\r', end="")
            print("trying again...(1.{})".format((count + 1)), end="")
            sys.stdout.flush()
        else:
            if count > 0:
                print()
            break
    else:
        print()
        raise FetchingDataError("Failed to get webpage. Exiting...")

    try:
        # Parse webpage for needed links
        app_page = 'http://www.amazon.com' + re.search(r"(fad-widget-app-name'>[^>]*)", faotd_page).groups()[0].split("='")[1].strip("'")
        banner_img_url = re.search(r"(src='http://g-ecx.images-amazon.com/images/G/01/mas/retail/faad[^']*)", faotd_page).group()[5:]

        # Parse webpage for App Info
        appi = {}
        appi['name'] = re.search("""fad-widget-app-name'>[^>]*>([^<]*)""", faotd_page).groups()[0]
        appi['desc'] = re.search(r'''"fad-widget-description">\n *(.*)\n *</p>''', faotd_page).groups()[0]
        appi['vendor'] = re.search(r""""fad-widget-by-line">\n *by (.*)\n *</div>""", faotd_page).groups()[0]
        appi['rating'] = re.search("""alt="([^ ]* out of 5 stars)""", faotd_page).groups()[0]
    except AttributeError:
        raise
        #V^# raise FetchingDataError("Unable to parse webpage, site must have updated")

    # Get App Banner image
    app_banner_img = (tmp + '/faotd_banner_image')
    ## Retrieve image from web
    for count in range(5):
        try:
            web_banner_img = urlopen(banner_img_url).read()
        except:
            if not count == 0:
                print('\r', end="")
            print("trying again...(3.{})".format((count + 1)), end="")
            sys.stdout.flush()
        else:
            if count > 0:
                print()
            break
    else:
        print()
        raise FetchingDataError("Failed to get image. Exiting...")

    ## Save image to temp folder
    with open(app_banner_img, 'wb') as img:
        img.write(web_banner_img)

    # Calculating stars
    point = int(appi['rating'][2])
    if point in [0, 1, 2]:
        point = 0
    elif point in [3, 4, 5, 6, 7]:
        point = 1
    elif point in [8, 9]:
        point = 2
    stars = []
    for i in range(int(appi['rating'][0])):
        stars.append(2)
    stars.append(point)
    for i in range((5 - len(stars))):
        stars.append(0)
for URL in URLarray:
    url_input = URL
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
    values = {
        'name': 'Virginia Cwealth',
        'location': 'Richmond',
        'language': 'Python'
    }
    headers = {'User-Agent': user_agent}
    data = urllib.parse.urlencode(values).encode("utf-8")
    req = urllib.request.Request(url_input, data, headers)

    #THIS IS WHERE WE ARE STUCK!
    # fix: urlopen (not another Request) is what actually sends the prepared request
    response = urllib.request.urlopen(req)
    the_page = response.read()

    page_soup = soup(the_page, "html.parser")
    product_name = page_soup.findAll("span", {"class": "product-link link-highlight"}, {"itemprop": "name"})
    print(product_name)
    break

    # if product_name is not empty:
    #     product_name = product_name[0].text.strip()
    #     gradeA = page_soup.findAll("div",{"class":"grade-rating font-grade-a"})
    #     gradeB = page_soup.findAll("div",{"class":"grade-rating font-grade-b"})
def url_request(self, in_url, request_parameters=None, request_type='GET',
                additional_headers=None, files=None, repeat=0):
    """
    Make a request to the portal, provided a portal URL
    and request parameters, returns portal response. By default,
    returns a JSON response, and reuses the current token.

    Arguments:
        in_url -- portal url
        request_parameters -- dictionary of request parameters.
        request_type -- HTTP verb (default: GET)
        additional_headers -- any headers to pass along with the request.
        files -- any files to send.
        repeat -- repeat the request up to this number of times.

    Returns:
        dictionary of response from portal instance.
    """
    # multipart requests pre-encode the parameters
    if request_type == 'MULTIPART':
        parameters = request_parameters
    else:
        parameters = {'f': 'json'}
        # if we haven't logged in yet, won't have a valid token
        if self.token:
            parameters['token'] = self.token
        if request_parameters:
            parameters.update(request_parameters)

    if request_type == 'GET':
        req = request('?'.join((in_url, encode(parameters))))
    elif request_type == 'MULTIPART':
        req = request(in_url, parameters)
    elif request_type == 'WEBMAP':
        if files:
            req = request(in_url, *self.encode_multipart_data(parameters, files))
        else:
            arcpy.AddWarning("Multipart request made, but no files provided.")
            return
    else:
        req = request(in_url, encode(parameters).encode('UTF-8'), self.headers)

    if additional_headers:
        for key, value in list(additional_headers.items()):
            req.add_header(key, value)
    req.add_header('Accept-encoding', 'gzip')

    try:
        response = urlopen(req)
    except HTTPError as e:
        arcpy.AddWarning("{} {} -- {}".format(HTTP_ERROR_MSG, in_url, e.code))
        return
    except URLError as e:
        arcpy.AddWarning("{} {} -- {}".format(URL_ERROR_MSG, in_url, e.reason))
        return

    if response.info().get('Content-Encoding') == 'gzip':
        buf = BytesIO(response.read())
        with gzip.GzipFile(fileobj=buf) as gzip_file:
            response_bytes = gzip_file.read()
    else:
        response_bytes = response.read()
    response_text = response_bytes.decode('UTF-8')

    # occasional timing conflicts; repeat until we get back a valid response.
    response_json = json.loads(response_text)
    # Check that data returned is not an error object
    if not response_json or "error" in response_json:
        rerun = False
        if repeat > 0:
            repeat -= 1
            rerun = True
        # token has expired. Revalidate, then rerun request
        # (use == rather than `is` for value comparisons)
        if response_json['error']['code'] == 498:
            if self.debug:
                arcpy.AddWarning("token invalid, retrying.")
            if self.login_method == 'token':
                # regenerate the token if we're logged in via the application
                self.token_login()
            else:
                self.login(self.username, self._password, repeat=0)
            # after regenerating token, we should have something long-lived
            if not self.token or self.valid_for < 5:
                arcpy.AddError("Unable to get signin token.")
                return
            rerun = True
        if rerun:
            time.sleep(2)
            response_json = self.url_request(
                in_url, request_parameters, request_type,
                additional_headers, files, repeat)
    return response_json
base_url = 'https://www.newegg.com/p/pl?d=graphics+cards&N=' + manufacturers['GIGABYTE'] + '&page='

# creates a SoupStrainer object that is used to
# generate minimal HTML in each parsed document
only_item_cells = strainer("div", attrs={"class": "item-cell"})

min_page_number = 1
max_page_number = 6  # based on checking newegg.com, number will vary when site is updated
page_cells = []

# opening connection, grabbing the HTML from each page
# and generating the corresponding page soup
for num in range(min_page_number, max_page_number + 1):
    client = request(base_url + str(num))
    page_html = client.read()
    page_soup = soup(page_html, 'html.parser', parse_only=only_item_cells)
    page_soup_list = list(page_soup)
    page_cells.append(page_soup_list)
    client.close()

with open('graphics_cards.csv', mode='w', newline='') as graphics_cards_file:
    file_writer = csv.writer(graphics_cards_file)
    file_writer.writerow(['Brand', 'Product Name', 'Price', 'Shipping'])
    for cell in page_cells:
        for html in cell:
            # checking to see if cell is an advertisement, if
            # condition is true, then cell is not an advertisement
            # and we can execute the code below without an error
            if html.find("div", attrs={"class": "txt-ads-link"}) is None: