from urllib import request


def get_html_str(url):
    # Fetch the page and return its body as text, decoding bytes as UTF-8.
    res_obj = request.urlopen(url)
    html_byte = res_obj.read()
    if isinstance(html_byte, bytes):
        return html_byte.decode("utf-8")
    elif isinstance(html_byte, str):
        return html_byte
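# A minimal usage sketch for get_html_str() above; the URL is only an
# illustration (any reachable page would do), not part of the original snippet.
if __name__ == "__main__":
    html = get_html_str("https://httpbin.org/html")
    print(html[:200])  # first 200 characters of the decoded page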
def update(self, rowid, new_values):
    # Update a subscriber by re-posting their details to the Moosend subscribe.json endpoint.
    url = self.endpoint_url + 'subscribers/' + self.list_id + '/subscribe.json?' + 'apikey=' + self.api_key
    raw_params = {
        "Name": new_values.get("Name"),
        "Email": new_values.get("Email"),
        "HasExternalDoubleOptIn": True,
        "CustomFields": [
            str(k) + "=" + str(new_values.get(k, None))
            for k in self.custom_fields
            if new_values.get(k, None) is not None
        ]
    }
    params = json.dumps(raw_params)
    log_to_postgres(params, DEBUG)
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
    # urlopen() expects bytes for the request body on Python 3.
    request = Request(url, data=params.encode("utf-8"), headers=headers)
    response = urlopen(request)
    result = json.loads(response.read())
    log_to_postgres(result, DEBUG)
    if result["Code"] != 0:
        log_to_postgres("MoosendFDW: " + result["Error"], ERROR)
        return None
    return {c: self.col(c, result["Context"]) for c in self.columns}
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup


def get_page(reg_url):
    # Send a browser-like User-Agent so the server does not block the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/42.0.2228.0 Safari/537.3'}
    req = Request(url=reg_url, headers=headers)
    if req is not None:
        html = urlopen(req).read()
        if html is not None:
            page = BeautifulSoup(html, 'lxml')
            return page
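# A hedged usage sketch for get_page() above; example.com stands in for whatever
# page you actually want to scrape, and the title lookup assumes the page has one.
if __name__ == "__main__":
    soup = get_page("https://example.com")
    if soup is not None and soup.title is not None:
        print(soup.title.get_text())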
def fetch_page(self, page_num):
    response = urlopen(self.endpoint_url + 'lists/' + self.list_id +
                       '/subscribers/Subscribed.json?' + 'apikey=' + self.api_key +
                       '&Page=' + str(page_num) + '&PageSize=' + str(self.page_size))
    results = json.loads(response.read())
    log_to_postgres(results, DEBUG)
    if results["Code"] != 0:
        log_to_postgres("MoosendFDW: " + results["Error"], ERROR)
        return (None, None)
    return (results["Context"], results["Context"]["Paging"]["TotalPageCount"])
def delete(self, rowid):
    # Remove the subscriber identified by rowid (their email address) from the list.
    url = self.endpoint_url + "subscribers/" + self.list_id + "/remove.json?apikey=" + self.api_key
    raw_params = {"Email": rowid}
    params = json.dumps(raw_params)
    log_to_postgres(params, DEBUG)
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
    # urlopen() expects bytes for the request body on Python 3.
    request = Request(url, data=params.encode("utf-8"), headers=headers)
    response = urlopen(request)
    results = json.loads(response.read())
    log_to_postgres(results, DEBUG)
    if results["Code"] != 0:
        log_to_postgres("MoosendFDW: " + results["Error"], ERROR)
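# The update(), fetch_page(), and delete() methods above read like parts of a
# Multicorn foreign data wrapper for Moosend. This is a hedged sketch of the
# surrounding context they appear to assume; the class name, base URL, and option
# handling are illustrative guesses, not taken from the original code.
import json
from logging import DEBUG, ERROR
from urllib.request import Request, urlopen

from multicorn import ForeignDataWrapper
from multicorn.utils import log_to_postgres


class MoosendFDW(ForeignDataWrapper):
    def __init__(self, options, columns):
        super(MoosendFDW, self).__init__(options, columns)
        self.endpoint_url = options.get("endpoint_url", "https://api.moosend.com/v3/")  # assumed default
        self.api_key = options["api_key"]
        self.list_id = options["list_id"]
        self.page_size = int(options.get("page_size", 500))
        self.custom_fields = []  # a real wrapper would derive these from options or columns
        self.columns = columns

    @property
    def rowid_column(self):
        # Multicorn passes this column's value as the rowid argument to
        # update() and delete(); the methods above treat it as the email.
        return "Email"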
def query(query, useragent='python-duckduckgo ' + str(__version__), safesearch=True, html=False, meanings=True, **kwargs):
    """
    Query DuckDuckGo, returning a Results object.

    Here's a query that's unlikely to change:

    >>> result = query('1 + 1')
    >>> result.type
    'nothing'
    >>> result.answer.text
    '1 + 1 = 2'
    >>> result.answer.type
    'calc'

    Keyword arguments:
    useragent: UserAgent to use while querying. Default: "python-duckduckgo %d" (str)
    safesearch: True for on, False for off. Default: True (bool)
    html: True to allow HTML in output. Default: False (bool)
    meanings: True to include disambiguations in results (bool)
    Any other keyword arguments are passed directly to DuckDuckGo as URL params.
    """ % __version__

    safesearch = '1' if safesearch else '-1'
    html = '0' if html else '1'
    meanings = '0' if meanings else '1'
    params = {
        'q': query,
        'o': 'json',
        'kp': safesearch,
        'no_redirect': '1',
        'no_html': html,
        'd': meanings,
    }
    params.update(kwargs)
    encparams = urlencode(params)
    url = 'http://api.duckduckgo.com/?' + encparams

    request = Request(url, headers={'User-Agent': useragent})
    response = urlopen(request)
    json = j.loads(response.read())
    response.close()

    return Results(json)
from urllib.request import urlopen
from timeit import default_timer as timer  # assumed source of the timer() used below


def main():
    sortby = 1  # Magnitude = 0, Place = 1, Distance = 2

    # Using data feed from the USGS
    # quakeData = "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson"
    # quakeData = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_week.geojson"
    quakeData = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_month.geojson"

    # Open the URL and read the data, call printResults() to format and output
    try:
        webUrl = urlopen(quakeData)
    except:
        print("Error opening: {}".format(quakeData))
    else:
        if webUrl.getcode() == 200:
            data = webUrl.read()
            start = timer()
            printResults(data, sortby)
            print("Processed data in {:2.3f} seconds".format(timer() - start))
        else:
            print("Error from USGS server, cannot retrieve data " + str(webUrl.getcode()))
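# printResults() is referenced above but not included in the snippet; this is a
# hedged sketch of what such a helper might do, assuming the standard USGS GeoJSON
# layout (features[i]["properties"]["mag"] / ["place"]). Distance sorting is
# omitted because it needs a reference location the snippet does not define.
import json


def printResults(data, sortby=1):
    quakes = json.loads(data)["features"]  # json.loads() accepts the bytes returned by read()
    if sortby == 0:
        quakes.sort(key=lambda q: q["properties"]["mag"] or 0, reverse=True)
    else:
        quakes.sort(key=lambda q: q["properties"]["place"] or "")
    for quake in quakes:
        props = quake["properties"]
        print("{:>5}  {}".format(str(props["mag"]), props["place"]))


if __name__ == "__main__":
    main()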
def __create_remote_webdriver_from_config(self, testname=None):
    '''Creates a remote webdriver instance using the remote URL from config.'''
    desired_capabilities = self._generate_desired_capabilities(testname)

    remote_url = self._config_reader.get(WebDriverFactory.REMOTE_URL_CONFIG)

    # Instantiate remote webdriver.
    driver = webdriver.Remote(
        desired_capabilities=desired_capabilities,
        command_executor=remote_url
    )

    # Log IP Address of node if configured, so it can be used to
    # troubleshoot issues if they occur.
    log_driver_props = self._config_reader.get(
        WebDriverFactory.LOG_REMOTEDRIVER_PROPS,
        default_value=False
    ) in [True, "true", "TRUE", "True"]

    if "wd/hub" in remote_url and log_driver_props:
        try:
            grid_addr = remote_url[:remote_url.index("wd/hub")]
            info_request_response = urlopen(
                grid_addr + "grid/api/testsession?session=" + driver.session_id,
                "", 5000)
            node_info = info_request_response.read()
            _wtflog.info(
                u("RemoteWebdriver using node: ") + u(node_info).strip())
        except:
            # Unable to get IP Address of remote webdriver.
            # This happens with many 3rd party grid providers as they don't
            # want you accessing info on nodes on their internal network.
            pass

    return driver
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 8 11:36:39 2018

@author: SilverDoe
"""

try:
    from urllib2 import urlopen  # Python 2.7
except ImportError:
    print("urllib2 doesn't exist")
    from urllib.request import urlopen  # Python 3

my_url = "https://wccftech.com"
fileurl = urlopen(my_url)
fileurl.read()
import numpy as np
from urllib.request import urlopen
import matplotlib.pyplot as plt  # Visuals
import pandas as pd

np.set_printoptions(threshold=np.nan)

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'
names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
    'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease'
]
heartDisease = pd.read_csv(urlopen(url), names=names)
heartDisease.head()
print(heartDisease.head())

# Delete COST
del heartDisease['ca']
del heartDisease['slope']
del heartDisease['thal']
del heartDisease['oldpeak']

heartDisease = heartDisease.replace('?', np.nan)
heartDisease.dtypes
print(heartDisease.dtypes)
heartDisease.columns

from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'),
                       ('sex', 'trestbps'), ('sex', 'trestbps'),
# From Web Scraping with Python with modifications
__author__ = 'phil'

from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://dowjones.com")
bsObj = BeautifulSoup(html, "html.parser")  # name a parser explicitly to avoid the bs4 warning
for link in bsObj.findAll("a"):
    if 'href' in link.attrs:
        print(link.attrs['href'])
from urllib import request, parse

url = "http://httpbin.org/get"
parms = {
    'name1': 'value1',
    'name2': 'value2'
}

qrystr = parse.urlencode(parms)
u = request.urlopen(url + '?' + qrystr)
resp = u.read()
print(resp)
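# A hedged follow-up sketch: the same parameters sent as a POST body instead of a
# query string. httpbin.org/post simply echoes back what it receives, so it is a
# convenient target for checking the encoding; urlopen() requires bytes for data.
post_url = "http://httpbin.org/post"
post_data = parse.urlencode(parms).encode("utf-8")
with request.urlopen(post_url, data=post_data) as conn:
    print(conn.read().decode("utf-8"))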