def check_internet(url=None):
    """Return True if an internet connection appears to be available.

    Parameters
    ----------
    url : str, optional
        URL to probe. Defaults to https://github.com.

    Returns
    -------
    bool
        True if the URL could be opened, False on URLError.
    """
    # Default kept inside the body (not as a mutable default) and the
    # parameter is new-with-default, so existing callers are unaffected.
    url = "https://github.com" if url is None else url
    try:
        urlopen(url)
    except URLError:
        # Any URLError (DNS failure, refused connection, ...) is taken to
        # mean "no internet"; the unused `as err` binding was removed.
        return False
    return True
def check_internet(url=None):
    """Return True if an internet connection appears to be available.

    Parameters
    ----------
    url : str, optional
        URL to probe. Defaults to https://github.com.

    Returns
    -------
    bool
        True if the URL could be opened, False on URLError.
    """
    url = "https://github.com" if url is None else url
    try:
        urlopen(url)
    except URLError:
        # Any URLError means "no internet"; the exception object was
        # never used, so the `as err` binding was removed.
        return False
    return True
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Download and parse a Stata dataset from the web.

    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : Record Array
        A record array containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy import so the module loads even without the iolib extras
    from statsmodels.iolib import genfromdta

    full_url = urljoin(baseurl, data + '.dta')
    response = urlopen(full_url)
    buf = StringIO(response.read())  # make it truly file-like
    records = genfromdta(buf)
    # could make this faster if we don't process dta twice?
    return DataFrame.from_records(records) if as_df else records
def _urlopen_cached(url, cache):
    """
    Tries to load data from cache location otherwise downloads it.

    If it downloads the data and cache is not None then it will put
    the downloaded data in the cache path.

    Parameters
    ----------
    url : str
        URL of the resource to fetch.
    cache : str or None
        Directory holding cached copies; None disables caching.

    Returns
    -------
    tuple of (bytes, bool)
        The raw data and a flag indicating whether it came from cache.
    """
    from_cache = False
    if cache is not None:
        # Flatten the URL into a single filename; the '-v2' marker keeps
        # these entries distinct from cache files written by older code.
        file_name = url.split("://")[-1].replace('/', ',')
        file_name = file_name.split('.')
        if len(file_name) > 1:
            file_name[-2] += '-v2'
        else:
            file_name[0] += '-v2'
        file_name = '.'.join(file_name) + ".zip"
        cache_path = join(cache, file_name)
        try:
            data = _open_cache(cache_path)
            from_cache = True
        except Exception:
            # Cache miss or unreadable cache entry -- fall through to a
            # fresh download. Narrowed from a bare `except:` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
    # not using the cache or didn't find it in cache
    if not from_cache:
        data = urlopen(url, timeout=3).read()
        if cache is not None:  # then put it in the cache
            _cache_it(data, cache_path)
    return data, from_cache
def _urlopen_cached(url, cache):
    """
    Tries to load data from cache location otherwise downloads it.

    If it downloads the data and cache is not None then it will put
    the downloaded data in the cache path.

    Parameters
    ----------
    url : str
        URL of the resource to fetch.
    cache : str or None
        Directory holding cached copies; None disables caching.

    Returns
    -------
    tuple of (bytes, bool)
        The raw data and a flag indicating whether it came from cache.
    """
    from_cache = False
    if cache is not None:
        # Cache file name is the URL with '/' flattened to ','.
        cache_path = join(cache,
                          url.split("://")[-1].replace("/", ",") + ".zip")
        try:
            data = _open_cache(cache_path)
            from_cache = True
        except Exception:
            # Cache miss or unreadable cache entry -- fall through to a
            # fresh download. Narrowed from a bare `except:` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
    # not using the cache or didn't find it in cache
    if not from_cache:
        data = urlopen(url).read()
        if cache is not None:  # then put it in the cache
            _cache_it(data, cache_path)
    return data, from_cache
def _urlopen_cached(url, cache):
    """
    Tries to load data from cache location otherwise downloads it.

    If it downloads the data and cache is not None then it will put
    the downloaded data in the cache path.

    Parameters
    ----------
    url : str
        URL of the resource to fetch.
    cache : str or None
        Directory holding cached copies; None disables caching.

    Returns
    -------
    tuple of (bytes, bool)
        The raw data and a flag indicating whether it came from cache.
    """
    from_cache = False
    if cache is not None:
        # Cache file name is the URL with '/' flattened to ','.
        cache_path = join(cache,
                          url.split("://")[-1].replace('/', ',') + ".zip")
        try:
            data = _open_cache(cache_path)
            from_cache = True
        except Exception:
            # Cache miss or unreadable cache entry -- fall through to a
            # fresh download. Narrowed from a bare `except:` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
    # not using the cache or didn't find it in cache
    if not from_cache:
        data = urlopen(url).read()
        if cache is not None:  # then put it in the cache
            _cache_it(data, cache_path)
    return data, from_cache
# NOTE(review): this span begins mid-function (the `def` and the matching
# `if` branch are outside this view); formatting reconstructed, code unchanged.
    else:
        # Non-vectorized path: evaluate fn pointwise and collect results.
        y = []
        for _x in x:
            y.append(fn(_x, **keywords))
        y = np.array(y)
    # Sort by function values so interp1d gets monotone x-data; the result
    # interpolates from fn(x) back to x, i.e. an (approximate) inverse.
    a = np.argsort(y)
    return interp1d(y[a], x[a])


if __name__ == "__main__":
    #TODO: Make sure everything is correctly aligned and make a plotting
    # function
    from statsmodels.compat.python import urlopen
    import matplotlib.pyplot as plt
    # Demo: download the nerve-pulse waiting-time data and plot its ECDF.
    nerve_data = urlopen('http://www.statsci.org/data/general/nerve.txt')
    nerve_data = np.loadtxt(nerve_data)
    x = nerve_data / 50.  # was in 1/50 seconds
    cdf = ECDF(x)
    x.sort()
    F = cdf(x)
    # ECDF step plot plus a confidence band (from _conf_set) in red.
    plt.step(x, F, where='post')
    lower, upper = _conf_set(F)
    plt.step(x, lower, 'r', where='post')
    plt.step(x, upper, 'r', where='post')
    plt.xlim(0, 1.5)
    plt.ylim(0, 1.05)
    plt.vlines(x, 0, .05)
    plt.show()
# NOTE(review): this span begins mid-function (the `def` and the `if` header
# are outside this view); formatting reconstructed, code unchanged.
        # Vectorized path: fn accepts the whole array at once.
        y = fn(x, **keywords)
    else:
        # Non-vectorized path: evaluate fn pointwise and collect results.
        y = []
        for _x in x:
            y.append(fn(_x, **keywords))
        y = np.array(y)
    # Sort by function values so interp1d gets monotone x-data; the result
    # interpolates from fn(x) back to x, i.e. an (approximate) inverse.
    a = np.argsort(y)
    return interp1d(y[a], x[a])


if __name__ == "__main__":
    #TODO: Make sure everything is correctly aligned and make a plotting
    # function
    from statsmodels.compat.python import urlopen
    import matplotlib.pyplot as plt
    # Demo: download the nerve-pulse waiting-time data and plot its ECDF.
    nerve_data = urlopen('http://www.statsci.org/data/general/nerve.txt')
    nerve_data = np.loadtxt(nerve_data)
    x = nerve_data / 50.  # was in 1/50 seconds
    cdf = ECDF(x)
    x.sort()
    F = cdf(x)
    # ECDF step plot plus a confidence band (from _conf_set) in red.
    plt.step(x, F, where='post')
    lower, upper = _conf_set(F)
    plt.step(x, lower, 'r', where='post')
    plt.step(x, upper, 'r', where='post')
    plt.xlim(0, 1.5)
    plt.ylim(0, 1.05)
    plt.vlines(x, 0, .05)
    plt.show()
# NOTE(review): this span begins mid-function and ends inside an open '''
# string (continued beyond this view); formatting reconstructed, code unchanged.
    # Tail of a score (gradient) computation -- presumably w.r.t. the
    # dispersion parameters (thet, Q) and the regression coefficients
    # via db; confirm against the enclosing function's loglike.
    dt = (A + B) * 1 / thet
    dq = (A + B) * np.log(lamb)
    db = X * (dl * lamb)[:, np.newaxis]
    # Stack coefficient derivatives first, then the two scalar params.
    sc = np.array([dt.sum(), dq.sum()])
    sc = np.concatenate([db.sum(axis=0), sc])
    return sc


#### Tests ####
from statsmodels.compat.python import urlopen
from numpy.testing import assert_almost_equal
import pandas
import patsy

# NOTE(review): raw.githubusercontent.com URLs normally include a branch/ref
# segment (".../Rdatasets/<ref>/csv/..."); these lack one -- verify they
# still resolve before relying on them.
medpar = pandas.read_csv(
    urlopen(
        'https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/csv/COUNT/medpar.csv'
    ))
mdvis = pandas.read_csv(
    urlopen(
        'https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/csv/COUNT/mdvis.csv'
    ))

# NB-2

'''
# R v2.15.1
library(MASS)
library(COUNT)
data(medpar)
f <- los~factor(type)+hmo+white
mod <- glm.nb(f, medpar)
summary(mod)
# NOTE(review): this span begins mid-function and ends inside an open '''
# string (continued beyond this view); formatting reconstructed, code unchanged.
    # Tail of a score (gradient) computation -- presumably w.r.t. lambda
    # (via dl), the dispersion parameters (thet, Q), and the regression
    # coefficients via db; confirm against the enclosing loglike.
    dl = (A + B) * Q / lamb - B * 1 / lamb
    dt = (A + B) * 1 / thet
    dq = (A + B) * np.log(lamb)
    db = X * (dl * lamb)[:, np.newaxis]
    # Stack coefficient derivatives first, then the two scalar params.
    sc = np.array([dt.sum(), dq.sum()])
    sc = np.concatenate([db.sum(axis=0), sc])
    return sc


#### Tests ####
from statsmodels.compat.python import urlopen
from numpy.testing import assert_almost_equal
import pandas
import patsy

# Reference datasets for the NB tests; downloaded at import time.
medpar = pandas.read_csv(
    urlopen(
        'http://vincentarelbundock.github.com/Rdatasets/csv/COUNT/medpar.csv'))
mdvis = pandas.read_csv(
    urlopen(
        'http://vincentarelbundock.github.com/Rdatasets/csv/COUNT/mdvis.csv'))

# NB-2

'''
# R v2.15.1
library(MASS)
library(COUNT)
data(medpar)
f <- los~factor(type)+hmo+white
mod <- glm.nb(f, medpar)
summary(mod)

Call:
glm.nb(formula = f, data = medpar, init.theta = 2.243376203,
# NOTE(review): this span begins mid-function and ends inside an open '''
# string (continued beyond this view); formatting reconstructed, code unchanged.
    # Tail of a score (gradient) computation; A and B collect the common
    # digamma-based factors -- presumably for an NB-type likelihood,
    # confirm against the enclosing function's loglike.
    A = digamma(y+g) - digamma(g) + np.log(w)
    B = g*(1-w) - y*w
    dl = (A+B) * Q/lamb - B * 1/lamb
    dt = (A+B) * 1/thet
    dq = (A+B) * np.log(lamb)
    db = X * (dl * lamb)[:,np.newaxis]
    # Stack coefficient derivatives first, then the two scalar params.
    sc = np.array([dt.sum(), dq.sum()])
    sc = np.concatenate([db.sum(axis=0), sc])
    return sc


#### Tests ####
from statsmodels.compat.python import urlopen
from numpy.testing import assert_almost_equal
import pandas
import patsy

# NOTE(review): raw.githubusercontent.com URLs normally include a branch/ref
# segment (".../Rdatasets/<ref>/csv/..."); these lack one -- verify they
# still resolve before relying on them.
medpar = pandas.read_csv(urlopen('https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/csv/COUNT/medpar.csv'))
mdvis = pandas.read_csv(urlopen('https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/csv/COUNT/mdvis.csv'))

# NB-2

'''
# R v2.15.1
library(MASS)
library(COUNT)
data(medpar)
f <- los~factor(type)+hmo+white
mod <- glm.nb(f, medpar)
summary(mod)

Call:
glm.nb(formula = f, data = medpar, init.theta = 2.243376203,
    link = log)
# NOTE(review): this span begins mid-function and ends inside an open '''
# string (continued beyond this view); formatting reconstructed, code unchanged.
    # Tail of a score (gradient) computation; A and B collect the common
    # digamma-based factors -- presumably for an NB-type likelihood,
    # confirm against the enclosing function's loglike.
    A = digamma(y+g) - digamma(g) + np.log(w)
    B = g*(1-w) - y*w
    dl = (A+B) * Q/lamb - B * 1/lamb
    dt = (A+B) * 1/thet
    dq = (A+B) * np.log(lamb)
    db = X * (dl * lamb)[:,np.newaxis]
    # Stack coefficient derivatives first, then the two scalar params.
    sc = np.array([dt.sum(), dq.sum()])
    sc = np.concatenate([db.sum(axis=0), sc])
    return sc


#### Tests ####
from statsmodels.compat.python import urlopen
from numpy.testing import assert_almost_equal
import pandas
import patsy

# Reference datasets for the NB tests; downloaded at import time.
medpar = pandas.read_csv(urlopen('http://vincentarelbundock.github.com/Rdatasets/csv/COUNT/medpar.csv'))
mdvis = pandas.read_csv(urlopen('http://vincentarelbundock.github.com/Rdatasets/csv/COUNT/mdvis.csv'))

# NB-2

'''
# R v2.15.1
library(MASS)
library(COUNT)
data(medpar)
f <- los~factor(type)+hmo+white
mod <- glm.nb(f, medpar)
summary(mod)

Call:
glm.nb(formula = f, data = medpar, init.theta = 2.243376203,
    link = log)