def __init__(self, path='/usr/share/dict/words', force_dload=False): if os.path.exists(path) and not force_dload: #not on shatter-prone os with open(path) as fobj: self.word_list = fobj.read().splitlines() else: words = urlopen(self.online_dict_url).read().splitlines() self.word_list = [word.decode().strip().lower() for word in words]
# Where to download the data, if not already on disk
redirects_url = "http://downloads.dbpedia.org/3.5.1/en/redirects_en.nt.bz2"
redirects_filename = redirects_url.rsplit("/", 1)[1]

page_links_url = "http://downloads.dbpedia.org/3.5.1/en/page_links_en.nt.bz2"
page_links_filename = page_links_url.rsplit("/", 1)[1]

resources = [
    (redirects_url, redirects_filename),
    (page_links_url, page_links_filename),
]

for url, filename in resources:
    if not os.path.exists(filename):
        print("Downloading data from '%s', please wait..." % url)
        # Close both the HTTP response and the output file deterministically;
        # the original `open(filename, 'wb').write(opener.read())` leaked the
        # file handle and never closed the response.
        with urlopen(url) as opener, open(filename, 'wb') as out:
            out.write(opener.read())
        print()


# #############################################################################
# Loading the redirect files

# Disk-backed memoization cache in the current directory.
memory = Memory(cachedir=".")


def index(redirects, index_map, k):
    """Find the index of an article name after redirect resolution.

    Resolves *k* through the *redirects* mapping (identity when absent),
    then returns its index in *index_map*, assigning the next free index
    on first sight.
    """
    k = redirects.get(k, k)
    return index_map.setdefault(k, len(index_map))
'RTN': 'Raytheon', 'CVS': 'CVS', 'CAT': 'Caterpillar', 'DD': 'DuPont de Nemours'} symbols, names = np.array(sorted(symbol_dict.items())).T quotes = [] for symbol in symbols: print('Fetching quote history for %r' % symbol, file=sys.stderr) url = ('https://raw.githubusercontent.com/scikit-learn/examples-data/' 'master/financial-data/{}.csv') data = list(csv.DictReader(l.decode('utf8') for l in urlopen(url.format(symbol)))) quotes.append({'close': [float(rec['close']) for rec in data], 'open': [float(rec['open']) for rec in data]}) close_prices = np.vstack([q['close'] for q in quotes]) open_prices = np.vstack([q['open'] for q in quotes]) # The daily variations of the quotes are what carry most information variation = close_prices - open_prices # ############################################################################# # Learn a graphical structure from the correlations edge_model = covariance.GraphLassoCV() # standardize the time series: using correlations rather than covariance
'RTN': 'Raytheon', 'CVS': 'CVS', 'CAT': 'Caterpillar', 'DD': 'DuPont de Nemours' } symbols, names = np.array(sorted(symbol_dict.items())).T quotes = [] for symbol in symbols: print('Fetching quote history for %r' % symbol, file=sys.stderr) url = ('https://raw.githubusercontent.com/scikit-learn/examples-data/' 'master/financial-data/{}.csv') data = list( csv.DictReader(l.decode('utf8') for l in urlopen(url.format(symbol)))) quotes.append({ 'close': [float(rec['close']) for rec in data], 'open': [float(rec['open']) for rec in data] }) close_prices = np.vstack([q['close'] for q in quotes]) open_prices = np.vstack([q['open'] for q in quotes]) # The daily variations of the quotes are what carry most information variation = close_prices - open_prices # ############################################################################# # Learn a graphical structure from the correlations edge_model = covariance.GraphLassoCV()