def fetch_data_table(download=True, show_progress=False, retries=5):
    """ Fetch CME data table from Quandl """
    if download:
        # Retry the bulk download up to `retries` times; a successful
        # attempt breaks out with the flag set.
        succeeded = False
        for _ in range(retries):
            try:
                if show_progress:
                    log.info('Downloading CME data')
                quandl.bulkdownload('CME', filename=QUANDL_ZIP_FILE)
                succeeded = True
                break
            except Exception:
                log.exception(
                    "Exception raised reading Quandl data. Retrying.")
        if not succeeded:
            # Every attempt failed — give up loudly.
            raise ValueError(
                "Failed to download Quandl data after %d attempts." % (retries))
    else:
        if show_progress:
            log.info('Reading CME data from disk')
    # Parse the (downloaded or cached) zip into a data table.
    return load_data_table(
        file=QUANDL_ZIP_FILE,
        index_col=None,
        show_progress=show_progress,
    )
def update_data_csv(self):
    """Download the latest EOD bulk archive, extract it, and record dates.

    Side effects: writes ``EOD.zip`` and its extracted contents to the
    working directory; sets ``self.current_date`` (today as YYYYMMDD)
    and ``self.close_date`` (ISO date of the last MSFT row Quandl has).
    """
    # Download data, extract zip file.
    quandl.bulkdownload("EOD")
    now = datetime.datetime.now()
    self.current_date = now.strftime("%Y%m%d")
    # Context manager guarantees the archive is closed even if
    # extraction raises (the original leaked the handle on error).
    with zipfile.ZipFile('EOD.zip', 'r') as zip_ref:
        zip_ref.extractall()
    # Update close date: take the newest index entry from a short
    # recent window of MSFT rows.
    temp = quandl.get("EOD/MSFT", start_date=self.days_ago(10))
    self.close_date = str(temp.index.values[-1])[:10]
def update_data_csv(self):
    """Refresh the cached EOD bulk data.

    Removes previously extracted CSVs, downloads the EOD bulk archive,
    extracts it into ``self.stock_data_folder``, and stores the path of
    the extracted CSV in ``self.csv_filename``.
    """
    # Clean previous files. BUG FIX: use the configured folder instead
    # of the hard-coded 'stock_data' path, so cleanup and the filename
    # lookup below agree with where extractall() actually writes.
    csv_pattern = os.path.join(self.stock_data_folder, '*.csv')
    for filename in glob.glob(csv_pattern):
        os.remove(filename)
    quandl.bulkdownload("EOD")
    print("Download succeeded")
    # Context manager closes the archive even if extraction raises.
    with zipfile.ZipFile('EOD.zip', 'r') as zip_ref:
        zip_ref.extractall(self.stock_data_folder)
    print("Extract to", self.stock_data_folder)
    # Get filename of the (single) extracted CSV.
    self.csv_filename = glob.glob(csv_pattern)[0]
def import_bulk_data(self):
    """Bulk-download the WIKI database from Quandl and load it.

    Reads only the ticker/price/volume/date columns from the downloaded
    zip, then normalizes and validates the frame into ``self._data``.
    """
    print("Bulk download from Quandl, it might takes time.")
    quandl.bulkdownload("WIKI")
    print('File downloaded as WIKI.zip.')
    print('Reading zip file...')
    # Restrict parsing to the columns we keep, with compact dtypes.
    wanted = ['ticker', 'adj_close', 'adj_volume', 'date']
    dtypes = {
        'ticker': 'category',
        'adj_close': 'float32',
        'adj_volume': 'float32',
    }
    bulk_data = pd.read_csv(
        './WIKI.zip',
        usecols=wanted,
        dtype=dtypes,
        parse_dates=['date'],
        names=self.wiki_columns,
    )
    self._data = self._adjust_frame_format(bulk_data)
    self._data = self.check_data_integrity(self._data)
    # Drop the raw frame; only the processed copy is kept.
    del bulk_data
def bulkdownload(dataset: str, chunksize=None):
    """Bulk-download *dataset* from Quandl and return its CSV contents.

    :param dataset: Quandl database code (e.g. ``"WIKI"``).
    :param chunksize: when ``None``, return a single ``DataFrame``;
        otherwise return an iterator yielding ``DataFrame`` chunks of
        that many rows.

    BUG FIX: the original returned a lazy ``pd.read_csv(...,
    chunksize=...)`` reader from inside a ``with
    tempfile.TemporaryDirectory()`` block, so the backing CSV was
    deleted before the caller could iterate it. The temporary directory
    is now kept alive until the data has actually been consumed.
    """
    tmpdir = tempfile.TemporaryDirectory()
    try:
        filename = os.path.join(tmpdir.name, dataset + '.zip')
        logger = logging.getLogger(__name__)
        logger.info("Downloading dataset " + dataset + " to " + filename)
        # .get() is equivalent to the original's presence-check: None
        # when QUANDL_API_KEY is unset.
        quandl.bulkdownload(dataset, filename=filename,
                            api_key=os.environ.get('QUANDL_API_KEY'))
        zipfile.ZipFile(filename).extractall(tmpdir.name)
        logger.info("Done... Start yielding dataframes")
        csv_path = glob.glob(os.path.join(tmpdir.name, '*.csv'))[0]
    except Exception:
        tmpdir.cleanup()
        raise
    if chunksize is None:
        # Eager read: the whole file is consumed before cleanup.
        try:
            return pd.read_csv(csv_path, header=None, parse_dates=[1])
        finally:
            tmpdir.cleanup()

    def _iter_chunks():
        # The generator keeps the temporary directory alive while the
        # caller iterates; cleanup runs when it is exhausted or closed.
        try:
            yield from pd.read_csv(csv_path, header=None,
                                   chunksize=chunksize, parse_dates=[1])
        finally:
            tmpdir.cleanup()

    return _iter_chunks()
def download_quandl_dataset(database, dataset, save_path, columns, tickers, start_date, end_date):
    """ Download a dataset from Quandl and save it to `save_path`.
    Filter by columns, tickers, and date
    :param database: The Quandl database to download from
    :param dataset: The dataset to download
    :param save_path: The path to save the dataset
    :param columns: The columns to save
    :param tickers: The tickers to save
    :param start_date: The rows to save that are older than this date
    :param end_date: The rows to save that are younger than this date
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # BUG FIX: the original used string concatenation
        # (tmp_dir + 'tmp.zip'), which placed the archive *next to* the
        # temporary directory (so it was never cleaned up).
        # os.path.join puts it inside the directory.
        tmp_wiki_file = os.path.join(tmp_dir, 'tmp.zip')

        quandl.bulkdownload(database,
                            dataset_code=dataset,
                            filename=tmp_wiki_file)

        # Unzip downloaded data; the context manager closes the archive
        # even if extraction raises.
        with zipfile.ZipFile(tmp_wiki_file, 'r') as zip_ref:
            zip_ref.extractall(tmp_dir)

        # Check if the zip file only contains one csv file
        # We're assuming that Quandl will always give us the data in a single csv file.
        # If it's different, we want to throw an error.
        csv_files = glob.glob(os.path.join(tmp_dir, '*.csv'))
        assert len(csv_files) == 1,\
            'Bulk download of Quandl Wiki data failed. Wrong number of csv files found. Found {} file(s).'\
            .format(len(csv_files))
        tmp_csv_file = csv_files[0]

        # The bulk CSV ships without a header row; recover the column
        # names from an (empty) get_table query against the same dataset.
        names = quandl.get_table('{}/{}'.format(database, dataset),
                                 ticker='EMPTY_RESULTS_TICKER').columns.values
        tmp_df = pd.read_csv(tmp_csv_file, names=names)
        tmp_df['date'] = pd.to_datetime(tmp_df['date'])

        # Remove unused data and save
        tmp_df = tmp_df[tmp_df['date'].isin(pd.date_range(
            start_date, end_date))]  # Filter unused dates
        tmp_df = tmp_df[tmp_df['ticker'].isin(
            tickers)]  # Filter unused tickers
        tmp_df.to_csv(save_path, columns=columns,
                      index=False)  # Filter unused columns and save
# COMMAND ---------- import quandl # COMMAND ---------- dbutils.widgets.text("quandl_api_key", "kNg6oG-tXRn7As_S7Z1i", "Quandl API Key:") # COMMAND ---------- quandl.ApiConfig.api_key = dbutils.widgets.get("quandl_api_key") # COMMAND ---------- quandl.bulkdownload("WIKI", download_type="complete", filename="/tmp/WIKI.zip") # COMMAND ---------- # MAGIC %sh unzip /tmp/WIKI.zip -d /tmp # COMMAND ---------- dbutils.fs.rm("/DemoData/stock_data", True) # COMMAND ---------- # MAGIC %sh mkdir -p /dbfs/DemoData/stock_data # COMMAND ----------
# Script: bulk-download the whole DEB database from Quandl to a local zip.

import quandl

# NOTE(review): a real API key is hard-coded here — it should be revoked and
# read from an environment variable or config file instead of the source.
Authkey = '5_pRK9pKefuvZzHe-MkS'

try:
    quandl.bulkdownload(
        'DEB',
        api_key=Authkey,
        filename=
        'E:\\001_Stock_Market_Analysis\\Quandl\\DEB Whole Database\\DEB_DB_01_Sep_2016.zip'
    )
except Exception as e:
    print(str(e))
else:
    # BUG FIX: the success message used to print unconditionally, even when
    # the download raised and the except branch had just reported an error.
    print("Download done !")