def main(project_dir):
    """Fetch the current round's Numerai dataset and cache it as a pickle.

    The dataset is stored as ``<round>_numerai_raw.pkl`` inside
    ``<project_dir>/data/raw``. If a file with that name is already present
    nothing is downloaded; otherwise the dataset is downloaded, passed
    through ``df_to_numeric`` and pickled to that location.

    Parameters
    ----------
    project_dir : str
        Root directory of the project; the raw data directory is resolved
        relative to it.
    """
    logger = logging.getLogger(__name__)
    logger.info('Getting raw data')

    napi = NumerAPI()
    round_number = napi.get_current_round()

    dataset_filename = '{}_numerai_raw.pkl'.format(round_number)
    raw_data_path = os.path.join(project_dir, 'data', 'raw')
    raw_data_file = os.path.join(raw_data_path, dataset_filename)

    # A fresh checkout may not have data/raw yet; listing or writing into a
    # missing directory would raise, so create it up front.
    if not os.path.isdir(raw_data_path):
        os.makedirs(raw_data_path)

    if os.path.exists(raw_data_file):
        logger.info("Dataset for round {} already downloaded as {}".format(
            round_number, dataset_filename))
        return

    logger.info("Downloading data for round {}".format(round_number))
    # The URL is only needed when we actually download, so it is requested
    # here rather than unconditionally at the top.
    dataset_url = napi.get_dataset_url()
    df = download_dataset_as_df(dataset_url)

    logger.info('Data concatenated, downcasting data')
    df = df_to_numeric(df)

    logger.info('Data converted, saving to file')
    df.to_pickle(raw_data_file)
    logger.info("Dataset for round {} downloaded as {}".format(
        round_number, dataset_filename))
def download(filename, load=True, n_tries=100, sleep_seconds=300,
             verbose=False, tournament=8):
    """
    Download current Numerai dataset; overwrites if file exists.

    If `load` is True (default) then return data object; otherwise
    return None.

    If download fails then retry download, making at most `n_tries`
    attempts in total and pausing `sleep_seconds` between consecutive
    attempts. No pause is made after the final attempt.

    If every attempt fails the function still proceeds to the load step
    (when `load` is True), which will then operate on whatever is at
    `filename`.

    `tournament` is the tournament number passed to
    ``NumerAPI.get_dataset_url``; it defaults to 8.

    Unlike nx.download() this function loads and returns the data object.
    """
    # line below expands e.g. ~/tmp to /home/me/tmp...
    filename = os.path.expanduser(filename)
    count = 0
    while count < n_tries:
        try:
            if verbose:
                print("Download dataset {}".format(filename))
            napi = NumerAPI()
            url = napi.get_dataset_url(tournament=tournament)
            download_file(url, filename)
            break
        except Exception:  # noqa
            # Catch Exception rather than everything so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit can still stop a retry
            # loop that may otherwise run for hours.
            print('download failed')
            count += 1
            # Sleeping only makes sense if another attempt will follow.
            if count < n_tries:
                time.sleep(sleep_seconds)
    if load:
        data = nx.load_zip(filename, verbose=verbose)
    else:
        data = None
    return data
def download(filename, tournament=1, verbose=False):
    """
    Download the current Numerai dataset; overwrites if file exists.

    `filename` is the destination path (a leading ``~`` is expanded),
    `tournament` is forwarded to ``NumerAPI.get_dataset_url`` and
    `verbose` controls whether a progress message is printed first.
    """
    if verbose:
        # The message shows the path exactly as the caller supplied it.
        message = "Download dataset {}".format(filename)
        print(message)
    dataset_url = NumerAPI().get_dataset_url(tournament=tournament)
    # expand ~/tmp to /home/...
    target_path = os.path.expanduser(filename)
    download_file(dataset_url, target_path)