def test_load_zip():
    """test nx.load_zip"""
    # Exercise both the default call and the verbose=True call; the same
    # checks are applied to the data object produced by each.
    for verbose in (False, True):
        if verbose:
            # the verbose load runs inside testing.HiddenPrints()
            with testing.HiddenPrints():
                d = nx.load_zip(TINY_DATASET_CSV, verbose=True)
        else:
            d = nx.load_zip(TINY_DATASET_CSV)

        n_rows = len(d)
        ok_(n_rows == 14, "wrong number of rows")

        data_shape = d.shape
        ok_(data_shape == (14, 59), 'data has wrong shape')

        x_shape = d.x.shape
        ok_(x_shape == (14, 50), 'x has wrong shape')

        feature_value = d.df.iloc[2, 3]
        ok_(feature_value == 0.34143, 'wrong feature value')
def download(filename, load=True, n_tries=100, sleep_seconds=300,
             verbose=False):
    """
    Download current Numerai dataset; overwrites if file exists.

    If `load` is True (default) then return data object; otherwise return
    None.

    If download fails then retry download `n_tries` times, pausing
    `sleep_seconds` between each try. No pause is made after the final
    failed try. Only `Exception` subclasses trigger a retry, so
    KeyboardInterrupt and SystemExit propagate and the (potentially very
    long) retry loop can be interrupted.

    If every try fails no exception is raised by the retry loop itself:
    'download failed' is printed for each try and, when `load` is True,
    nx.load_zip is still called on `filename`.

    Unlike nx.download() this function loads and returns the data object.
    """
    # line below expands e.g. ~/tmp to /home/me/tmp...
    filename = os.path.expanduser(filename)

    failures = 0
    while failures < n_tries:
        try:
            if verbose:
                print("Download dataset {}".format(filename))
            napi = NumerAPI()
            url = napi.get_dataset_url(tournament=8)
            download_file(url, filename)
            break
        except Exception:
            # best-effort retry; deliberately broad, but not a bare except
            # so that Ctrl-C / sys.exit() are not swallowed
            print('download failed')
            failures += 1
            if failures < n_tries:
                # only wait when another try will actually follow
                time.sleep(sleep_seconds)

    if load:
        data = nx.load_zip(filename, verbose=verbose)
    else:
        data = None
    return data
def test_data_y_for_tournment():
    """test data.y_for_tournament"""
    data = nx.load_zip(TINY_DATASET_CSV)
    for number in range(1, 6):
        # expected targets: ones at two positions that depend on the
        # tournament number, NaN from row 10 onwards, zeros elsewhere
        expected = np.zeros(14)
        expected[[number - 1, number + 4]] = 1
        expected[10:] = np.nan

        # the same targets must come back whether indexed by number or name
        by_number = data.y[number]
        by_name = data.y[nx.tournament_str(number)]

        assert_array_equal(by_number, by_name,
                           "y{} indexing corrupted".format(number))
        assert_array_equal(by_number, expected,
                           "y{} targets corrupted".format(number))
def setup_data(self):
    """
    Populate `self.data` according to `self.trainer_params`.

    When `trainer_params['get_current_data']` is truthy, NumerAPI is asked
    whether a new round has started; if so the dataset is obtained through
    `self.get_tournament_data()`. Otherwise the zip file named by
    `trainer_params['local_data']` is loaded with `nx.load_zip`.

    Raises
    ------
    FileNotFoundError
        If local data is requested but `trainer_params['local_data']` is
        not an existing file. (Previously the exception object was
        returned instead of raised, so the failure went unnoticed.)
    """
    # NOTE(review): the original text was flattened onto one line, so the
    # pairing of the first `else` is not visible; it is taken here to
    # belong to the `get_current_data` check -- confirm against the
    # original file.
    if self.trainer_params['get_current_data']:
        napi = numerapi.NumerAPI(verbosity="info")
        if napi.check_new_round():
            LOGGER.info('Loading current dataset from NumerAPI..')
            self.data = self.get_tournament_data()
        return

    local_path = self.trainer_params['local_data']
    if not os.path.isfile(local_path):
        raise FileNotFoundError('local data not found')

    LOGGER.info(f"Loading data locally from {local_path}")
    self.data = nx.load_zip(local_path)
def main():
    """Download the dataset, run a logistic model and save its predictions."""
    # fetch the dataset from numerai, then load it from the same zip file
    nx.download_dataset('numerai_dataset.zip', verbose=True)
    dataset = nx.load_zip('numerai_dataset.zip', verbose=True)

    # logistic regression is only a placeholder; you will want to write
    # your own model
    logistic_model = nx.logistic()

    # fit on the train data and predict the tournament data, then write
    # the predictions to a csv file for later upload to numerai
    predictions = nx.production(logistic_model, dataset)
    predictions.to_csv('logistic.csv', verbose=True)
def predict():
    """
    Make predictions for every tournament and upload them to Numerai.

    For each name returned by nx.tournament_names() a saved model named
    'model_trained_<tournament>' is loaded if that path exists; otherwise a
    new model.LinearModel is created and fitted on data['train']. The
    predictions are written to '/tmp/prediction_<tournament>.csv' and then
    uploaded.

    Reads the API credentials from the environment variables
    NUMERAI_PUBLIC_ID and NUMERAI_SECRET_KEY (KeyError if either is unset).
    """
    tournaments = nx.tournament_names()
    print(tournaments)

    # download dataset from numerai; the return value is not needed because
    # the file is loaded explicitly (single precision) right below
    nx.download('numerai_dataset.zip', load=False)
    print('data downloaded')
    data = nx.load_zip('numerai_dataset.zip', single_precision=True)
    print('data loaded')

    prediction_filenames = []
    for tournament_name in tournaments:
        saved_model_name = 'model_trained_' + tournament_name
        if os.path.exists(saved_model_name):
            print("using saved model for", tournament_name)
            m = model.LinearModel.load(saved_model_name)
        else:
            print("saved model not found for", tournament_name)
            m = model.LinearModel(verbose=True)
            print("training model for", tournament_name)
            m.fit(data['train'], tournament_name)

        print("running predictions for", tournament_name, flush=True)

        # fit model with train data and make predictions for tournament data
        prediction = nx.production(m, data, tournament=tournament_name)

        # save predictions to csv file; remember the path for the upload step
        prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'
        prediction.to_csv(prediction_filename, verbose=True)
        prediction_filenames.append(prediction_filename)

    # submit the predictions

    # Numerai API key
    # You will need to create an API key by going to
    # https://numer.ai/account and clicking "Add" under the "Your API keys"
    # section. Select the following permissions for the key:
    # "Upload submissions", "Make stakes",
    # "View historical submission info", "View user info"
    public_id = os.environ["NUMERAI_PUBLIC_ID"]
    secret_key = os.environ["NUMERAI_SECRET_KEY"]

    # one client and one get_models() request serve every upload; neither
    # depends on the tournament being uploaded
    api = NumerAPI(public_id=public_id, secret_key=secret_key)
    model_id = api.get_models()

    for prediction_filename in prediction_filenames:
        api.upload_predictions(prediction_filename,
                               model_id=model_id['akrimedes_2'])
def test_data_y_for_tournament():
    """test data.y_for_tournament"""
    data = nx.load_zip(TINY_DATASET_CSV)
    for number, name in nx.tournament_iter(active_only=True):
        # expected targets: 0.75 and 0.25 at fixed rows, NaN from row 10
        # onwards, zeros elsewhere
        expected = np.zeros(14)
        expected[[0, 4, 5, 9]] = 0.75000
        expected[[1, 6]] = 0.25000
        expected[10:] = np.nan

        # the same targets must come back whether indexed by number or name
        by_number = data.y[number]
        by_name = data.y[name]

        assert_array_equal(by_number, by_name, f"y{number} indexing corrupted")
        assert_array_equal(by_number, expected, f"y{number} targets corrupted")
def first_tournament():
    """
    Prepare an example submission for the Numerai tournament.

    Relies on Numerox, which can be installed with:

        pip install numerox

    More information: https://github.com/kwgoodman/numerox
    """
    # fetch the dataset from numerai, then load it from the same zip file
    nx.download('numerai_dataset.zip', verbose=True)
    dataset = nx.load_zip('numerai_dataset.zip', verbose=True)

    # logistic regression is only a placeholder; you will want to write
    # your own model
    logistic_model = nx.logistic()

    # fit on the train data and predict the tournament data, then write
    # the predictions to a csv file
    predictions = nx.production(logistic_model, dataset)
    predictions.to_csv('logistic.csv', verbose=True)
def download_data_object(verbose=False):
    """
    Download the dataset into a temporary file and return the loaded data.

    Used by numerox to avoid hard coding paths; probably not useful to users.
    """
    with tempfile.NamedTemporaryFile() as tmp_file:
        zip_path = tmp_file.name
        download(zip_path, verbose=verbose)
        # load while the temporary file is still open, i.e. before the
        # context manager closes it
        return nx.load_zip(zip_path)
# In[3]: # set the data working directory os.chdir(os.path.join(os.getcwd(), "..", "data")) # In[4]: # download the latest numerai dataset # data = nx.download("numerai_dataset.zip") # to make it faster use an existing dataset data = nx.load_zip("numerai_dataset.zip") # In[5]: # environment settings MODEL_NAME = "logistic-regression" FOLDER_NAME = "submission" # In[6]: # extend the logistic model class offered by numerox class logistic(nx.Model):
import numerox as nx
from model import get_model

# load the dataset from the zip file in the current working directory
data = nx.load_zip('numerai_dataset.zip')

# the model to run is supplied by the local `model` module
model = get_model()

# run the model through nx.production for the 'bernie' tournament
prediction = nx.production(model, data, 'bernie', verbosity=1)

# write the 'bernie' predictions to a csv file
prediction.to_csv('output.csv', tournament='bernie')
def update_play_data(numerai_zip_path):
    """
    Create and save the data used by the load_play_data function.

    Loads the dataset at `numerai_zip_path`, takes a row sample with
    fraction=0.01 and seed=0, and saves the sample to TEST_DATA.
    """
    full_data = nx.load_zip(numerai_zip_path)
    row_sample(full_data, fraction=0.01, seed=0).save(TEST_DATA)