Ejemplo n.º 1
0
def test_load_zip():
    "test nx.load_zip"
    # run the same checks for a plain load and for a verbose load
    for verbose in (False, True):
        if verbose:
            # the verbose call is made inside testing.HiddenPrints()
            with testing.HiddenPrints():
                d = nx.load_zip(TINY_DATASET_CSV, verbose=True)
        else:
            d = nx.load_zip(TINY_DATASET_CSV)
        n_rows = len(d)
        ok_(n_rows == 14, "wrong number of rows")
        data_shape = d.shape
        ok_(data_shape == (14, 59), 'data has wrong shape')
        x_shape = d.x.shape
        ok_(x_shape == (14, 50), 'x has wrong shape')
        feature_value = d.df.iloc[2, 3]
        ok_(feature_value == 0.34143, 'wrong feature value')
Ejemplo n.º 2
0
def download(filename, load=True, n_tries=100, sleep_seconds=300,
             verbose=False):
    """
    Download current Numerai dataset; overwrites if file exists.

    If `load` is True (default) the archive at `filename` is loaded with
    nx.load_zip and the resulting data object is returned; otherwise None
    is returned.

    A failed download attempt is retried, for at most `n_tries` attempts in
    total, pausing `sleep_seconds` between attempts. No pause is made after
    the final failed attempt. Only `Exception` subclasses trigger a retry,
    so KeyboardInterrupt and SystemExit propagate to the caller.
    """
    # line below expands e.g. ~/tmp to /home/me/tmp...
    filename = os.path.expanduser(filename)
    attempt = 0
    while attempt < n_tries:
        try:
            if verbose:
                print("Download dataset {}".format(filename))
            napi = NumerAPI()
            url = napi.get_dataset_url(tournament=8)
            download_file(url, filename)
            break
        except Exception:
            # not a bare `except:` -- Ctrl-C must be able to stop the retries
            print('download failed')
            attempt += 1
            # sleeping only makes sense if another attempt will follow
            if attempt < n_tries:
                time.sleep(sleep_seconds)
    if load:
        return nx.load_zip(filename, verbose=verbose)
    return None
Ejemplo n.º 3
0
def test_data_y_for_tournment():
    "test data.y_for_tournament"
    d = nx.load_zip(TINY_DATASET_CSV)
    for number in range(1, 6):
        # expected targets: ones at two positions five apart, NaN from row 10
        expected = np.zeros(14)
        expected[[number - 1, number + 4]] = 1
        expected[10:] = np.nan
        # the same target must come back for integer and string keys
        by_number = d.y[number]
        by_name = d.y[nx.tournament_str(number)]
        assert_array_equal(by_number, by_name,
                           "y{} indexing corrupted".format(number))
        assert_array_equal(by_number, expected,
                           "y{} targets corrupted".format(number))
Ejemplo n.º 4
0
 def setup_data(self):
     """
     Populate `self.data` according to `self.trainer_params`.

     When `trainer_params['get_current_data']` is truthy, NumerAPI is asked
     whether a new round has started and, if so, `self.data` is set from
     `self.get_tournament_data()`. Otherwise the archive at
     `trainer_params['local_data']` is loaded with `nx.load_zip`.

     Raises:
         FileNotFoundError: if local data is requested but
             `trainer_params['local_data']` is not an existing file.
     """
     if self.trainer_params['get_current_data']:
         napi = numerapi.NumerAPI(verbosity="info")
         # NOTE(review): when no new round is reported, self.data is not
         # assigned by this method -- confirm that is intended.
         if napi.check_new_round():
             LOGGER.info('Loading current dataset from NumerAPI..')
             self.data = self.get_tournament_data()
     else:
         local_path = self.trainer_params['local_data']
         if not os.path.isfile(local_path):
             # raise (not return) so a missing file cannot go unnoticed
             raise FileNotFoundError('local data not found')
         LOGGER.info(
             f"Loading data locally from {local_path}"
         )
         self.data = nx.load_zip(local_path)
Ejemplo n.º 5
0
def main():
    """Download the Numerai dataset, run a logistic model, save predictions."""
    zip_path = 'numerai_dataset.zip'

    # fetch the dataset archive from numerai, then read it back in
    nx.download_dataset(zip_path, verbose=True)
    dataset = nx.load_zip(zip_path, verbose=True)

    # logistic regression is only a starting point; write your own model
    estimator = nx.logistic()

    # hand model and data to nx.production, then write the result to a csv
    # file for later upload to numerai
    nx.production(estimator, dataset).to_csv('logistic.csv', verbose=True)
Ejemplo n.º 6
0
def predict():
    """
    Produce and upload predictions for every tournament name.

    Steps, in order:
      1. download the dataset archive and load it in single precision;
      2. for each name from nx.tournament_names(), load the saved model
         'model_trained_<name>' if that path exists, otherwise train a new
         model.LinearModel on data['train'];
      3. run nx.production and write '/tmp/prediction_<name>.csv';
      4. upload every prediction file through NumerAPI.

    Credentials are read from the NUMERAI_PUBLIC_ID and NUMERAI_SECRET_KEY
    environment variables; a missing variable raises KeyError.
    """
    tournaments = nx.tournament_names()
    print(tournaments)

    # download dataset from numerai; load=False because the archive is
    # loaded explicitly below with single_precision=True
    nx.download('numerai_dataset.zip', load=False)
    print('data downloaded')
    data = nx.load_zip('numerai_dataset.zip', single_precision=True)
    print('data loaded')

    # remember each file written so the upload step reuses the exact paths
    prediction_filenames = []
    for tournament_name in tournaments:
        saved_model_name = 'model_trained_' + tournament_name
        if os.path.exists(saved_model_name):
            print("using saved model for", tournament_name)
            m = model.LinearModel.load(saved_model_name)
        else:
            print("saved model not found for", tournament_name)
            m = model.LinearModel(verbose=True)

            print("training model for", tournament_name)
            m.fit(data['train'], tournament_name)

        print("running predictions for", tournament_name, flush=True)
        # run the model through nx.production for this tournament
        prediction = nx.production(m, data, tournament=tournament_name)

        # save predictions to csv file
        prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'
        prediction.to_csv(prediction_filename, verbose=True)
        prediction_filenames.append(prediction_filename)

    # submit the predictions

    # Numerai API key
    # You will need to create an API key by going to https://numer.ai/account
    # and clicking "Add" under the "Your API keys" section.
    # Select the following permissions for the key: "Upload submissions",
    # "Make stakes", "View historical submission info", "View user info"
    public_id = os.environ["NUMERAI_PUBLIC_ID"]
    secret_key = os.environ["NUMERAI_SECRET_KEY"]

    # one client and one model lookup serve every upload; neither depends on
    # the tournament being uploaded
    api = NumerAPI(public_id=public_id, secret_key=secret_key)
    model_id = api.get_models()['akrimedes_2']
    for prediction_filename in prediction_filenames:
        api.upload_predictions(prediction_filename, model_id=model_id)
Ejemplo n.º 7
0
def test_data_y_for_tournament():
    """test data.y_for_tournament"""
    d = nx.load_zip(TINY_DATASET_CSV)

    for number, name in nx.tournament_iter(active_only=True):
        # expected targets: zeros except the listed rows, NaN from row 10 on
        expected = np.zeros(14)
        expected[[0, 4, 5, 9]] = 0.75000
        expected[[1, 6]] = 0.25000
        expected[10:] = np.nan

        # look the target up by tournament number and by tournament name
        by_number = d.y[number]
        by_name = d.y[name]

        assert_array_equal(by_number, by_name, f"y{number} indexing corrupted")
        assert_array_equal(by_number, expected, f"y{number} targets corrupted")
Ejemplo n.º 8
0
def first_tournament():
    """
    Example of preparing a submission for the Numerai tournament.

    Relies on Numerox (install with: pip install numerox); more information
    is available at https://github.com/kwgoodman/numerox
    """
    zip_path = 'numerai_dataset.zip'

    # fetch the dataset archive from numerai, then read it back in
    nx.download(zip_path, verbose=True)
    dataset = nx.load_zip(zip_path, verbose=True)

    # logistic regression is only a starting point; write your own model
    estimator = nx.logistic()

    # hand model and data to nx.production and write the result to csv
    nx.production(estimator, dataset).to_csv('logistic.csv', verbose=True)
Ejemplo n.º 9
0
def download_data_object(verbose=False):
    "Used by numerox to avoid hard coding paths; probably not useful to users"
    # a named temporary file supplies the path to download to and load from
    with tempfile.NamedTemporaryFile() as scratch:
        archive_path = scratch.name
        download(archive_path, verbose=verbose)
        return nx.load_zip(archive_path)
Ejemplo n.º 10
0
# In[3]:


# Switch the working directory to the sibling "data" folder (<cwd>/../data)
# so that the relative dataset path used below resolves there.
os.chdir(os.path.join(os.getcwd(), "..", "data"))


# In[4]:


# To download the latest numerai dataset instead, use:
# data = nx.download("numerai_dataset.zip")

# Load an existing dataset archive, which skips the download step.
data = nx.load_zip("numerai_dataset.zip")


# In[5]:


# environment settings
# NOTE(review): presumably the model name and output folder used when saving
# submissions later in the script -- confirm against the code that uses them.
MODEL_NAME = "logistic-regression"
FOLDER_NAME = "submission"


# In[6]:


# extend the logistic model class offered by numerox
class logistic(nx.Model):
Ejemplo n.º 11
0
import numerox as nx
from model import get_model


# Load the dataset archive from the current working directory.
data = nx.load_zip('numerai_dataset.zip')
# The model object comes from get_model() in the local `model` module.
model = get_model()

# Run the model through nx.production for the 'bernie' tournament.
prediction = nx.production(model, data, 'bernie', verbosity=1)
# Write the 'bernie' predictions to output.csv.
prediction.to_csv('output.csv', tournament='bernie')
Ejemplo n.º 12
0
def update_play_data(numerai_zip_path):
    "Create and save data used by load_play_data function"
    full_data = nx.load_zip(numerai_zip_path)
    # sample with fraction=0.01 and a fixed seed, then save to TEST_DATA
    row_sample(full_data, fraction=0.01, seed=0).save(TEST_DATA)