Ejemplo n.º 1
0
def test_get_leaderboard_returns_empty_list():
    """The leaderboard of a just-created competition is an empty list."""
    # build our own API and competition instead of using the fixture
    round_number = 67
    api = NumerAPI(manager=NumerMockManager())
    api.manager.create_competition(number=round_number)

    leaderboard = api.get_leaderboard(round_number)

    assert isinstance(leaderboard, list)
    assert len(leaderboard) == 0
Ejemplo n.º 2
0
def download(filename, load=True, n_tries=100, sleep_seconds=300,
             verbose=False):
    """
    Download current Numerai dataset; overwrites if file exists.

    If `load` is True (default) then return data object; otherwise return
    None.

    If download fails then retry download `n_tries` times, pausing
    `sleep_seconds` between each try. Only ordinary errors (subclasses of
    `Exception`) trigger a retry; KeyboardInterrupt and SystemExit propagate
    immediately so a long retry loop can still be interrupted.

    If every try fails no exception is raised here: when `load` is True the
    function still attempts to load whatever is at `filename`, otherwise it
    returns None.

    Unlike nx.download() this function loads and returns the data object.
    """
    # line below expands e.g. ~/tmp to /home/me/tmp...
    filename = os.path.expanduser(filename)
    count = 0
    while count < n_tries:
        try:
            if verbose:
                print("Download dataset {}".format(filename))
            napi = NumerAPI()
            # the dataset url is always requested for tournament 8
            url = napi.get_dataset_url(tournament=8)
            download_file(url, filename)
            break
        except Exception:
            print('download failed')
            # no point in waiting once there are no tries left
            if count + 1 < n_tries:
                time.sleep(sleep_seconds)
        count += 1
    if load:
        data = nx.load_zip(filename, verbose=verbose)
    else:
        data = None
    return data
Ejemplo n.º 3
0
def upload(filename, tournament, public_id, secret_key, block=True,
           n_tries=100, sleep_seconds=60, verbose=False):
    """
    Upload tournament submission (csv file) to Numerai.

    If upload fails then retry upload `n_tries` times, pausing `sleep_seconds`
    between each try. Only ordinary errors (subclasses of `Exception`) are
    retried; KeyboardInterrupt and SystemExit propagate immediately.

    If block is True (default) then the scope of your token must be both
    upload_submission and read_submission_info. If block is False then only
    upload_submission is needed.

    Returns the tuple (upload_id, status).

    Raises RuntimeError if no try succeeded. `verbose` is accepted but not
    used by this function.
    """
    tournament = nx.tournament_int(tournament)
    last_error = None
    count = 0
    while count < n_tries:
        try:
            napi = NumerAPI(public_id=public_id, secret_key=secret_key,
                            verbosity='warning')
            upload_id = napi.upload_predictions(filename,
                                                tournament=tournament)
            if block:
                status = status_block(upload_id, public_id, secret_key)
            else:
                status = upload_status(upload_id, public_id, secret_key)
            return upload_id, status
        except Exception as err:
            last_error = err
            print('upload failed')
            # no point in waiting once there are no tries left
            if count + 1 < n_tries:
                time.sleep(sleep_seconds)
        count += 1
    # every try failed (or n_tries < 1): report it explicitly instead of
    # falling through to a return of names that were never assigned
    raise RuntimeError(
        'upload failed after {} tries: {}'.format(n_tries, last_error))
Ejemplo n.º 4
0
def download_data(api: numerapi.NumerAPI, keys):
    """
    Download the current dataset into ./data when a new round has started.

    `keys['LATEST_ROUND']` is parsed as an int and compared with the round
    reported by `api.get_current_round()`, which is queried exactly once.
    When they differ, `api.download_current_dataset('./data')` is called.

    Returns the current round number in both cases.
    """
    current_round = api.get_current_round()
    if int(keys['LATEST_ROUND']) != current_round:
        api.download_current_dataset('./data')
    return current_round
Ejemplo n.º 5
0
def test_get_submission_ids_contains_uploaded_submission(api: NumerAPI):
    """After one upload, the submission ids map the user name to that upload."""
    uploaded_id = api.upload_predictions('foo')
    submission_ids = api.get_submission_ids()

    assert len(submission_ids) == 1
    assert isinstance(submission_ids, dict)

    user_name = api.manager.user_name
    assert user_name in submission_ids
    assert submission_ids[user_name] == uploaded_id
Ejemplo n.º 6
0
def main(project_dir):
    """
    Download the current round's dataset into <project_dir>/data/raw.

    The dataset is stored as '<round>_numerai_raw.pkl'. If an entry with that
    name already exists nothing is downloaded. The target directory is
    created when it is missing.
    """
    logger = logging.getLogger(__name__)
    logger.info('Getting raw data')

    napi = NumerAPI()

    round_number = napi.get_current_round()
    dataset_filename = '{}_numerai_raw.pkl'.format(round_number)
    raw_data_path = os.path.join(project_dir, 'data', 'raw')
    raw_data_file = os.path.join(raw_data_path, dataset_filename)

    if os.path.exists(raw_data_file):
        logger.info("Dataset for round {} already downloaded as {}".format(
                        round_number, dataset_filename))
        return

    # a fresh checkout may not have data/raw yet; saving would fail without it
    os.makedirs(raw_data_path, exist_ok=True)

    logger.info("Downloading data for round {}".format(round_number))
    # the url is only needed when we actually download
    dataset_url = napi.get_dataset_url()
    df = download_dataset_as_df(dataset_url)

    logger.info('Data concatenated, downcasting data')
    df = df_to_numeric(df)

    logger.info('Data converted, saving to file')
    df.to_pickle(raw_data_file)

    logger.info("Dataset for round {} downloaded as {}".format(
                    round_number, dataset_filename))
Ejemplo n.º 7
0
def download_raw_leaderboard(round_number=None, tournament=1):
    """
    Return the raw leaderboard of one round as delivered by a raw query.

    When `round_number` is None the round reported by
    `NumerAPI.get_current_round()` is used. The value returned is the
    'leaderboard' entry of the first item under data -> rounds.
    """
    napi = NumerAPI(verbosity='warn')
    number = napi.get_current_round() if round_number is None else round_number
    query = '''
            query($number: Int!
                  $tournament: Int!) {
                rounds(number: $number
                       tournament: $tournament) {
                    leaderboard {
                        username
                        LiveLogloss
                        paymentGeneral {
                          nmrAmount
                          usdAmount
                        }
                        paymentStaking {
                          nmrAmount
                          usdAmount
                        }
                        stake {
                          value
                        }
                        stakeResolution {
                          destroyed
                        }
                    }
                }
            }
    '''
    response = napi.raw_query(query,
                              {'number': number, 'tournament': tournament})
    return response['data']['rounds'][0]['leaderboard']
Ejemplo n.º 8
0
def download(filename, tournament=1, verbose=False):
    "Fetch the current Numerai dataset into `filename`; overwrites if file exists"
    if verbose:
        print("Download dataset {}".format(filename))
    dataset_url = NumerAPI().get_dataset_url(tournament=tournament)
    # expand ~/tmp to /home/...
    local_path = os.path.expanduser(filename)
    download_file(dataset_url, local_path)
Ejemplo n.º 9
0
def get_user_activities(user):
    "Dataframe with the activity of `user`, gathered tournament by tournament"
    napi = NumerAPI()
    records = []
    # only the tournament number is needed; the name is ignored
    for number, _ in nx.tournament_iter():
        records.extend(napi.get_user_activities(user, number))
    return pd.DataFrame.from_dict([flatten_dict(r) for r in records])
Ejemplo n.º 10
0
def upload_status(upload_id, public_id, secret_key):
    "Status of an upload as a flat dictionary"
    napi = NumerAPI(public_id=public_id, secret_key=secret_key,
                    verbosity='warning')
    raw_status = napi.submission_status(upload_id)
    # entries that are themselves dicts are replaced by their 'value' item
    return {
        key: (item['value'] if isinstance(item, dict) else item)
        for key, item in raw_status.items()
    }
Ejemplo n.º 11
0
def test_get_current_round():
    """The current round follows the most recently created competition."""
    # build our own API and rounds instead of using the fixture
    api = NumerAPI(public_id='foo',
                   secret_key='bar',
                   manager=NumerMockManager())

    for expected_round in (1, 2):
        api.manager.create_competition(number=expected_round)
        assert api.get_current_round() == expected_round
Ejemplo n.º 12
0
def round_resolution_date(tournament=1):
    "Dataframe indexed by round holding the date each round was resolved."
    napi = NumerAPI(verbosity='warn')
    competitions = napi.get_competitions(tournament=tournament)
    frame = pd.DataFrame(competitions)[['number', 'resolveTime']]
    frame = frame.rename(columns={'number': 'round', 'resolveTime': 'date'})
    # keep only the calendar date of each resolve time
    frame['date'] = [stamp.date() for stamp in frame['date'].tolist()]
    return frame.set_index('round').sort_index()
Ejemplo n.º 13
0
def downloadNumeraiData():
    """
    Download and unzip the current Numerai dataset, then tidy up the folder.

    The dataset is downloaded below ./datasets/. Afterwards two shell
    commands move the downloaded zip file(s) to ZipFiles/ and delete the
    files matching example* in the dataset sub-folders. The exit codes of
    both commands are ignored.
    """

    # set up paths for download of dataset and upload of predictions
    dataset_parent_folder = "./datasets/"

    # We don't need to login in order to download the dataset
    napi = NumerAPI(verbosity="info")

    # download current dataset
    napi.download_current_dataset(dest_path=dataset_parent_folder, unzip=True)

    # both commands rely on shell globbing, so they must run through the
    # shell; without shell=True the whole string is taken as a program name
    sp.call("mv " + dataset_parent_folder + "/*.zip ZipFiles/", shell=True)
    sp.call("rm " + dataset_parent_folder + "/*/example*", shell=True)
Ejemplo n.º 14
0
def load_data(round_number=False):
    """
    Return the raw dataset of a round, read from its pickle file.

    A falsy `round_number` selects the round reported by
    `NumerAPI.get_current_round()`. When the pickle file is missing,
    `get_raw_data.main` is run once and the read is repeated.
    """
    napi = NumerAPI()
    round_id = round_number or napi.get_current_round()

    # the project root is two levels above this file's directory
    project_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
    pickle_name = '{}_numerai_raw.pkl'.format(round_id)
    pickle_path = os.path.join(project_dir, 'data', 'raw', pickle_name)

    try:
        frame = pd.read_pickle(pickle_path)
    except FileNotFoundError:
        get_raw_data.main(project_dir)
        frame = pd.read_pickle(pickle_path)
    return frame
Ejemplo n.º 15
0
def test_get_competitions():
    """get_competitions lists nothing at first, then the created round."""
    # build our own API and competitions instead of using fixtures
    api = NumerAPI(manager=NumerMockManager())

    before = api.get_competitions()
    assert isinstance(before, list)
    assert len(before) == 0

    expected_round = 42
    api.manager.create_competition(number=expected_round)

    after = api.get_competitions()
    assert isinstance(after, list)
    assert len(after) == 1
    assert after[0]['number'] == expected_round
Ejemplo n.º 16
0
def download_leaderboard(round_number=None, tournament=1):
    """
    Return the leaderboard of one tournament round.

    With `round_number` left as None the current round of `tournament` is
    looked up and used.
    """
    num = round_number
    if num is None:
        num = NumerAPI(verbosity='warn').get_current_round(
            tournament=tournament)
    raw = download_raw_leaderboard(round_number=num, tournament=tournament)
    return raw_leaderboard_to_df(raw, num)
Ejemplo n.º 17
0
def get_user_names():
    "List of the usernames returned by the rankings query (limit 100000)."
    rankings_query = '''
        query {
            rankings(limit:100000, offset:0)
                {
                    username
                }
        }
    '''
    response = NumerAPI().raw_query(rankings_query)
    return [entry['username'] for entry in response['data']['rankings']]
Ejemplo n.º 18
0
def round_dates():
    "Dataframe indexed by round with the open and resolve date of each round."
    napi = NumerAPI(verbosity='warn')
    competitions = napi.get_competitions(tournament=1)
    frame = pd.DataFrame(competitions)[['number', 'openTime', 'resolveTime']]
    frame = frame.rename(columns={'number': 'round',
                                  'openTime': 'open',
                                  'resolveTime': 'resolve'})
    # keep only the calendar date of each timestamp
    for column in ('open', 'resolve'):
        frame[column] = [stamp.date() for stamp in frame[column].tolist()]
    return frame.set_index('round').sort_index()
Ejemplo n.º 19
0
def upload(filename, public_id, secret_key, tournament=1, block=True):
    """
    Send a tournament submission (csv file) to Numerai.

    Returns the tuple (upload_id, status). With block True (default) the
    status comes from status_block, and the token scope must cover both
    upload_submission and read_submission_info. With block False the status
    comes from upload_status and only upload_submission is needed.
    """
    napi = NumerAPI(public_id=public_id, secret_key=secret_key,
                    verbosity='warning')
    upload_id = napi.upload_predictions(filename, tournament=tournament)
    fetch_status = status_block if block else upload_status
    return upload_id, fetch_status(upload_id, public_id, secret_key)
Ejemplo n.º 20
0
def download_leaderboard(round1=None, round2=None, tournament=1):
    """
    Download leaderboard for specified round range.

    The rounds from `round1` to `round2` (both inclusive) are downloaded and
    stacked into one dataframe in round order. A bound left as None is
    replaced by the current round of `tournament`; with both left as None
    only the current round is downloaded.

    Raises ValueError when the resulting range is empty, i.e. the first
    round is larger than the last round.
    """
    napi = NumerAPI(verbosity='warn')
    current = None
    if round1 is None or round2 is None:
        current = napi.get_current_round(tournament=tournament)
    r0 = current if round1 is None else round1
    r1 = current if round2 is None else round2
    if r0 > r1:
        raise ValueError(
            'empty round range: first round {} is after last round {}'.format(
                r0, r1))
    frames = []
    for num in range(r0, r1 + 1):
        raw = download_raw_leaderboard(round_number=num,
                                       tournament=tournament)
        frames.append(raw_leaderboard_to_df(raw, num))
    if len(frames) == 1:
        # a single round is returned as is, without going through concat
        return frames[0]
    # concatenate once instead of growing the dataframe on every iteration
    return pd.concat(frames)
Ejemplo n.º 21
0
def download_leaderboard(round_number=None, tournament=1):
    """
    Return the leaderboard of one tournament round.

    With `round_number` left as None the current round is used. Two columns
    are inserted: 'tournament' (position 1) and 'resolved' (position 2).
    'resolved' is False when the absolute values of the usd/nmr payout and
    burn columns sum to zero, True otherwise.
    """
    tournament = nx.tournament_int(tournament)
    num = round_number
    if num is None:
        num = NumerAPI(verbosity='warn').get_current_round()
    raw = download_raw_leaderboard(round_number=num, tournament=tournament)
    df = raw_leaderboard_to_df(raw, num)
    df.insert(1, 'tournament', tournament)
    payout_cols = ['usd_main', 'usd_stake', 'nmr_main', 'nmr_stake',
                   'nmr_burn']
    total = df[payout_cols].abs().sum().sum()
    df.insert(2, 'resolved', not (total == 0))
    return df
Ejemplo n.º 22
0
def test_download_current_dataset(api: NumerAPI):
    """
    Check that download_current_dataset(unzip=True) produces the csv files.

    The returned path must exist, and both expected csv files must exist in
    the directory named like that path without its ".zip" part. Whatever the
    outcome, the finally clause tries to delete the files again.
    """
    # stays None until the download returned, so cleanup is skipped if the
    # download itself failed
    directory = None
    csv_files = ['numerai_tournament_data.csv', 'numerai_training_data.csv']

    try:
        path = api.download_current_dataset(unzip=True)
        assert os.path.exists(path)
        # directory name is the returned path with ".zip" removed
        directory = path.replace(".zip", "")

        for csv_file in csv_files:
            final_path_name = os.path.join(directory, csv_file)
            assert os.path.exists(final_path_name)
    finally:
        if directory is not None:
            for csv_file in csv_files:
                os.remove(os.path.join(directory, csv_file))

            # NOTE(review): the csv files are checked directly inside
            # `directory`, yet a 'numerai_dataset' sub-directory is removed
            # here; presumably the api fixture creates it - confirm.
            os.removedirs(os.path.join(directory, 'numerai_dataset'))
            os.remove('%s.zip' % directory)
Ejemplo n.º 23
0
from utils import (
    save_model,
    load_model,
    neutralize,
    get_biggest_change_features,
    validation_metrics,
    ERA_COL,
    DATA_TYPE_COL,
    TARGET_COL,
    EXAMPLE_PREDS_COL
)


# download all the things

# no credentials are passed to the client
napi = NumerAPI()

# the round number is only used to name the live data file below
current_round = napi.get_current_round()

# Tournament data changes every week so we specify the round in their name. Training
# and validation data only change periodically, so no need to download them every time.
print('Downloading dataset files...')

# parents=False: only the v4 directory itself is created; an existing one is fine
Path("./v4").mkdir(parents=False, exist_ok=True)
napi.download_dataset("v4/train.parquet")
napi.download_dataset("v4/validation.parquet")
# presumably the second argument is the local destination path - confirm
# against the numerapi documentation
napi.download_dataset("v4/live.parquet", f"v4/live_{current_round}.parquet")
napi.download_dataset("v4/validation_example_preds.parquet")
napi.download_dataset("v4/features.json")

print('Reading minimal training data')
Ejemplo n.º 24
0
def getapi():
    "NumerAPI client built from the NUMERAI_ID and NUMERAI_SECRET values of getenv"
    public_id = getenv('NUMERAI_ID')
    secret_key = getenv('NUMERAI_SECRET')
    return NumerAPI(public_id, secret_key)
Ejemplo n.º 25
0
def test_error_handling_get_leaderboard_unknown_round_id(api: NumerAPI):
    """Asking for the leaderboard of a non-existent round raises ValueError."""
    unknown_round = -1  # no such round exists
    with pytest.raises(ValueError):
        api.get_leaderboard(unknown_round)
Ejemplo n.º 26
0
def get_stakes(round_number=None,
               tournament=1,
               sort_by='prize pool',
               mark_user=None,
               use_integers=True):
    """
    Download stakes, modify it to make it more useful, return as dataframe.

    cumsum is dollars ABOVE you.

    Parameters:
      round_number: round to query; None is sent to the API as 0. Values
        below 61 raise ValueError.
      tournament: tournament number passed to the query.
      sort_by: one of 'prize pool' (default; confidence descending, then
        days descending), 'c', 's', 'soc', 'days' or 'user'.
      mark_user: if given and present in the result, a 'mark' column is
        added: '<<<<' for that user and 'new' for every row whose 'days'
        is smaller than that user's.
      use_integers: if True, 'days' is rounded to 4 decimals and 's', 'soc'
        and 'cumsum' are cast to int.

    Returns the tuple (stakes, c_zero_users): the dataframe indexed by user
    with columns days, s, soc, cumsum, c (plus 'mark' when applicable), and
    the list of users whose confidence is 0, which are dropped from the
    dataframe.

    Raises ValueError for a round below 61 or an unrecognized `sort_by`.
    """

    # get raw stakes
    napi = NumerAPI()
    query = '''
        query stakes($number: Int!
                     $tournament: Int!){
          rounds(number: $number
                 tournament: $tournament){
            leaderboard {
              username
              stake {
                insertedAt
                soc
                confidence
                value
              }
            }
          }
        }
    '''
    if round_number is None:
        # presumably the API treats round 0 as "current round" - confirm
        round_number = 0
    elif round_number < 61:
        raise ValueError('First staking was in round 61')
    arguments = {'number': round_number, 'tournament': tournament}
    stakes = napi.raw_query(query, arguments)

    # massage raw stakes
    stakes = stakes['data']['rounds'][0]['leaderboard']
    stakes2 = []
    strptime = datetime.datetime.strptime
    # naive UTC "now"; the insertedAt timestamps are parsed as naive too
    now = datetime.datetime.utcnow()
    secperday = 24 * 60 * 60
    micperday = 1000000 * secperday
    for s in stakes:
        user = s['username']
        s = s['stake']
        # leaderboard entries without a stake value are skipped
        if s['value'] is not None:
            s2 = {}
            s2['user'] = user
            s2['s'] = float(s['value'])
            s2['c'] = decimal.Decimal(s['confidence'])
            s2['soc'] = float(s['soc'])
            # age of the stake expressed in (fractional) days
            t = now - strptime(s['insertedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
            d = t.days
            d += 1.0 * t.seconds / secperday
            d += 1.0 * t.microseconds / micperday
            s2['days'] = d
            stakes2.append(s2)
    stakes = stakes2

    # jam stakes into a dataframe
    # NOTE(review): with no stakes at all the column selection below raises
    # KeyError, because the empty dataframe has no columns
    stakes = pd.DataFrame(stakes)
    stakes = stakes[['days', 's', 'soc', 'c', 'user']]

    # remove C=0 stakers
    c_zero_users = stakes.user[stakes.c == 0].tolist()
    stakes = stakes[stakes.c != 0]

    # index by user
    stakes = stakes.set_index('user')

    # sort in prize pool order; add s/c cumsum
    stakes = stakes.sort_values(['c', 'days'],
                                axis=0,
                                ascending=[False, False])
    # exclusive running total: sum of soc over the rows sorted before each row
    cumsum = stakes.soc.cumsum(axis=0) - stakes.soc  # dollars above you
    stakes.insert(3, 'cumsum', cumsum)

    # other sorting
    if sort_by == 'prize pool':
        pass
    elif sort_by == 'c':
        stakes = stakes.sort_values(['c'], ascending=[False])
    elif sort_by == 's':
        stakes = stakes.sort_values(['s'], ascending=[False])
    elif sort_by == 'soc':
        stakes = stakes.sort_values(['soc'], ascending=[False])
    elif sort_by == 'days':
        stakes = stakes.sort_values(['days'], ascending=[True])
    elif sort_by == 'user':
        # 'user' is the index name at this point, not a column
        stakes = stakes.sort_values(['user'], ascending=[True])
    else:
        raise ValueError("`sort_by` key not recognized")

    # round stakes
    if use_integers:
        stakes['days'] = stakes['days'].round(4)
        stakes['s'] = stakes['s'].astype(int)
        stakes['soc'] = stakes['soc'].astype(int)
        stakes['cumsum'] = stakes['cumsum'].astype(int)

    # mark user
    if mark_user is not None and mark_user in stakes.index:
        stakes['mark'] = ''
        me = stakes.loc[mark_user]['days']
        # rows with fewer days than the marked user, i.e. more recent stakes
        idx = stakes.days < me
        stakes.loc[idx, 'mark'] = 'new'
        stakes.loc[mark_user, 'mark'] = '<<<<'

    return stakes, c_zero_users
Ejemplo n.º 27
0
def get_stakes_minimal(round_number=None, tournament=1, mark_user=None):
    """
    Return the stakes of a round as a dataframe indexed by user.

    Columns are days (age of the stake in fractional days), s, soc and c.
    Rows are sorted by c descending, then days descending. A `round_number`
    of None is sent to the API as 0; values below 61 raise ValueError.

    If `mark_user` is given and present in the result, a 'mark' column is
    added: '<<<<' for that user and 'new' for every row whose days value is
    smaller than that user's.
    """

    tournament = nx.tournament_int(tournament)

    # query the raw stakes
    napi = NumerAPI()
    query = '''
        query stakes($number: Int!
                     $tournament: Int!){
          rounds(number: $number
                 tournament: $tournament){
            leaderboard {
              username
              stake {
                insertedAt
                soc
                confidence
                value
              }
            }
          }
        }
    '''
    if round_number is None:
        round_number = 0
    elif round_number < 61:
        raise ValueError('First staking was in round 61')
    response = napi.raw_query(query, {'number': round_number,
                                      'tournament': tournament})
    leaderboard = response['data']['rounds'][0]['leaderboard']

    # turn every leaderboard entry that carries a stake into a flat record
    parse_time = datetime.datetime.strptime
    now = datetime.datetime.utcnow()
    sec_per_day = 24 * 60 * 60
    mic_per_day = 1000000 * sec_per_day
    records = []
    for entry in leaderboard:
        user = entry['username']
        stake = entry['stake']
        if stake['value'] is None:
            continue
        value = float(stake['value'])
        confidence = decimal.Decimal(stake['confidence'])
        soc = float(stake['soc'])
        age = now - parse_time(stake['insertedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
        # age of the stake expressed in (fractional) days
        days = (age.days
                + 1.0 * age.seconds / sec_per_day
                + 1.0 * age.microseconds / mic_per_day)
        records.append({'user': user,
                        's': value,
                        'c': confidence,
                        'soc': soc,
                        'days': days})

    # dataframe indexed by user, in prize pool order
    stakes = pd.DataFrame(records)[['days', 's', 'soc', 'c', 'user']]
    stakes = stakes.set_index('user')
    stakes = stakes.sort_values(by=['c', 'days'], ascending=[False, False])

    # nothing to mark
    if mark_user is None or mark_user not in stakes.index:
        return stakes

    stakes['mark'] = ''
    marked_days = stakes.loc[mark_user]['days']
    is_newer = stakes['days'] < marked_days
    stakes.loc[is_newer, 'mark'] = 'new'
    stakes.loc[mark_user, 'mark'] = '<<<<'
    return stakes
    "colsample_bytree": 0.1
}

# the amount of downsampling we'll use to speed up cross validation and full train.
# a value of 1 means no downsampling
# a value of 10 means use every 10th row
downsample_cross_val = 20
downsample_full_train = 2

# if model_selection_loop=True get OOS performance for training_data
# and use that to select best model
# if model_selection_loop=False, just predict on tournament data using existing models and model config
model_selection_loop = True
model_config_name = "advanced_example_model"

# no credentials are passed to the client
napi = NumerAPI()

current_round = napi.get_current_round()

# parents=False: only the v4 directory itself is created; an existing one is fine
Path("./v4").mkdir(parents=False, exist_ok=True)
napi.download_dataset("v4/train.parquet")
napi.download_dataset("v4/features.json")

# NOTE(review): this message is printed even when model_selection_loop is
# False, because it sits outside the if below - confirm that is intended
print("Entering model selection loop.  This may take awhile.")
if model_selection_loop:
    model_config = {}
    print('reading training_data')
    training_data = pd.read_parquet('v4/train.parquet')

    # keep track of some prediction columns
    ensemble_cols = set()
Ejemplo n.º 29
0
def upload(filename,
           tournament,
           public_id,
           secret_key,
           block=True,
           n_tries=100,
           sleep_seconds=60,
           verbose=False,
           model_id=None):
    """
    Send a tournament submission (csv file) to Numerai.

    Accounts with more than one model must pass `model_id`, and a given
    `model_id` must be one of the account's models; otherwise an Exception
    is raised before anything is uploaded.

    A failed upload is retried until `n_tries` tries were made, pausing
    `sleep_seconds` after each failure. An error whose message starts with
    "Can't update submission after deadline" is not retried but re-raised
    as an Exception. When all tries fail an Exception is raised.

    With block True (default) the token scope must cover both
    upload_submission and read_submission_info. With block False only
    upload_submission is needed.

    Returns the tuple (upload_id, status).
    """
    tournament = nx.tournament_int(tournament)
    napi = NumerAPI(public_id=public_id,
                    secret_key=secret_key,
                    verbosity='warning')

    # check model_id against the account's models before uploading
    models = napi.get_models()
    if len(models) > 1 and model_id is None:
        raise Exception(
            f"Account has multiple models - you must specify model_id from {models}"
        )
    if model_id and model_id not in models.values():
        raise Exception(
            f"Specified model_id {model_id} not found in account models {models}"
        )

    attempts = 0
    while attempts < n_tries:
        try:
            upload_id = napi.upload_predictions(filename,
                                                tournament=tournament,
                                                model_id=model_id)
            fetch_status = status_block if block else upload_status
            status = fetch_status(upload_id, public_id, secret_key,
                                  model_id=model_id)
            break
        except Exception as err:  # noqa
            if str(err).startswith("Can't update submission after deadline"):
                # retrying cannot help after the deadline: bail out
                raise Exception(err)
            print('Upload exception - %s' % err)
            time.sleep(sleep_seconds)
        attempts += 1
    else:
        # the loop ended without a successful upload
        raise Exception('Upload failed after reaching max retries')

    return upload_id, status
Ejemplo n.º 30
0
def get_current_round_number(tournament):
    "Number of the current round of `tournament`, as reported by NumerAPI."
    return NumerAPI(verbosity='warn').get_current_round(tournament=tournament)