def test_get_competitions():
    """Fetching the competitions yields a list with more than 80 entries."""
    client = NumerAPI()

    # Ask for every competition the API reports.
    competitions = client.get_competitions()

    assert isinstance(competitions, list)
    assert len(competitions) > 80
def test_download_current_dataset():
    """
    After a download with ``unzip=True`` both the returned path and the
    tournament csv inside the unzipped directory exist on disk.
    """
    client = NumerAPI()
    downloaded_path = client.download_current_dataset(unzip=True)
    assert os.path.exists(downloaded_path)

    # The unzipped directory is the returned path with ".zip" removed.
    unzipped_dir = downloaded_path.replace(".zip", "")
    tournament_csv = os.path.join(unzipped_dir, "numerai_tournament_data.csv")
    assert os.path.exists(tournament_csv)
def test_stake():
    """Staking without credentials raises ``ValueError`` about authentication."""
    client = NumerAPI()

    # The call cannot succeed because we are not authorized, but getting
    # this far still shows that the request itself is well-formed.
    with pytest.raises(ValueError) as excinfo:
        client.stake(3, 2)

    # The error is expected to complain about the missing login.
    message = str(excinfo.value)
    assert "You must be authenticated" in message
Example #4
0
 def output(self):
     """
     Return the local target for this task's predictions csv file.

     The file name embeds the current round number, obtained from a newly
     created ``NumerAPI`` client that is also stored on ``self.apc``.
     """
     self.apc = NumerAPI()
     current_round = self.apc.get_current_round()
     file_name = 'predictions_{0}_LogisticRegression.csv'.format(current_round)
     target_path = os.path.join(self.output_path, file_name)
     return luigi.LocalTarget(target_path)
Example #5
0
 def run(self):
     """
     Download the current dataset (without unzipping) to this task's output
     path, retrying every 10 seconds for as long as the call raises.
     """
     client = NumerAPI()
     finished = False
     while not finished:
         try:
             client.download_current_dataset(
                 dest_filename=self.output().path, unzip=False)
         except Exception:
             # Every failure is swallowed; pause, then try again.
             time.sleep(10)
         else:
             finished = True
Example #6
0
 def login(self):
     """
     Create the ``NumerAPI`` client from the ``PUBLIC_ID`` and
     ``PRIVATE_SECRET`` environment variables and keep it on ``self.napi``.

     A missing variable raises ``KeyError``. Client creation is retried
     every 10 seconds, logging each failure, until it succeeds.
     """
     env = os.environ
     public_id = env['PUBLIC_ID']
     private_secret = env['PRIVATE_SECRET']

     connected = False
     while not connected:
         try:
             self.napi = NumerAPI(public_id, private_secret)
         except Exception:
             self.logger.exception('Login failed')
             time.sleep(10)
         else:
             connected = True
    def output(self):
        """
        Manages the files to be written and determines their existence.
        This is determined by checking all the listed files below. If any
        of them does not exist, :py:func:`run` is evoked.

        Note that this method itself asks the API for the current round and
        triggers ``download_current_dataset`` every time it is called.

        :returns:
            A ``dict`` with the following keys:

            * ``zipfile``: original file as downloaded
            (``numerai_dataset_xxx.zip``)
            * ``training_data.csv``: the training data
            (``numerai_training_data.csv``)
            * ``tournament_data.csv``: the tournament data
            (``numerai_tournament_data.csv``)
            * ``example_predictions.csv``: example predictions
            (``example_predictions.csv``)

            Note that ``example_model.py`` and ``example_model.r`` are not referenced,
            as these are to no use for us.

        :raises AssertionError:
            if ``download_current_dataset`` returns a falsy value.
        """
        self.apc = NumerAPI()

        current_round = self.apc.get_current_round()
        dataset_name = "numerai_dataset_{0}.zip".format(current_round)
        dataset_dir = "numerai_dataset_{0}".format(current_round)

        # Run the download as a plain statement: when it was wrapped in an
        # ``assert`` the whole call disappeared under ``python -O``.
        downloaded = self.apc.download_current_dataset(
            dest_path=self.output_path,
            dest_filename=dataset_name,
            unzip=True)
        if not downloaded:
            # Same exception type the former ``assert`` raised.
            raise AssertionError(
                "download_current_dataset failed for {0}".format(dataset_name))

        # see numerapi download_current_dataset
        dataset_path = os.path.join(self.output_path, dataset_dir)

        training_data_path = os.path.join(dataset_path,
                                          'numerai_training_data.csv')
        tournament_data_path = os.path.join(dataset_path,
                                            'numerai_tournament_data.csv')
        example_data_path = os.path.join(dataset_path,
                                         'example_predictions.csv')

        out = {
            'zipfile':
            luigi.LocalTarget(os.path.join(self.output_path, dataset_name)),
            'training_data.csv':
            luigi.LocalTarget(training_data_path),
            'tournament_data.csv':
            luigi.LocalTarget(tournament_data_path),
            'example_predictions.csv':
            luigi.LocalTarget(example_data_path)
        }
        print(out)
        return out
def test_error_handling():
    """Bad arguments and bad credentials all surface as ``ValueError``."""
    client = NumerAPI()

    # A string where an integer round number is expected.
    with pytest.raises(ValueError):
        client.get_leaderboard("foo")

    # A round number that does not exist.
    with pytest.raises(ValueError):
        client.get_leaderboard(-1)

    # An unauthenticated request: a bogus token is installed before the
    # submission status is requested.
    with pytest.raises(ValueError):
        client.token = ("foo", "bar")
        client.submission_id = 1
        client.submission_status()
Example #9
0
class SubmissionTarget(luigi.target.Target):
    """
    Implements a submission target to "output" predictions from luigi tasks on
    the numer.ai servers.
    """
    def __init__(self, path, public_id, secret):
        """
        Creates a new SubmissionTarget.

        :param: path (str):
            local path to the predictions csv file
        :param: public_id (str):
            public_id as reported by the numer.ai website when creating API
            credentials
        :param: secret (str):
            secret as reported by the numer.ai website when creating API
            credentials
        """
        self.path = path
        # Only the file name (last path component) is compared in exists().
        self.fn = os.path.split(path)[1]
        self.apc = NumerAPI(public_id, secret)

    def exists(self):
        """
        Checks if a submission for the file named :py:attr:`path` was uploaded.

        NB: the filename as reported by the server is appended by a random
        string (before the file extension), and we can just access the file
        that was submitted last. This might result in double uploads for the
        same file.

        :returns:
            ``True`` if a reported filename starts with the local file name
            (with ``.csv`` removed), ``False`` otherwise -- including when
            the server reports no submission at all.
        """
        qry = "query user { user { latestSubmission { filename } } }"
        res = self.apc.raw_query(qry, authorization=True)

        # Guard against a null/empty ``latestSubmission`` so that "nothing
        # reported" yields False instead of a TypeError while iterating.
        submissions = res['data']['user']['latestSubmission'] or []

        # The prefix does not change per entry, so compute it once.
        prefix = self.fn.replace('.csv', '')
        return any(
            entry['filename'].startswith(prefix) for entry in submissions)

    def submit(self):
        """
        Submits the predictions to the numer.ai servers and tries to report
        back the status.
        """
        self.apc.upload_predictions(self.path)
        print(self.apc.submission_status())
Example #10
0
def getCSV(hmpath=homepath()):
    """
    Download the current dataset into the parser's data directory and move
    the extracted csv files up into that directory.

    After the download, every ``.csv`` file in the first sub-directory of
    ``ps.datapath`` is moved up one level, that sub-directory is removed and
    the ``.zip`` files found in ``ps.datapath`` are deleted.

    :param hmpath:
        passed to ``Parser``; note that the default ``homepath()`` is
        evaluated once, when this function is defined.
    :returns:
        the ``Parser`` instance created for ``hmpath``.
    """
    napi = NumerAPI(verbosity="info")
    ps = Parser(hmpath)
    napi.download_current_dataset(dest_path=ps.datapath, unzip=True)

    # os.walk() is a generator of (root, dirs, files) triples; only the
    # first one, describing the top-level directory, is needed here.
    root, dirs, files = next(os.walk(ps.datapath))
    extracted_dir = op.join(root, dirs[0])
    for fn in os.listdir(extracted_dir):
        if fn.endswith('.csv'):
            os.rename(op.join(extracted_dir, fn), op.join(root, fn))

    # NOTE(review): os.removedirs() raises OSError if non-csv files are left
    # behind in the extracted directory -- confirm this is intended.
    os.removedirs(extracted_dir)

    for fn in files:
        if fn.endswith(".zip"):
            # ``files`` holds bare names, so resolve them against ``root``.
            os.remove(op.join(root, fn))

    return ps
Example #11
0
    def __init__(self, path, public_id, secret):
        """
        Set up the target for one predictions file.

        :param: path (str):
            local path of the csv file holding the predictions
        :param: public_id (str):
            the public_id shown by the numer.ai website when API credentials
            are created
        :param: secret (str):
            the secret shown by the numer.ai website when API credentials
            are created
        """
        self.path = path
        # Keep just the file name, i.e. the last component of the path.
        self.fn = os.path.basename(path)
        self.apc = NumerAPI(public_id, secret)
Example #12
0
class TrainAndPredict(luigi.Task):
    """
    Trains a logistic regression classifier on the training data, then
    predicts the targets on the tournament data.
    The default signature of this task is ``TrainAndPredict(output_path='./data')``.

    :param: output_path (str):
        path to the directory where the predictions shall be saved to, defaults to
        ``./data``.
    """
    output_path = luigi.Parameter(default='./data/')

    def requires(self):
        """
        Dependencies to be fullfiled prior to execution. This task needs the
        :py:class:`tasks.numerai_fetch_training_data.FetchAndExtractData` task that provides
        the training/tournament data.
        """
        return FetchAndExtractData(output_path=self.output_path)

    def output(self):
        """
        Saves outputs of this task--which is a csv file of the predictions made for the
        given data.
        """
        self.apc = NumerAPI()
        fn = 'predictions_{0}_LogisticRegression.csv'.format(
            self.apc.get_current_round())
        return luigi.LocalTarget(os.path.join(self.output_path, fn))

    def run(self):
        """
        Trains a model and makes predictions given the data. These are then saved
        to the csv file named by :py:func:`output`.
        """
        data = self.input()
        out = self.output()

        training_data = pd.read_csv(data['training_data.csv'].path, header=0)
        prediction_data = pd.read_csv(data['tournament_data.csv'].path,
                                      header=0)

        # Every column whose name contains "feature" is a model input.
        features = [f for f in list(training_data) if "feature" in f]
        X = training_data[features]
        Y = training_data["target"]
        x_prediction = prediction_data[features]
        ids = prediction_data["id"]

        # This is your model that will learn to predict
        model = linear_model.LogisticRegression(n_jobs=-1)

        # Your model is trained on the training_data
        model.fit(X, Y)

        # Your trained model is now used to make predictions on the
        # numerai_tournament_data
        # The model returns two columns: [probability of 0, probability of 1]
        # We are just interested in the probability that the target is 1.
        # (Computed once: the former duplicate prediction pass and its stray
        # "predictions.csv" in the working directory are gone.)
        y_prediction = model.predict_proba(x_prediction)
        results = y_prediction[:, 1]
        results_df = pd.DataFrame(data={'probability': results})
        joined = pd.DataFrame(ids).join(results_df)

        print("Writing predictions to {0}".format(out.path))
        # Save the predictions out to the task's output CSV file
        joined.to_csv(out.path, index=False)
Example #13
0
def download_new_dataset():
    """Download and unzip the current dataset into ``<DATA_DIR>/raw``."""
    client = NumerAPI()
    print("Downloading the current dataset...")
    raw_dir = os.path.join(definitions.DATA_DIR, 'raw')
    client.download_current_dataset(dest_path=raw_dir, unzip=True)
Example #14
0
def test_get_submission_ids(api: NumerAPI):
    """The submission ids come back as a non-empty ``dict``."""
    submission_ids = api.get_submission_ids()

    assert submission_ids
    assert isinstance(submission_ids, dict)
Example #15
0
def test_get_submission_ids():
    """A fresh client reports at least one submission id, as a ``dict``."""
    client = NumerAPI()
    submission_ids = client.get_submission_ids()

    assert len(submission_ids) > 0
    assert isinstance(submission_ids, dict)
Example #16
0
def fixture_for_api():
    """
    Build a ``NumerAPI`` client with ``DEBUG`` verbosity.

    Credentials are read from the ``NUMERAI_PUBLIC_ID`` and
    ``NUMERAI_SECRET_KEY`` environment variables; a variable that is not set
    is passed on as ``None``.
    """
    env = os.environ
    credentials = {
        'public_id': env.get('NUMERAI_PUBLIC_ID'),
        'secret_key': env.get('NUMERAI_SECRET_KEY'),
    }
    return NumerAPI(verbosity='DEBUG', **credentials)
Example #17
0
def test_get_staking_leaderboard():
    """The staking leaderboard of round 82 has exactly 115 entries."""
    client = NumerAPI()
    round_82_stakes = client.get_staking_leaderboard(82)

    # 115 people staked that round
    expected_stakers = 115
    assert len(round_82_stakes) == expected_stakers
Example #18
0
def main():
    """
    Example walk-through of the API: log in, download the current dataset,
    print earnings, scores and user information, then upload predictions.

    NOTE(review): the "get user information" section below contains a
    garbled ``print`` line that is not valid Python, and ``submission`` is
    never assigned in the visible code -- restore the missing statements
    from the original example before running this.
    """
    # set example username and round
    example_username = "******"
    example_round = 51

    # set up paths for download of dataset and upload of predictions
    # (the dataset directory name embeds today's date as YYYYMMDD)
    now = datetime.now().strftime("%Y%m%d")
    dataset_parent_folder = "./dataset"
    dataset_name = "numerai_dataset_{0}/example_predictions.csv".format(now)
    dataset_path = "{0}/{1}".format(dataset_parent_folder, dataset_name)

    # most API calls do not require logging in
    napi = NumerAPI(verbosity="info")

    # log in
    credentials = napi.login()
    print(json.dumps(credentials, indent=2))

    # download current dataset
    dl_succeeded = napi.download_current_dataset(
        dest_path=dataset_parent_folder, unzip=True)
    print("download succeeded: " + str(dl_succeeded))

    # get competitions (returned data is too long to print practically)
    # all_competitions = napi.get_all_competitions()
    # current_competition = napi.get_competition()
    # example_competition = napi.get_competition(round_id=example_round)

    # get user earnings per round
    user_earnings = napi.get_earnings_per_round()
    print("user earnings:")
    print(user_earnings)
    example_earnings = napi.get_earnings_per_round(username=example_username)
    print("example earnings:")
    print(example_earnings)

    # get scores for user
    personal_scores = napi.get_scores_for_user()
    print("personal scores:")
    print(personal_scores)
    other_scores = napi.get_scores_for_user(username=example_username)
    print("other scores:")
    print(other_scores)

    # get user information
    current_user = napi.get_user()
    # NOTE(review): the next line is corrupted (several statements appear to
    # have been collapsed into one) and does not parse as written.
    print("current user:"******"example user:"******"submission:")
    print(json.dumps(submission, indent=2))

    # upload predictions
    ul_succeeded = napi.upload_predictions(dataset_path)
    print("upload succeeded: " + str(ul_succeeded))
Example #19
0
def test_get_leaderboard():
    """The leaderboard of round 67 has exactly 1425 entries."""
    client = NumerAPI()
    round_67_leaderboard = client.get_leaderboard(67)

    expected_entries = 1425
    assert len(round_67_leaderboard) == expected_entries
Example #20
0
def test_raw_query():
    """A raw ``query {dataset}`` returns a ``dict`` with a ``data`` key."""
    client = NumerAPI()
    response = client.raw_query("query {dataset}")

    assert isinstance(response, dict)
    assert "data" in response
Example #21
0
def test_get_current_round():
    """The current round number reported by a fresh client is at least 82."""
    client = NumerAPI()
    round_number = client.get_current_round()

    assert round_number >= 82
Example #22
0
class Evaluator(threading.Thread):
    """
    Thread that uploads prediction files, polls their submission status and
    writes the resulting scores to ``<prediction>.report.json``.
    """

    def __init__(self):
        """Set up the ``evaluator`` logger at DEBUG level, then log in."""
        super(Evaluator, self).__init__()
        evaluator_logger = logging.getLogger('evaluator')
        evaluator_logger.setLevel(logging.DEBUG)
        self.logger = evaluator_logger
        self.login()

    def login(self):
        """
        Create the ``NumerAPI`` client from the ``PUBLIC_ID`` and
        ``PRIVATE_SECRET`` environment variables and keep it on ``self.napi``.

        A missing variable raises ``KeyError``. Client creation is retried
        every 10 seconds, logging each failure, until it succeeds.
        """
        env = os.environ
        public_id = env['PUBLIC_ID']
        private_secret = env['PRIVATE_SECRET']

        connected = False
        while not connected:
            try:
                self.napi = NumerAPI(public_id, private_secret)
            except Exception:
                self.logger.exception('Login failed')
                time.sleep(10)
            else:
                connected = True

    def upload(self, prediction):
        """
        Upload the file ``prediction``, retrying until one attempt succeeds.

        An HTTP 429 response pauses for 30 minutes, any other HTTP error for
        60 seconds and every other exception for 10 seconds.
        """
        uploaded = False
        while not uploaded:
            try:
                self.logger.info('Uploading prediction: {}'.format(prediction))
                self.napi.upload_predictions(file_path=prediction)
                self.logger.info('Uploaded prediction: {}'.format(prediction))
                uploaded = True
            except requests.exceptions.HTTPError as error:
                rate_limited = error.response.status_code == 429
                if rate_limited:
                    self.logger.info('Backing off')
                    time.sleep(30 * 60)
                else:
                    self.logger.exception('Network failure')
                    time.sleep(60)
            except Exception:
                self.logger.exception('Upload failure')
                time.sleep(10)

    def check(self, prediction):
        """
        Poll the submission status every 10 seconds until the logloss is
        present and neither concordance nor originality is pending.

        Exceptions while polling are logged and polling continues.

        :returns: the status mapping of the finished submission.
        """
        while True:
            try:
                self.logger.info('Checking submission: {}'.format(prediction))
                status = self.napi.submission_status()
                self.logger.info('Got {}: {}'.format(prediction, str(status)))

                # All three fields are read before deciding, so a missing
                # key is reported through the except branch below.
                logloss_missing = status['validation_logloss'] is None
                concordance_pending = bool(status['concordance']['pending'])
                originality_pending = bool(status['originality']['pending'])

                if not (logloss_missing or concordance_pending
                        or originality_pending):
                    return status
                time.sleep(10)
            except Exception:
                self.logger.exception('Checking submission failed')
                time.sleep(10)

    def report(self, prediction, status):
        """
        Write the scores found in ``status`` as UTF-8 encoded JSON to
        ``prediction + '.report.json'``.
        """
        summary = {
            'logloss': status['validation_logloss'],
            'consistency': status['consistency'],
            'concordance': status['concordance']['value'],
            'originality': status['originality']['value'],
        }
        report_path = prediction + '.report.json'
        with open(report_path, 'wb') as handle:
            encoded = json.dumps(summary).encode('utf-8')
            handle.write(encoded)

    def submit(self, prediction):
        """Upload ``prediction``, wait 5 seconds, then poll and report."""
        self.upload(prediction)
        time.sleep(5)
        final_status = self.check(prediction)
        self.report(prediction, final_status)

    def run(self):
        """
        Thread body: forever take predictions from a ``Captor`` built from
        the ``STORING`` environment variable and the ``predictions*.csv``
        pattern, submitting each one that has no report file yet.
        """
        captor = Captor(os.getenv('STORING'), 'predictions*.csv')
        while True:
            while not captor.empty():
                prediction = captor.grab()
                if os.path.isfile(prediction + '.report.json'):
                    # Already reported earlier; nothing to do.
                    continue
                self.submit(prediction)
            time.sleep(1)
Example #23
0
def test_get_current_round(api: NumerAPI):
    """The current round number reported by the client is at least 82."""
    round_number = api.get_current_round()

    assert round_number >= 82
Example #24
0
from h2o.grid.grid_search import H2OGridSearch
from numerapi.numerapi import NumerAPI
from flatten_json import flatten
from time import sleep
from slacker import Slacker

# In[186]:

# Set up the Slack client; 'slack_token' is a placeholder value.
token = 'slack_token'
slack = Slacker(token)

# In[187]:

# Set up the numerai API client; the credentials and the username below are
# placeholder values.
napi = NumerAPI()
napi.credentials = ('email', 'password')
username = '******'

# In[188]:

# NOTE(review): `h2o` itself is not imported in the visible import lines --
# confirm it is imported earlier in the file.
h2o.init()
#h2o.remove_all()

# In[189]:

# Download the current dataset into the working directory and unzip it.
napi.download_current_dataset(dest_path='.', unzip=True)

# In[190]:
Example #25
0
def test_get_leaderboard(api: NumerAPI):
    """The leaderboard of round 67 has exactly 1425 entries."""
    round_67_leaderboard = api.get_leaderboard(67)

    expected_entries = 1425
    assert len(round_67_leaderboard) == expected_entries
Example #26
0
def main():
    """
    Demonstrate the API: dataset, competition and leaderboard calls on a
    client without credentials first, then an upload and a status check on
    a client created with API tokens.
    """
    # --- calls that do not require logging in ---
    public_api = NumerAPI(verbosity="info")
    # current dataset
    public_api.download_current_dataset(unzip=True)
    # all competitions
    competitions = public_api.get_competitions()
    # leaderboard of the current round
    current_leaderboard = public_api.get_leaderboard()
    # leaderboard of a historic round
    round_67_leaderboard = public_api.get_leaderboard(round_num=67)

    # --- calls made with API tokens (placeholder values) ---
    public_id, secret_key = "somepublicid", "somesecretkey"
    authed_api = NumerAPI(public_id, secret_key)

    # upload the predictions, then check the submission status
    upload_id = authed_api.upload_predictions("mypredictions.csv")
    authed_api.submission_status()
Example #27
0
#!/usr/bin/env python

from numerapi.numerapi import NumerAPI

# Most API calls don't require logging in, so the client is created without
# credentials:
napi = NumerAPI()

# Download the current dataset into the working directory and unzip it.
print("Downloading the current dataset...")
napi.download_current_dataset(dest_path='.', unzip=True)

# User-specific information ('******' is a placeholder username)
username = '******'
print("Getting information about user {}...".format(username))
print(napi.get_user(username))
print(napi.get_scores(username))
print(napi.get_earnings_per_round(username))

# Get the leaderboard for the current round of the competition
print(napi.get_new_leaderboard())

# Get the leaderboard for a previous round of the competition (round 40)
print(napi.get_new_leaderboard(40))

# Uploading predictions to your account requires your credentials:
# napi.credentials = ("YOUR_EMAIL", "YOUR_PASSWORD")
# napi.upload_prediction('./numerai_datasets/example_predictions.csv')
class FetchAndExtractData(luigi.Task):
    """
    Fetches the most recent dataset and extracts the contents to the given
    path if not yet done (default path is ``./data``).

    :param: output_path:
        (relative) path where the data should be written to. Defaults to
        ``./data``. Default signature is
        ``FetchAndExtractData(output_path='./data')``.

    ::

        data
        ├── numerai_dataset_95
        │   ├── example_model.py
        │   ├── example_model.r
        │   ├── example_predictions.csv
        │   ├── numerai_tournament_data.csv
        │   └── numerai_training_data.csv
        └── numerai_dataset_95.zip

    """
    output_path = luigi.Parameter(default='./data/')

    def output(self):
        """
        Manages the files to be written and determines their existence.
        This is determined by checking all the listed files below. If any
        of them does not exist, :py:func:`run` is evoked.

        Note that this method itself asks the API for the current round and
        triggers ``download_current_dataset`` every time it is called.

        :returns:
            A ``dict`` with the following keys:

            * ``zipfile``: original file as downloaded
            (``numerai_dataset_xxx.zip``)
            * ``training_data.csv``: the training data
            (``numerai_training_data.csv``)
            * ``tournament_data.csv``: the tournament data
            (``numerai_tournament_data.csv``)
            * ``example_predictions.csv``: example predictions
            (``example_predictions.csv``)

            Note that ``example_model.py`` and ``example_model.r`` are not referenced,
            as these are to no use for us.

        :raises AssertionError:
            if ``download_current_dataset`` returns a falsy value.
        """
        self.apc = NumerAPI()

        current_round = self.apc.get_current_round()
        dataset_name = "numerai_dataset_{0}.zip".format(current_round)
        dataset_dir = "numerai_dataset_{0}".format(current_round)

        # Run the download as a plain statement: when it was wrapped in an
        # ``assert`` the whole call disappeared under ``python -O``.
        downloaded = self.apc.download_current_dataset(
            dest_path=self.output_path,
            dest_filename=dataset_name,
            unzip=True)
        if not downloaded:
            # Same exception type the former ``assert`` raised.
            raise AssertionError(
                "download_current_dataset failed for {0}".format(dataset_name))

        # see numerapi download_current_dataset
        dataset_path = os.path.join(self.output_path, dataset_dir)

        training_data_path = os.path.join(dataset_path,
                                          'numerai_training_data.csv')
        tournament_data_path = os.path.join(dataset_path,
                                            'numerai_tournament_data.csv')
        example_data_path = os.path.join(dataset_path,
                                         'example_predictions.csv')

        out = {
            'zipfile':
            luigi.LocalTarget(os.path.join(self.output_path, dataset_name)),
            'training_data.csv':
            luigi.LocalTarget(training_data_path),
            'tournament_data.csv':
            luigi.LocalTarget(tournament_data_path),
            'example_predictions.csv':
            luigi.LocalTarget(example_data_path)
        }
        print(out)
        return out

    def run(self):
        out = self.output()