def test_get_competitions():
    """Fetch all competitions and check the result is a list of more than 80."""
    competitions = NumerAPI().get_competitions()
    assert isinstance(competitions, list)
    assert len(competitions) > 80
def test_download_current_dataset():
    """Download and unzip the current dataset, then check the files exist."""
    client = NumerAPI()
    zip_path = client.download_current_dataset(unzip=True)
    assert os.path.exists(zip_path)

    # The extracted directory is looked up under the archive path with
    # ".zip" removed.
    extracted_dir = zip_path.replace(".zip", "")
    tournament_csv = os.path.join(extracted_dir, "numerai_tournament_data.csv")
    assert os.path.exists(tournament_csv)
def test_stake():
    """Staking without credentials must fail with an authentication error."""
    client = NumerAPI()
    # The call cannot succeed because no credentials were supplied; the test
    # expects it to be rejected with a ValueError.
    with pytest.raises(ValueError) as excinfo:
        client.stake(3, 2)

    # The error message should point out that the caller is not logged in.
    message = str(excinfo.value)
    assert "You must be authenticated" in message
def output(self):
    """
    Declares the output of this task: a csv file holding the predictions
    made for the given data, named after the current round.
    """
    self.apc = NumerAPI()
    round_number = self.apc.get_current_round()
    filename = 'predictions_{0}_LogisticRegression.csv'.format(round_number)
    target_path = os.path.join(self.output_path, filename)
    return luigi.LocalTarget(target_path)
def run(self):
    """
    Downloads the current dataset archive (without unzipping it) to the path
    of this task's output target.

    The download is retried every 10 seconds until a call completes without
    raising. Each failed attempt is logged together with its traceback so
    that a permanent failure is visible instead of looping silently.
    """
    # Function-scope import: only part of the file is visible here, so the
    # logger is brought into scope locally.
    import logging

    napi = NumerAPI()
    while True:
        try:
            # self.output() is evaluated inside the try block on purpose:
            # a failure while resolving the target is retried as well.
            napi.download_current_dataset(dest_filename=self.output().path,
                                          unzip=False)
        except Exception:
            logging.getLogger(__name__).exception(
                'Downloading the current dataset failed, retrying in 10 seconds')
            time.sleep(10)
        else:
            break
def login(self):
    """
    Creates the API client from the ``PUBLIC_ID`` and ``PRIVATE_SECRET``
    environment variables and stores it in ``self.napi``.

    Construction is retried every 10 seconds, logging each failure, until it
    succeeds. A missing environment variable raises ``KeyError`` immediately.
    """
    credentials = (os.environ['PUBLIC_ID'], os.environ['PRIVATE_SECRET'])
    connected = False
    while not connected:
        try:
            self.napi = NumerAPI(*credentials)
        except Exception:
            self.logger.exception('Login failed')
            time.sleep(10)
        else:
            connected = True
def output(self):
    """
    Manages the files to be written and determines their existence. This is
    determined by checking all the listed files below. If any of them does
    not exist, :py:func:`run` is evoked.

    Note that calling this method downloads and unzips the current dataset
    into ``output_path`` before the targets are built.

    :returns:
        A ``dict`` with the following keys:

        * ``zipfile``: original file as downloaded
          (``numerai_dataset_xxx.zip``)
        * ``training_data.csv``: the training data
          (``numerai_training_data.csv``)
        * ``tournament_data.csv``: the tournament data
          (``numerai_tournament_data.csv``)
        * ``example_predictions.csv``: example predictions
          (``example_predictions.csv``)

        Note that ``example_model.py`` and ``example_model.r`` are not
        referenced, as these are to no use for us.

    :raises AssertionError: if the download call returns a falsy value.
    """
    self.apc = NumerAPI()

    current_round = self.apc.get_current_round()
    dataset_name = "numerai_dataset_{0}.zip".format(current_round)
    dataset_dir = "numerai_dataset_{0}".format(current_round)

    # The download must not live inside an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would skip the download itself.
    downloaded = self.apc.download_current_dataset(dest_path=self.output_path,
                                                   dest_filename=dataset_name,
                                                   unzip=True)
    if not downloaded:
        # AssertionError is kept so the failure type seen by callers is the
        # same as with the former ``assert``.
        raise AssertionError(
            "download of {0} to {1} failed".format(dataset_name,
                                                   self.output_path))

    # see numerapi download_current_dataset
    dataset_path = os.path.join(self.output_path, dataset_dir)

    training_data_path = os.path.join(dataset_path,
                                      'numerai_training_data.csv')
    tournament_data_path = os.path.join(dataset_path,
                                        'numerai_tournament_data.csv')
    example_data_path = os.path.join(dataset_path,
                                     'example_predictions.csv')

    out = {
        'zipfile': luigi.LocalTarget(os.path.join(self.output_path,
                                                  dataset_name)),
        'training_data.csv': luigi.LocalTarget(training_data_path),
        'tournament_data.csv': luigi.LocalTarget(tournament_data_path),
        'example_predictions.csv': luigi.LocalTarget(example_data_path)
    }
    print(out)
    return out
def test_error_handling():
    """Invalid arguments and invalid credentials must raise ValueError."""
    client = NumerAPI()

    # A string where a round number is expected, then a round that does not
    # exist.
    for bad_round in ("foo", -1):
        with pytest.raises(ValueError):
            client.get_leaderboard(bad_round)

    # A request made with a wrong token.
    with pytest.raises(ValueError):
        client.token = ("foo", "bar")
        client.submission_id = 1
        client.submission_status()
class SubmissionTarget(luigi.target.Target):
    """
    Implements a submission target to "output" predictions from luigi tasks on
    the numer.ai servers.
    """

    def __init__(self, path, public_id, secret):
        """
        Creates a new SubmissionTarget.

        :param: path (str):
            local path to the predictions csv file
        :param: public_id (str):
            public_id as reported by the numer.ai website when creating API
            credentials
        :param: secret (str):
            secret as reported by the numer.ai website when creating API
            credentials
        """
        self.path = path
        self.fn = os.path.split(path)[1]
        self.apc = NumerAPI(public_id, secret)

    def exists(self):
        """
        Checks if a submission for the file named :py:attr:`path` was
        uploaded.

        NB: the filename as reported by the server is appended by a random
        string (before the file extension), and we can just access the file
        that was submitted last. This might result in double uploads for the
        same file.

        :returns:
            ``True`` if a reported filename starts with the local filename
            (with ``.csv`` removed), ``False`` otherwise -- including when
            the server reports no latest submission at all.
        """
        qry = "query user { user { latestSubmission { filename } } }"
        res = self.apc.raw_query(qry, authorization=True)

        # A null ``latestSubmission`` is treated as "nothing uploaded yet"
        # instead of failing while iterating over ``None``.
        submissions = res['data']['user']['latestSubmission'] or []

        # The prefix does not change between entries, so compute it once.
        prefix = self.fn.replace('.csv', '')
        return any(entry['filename'].startswith(prefix)
                   for entry in submissions)

    def submit(self):
        """
        Submits the predictions to the numer.ai servers and tries to report
        back the status by printing it.
        """
        self.apc.upload_predictions(self.path)
        print(self.apc.submission_status())
def getCSV(hmpath=homepath()):
    """
    Downloads and unzips the current dataset into the parser's data path,
    moves the extracted ``.csv`` files up into that directory and removes
    the downloaded ``.zip`` archives.

    :param hmpath:
        value handed to ``Parser``. NOTE(review): the default is evaluated
        once, when the function is defined, not on every call.
    :returns: the ``Parser`` instance created for ``hmpath``.
    """
    napi = NumerAPI(verbosity="info")
    ps = Parser(hmpath)
    napi.download_current_dataset(dest_path=ps.datapath, unzip=True)

    # os.walk yields one (dirpath, dirnames, filenames) tuple per directory;
    # only the first entry, describing the data path itself, is wanted.
    # Unpacking the generator directly would spread three *different*
    # directory entries over the three names.
    root, subdirs, files = next(os.walk(ps.datapath))

    # NOTE(review): presumably the only sub-directory is the freshly
    # extracted dataset -- the first one listed is used; confirm.
    extracted = op.join(root, subdirs[0])
    for fn in os.listdir(extracted):
        if fn.endswith('.csv'):
            os.rename(op.join(extracted, fn), op.join(root, fn))

    # os.removedirs only removes empty directories and raises OSError if
    # anything other than the moved csv files is left in the directory.
    os.removedirs(extracted)

    # Archive names reported by os.walk are relative to ``root``; join them
    # so the removal does not depend on the current working directory.
    for fn in files:
        if fn.endswith(".zip"):
            os.remove(op.join(root, fn))
    return ps
def __init__(self, path, public_id, secret):
    """
    Sets up a new SubmissionTarget for a local predictions file.

    :param: path (str):
        local path to the predictions csv file
    :param: public_id (str):
        public_id as reported by the numer.ai website when creating API
        credentials
    :param: secret (str):
        secret as reported by the numer.ai website when creating API
        credentials
    """
    self.path = path
    # Only the final path component (the file name) is kept in ``fn``.
    self.fn = os.path.basename(path)
    self.apc = NumerAPI(public_id, secret)
class TrainAndPredict(luigi.Task):
    """
    Trains a logistic regression classifier on the training data, then
    predicts the targets on the tournament data.

    The default signature of this task is
    ``TrainAndPredict(output_path='./data')``.

    :param: output_path (str):
        path to the directory where the predictions shall be saved to,
        defaults to ``./data``.
    """
    output_path = luigi.Parameter(default='./data/')

    def requires(self):
        """
        Dependencies to be fullfiled prior to execution. This task needs the
        :py:class:`tasks.numerai_fetch_training_data.FetchAndExtractData`
        task that provides the training/tournament data.
        """
        return FetchAndExtractData(output_path=self.output_path)

    def output(self):
        """
        Saves outputs of this task--which is a csv file of the predictions
        made for the given data.
        """
        self.apc = NumerAPI()
        fn = 'predictions_{0}_LogisticRegression.csv'.format(
            self.apc.get_current_round())
        return luigi.LocalTarget(os.path.join(self.output_path, fn))

    def run(self):
        """
        Trains a model and makes predictions given the data. These are then
        saved to the csv file declared by :py:func:`output`.
        """
        data = self.input()
        out = self.output()

        training_data = pd.read_csv(data['training_data.csv'].path, header=0)
        prediction_data = pd.read_csv(data['tournament_data.csv'].path,
                                      header=0)

        # Select the feature columns (every column whose name contains
        # "feature") together with the target and id columns.
        features = [f for f in list(training_data) if "feature" in f]
        X = training_data[features]
        Y = training_data["target"]
        x_prediction = prediction_data[features]
        ids = prediction_data["id"]

        # This is your model that will learn to predict
        model = linear_model.LogisticRegression(n_jobs=-1)

        # Your model is trained on the training_data
        model.fit(X, Y)

        # Your trained model is now used to make predictions on the
        # numerai_tournament_data
        # The model returns two columns: [probability of 0, probability of 1]
        # We are just interested in the probability that the target is 1.
        y_prediction = model.predict_proba(x_prediction)
        results = y_prediction[:, 1]
        results_df = pd.DataFrame(data={'probability': results})
        joined = pd.DataFrame(ids).join(results_df)

        # The predictions are computed and written exactly once, to the
        # target declared by output(); no extra copy is left in the current
        # working directory.
        print("Writing predictions to {0}".format(out.path))
        joined.to_csv(out.path, index=False)
def download_new_dataset():
    """Download and unzip the current dataset into the ``raw`` data folder."""
    client = NumerAPI()
    print("Downloading the current dataset...")
    raw_dir = os.path.join(definitions.DATA_DIR, 'raw')
    client.download_current_dataset(dest_path=raw_dir, unzip=True)
def test_get_submission_ids(api: NumerAPI):
    """The submission ids must come back as a non-empty dict."""
    submission_ids = api.get_submission_ids()
    assert submission_ids
    assert isinstance(submission_ids, dict)
def test_get_submission_ids():
    """The submission ids must come back as a non-empty dict."""
    submission_ids = NumerAPI().get_submission_ids()
    assert len(submission_ids) > 0
    assert isinstance(submission_ids, dict)
def fixture_for_api():
    """
    Build a NumerAPI client with debug verbosity from the environment.

    Credentials are read from ``NUMERAI_PUBLIC_ID`` and
    ``NUMERAI_SECRET_KEY``; an unset variable is passed on as ``None``.
    """
    env = os.environ
    return NumerAPI(
        public_id=env.get('NUMERAI_PUBLIC_ID'),
        secret_key=env.get('NUMERAI_SECRET_KEY'),
        verbosity='DEBUG',
    )
def test_get_staking_leaderboard():
    """The staking leaderboard of round 82 must list 115 entries."""
    staking_round = 82
    stakes = NumerAPI().get_staking_leaderboard(staking_round)
    # 115 people staked in that round
    assert len(stakes) == 115
def main():
    """
    Example walk-through of the NumerAPI client.

    Logs in, downloads and unzips the current dataset, prints earnings and
    scores for the logged-in user and for an example user, and finally
    uploads the example predictions found under ``./dataset``.
    """
    # set example username and round
    # (``example_round`` is only referenced by the commented-out calls below)
    example_username = "******"
    example_round = 51

    # set up paths for download of dataset and upload of predictions
    # (the dataset folder name carries today's date as YYYYMMDD)
    now = datetime.now().strftime("%Y%m%d")
    dataset_parent_folder = "./dataset"
    dataset_name = "numerai_dataset_{0}/example_predictions.csv".format(now)
    dataset_path = "{0}/{1}".format(dataset_parent_folder, dataset_name)

    # most API calls do not require logging in
    napi = NumerAPI(verbosity="info")

    # log in
    credentials = napi.login()
    print(json.dumps(credentials, indent=2))

    # download current dataset
    dl_succeeded = napi.download_current_dataset(
        dest_path=dataset_parent_folder, unzip=True)
    print("download succeeded: " + str(dl_succeeded))

    # get competitions (returned data is too long to print practically)
    # all_competitions = napi.get_all_competitions()
    # current_competition = napi.get_competition()
    # example_competition = napi.get_competition(round_id=example_round)

    # get user earnings per round
    user_earnings = napi.get_earnings_per_round()
    print("user earnings:")
    print(user_earnings)
    example_earnings = napi.get_earnings_per_round(username=example_username)
    print("example earnings:")
    print(example_earnings)

    # get scores for user
    personal_scores = napi.get_scores_for_user()
    print("personal scores:")
    print(personal_scores)
    other_scores = napi.get_scores_for_user(username=example_username)
    print("other scores:")
    print(other_scores)

    # get user information
    current_user = napi.get_user()
    # NOTE(review): the next statement is garbled in this copy of the source.
    # The text between the string literals appears to have been lost, so the
    # line is not valid Python as shown and ``submission`` is never assigned
    # in the visible code. Restore the missing statements from the upstream
    # file before running this.
    print("current user:"******"example user:"******"submission:")
    print(json.dumps(submission, indent=2))

    # upload predictions
    ul_succeeded = napi.upload_predictions(dataset_path)
    print("upload succeeded: " + str(ul_succeeded))
def test_get_leaderboard():
    """The leaderboard of round 67 must contain 1425 entries."""
    historic_round = 67
    leaderboard = NumerAPI().get_leaderboard(historic_round)
    assert len(leaderboard) == 1425
def test_raw_query():
    """A raw query must return a dict that carries a ``data`` entry."""
    client = NumerAPI()
    response = client.raw_query("query {dataset}")
    assert isinstance(response, dict)
    assert "data" in response
def test_get_current_round():
    """The current round number must be at least 82."""
    round_number = NumerAPI().get_current_round()
    assert round_number >= 82
class Evaluator(threading.Thread):
    """
    Thread that picks up prediction files, uploads them, waits for the
    submission status and writes a JSON report next to each prediction.
    """

    def __init__(self):
        """Set up the ``evaluator`` logger at DEBUG level and log in."""
        super(Evaluator, self).__init__()
        evaluator_log = logging.getLogger('evaluator')
        evaluator_log.setLevel(logging.DEBUG)
        self.logger = evaluator_log
        self.login()

    def login(self):
        """
        Create the API client from the ``PUBLIC_ID`` and ``PRIVATE_SECRET``
        environment variables, retrying every 10 seconds until it succeeds.
        """
        credentials = (os.environ['PUBLIC_ID'], os.environ['PRIVATE_SECRET'])
        logged_in = False
        while not logged_in:
            try:
                self.napi = NumerAPI(*credentials)
            except Exception:
                self.logger.exception('Login failed')
                time.sleep(10)
            else:
                logged_in = True

    def upload(self, prediction):
        """
        Upload the prediction file, retrying until the upload goes through.

        An HTTP 429 response waits 30 minutes, any other HTTP error waits
        one minute and every other failure waits 10 seconds before the next
        attempt.
        """
        uploaded = False
        while not uploaded:
            try:
                self.logger.info('Uploading prediction: {}'.format(prediction))
                self.napi.upload_predictions(file_path=prediction)
                self.logger.info('Uploaded prediction: {}'.format(prediction))
                uploaded = True
            except requests.exceptions.HTTPError as http_error:
                rate_limited = http_error.response.status_code == 429
                if not rate_limited:
                    self.logger.exception('Network failure')
                    time.sleep(60)
                    continue
                self.logger.info('Backing off')
                time.sleep(30 * 60)
            except Exception:
                self.logger.exception('Upload failure')
                time.sleep(10)

    def check(self, prediction):
        """
        Poll the submission status every 10 seconds and return it once the
        logloss is present and neither concordance nor originality is
        pending. Failures are logged and polling continues.
        """
        while True:
            try:
                self.logger.info('Checking submission: {}'.format(prediction))
                status = self.napi.submission_status()
                self.logger.info('Got {}: {}'.format(prediction, str(status)))
                # All three fields are read up front, so a missing key is
                # reported no matter which check would have failed first.
                finished = all([
                    status['validation_logloss'] is not None,
                    not status['concordance']['pending'],
                    not status['originality']['pending'],
                ])
                if finished:
                    return status
                time.sleep(10)
            except Exception:
                self.logger.exception('Checking submission failed')
                time.sleep(10)

    def report(self, prediction, status):
        """
        Write the logloss, consistency, concordance and originality of
        ``status`` as JSON to ``<prediction>.report.json``.
        """
        summary = {'logloss': status['validation_logloss']}
        summary['consistency'] = status['consistency']
        summary['concordance'] = status['concordance']['value']
        summary['originality'] = status['originality']['value']
        report_path = prediction + '.report.json'
        with open(report_path, 'wb') as report_file:
            report_file.write(json.dumps(summary).encode('utf-8'))

    def submit(self, prediction):
        """Upload a prediction, wait 5 seconds, then check and report it."""
        self.upload(prediction)
        time.sleep(5)
        final_status = self.check(prediction)
        self.report(prediction, final_status)

    def run(self):
        """
        Thread body: once per second, drain the captor and submit every
        captured prediction that has no report file yet.
        """
        captor = Captor(os.getenv('STORING'), 'predictions*.csv')
        while True:
            while not captor.empty():
                candidate = captor.grab()
                already_reported = os.path.isfile(candidate + '.report.json')
                if not already_reported:
                    self.submit(candidate)
            time.sleep(1)
def test_get_current_round(api: NumerAPI):
    """The current round number must be at least 82."""
    round_number = api.get_current_round()
    assert round_number >= 82
from h2o.grid.grid_search import H2OGridSearch
from numerapi.numerapi import NumerAPI
from flatten_json import flatten
from time import sleep
from slacker import Slacker

# In[186]:

# Set up the Slack client.
# NOTE(review): 'slack_token' looks like a placeholder -- a real token
# should not be committed to the source.
token = 'slack_token'
slack = Slacker(token)

# In[187]:

# Set up the numerai API client; the credentials and the user name below
# are placeholders.
napi = NumerAPI()
napi.credentials = ('email', 'password')
username = '******'

# In[188]:

# Initialise h2o (the ``h2o`` module itself is imported outside this view).
h2o.init()
#h2o.remove_all()

# In[189]:

# Download and unzip the current dataset into the working directory.
napi.download_current_dataset(dest_path='.', unzip=True)

# In[190]:
def test_get_leaderboard(api: NumerAPI):
    """The leaderboard of round 67 must contain 1425 entries."""
    historic_round = 67
    leaderboard = api.get_leaderboard(historic_round)
    assert len(leaderboard) == 1425
def main():
    """
    Example usage: anonymous calls first, then an upload with API tokens.
    """
    # placeholder API credentials
    example_public_id = "somepublicid"
    example_secret_key = "somesecretkey"

    # some API calls do not require logging in
    anonymous_api = NumerAPI(verbosity="info")

    # fetch the current dataset and unzip it
    anonymous_api.download_current_dataset(unzip=True)

    # list the competitions
    competitions = anonymous_api.get_competitions()

    # leaderboard of the current round, then of historic round 67
    current_leaderboard = anonymous_api.get_leaderboard()
    historic_leaderboard = anonymous_api.get_leaderboard(round_num=67)

    # a client created with API tokens is used for the upload
    authenticated_api = NumerAPI(example_public_id, example_secret_key)

    # upload predictions, then check the submission status
    submission_id = authenticated_api.upload_predictions("mypredictions.csv")
    authenticated_api.submission_status()
#!/usr/bin/env python
"""Example script: download the dataset and print public user information."""

from numerapi.numerapi import NumerAPI

# No login is needed for most API calls.
napi = NumerAPI()

print("Downloading the current dataset...")
napi.download_current_dataset(dest_path='.', unzip=True)

# Information about one specific user.
username = '******'
announcement = "Getting information about user {}...".format(username)
print(announcement)

user_info = napi.get_user(username)
print(user_info)
user_scores = napi.get_scores(username)
print(user_scores)
user_earnings = napi.get_earnings_per_round(username)
print(user_earnings)

# Leaderboard of the current round of the competition.
current_leaderboard = napi.get_new_leaderboard()
print(current_leaderboard)

# Leaderboard of an earlier round of the competition.
earlier_leaderboard = napi.get_new_leaderboard(40)
print(earlier_leaderboard)

# Uploading predictions to your account requires your credentials:
# napi.credentials = ("YOUR_EMAIL", "YOUR_PASSWORD")
# napi.upload_prediction('./numerai_datasets/example_predictions.csv')
class FetchAndExtractData(luigi.Task): """ Fetches the most recent dataset and extracts the contents to the given path if not yet done (default path is ``./data``). :param: output_path: (relative) path where the data should be written to. Defaults to ``./data``. Default signature is ``FetchAndExtractData(output_path='./data')``. :: data ├── numerai_dataset_95 │ ├── example_model.py │ ├── example_model.r │ ├── example_predictions.csv │ ├── numerai_tournament_data.csv │ └── numerai_training_data.csv └── numerai_dataset_95.zip """ output_path = luigi.Parameter(default='./data/') def output(self): """ Manages the files to be written and determines their existence. This is determined by checking all the listed files below. If any of them does not exist, :py:func:`run` is evoked. :returns: A ``dict`` with the following keys: * ``zipfile``: original file as downloaded (``numerai_dataset_xxx.zip``) * ``training_data.csv``: the training data (``numerai_training_data.csv``) * ``tournament_data.csv``: the tournament data (``numerai_tournament_data.csv``) * ``example_predictions.csv``: example predictions (``example_predictions.csv``) Note that ``example_model.py`` and ``example_model.r`` are not referenced, as these are to no use for us. 
""" self.apc = NumerAPI() current_round = self.apc.get_current_round() dataset_name = "numerai_dataset_{0}.zip".format(current_round) dataset_dir = "numerai_dataset_{0}".format(current_round) assert self.apc.download_current_dataset(dest_path=self.output_path, dest_filename=dataset_name, unzip=True) # see numerapi download_current_dataset dataset_path = os.path.join(self.output_path, dataset_dir) test_data_path = os.path.join(dataset_path, 'numerai_training_data.csv') tournament_data_path = os.path.join(dataset_path, 'numerai_tournament_data.csv') example_data_path = os.path.join(dataset_path, 'example_predictions.csv') out = { 'zipfile': luigi.LocalTarget(os.path.join(self.output_path, dataset_name)), 'training_data.csv': luigi.LocalTarget(test_data_path), 'tournament_data.csv': luigi.LocalTarget(tournament_data_path), 'example_predictions.csv': luigi.LocalTarget(example_data_path) } print(out) return out def run(self): out = self.output()