def run(self):
    napi = NumerAPI()
    while True:
        try:
            napi.download_current_dataset(dest_filename=self.output().path,
                                          unzip=False)
            break
        except Exception as exception:
            time.sleep(10)
            continue
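# A minimal standalone sketch of the same retry idea, but with a bounded number
# of attempts and exponential backoff instead of an endless 10-second loop.
# `MAX_ATTEMPTS` and `download_with_retry` are illustrative names, not part of
# the original task or of numerapi.
import time

from numerapi import NumerAPI

MAX_ATTEMPTS = 5

def download_with_retry(dest_filename, unzip=False):
    napi = NumerAPI()
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            # same call as in the task above
            return napi.download_current_dataset(dest_filename=dest_filename,
                                                 unzip=unzip)
        except Exception:
            if attempt == MAX_ATTEMPTS:
                raise
            # back off 10s, 20s, 40s, ... before trying again
            time.sleep(10 * 2 ** (attempt - 1))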
def getCSV(hmpath=homepath()):
    napi = NumerAPI(verbosity="info")
    ps = Parser(hmpath)
    napi.download_current_dataset(dest_path=ps.datapath, unzip=True)
    # the unzipped dataset lands in a single sub-directory; move its CSVs up
    r, d, ff = next(os.walk(ps.datapath))
    dp = op.join(r, d[0])
    for fn in os.listdir(dp):
        if fn.endswith('.csv'):
            os.rename(op.join(dp, fn), op.join(r, fn))
    os.removedirs(dp)
    # clean up the downloaded zip archive(s)
    fff = [f for f in ff if f.endswith(".zip")]
    for fb in fff:
        os.remove(op.join(r, fb))
    return ps
def test_download_current_dataset():
    api = NumerAPI()
    path = api.download_current_dataset(unzip=True)
    assert os.path.exists(path)

    directory = path.replace(".zip", "")
    filename = "numerai_tournament_data.csv"
    assert os.path.exists(os.path.join(directory, filename))
def main():
    # set example API credentials
    example_public_id = "somepublicid"
    example_secret_key = "somesecretkey"

    # some API calls do not require logging in
    napi = NumerAPI(verbosity="info")
    # download current dataset
    napi.download_current_dataset(unzip=True)

    # get competitions
    all_competitions = napi.get_competitions()

    # get leaderboard for the current round
    leaderboard = napi.get_leaderboard()
    # leaderboard for a historic round
    leaderboard_67 = napi.get_leaderboard(round_num=67)

    # provide api tokens
    napi = NumerAPI(example_public_id, example_secret_key)

    # upload predictions
    submission_id = napi.upload_predictions("mypredictions.csv")
    # check submission status
    napi.submission_status()
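# A hedged sketch of waiting on submission_status() after upload_predictions(),
# reusing the example credentials and file name from the snippet above; the
# 60-second poll interval and the 30-attempt cap are illustrative, not part of
# numerapi.
import time

from numerapi import NumerAPI

napi = NumerAPI("somepublicid", "somesecretkey")
submission_id = napi.upload_predictions("mypredictions.csv")

for _ in range(30):
    status = napi.submission_status()
    print(status)
    if status:  # stop as soon as the API reports anything for this submission
        break
    time.sleep(60)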
def download_new_dataset():
    napi = NumerAPI()
    print("Downloading the current dataset...")
    napi.download_current_dataset(dest_path=os.path.join(definitions.DATA_DIR, 'raw'),
                                  unzip=True)
# In[187]:

# set up the Numerai API
napi = NumerAPI()
napi.credentials = ('email', 'password')
username = '******'


# In[188]:

h2o.init()
#h2o.remove_all()


# In[189]:

# download dataset
napi.download_current_dataset(dest_path='.', unzip=True)


# In[190]:

# read data into pandas
train = pd.read_csv('numerai_training_data.csv')
tournament = pd.read_csv('numerai_tournament_data.csv')
# copy so the in-place drops below do not operate on a view of `tournament`
valid = tournament[tournament['data_type'] == 'validation'].copy()


# In[ ]:

# drop un-needed columns
valid.drop(['id', 'data_type', 'era'], axis=1, inplace=True)
train.drop(['id', 'data_type', 'era'], axis=1, inplace=True)
tournament.drop(['data_type', 'era'], axis=1, inplace=True)
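# In[ ]:

# A hedged sketch of how this notebook might continue: move the pandas frames
# into H2O and fit a simple GBM. It assumes the target column name starts with
# 'target' (as in the old Numerai CSVs); the model settings are illustrative.
from h2o.estimators.gbm import H2OGradientBoostingEstimator

target = [c for c in train.columns if c.startswith('target')][0]
features = [c for c in train.columns if c != target]

htrain = h2o.H2OFrame(train)
hvalid = h2o.H2OFrame(valid)

gbm = H2OGradientBoostingEstimator(ntrees=100, max_depth=3, seed=1)
gbm.train(x=features, y=target, training_frame=htrain, validation_frame=hvalid)
print(gbm.model_performance(valid=True))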
class FetchAndExtractData(luigi.Task):
    """
    Fetches the most recent dataset and extracts the contents to the given
    path if not yet done (default path is ``./data``).

    :param output_path: (relative) path where the data should be written to.
        Defaults to ``./data``. Default signature is
        ``FetchAndExtractData(output_path='./data')``.

    ::

        data
        ├── numerai_dataset_95
        │   ├── example_model.py
        │   ├── example_model.r
        │   ├── example_predictions.csv
        │   ├── numerai_tournament_data.csv
        │   └── numerai_training_data.csv
        └── numerai_dataset_95.zip

    """
    output_path = luigi.Parameter(default='./data/')

    def output(self):
        """
        Manages the files to be written and determines their existence.
        This is determined by checking all the listed files below. If any
        of them does not exist, :py:func:`run` is invoked.

        :returns: A ``dict`` with the following keys:

            * ``zipfile``: original file as downloaded
              (``numerai_dataset_xxx.zip``)
            * ``training_data.csv``: the training data
              (``numerai_training_data.csv``)
            * ``tournament_data.csv``: the tournament data
              (``numerai_tournament_data.csv``)
            * ``example_predictions.csv``: example predictions
              (``example_predictions.csv``)

        Note that ``example_model.py`` and ``example_model.r`` are not
        referenced, as these are of no use to us.
        """
        self.apc = NumerAPI()

        current_round = self.apc.get_current_round()
        dataset_name = "numerai_dataset_{0}.zip".format(current_round)
        dataset_dir = "numerai_dataset_{0}".format(current_round)

        assert self.apc.download_current_dataset(dest_path=self.output_path,
                                                 dest_filename=dataset_name,
                                                 unzip=True)
        # see numerapi download_current_dataset
        dataset_path = os.path.join(self.output_path, dataset_dir)
        test_data_path = os.path.join(dataset_path, 'numerai_training_data.csv')
        tournament_data_path = os.path.join(dataset_path, 'numerai_tournament_data.csv')
        example_data_path = os.path.join(dataset_path, 'example_predictions.csv')

        out = {
            'zipfile': luigi.LocalTarget(os.path.join(self.output_path, dataset_name)),
            'training_data.csv': luigi.LocalTarget(test_data_path),
            'tournament_data.csv': luigi.LocalTarget(tournament_data_path),
            'example_predictions.csv': luigi.LocalTarget(example_data_path),
        }
        print(out)
        return out

    def run(self):
        out = self.output()
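# A hedged sketch of a downstream task that consumes the target dict returned
# by FetchAndExtractData.output(); the TrainModel name and its pandas-based
# body are illustrative and not part of the original pipeline.
import os

import luigi
import pandas as pd


class TrainModel(luigi.Task):
    output_path = luigi.Parameter(default='./data/')

    def requires(self):
        # reuse the fetch/extract task defined above
        return FetchAndExtractData(output_path=self.output_path)

    def output(self):
        return luigi.LocalTarget(os.path.join(self.output_path, 'model.txt'))

    def run(self):
        data = self.input()  # the dict of LocalTargets keyed as documented above
        train = pd.read_csv(data['training_data.csv'].path)
        # ... fit and serialise a model here ...
        with self.output().open('w') as handle:
            handle.write("trained on {0} rows\n".format(len(train)))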
def main():
    # set example username and round
    example_username = "******"
    example_round = 51

    # set up paths for download of dataset and upload of predictions
    now = datetime.now().strftime("%Y%m%d")
    dataset_parent_folder = "./dataset"
    dataset_name = "numerai_dataset_{0}/example_predictions.csv".format(now)
    dataset_path = "{0}/{1}".format(dataset_parent_folder, dataset_name)

    # most API calls do not require logging in
    napi = NumerAPI(verbosity="info")
    # log in
    credentials = napi.login()
    print(json.dumps(credentials, indent=2))

    # download current dataset
    dl_succeeded = napi.download_current_dataset(
        dest_path=dataset_parent_folder, unzip=True)
    print("download succeeded: " + str(dl_succeeded))

    # get competitions (returned data is too long to print practically)
    # all_competitions = napi.get_all_competitions()
    # current_competition = napi.get_competition()
    # example_competition = napi.get_competition(round_id=example_round)

    # get user earnings per round
    user_earnings = napi.get_earnings_per_round()
    print("user earnings:")
    print(user_earnings)
    example_earnings = napi.get_earnings_per_round(username=example_username)
    print("example earnings:")
    print(example_earnings)

    # get scores for user
    personal_scores = napi.get_scores_for_user()
    print("personal scores:")
    print(personal_scores)
    other_scores = napi.get_scores_for_user(username=example_username)
    print("other scores:")
    print(other_scores)

    # get user information
    current_user = napi.get_user()
    print("current user:"******"example user:"******"submission:")
    print(json.dumps(submission, indent=2))

    # upload predictions
    ul_succeeded = napi.upload_predictions(dataset_path)
    print("upload succeeded: " + str(ul_succeeded))