Example #1
0
 def run(self):
     """Download the current dataset archive to this task's output path.

     The download is attempted until one call completes without raising;
     after every failed attempt the method sleeps for ten seconds before
     trying again. The archive is not unzipped.
     """
     client = NumerAPI()
     finished = False
     while not finished:
         try:
             # the output path is resolved on every attempt, inside the try
             client.download_current_dataset(
                 dest_filename=self.output().path, unzip=False)
         except Exception:
             # any failure: wait a little, then go around again
             time.sleep(10)
         else:
             finished = True
Example #2
0
def getCSV(hmpath=homepath()):
    """Download the current dataset and move its CSV files into the data path.

    The dataset is downloaded and unzipped into ``ps.datapath``. Every
    ``.csv`` file found in the first sub-directory of that path is moved up
    into ``ps.datapath`` itself, the sub-directory is deleted, and any
    ``.zip`` files that were sitting directly in ``ps.datapath`` are removed.

    :param hmpath:
        value handed to ``Parser``; note that the default is computed once,
        when the function is defined.
    :returns:
        the ``Parser`` instance built from ``hmpath``.
    :raises IndexError:
        if ``ps.datapath`` contains no sub-directory after the download.
    """
    import shutil

    napi = NumerAPI(verbosity="info")
    ps = Parser(hmpath)
    napi.download_current_dataset(dest_path=ps.datapath, unzip=True)

    # os.walk() returns a generator of (root, dirs, files) triples; only the
    # first triple, describing the top level of ps.datapath, is needed.
    root, dirnames, filenames = next(os.walk(ps.datapath))

    # NOTE(review): assumes the freshly extracted folder is the first
    # sub-directory listed; confirm if older dataset folders can coexist.
    extracted_dir = op.join(root, dirnames[0])
    for fn in os.listdir(extracted_dir):
        if fn.endswith('.csv'):
            os.rename(op.join(extracted_dir, fn), op.join(root, fn))

    # The folder may still hold non-CSV files, which os.removedirs() cannot
    # delete (it only removes empty directories), so remove the whole tree.
    shutil.rmtree(extracted_dir)

    # os.walk() yields bare file names, so join them with their directory
    # before deleting; otherwise the removal is relative to the CWD.
    for fn in filenames:
        if fn.endswith(".zip"):
            os.remove(op.join(root, fn))

    return ps
def test_download_current_dataset():
    """Check that an unzipped download leaves the tournament CSV on disk."""
    archive_path = NumerAPI().download_current_dataset(unzip=True)
    assert os.path.exists(archive_path)

    # expected extraction folder: the returned path without ".zip"
    extracted_dir = archive_path.replace(".zip", "")
    tournament_csv = os.path.join(extracted_dir,
                                  "numerai_tournament_data.csv")
    assert os.path.exists(tournament_csv)
Example #4
0
def main():
    """Walk through a handful of NumerAPI calls, first without and then
    with API tokens.

    Without tokens: download the current dataset, list the competitions and
    fetch the leaderboard for the current round and for round 67.
    With tokens: upload a predictions file and query the submission status.
    """
    # placeholder API tokens
    example_public_id = "somepublicid"
    example_secret_key = "somesecretkey"

    # --- calls that work without logging in ---
    anonymous_api = NumerAPI(verbosity="info")
    anonymous_api.download_current_dataset(unzip=True)
    competitions = anonymous_api.get_competitions()
    current_leaderboard = anonymous_api.get_leaderboard()
    # a historic round
    round_67_leaderboard = anonymous_api.get_leaderboard(round_num=67)

    # --- calls made with API tokens ---
    authenticated_api = NumerAPI(example_public_id, example_secret_key)
    submission_id = authenticated_api.upload_predictions("mypredictions.csv")
    authenticated_api.submission_status()
Example #5
0
def download_new_dataset():
    """Download and unzip the current dataset into ``DATA_DIR/raw``."""
    client = NumerAPI()
    print("Downloading the current dataset...")
    raw_dir = os.path.join(definitions.DATA_DIR, 'raw')
    client.download_current_dataset(dest_path=raw_dir, unzip=True)
Example #6
0
# In[187]:

# configure the Numerai API client
napi = NumerAPI()
napi.credentials = ('email', 'password')
username = '******'

# In[188]:

h2o.init()
#h2o.remove_all()

# In[189]:

# fetch the current dataset and unzip it into the working directory
napi.download_current_dataset(dest_path='.', unzip=True)

# In[190]:

# load both CSV files; the validation rows are selected from the
# tournament file by their data_type
train = pd.read_csv('numerai_training_data.csv')
tournament = pd.read_csv('numerai_tournament_data.csv')
valid = tournament[tournament['data_type'].eq('validation')]

# In[ ]:

# strip the columns that are not needed (the tournament frame keeps 'id')
for _frame in (valid, train):
    _frame.drop(['id', 'data_type', 'era'], axis=1, inplace=True)
tournament.drop(['data_type', 'era'], axis=1, inplace=True)
class FetchAndExtractData(luigi.Task):
    """
    Fetches the most recent dataset and extracts the contents to the given
    path if not yet done (default path is ``./data/``).

    :param output_path:
        (relative) path where the data should be written to. Defaults to
        ``./data/``. Default signature is
        ``FetchAndExtractData(output_path='./data/')``.

    ::

        data
        ├── numerai_dataset_95
        │   ├── example_model.py
        │   ├── example_model.r
        │   ├── example_predictions.csv
        │   ├── numerai_tournament_data.csv
        │   └── numerai_training_data.csv
        └── numerai_dataset_95.zip

    """
    output_path = luigi.Parameter(default='./data/')

    def output(self):
        """
        Manages the files to be written and determines their existence.
        This is determined by checking all the listed files below. If any
        of them does not exist, :py:func:`run` is invoked.

        Note that this method itself creates a ``NumerAPI`` client (stored
        on ``self.apc``), asks it for the current round and triggers the
        download of the current dataset every time it is called.

        :returns:
            A ``dict`` with the following keys:

            * ``zipfile``: original file as downloaded
              (``numerai_dataset_xxx.zip``)
            * ``training_data.csv``: the training data
              (``numerai_training_data.csv``)
            * ``tournament_data.csv``: the tournament data
              (``numerai_tournament_data.csv``)
            * ``example_predictions.csv``: example predictions
              (``example_predictions.csv``)

            Note that ``example_model.py`` and ``example_model.r`` are not
            referenced, as these are of no use for us.

        :raises AssertionError:
            if the download call returns a falsy value.
        """
        self.apc = NumerAPI()

        current_round = self.apc.get_current_round()
        dataset_name = "numerai_dataset_{0}.zip".format(current_round)
        dataset_dir = "numerai_dataset_{0}".format(current_round)

        # The call must not live inside an ``assert`` statement: asserts are
        # removed under ``python -O``, which would silently skip the download.
        downloaded = self.apc.download_current_dataset(
            dest_path=self.output_path,
            dest_filename=dataset_name,
            unzip=True)
        if not downloaded:
            # AssertionError is kept so existing callers see the same
            # exception type as before.
            raise AssertionError(
                "download of {0} failed".format(dataset_name))

        # see numerapi download_current_dataset
        dataset_path = os.path.join(self.output_path, dataset_dir)

        training_data_path = os.path.join(dataset_path,
                                          'numerai_training_data.csv')
        tournament_data_path = os.path.join(dataset_path,
                                            'numerai_tournament_data.csv')
        example_data_path = os.path.join(dataset_path,
                                         'example_predictions.csv')

        out = {
            'zipfile':
            luigi.LocalTarget(os.path.join(self.output_path, dataset_name)),
            'training_data.csv':
            luigi.LocalTarget(training_data_path),
            'tournament_data.csv':
            luigi.LocalTarget(tournament_data_path),
            'example_predictions.csv':
            luigi.LocalTarget(example_data_path)
        }
        print(out)
        return out

    def run(self):
        # Resolving the targets also performs the download (see ``output``).
        # NOTE(review): nothing else is visible here; the rest of this method
        # may be missing from this excerpt.
        out = self.output()
Example #8
0
def main():
    """Walk through several NumerAPI calls using a logged-in client.

    In order: log in and print the returned credentials, download and unzip
    the current dataset, print the earnings per round and the scores for
    both the logged-in user and an example user, fetch the current user, and
    finally upload a predictions file.

    NOTE(review): one line in the user-information section is garbled in
    this copy of the file and does not parse; see the note at that line.
    """
    # set example username and round
    example_username = "******"
    # only referenced by the commented-out competition calls below
    example_round = 51

    # set up paths for download of dataset and upload of predictions
    # (the folder name embeds today's date as YYYYMMDD)
    now = datetime.now().strftime("%Y%m%d")
    dataset_parent_folder = "./dataset"
    dataset_name = "numerai_dataset_{0}/example_predictions.csv".format(now)
    dataset_path = "{0}/{1}".format(dataset_parent_folder, dataset_name)

    # most API calls do not require logging in
    napi = NumerAPI(verbosity="info")

    # log in
    credentials = napi.login()
    print(json.dumps(credentials, indent=2))

    # download current dataset
    dl_succeeded = napi.download_current_dataset(
        dest_path=dataset_parent_folder, unzip=True)
    print("download succeeded: " + str(dl_succeeded))

    # get competitions (returned data is too long to print practically)
    # all_competitions = napi.get_all_competitions()
    # current_competition = napi.get_competition()
    # example_competition = napi.get_competition(round_id=example_round)

    # get user earnings per round
    user_earnings = napi.get_earnings_per_round()
    print("user earnings:")
    print(user_earnings)
    example_earnings = napi.get_earnings_per_round(username=example_username)
    print("example earnings:")
    print(example_earnings)

    # get scores for user
    personal_scores = napi.get_scores_for_user()
    print("personal scores:")
    print(personal_scores)
    other_scores = napi.get_scores_for_user(username=example_username)
    print("other scores:")
    print(other_scores)

    # get user information
    current_user = napi.get_user()
    # NOTE(review): the next line is corrupted -- several statements appear
    # to have been collapsed into one and partly replaced by "******". It is
    # not valid Python as written, and ``submission`` is never assigned
    # anywhere in this function. Restore it from the original example.
    print("current user:"******"example user:"******"submission:")
    print(json.dumps(submission, indent=2))

    # upload predictions
    ul_succeeded = napi.upload_predictions(dataset_path)
    print("upload succeeded: " + str(ul_succeeded))