Beispiel #1
0
def get_models():
    """Return a list holding one newly constructed instance of each model."""
    # Collect the constructors first, then instantiate them in order.
    factories = (
        nx.logistic,
        nx.extratrees,
        nx.randomforest,
        nx.mlpc,
        nx.logisticPCA,
        nx.example_predictions,
        fifty,
    )
    return [make() for make in factories]
Beispiel #2
0
def get_models():
    """Return a list holding one newly constructed instance of each model."""
    return [
        nx.linear(),
        nx.ridge_mean(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.linearPCA(),
        nx.example_predictions(),
        nx.fifty(),
    ]
Beispiel #3
0
    def check(self, data, verbose=True):
        """
        Run Numerai upload checks.

        The predictions of every (name, tournament) pair are compared with
        the example predictions of the same tournament, separately for the
        'validation', 'test' and 'live' regions and for all rows together.

        Parameters
        ----------
        data : nx.Data
            Data object of Numerai dataset.
        verbose : bool
            By default, True, the pair name and its table of check results
            are printed to stdout. If False, nothing is printed.

        Returns
        -------
        check : dict
            A dictionary where the keys are the (name, tournament) pairs and
            the values are Pandas DataFrames that contain the results of the
            checks. Each DataFrame has the rows 'corr', 'rcorr', 'min',
            'max' and 'maz' and the columns 'validation', 'test', 'live',
            'all' and 'pass'.
        """

        # calc example predictions, aligned to the ids of this object; they
        # are the reference the correlation checks compare against
        example_y = {}
        for tournament in self.tournaments(as_str=False):
            ep = nx.production(nx.example_predictions(),
                               data,
                               tournament=tournament,
                               verbosity=0)
            ep = ep.loc[self.ids]
            example_y[tournament] = ep.y[:, 0]

        df_dict = {}
        columns = ['validation', 'test', 'live', 'all', 'pass']
        regions = data.loc[self.ids].region
        y_all = self.y

        # check each model, tournament pair; the position of a pair is used
        # as the column index into y
        for col, pair in enumerate(self.pairs(as_str=False)):
            if verbose:
                print('{}, {}'.format(pair[0], nx.tournament_str(pair[1])))
            df = pd.DataFrame(columns=columns)
            y = y_all[:, col]
            for region in ('validation', 'test', 'live', 'all'):
                yexi = example_y[pair[1]]
                if region == 'all':
                    yi = y
                else:
                    # restrict both series to the rows of this region
                    mask = regions == region
                    yi = y[mask]
                    yexi = yexi[mask]
                df.loc['corr', region] = pearsonr(yi, yexi)[0]
                df.loc['rcorr', region] = spearmanr(yi, yexi)[0]
                df.loc['min', region] = yi.min()
                df.loc['max', region] = yi.max()
                # largest absolute z-score of the predictions
                maz = np.abs((yi - yi.mean()) / yi.std()).max()
                df.loc['maz', region] = maz

            # a check passes only if it holds in every region; [:-1] drops
            # the 'pass' column itself from the comparison
            df.loc['corr', 'pass'] = (df.loc['corr'][:-1] >= 0.2).all()
            df.loc['rcorr', 'pass'] = (df.loc['rcorr'][:-1] >= 0.2).all()
            df.loc['min', 'pass'] = (df.loc['min'][:-1] >= 0.3).all()
            df.loc['max', 'pass'] = (df.loc['max'][:-1] <= 0.7).all()
            df.loc['maz', 'pass'] = (df.loc['maz'][:-1] <= 15).all()

            if verbose:
                print(df)

            df_dict[pair] = df

        return df_dict