Exemple #1
0
 def __getitem__(self, index):
     """
     Return the rows of ``self.df`` selected by round and tournament.

     Supported forms of `index`:

     * slice -- a range of rounds (no step allowed); the endpoints are
       resolved with ``self.rounds_to_ints`` and both ends are included.
       All tournaments from ``nx.tournament_all(as_str=False)`` are used.
     * int -- a single round, all tournaments.
     * list -- a list of (round, tournament) pairs; each tournament is
       converted with ``nx.tournament_int``.
     * tuple -- one (round, tournament) pair; round must be an int and
       tournament an int or str.

     ``self.gets`` is called with the requested rounds and tournaments
     before ``self.df`` is filtered (presumably to make sure the data is
     loaded -- confirm against ``gets``).

     Raises ValueError for a slice with a step and IndexError for any other
     unsupported index.
     """
     if isinstance(index, slice):
         if index.step is not None:
             raise ValueError("slice step size must be 1")
         first, last = self.rounds_to_ints(index.start, index.stop)
         rounds = list(range(first, last + 1))
         tournaments = nx.tournament_all(as_str=False)
     elif nx.isint(index):
         rounds = [index]
         tournaments = nx.tournament_all(as_str=False)
     elif isinstance(index, list):
         # split the list of pairs into parallel round/tournament sequences
         rounds, raw_tournaments = zip(*index)
         tournaments = [nx.tournament_int(t) for t in raw_tournaments]
     elif isinstance(index, tuple):
         if len(index) != 2:
             raise IndexError("tuple index must have length 2")
         round_number, tournament = index
         if not nx.isint(round_number):
             raise IndexError("first element of tuple index must be int")
         if not nx.isint(tournament) and not nx.isstring(tournament):
             msg = "second element of tuple index must be int or str"
             raise IndexError(msg)
         rounds = [round_number]
         tournaments = [nx.tournament_int(tournament)]
     else:
         raise IndexError("indexing method not supported")

     self.gets(rounds, tournaments)
     in_rounds = self.df['round'].isin(rounds)
     in_tournaments = self.df['tournament'].isin(tournaments)
     return self.df[in_rounds & in_tournaments]
Exemple #2
0
 def next_split(self):
     """
     Return the (fit, predict) data pair for fold number ``self.count``.

     On the first call (``self.count == 0``) the unique eras are divided
     into ``kfold`` groups which are cached in ``self.eras``:

     * ``n = int(number_of_eras / kfold)`` eras form the last group. With
       ``idx = seed % n`` that group is ``eras[idx:n]`` plus the final
       ``idx`` eras, so `seed` shifts which eras end up in it.
     * the remaining eras are divided into ``kfold - 1`` groups with
       ``np.array_split``.

     Every call then fits on all groups except group ``self.count`` and
     predicts on group ``self.count``. ``self.count`` is not modified here.

     Returns
     -------
     dfit, dpre
         Results of ``data.era_isin`` for the fit eras and the predict
         eras.

     Raises
     ------
     ValueError
         If `seed` is not an integer, or if there are too few unique eras
         for the requested number of folds.
     """
     seed = self.p['seed']
     if not nx.isint(seed):
         raise ValueError("`seed` must be an integer")
     data = self.p['data']
     if self.count == 0:
         # NOTE(review): the train-only restriction is applied on this first
         # call only; later calls select eras from the unrestricted data.
         # That is equivalent only if the train eras never occur in other
         # regions -- confirm.
         if self.p['train_only']:
             data = data['train']
         eras = data.unique_era()
         kfold = self.p['kfold']
         n = int(eras.size / kfold)
         if n < 1:
             # without this guard `seed % n` fails with ZeroDivisionError
             msg = ("`kfold` must be positive and no larger than the "
                    "number of unique eras")
             raise ValueError(msg)
         idx = seed % n
         if idx == 0:
             era0 = eras[:n]
             eras = eras[n:]
         else:
             # take n - idx eras after the first idx, plus idx from the end
             era0 = np.concatenate((eras[idx:n], eras[-idx:]))
             eras = np.concatenate((eras[:idx], eras[n:-idx]))
         eras = np.array_split(eras, kfold - 1)
         eras.append(era0)
         self.eras = eras
     fit_eras = np.concatenate(
         [e for i, e in enumerate(self.eras) if i != self.count])
     dfit = data.era_isin(fit_eras)
     dpre = data.era_isin(self.eras[self.count])
     return dfit, dpre
Exemple #3
0
def tournament_str(tournament_int_or_str):
    """
    Convert tournament int or str to str.

    A string is returned unchanged once it is found in
    ``nx.tournament_names(active_only=True)``; an integer is converted with
    ``tournament_int2str``. ValueError is raised for an unknown name and
    for input that is neither str nor int.
    """
    value = tournament_int_or_str
    if not nx.isstring(value):
        if nx.isint(value):
            return tournament_int2str(value)
        raise ValueError('input must be a str or int')
    if value in nx.tournament_names(active_only=True):
        return value
    raise ValueError('tournament name is unknown')
Exemple #4
0
def tournament_str(tournament_int_or_str):
    """
    Convert tournament int or str to str.

    String input must be one of ``nx.tournament_names(active_only=False)``
    and is returned as is; integer input is mapped with
    ``tournament_int2str``. Anything else, or an unrecognized name, raises
    ValueError.
    """
    t = tournament_int_or_str
    if not nx.isstring(t):
        if not nx.isint(t):
            raise ValueError('input must be a str or int')
        return tournament_int2str(t)
    known_names = nx.tournament_names(active_only=False)
    if t in known_names:
        return t
    raise ValueError('tournament name is not recognized')
Exemple #5
0
def tournament_int(tournament_int_or_str):
    """
    Convert tournament int or str to int.

    String input is converted with ``tournament_str2int``. Integer input is
    returned unchanged when it is one of 1, 2, 3, 4, 5. ValueError is
    raised for any other integer and for input that is neither str nor int.
    """
    value = tournament_int_or_str
    if nx.isstring(value):
        return tournament_str2int(value)
    if not nx.isint(value):
        raise ValueError('input must be a str or int')
    if value in (1, 2, 3, 4, 5):
        return value
    raise ValueError('tournament int must be between 1 and 5')
Exemple #6
0
def tournament_str(tournament_int_or_str):
    """
    Convert tournament int or str to str.

    A string that appears in ``TOURNAMENT_NAMES`` is returned unchanged; an
    integer is converted with ``tournament_int2str``. An unknown name or an
    input of any other type raises ValueError.
    """
    name_or_number = tournament_int_or_str
    if not nx.isstring(name_or_number):
        if nx.isint(name_or_number):
            return tournament_int2str(name_or_number)
        raise ValueError('input must be a str or int')
    if name_or_number in TOURNAMENT_NAMES:
        return name_or_number
    raise ValueError('tournament name is unknown')
Exemple #7
0
def tournament_int(tournament_int_or_str):
    """
    Convert tournament int or str to int.

    String input is converted with ``tournament_str2int``. Integer input
    must be among ``nx.tournament_numbers(active_only=True)`` and is then
    returned unchanged. ValueError is raised for an unrecognized integer and
    for input that is neither str nor int.
    """
    value = tournament_int_or_str
    if nx.isstring(value):
        return tournament_str2int(value)
    if not nx.isint(value):
        raise ValueError('input must be a str or int')
    if value in nx.tournament_numbers(active_only=True):
        return value
    raise ValueError("`tournament_int_or_str` not recognized")
Exemple #8
0
def test_isint():
    """Check nx.isint on ints and on values that must not count as ints."""
    # plain positive and negative integers are accepted
    for value in (1, -1):
        ok_(nx.isint(value))
    # floats, strings, booleans and None are rejected
    for value in (1.1, 'a', True, False, None):
        ok_(not nx.isint(value))
Exemple #9
0
    def whatif(self, users, s, c, round1, round2):
        """
        Profit if `users` had staked `s` and `c` in every tournament.

        Earnings are left in NMR instead of splitting the NMR earnings into
        NMR and USD.

        The leaderboard slice ``self.lb[round1:round2]`` is handed to the
        module-level ``whatif`` function together with `users`, `s` and `c`,
        and its result is returned. When `round1` is an integer it must be
        113 or greater, otherwise ValueError is raised; a non-integer
        `round1` is not range checked here.

        """
        if nx.isint(round1) and round1 < 113:
            raise ValueError("`round1` must start at at least 113")
        return whatif(self.lb[round1:round2], users, s, c)
Exemple #10
0
 def pairs_with_tournament(self, tournament, as_str=True):
     """
     List the pairs whose tournament matches `tournament`.

     `tournament` can be an int, a str, or a list of ints/strs. Strings and
     list elements are converted with ``nx.tournament_int``; a bare int is
     used as given. Pairs come from ``self.pairs(as_str=False)`` and are
     matched on their second element. When `as_str` is true the second
     element of each returned pair is converted with ``nx.tournament_str``.

     Raises ValueError if `tournament` is not an int, str or list.
     """
     if isinstance(tournament, list):
         wanted = [nx.tournament_int(t) for t in tournament]
     elif nx.isstring(tournament):
         wanted = [nx.tournament_int(tournament)]
     elif nx.isint(tournament):
         wanted = [tournament]
     else:
         raise ValueError('`tournament` must be int, str or list')

     matches = [pr for pr in self.pairs(as_str=False) if pr[1] in wanted]
     if as_str:
         matches = [(pr[0], nx.tournament_str(pr[1])) for pr in matches]
     return matches
Exemple #11
0
 def __getitem__(self2, index):
     """
     Return column values of ``self2.df`` for one or all tournaments.

     * str -- one of 'bernie', 'elizabeth', 'jordan', 'ken', 'charles';
       returns the values of that column.
     * int -- a tournament number from 1 to 5; the column name is looked up
       with ``nx.tournament_str``.
     * slice -- only the full slice ``[:]`` is allowed; returns the values
       of the last five columns.

     Any other index raises IndexError.
     """
     if isinstance(index, str):
         names = ('bernie', 'elizabeth', 'jordan', 'ken', 'charles')
         if index not in names:
             raise IndexError('string index not recognized')
         return self2.df[index].values

     if nx.isint(index):
         if not 1 <= index <= 5:
             raise IndexError('tournament number must be between 1 and 5')
         return self2.df[nx.tournament_str(index)].values

     if isinstance(index, slice):
         parts = (index.start, index.stop, index.step)
         if any(part is not None for part in parts):
             raise IndexError('Start, stop, and step of slice must be None')
         # positional column slice; per the original author's note this
         # yields a view instead of a copy
         return self2.df.iloc[:, -5:].values

     raise IndexError('indexing type not recognized')
Exemple #12
0
 def __getitem__(self, index):
     """
     Prediction indexing is by model pair(s).

     * tuple of length 2 -- ``(names, tournaments)``. The first element is
       either the full slice ``:`` (all pairs) or a model name string; the
       second is either the full slice, a tournament int, or a tournament
       str. The pairs satisfying both elements are selected.
     * str -- all pairs returned by ``self.pairs_with_name`` for that name.
     * anything else -- treated as an iterable of 2-element pairs whose
       second element is converted with ``nx.tournament_int``.

     Returns a new ``Prediction`` built from the selected columns of
     ``self.df``. Unsupported indices raise IndexError.
     """
     if isinstance(index, tuple):
         if len(index) != 2:
             raise IndexError("When indexing by tuple must be length 2")
         name_part, tournament_part = index

         # pairs permitted by the name element
         if isinstance(name_part, slice):
             if not is_none_slice(name_part):
                 raise IndexError("Slces must be slice(None, None, None,)")
             by_name = self.pairs(as_str=False)
         elif nx.isstring(name_part):
             by_name = self.pairs_with_name(name_part, as_str=False)
         else:
             raise IndexError("indexing method not recognized")

         # pairs permitted by the tournament element
         if isinstance(tournament_part, slice):
             if not is_none_slice(tournament_part):
                 raise IndexError("Slces must be slice(None, None, None,)")
             by_tournament = self.pairs(as_str=False)
         elif nx.isint(tournament_part) or nx.isstring(tournament_part):
             by_tournament = self.pairs_with_tournament(tournament_part,
                                                        as_str=False)
         else:
             raise IndexError("indexing method not recognized")

         # keep the order of the name-selected pairs
         selected = [pr for pr in by_name if pr in by_tournament]
         return Prediction(pd.DataFrame(data=self.df[selected]))

     if nx.isstring(index):
         selected = self.pairs_with_name(index, as_str=False)
         return Prediction(self.df[selected])

     # assume an iterable of tuple pairs
     selected = []
     for item in index:
         if len(item) != 2:
             msg = "Expecting list of tuple pairs with length 2"
             raise IndexError(msg)
         selected.append((item[0], nx.tournament_int(item[1])))
     return Prediction(self.df[selected])
Exemple #13
0
 def __getitem__(self2, index):
     """
     Return column values of ``self2.df`` for one or all tournaments.

     With ``n = nx.tournament_count(active_only=False)``:

     * str -- a name in ``nx.tournament_all(as_str=True,
       active_only=True)``; returns the values of that column.
     * int -- a tournament number from 1 to ``n``; the column name is
       looked up with ``nx.tournament_str``.
     * slice -- only the full slice ``[:]`` is allowed; returns the values
       of the last ``n`` columns.

     Any other index raises IndexError.
     """
     n = nx.tournament_count(active_only=False)

     if isinstance(index, str):
         if index not in nx.tournament_all(as_str=True, active_only=True):
             raise IndexError('string index not recognized')
         return self2.df[index].values

     if nx.isint(index):
         if not 1 <= index <= n:
             txt = 'tournament number must be between 1 and {}'
             raise IndexError(txt.format(n))
         return self2.df[nx.tournament_str(index)].values

     if isinstance(index, slice):
         parts = (index.start, index.stop, index.step)
         if any(part is not None for part in parts):
             raise IndexError('Start, stop, and step of slice must be None')
         # positional column slice; per the original author's note this
         # yields a view instead of a copy
         return self2.df.iloc[:, -n:].values

     raise IndexError('indexing type not recognized')
Exemple #14
0
 def __getitem__(self, index):
     """
     Return the rows of ``self.df`` for the requested round(s).

     * slice -- a range of rounds (no step allowed); endpoints are resolved
       with ``self.rounds_to_ints`` and both ends are included.
     * int -- a single round.
     * list or tuple -- the rounds contained in it.

     Before filtering, ``self.get_range``, ``self.get_round`` or
     ``self.get_list`` is called for the requested rounds (presumably to
     make sure they are loaded into ``self.df`` -- confirm).

     Raises ValueError for a slice with a step and IndexError for any other
     unsupported index.
     """
     if isinstance(index, slice):
         if index.step is not None:
             raise ValueError("slice step size must be 1")
         first, last = self.rounds_to_ints(index.start, index.stop)
         self.get_range(first, last)
         rounds = self.df['round']
         return self.df[(rounds >= first) & (rounds <= last)]

     if nx.isint(index):
         self.get_round(index)
         return self.df[self.df['round'] == index]

     if isinstance(index, (list, tuple)):
         self.get_list(index)
         return self.df[self.df['round'].isin(index)]

     raise IndexError("indexing method not supported")
Exemple #15
0
def run(model, splitter, tournament=None, verbosity=2):
    """
    Run one or more model/tournament pairs through a data splitter.

    Parameters
    ----------
    model : nx.Model, list, tuple
        A single prediction model, or a list or tuple of models. Model
        names must be unique.
    splitter : nx.Splitter
        An iterator of fit/predict data pairs. It is reset after every
        model/tournament pair and once more before returning.
    tournament : {None, int, str, list, tuple}, optional
        The tournament(s) to run each model through. None (default) means
        every tournament returned by ``nx.tournament_all()``. A list or
        tuple must not contain duplicate tournaments.
    verbosity : int, optional
        An integer that determines verbosity. Zero is silent.

    Returns
    -------
    p : nx.Prediction
        The accumulated predictions of all model/tournament pairs.

    Raises
    ------
    ValueError
        If `model` or `tournament` has an unsupported type, or if model
        names or tournaments are duplicated.

    """

    # normalize `model` to a sequence of models
    if isinstance(model, nx.Model):
        models = [model]
    elif isinstance(model, (list, tuple)):
        models = model
    else:
        raise ValueError('`model` must be a model, list, or tuple of models')
    model_names = [m.name for m in models]
    if len(set(model_names)) != len(model_names):
        raise ValueError('`model` cannot contain duplicate names')

    # normalize `tournament` to a list of tournament names
    if tournament is None:
        requested = nx.tournament_all()
    elif nx.isint(tournament) or nx.isstring(tournament):
        requested = [tournament]
    elif isinstance(tournament, (list, tuple)):
        requested = tournament
    else:
        msg = '`tournament` must be an integer, string, list, tuple, or None.'
        raise ValueError(msg)
    tournaments = [nx.tournament_str(t) for t in requested]
    if len(set(tournaments)) != len(tournaments):
        raise ValueError('`tournament` cannot contain duplicates')

    # accumulate predictions over every model/tournament combination
    prediction = nx.Prediction()
    for current_model in models:
        for name in tournaments:
            prediction += run_one(current_model, splitter, name,
                                  verbosity=verbosity)
            splitter.reset()
    splitter.reset()

    return prediction
Exemple #16
0
    def __getitem__(self, index):
        """
        Data indexing.

        Supported forms of `index`:

        * str starting with 'era' (e.g. 'era23') -- the data of that era,
          via ``self.era_isin``.
        * 'train', 'validation', 'test' or 'live' -- the data of that
          region, via ``self.region_isin``.
        * 'tournament' -- the data of the regions in ``TOURNAMENT_REGIONS``.
        * slice of era strings (e.g. ``data['era3':'era7']``) -- both ends
          are included; a missing end means the first/last unique era; the
          step, if given, must be a positive integer.
        * pd.Series or np.ndarray (subclasses included) -- passed to
          ``self.df[index]`` and wrapped in a new ``Data`` object.

        Raises
        ------
        IndexError
            If the index type or value is not supported.
        ValueError
            If a slice endpoint is a well-formed era string that is not
            among the unique eras (raised by ``list.index``).
        """
        if isinstance(index, str):
            if index.startswith('era'):
                if len(index) < 4:
                    raise IndexError('length of era string index too short')
                return self.era_isin([index])
            if index in ('train', 'validation', 'test', 'live'):
                return self.region_isin([index])
            if index == 'tournament':
                return self.region_isin(TOURNAMENT_REGIONS)
            raise IndexError('string index not recognized')

        if isinstance(index, slice):

            # step check
            step = index.step
            if step is None:
                step = 1
            else:
                if not nx.isint(step):
                    msg = "slice step size must be None or psotive integer"
                    raise IndexError(msg)
                if step < 1:
                    raise IndexError('slice step must be greater than 0')

            ueras = self.unique_era().tolist()

            # resolve start then stop to positions in `ueras`; a missing
            # endpoint defaults to the first/last unique era
            endpoints = ((index.start, 0), (index.stop, len(ueras) - 1))
            positions = []
            for era, default in endpoints:
                if era is None:
                    positions.append(default)
                elif not nx.isstring(era) or not era.startswith('era'):
                    msg = "slice elements must be strings like 'era23'"
                    raise IndexError(msg)
                else:
                    positions.append(ueras.index(era))
            idx1, idx2 = positions

            if idx1 > idx2:
                raise IndexError("slice cannot go from large to small era")

            # both positions are valid and ordered here, so an inclusive
            # list slice picks the eras
            return self.era_isin(ueras[idx1:idx2 + 1:step])

        # isinstance (rather than an exact type match) so that Series and
        # ndarray subclasses are accepted as well
        if isinstance(index, (pd.Series, np.ndarray)):
            return Data(self.df[index])

        raise IndexError('indexing type not recognized')