def __getitem__(self, index):
    """
    Index by round number, list, tuple, or slice.

    Supported forms of `index`:

    * slice -- rounds ``start`` through ``stop`` (both ends included once
      converted by ``self.rounds_to_ints``) for every tournament returned
      by ``nx.tournament_all(as_str=False)``. A slice step is rejected.
    * int -- a single round, for every tournament returned by
      ``nx.tournament_all(as_str=False)``.
    * list -- a list of ``(round, tournament)`` pairs; every tournament is
      converted with ``nx.tournament_int``.
    * tuple -- one ``(round, tournament)`` pair; the round must be an int
      and the tournament an int or a str.

    ``self.gets(rounds, tournaments)`` is called before the lookup. The
    returned dataframe holds the rows of ``self.df`` whose 'round' is among
    the requested rounds AND whose 'tournament' is among the requested
    tournaments (so a list index selects the cross product of its rounds
    and tournaments).

    Raises ValueError for a slice with a step and IndexError for a
    malformed tuple or an unsupported index type.
    """
    if isinstance(index, slice):
        if index.step is not None:
            raise ValueError("slice step size must be 1")
        first, last = self.rounds_to_ints(index.start, index.stop)
        rounds = list(range(first, last + 1))
        tournaments = nx.tournament_all(as_str=False)
    elif nx.isint(index):
        rounds = [index]
        tournaments = nx.tournament_all(as_str=False)
    elif isinstance(index, list):
        # unzip the (round, tournament) pairs into two parallel sequences
        rounds, raw_tournaments = zip(*index)
        tournaments = [nx.tournament_int(item) for item in raw_tournaments]
    elif isinstance(index, tuple):
        if len(index) != 2:
            raise IndexError("tuple index must have length 2")
        round_number, tournament = index
        if not nx.isint(round_number):
            raise IndexError("first element of tuple index must be int")
        if not nx.isint(tournament) and not nx.isstring(tournament):
            msg = "second element of tuple index must be int or str"
            raise IndexError(msg)
        rounds = [round_number]
        tournaments = [nx.tournament_int(tournament)]
    else:
        raise IndexError("indexing method not supported")

    self.gets(rounds, tournaments)

    in_rounds = self.df['round'].isin(rounds)
    in_tournaments = self.df['tournament'].isin(tournaments)
    return self.df[in_rounds & in_tournaments]
def next_split(self):
    """
    Return the (fit, predict) data pair for the current value of
    ``self.count``.

    On the first call (``self.count == 0``) the unique eras of the data
    (restricted to ``data['train']`` when ``self.p['train_only']`` is set)
    are partitioned into ``self.p['kfold']`` groups stored in
    ``self.eras``:

    * With ``n = int(number_of_eras / kfold)`` and ``idx = seed % n``, one
      group is made of ``eras[idx:n]`` plus the last ``idx`` eras.
    * The remaining eras are split into ``kfold - 1`` groups with
      ``np.array_split``; the group described above is appended last.

    The predict data are the eras of group number ``self.count``; the fit
    data are the eras of all other groups.

    Raises ValueError if ``self.p['seed']`` is not an integer.
    """
    seed = self.p['seed']
    if not nx.isint(seed):
        raise ValueError("`seed` must be an integer")

    data = self.p['data']
    if self.count == 0:
        if self.p['train_only']:
            data = data['train']
        kfold = self.p['kfold']
        all_eras = data.unique_era()
        fold_size = int(all_eras.size / kfold)
        offset = seed % fold_size
        if offset == 0:
            special = all_eras[offset:fold_size]
            remaining = all_eras[fold_size:]
        else:
            # the special fold wraps around: tail of the first chunk plus
            # the last `offset` eras
            special = np.concatenate((all_eras[offset:fold_size],
                                      all_eras[-offset:]))
            remaining = np.concatenate((all_eras[:offset],
                                        all_eras[fold_size:-offset]))
        folds = np.array_split(remaining, kfold - 1)
        folds.append(special)
        self.eras = folds

    # every fold except the current one is used for fitting
    fit_eras = np.concatenate(
        [fold for i, fold in enumerate(self.eras) if i != self.count]
    )
    dfit = data.era_isin(fit_eras)
    dpre = data.era_isin(self.eras[self.count])
    return dfit, dpre
def tournament_str(tournament_int_or_str):
    """
    Convert tournament int or str to str.

    A string is returned unchanged when it is one of
    ``nx.tournament_names(active_only=True)``; an int is converted with
    ``tournament_int2str``.

    Raises ValueError for an unknown name or for input that is neither a
    str nor an int.
    """
    if nx.isstring(tournament_int_or_str):
        known_names = nx.tournament_names(active_only=True)
        if tournament_int_or_str in known_names:
            return tournament_int_or_str
        raise ValueError('tournament name is unknown')
    if nx.isint(tournament_int_or_str):
        return tournament_int2str(tournament_int_or_str)
    raise ValueError('input must be a str or int')
def tournament_str(tournament_int_or_str):
    """
    Convert tournament int or str to str.

    A string is returned as is provided it appears in
    ``nx.tournament_names(active_only=False)``; an int is translated by
    ``tournament_int2str``.

    Raises ValueError when the name is not recognized or when the input is
    neither a str nor an int.
    """
    value = tournament_int_or_str
    if nx.isstring(value):
        if value in nx.tournament_names(active_only=False):
            return value
        raise ValueError('tournament name is not recognized')
    if not nx.isint(value):
        raise ValueError('input must be a str or int')
    return tournament_int2str(value)
def tournament_int(tournament_int_or_str):
    """
    Convert tournament int or str to int.

    A string is converted with ``tournament_str2int``; an int is returned
    unchanged when it is one of 1, 2, 3, 4, 5.

    Raises ValueError for an int outside 1..5 or for input that is neither
    a str nor an int.
    """
    value = tournament_int_or_str
    if nx.isstring(value):
        return tournament_str2int(value)
    if not nx.isint(value):
        raise ValueError('input must be a str or int')
    if value not in (1, 2, 3, 4, 5):
        raise ValueError('tournament int must be between 1 and 5')
    return value
def tournament_str(tournament_int_or_str):
    """
    Convert tournament int or str to str.

    A string must be a member of ``TOURNAMENT_NAMES`` and is then returned
    unchanged; an int is converted with ``tournament_int2str``.

    Raises ValueError for an unknown name or for input that is neither a
    str nor an int.
    """
    name_or_number = tournament_int_or_str
    if nx.isstring(name_or_number):
        if name_or_number in TOURNAMENT_NAMES:
            return name_or_number
        raise ValueError('tournament name is unknown')
    if nx.isint(name_or_number):
        return tournament_int2str(name_or_number)
    raise ValueError('input must be a str or int')
def tournament_int(tournament_int_or_str):
    """
    Convert tournament int or str to int.

    A string is converted with ``tournament_str2int``; an int is returned
    unchanged when it appears in
    ``nx.tournament_numbers(active_only=True)``.

    Raises ValueError for an int that is not in that list or for input
    that is neither a str nor an int.
    """
    value = tournament_int_or_str
    if nx.isstring(value):
        return tournament_str2int(value)
    if not nx.isint(value):
        raise ValueError('input must be a str or int')
    valid_numbers = nx.tournament_numbers(active_only=True)
    if value in valid_numbers:
        return value
    raise ValueError("`tournament_int_or_str` not recognized")
def test_isint():
    """
    test isint

    Plain integers must be accepted; floats, strings, booleans and None
    must be rejected.
    """
    for accepted in (1, -1):
        ok_(nx.isint(accepted))
    for rejected in (1.1, 'a', True, False, None):
        ok_(not nx.isint(rejected))
def whatif(self, users, s, c, round1, round2):
    """
    Profit if `users` had staked `s` and `c` in every tournament.

    Earnings are left in NMR instead of splitting the NMR earnings into
    NMR and USD.

    ``self.lb`` is sliced as ``self.lb[round1:round2]`` and the result is
    passed, together with `users`, `s` and `c`, to the ``whatif`` function
    found in the enclosing module scope; that function's return value is
    returned unchanged.

    Raises
    ------
    ValueError
        If `round1` is an integer smaller than 113. The check is skipped
        when `round1` is not an integer (for example None).
    """
    if nx.isint(round1) and round1 < 113:
        raise ValueError("`round1` must be at least 113")
    # inside a method the bare name `whatif` refers to the module-level
    # function, not to this method
    return whatif(self.lb[round1:round2], users, s, c)
def pairs_with_tournament(self, tournament, as_str=True):
    """
    List of pairs; `tournament` can be int, str, or list.

    The pairs come from ``self.pairs(as_str=False)``; a pair is kept when
    its second element is one of the requested tournaments. List elements
    and a str are converted with ``nx.tournament_int``; a bare int is used
    as given. When `as_str` is true the second element of every returned
    pair is converted with ``nx.tournament_str``.

    Raises ValueError if `tournament` is not an int, str or list.
    """
    if isinstance(tournament, list):
        wanted = [nx.tournament_int(item) for item in tournament]
    elif nx.isstring(tournament):
        wanted = [nx.tournament_int(tournament)]
    elif nx.isint(tournament):
        wanted = [tournament]
    else:
        raise ValueError('`tournament` must be int, str or list')

    selected = [pair for pair in self.pairs(as_str=False)
                if pair[1] in wanted]
    if as_str:
        selected = [(pair[0], nx.tournament_str(pair[1]))
                    for pair in selected]
    return selected
def __getitem__(self2, index):
    """
    Return values of ``self2.df`` selected by `index`.

    * str -- one of 'bernie', 'elizabeth', 'jordan', 'ken', 'charles';
      returns ``self2.df[index].values``.
    * int -- a tournament number from 1 to 5; it is converted to a column
      name with ``nx.tournament_str``.
    * slice -- only the empty slice ``[:]`` is allowed; returns the values
      of the last five columns of ``self2.df``.

    Raises IndexError for any other index.
    """
    if isinstance(index, str):
        if index not in ('bernie', 'elizabeth', 'jordan', 'ken', 'charles'):
            raise IndexError('string index not recognized')
        return self2.df[index].values

    if nx.isint(index):
        if not 1 <= index <= 5:
            raise IndexError('tournament number must be between 1 and 5')
        return self2.df[nx.tournament_str(index)].values

    if isinstance(index, slice):
        parts = (index.start, index.stop, index.step)
        if any(part is not None for part in parts):
            raise IndexError('Start, stop, and step of slice must be None')
        # original author's note: slicing with iloc means a view is
        # returned instead of a copy
        return self2.df.iloc[:, -5:].values

    raise IndexError('indexing type not recognized')
def __getitem__(self, index):
    """
    Prediction indexing is by model pair(s).

    Supported forms of `index`:

    * tuple of length 2, ``(name, tournament)`` -- the first element is a
      str (pairs found by ``self.pairs_with_name``) or the empty slice
      ``:`` (all pairs); the second element is an int or str (pairs found
      by ``self.pairs_with_tournament``) or the empty slice ``:``. The
      result holds the pairs that satisfy both selections, in the order of
      the first selection.
    * str -- all pairs returned by ``self.pairs_with_name(index)``.
    * anything else -- treated as an iterable of length-2 items
      ``(name, tournament)``; each tournament is converted with
      ``nx.tournament_int``.

    Returns a new ``Prediction`` built from the selected columns of
    ``self.df``.

    Raises IndexError for a tuple of the wrong length, a slice that is not
    ``slice(None, None, None)``, an unsupported element type, or an item
    of the iterable whose length is not 2.
    """
    if isinstance(index, tuple):
        if len(index) != 2:
            raise IndexError("When indexing by tuple must be length 2")
        name_part, tournament_part = index

        # pairs selected by the first (name) element
        if isinstance(name_part, slice):
            if not is_none_slice(name_part):
                raise IndexError("Slices must be slice(None, None, None,)")
            pairs1 = self.pairs(as_str=False)
        elif nx.isstring(name_part):
            pairs1 = self.pairs_with_name(name_part, as_str=False)
        else:
            raise IndexError("indexing method not recognized")

        # pairs selected by the second (tournament) element
        if isinstance(tournament_part, slice):
            if not is_none_slice(tournament_part):
                raise IndexError("Slices must be slice(None, None, None,)")
            pairs2 = self.pairs(as_str=False)
        elif nx.isint(tournament_part) or nx.isstring(tournament_part):
            pairs2 = self.pairs_with_tournament(tournament_part,
                                                as_str=False)
        else:
            raise IndexError("indexing method not recognized")

        # intersection, keeping the order of the name selection
        pairs = [pair for pair in pairs1 if pair in pairs2]
        return Prediction(pd.DataFrame(data=self.df[pairs]))

    if nx.isstring(index):
        pairs = self.pairs_with_name(index, as_str=False)
        return Prediction(self.df[pairs])

    # assume an iterable of tuple pairs
    columns = []
    for item in index:
        if len(item) != 2:
            msg = "Expecting list of tuple pairs with length 2"
            raise IndexError(msg)
        columns.append((item[0], nx.tournament_int(item[1])))
    return Prediction(self.df[columns])
def __getitem__(self2, index):
    """
    Return values of ``self2.df`` selected by `index`.

    With ``n = nx.tournament_count(active_only=False)``:

    * str -- must be in ``nx.tournament_all(as_str=True,
      active_only=True)``; returns ``self2.df[index].values``.
    * int -- a tournament number from 1 to ``n``; it is converted to a
      column name with ``nx.tournament_str``.
    * slice -- only the empty slice ``[:]`` is allowed; returns the values
      of the last ``n`` columns of ``self2.df``.

    Raises IndexError for any other index.
    """
    n = nx.tournament_count(active_only=False)

    if isinstance(index, str):
        if index not in nx.tournament_all(as_str=True, active_only=True):
            raise IndexError('string index not recognized')
        return self2.df[index].values

    if nx.isint(index):
        if not 1 <= index <= n:
            txt = 'tournament number must be between 1 and {}'
            raise IndexError(txt.format(n))
        return self2.df[nx.tournament_str(index)].values

    if isinstance(index, slice):
        parts = (index.start, index.stop, index.step)
        if any(part is not None for part in parts):
            raise IndexError('Start, stop, and step of slice must be None')
        # original author's note: slicing with iloc means a view is
        # returned instead of a copy
        return self2.df.iloc[:, -n:].values

    raise IndexError('indexing type not recognized')
def __getitem__(self, index):
    """
    Index by round number, list (or tuple), or slice.

    * slice -- rounds ``start`` through ``stop``, both ends included after
      conversion by ``self.rounds_to_ints``; ``self.get_range`` is called
      first. A slice step is rejected.
    * int -- a single round; ``self.get_round`` is called first.
    * list or tuple -- several rounds; ``self.get_list`` is called first.

    Returns the matching rows of ``self.df`` (looked up after the
    corresponding ``get_*`` call).

    Raises ValueError for a slice with a step and IndexError for an
    unsupported index type.
    """
    if isinstance(index, slice):
        if index.step is not None:
            raise ValueError("slice step size must be 1")
        first, last = self.rounds_to_ints(index.start, index.stop)
        self.get_range(first, last)
        rounds = self.df['round']
        return self.df[(rounds >= first) & (rounds <= last)]

    if nx.isint(index):
        self.get_round(index)
        return self.df[self.df['round'] == index]

    if isinstance(index, (list, tuple)):
        self.get_list(index)
        return self.df[self.df['round'].isin(index)]

    raise IndexError("indexing method not supported")
def run(model, splitter, tournament=None, verbosity=2):
    """
    Run a model/tournament pair (or pairs) through a data splitter.

    Parameters
    ----------
    model : nx.Model, list, tuple
        Prediction model to run through the splitter. Can be a list or
        tuple of prediction models. Model names must be unique.
    splitter : nx.Splitter
        An iterator of fit/predict data pairs. It is reset after every
        model/tournament pair and once more before returning.
    tournament : {None, int, str, list, tuple}, optional
        The tournament(s) to run the model through. By default (None) the
        tournaments returned by ``nx.tournament_all()`` are used. If a
        list or tuple of tournaments is given then it must not contain
        duplicate tournaments (compared after conversion with
        ``nx.tournament_str``).
    verbosity : int, optional
        An integer that determines verbosity. Zero is silent.

    Returns
    -------
    p : nx.Prediction
        A prediction object containing the predictions of the specified
        model/tournament pairs.

    Raises
    ------
    ValueError
        If `model` or `tournament` has an unsupported type, or if model
        names or tournaments are duplicated.
    """
    # normalize `model` to a sequence of models
    if isinstance(model, nx.Model):
        models = [model]
    elif isinstance(model, (list, tuple)):
        models = model
    else:
        raise ValueError('`model` must be a model, list, or tuple of models')
    model_names = [mdl.name for mdl in models]
    if len(set(model_names)) != len(model_names):
        raise ValueError('`model` cannot contain duplicate names')

    # normalize `tournament` to a list of tournament names
    if tournament is None:
        requested = nx.tournament_all()
    elif nx.isint(tournament) or nx.isstring(tournament):
        requested = [tournament]
    elif isinstance(tournament, (list, tuple)):
        requested = tournament
    else:
        msg = '`tournament` must be an integer, string, list, tuple, or None.'
        raise ValueError(msg)
    tournament_names = [nx.tournament_str(item) for item in requested]
    if len(set(tournament_names)) != len(tournament_names):
        raise ValueError('`tournament` cannot contain duplicates')

    # run every model/tournament combination, accumulating predictions
    prediction = nx.Prediction()
    for mdl in models:
        for name in tournament_names:
            prediction += run_one(mdl, splitter, name, verbosity=verbosity)
            splitter.reset()
    splitter.reset()
    return prediction
def __getitem__(self, index):
    """
    Data indexing.

    Supported forms of `index`:

    * str starting with 'era' (and longer than 'era' itself) -- returns
      ``self.era_isin([index])``.
    * 'train', 'validation', 'test' or 'live' -- returns
      ``self.region_isin([index])``.
    * 'tournament' -- returns ``self.region_isin(TOURNAMENT_REGIONS)``.
    * slice of era strings, e.g. ``data['era3':'era9']`` -- both ends are
      included; a missing start or stop means the first or last unique
      era; an optional positive integer step selects every step-th era.
      Positions are looked up in ``self.unique_era()``.
    * pd.Series or np.ndarray (or a subclass of either) -- used to index
      ``self.df`` directly; the result is wrapped in a new ``Data``.

    Raises
    ------
    IndexError
        For an unrecognized string, a bad slice step, a slice endpoint
        that is not a string starting with 'era', a slice that goes from a
        later era to an earlier one, or an unsupported index type.
    ValueError
        If a slice endpoint is not among the unique eras (raised by
        ``list.index``).
    """
    if isinstance(index, str):
        if index.startswith('era'):
            if len(index) < 4:
                raise IndexError('length of era string index too short')
            return self.era_isin([index])
        if index in ('train', 'validation', 'test', 'live'):
            return self.region_isin([index])
        if index == 'tournament':
            return self.region_isin(TOURNAMENT_REGIONS)
        raise IndexError('string index not recognized')

    if isinstance(index, slice):
        # step check
        step = index.step
        if step is None:
            step = 1
        elif not nx.isint(step):
            msg = "slice step size must be None or positive integer"
            raise IndexError(msg)
        elif step < 1:
            raise IndexError('slice step must be greater than 0')

        ueras = self.unique_era().tolist()

        # position of the first era in the slice
        start = index.start
        if start is None:
            first = 0
        elif nx.isstring(start) and start.startswith('era'):
            first = ueras.index(start)
        else:
            raise IndexError("slice elements must be strings like 'era23'")

        # position of the last era in the slice (inclusive)
        stop = index.stop
        if stop is None:
            last = len(ueras) - 1
        elif nx.isstring(stop) and stop.startswith('era'):
            last = ueras.index(stop)
        else:
            raise IndexError("slice elements must be strings like 'era23'")

        if first > last:
            raise IndexError("slice cannot go from large to small era")

        # both positions are valid non-negative indices here, so a plain
        # list slice selects the same eras as stepping through a range
        return self.era_isin(ueras[first:last + 1:step])

    # isinstance (rather than an exact type match) also accepts subclasses
    if isinstance(index, (pd.Series, np.ndarray)):
        return Data(self.df[index])

    raise IndexError('indexing type not recognized')