def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Derive a relative power rating per team from a set of related games.

    Every raw outcome is passed through ``coerce_fn`` and the results are
    handed, together with ``games``, to ``world_cup.build_model_logistic``.
    The fitted parameters are exponentiated, the ``'intercept'`` entry is
    removed, entries equal to exactly 1.0 are discarded, and what remains
    is min-max rescaled onto [0, 1].

    Args:
        games: Design data forwarded untouched to the model builder
            (the games should come from the same competition).
        outcomes: Iterable of raw outcomes, one per game.
        coerce_fn: Callable applied to each raw outcome before fitting.
        acc: Forwarded to ``build_model_logistic`` as ``acc``.
        alpha: Forwarded to ``build_model_logistic`` as ``alpha``.
        snap: When true, each rating is replaced by a coarse bucket value
            (0.0, 0.25, 0.5, 0.75 or 1.0) chosen by the 20th/40th/60th/80th
            percentile cut points, so that rough estimates do not look
            more precise than they are.

    Returns:
        A dict mapping each remaining parameter label (the team id) to its
        rating, or ``None`` when no parameters remain or their spread is
        below 0.0001.
    """
    coerced = pd.Series(list(map(coerce_fn, outcomes)))
    model = world_cup.build_model_logistic(coerced, games,
                                           acc=acc, alpha=alpha)

    weights = np.exp(model.params)
    del weights['intercept']
    # exp(0) == 1.0, so this discards parameters that were fitted to zero.
    weights = weights[weights != 1.0]

    highest = weights.max()
    lowest = weights.min()
    spread = highest - lowest
    if len(weights) == 0 or spread < 0.0001:
        return None

    scaled = weights.sub(lowest).div(spread)
    cutoffs = np.percentile(scaled, [20, 40, 60, 80])

    def _bucket(val):
        """Return the bucket value of the first cut point above ``val``."""
        for position, cutoff in enumerate(cutoffs):
            if cutoff > val:
                return position * 0.25
        return 1.0

    if not snap:
        return scaled.to_dict()
    # Coarsen the ratings into rough buckets.
    return scaled.apply(_bucket).to_dict()
def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Build a power model over a set of related games.

    The games should all come from the same competition, for example.
    Given the games and their outcomes, a logistic regression model is
    fitted (via ``world_cup.build_model_logistic``) and its parameters are
    turned into a relative ranking of the teams.

    Args:
        games: Design data passed unchanged to the model builder.
        outcomes: Iterable of raw outcomes, one per game.
        coerce_fn: Callable applied to each raw outcome before fitting.
        acc: Forwarded to ``build_model_logistic`` as ``acc``.
        alpha: Forwarded to ``build_model_logistic`` as ``alpha``.
        snap: If true, rankings are bucketed into the coarse values
            0.0, 0.25, 0.5, 0.75 and 1.0 using the 20th/40th/60th/80th
            percentiles as cut points. This is useful because the power
            estimate is rough and a single precise-looking number would
            suggest a false specificity.

    Returns:
        A dict of team id (parameter label) to power ranking between 0
        and 1, or ``None`` when no usable parameters remain or their
        range is smaller than 0.0001.
    """
    outcomes = pd.Series([coerce_fn(val) for val in outcomes])

    # NOTE(review): unconditional dump of the model inputs to fixed /tmp
    # paths; looks like debugging residue. Kept so existing side effects
    # are unchanged -- confirm nothing reads these files before removing.
    games.to_csv('/tmp/games.csv', index=None)
    outcomes.to_csv('/tmp/outcomes.csv', index=None)

    model = world_cup.build_model_logistic(outcomes, games,
                                           acc=acc, alpha=alpha)

    params = np.exp(model.params)
    del params['intercept']
    # exp(0) == 1.0: drop parameters whose fitted coefficient was zero.
    params = params[params != 1.0]

    max_param = params.max()
    min_param = params.min()
    param_range = max_param - min_param
    if len(params) == 0 or param_range < 0.0001:
        return None

    # Min-max rescale onto [0, 1].
    params = params.sub(min_param)
    params = params.div(param_range)
    qqs = np.percentile(params, [20, 40, 60, 80])

    def _snap(val):
        """Map a value to the bucket of the first cut point above it."""
        # enumerate instead of xrange: xrange is undefined on Python 3.
        for idx, cut in enumerate(qqs):
            if cut > val:
                return idx * 0.25
        return 1.0

    if snap:
        # Snap power data to rough buckets.
        return params.apply(_snap).to_dict()
    return params.to_dict()
def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Fit a power model on related games and return per-team rankings.

    All games are expected to belong to the same tournament or
    competition. The coerced outcomes and the games are given to
    ``world_cup.build_model_logistic``; the resulting parameters express
    the teams' strength relative to one another.

    Args:
        games: Design data passed unchanged to the model builder.
        outcomes: Iterable of raw outcomes, one per game.
        coerce_fn: Callable used to convert each raw outcome.
        acc: Forwarded to ``build_model_logistic`` as ``acc``.
        alpha: Forwarded to ``build_model_logistic`` as ``alpha``.
        snap: If true, each ranking is replaced by one of the coarse
            values 0.0, 0.25, 0.5, 0.75 or 1.0, selected with the
            20th/40th/60th/80th percentiles as thresholds. The ranking is
            only a rough estimate, so a single exact number could mislead
            through over-specificity.

    Returns:
        A dict from team id (parameter label) to a power index between 0
        and 1. ``None`` is returned when no parameters are left after
        filtering or when their range is below 0.0001.
    """
    outcomes = pd.Series([coerce_fn(val) for val in outcomes])

    # NOTE(review): these writes to hard-coded /tmp paths happen on every
    # call and look like leftover debugging output. They are preserved to
    # keep side effects identical -- verify no consumer before deleting.
    games.to_csv('/tmp/games.csv',index=None)
    outcomes.to_csv('/tmp/outcomes.csv',index=None)

    model = world_cup.build_model_logistic(outcomes, games,
                                           acc=acc, alpha=alpha)

    params = np.exp(model.params)
    del params['intercept']
    # A value of exactly 1.0 is exp(0), i.e. a zero coefficient; skip it.
    params = params[params != 1.0]

    max_param = params.max()
    min_param = params.min()
    param_range = max_param - min_param
    if len(params) == 0 or param_range < 0.0001:
        return None

    # Shift and scale so the weakest team is 0 and the strongest is 1.
    params = params.sub(min_param).div(param_range)
    qqs = np.percentile(params, [20, 40, 60, 80])

    def _snap(val):
        """Return 0.25 * (index of the first threshold above ``val``)."""
        # Was ``xrange(len(qqs))``, which raises NameError on Python 3;
        # enumerate behaves the same on Python 2 and 3.
        for idx, threshold in enumerate(qqs):
            if threshold > val:
                return idx * 0.25
        return 1.0

    if snap:
        # Snap power data to rough buckets.
        return params.apply(_snap).to_dict()
    return params.to_dict()
def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Build a power model over a set of related games.

    The games should all be from the same competition, for example.
    Given a series of games and their outcomes, builds a logistic
    regression model (``world_cup.build_model_logistic``) that computes a
    relative ranking for the teams.

    Args:
        games: Design data passed unchanged to the model builder.
        outcomes: Iterable of raw outcomes, one per game.
        coerce_fn: Callable applied to every raw outcome before fitting.
        acc: Forwarded to ``build_model_logistic`` as ``acc``.
        alpha: Forwarded to ``build_model_logistic`` as ``alpha``.
        snap: If set, the rankings are bucketed into the values 0.0,
            0.25, 0.5, 0.75 and 1.0 (cut at the 20th/40th/60th/80th
            percentiles). This is useful because we may only have rough
            estimates of power rating and we don't want to get a false
            specificity.

    Returns:
        A dict of team id (parameter label) to power ranking between 0
        and 1, or ``None`` if no parameters survive filtering or their
        range is below 0.0001.
    """
    outcomes = pd.Series([coerce_fn(val) for val in outcomes])

    # NOTE(review): debugging-style dump of inputs to fixed /tmp paths on
    # every call. Left in place to avoid changing observable side
    # effects -- confirm whether anything depends on these files.
    games.to_csv('/tmp/games.csv',index=None)
    outcomes.to_csv('/tmp/outcomes.csv',index=None)

    model = world_cup.build_model_logistic(outcomes, games,
                                           acc=acc, alpha=alpha)

    params = np.exp(model.params)
    del params['intercept']
    # exp(0) == 1.0, so this removes parameters with a zero coefficient.
    params = params[params != 1.0]

    max_param = params.max()
    min_param = params.min()
    param_range = max_param - min_param
    if len(params) == 0 or param_range < 0.0001:
        return None

    # Normalise the remaining parameters onto [0, 1].
    params = params.sub(min_param)
    params = params.div(param_range)
    qqs = np.percentile(params, [20, 40, 60, 80])

    def _snap(val):
        """Snap a value to the bucket of the first percentile above it."""
        # ``xrange`` does not exist on Python 3; enumerate is portable.
        for idx, boundary in enumerate(qqs):
            if boundary > val:
                return idx * 0.25
        return 1.0

    if snap:
        # Snap power data to rough buckets.
        return params.apply(_snap).to_dict()
    return params.to_dict()