def fit(self, results, train_true):
    """Fit ``self.p`` by minimizing the CRPS score over the training cases.

    Args:
        results: mapping queried with ``results.get(case_id)``; each entry
            is flattened into the feature array, which is then viewed as
            rows of 4 values (presumably 4 values per case -- confirm
            against the producer of ``results``).
        train_true: table exposing ``iterrows()`` whose rows carry the
            ``'Id'``, ``'Systole'`` and ``'Diastole'`` fields.

    Side effects:
        Stores the optimized parameters in ``self.p`` and prints the number
        of cases used, the ids that had no entry in ``results``, the fitted
        parameters and the final objective value.
    """
    features = []
    targets = []
    missing_ids = []
    used = 0

    for _, record in train_true.iterrows():
        case_id = record['Id']
        case_result = results.get(case_id)
        if case_result is None:
            # No prediction for this case: remember it and leave it out.
            missing_ids.append(case_id)
            continue
        used += 1
        features.extend(case_result)
        targets.extend([record['Systole'], record['Diastole']])

    print("{} cases are used to fit the model".format(used))
    if missing_ids:
        print("cases are missing: " + ','.join(str(m) for m in missing_ids))

    x = np.asarray(features).reshape((-1, 4))
    y = np.asarray(targets)

    def objective(params):
        # Score of the model output for a candidate parameter vector.
        return analysis.crps_score(self._get_result(x, params), y)

    solver_options = {'gtol': 1e-6, 'maxiter': 500, 'eps': 1e-5}
    optimum = minimize(objective,
                       self.p0,
                       bounds=self.bounds,
                       options=solver_options)

    self.p = optimum.x
    print("fitting parameters " + str(self.p))
    print("fitting score " + str(optimum.fun))
def fit(self, preds, train_true):
    """Fit one scaling coefficient per prediction source.

    Args:
        preds: sequence of mappings, one per prediction source; each is
            queried with ``.get(case_id)`` and, when present, must provide
            at least 4 values. Values ``[0:2]`` go to the case's first row
            and ``[2:4]`` to its second row, matching the
            Systole / Diastole order of the targets.
        train_true: table exposing ``shape`` and ``iterrows()`` whose rows
            carry the ``'Id'``, ``'Systole'`` and ``'Diastole'`` fields.

    Side effects:
        Sets ``self.p0`` (initial guess) and ``self.p`` (fitted
        coefficients) and prints progress information.
    """
    n_sources = len(preds)
    print("combine # predictions:" + ','.join(str(len(x)) for x in preds))
    self.p0 = np.ones(n_sources) * np.sqrt(n_sources)

    # Two rows per case, two columns per source. Entries that are never
    # filled stay NaN so that missing predictions can be recognised later.
    X = np.full((train_true.shape[0] * 2, n_sources * 2), np.nan)
    targets = []

    for case_no, (_, record) in enumerate(train_true.iterrows()):
        case = record['Id']
        targets.extend([record['Systole'], record['Diastole']])
        first_row = case_no * 2
        second_row = first_row + 1
        for source_no, source in enumerate(preds):
            sede = source.get(case)
            if sede is None:
                continue
            cols = slice(2 * source_no, 2 * source_no + 2)
            X[first_row, cols] = sede[0:2]
            X[second_row, cols] = sede[2:4]

    y = np.asarray(targets)
    print("init score :{}".format(
        analysis.crps_score(self._get_result(X, self.p0), y)))

    def objective(params):
        # CRPS score plus a penalty on the spread of the coefficients.
        score = analysis.crps_score(self._get_result(X, params), y)
        return score + self.ll * np.var(params)

    optimum = minimize(objective,
                       self.p0,
                       options={'gtol': 1e-5, 'eps': 1e-4, 'maxiter': 500})

    self.p = optimum.x
    print("fitting parameters " + str(self.p))
    print("fitting score " + str(optimum.fun))
def fit(self, results, train_true):
    """Optimize the model parameters against the known training values.

    Args:
        results: mapping looked up with ``results.get(case_id)``; the
            values of every found entry are concatenated and later viewed
            as rows of 4 numbers.
        train_true: table exposing ``iterrows()`` whose rows carry the
            ``'Id'``, ``'Systole'`` and ``'Diastole'`` fields.

    Side effects:
        Writes the optimum to ``self.p`` and prints how many cases were
        used, which ids were absent from ``results``, the fitted
        parameters and the reached objective value.
    """
    inputs, expected, absent = [], [], []
    n_used = 0

    for _, case in train_true.iterrows():
        key = case['Id']
        prediction = results.get(key)
        if prediction is not None:
            n_used += 1
            inputs.extend(prediction)
            expected.extend([case['Systole'], case['Diastole']])
        else:
            # Cases without a prediction are only reported, not fitted.
            absent.append(key)

    print("{} cases are used to fit the model".format(n_used))
    if absent:
        print("cases are missing: " + ','.join(map(str, absent)))

    x = np.asarray(inputs).reshape((-1, 4))
    y = np.asarray(expected)

    def crps_for(params):
        # Objective: score of the model output under candidate parameters.
        return analysis.crps_score(self._get_result(x, params), y)

    fit_result = minimize(
        crps_for,
        self.p0,
        bounds=self.bounds,
        options={'gtol': 1e-6, 'maxiter': 500, 'eps': 1e-5},
    )

    self.p = fit_result.x
    print("fitting parameters " + str(self.p))
    print("fitting score " + str(fit_result.fun))
class AverageModel(BaseModel):
    """Combine several predictions into one weighted average.

    Each prediction source gets one coefficient that rescales its reported
    error before the sources are averaged with weights ``1 / error**2``.
    """

    def __init__(self, ll=9.5e-5):
        # ll: weight of the variance penalty added to the fit objective.
        self.ll = ll
        # Fitted coefficients; populated by fit().
        self.p = None

    def _get_result(self, X, p):
        """Return the combined (value, error) pair for every row of ``X``.

        Each row of ``X`` is read as consecutive ``(value, error)`` pairs,
        one pair per source, and ``p`` holds one error multiplier per
        source. Pairs whose value is NaN are treated as missing and left
        out, with the same coefficients applied to the remaining ones.
        Ideally a separate model would be fitted using only the sources
        that are actually available.

        Args:
            X: 2-D array with ``2 * len(p)`` columns.
            p: one coefficient per source.

        Returns:
            Array of shape ``(X.shape[0], 2)`` holding
            ``[combined value, combined error]`` per row, or
            ``[nan, nan]`` for rows where every value is missing.
        """
        coeffs = np.asarray(p)
        combined = np.zeros((X.shape[0], 2))

        for row_no, row in enumerate(X):
            pairs = np.copy(row).reshape((-1, 2))
            raw_errors = np.copy(pairs[:, 1])
            pairs[:, 1] = raw_errors * coeffs

            available = pairs[~np.isnan(pairs[:, 0])]
            if available.shape[0] == 0:
                combined[row_no] = [np.nan, np.nan]
                continue

            values = available[:, 0]
            scaled_errors = available[:, 1]

            weighted_sum = np.sum(values / scaled_errors**2)
            precision = np.sum(1.0 / scaled_errors**2)
            mean = weighted_sum / precision

            error = 1.0 / np.sqrt(precision)
            # Never report more than the smallest unscaled input error.
            error = np.minimum(np.nanmin(raw_errors), error)
            # Widen the error when the sources disagree with each other.
            spread = np.std(values) / np.max(scaled_errors) / 3
            error = error * (1.0 + spread)**0.5

            combined[row_no] = [mean, error]

        return combined

    def fit(self, preds, train_true):
        """Fit the per-source coefficients on the training cases.

        Args:
            preds: sequence of mappings, one per prediction source; each
                is queried with ``.get(case_id)`` and, when present, must
                provide at least 4 values. Values ``[0:2]`` fill the
                case's first row and ``[2:4]`` its second row, matching
                the Systole / Diastole order of the targets.
            train_true: table exposing ``shape`` and ``iterrows()`` whose
                rows carry the ``'Id'``, ``'Systole'`` and ``'Diastole'``
                fields.

        Side effects:
            Sets ``self.p0`` and ``self.p`` and prints the initial score,
            the fitted coefficients and the final objective value.
        """
        n_sources = len(preds)
        print("combine # predictions:" +
              ','.join(str(len(x)) for x in preds))
        self.p0 = np.ones(n_sources) * np.sqrt(n_sources)

        # Two rows per case and two columns per source; slots that are
        # never written stay NaN and are skipped by _get_result.
        X = np.full((train_true.shape[0] * 2, n_sources * 2), np.nan)
        targets = []

        for case_no, (_, record) in enumerate(train_true.iterrows()):
            case = record['Id']
            targets.extend([record['Systole'], record['Diastole']])
            first_row = case_no * 2
            second_row = first_row + 1
            for source_no, source in enumerate(preds):
                sede = source.get(case)
                if sede is None:
                    continue
                cols = slice(2 * source_no, 2 * source_no + 2)
                X[first_row, cols] = sede[0:2]
                X[second_row, cols] = sede[2:4]

        y = np.asarray(targets)
        print("init score :{}".format(
            analysis.crps_score(self._get_result(X, self.p0), y)))

        def objective(params):
            # CRPS score plus a penalty on the spread of the coefficients.
            score = analysis.crps_score(self._get_result(X, params), y)
            return score + self.ll * np.var(params)

        optimum = minimize(objective,
                           self.p0,
                           options={
                               'gtol': 1e-5,
                               'eps': 1e-4,
                               'maxiter': 500
                           })

        self.p = optimum.x
        print("fitting parameters " + str(self.p))
        print("fitting score " + str(optimum.fun))