def sim_pearson(a, b, signature=None):
    """Return the Pearson correlation coefficient between vectors `a` and `b`.

    Parameters
    ----------
    a, b : ndarray
        Input vectors of equal length (must support ``.mean()``).
    signature : object, optional
        Unused; kept only for backward compatibility with existing callers.

    Returns
    -------
    float
        Correlation in [-1, 1]; NaN if either vector is constant
        (zero denominator).
    """
    am = a - a.mean()
    bm = b - b.mean()
    r_num = np.add.reduce(am * bm)
    # Sum of squares inlined via np.add.reduce (axis-0 sum, the exact
    # equivalent of scipy.stats.ss, which was removed in SciPy 0.17).
    r_den = np.sqrt(np.add.reduce(am * am) * np.add.reduce(bm * bm))
    r = r_num / r_den
    # Clamp tiny floating-point overshoot beyond +/-1.
    return max(min(r, 1.0), -1.0)
def sim_pearson(a, b):
    """Return the Pearson correlation coefficient between vectors `a` and `b`.

    Parameters
    ----------
    a, b : ndarray
        Input vectors of equal length (must support ``.mean()``).

    Returns
    -------
    float
        Correlation in [-1, 1]; NaN if either vector is constant
        (zero denominator).
    """
    am, bm = a - a.mean(), b - b.mean()
    r_num = np.add.reduce(am * bm)
    # scipy.stats.ss was removed in SciPy 0.17; np.add.reduce of the
    # squared deviations is its exact axis-0 equivalent.
    r_den = np.sqrt(np.add.reduce(am * am) * np.add.reduce(bm * bm))
    r = r_num / r_den
    # Guard against floating-point results marginally outside [-1, 1].
    return max(min(r, 1.0), -1.0)
def pearsonr(x, y):
    """Return the Pearson correlation coefficient of `x` and `y`.

    Parameters
    ----------
    x, y : array_like
        Input sequences of the same length.

    Returns
    -------
    float
        Correlation clamped to [-1, 1]; NaN if either input is constant.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    xm, ym = x - x.mean(), y - y.mean()
    r_num = np.add.reduce(xm * ym)
    # The original multiplied numerator and denominator by n, which
    # cancels exactly — dropped as wasted work.  scipy.stats.ss (removed
    # in SciPy 0.17) is inlined as an axis-0 sum of squares.
    r_den = np.sqrt(np.add.reduce(xm * xm) * np.add.reduce(ym * ym))
    r = r_num / r_den
    # Clamp tiny floating-point overshoot beyond +/-1.
    r = max(min(r, 1.0), -1.0)
    return r
def pearsonr(x, y):
    """Return the Pearson correlation coefficient of `x` and `y`.

    Parameters
    ----------
    x, y : array_like
        Input sequences of the same length.

    Returns
    -------
    float
        Correlation clamped to [-1, 1]; NaN if either input is constant.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    dev_x = x - x.mean()
    dev_y = y - y.mean()
    numerator = np.add.reduce(dev_x * dev_y)
    # Both numerator and denominator carried a redundant factor of n in the
    # original, which cancels — removed.  The removed scipy.stats.ss helper
    # is replaced by its exact equivalent, an axis-0 sum of squares.
    denominator = np.sqrt(np.add.reduce(dev_x * dev_x) * np.add.reduce(dev_y * dev_y))
    r = numerator / denominator
    # Keep r inside [-1, 1] despite floating-point rounding.
    r = max(min(r, 1.0), -1.0)
    return r
def loglike(self, params):
    """Return the Gaussian log-likelihood at `params` for a WLS model.

    The concentrated log-likelihood is evaluated at the parameter vector
    `params` using the whitened design matrix `self.wexog` and whitened
    response `self.wendog`.

    Parameters
    ----------
    params : array-like
        The parameter estimates.

    Returns
    -------
    llf : float
        The value of the log-likelihood function for a WLS model.

    Notes
    -----
    .. math:: -\\frac{n}{2}\\log\\left(Y-\\hat{Y}\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}\\log\\left(\\left|W\\right|\\right)

    where :math:`W` is a diagonal matrix of weights.
    """
    nobs2 = self.nobs / 2.0
    resid = self.wendog - np.dot(self.wexog, params)
    # Sum of squared residuals; scipy.stats.ss (removed in SciPy 0.17)
    # is inlined as an axis-0 sum of squares.
    SSR = np.add.reduce(resid * resid)
    llf = -np.log(SSR) * nobs2                    # concentrated likelihood
    llf -= (1 + np.log(np.pi / nobs2)) * nobs2    # with likelihood constant
    if np.all(self.weights != 1):    # FIXME: is this a robust-enough check?
        # log-determinant of the (diagonal) weight matrix
        llf -= .5 * np.log(np.multiply.reduce(1 / self.weights))
    return llf
def loglike(self, params):
    """Return the Gaussian log-likelihood at `params` for a GLS model.

    The concentrated log-likelihood is evaluated at the parameter vector
    `params` using the whitened design matrix `self.wexog` and whitened
    response `self.wendog`.

    Parameters
    ----------
    params : array-like
        The parameter estimates.

    Returns
    -------
    loglike : float
        The value of the log-likelihood function for a GLS model.

    Notes
    -----
    The log-likelihood function for the normal distribution is

    .. math:: -\\frac{n}{2}\\log\\left(Y-\\hat{Y}\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}\\log\\left(\\left|\\Sigma\\right|\\right)

    Y and Y-hat are whitened.
    """
    #TODO: combine this with OLS/WLS loglike and add _det_sigma argument
    nobs2 = self.nobs / 2.0
    resid = self.wendog - np.dot(self.wexog, params)
    # Sum of squared residuals; scipy.stats.ss (removed in SciPy 0.17)
    # is inlined as an axis-0 sum of squares.
    SSR = np.add.reduce(resid * resid)
    llf = -np.log(SSR) * nobs2                    # concentrated likelihood
    llf -= (1 + np.log(np.pi / nobs2)) * nobs2    # with likelihood constant
    if np.any(self.sigma) and self.sigma.ndim == 2:
        #FIXME: robust-enough check? unneeded if _det_sigma gets defined
        llf -= .5 * np.log(np.linalg.det(self.sigma))  # with error covariance
    return llf
def sum_squared_error(x, y, popt):
    """Return the sum of squared residuals of a sigmoid fit.

    Parameters
    ----------
    x : array_like
        Independent variable passed to ``sigmoid``.
    y : array_like
        Observed values.
    popt : sequence
        Fitted sigmoid parameters, unpacked into ``sigmoid(x, *popt)``.

    Returns
    -------
    float
        Sum of squared differences between `y` and the sigmoid prediction.
    """
    residuals = y - sigmoid(x, *popt)
    # scipy.stats.ss was removed in SciPy 0.17; np.add.reduce of the
    # squared residuals is its exact axis-0 equivalent.
    return np.add.reduce(residuals * residuals)
def pearsonr(x, y, dof=None):
    """
    Calculates a Pearson correlation coefficient and the p-value for testing
    non-correlation.

    The Pearson correlation coefficient measures the linear relationship
    between two datasets. Strictly speaking, Pearson's correlation requires
    that each dataset be normally distributed. Like other correlation
    coefficients, this one varies between -1 and +1 with 0 implying no
    correlation. Correlations of -1 or +1 imply an exact linear relationship.
    Positive correlations imply that as x increases, so does y. Negative
    correlations imply that as x increases, y decreases.

    The p-value roughly indicates the probability of an uncorrelated system
    producing datasets that have a Pearson correlation at least as extreme as
    the one computed from these datasets. The p-values are not entirely
    reliable but are probably reasonable for datasets larger than 500 or so.

    This is a modified version that supports an optional argument to set the
    degrees of freedom (dof) manually.

    Parameters
    ----------
    x : (N,) array_like
        Input
    y : (N,) array_like
        Input
    dof : int or None, optional
        Degrees of freedom; defaults to ``len(x) - 2`` when None.

    Returns
    -------
    (Pearson's correlation coefficient, 2-tailed p-value)

    References
    ----------
    http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation
    """
    # Local import: betai/ss were removed from scipy.stats (SciPy 0.17);
    # betai(a, b, x) is exactly the regularized incomplete beta function.
    from scipy import special
    # x and y should have same length.
    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    xm, ym = x - x.mean(), y - y.mean()
    r_num = np.add.reduce(xm * ym)
    r_den = np.sqrt(np.add.reduce(xm * xm) * np.add.reduce(ym * ym))
    r = r_num / r_den
    # Presumably, if abs(r) > 1, then it is only some small artifact of
    # floating point arithmetic.
    r = max(min(r, 1.0), -1.0)
    df = n - 2 if dof is None else dof
    if abs(r) == 1.0:
        prob = 0.0
    else:
        # Two-tailed p-value from the t distribution via the regularized
        # incomplete beta function.
        t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
        prob = special.betainc(0.5 * df, 0.5, df / (df + t_squared))
    return r, prob