def plotInfluential ( InferenceObject ):
    """Diagnostic plot for detecting influential observations

    Determining influential observations follows a different logic for
    bootstrap and for bayes inference. A block is labelled an influential
    observation if the fit for a dataset without that point is significantly
    different from the fit including that point. For BootstrapInference
    objects, this is quantified using a normed distance of the maximum
    likelihood fit including the block and without that block. This distance
    is normed in the following way: If the maximum likelihood fit for the
    reduced dataset remains inside the 95% confidence limits of the maximum
    likelihood fit for the full dataset, the influence value is below 1.
    Thus, influence values larger than 1 indicate a problem with the data set.
    For BayesInference objects, the influence of a block is simply quantified
    as the Kullback-Leibler divergence of the posterior for the full data set
    from the posterior for the reduced data set.

    The figure has two panels. The upper panel shows the fit of the full data
    set in blue, marks the block with the largest influence value with a red
    cross, and overlays in red the curve for the MAP estimate obtained after
    removing that block. The lower panel shows the influence value of every
    block against the first data column.

    :Parameters:
        *InferenceObject* :
            Data set for which the influential observations are to be plotted
    """
    # Index of the block with the largest influence value
    maxinfl = N.argmax(InferenceObject.infl)
    data = InferenceObject.data
    model = InferenceObject.model

    # Row indices of the reduced data set. list() is required because a
    # Python 3 range object has no pop(); on Python 2 it is a harmless copy.
    ind = list ( range ( data.shape[0] ) )
    ind.pop(maxinfl)

    # Refit without the most influential block, starting from the full fit
    est = interface.mapestimate ( data[ind,:], start=InferenceObject.estimate, **model )[0]
    # 100 evenly spaced points spanning the range of the first data column
    x = N.mgrid[data[:,0].min():data[:,0].max():100j]
    influencedPMF = interface.diagnostics ( x, est,
            nafc = model["nafc"],
            sigmoid = model["sigmoid"],
            core = model["core"] )

    p.figure ( figsize=(6,8) )

    # Upper panel: full fit, most influential block, reduced fit
    ax = prepare_axes ( p.subplot ( 2,1,1 ) )
    ax.set_ylabel ( r"$\Psi(x)$" )
    # The second word of the repr is used as the name of the inference type
    inference_type = InferenceObject.__repr__().split()[1]
    if inference_type in ["BayesInference","ASIRInference"]:
        InferenceObject.drawposteriorexamples ( ax=ax )
    plotPMF ( InferenceObject, ax=ax, showaxes=True, showdesc=False, color="b", linewidth=2 )
    ax.plot ( [data[maxinfl,0]],
            [data[maxinfl,1].astype("d")/data[maxinfl,2]],
            'rx', markersize=20, markeredgewidth=5 )
    ax.plot ( x, influencedPMF, color="r", linewidth=2 )
    xl = list(ax.get_xlim ())

    # Lower panel: influence of every block
    ax = p.subplot ( 2,1,2, sharex=ax )
    if inference_type == "BootstrapInference":
        # Dotted reference line at an influence of 1
        ax.plot ( [data[:,0].min(),data[:,0].max()], [1,1], 'k:' )
        yname = "Influence"
    else:
        yname = "D_KL( full || reduced )"
    ax.plot ( data[:,0], InferenceObject.infl, 'bo' )
    ax.set_xlim(xl)
    drawaxes ( ax, ax.get_xticks(), "%g", ax.get_yticks(), "%g", r"stimulus intensity $x$", yname )
def test_old_doctest(self):
    """Pin diagnostics output to the values recorded in an old doctest.

    Builds six [x, k, n] blocks, calls ``interface.diagnostics`` with a fixed
    parameter vector and compares three of the seven returned quantities
    against stored reference numbers.
    """
    # `range` replaces the Python-2-only `xrange`; for six elements the
    # result is the same on both major versions. The comprehension index is
    # named `i` so it no longer reuses the name `k`, which is rebound below.
    x = [float(2 * i) for i in range(6)]
    k = [34, 32, 40, 48, 50, 48]
    n = [50] * 6
    # One [x, k, n] row per block
    d = [[xx, kk, nn] for xx, kk, nn in zip(x, k, n)]
    prm = [2.75, 1.45, 0.015]
    # The unpacking also checks that exactly seven values are returned
    pred, di, D, thres, slope, Rpd, Rkd = interface.diagnostics(d, prm)
    self.assertAlmostEqual(8.07484858608, D)
    self.assertAlmostEqual(1.68932796526, di[0])
    self.assertAlmostEqual(-0.19344675783032761, Rpd)
def test_old_doctest(self):
    """Compare diagnostics output with reference values from an old doctest.

    Six [x, k, n] blocks and a fixed parameter vector are passed to
    ``interface.diagnostics``; three of the seven returned values are then
    checked against stored numbers.
    """
    # NOTE(review): a second test_old_doctest with the same body is visible
    # nearby; if both live in the same class the later definition shadows the
    # earlier one -- confirm and drop one of them.
    # `range` replaces the Python-2-only `xrange` (same values on 2 and 3).
    x = [float(2*i) for i in range(6)]
    k = [34,32,40,48,50,48]
    n = [50]*6
    # One [x, k, n] row per block
    d = [[xx,kk,nn] for xx,kk,nn in zip(x,k,n)]
    prm = [2.75, 1.45, 0.015]
    # Unpacking doubles as a check that seven values come back
    pred,di,D,thres,slope,Rpd,Rkd = interface.diagnostics(d,prm)
    self.assertAlmostEqual(8.07484858608, D)
    self.assertAlmostEqual(1.68932796526, di[0])
    self.assertAlmostEqual(-0.19344675783032761, Rpd)
def test_single_cut(self):
    """Both backends must give the same float for a single cut.

    Added to ensure that swignifit also returns an ndarray here. It used to
    return a list, and a one-item list cannot be cast to a float whereas a
    one-item ndarray can. This was a problem in getThres() of the
    PsiInference class in psignidata.py.
    """
    # Position of the entry that is converted to float and compared
    entry = 3

    sfi_result = sfi.diagnostics(data, TestDiagnostics.prm, cuts=[0.5])
    sfi_value = float(sfi_result[entry])

    psipy_result = psipy.diagnostics(data, TestDiagnostics.prm, cuts=[0.5])
    psipy_value = float(psipy_result[entry])

    self.assertEqual(sfi_value, psipy_value)
def test_empty_data(self):
    """Smoke test: call diagnostics with an empty data sequence.

    According to the original note, only the threshold is obtained when an
    empty sequence is passed. The returned value is not inspected; the test
    passes if the call does not raise.
    """
    no_data = []
    threshold_only = interface.diagnostics(no_data, TestDiagnostics.prm)
def sample ():
    """Run every inference routine once and return this process' RSS in MB.

    ``d``, ``priors`` and ``k`` are read from the surrounding scope (not
    visible in this block). The resident set size is obtained from ``ps``.

    :Returns:
        resident set size as reported by ``ps -o rss``, divided by 1024
    """
    # Keep the results bound to locals: the objects then stay alive while the
    # memory reading below is taken, exactly as in the original code.
    boots = interface.bootstrap ( d, priors=priors, nsamples=1500-2*k )
    mape  = interface.mapestimate ( d, priors=priors )
    mcmc  = interface.mcmc ( d, start=(4,2,.02), priors=priors, nsamples = 1500-2*k )
    diag  = interface.diagnostics ( d, (4,1,.02) )

    # Ask for this very process by pid. Selecting by command name
    # ("ps -C python") reports whichever matching process is listed first and
    # finds nothing if the interpreter binary has a different name.
    pipe = os.popen ( "ps -p %d -o rss" % os.getpid() )
    try:
        lines = pipe.readlines()
    finally:
        # The pipe was never closed before, leaking one file handle per call
        pipe.close()
    # lines[0] is the column header printed by ps, lines[1] the value
    return float(lines[1])/1024
def test_cuts(self):
    """Smoke test: call diagnostics with three cuts; passes if nothing raises."""
    several_cuts = [0.5, 0.75, 0.85]
    interface.diagnostics(data, TestDiagnostics.prm, cuts=several_cuts)
def test_intensities_only(self):
    """Smoke test: call diagnostics with ``x`` in place of a full data set.

    The returned value is stored but not checked; the test passes if the
    call does not raise.
    """
    parameters = TestDiagnostics.prm
    predicted_values = interface.diagnostics(x, parameters)
def test_sigmoid(self):
    """Smoke test: call diagnostics with sigmoid='logistic'; passes if nothing raises."""
    sigmoid_name = 'logistic'
    interface.diagnostics(data, TestDiagnostics.prm, sigmoid=sigmoid_name)
def test_core(self):
    """Smoke test: call diagnostics with core='linear'; passes if nothing raises."""
    core_name = 'linear'
    interface.diagnostics(data, TestDiagnostics.prm, core=core_name)
def test_nafc(self):
    """Smoke test: call diagnostics with nafc=23; passes if nothing raises."""
    number_of_alternatives = 23
    interface.diagnostics(data, TestDiagnostics.prm, nafc=number_of_alternatives)
def test_basic(self):
    """Smoke test: call diagnostics with default options; passes if nothing raises."""
    positional_arguments = (data, TestDiagnostics.prm)
    interface.diagnostics(*positional_arguments)