Example #1
    def fitspectra(self, spectra, phase=None, x0=None, frac=1.0):
        """
        Fit a set of spectra for jdmax, x0, x1, c.
        'spectra' is a list, each element is another list containing
            [juliandate, wavelength, flux, fluxerr]
            wavelength, flux, fluxerr are lists of values for that spectrum
        Returns jdmax, x0, x1, c  (sorry, no errors)
        """
        
        #- Get x0 normalization into the right ballpark
        maxflux = 0.0
        jdmaxguess = 0.0
        for jd, w, flux, fluxerr in spectra:
            fluxsum = sum(flux)
            if fluxsum > maxflux:
                maxflux = fluxsum
                jdmaxguess = jd
        
        if x0 is None:
            modelsum = sum( self.flux(phase=0, wavelengths=w) )
            x0 = maxflux/modelsum
            print "x0 starting guess", x0

        if phase is None:
            jdmax = jdmaxguess
        else:
            jdmax = spectra[0][0] - phase

        p0 = (jdmax, x0, 0.1, 0.1)  #- jdmax, x0, x1, c
        pfit = fmin(self.chi2, p0, args=(spectra, frac))
        return pfit
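The pattern above (pack the parameters into p0, pass the data through args, minimize a chi-square) is the standard scipy.optimize.fmin recipe. A minimal, self-contained sketch of that recipe, using a synthetic straight-line fit rather than the spectral model of this class:

import numpy as np
from scipy.optimize import fmin

# Synthetic data for illustration only (not part of the original class).
x = np.linspace(0, 10, 50)
y = 2.0 * x + 1.0 + np.random.normal(0, 0.5, x.size)
yerr = np.full_like(y, 0.5)

def chi2(params, x, y, yerr):
    """Chi-square of a straight-line model a*x + b."""
    a, b = params
    return np.sum(((y - (a * x + b)) / yerr) ** 2)

p0 = (1.0, 0.0)                           # starting guess, analogous to (jdmax, x0, x1, c)
pfit = fmin(chi2, p0, args=(x, y, yerr))  # Nelder-Mead simplex, no gradients needed
print("best-fit a, b:", pfit)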
Example #2
    def optimize(self,
                 C,
                 doc_start,
                 features=None,
                 dev_sentences=[],
                 gold_labels=None,
                 C_data_initial=None,
                 maxfun=50):
        '''
        Run MLII (type-II maximum likelihood) optimisation over the lower bound on the
        log marginal likelihood. Optimises the confusion matrix prior (using the same
        value for all previous labels) and the scaling of the transition matrix
        hyperparameters.
        '''
        self.opt_runs = 0

        def neg_marginal_likelihood(hyperparams, C, doc_start):
            # set hyperparameters

            n_alpha_elements = len(hyperparams) - 1

            self.A.alpha0 = np.exp(hyperparams[0:n_alpha_elements]).reshape(
                self.A.alpha_shape)
            self.LM.set_beta0(
                np.ones(self.LM.beta_shape) * np.exp(hyperparams[-1]))

            # run the method
            self.fit_predict(C, doc_start, features, dev_sentences,
                             gold_labels, C_data_initial)

            # compute lower bound
            lb = self.lowerbound()

            print("Run %i. Lower bound: %.5f, alpha_0 = %s, nu0 scale = %.3f" %
                  (self.opt_runs, lb, str(np.exp(
                      hyperparams[0:-1])), np.exp(hyperparams[-1])))

            self.opt_runs += 1
            return -lb

        initialguess = np.log(
            np.append(self.A.alpha0.flatten(),
                      self.LM.beta0.flatten()[0]))
        ftol = 1.0  #1e-3
        opt_hyperparams, _, _, _, _ = fmin(neg_marginal_likelihood,
                                           initialguess,
                                           args=(C, doc_start),
                                           maxfun=maxfun,
                                           full_output=True,
                                           ftol=ftol,
                                           xtol=1e100)

        print("Optimal hyper-parameters: alpha_0 = %s, nu0 scale = %s" %
              (np.array2string(np.exp(opt_hyperparams[:-1])),
               np.array2string(np.exp(opt_hyperparams[-1]))))

        return self.Et, self.LM.most_likely_labels(
            self._feature_ll(self.features))[1]
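The method above searches in log-space so the Nelder-Mead simplex can never propose a negative hyperparameter, and it effectively disables the x-tolerance criterion with a huge xtol so that only ftol and maxfun stop the search. A minimal sketch of that pattern (the quadratic objective is a stand-in for the model's lower bound, not part of the original class):

import numpy as np
from scipy.optimize import fmin

def neg_objective(log_hyperparams):
    scale = np.exp(log_hyperparams[0])   # back-transform: the search can never go negative
    # Stand-in for "refit the model and return minus the lower bound".
    return (scale - 3.0) ** 2

initialguess = np.log([1.0])
opt, fopt, niters, nfev, warnflag = fmin(neg_objective, initialguess,
                                         maxfun=50, ftol=1.0, xtol=1e100,
                                         full_output=True)
print("optimal scale:", np.exp(opt[0]))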
Example #3
def calcMuTauSigmaEstimate(hts, alphaF,
                           mts_init = [0.5,log(0.5), log(0.5)],
                           visualize_gs=False,
                           ftol = 1e-3,
                           maxiter=1000):
    '''Given a set of hitting times hts and a known control alphaF,
       estimate the structural parameters (mu, tau, sigma) by maximum likelihood.'''
    
    Tf = amax(hts)+0.05

    'Objective (negative log-likelihood) Function:'    
    def nllk(mts):
        'current ests:'
        tau = exp(mts[1:2])
        mu_sigma =  [mts[0], exp(mts[2])];
        
        'parametrize solver'
        lSolver =  generateDefaultAdjointSolver(tau, mu_sigma,  Tf=Tf);
        lSolver.refine(0.01, 0.5);
        
        'interpolate control:'
        alphas_for_f = alphaF(lSolver._ts);
        
        'compute hitting time distn:'
        gs = lSolver.solve_hittime_distn_per_parameter(tau,
                                                       mu_sigma,
                                                       alphas_for_f,
                                                       force_positive=True)
        if visualize_gs:
            figure();    plot(lSolver._ts, gs, 'r') ;
        
        'Form likelihood'
        gs_interp = interp1d( lSolver._ts, gs)
        
        'Form negativee log likelihood'
        nllk = -sum(log( gs_interp(hts) ) )
        
        'diagnose:'
        print('mts: %.3f,%.3f,%.3f,%.0f ' % (mu_sigma[0], tau[0], mu_sigma[1], nllk))
        
        return  nllk; 
    
    'Main Call:'
    from scipy.optimize import fmin
    mts_est, nllk_val, niters, numfunc, warn_flag = fmin(nllk,
                                                         mts_init,
                                                         ftol=ftol,
                                                         maxiter=maxiter,
                                                         full_output=True)
    if warn_flag == 1:
        print('WARNING: fmin hit the maximum number of function evaluations')

    print('NM output:', nllk_val, numfunc, warn_flag)
    
    return r_[mts_est[0], exp(array(mts_est[1:]))];
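The same log/exp reparameterisation keeps tau and sigma positive during the simplex search, with the estimates exponentiated back at the end. A self-contained sketch of the idea, with a Gaussian likelihood standing in for the hitting-time solver (names below are illustrative only):

import numpy as np
from scipy.optimize import fmin
from scipy.stats import norm

# Synthetic data standing in for the hitting times.
samples = np.random.normal(1.5, 0.7, size=200)

def nllk_demo(params):
    mu, log_sigma = params
    sigma = np.exp(log_sigma)            # exponentiate: sigma stays positive
    return -np.sum(norm.logpdf(samples, loc=mu, scale=sigma))

est, nllk_val, niters, nfev, warnflag = fmin(nllk_demo, [0.0, np.log(1.0)],
                                             ftol=1e-3, maxiter=1000,
                                             full_output=True)
if warnflag == 1:
    print('WARNING: fmin hit the maximum number of function evaluations')
print(est[0], np.exp(est[1]))            # mu, sigma on the original scale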
Example #4
def fit_knobdule(x0, x1, y0, y1, p0, im_norm, direction, mode):
    p_dir_out = fmin(
        partial(min_func_x,
                x0=x0,
                x1=x1,
                y0=y0,
                y1=y1,
                im_norm=im_norm,
                direction=direction,
                mode=mode), p0)
    return p_dir_out
Example #5
def fit_power_shear_ref(z_u_lst, z_ref, plt=None):
    """Estimate power shear parameter, alpha, from two or more specific reference heights using polynomial fit.

    Parameters
    ----------
    z_u_lst : [(z1, u_z1), (z2, u_z2),...]
        - z1: Some height
        - u_z1: Wind speeds or mean wind speed at z1
        - z2: another height
        - u_z2: Wind speeds or mean wind speeds at z2
    z_ref : float or int
        Reference height (hub height)
    plt : matplotlib.pyplot (or similar) or None
        Used to plot result if not None

    Returns
    -------
    alpha : float
        power shear parameter
    u_ref : float
        Wind speed at reference height

    Example
    --------
    >>> fit_power_shear_ref([(85, 8.88131), (21, 4.41832)],  87.13333)
    [ 0.49938238  8.99192568]
    """
    def shear_error(x, z_u_lst, z_ref):
        alpha, u_ref = x
        return np.nansum([(u - u_ref * (z / z_ref)**alpha)**2
                          for z, u in z_u_lst])

    z_u_lst = [(z, np.mean(u)) for z, u in z_u_lst]
    alpha, u_ref = fmin(shear_error, (.1, 10), (z_u_lst, z_ref), disp=False)
    if plt:
        z, u = list(zip(*z_u_lst))
        plt.plot(u, z, '.')
        z = np.linspace(min(z), max(z), 100)
        plt.plot(power_shear(alpha, z_ref, u_ref)(z), z)
        plt.margins(.1)
    if alpha == .1 and u_ref == 10:  # fmin returned the initial guess unchanged, i.e. the fit failed
        return np.nan, np.nan
    return alpha, u_ref
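power_shear is imported from the same module as fit_power_shear_ref; a sketch of the profile it is assumed to return, matching the model fitted above (assumption, not the library's actual implementation):

import numpy as np

def power_shear(alpha, z_ref, u_ref):
    """Assumed form: wind speed profile u(z) = u_ref * (z / z_ref)**alpha."""
    return lambda z: u_ref * (np.asarray(z) / z_ref) ** alpha

# Usage mirroring the docstring example:
# alpha, u_ref = fit_power_shear_ref([(85, 8.88131), (21, 4.41832)], 87.13333)
# u_85 = power_shear(alpha, 87.13333, u_ref)(85)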
Example #6
    def fit_params(self, p0):
        p, fopt, itercnt, funccall, warnflag = fmin(
            fitting.chi,
            p0,
            args=[self.c_amplitude, self.frequency, fitting.lorenzian],
            full_output=1)
        args = {}
        ##        print "Amplitude ",p0[0]," => ",p[0]
        ##        print "Gamma     ",p0[1]," => ",p[1]
        ##        print "Frequency ",p0[2]," => ",p[2]
        ##        print "Baseline ",p0[3]," => ",p[3]
        ##        print "CHI      ",fitting.chi(p0,self.c_amplitude,self.frequency,fitting.lorenzian)," => ",fopt

        if p[0] < p0[0] / 2:
            ##            print "Will toss fit, amplitude lowers too much"
            return None

        prestd = self.std

        ##        plot(self.frequency,self.c_amplitude.copy(),label="before")
        ##        plot(self.frequency,self.c_amplitude-fitting.lorenzian(p,self.frequency,{'baseline':0}),label="after")
        ##        plot(self.frequency,fitting.lorenzian(p,self.frequency,args),label="fit")
        ##        plot(self.frequency,fitting.lorenzian(p0,self.frequency,args),label="prefit")
        ##
        ##        legend()
        ##        savefig('fseq.'+str(self.imgout)+".png")
        ##        self.imgout += 1
        ##        cla()

        freqwidth = self.frequency_step
        if p[1] / 2 > freqwidth:
            freqwidth = p[1] / 2

        if (abs(p[2] - p0[2]) > freqwidth):
            #print "Fit has strayed from x0, from",p0[2]," to ",p[2]," larger than ",freqwidth
            return None

        self.add_fit(p)
        #print "STD:   ",prestd," => ",self.std," (target = ",self.target_std,")"
        return p
Example #8
def remove_saturated_pixel(ds, threshold=0.1, minimum=None, maximum=None):
    """
    Remove saturated pixels from an array in place.

    :param ds: a dataset as ndarray
    :param float threshold: upper limit;
        all pixels > max*(1-threshold) are discarded.
    :param float minimum: minimum valid value (or True for auto-guess)
    :param float maximum: maximum valid value
    :return: the input dataset
    """
    shape = ds.shape
    if ds.dtype == numpy.uint16:
        maxt = (1.0 - threshold) * 65535.0
    elif ds.dtype == numpy.int16:
        maxt = (1.0 - threshold) * 32767.0
    elif ds.dtype == numpy.uint8:
        maxt = (1.0 - threshold) * 255.0
    elif ds.dtype == numpy.int8:
        maxt = (1.0 - threshold) * 127.0
    else:
        if maximum is None:
            maxt = (1.0 - threshold) * ds.max()
        else:
            maxt = maximum
    if maximum is not None:
        maxt = min(maxt, maximum)
    invalid = (ds > maxt)
    if minimum:
        if minimum is True:
            # automatic guess of the best minimum TODO: use the HWHM to guess the minimum...
            data_min = ds.min()
            x, y = numpy.histogram(numpy.log(ds - data_min + 1.0), bins=100)
            f = interp1d((y[1:] + y[:-1]) / 2.0,
                         -x,
                         bounds_error=False,
                         fill_value=-x.min())
            max_low = fmin(f, y[1], disp=0)
            max_hi = fmin(f, y[-1], disp=0)
            if max_hi > max_low:
                f = interp1d((y[1:] + y[:-1]) / 2.0, x, bounds_error=False)
                min_center = fminbound(f, max_low, max_hi)
            else:
                min_center = max_hi
            minimum = float(numpy.exp(y[(
                (min_center / y) > 1).sum() - 1])) - 1.0 + data_min
            logger.debug("removeSaturatedPixel: best minimum guessed is %s",
                         minimum)
        ds[ds < minimum] = minimum
        ds -= minimum  # - 1.0

    if invalid.sum(dtype=int) == 0:
        logger.debug("No saturated area where found")
        return ds
    gi = ndimage.morphology.binary_dilation(invalid)
    lgi, nc = ndimage.label(gi)
    if nc > 100:
        logger.warning(
            "More than 100 saturated zones were found on this image !!!!")
    for zone in range(nc + 1):
        dzone = (lgi == zone)
        if dzone.sum(dtype=int) > ds.size // 2:
            continue
        min0, min1, max0, max1 = bounding_box(dzone)
        ksize = min(max0 - min0, max1 - min1)
        subset = ds[max(0, min0 - 4 * ksize):min(shape[0], max0 + 4 * ksize),
                    max(0, min1 - 4 * ksize):min(shape[1], max1 + 4 * ksize)]
        while subset.max() > maxt:
            subset = ndimage.median_filter(subset, ksize)
        ds[max(0, min0 - 4 * ksize):min(shape[0], max0 + 4 * ksize),
           max(0, min1 - 4 * ksize):min(shape[1], max1 + 4 * ksize)] = subset
    return ds
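The automatic minimum guess above interpolates the intensity histogram, climbs to the two modes with fmin on the negated histogram, and then locates the valley between them with fminbound. A self-contained sketch of that sub-technique on synthetic bimodal data (not the pyFAI code itself):

import numpy as np
from scipy.interpolate import interp1d
from scipy.optimize import fmin, fminbound

# Synthetic bimodal data standing in for the log-intensity histogram.
data = np.concatenate([np.random.normal(1.0, 0.2, 5000),
                       np.random.normal(4.0, 0.3, 5000)])
counts, edges = np.histogram(data, bins=100)
centers = (edges[1:] + edges[:-1]) / 2.0

f_neg = interp1d(centers, -counts, bounds_error=False, fill_value=0.0)
f_pos = interp1d(centers, counts, bounds_error=False, fill_value=0.0)

mode_low = fmin(lambda x: float(f_neg(x[0])), centers[1], disp=0)[0]   # left mode
mode_hi = fmin(lambda x: float(f_neg(x[0])), centers[-2], disp=0)[0]   # right mode
valley = fminbound(lambda x: float(f_pos(x)), mode_low, mode_hi)       # minimum in between
print("histogram valley near", valley)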
Example #9
    def SAEM(self, x, burnin=100, niter=200, verbose=False):
        """
        SAEM estimation procedure:

        Parameters
        -------------
        x: ndarray
            vector of observations
        burnin: integer, optional
            number of burn-in SEM iterations (discarded values)
        niter: integer, optional
            maximum number of SAEM iterations for convergence to local maximum
        verbose: 0 or 1
            verbosity level

        Returns
        ---------
        LL: ndarray
            successive log-likelihood values

        Notes
        ------
        The Gaussian distribution mean is fixed to zero
        """
        
        #Initialization
        n = len(x)
        #Averaging steps
        step = np.ones(burnin+niter)
        step[burnin:] = 1/(1.0 + np.arange(niter))
        # Posterior probabilities of class membership
        P = np.zeros((3,n))
        # Complete model sufficient statistics
        s = np.zeros((8,1))
        # Averaged sufficient statistics
        A = np.zeros((8,1))
        # Successive parameter values
        LL = np.zeros(burnin+niter)
        # Mean fixed to zero
        self.mean = 0
        
        #Main loop
        for t in range(burnin + niter):
            
            #E-step ( posterior probabilities )
            P[0] = st.gamma.pdf( -x,self.shape_n,scale=self.scale_n )
            P[1] = st.norm.pdf( x,0,np.sqrt(self.var) )
            P[2] = st.gamma.pdf( x,self.shape_p,scale=self.scale_p )
            P *= self.mixt.reshape(3,1)
            P /= P.sum(axis=0)
            
            #S-step ( class label simulation )
            Z = np.zeros(n)
            u = nr.uniform(size=n)
            Z[u<P[0]] -= 1
            Z[u>1-P[2]] += 1
            
            #A-step ( sufficient statistics )
            s[0] = (Z == 0).sum()
            s[1] = (Z == +1).sum()
            s[2] = (Z == -1).sum()
            s[3] = np.abs( x[Z == +1] ).sum()
            s[4] = np.abs( x[Z == -1] ).sum()
            s[5] = ( x[Z == 0]**2 ).sum()
            s[6] = np.log( np.abs( x[Z == +1] ) ).sum()
            s[7] = np.log( np.abs( x[Z == -1] ) ).sum()
            # Averaged sufficient statistics
            A = A + step[t] * (s - A)
            
            #M-step ( Maximization of expected log-likelihood )
            
            self.var = A[5]/A[0]
            
            def Qp(Y):
                """
                Expected log_likelihood of positive gamma class
                """
                return A[6]*(1-Y[0])+A[3]*Y[1]-A[1]*(Y[0]*np.log(Y[1])
                                                     -np.log(sp.gamma(Y[0])))
            
            def Qn(Y):
                """
                Expected log_likelihood of negative gamma class
                """
                return A[7]*(1-Y[0])+A[4]*Y[1]-A[2]*(Y[0]*np.log(Y[1])
                                                     -np.log(sp.gamma(Y[0])))
            
            Y = so.fmin( Qp, [self.shape_p, 1/self.scale_p], disp=0)
            self.shape_p = Y[0]
            self.scale_p = 1/Y[1]
            
            Y = so.fmin( Qn, [self.shape_n, 1/self.scale_n], disp=0)
            self.shape_n = Y[0]
            self.scale_n = 1/Y[1]
            
            self.mixt = np.array([A[2],A[0],A[1]]) /n
            
            LL[t] = np.log(np.array(self.posterior(x)).sum(axis=0)).sum()
            
            if verbose:
                print "Iteration "+str(t)+" out of "+str(burnin+niter),
                print "LL = ", str(LL[t])
        self.mixt=self.mixt.squeeze()
        return LL
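The M-step above fits the gamma class parameters by minimising a negative expected log-likelihood with fmin (disp=0 silences the convergence message). A stand-alone sketch of the same idea, fitting a gamma shape and rate to synthetic data (illustrative names, not the original sufficient statistics):

import numpy as np
import scipy.stats as st
from scipy.optimize import fmin

# Synthetic gamma-distributed data standing in for one mixture class.
data = np.random.gamma(shape=2.0, scale=1.5, size=500)

def neg_loglik(Y):
    shape, rate = Y
    if shape <= 0 or rate <= 0:          # keep the simplex out of invalid regions
        return np.inf
    return -np.sum(st.gamma.logpdf(data, shape, scale=1.0 / rate))

Y = fmin(neg_loglik, [1.0, 1.0], disp=0)  # same call pattern as the M-step above
shape_hat, scale_hat = Y[0], 1.0 / Y[1]
print("estimated shape, scale:", shape_hat, scale_hat)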
Example #10
def removeSaturatedPixel(ds, threshold=0.1, minimum=None, maximum=None):
    """
    @param ds: a dataset as ndarray

    @param threshold: upper limit; all pixels > max*(1-threshold) are discarded.
    @param minimum: minimum valid value (or True for auto-guess)
    @param maximum: maximum valid value
    @return: another dataset
    """
    shape = ds.shape
    if ds.dtype == numpy.uint16:
        maxt = (1.0 - threshold) * 65535.0
    elif ds.dtype == numpy.int16:
        maxt = (1.0 - threshold) * 32767.0
    elif ds.dtype == numpy.uint8:
        maxt = (1.0 - threshold) * 255.0
    elif ds.dtype == numpy.int8:
        maxt = (1.0 - threshold) * 127.0
    else:
        if maximum is None:
            maxt = (1.0 - threshold) * ds.max()
        else:
            maxt = maximum
    if maximum is not None:
        maxt = min(maxt, maximum)
    invalid = ds > maxt
    if minimum:
        if minimum is True:  # automatic guess of the best minimum TODO: use the HWHM to guess the minimum...
            data_min = ds.min()
            x, y = numpy.histogram(numpy.log(ds - data_min + 1.0), bins=100)
            f = interp1d((y[1:] + y[:-1]) / 2.0, -x, bounds_error=False, fill_value=-x.min())
            max_low = fmin(f, y[1], disp=0)
            max_hi = fmin(f, y[-1], disp=0)
            if max_hi > max_low:
                f = interp1d((y[1:] + y[:-1]) / 2.0, x, bounds_error=False)
                min_center = fminbound(f, max_low, max_hi)
            else:
                min_center = max_hi
            minimum = float(numpy.exp(y[((min_center / y) > 1).sum() - 1])) - 1.0 + data_min
            logger.debug("removeSaturatedPixel: best minimum guessed is %s", minimum)
        ds[ds < minimum] = minimum
        ds -= minimum  # - 1.0

    if invalid.sum(dtype=int) == 0:
        logger.debug("No saturated area where found")
        return ds
    gi = ndimage.morphology.binary_dilation(invalid)
    lgi, nc = ndimage.label(gi)
    if nc > 100:
        logger.warning("More than 100 saturated zones were found on this image !!!!")
    for zone in range(nc + 1):
        dzone = lgi == zone
        if dzone.sum(dtype=int) > ds.size // 2:
            continue
        min0, min1, max0, max1 = boundingBox(dzone)
        ksize = min(max0 - min0, max1 - min1)
        subset = ds[
            max(0, min0 - 4 * ksize) : min(shape[0], max0 + 4 * ksize),
            max(0, min1 - 4 * ksize) : min(shape[1], max1 + 4 * ksize),
        ]
        while subset.max() > maxt:
            subset = ndimage.median_filter(subset, ksize)
        ds[
            max(0, min0 - 4 * ksize) : min(shape[0], max0 + 4 * ksize),
            max(0, min1 - 4 * ksize) : min(shape[1], max1 + 4 * ksize),
        ] = subset
    fabio.edfimage.edfimage(data=ds).write("removeSaturatedPixel.edf")
    return ds
Example #11
    print(i)
    mdkl_sampled += 0.01 * sample_mdkl(
        np.tile(log_alpha_grid, K).reshape(K, M)).mean(axis=0)
    clear_output()
C = sample_mdkl(np.tile([20], 100000000)).mean()


def mdkl_approx(log_alpha, k1, k2, k3):
    return k1 * expit(k2 + k3 * log_alpha) - 0.5 * np.log1p(np.exp(-log_alpha))


def mdkl_approximation_loss(x):
    k1, k2, k3 = x
    return np.sum((mdkl_sampled - mdkl_approx(log_alpha_grid, k1, k2, k3))**2)


k1, k2, k3 = optimize.fmin(mdkl_approximation_loss, x0=np.array([0., 0., 0.]))
print('k1, k2, k3 =', k1, k2, k3)

plt.plot(log_alpha_grid, mdkl_sampled - C, label='Sampled')
plt.plot(log_alpha_grid,
         mdkl_approx(log_alpha_grid, k1, k2, k3) - C,
         label='Approximated')
_ = plt.axes().set_xlabel(r'$\log\alpha$')
_ = plt.axes().set_ylabel(r'$-KL$')
_ = plt.legend(loc=4)

plt.plot(log_alpha_grid,
         mdkl_sampled - mdkl_approx(log_alpha_grid, k1, k2, k3))
_ = plt.axes().set_xlabel(r'$\log\alpha$')
_ = plt.axes().set_ylabel(r'Approximation deviation')
Example #12
#-------------------------------------------------------------------------------
# performing optimization step

print "     a          b          c          alpha      beta       gamma      delta_E"
print "------------------------------------------------------------------------------"

eps0 = [
    0,
]
eps0 = len(opt_type) * eps0

res = fmin(tot_energy,\
           eps0,\
           xtol=eps_tolerance,\
           ftol=ene_tolerance,\
           retall=0,\
           disp=0,\
           full_output=0)
print()
print("==============================================================================")
print("     Convergence has been achieved.")
print()
#
#
dirmin = get_minimum("energy-vs-step")
os.system("INPUT-relaxupdate.py " + dirmin + "/input.xml " + dirmin +
          "/geometry_opt.xml")
os.system('cp ' + dirmin + '/input_rel.xml ./input_opt_rel.xml')
#
print "=============================================================================="
if (LC=='M'): 
    tree = read_input()
    if (check_monoclinic(tree)): opt_type = ['VOL','BOA','COA']

#-------------------------------------------------------------------------------          
# performing optimization step                          

print "     a          b          c          alpha      beta       gamma      delta_E"
print "------------------------------------------------------------------------------"

eps0 = [0,] ; eps0 = len(opt_type)*eps0

res = fmin(tot_energy,\
           eps0,\
           xtol=eps_tolerance,\
           ftol=ene_tolerance,\
           retall=0,\
           disp=0,\
           full_output=0)
print()
print("==============================================================================")
print("     Convergence has been achieved.")
print()
#
#
dirmin = get_minimum("energy-vs-step")
os.system("INPUT-relaxupdate.py "+dirmin+"/input.xml "+dirmin+"/geometry_opt.xml")
os.system('cp '+dirmin+'/input_rel.xml ./input_opt_rel.xml')
#
print "=============================================================================="
print "INITIAL LATTICE PARAMETERS:"
Example #15
def fopt(pars):
    fpr, tpr, _ = metrics.roc_curve(ytrue, fopt_pred(pars, dataset))
    return -metrics.auc(fpr, tpr)

niter = 10
cv = cross_validation.ShuffleSplit(dataset_blend_train.shape[0], n_iter=niter, test_size=0.3, random_state=rnd)
mean_auc = 0.0; itr = 0       
for train, test in cv:
    xtrain = dataset_blend_train[train]
    xtest = dataset_blend_train[test]
    ytrain = ylr[train]
    ytest = ylr[test]        

    dataset = xtrain
    ytrue = ytrain
    xopt = fmin(fopt, x0)
    preds = fopt_pred(xopt, xtest)
    
    fpr, tpr, _ = metrics.roc_curve(ytest, preds)
    roc_auc = metrics.auc(fpr, tpr)
    print "AUC (fold %d/%d): %f" % (itr + 1, niter, roc_auc)
    mean_auc += roc_auc ; itr += 1
print "Mean AUC: ", mean_auc/niter    

  
print "blending on full data..."    
def fopt_pred1(pars, data):
    return np.dot(data, pars)

def fopt1(pars):
    fpr, tpr, _ = metrics.roc_curve(ylr, fopt_pred1(pars, dataset_blend_train))
Example #16
            func_min = lambda args:-correlation_at_fit(dates_fit, sim, stretch_vec, func, args)
            # vel_offset, vel_change, t_phase
            # vel_perm vel_temp t_dec
            #p0 = np.array((0., 0.01, .5) + (0., 0., 30.) * N)
            if FIT == 'sinus_exp_alt':
                p0 = np.array((0.0, 0.25 / 100, 150. / t_period,
                               0, 0.6 / 100, 700))
            else:
                p0 = np.array((-0.5 / 100, 0.25 / 100, 150. / t_period,
                               1. / 100, 700))
#            p0 = np.array((-0.7 / 100, 0.6 / 100, 150. / t_period,
#                           0, 0.6 / 100, 700))
#            bnds = ((-0.1 / 100, 0.1 / 100), (0, 1. / 100), (100 / t_period, 200 / t_period),
#                    (0.0, 0.1 / 100), (0.0, 1. / 100), (300, 1200))
            print(func_min(p0))
            results = fmin(func_min, p0)
            popt = results
            print(func_min(popt), popt)
            popt = list(popt) + [-func_min(popt)]
        else:
            sigma = 1. / corr_fit if USE_SIGMA else None
            func(dates_fit, *p0)  # bind lambda, for some reason I dont understand fully this line is needed
    #        import pylab
    #        pylab.plot(dates_fit, stretch_fit)
    #        pylab.plot(dates_fit, func(dates_fit, *p0))
    #        from IPython import embed
    #        embed()

            popt, pcov = curve_fit(func, dates_fit, stretch_fit, p0, sigma=sigma)  #, maxfev=100000)
        if FIT == 'sinus_exp_alt2':
            popt = popt[:3] + [0.] + popt[3:]  # func -> func2