Exemple #1
0
 def test_multilinear_model(self):
     x = np.linspace(0.0, 5.0)
     y = 10.0 + 5.0 * x
     data = Data(x, y)
     odr_obj = ODR(data, multilinear)
     output = odr_obj.run()
     assert_array_almost_equal(output.beta, [10.0, 5.0])
Exemple #2
0
def orthoregress(x, y):
    """Fit a straight line to ``(x, y)`` by Orthogonal Distance Regression.

    Adapted from https://gist.github.com/robintw/d94eb527c44966fbc8b9#file-orthoregress-py

    An ordinary least-squares fit (``stats.linregress``) supplies the
    starting slope and intercept; ``scipy.odr`` (the ODRPACK interface) then
    refines them.

    Arguments:
    x: x data
    y: y data

    Returns:
    [slope, intercept, res_var] where ``res_var`` is the ``res_var``
    attribute of the ODR output.
    """
    def line(params, xs):
        """Straight-line model ``params[0] * xs + params[1]`` for ODR."""
        return (params[0] * xs) + params[1]

    ols = stats.linregress(x, y)
    line_model = Model(line)
    observations = Data(x, y)
    # The first two fields of the linregress result are slope and intercept.
    job = ODR(observations, line_model, beta0=ols[0:2])
    fit = job.run()

    result = list(fit.beta)
    result.append(fit.res_var)
    return result
Exemple #3
0
 def test_quadratic_model(self):
     x = np.linspace(0.0, 5.0)
     y = 1.0 * x**2 + 2.0 * x + 3.0
     data = Data(x, y)
     odr_obj = ODR(data, quadratic)
     output = odr_obj.run()
     assert_array_almost_equal(output.beta, [1.0, 2.0, 3.0])
Exemple #4
0
 def test_exponential_model(self):
     x = np.linspace(0.0, 5.0)
     y = -10.0 + np.exp(0.5 * x)
     data = Data(x, y)
     odr_obj = ODR(data, exponential)
     output = odr_obj.run()
     assert_array_almost_equal(output.beta, [-10.0, 0.5])
Exemple #5
0
 def test_unilinear_model(self):
     x = np.linspace(0.0, 5.0)
     y = 1.0 * x + 2.0
     data = Data(x, y)
     odr_obj = ODR(data, unilinear)
     output = odr_obj.run()
     assert_array_almost_equal(output.beta, [1.0, 2.0])
Exemple #6
0
    def test_explicit(self):
        """Fit the explicit sample model and compare with reference numbers.

        The model function and both Jacobians come from the test class
        (``explicit_fcn``, ``explicit_fjb``, ``explicit_fjd``); the expected
        ``beta``, ``sd_beta`` and ``cov_beta`` are hard-coded reference values.
        """
        model_info = dict(name='Sample Explicit Model',
                          ref='ODRPACK UG, pg. 39')
        explicit_mod = Model(
            self.explicit_fcn,
            fjacb=self.explicit_fjb,
            fjacd=self.explicit_fjd,
            meta=model_info,
        )

        x_obs = [0.,0.,5.,7.,7.5,10.,16.,26.,30.,34.,34.5,100.]
        y_obs = [1265.,1263.6,1258.,1254.,1253.,1249.8,1237.,1218.,1220.6,
                 1213.8,1215.5,1212.]
        x_fix_flags = [0,0,1,1,1,1,1,1,1,1,1,0]

        explicit_odr = ODR(Data(x_obs, y_obs), explicit_mod,
                           beta0=[1500.0, -50.0, -0.1],
                           ifixx=x_fix_flags)
        explicit_odr.set_job(deriv=2)
        explicit_odr.set_iprint(init=0, iter=0, final=0)
        out = explicit_odr.run()

        expected_beta = np.array([1.2646548050648876e+03,
                                  -5.4018409956678255e+01,
                                  -8.7849712165253724e-02])
        expected_sd = np.array([1.0349270280543437, 1.583997785262061,
                                0.0063321988657267])
        expected_cov = np.array([
            [4.4949592379003039e-01, -3.7421976890364739e-01,
             -8.0978217468468912e-04],
            [-3.7421976890364739e-01, 1.0529686462751804e+00,
             -1.9453521827942002e-03],
            [-8.0978217468468912e-04, -1.9453521827942002e-03,
             1.6827336938454476e-05],
        ])

        assert_array_almost_equal(out.beta, expected_beta)
        assert_array_almost_equal(out.sd_beta, expected_sd)
        assert_array_almost_equal(out.cov_beta, expected_cov)
Exemple #7
0
    def test_output_file_overwrite(self):
        """
        Verify fix for gh-1892
        """
        def func(b, x):
            return b[0] + b[1] * x

        p = Model(func)
        data = Data(np.arange(10), 12 * np.arange(10))
        tmp_dir = tempfile.mkdtemp()
        error_file_path = os.path.join(tmp_dir, "error.dat")
        report_file_path = os.path.join(tmp_dir, "report.dat")
        try:
            ODR(data,
                p,
                beta0=[0.1, 13],
                errfile=error_file_path,
                rptfile=report_file_path).run()
            ODR(data,
                p,
                beta0=[0.1, 13],
                errfile=error_file_path,
                rptfile=report_file_path,
                overwrite=True).run()
        finally:
            # remove output files for clean up
            shutil.rmtree(tmp_dir)
Exemple #8
0
 def test_polynomial_model(self):
     x = np.linspace(0.0, 5.0)
     y = 1.0 + 2.0 * x + 3.0 * x**2 + 4.0 * x**3
     poly_model = polynomial(3)
     data = Data(x, y)
     odr_obj = ODR(data, poly_model)
     output = odr_obj.run()
     assert_array_almost_equal(output.beta, [1.0, 2.0, 3.0, 4.0])
Exemple #9
0
def ortho_regress(x, y):
    """Fit the module-level model ``f`` to ``(x, y)`` by ODR.

    The first two fields of ``linregress(x, y)`` are used as the starting
    parameters. Returns the fitted parameters as a plain list.
    """
    ols = linregress(x, y)
    model = Model(f)
    observations = Data(x, y)
    job = ODR(observations, model, beta0=ols[0:2])
    fit = job.run()
    return list(fit.beta)
Exemple #10
0
    def test_empty_data(self):
        """Constructing an ODR job on empty data must emit ``OdrWarning``.

        Checked for both the ``Data`` and the ``RealData`` container.
        """
        start = [0.02, 0.0]
        model = Model(self.empty_data_func)

        for container in (Data, RealData):
            empty = container([], [])
            assert_warns(OdrWarning, ODR, empty, model, beta0=start)
Exemple #11
0
    def fit(self, x, y):
        """Fit ``self.model`` to ``(x, y)`` by ODR and store the result.

        An ordinary least-squares fit provides the starting parameters (the
        first two fields of the ``linregress`` result). The fitted parameters
        are stored in ``self.betas``; nothing is returned.
        """
        ols = linregress(x, y)

        odr_model = Model(self.model)
        job = ODR(Data(x, y), odr_model, beta0=ols[0:2])

        self.betas = job.run().beta
Exemple #12
0
    def _run_odr(self):
        """Run an ODR regression

        Flattens the stored x/y data, fits ``self._modelODR`` starting from
        ``self._guess`` and stores the parameters, their standard deviations,
        their covariance matrix and the full output object on the instance.
        """
        model = Model(self._modelODR)
        # Third positional argument of Data is the response weight ``we``.
        flat_data = Data(ravel(self._datax), ravel(self._datay), 1)
        fit = ODR(flat_data, model, beta0=self._guess, maxit=10000).run()

        self._result, self._stdev, self._covar = (
            fit.beta, fit.sd_beta, fit.cov_beta)
        self._odr = fit
Exemple #13
0
    def test_ifixx(self):
        x1 = [-2.01, -0.99, -0.001, 1.02, 1.98]
        x2 = [3.98, 1.01, 0.001, 0.998, 4.01]
        fix = np.vstack((np.zeros_like(x1, dtype=int), np.ones_like(x2, dtype=int)))
        data = Data(np.vstack((x1, x2)), y=1, fix=fix)
        model = Model(lambda beta, x: x[1, :] - beta[0] * x[0, :]**2., implicit=True)

        odr1 = ODR(data, model, beta0=np.array([1.]))
        sol1 = odr1.run()
        odr2 = ODR(data, model, beta0=np.array([1.]), ifixx=fix)
        sol2 = odr2.run()
        assert_equal(sol1.beta, sol2.beta)
Exemple #14
0
    def test_ticket_1253(self):
        def linear(c, x):
            return c[0] * x + c[1]

        c = [2.0, 3.0]
        x = np.linspace(0, 10)
        y = linear(c, x)

        model = Model(linear)
        data = Data(x, y, wd=1.0, we=1.0)
        job = ODR(data, model, beta0=[1.0, 1.0])
        result = job.run()
        assert_equal(result.info, 2)
Exemple #15
0
	def ODR_fit(self):
		"""Fit ``self.ODR_fit_func`` by ODR and compute fitted pressures.

		The model is fitted to (H/M ratio, temperature) -> pressure data with a
		fixed nine-element starting vector. The fitted parameters are printed,
		``assign_fitting_constants`` is called, a fitted pressure curve is
		stored in ``self.Pressures_fit[i]`` for every entry of
		``self.H_M_ratio_dict`` and finally ``plot_fit`` is called.
		"""
		# NOTE: an earlier, now disabled, variant built the starting vector
		# from self.data ("Enthalpy_order", "Temp_dependence", "A", "B");
		# the fixed vector below is what is actually used.
		beta_start = [0, 0, 0, 0, 1, 0, 0, 0, 0]

		self.odr_model = Model(self.ODR_fit_func)
		self.mydata = Data([self.H_M_ratio_data, self.Temperatures_data], self.Pressures_data)
		self.myodr = ODR(self.mydata, self.odr_model, beta0=np.asarray(beta_start), maxit=10000000)
		self.myoutput = self.myodr.run()

		print(self.myoutput.beta)

		self.assign_fitting_constants()

		order = self.data["Enthalpy_order"]

		for i in range(len(self.H_M_ratio_dict)):

			self.Pressures_fit[i] = []
			T = self.Temperatures[i]
			x = np.asarray(self.H_M_ratio_dict[i])
			beta = self.myoutput.beta

			# R is constant or quadratic in T, depending on the configuration
			if self.data["Temp_dependence"] == "True":
				R = beta[order] + beta[order+1]*T + beta[order+2]*(T**2)
			else:
				R = beta[order]

			H = self.E + 2*self.Alpha*x + 3*self.Beta*(x**2) + 4*self.Gamma*(x**3)

			exponent = self.A + self.B*T + (1/(self.k_B*T))*H + np.log(x/(R-x)) + R/(R-x)
			self.Pressures_fit[i] = self.P_0**0.5*np.exp(exponent)

		self.plot_fit()
Exemple #16
0
    def test_implicit(self):
        """Fit the implicit sample model and compare with reference numbers.

        The model function comes from the test class (``implicit_fcn``); the
        expected ``beta``, ``sd_beta`` and ``cov_beta`` are hard-coded
        reference values.
        """
        model_info = dict(name='Sample Implicit Model',
                          ref='ODRPACK UG, pg. 49')
        implicit_mod = Model(
            self.implicit_fcn,
            implicit=1,
            meta=model_info,
        )

        first_row = [0.5,1.2,1.6,1.86,2.12,2.36,2.44,2.36,2.06,1.74,1.34,0.9,-0.28,
                     -0.78,-1.36,-1.9,-2.5,-2.88,-3.18,-3.44]
        second_row = [-0.12,-0.6,-1.,-1.4,-2.54,-3.36,-4.,-4.75,-5.25,-5.64,-5.97,-6.32,
                      -6.44,-6.44,-6.41,-6.25,-5.88,-5.5,-5.24,-4.86]
        implicit_dat = Data([first_row, second_row], 1)

        implicit_odr = ODR(implicit_dat, implicit_mod,
                           beta0=[-1.0, -3.0, 0.09, 0.02, 0.08])
        out = implicit_odr.run()

        expected_beta = np.array([
            -0.9993809167281279, -2.9310484652026476, 0.0875730502693354,
            0.0162299708984738, 0.0797537982976416])
        expected_sd = np.array([
            0.1113840353364371, 0.1097673310686467, 0.0041060738314314,
            0.0027500347539902, 0.0034962501532468])
        expected_cov = np.array([
            [2.1089274602333052e+00, -1.9437686411979040e+00,
             7.0263550868344446e-02, -4.7175267373474862e-02,
             5.2515575927380355e-02],
            [-1.9437686411979040e+00, 2.0481509222414456e+00,
             -6.1600515853057307e-02, 4.6268827806232933e-02,
             -5.8822307501391467e-02],
            [7.0263550868344446e-02, -6.1600515853057307e-02,
             2.8659542561579308e-03, -1.4628662260014491e-03,
             1.4528860663055824e-03],
            [-4.7175267373474862e-02, 4.6268827806232933e-02,
             -1.4628662260014491e-03, 1.2855592885514335e-03,
             -1.2692942951415293e-03],
            [5.2515575927380355e-02, -5.8822307501391467e-02,
             1.4528860663055824e-03, -1.2692942951415293e-03,
             2.0778813389755596e-03],
        ])

        assert_array_almost_equal(out.beta, expected_beta)
        assert_array_almost_equal(out.sd_beta, expected_sd)
        assert_array_almost_equal(out.cov_beta, expected_cov)
def orthoregress(x, y):
    """Fit the module-level model ``f`` to ``(x, y)`` by ODR.

    The result list has five entries, like ``scipy.stats.linregress``:
    the fitted parameters followed by three ``nan`` placeholders.

    Arguments:
    x: x data
    y: y data
    Returns:
    [m, c, nan, nan, nan]

    Ordinary least squares (``linregress``) supplies the starting
    parameters; the scipy.odr interface to ODRPACK performs the orthogonal
    distance fit.
    """
    ols = linregress(x, y)
    model = Model(f)
    observations = Data(x, y)
    fit = ODR(observations, model, beta0=ols[0:2]).run()
    placeholders = [np.nan, np.nan, np.nan]
    return list(fit.beta) + placeholders
Exemple #18
0
def Normal_calc(t, nn):
    """Fit a plane to a point neighbourhood and return its unit normal.

    ``NN(t, nn)`` (defined elsewhere) supplies an object with ``x``, ``y``
    and ``z`` attributes. The plane ``z = a*x + b*y + c`` is fitted with
    scipy.odr, the points and the plane are shown in a 3-D matplotlib
    figure, and the normal is the cross product of two in-plane vectors.

    Parameters
    ----------
    t, nn
        Passed straight through to ``NN``; their meaning is defined there.

    Returns
    -------
    numpy.ndarray
        Length-3 normal vector, scaled to unit L2 norm by
        ``preprocessing.normalize``.
    """
    pc_0 = NN(t, nn)
    x = pc_0.x
    y = pc_0.y
    z = pc_0.z

    def func(beta, data):
        x, y = data
        a, b, c = beta
        return a * x + b * y + c

    data = Data([x, y], z)
    model = Model(func)
    odr = ODR(data, model, beta0=[0.0, 0.0, 0.0])
    odr.set_job(fit_type=0)
    res = odr.run()
    # Extend plot with plt.Quiver (vectors) later on...?

    # Calculate xyz coordinates for corner vertices of the plane
    Y, X = np.mgrid[y.min():y.max():2j, x.min():x.max():2j]
    Z = func(res.beta, [X, Y])
    f = plt.figure()
    pl = f.add_subplot(111, projection='3d')
    pl.scatter3D(x, y, z)
    pl.plot_surface(X, Y, Z, alpha=0.4)
    plt.show()

    # Define 3 points on plane for cross product calculation (from previous calculation)
    P = [X[0][0], Y[0][0], Z[0][0]]
    Q = [X[0][1], Y[0][1], Z[0][1]]
    R = [X[1][0], Y[1][0], Z[1][0]]
    print('PQR:', P, Q, R)

    # Calculate vectors on plane
    PQ = [Q[0] - P[0], Q[1] - P[1], Q[2] - P[2]]
    PR = [R[0] - P[0], R[1] - P[1], R[2] - P[2]]
    print(PQ, PR)

    # Calculate cross product of vectors + normalize to 1 = sqrt(x**2+y**2+z**2)
    N1 = np.cross(PQ, PR)
    print('N1:', N1)
    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
    # same dtype and works on every NumPy version.
    N1_array = np.array([[N1[0], N1[1], N1[2]]], dtype=float)
    N1_normalized = preprocessing.normalize(N1_array, norm='l2')
    return N1_normalized[0]
def orthoregress(x, y):
    '''
    Orthogonal regression of the module-level model ``f`` on ``(x, y)``.

    Both coordinates are weighted by the inverse of their sample variance.

    Parameters
    ----------
    x: np.array
    y: np.array

    Returns
    -------
    list
        Fitted parameters, ``[slope, intercept]``.
    '''
    ols = linregress(x, y)
    model = Model(f)
    # A small constant keeps the weights finite when a variance is zero.
    x_weight = 1. / (np.var(x) + 1e-8)
    y_weight = 1. / (np.var(y) + 1e-8)
    observations = Data(x, y, wd=x_weight, we=y_weight)
    fit = ODR(observations, model, beta0=ols[0:2]).run()
    return list(fit.beta)
Exemple #20
0
def orthoregress(x, y):
    """Fit the module-level model ``f`` to ``(x, y)`` by ODR.

    Ordinary least squares (``linregress``) supplies the starting
    parameters; the scipy.odr interface to ODRPACK then runs at most ten
    iterations (``maxit=10``).

    Arguments:
    x: x data
    y: y data
    Returns:
    [m, c]

    Source: http://blog.rtwilson.com/orthogonal-distance-regression-in-python/
    """
    ols = linregress(x, y)
    model = Model(f)
    observations = Data(x, y)
    job = ODR(data=observations, model=model, beta0=ols[0:2], maxit=10)
    return list(job.run().beta)
Exemple #21
0
 def set_odr_peak_model(self):
     """!
     @brief Set ODR model

     Builds the combined background + peak model over the ROI data and
     stores the configured (not yet run) ODR job in ``self.odr_model``.
     The first ``len(self.bg_model.params)`` parameters go to the
     background model, the remaining ones to the peak model.
     """
     roi_x = self.roi_data[:, 0]
     roi_y = self.roi_data[:, 1]
     roi = Data(roi_x, roi_y)

     n_bg = len(self.bg_model.params)

     def total(p, X):
         background = self.bg_model.eval(p[:n_bg], X)
         return background + self.peak_model.eval(p[n_bg:], X)

     self.tot_model = total
     print("Initial Model Params")
     print(self._init_params)
     self.odr_model = ODR(roi,
                          Model(self.tot_model),
                          beta0=self._init_params,
                          ifixb=[1, 1, 1, 0, 1],
                          maxit=800,
                          taufac=0.8)
Exemple #22
0
    def run_odr(self, x, y, x_weights, y_weights):
        """Run a weighted ODR of ``self.line`` on two sets of distances.

        :parameter x: pairwise distance matrix from gene1
        :parameter y: pairwise distance matrix from gene2
        :parameter x_weights: wODR weights for gene1 distances
        :parameter y_weights: wODR weights for gene2 distances

        :return output of ``ODR.run()`` (https://docs.scipy.org/doc/scipy/reference/generated/scipy.odr.ODR.html)
        """
        model = Model(self.line)
        weighted = Data(x, y, wd=x_weights, we=y_weights)
        # Single starting parameter: ratio of the two standard deviations.
        start = [np.std(y)/np.std(x)]
        job = ODR(weighted, model, beta0=start)
        return job.run()
Exemple #23
0
def monte_carlo_odr(x_data, y_data, x_err, y_err, new_x_data, new_y_data, new_x_err, new_y_err):

    """
    Monte Carlo orthogonal distance regression with outlier rejection.

    1) Randomises the data (1000 draws) based on values (x, y) and associated errors (x_err, y_err).
    2) Constructs a standard logged OLS regression (used for ODR beta estimates).
    3) Detects outliers using internally studentised residuals from the OLS.  Those > 2 sigma (95%) are rejected.
    4) Constructs an ODR and saves model coefficients (beta, covariance matrix, errors)
    5) Takes the median coefficients for final ODR model construction

    The result is plotted together with the calibration data and the new
    data and written to 'Pyrenees_Monte_Carlo_ODR.png'.

    Returns the tuple (Beta, Eps, Covariance): the element-wise medians of
    the per-draw ODR coefficients, of the per-draw maximum ``eps`` and of
    the per-draw covariance matrices.
    """

    # Generates results files
    betas = []
    covariances = []
    eps = []

    # make function into Model instance (logarithmic)
    model = Model(log_func)

    # Sets seed for reproducible results
    np.random.seed(214)

    # 1000 iterations
    for _draw in range(1000):

        # Randomises the data (mean, sd)
        x = np.random.normal(x_data, x_err)
        y = np.random.normal(y_data, y_err)

        # Logs the data first
        logX = log10(x)

        # Adds constant for stats model intercept
        X = sm.add_constant(logX)

        # At most 10 rejection passes (should be much less, but "just in case").
        # A separate loop variable is used so the draw counter is not shadowed.
        for _pass in range(10):
            # runs a simple OLS model (log)
            results = sm.OLS(y, X).fit()

            # calculates internally standardized residuals
            St_Res = results.get_influence().resid_studentized_internal

            # Finds max residual
            M = np.max(abs(St_Res))

            # No residual beyond 2 standard deviations: this dataset is final,
            # so stop instead of refitting the same data on every remaining pass.
            if not M > 2:
                break

            # Otherwise delete the offending data points and fit again
            outliers = np.flatnonzero((St_Res > 2) | (St_Res < -2))
            x = np.delete(x, outliers)
            X = np.delete(X, outliers, axis = 0)
            y = np.delete(y, outliers)
        else:
            # Points were still being removed on the last pass; refit so the
            # starting values belong to the data actually handed to ODR
            # (previously they were stale from an earlier draw, or undefined).
            results = sm.OLS(y, X).fit()

        # Slope and intercept of the final OLS fit are used for the ODR fit.
        slope = results.params[1]
        intercept = results.params[0]

        # New data and model
        data = Data(x, y)
        # Job = 0, explicit orthogonal
        out = ODR(data, model, beta0=[slope, intercept], job=0).run()

        # Appends model coefficients to results file (for EPS, only the maximum value is recorded)
        betas.append(out.beta)
        eps.append(max(out.eps))
        covariances.append(out.cov_beta)

    # Takes the median of the model estimates
    Beta = np.median(betas, axis = 0)
    Eps = np.median(eps, axis = 0)
    Covariance = np.median(covariances, axis = 0)

    # fit model using new beta, original x scale for consistency
    xn = linspace(min(x_data), max(x_data), 1000)
    yn = log_func(Beta, xn)

    # 1 and 2 sigma prediction intervals
    pl1 = prediction_interval([log10(xn), 1], 54, len(xn), Eps, 68., Beta, Covariance)
    pl2 = prediction_interval([log10(xn), 1], 54, len(xn), Eps, 95., Beta, Covariance)

    # create a figure to draw on and add a subplot
    fig, ax = subplots(1)

    # plot y calculated from px against de-logged x (and 1 and 2 sigma prediction intervals)
    ax.plot(xn, yn, '#EC472F', label='Logarithmic ODR')
    ax.plot(xn, yn + pl1, '#0076D4', dashes=[9, 4.5], label='1σ Prediction limit (~68%)', linewidth=0.8)
    ax.plot(xn, yn - pl1, '#0076D4', dashes=[9, 4.5], linewidth=0.8)
    ax.plot(xn, yn + pl2, '#BFBFBF', dashes=[9, 4.5], label='2σ Prediction limit (~95%)', linewidth=0.5)
    ax.plot(xn, yn - pl2, '#BFBFBF', dashes=[9, 4.5], linewidth=0.5)

    # plot points and error bars
    ax.plot(x_data, y_data, 'k.', markerfacecolor= '#4495F3',
             markeredgewidth=.5,  markeredgecolor = 'k',
            label='Calibration data (n = 54)', markersize=5)
    ax.errorbar(x_data, y_data, ecolor='k', xerr=x_err, yerr=y_err, fmt=" ", linewidth=0.5, capsize=0)

    # adds new data and errors bars
    ax.plot(new_x_data, new_y_data,'k.', markerfacecolor= '#FF8130',
             markeredgewidth=.5,  markeredgecolor = 'k',
            label = 'New data (n = 15)', markersize = 5)
    ax.errorbar(new_x_data, new_y_data, ecolor='k', xerr=new_x_err, yerr=new_y_err, fmt=" ", linewidth=0.5, capsize=0)

    # labels, extents etc.
    ax.set_ylim(0, 60)
    ax.set_xlabel('Mean R-value')
    ax.set_ylabel('Age (ka)')
    ax.tick_params(direction = 'in')
    ax.tick_params(bottom=True, top=True, left=True, right=True)
    ax.tick_params(labelbottom=True, labeltop=False, labelleft=True, labelright=False)

    # configure legend
    ax.legend(frameon=False,
              fontsize=7)

    # Sets axis ratio to 1
    ratio = 1
    ax.set_aspect(1.0/ax.get_data_ratio()*ratio)

    # export the figure
    fig.set_size_inches(3.2, 3.2)
    savefig('Pyrenees_Monte_Carlo_ODR.png', dpi = 900, bbox_inches='tight')
    #savefig('Pyrenees_Monte_Carlo_ODR.svg')

    # Return final model coefficients
    return Beta, Eps, Covariance
Exemple #24
0
    # 0  gong/clip1/1.jpg  181.0  231.0
    # 1  gong/clip1/2.jpg  180.0  231.0
    # Keep only rows whose x lies within two standard deviations of the mean x.
    segment_data = segment_data[abs(segment_data['x'].mean() -
                                    segment_data['x']) < 2 *
                                segment_data['x'].std()]
    # Same filter on y; mean and std are taken from the already x-filtered rows.
    segment_data = segment_data[abs(segment_data['y'].mean() -
                                    segment_data['y']) < 2 *
                                segment_data['y'].std()]

    x = list(segment_data['x'])
    y = list(segment_data['y'])

    # Nothing left after filtering: move on to the next iteration of the
    # enclosing loop (not visible in this excerpt).
    if len(x) == 0:
        continue

    mydata = Data(x, y)

    # Fit the `linear` model (defined elsewhere) by ODR, starting from [0, 2].
    f = linear
    mod = Model(linear)
    myodr = ODR(mydata, mod, beta0=[0, 2])
    # print(myodr)
    res = myodr.run()
    coeff = res.beta
    # NOTE(review): measure_objects is defined elsewhere; it presumably returns
    # a list, since its result is appended to below - confirm.
    obj_and_metric = measure_objects(f, coeff)

    # For every object position, record the absolute vertical distance between
    # the object's y and the fitted model evaluated at the object's x.
    obj_locations = list(zip(list(objects['x_pos']), list(objects['y_pos'])))
    for obj in obj_locations:
        error = abs(obj[1] - f(coeff, obj[0]))
        obj_and_metric.append((obj, error))

    if x[-1] >= x[0]:
Exemple #25
0
    def test_multi(self):
        """Fit the multi-response sample model and compare with reference numbers.

        The model function comes from the test class (``multi_fcn``). The
        response weights, the per-point ``ifixx`` flags and the initial
        ``delta0`` are set up by hand before the fit; the expected ``beta``,
        ``sd_beta`` and ``cov_beta`` are hard-coded reference values.
        """
        model_info = dict(name='Sample Multi-Response Model',
                          ref='ODRPACK UG, pg. 56')
        multi_mod = Model(self.multi_fcn, meta=model_info)

        multi_x = np.array([
            30.0, 50.0, 70.0, 100.0, 150.0, 200.0, 300.0, 500.0, 700.0, 1000.0,
            1500.0, 2000.0, 3000.0, 5000.0, 7000.0, 10000.0, 15000.0, 20000.0,
            30000.0, 50000.0, 70000.0, 100000.0, 150000.0
        ])
        first_response = [
            4.22, 4.167, 4.132, 4.038, 4.019, 3.956, 3.884, 3.784, 3.713,
            3.633, 3.54, 3.433, 3.358, 3.258, 3.193, 3.128, 3.059, 2.984,
            2.934, 2.876, 2.838, 2.798, 2.759
        ]
        second_response = [
            0.136, 0.167, 0.188, 0.212, 0.236, 0.257, 0.276, 0.297, 0.309,
            0.311, 0.314, 0.311, 0.305, 0.289, 0.277, 0.255, 0.24, 0.218,
            0.202, 0.182, 0.168, 0.153, 0.139
        ]
        multi_y = np.array([first_response, second_response])

        n = len(multi_x)
        multi_we = np.zeros((2, 2, n), dtype=float)
        multi_ifixx = np.ones(n, dtype=int)
        multi_delta = np.zeros(n, dtype=float)

        # The same 2x2 weight block is used for every observation.
        multi_we[0, 0, :] = 559.6
        multi_we[0, 1, :] = -1634.0
        multi_we[1, 0, :] = -1634.0
        multi_we[1, 1, :] = 8397.0

        for i, xi in enumerate(multi_x):
            if xi < 100.0:
                multi_ifixx[i] = 0
            elif xi <= 150.0:
                pass  # defaults are fine
            elif xi <= 1000.0:
                multi_delta[i] = 25.0
            elif xi <= 10000.0:
                multi_delta[i] = 560.0
            elif xi <= 100000.0:
                multi_delta[i] = 9500.0
            else:
                multi_delta[i] = 144000.0
            # the two points at x == 100 and x == 150 get zero response weight
            if xi in (100.0, 150.0):
                multi_we[:, :, i] = 0.0

        multi_dat = Data(multi_x,
                         multi_y,
                         wd=1e-4 / np.power(multi_x, 2),
                         we=multi_we)
        multi_odr = ODR(multi_dat,
                        multi_mod,
                        beta0=[4., 2., 7., .4, .5],
                        delta0=multi_delta,
                        ifixx=multi_ifixx)
        multi_odr.set_job(deriv=1, del_init=1)
        out = multi_odr.run()

        expected_beta = np.array([
            4.3799880305938963, 2.4333057577497703, 8.0028845899503978,
            0.5101147161764654, 0.5173902330489161
        ])
        expected_sd = np.array([
            0.0130625231081944, 0.0130499785273277, 0.1167085962217757,
            0.0132642749596149, 0.0288529201353984
        ])
        expected_cov = np.array([
            [0.0064918418231375, 0.0036159705923791, 0.0438637051470406,
             -0.0058700836512467, 0.011281212888768],
            [0.0036159705923791, 0.0064793789429006, 0.0517610978353126,
             -0.0051181304940204, 0.0130726943624117],
            [0.0438637051470406, 0.0517610978353126, 0.5182263323095322,
             -0.0563083340093696, 0.1269490939468611],
            [-0.0058700836512467, -0.0051181304940204, -0.0563083340093696,
             0.0066939246261263, -0.0140184391377962],
            [0.011281212888768, 0.0130726943624117, 0.1269490939468611,
             -0.0140184391377962, 0.0316733013820852],
        ])

        assert_array_almost_equal(out.beta, expected_beta)
        assert_array_almost_equal(out.sd_beta, expected_sd)
        assert_array_almost_equal(out.cov_beta, expected_cov)
Exemple #26
0
# Report the fit stored in `out` (produced earlier in the script).
out.pprint()

af = out.beta[0]
bf = out.beta[1]
cf = out.beta[2]

# Piecewise-linear curve: slope `af` everywhere, replaced by slope `cf` for
# x beyond `hxfix`, continuous at the hinge.
yrngf = bf + af * xrng
yhinge = bf + af * hxfix
idx = xrng > hxfix
yrngf[idx] = cf * (xrng[idx]-hxfix) + yhinge
# pdb.set_trace()
############## polynomial
from scipy.odr import Model, Data, ODR
from scipy.stats import linregress,norm
mod = Model(f)
dat = Data(ml, mw)
# polyfit returns coefficients from lowest to highest degree; they are
# reversed so that beta0 starts with the quadratic term.
co = np.polynomial.polynomial.polyfit(ml, mw,2)
od = ODR(dat, mod, beta0=[co[2],co[1],co[0]])
out = od.run()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print('\npolynomial\n')
out.pprint()

yrng_poly = out.beta[0]*xrng**2+out.beta[1]*xrng+out.beta[2]

out_poly = out

############## 
# get stats from simulated data
##############
a = 0.042
b = 0.481
Exemple #27
0
def FFD_powerlaw(logx, logy, logxerr, logyerr, findXmin=False, slope=False):
    '''
    Given a FFD, provide the best fit parameters for a power law function. This is done using
    orthogonal distance regression, so that both x and y errors can be accounted for. As a crude
    way to account for incompleteness, determine a minimum energy, below which the fit gets bad.
    This is done by iteratively fitting with different values for xmin, and finding the value of
    xmin that minimizes the KS distance between the model and the data. See Clauset 2007
    (arXiv:0706.1062), sec 3.3 for an explanation of the algorithm.
    Parameters
    ----------
    logx : The log-scaled x values
    logy : The log-scaled y values
    logxerr : The log-scaled x error bars
    logyerr : The log-scaled y error bars
    findXmin : Iteratively determine the best x value below which to trim the data.
        When False a single fit over all data is done and the returned cutoff is -inf.
    slope : When False (default) the slope is a free parameter; otherwise the model
        uses a fixed slope of -1 and only the normalization enters the model.
    Returns
    -------
    b0, b1, b0_err, b1_err, cutoff
    Best fit parameters for the power law slope and normalization, along with the optimal xmin
    '''
    # Boolean masks are applied below, so make sure we hold arrays.
    logx = np.asarray(logx)
    logy = np.asarray(logy)
    logxerr = np.asarray(logxerr)
    logyerr = np.asarray(logyerr)

    if slope == False:
        def f(B, x):
            # Positive slopes are rejected by returning an infinite model value
            if B[0] > 0:
                return np.inf
            return B[0]*x + B[1]
    else:
        def f(B, x):
            if B[0] > 0:
                return np.inf
            return -1*x + B[1]

    if findXmin:
        cutoff_vals = np.linspace(np.min(logx), np.max(logx))
    else:
        # One-element array (not a bare scalar) so that it can be iterated
        # and measured with len() like the findXmin=True case.
        cutoff_vals = np.array([-np.inf])

    ks_vals = np.zeros_like(cutoff_vals)
    # NaN-filled so that rows of skipped cut-offs never hold arbitrary memory.
    param_arr = np.full((len(ks_vals), 4), np.nan)

    for idx, e_cut in enumerate(cutoff_vals):
        # Initial guess for powerlaw fit
        b00, b10 = -1.0, 10

        mask = logx > e_cut

        # Make the KS distance large if we threw out all of the data
        if len(logx[mask]) < 1:
            ks_vals[idx] = np.inf
            continue

        linear = Model(f)
        mydata = Data(logx[mask], logy[mask], wd=1/logxerr[mask]**2, we=1/logyerr[mask]**2)
        myodr = ODR(mydata, linear, beta0=[b00, b10])
        myoutput = myodr.run()
        b0, b1 = myoutput.beta[0], myoutput.beta[1]
        b0_err, b1_err = myoutput.sd_beta[0], myoutput.sd_beta[1]

        # Record the KS distance and best fit parameters for this particular xmin value
        ks_vals[idx] = stats.ks_2samp(logy[mask], b0*logx[mask] + b1).statistic
        param_arr[idx] = (b0, b1, b0_err, b1_err)

    # Use the parameters that correspond to the minimum KS distance
    best_ks_idx = np.argmin(ks_vals)
    b0, b1, b0_err, b1_err = param_arr[best_ks_idx]
    cutoff = cutoff_vals[best_ks_idx]

    return b0, b1, b0_err, b1_err, cutoff
Exemple #28
0
def _mean_line_distance(slope, intercept, x, y):
    """Return the mean perpendicular distance from the points to a line.

    The line ``y = slope * x + intercept`` is rewritten in the normalised
    form ``A*x + B*y + C = 0`` (with ``A**2 + B**2 == 1``), so that
    ``|A*x + B*y + C|`` is the Euclidean distance of each point from it.
    """
    norm = np.sqrt(slope * slope + 1)
    a = slope / norm
    b = -1 / norm
    c = intercept / norm
    return np.mean(np.abs(a * x + b * y + c))


def main():
    """Fit straight lines to two bubble-count tables and plot the result.

    Reads ``number.txt`` and ``curvature.txt`` from the working directory.
    Each is reshaped to three columns: column 0 is plotted as x, column 1 as
    y and column 2 is used as the y error.  The table stored in ``hough`` is
    fitted with ``curve_fit`` (column 2 as sigma); the table stored in
    ``cur`` is fitted with ``scipy.odr`` in ordinary-least-squares mode.
    Both data sets, their fitted lines and the mean point-to-line distance
    of each fit are drawn in a single matplotlib figure.

    Ctrl-C is reported and swallowed.  Any other exception has its traceback
    printed to stdout and terminates the process with exit status 1.
    """
    try:
        # NOTE(review): 'curvature.txt' feeds the series annotated as the
        # Hough transform and 'number.txt' the one annotated as the curvature
        # method -- confirm the two file names are not swapped.
        # reshape(-1, 3) accepts any number of rows instead of exactly 41.
        cur = np.loadtxt('number.txt', skiprows=0).reshape(-1, 3)
        hough = np.loadtxt('curvature.txt', skiprows=0).reshape(-1, 3)

        # Empirical x error, shared by both data sets.
        xerr = np.sqrt(hough[:, 0]) / 3
        yerr_h = hough[:, 2]
        yerr_c = cur[:, 2]

        # scipy.odr.Data: `we` weights the response (y) and `wd` the input
        # (x).  np.spacing(1) keeps a weight finite when an error is zero.
        tiny = np.spacing(1)
        data_c = Data(cur[:, 0],
                      cur[:, 1],
                      wd=1 / (np.power(xerr, 2) + tiny),
                      we=1 / (np.power(yerr_c, 2) + tiny))
        odr_c = ODR(data_c, Model(ord_function), beta0=[0, 0])
        odr_c.set_job(fit_type=2)  # 2 = ordinary least squares
        popt_c = odr_c.run().beta

        # The Hough series is fitted with curve_fit only; a preceding ODR fit
        # of the same data would be overwritten here, so none is run.
        popt_h, _ = curve_fit(linear_fit_function, hough[:, 0],
                              hough[:, 1], [1, 0], hough[:, 2])

        fitting_error_h = _mean_line_distance(popt_h[0], popt_h[1],
                                              hough[:, 0], hough[:, 1])
        fitting_error_c = _mean_line_distance(popt_c[0], popt_c[1],
                                              cur[:, 0], cur[:, 1])

        fig, ax = plt.subplots(ncols=1)
        ax.errorbar(hough[:, 0],
                    hough[:, 1],
                    xerr=xerr,
                    yerr=yerr_h,
                    fmt='o',
                    color='blue')
        ax.errorbar(cur[:, 0],
                    cur[:, 1],
                    xerr=xerr,
                    yerr=yerr_c,
                    fmt='o',
                    color='red')

        ax.plot(hough[:, 0],
                popt_h[0] * hough[:, 0] + popt_h[1],
                '-b',
                linewidth=2)
        ax.plot(cur[:, 0],
                popt_c[0] * cur[:, 0] + popt_c[1],
                '-r',
                linewidth=2)

        bbox_props = dict(boxstyle="square,pad=0.3",
                          fc="white",
                          ec="black",
                          lw=2)

        annotation_text = (
            "function:  y = kx + b \n"
            "Hough Transfrom (blue)\n"
            "k = %.2f b = %.2f Error = %.2f\n"
            "Curvature Method (red)\n"
            "k = %.2f b = %.2f Error = %.2f"
        ) % (popt_h[0], popt_h[1], fitting_error_h,
             popt_c[0], popt_c[1], fitting_error_c)

        # Largest y value over both series; positions the text box and
        # sets the upper axis limit.
        y_top = max(np.amax(hough[:, 1]), np.amax(cur[:, 1]))

        ax.text(10,
                y_top + 10,
                annotation_text,
                ha="left",
                va="top",
                rotation=0,
                size=15,
                bbox=bbox_props)

        ax.set_title('Algorithom Performance')
        ax.set_xlabel('Bubble Number Counted Manually')
        ax.set_ylabel('Bubbble Number Counted by Algorithom')
        plt.grid()
        plt.xlim((np.amin(hough[:, 0]) - 5, np.amax(hough[:, 0]) + 5))
        plt.ylim((0, y_top + 20))
        plt.show()

    except KeyboardInterrupt:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("Shutdown requested... exiting")
    except Exception:
        traceback.print_exc(file=sys.stdout)
        # Non-zero status so callers can tell the run failed.
        sys.exit(1)
Exemple #29
0
    def match_copies(self,
                     matrix1, taxa1,
                     matrix2, taxa2,
                     force_single_copy=False):
        """Select best pairing copies between assessed gene families

        A preliminary, unweighted ODR (model ``self.line``) is fitted to the
        upper triangles of both distance matrices.  For every candidate
        (gene1, gene2) pairing the absolute ODR residuals of its distances to
        other genomes are summed; within each genome, pairings are then
        accepted greedily from the smallest sum upwards, discarding any
        pairing that reuses an already accepted gene.

        :parameter matrix1: DataFrame with distances from gene1
        :parameter matrix2: DataFrame with distances from gene2
        :parameter taxa1: taxon table from gene1 (pd.DataFrame); read through
            its ``gene``, ``genome`` and ``taxon`` columns and, by position,
            through column 0 (used as gene id) and column 1 (used as genome)
        :parameter taxa2: taxon table from gene2 (pd.DataFrame), read the
            same way
        :parameter force_single_copy: if true, keep only the lowest-residual
            pairing of each genome

        :raises Exception: if the ``genome`` columns of the two filtered
            taxon tables differ

        Return paired copies of input DataFrames, as the tuple
        ``(matrix1, taxa1, matrix2, taxa2)``"""

        # NOTE(review): assumes row i of both taxon tables and row/column i
        #     of both matrices describe the same candidate pairing -- confirm
        #     against the caller.

        #
        # create a single DataFrame matching taxa from both gene families;
        #     `pairs` is order-independent so it can be compared with the
        #     pairings selected further down
        all_taxon_pairs = pd.DataFrame({'gene1': taxa1['gene']})
        all_taxon_pairs['gene2'] = taxa2['gene']
        all_taxon_pairs['genome'] = taxa1['genome'].tolist()
        all_taxon_pairs['pairs'] = [
            frozenset(pair)
            for pair in zip(all_taxon_pairs['gene1'],
                            all_taxon_pairs['gene2'])
        ]

        #
        # summarize distances matrices by using only its upper triangle (triu)
        triu_indices = np.triu_indices_from(matrix1, k=1)
        condensed1 = matrix1.values[triu_indices]
        condensed2 = matrix2.values[triu_indices]

        #
        # run ODR with no weights; the ratio of standard deviations is the
        #     geometric-mean slope estimate used as starting value
        odr = ODR(Data(condensed1, condensed2),
                  Model(self.line),
                  beta0=[np.std(condensed2) / np.std(condensed1)])
        regression = odr.run()

        #
        # create DataFrame with all residuals from the preliminary ODR: every
        #      condensed distance is credited once to each of its two
        #      end points, and distances within a single genome are dropped
        first_idx, second_idx = triu_indices
        gene1_col = taxa1.iloc[:, 0].to_numpy()
        gene2_col = taxa2.iloc[:, 0].to_numpy()
        genome_col = taxa1.iloc[:, 1].to_numpy()

        same_genome = genome_col[first_idx] == genome_col[second_idx]
        combined_residual = np.abs(regression.delta) + np.abs(regression.eps)

        residual_df = pd.concat(
            [pd.DataFrame({'matrix1_gene': gene1_col[idx],
                           'matrix2_gene': gene2_col[idx],
                           'genome': genome_col[idx],
                           'to_drop': same_genome,
                           'combined_residual': combined_residual})
             for idx in (first_idx, second_idx)],
            ignore_index=True
        )
        residual_df = residual_df[~residual_df['to_drop']]

        sum_paired_residuals = residual_df.groupby(
            ['matrix1_gene', 'matrix2_gene']
        ).agg(
            residual_sum=pd.NamedAgg(column='combined_residual',
                                     aggfunc='sum'),
            genome=pd.NamedAgg(column='genome',
                               aggfunc=lambda x: x.iloc[0])
        ).reset_index()

        # ascending residuals, so the first row of a genome is its best pair
        sum_paired_residuals = sum_paired_residuals.sort_values(
            'residual_sum'
        ).reset_index(drop=True)

        #
        # greedy selection, genome by genome; rows are collected in a list
        #     and turned into a DataFrame once, instead of growing a
        #     DataFrame row by row
        selected = []
        genome_groups = sum_paired_residuals.groupby('genome').groups
        for genome, indices in genome_groups.items():

            candidates = sum_paired_residuals.loc[indices]

            while not candidates.empty:
                top = candidates.iloc[0]
                selected.append((top['matrix1_gene'],
                                 top['matrix2_gene'],
                                 genome))

                if force_single_copy:
                    break

                # a gene may take part in one accepted pairing only
                clash = (
                    (candidates['matrix1_gene'] == top['matrix1_gene']) |
                    (candidates['matrix2_gene'] == top['matrix2_gene'])
                )
                candidates = candidates.loc[~clash]

        best_pairs = pd.DataFrame(selected,
                                  columns=['gene1', 'gene2', 'genome'])
        best_pairs['pairs'] = [
            frozenset(pair)
            for pair in zip(best_pairs['gene1'], best_pairs['gene2'])
        ]

        #
        # keep only the rows of the taxon tables whose pairing was selected,
        #     ordered by genome (stable sort keeps both tables in step)
        is_selected = all_taxon_pairs['pairs'].isin(best_pairs['pairs'])
        kept_index = all_taxon_pairs.index[is_selected]

        taxa1 = taxa1.reindex(index=kept_index).sort_values(
            'genome', kind='mergesort'
        ).reset_index(drop=True)
        taxa2 = taxa2.reindex(index=kept_index).sort_values(
            'genome', kind='mergesort'
        ).reset_index(drop=True)

        if not (taxa1['genome'] == taxa2['genome']).all():
            raise Exception('**Wow, taxa order is wrong! ABORT!!!')

        matrix1 = matrix1.reindex(index=taxa1['taxon'],
                                  columns=taxa1['taxon'])
        matrix2 = matrix2.reindex(index=taxa2['taxon'],
                                  columns=taxa2['taxon'])

        return matrix1, taxa1, matrix2, taxa2
Exemple #30
0

# Compute line of best fit

# Short aliases used by the fit and the plot that follow.
Y = future_life_expectancy  # response: plotted on the y axis
X = age  # input: plotted on the x axis

from scipy.odr import Model, Data, ODR
from scipy.stats import linregress

def f(p, x):
    """Straight-line model: ``p[0]`` is the slope and ``p[1]`` the intercept."""
    slope, intercept = p[0], p[1]
    return slope * x + intercept

# Seed the orthogonal distance regression with the ordinary-least-squares
# estimate instead of an arbitrary guess: linreg[0:2] is (slope, intercept),
# the same parameter order that f expects.
linreg = linregress(X, Y)
mod = Model(f)
dat = Data(X, Y)
od = ODR(dat, mod, beta0=linreg[0:2])
out = od.run()
TLSbeta = out.beta[0]  # fitted slope


# In[15]:


# Plot chart

# Raw observations, one dot per point.
plt.plot(X, Y, '.')
# Fitted values intercept + slope * X (out.beta[1] and out.beta[0]); the '.'
# format draws them as one dot per X value, not as a connected line.
plt.plot(X, out.beta[1] + np.multiply(X, out.beta[0]), '.')

plt.xlabel('Current Age')
plt.ylabel('Future Life')