def setup(self, conductance, quantity, super_pore_conductance):
     r"""
     This setup provides the initial data for the solver from the provided
     properties.
     It also creates the matrices A and b.
     """
     # Assigning super_pore conductance for Neumann_group BC
     if super_pore_conductance is None:
         self.super_pore_conductance = []
     else:
         self.super_pore_conductance = super_pore_conductance
     # Providing conductance values for the algorithm from the Physics name
     if sp.size(self._phase) == 1:
         self._conductance = 'throat.' + conductance.split('.')[-1]
         self._quantity = 'pore.' + quantity.split('.')[-1]
         # Check health of conductance vector
         if self._phase.check_data_health(props=self._conductance).health:
             self['throat.conductance'] = self._phase[self._conductance]
         else:
             raise Exception('The provided throat conductance has problems')
     else:
         raise Exception('The linear solver accepts just one phase.')
     # Checking for the linear terms to be added to the coeff diagonal/RHS
     diag_added_data = sp.zeros(self.Np)
     RHS_added_data = sp.zeros(self.Np)
     for label in self.labels():
         if 'pore.source_' in label:
             source_name = 'pore.' + \
                           (label.split('.')[-1]).replace('source_', '')
             matching_physics = [phys for phys in self._phase._physics
                                 if source_name in phys.models.keys()]
             for phys in matching_physics:
                 x = phys.models[source_name]['x']
                 if x != '' and type(x) == str:
                     if x.split('.')[-1] != quantity.split('.')[-1]:
                         raise Exception('The quantity(pore.' +
                                         x.split('.')[-1] +
                                         '), provided by source term(' +
                                         source_name + '), is different ' +
                                         'from the main quantity(pore.' +
                                         quantity.split('.')[-1] + ') in ' +
                                         self.name + ' algorithm.')
             source_name = label.replace('pore.source_', '')
             if 'pore.source_linear_s1_' + source_name in self.props():
                 prop1 = 'pore.source_linear_s1_' + source_name
                 pores = ~sp.isnan(self[prop1])
                 diag_added_data[pores] = diag_added_data[pores] + \
                     self[prop1][pores]
                 prop2 = 'pore.source_linear_s2_' + source_name
                 pores = ~sp.isnan(self[prop2])
                 RHS_added_data[pores] = RHS_added_data[pores] + \
                     self[prop2][pores]
     # Creating A and b based on the conductance values and new linear terms
     logger.info('Creating Coefficient matrix for the algorithm')
     d = diag_added_data
     self.A = self._build_coefficient_matrix(modified_diag_pores=self.Ps,
                                             diag_added_data=d)
     logger.info('Creating RHS matrix for the algorithm')
     self.b = self._build_RHS_matrix(modified_RHS_pores=self.Ps,
                                     RHS_added_data=-RHS_added_data)
Example #2
    def create_models(self):
        import scipy,cPickle
        from stellarpop import tools
        from stellarpop.ndinterp import ndInterp

        index = {}
        shape = []
        axes = {}
        axes_index = 0
        for key in self.axes_names:
            index[key] = {}
            shape.append(self.axes[key]['points'].size)
            axes[axes_index] = self.axes[key]['eval']
            axes_index += 1
            for i in range(self.axes[key]['points'].size):
                index[key][self.axes[key]['points'][i]] = i

        models = {}
        model = scipy.empty(shape)*scipy.nan
        for f in self.filter_names:
            models[f] = {}
            for z in self.redshifts:
                models[f][z] = model.copy()

        for file in self.files:
            f = open(file,'rb')
            data = cPickle.load(f)
            wave = cPickle.load(f)
            f.close()
            for key in data.keys():
                obj = data[key]
                jj = key
                spec = obj['sed']
                ind = []
                for key in self.axes_names:
                    try:
                        ind.append([index[key][obj[key]]])
                    except:
                        # Print diagnostics for the failing key, then re-raise
                        print key, index[key]
                        print obj
                        raise
                for f in self.filter_names:
                    for i in range(len(self.redshifts)):
                        z = self.redshifts[i]
                        # correction is the units correction factor
                        correction = self.corrections[i]
                        sed = [wave,spec*correction]
                        mag = tools.ABFilterMagnitude(self.filters[f],sed,z)
                        if scipy.isnan(mag):
                            raise ValueError('NaN magnitude for filter %s at z=%s' % (f, z))
                        models[f][z][ind] = mag

        for f in self.filter_names:
            for z in self.redshifts:
                model = models[f][z].copy()
                if scipy.isnan(model).any():
                    models[f][z] = None
                else:
                    models[f][z] = ndInterp(axes,model)
        return models
def zeroMeanUnitVarianz(data=None,x=True):
    if x:
        return (data-data.mean(axis=0))/data.std(axis=0)
    else:
        mean = data[~sp.isnan(data)].mean(axis=0)
        std = data[~sp.isnan(data)].std(axis=0)
        return (data - mean)/std
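# A short usage sketch for zeroMeanUnitVarianz, assuming the module-level
# `import scipy as sp` that the NaN branch relies on.
import scipy as sp

X = sp.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
Z = zeroMeanUnitVarianz(X)               # column-wise z-scores
v = sp.array([1.0, sp.nan, 3.0])
z = zeroMeanUnitVarianz(v, x=False)      # statistics computed over non-NaN entries only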
 def _do_one_outer_iteration(self, **kwargs):
     r"""
     One iteration of an outer iteration loop for an algorithm
     (e.g. time or parametric study)
     """
     # Checking for the necessary values in Picard algorithm
     nan_tol = sp.isnan(self['pore.source_tol'])
     nan_max = sp.isnan(self['pore.source_maxiter'])
     self._tol_for_all = sp.amin(self['pore.source_tol'][~nan_tol])
     self._maxiter_for_all = sp.amax(self['pore.source_maxiter'][~nan_max])
     if self._guess is None:
         self._guess = sp.zeros(self._coeff_dimension)
     t = 1
     step = 0
     # The main Picard loop
     while t > self._tol_for_all and step <= self._maxiter_for_all:
         X, t, A, b = self._do_inner_iteration_stage(guess=self._guess,
                                                     **kwargs)
         logger.info('tol for Picard source_algorithm in step ' +
                     str(step) + ' : ' + str(t))
         self._guess = X
         step += 1
     # Check for divergence
     self._steps = step
     if t >= self._tol_for_all and step > self._maxiter_for_all:
          raise Exception('Iterative algorithm for the source term reached '
                          'maxiter (' + str(self._maxiter_for_all) +
                          ') without achieving tol: ' +
                          str(self._tol_for_all))
     logger.info('Picard algorithm for source term converged!')
     self.A = A
     self.b = b
     self._tol_reached = t
     return X
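# A self-contained sketch of the same Picard (fixed-point) pattern used in
# _do_one_outer_iteration above, with a generic `update` callable standing in
# for _do_inner_iteration_stage (names here are illustrative, not OpenPNM API).
import numpy as np

def picard(update, x0, tol=1e-6, maxiter=50):
    x, resid, step = x0, np.inf, 0
    while resid > tol and step <= maxiter:
        x, resid = update(x)
        step += 1
    if resid >= tol and step > maxiter:
        raise RuntimeError('Picard loop hit maxiter without reaching tol')
    return x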
def LDA_batch_normalization(dataset, sample_table, batch_col, output_folder, ncomps): # this is actually the batch normalization method
   
    tmp_output_folder = os.path.join(output_folder, 'tmp')

    if not os.path.isdir(tmp_output_folder):
        os.makedirs(tmp_output_folder)
    
    barcodes, filtered_conditions, filtered_matrix, conditions, matrix = dataset
    
    # Remove any remaining NaNs and Infs from the filtered matrix - they would screw
    # up the LDA. 
    filtered_matrix[scipy.isnan(filtered_matrix)] = 0
    filtered_matrix[scipy.isinf(filtered_matrix)] = 0

    # For full matrix, also eliminate NaNs and Infs, BUT preserve the indices and values
    # so they can be added back into the matrix later (not implemented yet, and may never
    # be - there should no longer be NaNs and Infs in the dataset)
    # The NaNs and Infs will mess up the final step of the MATLAB LDA script, which uses
    # matrix multiplication to remove the specified number of components!
    matrix_nan_inds = scipy.isnan(matrix)
    matrix_nan_vals = matrix[matrix_nan_inds]
    matrix_inf_inds = scipy.isinf(matrix)
    matrix_inf_vals = matrix[matrix_inf_inds]

    matrix[matrix_nan_inds] = 0
    matrix[matrix_inf_inds] = 0

    # Save both the small matrix (for determining the components to remove) and the 
    # full matrix for the matlab script
    filtered_matrix_tmp_filename = os.path.join(tmp_output_folder, 'nonreplicating_matrix.txt')
    full_matrix_tmp_filename = os.path.join(tmp_output_folder, 'full_matrix.txt')
    
    np.savetxt(filtered_matrix_tmp_filename, filtered_matrix)
    np.savetxt(full_matrix_tmp_filename, matrix)

    # Map the batch to integers for matlab, and write out to a file so matlab can read
    # Note that yes, the batch_classes should match up with the filtered matrix, not
    # the full matrix
    batch_classes = get_batch_classes(dataset = [barcodes, filtered_conditions, filtered_matrix], sample_table = sample_table, batch_col = batch_col)
    class_tmp_filename = os.path.join(tmp_output_folder, 'classes.txt')
    writeList(batch_classes, class_tmp_filename)
   
    output_tmp_filename = os.path.join(tmp_output_folder, 'full_matrix_lda_normalized.txt')
    runLDAMatlabFunc(filtered_matrix_filename = filtered_matrix_tmp_filename, \
            matrix_filename = full_matrix_tmp_filename, \
            class_filename = class_tmp_filename, \
            ncomps = ncomps, \
            output_filename = output_tmp_filename)
    # The X norm that is returned is the full matrix. In the future, we could add in
    # returning the components to remove so they can be visualized or applied to other
    # one-off datasets
    Xnorm =  scipy.genfromtxt(output_tmp_filename)

    ## Dump the dataset out!
    #output_filename = os.path.join(mtag_effect_folder, 'scaleddeviation_full_mtag_lda_{}.dump.gz'.format(ncomps))
    #of = gzip.open(output_filename, 'wb')
    #cPickle.dump([barcodes, conditions, Xnorm], of)
    #of.close()

    return [barcodes, conditions, Xnorm]
def ProcessData(data):
	data = data[::-1]
	n = 100
	growthOfThisData = 0
	fitPrice = FitPrice(data)
	if fitPrice == 0:
		return
	print("FitResult : " + str(fitPrice))	
	for i in range(0, len(data) - n):
		if not (sp.isnan(data[i][1]) or sp.isnan(data[i][4]) or sp.isnan(data[i][5])):
			if data[i][5] > 0:
				maxPrice = MaxPriceInNextNDays(data, i, n, fitPrice)
				minPrice = MinPriceInNextNDays(data, i, n, fitPrice)
				currentPrice = data[i][4] / fitPrice
				key = (currentPrice // 0.05) * 0.05
				if key in maxPriceResult:
					maxPriceResult[key] += maxPrice
					numOfDataMax[key] += 1
				else:
					maxPriceResult[key] = maxPrice
					numOfDataMax[key] = 1
				if key in minPriceResult:
					minPriceResult[key] += minPrice
					numOfDataMin[key] += 1
				else:
					minPriceResult[key] = minPrice
					numOfDataMin[key] = 1
    def _prepareICContents(self):
        allfilestr = ""
        topstr = "function ics_ = " + self.name +"_ics()\n"
        commentstr = "% Initial conditions for model " + self.name + "\n% Generated by PyDSTool for ADMC++ target\n\n"

        bodystr = "ics_ = [ ...\n"
        if self.initialconditions:
            icnames = self.initialconditions.keys()
            icnames.sort()
            
            for i in range(len(icnames)-1):
                if isnan(self.initialconditions[icnames[i]]):
                    val = str(0.0)
                else:
                    val = str(self.initialconditions[icnames[i]])
                
                bodystr += val + ", ... % " + icnames[i] + "\n"

            if isnan(self.initialconditions[icnames[len(icnames)-1]]):
                val = str(0.0)
            else:
                val = str(self.initialconditions[icnames[len(icnames)-1]])
                
            bodystr += val + " % " + icnames[len(icnames)-1] + " ...\n"

        bodystr += "];\n"
        
        allfilestr = topstr + commentstr + bodystr
        
        return allfilestr
def main():
    data = sp.genfromtxt('./data/web_traffic.tsv', delimiter='\t')
    x = data[:, 0]
    y = data[:, 1]
    x = x[~sp.isnan(y)]
    y = y[~sp.isnan(y)]
    fp1 = sp.polyfit(x, y, 1)
    print('Model parameters for fp1 %s' % fp1)
    f1 = sp.poly1d(fp1)
    print('This is the error rate for fp1 %f' % error(f1, x, y))

    fp2 = sp.polyfit(x, y, 2)
    print('Model parameters for fp2 %s' % fp2)
    f2 = sp.poly1d(fp2)
    print('This is the error rate for fp2 %f' % error(f2, x, y))

    plt.scatter(x, y,color= 'pink')
    plt.title('My first impression')
    plt.xlabel('Time')
    plt.ylabel('#Hits')
    plt.xticks([w * 7 * 24 for w in range(10)], ['week %i' % w for w in range(10)])
    fx = sp.linspace(0, x[-1], 1000)
    plt.plot(fx, f1(fx), linewidth=3,color='cyan')


    plt.plot(fx, f2(fx), linewidth=3, linestyle='--',color= 'red')
    plt.legend(['d = %i' %f1.order, 'd = %i' %f2.order], loc='upper left')
    plt.autoscale(tight=True)
    plt.grid()
    plt.show()
def init_and_cleanup_data(path, delimiter):
    data = sp.genfromtxt(path, delimiter=delimiter)
    hours = data[:, 0] # contains the hours
    webhits = data[:, 1] # contains the number of web hits at a particular hour
    hours = hours[~sp.isnan(webhits)]
    webhits = webhits[~sp.isnan(webhits)]
    return (hours, webhits)
Example #10
    def simulate(self, X):
        """
        @arguments
          X -- 2d array of [sample_i][var_i] : float
        @return
          y -- 1d array of [sample_i] : float
        """
        op = self.nonlin_op
        ok = True
        y_lin = self.simple_base.simulate(X)

        if op == OP_ABS:     ya = numpy.abs(y_lin)
        elif op == OP_MAX0:  ya = numpy.clip(y_lin, 0.0, INF)
        elif op == OP_MIN0:  ya = numpy.clip(y_lin, -INF, 0.0)
        elif op == OP_LOG10:
            #safeguard against: log() on values <= 0.0
            mn, mx = min(y_lin), max(y_lin)
            if mn <= 0.0 or scipy.isnan(mn) or mx == INF or scipy.isnan(mx):
                ok = False
            else:
                ya = numpy.log10(y_lin)
        elif op == OP_GTH:   ya = numpy.clip(self.thr - y_lin, 0.0, INF)
        elif op == OP_LTH:   ya = numpy.clip(y_lin - self.thr, 0.0, INF)
        else:                raise ValueError('Unknown op %d' % op)

        if ok: #could always do ** exp, but faster ways if exp is 0,1
            y = ya
        else:
            y = INF * numpy.ones(X.shape[0], dtype=float)    
        return y
Example #11
def getFluxes(val_mat, direction_mat, dist_mat, duxdy_mat, out_flux, inc):

	import scipy;
	import math;

	speed_factor   = 1;
	angle_factor   = 1;
	inc_factor     = 1;
	dist_factor    = 1;
	strain_factor  = 1;

	duxdy_mat = duxdy_mat / (sum(duxdy_mat[~scipy.isnan(duxdy_mat)]));

	cell_angles  = scipy.flipud(scipy.array([[-1 * math.pi / 4, -1 * math.pi / 2, -3 * math.pi / 4], [0, scipy.nan, math.pi], [math.pi / 4, math.pi / 2, 3 * math.pi / 4]]));
#	cell_angles  = scipy.flipud(scipy.array([[3 * math.pi / 4, 1 * math.pi / 2, 1 * math.pi / 4], [math.pi, scipy.nan, 0], [-3 * math.pi / 4, -1 * math.pi / 2, -1 * math.pi / 4]]));

	cell_incs    = scipy.array([[(inc**2 + inc**2)**0.5, inc, (inc**2 + inc**2)**0.5], [inc, scipy.nan, inc], [(inc**2 + inc**2)**0.5, inc, (inc**2 + inc**2)**0.5]]);
	cell_incs    = (1 / cell_incs**inc_factor);
	cell_incs    = cell_incs / sum(cell_incs[~scipy.isnan(cell_incs)]);

	vels_in      = scipy.cos(cell_angles - direction_mat);
	vels_in[1,1] = scipy.nan;
	vels_in[vels_in < 0.00001] = scipy.nan;
	vels_in      = vels_in**angle_factor * val_mat**speed_factor * dist_mat**dist_factor * (1 / duxdy_mat**strain_factor) * cell_incs;
	in_fluxes    = (vels_in / sum(vels_in[~scipy.isnan(vels_in)]) * out_flux);

	return in_fluxes;
Example #12
 def __call__(self,x1, x2, d1=[sp.NaN], d2=[sp.NaN],gets=False):
     D1 = 0 if sp.isnan(d1[0]) else int(sum([8**x for x in d1]))
     D2 = 0 if sp.isnan(d2[0]) else int(sum([8**x for x in d2]))
     self.smodel=sp.empty(1)
     r = libGP.k(x1.ctypes.data_as(ctpd), x2.ctypes.data_as(ctpd),
                 cint(D1), cint(D2), cint(self.dim),
                 self.ihyp.ctypes.data_as(ctpd), cint(self.Kindex),
                 self.smodel.ctypes.data_as(ctpd))
     if gets:
         return [r,self.smodel[0]]
     return r
def load_data():
    datas = sp.genfromtxt("web_traffic.tsv", delimiter='\t')
    print datas[:10]
    x = datas[:,0]
    y = datas[:,1]
    x = x[ ~sp.isnan(y)]
    y = y[ ~sp.isnan(y)]
    return x,y
Example #14
    def preProcess(self,
                   periodF0=0.06,
                   deltaF_div_F0=True,
                   max_threshold=None,
                   min_threshold=None,
                   nan_to_zeros=True,
                   detrend=False,
                   #~ band_filter=None,
                   gaussian_filter=None,
                   f1=None,
                   f2=None,
                   **kargs):
        
        images = self.images
        if deltaF_div_F0:
            ind = self.t()<=self.t_start+periodF0
            m0 = mean(images[ind,:,:] , axis = 0)
            images = (images-m0)/m0*1000.
            
        if max_threshold is not None:
            #~ images[images>max_threshold] = max_threshold
            images[images>max_threshold] = nan
            

        if min_threshold is not None:
            #~ images[images<min_threshold] = min_threshold
            images[images<min_threshold] = nan
                
            
        if nan_to_zeros:
            images[isnan(images) ] = 0.

        if detrend and not nan_to_zeros:
            m = any(isnan(images) , axis = 0)
            images[isnan(images) ] = 0.
            images = signal.detrend( images , axis = 0)
            images[:,m] = nan
        elif detrend and nan_to_zeros:
            images = signal.detrend( images , axis = 0)
            
        if gaussian_filter is not None:
            images = ndimage.gaussian_filter( images , (0 , gaussian_filter , gaussian_filter))
            

        if f1 is not None or f2 is not None:
            from ..computing.filter import fft_passband_filter
            if f1 is None: f1=0.
            if f2 is None: f2=inf
            nq = self.sampling_rate/2.
            images = fft_passband_filter(images, f_low = f1/nq , f_high = f2/nq , axis = 0)
        
        return images
Example #15
def get_data():
    data = sp.genfromtxt("input/web_traffic.tsv", delimiter="\t")

    x = data[:, 0]
    y = data[:, 1]

    x = x[~sp.isnan(y)]
    y = y[~sp.isnan(y)]

    return (x, y,)
 def test_returns_nan_if_one_spike_train_is_empty(self):
     empty = create_empty_spike_train()
     non_empty = neo.SpikeTrain(sp.array([1.0]) * pq.s, t_stop=2.0 * pq.s)
     k = sigproc.GaussianKernel()
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         actual = stm.schreiber_similarity((empty, non_empty), k)
     self.assertTrue(sp.isnan(actual[0, 0]))
     self.assertTrue(sp.isnan(actual[0, 1]))
     self.assertTrue(sp.isnan(actual[1, 0]))
def LDA_batch_normalization(dataset, sample_table, batch_col, output_folder, n_comps): # this is actually the batch normalization method
   
    tmp_output_folder = os.path.join(output_folder, 'tmp')

    if not os.path.isdir(tmp_output_folder):
        os.makedirs(tmp_output_folder)
    
    barcodes, filtered_conditions, filtered_matrix, conditions, matrix = dataset
    
    # Remove any remaining NaNs and Infs from the filtered matrix - they would screw
    # up the LDA. 
    filtered_matrix[scipy.isnan(filtered_matrix)] = 0
    filtered_matrix[scipy.isinf(filtered_matrix)] = 0

    # For full matrix, also eliminate NaNs and Infs, BUT preserve the indices and values
    # so they can be added back into the matrix later (not implemented yet, and may never
    # be - there should no longer be NaNs and Infs in the dataset)
    # The NaNs and Infs will mess up the final step of the MATLAB LDA script, which uses
    # matrix multiplication to remove the specified number of components!
    matrix_nan_inds = scipy.isnan(matrix)
    matrix_nan_vals = matrix[matrix_nan_inds]
    matrix_inf_inds = scipy.isinf(matrix)
    matrix_inf_vals = matrix[matrix_inf_inds]

    matrix[matrix_nan_inds] = 0
    matrix[matrix_inf_inds] = 0

    # Save both the small matrix (for determining the components to remove) and the 
    # full matrix for the matlab script
    filtered_matrix_tmp_filename = os.path.join(tmp_output_folder, 'nonreplicating_matrix.txt')
    full_matrix_tmp_filename = os.path.join(tmp_output_folder, 'full_matrix.txt')
    
    np.savetxt(filtered_matrix_tmp_filename, filtered_matrix)
    np.savetxt(full_matrix_tmp_filename, matrix)

    # Map batch classes to integers
    batch_classes = get_batch_classes(dataset = [barcodes, filtered_conditions, filtered_matrix], sample_table = sample_table, batch_col = batch_col)
	
    # Checks number of classes and limits ncomps
    a = [x > 0 for x in np.sum(np.absolute(filtered_matrix), axis=0)]
    classes = np.asarray([batch_classes[i] for i in range(len(batch_classes)) if a[i]])
    n_samples = filtered_matrix.shape[0]
    n_classes = len(np.unique(classes))
    if n_samples == n_classes:
        print "ERROR: The number of samples is equal to the number of classes. Exiting"
    if n_classes <= n_comps:
        print "Fewer classes, " + str(n_classes) + ", than components. Setting components to " + str(n_classes-1)
        n_comps = n_classes-1

    # Runs LDA
    #Xnorm = scikit_lda(filtered_matrix, matrix, batch_classes, n_comps)
    Xnorm = outer_python_lda(filtered_matrix, matrix, batch_classes, n_comps)

    return [barcodes, conditions, Xnorm, n_comps]
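# A minimal sketch, assuming scikit-learn is available, of the component cap
# applied above: LDA yields at most n_classes - 1 discriminant axes, so n_comps
# is clamped before fitting. The original delegates the actual LDA to an
# external outer_python_lda / MATLAB helper.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = np.random.randn(20, 5)
y = np.repeat([0, 1, 2], [7, 7, 6])
n_comps = min(3, len(np.unique(y)) - 1)          # at most n_classes - 1
lda = LinearDiscriminantAnalysis(n_components=n_comps).fit(X, y)
X_lda = lda.transform(X)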
Example #18
def load_samples(fname):
	""" Load training sample dataset """

	data = sp.genfromtxt(fname, delimiter='\t')
	x = data[:, 0]
	y = data[:, 1]

	print('%i entries in total, %i of them invalid.' % (sp.shape(data)[0], sp.sum(sp.isnan(y))))
	x = x[~sp.isnan(y)]
	y = y[~sp.isnan(y)]
	return (x, y)
Example #19
def get_cleaned_data():
    data = sp.genfromtxt(os.path.join(DATA_DIR, 'web_traffic.tsv'), delimiter='\t')
    x = data[:, 0]
    y = data[:, 1]
    print "Number of invalid entries: {}".format(sp.sum(sp.isnan(y)))
    print "Removing invalid entries."

    x = x[~sp.isnan(y)]
    y = y[~sp.isnan(y)]
    print "Number of invalid entries: {}".format(sp.sum(sp.isnan(y)))
    return x, y
Example #20
def get_relative_prices(walking_time, smoothed_prices):
    x = walking_time.flatten()
    y = smoothed_prices.flatten()
    mask = sp.isnan(x) | sp.isnan(y)
    
    spline = sp.interpolate.UnivariateSpline(x[~mask], y[~mask], s=len(x))
    v = spline(x)
    
    rel = (y - v).reshape(walking_time.shape)
    
    return rel
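# A minimal sketch of the same idea on synthetic data: fit a smoothing spline
# of price against walking time and keep the residual as the "relative" price
# (variable names here are illustrative).
import numpy as np
from scipy.interpolate import UnivariateSpline

t = np.linspace(0, 60, 50)                 # walking time in minutes
p = 100 - t + np.random.randn(50)          # synthetic smoothed prices
spline = UnivariateSpline(t, p, s=len(t))
relative = p - spline(t)                   # above/below the fitted trend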
 def terminate(self, maxsteps):
     """ Termination criteria """
     if maxsteps is not None:
         if self._num_updates >= maxsteps:
             return True
     if self.loss_target is not None:
         l = self.provider.currentLosses(self.bestParameters)
         if mean(l) <= self.loss_target:
             return True
      if sum(isnan(self.parameters)) > 0:
         print 'Diverged'
         return True
     return False
    def setXY(self):
        global x, y, xa, xb, ya, yb
        x = data[:, 0]
        y = data[:, 1]

        x = x[~sp.isnan(y)]
        y = y[~sp.isnan(y)]

        inflection = int(3.5 * 7 * 24)
        xa = x[:inflection]
        ya = y[:inflection]
        xb = x[inflection:]
        yb = y[inflection:]
 def test_ransohoff_snapoff_verts(self):
     ws = op.Workspace()
     ws.clear()
     bp = sp.array([[0.25, 0.25, 0.25], [0.25, 0.75, 0.25],
                    [0.75, 0.25, 0.25], [0.75, 0.75, 0.25],
                    [0.25, 0.25, 0.75], [0.25, 0.75, 0.75],
                    [0.75, 0.25, 0.75], [0.75, 0.75, 0.75]])
     scale = 1e-4
     sp.random.seed(1)
     p = (sp.random.random([len(bp), 3])-0.5)/1000
     bp += p
     fiber_rad = 2e-6
     bp = op.topotools.reflect_base_points(bp, domain_size=[1, 1, 1])
     prj = op.materials.VoronoiFibers(fiber_rad=fiber_rad,
                                      resolution=1e-6,
                                      shape=[scale, scale, scale],
                                      points=bp*scale,
                                      name='test')
     net = prj.network
     del_geom = prj.geometries()['test_del']
     vor_geom = prj.geometries()['test_vor']
     f = op.models.physics.capillary_pressure.ransohoff_snap_off
     water = op.phases.GenericPhase(network=net)
     water['pore.surface_tension'] = 0.072
     water['pore.contact_angle'] = 45
     phys1 = op.physics.GenericPhysics(network=net,
                                       geometry=del_geom,
                                       phase=water)
     phys1.add_model(propname='throat.snap_off',
                     model=f,
                     wavelength=fiber_rad)
     phys1.add_model(propname='throat.snap_off_pair',
                     model=f,
                     wavelength=fiber_rad,
                     require_pair=True)
     phys2 = op.physics.GenericPhysics(network=net,
                                       geometry=vor_geom,
                                       phase=water)
     phys2.add_model(propname='throat.snap_off',
                     model=f,
                     wavelength=fiber_rad)
     phys2.add_model(propname='throat.snap_off_pair',
                     model=f,
                     wavelength=fiber_rad,
                     require_pair=True)
     ts = ~net['throat.interconnect']
     assert ~sp.any(sp.isnan(water['throat.snap_off'][ts]))
     assert sp.any(sp.isnan(water['throat.snap_off_pair'][ts]))
     assert sp.any(~sp.isnan(water['throat.snap_off_pair'][ts]))
Example #24
def load_dataset2(dataset_name):
    try:
        data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "data")
    except NameError: data_dir = "../data"
    data = sp.genfromtxt(os.path.join(data_dir, "{0}.tsv".format(dataset_name)), delimiter="\t") #schema 'features label'
    print(data[:10]) # print the first 10 rows for a peek at the data
    
    # all examples will have three classes in this file
    
    x = data[:, 0] # take first column
    y = data[:, -1] #take last column
    print("Number of invalid entries:", sp.sum(sp.isnan(y))) #value being nan in y
    #clean the data, remove rows with nan value 
    x = x[~sp.isnan(y)]
    y = y[~sp.isnan(y)]
Example #25
 def test_interleave_data_float(self):
     net = OpenPNM.Network.Cubic(shape=[2, 2, 2])
     Ps = net.pores('top')
     geom1 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
     Ps = net.pores('bottom')
     geom2 = OpenPNM.Geometry.GenericGeometry(network=net, pores=Ps)
     geom1['pore.blah'] = 1.0
     # Ensure floats are returned for geom1
     assert 'float' in geom1['pore.blah'].dtype.name
     # Ensure nans are returned on geom2
     assert sp.all(sp.isnan(geom2['pore.blah']))
     # Ensure interleaved array is float with nans
     assert 'float' in net['pore.blah'].dtype.name
     # Ensure missing values are floats
     assert sp.sum(sp.isnan(net['pore.blah'])) == 4
Example #26
def main():
    data = sp.genfromtxt("web_traffic.tsv", delimiter="\t")
    plt.xkcd()

    x = data[:, 0]
    y = data[:, 1]

    x = x[~sp.isnan(y)]
    y = y[~sp.isnan(y)]

    fp1, _, _, _, _ = sp.polyfit(x, y, 1, full=True)

    # Here we try a polynomial of degree 3
    fp2, _, _, _, _ = sp.polyfit(x, y, 3, full=True)

    f1 = sp.poly1d(fp1)
    f2 = sp.poly1d(fp2)

    # We have an obvious inflection point between 3rd and 4th week
    inflection_in_hours = int(3.5 * 7 * 24)

    x_before_inflection = x[:inflection_in_hours]

    x_after_inflection = x[inflection_in_hours:]
    y_after_inflection = y[inflection_in_hours:]

    f_after = sp.poly1d(sp.polyfit(x_after_inflection, y_after_inflection, 1))

    fx = sp.linspace(0, x[-1], 1000)
    fx_after = sp.linspace(len(x_before_inflection)+1, x[-1], 1000)

    plt.scatter(x, y, s=5)
    plt.title("Web traffic over the last month.")
    plt.xlabel("Time")
    plt.ylabel("Hits/hour")
    plt.xticks([w * 7 * 24 for w in range(10)],
               ['week {}'.format(w) for w in range(10)])
    plt.autoscale(tight=True)

    plt.plot(fx, f1(fx), linewidth=2)
    plt.plot(fx, f2(fx), linewidth=2)
    plt.plot(fx_after, f_after(fx_after), linewidth=3)
    plt.legend(["d={}".format(f1.order),
                "d={}".format(f2.order),
                "d after inflection"],
               loc="upper left")
    # plt.grid(True, linestyle="-", color='0.75')
    plt.show()
Example #27
def conduit_lengths(network, throats=None, mode='pore'):
    r"""
    Return the respective lengths of the conduit components defined by the throat
    conns P1 - T - P2

    Notes
    -----
    mode = 'pore' - uses pore coordinates
    mode = 'centroid' - uses pore and throat centroids

    """
    if throats is None:
        throats = network.throats()
    Ps = network['throat.conns']
    pdia = network['pore.diameter']

    if mode == 'centroid':
        try:
            pcentroids = network['pore.centroid']
            tcentroids = network['throat.centroid']
            if _sp.sum(_sp.isnan(pcentroids)) + _sp.sum(_sp.isnan(tcentroids)) > 0:
                mode = 'pore'
            else:
                plen1 = _sp.sqrt(_sp.sum(_sp.square(pcentroids[Ps[:, 0]] -
                                         tcentroids), 1))-network['throat.length']/2
                plen2 = _sp.sqrt(_sp.sum(_sp.square(pcentroids[Ps[:, 1]] -
                                         tcentroids), 1))-network['throat.length']/2
        except KeyError:
            mode = 'pore'
    if mode == 'pore':
        # Find half-lengths of each pore
        pcoords = network['pore.coords']
        # Find the pore-to-pore distance, minus the throat length
        lengths = _sp.sqrt(_sp.sum(_sp.square(pcoords[Ps[:, 0]] -
                                   pcoords[Ps[:, 1]]), 1)) - network['throat.length']
        lengths[lengths < 0.0] = 2e-9
        # Calculate the fraction of that distance from the first pore
        try:
            fractions = pdia[Ps[:, 0]]/(pdia[Ps[:, 0]] + pdia[Ps[:, 1]])
            # Don't allow zero lengths
            # fractions[fractions == 0.0] = 0.5
            # fractions[fractions == 1.0] = 0.5
        except:
            fractions = 0.5
        plen1 = lengths*fractions
        plen2 = lengths*(1-fractions)

    return _sp.vstack((plen1, network['throat.length'], plen2)).T[throats]
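# A worked numeric sketch of the mode='pore' branch above: the pore-to-pore
# distance minus the throat length is split between the two half-pores in
# proportion to their diameters (values here are made up for illustration).
import scipy as _sp

c1, c2 = _sp.array([0.0, 0.0, 0.0]), _sp.array([1.0, 0.0, 0.0])
throat_length = 0.4
d1, d2 = 0.3, 0.1
length = _sp.sqrt(_sp.sum(_sp.square(c1 - c2))) - throat_length   # 0.6
fraction = d1 / (d1 + d2)                                          # 0.75
plen1, plen2 = length * fraction, length * (1 - fraction)          # 0.45 and 0.15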
    def buildAndTestPCAModel(self, noise):
        modelbuilder = statismo.PCAModelBuilder_vtkPD.Create()
 
        model = modelbuilder.BuildNewModel(self.dataManager.GetSampleDataStructure(), noise)
                        
        self.assertTrue(model.GetNumberOfPrincipalComponents() <= len(self.datafiles))        
        
        # we cannot have negative eigenvalues
        self.assertTrue((model.GetPCAVarianceVector() >= 0).all() == True)
        self.assertTrue(isnan(model.GetPCAVarianceVector()).any() == False) 

        # we project a dataset into the model and try to restore it.
  
        samples = self.dataManager.GetSampleDataStructure()
        sample = samples[0].GetSample()
        
        coeffs_sample = model.ComputeCoefficientsForDataset(sample)
        restored_sample = model.DrawSample(coeffs_sample)

        self.assertEqual(sample.GetNumberOfPoints(), restored_sample.GetNumberOfPoints())

        self.checkPointsAlmostEqual(sample.GetPoints(), restored_sample.GetPoints(), 100, noise)

        # check if the scores can be used to restore the data in the datamanager
        scores = model.GetModelInfo().GetScoresMatrix()
        for i in xrange(0, scores.shape[1]):
            sample_from_scores = model.DrawSample(scores[:,i])
            sample_from_dm = samples[i].GetSample()

            self.checkPointsAlmostEqual(sample_from_scores.GetPoints(), sample_from_dm.GetPoints(), 100, noise)
        return model
Example #29
    def run(self,phase=None):
        r'''
        '''
        logger.warning('This algorithm can take some time...')
        graph = self._net.create_adjacency_matrix(data=self._net['throat.length'],sprsfmt='csr')

        if phase is not None:
            self._phase = phase
            if 'throat.occupancy' in self._phase.props():
                temp = self._net['throat.length']*(self._phase['throat.occupancy']==1)
                graph = self._net.create_adjacency_matrix(data=temp,sprsfmt='csr',prop='temp')

        #self._net.tic()
        path = spgr.shortest_path(csgraph = graph, method='D', directed = False)
        #self._net.toc()

        Px = sp.array(self._net['pore.coords'][:,0],ndmin=2)
        Py = sp.array(self._net['pore.coords'][:,1],ndmin=2)
        Pz = sp.array(self._net['pore.coords'][:,2],ndmin=2)

        Cx = sp.square(Px.T - Px)
        Cy = sp.square(Py.T - Py)
        Cz = sp.square(Pz.T - Pz)
        Ds = sp.sqrt(Cx + Cy + Cz)

        temp = path/Ds
        #temp = path

        temp[sp.isnan(temp)] = 0
        temp[sp.isinf(temp)] = 0

        return temp
Example #30
def nmse(yhat, y, min_y, max_y):
    """
    @description
        Calculates the normalized mean-squared error. 

    @arguments
        yhat -- 1d array or list of floats -- estimated values of y
        y -- 1d array or list of floats -- true values
        min_y, max_y -- float, float -- roughly the min and max; they
          do not have to be the perfect values of min and max, because
          they're just here to scale the output into a roughly [0,1] range

    @return
        nmse -- float -- normalized mean-squared error
    """
    #base case: no entries
    if len(yhat) == 0:
        return 0.0

    #base case: both yhat and y are constant, and same values
    if (max_y == min_y) and (max(yhat) == min(yhat) == max(y) == min(y)):
        return 0.0

    #main case
    assert max_y > min_y, 'max_y=%g was not > min_y=%g' % (max_y, min_y)
    yhat_a, y_a = numpy.asarray(yhat), numpy.asarray(y)
    y_range = float(max_y - min_y)
    try:
        result = math.sqrt(numpy.mean(((yhat_a - y_a) / y_range) ** 2))
        if scipy.isnan(result):
            return INF
        return result
    except:
        return INF
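# A short usage sketch for nmse on toy data; the function assumes numpy, scipy
# and math are imported at module level, and INF is a module-level constant.
y_true = [1.0, 2.0, 3.0, 4.0]
y_hat = [1.1, 1.9, 3.2, 3.8]
err = nmse(y_hat, y_true, min_y=1.0, max_y=4.0)   # small value near 0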
Example #31
def ycorrect(data):
	"""
	ycorrect(data)

	Inputs:
	  data - a flatfield image of the mask

	Outputs:
	  true_coeffs - A polynomial describing the transformation:
	                   y_straight = f(x_ccd,y_ccd)
	  map_coeffs  - A polynomial describing the transformation:
	                   y_ccd = f(x_cdd,y_straight)
	"""

	# Parameters
	SUMWIDTH = 41	# Width of summing over columns

	y_axis = data.shape[0]
	x_axis = data.shape[1]


	central = x_axis//2

	x_min_orig = central - SUMWIDTH//2
	x_max_orig = central + SUMWIDTH//2

	# Find the 'holes' in the center of the mask to use as the reference
	#   position.
	midcol = data[:,x_min_orig:x_max_orig].mean(axis=1)
	central_edges,threshold,star_cutoff = find_holes(midcol)

	# transform_table would be easier to use as a list....
	transform_table = scipy.zeros((1,3),'f4')
	index = 0
	for peak in central_edges:
		if index:
			transform_table.resize((index+1,3))
		transform_table[index,0] = central
		transform_table[index,1] = peak
		transform_table[index,2] = peak

		index += 1

	offset = scipy.zeros(len(central_edges))

	x_min = x_min_orig
	x_max = x_max_orig
	current_column = central

	while current_column>SUMWIDTH + 20:
		current_column = current_column - SUMWIDTH - 10
		x_min = x_min - SUMWIDTH - 10
		x_max = x_max - SUMWIDTH - 10

		comp_array = data[:,x_min:x_max].mean(axis=1)
		comp_array = comp_array.clip(min=-1000.,max=star_cutoff)
		derivative = deriv_1d(comp_array)
		derivative = ndimage.gaussian_filter1d(derivative,3)
		derivative = abs(derivative)

		for i in range(offset.size):
			if scipy.isnan(offset[i]):
				continue
			ref = central_edges[i] + offset[i]

			start = int(ref) - 6
			end = start + 13

			if derivative[start:end].max()<threshold:
				offset[i] = scipy.nan
				continue

			fit = find_peak(derivative[start:end])

			# If the fit has crazy parameters, skip it
			if(fit[2]<0 or fit[2]>13 or fit[3]<1 or fit[3]>6):
				offset[i] = scipy.nan
				continue

			peak = fit[2]+float(start)			
			offset[i] = peak - central_edges[i]

			transform_table.resize((index+1,3))

			transform_table[index,0] = current_column
			transform_table[index,1] = central_edges[i]
			transform_table[index,2] = peak

			index += 1

	offset = scipy.zeros(offset.size)

	x_min = x_min_orig
	x_max = x_max_orig
	current_column = central
	while current_column<x_axis - SUMWIDTH - 19:
		current_column = current_column + SUMWIDTH + 10
		x_min = x_min + SUMWIDTH + 10
		x_max = x_max + SUMWIDTH + 10

		comp_array = data[:,x_min:x_max].mean(axis=1)
		comp_array = comp_array.clip(min=-1000.,max=star_cutoff)
		derivative = deriv_1d(comp_array)
		derivative = ndimage.gaussian_filter1d(derivative,3)
		derivative = abs(derivative)

		for i in range(offset.size):
			if scipy.isnan(offset[i]):
				continue
			ref = central_edges[i] + offset[i]
 
			start = int(round(ref)) - 6
			end = start + 13
 
			if derivative[start:end].max()<threshold:
				offset[i] = scipy.nan
				continue
 
			fit = find_peak(derivative[start:end])

			if(fit[2]<0 or fit[2]>13 or fit[3]<1 or fit[3]>6):
				offset[i] = scipy.nan
				continue
 
			peak = fit[2]+float(start)
			offset[i] = peak - central_edges[i]

			transform_table.resize((index+1,3))

			transform_table[index,0] = current_column
			transform_table[index,1] = central_edges[i]
			transform_table[index,2] = peak

			index += 1

	true_coeffs = special_functions.lsqfit(transform_table,"chebyshev",4,4)

	temp = transform_table[:,1].copy()
	transform_table[:,1] = transform_table[:,2].copy()
	transform_table[:,2] = temp.copy()

	map_coeffs = special_functions.lsqfit(transform_table,"chebyshev",4,4)

	return true_coeffs,map_coeffs
Example #32
def KLSampling(ratio, data):
    """
    Computes KL(q||p) by using samples of q.

    Some very elementary checks are made to make sure the numbers
    returned make sense. The estimator used has the same properties
    (and issues) as when one is doing importance sampling, i.e.,
    problems with estimating the ratio of partition functions.

    Arguments::
     ratio: The ratio of the normalized (or un-normalized) pdfs or pmfs
            of the form p/q.
     data: Data to compute the estimator on.

    Returns::
     est: estimate of the KL divergence.

    Examples::
    The case of equal distributions.
    >>> from scipy import exp
    >>> rat = lambda x: exp(-(x)**2/2)/exp(-(x)**2/2)
    >>> data = scipy.randn(1000) # sample from a standard normal
    >>> est = KLSampling(rat, data)
    >>> est<1e-7
    True

    Different means, same deviations.
    >>> mu = 0.4
    >>> rat = lambda x: exp(-(x-mu)**2/2)/exp(-(x-1.0)**2/2)
    >>> data = scipy.randn(100000)+1
    >>> est = KLSampling(rat, data)
    >>> abs(est-KLNormal(mu,1.0,1,1.0))<1
    True


    Testing if mapping works well for distributions of more than one
    parameter.
    >>> rat = lambda x: exp(-(x[0]+x[1])/2**2)
    >>> data = scipy.randn(100,2)
    >>> KLSampling(rat,data)<0.6
    True
    """

    dim = len(data.shape)
    est_a = 0.0
    est_b = 0.0

    if dim == 1:
        n = len(data)
        est_a = sum([log(1.0 / ratio(x)) for x in data])
        est_b = sum([ratio(x) for x in data])
        # for i in xrange(n):
        #     val = ratio(data[i])
        #     est_a = est_a+log(1.0/val)
        #     est_b = est_b+val

        if scipy.isnan(est_a):
            raise ValueError("est_a is nan")
        if scipy.isnan(est_b):
            raise ValueError("est_b is nan")

        est = est_a / n + log(est_b / n)  # total estimate

    else:
        # nxm format assumed, where every row accounts for data
        # every column for variables
        n = scipy.size(data, 0)
        for i in xrange(n):
            val = ratio(data[i, :])
            est_a = est_a + log(1.0 / val)
            est_b = est_b + val

        if scipy.isnan(est_a):
            raise ValueError("est_a is nan")
        if scipy.isnan(est_b):
            raise ValueError("est_b is nan")

        est = est_a / n + log(est_b / n)  # total estimate

    if est < 0 or est_b < 0:
        raise ValueError("Insufficient data to converge.")

    return est
import sys

import scipy as sp

data = sp.genfromtxt("E:\python\data\ch01\data\web_traffic.tsv",
                     delimiter="\t")

print(data[:10])

x = data[:, 0]

y = data[:, 1]

sp.sum(sp.isnan(y))

x = x[~sp.isnan(y)]

y = y[~sp.isnan(y)]

import matplotlib.pyplot as plt

plt.scatter(x, y)

plt.title("Web traffic over the last month")

plt.xlabel("Time")

plt.ylabel("Hits/hour")

plt.xticks([w * 7 * 24 for w in range(10)], ['week %i' % w for w in range(10)])
lst = os.listdir('/big_disk/ajoshi/HCP5')
rho1 = 0
rho1rot = 0
rho2 = 0
rho2rot = 0
# lst = [lst[0]]
diffbefore = 0
diffafter = 0

sub = lst[0]

vrest1 = scipy.io.loadmat('/big_disk/ajoshi/coding_ground/epilepsy/\
NorthShoreLIJ/0019002/fmri_tnlm_5_reduce3_v2.mat')  # h5py.File(fname1);
data = vrest1['func_right']
indx = sp.isnan(data)
data[indx] = 0

vrest = data
m = np.mean(vrest, 1)
vrest = vrest - m[:, None]
s = np.std(vrest, 1) + 1e-116
vrest1 = vrest / s[:, None]

rho1 = 0
rho1rot = 0
diffafter = 0
diffbefore = 0

lst = glob.glob('/big_disk/ajoshi/fcon_1000/Beijing/sub*')
nsub = 0
Example #35
    def initZ(self,
              pmean,
              pvar,
              qmean,
              qvar,
              qE=None,
              qE2=None,
              covariates=None,
              scale_covariates=None):
        """Method to initialise the latent variables

        PARAMETERS
        ----------
        pmean:
        pvar:
        qmean
        qvar
        qE
        qE2
        covariates: nd array
            matrix of covariates with dimensions (nsamples,ncovariates)
        scale_covariates: 
        """

        # Initialise mean of the Q distribution
        if qmean is not None:
            if isinstance(qmean, str):
                if qmean == "random":  # Random initialisation of latent variables
                    qmean = stats.norm.rvs(loc=0,
                                           scale=1,
                                           size=(self.N, self.K))

                elif qmean == "orthogonal":  # Latent variables are initialised randomly but ensuring orthogonality
                    pca = sklearn.decomposition.PCA(n_components=self.K,
                                                    copy=True,
                                                    whiten=True)
                    pca.fit(
                        stats.norm.rvs(loc=0, scale=1, size=(self.N, 9999)).T)
                    qmean = pca.components_.T

                elif qmean == "pca":  # Latent variables are initialised from PCA in the concatenated matrix
                    pca = sklearn.decomposition.PCA(n_components=self.K,
                                                    copy=True,
                                                    whiten=True)
                    pca.fit(s.concatenate(self.data, axis=0).T)
                    qmean = pca.components_.T

            elif isinstance(qmean, s.ndarray):
                assert qmean.shape == (self.N, self.K)

            elif isinstance(qmean, (int, float)):
                qmean = s.ones((self.N, self.K)) * qmean

            else:
                print("Wrong initialisation for Z")
                exit()

        # Add covariates
        if covariates is not None:
            assert scale_covariates is not None, "If you use covariates also define data_opts['scale_covariates']"

            # Select indices for covariates
            idx_covariates = s.array(range(covariates.shape[1]))

            # Center and scale the covariates to match the prior distribution N(0,1)
            # to-do: this needs to be improved to take the particular mean and var into account
            # covariates[scale_covariates] = (covariates - covariates.mean(axis=0)) / covariates.std(axis=0)
            scale_covariates = s.array(scale_covariates)
            covariates[:, scale_covariates] = (
                covariates[:, scale_covariates] -
                s.nanmean(covariates[:, scale_covariates], axis=0)) / s.nanstd(
                    covariates[:, scale_covariates], axis=0)

            # Set to zero the missing values in the covariates
            covariates[s.isnan(covariates)] = 0.
            qmean[:, idx_covariates] = covariates
        else:
            idx_covariates = None

        # Initialise the node
        # self.Z = Constant_Node(dim=(self.N,self.K), value=qmean)
        self.Z = Z_Node(dim=(self.N, self.K),
                        pmean=s.ones((self.N, self.K)) * pmean,
                        pvar=s.ones((self.K, )) * pvar,
                        qmean=s.ones((self.N, self.K)) * qmean,
                        qvar=s.ones((self.N, self.K)) * qvar,
                        qE=qE,
                        qE2=qE2,
                        idx_covariates=idx_covariates)
        self.nodes["Z"] = self.Z
Example #36
def _intersections(x1, y1, x2, y2):
    """X0,Y0 = intersections(X1,Y1,X2,Y2)
    INTERSECTIONS Intersections of curves.
      Computes the (x,y) locations where two curves intersect.  The curves
      can be broken with NaNs or have vertical segments.
    
    Example:
      [X0,Y0] = intersections(X1,Y1,X2,Y2);
    
    where X1 and Y1 are equal-length vectors of at least two points and
    represent curve 1.  Similarly, X2 and Y2 represent curve 2.
    X0 and Y0 are column vectors containing the points at which the two
    curves intersect.

    The algorithm can return two additional vectors that indicate which
    segment pairs contain intersections and where they are:

      [X0,Y0,I,J] = intersections(X1,Y1,X2,Y2);
    
    For each element of the vector I, I(k) = (segment number of (X1,Y1)) +
    (how far along this segment the intersection is).  For example, if I(k) =
    45.25 then the intersection lies a quarter of the way between the line
    segment connecting (X1(45),Y1(45)) and (X1(46),Y1(46)).  Similarly for
    the vector J and the segments in (X2,Y2).

    Version: 1.10, 25 February 2008
    Converted to Python October 2010 by Jeffrey Bush [email protected]
    Author:  Douglas M. Schwarz
    Email:   dmschwarz=ieee*org, dmschwarz=urgrad*rochester*edu
    Real_email = regexprep(Email,{'=','*'},{'@','.'})

    Theory of operation:
      Given two line segments, L1 and L2,
    
      L1 endpoints:  (x1(1),y1(1)) and (x1(2),y1(2))
      L2 endpoints:  (x2(1),y2(1)) and (x2(2),y2(2))
    
    we can write four equations with four unknowns and then solve them.  The
    four unknowns are t1, t2, x0 and y0, where (x0,y0) is the intersection of
    L1 and L2, t1 is the distance from the starting point of L1 to the
    intersection relative to the length of L1 and t2 is the distance from the
    starting point of L2 to the intersection relative to the length of L2.
    
    So, the four equations are
    
       (x1(2) - x1(1))*t1 = x0 - x1(1)
       (x2(2) - x2(1))*t2 = x0 - x2(1)
       (y1(2) - y1(1))*t1 = y0 - y1(1)
       (y2(2) - y2(1))*t2 = y0 - y2(1)
    
    Rearranging and writing in matrix form,
    
      [x1(2)-x1(1)       0       -1   0;      [t1;      [-x1(1);
            0       x2(2)-x2(1)  -1   0;   *   t2;   =   -x2(1);
       y1(2)-y1(1)       0        0  -1;       x0;       -y1(1);
            0       y2(2)-y2(1)   0  -1]       y0]       -y2(1)]
    
    Let's call that A*T = B.  We can solve for T with T = A\B.
    
    Once we have our solution we just have to look at t1 and t2 to determine
    whether L1 and L2 intersect.  If 0 <= t1 < 1 and 0 <= t2 < 1 then the two
    line segments cross and we can include (x0,y0) in the output.
    
    In principle, we have to perform this computation on every pair of line
    segments in the input data.  This can be quite a large number of pairs so
    we will reduce it by doing a simple preliminary check to eliminate line
    segment pairs that could not possibly cross.  The check is to look at the
    smallest enclosing rectangles (with sides parallel to the axes) for each
    line segment pair and see if they overlap.  If they do then we have to
    compute t1 and t2 (via the A\B computation) to see if the line segments
    cross, but if they don't then the line segments cannot cross.  In a
    typical application, this technique will eliminate most of the potential
    line segment pairs.
    """

    # x1 and y1 must be vectors with same number of points (at least 2).
    if sp.sum(sp.size(x1) > 1) != 1 or sp.sum(
            sp.size(y1) > 1) != 1 or len(x1) != len(y1):
        raise ValueError(
            'X1 and Y1 must be equal-length vectors of at least 2 points.')
    # x2 and y2 must be vectors with same number of points (at least 2).
    if sp.sum(sp.size(x2) > 1) != 1 or sp.sum(
            sp.size(y2) > 1) != 1 or len(x2) != len(y2):
        raise ValueError(
            'X2 and Y2 must be equal-length vectors of at least 2 points.')

    # Compute number of line segments in each curve and some differences we'll
    # need later.
    n1 = len(x1) - 1
    n2 = len(x2) - 1
    xy1 = sp.column_stack((x1, y1))
    xy2 = sp.column_stack((x2, y2))
    dxy1 = sp.diff(xy1, axis=0)
    dxy2 = sp.diff(xy2, axis=0)

    # Determine the combinations of i and j where the rectangle enclosing the
    # i'th line segment of curve 1 overlaps with the rectangle enclosing the
    # j'th line segment of curve 2.
    i, j = sp.nonzero(
        sp.logical_and(
            sp.logical_and(
                sp.logical_and(
                    sp.tile(sp.minimum(x1[0:-1], x1[1:]),
                            (n2, 1)).T <= sp.tile(sp.maximum(x2[0:-1], x2[1:]),
                                                  (n1, 1)),
                    sp.tile(sp.maximum(x1[0:-1], x1[1:]),
                            (n2, 1)).T >= sp.tile(sp.minimum(x2[0:-1], x2[1:]),
                                                  (n1, 1))),
                sp.tile(sp.minimum(y1[0:-1], y1[1:]),
                        (n2, 1)).T <= sp.tile(sp.maximum(y2[0:-1], y2[1:]),
                                              (n1, 1))),
            sp.tile(sp.maximum(y1[0:-1], y1[1:]),
                    (n2, 1)).T >= sp.tile(sp.minimum(y2[0:-1], y2[1:]),
                                          (n1, 1))))
    i = sp.copy(i)  # make the arrays writable
    j = sp.copy(j)

    # Find segments pairs which have at least one vertex = NaN and remove them.
    # This line is a fast way of finding such segment pairs.  We take
    # advantage of the fact that NaNs propagate through calculations, in
    # particular subtraction (in the calculation of dxy1 and dxy2, which we
    # need anyway) and addition.
    remove = sp.isnan(sp.sum(dxy1[i, :] + dxy2[j, :], axis=1))
    i = i[~remove]
    j = j[~remove]

    # Initialize matrices.  We'll put the T's and B's in matrices and use them
    # one column at a time.  AA is a 3-D extension of A where we'll use one
    # plane at a time.
    n = len(i)
    T = sp.zeros((4, n))
    AA = sp.zeros((4, 4, n))
    AA[[0, 1], 2, :] = -1
    AA[[2, 3], 3, :] = -1
    AA[[0, 2], 0, :] = dxy1[i, :].T
    AA[[1, 3], 1, :] = dxy2[j, :].T
    B = -sp.array([x1[i], x2[j], y1[i], y2[j]])

    # Loop through possibilities.  Trap singularity warning and then use
    # lastwarn to see if that plane of AA is near singular.  Process any such
    # segment pairs to determine if they are colinear (overlap) or merely
    # parallel.  That test consists of checking to see if one of the endpoints
    # of the curve 2 segment lies on the curve 1 segment.  This is done by
    # checking the cross product
    #
    #   (x1(2),y1(2)) - (x1(1),y1(1)) x (x2(2),y2(2)) - (x1(1),y1(1)).
    #
    # If this is close to zero then the segments overlap.
    for k in sp.arange(n):
        L, U = lin.lu(AA[:, :, k], True)
        T[:, k] = lin.solve(U, lin.solve(L, B[:, k]))

    # Find where t1 and t2 are between 0 and 1 and return the corresponding
    # x0 and y0 values.
    in_range = sp.logical_and(
        sp.logical_and(sp.logical_and(T[0, :] >= 0, T[1, :] >= 0),
                       T[0, :] < 1), T[1, :] < 1)
    x0 = T[2, in_range].T
    y0 = T[3, in_range].T

    return x0, y0
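# A short usage sketch for _intersections, assuming scipy is imported as sp and
# scipy.linalg as lin (both are required by the function body): two unit-length
# segments crossing at (0.5, 0.5).
import scipy as sp

x1, y1 = sp.array([0.0, 1.0]), sp.array([0.0, 1.0])
x2, y2 = sp.array([0.0, 1.0]), sp.array([1.0, 0.0])
x0, y0 = _intersections(x1, y1, x2, y2)   # expected: x0 ~ [0.5], y0 ~ [0.5]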

data = sp.genfromtxt(
    os.path.expanduser(
        "~/Python_src/src/MachineLearningSystem/exdata/ch01/data/web_traffic.tsv"
    ))

print(data.shape)

print(data[:10])
x = data[:, 0]
y = data[:, 1]

# Handle NaN values

x = x[~sp.isnan(y)]
y = y[~sp.isnan(y)]
plt.scatter(x, y)

# fp1 = intercept and coefficients, residuals = the residuals; passing full=True returns the residuals etc.; 1 is the polynomial degree
fp1, residuals, rank, s, rcond = sp.polyfit(x, y, 1, full=True)
f1 = sp.poly1d(fp1)  # build the model function f1(x) from the model parameters
print(error(f1, x, y))

print("Model parameters: %s", fp1)

# Polynomial fitting
f2p = sp.polyfit(x, y, 2)
print(f2p)

# Pyplot settings
Example #38
def extract(data, varimg, width=WIDTH, nsig=NSIG, noise=NOISE):
    WIDTH = width
    NSIG = nsig
    NOISE = noise

    data = data.copy()
    spectra = []

    # Replace nan with zero
    data[scipy.isnan(data)] = 0.
    varimg[scipy.isnan(varimg)] = 0.

    # Create model of real flux. We ignore the slit ends, which may have
    #  artifacts from the resampling.
    slit = data[:, 8:-8].astype(scipy.float32)
    var = varimg[:, 8:-8]

    # OK...so negative-variance also isn't good; set these pixels to zero
    var[var < 0] = 0

    # Create noise models
    sigmaimg = slit / scipy.sqrt(var)
    highpix = scipy.where(sigmaimg > 1.5, sigmaimg, 0.)
    source_columns = highpix.sum(axis=0)

    # MASKING DISABLED (this would take only columns with lotsa flux...)
    #	mask = scipy.where(source_columns>4.,1.,scipy.nan)
    mask = source_columns * 0.

    # Condition 1, dealing with bad pixels
    if (var == 0).any():
        cond = var == 0
        var[cond] = scipy.nan
        slit[cond] = scipy.nan
        mask = scipy.where(cond, 0, 1)
        flux = scipy.nansum(slit / var, axis=1) / scipy.nansum(1. / var,
                                                               axis=1)
        noise = scipy.sqrt(scipy.nansum(var, axis=1)) / mask.sum(axis=1)
    # Condition 2, no masking
    elif scipy.nansum(mask) == 0:
        flux = (slit / var).sum(axis=1) / (1. / var).sum(axis=1)
        noise = scipy.sqrt(var.sum(axis=1)) / mask.size
    # Condition 3, masking
    else:
        fluxmodel = slit * mask
        noisemodel = var * mask

        noise = scipy.sqrt(scipy.nansum(noisemodel,
                                        axis=1)) / scipy.nansum(mask)
        flux = stats.stats.nanmean(fluxmodel, axis=1)

    # A smooth S/N estimate for the slit
#	sig2noise = ndimage.gaussian_filter1d(flux,1)/noise

    row = scipy.arange(flux.size)
    model = flux.copy()
    nspec = 10  # Maximum number of attempts
    while nspec:
        nspec -= 1

        # Fit a gaussian around the peak of the S/N model
        start = model.argmax() - WIDTH
        end = model.argmax() + WIDTH + 1
        if start < 0:
            start = 0
        if end > model.size:
            end = model.size

        fitarr = model[start:end]
        p = scipy.zeros(4)
        p[1] = fitarr.max()
        p[2] = fitarr.argmax()
        p[3] = 2.

        fit, val = special_functions.ngaussfit(fitarr, p)
        chi2 = val / (fitarr.size - 3)
        fit[2] += start

        # If the centroid doesn't lie on the slit, use the edge pixel
        midcol = int(fit[2].round())
        if midcol >= flux.size:
            midcol = flux.size - 1
        elif midcol < 0:
            midcol = 0
        # Require a reasonable S/N and width
        if fit[3] > fitarr.size / 2. or fit[3] < 0.85:
            break
        elif fit[0] > 0 and fit[1] < NOISE * noise[midcol]:
            break
        elif fit[0] < 0 and fit[1] - fit[0] < NOISE * noise[midcol]:
            break
        else:
            fit[1] += fit[0]
            fit[0] = 0.
            # Subtract away a model of the source
            source = special_functions.ngauss(row, fit)
            model -= scipy.where(source > noise, source, 0.)

            # Skip residuals!
            if fit[2] < flux.size and fit[1] < scipy.sqrt(flux[int(fit[2])]):
                continue
            fit[1] = 1.
            weight = special_functions.ngauss(row, fit)
            cond = (row > fit[2] - fit[3] * NSIG) & (row <
                                                     fit[2] + fit[3] * NSIG)
            weight = scipy.where(cond, weight, 0)
            weight /= weight.sum()
            spec = weight * data.T
            spec = spec.sum(axis=1)
            varspec = weight * varimg.T
            varspec = varspec.sum(axis=1)
            spec[varspec == 0] = 0.
            smooth = signal.wiener(spec, FILTSIZE, varspec)
            smooth[scipy.isnan(smooth)] = 0.
            spectra.append([fit, spec, smooth, varspec])
    return spectra
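# Hedged sketch (illustration only, not part of extract): Condition 2 above
# collapses the 2-D slit to a 1-D profile with an inverse-variance weighted
# mean. A self-contained numpy version of that single step, on toy arrays:
import numpy as np

slit_demo = np.array([[1.0, 2.0, 3.0],
                      [2.0, 2.0, 2.0]])
var_demo = np.array([[0.5, 1.0, 2.0],
                     [1.0, 1.0, 1.0]])
# Weight each pixel by 1/variance, then normalise along each row.
flux_demo = (slit_demo / var_demo).sum(axis=1) / (1. / var_demo).sum(axis=1)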
def bulk_diffusion(physics,
                   phase,
                   network,
                   pore_molar_density='pore.molar_density',
                   pore_diffusivity='pore.diffusivity',
                   pore_area='pore.area',
                   pore_diameter='pore.diameter',
                   throat_area='throat.area',
                   throat_length='throat.length',
                   throat_diameter='throat.diameter',
                   calc_pore_len=True,
                   **kwargs):
    r"""
    Calculate the diffusive conductance of conduits in the network, where a
    conduit is (1/2 pore - full throat - 1/2 pore), based on the pore and
    throat cross-sectional areas.

    Parameters
    ----------
    network : OpenPNM Network Object

    phase : OpenPNM Phase Object
        The phase of interest

    Notes
    -----
    (1) This function requires that all the necessary phase properties already 
    be calculated.
    
    (2) This function calculates the specified property for the *entire* 
    network then extracts the values for the appropriate throats at the end.
    
    """
    #Get Nt-by-2 list of pores connected to each throat
    Ps = network['throat.conns']
    #Get properties in every pore in the network
    parea = network[pore_area]
    pdia = network[pore_diameter]
    #Get the properties of every throat
    tarea = network[throat_area]
    tlen = network[throat_length]
    #Interpolate pore phase property values to throats
    cp = phase[pore_molar_density]
    ct = phase.interpolate_data(data=cp)
    DABp = phase[pore_diffusivity]
    DABt = phase.interpolate_data(data=DABp)
    if calc_pore_len:
        lengths = misc.conduit_lengths(network, mode='centroid')
        plen1 = lengths[:, 0]
        plen2 = lengths[:, 2]
    else:
        plen1 = (0.5 * pdia[Ps[:, 0]])
        plen2 = (0.5 * pdia[Ps[:, 1]])
    #remove any non-positive lengths
    plen1[plen1 <= 0] = 1e-12
    plen2[plen2 <= 0] = 1e-12
    #Find g for half of pore 1
    gp1 = ct * DABt * parea[Ps[:, 0]] / plen1
    gp1[_sp.isnan(gp1)] = _sp.inf
    gp1[~(gp1 > 0)] = _sp.inf  # Set 0 conductance pores (boundaries) to inf
    #Find g for half of pore 2
    gp2 = ct * DABt * parea[Ps[:, 1]] / plen2
    gp2[_sp.isnan(gp2)] = _sp.inf
    gp2[~(gp2 > 0)] = _sp.inf  # Set 0 conductance pores (boundaries) to inf
    #Find g for full throat
    #remove any non-positive lengths
    tlen[tlen <= 0] = 1e-12
    gt = ct * DABt * tarea / tlen
    value = (1 / gt + 1 / gp1 + 1 / gp2)**(-1)
    value = value[phase.throats(physics.name)]
    return value
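# Hedged sketch (illustration only): the conduit conductance above is the
# series (harmonic) combination of the two pore halves and the throat,
# g = (1/g_p1 + 1/g_t + 1/g_p2)^-1. A standalone numpy check of the formula:
import numpy as np

gp1_demo = np.array([2.0, 4.0])
gp2_demo = np.array([2.0, 4.0])
gt_demo = np.array([1.0, 2.0])
# Series combination: the smallest conductance dominates the conduit.
g_demo = (1 / gt_demo + 1 / gp1_demo + 1 / gp2_demo) ** (-1)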
Exemple #40
def bedTopo(east_grd_path, slope_grd_path, thickness_txt_path):

    import math
    import matplotlib
    import matplotlib.pyplot
    import os
    import scipy
    from scipy.io import netcdf
    from scipy.sparse import lil_matrix
    import subprocess

    assert os.path.exists(
        east_grd_path), "\n***** ERROR: " + east_grd_path + " does not exist\n"
    assert os.path.exists(
        slope_grd_path
    ), "\n***** ERROR: " + slope_grd_path + " does not exist\n"
    assert os.path.exists(
        thickness_txt_path
    ), "\n***** ERROR: " + thickness_txt_path + " does not exist\n"

    north_grd_path = east_grd_path.replace("east", "north")
    angles_grd_path = east_grd_path.replace("eastxyz", "angles")
    mag_grd_path = east_grd_path.replace("eastxyz", "mag")

    if not os.path.exists(angles_grd_path):
        cmd = "\ngrdmath " + north_grd_path + " " + east_grd_path + " ATAN2 --IO_NC4_CHUNK_SIZE=c = " + angles_grd_path + "\n"
        subprocess.call(cmd, shell=True)

    cmd = "\ngrdclip " + angles_grd_path + " -Sa0.7853981633974483/NaN -Sb0/NaN -Gone.grd\n"
    cmd += "\ngrdclip " + angles_grd_path + " -Sa1.5707963267948966/NaN -Sb0.7853981633974483/NaN -Gtwo.grd\n"
    cmd += "\ngrdclip " + angles_grd_path + " -Sa2.356194490192345/NaN -Sb1.5707963267948966/NaN -Gthree.grd\n"
    cmd += "\ngrdclip " + angles_grd_path + " -Sa3.141592653589793/NaN -Sb2.356194490192345/NaN -Gfour.grd\n"
    cmd += "\ngrdclip " + angles_grd_path + " -Sa-2.356194490192345/NaN -Gfive.grd\n"
    cmd += "\ngrdclip " + angles_grd_path + " -Sa-1.5707963267948966/NaN -Sb-2.356194490192345/NaN -Gsix.grd\n"
    cmd += "\ngrdclip " + angles_grd_path + " -Sa-0.7853981633974483/NaN -Sb-1.5707963267948966/NaN -Gseven.grd\n"
    cmd += "\ngrdclip " + angles_grd_path + " -Sa0/NaN -Sb-0.7853981633974483/NaN -Geight.grd\n"

    cmd += "\ngrdmath one.grd two.grd AND = u.grd\n"
    cmd += "\ngrdmath three.grd u.grd AND = u.grd\n"
    cmd += "\ngrdmath four.grd u.grd AND = u.grd\n"

    cmd += "\ngrdmath five.grd six.grd AND = d.grd\n"
    cmd += "\ngrdmath seven.grd d.grd AND = d.grd\n"
    cmd += "\ngrdmath eight.grd d.grd AND = d.grd\n"

    cmd += "\ngrdmath three.grd four.grd AND = l.grd\n"
    cmd += "\ngrdmath five.grd l.grd AND = l.grd\n"
    cmd += "\ngrdmath six.grd l.grd AND = l.grd\n"

    cmd += "\ngrdmath one.grd two.grd AND = r.grd\n"
    cmd += "\ngrdmath eight.grd r.grd AND = r.grd\n"
    cmd += "\ngrdmath seven.grd r.grd AND = r.grd\n"

    cmd += "\ngrdmath two.grd three.grd AND = ul.grd\n"
    cmd += "\ngrdmath four.grd ul.grd AND = ul.grd\n"
    cmd += "\ngrdmath five.grd ul.grd AND = ul.grd\n"

    cmd += "\ngrdmath one.grd two.grd AND = ur.grd\n"
    cmd += "\ngrdmath three.grd ur.grd AND = ur.grd\n"
    cmd += "\ngrdmath eight.grd ur.grd AND = ur.grd\n"

    cmd += "\ngrdmath four.grd five.grd AND = dl.grd\n"
    cmd += "\ngrdmath six.grd dl.grd AND = dl.grd\n"
    cmd += "\ngrdmath seven.grd dl.grd AND = dl.grd\n"

    cmd += "\ngrdmath one.grd six.grd AND = dr.grd\n"
    cmd += "\ngrdmath seven.grd dr.grd AND = dr.grd\n"
    cmd += "\ngrdmath eight.grd dr.grd AND = dr.grd\n"

    cmd += "\ngrdmath 1.5707963267948966 u.grd SUB = u.grd\n"
    cmd += "\ngrdmath u.grd ABS = u.grd\n"
    cmd += "\ngrdmath u.grd COS = u.grd\n"
    cmd += "\ngrdmath u.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = u.grd"

    cmd += "\ngrdmath -1.5707963267948966 d.grd SUB = d.grd\n"
    cmd += "\ngrdmath d.grd ABS = d.grd\n"
    cmd += "\ngrdmath d.grd COS = d.grd\n"
    cmd += "\ngrdmath d.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = d.grd"

    cmd += "\ngrdmath 3.141592653589793 l.grd SUB = l.grd\n"
    cmd += "\ngrdmath l.grd ABS = l.grd\n"
    cmd += "\ngrdmath l.grd COS = l.grd\n"
    cmd += "\ngrdmath l.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = l.grd"

    cmd += "\ngrdmath 0 r.grd SUB = r.grd\n"
    cmd += "\ngrdmath r.grd ABS = r.grd\n"
    cmd += "\ngrdmath r.grd COS = r.grd\n"
    cmd += "\ngrdmath r.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = r.grd"

    cmd += "\ngrdmath 2.356194490192345 ul.grd SUB = ul.grd\n"
    cmd += "\ngrdmath ul.grd ABS = ul.grd\n"
    cmd += "\ngrdmath ul.grd COS = ul.grd\n"
    cmd += "\ngrdmath ul.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = ul.grd"

    cmd += "\ngrdmath 0.7853981633974483 ur.grd SUB = ur.grd\n"
    cmd += "\ngrdmath ur.grd ABS = ur.grd\n"
    cmd += "\ngrdmath ur.grd COS = ur.grd\n"
    cmd += "\ngrdmath ur.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = ur.grd"

    cmd += "\ngrdmath -2.356194490192345 dl.grd SUB = dl.grd\n"
    cmd += "\ngrdmath dl.grd ABS = dl.grd\n"
    cmd += "\ngrdmath dl.grd COS = dl.grd\n"
    cmd += "\ngrdmath dl.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = dl.grd"

    cmd += "\ngrdmath -0.7853981633974483 dr.grd SUB = dr.grd\n"
    cmd += "\ngrdmath dr.grd ABS = dr.grd\n"
    cmd += "\ngrdmath dr.grd COS = dr.grd\n"
    cmd += "\ngrdmath dr.grd " + mag_grd_path + " MUL --IO_NC4_CHUNK_SIZE=c = dr.grd"

    subprocess.call(cmd, shell=True)

    f = netcdf.netcdf_file("u.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    u = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file("d.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    d = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file("l.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    l = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file("r.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    r = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file("ul.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    ul = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file("ur.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    ur = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file("dl.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    dl = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file("dr.grd", "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    dr = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file(mag_grd_path, "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    speeds = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file(slope_grd_path, "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    slopes = f.variables["z"].data[:]
    f.close()

    f = netcdf.netcdf_file(angles_grd_path, "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    angles = f.variables["z"].data[:]
    f.close()

    width = f.dimensions["x"]
    length = f.dimensions["y"]

    min_x = min(x)
    max_x = max(x)
    min_y = min(y)
    max_y = max(y)

    inc = int((max(x) - min(x)) / (width - 1))

    #	Read in ice-only pixels

    #	f        = netcdf.netcdf_file("ice_only.grd","r",False);
    f = netcdf.netcdf_file(east_grd_path, "r", False)
    x = f.variables["x"].data
    y = f.variables["y"].data
    ice_vals = f.variables["z"].data[:]
    f.close()

    #	Read in thicknesses, initialize fluxes

    thicknesses = {}
    f_lons = {}
    f_lats = {}
    dr_stresses = {}
    basal_drags = {}

    fluxes = scipy.zeros((length, width))
    locked = scipy.zeros((length, width))

    infile = open(thickness_txt_path, "r")

    for line in infile:

        utm_x, utm_y, thickness = line.strip().split()

        #		j = str(int(math.floor((float(utm_x) - float(min_x)) / int(inc))));
        #		i = str(int(math.floor((float(utm_y) - float(min_y)) / int(inc))));

        j = str(int(round((float(utm_x) - float(min_x)) / int(inc))))
        i = str(int(round((float(utm_y) - float(min_y)) / int(inc))))

        thicknesses[i + " " + j] = float(thickness)
        fluxes[int(i), int(j)] = speeds[int(i), int(j)] * float(thickness)
        locked[int(i), int(j)] = 1

#		print(str(int(j) * int(inc) + float(min_x)) + " " + str(int(i) * int(inc) + float(min_y)) + " " + thickness);

    infile.close()

    #	Iteratively calculate fluxes, thicknesses

    max_iterations = 50
    cur_iteration = 0
    cs1 = 0.0
    cs2 = 0.0

    todo = thicknesses.keys()

    while cur_iteration < max_iterations:

        tolock = {}
        inputs = {}
        outputs = {}

        for coord in todo:

            str_i, str_j = coord.split()
            y_i = int(str_i)
            x_i = int(str_j)

            cs1 += fluxes[y_i, x_i]

            in_total = 0.0
            out_total = 0.0
            cs3 = 0.0
            factor = 4

            #			Calculate input fluxes

            if locked[y_i - 1, x_i] < 1 and not scipy.isnan(
                    ice_vals[y_i - 1, x_i]) and not scipy.isnan(u[y_i - 1,
                                                                  x_i]):
                in_total += u[y_i - 1, x_i]**factor

            if locked[y_i + 1, x_i] < 1 and not scipy.isnan(
                    ice_vals[y_i + 1, x_i]) and not scipy.isnan(d[y_i + 1,
                                                                  x_i]):
                in_total += d[y_i + 1, x_i]**factor

            if locked[y_i, x_i + 1] < 1 and not scipy.isnan(
                    ice_vals[y_i, x_i + 1]) and not scipy.isnan(l[y_i,
                                                                  x_i + 1]):
                in_total += l[y_i, x_i + 1]**factor

            if locked[y_i, x_i - 1] < 1 and not scipy.isnan(
                    ice_vals[y_i, x_i - 1]) and not scipy.isnan(r[y_i,
                                                                  x_i - 1]):
                in_total += r[y_i, x_i - 1]**factor

            if locked[y_i - 1, x_i + 1] < 1 and not scipy.isnan(
                    ice_vals[y_i - 1, x_i + 1]) and not scipy.isnan(
                        ul[y_i - 1, x_i + 1]):
                in_total += ul[y_i - 1, x_i + 1]**factor

            if locked[y_i - 1, x_i - 1] < 1 and not scipy.isnan(
                    ice_vals[y_i - 1, x_i - 1]) and not scipy.isnan(
                        ur[y_i - 1, x_i - 1]):
                in_total += ur[y_i - 1, x_i - 1]**factor

            if locked[y_i + 1, x_i + 1] < 1 and not scipy.isnan(
                    ice_vals[y_i + 1, x_i + 1]) and not scipy.isnan(
                        dl[y_i + 1, x_i + 1]):
                in_total += dl[y_i + 1, x_i + 1]**factor

            if locked[y_i + 1, x_i - 1] < 1 and not scipy.isnan(
                    ice_vals[y_i + 1, x_i - 1]) and not scipy.isnan(
                        dr[y_i + 1, x_i - 1]):
                in_total += dr[y_i + 1, x_i - 1]**factor

            if locked[y_i - 1, x_i] < 1 and not scipy.isnan(
                    ice_vals[y_i - 1, x_i]) and not scipy.isnan(u[y_i - 1,
                                                                  x_i]):
                fluxes[y_i - 1,
                       x_i] += fluxes[y_i, x_i] * (u[y_i - 1, x_i]**factor /
                                                   in_total)
                tolock[str(y_i - 1) + " " + str(x_i)] = True
                inputs[str(y_i - 1) + " " + str(x_i)] = True
                cs3 += (u[y_i - 1, x_i]**factor / in_total)

            if locked[y_i + 1, x_i] < 1 and not scipy.isnan(
                    ice_vals[y_i + 1, x_i]) and not scipy.isnan(d[y_i + 1,
                                                                  x_i]):
                fluxes[y_i + 1,
                       x_i] += fluxes[y_i, x_i] * (d[y_i + 1, x_i]**factor /
                                                   in_total)
                tolock[str(y_i + 1) + " " + str(x_i)] = True
                inputs[str(y_i + 1) + " " + str(x_i)] = True
                cs3 += (d[y_i + 1, x_i]**factor / in_total)

            if locked[y_i, x_i + 1] < 1 and not scipy.isnan(
                    ice_vals[y_i, x_i + 1]) and not scipy.isnan(l[y_i,
                                                                  x_i + 1]):
                fluxes[y_i, x_i +
                       1] += fluxes[y_i,
                                    x_i] * (l[y_i, x_i + 1]**factor / in_total)
                tolock[str(y_i) + " " + str(x_i + 1)] = True
                inputs[str(y_i) + " " + str(x_i + 1)] = True
                cs3 += (l[y_i, x_i + 1]**factor / in_total)

            if locked[y_i, x_i - 1] < 1 and not scipy.isnan(
                    ice_vals[y_i, x_i - 1]) and not scipy.isnan(r[y_i,
                                                                  x_i - 1]):
                fluxes[y_i, x_i -
                       1] += fluxes[y_i,
                                    x_i] * (r[y_i, x_i - 1]**factor / in_total)
                tolock[str(y_i) + " " + str(x_i - 1)] = True
                inputs[str(y_i) + " " + str(x_i - 1)] = True
                cs3 += (r[y_i, x_i - 1]**factor / in_total)

            if locked[y_i - 1, x_i + 1] < 1 and not scipy.isnan(
                    ice_vals[y_i - 1, x_i + 1]) and not scipy.isnan(
                        ul[y_i - 1, x_i + 1]):
                fluxes[y_i - 1, x_i +
                       1] += fluxes[y_i, x_i] * (ul[y_i - 1, x_i + 1]**factor /
                                                 in_total)
                tolock[str(y_i - 1) + " " + str(x_i + 1)] = True
                inputs[str(y_i - 1) + " " + str(x_i + 1)] = True
                cs3 += (ul[y_i - 1, x_i + 1]**factor / in_total)

            if locked[y_i - 1, x_i - 1] < 1 and not scipy.isnan(
                    ice_vals[y_i - 1, x_i - 1]) and not scipy.isnan(
                        ur[y_i - 1, x_i - 1]):
                fluxes[y_i - 1, x_i -
                       1] += fluxes[y_i, x_i] * (ur[y_i - 1, x_i - 1]**factor /
                                                 in_total)
                tolock[str(y_i - 1) + " " + str(x_i - 1)] = True
                inputs[str(y_i - 1) + " " + str(x_i - 1)] = True
                cs3 += (ur[y_i - 1, x_i - 1]**factor / in_total)

            if locked[y_i + 1, x_i + 1] < 1 and not scipy.isnan(
                    ice_vals[y_i + 1, x_i + 1]) and not scipy.isnan(
                        dl[y_i + 1, x_i + 1]):
                fluxes[y_i + 1, x_i +
                       1] += fluxes[y_i, x_i] * (dl[y_i + 1, x_i + 1]**factor /
                                                 in_total)
                tolock[str(y_i + 1) + " " + str(x_i + 1)] = True
                inputs[str(y_i + 1) + " " + str(x_i + 1)] = True
                cs3 += (dl[y_i + 1, x_i + 1]**factor / in_total)

            if locked[y_i + 1, x_i - 1] < 1 and not scipy.isnan(
                    ice_vals[y_i + 1, x_i - 1]) and not scipy.isnan(
                        dr[y_i + 1, x_i - 1]):
                fluxes[y_i + 1, x_i -
                       1] += fluxes[y_i, x_i] * (dr[y_i + 1, x_i - 1]**factor /
                                                 in_total)
                tolock[str(y_i + 1) + " " + str(x_i - 1)] = True
                inputs[str(y_i + 1) + " " + str(x_i - 1)] = True
                cs3 += (dr[y_i + 1, x_i - 1]**factor / in_total)

#			Calculate output fluxes
            """
			if locked[y_i-1,x_i] < 1 and not scipy.isnan(ice_vals[y_i-1,x_i]) and not scipy.isnan(d[y_i,x_i]) and (str(y_i-1) + " " + str(x_i)) not in inputs:
				out_total += d[y_i,x_i]**factor;

			if locked[y_i+1,x_i] < 1 and not scipy.isnan(ice_vals[y_i+1,x_i]) and not scipy.isnan(u[y_i,x_i]) and (str(y_i+1) + " " + str(x_i)) not in inputs:
				out_total += u[y_i,x_i]**factor;

			if locked[y_i,x_i+1] < 1 and not scipy.isnan(ice_vals[y_i,x_i+1]) and not scipy.isnan(r[y_i,x_i]) and (str(y_i) + " " + str(x_i+1)) not in inputs:
				out_total += r[y_i,x_i]**factor;

			if locked[y_i,x_i-1] < 1 and not scipy.isnan(ice_vals[y_i,x_i-1]) and not scipy.isnan(l[y_i,x_i]) and (str(y_i) + " " + str(x_i-1)) not in inputs:
				out_total += l[y_i,x_i]**factor;

			if locked[y_i-1,x_i+1] < 1 and not scipy.isnan(ice_vals[y_i-1,x_i+1]) and not scipy.isnan(dr[y_i,x_i]) and (str(y_i-1) + " " + str(x_i+1)) not in inputs:
				out_total += dr[y_i,x_i]**factor;

			if locked[y_i-1,x_i-1] < 1 and not scipy.isnan(ice_vals[y_i-1,x_i-1]) and not scipy.isnan(dl[y_i,x_i]) and (str(y_i-1) + " " + str(x_i-1)) not in inputs:
				out_total += dl[y_i,x_i]**factor;

			if locked[y_i+1,x_i+1] < 1 and not scipy.isnan(ice_vals[y_i+1,x_i+1]) and not scipy.isnan(ur[y_i,x_i]) and (str(y_i+1) + " " + str(x_i+1)) not in inputs:
				out_total += ur[y_i,x_i]**factor;

			if locked[y_i+1,x_i-1] < 1 and not scipy.isnan(ice_vals[y_i+1,x_i-1]) and not scipy.isnan(ul[y_i,x_i]) and (str(y_i+1) + " " + str(x_i-1)) not in inputs:
				out_total += ul[y_i,x_i]**factor;

			if locked[y_i-1,x_i] < 1 and not scipy.isnan(ice_vals[y_i-1,x_i]) and not scipy.isnan(d[y_i,x_i]) and (str(y_i-1) + " " + str(x_i)) not in inputs:
				fluxes[y_i-1,x_i] += fluxes[y_i,x_i] * (d[y_i,x_i]**factor / out_total);
				tolock[str(y_i-1) + " " + str(x_i)] = True;

			if locked[y_i+1,x_i] < 1 and not scipy.isnan(ice_vals[y_i+1,x_i]) and not scipy.isnan(u[y_i,x_i]) and (str(y_i+1) + " " + str(x_i)) not in inputs:
				fluxes[y_i+1,x_i] += fluxes[y_i,x_i] * (u[y_i,x_i]**factor / out_total);
				tolock[str(y_i+1) + " " + str(x_i)] = True;

			if locked[y_i,x_i+1] < 1 and not scipy.isnan(ice_vals[y_i,x_i+1]) and not scipy.isnan(r[y_i,x_i]) and (str(y_i) + " " + str(x_i+1)) not in inputs:
				fluxes[y_i,x_i+1] += fluxes[y_i,x_i] * (r[y_i,x_i]**factor / out_total);
				tolock[str(y_i) + " " + str(x_i+1)] = True;

			if locked[y_i,x_i-1] < 1 and not scipy.isnan(ice_vals[y_i,x_i-1]) and not scipy.isnan(l[y_i,x_i]) and (str(y_i) + " " + str(x_i-1)) not in inputs:
				fluxes[y_i,x_i-1] += fluxes[y_i,x_i] * (l[y_i,x_i]**factor / out_total);
				tolock[str(y_i) + " " + str(x_i-1)] = True;

			if locked[y_i-1,x_i+1] < 1 and not scipy.isnan(ice_vals[y_i-1,x_i+1]) and not scipy.isnan(dr[y_i,x_i]) and (str(y_i-1) + " " + str(x_i+1)) not in inputs:
				fluxes[y_i-1,x_i+1] += fluxes[y_i,x_i] * (dr[y_i,x_i]**factor / out_total);
				tolock[str(y_i-1) + " " + str(x_i+1)] = True;

			if locked[y_i-1,x_i-1] < 1 and not scipy.isnan(ice_vals[y_i-1,x_i-1]) and not scipy.isnan(dl[y_i,x_i]) and (str(y_i-1) + " " + str(x_i-1)) not in inputs:
				fluxes[y_i-1,x_i-1] += fluxes[y_i,x_i] * (dl[y_i,x_i]**factor / out_total);
				tolock[str(y_i-1) + " " + str(x_i-1)] = True;

			if locked[y_i+1,x_i+1] < 1 and not scipy.isnan(ice_vals[y_i+1,x_i+1]) and not scipy.isnan(ur[y_i,x_i]) and (str(y_i+1) + " " + str(x_i+1)) not in inputs:
				fluxes[y_i+1,x_i+1] += fluxes[y_i,x_i] * (ur[y_i,x_i]**factor / out_total);
				tolock[str(y_i+1) + " " + str(x_i+1)] = True;

			if locked[y_i+1,x_i-1] < 1 and not scipy.isnan(ice_vals[y_i+1,x_i-1]) and not scipy.isnan(ul[y_i,x_i]) and (str(y_i+1) + " " + str(x_i-1)) not in inputs:
				fluxes[y_i+1,x_i-1] += fluxes[y_i,x_i] * (ul[y_i,x_i]**factor / out_total);
				tolock[str(y_i+1) + " " + str(x_i-1)] = True;
			"""

#		print(x[x_i],y[y_i]);
#		print(x[x_i-1],y[y_i]);
#		print(x[x_i+1],y[y_i]);
#		print(x[x_i],y[y_i-1]);
#		print(x[x_i],y[y_i+1]);
#		print(x[x_i+1],y[y_i+1]);
#		print(x[x_i+1],y[y_i-1]);
#		print(x[x_i-1],y[y_i+1]);
#		print(x[x_i-1],y[y_i-1]);
#		return;

        for coord in tolock:

            str_i, str_j = coord.split()
            i = int(str_i)
            j = int(str_j)

            cs2 += fluxes[i, j]

            thicknesses[coord] = fluxes[i, j] / speeds[i, j]
            locked[i, j] = 1
            todo = tolock.keys()


#		print(cs1, cs2, cs3);

        cur_iteration += 1

    for coord in thicknesses:

        str_i, str_j = coord.split()
        i = int(str_i)
        j = int(str_j)
        angle = angles[i, j]
        sub_speeds = speeds[i - 1:i + 2, j - 1:j + 2]
        sub_angles = angles[i - 1:i + 2, j - 1:j + 2]

        indices_x = [1, 2, 2, 2, 1, 0, 0, 0]
        indices_y = [0, 2, 0, 1, 2, 0, 2, 1]

        if angle >= math.pi / 4 and angle < math.pi / 2:
            indices_x = [2, 2, 2, 1, 0, 0, 0, 1]
            indices_y = [1, 2, 0, 2, 1, 0, 2, 0]

        elif angle >= math.pi / 2 and angle < 3 * math.pi / 4:
            indices_x = [2, 1, 2, 0, 0, 1, 0, 2]
            indices_y = [1, 2, 2, 2, 1, 0, 0, 0]

        elif angle >= 3 * math.pi / 4 and angle <= math.pi:
            indices_x = [2, 0, 1, 0, 0, 2, 1, 2]
            indices_y = [2, 2, 2, 1, 0, 0, 0, 1]

        elif angle < 0 and angle >= -1 * math.pi / 4:
            indices_x = [1, 2, 0, 2, 1, 0, 2, 0]
            indices_y = [0, 0, 0, 1, 2, 2, 2, 1]

        elif angle < -1 * math.pi / 4 and angle >= -1 * math.pi / 2:
            indices_x = [0, 2, 0, 1, 2, 0, 2, 1]
            indices_y = [0, 0, 1, 0, 2, 2, 1, 2]

        elif angle < -1 * math.pi / 2 and angle >= -3 * math.pi / 4:
            indices_x = [0, 1, 0, 0, 2, 1, 2, 2]
            indices_y = [1, 0, 2, 0, 1, 2, 0, 2]

        elif angle < -3 * math.pi / 4 and angle >= -1 * math.pi:
            indices_x = [0, 0, 1, 0, 2, 2, 1, 2]
            indices_y = [2, 0, 2, 1, 0, 1, 0, 2]

        dux_dx = dirDeriv(sub_speeds, sub_angles, indices_x, inc)
        dux_dy = dirDeriv(sub_speeds, sub_angles, indices_y, inc)
        duy_dx = dirDeriv(sub_speeds, sub_angles, indices_x, inc)
        duy_dy = dirDeriv(sub_speeds, sub_angles, indices_y, inc)

        out_str = str(x[int(j)]) + " " + str(y[int(i)]) + " " + str(
            thicknesses[coord]) + " " + str(fluxes[i, j])

        if coord in inputs:
            out_str += " input"
        elif coord in outputs:
            out_str += " output"

        print(out_str)

    os.remove("one.grd")
    os.remove("two.grd")
    os.remove("three.grd")
    os.remove("four.grd")
    os.remove("five.grd")
    os.remove("six.grd")
    os.remove("seven.grd")
    os.remove("eight.grd")
    os.remove("u.grd")
    os.remove("d.grd")
    os.remove("l.grd")
    os.remove("r.grd")
    os.remove("ul.grd")
    os.remove("ur.grd")
    os.remove("dl.grd")
    os.remove("dr.grd")

    return
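# Hedged sketch (illustration only): the grdclip commands above bucket the
# flow-direction grid into eight pi/4-wide octants (right, up-right, up,
# up-left, left, down-left, down, down-right). The same bucketing, expressed
# directly in numpy on a toy array of ATAN2 angles:
import numpy as np

angles_demo = np.array([0.1, 1.0, 2.0, 3.0, -0.5, -1.6, -2.5, -3.1])
# Bin index -4..3, each pi/4 wide, matching the grdclip boundaries above.
octant_demo = np.floor(angles_demo / (np.pi / 4)).astype(int)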
Exemple #41
def dirDeriv(val_mat, direction_mat, inc):

    import scipy
    import scipy.linalg
    import math

    cell_angles = scipy.array(
        [[3 * math.pi / 4, math.pi / 2, math.pi / 4], [math.pi, scipy.nan, 0],
         [-3 * math.pi / 4, -1 * math.pi / 2, -1 * math.pi / 4]])
    cell_incs = scipy.array([[(inc**2 + inc**2)**0.5, inc,
                              (inc**2 + inc**2)**0.5], [inc, scipy.nan, inc],
                             [(inc**2 + inc**2)**0.5, inc,
                              (inc**2 + inc**2)**0.5]])
    angle = direction_mat[1, 1]
    vals_x = scipy.cos(angle - direction_mat) * val_mat
    vals_y = scipy.sin(angle - direction_mat) * val_mat

    cell_cosines_f = scipy.cos(angle - cell_angles)
    cell_cosines_b = scipy.cos(angle - cell_angles)
    cell_sines_f = scipy.sin(angle - cell_angles)
    cell_sines_b = scipy.sin(angle - cell_angles)

    cell_cosines_f[cell_cosines_f < 0.00001] = scipy.nan
    cell_cosines_f = cell_cosines_f**2
    cell_cosines_f = cell_cosines_f / sum(
        cell_cosines_f[~scipy.isnan(cell_cosines_f)])
    cell_cosines_b[cell_cosines_b > -0.00001] = scipy.nan
    cell_cosines_b = cell_cosines_b**2
    cell_cosines_b = cell_cosines_b / sum(
        cell_cosines_b[~scipy.isnan(cell_cosines_b)])
    cell_sines_f[cell_sines_f < 0.00001] = scipy.nan
    cell_sines_f = cell_sines_f**2
    cell_sines_f = cell_sines_f / sum(cell_sines_f[~scipy.isnan(cell_sines_f)])
    cell_sines_b[cell_sines_b > -0.00001] = scipy.nan
    cell_sines_b = cell_sines_b**2
    cell_sines_b = cell_sines_b / sum(cell_sines_b[~scipy.isnan(cell_sines_b)])

    temp = vals_x * cell_cosines_f
    ux_x_f = sum(temp[~scipy.isnan(temp)])

    temp = vals_x * cell_cosines_b
    ux_x_b = sum(temp[~scipy.isnan(temp)])

    temp = vals_x * cell_sines_f
    ux_y_f = sum(temp[~scipy.isnan(temp)])

    temp = vals_x * cell_sines_b
    ux_y_b = sum(temp[~scipy.isnan(temp)])

    temp = vals_y * cell_cosines_f
    uy_x_f = sum(temp[~scipy.isnan(temp)])

    temp = vals_y * cell_cosines_b
    uy_x_b = sum(temp[~scipy.isnan(temp)])

    temp = vals_y * cell_sines_f
    uy_y_f = sum(temp[~scipy.isnan(temp)])

    temp = vals_y * cell_sines_b
    uy_y_b = sum(temp[~scipy.isnan(temp)])

    ux_x = scipy.array([ux_x_b, val_mat[1, 1], ux_x_f])
    ux_y = scipy.array([ux_y_b, val_mat[1, 1], ux_y_f])
    uy_x = scipy.array([uy_x_b, 0, uy_x_f])
    uy_y = scipy.array([uy_y_b, 0, uy_y_f])

    xs = scipy.array([-1 * int(inc), 0, int(inc)])
    A = scipy.vstack([xs, scipy.ones(len(xs))]).T

    dux_dx, intercept = scipy.linalg.lstsq(A, ux_x)[0]
    dux_dy, intercept = scipy.linalg.lstsq(A, ux_y)[0]
    duy_dx, intercept = scipy.linalg.lstsq(A, uy_x)[0]
    duy_dy, intercept = scipy.linalg.lstsq(A, uy_y)[0]

    return dux_dx, dux_dy, duy_dx, duy_dy
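# Hedged sketch (illustration only): each derivative above is the slope of a
# straight-line least-squares fit through a backward, centre and forward
# value spaced by `inc`. The same slope computed in isolation:
import numpy as np
from scipy import linalg

inc_demo = 100
xs_demo = np.array([-inc_demo, 0, inc_demo], dtype=float)
ys_demo = np.array([1.0, 2.0, 3.0])  # values at -inc, 0 and +inc
A_demo = np.vstack([xs_demo, np.ones(len(xs_demo))]).T
slope_demo, intercept_demo = linalg.lstsq(A_demo, ys_demo)[0]  # slope = 0.01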
Exemple #42
def formatdata(data):
    x = data[:, 0]
    y = data[:, 1]
    x = x[~sp.isnan(y)]
    y = y[~sp.isnan(y)]
    return x, y
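# Hedged sketch (illustration only): formatdata() drops every row whose y
# value is NaN and returns the two columns separately. An equivalent numpy
# snippet on a toy array:
import numpy as np

data_demo = np.array([[0.0, 1.0],
                      [1.0, np.nan],
                      [2.0, 4.0]])
keep_demo = ~np.isnan(data_demo[:, 1])
x_demo = data_demo[:, 0][keep_demo]  # -> [0., 2.]
y_demo = data_demo[:, 1][keep_demo]  # -> [1., 4.]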
def plotStrip(bp, at, ps, crop):
    """
    Plot a strip of colored polygons along a trace of GPS coordinates (deg).
    Extension of the strip outward from the line to either side is specified
    by ps.sideRange, in meters.

    Parameters
    ----------
    bp.polyList: master list of polygons, all lines included
    bp.colorList: master list of colors for each polygon
    bp.lineList: master list of survey lines
    at.fix : float (deg), (pktCount)x2 array
      [longitude, latitude] coordinates of ship, rows are packets in order.
    at.depth :  float (m), array length pktCount
      Water depth beneath the ship at each fix. Used with extended lead-in
      length of cable to estimate sensor position by layback calculation.
    at.leadin : float (m)
    at.color : float (m), array length pktCount
      List of numbers for each position indicating the color to plot
      representing IP data results.
    """
    # Start by transforming the fix points into a local azimuthal equidistant
    # reference system. Units along x and y are meters.
    ptList = [point(tuple(row)) for row in at.fix]
    dfPt = gpd.GeoDataFrame({'geometry': ptList})
    # Assign the WGS84 latitude-longitude Coordinate Reference System (CRS).
    dfPt.crs = ps.crsWGS84

    # Transform to the azimuthal equidistant reference.
    dfPt = dfPt.to_crs(ps.crsAzEq)
    # Extract the transformed coordinates into an array.
    flatFix = sp.zeros_like(at.fix, dtype=float)
    for p in range(len(flatFix)):
        flatFix[p, :] = sp.array(dfPt.geometry[p].coords)  # (m)

    # Track vectors between each pair of consecutive GPS fixes.
    vParSeg = flatFix[1:, :] - flatFix[0:-1, :]
    # Length of each track vector.
    segLen = sp.sqrt(vParSeg[:, 0]**2 + vParSeg[:, 1]**2)  # (m)
    # Cumulative sum along the track line.
    sumLen = sp.hstack((0, sp.cumsum(segLen)))

    # Print the total line length (m).
    #    print('%.1f m along line.' % (sumLen[-1]))
    # Distance between start and endpoints.
    startFinDist = mm.norm(flatFix[0, :] - flatFix[-1, :])
    #    print('%.1f m distance from start point to finish point.' % startFinDist)
    # Time elapsed on the line.
    lineTime = (at.cpuDT[-1] - at.cpuDT[0]).total_seconds()
    #    print('%.0f s elapsed.' % lineTime)
    lineSpeed = startFinDist / lineTime  # (m/s)
    lineSpeed *= 1.94384  # (kt)
    #    print('%.1f kt average speed' % lineSpeed)

    # Interpolate a laidback fix location on the track line.
    # Layback the extra length at the start of the line according to
    # the boat's heading for the first few meters twice the length of
    # the cable lead in.
    newFix = sp.zeros_like(flatFix, dtype=float)
    linLoc = 2 * at.leadin
    closeIdx = sp.argmin(abs(sumLen - linLoc))
    # If the line is at least as long as twice the lead in.
    if sumLen[-1] > linLoc:
        if linLoc >= sumLen[closeIdx]:
            idx1 = closeIdx
            idx2 = closeIdx + 1
        else:
            idx1 = closeIdx - 1
            idx2 = closeIdx
        l1 = sumLen[idx1]
        l2 = sumLen[idx2]
        startHeadingFix = flatFix[idx1, :] + (
            flatFix[idx2, :] - flatFix[idx1, :]) * (linLoc - l1) / (l2 - l1)
    else:
        # Else just use the heading of the whole line.
        startHeadingFix = flatFix[-1, :]
    startHeadingVec = mm.unit(startHeadingFix - flatFix[0, :])
    for p in range(len(flatFix)):
        linLoc = sumLen[p] - mm.cableRange(at.leadin, at.depth[p])
        if linLoc >= 0:
            closeIdx = sp.argmin(abs(sumLen - linLoc))
            if linLoc >= sumLen[closeIdx]:
                idx1 = closeIdx
                idx2 = closeIdx + 1
            else:
                idx1 = closeIdx - 1
                idx2 = closeIdx
            l1 = sumLen[idx1]
            l2 = sumLen[idx2]
            if l1 != l2:
                newFix[p, :] = flatFix[idx1, :] + (flatFix[idx2, :] - flatFix[
                    idx1, :]) * (linLoc - l1) / (l2 - l1)
            else:
                # Case of interpolation between two repeated locations.
                newFix[p, :] = flatFix[idx1, :]
        else:
            newFix[p, :] = flatFix[0, :] + linLoc * startHeadingVec
    # Overwrite.
    flatFix = newFix

    # Reevaluate track vectors between each pair of consecutive GPS fixes.
    vParSeg = flatFix[1:, :] - flatFix[0:-1, :]
    # Track vectors at each point, found from points before and after.
    vParPt = flatFix[2:, :] - flatFix[0:-2, :]
    # Include segment parallels for the boundary fix points.
    vParPt = sp.vstack((vParSeg[0, :], vParPt, vParSeg[-1, :]))
    # Midpoints along the sequence of GPS fixes.
    midPts = (flatFix[1:, :] + flatFix[0:-1, :]) / 2

    # Perpendicular vectors at each segment and fix point.
    # Vector lengths are set to sideRange.
    vPerpSeg = ps.sideRange * mm.unit(mm.perp(vParSeg))  # (m)
    vPerpPt = ps.sideRange * mm.unit(mm.perp(vParPt))  # (m)

    # If cropping, only include fix points where asked.
    plottedPkts = sp.array(range(len(at.pkt)))
    if crop and ps.plotThis != 'crop':
        plottedPkts = plottedPkts[at.cropLogic]
    lastGoodVerts = sp.zeros((4, 2))
    # Polygon patches for each packet.
    for p in plottedPkts:
        # Perpendicular displacement, length sideRange, at the first midpoint.
        if p != 0:
            # Identify a trailing midpoint which is different from the
            # present fix location. (Not between duplicate fixes.)
            pPrior = p - 1
            while pPrior >= 0 and all(midPts[pPrior, :] == flatFix[p, :]):
                pPrior -= 1
            vert01 = sp.vstack((midPts[pPrior, :] - vPerpSeg[pPrior, :],
                                midPts[pPrior, :] + vPerpSeg[pPrior, :]))
        else:
            vert01 = sp.zeros((0, 2))
        # Polygon points offset from the flat fix points themselves.
        vert2 = flatFix[p, :] + vPerpPt[p, :]
        vert5 = flatFix[p, :] - vPerpPt[p, :]
        if p != len(flatFix) - 1:

            # Perpendicular displacement, length sideRange, at the second midpoint.
            vert34 = sp.vstack(
                (midPts[p, :] + vPerpSeg[p, :], midPts[p, :] - vPerpSeg[p, :]))
        else:
            vert34 = sp.zeros((0, 2))
        # Polygon vertices.
        verts = sp.vstack((vert01, vert2, vert34, vert5))
        # In the case where IP packets come in at a higher rate than the GPS
        # fixes are updated, consecutive packets have the same position at
        # times. In this case, reuse the last useable polygon. This will plot
        # on top of the reused position.
        if sp.isnan(verts).any():
            verts = lastGoodVerts.copy()
        else:
            lastGoodVerts = verts.copy()
        # Vertices as tuples in a list.
        vertList = [tuple(row) for row in verts]
        # Append the latest polygon vertices to the list of polygons.
        bp.polyList.append(polygon(vertList))

    bp.colorList = sp.hstack((bp.colorList, at.color[plottedPkts]))

    # Include each segment between the fix coordinates as its own line object.
    for p in plottedPkts:
        if p < len(flatFix) - 1:
            endPts = [tuple(row) for row in flatFix[p:p + 2, :]]
            if at.xmitFund == 8:
                bp.lineList.append(lineStr(endPts))

    if ps.saveTxt:
        # Pseudocolor plots.
        txtName = 'ch%d_H%d_%s_%s_%d.txt' % (
            ps.ch,
            ps.h,
            ps.plotThis,
            at.fileDateStr,
            at.fileNum,
        )
        txtPath = os.path.join(ps.folderPath, 'plotData', ps.plotThis, txtName)
        with open(txtPath, 'w') as f:
            for p in range(at.pktCount):
                # longi (deg), lat (deg), color (?)
                wStr = (str(dfPt.geometry[p].x) + ',' +
                        str(dfPt.geometry[p].y) + ',' + str(at.color[p]) +
                        '\n')
                f.write(wStr)
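# Hedged sketch (illustration only): the strip polygons above are built by
# offsetting each fix by sideRange along the track-perpendicular direction.
# mm.unit() and mm.perp() are not shown in this snippet; numpy equivalents
# consistent with how they are used here (an assumption) would be:
import numpy as np

def perp_sketch(v):
    # Rotate 2-D row vectors by 90 degrees: (x, y) -> (-y, x).
    return np.column_stack((-v[:, 1], v[:, 0]))

def unit_sketch(v):
    # Normalise 2-D row vectors to unit length.
    return v / np.sqrt((v ** 2).sum(axis=1, keepdims=True))

track_demo = np.array([[1.0, 0.0], [0.0, 2.0]])
offset_demo = 10.0 * unit_sketch(perp_sketch(track_demo))  # 10 m side offsets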
Exemple #44
def parse_cegs_drosophila_phenotypes(
    phenotype_file='/Users/bjarnivilhjalmsson/data/cegs_lehmann/allphenotypes_5.0_cleaned.tab.reps.hdf5',
):
    """
    Parser for CEGS Drosophila phenotype data
    """
    import pylab
    #Load phenotypes...
    ph5f = h5py.File(phenotype_file)
    #Now take the median and mean of all values for all individuals.
    phen_dict = {}
    for phen in ph5f.keys():
        #First mated
        Y_mated = ph5f[phen]['Y_mated'][...]
        Z_mated = ph5f[phen]['Z_mated'][...]
        sample_filter = sp.negative(sp.isnan(Y_mated))
        Ys_sum = sp.dot(Y_mated[sample_filter], Z_mated[sample_filter])
        rep_count = sp.dot(sp.ones(sum(sample_filter)), Z_mated[sample_filter])
        Y_means = Ys_sum / rep_count
        #Now calculate medians by iteration.
        phen_vals_list = [[] for i in range(216)]
        for i in range(len(Y_mated)):
            ind_i = sp.where(1 == Z_mated[i])[0][0]
            phen_vals_list[ind_i].append(Y_mated[i])
        medians = sp.zeros(216)
        for i, pl in enumerate(phen_vals_list):
            if len(pl) > 0:
                medians[i] = sp.median(pl)
            else:
                medians[i] = sp.nan
        ind_filter = sp.negative(sp.isnan(Y_means))
        if phen == 'Triglyceride':
            ind_filter = (Y_means > 0) * ind_filter

        phen_dict[phen] = {
            'mated': {
                'Y_means': Y_means,
                'rep_count': rep_count,
                'ind_filter': ind_filter,
                'Y_medians': medians
            }
        }

        print 'Plotting phenotype histograms for %s, %s' % (phen, 'mated')
        mated_filtered_means = Y_means[ind_filter]
        pylab.hist(mated_filtered_means)
        pylab.savefig(
            '/Users/bjarnivilhjalmsson/data/tmp/cegs_hist_%s_mated_means.png' %
            (phen))
        pylab.clf()
        mated_filtered_medians = medians[ind_filter]
        pylab.hist(mated_filtered_medians)
        pylab.savefig(
            '/Users/bjarnivilhjalmsson/data/tmp/cegs_hist_%s_mated_medians.png'
            % (phen))
        pylab.clf()

        #Then virgin
        Y_virgin = ph5f[phen]['Y_virgin'][...]
        Z_virgin = ph5f[phen]['Z_virgin'][...]
        sample_filter = sp.negative(sp.isnan(Y_virgin))
        Ys_sum = sp.dot(Y_virgin[sample_filter], Z_virgin[sample_filter])
        rep_count = sp.dot(sp.ones(sum(sample_filter)),
                           Z_virgin[sample_filter])
        Y_means = Ys_sum / rep_count
        #Now calculate medians by iteration.
        phen_vals_list = [[] for i in range(216)]
        for i in range(len(Y_virgin)):
            ind_i = sp.where(1 == Z_virgin[i])[0][0]
            phen_vals_list[ind_i].append(Y_virgin[i])
        medians = sp.zeros(216)
        for i, pl in enumerate(phen_vals_list):
            if len(pl) > 0:
                medians[i] = sp.median(pl)
            else:
                medians[i] = sp.nan
        ind_filter = sp.negative(sp.isnan(Y_means))
        if phen == 'Triglyceride':
            ind_filter = (Y_means > 0) * ind_filter

        phen_dict[phen]['virgin'] = {
            'Y_means': Y_means,
            'rep_count': rep_count,
            'ind_filter': ind_filter,
            'Y_medians': medians
        }

        print 'Plotting phenotype histograms for %s, %s' % (phen, 'virgin')
        virgin_filtered_means = Y_means[ind_filter]
        pylab.hist(virgin_filtered_means)
        pylab.savefig(
            '/Users/bjarnivilhjalmsson/data/tmp/cegs_hist_%s_virgin_means.png'
            % (phen))
        pylab.clf()
        virgin_filtered_medians = medians[ind_filter]
        pylab.hist(virgin_filtered_medians)
        pylab.savefig(
            '/Users/bjarnivilhjalmsson/data/tmp/cegs_hist_%s_virgin_medians.png'
            % (phen))
        pylab.clf()

        means_corr = sp.corrcoef(mated_filtered_means,
                                 virgin_filtered_means)[0, 1]
        medians_corr = sp.corrcoef(mated_filtered_medians,
                                   virgin_filtered_medians)[0, 1]
        print 'Correlation between mated and virgin flies, means: %0.2f, medians: %0.2f' % (
            means_corr, medians_corr)
        phen_dict[phen]['corrs'] = {
            'means': means_corr,
            'medians': medians_corr
        }
    return phen_dict
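# Hedged sketch (illustration only): the per-line means above come from
# projecting the replicate measurements Y through an individuals-by-lines
# indicator matrix Z and dividing by the replicate count per line. A small
# standalone numpy version of that step:
import numpy as np

Y_demo = np.array([1.0, 3.0, 2.0, 4.0])  # four replicate measurements
Z_demo = np.array([[1, 0],               # replicate-to-line indicator matrix
                   [1, 0],
                   [0, 1],
                   [0, 1]])
rep_count_demo = np.ones(len(Y_demo)).dot(Z_demo)       # replicates per line
line_means_demo = Y_demo.dot(Z_demo) / rep_count_demo   # -> [2., 3.]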
Exemple #45
def coordinate_cegs_genotype_phenotype(
    phen_dict,
    phenotype='Protein',
    env='mated',
    k_thres=0.8,
    ind_missing_thres=0.5,
    snp_missing_thres=0.05,
    maf_thres=0.1,
    genotype_file='/Users/bjarnivilhjalmsson/data/cegs_lehmann/CEGS.216.lines.NO_DPGP4.GATK.SNP.HETS.FILTERED.Filter_imputed.hdf5'
):
    """
    Parse genotypes and coordinate with phenotype, and ready data for analysis.
    """
    gh5f = h5py.File(genotype_file)
    p_dict = phen_dict[phenotype][env]
    print 'Loading SNPs'
    snps = sp.array(gh5f['gt'][...], dtype='single')
    snps = snps[:, p_dict['ind_filter']]
    positions = gh5f['pos'][...]
    m, n = snps.shape
    print 'Loaded %d SNPs for %d individuals' % (m, n)
    print 'Filtering individuals with missing rates >%0.2f' % ind_missing_thres
    missing_mat = sp.isnan(snps)
    ind_missing_rates = sp.sum(missing_mat, 0) / float(m)
    ind_filter = ind_missing_rates < ind_missing_thres
    snps = snps[:, ind_filter]
    n = sp.sum(ind_filter)
    print 'Filtered %d individuals due to high missing rates' % sp.sum(
        sp.negative(ind_filter))
    gt_ids = gh5f['gt_ids'][p_dict['ind_filter']]
    gt_ids = gt_ids[ind_filter]
    Y_means = p_dict['Y_means'][p_dict['ind_filter']]
    Y_means = Y_means[ind_filter]
    Y_medians = p_dict['Y_medians'][p_dict['ind_filter']]
    Y_medians = Y_medians[ind_filter]
    rep_count = p_dict['rep_count'][p_dict['ind_filter']]
    rep_count = rep_count[ind_filter]

    print 'Now removing "bad" genotypes.'
    bad_genotypes = [
        'Raleigh_272', 'Raleigh_378', 'Raleigh_554', 'Raleigh_591',
        'Raleigh_398', 'Raleigh_138', 'Raleigh_208', 'Raleigh_336',
        'Raleigh_370', 'Raleigh_373', 'Raleigh_374', 'Raleigh_799',
        'Raleigh_821', 'Raleigh_822', 'Raleigh_884', 'Raleigh_335'
    ]
    ind_filter = sp.negative(sp.in1d(gt_ids, bad_genotypes))
    gt_ids = gt_ids[ind_filter]
    Y_means = Y_means[ind_filter]
    Y_medians = Y_medians[ind_filter]
    rep_count = rep_count[ind_filter]
    snps = snps[:, ind_filter]
    print 'Removed %d "bad" genotypes' % sp.sum(sp.negative(ind_filter))

    n = len(snps[0])
    print 'Filtering SNPs with missing rate >%0.2f' % snp_missing_thres
    missing_mat = sp.isnan(snps)
    snp_missing_rates = sp.sum(missing_mat, 1) / float(n)
    snps_filter = snp_missing_rates < snp_missing_thres
    snps = snps[snps_filter]
    positions = positions[snps_filter]
    m = sp.sum(snps_filter)
    print 'Filtered %d SNPs due to high missing rate' % sp.sum(
        sp.negative(snps_filter))

    print 'Now imputing (w mean)'
    missing_mat = sp.isnan(snps)
    ok_counts = n - sp.sum(missing_mat, 1)
    snps[missing_mat] = 0
    snp_means = sp.sum(snps, 1) / ok_counts
    #     print snp_means.shape
    #     print snp_means[:10]
    #     import pdb
    #     pdb.set_trace()
    for i in range(len(snps)):
        snps[i, missing_mat[i]] = snp_means[i]

    print 'And filtering SNPs with MAF<%0.2f' % maf_thres
    snp_means = sp.mean(snps, 1)
    snp_mafs = sp.minimum(snp_means, 1 - snp_means)
    snps_filter = snp_mafs > maf_thres
    snps = snps[snps_filter]
    positions = positions[snps_filter]
    print 'Filtered %d SNPs with low MAFs' % sp.sum(sp.negative(snps_filter))

    print 'Filtering based on kinship w threshold:', k_thres
    import kinship
    K = kinship.calc_ibd_kinship(snps)
    print '\nKinship calculated'
    K_ind_filter = []
    for i in range(n):
        K_ind_filter.append(not sp.any(K[i, i + 1:n] > k_thres))
    if sum(K_ind_filter) == n:
        print 'No individuals were filtered based on kinship..'
    else:
        print 'Filtering %d individuals based on kinship.' % (
            n - sum(K_ind_filter))
        K_ind_filter = sp.array(K_ind_filter)
        gt_ids = gt_ids[K_ind_filter]
        Y_means = Y_means[K_ind_filter]
        Y_medians = Y_medians[K_ind_filter]
        rep_count = rep_count[K_ind_filter]
        snps = snps[:, K_ind_filter]

        print 'Again filtering SNPs with MAF<%0.2f' % maf_thres
        snp_means = sp.mean(snps, 1)
        snp_mafs = sp.minimum(snp_means, 1 - snp_means)
        snps_filter = snp_mafs > maf_thres
        snps = snps[snps_filter]
        positions = positions[snps_filter]
        print 'Filtered %d additional SNPs with low MAFs' % sp.sum(
            sp.negative(snps_filter))

    print 'All filtering done.'

    m, n = snps.shape
    print 'In all there are %d SNPs remaining, for %d individuals.' % (m, n)

    ret_dict = {
        'Y_means': Y_means,
        'Y_medians': Y_medians,
        'rep_count': rep_count,
        'gt_ids': gt_ids,
        'positions': positions,
        'snps': snps
    }

    return ret_dict
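# Hedged sketch (illustration only): the imputation and MAF steps above
# replace missing calls with the per-SNP mean and then drop SNPs whose minor
# allele frequency falls below the threshold. A compact numpy version of
# those two steps on a toy genotype matrix (threshold value assumed):
import numpy as np

snps_demo = np.array([[0.0, 1.0, np.nan, 1.0],
                      [0.0, 0.0, 0.0, 1.0]])
missing_demo = np.isnan(snps_demo)
ok_counts_demo = snps_demo.shape[1] - missing_demo.sum(axis=1)
snps_demo[missing_demo] = 0
row_means_demo = snps_demo.sum(axis=1) / ok_counts_demo
for i in range(len(snps_demo)):
    snps_demo[i, missing_demo[i]] = row_means_demo[i]  # mean-impute each SNP
maf_demo = np.minimum(snps_demo.mean(axis=1), 1 - snps_demo.mean(axis=1))
snps_kept_demo = snps_demo[maf_demo > 0.1]  # MAF filter, 0.1 assumed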