Exemple #1
0
def calc_distance_map(pipeline,
                      ap_name,
                      ca_name,
                      ca_type,
                      plotFlag=True,
                      histIdx=False,
                      fontsize=10):
    """
    Calculates distances/similarities between pipeline runs

    Optionally visualizes the result as a seaborn clustermap for PBO
    pipelines (requires multiple stations)

    For every pair of runs the distance is the square root of the summed
    squared differences between the compared quantities (eigenvectors, Mogi
    source parameters, or Mogi vectors). Only the upper triangle is computed;
    the matrix is then symmetrized.
    Only works, because of internal assumptions, on pipelines with multiple stations

    @param pipeline: Pipeline to analyze. Must provide RA_results and
                     getMetadataHistory() (and data_fetcher.meta_data for the
                     'PCA' and 'MogiVector' comparisons)
    @param ap_name: Name of the pipeline item that is being perturbed
    @param ca_name: Name of the pipeline item used as the comparison metric for calculating the distance
    @param ca_type: Type of comparison metric [PCA for PCA, MogiSource for Mogi Source, MogiVector for Mogi vectors]
    @param plotFlag: Boolean flag for plotting the clustermap of distances
    @param histIdx: If True, rows/columns are labeled 'Configuration NN' and the
                    record of perturbed parameters is returned as a second value.
                    If False, rows/columns are labeled with the perturbed parameters
    @param fontsize: Fontsize adjustments

    @return cg if plotFlag is True (the generated clustermap), otherwise
            dist_mat (pandas DataFrame of the calculated distances). When
            histIdx is True the result is a tuple (cg or dist_mat, history),
            where history is the record of the perturbed pipeline item parameters

    @raise ValueError: If ca_type is not one of the supported comparison types
    """
    # colorbar title for each supported comparison type
    titles = {
        'PCA': 'PCA Vector Distance Similarity',
        'MogiSource': 'Mogi Source Distance [deg+km]',
        'MogiVector': 'Summed Mogi Vector Distance [mm]',
    }
    if ca_type not in titles:
        # Fail early: otherwise no branch below runs and the accumulator is
        # either undefined or silently reused from a previous pair.
        raise ValueError("Unsupported ca_type '" + str(ca_type) +
                         "'; expected one of " + ', '.join(sorted(titles)))
    ctitle = titles[ca_type]

    # a history of the perturbed pipeline item
    history = []
    for runInfo in pipeline.getMetadataHistory():
        for stageItem in runInfo:
            if ap_name in stageItem:
                history.append(stageItem.rsplit(ap_name)[1].strip('[]'))

    # number of runs
    results = pipeline.RA_results
    num_results = len(results)
    dist_mat = np.zeros([num_results, num_results])

    # compute distances between all pairs of runs (upper triangle only)
    for i in range(num_results):
        res_i = results[i][ca_name]

        # Everything that depends only on run i is computed once here rather
        # than once per (i, j) pair.
        # NOTE(review): assumes getStationCoords / dirEigenvectors / MogiVectors
        # give the same output for the same input on repeated calls - confirm.
        if ca_type == 'PCA':
            rstation_list = res_i['labels']
            num_stations = len(rstation_list)
            # station list of run i is used for both runs of the pair
            coord_list = pbo_util.getStationCoords(
                pipeline.data_fetcher.meta_data, rstation_list)
            # eigenvectors (lat and lon) for the one run
            _, _, EV1_lat, EV1_lon, _ = pbo_tools.dirEigenvectors(
                coord_list, res_i['CA'].components_[0])
        elif ca_type == 'MogiVector':
            rstation_list = res_i['labels']
            num_stations = len(rstation_list)
            coord_list = np.array(
                pbo_util.getStationCoords(pipeline.data_fetcher.meta_data,
                                          rstation_list))
            mogi_x_1, mogi_y_1 = MogiVectors(res_i, coord_list[:, 0],
                                             coord_list[:, 1])
            # as all pca amplitudes are the same, to scale Mogi to difference in mm
            amplitude_sq = (results[0][ca_name]['pca_amplitude'])**2
        else:
            # 'MogiSource': for now, just uses lat, lon, and depth
            ev1 = np.array([res_i['lat'], res_i['lon'], res_i['depth']])

        for j in range(i, num_results):
            res_j = results[j][ca_name]

            if ca_type == 'PCA':
                # eigenvectors for the other run
                _, _, EV2_lat, EV2_lon, _ = pbo_tools.dirEigenvectors(
                    coord_list, res_j['CA'].components_[0])
                summation = 0
                for k in range(num_stations):
                    ev1 = np.hstack((EV1_lat[k], EV1_lon[k]))
                    ev2 = np.hstack((EV2_lat[k], EV2_lon[k]))
                    # squared euclidean distance at each station
                    summation += sp.spatial.distance.euclidean(ev1, ev2)**2

            elif ca_type == 'MogiVector':
                # same comparison as eigenvectors, for the mogi modeled vectors
                mogi_x_2, mogi_y_2 = MogiVectors(res_j, coord_list[:, 0],
                                                 coord_list[:, 1])
                summation = 0
                for k in range(num_stations):
                    ev1 = np.hstack((mogi_x_1[k], mogi_y_1[k]))
                    ev2 = np.hstack((mogi_x_2[k], mogi_y_2[k]))
                    summation += sp.spatial.distance.euclidean(ev1, ev2)**2
                summation *= amplitude_sq

            else:
                ev2 = np.array([res_j['lat'], res_j['lon'], res_j['depth']])
                summation = sp.spatial.distance.euclidean(ev1, ev2)**2

            dist_mat[i][j] = np.sqrt(summation)

    # mirror the upper triangle; the diagonal holds self-distances
    dist_mat += dist_mat.transpose()
    if histIdx:
        config_labels = [
            'Configuration ' + str(ii).zfill(2) for ii in range(len(history))
        ]
        dist_mat = pd.DataFrame(dist_mat,
                                index=config_labels,
                                columns=config_labels)
    else:
        dist_mat = pd.DataFrame(dist_mat, index=history, columns=history)

    if not plotFlag:
        if histIdx:
            return dist_mat, history
        return dist_mat

    cg = sns.clustermap(dist_mat)
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(),
             rotation=0,
             fontsize=fontsize)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(),
             rotation=90,
             fontsize=fontsize)
    cg.cax.set_title(ctitle, fontsize=fontsize)
    if histIdx:
        return cg, history
    return cg
    def process(self, obj_data):
        '''
        Plot the General Component Analysis results stored in obj_data.

        Draws a gnomonic Basemap of the station area, the stations, and the
        selected PCA eigenvector at each station. For the non-vertical case
        it can additionally draw the Mogi source with its modeled vectors
        (when self.mogi_name is set) and error ellipses (when self.errorE
        is true).

        Side effects: opens a new matplotlib figure, sets self.dir_sign,
        multiplies the stored 'Projection' result in place by the direction
        sign, and saves the basemap in the obj_data results under
        self.str_description.

        @param obj_data: Data Wrapper that holds component analysis HPCA
        '''
        HPCA_name = self.comp_name
        Mogi_name = self.mogi_name
        pca_comp = self.pca_comp
        scaleFactor = self.scaleFactor
        offset = self.offset
        vertical = self.pca_dir == 'V'

        plt.figure()

        meta_data = obj_data.info()
        try:
            station_list = obj_data.get().minor_axis
        except AttributeError:
            # data container without a minor_axis: fall back to its keys
            station_list = list(obj_data.get().keys())

        lat_range, lon_range = pbo_utils.getLatLonRange(meta_data, station_list)
        coord_list = pbo_utils.getStationCoords(meta_data, station_list)

        # Create a map projection of area
        bmap = Basemap(llcrnrlat=lat_range[0] - offset,
                       urcrnrlat=lat_range[1] + offset,
                       llcrnrlon=lon_range[0] - offset,
                       urcrnrlon=lon_range[1] + offset,
                       projection='gnom',
                       lon_0=np.mean(lon_range),
                       lat_0=np.mean(lat_range),
                       resolution=self._bmap_res)

        bmap.drawmapboundary(fill_color='white')

        # Draw just coastlines, no lakes
        for i, cp in enumerate(bmap.coastpolygons):
            if bmap.coastpolygontypes[i] < 2:
                bmap.plot(cp[0], cp[1], 'k-')

        parallels = np.arange(np.round(lat_range[0] - offset, decimals=1),
                              np.round(lat_range[1] + offset, decimals=1), .1)
        meridians = np.arange(np.round(lon_range[0] - offset, decimals=1),
                              np.round(lon_range[1] + offset, decimals=1), .1)

        bmap.drawmeridians(meridians, labels=[0, 0, 0, 1], fontsize=14)
        bmap.drawparallels(parallels, labels=[1, 0, 0, 0], fontsize=14)

        pca_results = obj_data.getResults()[HPCA_name]
        pca = pca_results['CA']

        lonscale = 1
        latscale = 1

        # The two directions share all of the drawing code; they differ only
        # in the pdir argument and in the vertical offset of station labels.
        if vertical:
            eigen = pbo_tools.dirEigenvectors(coord_list,
                                              pca.components_[pca_comp],
                                              pdir='V')
            label_dy = 100
        else:
            eigen = pbo_tools.dirEigenvectors(coord_list,
                                              pca.components_[pca_comp])
            label_dy = 800
        station_lat_list, station_lon_list, ev_lat_list, ev_lon_list, dir_sign = eigen

        self.dir_sign = dir_sign
        pca_results['Projection'] *= dir_sign
        ev_lat_list *= latscale
        ev_lon_list *= lonscale

        # Plot station coords; the label is shifted depending on the sign of
        # the eigenvector components at that station.
        for idx, coord in enumerate(coord_list):
            bmap.plot(coord[1], coord[0], 'bo', markersize=8, latlon=True)
            x, y = bmap(coord[1], coord[0])
            plt.text(x - (1 + np.sign(ev_lon_list[idx])) * 900 + 250,
                     y - (1 + np.sign(ev_lat_list[idx])) * label_dy + 450,
                     station_list[idx],
                     fontsize=14)

        bmap.quiver(station_lon_list, station_lat_list, ev_lon_list,
                    ev_lat_list, latlon=True, scale=scaleFactor)

        # Reference arrow (labeled '20%') placed 10% in from the lower-left corner
        ax_x = plt.gca().get_xlim()
        ax_y = plt.gca().get_ylim()
        ref_x = ax_x[0] + .1 * (ax_x[1] - ax_x[0])
        ref_y = ax_y[0] + .1 * (ax_y[1] - ax_y[0])
        x, y = bmap(ref_x, ref_y, inverse=True)
        bmap.quiver(x, y, 0, .2, latlon=True, scale=scaleFactor,
                    headwidth=3, headlength=3)
        plt.text(ref_x - 650, ref_y - 1000, '20%', fontsize=14)

        if not vertical:
            # Plotting Mogi source
            if Mogi_name is not None:
                mogi_res = obj_data.getResults()[Mogi_name]
                bmap.plot(mogi_res['lon'], mogi_res['lat'], "g^",
                          markersize=10, latlon=True)
                mogi_x_disp, mogi_y_disp = mogi.MogiVectors(
                    mogi_res, station_lat_list, station_lon_list)
                bmap.quiver(station_lon_list, station_lat_list,
                            mogi_x_disp * dir_sign, mogi_y_disp * dir_sign,
                            latlon=True, scale=scaleFactor, color='red')

            # Plot error ellipses for the PCA
            if self.errorE:
                from matplotlib.patches import Ellipse

                ax = plt.gca()
                yScale = (bmap.urcrnrlat - bmap.llcrnrlat) / scaleFactor
                xScale = (bmap.urcrnrlon - bmap.llcrnrlon) / scaleFactor
                midY = (bmap.urcrnrlat + bmap.llcrnrlat) / 2
                midX = (bmap.urcrnrlon + bmap.llcrnrlon) / 2

                # Effective sample size from an exponential correlation
                # model with time constant self.KF_tau
                n = len(pca_results['Projection'][:, 0])
                tau = self.KF_tau
                delta_t = np.arange(-(n - 1), n + 1)
                mseq = (1 - np.abs(delta_t) / n)
                rdelt = np.exp(-np.abs(delta_t) / tau)
                neff = n / np.sum(mseq * rdelt)

                eigval = pca.explained_variance_
                n_comp = pca.components_.shape[0]
                aaTs = [
                    np.outer(pca.components_[ii, :], pca.components_[ii, :].T)
                    for ii in range(n_comp)
                ]
                VVs = [
                    eigval[ii] / neff * np.sum(
                        [eigval[k] / (eigval[k] - eigval[ii])**2 * aaTs[k]
                         for k in range(n_comp) if k != ii],
                        axis=0)
                    for ii in range(n_comp)
                ]
                # NOTE(review): uses component 0 regardless of pca_comp -
                # confirm this is intended when pca_comp != 0.
                sigmas = np.sqrt(np.diag(VVs[0]))

                for kk in range(len(station_lon_list)):
                    vlon = ev_lon_list[kk]
                    vlat = ev_lat_list[kk]
                    slon = station_lon_list[kk]
                    slat = station_lat_list[kk]
                    Elat = sigmas[2 * kk]
                    Elon = sigmas[2 * kk + 1]
                    # ellipse size in map coordinates, measured at the map center
                    cir_w, cir_h = (
                        np.array(bmap(midX + Elon / scaleFactor,
                                      midY + Elat / scaleFactor * .85))
                        - np.array(bmap(midX, midY)))

                    x, y = bmap(slon + vlon * xScale * .95,
                                slat + vlat * yScale * .85)
                    # if need to rotate ellipse, np.arctan2(vlat,vlon)*180/np.pi
                    etest = Ellipse(xy=(x, y), width=cir_w, height=cir_h,
                                    angle=0, edgecolor='k', fc='w', lw=1,
                                    zorder=-1)
                    ax.add_artist(etest)

        obj_data.addResult(self.str_description, bmap)
Exemple #3
0
    def process(self, obj_data):
        '''
        Finds the magma source (default-mogi) from PBO GPS data.

        Assumes time series columns are named ('dN', 'dE', 'dU'). Predicts location of the
        magma source using scipy.optimize.curve_fit

        The fit result is stored in the data wrapper (under self.str_description)
        as an ordered dictionary with the keys
        'lat'           = fitted parameter 0 (latitude)
        'lon'           = fitted parameter 1 (longitude)
        'depth'         = fitted parameter 2 (source depth)
        'amplitude'     = fitted parameter 3
        'ex_params'     = any remaining fitted parameters (depends on source type), else NaN
        'pca_amplitude' = amplitude returned by self.FitPCA, multiplied by pi
        'source_type'   = name of the source model that was actually fitted

        If a second parameter is present in self.ap_paramList it names the source
        model (a function of pbo_tools); an unknown name falls back to the Mogi
        source with a printed message.

        @param obj_data: Data object containing the results from the PCA stage
        '''
        h_pca_name = self.ap_paramList[0]()

        # Defaults: plain Mogi source with no extra fit parameters
        mag_source = pbo_tools.mogi
        ExScParams = ()
        source_type = 'mogi'

        if len(self.ap_paramList) >= 2:
            # number of extra fit parameters needed by each source model
            exN = {'mogi': 0, 'finite_sphere': 1, 'closed_pipe': 1,
                   'constant_open_pipe': 1, 'rising_open_pipe': 2, 'sill': 0}
            requested = self.ap_paramList[1]()
            try:
                requested_key = requested.lower()
                mag_source = getattr(pbo_tools, requested_key)
                ExScParams = tuple(np.ones((exN[requested_key],)))
                source_type = requested_key
            except (AttributeError, KeyError):
                # Unknown model name: fit (and report) a Mogi source instead,
                # so the stored source_type matches the model that was fitted.
                mag_source = pbo_tools.mogi
                ExScParams = ()
                source_type = 'mogi'
                print('No source type called '+requested+', defaulting to a Mogi source.')

        pca_result = obj_data.getResults()[h_pca_name]
        projection = pca_result['Projection']
        start_date = pca_result['start_date']
        end_date = pca_result['end_date']

        ct, pca_amp = self.FitPCA(projection)
        # NOTE(review): the reason for scaling by pi is not visible here - confirm.
        pca_amp *= np.pi

        # Fitted displacement per station, grouped by direction:
        # 'dE' -> x, 'dN' -> y, 'dU' -> z
        xvs = []
        yvs = []
        zvs = []
        by_direction = {'dE': xvs, 'dN': yvs, 'dU': zvs}

        for label, data, err in obj_data.getIterator():
            if label not in by_direction:
                print('Ignoring column: ', label)
                continue
            distance, _ = self.FitTimeSeries(data.loc[start_date:end_date], ct)
            by_direction[label].append(distance)

        # NOTE(review): 1e-6 looks like a unit conversion - confirm the units.
        xvs = np.array(xvs)*1e-6
        yvs = np.array(yvs)*1e-6
        zvs = np.array(zvs)*1e-6

        # observations ordered as all x, then all y, then all z
        ydata = np.hstack((xvs, yvs, zvs)).T
        station_list = obj_data.get().minor_axis
        meta_data = obj_data.info()
        station_coords = pbo_utils.getStationCoords(meta_data, station_list)

        # one (dimension, coord[0], coord[1]) entry per observation, in the
        # same x/y/z-major order as ydata
        xdata = []
        for dim in ('x', 'y', 'z'):
            for coord in station_coords:
                xdata.append((dim, coord[0], coord[1]))

        coord_range = np.array(pbo_utils.getLatLonRange(meta_data, station_list))

        # initial guess: center of the station bounding box
        lat_guess = np.mean(coord_range[0, :])
        lon_guess = np.mean(coord_range[1, :])

        fit = optimize.curve_fit(mag_source, xdata, ydata,
                                 (lat_guess, lon_guess, 5, 1e-4)+ExScParams)
        params = fit[0]

        res = collections.OrderedDict()

        res['lat'] = params[0]
        res['lon'] = params[1]
        res['depth'] = params[2]
        res['amplitude'] = params[3]
        if len(params) > 4:
            res['ex_params'] = params[4:]
        else:
            res['ex_params'] = np.nan
        res['pca_amplitude'] = pca_amp
        res['source_type'] = source_type

        obj_data.addResult(self.str_description, res)
def multiCaPlot(pipeline,
                mogiFlag=False,
                offset=.15,
                direction='H',
                pca_comp=0,
                scaleFactor=2.5,
                map_res='i'):
    '''
    The multiCaPlot function generates a geographic eigenvector plot of several pipeline runs

    This function plots multiple pipeline runs over perturbed pipeline
    parameters. The various perturbations are plotted more
    transparently (alpha=.25), while the mean eigenvector and mean Mogi
    inversion over all runs are plotted in solid blue and red

    Reads the 'GPCA' result (and the 'Mogi' result when mogiFlag is set)
    of every run in pipeline.RA_results and draws into a new matplotlib
    figure; nothing is returned.

    @param pipeline: The pipeline object with multiple runs
    @param mogiFlag: Flag to indicate plotting the Mogi source as well as the PCA
    @param offset: Offset for padding the corners of the generated map
    @param direction: Indicates the eigenvectors to plot. Only Horizontal component is currently supported ('H'); the value is not used
    @param pca_comp: Choose the PCA component to use (integer)
    @param scaleFactor: Size of the arrow scaling factor
    @param map_res: Map data resolution for Basemap ('c', 'i', 'h', 'f', or None)

    @raise ValueError: If the pipeline holds no run results
    '''
    num_runs = len(pipeline.RA_results)
    if num_runs == 0:
        # Without runs there is nothing to average or draw
        raise ValueError('multiCaPlot requires at least one pipeline run')

    # as this is a multi_ca_plot function, assumes GPCA
    plt.figure()

    meta_data = pipeline.data_generator.meta_data
    station_list = pipeline.data_generator.station_list

    lat_range, lon_range = pbo_tools.getLatLonRange(meta_data, station_list)
    coord_list = pbo_tools.getStationCoords(meta_data, station_list)

    # Create a map projection of area
    bmap = Basemap(llcrnrlat=lat_range[0] - offset,
                   urcrnrlat=lat_range[1] + offset,
                   llcrnrlon=lon_range[0] - offset,
                   urcrnrlon=lon_range[1] + offset,
                   projection='gnom',
                   lon_0=np.mean(lon_range),
                   lat_0=np.mean(lat_range),
                   resolution=map_res)

    bmap.drawmapboundary(fill_color='#41BBEC')
    bmap.fillcontinents(color='white')

    # Draw just coastlines, no lakes
    for i, cp in enumerate(bmap.coastpolygons):
        if bmap.coastpolygontypes[i] < 2:
            bmap.plot(cp[0], cp[1], 'k-')

    parallels = np.arange(np.round(lat_range[0] - offset, decimals=1),
                          np.round(lat_range[1] + offset, decimals=1), .1)
    meridians = np.arange(np.round(lon_range[0] - offset, decimals=1),
                          np.round(lon_range[1] + offset, decimals=1), .1)

    bmap.drawmeridians(meridians, labels=[0, 0, 0, 1])
    bmap.drawparallels(parallels, labels=[1, 0, 0, 0])

    # Plot station coords with their names
    for idx, coord in enumerate(coord_list):
        bmap.plot(coord[1],
                  coord[0],
                  'ko',
                  markersize=6,
                  latlon=True,
                  zorder=12)
        x, y = bmap(coord[1], coord[0])
        plt.text(x + 250, y - 450, station_list[idx], zorder=12)

    # accumulators for the mean eigenvector over all runs
    elatmean = np.zeros(len(station_list))
    elonmean = np.zeros_like(elatmean)
    # check if want to plot Mogi as well
    if mogiFlag:
        avg_mogi = np.array([0., 0.])
        mlatmean = np.zeros_like(elatmean)
        mlonmean = np.zeros_like(elatmean)

    # loop over each pipeline run
    for run_result in pipeline.RA_results:
        pca = run_result['GPCA']['CA']
        station_lat_list, station_lon_list, ev_lat_list, ev_lon_list, dir_sign = pbo_tools.dirEigenvectors(
            coord_list, pca.components_[pca_comp])

        elatmean += ev_lat_list
        elonmean += ev_lon_list
        # plot each run in light blue
        bmap.quiver(station_lon_list,
                    station_lat_list,
                    ev_lon_list,
                    ev_lat_list,
                    latlon=True,
                    scale=scaleFactor,
                    alpha=.25,
                    color='blue',
                    zorder=11)

        if mogiFlag:
            mogi_res = run_result['Mogi']
            avg_mogi += np.array([mogi_res['lon'], mogi_res['lat']])
            mogi_x_disp, mogi_y_disp = mogi.MogiVectors(
                mogi_res, station_lat_list, station_lon_list)
            # Apply this run's own direction sign before accumulating, so
            # the mean is the mean of the arrows actually drawn below.
            signed_x = mogi_x_disp * dir_sign
            signed_y = mogi_y_disp * dir_sign
            mlatmean += signed_y
            mlonmean += signed_x

            bmap.plot(mogi_res['lon'],
                      mogi_res['lat'],
                      "g^",
                      markersize=10,
                      latlon=True,
                      alpha=.25,
                      zorder=12)
            bmap.quiver(station_lon_list,
                        station_lat_list,
                        signed_x,
                        signed_y,
                        latlon=True,
                        scale=scaleFactor,
                        color='red',
                        alpha=.25,
                        zorder=11)

    # plot the mean ev in blue
    elatmean = elatmean / num_runs
    elonmean = elonmean / num_runs
    bmap.quiver(station_lon_list,
                station_lat_list,
                elonmean,
                elatmean,
                latlon=True,
                scale=scaleFactor,
                color='blue',
                alpha=1,
                zorder=11)
    if mogiFlag:
        # plot mean mogi results
        avg_mogi = avg_mogi / num_runs
        mlatmean = mlatmean / num_runs
        mlonmean = mlonmean / num_runs
        bmap.plot(avg_mogi[0],
                  avg_mogi[1],
                  "g^",
                  markersize=10,
                  latlon=True,
                  alpha=1,
                  zorder=12)
        bmap.quiver(station_lon_list,
                    station_lat_list,
                    mlonmean,
                    mlatmean,
                    latlon=True,
                    scale=scaleFactor,
                    color='red',
                    alpha=1,
                    zorder=11)

    # Reference arrow (labeled '20%') placed 10% in from the lower-left corner
    ax_x = plt.gca().get_xlim()
    ax_y = plt.gca().get_ylim()
    ref_x = ax_x[0] + .1 * (ax_x[1] - ax_x[0])
    ref_y = ax_y[0] + .1 * (ax_y[1] - ax_y[0])
    x, y = bmap(ref_x, ref_y, inverse=True)
    bmap.quiver(x,
                y,
                0,
                .2,
                latlon=True,
                scale=scaleFactor,
                headwidth=3,
                headlength=3,
                zorder=11)
    plt.text(ref_x - 650, ref_y - 1000, '20%', zorder=11)