Esempio n. 1
0
def BoxPlotByCluster(DataDirectory, OutDirectory, fname_prefix,  raster_name, stream_order=1):
    """
    Make a boxplot of the results of the clustering compared to the raster
    specified by raster_name.

    The raster is sampled at the location of every point in
    ``<OutDirectory><fname_prefix>_profiles_clustered_SO<stream_order>.csv``
    and the sampled values are grouped by the ``cluster_id`` of the point.
    One box is drawn per cluster id and the figure is displayed with
    ``plt.show()``.

    Points are skipped when they fall outside the raster grid, when the raster
    value is NaN, or when the raster value is 10 or larger.

    Args:
        DataDirectory (str): directory of the raster. It is concatenated
            directly with raster_name.
        OutDirectory (str): directory of the clustered profiles csv. It is
            concatenated directly with the csv file name.
        fname_prefix (str): prefix of the clustered profiles csv file name.
        raster_name (str): file name of the raster.
        stream_order (int): stream order used to build the csv file name.

    Returns:
        None. The grouped values are printed and the plot is shown.
    """
    raster_path = DataDirectory + raster_name

    # read in the raster and its georeferencing
    this_raster = IO.ReadRasterArrayBlocks(raster_path)
    EPSG_string = IO.GetUTMEPSG(raster_path)
    _, _, ysize, _, _, _ = IO.GetGeoInfo(raster_path)
    CellSize, XMin, _, YMin, _ = IO.GetUTMMaxMin(raster_path)
    n_rows, n_cols = np.shape(this_raster)[:2]

    pts = PT.LSDMap_PointData(OutDirectory+fname_prefix+'_profiles_clustered_SO{}.csv'.format(stream_order),data_type ='csv')
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    cluster_id = pts.QueryData('cluster_id', PANDEX=True)

    # dict for the data: one list of sampled raster values per cluster id
    data = {k: [] for k in set(cluster_id)}
    for x, (north, east) in enumerate(zip(northing, easting)):
        # convert to rows and cols; a larger northing gives a smaller row index
        col_point = int((east - XMin) / CellSize)
        row_point = (ysize - 1) - int(round((north - YMin) / CellSize))

        # A negative index would silently wrap around to the other side of the
        # raster and an index that is too large would raise IndexError, so
        # points that do not fall on the grid are ignored.
        if not (0 <= row_point < n_rows and 0 <= col_point < n_cols):
            continue

        # check for data at this cell
        this_value = this_raster[row_point][col_point]
        # NOTE(review): the `< 10` cut-off is inherited from the original
        # code; presumably it is specific to the raster being plotted - confirm.
        if not np.isnan(this_value) and this_value < 10:
            data[cluster_id[x]].append(this_value)

    print(data)

    # without any cluster there is nothing to draw
    if not data:
        print("No clusters found, nothing to plot")
        return

    # now make a boxplot: parallel lists of cluster ids and their values
    labels = list(data.keys())
    these_data = list(data.values())

    plt.boxplot(these_data)
    plt.xticks(range(1, len(labels) + 1), labels)
    plt.show()
Esempio n. 2
0
def GetLithologyPercentages(DataDirectory, OutDirectory, fname_prefix, raster_name, stream_order=1):
    """
    Get the percentage of the nodes in each cluster that drain each lithology.

    The raster is sampled at the location of every point in
    ``<OutDirectory><fname_prefix>_profiles_clustered_SO<stream_order>.csv``
    and the sampled values are grouped by the ``cluster_id`` of the point.
    For every cluster the counts of each raster value are printed, followed by
    one line per raster value with its percentage of the nodes in the cluster.

    Points are skipped when they fall outside the raster grid or when the
    raster value is NaN.

    Args:
        DataDirectory (str): directory of the raster. It is concatenated
            directly with raster_name.
        OutDirectory (str): directory of the clustered profiles csv. It is
            concatenated directly with the csv file name.
        fname_prefix (str): prefix of the clustered profiles csv file name.
        raster_name (str): file name of the raster.
        stream_order (int): stream order used to build the csv file name.

    Returns:
        None. The results are only printed.
    """
    from collections import Counter

    raster_path = DataDirectory + raster_name

    # read in the raster and its georeferencing
    this_raster = IO.ReadRasterArrayBlocks(raster_path)
    EPSG_string = IO.GetUTMEPSG(raster_path)
    _, _, ysize, _, _, _ = IO.GetGeoInfo(raster_path)
    CellSize, XMin, _, YMin, _ = IO.GetUTMMaxMin(raster_path)
    n_rows, n_cols = np.shape(this_raster)[:2]

    pts = PT.LSDMap_PointData(OutDirectory+fname_prefix+'_profiles_clustered_SO{}.csv'.format(stream_order),data_type ='csv')
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    cluster_id = pts.QueryData('cluster_id', PANDEX=True)

    # dict for the data: one list of sampled raster values per cluster id
    data = {k: [] for k in set(cluster_id)}
    for x, (north, east) in enumerate(zip(northing, easting)):
        # convert to rows and cols; a larger northing gives a smaller row index
        col_point = int((east - XMin) / CellSize)
        row_point = (ysize - 1) - int(round((north - YMin) / CellSize))

        # A negative index would silently wrap around to the other side of the
        # raster and an index that is too large would raise IndexError, so
        # points that do not fall on the grid are ignored.
        if not (0 <= row_point < n_rows and 0 <= col_point < n_cols):
            continue

        # check for data at this cell
        this_value = this_raster[row_point][col_point]
        if not np.isnan(this_value):
            data[cluster_id[x]].append(this_value)

    # you have the values. now what percentage are each?
    for liths in data.values():
        counts = Counter(liths)
        print(counts)
        n_nodes = len(liths)
        for lith, n_lith in counts.items():
            print(lith, ": ", n_lith/n_nodes * 100)
Esempio n. 3
0
def MakeBoxPlotsKsnLithology(DataDirectory, fname_prefix, raster_name, theta=0.45, label_list=None, EPSG_string='epsg:32611'):
    """
    Make boxplots of ksn compared to lithology raster. Lithology should have
    integer values for the different rock types (rock type with 0 is excluded).

    The ksn points are read from ``<DataDirectory><fname_prefix>_ksn.csv``, the
    lithology raster is sampled at every point and the ksn values are grouped
    by lithology code. The figure is saved to
    ``<DataDirectory><fname_prefix>_boxplot_lith_ksn.png``. The median and the
    interquartile range of every group are printed, followed by a two-sample
    Kolmogorov-Smirnov test for every pair of groups.

    Lithology codes are the unique raster values except 0, NaN and the no-data
    value reported for the raster. Points that fall outside the raster grid or
    on an excluded value are skipped.

    Args:
        DataDirectory (str): directory of the raster and of the ksn csv. It is
            concatenated directly with the file names.
        fname_prefix (str): prefix of the ksn csv and of the output figure.
        raster_name (str): file name of the lithology raster.
        theta (float): currently not used by this function.
        label_list (list): labels for the different units, which must be the
            same length as the number of lithology codes. If none is passed
            then the lithology codes are used for labelling.
        EPSG_string (str): EPSG code passed to GetUTMEastingNorthing to get the
            easting and northing of the points. Defaults to the value that was
            previously hard coded.

    Returns:
        None. The figure is written to disk and the statistics are printed.
    """
    from itertools import combinations

    from scipy import stats

    # a None default avoids sharing one list object between calls
    label_list = [] if label_list is None else label_list
    raster_path = DataDirectory + raster_name

    # read in the raster and its georeferencing
    this_raster = IO.ReadRasterArrayBlocks(raster_path)
    print(EPSG_string)
    NDV, _, ysize, _, _, _ = IO.GetGeoInfo(raster_path)
    CellSize, XMin, _, YMin, _ = IO.GetUTMMaxMin(raster_path)
    n_rows, n_cols = np.shape(this_raster)[:2]

    pts = PT.LSDMap_PointData(DataDirectory+fname_prefix+'_ksn.csv',data_type ='csv')
    print(pts)
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    ksn = pts.QueryData('ksn', PANDEX=True)

    # dict for the data: one list of ksn values per lithology code.
    # The excluded values are filtered explicitly rather than by dropping the
    # first unique value, which is only correct when that value happens to be
    # the 0 / no-data one.
    data = {}
    for code in np.unique(this_raster):
        is_no_data = NDV is not None and code == NDV
        if np.isnan(code) or code == 0 or is_no_data:
            continue
        data[code] = []

    for x, (north, east) in enumerate(zip(northing, easting)):
        # convert to rows and cols; a larger northing gives a smaller row index
        col_point = int((east - XMin) / CellSize)
        row_point = (ysize - 1) - int(round((north - YMin) / CellSize))

        # A negative index would silently wrap around to the other side of the
        # raster and an index that is too large would raise IndexError, so
        # points that do not fall on the grid are ignored.
        if not (0 <= row_point < n_rows and 0 <= col_point < n_cols):
            continue

        # 0, NaN and no-data are not keys of data, so they are skipped here
        this_raster_value = this_raster[row_point][col_point]
        if this_raster_value in data:
            data[this_raster_value].append(ksn[x])

    # without any lithology code there is nothing to draw or to compare
    if not data:
        print("No lithology codes found in the raster, nothing to plot")
        return

    # set up a figure
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5,5), sharex=True, sharey=True)

    # parallel sequences of lithology codes and their ksn values
    labels = tuple(data.keys())
    ksn_by_lithology = list(data.values())
    print(label_list)
    if label_list:
        labels = label_list
    print(labels)
    box = plt.boxplot(ksn_by_lithology, patch_artist=True)
    plt.xticks(range(1, len(labels) + 1), labels)
    plt.ylabel('$k_{sn}$', fontsize=14)

    print("========SOME KSN STATISTICS=========")
    for key, value in data.items():
        print("Key {}, median ksn = {}".format(key, np.median(value)))
        print("Key {}, IQR = {}".format(key, stats.iqr(value)))
    print("========================================")

    # change the colours for each lithology; zip stops at the shorter input,
    # so only the first four boxes get one of these colours
    colors = ['#60609fff', '#fdbb7fff', '#935353ff', '#f07b72ff']
    for patch, color in zip(box['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.9)
        patch.set_edgecolor('k')
    for cap in box['caps']:
        cap.set(color='k')
    for wh in box['whiskers']:
        wh.set(color='k')
    for med in box['medians']:
        med.set(color='k')
    for flier, color in zip(box['fliers'], colors):
        flier.set_markeredgecolor(color)
        flier.set_markerfacecolor(color)
        flier.set_markersize(2)

    ax.grid(color='0.8', linestyle='--', which='major', zorder=1)
    plt.title('Boxplots of $k_{sn}$ by lithology', fontsize=14)
    plt.savefig(DataDirectory+fname_prefix+'_boxplot_lith_ksn.png', dpi=300, transparent=True)
    # close the figure so that repeated calls do not accumulate open figures
    plt.close(fig)

    # Two-sample KS test for every pair of lithologies, to see if the ksn
    # distributions can be distinguished. The statistic and the p-value are
    # printed for each pair.
    for (key_a, ksn_a), (key_b, ksn_b) in combinations(data.items(), 2):
        print("KS test between {} and {}".format(key_a, key_b))
        # ks_2samp cannot compare an empty sample
        if not ksn_a or not ksn_b:
            print("Skipped: no ksn values for at least one of these lithologies")
            continue
        d, p = stats.ks_2samp(ksn_a, ksn_b)
        print(d, p)
Esempio n. 4
0
# This MUST come before you import the C hillshade pyx file if you are doing it
# this way.
####################
import pyximport
pyximport.install()
####################

from LSDPlottingTools import fast_hillshade as fasthill
import LSDPlottingTools.LSDMap_GDALIO as LSDMap_IO
import LSDPlottingTools.LSDMap_BasicPlotting as LSDMap_BP

# Location of the example raster used to build the hillshade.
Directory = "/mnt/SCRATCH/Dev/ExampleTopoDatasets/"
BackgroundRasterName = "indian_creek.bil"

raster = LSDMap_IO.ReadRasterArrayBlocks(Directory + BackgroundRasterName)
# Resolution handed to the hillshade routine: element [1] of the fourth value
# returned by GetGeoInfo.
# NOTE(review): presumably the pixel width of a GDAL-style geotransform - confirm.
data_res = LSDMap_IO.GetGeoInfo(Directory + BackgroundRasterName)[3][1]
try:
    NoDataValue = float(
        LSDMap_IO.getNoDataValue(Directory + BackgroundRasterName))
except TypeError:
    # float() raises TypeError for a non-numeric result such as None; fall
    # back to -9999.0 in that case.
    NoDataValue = -9999.0

# The shape of a 2D NumPy array is (rows, columns), in that order.
nrows, ncols = raster.shape

# LSDMappingTools hillshade
#hs = LSDMap_BP.Hillshade(raster)
#plt.imshow(hs, cmap="gray")
#plt.show()

# LSDRaster Cythonised version of hillshade
hs_nice = fasthill.Hillshade(raster, data_res, NoDataValue=NoDataValue)