Ejemplo n.º 1
0
def _moran_loc_from_rose_calc(rose):
    """
    Compute one ``Moran_Local`` per column of ``rose.Y``.

    The global NumPy random state is captured up front and restored after
    each computation, so both statistics start from the same random state
    and the caller's state is left as it was found.

    Returns the pair ``(moran_locy, moran_locx)``, built from ``rose.Y[:, 0]``
    and ``rose.Y[:, 1]`` respectively, both using ``rose.w``.
    """
    rng_snapshot = np.random.get_state()
    local_morans = []
    for column in (0, 1):
        local_morans.append(Moran_Local(rose.Y[:, column], rose.w))
        # rewind the generator so the next run (and the caller) sees the
        # state that was active on entry
        np.random.set_state(rng_snapshot)
    return tuple(local_morans)
Ejemplo n.º 2
0
def test_plot_local_autocorrelation():
    """Smoke-test ``plot_local_autocorrelation`` on the Columbus HOVAL column."""
    shapefile = examples.load_example('Columbus').get_path('columbus.shp')
    gdf = gpd.read_file(shapefile)

    hoval = gdf['HOVAL'].values
    queen_w = Queen.from_dataframe(gdf)
    queen_w.transform = 'r'

    lisa = Moran_Local(hoval, queen_w)

    # plain call with a significance threshold only
    figure, _ = plot_local_autocorrelation(lisa, gdf, 'HOVAL', p=0.05)
    plt.close(figure)

    # quadrant highlighting plus a region mask, without equal aspect
    masked_options = dict(p=0.05,
                          region_column='POLYID',
                          aspect_equal=False,
                          mask=['1', '2', '3'],
                          quadrant=1)
    figure, _ = plot_local_autocorrelation(lisa, gdf, 'HOVAL',
                                           **masked_options)
    plt.close(figure)

    # the mask values '100', '200', '300' are expected to raise ValueError
    assert_raises(ValueError, plot_local_autocorrelation, lisa,
                  gdf, 'HOVAL', p=0.05, region_column='POLYID',
                  mask=['100', '200', '300'], quadrant=1)
Ejemplo n.º 3
0
def test_moran_loc_bv_scatterplot():
    """Smoke-test ``_moran_loc_bv_scatterplot`` on the Guerry example data."""
    guerry = gpd.read_file(examples.get_path('Guerry.shp'))
    suicides = guerry['Suicids'].values
    donations = guerry['Donatns'].values
    queen_w = Queen.from_dataframe(guerry)
    queen_w.transform = 'r'

    # univariate statistic first, then the bivariate one
    univariate = Moran_Local(donations, queen_w)
    bivariate = Moran_Local_BV(suicides, donations, queen_w)

    # defaults only (no p value), then p value without equal aspect
    for options in ({}, dict(p=0.05, aspect_equal=False)):
        figure, _ = _moran_loc_bv_scatterplot(bivariate, **options)
        plt.close(figure)

    # a univariate Moran_Local is expected to raise ValueError
    assert_raises(ValueError, _moran_loc_bv_scatterplot, univariate, p=0.5)
    # p combined with an explicit scatter colour is expected to warn
    assert_warns(UserWarning,
                 _moran_loc_bv_scatterplot,
                 bivariate,
                 p=0.5,
                 scatter_kwds=dict(c='r'))
Ejemplo n.º 4
0
def _test_calc_moran_loc(gdf, var='HOVAL'):
    """Return ``Moran_Local`` for column ``var`` of ``gdf``.

    The weights are Queen contiguity weights built from ``gdf`` with the
    transform set to ``'r'``.
    """
    values = gdf[var].values
    queen_w = Queen.from_dataframe(gdf)
    queen_w.transform = 'r'
    return Moran_Local(values, queen_w)
Ejemplo n.º 5
0
def test_moran_loc_scatterplot():
    """Smoke-test ``_moran_loc_scatterplot`` with several option sets."""
    columbus = _test_data_columbus()

    income = columbus['INC'].values
    hoval = columbus['HOVAL'].values
    queen_w = Queen.from_dataframe(columbus)
    queen_w.transform = 'r'

    lisa = Moran_Local(hoval, queen_w)
    global_bv = Moran_BV(income, hoval, queen_w)

    option_sets = (
        # no p value
        {},
        # p value, no equal aspect, custom fit line
        dict(p=0.05, aspect_equal=False,
             fitline_kwds=dict(color='#4393c3')),
        # p value with zstandard=False
        dict(p=0.05, zstandard=False,
             fitline_kwds=dict(color='#4393c3')),
        # no p value with zstandard=False
        dict(zstandard=False, fitline_kwds=dict(color='#4393c3')),
    )
    for options in option_sets:
        figure, _ = _moran_loc_scatterplot(lisa, **options)
        plt.close(figure)

    # a Moran_BV object is expected to raise ValueError
    raises(ValueError, _moran_loc_scatterplot, global_bv, p=0.5)
    # p combined with an explicit scatter colour is expected to warn
    warns(UserWarning, _moran_loc_scatterplot, lisa, p=0.5,
          scatter_kwds=dict(c='#4393c3'))
Ejemplo n.º 6
0
def test_lisa_cluster():
    """Smoke-test ``lisa_cluster`` on the Columbus HOVAL column."""
    gdf = gpd.read_file(examples.get_path('columbus.shp'))

    hoval = gdf['HOVAL'].values
    queen_w = Queen.from_dataframe(gdf)
    queen_w.transform = 'r'

    figure, _ = lisa_cluster(Moran_Local(hoval, queen_w), gdf)
    plt.close(figure)
Ejemplo n.º 7
0
def test_plot_local_autocorrelation():
    """Smoke-test ``plot_local_autocorrelation`` with and without a mask."""
    gdf = gpd.read_file(examples.get_path('columbus.shp'))

    hoval = gdf['HOVAL'].values
    queen_w = lp.Queen.from_dataframe(gdf)
    queen_w.transform = 'r'

    lisa = Moran_Local(hoval, queen_w)

    option_sets = (
        # significance threshold only
        dict(p=0.05),
        # quadrant highlighting plus a region mask
        dict(p=0.05, region_column='POLYID',
             mask=['1', '2', '3'], quadrant=1),
    )
    for options in option_sets:
        figure, _ = plot_local_autocorrelation(lisa, gdf, 'HOVAL',
                                               **options)
        plt.close(figure)
Ejemplo n.º 8
0
def test_moran_loc_scatterplot():
    """Smoke-test the public ``moran_loc_scatterplot`` on Columbus HOVAL."""
    gdf = gpd.read_file(examples.get_path('columbus.shp'))

    hoval = gdf['HOVAL'].values
    queen_w = lp.Queen.from_dataframe(gdf)
    queen_w.transform = 'r'

    lisa = Moran_Local(hoval, queen_w)

    option_sets = (
        # p value only
        dict(p=0.05),
        # p value with a custom fit-line colour
        dict(p=0.05, fitline_kwds=dict(color='#4393c3')),
    )
    for options in option_sets:
        figure, _ = moran_loc_scatterplot(lisa, **options)
        plt.close(figure)
Ejemplo n.º 9
0
def test_moran_loc_scatterplot():
    """Smoke-test ``_moran_loc_scatterplot`` on the Columbus example data."""
    shapefile = examples.load_example('Columbus').get_path('columbus.shp')
    gdf = gpd.read_file(shapefile)

    income = gdf['INC'].values
    hoval = gdf['HOVAL'].values
    queen_w = Queen.from_dataframe(gdf)
    queen_w.transform = 'r'

    lisa = Moran_Local(hoval, queen_w)
    global_bv = Moran_BV(income, hoval, queen_w)

    def _draw_and_close(**options):
        # draw one scatterplot with the given options and release the figure
        figure, _ = _moran_loc_scatterplot(lisa, **options)
        plt.close(figure)

    # no p value
    _draw_and_close()
    # p value, no equal aspect, custom fit line
    _draw_and_close(p=0.05, aspect_equal=False,
                    fitline_kwds=dict(color='#4393c3'))
    # p value with zstandard=False
    _draw_and_close(p=0.05, zstandard=False,
                    fitline_kwds=dict(color='#4393c3'))
    # no p value with zstandard=False
    _draw_and_close(zstandard=False, fitline_kwds=dict(color='#4393c3'))

    # a Moran_BV object is expected to raise ValueError
    assert_raises(ValueError, _moran_loc_scatterplot, global_bv, p=0.5)
    # p combined with an explicit scatter colour is expected to warn
    assert_warns(UserWarning,
                 _moran_loc_scatterplot,
                 lisa,
                 p=0.5,
                 scatter_kwds=dict(c='#4393c3'))
Ejemplo n.º 10
0
def test_moran_scatterplot():
    """Smoke-test ``moran_scatterplot`` with each kind of Moran object."""
    guerry = _test_data()
    suicides = guerry['Suicids'].values
    donations = guerry['Donatns'].values
    queen_w = Queen.from_dataframe(guerry)
    queen_w.transform = 'r'

    # build the four `esda.moran` objects (construction order is kept)
    global_uni = Moran(donations, queen_w)
    global_bv = Moran_BV(donations, suicides, queen_w)
    local_uni = Moran_Local(donations, queen_w)
    local_bv = Moran_Local_BV(donations, suicides, queen_w)

    # every object is plotted with p=0.05; only the global univariate one
    # additionally switches off the equal aspect
    cases = (
        (global_uni, dict(aspect_equal=False)),
        (local_uni, {}),
        (global_bv, {}),
        (local_bv, {}),
    )
    for statistic, extra in cases:
        figure, _ = moran_scatterplot(statistic, p=0.05, **extra)
        plt.close(figure)
Ejemplo n.º 11
0
def test_moran_scatterplot():
    """Smoke-test ``moran_scatterplot`` on the Guerry example data."""
    guerry = gpd.read_file(examples.get_path('Guerry.shp'))
    suicides = guerry['Suicids'].values
    donations = guerry['Donatns'].values
    queen_w = Queen.from_dataframe(guerry)
    queen_w.transform = 'r'

    # build the four `esda.moran` objects (construction order is kept)
    global_uni = Moran(donations, queen_w)
    global_bv = Moran_BV(donations, suicides, queen_w)
    local_uni = Moran_Local(donations, queen_w)
    local_bv = Moran_Local_BV(donations, suicides, queen_w)

    # plot each object with a p value and close the figure straight away
    for statistic in (global_uni, local_uni, global_bv, local_bv):
        figure, _ = moran_scatterplot(statistic, p=0.05)
        plt.close(figure)
Ejemplo n.º 12
0
def test_moran_loc_bv_scatterplot():
    """Smoke-test ``_moran_loc_bv_scatterplot`` using the shared test data."""
    guerry = _test_data()
    suicides = guerry['Suicids'].values
    donations = guerry['Donatns'].values
    queen_w = Queen.from_dataframe(guerry)
    queen_w.transform = 'r'

    # univariate statistic first, then the bivariate one
    univariate = Moran_Local(donations, queen_w)
    bivariate = Moran_Local_BV(suicides, donations, queen_w)

    # defaults only (no p value)
    figure, _ = _moran_loc_bv_scatterplot(bivariate)
    plt.close(figure)

    # p value without equal aspect
    figure, _ = _moran_loc_bv_scatterplot(bivariate, p=0.05,
                                          aspect_equal=False)
    plt.close(figure)

    # a univariate Moran_Local is expected to raise ValueError
    raises(ValueError, _moran_loc_bv_scatterplot, univariate, p=0.5)
    # p combined with an explicit scatter colour is expected to warn
    warns(UserWarning, _moran_loc_bv_scatterplot, bivariate, p=0.5,
          scatter_kwds=dict(c='r'))
Ejemplo n.º 13
0
    #Spatial Weights - select one
    #w = weights.Queen.from_dataframe(sa_df, idVariable="region_name") # Queen Contiguity Matrix
    #w = weights.Rook.from_dataframe(sa_df, idVariable="region_name")  # Rook contiguity Matrix
    w = weights.distance.KNN.from_dataframe(sa_df, ids="REG_NAME", k=6) # K-Nearest Neighbors

    w.transform = "R"
    
    sa_df["lag_infections"] = weights.lag_spatial(w, sa_df[week])
    
    # Global spatial autocorrelation
    y = sa_df[week]
    moran = Moran(y, w)
    
    # Local spatial autocorrelation
    m_local = Moran_Local(y, w)
    lisa = m_local.Is
    
    
    # set CRS
    sa_df = sa_df.to_crs("EPSG:3857")

    #Plot map
    fig, ax = plt.subplots(figsize=(9,9))
    lisa_cluster(m_local, sa_df, p=0.05, figsize = (9,9),ax=ax)
    description = 'Weekly Covid-19 Spatial Autocorrelation'
    info_text = 'Hot- and coldspots indicates clusters of high and low infection rates. \nDonuts are regions with low infection-rates sorrounded by areas with high infection-rates. \nDiamonds are regions with high infection-rates sorrounded by regions with low infection-rates'
    ax.set_title(str(week_name), fontdict={'fontsize': 22}, loc='left')
    ax.annotate(description, xy=(0.325, 0.140), size=14, xycoords='figure fraction')
    ax.annotate(info_text, xy=(0.325, 0.090), size=8, xycoords='figure fraction')
Ejemplo n.º 14
0
    def run_stats(self):
        """Compute local Moran's I (LISA) for the selected layer and field.

        Reads the aggregation layer, the indicator field and the output path
        from the dialog widgets, then writes every feature of the layer to a
        shapefile with five extra attributes appended: ``LISA_P``
        (``p_sim``), ``LISA_Z`` (``z_sim``), ``LISA_Q`` (quadrant),
        ``LISA_I`` (local I) and ``LISA_C`` (quadrant where
        ``p_sim <= 0.05``, otherwise 0). The result is added to the current
        project and ``add_symbology`` is applied.

        When no output path is given, a temporary ``-geopublichealth.shp``
        file is used.

        Exceptions of type ``GeoPublicHealthException`` are reported through
        the message bar instead of propagating. In every case the OK button
        is re-enabled and the override cursor is restored.
        """
        self.admin_layer = self.cbx_aggregation_layer.currentLayer()
        field = self.cbx_indicator_field.currentField()
        self.output_file_path = self.le_output_filepath.text()

        try:
            self.button_box_ok.setDisabled(True)
            # noinspection PyArgumentList
            QApplication.setOverrideCursor(Qt.WaitCursor)
            # noinspection PyArgumentList
            QApplication.processEvents()

            if not self.admin_layer:
                raise NoLayerProvidedException

            # Only touch the layer once it is known to exist, so a missing
            # selection surfaces as NoLayerProvidedException rather than an
            # AttributeError on None.
            self.layer = QgsProject.instance().mapLayersByName(
                self.admin_layer.name())[0]

            if not self.output_file_path:
                # delete=False: only the generated name is needed, the file
                # itself is written later by the vector file writer.
                with NamedTemporaryFile(
                        delete=False,
                        suffix='-geopublichealth.shp') as temp_file:
                    self.output_file_path = temp_file.name
            else:
                # Create/truncate the target up front.
                with open(self.output_file_path, 'w'):
                    pass

            admin_layer_provider = self.layer.dataProvider()
            fields = admin_layer_provider.fields()

            if fields.indexFromName(self.name_field) != -1:
                raise FieldExistingException(field=self.name_field)

            fields.append(QgsField('LISA_P', QVariant.Double))
            fields.append(QgsField('LISA_Z', QVariant.Double))
            fields.append(QgsField('LISA_Q', QVariant.Int))
            fields.append(QgsField('LISA_I', QVariant.Double))
            # NOTE(review): declared as Double but filled with an int below;
            # left unchanged to keep the output schema stable.
            fields.append(QgsField('LISA_C', QVariant.Double))

            # The QgsVectorFileWriter constructor is deprecated since QGIS
            # 3.10, but QgsVectorFileWriter.create() does not flush features
            # until QGIS closes (https://github.com/qgis/QGIS/issues/35021),
            # so the constructor is kept for now.
            file_writer = QgsVectorFileWriter(self.output_file_path, 'utf-8',
                                              fields, QgsWkbTypes.Polygon,
                                              self.admin_layer.crs(),
                                              'ESRI Shapefile')

            # PySAL 2.0 API, see
            # https://github.com/pysal/pysal/blob/master/MIGRATING.md
            if self.cbx_contiguity.currentIndex() == 0:  # queen
                print('Info: Local Moran\'s using queen contiguity')
                w = Queen.from_shapefile(self.admin_layer.source())
            else:  # 1 for rook
                print('Info: Local Moran\'s using rook contiguity')
                w = Rook.from_shapefile(self.admin_layer.source())

            # PySAL 2.0 no longer exposes pysal.open, so the attribute table
            # is read with geopandas instead:
            # https://stackoverflow.com/questions/59455383/pysal-does-not-have-attribute-open
            import geopandas

            attribute_table = geopandas.read_file(
                self.admin_layer.source().replace('.shp', '.dbf'))

            y = attribute_table[str(field)]
            lm = Moran_Local(y, w, transformation="r", permutations=999)

            # Quadrant for significant observations, 0 otherwise.
            # The significance level could be made an option.
            sig_q = lm.q * (lm.p_sim <= 0.05)

            for i, feature in enumerate(self.admin_layer.getFeatures()):
                attributes = feature.attributes()
                # Same order as the LISA_* fields appended above.
                attributes.extend([
                    float(lm.p_sim[i]),
                    float(lm.z_sim[i]),
                    int(lm.q[i]),
                    float(lm.Is[i]),
                    int(sig_q[i]),
                ])

                new_feature = QgsFeature()
                new_feature.setAttributes(attributes)
                new_feature.setGeometry(QgsGeometry(feature.geometry()))
                file_writer.addFeature(new_feature)

            # Drop the writer before the file is reopened as a layer
            # (presumably this is what finalises the file on disk).
            del file_writer

            self.output_layer = QgsVectorLayer(self.output_file_path,
                                               "LISA Moran's I - " + field,
                                               'ogr')
            QgsProject.instance().addMapLayer(self.output_layer)

            self.add_symbology()

            self.signalStatus.emit(3, tr('Successful process'))

        except GeoPublicHealthException as e:
            display_message_bar(msg=e.msg, level=e.level, duration=e.duration)

        finally:
            self.button_box_ok.setDisabled(False)
            # noinspection PyArgumentList
            QApplication.restoreOverrideCursor()
            # noinspection PyArgumentList
            QApplication.processEvents()
Ejemplo n.º 15
0
def moran_gen(file):
    """Draw a LISA cluster map for the ``ind_100t`` column of ``file``.

    Spatial weights are read from the ArcGIS text weights file
    ``C:\\zoovision\\data\\Region1_count.txt``. A ``Moran_Local`` statistic is
    computed with ``transformation='r'`` and 999 permutations and plotted
    with ``lisa_cluster`` at ``p=0.05``.

    Parameters
    ----------
    file
        Table holding an ``ind_100t`` column. It is handed to
        ``lisa_cluster`` for plotting, so it is presumably a GeoDataFrame
        (despite the parameter name) -- TODO confirm against callers.

    Returns
    -------
    None
        The figure is created as a side effect and is not returned.
    """
    df = file
    y = df['ind_100t']

    # Raw string so the Windows backslashes are taken literally instead of
    # being parsed as (invalid) escape sequences.
    weights_file = ps.open(r'C:\zoovision\data\Region1_count.txt', 'r',
                           'arcgis_text')
    try:
        w = weights_file.read()
    finally:
        # Close the handle exactly once, even when reading fails.
        weights_file.close()

    moran_loc = Moran_Local(y, w, transformation='r', permutations=999)

    # lisa_cluster is left to create its own figure, so no separate
    # plt.subplots() call is made here (it would leave an empty figure).
    _, ax = lisa_cluster(moran_loc, df, p=0.05, figsize=(15, 10))

    ax.set_title(
        "Local Indicators of Spatial Association ",
        fontsize=35)
Ejemplo n.º 16
0
def generate_clusters(gdf: gpd.GeoDataFrame,
                      col: str,
                      crs: Optional[int] = None,
                      alpha: float = 0.005,
                      geom_column: str = "geometry") -> gpd.GeoDataFrame:
    """Label spatial clusters and outliers for one column of a geodataframe.


    Steps:

    1.  Optionally reproject, then fill the grid with ``fill_hex_grid``
    2.  Build a row-standardized Rook spatial weights matrix
    3.  Compute the spatially lagged version of the variable of interest
    4.  Compute and print global spatial autocorrelation (Moran's I)
    5.  Compute local spatial autocorrelation (the clusters) using LISA
        (local indicators of spatial autocorrelation)
    6.  Join the LISA results back onto the filled geodataframe

    The code should work for any geodataframe, but the workflow assumes the
    data lives on a hexagonal grid, i.e. polygons of approximately uniform
    weights. The
    https://pysal.org/libpysal/generated/libpysal.weights.Rook.html weighting
    links all polygons that share an edge, which requires a grid without
    islands. For that reason the grid is first filled via ``fill_hex_grid``;
    the cluster algorithm fails otherwise. Filled cells are meant to carry a
    zero index value, i.e. none of the datasets had any values on the hex.
    No other assumption is made about the data: the datasets may have
    different bboxes and may be sparse.


    Input:

    gdf          The source geodataframe, should be a hexagonal grid if using
                 this script as is
    col          The column with the data being modeled
    crs          A coordinate reference system, EPSG code. When given (and
                 truthy), the data is reprojected to it first
    alpha        The threshold of statistical significance used when
                 determining whether a cell is a cluster/outlier or not.
                 Defaults to 0.005. Such a low value is used because our data
                 typically contains large contrasts between areas of zero
                 index (forest, seas) and built-up areas.
                     - Larger values show the boundary between built-up and
                       nature
                     - Smaller values show contrasts within built-up areas
    geom_column  Name of the geometry column; passed on to ``fill_hex_grid``
                 and to the Rook weights constructor


    The output is the filled dataframe with 2 new columns:

    quadrant        The quadrant to which the observation belongs:
                    LL = low clusters = low values surrounded by low values
                    HH = high clusters = high values surrounded by high values
                    LH = low outliers = low values surrounded by high values
                    HL = high outliers = high values surrounded by low values
    significant     Whether the quadrant information is statistically
                    significant. The significance will depend on the number of
                    iterations and the random seed used in the process, as
                    polygons at the edge of significance may get slightly
                    different values at different runs.
    """

    # Reproject only when a CRS was requested
    if crs:
        gdf = gdf.to_crs(crs)

    # Rook contiguity needs a grid without islands
    gdf_filled = fill_hex_grid(gdf, geom_column=geom_column)

    # Row-standardized Rook contiguity weights
    rook_w = lps.weights.Rook.from_dataframe(gdf_filled, geom_col=geom_column)
    rook_w.set_transform("R")

    # Variable of interest next to its spatial lag
    values = gdf_filled[col]
    lagged = lps.weights.lag_spatial(rook_w, values)
    lag_name = f"{col}_lag"
    stats = pd.DataFrame(data={col: values, lag_name: lagged})

    # Global spatial autocorrelation
    global_moran = Moran(stats[col], rook_w)
    global_p = global_moran.p_sim
    print(f"\nGlobal spatial autocorrelation:\n"
          f"Moran's I:   {round(global_moran.I, 3)!s}"
          f"\np-value:     {round(global_p, 3)!s}")

    # Local spatial autocorrelation (LISA); pass seed=1 to Moran_Local if
    # absolute reproducibility is needed
    local_moran = Moran_Local(stats[col], rook_w, permutations=100000)

    # Flag the observations below the significance threshold
    stats["significant"] = local_moran.p_sim < alpha

    # Translate the numeric quadrant codes into their two-letter labels
    quadrant_labels = {1: "HH", 2: "LH", 3: "LL", 4: "HL"}
    stats["quadrant"] = local_moran.q
    stats["quadrant"] = stats["quadrant"].replace(quadrant_labels)

    # Summaries for the console
    quadrant_counts = stats["quadrant"].sort_values().value_counts()
    significant_counts = stats["significant"].value_counts()
    print(f"\nDistribution of clusters/outliers (quadrants):\n"
          f"{quadrant_counts!s}")
    print(f"\nSignificant clusters (using significance threshold "
          f"{alpha!s}):\n{significant_counts!s}")

    # Attach the LISA columns to the filled grid by index
    return gdf_filled.merge(stats[["quadrant", "significant"]],
                            how="left",
                            left_index=True,
                            right_index=True)