Example #1
# Imports reconstructed for this snippet; prjpnt (used below) is a
# project-local coordinate-transform helper that is not shown here.
import shapefile as shp  # pyshp
from os.path import splitext
from osgeo import osr


# The class name is inferred from Example #2, which calls shp2df(shp_in, out_srs=...)
class shp2df(object):

    def __init__(self, shp_in, out_srs=None):
        print "\nOpening and reading shapefile '{0}'.".format(shp_in)
        try:
            self.__shp_rd = shp.Reader(shp_in)
        except shp.ShapefileException:
            exit("\nERROR -> File '{0}' not found".format(shp_in))

        # Import projection
        prj_file = splitext(shp_in)[0] + '.prj'
        try:
            shp_prj = open(prj_file)
        except IOError:
            exit("\nERROR -> Could not find projection file '{0}'.".format(prj_file))
        else:
            with shp_prj:
                prj_txt = shp_prj.read()
                self.__shp_srs = osr.SpatialReference()
                if self.__shp_srs.ImportFromESRI([prj_txt]) != 0:
                    exit("\nERROR -> Error importing the projection information from '{0}'.".format(shp_in))

        # Store requested spatial reference
        self.__out_srs = out_srs

        # If destination coordinates are specified
        if self.__out_srs:
            # Define the coordinates transformation
            self.__trans = prjpnt(self.__shp_srs, self.__out_srs)
        else:
            self.__trans = None

        # Initialize data loaded indicator
        self.__dataLoaded = False

        # Define coordinate labels
        self.__coo_lbl = ['SHP_X', 'SHP_Y']

        # An empty dictionary to store the data for internal representation
        self.__data = {}
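
A minimal usage sketch for the constructor above, assuming the class is named shp2df (that is how Example #2 invokes it); the shapefile name and EPSG code are hypothetical:

# Hypothetical usage; 'roads.shp' must sit next to a matching 'roads.prj'
out_srs = osr.SpatialReference()
out_srs.ImportFromEPSG(4326)  # reproject point coordinates to WGS84 lat/lon

roads = shp2df('roads.shp', out_srs=out_srs)
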
Example #2
# Imports reconstructed for this snippet; prjpnt, shp2df (see Example #1) and
# process_date_range are project-local helpers that are not shown here.
import re
from datetime import datetime as dt
from glob import glob
from os.path import basename, dirname, join, splitext

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
from osgeo import gdal, osr
from scipy.spatial import KDTree


def amp2xls(xls_in,  # args.xls_in
            amp_in=None,  # args.amp_in
            shp_in=None,  # args.shp_in
            ts_in=None,  # args.ts_in
            xls_epsg=4326,  # args.xls_epsg
            xls_sheet_in=['IS', 'PR', 'SC'],  # args.xls_sheet_in
            keep_bad=False,  # args.keep_bad
            ndval=-9999.0,  # args.ndval
            period="winter",  # args.period
            corr=['NIRI Average', 'CCI', 'CCI Class'],  # args.corr
            pkfile=None,  # args.pkfile
            csvfile=None,  # args.txtfile
            differential=False,  # args.differential
            prepend="",  # args.prepend
            verbose=False):  # args.verbose

    # Some default values that can be turned into arguments later on
    slng = 'Start GPS Longitude'  # XLS column containing the starting GPS longitude
    slat = 'Start GPS Latitude'  # XLS column containing the starting GPS latitude
    dtest = 'Date Tested'  # XLS column containing the date when the section of road was tested
    year = 'Year'  # XLS column containing the official year for the dataset
    ts_keys = ['VEL_STDEV', 'VEL']  # Keywords identifying the temporary scatterer raster files

    # Load the input excel file names
    xls_stack = glob(xls_in)
    if not xls_stack:
        exit("\nERROR -> No excel files were selected using '{0}'".format(xls_in))
    else:
        print "\nAnalyzing following excel files:"
        for x in xls_stack:
            print "- {0}".format(x)

    # Check that at least one additional source is selected
    if amp_in is None and shp_in is None and ts_in is None:
        exit("\nERROR -> At least one additional source (amplitude, SqueeSAR or temporary scatterer) must be chosen for data extraction.")

    # Define the destination (excel) spatial reference
    xls_srs = osr.SpatialReference()
    if xls_srs.ImportFromEPSG(int(xls_epsg)) != 0:
        exit("\nERROR -> Error setting the destination data spatial reference to EPSG:{0}".format(xls_epsg))

    # Define prepend string
    if prepend != "":
        prepend += "_"

    # Load the SAR amplitude stack file names
    af = ""
    if amp_in is not None:
        sar_stack = [f for f in glob(amp_in) if f[-4:] == '.tif']
        if not sar_stack:
            exit("\nERROR -> No SAR amplitude files were selected using '{0}'".format(amp_in))
        sar_stack.sort()  # Sort the files
        # Initialize SAR spatial reference
        sar_srs = osr.SpatialReference()
        af = "_AMP"

    # If selected, open the shapefile containing displacement data
    sf = ""
    if shp_in is not None:
        # Load shapefile data
        shp = shp2df(shp_in, out_srs=xls_srs)
        shp_dat = shp.getDF()
        # Initialize spatial search tree
        [shp_x_lbl, shp_y_lbl] = shp.getCooLabels()
        kdt = KDTree(shp_dat[[shp_x_lbl, shp_y_lbl]].values)
        sf = "_SHP"

    # If selected, load the temporary scatterer files
    tsf = ""
    if ts_in is not None:
        ts_stack = [f for f in glob(ts_in) if f[-4:] == '.tif']
        if not ts_stack:
            exit("\nERROR -> No temporary scatterer files were selected using '{0}'".format(ts_in))
        # Initialize the TS spatial reference
        ts_srs = osr.SpatialReference()
        tsf = "_TS"

    # Differential processing
    dif = ""
    if differential:
        dif = "_DIF"

    # Load the list of correlating values selected by the user
    clist = list(corr)
    ccl = None
    # Check if the user selected 'CCI Class'
    if clist.count('CCI Class') == 1:
        ccl = clist.index('CCI Class')
        clist[ccl] = 'CCI'

    # Define the last element of the output file names
    if len(clist) > 1:
        cf = "_many"
    else:
        cf = "_" + list(corr)[0]

    # Process all the files in the xls stack
    for xls_in in xls_stack:
        print "\nOpening excel file '{0}' as input".format(xls_in)
        try:
            xlfil = pd.ExcelFile(xls_in)
        except IOError:
            exit("\nERROR -> File '{0}' not found!".format(xls_in))
        except Exception as e:
            exit("\nERROR -> Error: '{0}'".format(e))

        # Load the list of sheets selected by the user
        xls_data = xlfil.parse(list(xls_sheet_in))

        # Open output excel file
        # If using XLSX, pandas requires openpyxl version 1.6.1 or higher,
        # but lower than 2.0.0.
        # When using XLS, the limit is 256 columns and 65535 rows.
        # TODO: move this decision to the end of the file, where the number
        #       of columns and rows is known, then select the driver and
        #       extension based on the number of rows and columns.
        # Apply splitext to the basename so the prepend string lands on the
        # file name rather than on the directory path
        name, ext = splitext(basename(xls_in))
        ext = '.xlsx'  # Force XLSX output for now (see TODO above)
        xls_out = join(dirname(xls_in), prepend + name + "_" + period + af + sf + tsf + dif + ext)
        print "- Creating excel file '{0}' as output".format(xls_out)
        try:
            writer = pd.ExcelWriter(xls_out)
        except IOError:
            exit("\nERROR -> File '{0}' not found!".format(xls_out))
        except Exception as e:
            exit("\nERROR -> Error: '{0}'".format(e))

        # Iterate over the selected sheets
        for sheet_in, xldata in xls_data.items():
            print "  - Processing input sheet '{0}'".format(sheet_in)

            # Clear NaN data
            xldata.fillna(0, inplace=True)

            lxldata = len(xldata)  # Length of the xldata frame
            npout_fields = list(clist)  # Copy of the corr list

            # Extract GPS start coordinates
            xls_coo = xldata[[slng, slat]].values

            # Evaluate the date range based on user selection
            meas_year = dt.strptime(str(xldata[year].iloc[0]), "%Y")
            if period == "year":
                dates = [dt.strptime(str(d), "%Y%m%d") for d in xldata[dtest].values]
                max_date = max(dates)
                min_date = max(dates) - relativedelta(years=1)
            elif period == "winter":
                min_date = meas_year - relativedelta(months=3)
                max_date = meas_year + relativedelta(months=3)
            elif period == "all":
                min_date = dt.min
                max_date = dt.max
            else:
                exit("\nERROR -> The defined period ({0}) is not allowed".format(period))

            if period == "all":
                print "    - Using all available dates."
            else:
                print "    - Range of dates ({0}): {1} -> {2}".format(period, dt.strftime(min_date, "%Y-%m-%d"), dt.strftime(max_date, "%Y-%m-%d"))

            # For each SAR amplitude file, extract the amplitude values at the GPS coordinates
            if amp_in is not None:
                # Find dates to process and define processing method
                dates = process_date_range(sar_stack, min_date, max_date, differential, prefix='A')
                if dates is None:
                    exit("\nERROR -> No dates found inside {}".format(sar_stack))

                # Process amplitude files
                diff_fields = []
                diff_dict = {}
                for f in sar_stack:
                    # Skip if date is out of range
                    processing = dates[f]['processing']
                    if processing == 'skip':
                        continue

                    # Open SAR amplitude raster files
                    if verbose:
                        print "      - Extracting amplitude values from '{0}'".format(f)
                    tf = gdal.Open(f)
                    if sar_srs.ImportFromWkt(tf.GetProjectionRef()) != 0:
                        exit("\nERROR -> Error importing the projection information from '{0}'.".format(f))
                    xls2sar = prjpnt(xls_srs, sar_srs)
                    geo = tf.GetGeoTransform()
                    tfb = tf.GetRasterBand(1)
                    sar_ndval = tfb.GetNoDataValue()
                    lng_max = tfb.XSize - 1
                    lat_max = tfb.YSize - 1

                    # Loop through the coordinates in the xls
                    amp = np.zeros(lxldata)
                    for i in range(lxldata):
                        # Convert xls coordinates to raster coordinates
                        [lng, lat] = xls2sar.prj_coo(xls_coo[i])

                        # Calculate pixel location in raster coordinates
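                        # (GDAL geotransform: geo[0]/geo[3] are the raster
                        # origin, geo[1]/geo[5] the pixel width/height, with
                        # geo[5] negative for north-up rasters)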
                        plng = int((lng - geo[0]) / geo[1])
                        plat = int((lat - geo[3]) / geo[5])

                        # Check if it's inside the raster file.
                        # If it is, gather the amplitude.
                        if plng < 0 or plng > lng_max or plat < 0 or plat > lat_max:
                            amp[i] = np.nan
                        else:
                            amp[i] = float(tfb.ReadAsArray(plng, plat, 1, 1))

                        # Check if amplitude is valid.
                        if amp[i] == 0 or amp[i] == ndval or amp[i] == sar_ndval:
                            amp[i] = np.nan

                        # Mark the row as bad if any corr value is negative
                        if np.any(xldata[clist].iloc[i] < 0):
                            amp[i] = np.nan

                    # Close gdal handles
                    tfb = None
                    tf = None

                    # If differential processing and differential frame, just
                    # store the data.
                    if differential:
                        # If differential frame
                        if processing == 'differential':
                            prev_amp = amp.copy()
                            continue

                    # If regular frame, store data
                    date = dates[f]['date']
                    xldata[date] = amp
                    npout_fields.append(date)

                    # If differential processing
                    if differential:
                        months = dates[f]['months']
                        if months != 0:
                            diff_name = 'D' + date + "({})".format(months)
                            diff_dict[diff_name] = (amp - prev_amp) / months
                            diff_fields.append(diff_name)
                        prev_amp = amp.copy()

                # If differential processing, add data
                if differential:
                    # Insert the differential data into xldata after the amplitude columns
                    xldata = pd.concat((xldata, pd.DataFrame(diff_dict)), axis=1)
                    # Append field names to npout
                    npout_fields.extend(diff_fields)

                # Remove rows marked as to be removed
                if not keep_bad:
                    xldata.dropna(inplace=True)
                    xldata.reset_index(drop=True, inplace=True)
                    xls_coo = xldata[[slng, slat]].values
                    lxldata = len(xldata)

            # If the shapefile needs to be analyzed
            if shp_in is not None:
                # Clear the xls data outside the shapefile bounding box
                shp_extent = shp.getExtent()
                xldata = xldata[xldata[slng] >= shp_extent[0]]
                xldata = xldata[xldata[slng] <= shp_extent[2]]
                xldata = xldata[xldata[slat] >= shp_extent[1]]
                xldata = xldata[xldata[slat] <= shp_extent[3]]
                xldata.reset_index(drop=True, inplace=True)
                xls_coo = xldata[[slng, slat]].values
                lxldata = len(xldata)

                # Look into the shapefile for the nearest point
                print "    - Looking for neighbors in shapefile"
                neigh, neigh_idx = kdt.query(xls_coo)
                print "    - Extracting SqueeSAR values from neighbors"
                n_shp = shp_dat.iloc[neigh_idx]

                # Extract processing information
                dates = process_date_range(n_shp.columns.values, min_date, max_date, differential, prefix='D')
                if dates is None:
                    exit("\nERROR -> No dates found inside {}".format(n_shp.columns.values))

                # Process the shapefile fields
                diff_fields = []
                diff_dict = {}
                for lbl in n_shp.columns.values:
                    # Process the date fields
                    if re.search("D[0-9]{8}", lbl):
                        # Skip if date is out of range
                        processing = dates[lbl]['processing']
                        if processing == 'skip':
                            continue

                        # Get displacement data and store it
                        disp = n_shp[lbl].values

                        # If differential processing and differential frame, just
                        # store the data.
                        if differential:
                            if processing == "differential":
                                prev_disp = disp.copy()
                                continue

                        # If regular frame, store data
                        xldata[lbl] = disp
                        npout_fields.append(lbl)

                        # If differential processing
                        if differential:
                            months = dates[lbl]['months']
                            if months != 0:
                                diff_name = 'D' + lbl + "({})".format(months)
                                diff_dict[diff_name] = (disp - prev_disp) / months
                                diff_fields.append(diff_name)
                            prev_disp = disp.copy()
                    else:
                        xldata[lbl] = n_shp[lbl].values
                        npout_fields.append(lbl)

                # If differential processing, add data
                if differential:
                    # Insert the differential data into xldata after the displacement columns
                    xldata = pd.concat((xldata, pd.DataFrame(diff_dict)), axis=1)
                    # Append field names to npout
                    npout_fields.extend(diff_fields)

                # Calculate the approximate distance between the points
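                # (equirectangular approximation on a sphere of radius 6371 km,
                # adequate for the short distances expected here)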
                lt1 = np.radians(np.asarray(xls_coo[:, 1]))
                lt2 = np.radians(xldata[shp_y_lbl].values)
                ln1 = np.radians(np.asarray(xls_coo[:, 0]))
                ln2 = np.radians(xldata[shp_x_lbl].values)
                x = (ln2 - ln1) * np.cos(0.5 * (lt2 + lt1))
                y = lt2 - lt1
                dist = 6371000. * np.sqrt(x*x + y*y)
                xldata['Approx. Distance (m)'] = dist
                npout_fields.append('Approx. Distance (m)')

            # Add the TS information to output dataframe
            if ts_in is not None:
                # Extract values from the temporary scatterer rasters
                print "    - Extracting temporary scatterer values"

                # Temporary dictionary to hold the values extracted from each file
                ts_dict = {}

                # For each file store the data corresponding to the xls coordinates
                for f in ts_stack:
                    if verbose:
                        print "      - Extracting TS values from '{0}'".format(f)
                    tf = gdal.Open(f)
                    if ts_srs.ImportFromWkt(tf.GetProjectionRef()) != 0:
                        exit("\nERROR -> Error importing the projection information from '{0}'.".format(f))
                    xls2ts = prjpnt(xls_srs, ts_srs)
                    geo = tf.GetGeoTransform()
                    tfb = tf.GetRasterBand(1)
                    ts_ndval = tfb.GetNoDataValue()
                    lng_max = tfb.XSize - 1
                    lat_max = tfb.YSize - 1

                    # Loop through the coordinates
                    ts_val = np.zeros(lxldata)
                    for i in range(lxldata):
                        # Convert xls coordinates to TS raster coordinates
                        [lng, lat] = xls2ts.prj_coo(xls_coo[i])

                        # Calculate pixel location in TS raster coordinates
                        plng = int((lng - geo[0]) / geo[1])
                        plat = int((lat - geo[3]) / geo[5])

                        # Check if it's inside the raster file.
                        # If it is, gather TS values
                        if plng < 0 or plng > lng_max or plat < 0 or plat > lat_max:
                            ts_val[i] = np.nan
                        else:
                            ts_val[i] = float(tfb.ReadAsArray(plng, plat, 1, 1))

                        # Check if the value is valid
                        if ts_val[i] == ts_ndval or ts_val[i] == ndval:
                            ts_val[i] = np.nan

                    # Store values in temporary dictionary
                    ts_dict[f] = ts_val

                    # Close gdal handles
                    tfb = None
                    tf = None

                # TODO: For keys that can be included within other keys (for
                # example 'VEL' and 'VEL_STDEV') it is assumed that they are
                # provided in 'contains' order: 'VEL_STDEV' should come before
                # 'VEL'. See if there is a way to automatically accomplish this
                # without expecting the user to specify them in order.

                # Associate files to user provided keys
                ts_k_dict = {}
                ts_set = set(ts_stack)
                for k in ts_keys:
                    ts_k_dict[k] = [f for f in ts_set if f.count(k)]
                    ts_set = ts_set - set(ts_k_dict[k])

                # Merge data and add to output dataframe
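                # np.fmax ignores NaNs (the non-NaN operand wins), so each
                # TS_* column ends up with the valid value from whichever
                # file matched that key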
                for k, v in ts_k_dict.iteritems():
                    xldata["TS_" + k] = np.nan * np.ones(lxldata)
                    npout_fields.append("TS_" + k)
                    for f in v:
                        xldata["TS_" + k] = np.fmax(xldata["TS_" + k], ts_dict[f])

                # Remove bad data
                if not keep_bad:
                    xldata.dropna(inplace=True)
                    xldata.reset_index(drop=True, inplace=True)
                    xls_coo = xldata[[slng, slat]].values

            # Extract subarray to pickle and convert to CSV
            npout = xldata[npout_fields].copy()

            # If the user selected 'CCI Class'
            if ccl is not None:
                # Function to map CCI values to classes
                def cci2class(cci):
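                    # Thresholds are applied in descending order, so each
                    # assignment overwrites the previous one for lower CCI
                    # values (e.g. cci = 65 lands in class 2)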
                    ccicl = np.zeros_like(cci)
                    ccicl[cci <= 100] = 4
                    ccicl[cci < 90] = 3
                    ccicl[cci < 70] = 2
                    ccicl[cci < 60] = 1
                    ccicl[cci < 50] = 0
                    return ccicl

                npout.columns.values[ccl] = 'CCI Class'
                cci = npout.loc[:, 'CCI Class'].values
                npout.loc[:, 'CCI Class'] = cci2class(cci)

            # Add xls coordinates to output dataframe
            npout.insert(0, 'XLS Longitude', xls_coo[:, 0])
            npout.insert(1, 'XLS Latitude', xls_coo[:, 1])

            # Define basic name for output files
            name = prepend + dt.strftime(meas_year, "%Y") + "_" + sheet_in + "_" + period + af + sf + tsf + dif + cf

            # Store table in pickle file
            if pkfile is None:
                npkl = name + ".pkl"
            else:
                npkl = pkfile
            pth = join(dirname(xls_in), npkl)
            print "    - Saving pickled dataframe to '{0}'".format(pth)
            npout.to_pickle(pth)

            # Store table in CSV file
            if csvfile is None:
                ncsv = name + ".csv"
            else:
                ncsv = csvfile
            pth = join(dirname(xls_in), ncsv)
            print "    - Saving CSV dataframe to '{0}'".format(pth)
            npout.to_csv(pth, index=False)

            # Write to the corresponding sheet in new file
            print "    - Writing to output sheet: '{0}'".format(sheet_in)
            xldata.to_excel(writer, sheet_name=sheet_in, index=False)

        # Save and close xls file
        writer.save()
        writer.close()
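
A minimal invocation sketch for the function above; the glob patterns and options are hypothetical:

# Hypothetical call (paths and patterns are illustrative only)
amp2xls('surveys/2014_*.xls',
        amp_in='rasters/amplitude/*.tif',
        shp_in='squeesar/points.shp',
        xls_epsg=4326,
        period='winter',
        differential=True,
        verbose=True)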