Exemple #1
0
    def CreateLoadArray(self, tempRastName, attribute_name, rasterArrayType):
        """
        Create the one-dimensional load array for a raster.

        Input:
            tempRastName = The name of the raster array to create
            attribute_name = The attribute definition string for the array
            rasterArrayType = Dimensionality of the raster (<= 2 for 2D,
                3 for 3D with a z/band dimension)

        Output:
            None

        Raises:
            ValueError for an unsupported rasterArrayType (previously the
            query variable was left unbound and a NameError surfaced later).
        """

        import scidb
        sdb = scidb.iquery()

        if rasterArrayType <= 2:
            # 2D load array: explicit y/x coordinate attributes
            theQuery = "create array %s <y1:int64, x1:int64, %s> [xy=0:*,?,?]" % (
                tempRastName, attribute_name)
        elif rasterArrayType == 3:
            # 3D load array: adds the z (band) coordinate attribute
            theQuery = "create array %s <z1:int64, y1:int64, x1:int64, %s> [xy=0:*,?,?]" % (
                tempRastName, attribute_name)
        else:
            raise ValueError(
                "Unsupported rasterArrayType: %s" % rasterArrayType)

        try:
            sdb.query(theQuery)
        except Exception:
            # Array likely already exists: remove it and retry once
            sdb.query("remove(%s)" % tempRastName)
            sdb.query(theQuery)
Exemple #2
0
    def GetSciDBInstances(self):
        """
        Query SciDB for its instance listing and cache the instance count
        on the object.

        Input:
            None

        Output:
            None (sets self.SciDB_Instances)
        """
        import scidb
        sdb = scidb.iquery()
        instanceListing = sdb.queryAFL("list('instances')")
        # The listing includes a header line, so it is excluded from the count
        self.SciDB_Instances = len(instanceListing.splitlines()) - 1
Exemple #3
0
def GetNumberofSciDBInstances():
    """
    Get the indexes of the running SciDB instances.

    NOTE: despite the name, this returns ``list(range(n))`` — the instance
    indexes 0..n-1 — not the count itself.  The previous docstring claimed
    it returned the number of instances, which was misleading; behavior is
    unchanged because callers iterate over the result.

    Input:
        None

    Output:
        List of instance indexes, one per running SciDB instance
    """

    import scidb
    sdb = scidb.iquery()

    # The "list('instances')" output includes a header line; subtract it
    query = sdb.queryAFL("list('instances')")
    numInstances = len(query.splitlines()) - 1
    return list(range(numInstances))
Exemple #4
0
    def __init__(self, boundaryPath, rasterPath, SciDBArray):
        """
        Build a ZonalStats instance bound to a boundary dataset, a raster
        dataset, and a SciDB array.

        Input:
            boundaryPath = Path of the boundary (vector) dataset
            rasterPath = Path of the raster dataset
            SciDBArray = Name of the SciDB array

        Output:
            An instance of the ZonalStats class
        """
        import scidb

        # Open the SciDB connection, then discover the instance layout
        self.sdb = scidb.iquery()
        self.__SciDBInstances()

        # Remember where the input datasets live and which array to target
        self.vectorPath = boundaryPath
        self.geoTiffPath = rasterPath
        self.SciDBArrayName = SciDBArray
Exemple #5
0
    def CreateDestinationArray(self, rasterArrayName, height, width, chunk,
                               overlap):
        """
        Function creates the final destination array.
        Updated to handle 3D arrays.

        Input:
            rasterArrayName = The name of the array to create
            height = The height of the array to create
            width = The width of the array to create
            chunk = The chunk size to create with
            overlap = The chunk overlap of the array

        Output:
            None
        """

        import scidb
        sdb = scidb.iquery()

        if self.RasterArrayShape <= 2:
            myQuery = "create array %s <%s> [y=0:%s,%s,%s; x=0:%s,%s,%s]" % (
                rasterArrayName, self.AttributeString, height - 1, chunk,
                overlap, width - 1, chunk, overlap)
        else:
            # 3D: the band dimension gets chunk size 1 and no overlap; y and x
            # carry the requested chunk size and overlap.
            # BUGFIX: the original format string misplaced its placeholders so
            # that height-1 landed in the band-overlap slot and chunk/overlap
            # became the y dimension bounds, producing an invalid schema.
            myQuery = "create array %s <%s> [band=0:%s,1,0; y=0:%s,%s,%s; x=0:%s,%s,%s]" % (
                rasterArrayName, self.AttributeString, self.numbands - 1,
                height - 1, chunk, overlap, width - 1, chunk, overlap)

        try:
            sdb.query(myQuery)
        except Exception:
            # Remove the array if it already exists then rerun the query
            print("*****  Array %s already exists. Removing ****" %
                  rasterArrayName)
            sdb.query("remove(%s)" % rasterArrayName)
            sdb.query(myQuery)

        del sdb
Exemple #6
0
    def CreateDestinationArray(self, rasterArrayName, height, width, chunk):
        """
        Function creates the final destination array (no chunk overlap).
        Updated to handle 3D arrays.

        Input:
            rasterArrayName = Name of the destination array
            height = The height of the raster
            width = The width of the raster
            chunk = The size of the chunks

        Output:
            None
        """

        import scidb
        sdb = scidb.iquery()

        # Attempt to create array, removing the previous one if it exists
        if self.RasterArrayShape <= 2:
            myQuery = "create array %s <%s> [y=0:%s,%s,0; x=0:%s,%s,0]" % (
                rasterArrayName, self.AttributeString, height - 1, chunk,
                width - 1, chunk)
        else:
            # 3D: band dimension with chunk size 1, no overlap anywhere
            myQuery = "create array %s <%s> [band=0:%s,1,0; y=0:%s,%s,0; x=0:%s,%s,0]" % (
                rasterArrayName, self.AttributeString, self.numbands - 1,
                height - 1, chunk, width - 1, chunk)

        try:
            sdb.query(myQuery)
        except Exception:
            # Array already exists: remove it and retry once
            # (leftover "here2"/"here3" debug prints removed)
            print("*****  Array %s already exists. Removing ****" %
                  rasterArrayName)
            sdb.query("remove(%s)" % rasterArrayName)
            sdb.query(myQuery)

        del sdb
Exemple #7
0
def GDALReader(inParams):
    """
    This is the main worker function.
    Split up Loading and Redimensioning. Only Loading is multiprocessing

    Input:
        inParams = A tuple or list containing the following:
            theMetadata = Metadata for the reading
            theInstance = Instance to read from
            theRasterPath = Path to the raster to read
            theSciDBOutPath = Out path for SciDB processing
            theSciDBLoadPath = Load path for SciDB processing
            bandIndex = Index of the band to process on (falsy for 2D reads)

    Output:
        A tuple in the following format:
            (metadata version, write time, load time);
            the times are -999 when loading failed
    """
    # Unpack the positional worker parameters
    theMetadata = inParams[0]
    theInstance = inParams[1]
    theRasterPath = inParams[2]
    theSciDBOutPath = inParams[3]
    theSciDBLoadPath = inParams[4]
    bandIndex = inParams[5]

    from scidb import iquery, Statements
    sdb = iquery()
    sdb_statements = Statements(sdb)

    # Default (2D) temp array name and binary file paths
    tempArray = "temprast_%s" % (theMetadata['version'])
    rasterBinaryFilePath = "%s/%s.sdbbin" % (theSciDBOutPath, tempArray)
    rasterBinaryLoadPath = "%s/%s.sdbbin" % (theSciDBLoadPath, tempArray)

    print("xoffset: %s, yOffSet: %s, xWindow: %s, yWindow: %s " %
          (theMetadata['xOffSet'], theMetadata['yOffSet'],
           theMetadata['xWindow'], theMetadata['yWindow']))

    raster = gdal.Open(theRasterPath, GA_ReadOnly)
    if bandIndex:
        # This code is for multibanded arrays, with z (band) dimension.
        print("**** Reading band %s" % bandIndex)
        band = raster.GetRasterBand(bandIndex)
        array = band.ReadAsArray(xoff=theMetadata['xOffSet'],
                                 yoff=theMetadata['yOffSet'],
                                 win_xsize=theMetadata['xWindow'],
                                 win_ysize=theMetadata['yWindow'])
        # Band-specific paths/array name replace the 2D defaults above
        rasterBinaryFilePath = "%s/band%s_%s.sdbbin" % (theSciDBOutPath,
                                                        bandIndex, tempArray)
        rasterBinaryLoadPath = "%s/band%s_%s.sdbbin" % (theSciDBLoadPath,
                                                        bandIndex, tempArray)
        tempArray = "temprast_band%s_%s" % (bandIndex, theMetadata['version'])
    else:
        array = raster.ReadAsArray(xoff=theMetadata['xOffSet'],
                                   yoff=theMetadata['yOffSet'],
                                   xsize=theMetadata['xWindow'],
                                   ysize=theMetadata['yWindow'])

    # Time the array write
    start = timeit.default_timer()
    WriteArray(array, rasterBinaryFilePath, theMetadata['array_type'],
               theMetadata['attribute'], bandIndex)
    stop = timeit.default_timer()
    writeTime = stop - start

    # Build pseudo attribute names ("name1:type") for the load schema,
    # handling either a comma-separated attribute list or a single attribute
    if theMetadata['array_type'] == 2:
        items = [
            "%s:%s" % (attribute.split(":")[0].strip() + "1",
                       attribute.split(":")[1].strip())
            for attribute in theMetadata['attribute'].split(",")
        ]
        pseudoAttributes = ", ".join(items)
    else:
        pseudoAttributes = "%s:%s" % (
            theMetadata['attribute'].split(":")[0].strip() + "1",
            theMetadata['attribute'].split(":")[1].strip())

    # Make the binary file readable by the SciDB server processes
    os.chmod(rasterBinaryFilePath, 0o755)
    # Support multiple attributes or 2D and 3D arrays
    sdb_statements.CreateLoadArray(tempArray, theMetadata['attribute'],
                                   theMetadata['array_type'])
    start = timeit.default_timer()

    if sdb_statements.LoadOneDimensionalArray(theInstance, tempArray,
                                              pseudoAttributes,
                                              theMetadata['array_type'],
                                              rasterBinaryLoadPath):
        stop = timeit.default_timer()
        loadTime = stop - start

        # Extrapolate total loading time across all loops (minutes)
        dataLoadingTime = ((writeTime + loadTime) * theMetadata["loops"]) / 60
        if theMetadata['version'] == 0:
            print(
                "Estimated time for loading in minutes %s: WriteTime: %s, LoadTime: %s"
                % (dataLoadingTime, writeTime, loadTime))

        # Clean up
        gc.collect()

        RedimensionAndInsertArray(sdb, tempArray, theMetadata['scidbArray'],
                                  theMetadata['array_type'],
                                  theMetadata['xOffSet'],
                                  theMetadata['yOffSet'])

        return theMetadata['version'], writeTime, loadTime

    else:
        print("Error Loading")
        return theMetadata['version'], -999, -999
    # BUGFIX: removed unreachable "return parser" (parser was undefined here;
    # both branches above already return)


if __name__ == '__main__':
    """
        Entry point for SciDB_analysis
        This file contains the functions used for performing spatial analyses in SciDB
    """
    # Read run configuration from the local config.ini
    config = configparser.ConfigParser()
    config.read("config.ini")

    def parse(s):
        # JSON-decode a value from the [main] section of config.ini
        return json.loads(config.get("main", s))

    args = argument_parser().parse_args()
    sdb = iquery()
    # "list('instances')" output includes a header line; subtract it
    query = sdb.queryAFL("list('instances')")
    SciDBInstances = len(query.splitlines()) - 1

    runs = parse("runs")

    filePath = parse("filePath")
    rasterStatsCSVBase = parse("rasterStatsCSVBase")
    # Each subcommand supplies its own dataset loader via argparse (args.func);
    # the "overlap" command takes an extra mode argument
    if args.command == "overlap":
        datasets = args.func(config, 'overlap')
    else:
        datasets = args.func(config)
    timings = OrderedDict()

    # NOTE(review): loop currently only prints each dataset — presumably the
    # per-dataset analysis continues beyond this chunk; confirm in full file
    for d in datasets:
        print(d)
Exemple #9
0
def ParallelLoad(rasterReadingMetadata):
    """
    Load arrays of any size into SciDB.

    The dataset is broken into smaller pieces (via ArraySplicerLogic and
    AdjustMetaData) for repetitive parallel writing/loading, followed by a
    single insert-redimension per loop.  Performance can be improved by
    raising the maxPixel threshold.

    maxPixel = Number of pixels to read/write/load per loop.
    Make sure to consider the number of SciDB processes when setting maxPixel.

    Input:
        rasterReadingMetadata = Per-process raster reading metadata; assumed
            to be an indexable collection of dicts with keys such as 'width',
            'height', 'attribute' and 'destination_array' — TODO confirm

    Output:
        None
    """
    from scidb import iquery, Statements
    import timeit
    # One worker process per metadata entry
    numProcesses = len(rasterReadingMetadata)

    sdb = iquery()
    sdb_statements = Statements(sdb)

    try:
        # 5,000,000 is the maxPixel threshold described in the docstring
        loadLoops = ArraySplicerLogic(rasterReadingMetadata[0]['width'],
                                      rasterReadingMetadata[0]['height'],
                                      5000000)
        # Load attribute uses the "<name>_1:<type>" pseudo-attribute convention
        loadAttribute = "%s_1:%s" % (
            rasterReadingMetadata[0]['attribute'].split(":")[0],
            rasterReadingMetadata[0]['attribute'].split(":")[1])
        nodeLoopData = AdjustMetaData(loadLoops, rasterReadingMetadata)

        start = timeit.default_timer()
        for l, nodeLoopIteration in enumerate(
                np.array_split(list(nodeLoopData.items()), loadLoops)):
            # Have to initiate the pool for each loop
            pool = mp.Pool(numProcesses)
            print("Loading %s of %s" % (l + 1, loadLoops))
            # Create the load array
            sdb_statements.CreateLoadArray(
                "LoadArray", loadAttribute,
                rasterReadingMetadata[0]['array_shape'])
            # NOTE(review): imap's result iterator is never consumed; the
            # close()/join() pair is relied on to drain the work — confirm
            # workers complete before the load below
            pool.imap(Read_Write_Raster, (n for n in nodeLoopIteration))
            pool.close()
            pool.join()

            # Load the one dimension array and insert redimension
            startLoad = timeit.default_timer()
            sdb_statements.LoadOneDimensionalArray(-1, "LoadArray",
                                                   loadAttribute, 1,
                                                   'pdataset.scidb')

            startRedimension = timeit.default_timer()
            # NOTE(review): index [1] here, but [0] is used above for
            # width/height/attribute — verify the destination_array should
            # come from the second entry
            sdb_statements.InsertRedimension(
                "LoadArray",
                rasterReadingMetadata[1]["destination_array"],
                oldvalue=loadAttribute.split(":")[0],
                newvalue='value')

            # Drop the temporary load array and prune old array versions
            sdb.query("remove(LoadArray)")
            RemoveArrayVersions(sdb,
                                rasterReadingMetadata[1]["destination_array"])

            stop = timeit.default_timer()
            if l == 0:
                # Extrapolate total runtime from the first loop's timings
                print(
                    "Estimated time for loading the dataset in minutes %s: WriteTime: %s seconds, LoadTime: %s "
                    "seconds, RedimensionTime: %s seconds" %
                    ((stop - start) * loadLoops / 60, startLoad - start,
                     startRedimension - startLoad, stop - startRedimension))

    except Exception as e:
        print(e)
        print("Error")
Exemple #10
0
def ParallelLoadByChunk(rasterReadingData):
    """
    This function will do parallel loading that supports fast redimensioning.

    Input:
        rasterReadingData = Dict of per-chunk raster reading metadata; each
            value is assumed to hold at least 'attribute', 'array_shape' and
            'destination_array' — TODO confirm against callers

    Output:
        None
    """

    from scidb import iquery, Statements
    from itertools import cycle, chain
    from collections import Counter
    import timeit

    sdb = iquery()
    sdb_statements = Statements(sdb)
    # "list('instances')" output includes a header line; subtract it
    query = sdb.queryAFL("list('instances')")
    scidbInstances = len(query.splitlines()) - 1

    # Cycle through the instances with the given data
    for r, node in zip(rasterReadingData, cycle(range(scidbInstances))):
        rasterReadingData[r]["node"] = node

    # Counter dictionary which reports back how many times node x occured.
    # We are just interested in node 0
    numberofNodeLoops = Counter(rasterReadingData[k]["node"]
                                for k in rasterReadingData)
    loadLoops = numberofNodeLoops[0]
    aKey = list(rasterReadingData.keys())[0]
    # Load attribute uses the "<name>_1:<type>" pseudo-attribute convention
    loadAttribute = "%s_1:%s" % (rasterReadingData[aKey]['attribute'].split(
        ":")[0], rasterReadingData[aKey]['attribute'].split(":")[1])

    try:
        start = timeit.default_timer()
        for l, nodeLoopIteration in enumerate(
                np.array_split(list(rasterReadingData.items()), loadLoops)):
            # Create the load array; a fresh pool is needed for each loop
            pool = mp.Pool(scidbInstances)
            print("Loading %s of %s" % (l, loadLoops - 1))
            sdb_statements.CreateLoadArray(
                "LoadArray", loadAttribute,
                int(nodeLoopIteration[0][1]['array_shape']))
            pool.imap(Read_Write_Raster, (n for n in nodeLoopIteration))
            pool.close()
            pool.join()

            # Load the one dimension array and insert redimension
            startLoad = timeit.default_timer()
            sdb_statements.LoadOneDimensionalArray(-1, "LoadArray",
                                                   loadAttribute, 1,
                                                   'pdataset.scidb')

            startRedimension = timeit.default_timer()
            sdb_statements.InsertRedimension(
                "LoadArray",
                nodeLoopIteration[0][1]["destination_array"],
                oldvalue=loadAttribute.split(":")[0],
                newvalue='value')

            # Drop the temporary load array and prune old array versions
            sdb.query("remove(LoadArray)")
            RemoveArrayVersions(sdb,
                                nodeLoopIteration[0][1]["destination_array"])

            stop = timeit.default_timer()
            if l == 0:
                # Extrapolate total runtime from the first loop's timings
                print(
                    "Estimated time for loading the dataset in minutes %s: LoadTime: %s seconds, RedimensionTime: "
                    "%s seconds" %
                    ((stop - start) * loadLoops / 60,
                     startRedimension - startLoad, stop - startRedimension))

    except Exception as e:
        # BUGFIX: the original bare "except:" swallowed every error (including
        # KeyboardInterrupt) with no diagnostics; narrow it and surface the
        # actual failure, matching ParallelLoad's error handling
        print(e)
        print("Something went wrong")