Example #1
    def computeSampleDistribution(self, points):
        ''' compute sample frequency distributions: histogram for NOMINAL/ORDINAL or pdf for INTERVAL/RATIO

            points: sample locations; its `weights` array is read here and the object is
                    passed on to util.extractCovariatesAtPoints
            result: nothing is returned; the unweighted distribution is stored in
                    self.density_sample and the weighted one in self.density_sample_weighted
        '''
        # single-argument parenthesised form prints the same text under Python 2 and 3
        print('computeSampleDistribution() called')

        xmin = self.min
        xmax = self.max

        # values extracted for this object at the given points
        vals = util.extractCovariatesAtPoints([self], points)[0]

        # zero spread means every weight is identical, so the weighted
        # distribution is just a copy of the unweighted one
        unequal_weights = np.std(points.weights) != 0

        if self.__measurement_level in [conf.MSR_LEVEL_NOMINAL, conf.MSR_LEVEL_ORDINAL]:
            # categorical levels: unit-wide bins spanning [xmin - 0.5, xmax + 0.5]
            n_bins = int(xmax - xmin) + 1
            bin_range = (xmin - 0.5, xmax + 0.5)
            y1, x = np.histogram(vals, range=bin_range, bins=n_bins, density=True)
            if unequal_weights:
                # reuse the same range so both histograms share identical bin edges
                y2, x = np.histogram(vals, weights=points.weights, range=bin_range,
                                     bins=n_bins, density=True)
            else:
                y2 = np.copy(y1)
        else:
            # continuous levels: kernel density estimate evaluated on a regular grid
            x = np.linspace(xmin, xmax, conf.N_INTERVALS)
            y1 = gaussian_kde.gaussian_kde(vals).evaluate(x)
            if unequal_weights:
                y2 = gaussian_kde.gaussian_kde(vals, weights=points.weights).evaluate(x)
            else:
                y2 = np.copy(y1)

        self.density_sample = y1
        self.density_sample_weighted = y2
Example #2
    def __simLoc2SamplesV0(self, loc_ev, evs, SD_evs):
            ''' compute similarity between a location to N samples
                return: a vector of similarity values, each to a sample
                sample_evs = util.extractCovariatesAtPoints(self.__envrasters, self.__soilsamples)
                sample_evs = np.array(sample_evs).T

                # number of samples
                N = np.shape(sample_evs)[0]

                # similarity btw a loc to N samples
                sim = np.zeros(N)

                # compute similarities
                t0 = time.time()
                for i in range(N):
                    sim[i] = self.__simLoc2SampleV0(loc_ev, sample_evs[i], evs, SD_evs)
                return sim

            except Exception as e:
Example #3
    def predict_opencl_atom(self, X = None, predict_class = False, single_cpu = conf.SINGLE_CPU, opencl_config = conf.OPENCL_CONFIG):
        ''' PyOpenCL implementation of the iPSM approach
            return: a vector of predictions, eacn for a row in X
        print 'predict_opencl_atom() was called'
            t0 = time.time()
            ##### prepare data
            # covariates values over the whole study area
            r_evs = np.int32(self.__envrasters[0].getData().size)
            c_evs = np.int32(len(self.__envrasters))

            Std_evs = np.zeros(len(self.__envrasters))
            AVG_evs = np.zeros(len(self.__envrasters))
            for i in range(len(self.__envrasters)):
                Std_evs[i] = self.__envrasters[i].std
                AVG_evs[i] = self.__envrasters[i].mean

            SD_evs = Std_evs.reshape(c_evs).astype(np.float32)

            # covariates values at prediction locations
            if X is None: # if X is not provided, make prediction for the whole study area
                X = []
                for raster in self.__envrasters:
                X = np.array(X).T

            r, c = np.shape(X)
            nrows_X = np.int32(r)
            ncols_X = np.int32(c)

            X = X.reshape(nrows_X*ncols_X).astype(np.float32)

            MSRLEVES = self.__msrInts.reshape(c_evs).astype(np.int32)
            #print MSRLEVES, MSRLEVES.shape

            if not self.__samples_stats_collected:
                # covariates values at sample locations
                if self.__soilsamples.covariates_at_points is None:
                    samples_X = util.extractCovariatesAtPoints(self.__envrasters, self.__soilsamples)
                    samples_X = self.__soilsamples.covariates_at_points.T#[0:c_evs].T ## prone to bug
                nrows_samples = np.int32(samples_X.shape[1])
                self.__nrows_samples = nrows_samples

                samples_SD_evs = np.zeros((nrows_samples, c_evs))

                for i in range(nrows_samples):
                    delta = samples_X[:,i].T - AVG_evs
                    tmp = Std_evs**2  + delta**2
                    samples_SD_evs[i] = np.sqrt(tmp)

                #print '\nsamples_SD_evs:', samples_SD_evs, '\n'

                self.__samples_SD_evs = np.array(samples_SD_evs).reshape(nrows_samples*c_evs).astype(np.float32)

                self.__samples_X = np.array(samples_X).T.reshape(nrows_samples*c_evs).astype(np.float32)
                #print 'samples_X:', samples_X.shape, samples_X.min()

                # sample weights
                self.__sample_weights = self.__soilsamples.weights.reshape(nrows_samples).astype(np.float32)
                #print 'sample_weights:', sample_weights.shape, sample_weights.min()

                # sample attributes
                self.__sample_attributes = self.__soilsamples.attributes.reshape(nrows_samples).astype(np.float32)

                self.__samples_stats_collected = True

            # hold predictions for instances in X
            X_predictions = np.zeros(nrows_X).astype(np.float32)
            # hold prediction uncertainties for instances in X
            X_uncertainties = np.zeros(nrows_X).astype(np.float32)
            print 'preparation on HOST took', time.time() - t0, 's'

            ##### config computing platform and device
            for platform in cl.get_platforms():
                #print platform.name
                if platform.name == conf.OPENCL_CONFIG['Platform']:
                    PLATFORM = platform
                    # Print each device per-platform
                    for device in platform.get_devices():
                        #print device.name
                        if device.name == conf.OPENCL_CONFIG['Device']:
                            DEVICE = device
            print DEVICE.name, 'on', PLATFORM.name
            # opencl context
            ctx = cl.Context([DEVICE])
            # opencl command queue
            queue = cl.CommandQueue(ctx)

            ##### allocate memory space on device
            mf = cl.mem_flags
            t0 = time.time()
            #evs_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=evs)
            SD_evs_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=SD_evs)
            X_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=X)
            MSRLEVES_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=MSRLEVES)
            sample_X_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.__samples_X)

            ## added 09/06/2017
            samples_SD_evs_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.__samples_SD_evs)

            sample_weights_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.__sample_weights)
            sample_attributes_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.__sample_attributes)
            X_predictions_g = cl.Buffer(ctx, mf.WRITE_ONLY, X_predictions.nbytes)
            X_uncertainties_g = cl.Buffer(ctx, mf.WRITE_ONLY, X_uncertainties.nbytes)
            t1 = time.time()
            print 'allocation and copy from HOST to DEVICE took', t1 - t0, 's'
            X = None

            ##### build opencl kernel from code in the file
            f = open(conf.iPSM_KERNEL_FN, 'r')
            fstr = "".join(f.readlines())
            fstr = fstr.replace("#define N_SAMPLES 100", "#define N_SAMPLES " + str(self.__nrows_samples))
            prg = cl.Program(ctx, fstr).build()

            ##### opencl computation
            threshold = np.float32(self.__uncthreshold)

            if predict_class:
                mode = np.int32(1)
                mode = np.int32(0)

            print X_predictions.shape

            if not single_cpu:
                t0 = time.time()
                completeEvent = \
                prg.iPSM_Predict(queue, X_predictions.shape, None, nrows_X, ncols_X, self.__nrows_samples, mode, \
                                 threshold, MSRLEVES_g, samples_SD_evs_g, SD_evs_g, X_g, sample_X_g, sample_weights_g, sample_attributes_g, \
                                 X_predictions_g, X_uncertainties_g)
                t1 = time.time()
                print 'kernel took', t1 - t0, 's'
                #print queue.finish()

                print 'SINGLE_CPU iPSM.predict_opencl() called'
                t0 = time.time()
                completeEvent = \
                prg.iPSM_Predict_Sequential(queue, (1,), (1,), nrows_X, ncols_X, self.__nrows_samples, mode, \
                                 threshold, MSRLEVES_g, samples_SD_evs_g, SD_evs_g, X_g, sample_X_g, sample_weights_g, sample_attributes_g, \
                                 X_predictions_g, X_uncertainties_g)
                t1 = time.time()
                print 'kernel took', t1 - t0, 's'
                #print queue.finish()

            #### wait until completions
            events = [completeEvent]
            print 'up to events finished kernel took', time.time() - t0, 's'
            #print queue.finish()

            ##### copy result data
            t0 = time.time()
            cl.enqueue_copy(queue, X_predictions, X_predictions_g, wait_for = events)#.wait()
            #print queue.finish()
            cl.enqueue_copy(queue, X_uncertainties, X_uncertainties_g)
            t1 = time.time()
            print 'copy from DEVICE to HOST took', t1 - t0, 's'
            y = np.vstack((X_predictions, X_uncertainties)).T
            #print y
            return y

        except Exception as e:
Example #4
    def __simLocs2Samples(self, X, parallel = True, nprocess = conf.N_PROCESS):
        ''' compute similarity between locations to predict and samples
            return: a matrix of similarity values, each row is a location, each column is a sample
        ## this import is necessary [on Windows]:
        # http://stackoverflow.com/questions/28445373/python-import-numpy-as-np-from-outer-code-gets-lost-within-my-own-user-defined
        import numpy as np
        import raster, points, util, conf
        def simLoc2SamplesV0(loc_ev, datapkg): # this function is needed for parallel computing using multiprocessing
            import conf
            # unpack data in datapkg
            t0 = time.time()
            sample_evs = datapkg[0]
            evs = datapkg[1]
            SD_evs = datapkg[2]
            # number of environmental variables
            M = SD_evs.size
            # number of samples
            N = np.shape(sample_evs)[0]
            sim = np.zeros(N)
            t0 = time.time()
            for i in range(N): # for each sample
                sim0 = np.zeros(M)
                sample_ev = sample_evs[i]
                for j in range(M): # for each environmental variable
                    evi = loc_ev[j]
                    evj= sample_ev[j]
                    msrlevel = self.__envrasters[j].getMsrLevel()
                    if msrlevel == conf.MSR_LEVEL_NOMINAL or msrlevel == conf.MSR_LEVEL_ORDINAL:
                        if evi == evj:
                            sim_i = 1.0
                            sim_i = 0.0
                        SD_ev = SD_evs[j]
                        ev = evs[:,j]
                        SD_evj = np.sqrt(np.mean((ev - evj) ** 2))
                        sim_i = np.exp(-0.5 * (evi - evj) ** 2 / (SD_ev ** 2 / SD_evj) ** 2)
                    sim0[j] = sim_i
                sim[i] = np.min(sim0) ## limiting factor
            return sim

        def simLoc2Samples(loc_ev, datapkg): # this function is needed for parallel computing using multiprocessing
            import conf ## IMPORTANT - makes **conf.MSR_LEVELS** visible
            # unpack data in datapkg
            t0 = time.time()
            sample_evs = datapkg[0]
            REVS = datapkg[1]
            SD_evs = datapkg[2]
            AVG_evs = datapkg[3]
            SUM_DIF_SQ_AVG = datapkg[4]
            # Guiming 3/31/2019
            MSRLEVES = datapkg[5]
            # number of environmental variables
            M = SD_evs.size
            # number of samples
            N = np.shape(sample_evs)[0]

            sim = np.zeros(N)
            t0 = time.time()
            for i in range(N): # for each sample
                sim0 = np.zeros(M)
                sample_ev = sample_evs[i]

                for j in range(M): # for each environmental variable
                    evi = loc_ev[j]
                    evj= sample_ev[j]
                    # Guiming 3/31/2019 - SAVES MEM, NO NEED TO DISPATCH self.__envrasters TO EACH THREAD
                    msrlevel = MSRLEVES[j]
                    ## this line below does not work without ** import conf ** at the begining of this function
                    if msrlevel == conf.MSR_LEVEL_NOMINAL or msrlevel == conf.MSR_LEVEL_ORDINAL:
                    #if msrlevel == 'nominal' or msrlevel == 'ordinal':
                        if evi == evj:
                            sim_i = 1.0
                            sim_i = 0.0
                        SD_ev = SD_evs[j]
                        delta = sample_ev[j] - AVG_evs[j]
                        tmp = SUM_DIF_SQ_AVG[j] + REVS * delta**2
                        SD_evj = np.sqrt(tmp/REVS)
                        sim_i = np.exp(-0.5 * (evi - evj) ** 2 / (SD_ev ** 2 / SD_evj) ** 2)

                    sim0[j] = sim_i
                sim[i] = np.min(sim0) ## limiting factor
            return sim

            # do dimension match check here
            if np.shape(X)[1] != len(self.__envrasters):
                print 'dimension mismatch in computing similarity in iPSM'

            msr_levels = []
            if conf.NAIVE:
                evs = np.zeros((self.__envrasters[0].getData().size, len(self.__envrasters)))
            SD_evs = np.zeros(len(self.__envrasters))
            AVG_evs = np.zeros(len(self.__envrasters))
            for i in range(len(self.__envrasters)):
                if conf.NAIVE:
                    evs[:, i] = self.__envrasters[i].getData().T
                SD_evs[i] = self.__envrasters[i].std
                AVG_evs[i] = self.__envrasters[i].mean

            NROWS = np.shape(X)[0]

            REVS = self.__envrasters[0].getData().size
            SUM_DIF_SQ_AVG = REVS * SD_evs**2

            samples_evs = util.extractCovariatesAtPoints(self.__envrasters, self.__soilsamples)
            samples_evs = np.array(samples_evs).T

            if not parallel:
                sim = np.zeros((NROWS, self.__soilsamples.size))
                for i in range(NROWS):
                    if conf.NAIVE: ## naive implementaton
                        sim[i,:] = self.__simLoc2SamplesV0(X[i], evs, SD_evs)
                    else: ## with optimizations
                        sim[i,:] = self.__simLoc2Samples(X[i], samples_evs, REVS, SD_evs, AVG_evs, SUM_DIF_SQ_AVG)
                datapkg = []
                for i in range(NROWS):
                    if conf.NAIVE: ## naive implementaton
                        datapkg.append([samples_evs, evs, SD_evs])
                        # Guiming 3/31/2019
                        datapkg.append([samples_evs, REVS, SD_evs, AVG_evs, SUM_DIF_SQ_AVG, msr_levels])

                #print 'n process', nprocess
                pool = Pool(nprocess)

                t0 = time.time()
                if conf.NAIVE: ## naive implementaton
                    sim = np.array(pool.map(simLoc2SamplesV0, X, datapkg))
                    sim = np.array(pool.map(simLoc2Samples, X, datapkg))

            return sim

        except Exception as e:
Example #5
    def predict_opencl_atom(self,
        ''' PyOpenCL implementation of the iPSM approach
            return: a vector of predictions, eacn for a row in X
            t0 = time.time()
            ##### prepare data
            # covariates values over the whole study area
            r_evs = np.int32(self.__envrasters[0].getData().size)
            c_evs = np.int32(len(self.__envrasters))
            evs = np.zeros((r_evs, c_evs))
            for i in range(len(self.__envrasters)):
                evs[:, i] = self.__envrasters[i].getData().T

            # standard deviation of each variable (over the whole study area)
            Std_evs = np.std(evs, axis=0)  ## added on Feb 26 2018
            SD_evs = Std_evs.reshape(c_evs).astype(np.float32)
            #print 'SD_evs', SD_evs.shape, SD_evs

            # covariates values at prediction locations
            if X is None:  # if X is not provided, make prediction for the whole study area
                X = []
                for raster in self.__envrasters:
                X = np.array(X).T

            r, c = np.shape(X)
            nrows_X = np.int32(r)
            ncols_X = np.int32(c)

            X = X.reshape(nrows_X * ncols_X).astype(np.float32)
            #print X, X.shape, nrows_X, ncols_X

            MSRLEVES = self.__msrInts.reshape(c_evs).astype(np.int32)
            #print MSRLEVES, MSRLEVES.shape

            #t0 = time.time()
            # covariates values at sample locations
            if self.__soilsamples.covariates_at_points is None:
                samples_X = util.extractCovariatesAtPoints(
                    self.__envrasters, self.__soilsamples)
                samples_X = self.__soilsamples.covariates_at_points[
                    0:c_evs].T  ## prone to bug

            nrows_samples = np.int32(samples_X.shape[1])
            #print samples_X.shape
            #print 'prepare samples took', time.time() - t0, 's'

            samples_SD_evs = np.zeros((nrows_samples, c_evs))
            AVG_evs = np.mean(evs, axis=0)
            SUM_DIF_SQ_AVG = r_evs * Std_evs**2

            #SUM_DIF_AVG = np.sum(evs - AVG_evs, axis = 0) ## == 0.0!!
            #print 'SUM_DIF_AVG', SUM_DIF_AVG

            for i in range(nrows_samples):
                delta = samples_X[:, i].T - AVG_evs
                tmp = SUM_DIF_SQ_AVG + r_evs * delta**2
                samples_SD_evs[i] = np.sqrt(tmp / r_evs)

            samples_SD_evs = np.array(samples_SD_evs).reshape(
                nrows_samples * c_evs).astype(np.float32)

            samples_X = np.array(samples_X).T.reshape(nrows_samples *
            #print 'samples_X:', samples_X.shape, samples_X.min()

            # sample weights
            sample_weights = self.__soilsamples.weights.reshape(
            #print 'sample_weights:', sample_weights.shape, sample_weights.min()

            # sample attributes
            sample_attributes = self.__soilsamples.attributes.reshape(
            #print 'sample_attributes:', sample_attributes.shape, sample_attributes.min()

            # hold predictions for instances in X
            X_predictions = np.zeros(nrows_X).astype(np.float32)
            # hold prediction uncertainties for instances in X
            X_uncertainties = np.zeros(nrows_X).astype(np.float32)
            print 'preparation on HOST took', time.time() - t0, 's'

            ##### config computing platform and device
            for platform in cl.get_platforms():
                #print platform.name
                if platform.name == conf.OPENCL_CONFIG['Platform']:
                    PLATFORM = platform
                    '''if os.environ['COMPUTERNAME'] == 'DU-7CQTHQ2' and 'NVIDIA CUDA' in platform.name:
                        print '!!!'
                        #for device in platform.get_devices():
                        #    if device.name == conf.OPENCL_CONFIG['Device']:
                        DEVICE = platform.get_devices()[0]
                    # Print each device per-platform
                    for device in platform.get_devices():
                        #print device.name
                        if device.name == conf.OPENCL_CONFIG['Device']:
                            DEVICE = device

            # opencl context
            ctx = cl.Context([DEVICE])
            # opencl command queue
            queue = cl.CommandQueue(ctx)

            ##### allocate memory space on device
            mf = cl.mem_flags
            t0 = time.time()
            #evs_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=evs)
            SD_evs_g = cl.Buffer(ctx,
                                 mf.READ_ONLY | mf.COPY_HOST_PTR,
            X_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=X)
            MSRLEVES_g = cl.Buffer(ctx,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
            sample_X_g = cl.Buffer(ctx,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,

            ## added 09/06/2017
            samples_SD_evs_g = cl.Buffer(ctx,
                                         mf.READ_ONLY | mf.COPY_HOST_PTR,

            sample_weights_g = cl.Buffer(ctx,
                                         mf.READ_ONLY | mf.COPY_HOST_PTR,
            sample_attributes_g = cl.Buffer(ctx,
                                            mf.READ_ONLY | mf.COPY_HOST_PTR,
            X_predictions_g = cl.Buffer(ctx, mf.WRITE_ONLY,
            X_uncertainties_g = cl.Buffer(ctx, mf.WRITE_ONLY,
            print 'allocation and copy from HOST to DEVICE took', time.time(
            ) - t0, 's'

            ##### build opencl kernel from code in the file
            f = open(conf.iPSM_KERNEL_FN, 'r')
            fstr = "".join(f.readlines())
            fstr = fstr.replace("#define N_SAMPLES 100",
                                "#define N_SAMPLES " + str(nrows_samples))
            prg = cl.Program(ctx, fstr).build()

            ##### opencl computation
            threshold = np.float32(self.__uncthreshold)

            if predict_class:
                mode = np.int32(1)
                mode = np.int32(0)

            print X_predictions.shape

            ## improved version, 09/06/2017
            if not single_cpu:
                t0 = time.time()
                completeEvent = \
                prg.iPSM_Predict(queue, X_predictions.shape, None, r_evs, nrows_X, ncols_X, nrows_samples, mode, \
                                 threshold, MSRLEVES_g, samples_SD_evs_g, SD_evs_g, X_g, sample_X_g, sample_weights_g, sample_attributes_g, \
                                 X_predictions_g, X_uncertainties_g)
                print 'kernel took', time.time() - t0, 's'
                #print queue.finish()

            ## added on Oct. 7, 2018 [sequential version - CPU]
                print 'SINGLE_CPU iPSM.predict_opencl() called'
                t0 = time.time()
                completeEvent = \
                prg.iPSM_Predict_Sequential(queue, (1,), (1,), r_evs, nrows_X, ncols_X, nrows_samples, mode, \
                                 threshold, MSRLEVES_g, samples_SD_evs_g, SD_evs_g, X_g, sample_X_g, sample_weights_g, sample_attributes_g, \
                                 X_predictions_g, X_uncertainties_g)
                print 'kernel took', time.time() - t0, 's'
                #print queue.finish()

            #### wait until completions
            events = [completeEvent]
            print 'up to events finished kernel took', time.time() - t0, 's'
            #print queue.finish()

            ##### copy result data
            t0 = time.time()
                            wait_for=events)  #.wait()
            #print queue.finish()
            cl.enqueue_copy(queue, X_uncertainties, X_uncertainties_g)
            print 'copy from DEVICE to HOST took', time.time() - t0, 's'
            y = np.vstack((X_predictions, X_uncertainties)).T
            #print y
            return y

        except Exception as e: