def singledtw(Seeds_B_B1, Seeds_B_B2, b1, b2, nseeds_b, n_seeds, path, **kwargs):
    """Fill one block of the single-feature DTW matrix stored on disk.

    The block covering rows ``b1:b1+nseeds_b`` and columns
    ``b2:b2+nseeds_b`` of the ``DTW_matrix_B`` dataset in
    ``<path>/DTW_matrix_B.h5`` is written.

    When ``b2 < b1`` nothing is computed: the block stored at ``(b2, b1)``
    is read back, transposed and written at ``(b1, b2)``. This requires the
    mirrored block to be present in the file already.

    Args:
        Seeds_B_B1: Array indexed as ``[seed, :]`` holding the trajectories
            of the row block.
        Seeds_B_B2: Array indexed as ``[seed, :]`` holding the trajectories
            of the column block.
        b1: Row offset of the block inside the full matrix.
        b2: Column offset of the block inside the full matrix.
        nseeds_b: Number of seeds per block side.
        n_seeds: Side of the full matrix; only used when the dataset is
            created.
        path: Folder that holds ``DTW_matrix_B.h5``.
        **kwargs: ``DTW_max_samp`` (default 15) is forwarded to the DTW
            routine; ``multiprocessing`` (default True) selects the pooled
            implementation.
    """
    DTW_max_samp = kwargs.get('DTW_max_samp', 15)
    use_pool = kwargs.get('multiprocessing', True)

    filename = fm.joinpath(path, 'DTW_matrix_B.h5')
    span1 = slice(b1, b1 + nseeds_b)
    span2 = slice(b2, b2 + nseeds_b)

    if b2 < b1:
        # Exploit symmetry: reuse the transposed (b2, b1) block.
        with h5py.File(filename, 'r') as hf:
            mirrored = np.array(hf["DTW_matrix_B"][span2, span1])
        with h5py.File(filename, 'a') as hf:
            hf["DTW_matrix_B"][span1, span2] = mirrored.transpose()
        return

    # Compute the DTW distance between the seeds of the two blocks.
    if use_pool:
        counter = Value('i', 0)
        # Half the logical CPUs (to account for virtual cores) minus one,
        # but never fewer than one worker: Pool rejects a count below 1 and
        # os.cpu_count() may return None.
        corecount = max(1, (os.cpu_count() or 2) // 2 - 1)
        pool = Pool(corecount, initializer=counterinit, initargs=(counter,))
        try:
            results = pool.map(
                partial(single_DTW_process,
                        Seeds_B_B1=Seeds_B_B1,
                        Seeds_B_B2=Seeds_B_B2,
                        nseeds_b=nseeds_b,
                        corecount=corecount,
                        DTW_max_samp=DTW_max_samp),
                range(nseeds_b))
        finally:
            # Release the workers even if a task raised.
            pool.close()
            pool.join()
        DTW_matrix_B = np.array(results)
    else:
        DTW_matrix_B = np.zeros((nseeds_b, nseeds_b))
        for ns1 in range(nseeds_b):
            Traj1 = Seeds_B_B1[ns1, :]
            for ns2 in range(nseeds_b):
                Traj2 = Seeds_B_B2[ns2, :]
                DTW_matrix_B[ns1, ns2] = distance_fast(
                    Traj1, Traj2, max_step=DTW_max_samp)

    # Save the block; mode 'a' creates the file when it does not exist yet.
    with h5py.File(filename, 'a') as hf:
        if "DTW_matrix_B" not in hf:
            hf.create_dataset("DTW_matrix_B", (n_seeds, n_seeds), chunks=True)
        hf["DTW_matrix_B"][span1, span2] = DTW_matrix_B
def manager(tile, **kwargs):
    """Run the ``callbfast.R`` script once per feature of a tile.

    The function first checks that, for every requested year, the feature
    GeoTIFFs and the per-feature ``ts.h5`` time series exist, then invokes
    ``rscript libs/ToolboxModules/callbfast.R`` for each feature with the
    image size, first/last year, frequency, tile name, 1-based feature
    number, batch size and output path as command-line arguments.

    Args:
        tile: Tile name; ``<tile>_<year>`` is the per-year folder name.
        **kwargs:
            info: Print progress (default True).
            years: Sequence of year strings (required).
            outpath: Root output folder.
            frequency: Forwarded to the R script (default 365).
            batchsize: Forwarded to the R script (default 200).

    Raises:
        ValueError: If ``years`` is missing or empty.
        IOError: If a feature image or a ``ts.h5`` file cannot be found.
    """
    import warnings

    info = kwargs.get('info', True)
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)

    if years is None or len(years) == 0:
        raise ValueError("'years' must be a non-empty sequence of year strings")

    #GET IMAGE INFO
    for y in years:
        featurepath = fm.check_folder(outpath, tile + '_' + y, 'Features')
        fn = [f for f in os.listdir(featurepath) if f.endswith('.tif')]
        if not fn:
            raise IOError('Unable to find input data!')
    # The image size is taken from the first listed image of the last year.
    img = fm.readGeoTIFFD(fm.joinpath(featurepath, fn[0]), metadata=False)
    height, width, totfeatures = img.shape

    #CHECK TS DATA
    for y in years:
        for feature in range(totfeatures):
            tspath = fm.check_folder(outpath, tile + '_' + y,
                                     'NDI_TimeSeries',
                                     'NDI' + str(feature + 1))
            if not os.path.exists(fm.joinpath(tspath, 'ts.h5')):
                raise IOError('Unable to find input data!')

    #PREPARE PARAMETERS (the R script receives everything as strings)
    height = str(height)
    width = str(width)
    startyear = str(years[0])
    endyear = str(years[-1])
    frequency = str(kwargs.get('frequency', 365))
    tile = str(tile)
    batchsize = str(kwargs.get('batchsize', 200))

    for feature in range(totfeatures):
        if info:
            print('Change detection for feature %i/%i...' %
                  ((feature + 1), totfeatures),
                  end='\r')
        feature_arg = str(feature + 1)

        # rscript libs/ToolboxModules/callbfast.R height width startyear
        # endyear frequency tile feature batchsize outpath
        process = subprocess.run([
            'rscript', 'libs/ToolboxModules/callbfast.R', height, width,
            startyear, endyear, frequency, tile, feature_arg, batchsize,
            outpath
        ],
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
        if process.returncode != 0:
            # Do not abort the remaining features, but make the failure
            # visible instead of silently discarding it.
            warnings.warn(
                'callbfast.R exited with code %i for feature %s:\n%s' %
                (process.returncode, feature_arg, process.stdout))
def load_seeds(tile, ns, feature, col_DATA, **kwargs):
    """Load the multi-year time series of the selected seed rows.

    For every year the ``ts`` dataset of
    ``<loadpath>/<tile>_<year>/NDI_TimeSeries/NDI<feature+1>/ts.h5`` is read.
    The data columns (``col_DATA`` onwards) of the selected rows are joined
    year after year and min/max-normalised with the global minimum and
    maximum taken over all rows of all years. The leading ``col_DATA``
    metadata columns of the first year are prepended unchanged.

    Args:
        tile: Tile name.
        ns: Row selector applied to the dataset (integer, slice or index
            array).
        feature: 0-based feature index.
        col_DATA: Index of the first data column.
        **kwargs: ``years`` (sequence of year strings) and ``loadpath``
            (root folder).

    Returns:
        Array whose last axis holds the ``col_DATA`` metadata columns
        followed by the normalised samples of all years.
    """
    years = kwargs.get('years', None)
    loadpath = kwargs.get('loadpath', None)

    n2 = 'NDI' + str(feature + 1)
    header = None
    chunks = []
    lows = []
    highs = []

    for y in years:
        n1 = tile + '_' + y
        filename = fm.joinpath(loadpath, n1, 'NDI_TimeSeries', n2, 'ts.h5')
        with h5py.File(filename, 'r') as hf:
            temp = np.array(hf["ts"])

        data = temp[:, col_DATA:]
        lows.append(np.amin(data))
        highs.append(np.amax(data))

        if header is None:
            # Metadata columns must not be rescaled: callers read them back
            # as identifiers.
            header = temp[ns, :col_DATA]
        chunks.append(temp[ns, col_DATA:])

    min_f = min(lows)
    span = max(highs) - min_f

    # Joining on the last axis works for a single row (1-D) as well as for
    # a selection of rows (2-D).
    values = (np.concatenate(chunks, axis=-1) - min_f) / span
    return np.concatenate((header, values), axis=-1)
def load_Seeds_FR(tile, feature, col_DATA, **kwargs):
    """Load all rows of one feature, split into metadata and scaled data.

    The ``ts`` dataset of every year is read from
    ``<outpath>/<tile>_<year>/NDI_TimeSeries/NDI<feature+1>/ts.h5``. The
    data columns (``col_DATA`` onwards) are joined along axis 1 and
    rescaled with their overall minimum and maximum; the metadata columns
    come from the first year only.

    Returns:
        Tuple ``(Seeds_FR, Seeds_F)``: the rescaled data and the leading
        ``col_DATA`` metadata columns.
    """
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)

    band_dir = 'NDI' + str(feature + 1)
    Seeds_FR = None

    for year in years:
        ts_file = fm.joinpath(outpath, tile + '_' + year, 'NDI_TimeSeries',
                              band_dir, 'ts.h5')
        with h5py.File(ts_file, 'r') as hf:
            dataset = hf["ts"]
            if Seeds_FR is None:
                # First year: keep the metadata columns as well.
                Seeds_F = np.array(dataset[:, 0:col_DATA])
                Seeds_FR = np.array(dataset[:, col_DATA:])
            else:
                yearly = np.array(dataset[:, col_DATA:])
                Seeds_FR = np.concatenate((Seeds_FR, yearly), axis=1)

    # Shift to zero, then scale by the maximum of the shifted values.
    shifted = Seeds_FR - np.min(Seeds_FR)
    scaled = shifted / np.max(shifted)
    return scaled, Seeds_F
def load_block(tile, b, feature, col_DATA, **kwargs):
    """Load rows ``b:b+blocksize`` of one feature across all years.

    Only the data columns (``col_DATA`` onwards) are used. The rows of the
    individual years are joined along axis 1 and min/max-normalised with
    the extrema computed over all rows of all years, not just the block.

    Args:
        tile: Tile name.
        b: Index of the first row of the block.
        feature: 0-based feature index.
        col_DATA: Index of the first data column.
        **kwargs: ``years``, ``outpath`` and ``blocksize`` (default 200).

    Returns:
        The normalised block.
    """
    years = kwargs.get('years', None)
    blocksize = kwargs.get('blocksize', 200)
    outpath = kwargs.get('outpath', None)

    band_dir = 'NDI' + str(feature + 1)
    window = slice(b, b + blocksize)

    lows = []
    highs = []
    block = None

    for year in years:
        ts_file = fm.joinpath(outpath, tile + '_' + year, 'NDI_TimeSeries',
                              band_dir, 'ts.h5')
        with h5py.File(ts_file, 'r') as hf:
            values = np.array(hf["ts"][:, col_DATA:])

        lows.append(np.amin(values))
        highs.append(np.amax(values))

        rows = values[window, :]
        if block is None:
            block = rows
        else:
            block = np.concatenate((block, rows), axis=1)

    offset = min(lows)
    scale = max(highs) - offset
    return (block - offset) / scale
def _feature(ts, path, **kwargs):
    """Compute eight normalised-difference features per image and save them.

    The images are sorted by ``InvalidPixNum()`` and only the first
    ``ts_length`` are processed. For each one an ``(height, width, 8)``
    array is built with ``_ndi`` on fixed band pairs, clamped to
    ``[-1, 1]`` and written to
    ``<path>/<tile>_<date>T<time>_NDI.tif``. The georeferencing is taken
    from the ``B04`` file of the first image after sorting.

    Args:
        ts: Sequence of image objects.
        path: Output folder.
        **kwargs:
            info: Print progress and timing (default True).
            ts_length: Maximum number of images to process (default: all).
                The historical misspelling ``ts_legth`` is still honoured.
    """
    info = kwargs.get('info', True)
    # 'ts_legth' is the original (misspelled) option name; keep accepting it
    # so existing callers are not broken.
    ts_length = kwargs.get('ts_length', kwargs.get('ts_legth', len(ts)))

    if info:
        print('Extracting features for each image:')
        t_start = time.time()

    #Get some information from data
    height, width = ts[0].feature('NDVI').shape
    ts = sorted(ts, key=lambda x: x.InvalidPixNum())[0:ts_length]
    totimg = len(ts)

    # Bands are loaded in this order; each feature is _ndi(first, second).
    band_names = ('BLUE', 'RED', 'GREEN', 'NIR', 'SWIR1', 'SWIR2')
    band_pairs = (
        ('SWIR2', 'BLUE'),
        ('BLUE', 'RED'),
        ('SWIR1', 'GREEN'),
        ('BLUE', 'SWIR1'),
        ('SWIR2', 'NIR'),
        ('SWIR1', 'RED'),
        ('SWIR2', 'RED'),
        ('NIR', 'RED'),
    )
    totfeature = len(band_pairs)

    # The georeferencing is the same for every output, read it only once.
    if ts:
        geotransform, projection = fm.getGeoTIFFmeta(
            ts[0].featurepath()['B04'])

    #Compute Index Statistics
    for idx, img in enumerate(ts):
        if info:
            print('.. %i/%i      ' % ((idx + 1), totimg), end='\r')

        bands = {
            name: img.feature_resc(name, dtype=np.float32)
            for name in band_names
        }

        feature = np.empty((height, width, totfeature))
        for k, (first, second) in enumerate(band_pairs):
            feature[..., k] = _ndi(bands[first], bands[second])

        #Manipulate features: clamp to the valid index range
        np.clip(feature, -1, 1, out=feature)

        #Save features
        sp = fm.joinpath(
            path,
            str(img._metadata['tile']) + '_' + str(img._metadata['date']) +
            'T' + str(img._metadata['time']) + '_NDI.tif')
        fm.writeGeoTIFFD(sp, feature, geotransform, projection)

    if info:
        t_end = time.time()
        print('\nMODULE 1: extracting features..Took ', (t_end - t_start) / 60,
              'min')
def multidtw(Seeds_B1, Seeds_B2, b1, b2, nseeds_b, n_seeds, path, **kwargs):
    """Compute one block of the multi-feature DTW matrix and store it.

    The block covering rows ``b1:b1+nseeds_b`` and columns
    ``b2:b2+nseeds_b`` of the ``DTW_matrix`` dataset in
    ``<path>/DTW_matrix.h5`` is written; the dataset is created with shape
    ``(n_seeds, n_seeds)`` when it does not exist yet.

    Args:
        Seeds_B1: Array indexed as ``[seed, sample, feature]`` for the row
            block.
        Seeds_B2: Array indexed as ``[seed, sample, feature]`` for the
            column block.
        b1: Row offset of the block inside the full matrix.
        b2: Column offset of the block inside the full matrix.
        nseeds_b: Number of seeds per block side.
        n_seeds: Side of the full matrix.
        path: Folder that holds ``DTW_matrix.h5``.
        **kwargs: ``DTW_max_samp`` (default 15) is forwarded to the pooled
            worker; ``multiprocessing`` (default True) selects the pooled
            implementation.
    """
    DTW_max_samp = kwargs.get('DTW_max_samp', 15)
    use_pool = kwargs.get('multiprocessing', True)

    #Compute the DTW distance between the seeds
    if use_pool:
        counter = Value('i', 0)
        # Half the logical CPUs (to account for virtual cores) minus one,
        # but never fewer than one worker: Pool rejects a count below 1 and
        # os.cpu_count() may return None.
        corecount = max(1, (os.cpu_count() or 2) // 2 - 1)
        pool = Pool(corecount, initializer=counterinit, initargs=(counter, ))
        try:
            results = pool.map(
                partial(multi_DTW_process,
                        Seeds_B1=Seeds_B1,
                        Seeds_B2=Seeds_B2,
                        nseeds_b=nseeds_b,
                        corecount=corecount,
                        DTW_max_samp=DTW_max_samp), range(nseeds_b))
        finally:
            # Release the workers even if a task raised.
            pool.close()
            pool.join()
        DTW_matrix = np.array(results)
    else:
        DTW_matrix = np.zeros((nseeds_b, nseeds_b))
        # Work buffers laid out as (feature, sample).
        Traj1 = np.zeros((Seeds_B1.shape[2], Seeds_B1.shape[1]))
        Traj2 = np.zeros((Seeds_B2.shape[2], Seeds_B2.shape[1]))
        for ns1 in range(nseeds_b):
            Traj1[:] = Seeds_B1[ns1].T
            for ns2 in range(nseeds_b):
                Traj2[:] = Seeds_B2[ns2].T
                _, cost, _, _ = dtw1d(Traj1, Traj2)
                # The original wrote to an undefined name here.
                DTW_matrix[ns1, ns2] = cost

    #SAVE OUTPUT; mode 'a' creates the file when it does not exist yet.
    filename = fm.joinpath(path, 'DTW_matrix.h5')
    with h5py.File(filename, 'a') as hf:
        if "DTW_matrix" not in hf:
            hf.create_dataset("DTW_matrix", (n_seeds, n_seeds), chunks=True)
        hf["DTW_matrix"][b1:b1 + nseeds_b, b2:b2 + nseeds_b] = DTW_matrix
def loadts_block(i, j, feature, loadpath, temppath, **kwargs):
    """Stack one spatial block of a feature over all acquisitions.

    Every ``.tif`` in ``loadpath`` is read (band ``feature``) and the window
    ``[i:i+blocksize, j:j+blocksize]`` is copied into the output stack. The
    file names are split on ``'_'``; the second field is split on ``'T'``
    and its first part is converted with ``fm.string2ordinal`` to obtain
    the acquisition day. Images are processed in ascending day order, so
    the last axis of the outputs is chronological.

    The validity mask comes from
    ``<temppath>/<field0>_<field1>/MASK.npy``: a pixel is invalid when its
    mask value is 1, 3 or 4, or when it is reached by ``bindilation`` (50
    iterations) of the value-2 pixels.

    Args:
        i: First row of the window.
        j: First column of the window.
        feature: Band passed to ``fm.readGeoTIFFD``.
        loadpath: Folder with the feature GeoTIFFs.
        temppath: Folder with the per-image mask folders.
        **kwargs: ``blocksize`` (default 200).

    Returns:
        Tuple ``(matr, mask, days)``: the value stack, the boolean validity
        stack (True = valid) and the day numbers counted from the first
        acquisition (which gets day 1).

    Raises:
        IOError: If ``loadpath`` contains no ``.tif`` file.
    """
    blocksize = kwargs.get('blocksize', 200)

    def _acquisition(fname):
        # (ordinal day, file name); the name breaks ties deterministically.
        stamp = fname.split('_')[1]
        return fm.string2ordinal(stamp.split('T')[0]), fname

    # os.listdir returns entries in arbitrary order, so sort by date.
    dated = sorted(
        _acquisition(f) for f in os.listdir(loadpath) if f.endswith('.tif'))
    if not dated:
        raise IOError('Unable to find input data!')

    totimg = len(dated)
    rows = slice(i, i + blocksize)
    cols = slice(j, j + blocksize)

    matr = np.empty((blocksize, blocksize, totimg))
    mask = np.empty((blocksize, blocksize, totimg), dtype=bool)

    #RECONSTRUCT TS
    for idx, (_, fname) in enumerate(dated):
        mtr = fm.readGeoTIFFD(fm.joinpath(loadpath, fname),
                              band=feature,
                              metadata=False)
        matr[..., idx] = mtr[rows, cols]

        #MASK FOR VALID VALUES
        parts = fname.split('_')
        name = parts[0] + '_' + parts[1]
        msk = np.load(fm.joinpath(temppath, name, 'MASK.npy'))
        invalid = ((msk == 3) | (msk == 4)
                   | bindilation(msk == 2, iterations=50) | (msk == 1))
        mask[..., idx] = np.logical_not(invalid)[rows, cols]

    firstday = dated[0][0] - 1
    days = np.array([ordinal - firstday for ordinal, _ in dated])

    return matr, mask, days
def manager(tile, **kwargs):
    """Classify land cover over sliding windows and detect class changes.

    Work in progress: the function loads the seed table and the class
    models, computes a per-window similarity to every class, applies a
    stability rule to the resulting class sequence and records break
    points. The final "output maps" section computes values that are never
    stored, and nothing is returned.

    Keyword options read here: ``info``, ``years``, ``outpath``,
    ``blocksize`` (default 200), ``n_classes`` (default 9),
    ``DTW_max_samp`` (default 15) and ``MAX_CD`` (default 1).
    """
    #SETUP VARIABLES
    info = kwargs.get('info', True)
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)
    loadpath = '' #TODO: where is the test data?
    savepath = fm.check_folder(outpath, tile, 'LCclassificationAndCD')
    blocksize = kwargs.get('blocksize', 200)
    n_classes = kwargs.get('n_classes', 9)
    DTW_max_samp = kwargs.get('DTW_max_samp', 15) # max number of samples of DTW
    MAX_CD = kwargs.get('MAX_CD', 1) # max number of detected changes

    # Column layout of the "ts" table
    col_nPIXEL = 0
    col_nCLASS = 1
    col_nBAND = 2
    col_DATA = 3

    ###############################
    # GET INFO AND INITIALIZATION #
    ###############################
    # NOTE(review): loadpath is the empty string, so both walks below depend
    # on how os.walk treats '' -- confirm the intended input folder.
    for rootname, _, filenames in os.walk(loadpath):
        for f in filenames:
            if (f.endswith('.tif')):
                path = fm.joinpath(rootname, f)
                img = fm.readGeoTIFFD(path, metadata=False)
                width, height, totfeature = img.shape

    for rootname, _, filenames in os.walk(loadpath):
        for f in filenames:
            if (f.endswith('ts.h5')):
                path = fm.joinpath(rootname)
                with h5py.File(fm.joinpath(path,f), 'r') as hf:
                    NDI_ = np.array(hf["ts"])

    #Get classes intervals: class_int[n-1] == n for every class present
    class_int = np.zeros(n_classes)
    class_int_mask = np.unique(NDI_[:,col_nCLASS]).astype(int).tolist()
    for n in class_int_mask:
        class_int[n-1] = n
    class_int = class_int.astype(int).tolist()

    #Get number of seeds
    n_seeds = len(np.unique(NDI_[:,col_nPIXEL]))

    #Get number of features
    n_features = totfeature

    #Get number of seeds per class and class seeds mask
    n_seeds_c = np.zeros(n_classes)
    for nc in class_int:
        n_seeds_c[nc-1] = np.size(NDI_[NDI_[:,col_nCLASS]==nc, :], axis=0)
    n_seeds_c = n_seeds_c.astype(int).tolist()

    seed_class_mask = NDI_[:,col_nCLASS]

    #Define blocksize
    nseeds_b = blocksize

    #Multi feature DTW maximum distance
    path = fm.check_folder(outpath, tile, 'LCTraining_DTW', 'Multifeature')
    DTW_max_d = 0
    for b1 in range(0, n_seeds, nseeds_b):
        for b2 in range(0, n_seeds, nseeds_b):
            # NOTE(review): `filename` is never assigned in this function;
            # presumably it should be the DTW_matrix.h5 file inside `path`.
            with h5py.File(filename, 'r') as hf:
                max_d_block = np.nanmax(np.array(hf["DTW_matrix"][b1:b1+nseeds_b, b2:b2+nseeds_b]))
            if max_d_block > DTW_max_d:
                DTW_max_d = max_d_block

    #Loading the models
    path = fm.check_folder(outpath, tile, 'LCTraining_DTW')
    models = np.load(fm.joinpath(path, "models.npy")) #TODO: npy or h5?

    ############################
    # LC CLASSIFICATION AND CD #
    ############################
    #Time array definition: window start/end sample indices (cumulative sums)
    t_seq_st = np.array([1, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73])
    t_seq_en = np.array([366, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73])
    t_seq_st = np.cumsum(t_seq_st)
    t_seq_en = np.cumsum(t_seq_en)

    #Similarity trends computation and classification
    Test_simi_traj = [None]*n_seeds
    LC_seq = [None]*n_seeds
    for ns in range(n_seeds):
        Traj1 = None
        # NOTE(review): n_features is bound to totfeature (an element of
        # img.shape), so enumerate()/len() on it look wrong -- confirm
        # whether a list of band indices was intended.
        for nb, band in enumerate(n_features):
            Seeds = load_seeds(tile, ns, nb, col_DATA, **kwargs)
            if Traj1 is None:
                Traj1 = np.zeros((len(n_features), len(Seeds[col_DATA:])))
                Traj1[nb,:] = Seeds[col_DATA:]
            else:
                Traj1[nb,:] = Seeds[col_DATA:]

        # NOTE(review): pixnr comes from a float array and is used as a list
        # index below -- verify its type and range.
        pixnr = Seeds[col_nPIXEL]
        Test_simi_traj[pixnr] = np.empty((n_classes, np.size(t_seq_st)))
        LC_seq[pixnr] = np.empty((2, np.size(t_seq_st)))
        for ts in range(np.size(t_seq_st)):
            Traj1_T = Traj1[:, t_seq_st[ts]:t_seq_en[ts]]
            Traj1_T = np.roll(Traj1_T, 73*ts, axis=1)
            for nc in range(n_classes):
                # Best similarity over all models of the class
                max_simi = 0
                for nm in range(len(models[nc])):
                    Traj2 = models[nc][nm]
                    simi = (DTW_max_d - DTW(Traj1_T, Traj2, DTW_max_samp=DTW_max_samp)) / DTW_max_d #TODO: distance_fast
                    max_simi = max(max_simi, simi)
                Test_simi_traj[pixnr][nc,ts] = max_simi
            # NOTE(review): written at index pixnr but read at index ns.
            LC_seq[pixnr][0,ts] = np.argmax(Test_simi_traj[ns][:,ts]) + 1 # +1 number of class vs index

    #Stability rule application
    # Row 0 of LC_seq holds the raw class per window, row 1 the stabilised
    # class: a new class is accepted only after it has been repeated until
    # the counter reaches 4, and is then back-filled over the last windows.
    CD_counter = np.empty(n_seeds)
    break_p = np.empty((n_seeds, MAX_CD))
    LC_seq_bp = np.empty((n_seeds, MAX_CD+1))
    for ns in range(n_seeds):
        counter = 0
        for ts in range(np.size(t_seq_st)):
            if ts == 0:
                LC_seq[ns][1,ts] = LC_seq[ns][0,ts]
            else:
                if (LC_seq[ns][0,ts] == LC_seq[ns][0,ts-1]) and (counter == 0):
                    LC_seq[ns][1,ts] = LC_seq[ns][1,ts-1]
                elif LC_seq[ns][0,ts] != LC_seq[ns][0,ts-1]:
                    LC_seq[ns][1,ts] = LC_seq[ns][1,ts-1]
                    counter = 1
                elif LC_seq[ns][0,ts] == LC_seq[ns][0,ts-1]:
                    counter = counter + 1
                    if counter<4:
                        LC_seq[ns][1,ts] = LC_seq[ns][1,ts-1]
                    else:
                        LC_seq[ns][1,ts-3] = LC_seq[ns][0,ts]
                        LC_seq[ns][1,ts-2] = LC_seq[ns][0,ts]
                        LC_seq[ns][1,ts-1] = LC_seq[ns][0,ts]
                        LC_seq[ns][1,ts] = LC_seq[ns][0,ts]
                        counter = 0

        # Count class changes and record up to MAX_CD break points
        CD_counter[ns] = 0
        for ts in range(1, np.size(t_seq_st)):
            if LC_seq[ns][1,ts] != LC_seq[ns][1,ts-1]:
                CD_counter[ns] = CD_counter[ns] + 1
                if CD_counter[ns] <= MAX_CD:
                    # NOTE(review): CD_counter is a float array (np.empty
                    # default dtype) and is used as an index here.
                    break_p[ns, CD_counter[ns]-1] = ts #TODO: -1?
                    LC_seq_bp[ns, CD_counter[ns]-1] = LC_seq[ns][1,ts-1]
                    LC_seq_bp[ns, CD_counter[ns]] = LC_seq[ns][1,ts]
        if CD_counter[ns] == 0:
            break_p[ns,0] = 0
            LC_seq_bp[ns,0] = LC_seq[ns][1,0]
            LC_seq_bp[ns,1] = LC_seq[ns][1,0]

    np.save(fm.joinpath(savepath, "LC_seq.npy"), LC_seq)
    np.save(fm.joinpath(savepath, "Test_simi_traj.npy"), Test_simi_traj)

    #Output maps
    # NOTE(review): unfinished -- `perc` is computed but never written to
    # `outmaps`, and `outmaps` is neither saved nor returned.
    nyears = len(years)
    outmaps = [None]*nyears
    for ny in range(nyears):
        outmaps[ny] = np.zeros((height, width, 2))

    for row in range(height):
        for col in range(width):
            ns = width*row + col
            if break_p[ns,0] == 0:
                pass
            else:
                # NOTE(review): z is read from a float array and used as an
                # index into t_seq_st / t_seq_en.
                z = break_p[ns,0]
                start_z = t_seq_st[z]
                end_z = t_seq_en[z]
                int_z = np.arange(start_z, end_z)
                int_z = np.ceil(int_z/365)
                for ny in range(nyears):
                    perc = np.sum(int_z[int_z == (ny+1)]) / (365*(ny+1))
                    perc = perc*100
def manager(tile, **kwargs):
    """Train the DTW-based land-cover model of a tile.

    The work is split into stages that are enabled through keyword flags
    (all default to False) and that exchange their results through files
    below ``<outpath>/<tile>/LCTraining_DTW``:

    * ``singlefeaturedtw``: per-feature DTW matrices (``singledtw``).
    * ``featureselection``: separability measure per feature, saved as
      ``sepa_b_vs_b.npy``.
    * ``multifeatureDTW``: multi-feature DTW matrix (``multidtw``).
    * ``similarity``: per-class lower similarity bound, saved as
      ``simi_low.npy``.
    * ``classprototypes``: grouping of the seeds of each class into models,
      prototype trajectories and a confusion matrix.

    Independently of the flags, the maximum single- and multi-feature DTW
    distances are read from the stored matrices and the class-cluster
    parameters are derived from ``sepa_b_vs_b.npy``, so these files must
    exist.

    Args:
        tile: Tile name.
        **kwargs:
            info: Print progress and timing (default True).
            outpath: Root output folder, also searched for the input data.
            blocksize: Seeds per DTW block (default 500); forwarded to the
                block loaders.
            n_classes: Number of classes (default 9).
            DTW_max_samp: Forwarded to the DTW routines (default 15).
            simi_decr: Step of the similarity threshold (default 0.001).
            Remaining entries are passed on to the helpers unchanged.

    Raises:
        IOError: If no ``.tif`` or no ``ts.h5`` file is found in ``outpath``.
    """
    #SETUP DEFAULT OPTIONS
    info = kwargs.get('info', True)
    outpath = kwargs.get('outpath', None)
    savepath = fm.check_folder(outpath, tile, 'LCTraining_DTW')
    blocksize = kwargs.get('blocksize', 500)
    n_classes = kwargs.get('n_classes', 9)
    singlefeaturedtw = kwargs.get('singlefeaturedtw', False)
    featureselection = kwargs.get('featureselection', False)
    multifeatureDTW = kwargs.get('multifeatureDTW', False)
    similarity = kwargs.get('similarity', False)
    classprototypes = kwargs.get('classprototypes', False)
    DTW_max_samp = kwargs.get('DTW_max_samp', 15)  # max number of samples of DTW
    # Needed by both the similarity and the prototype stage.
    simi_decr = kwargs.get('simi_decr', 0.001)  # decrease step of similarity

    # load_block falls back to a block size of 200 when none is passed on;
    # make sure the loaders use the same block size as this function.
    kwargs['blocksize'] = blocksize

    # Column layout of the "ts" table
    col_nPIXEL = 0
    col_nCLASS = 1
    col_nBAND = 2
    col_DATA = 3

    ###############################
    # GET INFO AND INITIALIZATION #
    ###############################
    # The last matching file found by the walk is used, as before.
    tif_path = None
    ts_path = None
    for rootname, _, filenames in os.walk(outpath):
        for f in filenames:
            if f.endswith('.tif'):
                tif_path = fm.joinpath(rootname, f)
            if f.endswith('ts.h5'):
                ts_path = fm.joinpath(rootname, f)
    if tif_path is None or ts_path is None:
        raise IOError('Unable to find input data!')

    img = fm.readGeoTIFFD(tif_path, metadata=False)
    _, _, totfeature = img.shape

    with h5py.File(ts_path, 'r') as hf:
        NDI_ = np.array(hf["ts"])

    #Get classes intervals: class_int[n-1] == n for every class present
    class_int = np.zeros(n_classes)
    class_int_mask = np.unique(NDI_[:, col_nCLASS]).astype(int).tolist()
    for n in class_int_mask:
        class_int[n - 1] = n
    class_int = class_int.astype(int).tolist()

    #Get number of seeds
    n_seeds = len(np.unique(NDI_[:, col_nPIXEL]))

    #Get number of features
    n_features = totfeature

    #Get number of seeds per class and class seeds mask
    n_seeds_c = np.zeros(n_classes)
    for nc in class_int:
        n_seeds_c[nc - 1] = np.size(NDI_[NDI_[:, col_nCLASS] == nc, :],
                                    axis=0)
    n_seeds_c = n_seeds_c.astype(int).tolist()

    seed_class_mask = NDI_[:, col_nCLASS]

    #Define blocksize
    nseeds_b = blocksize

    #Space of analysis parameters
    min_perc_samp_V = np.arange(
        1, 0.64, -0.03).tolist()  # minimum percentage of total used samples
    min_perc_samp_mod_V = np.ones(12, dtype=float) / np.arange(
        1, 13)  # minimum percentage of used samples per model
    min_perc_samp_mod_V = min_perc_samp_mod_V.tolist()

    sepa_b_vs_b = np.zeros((12, 12, n_features))

    ##########################################
    # SINGLE FEATURE DTW SIMILARITY MATRICES #
    ##########################################
    if singlefeaturedtw:
        for feature in range(n_features):
            if info:
                t_start = time.time()
                print('Computing DTW feature %i/%i...' %
                      ((feature + 1), n_features),
                      end='\r')

            path = fm.check_folder(savepath, "Singlefeature",
                                   'DTW_matrix_B' + str(feature + 1))

            for b1 in range(0, n_seeds, nseeds_b):
                Seeds_B_B1 = load_block(tile, b1, feature, col_DATA, **kwargs)
                for b2 in range(0, n_seeds, nseeds_b):
                    Seeds_B_B2 = load_block(tile, b2, feature, col_DATA,
                                            **kwargs)
                    singledtw(Seeds_B_B1, Seeds_B_B2, b1, b2, nseeds_b,
                              n_seeds, path, **kwargs)

            if info:
                t_end = time.time()
                print(
                    '\nMODULE 4: calculating DTW for %ith feature..Took %i' %
                    (feature + 1, (t_end - t_start) / 60), 'min')

    #Single feature DTW maximum distance (infinite entries are ignored)
    DTW_max_d_B = np.zeros(n_features)
    for feature in range(n_features):
        path = fm.check_folder(savepath, "Singlefeature",
                               'DTW_matrix_B' + str(feature + 1))
        filename = fm.joinpath(path, 'DTW_matrix_B.h5')
        max_d = 0
        with h5py.File(filename, 'r') as hf:
            for b1 in range(0, n_seeds, nseeds_b):
                for b2 in range(0, n_seeds, nseeds_b):
                    block = np.array(hf["DTW_matrix_B"][b1:b1 + nseeds_b,
                                                        b2:b2 + nseeds_b])
                    max_d_block = np.nanmax(block[block != np.inf])
                    if max_d_block > max_d:
                        max_d = max_d_block
        DTW_max_d_B[feature] = max_d

    ######################################################
    # FEATURE SPACE ANALYSIS AND FEATURE SPACE REDUCTION #
    ######################################################
    if featureselection:
        for feature in range(n_features):
            if info:
                t_start = time.time()
                print('Feature %i/%i...' % ((feature + 1), n_features),
                      end='\r')

            sepa_c_vs_c = np.zeros((12, 12))
            sepa_c_vs_c_N = np.zeros((12, 12))
            for i, nc in enumerate(class_int_mask):
                c_r = np.delete(class_int_mask, i).tolist()
                for nc1 in c_r:
                    simi_c_W, simi_c_C = load_block_DTW(
                        seed_class_mask, feature, DTW_max_d_B[feature], nc,
                        nc1, savepath)

                    for col_i, min_perc_samp in enumerate(min_perc_samp_V):
                        for row_i, min_perc_samp_mod in enumerate(
                                min_perc_samp_mod_V):
                            # Number of best-matching seeds kept per seed
                            n_keep = math.ceil(n_seeds_c[nc - 1] *
                                               min_perc_samp_mod *
                                               min_perc_samp)
                            sepa_mea = np.zeros(n_seeds_c[nc - 1])
                            for nsc in range(n_seeds_c[nc - 1]):
                                simi_c_C_s = simi_c_C[:, nsc]
                                simi_c_C_s = simi_c_C_s[~np.isnan(simi_c_C_s)]
                                simi_c_C_s = sorted(simi_c_C_s, reverse=True)
                                simi_c_C_s = simi_c_C_s[0:n_keep]

                                simi_c_W_s = simi_c_W[:, nsc]
                                simi_c_W_s = sorted(simi_c_W_s, reverse=True)
                                simi_c_W_s = simi_c_W_s[0:n_keep]

                                pd_C_mu, pd_C_sigma = scipy.stats.distributions.norm.fit(
                                    simi_c_C_s)
                                pd_W_mu, pd_W_sigma = scipy.stats.distributions.norm.fit(
                                    simi_c_W_s)

                                if pd_C_mu <= pd_W_mu:
                                    sepa_mea[nsc] = np.nan
                                else:
                                    sepa_mea[nsc] = (pd_C_mu - pd_W_mu) / (
                                        pd_C_sigma + pd_W_sigma)

                            valid = sepa_mea[~np.isnan(sepa_mea)]
                            if valid.size / (n_seeds_c[nc - 1]) >= min_perc_samp:
                                sepa_c_vs_c[row_i, col_i] = sepa_c_vs_c[
                                    row_i, col_i] + np.mean(valid)
                                sepa_c_vs_c_N[row_i, col_i] = sepa_c_vs_c_N[
                                    row_i, col_i] + 1

            # NOTE(review): accumulated sum times count is kept as written;
            # confirm that a product (not a mean) is intended.
            sepa_b_vs_b[..., feature] = sepa_c_vs_c * sepa_c_vs_c_N

            if info:
                t_end = time.time()
                print(
                    '\nMODULE 4: feature selection for %i th feature..Took %i'
                    % (feature + 1, (t_end - t_start) / 60), 'min')

        np.save(fm.joinpath(savepath, "sepa_b_vs_b.npy"), sepa_b_vs_b)

    #Search for Class Cluster Parameters
    sepa_b_vs_b = np.load(fm.joinpath(savepath, "sepa_b_vs_b.npy"))

    sepa_FS = np.zeros((12, 12))
    for nb in range(n_features):
        sepa_FS = sepa_FS + sepa_b_vs_b[:, :, nb]

    mean_sepa_FS = np.mean(sepa_FS, axis=1)
    max_sepa_pos_samp_x_mod_FS = np.argmax(mean_sepa_FS)
    mean_sepa_max_v_FS = sepa_FS[max_sepa_pos_samp_x_mod_FS, :]
    mean_sepa_max_v_derivate_FS = np.diff(mean_sepa_max_v_FS)
    mean_sepa_max_v_derivate_FS = mean_sepa_max_v_derivate_FS / np.max(
        mean_sepa_max_v_derivate_FS)
    mean_sepa_max_v_derivate_FS = mean_sepa_max_v_derivate_FS * mean_sepa_max_v_FS[
        1:]
    max_sepa_pos_perc_samp_FS = np.argmax(mean_sepa_max_v_derivate_FS)
    max_sepa_pos_perc_samp_FS = max_sepa_pos_perc_samp_FS + 1

    min_perc_samp = min_perc_samp_V[max_sepa_pos_perc_samp_FS]
    min_perc_samp_mod = min_perc_samp_V[
        max_sepa_pos_perc_samp_FS] * min_perc_samp_mod_V[
            max_sepa_pos_samp_x_mod_FS]
    max_mod_class = np.round(min_perc_samp_V[max_sepa_pos_perc_samp_FS] /
                             min_perc_samp_mod)

    #######################################
    # MULTI FEATURE DTW SIMILARITY MATRIX #
    #######################################
    if multifeatureDTW:
        if info:
            t_start = time.time()
            print('Computing multifeature DTW ...', end='\r')

        path = fm.check_folder(savepath, 'Multifeature')

        for b1 in range(0, n_seeds, nseeds_b):
            Seeds_B1 = load_block_multifeature(tile, b1, n_features, col_DATA,
                                               **kwargs)
            for b2 in range(0, n_seeds, nseeds_b):
                # The column block must be loaded at offset b2 (was b1).
                Seeds_B2 = load_block_multifeature(tile, b2, n_features,
                                                   col_DATA, **kwargs)
                multidtw(Seeds_B1, Seeds_B2, b1, b2, nseeds_b, n_seeds, path,
                         **kwargs)

        if info:
            t_end = time.time()
            print(
                '\nMODULE 4: calculating multifeature DTW ...Took %i' %
                ((t_end - t_start) / 60), 'min')

    #Multi feature DTW maximum distance (infinite entries are ignored)
    path = fm.check_folder(savepath, 'Multifeature')
    filename = fm.joinpath(path, 'DTW_matrix.h5')
    DTW_max_d = 0
    with h5py.File(filename, 'r') as hf:
        for b1 in range(0, n_seeds, nseeds_b):
            for b2 in range(0, n_seeds, nseeds_b):
                block = np.array(hf["DTW_matrix"][b1:b1 + nseeds_b,
                                                  b2:b2 + nseeds_b])
                max_d_block = np.nanmax(block[block != np.inf])
                if max_d_block > DTW_max_d:
                    DTW_max_d = max_d_block

    #######################
    # SIMILARITY ANALYSIS #
    #######################
    if similarity:
        min_c_vs_c = np.zeros((len(class_int_mask), len(class_int_mask) - 1))
        max_c_vs_c = np.zeros((len(class_int_mask), len(class_int_mask) - 1))
        mean_c_vs_c = np.zeros((len(class_int_mask), len(class_int_mask) - 1))
        simi_low = np.zeros((len(class_int_mask)))

        for i, nc in enumerate(class_int_mask):
            c_r = np.delete(class_int_mask, i).tolist()
            n_keep = math.ceil(n_seeds_c[nc - 1] * min_perc_samp_mod *
                               min_perc_samp)
            for n, nc1 in enumerate(c_r):
                simi_c_W, simi_c_C = load_block_DTW_multi(
                    seed_class_mask, DTW_max_d, nc, nc1, savepath)

                min_c_s = np.zeros((n_seeds_c[nc - 1]))
                max_c_s = np.zeros((n_seeds_c[nc - 1]))

                for nsc in range(n_seeds_c[nc - 1]):
                    simi_c_C_s = simi_c_C[:, nsc]
                    simi_c_C_s = simi_c_C_s[~np.isnan(simi_c_C_s)]
                    simi_c_C_s = sorted(simi_c_C_s, reverse=True)
                    simi_c_C_s = simi_c_C_s[0:n_keep]

                    simi_c_W_s = simi_c_W[:, nsc]
                    simi_c_W_s = sorted(simi_c_W_s, reverse=True)
                    simi_c_W_s = simi_c_W_s[0:n_keep]

                    pd_C_mu, pd_C_sigma = scipy.stats.distributions.norm.fit(
                        simi_c_C_s)
                    pd_W_mu, pd_W_sigma = scipy.stats.distributions.norm.fit(
                        simi_c_W_s)

                    if pd_C_mu <= pd_W_mu:
                        min_c_s[nsc] = np.nan
                    else:
                        # Sample both fitted densities on the same grid and
                        # look for their crossing between the two means.
                        a = scipy.stats.norm(pd_C_mu, pd_C_sigma).pdf(
                            np.arange(0, 1, simi_decr))
                        b = scipy.stats.norm(pd_W_mu, pd_W_sigma).pdf(
                            np.arange(0, 1, simi_decr))
                        grid = np.int64(
                            np.arange(np.floor(pd_W_mu * (1 / simi_decr)),
                                      (math.ceil(pd_C_mu *
                                                 (1 / simi_decr)) + 1),
                                      1000 * simi_decr))

                        for int_mu in grid:
                            if (round(b[int_mu - 1], 1) -
                                    round(a[int_mu - 1], 1) <= 0):
                                min_c_s[nsc] = int_mu * simi_decr
                                break
                            else:
                                min_c_s[nsc] = np.nan

                        for int_mu in np.flipud(grid):
                            if (round(a[int_mu - 1], 1) -
                                    round(b[int_mu - 1], 1) <= 0):
                                max_c_s[nsc] = int_mu * simi_decr
                                break
                            else:
                                max_c_s[nsc] = np.nan

                min_c_vs_c[i, n] = np.mean(min_c_s[~np.isnan(min_c_s)])
                max_c_vs_c[i, n] = np.mean(max_c_s[~np.isnan(max_c_s)])
                mean_c_vs_c[i, n] = min_c_vs_c[
                    i, n]  #mean([min_c_vs_c(nc,nc1) max_c_vs_c(nc,nc1)])

            simi_low[i] = np.max(mean_c_vs_c[i, :])

        np.save(fm.joinpath(savepath, "simi_low.npy"), simi_low)

    ###############################
    # CLASS PROTOTYPES GENERATION #
    ###############################
    if classprototypes:
        pass_table = np.zeros(n_classes)  # array of pass/no pass
        models_C = [None] * n_classes  # models (seed groups) per class
        used_models = np.zeros(
            n_classes)  # array of number of model used per class
        used_samples_perc = np.zeros(
            n_classes)  # array of used samples per class
        used_simi = np.zeros(n_classes)  # array of used similarity per class

        for nc in class_int_mask:
            max_s = 1  # set max similarity = 1
            min_s = 0  #simi_low(nc); # set min similarity

            # The within-class similarities do not depend on the threshold,
            # so they are loaded once per class.
            _, dist_simi_c = load_block_DTW_multi(seed_class_mask, DTW_max_d,
                                                  nc, nc, savepath)

            while pass_table[nc - 1] == 0:
                # seeds with a similarity above the current threshold
                count_simi_c = (dist_simi_c > max_s)

                # mean similarity per seed over its accepted seeds
                mean_simi_c = np.empty((n_seeds_c[nc - 1])) * np.nan
                for nsc in range(n_seeds_c[nc - 1]):
                    mean_simi_c[nsc] = np.mean(dist_simi_c[count_simi_c[:,
                                                                        nsc],
                                                           nsc])

                # [seed ID | number of accepted seeds | mean similarity]
                simi_order = np.column_stack([
                    np.arange(0, n_seeds_c[nc - 1], 1),
                    np.sum(count_simi_c, axis=0), mean_simi_c
                ])

                # order the seeds
                # NOTE(review): both sorts use column 0 (the seed ID), as in
                # the original -- confirm the intended sort key.
                simi_order = simi_order[np.argsort(-simi_order[:, 0])]
                simi_order = np.array(
                    simi_order[np.argsort(-simi_order[:, 0])], dtype=int)

                models = []  # initialize the models

                for nsc in range(n_seeds_c[nc - 1]):
                    n_mod = len(models)  # number of existing models

                    if n_mod == 0:
                        # no model yet: the first seed starts one
                        models.append(simi_order[nsc, 0])
                    else:
                        # [model ID | acceptance flag | mean similarity
                        #  between the new seed and the model seeds]
                        simi = np.zeros((n_mod, 3))
                        for nm in range(n_mod):
                            seed_int = models[nm]  # seed IDs of the model
                            simi[nm, :] = [
                                nm,
                                np.sum((dist_simi_c[simi_order[nsc, 0],
                                                    seed_int] > max_s) * 1) >=
                                (np.ceil(np.size(seed_int) * 1)),
                                np.mean(dist_simi_c[simi_order[nsc, 0],
                                                    seed_int])
                            ]

                        # most similar model first
                        simi = np.array(simi[np.argsort(-simi[:, 2])],
                                        dtype=int)

                        if simi[0, 1] == 1:
                            # the best model accepts the new seed
                            models[simi[0, 0]] = list(
                                flatten(
                                    [models[simi[0, 0]], simi_order[nsc, 0]]))
                        else:
                            # otherwise the seed starts a new model
                            models.append(simi_order[nsc, 0])

                # delete models with fewer seeds than the threshold
                min_model_size = math.ceil(n_seeds_c[nc - 1] *
                                           min_perc_samp_mod)
                for nm in range(len(models)):
                    if np.size(models[nm]) < min_model_size:
                        models[nm] = []
                models = list(filter(None, models))

                u_models = len(models)  # number of used models

                # fraction of the class seeds covered by the models
                u_samples = np.zeros(u_models)
                for um in range(u_models):
                    u_samples[um] = np.size(models[um])
                u_samples = (np.sum(u_samples)) / (n_seeds_c[nc - 1])

                if ((u_models <= max_mod_class)
                        and (bool(u_samples >= min_perc_samp))):
                    # pass conditions met: store the result
                    pass_table[nc - 1] = 1
                    models_C[nc - 1] = models
                    used_models[nc - 1] = u_models
                    used_samples_perc[nc - 1] = u_samples
                    used_simi[nc - 1] = max_s
                else:
                    if ((max_s > min_s) and (max_s > simi_decr)):
                        # relax the similarity threshold and retry
                        max_s = max_s - simi_decr
                        print(max_s)
                    else:
                        # threshold exhausted: mark the class as failed
                        pass_table[nc - 1] = 2

        # class prototypes creation: models[class][feature] -> trajectories
        models = [[[] for _ in range(n_features)] for _ in range(n_classes)]
        for nb in range(n_features):
            # One load per feature serves all classes.
            Seeds_FR, Seeds_F = load_Seeds_FR(tile, nb, col_DATA, **kwargs)
            # The band column stores the 1-based feature number.
            m2 = Seeds_F[:, col_nBAND] == nb + 1
            for nc in class_int_mask:
                m1 = Seeds_F[:, col_nCLASS] == nc
                TABLE_cb = Seeds_FR[np.logical_and(m1, m2), :]
                # Classes that did not pass have no models.
                for seed_ids in (models_C[nc - 1] or []):
                    TABLE_cbm = TABLE_cb[seed_ids, :]
                    models[nc - 1][nb].append(np.mean(TABLE_cbm, 0))

        # prototypes vs samples
        _, col = Seeds_FR.shape
        Traj1 = np.zeros((n_features, col))
        sampleVSmodels = np.zeros((n_seeds, n_classes + 3))

        for ns in range(n_seeds):
            # NOTE(review): every feature file is reloaded for every seed;
            # consider loading once if memory allows.
            for nb in range(n_features):
                Seeds_FR, Seeds_F = load_Seeds_FR(tile, nb, col_DATA,
                                                  **kwargs)
                Traj1[nb, :] = Seeds_FR[ns, :]

            # [seed | true class | predicted class | similarity per class]
            sample_simi = [ns, Seeds_F[ns, col_nCLASS], 0]
            for nc in (class_int):
                if nc == 0:
                    max_simi = 0
                else:
                    # NOTE(review): models is indexed [class][feature] above
                    # but read as [class][model] here -- verify.
                    n_mod = len(models[nc - 1])
                    max_simi = 0
                    for nm in range(n_mod):
                        Traj2 = models[nc - 1][nm]
                        simi = ((DTW_max_d - distance_fast(
                            Traj1, Traj2, max_step=DTW_max_samp)) /
                                DTW_max_d)
                        max_simi = np.max([max_simi, simi])
                sample_simi.append(max_simi)

            max_v = max(sample_simi[3:])
            max_p = sample_simi[3:].index(max_v)
            sample_simi[2] = max_p + 1
            sampleVSmodels[ns, :] = sample_simi

        #confusion matrix between training samples and prototypes
        # NOTE(review): the result is neither saved nor returned.
        CM_S = confusion_matrix(sampleVSmodels[:, 1], sampleVSmodels[:, 2])
def _append_rows_h5(filename, dataset, rows):
    """Append ``rows`` to a row-resizable dataset, creating the file if needed."""
    if not os.path.isfile(filename):
        with h5py.File(filename, 'w') as hf:
            hf.create_dataset(dataset,
                              data=rows,
                              chunks=True,
                              maxshape=(None, rows.shape[1]))
    else:
        with h5py.File(filename, 'a') as hf:
            hf[dataset].resize((hf[dataset].shape[0] + rows.shape[0]), axis=0)
            hf[dataset][-rows.shape[0]:] = rows


def manager(tilename, **kwargs):
    """Reconstruct the per-pixel time series of every feature of a tile.

    The image is processed in square blocks. For each block the stack
    returned by ``loadts_block`` is handed pixel by pixel to
    ``parallel_manager`` through a process pool. The results are appended
    to ``<savepath>/NDI_TimeSeries/NDI<k>/ts.h5`` (dataset ``ts``) and
    ``mse.h5`` (dataset ``mse``). Each row starts with the linear pixel
    index ``width * row + col``, the class read from the classification map
    and the 1-based feature number.

    Args:
        tilename: Tile name, used for the mask folder below
            ``<maindir>/numpy``.
        **kwargs:
            info: Print progress (default True).
            blocksize: Block side in pixels (default 200).
            mappath: Classification map GeoTIFF; when omitted every pixel
                gets class 0.
            savepath: Folder containing ``Features`` and ``NDI_TimeSeries``.
            maindir: Root folder of the ``numpy`` mask folders.

    Raises:
        IOError: If no feature image is found.
    """
    #SETUP DEFAULT OPTIONS
    info = kwargs.get('info', True)
    blocksize = kwargs.get('blocksize', 200)
    mappath = kwargs.get('mappath', None)

    #PATHS
    loadpath = fm.check_folder(kwargs.get('savepath', None), 'Features')
    savepath = fm.check_folder(kwargs.get('savepath', None), 'NDI_TimeSeries')

    maindir = kwargs.get('maindir', None)
    temppath = fm.joinpath(maindir, 'numpy', tilename)

    #GET IMAGE INFO
    fn = [f for f in os.listdir(loadpath) if f.endswith('.tif')]
    if not fn:
        raise IOError('Unable to find input data!')
    img = fm.readGeoTIFFD(fm.joinpath(loadpath, fn[0]), metadata=False)
    height, width, totfeatures = img.shape

    #LOAD CLASSIFICATION MAP
    if mappath is not None:
        classmap = fm.readGeoTIFFD(mappath, metadata=False)
    else:
        # No map given: use a defined placeholder class instead of
        # uninitialised memory (the original call also misspelled `height`
        # and passed the shape as two arguments).
        classmap = np.zeros((height, width))

    #ALLOC VARIABLES
    npixels = blocksize * blocksize
    rects = np.empty((npixels, 368))
    mse = np.empty((npixels, 4))

    # Half the logical CPUs (to account for virtual cores) minus one, but
    # never fewer than one worker; os.cpu_count() may return None.
    corecount = max(1, (os.cpu_count() or 2) // 2 - 1)

    #LOOP THROUGH FEATURES
    for feature in range(totfeatures):
        if info:
            print('Reconstructing feature %i/%i...' %
                  ((feature + 1), totfeatures),
                  end='\r')

        path = fm.check_folder(savepath, 'NDI' + str(feature + 1))
        ts_file = fm.joinpath(path, 'ts.h5')
        mse_file = fm.joinpath(path, 'mse.h5')

        #FOR EACH BLOCK POSITION (i is a row offset, j a column offset)
        for i in range(0, height, blocksize):
            for j in range(0, width, blocksize):
                matr, mask, days = loadts_block(i, j, feature, loadpath,
                                                temppath, **kwargs)

                counter = Value('i', 0)
                pool = Pool(corecount,
                            initializer=counterinit,
                            initargs=(counter, ))
                try:
                    results = pool.map(
                        partial(parallel_manager,
                                matr=matr,
                                mask=mask,
                                days=days,
                                blocksize=blocksize), range(npixels))
                finally:
                    # Release the workers even if a task raised.
                    pool.close()
                    pool.join()

                for npx in range(npixels):
                    row, col = divmod(npx, blocksize)
                    row = row + i
                    col = col + j

                    pixel_id = width * row + col
                    label = classmap[row, col]

                    rects[npx, 0] = pixel_id
                    rects[npx, 1] = label
                    rects[npx, 2] = feature + 1
                    rects[npx, 3:] = results[npx][0]

                    mse[npx, 0] = pixel_id
                    mse[npx, 1] = label
                    mse[npx, 2] = feature + 1
                    mse[npx, 3] = results[npx][1]

                _append_rows_h5(ts_file, "ts", rects)
                _append_rows_h5(mse_file, "mse", mse)