def prepare_tiff(tile):
    """Build the merged model GeoTIFF and the PNG previews for one tile.

    Steps, in order:

    1. Convert the tile's B04, B08 and TCI sources to GeoTIFF via ``to_tiff``.
    2. Derive an NDVI raster from the B04/B08 GeoTIFFs via ``get_ndvi``.
    3. Scale NDVI and B08 via ``scale_img``.
    4. Merge TCI + scaled NDVI + scaled B08 with ``gdal_merge.py -separate``
       into ``<MODEL_TIFFS_DIR>/<tile_name>/<tile_name>.tif``.
    5. Store that path in ``tile.model_tiff_location`` and call ``tile.save()``.
    6. Write ``rgb.png``, ``ndvi.png`` and ``b8.png`` next to the merged file.
    7. Delete every ``*.tif`` located directly in ``MODEL_TIFFS_DIR``.

    Args:
        tile: Object exposing ``tile_name``, ``source_b04_location``,
            ``source_b08_location``, ``source_tci_location``, a writable
            ``model_tiff_location`` attribute and a ``save()`` method.

    Raises:
        subprocess.CalledProcessError: ``gdal_merge.py`` exited with a
            non-zero status. The tile is not saved in that case.
        FileNotFoundError: ``gdal_merge.py`` could not be found on ``PATH``.
    """
    # Function-scope import so the block does not depend on a file-level one.
    import subprocess

    save_path = MODEL_TIFFS_DIR
    tile_name = tile.tile_name

    # Temporary single-band files, all created directly inside save_path.
    tiff_b4 = join(save_path, f'{tile_name}_B04.tif')
    tiff_b8 = join(save_path, f'{tile_name}_B08.tif')
    tiff_rgb = join(save_path, f'{tile_name}_TCI.tif')
    tiff_ndvi = join(save_path, f'{tile_name}_ndvi.tif')
    # scale_img receives a prefix; the merge step below reads
    # "<prefix>_scaled.tif", so scale_img is assumed to append that suffix
    # itself -- TODO confirm against scale_img.
    scaled_b8 = f"{join(save_path, f'{tile_name}_B08')}_scaled.tif"
    scaled_ndvi = f"{join(save_path, f'{tile_name}_ndvi')}_scaled.tif"

    try:
        print('\nb4 and b8 bands are converting to *tif...\n')
        to_tiff(tile.source_b04_location, tiff_b4)
        to_tiff(tile.source_b08_location, tiff_b8)
        to_tiff(tile.source_tci_location, tiff_rgb, 'Byte')

        print('\nndvi band is processing...')
        get_ndvi(tiff_b4, tiff_b8, tiff_ndvi)

        print('\nall bands are scaling to 8-bit images...\n')
        scale_img(tiff_ndvi, join(save_path, f'{tile_name}_ndvi'))
        # NOTE(review): B08 is scaled from the original source location, not
        # from the GeoTIFF converted above; kept as in the original code.
        scale_img(tile.source_b08_location, join(save_path, f'{tile_name}_B08'))

        output_folder = path_exists_or_create(join(MODEL_TIFFS_DIR, tile_name))
        tiff_output_name = join(output_folder, f'{tile_name}.tif')

        print('\nall bands are being merged...\n')
        # Argument list instead of a shell string: paths with spaces or shell
        # metacharacters are passed through intact, and check=True surfaces a
        # failed merge instead of silently saving a path to a missing file.
        subprocess.run(
            ['gdal_merge.py', '-separate', '-o', tiff_output_name,
             tiff_rgb, scaled_ndvi, scaled_b8],
            check=True,
        )

        tile.model_tiff_location = tiff_output_name
        tile.save()

        print('\nsaving in png...\n')
        previews = (
            (join(output_folder, 'rgb.png'), tiff_rgb),
            (join(output_folder, 'ndvi.png'), tiff_ndvi),
            (join(output_folder, 'b8.png'), scaled_b8),
        )
        for dest, source in tqdm(previews):
            # The context manager closes the dataset; no explicit close needed.
            with rasterio.open(source) as src:
                # Move the leading axis of the read array to the end before
                # handing it to imageio.
                imageio.imwrite(dest, np.moveaxis(src.read(), 0, -1))
    finally:
        # Always drop the temporaries, even when a step above failed.
        # This removes *every* .tif directly in save_path, not only the files
        # of this tile; the merged result lives in a sub-folder and survives.
        for item in os.listdir(save_path):
            if item.endswith('.tif'):
                os.remove(join(save_path, item))
        print('\ntemp files have been deleted\n')
def process(self):
    """Prepare masks, image pieces and cloud pieces for each tile folder.

    Every entry name returned by ``os.listdir(self.tiff_path)`` is treated as
    a tile folder holding ``<name>_merged.tiff`` and ``clouds.tiff``. Results
    go to ``<PIECES_DIR>/<name>``, with split masks under ``masks`` and split
    clouds under ``clouds``.
    """
    for tile_name in os.listdir(self.tiff_path):
        out_dir = path_exists_or_create(os.path.join(PIECES_DIR, tile_name))

        tile_dir = os.path.join(self.tiff_path, tile_name)
        merged_tiff = os.path.join(tile_dir, f"{tile_name}_merged.tiff")
        clouds_tiff = os.path.join(tile_dir, "clouds.tiff")

        # First pass without date filtering; its return value is not used.
        self.poly2mask(tile_name, merged_tiff, out_dir, filter_by_date=False)
        # Second pass with date filtering supplies the mask that gets split.
        mask_path, _ = self.poly2mask(
            tile_name, merged_tiff, out_dir, filter_by_date=True)

        self.divide_into_pieces(tile_name, merged_tiff, out_dir)
        pieces_csv = os.path.join(out_dir, 'image_pieces.csv')

        masks_dir = path_exists_or_create(os.path.join(out_dir, 'masks'))
        self.split_mask(mask_path, masks_dir, pieces_csv)

        clouds_dir = path_exists_or_create(os.path.join(out_dir, 'clouds'))
        self.split_cloud(clouds_tiff, clouds_dir, pieces_csv)
def prepare_tiff(filename):
    """Convert, scale and merge the configured bands of one downloaded image.

    The band list is read from ``BANDS_TO_DOWNLOAD`` in the ``config`` section
    of ``gcp_config.ini``. For ``filename`` the function:

    1. Converts ``TCI`` (as ``Byte``) and every other configured band from
       ``DOWNLOADED_IMAGES_DIR`` to GeoTIFF inside
       ``<MODEL_TIFFS_DIR>/<filename>``.
    2. Derives ``ndvi`` when both ``B04`` and ``B08`` are configured, and
       ``ndmi`` when both ``B11`` and ``B8A`` are configured (both through
       ``get_ndvi``).
    3. Scales every non-TCI band, including the derived ones, via
       ``scale_img``.
    4. Merges TCI (when configured) followed by all scaled bands except
       ``B04`` into ``<filename>_merged.tiff`` with
       ``gdal_merge.py -separate``.
    5. Converts the ``MSK_CLDPRB_20m`` layer to ``clouds.tiff``.
    6. Removes every ``*.tif`` intermediate from the output folder; the
       ``*.tiff`` results are kept.

    Args:
        filename: Base name of the downloaded image, used both to locate the
            ``.jp2`` sources and to name the output folder and files.

    Raises:
        subprocess.CalledProcessError: ``gdal_merge.py`` exited with a
            non-zero status.
        FileNotFoundError: ``gdal_merge.py`` could not be found on ``PATH``.
    """
    # Function-scope import so the block does not depend on a file-level one.
    import subprocess

    config = ConfigParser(allow_no_value=True)
    config.read('gcp_config.ini')
    bands_to_download = config.get('config', 'BANDS_TO_DOWNLOAD').split()

    save_path = path_exists_or_create(join(MODEL_TIFFS_DIR, f"{filename}"))
    bands_to_convert = list(bands_to_download)
    tiff_paths = {}    # band name -> converted GeoTIFF path
    scaled_paths = {}  # band name -> scaled GeoTIFF path
    rgb_tiff = None    # stays None when TCI is not configured

    try:
        if 'TCI' in bands_to_download:
            rgb_tiff = join(save_path, f'{filename}_TCI.tif')
            to_tiff(join(DOWNLOADED_IMAGES_DIR, f'{filename}{ROOT}_TCI.jp2'),
                    rgb_tiff, 'Byte')
            # TCI is merged as-is, so it is neither re-converted nor scaled.
            bands_to_convert.remove('TCI')

        for band in bands_to_convert:
            tiff_paths[band] = join(save_path, f'{filename}_{band}.tif')
            to_tiff(join(DOWNLOADED_IMAGES_DIR, f'{filename}{ROOT}_{band}.jp2'),
                    tiff_paths[band])

        if 'B04' in bands_to_download and 'B08' in bands_to_download:
            tiff_paths['ndvi'] = join(save_path, f'{filename}_ndvi.tif')
            print('\nndvi band is processing...')
            get_ndvi(tiff_paths['B04'], tiff_paths['B08'], tiff_paths['ndvi'])
            bands_to_convert.append('ndvi')

        if 'B11' in bands_to_download and 'B8A' in bands_to_download:
            tiff_paths['ndmi'] = join(save_path, f'{filename}_ndmi.tif')
            print('\nndmi band is processing...')
            # get_ndvi is reused for NDMI with B11 and B8A as its two inputs.
            get_ndvi(tiff_paths['B11'], tiff_paths['B8A'], tiff_paths['ndmi'])
            bands_to_convert.append('ndmi')

        for band in bands_to_convert:
            scaled_paths[band] = f"{tiff_paths[band]}_scaled.tif"
            scale_img(tiff_paths[band], scaled_paths[band])

        tiff_output_name = join(save_path, f'{filename}_merged.tiff')

        # B04 is converted and scaled but left out of the merged stack.
        if 'B04' in bands_to_convert:
            bands_to_convert.remove('B04')

        files_to_merge = [scaled_paths[band] for band in bands_to_convert]
        # Prepend the RGB raster only when it was produced. The original
        # indexed it unconditionally and raised KeyError without TCI.
        # NOTE(review): without TCI the merged band layout differs -- confirm
        # downstream consumers can handle that.
        if rgb_tiff is not None:
            files_to_merge.insert(0, rgb_tiff)

        print(" ".join(files_to_merge))
        # Argument list instead of a shell string: paths with spaces survive
        # and a failed merge raises instead of being ignored.
        subprocess.run(
            ['gdal_merge.py', '-separate', '-o', tiff_output_name,
             *files_to_merge],
            check=True,
        )

        to_tiff(
            join(DOWNLOADED_IMAGES_DIR, f'{filename}{ROOT}_MSK_CLDPRB_20m.jp2'),
            join(save_path, "clouds.tiff"))
    finally:
        # Intermediates end in ".tif"; the results end in ".tiff" and are kept.
        for item in os.listdir(save_path):
            if item.endswith('.tif'):
                os.remove(join(save_path, item))
f'gdal_calc.py -A {b4_file} -B {b8_file} \ --outfile={ndvi_file} \ --calc="(B-A)/(A+B+0.001)" --type=Float32' ) def parse_args(): parser = argparse.ArgumentParser(description='Script for predicting masks.') parser.add_argument( '--save_path', '-s', dest='save_path', default='data', help='Path to directory where results will be stored' ) return parser.parse_args() MODEL_TIFFS_DIR = path_exists_or_create('data/model_tiffs') def prepare_tiff(tile): save_path = MODEL_TIFFS_DIR # defining temporary files names output_tiffs = {'tiff_b4_name': join(save_path, f'{tile.tile_name}_B04.tif'), 'tiff_b8_name': join(save_path, f'{tile.tile_name}_B08.tif'), 'tiff_rgb_name': join(save_path, f'{tile.tile_name}_TCI.tif'), 'tiff_ndvi_name': join(save_path, f'{tile.tile_name}_ndvi.tif'), 'scaled_b8_name': join(save_path, f'{tile.tile_name}_B08'), 'scaled_ndvi_name': join(save_path, f'{tile.tile_name}_ndvi')} print('\nb4 and b8 bands are converting to *tif...\n') to_tiff(tile.source_b04_location, output_tiffs.get('tiff_b4_name'))
import os import subprocess from django.conf import settings from concurrent.futures import ThreadPoolExecutor from configparser import ConfigParser from google.cloud import storage from xml.dom import minidom from clearcuts.models import TileInformation from utils import path_exists_or_create, Bands DOWNLOADED_IMAGES_DIR = path_exists_or_create('data/source_images/') class SentinelDownload: def __init__(self): os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './key.json' self.tile_dates_count = settings.MAXIMUM_DATES_REVIEWED_FOR_TILE self.storage_client = storage.Client() self.storage_bucket = self.storage_client.get_bucket( 'gcp-public-data-sentinel-2') self.config = ConfigParser(allow_no_value=True) self.config.read('gcp_config.ini') self.area_tile_set = self.config.get('config', 'AREA_TILE_SET').split() self.bands_to_download = self.config.get('config', 'BANDS_TO_DOWNLOAD').split() self.resolution = self.config.get('config', 'RESOLUTION') self.executor = ThreadPoolExecutor(max_workers=10) def process_download(self):
def get_diff_and_split(self):
    """Create image differences for nearby dates and split them by position.

    Part 1 (differences): tiles returned by ``getdates(self.data_path)`` are
    sorted by ``img_date``, newest first. Each tile is paired with up to
    ``self.days_limit`` older tiles. A pair is processed with ``self.imgdiff``
    only when

    * the dates are at most
      ``(self.days_limit + 1) * settings.SENTINEL_DELTA_DAYS`` days apart, and
    * both dates resolve to the same markup index (the last markup for which
      ``date_limit`` is truthy, or 0 when none is).

    ``imgdiff`` receives the CSV writer of ``<save_path>/data_info.csv``,
    whose header is ``dataset_folder, name, position, mask_pxl``.

    Part 2 (split): unique ``position`` values from that CSV are assigned to
    train / test / valid using a fixed NumPy seed (59) and the fractions
    ``self.train_size`` and ``self.test_size``; whatever remains is valid.
    For each split, ``<save_path>/<split>_df.csv`` receives all rows of its
    positions and ``<save_path>/onlymasksplit/<split>_df.csv`` only the rows
    with ``mask_pxl > 0``.
    """
    tiles = pd.DataFrame(getdates(self.data_path),
                         columns=['tileID', 'img_date'])
    tiles = tiles.sort_values(['img_date'], ascending=False)

    infofile = os.path.join(self.save_path, 'data_info.csv')

    markups = [
        gp.read_file(os.path.join(self.polys_path, shp))
        for shp in os.listdir(self.polys_path)
    ]
    for shp in markups:
        # Markup dates are stored as 'YYYY-MM-DD' strings; parse them once.
        shp['img_date'] = shp['img_date'].apply(
            lambda x: datetime.datetime.strptime(x, '%Y-%m-%d'))

    dates = tiles['img_date']
    tile_ids = tiles['tileID']
    number_of_dates = len(tiles)

    # newline='' is what the csv module documents for files given to a writer.
    with open(infofile, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(['dataset_folder', 'name', 'position', 'mask_pxl'])

        for index_current in range(number_of_dates - 1):
            index_next = index_current + 1
            # Clamp the window to the available tiles instead of testing the
            # index on every iteration.
            window_end = min(index_next + self.days_limit, number_of_dates)
            for index_previous in range(index_next, window_end):
                current = dates.iloc[index_current]
                previous = dates.iloc[index_previous]

                # The last markup accepted by date_limit wins; default is 0.
                markup_number_current, markup_number_previous = 0, 0
                for shp_num, markup in enumerate(markups):
                    if date_limit(current, markup):
                        markup_number_current = shp_num
                    if date_limit(previous, markup):
                        markup_number_previous = shp_num

                dt = current - previous
                max_gap = (self.days_limit + 1) * settings.SENTINEL_DELTA_DAYS
                if dt.days > max_gap:
                    continue  # dates are too far apart
                if markup_number_current != markup_number_previous:
                    continue  # dates belong to different markups

                diff_path = (f"{self.save_path}/"
                             f"{current.date()}_{previous.date()}")
                path_exists_or_create(diff_path)
                path_exists_or_create(
                    os.path.join(diff_path, self.images_path))
                path_exists_or_create(
                    os.path.join(diff_path, self.masks_path))
                self.imgdiff(tile_ids.iloc[index_current],
                             tile_ids.iloc[index_previous],
                             diff_path, writer)

    # The info file is closed (and therefore flushed) before it is read back.
    df = pd.read_csv(infofile)
    xy = df['position'].unique()

    # Fixed seed keeps the split reproducible between runs.
    np.random.seed(seed=59)
    rand = np.random.random(size=len(xy))

    train, test, valid = [], [], []
    test_upper_bound = self.train_size + self.test_size
    for position, draw in zip(xy, rand):
        if draw <= self.train_size:
            train.append(position)
        elif draw < test_upper_bound:
            test.append(position)
        else:
            valid.append(position)

    path_exists_or_create(f'{self.save_path}/onlymasksplit')

    # Copy the header line in Python rather than shelling out to `head`,
    # which is POSIX-only and breaks on paths containing spaces.
    with open(infofile, newline='') as info:
        header_line = info.readline()

    for data_type, name_type in zip([train, test, valid],
                                    ['train', 'test', 'valid']):
        rows_written = 0
        position_save = os.path.join(self.save_path, 'onlymasksplit',
                                     f'{name_type}_df.csv')
        output_file = os.path.join(self.save_path, f'{name_type}_df.csv')

        for target in (output_file, position_save):
            with open(target, 'w', newline='') as out:
                out.write(header_line)

        for position in data_type:
            rows = df[df['position'] == position]
            rows.to_csv(output_file, mode='a', header=False,
                        index=False, sep=',')
            rows[rows['mask_pxl'] > 0].to_csv(
                position_save, mode='a', header=False, index=False, sep=',')
            rows_written += rows.shape[0]

        print(f"{name_type} markups: {rows_written}")

    print('Train split: %d' % len(train))
    print('Test split: %d' % len(test))
    print('Valid split: %d' % len(valid))
import logging import os import subprocess from clearcuts.models import TileInformation from utils import path_exists_or_create logging.basicConfig(format='%(asctime)s %(message)s') MAPBOX_TIFFS_DIR = path_exists_or_create('data/mapbox_tiffs') def jp2_to_tiff(): """ Conversion raw satellite jp2 images to tiffs for mapbox """ jp2files = list( TileInformation.objects.filter( source_tci_location__contains='jp2').filter( source_tci_location__contains='TCI').values_list( 'source_tci_location', flat=True)) for file in jp2files: filename = os.path.basename(file).split('.')[0] logging.warning('Converting %s to TIFF format', file) geo_tiff_file = os.path.join(MAPBOX_TIFFS_DIR, f'{filename}.tiff') command_jp2_to_tiff = f'gdalwarp -of GTiff -overwrite -ot Byte -t_srs EPSG:4326 ' \ f'-wm 4096 -multi -wo NUM_THREADS=ALL_CPUS ' \ f'-co COMPRESS=DEFLATE -co PREDICTOR=2 {file} {geo_tiff_file}' # TODO: compressing and renaming dataset for backup reasons """ rio calc "(asarray (take a 1) (take a 2) (take a 3))"
from utils import path_exists_or_create

# --- Tile selection limits ---------------------------------------------
MAXIMUM_DATES_REVIEWED_FOR_TILE = 220
MAXIMUM_DATES_STORE_FOR_TILE = 2
MAXIMUM_EMPTY_PIXEL_PERCENTAGE = 0.05
MAXIMUM_CLOUD_PERCENTAGE_ALLOWED = 50

# Presumably the nominal number of days between acquisitions -- TODO confirm.
SENTINEL_DELTA_DAYS = 5

# --- Image piece geometry ----------------------------------------------
PIECE_WIDTH = 56
PIECE_HEIGHT = 56
NEIGHBOURS = 6

# --- Dataset split fractions -------------------------------------------
TRAIN_SIZE = 0.7
TEST_SIZE = 0.15
VALID_SIZE = 0.15

# --- Data directories --------------------------------------------------
# Directories wrapped in path_exists_or_create are created on import.
DOWNLOADED_IMAGES_DIR = path_exists_or_create('data/source_images/')
MODEL_TIFFS_DIR = path_exists_or_create('data/model_tiffs')
PIECES_DIR = path_exists_or_create('data/output')
# Used as given; this path is not passed through path_exists_or_create.
POLYS_PATH = '../data/time-dependent'
DIFF_PATH = path_exists_or_create('data/diff')