from tempfile import NamedTemporaryFile
from PIL import Image
from lxml import etree  # xml.etree.ElementTree also provides tostring()
import geoio

def GeoImage(filename, georef):
    # mkVRT is a project-local helper (defined elsewhere) that builds a VRT XML tree
    img = Image.open(filename)
    vrt = mkVRT(georef, img.height, img.width, filename)
    with NamedTemporaryFile(suffix='.vrt') as f:
        f.write(etree.tostring(vrt))
        f.flush()
        return geoio.GeoImage(f.name)
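# --- Usage sketch (illustrative, not from the source): wrap an ungeoreferenced
# raster with a known georeference. The `georef` object is whatever the
# project-local mkVRT() helper expects; 'plain_image.png' is a hypothetical file.
gimg = GeoImage('plain_image.png', georef)
data = gimg.get_data()  # the wrapped raster is now readable through the geoio API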
def raster_to_gt_points(gt_path, train_mask_path=None):
    gt_img = geoio.GeoImage(gt_path)
    gt_data = gt_img.get_data()
    gt_data = gt_data[0, ...]
    if train_mask_path is not None:
        train_mask = tiff.imread(train_mask_path)
        print(gt_data.shape, train_mask.shape)
    ys, xs = np.where(gt_data > 0)
    gts = []
    targets = []
    isTrain = []
    for x, y in zip(xs, ys):
        loc_x, loc_y = gt_img.raster_to_proj(x, y)
        gts.append((loc_y, loc_x))
        targets.append(int(gt_data[y, x]))
        if train_mask_path is not None:
            isTrain.append(train_mask[y, x] > 0)
        else:
            # all points are test points
            isTrain.append(False)
    return gts, targets, isTrain
def _check_inputs(self):
    '''
    Ensure proper composition of input directory ports and that all images
    have the same first dimension. Returns the path to the geojson file and
    the number of bands used in the imagery.
    '''
    # Ensure proper number of images provided
    in_img_dir = os.listdir(self.img_dir)
    imgs = [img for img in in_img_dir if img.endswith('.tif')]
    if len(imgs) > 5:
        raise Exception('There are too many images in the input image directory. ' \
                        'Please use a maximum of five image strips.')
    if len(imgs) == 0:
        raise Exception('No images were found in the input directory. Please ' \
                        'provide at least one GeoTiff image.')

    # Ensure all images have the same number of bands
    bands = [geoio.GeoImage(os.path.join(self.img_dir, img)).shape[0]
             for img in imgs]
    if not all(dim == bands[0] for dim in bands):
        raise Exception('Please make sure all images have the same number of bands')

    # Ensure only one geojson
    geoj_list = [geoj for geoj in os.listdir(self.geoj_dir)
                 if geoj.endswith('.geojson')]
    if len(geoj_list) != 1:
        raise Exception('Make sure there is exactly one geojson in the image_dest ' \
                        's3 bucket')

    return os.path.join(self.geoj_dir, geoj_list[0]), bands[0]
def setUp(self):
    self.test_img = dgsamples.wv2_longmont_1k.ms
    #self.test_img = "../data/imagefiles/053792616010_01/053792616010_01_P001_MUL/14JUN20181517-M2AS-053792616010_01_P001.TIL"
    #self.test_geoimg = "../data/imagefiles/smalldgdelivery/053792616010_01/053792616010_01_P001_MUL/14JUN20181517-M2AS-053792616010_01_P001.TIF"
    #self.test_img_at_sensor_rad = "../data/imagefiles/smalldgdelivery_ms_scaled_to_rad_uW_per_cm2_nm_sr.tif"
    #self.test_img_toa_ref = "../data/imagefiles/smalldgdelivery_ms_scaled_to_toa_ref.tif"
    self.img = geoio.dg.DGImage(self.test_img)
    self.geoimg = geoio.GeoImage(self.test_img)
def load_mask(self, image_id):
    """Generate instance masks for shapes of the given image ID."""
    masks = np.zeros((650, 650))
    ResimPATH = ('D:/DATASET/SpaceNet/Train/AOI_2_Vegas_Train/RGB-PanSharpen/'
                 'RGB-PanSharpen_AOI_2_Vegas_imgg' + str(image_id) + '.tif')
    RGBTIFResmi = geoio.GeoImage(ResimPATH)
    with open(DATASET_DIR + "/geojson/buildings/buildings_AOI_2_Vegas_imgg" +
              str(image_id) + ".geojson") as f:
        data = json.load(f)
    allBuildings = data['features']

    for building in allBuildings:
        veri = building['geometry']['coordinates'][0]  # veri = "data" (Turkish)
        tip = str(building['geometry']['type'])        # tip = "type"
        coordinates = list()
        if tip == 'Point':
            continue
        elif tip == 'MultiPolygon':
            if isinstance(veri, float):
                continue
            kucukBinalar = building['geometry']['coordinates']  # "small buildings"
            for b in range(len(kucukBinalar)):
                veri = kucukBinalar[b][0]
                for i in veri:
                    xPixel, yPixel = RGBTIFResmi.proj_to_raster(i[0], i[1])
                    xPixel = 649 if xPixel > 649 else xPixel
                    yPixel = 649 if yPixel > 649 else yPixel
                    coordinates.append((xPixel, yPixel))
        else:
            if isinstance(veri, float):
                continue
            for i in veri:
                xPixel, yPixel = RGBTIFResmi.proj_to_raster(i[0], i[1])
                xPixel = 649 if xPixel > 649 else xPixel
                yPixel = 649 if yPixel > 649 else yPixel
                coordinates.append((xPixel, yPixel))
        maske = fill_between(coordinates)  # maske = "mask"
        masks = np.dstack((masks, maske))

    if masks.shape != (650, 650):
        # at least one building mask was stacked; drop the all-zero seed layer
        masks = masks[:, :, 1:]
        class_ids = np.asarray([1] * masks.shape[2])
    else:
        class_ids = np.ones((1))
        masks = masks.reshape((650, 650, 1))
    # bool instead of the removed np.bool alias
    return masks.astype(bool), class_ids.astype(np.int32)
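# `fill_between` is not defined in this snippet. A minimal sketch of what it
# plausibly does -- rasterize an (x, y) pixel-coordinate polygon into a 650x650
# binary mask -- using skimage.draw.polygon (an assumption, not the original):
import numpy as np
from skimage.draw import polygon

def fill_between(coordinates, shape=(650, 650)):
    mask = np.zeros(shape, dtype=np.uint8)
    if len(coordinates) < 3:
        return mask  # not enough vertices to form a polygon
    xs = np.array([c[0] for c in coordinates])
    ys = np.array([c[1] for c in coordinates])
    rr, cc = polygon(ys, xs, shape=shape)  # rows come from y, cols from x
    mask[rr, cc] = 1
    return mask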
def invoke(self):
    '''
    Execute task
    '''
    # Load list of features
    with open(self.geoj) as f:
        info = geojson.load(f)['features']
    poly_ct = len(info)

    # Load trained model
    if self.classes:
        m = VggNet(classes=self.classes, model_name='model')
    else:
        m = VggNet(model_name='model')
    m.model.load_weights(self.weights)

    # Format input_shape and max_side_dim
    inp_shape = m.input_shape[-3:]
    if not self.max_side_dim:
        self.max_side_dim = inp_shape[-1]

    # Check all imgs have correct bands
    bands = inp_shape[0]
    for img in self.imgs:
        img_bands = geoio.GeoImage(img).shape[0]
        if bands != img_bands:
            raise Exception('Make sure the model was trained on an image with the ' \
                            'same number of bands as all input images.')

    # Filter shapefile
    de.filter_polygon_size(self.geoj, output_file=self.geoj,
                           max_side_dim=self.max_side_dim,
                           min_side_dim=self.min_side_dim)

    # Numerical vs string classes
    out_name, num_classes = 'classified.geojson', True
    if self.classes:
        num_classes = False

    # Classify file
    m.classify_geojson(self.geoj, output_name=out_name,
                       numerical_classes=num_classes,
                       max_side_dim=self.max_side_dim,
                       min_side_dim=self.min_side_dim,
                       chips_in_mem=1000,
                       bit_depth=self.bit_depth)

    # Write output
    move(out_name, self.output_dir)
def generate_features(gts, labels, filepath):
    # second argument renamed from `targets` to `labels`: the original shadowed
    # the parameter with `targets = []` and then looped over an undefined `labels`
    ft_table = []
    targets = []
    patch_size = 1
    image = geoio.GeoImage(filepath)
    image_data = image.get_data()
    for loc, label in tqdm(zip(gts, labels)):
        # gts are stored as (lat, lon); proj_to_raster expects (x, y) = (lon, lat)
        x, y = image.proj_to_raster(loc[1], loc[0])
        x, y = int(x), int(y)
        if (x >= patch_size and y >= patch_size and
                x < image_data.shape[2] - patch_size and
                y < image_data.shape[1] - patch_size):
            data_point = []
            left_x = x - patch_size
            right_x = x + patch_size + 1
            bot_y = y - patch_size
            top_y = y + patch_size + 1
            patch = image_data[:, bot_y:top_y, left_x:right_x]
            patch = np.swapaxes(patch, 0, 2)
            # first-order stats
            data_point.append(np.mean(patch))
            data_point.append(np.std(patch))
            data_point.append(skew(patch.reshape(-1)))
            data_point.append(kurtosis(patch.reshape(-1)))
            # calculate second-order stats; exband_histgram is a project helper
            # (presumably quantizes the patch before building the GLCM)
            glcm = greycomatrix(exband_histgram(patch), [1],
                                [i * np.pi / 8 for i in range(8)])
            for prop in GLCM_PROPS:
                data_point.append(greycoprops(glcm, prop)[0, 0])
            ft_table.append(data_point)
            targets.append(label)
    targets = np.array(targets)
    ft_table = np.array(ft_table)
    # reshape to a column so hstack with the 2-D feature table is valid
    data_table = np.hstack((targets.reshape(-1, 1), ft_table))
    headers = ["Target"] + ['b_%d' % i for i in range(ft_table.shape[1])]
    df = pd.DataFrame(data=data_table, index=None, columns=headers)
    return df
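# The imports and GLCM_PROPS are not shown in this snippet. A plausible setup,
# assuming GLCM_PROPS lists the six texture properties supported by skimage's
# greycoprops (note: scikit-image >= 0.19 renames these helpers to
# graycomatrix/graycoprops):
import numpy as np
import pandas as pd
import geoio
from tqdm import tqdm
from scipy.stats import skew, kurtosis
from skimage.feature import greycomatrix, greycoprops

GLCM_PROPS = ['contrast', 'dissimilarity', 'homogeneity',
              'energy', 'correlation', 'ASM']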
def main():
    import time
    import dgsamples
    import geoio

    img_small = geoio.GeoImage(dgsamples.wv2_longmont_1k.ms)
    data_small = img_small.get_data()

    start = time.time()
    out_small_numba = downsample(data_small, shape=[300, 300], source='numba')
    print('small numba: %s' % (time.time() - start))

    start = time.time()
    out_small_cv2 = downsample(arr=data_small, shape=(300, 300), source='cv2')
    print('small cv2: %s' % (time.time() - start))

    print('Max diff is: %s' % (out_small_numba - out_small_cv2).max())
    print('Min diff is: %s' % (out_small_numba - out_small_cv2).min())
def random_window(image, chip_size, no_chips=10000):
    """Implement a random chipper on a georeferenced image.

    Args:
        image (str): Image filename.
        chip_size (list): Array of chip dimensions.
        no_chips (int): Number of chips.

    Returns:
        List of chip rasters.
    """
    img = geoio.GeoImage(image)
    chips = []
    for i, chip in enumerate(img.iter_window_random(win_size=chip_size,
                                                    no_chips=no_chips)):
        chips.append(chip)
        if i == no_chips - 1:
            break
    return chips
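# Usage sketch (hypothetical path): draw 100 random 256x256 chips from a strip.
chips = random_window('strip.tif', chip_size=[256, 256], no_chips=100)
print(len(chips))  # each chip is a numpy array read through geoio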
def generate_patches(gts, labels, train_test, filepath, save_dir, patch_size=4):
    # create each class directory once rather than once per point
    for label in set(labels):
        ensure_dir(os.path.join(save_dir, "train", str(label)))
        ensure_dir(os.path.join(save_dir, "test", str(label)))
    image = geoio.GeoImage(filepath)
    image_data = image.get_data()
    for loc, label, istrain in tqdm(zip(gts, labels, train_test)):
        loc_y, loc_x = loc
        x, y = image.proj_to_raster(loc_x, loc_y)
        x, y = int(x), int(y)
        if (x >= patch_size and y >= patch_size and
                x < image_data.shape[2] - patch_size and
                y < image_data.shape[1] - patch_size):
            left_x = x - patch_size
            right_x = x + patch_size + 1
            bot_y = y - patch_size
            top_y = y + patch_size + 1
            patch = image_data[:, bot_y:top_y, left_x:right_x]
            patch = np.swapaxes(patch, 0, 2)
            if istrain:
                tiff.imsave(os.path.join(save_dir, "train", str(label),
                                         "%d_%d.tif" % (y, x)),
                            patch, planarconfig='contig')
            else:
                tiff.imsave(os.path.join(save_dir, "test", str(label),
                                         "%d_%d.tif" % (y, x)),
                            patch, planarconfig='contig')
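# Hypothetical end-to-end usage, chaining raster_to_gt_points (above) with
# generate_patches: rasterized ground truth -> per-class train/test patch dirs.
gts, targets, is_train = raster_to_gt_points('gt_raster.tif', 'train_mask.tif')
generate_patches(gts, targets, is_train, filepath='scene.tif',
                 save_dir='patches', patch_size=4)
# -> patches/train/<label>/<row>_<col>.tif and patches/test/<label>/...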
def prepare_data():
    """ Preprocessing function. """
    print("Preprocessing...")
    df_clusters = pd.read_csv(CLUSTER_PREDICTIONS_DIR)
    filename = 'ppp_2020_1km_Aggregated.tif'
    img = geoio.GeoImage(os.path.join(WORLDPOP, filename))
    im_array = np.squeeze(img.get_data())
    cluster_population = []
    for _, r in df_clusters.iterrows():
        min_lat, min_lon, max_lat, max_lon = create_space(r.cluster_lat,
                                                          r.cluster_lon)
        xminPixel, yminPixel = img.proj_to_raster(min_lon, min_lat)
        xmaxPixel, ymaxPixel = img.proj_to_raster(max_lon, max_lat)
        xminPixel, xmaxPixel = min(xminPixel, xmaxPixel), max(xminPixel, xmaxPixel)
        yminPixel, ymaxPixel = min(yminPixel, ymaxPixel), max(yminPixel, ymaxPixel)
        xminPixel, yminPixel, xmaxPixel, ymaxPixel = (int(xminPixel), int(yminPixel),
                                                      int(xmaxPixel), int(ymaxPixel))
        arr = im_array[yminPixel:ymaxPixel, xminPixel:xmaxPixel]
        arr[arr < 0] = 0  # can't have negative populations
        cluster_population.append(round(arr.mean()))
    df_clusters['cluster_population_density_1km2'] = cluster_population
    return df_clusters
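# `create_space` is not defined in this snippet. A plausible sketch: a fixed
# half-width box around the cluster centroid (0.05 degrees here is an
# illustrative assumption, roughly an 11 km-wide box at the equator):
def create_space(lat, lon, half_width_deg=0.05):
    return (lat - half_width_deg, lon - half_width_deg,
            lat + half_width_deg, lon + half_width_deg)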
if check_equal(Ap_val):
    Ap = float(Ap_val[0])
else:
    print("Ap values are not equal, examine MTL file")
    print(Ap_list)

if 0.0 <= float(Sun_val[1]) <= 90.0:
    sunelev = float(Sun_val[1])
else:
    print("Sun elevation value out of bounds, examine MTL file")
    print(Sun_val)

print(Mp, Ap, sunelev)

######## --------- CONVERT TO TOA REFLECTANCE --------- ########

# Open the multiband Landsat image
img = geoio.GeoImage(in_filename)

# Numpy arrays of tif
data = img.get_data()

# Calculate TOA reflectances - equations from
# https://landsat.usgs.gov/using-usgs-landsat-8-product
newdata = Mp * data + Ap
solzenith = 90 - sunelev
# divide by cos(solar zenith); the factor converts degrees to radians
TOA_refl = newdata / math.cos(solzenith / 360 * 2 * math.pi)

img.write_img_like_this(out_filename, TOA_refl)
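# Sanity check of the reflectance formula with illustrative values
# (Mp = 2.0e-5 and Ap = -0.1 are the usual Landsat 8 OLI reflectance
# rescaling coefficients; the DN and sun elevation are made up):
import math
Mp, Ap, dn, sunelev = 2.0e-5, -0.1, 10000, 45.0
toa = (Mp * dn + Ap) / math.cos(math.radians(90 - sunelev))
print(round(toa, 4))  # 0.1414 = (0.2 - 0.1) / cos(45 degrees)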
import numpy as np
import os
import glob
import cv2
import geoio
import tifffile as tiff

flood_map_dir = "D:\\Workspace\\results\\pisar\\scences\\flood_mask_post"
geo_dir = "D:\\Workspace\\data\\raw\\pi-sar2\\20110312\\tiff_all"
dem_path = "D:\\Workspace\\data\\raw\\pi-sar2\\20110312\\dem.tif"
save_dir = "D:\\Workspace\\results\\pisar\\scences\\dem"

dem_img = geoio.GeoImage(dem_path)
dem_data = dem_img.get_data()  # (bands, rows, cols)

for filepath in glob.glob(os.path.join(flood_map_dir, "*.png")):
    basename = os.path.basename(filepath).split(".")[0]
    flood_img = cv2.imread(filepath, 0)
    flood_dem_img = np.zeros(flood_img.shape + (1, ))
    geo_path = os.path.join(geo_dir, "%s_sc.tif" % basename)
    geo_img = geoio.GeoImage(geo_path)
    y = np.arange(flood_img.shape[0])
    x = np.arange(flood_img.shape[1])
    yx = [[i, j] for i in y for j in x]
def setUp(self):
    self.test_img = dgsamples.wv2_longmont_1k.ms
    self.img = geoio.GeoImage(self.test_img)
def setUp(self):
    self.test_img = dgsamples.wv2_longmont_1k.ms
    self.img = geoio.GeoImage(self.test_img)
    self.vec = dgsamples.wv2_longmont_1k_vectors.poly_geojson_latlon
    self.badvec = dgsamples.bayou_vectors.poly
def create_hdr(img_path, output_port_path, **kwargs):
    logit = kwargs.get('logger')
    # img_path must be a path to the .tif file of an AOP image product
    filename = os.path.split(img_path)[1]
    new_filename = '%s.hdr' % os.path.splitext(filename)[0]

    try:
        os.makedirs(output_port_path)
    except:
        pass

    # Copy input files to output
    for filename in glob('%s.*' % os.path.splitext(img_path)[0]):
        dest = os.path.join(output_port_path, os.path.split(filename)[1])
        copyfile(filename, dest)
        logit.debug('%s -> %s' % (filename, dest))

    # Create empty hdr file
    hdr_file = open(os.path.join(output_port_path, new_filename), "w+")
    logit.debug('New hdr file: %s' % hdr_file)

    # Add fixed-values hdr line
    hdr_file.write('ENVI\n')

    # Open image in geoio
    try:
        img = geoio.DGImage(img_path)
    except Exception:
        # Not an ortho image
        img = geoio.GeoImage(img_path)

    # Create an OrderedDict to have some control over writing order
    envi_dict = collections.OrderedDict()

    # Add elements to the ODict
    envi_dict['description'] = '{Creating ENVI hdr file from AOP data}'
    envi_dict['sensor type'] = DG_SATID_TO_ENVI[img.meta.satid]
    envi_dict['lines'] = str(img.meta.shape[1])
    envi_dict['samples'] = str(img.meta.shape[2])
    envi_dict['bands'] = str(img.meta.shape[0])
    if envi_dict['bands'] == '3':
        # Condition for RGB images??
        band_indexes = [i for i, v in enumerate(img.meta.band_names)
                        if v in RGB_BANDS]
        band_wavelengths = [img.meta.band_centers[index]
                            for index in band_indexes]
        # ENVI expects the key "band names" (with a space), matching the branch below
        envi_dict['band names'] = '{%s}' % ', '.join(RGB_BANDS)
        envi_dict['wavelength'] = '{%s}' % ', '.join(
            str(e) for e in band_wavelengths)
    else:
        envi_dict['band names'] = '{%s}' % ', '.join(
            str(e) for e in img.meta.band_names)
        envi_dict['wavelength'] = '{%s}' % ', '.join(
            str(e) for e in img.meta.band_centers)
    envi_dict['wavelength units'] = DG_WAVELENGTH_UNITS

    # Iterate through elements to write them out to file
    # (.items() instead of the Python-2-only .iteritems())
    for entry, value in envi_dict.items():
        hdr_file.write('%s = %s\n' % (entry, value))

    # Close file
    hdr_file.close()
def filter_polygon_size(input_file, output_file, min_side_dim=0, max_side_dim=125,
                        shuffle=False, make_omitted_files=False):
    '''
    Create a geojson file containing only polygons with acceptable side dimensions.

    INPUT   input_file (str): File name
            output_file (str): Name under which to save filtered polygons.
            min_side_dim (int): Minimum acceptable side length (in pixels) for
                each polygon. Defaults to 0.
            max_side_dim (int): Maximum acceptable side length (in pixels) for
                each polygon. Defaults to 125.
            shuffle (bool): Shuffle polygons before saving to output file.
                Defaults to False.
            make_omitted_files (bool): Create files with omitted polygons. Two
                files are created: one with polygons that are too small and one
                with large polygons. Defaults to False.
    '''

    def write_status(percent_complete):
        '''helper function to write percent complete to stdout'''
        sys.stdout.write('\r%{0:.2f}'.format(percent_complete) + ' ' * 20)
        sys.stdout.flush()

    # load polygons
    with open(input_file) as f:
        data = geojson.load(f)
    total_features = float(len(data['features']))

    # format output file name
    if not output_file.endswith('.geojson'):
        output_file += '.geojson'

    # find indices of acceptable polygons
    ix_ok, small_ix, large_ix = [], [], []
    img_ids = find_unique_values(input_file, property_name='image_id')

    print 'Filtering polygons... \n'
    for img_id in img_ids:
        ix = 0
        print '... for image {} \n'.format(img_id)
        img = geoio.GeoImage(img_id + '.tif')

        # create vrt if img has multiple bands (more efficient);
        # initialize the flag so single-band images don't hit a NameError below
        vrt_flag = False
        if img.shape[0] > 1:
            vrt_flag = True
            vrt_cmd = 'gdalbuildvrt tmp.vrt -b 1 {}.tif'.format(img_id)
            subprocess.call(vrt_cmd, shell=True)
            # saves temporary vrt file to filter on
            img = geoio.GeoImage('tmp.vrt')

        # cycle thru polygons
        for chip, properties in img.iter_vector(vector=input_file,
                                                properties=True,
                                                filter=[{'image_id': img_id}],
                                                mask=True):
            ix += 1
            if chip is None:
                write_status(100 * ix / total_features)
                continue

            chan, h, w = np.shape(chip)

            # Identify small chips
            if min(h, w) < min_side_dim:
                small_ix.append(ix - 1)
                write_status(100 * ix / total_features)
                continue

            # Identify large chips
            elif max(h, w) > max_side_dim:
                large_ix.append(ix - 1)
                write_status(100 * ix / total_features)
                continue

            # Identify valid chips
            ix_ok.append(ix - 1)
            write_status(100 * ix / total_features)

        # remove vrt file
        if vrt_flag:
            os.remove('tmp.vrt')

    # save new geojson
    ok_polygons = [data['features'][i] for i in ix_ok]
    small_polygons = [data['features'][i] for i in small_ix]
    large_polygons = [data['features'][i] for i in large_ix]
    print str(len(small_polygons)) + ' small polygons removed'
    print str(len(large_polygons)) + ' large polygons removed'

    if shuffle:
        np.random.shuffle(ok_polygons)

    data['features'] = ok_polygons
    with open(output_file, 'wb') as f:
        geojson.dump(data, f)

    if make_omitted_files:
        # make file with small polygons
        data['features'] = small_polygons
        with open('small_' + output_file, 'w') as f:
            geojson.dump(data, f)

        # make file with large polygons
        data['features'] = large_polygons
        with open('large_' + output_file, 'w') as f:
            geojson.dump(data, f)

    print 'Saved {} polygons to {}'.format(str(len(ok_polygons)), output_file)
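# Hypothetical usage: keep only polygons between 30 and 125 pixels on a side.
# Assumes every image referenced by the geojson's image_id property sits in
# the working directory as <image_id>.tif.
filter_polygon_size('pools.geojson', output_file='pools_filtered.geojson',
                    min_side_dim=30, max_side_dim=125, shuffle=True)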
def get_data_from_polygon_list(features, min_side_dim=0, max_side_dim=125,
                               num_chips=None,
                               classes=['No swimming pool', 'Swimming pool'],
                               normalize=True, return_id=False,
                               return_labels=True, bit_depth=8, mask=True,
                               show_percentage=True, assert_all_valid=False,
                               resize_dim=None, **kwargs):
    '''
    Extract pixel intensity arrays ('chips') from image strips given a list of
    polygon features from a geojson file. All chips will be of uniform size.
    Will only return chips whose side dimension is between min_side_dim and
    max_side_dim. Each image strip referenced in the image_id property must be
    in the working directory and named as follows: <image_id>.tif.

    INPUTS  features (list): list of polygon features from an open geojson
                file. IMPORTANT: Geometries must be in the same projection as
                the imagery! No projection checking is done!
            min_side_dim (int): minimum size acceptable (in pixels) for a
                polygon. Defaults to 0.
            max_side_dim (int): maximum size acceptable (in pixels) for a
                polygon. Note that this will be the size of the height and
                width of all output chips. Defaults to 125.
            num_chips (int): Maximum number of chips to return. If None, all
                valid chips from features will be returned. Defaults to None.
            classes (list['string']): name of classes for chips. Defaults to
                swimming pool classes (['No swimming pool', 'Swimming pool']).
            normalize (bool): divide all chips by max pixel intensity
                (normalize net input). Defaults to True.
            return_id (bool): return the feature id with each chip. Defaults
                to False.
            return_labels (bool): Include labels in output. Labels will be
                numerical and correspond to the class index within the classes
                argument. Defaults to True.
            bit_depth (int): Bit depth of the imagery, necessary for proper
                normalization. Defaults to 8 (standard for DRA'd imagery).
            show_percentage (bool): Print percent of chips collected to
                stdout. Defaults to True.
            assert_all_valid (bool): Throw an error if any of the included
                polygons do not match the size criteria (defined by min and
                max_side_dim), or are returned as None from geoio. Defaults
                to False.
            resize_dim (tup): Dimensions to reshape chips into after padding.
                Use for downsampling large chips. Dimensions:
                (n_chan, rows, cols). Defaults to None (does not resize).

            kwargs:
            -------
            bands (list of ints): The band numbers (base 1) to be retrieved
                from the imagery. Defaults to None (all bands retrieved).
            buffer (int or list of two ints): Number of pixels to add as a
                buffer around the requested pixels. If an int, the same number
                of pixels will be added to both dimensions. If a list of two
                ints, they will be interpreted as xpad and ypad.

    OUTPUT  chips (array): Uniformly sized chips with the following
                dimensions: (num_chips, num_channels, max_side_dim,
                max_side_dim)
            ids (list): Feature ids corresponding to chips.
            labels (array): One-hot encoded labels for chips with the
                following dimensions: (num_chips, num_classes)
    '''
    ct, inputs, labels, ids, nb_classes = 0, [], [], [], len(classes)
    total = len(features) if not num_chips else num_chips
    cls_dict, imgs = {classes[i]: i for i in xrange(len(classes))}, {}

    def write_status(ct, chip_err=False):
        '''helper function to write percent complete to stdout + raise
        AssertionError'''
        if show_percentage:
            sys.stdout.write('\r%{0:.2f}'.format(100 * (ct + 1) / float(total))
                             + ' ' * 20)
            sys.stdout.flush()

        if chip_err and assert_all_valid:
            raise AssertionError('One or more invalid polygons. Please make sure all ' \
                                 'polygons are valid or set assert_all_valid to False.')
        return ct + 1

    # cycle through polygons and get pixel data
    for poly in features:
        id = poly['properties']['image_id']
        coords = poly['geometry']['coordinates'][0]

        # open all images in geoio
        if id not in imgs.keys():
            try:
                imgs[id] = geoio.GeoImage(id + '.tif')
            except (ValueError):
                raise Exception('{}.tif not found in current directory. Please make ' \
                                'sure all images referenced in features are present ' \
                                'and named properly'.format(str(id)))

        # call get_data on polygon geom
        chip = imgs[id].get_data_from_coords(coords, mask=mask, **kwargs)
        if chip is None:
            ct = write_status(ct, chip_err=True)
            continue

        # check for adequate chip size
        chan, h, w = np.shape(chip)
        pad_h, pad_w = max_side_dim - h, max_side_dim - w

        if min(h, w) < min_side_dim or max(h, w) > max_side_dim:
            ct = write_status(ct, chip_err=True)
            continue

        # zero-pad polygons to (n_bands, max_side_dim, max_side_dim)
        chip = chip.filled(0).astype(float) if mask else chip
        chip_patch = np.pad(chip, [(0, 0), (pad_h/2, (pad_h - pad_h/2)),
                                   (pad_w/2, (pad_w - pad_w/2))],
                            'constant', constant_values=0)

        # resize chip
        if resize_dim:
            new_chip = []
            for band_ix in xrange(len(chip_patch)):
                new_chip.append(imresize(chip_patch[band_ix],
                                         resize_dim[-2:]).astype(float))
            chip_patch = np.array(new_chip)

        # norm pixel intensity from 0 to 1
        if normalize:
            div = (2 ** bit_depth) - 1
            chip_patch /= float(div)

        # get labels
        if return_labels:
            try:
                label = poly['properties']['class_name']
                if label is None:
                    ct = write_status(ct, chip_err=True)
                    continue
                labels.append(cls_dict[label])
            except (TypeError, KeyError):
                ct = write_status(ct, chip_err=True)
                continue

        # get feature ids
        if return_id:
            id = poly['properties']['feature_id']
            ids.append(id)

        # append chip to inputs
        inputs.append(chip_patch)
        ct = write_status(ct)

        if num_chips:
            if len(inputs) == num_chips:
                break

    # combine data
    inputs = [np.array([i for i in inputs])]

    if return_id:
        inputs.append(ids)

    if return_labels:
        # format labels
        Y = np.zeros((len(labels), nb_classes))
        for i in range(len(labels)):
            Y[i, labels[i]] = 1
        inputs.append(Y)

    return inputs
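# Hypothetical usage: uniform 125x125 chips plus one-hot labels for the first
# 500 valid polygons of a (made-up) filtered geojson.
with open('pools_filtered.geojson') as f:
    feats = geojson.load(f)['features']
chips, labels = get_data_from_polygon_list(feats, max_side_dim=125,
                                           num_chips=500, bit_depth=8)
print chips.shape, labels.shape  # (500, n_bands, 125, 125), (500, 2)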
import os
import glob
import cv2
import numpy as np  # needed for np.zeros / np.arange below
import geoio
import tifffile as tiff
from tqdm import tqdm

flood_map_dir = "D:\\Workspace\\results\\pisar\\scences\\water_mask"
geo_dir = "D:\\Workspace\\data\\raw\\pi-sar2\\20110312\\tiff_all"
slope_path = "D:\\Workspace\\data\\raw\\pi-sar2\\20110312\\slope.tif"
save_dir = "D:\\Workspace\\results\\pisar\\scences\\slope"

slope_img = geoio.GeoImage(slope_path)
slope_data = slope_img.get_data()  # (bands, rows, cols)

pbar = tqdm(glob.glob(os.path.join(flood_map_dir, "*.png")))
for filepath in pbar:
    basename = os.path.basename(filepath).split(".")[0]
    flood_img = cv2.imread(filepath, 0)
    flood_slope_img = np.zeros(flood_img.shape + (1, ))
    geo_path = os.path.join(geo_dir, "%s_sc.tif" % basename)
    geo_img = geoio.GeoImage(geo_path)
    y = np.arange(flood_img.shape[0])
    x = np.arange(flood_img.shape[1])
def get_data(shapefile, return_labels=False, return_id=False, buffer=[0, 0],
             mask=False, num_chips=None):
    """Return pixel intensity array for each geometry in shapefile.
       The image reference for each geometry is found in the image_id
       property of the shapefile.
       If shapefile contains points, then buffer must have non-zero entries.
       The function can also return a list of geometry ids; this is useful in
       case some of the shapefile entries do not produce a valid intensity
       array and/or class name.

    Args:
        shapefile (str): Name of shapefile in mltools geojson format.
        return_labels (bool): If True, then a label vector is returned.
        return_id (bool): If True, then the geometry id is returned.
        buffer (list): 2-dim buffer in PIXELS. The size of the box in each
            dimension is TWICE the buffer size.
        mask (bool): Return a masked array.
        num_chips (int): Maximum number of arrays to return.

    Returns:
        chips (list): List of pixel intensity numpy arrays.
        ids (list): List of corresponding geometry ids.
        labels (list): List of class names, if return_labels=True.
    """

    data, ct = [], 0

    # go through point_file and unique image_id's
    image_ids = gt.find_unique_values(shapefile, property_name='image_id')

    # go through the shapefile for each image --- this is how geoio works
    for image_id in image_ids:

        # add tif extension
        img = geoio.GeoImage(image_id + '.tif')

        for chip, properties in img.iter_vector(vector=shapefile,
                                                properties=True,
                                                filter=[{'image_id': image_id}],
                                                buffer=buffer,
                                                mask=mask):

            if chip is None or reduce(lambda x, y: x * y, chip.shape) == 0:
                continue

            # every geometry must have id
            if return_id:
                this_data = [chip, properties['feature_id']]
            else:
                this_data = [chip]

            if return_labels:
                try:
                    label = properties['class_name']
                    if label is None:
                        continue
                except (TypeError, KeyError):
                    continue
                this_data.append(label)

            data.append(this_data)

            # return if max num chips is reached
            if num_chips:
                ct += 1
                if ct == num_chips:
                    return zip(*data)

    return zip(*data)
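# Usage sketch (hypothetical geojson): pull buffered chips around point
# geometries together with their class names.
chips, labels = get_data('train.geojson', return_labels=True,
                         buffer=[5, 5], mask=True, num_chips=1000)
print(len(chips), chips[0].shape)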