def download_section(savepath, section_id, downsample):
    # Download all of the images from a section data set.
    image_api = ImageDownloadApi()
    input_directory = str(section_id) + '_input'
    output_directory = str(section_id) + '_output'
    format_str = '.jpg'

    section_images = image_api.section_image_query(section_id)
    section_image_ids = [si['id'] for si in section_images]

    # You have probably noticed that the AllenSDK has a logger which notifies
    # you of file downloads. Since we are downloading ~300 images, we don't
    # want to see messages for each one. The following line will temporarily
    # disable the download logger (optional).
    logging.getLogger(
        'allensdk.api.api.retrieve_file_over_http').disabled = True

    for section_image_id in section_image_ids:
        file_name = str(section_image_id) + format_str
        input_file_path = os.path.join(savepath, input_directory, file_name)
        output_file_path = os.path.join(savepath, output_directory, file_name)
        Manifest.safe_make_parent_dirs(input_file_path)
        image_api.download_section_image(section_image_id,
                                         file_path=input_file_path,
                                         downsample=downsample,
                                         expression=0)
        Manifest.safe_make_parent_dirs(output_file_path)
        image_api.download_section_image(section_image_id,
                                         file_path=output_file_path,
                                         downsample=downsample,
                                         expression=1)

    # Re-enable the logger (optional).
    logging.getLogger(
        'allensdk.api.api.retrieve_file_over_http').disabled = False

    file_names = os.listdir(os.path.join(savepath, input_directory))
    print(len(file_names))
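# A minimal usage sketch for download_section. The section data set id
# 71724696 is the one queried in the standalone snippet further below; the
# save path and downsample factor here are assumed example values, not taken
# from the original code.
if __name__ == '__main__':
    download_section(savepath='.', section_id=71724696, downsample=4)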
def download_brain_slice(df):
    # Create an image download API.
    image_api = ImageDownloadApi()
    format_str = ".jpg"

    # You have probably noticed that the AllenSDK has a logger which notifies
    # you of file downloads. Since we are downloading ~300 images, we don't
    # want to see messages for each one. The following line will temporarily
    # disable the download logger.
    logging.getLogger("allensdk.api.api.retrieve_file_over_http").disabled = True

    # Get download parameters.
    path, downsample, indices = ask_parameters_for_downloading(df)

    print(
        "Downloads initiated",
        end="...",
        file=sys.stderr,
        flush=True,
    )

    for index in indices:
        # From the index, get the experiment id and gene symbol from df.
        exp_id = df["Experiment"][index]

        # Use the gene symbol as the directory name.
        dirname = df["Gene Symbol"][index]
        plane = df["Plane"][index]

        section_data_set_id = exp_id
        section_image_directory = os.path.join(path, dirname)

        # Get the image ids for all of the images in this data set.
        section_images = image_api.section_image_query(
            section_data_set_id
        )  # A list of dictionaries describing the section images.
        section_image_ids = [
            si["id"] for si in section_images
        ]  # Take the value of 'id' from each dictionary.

        # Create a progress bar.
        pbar_image = tqdm(total=len(section_image_ids), desc=dirname + " " + plane)

        for section_image_id in section_image_ids:
            file_name = str(section_image_id) + format_str
            file_path = os.path.join(section_image_directory, file_name)

            Manifest.safe_make_parent_dirs(file_path)

            # Skip files that are already downloaded, which happens if the
            # downloads have been interrupted.
            saved_file_names = os.listdir(section_image_directory)
            if file_name not in saved_file_names:
                image_api.download_section_image(
                    section_image_id, file_path=file_path, downsample=downsample
                )

            pbar_image.update()

        pbar_image.close()

    # Re-enable the logger.
    logging.getLogger("allensdk.api.api.retrieve_file_over_http").disabled = False
    print(
        "Downloads completed.",
        file=sys.stderr,
        flush=True,
    )
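# A hedged usage sketch for download_brain_slice. It assumes a DataFrame with
# the columns the function reads ("Experiment", "Gene Symbol", "Plane"); the
# row values below are hypothetical placeholders. ask_parameters_for_downloading
# presumably still prompts for the save path, downsample factor, and row
# indices (an assumption based on its name and return values).
if __name__ == "__main__":
    import pandas as pd  # assumed dependency for building the example frame

    example_df = pd.DataFrame(
        {
            "Experiment": [71724696],  # hypothetical SectionDataSet id
            "Gene Symbol": ["Pvalb"],  # hypothetical gene symbol
            "Plane": ["coronal"],
        }
    )
    download_brain_slice(example_df)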
image_api = ImageDownloadApi()
svg_api = SvgApi()

# Download all of the images from a section data set.
section_data_set_id = 71724696
downsample = 4
expression = 1

section_image_directory = str(section_data_set_id) + '_section_images'
format_str = '.jpg'

section_images = image_api.section_image_query(section_data_set_id)
section_image_ids = [si['id'] for si in section_images]
print(len(section_image_ids))

# You have probably noticed that the AllenSDK has a logger which notifies you
# of file downloads. Since we are downloading ~300 images, we don't want to
# see messages for each one. The following line will temporarily disable the
# download logger (optional).
logging.getLogger('allensdk.api.api.retrieve_file_over_http').disabled = True

for section_image_id in section_image_ids:
    file_name = str(section_image_id) + format_str
    file_path = os.path.join(section_image_directory, file_name)
    Manifest.safe_make_parent_dirs(file_path)
    # Mirrors the loop body of download_section above; expression=1 requests
    # the expression-mask rendering of each image.
    image_api.download_section_image(section_image_id,
                                     file_path=file_path,
                                     downsample=downsample,
                                     expression=expression)
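# svg_api is instantiated above but otherwise unused in this snippet. A hedged
# sketch of its typical role, fetching the vector annotations that pair with a
# section image. This assumes SvgApi.download_svg(section_image_id,
# file_path=...) as in allensdk.api.queries.svg_api; verify against your
# AllenSDK version.
first_image_id = section_image_ids[0]
svg_path = os.path.join(section_image_directory, str(first_image_id) + '.svg')
Manifest.safe_make_parent_dirs(svg_path)
svg_api.download_svg(first_image_id, file_path=svg_path)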
class ExperimentImagesDownloader(DirWatcher):
    def __init__(self, input_dir, process_dir, output_dir, structure_map_dir,
                 structs, connectivity_dir, _processor_number,
                 brightness_threshold, strains):
        super().__init__(input_dir, process_dir, output_dir,
                         f'experiment-images-downloader-{_processor_number}')
        self.brightness_threshold = brightness_threshold
        self.structs = ast.literal_eval(structs)
        self.segmentation_dir = structure_map_dir
        self.mcc = MouseConnectivityCache(
            manifest_file=f'{connectivity_dir}/mouse_connectivity_manifest.json')
        struct_tree = self.mcc.get_structure_tree()
        structure_ids = [
            i for sublist in struct_tree.descendant_ids(self.structs)
            for i in sublist
        ]
        self.structure_ids = set(structure_ids)
        self.image_api = ImageDownloadApi()
        self.bbox_dilation_kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE, (14, 14))

        # Interleave experiment ids across the genders of each strain so the
        # work queue alternates evenly between them.
        exps = self.mcc.get_experiments(dataframe=True)
        items = []
        for s, gs in strains.items():
            strain_items = []
            for g in gs:
                strain_items += [
                    sorted(exps[(exps.strain == s)
                                & (exps.gender == g)].id.tolist())
                ]
            if strain_items:
                min_len = min([len(i) for i in strain_items])
                strain_items = [i[:min_len] for i in strain_items]
                items += [str(i) for j in zip(*strain_items) for i in j]
        self.initial_items = [i for i in items if i in self.initial_items] + [
            i for i in self.initial_items if i not in items
        ]

    def on_process_error(self, item, exception):
        retval = super().on_process_error(item, exception)
        self.logger.error("Error occurred during processing", exc_info=True)
        # http.client exposes HTTPException (there is no http.client.error).
        if any(
                map(lambda x: issubclass(type(exception), x), [
                    urllib.error.HTTPError, OSError, ValueError,
                    http.client.HTTPException
                ])):
            return False
        else:
            return retval

    def process_item(self, item, directory):
        experiment_id = int(item)
        retries = 0
        images = []
        while True:
            try:
                time.sleep(2**(retries // 2))
                images = self.image_api.section_image_query(experiment_id)
                break
            except simplejson.errors.JSONDecodeError as e:
                if retries > 10:
                    raise e
                else:
                    self.logger.info("Exception invoking image API, retrying")
                    retries += 1
                    continue

        images = {i['section_number']: i for i in images}
        segmentation = np.load(
            f'{self.segmentation_dir}/{item}/{item}-sections.npz')['arr_0']
        mask = np.isin(segmentation, list(self.structure_ids))
        locs = np.where(mask)
        sections = [
            s for s in sorted(np.unique(locs[2]).tolist()) if s in images
        ]
        bboxes = {
            section: self.extract_bounding_boxes(mask[:, :, section])
            for section in sections
        }
        valid_sections = list(filter(lambda s: bboxes[s], sections))
        self.logger.info(
            f"Experiment {experiment_id}, evaluating brightness...")
        brightness = self.calculate_brightness(directory, experiment_id,
                                               images, valid_sections, mask)

        if brightness < self.brightness_threshold:
            self.logger.info(
                f"Experiment {experiment_id}: brightness less than required minimum, removing..."
            )
            return False

        with open(f'{directory}/bboxes.pickle', 'wb') as f:
            pickle.dump(bboxes, f)
        for section in valid_sections:
            self.process_section(directory, experiment_id, images, section,
                                 bboxes[section])

    def calculate_brightness(self, directory, experiment_id, images,
                             valid_sections, mask):
        # Median thumbnail brightness over the masked structure pixels across
        # all valid sections.
        pixels = np.zeros_like(mask, dtype=np.uint8)
        for section in valid_sections:
            self.download_snapshot(experiment_id, section, images[section],
                                   directory)
            filename = f'{directory}/thumbnail-{experiment_id}-{section}.jpg'
            image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            pixels[:, :, section] = image[:mask.shape[0], :mask.shape[1]]

        return np.median(pixels[mask != 0])

    def process_section(self, directory, experiment_id, images, section,
                        bboxes):
        self.logger.info(
            f"Experiment {experiment_id}, downloading section {section}...")
        self.download_snapshot(experiment_id, section, images[section],
                               directory)
        for bbox in bboxes:
            self.download_fullres(experiment_id, section, bbox,
                                  images[section], directory)

    def extract_bounding_boxes(self, mask, area_threshold=0):
        # Merge nearby detections: paint the initial boxes into a mask, dilate
        # it, then re-extract boxes above the area threshold.
        bboxes = self.get_bounding_boxes(mask)
        bbmask = np.zeros_like(mask, dtype=np.uint8)
        for bbox in bboxes:
            cv2.rectangle(bbmask, *bbox.corners(), color=1, thickness=-1)
        bbmask = cv2.dilate(bbmask, self.bbox_dilation_kernel)
        bboxes = [
            bbox for bbox in self.get_bounding_boxes(bbmask)
            if bbox.area() > area_threshold
        ]
        return bboxes

    @staticmethod
    def get_bounding_boxes(mask):
        # [-2:] keeps the (contours, hierarchy) pair on both OpenCV 3 (which
        # returns three values) and OpenCV 4 (which returns two).
        contours, hierarchy = cv2.findContours(mask.astype(np.uint8),
                                               cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)[-2:]
        rects = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            if w > 5 and h > 5:
                rects.append(Rect(x=x, y=y, w=w, h=h))
        return rects

    def download_fullres(self, experiment_id, section, bbox, image_desc,
                         directory):
        url = f'https://connectivity.brain-map.org/cgi-bin/imageservice?path={image_desc["path"]}&' \
              f'mime=1&zoom={8}&&filter=range&filterVals=0,534,0,1006,0,4095'
        x, y, w, h = bbox.scale(64)
        url += f'&top={y}&left={x}&width={w}&height={h}'
        filename = f'{directory}/full-{experiment_id}-{section}-{x}_{y}_{w}_{h}.jpg'
        for retries in range(3):
            fname, _, downloaded = self.retrieve_url(filename, url)
            if downloaded:
                try:
                    # Verify the download by fully decoding the image.
                    image = Image.open(fname)
                    image.load()
                    break
                except (OSError, FileNotFoundError) as e:
                    os.remove(fname)
                    if retries == 2:
                        raise e
                    else:
                        self.logger.info(
                            f"Corrupted file {fname}, re-downloading {filename}")
        else:
            # for-else: no break occurred, i.e. a cached file was used on
            # every attempt.
            self.logger.info(
                f"Cached version of {filename} used, skipping verification")

        return filename

    def download_snapshot(self, experiment_id, section, image_desc, directory):
        url = f'https://connectivity.brain-map.org/cgi-bin/imageservice?path={image_desc["path"]}&' \
              f'mime=1&zoom={2}&&filter=range&filterVals=0,534,0,1006,0,4095'
        filename = f'{directory}/thumbnail-{experiment_id}-{section}.jpg'
        filename, _, _ = self.retrieve_url(filename, url)
        return filename

    def download_brightness_snapshot(self, experiment_id, section, image_desc,
                                     directory):
        url = f'https://connectivity.brain-map.org/cgi-bin/imageservice?path={image_desc["path"]}&' \
              f'mime=1&zoom={2}&&filter=range&filterVals=0,534,0,1006,0,4095'
        filename = f'{directory}/thumbnail-{experiment_id}-{section}.jpg'
        filename, _, _ = self.retrieve_url(filename, url)
        return filename

    def retrieve_url(self, filename, url, retries=10):
        if os.path.isfile(filename):
            self.logger.info(f"File {filename} already downloaded")
            return filename, None, False

        backoff = 0
        urllib.request.urlcleanup()
        while True:
            try:
                # Exponential backoff between attempts.
                time.sleep(2**backoff)
                fname, msg = urllib.request.urlretrieve(
                    url, filename=f'{filename}.partial')
                os.replace(fname, filename)
                return filename, msg, True
            except (http.client.HTTPException, OSError,
                    urllib.error.HTTPError) as e:
                backoff += 1
                retries -= 1
                if retries > 0:
                    self.logger.info(
                        f"Transient error downloading {url}, "
                        f"retrying ({retries} retries left) ...",
                        exc_info=True)
                    continue
                else:
                    self.logger.exception(
                        f"Retry count exceeded or permanent error for {url} "
                        f"({filename}), exiting...")
                    raise e
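# A note on the exception handling in retrieve_url above: catching several
# exception types requires the tuple form `except (A, B, C) as e`. The
# superficially similar `except A or B as e` evaluates `A or B` first, which
# yields just `A`, so `B` would silently never be caught. A minimal
# self-contained demonstration (the helper name is hypothetical):
import http.client


def _tuple_except_demo():
    try:
        raise FileNotFoundError('example')
    except (http.client.HTTPException, OSError) as e:
        # FileNotFoundError is an OSError subclass, so the tuple matches it.
        return type(e).__name__  # 'FileNotFoundError'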
        entry['regions'][0]['shape_attributes'],
        'filename': entry['filename']
    } for entry in entries]

experiments = defaultdict(list)
for info in scan_info:
    experiments[info['experiment_id']] += [info]

image_api = ImageDownloadApi()
for experiment_id, infos in experiments.items():
    images = {
        s['section_number']: s
        for s in image_api.section_image_query(experiment_id)
    }
    for info in infos:
        img = images[info["slice_id"]]
        y = int(info["shape"]["y"]) // 4
        x = int(info["shape"]["x"]) // 4
        width = int(info["shape"]["width"]) // 4
        height = int(info["shape"]["height"]) // 4
        zoom_factor = 8
        print(f'\tProcessing slice {info["slice_id"]}')
        # Note: top/left use a hard-coded 2**8, which matches 2**zoom_factor
        # only while zoom_factor == 8.
        url = f'http://connectivity.brain-map.org/cgi-bin/imageservice?path={img["path"]}&zoom={zoom_factor}&' \
              f'top={y*2**8}&left={x*2**8}&width={width*2**zoom_factor}&' \
              f'height={height*2**zoom_factor}&filter=range&filterVals=0,1051,0,0,0,0'
        urllib.request.urlretrieve(
            url, f'{output_dir}/{experiment_id}-{img["section_number"]}.jpg')
        image = cv2.imread(