예제 #1
0
def download_section(savepath, section_id, downsample):
    """Download every image of a section data set, twice per image:
    once without expression coloring (``<savepath>/<section_id>_input``)
    and once with it (``<savepath>/<section_id>_output``).

    Parameters
    ----------
    savepath : str
        Root directory under which the input/output folders are created.
    section_id : int
        SectionDataSet id whose images are queried and downloaded.
    downsample : int
        Downsampling factor forwarded to the image download API.

    Side effects: creates directories, writes .jpg files, and prints the
    number of files found in the input directory when done.
    """
    image_api = ImageDownloadApi()

    input_directory = str(section_id) + '_input'
    output_directory = str(section_id) + '_output'
    format_str = '.jpg'

    section_images = image_api.section_image_query(section_id)
    section_image_ids = [si['id'] for si in section_images]

    # The AllenSDK logger announces every single file download; with ~300
    # images per data set that is pure noise, so silence it for the duration.
    download_logger = logging.getLogger(
        'allensdk.api.api.retrieve_file_over_http')
    download_logger.disabled = True

    try:
        for section_image_id in section_image_ids:
            file_name = str(section_image_id) + format_str
            input_file_path = os.path.join(savepath, input_directory, file_name)
            output_file_path = os.path.join(savepath, output_directory, file_name)
            Manifest.safe_make_parent_dirs(input_file_path)
            image_api.download_section_image(section_image_id,
                                             file_path=input_file_path,
                                             downsample=downsample,
                                             expression=0)
            Manifest.safe_make_parent_dirs(output_file_path)
            image_api.download_section_image(section_image_id,
                                             file_path=output_file_path,
                                             downsample=downsample,
                                             expression=1)
    finally:
        # BUGFIX: re-enable the logger even if a download raises; the
        # original left the global logger disabled on any error.
        download_logger.disabled = False

    file_names = os.listdir(os.path.join(savepath, input_directory))
    print(len(file_names))
예제 #2
0
def download_brain_slice(df):
    """Interactively download all section images for user-selected rows of *df*.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Experiment", "Gene Symbol" and "Plane" columns;
        the rows to fetch are chosen by ``ask_parameters_for_downloading``.

    Side effects: creates one directory per gene symbol under the chosen
    path, writes .jpg files, shows a tqdm progress bar per experiment and
    prints progress messages to stderr.
    """
    # create an image download API
    image_api = ImageDownloadApi()
    format_str = ".jpg"

    # The AllenSDK logger announces every file download; with ~300 images
    # that is pure noise, so silence it while we work.
    download_logger = logging.getLogger("allensdk.api.api.retrieve_file_over_http")
    download_logger.disabled = True

    # get parameters
    path, downsample, indices = ask_parameters_for_downloading(df)

    print(
        "Downloads initiated", end="...", file=sys.stderr, flush=True,
    )

    try:
        for index in indices:

            # from indices, get experiment id and gene symbol from df
            exp_id = df["Experiment"][index]

            # set the dirname as the gene symbol
            dirname = df["Gene Symbol"][index]

            plane = df["Plane"][index]
            section_data_set_id = exp_id
            section_image_directory = os.path.join(path, dirname)

            # get the image ids for all of the images in this data set
            section_images = image_api.section_image_query(
                section_data_set_id
            )  # Should be a dictionary of the features of section images
            section_image_ids = [
                si["id"] for si in section_images
            ]  # Take value of 'id' from the dictionary

            # Create a progress bar
            pbar_image = tqdm(total=len(section_image_ids), desc=dirname + " " + plane)

            for section_image_id in section_image_ids:

                file_name = str(section_image_id) + format_str
                file_path = os.path.join(section_image_directory, file_name)

                Manifest.safe_make_parent_dirs(file_path)

                # Skip files already downloaded (e.g. from an interrupted run).
                # BUGFIX: the original listed the whole directory on every
                # iteration (O(n*m) syscalls) behind an `if: pass / else:`
                # anti-pattern; a direct existence check is equivalent.
                if not os.path.isfile(file_path):
                    image_api.download_section_image(
                        section_image_id, file_path=file_path, downsample=downsample
                    )

                pbar_image.update()

            pbar_image.close()
    finally:
        # BUGFIX: re-enable the logger even when a download raises; the
        # original left the global logger disabled on any error.
        download_logger.disabled = False

    print(
        "Downloads completed.", file=sys.stderr, flush=True,
    )
예제 #3
0

# Example script: download every image of one section data set.
# NOTE(review): this snippet appears truncated — the loop below creates the
# parent directories for each file but the actual download call is not
# visible here; confirm against the original notebook/source.
image_api = ImageDownloadApi()
svg_api = SvgApi()


#Downloading all of the images from a section data set

section_data_set_id = 71724696  # id of the SectionDataSet to fetch
downsample = 4                  # downsampling factor for the image API
expression = 1                  # presumably selects expression coloring — TODO confirm usage below

section_image_directory = str(section_data_set_id) + '_section_images'
format_str = '.jpg'

# Query the ids of all section images belonging to this data set.
section_images = image_api.section_image_query(section_data_set_id)
section_image_ids = [si['id'] for si in section_images]

print(len(section_image_ids))

# You have probably noticed that the AllenSDK has a logger which notifies you of file downloads. 
# Since we are downloading ~300 images, we don't want to see messages for each one.
# The following line will temporarily disable the download logger.(optional)
logging.getLogger('allensdk.api.api.retrieve_file_over_http').disabled = True 

for section_image_id in section_image_ids:
    
    # One .jpg per section image, named after its id.
    file_name = str(section_image_id) + format_str
    file_path = os.path.join(section_image_directory, file_name)
    
    # Ensure the target directory exists before writing.
    Manifest.safe_make_parent_dirs(file_path)
예제 #4
0
class ExperimentImagesDownloader(DirWatcher):
    """Directory-queue worker that downloads Allen connectivity section
    images for experiments whose structure segmentation overlaps a set of
    requested structures.

    For each queued experiment id it: queries the section images, loads the
    precomputed segmentation volume, finds sections intersecting the target
    structures, screens the experiment by median brightness inside the
    structure mask, and downloads a thumbnail plus full-resolution crops of
    each bounding box around the structures.
    """

    def __init__(self, input_dir, process_dir, output_dir, structure_map_dir,
                 structs, connectivity_dir, _processor_number,
                 brightness_threshold, strains):
        super().__init__(input_dir, process_dir, output_dir,
                         f'experiment-images-downloader-{_processor_number}')
        self.brightness_threshold = brightness_threshold
        # `structs` arrives as a Python-literal string (e.g. "[1, 2]").
        self.structs = ast.literal_eval(structs)
        self.segmentation_dir = structure_map_dir
        self.mcc = MouseConnectivityCache(
            manifest_file=f'{connectivity_dir}/mouse_connectivity_manifest.json'
        )
        struct_tree = self.mcc.get_structure_tree()
        # Flatten the per-structure descendant lists into one membership set.
        structure_ids = [
            i for sublist in struct_tree.descendant_ids(self.structs)
            for i in sublist
        ]
        self.structure_ids = set(structure_ids)
        self.image_api = ImageDownloadApi()
        self.bbox_dilation_kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE, (14, 14))
        exps = self.mcc.get_experiments(dataframe=True)
        # Build a round-robin ordering of experiment ids: for each strain,
        # truncate every gender's sorted id list to the shortest one and
        # interleave them so the genders alternate.
        items = []
        for s, gs in strains.items():
            strain_items = []
            for g in gs:
                strain_items += [
                    sorted(exps[(exps.strain == s)
                                & (exps.gender == g)].id.tolist())
                ]
            if strain_items:
                min_len = min([len(i) for i in strain_items])
                strain_items = [i[:min_len] for i in strain_items]
                items += [str(i) for j in zip(*strain_items) for i in j]
        # Re-order the inherited work queue: requested-strain items first
        # (in round-robin order), then everything else in original order.
        self.initial_items = [i for i in items if i in self.initial_items] + [
            i for i in self.initial_items if i not in items
        ]

    def on_process_error(self, item, exception):
        """Log the failure; return False (retryable) for network/OS/value
        errors, otherwise defer to the base-class decision."""
        retval = super().on_process_error(item, exception)
        self.logger.error("Error occurred during processing", exc_info=True)
        # BUGFIX: the original listed `http.client.error`, which does not
        # exist and raised AttributeError the first time this ran; the
        # intended class is http.client.HTTPException.
        if any(
                map(lambda x: issubclass(type(exception), x), [
                    urllib.error.HTTPError, OSError, ValueError,
                    http.client.HTTPException
                ])):
            return False
        else:
            return retval

    def process_item(self, item, directory):
        """Process one experiment id: query images (with retry on transient
        JSON errors), find sections overlapping the target structures,
        reject dim experiments, then download per-section images.

        Returns False to drop the experiment when brightness is below the
        threshold; returns None on success.
        """
        experiment_id = int(item)
        retries = 0
        images = []
        # The image API intermittently returns non-JSON; back off
        # exponentially (2**(retries//2) s) and retry up to 10 times.
        while True:
            try:
                time.sleep(2**(retries // 2))
                images = self.image_api.section_image_query(experiment_id)
                break
            except simplejson.errors.JSONDecodeError as e:
                if retries > 10:
                    raise e
                else:
                    self.logger.info("Exception invoking image API, retrying")
                    retries += 1
                    continue

        # Index image descriptors by section number for O(1) lookup.
        images = {i['section_number']: i for i in images}
        segmentation = np.load(
            f'{self.segmentation_dir}/{item}/{item}-sections.npz')['arr_0']
        # Voxels belonging to any requested structure (or descendant).
        mask = np.isin(segmentation, list(self.structure_ids))
        locs = np.where(mask)
        # Axis 2 of the segmentation volume indexes sections; keep only
        # sections that actually have a downloadable image.
        sections = [
            s for s in sorted(np.unique(locs[2]).tolist()) if s in images
        ]
        bboxes = {
            section: self.extract_bounding_boxes(mask[:, :, section])
            for section in sections
        }
        valid_sections = list(filter(lambda s: bboxes[s], sections))
        self.logger.info(
            f"Experiment {experiment_id}, evaluating brightness...")
        brightness = self.calculate_brightness(directory, experiment_id,
                                               images, valid_sections, mask)

        if brightness < self.brightness_threshold:
            self.logger.info(
                f"Experiment {experiment_id}: brightness less than required minimum, removing..."
            )
            return False

        with open(f'{directory}/bboxes.pickle', 'wb') as f:
            pickle.dump(bboxes, f)

        for section in valid_sections:
            self.process_section(directory, experiment_id, images, section,
                                 bboxes[section])

    def calculate_brightness(self, directory, experiment_id, images,
                             valid_sections, mask):
        """Median grayscale value of thumbnail pixels inside the structure
        mask across all valid sections.

        Assumes the zoom-2 thumbnails align with (and are at least as large
        as) the mask's first two axes — TODO confirm against the
        segmentation pipeline.
        """
        pixels = np.zeros_like(mask, dtype=np.uint8)
        for section in valid_sections:
            self.download_snapshot(experiment_id, section, images[section],
                                   directory)
            filename = f'{directory}/thumbnail-{experiment_id}-{section}.jpg'
            image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            # Crop to the mask's footprint before stacking into the volume.
            pixels[:, :, section] = image[:mask.shape[0], :mask.shape[1]]

        return np.median(pixels[mask != 0])

    def process_section(self, directory, experiment_id, images, section,
                        bboxes):
        """Download one section's thumbnail plus a full-resolution crop for
        each bounding box."""
        self.logger.info(
            f"Experiment {experiment_id}, downloading section {section}...")
        self.download_snapshot(experiment_id, section, images[section],
                               directory)
        for bbox in bboxes:
            self.download_fullres(experiment_id, section, bbox,
                                  images[section], directory)

    def extract_bounding_boxes(self, mask, area_threshold=0):
        """Find bounding boxes of connected regions in a 2-D binary mask,
        merge nearby ones by dilating their filled rectangles, and keep
        those with area above `area_threshold`."""
        bboxes = self.get_bounding_boxes(mask)
        bbmask = np.zeros_like(mask, dtype=np.uint8)
        for bbox in bboxes:
            cv2.rectangle(bbmask, *bbox.corners(), color=1, thickness=-1)
        # Dilation fuses rectangles that are close together so that the
        # second contour pass returns one box per cluster.
        bbmask = cv2.dilate(bbmask, self.bbox_dilation_kernel)
        bboxes = [
            bbox for bbox in self.get_bounding_boxes(bbmask)
            if bbox.area() > area_threshold
        ]
        return bboxes

    @staticmethod
    def get_bounding_boxes(mask):
        """Bounding rectangles of external contours in `mask`, discarding
        boxes 5 pixels or smaller in either dimension.

        The [-2:] slice keeps this compatible with OpenCV versions where
        findContours returns 2 or 3 values.
        """
        contours, hierarchy = cv2.findContours(mask.astype(np.uint8),
                                               cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)[-2:]
        rects = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            if w > 5 and h > 5:
                rects.append(Rect(x=x, y=y, w=w, h=h))
        return rects

    def download_fullres(self, experiment_id, section, bbox, image_desc,
                         directory):
        """Download a zoom-8 crop of `bbox` (scaled by 64 from mask
        coordinates) and verify the JPEG decodes, re-downloading corrupted
        files up to 3 times. Returns the target filename."""
        url = f'https://connectivity.brain-map.org/cgi-bin/imageservice?path={image_desc["path"]}&' \
              f'mime=1&zoom={8}&&filter=range&filterVals=0,534,0,1006,0,4095'
        x, y, w, h = bbox.scale(64)
        url += f'&top={y}&left={x}&width={w}&height={h}'
        filename = f'{directory}/full-{experiment_id}-{section}-{x}_{y}_{w}_{h}.jpg'
        for retries in range(3):
            fname, _, downloaded = self.retrieve_url(filename, url)
            if downloaded:
                try:
                    image = Image.open(fname)
                    image.load()  # force a full decode to detect truncation
                    break
                # BUGFIX: `except OSError or FileNotFoundError` evaluated to
                # `except OSError` only; FileNotFoundError is a subclass of
                # OSError, so a single OSError clause covers both.
                except OSError as e:
                    os.remove(fname)
                    if retries == 2:
                        raise e
                    else:
                        self.logger.info(
                            f"Corrupted file {fname}, re-downloading (unknown)"
                        )
            else:
                self.logger.info(
                    "Cached version of (unknown) used, skipping verification"
                )

        return filename

    def download_snapshot(self, experiment_id, section, image_desc, directory):
        """Download the zoom-2 thumbnail for one section (cached if the file
        already exists). Returns the target filename."""
        url = f'https://connectivity.brain-map.org/cgi-bin/imageservice?path={image_desc["path"]}&' \
              f'mime=1&zoom={2}&&filter=range&filterVals=0,534,0,1006,0,4095'
        filename = f'{directory}/thumbnail-{experiment_id}-{section}.jpg'
        filename, _, _ = self.retrieve_url(filename, url)
        return filename

    def download_brightness_snapshot(self, experiment_id, section, image_desc,
                                     directory):
        """Alias of download_snapshot, kept for interface compatibility
        (the original body was a byte-for-byte duplicate)."""
        return self.download_snapshot(experiment_id, section, image_desc,
                                      directory)

    def retrieve_url(self, filename, url, retries=10):
        """Download `url` to `filename` with exponential backoff.

        Returns (filename, headers-or-None, downloaded) where `downloaded`
        is False when an existing file was reused. Raises the last error
        after `retries` failed attempts.
        """
        if os.path.isfile(filename):
            self.logger.info("File (unknown) already downloaded")
            return filename, None, False

        backoff = 0
        urllib.request.urlcleanup()
        while True:
            try:
                time.sleep(2**backoff)
                fname, msg = urllib.request.urlretrieve(
                    url, filename=f'(unknown).partial')
                # Publish atomically so readers never see a partial file.
                os.replace(fname, filename)
                return filename, msg, True
            # BUGFIX: the original `except A or B or C` caught ONLY
            # http.client.HTTPException, so OSError / HTTPError escaped the
            # retry loop entirely; a tuple catches all intended types.
            except (http.client.HTTPException, OSError,
                    urllib.error.HTTPError) as e:
                backoff += 1
                retries -= 1
                if retries > 0:
                    self.logger.info(
                        f"Transient error downloading {url}, "
                        f"retrying ({retries} retries left) ...",
                        exc_info=True)
                    continue
                else:
                    self.logger.exception(
                        f"Retry count exceeded or permanent error for {url} ((unknown)), exiting..."
                    )
                    raise e
예제 #5
0
    entry['regions'][0]['shape_attributes'],
    'filename':
    entry['filename']
} for entry in entries]

experiments = defaultdict(list)

for info in scan_info:
    experiments[info['experiment_id']] += [info]

image_api = ImageDownloadApi()

for experiment_id, infos in experiments.items():
    images = {
        s['section_number']: s
        for s in image_api.section_image_query(experiment_id)
    }
    for info in infos:
        img = images[info["slice_id"]]
        y = int(info["shape"]["y"]) // 4
        x = int(info["shape"]["x"]) // 4
        width = int(info["shape"]["width"]) // 4
        height = int(info["shape"]["height"]) // 4
        zoom_factor = 8
        print(f'\tProcessing slice {info["slice_id"]}')
        url = f'http://connectivity.brain-map.org/cgi-bin/imageservice?path={img["path"]}&zoom={zoom_factor}&'\
              f'top={y*2**8}&left={x*2**8}&width={width*2**zoom_factor}&'\
              f'height={height*2**zoom_factor}&filter=range&filterVals=0,1051,0,0,0,0'
        urllib.request.urlretrieve(
            url, f'{output_dir}/{experiment_id}-{img["section_number"]}.jpg')
        image = cv2.imread(