def ISS(use_test_data: bool = False) -> Experiment:
    """
    Load one field of view of In-Situ Sequencing (ISS) data imaged from human breast cancer
    tissue.

    The complete dataset is described in the publication at
    `<https://doi.org/10.1038/nmeth.2563>`_

    The field of view holds 16 images: 4 rounds by 4 channels on a single z-plane. Each image
    is (1044, 1390) in (y, x)

    Parameters
    ----------
    use_test_data : bool
        When True, a cropped Experiment is loaded instead, whose images are (140, 200) in
        (y, x)

    Returns
    -------
    Experiment
    """
    # Pick the manifest first so there is a single load call below.
    experiment_url = (
        "https://d2nhj9g34unfro.cloudfront.net/20181005/ISS-TEST/experiment.json"
        if use_test_data
        else "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/iss/20190506/experiment.json"
    )
    return Experiment.from_json(experiment_url)
def ISS(use_test_data: bool = False):
    """
    Load the hosted ISS experiment from its ``experiment.json`` manifest.

    Parameters
    ----------
    use_test_data : bool
        When True, the ``ISS-TEST`` manifest is loaded instead of the ``ISS`` one.

    Returns
    -------
    The object produced by ``Experiment.from_json`` for the selected manifest.
    """
    manifest_urls = {
        True: 'https://dmf0bdeheu4zf.cloudfront.net/20180919/ISS-TEST/experiment.json',
        False: 'https://dmf0bdeheu4zf.cloudfront.net/20180919/ISS/experiment.json',
    }
    # bool() keeps the original truthiness semantics for non-bool arguments.
    return Experiment.from_json(manifest_urls[bool(use_test_data)])
def osmFISH(use_test_data: bool = False) -> Experiment:
    """
    Load an osmFISH experiment with 3 fields of view, generated by imaging mouse
    somatosensory cortex.

    The complete dataset is described in the publication at
    `<https://doi.org/10.1038/s41592-018-0175-z>`_

    Each field of view holds 1755 images, taken over 13 rounds, 3 channels, and 45 z-planes.
    Each image is (2048, 2048) in (y, x)

    Parameters
    ----------
    use_test_data : bool
        When True, only a single round from one field of view is returned, which is suitable
        for testing. The images are still (2048, 2048) in (y, x)

    Returns
    -------
    Experiment
    """
    manifest_urls = {
        True: (
            "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181217/osmFISH/"
            "experiment.json"
        ),
        False: "https://d26bikfyahveg8.cloudfront.net/osmFISH/formatted/20190626/experiment.json",
    }
    # bool() keeps the original truthiness semantics for non-bool arguments.
    return Experiment.from_json(manifest_urls[bool(use_test_data)])
def ISS(use_test_data: bool = False):
    """
    Load the hosted ISS experiment from its ``experiment.json`` manifest.

    Parameters
    ----------
    use_test_data : bool
        When True, the ``ISS-TEST`` manifest is loaded instead of the ``ISS`` one.

    Returns
    -------
    The object produced by ``Experiment.from_json`` for the selected manifest.
    """
    if use_test_data:
        experiment_url = "https://d2nhj9g34unfro.cloudfront.net/20181005/ISS-TEST/experiment.json"
    else:
        experiment_url = "https://d2nhj9g34unfro.cloudfront.net/20181005/ISS/experiment.json"
    return Experiment.from_json(experiment_url)
def osmFISH(use_test_data: bool=False):
    """Return osmFISH data from Codeluppi et al. 2018

    With ``use_test_data`` set, a single round of a single field of view (out of the 16 field
    of view study) is returned. Otherwise three fields of view containing all rounds of data
    are returned.

    Parameters
    ----------
    use_test_data : bool
        If True, return one round from one field of view, suitable for testing
        (default False)

    Notes
    -----
    - osmFISH fields of view are quite large (14, 2, 45, 2048, 2048), which takes up
      approximately 21 gb in memory. Use the non-test data with care.

    See Also
    --------
    Codeluppi et al. 2018: https://www.nature.com/articles/s41592-018-0175-z
    """
    experiment_url = (
        'https://d2nhj9g34unfro.cloudfront.net/20181005/osmFISH/experiment.json'
        if use_test_data
        else "https://d2nhj9g34unfro.cloudfront.net/20181031/osmFISH/experiment.json"
    )
    return Experiment.from_json(experiment_url)
def MERFISH(use_test_data: bool = False) -> Experiment:
    """
    Load a single field of view of MERFISH data derived from cultured U2-OS cells.

    The data are described in the manuscript at
    `<https://doi.org/10.1073/pnas.1612826113>`_

    The field of view holds 16 images from 8 rounds, 2 channels, and a single z-plane. Each
    image is (2048, 2048) (y, x)

    Parameters
    ----------
    use_test_data : bool
        When True, a cropped Experiment is loaded instead, whose tiles are (205, 405) (y, x)

    Returns
    -------
    Experiment
    """
    if use_test_data:
        experiment_url = (
            "https://d2nhj9g34unfro.cloudfront.net/20181005/MERFISH-TEST/experiment.json"
        )
    else:
        experiment_url = (
            "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/"
            "MERFISH/20190511/experiment.json"
        )
    return Experiment.from_json(experiment_url)
def DARTFISH(use_test_data: bool = False) -> Experiment:
    """
    Load a single field of view of unpublished DARTFISH v1 2017 data, produced by imaging
    human occipital cortex.

    These data were donated by the Zhuang lab as part of the SpaceTx consortium project.

    The field of view holds 18 images from 3 channels, 6 rounds, and 1 z-plane. Each image is
    (998, 998) in (y, x).

    Parameters
    ----------
    use_test_data : bool
        When True, a cropped Experiment is loaded instead, whose tiles are (170, 290) in
        (y, x)

    Returns
    -------
    Experiment
    """
    manifest_urls = {
        True: "https://d2nhj9g34unfro.cloudfront.net/20181005/DARTFISH-TEST/experiment.json",
        False: "https://d2nhj9g34unfro.cloudfront.net/20181005/DARTFISH/experiment.json",
    }
    # bool() keeps the original truthiness semantics for non-bool arguments.
    return Experiment.from_json(manifest_urls[bool(use_test_data)])
def MERFISH(use_test_data: bool = False):
    """
    Load the hosted MERFISH experiment from its ``experiment.json`` manifest.

    Parameters
    ----------
    use_test_data : bool
        When True, the ``MERFISH-TEST`` manifest is loaded instead of the ``MERFISH`` one.

    Returns
    -------
    The object produced by ``Experiment.from_json`` for the selected manifest.
    """
    experiment_url = (
        'https://d2nhj9g34unfro.cloudfront.net/20181005/MERFISH-TEST/experiment.json'
        if use_test_data
        else 'https://d2nhj9g34unfro.cloudfront.net/20181005/MERFISH/experiment.json'
    )
    return Experiment.from_json(experiment_url)
def MERFISH(use_test_data: bool = False):
    """
    Load the hosted MERFISH experiment from its ``experiment.json`` manifest.

    Parameters
    ----------
    use_test_data : bool
        When True, the ``MERFISH-TEST`` manifest (under ``20180919``) is loaded. Otherwise
        the ``MERFISH`` manifest (under ``20180924``) is loaded.

    Returns
    -------
    The object produced by ``Experiment.from_json`` for the selected manifest.
    """
    if use_test_data:
        experiment_url = (
            'https://dmf0bdeheu4zf.cloudfront.net/20180919/MERFISH-TEST/experiment.json'
        )
    else:
        experiment_url = 'https://dmf0bdeheu4zf.cloudfront.net/20180924/MERFISH/experiment.json'
    return Experiment.from_json(experiment_url)
def SeqFISH(use_test_data: bool=False):
    """Load a SeqFISH field of view generated from cultured mES cells.

    Parameters
    ----------
    use_test_data : bool
        If true, return a small region of testing data that was visually determined to
        contain two cells.

    Notes
    -----
    SeqFISH fields of view are quite large (12, 5, 29, 2048, 2048) and take up approximately
    5 gb in memory. Use the non-test data with care.

    See Also
    --------
    Manuscript for Intron-SeqFISH: https://doi.org/10.1016/j.cell.2018.05.035
    """
    # The test dataset lives next to the full one; only the directory suffix differs.
    if use_test_data:
        suffix = "-TEST"
    else:
        suffix = ""
    return Experiment.from_json(
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181211/"
        f"seqfish{suffix}/experiment.json"
    )
def saveExp(source_dir: str, save_dir: str, exp: "Optional[Experiment]" = None):
    """
    Write an experiment's images to ``save_dir`` and bring its metadata files along.

    If ``exp`` is given, every image of every field of view is saved through ``saveImg``.
    Afterwards every entry of ``source_dir`` that does not end in ``.tiff`` or ``.log`` is
    transferred to ``save_dir``:

    - files whose name contains ``"fov"`` are parsed as JSON, each entry of their ``"tiles"``
      list gets its ``"sha256"`` recomputed from the tile file found in ``save_dir``, and the
      updated JSON is written to ``save_dir``;
    - all other files are copied unchanged.

    Parameters
    ----------
    source_dir : str
        Directory holding the original experiment files.
    save_dir : str
        Directory receiving the images and the copied or updated metadata files. The tile
        files referenced by the fov JSON files must already exist here (or be written by the
        ``exp`` step) when hashes are recomputed.
    exp : Optional[Experiment]
        Experiment whose images should be saved first. Nothing is saved when omitted.
    """
    # go through and save all images, if an experiment is provided
    if exp:
        for fov in exp.keys():
            for view in exp[fov].image_types:
                img = exp[fov].get_image(view)
                saveImg(save_dir, f"{view}-{fov}", img)

    # copy the non-tiff files to the new directory
    cp_files = [x for x in os.listdir(source_dir) if not x.endswith((".tiff", ".log"))]

    for file in cp_files:
        source_path = os.path.join(source_dir, file)
        target_path = os.path.join(save_dir, file)

        if "fov" in file:
            # if file contains images, we need to update sha's
            # (read through a context manager so the handle is closed promptly)
            with open(source_path) as fh:
                data = json.load(fh)

            for tile in data["tiles"]:
                with open(os.path.join(save_dir, tile["file"]), "rb") as fh:
                    tile["sha256"] = hashlib.sha256(fh.read()).hexdigest()
                print(f"\tupdated hash for {tile['file']}")

            with open(target_path, "w") as f:
                json.dump(data, f)
            print(f"saved {file} with modified hashes")
        else:
            # we can just copy the rest of the files
            shutil.copyfile(source_path, target_path)
            print(f"copied {file}")
def SeqFISH(use_test_data: bool = False) -> Experiment:
    """
    Load a single field of view from a SeqFISH dataset generated from cultured mES cells.

    These data are published: `<https://doi.org/10.1016/j.cell.2018.05.035>`_

    This dataset contains 1140 images acquired across 12 channels, 4 imaging rounds, and 29
    z-planes. Each image is (2048, 2048) in (y, x)

    Parameters
    ----------
    use_test_data : bool
        When true, a cropped Experiment is loaded instead, whose images are (280, 280) in
        (y, x)

    Returns
    -------
    Experiment
    """
    # The test dataset lives next to the full one; only the directory suffix differs.
    if use_test_data:
        suffix = "-TEST"
    else:
        suffix = ""
    return Experiment.from_json(
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181211/"
        f"seqfish{suffix}/experiment.json"
    )
def process_experiment(experiment: starfish.Experiment):
    """
    Run ``iss_pipeline`` on every field of view of an experiment.

    Parameters
    ----------
    experiment : starfish.Experiment
        Experiment whose ``items()`` yield ``(name, field of view)`` pairs and whose
        ``codebook`` is handed to the pipeline.

    Returns
    -------
    tuple of two dicts, both keyed by field-of-view name: the first holds the first value
    returned by ``iss_pipeline``, the second holds the second value.
    """
    pipeline_outputs = []
    for fov_name, field_of_view in experiment.items():
        decoded, segmentation = iss_pipeline(field_of_view, experiment.codebook)
        pipeline_outputs.append((fov_name, decoded, segmentation))

    decoded_intensities = {name: decoded for name, decoded, _ in pipeline_outputs}
    regions = {name: segmentation for name, _, segmentation in pipeline_outputs}
    return decoded_intensities, regions
def process_experiment(experiment: starfish.Experiment):
    """
    Run ``iss_pipeline`` on the fields of view of an experiment.

    When the module-level ``test`` flag is truthy, processing stops after the second field
    of view.

    Parameters
    ----------
    experiment : starfish.Experiment
        Experiment whose ``items()`` yield ``(name, field of view)`` pairs and whose
        ``codebook`` is handed to the pipeline.

    Returns
    -------
    tuple of two dicts, both keyed by field-of-view name: the first holds the first value
    returned by ``iss_pipeline``, the second holds the second value.
    """
    decoded_intensities, regions = {}, {}
    for position, (fov_name, field_of_view) in enumerate(experiment.items()):
        decoded_intensities[fov_name], regions[fov_name] = iss_pipeline(
            field_of_view, experiment.codebook
        )
        processed_so_far = position + 1
        # only run through 2 fovs for the test
        if test and processed_so_far == 2:
            break
    return decoded_intensities, regions
def MOUSE_V_HUMAN():
    """
    Load the mouse-vs-human ISS experiment.

    The data correspond to sequencing 4 bases of the beta-actin gene in co-cultured mouse and
    human fibroblasts. These 4 bases differ in only one position, a SNP between the species,
    so the data offer ground truth information to validate a cell typing exercise.

    `<https://www.ncbi.nlm.nih.gov/pubmed/23852452>`_

    Returns
    -------
    Experiment
    """
    experiment_url = "https://d2z4zivcmlmaj1.cloudfront.net/ISS/mouse_vs_human/experiment.json"
    return Experiment.from_json(experiment_url)
def allen_smFISH(use_test_data: bool = False) -> Experiment:
    """
    Load a single round from a single field of view of sequential smFISH data taken from
    mouse primary visual cortex.

    These data are unpublished, and were kindly contributed by the Allen Institute for Brain
    Science as a part of the SpaceTx consortium project.

    The data consist of 102 images from 1 round, 3 channels, and 33 z-planes. Each image is
    (2048, 2048) (y, x).

    Parameters
    ----------
    use_test_data : bool
        Selects which hosted manifest is loaded: the ``20181005/allen_smFISH`` one when True,
        the ``formatted_with_DAPI`` one otherwise.
        NOTE(review): earlier documentation described this flag as unused and said there are
        no test data, but the code does branch on it -- confirm which is intended.

    Returns
    -------
    Experiment
    """
    experiment_url = (
        "https://d2nhj9g34unfro.cloudfront.net/20181005/allen_smFISH/experiment.json"
        if use_test_data
        else "https://d26bikfyahveg8.cloudfront.net/smFISH/mouse/formatted_with_DAPI/experiment.json"
    )
    return Experiment.from_json(experiment_url)
def reduceImages_for_Ilastik(dataset, dict_of_datasets, export_path) -> None:
    """
    Export one z-projected image per field of view for import into Ilastik.

    Parameters
    ----------
    dataset
        Key into ``dict_of_datasets`` selecting the experiment to export.
    dict_of_datasets
        Mapping from dataset key to a 3-item entry ``(save_path, exp_name, _)``.
        ``save_path`` is concatenated directly with ``'experiment.json'``.
    export_path
        Prefix for the output files. It is concatenated directly with the sanitised
        experiment name, the field-of-view name and ``'.tif'``.
    """
    save_path, exp_name, _ = dict_of_datasets[dataset]
    exp = Experiment.from_json(save_path + 'experiment.json')
    # '/' in the experiment name would otherwise be read as a directory separator
    exp_name_safe = exp_name.replace('/', '-')

    # For Ilastik we keep channel 0 of the first primary image stack and take the max
    # projection over the z-planes; the stroma tissue fluoresces brightly and these are the
    # regions to paint in Ilastik.
    # NOTE(review): the earlier comment spoke of projecting "across all color channels", but
    # only channel 0 is selected here -- confirm which is intended.
    for fov in list(exp.keys()):
        primary_stack = next(exp[fov].get_images(FieldOfView.PRIMARY_IMAGES))
        projected = primary_stack.sel({Axes.CH: 0}).reduce({Axes.ZPLANE}, func='max')
        save_stack(projected, export_path + exp_name_safe + fov + '.tif')
def BaristaSeq(use_test_data: bool = False) -> Experiment:
    """Load a BaristaSeq dataset generated from mouse visual cortex.

    The extracted field of view comes from an internal layer of V1 (range: 2-5)

    Parameters
    ----------
    use_test_data : bool
        Ignored: there is no data of testing size for this data type, so the same experiment
        is loaded either way.

    Returns
    -------
    Experiment
        Experiment containing raw image data
    """
    experiment_url = (
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181028/"
        "BaristaSeq/cropped_formatted/experiment.json"
    )
    return Experiment.from_json(experiment_url)
def ImagingMassCytometry(use_test_data: bool = False) -> Experiment:
    """
    Load an Imaging Mass Cytometry dataset donated to starfish by Denis Shapiro.

    The dataset consists of 52 fields of view. Each field of view holds 19 images acquired
    from 19 channels, 1 round, and 1 z-plane. Individual images have variable sizes, ranging
    from 400-500 pixels for each of (y, x).

    Parameters
    ----------
    use_test_data : bool
        Ignored; the same experiment is loaded either way.

    Returns
    -------
    Experiment
    """
    experiment_url = (
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181023/"
        "imaging_cytof/BodenmillerBreastCancerSamples/experiment.json"
    )
    return Experiment.from_json(experiment_url)
def BaristaSeq(use_test_data: bool = False) -> Experiment:
    """Load a BaristaSeq dataset generated from mouse visual cortex.

    The extracted field of view comes from an internal layer of V1 (range: 2-5). These data
    are published here: `<https://doi.org/10.1093/nar/gkx1206>`_

    The data consist of 204 images acquired from 3 rounds, 4 channels, and 17 z-planes. Each
    image is (1000, 800) in (y, x)

    Parameters
    ----------
    use_test_data : bool
        Ignored; the same experiment is loaded either way.

    Returns
    -------
    Experiment
    """
    experiment_url = (
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181028/"
        "BaristaSeq/cropped_formatted/experiment.json"
    )
    return Experiment.from_json(experiment_url)
def STARmap(use_test_data: bool = False) -> Experiment:
    """
    Load a STARmap field of view generated from mouse primary visual cortex.

    These data are published: `<https://doi.org/10.1126/science.aat5691>`_

    This dataset contains 696 images acquired across 6 rounds, 4 channels, and 29 z-planes.
    Each image is (1024, 1024) in (y, x)

    Parameters
    ----------
    use_test_data : bool
        Ignored; the same experiment is loaded either way.

    Returns
    -------
    Experiment
    """
    return Experiment.from_json(
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20190309/"
        "starmap/experiment.json"
    )
def STARmap(use_test_data: bool = False):
    """Load a STARmap field of view generated from mouse primary visual cortex.

    Parameters
    ----------
    use_test_data : bool
        Ignored by this loader; the same experiment is returned either way.

    Notes
    -----
    starfish received stitched STARmap images. To make it compatible with starfish, we
    extracted a single field of view with shape (r=6, c=4, z=29, y=1024, x=1024).

    See Also
    --------
    Manuscript for STARmap: https://doi.org/10.1126/science.aat5691
    """
    return Experiment.from_json(
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20190309/"
        "starmap/experiment.json"
    )
def run(
    output_dir: str,
    experiment: Experiment,
    blob_based: bool,
    use_ref: bool,
    blobRunnerKwargs: dict,
    decodeRunnerKwargs: dict,
    pixelRunnerKwargs: dict,
):
    """
    Main method for executing runs. Sets up directories and calls appropriate driver methods.

    While the run executes, ``sys.stdout`` and ``sys.stderr`` are redirected to a timestamped
    ``*_starfish_runner.log`` file inside ``output_dir``. Both streams are restored and the
    log file is closed when the run finishes, whether it succeeds or raises. As a side
    effect, ``tqdm`` progress bars are disabled for the remainder of the process.

    Parameters
    ----------
    output_dir: str
        Location to put all output from this tool. Dir will be created if not present.
        A trailing path separator is added if missing.
    experiment: Experiment
        Experiment object with corresponding images and codebook.
    blob_based: bool
        If true, use blob-detection and decoding methods. Else, use pixel-based methods.
    use_ref: bool
        If true, a reference image will be used and created by flattening the fov.
    blobRunnerKwargs: dict
        Dictionary with arguments for blob detection. Refer to blobRunner.
    decodeRunnerKwargs: dict
        Dictionary with arguments for spot-based decoding. Refer to decodeRunner.
    pixelRunnerKwargs: dict
        Dictionary with arguments for pixel-based detection and decoding. Refer to starfish
        PixelSpotDecoder.

    Returns
    -------
    int
        Always 0.
    """
    # Paths below are built by string concatenation and output_dir is also handed to
    # blobDriver, so make sure it ends with a path separator.
    output_dir = path.join(output_dir, "")

    subdirs = ["csv", "cdf"]
    if blob_based:
        subdirs.append("spots")
    makedirs(output_dir, exist_ok=True)
    for subdir in subdirs:
        makedirs(output_dir + subdir, exist_ok=True)

    log_path = path.join(output_dir, datetime.now().strftime("%Y%m%d_%H%M_starfish_runner.log"))
    previous_stdout, previous_stderr = sys.stdout, sys.stderr

    with open(log_path, "w") as reporter:
        sys.stdout = reporter
        sys.stderr = reporter
        try:
            print(
                "output_dir: {}\nexp: {}\nblob_based: {}\nuse_ref: {}\nblobrunner: {}\ndecoderunner: {}\npixelrunner: {}\n"
                .format(
                    output_dir,
                    experiment,
                    blob_based,
                    use_ref,
                    blobRunnerKwargs,
                    decodeRunnerKwargs,
                    pixelRunnerKwargs,
                ))

            # disabling tqdm for pipeline runs (process-wide patch; not undone afterwards)
            tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)

            imgs = {fov: experiment[fov].get_image("primary") for fov in experiment.keys()}

            ref_imgs = None
            if use_ref:
                # flatten each fov over channel, round and z to get its reference image
                ref_imgs = {
                    fov: img.reduce({Axes.CH, Axes.ROUND, Axes.ZPLANE}, func="max")
                    for fov, img in imgs.items()
                }

            if blob_based:
                _, decoded = blobDriver(imgs, ref_imgs, experiment.codebook, blobRunnerKwargs,
                                        decodeRunnerKwargs, output_dir)
            else:
                decoded = pixelDriver(imgs, experiment.codebook, pixelRunnerKwargs)

            # saving
            for fov in decoded.keys():
                saveTable(decoded[fov], output_dir + "csv/" + fov + "_decoded.csv")
                decoded[fov].to_netcdf(output_dir + "cdf/" + fov + "_decoded.cdf")
        finally:
            # Put both streams back before the log file is closed, even on failure;
            # otherwise later writes would hit a closed file.
            sys.stdout = previous_stdout
            sys.stderr = previous_stderr

    return 0
def from_spacetx_format(store: str, overwrite: Optional[bool] = True
                        ) -> zarr.hierarchy.Group:
    """Create a Zarr version of SpaceTx-Format from BaristaSeq data to test this library

    The first field of view of the hosted BaristaSeq experiment is written twice, as groups
    ``fov_000`` and ``fov_001``. Both hold the same image data and differ only in the
    synthetic tile coordinates attached to their arrays (offset by 0 and 10 respectively).

    Parameters
    ==========
    store : str
        folder to back the created zarr archive
    overwrite: Optional[Bool]
        if True, overwrite any zarr array previously stored in the `store` directory.
        (Default True)

    Returns
    =======
    zarr.hierarchy.Group :
        the root of the zarr archive created from the passed experiment

    Raises
    ======
    ValueError :
        if the loaded experiment contains no field of view
    """

    exp = Experiment.from_json(
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181028/"
        "BaristaSeq/cropped_formatted/experiment.json")

    # get the first FOV from the SpaceTx BaristaSeq dataset
    try:
        _, si_fov = next(iter(exp.items()))
    except StopIteration:
        raise ValueError("the BaristaSeq experiment contains no fields of view") from None

    def add_coordinates(array: zarr.hierarchy.Group, start: int = 0):
        """Add coordinates to an FOV

        One coordinate entry is stored per (round, channel, z) index, as given by the first
        three dimensions of ``array``; every entry is shifted by ``start``.
        """
        coordinates = defaultdict(
            lambda: defaultdict(lambda: defaultdict(dict)))
        ordered_numeric_tile_indices = product(range(array.shape[0]),
                                               range(array.shape[1]),
                                               range(array.shape[2]))
        for r, c, z in ordered_numeric_tile_indices:
            coordinates[r][c][z] = {
                'zc': (0.0 + start, 0.01 + start),
                'yc': (0.0 + start, 10.0 + start),
                'xc': (0.0 + start, 10.0 + start)
            }

        array.attrs['tile_coordinates'] = coordinates

        return array

    def create_fov(root: zarr.hierarchy.Group, fov_name: str, overwrite: bool,
                   start: int) -> None:
        """Write every image type of the selected FOV into a new group under ``root``."""
        fov = root.create_group(name=fov_name, overwrite=overwrite)
        for stack_type in si_fov.image_types:
            data = si_fov[stack_type]
            # one chunk per (round, channel, z) tile, spanning the full (y, x) plane
            arr = fov.array(name=stack_type,
                            data=data.xarray.values,
                            chunks=(1, 1, 1, *data.raw_shape[-2:]),
                            overwrite=overwrite)
            add_coordinates(arr, start=start)

    # create the root of the zarr archive
    root: zarr.hierarchy.Group = zarr.group(store=store, overwrite=overwrite)

    # make two FOVs from the test datasets. They will have the same data.
    # Keyword arguments keep the group and the FOV name from being swapped.
    # NOTE(review): the FOV groups are always overwritten, regardless of `overwrite`,
    # as before -- confirm whether `overwrite` should be passed through here.
    create_fov(root=root, fov_name="fov_000", overwrite=True, start=0)
    create_fov(root=root, fov_name="fov_001", overwrite=True, start=10)

    return root
log = img_stack.log) mask = BinaryMaskCollection.from_label_image(label_im) return mask, label_image list_of_datasets = pickle.load(open('list_of_experiments.obj', 'rb')) dict_of_datasets = pickle.load(open('dict_of_experiments.obj', 'rb')) CODEBOOK = pickle.load(open('codebook.obj', 'rb')) # could iterate this next line of all datasets dataset = list_of_datasets[0] save_path, exp_name, assayNo = dict_of_datasets[dataset] codebook = Codebook.from_code_array(CODEBOOK[int(assayNo)]) exp = Experiment.from_json(save_path + 'experiment.json') """ Pipeline procedure: - functions should operate on datasets individually - Order of operations: 1. Reduce images for export to Ilastik 2. Filter images and find spots 3. Import classifications from Ilastik tif files and generate masks 4. Assign genes to cells 5. Calculate area of non-stroma tissue, add up each type of gene, normalize gene density. """ gene_counts_across_fovs = [] for fov in exp.fovs():
import posixpath

import requests

from starfish import Experiment, FieldOfView
from starfish.util.argparse import FsExistsType

# Script entry point: download a hosted experiment and write its image stacks and codebook
# into a local directory.
#   experiment_url  base URL under which experiment.json and codebook.json are found
#   output_dir      local directory receiving the export (validated by FsExistsType)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("experiment_url")
    parser.add_argument("output_dir", type=FsExistsType())
    args = parser.parse_args()

    # save image stacks locally
    exp = Experiment.from_json(posixpath.join(args.experiment_url, "experiment.json"))
    for fov in exp.fovs():
        # one sub-directory per field of view, named after the fov
        fov_dir = pathlib.Path(args.output_dir, fov.name)
        fov_dir.mkdir()
        fov[FieldOfView.PRIMARY_IMAGES].export(str(fov_dir / "primary_images.json"))
        for image_type in fov.image_types:
            # the primary images were exported above under a fixed file name
            if image_type == FieldOfView.PRIMARY_IMAGES:
                continue
            fov[image_type].export(str(fov_dir / f"{image_type}.json"))

    # get codebook from url and save locally to tmp dir
    codebook = requests.get(posixpath.join(args.experiment_url, "codebook.json"))
    # Fail on HTTP errors here rather than parsing (and saving) an error response body.
    codebook.raise_for_status()
    data = codebook.json()
    with open(pathlib.Path(args.output_dir, 'codebook.json'), 'w') as f:
        json.dump(data, f)
################################################################################################### # Construct an experiment out of the raw files. # --------------------------------------------- outputdir = tempfile.TemporaryDirectory() from slicedimage import ImageFormat from starfish.core.experiment.builder.structured_formatter import format_structured_dataset format_structured_dataset( inputdir.name, coordinates_path, outputdir.name, ImageFormat.TIFF, ) ################################################################################################### # List the output directory # ------------------------- for file in sorted(os.listdir(outputdir.name)): print(file) ################################################################################################### # Load up the experiment # ---------------------- from starfish import Experiment exp = Experiment.from_json(os.path.join(outputdir.name, "experiment.json")) print(exp.fovs()) print(repr(exp.fov().get_image('primary')))
def ImagingMassCytometry(use_test_data: bool=False):
    """
    Load the hosted Imaging Mass Cytometry experiment (BodenmillerBreastCancerSamples).

    Parameters
    ----------
    use_test_data : bool
        Ignored; the same experiment is loaded either way.

    Returns
    -------
    The object produced by ``Experiment.from_json`` for the hosted manifest.
    """
    experiment_url = (
        "https://d2nhj9g34unfro.cloudfront.net/browse/formatted/20181023/"
        "imaging_cytof/BodenmillerBreastCancerSamples/experiment.json"
    )
    return Experiment.from_json(experiment_url)
# "r+" opens the file for both reading and writing, so the same handle is used to read
# experiment.json and then rewrite it in place
with open(os.path.join(primary_out, "experiment.json"), "r+") as fh:
    contents = fh.readlines()
    print("original experiment.json\n")
    print("".join(contents))
    # append a comma to the line at index 3 so that a new entry can follow it
    contents[3] = ",".join([contents[3].strip("\n"),"\n"])
    contents.insert(4, '\t"nuclei": "../nuclei/nuclei.json"\n')  # new_string should end in a newline
    fh.seek(0)  # readlines consumes the iterator, so we need to start over
    fh.writelines(contents)  # No need to truncate as we are increasing filesize
    fh.seek(0)  # rewind again so the rewritten file can be read back and printed
    print("\nmodified experiment.json\n")
    print(fh.read())

###################################################################################################
# Don't forget to replace the fake codebook.json
# ----------------------------------------------
# There are no starfish tools for creating a codebook. You can write the JSON manually or write a
# script to do it for you. Be sure the format matches the examples in
# :ref:`SpaceTx Format<sptx_codebook_format>`.

# this is the placeholder codebook.json
with open(os.path.join(primary_out, "codebook.json"), "r") as fh:
    print(fh.read())

###################################################################################################
# Load up the experiment
# ----------------------

from starfish import Experiment

# load the modified experiment.json and list its fields of view
exp = Experiment.from_json(os.path.join(primary_out, "experiment.json"))
print(exp.fovs())
def allen_smFISH(use_test_data: bool=False):
    """
    Load the hosted allen_smFISH experiment from its ``experiment.json`` manifest.

    Parameters
    ----------
    use_test_data : bool
        Ignored; the same experiment is loaded either way.

    Returns
    -------
    The object produced by ``Experiment.from_json`` for the hosted manifest.
    """
    experiment_url = 'https://d2nhj9g34unfro.cloudfront.net/20181005/allen_smFISH/experiment.json'
    return Experiment.from_json(experiment_url)