# Example 1
def test_code_length_properly_counts_bit_length_of_codes():
    """
    Use the codebook factory from another testing module that produces codes with 3 channels and
    two rounds. This produces a code which should have length 6.

    Then, pass n_round=10 to make the total length 3 * 10 = 30 and verify that the code length is
    returned as 30.
    """
    codebook_data = codebook_array_factory()

    # default sizing: 3 channels * 2 rounds
    codebook = Codebook.from_code_array(codebook_data)
    assert codebook.code_length == 6

    # explicit n_round widens the codebook: 3 channels * 10 rounds
    codebook_long = Codebook.from_code_array(codebook_data, n_round=10)
    assert codebook_long.code_length == 30
def test_from_code_array_throws_exceptions_when_data_does_not_match_channel_or_round_requests(
):
    """
    The codebook factory produces codes with 3 channels and 2 rounds. This test requests fewer
    channels, and then fewer rounds, than are present in the data and verifies that a ValueError
    is raised in each case.
    """
    code_array: List = codebook_array_factory()

    # should throw an exception, as 3 channels are present in the data
    with pytest.raises(ValueError):
        Codebook.from_code_array(code_array, n_ch=2, n_round=4)

    # should throw an exception, as 2 rounds are present in the data
    with pytest.raises(ValueError):
        Codebook.from_code_array(code_array, n_ch=3, n_round=1)
# Example 3
def test_unmatched_intensities_and_codebook_table_sizes_throws_value_error():
    Codebook and Intensity channel and round number must match. Here we use a codebook with 3
    channels, but an IntensityTable with only 2 to verify an error is thrown.

    # this codebook has 3 channels
    codebook_array = [
            Features.CODEWORD: [{
                Axes.ROUND.value: 0,
                Axes.CH.value: 2,
                Features.CODE_VALUE: 1
            }, {
                Axes.ROUND.value: 1,
                Axes.CH.value: 0,
                Features.CODE_VALUE: 1
    codebook = Codebook.from_code_array(codebook_array)
    intensities = intensity_table_factory()
    with pytest.raises(ValueError):
def codebook_factory() -> Codebook:
    """
    Codebook with two codewords describing an experiment with three channels and two imaging rounds.
    Both codes have two "on" channels.

    Returns
    -------
    Codebook :
        codebook built from the two codewords below via Codebook.from_code_array
    """
    # NOTE(review): the target entries were missing from the extracted source; "GENE_A" and
    # "GENE_B" follow the naming used by the other codebook fixtures in this file -- confirm.
    codebook_array = [
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 0, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 1, Features.CODE_VALUE: 1},
            ],
            Features.TARGET: "GENE_A"
        },
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 2, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 1, Features.CODE_VALUE: 1},
            ],
            Features.TARGET: "GENE_B"
        },
    ]
    return Codebook.from_code_array(codebook_array)
# Example 5
def test_from_code_array_has_three_channels_two_rounds_and_two_codes():
    Tests that from_code_array loads a small codebook that has the correct size and values
    code_array: List = codebook_array_factory()
    codebook: Codebook = Codebook.from_code_array(code_array)
# Example 6
def parse_codebook(codebook_csv: str) -> Codebook:
    """Parses a codebook csv file provided by SeqFISH developers.

    Parameters
    ----------
    codebook_csv : str
        The codebook file is expected to contain a matrix whose rows are barcodes and whose columns
        are imaging rounds. Column IDs are expected to be sequential, and round identifiers (roman
        numerals) are replaced by integer IDs.

    Returns
    -------
    Codebook :
        Codebook object in SpaceTx format.
    """
    # the first csv column is used as the row index; each index value becomes a code's target
    code_table: pd.DataFrame = pd.read_csv(codebook_csv, index_col=0)

    # replace the original column labels with sequential integer round IDs
    code_table.columns = range(code_table.shape[1])

    # one entry per row: every (round, channel) cell becomes an "on" bit of the codeword.
    # Channel values are shifted down by one (presumably 1-based in the csv -- confirm).
    mappings = [
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: r, Axes.CH.value: c - 1, Features.CODE_VALUE: 1}
                for r, c in channel_series.items()
            ],
            Features.TARGET: gene
        }
        for gene, channel_series in code_table.iterrows()
    ]

    return Codebook.from_code_array(mappings)
# Example 7
    def generate_codebook(self, output_dir: str) -> None:
        """Generate and save a codebook from the provided mapping of genes to DNA sequences.

        StarMAP codebooks are encoded with the 2-base encoding used for solid sequencing. In this
        scheme, multiple pairs of bases map into the same fluorescence channels. This function
        exposes this mapping.

        Parameters
        ----------
        output_dir : str
            directory in which to save the generated codebook. Codebook is saved as "codebook.json"

        """
        dinucleotides_to_channels = {
            "AT": 4,
            "CT": 3,
            "GT": 2,
            "TT": 1,
            "AG": 3,
            "CG": 4,
            "GG": 1,
            "TG": 2,
            "AC": 2,
            "CC": 1,
            "GC": 4,
            "TC": 3,
            "AA": 1,
            "CA": 2,
            "GA": 3,
            "TA": 4,
        }

        # each row of genes.csv is split on commas into (gene, dna_barcode)
        with open(os.path.join(self.input_dir, "genes.csv"), "r") as f:
            codes = [line.strip().split(",") for line in f]

        def iter_dinucleotides(sequence):
            """Yield every overlapping pair of adjacent characters in ``sequence``."""
            for start in range(len(sequence) - 1):
                yield sequence[start:start + 2]

        # construct codebook target mappings
        code_array = []
        for gene, dna_barcode in codes:
            dna_barcode = dna_barcode[::-1]  # reverse barcode
            # the i-th dinucleotide of the reversed barcode defines the channel of round i
            spacetx_barcode = [
                {
                    Axes.ROUND.value: r,
                    Axes.CH.value: dinucleotides_to_channels[dinucleotide],
                    Features.CODE_VALUE: 1
                } for r, dinucleotide in enumerate(iter_dinucleotides(dna_barcode))
            ]
            code_array.append({
                Features.CODEWORD: spacetx_barcode,
                Features.TARGET: gene
            })

        codebook = Codebook.from_code_array(code_array)
        codebook.to_json(os.path.join(output_dir, "codebook.json"))
# Example 8
def two_spot_one_hot_coded_data_factory(
) -> Tuple[Codebook, ImageStack, float]:
    """
    Produce a 2-channel 2-round Codebook with two codes and an ImageStack containing one spot from
    each code. The spots do not overlap and the data are noiseless.

    The encoding of this data is similar to that used in In-situ Sequencing, FISSEQ,
    BaristaSeq, STARMAP, MExFISH, or SeqFISH.

    Returns
    -------
    Codebook :
        codebook containing codes that match the data
    ImageStack :
        noiseless ImageStack containing one spot per code in codebook
    float :
        the maximum intensity found in the created ImageStack

    """
    # NOTE(review): the target entries were missing from the extracted source; "GENE_A" and
    # "GENE_B" follow the naming used by the other codebook fixtures in this file -- confirm.
    codebook_data = [
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 0, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 1, Features.CODE_VALUE: 1},
            ],
            Features.TARGET: "GENE_A"
        },
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 1, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 0, Features.CODE_VALUE: 1},
            ],
            Features.TARGET: "GENE_B"
        },
    ]
    codebook = Codebook.from_code_array(codebook_data)

    # NOTE(review): the closing argument of this call was missing from the extracted source;
    # passing the codebook built above is assumed from the helper's name -- confirm.
    imagestack = create_imagestack_from_codebook(
        pixel_dimensions=(10, 100, 100),
        spot_coordinates=((4, 10, 90), (5, 90, 10)),
        codebook=codebook
    )

    max_intensity = np.max(imagestack.xarray.values)

    return codebook, imagestack, max_intensity
# Example 9
def two_spot_sparse_coded_data_factory() -> Tuple[Codebook, ImageStack, float]:
    """
    Produce a 3-channel 3-round Codebook with two codes and an ImageStack containing one spot from
    each code. The spots do not overlap and the data are noiseless.

    These spots display sparsity in both rounds and channels: each code is "on" in only two of
    the three rounds.

    Returns
    -------
    Codebook :
        codebook containing codes that match the data
    ImageStack :
        noiseless ImageStack containing two spots
    float :
        the maximum intensity found in the created ImageStack

    """
    # NOTE(review): the target entries were missing from the extracted source; "GENE_A" and
    # "GENE_B" follow the naming used by the other codebook fixtures in this file -- confirm.
    codebook_data = [
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 0, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 2, Axes.CH.value: 1, Features.CODE_VALUE: 1},
            ],
            Features.TARGET: "GENE_A"
        },
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 1, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 2, Features.CODE_VALUE: 1},
            ],
            Features.TARGET: "GENE_B"
        },
    ]
    codebook = Codebook.from_code_array(codebook_data)

    # NOTE(review): the closing argument of this call was missing from the extracted source;
    # passing the codebook built above is assumed from the helper's name -- confirm.
    imagestack = create_imagestack_from_codebook(
        pixel_dimensions=(10, 100, 100),
        spot_coordinates=((4, 10, 90), (5, 90, 10)),
        codebook=codebook
    )

    max_intensity = np.max(imagestack.xarray.values)

    return codebook, imagestack, max_intensity
def test_from_code_array_expands_codebook_when_provided_n_codes_that_exceeds_array_value(
):
    """
    The codebook factory produces codes with 3 channels and 2 rounds. This test provides numbers
    larger than that, and the codebook should be expanded to those numbers as a result.
    """
    code_array: List = codebook_array_factory()

    # request more channels and rounds than the data contains; the values are those the
    # assertions below expect
    codebook: Codebook = Codebook.from_code_array(code_array, n_ch=10, n_round=4)

    assert codebook.sizes[Indices.CH] == 10
    assert codebook.sizes[Indices.ROUND] == 4
    assert codebook.sizes[Features.TARGET] == 2
# Example 11
def two_spot_informative_blank_coded_data_factory() -> Tuple[Codebook, ImageStack, float]:
    """
    Produce a Codebook with two codes whose codewords reference channels 0-3 and rounds 0-2, and
    an ImageStack containing one spot from each code. The spots do not overlap and the data are
    noiseless.

    The encoding of this data is essentially a one-hot encoding, but where one of the rounds of
    each code is intentionally and meaningfully "blank".

    Returns
    -------
    Codebook :
        codebook containing codes that match the data
    ImageStack :
        noiseless ImageStack containing one spot per code in codebook
    float :
        the maximum intensity found in the created ImageStack

    """
    codebook_data = [
        {
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 0, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 1, Features.CODE_VALUE: 1},
                # round 2 is blank and channels 2 and 3 are not used
            ],
            Features.TARGET: "GENE_A"
        },
        {
            Features.CODEWORD: [
                # round 0 is blank and channels 0 and 1 are not used
                {Axes.ROUND.value: 1, Axes.CH.value: 3, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 2, Axes.CH.value: 2, Features.CODE_VALUE: 1},
            ],
            Features.TARGET: "GENE_B"
        },
    ]
    codebook = Codebook.from_code_array(codebook_data)

    # NOTE(review): the closing argument of this call was missing from the extracted source;
    # passing the codebook built above is assumed from the helper's name -- confirm.
    imagestack = create_imagestack_from_codebook(
        pixel_dimensions=(10, 100, 100),
        spot_coordinates=((4, 10, 90), (5, 90, 10)),
        codebook=codebook
    )

    max_intensity = np.max(imagestack.xarray.values)

    return codebook, imagestack, max_intensity
def test_codebook_serialization():
    """
    Test that codebook can be saved to disk and recovered, and that the recovered codebook is
    identical to the one that it was serialized from.
    """
    # Create a codebook
    codebook_array = codebook_json_data_factory()
    codebook = Codebook.from_code_array(codebook_array)

    # Dump it to a temporary file
    with tempfile.TemporaryDirectory() as directory:
        json_codebook = os.path.join(directory, 'codebook.json')
        # NOTE(review): the dump call was missing from the extracted source; without it the
        # reload below would read a file that was never written.
        codebook.to_json(json_codebook)

        # Retrieve it and test that the data it contains has not changed
        codebook_reloaded = Codebook.from_json(json_codebook)
        assert codebook_reloaded.equals(codebook)
def test_from_code_array_has_three_channels_two_rounds_and_two_codes():
    """
    Tests that from_code_array loads a small codebook that has the correct size and values
    """
    code_array: List = codebook_array_factory()
    codebook: Codebook = Codebook.from_code_array(code_array)

    assert codebook.sizes[Indices.CH] == 3
    assert codebook.sizes[Indices.ROUND] == 2
    assert codebook.sizes[Features.TARGET] == 2

    # codebook should have 4 "on" combinations; the expected array has shape (2, 3, 2),
    # matching the two targets, three channels and two rounds asserted above
    expected_values = np.zeros((2, 3, 2))
    expected_values[0, 0, 0] = 1
    expected_values[0, 1, 1] = 1
    expected_values[1, 2, 0] = 1
    expected_values[1, 1, 1] = 1

    assert np.array_equal(codebook.values, expected_values)
def test_from_code_array_throws_key_error_with_missing_channel_round_or_value(
):
    """Tests that from_code_array throws errors when it encounters malformed codes"""
    # NOTE(review): the bodies of the three ``with`` blocks were missing from the extracted
    # source; the from_code_array call is restored from the docstring's stated intent.

    # codebook is now missing a round
    code_array: List = codebook_array_factory()
    del code_array[0][Features.CODEWORD][0][Indices.ROUND.value]
    with pytest.raises(KeyError):
        Codebook.from_code_array(code_array)

    # codebook is now missing a channel
    code_array: List = codebook_array_factory()
    del code_array[0][Features.CODEWORD][0][Indices.CH.value]
    with pytest.raises(KeyError):
        Codebook.from_code_array(code_array)

    # codebook is now missing a code value
    code_array: List = codebook_array_factory()
    del code_array[0][Features.CODEWORD][0][Features.CODE_VALUE]
    with pytest.raises(KeyError):
        Codebook.from_code_array(code_array)
# Example 15
def format_data(input_dir, output_dir, d):
    if not input_dir.endswith("/"):
        input_dir += "/"

    if not output_dir.endswith("/"):
        output_dir += "/"

    if d:
        url = ""
        download(input_dir, url)
        input_dir += "ExampleInSituSequencing/"
        print("Data downloaded to: {}".format(input_dir))
        input_dir += "ExampleInSituSequencing/"
        print("Using data in : {}".format(input_dir))

    def add_codebook(experiment_json_doc):
        experiment_json_doc['codebook'] = "codebook.json"

        return experiment_json_doc

    # the magic numbers here are just for the ISS example data set.
            Axes.ROUND: 4,
            Axes.CH: 4,
            Axes.ZPLANE: 1,
            'nuclei': {
                Axes.ROUND: 1,
                Axes.CH: 1,
                Axes.ZPLANE: 1,
            'dots': {
                Axes.ROUND: 1,
                Axes.CH: 1,
                Axes.ZPLANE: 1,
            'nuclei': ISSAuxTileFetcher(os.path.join(input_dir, "DO", "c1.TIF")),
            'dots': ISSAuxTileFetcher(os.path.join(input_dir, "DO", "c2.TIF")),

    codebook_array = [
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 3, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 3, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 2, Axes.CH.value: 1, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 3, Axes.CH.value: 2, Features.CODE_VALUE: 1}
            Features.TARGET: "ACTB_human"
            Features.CODEWORD: [
                {Axes.ROUND.value: 0, Axes.CH.value: 3, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 1, Axes.CH.value: 1, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 2, Axes.CH.value: 1, Features.CODE_VALUE: 1},
                {Axes.ROUND.value: 3, Axes.CH.value: 2, Features.CODE_VALUE: 1}
            Features.TARGET: "ACTB_mouse"
    codebook = Codebook.from_code_array(codebook_array)
    codebook_json_filename = "codebook.json"
    codebook.to_json(os.path.join(output_dir, codebook_json_filename))
# Example 16
                                                physical_ticks=physical_ticks, \
                                                log = img_stack.log)

    mask = BinaryMaskCollection.from_label_image(label_im)
    return mask, label_image

def _load_pickle(path):
    """Load a single pickled object from ``path`` and close the file afterwards."""
    # NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


list_of_datasets = _load_pickle('list_of_experiments.obj')
dict_of_datasets = _load_pickle('dict_of_experiments.obj')
CODEBOOK = _load_pickle('codebook.obj')

# could iterate this next line of all datasets
dataset = list_of_datasets[0]

# each dataset maps to (save_path, exp_name, assayNo); the assay number selects its code array
save_path, exp_name, assayNo = dict_of_datasets[dataset]
codebook = Codebook.from_code_array(CODEBOOK[int(assayNo)])
exp = Experiment.from_json(save_path + 'experiment.json')

# Pipeline procedure:
#  - functions should operate on datasets individually
#  - Order of operations:
#         1. Reduce images for export to Ilastik
#         2. Filter images and find spots
#         3. Import classifications from Ilastik tif files and generate masks
#         4. Assign genes to cells
#         5. Calculate area of non-stroma tissue, add up each type of gene, normalize gene density.
gene_counts_across_fovs = []
def test_from_code_array_throws_exception_when_data_is_improperly_formatted():
    """Check that from_code_array raises TypeError when a codeword entry is a tuple, not a dict."""
    malformed_codes: List = codebook_array_factory()

    # overwrite the first entry of the first codeword with a non-dict value
    first_codeword = malformed_codes[0][Features.CODEWORD]
    first_codeword[0] = ('I should be a dict, oops!', )

    with pytest.raises(TypeError):
        Codebook.from_code_array(malformed_codes, n_ch=3, n_round=1)