Example #1
0
def parse_codebook(codebook_csv: str) -> Codebook:
    """Build a SpaceTx-format Codebook from a SeqFISH codebook csv file.

    Parameters
    ----------
    codebook_csv : str
        Location of the codebook csv, handed straight to ``pd.read_csv``. The first column is
        used as the index and supplies the target names; every remaining column is one imaging
        round, taken in file order (the column labels themselves are not used). Each cell holds
        the 1-based channel that is "on" for that target in that round; 0 means no channel is on.

    Returns
    -------
    Codebook :
        Codebook object in SpaceTx format, built from a one-hot (target, round, channel) array.
    """
    table: pd.DataFrame = pd.read_csv(codebook_csv, index_col=0)
    genes = table.index.values
    data_raw = table.values
    rounds = table.shape[1]
    # Channels are numbered from 1, so the largest value seen is the channel count.
    channels = data_raw.max()

    # One-hot encode data_raw -> data, where data is genes x rounds x channels.
    data = np.zeros((len(data_raw), rounds, channels))
    for gene_idx, barcode in enumerate(data_raw):
        for round_idx, channel in enumerate(barcode):
            if channel == 0:
                # 0 marks "no signal in this round": leave the whole round at zero.
                continue
            data[gene_idx][round_idx][channel - 1] = 1

    return Codebook.from_numpy(genes, rounds, channels, data)
def parse_codebook(codebook_csv: str) -> Codebook:
    """Read a barcode-by-round csv and return it as a Codebook.

    Parameters
    ----------
    codebook_csv : str
        Location of the csv, passed to ``pd.read_csv`` with the first column as the index.
        Index entries become the target names, each remaining column is one round, and each
        cell is the 1-based "on" channel for that target and round (0 = nothing on).

    Returns
    -------
    Codebook :
        Result of ``Codebook.from_numpy`` on the one-hot (target, round, channel) array.
    """
    frame: pd.DataFrame = pd.read_csv(codebook_csv, index_col=0)
    data_raw = frame.values
    genes = frame.index.values
    rounds = frame.shape[1]
    # Largest channel number present doubles as the number of channels.
    channels = data_raw.max()

    # Expand data_raw into a one-hot array laid out as genes x rounds x channels.
    data = np.zeros((len(data_raw), rounds, channels))
    for (gene_idx, round_idx), channel in np.ndenumerate(data_raw):
        if channel != 0:
            data[gene_idx][round_idx][channel - 1] = 1

    return Codebook.from_numpy(genes, rounds, channels, data)
def convert_codebook(oldbook: Codebook, cycles_conv: Dict[int, int],
                     channels_conv: List[Dict[int, int]]) -> Codebook:
    """Re-express a codebook on a different round/channel layout.

    For every target and every round of ``oldbook``, the channel with the largest value is
    located and translated into a (round, channel) position in the new layout, which is then
    set to 1. All other positions of the new codebook are 0.

    Parameters
    ----------
    oldbook : Codebook
        Codebook to convert. Its ``data`` is indexed as [target][round][channel] and its
        ``coords["target"]`` supplies the target names of the result.
    cycles_conv : Dict[int, int]
        Maps each round index of the NEW codebook (key) to the round index of ``oldbook`` it
        came from (value). Its length is the number of rounds in the result.
    channels_conv : List[Dict[int, int]]
        Each dict maps a channel index of the NEW codebook (key) to a channel index of
        ``oldbook`` (value). The position of a dict in the list selects which of the new
        rounds sharing one old round it applies to (in ``cycles_conv`` iteration order). The
        length of the first dict is the number of channels in the result.

    Returns
    -------
    Codebook :
        New codebook built with ``Codebook.from_numpy`` from an integer one-hot array of shape
        (targets, len(cycles_conv), len(channels_conv[0])).

    Raises
    ------
    IndexError
        If no dict in ``channels_conv`` maps to the detected old channel, or if fewer new
        rounds map to the old round than the selected dict position requires.
    """
    raw = oldbook.data
    targets = np.shape(raw)[0]
    rounds = len(cycles_conv)
    channels = len(channels_conv[0])
    # Must be zeros, not np.empty: only the "on" cells are written below, so an uninitialised
    # buffer would leave arbitrary garbage in every other cell of the returned codebook.
    new_data = np.zeros((targets, rounds, channels), dtype=int)
    for t in range(targets):
        for pr, intensities in enumerate(raw[t]):
            # NOTE: argmax returns 0 for an all-zero round, so such a round is treated as
            # if old channel 0 were on.
            pchannel = np.argmax(intensities)
            # The conversion dicts are stored new -> old, so invert them by scanning: for
            # each dict, collect the new channels that map onto the detected old channel.
            candidates = [
                [tch for tch, pch in mapping.items() if pch == pchannel]
                for mapping in channels_conv
            ]
            # The dict with the most matches (first one on ties) decides both the new
            # channel and which of the new rounds sharing this old round is used.
            sub_round = np.argmax([len(matches) for matches in candidates])
            tchannel = candidates[sub_round][0]
            tround = [
                tr for tr, pround in cycles_conv.items() if pround == pr
            ][sub_round]
            new_data[t][tround][tchannel] = 1

    return Codebook.from_numpy(oldbook.coords["target"].data, rounds, channels,
                               new_data)
Example #4
0
def blank_codebook(real_codebook, num_blanks):
    """
    From a codebook of real codes, creates a codebook of those original codes plus a set of blank codes that
    follow the hamming distance > 1 rule: a blank may not be within hamming distance 1 of any real code, nor of
    any other blank that is kept.

    Parameters
    ----------
    real_codebook :
        Codebook of real codes. It is indexed by "r" and "c" to get the number of rounds and channels, reduced
        with ``argmax`` over ``Axes.CH.value``, and concatenated with the blanks along "target".
        Presumably a starfish ``Codebook`` - TODO confirm against callers.
    num_blanks :
        Number of blank codes to add. If it is greater than the total number of blank codes found, all blanks
        are added.

    Returns
    -------
    The real codebook concatenated along "target" with ``num_blanks`` blank codes named "blank<N>". The blanks
    are picked with ``random.sample``, so the selection varies between calls unless the caller seeds ``random``.
    """

    # Extract dimensions and create empty xarray for barcodes, with one (r, c) slot for each of the
    # channelsN**roundsN possible barcodes
    roundsN = len(real_codebook["r"])
    channelsN = len(real_codebook["c"])
    allCombo = xr.zeros_like(
        xr.DataArray(
            np.zeros((channelsN**roundsN, roundsN, channelsN)), dims=["target", "r", "c"]
        )
    )

    # Start from set of all possible codes
    # NOTE(review): barcodeConv and incrBarcode are defined outside this block; presumably barcodeConv turns a
    # per-round channel list into an (r, c) one-hot matrix and incrBarcode advances to the next barcode - confirm.
    barcode = [0] * roundsN
    for i in range(np.shape(allCombo)[0]):
        allCombo[i] = barcodeConv(barcode, channelsN)
        barcode = incrBarcode(barcode, channelsN)

    # Remove codes that have hamming distance <= 1 to any code in the real codebook
    # cb_codes holds, for every real target and round, the index of the maximal channel
    cb_codes = real_codebook.argmax(Axes.CH.value)
    drop_cb_codes = {}
    # Boolean round mask; one entry at a time is switched off to leave out exactly one round
    rounds = [True] * roundsN
    for r in range(roundsN):
        rounds[r] = False
        drop_codes = cb_codes.sel(r=rounds)
        drop_codes.values = np.ascontiguousarray(drop_codes.values)
        # NOTE(review): _view_row_as_element is defined outside this block; presumably it returns a view in which
        # each row compares as a single element, so that == below matches whole codes - confirm.
        drop_codes = _view_row_as_element(drop_codes.values.reshape(drop_codes.shape[0], -1))
        drop_cb_codes[r] = drop_codes
        # Restore the mask before the next round is left out
        rounds[r] = True

    # Same round-dropped views, this time for every candidate code in allCombo
    drop_combos = {}
    rounds = [True] * roundsN
    for r in range(roundsN):
        rounds[r] = False
        combo_codes = allCombo.argmax(Axes.CH.value)
        combo_codes = combo_codes.sel(r=rounds)
        combo_codes.values = np.ascontiguousarray(combo_codes.values)
        combo_codes = _view_row_as_element(combo_codes.values.reshape(combo_codes.shape[0], -1))
        drop_combos[r] = combo_codes
        rounds[r] = True
    # Full-length (no round dropped) view of the candidates; only its length is used in this stage
    combo_codes = allCombo.argmax(Axes.CH.value)
    combo_codes.values = np.ascontiguousarray(combo_codes.values)
    combo_codes = _view_row_as_element(combo_codes.values.reshape(combo_codes.shape[0], -1))

    # A candidate is dropped as soon as, for some left-out round, it equals any real code with that same round
    # left out
    drop = []
    for i in range(len(combo_codes)):
        for r in range(roundsN):
            if np.any(drop_combos[r][i] == drop_cb_codes[r]):
                drop.append(i)
                break

    # Set gives constant-time membership tests while filtering
    drop = set(drop)
    allCombo = allCombo[[x for x in range(len(combo_codes)) if x not in drop]]

    # Find set of codes that all have hamming distance of more than 1 to each other

    # Creates set of codebooks each with a different dropped round, can determine if two codes are 1 or fewer hamming
    # distances from each other by seeing if they match exactly when the same round is dropped for each code
    drop_combos = {}
    rounds = [True] * roundsN
    for r in range(roundsN):
        rounds[r] = False
        combo_codes = allCombo.argmax(Axes.CH.value)
        combo_codes = combo_codes.sel(r=rounds)
        combo_codes.values = np.ascontiguousarray(combo_codes.values)
        combo_codes = _view_row_as_element(combo_codes.values.reshape(combo_codes.shape[0], -1))
        drop_combos[r] = combo_codes
        rounds[r] = True
    combo_codes = allCombo.argmax(Axes.CH.value)
    combo_codes.values = np.ascontiguousarray(combo_codes.values)
    combo_codes = _view_row_as_element(combo_codes.values.reshape(combo_codes.shape[0], -1))

    # Greedy pass: keep code i and remove every other remaining code that matches it with some round dropped.
    # combo_codes and drop_combos shrink together, so index i refers to the same code in all of them.
    i = 0
    while i < len(combo_codes):
        drop = set()
        for r in range(roundsN):
            drop.update([x for x in np.nonzero(drop_combos[r][i] == drop_combos[r])[0]])
        # Code i always matches itself; take it out of the drop set so it is the one that is kept
        drop.remove(i)
        inds = [x for x in range(len(combo_codes)) if x not in drop]
        combo_codes = combo_codes[inds]
        for r in range(roundsN):
            drop_combos[r] = drop_combos[r][inds]
        i += 1

    # Create Codebook object with blanks
    # Rebuild the one-hot (round, channel) matrix of each surviving code
    # NOTE(review): assumes iterating code[0] yields one channel index per round - depends on what
    # _view_row_as_element returns; confirm.
    blanks = np.zeros((len(combo_codes), roundsN, channelsN))
    for i, code in enumerate(combo_codes):
        for j, x in enumerate(code[0]):
            blanks[i][j][x] = 1

    blank_codebook = Codebook.from_numpy(
        code_names=["blank" + str(x) for x in range(len(blanks))],
        n_round=roundsN,
        n_channel=channelsN,
        data=blanks,
    )

    # Combine correct number of blank codes with real codebook and return combined codebook
    # Cap the request at the number of blanks found, then sample that many without replacement
    if num_blanks > len(blanks):
        num_blanks = len(blanks)
    rand_sample = random.sample(range(len(blanks)), num_blanks)
    combined = xr.concat([real_codebook, blank_codebook[rand_sample]], "target")

    return combined