Example #1
    def collect_cells(self, out_file: str) -> None:
        # Verify that the previous punchcard subset exists
        parent = os.path.join(self.config.paths.build, "data",
                              self.subset.card.name + ".loom")
        if not os.path.exists(parent):
            logging.error(f"Punchcard file '{parent}' was missing.")
            sys.exit(1)

        # Verify that there are some cells in the subset
        with loompy.connect(parent, mode="r") as ds:
            if (ds.ca.Subset == self.subset.name).sum() == 0:
                logging.info(
                    f"Skipping {self.name} because the subset was empty")
                sys.exit(0)

        logging.info(f"Collecting cells for {self.name}")
        with loompy.new(out_file) as dsout:
            # Collect cells from the previous punchcard subset
            with loompy.connect(parent, mode="r") as ds:
                for (_, _, view) in ds.scan(
                        items=(ds.ca.Subset == self.subset.name),
                        axis=1,
                        key="Accession",
                        layers=["", "spliced", "unspliced"],
                        what=["layers", "col_attrs", "row_attrs"]):
                    dsout.add_columns(view.layers, view.ca, row_attrs=view.ra)
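For context, ds.scan streams the file in manageable batches along the chosen axis and yields views whose layers and attributes can be appended to the output with add_columns. A minimal standalone sketch of the same pattern, assuming hypothetical file names and an input with a Subset column attribute and an Accession row attribute:

import loompy

# Stream all columns whose Subset attribute matches into a new file.
# "input.loom", "output.loom" and "mysubset" are hypothetical names.
with loompy.connect("input.loom", mode="r") as ds:
    with loompy.new("output.loom") as dsout:
        selected = ds.ca.Subset == "mysubset"  # Boolean mask over columns
        for (_, _, view) in ds.scan(items=selected, axis=1, key="Accession"):
            dsout.add_columns(view.layers, col_attrs=view.ca, row_attrs=view.ra)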
Example #2
def create_subsetted_loom(loom, output_loom, cellmask):
    """Deprecated.

    Parameters
    ----------
    loom :
        
    output_loom :
        
    cellmask :
        
    genemask :
        

    Returns
    -------

    
    """
    import loompy

    with loompy.new(output_loom) as dsout:
        cells = np.where(cellmask)[0]
        for (ix, selection, view) in loom.scan(items=cells, axis=1,
                                               key="gene"):
            dsout.add_columns(view.layers,
                              col_attrs=view.ca,
                              row_attrs=view.ra)
Example #3
def create_subsetted_loom(loom, output_loom_filename, cellmask):
    """Deprecated.

    Will create a new loom file with cells specified according to a Boolean vector mask.

    Parameters
    ----------
    loom : LoomConnection object which will be subsetted
        
    output_loom_filename : string denoting the path and filename of the output loom file.  
        
    cellmask : Boolean numpy vector with length equal to the number of cells in "loom"
        
    Returns
    -------

    
    """
    import loompy

    if len(cellmask) != loom.shape[1]:
        raise Exception(
            "cellmask must be boolean mask with length equal to the number of columns of loom"
        )
    with loompy.new(output_loom_filename) as dsout:
        cells = np.where(cellmask)[0]
        for (ix, selection, view) in loom.scan(items=cells, axis=1,
                                               key="gene"):
            dsout.add_columns(view.layers,
                              col_attrs=view.ca,
                              row_attrs=view.ra)
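A hypothetical call, assuming an input file that has the "gene" row attribute required by the key="gene" scan above and a "NumberOfUMI" column attribute:

import loompy

# Keep only cells with more than 500 UMIs; file and attribute names are hypothetical.
with loompy.connect("input.loom", mode="r") as ds:
    mask = ds.ca["NumberOfUMI"] > 500
    create_subsetted_loom(ds, "subset.loom", mask)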
Example #4
def test_new() -> None:
    with loompy.new("test.loom") as ds:
        m = np.zeros((20, 100))
        ra = {"Gene": [x for x in "ABCDEFGHILJKLMNOPQRS"]}
        ca = {"Cell": np.arange(100)}
        ds.add_columns(m, ca, row_attrs=ra)
        ds.add_columns(m, ca, row_attrs=ra)
    with loompy.connect("test.loom") as ds:
        assert ds.shape == (20, 200)
Example #5
def combine_loom_files(loom_file_list, library, species, organ, project_id, project_name, output_loom_file):
    expression_data_type_list = []
    optimus_output_schema_version_list = []
    pipeline_versions_list = []
    input_id_metadata_field_list = []
    input_name_metadata_field_list = []
    input_id_list = []
    input_name_list = []

    with loompy.new("intermediate.loom") as dsout:
        for i in range(len(loom_file_list)):
            loom_file = loom_file_list[i]
            with loompy.connect(loom_file) as ds:

                # add global attributes for this file to the running list of global attributes
                expression_data_type_list.append(ds.attrs["expression_data_type"])
                optimus_output_schema_version_list.append(ds.attrs["optimus_output_schema_version"])
                pipeline_versions_list.append(ds.attrs["pipeline_version"])
                input_id_metadata_field_list.append(ds.attrs["input_id_metadata_field"])
                input_name_metadata_field_list.append(ds.attrs["input_name_metadata_field"])
                input_id_list.append(ds.attrs["input_id"])
                input_name_list.append(ds.attrs["input_name"])

                # check that the ordering is the same for the matrices being combined
                if dsout.shape[0] != 0:
                    assert np.array_equal(dsout.ra["ensembl_ids"], ds.ra["ensembl_ids"])

                # keep only cells with at least 100 UMIs (n_molecules)
                UMIs = ds.ca['n_molecules']
                cells = np.where(UMIs >= 100)[0]
                for (ix, selection, view) in ds.scan(items=cells, axis=1):
                    view.ca['cell_names'] = view.ca['cell_names'] + "-" + str(i)
                    dsout.add_columns(view.layers, col_attrs=view.ca, row_attrs=view.ra)

    # read the intermediate loom back as attributes plus a sparse matrix
    with loompy.connect("intermediate.loom") as ds:
        row_attrs = ds.ra[:]
        col_attrs = ds.ca[:]
        sp = ds.sparse()

    # Write out a new loom file with the sparse matrix
    loompy.create(output_loom_file, sp, row_attrs, col_attrs)

    # add the global attributes to the loom file
    with loompy.connect(output_loom_file) as ds:
        ds.attrs["library_preparation_protocol.library_construction_approach"] = library
        ds.attrs["donor_organism.genus_species"] = species
        ds.attrs["specimen_from_organism.organ"] = organ
        ds.attrs["project.provenance.document_id"] = project_id
        ds.attrs["project.project_core.project_name"] = project_name
        ds.attrs["expression_data_type"] = ", ".join(set(expression_data_type_list))
        ds.attrs["optimus_output_schema_version"] = ", ".join(set(optimus_output_schema_version_list))
        ds.attrs["pipeline_version"] = ", ".join(set(pipeline_versions_list))
        ds.attrs["input_id_metadata_field"] = ", ".join(set(input_id_metadata_field_list))
        ds.attrs["input_name_metadata_field"] = ", ".join(set(input_name_metadata_field_list))
        ds.attrs["input_id"] = ", ".join(input_id_list)
        ds.attrs["input_name"] = ", ".join(input_name_list)
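For the simpler case where no per-file cell filtering or attribute merging is needed, loompy's built-in loompy.combine concatenates files column-wise in a single call; a minimal sketch with hypothetical file names:

import loompy

# Rows are aligned on the "ensembl_ids" row attribute, which must be unique per row.
loompy.combine(["sample1.loom", "sample2.loom"], "combined.loom", key="ensembl_ids")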
Example #6
import numpy as np
import loompy

loom_file_in = snakemake.input['loom']

loom_test_out = snakemake.output['loom_test']
loom_train_out = snakemake.output['loom_train']

# Open the input loom file read-only
ds = loompy.connect(loom_file_in, 'r')

test_ii = np.random.choice(ds.shape[1], size=ds.shape[1] // 2, replace=False)

train_ii = np.setdiff1d(np.arange(ds.shape[1]), test_ii)

test_ii = np.sort(test_ii)
train_ii = np.sort(train_ii)

ds_test_out = loompy.new(loom_test_out)

for (ix, selection, view) in ds.scan(items=test_ii, axis=1):
    ds_test_out.add_columns(view.layers, col_attrs=view.ca, row_attrs=view.ra)

ds_train_out = loompy.new(loom_train_out)

for (ix, selection, view) in ds.scan(items=train_ii, axis=1):
    ds_train_out.add_columns(view.layers, col_attrs=view.ca, row_attrs=view.ra)

ds.close()
ds_test_out.close()
ds_train_out.close()
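Note that the split above draws from NumPy's global random state, so it changes on every run; a seeded generator makes it reproducible. A sketch, not part of the original script, with an arbitrary seed:

import numpy as np

rng = np.random.default_rng(42)  # arbitrary fixed seed
test_ii = np.sort(rng.choice(ds.shape[1], size=ds.shape[1] // 2, replace=False))
train_ii = np.setdiff1d(np.arange(ds.shape[1]), test_ii)  # setdiff1d returns sorted indices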
Example #7
    ind_healthy = meta['Compartment'].str.startswith('Normal').values
    meta = meta[ind_healthy]
    counts = counts[:, ind_healthy]

    print('Converting counts to a dense matrix')
    counts = counts.todense()

    # FIXME: figure out what those annotations actually mean!!

    print('Set output file')
    fdn_out = '../data_full/Young_2018/'
    fn_out = fdn_out+'dataset.loom'
    os.makedirs(fdn_out, exist_ok=True)

    print('Write to loom')
    with loompy.new(fn_out) as dsl:
        dsl.add_columns(
            layers={'': counts},
            row_attrs={
                'GeneName': meta_genes['Symbol'].values,
                },
            col_attrs={
                'CellID': meta.index.values,
                'CellType': meta['ClusterID'].values,
                'NumberOfGenes': meta['nGenes'].astype(int).values,
                'NumberOfUMI': meta['nUMI'].astype(int).values,
                'Subject': meta['Source'].values,
                'Location': meta['Compartment'].values,
                }
            )
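Because the whole dense matrix is written in a single add_columns call here, loompy.create is an equivalent one-shot alternative to loompy.new; an abridged sketch reusing the script's variables (col_attrs shortened for brevity):

import loompy

loompy.create(
    fn_out,
    layers={'': counts},
    row_attrs={'GeneName': meta_genes['Symbol'].values},
    col_attrs={'CellID': meta.index.values},
)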
Example #8
def clr(x):
    """Centered log-ratio-style transform of a cells x antibodies DataFrame."""
    x = np.log(x + 1)
    x = x.subtract(x.mean(axis=1), axis=0)

    return x


ab_clr = clr(ab)

# import matplotlib.pyplot as plt
# plt.figure()
# plt.plot(ab_clr['CD3'], ab_clr['CD4'], 'o', ms=2)
# #plt.plot(ab_clr['CD14'], ab_clr['CD4'], 'o', ms=2)
# plt.show()
#
# plt.figure()
# plt.hist(np.log10(ab['CD4']+1), 30)
# plt.show()

is_mono = (ab_clr['CD14'] > 2).values

ab_mono = ab.loc[is_mono]

with loompy.connect(in_loom, mode='r') as ds:
    with loompy.new(out_loom) as ds_out:
        view = ds.view[:, is_mono]
        ds_out.add_columns(view.layers, col_attrs=view.ca, row_attrs=view.ra)

ab_mono.to_csv(out_ab, sep="\t", compression='gzip')
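A quick sanity property of the clr transform above: after the subtraction, every cell (row) is centered on zero. A small self-contained check with made-up counts:

import numpy as np
import pandas as pd

# Two hypothetical cells measured with three antibodies
demo = pd.DataFrame([[10, 0, 5], [1, 100, 3]], columns=["CD3", "CD4", "CD14"])
demo_clr = clr(demo)
assert np.allclose(demo_clr.sum(axis=1), 0.0)  # rows are mean-centered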
Example #9
def submit(loomfile, model, hapcode, chunk, submit_start, submit_end, outdir, email, queue, mem, walltime, systype, dryrun):
    LOG.warn('Loom file: %s' % loomfile)
    LOG.warn('Models: %s, %s' % (model[0], model[1]))
    LOG.warn('HPC system type: %s' % systype)
    if dryrun:
        LOG.warn('Showing submission script only')

    with loompy.connect(loomfile) as ds:
        ds.attrs.HapCode = hapcode
        num_genes, num_cells = ds.shape
        if submit_end == 0:
            submit_end = num_genes
        gsurv = np.where(ds.ra.Selected[submit_start:submit_end])[0] + submit_start
        num_gsurv = len(gsurv)
        LOG.warn('The number of selected genes: %d' % num_gsurv)
        LOG.warn('The number of selected cells: %d' % num_cells)
        LOG.warn('%d jobs will be submitted' % int(np.ceil(num_gsurv/chunk)))
    processed = 0

    if systype == 'pbs':
        #tgx_layer = ''
        #mat_layer = hapcode[0]
        mat_layer, pat_layer = hapcode
        for idx_start in range(0, num_gsurv, chunk):
        # for idx_start in xrange(0, num_gsurv, chunk):
        # for idx_start in xrange(submit_start, submit_end, chunk):
            idx_end = min(idx_start+chunk, num_gsurv-1)
            #idx_end = min(submit_end, idx_start+chunk, num_gsurv-1)
            start = gsurv[idx_start]
            if idx_end < num_gsurv-1:
                end = gsurv[idx_end]
                genes = gsurv[idx_start:idx_end]
            else:  #idx_end == num_gsurv-1:
                end = submit_end
                #end = num_genes
                genes = gsurv[idx_start:]
            LOG.info('Chunk start: %d, end %d' % (start, end))
            infile = os.path.join(outdir, '_chunk.%05d-%05d.npz' % (start, end))
            LOG.debug('Genes: %s' % ' '.join(genes.astype(str)))
            LOG.debug('Total %d genes submitted in this job' % len(genes))
            data_dict = dict()
            data_dict['shape'] = (len(genes), num_cells)
            with loompy.connect(loomfile, 'r') as ds:
                data_dict['GeneID'] = ds.ra.GeneID[genes]
                cur_chunk = dict()
                #cur_chunk[tgx_layer] = ds.layers[tgx_layer][genes, :]
                cur_chunk[mat_layer] = ds.layers[mat_layer][genes, :]
                cur_chunk[pat_layer] = ds.layers[pat_layer][genes, :]
                #cur_chunk[tgx_layer] = cur_chunk[mat_layer] + cur_chunk[pat_layer]
                data_dict['Counts'] = cur_chunk
                data_dict['Size'] = ds.ca.Size
                data_dict['Selected'] = np.ones(len(genes))  # select all
                np.savez_compressed(infile, **data_dict)
            outfile = os.path.join(outdir, '_scbase.%05d-%05d.param.npz' % (start, end))
            job_par = 'ASE_MODEL=%s,TGX_MODEL=%s,MAT_HAPCODE=%s,PAT_HAPCODE=%s,OUTFILE=%s,INFILE=%s' % \
                      (model[0], model[1], hapcode[0], hapcode[1], outfile, infile)
            cmd = ['qsub']
            if email is not None:
                cmd += ['-M', email]
            if queue is not None:
                cmd += ['-q', queue]
            if mem > 0:
                cmd += ['-l', 'mem=%d' % mem]
            if walltime > 0:
                cmd += ['-l', 'walltime=%d:00:00' % walltime]
            cmd += ['-v', job_par]
            cmd += [os.path.join(os.path.dirname(os.environ['_']), 'run_mcmc_on_cluster.sh')]
            if dryrun:
                print(" ".join(cmd))
            else:
                LOG.info(" ".join(cmd))
                call(cmd)
                time.sleep(1.0)
            processed += len(genes)
        LOG.debug('Total %d genes were submitted' % processed)
        LOG.warn('Job submission complete')
    elif systype == 'pbs-with-whole-loom':  # Do not use this: loom is not stable
        # for idx_start in xrange(0, num_gsurv, chunk):
        for idx_start in range(0, num_gsurv, chunk):
            idx_end = min(idx_start+chunk, num_gsurv-1)
            start = gsurv[idx_start]
            if idx_end < num_gsurv-1:
                end = gsurv[idx_end]
                genes = gsurv[idx_start:idx_end]
            else:  #idx_end == num_gsurv-1:
                end = num_genes
                genes = gsurv[idx_start:]
            LOG.info('Chunk start: %d, end %d' % (start, end))
            LOG.debug('Genes: %s' % ' '.join(genes.astype(str)))
            LOG.debug('Total %d genes submitted in this job' % len(genes))
            outfile = os.path.join(outdir, '_scbase.%05d-%05d.param.npz' % (start, end))
            job_par = 'ASE_MODEL=%s,TGX_MODEL=%s,MAT_HAPCODE=%s,PAT_HAPCODE=%s,START=%d,END=%d,OUTFILE=%s,INFILE=%s' % \
                      (model[0], model[1], hapcode[0], hapcode[1], start, end, outfile, loomfile)
            cmd = ['qsub']
            if email is not None:
                cmd += ['-M', email]
            if queue is not None:
                cmd += ['-q', queue]
            if mem > 0:
                cmd += ['-l', 'mem=%d' % mem]
            if walltime > 0:
                cmd += ['-l', 'walltime=%d:00:00' % walltime]
            cmd += ['-v', job_par]
            cmd += [os.path.join(os.path.dirname(os.environ['_']), 'run_mcmc_on_cluster.sh')]
            if dryrun:
                print(" ".join(cmd))
            else:
                LOG.info(" ".join(cmd))
                call(cmd)
                time.sleep(1.0)
            processed += len(genes)
        LOG.debug('Total %d genes were submitted' % processed)
        LOG.warn('Job submission complete')
    elif systype == 'pbs-with-loom-chunks':  # Do not use this: loompy does not support this
        # for idx_start in xrange(0, num_gsurv, chunk):
        for idx_start in range(0, num_gsurv, chunk):
            idx_end = min(idx_start+chunk, num_gsurv-1)
            start = gsurv[idx_start]
            end = gsurv[idx_end]
            if idx_end < num_gsurv-1:
                end = gsurv[idx_end]
                genes = gsurv[idx_start:idx_end]
            else:  #idx_end == num_gsurv-1:
                end = num_genes
                genes = gsurv[idx_start:]
            LOG.info('Chunk start: %d, end %d' % (start, end))
            infile = os.path.join(outdir, '_chunk.%05d-%05d.loom' % (start, end))
            LOG.debug('Genes: %s' % ' '.join(genes.astype(str)))
            LOG.debug('Total %d genes submitted in this job' % len(genes))
            with loompy.connect(loomfile, 'r') as ds:
                with loompy.new(infile) as dsout:
                    for (_, selection, view) in ds.scan(items=genes, axis=0):
                        LOG.debug('Genes in this view: %s' % ' '.join(selection.astype(str)))
                        dsout.add_columns(view.layers, col_attrs=view.col_attrs, row_attrs=view.row_attrs)
            outfile = os.path.join(outdir, '_scbase.%05d-%05d.param.npz' % (start, end))
            job_par = 'ASE_MODEL=%s,TGX_MODEL=%s,MAT_HAPCODE=%s,PAT_HAPCODE=%s,OUTFILE=%s,INFILE=%s' % \
                      (model[0], model[1], hapcode[0], hapcode[1], outfile, infile)
            cmd = ['qsub']
            if email is not None:
                cmd += ['-M', email]
            if queue is not None:
                cmd += ['-q', queue]
            if mem > 0:
                cmd += ['-l', 'mem=%d' % mem]
            if walltime > 0:
                cmd += ['-l', 'walltime=%d:00:00' % walltime]
            cmd += ['-v', job_par]
            cmd += [os.path.join(os.path.dirname(os.environ['_']), 'run_mcmc_on_cluster.sh')]
            if dryrun:
                print(" ".join(cmd))
            else:
                LOG.info(" ".join(cmd))
                call(cmd)
                time.sleep(1.0)
            processed += len(genes)
        LOG.debug('Total %d genes were submitted' % processed)
        LOG.warn('Job submission complete')
    elif systype == 'lsf':
        raise NotImplementedError('LSF submission is not yet supported')
    else:
        raise RuntimeError('No plan to support other job scheduling system until we see many requests')
Example #10
    def fit(self, ds: loompy.LoomConnection) -> None:
        logging.info("Computing pseudoage")
        ages = np.array([age_to_num(x) for x in ds.ca.Age])
        knn = ds.col_graphs.KNN
        k = knn.nnz / knn.shape[0]
        ds.ca.PseudoAge = (knn.astype("bool") @ ages) / k

        logging.info("Slicing pseudoage")
        slice_names: List[str] = []
        with TemporaryDirectory() as tempfolder:
            slices = np.percentile(ds.ca.PseudoAge, np.arange(0, 101, 5))
            logging.info("Collecting cells")
            for (ix, _, view) in ds.scan(axis=1):
                for i in range(len(slices) - 2):
                    s1 = slices[i]
                    s2 = slices[i + 2]
                    slice_name = f"Age{s1:05.2f}to{s2:05.2f}".replace(
                        ".", "") + ".loom"
                    if slice_name not in slice_names:
                        slice_names.append(slice_name)
                    cells = ((view.ca.PseudoAge >= s1) &
                             (view.ca.PseudoAge < s2))
                    if cells.sum() == 0:
                        continue
                    fname = os.path.join(tempfolder, slice_name)
                    if not os.path.exists(fname):
                        with loompy.new(fname) as dsout:
                            dsout.add_columns(view.layers[:, cells],
                                              col_attrs=view.ca[cells],
                                              row_attrs=view.ra)
                    else:
                        with loompy.connect(fname) as dsout:
                            dsout.add_columns(view.layers[:, cells],
                                              col_attrs=view.ca[cells],
                                              row_attrs=view.ra)

            for slice_name in slice_names:
                fname = os.path.join(tempfolder, slice_name)
                logging.info("Cytograph on " + slice_name)
                with loompy.connect(fname) as dstemp:  # distinct name avoids shadowing the outer `ds`
                    Cytograph(config=load_config()).fit(dstemp)

            # Use dynamic programming to find the deepest tree (forest), as given by total number of cells along each branch
            logging.info("Computing pseudolineage")
            clusters = "Clusters"
            min_pct = 0.1

            # List of matrices giving the bipartite graph between each pair of layers, weighted by number of shared cells
            overlaps = []
            n_nodes = []  # List of number of nodes (clusters) in each layer
            n_cells = []  # List of arrays giving the number of cells in each cluster
            n_layers = len(slice_names)

            # Compute the bipartite graphs between layers
            for t in range(n_layers):
                # Link clusters from layer t to clusters from layer t + 1
                logging.info(f"{slice_names[t]}.loom")
                with loompy.connect(os.path.join(tempfolder,
                                                 slice_names[t])) as ds1:
                    n_nodes.append(ds1.ca[clusters].max() + 1)
                    n_cells.append(np.zeros(n_nodes[t]))
                    for c in range(n_nodes[t]):
                        n_cells[t][c] = (ds1.ca[clusters] == c).sum()
                    if t >= n_layers - 1:
                        break
                    with loompy.connect(
                            os.path.join(tempfolder,
                                         slice_names[t + 1])) as ds2:
                        overlap = np.zeros(
                            (np.unique(ds1.ca[clusters]).shape[0],
                             np.unique(ds2.ca[clusters]).shape[0]),
                            dtype="int")
                        for i in np.unique(ds1.ca[clusters]):
                            cells1 = ds1.ca.CellID[ds1.ca[clusters] == i]
                            for j in np.unique(ds2.ca[clusters]):
                                cells2 = ds2.ca.CellID[ds2.ca[clusters] == j]
                                overlap[i, j] = np.intersect1d(cells1,
                                                               cells2).shape[0]
                        overlaps.append(overlap)

            # List of arrays keeping track of the depth of the deepest tree starting at each node in the layer
            # Depth defined as sum of the number of shared cells along the branch
            depths = [np.zeros(n, dtype="int") for n in n_nodes]
            # List of arrays giving the predecessor of each cluster (or -1 if no predecessor)
            edges = [np.zeros(n, dtype="int") for n in n_nodes[1:]]
            for t in range(0, n_layers - 1):
                for i in range(n_nodes[t + 1]):
                    # Now find the widest deepest branch from any node j in layer t to node i in layer t + 1
                    # Widest, deepest meaning: greatest sum of depth up to node j in layer t plus number of shared cells
                    # But disallowing any branch with less than min_pct % shared cells
                    best_j = -1
                    best_depth = 0
                    for j in range(n_nodes[t]):
                        pct_overlapping = 100 * overlaps[t][j, i] / (
                            n_cells[t][j] + n_cells[t + 1][i])
                        if pct_overlapping > min_pct:
                            depth = depths[t][j] + overlaps[t][j, i]
                            if depth > best_depth:
                                best_depth = depth
                                best_j = j
                    edges[t][i] = best_j
                    depths[t + 1][i] = best_depth

            # Now we have
            #
            # edges:    List of arrays giving the index of the predecessor of each cluster (or -1 if no predecessor exists)
            # overlaps: List of matrices giving the number of cells shared between clusters in layer t and t + 1
            # n_nodes:  List of number of nodes (clusters) in each layer
            # n_cells:  List of arrays of number of cells in each node (cluster)

            # Now position the nodes of each layer such that no edges cross
            ypositions = [np.arange(n_nodes[0])]
            for t in range(len(edges)):
                pos = np.full(n_nodes[t + 1], -1)
                for i in range(pos.shape[0]):
                    prev = edges[t][i]
                    if prev >= 0:
                        pos[i] = ypositions[t][prev]
                ordering = np.argsort(pos)
                mapping = dict(zip(ordering, range(len(ordering))))
                ypositions.append(
                    np.array([mapping[i] for i in range(len(ordering))]))
            # Make the positions proportional to the number of cells (cumulative)
            max_pos = 0
            for i, pos in enumerate(ypositions):
                with loompy.connect(os.path.join(tempfolder,
                                                 slice_names[i])) as ds0:
                    n_clusters = ds0.ca[clusters].max() + 1
                    ncells = np.array([(ds0.ca[clusters] == i).sum()
                                       for i in range(n_clusters)])
                    total = 0
                    new_pos = np.zeros_like(pos)
                    for j in range(len(pos)):
                        cluster = np.where(pos == j)[0]
                        new_pos[cluster] = total + ncells[cluster] / 2
                        total += ncells[cluster]
                ypositions[i] = new_pos / 1000
                max_pos = max(max_pos, max(ypositions[i]))

            for i, pos in enumerate(ypositions):
                ypositions[i] += (max_pos - np.max(pos)) / 2

            # Then position the layers properly in time
            xpositions = []
            for i in range(n_layers):
                with loompy.connect(os.path.join(tempfolder,
                                                 slice_names[i])) as ds0:
                    xpositions.append(np.mean(ds0.ca.PseudoAge))

            # Now project each individual cell to the pseudolineage
            logging.info("Projecting cells to pseudolineage")
            cell_to_xy = {}
            for t in range(len(n_nodes) - 1):
                with loompy.connect(os.path.join(tempfolder,
                                                 slice_names[t])) as ds0:
                    with loompy.connect(
                            os.path.join(tempfolder,
                                         slice_names[t + 1])) as ds1:
                        for i in range(n_nodes[t + 1]):
                            if edges[t][i] != -1:
                                y1 = ypositions[t][edges[t][i]]
                                y2 = ypositions[t + 1][i]
                                offset = (xpositions[t + 1] -
                                          xpositions[t]) / 4
                                overlapping_cells = (ds1.ca[clusters] == i) & (
                                    ds1.ca.PseudoAge < slices[t + 2])
                                crs = np.array(
                                    CatmullRomSpline(
                                        n_points=100).fit_transform(
                                            np.array(
                                                [[slices[t + 1] - offset, y1],
                                                 [slices[t + 1], y1],
                                                 [slices[t + 2], y2],
                                                 [slices[t + 2] + offset,
                                                  y2]])))
                                widths = np.linspace(n_cells[t][edges[t][i]],
                                                     n_cells[t + 1][i],
                                                     num=100) / 1500
                                f = interp1d(crs[:, 0],
                                             crs[:, 1],
                                             fill_value="extrapolate")
                                fw = interp1d(crs[:, 0],
                                              widths,
                                              fill_value="extrapolate")
                                y = f(
                                    ds1.ca.PseudoAge[overlapping_cells]
                                ) + np.random.normal(
                                    scale=fw(
                                        ds1.ca.PseudoAge[overlapping_cells]) /
                                    6,
                                    size=overlapping_cells.sum())
                                for i, ix in enumerate(
                                        np.where(overlapping_cells)[0]):
                                    cell_to_xy[ds1.ca.CellID[ix]] = [
                                        ds1.ca.PseudoAge[ix], y[i]
                                    ]
                        # Draw the leftmost pseudoage slice
                        if t == 0:
                            for i in range(n_nodes[0]):
                                y1 = ypositions[0][i]
                                y2 = ypositions[0][i]
                                widths = np.linspace(n_cells[t][i],
                                                     n_cells[t][i],
                                                     num=100) / 1500
                                overlapping_cells = (ds0.ca[clusters] == i) & (
                                    ds0.ca.PseudoAge < slices[1])
                                y = y1 + np.random.normal(
                                    scale=widths[0] / 6,
                                    size=overlapping_cells.sum())
                                for i, ix in enumerate(
                                        np.where(overlapping_cells)[0]):
                                    cell_to_xy[ds0.ca.CellID[ix]] = [
                                        ds0.ca.PseudoAge[ix], y[i]
                                    ]
                        # Draw the rightmost pseudoage slice
                        if t == len(n_nodes) - 2:
                            for i in range(n_nodes[-1]):
                                y1 = ypositions[t][edges[t][i]]
                                y2 = ypositions[t + 1][i]
                                widths = np.linspace(n_cells[t][edges[t][i]],
                                                     n_cells[t + 1][i],
                                                     num=100) / 1500
                                overlapping_cells = (ds1.ca[clusters] == i) & (
                                    ds1.ca.PseudoAge > slices[-2])
                                y = y2 + np.random.normal(
                                    scale=widths[-1] / 6,
                                    size=overlapping_cells.sum())
                                for i, ix in enumerate(
                                        np.where(overlapping_cells)[0]):
                                    cell_to_xy[ds1.ca.CellID[ix]] = [
                                        ds1.ca.PseudoAge[ix], y[i]
                                    ]

            logging.info(
                "Saving pseudolineage projection back in original file")
            xy = np.zeros((ds.shape[1], 2))
            for i, cellid in enumerate(cell_to_xy.keys()):
                j = np.where(ds.ca.CellID == cellid)[0]
                xy[j] = cell_to_xy[cellid]
            ds.ca.PseudoLineage = xy