import sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import loompy
import velocyto as vcy
import logging
import pandas as pd
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.interpolate import interp1d

# In[4]:

# Velocyto object to analyse; its path is the first snakemake input.
vlm = vcy.load_velocyto_hdf5(snakemake.input[0])

# In[ ]:

# In[ ]:

## color codes consistent with main text
from collections import defaultdict

# Cluster label -> plot colour; any label not listed falls back to 'Grey'.
monocytelabels = defaultdict(
    lambda: 'Grey',
    {
        '4': '#9382ae',
        '3': '#fdceb8',
        '12': '#df5974',
    },
)
def main():
    """Run the velocyto workflow on the HDF5 file named on the command line.

    Expects exactly one command-line argument: the path to a velocyto HDF5
    object. The steps, in order, are: load, filter cells, filter genes,
    normalize the S and U matrices, PCA, kNN smoothing, gamma fit, velocity
    calculation and extrapolation, t-SNE embedding (``random_state=33``),
    projection of velocity onto the embedding, grid arrows, and saving.

    The result is written next to the input as
    ``<input path without extension>_tsne33_velocity.hdf5``.

    Raises:
        SystemExit: with status 1 when the argument count is wrong (the
            usage text is printed to stderr).
    """
    if len(sys.argv) != 2:
        # A usage error must not look like success to the calling shell.
        print('runs the velocyto workflow on a given hdf5 object',
              file=sys.stderr)
        print('usage: velocyto_workflow <path to file>', file=sys.stderr)
        sys.exit(1)

    # Imported locally so the function does not depend on the surrounding
    # script header providing these names.
    import os
    from sklearn.manifold import TSNE

    input_path = sys.argv[1]
    # Strip the real extension instead of blindly dropping five characters,
    # which mangled any path not ending in exactly ".hdf5".
    output_path = os.path.splitext(input_path)[0] + '_tsne33_velocity.hdf5'

    print('loading data')
    vlm = vcy.load_velocyto_hdf5(input_path)
    print(len(vlm.ca['CellID']), 'cells')
    print(len(vlm.ra['Gene']), 'genes')

    print('filtering cells')
    # Keep cells whose initial_Ucell_size is above the 0.5th percentile.
    vlm.filter_cells(bool_array=vlm.initial_Ucell_size > np.percentile(
        vlm.initial_Ucell_size, 0.5))

    print('filtering genes')
    vlm.score_cv_vs_mean(3000, plot=False, max_expr_avg=35)
    vlm.filter_genes(by_cv_vs_mean=True)
    print(len(vlm.ca['CellID']), 'cells')
    print(len(vlm.ra['Gene']), 'genes')

    #print('setting sample names as clusters')
    #samplenames = list(map(lambda x: x.split(':')[0], vlm.ca['CellID']))
    #vlm.ca['sample_name'] = samplenames
    #vlm.set_clusters(vlm.ca["sample_name"])

    print('normalizing data matrices')
    vlm._normalize_S(relative_size=vlm.S.sum(0),
                     target_size=vlm.S.sum(0).mean())
    vlm._normalize_U(relative_size=vlm.U.sum(0),
                     target_size=vlm.U.sum(0).mean())

    print('running pca')
    vlm.perform_PCA()

    print('knn smoothing')
    vlm.knn_imputation(n_pca_dims=15,
                       k=500,
                       balanced=True,
                       b_sight=3000,
                       b_maxl=1500,
                       n_jobs=20)

    print('fit gammas')
    vlm.fit_gammas()

    print('calculate velocity')
    vlm.predict_U()
    vlm.calculate_velocity()
    vlm.calculate_shift(assumption="constant_velocity")
    vlm.extrapolate_cell_at_t(delta_t=1.)

    print('running tsne')
    bh_tsne = TSNE(random_state=33)
    # Embed the first 15 principal components.
    vlm.ts = bh_tsne.fit_transform(vlm.pcs[:, :15])

    print('projection of velocity onto embeddings')
    vlm.estimate_transition_prob(hidim="Sx_sz",
                                 embed="ts",
                                 transform="sqrt",
                                 psc=1,
                                 n_neighbors=3500,
                                 knn_random=True,
                                 sampled_fraction=0.5)

    print('calculate embedding shift')
    vlm.calculate_embedding_shift(sigma_corr=0.05, expression_scaling=True)

    print('calculate grid arrows')
    vlm.calculate_grid_arrows(smooth=0.8, steps=(40, 40), n_neighbors=100)

    print('saving hdf5')
    vlm.to_hdf5(output_path)
import loompy
import glob
import velocyto as vcy
import numpy as np
from sklearn.manifold import TSNE

# Disabled alternative: start from the raw loom file instead of the HDF5 dump.
#print('reading loom file')
#vlm = vcy.VelocytoLoom('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/velocyto/all10x.loom')

print('loading data')
downsampled_hdf5 = '/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/velocyto/all10x-downsampled-36.hdf5'
vlm = vcy.load_velocyto_hdf5(downsampled_hdf5)
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

print('filtering cells')
# Keep cells whose initial_Ucell_size is above the 0.5th percentile.
ucell_size_cutoff = np.percentile(vlm.initial_Ucell_size, 0.5)
vlm.filter_cells(bool_array=vlm.initial_Ucell_size > ucell_size_cutoff)

print('filtering genes')
vlm.score_cv_vs_mean(3000, plot=False, max_expr_avg=35)
vlm.filter_genes(by_cv_vs_mean=True)

# Report the sizes again after filtering.
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

#print('setting sample names as clusters')
#samplenames = list(map(lambda x: x.split(':')[0], vlm.ca['CellID']))
#vlm.ca['sample_name'] = samplenames
] cellid["strain"] = np.where(cellid["sample"].isin(ab1), "ab1", "renca") cellid["response"] = np.where(cellid["sample"].isin(respondervec), "responder", "nonresponder") # In[ ]: targetmono = pd.read_csv(snakemake.input[3]) monos = targetmono["loom_cellid"] ind = cellid["cellnames"].isin(monos) cluster15mono = cellid.index[ind] # In[ ]: # extract projections from renca velocyto object and calculate momentum renca = vcy.load_velocyto_hdf5(snakemake.input[1]) delta_embedding_renca = pd.DataFrame(renca.delta_embedding) delta_embedding_renca.index = renca.ca["CellID"] renca_ly6_cells_deltas = delta_embedding_renca.reindex(cluster15mono).dropna() renca_ly6_cells_deltas.columns = ["deltax", "deltay"] renca_ly6_cells_deltas["vel"] = np.square( renca_ly6_cells_deltas["deltax"]) + np.square( renca_ly6_cells_deltas["deltay"]) ly6crenca = pd.concat([renca_ly6_cells_deltas, cellid], axis=1, join="inner") # merge with cell metadata del (renca) # In[ ]: # extract projections from ab1 velocyto object and calculate momentum ab1 = vcy.load_velocyto_hdf5(snakemake.input[0])
import loompy
import glob
import velocyto as vcy
import numpy as np
from sklearn.manifold import TSNE

print('loading data')
source_hdf5 = '/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/velocyto/180831.hdf5'
vlm = vcy.load_velocyto_hdf5(source_hdf5)
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

# Cell filtering is disabled for this dataset.
#print('filtering cells')
#vlm.filter_cells(bool_array=vlm.initial_Ucell_size > np.percentile(vlm.initial_Ucell_size, 0.5))

print('filtering genes')
#vlm.score_cv_vs_mean(3000, plot=False, max_expr_avg=35)
vlm.score_cv_vs_mean(3000, plot=False)
vlm.filter_genes(by_cv_vs_mean=True)
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

print('normalizing data matrices')
# Each matrix is normalized against its own axis-0 sums, with the mean of
# those sums as the target size.
s_axis0_sums = vlm.S.sum(0)
vlm._normalize_S(relative_size=s_axis0_sums,
                 target_size=s_axis0_sums.mean())
u_axis0_sums = vlm.U.sum(0)
vlm._normalize_U(relative_size=u_axis0_sums,
                 target_size=u_axis0_sums.mean())

print('running pca')
vlm.perform_PCA()
# Project velocities onto the "ts" embedding, then save the result.
vlm.estimate_transition_prob(
    hidim="Sx_sz",
    embed="ts",
    transform="sqrt",
    psc=1,
    knn_random=True,
    sampled_fraction=0.3,
    random_seed=42,
)
vlm.calculate_embedding_shift(sigma_corr=0.05, expression_scaling=True)
vlm.calculate_grid_arrows(smooth=0.5, steps=(40, 40), n_neighbors=50)
vlm.to_hdf5("combined.hdf5")

###
###

vlm = vcy.load_velocyto_hdf5("combined.hdf5")


def despline():
    """Hide the top and right spines of the current matplotlib axes.

    Ticks are kept only on the left (y axis) and bottom (x axis).
    """
    axes = plt.gca()
    # Hide the right and top spines
    for side in ('right', 'top'):
        axes.spines[side].set_visible(False)
    # Only show ticks on the left and bottom spines
    axes.yaxis.set_ticks_position('left')
    axes.xaxis.set_ticks_position('bottom')


def minimal_xticks(start, end):
    end_ = np.around(end, -int(np.log10(end)) + 1)
    xlims = np.linspace(start, end_, 5)