def read_stream(stream_save_path, vcfg, mq: Queue):
    """Start reading the real-time video streams described by *vcfg*.

    :param stream_save_path: directory where each video's stream is saved
    :param vcfg: video configurations
    :param mq: inter-process queue; the stream receiver writes the newest
        stream index into it, in cooperation with the object detector
    :return: True once the reader has been kicked off
    """
    stream.read(stream_save_path, vcfg, mq)
    return True
def test_mixture_feature_parallel():
    """Extract fbank+mfcc once, process them on parallel branches, then
    recombine both branches into a single 'feat' matrix."""
    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    # Build an input pipe of 10 packets, 50 frames each.
    inPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inPipe.stop()

    extractor = feature.MixtureExtractor(mixType=["fbank", "mfcc"], )

    # Route each feature type onto its own branch.
    def split_rule(items):
        return {"fbank": items["fbank"]}, {"mfcc": items["mfcc"]}

    spliter = joint.Spliter(split_rule, outNums=2)

    # Branch 1: splice +-2 frames and sliding-window CMVN on fbank.
    processor1 = feature.MatrixFeatureProcessor(
        spliceLeft=2,
        spliceRight=2,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="fbank",
    )
    # Branch 2: splice +-3 frames and sliding-window CMVN on mfcc.
    processor2 = feature.MatrixFeatureProcessor(
        spliceLeft=3,
        spliceRight=3,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="mfcc",
    )

    # Merge the two branches back into one packet.
    def combine_rule(items):
        return {
            "feat":
                np.concatenate([items[0]["fbank"], items[1]["mfcc"]], axis=1)
        }

    combiner = joint.Combiner(combine_rule)

    extractor.start(inPIPE=inPipe)
    spliter.start(inPIPE=extractor.outPIPE)
    processor1.start(inPIPE=spliter.outPIPE[0])  # branch carrying fbank
    processor2.start(inPIPE=spliter.outPIPE[1])  # branch carrying mfcc
    combiner.start(inPIPE=[processor1.outPIPE, processor2.outPIPE])
    combiner.wait()

    print(combiner.outPIPE[0].size())
    result = combiner.outPIPE[0].get()
    print(result.keys())
    print(result["feat"].shape)  # 211 = 120 + 91
def test_mfcc_extractor():
    """Feed framed audio through a MfccExtractor and inspect one packet."""
    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    # Build an input pipe of 10 packets, 50 frames each.
    inPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inPipe.stop()
    print(inPipe.size())

    # Run the extractor to completion.
    extractor = feature.MfccExtractor(oKey="mfcc")
    extractor.start(inPIPE=inPipe)
    extractor.wait()

    print(extractor.outPIPE.size())
    result = extractor.outPIPE.get()
    print(result.keys())
    print(result.mainKey)
    print(result["mfcc"].shape)
def test_processor_cmvn():
    """MFCC extraction followed by frame splicing + sliding-window CMVN."""
    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    inPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inPipe.stop()

    extractor = feature.MfccExtractor(minParallelSize=100, oKey="mfcc")
    # Splice +-3 frames and normalize with a sliding CMVN window.
    processor = feature.MatrixFeatureProcessor(
        spliceLeft=3,
        spliceRight=3,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="mfcc",
    )
    extractor.start(inPIPE=inPipe)
    processor.start(inPIPE=extractor.outPIPE, iKey="mfcc")
    processor.wait()

    print(processor.outPIPE.size())
    result = processor.outPIPE.get()
    print(result.keys())
    print(result.mainKey)
    print(result["mfcc"].shape)  # 273 = 13 * 3 * 7
def get_genes():
    """Return the gene names of a dataset as a JSON list.

    Example: http://127.0.0.1:8000/genes?db_name=1_scanpy_10xpbmc

    Fixes over the original:
    - the `try: del adata` prologue deleted a local that was never bound,
      so it always raised NameError and was swallowed by a bare `except`
      (dead code); removed.
    - the AnnData object is no longer used in a boolean context
      (`if adata:`), which is ambiguous for AnnData.
    """
    db_name = request.args.get("db_name")
    # Resolve the dataset file on disk from its name.
    dataset_path = glob(os.path.join(DATASET_DIRECTORY, f"{db_name}.*"))[0]
    if get_dataset_type_adata(db_name).lower() == 'stream':
        adata = st.read(dataset_path, file_format="pkl", workdir="./")
    else:
        adata = sc.read(dataset_path)
    # Materialize before dropping the (potentially large) AnnData object.
    genes = list(adata.var_names)
    del adata
    gc.collect()
    return jsonify(genes)
def get_available_annotations():
    """Return the usable obs annotation columns of a dataset as JSON.

    Example: http://127.0.0.1:8000/columns?db_name=1_scanpy_10xpbmc

    Fixes over the original: the `try: del adata` prologue deleted a local
    that was never bound, so it always raised NameError and was swallowed
    by a bare `except` (dead code); removed.
    """
    db_name = request.args.get("db_name")
    filename = glob(os.path.join(DATASET_DIRECTORY, f"{db_name}.*"))[0]
    if get_dataset_type_adata(db_name).lower() in [
            "scanpy", "velocity", "seurat", "paga"
    ]:
        adata = sc.read(filename)
    else:
        adata = st.read(filename, file_format="pkl", workdir="./")
    # 'branch_id' / 'branch_id_alias' are stream-internal columns that do
    # not work in the annotation menu, so they are filtered out.
    annotations = [
        name for name in list(adata.obs.columns)
        if name not in ['branch_id', 'branch_id_alias']
    ]
    del adata
    gc.collect()
    return jsonify(annotations)
def main():
    """CLI entry point: detect and plot differentially expressed genes
    with STREAM, then write the result pickle.

    Fix over the original: `args.preference` defaults to None, so calling
    `.split(',')` on it crashed with AttributeError whenever `-preference`
    was not supplied; it is now guarded.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--data-file", dest="input_filename", default=None,
                        help="input file name, pkl format from Stream preprocessing module",
                        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    # Detection / plotting parameters (help strings were empty originally).
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8, help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8, help="")
    parser.add_argument("-cutoff_zscore", dest="cutoff_zscore", type=float, default=1.5, help="")
    parser.add_argument("-percentile_expr", dest="percentile_expr", type=int, default=95, help="")
    parser.add_argument("-flag_use_precomputed", dest="flag_use_precomputed",
                        action="store_true", help="")
    parser.add_argument("-root", dest="root", default=None, help="")
    parser.add_argument("-preference", dest="preference", default=None, help="")
    parser.add_argument("-cutoff_logfc", dest="cutoff_logfc", type=float, default=0.25, help="")
    parser.add_argument("-num_genes", dest="num_genes", type=int, default=15, help="")
    parser.add_argument("-n_jobs", dest="n_jobs", type=int, default=8, help="")
    args = parser.parse_args()

    workdir = "./"
    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)
    # BUG FIX: only split when a preference string was actually given.
    preference = args.preference.split(',') if args.preference is not None else None
    st.detect_de_genes(adata,
                       cutoff_zscore=args.cutoff_zscore,
                       cutoff_logfc=args.cutoff_logfc,
                       percentile_expr=args.percentile_expr,
                       n_jobs=args.n_jobs,
                       use_precomputed=args.flag_use_precomputed,
                       root=args.root,
                       preference=preference)
    st.plot_de_genes(adata, num_genes=args.num_genes,
                     cutoff_zscore=args.cutoff_zscore,
                     cutoff_logfc=args.cutoff_logfc,
                     save_fig=True, fig_path=None,
                     fig_size=(args.fig_width, args.fig_height))
    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./', file_format='pkl')
    print('Finished computation.')
def test_functions():
    """Exercise the basic stream reading / frame-cutting helpers."""
    # Read wave info and data.
    wav = stream.read(wavPath)
    print(wav.value)

    samples = wav.value[:-10]
    # snip=True: the trailing samples that do not fill a frame are discarded.
    snipped = stream.cut_frames(samples, width=400, shift=160, snip=True)
    print(snipped.shape)
    # snip=False: the remainder is retained.
    retained = stream.cut_frames(samples, width=400, shift=160, snip=False)
    print(retained.shape)
def test_mixture_feature_series():
    """Extract fbank+mfcc together, then run the two processors in series
    (each transforming only its own key)."""
    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    inPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inPipe.stop()

    extractor = feature.MixtureExtractor(mixType=["fbank", "mfcc"], )

    # Stage 1: splice +-2 frames + sliding CMVN on the fbank key.
    processor1 = feature.MatrixFeatureProcessor(
        spliceLeft=2,
        spliceRight=2,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="fbank",
    )
    # Stage 2: splice +-3 frames + sliding CMVN on the mfcc key.
    processor2 = feature.MatrixFeatureProcessor(
        spliceLeft=3,
        spliceRight=3,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="mfcc",
    )

    extractor.start(inPIPE=inPipe)
    # iKey selects which key each processor transforms.
    processor1.start(inPIPE=extractor.outPIPE, iKey="fbank")
    processor2.start(inPIPE=processor1.outPIPE, iKey="mfcc")
    processor2.wait()

    print(processor2.outPIPE.size())
    result = processor2.outPIPE.get()
    print(result.keys())
    print(result["fbank"].shape)  # 120 = 24 * 5
    print(result["mfcc"].shape)  # 91 = 13 * 7
def canRead(id, userId):
    """
    Check if a user can read an event.

    Allowed under the following conditions:
        1. the user is admin.
        2. the stream is public.
        3. the stream is readable by the user.

    Parameters:
        id - an event id.
        userId - a user id.

    Returns:
        bool.
    """
    # Admins may read everything.
    if user.isAdmin(userId):
        return True
    # BUG(review): `data` is not defined anywhere in this function, so this
    # line raises NameError on every non-admin call. Presumably the event
    # should first be fetched by `id` and its "streamId" taken from there —
    # confirm against the event module (compare canCreate, which receives
    # `data` as a parameter).
    streamId = data["streamId"]
    # NOTE(review): reading the stream by `userId` looks wrong — presumably
    # this should be `stream.read(streamId)`; verify against the stream API.
    theStream = stream.read(userId)
    # Public streams are readable by anyone.
    if not theStream["private"]:
        return True
    # Otherwise the stream must be in the user's readable set.
    readable = permission.readableStreams(userId)
    return (streamId in readable)
def canCreate(data, userId):
    """
    Check if a user can create an event.

    Allowed under the following conditions:
        1. user is admin.
        2. the stream is public.
        3. the stream is writeable by the user.

    Parameters:
        data - the event data.
        userId - a user id.

    Returns:
        bool.
    """
    # Admins may create events anywhere.
    if user.isAdmin(userId):
        return True
    # The target stream is identified by the event payload.
    streamId = data["streamId"]
    # NOTE(review): reading the stream by `userId` rather than `streamId`
    # looks suspicious — the privacy check below is about the target
    # stream, so this should presumably be `stream.read(streamId)`; verify
    # against the stream API before changing.
    theStream = stream.read(userId)
    # Public streams accept events from anyone.
    if not theStream["private"]:
        return True
    # Otherwise the stream must be in the user's writeable set.
    writeable = permission.writeableStreams(userId)
    return (streamId in writeable)
def main():
    """CLI entry point: fit an elastic principal graph on a preprocessed
    STREAM dataset and plot the resulting branches, then write the result
    pickle.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Input / output.
    parser.add_argument("-m", "--data-file", dest="input_filename", default=None,
                        help="input file name, pkl format from Stream preprocessing module",
                        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    # Elastic-principal-graph hyperparameters (help strings were left empty).
    parser.add_argument("-epg_n_nodes", dest="epg_n_nodes", type=int, default=50, help="")
    parser.add_argument("-incr_n_nodes", dest="incr_n_nodes", type=int, default=30, help="")
    # NOTE(review): the trimming radius default is the *string* 'Inf' —
    # presumably converted downstream by STREAM; confirm.
    parser.add_argument("-epg_trimmingradius", dest="epg_trimmingradius", default='Inf', help="")
    parser.add_argument("-epg_alpha", dest="epg_alpha", type=float, default=0.02, help="")
    parser.add_argument("-epg_beta", dest="epg_beta", type=float, default=0.0, help="")
    parser.add_argument("-epg_n_processes", dest="epg_n_processes", type=int, default=1, help="")
    parser.add_argument("-epg_lambda", dest="epg_lambda", type=float, default=0.02, help="")
    parser.add_argument("-epg_mu", dest="epg_mu", type=float, default=0.1, help="")
    parser.add_argument("-epg_finalenergy", dest="epg_finalenergy", default='Penalized', help="")
    # Plotting parameters.
    parser.add_argument("-comp1", dest="comp1", type=int, default=0, help="")
    parser.add_argument("-comp2", dest="comp2", type=int, default=1, help="")
    parser.add_argument("-n_comp", dest="n_comp", type=int, default=3, help="")
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8, help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8, help="")
    parser.add_argument("-fig_legend_ncol", dest="fig_legend_ncol", type=int, default=None, help="")
    args = parser.parse_args()

    workdir = "./"
    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)
    # Fit the elastic principal graph with the CLI-provided hyperparameters.
    st.elastic_principal_graph(adata,
                               epg_n_nodes=args.epg_n_nodes,
                               incr_n_nodes=args.incr_n_nodes,
                               epg_trimmingradius=args.epg_trimmingradius,
                               epg_alpha=args.epg_alpha,
                               epg_n_processes=args.epg_n_processes,
                               epg_lambda=args.epg_lambda,
                               epg_mu=args.epg_mu,
                               epg_beta=args.epg_beta,
                               epg_finalenergy=args.epg_finalenergy)
    # Branch skeleton plot.
    st.plot_branches(adata, n_components=args.n_comp, comp1=args.comp1,
                     comp2=args.comp2, save_fig=True,
                     fig_name=(args.output_filename_prefix + '_branches.png'),
                     fig_path=None,
                     fig_size=(args.fig_width, args.fig_height))
    # Branches overlaid with cells.
    st.plot_branches_with_cells(adata, n_components=args.n_comp,
                                comp1=args.comp1, comp2=args.comp2,
                                save_fig=True,
                                fig_name=(args.output_filename_prefix + '_branches_with_cells.png'),
                                fig_path=None,
                                fig_size=(args.fig_width, args.fig_height),
                                fig_legend_ncol=args.fig_legend_ncol)
    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./', file_format='pkl')
    print('Finished computation.')
def main():
    """CLI entry point: seed an elastic principal graph from clustering on a
    preprocessed STREAM dataset, plot the seeded branches, and write the
    result pickle.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Input / output.
    parser.add_argument("-m", "--data-file", dest="input_filename", default=None,
                        help="input file name, pkl format from Stream preprocessing module",
                        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    # Seeding / clustering parameters (help strings were left empty).
    parser.add_argument("-nb_pct", "--percent_neighbor_cells", dest="nb_pct",
                        type=float, default=0.1, help="")
    parser.add_argument("-n_clusters", dest="n_clusters", type=int, default=10, help="")
    parser.add_argument("-damping", dest="damping", type=float, default=0.75, help="")
    parser.add_argument("-pref_perc", dest="pref_perc", type=int, default=50, help="")
    parser.add_argument("-max_n_clusters", dest="max_n_clusters", type=int, default=200, help="")
    parser.add_argument("-clustering", dest="clustering", default='kmeans', help="")
    # Plotting parameters.
    parser.add_argument("-comp1", dest="comp1", type=int, default=0, help="")
    parser.add_argument("-comp2", dest="comp2", type=int, default=1, help="")
    parser.add_argument("-n_comp", dest="n_comp", type=int, default=3, help="")
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8, help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8, help="")
    parser.add_argument("-fig_legend_ncol", dest="fig_legend_ncol", type=int, default=None, help="")
    args = parser.parse_args()

    print('Starting validation procedure...')
    workdir = "./"
    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)
    # Seed the graph structure from the chosen clustering method.
    st.seed_elastic_principal_graph(adata,
                                    clustering=args.clustering,
                                    n_clusters=args.n_clusters,
                                    damping=args.damping,
                                    pref_perc=args.pref_perc,
                                    max_n_clusters=args.max_n_clusters,
                                    nb_pct=args.nb_pct)
    # Branch skeleton plot.
    st.plot_branches(adata, n_components=args.n_comp, comp1=args.comp1,
                     comp2=args.comp2, save_fig=True,
                     fig_name=(args.output_filename_prefix + '_branches.png'),
                     fig_path=None,
                     fig_size=(args.fig_width, args.fig_height))
    # Branches overlaid with cells.
    st.plot_branches_with_cells(adata, n_components=args.n_comp,
                                comp1=args.comp1, comp2=args.comp2,
                                save_fig=True,
                                fig_name=(args.output_filename_prefix + '_branches_with_cells.png'),
                                fig_path=None,
                                fig_size=(args.fig_width, args.fig_height),
                                fig_legend_ncol=args.fig_legend_ncol)
    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./', file_format='pkl')
    print('Finished computation.')
def __init__(self, fileName, cellLabel, cellLabelColor, rawCount = True):
    """Load a STREAM dataset (mtx format) with cell labels/colors and
    prepare it for analysis.

    :param fileName: path to the expression matrix in 'mtx' format.
    :param cellLabel: path to the cell-label file.
    :param cellLabelColor: path to the cell-label color file.
    :param rawCount: if True, the matrix is treated as raw counts and is
        normalized per cell and log-transformed here.
    """
    self.adata = st.read(file_name = fileName, file_format = 'mtx')
    st.add_cell_labels(self.adata, file_name = cellLabel)
    st.add_cell_colors(self.adata, file_name = cellLabelColor)
    # De-duplicate gene and cell identifiers before recording them.
    self.adata.var_names_make_unique()
    self.adata.obs_names_make_unique()
    # allCells / allGenes: full identifier lists of the raw input
    self.allCells = self.adata.obs.index.to_list()
    self.allGenes = self.adata.var.index.to_list()
    print('Raw input parsed...')
    print(self.adata)
    # nCells / nGenes: dimensions of the raw input
    self.nCells = self.adata.n_obs
    self.nGenes = self.adata.n_vars
    self._keepCurrentRecords()
    # Mitochondrial genes are removed unconditionally.
    st.remove_mt_genes(self.adata)
    if rawCount:
        st.normalize_per_cell(self.adata)
        st.log_transform(self.adata)
    # Snapshot the freshly-loaded state under backup key 0 so it can be
    # restored later via self.restoreFromBackup().
    self.backupDict = {}
    self.backupKey = 0
    self.backup(0)
    print('Initial backup saved with key: 0')
    print('Restore with self.restoreFromBackup()')
import dash import dash_html_components as html import dash_core_components as dcc from dash.dependencies import Input, Output import dash_bootstrap_components as dbc import numpy as np import stream as st import matplotlib matplotlib.use('Agg') from app import app adata = st.read(file_name='./SampleData/SCoPE2_2020/stream_result_var.pkl', workdir='./stream_result') available_samples = [ 'Nestorowa, S. et al. 2016', 'Harrison, S. et al. 2021', 'Trapnell, C. et al. 2014', ' Tang, Q. et al. 2017' ] available_projections = [ 'dimension_reduction', 'visualization_2D', 'flat_tree', 'branches' ] available_colors = adata.obs.columns available_stream = ['single_cell_stream', 'stream'] layout = html.Div([ dbc.Container([ dbc.Col(html.H1("Visualization", className="text-center"), className="mb-5 mt-5"),
def main(): sns.set_style('white') sns.set_context('poster') parser = argparse.ArgumentParser( description='%s Parameters' % __tool_name__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-m", "--matrix", dest="input_filename", default=None, help="input file name", metavar="FILE") parser.add_argument("-l", "--cell_labels", dest="cell_label_filename", default=None, help="filename of cell labels") parser.add_argument("-c", "--cell_labels_colors", dest="cell_label_color_filename", default=None, help="filename of cell label colors") parser.add_argument( "-s", "--select_features", dest="s_method", default='LOESS', help= "LOESS,PCA or all: Select variable genes using LOESS or principal components using PCA or all the genes are kept" ) parser.add_argument("--TG", "--detect_TG_genes", dest="flag_gene_TG_detection", action="store_true", help="detect transition genes automatically") parser.add_argument("--DE", "--detect_DE_genes", dest="flag_gene_DE_detection", action="store_true", help="detect DE genes automatically") parser.add_argument("--LG", "--detect_LG_genes", dest="flag_gene_LG_detection", action="store_true", help="detect leaf genes automatically") parser.add_argument( "-g", "--genes", dest="genes", default=None, help= "genes to visualize, it can either be filename which contains all the genes in one column or a set of gene names separated by comma" ) parser.add_argument( "-p", "--use_precomputed", dest="use_precomputed", action="store_true", help= "use precomputed data files without re-computing structure learning part" ) parser.add_argument("--new", dest="new_filename", default=None, help="file name of data to be mapped") parser.add_argument("--new_l", dest="new_label_filename", default=None, help="filename of new cell labels") parser.add_argument("--new_c", dest="new_label_color_filename", default=None, help="filename of new cell label colors") parser.add_argument("--log2", dest="flag_log2", action="store_true", help="perform log2 
transformation") parser.add_argument("--norm", dest="flag_norm", action="store_true", help="normalize data based on library size") parser.add_argument("--atac", dest="flag_atac", action="store_true", help="indicate scATAC-seq data") parser.add_argument( "--n_jobs", dest="n_jobs", type=int, default=1, help="Specify the number of processes to use. (default, 1") parser.add_argument( "--loess_frac", dest="loess_frac", type=float, default=0.1, help="The fraction of the data used in LOESS regression") parser.add_argument( "--loess_cutoff", dest="loess_cutoff", type=int, default=95, help= "the percentile used in variable gene selection based on LOESS regression" ) parser.add_argument("--pca_first_PC", dest="flag_first_PC", action="store_true", help="keep first PC") parser.add_argument("--pca_n_PC", dest="pca_n_PC", type=int, default=15, help="The number of selected PCs,it's 15 by default") parser.add_argument( "--dr_method", dest="dr_method", default='se', help= "Method used for dimension reduction. Choose from {{'se','mlle','umap','pca'}}" ) parser.add_argument("--n_neighbors", dest="n_neighbors", type=float, default=50, help="The number of neighbor cells") parser.add_argument( "--nb_pct", dest="nb_pct", type=float, default=None, help= "The percentage of neighbor cells (when sepcified, it will overwrite n_neighbors)." 
) parser.add_argument("--n_components", dest="n_components", type=int, default=3, help="Number of components to keep.") parser.add_argument( "--clustering", dest="clustering", default='kmeans', help= "Clustering method used for seeding the intial structure, choose from 'ap','kmeans','sc'" ) parser.add_argument("--damping", dest="damping", type=float, default=0.75, help="Affinity Propagation: damping factor") parser.add_argument( "--n_clusters", dest="n_clusters", type=int, default=10, help="Number of clusters for spectral clustering or kmeans") parser.add_argument("--EPG_n_nodes", dest="EPG_n_nodes", type=int, default=50, help=" Number of nodes for elastic principal graph") parser.add_argument( "--EPG_lambda", dest="EPG_lambda", type=float, default=0.02, help="lambda parameter used to compute the elastic energy") parser.add_argument("--EPG_mu", dest="EPG_mu", type=float, default=0.1, help="mu parameter used to compute the elastic energy") parser.add_argument( "--EPG_trimmingradius", dest="EPG_trimmingradius", type=float, default=np.inf, help="maximal distance of point from a node to affect its embedment") parser.add_argument( "--EPG_alpha", dest="EPG_alpha", type=float, default=0.02, help= "positive numeric, the value of the alpha parameter of the penalized elastic energy" ) parser.add_argument("--EPG_collapse", dest="flag_EPG_collapse", action="store_true", help="collapsing small branches") parser.add_argument( "--EPG_collapse_mode", dest="EPG_collapse_mode", default="PointNumber", help= "the mode used to collapse branches. 
PointNumber,PointNumber_Extrema, PointNumber_Leaves,EdgesNumber or EdgesLength" ) parser.add_argument( "--EPG_collapse_par", dest="EPG_collapse_par", type=float, default=5, help= "positive numeric, the cotrol paramter used for collapsing small branches" ) parser.add_argument("--disable_EPG_optimize", dest="flag_disable_EPG_optimize", action="store_true", help="disable optimizing branching") parser.add_argument("--EPG_shift", dest="flag_EPG_shift", action="store_true", help="shift branching point ") parser.add_argument( "--EPG_shift_mode", dest="EPG_shift_mode", default='NodeDensity', help= "the mode to use to shift the branching points NodePoints or NodeDensity" ) parser.add_argument( "--EPG_shift_DR", dest="EPG_shift_DR", type=float, default=0.05, help= "positive numeric, the radius to be used when computing point density if EPG_shift_mode is NodeDensity" ) parser.add_argument( "--EPG_shift_maxshift", dest="EPG_shift_maxshift", type=int, default=5, help= "positive integer, the maxium distance (as number of edges) to consider when exploring the branching point neighborhood" ) parser.add_argument("--disable_EPG_ext", dest="flag_disable_EPG_ext", action="store_true", help="disable extending leaves with additional nodes") parser.add_argument( "--EPG_ext_mode", dest="EPG_ext_mode", default='QuantDists', help= " the mode used to extend the graph,QuantDists, QuantCentroid or WeigthedCentroid" ) parser.add_argument( "--EPG_ext_par", dest="EPG_ext_par", type=float, default=0.5, help= "the control parameter used for contribution of the different data points when extending leaves with nodes" ) parser.add_argument("--DE_zscore_cutoff", dest="DE_zscore_cutoff", default=2, help="Differentially Expressed Genes z-score cutoff") parser.add_argument( "--DE_logfc_cutoff", dest="DE_logfc_cutoff", default=0.25, help="Differentially Expressed Genes log fold change cutoff") parser.add_argument("--TG_spearman_cutoff", dest="TG_spearman_cutoff", default=0.4, help="Transition Genes 
Spearman correlation cutoff") parser.add_argument("--TG_logfc_cutoff", dest="TG_logfc_cutoff", default=0.25, help="Transition Genes log fold change cutoff") parser.add_argument("--LG_zscore_cutoff", dest="LG_zscore_cutoff", default=1.5, help="Leaf Genes z-score cutoff") parser.add_argument("--LG_pvalue_cutoff", dest="LG_pvalue_cutoff", default=1e-2, help="Leaf Genes p value cutoff") parser.add_argument( "--umap", dest="flag_umap", action="store_true", help="whether to use UMAP for visualization (default: No)") parser.add_argument("-r", dest="root", default=None, help="root node for subwaymap_plot and stream_plot") parser.add_argument("--stream_log_view", dest="flag_stream_log_view", action="store_true", help="use log2 scale for y axis of stream_plot") parser.add_argument("-o", "--output_folder", dest="output_folder", default=None, help="Output folder") parser.add_argument("--for_web", dest="flag_web", action="store_true", help="Output files for website") parser.add_argument( "--n_genes", dest="n_genes", type=int, default=5, help= "Number of top genes selected from each output marker gene file for website gene visualization" ) args = parser.parse_args() if (args.input_filename is None) and (args.new_filename is None): parser.error("at least one of -m, --new required") new_filename = args.new_filename new_label_filename = args.new_label_filename new_label_color_filename = args.new_label_color_filename flag_stream_log_view = args.flag_stream_log_view flag_gene_TG_detection = args.flag_gene_TG_detection flag_gene_DE_detection = args.flag_gene_DE_detection flag_gene_LG_detection = args.flag_gene_LG_detection flag_web = args.flag_web flag_first_PC = args.flag_first_PC flag_umap = args.flag_umap genes = args.genes DE_zscore_cutoff = args.DE_zscore_cutoff DE_logfc_cutoff = args.DE_logfc_cutoff TG_spearman_cutoff = args.TG_spearman_cutoff TG_logfc_cutoff = args.TG_logfc_cutoff LG_zscore_cutoff = args.LG_zscore_cutoff LG_pvalue_cutoff = args.LG_pvalue_cutoff root = args.root 
input_filename = args.input_filename cell_label_filename = args.cell_label_filename cell_label_color_filename = args.cell_label_color_filename s_method = args.s_method use_precomputed = args.use_precomputed n_jobs = args.n_jobs loess_frac = args.loess_frac loess_cutoff = args.loess_cutoff pca_n_PC = args.pca_n_PC flag_log2 = args.flag_log2 flag_norm = args.flag_norm flag_atac = args.flag_atac dr_method = args.dr_method nb_pct = args.nb_pct # neighbour percent n_neighbors = args.n_neighbors n_components = args.n_components #number of components to keep clustering = args.clustering damping = args.damping n_clusters = args.n_clusters EPG_n_nodes = args.EPG_n_nodes EPG_lambda = args.EPG_lambda EPG_mu = args.EPG_mu EPG_trimmingradius = args.EPG_trimmingradius EPG_alpha = args.EPG_alpha flag_EPG_collapse = args.flag_EPG_collapse EPG_collapse_mode = args.EPG_collapse_mode EPG_collapse_par = args.EPG_collapse_par flag_EPG_shift = args.flag_EPG_shift EPG_shift_mode = args.EPG_shift_mode EPG_shift_DR = args.EPG_shift_DR EPG_shift_maxshift = args.EPG_shift_maxshift flag_disable_EPG_optimize = args.flag_disable_EPG_optimize flag_disable_EPG_ext = args.flag_disable_EPG_ext EPG_ext_mode = args.EPG_ext_mode EPG_ext_par = args.EPG_ext_par output_folder = args.output_folder #work directory n_genes = args.n_genes if (flag_web): flag_savefig = False else: flag_savefig = True gene_list = [] if (genes != None): if (os.path.exists(genes)): gene_list = pd.read_csv(genes, sep='\t', header=None, index_col=None, compression='gzip' if genes.split('.')[-1] == 'gz' else None).iloc[:, 0].tolist() gene_list = list(set(gene_list)) else: gene_list = genes.split(',') print('Genes to visualize: ') print(gene_list) if (new_filename is None): if (output_folder == None): workdir = os.path.join(os.getcwd(), 'stream_result') else: workdir = output_folder if (use_precomputed): print('Importing the precomputed pkl file...') adata = st.read(file_name='stream_result.pkl', file_format='pkl', 
file_path=workdir, workdir=workdir) else: if (flag_atac): print('Reading in atac zscore matrix...') adata = st.read(file_name=input_filename, workdir=workdir, experiment='atac-seq') else: adata = st.read(file_name=input_filename, workdir=workdir) print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' + str(adata.var.shape[0]) + ' genes') adata.var_names_make_unique() adata.obs_names_make_unique() if (cell_label_filename != None): st.add_cell_labels(adata, file_name=cell_label_filename) else: st.add_cell_labels(adata) if (cell_label_color_filename != None): st.add_cell_colors(adata, file_name=cell_label_color_filename) else: st.add_cell_colors(adata) if (flag_atac): print('Selecting top principal components...') st.select_top_principal_components(adata, n_pc=pca_n_PC, first_pc=flag_first_PC, save_fig=True) st.dimension_reduction(adata, method=dr_method, n_components=n_components, n_neighbors=n_neighbors, nb_pct=nb_pct, n_jobs=n_jobs, feature='top_pcs') else: if (flag_norm): st.normalize_per_cell(adata) if (flag_log2): st.log_transform(adata) if (s_method != 'all'): print('Filtering genes...') st.filter_genes(adata, min_num_cells=5) print('Removing mitochondrial genes...') st.remove_mt_genes(adata) if (s_method == 'LOESS'): print('Selecting most variable genes...') st.select_variable_genes(adata, loess_frac=loess_frac, percentile=loess_cutoff, save_fig=True) pd.DataFrame(adata.uns['var_genes']).to_csv( os.path.join(workdir, 'selected_variable_genes.tsv'), sep='\t', index=None, header=False) st.dimension_reduction(adata, method=dr_method, n_components=n_components, n_neighbors=n_neighbors, nb_pct=nb_pct, n_jobs=n_jobs, feature='var_genes') if (s_method == 'PCA'): print('Selecting top principal components...') st.select_top_principal_components( adata, n_pc=pca_n_PC, first_pc=flag_first_PC, save_fig=True) st.dimension_reduction(adata, method=dr_method, n_components=n_components, n_neighbors=n_neighbors, nb_pct=nb_pct, n_jobs=n_jobs, feature='top_pcs') else: print('Keep 
all the genes...') st.dimension_reduction(adata, n_components=n_components, n_neighbors=n_neighbors, nb_pct=nb_pct, n_jobs=n_jobs, feature='all') st.plot_dimension_reduction(adata, save_fig=flag_savefig) st.seed_elastic_principal_graph(adata, clustering=clustering, damping=damping, n_clusters=n_clusters) st.plot_branches( adata, save_fig=flag_savefig, fig_name='seed_elastic_principal_graph_skeleton.pdf') st.plot_branches_with_cells( adata, save_fig=flag_savefig, fig_name='seed_elastic_principal_graph.pdf') st.elastic_principal_graph(adata, epg_n_nodes=EPG_n_nodes, epg_lambda=EPG_lambda, epg_mu=EPG_mu, epg_trimmingradius=EPG_trimmingradius, epg_alpha=EPG_alpha) st.plot_branches(adata, save_fig=flag_savefig, fig_name='elastic_principal_graph_skeleton.pdf') st.plot_branches_with_cells(adata, save_fig=flag_savefig, fig_name='elastic_principal_graph.pdf') if (not flag_disable_EPG_optimize): st.optimize_branching(adata, epg_trimmingradius=EPG_trimmingradius) st.plot_branches( adata, save_fig=flag_savefig, fig_name='optimizing_elastic_principal_graph_skeleton.pdf') st.plot_branches_with_cells( adata, save_fig=flag_savefig, fig_name='optimizing_elastic_principal_graph.pdf') if (flag_EPG_shift): st.shift_branching(adata, epg_shift_mode=EPG_shift_mode, epg_shift_radius=EPG_shift_DR, epg_shift_max=EPG_shift_maxshift, epg_trimmingradius=EPG_trimmingradius) st.plot_branches( adata, save_fig=flag_savefig, fig_name='shifting_elastic_principal_graph_skeleton.pdf') st.plot_branches_with_cells( adata, save_fig=flag_savefig, fig_name='shifting_elastic_principal_graph.pdf') if (flag_EPG_collapse): st.prune_elastic_principal_graph( adata, epg_collapse_mode=EPG_collapse_mode, epg_collapse_par=EPG_collapse_par, epg_trimmingradius=EPG_trimmingradius) st.plot_branches( adata, save_fig=flag_savefig, fig_name='pruning_elastic_principal_graph_skeleton.pdf') st.plot_branches_with_cells( adata, save_fig=flag_savefig, fig_name='pruning_elastic_principal_graph.pdf') if (not flag_disable_EPG_ext): 
st.extend_elastic_principal_graph( adata, epg_ext_mode=EPG_ext_mode, epg_ext_par=EPG_ext_par, epg_trimmingradius=EPG_trimmingradius) st.plot_branches( adata, save_fig=flag_savefig, fig_name='extending_elastic_principal_graph_skeleton.pdf') st.plot_branches_with_cells( adata, save_fig=flag_savefig, fig_name='extending_elastic_principal_graph.pdf') st.plot_branches( adata, save_fig=flag_savefig, fig_name='finalized_elastic_principal_graph_skeleton.pdf') st.plot_branches_with_cells( adata, save_fig=flag_savefig, fig_name='finalized_elastic_principal_graph.pdf') st.plot_flat_tree(adata, save_fig=flag_savefig) if (flag_umap): print('UMAP visualization based on top MLLE components...') st.plot_visualization_2D(adata, save_fig=flag_savefig, fig_name='umap_cells') st.plot_visualization_2D(adata, color_by='branch', save_fig=flag_savefig, fig_name='umap_branches') if (root is None): print('Visualization of subwaymap and stream plots...') flat_tree = adata.uns['flat_tree'] list_node_start = [ value for key, value in nx.get_node_attributes( flat_tree, 'label').items() ] for ns in list_node_start: if (flag_web): st.subwaymap_plot(adata, percentile_dist=100, root=ns, save_fig=flag_savefig) st.stream_plot(adata, root=ns, fig_size=(8, 8), save_fig=True, flag_log_view=flag_stream_log_view, fig_legend=False, fig_name='stream_plot.png') else: st.subwaymap_plot(adata, percentile_dist=100, root=ns, save_fig=flag_savefig) st.stream_plot(adata, root=ns, fig_size=(8, 8), save_fig=flag_savefig, flag_log_view=flag_stream_log_view) else: st.subwaymap_plot(adata, percentile_dist=100, root=root, save_fig=flag_savefig) st.stream_plot(adata, root=root, fig_size=(8, 8), save_fig=flag_savefig, flag_log_view=flag_stream_log_view) output_cell_info(adata) if (flag_web): output_for_website(adata) st.write(adata) if (flag_gene_TG_detection): print('Identifying transition genes...') st.detect_transistion_genes(adata, cutoff_spearman=TG_spearman_cutoff, cutoff_logfc=TG_logfc_cutoff, n_jobs=n_jobs) if 
(flag_web): ## Plot top5 genes flat_tree = adata.uns['flat_tree'] list_node_start = [ value for key, value in nx.get_node_attributes( flat_tree, 'label').items() ] gene_list = [] for x in adata.uns['transition_genes'].keys(): gene_list = gene_list + adata.uns['transition_genes'][ x].index[:n_genes].tolist() gene_list = np.unique(gene_list) for ns in list_node_start: output_for_website_subwaymap_gene(adata, gene_list) st.stream_plot_gene(adata, root=ns, fig_size=(8, 8), genes=gene_list, save_fig=True, flag_log_view=flag_stream_log_view, fig_format='png') else: st.plot_transition_genes(adata, save_fig=flag_savefig) if (flag_gene_DE_detection): print('Identifying differentially expressed genes...') st.detect_de_genes(adata, cutoff_zscore=DE_logfc_cutoff, cutoff_logfc=DE_logfc_cutoff, n_jobs=n_jobs) if (flag_web): flat_tree = adata.uns['flat_tree'] list_node_start = [ value for key, value in nx.get_node_attributes( flat_tree, 'label').items() ] gene_list = [] for x in adata.uns['de_genes_greater'].keys(): gene_list = gene_list + adata.uns['de_genes_greater'][ x].index[:n_genes].tolist() for x in adata.uns['de_genes_less'].keys(): gene_list = gene_list + adata.uns['de_genes_less'][ x].index[:n_genes].tolist() gene_list = np.unique(gene_list) for ns in list_node_start: output_for_website_subwaymap_gene(adata, gene_list) st.stream_plot_gene(adata, root=ns, fig_size=(8, 8), genes=gene_list, save_fig=True, flag_log_view=flag_stream_log_view, fig_format='png') else: st.plot_de_genes(adata, save_fig=flag_savefig) if (flag_gene_LG_detection): print('Identifying leaf genes...') st.detect_leaf_genes(adata, cutoff_zscore=LG_zscore_cutoff, cutoff_pvalue=LG_pvalue_cutoff, n_jobs=n_jobs) if (flag_web): ## Plot top5 genes flat_tree = adata.uns['flat_tree'] list_node_start = [ value for key, value in nx.get_node_attributes( flat_tree, 'label').items() ] gene_list = [] for x in adata.uns['leaf_genes'].keys(): gene_list = gene_list + adata.uns['leaf_genes'][ x].index[:n_genes].tolist() 
gene_list = np.unique(gene_list) for ns in list_node_start: output_for_website_subwaymap_gene(adata, gene_list) st.stream_plot_gene(adata, root=ns, fig_size=(8, 8), genes=gene_list, save_fig=True, flag_log_view=flag_stream_log_view, fig_format='png') if ((genes != None) and (len(gene_list) > 0)): print('Visualizing genes...') flat_tree = adata.uns['flat_tree'] list_node_start = [ value for key, value in nx.get_node_attributes( flat_tree, 'label').items() ] if (root is None): for ns in list_node_start: if (flag_web): output_for_website_subwaymap_gene(adata, gene_list) st.stream_plot_gene(adata, root=ns, fig_size=(8, 8), genes=gene_list, save_fig=True, flag_log_view=flag_stream_log_view, fig_format='png') else: st.subwaymap_plot_gene(adata, percentile_dist=100, root=ns, genes=gene_list, save_fig=flag_savefig) st.stream_plot_gene(adata, root=ns, fig_size=(8, 8), genes=gene_list, save_fig=flag_savefig, flag_log_view=flag_stream_log_view) else: if (flag_web): output_for_website_subwaymap_gene(adata, gene_list) st.stream_plot_gene(adata, root=root, fig_size=(8, 8), genes=gene_list, save_fig=True, flag_log_view=flag_stream_log_view, fig_format='png') else: st.subwaymap_plot_gene(adata, percentile_dist=100, root=root, genes=gene_list, save_fig=flag_savefig) st.stream_plot_gene(adata, root=root, fig_size=(8, 8), genes=gene_list, save_fig=flag_savefig, flag_log_view=flag_stream_log_view) else: print('Starting mapping procedure...') if (output_folder == None): workdir_ref = os.path.join(os.getcwd(), 'stream_result') else: workdir_ref = output_folder adata = st.read(file_name='stream_result.pkl', file_format='pkl', file_path=workdir_ref, workdir=workdir_ref) workdir = os.path.join(workdir_ref, os.pardir, 'mapping_result') adata_new = st.read(file_name=new_filename, workdir=workdir) st.add_cell_labels(adata_new, file_name=new_label_filename) st.add_cell_colors(adata_new, file_name=new_label_color_filename) if (s_method == 'LOESS'): st.map_new_data(adata, adata_new, 
feature='var_genes') if (s_method == 'all'): st.map_new_data(adata, adata_new, feature='all') if (flag_umap): st.plot_visualization_2D(adata, adata_new=adata_new, use_precomputed=False, save_fig=flag_savefig, fig_name='umap_new_cells') st.plot_visualization_2D(adata, adata_new=adata_new, show_all_colors=True, save_fig=flag_savefig, fig_name='umap_all_cells') st.plot_visualization_2D(adata, adata_new=adata_new, color_by='branch', save_fig=flag_savefig, fig_name='umap_branches') if (root is None): flat_tree = adata.uns['flat_tree'] list_node_start = [ value for key, value in nx.get_node_attributes( flat_tree, 'label').items() ] for ns in list_node_start: st.subwaymap_plot(adata, adata_new=adata_new, percentile_dist=100, show_all_cells=False, root=ns, save_fig=flag_savefig) st.stream_plot(adata, adata_new=adata_new, show_all_colors=False, root=ns, fig_size=(8, 8), save_fig=flag_savefig, flag_log_view=flag_stream_log_view) else: st.subwaymap_plot(adata, adata_new=adata_new, percentile_dist=100, show_all_cells=False, root=root, save_fig=flag_savefig) st.stream_plot(adata, adata_new=adata_new, show_all_colors=False, root=root, fig_size=(8, 8), save_fig=flag_savefig, flag_log_view=flag_stream_log_view) if ((genes != None) and (len(gene_list) > 0)): if (root is None): for ns in list_node_start: st.subwaymap_plot_gene(adata, adata_new=adata_new, percentile_dist=100, root=ns, save_fig=flag_savefig, flag_log_view=flag_stream_log_view) else: st.subwaymap_plot_gene(adata, adata_new=adata_new, percentile_dist=100, root=root, save_fig=flag_savefig, flag_log_view=flag_stream_log_view) st.write(adata_new, file_name='stream_mapping_result.pkl') print('Finished computation.')
# -*- coding: utf-8 -*- import dash import dash_html_components as html import dash_core_components as dcc from dash.dependencies import Input, Output import dash_bootstrap_components as dbc import numpy as np import stream as st import matplotlib matplotlib.use('Agg') from app import app adata = st.read(file_name='./SampleData/Nestorowa_2016/Nestorowa-2016.pkl', workdir='./stream_result') adata.uns[ 'discription'] = 'This scRNA-seq dataset contains 1656 cells and 4768 genes from mouse hematopoietic stem and progenitor cell differentiation. A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation. Blood 128, e20-31 (2016).' fig_qc = st.plot_qc(adata, jitter=0.2, fig_size=(2, 2), return_svg=True) available_samples = [ 'Nestorowa, S. et al. 2016', 'Harrison, S. et al. 2021', 'Trapnell, C. et al. 2014', ' Tang, Q. et al. 2017' ] available_projections = [ 'dimension_reduction', 'visualization_2D', 'flat_tree', 'branches' ] available_colors = adata.obs.columns available_stream = ['single_cell_stream', 'stream'] layout = html.Div([
# Infer trajectories # read in parameters definition = open('./definition.yml', 'r') task = yaml.safe_load(definition) p = dict() for x in task["parameters"]: p[x['id']] = x['default'] pd.DataFrame(counts.toarray(), index=cell_ids, columns=gene_ids).T.to_csv(output_folder + "counts.tsv", sep='\t') checkpoints["method_afterpreproc"] = time.time() adata = st.read(file_name=output_folder + "counts.tsv") st.add_cell_labels(adata) st.add_cell_colors(adata) if (p["norm"]): st.normalize_per_cell(adata) if (p["log2"]): st.log_transform(adata) st.filter_genes(adata, min_num_cells=max(5, int(round(adata.shape[0] * 0.001))), min_pct_cells=None, expr_cutoff=1) if (adata.shape[1] < 1000): adata.uns['var_genes'] = gene_ids adata.obsm['var_genes'] = adata.X
import argparse import os import stream as st import scanpy as sc import numpy as np parser = argparse.ArgumentParser() parser.add_argument("-o", "--outdir", type=str, default=None) args = parser.parse_args() adata = st.read("ouput/aging-xxx/subset/adata.h5ad", file_format="h5ad", workdir=os.path.join(args.outdir, "stream_result")) adata.obs["label"] = adata.obs["louvain"] label_color = { "0": "#FF0000", "1": "#836FFF", "2": "#0000FF", "3": "#C6E2FF", "4": "#548B54", "5": "#00FF00", "6": "#FFF68F", "7": "#8B864E", "8": "#FFFF00", "9": "#FFD700", "10": "#8B658B", "11": "#FF6A6A", "12": "#FFD39B", "13": "#EE2C2C",
def main():
    """Convert a single-cell analysis result into a VR report.

    Parses the command line, loads the annotation key list and optional gene
    list, then dispatches on the tool name (scanpy / paga / seurat / velocity /
    stream) to the matching converter and writes the report to ``--output``.

    Raises:
        Exception: if no annotation file is given (defensive; -a is required).
        AssertionError: if the input file extension does not match the tool.
        ImportError: if the stream path is chosen but STREAM is not installed.
    """
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f",
                        "--filename",
                        dest="filename",
                        default=None,
                        required=True,
                        help="Analysis result file name",
                        metavar="FILE")
    parser.add_argument(
        "-t",
        "--toolname",
        dest="toolname",
        default=None,
        required=True,
        type=str.lower,
        choices=['scanpy', 'paga', 'seurat', 'stream', 'velocity'],
        help="Tool used to generate the analysis result.")
    parser.add_argument(
        "-a",
        "--annotations",
        dest="annotations",
        default=None,
        required=True,
        help=
        "Annotation file name. It contains the cell annotation key(s) to visualize in one column."
    )
    parser.add_argument(
        "-g",
        "--genes",
        dest="genes",
        default=None,
        help=
        "Gene list file name. It contains the genes to visualize in one column."
    )
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        default='vr_report',
                        help="Output folder name")
    parser.add_argument(
        "--layer",
        dest="layer",
        default='norm_data',
        help="The name of layer in Anndata object for gene expression")
    args = parser.parse_args()
    filename = args.filename
    toolname = args.toolname
    genes = args.genes
    output = args.output  # work directory
    annotations = args.annotations
    layer = args.layer
    if (annotations is None):
        # Defensive: unreachable in practice because -a is required=True.
        raise Exception(
            "Annotation file must be specified when %s is chosen." %
            (toolname))
    if toolname != 'velocity':
        # velocity passes the raw annotation file through as ann_field instead
        # of a parsed list, so only the other tools read it here.
        try:
            ann_list = pd.read_csv(annotations,
                                   sep='\t',
                                   header=None,
                                   index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:
            print('Failed to load in annotation file.')
            raise
        else:
            ann_list = list(set(ann_list))
    # Parse the optional gene list for every tool (velocity included), so
    # gene_list is always defined before the dispatch below.
    if (genes is not None):
        try:
            gene_list = pd.read_csv(genes,
                                    sep='\t',
                                    header=None,
                                    index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:
            print('Failed to load in gene list.')
            raise
        else:
            gene_list = list(set(gene_list))
    else:
        gene_list = None
    print("Converting '%s' analysis result ..." % toolname)
    if (toolname in ['scanpy', 'paga', 'seurat']):
        if (toolname == 'scanpy'):
            # BUG FIX: the assertion message previously said "For PAGA ..."
            # in the scanpy branch (copy-paste error).
            assert (filename.lower().endswith(
                ('.h5ad'))), "For Scanpy only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_scanpy_cells(adata,
                                     ann_list,
                                     gene_list=gene_list,
                                     reportdir=output)
        if (toolname == 'paga'):
            assert (filename.lower().endswith(
                ('.h5ad'))), "For PAGA only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_paga_graph(adata, reportdir=output)
            scvr.output_paga_cells(adata,
                                   ann_list,
                                   gene_list=gene_list,
                                   reportdir=output)
        if (toolname == 'seurat'):
            assert (filename.lower().endswith(
                ('.loom', '.h5ad'
                 ))), "For Seurat only .loom .h5ad file is supported."
            if filename.lower().endswith(('.loom')):
                print('reading in loom file ...')
                adata = ad.read_loom(filename)
            else:
                # BUG FIX: previously announced "reading in loom file ..."
                # unconditionally, even when reading an .h5ad file.
                print('reading in h5ad file ...')
                adata = ad.read(filename)
            scvr.output_seurat_cells(adata,
                                     ann_list,
                                     gene_list=gene_list,
                                     reportdir=output)
        # Record which tool produced the report, then ship it as a zip.
        with open(os.path.join(output, 'index.json'), 'w') as f:
            json.dump({"tool": toolname}, f)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)
    if toolname == 'velocity':
        assert (filename.lower().endswith('.h5ad')
                or filename.lower().endswith('.loom')
                ), 'Velocity supports .h5ad or .loom.'
        adata = scv.read(filename)
        scvr.output_velocity_cells(adata,
                                   ann_field=annotations,
                                   gene_list=gene_list,
                                   reportdir=output)
    if (toolname == 'stream'):
        # STREAM is an optional dependency; only import it on this path.
        try:
            import stream as st
        except ImportError:
            raise ImportError(
                'Please install STREAM >=0.5: `conda install -c bioconda stream`.'
            )
        assert (filename.lower().endswith(
            ('.pkl'))), "For STREAM only .pkl file is supported."
        print('reading in pkl file ...')
        adata = st.read(filename, file_format='pkl', workdir='./')
        st.save_vr_report(adata,
                          ann_list=ann_list,
                          gene_list=gene_list,
                          file_name=output)
def get_features():
    """Flask endpoint: return per-cell feature metadata for a dataset as JSON.

    Query parameters: db_name, feature, and (depending on feature) gene,
    embed, time.  The response shape depends on `feature`:
    annotation column -> per-cell labels + colors; "expression"/"rna" ->
    per-cell hex colors on a viridis scale; "velocity"/"velocity_grid" ->
    per-cell/grid arrow coordinates; "paga"/"curves" -> graph topology.

    scanpy examples:
    http://127.0.0.1:8000/features?db_name=1_scanpy_10xpbmc&feature=louvain
    http://127.0.0.1:8000/features?db_name=1_scanpy_10xpbmc&feature=expression&gene=SUMO3
    seurat examples:
    http://127.0.0.1:8000/features?db_name=4_seurat_10xpbmc&feature=expression&gene=SUMO3
    http://127.0.0.1:8000/features?db_name=4_seurat_10xpbmc&feature=expression&gene=SUMO3
    velocity examples:
    http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=clusters
    http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=expression&gene=Rbbp7
    http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity&embed=umap&time=None
    http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity&embed=umap&time=1
    http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity&embed=umap&time=10
    velocity grid examples:
    http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity_grid&embed=umap&time=10
    http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity_grid&embed=umap&time=100
    """
    database = request.args.get("db_name")
    feature = request.args.get("feature")
    # Resolve the dataset file by glob; assumes exactly one match — a missing
    # dataset raises IndexError here.
    filename = glob(os.path.join(DATASET_DIRECTORY, f"{database}.*"))[0]
    # NOTE(review): db_type is derived from `filename` while the load branch
    # below calls get_dataset_type_adata(database) — confirm both resolve to
    # the same type string.
    db_type = get_dataset_type_adata(filename)
    # `embed` is only bound when feature == "velocity"; the velocity branch
    # below relies on this binding.
    if feature.lower() == "velocity":
        embed = request.args.get("embed")
    # Best-effort cleanup of a leftover `adata` from a previous request.
    # NOTE(review): original indentation was lost; this try/except is assumed
    # to sit at function level (as in get_coordinates) — confirm upstream.
    try:
        del adata
    except:
        pass
    if get_dataset_type_adata(database).lower() in [
            "scanpy", "velocity", "seurat", "paga"
    ]:
        adata = sc.read(filename)
    else:
        # STREAM results are pickled AnnData; read via stream.
        adata = st.read(filename, file_format="pkl", workdir="./")
    list_metadata = []
    if feature in get_available_annotations_adata(adata):
        # cluster columns: map each category to its color, preferring the
        # precomputed `<feature>_colors` stored in .uns.
        if f"{feature}_colors" in adata.uns.keys():
            dict_colors = {
                feature:
                dict(
                    zip(adata.obs[feature].cat.categories,
                        adata.uns[f"{feature}_colors"]))
            }
        else:
            dict_colors = {
                feature:
                dict(
                    zip(adata.obs[feature],
                        converters.get_colors(adata, feature)))
            }
        for i in range(adata.shape[0]):
            dict_metadata = dict()
            dict_metadata["cell_id"] = adata.obs_names[i]
            dict_metadata["label"] = adata.obs[feature].tolist()[i]
            dict_metadata["clusters"] = adata.obs[feature].tolist()[i]
            dict_metadata["clusters_color"] = dict_colors[feature][
                dict_metadata["clusters"]]
            list_metadata.append(dict_metadata)
    elif feature in ["expression", "rna"]:
        # pseudotime or latent_time columns
        gene = request.args.get("gene")
        if gene not in adata.var_names:
            # Unknown gene: empty JSON object rather than an error.
            return jsonify({})
        else:
            # NOTE(review): `"time" in feature` can never be True here since
            # feature is "expression" or "rna" — this branch looks dead.
            if "time" in feature:
                values = adata.obs[feature]
            else:
                if db_type == "seurat":
                    # Seurat stores normalized expression in the "norm_data"
                    # layer; densify only when it is sparse.
                    values = (adata[:, gene].layers["norm_data"].toarray()[:, 0]
                              if isspmatrix(adata.layers["norm_data"]) else
                              adata[:, gene].layers["norm_data"][:, 0])
                else:
                    values = (adata[:, gene].X.toarray()[:, 0] if isspmatrix(
                        adata.X) else adata[:, gene].X[:, 0])
            # Map expression to hex colors on a 0..max(values) viridis scale.
            cm = mpl.cm.get_cmap("viridis", 512)
            norm = mpl.colors.Normalize(vmin=0, vmax=max(values), clip=True)
            list_metadata = []
            for i, x in enumerate(adata.obs_names):
                dict_genes = dict()
                dict_genes["cell_id"] = x
                dict_genes["color"] = mpl.colors.to_hex(cm(norm(values[i])))
                list_metadata.append(dict_genes)
    elif feature == "velocity":
        # Per-cell position (x,y,z) plus velocity arrow endpoint (x1,y1,z1).
        list_metadata = []
        time = request.args.get("time")
        for i in range(adata.shape[0]):
            dict_coord_cells = dict()
            # Some files store obs_names as bytes; normalize to str.
            if isinstance(adata.obs_names[i], bytes):
                dict_coord_cells["cell_id"] = adata.obs_names[i].decode(
                    "utf-8")
            else:
                dict_coord_cells["cell_id"] = adata.obs_names[i]
            # NOTE(review): assumes X_{embed} has >= 3 components — confirm.
            dict_coord_cells["x"] = str(adata.obsm[f"X_{embed}"][i, 0])
            dict_coord_cells["y"] = str(adata.obsm[f"X_{embed}"][i, 1])
            dict_coord_cells["z"] = str(adata.obsm[f"X_{embed}"][i, 2])
            if time == "None":
                # Instantaneous velocity vectors.
                dict_coord_cells["x1"] = str(
                    adata.obsm[f"velocity_{embed}"][i, 0])
                dict_coord_cells["y1"] = str(
                    adata.obsm[f"velocity_{embed}"][i, 1])
                dict_coord_cells["z1"] = str(
                    adata.obsm[f"velocity_{embed}"][i, 2])
            elif time in list(map(str,
                                  [0.01, 0.1, 1, 5, 10, 20, 30, 50, 100])):
                # Precomputed displacement after `time` seconds.
                dict_coord_cells["x1"] = str(
                    adata.obsm[f"absolute_velocity_{embed}_{time}s"][i, 0])
                dict_coord_cells["y1"] = str(
                    adata.obsm[f"absolute_velocity_{embed}_{time}s"][i, 1])
                dict_coord_cells["z1"] = str(
                    adata.obsm[f"absolute_velocity_{embed}_{time}s"][i, 2])
            else:
                # Unsupported time value: bail out with an empty object.
                return jsonify({})
            list_metadata.append(dict_coord_cells)
    elif feature == "velocity_grid":
        # Grid-based velocity arrows, keeping only grid points with mass >= 1.
        list_metadata = []
        time = request.args.get("time")
        p_mass = adata.uns['p_mass']
        for i in np.where(p_mass >= 1)[0]:
            dict_coord_cells = dict()
            if time == "None":
                dict_coord_cells["x"] = str(adata.uns[f"X_grid"][i, 0])
                dict_coord_cells["y"] = str(adata.uns[f"X_grid"][i, 1])
                dict_coord_cells["z"] = str(adata.uns[f"X_grid"][i, 2])
                dict_coord_cells["x1"] = str(adata.uns[f"V_grid"][i, 0])
                dict_coord_cells["y1"] = str(adata.uns[f"V_grid"][i, 1])
                dict_coord_cells["z1"] = str(adata.uns[f"V_grid"][i, 2])
            elif time in list(map(str,
                                  [0.01, 0.1, 1, 5, 10, 20, 50, 80, 100])):
                # NOTE(review): this allowed-time list differs from the
                # per-cell velocity branch (30 vs 50/80) — confirm intended.
                dict_coord_cells["x"] = str(adata.uns[f"X_grid_{time}"][i, 0])
                dict_coord_cells["y"] = str(adata.uns[f"X_grid_{time}"][i, 1])
                dict_coord_cells["z"] = str(adata.uns[f"X_grid_{time}"][i, 2])
                dict_coord_cells["x1"] = str(adata.uns[f"V_grid_{time}"][i, 0])
                dict_coord_cells["y1"] = str(adata.uns[f"V_grid_{time}"][i, 1])
                dict_coord_cells["z1"] = str(adata.uns[f"V_grid_{time}"][i, 2])
            else:
                return jsonify({})
            list_metadata.append(dict_coord_cells)
    elif feature == "paga":
        # Build the PAGA abstraction graph from the connectivities matrix.
        G = nx.from_numpy_matrix(adata.uns["paga"]["connectivities"].toarray())
        adata.uns["paga"]["pos"] = get_paga3d_pos(adata)
        ## output coordinates of paga graph
        list_lines = []
        for edge_i in G.edges():
            dict_coord_lines = dict()
            dict_coord_lines["branch_id"] = [[str(edge_i[0]), str(edge_i[1])]]
            dict_coord_lines["xyz"] = [{
                "x": pos[0],
                "y": pos[1],
                "z": pos[2]
            } for pos in adata.uns["paga"]["pos"][[edge_i[0], edge_i[1]], :]]
            list_lines.append(dict_coord_lines)
        ## output topology of paga graph
        dict_nodes = dict()
        list_edges = []
        # Node index -> human-readable group name.
        dict_nodename = {
            i: adata.obs[adata.uns["paga"]["groups"]].cat.categories[i]
            for i in G.nodes()
        }
        for node_i in G.nodes():
            dict_nodes_i = dict()
            dict_nodes_i["node_name"] = dict_nodename[node_i]
            dict_nodes_i["xyz"] = {
                "x": adata.uns["paga"]["pos"][:, 0][node_i],
                "y": adata.uns["paga"]["pos"][:, 1][node_i],
                "z": adata.uns["paga"]["pos"][:, 2][node_i],
            }
            dict_nodes[node_i] = dict_nodes_i
        for edge_i in G.edges():
            dict_edges = dict()
            dict_edges["nodes"] = [str(edge_i[0]), str(edge_i[1])]
            dict_edges["weight"] = adata.uns["paga"]["connectivities"][
                edge_i[0], edge_i[1]]
            list_edges.append(dict_edges)
        list_metadata = {"nodes": dict_nodes, "edges": list_edges}
    elif feature == "curves":
        # STREAM principal-graph branch curves: one polyline per flat-tree edge.
        flat_tree = adata.uns['flat_tree']
        epg = adata.uns['epg']
        epg_node_pos = nx.get_node_attributes(epg, 'pos')
        ft_node_label = nx.get_node_attributes(flat_tree, 'label')
        ft_node_pos = nx.get_node_attributes(flat_tree, 'pos')
        list_curves = []
        for edge_i in flat_tree.edges():
            branch_i_pos = np.array(
                [epg_node_pos[i] for i in flat_tree.edges[edge_i]['nodes']])
            df_coord_curve_i = pd.DataFrame(branch_i_pos)
            dict_coord_curves = dict()
            dict_coord_curves['branch_id'] = ft_node_label[
                edge_i[0]] + '_' + ft_node_label[edge_i[1]]
            dict_coord_curves['xyz'] = [{
                'x': df_coord_curve_i.iloc[j, 0],
                'y': df_coord_curve_i.iloc[j, 1],
                'z': df_coord_curve_i.iloc[j, 2]
            } for j in range(df_coord_curve_i.shape[0])]
            list_curves.append(dict_coord_curves)
        list_metadata = list_curves
    # Free the (potentially large) AnnData object before responding.
    del adata
    gc.collect()
    return jsonify({feature: list_metadata})
def main():
    """Convert a PAGA / Seurat / STREAM analysis result into a VR report.

    Parses the command line, loads the annotation list (required for paga and
    seurat) and optional gene list, then dispatches to the tool-specific
    converter, zipping the report for paga/seurat.

    Raises:
        Exception: if paga/seurat is chosen without an annotation file.
        AssertionError: if the input file extension does not match the tool.
        ImportError: if the stream path is chosen but STREAM is not installed.
    """
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f",
                        "--filename",
                        dest="filename",
                        default=None,
                        required=True,
                        help="Analysis result file name",
                        metavar="FILE")
    parser.add_argument("-t",
                        "--toolname",
                        dest="toolname",
                        default=None,
                        required=True,
                        type=str.lower,
                        choices=['paga', 'seurat', 'stream'],
                        help="Tool used to generate the analysis result.")
    parser.add_argument(
        "-a",
        "--annotations",
        dest="annotations",
        default=None,
        help=
        "Annotation file name. It contains the cell annotation(s) used to color cells"
    )
    parser.add_argument(
        "-g",
        "--genes",
        dest="genes",
        default=None,
        help=
        "Gene list file name. It contains the genes to visualize in one column"
    )
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        default='vr_report',
                        help="Output folder name")
    args = parser.parse_args()
    filename = args.filename
    toolname = args.toolname
    genes = args.genes
    output = args.output  # work directory
    annotations = args.annotations
    if (toolname in ['paga', 'seurat']):
        # These converters color cells by annotation, so -a is mandatory here.
        if (annotations is None):
            raise Exception(
                "Annotation file must be specified when %s is chosen." %
                (toolname))
        try:
            ann_list = pd.read_csv(annotations,
                                   sep='\t',
                                   header=None,
                                   index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:
            print('Failed to load in annotation file.')
            raise
        else:
            ann_list = list(set(ann_list))
    # Parse the optional gene list for every tool, so gene_list is always
    # defined before the dispatch below (the stream branch uses it too).
    if (genes is not None):
        try:
            gene_list = pd.read_csv(genes,
                                    sep='\t',
                                    header=None,
                                    index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:
            print('Failed to load in gene list.')
            raise
        else:
            gene_list = list(set(gene_list))
    else:
        gene_list = None
    if (toolname == 'paga'):
        assert (filename.lower().endswith(
            ('.h5ad'))), "For PAGA only .h5ad file is supported."
        print('reading in h5ad file ...')
        adata = ad.read_h5ad(filename)
        adata.uns['paga']['pos'] = scvr_prep.get_paga3d_pos(adata)
        scvr_prep.output_paga_graph(adata, reportdir=output)
        scvr_prep.output_paga_cells(adata,
                                    ann_list,
                                    genes=gene_list,
                                    reportdir=output)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)
    if (toolname == 'seurat'):
        # BUG FIX: the assertion previously required '.h5ad' while both the
        # message and the reader (ad.read_loom) expect '.loom'; all three now
        # agree on .loom.
        assert (filename.lower().endswith(
            ('.loom'))), "For Seurat only .loom file is supported."
        print('reading in loom file ...')
        adata = ad.read_loom(filename)
        scvr_prep.output_seurat_cells(adata,
                                      ann_list,
                                      genes=gene_list,
                                      reportdir=output)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)
    if (toolname == 'stream'):
        # STREAM is an optional dependency; only import it on this path.
        try:
            import stream as st
        except ImportError:
            raise ImportError(
                'Please install STREAM >=0.4.2: `conda install -c bioconda stream`.'
            )
        # BUG FIX: the assertion previously required '.h5ad' while both the
        # message and st.read(file_format='pkl') expect '.pkl'.
        assert (filename.lower().endswith(
            ('.pkl'))), "For STREAM only .pkl file is supported."
        print('reading in pkl file ...')
        adata = st.read(filename, file_format='pkl', workdir='./')
        st.save_vr_report(adata, genes=gene_list, file_name=output)
def main():
    """Command-line driver for STREAM dimension reduction.

    Reads a preprocessed STREAM .pkl file, runs st.dimension_reduction on the
    variable genes, saves a dimension-reduction plot, and writes the updated
    result back out as '<prefix>_stream_result.pkl'.
    """
    sns.set_style('white')
    sns.set_context('poster')
    cli = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Each entry is (flags, add_argument keyword options); declaring them as
    # data keeps the option table in one place.
    option_specs = [
        (("-m", "--data-file"),
         dict(dest="input_filename",
              default=None,
              help="input file name, pkl format from Stream preprocessing module",
              metavar="FILE")),
        (("-of", "--of"),
         dict(dest="output_filename_prefix",
              default="StreamiFSOutput",
              help="output file name prefix")),
        (("-nb_pct", "--percent_neighbor_cells"),
         dict(dest="nb_pct", type=float, default=None, help="")),
        (("-n_comp_k",), dict(dest="n_comp_k", type=int, default=None,
                              help="")),
        (("-feat",), dict(dest="feature", default=None, help="feature")),
        (("-method",), dict(dest="method", default=None, help="")),
        (("-nc_plot",), dict(dest="nc_plot", type=int, default=None, help="")),
        (("-comp1",), dict(dest="comp1", default=None, help="feature")),
        (("-comp2",), dict(dest="comp2", type=int, default=None, help="")),
        (("-fig_width",), dict(dest="fig_width", type=int, default=8,
                               help="")),
        (("-fig_height",), dict(dest="fig_height", type=int, default=8,
                                help="")),
        (("-n_jobs",), dict(dest="n_jobs", type=int, default=2, help="")),
        (("-fig_legend_ncol",),
         dict(dest="fig_legend_ncol", type=int, default=None, help="")),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    opts = cli.parse_args()
    print(opts)
    print('Starting dimension reduction procedure...')
    adata = st.read(file_name=opts.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir="./")
    print("Feature ", opts.feature, type(opts.feature))
    # NOTE(review): feature is hard-coded to 'var_genes' even though -feat is
    # parsed and printed above — confirm this is intentional.
    st.dimension_reduction(adata,
                           method=opts.method,
                           feature='var_genes',
                           nb_pct=opts.nb_pct,
                           n_components=opts.n_comp_k,
                           n_jobs=opts.n_jobs,
                           eigen_solver=None)
    canvas_size = (opts.fig_width, opts.fig_height)
    st.plot_dimension_reduction(adata,
                                n_components=opts.nc_plot,
                                comp1=opts.comp1,
                                comp2=opts.comp2,
                                save_fig=True,
                                fig_name=(opts.output_filename_prefix +
                                          '_stddev_dotplot.png'),
                                fig_path="./",
                                fig_size=canvas_size,
                                fig_legend_ncol=opts.fig_legend_ncol)
    st.write(adata,
             file_name=(opts.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')
    print('Finished computation.')
def get_coordinates():
    """Flask endpoint: return 3D cell coordinates for a dataset as JSON.

    For scanpy/paga/velocity/seurat datasets the response is a list of
    per-cell {cell_id, x, y, z} dicts; for STREAM datasets it is a single
    object with the flat-tree topology, branch curves, and cells.

    http://127.0.0.1:8000/coordinates?db_name=1_scanpy_10xpbmc&embed=umap
    http://127.0.0.1:8000/coordinates?db_name=3_velocity_pancrease&embed=umap
    http://127.0.0.1:8000/coordinates?db_name=4_seurat_10xpbmc&embed=umap
    http://127.0.0.1:8000/coordinates?db_name=5_stream_nestorowa16&embed=umap
    """
    db_name = request.args.get("db_name")
    # Resolve the dataset file by glob; assumes exactly one match — a missing
    # dataset raises IndexError here.
    filename = glob(os.path.join(DATASET_DIRECTORY, f"{db_name}.*"))[0]
    # Best-effort cleanup of a leftover `adata` from a previous request.
    try:
        del adata
    except:
        pass
    if get_dataset_type_adata(db_name).lower() in [
            "scanpy", "velocity", "seurat", "paga"
    ]:
        adata = sc.read(filename)
        # `embed` is only bound on this path; the stream branch below does
        # not use it.
        embed = request.args.get("embed")
    else:
        print(filename)
        # STREAM results are pickled AnnData; read via stream.
        adata = st.read(filename, file_format="pkl", workdir="./")
    list_cells = []
    for i in range(adata.shape[0]):
        dict_coord_cells = dict()
        dict_coord_cells["cell_id"] = adata.obs_names[i]
        if get_dataset_type_adata(db_name).lower() in [
                "scanpy", "paga", "velocity"
        ]:
            # NOTE(review): assumes X_{embed} has >= 3 components — confirm.
            dict_coord_cells["x"] = str(adata.obsm[f"X_{embed}"][i, 0])
            dict_coord_cells["y"] = str(adata.obsm[f"X_{embed}"][i, 1])
            dict_coord_cells["z"] = str(adata.obsm[f"X_{embed}"][i, 2])
        elif get_dataset_type_adata(db_name).lower() == "seurat":
            # Seurat stores embeddings under '<embed>_cell_embeddings'.
            dict_coord_cells["x"] = str(
                adata.obsm[f"{embed}_cell_embeddings"][i, 0])
            dict_coord_cells["y"] = str(
                adata.obsm[f"{embed}_cell_embeddings"][i, 1])
            dict_coord_cells["z"] = str(
                adata.obsm[f"{embed}_cell_embeddings"][i, 2])
        elif get_dataset_type_adata(db_name).lower() == "stream":
            # NOTE(review): this whole branch runs on the FIRST loop iteration
            # and returns from inside the loop — the outer per-cell loop is
            # effectively bypassed for stream datasets.
            file_path = os.path.join(adata.uns["workdir"], "test")
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            flat_tree = adata.uns["flat_tree"]
            epg = adata.uns["epg"]
            epg_node_pos = nx.get_node_attributes(epg, "pos")
            ft_node_label = nx.get_node_attributes(flat_tree, "label")
            ft_node_pos = nx.get_node_attributes(flat_tree, "pos")
            # One polyline per flat-tree edge, sampled at the elastic
            # principal graph node positions along that branch.
            list_curves = []
            for edge_i in flat_tree.edges():
                branch_i_pos = np.array([
                    epg_node_pos[i] for i in flat_tree.edges[edge_i]["nodes"]
                ])
                df_coord_curve_i = pd.DataFrame(branch_i_pos)
                dict_coord_curves = dict()
                dict_coord_curves["branch_id"] = (ft_node_label[edge_i[0]] +
                                                  "_" +
                                                  ft_node_label[edge_i[1]])
                dict_coord_curves["xyz"] = [{
                    "x": df_coord_curve_i.iloc[j, 0],
                    "y": df_coord_curve_i.iloc[j, 1],
                    "z": df_coord_curve_i.iloc[j, 2],
                } for j in range(df_coord_curve_i.shape[0])]
                list_curves.append(dict_coord_curves)
            ## output topology of stream graph
            dict_nodes = dict()
            list_edges = []
            for node_i in flat_tree.nodes():
                dict_nodes_i = dict()
                dict_nodes_i["node_name"] = ft_node_label[node_i]
                dict_nodes_i["xyz"] = {
                    "x": ft_node_pos[node_i][0],
                    "y": ft_node_pos[node_i][1],
                    "z": ft_node_pos[node_i][2],
                }
                dict_nodes[ft_node_label[node_i]] = dict_nodes_i
            for edge_i in flat_tree.edges():
                dict_edges = dict()
                dict_edges["nodes"] = [
                    ft_node_label[edge_i[0]],
                    ft_node_label[edge_i[1]],
                ]
                dict_edges["weight"] = 1  # flat tree edges are unweighted
                list_edges.append(dict_edges)
            # Per-cell coordinates from the STREAM dimension reduction.
            # NOTE(review): unlike the other branches, these values are not
            # wrapped in str() — confirm the client accepts numbers here.
            list_cells = []
            for i in range(adata.shape[0]):
                dict_coord_cells = dict()
                dict_coord_cells['cell_id'] = adata.obs_names[i]
                dict_coord_cells['x'] = adata.obsm['X_dr'][i, 0]
                dict_coord_cells['y'] = adata.obsm['X_dr'][i, 1]
                dict_coord_cells['z'] = adata.obsm['X_dr'][i, 2]
                list_cells.append(dict_coord_cells)
            return jsonify({
                "nodes": dict_nodes,
                "edges": list_edges,
                "graph": list_curves,
                "cells": list_cells
            })
        else:
            raise TypeError("not supported format")
        list_cells.append(dict_coord_cells)
    # Free the (potentially large) AnnData object before responding.
    del adata
    gc.collect()
    return jsonify(list_cells)
#!/usr/bin/env python3 ### Here we perform Pseudotime analysis with STREAM v0.36 [https://doi.org/10.1038/s41467-019-09670-4] [https://github.com/pinellolab/STREAM] ### Download counts matrix here: import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt plt.ioff() import stream as st import os.path import pickle #### Read Counts table adata = st.read(file_name='./counts.tsv', workdir='./') # Read Cell labels table st.add_cell_labels(adata, file_name='./cell_label.tsv') # Add random colors to each sample st.add_cell_colors(adata, file_name='./cell_color.tsv') ### CHECK FOR VARIABLE GENES # Check if the blue (loess) curve fits the points well st.select_variable_genes(adata) # Open plot file plt.savefig('loess.png') # Close Plot plt.close('loess.png') # Adjust the blue curve to fits better st.select_variable_genes(adata, loess_frac=0.01) plt.savefig('adjust_loess.png')
def main():
    """Render STREAM subway-map and stream plots from a saved .pkl result.

    Reads the STREAM result, then (depending on -flag_cells / -flag_genes)
    draws the flat tree, cell-level subway map and stream plot, and/or the
    gene-level subway and stream plots, and writes the updated result back
    out as '<prefix>_stream_result.pkl'.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m",
        "--data-file",
        dest="input_filename",
        default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of",
                        "--of",
                        dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("-fig_width",
                        dest="fig_width",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_height",
                        dest="fig_height",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_legend_ncol",
                        dest="fig_legend_ncol",
                        type=int,
                        default=None,
                        help="")
    parser.add_argument("-root", dest="root", default=None, help="")
    parser.add_argument("-preference", dest="preference", help="")
    parser.add_argument("-subway_factor",
                        dest="subway_factor",
                        type=float,
                        default=2.0,
                        help="")
    parser.add_argument("-color_by",
                        dest="color_by",
                        default='label',
                        help="")
    parser.add_argument("-factor_num_win",
                        dest="factor_num_win",
                        type=int,
                        default=10,
                        help="")
    parser.add_argument("-factor_min_win",
                        dest="factor_min_win",
                        type=float,
                        default=2.0,
                        help="")
    parser.add_argument("-factor_width",
                        dest="factor_width",
                        type=float,
                        default=2.5,
                        help="")
    parser.add_argument("-flag_log_view",
                        dest="flag_log_view",
                        action="store_true",
                        help="")
    parser.add_argument("-factor_zoomin",
                        dest="factor_zoomin",
                        type=float,
                        default=100.0,
                        help="")
    parser.add_argument("-flag_cells",
                        dest="flag_cells",
                        action="store_true",
                        help="")
    parser.add_argument("-flag_genes",
                        dest="flag_genes",
                        action="store_true",
                        help="")
    parser.add_argument("-genes", dest="genes", default=None, help="")
    parser.add_argument("-percentile_dist",
                        dest="percentile_dist",
                        type=float,
                        default=100,
                        help="")
    args = parser.parse_args()
    workdir = "./"
    adata = st.read(file_name=args.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir=workdir)
    # -preference has no default (None); .split on None would raise, so fall
    # back to None (STREAM's own default) when the flag is absent.
    preference = args.preference.split(',') if args.preference else None
    # BUG FIX: flag_cells/flag_genes come from action="store_true", so they
    # are always booleans and never None; the previous `!= None` checks were
    # always True, which forced both branches to run (and crashed on
    # args.genes.split(',') when -genes was not given).
    if args.flag_cells:
        st.plot_flat_tree(adata,
                          save_fig=True,
                          fig_path="./",
                          fig_name=(args.output_filename_prefix +
                                    '_flat_tree.png'),
                          fig_size=(args.fig_width, args.fig_height),
                          fig_legend_ncol=args.fig_legend_ncol)
        st.subwaymap_plot(adata,
                          root=args.root,
                          percentile_dist=args.percentile_dist,
                          preference=preference,
                          factor=args.subway_factor,
                          color_by=args.color_by,
                          save_fig=True,
                          fig_path="./",
                          fig_name=(args.output_filename_prefix +
                                    '_cell_subway_map.png'),
                          fig_size=(args.fig_width, args.fig_height),
                          fig_legend_ncol=args.fig_legend_ncol)
        st.stream_plot(adata,
                       root=args.root,
                       preference=preference,
                       factor_num_win=args.factor_num_win,
                       factor_min_win=args.factor_min_win,
                       factor_width=args.factor_width,
                       flag_log_view=args.flag_log_view,
                       factor_zoomin=args.factor_zoomin,
                       save_fig=True,
                       fig_path="./",
                       fig_name=(args.output_filename_prefix +
                                 '_cell_stream_plot.png'),
                       fig_size=(args.fig_width, args.fig_height),
                       fig_legend=True,
                       fig_legend_ncol=args.fig_legend_ncol,
                       tick_fontsize=20,
                       label_fontsize=25)
    if args.flag_genes:
        genes = args.genes.split(',')
        st.subwaymap_plot_gene(adata,
                               root=args.root,
                               genes=genes,
                               preference=preference,
                               percentile_dist=args.percentile_dist,
                               factor=args.subway_factor,
                               save_fig=True,
                               fig_path="./",
                               fig_format='png',
                               fig_size=(args.fig_width, args.fig_height))
        # , fig_name=(args.output_filename_prefix + '_gene_subway_plot.png'))
        st.stream_plot_gene(adata,
                            root=args.root,
                            genes=genes,
                            preference=preference,
                            factor_min_win=args.factor_min_win,
                            factor_num_win=args.factor_num_win,
                            factor_width=args.factor_width,
                            save_fig=True,
                            fig_path="./",
                            fig_format='png',
                            fig_size=(args.fig_width, args.fig_height),
                            tick_fontsize=20,
                            label_fontsize=25)
        # , fig_name=(args.output_filename_prefix + '_gene_stream_plot.png'))
    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Finished computation.')
import dash_bootstrap_components as dbc import time import plotly.graph_objects as go ### import calculation related packages import numpy as np import stream as st import matplotlib matplotlib.use('Agg') from app import app ### preset data adata_computed = st.read( file_name='./SampleData/Nestorowa_2016/Nestorowa-2016.pkl', workdir='./stream_result') adata = st.read( file_name='./SampleData/Nestorowa_2016/Nestorowa-2016-raw.h5ad', workdir='./stream_result') adata.uns[ 'discription'] = 'This scRNA-seq dataset contains 1656 cells and 40594 genes from mouse hematopoietic stem and progenitor cell differentiation. A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation. Blood 128, e20-31 (2016).' fig_ds = st.plot_stream(adata_computed, root='S1', return_svg=True) ### Set optionals available_samples = [ 'Nestorowa, S. et al. 2016', 'Harrison, S. et al. 2021', 'Trapnell, C. et al. 2014', ' Tang, Q. et al. 2017' ] available_normalization = [ 'Library size correction', 'TF-IDF transformation', 'None'
def stream_test_Nestorowa_2016():
    """End-to-end regression test of the STREAM pipeline on Nestorowa 2016.

    Runs the full analysis (QC, feature selection, dimension reduction,
    elastic-principal-graph learning, plotting, marker detection) into a
    temp folder, then compares every produced file against a reference
    archive: PDFs are only checked to be non-empty; tabular outputs are
    compared column-by-column (np.isclose for numeric columns, exact
    equality otherwise).  Raises on any mismatch.
    """
    workdir = os.path.join(_root, 'datasets/Nestorowa_2016/')
    temp_folder = tempfile.gettempdir()
    # Unpack the reference results shipped with the dataset.
    # NOTE(review): extractall on a bundled (trusted) archive; do not reuse
    # this pattern on untrusted tarballs.
    tar = tarfile.open(workdir + 'output/stream_result.tar.gz')
    tar.extractall(path=temp_folder)
    tar.close()
    ref_temp_folder = os.path.join(temp_folder, 'stream_result')
    print(workdir + 'data_Nestorowa.tsv.gz')
    input_file = os.path.join(workdir, 'data_Nestorowa.tsv.gz')
    label_file = os.path.join(workdir, 'cell_label.tsv.gz')
    label_color_file = os.path.join(workdir, 'cell_label_color.tsv.gz')
    # Fresh computation goes here, side by side with the reference.
    comp_temp_folder = os.path.join(temp_folder, 'stream_result_comp')
    try:
        st.set_figure_params(dpi=80,
                             style='white',
                             figsize=[5.4, 4.8],
                             rc={'image.cmap': 'viridis'})
        adata = st.read(file_name=input_file, workdir=comp_temp_folder)
        adata.var_names_make_unique()
        adata.obs_names_make_unique()
        st.add_cell_labels(adata, file_name=label_file)
        st.add_cell_colors(adata, file_name=label_color_file)
        # QC and feature selection.
        st.cal_qc(adata, assay='rna')
        st.filter_features(adata, min_n_cells=5)
        st.select_variable_genes(adata, n_genes=2000, save_fig=True)
        st.select_top_principal_components(adata,
                                           feature='var_genes',
                                           first_pc=True,
                                           n_pc=30,
                                           save_fig=True)
        # Dimension reduction (spectral embedding) and 2D visualization.
        st.dimension_reduction(adata,
                               method='se',
                               feature='top_pcs',
                               n_neighbors=100,
                               n_components=4,
                               n_jobs=2)
        st.plot_dimension_reduction(adata,
                                    color=['label', 'Gata1', 'n_genes'],
                                    n_components=3,
                                    show_graph=False,
                                    show_text=False,
                                    save_fig=True,
                                    fig_name='dimension_reduction.pdf')
        st.plot_visualization_2D(adata,
                                 method='umap',
                                 n_neighbors=100,
                                 color=['label', 'Gata1', 'n_genes'],
                                 use_precomputed=False,
                                 save_fig=True,
                                 fig_name='visualization_2D.pdf')
        # Seed and fit the elastic principal graph.
        st.seed_elastic_principal_graph(adata, n_clusters=20)
        st.plot_dimension_reduction(adata,
                                    color=['label', 'Gata1', 'n_genes'],
                                    n_components=2,
                                    show_graph=True,
                                    show_text=False,
                                    save_fig=True,
                                    fig_name='dr_seed.pdf')
        st.plot_branches(adata,
                         show_text=True,
                         save_fig=True,
                         fig_name='branches_seed.pdf')
        st.elastic_principal_graph(adata,
                                   epg_alpha=0.01,
                                   epg_mu=0.05,
                                   epg_lambda=0.01)
        st.plot_dimension_reduction(adata,
                                    color=['label', 'Gata1', 'n_genes'],
                                    n_components=2,
                                    show_graph=True,
                                    show_text=False,
                                    save_fig=True,
                                    fig_name='dr_epg.pdf')
        st.plot_branches(adata,
                         show_text=True,
                         save_fig=True,
                         fig_name='branches_epg.pdf')
        # Extend leaf branches to reach further cells.
        st.extend_elastic_principal_graph(adata,
                                          epg_ext_mode='QuantDists',
                                          epg_ext_par=0.8)
        st.plot_dimension_reduction(adata,
                                    color=['label'],
                                    n_components=2,
                                    show_graph=True,
                                    show_text=True,
                                    save_fig=True,
                                    fig_name='dr_extend.pdf')
        st.plot_branches(adata,
                         show_text=True,
                         save_fig=True,
                         fig_name='branches_extend.pdf')
        st.plot_visualization_2D(
            adata,
            method='umap',
            n_neighbors=100,
            color=['label', 'branch_id_alias', 'S4_pseudotime'],
            use_precomputed=False,
            save_fig=True,
            fig_name='visualization_2D_2.pdf')
        st.plot_flat_tree(adata,
                          color=['label', 'branch_id_alias', 'S4_pseudotime'],
                          dist_scale=0.5,
                          show_graph=True,
                          show_text=True,
                          save_fig=True)
        st.plot_stream_sc(adata,
                          root='S4',
                          color=['label', 'Gata1'],
                          dist_scale=0.5,
                          show_graph=True,
                          show_text=False,
                          save_fig=True)
        st.plot_stream(adata, root='S4', color=['label', 'Gata1'],
                       save_fig=True)
        # Marker detection rooted at S4, on the top 300 variable genes.
        st.detect_leaf_markers(adata,
                               marker_list=adata.uns['var_genes'][:300],
                               root='S4',
                               n_jobs=4)
        st.detect_transition_markers(adata,
                                     root='S4',
                                     marker_list=adata.uns['var_genes'][:300],
                                     n_jobs=4)
        st.detect_de_markers(adata,
                             marker_list=adata.uns['var_genes'][:300],
                             root='S4',
                             n_jobs=4)
        # st.write(adata,file_name='stream_result.pkl')
    except:
        # Bare except is deliberate here: report and re-raise whatever
        # failed so the test still fails loudly.
        print("STREAM analysis failed!")
        raise
    else:
        print("STREAM analysis finished!")
        print(ref_temp_folder)
        print(comp_temp_folder)
        # Walk every reference file and compare with the freshly computed one.
        pathlist = Path(ref_temp_folder)
        for path in pathlist.glob('**/*'):
            if path.is_file() and (not path.name.startswith('.')):
                file = os.path.relpath(str(path), ref_temp_folder)
                print(file)
                if (file.endswith('pdf')):
                    # Figures are not compared pixel-wise; a non-empty file
                    # on the computed side counts as a pass.
                    if (os.path.getsize(os.path.join(comp_temp_folder,
                                                     file)) > 0):
                        print('The file %s passed' % file)
                    else:
                        raise Exception('Error! The file %s is not matched'
                                        % file)
                else:
                    checklist = list()
                    df_ref = pd.read_csv(os.path.join(ref_temp_folder, file),
                                         sep='\t')
                    # print(df_ref.shape)
                    # print(df_ref.head())
                    df_comp = pd.read_csv(os.path.join(comp_temp_folder, file),
                                          sep='\t')
                    # print(df_comp.shape)
                    # print(df_comp.head())
                    for c in df_ref.columns:
                        # print(c)
                        # Numeric columns tolerate floating-point noise.
                        if (is_numeric_dtype(df_ref[c])):
                            checklist.append(
                                all(np.isclose(df_ref[c], df_comp[c])))
                        else:
                            checklist.append(all(df_ref[c] == df_comp[c]))
                    if (all(checklist)):
                        print('The file %s passed' % file)
                    else:
                        raise Exception('Error! The file %s is not matched'
                                        % file)
        print('Successful!')
        # Clean up both result trees on success.
        rmtree(comp_temp_folder, ignore_errors=True)
        rmtree(ref_temp_folder, ignore_errors=True)
def main():
    """CLI entry point for the STREAM feature-selection step.

    Loads a pkl AnnData, optionally selects variable genes and/or top
    principal components (each behind its own flag), and writes the
    result back out as '<prefix>_stream_result.pkl'.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # (option strings, keyword options) pairs, registered in declaration
    # order so the --help output is identical to the hand-written form.
    arg_specs = [
        (("-m", "--data-file"),
         dict(dest="input_filename", default=None,
              help="input file name, pkl format from Stream preprocessing module",
              metavar="FILE")),
        (("--flag_useprecomputed",),
         dict(dest="flag_useprecomputed", action="store_true",
              help="use precomputed features for PCA")),
        (("--flag_firstpc",),
         dict(dest="flag_firstpc", action="store_true",
              help="Use the first principal component")),
        (("--flag_pca",),
         dict(dest="flag_pca", action="store_true", help="perform PCA")),
        (("--flag_variable",),
         dict(dest="flag_variable", action="store_true",
              help="find variable genes")),
        (("-of", "--of"),
         dict(dest="output_filename_prefix", default="StreamiFSOutput",
              help="output file name prefix")),
        (("-lf", "--loess_fraction"),
         dict(dest="loess_fraction", type=float, default=None,
              help="loess fraction")),
        (("-per",),
         dict(dest="percentile", type=int, default=None,
              help="percent of variable genes to find")),
        (("-n_g",),
         dict(dest="num_genes", type=int, default=None, help="num genes")),
        (("-n_j",),
         dict(dest="num_jobs", type=int, default=None, help="num jobs")),
        (("-feat",),
         dict(dest="feature", default=None, help="feature")),
        (("-n_pc",),
         dict(dest="num_principal_components", type=int, default=None,
              help="num principal components")),
        (("-max_pc",),
         dict(dest="max_principal_components", type=int, default=None,
              help="max principal components")),
        (("-fig_width",),
         dict(dest="fig_width", type=int, default=8, help="")),
        (("-fig_height",),
         dict(dest="fig_height", type=int, default=8, help="")),
        (("--flag",),
         dict(dest="flag", action="store_true", help="debugging flag")),
    ]
    for option_strings, options in arg_specs:
        parser.add_argument(*option_strings, **options)

    args = parser.parse_args()
    print('Starting feature selection procedure...')
    print(args)

    workdir = "./"
    adata = st.read(file_name=args.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir=workdir)
    print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')
    #print('N_genes is ' + str(args.num_genes))

    # Variable-gene selection (only when requested).
    if args.flag_variable:
        st.select_variable_genes(adata,
                                 loess_frac=args.loess_fraction,
                                 percentile=args.percentile,
                                 n_genes=args.num_genes,
                                 n_jobs=args.num_jobs,
                                 save_fig=True,
                                 fig_name=(args.output_filename_prefix +
                                           '_variable_genes.png'),
                                 fig_size=(args.fig_width, args.fig_height),
                                 fig_path="./")

    # Top-principal-component selection (only when requested).
    if args.flag_pca:
        st.select_top_principal_components(
            adata,
            feature=args.feature,
            n_pc=args.num_principal_components,
            max_pc=args.max_principal_components,
            first_pc=args.flag_firstpc,
            use_precomputed=args.flag_useprecomputed,
            save_fig=True,
            fig_name=(args.output_filename_prefix + '_pca.png'),
            fig_size=(args.fig_width, args.fig_height),
            fig_path='./')

    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')
    print('Finished computation.')
def main():
    """CLI entry point for STREAM data loading and QC filtering.

    Reads an expression matrix (pkl or text), attaches cell labels/colors,
    optionally normalizes / log2-transforms / removes mitochondrial genes,
    filters cells and genes, and writes '<prefix>_stream_result.pkl'.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--matrix", dest="input_filename", default=None,
                        help="input file name", metavar="FILE")
    parser.add_argument("-l", "--cell_labels", dest="cell_label_filename",
                        default=None, help="filename of cell labels")
    parser.add_argument("-c", "--cell_labels_colors",
                        dest="cell_label_color_filename", default=None,
                        help="filename of cell label colors")
    parser.add_argument("--log2", dest="flag_log2", action="store_true",
                        help="perform log2 transformation")
    parser.add_argument("--norm", dest="flag_norm", action="store_true",
                        help="normalize data based on library size")
    parser.add_argument("-o", "--output_folder", dest="output_folder",
                        default=None, help="Output folder")
    parser.add_argument("-rmt", "--remove_mt_genes",
                        dest="flag_remove_mt_genes", action="store_true",
                        default=False, help="Remove Mitochondrial genes")
    parser.add_argument("-mcg", "--min_count_genes", dest="min_count_genes",
                        type=int, default=None,
                        help="filter cells with less than this many genes")
    parser.add_argument("-mpg", "--min_percent_genes", dest="min_percent_genes",
                        type=float, default=None,
                        help="The minimum percent genes")
    parser.add_argument("-mpc", "--min_percent_cells", dest="min_percent_cells",
                        type=float, default=None,
                        help="The minimum percent cells")
    parser.add_argument("-mcc", "--min_count_cells", dest="min_count_cells",
                        type=int, default=None,
                        help="The minimum count cells")
    parser.add_argument("-mnc", "--min_num_cells", dest="min_num_cells",
                        type=int, default=None,
                        help="The minimum number of cells")
    parser.add_argument("-ec", "--expression_cutoff", dest="expression_cutoff",
                        type=float, default=None,
                        help="The expression cutoff")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamOutput",
                        help="output file name prefix")
    args = parser.parse_args()
    print(args)

    input_filename = args.input_filename
    cell_label_filename = args.cell_label_filename
    cell_label_color_filename = args.cell_label_color_filename
    flag_norm = args.flag_norm
    flag_log2 = args.flag_log2
    output_folder = args.output_folder  # work directory
    flag_remove_mt_genes = args.flag_remove_mt_genes
    min_count_genes = args.min_count_genes
    min_percent_cells = args.min_percent_cells
    min_percent_genes = args.min_percent_genes
    min_count_cells = args.min_count_cells
    min_num_cells = args.min_num_cells
    expression_cutoff = args.expression_cutoff
    output_filename_prefix = args.output_filename_prefix

    print('Starting mapping procedure...')
    # NOTE(review): workdir_ref is computed from -o but never used below;
    # kept so the -o option remains accepted, pending confirmation of intent.
    if output_folder is None:
        workdir_ref = os.path.join(os.getcwd(), 'stream_result')
    else:
        workdir_ref = output_folder
    workdir = "./"

    # Pickled AnnData needs the explicit pkl reader; everything else goes
    # through STREAM's generic text reader.
    if input_filename.endswith('pkl'):
        adata = st.read(file_name=input_filename, file_format='pkl',
                        workdir=workdir)
    else:
        adata = st.read(file_name=input_filename, workdir=workdir)
    print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')
    adata.var_names_make_unique()
    adata.obs_names_make_unique()

    # Attach labels/colors; STREAM assigns defaults when no file is given.
    if cell_label_filename is not None:
        st.add_cell_labels(adata, file_name=cell_label_filename)
    else:
        st.add_cell_labels(adata)
    if cell_label_color_filename is not None:
        st.add_cell_colors(adata, file_name=cell_label_color_filename)
    else:
        st.add_cell_colors(adata)

    # Optional transformations, applied in this order: normalize, log2,
    # remove mitochondrial genes.
    if flag_norm:
        st.normalize_per_cell(adata)
    if flag_log2:
        st.log_transform(adata, base=2)
    if flag_remove_mt_genes:
        st.remove_mt_genes(adata)

    st.filter_cells(adata,
                    min_pct_genes=min_percent_genes,
                    min_count=min_count_genes,
                    expr_cutoff=expression_cutoff)
    # BUG FIX: the gene filter previously passed min_count=min_count_genes
    # (the per-cell threshold), leaving the -mcc/--min_count_cells option
    # parsed but dead.  Use the per-gene threshold here.
    st.filter_genes(adata,
                    min_num_cells=min_num_cells,
                    min_pct_cells=min_percent_cells,
                    min_count=min_count_cells,
                    expr_cutoff=expression_cutoff)

    print("Writing " + output_filename_prefix + " " +
          args.output_filename_prefix + "_stream_result.pkl")
    st.write(adata,
             file_name=(output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')
    print('Finished computation.')
def main(): sns.set_style('white') sns.set_context('poster') parser = argparse.ArgumentParser( description='%s Parameters' % __tool_name__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( "-m", "--data-file", dest="input_filename", default=None, help="input file name, pkl format from Stream preprocessing module", metavar="FILE") parser.add_argument("-of", "--of", dest="output_filename_prefix", default="StreamiFSOutput", help="output file name prefix") parser.add_argument("--flag_useprecomputed", dest="flag_useprecomputed", action="store_true", help="Save the figure") parser.add_argument("-nb_pct", "--percent_neighbor_cells", dest="nb_pct", type=float, default=None, help="") parser.add_argument("-n_comp_k", dest="n_comp_k", type=int, default=None, help="") parser.add_argument("-perplexity", dest="perplexity", type=float, default=None, help="") parser.add_argument("-method", dest="method", default=None, help="") parser.add_argument("-color_by", dest="color_by", default='label', help="") parser.add_argument("-fig_width", dest="fig_width", type=int, default=8, help="") parser.add_argument("-fig_height", dest="fig_height", type=int, default=8, help="") parser.add_argument("-fig_legend_ncol", dest="fig_legend_ncol", type=int, default=None, help="") args = parser.parse_args() print('Starting ...') workdir = "./" adata = st.read(file_name=args.input_filename, file_format='pkl', experiment='rna-seq', workdir=workdir) st.plot_visualization_2D(adata, method=args.method, nb_pct=args.nb_pct, perplexity=args.perplexity, color_by=args.color_by, use_precomputed=args.flag_useprecomputed, save_fig=True, fig_path='./', fig_name=(args.output_filename_prefix + "_2D_plot.png"), fig_size=(args.fig_width, args.fig_height), fig_legend_ncol=args.fig_legend_ncol) st.write(adata, file_name=(args.output_filename_prefix + '_stream_result.pkl'), file_path='./', file_format='pkl') print('Finished computation.')