예제 #1
0
def read_stream(stream_save_path, vcfg, mq: Queue):
    """
    Read real-time video streams described by the provided configuration.

    :param stream_save_path: directory where the stream of each video is saved
    :param vcfg: video configurations
    :param mq: inter-process queue into which the stream receiver writes the
        newest stream index, consumed by the object detector
    :return: True after ``stream.read`` returns
    """
    # Delegate the receive loop to the stream module; presumably this call
    # blocks until the stream ends -- TODO confirm against stream.read.
    stream.read(stream_save_path, vcfg, mq)
    return True
예제 #2
0
def test_mixture_feature_parallel():
    """Split fbank/mfcc into parallel branches, process each, then recombine."""

    # Load the waveform and slice it into frames.
    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    # Build the input pipe: ten packets of 50 frames each.
    inputPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inputPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inputPipe.stop()

    extractor = feature.MixtureExtractor(mixType=["fbank", "mfcc"], )

    # Route each feature type to its own output pipe.
    def split_rule(items):
        return {"fbank": items["fbank"]}, {"mfcc": items["mfcc"]}

    spliter = joint.Spliter(split_rule, outNums=2)

    # fbank branch: splice 2 frames on each side + frame-sliding CMVN.
    fbankProcessor = feature.MatrixFeatureProcessor(
        spliceLeft=2,
        spliceRight=2,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="fbank",
    )
    # mfcc branch: splice 3 frames on each side + frame-sliding CMVN.
    mfccProcessor = feature.MatrixFeatureProcessor(
        spliceLeft=3,
        spliceRight=3,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="mfcc",
    )

    # Merge the two branches back into a single feature matrix.
    def combine_rule(items):
        merged = np.concatenate([items[0]["fbank"], items[1]["mfcc"]], axis=1)
        return {"feat": merged}

    combiner = joint.Combiner(combine_rule)

    # Wire up the pipeline: extract -> split -> (process, process) -> combine.
    extractor.start(inPIPE=inputPipe)
    spliter.start(inPIPE=extractor.outPIPE)
    fbankProcessor.start(inPIPE=spliter.outPIPE[0])  # specify which key you want to process
    mfccProcessor.start(inPIPE=spliter.outPIPE[1])  # specify which key you want to process
    combiner.start(inPIPE=[fbankProcessor.outPIPE, mfccProcessor.outPIPE])
    combiner.wait()

    print(combiner.outPIPE[0].size())
    result = combiner.outPIPE[0].get()
    print(result.keys())
    print(result["feat"].shape)  # 211 = 120 + 91
예제 #3
0
def test_mfcc_extractor():
    """Feed framed wave data through an MFCC extractor and inspect the output."""

    # Read the waveform and split it into frames.
    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    # Build the input pipe: ten packets of 50 frames each.
    inputPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inputPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inputPipe.stop()
    print(inputPipe.size())

    # Run the extractor and wait until it has drained the input pipe.
    extractor = feature.MfccExtractor(oKey="mfcc")
    extractor.start(inPIPE=inputPipe)
    extractor.wait()

    print(extractor.outPIPE.size())
    result = extractor.outPIPE.get()
    print(result.keys())
    print(result.mainKey)
    print(result["mfcc"].shape)
예제 #4
0
def test_processor_cmvn():
    """Extract MFCC features, then post-process with splicing and sliding CMVN."""

    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    # Ten packets of 50 frames each feed the pipeline.
    inputPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inputPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inputPipe.stop()

    extractor = feature.MfccExtractor(minParallelSize=100, oKey="mfcc")
    # Splice 3 frames on each side and apply frame-sliding CMVN.
    processor = feature.MatrixFeatureProcessor(
        spliceLeft=3,
        spliceRight=3,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="mfcc",
    )

    # Chain extractor -> processor; iKey selects the feature to transform.
    extractor.start(inPIPE=inputPipe)
    processor.start(inPIPE=extractor.outPIPE, iKey="mfcc")
    processor.wait()

    print(processor.outPIPE.size())
    result = processor.outPIPE.get()
    print(result.keys())
    print(result.mainKey)
    print(result["mfcc"].shape)  # 273 = 13 * 3 * 7
예제 #5
0
def get_genes():
    """
    Return all gene names of the requested dataset as a JSON list.

    Example:
        http://127.0.0.1:8000/genes?db_name=1_scanpy_10xpbmc
    """
    db_name = request.args.get("db_name")
    # NOTE: the original code wrapped `del adata; gc.collect()` in a bare
    # try/except here, but `adata` is a local name that is never bound at that
    # point, so the `del` always raised UnboundLocalError and was silently
    # swallowed -- pure dead code, removed.
    dataset_path = glob(os.path.join(DATASET_DIRECTORY, f"{db_name}.*"))[0]
    if get_dataset_type_adata(db_name).lower() == 'stream':
        # STREAM datasets are stored as pkl files.
        adata = st.read(dataset_path, file_format="pkl", workdir="./")
    else:
        adata = sc.read(dataset_path)

    genes = list(adata.var_names)
    # Release the (potentially large) AnnData object before serializing.
    del adata
    gc.collect()
    return jsonify(genes)
예제 #6
0
def get_available_annotations():
    """
    Return the annotation (obs column) names available for a dataset.

    Example:
        http://127.0.0.1:8000/columns?db_name=1_scanpy_10xpbmc
    """
    db_name = request.args.get("db_name")
    filename = glob(os.path.join(DATASET_DIRECTORY, f"{db_name}.*"))[0]

    # NOTE: the original code wrapped `del adata; gc.collect()` in a bare
    # try/except here, but `adata` is a local name that is never bound at that
    # point, so the `del` always raised UnboundLocalError and was silently
    # swallowed -- pure dead code, removed.
    if get_dataset_type_adata(db_name).lower() in [
            "scanpy", "velocity", "seurat", "paga"
    ]:
        adata = sc.read(filename)
    else:
        # Everything else is a STREAM pkl dataset.
        adata = st.read(filename, file_format="pkl", workdir="./")

    # Hack: drop two STREAM-internal annotations that don't work in the
    # annotation menu.
    annotations = [
        name for name in list(adata.obs.columns)
        if name not in ['branch_id', 'branch_id_alias']
    ]
    # Release the (potentially large) AnnData object before responding.
    del adata
    gc.collect()
    return jsonify(annotations)
예제 #7
0
def main():
    """CLI entry point: detect and plot differentially expressed genes with STREAM."""
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(description='%s Parameters' % __tool_name__ ,formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--data-file", dest="input_filename",default = None, help="input file name, pkl format from Stream preprocessing module", metavar="FILE")
    parser.add_argument("-of","--of",dest="output_filename_prefix", default="StreamiFSOutput",  help="output file name prefix")
    parser.add_argument("-fig_width",dest="fig_width", type=int, default=8, help="")
    parser.add_argument("-fig_height",dest="fig_height", type=int, default=8, help="")
    parser.add_argument("-cutoff_zscore",dest="cutoff_zscore", type=float, default=1.5, help="")
    parser.add_argument("-percentile_expr",dest="percentile_expr", type=int, default=95, help="")
    parser.add_argument("-flag_use_precomputed",dest="flag_use_precomputed", action="store_true", help="")
    parser.add_argument("-root",dest="root", default=None, help="")
    parser.add_argument("-preference",dest="preference", default=None, help="")
    parser.add_argument("-cutoff_logfc",dest="cutoff_logfc", type=float, default=0.25, help="")
    parser.add_argument("-num_genes",dest="num_genes", type=int, default=15, help="")
    parser.add_argument("-n_jobs",dest="n_jobs", type=int, default=8, help="")

    args = parser.parse_args()

    workdir = "./"

    adata = st.read(file_name=args.input_filename, file_format='pkl', experiment='rna-seq', workdir=workdir)
    # Fix: -preference defaults to None and None.split(',') raised
    # AttributeError; only split when a preference string was actually given.
    preference = args.preference.split(',') if args.preference else None

    st.detect_de_genes(adata,cutoff_zscore=args.cutoff_zscore,cutoff_logfc=args.cutoff_logfc,percentile_expr=args.percentile_expr,n_jobs=args.n_jobs,
                   use_precomputed=args.flag_use_precomputed, root=args.root, preference=preference)
    st.plot_de_genes(adata, num_genes=args.num_genes,cutoff_zscore=args.cutoff_zscore, cutoff_logfc=args.cutoff_logfc, save_fig=True,fig_path=None,fig_size=(args.fig_width,args.fig_height))

    # Persist the updated AnnData object next to the other STREAM outputs.
    st.write(adata,file_name=(args.output_filename_prefix + '_stream_result.pkl'),file_path='./',file_format='pkl')

    print('Finished computation.')
예제 #8
0
def test_functions():
    """Exercise stream.read and both snip modes of stream.cut_frames."""

    # Load wave info and raw samples.
    wav = stream.read(wavPath)
    print(wav.value)

    samples = wav.value[:-10]

    # Frame the stream, discarding the trailing remainder.
    snipped = stream.cut_frames(samples, width=400, shift=160, snip=True)
    print(snipped.shape)

    # Frame the stream, keeping the trailing remainder.
    padded = stream.cut_frames(samples, width=400, shift=160, snip=False)
    print(padded.shape)
예제 #9
0
def test_mixture_feature_series():
    """Run fbank and mfcc processors in series on a mixture extractor's output."""

    waveform = stream.read(wavPath).value
    allFrames = stream.cut_frames(waveform)

    # Ten packets of 50 frames each feed the pipeline.
    inputPipe = base.PIPE()
    for cid in range(10):
        segment = allFrames[cid * 50:(cid + 1) * 50]
        inputPipe.put(base.Packet({"rawWave": segment}, cid=cid, idmaker=0))
    inputPipe.stop()

    extractor = feature.MixtureExtractor(mixType=["fbank", "mfcc"], )

    # fbank stage: splice 2 frames on each side + frame-sliding CMVN.
    fbankProcessor = feature.MatrixFeatureProcessor(
        spliceLeft=2,
        spliceRight=2,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="fbank",
    )
    # mfcc stage: splice 3 frames on each side + frame-sliding CMVN.
    mfccProcessor = feature.MatrixFeatureProcessor(
        spliceLeft=3,
        spliceRight=3,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
        oKey="mfcc",
    )

    # Chain the stages; iKey selects which feature each stage transforms.
    extractor.start(inPIPE=inputPipe)
    fbankProcessor.start(inPIPE=extractor.outPIPE,
                         iKey="fbank")  # specify which key you want to process
    mfccProcessor.start(inPIPE=fbankProcessor.outPIPE,
                        iKey="mfcc")  # specify which key you want to process
    mfccProcessor.wait()

    print(mfccProcessor.outPIPE.size())
    result = mfccProcessor.outPIPE.get()
    print(result.keys())
    print(result["fbank"].shape)  # 120 = 24 * 5
    print(result["mfcc"].shape)  # 91 = 13 * 7
예제 #10
0
파일: event.py 프로젝트: ljxia/shiftserver
def canRead(id, userId):
    """
    Check if a user can read an event. Allowed under the following conditions:
        1. the user is admin.
        2. the stream is public.
        3. the stream is readable by the user.
    Parameters:
        id - an event id.
        userId - a user id.
    Returns:
        bool.
    """
    # Admins may read everything.
    if user.isAdmin(userId):
        return True
    # BUG(review): `data` is not defined anywhere in this scope, so this line
    # raises NameError whenever a non-admin reaches it. The sibling
    # canCreate() takes `data` as a parameter; this function probably needs
    # to load the event by `id` first -- TODO confirm and fix.
    streamId = data["streamId"]
    # NOTE(review): reading the stream by userId looks wrong -- presumably
    # this should be stream.read(streamId); verify against stream.read's API.
    theStream = stream.read(userId)
    if not theStream["private"]:
        return True
    # Fall back to the explicit per-user list of readable streams.
    readable = permission.readableStreams(userId)
    return (streamId in readable)
예제 #11
0
파일: event.py 프로젝트: ljxia/shiftserver
def canCreate(data, userId):
    """
    Check if a user can create an event. Allowed under the following conditions:
        1. user is admin.
        2. the stream is public.
        3. the stream is writeable by the user.
    Parameters:
        data - the event data.
        userId - a user id.
    Returns:
        bool.
    """
    # Admins may create events anywhere.
    if user.isAdmin(userId):
        return True
    # The event data carries the id of the stream it targets.
    streamId = data["streamId"]
    # NOTE(review): reading the stream by userId looks wrong -- presumably
    # this should be stream.read(streamId); verify against stream.read's API.
    theStream = stream.read(userId)
    if not theStream["private"]:
        return True
    # Fall back to the explicit per-user list of writeable streams.
    writeable = permission.writeableStreams(userId)
    return (streamId in writeable)
def main():
    """CLI entry point: learn an elastic principal graph with STREAM and plot its branches.

    Reads a pkl produced by the STREAM preprocessing module, fits the elastic
    principal graph, renders branch plots, and writes the updated result pkl.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(description='%s Parameters' % __tool_name__ ,formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Input/output file options.
    parser.add_argument("-m", "--data-file", dest="input_filename",default = None, help="input file name, pkl format from Stream preprocessing module", metavar="FILE")
    parser.add_argument("-of","--of",dest="output_filename_prefix", default="StreamiFSOutput",  help="output file name prefix")


    # Elastic-principal-graph hyperparameters (passed straight through to
    # st.elastic_principal_graph below).
    parser.add_argument("-epg_n_nodes",dest="epg_n_nodes", type=int, default=50, help="")
    parser.add_argument("-incr_n_nodes",dest="incr_n_nodes", type=int, default=30, help="")
    parser.add_argument("-epg_trimmingradius",dest="epg_trimmingradius",  default='Inf', help="")
    parser.add_argument("-epg_alpha",dest="epg_alpha", type=float, default=0.02, help="")
    parser.add_argument("-epg_beta",dest="epg_beta", type=float, default=0.0, help="")
    parser.add_argument("-epg_n_processes",dest="epg_n_processes", type=int, default=1, help="")
    parser.add_argument("-epg_lambda",dest="epg_lambda", type=float, default=0.02, help="")
    parser.add_argument("-epg_mu",dest="epg_mu", type=float, default=0.1, help="")
    parser.add_argument("-epg_finalenergy",dest="epg_finalenergy",  default='Penalized', help="")


    # Plotting options: which components to draw and figure geometry.
    parser.add_argument("-comp1",dest="comp1", type = int, default=0,  help="")
    parser.add_argument("-comp2",dest="comp2", type = int, default=1,  help="")
    parser.add_argument("-n_comp",dest="n_comp", type = int, default=3,  help="")
    parser.add_argument("-fig_width",dest="fig_width", type=int, default=8, help="")
    parser.add_argument("-fig_height",dest="fig_height", type=int, default=8, help="")
    parser.add_argument("-fig_legend_ncol",dest="fig_legend_ncol", type=int, default=None, help="")


    args = parser.parse_args()

    workdir = "./"

    adata = st.read(file_name=args.input_filename, file_format='pkl', experiment='rna-seq', workdir=workdir)

    st.elastic_principal_graph(adata,epg_n_nodes=args.epg_n_nodes,incr_n_nodes=args.incr_n_nodes,epg_trimmingradius=args.epg_trimmingradius,epg_alpha=args.epg_alpha, epg_n_processes=args.epg_n_processes, epg_lambda=args.epg_lambda,epg_mu=args.epg_mu, epg_beta=args.epg_beta, epg_finalenergy=args.epg_finalenergy)

    # Render the learned branches with and without the cells overlaid.
    st.plot_branches(adata, n_components=args.n_comp, comp1=args.comp1, comp2=args.comp2, save_fig=True, fig_name=(args.output_filename_prefix +'_branches.png'), fig_path=None,fig_size=(args.fig_width, args.fig_height))
    st.plot_branches_with_cells(adata,n_components=args.n_comp,comp1=args.comp1,comp2=args.comp2, save_fig=True,fig_name=(args.output_filename_prefix +'_branches_with_cells.png'),fig_path=None,fig_size=(args.fig_width, args.fig_height),fig_legend_ncol=args.fig_legend_ncol)

    # Persist the updated AnnData object next to the other STREAM outputs.
    st.write(adata,file_name=(args.output_filename_prefix + '_stream_result.pkl'),file_path='./',file_format='pkl')

    print('Finished computation.')
def main():
    """CLI entry point: seed an elastic principal graph with STREAM and plot its branches.

    Reads a pkl produced by the STREAM preprocessing module, seeds the initial
    graph structure via clustering, renders branch plots, and writes the
    updated result pkl.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(description='%s Parameters' % __tool_name__ ,formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Input/output file options.
    parser.add_argument("-m", "--data-file", dest="input_filename",default = None, help="input file name, pkl format from Stream preprocessing module", metavar="FILE")
    parser.add_argument("-of","--of",dest="output_filename_prefix", default="StreamiFSOutput",  help="output file name prefix")

    # Seeding/clustering hyperparameters (passed straight through to
    # st.seed_elastic_principal_graph below).
    parser.add_argument("-nb_pct","--percent_neighbor_cells",dest="nb_pct", type=float, default=0.1, help="")
    parser.add_argument("-n_clusters",dest="n_clusters", type = int, default=10,  help="")
    parser.add_argument("-damping",dest="damping", type=float, default=0.75,   help="")
    parser.add_argument("-pref_perc",dest="pref_perc", type=int, default=50,   help="")
    parser.add_argument("-max_n_clusters",dest="max_n_clusters", type=int, default=200,   help="")

    parser.add_argument("-clustering",dest="clustering",  default='kmeans',  help="")

    # Plotting options: which components to draw and figure geometry.
    parser.add_argument("-comp1",dest="comp1", type = int, default=0,  help="")
    parser.add_argument("-comp2",dest="comp2", type = int, default=1,  help="")
    parser.add_argument("-n_comp",dest="n_comp", type = int, default=3,  help="")


    parser.add_argument("-fig_width",dest="fig_width", type=int, default=8, help="")
    parser.add_argument("-fig_height",dest="fig_height", type=int, default=8, help="")
    parser.add_argument("-fig_legend_ncol",dest="fig_legend_ncol", type=int, default=None, help="")


    args = parser.parse_args()

    print('Starting validation procedure...')
    workdir = "./"

    adata = st.read(file_name=args.input_filename, file_format='pkl', experiment='rna-seq', workdir=workdir)

    st.seed_elastic_principal_graph(adata, clustering=args.clustering, n_clusters=args.n_clusters, damping=args.damping, pref_perc=args.pref_perc,  max_n_clusters=args.max_n_clusters, nb_pct=args.nb_pct)
    # Render the seeded branches with and without the cells overlaid.
    st.plot_branches(adata, n_components=args.n_comp, comp1=args.comp1, comp2=args.comp2, save_fig=True, fig_name=(args.output_filename_prefix +'_branches.png'), fig_path=None,fig_size=(args.fig_width, args.fig_height))
    st.plot_branches_with_cells(adata,n_components=args.n_comp,comp1=args.comp1,comp2=args.comp2, save_fig=True,fig_name=(args.output_filename_prefix +'_branches_with_cells.png'),fig_path=None,fig_size=(args.fig_width, args.fig_height),fig_legend_ncol=args.fig_legend_ncol)

    # Persist the updated AnnData object next to the other STREAM outputs.
    st.write(adata,file_name=(args.output_filename_prefix + '_stream_result.pkl'),file_path='./',file_format='pkl')

    print('Finished computation.')
예제 #14
0
 def __init__(self, fileName, cellLabel, cellLabelColor, rawCount = True):
     """Load an mtx count matrix plus cell labels/colors and prepare the dataset.

     :param fileName: path of the mtx matrix passed to st.read
     :param cellLabel: file of per-cell labels for st.add_cell_labels
     :param cellLabelColor: file of label colors for st.add_cell_colors
     :param rawCount: when True, normalize per cell and log-transform
     """
     self.adata = st.read(file_name = fileName, file_format = 'mtx')
     st.add_cell_labels(self.adata, file_name = cellLabel)
     st.add_cell_colors(self.adata, file_name = cellLabelColor)
     # De-duplicate gene and cell names before snapshotting them.
     self.adata.var_names_make_unique()
     self.adata.obs_names_make_unique()
     self.allCells = self.adata.obs.index.to_list()
     self.allGenes = self.adata.var.index.to_list()
     print('Raw input parsed...')
     print(self.adata)
     self.nCells = self.adata.n_obs
     self.nGenes = self.adata.n_vars
     self._keepCurrentRecords()
     # Mitochondrial genes are always removed; normalization is optional.
     st.remove_mt_genes(self.adata)
     if rawCount:
         st.normalize_per_cell(self.adata)
         st.log_transform(self.adata)
     # Backup 0 captures the freshly loaded state for later restore.
     self.backupDict = {}
     self.backupKey = 0
     self.backup(0)
     print('Initial backup saved with key: 0')
     print('Restore with self.restoreFromBackup()')
예제 #15
0
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc

import numpy as np
import stream as st

import matplotlib

# Select the non-interactive Agg backend before anything imports pyplot.
matplotlib.use('Agg')

from app import app

# Load the precomputed STREAM result once at import time; this AnnData
# object backs the visualization page below.
adata = st.read(file_name='./SampleData/SCoPE2_2020/stream_result_var.pkl',
                workdir='./stream_result')

# Options offered by the UI dropdowns.
available_samples = [
    'Nestorowa, S. et al. 2016', 'Harrison, S. et al. 2021',
    'Trapnell, C. et al. 2014', ' Tang, Q. et al. 2017'
]
available_projections = [
    'dimension_reduction', 'visualization_2D', 'flat_tree', 'branches'
]
# Any obs column of the loaded AnnData can be used to color cells.
available_colors = adata.obs.columns
available_stream = ['single_cell_stream', 'stream']

layout = html.Div([
    dbc.Container([
        dbc.Col(html.H1("Visualization", className="text-center"),
                className="mb-5 mt-5"),
예제 #16
0
def main():
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m",
                        "--matrix",
                        dest="input_filename",
                        default=None,
                        help="input file name",
                        metavar="FILE")
    parser.add_argument("-l",
                        "--cell_labels",
                        dest="cell_label_filename",
                        default=None,
                        help="filename of cell labels")
    parser.add_argument("-c",
                        "--cell_labels_colors",
                        dest="cell_label_color_filename",
                        default=None,
                        help="filename of cell label colors")
    parser.add_argument(
        "-s",
        "--select_features",
        dest="s_method",
        default='LOESS',
        help=
        "LOESS,PCA or all: Select variable genes using LOESS or principal components using PCA or all the genes are kept"
    )
    parser.add_argument("--TG",
                        "--detect_TG_genes",
                        dest="flag_gene_TG_detection",
                        action="store_true",
                        help="detect transition genes automatically")
    parser.add_argument("--DE",
                        "--detect_DE_genes",
                        dest="flag_gene_DE_detection",
                        action="store_true",
                        help="detect DE genes automatically")
    parser.add_argument("--LG",
                        "--detect_LG_genes",
                        dest="flag_gene_LG_detection",
                        action="store_true",
                        help="detect leaf genes automatically")
    parser.add_argument(
        "-g",
        "--genes",
        dest="genes",
        default=None,
        help=
        "genes to visualize, it can either be filename which contains all the genes in one column or a set of gene names separated by comma"
    )
    parser.add_argument(
        "-p",
        "--use_precomputed",
        dest="use_precomputed",
        action="store_true",
        help=
        "use precomputed data files without re-computing structure learning part"
    )
    parser.add_argument("--new",
                        dest="new_filename",
                        default=None,
                        help="file name of data to be mapped")
    parser.add_argument("--new_l",
                        dest="new_label_filename",
                        default=None,
                        help="filename of new cell labels")
    parser.add_argument("--new_c",
                        dest="new_label_color_filename",
                        default=None,
                        help="filename of new cell label colors")
    parser.add_argument("--log2",
                        dest="flag_log2",
                        action="store_true",
                        help="perform log2 transformation")
    parser.add_argument("--norm",
                        dest="flag_norm",
                        action="store_true",
                        help="normalize data based on library size")
    parser.add_argument("--atac",
                        dest="flag_atac",
                        action="store_true",
                        help="indicate scATAC-seq data")
    parser.add_argument(
        "--n_jobs",
        dest="n_jobs",
        type=int,
        default=1,
        help="Specify the number of processes to use. (default, 1")
    parser.add_argument(
        "--loess_frac",
        dest="loess_frac",
        type=float,
        default=0.1,
        help="The fraction of the data used in LOESS regression")
    parser.add_argument(
        "--loess_cutoff",
        dest="loess_cutoff",
        type=int,
        default=95,
        help=
        "the percentile used in variable gene selection based on LOESS regression"
    )
    parser.add_argument("--pca_first_PC",
                        dest="flag_first_PC",
                        action="store_true",
                        help="keep first PC")
    parser.add_argument("--pca_n_PC",
                        dest="pca_n_PC",
                        type=int,
                        default=15,
                        help="The number of selected PCs,it's 15 by default")
    parser.add_argument(
        "--dr_method",
        dest="dr_method",
        default='se',
        help=
        "Method used for dimension reduction. Choose from {{'se','mlle','umap','pca'}}"
    )
    parser.add_argument("--n_neighbors",
                        dest="n_neighbors",
                        type=float,
                        default=50,
                        help="The number of neighbor cells")
    parser.add_argument(
        "--nb_pct",
        dest="nb_pct",
        type=float,
        default=None,
        help=
        "The percentage of neighbor cells (when sepcified, it will overwrite n_neighbors)."
    )
    parser.add_argument("--n_components",
                        dest="n_components",
                        type=int,
                        default=3,
                        help="Number of components to keep.")
    parser.add_argument(
        "--clustering",
        dest="clustering",
        default='kmeans',
        help=
        "Clustering method used for seeding the intial structure, choose from 'ap','kmeans','sc'"
    )
    parser.add_argument("--damping",
                        dest="damping",
                        type=float,
                        default=0.75,
                        help="Affinity Propagation: damping factor")
    parser.add_argument(
        "--n_clusters",
        dest="n_clusters",
        type=int,
        default=10,
        help="Number of clusters for spectral clustering or kmeans")
    parser.add_argument("--EPG_n_nodes",
                        dest="EPG_n_nodes",
                        type=int,
                        default=50,
                        help=" Number of nodes for elastic principal graph")
    parser.add_argument(
        "--EPG_lambda",
        dest="EPG_lambda",
        type=float,
        default=0.02,
        help="lambda parameter used to compute the elastic energy")
    parser.add_argument("--EPG_mu",
                        dest="EPG_mu",
                        type=float,
                        default=0.1,
                        help="mu parameter used to compute the elastic energy")
    parser.add_argument(
        "--EPG_trimmingradius",
        dest="EPG_trimmingradius",
        type=float,
        default=np.inf,
        help="maximal distance of point from a node to affect its embedment")
    parser.add_argument(
        "--EPG_alpha",
        dest="EPG_alpha",
        type=float,
        default=0.02,
        help=
        "positive numeric, the value of the alpha parameter of the penalized elastic energy"
    )
    parser.add_argument("--EPG_collapse",
                        dest="flag_EPG_collapse",
                        action="store_true",
                        help="collapsing small branches")
    parser.add_argument(
        "--EPG_collapse_mode",
        dest="EPG_collapse_mode",
        default="PointNumber",
        help=
        "the mode used to collapse branches. PointNumber,PointNumber_Extrema, PointNumber_Leaves,EdgesNumber or EdgesLength"
    )
    parser.add_argument(
        "--EPG_collapse_par",
        dest="EPG_collapse_par",
        type=float,
        default=5,
        help=
        "positive numeric, the cotrol paramter used for collapsing small branches"
    )
    parser.add_argument("--disable_EPG_optimize",
                        dest="flag_disable_EPG_optimize",
                        action="store_true",
                        help="disable optimizing branching")
    parser.add_argument("--EPG_shift",
                        dest="flag_EPG_shift",
                        action="store_true",
                        help="shift branching point ")
    parser.add_argument(
        "--EPG_shift_mode",
        dest="EPG_shift_mode",
        default='NodeDensity',
        help=
        "the mode to use to shift the branching points NodePoints or NodeDensity"
    )
    parser.add_argument(
        "--EPG_shift_DR",
        dest="EPG_shift_DR",
        type=float,
        default=0.05,
        help=
        "positive numeric, the radius to be used when computing point density if EPG_shift_mode is NodeDensity"
    )
    parser.add_argument(
        "--EPG_shift_maxshift",
        dest="EPG_shift_maxshift",
        type=int,
        default=5,
        help=
        "positive integer, the maxium distance (as number of edges) to consider when exploring the branching point neighborhood"
    )
    parser.add_argument("--disable_EPG_ext",
                        dest="flag_disable_EPG_ext",
                        action="store_true",
                        help="disable extending leaves with additional nodes")
    parser.add_argument(
        "--EPG_ext_mode",
        dest="EPG_ext_mode",
        default='QuantDists',
        help=
        " the mode used to extend the graph,QuantDists, QuantCentroid or WeigthedCentroid"
    )
    parser.add_argument(
        "--EPG_ext_par",
        dest="EPG_ext_par",
        type=float,
        default=0.5,
        help=
        "the control parameter used for contribution of the different data points when extending leaves with nodes"
    )
    parser.add_argument("--DE_zscore_cutoff",
                        dest="DE_zscore_cutoff",
                        default=2,
                        help="Differentially Expressed Genes z-score cutoff")
    parser.add_argument(
        "--DE_logfc_cutoff",
        dest="DE_logfc_cutoff",
        default=0.25,
        help="Differentially Expressed Genes log fold change cutoff")
    parser.add_argument("--TG_spearman_cutoff",
                        dest="TG_spearman_cutoff",
                        default=0.4,
                        help="Transition Genes Spearman correlation cutoff")
    parser.add_argument("--TG_logfc_cutoff",
                        dest="TG_logfc_cutoff",
                        default=0.25,
                        help="Transition Genes log fold change cutoff")
    parser.add_argument("--LG_zscore_cutoff",
                        dest="LG_zscore_cutoff",
                        default=1.5,
                        help="Leaf Genes z-score cutoff")
    parser.add_argument("--LG_pvalue_cutoff",
                        dest="LG_pvalue_cutoff",
                        default=1e-2,
                        help="Leaf Genes p value cutoff")
    parser.add_argument(
        "--umap",
        dest="flag_umap",
        action="store_true",
        help="whether to use UMAP for visualization (default: No)")
    parser.add_argument("-r",
                        dest="root",
                        default=None,
                        help="root node for subwaymap_plot and stream_plot")
    parser.add_argument("--stream_log_view",
                        dest="flag_stream_log_view",
                        action="store_true",
                        help="use log2 scale for y axis of stream_plot")
    parser.add_argument("-o",
                        "--output_folder",
                        dest="output_folder",
                        default=None,
                        help="Output folder")
    parser.add_argument("--for_web",
                        dest="flag_web",
                        action="store_true",
                        help="Output files for website")
    parser.add_argument(
        "--n_genes",
        dest="n_genes",
        type=int,
        default=5,
        help=
        "Number of top genes selected from each output marker gene file for website gene visualization"
    )

    args = parser.parse_args()
    if (args.input_filename is None) and (args.new_filename is None):
        parser.error("at least one of -m, --new required")

    new_filename = args.new_filename
    new_label_filename = args.new_label_filename
    new_label_color_filename = args.new_label_color_filename
    flag_stream_log_view = args.flag_stream_log_view
    flag_gene_TG_detection = args.flag_gene_TG_detection
    flag_gene_DE_detection = args.flag_gene_DE_detection
    flag_gene_LG_detection = args.flag_gene_LG_detection
    flag_web = args.flag_web
    flag_first_PC = args.flag_first_PC
    flag_umap = args.flag_umap
    genes = args.genes
    DE_zscore_cutoff = args.DE_zscore_cutoff
    DE_logfc_cutoff = args.DE_logfc_cutoff
    TG_spearman_cutoff = args.TG_spearman_cutoff
    TG_logfc_cutoff = args.TG_logfc_cutoff
    LG_zscore_cutoff = args.LG_zscore_cutoff
    LG_pvalue_cutoff = args.LG_pvalue_cutoff
    root = args.root

    input_filename = args.input_filename
    cell_label_filename = args.cell_label_filename
    cell_label_color_filename = args.cell_label_color_filename
    s_method = args.s_method
    use_precomputed = args.use_precomputed
    n_jobs = args.n_jobs
    loess_frac = args.loess_frac
    loess_cutoff = args.loess_cutoff
    pca_n_PC = args.pca_n_PC
    flag_log2 = args.flag_log2
    flag_norm = args.flag_norm
    flag_atac = args.flag_atac
    dr_method = args.dr_method
    nb_pct = args.nb_pct  # neighbour percent
    n_neighbors = args.n_neighbors
    n_components = args.n_components  #number of components to keep
    clustering = args.clustering
    damping = args.damping
    n_clusters = args.n_clusters
    EPG_n_nodes = args.EPG_n_nodes
    EPG_lambda = args.EPG_lambda
    EPG_mu = args.EPG_mu
    EPG_trimmingradius = args.EPG_trimmingradius
    EPG_alpha = args.EPG_alpha
    flag_EPG_collapse = args.flag_EPG_collapse
    EPG_collapse_mode = args.EPG_collapse_mode
    EPG_collapse_par = args.EPG_collapse_par
    flag_EPG_shift = args.flag_EPG_shift
    EPG_shift_mode = args.EPG_shift_mode
    EPG_shift_DR = args.EPG_shift_DR
    EPG_shift_maxshift = args.EPG_shift_maxshift
    flag_disable_EPG_optimize = args.flag_disable_EPG_optimize
    flag_disable_EPG_ext = args.flag_disable_EPG_ext
    EPG_ext_mode = args.EPG_ext_mode
    EPG_ext_par = args.EPG_ext_par
    output_folder = args.output_folder  #work directory
    n_genes = args.n_genes

    if (flag_web):
        flag_savefig = False
    else:
        flag_savefig = True
    gene_list = []
    if (genes != None):
        if (os.path.exists(genes)):
            gene_list = pd.read_csv(genes,
                                    sep='\t',
                                    header=None,
                                    index_col=None,
                                    compression='gzip' if genes.split('.')[-1]
                                    == 'gz' else None).iloc[:, 0].tolist()
            gene_list = list(set(gene_list))
        else:
            gene_list = genes.split(',')
        print('Genes to visualize: ')
        print(gene_list)
    if (new_filename is None):
        if (output_folder == None):
            workdir = os.path.join(os.getcwd(), 'stream_result')
        else:
            workdir = output_folder
        if (use_precomputed):
            print('Importing the precomputed pkl file...')
            adata = st.read(file_name='stream_result.pkl',
                            file_format='pkl',
                            file_path=workdir,
                            workdir=workdir)
        else:
            if (flag_atac):
                print('Reading in atac zscore matrix...')
                adata = st.read(file_name=input_filename,
                                workdir=workdir,
                                experiment='atac-seq')
            else:
                adata = st.read(file_name=input_filename, workdir=workdir)
                print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
                      str(adata.var.shape[0]) + ' genes')
            adata.var_names_make_unique()
            adata.obs_names_make_unique()
            if (cell_label_filename != None):
                st.add_cell_labels(adata, file_name=cell_label_filename)
            else:
                st.add_cell_labels(adata)
            if (cell_label_color_filename != None):
                st.add_cell_colors(adata, file_name=cell_label_color_filename)
            else:
                st.add_cell_colors(adata)
            if (flag_atac):
                print('Selecting top principal components...')
                st.select_top_principal_components(adata,
                                                   n_pc=pca_n_PC,
                                                   first_pc=flag_first_PC,
                                                   save_fig=True)
                st.dimension_reduction(adata,
                                       method=dr_method,
                                       n_components=n_components,
                                       n_neighbors=n_neighbors,
                                       nb_pct=nb_pct,
                                       n_jobs=n_jobs,
                                       feature='top_pcs')
            else:
                if (flag_norm):
                    st.normalize_per_cell(adata)
                if (flag_log2):
                    st.log_transform(adata)
                if (s_method != 'all'):
                    print('Filtering genes...')
                    st.filter_genes(adata, min_num_cells=5)
                    print('Removing mitochondrial genes...')
                    st.remove_mt_genes(adata)
                    if (s_method == 'LOESS'):
                        print('Selecting most variable genes...')
                        st.select_variable_genes(adata,
                                                 loess_frac=loess_frac,
                                                 percentile=loess_cutoff,
                                                 save_fig=True)
                        pd.DataFrame(adata.uns['var_genes']).to_csv(
                            os.path.join(workdir,
                                         'selected_variable_genes.tsv'),
                            sep='\t',
                            index=None,
                            header=False)
                        st.dimension_reduction(adata,
                                               method=dr_method,
                                               n_components=n_components,
                                               n_neighbors=n_neighbors,
                                               nb_pct=nb_pct,
                                               n_jobs=n_jobs,
                                               feature='var_genes')
                    if (s_method == 'PCA'):
                        print('Selecting top principal components...')
                        st.select_top_principal_components(
                            adata,
                            n_pc=pca_n_PC,
                            first_pc=flag_first_PC,
                            save_fig=True)
                        st.dimension_reduction(adata,
                                               method=dr_method,
                                               n_components=n_components,
                                               n_neighbors=n_neighbors,
                                               nb_pct=nb_pct,
                                               n_jobs=n_jobs,
                                               feature='top_pcs')
                else:
                    print('Keep all the genes...')
                    st.dimension_reduction(adata,
                                           n_components=n_components,
                                           n_neighbors=n_neighbors,
                                           nb_pct=nb_pct,
                                           n_jobs=n_jobs,
                                           feature='all')
            st.plot_dimension_reduction(adata, save_fig=flag_savefig)
            st.seed_elastic_principal_graph(adata,
                                            clustering=clustering,
                                            damping=damping,
                                            n_clusters=n_clusters)
            st.plot_branches(
                adata,
                save_fig=flag_savefig,
                fig_name='seed_elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(
                adata,
                save_fig=flag_savefig,
                fig_name='seed_elastic_principal_graph.pdf')

            st.elastic_principal_graph(adata,
                                       epg_n_nodes=EPG_n_nodes,
                                       epg_lambda=EPG_lambda,
                                       epg_mu=EPG_mu,
                                       epg_trimmingradius=EPG_trimmingradius,
                                       epg_alpha=EPG_alpha)
            st.plot_branches(adata,
                             save_fig=flag_savefig,
                             fig_name='elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(adata,
                                        save_fig=flag_savefig,
                                        fig_name='elastic_principal_graph.pdf')
            if (not flag_disable_EPG_optimize):
                st.optimize_branching(adata,
                                      epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='optimizing_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='optimizing_elastic_principal_graph.pdf')
            if (flag_EPG_shift):
                st.shift_branching(adata,
                                   epg_shift_mode=EPG_shift_mode,
                                   epg_shift_radius=EPG_shift_DR,
                                   epg_shift_max=EPG_shift_maxshift,
                                   epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='shifting_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='shifting_elastic_principal_graph.pdf')
            if (flag_EPG_collapse):
                st.prune_elastic_principal_graph(
                    adata,
                    epg_collapse_mode=EPG_collapse_mode,
                    epg_collapse_par=EPG_collapse_par,
                    epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='pruning_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='pruning_elastic_principal_graph.pdf')
            if (not flag_disable_EPG_ext):
                st.extend_elastic_principal_graph(
                    adata,
                    epg_ext_mode=EPG_ext_mode,
                    epg_ext_par=EPG_ext_par,
                    epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='extending_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='extending_elastic_principal_graph.pdf')
            st.plot_branches(
                adata,
                save_fig=flag_savefig,
                fig_name='finalized_elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(
                adata,
                save_fig=flag_savefig,
                fig_name='finalized_elastic_principal_graph.pdf')
            st.plot_flat_tree(adata, save_fig=flag_savefig)
            if (flag_umap):
                print('UMAP visualization based on top MLLE components...')
                st.plot_visualization_2D(adata,
                                         save_fig=flag_savefig,
                                         fig_name='umap_cells')
                st.plot_visualization_2D(adata,
                                         color_by='branch',
                                         save_fig=flag_savefig,
                                         fig_name='umap_branches')
            if (root is None):
                print('Visualization of subwaymap and stream plots...')
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                for ns in list_node_start:
                    if (flag_web):
                        st.subwaymap_plot(adata,
                                          percentile_dist=100,
                                          root=ns,
                                          save_fig=flag_savefig)
                        st.stream_plot(adata,
                                       root=ns,
                                       fig_size=(8, 8),
                                       save_fig=True,
                                       flag_log_view=flag_stream_log_view,
                                       fig_legend=False,
                                       fig_name='stream_plot.png')
                    else:
                        st.subwaymap_plot(adata,
                                          percentile_dist=100,
                                          root=ns,
                                          save_fig=flag_savefig)
                        st.stream_plot(adata,
                                       root=ns,
                                       fig_size=(8, 8),
                                       save_fig=flag_savefig,
                                       flag_log_view=flag_stream_log_view)
            else:
                st.subwaymap_plot(adata,
                                  percentile_dist=100,
                                  root=root,
                                  save_fig=flag_savefig)
                st.stream_plot(adata,
                               root=root,
                               fig_size=(8, 8),
                               save_fig=flag_savefig,
                               flag_log_view=flag_stream_log_view)
            output_cell_info(adata)
            if (flag_web):
                output_for_website(adata)
            st.write(adata)

        if (flag_gene_TG_detection):
            print('Identifying transition genes...')
            st.detect_transistion_genes(adata,
                                        cutoff_spearman=TG_spearman_cutoff,
                                        cutoff_logfc=TG_logfc_cutoff,
                                        n_jobs=n_jobs)
            if (flag_web):
                ## Plot top5 genes
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                gene_list = []
                for x in adata.uns['transition_genes'].keys():
                    gene_list = gene_list + adata.uns['transition_genes'][
                        x].index[:n_genes].tolist()
                gene_list = np.unique(gene_list)
                for ns in list_node_start:
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=ns,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')
            else:
                st.plot_transition_genes(adata, save_fig=flag_savefig)

        if (flag_gene_DE_detection):
            print('Identifying differentially expressed genes...')
            st.detect_de_genes(adata,
                               cutoff_zscore=DE_logfc_cutoff,
                               cutoff_logfc=DE_logfc_cutoff,
                               n_jobs=n_jobs)
            if (flag_web):
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                gene_list = []
                for x in adata.uns['de_genes_greater'].keys():
                    gene_list = gene_list + adata.uns['de_genes_greater'][
                        x].index[:n_genes].tolist()
                for x in adata.uns['de_genes_less'].keys():
                    gene_list = gene_list + adata.uns['de_genes_less'][
                        x].index[:n_genes].tolist()
                gene_list = np.unique(gene_list)
                for ns in list_node_start:
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=ns,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')
            else:
                st.plot_de_genes(adata, save_fig=flag_savefig)

        if (flag_gene_LG_detection):
            print('Identifying leaf genes...')
            st.detect_leaf_genes(adata,
                                 cutoff_zscore=LG_zscore_cutoff,
                                 cutoff_pvalue=LG_pvalue_cutoff,
                                 n_jobs=n_jobs)
            if (flag_web):
                ## Plot top5 genes
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                gene_list = []
                for x in adata.uns['leaf_genes'].keys():
                    gene_list = gene_list + adata.uns['leaf_genes'][
                        x].index[:n_genes].tolist()
                gene_list = np.unique(gene_list)
                for ns in list_node_start:
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=ns,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')

        if ((genes != None) and (len(gene_list) > 0)):
            print('Visualizing genes...')
            flat_tree = adata.uns['flat_tree']
            list_node_start = [
                value for key, value in nx.get_node_attributes(
                    flat_tree, 'label').items()
            ]
            if (root is None):
                for ns in list_node_start:
                    if (flag_web):
                        output_for_website_subwaymap_gene(adata, gene_list)
                        st.stream_plot_gene(adata,
                                            root=ns,
                                            fig_size=(8, 8),
                                            genes=gene_list,
                                            save_fig=True,
                                            flag_log_view=flag_stream_log_view,
                                            fig_format='png')
                    else:
                        st.subwaymap_plot_gene(adata,
                                               percentile_dist=100,
                                               root=ns,
                                               genes=gene_list,
                                               save_fig=flag_savefig)
                        st.stream_plot_gene(adata,
                                            root=ns,
                                            fig_size=(8, 8),
                                            genes=gene_list,
                                            save_fig=flag_savefig,
                                            flag_log_view=flag_stream_log_view)
            else:
                if (flag_web):
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=root,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')
                else:
                    st.subwaymap_plot_gene(adata,
                                           percentile_dist=100,
                                           root=root,
                                           genes=gene_list,
                                           save_fig=flag_savefig)
                    st.stream_plot_gene(adata,
                                        root=root,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=flag_savefig,
                                        flag_log_view=flag_stream_log_view)

    else:
        print('Starting mapping procedure...')
        if (output_folder == None):
            workdir_ref = os.path.join(os.getcwd(), 'stream_result')
        else:
            workdir_ref = output_folder
        adata = st.read(file_name='stream_result.pkl',
                        file_format='pkl',
                        file_path=workdir_ref,
                        workdir=workdir_ref)
        workdir = os.path.join(workdir_ref, os.pardir, 'mapping_result')
        adata_new = st.read(file_name=new_filename, workdir=workdir)
        st.add_cell_labels(adata_new, file_name=new_label_filename)
        st.add_cell_colors(adata_new, file_name=new_label_color_filename)
        if (s_method == 'LOESS'):
            st.map_new_data(adata, adata_new, feature='var_genes')
        if (s_method == 'all'):
            st.map_new_data(adata, adata_new, feature='all')
        if (flag_umap):
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     use_precomputed=False,
                                     save_fig=flag_savefig,
                                     fig_name='umap_new_cells')
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     show_all_colors=True,
                                     save_fig=flag_savefig,
                                     fig_name='umap_all_cells')
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     color_by='branch',
                                     save_fig=flag_savefig,
                                     fig_name='umap_branches')
        if (root is None):
            flat_tree = adata.uns['flat_tree']
            list_node_start = [
                value for key, value in nx.get_node_attributes(
                    flat_tree, 'label').items()
            ]
            for ns in list_node_start:
                st.subwaymap_plot(adata,
                                  adata_new=adata_new,
                                  percentile_dist=100,
                                  show_all_cells=False,
                                  root=ns,
                                  save_fig=flag_savefig)
                st.stream_plot(adata,
                               adata_new=adata_new,
                               show_all_colors=False,
                               root=ns,
                               fig_size=(8, 8),
                               save_fig=flag_savefig,
                               flag_log_view=flag_stream_log_view)
        else:
            st.subwaymap_plot(adata,
                              adata_new=adata_new,
                              percentile_dist=100,
                              show_all_cells=False,
                              root=root,
                              save_fig=flag_savefig)
            st.stream_plot(adata,
                           adata_new=adata_new,
                           show_all_colors=False,
                           root=root,
                           fig_size=(8, 8),
                           save_fig=flag_savefig,
                           flag_log_view=flag_stream_log_view)
        if ((genes != None) and (len(gene_list) > 0)):
            if (root is None):
                for ns in list_node_start:
                    st.subwaymap_plot_gene(adata,
                                           adata_new=adata_new,
                                           percentile_dist=100,
                                           root=ns,
                                           save_fig=flag_savefig,
                                           flag_log_view=flag_stream_log_view)
            else:
                st.subwaymap_plot_gene(adata,
                                       adata_new=adata_new,
                                       percentile_dist=100,
                                       root=root,
                                       save_fig=flag_savefig,
                                       flag_log_view=flag_stream_log_view)
        st.write(adata_new, file_name='stream_mapping_result.pkl')
    print('Finished computation.')
예제 #17
0
# -*- coding: utf-8 -*-
# Dash page module: loads a precomputed STREAM result (Nestorowa 2016
# mouse hematopoiesis dataset) at import time and prepares the option
# lists that populate this page's selector widgets.
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc

import numpy as np
import stream as st

import matplotlib
matplotlib.use('Agg')  # headless backend: figures are rendered server-side only

from app import app

# Load the precomputed STREAM AnnData pickle; outputs go to ./stream_result.
adata = st.read(file_name='./SampleData/Nestorowa_2016/Nestorowa-2016.pkl',
                workdir='./stream_result')
# NOTE(review): the key 'discription' is misspelled ('description');
# downstream readers presumably look it up under the same misspelled
# key — confirm all usages before renaming.
adata.uns[
    'discription'] = 'This scRNA-seq dataset contains 1656 cells and 4768 genes from mouse hematopoietic stem and progenitor cell differentiation. A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation. Blood 128, e20-31 (2016).'
# QC plot returned as an SVG string (return_svg=True) for inline embedding.
fig_qc = st.plot_qc(adata, jitter=0.2, fig_size=(2, 2), return_svg=True)

# Fixed choices offered by the page's dropdown controls.
available_samples = [
    'Nestorowa, S. et al. 2016', 'Harrison, S. et al. 2021',
    'Trapnell, C. et al. 2014', ' Tang, Q. et al. 2017'
]
available_projections = [
    'dimension_reduction', 'visualization_2D', 'flat_tree', 'branches'
]
# Per-cell annotation columns usable as color keys.
available_colors = adata.obs.columns
available_stream = ['single_cell_stream', 'stream']
layout = html.Div([
예제 #18
0
# Infer trajectories
# NOTE(review): 'yaml', 'counts', 'cell_ids', 'gene_ids', 'output_folder',
# 'checkpoints', 'time', 'pd' and 'st' must all be defined earlier in the
# enclosing wrapper script — confirm against the full file.

# read in parameters
# NOTE(review): the file handle is never closed; consider a 'with' block.
definition = open('./definition.yml', 'r')
task = yaml.safe_load(definition)
p = dict()
# Flatten the task's parameter definitions into {id: default} pairs.
for x in task["parameters"]:
    p[x['id']] = x['default']

# Persist the count matrix (transposed to genes x cells) so STREAM can
# read it back from disk below.
pd.DataFrame(counts.toarray(), index=cell_ids,
             columns=gene_ids).T.to_csv(output_folder + "counts.tsv", sep='\t')

# Benchmark checkpoint: preprocessing done, method proper starts here.
checkpoints["method_afterpreproc"] = time.time()

adata = st.read(file_name=output_folder + "counts.tsv")
st.add_cell_labels(adata)  # no label file given: default label per cell
st.add_cell_colors(adata)  # no color file given: default colors

# Optional normalization / log2 transform, driven by the task parameters.
if (p["norm"]):
    st.normalize_per_cell(adata)
if (p["log2"]):
    st.log_transform(adata)

# Drop genes expressed (above expr_cutoff) in fewer than
# max(5, ~0.1% of cells) cells.
st.filter_genes(adata,
                min_num_cells=max(5, int(round(adata.shape[0] * 0.001))),
                min_pct_cells=None,
                expr_cutoff=1)
# For small gene sets, skip variable-gene selection and use every gene.
if (adata.shape[1] < 1000):
    adata.uns['var_genes'] = gene_ids
    adata.obsm['var_genes'] = adata.X
예제 #19
0
# Load a preprocessed AnnData subset into STREAM and label cells by
# their precomputed Louvain clusters.
import argparse
import os
import stream as st
import scanpy as sc
import numpy as np

# Minimal CLI: only the output directory is configurable.
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--outdir", type=str, default=None)

args = parser.parse_args()

# NOTE(review): 'ouput' looks like a typo for 'output' in this hard-coded
# input path — confirm against the actual directory layout before fixing.
adata = st.read("ouput/aging-xxx/subset/adata.h5ad",
                file_format="h5ad",
                workdir=os.path.join(args.outdir, "stream_result"))
# Use the precomputed Louvain clusters as STREAM's cell labels.
adata.obs["label"] = adata.obs["louvain"]

label_color = {
    "0": "#FF0000",
    "1": "#836FFF",
    "2": "#0000FF",
    "3": "#C6E2FF",
    "4": "#548B54",
    "5": "#00FF00",
    "6": "#FFF68F",
    "7": "#8B864E",
    "8": "#FFFF00",
    "9": "#FFD700",
    "10": "#8B658B",
    "11": "#FF6A6A",
    "12": "#FFD39B",
    "13": "#EE2C2C",
예제 #20
0
def _read_id_list(file_name, what):
    """Read a one-column, tab-separated file and return its unique values.

    :param file_name: path to a file whose first column holds the
        identifiers (annotation keys or gene names)
    :param what: human-readable description used in the diagnostic
        message when parsing fails (e.g. 'annotation file')
    :return: list of unique identifiers (order not preserved)
    :raises FileNotFoundError: if the file does not exist
    :raises Exception: re-raises any other parsing failure after logging
    """
    try:
        values = pd.read_csv(file_name,
                             sep='\t',
                             header=None,
                             index_col=None).iloc[:, 0].tolist()
    except FileNotFoundError as fnf_error:
        print(fnf_error)
        raise
    except Exception:
        # Keep the original user-facing diagnostic, then propagate.
        print('Failed to load in %s.' % what)
        raise
    return list(set(values))


def main():
    """Convert a single-cell analysis result into a VR report.

    Parses command-line options, loads the cell-annotation keys (and an
    optional gene list) from one-column files, then dispatches to the
    exporter matching the chosen tool:

    * scanpy / paga / seurat -> scvr.output_* functions, then the report
      folder is zipped and the folder itself removed
    * velocity               -> scvr.output_velocity_cells (annotation is
      a field name, not a file)
    * stream                 -> st.save_vr_report (requires STREAM >= 0.5)
    """
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f",
                        "--filename",
                        dest="filename",
                        default=None,
                        required=True,
                        help="Analysis result file name",
                        metavar="FILE")
    parser.add_argument(
        "-t",
        "--toolname",
        dest="toolname",
        default=None,
        required=True,
        type=str.lower,
        choices=['scanpy', 'paga', 'seurat', 'stream', 'velocity'],
        help="Tool used to generate the analysis result.")
    parser.add_argument(
        "-a",
        "--annotations",
        dest="annotations",
        default=None,
        required=True,
        help=
        "Annotation file name. It contains the cell annotation key(s) to visualize in one column."
    )
    parser.add_argument(
        "-g",
        "--genes",
        dest="genes",
        default=None,
        help=
        "Gene list file name. It contains the genes to visualize in one column."
    )
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        default='vr_report',
                        help="Output folder name")
    parser.add_argument(
        "--layer",
        dest="layer",
        default='norm_data',
        help="The name of layer in Anndata object for gene expression")

    args = parser.parse_args()
    filename = args.filename
    toolname = args.toolname
    genes = args.genes
    output = args.output  # work directory
    annotations = args.annotations
    # NOTE(review): 'layer' is parsed but never used in this function —
    # confirm whether a downstream exporter was meant to receive it.
    layer = args.layer

    # Defensive only: argparse already enforces required=True for -a.
    if annotations is None:
        raise Exception(
            "Annotation file must be specified when %s is chosen." %
            (toolname))

    # 'velocity' takes the annotation *field name* directly, not a file.
    if toolname != 'velocity':
        ann_list = _read_id_list(annotations, 'annotation file')

    if genes is not None:
        gene_list = _read_id_list(genes, 'gene list')
    else:
        gene_list = None

    print("Converting '%s' analysis result ..." % toolname)

    if toolname in ('scanpy', 'paga', 'seurat'):
        if toolname == 'scanpy':
            # BUGFIX: message previously said "For PAGA ..." in this branch.
            assert filename.lower().endswith(
                '.h5ad'), "For Scanpy only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_scanpy_cells(adata,
                                     ann_list,
                                     gene_list=gene_list,
                                     reportdir=output)
        if toolname == 'paga':
            assert filename.lower().endswith(
                '.h5ad'), "For PAGA only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_paga_graph(adata, reportdir=output)
            scvr.output_paga_cells(adata,
                                   ann_list,
                                   gene_list=gene_list,
                                   reportdir=output)
        if toolname == 'seurat':
            assert filename.lower().endswith(
                ('.loom',
                 '.h5ad')), "For Seurat only .loom .h5ad file is supported."
            print('reading in loom file ...')
            if filename.lower().endswith('.loom'):
                adata = ad.read_loom(filename)
            else:
                adata = ad.read(filename)
            scvr.output_seurat_cells(adata,
                                     ann_list,
                                     gene_list=gene_list,
                                     reportdir=output)
        # Record which tool produced the report, then ship it as a zip
        # and drop the unpacked folder.
        with open(os.path.join(output, 'index.json'), 'w') as f:
            json.dump({"tool": toolname}, f)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)
    if toolname == 'velocity':
        assert filename.lower().endswith(
            ('.h5ad', '.loom')), 'Velocity supports .h5ad or .loom.'
        adata = scv.read(filename)
        scvr.output_velocity_cells(adata,
                                   ann_field=annotations,
                                   gene_list=gene_list,
                                   reportdir=output)
    if toolname == 'stream':
        try:
            import stream as st
        except ImportError:
            raise ImportError(
                'Please install STREAM >=0.5: `conda install -c bioconda stream`.'
            )
        assert filename.lower().endswith(
            '.pkl'), "For STREAM only .pkl file is supported."
        print('reading in pkl file ...')
        adata = st.read(filename, file_format='pkl', workdir='./')
        st.save_vr_report(adata,
                          ann_list=ann_list,
                          gene_list=gene_list,
                          file_name=output)
예제 #21
0
def get_features():
    """Flask endpoint: return per-cell values for the requested feature.

    Query parameters:
      db_name -- dataset name, resolved to a single file in DATASET_DIRECTORY.
      feature -- what to return: an annotation column name,
                 "expression"/"rna" (requires ``gene``),
                 "velocity"/"velocity_grid" (require ``embed``/``time``),
                 "paga", or "curves".

    Returns a JSON object ``{feature: payload}``; an empty JSON object is
    returned for an unknown gene or an unsupported ``time`` value.

    scanpy examples:
      http://127.0.0.1:8000/features?db_name=1_scanpy_10xpbmc&feature=louvain
      http://127.0.0.1:8000/features?db_name=1_scanpy_10xpbmc&feature=expression&gene=SUMO3

    seurat examples:
      http://127.0.0.1:8000/features?db_name=4_seurat_10xpbmc&feature=expression&gene=SUMO3
      http://127.0.0.1:8000/features?db_name=4_seurat_10xpbmc&feature=expression&gene=SUMO3

    velocity examples:
      http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=clusters
      http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=expression&gene=Rbbp7
      http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity&embed=umap&time=None
      http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity&embed=umap&time=1
      http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity&embed=umap&time=10

    velocity grid examples:
      http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity_grid&embed=umap&time=10
      http://127.0.0.1:8000/features?db_name=3_velocity_pancrease&feature=velocity_grid&embed=umap&time=100
    """
    database = request.args.get("db_name")
    feature = request.args.get("feature")
    # First file matching "<db_name>.*" (raises IndexError when none match).
    filename = glob(os.path.join(DATASET_DIRECTORY, f"{database}.*"))[0]

    db_type = get_dataset_type_adata(filename)
    # `embed` is only bound when feature == "velocity"; note the
    # "velocity_grid" branch below does not read it.
    if feature.lower() == "velocity":
        embed = request.args.get("embed")

    # NOTE(review): `adata` is a fresh local name at this point, so this del
    # always raises NameError and is swallowed -- effectively dead code.
    try:
        del adata
    except:
        pass

    # NOTE(review): here the type is looked up from the bare db name, while
    # `db_type` above was derived from the resolved filename -- confirm both
    # resolve to the same answer.
    if get_dataset_type_adata(database).lower() in [
            "scanpy", "velocity", "seurat", "paga"
    ]:
        adata = sc.read(filename)
    else:
        # Anything else is assumed to be a STREAM pickle.
        adata = st.read(filename, file_format="pkl", workdir="./")

    list_metadata = []
    if feature in get_available_annotations_adata(adata):  # cluster columns
        # Map each category to a display color: precomputed colors from
        # adata.uns when available, otherwise generated ones.
        if f"{feature}_colors" in adata.uns.keys():
            dict_colors = {
                feature:
                dict(
                    zip(adata.obs[feature].cat.categories,
                        adata.uns[f"{feature}_colors"]))
            }
        else:
            dict_colors = {
                feature:
                dict(
                    zip(adata.obs[feature],
                        converters.get_colors(adata, feature)))
            }
        # One record per cell: id, label, and the label's color.
        for i in range(adata.shape[0]):
            dict_metadata = dict()
            dict_metadata["cell_id"] = adata.obs_names[i]
            dict_metadata["label"] = adata.obs[feature].tolist()[i]
            dict_metadata["clusters"] = adata.obs[feature].tolist()[i]
            dict_metadata["clusters_color"] = dict_colors[feature][
                dict_metadata["clusters"]]
            list_metadata.append(dict_metadata)
    elif feature in ["expression", "rna"]:  # pseudotime or latent_time columns
        gene = request.args.get("gene")
        if gene not in adata.var_names:
            # Unknown gene: return an empty payload rather than an error.
            return jsonify({})
        else:
            # NOTE(review): `feature` can only be "expression" or "rna" in
            # this branch, so this "time" case looks unreachable.
            if "time" in feature:
                values = adata.obs[feature]
            else:
                if db_type == "seurat":
                    # Seurat results carry normalized expression in the
                    # "norm_data" layer; handle both sparse and dense.
                    values = (adata[:, gene].layers["norm_data"].toarray()[:,
                                                                           0]
                              if isspmatrix(adata.layers["norm_data"]) else
                              adata[:, gene].layers["norm_data"][:, 0])
                else:
                    values = (adata[:, gene].X.toarray()[:, 0] if isspmatrix(
                        adata.X) else adata[:, gene].X[:, 0])

            # Color each cell by expression on a viridis ramp scaled to
            # [0, max(values)].
            cm = mpl.cm.get_cmap("viridis", 512)
            norm = mpl.colors.Normalize(vmin=0, vmax=max(values), clip=True)
            list_metadata = []
            for i, x in enumerate(adata.obs_names):
                dict_genes = dict()
                dict_genes["cell_id"] = x
                dict_genes["color"] = mpl.colors.to_hex(cm(norm(values[i])))
                list_metadata.append(dict_genes)
    elif feature == "velocity":
        # Per-cell 3-D position (x,y,z) plus a velocity-displaced endpoint
        # (x1,y1,z1); `time` selects precomputed absolute velocities.
        list_metadata = []
        time = request.args.get("time")
        for i in range(adata.shape[0]):
            dict_coord_cells = dict()
            if isinstance(adata.obs_names[i], bytes):
                dict_coord_cells["cell_id"] = adata.obs_names[i].decode(
                    "utf-8")
            else:
                dict_coord_cells["cell_id"] = adata.obs_names[i]

            dict_coord_cells["x"] = str(adata.obsm[f"X_{embed}"][i, 0])
            dict_coord_cells["y"] = str(adata.obsm[f"X_{embed}"][i, 1])
            dict_coord_cells["z"] = str(adata.obsm[f"X_{embed}"][i, 2])

            if time == "None":
                dict_coord_cells["x1"] = str(
                    adata.obsm[f"velocity_{embed}"][i, 0])
                dict_coord_cells["y1"] = str(
                    adata.obsm[f"velocity_{embed}"][i, 1])
                dict_coord_cells["z1"] = str(
                    adata.obsm[f"velocity_{embed}"][i, 2])
            elif time in list(map(str,
                                  [0.01, 0.1, 1, 5, 10, 20, 30, 50, 100])):
                dict_coord_cells["x1"] = str(
                    adata.obsm[f"absolute_velocity_{embed}_{time}s"][i, 0])
                dict_coord_cells["y1"] = str(
                    adata.obsm[f"absolute_velocity_{embed}_{time}s"][i, 1])
                dict_coord_cells["z1"] = str(
                    adata.obsm[f"absolute_velocity_{embed}_{time}s"][i, 2])
            else:
                # Unsupported time value: empty payload.
                return jsonify({})
            list_metadata.append(dict_coord_cells)
    elif feature == "velocity_grid":
        # Same idea on the velocity grid; only grid points whose probability
        # mass is at least 1 are emitted.
        list_metadata = []
        time = request.args.get("time")
        p_mass = adata.uns['p_mass']
        for i in np.where(p_mass >= 1)[0]:
            dict_coord_cells = dict()

            if time == "None":
                dict_coord_cells["x"] = str(adata.uns[f"X_grid"][i, 0])
                dict_coord_cells["y"] = str(adata.uns[f"X_grid"][i, 1])
                dict_coord_cells["z"] = str(adata.uns[f"X_grid"][i, 2])
                dict_coord_cells["x1"] = str(adata.uns[f"V_grid"][i, 0])
                dict_coord_cells["y1"] = str(adata.uns[f"V_grid"][i, 1])
                dict_coord_cells["z1"] = str(adata.uns[f"V_grid"][i, 2])
            # NOTE(review): this list of valid times (…20, 50, 80…) differs
            # from the per-cell branch above (…20, 30, 50…) -- confirm
            # intentional.
            elif time in list(map(str,
                                  [0.01, 0.1, 1, 5, 10, 20, 50, 80, 100])):
                dict_coord_cells["x"] = str(adata.uns[f"X_grid_{time}"][i, 0])
                dict_coord_cells["y"] = str(adata.uns[f"X_grid_{time}"][i, 1])
                dict_coord_cells["z"] = str(adata.uns[f"X_grid_{time}"][i, 2])
                dict_coord_cells["x1"] = str(adata.uns[f"V_grid_{time}"][i, 0])
                dict_coord_cells["y1"] = str(adata.uns[f"V_grid_{time}"][i, 1])
                dict_coord_cells["z1"] = str(adata.uns[f"V_grid_{time}"][i, 2])
            else:
                return jsonify({})
            list_metadata.append(dict_coord_cells)
    elif feature == "paga":
        # Rebuild the PAGA abstraction graph from its connectivity matrix
        # and compute 3-D node positions for it.
        G = nx.from_numpy_matrix(adata.uns["paga"]["connectivities"].toarray())
        adata.uns["paga"]["pos"] = get_paga3d_pos(adata)
        ## output coordinates of paga graph
        list_lines = []
        for edge_i in G.edges():
            dict_coord_lines = dict()
            dict_coord_lines["branch_id"] = [[str(edge_i[0]), str(edge_i[1])]]
            dict_coord_lines["xyz"] = [{
                "x": pos[0],
                "y": pos[1],
                "z": pos[2]
            } for pos in adata.uns["paga"]["pos"][[edge_i[0], edge_i[1]], :]]
            list_lines.append(dict_coord_lines)

        ## output topology of paga graph
        dict_nodes = dict()
        list_edges = []
        # Node index -> group/category name used to label nodes.
        dict_nodename = {
            i: adata.obs[adata.uns["paga"]["groups"]].cat.categories[i]
            for i in G.nodes()
        }
        for node_i in G.nodes():
            dict_nodes_i = dict()
            dict_nodes_i["node_name"] = dict_nodename[node_i]
            dict_nodes_i["xyz"] = {
                "x": adata.uns["paga"]["pos"][:, 0][node_i],
                "y": adata.uns["paga"]["pos"][:, 1][node_i],
                "z": adata.uns["paga"]["pos"][:, 2][node_i],
            }
            dict_nodes[node_i] = dict_nodes_i
        for edge_i in G.edges():
            dict_edges = dict()
            dict_edges["nodes"] = [str(edge_i[0]), str(edge_i[1])]
            dict_edges["weight"] = adata.uns["paga"]["connectivities"][
                edge_i[0], edge_i[1]]
            list_edges.append(dict_edges)
        # NOTE(review): here list_metadata becomes a dict, not a list.
        list_metadata = {"nodes": dict_nodes, "edges": list_edges}
    elif feature == "curves":
        # STREAM trajectory curves: one polyline per flat-tree branch,
        # sampled at the elastic-principal-graph node positions.
        flat_tree = adata.uns['flat_tree']
        epg = adata.uns['epg']
        epg_node_pos = nx.get_node_attributes(epg, 'pos')
        ft_node_label = nx.get_node_attributes(flat_tree, 'label')
        ft_node_pos = nx.get_node_attributes(flat_tree, 'pos')
        list_curves = []
        for edge_i in flat_tree.edges():
            branch_i_pos = np.array(
                [epg_node_pos[i] for i in flat_tree.edges[edge_i]['nodes']])
            df_coord_curve_i = pd.DataFrame(branch_i_pos)
            dict_coord_curves = dict()
            dict_coord_curves['branch_id'] = ft_node_label[
                edge_i[0]] + '_' + ft_node_label[edge_i[1]]
            dict_coord_curves['xyz'] = [{
                'x': df_coord_curve_i.iloc[j, 0],
                'y': df_coord_curve_i.iloc[j, 1],
                'z': df_coord_curve_i.iloc[j, 2]
            } for j in range(df_coord_curve_i.shape[0])]
            list_curves.append(dict_coord_curves)
        list_metadata = list_curves
    # Release the loaded dataset eagerly before returning.
    del adata
    gc.collect()
    return jsonify({feature: list_metadata})
예제 #22
0
def main():
    """Build a single-cell VR report from a PAGA, Seurat or STREAM result.

    Command-line driven: reads the analysis result file, an annotation
    list (required for paga/seurat) and an optional gene list, then writes
    the VR report into the output folder (zipped for paga/seurat; STREAM
    writes its own report file).

    Raises:
        Exception: 'paga'/'seurat' chosen without an annotation file.
        ImportError: 'stream' chosen but STREAM is not installed.
        AssertionError: input file extension does not match the tool.
    """
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f",
                        "--filename",
                        dest="filename",
                        default=None,
                        required=True,
                        help="Analysis result file name",
                        metavar="FILE")
    parser.add_argument("-t",
                        "--toolname",
                        dest="toolname",
                        default=None,
                        required=True,
                        type=str.lower,
                        choices=['paga', 'seurat', 'stream'],
                        help="Tool used to generate the analysis result.")
    parser.add_argument(
        "-a",
        "--annotations",
        dest="annotations",
        default=None,
        help=
        "Annotation file name. It contains the cell annotation(s) used to color cells"
    )
    parser.add_argument(
        "-g",
        "--genes",
        dest="genes",
        default=None,
        help=
        "Gene list file name. It contains the genes to visualize in one column"
    )
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        default='vr_report',
                        help="Output folder name")

    args = parser.parse_args()
    filename = args.filename
    toolname = args.toolname
    genes = args.genes
    output = args.output  # work directory
    annotations = args.annotations

    if toolname in ['paga', 'seurat']:
        if annotations is None:
            raise Exception(
                "Annotation file must be specified when %s is chosen." %
                (toolname))
        try:
            # One annotation name per line; only the first column is used.
            ann_list = pd.read_csv(annotations,
                                   sep='\t',
                                   header=None,
                                   index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed; the failure is reported and re-raised.
            print('Failed to load in annotation file.')
            raise
        else:
            ann_list = list(set(ann_list))  # de-duplicate

    if genes is not None:
        try:
            gene_list = pd.read_csv(genes,
                                    sep='\t',
                                    header=None,
                                    index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:
            print('Failed to load in gene list.')
            raise
        else:
            gene_list = list(set(gene_list))  # de-duplicate
    else:
        gene_list = None

    if toolname == 'paga':
        assert filename.lower().endswith(
            '.h5ad'), "For PAGA only .h5ad file is supported."
        print('reading in h5ad file ...')
        adata = ad.read_h5ad(filename)
        adata.uns['paga']['pos'] = scvr_prep.get_paga3d_pos(adata)
        scvr_prep.output_paga_graph(adata, reportdir=output)
        scvr_prep.output_paga_cells(adata,
                                    ann_list,
                                    genes=gene_list,
                                    reportdir=output)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)
    if toolname == 'seurat':
        # BUG FIX: the original asserted a '.h5ad' extension although the
        # error message and the loader (ad.read_loom) both expect '.loom'.
        assert filename.lower().endswith(
            '.loom'), "For Seurat only .loom file is supported."
        print('reading in loom file ...')
        adata = ad.read_loom(filename)
        scvr_prep.output_seurat_cells(adata,
                                      ann_list,
                                      genes=gene_list,
                                      reportdir=output)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)
    if toolname == 'stream':
        try:
            import stream as st
        except ImportError:
            raise ImportError(
                'Please install STREAM >=0.4.2: `conda install -c bioconda stream`.'
            )
        # BUG FIX: the original asserted a '.h5ad' extension although the
        # error message and the pkl reader both expect '.pkl'.
        assert filename.lower().endswith(
            '.pkl'), "For STREAM only .pkl file is supported."
        print('reading in pkl file ...')
        adata = st.read(filename, file_format='pkl', workdir='./')
        st.save_vr_report(adata, genes=gene_list, file_name=output)
예제 #23
0
def main():
    """Run STREAM dimension reduction on a preprocessed .pkl dataset,
    plot the reduced space and write the updated result back to disk.
    """
    # Plot styling picked up by the STREAM plotting calls below.
    sns.set_style('white')
    sns.set_context('poster')

    arg_parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # I/O locations.
    arg_parser.add_argument(
        "-m",
        "--data-file",
        dest="input_filename",
        default=None,
        metavar="FILE",
        help="input file name, pkl format from Stream preprocessing module")
    arg_parser.add_argument("-of",
                            "--of",
                            dest="output_filename_prefix",
                            default="StreamiFSOutput",
                            help="output file name prefix")
    # Dimension-reduction parameters.
    arg_parser.add_argument("-nb_pct",
                            "--percent_neighbor_cells",
                            dest="nb_pct",
                            type=float,
                            default=None,
                            help="")
    arg_parser.add_argument("-n_comp_k",
                            dest="n_comp_k",
                            type=int,
                            default=None,
                            help="")
    arg_parser.add_argument("-feat", dest="feature", default=None,
                            help="feature")
    arg_parser.add_argument("-method", dest="method", default=None, help="")
    # Plotting parameters.
    arg_parser.add_argument("-nc_plot",
                            dest="nc_plot",
                            type=int,
                            default=None,
                            help="")
    arg_parser.add_argument("-comp1", dest="comp1", default=None,
                            help="feature")
    arg_parser.add_argument("-comp2",
                            dest="comp2",
                            type=int,
                            default=None,
                            help="")
    arg_parser.add_argument("-fig_width",
                            dest="fig_width",
                            type=int,
                            default=8,
                            help="")
    arg_parser.add_argument("-fig_height",
                            dest="fig_height",
                            type=int,
                            default=8,
                            help="")
    arg_parser.add_argument("-n_jobs", dest="n_jobs", type=int, default=2,
                            help="")
    arg_parser.add_argument("-fig_legend_ncol",
                            dest="fig_legend_ncol",
                            type=int,
                            default=None,
                            help="")

    opts = arg_parser.parse_args()
    print(opts)

    print('Starting dimension reduction procedure...')

    # Load the preprocessed STREAM object from disk.
    adata = st.read(file_name=opts.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir="./")
    print("Feature ", opts.feature, type(opts.feature))
    # Note: feature is fixed to 'var_genes' here, matching the original.
    st.dimension_reduction(adata,
                           method=opts.method,
                           feature='var_genes',
                           nb_pct=opts.nb_pct,
                           n_components=opts.n_comp_k,
                           n_jobs=opts.n_jobs,
                           eigen_solver=None)

    # Render the reduced space and save the figure.
    st.plot_dimension_reduction(
        adata,
        n_components=opts.nc_plot,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=opts.output_filename_prefix + '_stddev_dotplot.png',
        fig_path="./",
        fig_size=(opts.fig_width, opts.fig_height),
        fig_legend_ncol=opts.fig_legend_ncol)

    # Persist the updated object next to the figure.
    st.write(adata,
             file_name=opts.output_filename_prefix + '_stream_result.pkl',
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    print('Finished computation.')
예제 #24
0
def get_coordinates():
    """Flask endpoint: return 3-D embedding coordinates for every cell.

    For scanpy/velocity/paga datasets the coordinates come from
    ``adata.obsm["X_<embed>"]``, for Seurat from
    ``adata.obsm["<embed>_cell_embeddings"]``. For STREAM datasets the
    response instead bundles the flat-tree topology (nodes, edges),
    branch curves, and the cell coordinates from ``adata.obsm["X_dr"]``.

    http://127.0.0.1:8000/coordinates?db_name=1_scanpy_10xpbmc&embed=umap
    http://127.0.0.1:8000/coordinates?db_name=3_velocity_pancrease&embed=umap
    http://127.0.0.1:8000/coordinates?db_name=4_seurat_10xpbmc&embed=umap
    http://127.0.0.1:8000/coordinates?db_name=5_stream_nestorowa16&embed=umap
    """
    db_name = request.args.get("db_name")
    # First file matching "<db_name>.*" (raises IndexError when none match).
    filename = glob(os.path.join(DATASET_DIRECTORY, f"{db_name}.*"))[0]

    # NOTE(review): `adata` is a fresh local name here, so this del always
    # raises NameError and is swallowed -- effectively dead code.
    try:
        del adata
    except:
        pass

    if get_dataset_type_adata(db_name).lower() in [
            "scanpy", "velocity", "seurat", "paga"
    ]:
        adata = sc.read(filename)
        # `embed` is only bound (and only read) on this side of the branch.
        embed = request.args.get("embed")
    else:
        print(filename)
        adata = st.read(filename, file_format="pkl", workdir="./")

    list_cells = []
    for i in range(adata.shape[0]):
        dict_coord_cells = dict()
        dict_coord_cells["cell_id"] = adata.obs_names[i]
        if get_dataset_type_adata(db_name).lower() in [
                "scanpy", "paga", "velocity"
        ]:
            dict_coord_cells["x"] = str(adata.obsm[f"X_{embed}"][i, 0])
            dict_coord_cells["y"] = str(adata.obsm[f"X_{embed}"][i, 1])
            dict_coord_cells["z"] = str(adata.obsm[f"X_{embed}"][i, 2])
        elif get_dataset_type_adata(db_name).lower() == "seurat":
            dict_coord_cells["x"] = str(
                adata.obsm[f"{embed}_cell_embeddings"][i, 0])
            dict_coord_cells["y"] = str(
                adata.obsm[f"{embed}_cell_embeddings"][i, 1])
            dict_coord_cells["z"] = str(
                adata.obsm[f"{embed}_cell_embeddings"][i, 2])
        elif get_dataset_type_adata(db_name).lower() == "stream":
            # NOTE(review): this whole branch builds the complete response
            # and returns on the FIRST iteration of the per-cell loop; the
            # enclosing loop is effectively executed once for STREAM data.
            # Ensure a scratch folder exists under the STREAM workdir.
            file_path = os.path.join(adata.uns["workdir"], "test")
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            flat_tree = adata.uns["flat_tree"]
            epg = adata.uns["epg"]
            epg_node_pos = nx.get_node_attributes(epg, "pos")
            ft_node_label = nx.get_node_attributes(flat_tree, "label")
            ft_node_pos = nx.get_node_attributes(flat_tree, "pos")
            # One polyline per flat-tree branch, sampled at the elastic
            # principal graph node positions.
            list_curves = []
            for edge_i in flat_tree.edges():
                branch_i_pos = np.array([
                    epg_node_pos[i] for i in flat_tree.edges[edge_i]["nodes"]
                ])
                df_coord_curve_i = pd.DataFrame(branch_i_pos)
                dict_coord_curves = dict()
                dict_coord_curves["branch_id"] = (ft_node_label[edge_i[0]] +
                                                  "_" +
                                                  ft_node_label[edge_i[1]])
                dict_coord_curves["xyz"] = [{
                    "x": df_coord_curve_i.iloc[j, 0],
                    "y": df_coord_curve_i.iloc[j, 1],
                    "z": df_coord_curve_i.iloc[j, 2],
                } for j in range(df_coord_curve_i.shape[0])]
                list_curves.append(dict_coord_curves)

            ## output topology of stream graph
            dict_nodes = dict()
            list_edges = []
            for node_i in flat_tree.nodes():
                dict_nodes_i = dict()
                dict_nodes_i["node_name"] = ft_node_label[node_i]
                dict_nodes_i["xyz"] = {
                    "x": ft_node_pos[node_i][0],
                    "y": ft_node_pos[node_i][1],
                    "z": ft_node_pos[node_i][2],
                }
                dict_nodes[ft_node_label[node_i]] = dict_nodes_i
            for edge_i in flat_tree.edges():
                dict_edges = dict()
                dict_edges["nodes"] = [
                    ft_node_label[edge_i[0]],
                    ft_node_label[edge_i[1]],
                ]
                # Flat-tree edges carry no weight; report a constant 1.
                dict_edges["weight"] = 1
                list_edges.append(dict_edges)

            # Cell coordinates from STREAM's dimension-reduced space.
            list_cells = []
            for i in range(adata.shape[0]):
                dict_coord_cells = dict()
                dict_coord_cells['cell_id'] = adata.obs_names[i]
                dict_coord_cells['x'] = adata.obsm['X_dr'][i, 0]
                dict_coord_cells['y'] = adata.obsm['X_dr'][i, 1]
                dict_coord_cells['z'] = adata.obsm['X_dr'][i, 2]
                list_cells.append(dict_coord_cells)
            return jsonify({
                "nodes": dict_nodes,
                "edges": list_edges,
                "graph": list_curves,
                "cells": list_cells
            })
        else:
            raise TypeError("not supported format")
        list_cells.append(dict_coord_cells)
    # Release the loaded dataset eagerly before returning.
    del adata
    gc.collect()
    return jsonify(list_cells)
예제 #25
0
#!/usr/bin/env python3

### Here we perform Pseudotime analysis with STREAM v0.36 [https://doi.org/10.1038/s41467-019-09670-4] [https://github.com/pinellolab/STREAM]
### Download counts matrix here:

import matplotlib as mpl

# Non-interactive backend: this script only writes figure files to disk.
mpl.use('Agg')
import matplotlib.pyplot as plt

plt.ioff()
import stream as st
import os.path
import pickle

#### Read Counts table
adata = st.read(file_name='./counts.tsv', workdir='./')
# Read Cell labels table
st.add_cell_labels(adata, file_name='./cell_label.tsv')
# Add random colors to each sample
st.add_cell_colors(adata, file_name='./cell_color.tsv')

### CHECK FOR VARIABLE GENES
# Check if the blue (loess) curve fits the points well
st.select_variable_genes(adata)
# Save the plot to file
plt.savefig('loess.png')
# BUG FIX: plt.close('loess.png') tried to close a figure *labelled*
# 'loess.png', which does not exist; close the current figure instead.
plt.close()

# Adjust the blue curve so it fits better
st.select_variable_genes(adata, loess_frac=0.01)
plt.savefig('adjust_loess.png')
plt.close()  # release the second figure as well
예제 #26
0
def main():
    """Plot STREAM trajectory results (flat tree, subway map, stream plots).

    Reads a STREAM .pkl result, optionally renders cell-level plots
    (-flag_cells) and gene-level plots (-flag_genes), then writes the
    object back to disk as <prefix>_stream_result.pkl.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m",
        "--data-file",
        dest="input_filename",
        default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of",
                        "--of",
                        dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("-fig_width",
                        dest="fig_width",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_height",
                        dest="fig_height",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_legend_ncol",
                        dest="fig_legend_ncol",
                        type=int,
                        default=None,
                        help="")

    parser.add_argument("-root", dest="root", default=None, help="")
    parser.add_argument("-preference", dest="preference", help="")
    parser.add_argument("-subway_factor",
                        dest="subway_factor",
                        type=float,
                        default=2.0,
                        help="")
    parser.add_argument("-color_by", dest="color_by", default='label', help="")
    parser.add_argument("-factor_num_win",
                        dest="factor_num_win",
                        type=int,
                        default=10,
                        help="")
    parser.add_argument("-factor_min_win",
                        dest="factor_min_win",
                        type=float,
                        default=2.0,
                        help="")
    parser.add_argument("-factor_width",
                        dest="factor_width",
                        type=float,
                        default=2.5,
                        help="")
    parser.add_argument("-flag_log_view",
                        dest="flag_log_view",
                        action="store_true",
                        help="")
    parser.add_argument("-factor_zoomin",
                        dest="factor_zoomin",
                        type=float,
                        default=100.0,
                        help="")
    parser.add_argument("-flag_cells",
                        dest="flag_cells",
                        action="store_true",
                        help="")
    parser.add_argument("-flag_genes",
                        dest="flag_genes",
                        action="store_true",
                        help="")

    parser.add_argument("-genes", dest="genes", default=None, help="")
    parser.add_argument("-percentile_dist",
                        dest="percentile_dist",
                        type=float,
                        default=100,
                        help="")

    args = parser.parse_args()

    workdir = "./"

    adata = st.read(file_name=args.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir=workdir)
    # BUG FIX: -preference has no default, so args.preference may be None and
    # splitting it crashed; pass None through to the plotting functions.
    preference = (args.preference.split(',')
                  if args.preference is not None else None)
    # BUG FIX: flag_cells/flag_genes come from action="store_true" and are
    # always booleans, so the original `!= None` tests were always True and
    # the plots ran unconditionally. Test truthiness instead.
    if args.flag_cells:
        st.plot_flat_tree(adata,
                          save_fig=True,
                          fig_path="./",
                          fig_name=(args.output_filename_prefix +
                                    '_flat_tree.png'),
                          fig_size=(args.fig_width, args.fig_height),
                          fig_legend_ncol=args.fig_legend_ncol)

        st.subwaymap_plot(adata,
                          root=args.root,
                          percentile_dist=args.percentile_dist,
                          preference=preference,
                          factor=args.subway_factor,
                          color_by=args.color_by,
                          save_fig=True,
                          fig_path="./",
                          fig_name=(args.output_filename_prefix +
                                    '_cell_subway_map.png'),
                          fig_size=(args.fig_width, args.fig_height),
                          fig_legend_ncol=args.fig_legend_ncol)

        st.stream_plot(adata,
                       root=args.root,
                       preference=preference,
                       factor_num_win=args.factor_num_win,
                       factor_min_win=args.factor_min_win,
                       factor_width=args.factor_width,
                       flag_log_view=args.flag_log_view,
                       factor_zoomin=args.factor_zoomin,
                       save_fig=True,
                       fig_path="./",
                       fig_name=(args.output_filename_prefix +
                                 '_cell_stream_plot.png'),
                       fig_size=(args.fig_width, args.fig_height),
                       fig_legend=True,
                       fig_legend_ncol=args.fig_legend_ncol,
                       tick_fontsize=20,
                       label_fontsize=25)

    if args.flag_genes:
        genes = args.genes.split(',')
        st.subwaymap_plot_gene(adata,
                               root=args.root,
                               genes=genes,
                               preference=preference,
                               percentile_dist=args.percentile_dist,
                               factor=args.subway_factor,
                               save_fig=True,
                               fig_path="./",
                               fig_format='png',
                               fig_size=(args.fig_width, args.fig_height))
        #              , fig_name=(args.output_filename_prefix + '_gene_subway_plot.png'))

        st.stream_plot_gene(adata,
                            root=args.root,
                            genes=genes,
                            preference=preference,
                            factor_min_win=args.factor_min_win,
                            factor_num_win=args.factor_num_win,
                            factor_width=args.factor_width,
                            save_fig=True,
                            fig_path="./",
                            fig_format='png',
                            fig_size=(args.fig_width, args.fig_height),
                            tick_fontsize=20,
                            label_fontsize=25)
        #           , fig_name=(args.output_filename_prefix + '_gene_stream_plot.png'))

    # Persist the (unchanged) object for downstream steps.
    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')

    print('Finished computation.')
예제 #27
0
import dash_bootstrap_components as dbc
import time
import plotly.graph_objects as go

### import calculation related packages
import numpy as np
import stream as st
import matplotlib

matplotlib.use('Agg')

from app import app

### preset data
adata_computed = st.read(
    file_name='./SampleData/Nestorowa_2016/Nestorowa-2016.pkl',
    workdir='./stream_result')
adata = st.read(
    file_name='./SampleData/Nestorowa_2016/Nestorowa-2016-raw.h5ad',
    workdir='./stream_result')
adata.uns[
    'discription'] = 'This scRNA-seq dataset contains 1656 cells and 40594 genes from mouse hematopoietic stem and progenitor cell differentiation. A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation. Blood 128, e20-31 (2016).'
fig_ds = st.plot_stream(adata_computed, root='S1', return_svg=True)

### Set optionals
available_samples = [
    'Nestorowa, S. et al. 2016', 'Harrison, S. et al. 2021',
    'Trapnell, C. et al. 2014', ' Tang, Q. et al. 2017'
]
available_normalization = [
    'Library size correction', 'TF-IDF transformation', 'None'
예제 #28
0
def stream_test_Nestorowa_2016():
    """End-to-end regression test of the STREAM pipeline on Nestorowa 2016.

    Unpacks the reference results shipped as ``stream_result.tar.gz``, reruns
    the full analysis (QC, feature selection, dimension reduction, elastic
    principal graph, marker detection) into a temporary folder, then compares
    every reference file against its recomputed counterpart: PDFs only need a
    non-empty counterpart, tab-separated tables must match column by column
    (numeric columns with a float tolerance).

    Raises:
        Exception: when a recomputed file does not match its reference.
    """
    workdir = os.path.join(_root, 'datasets/Nestorowa_2016/')

    temp_folder = tempfile.gettempdir()

    # Unpack the reference results; the context manager guarantees the
    # archive handle is closed even if extraction fails (the original
    # leaked the handle on error).
    with tarfile.open(workdir + 'output/stream_result.tar.gz') as tar:
        tar.extractall(path=temp_folder)
    ref_temp_folder = os.path.join(temp_folder, 'stream_result')

    print(workdir + 'data_Nestorowa.tsv.gz')
    input_file = os.path.join(workdir, 'data_Nestorowa.tsv.gz')
    label_file = os.path.join(workdir, 'cell_label.tsv.gz')
    label_color_file = os.path.join(workdir, 'cell_label_color.tsv.gz')
    comp_temp_folder = os.path.join(temp_folder, 'stream_result_comp')

    try:
        st.set_figure_params(dpi=80,
                             style='white',
                             figsize=[5.4, 4.8],
                             rc={'image.cmap': 'viridis'})
        adata = st.read(file_name=input_file, workdir=comp_temp_folder)
        adata.var_names_make_unique()
        adata.obs_names_make_unique()
        st.add_cell_labels(adata, file_name=label_file)
        st.add_cell_colors(adata, file_name=label_color_file)
        st.cal_qc(adata, assay='rna')
        st.filter_features(adata, min_n_cells=5)
        st.select_variable_genes(adata, n_genes=2000, save_fig=True)
        st.select_top_principal_components(adata,
                                           feature='var_genes',
                                           first_pc=True,
                                           n_pc=30,
                                           save_fig=True)
        st.dimension_reduction(adata,
                               method='se',
                               feature='top_pcs',
                               n_neighbors=100,
                               n_components=4,
                               n_jobs=2)
        st.plot_dimension_reduction(adata,
                                    color=['label', 'Gata1', 'n_genes'],
                                    n_components=3,
                                    show_graph=False,
                                    show_text=False,
                                    save_fig=True,
                                    fig_name='dimension_reduction.pdf')
        st.plot_visualization_2D(adata,
                                 method='umap',
                                 n_neighbors=100,
                                 color=['label', 'Gata1', 'n_genes'],
                                 use_precomputed=False,
                                 save_fig=True,
                                 fig_name='visualization_2D.pdf')
        st.seed_elastic_principal_graph(adata, n_clusters=20)
        st.plot_dimension_reduction(adata,
                                    color=['label', 'Gata1', 'n_genes'],
                                    n_components=2,
                                    show_graph=True,
                                    show_text=False,
                                    save_fig=True,
                                    fig_name='dr_seed.pdf')
        st.plot_branches(adata,
                         show_text=True,
                         save_fig=True,
                         fig_name='branches_seed.pdf')
        st.elastic_principal_graph(adata,
                                   epg_alpha=0.01,
                                   epg_mu=0.05,
                                   epg_lambda=0.01)
        st.plot_dimension_reduction(adata,
                                    color=['label', 'Gata1', 'n_genes'],
                                    n_components=2,
                                    show_graph=True,
                                    show_text=False,
                                    save_fig=True,
                                    fig_name='dr_epg.pdf')
        st.plot_branches(adata,
                         show_text=True,
                         save_fig=True,
                         fig_name='branches_epg.pdf')
        # Extend leaf branch to reach further cells
        st.extend_elastic_principal_graph(adata,
                                          epg_ext_mode='QuantDists',
                                          epg_ext_par=0.8)
        st.plot_dimension_reduction(adata,
                                    color=['label'],
                                    n_components=2,
                                    show_graph=True,
                                    show_text=True,
                                    save_fig=True,
                                    fig_name='dr_extend.pdf')
        st.plot_branches(adata,
                         show_text=True,
                         save_fig=True,
                         fig_name='branches_extend.pdf')
        st.plot_visualization_2D(
            adata,
            method='umap',
            n_neighbors=100,
            color=['label', 'branch_id_alias', 'S4_pseudotime'],
            use_precomputed=False,
            save_fig=True,
            fig_name='visualization_2D_2.pdf')
        st.plot_flat_tree(adata,
                          color=['label', 'branch_id_alias', 'S4_pseudotime'],
                          dist_scale=0.5,
                          show_graph=True,
                          show_text=True,
                          save_fig=True)
        st.plot_stream_sc(adata,
                          root='S4',
                          color=['label', 'Gata1'],
                          dist_scale=0.5,
                          show_graph=True,
                          show_text=False,
                          save_fig=True)
        st.plot_stream(adata,
                       root='S4',
                       color=['label', 'Gata1'],
                       save_fig=True)
        st.detect_leaf_markers(adata,
                               marker_list=adata.uns['var_genes'][:300],
                               root='S4',
                               n_jobs=4)
        st.detect_transition_markers(adata,
                                     root='S4',
                                     marker_list=adata.uns['var_genes'][:300],
                                     n_jobs=4)
        st.detect_de_markers(adata,
                             marker_list=adata.uns['var_genes'][:300],
                             root='S4',
                             n_jobs=4)
        # st.write(adata,file_name='stream_result.pkl')
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit are not
        # intercepted; analysis failures are still re-raised to the caller.
        print("STREAM analysis failed!")
        raise
    else:
        print("STREAM analysis finished!")

    print(ref_temp_folder)
    print(comp_temp_folder)

    pathlist = Path(ref_temp_folder)
    for path in pathlist.glob('**/*'):
        if path.is_file() and (not path.name.startswith('.')):
            file = os.path.relpath(str(path), ref_temp_folder)
            print(file)
            if (file.endswith('pdf')):
                # Figures are not reproducible bit-for-bit; only require a
                # non-empty recomputed counterpart.
                if (os.path.getsize(os.path.join(comp_temp_folder, file)) > 0):
                    print('The file %s passed' % file)
                else:
                    raise Exception('Error! The file %s is not matched' % file)
            else:
                # Tab-separated tables must match the reference column-wise;
                # numeric columns are compared with np.isclose tolerance.
                checklist = list()
                df_ref = pd.read_csv(os.path.join(ref_temp_folder, file),
                                     sep='\t')
                df_comp = pd.read_csv(os.path.join(comp_temp_folder, file),
                                      sep='\t')
                for c in df_ref.columns:
                    if (is_numeric_dtype(df_ref[c])):
                        checklist.append(all(np.isclose(df_ref[c],
                                                        df_comp[c])))
                    else:
                        checklist.append(all(df_ref[c] == df_comp[c]))
                if (all(checklist)):
                    print('The file %s passed' % file)
                else:
                    raise Exception('Error! The file %s is not matched' % file)

    print('Successful!')

    rmtree(comp_temp_folder, ignore_errors=True)
    rmtree(ref_temp_folder, ignore_errors=True)
def main():
    """Command-line entry point for the STREAM feature-selection module.

    Reads a pickled AnnData produced by the preprocessing step, optionally
    selects variable genes and/or top principal components (each step saves
    its diagnostic figure), and writes the updated object back to disk as
    ``<prefix>_stream_result.pkl``.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m", "--data-file", dest="input_filename", default=None,
        metavar="FILE",
        help="input file name, pkl format from Stream preprocessing module")
    # Boolean switches selecting which steps run and how PCA behaves.
    parser.add_argument("--flag_useprecomputed", dest="flag_useprecomputed",
                        action="store_true",
                        help="use precomputed features for PCA")
    parser.add_argument("--flag_firstpc", dest="flag_firstpc",
                        action="store_true",
                        help="Use the first principal component")
    parser.add_argument("--flag_pca", dest="flag_pca", action="store_true",
                        help="perform PCA")
    parser.add_argument("--flag_variable", dest="flag_variable",
                        action="store_true", help="find variable genes")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    # Numeric tuning parameters (None lets STREAM pick its own defaults).
    parser.add_argument("-lf", "--loess_fraction", dest="loess_fraction",
                        type=float, default=None, help="loess fraction")
    parser.add_argument("-per", dest="percentile", type=int, default=None,
                        help="percent of variable genes to find")
    parser.add_argument("-n_g", dest="num_genes", type=int, default=None,
                        help="num genes")
    parser.add_argument("-n_j", dest="num_jobs", type=int, default=None,
                        help="num jobs")
    parser.add_argument("-feat", dest="feature", default=None, help="feature")
    parser.add_argument("-n_pc", dest="num_principal_components", type=int,
                        default=None, help="num principal components")
    parser.add_argument("-max_pc", dest="max_principal_components", type=int,
                        default=None, help="max principal components")
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8,
                        help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8,
                        help="")
    parser.add_argument("--flag", dest="flag", action="store_true",
                        help="debugging flag")

    args = parser.parse_args()

    print('Starting feature selection procedure...')
    print(args)
    workdir = "./"

    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)
    print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    # Both plotting steps share the same figure dimensions.
    fig_size = (args.fig_width, args.fig_height)

    if args.flag_variable:
        # Variable-gene selection; plot saved as <prefix>_variable_genes.png.
        st.select_variable_genes(
            adata,
            loess_frac=args.loess_fraction,
            percentile=args.percentile,
            n_genes=args.num_genes,
            n_jobs=args.num_jobs,
            save_fig=True,
            fig_name=(args.output_filename_prefix + '_variable_genes.png'),
            fig_size=fig_size,
            fig_path="./")

    if args.flag_pca:
        # PCA-based component selection; plot saved as <prefix>_pca.png.
        st.select_top_principal_components(
            adata,
            feature=args.feature,
            n_pc=args.num_principal_components,
            max_pc=args.max_principal_components,
            first_pc=args.flag_firstpc,
            use_precomputed=args.flag_useprecomputed,
            save_fig=True,
            fig_name=(args.output_filename_prefix + '_pca.png'),
            fig_size=fig_size,
            fig_path='./')

    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./', file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    print('Finished computation.')
def main():
    """Command-line entry point for the STREAM preprocessing module.

    Loads an expression matrix (a STREAM pkl or a raw matrix file), attaches
    cell labels/colors, applies optional library-size normalization, log2
    transform and mitochondrial-gene removal, filters cells and genes, and
    writes the result to ``<prefix>_stream_result.pkl``.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--matrix", dest="input_filename", default=None,
                        help="input file name", metavar="FILE")
    parser.add_argument("-l", "--cell_labels", dest="cell_label_filename",
                        default=None, help="filename of cell labels")
    parser.add_argument("-c", "--cell_labels_colors",
                        dest="cell_label_color_filename", default=None,
                        help="filename of cell label colors")
    parser.add_argument("--log2", dest="flag_log2", action="store_true",
                        help="perform log2 transformation")
    parser.add_argument("--norm", dest="flag_norm", action="store_true",
                        help="normalize data based on library size")
    parser.add_argument("-o", "--output_folder", dest="output_folder",
                        default=None, help="Output folder")
    parser.add_argument("-rmt", "--remove_mt_genes",
                        dest="flag_remove_mt_genes", action="store_true",
                        default=False, help="Remove Mitochondrial genes")
    parser.add_argument("-mcg", "--min_count_genes", dest="min_count_genes",
                        type=int, default=None,
                        help="filter cells with less than this many genes")
    parser.add_argument("-mpg", "--min_percent_genes",
                        dest="min_percent_genes", type=float, default=None,
                        help="The minimum percent genes")
    parser.add_argument("-mpc", "--min_percent_cells",
                        dest="min_percent_cells", type=float, default=None,
                        help="The minimum percent cells")
    parser.add_argument("-mcc", "--min_count_cells", dest="min_count_cells",
                        type=int, default=None,
                        help="The minimum count cells")
    parser.add_argument("-mnc", "--min_num_cells", dest="min_num_cells",
                        type=int, default=None,
                        help="The minimum number of cells")
    parser.add_argument("-ec", "--expression_cutoff",
                        dest="expression_cutoff", type=float, default=None,
                        help="The expression cutoff")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamOutput",
                        help="output file name prefix")

    args = parser.parse_args()

    print(args)

    input_filename = args.input_filename
    cell_label_filename = args.cell_label_filename
    cell_label_color_filename = args.cell_label_color_filename
    flag_norm = args.flag_norm
    flag_log2 = args.flag_log2
    output_folder = args.output_folder  # work directory
    flag_remove_mt_genes = args.flag_remove_mt_genes
    min_count_genes = args.min_count_genes
    min_percent_cells = args.min_percent_cells
    min_percent_genes = args.min_percent_genes

    min_count_cells = args.min_count_cells
    min_num_cells = args.min_num_cells
    expression_cutoff = args.expression_cutoff
    output_filename_prefix = args.output_filename_prefix

    print('Starting mapping procedure...')
    # NOTE(review): workdir_ref is computed but never read below; all output
    # is written to the current directory. Kept to preserve behavior.
    if output_folder is None:
        workdir_ref = os.path.join(os.getcwd(), 'stream_result')
    else:
        workdir_ref = output_folder
    workdir = "./"

    # pkl files carry a complete STREAM AnnData; anything else is parsed as
    # a raw matrix by st.read's format auto-detection.
    if input_filename.endswith('pkl'):
        adata = st.read(file_name=input_filename,
                        file_format='pkl',
                        workdir=workdir)
    else:
        adata = st.read(file_name=input_filename, workdir=workdir)
    print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    adata.var_names_make_unique()
    adata.obs_names_make_unique()
    if cell_label_filename is not None:
        st.add_cell_labels(adata, file_name=cell_label_filename)
    else:
        st.add_cell_labels(adata)
    if cell_label_color_filename is not None:
        st.add_cell_colors(adata, file_name=cell_label_color_filename)
    else:
        st.add_cell_colors(adata)

    if flag_norm:
        st.normalize_per_cell(adata)

    if flag_log2:
        st.log_transform(adata, base=2)

    if flag_remove_mt_genes:
        st.remove_mt_genes(adata)

    st.filter_cells(adata,
                    min_pct_genes=min_percent_genes,
                    min_count=min_count_genes,
                    expr_cutoff=expression_cutoff)
    # BUG FIX: filter_genes previously received min_count=min_count_genes,
    # which made the -mcc/--min_count_cells option silently ignored; pass
    # the gene-level count threshold (min_count_cells) instead.
    st.filter_genes(adata,
                    min_num_cells=min_num_cells,
                    min_pct_cells=min_percent_cells,
                    min_count=min_count_cells,
                    expr_cutoff=expression_cutoff)

    print("Writing " + output_filename_prefix + " " +
          args.output_filename_prefix + "_stream_result.pkl")
    st.write(adata,
             file_name=(output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    print('Finished computation.')
def main():
    """Command-line entry point for the STREAM 2D-visualization module.

    Reads a pickled STREAM result, renders a 2D embedding colored by the
    requested annotation, saves it as ``<prefix>_2D_plot.png``, and writes
    the updated object back to ``<prefix>_stream_result.pkl``.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m", "--data-file", dest="input_filename", default=None,
        metavar="FILE",
        help="input file name, pkl format from Stream preprocessing module")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("--flag_useprecomputed", dest="flag_useprecomputed",
                        action="store_true", help="Save the figure")
    # Embedding parameters (None lets STREAM pick its own defaults).
    parser.add_argument("-nb_pct", "--percent_neighbor_cells", dest="nb_pct",
                        type=float, default=None, help="")
    parser.add_argument("-n_comp_k", dest="n_comp_k", type=int, default=None,
                        help="")
    parser.add_argument("-perplexity", dest="perplexity", type=float,
                        default=None, help="")
    parser.add_argument("-method", dest="method", default=None, help="")
    parser.add_argument("-color_by", dest="color_by", default='label',
                        help="")
    # Figure layout options.
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8,
                        help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8,
                        help="")
    parser.add_argument("-fig_legend_ncol", dest="fig_legend_ncol", type=int,
                        default=None, help="")

    args = parser.parse_args()

    print('Starting ...')
    workdir = "./"

    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)

    # Render the 2D plot and save it into the current directory.
    st.plot_visualization_2D(
        adata,
        method=args.method,
        nb_pct=args.nb_pct,
        perplexity=args.perplexity,
        color_by=args.color_by,
        use_precomputed=args.flag_useprecomputed,
        save_fig=True,
        fig_path='./',
        fig_name=(args.output_filename_prefix + "_2D_plot.png"),
        fig_size=(args.fig_width, args.fig_height),
        fig_legend_ncol=args.fig_legend_ncol)

    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./', file_format='pkl')

    print('Finished computation.')