def prepare(self):
    """Initialise the per-run state used when composing pipeline jobs.

    Reads the mapfile via ``parse_map_col4``, calls ``link_data`` for the
    parsed fastq dict, creates ``<outdir>/log`` and resets the sjm/shell
    accumulators.  Finally fills ``self.outdir_dic`` so that
    ``self.outdir_dic[sample][step]`` is ``<outdir>/<sample>/<NN>.<step>``,
    where ``NN`` is the zero-padded position of ``step`` in
    ``self.__STEPS__``.
    """
    # parse_mapfile
    parsed = parse_map_col4(self.args.mapfile, self.col4_default)
    self.fq_dict, self.col4_dict = parsed

    # link
    link_data(self.outdir, self.fq_dict)

    # mk log dir
    self.logdir = self.outdir + '/log'
    os.system('mkdir -p %s' % (self.logdir))

    # script init
    self.sjm_cmd = 'log_dir %s\n' % (self.logdir)
    self.sjm_order = ''
    self.shell_dict = defaultdict(str)

    # outdir dict: one numbered output directory per (sample, step)
    self.outdir_dic = {}
    for sample in self.fq_dict:
        self.outdir_dic[sample] = {
            step: f"{self.outdir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(self.__STEPS__)
        }
def main():
    """Compose the per-sample job chain for this assay and emit it.

    Parses the command line, calls ``link_data`` for the mapfile's fastq
    dict, then builds one command per pipeline step for every sample
    (sample, barcode, cutadapt, STAR, featureCounts, count_capture_rna,
    analysis).  When ``args.mod`` is ``'sjm'`` the accumulated job and
    ordering text is passed to ``merge_report``; when it is ``'shell'`` one
    ``./shell/<sample>.sh`` file is written per sample.
    """
    # init
    assay, steps, conda = __ASSAY__, __STEPS__, __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--starMem', help='starMem', default=30)
    parser.add_argument('--genomeDir', help='genome index dir', required=True)
    parser.add_argument(
        '--gtf_type',
        help='Specify attribute type in GTF annotation, default=exon',
        default='exon')
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--probe_file', help="probe fasta file")
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    genomeDir = args.genomeDir
    starMem = args.starMem
    gtf_type = args.gtf_type
    probe_file = args.probe_file

    # mk log dir
    logdir = outdir + '/log'
    os.system('mkdir -p %s' % (logdir))

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    for sample in fq_dict:
        # outdir dict: <outdir>/<sample>/<NN>.<step>
        outdir_dic = {
            name: f"{outdir}/{sample}/{position:02d}.{name}"
            for position, name in enumerate(steps)
        }

        # Each entry: (step name, option string, generate_sjm resource kwargs)
        pipeline = []

        # sample
        pipeline.append((
            "sample",
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic["sample"]} --assay {assay} ',
            {},
        ))

        # barcode
        arr = fq_dict[sample]
        pipeline.append((
            "barcode",
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic["barcode"]} --assay {assay} '
            f'--probe_file {probe_file} ',
            {"m": 5, "x": thread},
        ))

        # adapt
        barcode_fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        pipeline.append((
            "cutadapt",
            f'--fq {barcode_fq} --sample {sample} --outdir '
            f'{outdir_dic["cutadapt"]} --assay {assay} ',
            {"m": 5, "x": 1},
        ))

        # STAR
        clean_fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        pipeline.append((
            'STAR',
            f'--fq {clean_fq} --sample {sample} '
            f'--genomeDir {genomeDir} --thread {thread} '
            f'--outdir {outdir_dic["STAR"]} --assay {assay} ',
            {"m": starMem, "x": thread},
        ))

        # featureCounts
        star_bam = f'{outdir_dic["STAR"]}/{sample}_Aligned.sortedByCoord.out.bam'
        pipeline.append((
            'featureCounts',
            f'--input {star_bam} --gtf_type {gtf_type} '
            f'--sample {sample} --thread {thread} '
            f'--outdir {outdir_dic["featureCounts"]} '
            f'--genomeDir {genomeDir} --assay {assay} ',
            {"m": 8, "x": thread},
        ))

        # count
        sorted_bam = f'{outdir_dic["featureCounts"]}/{sample}_name_sorted.bam'
        pipeline.append((
            'count_capture_rna',
            f'--bam {sorted_bam} --sample {sample} --cells auto '
            f'--outdir {outdir_dic["count_capture_rna"]} --assay {assay} '
            f'--match_dir {match_dict[sample]} '
            f'--genomeDir {genomeDir}',
            {"m": 8, "x": thread},
        ))

        # analysis
        matrix_file = f'{outdir_dic["count_capture_rna"]}/{sample}_matrix.tsv.gz'
        pipeline.append((
            'analysis',
            f'--matrix_file {matrix_file} --sample {sample} '
            f'--outdir {outdir_dic["analysis"]} '
            f'--assay {assay} ',
            {"m": 15, "x": 1},
        ))

        # Register every step; each step after the first is ordered after
        # the step that precedes it for the same sample.
        for position, (step, options, resources) in enumerate(pipeline):
            cmd = f'{app} {assay} {step} ' + options
            job = f'{step}_{sample}'
            sjm_cmd += generate_sjm(cmd, job, conda, **resources)
            if position > 0:
                sjm_order += f'order {job} after {last_step}_{sample}\n'
            shell_dict[sample] += cmd + '\n'
            last_step = step

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )

    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        for name, script in shell_dict.items():
            with open(f'./shell/{name}.sh', 'w') as handle:
                handle.write(script)
def main():
    """Compose the per-sample job chain for the fusion assay and emit it.

    Parses the command line, calls ``link_data`` for the mapfile's fastq
    dict, then builds one command per pipeline step for every sample
    (sample, barcode, cutadapt, STAR_fusion, count_fusion).  When
    ``args.mod`` is ``'sjm'`` the accumulated job and ordering text is
    passed to ``merge_report``; when it is ``'shell'`` one
    ``./shell/<sample>.sh`` file is written per sample.

    Every step, including ``count_fusion``, is recorded in the per-sample
    shell script so that shell mode runs the same chain as sjm mode.
    """
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--starMem', help='starMem', default=10)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--genomeDir', help='fusion genomeDir', required=True)
    parser.add_argument(
        "--fusion_pos",
        help="first base position of the second gene(0-start),tsv file",
        required=True)
    parser.add_argument("--UMI_min", default=1)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    genomeDir = args.genomeDir
    starMem = args.starMem
    fusion_pos = args.fusion_pos
    UMI_min = args.UMI_min

    # mk log dir (no shell involved, so paths with spaces are safe)
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    for sample in fq_dict:
        # outdir dict: <outdir>/<sample>/<NN>.<step>
        outdir_dic = {
            name: f"{outdir}/{sample}/{position:02d}.{name}"
            for position, name in enumerate(steps)
        }

        # Each entry: (step name, option string, generate_sjm resource kwargs)
        pipeline = []

        # sample
        pipeline.append((
            "sample",
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic["sample"]} --assay {assay} ',
            {},
        ))

        # barcode
        arr = fq_dict[sample]
        pipeline.append((
            "barcode",
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic["barcode"]} --assay {assay} ',
            {"m": 5, "x": thread},
        ))

        # adapt
        barcode_fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        pipeline.append((
            "cutadapt",
            f'--fq {barcode_fq} --sample {sample} --outdir '
            f'{outdir_dic["cutadapt"]} --assay {assay} ',
            {"m": 5, "x": 1},
        ))

        # STAR_fusion
        input_read = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        pipeline.append((
            'STAR_fusion',
            f'--outdir {outdir_dic["STAR_fusion"]} --assay {assay} '
            f'--sample {sample} '
            f'--thread {thread} '
            f'--input_read {input_read} '
            f'--genomeDir {genomeDir} ',
            {"m": starMem, "x": thread},
        ))

        # count_fusion
        bam = f'{outdir_dic["STAR_fusion"]}/{sample}_Aligned.sortedByCoord.out.bam'
        pipeline.append((
            'count_fusion',
            f'--outdir {outdir_dic["count_fusion"]} --assay {assay} '
            f'--sample {sample} '
            f'--bam {bam} '
            f'--UMI_min {UMI_min} '
            f'--match_dir {match_dict[sample]} '
            f'--fusion_pos {fusion_pos} ',
            {"m": 20, "x": thread},
        ))

        # Register every step uniformly so no step can be left out of the
        # shell script; each step after the first is ordered after the
        # preceding step of the same sample.
        for position, (step, options, resources) in enumerate(pipeline):
            cmd = f'{app} {assay} {step} ' + options
            job = f'{step}_{sample}'
            sjm_cmd += generate_sjm(cmd, job, conda, **resources)
            if position > 0:
                sjm_order += f'order {job} after {last_step}_{sample}\n'
            shell_dict[sample] += cmd + '\n'
            last_step = step

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )

    if mod == 'shell':
        os.makedirs('./shell', exist_ok=True)
        for name, script in shell_dict.items():
            with open(f'./shell/{name}.sh', 'w') as handle:
                handle.write(script)
def main():
    """Compose the per-sample job chain for the SMK assay and emit it.

    Parses the command line, calls ``link_data`` for the mapfile's fastq
    dict, then builds one command per pipeline step for every sample
    (sample, barcode, cutadapt, mapping_smk, count_smk, analysis_smk).
    When ``args.mod`` is ``'sjm'`` the accumulated job and ordering text is
    passed to ``merge_report``; when it is ``'shell'`` one
    ``./shell/<sample>.sh`` file is written per sample, each containing
    only that sample's commands.
    """
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument(
        "--UMI_min",
        help="cells have SMK_UMI>=UMI_min are considered as valid cell",
        default="auto")
    parser.add_argument("--dim", help="SMK tag dimension", default=1)
    parser.add_argument(
        "--SNR_min",
        help="minimum signal to noise ratio",
        default="auto")
    parser.add_argument("--SMK_pattern", help="SMK read2 pattern")
    parser.add_argument("--SMK_linker", help="SMK read2 linker fasta path")
    parser.add_argument("--SMK_barcode", help="SMK read2 barcode fasta path ")
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, "auto")

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    UMI_min = args.UMI_min
    dim = args.dim
    SNR_min = args.SNR_min
    SMK_pattern = args.SMK_pattern
    SMK_linker = args.SMK_linker
    SMK_barcode = args.SMK_barcode

    # mk log dir (no shell involved, so paths with spaces are safe)
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    # Shell commands are kept per sample so each sample gets its own script.
    shell_dict = {}

    # run
    for sample in fq_dict:
        # outdir dict: <outdir>/<sample>/<NN>.<step>
        outdir_dic = {
            name: f"{outdir}/{sample}/{position:02d}.{name}"
            for position, name in enumerate(steps)
        }

        # Each entry: (step name, option string, generate_sjm resource kwargs)
        pipeline = []

        # sample
        pipeline.append((
            "sample",
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic["sample"]} --assay {assay} ',
            {},
        ))

        # barcode
        arr = fq_dict[sample]
        pipeline.append((
            "barcode",
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic["barcode"]} --assay {assay} ',
            {"m": 5, "x": thread},
        ))

        # adapt
        barcode_fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        pipeline.append((
            "cutadapt",
            f'--fq {barcode_fq} --sample {sample} --outdir '
            f'{outdir_dic["cutadapt"]} --assay {assay} ',
            {"m": 5, "x": 1},
        ))

        # mapping_smk
        SMK_read2 = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        pipeline.append((
            'mapping_smk',
            f'--sample {sample} '
            f'--outdir {outdir_dic["mapping_smk"]} '
            f'--assay {assay} '
            f'--SMK_read2 {SMK_read2} '
            f'--SMK_pattern {SMK_pattern} '
            f'--SMK_barcode {SMK_barcode} '
            f'--SMK_linker {SMK_linker} ',
            {"m": 5, "x": 1},
        ))

        # count_smk
        read_file = f'{outdir_dic["mapping_smk"]}/{sample}_read_count.tsv'
        pipeline.append((
            'count_smk',
            f'--sample {sample} '
            f'--outdir {outdir_dic["count_smk"]} '
            f'--assay {assay} '
            f'--match_dir {match_dict[sample]} '
            f'--read_file {read_file} '
            f'--dim {dim} '
            f'--UMI_min {UMI_min} '
            f'--SNR_min {SNR_min} ',
            {"m": 5, "x": 1},
        ))

        # analysis_smk
        tsne_tag_file = f'{outdir_dic["count_smk"]}/{sample}_tsne_tag.tsv'
        pipeline.append((
            'analysis_smk',
            f'--sample {sample} '
            f'--outdir {outdir_dic["analysis_smk"]} '
            f'--assay {assay} '
            f'--match_dir {match_dict[sample]} '
            f'--tsne_tag_file {tsne_tag_file} ',
            {"m": 5, "x": 1},
        ))

        # Register every step; each step after the first is ordered after
        # the preceding step of the same sample.
        script_lines = []
        for position, (step, options, resources) in enumerate(pipeline):
            cmd = f'{app} {assay} {step} ' + options
            job = f'{step}_{sample}'
            sjm_cmd += generate_sjm(cmd, job, conda, **resources)
            if position > 0:
                sjm_order += f'order {job} after {last_step}_{sample}\n'
            script_lines.append(cmd + '\n')
            last_step = step
        shell_dict[sample] = ''.join(script_lines)

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )

    if mod == 'shell':
        os.makedirs('./shell', exist_ok=True)
        # One file per sample; iterating the dict also makes an empty
        # mapfile a no-op instead of a NameError on the loop variable.
        for name, script in shell_dict.items():
            with open(f'./shell/{name}.sh', 'w') as handle:
                handle.write(script)
def main():
    """Compose the per-sample job chain for the CITE-seq assay and emit it.

    Parses the command line, calls ``link_data`` for the mapfile's fastq
    dict, then builds one command per pipeline step for every sample
    (sample, barcode, cutadapt, mapping_tag, count_cite, analysis_cite).
    When ``args.mod`` is ``'sjm'`` the accumulated job and ordering text is
    passed to ``merge_report``; when it is ``'shell'`` one
    ``./shell/<sample>.sh`` file is written per sample.
    """
    # init
    assay, steps, conda = __ASSAY__, __STEPS__, __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument(
        "--fq_pattern", help="tag read2 pattern", required=True)
    parser.add_argument("--linker_fasta", help="linker fasta")
    parser.add_argument(
        "--barcode_fasta", help="barcode fasta", required=True)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files
    # not added here; presumably registered by multi_opts - verify
    minimum_length = args.minimum_length

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    fq_pattern = args.fq_pattern
    linker_fasta = args.linker_fasta
    barcode_fasta = args.barcode_fasta

    # mk log dir
    logdir = outdir + '/log'
    os.system('mkdir -p %s' % (logdir))

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    # run
    for sample in fq_dict:
        # outdir dict: <outdir>/<sample>/<NN>.<step>
        outdir_dic = {
            name: f"{outdir}/{sample}/{position:02d}.{name}"
            for position, name in enumerate(steps)
        }

        # Each entry: (step name, option string, generate_sjm resource kwargs)
        pipeline = []

        # sample
        pipeline.append((
            "sample",
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic["sample"]} --assay {assay} ',
            {},
        ))

        # barcode
        arr = fq_dict[sample]
        pipeline.append((
            "barcode",
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic["barcode"]} --assay {assay} ',
            {"m": 5, "x": thread},
        ))

        # adapt
        barcode_fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        pipeline.append((
            "cutadapt",
            f'--fq {barcode_fq} --sample {sample} --outdir '
            f'{outdir_dic["cutadapt"]} --assay {assay} '
            f'--minimum_length {minimum_length} ',
            {"m": 5, "x": 1},
        ))

        # mapping_tag
        clean_fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        pipeline.append((
            'mapping_tag',
            f'--sample {sample} '
            f'--outdir {outdir_dic["mapping_tag"]} '
            f'--assay {assay} '
            f'--fq {clean_fq} '
            f'--fq_pattern {fq_pattern} '
            f'--barcode_fasta {barcode_fasta} '
            f'--linker_fasta {linker_fasta} ',
            {"m": 5, "x": 1},
        ))

        # count_cite
        read_count_file = f'{outdir_dic["mapping_tag"]}/{sample}_read_count.tsv'
        pipeline.append((
            'count_cite',
            f'--sample {sample} '
            f'--outdir {outdir_dic["count_cite"]} '
            f'--assay {assay} '
            f'--match_dir {match_dict[sample]} '
            f'--read_count_file {read_count_file} ',
            {"m": 5, "x": 1},
        ))

        # analysis_cite
        citeseq_mtx = f'{outdir_dic["count_cite"]}/{sample}_citeseq.mtx.gz'
        pipeline.append((
            'analysis_cite',
            f'--sample {sample} '
            f'--outdir {outdir_dic["analysis_cite"]} '
            f'--assay {assay} '
            f'--match_dir {match_dict[sample]} '
            f'--citeseq_mtx {citeseq_mtx} ',
            {"m": 5, "x": 1},
        ))

        # Register every step; each step after the first is ordered after
        # the preceding step of the same sample.
        for position, (step, options, resources) in enumerate(pipeline):
            cmd = f'{app} {assay} {step} ' + options
            job = f'{step}_{sample}'
            sjm_cmd += generate_sjm(cmd, job, conda, **resources)
            if position > 0:
                sjm_order += f'order {job} after {last_step}_{sample}\n'
            shell_dict[sample] += cmd + '\n'
            last_step = step

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )

    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        for name, script in shell_dict.items():
            with open(f'./shell/{name}.sh', 'w') as handle:
                handle.write(script)
def main():
    """Compose the per-sample job chain for the HLA assay and emit it.

    Parses the command line, calls ``link_data`` for the mapfile's fastq
    dict, then builds one command per pipeline step for every sample
    (sample, barcode, cutadapt, mapping_hla).  When ``args.mod`` is
    ``'sjm'`` the accumulated job and ordering text is passed to
    ``merge_report``; when it is ``'shell'`` one ``./shell/<sample>.sh``
    file is written per sample, each containing only that sample's
    commands.
    """
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread

    # mk log dir (no shell involved, so paths with spaces are safe)
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    # Shell commands are kept per sample so each sample gets its own script.
    shell_dict = {}

    for sample in fq_dict:
        # outdir dict: <outdir>/<sample>/<NN>.<step>
        outdir_dic = {
            name: f"{outdir}/{sample}/{position:02d}.{name}"
            for position, name in enumerate(steps)
        }

        # Each entry: (step name, option string, generate_sjm resource kwargs)
        pipeline = []

        # sample
        pipeline.append((
            "sample",
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic["sample"]} --assay {assay} ',
            {},
        ))

        # barcode
        arr = fq_dict[sample]
        pipeline.append((
            "barcode",
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic["barcode"]} --assay {assay} ',
            {"m": 5, "x": thread},
        ))

        # adapt
        barcode_fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        pipeline.append((
            "cutadapt",
            f'--fq {barcode_fq} --sample {sample} --outdir '
            f'{outdir_dic["cutadapt"]} --assay {assay} ',
            {"m": 5, "x": 1},
        ))

        # mapping_hla
        clean_fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        pipeline.append((
            'mapping_hla',
            f'--fq {clean_fq} --sample {sample} '
            f'--thread {thread} '
            f'--match_dir {match_dict[sample]} '
            f'--outdir {outdir_dic["mapping_hla"]} --assay {assay} ',
            {"m": 30, "x": thread},
        ))

        # Register every step; each step after the first is ordered after
        # the preceding step of the same sample.
        script_lines = []
        for position, (step, options, resources) in enumerate(pipeline):
            cmd = f'{app} {assay} {step} ' + options
            job = f'{step}_{sample}'
            sjm_cmd += generate_sjm(cmd, job, conda, **resources)
            if position > 0:
                sjm_order += f'order {job} after {last_step}_{sample}\n'
            script_lines.append(cmd + '\n')
            last_step = step
        shell_dict[sample] = ''.join(script_lines)

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )

    if mod == 'shell':
        os.makedirs('./shell', exist_ok=True)
        # One file per sample; iterating the dict also makes an empty
        # mapfile a no-op instead of a NameError on the loop variable.
        for name, script in shell_dict.items():
            with open(f'./shell/{name}.sh', 'w') as handle:
                handle.write(script)