Ejemplo n.º 1
0
    def prepare(self):
        """Parse the mapfile and initialise the state used to build job scripts.

        Sets ``fq_dict`` and ``col4_dict`` from ``parse_map_col4``, calls
        ``link_data`` for the parsed samples, creates ``<outdir>/log``, resets
        the SJM command/order buffers and the per-sample shell buffer, and
        fills ``outdir_dic`` with one numbered output path per sample and step.
        """
        # parse_mapfile
        self.fq_dict, self.col4_dict = parse_map_col4(self.args.mapfile,
                                                      self.col4_default)

        # link
        link_data(self.outdir, self.fq_dict)

        # mk log dir
        # os.makedirs is used instead of a `mkdir -p` shell string so that
        # paths with spaces or shell metacharacters are handled correctly.
        self.logdir = self.outdir + '/log'
        os.makedirs(self.logdir, exist_ok=True)

        # script init
        self.sjm_cmd = 'log_dir %s\n' % (self.logdir)
        self.sjm_order = ''
        self.shell_dict = defaultdict(str)

        # outdir dict: sample -> {step: "<outdir>/<sample>/<NN>.<step>"}
        self.outdir_dic = {}
        for sample in self.fq_dict:
            self.outdir_dic[sample] = {
                step: f"{self.outdir}/{sample}/{index:02d}.{step}"
                for index, step in enumerate(self.__STEPS__)
            }
Ejemplo n.º 2
0
def main():
    """Build the multi-sample job scripts for the capture RNA assay.

    Parses the command line, then for every sample in the mapfile chains the
    steps sample -> barcode -> cutadapt -> STAR -> featureCounts ->
    count_capture_rna -> analysis. Each step's command is appended both to the
    SJM job text and to that sample's shell script text.

    When ``mod`` is ``'sjm'`` the accumulated jobs are passed to
    ``merge_report``; when it is ``'shell'`` one ``./shell/<sample>.sh`` file
    is written per sample.
    """
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--starMem', help='starMem', default=30)
    parser.add_argument('--genomeDir', help='genome index dir', required=True)
    parser.add_argument(
        '--gtf_type',
        help='Specify attribute type in GTF annotation, default=exon',
        default='exon')
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--probe_file', help="probe fasta file")
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    genomeDir = args.genomeDir
    starMem = args.starMem
    gtf_type = args.gtf_type
    probe_file = args.probe_file

    # mk log dir (no shell involved, so unusual characters in the path are safe)
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    for sample in fq_dict:
        # outdir dict: step -> "<outdir>/<sample>/<NN>.<step>"
        outdir_dic = {
            step: f"{outdir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(steps)
        }

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} '
            f'--probe_file {probe_file} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # STAR
        step = 'STAR'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} '
               f'--genomeDir {genomeDir} --thread {thread} '
               f'--outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd,
                                f'{step}_{sample}',
                                conda,
                                m=starMem,
                                x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # featureCounts
        step = 'featureCounts'
        # Named bam_input rather than `input` so the builtin is not shadowed.
        bam_input = (
            f'{outdir_dic["STAR"]}/{sample}_Aligned.sortedByCoord.out.bam')
        cmd = (
            f'{app} {assay} {step} '
            f'--input {bam_input} --gtf_type {gtf_type} '
            f'--sample {sample} --thread {thread} --outdir {outdir_dic[step]} '
            f'--genomeDir {genomeDir} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # count
        step = 'count_capture_rna'
        bam = f'{outdir_dic["featureCounts"]}/{sample}_name_sorted.bam'
        cmd = (f'{app} {assay} {step} '
               f'--bam {bam} --sample {sample} --cells auto '
               f'--outdir {outdir_dic[step]} --assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--genomeDir {genomeDir}')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # analysis
        step = 'analysis'
        matrix_file = f'{outdir_dic["count_capture_rna"]}/{sample}_matrix.tsv.gz'
        cmd = (f'{app} {assay} {step} '
               f'--matrix_file {matrix_file} --sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=15, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        # one script per sample, written relative to the working directory
        os.makedirs('./shell/', exist_ok=True)
        for sample in shell_dict:
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(shell_dict[sample])
Ejemplo n.º 3
0
def main():
    """Write an SJM job file that demultiplexes and assembles every sample.

    For each sample in the mapfile one ``demultiplex_<sample>`` job is
    generated, followed by one ``assemble_<sample>_<index>`` job per entry of
    the comma-separated ``--index`` value; each assemble job is ordered after
    its sample's demultiplex job. The result is written to
    ``<outdir>/log/sjm.job``.
    """
    parser = argparse.ArgumentParser('single cell vdj assembly')
    parser.add_argument(
        '--mapfile',
        help='mapfile, 3 columns, "LibName\\tDataDir\\tSampleName"',
        required=True)
    parser.add_argument('--linker',
                        help='linker',
                        dest="LINKER",
                        required=True)
    parser.add_argument('--index',
                        dest="index",
                        help='index,seperate by comma',
                        required=True)
    parser.add_argument('--outdir', default="./")
    args = vars(parser.parse_args())

    fq_dict, _ = parse_map_col4(args['mapfile'], None)
    index = args["index"]
    index_list = index.split(",")
    LINKER = args["LINKER"]

    # log directory also receives the generated sjm.job file
    logdir = args['outdir'] + '/log'
    os.system('mkdir -p %s' % (logdir))
    basedir = os.path.realpath(args['outdir'])

    # Job definitions and ordering lines are collected in call order and
    # joined once at the end.
    job_chunks = ['log_dir %s\n' % (logdir)]
    order_chunks = []

    conda = 'scope1.0'
    demultiplex_app = toolsdir + '/demultiplex.py'
    assemble_app = toolsdir + '/assemble.py'
    demultiplex_template = """
        source activate {conda};
        python {app} \
        {samplename} {read1_file} {read2_file} {LINKER} {index} {outdir}
        """

    for sample_name, fastqs in fq_dict.items():
        # demultiplex
        demultiplex_dir = f'{basedir}/{sample_name}/demultiplex/'
        demultiplex_cmd = demultiplex_template.format(
            conda=conda,
            app=demultiplex_app,
            samplename=sample_name,
            read1_file=fastqs[1],
            read2_file=fastqs[0],
            LINKER=LINKER,
            index=index,
            outdir=demultiplex_dir,
        )
        job_chunks.append(
            generate_sjm(demultiplex_cmd, 'demultiplex_' + sample_name, m=15))

        # assemble: one job per index, each run from its own directory
        for idx in index_list:
            fq1 = f"{demultiplex_dir}/{idx}/{idx}_1.fq.gz"
            fq2 = f"{demultiplex_dir}/{idx}/{idx}_2.fq.gz"
            assemble_dir = f'{basedir}/{sample_name}/assemble/{idx}'
            os.system("mkdir -p " + assemble_dir)
            assemble_cmd = (f'cd {assemble_dir}; '
                            f'source activate {conda}; '
                            f'python {assemble_app} '
                            f'{fq1} {fq2} ')
            job_name = f"assemble_{sample_name}_{idx}"
            job_chunks.append(generate_sjm(assemble_cmd, job_name, m=50))
            order_chunks.append(
                f'order {job_name} after demultiplex_{sample_name}\n')

    sjm_cmd = ''.join(job_chunks)
    sjm_order = ''.join(order_chunks)
    with open(logdir + '/sjm.job', 'w') as fh:
        fh.write(sjm_cmd + '\n')
        fh.write(sjm_order)
Ejemplo n.º 4
0
def main():
    """Build the multi-sample job scripts for the fusion assay.

    Parses the command line, then for every sample in the mapfile chains the
    steps sample -> barcode -> cutadapt -> STAR_fusion -> count_fusion. Each
    step's command is appended both to the SJM job text and to that sample's
    shell script text.

    When ``mod`` is ``'sjm'`` the accumulated jobs are passed to
    ``merge_report``; when it is ``'shell'`` one ``./shell/<sample>.sh`` file
    is written per sample.
    """
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--starMem', help='starMem', default=10)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--genomeDir', help='fusion genomeDir', required=True)
    parser.add_argument(
        "--fusion_pos",
        help="first base position of the second gene(0-start),tsv file",
        required=True)
    parser.add_argument("--UMI_min", default=1)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    genomeDir = args.genomeDir
    starMem = args.starMem
    fusion_pos = args.fusion_pos
    UMI_min = args.UMI_min

    # mk log dir (no shell involved, so unusual characters in the path are safe)
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    for sample in fq_dict:
        # outdir dict: step -> "<outdir>/<sample>/<NN>.<step>"
        outdir_dic = {
            step: f"{outdir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(steps)
        }

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # STAR_fusion
        step = 'STAR_fusion'
        input_read = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (
            f'{app} {assay} {step} '
            f'--outdir {outdir_dic[step]} --assay {assay} --sample {sample} '
            f'--thread {thread} '
            f'--input_read {input_read} '
            f'--genomeDir {genomeDir} ')
        sjm_cmd += generate_sjm(cmd,
                                f'{step}_{sample}',
                                conda,
                                m=starMem,
                                x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # count_fusion
        step = 'count_fusion'
        bam = f'{outdir_dic["STAR_fusion"]}/{sample}_Aligned.sortedByCoord.out.bam'
        cmd = (
            f'{app} {assay} {step} '
            f'--outdir {outdir_dic[step]} --assay {assay} --sample {sample} '
            f'--bam {bam} '
            f'--UMI_min {UMI_min} '
            f'--match_dir {match_dict[sample]} '
            f'--fusion_pos {fusion_pos} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=20, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        # This append was previously missing, so shell-mode scripts stopped
        # after STAR_fusion and never ran count_fusion.
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        # one script per sample, written relative to the working directory
        os.makedirs('./shell/', exist_ok=True)
        for sample in shell_dict:
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(shell_dict[sample])
Ejemplo n.º 5
0
def main():
    """Build the multi-sample job scripts for the SMK assay.

    Parses the command line, then for every sample in the mapfile chains the
    steps sample -> barcode -> cutadapt -> mapping_smk -> count_smk ->
    analysis_smk. Each step's command is appended both to the SJM job text and
    to that sample's shell script text.

    When ``mod`` is ``'sjm'`` the accumulated jobs are passed to
    ``merge_report``; when it is ``'shell'`` one ``./shell/<sample>.sh`` file
    is written per sample.
    """
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument(
        "--UMI_min",
        help="cells have SMK_UMI>=UMI_min are considered as valid cell",
        default="auto")
    parser.add_argument("--dim", help="SMK tag dimension", default=1)
    parser.add_argument("--SNR_min",
                        help="minimum signal to noise ratio",
                        default="auto")
    parser.add_argument("--SMK_pattern", help="SMK read2 pattern")
    parser.add_argument("--SMK_linker", help="SMK read2 linker fasta path")
    parser.add_argument("--SMK_barcode", help="SMK read2 barcode fasta path ")
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, "auto")

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    UMI_min = args.UMI_min
    dim = args.dim
    SNR_min = args.SNR_min
    SMK_pattern = args.SMK_pattern
    SMK_linker = args.SMK_linker
    SMK_barcode = args.SMK_barcode

    # mk log dir (no shell involved, so unusual characters in the path are safe)
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    # sample -> shell script text. Previously a single string collected the
    # commands of every sample and was written to a file named after the last
    # sample only.
    shell_dict = {}

    # run
    for sample in fq_dict:
        shell_dict[sample] = ''

        # outdir dict: step -> "<outdir>/<sample>/<NN>.<step>"
        outdir_dic = {
            step: f"{outdir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(steps)
        }

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # mapping_smk
        step = 'mapping_smk'
        SMK_read2 = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--SMK_read2 {SMK_read2} '
               f'--SMK_pattern {SMK_pattern} '
               f'--SMK_barcode {SMK_barcode} '
               f'--SMK_linker {SMK_linker} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # count_smk
        step = 'count_smk'
        read_file = f'{outdir_dic["mapping_smk"]}/{sample}_read_count.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--read_file {read_file} '
               f'--dim {dim} '
               f'--UMI_min {UMI_min} '
               f'--SNR_min {SNR_min} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # analysis_smk
        step = 'analysis_smk'
        tsne_tag_file = f'{outdir_dic["count_smk"]}/{sample}_tsne_tag.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--tsne_tag_file {tsne_tag_file} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        # One script per sample; iterating the dict also makes an empty
        # mapfile a no-op instead of a NameError on `sample`.
        os.makedirs('./shell/', exist_ok=True)
        for sample, script in shell_dict.items():
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(script)
Ejemplo n.º 6
0
def main():
    """Build the multi-sample job scripts for the mutation assay.

    Parses the command line, writes a ``ln.sh`` link script under
    ``<outdir>/data_give/rawdata`` (the script is only written, not executed),
    then for every sample in the mapfile chains the steps sample -> barcode ->
    cutadapt -> mapping_mut -> count_mut. Each step's command is appended both
    to the SJM job text and to that sample's shell script text.

    When ``mod`` is ``'sjm'`` the accumulated jobs are passed to
    ``merge_report``; when it is ``'shell'`` one ``./shell/<sample>.sh`` file
    is written per sample.
    """
    parser = argparse.ArgumentParser('CeleScope RNA multi-sample')
    parser.add_argument('--mod',
                        help='mod, sjm or shell',
                        choices=['sjm', 'shell'],
                        default='sjm')
    parser.add_argument(
        '--mapfile',
        help=
        'mapfile, 4 columns, "LibName\\tDataDir\\tSampleName\\tCellNum", CellNum is optional',
        required=True)
    parser.add_argument(
        '--chemistry',
        choices=['scopeV2.0.0', 'scopeV2.0.1', 'scopeV2.1.0', 'scopeV2.1.1'],
        help='chemistry version')
    parser.add_argument('--whitelist', help='cellbarcode list')
    parser.add_argument('--linker', help='linker')
    parser.add_argument('--pattern', help='read1 pattern')
    parser.add_argument('--outdir', help='output dir', default="./")
    parser.add_argument(
        '--adapt',
        action='append',
        help='adapter sequence',
        default=['polyT=A{15}', 'p5=AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'])
    parser.add_argument('--minimum-length',
                        dest='minimum_length',
                        help='minimum_length',
                        default=20)
    parser.add_argument('--nextseq-trim',
                        dest='nextseq_trim',
                        help='nextseq_trim',
                        default=20)
    parser.add_argument('--overlap',
                        help='minimum overlap length, default=5',
                        default=5)
    parser.add_argument('--lowQual',
                        type=int,
                        help='max phred of base as lowQual',
                        default=0)
    parser.add_argument('--lowNum',
                        type=int,
                        help='max number with lowQual allowed',
                        default=2)
    parser.add_argument('--starMem', help='starMem', default=30)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--rm_files',
                        action='store_true',
                        help='remove redundant fq.gz and bam after running')
    parser.add_argument("--mut_file", help="mutation file", required=True)
    parser.add_argument("--shift_base", default=2)
    parser.add_argument(
        '--indel_genomeDir',
        help='insertion or deletion STAR indexed genome directory',
        required=True)
    args = vars(parser.parse_args())

    fq_dict, match_dict = parse_map_col4(args['mapfile'], None)

    # link: write a script that symlinks each sample's fastq pair
    raw_dir = args['outdir'] + '/data_give/rawdata'
    os.makedirs(raw_dir, exist_ok=True)
    with open(raw_dir + '/ln.sh', 'w') as fh:
        fh.write('cd %s\n' % (raw_dir))
        for s, arr in fq_dict.items():
            fh.write('ln -sf %s %s\n' % (arr[0], s + '_1.fq.gz'))
            fh.write('ln -sf %s %s\n' % (arr[1], s + '_2.fq.gz'))

    logdir = args['outdir'] + '/log'
    os.makedirs(logdir, exist_ok=True)
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    # sample -> shell script text. Previously a single string collected the
    # commands of every sample and was written to a file named after the last
    # sample only.
    shell_dict = {}
    app = 'celescope'
    chemistry = args['chemistry']
    pattern = args['pattern']
    whitelist = args['whitelist']
    linker = args['linker']
    lowQual = args['lowQual']
    lowNum = args['lowNum']
    starMem = args['starMem']
    thread = args['thread']
    basedir = args['outdir']
    mod = args['mod']

    mut_file = args['mut_file']
    shift_base = args['shift_base']
    indel_genomeDir = args['indel_genomeDir']

    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__

    for sample in fq_dict:
        shell_dict[sample] = ''

        # outdir dict: step -> "<basedir>/<sample>/<NN>.<step>"
        outdir_dic = {
            step: f"{basedir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(steps)
        }

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # mapping_mut
        step = 'mapping_mut'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--thread {thread} '
               f'--indel_genomeDir {indel_genomeDir} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd,
                                f'{step}_{sample}',
                                conda,
                                m=starMem,
                                x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # count_mut
        step = 'count_mut'
        bam = f'{outdir_dic["mapping_mut"]}/{sample}_Aligned.sortedByCoord.out.bam'
        # `--shift_base` was previously emitted without the leading dashes,
        # which put it on the command line as two stray positional words.
        cmd = (f'{app} {assay} {step} '
               f'--bam {bam} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--mut_file {mut_file} '
               f'--match_dir {match_dict[sample]} '
               f'--shift_base {shift_base} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        merge_report(fq_dict, steps, last_step, sjm_cmd, sjm_order, logdir,
                     conda, args['outdir'], args['rm_files'])
    if mod == 'shell':
        # One script per sample; iterating the dict also makes an empty
        # mapfile a no-op instead of a NameError on `sample`.
        os.makedirs('./shell/', exist_ok=True)
        for sample, script in shell_dict.items():
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(script)
Ejemplo n.º 7
0
def main():
    """Build the multi-sample job scripts for the VDJ assay.

    Parses the command line, writes a ``ln.sh`` link script under
    ``<outdir>/data_give/rawdata`` (the script is only written, not executed),
    then for every sample in the mapfile chains the steps sample -> barcode ->
    cutadapt -> mapping_vdj -> count_vdj. Each step's command is appended both
    to the SJM job text and to that sample's shell script text.

    When ``mod`` is ``'sjm'`` the accumulated jobs are passed to
    ``merge_report`` with the step names from index 3 onwards prefixed by the
    ``--type`` value; when it is ``'shell'`` one ``./shell/<sample>.sh`` file
    is written per sample.
    """
    parser = argparse.ArgumentParser('CeleScope vdj multi-sample')
    parser.add_argument(
        '--mapfile',
        help='mapfile, 3 columns, "LibName\\tDataDir\\tSampleName"',
        required=True)
    parser.add_argument('--mod',
                        help='mod, sjm or shell',
                        choices=['sjm', 'shell'],
                        default='sjm')
    parser.add_argument(
        '--chemistry',
        choices=['scopeV2.0.0', 'scopeV2.0.1', 'scopeV2.1.0', 'scopeV2.1.1'],
        help='chemistry version')
    parser.add_argument('--whitelist', help='cellbarcode list')
    parser.add_argument('--linker', help='linker')
    parser.add_argument('--pattern', help='read1 pattern')
    parser.add_argument('--outdir', help='output dir', default="./")
    parser.add_argument(
        '--adapt',
        action='append',
        help='adapter sequence',
        default=['polyT=A{15}', 'p5=AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'])
    parser.add_argument('--minimum-length',
                        dest='minimum_length',
                        help='minimum_length',
                        default=20)
    parser.add_argument('--nextseq-trim',
                        dest='nextseq_trim',
                        help='nextseq_trim',
                        default=20)
    parser.add_argument('--overlap',
                        help='minimum overlap length, default=5',
                        default=5)
    parser.add_argument('--lowQual',
                        type=int,
                        help='max phred of base as lowQual',
                        default=0)
    parser.add_argument('--lowNum',
                        type=int,
                        help='max number with lowQual allowed',
                        default=2)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument("--type", help='TCR or BCR', required=True)
    parser.add_argument("--debug", action='store_true')
    parser.add_argument(
        '--iUMI',
        help='minimum number of UMI of identical receptor type and CDR3',
        default=1)
    parser.add_argument('--rm_files',
                        action='store_true',
                        help='remove redundant fq.gz and bam after running')
    args = vars(parser.parse_args())

    fq_dict, match_dict = parse_map_col4(args['mapfile'], None)

    # link: write a script that symlinks each sample's fastq pair
    raw_dir = args['outdir'] + '/data_give/rawdata'
    os.makedirs(raw_dir, exist_ok=True)
    with open(raw_dir + '/ln.sh', 'w') as fh:
        fh.write('cd %s\n' % (raw_dir))
        for s, arr in fq_dict.items():
            fh.write('ln -sf %s %s\n' % (arr[0], s + '_1.fq.gz'))
            fh.write('ln -sf %s %s\n' % (arr[1], s + '_2.fq.gz'))

    logdir = args['outdir'] + '/log'
    os.makedirs(logdir, exist_ok=True)
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    # sample -> shell script text. Previously a single string collected the
    # commands of every sample and was written to a file named after the last
    # sample only.
    shell_dict = {}
    app = 'celescope'
    mod = args['mod']
    thread = args['thread']
    chemistry = args['chemistry']
    pattern = args['pattern']
    whitelist = args['whitelist']
    linker = args['linker']
    lowQual = args['lowQual']
    lowNum = args['lowNum']
    basedir = args['outdir']
    # Named receptor_type rather than `type` so the builtin is not shadowed.
    receptor_type = args['type']
    iUMI = args['iUMI']

    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__

    for sample in fq_dict:
        shell_dict[sample] = ''

        # outdir dict: step -> "<basedir>/<sample>/<NN>.<step>"
        outdir_dic = {
            step: f"{basedir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(steps)
        }

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # mapping_vdj
        step = 'mapping_vdj'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} '
               f'--sample {sample} '
               f'--type {receptor_type} '
               f'--thread {thread} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=15, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # count_vdj
        step = 'count_vdj'
        UMI_count_filter1_file = f'{outdir_dic["mapping_vdj"]}/{sample}_UMI_count_filtered1.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--type {receptor_type} '
               f'--iUMI {iUMI} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--UMI_count_filter1_file {UMI_count_filter1_file} '
               f'--match_dir {match_dict[sample]} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        # Add type to the step names from index 3 onwards (mapping and count).
        # Work on a copy: `steps` aliases the module-level __STEPS__, and
        # renaming in place would leave that shared list modified.
        report_steps = list(steps)
        report_steps[3:] = [f'{receptor_type}_{name}' for name in steps[3:]]
        merge_report(fq_dict, report_steps, last_step, sjm_cmd, sjm_order,
                     logdir, conda, args['outdir'], args['rm_files'])
    if mod == 'shell':
        # One script per sample; iterating the dict also makes an empty
        # mapfile a no-op instead of a NameError on `sample`.
        os.makedirs('./shell/', exist_ok=True)
        for sample, script in shell_dict.items():
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(script)
Ejemplo n.º 8
0
def main():
    """Build the per-sample job commands for this assay and emit them.

    Parses the command line (the shared options from ``multi_opts`` plus
    ``--thread``, ``--fq_pattern``, ``--linker_fasta`` and
    ``--barcode_fasta``), reads the mapfile, links the input data, and for
    every sample builds one command per step: ``sample``, ``barcode``,
    ``cutadapt``, ``mapping_tag``, ``count_cite`` and ``analysis_cite``.
    Every step after the first is ordered after the previous step of the
    same sample.

    When ``args.mod`` is ``'sjm'`` the accumulated job and order text is
    passed to ``merge_report``; when it is ``'shell'`` one script per sample
    is written to ``./shell/<sample>.sh``.
    """

    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser: shared multi-sample options plus the assay-specific ones
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument("--fq_pattern",
                        help="tag read2 pattern",
                        required=True)
    parser.add_argument("--linker_fasta", help="linker fasta")
    parser.add_argument("--barcode_fasta", help="barcode fasta", required=True)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files
    minimum_length = args.minimum_length

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    fq_pattern = args.fq_pattern
    linker_fasta = args.linker_fasta
    barcode_fasta = args.barcode_fasta

    # mk log dir -- created directly rather than through a shell so that
    # paths containing spaces or shell metacharacters are handled correctly
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    def add_job(sample, step, cmd, previous=None, **resources):
        """Register ``cmd`` as job ``<step>_<sample>`` and return ``step``.

        Appends the job to the SJM text, appends an ordering line after
        ``previous`` (when given), and appends the command to the sample's
        shell script. ``resources`` is forwarded unchanged to
        ``generate_sjm`` (``m``/``x`` -- presumably memory and thread
        requests; confirm against ``generate_sjm``).
        """
        nonlocal sjm_cmd, sjm_order
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, **resources)
        if previous is not None:
            sjm_order += f'order {step}_{sample} after {previous}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        return step

    # run
    for sample in fq_dict:
        # each step gets its own numbered output directory under the sample
        outdir_dic = {
            step: f"{outdir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(steps)
        }

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        last_step = add_job(sample, step, cmd)

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        last_step = add_job(sample, step, cmd, last_step, m=5, x=thread)

        # adapt: reads the read-2 fastq written by the barcode step
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} '
               f'--minimum_length {minimum_length} ')
        last_step = add_job(sample, step, cmd, last_step, m=5, x=1)

        # mapping_tag: reads the cleaned fastq written by cutadapt
        step = 'mapping_tag'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--fq {fq} '
               f'--fq_pattern {fq_pattern} '
               f'--barcode_fasta {barcode_fasta} '
               f'--linker_fasta {linker_fasta} ')
        last_step = add_job(sample, step, cmd, last_step, m=5, x=1)

        # count_cite: reads the read-count table written by mapping_tag
        step = 'count_cite'
        read_count_file = f'{outdir_dic["mapping_tag"]}/{sample}_read_count.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--read_count_file {read_count_file} ')
        last_step = add_job(sample, step, cmd, last_step, m=5, x=1)

        # analysis_cite: reads the matrix written by count_cite
        step = 'analysis_cite'
        citeseq_mtx = f'{outdir_dic["count_cite"]}/{sample}_citeseq.mtx.gz'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--citeseq_mtx {citeseq_mtx} ')
        last_step = add_job(sample, step, cmd, last_step, m=5, x=1)

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        os.makedirs('./shell/', exist_ok=True)
        for sample, script in shell_dict.items():
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(script)
Ejemplo n.º 9
0
def main():
    """Build the per-sample job commands for this assay and emit them.

    Parses the command line (the shared options from ``multi_opts`` plus
    ``--thread``), reads the mapfile, links the input data, and for every
    sample builds one command per step: ``sample``, ``barcode``,
    ``cutadapt`` and ``mapping_hla``. Every step after the first is ordered
    after the previous step of the same sample.

    When ``args.mod`` is ``'sjm'`` the accumulated job and order text is
    passed to ``merge_report``; when it is ``'shell'`` one script per sample
    is written to ``./shell/<sample>.sh``, each holding only that sample's
    commands.
    """

    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread

    # mk log dir -- created directly rather than through a shell so that
    # paths containing spaces or shell metacharacters are handled correctly
    logdir = outdir + '/log'
    os.makedirs(logdir, exist_ok=True)

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    # One shell script per sample. Previously a single string collected the
    # commands of every sample and was written only to the last sample's
    # file, leaving the other samples without a script.
    shell_dict = {}

    def add_job(sample, step, cmd, previous=None, **resources):
        """Register ``cmd`` as job ``<step>_<sample>`` and return ``step``.

        Appends the job to the SJM text, appends an ordering line after
        ``previous`` (when given), and appends the command to the sample's
        shell script. ``resources`` is forwarded unchanged to
        ``generate_sjm`` (``m``/``x`` -- presumably memory and thread
        requests; confirm against ``generate_sjm``).
        """
        nonlocal sjm_cmd, sjm_order
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, **resources)
        if previous is not None:
            sjm_order += f'order {step}_{sample} after {previous}_{sample}\n'
        shell_dict[sample] = shell_dict.get(sample, '') + cmd + '\n'
        return step

    # outdir dict
    for sample in fq_dict:
        # each step gets its own numbered output directory under the sample
        outdir_dic = {
            step: f"{outdir}/{sample}/{index:02d}.{step}"
            for index, step in enumerate(steps)
        }

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        last_step = add_job(sample, step, cmd)

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        last_step = add_job(sample, step, cmd, last_step, m=5, x=thread)

        # adapt: reads the read-2 fastq written by the barcode step
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        last_step = add_job(sample, step, cmd, last_step, m=5, x=1)

        # mapping_hla: reads the cleaned fastq written by cutadapt
        step = 'mapping_hla'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} '
               f'--thread {thread} '
               f'--match_dir {match_dict[sample]} '
               f'--outdir {outdir_dic[step]} --assay {assay} ')
        last_step = add_job(sample, step, cmd, last_step, m=30, x=thread)

    # merged report
    if mod == 'sjm':
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        os.makedirs('./shell/', exist_ok=True)
        for sample, script in shell_dict.items():
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(script)