Python s_commonの例、celescope.tools.step.s_common Pythonの例

コード例 #1

0

ファイルを表示

def get_opts_count_cite(parser, sub_program):
    """Add the ``count_cite`` options to ``parser``.

    Nothing is registered unless ``sub_program`` is truthy. In that case
    ``--match_dir`` (required) and ``--read_count_file`` are added and the
    parser is then passed to ``s_common`` (defined elsewhere).
    """
    if not sub_program:
        return
    add = parser.add_argument
    add("--match_dir", required=True, help=HELP_DICT['match_dir'])
    add("--read_count_file", help="tag read count file")
    s_common(parser)

コード例 #2

0

ファイルを表示

ファイル: split_tag.py プロジェクト: singleron-RD/CeleScope

def get_opts_split_tag(parser, sub_program):
    """Add the ``split_tag`` options to ``parser``.

    The three ``--split_*`` switches and ``--vdj_dir`` are always added.
    When ``sub_program`` is truthy the input options (``--umi_tag_file`` is
    required) are added as well and the parser is passed to ``s_common``
    (defined elsewhere).
    """
    add = parser.add_argument

    # Boolean switches, registered in this order.
    switches = (
        ("--split_fastq",
         "If used, will split scRNA-Seq fastq file according to tag assignment."),
        ("--split_matrix",
         "If used, will split scRNA-Seq matrix file according to tag assignment."),
        ("--split_vdj",
         "If used, will split scRNA-Seq vdj count file according to tag assignment."),
    )
    for flag, text in switches:
        add(flag, action='store_true', help=text)

    add("--vdj_dir",
        help="Match celescope vdj directory. Required when --split_vdj is specified.")

    if not sub_program:
        return

    add("--umi_tag_file", required=True, help="UMI tag file.")
    add("--match_dir", help=HELP_DICT['match_dir'])
    add("--matrix_dir", help="Match celescope scRNA-Seq matrix directory.")
    add("--R1_read", help='R1 read path.')
    s_common(parser)

コード例 #3

0

ファイルを表示

ファイル: count_tag.py プロジェクト: singleron-RD/CeleScope

def get_opts_count_tag(parser, sub_program):
    """Add the ``count_tag`` options to ``parser``.

    Args:
        parser: an ``argparse``-style parser exposing ``add_argument``.
        sub_program: when truthy, also add the input-file options
            (``--read_count_file`` is required) and pass the parser to
            ``s_common`` (defined elsewhere).

    None of the options declares a ``type``, so values given on the command
    line arrive as strings while the defaults keep their Python types.
    """
    parser.add_argument(
        "--UMI_min",
        help=("Default='auto'. Minimum UMI threshold. Cell barcodes with "
              "valid UMI < UMI_min are classified as *undetermined*."),
        default="auto")
    parser.add_argument(
        "--dim",
        help="Default=1. Tag dimensions. Usually we use 1-dimensional tag.",
        default=1)
    parser.add_argument(
        "--SNR_min",
        help=("Default='auto'. Minimum signal-to-noise ratio. \n"
              "Cell barcodes with UMI >=UMI_min and SNR < SNR_min are "
              "classified as *multiplet*. "),
        default="auto")
    parser.add_argument("--combine_cluster",
                        help="Combine cluster tsv file.",
                        default=None)
    parser.add_argument(
        "--coefficient",
        help=("Default=0.1. If `SNR_min` is 'auto', minimum signal-to-noise "
              "ratio is calculated as \n"
              "`SNR_min = max(median(SNRs) * coefficient, 2)`. \n"
              "Smaller `coefficient` will cause less *multiplet* in the tag "
              "assignment."),
        default=0.1)
    if sub_program:
        parser.add_argument("--read_count_file",
                            help="Tag read count file.",
                            required=True)
        parser.add_argument("--match_dir",
                            help="Match celescope scRNA-Seq directory.")
        parser.add_argument("--matrix_dir",
                            help="Match celescope scRNA-Seq matrix directory.")
        parser.add_argument("--tsne_file", help="t-SNE coord file.")

        s_common(parser)

コード例 #4

0

ファイルを表示

def get_opts_filter(parser, sub_program):
    """Add the ``filter`` options to ``parser``.

    Args:
        parser: an ``argparse``-style parser exposing ``add_argument``.
        sub_program: when truthy, also add the required ``--match_dir`` and
            ``--raw_read_count_file`` options and pass the parser to
            ``s_common`` (defined elsewhere).

    The two ``*_hard_threshold`` options declare no ``type``; values given
    on the command line arrive as strings.
    """
    # The flag is a negative switch: setting it turns UMI correction off.
    # The help text previously claimed the opposite ("Perform UMI correction.").
    parser.add_argument('--not_correct_UMI',
                        help='Do not perform UMI correction.',
                        action='store_true')

    parser.add_argument(
        "--read_threshold_method",
        help=
        'method to find read threshold. UMIs with `support reads` < `read threshold` are filtered.',
        choices=['otsu', 'auto', 'hard'],
        default='otsu')
    parser.add_argument(
        "--read_hard_threshold",
        help='int, use together with `--read_threshold_method hard`',
    )

    parser.add_argument(
        "--umi_threshold_method",
        help=
        'method to find UMI threshold. Cell barcode with `UMI` < `UMI threshold` are considered negative.',
        choices=['otsu', 'auto', 'hard'],
        default='auto')
    parser.add_argument(
        "--umi_hard_threshold",
        help='int, use together with `--umi_threshold_method hard`',
    )

    if sub_program:
        parser.add_argument('--match_dir',
                            help=HELP_DICT['match_dir'],
                            required=True)
        parser.add_argument('--raw_read_count_file', required=True)
        s_common(parser)

コード例 #5

0

ファイルを表示

def get_opts_analysis_cite(parser, sub_program):
    """Add the ``analysis_cite`` options to ``parser``.

    Nothing is registered unless ``sub_program`` is truthy. In that case the
    required ``--match_dir`` and ``--citeseq_mtx`` options are added and the
    parser is passed to ``s_common`` (defined elsewhere).
    """
    if not sub_program:
        return
    add = parser.add_argument
    add('--match_dir', required=True, help=HELP_DICT['match_dir'])
    add('--citeseq_mtx', required=True, help='citeseq matrix .gz file')
    s_common(parser)

コード例 #6

0

ファイルを表示

ファイル: filter_snp.py プロジェクト: singleron-RD/CeleScope

def get_opts_filter_snp(parser, sub_program):
    """Add the ``filter_snp`` options to ``parser``.

    ``--threshold_method`` and ``--hard_threshold`` are always added. When
    ``sub_program`` is truthy ``--vcf`` is added too and the parser is passed
    to ``s_common`` (defined elsewhere).
    """
    add = parser.add_argument
    add(
        '--threshold_method',
        choices=['otsu', 'auto', 'hard', 'none'],
        default='auto',
        help=HELP_DICT['threshold_method'],
    )
    add("--hard_threshold",
        help='int, use together with `--threshold_method hard`')
    if not sub_program:
        return
    add("--vcf", help="norm vcf file")
    s_common(parser)

コード例 #7

0

ファイルを表示

def get_opts_analysis_snp(parser, sub_program):
    """Add the ``analysis_snp`` options to ``parser``.

    ``--annovar_config`` is always added and is required. When
    ``sub_program`` is truthy the parser is first passed to ``s_common``
    (defined elsewhere), then the required ``--match_dir`` and ``--vcf``
    options are added.
    """
    add = parser.add_argument
    add('--annovar_config', required=True, help='ANNOVAR config file.')
    if not sub_program:
        return
    s_common(parser)
    add('--match_dir', required=True, help=HELP_DICT['match_dir'])
    add('--vcf', required=True, help='vcf file.')

コード例 #8

0

ファイルを表示

def get_opts_count_bam(parser, sub_program):
    """Add the ``count_bam`` options to ``parser``.

    ``--min_query_length`` (default ``35``, no ``type`` declared) is always
    added. When ``sub_program`` is truthy the required ``--match_dir`` and
    ``--capture_bam`` options are added and the parser is passed to
    ``s_common`` (defined elsewhere).
    """
    add = parser.add_argument
    add("--min_query_length", default=35, help='Minimum query length.')

    if not sub_program:
        return

    add('--match_dir', required=True, help=HELP_DICT['match_dir'])
    add('--capture_bam', required=True)
    s_common(parser)

コード例 #9

0

ファイルを表示

def get_opts_variant_calling(parser, sub_program):
    """Add the ``variant_calling`` options to ``parser``.

    ``--genomeDir`` (required) and ``--panel`` are always added. When
    ``sub_program`` is truthy the required ``--bam`` and ``--match_dir``
    options are added and the parser is passed to ``s_common`` (defined
    elsewhere).
    """
    add = parser.add_argument
    add("--genomeDir", required=True, help=HELP_DICT['genomeDir'])
    add("--panel", help=HELP_DICT['panel'])
    if not sub_program:
        return
    add("--bam",
        required=True,
        help='Input BAM file from step `target_metrics`. ')
    add("--match_dir", required=True, help=HELP_DICT['match_dir'])
    s_common(parser)

コード例 #10

0

ファイルを表示

def get_opts_analysis_mixin(parser, sub_program):
    """Add the shared analysis options to ``parser``.

    ``--genomeDir`` (required), ``--save_rds`` and ``--type_marker_tsv`` are
    always added. When ``sub_program`` is truthy the required
    ``--matrix_file`` plus ``--tsne_file`` and ``--df_marker_file`` are added
    and the parser is passed to ``s_common`` (defined elsewhere).
    """
    add = parser.add_argument

    add('--genomeDir', required=True, help='Required. Genome directory.')
    add('--save_rds', help='Write rds to disk.', action='store_true')
    add(
        '--type_marker_tsv',
        help="""A tsv file with header. If this parameter is provided, cell type will be annotated. Example:
```
cell_type	marker
Alveolar	"CLDN18,FOLR1,AQP4,PEBP4"
Endothelial	"CLDN5,FLT1,CDH5,RAMP2"
Epithelial	"CAPS,TMEM190,PIFO,SNTN"
Fibroblast	"COL1A1,DCN,COL1A2,C1R"
B_cell	"CD79A,IGKC,IGLC3,IGHG3"
Myeloid	"LYZ,MARCO,FCGR3A"
T_cell	"CD3D,TRBC1,TRBC2,TRAC"
LUAD	"NKX2-1,NAPSA,EPCAM"
LUSC	"TP63,KRT5,KRT6A,KRT6B,EPCAM"
```""",
    )

    if not sub_program:
        return

    add('--matrix_file',
        required=True,
        help='Required. Matrix_10X directory from step count.')
    # The full count directory is not needed here.
    add("--tsne_file", help="match_dir t-SNE coord file.")
    add("--df_marker_file", help="match_dir df_marker_file.")
    s_common(parser)

コード例 #11

0

ファイルを表示

ファイル: consensus.py プロジェクト: singleron-RD/CeleScope

def get_opts_consensus(parser, sub_program):
    """Add the ``consensus`` options to ``parser``.

    ``--threshold`` (float, default ``0.5``), ``--not_consensus`` and
    ``--min_consensus_read`` (default ``1``, no ``type`` declared) are always
    added. When ``sub_program`` is truthy the required ``--fq`` option is
    added and the parser is passed to ``s_common`` (defined elsewhere).
    """
    add = parser.add_argument
    add("--threshold",
        type=float,
        default=0.5,
        help='Default 0.5. Valid base threshold. ')
    add("--not_consensus",
        action='store_true',
        help="Skip the consensus step. ")
    add("--min_consensus_read",
        default=1,
        help="Minimum number of reads to support a base. ")
    if not sub_program:
        return
    add("--fq", required=True, help="Required. Fastq file.")
    s_common(parser)

コード例 #12

0

ファイルを表示

ファイル: star_mixin.py プロジェクト: singleron-RD/CeleScope

def get_opts_star_mixin(parser, sub_program):
    """Add the shared STAR options to ``parser``.

    The genome/alignment options are always added; none of the numeric ones
    declares a ``type``. When ``sub_program`` is truthy the required ``--fq``
    and the ``--consensus_fq`` switch are added and the parser is passed to
    ``s_common`` (defined elsewhere).
    """
    add = parser.add_argument

    add('--genomeDir', help=HELP_DICT['genomeDir'])
    add(
        '--outFilterMatchNmin',
        default=0,
        help=("Default `0`. Alignment will be output only if the number of matched bases \n"
              "is higher than or equal to this value."),
    )
    add('--out_unmapped', action='store_true', help='Output unmapped reads.')
    add('--STAR_param', default="", help='Other STAR parameters.')
    add(
        '--outFilterMultimapNmax',
        default=1,
        help='Default `1`. How many places are allowed to match a read at most.',
    )
    add('--starMem',
        default=30,
        help='Default `30`. Maximum memory that STAR can use.')

    if not sub_program:
        return

    add('--fq', required=True, help="Required. R2 fastq file.")
    add("--consensus_fq",
        action='store_true',
        help="A indicator that the input fastq has been consensused.")
    s_common(parser)

コード例 #13

0

ファイルを表示

def get_opts_replacement(parser, sub_program):
    """Add the ``replacement`` options to ``parser`` and return a parser.

    ``--bg_cov`` is always added. When ``sub_program`` is truthy the required
    ``--bam``/``--bg`` options and the integer filters are added, and the
    value returned by ``s_common(parser)`` (defined elsewhere) is returned;
    otherwise ``parser`` itself is returned.
    """
    add = parser.add_argument
    add(
        '--bg_cov',
        help='background snp depth filter, lower than bg_cov will be discarded. Only valid in csv format',
        default=1,
        type=int,
    )
    if not sub_program:
        return parser

    add('--bam', required=True, help='bam file from conversion step')
    add('--bg', required=True, help='background snp file, csv or vcf format')
    add('--cell_keep', help='filter cell', default=100000, type=int)
    add('--min_cell',
        help='a gene expressed in at least cells, default 10',
        default=10,
        type=int)
    add('--min_gene',
        help='at least gene num in a cell, default 10',
        default=10,
        type=int)
    return s_common(parser)

コード例 #14

0

ファイルを表示

def get_opts_sample(parser, sub_program):
    """Add the ``sample`` options and return the parser they were added to.

    When ``sub_program`` is truthy the parser is replaced by the result of
    ``s_common(parser)`` (defined elsewhere) and ``--fq1`` is added to it.
    ``--chemistry`` is always added, with the keys of ``__PATTERN_DICT__``
    as its choices.
    """
    if sub_program:
        parser = s_common(parser)
        parser.add_argument('--fq1', help='read1 fq file')

    chemistry_choices = list(__PATTERN_DICT__.keys())
    parser.add_argument(
        '--chemistry',
        default='auto',
        help='chemistry version',
        choices=chemistry_choices,
    )
    return parser

コード例 #15

0

ファイルを表示

def get_opts_analysis_tag(parser, sub_program):
    """Add the ``analysis_tag`` options to ``parser``.

    Nothing is registered unless ``sub_program`` is truthy. In that case the
    required ``--tsne_tag_file`` plus ``--match_dir`` and ``--tsne_file`` are
    added and the parser is passed to ``s_common`` (defined elsewhere).
    """
    if not sub_program:
        return
    add = parser.add_argument
    add('--tsne_tag_file',
        required=True,
        help='`{sample}_tsne_tag.tsv` from count_tag. ')
    add("--match_dir", help="Match celescope scRNA-Seq directory. ")
    add("--tsne_file", help="t-SNE coord file.")
    s_common(parser)

コード例 #16

0

ファイルを表示

ファイル: target_metrics.py プロジェクト: singleron-RD/CeleScope

def get_opts_target_metrics(parser, sub_program):
    """Add the ``target_metrics`` options to ``parser``.

    ``--gene_list`` and ``--panel`` are always added. When ``sub_program`` is
    truthy the required ``--bam``/``--match_dir`` options and the
    ``--add_RG`` switch are added and the parser is passed to ``s_common``
    (defined elsewhere).
    """
    add = parser.add_argument
    add("--gene_list", help=HELP_DICT['gene_list'])
    add("--panel", help=HELP_DICT['panel'])
    if not sub_program:
        return
    add("--bam", required=True, help='Input bam file')
    add('--match_dir', required=True, help=HELP_DICT['match_dir'])
    add('--add_RG',
        action='store_true',
        help='Add tag read group: RG. RG is the same as CB(cell barcode)')
    s_common(parser)

コード例 #17

0

ファイルを表示

def get_opts_conversion(parser, sub_program):
    """Add the ``conversion`` options to ``parser`` and return a parser.

    ``--strand`` is always added and is required. When ``sub_program`` is
    truthy the required ``--bam`` and ``--cell`` options are added and the
    value returned by ``s_common(parser)`` (defined elsewhere) is returned;
    otherwise ``parser`` itself is returned.
    """
    add = parser.add_argument
    add('--strand',
        required=True,
        help='gene strand file, the format is "geneID,+/-"')
    if not sub_program:
        return parser
    add("--bam",
        required=True,
        help='featureCount bam(sortedByCoord), must have "MD" tag, set in star step')
    add("--cell", required=True, help='barcode cell list')
    return s_common(parser)

コード例 #18

0

ファイルを表示

ファイル: featureCounts.py プロジェクト: singleron-RD/CeleScope

def get_opts_featureCounts(parser, sub_program):
    """Add the ``featureCounts`` options to ``parser`` and return a parser.

    ``--gtf_type``, ``--genomeDir`` and ``--featureCounts_param`` are always
    added. When ``sub_program`` is truthy the required ``--input`` option is
    added and the value returned by ``s_common(parser)`` (defined elsewhere)
    is returned; otherwise ``parser`` itself is returned.
    """
    add = parser.add_argument
    add('--gtf_type',
        default='exon',
        help='Specify feature type in GTF annotation')
    add('--genomeDir', help='Required. Genome directory.')
    add('--featureCounts_param',
        default="",
        help='Other featureCounts parameters')

    if not sub_program:
        return parser

    add('--input', required=True, help='Required. BAM file path.')
    return s_common(parser)

コード例 #19

0

ファイルを表示

def get_opts_mapping_vdj(parser, sub_program):
    """Add the ``mapping_vdj`` options to ``parser``.

    ``--type`` (required), ``--species`` (``hs`` or ``mmu``, default ``hs``)
    and ``--not_consensus`` are always added. When ``sub_program`` is truthy
    the required ``--fq`` option is added and the parser is passed to
    ``s_common`` (defined elsewhere).
    """
    add = parser.add_argument
    add("--type", required=True, help='TCR or BCR')
    add('--species',
        default='hs',
        choices=['hs', 'mmu'],
        help='Default `hs`. `hs`(human) or `mmu`(mouse). ')
    add("--not_consensus",
        help="Input fastq is not consensused.",
        action='store_true')
    if not sub_program:
        return
    add("--fq", required=True, help="Required. Input fastq file.")
    s_common(parser)

コード例 #20

0

ファイルを表示

ファイル: cutadapt.py プロジェクト: singleron-RD/CeleScope

def get_opts_cutadapt(parser, sub_program):
    """Add the ``cutadapt`` options to ``parser`` and return a parser.

    Args:
        parser: an ``argparse``-style parser exposing ``add_argument``.
        sub_program: when truthy, also add the required ``--fq`` option and
            return the value of ``s_common(parser)`` (defined elsewhere).

    Returns:
        The result of ``s_common(parser)`` when ``sub_program`` is truthy,
        otherwise ``parser`` itself.

    None of the numeric options declares a ``type``, so values given on the
    command line arrive as strings while the defaults stay integers.
    """
    parser.add_argument('--gzip',
                        help="Output gzipped fastq files.",
                        action='store_true')
    # Help text spelling fixed ("Addtional" -> "Additional").
    parser.add_argument('--adapter_fasta',
                        help='Additional adapter fasta file.')
    parser.add_argument(
        '--minimum_length',
        help=
        'Default `20`. Discard processed reads that are shorter than LENGTH.',
        default=20)
    parser.add_argument(
        '--nextseq_trim',
        help=
        """Default `20`. Quality trimming of reads using two-color chemistry (NextSeq). 
Some Illumina instruments use a two-color chemistry to encode the four bases. 
This includes the NextSeq and the NovaSeq. 
In those instruments, a ‘dark cycle’ (with no detected color) encodes a G. 
However, dark cycles also occur when sequencing “falls off” the end of the fragment.
The read then contains a run of high-quality, but incorrect “G” calls at its 3’ end.""",
        default=20,
    )
    parser.add_argument(
        '--overlap',
        help=
        """Default `10`. Since Cutadapt allows partial matches between the read and the adapter sequence,
short matches can occur by chance, leading to erroneously trimmed bases. 
For example, roughly 0.25 of all reads end with a base that is identical to the first base of the adapter. 
To reduce the number of falsely trimmed bases, the alignment algorithm requires that 
at least {overlap} bases match between adapter and read. """,
        default=10)
    parser.add_argument('--insert',
                        help="Default `150`. Read2 insert length.",
                        default=150)
    if sub_program:
        parser.add_argument('--fq',
                            help='Required. R2 reads from step Barcode.',
                            required=True)
        parser = s_common(parser)
    return parser

コード例 #21

0

ファイルを表示

ファイル: count.py プロジェクト: singleron-RD/CeleScope

def get_opts_count(parser, sub_program):
    """Add the ``count`` options to ``parser``.

    ``--genomeDir``, ``--expected_cell_num`` (default ``3000``, no ``type``
    declared) and ``--cell_calling_method`` are always added. When
    ``sub_program`` is truthy the parser is passed to ``s_common`` (defined
    elsewhere) and the required ``--bam`` plus ``--force_cell_num`` are added
    to the parser it returns.
    """
    add = parser.add_argument
    add('--genomeDir', help='Required. Genome directory.')
    add('--expected_cell_num',
        default=3000,
        help='Default `3000`. Expected cell number.')
    add(
        '--cell_calling_method',
        default='auto',
        choices=['auto', 'cellranger3'],
        help='Default `auto`. Cell calling methods. Choose from `auto` and `cellranger3`',
    )
    if not sub_program:
        return

    # The remaining options go onto whatever parser s_common hands back.
    add = s_common(parser).add_argument
    add('--bam', required=True, help='Required. BAM file from featureCounts.')
    add('--force_cell_num',
        help='Default `None`. Force the cell number within (value * 0.9, value * 1.1). ')

コード例 #22

0

ファイルを表示

def get_opts_replace_tsne(parser, sub_program):
    """Add the ``replace_tsne`` options to ``parser`` and return a parser.

    When ``sub_program`` is falsy nothing is added and ``parser`` is returned
    unchanged. Otherwise the required ``--tsne``/``--mat``/``--rep`` options
    and the integer ``--mincell``/``--topgene`` options are added, and the
    value returned by ``s_common(parser)`` (defined elsewhere) is returned.
    """
    if not sub_program:
        return parser

    add = parser.add_argument
    add('--tsne', required=True, help='tsne file from analysis step')
    add('--mat',
        required=True,
        help='matrix replacement file, from replacement step')
    add('--rep',
        required=True,
        help='cell replacement file, from replacement step')
    add('--mincell',
        help='turn-over in at least cells, default 5',
        default=5,
        type=int)
    add('--topgene',
        help='show top N genes,default 10',
        default=10,
        type=int)
    return s_common(parser)

コード例 #23

0

ファイルを表示

ファイル: count_vdj.py プロジェクト: singleron-RD/CeleScope

def get_opts_count_vdj(parser, sub_program):
    """Add the ``count_vdj`` options to ``parser``.

    ``--type`` (required), ``--UMI_min`` (default ``"auto"``) and ``--iUMI``
    (int, default ``1``) are always added. When ``sub_program`` is truthy the
    required ``--UMI_count_filter_file`` plus ``--match_dir`` and
    ``--matrix_dir`` are added and the parser is passed to ``s_common``
    (defined elsewhere).
    """
    add = parser.add_argument
    add("--type", required=True, help="Required. `TCR` or `BCR`. ")
    add(
        '--UMI_min',
        default="auto",
        help='Default `auto`. Minimum UMI number to filter. The barcode with UMI>=UMI_min is considered to be cell.',
    )
    add(
        '--iUMI',
        default=1,
        type=int,
        help=("Default `1`. Minimum number of UMI of identical receptor type and CDR3. \n"
              "For each (barcode, chain) combination, only UMI>=iUMI is considered valid."),
    )
    if not sub_program:
        return
    add("--UMI_count_filter_file",
        required=True,
        help="Required. File from step mapping_vdj.")
    add("--match_dir", help=HELP_DICT['match_dir'])
    add("--matrix_dir", help=HELP_DICT['matrix_dir'])
    s_common(parser)

コード例 #24

0

ファイルを表示

ファイル: barcode.py プロジェクト: singleron-RD/CeleScope

def get_opts_barcode(parser, sub_program=True):
    """Add the ``barcode`` options to ``parser`` and return a parser.

    The chemistry/pattern/whitelist options, the quality thresholds and the
    boolean output switches are always added; ``--chemistry`` takes the keys
    of ``__PATTERN_DICT__`` as its choices. When ``sub_program`` is truthy
    (the default) the required ``--fq1``/``--fq2`` options are added and the
    value returned by ``s_common(parser)`` (defined elsewhere) is returned;
    otherwise ``parser`` itself is returned.
    """
    add = parser.add_argument

    add(
        '--chemistry',
        default='auto',
        choices=list(__PATTERN_DICT__.keys()),
        help="""Predefined (pattern, barcode whitelist, linker whitelist) combinations. Can be one of:  
- `auto` Default value. Used for Singleron GEXSCOPE libraries >= scopeV2 and automatically detects the combinations.  
- `scopeV1` Used for legacy Singleron GEXSCOPE scopeV1 libraries.  
- `customized` Used for user defined combinations. You need to provide `pattern`, `whitelist` and `linker` at the 
same time.""",
    )
    add(
        '--pattern',
        help="""The pattern of R1 reads, e.g. `C8L16C8L16C8L1U12T18`. The number after the letter represents the number 
        of bases.  
- `C`: cell barcode  
- `L`: linker(common sequences)  
- `U`: UMI    
- `T`: poly T""",
    )
    add('--whitelist',
        help='Cell barcode whitelist file path, one cell barcode per line.')
    add('--linker', help='Linker whitelist file path, one linker per line.')

    # Integer quality thresholds.
    add(
        '--lowQual',
        default=0,
        type=int,
        help=('Default 0. Bases in cell barcode and UMI whose phred value are lower than '
              'lowQual will be regarded as low-quality bases.'),
    )
    add(
        '--lowNum',
        default=2,
        type=int,
        help='The maximum allowed lowQual bases in cell barcode and UMI.',
    )

    # Boolean switches, registered in this order.
    for flag, text in (
        ('--nopolyT', 'Outputs R1 reads without polyT.'),
        ('--noLinker', 'Outputs R1 reads without correct linker.'),
        ('--allowNoPolyT', "Allow valid reads without polyT."),
        ('--allowNoLinker', "Allow valid reads without correct linker."),
        ('--gzip', "Output gzipped fastq files."),
        ('--output_R1', "Output valid R1 reads."),
    ):
        add(flag, action='store_true', help=text)

    if not sub_program:
        return parser

    add('--fq1',
        required=True,
        help='R1 fastq file. Multiple files are separated by comma.')
    add('--fq2',
        required=True,
        help='R2 fastq file. Multiple files are separated by comma.')
    return s_common(parser)

コード例 #25

0

ファイルを表示

ファイル: assemble.py プロジェクト: singleron-RD/CeleScope

def get_opts_assemble(parser, sub_program):
    """Add the ``assemble`` options to ``parser``.

    Args:
        parser: an ``argparse``-style parser exposing ``add_argument``.
        sub_program: when truthy, pass the parser to ``s_common`` (defined
            elsewhere) and add the required ``--fastq_dir`` option. When
            falsy, nothing is registered.

    ``s_common`` used to be called unconditionally here. Every sibling
    ``get_opts_*`` function calls it only when ``sub_program`` is truthy, so
    the call now sits behind the same guard.
    """
    if sub_program:
        s_common(parser)
        parser.add_argument("--fastq_dir", required=True)

コード例 #26

0

ファイルを表示

def get_opts_analysis(parser, sub_program):
    """Add the ``analysis`` options to ``parser``.

    Nothing is registered unless ``sub_program`` is truthy. In that case the
    required ``--filter_tsne_file`` option is added and the parser is passed
    to ``s_common`` (defined elsewhere).
    """
    if not sub_program:
        return
    parser.add_argument(
        '--filter_tsne_file',
        required=True,
        help='filter tsne file',
    )
    s_common(parser)

コード例 #27

0

ファイルを表示

def get_opts_substitution(parser, sub_program):
    """Add the ``substitution`` options to ``parser`` and return a parser.

    When ``sub_program`` is falsy ``parser`` is returned unchanged. Otherwise
    the required ``--bam`` option is added and the value returned by
    ``s_common(parser)`` (defined elsewhere) is returned.
    """
    if not sub_program:
        return parser
    parser.add_argument(
        '--bam',
        required=True,
        help='bam file from conversion step',
    )
    return s_common(parser)

コード例 #28

0

ファイルを表示

def get_opts_mapping_tag(parser, sub_program):
    """Add the ``mapping_tag`` options to ``parser``.

    ``--fq_pattern`` and ``--barcode_fasta`` (both required) and
    ``--linker_fasta`` are always added. When ``sub_program`` is truthy the
    parser is first passed to ``s_common`` (defined elsewhere), then the
    required ``--fq`` option is added.
    """
    add = parser.add_argument

    add(
        "--fq_pattern",
        required=True,
        help="""Required. R2 read pattern. The number after the letter represents the number of bases.         
`L` linker(common sequences)  
`C` tag barcode  
""",
    )
    add(
        "--barcode_fasta",
        required=True,
        help="""Required. Tag barcode fasta file. It will check the mismatches between tag barcode 
sequence in R2 reads with all tag barcode sequence in barcode_fasta. 
It will assign read to the tag with mismatch < len(tag barcode) / 10 + 1. 
If no such tag exists, the read is classified as invalid.

You can find the barcode fasta file under `celescope/data/Clindex`
```
>CLindex_TAG_1
CGTGTTAGGGCCGAT
>CLindex_TAG_2
GAGTGGTTGCGCCAT
>CLindex_TAG_3
AAGTTGCCAAGGGCC
>CLindex_TAG_4
TAAGAGCCCGGCAAG
>CLindex_TAG_5
TGACCTGCTTCACGC
>CLindex_TAG_6
GAGACCCGTGGAATC
>CLindex_TAG_7
GTTATGCGACCGCGA
>CLindex_TAG_8
ATACGCAGGGTCCGA
>CLindex_TAG_9
AGCGGCATTTGGGAC
>CLindex_TAG_10
TCGCCAGCCAAGTCT
>CLindex_TAG_11
ACCAATGGCGCATGG
>CLindex_TAG_12
TCCTCCTAGCAACCC
>CLindex_TAG_13
GGCCGATACTTCAGC
>CLindex_TAG_14
CCGTTCGACTTGGTG
>CLindex_TAG_15
CGCAAGACACTCCAC
>CLindex_TAG_16
CTGCAACAAGGTCGC
```
""",
    )
    add(
        "--linker_fasta",
        help="""Optional. If provided, it will check the mismatches between linker sequence in R2 reads 
with all linker sequence in linker_fasta. If no mismatch < len(linker) / 10 + 1, the read is classified as invalid.
""",
    )

    if not sub_program:
        return

    s_common(parser)
    add("--fq", required=True, help="R2 read fastq.")

コード例 #29

0

ファイルを表示

ファイル: res_sum.py プロジェクト: singleron-RD/CeleScope

def get_opts_res_sum(parser, sub_program):
    """Add the ``res_sum`` options to ``parser``.

    ``--Seqtype`` (``TCR`` or ``BCR``) is always added and is required. When
    ``sub_program`` is truthy the parser is passed to ``s_common`` (defined
    elsewhere) and the required ``--all_rep`` and ``--fa`` options are added
    to the parser it returns.
    """
    parser.add_argument(
        '--Seqtype',
        required=True,
        choices=['TCR', 'BCR'],
        help='TCR or BCR',
    )
    if not sub_program:
        return

    # The remaining options go onto whatever parser s_common hands back.
    add = s_common(parser).add_argument
    add('--all_rep',
        required=True,
        help='filtered assemble report without imputation')
    add('--fa', required=True, help='assembled fasta file')