예제 #1
0
def get_merge_bams_params(reference_dir, reference, cluster):
    """Build the ``merge_bams`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        cluster: Cluster identifier; ``'azure'`` and ``'aws'`` switch
            ``one_split_job`` on, any other value switches it off.

    Returns:
        A dict with the single key ``'merge_bams'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    # Only 'azure' and 'aws' enable the single split job.
    single_split = cluster in ('azure', 'aws')

    images = config_reference.containers()['docker']
    docker = {
        'single_cell_pipeline': images['single_cell_pipeline'],
        'samtools': images['samtools'],
    }

    merge_bams = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': 8,
        'docker': docker,
        'ref_genome': refdata['ref_genome'],
        'split_size': 10000000,
        'chromosomes': refdata['chromosomes'],
        'one_split_job': single_split,
    }
    return {'merge_bams': merge_bams}
예제 #2
0
def get_copy_number_calling_params(reference_dir, reference, binsize):
    """Build the ``copy_number_calling`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        binsize: Stored as the TITAN ``window_size`` and used as the key
            selecting the GC and mappability wig entries from the
            reference data.

    Returns:
        A dict with the single key ``'copy_number_calling'`` mapping to
        the parameter dict.
    """
    referencedata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    docker_containers = config_reference.containers()['docker']

    params = {
        'memory': {
            'low': 4,
            'med': 6,
            'high': 16
        },
        'ref_genome': referencedata['ref_genome'],
        # 'chromosomes' used to be listed twice in this literal; one entry
        # is enough because a repeated key just overwrites the first.
        'chromosomes': referencedata['chromosomes'],
        'split_size': 10000000,
        'max_cores': None,
        'extract_seqdata': {},
        'ref_data_dir': referencedata['copynumber_ref_data'],
        'docker': {
            # NOTE(review): the 'single_cell_pipeline' slot is filled with
            # the 'remixt' image here -- confirm this is intentional.
            'single_cell_pipeline': docker_containers['remixt'],
            'titan': docker_containers['titan']
        },
        'titan_params': {
            "normal_contamination": [0.2, 0.4, 0.6, 0.8],
            'num_clusters': [1, 2],
            'ploidy': [1, 2, 3, 4],
            'chrom_info_filename': referencedata['chrom_info_filename'],
            'window_size': binsize,
            'gc_wig': referencedata['gc_wig_file'][binsize],
            # The mappability track must come from the mappability wig
            # entry; it previously reused the GC wig path by mistake.
            'mappability_wig': referencedata['map_wig_file'][binsize],
        }
    }

    return {'copy_number_calling': params}
예제 #3
0
def get_infer_haps_params(reference_dir, reference):
    """Build the ``infer_haps`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'infer_haps'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)
    images = config_reference.containers()['docker']

    infer_haps = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': None,
    }
    infer_haps['chromosomes'] = refdata['chromosomes']
    # The index template is the genome path with a '.fai' suffix appended.
    infer_haps['extract_seqdata'] = {
        'genome_fasta_template': refdata['ref_genome'],
        'genome_fai_template': refdata['ref_genome'] + '.fai',
    }
    infer_haps['ref_data_dir'] = refdata['copynumber_ref_data']
    infer_haps['docker'] = {'single_cell_pipeline': images['remixt']}

    return {'infer_haps': infer_haps}
예제 #4
0
def get_germline_calling_params(reference_dir, reference):
    """Build the ``germline_calling`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'germline_calling'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    germline = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': 8,
        'ref_genome': refdata['ref_genome'],
        'chromosomes': refdata['chromosomes'],
        'split_size': 10000000,
    }

    # Remote location recorded alongside the local mappability path.
    mappability_url = (
        'http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/release3'
        '/wgEncodeCrgMapabilityAlign50mer.bigWig'
    )
    germline['databases'] = {
        'mappability': {
            'url': mappability_url,
            'local_path': refdata['databases']['mappability']['local_path'],
        },
        'snpeff': {
            "db": 'GRCh37.75',
            "data_dir": refdata['databases']['snpeff']['local_path'],
        },
    }

    return {'germline_calling': germline}
예제 #5
0
def get_breakpoint_params(reference_dir, reference):
    """Build the ``breakpoint_calling`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'breakpoint_calling'`` mapping to
        the parameter dict, including the docker images it references.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)
    images = config_reference.containers()['docker']

    breakpoints = {'memory': {'low': 4, 'med': 6, 'high': 16}}
    breakpoints['ref_data_directory'] = refdata['destruct_ref_data']
    # The index path is the genome path with a '.fai' suffix appended.
    breakpoints['destruct_config'] = {
        'genome_fasta': refdata['ref_genome'],
        'genome_fai': refdata['ref_genome'] + '.fai',
        'gtf_filename': refdata['destruct_gtf_file'],
    }
    breakpoints['docker'] = {
        'single_cell_pipeline': images['single_cell_pipeline'],
        'destruct': images['destruct'],
        'lumpy': images['lumpy'],
        'samtools': images['samtools'],
    }

    return {'breakpoint_calling': breakpoints}
예제 #6
0
def get_hmmcopy_params(reference_dir, reference, binsize, smoothing_function):
    """Build the ``hmmcopy`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        binsize: Stored as ``bin_size`` and used as the key selecting the
            GC and mappability wig entries from the reference data.
        smoothing_function: Stored unchanged as ``smoothing_function``.

    Returns:
        A dict with the single key ``'hmmcopy'`` mapping to the parameter
        dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    # Fixed settings plus the two caller-supplied values.
    hmmcopy = {
        'multipliers': [1, 2, 3, 4, 5, 6],
        'map_cutoff': 0.9,
        'bin_size': binsize,
        'e': 0.999999,
        'eta': 50000,
        'g': 3,
        'lambda': 20,
        'min_mqual': 20,
        'nu': 2.1,
        'num_states': 12,
        's': 1,
        'strength': 1000,
        'kappa': '100,100,700,100,25,25,25,25,25,25,25,25',
        'm': '0,1,2,3,4,5,6,7,8,9,10,11',
        'mu': '0,1,2,3,4,5,6,7,8,9,10,11',
        'smoothing_function': smoothing_function,
    }

    # Entries taken from the reference data; wig files are keyed by binsize.
    hmmcopy['exclude_list'] = refdata['exclude_list']
    hmmcopy['gc_wig_file'] = refdata['gc_wig_file'][binsize]
    hmmcopy['map_wig_file'] = refdata['map_wig_file'][binsize]
    hmmcopy['chromosomes'] = refdata['chromosomes']
    hmmcopy['ref_genome'] = refdata['ref_genome']

    hmmcopy['igv_segs_quality_threshold'] = 0.75
    hmmcopy['memory'] = {'med': 6}
    # Each filter is a [column, operator, value] triple.
    hmmcopy['good_cells'] = [
        ['median_hmmcopy_reads_per_bin', 'ge', 50],
        ['is_contaminated', 'in', ['False', 'false', False]],
    ]

    return {"hmmcopy": hmmcopy}
예제 #7
0
def get_sv_genotyping_params(reference_dir, reference):
    """Build the ``sv_genotyping`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'sv_genotyping'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    return {
        'sv_genotyping': {
            'ref_genome': refdata['ref_genome'],
            'memory': {'low': 4, 'med': 6, 'high': 16},
        },
    }
예제 #8
0
def get_aneufinder_params(reference_dir, reference):
    """Build the ``aneufinder`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'aneufinder'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    aneufinder = {'memory': {'med': 6}}
    aneufinder['chromosomes'] = refdata['chromosomes']
    aneufinder['ref_genome'] = refdata['ref_genome']

    return {'aneufinder': aneufinder}
예제 #9
0
def get_sv_genotyping_params(reference_dir, reference, version):
    """Build the ``sv_genotyping`` configuration section with docker images.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        version: Forwarded to ``config_reference.containers`` to select
            the docker image table.

    Returns:
        A dict with the single key ``'sv_genotyping'`` mapping to the
        parameter dict.
    """
    images = config_reference.containers(version)['docker']
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    sv_genotyping = {
        'ref_genome': refdata['ref_genome'],
        'memory': {
            'low': 4,
            'med': 6,
            'high': 16,
        },
    }
    sv_genotyping['docker'] = {
        'single_cell_pipeline': images['single_cell_pipeline'],
        'svtyper': images['svtyper'],
    }
    return {'sv_genotyping': sv_genotyping}
예제 #10
0
def get_aneufinder_params(reference_dir, reference, version):
    """Build the ``aneufinder`` configuration section with docker images.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        version: Forwarded to ``config_reference.containers`` to select
            the docker image table.

    Returns:
        A dict with the single key ``'aneufinder'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)
    images = config_reference.containers(version)['docker']

    aneufinder = {'memory': {'med': 6}}
    aneufinder['docker'] = {
        'single_cell_pipeline': images['single_cell_pipeline'],
        'aneufinder': images['aneufinder'],
    }
    aneufinder['chromosomes'] = refdata['chromosomes']
    aneufinder['ref_genome'] = refdata['ref_genome']

    return {'aneufinder': aneufinder}
예제 #11
0
def get_align_params(reference_dir, reference):
    """Build the ``alignment`` configuration section.

    The fastq_screen genome list always contains the 'grch37', 'mm10' and
    'GCF_002021735' (named 'salmon') references, independent of the
    ``reference`` argument.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name used for the main reference data lookup.

    Returns:
        A dict with the single key ``'alignment'`` mapping to the
        parameter dict.
    """
    lookup = config_reference.get_cluster_reference_data
    refdata = lookup(reference_dir, reference)

    # (display name, reference identifier) for every screened genome.
    screened = (
        ('grch37', 'grch37'),
        ('mm10', 'mm10'),
        ('salmon', 'GCF_002021735'),
    )

    alignment = {
        'ref_genome': refdata['ref_genome'],
        'memory': {'med': 6},
        'adapter': 'CTGTCTCTTATACACATCTCCGAGCCCACGAGAC',
        'adapter2': 'CTGTCTCTTATACACATCTGACGCTGCCGACGA',
        'picard_wgs_params': {
            "min_bqual": 20,
            "min_mqual": 20,
            "count_unpaired": False,
        },
        'chromosomes': refdata['chromosomes'],
        'gc_windows': refdata['gc_windows'],
        'fastq_screen_params': {
            'aligner': 'bwa',
            'filter_tags': None,
            'genomes': [
                {
                    'name': label,
                    'paths': lookup(reference_dir, ref_id)['ref_genome'],
                }
                for label, ref_id in screened
            ],
        },
    }

    return {"alignment": alignment}
def get_annotation_params(reference_dir, reference, version):
    """Build the ``annotation`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name used for the lookup; also stored
            unchanged as ``ref_type``.
        version: Forwarded to ``config_reference.containers`` to select
            the docker image table.

    Returns:
        A dict with the single key ``'annotation'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    all_images = config_reference.containers(version)['docker']
    # Keep only the images this section refers to.
    wanted = ('single_cell_pipeline', 'cell_cycle_classifier', 'corrupt_tree')
    docker = {image: all_images[image] for image in wanted}

    annotation = {
        'docker': docker,
        'memory': {'med': 6},
        'classifier_training_data': refdata['classifier_training_data'],
        'fastqscreen_training_data': refdata['fastqscreen_training_data'],
        'reference_gc': refdata['reference_gc_qc'],
        'chromosomes': refdata['chromosomes'],
        'num_states': 12,
        'map_cutoff': 0.9,
        'ref_type': reference,
        'corrupt_tree_params': {
            'neighborhood_size': 2,
            'lower_fraction': 0.05,
            'engine_nchains': 1,
            'engine_nscans': 10000,
            'model_fpr_bound': 0.1,
            'model_fnr_bound': 0.5,
        },
        # Each filter is a [column, operator, value] triple.
        'good_cells': [
            ['quality', 'ge', 0.75],
            ['experimental_condition', 'notin', ["NTC", "NCC", "gDNA", "GM"]],
            ['cell_call', 'in', ['C1']],
            ['is_contaminated', 'in', ['False', 'false', False]],
        ],
    }

    return {"annotation": annotation}
예제 #13
0
def get_qc_params(reference_dir, reference, version):
    """Build the ``qc`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        version: Forwarded to ``config_reference.containers`` to select
            the docker image table.

    Returns:
        A dict with the single key ``'qc'`` mapping to the parameter dict.
    """
    images = config_reference.containers(version)['docker']
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    qc = {
        'ref_genome': refdata['ref_genome'],
        'vep': refdata['vep'],
        'memory': {
            'low': 4,
            'med': 6,
            'high': 16,
        },
    }
    qc['docker'] = {
        'single_cell_pipeline': images['single_cell_pipeline'],
        'vcf2maf': images['vcf2maf'],
        'pseudo_bulk_qc_html_report': images['pseudo_bulk_qc_html_report'],
    }
    return {'qc': qc}
예제 #14
0
def get_variant_calling_params(reference_dir, reference):
    """Build the ``variant_calling`` configuration section.

    Every entry of the reference data's ``databases`` table other than
    'mappability' and 'snpeff' is collected under
    ``additional_databases`` as ``{'path': <local_path>}``.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'variant_calling'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    # These two databases get dedicated entries below.
    dedicated = ('mappability', 'snpeff')
    extra_dbs = {
        name: {'path': entry['local_path']}
        for name, entry in refdata['databases'].items()
        if name not in dedicated
    }

    variant_calling = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': 8,
        'ref_genome': refdata['ref_genome'],
        'chromosomes': refdata['chromosomes'],
        'use_depth_thresholds': True,
        'split_size': 10000000,
    }
    variant_calling['databases'] = {
        'mappability': {
            "path": refdata['databases']['mappability']['local_path'],
        },
        'snpeff': {
            'db': refdata["databases"]["snpeff"]["db"],
            "path": refdata["databases"]["snpeff"]["local_path"],
        },
        'additional_databases': extra_dbs,
    }
    variant_calling['museq_params'] = {
        'threshold': 0.5,
        'verbose': True,
        'purity': 70,
        'coverage': 4,
        'buffer_size': '2G',
        'mapq_threshold': 10,
        'indl_threshold': 0.05,
        'normal_variant': 25,
        'tumour_variant': 2,
        'baseq_threshold': 10,
    }

    return {'variant_calling': variant_calling}
예제 #15
0
def get_split_bam_params(reference_dir, reference):
    """Build the ``split_bam`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'split_bam'`` mapping to the
        parameter dict; ``one_split_job`` is always ``True``.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    split_bam = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': 8,
    }
    split_bam['ref_genome'] = refdata['ref_genome']
    split_bam['split_size'] = 10000000
    split_bam['chromosomes'] = refdata['chromosomes']
    split_bam['one_split_job'] = True

    return {'split_bam': split_bam}
예제 #16
0
def get_breakpoint_params(reference_dir, reference):
    """Build the ``breakpoint_calling`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'breakpoint_calling'`` mapping to
        the parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    breakpoints = {'memory': {'low': 4, 'med': 6, 'high': 16}}
    breakpoints['ref_data_directory'] = refdata['destruct_ref_data']
    # The index path is the genome path with a '.fai' suffix appended.
    breakpoints['destruct_config'] = {
        'genome_fasta': refdata['ref_genome'],
        'genome_fai': refdata['ref_genome'] + '.fai',
        'gtf_filename': refdata['destruct_gtf_file'],
    }

    return {'breakpoint_calling': breakpoints}
예제 #17
0
def get_split_bam_params(reference_dir, reference, version):
    """Build the ``split_bam`` configuration section with docker images.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        version: Forwarded to ``config_reference.containers`` to select
            the docker image table.

    Returns:
        A dict with the single key ``'split_bam'`` mapping to the
        parameter dict; ``one_split_job`` is always ``True``.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)
    images = config_reference.containers(version)['docker']

    docker = {
        'single_cell_pipeline': images['single_cell_pipeline'],
        'samtools': images['samtools'],
    }
    split_bam = {
        'memory': {
            'low': 4,
            'med': 6,
            'high': 16,
        },
        'max_cores': 8,
        'docker': docker,
        'ref_genome': refdata['ref_genome'],
        'split_size': 10000000,
        'chromosomes': refdata['chromosomes'],
        'one_split_job': True,
    }

    return {'split_bam': split_bam}
예제 #18
0
def get_count_haps_params(reference_dir, reference):
    """Build the ``count_haps`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'count_haps'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    count_haps = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': None,
    }
    count_haps['chromosomes'] = refdata['chromosomes']
    # The index template is the genome path with a '.fai' suffix appended.
    count_haps['extract_seqdata'] = {
        'genome_fasta_template': refdata['ref_genome'],
        'genome_fai_template': refdata['ref_genome'] + '.fai',
    }
    count_haps['ref_data_dir'] = refdata['copynumber_ref_data']

    return {'count_haps': count_haps}
예제 #19
0
def get_align_params(reference_dir, reference, version):
    """Build the ``alignment`` configuration section with docker images.

    The fastq_screen genome list always contains the 'grch37', 'mm10' and
    'GCF_002021735' (named 'salmon') references, independent of the
    ``reference`` argument.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name used for the main reference data lookup.
        version: Forwarded to ``config_reference.containers`` to select
            the docker image table.

    Returns:
        A dict with the single key ``'alignment'`` mapping to the
        parameter dict.
    """
    lookup = config_reference.get_cluster_reference_data
    refdata = lookup(reference_dir, reference)

    all_images = config_reference.containers(version)['docker']
    # Keep only the images this section refers to.
    wanted = (
        'single_cell_pipeline',
        'fastqc',
        'samtools',
        'bwa',
        'picard',
        'trimgalore',
        'fastq_screen',
    )
    docker = {image: all_images[image] for image in wanted}

    # (display name, reference identifier) for every screened genome.
    screened = (
        ('grch37', 'grch37'),
        ('mm10', 'mm10'),
        ('salmon', 'GCF_002021735'),
    )

    alignment = {
        'ref_genome': refdata['ref_genome'],
        'docker': docker,
        'memory': {'med': 6},
        'adapter': 'CTGTCTCTTATACACATCTCCGAGCCCACGAGAC',
        'adapter2': 'CTGTCTCTTATACACATCTGACGCTGCCGACGA',
        'picard_wgs_params': {
            "min_bqual": 20,
            "min_mqual": 20,
            "count_unpaired": False,
        },
        'chromosomes': refdata['chromosomes'],
        'gc_windows': refdata['gc_windows'],
        'fastq_screen_params': {
            'strict_validation': True,
            'filter_contaminated_reads': False,
            'aligner': 'bwa',
            'genomes': [
                {
                    'name': label,
                    'path': lookup(reference_dir, ref_id)['ref_genome'],
                }
                for label, ref_id in screened
            ],
        },
    }

    return {"alignment": alignment}
예제 #20
0
def get_merge_bams_params(reference_dir, reference, cluster):
    """Build the ``merge_bams`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.
        cluster: Cluster identifier; ``'azure'`` and ``'aws'`` switch
            ``one_split_job`` on, any other value switches it off.

    Returns:
        A dict with the single key ``'merge_bams'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    # Only 'azure' and 'aws' enable the single split job.
    single_split = cluster in ('azure', 'aws')

    merge_bams = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': 8,
        'ref_genome': refdata['ref_genome'],
        'split_size': 10000000,
        'chromosomes': refdata['chromosomes'],
        'one_split_job': single_split,
    }
    return {'merge_bams': merge_bams}
예제 #21
0
def get_cohort_qc_params(reference_dir, reference):
    """Build the ``cohort_qc`` configuration section.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'cohort_qc'`` mapping to the
        parameter dict, including the fixed list of non-synonymous
        variant labels.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)

    cohort_qc = {
        'ref_genome': refdata['ref_genome'],
        'vep': refdata['vep'],
        'gtf': refdata['qc_gtf_file'],
        'non_synonymous_labels': [
            "Frame_Shift_Del",
            "Frame_Shift_Ins",
            "Splice_Site",
            "Translation_Start_Site",
            "Nonsense_Mutation",
            "Nonstop_Mutation",
            "In_Frame_Del",
            "In_Frame_Ins",
            "Missense_Mutation",
        ],
        'memory': {'low': 4, 'med': 6, 'high': 16},
    }
    return {'cohort_qc': cohort_qc}
예제 #22
0
def get_variant_calling_params(reference_dir, reference):
    """Build the ``variant_calling`` configuration section.

    Besides the general settings, the section carries one independent
    copy of the same status template for each of ``cosmic_status``,
    ``dbsnp_status``, ``mappability``, ``snpeff`` and
    ``tri_nucleotide_context``, plus the download locations of the
    annotation databases.

    Args:
        reference_dir: Forwarded to
            ``config_reference.get_cluster_reference_data``.
        reference: Reference name, forwarded to the same lookup.

    Returns:
        A dict with the single key ``'variant_calling'`` mapping to the
        parameter dict.
    """
    refdata = config_reference.get_cluster_reference_data(
        reference_dir, reference)
    images = config_reference.containers()['docker']

    status_template = {'kwargs': {'split_size': 10000000}}

    variant_calling = {
        'memory': {'low': 4, 'med': 6, 'high': 16},
        'max_cores': 8,
        'docker': {
            'single_cell_pipeline': images['single_cell_pipeline'],
            'vcftools': images['vcftools'],
            'strelka': images['strelka'],
            'mutationseq': images['mutationseq'],
        },
        'ref_genome': refdata['ref_genome'],
        'chromosomes': refdata['chromosomes'],
        'split_size': 10000000,
    }

    # Deep-copy per key so the entries never share nested dicts.
    status_keys = (
        'cosmic_status',
        'dbsnp_status',
        'mappability',
        'snpeff',
        'tri_nucleotide_context',
    )
    for key in status_keys:
        variant_calling[key] = copy.deepcopy(status_template)

    db_refs = refdata['databases']
    # NOTE(review): the cosmic user name and password are literal
    # placeholder-looking strings -- confirm how real credentials are
    # meant to be supplied.
    variant_calling['databases'] = {
        'cosmic': {
            'download_method': 'sftp',
            'user_name': '*****@*****.**',
            'password': '******',
            'host': 'sftp-cancer.sanger.ac.uk',
            'remote_paths': {
                'coding':
                    '/files/grch37/cosmic/v75/VCF/CosmicCodingMuts.vcf.gz',
                'non_coding':
                    '/files/grch37/cosmic/v75/VCF/CosmicNonCodingVariants.vcf.gz',
            },
            'local_path': db_refs['cosmic']['local_path'],
        },
        'dbsnp': {
            'url': 'ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606_b146_GRCh37p13/VCF/common_all_20151104.vcf.gz',
            'local_path': db_refs['dbsnp']['local_path'],
        },
        'mappability': {
            'url': (
                'http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/release3'
                '/wgEncodeCrgMapabilityAlign50mer.bigWig'
            ),
            'local_path': db_refs['mappability']['local_path'],
        },
        'ref_genome': {
            'url': 'http://www.bcgsc.ca/downloads/genomes/9606/hg19/1000genomes/bwa_ind/genome/GRCh37-lite.fa',
            'local_path': refdata['ref_genome'],
        },
        'snpeff': {
            "db": 'GRCh37.75',
        },
    }
    variant_calling['museq_params'] = {
        'threshold': 0.5,
        'verbose': True,
        'purity': 70,
        'coverage': 4,
        'buffer_size': '2G',
        'mapq_threshold': 10,
        'indl_threshold': 0.05,
        'normal_variant': 25,
        'tumour_variant': 2,
        'baseq_threshold': 10,
    }

    return {'variant_calling': variant_calling}