Esempio n. 1
0
def vcf2bed(chrom, pop):
    """Create a target that turns a recoded VCF into a pruned PLINK fileset.

    The job script runs ``plink`` twice: first with ``--make-bed`` and
    ``--indep-pairwise`` on the VCF, then with ``--extract`` on the
    ``.prune.in`` list written next to the first call's output prefix.

    Args:
        chrom: Chromosome label inserted into the file names.
        pop: Population label inserted into the file names.

    Returns:
        AnonymousTarget: Target that reads the recoded VCF and declares the
        ``.pruned.bed`` file as its only output.
    """
    stem = f'chr{chrom}_{pop}'
    vcf_path = f'steps/recode_vcf/{stem}.recode.vcf'
    # modpath is defined elsewhere; presumably this drops the '.bed' suffix to
    # give the prefix handed to plink --out -- confirm against its definition.
    plink_prefix = modpath(f'steps/plink/{stem}.bed', suffix=('.bed', ''))
    pruned_path = f'steps/plink/{stem}.pruned.bed'

    script = f'''

    mkdir -p steps/plink

    plink --vcf {vcf_path} --make-bed --double-id --geno 0.025 --indep-pairwise 50 10 0.1 \
        --out {plink_prefix}
    
    plink --bfile {plink_prefix} --extract {plink_prefix}.prune.in --make-bed --out {plink_prefix}.pruned

 
    '''

    return AnonymousTarget(
        inputs=[vcf_path],
        outputs=[pruned_path],
        options={'memory': '2g', 'walltime': '02:00:00'},
        spec=script,
    )
Esempio n. 2
0
def argsample(sites_file, times_file, popsize_file, recomb_file, bed_file):
    """Create a target that runs ``arg-sample`` followed by ``smc2bed-all``.

    Args:
        sites_file: File passed to ``arg-sample -s``; declared as an input.
        times_file: File passed to ``--times-file`` (not declared as an input).
        popsize_file: File passed to ``--popsize-file`` (not declared as an
            input).
        recomb_file: File passed to ``--recombmap``; declared as an input.
        bed_file: Declared output. Its directory is created by the script and
            its path is used to derive the ``arg-sample`` output prefix.

    Returns:
        AnonymousTarget: Target whose outputs are the bed file, a log file and
        a ``.bed.gz.tbi`` file.
    """
    out_dir = os.path.dirname(bed_file)
    sample_prefix = modpath(bed_file, suffix='')
    # TODO should be: sample_prefix = modpath(bed_file, suffix=('.bed.gz', ''))
    log_path = modpath(sample_prefix, suffix='.log')
    tbi_path = modpath(sample_prefix, suffix='.bed.gz.tbi')

    script = f'''
    mkdir -p {out_dir}
    arg-sample -s {sites_file} \
            --times-file {times_file} \
            --popsize-file {popsize_file} \
            --recombmap {recomb_file} \
            -m 1.247e-08 \
            -c 25 \
            -n 30000 \
            --overwrite \
            -o {sample_prefix} \
    && \
    ./argweaver/bin/smc2bed-all {sample_prefix}
    '''

    return AnonymousTarget(
        inputs={'sites_file': sites_file, 'recomb_file': recomb_file},
        outputs={
            'bed_file': bed_file,
            'log_file': log_path,
            'tabix_file': tbi_path,
        },
        options={'memory': '40g', 'walltime': '14-00:00:00'},
        spec=script,
    )
Esempio n. 3
0
def mock_template(mocker):
    """Return a mock template callable that yields an empty AnonymousTarget.

    Args:
        mocker: Object exposing ``MagicMock`` (presumably the pytest-mock
            fixture -- confirm against the test setup).

    Returns:
        A ``MagicMock`` named ``"mock_template"`` whose return value is an
        ``AnonymousTarget`` with no inputs, outputs or options.
    """
    template = mocker.MagicMock()
    template.__name__ = "mock_template"
    empty_target = AnonymousTarget(inputs=[], outputs=[], options={})
    template.return_value = empty_target
    return template
def fs_master(cp_dir, run_name, i, o):
    """Create a target that runs ``fs <run_name>.cp -go`` inside ``cp_dir``.

    Args:
        cp_dir: Directory the job script changes into; also the prefix of the
            declared output path.
        run_name: Name of the ``.cp`` run file (without extension).
        i: Inputs handed to the target unchanged.
        o: Suffix appended to ``cp_dir + run_name`` to form the single output.

    Returns:
        AnonymousTarget: Target with one declared output file.
    """
    script = f'''

    cd {cp_dir}

    fs {run_name}.cp -go

    '''

    resources = {
        'cores': 2,
        'memory': "8g",
        'walltime': "01:00:00",
        "account": 'baboondiversity'
    }

    return AnonymousTarget(
        inputs=i,
        outputs=[cp_dir + run_name + o],
        options=resources,
        spec=script,
    )
Esempio n. 5
0
def admixture(k, chrom, pop):
    """Create a target that runs ``admixture --cv`` on a pruned bed file.

    The script pipes the program's output through ``tee`` into a log file and
    then moves every file starting with ``chr{chrom}_{pop}.pruned.{k}`` from
    the working directory into the per-chromosome results directory.

    Args:
        k: Value passed to ``admixture`` after the bed file; also part of the
            output file names.
        chrom: Chromosome label used in file names.
        pop: Population label used in file names.

    Returns:
        AnonymousTarget: Target declaring the ``.Q`` file, the ``.P`` file and
        the log file as outputs.
    """
    stem = f'chr{chrom}_{pop}'
    bed_path = f'steps/plink/{stem}.pruned.bed'
    q_path = f'results/admixture/{stem}/{stem}.pruned.{k}.Q'
    p_path = f'results/admixture/{stem}/{stem}.pruned.{k}.P'
    # Prefix of the files admixture writes; used by the mv glob in the script.
    local_prefix = f'{stem}.pruned.{k}'
    log_path = f'results/admixture/crossvalidation/log_{stem}.{k}.out'

    script = f'''

    mkdir -p results/admixture/chr{chrom}_{pop}

    mkdir -p results/admixture/crossvalidation

    admixture --cv {bed_path} {k} | tee {log_path}

    mv {local_prefix}* results/admixture/chr{chrom}_{pop}

    '''
    return AnonymousTarget(
        inputs=[bed_path],
        outputs=[q_path, p_path, log_path],
        options={'memory': '5g', 'walltime': '8:00:00'},
        spec=script,
    )
def command_files(block, block_number, cp_dir, run_name, cf, i):
    """Create a target that runs one slice of a generated command file.

    The script counts the lines of the command file, selects lines
    ``file_length*(block-1)/block_number + 1`` through
    ``file_length*block/block_number`` with ``sed`` and pipes them to
    ``bash``. Afterwards it touches a marker file that is the declared output.

    Args:
        block: Number of the slice to run; the start line is computed from
            ``block - 1``.
        block_number: Divisor used when computing the slice boundaries.
        cp_dir: Directory the script changes into; prefix of the output path.
        run_name: Run directory that contains ``commandfiles/``.
        cf: Command file name; its fifth character from the end becomes part
            of the marker file name.
        i: Inputs handed to the target unchanged.

    Returns:
        AnonymousTarget: Target whose output is the marker file path.
    """
    marker = f'{run_name}/commandfiles/{cf[-5]}_{block}_{block_number}'

    script = f'''

    cd {cp_dir}
    file_length=$(wc -l < {run_name}/commandfiles/{cf})
    start=$((file_length*{block-1}/{block_number}+1))
    stop=$((file_length*{block}/{block_number}))
    sed -n "$start,$stop p" {run_name}/commandfiles/{cf} | bash
    touch {marker}

    '''

    resources = {
        'cores': 4,
        'memory': "16g",
        'walltime': "04:00:00",
        "account": 'baboondiversity'
    }

    return AnonymousTarget(
        inputs=i,
        outputs=cp_dir + marker,
        options=resources,
        spec=script,
    )
def command_files_single(cp_dir, run_name, cf, i):
    """Create a target that runs a whole generated command file via ``parallel``.

    The script changes into ``cp_dir``, pipes the command file to
    ``parallel`` and then touches a marker file that is the declared output.

    Args:
        cp_dir: Directory the script changes into; prefix of the output path.
        run_name: Run directory that contains ``commandfiles/``.
        cf: Command file name; its fifth character from the end is the marker
            file name.
        i: Inputs handed to the target unchanged.

    Returns:
        AnonymousTarget: Target whose output is the marker file path.
    """
    inputs = i
    o_file = f'{run_name}/commandfiles/{cf[-5]}'
    outputs = cp_dir + o_file

    options = {
        'cores': 8,
        'memory': "16g",
        'walltime': "04:00:00",
        "account": 'baboondiversity'
    }

    spec = f'''

    cd {cp_dir}

    cat {run_name}/commandfiles/{cf} | parallel

    touch {o_file}

    '''
    # The leftover debug print of the script was removed: it wrote to stdout
    # on every call of this template.
    return AnonymousTarget(inputs=inputs,
                           outputs=outputs,
                           options=options,
                           spec=spec)
def fs_start(cp_dir, run_name, idfile, phasefile, recombfile, s3iters, s4iters,
             s1minsnps, s1indfrac):
    """Create the target that initialises an ``fs`` run with ``-hpc 1``.

    Extra ``fs`` options for the run belong in the script built here.

    Args:
        cp_dir: Directory the script changes into.
        run_name: Name of the ``.cp`` run file (without extension).
        idfile: Path passed to ``-idfile`` (prefixed with ``../../``).
        phasefile: Path passed to ``-phasefiles`` (prefixed with ``../../``).
        recombfile: Path passed to ``-recombfiles`` (prefixed with ``../../``).
        s3iters: Value for ``-s3iters``.
        s4iters: Value for ``-s4iters``.
        s1minsnps: Value for ``-s1minsnps``.
        s1indfrac: Value for ``-s1indfrac``.

    Returns:
        AnonymousTarget: Target whose single output is
        ``{cp_dir}/{run_name}/commandfiles/commandfile1.txt``.
    """
    first_commandfile = f'{cp_dir}/{run_name}/commandfiles/commandfile1.txt'

    script = f'''
    
    cd {cp_dir}

    fs {run_name}.cp -hpc 1 -idfile ../../{idfile} -phasefiles ../../{phasefile} -recombfiles ../../{recombfile} \
        -s3iters {s3iters} -s4iters {s4iters} -s1minsnps {s1minsnps} -s1indfrac {s1indfrac} -go

    '''

    resources = {
        'cores': 1,
        'memory': '8g',
        'walltime': '01:00:00',
        'account': 'baboondiversity'
    }

    return AnonymousTarget(
        inputs=[idfile, phasefile, recombfile],
        outputs=[first_commandfile],
        options=resources,
        spec=script,
    )
Esempio n. 9
0
def bwa_map(ref_genome, r1, r2, bamfile):
    """Create a target that maps reads with ``bwa mem`` and ``samtools``.

    Args:
        ref_genome: Reference prefix passed to ``bwa mem``; the ``.amb``,
            ``.ann`` and ``.pac`` files sharing this prefix are declared as
            inputs.
        r1: First read file.
        r2: Second read file.
        bamfile: File written by ``samtools rmdup``; the only declared output.

    Returns:
        AnonymousTarget: Target requesting 16 cores and 1g of memory.
    """
    index_files = [f'{ref_genome}.{ext}' for ext in ('amb', 'ann', 'pac')]

    script = f'''
    bwa mem -t 16 {ref_genome} {r1} {r2} | \
    samtools sort | \
    samtools rmdup -s - {bamfile}
    '''

    return AnonymousTarget(
        inputs=[r1, r2] + index_files,
        outputs=[bamfile],
        options={'cores': 16, 'memory': '1g'},
        spec=script,
    )
Esempio n. 10
0
def vcf_filter(vcf_file, chrom, popfile, pop):
    """Create a target that runs ``vcftools --recode --keep`` on a gzipped VCF.

    Args:
        vcf_file: File passed to ``--gzvcf``; the only declared input.
        chrom: Chromosome label used in the output file name.
        popfile: File name under ``data/`` passed to ``--keep``.
        pop: Population label used in the output file name.

    Returns:
        AnonymousTarget: Target whose output is
        ``steps/recode_vcf/chr{chrom}_{pop}.recode.vcf``.
    """
    recoded_vcf = f'steps/recode_vcf/chr{chrom}_{pop}.recode.vcf'
    # modpath is defined elsewhere; presumably this strips '.recode.vcf' to
    # give the prefix handed to vcftools --out -- confirm.
    out_prefix = modpath(recoded_vcf, suffix=('.recode.vcf', ''))

    script = f'''

    mkdir -p steps/recode_vcf

    vcftools --gzvcf {vcf_file} --recode --keep data/{popfile} \
        --out {out_prefix}
    
    '''

    resources = {
        'cores': 1,
        'memory': '2g',
        'walltime': '02:00:00',
        'account': 'baboondiversity'
    }

    return AnonymousTarget(
        inputs=[vcf_file],
        outputs=[recoded_vcf],
        options=resources,
        spec=script,
    )
Esempio n. 11
0
def vcf2plink(chrom, pop):
    """Create a target that runs ``plink --recode12`` on a recoded VCF.

    Args:
        chrom: Chromosome label used in file names.
        pop: Population label used in file names.

    Returns:
        AnonymousTarget: Target declaring the ``.ped`` and ``.map`` files
        under ``steps/plink`` as outputs.
    """
    stem = f'chr{chrom}_{pop}'
    vcf_path = f'steps/recode_vcf/{stem}.recode.vcf'
    ped_path = f'steps/plink/{stem}.ped'
    map_path = f'steps/plink/{stem}.map'
    # modpath is defined elsewhere; presumably this strips '.ped' to give the
    # prefix handed to plink --out -- confirm.
    plink_prefix = modpath(ped_path, suffix=('.ped', ''))

    script = f''' 
    
    mkdir -p steps/plink

    plink --vcf {vcf_path} --recode12 --double-id --geno 0.025 --out {plink_prefix}  

    '''

    resources = {
        'cores': 1,
        'memory': '2g',
        'walltime': '10:00:00',
        'account': 'baboondiversity'
    }

    return AnonymousTarget(
        inputs=[vcf_path],
        outputs=[ped_path, map_path],
        options=resources,
        spec=script,
    )
Esempio n. 12
0
def plink2finestructure(chrom, pop):
    """Create a target that runs ``plink2chromopainter.pl`` on PLINK files.

    Args:
        chrom: Chromosome label used in file names.
        pop: Population label used in file names.

    Returns:
        AnonymousTarget: Target reading the ``.ped``/``.map`` pair and
        declaring the ``.phase`` and ``.ids`` files under
        ``steps/finestructure`` as outputs.
    """
    stem = f'chr{chrom}_{pop}'
    ped_path = f'steps/plink/{stem}.ped'
    map_path = f'steps/plink/{stem}.map'
    ids_path = f'steps/finestructure/{stem}.ids'
    phase_path = f'steps/finestructure/{stem}.phase'  # Fix name

    script = f''' 

    mkdir -p steps/finestructure
    
    ../../../software/fs_janne/plink2chromopainter.pl -p={ped_path} -m={map_path} \
        -d={ids_path} -o={phase_path}   

    '''

    resources = {
        'cores': 1,
        'memory': '15g',
        'walltime': '10:00:00',
        'account': 'baboondiversity'
    }

    return AnonymousTarget(
        inputs=[ped_path, map_path],
        outputs=[phase_path, ids_path],
        options=resources,
        spec=script,
    )
Esempio n. 13
0
    def nbconvert(notebook_file_name,
                  dependencies=None,
                  inplace=False,
                  output_format='notebook',
                  allow_errors=False,
                  timeout=-1):
        """Create a target whose script is built around ``jupyter nbconvert``.

        Args:
            notebook_file_name: Notebook path; declared as the only output.
            dependencies: Inputs of the target. Defaults to a fresh empty
                list per call instead of a shared mutable default.
            inplace: When true, ``--inplace`` is added to the command line.
            output_format: Value passed to ``--to``.
            allow_errors: When true, ``--allow-errors`` is added.
            timeout: Value for ``--ExecutePreprocessor.timeout``.

        Returns:
            AnonymousTarget: Target using ``args.cores`` and
            ``args.total_memory`` from the enclosing scope as resources.
        """
        inputs = [] if dependencies is None else dependencies
        outputs = [notebook_file_name]
        options = {
            'cores': args.cores,
            'memory': args.total_memory,
        }

        allow_errors_flag = '--allow-errors' if allow_errors else ''
        # The original emitted '--allow-errors' a second time here whenever
        # inplace was requested, so '--inplace' never reached the command.
        inplace_flag = '--inplace' if inplace else ''

        # NOTE(review): the script assigns to `nbconvert_cmd` with spaces
        # around `=` and never invokes it; a POSIX shell treats that line as
        # running a command called `nbconvert_cmd`. It is left unchanged
        # because the intended backup/restore flow around the notebook is not
        # clear from this block -- confirm before relying on this target.
        spec = f'''
        cp {notebook_file_name} $TMPDIR/`basename {notebook_file_name}` && \
        nbconvert_cmd = "jupyter nbconvert --ClearOutputPreprocessor.enabled=True \
            --ExecutePreprocessor.timeout={timeout} {allow_errors_flag} {inplace_flag} \
                --to {output_format} --execute {notebook_file_name}" && \
        cp $TMPDIR/`basename {notebook_file_name}` {notebook_file_name}
        '''

        return AnonymousTarget(inputs=inputs,
                               outputs=outputs,
                               options=options,
                               spec=spec)
Esempio n. 14
0
def sum_ts(t_count_files, output_dir):
    """Create a target that pipes all count files through ``scripts/sum_ts.awk``.

    Args:
        t_count_files: Iterable of file paths; joined with spaces for ``cat``.
        output_dir: Directory in which ``sum.txt`` is written.

    Returns:
        AnonymousTarget: Target with the named output ``sum_file``.
    """
    sum_path = os.path.join(output_dir, 'sum.txt')
    joined_inputs = ' '.join(t_count_files)
    script = f"""
    cat {joined_inputs} | awk -f scripts/sum_ts.awk > {sum_path}
    """
    return AnonymousTarget(
        inputs={'t_count_files': t_count_files},
        outputs={'sum_file': sum_path},
        options={},
        spec=script,
    )
Esempio n. 15
0
 def template_returning_anonymous_target():
     """Return a fixed AnonymousTarget with no files and a set working_dir."""
     target_kwargs = {
         "inputs": [],
         "outputs": [],
         "options": {},
         "working_dir": "/some/dir",
         "spec": "this is the spec",
     }
     return AnonymousTarget(**target_kwargs)
Esempio n. 16
0
def vcf_to_zarr(chrom, i):
    """Create a target that runs ``scripts/vcf_to_zarr.py`` for one chromosome.

    Relies on the module-level names ``vcf_dir``, ``vcf_names`` and
    ``zarr_dir``, which are defined outside this function.

    Args:
        chrom: Chromosome label; formatted into ``vcf_names`` and appended to
            ``zarr_dir``.
        i: Inputs handed to the target unchanged.

    Returns:
        AnonymousTarget: Target whose output is ``zarr_dir + chrom``.
    """
    vcf_path = vcf_dir + vcf_names.format(chrom)
    zarr_path = zarr_dir + chrom
    resources = {'memory': '10g',
                 'walltime': '0-08:00:00'}
    script = f"python scripts/vcf_to_zarr.py {vcf_path} {zarr_path}"
    return AnonymousTarget(
        inputs=i,
        outputs=zarr_path,
        options=resources,
        spec=script,
    )
Esempio n. 17
0
def bam_index(path):
    """Create a target that runs ``samtools index`` on ``path``.

    Args:
        path: File to index; the declared output is ``path + '.bai'``.

    Returns:
        AnonymousTarget: Target with named input and output ``path``.
    """
    index_path = path + '.bai'
    return AnonymousTarget(
        inputs={'path': path},
        outputs={'path': index_path},
        options={'memory': '4g', 'walltime': '0-02:00:00'},
        spec=f'samtools index {path}',
    )
Esempio n. 18
0
def compute_avg_len(seq_len_file, output_dir):
    """Create a target that pipes a file through ``scripts/compute_avg.awk``.

    Args:
        seq_len_file: Input file read with ``cat``.
        output_dir: Directory for the result, which is named after the input
            file with ``.avg`` appended.

    Returns:
        AnonymousTarget: Target with named input ``seq_len_file`` and named
        output ``avg_file``.
    """
    avg_path = os.path.join(output_dir, os.path.basename(seq_len_file) + '.avg')
    script = f"""
    cat {seq_len_file} | awk -f scripts/compute_avg.awk > {avg_path}
    """
    return AnonymousTarget(
        inputs={'seq_len_file': seq_len_file},
        outputs={'avg_file': avg_path},
        options={},
        spec=script,
    )
Esempio n. 19
0
def bam2fastq(path):
    """Create a target that runs ``bamToFastq`` on ``path``.

    Args:
        path: Input file passed to ``-i``.

    Returns:
        AnonymousTarget: Target whose named output ``path`` is the result of
        ``modpath(path, suffix='.fq')``.
    """
    fastq_path = modpath(path, suffix='.fq')
    return AnonymousTarget(
        inputs={'path': path},
        outputs={'path': fastq_path},
        options={'memory': '4g', 'walltime': '0-02:00:00'},
        spec=f'bamToFastq -i {path} -fq {fastq_path}',
    )
Esempio n. 20
0
def bam_chr2(path, index):
    """Create a target that runs ``samtools view`` with a trailing ``2`` argument.

    The output name is derived with ``modpath`` from the name of the
    directory that contains ``path``.

    Args:
        path: Input file passed to ``samtools view``.
        index: Declared as an input alongside ``path``; not referenced in the
            script itself.

    Returns:
        AnonymousTarget: Target with the named output ``path``.
    """
    parent_dir_name = os.path.basename(os.path.dirname(path))
    chr2_path = modpath(path, base=parent_dir_name, suffix='.chr2.bam')
    script = f'samtools view -f 3 -F 4 -b -h {path} -o {chr2_path} 2'
    return AnonymousTarget(
        inputs={'path': path, 'index': index},
        outputs={'path': chr2_path},
        options={'memory': '4g', 'walltime': '0-02:00:00'},
        spec=script,
    )
Esempio n. 21
0
def tabix_index(path):
    """Create a target that builds a tabix index for a VCF file.

    The script calls ``tabix`` with ``-f``, so an existing index is
    overwritten.

    Args:
        path (str): Path to the VCF file.

    Returns:
        gwf.AnonymousTarget: Target whose output is ``path + '.tbi'``.
    """
    index_path = path + '.tbi'
    resources = {'memory': '4g',
                 'walltime': '0-01:00:00'}
    return AnonymousTarget(
        inputs={'path': path},
        outputs=index_path,
        options=resources,
        spec=f'tabix -f -p vcf {path}',
    )
Esempio n. 22
0
def filter_chromosomes(sample_file, output_dir):
    """Create a target that keeps lines matching ``chrom1`` or ``chrom3``.

    Args:
        sample_file: File searched with ``grep -E``.
        output_dir: Directory for the result, named after the input file
            with ``.filtered`` appended.

    Returns:
        AnonymousTarget: Target with named input and output ``sample_file``.
    """
    base_name = os.path.basename(sample_file)
    filtered_path = os.path.join(output_dir, f'{base_name}.filtered')
    script = f"""
    grep -E 'chrom1|chrom3' {sample_file} > {filtered_path}
    """
    return AnonymousTarget(
        inputs={'sample_file': sample_file},
        outputs={'sample_file': filtered_path},
        options={},
        spec=script,
    )
Esempio n. 23
0
def collapse_matrices_upon_eachother(matrices, output_file):
    """Create a target that runs ``transcov collapse`` on several matrices.

    Per the original author, this adds coverage matrices together value by
    value, which helps when looking at coverage across different samples.

    Args:
        matrices: Matrix file paths; declared as inputs and joined with
            spaces on the command line.
        output_file: Path passed to ``--output-file``; the only output.

    Returns:
        AnonymousTarget: Target requesting one hour and 24gb of memory.
    """
    matrix_args = " ".join(matrices)
    script = f"""
    transcov collapse {matrix_args} --output-file {output_file} 
    """
    return AnonymousTarget(
        inputs=matrices,
        outputs=[output_file],
        options={"walltime": "1:00:00", "memory": "24gb"},
        spec=script,
    )
Esempio n. 24
0
def generate_matrix(bam_file, bed_file, output_file):
    """Create a target that runs ``transcov generate`` on a BAM and a BED file.

    Per the original author, the result is a coverage matrix showing read
    depth in a region around each TSS, using a bed file created by the
    preprocessing step.

    Args:
        bam_file: First positional argument of ``transcov generate``.
        bed_file: Second positional argument of ``transcov generate``.
        output_file: Path passed to ``--output-file``; the only output.

    Returns:
        AnonymousTarget: Target requesting twelve hours and 6gb of memory.
    """
    script = f"""
    transcov generate {bam_file} {bed_file} --output-file {output_file}
    """
    return AnonymousTarget(
        inputs=[bam_file, bed_file],
        outputs=[output_file],
        options={"walltime": "12:00:00", "memory": "6gb"},
        spec=script,
    )
Esempio n. 25
0
def preprocess_gencode_annotation(annotation_file, bed_file, tss_file,
                                  region_size):
    """Create a target that runs ``transcov preprocess`` on an annotation file.

    Per the original author, this produces a bed file and a metadata file
    defining the TSSs to look for in the BAM files; those TSSs define the
    rows of the matrices.

    Args:
        annotation_file: Positional argument of ``transcov preprocess``; the
            only declared input.
        bed_file: Path passed to ``--bed-file``; declared as an output.
        tss_file: Path passed to ``--tss-file``; declared as an output.
        region_size: Value passed to ``--region-size``.

    Returns:
        AnonymousTarget: Target with default (empty) options.
    """
    script = f"""
    transcov preprocess {annotation_file} --bed-file {bed_file} --tss-file {tss_file} --region-size {region_size}
    """
    return AnonymousTarget(
        inputs=[annotation_file],
        outputs=[bed_file, tss_file],
        options={},
        spec=script,
    )
Esempio n. 26
0
def arg_recomb_file(recomb_map, start, end, recomb_file):
    """Create a target that runs ``scripts/argsample_rec_window.py`` for chrX.

    Args:
        recomb_map: File passed as first script argument; the only input.
        start: Window start passed to the script.
        end: Window end passed to the script.
        recomb_file: Output path; its directory is created by the script.

    Returns:
        AnonymousTarget: Target with the named output ``recomb_file``.
    """
    out_dir = os.path.dirname(recomb_file)

    script = f'''
    mkdir -p {out_dir}
    python scripts/argsample_rec_window.py {recomb_map} chrX {start} {end} {recomb_file}
    '''

    return AnonymousTarget(
        inputs=[recomb_map],
        outputs={'recomb_file': recomb_file},
        options={'memory': '4g', 'walltime': '01:00:00'},
        spec=script,
    )
Esempio n. 27
0
def merge_chrom_files(input_files, output_file):
    """Create a target that concatenates files with ``cat``.

    Args:
        input_files: File paths, concatenated in the given order.
        output_file: Destination file; its directory is created first.

    Returns:
        AnonymousTarget: Target requesting 4g of memory and ten minutes.
    """
    out_dir = os.path.dirname(output_file)
    sources = " ".join(input_files)

    script = f'''
    mkdir -p {out_dir}
    cat {sources} > {output_file}
    '''

    return AnonymousTarget(
        inputs=input_files,
        outputs=[output_file],
        options={'memory': '4g', 'walltime': '00:10:00'},
        spec=script,
    )
Esempio n. 28
0
def arg_sites_file(start, end, sites_file, fasta_files):
    """Create a target that runs ``scripts/argsample_sites_file.py`` for X.

    Args:
        start: Window start passed to the script.
        end: Window end passed to the script.
        sites_file: Output path; its directory is created by the script.
        fasta_files: File paths declared as inputs and appended, space
            separated, to the command line.

    Returns:
        AnonymousTarget: Target with the named output ``sites_file``.
    """
    out_dir = os.path.dirname(sites_file)
    fasta_args = " ".join(fasta_files)

    script = f'''
    mkdir -p {out_dir}
    python scripts/argsample_sites_file.py X {start} {end} {sites_file} {fasta_args}
    '''

    return AnonymousTarget(
        inputs=fasta_files,
        outputs={'sites_file': sites_file},
        options={'memory': '4g', 'walltime': '01:00:00'},
        spec=script,
    )
Esempio n. 29
0
def compute_stats(sample_file, output_dir):
    """Create a target that runs ``scripts/compute_stats.py`` on a sample file.

    Args:
        sample_file: File passed as first script argument.
        output_dir: Directory for the two results, named after the input
            file with ``.tcounts`` and ``.seqlengths`` appended.

    Returns:
        AnonymousTarget: Target with named outputs ``t_count_file`` and
        ``seq_len_file``.
    """
    base_name = os.path.basename(sample_file)
    t_count_path = os.path.join(output_dir, f'{base_name}.tcounts')
    seq_len_path = os.path.join(output_dir, f'{base_name}.seqlengths')

    script = f"""
    python scripts/compute_stats.py \
        {sample_file} \
        {t_count_path} \
        {seq_len_path}
    """
    return AnonymousTarget(
        inputs={'sample_file': sample_file},
        outputs={
            't_count_file': t_count_path,
            'seq_len_file': seq_len_path,
        },
        options={},
        spec=script,
    )
Esempio n. 30
0
def unzip(inputfile, outputfile):
    """Create a target that decompresses a file with ``gzcat``.

    Args:
        inputfile: File passed to ``gzcat``; the only declared input.
        outputfile: File the output is redirected to; the only output.

    Returns:
        AnonymousTarget: Target requesting one core and 2g of memory.
    """
    script = f'''
    gzcat {inputfile} > {outputfile}
    '''

    return AnonymousTarget(
        inputs=[inputfile],
        outputs=[outputfile],
        options={'cores': 1, 'memory': '2g'},
        spec=script,
    )