예제 #1
0
def hi_c_processing_partc(input_json):
    assert 'input_size_in_bytes' in input_json
    insize = input_json.get('input_size_in_bytes')
    assert 'input_cool' in insize
    assert 'input_hic' in insize

    nthreads = 1  # default value according to the cwl
    nres = 13  # default value according to the cwl
    if 'parameters' in input_json:
        if 'ncores' in input_json.get('parameters'):
            nthreads = input_json.get('parameters').get('ncores')
        if 'nres' in input_json.get('parameters'):
            nres = input_json.get('parameters').get('nres')

    input_size = insize['input_cool'] + insize['input_hic']
    out_size = input_size * nres
    inter_size = out_size
    total_size = B2GB(input_size + out_size + inter_size)
    total_safe_size = total_size * 2

    cpu = nthreads
    mem = B2MB(nthreads * input_size * 5)
    if mem < 1024:
        mem = 1024

    r = BenchmarkResult(size=total_safe_size,
                        mem=mem,
                        cpu=cpu)

    return(r.as_dict())
예제 #2
0
def repliseq_parta(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'fastq' in input_json.get('input_size_in_bytes')
    assert 'bwaIndex' in input_json.get('input_size_in_bytes')

    # cpu
    nthreads = 4  # default from cwl
    if 'parameters' in input_json:
        if 'nthreads' in input_json.get('parameters'):
            nthreads = input_json.get('parameters').get('nthreads')

    # space
    input_sizes = input_json.get('input_size_in_bytes')
    input_fastq = B2MB(input_sizes.get('fastq'))
    input_bwa = B2GB(input_sizes.get('bwaIndex'))
    if 'fastq2' in input_sizes: # pe
        total_space = input_fastq * 0.032 + input_bwa * 2.58
        mem = input_fastq * 8.48 + nthreads * 329 + input_bwa * 1658
    else: # se
        total_space = input_fastq * 0.018 + input_bwa * 2.54
        mem = input_fastq * 9.73 + nthreads * 221 + input_bwa * 1658

    r = BenchmarkResult(size=total_space * 1.5, mem=mem * 1.5, cpu=nthreads)

    return(r.as_dict())
예제 #3
0
def bwa_mem(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'fastq1' in input_json.get('input_size_in_bytes')
    assert 'fastq2' in input_json.get('input_size_in_bytes')
    assert 'bwa_index' in input_json.get('input_size_in_bytes')

    # cpu
    nthreads = 4  # default from cwl
    if 'parameters' in input_json:
        if 'nThreads' in input_json.get('parameters'):
            nthreads = input_json.get('parameters').get('nThreads')

    # space
    input_sizes = input_json.get('input_size_in_bytes')
    data_input_size = input_sizes.get('fastq1') + input_sizes.get('fastq2')
    total_input_size = data_input_size + input_sizes.get('bwa_index')
    output_bam_size = data_input_size * 2
    intermediate_index_size = input_sizes.get('bwa_index') * 2
    copied_input_size = data_input_size * 7  # copied and unzipped
    total_intermediate_size \
        = intermediate_index_size + output_bam_size + copied_input_size
    total_output_size = output_bam_size
    additional_size_in_gb = 10

    total_file_size_in_bp \
        = total_input_size + total_intermediate_size + total_output_size
    total_size = B2GB(total_file_size_in_bp) + additional_size_in_gb

    # mem
    mem = B2MB(input_sizes.get('bwa_index') * 4) + (nthreads * 500)

    r = BenchmarkResult(size=total_size, mem=mem, cpu=nthreads)

    return(r.as_dict())
예제 #4
0
def encode_rnaseq_stranded(input_json):
    assert 'input_size_in_bytes' in input_json
    r = BenchmarkResult(size=300,
                        mem=GB2MB(64),
                        cpu=16,
                        exclude_t=True)
    return(r.as_dict())
예제 #5
0
def mergebed(input_json):
    assert 'input_size_in_bytes' in input_json
    insz = input_json['input_size_in_bytes']
    assert 'input_bed' in insz
    input_size = sum(insz['input_bed'])
    total_size_in_gb = B2GB((input_size * 5) * 3)
    r = BenchmarkResult(size=total_size_in_gb,
                        mem=1024,
                        cpu=2)
    return(r.as_dict())
예제 #6
0
def md5(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'input_file' in input_json.get('input_size_in_bytes')
    input_in_bytes = input_json.get('input_size_in_bytes').get('input_file')
    input_size = B2GB(input_in_bytes) + 3
    r = BenchmarkResult(size=input_size,
                        mem=1024,
                        cpu=1)

    return(r.as_dict())
예제 #7
0
def encode_chipseq(input_json):
    assert 'input_size_in_bytes' in input_json
    insz = input_json['input_size_in_bytes']
    assert 'chip.fastqs' in insz
    assert 'chip.bwa_idx_tar' in insz
    input_fastq_size = sum(insz['chip.fastqs']) + sum(insz.get('chip.ctl_fastqs', [0]))
    input_size = input_fastq_size + insz['chip.bwa_idx_tar']
    output_size = input_fastq_size * 8 + insz['chip.bwa_idx_tar'] * 4
    total_size_in_gb = B2GB(input_size + output_size)
    r = BenchmarkResult(size=total_size_in_gb,
                        mem=30000,
                        cpu=16)
    return(r.as_dict())
예제 #8
0
def encode_atacseq_postaln(input_json):
    assert 'input_size_in_bytes' in input_json
    insz = input_json['input_size_in_bytes']
    assert 'atac.tas' in insz
    input_size = sum(insz['atac.tas'])
    nRep = len(insz['atac.tas'])
    total_size_in_gb = (B2GB(input_size * 55) + 16 * (nRep - 1)) * 2.5
    cpu = 12 + 4 * (nRep - 1)
    mem = 8 + 4 * (nRep - 1)
    r = BenchmarkResult(size=total_size_in_gb,
                        mem=GB2MB(mem),
                        cpu=cpu,
                        exclude_t=True)
    return(r.as_dict())
예제 #9
0
def encode_chipseq_aln_ctl(input_json):
    assert 'input_size_in_bytes' in input_json
    insz = input_json['input_size_in_bytes']
    assert 'chip.ctl_fastqs' in insz
    assert 'chip.bwa_idx_tar' in insz
    input_fastq_size = sum(insz['chip.ctl_fastqs'])
    total_size_in_gb = B2GB(input_fastq_size * 6 + insz['chip.bwa_idx_tar'] * 4) * 1.2
    if 'parameters' in input_json and 'chip.bwa_ctl.cpu' in input_json['parameters']:
        cpu = input_json['parameters']['chip.bwa_ctl.cpu']
    else:
        cpu = 2
    r = BenchmarkResult(size=total_size_in_gb,
                        mem=GB2MB(10),
                        cpu=cpu if cpu >= 8 else 8,
                        exclude_t=True)
    return(r.as_dict())
예제 #10
0
def pairsqc_single(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'input_pairs' in input_json.get('input_size_in_bytes')

    cpu = 1  # very rough estimate
    mem = 1024  # very rough estimate

    # space
    insize = B2GB(input_json['input_size_in_bytes']['input_pairs'])
    outsize = 0
    intersize = 0
    total_size = insize + outsize + intersize
    total_safe_size = total_size * 2

    r = BenchmarkResult(size=total_safe_size, mem=mem, cpu=cpu)
    return(r.as_dict())
예제 #11
0
def encode_atacseq(input_json):
    assert 'input_size_in_bytes' in input_json
    insz = input_json['input_size_in_bytes']
    assert 'atac.fastqs' in insz
    assert 'atac.bowtie2_idx_tar' in insz
    input_fastq_size = sum(insz['atac.fastqs'])
    input_size = input_fastq_size + insz['atac.bowtie2_idx_tar']
    output_size = input_fastq_size * 10
    total_size_in_gb = B2GB(input_size + output_size)
    if 'parameters' in input_json and  'atac.bowtie2.cpu' in input_json['parameters']:
        cpu = input_json['parameters']['atac.bowtie2.cpu'] + 2
    else:
        cpu = 6
    r = BenchmarkResult(size=total_size_in_gb,
                        mem=16000,
                        cpu=cpu)
    return(r.as_dict())
예제 #12
0
def pairsam_filter(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'input_pairsam' in input_json.get('input_size_in_bytes')

    cpu = 4  # very rough estimate
    mem = 16000  # very rough estimate

    # space
    insize = B2GB(input_json['input_size_in_bytes']['input_pairsam'])
    outbamsize = insize
    outpairssize = insize  # to be safe
    outsize = outbamsize + outpairssize
    intersize = outsize
    total_size = insize + outsize + intersize
    total_safe_size = total_size * 2

    r = BenchmarkResult(size=total_safe_size, mem=mem, cpu=cpu)
    return(r.as_dict())
예제 #13
0
def bamqc(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'bamfile' in input_json.get('input_size_in_bytes')
    in_size = input_json['input_size_in_bytes']

    # cpu
    nthreads = 1

    # space
    input_size = B2GB(in_size['bamfile'])
    total_size = input_size + 5
    total_safe_size = total_size + 5

    # mem
    mem = 4000

    # 32 cores: 1.8G/min (c4.8xlarge), 8 cores: 0.9G/min (r4.2xlarge)

    r = BenchmarkResult(size=total_safe_size, mem=mem, cpu=nthreads)
    return(r.as_dict())
예제 #14
0
def pairsam_parse_sort(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'bam' in input_json.get('input_size_in_bytes')

    # cpu
    nthreads = 8  # default from cwl
    if 'parameters' in input_json:
        if 'nThreads' in input_json.get('parameters'):
            nthreads = input_json.get('parameters').get('nThreads')

    in_size = input_json.get('input_size_in_bytes')
    bamsize = B2GB(in_size.get('bam'))
    pairsamsize = bamsize * 10  # very rough number
    tmp_pairsamsize = pairsamsize
    total_size = bamsize + pairsamsize + tmp_pairsamsize
    mem = 48000  # very rough number

    r = BenchmarkResult(size=total_size, mem=mem, cpu=nthreads * 2)

    return(r.as_dict())
예제 #15
0
def hi_c_processing_partb(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'input_pairs' in input_json.get('input_size_in_bytes')
    in_size = input_json['input_size_in_bytes']
    assert isinstance(in_size['input_pairs'], list)

    # cpu
    nthreads = 8  # default from cwl
    if 'parameters' in input_json:
        if 'ncores' in input_json.get('parameters'):
            nthreads = input_json.get('parameters').get('ncores')

    # space
    input_size = B2GB(sum(in_size['input_pairs']))
    out_pairs_size = input_size
    out_cool_size = input_size
    out_hic_size = input_size
    out_size = out_pairs_size + out_cool_size + out_hic_size
    total_size = input_size + out_size
    total_safe_size = total_size * 2

    # mem
    maxmem = MB2GB(14)  # default from cwl
    if 'parameters' in input_json:
        if 'maxmem' in input_json.get('parameters'):
            maxmem = input_json.get('parameters').get('maxmem')
            if 'g' in maxmem:
                maxmem = GB2MB(int(maxmem.replace('g', '')))
            elif 'm' in maxmem:
                maxmem = int(maxmem.replace('m', ''))
            else:
                raise Exception("proper maxmem string?")

    cooler_mem = GB2MB(nthreads * input_size)
    if cooler_mem > maxmem:
        mem = cooler_mem
    else:
        mem = maxmem

    r = BenchmarkResult(size=total_safe_size, mem=mem, cpu=nthreads)
    return(r.as_dict())
예제 #16
0
def merge_fastq(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'input_fastqs' in input_json.get('input_size_in_bytes')
    in_size = input_json['input_size_in_bytes']
    assert isinstance(in_size['input_fastqs'], list)

    # cpu
    nthreads = 1

    # space
    input_size = B2GB(sum(in_size['input_fastqs']))
    total_size = input_size * 3
    total_safe_size = total_size * 2

    # mem
    mem = 4000

    # 32 cores: 1.8G/min (c4.8xlarge), 8 cores: 0.9G/min (r4.2xlarge)

    r = BenchmarkResult(size=total_safe_size, mem=mem, cpu=nthreads)
    return(r.as_dict())
예제 #17
0
def fastqc_0_11_4_1(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'input_fastq' in input_json.get('input_size_in_bytes')

    nthreads = 1  # default value according to the cwl
    if 'parameters' in input_json:
        if 'threads' in input_json.get('parameters'):
            nthreads = input_json.get('parameters').get('threads')

    input_in_bytes = input_json.get('input_size_in_bytes').get('input_fastq')
    input_size = B2GB(input_in_bytes) * 2 + 3
    if input_size > 100:
        input_size = input_size + 20
    mem = 300 * nthreads
    if mem < 1024:
        mem = 1024
    r = BenchmarkResult(size=input_size,
                        mem=mem,
                        cpu=nthreads)

    return(r.as_dict())
예제 #18
0
def encode_chipseq_postaln(input_json):
    assert 'input_size_in_bytes' in input_json
    insz = input_json['input_size_in_bytes']
    assert 'chip.tas' in insz
    assert 'chip.bam2ta_no_filt_R1.ta' in insz
    input_size = sum(insz['chip.tas']) + sum(insz.get('chip.ctl_tas', [0])) \
                 + sum(insz['chip.bam2ta_no_filt_R1.ta'])
    total_size_in_gb = B2GB(input_size * 35) * 3
    if 'parameters' in input_json and 'chip.spp_cpu' in input_json['parameters']:
        cpu = input_json['parameters']['chip.spp_cpu']
    else:
        cpu = 2
    mem = GB2MB(cpu * 7)
    if 'parameters' in input_json and 'chip.pipeline_type' in input_json['parameters']:
        if input_json['parameters']['chip.pipeline_type'] == 'tf':
            mem *= 2.5
    print("mem=" + str(mem))
    r = BenchmarkResult(size=total_size_in_gb,
                        mem=mem,
                        cpu=cpu * 4,
                        exclude_t=True)
    return(r.as_dict())
예제 #19
0
def encode_atacseq_aln(input_json):
    assert 'input_size_in_bytes' in input_json
    insz = input_json['input_size_in_bytes']
    assert 'atac.fastqs' in insz
    assert 'atac.bowtie2_idx_tar' in insz
    input_fastq_size = sum(insz['atac.fastqs'])
    if input_json['parameters'].get('atac.paired_end', ''):
        nTechRep = len(insz['atac.fastqs']) / 2 # we assume one biological replicate
    else:
        nTechRep = len(insz['atac.fastqs'])
    print("nTechRep = " + str(nTechRep))
    total_size_in_gb = B2GB((input_fastq_size * 5 + insz['atac.bowtie2_idx_tar'] * 2.5) * nTechRep) * 1.5
    if 'parameters' in input_json and 'atac.bowtie2.cpu' in input_json['parameters']:
        cpu = input_json['parameters']['atac.bowtie2.cpu']
    else:
        cpu = 2
    mem = 6 + 2 * (nTechRep - 1)
    r = BenchmarkResult(size=total_size_in_gb,
                        mem=GB2MB(mem),
                        cpu=cpu,
                        exclude_t=True)
    return(r.as_dict())
예제 #20
0
def compartments_caller(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'mcoolfile' in input_json.get('input_size_in_bytes')

    # cpu
    nthreads = 1

    # space
    input_sizes = input_json.get('input_size_in_bytes')
    data_input_size = input_sizes.get('mcoolfile')
    total_input_size = data_input_size
    output_bw_size = data_input_size * 0.001
    total_output_size = output_bw_size
    additional_size_in_gb = 2
    total_file_size_in_bp \
        = total_input_size + total_output_size
    total_size = B2GB(total_file_size_in_bp) + additional_size_in_gb

    # mem
    mem = GB2MB(2)

    r = BenchmarkResult(size=total_size, mem=mem, cpu=nthreads)

    return(r.as_dict())
예제 #21
0
def hi_c_processing_bam(input_json):
    assert 'input_size_in_bytes' in input_json
    in_size = input_json.get('input_size_in_bytes')
    assert 'input_bams' in input_json.get('input_size_in_bytes')
    assert 'chromsize' in input_json.get('input_size_in_bytes')
    assert isinstance(in_size['input_bams'], list)

    # cpu
    nthreads_parse_sort = 8  # default from cwl
    nthreads_merge = 8  # default from cwl
    if 'parameters' in input_json:
        param = input_json['parameters']
        if 'nthreads_parse_sort' in param:
            nthreads_parse_sort = param['nthreads_parse_sort']
        if 'nthreads_merge' in param:
            nthreads_merge = param['nthreads_merge']

    # nthreads is the maximum of the two nthread parameters
    if nthreads_parse_sort > nthreads_merge:
        nthreads = nthreads_parse_sort
    else:
        nthreads = nthreads_merge

    bamsize = B2GB(sum(in_size['input_bams']))
    other_inputsize = B2GB(in_size.get('chromsize'))
    pairsize = bamsize / 2  # rough number
    outsize = bamsize + pairsize
    tmp_pairsamsize = bamsize * 5
    # input and output are copied once
    total_size = (bamsize + other_inputsize + outsize) * 2 + tmp_pairsamsize
    safe_total_size = total_size * 2
    mem = 2000  # very rough number

    r = BenchmarkResult(size=safe_total_size, mem=mem, cpu=nthreads)

    return(r.as_dict())
예제 #22
0
def pairsam_merge(input_json):
    assert 'input_size_in_bytes' in input_json
    assert 'input_pairsams' in input_json.get('input_size_in_bytes')
    in_size = input_json['input_size_in_bytes']
    assert isinstance(in_size['input_pairsams'], list)

    # cpu
    nthreads = 8  # default from cwl
    if 'parameters' in input_json:
        if 'nThreads' in input_json.get('parameters'):
            nthreads = input_json.get('parameters').get('nThreads')

    # space
    input_size = B2GB(sum(in_size['input_pairsams']))
    total_size = input_size * 3
    total_safe_size = total_size * 2

    # mem
    mem = 4000

    # 32 cores: 1.8G/min (c4.8xlarge), 8 cores: 0.9G/min (r4.2xlarge)

    r = BenchmarkResult(size=total_safe_size, mem=mem, cpu=nthreads)
    return(r.as_dict())