def test_benchmark_none1(self):
    # With raise_error=True an unrecognised app name is expected to raise
    # B.AppNameUnavailableException instead of yielding a result.
    payload = {
        'input_size_in_bytes': {
            'fastq1': 93520,
            'fastq2': 97604,
            'bwa_index': 3364568,
        }
    }
    with self.assertRaises(B.AppNameUnavailableException):
        B.benchmark('some_weird_name', payload, raise_error=True)
def get_benchmarking(self, input_size_in_bytes):
    """Ask the benchmark module for an instance recommendation.

    The benchmark parameters are a deep copy of ``self.args.input_parameters``
    updated with ``self.args.additional_benchmarking_parameters``, so neither
    attribute on ``self.args`` is mutated by the merge.

    Args:
        input_size_in_bytes: value passed through unchanged under the
            ``'input_size_in_bytes'`` key of the ``B.benchmark`` request.

    Returns:
        A dict with keys ``'instance_type'``, ``'EBS_optimized'`` and
        ``'ebs_size'``. When ``B.benchmark`` returns ``None`` the values are
        ``''``, ``''`` and ``0`` respectively.

    Raises:
        Exception: if ``B.benchmark`` raises; the original error text is
            appended to the message.
    """
    benchmark_parameters = copy.deepcopy(self.args.input_parameters)
    benchmark_parameters.update(
        self.args.additional_benchmarking_parameters)

    request = {
        'input_size_in_bytes': input_size_in_bytes,
        'parameters': benchmark_parameters,
    }
    try:
        res = B.benchmark(self.args.app_name, request)
    except Exception as e:
        # `res` is never bound when benchmark() itself raises, so there is
        # no partial result to report; surface the underlying error text.
        raise Exception("Benchmarking not working. : None. %s" % str(e))

    if res is None:
        return {'instance_type': '', 'EBS_optimized': '', 'ebs_size': 0}

    logger.info(str(res))
    total_size_in_gb = res['total_size_in_GB']
    # Never go below 10; otherwise truncate to an int and add one.
    ebs_size = 10 if total_size_in_gb < 10 else int(total_size_in_gb) + 1
    return {
        'instance_type': res['aws']['recommended_instance_type'],
        'EBS_optimized': res['aws']['EBS_optimized'],
        'ebs_size': ebs_size,
    }
def test_benchmark10(self):
    # pairs-patch with a single 1000000000-byte pairs file: expect t3.micro.
    payload = {'input_size_in_bytes': {'input_pairs': 1000000000}}
    result = B.benchmark('pairs-patch', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.micro'
def test_benchmark6(self):
    # pairsam-markasdup with a 1000000000-byte pairsam: expect r5a.large.
    payload = {'input_size_in_bytes': {'input_pairsam': 1000000000}}
    result = B.benchmark('pairsam-markasdup', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'r5a.large'
def update_config(config, app_name, input_files, parameters):
    """Fill unset instance settings in ``config`` from a benchmark result.

    ``config`` is modified in place. A field counts as unset when
    ``config['instance_type'] == ''``, ``config['ebs_size'] == 0`` or
    ``config['EBS_optimized'] == ''``. If all three are already set the
    function returns immediately without looking up any file size.

    Args:
        config: dict that must already contain the keys ``'instance_type'``,
            ``'ebs_size'`` and ``'EBS_optimized'``.
        app_name: application name handed to ``B.benchmark``.
        input_files: mapping of argument name to a dict with
            ``'bucket_name'`` and ``'object_key'`` (a single key or a list
            of keys).
        parameters: forwarded to ``B.benchmark`` under ``'parameters'``.

    Raises:
        Exception: if a file size cannot be obtained, if ``B.benchmark``
            raises, or if the benchmark returns ``None`` while a required
            field is still unset.
    """
    if (config['instance_type'] != '' and config['ebs_size'] != 0
            and config['EBS_optimized'] != ''):
        return

    input_size_in_bytes = dict()
    # .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    for argname, f in input_files.items():
        bucket = f['bucket_name']
        s3 = s3_utils.s3Utils(bucket, bucket, bucket)
        if isinstance(f['object_key'], list):
            size = []
            for key in f['object_key']:
                try:
                    size.append(s3.get_file_size(key, bucket))
                except Exception:
                    raise Exception("Can't get input file size")
        else:
            try:
                size = s3.get_file_size(f['object_key'], bucket)
            except Exception:
                raise Exception("Can't get input file size")
        input_size_in_bytes.update({str(argname): size})

    print({"input_size_in_bytes": input_size_in_bytes})
    try:
        res = B.benchmark(
            app_name, {
                'input_size_in_bytes': input_size_in_bytes,
                'parameters': parameters
            })
    except Exception:
        # `res` is unbound whenever benchmark() raises, so the only message
        # that could ever be produced here is the "None" variant.
        raise Exception("Benchmarking not working. : None")

    if res is not None:
        logger.info(str(res))
        instance_type = res['aws']['recommended_instance_type']
        # Never go below 10; otherwise truncate to an int and add one.
        ebs_size = 10 if res['total_size_in_GB'] < 10 else int(
            res['total_size_in_GB']) + 1
        ebs_opt = res['aws']['EBS_optimized']
        # Only fill fields the caller left unset.
        if config['instance_type'] == '':
            config['instance_type'] = instance_type
        if config['ebs_size'] == 0:
            config['ebs_size'] = ebs_size
        if config['EBS_optimized'] == '':
            config['EBS_optimized'] = ebs_opt
    elif config['instance_type'] == '':
        raise Exception("instance type cannot be determined nor given")
    elif config['ebs_size'] == 0:
        raise Exception("ebs_size cannot be determined nor given")
    elif config['EBS_optimized'] == '':
        raise Exception("EBS_optimized cannot be determined nor given")
def test_benchmark_compartments_caller(self):
    # compartments-caller with a 32000000000-byte mcool: expect t3.small.
    print("compartments-caller")
    payload = {'input_size_in_bytes': {'mcoolfile': 32000000000}}
    result = B.benchmark('compartments-caller', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.small'
def test_mergebed(self):
    # mergebed with two bed inputs given as a list: expect t3.micro.
    print("mergebed")
    sizes = {'input_bed': [400000000, 500000000]}
    payload = {'input_size_in_bytes': sizes}
    result = B.benchmark('mergebed', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.micro'
def test_benchmark_insulation_scores_and_boundaries_caller(self):
    # insulation-scores-and-boundaries-caller with a 32000000000-byte mcool:
    # expect t3.small.
    print("insulation-scores-and-boundaries-caller")
    payload = {'input_size_in_bytes': {'mcoolfile': 32000000000}}
    result = B.benchmark('insulation-scores-and-boundaries-caller', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.small'
def test_benchmark1(self):
    # md5 with a 200000000-byte input file: expect t3.micro.
    payload = {'input_size_in_bytes': {'input_file': 200000000}}
    result = B.benchmark('md5', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.micro'
    print(result)
def test_benchmark_merge_fastq(self):
    # merge-fastq with GB2B(4) and GB2B(5) inputs: expect t3.medium and a
    # total size that truncates to 54.
    print("merge_fastq")
    sizes = {'input_fastqs': [GB2B(4), GB2B(5)]}
    payload = {'input_size_in_bytes': sizes}
    result = B.benchmark('merge-fastq', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.medium'
    assert int(result['total_size_in_GB']) == 54
def test_benchmark_bamqc(self):
    # bamqc with a GB2B(4) bam: expect t3.medium and a total size that
    # truncates to 14.
    print("bamqc")
    sizes = {'bamfile': GB2B(4)}
    payload = {'input_size_in_bytes': sizes}
    result = B.benchmark('bamqc', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.medium'
    assert int(result['total_size_in_GB']) == 14
def test_benchmark_atacseq_postaln(self):
    # encode-atacseq-postaln with one 827000000-byte tagAlign: expect
    # c5.4xlarge.
    print("testing atacseq-postaln")
    sizes = {'atac.tas': [827000000]}
    payload = {'input_size_in_bytes': sizes}
    result = B.benchmark('encode-atacseq-postaln', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'c5.4xlarge'
def test_benchmark_none2(self):
    # Without raise_error, an unrecognised app name is expected to give None.
    sizes = {
        'fastq1': 93520,
        'fastq2': 97604,
        'bwa_index': 3364568,
    }
    payload = {'input_size_in_bytes': sizes}
    result = B.benchmark('some_weird_name', payload)
    assert result is None
def test_benchmark_chipseq(self):
    # encode-chipseq with sample fastqs, control fastqs and a bwa index tar:
    # expect c5.4xlarge.
    print("testing chipseq")
    sizes = {
        'chip.fastqs': [2000000000, 3000000000],
        'chip.ctl_fastqs': [3000000000, 2000000000],
        'chip.bwa_idx_tar': 5000000000,
    }
    payload = {'input_size_in_bytes': sizes}
    result = B.benchmark('encode-chipseq', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'c5.4xlarge'
def test_encode_rnaseq_stranded(self):
    # encode-rnaseq-stranded with GB2B(10) fastqs and a GB2B(3) index:
    # expect m5a.4xlarge.
    print("rnaseq_stranded")
    sizes = {
        'rna.fastqs_R1': GB2B(10),
        'rna.align_index': GB2B(3),
    }
    payload = {'input_size_in_bytes': sizes}
    result = B.benchmark('encode-rnaseq-stranded', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'm5a.4xlarge'
def test_benchmark_fastqc_old(self):
    # fastqc-0-11-4-1 with a 20000000000-byte fastq and threads=2: expect
    # t3.micro.
    payload = {
        'input_size_in_bytes': {'input_fastq': 20000000000},
        'parameters': {'threads': 2},
    }
    result = B.benchmark('fastqc-0-11-4-1', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.micro'
    print(result)
def test_benchmark4(self):
    # pairsam-parse-sort with a 1000000000-byte bam and nThreads=16: expect
    # c5.9xlarge.
    payload = {
        'input_size_in_bytes': {'bam': 1000000000},
        'parameters': {'nThreads': 16},
    }
    result = B.benchmark('pairsam-parse-sort', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'c5.9xlarge'
    print(result)
def test_benchmark5(self):
    # pairsam-merge with three pairsam inputs and nThreads=32: expect
    # c5.9xlarge.
    sizes = {'input_pairsams': [1000000000, 2000000000, 3000000000]}
    payload = {
        'input_size_in_bytes': sizes,
        'parameters': {'nThreads': 32},
    }
    result = B.benchmark('pairsam-merge', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'c5.9xlarge'
    print(result)
def test_benchmark9(self):
    # hi-c-processing-partb with three pairs inputs, ncores=16 and
    # maxmem='1900g': expect x1.32xlarge.
    sizes = {'input_pairs': [1000000000, 2000000000, 3000000000]}
    options = {'ncores': 16, 'maxmem': '1900g'}
    payload = {'input_size_in_bytes': sizes, 'parameters': options}
    result = B.benchmark('hi-c-processing-partb', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'x1.32xlarge'
def test_benchmark_chipseq_aln_chip(self):
    # encode-chipseq-aln-chip with two fastqs, a bwa index tar and
    # chip.bwa.cpu=16: expect c5.4xlarge.
    print("testing chipseq")
    sizes = {
        'chip.fastqs': [2000000000, 3000000000],
        'chip.bwa_idx_tar': 5000000000,
    }
    payload = {
        'input_size_in_bytes': sizes,
        'parameters': {'chip.bwa.cpu': 16},
    }
    result = B.benchmark('encode-chipseq-aln-chip', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'c5.4xlarge'
def test_repliseq(self):
    # repliseq-parta with an MB2B(270) fastq, a GB2B(3.21) bwa index and
    # nthreads=4: expect t3.xlarge.
    print("repliseq se")
    sizes = {
        'fastq': MB2B(270),
        'bwaIndex': GB2B(3.21),
    }
    payload = {
        'input_size_in_bytes': sizes,
        'parameters': {'nthreads': 4},
    }
    result = B.benchmark('repliseq-parta', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.xlarge'
def test_benchmark_chipseq_postaln(self):
    # encode-chipseq-postaln with sample/control tagAligns, unfiltered
    # tagAligns and chip.spp_cpu=4: expect c5.4xlarge.
    print("testing chipseq")
    sizes = {
        'chip.tas': [2000000000, 3000000000],
        'chip.ctl_tas': [3000000000, 2000000000],
        'chip.bam2ta_no_filt_R1.ta': [5000000000, 6000000000],
    }
    payload = {
        'input_size_in_bytes': sizes,
        'parameters': {'chip.spp_cpu': 4},
    }
    result = B.benchmark('encode-chipseq-postaln', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'c5.4xlarge'
def test_benchmark3(self):
    # bwa-mem with two fastqs, a bwa index and nThreads=4: expect t3.xlarge.
    sizes = {
        'fastq1': 93520000,
        'fastq2': 97604000,
        'bwa_index': 3364568000,
    }
    payload = {
        'input_size_in_bytes': sizes,
        'parameters': {'nThreads': 4},
    }
    result = B.benchmark('bwa-mem', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.xlarge'
    print(result)
def test_benchmark11(self):
    # hi-c-processing-partc with a cool and a hic input and ncores=1:
    # expect r5a.large.
    sizes = {
        'input_cool': 1000000000,
        'input_hic': 2000000000,
    }
    payload = {
        'input_size_in_bytes': sizes,
        'parameters': {'ncores': 1},
    }
    result = B.benchmark('hi-c-processing-partc', payload)
    print('hi-c-processing-partc')
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'r5a.large'
def test_benchmark_atacseq_aln(self):
    # encode-atacseq-aln with four fastqs, a bowtie2 index tar,
    # atac.bowtie2.cpu=4 and paired-end enabled: expect c5.xlarge.
    print("testing atacseq-aln")
    sizes = {
        'atac.fastqs': [1200000000, 1200000000, 1500000000, 1500000000],
        'atac.bowtie2_idx_tar': 5000000000,
    }
    options = {
        'atac.bowtie2.cpu': 4,
        'atac.paired_end': True,
    }
    payload = {'input_size_in_bytes': sizes, 'parameters': options}
    result = B.benchmark('encode-atacseq-aln', payload)
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'c5.xlarge'
def test_benchmark_atacseq(self):
    # encode-atacseq with two fastqs, a bowtie2 index tar and
    # atac.bowtie2.cpu=4: expect t3.2xlarge, min_CPU 6 and a total size
    # that truncates to 55.
    print("testing atacseq")
    sizes = {
        'atac.fastqs': [2000000000, 3000000000],
        'atac.bowtie2_idx_tar': 5000000000,
    }
    payload = {
        'input_size_in_bytes': sizes,
        'parameters': {'atac.bowtie2.cpu': 4},
    }
    result = B.benchmark('encode-atacseq', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.2xlarge'
    assert result['min_CPU'] == 6
    assert int(result['total_size_in_GB']) == 55
def test_benchmark13(self):
    # hi-c-processing-pairs with three pairs inputs, nthreads=8 and
    # maxmem='32g': expect min_CPU 8 and r5a.2xlarge.
    sizes = {'input_pairs': [1000000000, 2000000000, 3000000000]}
    options = {'nthreads': 8, 'maxmem': '32g'}
    payload = {'input_size_in_bytes': sizes, 'parameters': options}
    result = B.benchmark('hi-c-processing-pairs', payload)
    print('hi-c-processing-pairs')
    print("benchmark13")
    print(result)
    assert 'aws' in result
    assert result['min_CPU'] == 8
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'r5a.2xlarge'
def test_benchmark_chipseq_postaln2(self):
    # encode-chipseq-postaln with MB-scale tagAligns, chip.spp_cpu=4 and
    # pipeline type 'tf': expect r5a.4xlarge.
    print("testing chipseq")
    sizes = {
        'chip.tas': [MB2B(115.37), MB2B(115.37)],
        'chip.ctl_tas': [MB2B(220.56), MB2B(220.56)],
        'chip.bam2ta_no_filt_R1.ta': [MB2B(140.59), MB2B(140.59)],
    }
    options = {
        'chip.spp_cpu': 4,
        'chip.pipeline_type': 'tf',
    }
    payload = {'input_size_in_bytes': sizes, 'parameters': options}
    result = B.benchmark('encode-chipseq-postaln', payload)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 'r5a.4xlarge'
def test_benchmark12(self):
    # hi-c-processing-bam with two bams, a chromsize file,
    # nthreads_parse_sort=1 and nthreads_merge=8: expect t3.2xlarge and
    # min_CPU 8.
    sizes = {
        'input_bams': [1000000000, 2000000000],
        'chromsize': 200000,
    }
    options = {
        'nthreads_parse_sort': 1,
        'nthreads_merge': 8,
    }
    payload = {'input_size_in_bytes': sizes, 'parameters': options}
    result = B.benchmark('hi-c-processing-bam', payload)
    print('hi-c-processing-bam')
    print("benchmark12")
    print(result)
    assert 'aws' in result
    aws = result['aws']
    assert 'recommended_instance_type' in aws
    assert aws['recommended_instance_type'] == 't3.2xlarge'
    assert result['min_CPU'] == 8
def update_config(cfg, app_name, input_files, parameters):
    """Populate missing fields of ``cfg`` and fill instance settings.

    ``cfg`` is modified in place. Absent keys first receive defaults:
    ``instance_type=''``, ``ebs_size=0``, ``EBS_optimized=''``,
    ``ebs_type='gp2'``, ``ebs_iops=''``, ``shutdown_min='now'``,
    ``password=''`` and ``key_name=''``. If ``instance_type``, ``ebs_size``
    and ``EBS_optimized`` are all set afterwards (not ``''`` / ``0``), the
    function returns without benchmarking. Otherwise the input file sizes
    are collected and ``B.benchmark`` is asked for a recommendation, which
    is used only for the fields that are still unset.

    Args:
        cfg: config dict to complete.
        app_name: application name handed to ``B.benchmark``; must be
            truthy whenever benchmarking is needed.
        input_files: mapping of argument name to a dict with
            ``'bucket_name'`` and ``'object_key'`` (a single key or a
            possibly nested list of keys).
        parameters: forwarded to ``B.benchmark`` under ``'parameters'``.

    Raises:
        Exception: if ``app_name`` is missing when benchmarking is needed,
            if ``B.benchmark`` raises, or if the benchmark returns ``None``
            while a required field is still unset.
    """
    # deal with missing fields
    defaults = (
        ("instance_type", ""),
        ("ebs_size", 0),
        ("EBS_optimized", ""),
        ("ebs_type", "gp2"),
        ("ebs_iops", ""),
        ("shutdown_min", "now"),
        ("password", ""),
        ("key_name", ""),
    )
    for field, value in defaults:
        cfg.setdefault(field, value)

    # add benchmarking result
    if (cfg['instance_type'] != '' and cfg['ebs_size'] != 0
            and cfg['EBS_optimized'] != ''):
        return

    input_size_in_bytes = dict()
    # .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    for argname, f in input_files.items():
        bucket = f['bucket_name']
        if isinstance(f['object_key'], list):
            size = flatten(
                run_on_nested_arrays1(f['object_key'], get_file_size,
                                      **{'bucket': bucket}))
        else:
            size = get_file_size(f['object_key'], bucket)
        input_size_in_bytes.update({str(argname): size})

    print({"input_size_in_bytes": input_size_in_bytes})
    if not app_name:
        # Each fragment ends with a space so the concatenated sentences do
        # not run together.
        err_msg = "app_name must be provided to use Benchmarking. " + \
            "Without app_name, instance_type, ebs_size and EBS_optimized must be " + \
            "in the config field of the execution json."
        raise Exception(err_msg)

    try:
        res = B.benchmark(
            app_name, {
                'input_size_in_bytes': input_size_in_bytes,
                'parameters': parameters
            })
    except Exception:
        # `res` is unbound whenever benchmark() raises, so the only message
        # that could ever be produced here is the "None" variant.
        raise Exception("Benchmarking not working. : None")

    if res is not None:
        logger.info(str(res))
        instance_type = res['aws']['recommended_instance_type']
        # Never go below 10; otherwise truncate to an int and add one.
        ebs_size = 10 if res['total_size_in_GB'] < 10 else int(
            res['total_size_in_GB']) + 1
        ebs_opt = res['aws']['EBS_optimized']
        # Only fill fields the caller left unset.
        if cfg['instance_type'] == '':
            cfg['instance_type'] = instance_type
        if cfg['ebs_size'] == 0:
            cfg['ebs_size'] = ebs_size
        if cfg['EBS_optimized'] == '':
            cfg['EBS_optimized'] = ebs_opt
    elif cfg['instance_type'] == '':
        raise Exception("instance type cannot be determined nor given")
    elif cfg['ebs_size'] == 0:
        raise Exception("ebs_size cannot be determined nor given")
    elif cfg['EBS_optimized'] == '':
        raise Exception("EBS_optimized cannot be determined nor given")