def bam(workflow, input_bam, input_bam_list, **kwargs):
    """
    Input file is a bam with properly annotated readgroups.

    *** Note that this workflow assumes the bam header is    ***
    *** also properly annotated with the correct readgroups! ***

    Example usage:
    $ genomekey bam -n 'Bam to VCF Workflow 1' input_bam.bam

    $ echo "dir/sample1.bam" > /tmp/bam.list
    $ echo "dir/sample2.bam" >> /tmp/bam.list
    $ genomekey bam -n 'Bam to VCF 2' -li /tmp/bam.list
    """
    # capture and pedigree_file are used in main()
    input_bams = input_bam_list.read().strip().split('\n') if input_bam_list else []
    if input_bam:
        input_bams.append(input_bam.name)

    dag = DAG(ignore_stage_name_collisions=True)
    Bam2Fastq(workflow, dag, wga_settings, input_bams)
    dag.sequence_(
        Pipeline(),
        configure(wga_settings),
        add_run(workflow)
    )
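# For reference, a readgroup annotation in a bam header is an @RG line
# carrying at least the ID, SM (sample), LB (library), and PL (platform)
# tags, e.g. (tab-separated; the values below are hypothetical):
#
#   @RG  ID:C0MR3ACXX.001  SM:1216301779A  LB:LIB-1216301779A  PL:ILLUMINA  PU:C0MR3ACXX.001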
def json_(workflow, input_dict, **kwargs):
    """
    Input file is a json of the following format:

    [
        {
            "chunk": "001",
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 0,  # 0 or 1
            "path": "/path/to/fastq"
        },
        {..}
    ]
    """
    input_json = json.load(open(input_dict, 'r'))
    inputs = [INPUT(name='fastq.gz',
                    path=i['path'],
                    fmt='fastq.gz',
                    tags=i,
                    stage_name='Load Input Fastqs')
              for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline(),
        configure(wga_settings),
        add_run(workflow)
    )
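# A minimal sketch of producing such a file with the standard library
# (all values below are hypothetical):
#
#   import json
#   records = [{"chunk": "001", "library": "LIB-1216301779A",
#               "sample_name": "1216301779A", "platform": "ILLUMINA",
#               "platform_unit": "C0MR3ACXX.001", "pair": 0,
#               "path": "/path/to/sample_R1.fastq.gz"}]
#   with open('input.json', 'w') as fh:
#       json.dump(records, fh, indent=4)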
def json_somatic(workflow, input_dict, **kwargs):
    """
    Input file is a json of the following format:

    [
        {
            "chunk": "001",
            "library": "LIB-1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "rgid": "BC18-06-2013",
            "sample_name": "BC18-06-2013LyT_S5_L001",
            "pair": "1",
            "path": "/path/to/fastq.gz",
            "sample_type": "normal or tumor"
        },
        {..}
    ]
    """
    input_json = json.load(open(input_dict, 'r'))
    inputs = [INPUT(name='fastq.gz',
                    path=i['path'],
                    fmt='fastq.gz',
                    tags=i,
                    stage_name='Load Input Fastqs')
              for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_Somatic(),
        configure(wga_settings),
        add_run(workflow)
    )
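# Each entry carries a "sample_type" of "normal" or "tumor"; a matched
# pair would presumably appear as two entries differing only in
# "sample_type" and "path" (hypothetical values):
#
#   {..., "sample_type": "normal", "path": "/path/to/normal_R1.fastq.gz"}
#   {..., "sample_type": "tumor",  "path": "/path/to/tumor_R1.fastq.gz"}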
def downdbs(workflow, **kwargs):
    """
    Download all annotation databases
    """
    DAG().sequence_(
        add_([annovarext.DownDB(tags={'build': 'hg19', 'dbname': db})
              for db in annovarext.get_db_names()]),
        configure(wga_settings),
        add_run(workflow)
    )
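# Assuming this subcommand follows the same CLI pattern as bam, gunzip,
# and anno (the exact subcommand name here is an assumption):
#
#   $ genomekey downdbs -n 'Download Annotation DBs'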
def gunzip(workflow, input_dir, **kwargs):
    """
    Gunzips all gz files in directory

    $ genomekey gunzip -n 'Gunzip' /path/to/dir
    """
    DAG().sequence_(
        add_([INPUT(f, tags={'i': i})
              for i, f in enumerate(glob.glob(os.path.join(input_dir, '*.gz')))]),
        map_(unix.Gunzip),
        add_run(workflow)
    )
def fastq_(workflow, input_dict, output_dict, output_json, **kwargs):
    """
    Loads the fastqs described by input_dict and runs them through the
    split pipeline.
    """
    json_fastq_to_split = json_creator.json_out(input_dict, output_dict)
    input_json = json.load(open(json_fastq_to_split, 'r'))
    inputs = [INPUT(name='fastq.gz',
                    path=i['gz_path'],
                    fmt='fastq.gz',
                    tags=i,
                    stage_name='Load Input Fastqs')
              for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_split(),
        configure(wga_settings),
        add_run(workflow)
    )
def anno(workflow, input_file, input_file_list, file_format='vcf', **kwargs):
    """
    Annotates all input files

    $ genomekey anno -n 'My Annotation Workflow #1' file1.vcf file2.vcf
    """
    input_files = input_file_list.read().strip().split('\n') if input_file_list else []
    if input_file:
        input_files.append(input_file.name)

    print('annotating {0}'.format(', '.join(input_files)), file=sys.stderr)

    DAG().sequence_(
        add_([INPUT(input_file, tags={'vcf': i})
              for i, input_file in enumerate(input_files)]),
        massive_annotation,
        configure(wga_settings),
        add_run(workflow)
    )
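# A list of files can presumably be passed the same way as for the bam
# subcommand (the -li flag is an assumption carried over from bam's docstring):
#
#   $ ls dir/*.vcf > /tmp/vcf.list
#   $ genomekey anno -n 'My Annotation Workflow #2' -li /tmp/vcf.list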
def upload_(workflow, bucket, project, out_dict, **kwargs):
    """
    Loads the fastqs listed for `project` in the given S3 bucket and runs
    them through the upload pipeline.
    """
    project_folder = join(out_dict, project.replace(" ", "_"))
    if not os.path.exists(project_folder):
        os.makedirs(project_folder)

    json_fastq_to_upload = s3_Bucket.getList(bucket, project, out_dict)
    input_json = json.load(open(json_fastq_to_upload, 'r'))
    inputs = [INPUT(name='fastq.gz',
                    path=i['gz_path'],
                    fmt='fastq.gz',
                    tags=i,
                    stage_name='Load Input Fastqs')
              for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_upload(),
        configure(wga_settings),
        add_run(workflow)
    )
def json_local(workflow, input_dict, **kwargs):
    """
    Input is a folder where each file is a json of the following format:

    [
        {
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 1,
            "path": "/path/to/fastq"
        },
        {
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 2,
            "path": "/path/to/fastq"
        }
    ]
    """
    for filename in os.listdir(input_dict):
        # os.path.join avoids requiring a trailing slash on input_dict
        path = os.path.join(input_dict, filename)
        print(path)
        input_json = json.load(open(path, 'r'))
        inputs = [INPUT(name='fastq.gz',
                        path=i['path'],
                        fmt='fastq.gz',
                        tags=i,
                        stage_name='Load Input Fastqs')
                  for i in input_json]
        for i in inputs:
            print(i)

        DAG(ignore_stage_name_collisions=True).sequence_(
            add_(inputs),
            Pipeline_local(),
            configure(wga_settings),
            add_run(workflow)
        )
from cosmos.Workflow.models import Workflow
from cosmos.lib.ezflow.dag import DAG, add_, split_
from tools import ECHO, CAT

####################
# Workflow
####################

dag = DAG().sequence_(
    add_([ECHO(tags={'word': 'hello'}),
          ECHO(tags={'word': 'world'})]),
    split_([('i', [1, 2])], CAT)
)
dag.create_dag_img('/tmp/ex.svg')

#################
# Run Workflow
#################

WF = Workflow.start('Example 1', restart=True)
dag.add_to_workflow(WF)
WF.run()
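# The DAG this builds: two ECHO tasks (word=hello, word=world), then
# split_ creates one CAT task per (parent task, i) combination, i.e.
# four CAT tasks in total.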
from cosmos.Workflow.models import Workflow
from cosmos.lib.ezflow.dag import DAG, split_, add_, map_, reduce_
from tools import ECHO, CAT, WC, PASTE, Sleep

####################
# Workflow
####################

dag = DAG().sequence_(
    add_([ECHO(tags={'word': 'hello'}),
          ECHO(tags={'word': 'world'})]),
    map_(Sleep),
    split_([('i', [1, 2])], CAT),
    reduce_([], PASTE),
    map_(WC),
)
dag.create_dag_img('/tmp/ex.svg')

#################
# Run Workflow
#################

WF = Workflow.start('Example 3', restart=True, delete_intermediates=True)
dag.add_to_workflow(WF)
WF.run()
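# Stage-by-stage task counts, assuming map_ is one-to-one, split_ is
# one-to-many, and reduce_ with an empty keyword list groups all tasks
# into one: 2 ECHO -> 2 Sleep -> 4 CAT -> 1 PASTE -> 1 WC.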
from cosmos.lib.ezflow.dag import DAG, Split, Add, Map, Reduce
from tools import ECHO, MD5Sum
from cosmos.Workflow.cli import CLI

cli = CLI()
WF = cli.parse_args()  # parses command line arguments

####################
# Workflow
####################

dag = (DAG()
       |Add| [ECHO(tags={'word': 'hello'}),
              ECHO(tags={'word': 'world'})]
       |Reduce| ([], MD5Sum)
       )
dag.create_dag_img('/tmp/ex.svg')

#################
# Run Workflow
#################

dag.add_to_workflow(WF)
WF.run()
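# The |Add| and |Reduce| infix operators appear to be an alternative
# spelling of the sequence_ style used in the other examples; if so,
# the DAG built here matches:
#
#   DAG().sequence_(add_([...]), reduce_([], MD5Sum))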
"""
This workflow demonstrates branching, for when you need something more
complicated than a linear step-by-step series of stages.

cosmos.lib.ezflow.dag.DAG.branch() is the key to branching.
"""
from cosmos.Workflow.models import Workflow
from cosmos.lib.ezflow.dag import DAG
import tools

####################
# Workflow
####################

dag = (DAG()
       .add([tools.ECHO(tags={'word': 'hello'}),
             tools.ECHO(tags={'word': 'world'})])
       .split([('i', [1, 2])], tools.CAT)
       .map(tools.WC)
       .branch('ECHO')
       .map(tools.WC, 'Extra Independent Word Count'))

# Generate image
dag.create_dag_img('/tmp/ex_branch.svg')

#################
# Run Workflow
#################

WF = Workflow.start('Example Branch', restart=True)
dag.add_to_workflow(WF)
WF.run()
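# branch('ECHO') points the DAG back at the ECHO stage, so the final map
# hangs off ECHO directly: the 'Extra Independent Word Count' WC tasks
# are siblings of the CAT -> WC chain rather than downstream of it.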
HOST = "smtp.server" text = '{0} has failed at stage {1}'.format(stage.workflow, stage) BODY = "\r\n".join( "From: %s" % FROM, "To: %s" % TO, "Subject: %s" % SUBJECT, "", text ) server = smtplib.SMTP(HOST) server.sendmail(FROM, [TO], BODY) server.quit() #################### # Workflow #################### from cosmos.lib.ezflow.dag import DAG, Map, Split, Add import tools dag = ( DAG(). add_([tools.ECHO(tags={'word': 'hello'}), tools.ECHO(tags={'word': 'world'})]). map_(tools.FAIL) # Automatically fail ) ################# # Run Workflow ################# dag.add_to_workflow(WF) WF.run()