Example #1
def json_(workflow, input_dict, **kwargs):
    """
    Input file is a JSON list of the following format:

    [
        {
            "chunk": "001",
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 0,
            "path": "/path/to/fastq"
        },
        {..}
    ]

    "pair" is 0 or 1.
    """
    with open(input_dict) as fh:
        input_json = json.load(fh)
    # One INPUT per FASTQ file, carrying its metadata record as tags.
    inputs = [INPUT(name='fastq.gz', path=i['path'], fmt='fastq.gz', tags=i,
                    stage_name='Load Input Fastqs') for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline(),
        configure(wga_settings),
        add_run(workflow)
    )
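For reference, an input file in this format could be generated with a short script like the following sketch (library, sample, and path values are illustrative, not from the source):

import json

# Two records describing one paired-end chunk of a single library;
# field names mirror the docstring above, values are made up.
records = [
    {'chunk': '001', 'library': 'LIB-1216301779A', 'sample_name': '1216301779A',
     'platform': 'ILLUMINA', 'platform_unit': 'C0MR3ACXX.001',
     'pair': pair, 'path': '/path/to/fastq_%d.fastq.gz' % pair}
    for pair in (0, 1)
]
with open('input.json', 'w') as fh:
    json.dump(records, fh, indent=4)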
Example #2
def bam(workflow, input_bam, input_bam_list, **kwargs):
    """
    Input file is a bam with properly annotated readgroups.

    *** Note that this workflow assumes the bam header is    ***
    *** also properly annotated with the correct readgroups! ***

    Example usage:
    $ genomekey bam -n 'Bam to VCF Workflow 1' input_bam.bam

    $ echo "dir/sample1.bam" > /tmp/bam.list
    $ echo "dir/sample2.bam" >> /tmp/bam.list
    $ genomekey bam -n 'Bam to VCF 2' -li /tmp/bam.list

    """
    # capture and pedigree_file are used in main()

    input_bams = input_bam_list.read().strip().split('\n') if input_bam_list else []
    if input_bam:
        input_bams.append(input_bam.name)

    dag = DAG(ignore_stage_name_collisions=True)
    Bam2Fastq(workflow, dag, wga_settings, input_bams)
    dag.sequence_(
        Pipeline(),
        configure(wga_settings),
        add_run(workflow)
    )
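Because the workflow trusts the BAM's read-group annotation, it can help to verify it before submitting; a minimal sketch assuming samtools is on PATH (has_readgroups is a hypothetical helper, not part of GenomeKey):

import subprocess

def has_readgroups(bam_path):
    # Dump only the header and look for @RG lines.
    header = subprocess.check_output(['samtools', 'view', '-H', bam_path])
    return any(line.startswith('@RG') for line in header.decode().splitlines())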
Example #3
def json_(workflow, input_dict, **kwargs):
    """
    Input file is a json of the following format:

    [
        {
            'chunk': 001,
            'library': 'LIB-1216301779A',
            'sample_name': '1216301779A',
            'platform': 'ILLUMINA',
            'platform_unit': 'C0MR3ACXX.001'
            'pair': 0, #0 or 1
            'path': '/path/to/fastq'
        },
        {..}
    ]
    """

    with open(input_dict) as fh:
        input_json = json.load(fh)
    inputs = [
        INPUT(name='fastq.gz',
              path=i['path'],
              fmt='fastq.gz',
              tags=i,
              stage_name='Load Input Fastqs') for i in input_json
    ]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline(),
        configure(wga_settings),
        add_run(workflow)
    )
Example #4
def bam(workflow, input_bam, input_bam_list, **kwargs):
    """
    Input file is a bam with properly annotated readgroups.

    *** Note that this workflow assumes the bam header is    ***
    *** also properly annotated with the correct readgroups! ***

    Example usage:
    $ genomekey bam -n 'Bam to VCF Workflow 1' input_bam.bam

    $ echo "dir/sample1.bam" > /tmp/bam.list
    $ echo "dir/sample2.bam" >> /tmp/bam.list
    $ genomekey bam -n 'Bam to VCF 2' -li /tmp/bam.list

    """
    # capture and pedigree_file are used in main()

    input_bams = input_bam_list.read().strip().split('\n') if input_bam_list else []
    if input_bam:
        input_bams.append(input_bam.name)

    dag = DAG(ignore_stage_name_collisions=True)
    Bam2Fastq(workflow, dag, wga_settings, input_bams)
    dag.sequence_(Pipeline(), configure(wga_settings), add_run(workflow))
Example #5
def json_somatic(workflow, input_dict, **kwargs):
    """
    Input file is a json of the following format:

    [
        {
            "chunk": "001",
            "library": "LIB-1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "rgid": "BC18-06-2013",
            "sample_name": "BC18-06-2013LyT_S5_L001",
            "pair": "1",
            "path": "/path/to/fastq.gz",
            "sample_type": "normal or tumor"
        },
        {..}
    ]
    """
    
    with open(input_dict) as fh:
        input_json = json.load(fh)
    # One INPUT per FASTQ file, carrying its metadata record as tags.
    inputs = [INPUT(name='fastq.gz', path=i['path'], fmt='fastq.gz', tags=i,
                    stage_name='Load Input Fastqs') for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_Somatic(),
        configure(wga_settings),
        add_run(workflow)
    )
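A somatic input file carries both members of a tumor/normal pair, distinguished by sample_type; a minimal sketch of building one (identifiers are illustrative, not from the source):

import json

records = [
    {"chunk": "001", "library": "LIB-1216301779A", "platform": "ILLUMINA",
     "platform_unit": "C0MR3ACXX.001", "rgid": "BC18-06-2013",
     "sample_name": "BC18-06-2013LyT_S5_L001", "pair": "1",
     "path": "/path/to/%s.fastq.gz" % kind, "sample_type": kind}
    for kind in ("normal", "tumor")
]
with open('somatic_input.json', 'w') as fh:
    json.dump(records, fh, indent=4)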
Example #6
def json_somatic(workflow, input_dict, **kwargs):
    """
    Input file is a json of the following format:

    [
        {
            "chunk": "001",
            "library": "LIB-1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "rgid": "BC18-06-2013",
            "sample_name": "BC18-06-2013LyT_S5_L001",
            "pair": "1",
            "path": "/path/to/fastq.gz",
            "sample_type": "normal or tumor"
        },
        {..}
    ]
    """

    with open(input_dict) as fh:
        input_json = json.load(fh)
    inputs = [
        INPUT(name='fastq.gz',
              path=i['path'],
              fmt='fastq.gz',
              tags=i,
              stage_name='Load Input Fastqs') for i in input_json
    ]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_Somatic(),
        configure(wga_settings),
        add_run(workflow)
    )
Example #7
def json_local(workflow, input_dict, **kwargs):
    """
    Input is a folder where each file is a JSON list of the following format:

    [
        {
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 1,
            "path": "/path/to/fastq"
        },
        {
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 2,
            "path": "/path/to/fastq"
        }
    ]
    """
    dir_list = os.listdir(input_dict)
    for files in dir_list:
        # input_dict is expected to end with a path separator.
        print(input_dict + files)
        with open(input_dict + files) as fh:
            input_json = json.load(fh)
        inputs = [INPUT(name='fastq.gz', path=i['path'], fmt='fastq.gz', tags=i,
                        stage_name='Load Input Fastqs') for i in input_json]
        for i in inputs:
            print(i)
        DAG(ignore_stage_name_collisions=True).sequence_(
            add_(inputs),
            Pipeline_local(),
            configure(wga_settings),
            add_run(workflow)
        )
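Note that input_dict + files only forms a valid path when input_dict ends with a separator; a safer directory loader, shown as a sketch (load_all is a hypothetical helper, not part of GenomeKey):

import json
import os

def load_all(input_dir):
    # os.path.join works with or without a trailing slash on input_dir.
    records = []
    for name in sorted(os.listdir(input_dir)):
        with open(os.path.join(input_dir, name)) as fh:
            records.extend(json.load(fh))
    return records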
Example #8
def downdbs(workflow, **kwargs):
    """
    Download all annotation databases
    """
    DAG().sequence_(
        add_([annovarext.DownDB(tags={'build': 'hg19', 'dbname': db})
              for db in annovarext.get_db_names()]),
        configure(wga_settings),
        add_run(workflow)
    )
Example #9
def downdbs(workflow, **kwargs):
    """
    Download all annotation databases
    """
    DAG().sequence_(
        add_([annovarext.DownDB(tags={'build': 'hg19', 'dbname': db})
              for db in annovarext.get_db_names()]),
        configure(wga_settings),
        add_run(workflow)
    )
Example #10
def fastq_(workflow, input_dict, output_dict, output_json, **kwargs):

    json_fastq_to_split = json_creator.json_out(input_dict, output_dict)
    with open(json_fastq_to_split) as fh:
        input_json = json.load(fh)
    inputs = [INPUT(name='fastq.gz', path=i['gz_path'], fmt='fastq.gz', tags=i,
                    stage_name='Load Input Fastqs') for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_split(),
        configure(wga_settings),
        add_run(workflow)
    )
Example #11
def upload_(workflow, bucket, project, out_dict, **kwargs):
    project_folder = join(out_dict, project.replace(" ", "_"))
    if not os.path.exists(project_folder):
        os.makedirs(project_folder)
    json_fastq_to_upload = s3_Bucket.getList(bucket, project, out_dict)
    with open(json_fastq_to_upload) as fh:
        input_json = json.load(fh)
    inputs = [INPUT(name='fastq.gz', path=i['gz_path'], fmt='fastq.gz', tags=i,
                    stage_name='Load Input Fastqs') for i in input_json]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_upload(),
        configure(wga_settings),
        add_run(workflow)
    )
Example #12
def fastq_(workflow, input_dict, output_dict, output_json, **kwargs):

    json_fastq_to_split = json_creator.json_out(input_dict, output_dict)
    with open(json_fastq_to_split) as fh:
        input_json = json.load(fh)
    inputs = [
        INPUT(name='fastq.gz',
              path=i['gz_path'],
              fmt='fastq.gz',
              tags=i,
              stage_name='Load Input Fastqs') for i in input_json
    ]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_split(),
        configure(wga_settings),
        add_run(workflow)
    )
Example #13
def anno(workflow, input_file, input_file_list, file_format='vcf', **kwargs):
    """
    Annotates all files in input_files

    $ genomekey anno -n 'My Annotation Workflow #1' file1.vcf file2.vcf
    """
    input_files = input_file_list.read().strip().split('\n') if input_file_list else []
    if input_file:
        input_files.append(input_file.name)
    print('annotating {0}'.format(', '.join(input_files)), file=sys.stderr)

    DAG().sequence_(
        add_([INPUT(input_file, tags={'vcf': i})
              for i, input_file in enumerate(input_files)]),
        massive_annotation,
        configure(wga_settings),
        add_run(workflow)
    )
Example #14
def anno(workflow, input_file, input_file_list, file_format='vcf', **kwargs):
    """
    Annotates all files in input_files

    $ genomekey anno -n 'My Annotation Workflow #1' file1.vcf file2.vcf
    """
    input_files = input_file_list.read().strip().split('\n') if input_file_list else []
    if input_file:
        input_files.append(input_file.name)
    print('annotating {0}'.format(', '.join(input_files)), file=sys.stderr)

    DAG().sequence_(
        add_([INPUT(input_file, tags={'vcf': i})
              for i, input_file in enumerate(input_files)]),
        massive_annotation,
        configure(wga_settings),
        add_run(workflow)
    )
Example #15
def upload_(workflow, bucket, project, out_dict, **kwargs):
    project_folder = join(out_dict, project.replace(" ", "_"))
    if not os.path.exists(project_folder):
        os.makedirs(project_folder)
    json_fastq_to_upload = s3_Bucket.getList(bucket, project, out_dict)
    with open(json_fastq_to_upload) as fh:
        input_json = json.load(fh)
    inputs = [
        INPUT(name='fastq.gz',
              path=i['gz_path'],
              fmt='fastq.gz',
              tags=i,
              stage_name='Load Input Fastqs') for i in input_json
    ]

    DAG(ignore_stage_name_collisions=True).sequence_(
        add_(inputs),
        Pipeline_upload(),
        configure(wga_settings),
        add_run(workflow)
    )
Example #16
def Bam2Fastq(workflow, dag, settings, input_bams):
    if len(input_bams) == 0:
        raise WorkflowException('At least 1 BAM input required')
    dag.sequence_(
        sequence_(
            *[
                sequence_(
                    add_([INPUT(input_bam, tags={'input': os.path.basename(input_bam)})],
                         stage_name="Load Input Bams"),
                    split_([('rgid', _inputbam2rgids(input_bam))],
                           pipes.FilterBamByRG_To_FastQ)
                )
                for input_bam in input_bams
            ],
            combine=True
        ),
        split_([('pair', [1, 2])], genomekey_scripts.SplitFastq),
        configure(settings),
        add_run(workflow, finish=False),
    ).add_(list(_splitfastq2inputs(dag)))
    return dag
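The nested splits appear to fan tasks out over tag values: one branch per input BAM, one FilterBamByRG_To_FastQ task per read group, then one SplitFastq task per (rgid, pair) combination. A plain-Python illustration of the resulting task grid (read-group ids are hypothetical):

from itertools import product

rgids = ['C0MR3ACXX.001', 'C0MR3ACXX.002']  # e.g. what _inputbam2rgids might return
tasks = list(product(rgids, [1, 2]))
# [('C0MR3ACXX.001', 1), ('C0MR3ACXX.001', 2),
#  ('C0MR3ACXX.002', 1), ('C0MR3ACXX.002', 2)]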
Example #17
def Bam2Fastq(workflow, dag, settings, input_bams):
    if len(input_bams) == 0:
        raise WorkflowException('At least 1 BAM input required')
    dag.sequence_(
        sequence_(
            *[
                sequence_(
                    add_([INPUT(input_bam, tags={'input': os.path.basename(input_bam)})],
                         stage_name="Load Input Bams"),
                    split_([('rgid', _inputbam2rgids(input_bam))],
                           pipes.FilterBamByRG_To_FastQ)
                )
                for input_bam in input_bams
            ],
            combine=True
        ),
        split_([('pair', [1, 2])], genomekey_scripts.SplitFastq),
        configure(settings),
        add_run(workflow, finish=False),
    ).add_(list(_splitfastq2inputs(dag)))
    return dag
Example #18
def json_local(workflow, input_dict, **kwargs):
    """
    Input is a folder where each file is a JSON list of the following format:

    [
        {
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 1,
            "path": "/path/to/fastq"
        },
        {
            "library": "LIB-1216301779A",
            "sample_name": "1216301779A",
            "platform": "ILLUMINA",
            "platform_unit": "C0MR3ACXX.001",
            "pair": 2,
            "path": "/path/to/fastq"
        }
    ]
    """
    dir_list = os.listdir(input_dict)
    for files in dir_list:
        # input_dict is expected to end with a path separator.
        print(input_dict + files)
        with open(input_dict + files) as fh:
            input_json = json.load(fh)
        inputs = [
            INPUT(name='fastq.gz',
                  path=i['path'],
                  fmt='fastq.gz',
                  tags=i,
                  stage_name='Load Input Fastqs') for i in input_json
        ]
        for i in inputs:
            print(i)
        DAG(ignore_stage_name_collisions=True).sequence_(
            add_(inputs),
            Pipeline_local(),
            configure(wga_settings),
            add_run(workflow)
        )