def alignment_workflow(args):
    """Assemble the pypeliner workflow for the alignment step of one library.

    Values read from ``args``: ``library_id``, ``out_dir``, ``bams_dir``,
    ``trim``, ``sequencing_center`` and ``input_yaml``. ``args`` itself is
    also handed to ``inpututils.load_config``, from whose result only the
    ``'alignment'`` section is used.

    The returned workflow contains:

    * a ``('cell_id', 'lane')`` chunk axis seeded from the keys of the
      first-read fastq mapping;
    * the ``align.create_alignment_workflow`` subworkflow, writing per-cell
      ``{cell_id}.bam`` / ``{cell_id}_MT.bam`` files (with ``.bai``
      extensions) under ``bams_dir`` and the library-level outputs named by
      ``get_output_files`` under ``out_dir``;
    * two ``generate_and_upload_metadata`` transforms, each producing a
      ``metadata.yaml`` -- one in ``out_dir`` and one in ``bams_dir``.

    Returns:
        The populated ``pypeliner.workflow.Workflow``.
    """
    align_config = inpututils.load_config(args)['alignment']

    library_id = args['library_id']
    results_dir = args['out_dir']
    bam_dir = args['bams_dir']
    trim = args['trim']
    center = args['sequencing_center']
    input_yaml = args['input_yaml']

    sampleinfo = inpututils.get_sample_info(input_yaml)
    cellids = inpututils.get_samples(input_yaml)
    fastq1_files, fastq2_files = inpututils.get_fastqs(input_yaml)

    outputs = get_output_files(results_dir, library_id)
    results_meta_path = os.path.join(results_dir, 'metadata.yaml')

    bam_template = os.path.join(bam_dir, '{cell_id}.bam')
    mt_bam_template = os.path.join(bam_dir, '{cell_id}_MT.bam')
    bams_meta_path = os.path.join(bam_dir, 'metadata.yaml')

    # The fastq mappings are keyed by (cell_id, lane); collect the distinct
    # values of each component in sorted order for the metadata files.
    lane_ids = sorted({key[1] for key in fastq1_files.keys()})
    cell_ids = sorted({key[0] for key in fastq1_files.keys()})

    input_yaml_copy = os.path.join(results_dir, 'input.yaml')

    workflow = pypeliner.workflow.Workflow()

    workflow.setobj(
        obj=mgd.OutputChunks('cell_id', 'lane'),
        value=list(fastq1_files.keys()),
    )

    # Positional arguments for align.create_alignment_workflow; the order
    # is part of that function's signature and must not be rearranged.
    alignment_args = (
        mgd.InputFile(
            'fastq_1', 'cell_id', 'lane',
            fnames=fastq1_files, axes_origin=[],
        ),
        mgd.InputFile(
            'fastq_2', 'cell_id', 'lane',
            fnames=fastq2_files, axes_origin=[],
        ),
        mgd.OutputFile(
            'bam_markdups', 'cell_id',
            template=bam_template, axes_origin=[], extensions=['.bai'],
        ),
        mgd.OutputFile(
            'mt_bam_markdups', 'cell_id',
            template=mt_bam_template, axes_origin=[], extensions=['.bai'],
        ),
        mgd.OutputFile(outputs['alignment_metrics_csv']),
        mgd.OutputFile(outputs['gc_metrics_csv']),
        mgd.OutputFile(outputs['fastqc_metrics_csv']),
        mgd.OutputFile(outputs['plot_metrics_output']),
        align_config['ref_genome'],
        align_config,
        sampleinfo,
        cellids,
        mgd.OutputFile(outputs['alignment_metrics_tar']),
        library_id,
        trim,
        center,
    )
    workflow.subworkflow(
        name='alignment_workflow',
        func=align.create_alignment_workflow,
        args=alignment_args,
    )

    # Metadata for the library-level results directory; also stores a copy
    # of the input yaml next to the results.
    results_meta_kwargs = {
        'input_yaml_data': inpututils.load_yaml(input_yaml),
        'input_yaml': mgd.OutputFile(input_yaml_copy),
        'metadata': {
            'library_id': library_id,
            'cell_ids': cell_ids,
            'lane_ids': lane_ids,
            'type': 'alignment',
        },
    }
    workflow.transform(
        name='generate_meta_files_results',
        func='single_cell.utils.helpers.generate_and_upload_metadata',
        args=(
            sys.argv[:],
            results_dir,
            list(outputs.values()),
            mgd.OutputFile(results_meta_path),
        ),
        kwargs=results_meta_kwargs,
    )

    # Metadata for the per-cell bam directory; the bam paths are described
    # through the filename template over the 'cell_id' axis.
    bams_meta_kwargs = {
        'metadata': {
            'library_id': library_id,
            'cell_ids': cell_ids,
            'lane_ids': lane_ids,
            'type': 'cellbams',
        },
        'template': (mgd.InputChunks('cell_id'), bam_template, 'cell_id'),
    }
    workflow.transform(
        name='generate_meta_files_bams',
        func='single_cell.utils.helpers.generate_and_upload_metadata',
        args=(
            sys.argv[:],
            bam_dir,
            mgd.Template('aligned.bam', 'cell_id', template=bam_template),
            mgd.OutputFile(bams_meta_path),
        ),
        kwargs=bams_meta_kwargs,
    )

    return workflow
def hmmcopy_workflow(args):
    """Assemble the pypeliner workflow for the hmmcopy step of one library.

    Values read from ``args``: ``input_yaml``, ``library_id`` and
    ``out_dir``. ``args`` itself is also handed to
    ``inpututils.load_config``, from whose result only the ``'hmmcopy'``
    section is used; that section's ``docker.single_cell_pipeline`` entry
    becomes the workflow's ``docker_image`` context value.

    The returned workflow contains:

    * a ``'cell_id'`` chunk axis seeded from the keys of the bam mapping
      returned by ``inpututils.get_bams``;
    * the ``hmmcopy.create_hmmcopy_workflow`` subworkflow, writing the
      outputs named by ``get_output_files`` under ``out_dir``;
    * a ``generate_and_upload_metadata`` transform producing
      ``metadata.yaml`` and a copy of the input yaml in ``out_dir``.

    Returns:
        The populated ``pypeliner.workflow.Workflow``.
    """
    hmm_config = inpututils.load_config(args)['hmmcopy']

    input_yaml = args['input_yaml']
    sampleinfo = inpututils.get_sample_info(input_yaml)
    cellids = inpututils.get_samples(input_yaml)
    bam_files = inpututils.get_bams(input_yaml)

    library_id = args['library_id']

    docker_image = hmm_config['docker']['single_cell_pipeline']
    workflow = pypeliner.workflow.Workflow(
        ctx={'docker_image': docker_image},
    )

    results_dir = args['out_dir']
    outputs = get_output_files(results_dir, library_id)
    meta_path = os.path.join(results_dir, 'metadata.yaml')
    input_yaml_copy = os.path.join(results_dir, 'input.yaml')

    workflow.setobj(
        obj=mgd.OutputChunks('cell_id'),
        value=list(bam_files.keys()),
    )

    # Output keys in the positional order expected by
    # hmmcopy.create_hmmcopy_workflow; do not reorder.
    output_keys = (
        'reads_csvs',
        'segs_csvs',
        'metrics_csvs',
        'params_csvs',
        'igv_csvs',
        'segs_pdf',
        'bias_pdf',
        'heatmap_pdf',
        'metrics_pdf',
        'kernel_density_pdf',
        'hmmcopy_data_tar',
    )
    bam_input = mgd.InputFile(
        'bam_markdups', 'cell_id',
        fnames=bam_files, extensions=['.bai'],
    )
    managed_outputs = tuple(
        mgd.OutputFile(outputs[key]) for key in output_keys
    )
    hmmcopy_args = (
        (bam_input,) + managed_outputs + (cellids, hmm_config, sampleinfo)
    )
    workflow.subworkflow(
        name='hmmcopy_workflow',
        func=hmmcopy.create_hmmcopy_workflow,
        args=hmmcopy_args,
    )

    # Metadata for the results directory; also stores a copy of the input
    # yaml next to the results.
    meta_kwargs = {
        'input_yaml_data': inpututils.load_yaml(input_yaml),
        'input_yaml': mgd.OutputFile(input_yaml_copy),
        'metadata': {
            'library_id': library_id,
            'cell_ids': list(bam_files.keys()),
            'type': 'hmmcopy',
        },
    }
    workflow.transform(
        name='generate_meta_files_results',
        func='single_cell.utils.helpers.generate_and_upload_metadata',
        args=(
            sys.argv[:],
            results_dir,
            list(outputs.values()),
            mgd.OutputFile(meta_path),
        ),
        kwargs=meta_kwargs,
    )

    return workflow