def testJSON(self):
    default_json_dict_output = {
        u'helloHaplotypeCaller.haplotypeCaller.RefIndex':
            u'"src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.fasta.fai"',
        u'helloHaplotypeCaller.haplotypeCaller.sampleName':
            u'"WDL_tut1_output"',
        u'helloHaplotypeCaller.haplotypeCaller.inputBAM':
            u'"src/toil/test/wdl/GATK_data/inputs/NA12878_wgs_20.bam"',
        u'helloHaplotypeCaller.haplotypeCaller.bamIndex':
            u'"src/toil/test/wdl/GATK_data/inputs/NA12878_wgs_20.bai"',
        u'helloHaplotypeCaller.haplotypeCaller.GATK':
            u'"src/toil/test/wdl/GATK_data/GenomeAnalysisTK.jar"',
        u'helloHaplotypeCaller.haplotypeCaller.RefDict':
            u'"src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.dict"',
        u'helloHaplotypeCaller.haplotypeCaller.RefFasta':
            u'"src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.fasta"'}

    t = AnalyzeWDL(
        "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller.wdl",
        "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller_inputs.json",
        self.output_dir)

    json_dict = t.dict_from_JSON(
        "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller_inputs.json")

    assert json_dict == default_json_dict_output, (
        str(json_dict) + '\nAssertionError: ' + str(default_json_dict_output))
def testJSON(self):
    default_json_dict_output = {
        u'RefIndex': u'src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.fasta.fai',
        u'sampleName': u'WDL_tut1_output',
        u'inputBAM': u'src/toil/test/wdl/GATK_data/inputs/NA12878_wgs_20.bam',
        u'bamIndex': u'src/toil/test/wdl/GATK_data/inputs/NA12878_wgs_20.bai',
        u'GATK': u'src/toil/test/wdl/GATK_data/GenomeAnalysisTK.jar',
        u'RefDict': u'src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.dict',
        u'RefFasta': u'src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.fasta'}

    t = AnalyzeWDL(
        "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller.wdl",
        "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller_inputs.json",
        self.output_dir)

    json_dict = t.dict_from_JSON(
        "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller_inputs.json")

    assert json_dict == default_json_dict_output
def testTSV(self):
    '''Test that a TSV file is read into the expected nested list.'''
    default_tsv_output = [['1', '2', '3'],
                          ['4', '5', '6'],
                          ['7', '8', '9']]

    t = AnalyzeWDL(
        os.path.abspath(
            "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller.wdl"),
        os.path.abspath(
            "src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller_inputs.json"),
        self.output_dir)

    tsv_array = t.create_tsv_array('src/toil/test/wdl/test.tsv')

    assert tsv_array == default_tsv_output
def testPrimitives(self):
    '''Test if toilwdl correctly interprets some basic declarations.'''
    wdl = os.path.abspath('src/toil/test/wdl/testfiles/vocab.wdl')
    json = os.path.abspath('src/toil/test/wdl/testfiles/vocab.json')

    aWDL = AnalyzeWDL(wdl, json, self.output_dir)
    with open(wdl, 'r') as wdl_file:
        wdl_string = wdl_file.read()
        ast = wdl_parser.parse(wdl_string).ast()
        aWDL.create_tasks_dict(ast)
        aWDL.create_workflows_dict(ast)

    no_declaration = ['bool1', 'int1', 'float1', 'file1', 'string1']
    collection_counter = []
    for name, declaration in iteritems(
            aWDL.workflows_dictionary['vocabulary']['wf_declarations']):
        if name in no_declaration:
            collection_counter.append(name)
            assert not declaration['value']

        if name == 'bool2':
            collection_counter.append(name)
            assert declaration['value'] == 'True', declaration['value']
            assert declaration['type'] == 'Boolean', declaration['type']
        if name == 'int2':
            collection_counter.append(name)
            assert declaration['value'] == '1', declaration['value']
            assert declaration['type'] == 'Int', declaration['type']
        if name == 'float2':
            collection_counter.append(name)
            assert declaration['value'] == '1.1', declaration['value']
            assert declaration['type'] == 'Float', declaration['type']
        if name == 'file2':
            collection_counter.append(name)
            assert declaration['value'] == "'src/toil/test/wdl/test.tsv'", \
                declaration['value']
            assert declaration['type'] == 'File', declaration['type']
        if name == 'string2':
            collection_counter.append(name)
            assert declaration['value'] == "'x'", declaration['value']
            assert declaration['type'] == 'String', declaration['type']

    assert collection_counter == ['bool1', 'int1', 'float1', 'file1', 'string1',
                                  'bool2', 'int2', 'float2', 'file2', 'string2']
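# A minimal sketch of the structure that the assertions in testPrimitives imply
# for aWDL.workflows_dictionary['vocabulary']['wf_declarations']: each declared
# variable maps to a dict carrying its WDL 'type' and its 'value' as a quoted
# source literal.  The entries below are illustrative only (reconstructed from
# the asserts above, not copied from vocab.wdl), and the empty-string value for
# 'bool1' is an assumption; the test only checks that the '*1' values are falsy.
example_wf_declarations = {
    'bool1': {'type': 'Boolean', 'value': ''},  # assumed falsy placeholder
    'bool2': {'type': 'Boolean', 'value': 'True'},
    'int2': {'type': 'Int', 'value': '1'},
    'float2': {'type': 'Float', 'value': '1.1'},
    'file2': {'type': 'File', 'value': "'src/toil/test/wdl/test.tsv'"},
    'string2': {'type': 'String', 'value': "'x'"},
}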
def main():
    '''
    A program to run WDL input files using native Toil scripts.

    Uses two modules, described below: wdl_analysis.py and wdl_synthesis.py.

    wdl_analysis reads the wdl, json, and any extraneous files and restructures
    them into two intermediate data structures (python dictionaries):
        "wf_dictionary": the parsed workflow information.
        "tasks_dictionary": the parsed task information.

    wdl_synthesis takes the "wf_dictionary" and "tasks_dictionary" and uses them
    to write a native python script for use with Toil.

    Requires a WDL file and a JSON file.  The WDL file contains ordered commands,
    and the JSON file contains input values for those commands.  To run in Toil,
    these two files are parsed, restructured into python dictionaries, and then
    compiled into a Toil-formatted python script.  This compiled Toil script is
    deleted after running unless the user specifies the "--dev_mode" option.

    The WDL parser was auto-generated from the Broad's current WDL grammar file:
    https://github.com/openwdl/wdl/blob/master/parsers/grammar.hgr
    using Scott Frazer's Hermes: https://github.com/scottfrazer/hermes
    Thank you Scott Frazer!

    Currently in alpha testing, and known to work with the Broad's GATK tutorial
    set for WDL on their main wdl site:
    software.broadinstitute.org/wdl/documentation/topic?name=wdl-tutorials

    And ENCODE's WDL workflow:
    github.com/ENCODE-DCC/pipeline-container/blob/master/local-workflows/encode_mapping_workflow.wdl

    Support will be broadened to include more features soon.
    '''
    parser = argparse.ArgumentParser(description='Runs WDL files with toil.')
    parser.add_argument('wdl_file', help='A WDL workflow file.')
    parser.add_argument('secondary_file', help='A secondary data file (json).')
    parser.add_argument('--jobStore', type=str, required=False, default=None)
    parser.add_argument('-o', '--outdir', required=False, default=os.getcwd(),
                        help='Optionally specify the directory that outputs '
                             'are written to.  Default is the current working dir.')
    parser.add_argument('--dev_mode', required=False, default=False,
                        help='1. Creates "AST.out", which holds the printed AST, and '
                             '"mappings.out", which holds the parsed task and workflow '
                             'dictionaries that were generated.  '
                             '2. Prevents the compiled toil script generated from the '
                             'wdl/json files from being deleted.  '
                             '3. Skips autorunning the compiled python file.')
    parser.add_argument('--docker_user', required=False, default='root',
                        help='The user permissions that the docker containers will be '
                             'run with (and the permissions set on any output files '
                             'produced).  Default is "root".  Setting this to None will '
                             'set this to the current user.')
    parser.add_argument('--destBucket', type=str, required=False, default=False,
                        help='Specify a cloud bucket endpoint for output files.')

    # wdl_run_args is a list containing all of the unknown arguments not
    # specified by the parser in this main.  All of these will be passed down
    # in check_call later to run the compiled toil file.
    args, wdl_run_args = parser.parse_known_args()

    wdl_file_path = os.path.abspath(args.wdl_file)
    args.secondary_file = os.path.abspath(args.secondary_file)
    args.outdir = os.path.abspath(args.outdir)

    if args.dev_mode:
        write_AST(wdl_file_path, args.outdir)

    aWDL = AnalyzeWDL(wdl_file_path, args.secondary_file, args.outdir)

    # read the secondary file; create a dictionary to hold its input variables
    if args.secondary_file.endswith('.json'):
        aWDL.dict_from_JSON(args.secondary_file)
    elif args.secondary_file.endswith('.yml') or args.secondary_file.endswith('.yaml'):
        aWDL.dict_from_YML(args.secondary_file)  # json only atm
    else:
        raise RuntimeError('Unsupported Secondary File Type.  Use json.')

    # parse the wdl AST into 2 dictionaries
    with open(wdl_file_path, 'r') as wdl:
        wdl_string = wdl.read()
        ast = wdl_parser.parse(wdl_string).ast()
        aWDL.create_tasks_dict(ast)
        aWDL.create_workflows_dict(ast)

    sWDL = SynthesizeWDL(aWDL.tasks_dictionary,
                         aWDL.workflows_dictionary,
                         args.outdir,
                         aWDL.json_dict,
                         args.docker_user,
                         args.jobStore,
                         args.destBucket)

    # use the AST dictionaries to write 3 strings
    # these are the future 3 sections of the compiled toil python file
    module_section = sWDL.write_modules()
    fn_section = sWDL.write_functions()
    main_section = sWDL.write_main()

    # write the 3 strings to a python output file
    sWDL.write_python_file(module_section,
                           fn_section,
                           main_section,
                           sWDL.output_file)

    if args.dev_mode:
        wdllogger.debug('WDL file compiled to toil script.')
        sWDL.write_mappings(aWDL)
    else:
        wdllogger.debug('WDL file compiled to toil script.  Running now.')
        exe = sys.executable if sys.executable else 'python'
        cmd = [exe, sWDL.output_file]
        cmd.extend(wdl_run_args)
        subprocess.check_call(cmd)
        os.remove(sWDL.output_file)
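# A typical entry point, assuming this module is executed directly as a script;
# the guard below is standard boilerplate, not part of the logic above.  An
# example invocation might look like the following (script name and paths are
# hypothetical):
#
#     python toilwdl.py /path/to/workflow.wdl /path/to/inputs.json \
#         --outdir /path/to/outputs --dev_mode True
#
# Any extra flags not recognized by the parser are collected in wdl_run_args
# and forwarded to the compiled python file when it is autorun.
if __name__ == '__main__':
    main()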