def handle(self, *args, **options):
    """Register the GeneMark-ES 2.3e standalone tool and its command blueprint.

    If ``self.already_exists`` reports the tool/version pair as present, an
    INFO line is printed and True is returned without touching anything else.
    Otherwise the ``[GeneMark-ES 2.3e]`` section of ``settings.ini`` (resolved
    as ``../../settings.ini`` relative to this file's directory) is read, a
    flow blueprint, the tool, one command blueprint and its parameters are
    saved, and the tool's input/output file types are declared. In that case
    the method falls off the end and returns None.
    """
    tool_name = 'GeneMark-ES'
    tool_version = '2.3e'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    # Executable locations live in settings.ini, keyed by "<name> <version>".
    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    section = config["{0} {1}".format(tool_name, tool_version)]

    flow = FlowBlueprint(type='s')
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='http://topaz.gatech.edu',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Run GeneMark-ES', exec_path=section['gm_es_bin'])
    command.save()
    command.parents.add(flow)

    # One entry per command-line parameter, saved in this order.
    param_specs = (
        dict(name='--max_nnn', prefix='--max_nnn ', position=1, default_value='49',
             short_desc='[number] number of unknown letters used to fill gaps'),
        dict(name='--min_contig', prefix='--min_contig ', position=2, default_value='20000',
             short_desc='[number] minimum length of the contig',
             long_desc='All contigs shorter then "min_contig" are excluded from training procedure.'),
        dict(name='--max_contig', prefix='--max_contig ', position=3, default_value='10000000',
             short_desc='[number] maximum length of contig',
             long_desc='Sequences longer then "max_contig" are split into shorter ones '
                       'to avoid out of memory condition.'),
        dict(name='--BP', prefix='--BP ', position=4,
             short_desc='switches off the branch point submodel and runs original ES algorithm (ON/OFF)'),
        dict(name='--ini_mod', prefix='--ini_mod ', position=5, short_desc=''),
        # The sequence file is a bare positional argument, hence no prefix.
        dict(name='<sequence_file_name>', prefix=None, position=6, is_optional=False,
             short_desc='input sequence in FASTA format'),
    )
    for spec in param_specs:
        CommandBlueprintParam(command=command, **spec).save()

    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command,
               via_param='<sequence_file_name>')
    tool.can_create(filetype_name='GFF3', via_command=command)
def handle(self, *args, **options):
    """Register the geneid 1.4 standalone tool and its command blueprint.

    Returns True (after printing an INFO line) when ``self.already_exists``
    says the tool/version pair is already registered. Otherwise reads the
    ``[geneid 1.4]`` section of ``settings.ini`` (``../../settings.ini``
    relative to this file's directory), saves the flow blueprint, tool,
    command blueprint and parameters, declares the FASTA input, and returns
    None by falling off the end.
    """
    # These two values must match the [geneid 1.4] section name in settings.ini.
    tool_name = 'geneid'
    tool_version = '1.4'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    section = config["{0} {1}".format(tool_name, tool_version)]

    flow = FlowBlueprint(type='s', name=tool_name)
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='http://www1.imim.es/software/geneid/index.html',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Run geneid', exec_path=section['exec_path'])
    command.save()
    command.parents.add(flow)

    # Parameters for the tool. geneid has many more options than are modelled
    # here; this subset covers the invocation
    #   geneid -3 -P <parameter_filename> <Sequence_filename> > some.out.gff3
    param_specs = (
        dict(name='-3', prefix='-3 ', position=1, has_no_value=True, is_optional=False,
             short_desc='Use GFF3 format to print predictions'),
        dict(name='-P', prefix='-P ', position=2, is_optional=False,
             short_desc='Parameter file to use'),
        # Has to be the last argument positionally.
        dict(name='<sequence_filename>', prefix=None, position=3, is_optional=False,
             short_desc='Input query FASTA file'),
    )
    for spec in param_specs:
        CommandBlueprintParam(command=command, **spec).save()

    # Input/output of the tool; only the input file type is declared here.
    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command,
               via_param='<sequence_filename>')
def handle(self, *args, **options):
    """Register the EVM r20120625 standalone tool and its command blueprint.

    Returns True (after printing an INFO line) when ``self.already_exists``
    reports the tool/version pair as present. Otherwise reads the
    ``[EVM r20120625]`` section of ``settings.ini`` (``../../settings.ini``
    relative to this file's directory), saves the flow blueprint, tool,
    command blueprint and parameters, declares the two required input file
    types, and returns None by falling off the end.
    """
    tool_name = 'EVM'
    tool_version = 'r20120625'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    section = config["{0} {1}".format(tool_name, tool_version)]

    flow = FlowBlueprint(type='s')
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='http://sourceforge.net/projects/evidencemodeler',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Run EVM', exec_path=section['evm_bin'])
    command.save()
    command.parents.add(flow)

    # One entry per command-line parameter, saved in this order.
    param_specs = (
        dict(name='--genome', prefix='--genome ', position=1, is_optional=False,
             short_desc='Genome sequence in FASTA format'),
        dict(name='--weights', prefix='--weights ', position=2, is_optional=False,
             short_desc='Weights for evidence types file',
             long_desc='The weights file is used to describe and score the types of each input. '
                       'More info here: http://evidencemodeler.sourceforge.net/#Preparing_inputs'),
        dict(name='--gene_predictions', prefix='--gene_predictions ', position=3,
             is_optional=False,
             short_desc='Gene predictions GFF3 file',
             long_desc='All input files which correspond to gene predictions '
                       '(and not protein or transcript alignments) '
                       'should be concatenated into this file.'),
        dict(name='--protein_alignments', prefix='--protein_alignments ', position=4,
             is_optional=True,
             short_desc='Protein alignments in GFF3 format'),
        dict(name='--transcript_alignments', prefix='--transcript_alignments ', position=5,
             is_optional=True,
             short_desc='Transcript alignments in GFF3 format'),
    )
    for spec in param_specs:
        CommandBlueprintParam(command=command, **spec).save()

    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command, via_param='--genome')
    tool.needs(filetype_name='GFF3', via_command=command, via_param='--gene_predictions')
def handle(self, *args, **options):
    """Register the NUCmer 3.23 standalone tool and its command blueprint.

    Returns True (after printing an INFO line) when ``self.already_exists``
    reports the tool/version pair as present. Otherwise reads the shared
    ``[MUMmer 3.23]`` section of ``settings.ini`` (``../../settings.ini``
    relative to this file's directory), saves the flow blueprint, tool,
    command blueprint and parameters, declares the input and output file
    types, and returns None by falling off the end.
    """
    tool_name = 'NUCmer'
    tool_version = '3.23'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    # NUCmer's paths are stored under the MUMmer section, not its own name.
    section = config["{0} {1}".format('MUMmer', tool_version)]

    flow = FlowBlueprint(type='s')
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='http://mummer.sourceforge.net/manual/#nucmer',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Run NUCmer', exec_path=section['nucmer_bin'])
    command.save()
    command.parents.add(flow)

    # USAGE: nucmer [options] <Reference> <Query>
    param_specs = (
        dict(name='STDOUT', position=0),
        dict(name='STDERR', position=0),
        dict(name='--mum', prefix='--mum ', has_no_value=True, position=1,
             short_desc='Use anchor matches that are unique in both the reference and query'),
        dict(name='--mumreference', prefix='--mumreference ', has_no_value=True, position=2,
             short_desc='Use anchor matches that are unique in the reference '
                        'but not necessarily unique in the query'),
        dict(name='-b', prefix='-b ', position=3, default_value='200',
             short_desc='Alignment extension distance',
             long_desc='Distance an alignment extension will attempt to extend '
                       'poor scoring regions before giving up'),
        dict(name='-c', prefix='-c ', default_value='65', position=4,
             short_desc='Minimum length of a cluster of matches'),
        dict(name='--nodelta', prefix='--nodelta ', has_no_value=True, position=5,
             short_desc='Toggles off creation of delta file'),
        dict(name='-D', prefix='-D ', default_value='5', position=6,
             short_desc='Maximum diagonal difference between two adjacent anchors in a cluster'),
        dict(name='-d', prefix='-d ', default_value='0.12', position=7,
             short_desc='Maximum diagonal difference ratio',
             long_desc='Maximum diagonal difference between two adjacent anchors in a cluster '
                       'as a differential fraction of the gap length '),
        dict(name='--noextend', prefix='--noextend ', has_no_value=True, position=8,
             short_desc='Toggles off the cluster extension step'),
        dict(name='--forward', prefix='--forward ', has_no_value=True, position=9,
             short_desc='Use only the forward strand of the Query sequences'),
        dict(name='-g', prefix='-g ', default_value='90', position=10,
             short_desc='Maximum gap between two adjacent matches in a cluster'),
        dict(name='-l', prefix='-l ', default_value='20', position=11,
             short_desc='Minimum length of a single match'),
        dict(name='--nooptimize', prefix='--nooptimize ', has_no_value=True, position=12,
             short_desc='Toggle off alignment score optimization',
             long_desc='Toggles off alignment score optimization, i.e. '
                       'if an alignment extension reaches the end of a sequence, '
                       'it will backtrack to optimize the alignment score instead of '
                       'terminating the alignment at the end of the sequence'),
        # Made required just so that a tool can always look up the output file
        # more easily.
        # NOTE(review): position 17 places '-p' after both positional inputs
        # (15 and 16) -- confirm this ordering is intended.
        dict(name='-p', prefix='-p ', default_value='out', position=17, is_optional=False,
             short_desc='Sets the output file prefix, which can include the directory path'),
        dict(name='--reverse', prefix='--reverse ', has_no_value=True, position=13,
             short_desc='Use only the reverse complement of the Query sequences'),
        dict(name='--nosimplify', prefix='--nosimplify ', has_no_value=True, position=14,
             short_desc='Removes shadowed clusters',
             long_desc='Simplify alignments by removing shadowed clusters. '
                       'Turn this option off if aligning a sequence to itself to look for repeats'),
        dict(name='<reference_in>', prefix=None, position=15, is_optional=False,
             short_desc='Input reference FASTA file'),
        dict(name='<query_in>', prefix=None, position=16, is_optional=False,
             short_desc='Input query FASTA file'),
    )
    for spec in param_specs:
        CommandBlueprintParam(command=command, **spec).save()

    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command,
               via_param='<reference_in>')
    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command,
               via_param='<query_in>')
    # NOTE(review): the delta output is tied to STDOUT here, while '-p' above
    # is described as setting the output file prefix -- confirm which one the
    # framework should track.
    tool.creates(filetype_name='MUMmer delta file', via_command=command, via_param='STDOUT')
def handle(self, *args, **options):
    """Register the show-coords 3.23 standalone tool and its command blueprint.

    Returns True (after printing an INFO line) when ``self.already_exists``
    reports the tool/version pair as present. Otherwise reads the shared
    ``[MUMmer 3.23]`` section of ``settings.ini`` (``../../settings.ini``
    relative to this file's directory), saves the flow blueprint, tool,
    command blueprint and parameters, declares the delta-file input, and
    returns None by falling off the end.
    """
    tool_name = 'show-coords'
    tool_version = '3.23'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    # show-coords' paths are stored under the MUMmer section, not its own name.
    section = config["{0} {1}".format('MUMmer', tool_version)]

    flow = FlowBlueprint(type='s')
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='http://mummer.sourceforge.net/manual/#coords',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Run show-coords', exec_path=section['show_coords_bin'])
    command.save()
    command.parents.add(flow)

    # USAGE: show-coords [options] <deltafile>
    param_specs = (
        dict(name='-b', prefix='-b ', has_no_value=True, position=1,
             short_desc='Merges overlapping alignments',
             long_desc='Merges overlapping alignments regardless of match dir or frame '
                       'and does not display any idenitity information.'),
        dict(name='-B', prefix='-B ', has_no_value=True, position=2,
             short_desc='Switch output to btab format'),
        dict(name='-c', prefix='-c ', has_no_value=True, position=3,
             short_desc='Include percent coverage information in the output'),
        dict(name='-d', prefix='-d ', has_no_value=True, position=4,
             short_desc='Display the alignment direction in the additional FRM columns '
                        '(default for promer)'),
        dict(name='-H', prefix='-H ', has_no_value=True, position=5,
             short_desc='Do not print the output header'),
        dict(name='-I', prefix='-I ', position=6,
             short_desc='Set minimum percent identity to display'),
        dict(name='-k', prefix='-k ', has_no_value=True, position=7,
             short_desc='Knockout 50/75 alignments',
             long_desc='Knockout (do not display) alignments that overlap another alignment '
                       'in a different frame by more than 50% of their length, '
                       'AND have a smaller percent similarity or are less than 75% '
                       'of the size of the other alignment (promer only)'),
        dict(name='-l', prefix='-l ', has_no_value=True, position=8,
             short_desc='Include the sequence length information in the output'),
        dict(name='-L', prefix='-L ', position=9,
             short_desc='Set minimum alignment length to display'),
        dict(name='-o', prefix='-o ', has_no_value=True, position=10,
             short_desc='Annotate maximal alignments between two sequences',
             long_desc='Annotate maximal alignments between two sequences, i.e. '
                       'overlaps between reference and query sequences'),
        dict(name='-q', prefix='-q ', has_no_value=True, position=11,
             short_desc='Sort output lines by query IDs and coordinates'),
        dict(name='-r', prefix='-r ', has_no_value=True, position=12,
             short_desc='Sort output lines by reference IDs and coordinates'),
        dict(name='-T', prefix='-T ', has_no_value=True, position=13,
             short_desc='Switch output to tab-delimited format'),
        # Fixed: this description used to read 'Input reference FASTA file',
        # which contradicted the 'MUMmer delta file' input declared below.
        dict(name='<deltafile>', prefix=None, position=14, is_optional=False,
             short_desc='Input MUMmer delta file'),
    )
    for spec in param_specs:
        CommandBlueprintParam(command=command, **spec).save()

    tool.needs(filetype_name='MUMmer delta file', via_command=command, via_param='<deltafile>')
def handle(self, *args, **options):
    """Register the GeneMarkS 4.6b standalone tool and its command blueprint.

    Returns True (after printing an INFO line) when ``self.already_exists``
    reports the tool/version pair as present. Otherwise reads the
    ``[GeneMarkS 4.6b]`` section of ``settings.ini`` (``../../settings.ini``
    relative to this file's directory), saves the flow blueprint, tool,
    command blueprint and parameters, declares the input and output file
    types, and returns None by falling off the end.

    Parameter positions are assigned from the order of the table below
    (starting at 1), so no two parameters can share a position.
    """
    tool_name = 'GeneMarkS'
    tool_version = '4.6b'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    section = config["{0} {1}".format(tool_name, tool_version)]

    flow = FlowBlueprint(type='s')
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='http://topaz.gatech.edu',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Run GeneMarkS', exec_path=section['gm_s_bin'])
    command.save()
    command.parents.add(flow)

    # Ordered parameter table. Positions are derived from this order rather
    # than hand-numbered: the hand-written numbers gave both '--strand' and
    # '--prok' position 16.
    param_specs = (
        dict(name='--name', prefix='--name ', default_value='GeneMark_hmm.mod',
             short_desc='<string> name of output model file generated for GeneMark.hmm'),
        dict(name='--combine', prefix='--combine ', default_value='GeneMark_hmm_combined.mod',
             short_desc='combine GeneMarkS generated and Heuristic model parameters '
                        'into one integrated model'),
        dict(name='--gm', prefix='--gm ', default_value='GeneMark.mat',
             short_desc='generate model file for GeneMark'),
        dict(name='--species', prefix='--species ',
             short_desc='<string> name of a species in a model file'),
        dict(name='--clean', prefix='--clean ', has_no_value=True,
             short_desc='delete all temporary files'),
        dict(name='--order', prefix='--order ', default_value='2',
             short_desc='<number> markov chain order. (default: 2; supported in range: >= 0)'),
        dict(name='--gcode', prefix='--gcode ', default_value='11',
             short_desc='<number> genetic code. default: 11; supported: 11, 4 and 1)'),
        dict(name='--shape', prefix='--shape ', default_value='partial',
             short_desc='<string> sequence organization '
                        '(default: partial; supported: linear, circular and partial)'),
        dict(name='--motif', prefix='--motif ', default_value='1',
             long_desc='<number> iterative search for a sequence motif associated with CDS start. '
                       '(default: 1; supported: 1 <true> and 0 <false>)'),
        dict(name='--width', prefix='--width ', default_value='6',
             short_desc='<number> motif width (default: 6; supported in range: >= 3)'),
        dict(name='--prestart', prefix='--prestart ', default_value='26',
             long_desc='<number> length of sequence upstream of translation initiation site '
                       'that presumably includes the motif '
                       '(default: 26; supported in range: >= 0)'),
        dict(name='--identity', prefix='--identity ', default_value='0.99',
             long_desc='<number> identity level assigned for termination of iterations '
                       '(default: 0.99; supported in range: >=0 and <= 1)'),
        # NOTE(review): the description reads like an iteration limit; confirm
        # the flag name '--matrix' against the GeneMarkS usage text.
        dict(name='--matrix', prefix='--matrix ', default_value='10',
             short_desc='<number> maximum number of iterations '
                        '(default: 10; supported in range: >= 1)'),
        # Fixed: this entry previously repeated the '--offover' description.
        dict(name='--fixmotif', prefix='--fixmotif ', has_no_value=True,
             long_desc='the motif is located at a fixed position with regard to the start; '
                       'motif could overlap start codon (if this option is on, it changes '
                       'the meaning of --prestart option which in this case will define '
                       'the distance from start codon to motif start)'),
        dict(name='--offover', prefix='--offover ', has_no_value=True,
             short_desc='prohibits gene overlap (if not specified: overlaps are allowed)'),
        dict(name='--strand', prefix='--strand ', default_value='both',
             short_desc='<string> sequence strand to predict genes in '
                        '(default: both; supported: direct, reverse and both )'),
        dict(name='--prok', prefix='--prok ', has_no_value=True,
             short_desc='same as: --combine --clean --gm',
             long_desc='to run program on prokaryotic sequence or phage '
                       'with building models for both GeneMark and GeneMark.hmm'),
        dict(name='--euk', prefix='--euk ', has_no_value=True,
             short_desc='same as: --offover --gcode 1 --clean --fixmotif '
                        '--prestart 6 --width 12 --order 4 --gm',
             long_desc='to run program on eukaryotic intron-less sequence (i.e. low eukaryote)'),
        dict(name='--viral', prefix='--viral ', has_no_value=True,
             short_desc='same as: --combine --gcode 1 --clean --fixmotif '
                        '--prestart 6 --width 12 --gm',
             long_desc='to run program on a eukaryotic viral genome'),
        dict(name='--par', prefix='--par ',
             short_desc='<file name> custom parameters for GeneMarkS',
             long_desc='default is selected based on gcode value: par_<gcode>.default'),
        dict(name='--imod', prefix='--imod ',
             short_desc='<file name> custom initiation model for GeneMarkS',
             long_desc='default: heuristic model derived from GC composition of input sequence'),
        dict(name='--test', prefix='--test ', has_no_value=True,
             short_desc='installation test'),
        dict(name='--verbose', prefix='--verbose ', has_no_value=True,
             short_desc='prints stderr'),
        # The sequence file is a bare positional argument and comes last.
        dict(name='<sequence_file_name>', prefix=None, is_optional=False,
             short_desc='input sequence in FASTA format'),
    )
    for position, spec in enumerate(param_specs, start=1):
        CommandBlueprintParam(command=command, position=position, **spec).save()

    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command,
               via_param='<sequence_file_name>')
    tool.can_create(filetype_name='GFF3', via_command=command)
def handle(self, *args, **options):
    """Register the Prodigal 2.60 standalone tool and its command blueprint.

    Returns True (after printing an INFO line) when ``self.already_exists``
    reports the tool/version pair as present. Otherwise reads the
    ``[Prodigal 2.60]`` section of ``settings.ini`` (``../../settings.ini``
    relative to this file's directory), saves the flow blueprint, tool,
    command blueprint and parameters, declares the input and the two possible
    output file types, and returns None by falling off the end.
    """
    tool_name = 'Prodigal'
    tool_version = '2.60'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    section = config["{0} {1}".format(tool_name, tool_version)]

    flow = FlowBlueprint(type='s', name=tool_name)
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='https://code.google.com/p/prodigal/',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Run prodigal', exec_path=section['exec_path'])
    command.save()
    command.parents.add(flow)

    # One entry per command-line parameter, saved in this order.
    param_specs = (
        dict(name='-a', prefix='-a ', position=1,
             short_desc='Write protein translations to the selected file'),
        dict(name='-c', prefix='-c ', position=2, has_no_value=True,
             short_desc='Closed ends. Do not allow genes to run off edges'),
        dict(name='-d', prefix='-d ', position=3,
             short_desc='Write nucleotide sequences of genes to the selected file'),
        # TODO: limit choices to (gbk, gff, or sco)
        dict(name='-f', prefix='-f ', position=4, default_value='gbk',
             short_desc='Select output format (gbk, gff, or sco). Default is gbk'),
        dict(name='-g', prefix='-g ', position=5, default_value='11',
             short_desc='Specify a translation table to use (default 11)'),
        dict(name='-i', prefix='-i ', position=6, is_optional=False,
             short_desc='Specify input file (default reads from stdin).'),
        dict(name='-m', prefix='-m ', position=7, has_no_value=True,
             short_desc='Treat runs of Ns as masked sequence and do not build genes across them'),
        dict(name='-n', prefix='-n ', position=8, has_no_value=True,
             short_desc='Bypass the Shine-Dalgarno trainer and force the program '
                        'to scan for motifs'),
        dict(name='-o', prefix='-o ', position=9, is_optional=False,
             short_desc='Specify output file'),
        dict(name='-p', prefix='-p ', position=10, default_value='single',
             short_desc='Select procedure (single or meta). Default is single.'),
        dict(name='-s', prefix='-s ', position=11,
             short_desc='Write all potential genes (with scores) to the selected file'),
        dict(name='-t', prefix='-t ', position=12,
             short_desc='Write or read the specified training file',
             long_desc='Write a training file (if none exists); otherwise, '
                       'read and use the specified training file'),
    )
    for spec in param_specs:
        CommandBlueprintParam(command=command, **spec).save()

    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command, via_param='-i')
    # The output type is declared per '-f' value, always paired with '-o'.
    tool.can_create(filetype_name='GenBank Flat File Format', via_command=command,
                    via_params=['-o', '-f=gbk'])
    tool.can_create(filetype_name='GFF3', via_command=command,
                    via_params=['-o', '-f=gff'])
def handle(self, *args, **options):
    """Register the Bowtie-build 1.0.0 standalone tool and its command blueprint.

    Returns True (after printing an INFO line) when ``self.already_exists``
    reports the tool/version pair as present. Otherwise reads the shared
    ``[Bowtie 1.0.0]`` section of ``settings.ini`` (``../../settings.ini``
    relative to this file's directory), saves the flow blueprint, tool,
    command blueprint and parameters, declares the input and output file
    types, and returns None by falling off the end.
    """
    tool_name = 'Bowtie-build'
    tool_version = '1.0.0'

    if self.already_exists(tool_name, tool_version):
        message = "INFO: tool {0} {1} already exists. Skipping."
        print(message.format(tool_name, tool_version))
        return True

    here = os.path.abspath(os.path.dirname(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(here, '../../settings.ini'))
    # Bowtie-build's paths are stored under the Bowtie section, not its own name.
    section = config["{0} {1}".format('Bowtie', tool_version)]

    flow_description = (
        'Bowtie is an ultrafast, memory-efficient short read aligner. '
        'It aligns short DNA sequences (reads) to the human genome at a rate of '
        'over 25 million 35-bp reads per hour. '
        'Bowtie indexes the genome with a Burrows-Wheeler index to keep its '
        'memory footprint small: typically about 2.2 GB for the human genome '
        '(2.9 GB for paired-end).'
    )
    flow = FlowBlueprint(type='s', description=flow_description)
    flow.save()

    tool = StandaloneTool(
        name=tool_name,
        version=tool_version,
        primary_site='http://bowtie-bio.sourceforge.net/index.shtml',
        flow_bp=flow,
    )
    tool.save()

    command = CommandBlueprint(name='Build an index for bowtie',
                               exec_path=section['bowtie_build_bin'])
    command.save()
    command.parents.add(flow)

    # bowtie-build [options]* <reference_in> <ebwt_outfile_base>
    param_specs = (
        dict(name='-C', prefix='-C ', has_no_value=True, position=1,
             short_desc='Build a colorspace index'),
        dict(name='-a', prefix='-a ', has_no_value=True, position=2,
             short_desc='Disable automatic -p/--bmax/--dcv memory-fitting'),
        dict(name='-p', prefix='-p ', has_no_value=True, position=3,
             short_desc='Use packed strings internally; slower, uses less mem'),
        dict(name='-B', prefix='-B ', has_no_value=True, position=4,
             short_desc='Build both letter- and colorspace indexes'),
        dict(name='--bmax', prefix='--bmax ', position=5,
             short_desc='Max bucket sz for blockwise suffix-array builder'),
        dict(name='--bmaxdivn', prefix='--bmaxdivn ', position=6, default_value='4',
             short_desc='Max bucket sz as divisor of ref len'),
        dict(name='--dcv', prefix='--dcv ', position=7, default_value='1024',
             short_desc='Diff-cover period for blockwise'),
        dict(name='--nodc', prefix='--nodc ', has_no_value=True, position=8,
             short_desc='Disable diff-cover (algorithm becomes quadratic)'),
        dict(name='-r', prefix='-r ', has_no_value=True, position=9,
             short_desc='Do not build .3/.4.ebwt (packed reference) portion'),
        dict(name='-3', prefix='-3 ', has_no_value=True, position=10,
             short_desc='Just build .3/.4.ebwt (packed reference) portion'),
        dict(name='-o', prefix='-o ', position=11, default_value='5',
             short_desc='SA is sampled every 2^offRate BWT chars'),
        dict(name='-t', prefix='-t ', position=12, default_value='10',
             short_desc='# of chars consumed in initial lookup'),
        dict(name='--ntoa', prefix='--ntoa ', has_no_value=True, position=13,
             short_desc='Convert Ns in reference to As'),
        dict(name='--seed', prefix='--seed ', position=14,
             short_desc='Seed for random number generator'),
        # The two positional arguments carry no prefix and come last.
        dict(name='<reference_in>', prefix=None, position=15, is_optional=False,
             short_desc='Input reference FASTA file'),
        dict(name='<ebwt_outfile_base>', prefix=None, position=16, is_optional=False,
             short_desc='Path to the basename of the ebwt files to be created'),
    )
    for spec in param_specs:
        CommandBlueprintParam(command=command, **spec).save()

    tool.needs(filetype_name='FASTA (nucleotide)', via_command=command,
               via_param='<reference_in>')
    tool.creates(filetype_name='Bowtie 1.0 index', via_command=command,
                 via_param='<ebwt_outfile_base>')