def test_str_all_modes(self):
    """MixedParameter: str() output in the off, flag and valued modes"""
    param = MixedParameter(Prefix='-', Name='d', Delimiter='=', Quote=']')
    # A freshly constructed parameter renders as the empty string
    self.assertEqual(str(param), '')
    # on() without a value acts like a flag; on(value) appends the
    # delimiter and the quoted value
    for on_args, expected in (((), '-d'), (('a',), '-d=]a]')):
        param.on(*on_args)
        self.assertEqual(str(param), expected)
class RNAsubopt(CommandLineApplication):
    """Application controller for RNAsubopt (in the Vienna RNA package)

    Parameters with default values:
    -e: 1 (range)
    -T: 37 (temperature)
    -d: 2 (dangling ends as in partition function folding)

    Input is always written to a file which is used as the application's
    input. StdErr is suppressed by default, but can be overwritten in an
    instance.
    """
    _parameters = {
        # flags
        '-C': FlagParameter(Prefix='-', Name='C'),
        '-s': FlagParameter(Prefix='-', Name='s'),
        '-lodos': FlagParameter(Prefix='-', Name='lodos'),
        '-4': FlagParameter(Prefix='-', Name=4),
        '-noGU': FlagParameter(Prefix='-', Name='noGU'),
        '-noCloseGU': FlagParameter(Prefix='-', Name='noCloseGU'),
        '-logML': FlagParameter(Prefix='-', Name='logML'),
        '-noLP': FlagParameter(Prefix='-', Name='noLP'),
        # options taking a value
        '-p': ValuedParameter(Prefix='-', Name='p', Delimiter=' '),
        '-e': ValuedParameter(Prefix='-', Name='e', Delimiter=' ', Value=1),
        '-ep': ValuedParameter(Prefix='-', Name='ep', Delimiter=' '),
        '-T': ValuedParameter(Prefix='-', Name='T', Value=37, Delimiter=' '),
        '-P': ValuedParameter(Prefix='-', Name='P', Delimiter=' '),
        '-nsp': ValuedParameter(Prefix='-', Name='nsp', Delimiter=' '),
        # usable with or without a value
        '-d': MixedParameter(Prefix='-', Name='d', Delimiter='', Value=2),
    }
    _synonyms = {
        'Temperature': '-T',
        'Temp': '-T',
        'EnergyRange': '-e',
        'Sort': '-s',
    }
    _command = 'RNAsubopt'
    _input_handler = '_input_as_lines'
    _suppress_stderr = True

    def _input_as_path(self, filename):
        """Returns '<' + the base class's handling of filename

        The leading '<' makes the shell feed that file to the application
        on stdin.
        """
        handled = super(RNAsubopt, self)._input_as_path(filename)
        return '<' + str(handled)

    def _input_as_lines(self, data):
        """Returns '<' + the base class's handling of the lines in data

        The base handler presumably writes data to a temp file and returns
        its name; the leading '<' redirects that file to stdin.
        """
        handled = super(RNAsubopt, self)._input_as_lines(data)
        return '<' + str(handled)
def setUp(self):
    """Create the parameter objects and Parameters containers for the tests

    self.p1 is an empty Parameters object, self.p2 is built from
    self.all_params, and self.p3 from self.all_params plus self._synonyms.
    """
    self.fp = FlagParameter(Prefix='-', Name='d')
    self.vp = ValuedParameter(Name='p', Prefix='-', Value=[1])
    # The assignment target was missing on this line, which is a
    # SyntaxError; 'mp' follows the fp/vp naming used above.
    self.mp = MixedParameter(Prefix='--', Name='k', Delimiter=' ')
    # NOTE(review): self.mp is not registered in all_params although the
    # synonyms below mention 'k' -- confirm whether an entry
    # (self.mp.Id: self.mp) was lost together with the assignment target.
    self.all_params = {
        self.fp.Id: self.fp,
        self.vp.Id: self.vp,
    }
    self.p1 = Parameters()
    self.p2 = Parameters(self.all_params)
    self._synonyms = {'Pino': '-p', 'K': 'k'}
    self.p3 = Parameters(self.all_params, self._synonyms)
def test_str_all_modes(self):
    """MixedParameter: str() output for each of its states"""
    mixed = MixedParameter(Quote=']', Delimiter='=', Name='d', Prefix='-')

    # not switched on yet: nothing is rendered
    rendered_off = str(mixed)
    self.assertEqual(rendered_off, '')

    # switched on without a value: prefix and name only
    mixed.on()
    rendered_flag = str(mixed)
    self.assertEqual(rendered_flag, '-d')

    # switched on with a value: delimiter plus the quoted value follow
    mixed.on('a')
    rendered_valued = str(mixed)
    self.assertEqual(rendered_valued, '-d=]a]')
class RNAfold(CommandLineApplication):
    """Application controller for RNAfold (in the Vienna RNA package)

    Parameters with default values:
    -T: 37 (temperature)
    -d: 1 (only unpaired bases in dangling ends)
    -S: 1.07 (scale)

    Input is always written to a file which is used as the application's
    input. StdErr is suppressed by default, but can be overruled in an
    instance.
    """
    _parameters = {
        '-p': MixedParameter(Prefix='-', Name='p', Delimiter='', Value=False),
        '-C': FlagParameter(Prefix='-', Name='C'),
        '-T': ValuedParameter(Prefix='-', Name='T', Value=37, Delimiter=' '),
        '-4': FlagParameter(Prefix='-', Name=4),
        '-d': MixedParameter(Prefix='-', Name='d', Delimiter='', Value=1),
        '-noLP': FlagParameter(Prefix='-', Name='noLP'),
        '-noGU': FlagParameter(Prefix='-', Name='noGU'),
        '-noCloseGU': FlagParameter(Prefix='-', Name='noCloseGU'),
        '-e': ValuedParameter(Prefix='-', Name='e', Delimiter=' '),
        '-P': ValuedParameter(Prefix='-', Name='P', Delimiter=' '),
        '-nsp': ValuedParameter(Prefix='-', Name='nsp', Delimiter=' '),
        '-S': ValuedParameter(Prefix='-', Name='S', Value=1.07,
                              Delimiter=' '),
    }
    _synonyms = {'Temperature': '-T', 'Temp': '-T', 'Scale': '-S'}
    _command = 'RNAfold'
    _input_handler = '_input_as_lines'
    _suppress_stderr = True

    def _input_as_path(self, filename):
        """Returns '<' + the base class's handling of filename

        The leading '<' redirects that file to the application's stdin.
        """
        return '<' + str(super(RNAfold, self)._input_as_path(filename))

    def _input_as_lines(self, data):
        """Returns '<' + the base class's handling of the lines in data

        The leading '<' redirects the resulting file to stdin.
        """
        return '<' + str(super(RNAfold, self)._input_as_lines(data))

    def _get_result_paths(self, data):
        """Specifies the paths of output files generated by the application

        data: the data the instance of the application is called on; either
            a list of lines or the path of a file containing those lines.

        You always get back: StdOut, StdErr, and ExitStatus.
        RNAfold can produce two additional output files:
            a secondary structure graph. Default name: rna.ps
            a dot plot of the base pairing matrix. Default name: dot.ps
        The default names are used for unnamed sequences. Files are created
        in the current working directory.
        You can make a sequence named by inserting a line '>name' above it
        in your input file (or list of sequences). The ss and dp files for
        named sequences will be written to name_ss.ps and name_dp.ps.

        Returns a dict of {key: ResultPath} with the keys 'SS' and 'DP' plus
        '<name>_ss' and '<name>_dp' for every '>name' line in data.
        """
        result = {}
        name_counter = 0
        seq_counter = 0
        if not isinstance(data, list):
            # Anything that is not a list is taken to be a file path.
            # Close the handle explicitly instead of leaking it.
            infile = open(data)
            try:
                data = infile.readlines()
            finally:
                infile.close()

        # Dot plots are only reported as written when -p is switched on
        dot_plot_on = self.Parameters['-p'].isOn()

        for item in data:
            if item.startswith('>'):
                name_counter += 1
                name = item.strip('>\n')
                result[name + '_ss'] = ResultPath(
                    Path=self.WorkingDir + name + '_ss.ps')
                result[name + '_dp'] = ResultPath(
                    Path=self.WorkingDir + name + '_dp.ps',
                    IsWritten=dot_plot_on)
            else:
                seq_counter += 1

        # More non-header lines than headers means at least one sequence
        # has no name, so the default-named files are expected.
        has_unnamed = seq_counter - name_counter > 0
        # Secondary Structure
        result['SS'] = ResultPath(Path=self.WorkingDir + 'rna.ps',
                                  IsWritten=has_unnamed)
        # DotPlot
        result['DP'] = ResultPath(Path=self.WorkingDir + 'dot.ps',
                                  IsWritten=(dot_plot_on and has_unnamed))
        return result
class Clustalw(CommandLineApplication):
    """clustalw application controller

    The parameters are organized by function to give some idea of how the
    program works. However, no restrictions are put on any combinations
    of parameters. Misuse of parameters can lead to errors or otherwise
    strange results.

    You are supposed to choose one action for the program to perform
    (align, profile, sequences, tree, or bootstrap). If you choose
    multiple, only the dominant action (see order above) will be executed.
    By DEFAULT, the -align parameter is turned on. If you decide to turn
    another one on, you should turn '-align' off IN ADDITION!

    Some references to help pages are available in the 'getHelp' method.
    Some might be useful to you.
    """
    _actions = {
        '-align': FlagParameter('-', 'align', Value=True),
        '-profile': FlagParameter('-', 'profile'),
        '-sequences': FlagParameter('-', 'sequences'),
        '-tree': FlagParameter('-', 'tree'),
        '-bootstrap': MixedParameter('-', 'bootstrap', Delimiter='='),
    }

    # sequence file for alignment, or alignment file for bootstrap and tree
    # actions
    _input = {
        '-infile': ValuedParameter('-', 'infile', Delimiter='=', IsPath=True),
    }

    # matrix and dnamatrix can be filenames as well, but not always.
    # They won't be treated as filenames and thus not quoted.
    # Therefore filepaths containing spaces might result in errors.
    _multiple_alignment = {
        '-quicktree': FlagParameter('-', 'quicktree'),
        '-type': ValuedParameter('-', 'type', Delimiter='='),
        '-matrix': ValuedParameter('-', 'matrix', Delimiter='='),
        '-dnamatrix': ValuedParameter('-', 'dnamatrix', Delimiter='='),
        '-gapopen': ValuedParameter('-', 'gapopen', Delimiter='='),
        '-gapext': ValuedParameter('-', 'gapext', Delimiter='='),
        '-endgaps': FlagParameter('-', 'endgaps'),
        '-gapdist': ValuedParameter('-', Name='gapdist', Delimiter='='),
        '-nopgap': FlagParameter('-', 'nopgap'),
        '-nohgap': FlagParameter('-', 'nohgap'),
        '-hgapresidues': ValuedParameter('-', 'hgapresidues', Delimiter='='),
        '-maxdiv': ValuedParameter('-', Name='maxdiv', Delimiter='='),
        '-negative': FlagParameter('-', 'negative'),
        '-transweight': ValuedParameter('-', Name='transweight',
                                        Delimiter='='),
        '-newtree': ValuedParameter('-', 'newtree', Delimiter='=',
                                    IsPath=True),
        '-usetree': ValuedParameter('-', 'usetree', Delimiter='=',
                                    IsPath=True),
    }

    _fast_pairwise = {
        '-ktuple': ValuedParameter('-', Name='ktuple', Delimiter='='),
        '-topdiags': ValuedParameter('-', Name='topdiags', Delimiter='='),
        '-window': ValuedParameter('-', Name='window', Delimiter='='),
        '-pairgap': ValuedParameter('-', Name='pairgap', Delimiter='='),
        '-score': ValuedParameter('-', Name='score', Delimiter='='),
    }

    # pwmatrix and pwdnamatrix can be filenames as well, but not always.
    # They won't be treated as filenames and thus not quoted.
    # Therefore filepaths containing spaces might result in errors.
    _slow_pairwise = {
        '-pwmatrix': ValuedParameter('-', Name='pwmatrix', Delimiter='='),
        '-pwdnamatrix': ValuedParameter('-', Name='pwdnamatrix',
                                        Delimiter='='),
        '-pwgapopen': ValuedParameter('-', Name='pwgapopen', Delimiter='='),
        '-pwgapext': ValuedParameter('-', Name='pwgapext', Delimiter='='),
    }

    # plus -bootstrap
    _tree = {
        '-kimura': FlagParameter('-', Name='kimura'),
        '-tossgaps': FlagParameter('-', Name='tossgaps'),
        '-bootlabels': ValuedParameter('-', Name='bootlabels', Delimiter='='),
        '-seed': ValuedParameter('-', Name='seed', Delimiter='='),
        '-outputtree': ValuedParameter('-', Name='outputtree', Delimiter='='),
    }

    _output = {
        '-outfile': ValuedParameter('-', Name='outfile', Delimiter='=',
                                    IsPath=True),
        '-output': ValuedParameter('-', Name='output', Delimiter='='),
        '-case': ValuedParameter('-', Name='case', Delimiter='='),
        '-outorder': ValuedParameter('-', Name='outorder', Delimiter='='),
        '-seqnos': ValuedParameter('-', Name='seqnos', Delimiter='='),
    }

    _profile_alignment = {
        '-profile1': ValuedParameter('-', 'profile1', Delimiter='=',
                                     IsPath=True),
        '-profile2': ValuedParameter('-', 'profile2', Delimiter='=',
                                     IsPath=True),
        '-usetree1': ValuedParameter('-', 'usetree1', Delimiter='=',
                                     IsPath=True),
        '-usetree2': ValuedParameter('-', 'usetree2', Delimiter='=',
                                     IsPath=True),
        '-newtree1': ValuedParameter('-', 'newtree1', Delimiter='=',
                                     IsPath=True),
        '-newtree2': ValuedParameter('-', 'newtree2', Delimiter='=',
                                     IsPath=True),
    }

    _structure_alignment = {
        '-nosecstr1': FlagParameter('-', Name='nosecstr1'),
        '-nosecstr2': FlagParameter('-', Name='nosecstr2'),
        '-helixgap': ValuedParameter('-', Name='helixgap', Delimiter='='),
        '-strandgap': ValuedParameter('-', Name='strandgap', Delimiter='='),
        '-loopgap': ValuedParameter('-', Name='loopgap', Delimiter='='),
        '-terminalgap': ValuedParameter('-', Name='terminalgap',
                                        Delimiter='='),
        '-helixendin': ValuedParameter('-', Name='helixendin', Delimiter='='),
        '-helixendout': ValuedParameter('-', Name='helixendout',
                                        Delimiter='='),
        '-strandendin': ValuedParameter('-', Name='strandendin',
                                        Delimiter='='),
        '-strandendout': ValuedParameter('-', Name='strandendout',
                                         Delimiter='='),
        '-secstrout': ValuedParameter('-', Name='secstrout', Delimiter='='),
    }

    # NOT SUPPORTED
    # '-help':FlagParameter('-','help'),
    # '-check':FlagParameter('-','check'),
    # '-options':FlagParameter('-','options'),
    # '-convert':FlagParameter('-','convert'),
    # '-batch':FlagParameter('-','batch'),
    # '-noweights':FlagParameter('-','noweights'),
    # '-novgap':FlagParameter('-','novgap'),
    # '-debug':ValuedParameter('-',Name='debug',Delimiter='='),

    # Merge the per-function groups into the single table of parameters
    _parameters = {}
    _parameters.update(_actions)
    _parameters.update(_input)
    _parameters.update(_multiple_alignment)
    _parameters.update(_fast_pairwise)
    _parameters.update(_slow_pairwise)
    _parameters.update(_tree)
    _parameters.update(_output)
    _parameters.update(_profile_alignment)
    _parameters.update(_structure_alignment)

    _command = 'clustalw'

    def getHelp(self):
        """Returns a string that points to the documentation"""
        help_str = """
        There are several help pages available online. For example:
            clustalw_help_1.8.html

        A page that give reasonable insight in use of the parameters:
        """
        return help_str

    def _input_as_multiline_string(self, data):
        """Writes data to tempfile and sets -infile parameter

        data -- multiline string, handed to the base-class handler

        Returns the empty string: the input reaches the command line
        through the -infile parameter instead.
        """
        if data:
            infile = super(Clustalw, self)._input_as_multiline_string(data)
            self.Parameters['-infile'].on(infile)
        return ''

    def _input_as_lines(self, data):
        """Writes data to tempfile and sets -infile parameter

        data -- list of lines, ready to be written to file

        Returns the empty string (see _input_as_multiline_string).
        """
        if data:
            infile = super(Clustalw, self)._input_as_lines(data)
            self.Parameters['-infile'].on(infile)
        return ''

    def _input_as_seqs(self, data):
        """Writes sequences to tempfile and sets -infile parameter

        data -- list of sequences

        Adds numbering to the sequences: >1, >2, etc.
        """
        lines = []
        for number, seq in enumerate(data):
            # labels are 1-based: 1, 2, 3, etc.
            lines.append('>' + str(number + 1))
            lines.append(seq)
        return self._input_as_lines(lines)

    def _input_as_string(self, data):
        """Makes data the value of the -infile parameter

        This method returns the empty string. The parameter will be printed
        automatically once set.
        """
        if data:
            self.Parameters['-infile'].on(data)
        return ''

    def _suffix(self):
        """Return appropriate suffix for alignment file

        '.aln' is returned when -output is off, and also when its value is
        not one of the formats listed below (previously that raised a
        KeyError).
        """
        _output_formats = {
            'GCG': '.msf',
            'GDE': '.gde',
            'PHYLIP': '.phy',
            'PIR': '.pir',
            'NEXUS': '.nxs',
        }
        output = self.Parameters['-output']
        if output.isOn():
            return _output_formats.get(output.Value, '.aln')
        return '.aln'

    def _aln_filename(self, prefix):
        """Return name of file containing the alignment

        prefix -- str, prefix of alignment file.

        The value of -outfile (passed through self._absolute) wins when that
        parameter is on; otherwise prefix + self._suffix() is returned.
        """
        outfile = self.Parameters['-outfile']
        if outfile.isOn():
            return self._absolute(outfile.Value)
        return prefix + self._suffix()

    def _tempfile_as_multiline_string(self, data):
        """Write a multiline string to a temp file and return the filename.

        data: a multiline string to be written to a file.

        * Note: the result will be the filename as a FilePath object
         (which is a string subclass).
        """
        filename = FilePath(self.getTmpFilename(self.TmpDir))
        data_file = open(filename, 'w')
        try:
            data_file.write(data)
        finally:
            # close the handle even when the write fails
            data_file.close()
        return filename

    def _get_result_paths(self, data):
        """Return dict of {key: ResultPath}

        data is not inspected; the result depends on the first action
        parameter that is on, checked in this order:
            -align:     'Align', 'Dendro'
            -profile:   'Align', 'Dendro1', 'Dendro2'
            -sequences: 'Align', 'Dendro'
            -tree:      'Tree', 'TreeInfo'
            -bootstrap: 'Tree'
        An empty dict is returned when none of them is on.
        """
        # clustalw .aln is used when no or unknown output type specified
        _treeinfo_formats = {'nj': '.nj', 'dist': '.dst', 'nexus': '.tre'}

        result = {}
        par = self.Parameters
        absolute = self._absolute

        if par['-align'].isOn():
            # strip only the last extension from the input file name
            prefix = par['-infile'].Value.rsplit('.', 1)[0]
            aln_filename = self._aln_filename(prefix)
            if par['-newtree'].isOn():
                dnd_filename = absolute(par['-newtree'].Value)
            elif par['-usetree'].isOn():
                dnd_filename = absolute(par['-usetree'].Value)
            else:
                dnd_filename = absolute(prefix + '.dnd')
            result['Align'] = ResultPath(Path=aln_filename, IsWritten=True)
            result['Dendro'] = ResultPath(Path=dnd_filename, IsWritten=True)

        elif par['-profile'].isOn():
            prefix1 = par['-profile1'].Value.rsplit('.', 1)[0]
            prefix2 = par['-profile2'].Value.rsplit('.', 1)[0]
            aln_filename = self._aln_filename(prefix1)
            aln_written = True
            # a tree supplied through -usetreeN is not reported as written
            tree1_written = not par['-usetree1'].isOn()
            tree2_written = not par['-usetree2'].isOn()
            # when -newtreeN is on, the alignment is flagged as not written
            if par['-newtree1'].isOn():
                dnd1_filename = absolute(par['-newtree1'].Value)
                aln_written = False
            else:
                dnd1_filename = absolute(prefix1 + '.dnd')
            if par['-newtree2'].isOn():
                dnd2_filename = absolute(par['-newtree2'].Value)
                aln_written = False
            else:
                dnd2_filename = absolute(prefix2 + '.dnd')
            result['Align'] = ResultPath(Path=aln_filename,
                                         IsWritten=aln_written)
            result['Dendro1'] = ResultPath(Path=dnd1_filename,
                                           IsWritten=tree1_written)
            result['Dendro2'] = ResultPath(Path=dnd2_filename,
                                           IsWritten=tree2_written)

        elif par['-sequences'].isOn():
            # output names derive from -profile2 (the sequences); -profile1
            # holds the alignment
            prefix2 = par['-profile2'].Value.rsplit('.', 1)[0]
            aln_filename = self._aln_filename(prefix2)
            aln_written = True
            # the path stays empty when an existing tree is used
            dnd_filename = ''
            dnd_written = True
            if par['-usetree'].isOn():
                dnd_written = False
            elif par['-newtree'].isOn():
                aln_written = False
                dnd_filename = absolute(par['-newtree'].Value)
            else:
                # NOTE(review): unlike the other branches this default is
                # not passed through self._absolute -- confirm intent.
                dnd_filename = prefix2 + '.dnd'
            result['Align'] = ResultPath(Path=aln_filename,
                                         IsWritten=aln_written)
            result['Dendro'] = ResultPath(Path=dnd_filename,
                                          IsWritten=dnd_written)

        elif par['-tree'].isOn():
            prefix = par['-infile'].Value.rsplit('.', 1)[0]
            tree_filename = prefix + '.ph'
            treeinfo_filename = ''
            treeinfo_written = False
            outputtree = par['-outputtree']
            if outputtree.isOn() and outputtree.Value != 'phylip':
                treeinfo_filename = prefix + \
                    _treeinfo_formats[outputtree.Value]
                treeinfo_written = True
            result['Tree'] = ResultPath(Path=tree_filename, IsWritten=True)
            result['TreeInfo'] = ResultPath(Path=treeinfo_filename,
                                            IsWritten=treeinfo_written)

        elif par['-bootstrap'].isOn():
            prefix = par['-infile'].Value.rsplit('.', 1)[0]
            boottree_filename = prefix + '.phb'
            result['Tree'] = ResultPath(Path=boottree_filename,
                                        IsWritten=True)

        return result
class FastTree(CommandLineApplication):
    """FastTree application Controller"""

    _command = 'FastTree'
    _input_handler = '_input_as_multiline_string'
    _parameters = {
        '-quiet': FlagParameter('-', Name='quiet'),
        '-boot': ValuedParameter('-', Delimiter=' ', Name='boot'),
        '-seed': ValuedParameter('-', Delimiter=' ', Name='seed'),
        '-nni': ValuedParameter('-', Delimiter=' ', Name='nni'),
        '-slow': FlagParameter('-', Name='slow'),
        '-fastest': FlagParameter('-', Name='fastest'),
        '-top': FlagParameter('-', Name='top'),
        '-notop': FlagParameter('-', Name='notop'),
        '-topm': ValuedParameter('-', Delimiter=' ', Name='topm'),
        '-close': ValuedParameter('-', Delimiter=' ', Name='close'),
        '-refresh': ValuedParameter('-', Delimiter=' ', Name='refresh'),
        '-matrix': ValuedParameter('-', Delimiter=' ', Name='matrix'),
        '-nomatrix': FlagParameter('-', Name='nomatrix'),
        '-nj': FlagParameter('-', Name='nj'),
        '-bionj': FlagParameter('-', Name='bionj'),
        '-nt': FlagParameter('-', Name='nt'),
        '-n': ValuedParameter('-', Delimiter=' ', Name='n'),
        '-pseudo': MixedParameter('-', Delimiter=' ', Name='pseudo'),
        '-intree': ValuedParameter('-', Delimiter=' ', Name='intree'),
        '-spr': ValuedParameter('-', Delimiter=' ', Name='spr'),
        '-constraints': ValuedParameter('-', Delimiter=' ',
                                        Name='constraints'),
        '-constraintWeight': ValuedParameter('-', Delimiter=' ',
                                             Name='constraintWeight'),
        '-makematrix': ValuedParameter('-', Delimiter=' ',
                                       Name='makematrix'),
    }

    def __call__(self, data=None, remove_tmp=True):
        """Run the application with the specified kwargs on data

        data: anything that can be cast into a string or written out to
            a file. Usually either a list of things or a single string or
            number. input_handler will be called on this data before it
            is passed as part of the command-line argument, so by creating
            your own input handlers you can customize what kind of data
            you want your application to accept

        remove_tmp: if True, removes the temporary input file (if one was
            created). The file receiving stdout is not removed here; it is
            what _get_result_paths reports as 'Tree'.

        Returns a CommandLineAppResult built from the opened stdout file,
        the opened stderr file (None when stderr is suppressed), the exit
        status and the result paths.

        Raises AssertionError when self.HaltExec is set, and
        ApplicationError when self._accept_exit_status rejects the exit
        status.

        NOTE: Override of the base class to handle redirected output
        """
        input_handler = self.InputHandler
        suppress_stderr = self.SuppressStderr

        # stdout is redirected to a temp file; remember it so that
        # _get_result_paths can report it
        outfile = self.getTmpFilename(self.TmpDir)
        self._outfile = outfile

        if suppress_stderr:
            errfile = FilePath('/dev/null')
        else:
            errfile = FilePath(self.getTmpFilename(self.TmpDir))

        if data is None:
            input_arg = ''
        else:
            input_arg = getattr(self, input_handler)(data)

        # Build up the command, consisting of a BaseCommand followed by
        # input and output (file) specifications; empty pieces are dropped
        command = self._command_delimiter.join(filter(None, [
            self.BaseCommand, str(input_arg), '>', str(outfile), '2>',
            str(errfile)]))

        if self.HaltExec:
            # call form of raise: same behaviour on Python 2, and it also
            # parses on Python 3
            raise AssertionError("Halted exec with command:\n" + command)

        # The return value of system is a 16-bit number containing the signal
        # number that killed the process, and then the exit status.
        # We only want to keep the exit status so do a right bitwise shift to
        # get rid of the signal number byte
        exit_status = system(command) >> 8

        # Determine if error should be raised due to exit status of
        # application
        if not self._accept_exit_status(exit_status):
            raise ApplicationError(
                'Unacceptable application exit status: %s, command: %s'
                % (str(exit_status), command))

        # The handles are handed to the result object, so they are
        # deliberately left open here
        out = open(outfile, "r")
        err = None
        if not suppress_stderr:
            err = open(errfile, "r")

        result = CommandLineAppResult(
            out, err, exit_status,
            result_paths=self._get_result_paths(data))

        # Clean up the input file if one was created
        if remove_tmp:
            if self._input_filename:
                remove(self._input_filename)
                self._input_filename = None

        return result

    def _get_result_paths(self, data):
        """Return {'Tree': ResultPath} for the file that captured stdout

        data is ignored. self._outfile is set by __call__, so this is only
        meaningful after (or during) a call.
        """
        result = {}
        result['Tree'] = ResultPath(Path=self._outfile)
        return result
class RNAalifold(CommandLineApplication):
    """Application controller for RNAalifold application

    reads aligned RNA sequences from stdin or file.aln and calculates
    their minimum free energy (mfe) structure, partition function (pf)
    and base pairing probability matrix.

    OPTIONS
       -cv <float>
              Set the weight of the covariance term in the energy function
              to factor. Default is 1.

       -nc <float>
              Set the penalty for non-compatible sequences in the
              covariance term of the energy function to factor.
              Default is 1.

       -E     Score pairs with endgaps same as gap-gap pairs.

       -mis   Output "most informative sequence" instead of simple
              consensus: For each column of the alignment output the set
              of nucleotides with frequency greater than average in IUPAC
              notation.

       -p     Calculate the partition function and base pairing
              probability matrix in addition to the mfe structure.
              Default is calculation of mfe structure only.

       -noLP  Produce structures without lonely pairs (helices of
              length 1). In the mfe case structures with lonely pairs are
              strictly forbidden. For partition function folding this
              disallows pairs that can only occur isolated. Setting this
              option provides a significant speedup.

       The -T, -d, -4, -noGU, -noCloseGU, -e, -P, -nsp, options should
       work as in RNAfold

       If using -C constraints will be read from stdin, the alignment has
       to be given as a filename on the command line.

    For more info see respective man pages.
    """
    _parameters = {
        '-cv': ValuedParameter(Prefix='-', Name='cv', Delimiter=' '),
        '-nc': ValuedParameter(Prefix='-', Name='nc', Delimiter=' '),
        '-E': FlagParameter(Prefix='-', Name='E'),
        '-mis': FlagParameter(Prefix='-', Name='mis'),
        '-noLP': FlagParameter(Prefix='-', Name='noLP'),
        '-T': ValuedParameter(Prefix='-', Name='T', Value=37, Delimiter=' '),
        '-4': FlagParameter(Prefix='-', Name=4),
        '-d': MixedParameter(Prefix='-', Name='d', Delimiter=''),
        '-noGU': FlagParameter(Prefix='-', Name='noGU'),
        '-noCloseGU': FlagParameter(Prefix='-', Name='noCloseGU'),
        '-e': ValuedParameter(Prefix='-', Name='e', Delimiter=' '),
        '-P': ValuedParameter(Prefix='-', Name='P', Delimiter=' '),
        '-nsp': ValuedParameter(Prefix='-', Name='nsp', Delimiter=' '),
        '-C': FlagParameter(Prefix='-', Name='C'),
    }
    _synonyms = {'Temperature': '-T', 'Temp': '-T', 'EnergyRange': '-e'}
    _command = 'RNAalifold'
    _input_handler = '_input_as_string'

    def _get_result_paths(self, data):
        """Specify the paths of the output files generated by the application

        data: the data the application is called on (not inspected here).

        You always get back: StdOut, StdErr, and ExitStatus.
        In addition RNAalifold writes a file: alirna.ps. It seems that this
        file is always written (no exceptions found so far).
        The documentation says the application can produce a dotplot
        (alidot.ps), but it is unclear when this file is produced, and
        thus it is not added to the results dictionary.

        Returns a dict with the single key 'SS'.
        """
        result = {}
        # The file name was missing, leaving the path pointing at the
        # working directory itself.
        result['SS'] = ResultPath(Path=self.WorkingDir + 'alirna.ps',
                                  IsWritten=True)
        return result