Example #1
0
    def _input_as_multiline_string(self, data):
        """Write a multiline string to a temp file and return the filename.

            data: a multiline string to be written to a file.

           * Note: the result will be the filename as a FilePath object
            (which is a string subclass).

            The filename is also stored on self._input_filename.

        """
        filename = self._input_filename = \
            FilePath(self.getTmpFilename(self.TmpDir))
        # The context manager closes the handle even if write() raises,
        # so a failed write no longer leaks an open file.
        with open(filename, 'w') as data_file:
            data_file.write(data)
        return filename
Example #2
0
    def _input_as_list(self, data):
        '''Takes the positional arguments as input in a list.

        The list input here should be [query_file_path, database_file_path,
        output_file_path]

        All three paths must be absolute. They are stored on the instance
        as self._query, self._database and self._output. Every parameter
        that is switched on and restricted to a fixed set of values is
        then validated.

        Raises ApplicationError if any path is not absolute or if a
        switched-on parameter holds a value outside its allowed set.

        Always returns the empty string.
        '''
        query, database, output = data
        if (not isabs(database)) \
                or (not isabs(query)) \
                or (not isabs(output)):
            raise ApplicationError("Only absolute paths allowed.\n%s" %
                                   ', '.join(data))

        self._database = FilePath(database)
        self._query = FilePath(query)
        self._output = ResultPath(output, IsWritten=True)

        params = self.Parameters

        # check parameters that can only take a particular set of values
        # check combination of database and query type
        if params['-t'].isOn() and params['-q'].isOn() and \
                (params['-t'].Value, params['-q'].Value) not in \
                self._valid_combinations:
            # '%' binds tighter than '+', so the tuple fills the two
            # placeholders of the second literal only.
            error_message = "Invalid combination of database and query " + \
                            "types ('%s', '%s').\n" % \
                            (params['-t'].Value,
                             params['-q'].Value)

            error_message += "Must be one of: %s\n" % \
                             repr(self._valid_combinations)

            raise ApplicationError(error_message)

        # check database type
        if params['-t'].isOn() and \
                params['-t'].Value not in self._database_types:
            error_message = "Invalid database type %s\n" % \
                            params['-t'].Value

            error_message += "Allowed values: %s\n" % \
                             ', '.join(self._database_types)

            raise ApplicationError(error_message)

        # check query type
        if params['-q'].isOn() and \
                params['-q'].Value not in self._query_types:
            error_message = "Invalid query type %s\n" % \
                            params['-q'].Value

            error_message += "Allowed values: %s\n" % \
                ', '.join(self._query_types)

            raise ApplicationError(error_message)

        # check mask type
        if params['-mask'].isOn() and \
                params['-mask'].Value not in self._mask_types:
            # report the offending value, not the Parameter object
            error_message = "Invalid mask type %s\n" % \
                            params['-mask'].Value

            error_message += "Allowed Values: %s\n" % \
                ', '.join(self._mask_types)

            raise ApplicationError(error_message)

        # check qmask type
        if params['-qMask'].isOn() and \
                params['-qMask'].Value not in self._mask_types:
            error_message = "Invalid qMask type %s\n" % \
                            params['-qMask'].Value

            error_message += "Allowed values: %s\n" % \
                             ', '.join(self._mask_types)

            raise ApplicationError(error_message)

        # check repeat type (validated against the mask types)
        if params['-repeats'].isOn() and \
                params['-repeats'].Value not in self._mask_types:
            # same key as the condition above: '-repeats', not '-repeat'
            error_message = "Invalid repeat type %s\n" % \
                            params['-repeats'].Value

            error_message += "Allowed values: %s\n" % \
                             ', '.join(self._mask_types)

            raise ApplicationError(error_message)

        # check output format
        if params['-out'].isOn() and \
                params['-out'].Value not in self._out_types:
            # report the offending value, not the Parameter object
            error_message = "Invalid output type %s\n" % \
                            params['-out'].Value

            error_message += "Allowed values: %s\n" % \
                             ', '.join(self._out_types)

            raise ApplicationError(error_message)

        return ''
Example #3
0
    def __call__(self, data=None, remove_tmp=True):
        """Run the application with the specified kwargs on data

            data: anything that can be cast into a string or written out to
                a file. Usually either a list of things or a single string or
                number. input_handler will be called on this data before it
                is passed as part of the command-line argument, so by creating
                your own input handlers you can customize what kind of data
                you want your application to accept

            remove_tmp: if True, removes the temporary input file recorded
                in self._input_filename (if any) after the run

            Returns the CommandLineAppResult built from the run, or whatever
            self._handle_app_result_build_failure returns when building
            that result raises ApplicationError.

            Raises AssertionError (carrying the command) when self.HaltExec
            is set, and ApplicationError when the exit status is rejected
            by self._accept_exit_status.
        """
        input_handler = self.InputHandler
        suppress_stdout = self.SuppressStdout
        suppress_stderr = self.SuppressStderr
        if suppress_stdout:
            outfile = FilePath('/dev/null')
        else:
            outfile = self.getTmpFilename(self.TmpDir)
        if suppress_stderr:
            errfile = FilePath('/dev/null')
        else:
            errfile = FilePath(self.getTmpFilename(self.TmpDir))
        if data is None:
            input_arg = ''
        else:
            # InputHandler is a method name; look it up and apply it
            input_arg = getattr(self, input_handler)(data)

        # Build up the command, consisting of a BaseCommand followed by
        # input and output (file) specifications. filter(None, ...) drops
        # empty pieces such as an empty input argument.
        command = self._command_delimiter.join(filter(None,
            [self.BaseCommand, str(input_arg), '>', str(outfile), '2>',
             str(errfile)]))
        if self.HaltExec:
            raise AssertionError("Halted exec with command:\n" + command)
        # The return value of system is a 16-bit number containing the signal
        # number that killed the process, and then the exit status.
        # We only want to keep the exit status so do a right bitwise shift to
        # get rid of the signal number byte
        exit_status = system(command) >> 8

        # Determine if error should be raised due to exit status of
        # application
        if not self._accept_exit_status(exit_status):
            # Read both streams through context managers so the handles
            # are closed before the exception propagates.
            with open(outfile) as stdout_file:
                stdout_text = stdout_file.read()
            with open(errfile) as stderr_file:
                stderr_text = stderr_file.read()
            raise ApplicationError(
                'Unacceptable application exit status: %s\n'
                % str(exit_status) +
                'Command:\n%s\nStdOut:\n%s\nStdErr:\n%s\n'
                % (command, stdout_text, stderr_text))
        # NOTE: exit status 127 (what bash returns when the command could
        # not be found) is not special-cased here; it only raises if
        # _accept_exit_status rejects it.

        # open the stdout and stderr if not being suppressed; the open
        # handles are handed to the result object and are not closed here
        out = None
        if not suppress_stdout:
            out = open(outfile, "r")
        err = None
        if not suppress_stderr:
            err = open(errfile, "r")

        try:
            result = CommandLineAppResult(
                out, err, exit_status,
                result_paths=self._get_result_paths(data))
        except ApplicationError:
            result = self._handle_app_result_build_failure(
                out, err, exit_status, self._get_result_paths(data))

        # Clean up the input file if one was created
        if remove_tmp and self._input_filename:
            remove(self._input_filename)
            self._input_filename = None

        return result
Example #4
0
    def __init__(self, params=None, InputHandler=None, SuppressStderr=None,
                 SuppressStdout=None, WorkingDir=None, TmpDir='/tmp',
                 TmpNameLen=20, HALT_EXEC=False):
        """ Set up a CommandLineApplication instance

            params: forwarded to the missing-application check and to the
                parent class initializer; a dictionary mapping the
                Parameter id or synonym to its value (or None for
                FlagParameters or MixedParameters in flag mode) for
                Parameters that should be turned on
            InputHandler: name (string) of the method run on data when the
                object is called; None falls back to the class-level
                _input_handler
            SuppressStderr: if True, standard error is routed to /dev/null;
                None falls back to the class-level _suppress_stderr
            SuppressStdout: if True, standard out is routed to /dev/null;
                None falls back to the class-level _suppress_stdout
            WorkingDir: directory the application should run in; None
                falls back to the class-level _working_dir and, if that is
                empty, to the current working directory. Useful when the
                program writes output into its working directory.
                WARNING: WorkingDir MUST be an absolute path!
            TmpDir: directory where temp files will be created, /tmp
                by default
            TmpNameLen: the length of the temp file name
            HALT_EXEC: if True, raises exception w/ command output just
                before execution, doesn't clean up temp files. Default False.
        """
        # Fail early if the wrapped application is not installed
        self._error_on_missing_application(params)

        # Explicit arguments win; None means "use the class default"
        self.InputHandler = (InputHandler if InputHandler is not None
                             else self._input_handler)
        self.SuppressStderr = (SuppressStderr if SuppressStderr is not None
                               else self._suppress_stderr)
        self.SuppressStdout = (SuppressStdout if SuppressStdout is not None
                               else self._suppress_stdout)

        run_dir = WorkingDir
        if run_dir is None:
            run_dir = self._working_dir or getcwd()
        # The working directory is only recorded here, not created
        self.WorkingDir = FilePath(run_dir)

        self.TmpDir = FilePath(TmpDir)
        self.TmpNameLen = TmpNameLen
        self.HaltExec = HALT_EXEC

        # Name of the file currently used as input to the application.
        # This matters when an input handler writes a temporary file and
        # the output filenames are derived from the input filename.
        self._input_filename = None

        super(CommandLineApplication, self).__init__(params=params)
Example #5
0
def main():
    """Screen query sequences against a subject database using BLAST.

    Steps, in order:
      1. Parse and check the command-line options.
      2. Unless options.no_format_db is set, run formatdb on the subject
         database; otherwise check that the pre-built database files
         (.nhr, .nin, .nsd, .nsi, .nsq) can be opened.
      3. Run find_homologs with the query against the subject database.
      4. Write raw_blast_results.txt, matching.fna, non-matching.fna and
         sequence_exclusion.log into the output directory.
      5. Unless options.no_clean is set, remove the files formatdb
         produced (only when formatdb was run here).

    Progress is printed when options.verbose is set.
    """
    option_parser, options, args = parse_command_line_parameters(**script_info)
    DEBUG = options.verbose
    check_options(option_parser, options)
    start_time = time()
    option_lines = format_options_as_lines(options)
    if DEBUG:
        print(FORMAT_BAR)
        print("Running with options:")
        for line in sorted(option_lines):
            print(line)
        print(FORMAT_BAR)

    # because the blast app controller uses absolute paths, make sure subject
    # db path is fully specified
    subject_db = options.subjectdb
    if not subject_db.startswith('/'):
        subject_db = join(getcwd(), subject_db)

    if not options.no_format_db:

        # initialize object; formatdb runs in the subject db's directory
        inpath = FilePath(abspath(options.subjectdb))
        subject_dir, _ = split(inpath)

        fdb = FormatDb(WorkingDir=subject_dir)
        # Currently we do not support protein blasts, but
        # this would be easy to add in the future...
        fdb.Parameters['-p'].on('F')

        # Create indices for record lookup
        fdb.Parameters['-o'].on('T')

        # Set input database
        fdb.Parameters['-i'].on(subject_db)

        formatdb_cmd = fdb.BaseCommand

        if DEBUG:
            print("Formatting db with command: %s" % formatdb_cmd)

        app_result = fdb(subject_db)
        formatdb_filepaths = []
        for v in app_result.values():
            try:
                formatdb_filepaths.append(v.name)
            except AttributeError:
                # not a file object, so no path to return
                pass

        db_format_time = time() - start_time

        if DEBUG:
            print("Formatting subject db took: %.2f seconds" % db_format_time)
            print("formatdb log file written to: %s" % app_result['log'])
            print(FORMAT_BAR)
    else:
        db_format_time = time() - start_time
        formatdb_cmd = "None (formatdb not called)"
        # Check that User-Supplied subjectdb is valid
        db_ext = [".nhr", ".nin", ".nsd", ".nsi", ".nsq"]
        formatdb_filepaths = [subject_db + ext for ext in db_ext]

        if DEBUG:
            print("Checking that pre-existing formatdb files exist and can be read.")
            print("Files to be checked:")
            for fp in formatdb_filepaths:
                print(fp)
            print(FORMAT_BAR)

        try:
            # An explicit loop keeps db_f bound to the file being checked
            # (no reliance on a comprehension variable leaking) and closes
            # each file before the next one is opened.
            for db_f in formatdb_filepaths:
                with open(db_f, "U"):
                    pass
        except IOError:
            if DEBUG:
                print("Cannot open user-supplied database file: %s" % db_f)
            option_parser.error(
                """Problem with -d and --no_format_db option combination: Cannot open the following user-supplied database file: %s. Consider running without --no_format_db to let formatdb generate these required files"""
                % db_f)

        if DEBUG:
            print("OK: BLAST Database files exist and can be read.")
            print(FORMAT_BAR)

    # Perform BLAST search
    blast_results, hit_ids, removed_hit_ids = find_homologs(
        options.querydb, subject_db, options.e_value, options.max_hits,
        options.working_dir, options.blastmatroot, options.wordsize,
        options.percent_aligned, DEBUG=DEBUG)

    blast_time = (time() - start_time) - db_format_time

    if DEBUG:
        print("BLAST search took: %.2f minute(s)" % (blast_time / 60.0))
        print(FORMAT_BAR)

    # Create output folder; an OSError (e.g. the folder already exists)
    # is deliberately ignored
    outputdir = options.outputdir
    try:
        makedirs(outputdir)
    except OSError:
        pass

    # Record raw blast results
    raw_blast_results_path = join(outputdir, "raw_blast_results.txt")
    with open(raw_blast_results_path, 'w') as f:
        f.writelines(blast_results)

    # Record excluded seqs
    excluded_seqs_path = join(outputdir, "matching.fna")
    ids_to_seq_file(hit_ids, options.querydb, excluded_seqs_path, "")

    # Record included (screened) seqs
    included_seqs_path = join(outputdir, "non-matching.fna")
    # The query file stays open until the ids have been turned into a set,
    # so this is safe even if ids_from_fasta_lines reads lazily.
    with open(options.querydb) as query_file:
        all_ids = ids_from_fasta_lines(query_file)
        included_ids = set(all_ids) - hit_ids
    ids_to_seq_file(included_ids, options.querydb, included_seqs_path, "")

    log_lines = compose_logfile_lines(start_time, db_format_time, blast_time,
                                      option_lines, formatdb_cmd,
                                      blast_results, options, all_ids,
                                      hit_ids, removed_hit_ids,
                                      included_ids, DEBUG)

    log_path = join(outputdir, "sequence_exclusion.log")
    if DEBUG:
        print("Writing summary to: %s" % log_path)

    with open(log_path, 'w') as f:
        f.writelines(log_lines)

    if not options.no_clean:
        if DEBUG:
            print(FORMAT_BAR)
            print("|                           Cleanup                        |")
            print(FORMAT_BAR)

        if not options.no_format_db:
            if options.verbose:
                print("Cleaning up formatdb files: %s" % (formatdb_filepaths,))
            remove_files(formatdb_filepaths)
        else:
            if options.verbose:
                print("Formatdb not run...nothing to clean")
Example #6
0
 def test_str_path_is_None(self):
     """FilePath: str() of a FilePath built from None is the empty string"""
     rendered = str(FilePath(None))
     self.assertEqual(rendered, '')
Example #7
0
 def test_init(self):
     """FilePath: construction succeeds and compares equal to its input"""
     # every fixture path must round-trip through the constructor
     for raw_path in self.all_paths:
         wrapped = FilePath(raw_path)
         self.assertEqual(wrapped, raw_path)
     # the empty string is accepted as well
     empty = FilePath('')
     self.assertEqual(empty, '')