Exemple #1
0
 def test_str(self):
     """FilePath: str() yields the path enclosed in double quotes """
     # A single hard-coded expectation acts as a sanity check ...
     quoted_name = str(FilePath(self.filename))
     self.assertEqual(quoted_name, '"filename.txt"')
     # ... then the same rule is checked against every example path
     for raw_path in self.all_paths:
         expected = '"' + raw_path + '"'
         self.assertEqual(str(FilePath(raw_path)), expected)
Exemple #2
0
 def test_FilePath_identity_preserved(self):
     """FilePath: trivial actions on FilePaths yield original FilePath
     """
     p = FilePath(self.filename)
     # Creating FilePath from FilePath results in FilePath
     # equal to original
     self.assertEqual(FilePath(p), p)
     # Use a distinct loop variable: reusing `p` here would rebind it to
     # the last entry of all_paths, and the append check below would then
     # no longer have the FilePath built above on its left-hand side.
     for path in self.all_paths:
         self.assertEqual(FilePath(path), path)
     # Appending an empty FilePath to a FilePath results in FilePath
     # equal to original
     self.assertEqual(p + FilePath(''), p)
Exemple #3
0
 def test_add(self):
     """FilePath: add (or joining of paths) functions as expected """
     # Both variants below must produce the same joined path
     expected = FilePath('a/relative/path/filename.txt')

     # FilePath + FilePath
     actual = FilePath(self.relative_dir_path) + FilePath(self.filename)
     self.assertEqual(actual, expected)
     # result is a FilePath; assertTrue is used instead of a bare assert so
     # the check is not stripped under -O and failures are reported by
     # the test framework
     self.assertTrue(isinstance(actual, FilePath))

     # appending a string to a FilePath results in a FilePath
     actual = FilePath(self.relative_dir_path) + 'filename.txt'
     self.assertEqual(actual, expected)
     self.assertTrue(isinstance(actual, FilePath))
Exemple #4
0
 def _absolute(self, path):
     """ Return path as a FilePath, prefixed with WorkingDir if relative """
     candidate = FilePath(path)
     if not isabs(candidate):
         # both operands are Path objects, so + is an acceptable way to
         # join them
         candidate = self.WorkingDir + candidate
     return candidate
Exemple #5
0
    def __init__(self, Path, IsWritten=True):
        """ Set up the ResultPath object

            Path: absolute or relative location of the file; it is stored
                wrapped in a FilePath
            IsWritten: boolean flag recording whether the file has been
                written, default = True
        """
        self.IsWritten = IsWritten
        self.Path = FilePath(Path)
Exemple #6
0
    def _input_as_path(self, data):
        """ Wrap data in a FilePath and return it

            data: path or filename, most likely as a string

            * Note: the result is a FilePath object (which is a string
            subclass); its str() form is the path wrapped in quotes.

        """
        wrapped_path = FilePath(data)
        return wrapped_path
Exemple #7
0
    def _input_as_lines(self, data):
        """ Write a seq of lines to a temp file and return the filename string

            data: a sequence to be written to a file, each element of the
                sequence will compose a line in the file

            Returns the temp file's name as a FilePath object (which is a
            string subclass). The same name is stored in
            self._input_filename.

           * Note: newline characters are stripped from both ends of each
                element (str(d).strip('\n')) before the elements are joined
                with a single newline, in order to avoid multiple new lines
                accidentally being written to the file
        """
        # getTmpFilename's result is wrapped once; wrapping a FilePath in
        # FilePath again would be a no-op, so no second conversion is needed
        filename = self._input_filename = \
            FilePath(self.getTmpFilename(self.TmpDir))
        data_to_file = '\n'.join([str(d).strip('\n') for d in data])
        # the with-block guarantees the handle is closed even if write fails
        with open(filename, 'w') as data_file:
            data_file.write(data_to_file)
        return filename
Exemple #8
0
    def _set_WorkingDir(self, path):
        """Sets the working directory, creating it when possible

        A '/' is always appended to path before it is stored. The caller
        may or may not have supplied a trailing '/', and since repeated
        trailing slashes are harmless it is convenient to be able to rely
        on one being present instead of checking for it.
        """
        with_trailing_slash = FilePath(path) + '/'
        self._curr_working_dir = with_trailing_slash
        try:
            mkdir(self.WorkingDir)
        except OSError:
            # Directory already exists
            pass
Exemple #9
0
    def _input_as_multiline_string(self, data):
        """Write a multiline string to a temp file and return the filename.

            data: a multiline string to be written to a file.

            Returns the temp file's name as a FilePath object (which is a
            string subclass). The same name is stored in
            self._input_filename.

        """
        filename = self._input_filename = \
            FilePath(self.getTmpFilename(self.TmpDir))
        # the with-block guarantees the handle is closed even if write fails
        with open(filename, 'w') as data_file:
            data_file.write(data)
        return filename
Exemple #10
0
def main():
    option_parser, options, args = parse_command_line_parameters(**script_info)
    DEBUG = options.verbose
    check_options(option_parser, options)
    start_time = time()
    option_lines = format_options_as_lines(options)
    if DEBUG:
        print FORMAT_BAR
        print "Running with options:"
        for line in sorted(option_lines):
            print line
        print FORMAT_BAR

    # because the blast app controller uses absolute paths, make sure subject
    # db path is fully specified

    subject_db = options.subjectdb
    if not subject_db.startswith('/'):
        subject_db = join(getcwd(), subject_db)
    if not options.no_format_db:

        # initialize object
        inpath = FilePath(abspath(options.subjectdb))
        subject_dir, subj_file = split(inpath)

        fdb = FormatDb(WorkingDir=subject_dir)
        # Currently we do not support protein blasts, but
        # this would be easy to add in the future...
        fdb.Parameters['-p'].on('F')

        # Create indices for record lookup
        fdb.Parameters['-o'].on('T')

        # Set input database
        fdb.Parameters['-i'].on(subject_db)

        formatdb_cmd = fdb.BaseCommand

        if DEBUG:
            print "Formatting db with command: %s" % formatdb_cmd

        app_result = fdb(subject_db)
        formatdb_filepaths = []
        for v in app_result.values():
            try:
                formatdb_filepaths.append(v.name)
            except AttributeError:
                # not a file object, so no path to return
                pass

        db_format_time = time() - start_time

        if DEBUG:
            print "Formatting subject db took: %2.f seconds" % db_format_time
            print "formatdb log file written to: %s" % app_result['log']
            print FORMAT_BAR
    else:
        db_format_time = time() - start_time
        formatdb_cmd = "None (formatdb not called)"
        # Check that User-Supplied subjectdb is valid
        db_ext = [".nhr", ".nin", ".nsd", ".nsi", ".nsq"]
        formatdb_filepaths = [subject_db + ext for ext in db_ext]

        if DEBUG:
            print "Checking that pre-existing formatdb files exist and can be read."
            print "Files to be checked:"
            for fp in formatdb_filepaths:
                print fp
            print FORMAT_BAR

        try:
            formatdb_files = [open(db_f, "U") for db_f in formatdb_filepaths]
            [f.close() for f in formatdb_files]
        except IOError:
            if DEBUG:
                print "Cannot open user-supplied database file:", db_f
            option_parser.error(
                """Problem with -d and --no_format_db option combination: Cannot open the following user-supplied database file: %s. Consider running without --no_format_db to let formatdb generate these required files"""
                % db_f)

        if DEBUG:
            print "OK: BLAST Database files exist and can be read."
            print FORMAT_BAR

    # Perform BLAST search
    blast_results, hit_ids, removed_hit_ids = find_homologs(
        options.querydb,
        subject_db,
        options.e_value,
        options.max_hits,
        options.working_dir,
        options.blastmatroot,
        options.wordsize,
        options.percent_aligned,
        DEBUG=DEBUG)

    blast_time = (time() - start_time) - db_format_time

    if DEBUG:
        print "BLAST search took: %2.f minute(s)" % (blast_time / 60.0)
        print FORMAT_BAR

    # Create output folder
    outputdir = options.outputdir
    try:
        makedirs(outputdir)
    except OSError:
        pass

    # Record raw blast results
    raw_blast_results_path = join(outputdir, "raw_blast_results.txt")
    f = open(raw_blast_results_path, 'w')
    f.writelines(blast_results)
    f.close()

    # Record excluded seqs
    excluded_seqs_path = join(outputdir, "matching.fna")
    ids_to_seq_file(hit_ids, options.querydb, excluded_seqs_path, "")

    # Record included (screened) seqs
    included_seqs_path = join(outputdir, "non-matching.fna")
    all_ids = ids_from_fasta_lines(open(options.querydb))
    included_ids = set(all_ids) - hit_ids
    ids_to_seq_file(included_ids, options.querydb, included_seqs_path, "")

    log_lines = compose_logfile_lines(start_time, db_format_time, blast_time,
                                      option_lines, formatdb_cmd,
                                      blast_results, options, all_ids, hit_ids,
                                      removed_hit_ids, included_ids, DEBUG)

    log_path = join(outputdir, "sequence_exclusion.log")
    if DEBUG:
        print "Writing summary to: %s" % log_path

    f = open(log_path, 'w')
    f.writelines(log_lines)
    f.close()

    if not options.no_clean:
        if DEBUG:

            print FORMAT_BAR
            print "|                           Cleanup                        |"
            print FORMAT_BAR

        if not options.no_format_db:
            if options.verbose:
                print "Cleaning up formatdb files:", formatdb_filepaths
            remove_files(formatdb_filepaths)
        else:
            if options.verbose:
                print "Formatdb not run...nothing to clean"
Exemple #11
0
    def __call__(self, data=None, remove_tmp=True):
        """Run the application with the specified kwargs on data

            data: anything that can be cast into a string or written out to
                a file. Usually either a list of things or a single string or
                number. input_handler will be called on this data before it
                is passed as part of the command-line argument, so by creating
                your own input handlers you can customize what kind of data
                you want your application to accept

            remove_tmp: if True, removes the temporary input file recorded in
                self._input_filename (if any) after the run

            Returns the CommandLineAppResult built from the run, or the
            value of self._handle_app_result_build_failure if building
            that result raises ApplicationError.

            Raises AssertionError (carrying the command) when self.HaltExec
            is set, and ApplicationError when the exit status is rejected by
            self._accept_exit_status.
        """
        input_handler = self.InputHandler
        suppress_stdout = self.SuppressStdout
        suppress_stderr = self.SuppressStderr
        # stdout and stderr each go either to /dev/null or to a fresh temp
        # file; both paths are wrapped in FilePath so they are treated the
        # same way when the command string is built
        if suppress_stdout:
            outfile = FilePath('/dev/null')
        else:
            outfile = FilePath(self.getTmpFilename(self.TmpDir))
        if suppress_stderr:
            errfile = FilePath('/dev/null')
        else:
            errfile = FilePath(self.getTmpFilename(self.TmpDir))
        if data is None:
            input_arg = ''
        else:
            input_arg = getattr(self, input_handler)(data)

        # Build up the command, consisting of a BaseCommand followed by
        # input and output (file) specifications; filter(None, ...) drops
        # empty pieces such as a missing input argument
        command = self._command_delimiter.join(filter(None,
                                                      [self.BaseCommand,
                                                       str(input_arg),
                                                       '>', str(outfile),
                                                       '2>', str(errfile)]))
        if self.HaltExec:
            raise AssertionError("Halted exec with command:\n" + command)
        # The return value of system is a 16-bit number containing the signal
        # number that killed the process, and then the exit status.
        # We only want to keep the exit status so do a right bitwise shift to
        # get rid of the signal number byte
        exit_status = system(command) >> 8

        # Determine if error should be raised due to exit status of
        # application
        if not self._accept_exit_status(exit_status):
            # read the captured output through with-blocks so the handles
            # are closed before the exception propagates
            with open(outfile) as out_fh:
                stdout_text = out_fh.read()
            with open(errfile) as err_fh:
                stderr_text = err_fh.read()
            raise ApplicationError('Unacceptable application exit ' +
                                   'status: %s\n' % str(exit_status) +
                                   'Command:\n%s\n' % command +
                                   'StdOut:\n%s\n' % stdout_text +
                                   'StdErr:\n%s\n' % stderr_text)

        # open the stdout and stderr if not being suppressed; these handles
        # are handed to the result object, so they are not closed here
        out = None
        if not suppress_stdout:
            out = open(outfile, "r")
        err = None
        if not suppress_stderr:
            err = open(errfile, "r")

        result_paths = self._get_result_paths(data)
        try:
            result = \
                CommandLineAppResult(out, err, exit_status,
                                     result_paths=result_paths)
        except ApplicationError:
            result = \
                self._handle_app_result_build_failure(out, err, exit_status,
                                                      result_paths)

        # Clean up the input file if one was created
        if remove_tmp:
            if self._input_filename:
                remove(self._input_filename)
                self._input_filename = None

        return result
Exemple #12
0
    def __init__(self, params=None, InputHandler=None, SuppressStderr=None,
                 SuppressStdout=None, WorkingDir=None, TmpDir='/tmp',
                 TmpNameLen=20, HALT_EXEC=False):
        """ Set up the CommandLineApplication object

            params: a dictionary mapping the Parameter id or synonym to its
                value (or None for FlagParameters or MixedParameters in flag
                mode) for Parameters that should be turned on
            InputHandler: name (as a string) of the method to run on data
                when the object is called. When None, the class attribute
                _input_handler is used
            SuppressStderr: if set to True, will route standard error to
                /dev/null. When None, the class attribute _suppress_stderr
                is used
            SuppressStdout: if set to True, will route standard out to
                /dev/null. When None, the class attribute _suppress_stdout
                is used
            WorkingDir: the directory where you want the application to run.
                When None, the class attribute _working_dir is used if it is
                set, otherwise the current working directory. Changing it is
                useful when the program being run creates output in its
                working directory and you either don't want that output
                where you are running the program, or the user running the
                script doesn't have write access to the current working
                directory
                WARNING: WorkingDir MUST be an absolute path!
            TmpDir: the directory where temp files will be created, /tmp
                by default
            TmpNameLen: the length of the temp file name
            HALT_EXEC: if True, raises exception w/ command output just
            before execution, doesn't clean up temp files. Default False.
        """
        # Determine if the application is installed, and raise an error if not
        self._error_on_missing_application(params)

        # an explicitly passed (non-None) value wins over the class default
        self.InputHandler = (InputHandler if InputHandler is not None
                             else self._input_handler)
        self.SuppressStderr = (SuppressStderr if SuppressStderr is not None
                               else self._suppress_stderr)
        self.SuppressStdout = (SuppressStdout if SuppressStdout is not None
                               else self._suppress_stdout)

        if WorkingDir is None:
            WorkingDir = self._working_dir or getcwd()
        self.WorkingDir = FilePath(WorkingDir)
        self.TmpDir = FilePath(TmpDir)
        self.TmpNameLen = TmpNameLen
        self.HaltExec = HALT_EXEC

        # Name of the file currently used as input to the application.
        # This matters most when an input handler creates a temporary file
        # and the output filenames are based on the input filename.
        self._input_filename = None

        super(CommandLineApplication, self).__init__(params=params)
Exemple #13
0
 def test_str_path_is_None(self):
     """FilePath: a path of None stringifies to the empty string """
     none_path = FilePath(None)
     self.assertEqual(str(none_path), '')
Exemple #14
0
 def test_init(self):
     """FilePath: construction succeeds and compares equal to its input """
     for raw_path in self.all_paths:
         built = FilePath(raw_path)
         self.assertEqual(built, raw_path)
     # the empty path is a valid input as well
     empty = FilePath('')
     self.assertEqual(empty, '')