Beispiel #1
0
    def test_init(self):
        """Exercise registration and clean-up on a TempFileManager."""
        manager = TempFileManager()
        manager.SetRootDir("/scratch")

        # A newly registered temporary file is expected to exist as a file.
        tmp_file = manager.RegisterNewTmpFile()
        self.assertTrue(path.isfile(tmp_file))

        # Registering an existing directory should yield a directory path.
        registered_dir = manager.RegisterExistingTmpFile("/tmp", isDir=True)
        self.assertTrue(path.isdir(registered_dir))

        # Registering a path that does not exist must raise IOError.
        with self.assertRaises(IOError):
            manager.RegisterExistingTmpFile("filethatdoesnotexist")

        # A newly registered temporary directory exists and is tracked.
        tmp_dir = manager.RegisterNewTmpFile(isDir=True)
        self.assertTrue(path.isdir(tmp_dir))
        self.assertTrue(manager._isRegistered(tmp_dir))

        # The requested suffix must show up in the generated file name.
        txt_file = manager.RegisterNewTmpFile(suffix=".txt")
        self.assertTrue(txt_file.endswith(".txt"))

        # Switch the root directory once more before cleaning up.
        manager.SetRootDir("~/tmp/")

        # After clean-up the new file and directory are gone and both
        # registries are empty.
        manager.CleanUp()
        self.assertFalse(path.exists(tmp_file))
        self.assertFalse(path.exists(tmp_dir))
        self.assertEqual(manager.fileDB, [])
        self.assertEqual(manager.dirDB, [])
Beispiel #2
0
class AlignService(Service):
    """Super class for all alignment services.

        AlignService takes parsed options as input and drives an aligner
        whose raw output is written to a temporary SAM or BAM file.
        Non-abstract subclasses should define the following properties.
            name        : name of the subclass align service
            availability: availability of the subclass align service
            scoreSign   : score sign of the subclass align service
        Subclasses should override the following virtual methods.
            _preProcess()
            _toCmd()
            _postProcess()
        If --algorithmOptions needs to be supported by a subclass, override
            _resolveAlgorithmOptions().

        NOTE(review): run() also uses self.progName and self._execute(),
        which are not defined in this class -- presumably provided by
        Service; confirm against the base class.

    """
    @property
    def scoreSign(self):
        """Align service score sign can be -1 or 1.
           -1: negative scores are better than positive ones.
           1: positive scores are better than negative ones.
        """
        raise NotImplementedError(
            "Virtual property scoreSign() for AlignService must be "
            "overwritten.")

    def _resolveAlgorithmOptions(self, options, fileNames):
        """A virtual method to resolve options specified within
            --algorithmOptions and options parsed from the command-line
            (including the config file).
            Input:
                options: options parsed from a command-line and a config file.
                fileNames: a PBAlignFiles object.
            Output: new options
            Raises:
                NotImplementedError if --algorithmOptions is non-empty and
                the subclass has not overridden this method.
        """
        if options.algorithmOptions is None or options.algorithmOptions == "":
            # Nothing to resolve: return a copy of the options so the
            # caller always receives a new object.
            return copy(options)

        raise NotImplementedError(
            "_resolveAlgorithmOptions() method for AlignService must be "
            "overridden if --algorithmOptions is specified.")

    def __init__(self, options, fileNames, tempFileManager=None):
        """Initialize an AlignService object.
            Need to resolve options specified within algorithmOptions;
                    patch default options if not specified by the user
                    inherit or initialize a temporary file manager
            Input:
                options        : options parsed from (a list of arguments and
                                 a config file if --configFile is specified).
                fileNames      : an object of PBAlignFiles
                tempFileManager: a temporary file manager. If it is None,
                                 create a new temporary file manager.
        """
        self._options = options

        # Verify and assign input & output files.
        self._fileNames = fileNames
        self._fileNames.SetInOutFiles(self._options.inputFileName,
                                      self._options.referencePath,
                                      self._options.outputFileName,
                                      self._options.regionTable,
                                      self._options.pulseFile)

        # Resolve options specified within --algorithmOptions with
        # options parsed from the argument list (e.g. the command-line)
        # or a config file.
        self._options = self._resolveAlgorithmOptions(self._options,
                                                      self._fileNames)

        # Patch PBalign default options if they haven't been specified yet.
        # Only the first element returned by importDefaultOptions is kept.
        self._options = importDefaultOptions(self._options)[0]

        if tempFileManager is None:
            self._tempFileManager = TempFileManager(self._options.tmpDir)
        else:
            # Reuse the caller's manager, but point it at the resolved
            # temporary directory.
            self._tempFileManager = tempFileManager
            self._tempFileManager.SetRootDir(self._options.tmpDir)

        # self._options is finalized. Use lazy logging arguments so the
        # options are only stringified when debug logging is enabled.
        logging.debug("Parsed arguments considering configFile and "
                      "algorithmOptions: %s", self._options)

    @property
    def cmd(self):
        """String of a command line to align reads."""
        return self._toCmd(self._options, self._fileNames,
                           self._tempFileManager)

    def _toCmd(self, options, fileNames, tempFileManager):
        """A virtual method to generate a command line string.

        Generate a command line of the aligner to use in bash based on
        options and PBAlignFiles.
            Input:
                options  : arguments parsed from the command-line, the
                           config file and --algorithmOptions.
                fileNames: a PBAlignFiles object.
                tempFileManager: temporary file manager.
            Output:
                a command-line string which can be used in bash.

        """
        raise NotImplementedError(
            "_toCmd() method for AlignService must be overridden")

    def _preProcess(self, inputFileName, referenceFile, regionTable,
                    noSplitSubreads, tempFileManager, isWithinRepository):
        """A virtual method to prepare inputs for the aligner.

           Input:
                inputFileName  : a PacBio BASE/PULSE/FOFN file.
                referenceFile  : a FASTA reference file.
                regionTable    : a region table RGN.H5/FOFN file.
                noSplitSubreads: whether to split subreads or not.
                tempFileManager: temporary file manager.
                isWithinRepository: whether or not the reference is within
                    a reference repository.
            Output:
                String, a FASTA file which can be used by the aligner.

        """
        raise NotImplementedError(
            "_preProcess() method for AlignService must be overridden")

    def _postProcess(self):
        """A virtual method to post process the generated output file. """
        raise NotImplementedError(
            "_postProcess() method for AlignService must be overridden")

    def run(self):
        """AlignService starts to run.

            Pre-process the inputs, register a temporary file for the
            aligner output, execute the aligner and post-process the result.
            Output:
                the (output, errCode, errMsg) tuple returned by
                self._execute().
            Raises:
                Any exception raised by self._execute() propagates
                unchanged.
        """
        logging.info("%s: Align reads to references using %s.",
                     self.name, self.progName)
        # Prepare inputs for the aligner.
        self._fileNames.queryFileName = self._preProcess(
            self._fileNames.inputFileName, self._fileNames.targetFileName,
            self._fileNames.regionTable, self._options.noSplitSubreads,
            self._tempFileManager, self._fileNames.isWithinRepository)

        # The aligner writes to a temporary file whose suffix depends on
        # the requested output format: BAM and XML outputs use ".bam",
        # everything else uses ".sam".
        outFormat = getFileFormat(self._fileNames.outputFileName)
        suffix = ".bam" if (outFormat == FILE_FORMATS.BAM
                            or outFormat == FILE_FORMATS.XML) else ".sam"
        self._fileNames.alignerSamOut = self._tempFileManager.\
            RegisterNewTmpFile(suffix=suffix)

        # Generate and execute cmd. Errors are deliberately not caught and
        # re-wrapped here: letting them propagate keeps the original
        # exception type and traceback intact for the caller.
        output, errCode, errMsg = self._execute()

        # Post process the results.
        self._postProcess()

        return output, errCode, errMsg