Ejemplo n.º 1
0
 def test_realpath(self):
     """Test real_upath and real_ppath.

     The assertions expect real_upath to return a path ending with
     backslash-escaped spaces and real_ppath to return a path ending with
     plain spaces, whether or not the input is already escaped.
     """
     # Backslashes are spelled '\\' because '\ ' is not a valid escape
     # sequence; the resulting string values are unchanged.
     escaped = "ref\\ with\\ space"
     plain = "ref with space"
     # Single-argument call form prints the same on Python 2 and Python 3.
     print(real_upath(plain))
     self.assertTrue(real_upath(plain).endswith(escaped))
     self.assertTrue(real_upath(escaped).endswith(escaped))
     self.assertTrue(real_ppath(plain).endswith(plain))
     self.assertTrue(real_ppath(escaped).endswith(plain))
Ejemplo n.º 2
0
 def test_realpath(self):
     """Test real_upath and real_ppath.

     Checks that real_upath yields a path ending in backslash-escaped
     spaces and real_ppath yields a path ending in plain spaces, for both
     escaped and unescaped inputs.
     """
     # '\\' is used for each literal backslash: '\ ' is an invalid escape
     # sequence, so the explicit form keeps the same value without relying
     # on it.
     # The print call uses the single-argument form, which behaves the
     # same on Python 2 and Python 3.
     print(real_upath("ref with space"))
     self.assertTrue(
         real_upath("ref with space").endswith("ref\\ with\\ space"))
     self.assertTrue(
         real_upath("ref\\ with\\ space").endswith("ref\\ with\\ space"))
     self.assertTrue(
         real_ppath("ref with space").endswith("ref with space"))
     self.assertTrue(
         real_ppath("ref\\ with\\ space").endswith("ref with space"))
Ejemplo n.º 3
0
    def _resolveAlgorithmOptions(self, options, fileNames):
        """ Resolve options specified within --algorithmOptions with
            options parsed from the command-line or the config file.
            Return updated options.
            If find conflicting values of the following options, error out.
               (1) --maxHits       and blasr --bestn
               (2) --minAnchorSize and blasr --minMatch
               (3) --useccs        and blasr --useccs/--useccsall/
                                   --useccsdenovo
            If find conflicting values of sawriter, regionTable, nproc and
            seed, the value given within --algorithmOptions is used.
            Every recognized option is removed from the algorithmOptions
            string; unrecognized tokens are kept in their original order.
            Input:
                options  : the original pbalign options from argumentList
                           and configFile. Modified in place.
                fileNames: an PBAlignFiles object. Its sawriterFileName and
                           regionTable attributes may be overwritten.
            Output:
                new options by resolving options specified within
                --algorithmOptions and the original pbalign options
            Raises:
                ValueError: if conflicting values are found or if any error
                            occurs while parsing (e.g., an option is missing
                            its value or has a non-integer value).
        """
        if options.algorithmOptions is None:
            return options

        ignoredBinaryOptions = ['-m', '-out', '-V']
        ignoredUnitaryOptions = [
            '-h', '--help', '--version', '-v', '-vv', '--sam', '--bam'
        ]

        items = self.__parseAlgorithmOptionItems(options.algorithmOptions)
        i = 0
        try:
            while i < len(items):
                infoMsg, errMsg, item = "", "", items[i]
                # Number of tokens the current option occupies: 2 for an
                # "option value" pair, 1 for a flag without a value.
                consumed = 2
                if item == "--sa":
                    val = real_upath(items[i + 1])
                    if fileNames.sawriterFileName != val:
                        infoMsg = "Over write sa file with {0}".format(val)
                        fileNames.sawriterFileName = val
                elif item == "--regionTable":
                    val = real_upath(items[i + 1])
                    if fileNames.regionTable != val:
                        infoMsg = "Over write regionTable with {0}.\n"\
                                  .format(val)
                        fileNames.regionTable = val
                elif item == "--bestn":
                    val = int(items[i + 1])
                    if options.maxHits is not None and \
                            int(options.maxHits) != val:
                        errMsg = "blasr --bestn specified within " + \
                                 "--algorithmOptions is equivalent to " + \
                                 "--maxHits. Conflicting values of " + \
                                 "--algorithmOptions '--bestn' and " +\
                                 "--maxHits have been found."
                    else:
                        options.maxHits = val
                elif item == "--minMatch":
                    val = int(items[i + 1])
                    if options.minAnchorSize is not None and \
                            int(options.minAnchorSize) != val:
                        errMsg = "blasr --minMatch specified within " + \
                                 "--algorithmOptions is equivalent to " + \
                                 "--minAnchorSize. Conflicting values " + \
                                 "of --algorithmOptions '--minMatch' and " + \
                                 "--minAnchorSize have been found."
                    else:
                        options.minAnchorSize = val
                elif item == "--nproc":
                    val = int(items[i + 1])
                    # The number of threads is not critical.
                    if options.nproc is None or \
                            int(options.nproc) != val:
                        infoMsg = "Over write nproc with {n}.".format(n=val)
                        options.nproc = val
                elif item == "--noSplitSubreads":
                    consumed = 1
                    if not options.noSplitSubreads:
                        infoMsg = "Over write noSplitSubreads with True."
                        options.noSplitSubreads = True
                elif item == "--concordant":
                    # A flag: only this token may be removed. Previously the
                    # two tokens following it were dropped as well.
                    consumed = 1
                    if not options.concordant:
                        infoMsg = "Over writer concordant with True."
                        options.concordant = True
                elif "--useccs" in item:  # useccs, useccsall, useccsdenovo
                    # The value is the flag name itself, so the following
                    # token belongs to another option and must be kept.
                    consumed = 1
                    val = item.lstrip('-')
                    if options.useccs != val and options.useccs is not None:
                        errMsg = "Found conflicting options in " + \
                            "--algorithmOptions '{v}' \nand --useccs={u}"\
                            .format(v=item, u=options.useccs)
                    else:
                        options.useccs = val
                elif item == "--unaligned":
                    val = str(items[i + 1])
                    options.unaligned = val
                elif item == "--seed" or item == "--randomSeed":
                    val = int(items[i + 1])
                    if options.seed is None or int(options.seed) != val:
                        infoMsg = "Overwrite random seed with {0}.".format(val)
                        options.seed = val
                elif item in ignoredBinaryOptions:
                    pass
                elif item in ignoredUnitaryOptions:
                    consumed = 1
                else:
                    # Unrecognized token: keep it and move on.
                    i += 1
                    continue

                if errMsg != "":
                    logging.error(errMsg)
                    raise ValueError(errMsg)

                if infoMsg != "":
                    logging.info(self.name + ": Resolve algorithmOptions. " +
                                 infoMsg)

                # Drop the handled option; i now points at the next token.
                del items[i:i + consumed]
        except Exception as e:
            # Any failure (including the conflict errors raised above) is
            # reported as a ValueError carrying the full option string.
            errMsg = "An error occured during parsing algorithmOptions " + \
                     "'{ao}': ".format(ao=options.algorithmOptions)
            logging.error(errMsg + str(e))
            raise ValueError(errMsg + str(e))

        # Existing suffix array always uses match size 8.
        # When BLASR search option -minMatch is less than 8, suffix array needs
        # to be created on the fly.
        if (options.minAnchorSize is not None and options.minAnchorSize != ""
                and int(options.minAnchorSize) < 8):
            logging.warning("Suffix array must be recreated on the fly when " +
                            "minMatch < 8, which may take a long time.")
            fileNames.sawriterFileName = None

        # Update algorithmOptions when resolve is done
        options.algorithmOptions = " ".join(items)
        return options
Ejemplo n.º 4
0
    def _resolveAlgorithmOptions(self, options, fileNames):
        """ Resolve options specified within --algorithmOptions with
            options parsed from the command-line or the config file.
            Return updated options.
            If find conflicting values of the following options, error out.
               (1) --maxHits       and blasr -bestn
               (2) --minAnchorSize and blasr -minMatch
               (3) --useccs        and blasr -useccs/-useccsall/-useccsdenovo
            If find conflicting values of sawriter, regionTable, nproc and
            seed, the value given within --algorithmOptions is used.
            Every recognized option is removed from the algorithmOptions
            string; unrecognized tokens are kept in their original order.
            Input:
                options  : the original pbalign options from argumentList
                           and configFile. Modified in place.
                fileNames: an PBAlignFiles object. Its sawriterFileName and
                           regionTable attributes may be overwritten.
            Output:
                new options by resolving options specified within
                --algorithmOptions and the original pbalign options
            Raises:
                ValueError: if conflicting values are found or if any error
                            occurs while parsing (e.g., an option is missing
                            its value or has a non-integer value).
        """
        if options.algorithmOptions is None:
            return options

        ignoredBinaryOptions = ["-m", "-out", "-V"]
        ignoredUnitaryOptions = ["-h", "--help", "--version", "-v", "-vv", "-sam"]

        items = self.__parseAlgorithmOptionItems(options.algorithmOptions)
        i = 0
        try:
            while i < len(items):
                infoMsg, errMsg, item = "", "", items[i]
                # Number of tokens the current option occupies: 2 for an
                # "option value" pair, 1 for a flag without a value.
                consumed = 2
                if item == "-sa":
                    val = real_upath(items[i + 1])
                    if fileNames.sawriterFileName != val:
                        infoMsg = "Over write sa file with {0}".format(val)
                        fileNames.sawriterFileName = val
                elif item == "-regionTable":
                    val = real_upath(items[i + 1])
                    if fileNames.regionTable != val:
                        infoMsg = "Over write regionTable with {0}.\n".format(val)
                        fileNames.regionTable = val
                elif item == "-bestn":
                    val = int(items[i + 1])
                    if options.maxHits is not None and int(options.maxHits) != val:
                        errMsg = (
                            "blasr -bestn specified within "
                            + "--algorithmOptions is equivalent to "
                            + "--maxHits. Conflicting values of "
                            + "--algorithmOptions '-bestn' and "
                            + "--maxHits have been found."
                        )
                    else:
                        options.maxHits = val
                elif item == "-minMatch":
                    val = int(items[i + 1])
                    if options.minAnchorSize is not None and int(options.minAnchorSize) != val:
                        errMsg = (
                            "blasr -minMatch specified within "
                            + "--algorithmOptions is equivalent to "
                            + "--minAnchorSize. Conflicting values "
                            + "of --algorithmOptions '-minMatch' and "
                            + "--minAnchorSize have been found."
                        )
                    else:
                        options.minAnchorSize = val
                elif item == "-nproc":
                    val = int(items[i + 1])
                    # The number of threads is not critical.
                    if options.nproc is None or int(options.nproc) != val:
                        infoMsg = "Over write nproc with {n}.".format(n=val)
                        options.nproc = val
                elif item == "-noSplitSubreads":
                    consumed = 1
                    if not options.noSplitSubreads:
                        infoMsg = "Over write noSplitSubreads with True."
                        options.noSplitSubreads = True
                elif item == "-concordant":
                    # A flag: only this token may be removed. Previously the
                    # two tokens following it were dropped as well.
                    consumed = 1
                    if not options.concordant:
                        infoMsg = "Over writer concordant with True."
                        options.concordant = True
                elif "-useccs" in item:  # -useccs, -useccsall, -useccsdenovo
                    # The value is the flag name itself, so the following
                    # token belongs to another option and must be kept.
                    consumed = 1
                    val = item.lstrip("-")
                    if options.useccs != val and options.useccs is not None:
                        errMsg = "Found conflicting options in " + "--algorithmOptions '{v}' \nand --useccs={u}".format(
                            v=item, u=options.useccs
                        )
                    else:
                        options.useccs = val
                elif item == "-seed" or item == "-randomSeed":
                    val = int(items[i + 1])
                    if options.seed is None or int(options.seed) != val:
                        infoMsg = "Overwrite random seed with {0}.".format(val)
                        options.seed = val
                elif item in ignoredBinaryOptions:
                    pass
                elif item in ignoredUnitaryOptions:
                    consumed = 1
                else:
                    # Unrecognized token: keep it and move on.
                    i += 1
                    continue

                if errMsg != "":
                    logging.error(errMsg)
                    raise ValueError(errMsg)

                if infoMsg != "":
                    logging.info(self.name + ": Resolve algorithmOptions. " + infoMsg)

                # Drop the handled option; i now points at the next token.
                del items[i : i + consumed]
        except Exception as e:
            # Any failure (including the conflict errors raised above) is
            # reported as a ValueError carrying the full option string.
            errMsg = "An error occured during parsing algorithmOptions " + "'{ao}': ".format(
                ao=options.algorithmOptions
            )
            logging.error(errMsg + str(e))
            raise ValueError(errMsg + str(e))

        # Update algorithmOptions when resolve is done
        options.algorithmOptions = " ".join(items)
        return options
Ejemplo n.º 5
0
    def _resolveAlgorithmOptions(self, options, fileNames):
        """ Resolve options specified within --algorithmOptions with
            options parsed from the command-line or the config file.
            Return updated options.
            If find conflicting values of the following options, error out.
               (1) --maxHits       and blasr --bestn
               (2) --minAnchorSize and blasr --minMatch
               (3) --useccs        and blasr --useccs/--useccsall/
                                   --useccsdenovo
            If find conflicting values of sawriter, regionTable, maxMatch,
            nproc and seed, the value given within --algorithmOptions is
            used.
            Every recognized option is removed from the algorithmOptions
            string; unrecognized tokens are kept in their original order.
            Input:
                options  : the original pbalign options from argumentList
                           and configFile. Modified in place.
                fileNames: an PBAlignFiles object. Its sawriterFileName and
                           regionTable attributes may be overwritten.
            Output:
                new options by resolving options specified within
                --algorithmOptions and the original pbalign options
            Raises:
                ValueError: if conflicting values are found or if any error
                            occurs while parsing (e.g., an option is missing
                            its value or has a non-integer value).
        """
        if options.algorithmOptions is None:
            return options

        ignoredBinaryOptions = ['-m', '-out', '-V']
        ignoredUnitaryOptions = ['-h', '--help', '--version',
                                 '-v', '-vv', '--sam', '--bam']

        items = self.__parseAlgorithmOptionItems(options.algorithmOptions)
        i = 0
        try:
            while i < len(items):
                infoMsg, errMsg, item = "", "", items[i]
                # Number of tokens the current option occupies: 2 for an
                # "option value" pair, 1 for a flag without a value.
                consumed = 2
                if item == "--sa":
                    val = real_upath(items[i+1])
                    if fileNames.sawriterFileName != val:
                        infoMsg = "Over write sa file with {0}".format(val)
                        fileNames.sawriterFileName = val
                elif item == "--regionTable":
                    val = real_upath(items[i+1])
                    if fileNames.regionTable != val:
                        infoMsg = "Over write regionTable with {0}.\n"\
                                  .format(val)
                        fileNames.regionTable = val
                elif item == "--bestn":
                    val = int(items[i+1])
                    if options.maxHits is not None and \
                            int(options.maxHits) != val:
                        errMsg = "blasr --bestn specified within " + \
                                 "--algorithmOptions is equivalent to " + \
                                 "--maxHits. Conflicting values of " + \
                                 "--algorithmOptions '--bestn' and " +\
                                 "--maxHits have been found."
                    else:
                        options.maxHits = val
                elif item == "--minMatch":
                    val = int(items[i+1])
                    if options.minAnchorSize is not None and \
                            int(options.minAnchorSize) != val:
                        errMsg = "blasr --minMatch specified within " + \
                                 "--algorithmOptions is equivalent to " + \
                                 "--minAnchorSize. Conflicting values " + \
                                 "of --algorithmOptions '--minMatch' and " + \
                                 "--minAnchorSize have been found."
                    else:
                        options.minAnchorSize = val
                elif item == "--maxMatch":
                    val = int(items[i+1])
                    # Also take the value when maxMatch is unset; otherwise
                    # it would be removed from algorithmOptions and lost.
                    if options.maxMatch is None or \
                            int(options.maxMatch) != val:
                        infoMsg = "Override maxMatch with {n}.".format(n=val)
                        options.maxMatch = val
                elif item == "--nproc":
                    val = int(items[i+1])
                    # The number of threads is not critical.
                    if options.nproc is None or \
                            int(options.nproc) != val:
                        infoMsg = "Over write nproc with {n}.".format(n=val)
                        options.nproc = val
                elif item == "--noSplitSubreads":
                    consumed = 1
                    if not options.noSplitSubreads:
                        infoMsg = "Over write noSplitSubreads with True."
                        options.noSplitSubreads = True
                elif item == "--concordant":
                    # A flag: only this token may be removed. Previously the
                    # two tokens following it were dropped as well.
                    consumed = 1
                    if not options.concordant:
                        infoMsg = "Over writer concordant with True."
                        options.concordant = True
                elif "--useccs" in item:  # useccs, useccsall, useccsdenovo
                    # The value is the flag name itself, so the following
                    # token belongs to another option and must be kept.
                    consumed = 1
                    val = item.lstrip('-')
                    if options.useccs != val and options.useccs is not None:
                        errMsg = "Found conflicting options in " + \
                            "--algorithmOptions '{v}' \nand --useccs={u}"\
                            .format(v=item, u=options.useccs)
                    else:
                        options.useccs = val
                elif item == "--unaligned":
                    val = str(items[i+1])
                    options.unaligned = val
                elif item == "--seed" or item == "--randomSeed":
                    val = int(items[i+1])
                    if options.seed is None or int(options.seed) != val:
                        infoMsg = "Overwrite random seed with {0}.".format(val)
                        options.seed = val
                elif item in ignoredBinaryOptions:
                    pass
                elif item in ignoredUnitaryOptions:
                    consumed = 1
                else:
                    # Unrecognized token: keep it and move on.
                    i += 1
                    continue

                if errMsg != "":
                    logging.error(errMsg)
                    raise ValueError(errMsg)

                if infoMsg != "":
                    logging.info(self.name + ": Resolve algorithmOptions. " +
                                 infoMsg)

                # Drop the handled option; i now points at the next token.
                del items[i:i+consumed]
        except Exception as e:
            # Any failure (including the conflict errors raised above) is
            # reported as a ValueError carrying the full option string.
            errMsg = "An error occured during parsing algorithmOptions " + \
                     "'{ao}': ".format(ao=options.algorithmOptions)
            logging.error(errMsg + str(e))
            raise ValueError(errMsg + str(e))

        # Existing suffix array always uses match size 8.
        # When BLASR search option -minMatch is less than 8, suffix array needs
        # to be created on the fly.
        if (options.minAnchorSize is not None and
                options.minAnchorSize != "" and
                int(options.minAnchorSize) < 8):
            logging.warning("Suffix array must be recreated on the fly when " +
                            "minMatch < 8, which may take a long time.")
            fileNames.sawriterFileName = None

        # Update algorithmOptions when resolve is done
        options.algorithmOptions = " ".join(items)
        return options