def evalConfigFunc(configPyFile,
                   getFuncName="getConfig",
                   getFuncArgs=[],
                   getFuncKwargs={},
                   extraEnv=None):
    """Evaluate a Python configuration file and call a function it defines.

    The file configPyFile is evaluated and the function named getFuncName
    (getConfig() by default) is looked up in the resulting environment and
    called.  The value returned by that call is returned, which is useful for
    config files that need to construct complex objects.

    The function is always called with configPyFile as its first positional
    argument, followed by the contents of getFuncArgs, with getFuncKwargs
    passed as keyword arguments.  If extraEnv is specified, it is handed to
    the file evaluation so its contents are available as module globals.

    Raises PycbioException if the name is not defined by the file, if it is
    not a function, or if the function itself raises an exception.
    """
    env = _evalConfigFile(configPyFile, configEnv={}, extraEnv=extraEnv)
    func = env.get(getFuncName)
    if func is None:
        raise PycbioException(
            "configuration script does not define function {}(): {} ".format(
                getFuncName, configPyFile))
    if not isinstance(func, FunctionType):
        raise PycbioException(
            "configuration script defines {}, however it is not a function: {}"
            .format(getFuncName, configPyFile))
    # the config file name always comes first, then the caller's arguments
    callArgs = [configPyFile, *getFuncArgs]
    try:
        result = func(*callArgs, **getFuncKwargs)
    except Exception as ex:
        # FIXME really need traceback here
        raise PycbioException(
            "Error from configuration function {}(): {}".format(
                getFuncName, configPyFile)) from ex
    return result
Beispiel #2
0
 def __init__(self, procDesc, returncode=None, stderr=None, cause=None):
     """Build the exception message describing how a process failed.

     procDesc is a description of the process that is appended to the
     message when not None.  returncode selects the leading text: None gives
     "exec failed", a negative value is reported as a signal (named via
     _getSigName) and any other value as an exit code.  A non-empty stderr is
     appended on a new line.  cause is passed through to PycbioException.
     """
     self.returncode = returncode
     self.stderr = stderr
     # None is a singleton, so test identity rather than equality
     if returncode is None:
         msg = "exec failed"
     elif returncode < 0:
         msg = "process signaled: " + _getSigName(-returncode)
     else:
         msg = "process exited " + str(returncode)
     if procDesc is not None:
         msg += ": " + procDesc
     if (stderr is not None) and (len(stderr) != 0):
         msg += ":\n" + stderr
     PycbioException.__init__(self, msg, cause=cause)
Beispiel #3
0
 def __init__(self, procDesc, returncode=None, stderr=None, cause=None):
     """Record the process outcome and assemble the exception message.

     The message starts with "exec failed" when returncode is None, with the
     signal name when returncode is negative, and with the exit code
     otherwise.  procDesc (if not None) and a non-empty stderr are appended.
     cause is passed through to PycbioException.
     """
     self.returncode = returncode
     self.stderr = stderr
     if returncode is None:
         pieces = ["exec failed"]
     elif returncode < 0:
         pieces = ["process signaled: " + _getSigName(-returncode)]
     else:
         pieces = ["process exited " + str(returncode)]
     if procDesc is not None:
         pieces.append(": " + procDesc)
     haveStderr = (stderr is not None) and (len(stderr) != 0)
     if haveStderr:
         pieces.append(":\n" + stderr)
     PycbioException.__init__(self, "".join(pieces), cause=cause)
    def _calcParams(self):
        """Calculate the binning parameters that are actually used.

        Calls self.data.compute() and then fills in the *Use attributes
        (binMinUse, binMaxUse, numBinsUse, binSizeUse, binFloorUse and
        binCeilUse) from the configured binMin, binMax, numBins and binSize,
        falling back to the data min/max and to 10 bins when neither a bin
        count nor a bin size was specified.

        Raises PycbioException when a bin size is available and a minimum is
        known, since deriving the number of bins from a bin size is not
        supported.
        """
        self.data.compute()
        self.binMinUse = self.binMin
        if self.binMinUse is None:
            self.binMinUse = self.data.min

        self.binMaxUse = self.binMax
        if self.binMaxUse is None:
            self.binMaxUse = self.data.max

        self.numBinsUse = self.numBins
        self.binSizeUse = self.binSize
        if (self.numBinsUse is None) and (self.binSizeUse is None):
            # default num bins and compute bin size from it below
            self.numBinsUse = 10

        if self.binMinUse is None:
            # no minimum even after consulting the data
            self.binSizeUse = self.binFloorUse = self.binCeilUse = 0
        elif self.binSizeUse is None:
            # compute bin size from num bins
            # NOTE(review): numBinsUse == 1 divides by zero here
            estBinSize = (self.binMaxUse - self.binMinUse) / (self.numBinsUse -
                                                              1)
            self.binSizeUse = (self.binMaxUse - self.binMinUse +
                               estBinSize) / self.numBinsUse
            # the first bin is centered on the minimum value
            self.binFloorUse = self.binMinUse - (self.binSizeUse / 2.0)
            self.binCeilUse = self.binFloorUse + (self.numBinsUse *
                                                  self.binSizeUse)
        else:
            # Computing num bins from bin size is not supported.  The disabled
            # attempt was: numBins = (binMax - binMin) // binSize with
            # binFloor = binMin and binCeil = binMax.
            raise PycbioException("doesn't work")
Beispiel #5
0
 def __skipToSeqTable(self, fh):
     """Advance fh past the header line that precedes the sequence records.

     Lines are consumed until one, with its last character dropped, equals
     self.expectedHeader.  Raises PycbioException if no such line exists.
     """
     # any() stops consuming fh as soon as the header is seen
     foundHeader = any(ln[:-1] == self.expectedHeader for ln in fh)
     if not foundHeader:
         raise PycbioException("expected assembly report header not found in " +
                               fh.name)
Beispiel #6
0
 def _parseLine(self, line):
     """Parse one name=value line and store the pair in self.

     Empty lines and lines starting with # are ignored.  The line is split at
     the first "=", and name and value are stripped of surrounding
     whitespace.  Raises PycbioException if there is no "=".
     """
     text = line.strip()
     if (len(text) == 0) or text.startswith("#"):
         return
     name, sep, value = text.partition("=")
     if sep == "":
         raise PycbioException("expected name=value, got: " + text)
     self[name.strip()] = value.strip()
Beispiel #7
0
 def getByName(self, name):
     """Return the entry stored under name; raise PycbioException if there
     is none."""
     found = self.get(name)
     if found is not None:
         return found
     raise PycbioException("can't find sequence {} in table {}".format(
         name, self.table))
 def getrByTranscriptId(self, transcriptId):
     """Get the required GencodeAttrs object for transcriptId.

     Returns the result of getByTranscriptId(); raises PycbioException if
     that result is None.
     """
     attrs = self.getByTranscriptId(transcriptId)
     if attrs is None:
         # the exception must be raised, not merely constructed
         raise PycbioException("transcriptId {} not found in {}".format(
             transcriptId, self.table))
     return attrs
 def getrByGeneId(self, geneId):
     """Get the required GencodeAttrs objects for geneId.

     Returns the result of getByGeneId(); raises PycbioException if that
     result is empty.
     """
     attrses = self.getByGeneId(geneId)
     if len(attrses) == 0:
         # the exception must be raised, not merely constructed
         raise PycbioException("geneId {} not found in {}".format(
             geneId, self.table))
     return attrses
    def __init__(self, lines):
        """Parse the output lines of para check into counters.

        All counters start at zero.  Each non-empty line is split on ":" and
        handed to self._parseLine(); a line with fewer than two fields, or
        one that _parseLine() does not accept, raises PycbioException.
        """
        # counters filled in by the simple parse
        self.unsubmitted = self.subErrors = self.queueErrors = 0
        self.trackingErrors = self.waiting = self.crashed = 0
        self.running = self.ranOk = self.totalJobs = 0
        # special cases
        self.paraResultsErrors = self.slow = self.hung = self.failed = 0

        for rawLine in lines:
            text = rawLine.strip()
            if len(text) == 0:
                # empty lines are skipped
                continue
            words = text.split(":")
            if (len(words) >= 2) and self._parseLine(words):
                continue
            raise PycbioException("don't know how to parse para check output line: {}".format(text))
 def _parseCds(self, line):
     """Parse a CDS line with self._parseRe and record its range in self.

     Group 1 of the match is used as the key; groups 2 and 3 are converted
     to int and each reduced by one to form the stored (start, end) tuple.
     Raises PycbioException if the line does not match.
     """
     match = self._parseRe.match(line)
     if match is None:
         raise PycbioException("can't parse CDS line: " + line)
     cdsRange = (int(match.group(2)) - 1, int(match.group(3)) - 1)
     self[match.group(1)] = cdsRange
Beispiel #12
0
 def getSubset(self, wantSet):
     """Return the stored subset object equal to wantSet.

     The subsets are built from self.elements on first use.  Raises
     PycbioException if no stored subset equals wantSet.
     """
     if self.subsets is None:
         self.subsets = self._makeSubsets(self.elements)
     # a private sentinel tells "no match" apart from any stored value
     notFound = object()
     match = next((ss for ss in self.subsets if ss == wantSet), notFound)
     if match is notFound:
         raise PycbioException("not a valid subset: " + str(wantSet))
     return match
 def getSubseq(self, seq):
     """Return the subSeq array that corresponds to seq.

     seq is compared with self.qSeq and then self.tSeq; raises
     PycbioException if it equals neither.
     """
     if seq == self.qSeq:
         return self.qSubSeqs
     if seq == self.tSeq:
         return self.tSubSeqs
     raise PycbioException("seq is not part of this alignment")
Beispiel #14
0
 def parse(cls, row, numStdCols=None):
     """Parse a row of BED string columns into an object built by cls.

     If numStdCols is specified, only that many columns are parsed as
     standard BED columns and the remainder goes to extraCols; otherwise up
     to the first 12 columns of the row are treated as standard.  Columns
     beyond those parsed are passed as None.  Raises PycbioException if the
     row has fewer than numStdCols columns.
     """
     assert ((numStdCols is None) or (3 <= numStdCols <= 12))
     if numStdCols is None:
         numStdCols = min(len(row), 12)
     if len(row) < numStdCols:
         raise PycbioException(
             "expected at least {} columns, found {}: ".format(
                 numStdCols, len(row)))
     chrom = row[0]
     chromStart = int(row[1])
     chromEnd = int(row[2])
     name = row[3] if numStdCols > 3 else None
     score = int(row[4]) if numStdCols > 4 else None
     strand = row[5] if numStdCols > 5 else None
     # thickStart/thickEnd are only taken as a pair
     if numStdCols > 7:
         thickStart, thickEnd = int(row[6]), int(row[7])
     else:
         thickStart = thickEnd = None
     itemRgb = row[8] if numStdCols > 8 else None
     blocks = (Bed._parseBlockColumns(chromStart, row)
               if numStdCols > 11 else None)
     extraCols = row[numStdCols:] if len(row) > numStdCols else None
     return cls(chrom, chromStart, chromEnd,
                name=name, score=score, strand=strand,
                thickStart=thickStart, thickEnd=thickEnd,
                itemRgb=itemRgb, blocks=blocks,
                extraCols=extraCols, numStdCols=numStdCols)
def reverseStrand(strand):
    """Return the opposite strand of '+' or '-', or None if strand is None.

    Raises PycbioException for any other value.
    """
    if strand is None:
        return None
    for given, opposite in (('+', '-'), ('-', '+')):
        if strand == given:
            return opposite
    raise PycbioException("invalid strand '{}'".format(strand))
def compressCmd(path, default="cat"):
    """Return the command used to compress data written to path.

    The command is chosen from the file extension: gzip for .gz and bzip2
    for .bz2.  For any other extension default is returned, which defaults
    to the `cat' command so that data just gets written through.  Raises
    PycbioException for .Z, as writing those files is not supported.
    """
    if path.endswith(".Z"):
        raise PycbioException("writing compress .Z files not supported")
    for ext, cmd in ((".gz", "gzip"), (".bz2", "bzip2")):
        if path.endswith(ext):
            return cmd
    return default
Beispiel #17
0
 def setup(self, opts):
     """initializing profiling, if requested"""
     if opts.profile is None:
         if opts.signal is not None:
             raise PycbioException("can't specify --profile-signal without --profile")
     else:
         if opts.signal is not None:
             self._setupSignalHandler(opts.signal)
         self.logFile = opts.profile
         self.profiler = cProfile.Profile()
         self.profiler.enable()
Beispiel #18
0
 def fromPhase(phase):
     """Construct a Frame from a GFF/GTF-like phase, given as an int or str.

     A str phase is converted to int first.  Phase 0 maps to Frame(0),
     phase 1 to Frame(2) and phase 2 to Frame(1); any other value raises
     PycbioException.
     """
     if isinstance(phase, str):
         phase = int(phase)
     for gffPhase, frameNum in ((0, 0), (1, 2), (2, 1)):
         if phase == gffPhase:
             return Frame(frameNum)
     raise PycbioException("invalid phase: {}".format(phase))
def findTmpDir(tmpDir=None):
    """Find the temporary directory to use.

    A tmpDir that is not None is returned as-is.  Otherwise the TMPDIR
    environment variable is used if it is set, and failing that the first
    existing directory from a fixed list of candidates.  Raises
    PycbioException if none of them exists.
    """
    if tmpDir is not None:
        return tmpDir
    envTmpDir = os.getenv("TMPDIR")
    if envTmpDir is not None:
        return envTmpDir
    # UCSC special checks
    candidates = ("/data/tmp", "/scratch/tmp", "/var/tmp", "/tmp")
    existing = next((d for d in candidates if os.path.exists(d)), None)
    if existing is None:
        raise PycbioException("can't find a tmp directory")
    return existing
Beispiel #20
0
 def add(self, seqId, start, end, value, strand=None):
     """Add an entry for a sequence and range, with an optional strand.

     The first entry added decides whether entries carry a strand; raises
     PycbioException if a later entry disagrees.  Entries are kept in a
     RangeBins object per (seqId, strand), created on demand.
     """
     self._checkStrand(strand)
     stranded = strand is not None
     if self.haveStrand is None:
         self.haveStrand = stranded
     elif self.haveStrand != stranded:
         raise PycbioException("all RangeFinder entries must either have strand or not have strand")
     key = (seqId, strand)
     bins = self.seqBins.get(key)
     if bins is None:
         bins = RangeBins(seqId, strand)
         self.seqBins[key] = bins
     bins.add(start, end, value)
def _evalConfigFile(configPyFile, configEnv, extraEnv=None):
    """Evaluate the configuration file in configEnv and return configEnv.

    Before evaluation the absolute path of configPyFile is stored under
    configPyFileVar, include_config is stored under its own name, and the
    contents of extraEnv (if specified) are merged in.  Any exception from
    reading or executing the file is re-raised as PycbioException with the
    original as its cause.
    """
    configEnv[configPyFileVar] = os.path.abspath(configPyFile)
    configEnv[include_config.__name__] = include_config
    if extraEnv is not None:
        configEnv.update(extraEnv)
    try:
        with open(configPyFile) as configFh:
            # NOTE: the file is executed as Python code, so it must be trusted
            source = configFh.read()
            exec(source, configEnv, configEnv)
    except Exception as ex:
        raise PycbioException("Error evaluating configuration file: {}".format(
            configPyFile)) from ex
    return configEnv
 def __init__(self, fileName, buildIdx=False, buildUniqIdx=False, buildRangeIdx=False):
     """Load all rows read by GenePredReader from fileName into self.

     The optional indices are built after loading: buildUniqIdx, buildIdx
     and buildRangeIdx each call the corresponding _build* method.  Raises
     PycbioException if both buildIdx and buildUniqIdx are requested.
     """
     if buildIdx and buildUniqIdx:
         raise PycbioException("can't specify both buildIdx and buildUniqIdx")
     for genePred in GenePredReader(fileName):
         self.append(genePred)
     # indices stay None unless their build is requested below
     self.names = None
     self.rangeMap = None
     if buildUniqIdx:
         self._buildUniqIdx()
     if buildIdx:
         self._buildIdx()
     if buildRangeIdx:
         self._buildRangeIdx()
def opengz(fileName, mode="r", buffering=-1, encoding=None, errors=None):
    """Open a file, going through a compression or decompression pipe when
    its extension indicates compression.

    Uncompressed files are opened with open().  For compressed files a
    mode starting with "r" reads through the decompression command and a
    mode starting with "w" writes through the compression command to
    fileName; any other mode raises PycbioException.
    """
    ioOpts = dict(buffering=buffering, encoding=encoding, errors=errors)
    if not isCompressed(fileName):
        return open(fileName, mode, **ioOpts)
    if mode.startswith("r"):
        readCmd = [decompressCmd(fileName), fileName]
        return pipettor.Popen(readCmd, mode=mode, **ioOpts)
    if mode.startswith("w"):
        writeCmd = [compressCmd(fileName)]
        return pipettor.Popen(writeCmd, mode=mode, stdout=fileName, **ioOpts)
    raise PycbioException("mode {} not support with compression for {}".format(mode, fileName))
Beispiel #24
0
 def _fifoMk(suffix="tmp", tmpDir=None):
     """Create a FIFO with a unique name in a tmp directory; return its path.

     Candidate names have the form <tmpDir>/<hostname>.<pid>.<n>.<suffix>,
     with n counting up from 0.  tmpDir defaults to $TMPDIR, or /var/tmp if
     that is not set.  The first candidate that _NamedFifo._fifoMkAtomic()
     accepts is returned.  Raises PycbioException if none is accepted
     within the try limit.
     """
     # FIXME: don't need suffix/tmpDir, unless this made of part the Fifo API
     if tmpDir is None:
         tmpDir = os.getenv("TMPDIR", "/var/tmp")
     prefix = "{}/{}.{}".format(tmpDir, socket.gethostname(), os.getpid())
     maxTries = 1000
     for unum in range(maxTries):
         path = "{}.{}.{}".format(prefix, unum, suffix)
         if _NamedFifo._fifoMkAtomic(path):
             return path
     # the pattern is shown in balanced quotes
     raise PycbioException("unable to create a unique FIFO name in the form \"{}.*.{}\" after {} tries".format(prefix, suffix, maxTries))
Beispiel #25
0
 def __parseRecord(self, fh, line):
     """Parse one tab-separated assembly report record and index it.

     The line must have exactly 10 columns, otherwise PycbioException is
     raised naming fh.  The record is appended to self.seqs and indexed by
     sequence name, and by GenBank accession, RefSeq accession and UCSC
     style name when those fields are not None.
     """
     row = line.split('\t')
     if len(row) != 10:
         raise PycbioException(
             "expected 10 columns in assemble report record, found " +
             str(len(row)) + " in " + fh.name)
     # the ninth column is converted to int
     rec = self.Record(row[0], row[1], row[2], row[3], row[4], row[5],
                       row[6], row[7], int(row[8]), row[9])
     self.seqs.append(rec)
     self.bySequenceName[rec.sequenceName] = rec
     # NOTE(review): the split columns are always strings, so these fields
     # are only None if Record converts them -- confirm against Record
     if rec.genBankAccn is not None:
         self.byGenBankAccn[rec.genBankAccn] = rec
     if rec.refSeqAccn is not None:
         self.byRefSeqAccn[rec.refSeqAccn] = rec
     if rec.ucscStyleName is not None:
         self.byUcscStyleName[rec.ucscStyleName] = rec
 def __init__(self, paraHost, runDir, paraDir, jobFile=None, cpu=None, mem=None, maxJobs=None, retries=None):
     """Record the settings for a parasol run and prepare its directory.

     paraDir should be relative to runDir or absolute, and jobFile should be
     relative to runDir or absolute.  The para directory is created if
     needed.  Raises PycbioException if jobFile is specified but does not
     exist.
     """
     self.paraHost = paraHost
     # symlinks can confuse parasol, as it can give two different names for a job.
     self.runDir = os.path.realpath(os.path.abspath(runDir))
     self.paraDir = os.path.realpath(paraDir)
     self.jobFile = jobFile
     self.cpu, self.mem = cpu, mem
     self.maxJobs, self.retries = maxJobs, retries
     fileOps.ensureDir(self._mkAbs(self.runDir, self.paraDir))
     if jobFile is None:
         return
     absJobFile = self._mkAbs(self.runDir, self.jobFile)
     if not os.path.exists(absJobFile):
         raise PycbioException("job file not found: {}".format(absJobFile))
Beispiel #27
0
 def _checkStrand(self, strand):
     """Raise PycbioException unless strand is None, "+" or "-"."""
     if strand in (None, "+", "-"):
         return
     raise PycbioException("invalid strand: {}".format(strand))
Beispiel #28
0
 def __init__(self):
     "initialize the exception with the fixed message 'task terminated'"
     PycbioException.__init__(self, "task terminated")
Beispiel #29
0
 def __init__(self, msg, cause=None):
     "initialize with message msg and an optional cause, both passed on to PycbioException"
     PycbioException.__init__(self, msg, cause)
Beispiel #30
0
 def __init__(self, msg, reader=None, cause=None):
     """Initialize the exception, optionally locating it in the input.

     If reader is specified, msg is prefixed with
     "<reader.fileName>:<reader.lineNum>: ".  The message and cause are
     passed on to PycbioException.
     """
     # None is a singleton, so test identity rather than equality
     if reader is not None:
         msg = str(reader.fileName) + ":" + str(reader.lineNum) + ": " + msg
     PycbioException.__init__(self, msg, cause)
Beispiel #31
0
 def __reportExprError(self, ex):
     """Print the formatted exception ex through self.verb, between a rule
     of 80 "=" characters and a rule of 80 "-" characters."""
     topRule = strOps.dup(80, "=") + "\n"
     self.verb.prall(topRule)
     report = PycbioException.formatExcept(ex) + "\n"
     self.verb.prall(report)
     bottomRule = strOps.dup(80, "-") + "\n"
     self.verb.prall(bottomRule)
Beispiel #32
0
 def __init__(self, msg, reader=None, cause=None):
     """Initialize the exception, optionally locating it in the input.

     If reader is specified, msg is prefixed with
     "<reader.fileName>:<reader.lineNum>: ".  The message and cause are
     passed on to PycbioException.
     """
     # None is a singleton, so test identity rather than equality
     if reader is not None:
         msg = str(reader.fileName) + ":" + str(reader.lineNum) + ": " + msg
     PycbioException.__init__(self, msg, cause)
Beispiel #33
0
 def __init__(self, msg, cause=None):
     "initialize with message msg and an optional cause, both passed on to PycbioException"
     PycbioException.__init__(self, msg, cause)
def addUniq(d, k, v):
    """Store v under key k in dict d; raise PycbioException if k is
    already present."""
    if k not in d:
        d[k] = v
        return
    raise PycbioException("item \"{}\" already in dict".format(str(k)))
 def _buildUniqIdx(self):
     """Build self.names, mapping each row's name to the row.

     Raises PycbioException if two rows share a name.
     """
     byName = self.names = dict()
     for gene in self:
         geneName = gene.name
         if geneName in byName:
             raise PycbioException("gene with this name already in index: " + geneName)
         byName[geneName] = gene